summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
authorAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-06-10 05:30:17 -0300
committerAndré Fabian Silva Delgado <emulatorman@parabola.nu>2016-06-10 05:30:17 -0300
commitd635711daa98be86d4c7fd01499c34f566b54ccb (patch)
treeaa5cc3760a27c3d57146498cb82fa549547de06c /fs
parentc91265cd0efb83778f015b4d4b1129bd2cfd075e (diff)
Linux-libre 4.6.2-gnu
Diffstat (limited to 'fs')
-rw-r--r--fs/9p/vfs_addr.c18
-rw-r--r--fs/9p/vfs_file.c4
-rw-r--r--fs/9p/vfs_super.c2
-rw-r--r--fs/Kconfig3
-rw-r--r--fs/Makefile2
-rw-r--r--fs/affs/file.c26
-rw-r--r--fs/affs/super.c5
-rw-r--r--fs/afs/dir.c2
-rw-r--r--fs/afs/file.c4
-rw-r--r--fs/afs/mntpt.c6
-rw-r--r--fs/afs/super.c4
-rw-r--r--fs/afs/write.c26
-rw-r--r--fs/aufs/branch.c209
-rw-r--r--fs/aufs/branch.h34
-rw-r--r--fs/aufs/cpup.c44
-rw-r--r--fs/aufs/dbgaufs.c14
-rw-r--r--fs/aufs/debug.c76
-rw-r--r--fs/aufs/dentry.c124
-rw-r--r--fs/aufs/dentry.h26
-rw-r--r--fs/aufs/dinfo.c124
-rw-r--r--fs/aufs/dir.c98
-rw-r--r--fs/aufs/dynop.c8
-rw-r--r--fs/aufs/export.c15
-rw-r--r--fs/aufs/f_op.c12
-rw-r--r--fs/aufs/fhsm.c18
-rw-r--r--fs/aufs/file.c102
-rw-r--r--fs/aufs/file.h8
-rw-r--r--fs/aufs/finfo.c11
-rw-r--r--fs/aufs/hfsnotify.c3
-rw-r--r--fs/aufs/hnotify.c24
-rw-r--r--fs/aufs/i_op.c90
-rw-r--r--fs/aufs/i_op_add.c66
-rw-r--r--fs/aufs/i_op_del.c44
-rw-r--r--fs/aufs/i_op_ren.c92
-rw-r--r--fs/aufs/iinfo.c95
-rw-r--r--fs/aufs/inode.c90
-rw-r--r--fs/aufs/inode.h41
-rw-r--r--fs/aufs/ioctl.c10
-rw-r--r--fs/aufs/module.c44
-rw-r--r--fs/aufs/module.h16
-rw-r--r--fs/aufs/mvdown.c48
-rw-r--r--fs/aufs/opts.c38
-rw-r--r--fs/aufs/plink.c12
-rw-r--r--fs/aufs/posix_acl.c2
-rw-r--r--fs/aufs/rdu.c16
-rw-r--r--fs/aufs/rwsem.h79
-rw-r--r--fs/aufs/sbinfo.c17
-rw-r--r--fs/aufs/super.c34
-rw-r--r--fs/aufs/super.h36
-rw-r--r--fs/aufs/sysfs.c26
-rw-r--r--fs/aufs/vdir.c10
-rw-r--r--fs/aufs/vfsub.c2
-rw-r--r--fs/aufs/wbr_policy.c76
-rw-r--r--fs/aufs/whout.c12
-rw-r--r--fs/aufs/wkq.c11
-rw-r--r--fs/aufs/wkq.h8
-rw-r--r--fs/aufs/xino.c71
-rw-r--r--fs/autofs4/autofs_i.h72
-rw-r--r--fs/autofs4/dev-ioctl.c57
-rw-r--r--fs/autofs4/expire.c84
-rw-r--r--fs/autofs4/init.c10
-rw-r--r--fs/autofs4/inode.c52
-rw-r--r--fs/autofs4/root.c165
-rw-r--r--fs/autofs4/symlink.c11
-rw-r--r--fs/autofs4/waitq.c78
-rw-r--r--fs/binfmt_elf.c2
-rw-r--r--fs/binfmt_elf_fdpic.c2
-rw-r--r--fs/block_dev.c10
-rw-r--r--fs/btrfs/backref.c12
-rw-r--r--fs/btrfs/check-integrity.c76
-rw-r--r--fs/btrfs/compression.c84
-rw-r--r--fs/btrfs/compression.h9
-rw-r--r--fs/btrfs/ctree.c46
-rw-r--r--fs/btrfs/ctree.h88
-rw-r--r--fs/btrfs/delayed-inode.c10
-rw-r--r--fs/btrfs/delayed-ref.c12
-rw-r--r--fs/btrfs/dev-replace.c136
-rw-r--r--fs/btrfs/dev-replace.h7
-rw-r--r--fs/btrfs/disk-io.c130
-rw-r--r--fs/btrfs/extent-tree.c65
-rw-r--r--fs/btrfs/extent_io.c304
-rw-r--r--fs/btrfs/extent_io.h11
-rw-r--r--fs/btrfs/extent_map.c8
-rw-r--r--fs/btrfs/file-item.c107
-rw-r--r--fs/btrfs/file.c216
-rw-r--r--fs/btrfs/free-space-cache.c30
-rw-r--r--fs/btrfs/inode-map.c13
-rw-r--r--fs/btrfs/inode.c422
-rw-r--r--fs/btrfs/ioctl.c138
-rw-r--r--fs/btrfs/lzo.c32
-rw-r--r--fs/btrfs/ordered-data.c6
-rw-r--r--fs/btrfs/print-tree.c23
-rw-r--r--fs/btrfs/props.c1
-rw-r--r--fs/btrfs/qgroup.c63
-rw-r--r--fs/btrfs/raid56.c28
-rw-r--r--fs/btrfs/reada.c296
-rw-r--r--fs/btrfs/relocation.c17
-rw-r--r--fs/btrfs/root-tree.c2
-rw-r--r--fs/btrfs/scrub.c56
-rw-r--r--fs/btrfs/send.c53
-rw-r--r--fs/btrfs/struct-funcs.c4
-rw-r--r--fs/btrfs/super.c52
-rw-r--r--fs/btrfs/tests/btrfs-tests.c3
-rw-r--r--fs/btrfs/tests/extent-io-tests.c44
-rw-r--r--fs/btrfs/tests/free-space-tests.c2
-rw-r--r--fs/btrfs/tests/inode-tests.c1
-rw-r--r--fs/btrfs/transaction.c13
-rw-r--r--fs/btrfs/tree-log.c102
-rw-r--r--fs/btrfs/tree-log.h2
-rw-r--r--fs/btrfs/volumes.c65
-rw-r--r--fs/btrfs/xattr.c67
-rw-r--r--fs/btrfs/zlib.c38
-rw-r--r--fs/buffer.c124
-rw-r--r--fs/cachefiles/daemon.c13
-rw-r--r--fs/cachefiles/interface.c11
-rw-r--r--fs/cachefiles/internal.h4
-rw-r--r--fs/cachefiles/namei.c28
-rw-r--r--fs/cachefiles/rdwr.c38
-rw-r--r--fs/ceph/addr.c426
-rw-r--r--fs/ceph/caps.c13
-rw-r--r--fs/ceph/dir.c73
-rw-r--r--fs/ceph/export.c13
-rw-r--r--fs/ceph/file.c47
-rw-r--r--fs/ceph/inode.c57
-rw-r--r--fs/ceph/mds_client.c15
-rw-r--r--fs/ceph/mds_client.h2
-rw-r--r--fs/ceph/snap.c16
-rw-r--r--fs/ceph/super.c55
-rw-r--r--fs/ceph/super.h23
-rw-r--r--fs/ceph/xattr.c78
-rw-r--r--fs/cifs/cifs_debug.c56
-rw-r--r--fs/cifs/cifs_debug.h2
-rw-r--r--fs/cifs/cifsencrypt.c32
-rw-r--r--fs/cifs/cifsfs.c12
-rw-r--r--fs/cifs/cifsglob.h8
-rw-r--r--fs/cifs/cifssmb.c16
-rw-r--r--fs/cifs/connect.c2
-rw-r--r--fs/cifs/file.c96
-rw-r--r--fs/cifs/inode.c10
-rw-r--r--fs/cifs/sess.c139
-rw-r--r--fs/cifs/smb2glob.h1
-rw-r--r--fs/cifs/smb2inode.c8
-rw-r--r--fs/cifs/smb2pdu.c16
-rw-r--r--fs/cifs/smb2proto.h2
-rw-r--r--fs/cifs/smbencrypt.c26
-rw-r--r--fs/compat_ioctl.c22
-rw-r--r--fs/configfs/dir.c53
-rw-r--r--fs/configfs/inode.c20
-rw-r--r--fs/configfs/item.c1
-rw-r--r--fs/configfs/mount.c4
-rw-r--r--fs/cramfs/README26
-rw-r--r--fs/cramfs/inode.c32
-rw-r--r--fs/crypto/Kconfig18
-rw-r--r--fs/crypto/Makefile3
-rw-r--r--fs/crypto/crypto.c568
-rw-r--r--fs/crypto/fname.c (renamed from fs/f2fs/crypto_fname.c)276
-rw-r--r--fs/crypto/keyinfo.c304
-rw-r--r--fs/crypto/policy.c229
-rw-r--r--fs/dax.c43
-rw-r--r--fs/dcache.c177
-rw-r--r--fs/devpts/inode.c100
-rw-r--r--fs/direct-io.c38
-rw-r--r--fs/dlm/config.c41
-rw-r--r--fs/dlm/lowcomms.c82
-rw-r--r--fs/ecryptfs/crypto.c156
-rw-r--r--fs/ecryptfs/ecryptfs_kernel.h13
-rw-r--r--fs/ecryptfs/file.c71
-rw-r--r--fs/ecryptfs/inode.c20
-rw-r--r--fs/ecryptfs/keystore.c226
-rw-r--r--fs/ecryptfs/main.c9
-rw-r--r--fs/ecryptfs/mmap.c45
-rw-r--r--fs/ecryptfs/read_write.c14
-rw-r--r--fs/ecryptfs/super.c1
-rw-r--r--fs/efivarfs/super.c4
-rw-r--r--fs/eventfd.c42
-rw-r--r--fs/eventpoll.c2
-rw-r--r--fs/exec.c100
-rw-r--r--fs/exofs/dir.c30
-rw-r--r--fs/exofs/inode.c34
-rw-r--r--fs/exofs/namei.c4
-rw-r--r--fs/ext2/dir.c36
-rw-r--r--fs/ext2/ext2.h3
-rw-r--r--fs/ext2/namei.c6
-rw-r--r--fs/ext2/super.c25
-rw-r--r--fs/ext2/xattr.c139
-rw-r--r--fs/ext2/xattr.h21
-rw-r--r--fs/ext4/crypto.c65
-rw-r--r--fs/ext4/crypto_fname.c32
-rw-r--r--fs/ext4/crypto_key.c42
-rw-r--r--fs/ext4/dir.c6
-rw-r--r--fs/ext4/ext4.h85
-rw-r--r--fs/ext4/ext4_crypto.h2
-rw-r--r--fs/ext4/ext4_extents.h2
-rw-r--r--fs/ext4/extents.c128
-rw-r--r--fs/ext4/extents_status.c4
-rw-r--r--fs/ext4/file.c133
-rw-r--r--fs/ext4/ialloc.c61
-rw-r--r--fs/ext4/indirect.c29
-rw-r--r--fs/ext4/inline.c26
-rw-r--r--fs/ext4/inode.c534
-rw-r--r--fs/ext4/ioctl.c2
-rw-r--r--fs/ext4/mballoc.c131
-rw-r--r--fs/ext4/mballoc.h12
-rw-r--r--fs/ext4/migrate.c2
-rw-r--r--fs/ext4/mmp.c34
-rw-r--r--fs/ext4/move_extent.c16
-rw-r--r--fs/ext4/namei.c2
-rw-r--r--fs/ext4/page-io.c32
-rw-r--r--fs/ext4/readpage.c14
-rw-r--r--fs/ext4/super.c55
-rw-r--r--fs/ext4/symlink.c4
-rw-r--r--fs/ext4/xattr.c198
-rw-r--r--fs/ext4/xattr.h3
-rw-r--r--fs/f2fs/Kconfig12
-rw-r--r--fs/f2fs/Makefile2
-rw-r--r--fs/f2fs/checkpoint.c77
-rw-r--r--fs/f2fs/crypto.c491
-rw-r--r--fs/f2fs/crypto_key.c254
-rw-r--r--fs/f2fs/crypto_policy.c210
-rw-r--r--fs/f2fs/data.c484
-rw-r--r--fs/f2fs/debug.c6
-rw-r--r--fs/f2fs/dir.c92
-rw-r--r--fs/f2fs/extent_cache.c176
-rw-r--r--fs/f2fs/f2fs.h325
-rw-r--r--fs/f2fs/f2fs_crypto.h151
-rw-r--r--fs/f2fs/file.c188
-rw-r--r--fs/f2fs/gc.c245
-rw-r--r--fs/f2fs/inline.c53
-rw-r--r--fs/f2fs/inode.c15
-rw-r--r--fs/f2fs/namei.c172
-rw-r--r--fs/f2fs/node.c233
-rw-r--r--fs/f2fs/node.h26
-rw-r--r--fs/f2fs/recovery.c16
-rw-r--r--fs/f2fs/segment.c402
-rw-r--r--fs/f2fs/segment.h5
-rw-r--r--fs/f2fs/super.c148
-rw-r--r--fs/f2fs/trace.c6
-rw-r--r--fs/f2fs/xattr.c6
-rw-r--r--fs/f2fs/xattr.h3
-rw-r--r--fs/fat/Kconfig18
-rw-r--r--fs/fat/inode.c4
-rw-r--r--fs/freevxfs/vxfs_immed.c4
-rw-r--r--fs/freevxfs/vxfs_lookup.c12
-rw-r--r--fs/freevxfs/vxfs_subr.c2
-rw-r--r--fs/fs-writeback.c2
-rw-r--r--fs/fscache/page.c10
-rw-r--r--fs/fuse/dev.c26
-rw-r--r--fs/fuse/file.c95
-rw-r--r--fs/fuse/inode.c16
-rw-r--r--fs/gfs2/aops.c44
-rw-r--r--fs/gfs2/bmap.c12
-rw-r--r--fs/gfs2/dir.c6
-rw-r--r--fs/gfs2/export.c2
-rw-r--r--fs/gfs2/file.c16
-rw-r--r--fs/gfs2/glock.c10
-rw-r--r--fs/gfs2/incore.h1
-rw-r--r--fs/gfs2/inode.c71
-rw-r--r--fs/gfs2/inode.h5
-rw-r--r--fs/gfs2/meta_io.c4
-rw-r--r--fs/gfs2/ops_fstype.c2
-rw-r--r--fs/gfs2/quota.c14
-rw-r--r--fs/gfs2/rgrp.c5
-rw-r--r--fs/gfs2/super.c26
-rw-r--r--fs/hfs/bnode.c12
-rw-r--r--fs/hfs/btree.c20
-rw-r--r--fs/hfs/inode.c8
-rw-r--r--fs/hfsplus/bitmap.c2
-rw-r--r--fs/hfsplus/bnode.c90
-rw-r--r--fs/hfsplus/btree.c22
-rw-r--r--fs/hfsplus/inode.c8
-rw-r--r--fs/hfsplus/super.c2
-rw-r--r--fs/hfsplus/xattr.c6
-rw-r--r--fs/hostfs/hostfs_kern.c18
-rw-r--r--fs/hpfs/super.c42
-rw-r--r--fs/hugetlbfs/inode.c10
-rw-r--r--fs/isofs/compress.c36
-rw-r--r--fs/isofs/inode.c2
-rw-r--r--fs/isofs/rock.c13
-rw-r--r--fs/jbd2/commit.c53
-rw-r--r--fs/jbd2/journal.c28
-rw-r--r--fs/jbd2/recovery.c31
-rw-r--r--fs/jbd2/revoke.c60
-rw-r--r--fs/jbd2/transaction.c26
-rw-r--r--fs/jffs2/debug.c8
-rw-r--r--fs/jffs2/file.c23
-rw-r--r--fs/jffs2/fs.c8
-rw-r--r--fs/jffs2/gc.c72
-rw-r--r--fs/jffs2/jffs2_fs_sb.h2
-rw-r--r--fs/jffs2/nodelist.c8
-rw-r--r--fs/jffs2/nodemgmt.c4
-rw-r--r--fs/jffs2/wbuf.c6
-rw-r--r--fs/jffs2/write.c7
-rw-r--r--fs/jfs/jfs_metapage.c42
-rw-r--r--fs/jfs/jfs_metapage.h4
-rw-r--r--fs/jfs/super.c2
-rw-r--r--fs/kernfs/dir.c210
-rw-r--r--fs/kernfs/mount.c88
-rw-r--r--fs/libfs.c24
-rw-r--r--fs/logfs/dev_bdev.c2
-rw-r--r--fs/logfs/dev_mtd.c10
-rw-r--r--fs/logfs/dir.c12
-rw-r--r--fs/logfs/file.c26
-rw-r--r--fs/logfs/readwrite.c20
-rw-r--r--fs/logfs/segment.c28
-rw-r--r--fs/logfs/super.c16
-rw-r--r--fs/mbcache.c1093
-rw-r--r--fs/minix/dir.c18
-rw-r--r--fs/minix/namei.c4
-rw-r--r--fs/mpage.c25
-rw-r--r--fs/namei.c364
-rw-r--r--fs/ncpfs/dir.c10
-rw-r--r--fs/ncpfs/ncplib_kernel.h2
-rw-r--r--fs/nfs/blocklayout/blocklayout.c96
-rw-r--r--fs/nfs/blocklayout/blocklayout.h18
-rw-r--r--fs/nfs/blocklayout/dev.c144
-rw-r--r--fs/nfs/blocklayout/extent_tree.c44
-rw-r--r--fs/nfs/blocklayout/rpc_pipefs.c2
-rw-r--r--fs/nfs/callback.h3
-rw-r--r--fs/nfs/callback_proc.c71
-rw-r--r--fs/nfs/callback_xdr.c12
-rw-r--r--fs/nfs/client.c8
-rw-r--r--fs/nfs/dir.c16
-rw-r--r--fs/nfs/direct.c8
-rw-r--r--fs/nfs/file.c32
-rw-r--r--fs/nfs/flexfilelayout/flexfilelayoutdev.c2
-rw-r--r--fs/nfs/inode.c2
-rw-r--r--fs/nfs/internal.h12
-rw-r--r--fs/nfs/nfs4file.c33
-rw-r--r--fs/nfs/nfs4proc.c76
-rw-r--r--fs/nfs/nfs4session.c54
-rw-r--r--fs/nfs/nfs4session.h8
-rw-r--r--fs/nfs/nfs4xdr.c2
-rw-r--r--fs/nfs/objlayout/objio_osd.c2
-rw-r--r--fs/nfs/pagelist.c6
-rw-r--r--fs/nfs/pnfs.c6
-rw-r--r--fs/nfs/pnfs_nfs.c44
-rw-r--r--fs/nfs/read.c16
-rw-r--r--fs/nfs/write.c8
-rw-r--r--fs/nfsd/Kconfig28
-rw-r--r--fs/nfsd/Makefile4
-rw-r--r--fs/nfsd/blocklayout.c298
-rw-r--r--fs/nfsd/blocklayoutxdr.c77
-rw-r--r--fs/nfsd/blocklayoutxdr.h14
-rw-r--r--fs/nfsd/nfs3proc.c7
-rw-r--r--fs/nfsd/nfs4layouts.c31
-rw-r--r--fs/nfsd/nfs4proc.c8
-rw-r--r--fs/nfsd/nfs4recover.c29
-rw-r--r--fs/nfsd/nfs4state.c29
-rw-r--r--fs/nfsd/nfs4xdr.c13
-rw-r--r--fs/nfsd/pnfs.h8
-rw-r--r--fs/nfsd/vfs.c4
-rw-r--r--fs/nfsd/vfs.h19
-rw-r--r--fs/nilfs2/bmap.c2
-rw-r--r--fs/nilfs2/btnode.c10
-rw-r--r--fs/nilfs2/dir.c32
-rw-r--r--fs/nilfs2/gcinode.c2
-rw-r--r--fs/nilfs2/inode.c4
-rw-r--r--fs/nilfs2/mdt.c14
-rw-r--r--fs/nilfs2/namei.c4
-rw-r--r--fs/nilfs2/page.c20
-rw-r--r--fs/nilfs2/recovery.c4
-rw-r--r--fs/nilfs2/segment.c2
-rw-r--r--fs/ntfs/aops.c50
-rw-r--r--fs/ntfs/aops.h4
-rw-r--r--fs/ntfs/attrib.c28
-rw-r--r--fs/ntfs/bitmap.c10
-rw-r--r--fs/ntfs/compress.c77
-rw-r--r--fs/ntfs/dir.c56
-rw-r--r--fs/ntfs/file.c56
-rw-r--r--fs/ntfs/index.c14
-rw-r--r--fs/ntfs/inode.c12
-rw-r--r--fs/ntfs/lcnalloc.c6
-rw-r--r--fs/ntfs/logfile.c16
-rw-r--r--fs/ntfs/mft.c34
-rw-r--r--fs/ntfs/ntfs.h2
-rw-r--r--fs/ntfs/super.c72
-rw-r--r--fs/ocfs2/Makefile3
-rw-r--r--fs/ocfs2/acl.c87
-rw-r--r--fs/ocfs2/acl.h5
-rw-r--r--fs/ocfs2/alloc.c133
-rw-r--r--fs/ocfs2/aops.c1189
-rw-r--r--fs/ocfs2/aops.h19
-rw-r--r--fs/ocfs2/cluster/heartbeat.c20
-rw-r--r--fs/ocfs2/cluster/nodemanager.c22
-rw-r--r--fs/ocfs2/dlm/dlmcommon.h26
-rw-r--r--fs/ocfs2/dlm/dlmconvert.c6
-rw-r--r--fs/ocfs2/dlm/dlmdomain.c13
-rw-r--r--fs/ocfs2/dlm/dlmmaster.c129
-rw-r--r--fs/ocfs2/dlm/dlmrecovery.c40
-rw-r--r--fs/ocfs2/dlm/dlmthread.c13
-rw-r--r--fs/ocfs2/dlmfs/dlmfs.c4
-rw-r--r--fs/ocfs2/file.c183
-rw-r--r--fs/ocfs2/filecheck.c606
-rw-r--r--fs/ocfs2/filecheck.h49
-rw-r--r--fs/ocfs2/inode.c228
-rw-r--r--fs/ocfs2/inode.h9
-rw-r--r--fs/ocfs2/journal.c8
-rw-r--r--fs/ocfs2/localalloc.c4
-rw-r--r--fs/ocfs2/mmap.c10
-rw-r--r--fs/ocfs2/namei.c23
-rw-r--r--fs/ocfs2/ocfs2.h28
-rw-r--r--fs/ocfs2/ocfs2_trace.h20
-rw-r--r--fs/ocfs2/quota_global.c34
-rw-r--r--fs/ocfs2/refcounttree.c41
-rw-r--r--fs/ocfs2/resize.c2
-rw-r--r--fs/ocfs2/stackglue.c3
-rw-r--r--fs/ocfs2/stackglue.h2
-rw-r--r--fs/ocfs2/super.c53
-rw-r--r--fs/ocfs2/super.h2
-rw-r--r--fs/ocfs2/xattr.c14
-rw-r--r--fs/ocfs2/xattr.h4
-rw-r--r--fs/open.c12
-rw-r--r--fs/orangefs/Kconfig6
-rw-r--r--fs/orangefs/Makefile10
-rw-r--r--fs/orangefs/acl.c175
-rw-r--r--fs/orangefs/dcache.c138
-rw-r--r--fs/orangefs/devorangefs-req.c943
-rw-r--r--fs/orangefs/dir.c396
-rw-r--r--fs/orangefs/downcall.h133
-rw-r--r--fs/orangefs/file.c717
-rw-r--r--fs/orangefs/inode.c461
-rw-r--r--fs/orangefs/namei.c462
-rw-r--r--fs/orangefs/orangefs-bufmap.c556
-rw-r--r--fs/orangefs/orangefs-bufmap.h36
-rw-r--r--fs/orangefs/orangefs-cache.c161
-rw-r--r--fs/orangefs/orangefs-debug.h92
-rw-r--r--fs/orangefs/orangefs-debugfs.c454
-rw-r--r--fs/orangefs/orangefs-debugfs.h3
-rw-r--r--fs/orangefs/orangefs-dev-proto.h62
-rw-r--r--fs/orangefs/orangefs-kernel.h623
-rw-r--r--fs/orangefs/orangefs-mod.c293
-rw-r--r--fs/orangefs/orangefs-sysfs.c1772
-rw-r--r--fs/orangefs/orangefs-sysfs.h2
-rw-r--r--fs/orangefs/orangefs-utils.c1052
-rw-r--r--fs/orangefs/protocol.h455
-rw-r--r--fs/orangefs/super.c559
-rw-r--r--fs/orangefs/symlink.c19
-rw-r--r--fs/orangefs/upcall.h246
-rw-r--r--fs/orangefs/waitqueue.c357
-rw-r--r--fs/orangefs/xattr.c530
-rw-r--r--fs/overlayfs/copy_up.c35
-rw-r--r--fs/overlayfs/dir.c61
-rw-r--r--fs/overlayfs/overlayfs.h1
-rw-r--r--fs/overlayfs/readdir.c53
-rw-r--r--fs/overlayfs/super.c22
-rw-r--r--fs/pipe.c6
-rw-r--r--fs/proc/base.c69
-rw-r--r--fs/proc/meminfo.c31
-rw-r--r--fs/proc/namespaces.c3
-rw-r--r--fs/proc/page.c8
-rw-r--r--fs/proc/task_mmu.c16
-rw-r--r--fs/proc/vmcore.c11
-rw-r--r--fs/pstore/inode.c4
-rw-r--r--fs/pstore/ram.c4
-rw-r--r--fs/qnx6/dir.c16
-rw-r--r--fs/qnx6/inode.c4
-rw-r--r--fs/qnx6/qnx6.h2
-rw-r--r--fs/quota/dquot.c64
-rw-r--r--fs/quota/quota.c70
-rw-r--r--fs/quota/quota_tree.c67
-rw-r--r--fs/quota/quota_v2.c6
-rw-r--r--fs/ramfs/inode.c4
-rw-r--r--fs/read_write.c197
-rw-r--r--fs/reiserfs/file.c4
-rw-r--r--fs/reiserfs/inode.c44
-rw-r--r--fs/reiserfs/ioctl.c4
-rw-r--r--fs/reiserfs/journal.c6
-rw-r--r--fs/reiserfs/stree.c4
-rw-r--r--fs/reiserfs/super.c1
-rw-r--r--fs/reiserfs/tail_conversion.c4
-rw-r--r--fs/reiserfs/xattr.c18
-rw-r--r--fs/select.c8
-rw-r--r--fs/seq_file.c7
-rw-r--r--fs/splice.c37
-rw-r--r--fs/squashfs/block.c4
-rw-r--r--fs/squashfs/cache.c18
-rw-r--r--fs/squashfs/decompressor.c2
-rw-r--r--fs/squashfs/file.c24
-rw-r--r--fs/squashfs/file_direct.c22
-rw-r--r--fs/squashfs/lz4_wrapper.c8
-rw-r--r--fs/squashfs/lzo_wrapper.c8
-rw-r--r--fs/squashfs/page_actor.c4
-rw-r--r--fs/squashfs/page_actor.h2
-rw-r--r--fs/squashfs/super.c2
-rw-r--r--fs/squashfs/symlink.c6
-rw-r--r--fs/squashfs/xz_wrapper.c4
-rw-r--r--fs/squashfs/zlib_wrapper.c4
-rw-r--r--fs/sync.c4
-rw-r--r--fs/sysv/dir.c18
-rw-r--r--fs/sysv/namei.c4
-rw-r--r--fs/ubifs/Makefile1
-rw-r--r--fs/ubifs/file.c54
-rw-r--r--fs/ubifs/misc.c57
-rw-r--r--fs/ubifs/super.c6
-rw-r--r--fs/ubifs/ubifs.h45
-rw-r--r--fs/ubifs/xattr.c1
-rw-r--r--fs/udf/dir.c13
-rw-r--r--fs/udf/file.c6
-rw-r--r--fs/udf/inode.c4
-rw-r--r--fs/udf/namei.c29
-rw-r--r--fs/udf/super.c38
-rw-r--r--fs/udf/udfdecl.h21
-rw-r--r--fs/udf/unicode.c642
-rw-r--r--fs/ufs/balloc.c6
-rw-r--r--fs/ufs/dir.c32
-rw-r--r--fs/ufs/inode.c4
-rw-r--r--fs/ufs/namei.c6
-rw-r--r--fs/ufs/util.c4
-rw-r--r--fs/ufs/util.h2
-rw-r--r--fs/xfs/Makefile3
-rw-r--r--fs/xfs/libxfs/xfs_alloc_btree.c2
-rw-r--r--fs/xfs/libxfs/xfs_attr_sf.h16
-rw-r--r--fs/xfs/libxfs/xfs_bmap.c176
-rw-r--r--fs/xfs/libxfs/xfs_bmap_btree.c5
-rw-r--r--fs/xfs/libxfs/xfs_btree.c32
-rw-r--r--fs/xfs/libxfs/xfs_da_format.h16
-rw-r--r--fs/xfs/libxfs/xfs_dir2.c12
-rw-r--r--fs/xfs/libxfs/xfs_dir2_node.c4
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c4
-rw-r--r--fs/xfs/libxfs/xfs_ialloc_btree.c12
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c170
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.h38
-rw-r--r--fs/xfs/libxfs/xfs_inode_fork.c3
-rw-r--r--fs/xfs/libxfs/xfs_log_format.h19
-rw-r--r--fs/xfs/libxfs/xfs_quota_defs.h3
-rw-r--r--fs/xfs/libxfs/xfs_rtbitmap.c32
-rw-r--r--fs/xfs/libxfs/xfs_sb.h1
-rw-r--r--fs/xfs/libxfs/xfs_shared.h1
-rw-r--r--fs/xfs/xfs_aops.c1047
-rw-r--r--fs/xfs/xfs_aops.h4
-rw-r--r--fs/xfs/xfs_bmap_util.c12
-rw-r--r--fs/xfs/xfs_buf.c2
-rw-r--r--fs/xfs/xfs_buf.h26
-rw-r--r--fs/xfs/xfs_buf_item.c10
-rw-r--r--fs/xfs/xfs_dir2_readdir.c2
-rw-r--r--fs/xfs/xfs_discard.c2
-rw-r--r--fs/xfs/xfs_dquot.c129
-rw-r--r--fs/xfs/xfs_export.c4
-rw-r--r--fs/xfs/xfs_file.c100
-rw-r--r--fs/xfs/xfs_filestream.c4
-rw-r--r--fs/xfs/xfs_fsops.c4
-rw-r--r--fs/xfs/xfs_fsops.h1
-rw-r--r--fs/xfs/xfs_icache.c43
-rw-r--r--fs/xfs/xfs_inode.c200
-rw-r--r--fs/xfs/xfs_inode.h10
-rw-r--r--fs/xfs/xfs_inode_item.c82
-rw-r--r--fs/xfs/xfs_ioctl.c121
-rw-r--r--fs/xfs/xfs_iops.c59
-rw-r--r--fs/xfs/xfs_itable.c22
-rw-r--r--fs/xfs/xfs_linux.h2
-rw-r--r--fs/xfs/xfs_log.c152
-rw-r--r--fs/xfs/xfs_log_recover.c97
-rw-r--r--fs/xfs/xfs_mount.c26
-rw-r--r--fs/xfs/xfs_mount.h35
-rw-r--r--fs/xfs/xfs_ondisk.h117
-rw-r--r--fs/xfs/xfs_pnfs.c4
-rw-r--r--fs/xfs/xfs_pnfs.h2
-rw-r--r--fs/xfs/xfs_qm.c55
-rw-r--r--fs/xfs/xfs_qm.h48
-rw-r--r--fs/xfs/xfs_qm_syscalls.c27
-rw-r--r--fs/xfs/xfs_quotaops.c36
-rw-r--r--fs/xfs/xfs_rtalloc.c2
-rw-r--r--fs/xfs/xfs_super.c574
-rw-r--r--fs/xfs/xfs_super.h4
-rw-r--r--fs/xfs/xfs_sysfs.c78
-rw-r--r--fs/xfs/xfs_trace.h9
-rw-r--r--fs/xfs/xfs_trans.c4
-rw-r--r--fs/xfs/xfs_trans.h1
-rw-r--r--fs/xfs/xfs_trans_ail.c5
-rw-r--r--fs/xfs/xfs_trans_buf.c10
-rw-r--r--fs/xfs/xfs_trans_dquot.c15
-rw-r--r--fs/xfs/xfs_trans_inode.c14
572 files changed, 27203 insertions, 12686 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index e9e04376c..ac9225e86 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -153,7 +153,7 @@ static void v9fs_invalidate_page(struct page *page, unsigned int offset,
* If called with zero offset, we should release
* the private state assocated with the page
*/
- if (offset == 0 && length == PAGE_CACHE_SIZE)
+ if (offset == 0 && length == PAGE_SIZE)
v9fs_fscache_invalidate_page(page);
}
@@ -166,10 +166,10 @@ static int v9fs_vfs_writepage_locked(struct page *page)
struct bio_vec bvec;
int err, len;
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
+ if (page->index == size >> PAGE_SHIFT)
+ len = size & ~PAGE_MASK;
else
- len = PAGE_CACHE_SIZE;
+ len = PAGE_SIZE;
bvec.bv_page = page;
bvec.bv_offset = 0;
@@ -271,7 +271,7 @@ static int v9fs_write_begin(struct file *filp, struct address_space *mapping,
int retval = 0;
struct page *page;
struct v9fs_inode *v9inode;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
struct inode *inode = mapping->host;
@@ -288,11 +288,11 @@ start:
if (PageUptodate(page))
goto out;
- if (len == PAGE_CACHE_SIZE)
+ if (len == PAGE_SIZE)
goto out;
retval = v9fs_fid_readpage(v9inode->writeback_fid, page);
- page_cache_release(page);
+ put_page(page);
if (!retval)
goto start;
out:
@@ -313,7 +313,7 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
/*
* zero out the rest of the area
*/
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned from = pos & (PAGE_SIZE - 1);
zero_user(page, from + copied, len - copied);
flush_dcache_page(page);
@@ -331,7 +331,7 @@ static int v9fs_write_end(struct file *filp, struct address_space *mapping,
}
set_page_dirty(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return copied;
}
diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c
index eadc894fa..b84c291ba 100644
--- a/fs/9p/vfs_file.c
+++ b/fs/9p/vfs_file.c
@@ -421,8 +421,8 @@ v9fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct inode *inode = file_inode(file);
loff_t i_size;
unsigned long pg_start, pg_end;
- pg_start = origin >> PAGE_CACHE_SHIFT;
- pg_end = (origin + retval - 1) >> PAGE_CACHE_SHIFT;
+ pg_start = origin >> PAGE_SHIFT;
+ pg_end = (origin + retval - 1) >> PAGE_SHIFT;
if (inode->i_mapping && inode->i_mapping->nrpages)
invalidate_inode_pages2_range(inode->i_mapping,
pg_start, pg_end);
diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c
index bf495cede..de3ed8629 100644
--- a/fs/9p/vfs_super.c
+++ b/fs/9p/vfs_super.c
@@ -87,7 +87,7 @@ v9fs_fill_super(struct super_block *sb, struct v9fs_session_info *v9ses,
sb->s_op = &v9fs_super_ops;
sb->s_bdi = &v9ses->bdi;
if (v9ses->cache)
- sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_CACHE_SIZE;
+ sb->s_bdi->ra_pages = (VM_MAX_READAHEAD * 1024)/PAGE_SIZE;
sb->s_flags |= MS_ACTIVE | MS_DIRSYNC | MS_NOATIME;
if (!v9ses->cache)
diff --git a/fs/Kconfig b/fs/Kconfig
index 5c1e6f459..349581307 100644
--- a/fs/Kconfig
+++ b/fs/Kconfig
@@ -84,6 +84,8 @@ config MANDATORY_FILE_LOCKING
To the best of my knowledge this is dead code that no one cares about.
+source "fs/crypto/Kconfig"
+
source "fs/notify/Kconfig"
source "fs/quota/Kconfig"
@@ -208,6 +210,7 @@ menuconfig MISC_FILESYSTEMS
if MISC_FILESYSTEMS
+source "fs/orangefs/Kconfig"
source "fs/adfs/Kconfig"
source "fs/affs/Kconfig"
source "fs/ecryptfs/Kconfig"
diff --git a/fs/Makefile b/fs/Makefile
index 0c61756a2..1e728bce9 100644
--- a/fs/Makefile
+++ b/fs/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_EVENTFD) += eventfd.o
obj-$(CONFIG_USERFAULTFD) += userfaultfd.o
obj-$(CONFIG_AIO) += aio.o
obj-$(CONFIG_FS_DAX) += dax.o
+obj-$(CONFIG_FS_ENCRYPTION) += crypto/
obj-$(CONFIG_FILE_LOCKING) += locks.o
obj-$(CONFIG_COMPAT) += compat.o compat_ioctl.o
obj-$(CONFIG_BINFMT_AOUT) += binfmt_aout.o
@@ -106,6 +107,7 @@ obj-$(CONFIG_AUTOFS4_FS) += autofs4/
obj-$(CONFIG_ADFS_FS) += adfs/
obj-$(CONFIG_FUSE_FS) += fuse/
obj-$(CONFIG_OVERLAY_FS) += overlayfs/
+obj-$(CONFIG_ORANGEFS_FS) += orangefs/
obj-$(CONFIG_UDF_FS) += udf/
obj-$(CONFIG_SUN_OPENPROMFS) += openpromfs/
obj-$(CONFIG_OMFS_FS) += omfs/
diff --git a/fs/affs/file.c b/fs/affs/file.c
index 22fc7c802..0cde55005 100644
--- a/fs/affs/file.c
+++ b/fs/affs/file.c
@@ -510,9 +510,9 @@ affs_do_readpage_ofs(struct page *page, unsigned to)
pr_debug("%s(%lu, %ld, 0, %d)\n", __func__, inode->i_ino,
page->index, to);
- BUG_ON(to > PAGE_CACHE_SIZE);
+ BUG_ON(to > PAGE_SIZE);
bsize = AFFS_SB(sb)->s_data_blksize;
- tmp = page->index << PAGE_CACHE_SHIFT;
+ tmp = page->index << PAGE_SHIFT;
bidx = tmp / bsize;
boff = tmp % bsize;
@@ -613,10 +613,10 @@ affs_readpage_ofs(struct file *file, struct page *page)
int err;
pr_debug("%s(%lu, %ld)\n", __func__, inode->i_ino, page->index);
- to = PAGE_CACHE_SIZE;
- if (((page->index + 1) << PAGE_CACHE_SHIFT) > inode->i_size) {
- to = inode->i_size & ~PAGE_CACHE_MASK;
- memset(page_address(page) + to, 0, PAGE_CACHE_SIZE - to);
+ to = PAGE_SIZE;
+ if (((page->index + 1) << PAGE_SHIFT) > inode->i_size) {
+ to = inode->i_size & ~PAGE_MASK;
+ memset(page_address(page) + to, 0, PAGE_SIZE - to);
}
err = affs_do_readpage_ofs(page, to);
@@ -646,7 +646,7 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
return err;
}
- index = pos >> PAGE_CACHE_SHIFT;
+ index = pos >> PAGE_SHIFT;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
@@ -656,10 +656,10 @@ static int affs_write_begin_ofs(struct file *file, struct address_space *mapping
return 0;
/* XXX: inefficient but safe in the face of short writes */
- err = affs_do_readpage_ofs(page, PAGE_CACHE_SIZE);
+ err = affs_do_readpage_ofs(page, PAGE_SIZE);
if (err) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
return err;
}
@@ -677,7 +677,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
u32 tmp;
int written;
- from = pos & (PAGE_CACHE_SIZE - 1);
+ from = pos & (PAGE_SIZE - 1);
to = pos + len;
/*
* XXX: not sure if this can handle short copies (len < copied), but
@@ -692,7 +692,7 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
bh = NULL;
written = 0;
- tmp = (page->index << PAGE_CACHE_SHIFT) + from;
+ tmp = (page->index << PAGE_SHIFT) + from;
bidx = tmp / bsize;
boff = tmp % bsize;
if (boff) {
@@ -788,13 +788,13 @@ static int affs_write_end_ofs(struct file *file, struct address_space *mapping,
done:
affs_brelse(bh);
- tmp = (page->index << PAGE_CACHE_SHIFT) + from;
+ tmp = (page->index << PAGE_SHIFT) + from;
if (tmp > inode->i_size)
inode->i_size = AFFS_I(inode)->mmu_private = tmp;
err_first_bh:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return written;
diff --git a/fs/affs/super.c b/fs/affs/super.c
index 2a6713b6b..d63848631 100644
--- a/fs/affs/super.c
+++ b/fs/affs/super.c
@@ -528,7 +528,7 @@ affs_remount(struct super_block *sb, int *flags, char *data)
char *prefix = NULL;
new_opts = kstrdup(data, GFP_KERNEL);
- if (!new_opts)
+ if (data && !new_opts)
return -ENOMEM;
pr_debug("%s(flags=0x%x,opts=\"%s\")\n", __func__, *flags, data);
@@ -546,7 +546,8 @@ affs_remount(struct super_block *sb, int *flags, char *data)
}
flush_delayed_work(&sbi->sb_work);
- replace_mount_options(sb, new_opts);
+ if (new_opts)
+ replace_mount_options(sb, new_opts);
sbi->s_flags = mount_flags;
sbi->s_mode = mode;
diff --git a/fs/afs/dir.c b/fs/afs/dir.c
index e10e17788..5fda2bc53 100644
--- a/fs/afs/dir.c
+++ b/fs/afs/dir.c
@@ -181,7 +181,7 @@ error:
static inline void afs_dir_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
/*
diff --git a/fs/afs/file.c b/fs/afs/file.c
index 999bc3cae..6344aee4a 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -164,7 +164,7 @@ int afs_page_filler(void *data, struct page *page)
_debug("cache said ENOBUFS");
default:
go_on:
- offset = page->index << PAGE_CACHE_SHIFT;
+ offset = page->index << PAGE_SHIFT;
len = min_t(size_t, i_size_read(inode) - offset, PAGE_SIZE);
/* read the contents of the file from the server into the
@@ -319,7 +319,7 @@ static void afs_invalidatepage(struct page *page, unsigned int offset,
BUG_ON(!PageLocked(page));
/* we clean up only if the entire page is being invalidated */
- if (offset == 0 && length == PAGE_CACHE_SIZE) {
+ if (offset == 0 && length == PAGE_SIZE) {
#ifdef CONFIG_AFS_FSCACHE
if (PageFsCache(page)) {
struct afs_vnode *vnode = AFS_FS_I(page->mapping->host);
diff --git a/fs/afs/mntpt.c b/fs/afs/mntpt.c
index ccd0b212e..81dd07535 100644
--- a/fs/afs/mntpt.c
+++ b/fs/afs/mntpt.c
@@ -93,7 +93,7 @@ int afs_mntpt_check_symlink(struct afs_vnode *vnode, struct key *key)
kunmap(page);
out_free:
- page_cache_release(page);
+ put_page(page);
out:
_leave(" = %d", ret);
return ret;
@@ -189,7 +189,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
buf = kmap_atomic(page);
memcpy(devname, buf, size);
kunmap_atomic(buf);
- page_cache_release(page);
+ put_page(page);
page = NULL;
}
@@ -211,7 +211,7 @@ static struct vfsmount *afs_mntpt_do_automount(struct dentry *mntpt)
return mnt;
error:
- page_cache_release(page);
+ put_page(page);
error_no_page:
free_page((unsigned long) options);
error_no_options:
diff --git a/fs/afs/super.c b/fs/afs/super.c
index 81afefe7d..fbdb022b7 100644
--- a/fs/afs/super.c
+++ b/fs/afs/super.c
@@ -315,8 +315,8 @@ static int afs_fill_super(struct super_block *sb,
_enter("");
/* fill in the superblock */
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = AFS_FS_MAGIC;
sb->s_op = &afs_super_ops;
sb->s_bdi = &as->volume->bdi;
diff --git a/fs/afs/write.c b/fs/afs/write.c
index dfef94f70..65de439bd 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -93,10 +93,10 @@ static int afs_fill_page(struct afs_vnode *vnode, struct key *key,
_enter(",,%llu", (unsigned long long)pos);
i_size = i_size_read(&vnode->vfs_inode);
- if (pos + PAGE_CACHE_SIZE > i_size)
+ if (pos + PAGE_SIZE > i_size)
len = i_size - pos;
else
- len = PAGE_CACHE_SIZE;
+ len = PAGE_SIZE;
ret = afs_vnode_fetch_data(vnode, key, pos, len, page);
if (ret < 0) {
@@ -123,9 +123,9 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
struct afs_vnode *vnode = AFS_FS_I(file_inode(file));
struct page *page;
struct key *key = file->private_data;
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
int ret;
_enter("{%x:%u},{%lx},%u,%u",
@@ -151,8 +151,8 @@ int afs_write_begin(struct file *file, struct address_space *mapping,
*pagep = page;
/* page won't leak in error case: it eventually gets cleaned off LRU */
- if (!PageUptodate(page) && len != PAGE_CACHE_SIZE) {
- ret = afs_fill_page(vnode, key, index << PAGE_CACHE_SHIFT, page);
+ if (!PageUptodate(page) && len != PAGE_SIZE) {
+ ret = afs_fill_page(vnode, key, index << PAGE_SHIFT, page);
if (ret < 0) {
kfree(candidate);
_leave(" = %d [prep]", ret);
@@ -266,7 +266,7 @@ int afs_write_end(struct file *file, struct address_space *mapping,
if (PageDirty(page))
_debug("dirtied");
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return copied;
}
@@ -480,7 +480,7 @@ static int afs_writepages_region(struct address_space *mapping,
if (page->index > end) {
*_next = index;
- page_cache_release(page);
+ put_page(page);
_leave(" = 0 [%lx]", *_next);
return 0;
}
@@ -494,7 +494,7 @@ static int afs_writepages_region(struct address_space *mapping,
if (page->mapping != mapping) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
continue;
}
@@ -515,7 +515,7 @@ static int afs_writepages_region(struct address_space *mapping,
ret = afs_write_back_from_locked_page(wb, page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (ret < 0) {
_leave(" = %d", ret);
return ret;
@@ -551,13 +551,13 @@ int afs_writepages(struct address_space *mapping,
&next);
mapping->writeback_index = next;
} else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
- end = (pgoff_t)(LLONG_MAX >> PAGE_CACHE_SHIFT);
+ end = (pgoff_t)(LLONG_MAX >> PAGE_SHIFT);
ret = afs_writepages_region(mapping, wbc, 0, end, &next);
if (wbc->nr_to_write > 0)
mapping->writeback_index = next;
} else {
- start = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ start = wbc->range_start >> PAGE_SHIFT;
+ end = wbc->range_end >> PAGE_SHIFT;
ret = afs_writepages_region(mapping, wbc, start, end, &next);
}
diff --git a/fs/aufs/branch.c b/fs/aufs/branch.c
index 496bdaa99..5259d30d1 100644
--- a/fs/aufs/branch.c
+++ b/fs/aufs/branch.c
@@ -25,7 +25,8 @@ static void au_br_do_free(struct au_branch *br)
fput(br->br_xino.xi_file);
mutex_destroy(&br->br_xino.xi_nondir_mtx);
- AuDebugOn(atomic_read(&br->br_count));
+ AuDebugOn(au_br_count(br));
+ au_br_count_fin(br);
wbr = br->br_wbr;
if (wbr) {
@@ -65,7 +66,7 @@ void au_br_free(struct au_sbinfo *sbinfo)
AuRwMustWriteLock(&sbinfo->si_rwsem);
- bmax = sbinfo->si_bend + 1;
+ bmax = sbinfo->si_bbot + 1;
br = sbinfo->si_branch;
while (bmax--)
au_br_do_free(*br++);
@@ -76,10 +77,10 @@ void au_br_free(struct au_sbinfo *sbinfo)
*/
int au_br_index(struct super_block *sb, aufs_bindex_t br_id)
{
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++)
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++)
if (au_sbr_id(sb, bindex) == br_id)
return bindex;
return -1;
@@ -144,7 +145,7 @@ static struct au_branch *au_br_alloc(struct super_block *sb, int new_nbranch,
err = au_di_realloc(au_di(root), new_nbranch);
if (!err) {
inode = d_inode(root);
- err = au_ii_realloc(au_ii(inode), new_nbranch);
+ err = au_hinode_realloc(au_ii(inode), new_nbranch);
}
if (!err)
return add_branch; /* success */
@@ -186,13 +187,13 @@ out:
static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
{
int err;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct dentry *root, *h_dentry;
struct inode *inode, *h_inode;
root = sb->s_root;
- bend = au_sbend(sb);
- if (unlikely(bend >= 0
+ bbot = au_sbbot(sb);
+ if (unlikely(bbot >= 0
&& au_find_dbindex(root, add->path.dentry) >= 0)) {
err = 1;
if (!remount) {
@@ -204,13 +205,13 @@ static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
err = -ENOSPC; /* -E2BIG; */
if (unlikely(AUFS_BRANCH_MAX <= add->bindex
- || AUFS_BRANCH_MAX - 1 <= bend)) {
+ || AUFS_BRANCH_MAX - 1 <= bbot)) {
pr_err("number of branches exceeded %s\n", add->pathname);
goto out;
}
err = -EDOM;
- if (unlikely(add->bindex < 0 || bend + 1 < add->bindex)) {
+ if (unlikely(add->bindex < 0 || bbot + 1 < add->bindex)) {
pr_err("bad index %d\n", add->bindex);
goto out;
}
@@ -244,11 +245,11 @@ static int test_add(struct super_block *sb, struct au_opt_add *add, int remount)
if (unlikely(err))
goto out;
- if (bend < 0)
+ if (bbot < 0)
return 0; /* success */
err = -EINVAL;
- for (bindex = 0; bindex <= bend; bindex++)
+ for (bindex = 0; bindex <= bbot; bindex++)
if (unlikely(test_overlap(sb, add->path.dentry,
au_h_dptr(root, bindex)))) {
pr_err("%s is overlapped\n", add->pathname);
@@ -370,7 +371,7 @@ static int au_br_init(struct au_branch *br, struct super_block *sb,
br->br_perm = add->perm;
br->br_path = add->path; /* set first, path_get() later */
spin_lock_init(&br->br_dykey_lock);
- atomic_set(&br->br_count, 0);
+ au_br_count_init(br);
atomic_set(&br->br_xino_running, 0);
br->br_id = au_new_br_id(sb);
AuDebugOn(br->br_id < 0);
@@ -402,7 +403,7 @@ out:
}
static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
- struct au_branch *br, aufs_bindex_t bend,
+ struct au_branch *br, aufs_bindex_t bbot,
aufs_bindex_t amount)
{
struct au_branch **brp;
@@ -412,13 +413,13 @@ static void au_br_do_add_brp(struct au_sbinfo *sbinfo, aufs_bindex_t bindex,
brp = sbinfo->si_branch + bindex;
memmove(brp + 1, brp, sizeof(*brp) * amount);
*brp = br;
- sbinfo->si_bend++;
- if (unlikely(bend < 0))
- sbinfo->si_bend = 0;
+ sbinfo->si_bbot++;
+ if (unlikely(bbot < 0))
+ sbinfo->si_bbot = 0;
}
static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
- aufs_bindex_t bend, aufs_bindex_t amount)
+ aufs_bindex_t bbot, aufs_bindex_t amount)
{
struct au_hdentry *hdp;
@@ -427,25 +428,24 @@ static void au_br_do_add_hdp(struct au_dinfo *dinfo, aufs_bindex_t bindex,
hdp = dinfo->di_hdentry + bindex;
memmove(hdp + 1, hdp, sizeof(*hdp) * amount);
au_h_dentry_init(hdp);
- dinfo->di_bend++;
- if (unlikely(bend < 0))
- dinfo->di_bstart = 0;
+ dinfo->di_bbot++;
+ if (unlikely(bbot < 0))
+ dinfo->di_btop = 0;
}
static void au_br_do_add_hip(struct au_iinfo *iinfo, aufs_bindex_t bindex,
- aufs_bindex_t bend, aufs_bindex_t amount)
+ aufs_bindex_t bbot, aufs_bindex_t amount)
{
struct au_hinode *hip;
AuRwMustWriteLock(&iinfo->ii_rwsem);
- hip = iinfo->ii_hinode + bindex;
+ hip = au_hinode(iinfo, bindex);
memmove(hip + 1, hip, sizeof(*hip) * amount);
- hip->hi_inode = NULL;
- au_hn_init(hip);
- iinfo->ii_bend++;
- if (unlikely(bend < 0))
- iinfo->ii_bstart = 0;
+ au_hinode_init(hip);
+ iinfo->ii_bbot++;
+ if (unlikely(bbot < 0))
+ iinfo->ii_btop = 0;
}
static void au_br_do_add(struct super_block *sb, struct au_branch *br,
@@ -453,17 +453,17 @@ static void au_br_do_add(struct super_block *sb, struct au_branch *br,
{
struct dentry *root, *h_dentry;
struct inode *root_inode, *h_inode;
- aufs_bindex_t bend, amount;
+ aufs_bindex_t bbot, amount;
root = sb->s_root;
root_inode = d_inode(root);
- bend = au_sbend(sb);
- amount = bend + 1 - bindex;
+ bbot = au_sbbot(sb);
+ amount = bbot + 1 - bindex;
h_dentry = au_br_dentry(br);
au_sbilist_lock();
- au_br_do_add_brp(au_sbi(sb), bindex, br, bend, amount);
- au_br_do_add_hdp(au_di(root), bindex, bend, amount);
- au_br_do_add_hip(au_ii(root_inode), bindex, bend, amount);
+ au_br_do_add_brp(au_sbi(sb), bindex, br, bbot, amount);
+ au_br_do_add_hdp(au_di(root), bindex, bbot, amount);
+ au_br_do_add_hip(au_ii(root_inode), bindex, bbot, amount);
au_set_h_dptr(root, bindex, dget(h_dentry));
h_inode = d_inode(h_dentry);
au_set_h_iptr(root_inode, bindex, au_igrab(h_inode), /*flags*/0);
@@ -473,7 +473,7 @@ static void au_br_do_add(struct super_block *sb, struct au_branch *br,
int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
{
int err;
- aufs_bindex_t bend, add_bindex;
+ aufs_bindex_t bbot, add_bindex;
struct dentry *root, *h_dentry;
struct inode *root_inode;
struct au_branch *add_branch;
@@ -489,8 +489,8 @@ int au_br_add(struct super_block *sb, struct au_opt_add *add, int remount)
goto out; /* success */
}
- bend = au_sbend(sb);
- add_branch = au_br_alloc(sb, bend + 2, add->perm);
+ bbot = au_sbbot(sb);
+ add_branch = au_br_alloc(sb, bbot + 2, add->perm);
err = PTR_ERR(add_branch);
if (IS_ERR(add_branch))
goto out;
@@ -566,7 +566,7 @@ static unsigned long long au_farray_cb(struct super_block *sb, void *a,
static struct file **au_farray_alloc(struct super_block *sb,
unsigned long long *max)
{
- *max = atomic_long_read(&au_sbi(sb)->si_nfiles);
+ *max = au_nfiles(sb);
return au_array_alloc(max, au_farray_cb, sb, /*arg*/NULL);
}
@@ -592,16 +592,16 @@ static void au_farray_free(struct file **a, unsigned long long max)
pr_info(fmt, ##__VA_ARGS__); \
} while (0)
-static int au_test_ibusy(struct inode *inode, aufs_bindex_t bstart,
- aufs_bindex_t bend)
+static int au_test_ibusy(struct inode *inode, aufs_bindex_t btop,
+ aufs_bindex_t bbot)
{
- return (inode && !S_ISDIR(inode->i_mode)) || bstart == bend;
+ return (inode && !S_ISDIR(inode->i_mode)) || btop == bbot;
}
-static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t bstart,
- aufs_bindex_t bend)
+static int au_test_dbusy(struct dentry *dentry, aufs_bindex_t btop,
+ aufs_bindex_t bbot)
{
- return au_test_ibusy(d_inode(dentry), bstart, bend);
+ return au_test_ibusy(d_inode(dentry), btop, bbot);
}
/*
@@ -611,7 +611,7 @@ static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
unsigned int sigen, const unsigned int verbose)
{
int err, i, j, ndentry;
- aufs_bindex_t bstart, bend;
+ aufs_bindex_t btop, bbot;
struct au_dcsub_pages dpages;
struct au_dpage *dpage;
struct dentry *d;
@@ -651,12 +651,12 @@ static int test_dentry_busy(struct dentry *root, aufs_bindex_t bindex,
}
/* AuDbgDentry(d); */
- bstart = au_dbstart(d);
- bend = au_dbend(d);
- if (bstart <= bindex
- && bindex <= bend
+ btop = au_dbtop(d);
+ bbot = au_dbbot(d);
+ if (btop <= bindex
+ && bindex <= bbot
&& au_h_dptr(d, bindex)
- && au_test_dbusy(d, bstart, bend)) {
+ && au_test_dbusy(d, btop, bbot)) {
err = -EBUSY;
AuVerbose(verbose, "busy %pd\n", d);
AuDbgDentry(d);
@@ -677,7 +677,7 @@ static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
int err;
unsigned long long max, ull;
struct inode *i, **array;
- aufs_bindex_t bstart, bend;
+ aufs_bindex_t btop, bbot;
array = au_iarray_alloc(sb, &max);
err = PTR_ERR(array);
@@ -708,12 +708,12 @@ static int test_inode_busy(struct super_block *sb, aufs_bindex_t bindex,
}
}
- bstart = au_ibstart(i);
- bend = au_ibend(i);
- if (bstart <= bindex
- && bindex <= bend
+ btop = au_ibtop(i);
+ bbot = au_ibbot(i);
+ if (btop <= bindex
+ && bindex <= bbot
&& au_h_iptr(i, bindex)
- && au_test_ibusy(i, bstart, bend)) {
+ && au_test_ibusy(i, btop, bbot)) {
err = -EBUSY;
AuVerbose(verbose, "busy i%lu\n", i->i_ino);
AuDbgInode(i);
@@ -749,7 +749,7 @@ static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
{
int err;
unsigned char matched, root;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct au_fidir *fidir;
struct au_hfile *hfile;
@@ -765,8 +765,8 @@ static int test_dir_busy(struct file *file, aufs_bindex_t br_id,
matched = 0;
fidir = au_fi(file)->fi_hdir;
AuDebugOn(!fidir);
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file); bindex <= bend; bindex++) {
+ bbot = au_fbbot_dir(file);
+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++) {
hfile = fidir->fd_hfile + bindex;
if (!hfile->hf_file)
continue;
@@ -788,7 +788,7 @@ static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
{
int err, idx;
unsigned long long ull, max;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
struct file *file, **array;
struct dentry *root;
struct au_hfile *hfile;
@@ -809,7 +809,7 @@ static int test_file_busy(struct super_block *sb, aufs_bindex_t br_id,
/* AuDbg("%pD\n", file); */
fi_read_lock(file);
- bstart = au_fbstart(file);
+ btop = au_fbtop(file);
if (!d_is_dir(file->f_path.dentry)) {
hfile = &au_fi(file)->fi_htop;
if (hfile->hf_br->br_id == br_id)
@@ -832,7 +832,7 @@ static void br_del_file(struct file **to_free, unsigned long long opened,
aufs_bindex_t br_id)
{
unsigned long long ull;
- aufs_bindex_t bindex, bstart, bend, bfound;
+ aufs_bindex_t bindex, btop, bbot, bfound;
struct file *file;
struct au_fidir *fidir;
struct au_hfile *hfile;
@@ -848,9 +848,9 @@ static void br_del_file(struct file **to_free, unsigned long long opened,
fidir = au_fi(file)->fi_hdir;
AuDebugOn(!fidir);
fi_write_lock(file);
- bstart = au_fbstart(file);
- bend = au_fbend_dir(file);
- for (bindex = bstart; bindex <= bend; bindex++) {
+ btop = au_fbtop(file);
+ bbot = au_fbbot_dir(file);
+ for (bindex = btop; bindex <= bbot; bindex++) {
hfile = fidir->fd_hfile + bindex;
if (!hfile->hf_file)
continue;
@@ -862,10 +862,10 @@ static void br_del_file(struct file **to_free, unsigned long long opened,
}
AuDebugOn(bfound < 0);
au_set_h_fptr(file, bfound, NULL);
- if (bfound == bstart) {
- for (bstart++; bstart <= bend; bstart++)
- if (au_hf_dir(file, bstart)) {
- au_set_fbstart(file, bstart);
+ if (bfound == btop) {
+ for (btop++; btop <= bbot; btop++)
+ if (au_hf_dir(file, btop)) {
+ au_set_fbtop(file, btop);
break;
}
}
@@ -875,59 +875,58 @@ static void br_del_file(struct file **to_free, unsigned long long opened,
static void au_br_do_del_brp(struct au_sbinfo *sbinfo,
const aufs_bindex_t bindex,
- const aufs_bindex_t bend)
+ const aufs_bindex_t bbot)
{
struct au_branch **brp, **p;
AuRwMustWriteLock(&sbinfo->si_rwsem);
brp = sbinfo->si_branch + bindex;
- if (bindex < bend)
- memmove(brp, brp + 1, sizeof(*brp) * (bend - bindex));
- sbinfo->si_branch[0 + bend] = NULL;
- sbinfo->si_bend--;
+ if (bindex < bbot)
+ memmove(brp, brp + 1, sizeof(*brp) * (bbot - bindex));
+ sbinfo->si_branch[0 + bbot] = NULL;
+ sbinfo->si_bbot--;
- p = krealloc(sbinfo->si_branch, sizeof(*p) * bend, AuGFP_SBILIST);
+ p = krealloc(sbinfo->si_branch, sizeof(*p) * bbot, AuGFP_SBILIST);
if (p)
sbinfo->si_branch = p;
/* harmless error */
}
static void au_br_do_del_hdp(struct au_dinfo *dinfo, const aufs_bindex_t bindex,
- const aufs_bindex_t bend)
+ const aufs_bindex_t bbot)
{
struct au_hdentry *hdp, *p;
AuRwMustWriteLock(&dinfo->di_rwsem);
hdp = dinfo->di_hdentry;
- if (bindex < bend)
+ if (bindex < bbot)
memmove(hdp + bindex, hdp + bindex + 1,
- sizeof(*hdp) * (bend - bindex));
- hdp[0 + bend].hd_dentry = NULL;
- dinfo->di_bend--;
+ sizeof(*hdp) * (bbot - bindex));
+ hdp[0 + bbot].hd_dentry = NULL;
+ dinfo->di_bbot--;
- p = krealloc(hdp, sizeof(*p) * bend, AuGFP_SBILIST);
+ p = krealloc(hdp, sizeof(*p) * bbot, AuGFP_SBILIST);
if (p)
dinfo->di_hdentry = p;
/* harmless error */
}
static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
- const aufs_bindex_t bend)
+ const aufs_bindex_t bbot)
{
struct au_hinode *hip, *p;
AuRwMustWriteLock(&iinfo->ii_rwsem);
- hip = iinfo->ii_hinode + bindex;
- if (bindex < bend)
- memmove(hip, hip + 1, sizeof(*hip) * (bend - bindex));
- iinfo->ii_hinode[0 + bend].hi_inode = NULL;
- au_hn_init(iinfo->ii_hinode + bend);
- iinfo->ii_bend--;
+ hip = au_hinode(iinfo, bindex);
+ if (bindex < bbot)
+ memmove(hip, hip + 1, sizeof(*hip) * (bbot - bindex));
+ /* au_hinode_init(au_hinode(iinfo, bbot)); */
+ iinfo->ii_bbot--;
- p = krealloc(iinfo->ii_hinode, sizeof(*p) * bend, AuGFP_SBILIST);
+ p = krealloc(iinfo->ii_hinode, sizeof(*p) * bbot, AuGFP_SBILIST);
if (p)
iinfo->ii_hinode = p;
/* harmless error */
@@ -936,7 +935,7 @@ static void au_br_do_del_hip(struct au_iinfo *iinfo, const aufs_bindex_t bindex,
static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
struct au_branch *br)
{
- aufs_bindex_t bend;
+ aufs_bindex_t bbot;
struct au_sbinfo *sbinfo;
struct dentry *root, *h_root;
struct inode *inode, *h_inode;
@@ -947,7 +946,7 @@ static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
root = sb->s_root;
inode = d_inode(root);
sbinfo = au_sbi(sb);
- bend = sbinfo->si_bend;
+ bbot = sbinfo->si_bbot;
h_root = au_h_dptr(root, bindex);
hinode = au_hi(inode, bindex);
@@ -955,9 +954,9 @@ static void au_br_do_del(struct super_block *sb, aufs_bindex_t bindex,
au_hiput(hinode);
au_sbilist_lock();
- au_br_do_del_brp(sbinfo, bindex, bend);
- au_br_do_del_hdp(au_di(root), bindex, bend);
- au_br_do_del_hip(au_ii(inode), bindex, bend);
+ au_br_do_del_brp(sbinfo, bindex, bbot);
+ au_br_do_del_hdp(au_di(root), bindex, bbot);
+ au_br_do_del_hip(au_ii(inode), bindex, bbot);
au_sbilist_unlock();
dput(h_root);
@@ -976,7 +975,7 @@ int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
int err, rerr, i;
unsigned long long opened;
unsigned int mnt_flags;
- aufs_bindex_t bindex, bend, br_id;
+ aufs_bindex_t bindex, bbot, br_id;
unsigned char do_wh, verbose;
struct au_branch *br;
struct au_wbr *wbr;
@@ -1000,8 +999,8 @@ int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
err = -EBUSY;
mnt_flags = au_mntflags(sb);
verbose = !!au_opt_test(mnt_flags, VERBOSE);
- bend = au_sbend(sb);
- if (unlikely(!bend)) {
+ bbot = au_sbbot(sb);
+ if (unlikely(!bbot)) {
AuVerbose(verbose, "no more branches left\n");
goto out;
}
@@ -1009,7 +1008,7 @@ int au_br_del(struct super_block *sb, struct au_opt_del *del, int remount)
AuDebugOn(!path_equal(&br->br_path, &del->h_path));
br_id = br->br_id;
- opened = atomic_read(&br->br_count);
+ opened = au_br_count(br);
if (unlikely(opened)) {
to_free = au_array_alloc(&opened, empty_cb, sb, NULL);
err = PTR_ERR(to_free);
@@ -1089,7 +1088,7 @@ out:
static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
{
int err;
- aufs_bindex_t bstart, bend;
+ aufs_bindex_t btop, bbot;
struct aufs_ibusy ibusy;
struct inode *inode, *h_inode;
@@ -1108,7 +1107,7 @@ static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
err = -EINVAL;
si_read_lock(sb, AuLock_FLUSH);
- if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbend(sb)))
+ if (unlikely(ibusy.bindex < 0 || ibusy.bindex > au_sbbot(sb)))
goto out_unlock;
err = 0;
@@ -1120,11 +1119,11 @@ static int au_ibusy(struct super_block *sb, struct aufs_ibusy __user *arg)
goto out_unlock;
ii_read_lock_child(inode);
- bstart = au_ibstart(inode);
- bend = au_ibend(inode);
- if (bstart <= ibusy.bindex && ibusy.bindex <= bend) {
+ btop = au_ibtop(inode);
+ bbot = au_ibbot(inode);
+ if (btop <= ibusy.bindex && ibusy.bindex <= bbot) {
h_inode = au_h_iptr(inode, ibusy.bindex);
- if (h_inode && au_test_ibusy(inode, bstart, bend))
+ if (h_inode && au_test_ibusy(inode, btop, bbot))
ibusy.h_ino = h_inode->i_ino;
}
ii_read_unlock(inode);
diff --git a/fs/aufs/branch.h b/fs/aufs/branch.h
index 4c52ae166..32a4d8f36 100644
--- a/fs/aufs/branch.h
+++ b/fs/aufs/branch.h
@@ -86,7 +86,7 @@ struct au_branch {
struct path br_path;
spinlock_t br_dykey_lock;
struct au_dykey *br_dykey[AuBrDynOp];
- atomic_t br_count;
+ struct percpu_counter br_count;
struct au_wbr *br_wbr;
struct au_br_fhsm *br_fhsm;
@@ -121,6 +121,31 @@ static inline struct super_block *au_br_sb(struct au_branch *br)
return au_br_mnt(br)->mnt_sb;
}
+static inline void au_br_get(struct au_branch *br)
+{
+ percpu_counter_inc(&br->br_count);
+}
+
+static inline void au_br_put(struct au_branch *br)
+{
+ percpu_counter_dec(&br->br_count);
+}
+
+static inline s64 au_br_count(struct au_branch *br)
+{
+ return percpu_counter_sum(&br->br_count);
+}
+
+static inline void au_br_count_init(struct au_branch *br)
+{
+ percpu_counter_init(&br->br_count, 0, GFP_NOFS);
+}
+
+static inline void au_br_count_fin(struct au_branch *br)
+{
+ percpu_counter_destroy(&br->br_count);
+}
+
static inline int au_br_rdonly(struct au_branch *br)
{
return ((au_br_sb(br)->s_flags & MS_RDONLY)
@@ -216,9 +241,14 @@ struct super_block *au_sbr_sb(struct super_block *sb, aufs_bindex_t bindex)
return au_br_sb(au_sbr(sb, bindex));
}
+static inline void au_sbr_get(struct super_block *sb, aufs_bindex_t bindex)
+{
+ au_br_get(au_sbr(sb, bindex));
+}
+
static inline void au_sbr_put(struct super_block *sb, aufs_bindex_t bindex)
{
- atomic_dec(&au_sbr(sb, bindex)->br_count);
+ au_br_put(au_sbr(sb, bindex));
}
static inline int au_sbr_perm(struct super_block *sb, aufs_bindex_t bindex)
diff --git a/fs/aufs/cpup.c b/fs/aufs/cpup.c
index 9ff0963ad..dbf96fd60 100644
--- a/fs/aufs/cpup.c
+++ b/fs/aufs/cpup.c
@@ -27,7 +27,7 @@ void au_cpup_attr_timesizes(struct inode *inode)
{
struct inode *h_inode;
- h_inode = au_h_iptr(inode, au_ibstart(inode));
+ h_inode = au_h_iptr(inode, au_ibtop(inode));
fsstack_copy_attr_times(inode, h_inode);
fsstack_copy_inode_size(inode, h_inode);
}
@@ -36,10 +36,10 @@ void au_cpup_attr_nlink(struct inode *inode, int force)
{
struct inode *h_inode;
struct super_block *sb;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
sb = inode->i_sb;
- bindex = au_ibstart(inode);
+ bindex = au_ibtop(inode);
h_inode = au_h_iptr(inode, bindex);
if (!force
&& !S_ISDIR(h_inode->i_mode)
@@ -62,8 +62,8 @@ void au_cpup_attr_nlink(struct inode *inode, int force)
* it may includes whplink directory.
*/
if (S_ISDIR(h_inode->i_mode)) {
- bend = au_ibend(inode);
- for (bindex++; bindex <= bend; bindex++) {
+ bbot = au_ibbot(inode);
+ for (bindex++; bindex <= bbot; bindex++) {
h_inode = au_h_iptr(inode, bindex);
if (h_inode)
au_add_nlink(inode, h_inode);
@@ -75,7 +75,7 @@ void au_cpup_attr_changeable(struct inode *inode)
{
struct inode *h_inode;
- h_inode = au_h_iptr(inode, au_ibstart(inode));
+ h_inode = au_h_iptr(inode, au_ibtop(inode));
inode->i_mode = h_inode->i_mode;
inode->i_uid = h_inode->i_uid;
inode->i_gid = h_inode->i_gid;
@@ -97,7 +97,7 @@ void au_cpup_attr_all(struct inode *inode, int force)
{
struct inode *h_inode;
- h_inode = au_h_iptr(inode, au_ibstart(inode));
+ h_inode = au_h_iptr(inode, au_ibtop(inode));
au_cpup_attr_changeable(inode);
if (inode->i_nlink > 0)
au_cpup_attr_nlink(inode, force);
@@ -554,7 +554,7 @@ static int au_do_cpup_dir(struct au_cp_generic *cpg, struct dentry *dst_parent,
* particularry setattr case
*/
dir = d_inode(dst_parent);
- if (au_ibstart(dir) == cpg->bdst)
+ if (au_ibtop(dir) == cpg->bdst)
au_cpup_attr_nlink(dir, /*force*/1);
inode = d_inode(cpg->dentry);
au_cpup_attr_nlink(inode, /*force*/1);
@@ -715,7 +715,7 @@ out:
static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
{
int err, rerr;
- aufs_bindex_t old_ibstart;
+ aufs_bindex_t old_ibtop;
unsigned char isdir, plink;
struct dentry *h_src, *h_dst, *h_parent;
struct inode *dst_inode, *h_dir, *inode, *delegated, *src_inode;
@@ -803,7 +803,7 @@ static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
}
isdir = S_ISDIR(inode->i_mode);
- old_ibstart = au_ibstart(inode);
+ old_ibtop = au_ibtop(inode);
err = cpup_entry(cpg, dst_parent, &a->h_src_attr);
if (unlikely(err))
goto out_rev;
@@ -820,7 +820,7 @@ static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
goto out_rev;
}
- if (cpg->bdst < old_ibstart) {
+ if (cpg->bdst < old_ibtop) {
if (S_ISREG(inode->i_mode)) {
err = au_dy_iaop(inode, cpg->bdst, dst_inode);
if (unlikely(err)) {
@@ -830,9 +830,9 @@ static int au_cpup_single(struct au_cp_generic *cpg, struct dentry *dst_parent)
goto out_rev;
}
}
- au_set_ibstart(inode, cpg->bdst);
+ au_set_ibtop(inode, cpg->bdst);
} else
- au_set_ibend(inode, cpg->bdst);
+ au_set_ibbot(inode, cpg->bdst);
au_set_h_iptr(inode, cpg->bdst, au_igrab(dst_inode),
au_hi_flags(inode, isdir));
@@ -988,7 +988,7 @@ static int au_cpup_simple(struct au_cp_generic *cpg)
/* revert */
au_set_h_dptr(dentry, cpg->bdst, NULL);
- au_set_dbstart(dentry, cpg->bsrc);
+ au_set_dbtop(dentry, cpg->bsrc);
}
return err;
@@ -1050,20 +1050,20 @@ out:
int au_sio_cpup_simple(struct au_cp_generic *cpg)
{
- aufs_bindex_t bsrc, bend;
+ aufs_bindex_t bsrc, bbot;
struct dentry *dentry, *h_dentry;
if (cpg->bsrc < 0) {
dentry = cpg->dentry;
- bend = au_dbend(dentry);
- for (bsrc = cpg->bdst + 1; bsrc <= bend; bsrc++) {
+ bbot = au_dbbot(dentry);
+ for (bsrc = cpg->bdst + 1; bsrc <= bbot; bsrc++) {
h_dentry = au_h_dptr(dentry, bsrc);
if (h_dentry) {
AuDebugOn(d_is_negative(h_dentry));
break;
}
}
- AuDebugOn(bsrc > bend);
+ AuDebugOn(bsrc > bbot);
cpg->bsrc = bsrc;
}
AuDebugOn(cpg->bsrc <= cpg->bdst);
@@ -1095,10 +1095,10 @@ static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
AuRwMustWriteLock(&dinfo->di_rwsem);
bsrc_orig = cpg->bsrc;
- cpg->bsrc = dinfo->di_bstart;
+ cpg->bsrc = dinfo->di_btop;
hdp = dinfo->di_hdentry;
h_d_dst = hdp[0 + cpg->bdst].hd_dentry;
- dinfo->di_bstart = cpg->bdst;
+ dinfo->di_btop = cpg->bdst;
hdp[0 + cpg->bdst].hd_dentry = wh_dentry;
h_d_start = NULL;
if (file) {
@@ -1115,7 +1115,7 @@ static int au_do_cpup_wh(struct au_cp_generic *cpg, struct dentry *wh_dentry,
hdp[0 + cpg->bsrc].hd_dentry = h_d_start;
}
hdp[0 + cpg->bdst].hd_dentry = h_d_dst;
- dinfo->di_bstart = cpg->bsrc;
+ dinfo->di_btop = cpg->bsrc;
cpg->bsrc = bsrc_orig;
return err;
@@ -1299,7 +1299,7 @@ int au_cp_dirs(struct dentry *dentry, aufs_bindex_t bdst,
h_dentry = au_h_dptr(d, bdst);
if (!h_dentry || d_is_negative(h_dentry)) {
if (h_dentry)
- au_update_dbstart(d);
+ au_update_dbtop(d);
au_pin_set_dentry(&pin, d);
err = au_do_pin(&pin);
diff --git a/fs/aufs/dbgaufs.c b/fs/aufs/dbgaufs.c
index 0aefb5ed8..21ac77398 100644
--- a/fs/aufs/dbgaufs.c
+++ b/fs/aufs/dbgaufs.c
@@ -225,7 +225,7 @@ static int dbgaufs_xino_open(struct inode *inode, struct file *file)
sbinfo = inode->i_private;
sb = sbinfo->si_sb;
si_noflush_read_lock(sb);
- if (l <= au_sbend(sb)) {
+ if (l <= au_sbbot(sb)) {
xf = au_sbr(sb, (aufs_bindex_t)l)->br_xino.xi_file;
err = dbgaufs_xi_open(xf, file, /*do_fcnt*/1);
} else
@@ -245,15 +245,15 @@ static const struct file_operations dbgaufs_xino_fop = {
void dbgaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
{
- aufs_bindex_t bend;
+ aufs_bindex_t bbot;
struct au_branch *br;
struct au_xino_file *xi;
if (!au_sbi(sb)->si_dbgaufs)
return;
- bend = au_sbend(sb);
- for (; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
xi = &br->br_xino;
debugfs_remove(xi->xi_dbgaufs);
@@ -267,7 +267,7 @@ void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
struct dentry *parent;
struct au_branch *br;
struct au_xino_file *xi;
- aufs_bindex_t bend;
+ aufs_bindex_t bbot;
char name[sizeof(DbgaufsXi_PREFIX) + 5]; /* "xi" bindex NULL */
sbinfo = au_sbi(sb);
@@ -275,8 +275,8 @@ void dbgaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
if (!parent)
return;
- bend = au_sbend(sb);
- for (; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (; bindex <= bbot; bindex++) {
snprintf(name, sizeof(name), DbgaufsXi_PREFIX "%d", bindex);
br = au_sbr(sb, bindex);
xi = &br->br_xino;
diff --git a/fs/aufs/debug.c b/fs/aufs/debug.c
index 4529831a9..14e69cbc8 100644
--- a/fs/aufs/debug.c
+++ b/fs/aufs/debug.c
@@ -128,25 +128,24 @@ static int do_pri_inode(aufs_bindex_t bindex, struct inode *inode, int hn,
void au_dpri_inode(struct inode *inode)
{
struct au_iinfo *iinfo;
+ struct au_hinode *hi;
aufs_bindex_t bindex;
int err, hn;
err = do_pri_inode(-1, inode, -1, NULL);
- if (err || !au_test_aufs(inode->i_sb))
+ if (err || !au_test_aufs(inode->i_sb) || is_bad_inode(inode))
return;
iinfo = au_ii(inode);
- if (!iinfo)
- return;
- dpri("i-1: bstart %d, bend %d, gen %d\n",
- iinfo->ii_bstart, iinfo->ii_bend, au_iigen(inode, NULL));
- if (iinfo->ii_bstart < 0)
+ dpri("i-1: btop %d, bbot %d, gen %d\n",
+ iinfo->ii_btop, iinfo->ii_bbot, au_iigen(inode, NULL));
+ if (iinfo->ii_btop < 0)
return;
hn = 0;
- for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend; bindex++) {
- hn = !!au_hn(iinfo->ii_hinode + bindex);
- do_pri_inode(bindex, iinfo->ii_hinode[0 + bindex].hi_inode, hn,
- iinfo->ii_hinode[0 + bindex].hi_whdentry);
+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot; bindex++) {
+ hi = au_hinode(iinfo, bindex);
+ hn = !!au_hn(hi);
+ do_pri_inode(bindex, hi->hi_inode, hn, hi->hi_whdentry);
}
}
@@ -165,6 +164,8 @@ static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
struct dentry *wh = NULL;
int hn;
struct au_iinfo *iinfo;
+ struct au_hinode *hi;
+ struct inode *inode;
if (!dentry || IS_ERR(dentry)) {
dpri("d%d: err %ld\n", bindex, PTR_ERR(dentry));
@@ -178,16 +179,18 @@ static int do_pri_dentry(aufs_bindex_t bindex, struct dentry *dentry)
au_dcount(dentry), dentry->d_flags,
d_unhashed(dentry) ? "un" : "");
hn = -1;
+ inode = NULL;
if (bindex >= 0
&& d_is_positive(dentry)
- && au_test_aufs(dentry->d_sb)) {
- iinfo = au_ii(d_inode(dentry));
- if (iinfo) {
- hn = !!au_hn(iinfo->ii_hinode + bindex);
- wh = iinfo->ii_hinode[0 + bindex].hi_whdentry;
- }
+ && au_test_aufs(dentry->d_sb))
+ inode = d_inode(dentry);
+ if (inode && !is_bad_inode(inode)) {
+ iinfo = au_ii(inode);
+ hi = au_hinode(iinfo, bindex);
+ hn = !!au_hn(hi);
+ wh = hi->hi_whdentry;
}
- do_pri_inode(bindex, d_inode(dentry), hn, wh);
+ do_pri_inode(bindex, inode, hn, wh);
return 0;
}
@@ -205,14 +208,14 @@ void au_dpri_dentry(struct dentry *dentry)
dinfo = au_di(dentry);
if (!dinfo)
return;
- dpri("d-1: bstart %d, bend %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
- dinfo->di_bstart, dinfo->di_bend,
+ dpri("d-1: btop %d, bbot %d, bwh %d, bdiropq %d, gen %d, tmp %d\n",
+ dinfo->di_btop, dinfo->di_bbot,
dinfo->di_bwh, dinfo->di_bdiropq, au_digen(dentry),
dinfo->di_tmpfile);
- if (dinfo->di_bstart < 0)
+ if (dinfo->di_btop < 0)
return;
hdp = dinfo->di_hdentry;
- for (bindex = dinfo->di_bstart; bindex <= dinfo->di_bend; bindex++)
+ for (bindex = dinfo->di_btop; bindex <= dinfo->di_bbot; bindex++)
do_pri_dentry(bindex, hdp[0 + bindex].hd_dentry);
}
@@ -284,10 +287,10 @@ static int do_pri_br(aufs_bindex_t bindex, struct au_branch *br)
if (!sb || IS_ERR(sb))
goto out;
- dpri("s%d: {perm 0x%x, id %d, cnt %d, wbr %p}, "
+ dpri("s%d: {perm 0x%x, id %d, cnt %lld, wbr %p}, "
"%s, dev 0x%02x%02x, flags 0x%lx, cnt %d, active %d, "
"xino %d\n",
- bindex, br->br_perm, br->br_id, atomic_read(&br->br_count),
+ bindex, br->br_perm, br->br_id, au_br_count(br),
br->br_wbr, au_sbtype(sb), MAJOR(sb->s_dev), MINOR(sb->s_dev),
sb->s_flags, sb->s_count,
atomic_read(&sb->s_active), !!br->br_xino.xi_file);
@@ -318,9 +321,9 @@ void au_dpri_sb(struct super_block *sb)
a->mnt.mnt_sb = sb;
a->fake.br_path.mnt = &a->mnt;
- atomic_set(&a->fake.br_count, 0);
- smp_mb(); /* atomic_set */
+ au_br_count_init(&a->fake);
err = do_pri_br(-1, &a->fake);
+ au_br_count_fin(&a->fake);
kfree(a);
dpri("dev 0x%x\n", sb->s_dev);
if (err || !au_test_aufs(sb))
@@ -329,10 +332,11 @@ void au_dpri_sb(struct super_block *sb)
sbinfo = au_sbi(sb);
if (!sbinfo)
return;
- dpri("nw %d, gen %u, kobj %d\n",
- atomic_read(&sbinfo->si_nowait.nw_len), sbinfo->si_generation,
+ dpri("nw %lld, gen %u, kobj %d\n",
+ percpu_counter_sum(&sbinfo->si_nowait.nw_len),
+ sbinfo->si_generation,
atomic_read(&sbinfo->si_kobj.kref.refcount));
- for (bindex = 0; bindex <= sbinfo->si_bend; bindex++)
+ for (bindex = 0; bindex <= sbinfo->si_bbot; bindex++)
do_pri_br(bindex, sbinfo->si_branch[0 + bindex]);
}
@@ -342,21 +346,21 @@ void __au_dbg_verify_dinode(struct dentry *dentry, const char *func, int line)
{
struct inode *h_inode, *inode = d_inode(dentry);
struct dentry *h_dentry;
- aufs_bindex_t bindex, bend, bi;
+ aufs_bindex_t bindex, bbot, bi;
if (!inode /* || au_di(dentry)->di_lsc == AuLsc_DI_TMP */)
return;
- bend = au_dbend(dentry);
- bi = au_ibend(inode);
- if (bi < bend)
- bend = bi;
- bindex = au_dbstart(dentry);
- bi = au_ibstart(inode);
+ bbot = au_dbbot(dentry);
+ bi = au_ibbot(inode);
+ if (bi < bbot)
+ bbot = bi;
+ bindex = au_dbtop(dentry);
+ bi = au_ibtop(inode);
if (bi > bindex)
bindex = bi;
- for (; bindex <= bend; bindex++) {
+ for (; bindex <= bbot; bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (!h_dentry)
continue;
diff --git a/fs/aufs/dentry.c b/fs/aufs/dentry.c
index e65fadd05..ce2a13632 100644
--- a/fs/aufs/dentry.c
+++ b/fs/aufs/dentry.c
@@ -52,7 +52,7 @@ au_do_lookup(struct dentry *h_parent, struct dentry *dentry,
goto out;
/* We found a whiteout */
- /* au_set_dbend(dentry, bindex); */
+ /* au_set_dbbot(dentry, bindex); */
au_set_dbwh(dentry, bindex);
if (!allow_neg)
return NULL; /* success */
@@ -77,10 +77,10 @@ real_lookup:
|| (args->type && args->type != (h_inode->i_mode & S_IFMT)))
goto out_neg;
- if (au_dbend(dentry) <= bindex)
- au_set_dbend(dentry, bindex);
- if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
- au_set_dbstart(dentry, bindex);
+ if (au_dbbot(dentry) <= bindex)
+ au_set_dbbot(dentry, bindex);
+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
+ au_set_dbtop(dentry, bindex);
au_set_h_dptr(dentry, bindex, h_dentry);
if (!d_is_dir(h_dentry)
@@ -119,7 +119,7 @@ static int au_test_shwh(struct super_block *sb, const struct qstr *name)
* otherwise an error.
* can be called at unlinking with @type is zero.
*/
-int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type)
+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop, mode_t type)
{
int npositive, err;
aufs_bindex_t bindex, btail, bdiropq;
@@ -150,7 +150,7 @@ int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type)
npositive = 0;
parent = dget_parent(dentry);
btail = au_dbtaildir(parent);
- for (bindex = bstart; bindex <= btail; bindex++) {
+ for (bindex = btop; bindex <= btail; bindex++) {
struct dentry *h_parent, *h_dentry;
struct inode *h_inode, *h_dir;
@@ -201,11 +201,11 @@ int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type)
if (npositive) {
AuLabel(positive);
- au_update_dbstart(dentry);
+ au_update_dbtop(dentry);
}
err = npositive;
if (unlikely(!au_opt_test(au_mntflags(sb), UDBA_NONE)
- && au_dbstart(dentry) < 0)) {
+ && au_dbtop(dentry) < 0)) {
err = -EIO;
AuIOErr("both of real entry and whiteout found, %pd, err %d\n",
dentry, err);
@@ -267,10 +267,10 @@ int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh)
}
err = 0;
- if (bindex < au_dbstart(dentry))
- au_set_dbstart(dentry, bindex);
- if (au_dbend(dentry) < bindex)
- au_set_dbend(dentry, bindex);
+ if (bindex < au_dbtop(dentry))
+ au_set_dbtop(dentry, bindex);
+ if (au_dbbot(dentry) < bindex)
+ au_set_dbbot(dentry, bindex);
au_set_h_dptr(dentry, bindex, h_dentry);
out:
@@ -382,7 +382,7 @@ int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
{
int err;
- aufs_bindex_t new_bindex, bindex, bend, bwh, bdiropq;
+ aufs_bindex_t new_bindex, bindex, bbot, bwh, bdiropq;
struct au_hdentry tmp, *p, *q;
struct au_dinfo *dinfo;
struct super_block *sb;
@@ -391,11 +391,11 @@ static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
sb = dentry->d_sb;
dinfo = au_di(dentry);
- bend = dinfo->di_bend;
+ bbot = dinfo->di_bbot;
bwh = dinfo->di_bwh;
bdiropq = dinfo->di_bdiropq;
- p = dinfo->di_hdentry + dinfo->di_bstart;
- for (bindex = dinfo->di_bstart; bindex <= bend; bindex++, p++) {
+ p = dinfo->di_hdentry + dinfo->di_btop;
+ for (bindex = dinfo->di_btop; bindex <= bbot; bindex++, p++) {
if (!p->hd_dentry)
continue;
@@ -425,31 +425,31 @@ static int au_do_refresh_hdentry(struct dentry *dentry, struct dentry *parent)
}
dinfo->di_bwh = -1;
- if (bwh >= 0 && bwh <= au_sbend(sb) && au_sbr_whable(sb, bwh))
+ if (bwh >= 0 && bwh <= au_sbbot(sb) && au_sbr_whable(sb, bwh))
dinfo->di_bwh = bwh;
dinfo->di_bdiropq = -1;
if (bdiropq >= 0
- && bdiropq <= au_sbend(sb)
+ && bdiropq <= au_sbbot(sb)
&& au_sbr_whable(sb, bdiropq))
dinfo->di_bdiropq = bdiropq;
err = -EIO;
- dinfo->di_bstart = -1;
- dinfo->di_bend = -1;
- bend = au_dbend(parent);
+ dinfo->di_btop = -1;
+ dinfo->di_bbot = -1;
+ bbot = au_dbbot(parent);
p = dinfo->di_hdentry;
- for (bindex = 0; bindex <= bend; bindex++, p++)
+ for (bindex = 0; bindex <= bbot; bindex++, p++)
if (p->hd_dentry) {
- dinfo->di_bstart = bindex;
+ dinfo->di_btop = bindex;
break;
}
- if (dinfo->di_bstart >= 0) {
- p = dinfo->di_hdentry + bend;
- for (bindex = bend; bindex >= 0; bindex--, p--)
+ if (dinfo->di_btop >= 0) {
+ p = dinfo->di_hdentry + bbot;
+ for (bindex = bbot; bindex >= 0; bindex--, p--)
if (p->hd_dentry) {
- dinfo->di_bend = bindex;
+ dinfo->di_bbot = bindex;
err = 0;
break;
}
@@ -549,7 +549,7 @@ static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
struct au_dinfo *tmp)
{
int err;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct {
struct dentry *dentry;
struct inode *inode;
@@ -562,16 +562,16 @@ static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
struct dentry *h_dentry;
err = 0;
- AuDebugOn(dinfo->di_bstart < 0);
+ AuDebugOn(dinfo->di_btop < 0);
orig_h.mode = 0;
- orig_h.dentry = dinfo->di_hdentry[dinfo->di_bstart].hd_dentry;
+ orig_h.dentry = dinfo->di_hdentry[dinfo->di_btop].hd_dentry;
orig_h.inode = NULL;
if (d_is_positive(orig_h.dentry)) {
orig_h.inode = d_inode(orig_h.dentry);
orig_h.mode = orig_h.inode->i_mode & S_IFMT;
}
- if (tmp->di_bstart >= 0) {
- tmp_h.dentry = tmp->di_hdentry[tmp->di_bstart].hd_dentry;
+ if (tmp->di_btop >= 0) {
+ tmp_h.dentry = tmp->di_hdentry[tmp->di_btop].hd_dentry;
if (d_is_positive(tmp_h.dentry)) {
tmp_h.inode = d_inode(tmp_h.dentry);
tmp_h.mode = tmp_h.inode->i_mode & S_IFMT;
@@ -588,20 +588,20 @@ static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
goto out;
}
AuDebugOn(inode);
- AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
AuDebugOn(dinfo->di_bdiropq != -1);
if (!tmp_h.inode) {
AuDbg("negative --> negative\n");
/* should have only one negative lower */
- if (tmp->di_bstart >= 0
- && tmp->di_bstart < dinfo->di_bstart) {
- AuDebugOn(tmp->di_bstart != tmp->di_bend);
- AuDebugOn(dinfo->di_bstart != dinfo->di_bend);
- au_set_h_dptr(dentry, dinfo->di_bstart, NULL);
+ if (tmp->di_btop >= 0
+ && tmp->di_btop < dinfo->di_btop) {
+ AuDebugOn(tmp->di_btop != tmp->di_bbot);
+ AuDebugOn(dinfo->di_btop != dinfo->di_bbot);
+ au_set_h_dptr(dentry, dinfo->di_btop, NULL);
au_di_cp(dinfo, tmp);
- hd = tmp->di_hdentry + tmp->di_bstart;
- au_set_h_dptr(dentry, tmp->di_bstart,
+ hd = tmp->di_hdentry + tmp->di_btop;
+ au_set_h_dptr(dentry, tmp->di_btop,
dget(hd->hd_dentry));
}
au_dbg_verify_dinode(dentry);
@@ -626,7 +626,7 @@ static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
AuDbg("positive --> negative\n");
/* or bypassing aufs */
au_hide(dentry);
- if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_bstart)
+ if (tmp->di_bwh >= 0 && tmp->di_bwh <= dinfo->di_btop)
dinfo->di_bwh = tmp->di_bwh;
if (inode)
err = au_refresh_hinode_self(inode);
@@ -634,7 +634,7 @@ static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
} else if (orig_h.mode == tmp_h.mode) {
AuDbg("positive --> positive, same type\n");
if (!S_ISDIR(orig_h.mode)
- && dinfo->di_bstart > tmp->di_bstart) {
+ && dinfo->di_btop > tmp->di_btop) {
/*
* similar to the behaviour of removing and
* creating.
@@ -645,15 +645,15 @@ static int au_refresh_by_dinfo(struct dentry *dentry, struct au_dinfo *dinfo,
au_dbg_verify_dinode(dentry);
} else {
/* fill empty slots */
- if (dinfo->di_bstart > tmp->di_bstart)
- dinfo->di_bstart = tmp->di_bstart;
- if (dinfo->di_bend < tmp->di_bend)
- dinfo->di_bend = tmp->di_bend;
+ if (dinfo->di_btop > tmp->di_btop)
+ dinfo->di_btop = tmp->di_btop;
+ if (dinfo->di_bbot < tmp->di_bbot)
+ dinfo->di_bbot = tmp->di_bbot;
dinfo->di_bwh = tmp->di_bwh;
dinfo->di_bdiropq = tmp->di_bdiropq;
hd = tmp->di_hdentry;
- bend = dinfo->di_bend;
- for (bindex = tmp->di_bstart; bindex <= bend;
+ bbot = dinfo->di_bbot;
+ for (bindex = tmp->di_btop; bindex <= bbot;
bindex++) {
if (au_h_dptr(dentry, bindex))
continue;
@@ -726,7 +726,7 @@ int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
goto out;
dinfo = au_di(dentry);
- err = au_di_realloc(dinfo, au_sbend(sb) + 1);
+ err = au_di_realloc(dinfo, au_sbbot(sb) + 1);
if (unlikely(err))
goto out;
ebrange = au_dbrange_test(dentry);
@@ -734,7 +734,7 @@ int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
ebrange = au_do_refresh_hdentry(dentry, parent);
if (d_unhashed(dentry) || ebrange /* || dinfo->di_tmpfile */) {
- AuDebugOn(au_dbstart(dentry) < 0 && au_dbend(dentry) >= 0);
+ AuDebugOn(au_dbtop(dentry) < 0 && au_dbbot(dentry) >= 0);
if (d_really_is_positive(dentry)) {
inode = d_inode(dentry);
err = au_refresh_hinode_self(inode);
@@ -757,7 +757,7 @@ int au_refresh_dentry(struct dentry *dentry, struct dentry *parent)
* if current working dir is removed, it returns an error.
* but the dentry is legal.
*/
- err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
+ err = au_lkup_dentry(dentry, /*btop*/0, /*type*/0);
AuDbgDentry(dentry);
au_di_swap(tmp, dinfo);
if (err == -ENOENT)
@@ -820,7 +820,7 @@ static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
{
int err;
umode_t mode, h_mode;
- aufs_bindex_t bindex, btail, bstart, ibs, ibe;
+ aufs_bindex_t bindex, btail, btop, ibs, ibe;
unsigned char plus, unhashed, is_root, h_plus, h_nfs, tmpfile;
struct inode *h_inode, *h_cached_inode;
struct dentry *h_dentry;
@@ -846,15 +846,15 @@ static int h_d_revalidate(struct dentry *dentry, struct inode *inode,
if (do_udba && inode) {
mode = (inode->i_mode & S_IFMT);
plus = (inode->i_nlink > 0);
- ibs = au_ibstart(inode);
- ibe = au_ibend(inode);
+ ibs = au_ibtop(inode);
+ ibe = au_ibbot(inode);
}
- bstart = au_dbstart(dentry);
- btail = bstart;
+ btop = au_dbtop(dentry);
+ btail = btop;
if (inode && S_ISDIR(inode->i_mode))
btail = au_dbtaildir(dentry);
- for (bindex = bstart; bindex <= btail; bindex++) {
+ for (bindex = btop; bindex <= btail; bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (!h_dentry)
continue;
@@ -1069,11 +1069,11 @@ static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
do_udba = !au_opt_test(au_mntflags(sb), UDBA_NONE);
if (do_udba && inode) {
- aufs_bindex_t bstart = au_ibstart(inode);
+ aufs_bindex_t btop = au_ibtop(inode);
struct inode *h_inode;
- if (bstart >= 0) {
- h_inode = au_h_iptr(inode, bstart);
+ if (btop >= 0) {
+ h_inode = au_h_iptr(inode, btop);
if (h_inode && au_test_higen(inode, h_inode)) {
AuTraceErr(err);
goto out_inval;
@@ -1082,7 +1082,7 @@ static int aufs_d_revalidate(struct dentry *dentry, unsigned int flags)
}
err = h_d_revalidate(dentry, inode, flags, do_udba);
- if (unlikely(!err && do_udba && au_dbstart(dentry) < 0)) {
+ if (unlikely(!err && do_udba && au_dbtop(dentry) < 0)) {
err = -EIO;
AuDbg("both of real entry and whiteout found, %p, err %d\n",
dentry, err);
diff --git a/fs/aufs/dentry.h b/fs/aufs/dentry.h
index c794adf59..3e48b5cde 100644
--- a/fs/aufs/dentry.h
+++ b/fs/aufs/dentry.h
@@ -23,7 +23,7 @@ struct au_dinfo {
atomic_t di_generation;
struct au_rwsem di_rwsem;
- aufs_bindex_t di_bstart, di_bend, di_bwh, di_bdiropq;
+ aufs_bindex_t di_btop, di_bbot, di_bwh, di_bdiropq;
unsigned char di_tmpfile; /* to allow the different name */
struct au_hdentry *di_hdentry;
} ____cacheline_aligned_in_smp;
@@ -37,7 +37,7 @@ struct dentry *au_sio_lkup_one(struct qstr *name, struct dentry *parent);
int au_h_verify(struct dentry *h_dentry, unsigned int udba, struct inode *h_dir,
struct dentry *h_parent, struct au_branch *br);
-int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t bstart, mode_t type);
+int au_lkup_dentry(struct dentry *dentry, aufs_bindex_t btop, mode_t type);
int au_lkup_neg(struct dentry *dentry, aufs_bindex_t bindex, int wh);
int au_refresh_dentry(struct dentry *dentry, struct dentry *parent);
int au_reval_dpath(struct dentry *dentry, unsigned int sigen);
@@ -73,8 +73,8 @@ int au_digen_test(struct dentry *dentry, unsigned int sigen);
int au_dbrange_test(struct dentry *dentry);
void au_update_digen(struct dentry *dentry);
void au_update_dbrange(struct dentry *dentry, int do_put_zero);
-void au_update_dbstart(struct dentry *dentry);
-void au_update_dbend(struct dentry *dentry);
+void au_update_dbtop(struct dentry *dentry);
+void au_update_dbbot(struct dentry *dentry);
int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry);
/* ---------------------------------------------------------------------- */
@@ -151,16 +151,16 @@ static inline void au_hdput(struct au_hdentry *hd)
dput(hd->hd_dentry);
}
-static inline aufs_bindex_t au_dbstart(struct dentry *dentry)
+static inline aufs_bindex_t au_dbtop(struct dentry *dentry)
{
DiMustAnyLock(dentry);
- return au_di(dentry)->di_bstart;
+ return au_di(dentry)->di_btop;
}
-static inline aufs_bindex_t au_dbend(struct dentry *dentry)
+static inline aufs_bindex_t au_dbbot(struct dentry *dentry)
{
DiMustAnyLock(dentry);
- return au_di(dentry)->di_bend;
+ return au_di(dentry)->di_bbot;
}
static inline aufs_bindex_t au_dbwh(struct dentry *dentry)
@@ -176,22 +176,22 @@ static inline aufs_bindex_t au_dbdiropq(struct dentry *dentry)
}
/* todo: hard/soft set? */
-static inline void au_set_dbstart(struct dentry *dentry, aufs_bindex_t bindex)
+static inline void au_set_dbtop(struct dentry *dentry, aufs_bindex_t bindex)
{
DiMustWriteLock(dentry);
- au_di(dentry)->di_bstart = bindex;
+ au_di(dentry)->di_btop = bindex;
}
-static inline void au_set_dbend(struct dentry *dentry, aufs_bindex_t bindex)
+static inline void au_set_dbbot(struct dentry *dentry, aufs_bindex_t bindex)
{
DiMustWriteLock(dentry);
- au_di(dentry)->di_bend = bindex;
+ au_di(dentry)->di_bbot = bindex;
}
static inline void au_set_dbwh(struct dentry *dentry, aufs_bindex_t bindex)
{
DiMustWriteLock(dentry);
- /* dbwh can be outside of bstart - bend range */
+ /* dbwh can be outside of btop - bbot range */
au_di(dentry)->di_bwh = bindex;
}
diff --git a/fs/aufs/dinfo.c b/fs/aufs/dinfo.c
index ad6d045c4..b2eb8c2de 100644
--- a/fs/aufs/dinfo.c
+++ b/fs/aufs/dinfo.c
@@ -11,10 +11,8 @@
void au_di_init_once(void *_dinfo)
{
struct au_dinfo *dinfo = _dinfo;
- static struct lock_class_key aufs_di;
au_rw_init(&dinfo->di_rwsem);
- au_rw_class(&dinfo->di_rwsem, &aufs_di);
}
struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
@@ -26,14 +24,14 @@ struct au_dinfo *au_di_alloc(struct super_block *sb, unsigned int lsc)
if (unlikely(!dinfo))
goto out;
- nbr = au_sbend(sb) + 1;
+ nbr = au_sbbot(sb) + 1;
if (nbr <= 0)
nbr = 1;
dinfo->di_hdentry = kcalloc(nbr, sizeof(*dinfo->di_hdentry), GFP_NOFS);
if (dinfo->di_hdentry) {
au_rw_write_lock_nested(&dinfo->di_rwsem, lsc);
- dinfo->di_bstart = -1;
- dinfo->di_bend = -1;
+ dinfo->di_btop = -1;
+ dinfo->di_bbot = -1;
dinfo->di_bwh = -1;
dinfo->di_bdiropq = -1;
dinfo->di_tmpfile = 0;
@@ -52,14 +50,14 @@ out:
void au_di_free(struct au_dinfo *dinfo)
{
struct au_hdentry *p;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
/* dentry may not be revalidated */
- bindex = dinfo->di_bstart;
+ bindex = dinfo->di_btop;
if (bindex >= 0) {
- bend = dinfo->di_bend;
+ bbot = dinfo->di_bbot;
p = dinfo->di_hdentry + bindex;
- while (bindex++ <= bend)
+ while (bindex++ <= bbot)
au_hdput(p++);
}
kfree(dinfo->di_hdentry);
@@ -82,8 +80,8 @@ void au_di_swap(struct au_dinfo *a, struct au_dinfo *b)
} while (0)
DiSwap(p, hdentry);
- DiSwap(bi, bstart);
- DiSwap(bi, bend);
+ DiSwap(bi, btop);
+ DiSwap(bi, bbot);
DiSwap(bi, bwh);
DiSwap(bi, bdiropq);
/* smp_mb(); */
@@ -96,8 +94,8 @@ void au_di_cp(struct au_dinfo *dst, struct au_dinfo *src)
AuRwMustWriteLock(&dst->di_rwsem);
AuRwMustWriteLock(&src->di_rwsem);
- dst->di_bstart = src->di_bstart;
- dst->di_bend = src->di_bend;
+ dst->di_btop = src->di_btop;
+ dst->di_bbot = src->di_bbot;
dst->di_bwh = src->di_bwh;
dst->di_bdiropq = src->di_bdiropq;
/* smp_mb(); */
@@ -139,7 +137,7 @@ int au_di_realloc(struct au_dinfo *dinfo, int nbr)
AuRwMustWriteLock(&dinfo->di_rwsem);
err = -ENOMEM;
- sz = sizeof(*hdp) * (dinfo->di_bend + 1);
+ sz = sizeof(*hdp) * (dinfo->di_bbot + 1);
if (!sz)
sz = sizeof(*hdp);
hdp = au_kzrealloc(dinfo->di_hdentry, sz, sizeof(*hdp) * nbr, GFP_NOFS);
@@ -307,7 +305,7 @@ struct dentry *au_h_dptr(struct dentry *dentry, aufs_bindex_t bindex)
DiMustAnyLock(dentry);
- if (au_dbstart(dentry) < 0 || bindex < au_dbstart(dentry))
+ if (au_dbtop(dentry) < 0 || bindex < au_dbtop(dentry))
return NULL;
AuDebugOn(bindex < 0);
d = au_di(dentry)->di_hdentry[0 + bindex].hd_dentry;
@@ -328,8 +326,8 @@ struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
AuDebugOn(d_really_is_negative(dentry));
h_dentry = NULL;
- if (au_dbstart(dentry) <= bindex
- && bindex <= au_dbend(dentry))
+ if (au_dbtop(dentry) <= bindex
+ && bindex <= au_dbbot(dentry))
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry && !au_d_linkable(h_dentry)) {
dget(h_dentry);
@@ -337,8 +335,8 @@ struct dentry *au_h_d_alias(struct dentry *dentry, aufs_bindex_t bindex)
}
inode = d_inode(dentry);
- AuDebugOn(bindex < au_ibstart(inode));
- AuDebugOn(au_ibend(inode) < bindex);
+ AuDebugOn(bindex < au_ibtop(inode));
+ AuDebugOn(au_ibbot(inode) < bindex);
h_inode = au_h_iptr(inode, bindex);
h_dentry = d_find_alias(h_inode);
if (h_dentry) {
@@ -368,30 +366,30 @@ out:
aufs_bindex_t au_dbtail(struct dentry *dentry)
{
- aufs_bindex_t bend, bwh;
+ aufs_bindex_t bbot, bwh;
- bend = au_dbend(dentry);
- if (0 <= bend) {
+ bbot = au_dbbot(dentry);
+ if (0 <= bbot) {
bwh = au_dbwh(dentry);
if (!bwh)
return bwh;
- if (0 < bwh && bwh < bend)
+ if (0 < bwh && bwh < bbot)
return bwh - 1;
}
- return bend;
+ return bbot;
}
aufs_bindex_t au_dbtaildir(struct dentry *dentry)
{
- aufs_bindex_t bend, bopq;
+ aufs_bindex_t bbot, bopq;
- bend = au_dbtail(dentry);
- if (0 <= bend) {
+ bbot = au_dbtail(dentry);
+ if (0 <= bbot) {
bopq = au_dbdiropq(dentry);
- if (0 <= bopq && bopq < bend)
- bend = bopq;
+ if (0 <= bopq && bopq < bbot)
+ bbot = bopq;
}
- return bend;
+ return bbot;
}
/* ---------------------------------------------------------------------- */
@@ -415,16 +413,16 @@ void au_set_h_dptr(struct dentry *dentry, aufs_bindex_t bindex,
int au_dbrange_test(struct dentry *dentry)
{
int err;
- aufs_bindex_t bstart, bend;
+ aufs_bindex_t btop, bbot;
err = 0;
- bstart = au_dbstart(dentry);
- bend = au_dbend(dentry);
- if (bstart >= 0)
- AuDebugOn(bend < 0 && bstart > bend);
+ btop = au_dbtop(dentry);
+ bbot = au_dbbot(dentry);
+ if (btop >= 0)
+ AuDebugOn(bbot < 0 && btop > bbot);
else {
err = -EIO;
- AuDebugOn(bend >= 0);
+ AuDebugOn(bbot >= 0);
}
return err;
@@ -457,68 +455,68 @@ void au_update_dbrange(struct dentry *dentry, int do_put_zero)
DiMustWriteLock(dentry);
dinfo = au_di(dentry);
- if (!dinfo || dinfo->di_bstart < 0)
+ if (!dinfo || dinfo->di_btop < 0)
return;
hdp = dinfo->di_hdentry;
if (do_put_zero) {
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
- bend = dinfo->di_bend;
- for (bindex = dinfo->di_bstart; bindex <= bend; bindex++) {
+ bbot = dinfo->di_bbot;
+ for (bindex = dinfo->di_btop; bindex <= bbot; bindex++) {
h_d = hdp[0 + bindex].hd_dentry;
if (h_d && d_is_negative(h_d))
au_set_h_dptr(dentry, bindex, NULL);
}
}
- dinfo->di_bstart = -1;
- while (++dinfo->di_bstart <= dinfo->di_bend)
- if (hdp[0 + dinfo->di_bstart].hd_dentry)
+ dinfo->di_btop = -1;
+ while (++dinfo->di_btop <= dinfo->di_bbot)
+ if (hdp[0 + dinfo->di_btop].hd_dentry)
break;
- if (dinfo->di_bstart > dinfo->di_bend) {
- dinfo->di_bstart = -1;
- dinfo->di_bend = -1;
+ if (dinfo->di_btop > dinfo->di_bbot) {
+ dinfo->di_btop = -1;
+ dinfo->di_bbot = -1;
return;
}
- dinfo->di_bend++;
- while (0 <= --dinfo->di_bend)
- if (hdp[0 + dinfo->di_bend].hd_dentry)
+ dinfo->di_bbot++;
+ while (0 <= --dinfo->di_bbot)
+ if (hdp[0 + dinfo->di_bbot].hd_dentry)
break;
- AuDebugOn(dinfo->di_bstart > dinfo->di_bend || dinfo->di_bend < 0);
+ AuDebugOn(dinfo->di_btop > dinfo->di_bbot || dinfo->di_bbot < 0);
}
-void au_update_dbstart(struct dentry *dentry)
+void au_update_dbtop(struct dentry *dentry)
{
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct dentry *h_dentry;
- bend = au_dbend(dentry);
- for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
+ bbot = au_dbbot(dentry);
+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (!h_dentry)
continue;
if (d_is_positive(h_dentry)) {
- au_set_dbstart(dentry, bindex);
+ au_set_dbtop(dentry, bindex);
return;
}
au_set_h_dptr(dentry, bindex, NULL);
}
}
-void au_update_dbend(struct dentry *dentry)
+void au_update_dbbot(struct dentry *dentry)
{
- aufs_bindex_t bindex, bstart;
+ aufs_bindex_t bindex, btop;
struct dentry *h_dentry;
- bstart = au_dbstart(dentry);
- for (bindex = au_dbend(dentry); bindex >= bstart; bindex--) {
+ btop = au_dbtop(dentry);
+ for (bindex = au_dbbot(dentry); bindex >= btop; bindex--) {
h_dentry = au_h_dptr(dentry, bindex);
if (!h_dentry)
continue;
if (d_is_positive(h_dentry)) {
- au_set_dbend(dentry, bindex);
+ au_set_dbbot(dentry, bindex);
return;
}
au_set_h_dptr(dentry, bindex, NULL);
@@ -527,10 +525,10 @@ void au_update_dbend(struct dentry *dentry)
int au_find_dbindex(struct dentry *dentry, struct dentry *h_dentry)
{
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
- bend = au_dbend(dentry);
- for (bindex = au_dbstart(dentry); bindex <= bend; bindex++)
+ bbot = au_dbbot(dentry);
+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++)
if (au_h_dptr(dentry, bindex) == h_dentry)
return bindex;
return -1;
diff --git a/fs/aufs/dir.c b/fs/aufs/dir.c
index 4644de4a9..4dc666eb4 100644
--- a/fs/aufs/dir.c
+++ b/fs/aufs/dir.c
@@ -42,7 +42,7 @@ void au_sub_nlink(struct inode *dir, struct inode *h_dir)
loff_t au_dir_size(struct file *file, struct dentry *dentry)
{
loff_t sz;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct file *h_file;
struct dentry *h_dentry;
@@ -50,9 +50,9 @@ loff_t au_dir_size(struct file *file, struct dentry *dentry)
if (file) {
AuDebugOn(!d_is_dir(file->f_path.dentry));
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file);
- bindex <= bend && sz < KMALLOC_MAX_SIZE;
+ bbot = au_fbbot_dir(file);
+ for (bindex = au_fbtop(file);
+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
bindex++) {
h_file = au_hf_dir(file, bindex);
if (h_file && file_inode(h_file))
@@ -62,9 +62,9 @@ loff_t au_dir_size(struct file *file, struct dentry *dentry)
AuDebugOn(!dentry);
AuDebugOn(!d_is_dir(dentry));
- bend = au_dbtaildir(dentry);
- for (bindex = au_dbstart(dentry);
- bindex <= bend && sz < KMALLOC_MAX_SIZE;
+ bbot = au_dbtaildir(dentry);
+ for (bindex = au_dbtop(dentry);
+ bindex <= bbot && sz < KMALLOC_MAX_SIZE;
bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry && d_is_positive(h_dentry))
@@ -97,7 +97,7 @@ static void au_do_dir_ts(void *arg)
struct au_branch *br;
struct au_hinode *hdir;
int err;
- aufs_bindex_t bstart, bindex;
+ aufs_bindex_t btop, bindex;
sb = a->dentry->d_sb;
if (d_really_is_negative(a->dentry))
@@ -106,9 +106,9 @@ static void au_do_dir_ts(void *arg)
aufs_read_lock(a->dentry, AuLock_DW); /* noflush */
dir = d_inode(a->dentry);
- bstart = au_ibstart(dir);
+ btop = au_ibtop(dir);
bindex = au_br_index(sb, a->brid);
- if (bindex < bstart)
+ if (bindex < btop)
goto out_unlock;
br = au_sbr(sb, bindex);
@@ -118,17 +118,17 @@ static void au_do_dir_ts(void *arg)
h_path.mnt = au_br_mnt(br);
au_dtime_store(&dt, a->dentry, &h_path);
- br = au_sbr(sb, bstart);
+ br = au_sbr(sb, btop);
if (!au_br_writable(br->br_perm))
goto out_unlock;
- h_path.dentry = au_h_dptr(a->dentry, bstart);
+ h_path.dentry = au_h_dptr(a->dentry, btop);
h_path.mnt = au_br_mnt(br);
err = vfsub_mnt_want_write(h_path.mnt);
if (err)
goto out_unlock;
- hdir = au_hi(dir, bstart);
+ hdir = au_hi(dir, btop);
au_hn_imtx_lock_nested(hdir, AuLsc_I_PARENT);
- h_dir = au_h_iptr(dir, bstart);
+ h_dir = au_h_iptr(dir, btop);
if (h_dir->i_nlink
&& timespec_compare(&h_dir->i_mtime, &dt.dt_mtime) < 0) {
dt.dt_h_path = h_path;
@@ -149,7 +149,7 @@ out:
void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
{
int perm, wkq_err;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
struct au_dir_ts_arg *arg;
struct dentry *dentry;
struct super_block *sb;
@@ -159,13 +159,13 @@ void au_dir_ts(struct inode *dir, aufs_bindex_t bindex)
dentry = d_find_any_alias(dir);
AuDebugOn(!dentry);
sb = dentry->d_sb;
- bstart = au_ibstart(dir);
- if (bstart == bindex) {
+ btop = au_ibtop(dir);
+ if (btop == bindex) {
au_cpup_attr_timesizes(dir);
goto out;
}
- perm = au_sbr_perm(sb, bstart);
+ perm = au_sbr_perm(sb, btop);
if (!au_br_writable(perm))
goto out;
@@ -192,24 +192,24 @@ static int reopen_dir(struct file *file)
{
int err;
unsigned int flags;
- aufs_bindex_t bindex, btail, bstart;
+ aufs_bindex_t bindex, btail, btop;
struct dentry *dentry, *h_dentry;
struct file *h_file;
/* open all lower dirs */
dentry = file->f_path.dentry;
- bstart = au_dbstart(dentry);
- for (bindex = au_fbstart(file); bindex < bstart; bindex++)
+ btop = au_dbtop(dentry);
+ for (bindex = au_fbtop(file); bindex < btop; bindex++)
au_set_h_fptr(file, bindex, NULL);
- au_set_fbstart(file, bstart);
+ au_set_fbtop(file, btop);
btail = au_dbtaildir(dentry);
- for (bindex = au_fbend_dir(file); btail < bindex; bindex--)
+ for (bindex = au_fbbot_dir(file); btail < bindex; bindex--)
au_set_h_fptr(file, bindex, NULL);
- au_set_fbend_dir(file, btail);
+ au_set_fbbot_dir(file, btail);
flags = vfsub_file_flags(file);
- for (bindex = bstart; bindex <= btail; bindex++) {
+ for (bindex = btop; bindex <= btail; bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (!h_dentry)
continue;
@@ -246,10 +246,10 @@ static int do_open_dir(struct file *file, int flags, struct file *h_file)
mnt = file->f_path.mnt;
dentry = file->f_path.dentry;
file->f_version = d_inode(dentry)->i_version;
- bindex = au_dbstart(dentry);
- au_set_fbstart(file, bindex);
+ bindex = au_dbtop(dentry);
+ au_set_fbtop(file, bindex);
btail = au_dbtaildir(dentry);
- au_set_fbend_dir(file, btail);
+ au_set_fbbot_dir(file, btail);
for (; !err && bindex <= btail; bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (!h_dentry)
@@ -272,10 +272,10 @@ static int do_open_dir(struct file *file, int flags, struct file *h_file)
return 0; /* success */
/* close all */
- for (bindex = au_fbstart(file); bindex <= btail; bindex++)
+ for (bindex = au_fbtop(file); bindex <= btail; bindex++)
au_set_h_fptr(file, bindex, NULL);
- au_set_fbstart(file, -1);
- au_set_fbend_dir(file, -1);
+ au_set_fbtop(file, -1);
+ au_set_fbbot_dir(file, -1);
return err;
}
@@ -310,7 +310,7 @@ static int aufs_release_dir(struct inode *inode __maybe_unused,
struct au_vdir *vdir_cache;
struct au_finfo *finfo;
struct au_fidir *fidir;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
finfo = au_fi(file);
fidir = finfo->fi_hdir;
@@ -327,8 +327,8 @@ static int aufs_release_dir(struct inode *inode __maybe_unused,
* calls fput() instead of filp_close(),
* since no dnotify or lock for the lower file.
*/
- bend = fidir->fd_bbot;
- for (; bindex <= bend; bindex++)
+ bbot = fidir->fd_bbot;
+ for (; bindex <= bbot; bindex++)
au_set_h_fptr(file, bindex, NULL);
}
kfree(fidir);
@@ -343,12 +343,12 @@ static int aufs_release_dir(struct inode *inode __maybe_unused,
static int au_do_flush_dir(struct file *file, fl_owner_t id)
{
int err;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct file *h_file;
err = 0;
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
+ bbot = au_fbbot_dir(file);
+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
h_file = au_hf_dir(file, bindex);
if (h_file)
err = vfsub_flush(h_file, id);
@@ -366,7 +366,7 @@ static int aufs_flush_dir(struct file *file, fl_owner_t id)
static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
{
int err;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct inode *inode;
struct super_block *sb;
@@ -374,8 +374,8 @@ static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
sb = dentry->d_sb;
inode = d_inode(dentry);
IMustLock(inode);
- bend = au_dbend(dentry);
- for (bindex = au_dbstart(dentry); !err && bindex <= bend; bindex++) {
+ bbot = au_dbbot(dentry);
+ for (bindex = au_dbtop(dentry); !err && bindex <= bbot; bindex++) {
struct path h_path;
if (au_test_ro(sb, bindex, inode))
@@ -394,7 +394,7 @@ static int au_do_fsync_dir_no_file(struct dentry *dentry, int datasync)
static int au_do_fsync_dir(struct file *file, int datasync)
{
int err;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct file *h_file;
struct super_block *sb;
struct inode *inode;
@@ -405,8 +405,8 @@ static int au_do_fsync_dir(struct file *file, int datasync)
inode = file_inode(file);
sb = inode->i_sb;
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file); !err && bindex <= bend; bindex++) {
+ bbot = au_fbbot_dir(file);
+ for (bindex = au_fbtop(file); !err && bindex <= bbot; bindex++) {
h_file = au_hf_dir(file, bindex);
if (!h_file || au_test_ro(sb, bindex, inode))
continue;
@@ -478,7 +478,7 @@ static int aufs_iterate(struct file *file, struct dir_context *ctx)
if (unlikely(err))
goto out_unlock;
- h_inode = au_h_iptr(inode, au_ibstart(inode));
+ h_inode = au_h_iptr(inode, au_ibtop(inode));
if (!au_test_nfsd()) {
err = au_vdir_fill_de(file, ctx);
fsstack_copy_attr_atime(inode, h_inode);
@@ -647,7 +647,7 @@ int au_test_empty_lower(struct dentry *dentry)
{
int err;
unsigned int rdhash;
- aufs_bindex_t bindex, bstart, btail;
+ aufs_bindex_t bindex, btop, btail;
struct au_nhash whlist;
struct test_empty_arg arg = {
.ctx = {
@@ -667,20 +667,20 @@ int au_test_empty_lower(struct dentry *dentry)
arg.flags = 0;
arg.whlist = &whlist;
- bstart = au_dbstart(dentry);
+ btop = au_dbtop(dentry);
if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
au_fset_testempty(arg.flags, SHWH);
test_empty = do_test_empty;
if (au_opt_test(au_mntflags(dentry->d_sb), DIRPERM1))
test_empty = sio_test_empty;
- arg.bindex = bstart;
+ arg.bindex = btop;
err = test_empty(dentry, &arg);
if (unlikely(err))
goto out_whlist;
au_fset_testempty(arg.flags, WHONLY);
btail = au_dbtaildir(dentry);
- for (bindex = bstart + 1; !err && bindex <= btail; bindex++) {
+ for (bindex = btop + 1; !err && bindex <= btail; bindex++) {
struct dentry *h_dentry;
h_dentry = au_h_dptr(dentry, bindex);
@@ -712,7 +712,7 @@ int au_test_empty(struct dentry *dentry, struct au_nhash *whlist)
if (au_opt_test(au_mntflags(dentry->d_sb), SHWH))
au_fset_testempty(arg.flags, SHWH);
btail = au_dbtaildir(dentry);
- for (bindex = au_dbstart(dentry); !err && bindex <= btail; bindex++) {
+ for (bindex = au_dbtop(dentry); !err && bindex <= btail; bindex++) {
struct dentry *h_dentry;
h_dentry = au_h_dptr(dentry, bindex);
diff --git a/fs/aufs/dynop.c b/fs/aufs/dynop.c
index 53a8b55d8..dfb3a718d 100644
--- a/fs/aufs/dynop.c
+++ b/fs/aufs/dynop.c
@@ -308,14 +308,14 @@ out:
int au_dy_irefresh(struct inode *inode)
{
int err;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
struct inode *h_inode;
err = 0;
if (S_ISREG(inode->i_mode)) {
- bstart = au_ibstart(inode);
- h_inode = au_h_iptr(inode, bstart);
- err = au_dy_iaop(inode, bstart, h_inode);
+ btop = au_ibtop(inode);
+ h_inode = au_h_iptr(inode, btop);
+ err = au_dy_iaop(inode, btop, h_inode);
}
return err;
}
diff --git a/fs/aufs/export.c b/fs/aufs/export.c
index 69b70f2a1..8755b1ef0 100644
--- a/fs/aufs/export.c
+++ b/fs/aufs/export.c
@@ -500,9 +500,11 @@ struct dentry *decode_by_path(struct super_block *sb, ino_t ino, __u32 *fh,
h_mnt = au_br_mnt(br);
h_sb = h_mnt->mnt_sb;
/* todo: call lower fh_to_dentry()? fh_to_parent()? */
+ lockdep_off();
h_parent = exportfs_decode_fh(h_mnt, (void *)(fh + Fh_tail),
fh_len - Fh_tail, fh[Fh_h_type],
h_acceptable, /*context*/NULL);
+ lockdep_on();
dentry = h_parent;
if (unlikely(!h_parent || IS_ERR(h_parent))) {
AuWarn1("%s decode_fh failed, %ld\n",
@@ -606,7 +608,7 @@ aufs_fh_to_dentry(struct super_block *sb, struct fid *fid, int fh_len,
/* is the parent dir cached? */
br = au_sbr(sb, nsi_lock.bindex);
- atomic_inc(&br->br_count);
+ au_br_get(br);
dentry = decode_by_dir_ino(sb, ino, dir_ino, &nsi_lock);
if (IS_ERR(dentry))
goto out_unlock;
@@ -630,7 +632,7 @@ accept:
dentry = ERR_PTR(-ESTALE);
out_unlock:
if (br)
- atomic_dec(&br->br_count);
+ au_br_put(br);
si_read_unlock(sb);
out:
AuTraceErrPtr(dentry);
@@ -697,7 +699,7 @@ static int aufs_encode_fh(struct inode *inode, __u32 *fh, int *max_len,
err = -EIO;
parent = NULL;
ii_read_lock_child(inode);
- bindex = au_ibstart(inode);
+ bindex = au_ibtop(inode);
if (!dir) {
dentry = d_find_any_alias(inode);
if (unlikely(!dentry))
@@ -772,7 +774,7 @@ static int aufs_commit_metadata(struct inode *inode)
sb = inode->i_sb;
si_read_lock(sb, AuLock_FLUSH | AuLock_NOPLMW);
ii_write_lock_child(inode);
- bindex = au_ibstart(inode);
+ bindex = au_ibtop(inode);
AuDebugOn(bindex < 0);
h_inode = au_h_iptr(inode, bindex);
@@ -808,6 +810,11 @@ void au_export_init(struct super_block *sb)
struct au_sbinfo *sbinfo;
__u32 u;
+ BUILD_BUG_ON_MSG(IS_BUILTIN(CONFIG_AUFS_FS)
+ && IS_MODULE(CONFIG_EXPORTFS),
+ AUFS_NAME ": unsupported configuration "
+ "CONFIG_EXPORTFS=m and CONFIG_AUFS_FS=y");
+
sb->s_export_op = &aufs_export_op;
sbinfo = au_sbi(sb);
sbinfo->si_xigen = NULL;
diff --git a/fs/aufs/f_op.c b/fs/aufs/f_op.c
index ac1304a18..b0b0fff5f 100644
--- a/fs/aufs/f_op.c
+++ b/fs/aufs/f_op.c
@@ -28,7 +28,7 @@ int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
finfo = au_fi(file);
memset(&finfo->fi_htop, 0, sizeof(finfo->fi_htop));
atomic_set(&finfo->fi_mmapped, 0);
- bindex = au_dbstart(dentry);
+ bindex = au_dbtop(dentry);
if (!h_file) {
h_dentry = au_h_dptr(dentry, bindex);
err = vfsub_test_mntns(file->f_path.mnt, h_dentry->d_sb);
@@ -52,7 +52,7 @@ int au_do_open_nondir(struct file *file, int flags, struct file *h_file)
h_inode->i_state |= I_LINKABLE;
spin_unlock(&h_inode->i_lock);
}
- au_set_fbstart(file, bindex);
+ au_set_fbtop(file, bindex);
au_set_h_fptr(file, bindex, h_file);
au_update_figen(file);
/* todo: necessary? */
@@ -154,7 +154,7 @@ static void au_read_post(struct inode *inode, struct file *h_file)
struct au_write_pre {
blkcnt_t blks;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
};
/*
@@ -187,7 +187,7 @@ static struct file *au_write_pre(struct file *file, int do_ready,
di_downgrade_lock(dentry, /*flags*/0);
if (wpre)
- wpre->bstart = au_fbstart(file);
+ wpre->btop = au_fbtop(file);
h_file = au_hf_top(file);
get_file(h_file);
if (wpre)
@@ -208,7 +208,7 @@ static void au_write_post(struct inode *inode, struct file *h_file,
struct inode *h_inode;
au_cpup_attr_timesizes(inode);
- AuDebugOn(au_ibstart(inode) != wpre->bstart);
+ AuDebugOn(au_ibtop(inode) != wpre->btop);
h_inode = file_inode(h_file);
inode->i_mode = h_inode->i_mode;
ii_write_unlock(inode);
@@ -216,7 +216,7 @@ static void au_write_post(struct inode *inode, struct file *h_file,
/* AuDbg("blks %llu, %llu\n", (u64)blks, (u64)h_inode->i_blocks); */
if (written > 0)
- au_fhsm_wrote(inode->i_sb, wpre->bstart,
+ au_fhsm_wrote(inode->i_sb, wpre->btop,
/*force*/h_inode->i_blocks > wpre->blks);
}
diff --git a/fs/aufs/fhsm.c b/fs/aufs/fhsm.c
index db079d6ee..e3cb6ed95 100644
--- a/fs/aufs/fhsm.c
+++ b/fs/aufs/fhsm.c
@@ -141,12 +141,12 @@ void au_fhsm_wrote(struct super_block *sb, aufs_bindex_t bindex, int force)
void au_fhsm_wrote_all(struct super_block *sb, int force)
{
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct au_branch *br;
/* exclude the bottom */
- bend = au_fhsm_bottom(sb);
- for (bindex = 0; bindex < bend; bindex++) {
+ bbot = au_fhsm_bottom(sb);
+ for (bindex = 0; bindex < bbot; bindex++) {
br = au_sbr(sb, bindex);
if (au_br_fhsm(br->br_perm))
au_fhsm_wrote(sb, bindex, force);
@@ -192,15 +192,15 @@ static ssize_t au_fhsm_do_read(struct super_block *sb,
{
ssize_t err;
int nstbr;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct au_branch *br;
struct au_br_fhsm *bf;
/* except the bottom branch */
err = 0;
nstbr = 0;
- bend = au_fhsm_bottom(sb);
- for (bindex = 0; !err && bindex < bend; bindex++) {
+ bbot = au_fhsm_bottom(sb);
+ for (bindex = 0; !err && bindex < bbot; bindex++) {
br = au_sbr(sb, bindex);
if (!au_br_fhsm(br->br_perm))
continue;
@@ -231,7 +231,7 @@ static ssize_t au_fhsm_read(struct file *file, char __user *buf, size_t count,
{
ssize_t err;
int readable;
- aufs_bindex_t nfhsm, bindex, bend;
+ aufs_bindex_t nfhsm, bindex, bbot;
struct au_sbinfo *sbinfo;
struct au_fhsm *fhsm;
struct au_branch *br;
@@ -262,8 +262,8 @@ need_data:
AuDebugOn(!sb);
/* exclude the bottom branch */
nfhsm = 0;
- bend = au_fhsm_bottom(sb);
- for (bindex = 0; bindex < bend; bindex++) {
+ bbot = au_fhsm_bottom(sb);
+ for (bindex = 0; bindex < bbot; bindex++) {
br = au_sbr(sb, bindex);
if (au_br_fhsm(br->br_perm))
nfhsm++;
diff --git a/fs/aufs/file.c b/fs/aufs/file.c
index 6b8a66b4a..27c5bf825 100644
--- a/fs/aufs/file.c
+++ b/fs/aufs/file.c
@@ -69,7 +69,7 @@ struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
}
}
flags &= ~O_CREAT;
- atomic_inc(&br->br_count);
+ au_br_get(br);
h_path.dentry = h_dentry;
h_path.mnt = au_br_mnt(br);
h_file = vfsub_dentry_open(&h_path, flags);
@@ -88,7 +88,7 @@ struct file *au_h_open(struct dentry *dentry, aufs_bindex_t bindex, int flags,
goto out; /* success */
out_br:
- atomic_dec(&br->br_count);
+ au_br_put(br);
out:
return h_file;
}
@@ -122,7 +122,7 @@ static int au_cmoo(struct dentry *dentry)
err = 0;
if (IS_ROOT(dentry))
goto out;
- cpg.bsrc = au_dbstart(dentry);
+ cpg.bsrc = au_dbtop(dentry);
if (!cpg.bsrc)
goto out;
@@ -237,7 +237,7 @@ int au_do_open(struct file *file, struct au_do_open_args *args)
if (!err)
err = args->open(file, vfsub_file_flags(file),
args->h_file);
- if (!err && au_fbstart(file) != au_dbstart(dentry))
+ if (!err && au_fbtop(file) != au_dbtop(dentry))
/*
* cmoo happens after h_file was opened.
* need to refresh file later.
@@ -270,49 +270,49 @@ out:
int au_reopen_nondir(struct file *file)
{
int err;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
struct dentry *dentry;
struct file *h_file, *h_file_tmp;
dentry = file->f_path.dentry;
- bstart = au_dbstart(dentry);
+ btop = au_dbtop(dentry);
h_file_tmp = NULL;
- if (au_fbstart(file) == bstart) {
+ if (au_fbtop(file) == btop) {
h_file = au_hf_top(file);
if (file->f_mode == h_file->f_mode)
return 0; /* success */
h_file_tmp = h_file;
get_file(h_file_tmp);
- au_set_h_fptr(file, bstart, NULL);
+ au_set_h_fptr(file, btop, NULL);
}
AuDebugOn(au_fi(file)->fi_hdir);
/*
* it can happen
* file exists on both of rw and ro
- * open --> dbstart and fbstart are both 0
+ * open --> dbtop and fbtop are both 0
* prepend a branch as rw, "rw" become ro
* remove rw/file
* delete the top branch, "rw" becomes rw again
- * --> dbstart is 1, fbstart is still 0
- * write --> fbstart is 0 but dbstart is 1
+ * --> dbtop is 1, fbtop is still 0
+ * write --> fbtop is 0 but dbtop is 1
*/
- /* AuDebugOn(au_fbstart(file) < bstart); */
+ /* AuDebugOn(au_fbtop(file) < btop); */
- h_file = au_h_open(dentry, bstart, vfsub_file_flags(file) & ~O_TRUNC,
+ h_file = au_h_open(dentry, btop, vfsub_file_flags(file) & ~O_TRUNC,
file, /*force_wr*/0);
err = PTR_ERR(h_file);
if (IS_ERR(h_file)) {
if (h_file_tmp) {
- atomic_inc(&au_sbr(dentry->d_sb, bstart)->br_count);
- au_set_h_fptr(file, bstart, h_file_tmp);
+ au_sbr_get(dentry->d_sb, btop);
+ au_set_h_fptr(file, btop, h_file_tmp);
h_file_tmp = NULL;
}
goto out; /* todo: close all? */
}
err = 0;
- au_set_fbstart(file, bstart);
- au_set_h_fptr(file, bstart, h_file);
+ au_set_fbtop(file, btop);
+ au_set_h_fptr(file, btop, h_file);
au_update_figen(file);
/* todo: necessary? */
/* file->f_ra = h_file->f_ra; */
@@ -329,7 +329,7 @@ static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
struct dentry *hi_wh)
{
int err;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
struct au_dinfo *dinfo;
struct dentry *h_dentry;
struct au_hdentry *hdp;
@@ -337,14 +337,14 @@ static int au_reopen_wh(struct file *file, aufs_bindex_t btgt,
dinfo = au_di(file->f_path.dentry);
AuRwMustWriteLock(&dinfo->di_rwsem);
- bstart = dinfo->di_bstart;
- dinfo->di_bstart = btgt;
+ btop = dinfo->di_btop;
+ dinfo->di_btop = btgt;
hdp = dinfo->di_hdentry;
h_dentry = hdp[0 + btgt].hd_dentry;
hdp[0 + btgt].hd_dentry = hi_wh;
err = au_reopen_nondir(file);
hdp[0 + btgt].hd_dentry = h_dentry;
- dinfo->di_bstart = bstart;
+ dinfo->di_btop = btop;
return err;
}
@@ -363,11 +363,11 @@ static int au_ready_to_write_wh(struct file *file, loff_t len,
.pin = pin
};
- au_update_dbstart(cpg.dentry);
+ au_update_dbtop(cpg.dentry);
inode = d_inode(cpg.dentry);
h_inode = NULL;
- if (au_dbstart(cpg.dentry) <= bcpup
- && au_dbend(cpg.dentry) >= bcpup) {
+ if (au_dbtop(cpg.dentry) <= bcpup
+ && au_dbbot(cpg.dentry) >= bcpup) {
h_dentry = au_h_dptr(cpg.dentry, bcpup);
if (h_dentry && d_is_positive(h_dentry))
h_inode = d_inode(h_dentry);
@@ -394,7 +394,7 @@ static int au_ready_to_write_wh(struct file *file, loff_t len,
int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
{
int err;
- aufs_bindex_t dbstart;
+ aufs_bindex_t dbtop;
struct dentry *parent;
struct inode *inode;
struct super_block *sb;
@@ -410,7 +410,7 @@ int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
sb = cpg.dentry->d_sb;
inode = d_inode(cpg.dentry);
- cpg.bsrc = au_fbstart(file);
+ cpg.bsrc = au_fbtop(file);
err = au_test_ro(sb, cpg.bsrc, inode);
if (!err && (au_hf_top(file)->f_mode & FMODE_WRITE)) {
err = au_pin(pin, cpg.dentry, cpg.bsrc, AuOpt_UDBA_NONE,
@@ -438,11 +438,11 @@ int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
if (unlikely(err))
goto out_dgrade;
- dbstart = au_dbstart(cpg.dentry);
- if (dbstart <= cpg.bdst)
+ dbtop = au_dbtop(cpg.dentry);
+ if (dbtop <= cpg.bdst)
cpg.bsrc = cpg.bdst;
- if (dbstart <= cpg.bdst /* just reopen */
+ if (dbtop <= cpg.bdst /* just reopen */
|| !d_unhashed(cpg.dentry) /* copyup and reopen */
) {
h_file = au_h_open_pre(cpg.dentry, cpg.bsrc, /*force_wr*/0);
@@ -450,7 +450,7 @@ int au_ready_to_write(struct file *file, loff_t len, struct au_pin *pin)
err = PTR_ERR(h_file);
else {
di_downgrade_lock(parent, AuLock_IR);
- if (dbstart > cpg.bdst)
+ if (dbtop > cpg.bdst)
err = au_sio_cpup_simple(&cpg);
if (!err)
err = au_reopen_nondir(file);
@@ -531,7 +531,7 @@ static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
finfo = au_fi(file);
sb = cpg.dentry->d_sb;
inode = d_inode(cpg.dentry);
- cpg.bdst = au_ibstart(inode);
+ cpg.bdst = au_ibtop(inode);
if (cpg.bdst == finfo->fi_btop || IS_ROOT(cpg.dentry))
goto out;
@@ -552,7 +552,7 @@ static int au_file_refresh_by_inode(struct file *file, int *need_reopen)
&& au_opt_test(au_mntflags(sb), PLINK)
&& au_plink_test(inode)
&& !d_unhashed(cpg.dentry)
- && cpg.bdst < au_dbstart(cpg.dentry)) {
+ && cpg.bdst < au_dbtop(cpg.dentry)) {
err = au_test_and_cpup_dirs(cpg.dentry, cpg.bdst);
if (unlikely(err))
goto out_unlock;
@@ -580,7 +580,7 @@ out:
static void au_do_refresh_dir(struct file *file)
{
- aufs_bindex_t bindex, bend, new_bindex, brid;
+ aufs_bindex_t bindex, bbot, new_bindex, brid;
struct au_hfile *p, tmp, *q;
struct au_finfo *finfo;
struct super_block *sb;
@@ -594,8 +594,8 @@ static void au_do_refresh_dir(struct file *file)
AuDebugOn(!fidir);
p = fidir->fd_hfile + finfo->fi_btop;
brid = p->hf_br->br_id;
- bend = fidir->fd_bbot;
- for (bindex = finfo->fi_btop; bindex <= bend; bindex++, p++) {
+ bbot = fidir->fd_bbot;
+ for (bindex = finfo->fi_btop; bindex <= bbot; bindex++, p++) {
if (!p->hf_file)
continue;
@@ -620,8 +620,8 @@ static void au_do_refresh_dir(struct file *file)
p = fidir->fd_hfile;
if (!au_test_mmapped(file) && !d_unlinked(file->f_path.dentry)) {
- bend = au_sbend(sb);
- for (finfo->fi_btop = 0; finfo->fi_btop <= bend;
+ bbot = au_sbbot(sb);
+ for (finfo->fi_btop = 0; finfo->fi_btop <= bbot;
finfo->fi_btop++, p++)
if (p->hf_file) {
if (file_inode(p->hf_file))
@@ -629,16 +629,16 @@ static void au_do_refresh_dir(struct file *file)
au_hfput(p, file);
}
} else {
- bend = au_br_index(sb, brid);
- for (finfo->fi_btop = 0; finfo->fi_btop < bend;
+ bbot = au_br_index(sb, brid);
+ for (finfo->fi_btop = 0; finfo->fi_btop < bbot;
finfo->fi_btop++, p++)
if (p->hf_file)
au_hfput(p, file);
- bend = au_sbend(sb);
+ bbot = au_sbbot(sb);
}
- p = fidir->fd_hfile + bend;
- for (fidir->fd_bbot = bend; fidir->fd_bbot >= finfo->fi_btop;
+ p = fidir->fd_hfile + bbot;
+ for (fidir->fd_bbot = bbot; fidir->fd_bbot >= finfo->fi_btop;
fidir->fd_bbot--, p--)
if (p->hf_file) {
if (file_inode(p->hf_file))
@@ -654,7 +654,7 @@ static void au_do_refresh_dir(struct file *file)
static int refresh_file(struct file *file, int (*reopen)(struct file *file))
{
int err, need_reopen;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct dentry *dentry;
struct au_finfo *finfo;
struct au_hfile *hfile;
@@ -667,9 +667,9 @@ static int refresh_file(struct file *file, int (*reopen)(struct file *file))
bindex = au_br_index(dentry->d_sb, hfile->hf_br->br_id);
AuDebugOn(bindex < 0);
if (bindex != finfo->fi_btop)
- au_set_fbstart(file, bindex);
+ au_set_fbtop(file, bindex);
} else {
- err = au_fidir_realloc(finfo, au_sbend(dentry->d_sb) + 1);
+ err = au_fidir_realloc(finfo, au_sbbot(dentry->d_sb) + 1);
if (unlikely(err))
goto out;
au_do_refresh_dir(file);
@@ -688,8 +688,8 @@ static int refresh_file(struct file *file, int (*reopen)(struct file *file))
/* error, close all lower files */
if (finfo->fi_hdir) {
- bend = au_fbend_dir(file);
- for (bindex = au_fbstart(file); bindex <= bend; bindex++)
+ bbot = au_fbbot_dir(file);
+ for (bindex = au_fbtop(file); bindex <= bbot; bindex++)
au_set_h_fptr(file, bindex, NULL);
}
@@ -703,7 +703,7 @@ int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
{
int err;
unsigned int sigen, figen;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
unsigned char pseudo_link;
struct dentry *dentry;
struct inode *inode;
@@ -715,9 +715,9 @@ int au_reval_and_lock_fdi(struct file *file, int (*reopen)(struct file *file),
fi_write_lock(file);
figen = au_figen(file);
di_write_lock_child(dentry);
- bstart = au_dbstart(dentry);
- pseudo_link = (bstart != au_ibstart(inode));
- if (sigen == figen && !pseudo_link && au_fbstart(file) == bstart) {
+ btop = au_dbtop(dentry);
+ pseudo_link = (btop != au_ibtop(inode));
+ if (sigen == figen && !pseudo_link && au_fbtop(file) == btop) {
if (!wlock) {
di_downgrade_lock(dentry, AuLock_IR);
fi_downgrade_lock(file);
diff --git a/fs/aufs/file.h b/fs/aufs/file.h
index 27d802487..96ab0a088 100644
--- a/fs/aufs/file.h
+++ b/fs/aufs/file.h
@@ -144,13 +144,13 @@ AuSimpleRwsemFuncs(fi, struct file *f, &au_fi(f)->fi_rwsem);
/* ---------------------------------------------------------------------- */
/* todo: hard/soft set? */
-static inline aufs_bindex_t au_fbstart(struct file *file)
+static inline aufs_bindex_t au_fbtop(struct file *file)
{
FiMustAnyLock(file);
return au_fi(file)->fi_btop;
}
-static inline aufs_bindex_t au_fbend_dir(struct file *file)
+static inline aufs_bindex_t au_fbbot_dir(struct file *file)
{
FiMustAnyLock(file);
AuDebugOn(!au_fi(file)->fi_hdir);
@@ -164,13 +164,13 @@ static inline struct au_vdir *au_fvdir_cache(struct file *file)
return au_fi(file)->fi_hdir->fd_vdir_cache;
}
-static inline void au_set_fbstart(struct file *file, aufs_bindex_t bindex)
+static inline void au_set_fbtop(struct file *file, aufs_bindex_t bindex)
{
FiMustWriteLock(file);
au_fi(file)->fi_btop = bindex;
}
-static inline void au_set_fbend_dir(struct file *file, aufs_bindex_t bindex)
+static inline void au_set_fbbot_dir(struct file *file, aufs_bindex_t bindex)
{
FiMustWriteLock(file);
AuDebugOn(!au_fi(file)->fi_hdir);
diff --git a/fs/aufs/finfo.c b/fs/aufs/finfo.c
index b5eb55dfb..07f6eb32d 100644
--- a/fs/aufs/finfo.c
+++ b/fs/aufs/finfo.c
@@ -15,7 +15,7 @@ void au_hfput(struct au_hfile *hf, struct file *file)
allow_write_access(hf->hf_file);
fput(hf->hf_file);
hf->hf_file = NULL;
- atomic_dec(&hf->hf_br->br_count);
+ au_br_put(hf->hf_br);
hf->hf_br = NULL;
}
@@ -55,7 +55,7 @@ struct au_fidir *au_fidir_alloc(struct super_block *sb)
struct au_fidir *fidir;
int nbr;
- nbr = au_sbend(sb) + 1;
+ nbr = au_sbbot(sb) + 1;
if (nbr < 2)
nbr = 2; /* initial allocate for 2 branches */
fidir = kzalloc(au_fidir_sz(nbr), GFP_NOFS);
@@ -105,10 +105,8 @@ void au_finfo_fin(struct file *file)
void au_fi_init_once(void *_finfo)
{
struct au_finfo *finfo = _finfo;
- static struct lock_class_key aufs_fi;
au_rw_init(&finfo->fi_rwsem);
- au_rw_class(&finfo->fi_rwsem, &aufs_fi);
}
int au_finfo_init(struct file *file, struct au_fidir *fidir)
@@ -125,11 +123,6 @@ int au_finfo_init(struct file *file, struct au_fidir *fidir)
err = 0;
au_nfiles_inc(dentry->d_sb);
- /* verbose coding for lock class name */
- if (!fidir)
- au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcNonDir_FIINFO);
- else
- au_rw_class(&finfo->fi_rwsem, au_lc_key + AuLcDir_FIINFO);
au_rw_write_lock(&finfo->fi_rwsem);
finfo->fi_btop = -1;
finfo->fi_hdir = fidir;
diff --git a/fs/aufs/hfsnotify.c b/fs/aufs/hfsnotify.c
index c0a1a63a9..07b4ecc53 100644
--- a/fs/aufs/hfsnotify.c
+++ b/fs/aufs/hfsnotify.c
@@ -50,8 +50,6 @@ static int au_hfsn_alloc(struct au_hinode *hinode)
lockdep_off();
err = fsnotify_add_mark(mark, br->br_hfsn->hfsn_group, hinode->hi_inode,
/*mnt*/NULL, /*allow_dups*/1);
- /* even if err */
- fsnotify_put_mark(mark);
lockdep_on();
return err;
@@ -73,6 +71,7 @@ static int au_hfsn_free(struct au_hinode *hinode, struct au_hnotify *hn)
spin_unlock(&mark->lock);
lockdep_off();
fsnotify_destroy_mark(mark, group);
+ fsnotify_put_mark(mark);
fsnotify_put_group(group);
lockdep_on();
diff --git a/fs/aufs/hnotify.c b/fs/aufs/hnotify.c
index 92e432b6a..3016c5e73 100644
--- a/fs/aufs/hnotify.c
+++ b/fs/aufs/hnotify.c
@@ -60,12 +60,12 @@ void au_hn_ctl(struct au_hinode *hinode, int do_set)
void au_hn_reset(struct inode *inode, unsigned int flags)
{
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct inode *hi;
struct dentry *iwhdentry;
- bend = au_ibend(inode);
- for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
+ bbot = au_ibbot(inode);
+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
hi = au_h_iptr(inode, bindex);
if (!hi)
continue;
@@ -89,7 +89,7 @@ void au_hn_reset(struct inode *inode, unsigned int flags)
static int hn_xino(struct inode *inode, struct inode *h_inode)
{
int err;
- aufs_bindex_t bindex, bend, bfound, bstart;
+ aufs_bindex_t bindex, bbot, bfound, btop;
struct inode *h_i;
err = 0;
@@ -99,15 +99,15 @@ static int hn_xino(struct inode *inode, struct inode *h_inode)
}
bfound = -1;
- bend = au_ibend(inode);
- bstart = au_ibstart(inode);
+ bbot = au_ibbot(inode);
+ btop = au_ibtop(inode);
#if 0 /* reserved for future use */
- if (bindex == bend) {
+ if (bindex == bbot) {
/* keep this ino in rename case */
goto out;
}
#endif
- for (bindex = bstart; bindex <= bend; bindex++)
+ for (bindex = btop; bindex <= bbot; bindex++)
if (au_h_iptr(inode, bindex) == h_inode) {
bfound = bindex;
break;
@@ -115,7 +115,7 @@ static int hn_xino(struct inode *inode, struct inode *h_inode)
if (bfound < 0)
goto out;
- for (bindex = bstart; bindex <= bend; bindex++) {
+ for (bindex = btop; bindex <= bbot; bindex++) {
h_i = au_h_iptr(inode, bindex);
if (!h_i)
continue;
@@ -420,7 +420,7 @@ static void au_hn_bh(void *_args)
{
struct au_hnotify_args *a = _args;
struct super_block *sb;
- aufs_bindex_t bindex, bend, bfound;
+ aufs_bindex_t bindex, bbot, bfound;
unsigned char xino, try_iput;
int err;
struct inode *inode;
@@ -451,8 +451,8 @@ static void au_hn_bh(void *_args)
ii_read_lock_parent(a->dir);
bfound = -1;
- bend = au_ibend(a->dir);
- for (bindex = au_ibstart(a->dir); bindex <= bend; bindex++)
+ bbot = au_ibbot(a->dir);
+ for (bindex = au_ibtop(a->dir); bindex <= bbot; bindex++)
if (au_h_iptr(a->dir, bindex) == a->h_dir) {
bfound = bindex;
break;
diff --git a/fs/aufs/i_op.c b/fs/aufs/i_op.c
index a70d5b050..1572a7d4d 100644
--- a/fs/aufs/i_op.c
+++ b/fs/aufs/i_op.c
@@ -74,7 +74,7 @@ out:
static int aufs_permission(struct inode *inode, int mask)
{
int err;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
const unsigned char isdir = !!S_ISDIR(inode->i_mode),
write_mask = !!(mask & (MAY_WRITE | MAY_APPEND));
struct inode *h_inode;
@@ -98,14 +98,14 @@ static int aufs_permission(struct inode *inode, int mask)
|| write_mask
|| au_opt_test(au_mntflags(sb), DIRPERM1)) {
err = au_busy_or_stale();
- h_inode = au_h_iptr(inode, au_ibstart(inode));
+ h_inode = au_h_iptr(inode, au_ibtop(inode));
if (unlikely(!h_inode
|| (h_inode->i_mode & S_IFMT)
!= (inode->i_mode & S_IFMT)))
goto out;
err = 0;
- bindex = au_ibstart(inode);
+ bindex = au_ibtop(inode);
br = au_sbr(sb, bindex);
err = h_permission(h_inode, mask, au_br_mnt(br), br->br_perm);
if (write_mask
@@ -124,8 +124,8 @@ static int aufs_permission(struct inode *inode, int mask)
/* non-write to dir */
err = 0;
- bend = au_ibend(inode);
- for (bindex = au_ibstart(inode); !err && bindex <= bend; bindex++) {
+ bbot = au_ibbot(inode);
+ for (bindex = au_ibtop(inode); !err && bindex <= bbot; bindex++) {
h_inode = au_h_iptr(inode, bindex);
if (h_inode) {
err = au_busy_or_stale();
@@ -184,7 +184,7 @@ static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
if (!err)
err = au_digen_test(parent, au_sigen(sb));
if (!err) {
- npositive = au_lkup_dentry(dentry, au_dbstart(parent),
+ npositive = au_lkup_dentry(dentry, au_dbtop(parent),
/*type*/0);
err = npositive;
}
@@ -226,18 +226,6 @@ static struct dentry *aufs_lookup(struct inode *dir, struct dentry *dentry,
out_unlock:
di_write_unlock(dentry);
- if (inode) {
- /* verbose coding for lock class name */
- if (unlikely(S_ISLNK(inode->i_mode)))
- au_rw_class(&au_di(dentry)->di_rwsem,
- au_lc_key + AuLcSymlink_DIINFO);
- else if (unlikely(S_ISDIR(inode->i_mode)))
- au_rw_class(&au_di(dentry)->di_rwsem,
- au_lc_key + AuLcDir_DIINFO);
- else /* likely */
- au_rw_class(&au_di(dentry)->di_rwsem,
- au_lc_key + AuLcNonDir_DIINFO);
- }
out_si:
si_read_unlock(sb);
out:
@@ -324,7 +312,7 @@ static int aufs_atomic_open(struct inode *dir, struct dentry *dentry,
parent = dentry->d_parent; /* dir is locked */
di_write_lock_parent(parent);
- err = au_lkup_dentry(dentry, /*bstart*/0, /*type*/0);
+ err = au_lkup_dentry(dentry, /*btop*/0, /*type*/0);
if (unlikely(err))
goto out_unlock;
@@ -391,7 +379,7 @@ out:
static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
const unsigned char add_entry, aufs_bindex_t bcpup,
- aufs_bindex_t bstart)
+ aufs_bindex_t btop)
{
int err;
struct dentry *h_parent;
@@ -404,9 +392,9 @@ static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
err = 0;
if (!au_h_dptr(parent, bcpup)) {
- if (bstart > bcpup)
+ if (btop > bcpup)
err = au_cpup_dirs(dentry, bcpup);
- else if (bstart < bcpup)
+ else if (btop < bcpup)
err = au_cpdown_dirs(dentry, bcpup);
else
BUG();
@@ -422,7 +410,7 @@ static int au_wr_dir_cpup(struct dentry *dentry, struct dentry *parent,
AuDbg("bcpup %d\n", bcpup);
if (!err) {
if (d_really_is_negative(dentry))
- au_set_h_dptr(dentry, bstart, NULL);
+ au_set_h_dptr(dentry, btop, NULL);
au_update_dbrange(dentry, /*do_put_zero*/0);
}
}
@@ -446,7 +434,7 @@ int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
{
int err;
unsigned int flags;
- aufs_bindex_t bcpup, bstart, src_bstart;
+ aufs_bindex_t bcpup, btop, src_btop;
const unsigned char add_entry
= au_ftest_wrdir(args->flags, ADD_ENTRY)
| au_ftest_wrdir(args->flags, TMPFILE);
@@ -457,13 +445,13 @@ int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
sb = dentry->d_sb;
sbinfo = au_sbi(sb);
parent = dget_parent(dentry);
- bstart = au_dbstart(dentry);
- bcpup = bstart;
+ btop = au_dbtop(dentry);
+ bcpup = btop;
if (args->force_btgt < 0) {
if (src_dentry) {
- src_bstart = au_dbstart(src_dentry);
- if (src_bstart < bstart)
- bcpup = src_bstart;
+ src_btop = au_dbtop(src_dentry);
+ if (src_btop < btop)
+ bcpup = src_btop;
} else if (add_entry) {
flags = 0;
if (au_ftest_wrdir(args->flags, ISDIR))
@@ -492,18 +480,18 @@ int au_wr_dir(struct dentry *dentry, struct dentry *src_dentry,
AuDebugOn(au_test_ro(sb, bcpup, d_inode(dentry)));
}
- AuDbg("bstart %d, bcpup %d\n", bstart, bcpup);
+ AuDbg("btop %d, bcpup %d\n", btop, bcpup);
err = bcpup;
- if (bcpup == bstart)
+ if (bcpup == btop)
goto out; /* success */
/* copyup the new parent into the branch we process */
- err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, bstart);
+ err = au_wr_dir_cpup(dentry, parent, add_entry, bcpup, btop);
if (err >= 0) {
if (d_really_is_negative(dentry)) {
- au_set_h_dptr(dentry, bstart, NULL);
- au_set_dbstart(dentry, bcpup);
- au_set_dbend(dentry, bcpup);
+ au_set_h_dptr(dentry, btop, NULL);
+ au_set_dbtop(dentry, bcpup);
+ au_set_dbbot(dentry, bcpup);
}
AuDebugOn(add_entry
&& !au_ftest_wrdir(args->flags, TMPFILE)
@@ -645,7 +633,7 @@ int au_do_pin(struct au_pin *p)
}
p->h_dentry = NULL;
- if (p->bindex <= au_dbend(p->dentry))
+ if (p->bindex <= au_dbbot(p->dentry))
p->h_dentry = au_h_dptr(p->dentry, p->bindex);
p->parent = dget_parent(p->dentry);
@@ -761,7 +749,7 @@ int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
{
int err;
loff_t sz;
- aufs_bindex_t bstart, ibstart;
+ aufs_bindex_t btop, ibtop;
struct dentry *hi_wh, *parent;
struct inode *inode;
struct au_wr_dir_args wr_dir_args = {
@@ -772,16 +760,16 @@ int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
if (d_is_dir(dentry))
au_fset_wrdir(wr_dir_args.flags, ISDIR);
/* plink or hi_wh() case */
- bstart = au_dbstart(dentry);
+ btop = au_dbtop(dentry);
inode = d_inode(dentry);
- ibstart = au_ibstart(inode);
- if (bstart != ibstart && !au_test_ro(inode->i_sb, ibstart, inode))
- wr_dir_args.force_btgt = ibstart;
+ ibtop = au_ibtop(inode);
+ if (btop != ibtop && !au_test_ro(inode->i_sb, ibtop, inode))
+ wr_dir_args.force_btgt = ibtop;
err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
if (unlikely(err < 0))
goto out;
a->btgt = err;
- if (err != bstart)
+ if (err != btop)
au_fset_icpup(a->flags, DID_CPUP);
err = 0;
@@ -797,8 +785,8 @@ int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
if (unlikely(err))
goto out_parent;
- a->h_path.dentry = au_h_dptr(dentry, bstart);
sz = -1;
+ a->h_path.dentry = au_h_dptr(dentry, btop);
a->h_inode = d_inode(a->h_path.dentry);
if (ia && (ia->ia_valid & ATTR_SIZE)) {
inode_lock_nested(a->h_inode, AuLsc_I_CHILD);
@@ -839,7 +827,7 @@ int au_pin_and_icpup(struct dentry *dentry, struct iattr *ia,
struct au_cp_generic cpg = {
.dentry = dentry,
.bdst = a->btgt,
- .bsrc = bstart,
+ .bsrc = btop,
.len = sz,
.pin = &a->pin,
.flags = AuCpup_DTIME | AuCpup_HOPEN
@@ -976,7 +964,7 @@ out_unlock:
inode_unlock(a->h_inode);
au_unpin(&a->pin);
if (unlikely(err))
- au_update_dbstart(dentry);
+ au_update_dbtop(dentry);
out_dentry:
di_write_unlock(dentry);
if (file) {
@@ -1073,7 +1061,7 @@ ssize_t au_srxattr(struct dentry *dentry, struct au_srxattr *arg)
au_unpin(&a->pin);
if (unlikely(err))
- au_update_dbstart(dentry);
+ au_update_dbtop(dentry);
out_di:
di_write_unlock(dentry);
@@ -1165,7 +1153,7 @@ int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
di_read_lock_child(dentry, AuLock_IR);
inode = d_inode(dentry);
- bindex = au_ibstart(inode);
+ bindex = au_ibtop(inode);
h_path->mnt = au_sbr_mnt(sb, bindex);
h_sb = h_path->mnt->mnt_sb;
if (!force
@@ -1173,7 +1161,7 @@ int au_h_path_getattr(struct dentry *dentry, int force, struct path *h_path)
&& udba_none)
goto out; /* success */
- if (au_dbstart(dentry) == bindex)
+ if (au_dbtop(dentry) == bindex)
h_path->dentry = au_h_dptr(dentry, bindex);
else if (au_opt_test(mnt_flags, PLINK) && au_plink_test(inode)) {
h_path->dentry = au_plink_lkup(inode, bindex);
@@ -1258,14 +1246,14 @@ static const char *aufs_get_link(struct dentry *dentry, struct inode *inode,
err = -EINVAL;
inode = d_inode(dentry);
- bindex = au_ibstart(inode);
+ bindex = au_ibtop(inode);
h_inode = au_h_iptr(inode, bindex);
if (unlikely(!h_inode->i_op->get_link))
goto out_unlock;
err = -EBUSY;
h_dentry = NULL;
- if (au_dbstart(dentry) <= bindex) {
+ if (au_dbtop(dentry) <= bindex) {
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry)
dget(h_dentry);
@@ -1311,7 +1299,7 @@ static int aufs_update_time(struct inode *inode, struct timespec *ts, int flags)
si_read_lock(sb, AuLock_FLUSH);
ii_write_lock_child(inode);
lockdep_on();
- h_inode = au_h_iptr(inode, au_ibstart(inode));
+ h_inode = au_h_iptr(inode, au_ibtop(inode));
err = vfsub_update_time(h_inode, ts, flags);
lockdep_off();
if (!err)
diff --git a/fs/aufs/i_op_add.c b/fs/aufs/i_op_add.c
index 533b36a2a..8e3fb61e8 100644
--- a/fs/aufs/i_op_add.c
+++ b/fs/aufs/i_op_add.c
@@ -167,7 +167,7 @@ lock_hdir_lkup_wh(struct dentry *dentry, struct au_dtime *dt,
h_parent = au_pinned_h_parent(pin);
if (udba != AuOpt_UDBA_NONE
- && au_dbstart(dentry) == bcpup)
+ && au_dbtop(dentry) == bcpup)
err = au_may_add(dentry, bcpup, h_parent,
au_ftest_wrdir(wr_dir_args->flags, ISDIR));
else if (unlikely(dentry->d_name.len > AUFS_MAX_NAMELEN))
@@ -229,7 +229,7 @@ static int add_simple(struct inode *dir, struct dentry *dentry,
struct simple_arg *arg)
{
int err, rerr;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
unsigned char created;
const unsigned char try_aopen
= (arg->type == Creat && arg->u.c.try_aopen);
@@ -272,10 +272,10 @@ static int add_simple(struct inode *dir, struct dentry *dentry,
if (IS_ERR(wh_dentry))
goto out_parent;
- bstart = au_dbstart(dentry);
+ btop = au_dbtop(dentry);
sb = dentry->d_sb;
- br = au_sbr(sb, bstart);
- a->h_path.dentry = au_h_dptr(dentry, bstart);
+ br = au_sbr(sb, btop);
+ a->h_path.dentry = au_h_dptr(dentry, btop);
a->h_path.mnt = au_br_mnt(br);
h_dir = au_pinned_h_dir(&a->pin);
switch (arg->type) {
@@ -300,7 +300,7 @@ static int add_simple(struct inode *dir, struct dentry *dentry,
}
created = !err;
if (!err)
- err = epilog(dir, bstart, wh_dentry, dentry);
+ err = epilog(dir, btop, wh_dentry, dentry);
/* revert */
if (unlikely(created && err && d_is_positive(a->h_path.dentry))) {
@@ -326,7 +326,7 @@ out_parent:
di_write_unlock(parent);
out_unlock:
if (unlikely(err)) {
- au_update_dbstart(dentry);
+ au_update_dbtop(dentry);
d_drop(dentry);
}
if (!try_aopen)
@@ -423,9 +423,9 @@ int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
if (unlikely(err))
goto out_parent;
- bindex = au_dbstart(parent);
- au_set_dbstart(dentry, bindex);
- au_set_dbend(dentry, bindex);
+ bindex = au_dbtop(parent);
+ au_set_dbtop(dentry, bindex);
+ au_set_dbbot(dentry, bindex);
err = au_wr_dir(dentry, /*src_dentry*/NULL, &wr_dir_args);
bindex = err;
if (unlikely(err < 0))
@@ -455,15 +455,15 @@ int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
if (unlikely(err))
goto out_dentry;
- au_set_dbstart(dentry, bindex);
- au_set_dbend(dentry, bindex);
+ au_set_dbtop(dentry, bindex);
+ au_set_dbbot(dentry, bindex);
au_set_h_dptr(dentry, bindex, dget(h_dentry));
inode = au_new_inode(dentry, /*must_new*/1);
if (IS_ERR(inode)) {
err = PTR_ERR(inode);
au_set_h_dptr(dentry, bindex, NULL);
- au_set_dbstart(dentry, -1);
- au_set_dbend(dentry, -1);
+ au_set_dbtop(dentry, -1);
+ au_set_dbbot(dentry, -1);
} else {
if (!inode->i_nlink)
set_nlink(inode, 1);
@@ -471,7 +471,7 @@ int aufs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
au_di(dentry)->di_tmpfile = 1;
/* update without i_mutex */
- if (au_ibstart(dir) == au_dbstart(dentry))
+ if (au_ibtop(dir) == au_dbtop(dentry))
au_cpup_attr_timesizes(dir);
}
@@ -483,15 +483,7 @@ out_parent:
di_write_unlock(parent);
dput(parent);
di_write_unlock(dentry);
- if (!err)
-#if 0
- /* verbose coding for lock class name */
- au_rw_class(&au_di(dentry)->di_rwsem,
- au_lc_key + AuLcNonDir_DIINFO);
-#else
- ;
-#endif
- else {
+ if (unlikely(err)) {
au_di_fin(dentry);
dentry->d_fsdata = NULL;
}
@@ -550,7 +542,7 @@ static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
{
int err;
unsigned char plink;
- aufs_bindex_t bend;
+ aufs_bindex_t bbot;
struct dentry *h_src_dentry;
struct inode *h_inode, *inode, *delegated;
struct super_block *sb;
@@ -560,13 +552,13 @@ static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
h_inode = NULL;
sb = src_dentry->d_sb;
inode = d_inode(src_dentry);
- if (au_ibstart(inode) <= a->bdst)
+ if (au_ibtop(inode) <= a->bdst)
h_inode = au_h_iptr(inode, a->bdst);
if (!h_inode || !h_inode->i_nlink) {
/* copyup src_dentry as the name of dentry. */
- bend = au_dbend(dentry);
- if (bend < a->bsrc)
- au_set_dbend(dentry, a->bsrc);
+ bbot = au_dbbot(dentry);
+ if (bbot < a->bsrc)
+ au_set_dbbot(dentry, a->bsrc);
au_set_h_dptr(dentry, a->bsrc,
dget(au_h_dptr(src_dentry, a->bsrc)));
dget(a->h_path.dentry);
@@ -601,7 +593,7 @@ static int au_cpup_or_link(struct dentry *src_dentry, struct dentry *dentry,
spin_unlock(&dentry->d_lock);
AuDbg("temporary d_inode...done\n");
au_set_h_dptr(dentry, a->bsrc, NULL);
- au_set_dbend(dentry, bend);
+ au_set_dbbot(dentry, bbot);
} else {
/* the inode of src_dentry already exists on a.bdst branch */
h_src_dentry = d_find_alias(h_inode);
@@ -679,7 +671,7 @@ int aufs_link(struct dentry *src_dentry, struct inode *dir,
goto out_unlock;
a->src_parent = dget_parent(src_dentry);
- wr_dir_args.force_btgt = au_ibstart(inode);
+ wr_dir_args.force_btgt = au_ibtop(inode);
di_write_lock_parent(a->parent);
wr_dir_args.force_btgt = au_wbr(dentry, wr_dir_args.force_btgt);
@@ -691,15 +683,15 @@ int aufs_link(struct dentry *src_dentry, struct inode *dir,
err = 0;
sb = dentry->d_sb;
- a->bdst = au_dbstart(dentry);
+ a->bdst = au_dbtop(dentry);
a->h_path.dentry = au_h_dptr(dentry, a->bdst);
a->h_path.mnt = au_sbr_mnt(sb, a->bdst);
- a->bsrc = au_ibstart(inode);
+ a->bsrc = au_ibtop(inode);
h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
if (!h_src_dentry && au_di(src_dentry)->di_tmpfile)
h_src_dentry = dget(au_hi_wh(inode, a->bsrc));
if (!h_src_dentry) {
- a->bsrc = au_dbstart(src_dentry);
+ a->bsrc = au_dbtop(src_dentry);
h_src_dentry = au_h_d_alias(src_dentry, a->bsrc);
AuDebugOn(!h_src_dentry);
} else if (IS_ERR(h_src_dentry)) {
@@ -799,7 +791,7 @@ out_parent:
dput(a->src_parent);
out_unlock:
if (unlikely(err)) {
- au_update_dbstart(dentry);
+ au_update_dbtop(dentry);
d_drop(dentry);
}
aufs_read_and_write_unlock2(dentry, src_dentry);
@@ -851,7 +843,7 @@ int aufs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
goto out_parent;
sb = dentry->d_sb;
- bindex = au_dbstart(dentry);
+ bindex = au_dbtop(dentry);
h_path.dentry = au_h_dptr(dentry, bindex);
h_path.mnt = au_sbr_mnt(sb, bindex);
err = vfsub_mkdir(au_pinned_h_dir(&a->pin), &h_path, mode);
@@ -908,7 +900,7 @@ out_parent:
di_write_unlock(parent);
out_unlock:
if (unlikely(err)) {
- au_update_dbstart(dentry);
+ au_update_dbtop(dentry);
d_drop(dentry);
}
aufs_read_unlock(dentry, AuLock_DW);
diff --git a/fs/aufs/i_op_del.c b/fs/aufs/i_op_del.c
index 68741aadb..bda70b008 100644
--- a/fs/aufs/i_op_del.c
+++ b/fs/aufs/i_op_del.c
@@ -21,25 +21,25 @@
int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
{
int need_wh, err;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
struct super_block *sb;
sb = dentry->d_sb;
- bstart = au_dbstart(dentry);
+ btop = au_dbtop(dentry);
if (*bcpup < 0) {
- *bcpup = bstart;
- if (au_test_ro(sb, bstart, d_inode(dentry))) {
+ *bcpup = btop;
+ if (au_test_ro(sb, btop, d_inode(dentry))) {
err = AuWbrCopyup(au_sbi(sb), dentry);
*bcpup = err;
if (unlikely(err < 0))
goto out;
}
} else
- AuDebugOn(bstart < *bcpup
+ AuDebugOn(btop < *bcpup
|| au_test_ro(sb, *bcpup, d_inode(dentry)));
- AuDbg("bcpup %d, bstart %d\n", *bcpup, bstart);
+ AuDbg("bcpup %d, btop %d\n", *bcpup, btop);
- if (*bcpup != bstart) {
+ if (*bcpup != btop) {
err = au_cpup_dirs(dentry, *bcpup);
if (unlikely(err))
goto out;
@@ -54,7 +54,7 @@ int au_wr_dir_need_wh(struct dentry *dentry, int isdir, aufs_bindex_t *bcpup)
au_di_cp(tmp, dinfo);
au_di_swap(tmp, dinfo);
/* returns the number of positive dentries */
- need_wh = au_lkup_dentry(dentry, bstart + 1, /*type*/0);
+ need_wh = au_lkup_dentry(dentry, btop + 1, /*type*/0);
au_di_swap(tmp, dinfo);
au_rw_write_unlock(&tmp->di_rwsem);
au_di_free(tmp);
@@ -165,7 +165,7 @@ lock_hdir_create_wh(struct dentry *dentry, int isdir, aufs_bindex_t *rbcpup,
h_path.dentry = au_pinned_h_parent(pin);
if (udba != AuOpt_UDBA_NONE
- && au_dbstart(dentry) == bcpup) {
+ && au_dbtop(dentry) == bcpup) {
err = au_may_del(dentry, bcpup, h_path.dentry, isdir);
wh_dentry = ERR_PTR(err);
if (unlikely(err))
@@ -287,7 +287,7 @@ static int do_revert(int err, struct inode *dir, aufs_bindex_t bindex,
int aufs_unlink(struct inode *dir, struct dentry *dentry)
{
int err;
- aufs_bindex_t bwh, bindex, bstart;
+ aufs_bindex_t bwh, bindex, btop;
struct inode *inode, *h_dir, *delegated;
struct dentry *parent, *wh_dentry;
/* to reuduce stack size */
@@ -316,7 +316,7 @@ int aufs_unlink(struct inode *dir, struct dentry *dentry)
if (unlikely(d_is_dir(dentry)))
goto out_unlock; /* possible? */
- bstart = au_dbstart(dentry);
+ btop = au_dbtop(dentry);
bwh = au_dbwh(dentry);
bindex = -1;
parent = dentry->d_parent; /* dir inode is locked */
@@ -327,10 +327,10 @@ int aufs_unlink(struct inode *dir, struct dentry *dentry)
if (IS_ERR(wh_dentry))
goto out_parent;
- a->h_path.mnt = au_sbr_mnt(dentry->d_sb, bstart);
- a->h_path.dentry = au_h_dptr(dentry, bstart);
+ a->h_path.mnt = au_sbr_mnt(dentry->d_sb, btop);
+ a->h_path.dentry = au_h_dptr(dentry, btop);
dget(a->h_path.dentry);
- if (bindex == bstart) {
+ if (bindex == btop) {
h_dir = au_pinned_h_dir(&a->pin);
delegated = NULL;
err = vfsub_unlink(h_dir, &a->h_path, &delegated, /*force*/0);
@@ -351,7 +351,7 @@ int aufs_unlink(struct inode *dir, struct dentry *dentry)
epilog(dir, dentry, bindex);
/* update target timestamps */
- if (bindex == bstart) {
+ if (bindex == btop) {
vfsub_update_h_iattr(&a->h_path, /*did*/NULL);
/*ignore*/
inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
@@ -388,7 +388,7 @@ out:
int aufs_rmdir(struct inode *dir, struct dentry *dentry)
{
int err, rmdir_later;
- aufs_bindex_t bwh, bindex, bstart;
+ aufs_bindex_t bwh, bindex, btop;
struct inode *inode;
struct dentry *parent, *wh_dentry, *h_dentry;
struct au_whtmp_rmdir *args;
@@ -428,7 +428,7 @@ int aufs_rmdir(struct inode *dir, struct dentry *dentry)
if (unlikely(err))
goto out_parent;
- bstart = au_dbstart(dentry);
+ btop = au_dbtop(dentry);
bwh = au_dbwh(dentry);
bindex = -1;
wh_dentry = lock_hdir_create_wh(dentry, /*isdir*/1, &bindex, &a->dt,
@@ -437,18 +437,18 @@ int aufs_rmdir(struct inode *dir, struct dentry *dentry)
if (IS_ERR(wh_dentry))
goto out_parent;
- h_dentry = au_h_dptr(dentry, bstart);
+ h_dentry = au_h_dptr(dentry, btop);
dget(h_dentry);
rmdir_later = 0;
- if (bindex == bstart) {
- err = renwh_and_rmdir(dentry, bstart, &args->whlist, dir);
+ if (bindex == btop) {
+ err = renwh_and_rmdir(dentry, btop, &args->whlist, dir);
if (err > 0) {
rmdir_later = err;
err = 0;
}
} else {
/* stop monitoring */
- au_hn_free(au_hi(inode, bstart));
+ au_hn_free(au_hi(inode, btop));
/* dir inode is locked */
IMustLock(d_inode(wh_dentry->d_parent));
@@ -461,7 +461,7 @@ int aufs_rmdir(struct inode *dir, struct dentry *dentry)
epilog(dir, dentry, bindex);
if (rmdir_later) {
- au_whtmp_kick_rmdir(dir, bstart, h_dentry, args);
+ au_whtmp_kick_rmdir(dir, btop, h_dentry, args);
args = NULL;
}
diff --git a/fs/aufs/i_op_ren.c b/fs/aufs/i_op_ren.c
index de4e03bf0..fdc299221 100644
--- a/fs/aufs/i_op_ren.c
+++ b/fs/aufs/i_op_ren.c
@@ -32,7 +32,7 @@ struct au_ren_args {
struct inode *dir, *inode;
struct au_hinode *hdir;
struct au_dtime dt[AuParentChild];
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
} sd[AuSrcDst];
#define src_dentry sd[AuSRC].dentry
@@ -45,7 +45,7 @@ struct au_ren_args {
#define src_hdir sd[AuSRC].hdir
#define src_h_dir sd[AuSRC].hdir->hi_inode
#define src_dt sd[AuSRC].dt
-#define src_bstart sd[AuSRC].bstart
+#define src_btop sd[AuSRC].btop
#define dst_dentry sd[AuDST].dentry
#define dst_dir sd[AuDST].dir
@@ -57,7 +57,7 @@ struct au_ren_args {
#define dst_hdir sd[AuDST].hdir
#define dst_h_dir sd[AuDST].hdir->hi_inode
#define dst_dt sd[AuDST].dt
-#define dst_bstart sd[AuDST].bstart
+#define dst_btop sd[AuDST].btop
struct dentry *h_trap;
struct au_branch *br;
@@ -189,12 +189,12 @@ static int au_ren_or_cpup(struct au_ren_args *a)
struct inode *delegated;
d = a->src_dentry;
- if (au_dbstart(d) == a->btgt) {
+ if (au_dbtop(d) == a->btgt) {
a->h_path.dentry = a->dst_h_dentry;
if (au_ftest_ren(a->flags, DIROPQ)
&& au_dbdiropq(d) == a->btgt)
au_fclr_ren(a->flags, DIROPQ);
- AuDebugOn(au_dbstart(d) != a->btgt);
+ AuDebugOn(au_dbtop(d) != a->btgt);
delegated = NULL;
err = vfsub_rename(a->src_h_dir, au_h_dptr(d, a->btgt),
a->dst_h_dir, &a->h_path, &delegated);
@@ -313,7 +313,7 @@ static int do_rename(struct au_ren_args *a)
a->dst_h_dentry = au_h_dptr(d, a->btgt);
}
- BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_bstart != a->btgt);
+ BUG_ON(d_is_positive(a->dst_h_dentry) && a->src_btop != a->btgt);
/* rename by vfs_rename or cpup */
d = a->dst_dentry;
@@ -321,7 +321,7 @@ static int do_rename(struct au_ren_args *a)
&& (a->dst_wh_dentry
|| au_dbdiropq(d) == a->btgt
/* hide the lower to keep xino */
- || a->btgt < au_dbend(d)
+ || a->btgt < au_dbbot(d)
|| au_opt_test(au_mntflags(d->d_sb), ALWAYS_DIROPQ)))
au_fset_ren(a->flags, DIROPQ);
err = au_ren_or_cpup(a);
@@ -337,7 +337,7 @@ static int do_rename(struct au_ren_args *a)
}
/* update target timestamps */
- AuDebugOn(au_dbstart(a->src_dentry) != a->btgt);
+ AuDebugOn(au_dbtop(a->src_dentry) != a->btgt);
a->h_path.dentry = au_h_dptr(a->src_dentry, a->btgt);
vfsub_update_h_iattr(&a->h_path, /*did*/NULL); /*ignore*/
a->src_inode->i_ctime = d_inode(a->h_path.dentry)->i_ctime;
@@ -410,10 +410,10 @@ static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
{
int err;
unsigned int rdhash;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
- bstart = au_dbstart(dentry);
- if (bstart != btgt) {
+ btop = au_dbtop(dentry);
+ if (btop != btgt) {
struct au_nhash whlist;
SiMustAnyLock(dentry->d_sb);
@@ -429,7 +429,7 @@ static int may_rename_srcdir(struct dentry *dentry, aufs_bindex_t btgt)
goto out;
}
- if (bstart == au_dbtaildir(dentry))
+ if (btop == au_dbtaildir(dentry))
return 0; /* success */
err = au_test_empty_lower(dentry);
@@ -462,16 +462,16 @@ static int au_ren_may_dir(struct au_ren_args *a)
if (unlikely(err))
goto out;
- au_set_dbstart(d, a->dst_bstart);
+ au_set_dbtop(d, a->dst_btop);
err = may_rename_dstdir(d, &a->whlist);
- au_set_dbstart(d, a->btgt);
+ au_set_dbtop(d, a->btgt);
}
- a->dst_h_dentry = au_h_dptr(d, au_dbstart(d));
+ a->dst_h_dentry = au_h_dptr(d, au_dbtop(d));
if (unlikely(err))
goto out;
d = a->src_dentry;
- a->src_h_dentry = au_h_dptr(d, au_dbstart(d));
+ a->src_h_dentry = au_h_dptr(d, au_dbtop(d));
if (au_ftest_ren(a->flags, ISDIR)) {
err = may_rename_srcdir(d, a->btgt);
if (unlikely(err)) {
@@ -494,7 +494,7 @@ static int au_may_ren(struct au_ren_args *a)
int err, isdir;
struct inode *h_inode;
- if (a->src_bstart == a->btgt) {
+ if (a->src_btop == a->btgt) {
err = au_may_del(a->src_dentry, a->btgt, a->src_h_parent,
au_ftest_ren(a->flags, ISDIR));
if (unlikely(err))
@@ -505,7 +505,7 @@ static int au_may_ren(struct au_ren_args *a)
}
err = 0;
- if (a->dst_bstart != a->btgt)
+ if (a->dst_btop != a->btgt)
goto out;
err = -ENOTEMPTY;
@@ -588,11 +588,11 @@ static int au_ren_lock(struct au_ren_args *a)
if (unlikely(a->src_hdir->hi_inode != d_inode(a->src_h_parent)
|| a->dst_hdir->hi_inode != d_inode(a->dst_h_parent)))
err = au_busy_or_stale();
- if (!err && au_dbstart(a->src_dentry) == a->btgt)
+ if (!err && au_dbtop(a->src_dentry) == a->btgt)
err = au_h_verify(a->src_h_dentry, udba,
d_inode(a->src_h_parent), a->src_h_parent,
a->br);
- if (!err && au_dbstart(a->dst_dentry) == a->btgt)
+ if (!err && au_dbtop(a->dst_dentry) == a->btgt)
err = au_h_verify(a->dst_h_dentry, udba,
d_inode(a->dst_h_parent), a->dst_h_parent,
a->br);
@@ -634,7 +634,7 @@ static void au_ren_refresh_dir(struct au_ren_args *a)
static void au_ren_refresh(struct au_ren_args *a)
{
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct dentry *d, *h_d;
struct inode *i, *h_i;
struct super_block *sb;
@@ -655,32 +655,32 @@ static void au_ren_refresh(struct au_ren_args *a)
}
au_update_dbrange(d, /*do_put_zero*/1);
} else {
- bend = a->btgt;
- for (bindex = au_dbstart(d); bindex < bend; bindex++)
+ bbot = a->btgt;
+ for (bindex = au_dbtop(d); bindex < bbot; bindex++)
au_set_h_dptr(d, bindex, NULL);
- bend = au_dbend(d);
- for (bindex = a->btgt + 1; bindex <= bend; bindex++)
+ bbot = au_dbbot(d);
+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++)
au_set_h_dptr(d, bindex, NULL);
au_update_dbrange(d, /*do_put_zero*/0);
}
d = a->src_dentry;
au_set_dbwh(d, -1);
- bend = au_dbend(d);
- for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
+ bbot = au_dbbot(d);
+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
h_d = au_h_dptr(d, bindex);
if (h_d)
au_set_h_dptr(d, bindex, NULL);
}
- au_set_dbend(d, a->btgt);
+ au_set_dbbot(d, a->btgt);
sb = d->d_sb;
i = a->src_inode;
if (au_opt_test(au_mntflags(sb), PLINK) && au_plink_test(i))
return; /* success */
- bend = au_ibend(i);
- for (bindex = a->btgt + 1; bindex <= bend; bindex++) {
+ bbot = au_ibbot(i);
+ for (bindex = a->btgt + 1; bindex <= bbot; bindex++) {
h_i = au_h_iptr(i, bindex);
if (h_i) {
au_xino_write(sb, bindex, h_i->i_ino, /*ino*/0);
@@ -688,7 +688,7 @@ static void au_ren_refresh(struct au_ren_args *a)
au_set_h_iptr(i, bindex, NULL, 0);
}
}
- au_set_ibend(i, a->btgt);
+ au_set_ibbot(i, a->btgt);
}
/* ---------------------------------------------------------------------- */
@@ -715,7 +715,7 @@ int au_wbr(struct dentry *dentry, aufs_bindex_t btgt)
return btgt;
}
-/* sets src_bstart, dst_bstart and btgt */
+/* sets src_btop, dst_btop and btgt */
static int au_ren_wbr(struct au_ren_args *a)
{
int err;
@@ -724,13 +724,13 @@ static int au_ren_wbr(struct au_ren_args *a)
.flags = AuWrDir_ADD_ENTRY
};
- a->src_bstart = au_dbstart(a->src_dentry);
- a->dst_bstart = au_dbstart(a->dst_dentry);
+ a->src_btop = au_dbtop(a->src_dentry);
+ a->dst_btop = au_dbtop(a->dst_dentry);
if (au_ftest_ren(a->flags, ISDIR))
au_fset_wrdir(wr_dir_args.flags, ISDIR);
- wr_dir_args.force_btgt = a->src_bstart;
- if (a->dst_inode && a->dst_bstart < a->src_bstart)
- wr_dir_args.force_btgt = a->dst_bstart;
+ wr_dir_args.force_btgt = a->src_btop;
+ if (a->dst_inode && a->dst_btop < a->src_btop)
+ wr_dir_args.force_btgt = a->dst_btop;
wr_dir_args.force_btgt = au_wbr(a->dst_dentry, wr_dir_args.force_btgt);
err = au_wr_dir(a->dst_dentry, a->src_dentry, &wr_dir_args);
a->btgt = err;
@@ -880,7 +880,7 @@ int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
goto out_children;
/* prepare the writable parent dir on the same branch */
- if (a->dst_bstart == a->btgt) {
+ if (a->dst_btop == a->btgt) {
au_fset_ren(a->flags, WHDST);
} else {
err = au_cpup_dirs(a->dst_dentry, a->btgt);
@@ -911,7 +911,7 @@ int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
au_fset_ren(a->flags, WHSRC);
/* cpup src */
- if (a->src_bstart != a->btgt) {
+ if (a->src_btop != a->btgt) {
struct au_pin pin;
err = au_pin(&pin, a->src_dentry, a->btgt,
@@ -921,18 +921,18 @@ int aufs_rename(struct inode *_src_dir, struct dentry *_src_dentry,
struct au_cp_generic cpg = {
.dentry = a->src_dentry,
.bdst = a->btgt,
- .bsrc = a->src_bstart,
+ .bsrc = a->src_btop,
.len = -1,
.pin = &pin,
.flags = AuCpup_DTIME | AuCpup_HOPEN
};
- AuDebugOn(au_dbstart(a->src_dentry) != a->src_bstart);
+ AuDebugOn(au_dbtop(a->src_dentry) != a->src_btop);
err = au_sio_cpup_simple(&cpg);
au_unpin(&pin);
}
if (unlikely(err))
goto out_children;
- a->src_bstart = a->btgt;
+ a->src_btop = a->btgt;
a->src_h_dentry = au_h_dptr(a->src_dentry, a->btgt);
au_fset_ren(a->flags, WHSRC);
}
@@ -972,16 +972,16 @@ out_hdir:
au_ren_unlock(a);
out_children:
au_nhash_wh_free(&a->whlist);
- if (err && a->dst_inode && a->dst_bstart != a->btgt) {
- AuDbg("bstart %d, btgt %d\n", a->dst_bstart, a->btgt);
+ if (err && a->dst_inode && a->dst_btop != a->btgt) {
+ AuDbg("btop %d, btgt %d\n", a->dst_btop, a->btgt);
au_set_h_dptr(a->dst_dentry, a->btgt, NULL);
- au_set_dbstart(a->dst_dentry, a->dst_bstart);
+ au_set_dbtop(a->dst_dentry, a->dst_btop);
}
out_parent:
if (!err)
d_move(a->src_dentry, a->dst_dentry);
else {
- au_update_dbstart(a->dst_dentry);
+ au_update_dbtop(a->dst_dentry);
if (!a->dst_inode)
d_drop(a->dst_dentry);
}
diff --git a/fs/aufs/iinfo.c b/fs/aufs/iinfo.c
index 67ef672a0..3a10d8f13 100644
--- a/fs/aufs/iinfo.c
+++ b/fs/aufs/iinfo.c
@@ -11,10 +11,12 @@
struct inode *au_h_iptr(struct inode *inode, aufs_bindex_t bindex)
{
struct inode *h_inode;
+ struct au_hinode *hinode;
IiMustAnyLock(inode);
- h_inode = au_ii(inode)->ii_hinode[0 + bindex].hi_inode;
+ hinode = au_hinode(au_ii(inode), bindex);
+ h_inode = hinode->hi_inode;
AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
return h_inode;
}
@@ -49,7 +51,7 @@ void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
IiMustWriteLock(inode);
- hinode = iinfo->ii_hinode + bindex;
+ hinode = au_hinode(iinfo, bindex);
hi = hinode->hi_inode;
AuDebugOn(h_inode && atomic_read(&h_inode->i_count) <= 0);
@@ -64,7 +66,7 @@ void au_set_h_iptr(struct inode *inode, aufs_bindex_t bindex,
AuDebugOn(inode->i_mode
&& (h_inode->i_mode & S_IFMT)
!= (inode->i_mode & S_IFMT));
- if (bindex == iinfo->ii_bstart)
+ if (bindex == iinfo->ii_btop)
au_cpup_igen(inode, h_inode);
br = au_sbr(sb, bindex);
hinode->hi_id = br->br_id;
@@ -91,7 +93,7 @@ void au_set_hi_wh(struct inode *inode, aufs_bindex_t bindex,
IiMustWriteLock(inode);
- hinode = au_ii(inode)->ii_hinode + bindex;
+ hinode = au_hinode(au_ii(inode), bindex);
AuDebugOn(hinode->hi_whdentry);
hinode->hi_whdentry = h_wh;
}
@@ -118,20 +120,18 @@ void au_update_iigen(struct inode *inode, int half)
void au_update_ibrange(struct inode *inode, int do_put_zero)
{
struct au_iinfo *iinfo;
- aufs_bindex_t bindex, bend;
-
- iinfo = au_ii(inode);
- if (!iinfo)
- return;
+ aufs_bindex_t bindex, bbot;
+ AuDebugOn(is_bad_inode(inode));
IiMustWriteLock(inode);
- if (do_put_zero && iinfo->ii_bstart >= 0) {
- for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
+ iinfo = au_ii(inode);
+ if (do_put_zero && iinfo->ii_btop >= 0) {
+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
bindex++) {
struct inode *h_i;
- h_i = iinfo->ii_hinode[0 + bindex].hi_inode;
+ h_i = au_hinode(iinfo, bindex)->hi_inode;
if (h_i
&& !h_i->i_nlink
&& !(h_i->i_state & I_LINKABLE))
@@ -139,21 +139,21 @@ void au_update_ibrange(struct inode *inode, int do_put_zero)
}
}
- iinfo->ii_bstart = -1;
- iinfo->ii_bend = -1;
- bend = au_sbend(inode->i_sb);
- for (bindex = 0; bindex <= bend; bindex++)
- if (iinfo->ii_hinode[0 + bindex].hi_inode) {
- iinfo->ii_bstart = bindex;
+ iinfo->ii_btop = -1;
+ iinfo->ii_bbot = -1;
+ bbot = au_sbbot(inode->i_sb);
+ for (bindex = 0; bindex <= bbot; bindex++)
+ if (au_hinode(iinfo, bindex)->hi_inode) {
+ iinfo->ii_btop = bindex;
break;
}
- if (iinfo->ii_bstart >= 0)
- for (bindex = bend; bindex >= iinfo->ii_bstart; bindex--)
- if (iinfo->ii_hinode[0 + bindex].hi_inode) {
- iinfo->ii_bend = bindex;
+ if (iinfo->ii_btop >= 0)
+ for (bindex = bbot; bindex >= iinfo->ii_btop; bindex--)
+ if (au_hinode(iinfo, bindex)->hi_inode) {
+ iinfo->ii_bbot = bindex;
break;
}
- AuDebugOn(iinfo->ii_bstart > iinfo->ii_bend);
+ AuDebugOn(iinfo->ii_btop > iinfo->ii_bbot);
}
/* ---------------------------------------------------------------------- */
@@ -162,14 +162,20 @@ void au_icntnr_init_once(void *_c)
{
struct au_icntnr *c = _c;
struct au_iinfo *iinfo = &c->iinfo;
- static struct lock_class_key aufs_ii;
spin_lock_init(&iinfo->ii_generation.ig_spin);
au_rw_init(&iinfo->ii_rwsem);
- au_rw_class(&iinfo->ii_rwsem, &aufs_ii);
inode_init_once(&c->vfs_inode);
}
+void au_hinode_init(struct au_hinode *hinode)
+{
+ hinode->hi_inode = NULL;
+ hinode->hi_id = -1;
+ au_hn_init(hinode);
+ hinode->hi_whdentry = NULL;
+}
+
int au_iinfo_init(struct inode *inode)
{
struct au_iinfo *iinfo;
@@ -178,37 +184,37 @@ int au_iinfo_init(struct inode *inode)
sb = inode->i_sb;
iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
- nbr = au_sbend(sb) + 1;
+ nbr = au_sbbot(sb) + 1;
if (unlikely(nbr <= 0))
nbr = 1;
- iinfo->ii_hinode = kcalloc(nbr, sizeof(*iinfo->ii_hinode), GFP_NOFS);
+ iinfo->ii_hinode = kmalloc_array(nbr, sizeof(*iinfo->ii_hinode),
+ GFP_NOFS);
if (iinfo->ii_hinode) {
au_ninodes_inc(sb);
for (i = 0; i < nbr; i++)
- iinfo->ii_hinode[i].hi_id = -1;
+ au_hinode_init(iinfo->ii_hinode + i);
iinfo->ii_generation.ig_generation = au_sigen(sb);
- iinfo->ii_bstart = -1;
- iinfo->ii_bend = -1;
+ iinfo->ii_btop = -1;
+ iinfo->ii_bbot = -1;
iinfo->ii_vdir = NULL;
return 0;
}
return -ENOMEM;
}
-int au_ii_realloc(struct au_iinfo *iinfo, int nbr)
+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr)
{
- int err, sz;
+ int err, i;
struct au_hinode *hip;
AuRwMustWriteLock(&iinfo->ii_rwsem);
err = -ENOMEM;
- sz = sizeof(*hip) * (iinfo->ii_bend + 1);
- if (!sz)
- sz = sizeof(*hip);
- hip = au_kzrealloc(iinfo->ii_hinode, sz, sizeof(*hip) * nbr, GFP_NOFS);
+ hip = krealloc(iinfo->ii_hinode, sizeof(*hip) * nbr, GFP_NOFS);
if (hip) {
+ for (i = iinfo->ii_bbot + 1; i < nbr; i++)
+ au_hinode_init(hip + i);
iinfo->ii_hinode = hip;
err = 0;
}
@@ -221,13 +227,10 @@ void au_iinfo_fin(struct inode *inode)
struct au_iinfo *iinfo;
struct au_hinode *hi;
struct super_block *sb;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
const unsigned char unlinked = !inode->i_nlink;
- iinfo = au_ii(inode);
- /* bad_inode case */
- if (!iinfo)
- return;
+ AuDebugOn(is_bad_inode(inode));
sb = inode->i_sb;
au_ninodes_dec(sb);
@@ -245,20 +248,20 @@ void au_iinfo_fin(struct inode *inode)
lockdep_on();
}
+ iinfo = au_ii(inode);
if (iinfo->ii_vdir)
au_vdir_free(iinfo->ii_vdir);
- bindex = iinfo->ii_bstart;
+ bindex = iinfo->ii_btop;
if (bindex >= 0) {
- hi = iinfo->ii_hinode + bindex;
- bend = iinfo->ii_bend;
- while (bindex++ <= bend) {
+ hi = au_hinode(iinfo, bindex);
+ bbot = iinfo->ii_bbot;
+ while (bindex++ <= bbot) {
if (hi->hi_inode)
au_hiput(hi);
hi++;
}
}
kfree(iinfo->ii_hinode);
- iinfo->ii_hinode = NULL;
AuRwDestroy(&iinfo->ii_rwsem);
}
diff --git a/fs/aufs/inode.c b/fs/aufs/inode.c
index 5a87727ba..d599b4c80 100644
--- a/fs/aufs/inode.c
+++ b/fs/aufs/inode.c
@@ -34,19 +34,20 @@ static int au_ii_refresh(struct inode *inode, int *update)
struct au_iinfo *iinfo;
struct au_hinode *p, *q, tmp;
+ AuDebugOn(is_bad_inode(inode));
IiMustWriteLock(inode);
*update = 0;
sb = inode->i_sb;
type = inode->i_mode & S_IFMT;
iinfo = au_ii(inode);
- err = au_ii_realloc(iinfo, au_sbend(sb) + 1);
+ err = au_hinode_realloc(iinfo, au_sbbot(sb) + 1);
if (unlikely(err))
goto out;
- AuDebugOn(iinfo->ii_bstart < 0);
- p = iinfo->ii_hinode + iinfo->ii_bstart;
- for (bindex = iinfo->ii_bstart; bindex <= iinfo->ii_bend;
+ AuDebugOn(iinfo->ii_btop < 0);
+ p = au_hinode(iinfo, iinfo->ii_btop);
+ for (bindex = iinfo->ii_btop; bindex <= iinfo->ii_bbot;
bindex++, p++) {
if (!p->hi_inode)
continue;
@@ -63,12 +64,12 @@ static int au_ii_refresh(struct inode *inode, int *update)
continue;
}
- if (new_bindex < iinfo->ii_bstart)
- iinfo->ii_bstart = new_bindex;
- if (iinfo->ii_bend < new_bindex)
- iinfo->ii_bend = new_bindex;
+ if (new_bindex < iinfo->ii_btop)
+ iinfo->ii_btop = new_bindex;
+ if (iinfo->ii_bbot < new_bindex)
+ iinfo->ii_bbot = new_bindex;
/* swap two lower inode, and loop again */
- q = iinfo->ii_hinode + new_bindex;
+ q = au_hinode(iinfo, new_bindex);
tmp = *q;
*q = *p;
*p = tmp;
@@ -130,7 +131,7 @@ int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
int err, e, update;
unsigned int flags;
umode_t mode;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
unsigned char isdir;
struct au_hinode *p;
struct au_iinfo *iinfo;
@@ -141,12 +142,12 @@ int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
update = 0;
iinfo = au_ii(inode);
- p = iinfo->ii_hinode + iinfo->ii_bstart;
+ p = au_hinode(iinfo, iinfo->ii_btop);
mode = (inode->i_mode & S_IFMT);
isdir = S_ISDIR(mode);
flags = au_hi_flags(inode, isdir);
- bend = au_dbend(dentry);
- for (bindex = au_dbstart(dentry); bindex <= bend; bindex++) {
+ bbot = au_dbbot(dentry);
+ for (bindex = au_dbtop(dentry); bindex <= bbot; bindex++) {
struct inode *h_i, *h_inode;
struct dentry *h_d;
@@ -156,7 +157,7 @@ int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
h_inode = d_inode(h_d);
AuDebugOn(mode != (h_inode->i_mode & S_IFMT));
- if (iinfo->ii_bstart <= bindex && bindex <= iinfo->ii_bend) {
+ if (iinfo->ii_btop <= bindex && bindex <= iinfo->ii_bbot) {
h_i = au_h_iptr(inode, bindex);
if (h_i) {
if (h_i == h_inode)
@@ -165,10 +166,10 @@ int au_refresh_hinode(struct inode *inode, struct dentry *dentry)
break;
}
}
- if (bindex < iinfo->ii_bstart)
- iinfo->ii_bstart = bindex;
- if (iinfo->ii_bend < bindex)
- iinfo->ii_bend = bindex;
+ if (bindex < iinfo->ii_btop)
+ iinfo->ii_btop = bindex;
+ if (iinfo->ii_bbot < bindex)
+ iinfo->ii_bbot = bindex;
au_set_h_iptr(inode, bindex, au_igrab(h_inode), flags);
update = 1;
}
@@ -189,7 +190,7 @@ static int set_inode(struct inode *inode, struct dentry *dentry)
int err;
unsigned int flags;
umode_t mode;
- aufs_bindex_t bindex, bstart, btail;
+ aufs_bindex_t bindex, btop, btail;
unsigned char isdir;
struct dentry *h_dentry;
struct inode *h_inode;
@@ -201,8 +202,8 @@ static int set_inode(struct inode *inode, struct dentry *dentry)
err = 0;
isdir = 0;
iop = au_sbi(inode->i_sb)->si_iop_array;
- bstart = au_dbstart(dentry);
- h_dentry = au_h_dptr(dentry, bstart);
+ btop = au_dbtop(dentry);
+ h_dentry = au_h_dptr(dentry, btop);
h_inode = d_inode(h_dentry);
mode = h_inode->i_mode;
switch (mode & S_IFMT) {
@@ -210,7 +211,7 @@ static int set_inode(struct inode *inode, struct dentry *dentry)
btail = au_dbtail(dentry);
inode->i_op = iop + AuIop_OTHER;
inode->i_fop = &aufs_file_fop;
- err = au_dy_iaop(inode, bstart, h_inode);
+ err = au_dy_iaop(inode, btop, h_inode);
if (unlikely(err))
goto out;
break;
@@ -246,9 +247,9 @@ static int set_inode(struct inode *inode, struct dentry *dentry)
&& !memcmp(dentry->d_name.name, AUFS_WH_PFX, AUFS_WH_PFX_LEN))
au_fclr_hi(flags, HNOTIFY);
iinfo = au_ii(inode);
- iinfo->ii_bstart = bstart;
- iinfo->ii_bend = btail;
- for (bindex = bstart; bindex <= btail; bindex++) {
+ iinfo->ii_btop = btop;
+ iinfo->ii_bbot = btail;
+ for (bindex = btop; bindex <= btail; bindex++) {
h_dentry = au_h_dptr(dentry, bindex);
if (h_dentry)
au_set_h_iptr(inode, bindex,
@@ -274,7 +275,7 @@ static int reval_inode(struct inode *inode, struct dentry *dentry)
{
int err;
unsigned int gen, igflags;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct inode *h_inode, *h_dinode;
struct dentry *h_dentry;
@@ -289,10 +290,10 @@ static int reval_inode(struct inode *inode, struct dentry *dentry)
err = 1;
ii_write_lock_new_child(inode);
- h_dentry = au_h_dptr(dentry, au_dbstart(dentry));
+ h_dentry = au_h_dptr(dentry, au_dbtop(dentry));
h_dinode = d_inode(h_dentry);
- bend = au_ibend(inode);
- for (bindex = au_ibstart(inode); bindex <= bend; bindex++) {
+ bbot = au_ibbot(inode);
+ for (bindex = au_ibtop(inode); bindex <= bbot; bindex++) {
h_inode = au_h_iptr(inode, bindex);
if (!h_inode || h_inode != h_dinode)
continue;
@@ -358,11 +359,11 @@ struct inode *au_new_inode(struct dentry *dentry, int must_new)
struct mutex *mtx;
ino_t h_ino, ino;
int err;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
sb = dentry->d_sb;
- bstart = au_dbstart(dentry);
- h_dentry = au_h_dptr(dentry, bstart);
+ btop = au_dbtop(dentry);
+ h_dentry = au_h_dptr(dentry, btop);
h_inode = d_inode(h_dentry);
h_ino = h_inode->i_ino;
@@ -372,12 +373,12 @@ struct inode *au_new_inode(struct dentry *dentry, int must_new)
*/
mtx = NULL;
if (!d_is_dir(h_dentry))
- mtx = &au_sbr(sb, bstart)->br_xino.xi_nondir_mtx;
+ mtx = &au_sbr(sb, btop)->br_xino.xi_nondir_mtx;
new_ino:
if (mtx)
mutex_lock(mtx);
- err = au_xino_read(sb, bstart, h_ino, &ino);
+ err = au_xino_read(sb, btop, h_ino, &ino);
inode = ERR_PTR(err);
if (unlikely(err))
goto out;
@@ -398,17 +399,6 @@ new_ino:
AuDbg("%lx, new %d\n", inode->i_state, !!(inode->i_state & I_NEW));
if (inode->i_state & I_NEW) {
- /* verbose coding for lock class name */
- if (unlikely(d_is_symlink(h_dentry)))
- au_rw_class(&au_ii(inode)->ii_rwsem,
- au_lc_key + AuLcSymlink_IIINFO);
- else if (unlikely(d_is_dir(h_dentry)))
- au_rw_class(&au_ii(inode)->ii_rwsem,
- au_lc_key + AuLcDir_IIINFO);
- else /* likely */
- au_rw_class(&au_ii(inode)->ii_rwsem,
- au_lc_key + AuLcNonDir_IIINFO);
-
ii_write_lock_new_child(inode);
err = set_inode(inode, dentry);
if (!err) {
@@ -424,7 +414,7 @@ new_ino:
atomic_inc(&inode->i_count);
iget_failed(inode);
ii_write_unlock(inode);
- au_xino_write(sb, bstart, h_ino, /*ino*/0);
+ au_xino_write(sb, btop, h_ino, /*ino*/0);
/* ignore this error */
goto out_iput;
} else if (!must_new && !IS_DEADDIR(inode) && inode->i_nlink) {
@@ -450,10 +440,10 @@ new_ino:
if (unlikely(au_test_fs_unique_ino(h_inode)))
AuWarn1("Warning: Un-notified UDBA or repeatedly renamed dir,"
" b%d, %s, %pd, hi%lu, i%lu.\n",
- bstart, au_sbtype(h_dentry->d_sb), dentry,
+ btop, au_sbtype(h_dentry->d_sb), dentry,
(unsigned long)h_ino, (unsigned long)ino);
ino = 0;
- err = au_xino_write(sb, bstart, h_ino, /*ino*/0);
+ err = au_xino_write(sb, btop, h_ino, /*ino*/0);
if (!err) {
iput(inode);
if (mtx)
@@ -483,8 +473,8 @@ int au_test_ro(struct super_block *sb, aufs_bindex_t bindex,
/* pseudo-link after flushed may happen out of bounds */
if (!err
&& inode
- && au_ibstart(inode) <= bindex
- && bindex <= au_ibend(inode)) {
+ && au_ibtop(inode) <= bindex
+ && bindex <= au_ibbot(inode)) {
/*
* permission check is unnecessary since vfsub routine
* will be called later
diff --git a/fs/aufs/inode.h b/fs/aufs/inode.h
index e694be498..0d654e83a 100644
--- a/fs/aufs/inode.h
+++ b/fs/aufs/inode.h
@@ -56,7 +56,7 @@ struct au_iinfo {
struct super_block *ii_hsb1; /* no get/put */
struct au_rwsem ii_rwsem;
- aufs_bindex_t ii_bstart, ii_bend;
+ aufs_bindex_t ii_btop, ii_bbot;
__u32 ii_higen;
struct au_hinode *ii_hinode;
struct au_vdir *ii_vdir;
@@ -106,12 +106,8 @@ void au_pin_hdir_release(struct au_pin *p);
static inline struct au_iinfo *au_ii(struct inode *inode)
{
- struct au_iinfo *iinfo;
-
- iinfo = &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
- if (iinfo->ii_hinode)
- return iinfo;
- return NULL; /* debugging bad_inode case */
+ BUG_ON(is_bad_inode(inode));
+ return &(container_of(inode, struct au_icntnr, vfs_inode)->iinfo);
}
/* ---------------------------------------------------------------------- */
@@ -254,9 +250,10 @@ void au_update_iigen(struct inode *inode, int half);
void au_update_ibrange(struct inode *inode, int do_put_zero);
void au_icntnr_init_once(void *_c);
+void au_hinode_init(struct au_hinode *hinode);
int au_iinfo_init(struct inode *inode);
void au_iinfo_fin(struct inode *inode);
-int au_ii_realloc(struct au_iinfo *iinfo, int nbr);
+int au_hinode_realloc(struct au_iinfo *iinfo, int nbr);
#ifdef CONFIG_PROC_FS
/* plink.c */
@@ -464,23 +461,29 @@ static inline int au_iigen_test(struct inode *inode, unsigned int sigen)
/* ---------------------------------------------------------------------- */
+static inline struct au_hinode *au_hinode(struct au_iinfo *iinfo,
+ aufs_bindex_t bindex)
+{
+ return iinfo->ii_hinode + bindex;
+}
+
static inline aufs_bindex_t au_ii_br_id(struct inode *inode,
aufs_bindex_t bindex)
{
IiMustAnyLock(inode);
- return au_ii(inode)->ii_hinode[0 + bindex].hi_id;
+ return au_hinode(au_ii(inode), bindex)->hi_id;
}
-static inline aufs_bindex_t au_ibstart(struct inode *inode)
+static inline aufs_bindex_t au_ibtop(struct inode *inode)
{
IiMustAnyLock(inode);
- return au_ii(inode)->ii_bstart;
+ return au_ii(inode)->ii_btop;
}
-static inline aufs_bindex_t au_ibend(struct inode *inode)
+static inline aufs_bindex_t au_ibbot(struct inode *inode)
{
IiMustAnyLock(inode);
- return au_ii(inode)->ii_bend;
+ return au_ii(inode)->ii_bbot;
}
static inline struct au_vdir *au_ivdir(struct inode *inode)
@@ -492,19 +495,19 @@ static inline struct au_vdir *au_ivdir(struct inode *inode)
static inline struct dentry *au_hi_wh(struct inode *inode, aufs_bindex_t bindex)
{
IiMustAnyLock(inode);
- return au_ii(inode)->ii_hinode[0 + bindex].hi_whdentry;
+ return au_hinode(au_ii(inode), bindex)->hi_whdentry;
}
-static inline void au_set_ibstart(struct inode *inode, aufs_bindex_t bindex)
+static inline void au_set_ibtop(struct inode *inode, aufs_bindex_t bindex)
{
IiMustWriteLock(inode);
- au_ii(inode)->ii_bstart = bindex;
+ au_ii(inode)->ii_btop = bindex;
}
-static inline void au_set_ibend(struct inode *inode, aufs_bindex_t bindex)
+static inline void au_set_ibbot(struct inode *inode, aufs_bindex_t bindex)
{
IiMustWriteLock(inode);
- au_ii(inode)->ii_bend = bindex;
+ au_ii(inode)->ii_bbot = bindex;
}
static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
@@ -516,7 +519,7 @@ static inline void au_set_ivdir(struct inode *inode, struct au_vdir *vdir)
static inline struct au_hinode *au_hi(struct inode *inode, aufs_bindex_t bindex)
{
IiMustAnyLock(inode);
- return au_ii(inode)->ii_hinode + bindex;
+ return au_hinode(au_ii(inode), bindex);
}
/* ---------------------------------------------------------------------- */
diff --git a/fs/aufs/ioctl.c b/fs/aufs/ioctl.c
index 6528fb911..2ebfdc4c1 100644
--- a/fs/aufs/ioctl.c
+++ b/fs/aufs/ioctl.c
@@ -17,7 +17,7 @@
static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
{
int err, fd;
- aufs_bindex_t wbi, bindex, bend;
+ aufs_bindex_t wbi, bindex, bbot;
struct file *h_file;
struct super_block *sb;
struct dentry *root;
@@ -57,10 +57,10 @@ static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
sb = path->dentry->d_sb;
root = sb->s_root;
aufs_read_lock(root, AuLock_IR);
- bend = au_sbend(sb);
+ bbot = au_sbbot(sb);
if (wbrfd.brid >= 0) {
wbi = au_br_index(sb, wbrfd.brid);
- if (unlikely(wbi < 0 || wbi > bend))
+ if (unlikely(wbi < 0 || wbi > bbot))
goto out_unlock;
}
@@ -72,7 +72,7 @@ static int au_wbr_fd(struct path *path, struct aufs_wbr_fd __user *arg)
bindex = wbi + 1;
wbi = -1;
- for (; bindex <= bend; bindex++) {
+ for (; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
if (au_br_writable(br->br_perm)) {
wbi = bindex;
@@ -92,7 +92,7 @@ out_unlock:
if (IS_ERR(h_file))
goto out_fd;
- atomic_dec(&br->br_count); /* cf. au_h_open() */
+ au_br_put(br); /* cf. au_h_open() */
fd_install(fd, h_file);
err = fd;
goto out; /* success */
diff --git a/fs/aufs/module.c b/fs/aufs/module.c
index 317fde014..88f8f4123 100644
--- a/fs/aufs/module.c
+++ b/fs/aufs/module.c
@@ -26,7 +26,28 @@ void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp)
/*
* aufs caches
*/
-struct kmem_cache *au_cachep[AuCache_Last];
+struct kmem_cache *au_cachep[AuCache_Last] = {
+ [0] = NULL
+};
+
+static void au_cache_fin(void)
+{
+ int i;
+
+ /*
+ * Make sure all delayed rcu free inodes are flushed before we
+ * destroy cache.
+ */
+ rcu_barrier();
+
+ /* excluding AuCache_HNOTIFY */
+ BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
+ for (i = 0; i < AuCache_HNOTIFY; i++) {
+ kmem_cache_destroy(au_cachep[i]);
+ au_cachep[i] = NULL;
+ }
+}
+
static int __init au_cache_init(void)
{
au_cachep[AuCache_DINFO] = AuCacheCtor(au_dinfo, au_di_init_once);
@@ -44,27 +65,10 @@ static int __init au_cache_init(void)
if (au_cachep[AuCache_DEHSTR])
return 0;
+ au_cache_fin();
return -ENOMEM;
}
-static void au_cache_fin(void)
-{
- int i;
-
- /*
- * Make sure all delayed rcu free inodes are flushed before we
- * destroy cache.
- */
- rcu_barrier();
-
- /* excluding AuCache_HNOTIFY */
- BUILD_BUG_ON(AuCache_HNOTIFY + 1 != AuCache_Last);
- for (i = 0; i < AuCache_HNOTIFY; i++) {
- kmem_cache_destroy(au_cachep[i]);
- au_cachep[i] = NULL;
- }
-}
-
/* ---------------------------------------------------------------------- */
int au_dir_roflags;
@@ -77,8 +81,6 @@ int au_dir_roflags;
struct au_sphlhead au_sbilist;
#endif
-struct lock_class_key au_lc_key[AuLcKey_Last];
-
/*
* functions for module interface.
*/
diff --git a/fs/aufs/module.h b/fs/aufs/module.h
index bb8644730..1383e3da9 100644
--- a/fs/aufs/module.h
+++ b/fs/aufs/module.h
@@ -24,22 +24,6 @@ extern bool au_userns;
extern int au_dir_roflags;
-enum {
- AuLcNonDir_FIINFO,
- AuLcNonDir_DIINFO,
- AuLcNonDir_IIINFO,
-
- AuLcDir_FIINFO,
- AuLcDir_DIINFO,
- AuLcDir_IIINFO,
-
- AuLcSymlink_DIINFO,
- AuLcSymlink_IIINFO,
-
- AuLcKey_Last
-};
-extern struct lock_class_key au_lc_key[AuLcKey_Last];
-
void *au_kzrealloc(void *p, unsigned int nused, unsigned int new_sz, gfp_t gfp);
int au_seq_path(struct seq_file *seq, struct path *path);
diff --git a/fs/aufs/mvdown.c b/fs/aufs/mvdown.c
index d8415422b..334f25c13 100644
--- a/fs/aufs/mvdown.c
+++ b/fs/aufs/mvdown.c
@@ -53,27 +53,27 @@ struct au_mvd_args {
static int find_lower_writable(struct au_mvd_args *a)
{
struct super_block *sb;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct au_branch *br;
sb = a->sb;
bindex = a->mvd_bsrc;
- bend = au_sbend(sb);
+ bbot = au_sbbot(sb);
if (a->mvdown.flags & AUFS_MVDOWN_FHSM_LOWER)
- for (bindex++; bindex <= bend; bindex++) {
+ for (bindex++; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
if (au_br_fhsm(br->br_perm)
&& (!(au_br_sb(br)->s_flags & MS_RDONLY)))
return bindex;
}
else if (!(a->mvdown.flags & AUFS_MVDOWN_ROLOWER))
- for (bindex++; bindex <= bend; bindex++) {
+ for (bindex++; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
if (!au_br_rdonly(br))
return bindex;
}
else
- for (bindex++; bindex <= bend; bindex++) {
+ for (bindex++; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
if (!(au_br_sb(br)->s_flags & MS_RDONLY)) {
if (au_br_rdonly(br))
@@ -96,7 +96,7 @@ static int au_do_mkdir(const unsigned char dmsg, struct au_mvd_args *a)
a->mvd_hdir_dst = au_hi(a->dir, a->mvd_bdst);
a->mvd_h_src_parent = au_h_dptr(a->parent, a->mvd_bsrc);
a->mvd_h_dst_parent = NULL;
- if (au_dbend(a->parent) >= a->mvd_bdst)
+ if (au_dbbot(a->parent) >= a->mvd_bdst)
a->mvd_h_dst_parent = au_h_dptr(a->parent, a->mvd_bdst);
if (!a->mvd_h_dst_parent) {
err = au_cpdown_dirs(a->dentry, a->mvd_bdst);
@@ -350,20 +350,20 @@ static int au_do_mvdown(const unsigned char dmsg, struct au_mvd_args *a)
/* maintain internal array */
if (!(a->mvdown.flags & AUFS_MVDOWN_KUPPER)) {
au_set_h_dptr(a->dentry, a->mvd_bsrc, NULL);
- au_set_dbstart(a->dentry, a->mvd_bdst);
+ au_set_dbtop(a->dentry, a->mvd_bdst);
au_set_h_iptr(a->inode, a->mvd_bsrc, NULL, /*flags*/0);
- au_set_ibstart(a->inode, a->mvd_bdst);
+ au_set_ibtop(a->inode, a->mvd_bdst);
} else {
/* hide the lower */
au_set_h_dptr(a->dentry, a->mvd_bdst, NULL);
- au_set_dbend(a->dentry, a->mvd_bsrc);
+ au_set_dbbot(a->dentry, a->mvd_bsrc);
au_set_h_iptr(a->inode, a->mvd_bdst, NULL, /*flags*/0);
- au_set_ibend(a->inode, a->mvd_bsrc);
+ au_set_ibbot(a->inode, a->mvd_bsrc);
}
- if (au_dbend(a->dentry) < a->mvd_bdst)
- au_set_dbend(a->dentry, a->mvd_bdst);
- if (au_ibend(a->inode) < a->mvd_bdst)
- au_set_ibend(a->inode, a->mvd_bdst);
+ if (au_dbbot(a->dentry) < a->mvd_bdst)
+ au_set_dbbot(a->dentry, a->mvd_bdst);
+ if (au_ibbot(a->inode) < a->mvd_bdst)
+ au_set_ibbot(a->inode, a->mvd_bdst);
out_unlock:
au_do_unlock(dmsg, a);
@@ -381,7 +381,7 @@ static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
err = 0;
plinked = !!au_opt_test(au_mntflags(a->sb), PLINK);
- if (au_dbstart(a->dentry) == a->mvd_bsrc
+ if (au_dbtop(a->dentry) == a->mvd_bsrc
&& au_dcount(a->dentry) == 1
&& atomic_read(&a->inode->i_count) == 1
/* && a->mvd_h_src_inode->i_nlink == 1 */
@@ -392,7 +392,7 @@ static int au_mvd_args_busy(const unsigned char dmsg, struct au_mvd_args *a)
err = -EBUSY;
AU_MVD_PR(dmsg,
"b%d, d{b%d, c%d?}, i{c%d?, l%u}, hi{l%u}, p{%d, %d}\n",
- a->mvd_bsrc, au_dbstart(a->dentry), au_dcount(a->dentry),
+ a->mvd_bsrc, au_dbtop(a->dentry), au_dcount(a->dentry),
atomic_read(&a->inode->i_count), a->inode->i_nlink,
a->mvd_h_src_inode->i_nlink,
plinked, plinked ? au_plink_test(a->inode) : 0);
@@ -455,7 +455,7 @@ static int au_mvd_args_intermediate(const unsigned char dmsg,
if (!err)
a->bwh = au_dbwh(a->dentry);
else if (err > 0)
- a->bfound = au_dbstart(a->dentry);
+ a->bfound = au_dbtop(a->dentry);
au_di_swap(tmp, dinfo);
au_rw_write_unlock(&tmp->di_rwsem);
@@ -542,22 +542,22 @@ static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
err = -EINVAL;
if (!(a->mvdown.flags & AUFS_MVDOWN_BRID_UPPER))
- a->mvd_bsrc = au_ibstart(a->inode);
+ a->mvd_bsrc = au_ibtop(a->inode);
else {
a->mvd_bsrc = au_br_index(a->sb, a->mvd_src_brid);
if (unlikely(a->mvd_bsrc < 0
- || (a->mvd_bsrc < au_dbstart(a->dentry)
- || au_dbend(a->dentry) < a->mvd_bsrc
+ || (a->mvd_bsrc < au_dbtop(a->dentry)
+ || au_dbbot(a->dentry) < a->mvd_bsrc
|| !au_h_dptr(a->dentry, a->mvd_bsrc))
- || (a->mvd_bsrc < au_ibstart(a->inode)
- || au_ibend(a->inode) < a->mvd_bsrc
+ || (a->mvd_bsrc < au_ibtop(a->inode)
+ || au_ibbot(a->inode) < a->mvd_bsrc
|| !au_h_iptr(a->inode, a->mvd_bsrc)))) {
a->mvd_errno = EAU_MVDOWN_NOUPPER;
AU_MVD_PR(dmsg, "no upper\n");
goto out;
}
}
- if (unlikely(a->mvd_bsrc == au_sbend(a->sb))) {
+ if (unlikely(a->mvd_bsrc == au_sbbot(a->sb))) {
a->mvd_errno = EAU_MVDOWN_BOTTOM;
AU_MVD_PR(dmsg, "on the bottom\n");
goto out;
@@ -587,7 +587,7 @@ static int au_mvd_args(const unsigned char dmsg, struct au_mvd_args *a)
} else {
a->mvd_bdst = au_br_index(a->sb, a->mvd_dst_brid);
if (unlikely(a->mvd_bdst < 0
- || au_sbend(a->sb) < a->mvd_bdst)) {
+ || au_sbbot(a->sb) < a->mvd_bdst)) {
a->mvd_errno = EAU_MVDOWN_NOLOWERBR;
AU_MVD_PR(dmsg, "no lower brid\n");
goto out;
diff --git a/fs/aufs/opts.c b/fs/aufs/opts.c
index fc90b657a..23a146d17 100644
--- a/fs/aufs/opts.c
+++ b/fs/aufs/opts.c
@@ -824,7 +824,7 @@ static int au_opts_parse_idel(struct super_block *sb, aufs_bindex_t bindex,
err = -EINVAL;
root = sb->s_root;
aufs_read_lock(root, AuLock_FLUSH);
- if (bindex < 0 || au_sbend(sb) < bindex) {
+ if (bindex < 0 || au_sbbot(sb) < bindex) {
pr_err("out of bounds, %d\n", bindex);
goto out;
}
@@ -880,7 +880,7 @@ static int au_opts_parse_imod(struct super_block *sb, aufs_bindex_t bindex,
err = -EINVAL;
root = sb->s_root;
aufs_read_lock(root, AuLock_FLUSH);
- if (bindex < 0 || au_sbend(sb) < bindex) {
+ if (bindex < 0 || au_sbbot(sb) < bindex) {
pr_err("out of bounds, %d\n", bindex);
goto out;
}
@@ -929,7 +929,7 @@ au_opts_parse_xino_itrunc_path(struct super_block *sb,
substring_t args[])
{
int err;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct path path;
struct dentry *root;
@@ -942,8 +942,8 @@ au_opts_parse_xino_itrunc_path(struct super_block *sb,
xino_itrunc->bindex = -1;
root = sb->s_root;
aufs_read_lock(root, AuLock_FLUSH);
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
if (au_h_dptr(root, bindex) == path.dentry) {
xino_itrunc->bindex = bindex;
break;
@@ -1087,7 +1087,7 @@ int au_opts_parse(struct super_block *sb, char *str, struct au_opts *opts)
}
u.xino_itrunc->bindex = n;
aufs_read_lock(root, AuLock_FLUSH);
- if (n < 0 || au_sbend(sb) < n) {
+ if (n < 0 || au_sbbot(sb) < n) {
pr_err("out of bounds, %d\n", n);
aufs_read_unlock(root, !AuLock_IR);
break;
@@ -1465,7 +1465,7 @@ static int au_opt_br(struct super_block *sb, struct au_opt *opt,
err = 0;
switch (opt->type) {
case Opt_append:
- opt->add.bindex = au_sbend(sb) + 1;
+ opt->add.bindex = au_sbbot(sb) + 1;
if (opt->add.bindex < 0)
opt->add.bindex = 0;
goto add;
@@ -1513,7 +1513,7 @@ static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
struct au_opts *opts)
{
int err;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct dentry *root, *parent, *h_root;
err = 0;
@@ -1530,8 +1530,8 @@ static int au_opt_xino(struct super_block *sb, struct au_opt *opt,
/* safe d_parent access */
parent = opt->xino.file->f_path.dentry->d_parent;
root = sb->s_root;
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
h_root = au_h_dptr(root, bindex);
if (h_root == parent) {
au_xino_brid_set(sb, au_sbr_id(sb, bindex));
@@ -1554,7 +1554,7 @@ int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
unsigned int pending)
{
int err, fhsm;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
unsigned char do_plink, skip, do_free, can_no_dreval;
struct au_branch *br;
struct au_wbr *wbr;
@@ -1590,8 +1590,8 @@ int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
do_plink = !!au_opt_test(sbinfo->si_mntflags, PLINK);
can_no_dreval = !!au_opt_test((sbinfo->si_mntflags | pending),
UDBA_NONE);
- bend = au_sbend(sb);
- for (bindex = 0; !err && bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
skip = 0;
h_dir = au_h_iptr(dir, bindex);
br = au_sbr(sb, bindex);
@@ -1677,7 +1677,7 @@ int au_opts_verify(struct super_block *sb, unsigned long sb_flags,
if (fhsm >= 2) {
au_fset_si(sbinfo, FHSM);
- for (bindex = bend; bindex >= 0; bindex--) {
+ for (bindex = bbot; bindex >= 0; bindex--) {
br = au_sbr(sb, bindex);
if (au_br_fhsm(br->br_perm)) {
au_fhsm_set_bottom(sb, bindex);
@@ -1696,7 +1696,7 @@ int au_opts_mount(struct super_block *sb, struct au_opts *opts)
{
int err;
unsigned int tmp;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct au_opt *opt;
struct au_opt_xino *opt_xino, xino;
struct au_sbinfo *sbinfo;
@@ -1729,8 +1729,8 @@ int au_opts_mount(struct super_block *sb, struct au_opts *opts)
else if (unlikely(err < 0))
goto out;
- bend = au_sbend(sb);
- if (unlikely(bend < 0)) {
+ bbot = au_sbbot(sb);
+ if (unlikely(bbot < 0)) {
err = -EINVAL;
pr_err("no branches\n");
goto out;
@@ -1765,8 +1765,8 @@ int au_opts_mount(struct super_block *sb, struct au_opts *opts)
tmp &= AuOptMask_UDBA;
sbinfo->si_mntflags &= ~AuOptMask_UDBA;
sbinfo->si_mntflags |= tmp;
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
err = au_hnotify_reset_br(tmp, br, br->br_perm);
if (unlikely(err))
diff --git a/fs/aufs/plink.c b/fs/aufs/plink.c
index b6d63269c..c42734dd4 100644
--- a/fs/aufs/plink.c
+++ b/fs/aufs/plink.c
@@ -429,13 +429,13 @@ void au_plink_clean(struct super_block *sb, int verbose)
static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
{
int do_put;
- aufs_bindex_t bstart, bend, bindex;
+ aufs_bindex_t btop, bbot, bindex;
do_put = 0;
- bstart = au_ibstart(inode);
- bend = au_ibend(inode);
- if (bstart >= 0) {
- for (bindex = bstart; bindex <= bend; bindex++) {
+ btop = au_ibtop(inode);
+ bbot = au_ibbot(inode);
+ if (btop >= 0) {
+ for (bindex = btop; bindex <= bbot; bindex++) {
if (!au_h_iptr(inode, bindex)
|| au_ii_br_id(inode, bindex) != br_id)
continue;
@@ -444,7 +444,7 @@ static int au_plink_do_half_refresh(struct inode *inode, aufs_bindex_t br_id)
break;
}
if (do_put)
- for (bindex = bstart; bindex <= bend; bindex++)
+ for (bindex = btop; bindex <= bbot; bindex++)
if (au_h_iptr(inode, bindex)) {
do_put = 0;
break;
diff --git a/fs/aufs/posix_acl.c b/fs/aufs/posix_acl.c
index d6eeebdec..0ddd46997 100644
--- a/fs/aufs/posix_acl.c
+++ b/fs/aufs/posix_acl.c
@@ -24,7 +24,7 @@ struct posix_acl *aufs_get_acl(struct inode *inode, int type)
if (!(sb->s_flags & MS_POSIXACL))
goto out;
- bindex = au_ibstart(inode);
+ bindex = au_ibtop(inode);
h_inode = au_h_iptr(inode, bindex);
if (unlikely(!h_inode
|| ((h_inode->i_mode & S_IFMT)
diff --git a/fs/aufs/rdu.c b/fs/aufs/rdu.c
index ec0844673..7180f18e6 100644
--- a/fs/aufs/rdu.c
+++ b/fs/aufs/rdu.c
@@ -109,7 +109,7 @@ out:
static int au_rdu(struct file *file, struct aufs_rdu *rdu)
{
int err;
- aufs_bindex_t bend;
+ aufs_bindex_t bbot;
struct au_rdu_arg arg = {
.ctx = {
.actor = au_rdu_fill
@@ -176,12 +176,12 @@ static int au_rdu(struct file *file, struct aufs_rdu *rdu)
if (!rdu->blk)
rdu->blk = au_dir_size(file, /*dentry*/NULL);
}
- bend = au_fbstart(file);
- if (cookie->bindex < bend)
- cookie->bindex = bend;
- bend = au_fbend_dir(file);
- /* AuDbg("b%d, b%d\n", cookie->bindex, bend); */
- for (; !err && cookie->bindex <= bend;
+ bbot = au_fbtop(file);
+ if (cookie->bindex < bbot)
+ cookie->bindex = bbot;
+ bbot = au_fbbot_dir(file);
+ /* AuDbg("b%d, b%d\n", cookie->bindex, bbot); */
+ for (; !err && cookie->bindex <= bbot;
cookie->bindex++, cookie->h_pos = 0) {
h_file = au_hf_dir(file, cookie->bindex);
if (!h_file)
@@ -202,7 +202,7 @@ static int au_rdu(struct file *file, struct aufs_rdu *rdu)
}
ii_read_lock_child(inode);
- fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibstart(inode)));
+ fsstack_copy_attr_atime(inode, au_h_iptr(inode, au_ibtop(inode)));
ii_read_unlock(inode);
out_unlock:
diff --git a/fs/aufs/rwsem.h b/fs/aufs/rwsem.h
index ef50c2ccb..6c0d5a902 100644
--- a/fs/aufs/rwsem.h
+++ b/fs/aufs/rwsem.h
@@ -21,6 +21,15 @@ struct au_rwsem {
#endif
};
+#ifdef CONFIG_LOCKDEP
+#define au_lockdep_set_name(rw) \
+ lockdep_set_class_and_name(&(rw)->rwsem, \
+ /*original key*/(rw)->rwsem.dep_map.key, \
+ /*name*/#rw)
+#else
+#define au_lockdep_set_name(rw) do {} while (0)
+#endif
+
#ifdef CONFIG_AUFS_DEBUG
#define AuDbgCntInit(rw) do { \
atomic_set(&(rw)->rcnt, 0); \
@@ -28,11 +37,15 @@ struct au_rwsem {
smp_mb(); /* atomic set */ \
} while (0)
-#define AuDbgRcntInc(rw) atomic_inc(&(rw)->rcnt)
-#define AuDbgRcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->rcnt) < 0)
-#define AuDbgWcntInc(rw) atomic_inc(&(rw)->wcnt)
-#define AuDbgWcntDec(rw) WARN_ON(atomic_dec_return(&(rw)->wcnt) < 0)
+#define AuDbgCnt(rw, cnt) atomic_read(&(rw)->cnt)
+#define AuDbgCntInc(rw, cnt) atomic_inc(&(rw)->cnt)
+#define AuDbgCntDec(rw, cnt) WARN_ON(atomic_dec_return(&(rw)->cnt) < 0)
+#define AuDbgRcntInc(rw) AuDbgCntInc(rw, rcnt)
+#define AuDbgRcntDec(rw) AuDbgCntDec(rw, rcnt)
+#define AuDbgWcntInc(rw) AuDbgCntInc(rw, wcnt)
+#define AuDbgWcntDec(rw) AuDbgCntDec(rw, wcnt)
#else
+#define AuDbgCnt(rw, cnt) 0
#define AuDbgCntInit(rw) do {} while (0)
#define AuDbgRcntInc(rw) do {} while (0)
#define AuDbgRcntDec(rw) do {} while (0)
@@ -41,37 +54,32 @@ struct au_rwsem {
#endif /* CONFIG_AUFS_DEBUG */
/* to debug easier, do not make them inlined functions */
-#define AuRwMustNoWaiters(rw) AuDebugOn(!list_empty(&(rw)->rwsem.wait_list))
+#define AuRwMustNoWaiters(rw) AuDebugOn(rwsem_is_contended(&(rw)->rwsem))
/* rwsem_is_locked() is unusable */
-#define AuRwMustReadLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0)
-#define AuRwMustWriteLock(rw) AuDebugOn(atomic_read(&(rw)->wcnt) <= 0)
-#define AuRwMustAnyLock(rw) AuDebugOn(atomic_read(&(rw)->rcnt) <= 0 \
- && atomic_read(&(rw)->wcnt) <= 0)
-#define AuRwDestroy(rw) AuDebugOn(atomic_read(&(rw)->rcnt) \
- || atomic_read(&(rw)->wcnt))
-
-#define au_rw_class(rw, key) lockdep_set_class(&(rw)->rwsem, key)
-
-static inline void au_rw_init(struct au_rwsem *rw)
-{
- AuDbgCntInit(rw);
- init_rwsem(&rw->rwsem);
-}
-
-static inline void au_rw_init_wlock(struct au_rwsem *rw)
-{
- au_rw_init(rw);
- down_write(&rw->rwsem);
- AuDbgWcntInc(rw);
-}
-
-static inline void au_rw_init_wlock_nested(struct au_rwsem *rw,
- unsigned int lsc)
-{
- au_rw_init(rw);
- down_write_nested(&rw->rwsem, lsc);
- AuDbgWcntInc(rw);
-}
+#define AuRwMustReadLock(rw) AuDebugOn(AuDbgCnt(rw, rcnt) <= 0)
+#define AuRwMustWriteLock(rw) AuDebugOn(AuDbgCnt(rw, wcnt) <= 0)
+#define AuRwMustAnyLock(rw) AuDebugOn(AuDbgCnt(rw, rcnt) <= 0 \
+ && AuDbgCnt(rw, wcnt) <= 0)
+#define AuRwDestroy(rw) AuDebugOn(AuDbgCnt(rw, rcnt) \
+ || AuDbgCnt(rw, wcnt))
+
+#define au_rw_init(rw) do { \
+ AuDbgCntInit(rw); \
+ init_rwsem(&(rw)->rwsem); \
+ au_lockdep_set_name(rw); \
+ } while (0)
+
+#define au_rw_init_wlock(rw) do { \
+ au_rw_init(rw); \
+ down_write(&(rw)->rwsem); \
+ AuDbgWcntInc(rw); \
+ } while (0)
+
+#define au_rw_init_wlock_nested(rw, lsc) do { \
+ au_rw_init(rw); \
+ down_write_nested(&(rw)->rwsem, lsc); \
+ AuDbgWcntInc(rw); \
+ } while (0)
static inline void au_rw_read_lock(struct au_rwsem *rw)
{
@@ -141,10 +149,9 @@ static inline int au_rw_write_trylock(struct au_rwsem *rw)
return ret;
}
-#undef AuDbgCntInit
+#undef AuDbgCntDec
#undef AuDbgRcntInc
#undef AuDbgRcntDec
-#undef AuDbgWcntInc
#undef AuDbgWcntDec
#define AuSimpleLockRwsemFuncs(prefix, param, rwsem) \
diff --git a/fs/aufs/sbinfo.c b/fs/aufs/sbinfo.c
index a9d782e43..aa970593a 100644
--- a/fs/aufs/sbinfo.c
+++ b/fs/aufs/sbinfo.c
@@ -20,7 +20,12 @@ void au_si_free(struct kobject *kobj)
sbinfo = container_of(kobj, struct au_sbinfo, si_kobj);
for (i = 0; i < AuPlink_NHASH; i++)
AuDebugOn(!hlist_empty(&sbinfo->si_plink[i].head));
- AuDebugOn(atomic_read(&sbinfo->si_nowait.nw_len));
+ au_nwt_fin(&sbinfo->si_nowait);
+
+ AuDebugOn(percpu_counter_sum(&sbinfo->si_ninodes));
+ percpu_counter_destroy(&sbinfo->si_ninodes);
+ AuDebugOn(percpu_counter_sum(&sbinfo->si_nfiles));
+ percpu_counter_destroy(&sbinfo->si_nfiles);
au_rw_write_lock(&sbinfo->si_rwsem);
au_br_free(sbinfo);
@@ -40,7 +45,6 @@ int au_si_alloc(struct super_block *sb)
{
int err, i;
struct au_sbinfo *sbinfo;
- static struct lock_class_key aufs_si;
err = -ENOMEM;
sbinfo = kzalloc(sizeof(*sbinfo), GFP_NOFS);
@@ -58,13 +62,12 @@ int au_si_alloc(struct super_block *sb)
au_nwt_init(&sbinfo->si_nowait);
au_rw_init_wlock(&sbinfo->si_rwsem);
- au_rw_class(&sbinfo->si_rwsem, &aufs_si);
mutex_init(&sbinfo->au_si_pid.pid_mtx);
- atomic_long_set(&sbinfo->si_ninodes, 0);
- atomic_long_set(&sbinfo->si_nfiles, 0);
+ percpu_counter_init(&sbinfo->si_ninodes, 0, GFP_NOFS);
+ percpu_counter_init(&sbinfo->si_nfiles, 0, GFP_NOFS);
- sbinfo->si_bend = -1;
+ sbinfo->si_bbot = -1;
sbinfo->si_last_br_id = AUFS_BRANCH_MAX / 2;
sbinfo->si_wbr_copyup = AuWbrCopyup_Def;
@@ -122,7 +125,7 @@ int au_sbr_realloc(struct au_sbinfo *sbinfo, int nbr)
AuRwMustWriteLock(&sbinfo->si_rwsem);
err = -ENOMEM;
- sz = sizeof(*brp) * (sbinfo->si_bend + 1);
+ sz = sizeof(*brp) * (sbinfo->si_bbot + 1);
if (unlikely(!sz))
sz = sizeof(*brp);
brp = au_kzrealloc(sbinfo->si_branch, sz, sizeof(*brp) * nbr, GFP_NOFS);
diff --git a/fs/aufs/super.c b/fs/aufs/super.c
index 7928a5031..18190aa5f 100644
--- a/fs/aufs/super.c
+++ b/fs/aufs/super.c
@@ -23,7 +23,6 @@ static struct inode *aufs_alloc_inode(struct super_block *sb __maybe_unused)
if (c) {
au_icntnr_init(c);
c->vfs_inode.i_version = 1; /* sigen(sb); */
- c->iinfo.ii_hinode = NULL;
return &c->vfs_inode;
}
return NULL;
@@ -39,7 +38,8 @@ static void aufs_destroy_inode_cb(struct rcu_head *head)
static void aufs_destroy_inode(struct inode *inode)
{
- au_iinfo_fin(inode);
+ if (!is_bad_inode(inode))
+ au_iinfo_fin(inode);
call_rcu(&inode->i_rcu, aufs_destroy_inode_cb);
}
@@ -77,16 +77,16 @@ out:
static int au_show_brs(struct seq_file *seq, struct super_block *sb)
{
int err;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct path path;
struct au_hdentry *hdp;
struct au_branch *br;
au_br_perm_str_t perm;
err = 0;
- bend = au_sbend(sb);
+ bbot = au_sbbot(sb);
hdp = au_di(sb->s_root)->di_hdentry;
- for (bindex = 0; !err && bindex <= bend; bindex++) {
+ for (bindex = 0; !err && bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
path.mnt = au_br_mnt(br);
path.dentry = hdp[bindex].hd_dentry;
@@ -94,7 +94,7 @@ static int au_show_brs(struct seq_file *seq, struct super_block *sb)
if (!err) {
au_optstr_br_perm(&perm, br->br_perm);
seq_printf(seq, "=%s", perm.a);
- if (bindex != bend)
+ if (bindex != bbot)
seq_putc(seq, ':');
}
}
@@ -318,7 +318,7 @@ static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
int err;
long bsize, factor;
u64 blocks, bfree, bavail, files, ffree;
- aufs_bindex_t bend, bindex, i;
+ aufs_bindex_t bbot, bindex, i;
unsigned char shared;
struct path h_path;
struct super_block *h_sb;
@@ -330,8 +330,8 @@ static int au_statfs_sum(struct super_block *sb, struct kstatfs *buf)
blocks = 0;
bfree = 0;
bavail = 0;
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
h_path.mnt = au_sbr_mnt(sb, bindex);
h_sb = h_path.mnt->mnt_sb;
shared = 0;
@@ -414,14 +414,14 @@ static int aufs_statfs(struct dentry *dentry, struct kstatfs *buf)
static int aufs_sync_fs(struct super_block *sb, int wait)
{
int err, e;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct au_branch *br;
struct super_block *h_sb;
err = 0;
si_noflush_read_lock(sb);
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
if (!au_br_writable(br->br_perm))
continue;
@@ -504,7 +504,7 @@ static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
spin_lock(&sb->s_inode_list_lock);
list_for_each_entry(inode, head, i_sb_list) {
if (!is_bad_inode(inode)
- && au_ii(inode)->ii_bstart >= 0) {
+ && au_ii(inode)->ii_btop >= 0) {
spin_lock(&inode->i_lock);
if (atomic_read(&inode->i_count)) {
au_igrab(inode);
@@ -522,7 +522,7 @@ static unsigned long long au_iarray_cb(struct super_block *sb, void *a,
struct inode **au_iarray_alloc(struct super_block *sb, unsigned long long *max)
{
- *max = atomic_long_read(&au_sbi(sb)->si_ninodes);
+ *max = au_ninodes(sb);
return au_array_alloc(max, au_iarray_cb, sb, &sb->s_inodes);
}
@@ -681,7 +681,7 @@ static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
{
int err, e;
unsigned int udba;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct dentry *root;
struct inode *inode;
struct au_branch *br;
@@ -697,8 +697,8 @@ static void au_remount_refresh(struct super_block *sb, unsigned int do_idop)
IiMustNoWaiters(inode);
udba = au_opt_udba(sb);
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
err = au_hnotify_reset_br(udba, br, br->br_perm);
if (unlikely(err))
diff --git a/fs/aufs/super.h b/fs/aufs/super.h
index 23d03a067..b8ca14994 100644
--- a/fs/aufs/super.h
+++ b/fs/aufs/super.h
@@ -91,7 +91,7 @@ struct au_sbinfo {
* dirty approach to protect sb->sb_inodes and ->s_files (gone) from
* remount.
*/
- atomic_long_t si_ninodes, si_nfiles;
+ struct percpu_counter si_ninodes, si_nfiles;
/* branch management */
unsigned int si_generation;
@@ -99,7 +99,7 @@ struct au_sbinfo {
/* see AuSi_ flags */
unsigned char au_si_status;
- aufs_bindex_t si_bend;
+ aufs_bindex_t si_bbot;
/* dirty trick to keep br_id plus */
unsigned int si_last_br_id :
@@ -285,7 +285,7 @@ extern struct au_wbr_copyup_operations au_wbr_copyup_ops[];
extern struct au_wbr_create_operations au_wbr_create_ops[];
int au_cpdown_dirs(struct dentry *dentry, aufs_bindex_t bdst);
int au_wbr_nonopq(struct dentry *dentry, aufs_bindex_t bindex);
-int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart);
+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop);
/* mvdown.c */
int au_mvdown(struct dentry *dentry, struct aufs_mvdown __user *arg);
@@ -548,10 +548,10 @@ static inline void si_downgrade_lock(struct super_block *sb)
/* ---------------------------------------------------------------------- */
-static inline aufs_bindex_t au_sbend(struct super_block *sb)
+static inline aufs_bindex_t au_sbbot(struct super_block *sb)
{
SiMustAnyLock(sb);
- return au_sbi(sb)->si_bend;
+ return au_sbi(sb)->si_bbot;
}
static inline unsigned int au_mntflags(struct super_block *sb)
@@ -566,26 +566,40 @@ static inline unsigned int au_sigen(struct super_block *sb)
return au_sbi(sb)->si_generation;
}
+static inline unsigned long long au_ninodes(struct super_block *sb)
+{
+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_ninodes);
+
+ BUG_ON(n < 0);
+ return n;
+}
+
static inline void au_ninodes_inc(struct super_block *sb)
{
- atomic_long_inc(&au_sbi(sb)->si_ninodes);
+ percpu_counter_inc(&au_sbi(sb)->si_ninodes);
}
static inline void au_ninodes_dec(struct super_block *sb)
{
- AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_ninodes));
- atomic_long_dec(&au_sbi(sb)->si_ninodes);
+ percpu_counter_dec(&au_sbi(sb)->si_ninodes);
+}
+
+static inline unsigned long long au_nfiles(struct super_block *sb)
+{
+ s64 n = percpu_counter_sum(&au_sbi(sb)->si_nfiles);
+
+ BUG_ON(n < 0);
+ return n;
}
static inline void au_nfiles_inc(struct super_block *sb)
{
- atomic_long_inc(&au_sbi(sb)->si_nfiles);
+ percpu_counter_inc(&au_sbi(sb)->si_nfiles);
}
static inline void au_nfiles_dec(struct super_block *sb)
{
- AuDebugOn(!atomic_long_read(&au_sbi(sb)->si_nfiles));
- atomic_long_dec(&au_sbi(sb)->si_nfiles);
+ percpu_counter_dec(&au_sbi(sb)->si_nfiles);
}
static inline struct au_branch *au_sbr(struct super_block *sb,
diff --git a/fs/aufs/sysfs.c b/fs/aufs/sysfs.c
index ed42f53d0..0efb77a4e 100644
--- a/fs/aufs/sysfs.c
+++ b/fs/aufs/sysfs.c
@@ -106,7 +106,7 @@ ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
ssize_t err;
int idx;
long l;
- aufs_bindex_t bend;
+ aufs_bindex_t bbot;
struct au_sbinfo *sbinfo;
struct super_block *sb;
struct seq_file *seq;
@@ -159,8 +159,8 @@ ssize_t sysaufs_si_show(struct kobject *kobj, struct attribute *attr,
err = kstrtol(name, 10, &l);
if (!err) {
- bend = au_sbend(sb);
- if (l <= bend)
+ bbot = au_sbbot(sb);
+ if (l <= bbot)
err = sysaufs_si_br(seq, sb, (aufs_bindex_t)l, idx);
else
err = -ENOENT;
@@ -186,15 +186,15 @@ static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
{
int err;
int16_t brid;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
size_t sz;
char *buf;
struct seq_file *seq;
struct au_branch *br;
si_read_lock(sb, AuLock_FLUSH);
- bend = au_sbend(sb);
- err = bend + 1;
+ bbot = au_sbbot(sb);
+ err = bbot + 1;
if (!arg)
goto out;
@@ -209,7 +209,7 @@ static int au_brinfo(struct super_block *sb, union aufs_brinfo __user *arg)
goto out_buf;
sz = sizeof(*arg) - offsetof(union aufs_brinfo, path);
- for (bindex = 0; bindex <= bend; bindex++, arg++) {
+ for (bindex = 0; bindex <= bbot; bindex++, arg++) {
err = !access_ok(VERIFY_WRITE, arg, sizeof(*arg));
if (unlikely(err))
break;
@@ -288,7 +288,7 @@ void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
struct kobject *kobj;
struct au_brsysfs *br_sysfs;
int i;
- aufs_bindex_t bend;
+ aufs_bindex_t bbot;
dbgaufs_brs_del(sb, bindex);
@@ -296,8 +296,8 @@ void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
return;
kobj = &au_sbi(sb)->si_kobj;
- bend = au_sbend(sb);
- for (; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
br_sysfs = br->br_sysfs;
for (i = 0; i < ARRAY_SIZE(br->br_sysfs); i++) {
@@ -310,7 +310,7 @@ void sysaufs_brs_del(struct super_block *sb, aufs_bindex_t bindex)
void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
{
int err, i;
- aufs_bindex_t bend;
+ aufs_bindex_t bbot;
struct kobject *kobj;
struct au_branch *br;
struct au_brsysfs *br_sysfs;
@@ -321,8 +321,8 @@ void sysaufs_brs_add(struct super_block *sb, aufs_bindex_t bindex)
return;
kobj = &au_sbi(sb)->si_kobj;
- bend = au_sbend(sb);
- for (; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
br_sysfs = br->br_sysfs;
snprintf(br_sysfs[AuBrSysfs_BR].name, sizeof(br_sysfs->name),
diff --git a/fs/aufs/vdir.c b/fs/aufs/vdir.c
index f64cc2b7a..feddcc204 100644
--- a/fs/aufs/vdir.c
+++ b/fs/aufs/vdir.c
@@ -538,7 +538,7 @@ static int au_do_read_vdir(struct fillvdir_arg *arg)
int err;
unsigned int rdhash;
loff_t offset;
- aufs_bindex_t bend, bindex, bstart;
+ aufs_bindex_t bbot, bindex, btop;
unsigned char shwh;
struct file *hf, *file;
struct super_block *sb;
@@ -564,9 +564,9 @@ static int au_do_read_vdir(struct fillvdir_arg *arg)
shwh = 1;
au_fset_fillvdir(arg->flags, SHWH);
}
- bstart = au_fbstart(file);
- bend = au_fbend_dir(file);
- for (bindex = bstart; !err && bindex <= bend; bindex++) {
+ btop = au_fbtop(file);
+ bbot = au_fbbot_dir(file);
+ for (bindex = btop; !err && bindex <= bbot; bindex++) {
hf = au_hf_dir(file, bindex);
if (!hf)
continue;
@@ -579,7 +579,7 @@ static int au_do_read_vdir(struct fillvdir_arg *arg)
arg->bindex = bindex;
au_fclr_fillvdir(arg->flags, WHABLE);
if (shwh
- || (bindex != bend
+ || (bindex != bbot
&& au_br_whable(au_sbr_perm(sb, bindex))))
au_fset_fillvdir(arg->flags, WHABLE);
do {
diff --git a/fs/aufs/vfsub.c b/fs/aufs/vfsub.c
index 29e0bbc32..2e54bad6a 100644
--- a/fs/aufs/vfsub.c
+++ b/fs/aufs/vfsub.c
@@ -125,7 +125,7 @@ int vfsub_atomic_open(struct inode *dir, struct dentry *dentry,
goto out;
}
- atomic_inc(&br->br_count);
+ au_br_get(br);
fsnotify_open(file);
out:
diff --git a/fs/aufs/wbr_policy.c b/fs/aufs/wbr_policy.c
index 994316302..91a010a11 100644
--- a/fs/aufs/wbr_policy.c
+++ b/fs/aufs/wbr_policy.c
@@ -94,13 +94,13 @@ static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
struct dentry *h_parent, void *arg)
{
int err, rerr;
- aufs_bindex_t bopq, bstart;
+ aufs_bindex_t bopq, btop;
struct path h_path;
struct dentry *parent;
struct inode *h_dir, *h_inode, *inode, *dir;
unsigned int *flags = arg;
- bstart = au_dbstart(dentry);
+ btop = au_dbtop(dentry);
/* dentry is di-locked */
parent = dget_parent(dentry);
dir = d_inode(parent);
@@ -136,7 +136,7 @@ static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
}
}
- err = au_cpdown_attr(&h_path, au_h_dptr(dentry, bstart));
+ err = au_cpdown_attr(&h_path, au_h_dptr(dentry, btop));
inode_unlock(h_inode);
if (unlikely(err))
goto out_opq;
@@ -148,8 +148,8 @@ static int au_cpdown_dir(struct dentry *dentry, aufs_bindex_t bdst,
}
inode = d_inode(dentry);
- if (au_ibend(inode) < bdst)
- au_set_ibend(inode, bdst);
+ if (au_ibbot(inode) < bdst)
+ au_set_ibbot(inode, bdst);
au_set_h_iptr(inode, bdst, au_igrab(h_inode),
au_hi_flags(inode, /*isdir*/1));
au_fhsm_wrote(dentry->d_sb, bdst, /*force*/0);
@@ -179,8 +179,8 @@ out_dir:
}
out_put:
au_set_h_dptr(dentry, bdst, NULL);
- if (au_dbend(dentry) == bdst)
- au_update_dbend(dentry);
+ if (au_dbbot(dentry) == bdst)
+ au_update_dbbot(dentry);
out:
dput(parent);
return err;
@@ -252,19 +252,19 @@ static int au_wbr_create_tdp(struct dentry *dentry,
unsigned int flags __maybe_unused)
{
int err;
- aufs_bindex_t bstart, bindex;
+ aufs_bindex_t btop, bindex;
struct super_block *sb;
struct dentry *parent, *h_parent;
sb = dentry->d_sb;
- bstart = au_dbstart(dentry);
- err = bstart;
- if (!au_br_rdonly(au_sbr(sb, bstart)))
+ btop = au_dbtop(dentry);
+ err = btop;
+ if (!au_br_rdonly(au_sbr(sb, btop)))
goto out;
err = -EROFS;
parent = dget_parent(dentry);
- for (bindex = au_dbstart(parent); bindex < bstart; bindex++) {
+ for (bindex = au_dbtop(parent); bindex < btop; bindex++) {
h_parent = au_h_dptr(parent, bindex);
if (!h_parent || d_is_negative(h_parent))
continue;
@@ -278,7 +278,7 @@ static int au_wbr_create_tdp(struct dentry *dentry,
/* bottom up here */
if (unlikely(err < 0)) {
- err = au_wbr_bu(sb, bstart - 1);
+ err = au_wbr_bu(sb, btop - 1);
if (err >= 0)
err = au_wbr_nonopq(dentry, err);
}
@@ -330,7 +330,7 @@ static int au_wbr_create_init_rr(struct super_block *sb)
{
int err;
- err = au_wbr_bu(sb, au_sbend(sb));
+ err = au_wbr_bu(sb, au_sbbot(sb));
atomic_set(&au_sbi(sb)->si_wbr_rr_next, -err); /* less important */
/* smp_mb(); */
@@ -342,7 +342,7 @@ static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
{
int err, nbr;
unsigned int u;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct super_block *sb;
atomic_t *next;
@@ -352,9 +352,9 @@ static int au_wbr_create_rr(struct dentry *dentry, unsigned int flags)
sb = dentry->d_sb;
next = &au_sbi(sb)->si_wbr_rr_next;
- bend = au_sbend(sb);
- nbr = bend + 1;
- for (bindex = 0; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ nbr = bbot + 1;
+ for (bindex = 0; bindex <= bbot; bindex++) {
if (!au_ftest_wbr(flags, DIR)) {
err = atomic_dec_return(next) + 1;
/* modulo for 0 is meaningless */
@@ -388,7 +388,7 @@ static void au_mfs(struct dentry *dentry, struct dentry *parent)
struct au_branch *br;
struct au_wbr_mfs *mfs;
struct dentry *h_parent;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
int err;
unsigned long long b, bavail;
struct path h_path;
@@ -409,13 +409,13 @@ static void au_mfs(struct dentry *dentry, struct dentry *parent)
mfs->mfsrr_bytes = 0;
if (!parent) {
bindex = 0;
- bend = au_sbend(sb);
+ bbot = au_sbbot(sb);
} else {
- bindex = au_dbstart(parent);
- bend = au_dbtaildir(parent);
+ bindex = au_dbtop(parent);
+ bbot = au_dbtaildir(parent);
}
- for (; bindex <= bend; bindex++) {
+ for (; bindex <= bbot; bindex++) {
if (parent) {
h_parent = au_h_dptr(parent, bindex);
if (!h_parent || d_is_negative(h_parent))
@@ -540,7 +540,7 @@ static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
{
int err, e2;
unsigned long long b;
- aufs_bindex_t bindex, bstart, bend;
+ aufs_bindex_t bindex, btop, bbot;
struct super_block *sb;
struct dentry *parent, *h_parent;
struct au_branch *br;
@@ -549,9 +549,9 @@ static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
if (unlikely(err < 0))
goto out;
parent = dget_parent(dentry);
- bstart = au_dbstart(parent);
- bend = au_dbtaildir(parent);
- if (bstart == bend)
+ btop = au_dbtop(parent);
+ bbot = au_dbtaildir(parent);
+ if (btop == bbot)
goto out_parent; /* success */
e2 = au_wbr_create_mfs(dentry, flags);
@@ -564,7 +564,7 @@ static int au_wbr_create_pmfs(struct dentry *dentry, unsigned int flags)
b = br->br_wbr->wbr_bytes;
AuDbg("b%d, %llu\n", err, b);
- for (bindex = bstart; bindex <= bend; bindex++) {
+ for (bindex = btop; bindex <= bbot; bindex++) {
h_parent = au_h_dptr(parent, bindex);
if (!h_parent || d_is_negative(h_parent))
continue;
@@ -635,15 +635,15 @@ static int au_wbr_copyup_tdp(struct dentry *dentry)
static int au_wbr_copyup_bup(struct dentry *dentry)
{
int err;
- aufs_bindex_t bindex, bstart;
+ aufs_bindex_t bindex, btop;
struct dentry *parent, *h_parent;
struct super_block *sb;
err = -EROFS;
sb = dentry->d_sb;
parent = dget_parent(dentry);
- bstart = au_dbstart(parent);
- for (bindex = au_dbstart(dentry); bindex >= bstart; bindex--) {
+ btop = au_dbtop(parent);
+ for (bindex = au_dbtop(dentry); bindex >= btop; bindex--) {
h_parent = au_h_dptr(parent, bindex);
if (!h_parent || d_is_negative(h_parent))
continue;
@@ -657,20 +657,20 @@ static int au_wbr_copyup_bup(struct dentry *dentry)
/* bottom up here */
if (unlikely(err < 0))
- err = au_wbr_bu(sb, bstart - 1);
+ err = au_wbr_bu(sb, btop - 1);
AuDbg("b%d\n", err);
return err;
}
/* bottom up */
-int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart)
+int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t btop)
{
int err;
- err = au_wbr_bu(dentry->d_sb, bstart);
+ err = au_wbr_bu(dentry->d_sb, btop);
AuDbg("b%d\n", err);
- if (err > bstart)
+ if (err > btop)
err = au_wbr_nonopq(dentry, err);
AuDbg("b%d\n", err);
@@ -680,10 +680,10 @@ int au_wbr_do_copyup_bu(struct dentry *dentry, aufs_bindex_t bstart)
static int au_wbr_copyup_bu(struct dentry *dentry)
{
int err;
- aufs_bindex_t bstart;
+ aufs_bindex_t btop;
- bstart = au_dbstart(dentry);
- err = au_wbr_do_copyup_bu(dentry, bstart);
+ btop = au_dbtop(dentry);
+ err = au_wbr_do_copyup_bu(dentry, btop);
return err;
}
diff --git a/fs/aufs/whout.c b/fs/aufs/whout.c
index fdda85eef..78f97ed60 100644
--- a/fs/aufs/whout.c
+++ b/fs/aufs/whout.c
@@ -585,7 +585,7 @@ static void reinit_br_wh(void *arg)
out:
if (wbr)
atomic_dec(&wbr->wbr_wh_running);
- atomic_dec(&a->br->br_count);
+ au_br_put(a->br);
si_write_unlock(a->sb);
au_nwt_done(&au_sbi(a->sb)->si_nowait);
kfree(arg);
@@ -611,11 +611,11 @@ static void kick_reinit_br_wh(struct super_block *sb, struct au_branch *br)
*/
arg->sb = sb;
arg->br = br;
- atomic_inc(&br->br_count);
+ au_br_get(br);
wkq_err = au_wkq_nowait(reinit_br_wh, arg, sb, /*flags*/0);
if (unlikely(wkq_err)) {
atomic_dec(&br->br_wbr->wbr_wh_running);
- atomic_dec(&br->br_count);
+ au_br_put(br);
kfree(arg);
}
do_dec = 0;
@@ -904,7 +904,7 @@ out:
void au_whtmp_rmdir_free(struct au_whtmp_rmdir *whtmp)
{
if (whtmp->br)
- atomic_dec(&whtmp->br->br_count);
+ au_br_put(whtmp->br);
dput(whtmp->wh_dentry);
iput(whtmp->dir);
au_nhash_wh_free(&whtmp->whlist);
@@ -963,7 +963,7 @@ int au_whtmp_rmdir(struct inode *dir, aufs_bindex_t bindex,
}
if (!err) {
- if (au_ibstart(dir) == bindex) {
+ if (au_ibtop(dir) == bindex) {
/* todo: dir->i_mutex is necessary */
au_cpup_attr_timesizes(dir);
if (h_nlink)
@@ -1037,7 +1037,7 @@ void au_whtmp_kick_rmdir(struct inode *dir, aufs_bindex_t bindex,
sb = dir->i_sb;
args->dir = au_igrab(dir);
args->br = au_sbr(sb, bindex);
- atomic_inc(&args->br->br_count);
+ au_br_get(args->br);
args->wh_dentry = dget(wh_dentry);
wkq_err = au_wkq_nowait(call_rmdir_whtmp, args, sb, /*flags*/0);
if (unlikely(wkq_err)) {
diff --git a/fs/aufs/wkq.c b/fs/aufs/wkq.c
index 0f1500e93..65c0137a5 100644
--- a/fs/aufs/wkq.c
+++ b/fs/aufs/wkq.c
@@ -145,7 +145,7 @@ int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
int err;
struct au_wkinfo *wkinfo;
- atomic_inc(&au_sbi(sb)->si_nowait.nw_len);
+ percpu_counter_inc(&au_sbi(sb)->si_nowait.nw_len);
/*
* wkq_func() must free this wkinfo.
@@ -175,11 +175,16 @@ int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
void au_nwt_init(struct au_nowait_tasks *nwt)
{
- atomic_set(&nwt->nw_len, 0);
- /* smp_mb(); */ /* atomic_set */
+ percpu_counter_init(&nwt->nw_len, 0, GFP_NOFS);
init_waitqueue_head(&nwt->nw_wq);
}
+void au_nwt_fin(struct au_nowait_tasks *nwt)
+{
+ AuDebugOn(percpu_counter_sum(&nwt->nw_len));
+ percpu_counter_destroy(&nwt->nw_len);
+}
+
void au_wkq_fin(void)
{
destroy_workqueue(au_wkq);
diff --git a/fs/aufs/wkq.h b/fs/aufs/wkq.h
index f6c9b9902..752b9c52d 100644
--- a/fs/aufs/wkq.h
+++ b/fs/aufs/wkq.h
@@ -20,7 +20,7 @@ struct super_block;
* in the next operation, wait for the 'nowait' tasks in system-wide workqueue
*/
struct au_nowait_tasks {
- atomic_t nw_len;
+ struct percpu_counter nw_len;
wait_queue_head_t nw_wq;
};
@@ -47,6 +47,7 @@ int au_wkq_do_wait(unsigned int flags, au_wkq_func_t func, void *args);
int au_wkq_nowait(au_wkq_func_t func, void *args, struct super_block *sb,
unsigned int flags);
void au_nwt_init(struct au_nowait_tasks *nwt);
+void au_nwt_fin(struct au_nowait_tasks *nwt);
int __init au_wkq_init(void);
void au_wkq_fin(void);
@@ -64,13 +65,14 @@ static inline int au_wkq_wait(au_wkq_func_t func, void *args)
static inline void au_nwt_done(struct au_nowait_tasks *nwt)
{
- if (atomic_dec_and_test(&nwt->nw_len))
+ percpu_counter_dec(&nwt->nw_len);
+ if (!percpu_counter_sum(&nwt->nw_len))
wake_up_all(&nwt->nw_wq);
}
static inline int au_nwt_flush(struct au_nowait_tasks *nwt)
{
- wait_event(nwt->nw_wq, !atomic_read(&nwt->nw_len));
+ wait_event(nwt->nw_wq, !percpu_counter_sum(&nwt->nw_len));
return 0;
}
diff --git a/fs/aufs/xino.c b/fs/aufs/xino.c
index 2773caedc..c0eb51f3c 100644
--- a/fs/aufs/xino.c
+++ b/fs/aufs/xino.c
@@ -259,7 +259,7 @@ int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
int err;
unsigned long jiffy;
blkcnt_t blocks;
- aufs_bindex_t bi, bend;
+ aufs_bindex_t bi, bbot;
struct kstatfs *st;
struct au_branch *br;
struct file *new_xino, *file;
@@ -272,8 +272,8 @@ int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
goto out;
err = -EINVAL;
- bend = au_sbend(sb);
- if (unlikely(bindex < 0 || bend < bindex))
+ bbot = au_sbbot(sb);
+ if (unlikely(bindex < 0 || bbot < bindex))
goto out_st;
br = au_sbr(sb, bindex);
file = br->br_xino.xi_file;
@@ -302,7 +302,7 @@ int au_xino_trunc(struct super_block *sb, aufs_bindex_t bindex)
br->br_xino.xi_file = new_xino;
h_sb = au_br_sb(br);
- for (bi = 0; bi <= bend; bi++) {
+ for (bi = 0; bi <= bbot; bi++) {
if (unlikely(bi == bindex))
continue;
br = au_sbr(sb, bi);
@@ -357,7 +357,7 @@ static void xino_do_trunc(void *_args)
if (unlikely(err))
pr_warn("err b%d, (%d)\n", bindex, err);
atomic_dec(&br->br_xino_running);
- atomic_dec(&br->br_count);
+ au_br_put(br);
si_write_unlock(sb);
au_nwt_done(&au_sbi(sb)->si_nowait);
kfree(args);
@@ -405,7 +405,7 @@ static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
goto out_args;
}
- atomic_inc(&br->br_count);
+ au_br_get(br);
args->sb = sb;
args->br = br;
wkq_err = au_wkq_nowait(xino_do_trunc, args, sb, /*flags*/0);
@@ -413,7 +413,7 @@ static void xino_try_trunc(struct super_block *sb, struct au_branch *br)
return; /* success */
pr_err("wkq %d\n", wkq_err);
- atomic_dec(&br->br_count);
+ au_br_put(br);
out_args:
kfree(args);
@@ -576,7 +576,7 @@ void au_xino_delete_inode(struct inode *inode, const int unlinked)
{
int err;
unsigned int mnt_flags;
- aufs_bindex_t bindex, bend, bi;
+ aufs_bindex_t bindex, bbot, bi;
unsigned char try_trunc;
struct au_iinfo *iinfo;
struct super_block *sb;
@@ -585,6 +585,8 @@ void au_xino_delete_inode(struct inode *inode, const int unlinked)
struct au_branch *br;
vfs_writef_t xwrite;
+ AuDebugOn(is_bad_inode(inode));
+
sb = inode->i_sb;
mnt_flags = au_mntflags(sb);
if (!au_opt_test(mnt_flags, XINO)
@@ -597,18 +599,15 @@ void au_xino_delete_inode(struct inode *inode, const int unlinked)
}
iinfo = au_ii(inode);
- if (!iinfo)
- return;
-
- bindex = iinfo->ii_bstart;
+ bindex = iinfo->ii_btop;
if (bindex < 0)
return;
xwrite = au_sbi(sb)->si_xwrite;
try_trunc = !!au_opt_test(mnt_flags, TRUNC_XINO);
- hi = iinfo->ii_hinode + bindex;
- bend = iinfo->ii_bend;
- for (; bindex <= bend; bindex++, hi++) {
+ hi = au_hinode(iinfo, bindex);
+ bbot = iinfo->ii_bbot;
+ for (; bindex <= bbot; bindex++, hi++) {
h_inode = hi->hi_inode;
if (!h_inode
|| (!unlinked && h_inode->i_nlink))
@@ -797,10 +796,10 @@ out:
/*
* find another branch who is on the same filesystem of the specified
- * branch{@btgt}. search until @bend.
+ * branch{@btgt}. search until @bbot.
*/
static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
- aufs_bindex_t bend)
+ aufs_bindex_t bbot)
{
aufs_bindex_t bindex;
struct super_block *tgt_sb = au_sbr_sb(sb, btgt);
@@ -808,7 +807,7 @@ static int is_sb_shared(struct super_block *sb, aufs_bindex_t btgt,
for (bindex = 0; bindex < btgt; bindex++)
if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
return bindex;
- for (bindex++; bindex <= bend; bindex++)
+ for (bindex++; bindex <= bbot; bindex++)
if (unlikely(tgt_sb == au_sbr_sb(sb, bindex)))
return bindex;
return -1;
@@ -827,16 +826,16 @@ int au_xino_br(struct super_block *sb, struct au_branch *br, ino_t h_ino,
{
int err;
ino_t ino;
- aufs_bindex_t bend, bindex;
+ aufs_bindex_t bbot, bindex;
struct au_branch *shared_br, *b;
struct file *file;
struct super_block *tgt_sb;
shared_br = NULL;
- bend = au_sbend(sb);
+ bbot = au_sbbot(sb);
if (do_test) {
tgt_sb = au_br_sb(br);
- for (bindex = 0; bindex <= bend; bindex++) {
+ for (bindex = 0; bindex <= bbot; bindex++) {
b = au_sbr(sb, bindex);
if (tgt_sb == au_br_sb(b)) {
shared_br = b;
@@ -924,7 +923,7 @@ out:
static int xib_restore(struct super_block *sb)
{
int err;
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
void *page;
err = -ENOMEM;
@@ -933,8 +932,8 @@ static int xib_restore(struct super_block *sb)
goto out;
err = 0;
- bend = au_sbend(sb);
- for (bindex = 0; !err && bindex <= bend; bindex++)
+ bbot = au_sbbot(sb);
+ for (bindex = 0; !err && bindex <= bbot; bindex++)
if (!bindex || is_sb_shared(sb, bindex, bindex - 1) < 0)
err = do_xib_restore
(sb, au_sbr(sb, bindex)->br_xino.xi_file, page);
@@ -1076,11 +1075,11 @@ out:
/* xino for each branch */
static void xino_clear_br(struct super_block *sb)
{
- aufs_bindex_t bindex, bend;
+ aufs_bindex_t bindex, bbot;
struct au_branch *br;
- bend = au_sbend(sb);
- for (bindex = 0; bindex <= bend; bindex++) {
+ bbot = au_sbbot(sb);
+ for (bindex = 0; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
if (!br || !br->br_xino.xi_file)
continue;
@@ -1094,7 +1093,7 @@ static int au_xino_set_br(struct super_block *sb, struct file *base)
{
int err;
ino_t ino;
- aufs_bindex_t bindex, bend, bshared;
+ aufs_bindex_t bindex, bbot, bshared;
struct {
struct file *old, *new;
} *fpair, *p;
@@ -1105,15 +1104,15 @@ static int au_xino_set_br(struct super_block *sb, struct file *base)
SiMustWriteLock(sb);
err = -ENOMEM;
- bend = au_sbend(sb);
- fpair = kcalloc(bend + 1, sizeof(*fpair), GFP_NOFS);
+ bbot = au_sbbot(sb);
+ fpair = kcalloc(bbot + 1, sizeof(*fpair), GFP_NOFS);
if (unlikely(!fpair))
goto out;
inode = d_inode(sb->s_root);
ino = AUFS_ROOT_INO;
writef = au_sbi(sb)->si_xwrite;
- for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
br = au_sbr(sb, bindex);
bshared = is_sb_shared(sb, bindex, bindex - 1);
if (bshared >= 0) {
@@ -1139,7 +1138,7 @@ static int au_xino_set_br(struct super_block *sb, struct file *base)
goto out_pair;
}
- for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++) {
+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++) {
br = au_sbr(sb, bindex);
if (br->br_xino.xi_file)
fput(br->br_xino.xi_file);
@@ -1148,7 +1147,7 @@ static int au_xino_set_br(struct super_block *sb, struct file *base)
}
out_pair:
- for (bindex = 0, p = fpair; bindex <= bend; bindex++, p++)
+ for (bindex = 0, p = fpair; bindex <= bbot; bindex++, p++)
if (p->new)
fput(p->new);
else
@@ -1234,12 +1233,12 @@ struct file *au_xino_def(struct super_block *sb)
struct au_branch *br;
struct super_block *h_sb;
struct path path;
- aufs_bindex_t bend, bindex, bwr;
+ aufs_bindex_t bbot, bindex, bwr;
br = NULL;
- bend = au_sbend(sb);
+ bbot = au_sbbot(sb);
bwr = -1;
- for (bindex = 0; bindex <= bend; bindex++) {
+ for (bindex = 0; bindex <= bbot; bindex++) {
br = au_sbr(sb, bindex);
if (au_br_writable(br->br_perm)
&& !au_test_fs_bad_xino(au_br_sb(br))) {
diff --git a/fs/autofs4/autofs_i.h b/fs/autofs4/autofs_i.h
index c37149b92..f0d268b97 100644
--- a/fs/autofs4/autofs_i.h
+++ b/fs/autofs4/autofs_i.h
@@ -1,15 +1,11 @@
-/* -*- c -*- ------------------------------------------------------------- *
- *
- * linux/fs/autofs/autofs_i.h
- *
- * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
- * Copyright 2005-2006 Ian Kent <raven@themaw.net>
+/*
+ * Copyright 1997-1998 Transmeta Corporation - All Rights Reserved
+ * Copyright 2005-2006 Ian Kent <raven@themaw.net>
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
+ */
/* Internal header file for autofs */
@@ -35,28 +31,23 @@
#include <linux/mount.h>
#include <linux/namei.h>
#include <asm/current.h>
-#include <asm/uaccess.h>
+#include <linux/uaccess.h>
/* #define DEBUG */
-#define DPRINTK(fmt, ...) \
- pr_debug("pid %d: %s: " fmt "\n", \
- current->pid, __func__, ##__VA_ARGS__)
-
-#define AUTOFS_WARN(fmt, ...) \
- printk(KERN_WARNING "pid %d: %s: " fmt "\n", \
- current->pid, __func__, ##__VA_ARGS__)
-
-#define AUTOFS_ERROR(fmt, ...) \
- printk(KERN_ERR "pid %d: %s: " fmt "\n", \
- current->pid, __func__, ##__VA_ARGS__)
-
-/* Unified info structure. This is pointed to by both the dentry and
- inode structures. Each file in the filesystem has an instance of this
- structure. It holds a reference to the dentry, so dentries are never
- flushed while the file exists. All name lookups are dealt with at the
- dentry level, although the filesystem can interfere in the validation
- process. Readdir is implemented by traversing the dentry lists. */
+#ifdef pr_fmt
+#undef pr_fmt
+#endif
+#define pr_fmt(fmt) KBUILD_MODNAME ":pid:%d:%s: " fmt, current->pid, __func__
+
+/*
+ * Unified info structure. This is pointed to by both the dentry and
+ * inode structures. Each file in the filesystem has an instance of this
+ * structure. It holds a reference to the dentry, so dentries are never
+ * flushed while the file exists. All name lookups are dealt with at the
+ * dentry level, although the filesystem can interfere in the validation
+ * process. Readdir is implemented by traversing the dentry lists.
+ */
struct autofs_info {
struct dentry *dentry;
struct inode *inode;
@@ -78,7 +69,7 @@ struct autofs_info {
kgid_t gid;
};
-#define AUTOFS_INF_EXPIRING (1<<0) /* dentry is in the process of expiring */
+#define AUTOFS_INF_EXPIRING (1<<0) /* dentry in the process of expiring */
#define AUTOFS_INF_NO_RCU (1<<1) /* the dentry is being considered
* for expiry, so RCU_walk is
* not permitted
@@ -140,10 +131,11 @@ static inline struct autofs_info *autofs4_dentry_ino(struct dentry *dentry)
}
/* autofs4_oz_mode(): do we see the man behind the curtain? (The
- processes which do manipulations for us in user space sees the raw
- filesystem without "magic".) */
-
-static inline int autofs4_oz_mode(struct autofs_sb_info *sbi) {
+ * processes which do manipulations for us in user space sees the raw
+ * filesystem without "magic".)
+ */
+static inline int autofs4_oz_mode(struct autofs_sb_info *sbi)
+{
return sbi->catatonic || task_pgrp(current) == sbi->oz_pgrp;
}
@@ -154,12 +146,12 @@ void autofs4_free_ino(struct autofs_info *);
int is_autofs4_dentry(struct dentry *);
int autofs4_expire_wait(struct dentry *dentry, int rcu_walk);
int autofs4_expire_run(struct super_block *, struct vfsmount *,
- struct autofs_sb_info *,
- struct autofs_packet_expire __user *);
+ struct autofs_sb_info *,
+ struct autofs_packet_expire __user *);
int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
struct autofs_sb_info *sbi, int when);
int autofs4_expire_multi(struct super_block *, struct vfsmount *,
- struct autofs_sb_info *, int __user *);
+ struct autofs_sb_info *, int __user *);
struct dentry *autofs4_expire_direct(struct super_block *sb,
struct vfsmount *mnt,
struct autofs_sb_info *sbi, int how);
@@ -224,8 +216,8 @@ static inline int autofs_prepare_pipe(struct file *pipe)
/* Queue management functions */
-int autofs4_wait(struct autofs_sb_info *,struct dentry *, enum autofs_notify);
-int autofs4_wait_release(struct autofs_sb_info *,autofs_wqt_t,int);
+int autofs4_wait(struct autofs_sb_info *, struct dentry *, enum autofs_notify);
+int autofs4_wait_release(struct autofs_sb_info *, autofs_wqt_t, int);
void autofs4_catatonic_mode(struct autofs_sb_info *);
static inline u32 autofs4_get_dev(struct autofs_sb_info *sbi)
@@ -242,37 +234,37 @@ static inline void __autofs4_add_expiring(struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
if (ino) {
if (list_empty(&ino->expiring))
list_add(&ino->expiring, &sbi->expiring_list);
}
- return;
}
static inline void autofs4_add_expiring(struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
if (ino) {
spin_lock(&sbi->lookup_lock);
if (list_empty(&ino->expiring))
list_add(&ino->expiring, &sbi->expiring_list);
spin_unlock(&sbi->lookup_lock);
}
- return;
}
static inline void autofs4_del_expiring(struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
+
if (ino) {
spin_lock(&sbi->lookup_lock);
if (!list_empty(&ino->expiring))
list_del_init(&ino->expiring);
spin_unlock(&sbi->lookup_lock);
}
- return;
}
extern void autofs4_kill_sb(struct super_block *);
diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c
index ac7d921ed..c7fcc7438 100644
--- a/fs/autofs4/dev-ioctl.c
+++ b/fs/autofs4/dev-ioctl.c
@@ -72,13 +72,13 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
{
int err = 0;
- if ((AUTOFS_DEV_IOCTL_VERSION_MAJOR != param->ver_major) ||
- (AUTOFS_DEV_IOCTL_VERSION_MINOR < param->ver_minor)) {
- AUTOFS_WARN("ioctl control interface version mismatch: "
- "kernel(%u.%u), user(%u.%u), cmd(%d)",
- AUTOFS_DEV_IOCTL_VERSION_MAJOR,
- AUTOFS_DEV_IOCTL_VERSION_MINOR,
- param->ver_major, param->ver_minor, cmd);
+ if ((param->ver_major != AUTOFS_DEV_IOCTL_VERSION_MAJOR) ||
+ (param->ver_minor > AUTOFS_DEV_IOCTL_VERSION_MINOR)) {
+ pr_warn("ioctl control interface version mismatch: "
+ "kernel(%u.%u), user(%u.%u), cmd(%d)\n",
+ AUTOFS_DEV_IOCTL_VERSION_MAJOR,
+ AUTOFS_DEV_IOCTL_VERSION_MINOR,
+ param->ver_major, param->ver_minor, cmd);
err = -EINVAL;
}
@@ -93,7 +93,8 @@ static int check_dev_ioctl_version(int cmd, struct autofs_dev_ioctl *param)
* Copy parameter control struct, including a possible path allocated
* at the end of the struct.
*/
-static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
+static struct autofs_dev_ioctl *
+ copy_dev_ioctl(struct autofs_dev_ioctl __user *in)
{
struct autofs_dev_ioctl tmp, *res;
@@ -116,7 +117,6 @@ static struct autofs_dev_ioctl *copy_dev_ioctl(struct autofs_dev_ioctl __user *i
static inline void free_dev_ioctl(struct autofs_dev_ioctl *param)
{
kfree(param);
- return;
}
/*
@@ -129,24 +129,24 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
err = check_dev_ioctl_version(cmd, param);
if (err) {
- AUTOFS_WARN("invalid device control module version "
- "supplied for cmd(0x%08x)", cmd);
+ pr_warn("invalid device control module version "
+ "supplied for cmd(0x%08x)\n", cmd);
goto out;
}
if (param->size > sizeof(*param)) {
err = invalid_str(param->path, param->size - sizeof(*param));
if (err) {
- AUTOFS_WARN(
- "path string terminator missing for cmd(0x%08x)",
+ pr_warn(
+ "path string terminator missing for cmd(0x%08x)\n",
cmd);
goto out;
}
err = check_name(param->path);
if (err) {
- AUTOFS_WARN("invalid path supplied for cmd(0x%08x)",
- cmd);
+ pr_warn("invalid path supplied for cmd(0x%08x)\n",
+ cmd);
goto out;
}
}
@@ -197,7 +197,9 @@ static int find_autofs_mount(const char *pathname,
void *data)
{
struct path path;
- int err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0);
+ int err;
+
+ err = kern_path_mountpoint(AT_FDCWD, pathname, &path, 0);
if (err)
return err;
err = -ENOENT;
@@ -225,6 +227,7 @@ static int test_by_dev(struct path *path, void *p)
static int test_by_type(struct path *path, void *p)
{
struct autofs_info *ino = autofs4_dentry_ino(path->dentry);
+
return ino && ino->sbi->type & *(unsigned *)p;
}
@@ -370,7 +373,7 @@ static int autofs_dev_ioctl_setpipefd(struct file *fp,
new_pid = get_task_pid(current, PIDTYPE_PGID);
if (ns_of_pid(new_pid) != ns_of_pid(sbi->oz_pgrp)) {
- AUTOFS_WARN("Not allowed to change PID namespace");
+ pr_warn("not allowed to change PID namespace\n");
err = -EINVAL;
goto out;
}
@@ -456,8 +459,10 @@ static int autofs_dev_ioctl_requester(struct file *fp,
err = 0;
autofs4_expire_wait(path.dentry, 0);
spin_lock(&sbi->fs_lock);
- param->requester.uid = from_kuid_munged(current_user_ns(), ino->uid);
- param->requester.gid = from_kgid_munged(current_user_ns(), ino->gid);
+ param->requester.uid =
+ from_kuid_munged(current_user_ns(), ino->uid);
+ param->requester.gid =
+ from_kgid_munged(current_user_ns(), ino->gid);
spin_unlock(&sbi->fs_lock);
}
path_put(&path);
@@ -619,7 +624,8 @@ static ioctl_fn lookup_dev_ioctl(unsigned int cmd)
}
/* ioctl dispatcher */
-static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __user *user)
+static int _autofs_dev_ioctl(unsigned int command,
+ struct autofs_dev_ioctl __user *user)
{
struct autofs_dev_ioctl *param;
struct file *fp;
@@ -655,7 +661,7 @@ static int _autofs_dev_ioctl(unsigned int command, struct autofs_dev_ioctl __use
fn = lookup_dev_ioctl(cmd);
if (!fn) {
- AUTOFS_WARN("unknown command 0x%08x", command);
+ pr_warn("unknown command 0x%08x\n", command);
return -ENOTTY;
}
@@ -711,6 +717,7 @@ out:
static long autofs_dev_ioctl(struct file *file, uint command, ulong u)
{
int err;
+
err = _autofs_dev_ioctl(command, (struct autofs_dev_ioctl __user *) u);
return (long) err;
}
@@ -733,8 +740,8 @@ static const struct file_operations _dev_ioctl_fops = {
static struct miscdevice _autofs_dev_ioctl_misc = {
.minor = AUTOFS_MINOR,
- .name = AUTOFS_DEVICE_NAME,
- .fops = &_dev_ioctl_fops
+ .name = AUTOFS_DEVICE_NAME,
+ .fops = &_dev_ioctl_fops
};
MODULE_ALIAS_MISCDEV(AUTOFS_MINOR);
@@ -747,7 +754,7 @@ int __init autofs_dev_ioctl_init(void)
r = misc_register(&_autofs_dev_ioctl_misc);
if (r) {
- AUTOFS_ERROR("misc_register failed for control device");
+ pr_err("misc_register failed for control device\n");
return r;
}
@@ -757,6 +764,4 @@ int __init autofs_dev_ioctl_init(void)
void autofs_dev_ioctl_exit(void)
{
misc_deregister(&_autofs_dev_ioctl_misc);
- return;
}
-
diff --git a/fs/autofs4/expire.c b/fs/autofs4/expire.c
index 1cebc3c52..9510d8d2e 100644
--- a/fs/autofs4/expire.c
+++ b/fs/autofs4/expire.c
@@ -1,16 +1,12 @@
-/* -*- c -*- --------------------------------------------------------------- *
- *
- * linux/fs/autofs/expire.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
- * Copyright 2001-2006 Ian Kent <raven@themaw.net>
+/*
+ * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+ * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
+ * Copyright 2001-2006 Ian Kent <raven@themaw.net>
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
+ */
#include "autofs_i.h"
@@ -18,7 +14,7 @@ static unsigned long now;
/* Check if a dentry can be expired */
static inline int autofs4_can_expire(struct dentry *dentry,
- unsigned long timeout, int do_now)
+ unsigned long timeout, int do_now)
{
struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -41,7 +37,7 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
struct path path = {.mnt = mnt, .dentry = dentry};
int status = 1;
- DPRINTK("dentry %p %pd", dentry, dentry);
+ pr_debug("dentry %p %pd\n", dentry, dentry);
path_get(&path);
@@ -58,14 +54,16 @@ static int autofs4_mount_busy(struct vfsmount *mnt, struct dentry *dentry)
/* Update the expiry counter if fs is busy */
if (!may_umount_tree(path.mnt)) {
- struct autofs_info *ino = autofs4_dentry_ino(top);
+ struct autofs_info *ino;
+
+ ino = autofs4_dentry_ino(top);
ino->last_used = jiffies;
goto done;
}
status = 0;
done:
- DPRINTK("returning = %d", status);
+ pr_debug("returning = %d\n", status);
path_put(&path);
return status;
}
@@ -74,7 +72,7 @@ done:
* Calculate and dget next entry in the subdirs list under root.
*/
static struct dentry *get_next_positive_subdir(struct dentry *prev,
- struct dentry *root)
+ struct dentry *root)
{
struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
struct list_head *next;
@@ -121,7 +119,7 @@ cont:
* Calculate and dget next entry in top down tree traversal.
*/
static struct dentry *get_next_positive_dentry(struct dentry *prev,
- struct dentry *root)
+ struct dentry *root)
{
struct autofs_sb_info *sbi = autofs4_sbi(root->d_sb);
struct list_head *next;
@@ -187,15 +185,17 @@ again:
* autofs submounts.
*/
static int autofs4_direct_busy(struct vfsmount *mnt,
- struct dentry *top,
- unsigned long timeout,
- int do_now)
+ struct dentry *top,
+ unsigned long timeout,
+ int do_now)
{
- DPRINTK("top %p %pd", top, top);
+ pr_debug("top %p %pd\n", top, top);
/* If it's busy update the expiry counters */
if (!may_umount_tree(mnt)) {
- struct autofs_info *ino = autofs4_dentry_ino(top);
+ struct autofs_info *ino;
+
+ ino = autofs4_dentry_ino(top);
if (ino)
ino->last_used = jiffies;
return 1;
@@ -208,7 +208,8 @@ static int autofs4_direct_busy(struct vfsmount *mnt,
return 0;
}
-/* Check a directory tree of mount points for busyness
+/*
+ * Check a directory tree of mount points for busyness
* The tree is not busy iff no mountpoints are busy
*/
static int autofs4_tree_busy(struct vfsmount *mnt,
@@ -219,7 +220,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
struct autofs_info *top_ino = autofs4_dentry_ino(top);
struct dentry *p;
- DPRINTK("top %p %pd", top, top);
+ pr_debug("top %p %pd\n", top, top);
/* Negative dentry - give up */
if (!simple_positive(top))
@@ -227,7 +228,7 @@ static int autofs4_tree_busy(struct vfsmount *mnt,
p = NULL;
while ((p = get_next_positive_dentry(p, top))) {
- DPRINTK("dentry %p %pd", p, p);
+ pr_debug("dentry %p %pd\n", p, p);
/*
* Is someone visiting anywhere in the subtree ?
@@ -273,11 +274,11 @@ static struct dentry *autofs4_check_leaves(struct vfsmount *mnt,
{
struct dentry *p;
- DPRINTK("parent %p %pd", parent, parent);
+ pr_debug("parent %p %pd\n", parent, parent);
p = NULL;
while ((p = get_next_positive_dentry(p, parent))) {
- DPRINTK("dentry %p %pd", p, p);
+ pr_debug("dentry %p %pd\n", p, p);
if (d_mountpoint(p)) {
/* Can we umount this guy */
@@ -362,7 +363,7 @@ static struct dentry *should_expire(struct dentry *dentry,
* offset (autofs-5.0+).
*/
if (d_mountpoint(dentry)) {
- DPRINTK("checking mountpoint %p %pd", dentry, dentry);
+ pr_debug("checking mountpoint %p %pd\n", dentry, dentry);
/* Can we umount this guy */
if (autofs4_mount_busy(mnt, dentry))
@@ -375,7 +376,7 @@ static struct dentry *should_expire(struct dentry *dentry,
}
if (d_really_is_positive(dentry) && d_is_symlink(dentry)) {
- DPRINTK("checking symlink %p %pd", dentry, dentry);
+ pr_debug("checking symlink %p %pd\n", dentry, dentry);
/*
* A symlink can't be "busy" in the usual sense so
* just check last used for expire timeout.
@@ -404,6 +405,7 @@ static struct dentry *should_expire(struct dentry *dentry,
} else {
/* Path walk currently on this dentry? */
struct dentry *expired;
+
ino_count = atomic_read(&ino->count) + 1;
if (d_count(dentry) > ino_count)
return NULL;
@@ -471,7 +473,7 @@ struct dentry *autofs4_expire_indirect(struct super_block *sb,
return NULL;
found:
- DPRINTK("returning %p %pd", expired, expired);
+ pr_debug("returning %p %pd\n", expired, expired);
ino->flags |= AUTOFS_INF_EXPIRING;
smp_mb();
ino->flags &= ~AUTOFS_INF_NO_RCU;
@@ -503,12 +505,12 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
if (ino->flags & AUTOFS_INF_EXPIRING) {
spin_unlock(&sbi->fs_lock);
- DPRINTK("waiting for expire %p name=%pd", dentry, dentry);
+ pr_debug("waiting for expire %p name=%pd\n", dentry, dentry);
status = autofs4_wait(sbi, dentry, NFY_NONE);
wait_for_completion(&ino->expire_complete);
- DPRINTK("expire done status=%d", status);
+ pr_debug("expire done status=%d\n", status);
if (d_unhashed(dentry))
return -EAGAIN;
@@ -522,21 +524,22 @@ int autofs4_expire_wait(struct dentry *dentry, int rcu_walk)
/* Perform an expiry operation */
int autofs4_expire_run(struct super_block *sb,
- struct vfsmount *mnt,
- struct autofs_sb_info *sbi,
- struct autofs_packet_expire __user *pkt_p)
+ struct vfsmount *mnt,
+ struct autofs_sb_info *sbi,
+ struct autofs_packet_expire __user *pkt_p)
{
struct autofs_packet_expire pkt;
struct autofs_info *ino;
struct dentry *dentry;
int ret = 0;
- memset(&pkt,0,sizeof pkt);
+ memset(&pkt, 0, sizeof(pkt));
pkt.hdr.proto_version = sbi->version;
pkt.hdr.type = autofs_ptype_expire;
- if ((dentry = autofs4_expire_indirect(sb, mnt, sbi, 0)) == NULL)
+ dentry = autofs4_expire_indirect(sb, mnt, sbi, 0);
+ if (!dentry)
return -EAGAIN;
pkt.len = dentry->d_name.len;
@@ -544,7 +547,7 @@ int autofs4_expire_run(struct super_block *sb,
pkt.name[pkt.len] = '\0';
dput(dentry);
- if ( copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)) )
+ if (copy_to_user(pkt_p, &pkt, sizeof(struct autofs_packet_expire)))
ret = -EFAULT;
spin_lock(&sbi->fs_lock);
@@ -573,7 +576,8 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
struct autofs_info *ino = autofs4_dentry_ino(dentry);
/* This is synchronous because it makes the daemon a
- little easier */
+ * little easier
+ */
ret = autofs4_wait(sbi, dentry, NFY_EXPIRE);
spin_lock(&sbi->fs_lock);
@@ -588,8 +592,10 @@ int autofs4_do_expire_multi(struct super_block *sb, struct vfsmount *mnt,
return ret;
}
-/* Call repeatedly until it returns -EAGAIN, meaning there's nothing
- more to be done */
+/*
+ * Call repeatedly until it returns -EAGAIN, meaning there's nothing
+ * more to be done.
+ */
int autofs4_expire_multi(struct super_block *sb, struct vfsmount *mnt,
struct autofs_sb_info *sbi, int __user *arg)
{
diff --git a/fs/autofs4/init.c b/fs/autofs4/init.c
index b3db517e8..8cf0e6338 100644
--- a/fs/autofs4/init.c
+++ b/fs/autofs4/init.c
@@ -1,14 +1,10 @@
-/* -*- c -*- --------------------------------------------------------------- *
- *
- * linux/fs/autofs/init.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+/*
+ * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
+ */
#include <linux/module.h>
#include <linux/init.h>
diff --git a/fs/autofs4/inode.c b/fs/autofs4/inode.c
index a3ae0b2ae..61b21051b 100644
--- a/fs/autofs4/inode.c
+++ b/fs/autofs4/inode.c
@@ -1,15 +1,11 @@
-/* -*- c -*- --------------------------------------------------------------- *
- *
- * linux/fs/autofs/inode.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- * Copyright 2005-2006 Ian Kent <raven@themaw.net>
+/*
+ * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+ * Copyright 2005-2006 Ian Kent <raven@themaw.net>
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
+ */
#include <linux/kernel.h>
#include <linux/slab.h>
@@ -24,7 +20,9 @@
struct autofs_info *autofs4_new_ino(struct autofs_sb_info *sbi)
{
- struct autofs_info *ino = kzalloc(sizeof(*ino), GFP_KERNEL);
+ struct autofs_info *ino;
+
+ ino = kzalloc(sizeof(*ino), GFP_KERNEL);
if (ino) {
INIT_LIST_HEAD(&ino->active);
INIT_LIST_HEAD(&ino->expiring);
@@ -62,7 +60,7 @@ void autofs4_kill_sb(struct super_block *sb)
put_pid(sbi->oz_pgrp);
}
- DPRINTK("shutting down");
+ pr_debug("shutting down\n");
kill_litter_super(sb);
if (sbi)
kfree_rcu(sbi, rcu);
@@ -94,7 +92,12 @@ static int autofs4_show_options(struct seq_file *m, struct dentry *root)
seq_printf(m, ",direct");
else
seq_printf(m, ",indirect");
-
+#ifdef CONFIG_CHECKPOINT_RESTORE
+ if (sbi->pipe)
+ seq_printf(m, ",pipe_ino=%ld", sbi->pipe->f_inode->i_ino);
+ else
+ seq_printf(m, ",pipe_ino=-1");
+#endif
return 0;
}
@@ -147,6 +150,7 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
while ((p = strsep(&options, ",")) != NULL) {
int token;
+
if (!*p)
continue;
@@ -204,9 +208,9 @@ static int parse_options(char *options, int *pipefd, kuid_t *uid, kgid_t *gid,
int autofs4_fill_super(struct super_block *s, void *data, int silent)
{
- struct inode * root_inode;
- struct dentry * root;
- struct file * pipe;
+ struct inode *root_inode;
+ struct dentry *root;
+ struct file *pipe;
int pipefd;
struct autofs_sb_info *sbi;
struct autofs_info *ino;
@@ -217,7 +221,7 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi)
return -ENOMEM;
- DPRINTK("starting up, sbi = %p",sbi);
+ pr_debug("starting up, sbi = %p\n", sbi);
s->s_fs_info = sbi;
sbi->magic = AUTOFS_SBI_MAGIC;
@@ -266,14 +270,14 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
if (parse_options(data, &pipefd, &root_inode->i_uid, &root_inode->i_gid,
&pgrp, &pgrp_set, &sbi->type, &sbi->min_proto,
&sbi->max_proto)) {
- printk("autofs: called with bogus options\n");
+ pr_err("called with bogus options\n");
goto fail_dput;
}
if (pgrp_set) {
sbi->oz_pgrp = find_get_pid(pgrp);
if (!sbi->oz_pgrp) {
- pr_warn("autofs: could not find process group %d\n",
+ pr_err("could not find process group %d\n",
pgrp);
goto fail_dput;
}
@@ -290,10 +294,10 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
/* Couldn't this be tested earlier? */
if (sbi->max_proto < AUTOFS_MIN_PROTO_VERSION ||
sbi->min_proto > AUTOFS_MAX_PROTO_VERSION) {
- printk("autofs: kernel does not match daemon version "
+ pr_err("kernel does not match daemon version "
"daemon (%d, %d) kernel (%d, %d)\n",
- sbi->min_proto, sbi->max_proto,
- AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
+ sbi->min_proto, sbi->max_proto,
+ AUTOFS_MIN_PROTO_VERSION, AUTOFS_MAX_PROTO_VERSION);
goto fail_dput;
}
@@ -304,11 +308,11 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
sbi->version = sbi->max_proto;
sbi->sub_version = AUTOFS_PROTO_SUBVERSION;
- DPRINTK("pipe fd = %d, pgrp = %u", pipefd, pid_nr(sbi->oz_pgrp));
+ pr_debug("pipe fd = %d, pgrp = %u\n", pipefd, pid_nr(sbi->oz_pgrp));
pipe = fget(pipefd);
if (!pipe) {
- printk("autofs: could not open pipe file descriptor\n");
+ pr_err("could not open pipe file descriptor\n");
goto fail_dput;
}
ret = autofs_prepare_pipe(pipe);
@@ -323,12 +327,12 @@ int autofs4_fill_super(struct super_block *s, void *data, int silent)
*/
s->s_root = root;
return 0;
-
+
/*
* Failure ... clean up.
*/
fail_fput:
- printk("autofs: pipe file descriptor does not contain proper ops\n");
+ pr_err("pipe file descriptor does not contain proper ops\n");
fput(pipe);
/* fall through */
fail_dput:
diff --git a/fs/autofs4/root.c b/fs/autofs4/root.c
index c6d7d3dbd..7ab923940 100644
--- a/fs/autofs4/root.c
+++ b/fs/autofs4/root.c
@@ -1,16 +1,12 @@
-/* -*- c -*- --------------------------------------------------------------- *
- *
- * linux/fs/autofs/root.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
- * Copyright 2001-2006 Ian Kent <raven@themaw.net>
+/*
+ * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+ * Copyright 1999-2000 Jeremy Fitzhardinge <jeremy@goop.org>
+ * Copyright 2001-2006 Ian Kent <raven@themaw.net>
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
+ */
#include <linux/capability.h>
#include <linux/errno.h>
@@ -23,16 +19,18 @@
#include "autofs_i.h"
-static int autofs4_dir_symlink(struct inode *,struct dentry *,const char *);
-static int autofs4_dir_unlink(struct inode *,struct dentry *);
-static int autofs4_dir_rmdir(struct inode *,struct dentry *);
-static int autofs4_dir_mkdir(struct inode *,struct dentry *,umode_t);
-static long autofs4_root_ioctl(struct file *,unsigned int,unsigned long);
+static int autofs4_dir_symlink(struct inode *, struct dentry *, const char *);
+static int autofs4_dir_unlink(struct inode *, struct dentry *);
+static int autofs4_dir_rmdir(struct inode *, struct dentry *);
+static int autofs4_dir_mkdir(struct inode *, struct dentry *, umode_t);
+static long autofs4_root_ioctl(struct file *, unsigned int, unsigned long);
#ifdef CONFIG_COMPAT
-static long autofs4_root_compat_ioctl(struct file *,unsigned int,unsigned long);
+static long autofs4_root_compat_ioctl(struct file *,
+ unsigned int, unsigned long);
#endif
static int autofs4_dir_open(struct inode *inode, struct file *file);
-static struct dentry *autofs4_lookup(struct inode *,struct dentry *, unsigned int);
+static struct dentry *autofs4_lookup(struct inode *,
+ struct dentry *, unsigned int);
static struct vfsmount *autofs4_d_automount(struct path *);
static int autofs4_d_manage(struct dentry *, bool);
static void autofs4_dentry_release(struct dentry *);
@@ -74,7 +72,9 @@ const struct dentry_operations autofs4_dentry_operations = {
static void autofs4_add_active(struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ struct autofs_info *ino;
+
+ ino = autofs4_dentry_ino(dentry);
if (ino) {
spin_lock(&sbi->lookup_lock);
if (!ino->active_count) {
@@ -84,13 +84,14 @@ static void autofs4_add_active(struct dentry *dentry)
ino->active_count++;
spin_unlock(&sbi->lookup_lock);
}
- return;
}
static void autofs4_del_active(struct dentry *dentry)
{
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- struct autofs_info *ino = autofs4_dentry_ino(dentry);
+ struct autofs_info *ino;
+
+ ino = autofs4_dentry_ino(dentry);
if (ino) {
spin_lock(&sbi->lookup_lock);
ino->active_count--;
@@ -100,7 +101,6 @@ static void autofs4_del_active(struct dentry *dentry)
}
spin_unlock(&sbi->lookup_lock);
}
- return;
}
static int autofs4_dir_open(struct inode *inode, struct file *file)
@@ -108,7 +108,7 @@ static int autofs4_dir_open(struct inode *inode, struct file *file)
struct dentry *dentry = file->f_path.dentry;
struct autofs_sb_info *sbi = autofs4_sbi(dentry->d_sb);
- DPRINTK("file=%p dentry=%p %pd", file, dentry, dentry);
+ pr_debug("file=%p dentry=%p %pd\n", file, dentry, dentry);
if (autofs4_oz_mode(sbi))
goto out;
@@ -138,7 +138,7 @@ static void autofs4_dentry_release(struct dentry *de)
struct autofs_info *ino = autofs4_dentry_ino(de);
struct autofs_sb_info *sbi = autofs4_sbi(de->d_sb);
- DPRINTK("releasing %p", de);
+ pr_debug("releasing %p\n", de);
if (!ino)
return;
@@ -278,9 +278,9 @@ static int autofs4_mount_wait(struct dentry *dentry, bool rcu_walk)
if (ino->flags & AUTOFS_INF_PENDING) {
if (rcu_walk)
return -ECHILD;
- DPRINTK("waiting for mount name=%pd", dentry);
+ pr_debug("waiting for mount name=%pd\n", dentry);
status = autofs4_wait(sbi, dentry, NFY_MOUNT);
- DPRINTK("mount wait done status=%d", status);
+ pr_debug("mount wait done status=%d\n", status);
}
ino->last_used = jiffies;
return status;
@@ -320,7 +320,9 @@ static struct dentry *autofs4_mountpoint_changed(struct path *path)
if (autofs_type_indirect(sbi->type) && d_unhashed(dentry)) {
struct dentry *parent = dentry->d_parent;
struct autofs_info *ino;
- struct dentry *new = d_lookup(parent, &dentry->d_name);
+ struct dentry *new;
+
+ new = d_lookup(parent, &dentry->d_name);
if (!new)
return NULL;
ino = autofs4_dentry_ino(new);
@@ -338,7 +340,7 @@ static struct vfsmount *autofs4_d_automount(struct path *path)
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
- DPRINTK("dentry=%p %pd", dentry, dentry);
+ pr_debug("dentry=%p %pd\n", dentry, dentry);
/* The daemon never triggers a mount. */
if (autofs4_oz_mode(sbi))
@@ -425,7 +427,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
struct autofs_info *ino = autofs4_dentry_ino(dentry);
int status;
- DPRINTK("dentry=%p %pd", dentry, dentry);
+ pr_debug("dentry=%p %pd\n", dentry, dentry);
/* The daemon never waits. */
if (autofs4_oz_mode(sbi)) {
@@ -455,6 +457,7 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
* a mount-trap.
*/
struct inode *inode;
+
if (ino->flags & (AUTOFS_INF_EXPIRING | AUTOFS_INF_NO_RCU))
return 0;
if (d_mountpoint(dentry))
@@ -494,13 +497,14 @@ static int autofs4_d_manage(struct dentry *dentry, bool rcu_walk)
}
/* Lookups in the root directory */
-static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, unsigned int flags)
+static struct dentry *autofs4_lookup(struct inode *dir,
+ struct dentry *dentry, unsigned int flags)
{
struct autofs_sb_info *sbi;
struct autofs_info *ino;
struct dentry *active;
- DPRINTK("name = %pd", dentry);
+ pr_debug("name = %pd\n", dentry);
/* File name too long to exist */
if (dentry->d_name.len > NAME_MAX)
@@ -508,14 +512,14 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u
sbi = autofs4_sbi(dir->i_sb);
- DPRINTK("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d",
- current->pid, task_pgrp_nr(current), sbi->catatonic,
- autofs4_oz_mode(sbi));
+ pr_debug("pid = %u, pgrp = %u, catatonic = %d, oz_mode = %d\n",
+ current->pid, task_pgrp_nr(current), sbi->catatonic,
+ autofs4_oz_mode(sbi));
active = autofs4_lookup_active(dentry);
- if (active) {
+ if (active)
return active;
- } else {
+ else {
/*
* A dentry that is not within the root can never trigger a
* mount operation, unless the directory already exists, so we
@@ -526,7 +530,8 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u
return ERR_PTR(-ENOENT);
/* Mark entries in the root as mount triggers */
- if (autofs_type_indirect(sbi->type) && IS_ROOT(dentry->d_parent))
+ if (IS_ROOT(dentry->d_parent) &&
+ autofs_type_indirect(sbi->type))
__managed_dentry_set_managed(dentry);
ino = autofs4_new_ino(sbi);
@@ -537,8 +542,6 @@ static struct dentry *autofs4_lookup(struct inode *dir, struct dentry *dentry, u
ino->dentry = dentry;
autofs4_add_active(dentry);
-
- d_instantiate(dentry, NULL);
}
return NULL;
}
@@ -554,7 +557,7 @@ static int autofs4_dir_symlink(struct inode *dir,
size_t size = strlen(symname);
char *cp;
- DPRINTK("%s <- %pd", symname, dentry);
+ pr_debug("%s <- %pd\n", symname, dentry);
if (!autofs4_oz_mode(sbi))
return -EACCES;
@@ -613,7 +616,7 @@ static int autofs4_dir_unlink(struct inode *dir, struct dentry *dentry)
struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
struct autofs_info *p_ino;
-
+
/* This allows root to remove symlinks */
if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
return -EPERM;
@@ -664,7 +667,6 @@ static void autofs_set_leaf_automount_flags(struct dentry *dentry)
if (IS_ROOT(parent->d_parent))
return;
managed_dentry_clear_managed(parent);
- return;
}
static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
@@ -687,7 +689,6 @@ static void autofs_clear_leaf_automount_flags(struct dentry *dentry)
if (d_child->next == &parent->d_subdirs &&
d_child->prev == &parent->d_subdirs)
managed_dentry_set_managed(parent);
- return;
}
static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
@@ -695,8 +696,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
struct autofs_info *p_ino;
-
- DPRINTK("dentry %p, removing %pd", dentry, dentry);
+
+ pr_debug("dentry %p, removing %pd\n", dentry, dentry);
if (!autofs4_oz_mode(sbi))
return -EACCES;
@@ -728,7 +729,8 @@ static int autofs4_dir_rmdir(struct inode *dir, struct dentry *dentry)
return 0;
}
-static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+static int autofs4_dir_mkdir(struct inode *dir,
+ struct dentry *dentry, umode_t mode)
{
struct autofs_sb_info *sbi = autofs4_sbi(dir->i_sb);
struct autofs_info *ino = autofs4_dentry_ino(dentry);
@@ -738,7 +740,7 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m
if (!autofs4_oz_mode(sbi))
return -EACCES;
- DPRINTK("dentry %p, creating %pd", dentry, dentry);
+ pr_debug("dentry %p, creating %pd\n", dentry, dentry);
BUG_ON(!ino);
@@ -768,14 +770,18 @@ static int autofs4_dir_mkdir(struct inode *dir, struct dentry *dentry, umode_t m
/* Get/set timeout ioctl() operation */
#ifdef CONFIG_COMPAT
static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi,
- compat_ulong_t __user *p)
+ compat_ulong_t __user *p)
{
- int rv;
unsigned long ntimeout;
+ int rv;
+
+ rv = get_user(ntimeout, p);
+ if (rv)
+ goto error;
- if ((rv = get_user(ntimeout, p)) ||
- (rv = put_user(sbi->exp_timeout/HZ, p)))
- return rv;
+ rv = put_user(sbi->exp_timeout/HZ, p);
+ if (rv)
+ goto error;
if (ntimeout > UINT_MAX/HZ)
sbi->exp_timeout = 0;
@@ -783,18 +789,24 @@ static inline int autofs4_compat_get_set_timeout(struct autofs_sb_info *sbi,
sbi->exp_timeout = ntimeout * HZ;
return 0;
+error:
+ return rv;
}
#endif
static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi,
- unsigned long __user *p)
+ unsigned long __user *p)
{
- int rv;
unsigned long ntimeout;
+ int rv;
+
+ rv = get_user(ntimeout, p);
+ if (rv)
+ goto error;
- if ((rv = get_user(ntimeout, p)) ||
- (rv = put_user(sbi->exp_timeout/HZ, p)))
- return rv;
+ rv = put_user(sbi->exp_timeout/HZ, p);
+ if (rv)
+ goto error;
if (ntimeout > ULONG_MAX/HZ)
sbi->exp_timeout = 0;
@@ -802,16 +814,20 @@ static inline int autofs4_get_set_timeout(struct autofs_sb_info *sbi,
sbi->exp_timeout = ntimeout * HZ;
return 0;
+error:
+ return rv;
}
/* Return protocol version */
-static inline int autofs4_get_protover(struct autofs_sb_info *sbi, int __user *p)
+static inline int autofs4_get_protover(struct autofs_sb_info *sbi,
+ int __user *p)
{
return put_user(sbi->version, p);
}
/* Return protocol sub version */
-static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi, int __user *p)
+static inline int autofs4_get_protosubver(struct autofs_sb_info *sbi,
+ int __user *p)
{
return put_user(sbi->sub_version, p);
}
@@ -826,7 +842,7 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
if (may_umount(mnt))
status = 1;
- DPRINTK("returning %d", status);
+ pr_debug("returning %d\n", status);
status = put_user(status, p);
@@ -834,9 +850,9 @@ static inline int autofs4_ask_umount(struct vfsmount *mnt, int __user *p)
}
/* Identify autofs4_dentries - this is so we can tell if there's
- an extra dentry refcount or not. We only hold a refcount on the
- dentry if its non-negative (ie, d_inode != NULL)
-*/
+ * an extra dentry refcount or not. We only hold a refcount on the
+ * dentry if its non-negative (ie, d_inode != NULL)
+ */
int is_autofs4_dentry(struct dentry *dentry)
{
return dentry && d_really_is_positive(dentry) &&
@@ -854,21 +870,21 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
struct autofs_sb_info *sbi = autofs4_sbi(inode->i_sb);
void __user *p = (void __user *)arg;
- DPRINTK("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u",
- cmd,arg,sbi,task_pgrp_nr(current));
+ pr_debug("cmd = 0x%08x, arg = 0x%08lx, sbi = %p, pgrp = %u\n",
+ cmd, arg, sbi, task_pgrp_nr(current));
if (_IOC_TYPE(cmd) != _IOC_TYPE(AUTOFS_IOC_FIRST) ||
_IOC_NR(cmd) - _IOC_NR(AUTOFS_IOC_FIRST) >= AUTOFS_IOC_COUNT)
return -ENOTTY;
-
+
if (!autofs4_oz_mode(sbi) && !capable(CAP_SYS_ADMIN))
return -EPERM;
-
- switch(cmd) {
+
+ switch (cmd) {
case AUTOFS_IOC_READY: /* Wait queue: go ahead and retry */
- return autofs4_wait_release(sbi,(autofs_wqt_t)arg,0);
+ return autofs4_wait_release(sbi, (autofs_wqt_t) arg, 0);
case AUTOFS_IOC_FAIL: /* Wait queue: fail with ENOENT */
- return autofs4_wait_release(sbi,(autofs_wqt_t)arg,-ENOENT);
+ return autofs4_wait_release(sbi, (autofs_wqt_t) arg, -ENOENT);
case AUTOFS_IOC_CATATONIC: /* Enter catatonic mode (daemon shutdown) */
autofs4_catatonic_mode(sbi);
return 0;
@@ -888,13 +904,15 @@ static int autofs4_root_ioctl_unlocked(struct inode *inode, struct file *filp,
/* return a single thing to expire */
case AUTOFS_IOC_EXPIRE:
- return autofs4_expire_run(inode->i_sb,filp->f_path.mnt,sbi, p);
+ return autofs4_expire_run(inode->i_sb,
+ filp->f_path.mnt, sbi, p);
/* same as above, but can send multiple expires through pipe */
case AUTOFS_IOC_EXPIRE_MULTI:
- return autofs4_expire_multi(inode->i_sb,filp->f_path.mnt,sbi, p);
+ return autofs4_expire_multi(inode->i_sb,
+ filp->f_path.mnt, sbi, p);
default:
- return -ENOSYS;
+ return -EINVAL;
}
}
@@ -902,12 +920,13 @@ static long autofs4_root_ioctl(struct file *filp,
unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
+
return autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
}
#ifdef CONFIG_COMPAT
static long autofs4_root_compat_ioctl(struct file *filp,
- unsigned int cmd, unsigned long arg)
+ unsigned int cmd, unsigned long arg)
{
struct inode *inode = file_inode(filp);
int ret;
@@ -916,7 +935,7 @@ static long autofs4_root_compat_ioctl(struct file *filp,
ret = autofs4_root_ioctl_unlocked(inode, filp, cmd, arg);
else
ret = autofs4_root_ioctl_unlocked(inode, filp, cmd,
- (unsigned long)compat_ptr(arg));
+ (unsigned long) compat_ptr(arg));
return ret;
}
diff --git a/fs/autofs4/symlink.c b/fs/autofs4/symlink.c
index 84e037d1d..99aab00dc 100644
--- a/fs/autofs4/symlink.c
+++ b/fs/autofs4/symlink.c
@@ -1,14 +1,10 @@
-/* -*- c -*- --------------------------------------------------------------- *
- *
- * linux/fs/autofs/symlink.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+/*
+ * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
+ */
#include "autofs_i.h"
@@ -18,6 +14,7 @@ static const char *autofs4_get_link(struct dentry *dentry,
{
struct autofs_sb_info *sbi;
struct autofs_info *ino;
+
if (!dentry)
return ERR_PTR(-ECHILD);
sbi = autofs4_sbi(dentry->d_sb);
diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c
index 35b755e79..0146d911f 100644
--- a/fs/autofs4/waitq.c
+++ b/fs/autofs4/waitq.c
@@ -1,15 +1,11 @@
-/* -*- c -*- --------------------------------------------------------------- *
- *
- * linux/fs/autofs/waitq.c
- *
- * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
- * Copyright 2001-2006 Ian Kent <raven@themaw.net>
+/*
+ * Copyright 1997-1998 Transmeta Corporation -- All Rights Reserved
+ * Copyright 2001-2006 Ian Kent <raven@themaw.net>
*
* This file is part of the Linux kernel and is made available under
* the terms of the GNU General Public License, version 2, or at your
* option, any later version, incorporated herein by reference.
- *
- * ------------------------------------------------------------------------- */
+ */
#include <linux/slab.h>
#include <linux/time.h>
@@ -18,7 +14,8 @@
#include "autofs_i.h"
/* We make this a static variable rather than a part of the superblock; it
- is better if we don't reassign numbers easily even across filesystems */
+ * is better if we don't reassign numbers easily even across filesystems
+ */
static autofs_wqt_t autofs4_next_wait_queue = 1;
/* These are the signals we allow interrupting a pending mount */
@@ -34,7 +31,7 @@ void autofs4_catatonic_mode(struct autofs_sb_info *sbi)
return;
}
- DPRINTK("entering catatonic mode");
+ pr_debug("entering catatonic mode\n");
sbi->catatonic = 1;
wq = sbi->queues;
@@ -69,17 +66,19 @@ static int autofs4_write(struct autofs_sb_info *sbi,
set_fs(KERNEL_DS);
mutex_lock(&sbi->pipe_mutex);
- while (bytes &&
- (wr = __vfs_write(file,data,bytes,&file->f_pos)) > 0) {
+ wr = __vfs_write(file, data, bytes, &file->f_pos);
+ while (bytes && wr) {
data += wr;
bytes -= wr;
+ wr = __vfs_write(file, data, bytes, &file->f_pos);
}
mutex_unlock(&sbi->pipe_mutex);
set_fs(fs);
/* Keep the currently executing process from receiving a
- SIGPIPE unless it was already supposed to get one */
+ * SIGPIPE unless it was already supposed to get one
+ */
if (wr == -EPIPE && !sigpipe) {
spin_lock_irqsave(&current->sighand->siglock, flags);
sigdelset(&current->pending.signal, SIGPIPE);
@@ -89,7 +88,7 @@ static int autofs4_write(struct autofs_sb_info *sbi,
return (bytes > 0);
}
-
+
static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
struct autofs_wait_queue *wq,
int type)
@@ -102,10 +101,11 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
struct file *pipe = NULL;
size_t pktsz;
- DPRINTK("wait id = 0x%08lx, name = %.*s, type=%d",
- (unsigned long) wq->wait_queue_token, wq->name.len, wq->name.name, type);
+ pr_debug("wait id = 0x%08lx, name = %.*s, type=%d\n",
+ (unsigned long) wq->wait_queue_token,
+ wq->name.len, wq->name.name, type);
- memset(&pkt,0,sizeof pkt); /* For security reasons */
+ memset(&pkt, 0, sizeof(pkt)); /* For security reasons */
pkt.hdr.proto_version = sbi->version;
pkt.hdr.type = type;
@@ -126,7 +126,8 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
}
case autofs_ptype_expire_multi:
{
- struct autofs_packet_expire_multi *ep = &pkt.v4_pkt.expire_multi;
+ struct autofs_packet_expire_multi *ep =
+ &pkt.v4_pkt.expire_multi;
pktsz = sizeof(*ep);
@@ -163,7 +164,7 @@ static void autofs4_notify_daemon(struct autofs_sb_info *sbi,
break;
}
default:
- printk("autofs4_notify_daemon: bad type %d!\n", type);
+ pr_warn("bad type %d!\n", type);
mutex_unlock(&sbi->wq_mutex);
return;
}
@@ -231,7 +232,7 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
if (wq->name.hash == qstr->hash &&
wq->name.len == qstr->len &&
wq->name.name &&
- !memcmp(wq->name.name, qstr->name, qstr->len))
+ !memcmp(wq->name.name, qstr->name, qstr->len))
break;
}
return wq;
@@ -248,7 +249,7 @@ autofs4_find_wait(struct autofs_sb_info *sbi, struct qstr *qstr)
static int validate_request(struct autofs_wait_queue **wait,
struct autofs_sb_info *sbi,
struct qstr *qstr,
- struct dentry*dentry, enum autofs_notify notify)
+ struct dentry *dentry, enum autofs_notify notify)
{
struct autofs_wait_queue *wq;
struct autofs_info *ino;
@@ -322,8 +323,10 @@ static int validate_request(struct autofs_wait_queue **wait,
* continue on and create a new request.
*/
if (!IS_ROOT(dentry)) {
- if (d_really_is_positive(dentry) && d_unhashed(dentry)) {
+ if (d_unhashed(dentry) &&
+ d_really_is_positive(dentry)) {
struct dentry *parent = dentry->d_parent;
+
new = d_lookup(parent, &dentry->d_name);
if (new)
dentry = new;
@@ -340,8 +343,8 @@ static int validate_request(struct autofs_wait_queue **wait,
return 1;
}
-int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
- enum autofs_notify notify)
+int autofs4_wait(struct autofs_sb_info *sbi,
+ struct dentry *dentry, enum autofs_notify notify)
{
struct autofs_wait_queue *wq;
struct qstr qstr;
@@ -411,7 +414,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
if (!wq) {
/* Create a new wait queue */
- wq = kmalloc(sizeof(struct autofs_wait_queue),GFP_KERNEL);
+ wq = kmalloc(sizeof(struct autofs_wait_queue), GFP_KERNEL);
if (!wq) {
kfree(qstr.name);
mutex_unlock(&sbi->wq_mutex);
@@ -450,17 +453,19 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
autofs_ptype_expire_indirect;
}
- DPRINTK("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
- (unsigned long) wq->wait_queue_token, wq->name.len,
- wq->name.name, notify);
+ pr_debug("new wait id = 0x%08lx, name = %.*s, nfy=%d\n",
+ (unsigned long) wq->wait_queue_token, wq->name.len,
+ wq->name.name, notify);
- /* autofs4_notify_daemon() may block; it will unlock ->wq_mutex */
+ /*
+ * autofs4_notify_daemon() may block; it will unlock ->wq_mutex
+ */
autofs4_notify_daemon(sbi, wq, type);
} else {
wq->wait_ctr++;
- DPRINTK("existing wait id = 0x%08lx, name = %.*s, nfy=%d",
- (unsigned long) wq->wait_queue_token, wq->name.len,
- wq->name.name, notify);
+ pr_debug("existing wait id = 0x%08lx, name = %.*s, nfy=%d\n",
+ (unsigned long) wq->wait_queue_token, wq->name.len,
+ wq->name.name, notify);
mutex_unlock(&sbi->wq_mutex);
kfree(qstr.name);
}
@@ -471,12 +476,14 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
*/
if (wq->name.name) {
/* Block all but "shutdown" signals while waiting */
- sigset_t oldset;
+ unsigned long shutdown_sigs_mask;
unsigned long irqflags;
+ sigset_t oldset;
spin_lock_irqsave(&current->sighand->siglock, irqflags);
oldset = current->blocked;
- siginitsetinv(&current->blocked, SHUTDOWN_SIGS & ~oldset.sig[0]);
+ shutdown_sigs_mask = SHUTDOWN_SIGS & ~oldset.sig[0];
+ siginitsetinv(&current->blocked, shutdown_sigs_mask);
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
@@ -487,7 +494,7 @@ int autofs4_wait(struct autofs_sb_info *sbi, struct dentry *dentry,
recalc_sigpending();
spin_unlock_irqrestore(&current->sighand->siglock, irqflags);
} else {
- DPRINTK("skipped sleeping");
+ pr_debug("skipped sleeping\n");
}
status = wq->status;
@@ -562,4 +569,3 @@ int autofs4_wait_release(struct autofs_sb_info *sbi, autofs_wqt_t wait_queue_tok
return 0;
}
-
diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c
index 7d914c67a..81381cc0d 100644
--- a/fs/binfmt_elf.c
+++ b/fs/binfmt_elf.c
@@ -2292,7 +2292,7 @@ static int elf_core_dump(struct coredump_params *cprm)
void *kaddr = kmap(page);
stop = !dump_emit(cprm, kaddr, PAGE_SIZE);
kunmap(page);
- page_cache_release(page);
+ put_page(page);
} else
stop = !dump_skip(cprm, PAGE_SIZE);
if (stop)
diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c
index b1adb92e6..083ea2bc6 100644
--- a/fs/binfmt_elf_fdpic.c
+++ b/fs/binfmt_elf_fdpic.c
@@ -1533,7 +1533,7 @@ static bool elf_fdpic_dump_segments(struct coredump_params *cprm)
void *kaddr = kmap(page);
res = dump_emit(cprm, kaddr, PAGE_SIZE);
kunmap(page);
- page_cache_release(page);
+ put_page(page);
} else {
res = dump_skip(cprm, PAGE_SIZE);
}
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 826b164a4..20a2c02b7 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -331,7 +331,7 @@ static int blkdev_write_end(struct file *file, struct address_space *mapping,
ret = block_write_end(file, mapping, pos, len, copied, page, fsdata);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return ret;
}
@@ -575,7 +575,11 @@ static const struct super_operations bdev_sops = {
static struct dentry *bd_mount(struct file_system_type *fs_type,
int flags, const char *dev_name, void *data)
{
- return mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
+ struct dentry *dent;
+ dent = mount_pseudo(fs_type, "bdev:", &bdev_sops, NULL, BDEVFS_MAGIC);
+ if (dent)
+ dent->d_sb->s_iflags |= SB_I_CGROUPWB;
+ return dent;
}
static struct file_system_type bd_type = {
@@ -1145,7 +1149,7 @@ void bd_set_size(struct block_device *bdev, loff_t size)
inode_lock(bdev->bd_inode);
i_size_write(bdev->bd_inode, size);
inode_unlock(bdev->bd_inode);
- while (bsize < PAGE_CACHE_SIZE) {
+ while (bsize < PAGE_SIZE) {
if (size & bsize)
break;
bsize <<= 1;
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index f6dac40f8..80e8472d6 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -148,8 +148,7 @@ int __init btrfs_prelim_ref_init(void)
void btrfs_prelim_ref_exit(void)
{
- if (btrfs_prelim_ref_cache)
- kmem_cache_destroy(btrfs_prelim_ref_cache);
+ kmem_cache_destroy(btrfs_prelim_ref_cache);
}
/*
@@ -566,17 +565,14 @@ static void __merge_refs(struct list_head *head, int mode)
struct __prelim_ref *pos2 = pos1, *tmp;
list_for_each_entry_safe_continue(pos2, tmp, head, list) {
- struct __prelim_ref *xchg, *ref1 = pos1, *ref2 = pos2;
+ struct __prelim_ref *ref1 = pos1, *ref2 = pos2;
struct extent_inode_elem *eie;
if (!ref_for_same_block(ref1, ref2))
continue;
if (mode == 1) {
- if (!ref1->parent && ref2->parent) {
- xchg = ref1;
- ref1 = ref2;
- ref2 = xchg;
- }
+ if (!ref1->parent && ref2->parent)
+ swap(ref1, ref2);
} else {
if (ref1->parent != ref2->parent)
continue;
diff --git a/fs/btrfs/check-integrity.c b/fs/btrfs/check-integrity.c
index 861d47256..516e19d1d 100644
--- a/fs/btrfs/check-integrity.c
+++ b/fs/btrfs/check-integrity.c
@@ -95,6 +95,7 @@
#include <linux/genhd.h>
#include <linux/blkdev.h>
#include <linux/vmalloc.h>
+#include <linux/string.h>
#include "ctree.h"
#include "disk-io.h"
#include "hash.h"
@@ -105,6 +106,7 @@
#include "locking.h"
#include "check-integrity.h"
#include "rcu-string.h"
+#include "compression.h"
#define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
#define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
@@ -176,7 +178,7 @@ struct btrfsic_block {
* Elements of this type are allocated dynamically and required because
* each block object can refer to and can be ref from multiple blocks.
* The key to lookup them in the hashtable is the dev_bytenr of
- * the block ref to plus the one from the block refered from.
+ * the block ref to plus the one from the block referred from.
* The fact that they are searchable via a hashtable and that a
* ref_cnt is maintained is not required for the btrfs integrity
* check algorithm itself, it is only used to make the output more
@@ -755,7 +757,7 @@ static int btrfsic_process_superblock(struct btrfsic_state *state,
BUG_ON(NULL == l);
ret = btrfsic_read_block(state, &tmp_next_block_ctx);
- if (ret < (int)PAGE_CACHE_SIZE) {
+ if (ret < (int)PAGE_SIZE) {
printk(KERN_INFO
"btrfsic: read @logical %llu failed!\n",
tmp_next_block_ctx.start);
@@ -1229,15 +1231,15 @@ static void btrfsic_read_from_block_data(
size_t offset_in_page;
char *kaddr;
char *dst = (char *)dstv;
- size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
+ size_t start_offset = block_ctx->start & ((u64)PAGE_SIZE - 1);
+ unsigned long i = (start_offset + offset) >> PAGE_SHIFT;
WARN_ON(offset + len > block_ctx->len);
- offset_in_page = (start_offset + offset) & (PAGE_CACHE_SIZE - 1);
+ offset_in_page = (start_offset + offset) & (PAGE_SIZE - 1);
while (len > 0) {
- cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
- BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_CACHE_SIZE));
+ cur = min(len, ((size_t)PAGE_SIZE - offset_in_page));
+ BUG_ON(i >= DIV_ROUND_UP(block_ctx->len, PAGE_SIZE));
kaddr = block_ctx->datav[i];
memcpy(dst, kaddr + offset_in_page, cur);
@@ -1603,8 +1605,8 @@ static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
BUG_ON(!block_ctx->datav);
BUG_ON(!block_ctx->pagev);
- num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+ num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
+ PAGE_SHIFT;
while (num_pages > 0) {
num_pages--;
if (block_ctx->datav[num_pages]) {
@@ -1635,15 +1637,15 @@ static int btrfsic_read_block(struct btrfsic_state *state,
BUG_ON(block_ctx->datav);
BUG_ON(block_ctx->pagev);
BUG_ON(block_ctx->mem_to_free);
- if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
+ if (block_ctx->dev_bytenr & ((u64)PAGE_SIZE - 1)) {
printk(KERN_INFO
"btrfsic: read_block() with unaligned bytenr %llu\n",
block_ctx->dev_bytenr);
return -1;
}
- num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+ num_pages = (block_ctx->len + (u64)PAGE_SIZE - 1) >>
+ PAGE_SHIFT;
block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
sizeof(*block_ctx->pagev)) *
num_pages, GFP_NOFS);
@@ -1674,8 +1676,8 @@ static int btrfsic_read_block(struct btrfsic_state *state,
for (j = i; j < num_pages; j++) {
ret = bio_add_page(bio, block_ctx->pagev[j],
- PAGE_CACHE_SIZE, 0);
- if (PAGE_CACHE_SIZE != ret)
+ PAGE_SIZE, 0);
+ if (PAGE_SIZE != ret)
break;
}
if (j == i) {
@@ -1691,7 +1693,7 @@ static int btrfsic_read_block(struct btrfsic_state *state,
return -1;
}
bio_put(bio);
- dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
+ dev_bytenr += (j - i) * PAGE_SIZE;
i = j;
}
for (i = 0; i < num_pages; i++) {
@@ -1767,9 +1769,9 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
u32 crc = ~(u32)0;
unsigned int i;
- if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
+ if (num_pages * PAGE_SIZE < state->metablock_size)
return 1; /* not metadata */
- num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
+ num_pages = state->metablock_size >> PAGE_SHIFT;
h = (struct btrfs_header *)datav[0];
if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
@@ -1777,8 +1779,8 @@ static int btrfsic_test_for_metadata(struct btrfsic_state *state,
for (i = 0; i < num_pages; i++) {
u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
- size_t sublen = i ? PAGE_CACHE_SIZE :
- (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
+ size_t sublen = i ? PAGE_SIZE :
+ (PAGE_SIZE - BTRFS_CSUM_SIZE);
crc = btrfs_crc32c(crc, data, sublen);
}
@@ -1824,14 +1826,14 @@ again:
if (block->is_superblock) {
bytenr = btrfs_super_bytenr((struct btrfs_super_block *)
mapped_datav[0]);
- if (num_pages * PAGE_CACHE_SIZE <
+ if (num_pages * PAGE_SIZE <
BTRFS_SUPER_INFO_SIZE) {
printk(KERN_INFO
"btrfsic: cannot work with too short bios!\n");
return;
}
is_metadata = 1;
- BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
+ BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_SIZE - 1));
processed_len = BTRFS_SUPER_INFO_SIZE;
if (state->print_mask &
BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
@@ -1842,7 +1844,7 @@ again:
}
if (is_metadata) {
if (!block->is_superblock) {
- if (num_pages * PAGE_CACHE_SIZE <
+ if (num_pages * PAGE_SIZE <
state->metablock_size) {
printk(KERN_INFO
"btrfsic: cannot work with too short bios!\n");
@@ -1878,7 +1880,7 @@ again:
}
block->logical_bytenr = bytenr;
} else {
- if (num_pages * PAGE_CACHE_SIZE <
+ if (num_pages * PAGE_SIZE <
state->datablock_size) {
printk(KERN_INFO
"btrfsic: cannot work with too short bios!\n");
@@ -2011,7 +2013,7 @@ again:
block->logical_bytenr = bytenr;
block->is_metadata = 1;
if (block->is_superblock) {
- BUG_ON(PAGE_CACHE_SIZE !=
+ BUG_ON(PAGE_SIZE !=
BTRFS_SUPER_INFO_SIZE);
ret = btrfsic_process_written_superblock(
state,
@@ -2170,8 +2172,8 @@ again:
continue_loop:
BUG_ON(!processed_len);
dev_bytenr += processed_len;
- mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
- num_pages -= processed_len >> PAGE_CACHE_SHIFT;
+ mapped_datav += processed_len >> PAGE_SHIFT;
+ num_pages -= processed_len >> PAGE_SHIFT;
goto again;
}
@@ -2952,7 +2954,7 @@ static void __btrfsic_submit_bio(int rw, struct bio *bio)
goto leave;
cur_bytenr = dev_bytenr;
for (i = 0; i < bio->bi_vcnt; i++) {
- BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
+ BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_SIZE);
mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
if (!mapped_datav[i]) {
while (i > 0) {
@@ -3035,16 +3037,16 @@ int btrfsic_mount(struct btrfs_root *root,
struct list_head *dev_head = &fs_devices->devices;
struct btrfs_device *device;
- if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
+ if (root->nodesize & ((u64)PAGE_SIZE - 1)) {
printk(KERN_INFO
- "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
- root->nodesize, PAGE_CACHE_SIZE);
+ "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_SIZE %ld!\n",
+ root->nodesize, PAGE_SIZE);
return -1;
}
- if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
+ if (root->sectorsize & ((u64)PAGE_SIZE - 1)) {
printk(KERN_INFO
- "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
- root->sectorsize, PAGE_CACHE_SIZE);
+ "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_SIZE %ld!\n",
+ root->sectorsize, PAGE_SIZE);
return -1;
}
state = kzalloc(sizeof(*state), GFP_KERNEL | __GFP_NOWARN | __GFP_REPEAT);
@@ -3076,7 +3078,7 @@ int btrfsic_mount(struct btrfs_root *root,
list_for_each_entry(device, dev_head, dev_list) {
struct btrfsic_dev_state *ds;
- char *p;
+ const char *p;
if (!device->bdev || !device->name)
continue;
@@ -3092,11 +3094,7 @@ int btrfsic_mount(struct btrfs_root *root,
ds->state = state;
bdevname(ds->bdev, ds->name);
ds->name[BDEVNAME_SIZE - 1] = '\0';
- for (p = ds->name; *p != '\0'; p++);
- while (p > ds->name && *p != '/')
- p--;
- if (*p == '/')
- p++;
+ p = kbasename(ds->name);
strlcpy(ds->name, p, sizeof(ds->name));
btrfsic_dev_state_hashtable_add(ds,
&btrfsic_dev_state_hashtable);
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 3346cd8f9..ff61a41ac 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -119,7 +119,7 @@ static int check_compressed_csum(struct inode *inode,
csum = ~(u32)0;
kaddr = kmap_atomic(page);
- csum = btrfs_csum_data(kaddr, csum, PAGE_CACHE_SIZE);
+ csum = btrfs_csum_data(kaddr, csum, PAGE_SIZE);
btrfs_csum_final(csum, (char *)&csum);
kunmap_atomic(kaddr);
@@ -190,7 +190,7 @@ csum_failed:
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
page->mapping = NULL;
- page_cache_release(page);
+ put_page(page);
}
/* do io completion on the original bio */
@@ -224,8 +224,8 @@ out:
static noinline void end_compressed_writeback(struct inode *inode,
const struct compressed_bio *cb)
{
- unsigned long index = cb->start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_CACHE_SHIFT;
+ unsigned long index = cb->start >> PAGE_SHIFT;
+ unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
struct page *pages[16];
unsigned long nr_pages = end_index - index + 1;
int i;
@@ -247,7 +247,7 @@ static noinline void end_compressed_writeback(struct inode *inode,
if (cb->errors)
SetPageError(pages[i]);
end_page_writeback(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
nr_pages -= ret;
index += ret;
@@ -304,7 +304,7 @@ static void end_compressed_bio_write(struct bio *bio)
for (index = 0; index < cb->nr_pages; index++) {
page = cb->compressed_pages[index];
page->mapping = NULL;
- page_cache_release(page);
+ put_page(page);
}
/* finally free the cb struct */
@@ -341,7 +341,7 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
int ret;
int skip_sum = BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM;
- WARN_ON(start & ((u64)PAGE_CACHE_SIZE - 1));
+ WARN_ON(start & ((u64)PAGE_SIZE - 1));
cb = kmalloc(compressed_bio_size(root, compressed_len), GFP_NOFS);
if (!cb)
return -ENOMEM;
@@ -374,14 +374,14 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
page->mapping = inode->i_mapping;
if (bio->bi_iter.bi_size)
ret = io_tree->ops->merge_bio_hook(WRITE, page, 0,
- PAGE_CACHE_SIZE,
+ PAGE_SIZE,
bio, 0);
else
ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) <
- PAGE_CACHE_SIZE) {
+ if (ret || bio_add_page(bio, page, PAGE_SIZE, 0) <
+ PAGE_SIZE) {
bio_get(bio);
/*
@@ -410,15 +410,15 @@ int btrfs_submit_compressed_write(struct inode *inode, u64 start,
BUG_ON(!bio);
bio->bi_private = cb;
bio->bi_end_io = end_compressed_bio_write;
- bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+ bio_add_page(bio, page, PAGE_SIZE, 0);
}
- if (bytes_left < PAGE_CACHE_SIZE) {
+ if (bytes_left < PAGE_SIZE) {
btrfs_info(BTRFS_I(inode)->root->fs_info,
"bytes left %lu compress len %lu nr %lu",
bytes_left, cb->compressed_len, cb->nr_pages);
}
- bytes_left -= PAGE_CACHE_SIZE;
- first_byte += PAGE_CACHE_SIZE;
+ bytes_left -= PAGE_SIZE;
+ first_byte += PAGE_SIZE;
cond_resched();
}
bio_get(bio);
@@ -457,17 +457,17 @@ static noinline int add_ra_bio_pages(struct inode *inode,
int misses = 0;
page = cb->orig_bio->bi_io_vec[cb->orig_bio->bi_vcnt - 1].bv_page;
- last_offset = (page_offset(page) + PAGE_CACHE_SIZE);
+ last_offset = (page_offset(page) + PAGE_SIZE);
em_tree = &BTRFS_I(inode)->extent_tree;
tree = &BTRFS_I(inode)->io_tree;
if (isize == 0)
return 0;
- end_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
while (last_offset < compressed_end) {
- pg_index = last_offset >> PAGE_CACHE_SHIFT;
+ pg_index = last_offset >> PAGE_SHIFT;
if (pg_index > end_index)
break;
@@ -488,11 +488,11 @@ static noinline int add_ra_bio_pages(struct inode *inode,
break;
if (add_to_page_cache_lru(page, mapping, pg_index, GFP_NOFS)) {
- page_cache_release(page);
+ put_page(page);
goto next;
}
- end = last_offset + PAGE_CACHE_SIZE - 1;
+ end = last_offset + PAGE_SIZE - 1;
/*
* at this point, we have a locked page in the page cache
* for these bytes in the file. But, we have to make
@@ -502,27 +502,27 @@ static noinline int add_ra_bio_pages(struct inode *inode,
lock_extent(tree, last_offset, end);
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree, last_offset,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
read_unlock(&em_tree->lock);
if (!em || last_offset < em->start ||
- (last_offset + PAGE_CACHE_SIZE > extent_map_end(em)) ||
+ (last_offset + PAGE_SIZE > extent_map_end(em)) ||
(em->block_start >> 9) != cb->orig_bio->bi_iter.bi_sector) {
free_extent_map(em);
unlock_extent(tree, last_offset, end);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
break;
}
free_extent_map(em);
if (page->index == end_index) {
char *userpage;
- size_t zero_offset = isize & (PAGE_CACHE_SIZE - 1);
+ size_t zero_offset = isize & (PAGE_SIZE - 1);
if (zero_offset) {
int zeros;
- zeros = PAGE_CACHE_SIZE - zero_offset;
+ zeros = PAGE_SIZE - zero_offset;
userpage = kmap_atomic(page);
memset(userpage + zero_offset, 0, zeros);
flush_dcache_page(page);
@@ -531,19 +531,19 @@ static noinline int add_ra_bio_pages(struct inode *inode,
}
ret = bio_add_page(cb->orig_bio, page,
- PAGE_CACHE_SIZE, 0);
+ PAGE_SIZE, 0);
- if (ret == PAGE_CACHE_SIZE) {
+ if (ret == PAGE_SIZE) {
nr_pages++;
- page_cache_release(page);
+ put_page(page);
} else {
unlock_extent(tree, last_offset, end);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
break;
}
next:
- last_offset += PAGE_CACHE_SIZE;
+ last_offset += PAGE_SIZE;
}
return 0;
}
@@ -567,7 +567,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
struct extent_map_tree *em_tree;
struct compressed_bio *cb;
struct btrfs_root *root = BTRFS_I(inode)->root;
- unsigned long uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+ unsigned long uncompressed_len = bio->bi_vcnt * PAGE_SIZE;
unsigned long compressed_len;
unsigned long nr_pages;
unsigned long pg_index;
@@ -589,7 +589,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
read_lock(&em_tree->lock);
em = lookup_extent_mapping(em_tree,
page_offset(bio->bi_io_vec->bv_page),
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
read_unlock(&em_tree->lock);
if (!em)
return -EIO;
@@ -617,7 +617,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
cb->compress_type = extent_compress_type(bio_flags);
cb->orig_bio = bio;
- nr_pages = DIV_ROUND_UP(compressed_len, PAGE_CACHE_SIZE);
+ nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
cb->compressed_pages = kcalloc(nr_pages, sizeof(struct page *),
GFP_NOFS);
if (!cb->compressed_pages)
@@ -640,7 +640,7 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
add_ra_bio_pages(inode, em_start + em_len, cb);
/* include any pages we added in add_ra-bio_pages */
- uncompressed_len = bio->bi_vcnt * PAGE_CACHE_SIZE;
+ uncompressed_len = bio->bi_vcnt * PAGE_SIZE;
cb->len = uncompressed_len;
comp_bio = compressed_bio_alloc(bdev, cur_disk_byte, GFP_NOFS);
@@ -653,18 +653,18 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
for (pg_index = 0; pg_index < nr_pages; pg_index++) {
page = cb->compressed_pages[pg_index];
page->mapping = inode->i_mapping;
- page->index = em_start >> PAGE_CACHE_SHIFT;
+ page->index = em_start >> PAGE_SHIFT;
if (comp_bio->bi_iter.bi_size)
ret = tree->ops->merge_bio_hook(READ, page, 0,
- PAGE_CACHE_SIZE,
+ PAGE_SIZE,
comp_bio, 0);
else
ret = 0;
page->mapping = NULL;
- if (ret || bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0) <
- PAGE_CACHE_SIZE) {
+ if (ret || bio_add_page(comp_bio, page, PAGE_SIZE, 0) <
+ PAGE_SIZE) {
bio_get(comp_bio);
ret = btrfs_bio_wq_end_io(root->fs_info, comp_bio,
@@ -702,9 +702,9 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
comp_bio->bi_private = cb;
comp_bio->bi_end_io = end_compressed_bio_read;
- bio_add_page(comp_bio, page, PAGE_CACHE_SIZE, 0);
+ bio_add_page(comp_bio, page, PAGE_SIZE, 0);
}
- cur_disk_byte += PAGE_CACHE_SIZE;
+ cur_disk_byte += PAGE_SIZE;
}
bio_get(comp_bio);
@@ -1013,8 +1013,8 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
/* copy bytes from the working buffer into the pages */
while (working_bytes > 0) {
- bytes = min(PAGE_CACHE_SIZE - *pg_offset,
- PAGE_CACHE_SIZE - buf_offset);
+ bytes = min(PAGE_SIZE - *pg_offset,
+ PAGE_SIZE - buf_offset);
bytes = min(bytes, working_bytes);
kaddr = kmap_atomic(page_out);
memcpy(kaddr + *pg_offset, buf + buf_offset, bytes);
@@ -1027,7 +1027,7 @@ int btrfs_decompress_buf2page(char *buf, unsigned long buf_start,
current_buf_start += bytes;
/* check if we need to pick another page */
- if (*pg_offset == PAGE_CACHE_SIZE) {
+ if (*pg_offset == PAGE_SIZE) {
(*pg_index)++;
if (*pg_index >= vcnt)
return 0;
diff --git a/fs/btrfs/compression.h b/fs/btrfs/compression.h
index 13a4dc043..f49d8b8c0 100644
--- a/fs/btrfs/compression.h
+++ b/fs/btrfs/compression.h
@@ -48,6 +48,15 @@ int btrfs_submit_compressed_read(struct inode *inode, struct bio *bio,
void btrfs_clear_biovec_end(struct bio_vec *bvec, int vcnt,
unsigned long pg_index,
unsigned long pg_offset);
+
+enum btrfs_compression_type {
+ BTRFS_COMPRESS_NONE = 0,
+ BTRFS_COMPRESS_ZLIB = 1,
+ BTRFS_COMPRESS_LZO = 2,
+ BTRFS_COMPRESS_TYPES = 2,
+ BTRFS_COMPRESS_LAST = 3,
+};
+
struct btrfs_compress_op {
struct list_head *(*alloc_workspace)(void);
diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index 769e0ff1b..ec7928a27 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -19,6 +19,7 @@
#include <linux/sched.h>
#include <linux/slab.h>
#include <linux/rbtree.h>
+#include <linux/vmalloc.h>
#include "ctree.h"
#include "disk-io.h"
#include "transaction.h"
@@ -311,7 +312,7 @@ struct tree_mod_root {
struct tree_mod_elem {
struct rb_node node;
- u64 index; /* shifted logical */
+ u64 logical;
u64 seq;
enum mod_log_op op;
@@ -435,11 +436,11 @@ void btrfs_put_tree_mod_seq(struct btrfs_fs_info *fs_info,
/*
* key order of the log:
- * index -> sequence
+ * node/leaf start address -> sequence
*
- * the index is the shifted logical of the *new* root node for root replace
- * operations, or the shifted logical of the affected block for all other
- * operations.
+ * The 'start address' is the logical address of the *new* root node
+ * for root replace operations, or the logical address of the affected
+ * block for all other operations.
*
* Note: must be called with write lock (tree_mod_log_write_lock).
*/
@@ -460,9 +461,9 @@ __tree_mod_log_insert(struct btrfs_fs_info *fs_info, struct tree_mod_elem *tm)
while (*new) {
cur = container_of(*new, struct tree_mod_elem, node);
parent = *new;
- if (cur->index < tm->index)
+ if (cur->logical < tm->logical)
new = &((*new)->rb_left);
- else if (cur->index > tm->index)
+ else if (cur->logical > tm->logical)
new = &((*new)->rb_right);
else if (cur->seq < tm->seq)
new = &((*new)->rb_left);
@@ -523,7 +524,7 @@ alloc_tree_mod_elem(struct extent_buffer *eb, int slot,
if (!tm)
return NULL;
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->logical = eb->start;
if (op != MOD_LOG_KEY_ADD) {
btrfs_node_key(eb, &tm->key, slot);
tm->blockptr = btrfs_node_blockptr(eb, slot);
@@ -588,7 +589,7 @@ tree_mod_log_insert_move(struct btrfs_fs_info *fs_info,
goto free_tms;
}
- tm->index = eb->start >> PAGE_CACHE_SHIFT;
+ tm->logical = eb->start;
tm->slot = src_slot;
tm->move.dst_slot = dst_slot;
tm->move.nr_items = nr_items;
@@ -699,7 +700,7 @@ tree_mod_log_insert_root(struct btrfs_fs_info *fs_info,
goto free_tms;
}
- tm->index = new_root->start >> PAGE_CACHE_SHIFT;
+ tm->logical = new_root->start;
tm->old_root.logical = old_root->start;
tm->old_root.level = btrfs_header_level(old_root);
tm->generation = btrfs_header_generation(old_root);
@@ -739,16 +740,15 @@ __tree_mod_log_search(struct btrfs_fs_info *fs_info, u64 start, u64 min_seq,
struct rb_node *node;
struct tree_mod_elem *cur = NULL;
struct tree_mod_elem *found = NULL;
- u64 index = start >> PAGE_CACHE_SHIFT;
tree_mod_log_read_lock(fs_info);
tm_root = &fs_info->tree_mod_log;
node = tm_root->rb_node;
while (node) {
cur = container_of(node, struct tree_mod_elem, node);
- if (cur->index < index) {
+ if (cur->logical < start) {
node = node->rb_left;
- } else if (cur->index > index) {
+ } else if (cur->logical > start) {
node = node->rb_right;
} else if (cur->seq < min_seq) {
node = node->rb_left;
@@ -1230,9 +1230,10 @@ __tree_mod_log_oldest_root(struct btrfs_fs_info *fs_info,
return NULL;
/*
- * the very last operation that's logged for a root is the replacement
- * operation (if it is replaced at all). this has the index of the *new*
- * root, making it the very first operation that's logged for this root.
+ * the very last operation that's logged for a root is the
+ * replacement operation (if it is replaced at all). this has
+ * the logical address of the *new* root, making it the very
+ * first operation that's logged for this root.
*/
while (1) {
tm = tree_mod_log_search_oldest(fs_info, root_logical,
@@ -1336,7 +1337,7 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
if (!next)
break;
tm = container_of(next, struct tree_mod_elem, node);
- if (tm->index != first_tm->index)
+ if (tm->logical != first_tm->logical)
break;
}
tree_mod_log_read_unlock(fs_info);
@@ -5361,10 +5362,13 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
goto out;
}
- tmp_buf = kmalloc(left_root->nodesize, GFP_NOFS);
+ tmp_buf = kmalloc(left_root->nodesize, GFP_KERNEL | __GFP_NOWARN);
if (!tmp_buf) {
- ret = -ENOMEM;
- goto out;
+ tmp_buf = vmalloc(left_root->nodesize);
+ if (!tmp_buf) {
+ ret = -ENOMEM;
+ goto out;
+ }
}
left_path->search_commit_root = 1;
@@ -5565,7 +5569,7 @@ int btrfs_compare_trees(struct btrfs_root *left_root,
out:
btrfs_free_path(left_path);
btrfs_free_path(right_path);
- kfree(tmp_buf);
+ kvfree(tmp_buf);
return ret;
}
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index bfe4a337f..208d19938 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -100,6 +100,9 @@ struct btrfs_ordered_sum;
/* tracks free space in block groups. */
#define BTRFS_FREE_SPACE_TREE_OBJECTID 10ULL
+/* device stats in the device tree */
+#define BTRFS_DEV_STATS_OBJECTID 0ULL
+
/* for storing balance parameters in the root tree */
#define BTRFS_BALANCE_OBJECTID -4ULL
@@ -715,14 +718,6 @@ struct btrfs_timespec {
__le32 nsec;
} __attribute__ ((__packed__));
-enum btrfs_compression_type {
- BTRFS_COMPRESS_NONE = 0,
- BTRFS_COMPRESS_ZLIB = 1,
- BTRFS_COMPRESS_LZO = 2,
- BTRFS_COMPRESS_TYPES = 2,
- BTRFS_COMPRESS_LAST = 3,
-};
-
struct btrfs_inode_item {
/* nfs style generation number */
__le64 generation;
@@ -793,7 +788,7 @@ struct btrfs_root_item {
/*
* This generation number is used to test if the new fields are valid
- * and up to date while reading the root item. Everytime the root item
+ * and up to date while reading the root item. Every time the root item
* is written out, the "generation" field is copied into this field. If
* anyone ever mounted the fs with an older kernel, we will have
* mismatching generation values here and thus must invalidate the
@@ -1002,8 +997,10 @@ struct btrfs_dev_replace {
pid_t lock_owner;
atomic_t nesting_level;
struct mutex lock_finishing_cancel_unmount;
- struct mutex lock_management_lock;
- struct mutex lock;
+ rwlock_t lock;
+ atomic_t read_locks;
+ atomic_t blocking_readers;
+ wait_queue_head_t read_lock_wq;
struct btrfs_scrub_progress scrub_progress;
};
@@ -1222,10 +1219,10 @@ struct btrfs_space_info {
* we've called update_block_group and dropped the bytes_used counter
* and increased the bytes_pinned counter. However this means that
* bytes_pinned does not reflect the bytes that will be pinned once the
- * delayed refs are flushed, so this counter is inc'ed everytime we call
- * btrfs_free_extent so it is a realtime count of what will be freed
- * once the transaction is committed. It will be zero'ed everytime the
- * transaction commits.
+ * delayed refs are flushed, so this counter is inc'ed every time we
+ * call btrfs_free_extent so it is a realtime count of what will be
+ * freed once the transaction is committed. It will be zero'ed every
+ * time the transaction commits.
*/
struct percpu_counter total_bytes_pinned;
@@ -1822,6 +1819,9 @@ struct btrfs_fs_info {
spinlock_t reada_lock;
struct radix_tree_root reada_tree;
+ /* readahead works cnt */
+ atomic_t reada_works_cnt;
+
/* Extent buffer radix tree */
spinlock_t buffer_lock;
struct radix_tree_root buffer_radix;
@@ -2185,13 +2185,43 @@ struct btrfs_ioctl_defrag_range_args {
*/
#define BTRFS_QGROUP_RELATION_KEY 246
+/*
+ * Obsolete name, see BTRFS_TEMPORARY_ITEM_KEY.
+ */
#define BTRFS_BALANCE_ITEM_KEY 248
/*
- * Persistantly stores the io stats in the device tree.
- * One key for all stats, (0, BTRFS_DEV_STATS_KEY, devid).
+ * The key type for tree items that are stored persistently, but do not need to
+ * exist for extended period of time. The items can exist in any tree.
+ *
+ * [subtype, BTRFS_TEMPORARY_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - balance status item
+ * (BTRFS_BALANCE_OBJECTID, BTRFS_TEMPORARY_ITEM_KEY, 0)
*/
-#define BTRFS_DEV_STATS_KEY 249
+#define BTRFS_TEMPORARY_ITEM_KEY 248
+
+/*
+ * Obsolete name, see BTRFS_PERSISTENT_ITEM_KEY
+ */
+#define BTRFS_DEV_STATS_KEY 249
+
+/*
+ * The key type for tree items that are stored persistently and usually exist
+ * for a long period, eg. filesystem lifetime. The item kinds can be status
+ * information, stats or preference values. The item can exist in any tree.
+ *
+ * [subtype, BTRFS_PERSISTENT_ITEM_KEY, data]
+ *
+ * Existing items:
+ *
+ * - device statistics, store IO stats in the device tree, one key for all
+ * stats
+ * (BTRFS_DEV_STATS_OBJECTID, BTRFS_DEV_STATS_KEY, 0)
+ */
+#define BTRFS_PERSISTENT_ITEM_KEY 249
/*
* Persistantly stores the device replace state in the device tree.
@@ -2241,7 +2271,7 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_ENOSPC_DEBUG (1 << 15)
#define BTRFS_MOUNT_AUTO_DEFRAG (1 << 16)
#define BTRFS_MOUNT_INODE_MAP_CACHE (1 << 17)
-#define BTRFS_MOUNT_RECOVERY (1 << 18)
+#define BTRFS_MOUNT_USEBACKUPROOT (1 << 18)
#define BTRFS_MOUNT_SKIP_BALANCE (1 << 19)
#define BTRFS_MOUNT_CHECK_INTEGRITY (1 << 20)
#define BTRFS_MOUNT_CHECK_INTEGRITY_INCLUDING_EXTENT_DATA (1 << 21)
@@ -2250,9 +2280,10 @@ struct btrfs_ioctl_defrag_range_args {
#define BTRFS_MOUNT_FRAGMENT_DATA (1 << 24)
#define BTRFS_MOUNT_FRAGMENT_METADATA (1 << 25)
#define BTRFS_MOUNT_FREE_SPACE_TREE (1 << 26)
+#define BTRFS_MOUNT_NOLOGREPLAY (1 << 27)
#define BTRFS_DEFAULT_COMMIT_INTERVAL (30)
-#define BTRFS_DEFAULT_MAX_INLINE (8192)
+#define BTRFS_DEFAULT_MAX_INLINE (2048)
#define btrfs_clear_opt(o, opt) ((o) &= ~BTRFS_MOUNT_##opt)
#define btrfs_set_opt(o, opt) ((o) |= BTRFS_MOUNT_##opt)
@@ -2353,6 +2384,9 @@ struct btrfs_map_token {
unsigned long offset;
};
+#define BTRFS_BYTES_TO_BLKS(fs_info, bytes) \
+ ((bytes) >> (fs_info)->sb->s_blocksize_bits)
+
static inline void btrfs_init_map_token (struct btrfs_map_token *token)
{
token->kaddr = NULL;
@@ -3448,8 +3482,7 @@ u64 btrfs_csum_bytes_to_leaves(struct btrfs_root *root, u64 csum_bytes);
static inline u64 btrfs_calc_trans_metadata_size(struct btrfs_root *root,
unsigned num_items)
{
- return (root->nodesize + root->nodesize * (BTRFS_MAX_LEVEL - 1)) *
- 2 * num_items;
+ return root->nodesize * BTRFS_MAX_LEVEL * 2 * num_items;
}
/*
@@ -4027,7 +4060,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct inode *dir, u64 objectid,
const char *name, int name_len);
-int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
int front);
int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
@@ -4089,6 +4122,8 @@ void btrfs_test_inode_set_ops(struct inode *inode);
/* ioctl.c */
long btrfs_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg);
+int btrfs_ioctl_get_supported_features(void __user *arg);
void btrfs_update_iflags(struct inode *inode);
void btrfs_inherit_iflags(struct inode *inode, struct inode *dir);
int btrfs_is_empty_uuid(u8 *uuid);
@@ -4151,7 +4186,8 @@ void btrfs_sysfs_remove_mounted(struct btrfs_fs_info *fs_info);
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size);
/* super.c */
-int btrfs_parse_options(struct btrfs_root *root, char *options);
+int btrfs_parse_options(struct btrfs_root *root, char *options,
+ unsigned long new_flags);
int btrfs_sync_fs(struct super_block *sb, int wait);
#ifdef CONFIG_PRINTK
@@ -4525,8 +4561,8 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
struct btrfs_key *start, struct btrfs_key *end);
int btrfs_reada_wait(void *handle);
void btrfs_reada_detach(void *handle);
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err);
+int btree_readahead_hook(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, u64 start, int err);
static inline int is_fstree(u64 rootid)
{
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index b57daa895..6cef0062f 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -43,8 +43,7 @@ int __init btrfs_delayed_inode_init(void)
void btrfs_delayed_inode_exit(void)
{
- if (delayed_node_cache)
- kmem_cache_destroy(delayed_node_cache);
+ kmem_cache_destroy(delayed_node_cache);
}
static inline void btrfs_init_delayed_node(
@@ -651,9 +650,14 @@ static int btrfs_delayed_inode_reserve_metadata(
goto out;
ret = btrfs_block_rsv_migrate(src_rsv, dst_rsv, num_bytes);
- if (!WARN_ON(ret))
+ if (!ret)
goto out;
+ if (btrfs_test_opt(root, ENOSPC_DEBUG)) {
+ btrfs_debug(root->fs_info,
+ "block rsv migrate returned %d", ret);
+ WARN_ON(1);
+ }
/*
* Ok this is a problem, let's just steal from the global rsv
* since this really shouldn't happen that often.
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 914ac13bd..430b3689b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -929,14 +929,10 @@ btrfs_find_delayed_ref_head(struct btrfs_trans_handle *trans, u64 bytenr)
void btrfs_delayed_ref_exit(void)
{
- if (btrfs_delayed_ref_head_cachep)
- kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
- if (btrfs_delayed_tree_ref_cachep)
- kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
- if (btrfs_delayed_data_ref_cachep)
- kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
- if (btrfs_delayed_extent_op_cachep)
- kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
+ kmem_cache_destroy(btrfs_delayed_ref_head_cachep);
+ kmem_cache_destroy(btrfs_delayed_tree_ref_cachep);
+ kmem_cache_destroy(btrfs_delayed_data_ref_cachep);
+ kmem_cache_destroy(btrfs_delayed_extent_op_cachep);
}
int btrfs_delayed_ref_init(void)
diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c
index cbb7dbfb3..26bcb487f 100644
--- a/fs/btrfs/dev-replace.c
+++ b/fs/btrfs/dev-replace.c
@@ -202,13 +202,13 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
struct btrfs_dev_replace_item *ptr;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
if (!dev_replace->is_valid ||
!dev_replace->item_needs_writeback) {
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
return 0;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
key.objectid = 0;
key.type = BTRFS_DEV_REPLACE_KEY;
@@ -264,7 +264,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
ptr = btrfs_item_ptr(eb, path->slots[0],
struct btrfs_dev_replace_item);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
if (dev_replace->srcdev)
btrfs_set_dev_replace_src_devid(eb, ptr,
dev_replace->srcdev->devid);
@@ -287,7 +287,7 @@ int btrfs_run_dev_replace(struct btrfs_trans_handle *trans,
btrfs_set_dev_replace_cursor_right(eb, ptr,
dev_replace->cursor_right);
dev_replace->item_needs_writeback = 0;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_mark_buffer_dirty(eb);
@@ -356,7 +356,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
return PTR_ERR(trans);
}
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -394,8 +394,10 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
dev_replace->cursor_right = 0;
dev_replace->is_valid = 1;
dev_replace->item_needs_writeback = 1;
+ atomic64_set(&dev_replace->num_write_errors, 0);
+ atomic64_set(&dev_replace->num_uncorrectable_read_errors, 0);
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
ret = btrfs_sysfs_add_device_link(tgt_device->fs_devices, tgt_device);
if (ret)
@@ -407,7 +409,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
trans = btrfs_start_transaction(root, 0);
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
goto leave;
}
@@ -433,7 +435,7 @@ int btrfs_dev_replace_start(struct btrfs_root *root,
leave:
dev_replace->srcdev = NULL;
dev_replace->tgtdev = NULL;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_destroy_dev_replace_tgtdev(fs_info, tgt_device);
return ret;
}
@@ -471,18 +473,18 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* don't allow cancel or unmount to disturb the finishing procedure */
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
/* was the operation canceled, or is it finished? */
if (dev_replace->replace_state !=
BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED) {
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
return 0;
}
tgt_device = dev_replace->tgtdev;
src_device = dev_replace->srcdev;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
/*
* flush all outstanding I/O and inode extent mappings before the
@@ -507,7 +509,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
/* keep away write_all_supers() during the finishing procedure */
mutex_lock(&root->fs_info->fs_devices->device_list_mutex);
mutex_lock(&root->fs_info->chunk_mutex);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
dev_replace->replace_state =
scrub_ret ? BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED
: BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED;
@@ -528,7 +530,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
rcu_str_deref(src_device->name),
src_device->devid,
rcu_str_deref(tgt_device->name), scrub_ret);
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
mutex_unlock(&root->fs_info->chunk_mutex);
mutex_unlock(&root->fs_info->fs_devices->device_list_mutex);
mutex_unlock(&uuid_mutex);
@@ -565,7 +567,7 @@ static int btrfs_dev_replace_finishing(struct btrfs_fs_info *fs_info,
list_add(&tgt_device->dev_alloc_list, &fs_info->fs_devices->alloc_list);
fs_info->fs_devices->rw_devices++;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_rm_dev_replace_blocked(fs_info);
@@ -649,7 +651,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
struct btrfs_device *srcdev;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
/* even if !dev_replace_is_valid, the values are good enough for
* the replace_status ioctl */
args->result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NO_ERROR;
@@ -675,7 +677,7 @@ void btrfs_dev_replace_status(struct btrfs_fs_info *fs_info,
div_u64(btrfs_device_get_total_bytes(srcdev), 1000));
break;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
}
int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
@@ -698,13 +700,13 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
return -EROFS;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
result = BTRFS_IOCTL_DEV_REPLACE_RESULT_NOT_STARTED;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
goto leave;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_SUSPENDED:
@@ -717,7 +719,7 @@ static u64 __btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info)
dev_replace->replace_state = BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED;
dev_replace->time_stopped = get_seconds();
dev_replace->item_needs_writeback = 1;
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
btrfs_scrub_cancel(fs_info);
trans = btrfs_start_transaction(root, 0);
@@ -740,7 +742,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
mutex_lock(&dev_replace->lock_finishing_cancel_unmount);
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
@@ -756,7 +758,7 @@ void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info)
break;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
mutex_unlock(&dev_replace->lock_finishing_cancel_unmount);
}
@@ -766,12 +768,12 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
struct task_struct *task;
struct btrfs_dev_replace *dev_replace = &fs_info->dev_replace;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 1);
switch (dev_replace->replace_state) {
case BTRFS_IOCTL_DEV_REPLACE_STATE_NEVER_STARTED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_FINISHED:
case BTRFS_IOCTL_DEV_REPLACE_STATE_CANCELED:
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
return 0;
case BTRFS_IOCTL_DEV_REPLACE_STATE_STARTED:
break;
@@ -784,10 +786,10 @@ int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info)
btrfs_info(fs_info, "cannot continue dev_replace, tgtdev is missing");
btrfs_info(fs_info,
"you may cancel the operation after 'mount -o degraded'");
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
return 0;
}
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 1);
WARN_ON(atomic_xchg(
&fs_info->mutually_exclusive_operation_running, 1));
@@ -802,7 +804,7 @@ static int btrfs_dev_replace_kthread(void *data)
struct btrfs_ioctl_dev_replace_args *status_args;
u64 progress;
- status_args = kzalloc(sizeof(*status_args), GFP_NOFS);
+ status_args = kzalloc(sizeof(*status_args), GFP_KERNEL);
if (status_args) {
btrfs_dev_replace_status(fs_info, status_args);
progress = status_args->status.progress_1000;
@@ -858,55 +860,65 @@ int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace)
* not called and the the filesystem is remounted
* in degraded state. This does not stop the
* dev_replace procedure. It needs to be canceled
- * manually if the cancelation is wanted.
+ * manually if the cancellation is wanted.
*/
break;
}
return 1;
}
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace)
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw)
{
- /* the beginning is just an optimization for the typical case */
- if (atomic_read(&dev_replace->nesting_level) == 0) {
-acquire_lock:
- /* this is not a nested case where the same thread
- * is trying to acqurire the same lock twice */
- mutex_lock(&dev_replace->lock);
- mutex_lock(&dev_replace->lock_management_lock);
- dev_replace->lock_owner = current->pid;
- atomic_inc(&dev_replace->nesting_level);
- mutex_unlock(&dev_replace->lock_management_lock);
- return;
+ if (rw == 1) {
+ /* write */
+again:
+ wait_event(dev_replace->read_lock_wq,
+ atomic_read(&dev_replace->blocking_readers) == 0);
+ write_lock(&dev_replace->lock);
+ if (atomic_read(&dev_replace->blocking_readers)) {
+ write_unlock(&dev_replace->lock);
+ goto again;
+ }
+ } else {
+ read_lock(&dev_replace->lock);
+ atomic_inc(&dev_replace->read_locks);
}
+}
- mutex_lock(&dev_replace->lock_management_lock);
- if (atomic_read(&dev_replace->nesting_level) > 0 &&
- dev_replace->lock_owner == current->pid) {
- WARN_ON(!mutex_is_locked(&dev_replace->lock));
- atomic_inc(&dev_replace->nesting_level);
- mutex_unlock(&dev_replace->lock_management_lock);
- return;
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw)
+{
+ if (rw == 1) {
+ /* write */
+ ASSERT(atomic_read(&dev_replace->blocking_readers) == 0);
+ write_unlock(&dev_replace->lock);
+ } else {
+ ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+ atomic_dec(&dev_replace->read_locks);
+ read_unlock(&dev_replace->lock);
}
+}
- mutex_unlock(&dev_replace->lock_management_lock);
- goto acquire_lock;
+/* inc blocking cnt and release read lock */
+void btrfs_dev_replace_set_lock_blocking(
+ struct btrfs_dev_replace *dev_replace)
+{
+ /* only set blocking for read lock */
+ ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+ atomic_inc(&dev_replace->blocking_readers);
+ read_unlock(&dev_replace->lock);
}
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace)
+/* acquire read lock and dec blocking cnt */
+void btrfs_dev_replace_clear_lock_blocking(
+ struct btrfs_dev_replace *dev_replace)
{
- WARN_ON(!mutex_is_locked(&dev_replace->lock));
- mutex_lock(&dev_replace->lock_management_lock);
- WARN_ON(atomic_read(&dev_replace->nesting_level) < 1);
- WARN_ON(dev_replace->lock_owner != current->pid);
- atomic_dec(&dev_replace->nesting_level);
- if (atomic_read(&dev_replace->nesting_level) == 0) {
- dev_replace->lock_owner = 0;
- mutex_unlock(&dev_replace->lock_management_lock);
- mutex_unlock(&dev_replace->lock);
- } else {
- mutex_unlock(&dev_replace->lock_management_lock);
- }
+ /* only set blocking for read lock */
+ ASSERT(atomic_read(&dev_replace->read_locks) > 0);
+ ASSERT(atomic_read(&dev_replace->blocking_readers) > 0);
+ read_lock(&dev_replace->lock);
+ if (atomic_dec_and_test(&dev_replace->blocking_readers) &&
+ waitqueue_active(&dev_replace->read_lock_wq))
+ wake_up(&dev_replace->read_lock_wq);
}
void btrfs_bio_counter_inc_noblocked(struct btrfs_fs_info *fs_info)
diff --git a/fs/btrfs/dev-replace.h b/fs/btrfs/dev-replace.h
index 20035cbbf..29e3ef5f9 100644
--- a/fs/btrfs/dev-replace.h
+++ b/fs/btrfs/dev-replace.h
@@ -34,8 +34,11 @@ int btrfs_dev_replace_cancel(struct btrfs_fs_info *fs_info,
void btrfs_dev_replace_suspend_for_unmount(struct btrfs_fs_info *fs_info);
int btrfs_resume_dev_replace_async(struct btrfs_fs_info *fs_info);
int btrfs_dev_replace_is_ongoing(struct btrfs_dev_replace *dev_replace);
-void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace);
-void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_lock(struct btrfs_dev_replace *dev_replace, int rw);
+void btrfs_dev_replace_unlock(struct btrfs_dev_replace *dev_replace, int rw);
+void btrfs_dev_replace_set_lock_blocking(struct btrfs_dev_replace *dev_replace);
+void btrfs_dev_replace_clear_lock_blocking(
+ struct btrfs_dev_replace *dev_replace);
static inline void btrfs_dev_replace_stats_inc(atomic64_t *stat_value)
{
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index d8d68af5a..4e47849d7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -25,7 +25,6 @@
#include <linux/buffer_head.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
-#include <linux/freezer.h>
#include <linux/slab.h>
#include <linux/migrate.h>
#include <linux/ratelimit.h>
@@ -50,6 +49,7 @@
#include "raid56.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "compression.h"
#ifdef CONFIG_X86
#include <asm/cpufeature.h>
@@ -110,8 +110,7 @@ int __init btrfs_end_io_wq_init(void)
void btrfs_end_io_wq_exit(void)
{
- if (btrfs_end_io_wq_cache)
- kmem_cache_destroy(btrfs_end_io_wq_cache);
+ kmem_cache_destroy(btrfs_end_io_wq_cache);
}
/*
@@ -303,7 +302,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
err = map_private_extent_buffer(buf, offset, 32,
&kaddr, &map_start, &map_len);
if (err)
- return 1;
+ return err;
cur_len = min(len, map_len - (offset - map_start));
crc = btrfs_csum_data(kaddr + offset - map_start,
crc, cur_len);
@@ -313,7 +312,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
if (csum_size > sizeof(inline_result)) {
result = kzalloc(csum_size, GFP_NOFS);
if (!result)
- return 1;
+ return -ENOMEM;
} else {
result = (char *)&inline_result;
}
@@ -334,7 +333,7 @@ static int csum_tree_block(struct btrfs_fs_info *fs_info,
val, found, btrfs_header_level(buf));
if (result != (char *)&inline_result)
kfree(result);
- return 1;
+ return -EUCLEAN;
}
} else {
write_extent_buffer(buf, result, 0, csum_size);
@@ -513,11 +512,21 @@ static int csum_dirty_buffer(struct btrfs_fs_info *fs_info, struct page *page)
eb = (struct extent_buffer *)page->private;
if (page != eb->pages[0])
return 0;
+
found_start = btrfs_header_bytenr(eb);
- if (WARN_ON(found_start != start || !PageUptodate(page)))
- return 0;
- csum_tree_block(fs_info, eb, 0);
- return 0;
+ /*
+ * Please do not consolidate these warnings into a single if.
+ * It is useful to know what went wrong.
+ */
+ if (WARN_ON(found_start != start))
+ return -EUCLEAN;
+ if (WARN_ON(!PageUptodate(page)))
+ return -EUCLEAN;
+
+ ASSERT(memcmp_extent_buffer(eb, fs_info->fsid,
+ btrfs_header_fsid(), BTRFS_FSID_SIZE) == 0);
+
+ return csum_tree_block(fs_info, eb, 0);
}
static int check_tree_block_fsid(struct btrfs_fs_info *fs_info,
@@ -612,6 +621,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
int found_level;
struct extent_buffer *eb;
struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
+ struct btrfs_fs_info *fs_info = root->fs_info;
int ret = 0;
int reads_done;
@@ -637,21 +647,21 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
found_start = btrfs_header_bytenr(eb);
if (found_start != eb->start) {
- btrfs_err_rl(eb->fs_info, "bad tree block start %llu %llu",
- found_start, eb->start);
+ btrfs_err_rl(fs_info, "bad tree block start %llu %llu",
+ found_start, eb->start);
ret = -EIO;
goto err;
}
- if (check_tree_block_fsid(root->fs_info, eb)) {
- btrfs_err_rl(eb->fs_info, "bad fsid on block %llu",
- eb->start);
+ if (check_tree_block_fsid(fs_info, eb)) {
+ btrfs_err_rl(fs_info, "bad fsid on block %llu",
+ eb->start);
ret = -EIO;
goto err;
}
found_level = btrfs_header_level(eb);
if (found_level >= BTRFS_MAX_LEVEL) {
- btrfs_err(root->fs_info, "bad tree block level %d",
- (int)btrfs_header_level(eb));
+ btrfs_err(fs_info, "bad tree block level %d",
+ (int)btrfs_header_level(eb));
ret = -EIO;
goto err;
}
@@ -659,11 +669,9 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
btrfs_set_buffer_lockdep_class(btrfs_header_owner(eb),
eb, found_level);
- ret = csum_tree_block(root->fs_info, eb, 1);
- if (ret) {
- ret = -EIO;
+ ret = csum_tree_block(fs_info, eb, 1);
+ if (ret)
goto err;
- }
/*
* If this is a leaf block and it is corrupt, set the corrupt bit so
@@ -680,7 +688,7 @@ static int btree_readpage_end_io_hook(struct btrfs_io_bio *io_bio,
err:
if (reads_done &&
test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(root, eb, eb->start, ret);
+ btree_readahead_hook(fs_info, eb, eb->start, ret);
if (ret) {
/*
@@ -699,14 +707,13 @@ out:
static int btree_io_failed_hook(struct page *page, int failed_mirror)
{
struct extent_buffer *eb;
- struct btrfs_root *root = BTRFS_I(page->mapping->host)->root;
eb = (struct extent_buffer *)page->private;
set_bit(EXTENT_BUFFER_READ_ERR, &eb->bflags);
eb->read_mirror = failed_mirror;
atomic_dec(&eb->io_pages);
if (test_and_clear_bit(EXTENT_BUFFER_READAHEAD, &eb->bflags))
- btree_readahead_hook(root, eb, eb->start, -EIO);
+ btree_readahead_hook(eb->fs_info, eb, eb->start, -EIO);
return -EIO; /* we fixed nothing */
}
@@ -816,7 +823,7 @@ static void run_one_async_done(struct btrfs_work *work)
waitqueue_active(&fs_info->async_submit_wait))
wake_up(&fs_info->async_submit_wait);
- /* If an error occured we just want to clean up the bio and move on */
+ /* If an error occurred we just want to clean up the bio and move on */
if (async->error) {
async->bio->bi_error = async->error;
bio_endio(async->bio);
@@ -931,7 +938,7 @@ static int check_async_write(struct inode *inode, unsigned long bio_flags)
if (bio_flags & EXTENT_BIO_TREE_LOG)
return 0;
#ifdef CONFIG_X86
- if (static_cpu_has_safe(X86_FEATURE_XMM4_2))
+ if (static_cpu_has(X86_FEATURE_XMM4_2))
return 0;
#endif
return 1;
@@ -1055,7 +1062,7 @@ static void btree_invalidatepage(struct page *page, unsigned int offset,
(unsigned long long)page_offset(page));
ClearPagePrivate(page);
set_page_private(page, 0);
- page_cache_release(page);
+ put_page(page);
}
}
@@ -1296,9 +1303,10 @@ static void __setup_root(u32 nodesize, u32 sectorsize, u32 stripesize,
spin_lock_init(&root->root_item_lock);
}
-static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info)
+static struct btrfs_root *btrfs_alloc_root(struct btrfs_fs_info *fs_info,
+ gfp_t flags)
{
- struct btrfs_root *root = kzalloc(sizeof(*root), GFP_NOFS);
+ struct btrfs_root *root = kzalloc(sizeof(*root), flags);
if (root)
root->fs_info = fs_info;
return root;
@@ -1310,7 +1318,7 @@ struct btrfs_root *btrfs_alloc_dummy_root(void)
{
struct btrfs_root *root;
- root = btrfs_alloc_root(NULL);
+ root = btrfs_alloc_root(NULL, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
__setup_root(4096, 4096, 4096, root, NULL, 1);
@@ -1332,7 +1340,7 @@ struct btrfs_root *btrfs_create_tree(struct btrfs_trans_handle *trans,
int ret = 0;
uuid_le uuid;
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!root)
return ERR_PTR(-ENOMEM);
@@ -1408,7 +1416,7 @@ static struct btrfs_root *alloc_log_tree(struct btrfs_trans_handle *trans,
struct btrfs_root *tree_root = fs_info->tree_root;
struct extent_buffer *leaf;
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_NOFS);
if (!root)
return ERR_PTR(-ENOMEM);
@@ -1506,7 +1514,7 @@ static struct btrfs_root *btrfs_read_tree_root(struct btrfs_root *tree_root,
if (!path)
return ERR_PTR(-ENOMEM);
- root = btrfs_alloc_root(fs_info);
+ root = btrfs_alloc_root(fs_info, GFP_NOFS);
if (!root) {
ret = -ENOMEM;
goto alloc_fail;
@@ -1756,7 +1764,7 @@ static int setup_bdi(struct btrfs_fs_info *info, struct backing_dev_info *bdi)
if (err)
return err;
- bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
+ bdi->ra_pages = VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
bdi->congested_fn = btrfs_congested_fn;
bdi->congested_data = info;
bdi->capabilities |= BDI_CAP_CGROUP_WRITEBACK;
@@ -1920,14 +1928,12 @@ sleep:
if (unlikely(test_bit(BTRFS_FS_STATE_ERROR,
&root->fs_info->fs_state)))
btrfs_cleanup_transaction(root);
- if (!try_to_freeze()) {
- set_current_state(TASK_INTERRUPTIBLE);
- if (!kthread_should_stop() &&
- (!btrfs_transaction_blocked(root->fs_info) ||
- cannot_commit))
- schedule_timeout(delay);
- __set_current_state(TASK_RUNNING);
- }
+ set_current_state(TASK_INTERRUPTIBLE);
+ if (!kthread_should_stop() &&
+ (!btrfs_transaction_blocked(root->fs_info) ||
+ cannot_commit))
+ schedule_timeout(delay);
+ __set_current_state(TASK_RUNNING);
} while (!kthread_should_stop());
return 0;
}
@@ -2272,9 +2278,11 @@ static void btrfs_init_dev_replace_locks(struct btrfs_fs_info *fs_info)
fs_info->dev_replace.lock_owner = 0;
atomic_set(&fs_info->dev_replace.nesting_level, 0);
mutex_init(&fs_info->dev_replace.lock_finishing_cancel_unmount);
- mutex_init(&fs_info->dev_replace.lock_management_lock);
- mutex_init(&fs_info->dev_replace.lock);
+ rwlock_init(&fs_info->dev_replace.lock);
+ atomic_set(&fs_info->dev_replace.read_locks, 0);
+ atomic_set(&fs_info->dev_replace.blocking_readers, 0);
init_waitqueue_head(&fs_info->replace_wait);
+ init_waitqueue_head(&fs_info->dev_replace.read_lock_wq);
}
static void btrfs_init_qgroup(struct btrfs_fs_info *fs_info)
@@ -2385,7 +2393,7 @@ static int btrfs_replay_log(struct btrfs_fs_info *fs_info,
return -EIO;
}
- log_tree_root = btrfs_alloc_root(fs_info);
+ log_tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!log_tree_root)
return -ENOMEM;
@@ -2510,8 +2518,8 @@ int open_ctree(struct super_block *sb,
int backup_index = 0;
int max_active;
- tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info);
- chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info);
+ tree_root = fs_info->tree_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
+ chunk_root = fs_info->chunk_root = btrfs_alloc_root(fs_info, GFP_KERNEL);
if (!tree_root || !chunk_root) {
err = -ENOMEM;
goto fail;
@@ -2534,7 +2542,7 @@ int open_ctree(struct super_block *sb,
err = ret;
goto fail_bdi;
}
- fs_info->dirty_metadata_batch = PAGE_CACHE_SIZE *
+ fs_info->dirty_metadata_batch = PAGE_SIZE *
(1 + ilog2(nr_cpu_ids));
ret = percpu_counter_init(&fs_info->delalloc_bytes, 0, GFP_KERNEL);
@@ -2603,6 +2611,7 @@ int open_ctree(struct super_block *sb,
atomic_set(&fs_info->nr_async_bios, 0);
atomic_set(&fs_info->defrag_running, 0);
atomic_set(&fs_info->qgroup_op_seq, 0);
+ atomic_set(&fs_info->reada_works_cnt, 0);
atomic64_set(&fs_info->tree_mod_seq, 0);
fs_info->sb = sb;
fs_info->max_inline = BTRFS_DEFAULT_MAX_INLINE;
@@ -2622,7 +2631,7 @@ int open_ctree(struct super_block *sb,
INIT_LIST_HEAD(&fs_info->ordered_roots);
spin_lock_init(&fs_info->ordered_root_lock);
fs_info->delayed_root = kmalloc(sizeof(struct btrfs_delayed_root),
- GFP_NOFS);
+ GFP_KERNEL);
if (!fs_info->delayed_root) {
err = -ENOMEM;
goto fail_iput;
@@ -2750,7 +2759,7 @@ int open_ctree(struct super_block *sb,
*/
fs_info->compress_type = BTRFS_COMPRESS_ZLIB;
- ret = btrfs_parse_options(tree_root, options);
+ ret = btrfs_parse_options(tree_root, options, sb->s_flags);
if (ret) {
err = ret;
goto fail_alloc;
@@ -2778,7 +2787,7 @@ int open_ctree(struct super_block *sb,
* flag our filesystem as having big metadata blocks if
* they are bigger than the page size
*/
- if (btrfs_super_nodesize(disk_super) > PAGE_CACHE_SIZE) {
+ if (btrfs_super_nodesize(disk_super) > PAGE_SIZE) {
if (!(features & BTRFS_FEATURE_INCOMPAT_BIG_METADATA))
printk(KERN_INFO "BTRFS: flagging fs with big metadata feature\n");
features |= BTRFS_FEATURE_INCOMPAT_BIG_METADATA;
@@ -2828,7 +2837,7 @@ int open_ctree(struct super_block *sb,
fs_info->bdi.ra_pages *= btrfs_super_num_devices(disk_super);
fs_info->bdi.ra_pages = max(fs_info->bdi.ra_pages,
- SZ_4M / PAGE_CACHE_SIZE);
+ SZ_4M / PAGE_SIZE);
tree_root->nodesize = nodesize;
tree_root->sectorsize = sectorsize;
@@ -3029,8 +3038,9 @@ retry_root_backup:
if (ret)
goto fail_trans_kthread;
- /* do not make disk changes in broken FS */
- if (btrfs_super_log_root(disk_super) != 0) {
+ /* do not make disk changes in broken FS or nologreplay is given */
+ if (btrfs_super_log_root(disk_super) != 0 &&
+ !btrfs_test_opt(tree_root, NOLOGREPLAY)) {
ret = btrfs_replay_log(fs_info, fs_devices);
if (ret) {
err = ret;
@@ -3146,6 +3156,12 @@ retry_root_backup:
fs_info->open = 1;
+ /*
+ * backuproot only affect mount behavior, and if open_ctree succeeded,
+ * no need to keep the flag
+ */
+ btrfs_clear_opt(fs_info->mount_opt, USEBACKUPROOT);
+
return 0;
fail_qgroup:
@@ -3200,7 +3216,7 @@ fail:
return err;
recovery_tree_root:
- if (!btrfs_test_opt(tree_root, RECOVERY))
+ if (!btrfs_test_opt(tree_root, USEBACKUPROOT))
goto fail_tree_roots;
free_root_pointers(fs_info, 0);
@@ -4060,9 +4076,9 @@ static int btrfs_check_super_valid(struct btrfs_fs_info *fs_info,
ret = -EINVAL;
}
/* Only PAGE SIZE is supported yet */
- if (sectorsize != PAGE_CACHE_SIZE) {
+ if (sectorsize != PAGE_SIZE) {
printk(KERN_ERR "BTRFS: sectorsize %llu not supported yet, only support %lu\n",
- sectorsize, PAGE_CACHE_SIZE);
+ sectorsize, PAGE_SIZE);
ret = -EINVAL;
}
if (!is_power_of_2(nodesize) || nodesize < sectorsize ||
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index e2287c7c1..84e060eb0 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -3452,7 +3452,7 @@ again:
num_pages = 1;
num_pages *= 16;
- num_pages *= PAGE_CACHE_SIZE;
+ num_pages *= PAGE_SIZE;
ret = btrfs_check_data_free_space(inode, 0, num_pages);
if (ret)
@@ -4639,7 +4639,7 @@ static void shrink_delalloc(struct btrfs_root *root, u64 to_reclaim, u64 orig,
loops = 0;
while (delalloc_bytes && loops < 3) {
max_reclaim = min(delalloc_bytes, to_reclaim);
- nr_pages = max_reclaim >> PAGE_CACHE_SHIFT;
+ nr_pages = max_reclaim >> PAGE_SHIFT;
btrfs_writeback_inodes_sb_nr(root, nr_pages, items);
/*
* We need to wait for the async pages to actually start before
@@ -4838,7 +4838,7 @@ static inline int need_do_async_reclaim(struct btrfs_space_info *space_info,
u64 thresh = div_factor_fine(space_info->total_bytes, 98);
/* If we're just plain full then async reclaim just slows us down. */
- if (space_info->bytes_used >= thresh)
+ if ((space_info->bytes_used + space_info->bytes_reserved) >= thresh)
return 0;
return (used >= thresh && !btrfs_fs_closing(fs_info) &&
@@ -5373,27 +5373,33 @@ static void update_global_block_rsv(struct btrfs_fs_info *fs_info)
block_rsv->size = min_t(u64, num_bytes, SZ_512M);
- num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
- sinfo->bytes_reserved + sinfo->bytes_readonly +
- sinfo->bytes_may_use;
-
- if (sinfo->total_bytes > num_bytes) {
- num_bytes = sinfo->total_bytes - num_bytes;
- block_rsv->reserved += num_bytes;
- sinfo->bytes_may_use += num_bytes;
- trace_btrfs_space_reservation(fs_info, "space_info",
- sinfo->flags, num_bytes, 1);
- }
-
- if (block_rsv->reserved >= block_rsv->size) {
+ if (block_rsv->reserved < block_rsv->size) {
+ num_bytes = sinfo->bytes_used + sinfo->bytes_pinned +
+ sinfo->bytes_reserved + sinfo->bytes_readonly +
+ sinfo->bytes_may_use;
+ if (sinfo->total_bytes > num_bytes) {
+ num_bytes = sinfo->total_bytes - num_bytes;
+ num_bytes = min(num_bytes,
+ block_rsv->size - block_rsv->reserved);
+ block_rsv->reserved += num_bytes;
+ sinfo->bytes_may_use += num_bytes;
+ trace_btrfs_space_reservation(fs_info, "space_info",
+ sinfo->flags, num_bytes,
+ 1);
+ }
+ } else if (block_rsv->reserved > block_rsv->size) {
num_bytes = block_rsv->reserved - block_rsv->size;
sinfo->bytes_may_use -= num_bytes;
trace_btrfs_space_reservation(fs_info, "space_info",
sinfo->flags, num_bytes, 0);
block_rsv->reserved = block_rsv->size;
- block_rsv->full = 1;
}
+ if (block_rsv->reserved == block_rsv->size)
+ block_rsv->full = 1;
+ else
+ block_rsv->full = 0;
+
spin_unlock(&block_rsv->lock);
spin_unlock(&sinfo->lock);
}
@@ -5752,7 +5758,7 @@ out_fail:
/*
* This is tricky, but first we need to figure out how much we
- * free'd from any free-ers that occured during this
+ * free'd from any free-ers that occurred during this
* reservation, so we reset ->csum_bytes to the csum_bytes
* before we dropped our lock, and then call the free for the
* number of bytes that were freed while we were trying our
@@ -7018,7 +7024,7 @@ btrfs_lock_cluster(struct btrfs_block_group_cache *block_group,
struct btrfs_free_cluster *cluster,
int delalloc)
{
- struct btrfs_block_group_cache *used_bg;
+ struct btrfs_block_group_cache *used_bg = NULL;
bool locked = false;
again:
spin_lock(&cluster->refill_lock);
@@ -9380,15 +9386,23 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
u64 dev_min = 1;
u64 dev_nr = 0;
u64 target;
+ int debug;
int index;
int full = 0;
int ret = 0;
+ debug = btrfs_test_opt(root, ENOSPC_DEBUG);
+
block_group = btrfs_lookup_block_group(root->fs_info, bytenr);
/* odd, couldn't find the block group, leave it alone */
- if (!block_group)
+ if (!block_group) {
+ if (debug)
+ btrfs_warn(root->fs_info,
+ "can't find block group for bytenr %llu",
+ bytenr);
return -1;
+ }
min_free = btrfs_block_group_used(&block_group->item);
@@ -9442,8 +9456,13 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
* this is just a balance, so if we were marked as full
* we know there is no space for a new chunk
*/
- if (full)
+ if (full) {
+ if (debug)
+ btrfs_warn(root->fs_info,
+ "no space to alloc new chunk for block group %llu",
+ block_group->key.objectid);
goto out;
+ }
index = get_block_group_index(block_group);
}
@@ -9490,6 +9509,10 @@ int btrfs_can_relocate(struct btrfs_root *root, u64 bytenr)
ret = -1;
}
}
+ if (debug && ret == -1)
+ btrfs_warn(root->fs_info,
+ "no space to allocate a new chunk for block group %llu",
+ block_group->key.objectid);
mutex_unlock(&root->fs_info->chunk_mutex);
btrfs_end_transaction(trans, root);
out:
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 392592dc7..d247fc0ee 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -206,10 +206,8 @@ void extent_io_exit(void)
* destroy caches.
*/
rcu_barrier();
- if (extent_state_cache)
- kmem_cache_destroy(extent_state_cache);
- if (extent_buffer_cache)
- kmem_cache_destroy(extent_buffer_cache);
+ kmem_cache_destroy(extent_state_cache);
+ kmem_cache_destroy(extent_buffer_cache);
if (btrfs_bioset)
bioset_free(btrfs_bioset);
}
@@ -232,7 +230,7 @@ static struct extent_state *alloc_extent_state(gfp_t mask)
if (!state)
return state;
state->state = 0;
- state->private = 0;
+ state->failrec = NULL;
RB_CLEAR_NODE(&state->rb_node);
btrfs_leak_debug_add(&state->leak_list, &states);
atomic_set(&state->refs, 1);
@@ -1365,23 +1363,23 @@ int try_lock_extent(struct extent_io_tree *tree, u64 start, u64 end)
void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end)
{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long index = start >> PAGE_SHIFT;
+ unsigned long end_index = end >> PAGE_SHIFT;
struct page *page;
while (index <= end_index) {
page = find_get_page(inode->i_mapping, index);
BUG_ON(!page); /* Pages should be in the extent_io_tree */
clear_page_dirty_for_io(page);
- page_cache_release(page);
+ put_page(page);
index++;
}
}
void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long index = start >> PAGE_SHIFT;
+ unsigned long end_index = end >> PAGE_SHIFT;
struct page *page;
while (index <= end_index) {
@@ -1389,7 +1387,7 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
BUG_ON(!page); /* Pages should be in the extent_io_tree */
__set_page_dirty_nobuffers(page);
account_page_redirty(page);
- page_cache_release(page);
+ put_page(page);
index++;
}
}
@@ -1399,15 +1397,15 @@ void extent_range_redirty_for_io(struct inode *inode, u64 start, u64 end)
*/
static void set_range_writeback(struct extent_io_tree *tree, u64 start, u64 end)
{
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long index = start >> PAGE_SHIFT;
+ unsigned long end_index = end >> PAGE_SHIFT;
struct page *page;
while (index <= end_index) {
page = find_get_page(tree->mapping, index);
BUG_ON(!page); /* Pages should be in the extent_io_tree */
set_page_writeback(page);
- page_cache_release(page);
+ put_page(page);
index++;
}
}
@@ -1558,8 +1556,8 @@ static noinline void __unlock_for_delalloc(struct inode *inode,
{
int ret;
struct page *pages[16];
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long index = start >> PAGE_SHIFT;
+ unsigned long end_index = end >> PAGE_SHIFT;
unsigned long nr_pages = end_index - index + 1;
int i;
@@ -1573,7 +1571,7 @@ static noinline void __unlock_for_delalloc(struct inode *inode,
for (i = 0; i < ret; i++) {
if (pages[i] != locked_page)
unlock_page(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
nr_pages -= ret;
index += ret;
@@ -1586,9 +1584,9 @@ static noinline int lock_delalloc_pages(struct inode *inode,
u64 delalloc_start,
u64 delalloc_end)
{
- unsigned long index = delalloc_start >> PAGE_CACHE_SHIFT;
+ unsigned long index = delalloc_start >> PAGE_SHIFT;
unsigned long start_index = index;
- unsigned long end_index = delalloc_end >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = delalloc_end >> PAGE_SHIFT;
unsigned long pages_locked = 0;
struct page *pages[16];
unsigned long nrpages;
@@ -1621,11 +1619,11 @@ static noinline int lock_delalloc_pages(struct inode *inode,
pages[i]->mapping != inode->i_mapping) {
ret = -EAGAIN;
unlock_page(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
goto done;
}
}
- page_cache_release(pages[i]);
+ put_page(pages[i]);
pages_locked++;
}
nrpages -= ret;
@@ -1638,7 +1636,7 @@ done:
__unlock_for_delalloc(inode, locked_page,
delalloc_start,
((u64)(start_index + pages_locked - 1)) <<
- PAGE_CACHE_SHIFT);
+ PAGE_SHIFT);
}
return ret;
}
@@ -1698,7 +1696,7 @@ again:
free_extent_state(cached_state);
cached_state = NULL;
if (!loops) {
- max_bytes = PAGE_CACHE_SIZE;
+ max_bytes = PAGE_SIZE;
loops = 1;
goto again;
} else {
@@ -1737,8 +1735,8 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
int ret;
struct page *pages[16];
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long index = start >> PAGE_SHIFT;
+ unsigned long end_index = end >> PAGE_SHIFT;
unsigned long nr_pages = end_index - index + 1;
int i;
@@ -1759,7 +1757,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
SetPagePrivate2(pages[i]);
if (pages[i] == locked_page) {
- page_cache_release(pages[i]);
+ put_page(pages[i]);
continue;
}
if (page_ops & PAGE_CLEAR_DIRTY)
@@ -1772,7 +1770,7 @@ void extent_clear_unlock_delalloc(struct inode *inode, u64 start, u64 end,
end_page_writeback(pages[i]);
if (page_ops & PAGE_UNLOCK)
unlock_page(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
nr_pages -= ret;
index += ret;
@@ -1844,7 +1842,8 @@ out:
* set the private field for a given byte offset in the tree. If there isn't
* an extent_state there already, this does nothing.
*/
-static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private)
+static noinline int set_state_failrec(struct extent_io_tree *tree, u64 start,
+ struct io_failure_record *failrec)
{
struct rb_node *node;
struct extent_state *state;
@@ -1865,13 +1864,14 @@ static int set_state_private(struct extent_io_tree *tree, u64 start, u64 private
ret = -ENOENT;
goto out;
}
- state->private = private;
+ state->failrec = failrec;
out:
spin_unlock(&tree->lock);
return ret;
}
-int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
+static noinline int get_state_failrec(struct extent_io_tree *tree, u64 start,
+ struct io_failure_record **failrec)
{
struct rb_node *node;
struct extent_state *state;
@@ -1892,7 +1892,7 @@ int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private)
ret = -ENOENT;
goto out;
}
- *private = state->private;
+ *failrec = state->failrec;
out:
spin_unlock(&tree->lock);
return ret;
@@ -1961,7 +1961,7 @@ int test_range_bit(struct extent_io_tree *tree, u64 start, u64 end,
static void check_page_uptodate(struct extent_io_tree *tree, struct page *page)
{
u64 start = page_offset(page);
- u64 end = start + PAGE_CACHE_SIZE - 1;
+ u64 end = start + PAGE_SIZE - 1;
if (test_range_bit(tree, start, end, EXTENT_UPTODATE, 1, NULL))
SetPageUptodate(page);
}
@@ -1972,7 +1972,7 @@ int free_io_failure(struct inode *inode, struct io_failure_record *rec)
int err = 0;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
- set_state_private(failure_tree, rec->start, 0);
+ set_state_failrec(failure_tree, rec->start, NULL);
ret = clear_extent_bits(failure_tree, rec->start,
rec->start + rec->len - 1,
EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
@@ -2071,11 +2071,11 @@ int repair_eb_io_failure(struct btrfs_root *root, struct extent_buffer *eb,
struct page *p = eb->pages[i];
ret = repair_io_failure(root->fs_info->btree_inode, start,
- PAGE_CACHE_SIZE, start, p,
+ PAGE_SIZE, start, p,
start - page_offset(p), mirror_num);
if (ret)
break;
- start += PAGE_CACHE_SIZE;
+ start += PAGE_SIZE;
}
return ret;
@@ -2089,7 +2089,6 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
unsigned int pg_offset)
{
u64 private;
- u64 private_failure;
struct io_failure_record *failrec;
struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
struct extent_state *state;
@@ -2102,12 +2101,11 @@ int clean_io_failure(struct inode *inode, u64 start, struct page *page,
if (!ret)
return 0;
- ret = get_state_private(&BTRFS_I(inode)->io_failure_tree, start,
- &private_failure);
+ ret = get_state_failrec(&BTRFS_I(inode)->io_failure_tree, start,
+ &failrec);
if (ret)
return 0;
- failrec = (struct io_failure_record *)(unsigned long) private_failure;
BUG_ON(!failrec->this_mirror);
if (failrec->in_validation) {
@@ -2167,7 +2165,7 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
next = next_state(state);
- failrec = (struct io_failure_record *)(unsigned long)state->private;
+ failrec = state->failrec;
free_extent_state(state);
kfree(failrec);
@@ -2177,10 +2175,9 @@ void btrfs_free_io_failure_record(struct inode *inode, u64 start, u64 end)
}
int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
- struct io_failure_record **failrec_ret)
+ struct io_failure_record **failrec_ret)
{
struct io_failure_record *failrec;
- u64 private;
struct extent_map *em;
struct extent_io_tree *failure_tree = &BTRFS_I(inode)->io_failure_tree;
struct extent_io_tree *tree = &BTRFS_I(inode)->io_tree;
@@ -2188,7 +2185,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
int ret;
u64 logical;
- ret = get_state_private(failure_tree, start, &private);
+ ret = get_state_failrec(failure_tree, start, &failrec);
if (ret) {
failrec = kzalloc(sizeof(*failrec), GFP_NOFS);
if (!failrec)
@@ -2237,8 +2234,7 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
ret = set_extent_bits(failure_tree, start, end,
EXTENT_LOCKED | EXTENT_DIRTY, GFP_NOFS);
if (ret >= 0)
- ret = set_state_private(failure_tree, start,
- (u64)(unsigned long)failrec);
+ ret = set_state_failrec(failure_tree, start, failrec);
/* set the bits in the inode's tree */
if (ret >= 0)
ret = set_extent_bits(tree, start, end, EXTENT_DAMAGED,
@@ -2248,7 +2244,6 @@ int btrfs_get_io_failure_record(struct inode *inode, u64 start, u64 end,
return ret;
}
} else {
- failrec = (struct io_failure_record *)(unsigned long)private;
pr_debug("Get IO Failure Record: (found) logical=%llu, start=%llu, len=%llu, validation=%d\n",
failrec->logical, failrec->start, failrec->len,
failrec->in_validation);
@@ -2471,8 +2466,8 @@ static void end_bio_extent_writepage(struct bio *bio)
* advance bv_offset and adjust bv_len to compensate.
* Print a warning for nonzero offsets, and an error
* if they don't add up to a full page. */
- if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
- if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+ if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
+ if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
"partial page write in btrfs with offset %u and length %u",
bvec->bv_offset, bvec->bv_len);
@@ -2546,8 +2541,8 @@ static void end_bio_extent_readpage(struct bio *bio)
* advance bv_offset and adjust bv_len to compensate.
* Print a warning for nonzero offsets, and an error
* if they don't add up to a full page. */
- if (bvec->bv_offset || bvec->bv_len != PAGE_CACHE_SIZE) {
- if (bvec->bv_offset + bvec->bv_len != PAGE_CACHE_SIZE)
+ if (bvec->bv_offset || bvec->bv_len != PAGE_SIZE) {
+ if (bvec->bv_offset + bvec->bv_len != PAGE_SIZE)
btrfs_err(BTRFS_I(page->mapping->host)->root->fs_info,
"partial page read in btrfs with offset %u and length %u",
bvec->bv_offset, bvec->bv_len);
@@ -2603,13 +2598,13 @@ static void end_bio_extent_readpage(struct bio *bio)
readpage_ok:
if (likely(uptodate)) {
loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned off;
/* Zero out the end if this page straddles i_size */
- off = i_size & (PAGE_CACHE_SIZE-1);
+ off = i_size & (PAGE_SIZE-1);
if (page->index == end_index && off)
- zero_user_segment(page, off, PAGE_CACHE_SIZE);
+ zero_user_segment(page, off, PAGE_SIZE);
SetPageUptodate(page);
} else {
ClearPageUptodate(page);
@@ -2773,7 +2768,7 @@ static int submit_extent_page(int rw, struct extent_io_tree *tree,
struct bio *bio;
int contig = 0;
int old_compressed = prev_bio_flags & EXTENT_BIO_COMPRESSED;
- size_t page_size = min_t(size_t, size, PAGE_CACHE_SIZE);
+ size_t page_size = min_t(size_t, size, PAGE_SIZE);
if (bio_ret && *bio_ret) {
bio = *bio_ret;
@@ -2826,7 +2821,7 @@ static void attach_extent_buffer_page(struct extent_buffer *eb,
{
if (!PagePrivate(page)) {
SetPagePrivate(page);
- page_cache_get(page);
+ get_page(page);
set_page_private(page, (unsigned long)eb);
} else {
WARN_ON(page->private != (unsigned long)eb);
@@ -2837,7 +2832,7 @@ void set_page_extent_mapped(struct page *page)
{
if (!PagePrivate(page)) {
SetPagePrivate(page);
- page_cache_get(page);
+ get_page(page);
set_page_private(page, EXTENT_PAGE_PRIVATE);
}
}
@@ -2885,7 +2880,7 @@ static int __do_readpage(struct extent_io_tree *tree,
{
struct inode *inode = page->mapping->host;
u64 start = page_offset(page);
- u64 page_end = start + PAGE_CACHE_SIZE - 1;
+ u64 page_end = start + PAGE_SIZE - 1;
u64 end;
u64 cur = start;
u64 extent_offset;
@@ -2914,12 +2909,12 @@ static int __do_readpage(struct extent_io_tree *tree,
}
}
- if (page->index == last_byte >> PAGE_CACHE_SHIFT) {
+ if (page->index == last_byte >> PAGE_SHIFT) {
char *userpage;
- size_t zero_offset = last_byte & (PAGE_CACHE_SIZE - 1);
+ size_t zero_offset = last_byte & (PAGE_SIZE - 1);
if (zero_offset) {
- iosize = PAGE_CACHE_SIZE - zero_offset;
+ iosize = PAGE_SIZE - zero_offset;
userpage = kmap_atomic(page);
memset(userpage + zero_offset, 0, iosize);
flush_dcache_page(page);
@@ -2927,14 +2922,14 @@ static int __do_readpage(struct extent_io_tree *tree,
}
}
while (cur <= end) {
- unsigned long pnr = (last_byte >> PAGE_CACHE_SHIFT) + 1;
+ unsigned long pnr = (last_byte >> PAGE_SHIFT) + 1;
bool force_bio_submit = false;
if (cur >= last_byte) {
char *userpage;
struct extent_state *cached = NULL;
- iosize = PAGE_CACHE_SIZE - pg_offset;
+ iosize = PAGE_SIZE - pg_offset;
userpage = kmap_atomic(page);
memset(userpage + pg_offset, 0, iosize);
flush_dcache_page(page);
@@ -3117,7 +3112,7 @@ static inline void __do_contiguous_readpages(struct extent_io_tree *tree,
for (index = 0; index < nr_pages; index++) {
__do_readpage(tree, pages[index], get_extent, em_cached, bio,
mirror_num, bio_flags, rw, prev_em_start);
- page_cache_release(pages[index]);
+ put_page(pages[index]);
}
}
@@ -3139,10 +3134,10 @@ static void __extent_readpages(struct extent_io_tree *tree,
page_start = page_offset(pages[index]);
if (!end) {
start = page_start;
- end = start + PAGE_CACHE_SIZE - 1;
+ end = start + PAGE_SIZE - 1;
first_index = index;
} else if (end + 1 == page_start) {
- end += PAGE_CACHE_SIZE;
+ end += PAGE_SIZE;
} else {
__do_contiguous_readpages(tree, &pages[first_index],
index - first_index, start,
@@ -3150,7 +3145,7 @@ static void __extent_readpages(struct extent_io_tree *tree,
bio, mirror_num, bio_flags,
rw, prev_em_start);
start = page_start;
- end = start + PAGE_CACHE_SIZE - 1;
+ end = start + PAGE_SIZE - 1;
first_index = index;
}
}
@@ -3172,12 +3167,13 @@ static int __extent_read_full_page(struct extent_io_tree *tree,
struct inode *inode = page->mapping->host;
struct btrfs_ordered_extent *ordered;
u64 start = page_offset(page);
- u64 end = start + PAGE_CACHE_SIZE - 1;
+ u64 end = start + PAGE_SIZE - 1;
int ret;
while (1) {
lock_extent(tree, start, end);
- ordered = btrfs_lookup_ordered_extent(inode, start);
+ ordered = btrfs_lookup_ordered_range(inode, start,
+ PAGE_SIZE);
if (!ordered)
break;
unlock_extent(tree, start, end);
@@ -3231,7 +3227,7 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
unsigned long *nr_written)
{
struct extent_io_tree *tree = epd->tree;
- u64 page_end = delalloc_start + PAGE_CACHE_SIZE - 1;
+ u64 page_end = delalloc_start + PAGE_SIZE - 1;
u64 nr_delalloc;
u64 delalloc_to_write = 0;
u64 delalloc_end = 0;
@@ -3268,13 +3264,11 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
goto done;
}
/*
- * delalloc_end is already one less than the total
- * length, so we don't subtract one from
- * PAGE_CACHE_SIZE
+ * delalloc_end is already one less than the total length, so
+ * we don't subtract one from PAGE_SIZE
*/
delalloc_to_write += (delalloc_end - delalloc_start +
- PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
+ PAGE_SIZE) >> PAGE_SHIFT;
delalloc_start = delalloc_end + 1;
}
if (wbc->nr_to_write < delalloc_to_write) {
@@ -3323,7 +3317,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
{
struct extent_io_tree *tree = epd->tree;
u64 start = page_offset(page);
- u64 page_end = start + PAGE_CACHE_SIZE - 1;
+ u64 page_end = start + PAGE_SIZE - 1;
u64 end;
u64 cur = start;
u64 extent_offset;
@@ -3438,7 +3432,7 @@ static noinline_for_stack int __extent_writepage_io(struct inode *inode,
if (ret) {
SetPageError(page);
} else {
- unsigned long max_nr = (i_size >> PAGE_CACHE_SHIFT) + 1;
+ unsigned long max_nr = (i_size >> PAGE_SHIFT) + 1;
set_range_writeback(tree, cur, cur + iosize - 1);
if (!PageWriteback(page)) {
@@ -3481,12 +3475,12 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
struct inode *inode = page->mapping->host;
struct extent_page_data *epd = data;
u64 start = page_offset(page);
- u64 page_end = start + PAGE_CACHE_SIZE - 1;
+ u64 page_end = start + PAGE_SIZE - 1;
int ret;
int nr = 0;
size_t pg_offset = 0;
loff_t i_size = i_size_read(inode);
- unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = i_size >> PAGE_SHIFT;
int write_flags;
unsigned long nr_written = 0;
@@ -3501,10 +3495,10 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
ClearPageError(page);
- pg_offset = i_size & (PAGE_CACHE_SIZE - 1);
+ pg_offset = i_size & (PAGE_SIZE - 1);
if (page->index > end_index ||
(page->index == end_index && !pg_offset)) {
- page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return 0;
}
@@ -3514,7 +3508,7 @@ static int __extent_writepage(struct page *page, struct writeback_control *wbc,
userpage = kmap_atomic(page);
memset(userpage + pg_offset, 0,
- PAGE_CACHE_SIZE - pg_offset);
+ PAGE_SIZE - pg_offset);
kunmap_atomic(userpage);
flush_dcache_page(page);
}
@@ -3752,7 +3746,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
clear_page_dirty_for_io(p);
set_page_writeback(p);
ret = submit_extent_page(rw, tree, wbc, p, offset >> 9,
- PAGE_CACHE_SIZE, 0, bdev, &epd->bio,
+ PAGE_SIZE, 0, bdev, &epd->bio,
-1, end_bio_extent_buffer_writepage,
0, epd->bio_flags, bio_flags, false);
epd->bio_flags = bio_flags;
@@ -3764,7 +3758,7 @@ static noinline_for_stack int write_one_eb(struct extent_buffer *eb,
ret = -EIO;
break;
}
- offset += PAGE_CACHE_SIZE;
+ offset += PAGE_SIZE;
update_nr_written(p, wbc, 1);
unlock_page(p);
}
@@ -3808,8 +3802,8 @@ int btree_write_cache_pages(struct address_space *mapping,
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
} else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ index = wbc->range_start >> PAGE_SHIFT;
+ end = wbc->range_end >> PAGE_SHIFT;
scanned = 1;
}
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -3952,8 +3946,8 @@ static int extent_write_cache_pages(struct extent_io_tree *tree,
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
} else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ index = wbc->range_start >> PAGE_SHIFT;
+ end = wbc->range_end >> PAGE_SHIFT;
scanned = 1;
}
if (wbc->sync_mode == WB_SYNC_ALL)
@@ -4087,8 +4081,8 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
int ret = 0;
struct address_space *mapping = inode->i_mapping;
struct page *page;
- unsigned long nr_pages = (end - start + PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
+ unsigned long nr_pages = (end - start + PAGE_SIZE) >>
+ PAGE_SHIFT;
struct extent_page_data epd = {
.bio = NULL,
@@ -4106,18 +4100,18 @@ int extent_write_locked_range(struct extent_io_tree *tree, struct inode *inode,
};
while (start <= end) {
- page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ page = find_get_page(mapping, start >> PAGE_SHIFT);
if (clear_page_dirty_for_io(page))
ret = __extent_writepage(page, &wbc_writepages, &epd);
else {
if (tree->ops && tree->ops->writepage_end_io_hook)
tree->ops->writepage_end_io_hook(page, start,
- start + PAGE_CACHE_SIZE - 1,
+ start + PAGE_SIZE - 1,
NULL, 1);
unlock_page(page);
}
- page_cache_release(page);
- start += PAGE_CACHE_SIZE;
+ put_page(page);
+ start += PAGE_SIZE;
}
flush_epd_write_bio(&epd);
@@ -4167,7 +4161,7 @@ int extent_readpages(struct extent_io_tree *tree,
list_del(&page->lru);
if (add_to_page_cache_lru(page, mapping,
page->index, GFP_NOFS)) {
- page_cache_release(page);
+ put_page(page);
continue;
}
@@ -4201,7 +4195,7 @@ int extent_invalidatepage(struct extent_io_tree *tree,
{
struct extent_state *cached_state = NULL;
u64 start = page_offset(page);
- u64 end = start + PAGE_CACHE_SIZE - 1;
+ u64 end = start + PAGE_SIZE - 1;
size_t blocksize = page->mapping->host->i_sb->s_blocksize;
start += ALIGN(offset, blocksize);
@@ -4227,7 +4221,7 @@ static int try_release_extent_state(struct extent_map_tree *map,
struct page *page, gfp_t mask)
{
u64 start = page_offset(page);
- u64 end = start + PAGE_CACHE_SIZE - 1;
+ u64 end = start + PAGE_SIZE - 1;
int ret = 1;
if (test_range_bit(tree, start, end,
@@ -4266,7 +4260,7 @@ int try_release_extent_mapping(struct extent_map_tree *map,
{
struct extent_map *em;
u64 start = page_offset(page);
- u64 end = start + PAGE_CACHE_SIZE - 1;
+ u64 end = start + PAGE_SIZE - 1;
if (gfpflags_allow_blocking(mask) &&
page->mapping->host->i_size > SZ_16M) {
@@ -4591,14 +4585,14 @@ static void btrfs_release_extent_buffer_page(struct extent_buffer *eb)
ClearPagePrivate(page);
set_page_private(page, 0);
/* One for the page private */
- page_cache_release(page);
+ put_page(page);
}
if (mapped)
spin_unlock(&page->mapping->private_lock);
/* One for when we alloced the page */
- page_cache_release(page);
+ put_page(page);
} while (index != 0);
}
@@ -4783,7 +4777,7 @@ struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
rcu_read_lock();
eb = radix_tree_lookup(&fs_info->buffer_radix,
- start >> PAGE_CACHE_SHIFT);
+ start >> PAGE_SHIFT);
if (eb && atomic_inc_not_zero(&eb->refs)) {
rcu_read_unlock();
/*
@@ -4833,7 +4827,7 @@ again:
goto free_eb;
spin_lock(&fs_info->buffer_lock);
ret = radix_tree_insert(&fs_info->buffer_radix,
- start >> PAGE_CACHE_SHIFT, eb);
+ start >> PAGE_SHIFT, eb);
spin_unlock(&fs_info->buffer_lock);
radix_tree_preload_end();
if (ret == -EEXIST) {
@@ -4866,7 +4860,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
unsigned long len = fs_info->tree_root->nodesize;
unsigned long num_pages = num_extent_pages(start, len);
unsigned long i;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
+ unsigned long index = start >> PAGE_SHIFT;
struct extent_buffer *eb;
struct extent_buffer *exists = NULL;
struct page *p;
@@ -4900,7 +4894,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
if (atomic_inc_not_zero(&exists->refs)) {
spin_unlock(&mapping->private_lock);
unlock_page(p);
- page_cache_release(p);
+ put_page(p);
mark_extent_buffer_accessed(exists, p);
goto free_eb;
}
@@ -4912,7 +4906,7 @@ struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
*/
ClearPagePrivate(p);
WARN_ON(PageDirty(p));
- page_cache_release(p);
+ put_page(p);
}
attach_extent_buffer_page(eb, p);
spin_unlock(&mapping->private_lock);
@@ -4935,7 +4929,7 @@ again:
spin_lock(&fs_info->buffer_lock);
ret = radix_tree_insert(&fs_info->buffer_radix,
- start >> PAGE_CACHE_SHIFT, eb);
+ start >> PAGE_SHIFT, eb);
spin_unlock(&fs_info->buffer_lock);
radix_tree_preload_end();
if (ret == -EEXIST) {
@@ -4998,7 +4992,7 @@ static int release_extent_buffer(struct extent_buffer *eb)
spin_lock(&fs_info->buffer_lock);
radix_tree_delete(&fs_info->buffer_radix,
- eb->start >> PAGE_CACHE_SHIFT);
+ eb->start >> PAGE_SHIFT);
spin_unlock(&fs_info->buffer_lock);
} else {
spin_unlock(&eb->refs_lock);
@@ -5172,8 +5166,8 @@ int read_extent_buffer_pages(struct extent_io_tree *tree,
if (start) {
WARN_ON(start < eb->start);
- start_i = (start >> PAGE_CACHE_SHIFT) -
- (eb->start >> PAGE_CACHE_SHIFT);
+ start_i = (start >> PAGE_SHIFT) -
+ (eb->start >> PAGE_SHIFT);
} else {
start_i = 0;
}
@@ -5256,18 +5250,18 @@ void read_extent_buffer(struct extent_buffer *eb, void *dstv,
struct page *page;
char *kaddr;
char *dst = (char *)dstv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+ size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ unsigned long i = (start_offset + start) >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_SIZE - 1);
while (len > 0) {
page = eb->pages[i];
- cur = min(len, (PAGE_CACHE_SIZE - offset));
+ cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
memcpy(dst, kaddr + offset, cur);
@@ -5287,19 +5281,19 @@ int read_extent_buffer_to_user(struct extent_buffer *eb, void __user *dstv,
struct page *page;
char *kaddr;
char __user *dst = (char __user *)dstv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+ size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ unsigned long i = (start_offset + start) >> PAGE_SHIFT;
int ret = 0;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_SIZE - 1);
while (len > 0) {
page = eb->pages[i];
- cur = min(len, (PAGE_CACHE_SIZE - offset));
+ cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
if (copy_to_user(dst, kaddr + offset, cur)) {
ret = -EFAULT;
@@ -5320,13 +5314,13 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
unsigned long *map_start,
unsigned long *map_len)
{
- size_t offset = start & (PAGE_CACHE_SIZE - 1);
+ size_t offset = start & (PAGE_SIZE - 1);
char *kaddr;
struct page *p;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+ size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ unsigned long i = (start_offset + start) >> PAGE_SHIFT;
unsigned long end_i = (start_offset + start + min_len - 1) >>
- PAGE_CACHE_SHIFT;
+ PAGE_SHIFT;
if (i != end_i)
return -EINVAL;
@@ -5336,7 +5330,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
*map_start = 0;
} else {
offset = 0;
- *map_start = ((u64)i << PAGE_CACHE_SHIFT) - start_offset;
+ *map_start = ((u64)i << PAGE_SHIFT) - start_offset;
}
if (start + min_len > eb->len) {
@@ -5349,7 +5343,7 @@ int map_private_extent_buffer(struct extent_buffer *eb, unsigned long start,
p = eb->pages[i];
kaddr = page_address(p);
*map = kaddr + offset;
- *map_len = PAGE_CACHE_SIZE - offset;
+ *map_len = PAGE_SIZE - offset;
return 0;
}
@@ -5362,19 +5356,19 @@ int memcmp_extent_buffer(struct extent_buffer *eb, const void *ptrv,
struct page *page;
char *kaddr;
char *ptr = (char *)ptrv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+ size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ unsigned long i = (start_offset + start) >> PAGE_SHIFT;
int ret = 0;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_SIZE - 1);
while (len > 0) {
page = eb->pages[i];
- cur = min(len, (PAGE_CACHE_SIZE - offset));
+ cur = min(len, (PAGE_SIZE - offset));
kaddr = page_address(page);
ret = memcmp(ptr, kaddr + offset, cur);
@@ -5397,19 +5391,19 @@ void write_extent_buffer(struct extent_buffer *eb, const void *srcv,
struct page *page;
char *kaddr;
char *src = (char *)srcv;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+ size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ unsigned long i = (start_offset + start) >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_SIZE - 1);
while (len > 0) {
page = eb->pages[i];
WARN_ON(!PageUptodate(page));
- cur = min(len, PAGE_CACHE_SIZE - offset);
+ cur = min(len, PAGE_SIZE - offset);
kaddr = page_address(page);
memcpy(kaddr + offset, src, cur);
@@ -5427,19 +5421,19 @@ void memset_extent_buffer(struct extent_buffer *eb, char c,
size_t offset;
struct page *page;
char *kaddr;
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + start) >> PAGE_CACHE_SHIFT;
+ size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
+ unsigned long i = (start_offset + start) >> PAGE_SHIFT;
WARN_ON(start > eb->len);
WARN_ON(start + len > eb->start + eb->len);
- offset = (start_offset + start) & (PAGE_CACHE_SIZE - 1);
+ offset = (start_offset + start) & (PAGE_SIZE - 1);
while (len > 0) {
page = eb->pages[i];
WARN_ON(!PageUptodate(page));
- cur = min(len, PAGE_CACHE_SIZE - offset);
+ cur = min(len, PAGE_SIZE - offset);
kaddr = page_address(page);
memset(kaddr + offset, c, cur);
@@ -5458,19 +5452,19 @@ void copy_extent_buffer(struct extent_buffer *dst, struct extent_buffer *src,
size_t offset;
struct page *page;
char *kaddr;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
- unsigned long i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
+ size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
+ unsigned long i = (start_offset + dst_offset) >> PAGE_SHIFT;
WARN_ON(src->len != dst_len);
offset = (start_offset + dst_offset) &
- (PAGE_CACHE_SIZE - 1);
+ (PAGE_SIZE - 1);
while (len > 0) {
page = dst->pages[i];
WARN_ON(!PageUptodate(page));
- cur = min(len, (unsigned long)(PAGE_CACHE_SIZE - offset));
+ cur = min(len, (unsigned long)(PAGE_SIZE - offset));
kaddr = page_address(page);
read_extent_buffer(src, kaddr + offset, src_offset, cur);
@@ -5512,7 +5506,7 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
unsigned long *page_index,
size_t *page_offset)
{
- size_t start_offset = eb->start & ((u64)PAGE_CACHE_SIZE - 1);
+ size_t start_offset = eb->start & ((u64)PAGE_SIZE - 1);
size_t byte_offset = BIT_BYTE(nr);
size_t offset;
@@ -5523,8 +5517,8 @@ static inline void eb_bitmap_offset(struct extent_buffer *eb,
*/
offset = start_offset + start + byte_offset;
- *page_index = offset >> PAGE_CACHE_SHIFT;
- *page_offset = offset & (PAGE_CACHE_SIZE - 1);
+ *page_index = offset >> PAGE_SHIFT;
+ *page_offset = offset & (PAGE_SIZE - 1);
}
/**
@@ -5576,7 +5570,7 @@ void extent_buffer_bitmap_set(struct extent_buffer *eb, unsigned long start,
len -= bits_to_set;
bits_to_set = BITS_PER_BYTE;
mask_to_set = ~0U;
- if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+ if (++offset >= PAGE_SIZE && len > 0) {
offset = 0;
page = eb->pages[++i];
WARN_ON(!PageUptodate(page));
@@ -5618,7 +5612,7 @@ void extent_buffer_bitmap_clear(struct extent_buffer *eb, unsigned long start,
len -= bits_to_clear;
bits_to_clear = BITS_PER_BYTE;
mask_to_clear = ~0U;
- if (++offset >= PAGE_CACHE_SIZE && len > 0) {
+ if (++offset >= PAGE_SIZE && len > 0) {
offset = 0;
page = eb->pages[++i];
WARN_ON(!PageUptodate(page));
@@ -5665,7 +5659,7 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
size_t cur;
size_t dst_off_in_page;
size_t src_off_in_page;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+ size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
unsigned long dst_i;
unsigned long src_i;
@@ -5684,17 +5678,17 @@ void memcpy_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
while (len > 0) {
dst_off_in_page = (start_offset + dst_offset) &
- (PAGE_CACHE_SIZE - 1);
+ (PAGE_SIZE - 1);
src_off_in_page = (start_offset + src_offset) &
- (PAGE_CACHE_SIZE - 1);
+ (PAGE_SIZE - 1);
- dst_i = (start_offset + dst_offset) >> PAGE_CACHE_SHIFT;
- src_i = (start_offset + src_offset) >> PAGE_CACHE_SHIFT;
+ dst_i = (start_offset + dst_offset) >> PAGE_SHIFT;
+ src_i = (start_offset + src_offset) >> PAGE_SHIFT;
- cur = min(len, (unsigned long)(PAGE_CACHE_SIZE -
+ cur = min(len, (unsigned long)(PAGE_SIZE -
src_off_in_page));
cur = min_t(unsigned long, cur,
- (unsigned long)(PAGE_CACHE_SIZE - dst_off_in_page));
+ (unsigned long)(PAGE_SIZE - dst_off_in_page));
copy_pages(dst->pages[dst_i], dst->pages[src_i],
dst_off_in_page, src_off_in_page, cur);
@@ -5713,7 +5707,7 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
size_t src_off_in_page;
unsigned long dst_end = dst_offset + len - 1;
unsigned long src_end = src_offset + len - 1;
- size_t start_offset = dst->start & ((u64)PAGE_CACHE_SIZE - 1);
+ size_t start_offset = dst->start & ((u64)PAGE_SIZE - 1);
unsigned long dst_i;
unsigned long src_i;
@@ -5732,13 +5726,13 @@ void memmove_extent_buffer(struct extent_buffer *dst, unsigned long dst_offset,
return;
}
while (len > 0) {
- dst_i = (start_offset + dst_end) >> PAGE_CACHE_SHIFT;
- src_i = (start_offset + src_end) >> PAGE_CACHE_SHIFT;
+ dst_i = (start_offset + dst_end) >> PAGE_SHIFT;
+ src_i = (start_offset + src_end) >> PAGE_SHIFT;
dst_off_in_page = (start_offset + dst_end) &
- (PAGE_CACHE_SIZE - 1);
+ (PAGE_SIZE - 1);
src_off_in_page = (start_offset + src_end) &
- (PAGE_CACHE_SIZE - 1);
+ (PAGE_SIZE - 1);
cur = min_t(unsigned long, len, src_off_in_page + 1);
cur = min(cur, dst_off_in_page + 1);
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h
index 880d5292e..b5e0ade90 100644
--- a/fs/btrfs/extent_io.h
+++ b/fs/btrfs/extent_io.h
@@ -61,6 +61,7 @@
struct extent_state;
struct btrfs_root;
struct btrfs_io_bio;
+struct io_failure_record;
typedef int (extent_submit_bio_hook_t)(struct inode *inode, int rw,
struct bio *bio, int mirror_num,
@@ -111,8 +112,7 @@ struct extent_state {
atomic_t refs;
unsigned state;
- /* for use by the FS */
- u64 private;
+ struct io_failure_record *failrec;
#ifdef CONFIG_BTRFS_DEBUG
struct list_head leak_list;
@@ -120,7 +120,7 @@ struct extent_state {
};
#define INLINE_EXTENT_BUFFER_PAGES 16
-#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_CACHE_SIZE)
+#define MAX_INLINE_EXTENT_BUFFER_SIZE (INLINE_EXTENT_BUFFER_PAGES * PAGE_SIZE)
struct extent_buffer {
u64 start;
unsigned long len;
@@ -342,7 +342,6 @@ int extent_readpages(struct extent_io_tree *tree,
get_extent_t get_extent);
int extent_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
__u64 start, __u64 len, get_extent_t *get_extent);
-int get_state_private(struct extent_io_tree *tree, u64 start, u64 *private);
void set_page_extent_mapped(struct page *page);
struct extent_buffer *alloc_extent_buffer(struct btrfs_fs_info *fs_info,
@@ -366,8 +365,8 @@ void wait_on_extent_buffer_writeback(struct extent_buffer *eb);
static inline unsigned long num_extent_pages(u64 start, u64 len)
{
- return ((start + len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
- (start >> PAGE_CACHE_SHIFT);
+ return ((start + len + PAGE_SIZE - 1) >> PAGE_SHIFT) -
+ (start >> PAGE_SHIFT);
}
static inline void extent_buffer_get(struct extent_buffer *eb)
diff --git a/fs/btrfs/extent_map.c b/fs/btrfs/extent_map.c
index 84fb56d5c..318b048eb 100644
--- a/fs/btrfs/extent_map.c
+++ b/fs/btrfs/extent_map.c
@@ -4,6 +4,7 @@
#include <linux/hardirq.h>
#include "ctree.h"
#include "extent_map.h"
+#include "compression.h"
static struct kmem_cache *extent_map_cache;
@@ -20,8 +21,7 @@ int __init extent_map_init(void)
void extent_map_exit(void)
{
- if (extent_map_cache)
- kmem_cache_destroy(extent_map_cache);
+ kmem_cache_destroy(extent_map_cache);
}
/**
@@ -62,7 +62,7 @@ struct extent_map *alloc_extent_map(void)
/**
* free_extent_map - drop reference count of an extent_map
- * @em: extent map beeing releasead
+ * @em: extent map being releasead
*
* Drops the reference out on @em by one and free the structure
* if the reference count hits zero.
@@ -422,7 +422,7 @@ struct extent_map *search_extent_mapping(struct extent_map_tree *tree,
/**
* remove_extent_mapping - removes an extent_map from the extent tree
* @tree: extent tree to remove from
- * @em: extent map beeing removed
+ * @em: extent map being removed
*
* Removes @em from @tree. No reference counts are dropped, and no checks
* are done to see if the range is in use
diff --git a/fs/btrfs/file-item.c b/fs/btrfs/file-item.c
index a67e1c828..7a7d6e253 100644
--- a/fs/btrfs/file-item.c
+++ b/fs/btrfs/file-item.c
@@ -25,13 +25,14 @@
#include "transaction.h"
#include "volumes.h"
#include "print-tree.h"
+#include "compression.h"
#define __MAX_CSUM_ITEMS(r, size) ((unsigned long)(((BTRFS_LEAF_DATA_SIZE(r) - \
sizeof(struct btrfs_item) * 2) / \
size) - 1))
#define MAX_CSUM_ITEMS(r, size) (min_t(u32, __MAX_CSUM_ITEMS(r, size), \
- PAGE_CACHE_SIZE))
+ PAGE_SIZE))
#define MAX_ORDERED_SUM_BYTES(r) ((PAGE_SIZE - \
sizeof(struct btrfs_ordered_sum)) / \
@@ -172,6 +173,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
u64 item_start_offset = 0;
u64 item_last_offset = 0;
u64 disk_bytenr;
+ u64 page_bytes_left;
u32 diff;
int nblocks;
int bio_index = 0;
@@ -201,7 +203,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
csum = (u8 *)dst;
}
- if (bio->bi_iter.bi_size > PAGE_CACHE_SIZE * 8)
+ if (bio->bi_iter.bi_size > PAGE_SIZE * 8)
path->reada = READA_FORWARD;
WARN_ON(bio->bi_vcnt <= 0);
@@ -220,6 +222,8 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
disk_bytenr = (u64)bio->bi_iter.bi_sector << 9;
if (dio)
offset = logical_offset;
+
+ page_bytes_left = bvec->bv_len;
while (bio_index < bio->bi_vcnt) {
if (!dio)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
@@ -243,7 +247,7 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
if (BTRFS_I(inode)->root->root_key.objectid ==
BTRFS_DATA_RELOC_TREE_OBJECTID) {
set_extent_bits(io_tree, offset,
- offset + bvec->bv_len - 1,
+ offset + root->sectorsize - 1,
EXTENT_NODATASUM, GFP_NOFS);
} else {
btrfs_info(BTRFS_I(inode)->root->fs_info,
@@ -281,13 +285,29 @@ static int __btrfs_lookup_bio_sums(struct btrfs_root *root,
found:
csum += count * csum_size;
nblocks -= count;
- bio_index += count;
+
while (count--) {
- disk_bytenr += bvec->bv_len;
- offset += bvec->bv_len;
- bvec++;
+ disk_bytenr += root->sectorsize;
+ offset += root->sectorsize;
+ page_bytes_left -= root->sectorsize;
+ if (!page_bytes_left) {
+ bio_index++;
+ /*
+ * make sure we're still inside the
+ * bio before we update page_bytes_left
+ */
+ if (bio_index >= bio->bi_vcnt) {
+ WARN_ON_ONCE(count);
+ goto done;
+ }
+ bvec++;
+ page_bytes_left = bvec->bv_len;
+ }
+
}
}
+
+done:
btrfs_free_path(path);
return 0;
}
@@ -432,6 +452,8 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
struct bio_vec *bvec = bio->bi_io_vec;
int bio_index = 0;
int index;
+ int nr_sectors;
+ int i;
unsigned long total_bytes = 0;
unsigned long this_sum_bytes = 0;
u64 offset;
@@ -459,41 +481,56 @@ int btrfs_csum_one_bio(struct btrfs_root *root, struct inode *inode,
if (!contig)
offset = page_offset(bvec->bv_page) + bvec->bv_offset;
- if (offset >= ordered->file_offset + ordered->len ||
- offset < ordered->file_offset) {
- unsigned long bytes_left;
- sums->len = this_sum_bytes;
- this_sum_bytes = 0;
- btrfs_add_ordered_sum(inode, ordered, sums);
- btrfs_put_ordered_extent(ordered);
+ data = kmap_atomic(bvec->bv_page);
- bytes_left = bio->bi_iter.bi_size - total_bytes;
+ nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+ bvec->bv_len + root->sectorsize
+ - 1);
+
+ for (i = 0; i < nr_sectors; i++) {
+ if (offset >= ordered->file_offset + ordered->len ||
+ offset < ordered->file_offset) {
+ unsigned long bytes_left;
+
+ kunmap_atomic(data);
+ sums->len = this_sum_bytes;
+ this_sum_bytes = 0;
+ btrfs_add_ordered_sum(inode, ordered, sums);
+ btrfs_put_ordered_extent(ordered);
+
+ bytes_left = bio->bi_iter.bi_size - total_bytes;
+
+ sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
+ GFP_NOFS);
+ BUG_ON(!sums); /* -ENOMEM */
+ sums->len = bytes_left;
+ ordered = btrfs_lookup_ordered_extent(inode,
+ offset);
+ ASSERT(ordered); /* Logic error */
+ sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9)
+ + total_bytes;
+ index = 0;
+
+ data = kmap_atomic(bvec->bv_page);
+ }
- sums = kzalloc(btrfs_ordered_sum_size(root, bytes_left),
- GFP_NOFS);
- BUG_ON(!sums); /* -ENOMEM */
- sums->len = bytes_left;
- ordered = btrfs_lookup_ordered_extent(inode, offset);
- BUG_ON(!ordered); /* Logic error */
- sums->bytenr = ((u64)bio->bi_iter.bi_sector << 9) +
- total_bytes;
- index = 0;
+ sums->sums[index] = ~(u32)0;
+ sums->sums[index]
+ = btrfs_csum_data(data + bvec->bv_offset
+ + (i * root->sectorsize),
+ sums->sums[index],
+ root->sectorsize);
+ btrfs_csum_final(sums->sums[index],
+ (char *)(sums->sums + index));
+ index++;
+ offset += root->sectorsize;
+ this_sum_bytes += root->sectorsize;
+ total_bytes += root->sectorsize;
}
- data = kmap_atomic(bvec->bv_page);
- sums->sums[index] = ~(u32)0;
- sums->sums[index] = btrfs_csum_data(data + bvec->bv_offset,
- sums->sums[index],
- bvec->bv_len);
kunmap_atomic(data);
- btrfs_csum_final(sums->sums[index],
- (char *)(sums->sums + index));
bio_index++;
- index++;
- total_bytes += bvec->bv_len;
- this_sum_bytes += bvec->bv_len;
- offset += bvec->bv_len;
bvec++;
}
this_sum_bytes = 0;
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index 9a30ca640..af5c7fa22 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -41,6 +41,7 @@
#include "locking.h"
#include "volumes.h"
#include "qgroup.h"
+#include "compression.h"
static struct kmem_cache *btrfs_inode_defrag_cachep;
/*
@@ -413,11 +414,11 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
size_t copied = 0;
size_t total_copied = 0;
int pg = 0;
- int offset = pos & (PAGE_CACHE_SIZE - 1);
+ int offset = pos & (PAGE_SIZE - 1);
while (write_bytes > 0) {
size_t count = min_t(size_t,
- PAGE_CACHE_SIZE - offset, write_bytes);
+ PAGE_SIZE - offset, write_bytes);
struct page *page = prepared_pages[pg];
/*
* Copy data from userspace to the current page
@@ -447,7 +448,7 @@ static noinline int btrfs_copy_from_user(loff_t pos, size_t write_bytes,
if (unlikely(copied == 0))
break;
- if (copied < PAGE_CACHE_SIZE - offset) {
+ if (copied < PAGE_SIZE - offset) {
offset += copied;
} else {
pg++;
@@ -472,7 +473,7 @@ static void btrfs_drop_pages(struct page **pages, size_t num_pages)
*/
ClearPageChecked(pages[i]);
unlock_page(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
}
@@ -498,7 +499,7 @@ int btrfs_dirty_pages(struct btrfs_root *root, struct inode *inode,
loff_t isize = i_size_read(inode);
start_pos = pos & ~((u64)root->sectorsize - 1);
- num_bytes = ALIGN(write_bytes + pos - start_pos, root->sectorsize);
+ num_bytes = round_up(write_bytes + pos - start_pos, root->sectorsize);
end_of_last_block = start_pos + num_bytes - 1;
err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
@@ -1296,7 +1297,7 @@ static int prepare_uptodate_page(struct inode *inode,
{
int ret = 0;
- if (((pos & (PAGE_CACHE_SIZE - 1)) || force_uptodate) &&
+ if (((pos & (PAGE_SIZE - 1)) || force_uptodate) &&
!PageUptodate(page)) {
ret = btrfs_readpage(NULL, page);
if (ret)
@@ -1322,7 +1323,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
size_t write_bytes, bool force_uptodate)
{
int i;
- unsigned long index = pos >> PAGE_CACHE_SHIFT;
+ unsigned long index = pos >> PAGE_SHIFT;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
int err = 0;
int faili;
@@ -1344,7 +1345,7 @@ again:
err = prepare_uptodate_page(inode, pages[i],
pos + write_bytes, false);
if (err) {
- page_cache_release(pages[i]);
+ put_page(pages[i]);
if (err == -EAGAIN) {
err = 0;
goto again;
@@ -1359,7 +1360,7 @@ again:
fail:
while (faili >= 0) {
unlock_page(pages[faili]);
- page_cache_release(pages[faili]);
+ put_page(pages[faili]);
faili--;
}
return err;
@@ -1379,16 +1380,19 @@ fail:
static noinline int
lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
size_t num_pages, loff_t pos,
+ size_t write_bytes,
u64 *lockstart, u64 *lockend,
struct extent_state **cached_state)
{
+ struct btrfs_root *root = BTRFS_I(inode)->root;
u64 start_pos;
u64 last_pos;
int i;
int ret = 0;
- start_pos = pos & ~((u64)PAGE_CACHE_SIZE - 1);
- last_pos = start_pos + ((u64)num_pages << PAGE_CACHE_SHIFT) - 1;
+ start_pos = round_down(pos, root->sectorsize);
+ last_pos = start_pos
+ + round_up(pos + write_bytes - start_pos, root->sectorsize) - 1;
if (start_pos < inode->i_size) {
struct btrfs_ordered_extent *ordered;
@@ -1404,7 +1408,7 @@ lock_and_cleanup_extent_if_need(struct inode *inode, struct page **pages,
cached_state, GFP_NOFS);
for (i = 0; i < num_pages; i++) {
unlock_page(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
@@ -1493,8 +1497,8 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
bool force_page_uptodate = false;
bool need_unlock;
- nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_CACHE_SIZE),
- PAGE_CACHE_SIZE / (sizeof(struct page *)));
+ nrptrs = min(DIV_ROUND_UP(iov_iter_count(i), PAGE_SIZE),
+ PAGE_SIZE / (sizeof(struct page *)));
nrptrs = min(nrptrs, current->nr_dirtied_pause - current->nr_dirtied);
nrptrs = max(nrptrs, 8);
pages = kmalloc_array(nrptrs, sizeof(struct page *), GFP_KERNEL);
@@ -1502,15 +1506,18 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
return -ENOMEM;
while (iov_iter_count(i) > 0) {
- size_t offset = pos & (PAGE_CACHE_SIZE - 1);
+ size_t offset = pos & (PAGE_SIZE - 1);
+ size_t sector_offset;
size_t write_bytes = min(iov_iter_count(i),
- nrptrs * (size_t)PAGE_CACHE_SIZE -
+ nrptrs * (size_t)PAGE_SIZE -
offset);
size_t num_pages = DIV_ROUND_UP(write_bytes + offset,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
size_t reserve_bytes;
size_t dirty_pages;
size_t copied;
+ size_t dirty_sectors;
+ size_t num_sectors;
WARN_ON(num_pages > nrptrs);
@@ -1523,29 +1530,29 @@ static noinline ssize_t __btrfs_buffered_write(struct file *file,
break;
}
- reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
+ sector_offset = pos & (root->sectorsize - 1);
+ reserve_bytes = round_up(write_bytes + sector_offset,
+ root->sectorsize);
- if (BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
- BTRFS_INODE_PREALLOC)) {
- ret = check_can_nocow(inode, pos, &write_bytes);
- if (ret < 0)
- break;
- if (ret > 0) {
- /*
- * For nodata cow case, no need to reserve
- * data space.
- */
- only_release_metadata = true;
- /*
- * our prealloc extent may be smaller than
- * write_bytes, so scale down.
- */
- num_pages = DIV_ROUND_UP(write_bytes + offset,
- PAGE_CACHE_SIZE);
- reserve_bytes = num_pages << PAGE_CACHE_SHIFT;
- goto reserve_metadata;
- }
+ if ((BTRFS_I(inode)->flags & (BTRFS_INODE_NODATACOW |
+ BTRFS_INODE_PREALLOC)) &&
+ check_can_nocow(inode, pos, &write_bytes) > 0) {
+ /*
+ * For nodata cow case, no need to reserve
+ * data space.
+ */
+ only_release_metadata = true;
+ /*
+ * our prealloc extent may be smaller than
+ * write_bytes, so scale down.
+ */
+ num_pages = DIV_ROUND_UP(write_bytes + offset,
+ PAGE_SIZE);
+ reserve_bytes = round_up(write_bytes + sector_offset,
+ root->sectorsize);
+ goto reserve_metadata;
}
+
ret = btrfs_check_data_free_space(inode, pos, write_bytes);
if (ret < 0)
break;
@@ -1576,8 +1583,8 @@ again:
break;
ret = lock_and_cleanup_extent_if_need(inode, pages, num_pages,
- pos, &lockstart, &lockend,
- &cached_state);
+ pos, write_bytes, &lockstart,
+ &lockend, &cached_state);
if (ret < 0) {
if (ret == -EAGAIN)
goto again;
@@ -1589,6 +1596,13 @@ again:
copied = btrfs_copy_from_user(pos, write_bytes, pages, i);
+ num_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+ reserve_bytes);
+ dirty_sectors = round_up(copied + sector_offset,
+ root->sectorsize);
+ dirty_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info,
+ dirty_sectors);
+
/*
* if we have trouble faulting in the pages, fall
* back to one page at a time
@@ -1598,23 +1612,30 @@ again:
if (copied == 0) {
force_page_uptodate = true;
+ dirty_sectors = 0;
dirty_pages = 0;
} else {
force_page_uptodate = false;
dirty_pages = DIV_ROUND_UP(copied + offset,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
}
/*
* If we had a short copy we need to release the excess delaloc
* bytes we reserved. We need to increment outstanding_extents
- * because btrfs_delalloc_release_space will decrement it, but
+ * because btrfs_delalloc_release_space and
+ * btrfs_delalloc_release_metadata will decrement it, but
* we still have an outstanding extent for the chunk we actually
* managed to copy.
*/
- if (num_pages > dirty_pages) {
- release_bytes = (num_pages - dirty_pages) <<
- PAGE_CACHE_SHIFT;
+ if (num_sectors > dirty_sectors) {
+ /*
+ * we round down because we don't want to count
+ * any partial blocks actually sent through the
+ * IO machines
+ */
+ release_bytes = round_down(release_bytes - copied,
+ root->sectorsize);
if (copied > 0) {
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->outstanding_extents++;
@@ -1627,13 +1648,14 @@ again:
u64 __pos;
__pos = round_down(pos, root->sectorsize) +
- (dirty_pages << PAGE_CACHE_SHIFT);
+ (dirty_pages << PAGE_SHIFT);
btrfs_delalloc_release_space(inode, __pos,
release_bytes);
}
}
- release_bytes = dirty_pages << PAGE_CACHE_SHIFT;
+ release_bytes = round_up(copied + sector_offset,
+ root->sectorsize);
if (copied > 0)
ret = btrfs_dirty_pages(root, inode, pages,
@@ -1654,8 +1676,7 @@ again:
if (only_release_metadata && copied > 0) {
lockstart = round_down(pos, root->sectorsize);
- lockend = lockstart +
- (dirty_pages << PAGE_CACHE_SHIFT) - 1;
+ lockend = round_up(pos + copied, root->sectorsize) - 1;
set_extent_bit(&BTRFS_I(inode)->io_tree, lockstart,
lockend, EXTENT_NORESERVE, NULL,
@@ -1668,7 +1689,7 @@ again:
cond_resched();
balance_dirty_pages_ratelimited(inode->i_mapping);
- if (dirty_pages < (root->nodesize >> PAGE_CACHE_SHIFT) + 1)
+ if (dirty_pages < (root->nodesize >> PAGE_SHIFT) + 1)
btrfs_btree_balance_dirty(root);
pos += copied;
@@ -1724,8 +1745,8 @@ static ssize_t __btrfs_direct_write(struct kiocb *iocb,
goto out;
written += written_buffered;
iocb->ki_pos = pos + written_buffered;
- invalidate_mapping_pages(file->f_mapping, pos >> PAGE_CACHE_SHIFT,
- endbyte >> PAGE_CACHE_SHIFT);
+ invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
+ endbyte >> PAGE_SHIFT);
out:
return written ? written : err;
}
@@ -1761,6 +1782,8 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
ssize_t err;
loff_t pos;
size_t count;
+ loff_t oldsize;
+ int clean_page = 0;
inode_lock(inode);
err = generic_write_checks(iocb, from);
@@ -1799,14 +1822,17 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
pos = iocb->ki_pos;
count = iov_iter_count(from);
start_pos = round_down(pos, root->sectorsize);
- if (start_pos > i_size_read(inode)) {
+ oldsize = i_size_read(inode);
+ if (start_pos > oldsize) {
/* Expand hole size to cover write data, preventing empty gap */
end_pos = round_up(pos + count, root->sectorsize);
- err = btrfs_cont_expand(inode, i_size_read(inode), end_pos);
+ err = btrfs_cont_expand(inode, oldsize, end_pos);
if (err) {
inode_unlock(inode);
goto out;
}
+ if (start_pos > round_up(oldsize, root->sectorsize))
+ clean_page = 1;
}
if (sync)
@@ -1818,6 +1844,9 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
num_written = __btrfs_buffered_write(file, from, pos);
if (num_written > 0)
iocb->ki_pos = pos + num_written;
+ if (clean_page)
+ pagecache_isize_extended(inode, oldsize,
+ i_size_read(inode));
}
inode_unlock(inode);
@@ -1825,7 +1854,7 @@ static ssize_t btrfs_file_write_iter(struct kiocb *iocb,
/*
* We also have to set last_sub_trans to the current log transid,
* otherwise subsequent syncs to a file that's been synced in this
- * transaction will appear to have already occured.
+ * transaction will appear to have already occurred.
*/
spin_lock(&BTRFS_I(inode)->lock);
BTRFS_I(inode)->last_sub_trans = root->log_transid;
@@ -1996,10 +2025,11 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
*/
smp_mb();
if (btrfs_inode_in_log(inode, root->fs_info->generation) ||
- (BTRFS_I(inode)->last_trans <=
- root->fs_info->last_trans_committed &&
- (full_sync ||
- !btrfs_have_ordered_extents_in_range(inode, start, len)))) {
+ (full_sync && BTRFS_I(inode)->last_trans <=
+ root->fs_info->last_trans_committed) ||
+ (!btrfs_have_ordered_extents_in_range(inode, start, len) &&
+ BTRFS_I(inode)->last_trans
+ <= root->fs_info->last_trans_committed)) {
/*
* We'v had everything committed since the last time we were
* modified so clear this flag in case it was set for whatever
@@ -2293,10 +2323,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
int ret = 0;
int err = 0;
unsigned int rsv_count;
- bool same_page;
+ bool same_block;
bool no_holes = btrfs_fs_incompat(root->fs_info, NO_HOLES);
u64 ino_size;
- bool truncated_page = false;
+ bool truncated_block = false;
bool updated_inode = false;
ret = btrfs_wait_ordered_range(inode, offset, len);
@@ -2304,7 +2334,7 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
return ret;
inode_lock(inode);
- ino_size = round_up(inode->i_size, PAGE_CACHE_SIZE);
+ ino_size = round_up(inode->i_size, root->sectorsize);
ret = find_first_non_hole(inode, &offset, &len);
if (ret < 0)
goto out_only_mutex;
@@ -2317,31 +2347,30 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
lockstart = round_up(offset, BTRFS_I(inode)->root->sectorsize);
lockend = round_down(offset + len,
BTRFS_I(inode)->root->sectorsize) - 1;
- same_page = ((offset >> PAGE_CACHE_SHIFT) ==
- ((offset + len - 1) >> PAGE_CACHE_SHIFT));
-
+ same_block = (BTRFS_BYTES_TO_BLKS(root->fs_info, offset))
+ == (BTRFS_BYTES_TO_BLKS(root->fs_info, offset + len - 1));
/*
- * We needn't truncate any page which is beyond the end of the file
+ * We needn't truncate any block which is beyond the end of the file
* because we are sure there is no data there.
*/
/*
- * Only do this if we are in the same page and we aren't doing the
- * entire page.
+ * Only do this if we are in the same block and we aren't doing the
+ * entire block.
*/
- if (same_page && len < PAGE_CACHE_SIZE) {
+ if (same_block && len < root->sectorsize) {
if (offset < ino_size) {
- truncated_page = true;
- ret = btrfs_truncate_page(inode, offset, len, 0);
+ truncated_block = true;
+ ret = btrfs_truncate_block(inode, offset, len, 0);
} else {
ret = 0;
}
goto out_only_mutex;
}
- /* zero back part of the first page */
+ /* zero back part of the first block */
if (offset < ino_size) {
- truncated_page = true;
- ret = btrfs_truncate_page(inode, offset, 0, 0);
+ truncated_block = true;
+ ret = btrfs_truncate_block(inode, offset, 0, 0);
if (ret) {
inode_unlock(inode);
return ret;
@@ -2376,9 +2405,10 @@ static int btrfs_punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (!ret) {
/* zero the front end of the last page */
if (tail_start + tail_len < ino_size) {
- truncated_page = true;
- ret = btrfs_truncate_page(inode,
- tail_start + tail_len, 0, 1);
+ truncated_block = true;
+ ret = btrfs_truncate_block(inode,
+ tail_start + tail_len,
+ 0, 1);
if (ret)
goto out_only_mutex;
}
@@ -2544,7 +2574,7 @@ out_trans:
goto out_free;
inode_inc_iversion(inode);
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
trans->block_rsv = &root->fs_info->trans_block_rsv;
ret = btrfs_update_inode(trans, root, inode);
@@ -2558,7 +2588,7 @@ out:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state, GFP_NOFS);
out_only_mutex:
- if (!updated_inode && truncated_page && !ret && !err) {
+ if (!updated_inode && truncated_block && !ret && !err) {
/*
* If we only end up zeroing part of a page, we still need to
* update the inode item, so that all the time fields are
@@ -2611,7 +2641,7 @@ static int add_falloc_range(struct list_head *head, u64 start, u64 len)
return 0;
}
insert:
- range = kmalloc(sizeof(*range), GFP_NOFS);
+ range = kmalloc(sizeof(*range), GFP_KERNEL);
if (!range)
return -ENOMEM;
range->start = start;
@@ -2659,9 +2689,12 @@ static long btrfs_fallocate(struct file *file, int mode,
return ret;
inode_lock(inode);
- ret = inode_newsize_ok(inode, alloc_end);
- if (ret)
- goto out;
+
+ if (!(mode & FALLOC_FL_KEEP_SIZE) && offset + len > inode->i_size) {
+ ret = inode_newsize_ok(inode, offset + len);
+ if (ret)
+ goto out;
+ }
/*
* TODO: Move these two operations after we have checked
@@ -2678,10 +2711,10 @@ static long btrfs_fallocate(struct file *file, int mode,
} else if (offset + len > inode->i_size) {
/*
* If we are fallocating from the end of the file onward we
- * need to zero out the end of the page if i_size lands in the
- * middle of a page.
+ * need to zero out the end of the block if i_size lands in the
+ * middle of a block.
*/
- ret = btrfs_truncate_page(inode, inode->i_size, 0, 0);
+ ret = btrfs_truncate_block(inode, inode->i_size, 0, 0);
if (ret)
goto out;
}
@@ -2712,7 +2745,7 @@ static long btrfs_fallocate(struct file *file, int mode,
btrfs_put_ordered_extent(ordered);
unlock_extent_cached(&BTRFS_I(inode)->io_tree,
alloc_start, locked_end,
- &cached_state, GFP_NOFS);
+ &cached_state, GFP_KERNEL);
/*
* we can't wait on the range with the transaction
* running or with the extent lock held
@@ -2794,7 +2827,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (IS_ERR(trans)) {
ret = PTR_ERR(trans);
} else {
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
i_size_write(inode, actual_end);
btrfs_ordered_update_i_size(inode, actual_end, NULL);
ret = btrfs_update_inode(trans, root, inode);
@@ -2806,7 +2839,7 @@ static long btrfs_fallocate(struct file *file, int mode,
}
out_unlock:
unlock_extent_cached(&BTRFS_I(inode)->io_tree, alloc_start, locked_end,
- &cached_state, GFP_NOFS);
+ &cached_state, GFP_KERNEL);
out:
/*
* As we waited the extent range, the data_rsv_map must be empty
@@ -2930,7 +2963,7 @@ const struct file_operations btrfs_file_operations = {
.fallocate = btrfs_fallocate,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
- .compat_ioctl = btrfs_ioctl,
+ .compat_ioctl = btrfs_compat_ioctl,
#endif
.copy_file_range = btrfs_copy_file_range,
.clone_file_range = btrfs_clone_file_range,
@@ -2939,8 +2972,7 @@ const struct file_operations btrfs_file_operations = {
void btrfs_auto_defrag_exit(void)
{
- if (btrfs_inode_defrag_cachep)
- kmem_cache_destroy(btrfs_inode_defrag_cachep);
+ kmem_cache_destroy(btrfs_inode_defrag_cachep);
}
int btrfs_auto_defrag_init(void)
diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index 8f835bfa1..5e6062c26 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -29,7 +29,7 @@
#include "inode-map.h"
#include "volumes.h"
-#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
+#define BITS_PER_BITMAP (PAGE_SIZE * 8)
#define MAX_CACHE_BYTES_PER_GIG SZ_32K
struct btrfs_trim_range {
@@ -295,7 +295,7 @@ static int readahead_cache(struct inode *inode)
return -ENOMEM;
file_ra_state_init(ra, inode->i_mapping);
- last_index = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
+ last_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
page_cache_sync_readahead(inode->i_mapping, ra, NULL, 0, last_index);
@@ -310,14 +310,14 @@ static int io_ctl_init(struct btrfs_io_ctl *io_ctl, struct inode *inode,
int num_pages;
int check_crcs = 0;
- num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
+ num_pages = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (btrfs_ino(inode) != BTRFS_FREE_INO_OBJECTID)
check_crcs = 1;
/* Make sure we can fit our crcs into the first page */
if (write && check_crcs &&
- (num_pages * sizeof(u32)) >= PAGE_CACHE_SIZE)
+ (num_pages * sizeof(u32)) >= PAGE_SIZE)
return -ENOSPC;
memset(io_ctl, 0, sizeof(struct btrfs_io_ctl));
@@ -354,9 +354,9 @@ static void io_ctl_map_page(struct btrfs_io_ctl *io_ctl, int clear)
io_ctl->page = io_ctl->pages[io_ctl->index++];
io_ctl->cur = page_address(io_ctl->page);
io_ctl->orig = io_ctl->cur;
- io_ctl->size = PAGE_CACHE_SIZE;
+ io_ctl->size = PAGE_SIZE;
if (clear)
- memset(io_ctl->cur, 0, PAGE_CACHE_SIZE);
+ memset(io_ctl->cur, 0, PAGE_SIZE);
}
static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
@@ -369,7 +369,7 @@ static void io_ctl_drop_pages(struct btrfs_io_ctl *io_ctl)
if (io_ctl->pages[i]) {
ClearPageChecked(io_ctl->pages[i]);
unlock_page(io_ctl->pages[i]);
- page_cache_release(io_ctl->pages[i]);
+ put_page(io_ctl->pages[i]);
}
}
}
@@ -475,7 +475,7 @@ static void io_ctl_set_crc(struct btrfs_io_ctl *io_ctl, int index)
offset = sizeof(u32) * io_ctl->num_pages;
crc = btrfs_csum_data(io_ctl->orig + offset, crc,
- PAGE_CACHE_SIZE - offset);
+ PAGE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
io_ctl_unmap_page(io_ctl);
tmp = page_address(io_ctl->pages[0]);
@@ -503,7 +503,7 @@ static int io_ctl_check_crc(struct btrfs_io_ctl *io_ctl, int index)
io_ctl_map_page(io_ctl, 0);
crc = btrfs_csum_data(io_ctl->orig + offset, crc,
- PAGE_CACHE_SIZE - offset);
+ PAGE_SIZE - offset);
btrfs_csum_final(crc, (char *)&crc);
if (val != crc) {
btrfs_err_rl(io_ctl->root->fs_info,
@@ -561,7 +561,7 @@ static int io_ctl_add_bitmap(struct btrfs_io_ctl *io_ctl, void *bitmap)
io_ctl_map_page(io_ctl, 0);
}
- memcpy(io_ctl->cur, bitmap, PAGE_CACHE_SIZE);
+ memcpy(io_ctl->cur, bitmap, PAGE_SIZE);
io_ctl_set_crc(io_ctl, io_ctl->index - 1);
if (io_ctl->index < io_ctl->num_pages)
io_ctl_map_page(io_ctl, 0);
@@ -621,7 +621,7 @@ static int io_ctl_read_bitmap(struct btrfs_io_ctl *io_ctl,
if (ret)
return ret;
- memcpy(entry->bitmap, io_ctl->cur, PAGE_CACHE_SIZE);
+ memcpy(entry->bitmap, io_ctl->cur, PAGE_SIZE);
io_ctl_unmap_page(io_ctl);
return 0;
@@ -775,7 +775,7 @@ static int __load_free_space_cache(struct btrfs_root *root, struct inode *inode,
} else {
ASSERT(num_bitmaps);
num_bitmaps--;
- e->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ e->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
if (!e->bitmap) {
kmem_cache_free(
btrfs_free_space_cachep, e);
@@ -1660,7 +1660,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
* sure we don't go over our overall goal of MAX_CACHE_BYTES_PER_GIG as
* we add more bitmaps.
*/
- bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_CACHE_SIZE;
+ bitmap_bytes = (ctl->total_bitmaps + 1) * PAGE_SIZE;
if (bitmap_bytes >= max_bytes) {
ctl->extents_thresh = 0;
@@ -2111,7 +2111,7 @@ new_bitmap:
}
/* allocate the bitmap */
- info->bitmap = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ info->bitmap = kzalloc(PAGE_SIZE, GFP_NOFS);
spin_lock(&ctl->tree_lock);
if (!info->bitmap) {
ret = -ENOMEM;
@@ -3580,7 +3580,7 @@ again:
}
if (!map) {
- map = kzalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ map = kzalloc(PAGE_SIZE, GFP_NOFS);
if (!map) {
kmem_cache_free(btrfs_free_space_cachep, info);
return -ENOMEM;
diff --git a/fs/btrfs/inode-map.c b/fs/btrfs/inode-map.c
index e50316c4a..70107f7c9 100644
--- a/fs/btrfs/inode-map.c
+++ b/fs/btrfs/inode-map.c
@@ -283,7 +283,7 @@ void btrfs_unpin_free_ino(struct btrfs_root *root)
}
#define INIT_THRESHOLD ((SZ_32K / 2) / sizeof(struct btrfs_free_space))
-#define INODES_PER_BITMAP (PAGE_CACHE_SIZE * 8)
+#define INODES_PER_BITMAP (PAGE_SIZE * 8)
/*
* The goal is to keep the memory used by the free_ino tree won't
@@ -317,7 +317,7 @@ static void recalculate_thresholds(struct btrfs_free_space_ctl *ctl)
}
ctl->extents_thresh = (max_bitmaps - ctl->total_bitmaps) *
- PAGE_CACHE_SIZE / sizeof(*info);
+ PAGE_SIZE / sizeof(*info);
}
/*
@@ -481,12 +481,12 @@ again:
spin_lock(&ctl->tree_lock);
prealloc = sizeof(struct btrfs_free_space) * ctl->free_extents;
- prealloc = ALIGN(prealloc, PAGE_CACHE_SIZE);
- prealloc += ctl->total_bitmaps * PAGE_CACHE_SIZE;
+ prealloc = ALIGN(prealloc, PAGE_SIZE);
+ prealloc += ctl->total_bitmaps * PAGE_SIZE;
spin_unlock(&ctl->tree_lock);
/* Just to make sure we have enough space */
- prealloc += 8 * PAGE_CACHE_SIZE;
+ prealloc += 8 * PAGE_SIZE;
ret = btrfs_delalloc_reserve_space(inode, 0, prealloc);
if (ret)
@@ -556,6 +556,9 @@ int btrfs_find_free_objectid(struct btrfs_root *root, u64 *objectid)
mutex_lock(&root->objectid_mutex);
if (unlikely(root->highest_objectid >= BTRFS_LAST_FREE_OBJECTID)) {
+ btrfs_warn(root->fs_info,
+ "the objectid of root %llu reaches its highest value",
+ root->root_key.objectid);
ret = -ENOSPC;
goto out;
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index d96f5cf38..167fc3d49 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -194,7 +194,7 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
while (compressed_size > 0) {
cpage = compressed_pages[i];
cur_size = min_t(unsigned long, compressed_size,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
kaddr = kmap_atomic(cpage);
write_extent_buffer(leaf, kaddr, ptr, cur_size);
@@ -208,13 +208,13 @@ static int insert_inline_extent(struct btrfs_trans_handle *trans,
compress_type);
} else {
page = find_get_page(inode->i_mapping,
- start >> PAGE_CACHE_SHIFT);
+ start >> PAGE_SHIFT);
btrfs_set_file_extent_compression(leaf, ei, 0);
kaddr = kmap_atomic(page);
- offset = start & (PAGE_CACHE_SIZE - 1);
+ offset = start & (PAGE_SIZE - 1);
write_extent_buffer(leaf, kaddr + offset, ptr, size);
kunmap_atomic(kaddr);
- page_cache_release(page);
+ put_page(page);
}
btrfs_mark_buffer_dirty(leaf);
btrfs_release_path(path);
@@ -263,7 +263,7 @@ static noinline int cow_file_range_inline(struct btrfs_root *root,
data_len = compressed_size;
if (start > 0 ||
- actual_end > PAGE_CACHE_SIZE ||
+ actual_end > root->sectorsize ||
data_len > BTRFS_MAX_INLINE_DATA_SIZE(root) ||
(!compressed_size &&
(actual_end & (root->sectorsize - 1)) == 0) ||
@@ -322,7 +322,7 @@ out:
* And at reserve time, it's always aligned to page size, so
* just free one page here.
*/
- btrfs_qgroup_free_data(inode, 0, PAGE_CACHE_SIZE);
+ btrfs_qgroup_free_data(inode, 0, PAGE_SIZE);
btrfs_free_path(path);
btrfs_end_transaction(trans, root);
return ret;
@@ -435,8 +435,8 @@ static noinline void compress_file_range(struct inode *inode,
actual_end = min_t(u64, isize, end + 1);
again:
will_compress = 0;
- nr_pages = (end >> PAGE_CACHE_SHIFT) - (start >> PAGE_CACHE_SHIFT) + 1;
- nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_CACHE_SIZE);
+ nr_pages = (end >> PAGE_SHIFT) - (start >> PAGE_SHIFT) + 1;
+ nr_pages = min_t(unsigned long, nr_pages, SZ_128K / PAGE_SIZE);
/*
* we don't want to send crud past the end of i_size through
@@ -514,7 +514,7 @@ again:
if (!ret) {
unsigned long offset = total_compressed &
- (PAGE_CACHE_SIZE - 1);
+ (PAGE_SIZE - 1);
struct page *page = pages[nr_pages_ret - 1];
char *kaddr;
@@ -524,7 +524,7 @@ again:
if (offset) {
kaddr = kmap_atomic(page);
memset(kaddr + offset, 0,
- PAGE_CACHE_SIZE - offset);
+ PAGE_SIZE - offset);
kunmap_atomic(kaddr);
}
will_compress = 1;
@@ -580,7 +580,7 @@ cont:
* one last check to make sure the compression is really a
* win, compare the page count read with the blocks on disk
*/
- total_in = ALIGN(total_in, PAGE_CACHE_SIZE);
+ total_in = ALIGN(total_in, PAGE_SIZE);
if (total_compressed >= total_in) {
will_compress = 0;
} else {
@@ -594,7 +594,7 @@ cont:
*/
for (i = 0; i < nr_pages_ret; i++) {
WARN_ON(pages[i]->mapping);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
kfree(pages);
pages = NULL;
@@ -650,7 +650,7 @@ cleanup_and_bail_uncompressed:
free_pages_out:
for (i = 0; i < nr_pages_ret; i++) {
WARN_ON(pages[i]->mapping);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
kfree(pages);
}
@@ -664,7 +664,7 @@ static void free_async_extent_pages(struct async_extent *async_extent)
for (i = 0; i < async_extent->nr_pages; i++) {
WARN_ON(async_extent->pages[i]->mapping);
- page_cache_release(async_extent->pages[i]);
+ put_page(async_extent->pages[i]);
}
kfree(async_extent->pages);
async_extent->nr_pages = 0;
@@ -966,7 +966,7 @@ static noinline int cow_file_range(struct inode *inode,
PAGE_END_WRITEBACK);
*nr_written = *nr_written +
- (end - start + PAGE_CACHE_SIZE) / PAGE_CACHE_SIZE;
+ (end - start + PAGE_SIZE) / PAGE_SIZE;
*page_started = 1;
goto out;
} else if (ret < 0) {
@@ -1106,8 +1106,8 @@ static noinline void async_cow_submit(struct btrfs_work *work)
async_cow = container_of(work, struct async_cow, work);
root = async_cow->root;
- nr_pages = (async_cow->end - async_cow->start + PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
+ nr_pages = (async_cow->end - async_cow->start + PAGE_SIZE) >>
+ PAGE_SHIFT;
/*
* atomic_sub_return implies a barrier for waitqueue_active
@@ -1164,8 +1164,8 @@ static int cow_file_range_async(struct inode *inode, struct page *locked_page,
async_cow_start, async_cow_submit,
async_cow_free);
- nr_pages = (cur_end - start + PAGE_CACHE_SIZE) >>
- PAGE_CACHE_SHIFT;
+ nr_pages = (cur_end - start + PAGE_SIZE) >>
+ PAGE_SHIFT;
atomic_add(nr_pages, &root->fs_info->async_delalloc_pages);
btrfs_queue_work(root->fs_info->delalloc_workers,
@@ -1960,7 +1960,7 @@ static noinline int add_pending_csums(struct btrfs_trans_handle *trans,
int btrfs_set_extent_delalloc(struct inode *inode, u64 start, u64 end,
struct extent_state **cached_state)
{
- WARN_ON((end & (PAGE_CACHE_SIZE - 1)) == 0);
+ WARN_ON((end & (PAGE_SIZE - 1)) == 0);
return set_extent_delalloc(&BTRFS_I(inode)->io_tree, start, end,
cached_state, GFP_NOFS);
}
@@ -1993,7 +1993,7 @@ again:
inode = page->mapping->host;
page_start = page_offset(page);
- page_end = page_offset(page) + PAGE_CACHE_SIZE - 1;
+ page_end = page_offset(page) + PAGE_SIZE - 1;
lock_extent_bits(&BTRFS_I(inode)->io_tree, page_start, page_end,
&cached_state);
@@ -2002,7 +2002,8 @@ again:
if (PagePrivate2(page))
goto out;
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ ordered = btrfs_lookup_ordered_range(inode, page_start,
+ PAGE_SIZE);
if (ordered) {
unlock_extent_cached(&BTRFS_I(inode)->io_tree, page_start,
page_end, &cached_state, GFP_NOFS);
@@ -2013,7 +2014,7 @@ again:
}
ret = btrfs_delalloc_reserve_space(inode, page_start,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
if (ret) {
mapping_set_error(page->mapping, ret);
end_extent_writepage(page, ret, page_start, page_end);
@@ -2029,7 +2030,7 @@ out:
&cached_state, GFP_NOFS);
out_page:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
kfree(fixup);
}
@@ -2062,7 +2063,7 @@ static int btrfs_writepage_start_hook(struct page *page, u64 start, u64 end)
return -EAGAIN;
SetPageChecked(page);
- page_cache_get(page);
+ get_page(page);
btrfs_init_work(&fixup->work, btrfs_fixup_helper,
btrfs_writepage_fixup_worker, NULL, NULL);
fixup->page = page;
@@ -4013,7 +4014,8 @@ err:
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode_inc_iversion(inode);
inode_inc_iversion(dir);
- inode->i_ctime = dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ inode->i_ctime = dir->i_mtime =
+ dir->i_ctime = current_fs_time(inode->i_sb);
ret = btrfs_update_inode(trans, root, dir);
out:
return ret;
@@ -4156,7 +4158,7 @@ int btrfs_unlink_subvol(struct btrfs_trans_handle *trans,
btrfs_i_size_write(dir, dir->i_size - name_len * 2);
inode_inc_iversion(dir);
- dir->i_mtime = dir->i_ctime = CURRENT_TIME;
+ dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
ret = btrfs_update_inode_fallback(trans, root, dir);
if (ret)
btrfs_abort_transaction(trans, root, ret);
@@ -4211,11 +4213,20 @@ static int truncate_space_check(struct btrfs_trans_handle *trans,
{
int ret;
+ /*
+ * This is only used to apply pressure to the enospc system, we don't
+ * intend to use this reservation at all.
+ */
bytes_deleted = btrfs_csum_bytes_to_leaves(root, bytes_deleted);
+ bytes_deleted *= root->nodesize;
ret = btrfs_block_rsv_add(root, &root->fs_info->trans_block_rsv,
bytes_deleted, BTRFS_RESERVE_NO_FLUSH);
- if (!ret)
+ if (!ret) {
+ trace_btrfs_space_reservation(root->fs_info, "transaction",
+ trans->transid,
+ bytes_deleted, 1);
trans->bytes_reserved += bytes_deleted;
+ }
return ret;
}
@@ -4236,7 +4247,7 @@ static int truncate_inline_extent(struct inode *inode,
if (btrfs_file_extent_compression(leaf, fi) != BTRFS_COMPRESS_NONE) {
loff_t offset = new_size;
- loff_t page_end = ALIGN(offset, PAGE_CACHE_SIZE);
+ loff_t page_end = ALIGN(offset, PAGE_SIZE);
/*
* Zero out the remaining of the last page of our inline extent,
@@ -4248,7 +4259,8 @@ static int truncate_inline_extent(struct inode *inode,
* read the extent item from disk (data not in the page cache).
*/
btrfs_release_path(path);
- return btrfs_truncate_page(inode, offset, page_end - offset, 0);
+ return btrfs_truncate_block(inode, offset, page_end - offset,
+ 0);
}
btrfs_set_file_extent_ram_bytes(leaf, fi, size);
@@ -4601,17 +4613,17 @@ error:
}
/*
- * btrfs_truncate_page - read, zero a chunk and write a page
+ * btrfs_truncate_block - read, zero a chunk and write a block
* @inode - inode that we're zeroing
* @from - the offset to start zeroing
* @len - the length to zero, 0 to zero the entire range respective to the
* offset
* @front - zero up to the offset instead of from the offset on
*
- * This will find the page for the "from" offset and cow the page and zero the
+ * This will find the block for the "from" offset and cow the block and zero the
* part we want to zero. This is used with truncate and hole punching.
*/
-int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
+int btrfs_truncate_block(struct inode *inode, loff_t from, loff_t len,
int front)
{
struct address_space *mapping = inode->i_mapping;
@@ -4621,19 +4633,20 @@ int btrfs_truncate_page(struct inode *inode, loff_t from, loff_t len,
struct extent_state *cached_state = NULL;
char *kaddr;
u32 blocksize = root->sectorsize;
- pgoff_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ pgoff_t index = from >> PAGE_SHIFT;
+ unsigned offset = from & (blocksize - 1);
struct page *page;
gfp_t mask = btrfs_alloc_write_mask(mapping);
int ret = 0;
- u64 page_start;
- u64 page_end;
+ u64 block_start;
+ u64 block_end;
if ((offset & (blocksize - 1)) == 0 &&
(!len || ((len & (blocksize - 1)) == 0)))
goto out;
+
ret = btrfs_delalloc_reserve_space(inode,
- round_down(from, PAGE_CACHE_SIZE), PAGE_CACHE_SIZE);
+ round_down(from, blocksize), blocksize);
if (ret)
goto out;
@@ -4641,21 +4654,21 @@ again:
page = find_or_create_page(mapping, index, mask);
if (!page) {
btrfs_delalloc_release_space(inode,
- round_down(from, PAGE_CACHE_SIZE),
- PAGE_CACHE_SIZE);
+ round_down(from, blocksize),
+ blocksize);
ret = -ENOMEM;
goto out;
}
- page_start = page_offset(page);
- page_end = page_start + PAGE_CACHE_SIZE - 1;
+ block_start = round_down(from, blocksize);
+ block_end = block_start + blocksize - 1;
if (!PageUptodate(page)) {
ret = btrfs_readpage(NULL, page);
lock_page(page);
if (page->mapping != mapping) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
goto again;
}
if (!PageUptodate(page)) {
@@ -4665,55 +4678,57 @@ again:
}
wait_on_page_writeback(page);
- lock_extent_bits(io_tree, page_start, page_end, &cached_state);
+ lock_extent_bits(io_tree, block_start, block_end, &cached_state);
set_page_extent_mapped(page);
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ ordered = btrfs_lookup_ordered_extent(inode, block_start);
if (ordered) {
- unlock_extent_cached(io_tree, page_start, page_end,
+ unlock_extent_cached(io_tree, block_start, block_end,
&cached_state, GFP_NOFS);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
btrfs_start_ordered_extent(inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
goto again;
}
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, block_start, block_end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
- ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+ ret = btrfs_set_extent_delalloc(inode, block_start, block_end,
&cached_state);
if (ret) {
- unlock_extent_cached(io_tree, page_start, page_end,
+ unlock_extent_cached(io_tree, block_start, block_end,
&cached_state, GFP_NOFS);
goto out_unlock;
}
- if (offset != PAGE_CACHE_SIZE) {
+ if (offset != blocksize) {
if (!len)
- len = PAGE_CACHE_SIZE - offset;
+ len = blocksize - offset;
kaddr = kmap(page);
if (front)
- memset(kaddr, 0, offset);
+ memset(kaddr + (block_start - page_offset(page)),
+ 0, offset);
else
- memset(kaddr + offset, 0, len);
+ memset(kaddr + (block_start - page_offset(page)) + offset,
+ 0, len);
flush_dcache_page(page);
kunmap(page);
}
ClearPageChecked(page);
set_page_dirty(page);
- unlock_extent_cached(io_tree, page_start, page_end, &cached_state,
+ unlock_extent_cached(io_tree, block_start, block_end, &cached_state,
GFP_NOFS);
out_unlock:
if (ret)
- btrfs_delalloc_release_space(inode, page_start,
- PAGE_CACHE_SIZE);
+ btrfs_delalloc_release_space(inode, block_start,
+ blocksize);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
out:
return ret;
}
@@ -4782,11 +4797,11 @@ int btrfs_cont_expand(struct inode *inode, loff_t oldsize, loff_t size)
int err = 0;
/*
- * If our size started in the middle of a page we need to zero out the
- * rest of the page before we expand the i_size, otherwise we could
+ * If our size started in the middle of a block we need to zero out the
+ * rest of the block before we expand the i_size, otherwise we could
* expose stale data.
*/
- err = btrfs_truncate_page(inode, oldsize, 0, 0);
+ err = btrfs_truncate_block(inode, oldsize, 0, 0);
if (err)
return err;
@@ -4895,7 +4910,6 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
}
if (newsize > oldsize) {
- truncate_pagecache(inode, newsize);
/*
* Don't do an expanding truncate while snapshoting is ongoing.
* This is to ensure the snapshot captures a fully consistent
@@ -4918,6 +4932,7 @@ static int btrfs_setsize(struct inode *inode, struct iattr *attr)
i_size_write(inode, newsize);
btrfs_ordered_update_i_size(inode, i_size_read(inode), NULL);
+ pagecache_isize_extended(inode, oldsize, newsize);
ret = btrfs_update_inode(trans, root, inode);
btrfs_end_write_no_snapshoting(root);
btrfs_end_transaction(trans, root);
@@ -5588,7 +5603,7 @@ static struct inode *new_simple_dir(struct super_block *s,
inode->i_op = &btrfs_dir_ro_inode_operations;
inode->i_fop = &simple_dir_operations;
inode->i_mode = S_IFDIR | S_IRUGO | S_IWUSR | S_IXUGO;
- inode->i_mtime = CURRENT_TIME;
+ inode->i_mtime = current_fs_time(inode->i_sb);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
@@ -5790,7 +5805,7 @@ static int btrfs_real_readdir(struct file *file, struct dir_context *ctx)
if (name_len <= sizeof(tmp_name)) {
name_ptr = tmp_name;
} else {
- name_ptr = kmalloc(name_len, GFP_NOFS);
+ name_ptr = kmalloc(name_len, GFP_KERNEL);
if (!name_ptr) {
ret = -ENOMEM;
goto err;
@@ -6172,7 +6187,7 @@ static struct inode *btrfs_new_inode(struct btrfs_trans_handle *trans,
inode_init_owner(inode, dir, mode);
inode_set_bytes(inode, 0);
- inode->i_mtime = CURRENT_TIME;
+ inode->i_mtime = current_fs_time(inode->i_sb);
inode->i_atime = inode->i_mtime;
inode->i_ctime = inode->i_mtime;
BTRFS_I(inode)->i_otime = inode->i_mtime;
@@ -6285,7 +6300,8 @@ int btrfs_add_link(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->i_size +
name_len * 2);
inode_inc_iversion(parent_inode);
- parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
+ parent_inode->i_mtime = parent_inode->i_ctime =
+ current_fs_time(parent_inode->i_sb);
ret = btrfs_update_inode(trans, root, parent_inode);
if (ret)
btrfs_abort_transaction(trans, root, ret);
@@ -6503,7 +6519,7 @@ static int btrfs_link(struct dentry *old_dentry, struct inode *dir,
BTRFS_I(inode)->dir_index = 0ULL;
inc_nlink(inode);
inode_inc_iversion(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
ihold(inode);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
@@ -6701,7 +6717,7 @@ static noinline int uncompress_inline(struct btrfs_path *path,
read_extent_buffer(leaf, tmp, ptr, inline_size);
- max_size = min_t(unsigned long, PAGE_CACHE_SIZE, max_size);
+ max_size = min_t(unsigned long, PAGE_SIZE, max_size);
ret = btrfs_decompress(compress_type, tmp, page,
extent_offset, inline_size, max_size);
kfree(tmp);
@@ -6863,8 +6879,8 @@ next:
size = btrfs_file_extent_inline_len(leaf, path->slots[0], item);
extent_offset = page_offset(page) + pg_offset - extent_start;
- copy_size = min_t(u64, PAGE_CACHE_SIZE - pg_offset,
- size - extent_offset);
+ copy_size = min_t(u64, PAGE_SIZE - pg_offset,
+ size - extent_offset);
em->start = extent_start + extent_offset;
em->len = ALIGN(copy_size, root->sectorsize);
em->orig_block_len = em->len;
@@ -6883,9 +6899,9 @@ next:
map = kmap(page);
read_extent_buffer(leaf, map + pg_offset, ptr,
copy_size);
- if (pg_offset + copy_size < PAGE_CACHE_SIZE) {
+ if (pg_offset + copy_size < PAGE_SIZE) {
memset(map + pg_offset + copy_size, 0,
- PAGE_CACHE_SIZE - pg_offset -
+ PAGE_SIZE - pg_offset -
copy_size);
}
kunmap(page);
@@ -7320,12 +7336,12 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
int start_idx;
int end_idx;
- start_idx = start >> PAGE_CACHE_SHIFT;
+ start_idx = start >> PAGE_SHIFT;
/*
* end is the last byte in the last page. end == start is legal
*/
- end_idx = end >> PAGE_CACHE_SHIFT;
+ end_idx = end >> PAGE_SHIFT;
rcu_read_lock();
@@ -7366,7 +7382,7 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
* include/linux/pagemap.h for details.
*/
if (unlikely(page != *pagep)) {
- page_cache_release(page);
+ put_page(page);
page = NULL;
}
}
@@ -7374,7 +7390,7 @@ bool btrfs_page_exists_in_range(struct inode *inode, loff_t start, loff_t end)
if (page) {
if (page->index <= end_idx)
found = true;
- page_cache_release(page);
+ put_page(page);
}
rcu_read_unlock();
@@ -7414,7 +7430,26 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
cached_state, GFP_NOFS);
if (ordered) {
- btrfs_start_ordered_extent(inode, ordered, 1);
+ /*
+ * If we are doing a DIO read and the ordered extent we
+ * found is for a buffered write, we can not wait for it
+ * to complete and retry, because if we do so we can
+ * deadlock with concurrent buffered writes on page
+ * locks. This happens only if our DIO read covers more
+ * than one extent map, if at this point has already
+ * created an ordered extent for a previous extent map
+ * and locked its range in the inode's io tree, and a
+ * concurrent write against that previous extent map's
+ * range and this range started (we unlock the ranges
+ * in the io tree only when the bios complete and
+ * buffered writes always lock pages before attempting
+ * to lock range in the io tree).
+ */
+ if (writing ||
+ test_bit(BTRFS_ORDERED_DIRECT, &ordered->flags))
+ btrfs_start_ordered_extent(inode, ordered, 1);
+ else
+ ret = -ENOTBLK;
btrfs_put_ordered_extent(ordered);
} else {
/*
@@ -7431,9 +7466,11 @@ static int lock_extent_direct(struct inode *inode, u64 lockstart, u64 lockend,
* that page.
*/
ret = -ENOTBLK;
- break;
}
+ if (ret)
+ break;
+
cond_resched();
}
@@ -7764,9 +7801,9 @@ static int btrfs_check_dio_repairable(struct inode *inode,
}
static int dio_read_error(struct inode *inode, struct bio *failed_bio,
- struct page *page, u64 start, u64 end,
- int failed_mirror, bio_end_io_t *repair_endio,
- void *repair_arg)
+ struct page *page, unsigned int pgoff,
+ u64 start, u64 end, int failed_mirror,
+ bio_end_io_t *repair_endio, void *repair_arg)
{
struct io_failure_record *failrec;
struct bio *bio;
@@ -7787,7 +7824,9 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
return -EIO;
}
- if (failed_bio->bi_vcnt > 1)
+ if ((failed_bio->bi_vcnt > 1)
+ || (failed_bio->bi_io_vec->bv_len
+ > BTRFS_I(inode)->root->sectorsize))
read_mode = READ_SYNC | REQ_FAILFAST_DEV;
else
read_mode = READ_SYNC;
@@ -7795,7 +7834,7 @@ static int dio_read_error(struct inode *inode, struct bio *failed_bio,
isector = start - btrfs_io_bio(failed_bio)->logical;
isector >>= inode->i_sb->s_blocksize_bits;
bio = btrfs_create_repair_bio(inode, failed_bio, failrec, page,
- 0, isector, repair_endio, repair_arg);
+ pgoff, isector, repair_endio, repair_arg);
if (!bio) {
free_io_failure(inode, failrec);
return -EIO;
@@ -7825,12 +7864,17 @@ struct btrfs_retry_complete {
static void btrfs_retry_endio_nocsum(struct bio *bio)
{
struct btrfs_retry_complete *done = bio->bi_private;
+ struct inode *inode;
struct bio_vec *bvec;
int i;
if (bio->bi_error)
goto end;
+ ASSERT(bio->bi_vcnt == 1);
+ inode = bio->bi_io_vec->bv_page->mapping->host;
+ ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
+
done->uptodate = 1;
bio_for_each_segment_all(bvec, bio, i)
clean_io_failure(done->inode, done->start, bvec->bv_page, 0);
@@ -7842,25 +7886,35 @@ end:
static int __btrfs_correct_data_nocsum(struct inode *inode,
struct btrfs_io_bio *io_bio)
{
+ struct btrfs_fs_info *fs_info;
struct bio_vec *bvec;
struct btrfs_retry_complete done;
u64 start;
+ unsigned int pgoff;
+ u32 sectorsize;
+ int nr_sectors;
int i;
int ret;
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ sectorsize = BTRFS_I(inode)->root->sectorsize;
+
start = io_bio->logical;
done.inode = inode;
bio_for_each_segment_all(bvec, &io_bio->bio, i) {
-try_again:
+ nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+ pgoff = bvec->bv_offset;
+
+next_block_or_try_again:
done.uptodate = 0;
done.start = start;
init_completion(&done.done);
- ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
- start + bvec->bv_len - 1,
- io_bio->mirror_num,
- btrfs_retry_endio_nocsum, &done);
+ ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+ pgoff, start, start + sectorsize - 1,
+ io_bio->mirror_num,
+ btrfs_retry_endio_nocsum, &done);
if (ret)
return ret;
@@ -7868,10 +7922,15 @@ try_again:
if (!done.uptodate) {
/* We might have another mirror, so try again */
- goto try_again;
+ goto next_block_or_try_again;
}
- start += bvec->bv_len;
+ start += sectorsize;
+
+ if (nr_sectors--) {
+ pgoff += sectorsize;
+ goto next_block_or_try_again;
+ }
}
return 0;
@@ -7881,7 +7940,9 @@ static void btrfs_retry_endio(struct bio *bio)
{
struct btrfs_retry_complete *done = bio->bi_private;
struct btrfs_io_bio *io_bio = btrfs_io_bio(bio);
+ struct inode *inode;
struct bio_vec *bvec;
+ u64 start;
int uptodate;
int ret;
int i;
@@ -7890,13 +7951,20 @@ static void btrfs_retry_endio(struct bio *bio)
goto end;
uptodate = 1;
+
+ start = done->start;
+
+ ASSERT(bio->bi_vcnt == 1);
+ inode = bio->bi_io_vec->bv_page->mapping->host;
+ ASSERT(bio->bi_io_vec->bv_len == BTRFS_I(inode)->root->sectorsize);
+
bio_for_each_segment_all(bvec, bio, i) {
ret = __readpage_endio_check(done->inode, io_bio, i,
- bvec->bv_page, 0,
- done->start, bvec->bv_len);
+ bvec->bv_page, bvec->bv_offset,
+ done->start, bvec->bv_len);
if (!ret)
clean_io_failure(done->inode, done->start,
- bvec->bv_page, 0);
+ bvec->bv_page, bvec->bv_offset);
else
uptodate = 0;
}
@@ -7910,20 +7978,34 @@ end:
static int __btrfs_subio_endio_read(struct inode *inode,
struct btrfs_io_bio *io_bio, int err)
{
+ struct btrfs_fs_info *fs_info;
struct bio_vec *bvec;
struct btrfs_retry_complete done;
u64 start;
u64 offset = 0;
+ u32 sectorsize;
+ int nr_sectors;
+ unsigned int pgoff;
+ int csum_pos;
int i;
int ret;
+ fs_info = BTRFS_I(inode)->root->fs_info;
+ sectorsize = BTRFS_I(inode)->root->sectorsize;
+
err = 0;
start = io_bio->logical;
done.inode = inode;
bio_for_each_segment_all(bvec, &io_bio->bio, i) {
- ret = __readpage_endio_check(inode, io_bio, i, bvec->bv_page,
- 0, start, bvec->bv_len);
+ nr_sectors = BTRFS_BYTES_TO_BLKS(fs_info, bvec->bv_len);
+
+ pgoff = bvec->bv_offset;
+next_block:
+ csum_pos = BTRFS_BYTES_TO_BLKS(fs_info, offset);
+ ret = __readpage_endio_check(inode, io_bio, csum_pos,
+ bvec->bv_page, pgoff, start,
+ sectorsize);
if (likely(!ret))
goto next;
try_again:
@@ -7931,10 +8013,10 @@ try_again:
done.start = start;
init_completion(&done.done);
- ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page, start,
- start + bvec->bv_len - 1,
- io_bio->mirror_num,
- btrfs_retry_endio, &done);
+ ret = dio_read_error(inode, &io_bio->bio, bvec->bv_page,
+ pgoff, start, start + sectorsize - 1,
+ io_bio->mirror_num,
+ btrfs_retry_endio, &done);
if (ret) {
err = ret;
goto next;
@@ -7947,8 +8029,15 @@ try_again:
goto try_again;
}
next:
- offset += bvec->bv_len;
- start += bvec->bv_len;
+ offset += sectorsize;
+ start += sectorsize;
+
+ ASSERT(nr_sectors);
+
+ if (--nr_sectors) {
+ pgoff += sectorsize;
+ goto next_block;
+ }
}
return err;
@@ -8202,9 +8291,11 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
u64 file_offset = dip->logical_offset;
u64 submit_len = 0;
u64 map_length;
- int nr_pages = 0;
- int ret;
+ u32 blocksize = root->sectorsize;
int async_submit = 0;
+ int nr_sectors;
+ int ret;
+ int i;
map_length = orig_bio->bi_iter.bi_size;
ret = btrfs_map_block(root->fs_info, rw, start_sector << 9,
@@ -8234,9 +8325,12 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
atomic_inc(&dip->pending_bios);
while (bvec <= (orig_bio->bi_io_vec + orig_bio->bi_vcnt - 1)) {
- if (map_length < submit_len + bvec->bv_len ||
- bio_add_page(bio, bvec->bv_page, bvec->bv_len,
- bvec->bv_offset) < bvec->bv_len) {
+ nr_sectors = BTRFS_BYTES_TO_BLKS(root->fs_info, bvec->bv_len);
+ i = 0;
+next_block:
+ if (unlikely(map_length < submit_len + blocksize ||
+ bio_add_page(bio, bvec->bv_page, blocksize,
+ bvec->bv_offset + (i * blocksize)) < blocksize)) {
/*
* inc the count before we submit the bio so
* we know the end IO handler won't happen before
@@ -8257,7 +8351,6 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
file_offset += submit_len;
submit_len = 0;
- nr_pages = 0;
bio = btrfs_dio_bio_alloc(orig_bio->bi_bdev,
start_sector, GFP_NOFS);
@@ -8275,9 +8368,14 @@ static int btrfs_submit_direct_hook(int rw, struct btrfs_dio_private *dip,
bio_put(bio);
goto out_err;
}
+
+ goto next_block;
} else {
- submit_len += bvec->bv_len;
- nr_pages++;
+ submit_len += blocksize;
+ if (--nr_sectors) {
+ i++;
+ goto next_block;
+ }
bvec++;
}
}
@@ -8621,7 +8719,7 @@ static int __btrfs_releasepage(struct page *page, gfp_t gfp_flags)
if (ret == 1) {
ClearPagePrivate(page);
set_page_private(page, 0);
- page_cache_release(page);
+ put_page(page);
}
return ret;
}
@@ -8641,7 +8739,9 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
struct btrfs_ordered_extent *ordered;
struct extent_state *cached_state = NULL;
u64 page_start = page_offset(page);
- u64 page_end = page_start + PAGE_CACHE_SIZE - 1;
+ u64 page_end = page_start + PAGE_SIZE - 1;
+ u64 start;
+ u64 end;
int inode_evicting = inode->i_state & I_FREEING;
/*
@@ -8661,14 +8761,18 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
if (!inode_evicting)
lock_extent_bits(tree, page_start, page_end, &cached_state);
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
+again:
+ start = page_start;
+ ordered = btrfs_lookup_ordered_range(inode, start,
+ page_end - start + 1);
if (ordered) {
+ end = min(page_end, ordered->file_offset + ordered->len - 1);
/*
* IO on this page will never be started, so we need
* to account for any ordered extents now
*/
if (!inode_evicting)
- clear_extent_bit(tree, page_start, page_end,
+ clear_extent_bit(tree, start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_LOCKED | EXTENT_DO_ACCOUNTING |
EXTENT_DEFRAG, 1, 0, &cached_state,
@@ -8685,22 +8789,26 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
spin_lock_irq(&tree->lock);
set_bit(BTRFS_ORDERED_TRUNCATED, &ordered->flags);
- new_len = page_start - ordered->file_offset;
+ new_len = start - ordered->file_offset;
if (new_len < ordered->truncated_len)
ordered->truncated_len = new_len;
spin_unlock_irq(&tree->lock);
if (btrfs_dec_test_ordered_pending(inode, &ordered,
- page_start,
- PAGE_CACHE_SIZE, 1))
+ start,
+ end - start + 1, 1))
btrfs_finish_ordered_io(ordered);
}
btrfs_put_ordered_extent(ordered);
if (!inode_evicting) {
cached_state = NULL;
- lock_extent_bits(tree, page_start, page_end,
+ lock_extent_bits(tree, start, end,
&cached_state);
}
+
+ start = end + 1;
+ if (start < page_end)
+ goto again;
}
/*
@@ -8714,7 +8822,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
* 2) Not written to disk
* This means the reserved space should be freed here.
*/
- btrfs_qgroup_free_data(inode, page_start, PAGE_CACHE_SIZE);
+ btrfs_qgroup_free_data(inode, page_start, PAGE_SIZE);
if (!inode_evicting) {
clear_extent_bit(tree, page_start, page_end,
EXTENT_LOCKED | EXTENT_DIRTY |
@@ -8729,7 +8837,7 @@ static void btrfs_invalidatepage(struct page *page, unsigned int offset,
if (PagePrivate(page)) {
ClearPagePrivate(page);
set_page_private(page, 0);
- page_cache_release(page);
+ put_page(page);
}
}
@@ -8761,15 +8869,28 @@ int btrfs_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
loff_t size;
int ret;
int reserved = 0;
+ u64 reserved_space;
u64 page_start;
u64 page_end;
+ u64 end;
+
+ reserved_space = PAGE_SIZE;
sb_start_pagefault(inode->i_sb);
page_start = page_offset(page);
- page_end = page_start + PAGE_CACHE_SIZE - 1;
+ page_end = page_start + PAGE_SIZE - 1;
+ end = page_end;
+ /*
+ * Reserving delalloc space after obtaining the page lock can lead to
+ * deadlock. For example, if a dirty page is locked by this function
+ * and the call to btrfs_delalloc_reserve_space() ends up triggering
+ * dirty page write out, then the btrfs_writepage() function could
+ * end up waiting indefinitely to get a lock on the page currently
+ * being processed by btrfs_page_mkwrite() function.
+ */
ret = btrfs_delalloc_reserve_space(inode, page_start,
- PAGE_CACHE_SIZE);
+ reserved_space);
if (!ret) {
ret = file_update_time(vma->vm_file);
reserved = 1;
@@ -8803,7 +8924,7 @@ again:
* we can't set the delalloc bits if there are pending ordered
* extents. Drop our locks and wait for them to finish
*/
- ordered = btrfs_lookup_ordered_extent(inode, page_start);
+ ordered = btrfs_lookup_ordered_range(inode, page_start, page_end);
if (ordered) {
unlock_extent_cached(io_tree, page_start, page_end,
&cached_state, GFP_NOFS);
@@ -8813,6 +8934,18 @@ again:
goto again;
}
+ if (page->index == ((size - 1) >> PAGE_SHIFT)) {
+ reserved_space = round_up(size - page_start, root->sectorsize);
+ if (reserved_space < PAGE_SIZE) {
+ end = page_start + reserved_space - 1;
+ spin_lock(&BTRFS_I(inode)->lock);
+ BTRFS_I(inode)->outstanding_extents++;
+ spin_unlock(&BTRFS_I(inode)->lock);
+ btrfs_delalloc_release_space(inode, page_start,
+ PAGE_SIZE - reserved_space);
+ }
+ }
+
/*
* XXX - page_mkwrite gets called every time the page is dirtied, even
* if it was already dirty, so for space accounting reasons we need to
@@ -8820,12 +8953,12 @@ again:
* is probably a better way to do this, but for now keep consistent with
* prepare_pages in the normal write path.
*/
- clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, page_end,
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
EXTENT_DIRTY | EXTENT_DELALLOC |
EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
0, 0, &cached_state, GFP_NOFS);
- ret = btrfs_set_extent_delalloc(inode, page_start, page_end,
+ ret = btrfs_set_extent_delalloc(inode, page_start, end,
&cached_state);
if (ret) {
unlock_extent_cached(io_tree, page_start, page_end,
@@ -8836,14 +8969,14 @@ again:
ret = 0;
/* page is wholly or partially inside EOF */
- if (page_start + PAGE_CACHE_SIZE > size)
- zero_start = size & ~PAGE_CACHE_MASK;
+ if (page_start + PAGE_SIZE > size)
+ zero_start = size & ~PAGE_MASK;
else
- zero_start = PAGE_CACHE_SIZE;
+ zero_start = PAGE_SIZE;
- if (zero_start != PAGE_CACHE_SIZE) {
+ if (zero_start != PAGE_SIZE) {
kaddr = kmap(page);
- memset(kaddr + zero_start, 0, PAGE_CACHE_SIZE - zero_start);
+ memset(kaddr + zero_start, 0, PAGE_SIZE - zero_start);
flush_dcache_page(page);
kunmap(page);
}
@@ -8864,7 +8997,7 @@ out_unlock:
}
unlock_page(page);
out:
- btrfs_delalloc_release_space(inode, page_start, PAGE_CACHE_SIZE);
+ btrfs_delalloc_release_space(inode, page_start, reserved_space);
out_noreserve:
sb_end_pagefault(inode->i_sb);
return ret;
@@ -9190,16 +9323,11 @@ void btrfs_destroy_cachep(void)
* destroy cache.
*/
rcu_barrier();
- if (btrfs_inode_cachep)
- kmem_cache_destroy(btrfs_inode_cachep);
- if (btrfs_trans_handle_cachep)
- kmem_cache_destroy(btrfs_trans_handle_cachep);
- if (btrfs_transaction_cachep)
- kmem_cache_destroy(btrfs_transaction_cachep);
- if (btrfs_path_cachep)
- kmem_cache_destroy(btrfs_path_cachep);
- if (btrfs_free_space_cachep)
- kmem_cache_destroy(btrfs_free_space_cachep);
+ kmem_cache_destroy(btrfs_inode_cachep);
+ kmem_cache_destroy(btrfs_trans_handle_cachep);
+ kmem_cache_destroy(btrfs_transaction_cachep);
+ kmem_cache_destroy(btrfs_path_cachep);
+ kmem_cache_destroy(btrfs_free_space_cachep);
}
int btrfs_init_cachep(void)
@@ -9250,7 +9378,6 @@ static int btrfs_getattr(struct vfsmount *mnt,
generic_fillattr(inode, stat);
stat->dev = BTRFS_I(inode)->root->anon_dev;
- stat->blksize = PAGE_CACHE_SIZE;
spin_lock(&BTRFS_I(inode)->lock);
delalloc_bytes = BTRFS_I(inode)->delalloc_bytes;
@@ -9268,7 +9395,6 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct btrfs_root *dest = BTRFS_I(new_dir)->root;
struct inode *new_inode = d_inode(new_dentry);
struct inode *old_inode = d_inode(old_dentry);
- struct timespec ctime = CURRENT_TIME;
u64 index = 0;
u64 root_objectid;
int ret;
@@ -9365,9 +9491,9 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
inode_inc_iversion(old_dir);
inode_inc_iversion(new_dir);
inode_inc_iversion(old_inode);
- old_dir->i_ctime = old_dir->i_mtime = ctime;
- new_dir->i_ctime = new_dir->i_mtime = ctime;
- old_inode->i_ctime = ctime;
+ old_dir->i_ctime = old_dir->i_mtime =
+ new_dir->i_ctime = new_dir->i_mtime =
+ old_inode->i_ctime = current_fs_time(old_dir->i_sb);
if (old_dentry->d_parent != new_dentry->d_parent)
btrfs_record_unlink_dir(trans, old_dir, old_inode, 1);
@@ -9392,7 +9518,7 @@ static int btrfs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (new_inode) {
inode_inc_iversion(new_inode);
- new_inode->i_ctime = CURRENT_TIME;
+ new_inode->i_ctime = current_fs_time(new_inode->i_sb);
if (unlikely(btrfs_ino(new_inode) ==
BTRFS_EMPTY_SUBVOL_DIR_OBJECTID)) {
root_objectid = BTRFS_I(new_inode)->location.objectid;
@@ -9870,7 +9996,7 @@ next:
*alloc_hint = ins.objectid + ins.offset;
inode_inc_iversion(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
BTRFS_I(inode)->flags |= BTRFS_INODE_PREALLOC;
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
(actual_len > inode->i_size) &&
@@ -10058,7 +10184,7 @@ static const struct file_operations btrfs_dir_file_operations = {
.iterate = btrfs_real_readdir,
.unlocked_ioctl = btrfs_ioctl,
#ifdef CONFIG_COMPAT
- .compat_ioctl = btrfs_ioctl,
+ .compat_ioctl = btrfs_compat_ioctl,
#endif
.release = btrfs_release_file,
.fsync = btrfs_sync_file,
diff --git a/fs/btrfs/ioctl.c b/fs/btrfs/ioctl.c
index 48aee9846..f545f81f6 100644
--- a/fs/btrfs/ioctl.c
+++ b/fs/btrfs/ioctl.c
@@ -59,6 +59,8 @@
#include "props.h"
#include "sysfs.h"
#include "qgroup.h"
+#include "tree-log.h"
+#include "compression.h"
#ifdef CONFIG_64BIT
/* If we have a 32-bit userspace and 64-bit kernel, then the UAPI
@@ -347,7 +349,7 @@ static int btrfs_ioctl_setflags(struct file *file, void __user *arg)
btrfs_update_iflags(inode);
inode_inc_iversion(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
ret = btrfs_update_inode(trans, root, inode);
btrfs_end_transaction(trans, root);
@@ -443,7 +445,7 @@ static noinline int create_subvol(struct inode *dir,
struct btrfs_root *root = BTRFS_I(dir)->root;
struct btrfs_root *new_root;
struct btrfs_block_rsv block_rsv;
- struct timespec cur_time = CURRENT_TIME;
+ struct timespec cur_time = current_fs_time(dir->i_sb);
struct inode *inode;
int ret;
int err;
@@ -844,10 +846,6 @@ static noinline int btrfs_mksubvol(struct path *parent,
if (IS_ERR(dentry))
goto out_unlock;
- error = -EEXIST;
- if (d_really_is_positive(dentry))
- goto out_dput;
-
error = btrfs_may_create(dir, dentry);
if (error)
goto out_dput;
@@ -900,7 +898,7 @@ static int check_defrag_in_cache(struct inode *inode, u64 offset, u32 thresh)
u64 end;
read_lock(&em_tree->lock);
- em = lookup_extent_mapping(em_tree, offset, PAGE_CACHE_SIZE);
+ em = lookup_extent_mapping(em_tree, offset, PAGE_SIZE);
read_unlock(&em_tree->lock);
if (em) {
@@ -990,7 +988,7 @@ static struct extent_map *defrag_lookup_extent(struct inode *inode, u64 start)
struct extent_map_tree *em_tree = &BTRFS_I(inode)->extent_tree;
struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
struct extent_map *em;
- u64 len = PAGE_CACHE_SIZE;
+ u64 len = PAGE_SIZE;
/*
* hopefully we have this extent in the tree already, try without
@@ -1126,15 +1124,15 @@ static int cluster_pages_for_defrag(struct inode *inode,
struct extent_io_tree *tree;
gfp_t mask = btrfs_alloc_write_mask(inode->i_mapping);
- file_end = (isize - 1) >> PAGE_CACHE_SHIFT;
+ file_end = (isize - 1) >> PAGE_SHIFT;
if (!isize || start_index > file_end)
return 0;
page_cnt = min_t(u64, (u64)num_pages, (u64)file_end - start_index + 1);
ret = btrfs_delalloc_reserve_space(inode,
- start_index << PAGE_CACHE_SHIFT,
- page_cnt << PAGE_CACHE_SHIFT);
+ start_index << PAGE_SHIFT,
+ page_cnt << PAGE_SHIFT);
if (ret)
return ret;
i_done = 0;
@@ -1150,7 +1148,7 @@ again:
break;
page_start = page_offset(page);
- page_end = page_start + PAGE_CACHE_SIZE - 1;
+ page_end = page_start + PAGE_SIZE - 1;
while (1) {
lock_extent_bits(tree, page_start, page_end,
&cached_state);
@@ -1171,7 +1169,7 @@ again:
*/
if (page->mapping != inode->i_mapping) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
goto again;
}
}
@@ -1181,7 +1179,7 @@ again:
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
ret = -EIO;
break;
}
@@ -1189,7 +1187,7 @@ again:
if (page->mapping != inode->i_mapping) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
goto again;
}
@@ -1210,7 +1208,7 @@ again:
wait_on_page_writeback(pages[i]);
page_start = page_offset(pages[0]);
- page_end = page_offset(pages[i_done - 1]) + PAGE_CACHE_SIZE;
+ page_end = page_offset(pages[i_done - 1]) + PAGE_SIZE;
lock_extent_bits(&BTRFS_I(inode)->io_tree,
page_start, page_end - 1, &cached_state);
@@ -1224,8 +1222,8 @@ again:
BTRFS_I(inode)->outstanding_extents++;
spin_unlock(&BTRFS_I(inode)->lock);
btrfs_delalloc_release_space(inode,
- start_index << PAGE_CACHE_SHIFT,
- (page_cnt - i_done) << PAGE_CACHE_SHIFT);
+ start_index << PAGE_SHIFT,
+ (page_cnt - i_done) << PAGE_SHIFT);
}
@@ -1242,17 +1240,17 @@ again:
set_page_extent_mapped(pages[i]);
set_page_dirty(pages[i]);
unlock_page(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
return i_done;
out:
for (i = 0; i < i_done; i++) {
unlock_page(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
btrfs_delalloc_release_space(inode,
- start_index << PAGE_CACHE_SHIFT,
- page_cnt << PAGE_CACHE_SHIFT);
+ start_index << PAGE_SHIFT,
+ page_cnt << PAGE_SHIFT);
return ret;
}
@@ -1275,7 +1273,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
int defrag_count = 0;
int compress_type = BTRFS_COMPRESS_ZLIB;
u32 extent_thresh = range->extent_thresh;
- unsigned long max_cluster = SZ_256K >> PAGE_CACHE_SHIFT;
+ unsigned long max_cluster = SZ_256K >> PAGE_SHIFT;
unsigned long cluster = max_cluster;
u64 new_align = ~((u64)SZ_128K - 1);
struct page **pages = NULL;
@@ -1319,9 +1317,9 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
/* find the last page to defrag */
if (range->start + range->len > range->start) {
last_index = min_t(u64, isize - 1,
- range->start + range->len - 1) >> PAGE_CACHE_SHIFT;
+ range->start + range->len - 1) >> PAGE_SHIFT;
} else {
- last_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+ last_index = (isize - 1) >> PAGE_SHIFT;
}
if (newer_than) {
@@ -1333,11 +1331,11 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
* we always align our defrag to help keep
* the extents in the file evenly spaced
*/
- i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
+ i = (newer_off & new_align) >> PAGE_SHIFT;
} else
goto out_ra;
} else {
- i = range->start >> PAGE_CACHE_SHIFT;
+ i = range->start >> PAGE_SHIFT;
}
if (!max_to_defrag)
max_to_defrag = last_index - i + 1;
@@ -1350,7 +1348,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
inode->i_mapping->writeback_index = i;
while (i <= last_index && defrag_count < max_to_defrag &&
- (i < DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE))) {
+ (i < DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE))) {
/*
* make sure we stop running if someone unmounts
* the FS
@@ -1364,7 +1362,7 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
break;
}
- if (!should_defrag_range(inode, (u64)i << PAGE_CACHE_SHIFT,
+ if (!should_defrag_range(inode, (u64)i << PAGE_SHIFT,
extent_thresh, &last_len, &skip,
&defrag_end, range->flags &
BTRFS_DEFRAG_RANGE_COMPRESS)) {
@@ -1373,14 +1371,14 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
* the should_defrag function tells us how much to skip
* bump our counter by the suggested amount
*/
- next = DIV_ROUND_UP(skip, PAGE_CACHE_SIZE);
+ next = DIV_ROUND_UP(skip, PAGE_SIZE);
i = max(i + 1, next);
continue;
}
if (!newer_than) {
- cluster = (PAGE_CACHE_ALIGN(defrag_end) >>
- PAGE_CACHE_SHIFT) - i;
+ cluster = (PAGE_ALIGN(defrag_end) >>
+ PAGE_SHIFT) - i;
cluster = min(cluster, max_cluster);
} else {
cluster = max_cluster;
@@ -1414,20 +1412,20 @@ int btrfs_defrag_file(struct inode *inode, struct file *file,
i += ret;
newer_off = max(newer_off + 1,
- (u64)i << PAGE_CACHE_SHIFT);
+ (u64)i << PAGE_SHIFT);
ret = find_new_extents(root, inode, newer_than,
&newer_off, SZ_64K);
if (!ret) {
range->start = newer_off;
- i = (newer_off & new_align) >> PAGE_CACHE_SHIFT;
+ i = (newer_off & new_align) >> PAGE_SHIFT;
} else {
break;
}
} else {
if (ret > 0) {
i += ret;
- last_len += ret << PAGE_CACHE_SHIFT;
+ last_len += ret << PAGE_SHIFT;
} else {
i++;
last_len = 0;
@@ -1656,7 +1654,7 @@ static noinline int btrfs_ioctl_snap_create_transid(struct file *file,
src_inode = file_inode(src.file);
if (src_inode->i_sb != file_inode(file)->i_sb) {
- btrfs_info(BTRFS_I(src_inode)->root->fs_info,
+ btrfs_info(BTRFS_I(file_inode(file))->root->fs_info,
"Snapshot src from another FS");
ret = -EXDEV;
} else if (!inode_owner_or_capable(src_inode)) {
@@ -1724,7 +1722,7 @@ static noinline int btrfs_ioctl_snap_create_v2(struct file *file,
if (vol_args->flags & BTRFS_SUBVOL_RDONLY)
readonly = true;
if (vol_args->flags & BTRFS_SUBVOL_QGROUP_INHERIT) {
- if (vol_args->size > PAGE_CACHE_SIZE) {
+ if (vol_args->size > PAGE_SIZE) {
ret = -EINVAL;
goto free_args;
}
@@ -2097,8 +2095,6 @@ static noinline int search_ioctl(struct inode *inode,
key.offset = (u64)-1;
root = btrfs_read_fs_root_no_name(info, &key);
if (IS_ERR(root)) {
- btrfs_err(info, "could not find root %llu",
- sk->tree_id);
btrfs_free_path(path);
return -ENOENT;
}
@@ -2476,6 +2472,8 @@ static noinline int btrfs_ioctl_snap_destroy(struct file *file,
trans->block_rsv = &block_rsv;
trans->bytes_reserved = block_rsv.size;
+ btrfs_record_snapshot_destroy(trans, dir);
+
ret = btrfs_unlink_subvol(trans, root, dir,
dest->root_key.objectid,
dentry->d_name.name,
@@ -2808,12 +2806,12 @@ static struct page *extent_same_get_page(struct inode *inode, pgoff_t index)
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return ERR_PTR(-EIO);
}
if (page->mapping != inode->i_mapping) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return ERR_PTR(-EAGAIN);
}
}
@@ -2825,7 +2823,7 @@ static int gather_extent_pages(struct inode *inode, struct page **pages,
int num_pages, u64 off)
{
int i;
- pgoff_t index = off >> PAGE_CACHE_SHIFT;
+ pgoff_t index = off >> PAGE_SHIFT;
for (i = 0; i < num_pages; i++) {
again:
@@ -2934,12 +2932,12 @@ static void btrfs_cmp_data_free(struct cmp_pages *cmp)
pg = cmp->src_pages[i];
if (pg) {
unlock_page(pg);
- page_cache_release(pg);
+ put_page(pg);
}
pg = cmp->dst_pages[i];
if (pg) {
unlock_page(pg);
- page_cache_release(pg);
+ put_page(pg);
}
}
kfree(cmp->src_pages);
@@ -2951,7 +2949,7 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
u64 len, struct cmp_pages *cmp)
{
int ret;
- int num_pages = PAGE_CACHE_ALIGN(len) >> PAGE_CACHE_SHIFT;
+ int num_pages = PAGE_ALIGN(len) >> PAGE_SHIFT;
struct page **src_pgarr, **dst_pgarr;
/*
@@ -2960,8 +2958,8 @@ static int btrfs_cmp_data_prepare(struct inode *src, u64 loff,
* of the array is bounded by len, which is in turn bounded by
* BTRFS_MAX_DEDUPE_LEN.
*/
- src_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
- dst_pgarr = kzalloc(num_pages * sizeof(struct page *), GFP_NOFS);
+ src_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
+ dst_pgarr = kcalloc(num_pages, sizeof(struct page *), GFP_KERNEL);
if (!src_pgarr || !dst_pgarr) {
kfree(src_pgarr);
kfree(dst_pgarr);
@@ -2989,12 +2987,12 @@ static int btrfs_cmp_data(struct inode *src, u64 loff, struct inode *dst,
int ret = 0;
int i;
struct page *src_page, *dst_page;
- unsigned int cmp_len = PAGE_CACHE_SIZE;
+ unsigned int cmp_len = PAGE_SIZE;
void *addr, *dst_addr;
i = 0;
while (len) {
- if (len < PAGE_CACHE_SIZE)
+ if (len < PAGE_SIZE)
cmp_len = len;
BUG_ON(i >= cmp->num_pages);
@@ -3068,6 +3066,9 @@ static int btrfs_extent_same(struct inode *src, u64 loff, u64 olen,
ret = extent_same_check_offsets(src, loff, &len, olen);
if (ret)
goto out_unlock;
+ ret = extent_same_check_offsets(src, dst_loff, &len, olen);
+ if (ret)
+ goto out_unlock;
/*
* Single inode case wants the same checks, except we
@@ -3190,7 +3191,7 @@ ssize_t btrfs_dedupe_file_range(struct file *src_file, u64 loff, u64 olen,
if (olen > BTRFS_MAX_DEDUPE_LEN)
olen = BTRFS_MAX_DEDUPE_LEN;
- if (WARN_ON_ONCE(bs < PAGE_CACHE_SIZE)) {
+ if (WARN_ON_ONCE(bs < PAGE_SIZE)) {
/*
* Btrfs does not support blocksize < page_size. As a
* result, btrfs_cmp_data() won't correctly handle
@@ -3217,7 +3218,7 @@ static int clone_finish_inode_update(struct btrfs_trans_handle *trans,
inode_inc_iversion(inode);
if (!no_time_update)
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_mtime = inode->i_ctime = current_fs_time(inode->i_sb);
/*
* We round up to the block size at eof when determining which
* extents to clone above, but shouldn't round up the file size.
@@ -3889,8 +3890,9 @@ static noinline int btrfs_clone_files(struct file *file, struct file *file_src,
* Truncate page cache pages so that future reads will see the cloned
* data immediately and not the previous data.
*/
- truncate_inode_pages_range(&inode->i_data, destoff,
- PAGE_CACHE_ALIGN(destoff + len) - 1);
+ truncate_inode_pages_range(&inode->i_data,
+ round_down(destoff, PAGE_SIZE),
+ round_up(destoff + len, PAGE_SIZE) - 1);
out_unlock:
if (!same_inode)
btrfs_double_inode_unlock(src, inode);
@@ -4122,7 +4124,7 @@ static long btrfs_ioctl_space_info(struct btrfs_root *root, void __user *arg)
/* we generally have at most 6 or so space infos, one for each raid
* level. So, a whole page should be more than enough for everyone
*/
- if (alloc_size > PAGE_CACHE_SIZE)
+ if (alloc_size > PAGE_SIZE)
return -ENOMEM;
space_args.total_spaces = 0;
@@ -5031,7 +5033,7 @@ static long _btrfs_ioctl_set_received_subvol(struct file *file,
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_root_item *root_item = &root->root_item;
struct btrfs_trans_handle *trans;
- struct timespec ct = CURRENT_TIME;
+ struct timespec ct = current_fs_time(inode->i_sb);
int ret = 0;
int received_uuid_changed;
@@ -5262,8 +5264,7 @@ out_unlock:
.compat_ro_flags = BTRFS_FEATURE_COMPAT_RO_##suffix, \
.incompat_flags = BTRFS_FEATURE_INCOMPAT_##suffix }
-static int btrfs_ioctl_get_supported_features(struct file *file,
- void __user *arg)
+int btrfs_ioctl_get_supported_features(void __user *arg)
{
static const struct btrfs_ioctl_feature_flags features[3] = {
INIT_FEATURE_FLAGS(SUPP),
@@ -5542,7 +5543,7 @@ long btrfs_ioctl(struct file *file, unsigned int
case BTRFS_IOC_SET_FSLABEL:
return btrfs_ioctl_set_fslabel(file, argp);
case BTRFS_IOC_GET_SUPPORTED_FEATURES:
- return btrfs_ioctl_get_supported_features(file, argp);
+ return btrfs_ioctl_get_supported_features(argp);
case BTRFS_IOC_GET_FEATURES:
return btrfs_ioctl_get_features(file, argp);
case BTRFS_IOC_SET_FEATURES:
@@ -5551,3 +5552,24 @@ long btrfs_ioctl(struct file *file, unsigned int
return -ENOTTY;
}
+
+#ifdef CONFIG_COMPAT
+long btrfs_compat_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ switch (cmd) {
+ case FS_IOC32_GETFLAGS:
+ cmd = FS_IOC_GETFLAGS;
+ break;
+ case FS_IOC32_SETFLAGS:
+ cmd = FS_IOC_SETFLAGS;
+ break;
+ case FS_IOC32_GETVERSION:
+ cmd = FS_IOC_GETVERSION;
+ break;
+ default:
+ return -ENOIOCTLCMD;
+ }
+
+ return btrfs_ioctl(file, cmd, (unsigned long) compat_ptr(arg));
+}
+#endif
diff --git a/fs/btrfs/lzo.c b/fs/btrfs/lzo.c
index a2f051347..1adfbe7be 100644
--- a/fs/btrfs/lzo.c
+++ b/fs/btrfs/lzo.c
@@ -55,8 +55,8 @@ static struct list_head *lzo_alloc_workspace(void)
return ERR_PTR(-ENOMEM);
workspace->mem = vmalloc(LZO1X_MEM_COMPRESS);
- workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
- workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_CACHE_SIZE));
+ workspace->buf = vmalloc(lzo1x_worst_compress(PAGE_SIZE));
+ workspace->cbuf = vmalloc(lzo1x_worst_compress(PAGE_SIZE));
if (!workspace->mem || !workspace->buf || !workspace->cbuf)
goto fail;
@@ -116,7 +116,7 @@ static int lzo_compress_pages(struct list_head *ws,
*total_out = 0;
*total_in = 0;
- in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ in_page = find_get_page(mapping, start >> PAGE_SHIFT);
data_in = kmap(in_page);
/*
@@ -133,10 +133,10 @@ static int lzo_compress_pages(struct list_head *ws,
tot_out = LZO_LEN;
pages[0] = out_page;
nr_pages = 1;
- pg_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+ pg_bytes_left = PAGE_SIZE - LZO_LEN;
/* compress at most one page of data each time */
- in_len = min(len, PAGE_CACHE_SIZE);
+ in_len = min(len, PAGE_SIZE);
while (tot_in < len) {
ret = lzo1x_1_compress(data_in, in_len, workspace->cbuf,
&out_len, workspace->mem);
@@ -201,7 +201,7 @@ static int lzo_compress_pages(struct list_head *ws,
cpage_out = kmap(out_page);
pages[nr_pages++] = out_page;
- pg_bytes_left = PAGE_CACHE_SIZE;
+ pg_bytes_left = PAGE_SIZE;
out_offset = 0;
}
}
@@ -221,12 +221,12 @@ static int lzo_compress_pages(struct list_head *ws,
bytes_left = len - tot_in;
kunmap(in_page);
- page_cache_release(in_page);
+ put_page(in_page);
- start += PAGE_CACHE_SIZE;
- in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ start += PAGE_SIZE;
+ in_page = find_get_page(mapping, start >> PAGE_SHIFT);
data_in = kmap(in_page);
- in_len = min(bytes_left, PAGE_CACHE_SIZE);
+ in_len = min(bytes_left, PAGE_SIZE);
}
if (tot_out > tot_in)
@@ -248,7 +248,7 @@ out:
if (in_page) {
kunmap(in_page);
- page_cache_release(in_page);
+ put_page(in_page);
}
return ret;
@@ -266,7 +266,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
char *data_in;
unsigned long page_in_index = 0;
unsigned long page_out_index = 0;
- unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE);
+ unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
unsigned long buf_start;
unsigned long buf_offset = 0;
unsigned long bytes;
@@ -289,7 +289,7 @@ static int lzo_decompress_biovec(struct list_head *ws,
tot_in = LZO_LEN;
in_offset = LZO_LEN;
tot_len = min_t(size_t, srclen, tot_len);
- in_page_bytes_left = PAGE_CACHE_SIZE - LZO_LEN;
+ in_page_bytes_left = PAGE_SIZE - LZO_LEN;
tot_out = 0;
pg_offset = 0;
@@ -345,12 +345,12 @@ cont:
data_in = kmap(pages_in[++page_in_index]);
- in_page_bytes_left = PAGE_CACHE_SIZE;
+ in_page_bytes_left = PAGE_SIZE;
in_offset = 0;
}
}
- out_len = lzo1x_worst_compress(PAGE_CACHE_SIZE);
+ out_len = lzo1x_worst_compress(PAGE_SIZE);
ret = lzo1x_decompress_safe(buf, in_len, workspace->buf,
&out_len);
if (need_unmap)
@@ -399,7 +399,7 @@ static int lzo_decompress(struct list_head *ws, unsigned char *data_in,
in_len = read_compress_length(data_in);
data_in += LZO_LEN;
- out_len = PAGE_CACHE_SIZE;
+ out_len = PAGE_SIZE;
ret = lzo1x_decompress_safe(data_in, in_len, workspace->buf, &out_len);
if (ret != LZO_E_OK) {
printk(KERN_WARNING "BTRFS: decompress failed!\n");
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 8c27292ea..0de7da5a6 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -25,6 +25,7 @@
#include "btrfs_inode.h"
#include "extent_io.h"
#include "disk-io.h"
+#include "compression.h"
static struct kmem_cache *btrfs_ordered_extent_cache;
@@ -1009,7 +1010,7 @@ int btrfs_ordered_update_i_size(struct inode *inode, u64 offset,
for (; node; node = rb_prev(node)) {
test = rb_entry(node, struct btrfs_ordered_extent, rb_node);
- /* We treat this entry as if it doesnt exist */
+ /* We treat this entry as if it doesn't exist */
if (test_bit(BTRFS_ORDERED_UPDATED_ISIZE, &test->flags))
continue;
if (test->file_offset + test->len <= disk_i_size)
@@ -1114,6 +1115,5 @@ int __init ordered_data_init(void)
void ordered_data_exit(void)
{
- if (btrfs_ordered_extent_cache)
- kmem_cache_destroy(btrfs_ordered_extent_cache);
+ kmem_cache_destroy(btrfs_ordered_extent_cache);
}
diff --git a/fs/btrfs/print-tree.c b/fs/btrfs/print-tree.c
index 647ab12fd..147dc6ca5 100644
--- a/fs/btrfs/print-tree.c
+++ b/fs/btrfs/print-tree.c
@@ -295,8 +295,27 @@ void btrfs_print_leaf(struct btrfs_root *root, struct extent_buffer *l)
btrfs_dev_extent_chunk_offset(l, dev_extent),
btrfs_dev_extent_length(l, dev_extent));
break;
- case BTRFS_DEV_STATS_KEY:
- printk(KERN_INFO "\t\tdevice stats\n");
+ case BTRFS_PERSISTENT_ITEM_KEY:
+ printk(KERN_INFO "\t\tpersistent item objectid %llu offset %llu\n",
+ key.objectid, key.offset);
+ switch (key.objectid) {
+ case BTRFS_DEV_STATS_OBJECTID:
+ printk(KERN_INFO "\t\tdevice stats\n");
+ break;
+ default:
+ printk(KERN_INFO "\t\tunknown persistent item\n");
+ }
+ break;
+ case BTRFS_TEMPORARY_ITEM_KEY:
+ printk(KERN_INFO "\t\ttemporary item objectid %llu offset %llu\n",
+ key.objectid, key.offset);
+ switch (key.objectid) {
+ case BTRFS_BALANCE_OBJECTID:
+ printk(KERN_INFO "\t\tbalance status\n");
+ break;
+ default:
+ printk(KERN_INFO "\t\tunknown temporary item\n");
+ }
break;
case BTRFS_DEV_REPLACE_KEY:
printk(KERN_INFO "\t\tdev replace\n");
diff --git a/fs/btrfs/props.c b/fs/btrfs/props.c
index f9e60231f..36992128c 100644
--- a/fs/btrfs/props.c
+++ b/fs/btrfs/props.c
@@ -22,6 +22,7 @@
#include "hash.h"
#include "transaction.h"
#include "xattr.h"
+#include "compression.h"
#define BTRFS_PROP_HANDLERS_HT_BITS 8
static DEFINE_HASHTABLE(prop_handlers_ht, BTRFS_PROP_HANDLERS_HT_BITS);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 5279fdae7..9e119552e 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1463,6 +1463,7 @@ struct btrfs_qgroup_extent_record
u64 bytenr = record->bytenr;
assert_spin_locked(&delayed_refs->lock);
+ trace_btrfs_qgroup_insert_dirty_extent(record);
while (*p) {
parent_node = *p;
@@ -1594,6 +1595,9 @@ static int qgroup_update_counters(struct btrfs_fs_info *fs_info,
cur_old_count = btrfs_qgroup_get_old_refcnt(qg, seq);
cur_new_count = btrfs_qgroup_get_new_refcnt(qg, seq);
+ trace_qgroup_update_counters(qg->qgroupid, cur_old_count,
+ cur_new_count);
+
/* Rfer update part */
if (cur_old_count == 0 && cur_new_count > 0) {
qg->rfer += num_bytes;
@@ -1683,6 +1687,9 @@ btrfs_qgroup_account_extent(struct btrfs_trans_handle *trans,
goto out_free;
BUG_ON(!fs_info->quota_root);
+ trace_btrfs_qgroup_account_extent(bytenr, num_bytes, nr_old_roots,
+ nr_new_roots);
+
qgroups = ulist_alloc(GFP_NOFS);
if (!qgroups) {
ret = -ENOMEM;
@@ -1752,6 +1759,8 @@ int btrfs_qgroup_account_extents(struct btrfs_trans_handle *trans,
record = rb_entry(node, struct btrfs_qgroup_extent_record,
node);
+ trace_btrfs_qgroup_account_extents(record);
+
if (!ret) {
/*
* Use (u64)-1 as time_seq to do special search, which
@@ -1842,8 +1851,10 @@ out:
}
/*
- * copy the acounting information between qgroups. This is necessary when a
- * snapshot or a subvolume is created
+ * Copy the acounting information between qgroups. This is necessary
+ * when a snapshot or a subvolume is created. Throwing an error will
+ * cause a transaction abort so we take extra care here to only error
+ * when a readonly fs is a reasonable outcome.
*/
int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info, u64 srcid, u64 objectid,
@@ -1873,15 +1884,15 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
2 * inherit->num_excl_copies;
for (i = 0; i < nums; ++i) {
srcgroup = find_qgroup_rb(fs_info, *i_qgroups);
- if (!srcgroup) {
- ret = -EINVAL;
- goto out;
- }
- if ((srcgroup->qgroupid >> 48) <= (objectid >> 48)) {
- ret = -EINVAL;
- goto out;
- }
+ /*
+ * Zero out invalid groups so we can ignore
+ * them later.
+ */
+ if (!srcgroup ||
+ ((srcgroup->qgroupid >> 48) <= (objectid >> 48)))
+ *i_qgroups = 0ULL;
+
++i_qgroups;
}
}
@@ -1916,17 +1927,19 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
*/
if (inherit) {
i_qgroups = (u64 *)(inherit + 1);
- for (i = 0; i < inherit->num_qgroups; ++i) {
+ for (i = 0; i < inherit->num_qgroups; ++i, ++i_qgroups) {
+ if (*i_qgroups == 0)
+ continue;
ret = add_qgroup_relation_item(trans, quota_root,
objectid, *i_qgroups);
- if (ret)
+ if (ret && ret != -EEXIST)
goto out;
ret = add_qgroup_relation_item(trans, quota_root,
*i_qgroups, objectid);
- if (ret)
+ if (ret && ret != -EEXIST)
goto out;
- ++i_qgroups;
}
+ ret = 0;
}
@@ -1987,17 +2000,22 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
i_qgroups = (u64 *)(inherit + 1);
for (i = 0; i < inherit->num_qgroups; ++i) {
- ret = add_relation_rb(quota_root->fs_info, objectid,
- *i_qgroups);
- if (ret)
- goto unlock;
+ if (*i_qgroups) {
+ ret = add_relation_rb(quota_root->fs_info, objectid,
+ *i_qgroups);
+ if (ret)
+ goto unlock;
+ }
++i_qgroups;
}
- for (i = 0; i < inherit->num_ref_copies; ++i) {
+ for (i = 0; i < inherit->num_ref_copies; ++i, i_qgroups += 2) {
struct btrfs_qgroup *src;
struct btrfs_qgroup *dst;
+ if (!i_qgroups[0] || !i_qgroups[1])
+ continue;
+
src = find_qgroup_rb(fs_info, i_qgroups[0]);
dst = find_qgroup_rb(fs_info, i_qgroups[1]);
@@ -2008,12 +2026,14 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
dst->rfer = src->rfer - level_size;
dst->rfer_cmpr = src->rfer_cmpr - level_size;
- i_qgroups += 2;
}
- for (i = 0; i < inherit->num_excl_copies; ++i) {
+ for (i = 0; i < inherit->num_excl_copies; ++i, i_qgroups += 2) {
struct btrfs_qgroup *src;
struct btrfs_qgroup *dst;
+ if (!i_qgroups[0] || !i_qgroups[1])
+ continue;
+
src = find_qgroup_rb(fs_info, i_qgroups[0]);
dst = find_qgroup_rb(fs_info, i_qgroups[1]);
@@ -2024,7 +2044,6 @@ int btrfs_qgroup_inherit(struct btrfs_trans_handle *trans,
dst->excl = src->excl + level_size;
dst->excl_cmpr = src->excl_cmpr + level_size;
- i_qgroups += 2;
}
unlock:
diff --git a/fs/btrfs/raid56.c b/fs/btrfs/raid56.c
index 55161369f..0b7792e02 100644
--- a/fs/btrfs/raid56.c
+++ b/fs/btrfs/raid56.c
@@ -270,7 +270,7 @@ static void cache_rbio_pages(struct btrfs_raid_bio *rbio)
s = kmap(rbio->bio_pages[i]);
d = kmap(rbio->stripe_pages[i]);
- memcpy(d, s, PAGE_CACHE_SIZE);
+ memcpy(d, s, PAGE_SIZE);
kunmap(rbio->bio_pages[i]);
kunmap(rbio->stripe_pages[i]);
@@ -962,7 +962,7 @@ static struct page *page_in_rbio(struct btrfs_raid_bio *rbio,
*/
static unsigned long rbio_nr_pages(unsigned long stripe_len, int nr_stripes)
{
- return DIV_ROUND_UP(stripe_len, PAGE_CACHE_SIZE) * nr_stripes;
+ return DIV_ROUND_UP(stripe_len, PAGE_SIZE) * nr_stripes;
}
/*
@@ -1078,7 +1078,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
u64 disk_start;
stripe = &rbio->bbio->stripes[stripe_nr];
- disk_start = stripe->physical + (page_index << PAGE_CACHE_SHIFT);
+ disk_start = stripe->physical + (page_index << PAGE_SHIFT);
/* if the device is missing, just fail this stripe */
if (!stripe->dev->bdev)
@@ -1096,8 +1096,8 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
if (last_end == disk_start && stripe->dev->bdev &&
!last->bi_error &&
last->bi_bdev == stripe->dev->bdev) {
- ret = bio_add_page(last, page, PAGE_CACHE_SIZE, 0);
- if (ret == PAGE_CACHE_SIZE)
+ ret = bio_add_page(last, page, PAGE_SIZE, 0);
+ if (ret == PAGE_SIZE)
return 0;
}
}
@@ -1111,7 +1111,7 @@ static int rbio_add_io_page(struct btrfs_raid_bio *rbio,
bio->bi_bdev = stripe->dev->bdev;
bio->bi_iter.bi_sector = disk_start >> 9;
- bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
+ bio_add_page(bio, page, PAGE_SIZE, 0);
bio_list_add(bio_list, bio);
return 0;
}
@@ -1154,7 +1154,7 @@ static void index_rbio_pages(struct btrfs_raid_bio *rbio)
bio_list_for_each(bio, &rbio->bio_list) {
start = (u64)bio->bi_iter.bi_sector << 9;
stripe_offset = start - rbio->bbio->raid_map[0];
- page_index = stripe_offset >> PAGE_CACHE_SHIFT;
+ page_index = stripe_offset >> PAGE_SHIFT;
for (i = 0; i < bio->bi_vcnt; i++) {
p = bio->bi_io_vec[i].bv_page;
@@ -1253,7 +1253,7 @@ static noinline void finish_rmw(struct btrfs_raid_bio *rbio)
} else {
/* raid5 */
memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
- run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
+ run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
}
@@ -1914,7 +1914,7 @@ pstripe:
/* Copy parity block into failed block to start with */
memcpy(pointers[faila],
pointers[rbio->nr_data],
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
/* rearrange the pointer array */
p = pointers[faila];
@@ -1923,7 +1923,7 @@ pstripe:
pointers[rbio->nr_data - 1] = p;
/* xor in the rest */
- run_xor(pointers, rbio->nr_data - 1, PAGE_CACHE_SIZE);
+ run_xor(pointers, rbio->nr_data - 1, PAGE_SIZE);
}
/* if we're doing this rebuild as part of an rmw, go through
* and set all of our private rbio pages in the
@@ -2250,7 +2250,7 @@ void raid56_add_scrub_pages(struct btrfs_raid_bio *rbio, struct page *page,
ASSERT(logical + PAGE_SIZE <= rbio->bbio->raid_map[0] +
rbio->stripe_len * rbio->nr_data);
stripe_offset = (int)(logical - rbio->bbio->raid_map[0]);
- index = stripe_offset >> PAGE_CACHE_SHIFT;
+ index = stripe_offset >> PAGE_SHIFT;
rbio->bio_pages[index] = page;
}
@@ -2365,14 +2365,14 @@ static noinline void finish_parity_scrub(struct btrfs_raid_bio *rbio,
} else {
/* raid5 */
memcpy(pointers[nr_data], pointers[0], PAGE_SIZE);
- run_xor(pointers + 1, nr_data - 1, PAGE_CACHE_SIZE);
+ run_xor(pointers + 1, nr_data - 1, PAGE_SIZE);
}
/* Check scrubbing pairty and repair it */
p = rbio_stripe_page(rbio, rbio->scrubp, pagenr);
parity = kmap(p);
- if (memcmp(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE))
- memcpy(parity, pointers[rbio->scrubp], PAGE_CACHE_SIZE);
+ if (memcmp(parity, pointers[rbio->scrubp], PAGE_SIZE))
+ memcpy(parity, pointers[rbio->scrubp], PAGE_SIZE);
else
/* Parity is right, needn't writeback */
bitmap_clear(rbio->dbitmap, pagenr, 1);
diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
index 619f92963..298631eae 100644
--- a/fs/btrfs/reada.c
+++ b/fs/btrfs/reada.c
@@ -72,7 +72,7 @@ struct reada_extent {
spinlock_t lock;
struct reada_zone *zones[BTRFS_MAX_MIRRORS];
int nzones;
- struct btrfs_device *scheduled_for;
+ int scheduled;
};
struct reada_zone {
@@ -101,67 +101,53 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info);
static void __reada_start_machine(struct btrfs_fs_info *fs_info);
static int reada_add_block(struct reada_control *rc, u64 logical,
- struct btrfs_key *top, int level, u64 generation);
+ struct btrfs_key *top, u64 generation);
/* recurses */
/* in case of err, eb might be NULL */
-static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err)
+static void __readahead_hook(struct btrfs_fs_info *fs_info,
+ struct reada_extent *re, struct extent_buffer *eb,
+ u64 start, int err)
{
int level = 0;
int nritems;
int i;
u64 bytenr;
u64 generation;
- struct reada_extent *re;
- struct btrfs_fs_info *fs_info = root->fs_info;
struct list_head list;
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- struct btrfs_device *for_dev;
if (eb)
level = btrfs_header_level(eb);
- /* find extent */
- spin_lock(&fs_info->reada_lock);
- re = radix_tree_lookup(&fs_info->reada_tree, index);
- if (re)
- re->refcnt++;
- spin_unlock(&fs_info->reada_lock);
-
- if (!re)
- return -1;
-
spin_lock(&re->lock);
/*
* just take the full list from the extent. afterwards we
* don't need the lock anymore
*/
list_replace_init(&re->extctl, &list);
- for_dev = re->scheduled_for;
- re->scheduled_for = NULL;
+ re->scheduled = 0;
spin_unlock(&re->lock);
- if (err == 0) {
- nritems = level ? btrfs_header_nritems(eb) : 0;
- generation = btrfs_header_generation(eb);
- /*
- * FIXME: currently we just set nritems to 0 if this is a leaf,
- * effectively ignoring the content. In a next step we could
- * trigger more readahead depending from the content, e.g.
- * fetch the checksums for the extents in the leaf.
- */
- } else {
- /*
- * this is the error case, the extent buffer has not been
- * read correctly. We won't access anything from it and
- * just cleanup our data structures. Effectively this will
- * cut the branch below this node from read ahead.
- */
- nritems = 0;
- generation = 0;
- }
+ /*
+ * this is the error case, the extent buffer has not been
+ * read correctly. We won't access anything from it and
+ * just cleanup our data structures. Effectively this will
+ * cut the branch below this node from read ahead.
+ */
+ if (err)
+ goto cleanup;
+ /*
+ * FIXME: currently we just set nritems to 0 if this is a leaf,
+ * effectively ignoring the content. In a next step we could
+ * trigger more readahead depending from the content, e.g.
+ * fetch the checksums for the extents in the leaf.
+ */
+ if (!level)
+ goto cleanup;
+
+ nritems = btrfs_header_nritems(eb);
+ generation = btrfs_header_generation(eb);
for (i = 0; i < nritems; i++) {
struct reada_extctl *rec;
u64 n_gen;
@@ -188,19 +174,20 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
*/
#ifdef DEBUG
if (rec->generation != generation) {
- btrfs_debug(root->fs_info,
- "generation mismatch for (%llu,%d,%llu) %llu != %llu",
- key.objectid, key.type, key.offset,
- rec->generation, generation);
+ btrfs_debug(fs_info,
+ "generation mismatch for (%llu,%d,%llu) %llu != %llu",
+ key.objectid, key.type, key.offset,
+ rec->generation, generation);
}
#endif
if (rec->generation == generation &&
btrfs_comp_cpu_keys(&key, &rc->key_end) < 0 &&
btrfs_comp_cpu_keys(&next_key, &rc->key_start) > 0)
- reada_add_block(rc, bytenr, &next_key,
- level - 1, n_gen);
+ reada_add_block(rc, bytenr, &next_key, n_gen);
}
}
+
+cleanup:
/*
* free extctl records
*/
@@ -222,26 +209,37 @@ static int __readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
reada_extent_put(fs_info, re); /* one ref for each entry */
}
- reada_extent_put(fs_info, re); /* our ref */
- if (for_dev)
- atomic_dec(&for_dev->reada_in_flight);
- return 0;
+ return;
}
/*
* start is passed separately in case eb in NULL, which may be the case with
* failed I/O
*/
-int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
- u64 start, int err)
+int btree_readahead_hook(struct btrfs_fs_info *fs_info,
+ struct extent_buffer *eb, u64 start, int err)
{
- int ret;
+ int ret = 0;
+ struct reada_extent *re;
- ret = __readahead_hook(root, eb, start, err);
+ /* find extent */
+ spin_lock(&fs_info->reada_lock);
+ re = radix_tree_lookup(&fs_info->reada_tree,
+ start >> PAGE_SHIFT);
+ if (re)
+ re->refcnt++;
+ spin_unlock(&fs_info->reada_lock);
+ if (!re) {
+ ret = -1;
+ goto start_machine;
+ }
- reada_start_machine(root->fs_info);
+ __readahead_hook(fs_info, re, eb, start, err);
+ reada_extent_put(fs_info, re); /* our ref */
+start_machine:
+ reada_start_machine(fs_info);
return ret;
}
@@ -259,19 +257,15 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
zone = NULL;
spin_lock(&fs_info->reada_lock);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
- logical >> PAGE_CACHE_SHIFT, 1);
- if (ret == 1)
+ logical >> PAGE_SHIFT, 1);
+ if (ret == 1 && logical >= zone->start && logical <= zone->end) {
kref_get(&zone->refcnt);
- spin_unlock(&fs_info->reada_lock);
-
- if (ret == 1) {
- if (logical >= zone->start && logical < zone->end)
- return zone;
- spin_lock(&fs_info->reada_lock);
- kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
+ return zone;
}
+ spin_unlock(&fs_info->reada_lock);
+
cache = btrfs_lookup_block_group(fs_info, logical);
if (!cache)
return NULL;
@@ -280,7 +274,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
end = start + cache->key.offset - 1;
btrfs_put_block_group(cache);
- zone = kzalloc(sizeof(*zone), GFP_NOFS);
+ zone = kzalloc(sizeof(*zone), GFP_KERNEL);
if (!zone)
return NULL;
@@ -300,15 +294,17 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&dev->reada_zones,
- (unsigned long)(zone->end >> PAGE_CACHE_SHIFT),
+ (unsigned long)(zone->end >> PAGE_SHIFT),
zone);
if (ret == -EEXIST) {
kfree(zone);
ret = radix_tree_gang_lookup(&dev->reada_zones, (void **)&zone,
- logical >> PAGE_CACHE_SHIFT, 1);
- if (ret == 1)
+ logical >> PAGE_SHIFT, 1);
+ if (ret == 1 && logical >= zone->start && logical <= zone->end)
kref_get(&zone->refcnt);
+ else
+ zone = NULL;
}
spin_unlock(&fs_info->reada_lock);
@@ -317,7 +313,7 @@ static struct reada_zone *reada_find_zone(struct btrfs_fs_info *fs_info,
static struct reada_extent *reada_find_extent(struct btrfs_root *root,
u64 logical,
- struct btrfs_key *top, int level)
+ struct btrfs_key *top)
{
int ret;
struct reada_extent *re = NULL;
@@ -330,9 +326,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
u64 length;
int real_stripes;
int nzones = 0;
- int i;
- unsigned long index = logical >> PAGE_CACHE_SHIFT;
+ unsigned long index = logical >> PAGE_SHIFT;
int dev_replace_is_ongoing;
+ int have_zone = 0;
spin_lock(&fs_info->reada_lock);
re = radix_tree_lookup(&fs_info->reada_tree, index);
@@ -343,7 +339,7 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
if (re)
return re;
- re = kzalloc(sizeof(*re), GFP_NOFS);
+ re = kzalloc(sizeof(*re), GFP_KERNEL);
if (!re)
return NULL;
@@ -375,11 +371,16 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
struct reada_zone *zone;
dev = bbio->stripes[nzones].dev;
+
+ /* cannot read ahead on missing device. */
+ if (!dev->bdev)
+ continue;
+
zone = reada_find_zone(fs_info, dev, logical, bbio);
if (!zone)
- break;
+ continue;
- re->zones[nzones] = zone;
+ re->zones[re->nzones++] = zone;
spin_lock(&zone->lock);
if (!zone->elems)
kref_get(&zone->refcnt);
@@ -389,14 +390,13 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
- re->nzones = nzones;
- if (nzones == 0) {
+ if (re->nzones == 0) {
/* not a single zone found, error and out */
goto error;
}
/* insert extent in reada_tree + all per-device trees, all or nothing */
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
spin_lock(&fs_info->reada_lock);
ret = radix_tree_insert(&fs_info->reada_tree, index, re);
if (ret == -EEXIST) {
@@ -404,19 +404,20 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(!re_exist);
re_exist->refcnt++;
spin_unlock(&fs_info->reada_lock);
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
if (ret) {
spin_unlock(&fs_info->reada_lock);
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
prev_dev = NULL;
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(
&fs_info->dev_replace);
- for (i = 0; i < nzones; ++i) {
- dev = bbio->stripes[i].dev;
+ for (nzones = 0; nzones < re->nzones; ++nzones) {
+ dev = re->zones[nzones]->device;
+
if (dev == prev_dev) {
/*
* in case of DUP, just add the first zone. As both
@@ -427,15 +428,9 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
*/
continue;
}
- if (!dev->bdev) {
- /*
- * cannot read ahead on missing device, but for RAID5/6,
- * REQ_GET_READ_MIRRORS return 1. So don't skip missing
- * device for such case.
- */
- if (nzones > 1)
- continue;
- }
+ if (!dev->bdev)
+ continue;
+
if (dev_replace_is_ongoing &&
dev == fs_info->dev_replace.tgtdev) {
/*
@@ -447,8 +442,8 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
prev_dev = dev;
ret = radix_tree_insert(&dev->reada_extents, index, re);
if (ret) {
- while (--i >= 0) {
- dev = bbio->stripes[i].dev;
+ while (--nzones >= 0) {
+ dev = re->zones[nzones]->device;
BUG_ON(dev == NULL);
/* ignore whether the entry was inserted */
radix_tree_delete(&dev->reada_extents, index);
@@ -456,21 +451,24 @@ static struct reada_extent *reada_find_extent(struct btrfs_root *root,
BUG_ON(fs_info == NULL);
radix_tree_delete(&fs_info->reada_tree, index);
spin_unlock(&fs_info->reada_lock);
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
goto error;
}
+ have_zone = 1;
}
spin_unlock(&fs_info->reada_lock);
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
+
+ if (!have_zone)
+ goto error;
btrfs_put_bbio(bbio);
return re;
error:
- while (nzones) {
+ for (nzones = 0; nzones < re->nzones; ++nzones) {
struct reada_zone *zone;
- --nzones;
zone = re->zones[nzones];
kref_get(&zone->refcnt);
spin_lock(&zone->lock);
@@ -497,7 +495,7 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
struct reada_extent *re)
{
int i;
- unsigned long index = re->logical >> PAGE_CACHE_SHIFT;
+ unsigned long index = re->logical >> PAGE_SHIFT;
spin_lock(&fs_info->reada_lock);
if (--re->refcnt) {
@@ -531,8 +529,6 @@ static void reada_extent_put(struct btrfs_fs_info *fs_info,
kref_put(&zone->refcnt, reada_zone_release);
spin_unlock(&fs_info->reada_lock);
}
- if (re->scheduled_for)
- atomic_dec(&re->scheduled_for->reada_in_flight);
kfree(re);
}
@@ -542,7 +538,7 @@ static void reada_zone_release(struct kref *kref)
struct reada_zone *zone = container_of(kref, struct reada_zone, refcnt);
radix_tree_delete(&zone->device->reada_zones,
- zone->end >> PAGE_CACHE_SHIFT);
+ zone->end >> PAGE_SHIFT);
kfree(zone);
}
@@ -556,17 +552,17 @@ static void reada_control_release(struct kref *kref)
}
static int reada_add_block(struct reada_control *rc, u64 logical,
- struct btrfs_key *top, int level, u64 generation)
+ struct btrfs_key *top, u64 generation)
{
struct btrfs_root *root = rc->root;
struct reada_extent *re;
struct reada_extctl *rec;
- re = reada_find_extent(root, logical, top, level); /* takes one ref */
+ re = reada_find_extent(root, logical, top); /* takes one ref */
if (!re)
return -1;
- rec = kzalloc(sizeof(*rec), GFP_NOFS);
+ rec = kzalloc(sizeof(*rec), GFP_KERNEL);
if (!rec) {
reada_extent_put(root->fs_info, re);
return -ENOMEM;
@@ -591,7 +587,7 @@ static int reada_add_block(struct reada_control *rc, u64 logical,
static void reada_peer_zones_set_lock(struct reada_zone *zone, int lock)
{
int i;
- unsigned long index = zone->end >> PAGE_CACHE_SHIFT;
+ unsigned long index = zone->end >> PAGE_SHIFT;
for (i = 0; i < zone->ndevs; ++i) {
struct reada_zone *peer;
@@ -626,7 +622,7 @@ static int reada_pick_zone(struct btrfs_device *dev)
(void **)&zone, index, 1);
if (ret == 0)
break;
- index = (zone->end >> PAGE_CACHE_SHIFT) + 1;
+ index = (zone->end >> PAGE_SHIFT) + 1;
if (zone->locked) {
if (zone->elems > top_locked_elems) {
top_locked_elems = zone->elems;
@@ -662,7 +658,6 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
u64 logical;
int ret;
int i;
- int need_kick = 0;
spin_lock(&fs_info->reada_lock);
if (dev->reada_curr_zone == NULL) {
@@ -678,8 +673,8 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
* plugging to speed things up
*/
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
- dev->reada_next >> PAGE_CACHE_SHIFT, 1);
- if (ret == 0 || re->logical >= dev->reada_curr_zone->end) {
+ dev->reada_next >> PAGE_SHIFT, 1);
+ if (ret == 0 || re->logical > dev->reada_curr_zone->end) {
ret = reada_pick_zone(dev);
if (!ret) {
spin_unlock(&fs_info->reada_lock);
@@ -687,7 +682,7 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
}
re = NULL;
ret = radix_tree_gang_lookup(&dev->reada_extents, (void **)&re,
- dev->reada_next >> PAGE_CACHE_SHIFT, 1);
+ dev->reada_next >> PAGE_SHIFT, 1);
}
if (ret == 0) {
spin_unlock(&fs_info->reada_lock);
@@ -698,6 +693,15 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
spin_unlock(&fs_info->reada_lock);
+ spin_lock(&re->lock);
+ if (re->scheduled || list_empty(&re->extctl)) {
+ spin_unlock(&re->lock);
+ reada_extent_put(fs_info, re);
+ return 0;
+ }
+ re->scheduled = 1;
+ spin_unlock(&re->lock);
+
/*
* find mirror num
*/
@@ -709,29 +713,20 @@ static int reada_start_machine_dev(struct btrfs_fs_info *fs_info,
}
logical = re->logical;
- spin_lock(&re->lock);
- if (re->scheduled_for == NULL) {
- re->scheduled_for = dev;
- need_kick = 1;
- }
- spin_unlock(&re->lock);
-
- reada_extent_put(fs_info, re);
-
- if (!need_kick)
- return 0;
-
atomic_inc(&dev->reada_in_flight);
ret = reada_tree_block_flagged(fs_info->extent_root, logical,
mirror_num, &eb);
if (ret)
- __readahead_hook(fs_info->extent_root, NULL, logical, ret);
+ __readahead_hook(fs_info, re, NULL, logical, ret);
else if (eb)
- __readahead_hook(fs_info->extent_root, eb, eb->start, ret);
+ __readahead_hook(fs_info, re, eb, eb->start, ret);
if (eb)
free_extent_buffer(eb);
+ atomic_dec(&dev->reada_in_flight);
+ reada_extent_put(fs_info, re);
+
return 1;
}
@@ -752,6 +747,8 @@ static void reada_start_machine_worker(struct btrfs_work *work)
set_task_ioprio(current, BTRFS_IOPRIO_READA);
__reada_start_machine(fs_info);
set_task_ioprio(current, old_ioprio);
+
+ atomic_dec(&fs_info->reada_works_cnt);
}
static void __reada_start_machine(struct btrfs_fs_info *fs_info)
@@ -783,15 +780,19 @@ static void __reada_start_machine(struct btrfs_fs_info *fs_info)
* enqueue to workers to finish it. This will distribute the load to
* the cores.
*/
- for (i = 0; i < 2; ++i)
+ for (i = 0; i < 2; ++i) {
reada_start_machine(fs_info);
+ if (atomic_read(&fs_info->reada_works_cnt) >
+ BTRFS_MAX_MIRRORS * 2)
+ break;
+ }
}
static void reada_start_machine(struct btrfs_fs_info *fs_info)
{
struct reada_machine_work *rmw;
- rmw = kzalloc(sizeof(*rmw), GFP_NOFS);
+ rmw = kzalloc(sizeof(*rmw), GFP_KERNEL);
if (!rmw) {
/* FIXME we cannot handle this properly right now */
BUG();
@@ -801,6 +802,7 @@ static void reada_start_machine(struct btrfs_fs_info *fs_info)
rmw->fs_info = fs_info;
btrfs_queue_work(fs_info->readahead_workers, &rmw->work);
+ atomic_inc(&fs_info->reada_works_cnt);
}
#ifdef DEBUG
@@ -836,7 +838,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
printk(KERN_CONT " curr off %llu",
device->reada_next - zone->start);
printk(KERN_CONT "\n");
- index = (zone->end >> PAGE_CACHE_SHIFT) + 1;
+ index = (zone->end >> PAGE_SHIFT) + 1;
}
cnt = 0;
index = 0;
@@ -848,10 +850,9 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
if (ret == 0)
break;
printk(KERN_DEBUG
- " re: logical %llu size %u empty %d for %lld",
+ " re: logical %llu size %u empty %d scheduled %d",
re->logical, fs_info->tree_root->nodesize,
- list_empty(&re->extctl), re->scheduled_for ?
- re->scheduled_for->devid : -1);
+ list_empty(&re->extctl), re->scheduled);
for (i = 0; i < re->nzones; ++i) {
printk(KERN_CONT " zone %llu-%llu devs",
@@ -863,7 +864,7 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
}
}
printk(KERN_CONT "\n");
- index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
+ index = (re->logical >> PAGE_SHIFT) + 1;
if (++cnt > 15)
break;
}
@@ -878,31 +879,25 @@ static void dump_devs(struct btrfs_fs_info *fs_info, int all)
index, 1);
if (ret == 0)
break;
- if (!re->scheduled_for) {
- index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
+ if (!re->scheduled) {
+ index = (re->logical >> PAGE_SHIFT) + 1;
continue;
}
printk(KERN_DEBUG
- "re: logical %llu size %u list empty %d for %lld",
+ "re: logical %llu size %u list empty %d scheduled %d",
re->logical, fs_info->tree_root->nodesize,
- list_empty(&re->extctl),
- re->scheduled_for ? re->scheduled_for->devid : -1);
+ list_empty(&re->extctl), re->scheduled);
for (i = 0; i < re->nzones; ++i) {
printk(KERN_CONT " zone %llu-%llu devs",
re->zones[i]->start,
re->zones[i]->end);
- for (i = 0; i < re->nzones; ++i) {
- printk(KERN_CONT " zone %llu-%llu devs",
- re->zones[i]->start,
- re->zones[i]->end);
- for (j = 0; j < re->zones[i]->ndevs; ++j) {
- printk(KERN_CONT " %lld",
- re->zones[i]->devs[j]->devid);
- }
+ for (j = 0; j < re->zones[i]->ndevs; ++j) {
+ printk(KERN_CONT " %lld",
+ re->zones[i]->devs[j]->devid);
}
}
printk(KERN_CONT "\n");
- index = (re->logical >> PAGE_CACHE_SHIFT) + 1;
+ index = (re->logical >> PAGE_SHIFT) + 1;
}
spin_unlock(&fs_info->reada_lock);
}
@@ -917,7 +912,6 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
struct reada_control *rc;
u64 start;
u64 generation;
- int level;
int ret;
struct extent_buffer *node;
static struct btrfs_key max_key = {
@@ -926,7 +920,7 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
.offset = (u64)-1
};
- rc = kzalloc(sizeof(*rc), GFP_NOFS);
+ rc = kzalloc(sizeof(*rc), GFP_KERNEL);
if (!rc)
return ERR_PTR(-ENOMEM);
@@ -940,11 +934,10 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
node = btrfs_root_node(root);
start = node->start;
- level = btrfs_header_level(node);
generation = btrfs_header_generation(node);
free_extent_buffer(node);
- ret = reada_add_block(rc, start, &max_key, level, generation);
+ ret = reada_add_block(rc, start, &max_key, generation);
if (ret) {
kfree(rc);
return ERR_PTR(ret);
@@ -959,8 +952,11 @@ struct reada_control *btrfs_reada_add(struct btrfs_root *root,
int btrfs_reada_wait(void *handle)
{
struct reada_control *rc = handle;
+ struct btrfs_fs_info *fs_info = rc->root->fs_info;
while (atomic_read(&rc->elems)) {
+ if (!atomic_read(&fs_info->reada_works_cnt))
+ reada_start_machine(fs_info);
wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
5 * HZ);
dump_devs(rc->root->fs_info,
@@ -977,9 +973,13 @@ int btrfs_reada_wait(void *handle)
int btrfs_reada_wait(void *handle)
{
struct reada_control *rc = handle;
+ struct btrfs_fs_info *fs_info = rc->root->fs_info;
while (atomic_read(&rc->elems)) {
- wait_event(rc->wait, atomic_read(&rc->elems) == 0);
+ if (!atomic_read(&fs_info->reada_works_cnt))
+ reada_start_machine(fs_info);
+ wait_event_timeout(rc->wait, atomic_read(&rc->elems) == 0,
+ (HZ + 9) / 10);
}
kref_put(&rc->refcnt, reada_control_release);
diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 2bd001145..08ef890de 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -1850,6 +1850,7 @@ again:
eb = read_tree_block(dest, old_bytenr, old_ptr_gen);
if (IS_ERR(eb)) {
ret = PTR_ERR(eb);
+ break;
} else if (!extent_buffer_uptodate(eb)) {
ret = -EIO;
free_extent_buffer(eb);
@@ -3129,10 +3130,10 @@ static int relocate_file_extent_cluster(struct inode *inode,
if (ret)
goto out;
- index = (cluster->start - offset) >> PAGE_CACHE_SHIFT;
- last_index = (cluster->end - offset) >> PAGE_CACHE_SHIFT;
+ index = (cluster->start - offset) >> PAGE_SHIFT;
+ last_index = (cluster->end - offset) >> PAGE_SHIFT;
while (index <= last_index) {
- ret = btrfs_delalloc_reserve_metadata(inode, PAGE_CACHE_SIZE);
+ ret = btrfs_delalloc_reserve_metadata(inode, PAGE_SIZE);
if (ret)
goto out;
@@ -3145,7 +3146,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
mask);
if (!page) {
btrfs_delalloc_release_metadata(inode,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
ret = -ENOMEM;
goto out;
}
@@ -3162,16 +3163,16 @@ static int relocate_file_extent_cluster(struct inode *inode,
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
btrfs_delalloc_release_metadata(inode,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
ret = -EIO;
goto out;
}
}
page_start = page_offset(page);
- page_end = page_start + PAGE_CACHE_SIZE - 1;
+ page_end = page_start + PAGE_SIZE - 1;
lock_extent(&BTRFS_I(inode)->io_tree, page_start, page_end);
@@ -3191,7 +3192,7 @@ static int relocate_file_extent_cluster(struct inode *inode,
unlock_extent(&BTRFS_I(inode)->io_tree,
page_start, page_end);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
index++;
balance_dirty_pages_ratelimited(inode->i_mapping);
diff --git a/fs/btrfs/root-tree.c b/fs/btrfs/root-tree.c
index 2c849b08a..9fcd6dfc3 100644
--- a/fs/btrfs/root-tree.c
+++ b/fs/btrfs/root-tree.c
@@ -496,7 +496,7 @@ void btrfs_update_root_times(struct btrfs_trans_handle *trans,
struct btrfs_root *root)
{
struct btrfs_root_item *item = &root->root_item;
- struct timespec ct = CURRENT_TIME;
+ struct timespec ct = current_fs_time(root->fs_info->sb);
spin_lock(&root->root_item_lock);
btrfs_set_root_ctransid(item, trans->transid);
diff --git a/fs/btrfs/scrub.c b/fs/btrfs/scrub.c
index 92bf5ee73..4678f03e8 100644
--- a/fs/btrfs/scrub.c
+++ b/fs/btrfs/scrub.c
@@ -461,7 +461,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
struct btrfs_fs_info *fs_info = dev->dev_root->fs_info;
int ret;
- sctx = kzalloc(sizeof(*sctx), GFP_NOFS);
+ sctx = kzalloc(sizeof(*sctx), GFP_KERNEL);
if (!sctx)
goto nomem;
atomic_set(&sctx->refs, 1);
@@ -472,7 +472,7 @@ struct scrub_ctx *scrub_setup_ctx(struct btrfs_device *dev, int is_dev_replace)
for (i = 0; i < SCRUB_BIOS_PER_SCTX; ++i) {
struct scrub_bio *sbio;
- sbio = kzalloc(sizeof(*sbio), GFP_NOFS);
+ sbio = kzalloc(sizeof(*sbio), GFP_KERNEL);
if (!sbio)
goto nomem;
sctx->bios[i] = sbio;
@@ -611,7 +611,7 @@ static void scrub_print_warning(const char *errstr, struct scrub_block *sblock)
u64 flags = 0;
u64 ref_root;
u32 item_size;
- u8 ref_level;
+ u8 ref_level = 0;
int ret;
WARN_ON(sblock->page_count < 1);
@@ -703,7 +703,7 @@ static int scrub_fixup_readpage(u64 inum, u64 offset, u64 root, void *fixup_ctx)
if (IS_ERR(inode))
return PTR_ERR(inode);
- index = offset >> PAGE_CACHE_SHIFT;
+ index = offset >> PAGE_SHIFT;
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page) {
@@ -1636,7 +1636,7 @@ static int scrub_write_page_to_dev_replace(struct scrub_block *sblock,
if (spage->io_error) {
void *mapped_buffer = kmap_atomic(spage->page);
- memset(mapped_buffer, 0, PAGE_CACHE_SIZE);
+ memset(mapped_buffer, 0, PAGE_SIZE);
flush_dcache_page(spage->page);
kunmap_atomic(mapped_buffer);
}
@@ -1654,7 +1654,7 @@ static int scrub_add_page_to_wr_bio(struct scrub_ctx *sctx,
again:
if (!wr_ctx->wr_curr_bio) {
wr_ctx->wr_curr_bio = kzalloc(sizeof(*wr_ctx->wr_curr_bio),
- GFP_NOFS);
+ GFP_KERNEL);
if (!wr_ctx->wr_curr_bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
@@ -1671,7 +1671,8 @@ again:
sbio->dev = wr_ctx->tgtdev;
bio = sbio->bio;
if (!bio) {
- bio = btrfs_io_bio_alloc(GFP_NOFS, wr_ctx->pages_per_wr_bio);
+ bio = btrfs_io_bio_alloc(GFP_KERNEL,
+ wr_ctx->pages_per_wr_bio);
if (!bio) {
mutex_unlock(&wr_ctx->wr_lock);
return -ENOMEM;
@@ -2076,7 +2077,8 @@ again:
sbio->dev = spage->dev;
bio = sbio->bio;
if (!bio) {
- bio = btrfs_io_bio_alloc(GFP_NOFS, sctx->pages_per_rd_bio);
+ bio = btrfs_io_bio_alloc(GFP_KERNEL,
+ sctx->pages_per_rd_bio);
if (!bio)
return -ENOMEM;
sbio->bio = bio;
@@ -2241,7 +2243,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
struct scrub_block *sblock;
int index;
- sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+ sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
if (!sblock) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2259,7 +2261,7 @@ static int scrub_pages(struct scrub_ctx *sctx, u64 logical, u64 len,
struct scrub_page *spage;
u64 l = min_t(u64, len, PAGE_SIZE);
- spage = kzalloc(sizeof(*spage), GFP_NOFS);
+ spage = kzalloc(sizeof(*spage), GFP_KERNEL);
if (!spage) {
leave_nomem:
spin_lock(&sctx->stat_lock);
@@ -2286,7 +2288,7 @@ leave_nomem:
spage->have_csum = 0;
}
sblock->page_count++;
- spage->page = alloc_page(GFP_NOFS);
+ spage->page = alloc_page(GFP_KERNEL);
if (!spage->page)
goto leave_nomem;
len -= l;
@@ -2541,7 +2543,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
struct scrub_block *sblock;
int index;
- sblock = kzalloc(sizeof(*sblock), GFP_NOFS);
+ sblock = kzalloc(sizeof(*sblock), GFP_KERNEL);
if (!sblock) {
spin_lock(&sctx->stat_lock);
sctx->stat.malloc_errors++;
@@ -2561,7 +2563,7 @@ static int scrub_pages_for_parity(struct scrub_parity *sparity,
struct scrub_page *spage;
u64 l = min_t(u64, len, PAGE_SIZE);
- spage = kzalloc(sizeof(*spage), GFP_NOFS);
+ spage = kzalloc(sizeof(*spage), GFP_KERNEL);
if (!spage) {
leave_nomem:
spin_lock(&sctx->stat_lock);
@@ -2591,7 +2593,7 @@ leave_nomem:
spage->have_csum = 0;
}
sblock->page_count++;
- spage->page = alloc_page(GFP_NOFS);
+ spage->page = alloc_page(GFP_KERNEL);
if (!spage->page)
goto leave_nomem;
len -= l;
@@ -3857,16 +3859,16 @@ int btrfs_scrub_dev(struct btrfs_fs_info *fs_info, u64 devid, u64 start,
return -EIO;
}
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (dev->scrub_device ||
(!is_dev_replace &&
btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))) {
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
mutex_unlock(&fs_info->scrub_lock);
mutex_unlock(&fs_info->fs_devices->device_list_mutex);
return -EINPROGRESS;
}
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
ret = scrub_workers_get(fs_info, is_dev_replace);
if (ret) {
@@ -4292,8 +4294,8 @@ static int copy_nocow_pages_for_inode(u64 inum, u64 offset, u64 root,
goto out;
}
- while (len >= PAGE_CACHE_SIZE) {
- index = offset >> PAGE_CACHE_SHIFT;
+ while (len >= PAGE_SIZE) {
+ index = offset >> PAGE_SHIFT;
again:
page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
if (!page) {
@@ -4324,7 +4326,7 @@ again:
*/
if (page->mapping != inode->i_mapping) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
goto again;
}
if (!PageUptodate(page)) {
@@ -4346,15 +4348,15 @@ again:
ret = err;
next_page:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (ret)
break;
- offset += PAGE_CACHE_SIZE;
- physical_for_dev_replace += PAGE_CACHE_SIZE;
- nocow_ctx_logical += PAGE_CACHE_SIZE;
- len -= PAGE_CACHE_SIZE;
+ offset += PAGE_SIZE;
+ physical_for_dev_replace += PAGE_SIZE;
+ nocow_ctx_logical += PAGE_SIZE;
+ len -= PAGE_SIZE;
}
ret = COPY_COMPLETE;
out:
@@ -4388,8 +4390,8 @@ static int write_page_nocow(struct scrub_ctx *sctx,
bio->bi_iter.bi_size = 0;
bio->bi_iter.bi_sector = physical_for_dev_replace >> 9;
bio->bi_bdev = dev->bdev;
- ret = bio_add_page(bio, page, PAGE_CACHE_SIZE, 0);
- if (ret != PAGE_CACHE_SIZE) {
+ ret = bio_add_page(bio, page, PAGE_SIZE, 0);
+ if (ret != PAGE_SIZE) {
leave_with_eio:
bio_put(bio);
btrfs_dev_stat_inc_and_print(dev, BTRFS_DEV_STAT_WRITE_ERRS);
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 63a6152be..8d358c547 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -34,6 +34,7 @@
#include "disk-io.h"
#include "btrfs_inode.h"
#include "transaction.h"
+#include "compression.h"
static int g_verbose = 0;
@@ -304,7 +305,7 @@ static struct fs_path *fs_path_alloc(void)
{
struct fs_path *p;
- p = kmalloc(sizeof(*p), GFP_NOFS);
+ p = kmalloc(sizeof(*p), GFP_KERNEL);
if (!p)
return NULL;
p->reversed = 0;
@@ -363,11 +364,11 @@ static int fs_path_ensure_buf(struct fs_path *p, int len)
* First time the inline_buf does not suffice
*/
if (p->buf == p->inline_buf) {
- tmp_buf = kmalloc(len, GFP_NOFS);
+ tmp_buf = kmalloc(len, GFP_KERNEL);
if (tmp_buf)
memcpy(tmp_buf, p->buf, old_buf_len);
} else {
- tmp_buf = krealloc(p->buf, len, GFP_NOFS);
+ tmp_buf = krealloc(p->buf, len, GFP_KERNEL);
}
if (!tmp_buf)
return -ENOMEM;
@@ -995,7 +996,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
* values are small.
*/
buf_len = PATH_MAX;
- buf = kmalloc(buf_len, GFP_NOFS);
+ buf = kmalloc(buf_len, GFP_KERNEL);
if (!buf) {
ret = -ENOMEM;
goto out;
@@ -1042,7 +1043,7 @@ static int iterate_dir_item(struct btrfs_root *root, struct btrfs_path *path,
buf = NULL;
} else {
char *tmp = krealloc(buf, buf_len,
- GFP_NOFS | __GFP_NOWARN);
+ GFP_KERNEL | __GFP_NOWARN);
if (!tmp)
kfree(buf);
@@ -1303,7 +1304,7 @@ static int find_extent_clone(struct send_ctx *sctx,
/* We only use this path under the commit sem */
tmp_path->need_commit_sem = 0;
- backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_NOFS);
+ backref_ctx = kmalloc(sizeof(*backref_ctx), GFP_KERNEL);
if (!backref_ctx) {
ret = -ENOMEM;
goto out;
@@ -1984,7 +1985,7 @@ static int name_cache_insert(struct send_ctx *sctx,
nce_head = radix_tree_lookup(&sctx->name_cache,
(unsigned long)nce->ino);
if (!nce_head) {
- nce_head = kmalloc(sizeof(*nce_head), GFP_NOFS);
+ nce_head = kmalloc(sizeof(*nce_head), GFP_KERNEL);
if (!nce_head) {
kfree(nce);
return -ENOMEM;
@@ -2179,7 +2180,7 @@ out_cache:
/*
* Store the result of the lookup in the name cache.
*/
- nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_NOFS);
+ nce = kmalloc(sizeof(*nce) + fs_path_len(dest) + 1, GFP_KERNEL);
if (!nce) {
ret = -ENOMEM;
goto out;
@@ -2315,7 +2316,7 @@ static int send_subvol_begin(struct send_ctx *sctx)
if (!path)
return -ENOMEM;
- name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_NOFS);
+ name = kmalloc(BTRFS_PATH_NAME_MAX, GFP_KERNEL);
if (!name) {
btrfs_free_path(path);
return -ENOMEM;
@@ -2730,7 +2731,7 @@ static int __record_ref(struct list_head *head, u64 dir,
{
struct recorded_ref *ref;
- ref = kmalloc(sizeof(*ref), GFP_NOFS);
+ ref = kmalloc(sizeof(*ref), GFP_KERNEL);
if (!ref)
return -ENOMEM;
@@ -2755,7 +2756,7 @@ static int dup_ref(struct recorded_ref *ref, struct list_head *list)
{
struct recorded_ref *new;
- new = kmalloc(sizeof(*ref), GFP_NOFS);
+ new = kmalloc(sizeof(*ref), GFP_KERNEL);
if (!new)
return -ENOMEM;
@@ -2818,7 +2819,7 @@ add_orphan_dir_info(struct send_ctx *sctx, u64 dir_ino)
struct rb_node *parent = NULL;
struct orphan_dir_info *entry, *odi;
- odi = kmalloc(sizeof(*odi), GFP_NOFS);
+ odi = kmalloc(sizeof(*odi), GFP_KERNEL);
if (!odi)
return ERR_PTR(-ENOMEM);
odi->ino = dir_ino;
@@ -2973,7 +2974,7 @@ static int add_waiting_dir_move(struct send_ctx *sctx, u64 ino, bool orphanized)
struct rb_node *parent = NULL;
struct waiting_dir_move *entry, *dm;
- dm = kmalloc(sizeof(*dm), GFP_NOFS);
+ dm = kmalloc(sizeof(*dm), GFP_KERNEL);
if (!dm)
return -ENOMEM;
dm->ino = ino;
@@ -3040,7 +3041,7 @@ static int add_pending_dir_move(struct send_ctx *sctx,
int exists = 0;
int ret;
- pm = kmalloc(sizeof(*pm), GFP_NOFS);
+ pm = kmalloc(sizeof(*pm), GFP_KERNEL);
if (!pm)
return -ENOMEM;
pm->parent_ino = parent_ino;
@@ -4280,7 +4281,7 @@ static int __find_xattr(int num, struct btrfs_key *di_key,
strncmp(name, ctx->name, name_len) == 0) {
ctx->found_idx = num;
ctx->found_data_len = data_len;
- ctx->found_data = kmemdup(data, data_len, GFP_NOFS);
+ ctx->found_data = kmemdup(data, data_len, GFP_KERNEL);
if (!ctx->found_data)
return -ENOMEM;
return 1;
@@ -4448,9 +4449,9 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
struct page *page;
char *addr;
struct btrfs_key key;
- pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t index = offset >> PAGE_SHIFT;
pgoff_t last_index;
- unsigned pg_offset = offset & ~PAGE_CACHE_MASK;
+ unsigned pg_offset = offset & ~PAGE_MASK;
ssize_t ret = 0;
key.objectid = sctx->cur_ino;
@@ -4470,7 +4471,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
if (len == 0)
goto out;
- last_index = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ last_index = (offset + len - 1) >> PAGE_SHIFT;
/* initial readahead */
memset(&sctx->ra, 0, sizeof(struct file_ra_state));
@@ -4480,8 +4481,8 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
while (index <= last_index) {
unsigned cur_len = min_t(unsigned, len,
- PAGE_CACHE_SIZE - pg_offset);
- page = find_or_create_page(inode->i_mapping, index, GFP_NOFS);
+ PAGE_SIZE - pg_offset);
+ page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
ret = -ENOMEM;
break;
@@ -4492,7 +4493,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
lock_page(page);
if (!PageUptodate(page)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
ret = -EIO;
break;
}
@@ -4502,7 +4503,7 @@ static ssize_t fill_read_buf(struct send_ctx *sctx, u64 offset, u32 len)
memcpy(sctx->read_buf + ret, addr + pg_offset, cur_len);
kunmap(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
index++;
pg_offset = 0;
len -= cur_len;
@@ -4803,7 +4804,7 @@ static int clone_range(struct send_ctx *sctx,
type = btrfs_file_extent_type(leaf, ei);
if (type == BTRFS_FILE_EXTENT_INLINE) {
ext_len = btrfs_file_extent_inline_len(leaf, slot, ei);
- ext_len = PAGE_CACHE_ALIGN(ext_len);
+ ext_len = PAGE_ALIGN(ext_len);
} else {
ext_len = btrfs_file_extent_num_bytes(leaf, ei);
}
@@ -4885,7 +4886,7 @@ static int send_write_or_clone(struct send_ctx *sctx,
* but there may be items after this page. Make
* sure to send the whole thing
*/
- len = PAGE_CACHE_ALIGN(len);
+ len = PAGE_ALIGN(len);
} else {
len = btrfs_file_extent_num_bytes(path->nodes[0], ei);
}
@@ -5989,7 +5990,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
goto out;
}
- sctx = kzalloc(sizeof(struct send_ctx), GFP_NOFS);
+ sctx = kzalloc(sizeof(struct send_ctx), GFP_KERNEL);
if (!sctx) {
ret = -ENOMEM;
goto out;
@@ -5997,7 +5998,7 @@ long btrfs_ioctl_send(struct file *mnt_file, void __user *arg_)
INIT_LIST_HEAD(&sctx->new_refs);
INIT_LIST_HEAD(&sctx->deleted_refs);
- INIT_RADIX_TREE(&sctx->name_cache, GFP_NOFS);
+ INIT_RADIX_TREE(&sctx->name_cache, GFP_KERNEL);
INIT_LIST_HEAD(&sctx->name_cache_list);
sctx->flags = arg->flags;
diff --git a/fs/btrfs/struct-funcs.c b/fs/btrfs/struct-funcs.c
index b976597b0..e05619f24 100644
--- a/fs/btrfs/struct-funcs.c
+++ b/fs/btrfs/struct-funcs.c
@@ -66,7 +66,7 @@ u##bits btrfs_get_token_##bits(struct extent_buffer *eb, void *ptr, \
\
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
- (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
+ (token->offset + PAGE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
res = get_unaligned_le##bits(p + off); \
@@ -104,7 +104,7 @@ void btrfs_set_token_##bits(struct extent_buffer *eb, \
\
if (token && token->kaddr && token->offset <= offset && \
token->eb == eb && \
- (token->offset + PAGE_CACHE_SIZE >= offset + size)) { \
+ (token->offset + PAGE_SIZE >= offset + size)) { \
kaddr = token->kaddr; \
p = kaddr + part_offset - token->offset; \
put_unaligned_le##bits(val, p + off); \
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index d41e09fe8..00b8f37cc 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -303,7 +303,8 @@ enum {
Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
Opt_commit_interval, Opt_barrier, Opt_nodefrag, Opt_nodiscard,
Opt_noenospc_debug, Opt_noflushoncommit, Opt_acl, Opt_datacow,
- Opt_datasum, Opt_treelog, Opt_noinode_cache,
+ Opt_datasum, Opt_treelog, Opt_noinode_cache, Opt_usebackuproot,
+ Opt_nologreplay, Opt_norecovery,
#ifdef CONFIG_BTRFS_DEBUG
Opt_fragment_data, Opt_fragment_metadata, Opt_fragment_all,
#endif
@@ -335,6 +336,8 @@ static const match_table_t tokens = {
{Opt_noacl, "noacl"},
{Opt_notreelog, "notreelog"},
{Opt_treelog, "treelog"},
+ {Opt_nologreplay, "nologreplay"},
+ {Opt_norecovery, "norecovery"},
{Opt_flushoncommit, "flushoncommit"},
{Opt_noflushoncommit, "noflushoncommit"},
{Opt_ratio, "metadata_ratio=%d"},
@@ -352,7 +355,8 @@ static const match_table_t tokens = {
{Opt_inode_cache, "inode_cache"},
{Opt_noinode_cache, "noinode_cache"},
{Opt_no_space_cache, "nospace_cache"},
- {Opt_recovery, "recovery"},
+ {Opt_recovery, "recovery"}, /* deprecated */
+ {Opt_usebackuproot, "usebackuproot"},
{Opt_skip_balance, "skip_balance"},
{Opt_check_integrity, "check_int"},
{Opt_check_integrity_including_extent_data, "check_int_data"},
@@ -373,7 +377,8 @@ static const match_table_t tokens = {
* reading in a new superblock is parsed here.
* XXX JDM: This needs to be cleaned up for remount.
*/
-int btrfs_parse_options(struct btrfs_root *root, char *options)
+int btrfs_parse_options(struct btrfs_root *root, char *options,
+ unsigned long new_flags)
{
struct btrfs_fs_info *info = root->fs_info;
substring_t args[MAX_OPT_ARGS];
@@ -393,8 +398,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
else if (cache_gen)
btrfs_set_opt(info->mount_opt, SPACE_CACHE);
+ /*
+ * Even the options are empty, we still need to do extra check
+ * against new flags
+ */
if (!options)
- goto out;
+ goto check;
/*
* strsep changes the string, duplicate it because parse_options
@@ -606,6 +615,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
btrfs_clear_and_info(root, NOTREELOG,
"enabling tree log");
break;
+ case Opt_norecovery:
+ case Opt_nologreplay:
+ btrfs_set_and_info(root, NOLOGREPLAY,
+ "disabling log replay at mount time");
+ break;
case Opt_flushoncommit:
btrfs_set_and_info(root, FLUSHONCOMMIT,
"turning on flush-on-commit");
@@ -696,8 +710,12 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
"disabling auto defrag");
break;
case Opt_recovery:
- btrfs_info(root->fs_info, "enabling auto recovery");
- btrfs_set_opt(info->mount_opt, RECOVERY);
+ btrfs_warn(root->fs_info,
+ "'recovery' is deprecated, use 'usebackuproot' instead");
+ case Opt_usebackuproot:
+ btrfs_info(root->fs_info,
+ "trying to use backup root at mount time");
+ btrfs_set_opt(info->mount_opt, USEBACKUPROOT);
break;
case Opt_skip_balance:
btrfs_set_opt(info->mount_opt, SKIP_BALANCE);
@@ -792,6 +810,15 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
break;
}
}
+check:
+ /*
+ * Extra check for current option against current flag
+ */
+ if (btrfs_test_opt(root, NOLOGREPLAY) && !(new_flags & MS_RDONLY)) {
+ btrfs_err(root->fs_info,
+ "nologreplay must be used with ro mount option");
+ ret = -EINVAL;
+ }
out:
if (btrfs_fs_compat_ro(root->fs_info, FREE_SPACE_TREE) &&
!btrfs_test_opt(root, FREE_SPACE_TREE) &&
@@ -1202,6 +1229,8 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",ssd");
if (btrfs_test_opt(root, NOTREELOG))
seq_puts(seq, ",notreelog");
+ if (btrfs_test_opt(root, NOLOGREPLAY))
+ seq_puts(seq, ",nologreplay");
if (btrfs_test_opt(root, FLUSHONCOMMIT))
seq_puts(seq, ",flushoncommit");
if (btrfs_test_opt(root, DISCARD))
@@ -1228,8 +1257,6 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry)
seq_puts(seq, ",inode_cache");
if (btrfs_test_opt(root, SKIP_BALANCE))
seq_puts(seq, ",skip_balance");
- if (btrfs_test_opt(root, RECOVERY))
- seq_puts(seq, ",recovery");
#ifdef CONFIG_BTRFS_FS_CHECK_INTEGRITY
if (btrfs_test_opt(root, CHECK_INTEGRITY_INCLUDING_EXTENT_DATA))
seq_puts(seq, ",check_int_data");
@@ -1685,7 +1712,7 @@ static int btrfs_remount(struct super_block *sb, int *flags, char *data)
}
}
- ret = btrfs_parse_options(root, data);
+ ret = btrfs_parse_options(root, data, *flags);
if (ret) {
ret = -EINVAL;
goto restore;
@@ -2163,6 +2190,9 @@ static long btrfs_control_ioctl(struct file *file, unsigned int cmd,
break;
ret = !(fs_devices->num_devices == fs_devices->total_devices);
break;
+ case BTRFS_IOC_GET_SUPPORTED_FEATURES:
+ ret = btrfs_ioctl_get_supported_features((void __user*)arg);
+ break;
}
kfree(vol);
@@ -2261,7 +2291,7 @@ static void btrfs_interface_exit(void)
misc_deregister(&btrfs_misc);
}
-static void btrfs_print_info(void)
+static void btrfs_print_mod_info(void)
{
printk(KERN_INFO "Btrfs loaded"
#ifdef CONFIG_BTRFS_DEBUG
@@ -2363,7 +2393,7 @@ static int __init init_btrfs_fs(void)
btrfs_init_lockdep();
- btrfs_print_info();
+ btrfs_print_mod_info();
err = btrfs_run_sanity_tests();
if (err)
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index d39f714da..f54bf450b 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -137,7 +137,6 @@ static void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
void **slot;
spin_lock(&fs_info->buffer_lock);
-restart:
radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
struct extent_buffer *eb;
@@ -147,7 +146,7 @@ restart:
/* Shouldn't happen but that kind of thinking creates CVE's */
if (radix_tree_exception(eb)) {
if (radix_tree_deref_retry(eb))
- goto restart;
+ slot = radix_tree_iter_retry(&iter);
continue;
}
spin_unlock(&fs_info->buffer_lock);
diff --git a/fs/btrfs/tests/extent-io-tests.c b/fs/btrfs/tests/extent-io-tests.c
index 669b58201..70948b13b 100644
--- a/fs/btrfs/tests/extent-io-tests.c
+++ b/fs/btrfs/tests/extent-io-tests.c
@@ -32,8 +32,8 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
{
int ret;
struct page *pages[16];
- unsigned long index = start >> PAGE_CACHE_SHIFT;
- unsigned long end_index = end >> PAGE_CACHE_SHIFT;
+ unsigned long index = start >> PAGE_SHIFT;
+ unsigned long end_index = end >> PAGE_SHIFT;
unsigned long nr_pages = end_index - index + 1;
int i;
int count = 0;
@@ -49,9 +49,9 @@ static noinline int process_page_range(struct inode *inode, u64 start, u64 end,
count++;
if (flags & PROCESS_UNLOCK && PageLocked(pages[i]))
unlock_page(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
if (flags & PROCESS_RELEASE)
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
nr_pages -= ret;
index += ret;
@@ -93,7 +93,7 @@ static int test_find_delalloc(void)
* everything to make sure our pages don't get evicted and screw up our
* test.
*/
- for (index = 0; index < (total_dirty >> PAGE_CACHE_SHIFT); index++) {
+ for (index = 0; index < (total_dirty >> PAGE_SHIFT); index++) {
page = find_or_create_page(inode->i_mapping, index, GFP_KERNEL);
if (!page) {
test_msg("Failed to allocate test page\n");
@@ -104,7 +104,7 @@ static int test_find_delalloc(void)
if (index) {
unlock_page(page);
} else {
- page_cache_get(page);
+ get_page(page);
locked_page = page;
}
}
@@ -129,7 +129,7 @@ static int test_find_delalloc(void)
}
unlock_extent(&tmp, start, end);
unlock_page(locked_page);
- page_cache_release(locked_page);
+ put_page(locked_page);
/*
* Test this scenario
@@ -139,7 +139,7 @@ static int test_find_delalloc(void)
*/
test_start = SZ_64M;
locked_page = find_lock_page(inode->i_mapping,
- test_start >> PAGE_CACHE_SHIFT);
+ test_start >> PAGE_SHIFT);
if (!locked_page) {
test_msg("Couldn't find the locked page\n");
goto out_bits;
@@ -165,7 +165,7 @@ static int test_find_delalloc(void)
}
unlock_extent(&tmp, start, end);
/* locked_page was unlocked above */
- page_cache_release(locked_page);
+ put_page(locked_page);
/*
* Test this scenario
@@ -174,7 +174,7 @@ static int test_find_delalloc(void)
*/
test_start = max_bytes + 4096;
locked_page = find_lock_page(inode->i_mapping, test_start >>
- PAGE_CACHE_SHIFT);
+ PAGE_SHIFT);
if (!locked_page) {
test_msg("Could'nt find the locked page\n");
goto out_bits;
@@ -225,13 +225,13 @@ static int test_find_delalloc(void)
* range we want to find.
*/
page = find_get_page(inode->i_mapping,
- (max_bytes + SZ_1M) >> PAGE_CACHE_SHIFT);
+ (max_bytes + SZ_1M) >> PAGE_SHIFT);
if (!page) {
test_msg("Couldn't find our page\n");
goto out_bits;
}
ClearPageDirty(page);
- page_cache_release(page);
+ put_page(page);
/* We unlocked it in the previous test */
lock_page(locked_page);
@@ -239,7 +239,7 @@ static int test_find_delalloc(void)
end = 0;
/*
* Currently if we fail to find dirty pages in the delalloc range we
- * will adjust max_bytes down to PAGE_CACHE_SIZE and then re-search. If
+ * will adjust max_bytes down to PAGE_SIZE and then re-search. If
* this changes at any point in the future we will need to fix this
* tests expected behavior.
*/
@@ -249,9 +249,9 @@ static int test_find_delalloc(void)
test_msg("Didn't find our range\n");
goto out_bits;
}
- if (start != test_start && end != test_start + PAGE_CACHE_SIZE - 1) {
+ if (start != test_start && end != test_start + PAGE_SIZE - 1) {
test_msg("Expected start %Lu end %Lu, got start %Lu end %Lu\n",
- test_start, test_start + PAGE_CACHE_SIZE - 1, start,
+ test_start, test_start + PAGE_SIZE - 1, start,
end);
goto out_bits;
}
@@ -265,7 +265,7 @@ out_bits:
clear_extent_bits(&tmp, 0, total_dirty - 1, (unsigned)-1, GFP_KERNEL);
out:
if (locked_page)
- page_cache_release(locked_page);
+ put_page(locked_page);
process_page_range(inode, 0, total_dirty - 1,
PROCESS_UNLOCK | PROCESS_RELEASE);
iput(inode);
@@ -298,9 +298,9 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
return -EINVAL;
}
- bitmap_set(bitmap, (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+ bitmap_set(bitmap, (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
sizeof(long) * BITS_PER_BYTE);
- extent_buffer_bitmap_set(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0,
+ extent_buffer_bitmap_set(eb, PAGE_SIZE - sizeof(long) / 2, 0,
sizeof(long) * BITS_PER_BYTE);
if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
test_msg("Setting straddling pages failed\n");
@@ -309,10 +309,10 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
bitmap_set(bitmap, 0, len * BITS_PER_BYTE);
bitmap_clear(bitmap,
- (PAGE_CACHE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
+ (PAGE_SIZE - sizeof(long) / 2) * BITS_PER_BYTE,
sizeof(long) * BITS_PER_BYTE);
extent_buffer_bitmap_set(eb, 0, 0, len * BITS_PER_BYTE);
- extent_buffer_bitmap_clear(eb, PAGE_CACHE_SIZE - sizeof(long) / 2, 0,
+ extent_buffer_bitmap_clear(eb, PAGE_SIZE - sizeof(long) / 2, 0,
sizeof(long) * BITS_PER_BYTE);
if (memcmp_extent_buffer(eb, bitmap, 0, len) != 0) {
test_msg("Clearing straddling pages failed\n");
@@ -353,7 +353,7 @@ static int __test_eb_bitmaps(unsigned long *bitmap, struct extent_buffer *eb,
static int test_eb_bitmaps(void)
{
- unsigned long len = PAGE_CACHE_SIZE * 4;
+ unsigned long len = PAGE_SIZE * 4;
unsigned long *bitmap;
struct extent_buffer *eb;
int ret;
@@ -379,7 +379,7 @@ static int test_eb_bitmaps(void)
/* Do it over again with an extent buffer which isn't page-aligned. */
free_extent_buffer(eb);
- eb = __alloc_dummy_extent_buffer(NULL, PAGE_CACHE_SIZE / 2, len);
+ eb = __alloc_dummy_extent_buffer(NULL, PAGE_SIZE / 2, len);
if (!eb) {
test_msg("Couldn't allocate test extent buffer\n");
kfree(bitmap);
diff --git a/fs/btrfs/tests/free-space-tests.c b/fs/btrfs/tests/free-space-tests.c
index c9ad97b1e..514247515 100644
--- a/fs/btrfs/tests/free-space-tests.c
+++ b/fs/btrfs/tests/free-space-tests.c
@@ -22,7 +22,7 @@
#include "../disk-io.h"
#include "../free-space-cache.h"
-#define BITS_PER_BITMAP (PAGE_CACHE_SIZE * 8)
+#define BITS_PER_BITMAP (PAGE_SIZE * 8)
/*
* This test just does basic sanity checking, making sure we can add an exten
diff --git a/fs/btrfs/tests/inode-tests.c b/fs/btrfs/tests/inode-tests.c
index e2d3da02d..863a6a3af 100644
--- a/fs/btrfs/tests/inode-tests.c
+++ b/fs/btrfs/tests/inode-tests.c
@@ -22,6 +22,7 @@
#include "../disk-io.h"
#include "../extent_io.h"
#include "../volumes.h"
+#include "../compression.h"
static void insert_extent(struct btrfs_root *root, u64 start, u64 len,
u64 ram_bytes, u64 offset, u64 disk_bytenr,
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index b6031ce47..43885e51b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -637,6 +637,8 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
trans->block_rsv = &root->fs_info->trans_block_rsv;
trans->bytes_reserved = num_bytes;
+ trace_btrfs_space_reservation(root->fs_info, "transaction",
+ trans->transid, num_bytes, 1);
return trans;
}
@@ -1333,7 +1335,7 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
struct dentry *dentry;
struct extent_buffer *tmp;
struct extent_buffer *old;
- struct timespec cur_time = CURRENT_TIME;
+ struct timespec cur_time;
int ret = 0;
u64 to_reserve = 0;
u64 index = 0;
@@ -1375,12 +1377,16 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
rsv = trans->block_rsv;
trans->block_rsv = &pending->block_rsv;
trans->bytes_reserved = trans->block_rsv->reserved;
-
+ trace_btrfs_space_reservation(root->fs_info, "transaction",
+ trans->transid,
+ trans->bytes_reserved, 1);
dentry = pending->dentry;
parent_inode = pending->dir;
parent_root = BTRFS_I(parent_inode)->root;
record_root_in_trans(trans, parent_root);
+ cur_time = current_fs_time(parent_inode->i_sb);
+
/*
* insert the directory item
*/
@@ -1523,7 +1529,8 @@ static noinline int create_pending_snapshot(struct btrfs_trans_handle *trans,
btrfs_i_size_write(parent_inode, parent_inode->i_size +
dentry->d_name.len * 2);
- parent_inode->i_mtime = parent_inode->i_ctime = CURRENT_TIME;
+ parent_inode->i_mtime = parent_inode->i_ctime =
+ current_fs_time(parent_inode->i_sb);
ret = btrfs_update_inode_fallback(trans, parent_root, parent_inode);
if (ret) {
btrfs_abort_transaction(trans, root, ret);
diff --git a/fs/btrfs/tree-log.c b/fs/btrfs/tree-log.c
index 849a30aa1..517d0ccb3 100644
--- a/fs/btrfs/tree-log.c
+++ b/fs/btrfs/tree-log.c
@@ -26,6 +26,7 @@
#include "print-tree.h"
#include "backref.h"
#include "hash.h"
+#include "compression.h"
/* magic values for the inode_only field in btrfs_log_inode:
*
@@ -1045,7 +1046,7 @@ again:
/*
* NOTE: we have searched root tree and checked the
- * coresponding ref, it does not need to check again.
+ * corresponding ref, it does not need to check again.
*/
*search_done = 1;
}
@@ -4621,7 +4622,22 @@ static int btrfs_log_inode(struct btrfs_trans_handle *trans,
mutex_lock(&BTRFS_I(inode)->log_mutex);
- btrfs_get_logged_extents(inode, &logged_list, start, end);
+ /*
+ * Collect ordered extents only if we are logging data. This is to
+ * ensure a subsequent request to log this inode in LOG_INODE_ALL mode
+ * will process the ordered extents if they still exists at the time,
+ * because when we collect them we test and set for the flag
+ * BTRFS_ORDERED_LOGGED to prevent multiple log requests to process the
+ * same ordered extents. The consequence for the LOG_INODE_ALL log mode
+ * not processing the ordered extents is that we end up logging the
+ * corresponding file extent items, based on the extent maps in the
+ * inode's extent_map_tree's modified_list, without logging the
+ * respective checksums (since the may still be only attached to the
+ * ordered extents and have not been inserted in the csum tree by
+ * btrfs_finish_ordered_io() yet).
+ */
+ if (inode_only == LOG_INODE_ALL)
+ btrfs_get_logged_extents(inode, &logged_list, start, end);
/*
* a brute force approach to making sure we get the most uptodate
@@ -4909,6 +4925,42 @@ out_unlock:
}
/*
+ * Check if we must fallback to a transaction commit when logging an inode.
+ * This must be called after logging the inode and is used only in the context
+ * when fsyncing an inode requires the need to log some other inode - in which
+ * case we can't lock the i_mutex of each other inode we need to log as that
+ * can lead to deadlocks with concurrent fsync against other inodes (as we can
+ * log inodes up or down in the hierarchy) or rename operations for example. So
+ * we take the log_mutex of the inode after we have logged it and then check for
+ * its last_unlink_trans value - this is safe because any task setting
+ * last_unlink_trans must take the log_mutex and it must do this before it does
+ * the actual unlink operation, so if we do this check before a concurrent task
+ * sets last_unlink_trans it means we've logged a consistent version/state of
+ * all the inode items, otherwise we are not sure and must do a transaction
+ * commit (the concurrent task migth have only updated last_unlink_trans before
+ * we logged the inode or it might have also done the unlink).
+ */
+static bool btrfs_must_commit_transaction(struct btrfs_trans_handle *trans,
+ struct inode *inode)
+{
+ struct btrfs_fs_info *fs_info = BTRFS_I(inode)->root->fs_info;
+ bool ret = false;
+
+ mutex_lock(&BTRFS_I(inode)->log_mutex);
+ if (BTRFS_I(inode)->last_unlink_trans > fs_info->last_trans_committed) {
+ /*
+ * Make sure any commits to the log are forced to be full
+ * commits.
+ */
+ btrfs_set_log_full_commit(fs_info, trans);
+ ret = true;
+ }
+ mutex_unlock(&BTRFS_I(inode)->log_mutex);
+
+ return ret;
+}
+
+/*
* follow the dentry parent pointers up the chain and see if any
* of the directories in it require a full commit before they can
* be logged. Returns zero if nothing special needs to be done or 1 if
@@ -4921,7 +4973,6 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
u64 last_committed)
{
int ret = 0;
- struct btrfs_root *root;
struct dentry *old_parent = NULL;
struct inode *orig_inode = inode;
@@ -4953,14 +5004,7 @@ static noinline int check_parent_dirs_for_sync(struct btrfs_trans_handle *trans,
BTRFS_I(inode)->logged_trans = trans->transid;
smp_mb();
- if (BTRFS_I(inode)->last_unlink_trans > last_committed) {
- root = BTRFS_I(inode)->root;
-
- /*
- * make sure any commits to the log are forced
- * to be full commits
- */
- btrfs_set_log_full_commit(root->fs_info, trans);
+ if (btrfs_must_commit_transaction(trans, inode)) {
ret = 1;
break;
}
@@ -5119,6 +5163,9 @@ process_leaf:
btrfs_release_path(path);
ret = btrfs_log_inode(trans, root, di_inode,
log_mode, 0, LLONG_MAX, ctx);
+ if (!ret &&
+ btrfs_must_commit_transaction(trans, di_inode))
+ ret = 1;
iput(di_inode);
if (ret)
goto next_dir_inode;
@@ -5233,6 +5280,9 @@ static int btrfs_log_all_parents(struct btrfs_trans_handle *trans,
ret = btrfs_log_inode(trans, root, dir_inode,
LOG_INODE_ALL, 0, LLONG_MAX, ctx);
+ if (!ret &&
+ btrfs_must_commit_transaction(trans, dir_inode))
+ ret = 1;
iput(dir_inode);
if (ret)
goto out;
@@ -5584,6 +5634,9 @@ error:
* They revolve around files there were unlinked from the directory, and
* this function updates the parent directory so that a full commit is
* properly done if it is fsync'd later after the unlinks are done.
+ *
+ * Must be called before the unlink operations (updates to the subvolume tree,
+ * inodes, etc) are done.
*/
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct inode *dir, struct inode *inode,
@@ -5599,8 +5652,11 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
* into the file. When the file is logged we check it and
* don't log the parents if the file is fully on disk.
*/
- if (S_ISREG(inode->i_mode))
+ if (S_ISREG(inode->i_mode)) {
+ mutex_lock(&BTRFS_I(inode)->log_mutex);
BTRFS_I(inode)->last_unlink_trans = trans->transid;
+ mutex_unlock(&BTRFS_I(inode)->log_mutex);
+ }
/*
* if this directory was already logged any new
@@ -5631,7 +5687,29 @@ void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
return;
record:
+ mutex_lock(&BTRFS_I(dir)->log_mutex);
+ BTRFS_I(dir)->last_unlink_trans = trans->transid;
+ mutex_unlock(&BTRFS_I(dir)->log_mutex);
+}
+
+/*
+ * Make sure that if someone attempts to fsync the parent directory of a deleted
+ * snapshot, it ends up triggering a transaction commit. This is to guarantee
+ * that after replaying the log tree of the parent directory's root we will not
+ * see the snapshot anymore and at log replay time we will not see any log tree
+ * corresponding to the deleted snapshot's root, which could lead to replaying
+ * it after replaying the log tree of the parent directory (which would replay
+ * the snapshot delete operation).
+ *
+ * Must be called before the actual snapshot destroy operation (updates to the
+ * parent root and tree of tree roots trees, etc) are done.
+ */
+void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ struct inode *dir)
+{
+ mutex_lock(&BTRFS_I(dir)->log_mutex);
BTRFS_I(dir)->last_unlink_trans = trans->transid;
+ mutex_unlock(&BTRFS_I(dir)->log_mutex);
}
/*
diff --git a/fs/btrfs/tree-log.h b/fs/btrfs/tree-log.h
index 6916a781e..a9f1b75d0 100644
--- a/fs/btrfs/tree-log.h
+++ b/fs/btrfs/tree-log.h
@@ -79,6 +79,8 @@ int btrfs_pin_log_trans(struct btrfs_root *root);
void btrfs_record_unlink_dir(struct btrfs_trans_handle *trans,
struct inode *dir, struct inode *inode,
int for_rename);
+void btrfs_record_snapshot_destroy(struct btrfs_trans_handle *trans,
+ struct inode *dir);
int btrfs_log_new_name(struct btrfs_trans_handle *trans,
struct inode *inode, struct inode *old_dir,
struct dentry *parent);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 366b33594..bd0f45fb3 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -138,7 +138,7 @@ static struct btrfs_fs_devices *__alloc_fs_devices(void)
{
struct btrfs_fs_devices *fs_devs;
- fs_devs = kzalloc(sizeof(*fs_devs), GFP_NOFS);
+ fs_devs = kzalloc(sizeof(*fs_devs), GFP_KERNEL);
if (!fs_devs)
return ERR_PTR(-ENOMEM);
@@ -220,7 +220,7 @@ static struct btrfs_device *__alloc_device(void)
{
struct btrfs_device *dev;
- dev = kzalloc(sizeof(*dev), GFP_NOFS);
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
if (!dev)
return ERR_PTR(-ENOMEM);
@@ -733,7 +733,8 @@ static struct btrfs_fs_devices *clone_fs_devices(struct btrfs_fs_devices *orig)
* uuid mutex so nothing we touch in here is going to disappear.
*/
if (orig_dev->name) {
- name = rcu_string_strdup(orig_dev->name->str, GFP_NOFS);
+ name = rcu_string_strdup(orig_dev->name->str,
+ GFP_KERNEL);
if (!name) {
kfree(device);
goto error;
@@ -1024,16 +1025,16 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
}
/* make sure our super fits in the device */
- if (bytenr + PAGE_CACHE_SIZE >= i_size_read(bdev->bd_inode))
+ if (bytenr + PAGE_SIZE >= i_size_read(bdev->bd_inode))
goto error_bdev_put;
/* make sure our super fits in the page */
- if (sizeof(*disk_super) > PAGE_CACHE_SIZE)
+ if (sizeof(*disk_super) > PAGE_SIZE)
goto error_bdev_put;
/* make sure our super doesn't straddle pages on disk */
- index = bytenr >> PAGE_CACHE_SHIFT;
- if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_CACHE_SHIFT != index)
+ index = bytenr >> PAGE_SHIFT;
+ if ((bytenr + sizeof(*disk_super) - 1) >> PAGE_SHIFT != index)
goto error_bdev_put;
/* pull in the page with our super */
@@ -1046,7 +1047,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
p = kmap(page);
/* align our pointer to the offset of the super block */
- disk_super = p + (bytenr & ~PAGE_CACHE_MASK);
+ disk_super = p + (bytenr & ~PAGE_MASK);
if (btrfs_super_bytenr(disk_super) != bytenr ||
btrfs_super_magic(disk_super) != BTRFS_MAGIC)
@@ -1074,7 +1075,7 @@ int btrfs_scan_one_device(const char *path, fmode_t flags, void *holder,
error_unmap:
kunmap(page);
- page_cache_release(page);
+ put_page(page);
error_bdev_put:
blkdev_put(bdev, flags);
@@ -1714,12 +1715,12 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path)
} while (read_seqretry(&root->fs_info->profiles_lock, seq));
num_devices = root->fs_info->fs_devices->num_devices;
- btrfs_dev_replace_lock(&root->fs_info->dev_replace);
+ btrfs_dev_replace_lock(&root->fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&root->fs_info->dev_replace)) {
WARN_ON(num_devices < 1);
num_devices--;
}
- btrfs_dev_replace_unlock(&root->fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&root->fs_info->dev_replace, 0);
if ((all_avail & BTRFS_BLOCK_GROUP_RAID10) && num_devices <= 4) {
ret = BTRFS_ERROR_DEV_RAID10_MIN_NOT_MET;
@@ -2287,7 +2288,7 @@ int btrfs_init_new_device(struct btrfs_root *root, char *device_path)
goto error;
}
- name = rcu_string_strdup(device_path, GFP_NOFS);
+ name = rcu_string_strdup(device_path, GFP_KERNEL);
if (!name) {
kfree(device);
ret = -ENOMEM;
@@ -2748,7 +2749,7 @@ int btrfs_remove_chunk(struct btrfs_trans_handle *trans,
em->start + em->len < chunk_offset) {
/*
* This is a logic error, but we don't want to just rely on the
- * user having built with ASSERT enabled, so if ASSERT doens't
+ * user having built with ASSERT enabled, so if ASSERT doesn't
* do anything we still error out.
*/
ASSERT(0);
@@ -2966,7 +2967,7 @@ static int insert_balance_item(struct btrfs_root *root,
}
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_insert_empty_item(trans, root, path, &key,
@@ -3015,7 +3016,7 @@ static int del_balance_item(struct btrfs_root *root)
}
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
@@ -3686,12 +3687,12 @@ int btrfs_balance(struct btrfs_balance_control *bctl,
}
num_devices = fs_info->fs_devices->num_devices;
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace)) {
BUG_ON(num_devices < 1);
num_devices--;
}
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
allowed = BTRFS_AVAIL_ALLOC_BIT_SINGLE;
if (num_devices == 1)
allowed |= BTRFS_BLOCK_GROUP_DUP;
@@ -3867,7 +3868,7 @@ int btrfs_recover_balance(struct btrfs_fs_info *fs_info)
return -ENOMEM;
key.objectid = BTRFS_BALANCE_OBJECTID;
- key.type = BTRFS_BALANCE_ITEM_KEY;
+ key.type = BTRFS_TEMPORARY_ITEM_KEY;
key.offset = 0;
ret = btrfs_search_slot(NULL, fs_info->tree_root, &key, path, 0, 0);
@@ -4118,7 +4119,7 @@ out:
* Callback for btrfs_uuid_tree_iterate().
* returns:
* 0 check succeeded, the entry is not outdated.
- * < 0 if an error occured.
+ * < 0 if an error occurred.
* > 0 if the check failed, which means the caller shall remove the entry.
*/
static int btrfs_check_uuid_tree_entry(struct btrfs_fs_info *fs_info,
@@ -5062,10 +5063,10 @@ int btrfs_num_copies(struct btrfs_fs_info *fs_info, u64 logical, u64 len)
ret = 1;
free_extent_map(em);
- btrfs_dev_replace_lock(&fs_info->dev_replace);
+ btrfs_dev_replace_lock(&fs_info->dev_replace, 0);
if (btrfs_dev_replace_is_ongoing(&fs_info->dev_replace))
ret++;
- btrfs_dev_replace_unlock(&fs_info->dev_replace);
+ btrfs_dev_replace_unlock(&fs_info->dev_replace, 0);
return ret;
}
@@ -5325,10 +5326,12 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
if (!bbio_ret)
goto out;
- btrfs_dev_replace_lock(dev_replace);
+ btrfs_dev_replace_lock(dev_replace, 0);
dev_replace_is_ongoing = btrfs_dev_replace_is_ongoing(dev_replace);
if (!dev_replace_is_ongoing)
- btrfs_dev_replace_unlock(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
+ else
+ btrfs_dev_replace_set_lock_blocking(dev_replace);
if (dev_replace_is_ongoing && mirror_num == map->num_stripes + 1 &&
!(rw & (REQ_WRITE | REQ_DISCARD | REQ_GET_READ_MIRRORS)) &&
@@ -5751,8 +5754,10 @@ static int __btrfs_map_block(struct btrfs_fs_info *fs_info, int rw,
bbio->mirror_num = map->num_stripes + 1;
}
out:
- if (dev_replace_is_ongoing)
- btrfs_dev_replace_unlock(dev_replace);
+ if (dev_replace_is_ongoing) {
+ btrfs_dev_replace_clear_lock_blocking(dev_replace);
+ btrfs_dev_replace_unlock(dev_replace, 0);
+ }
free_extent_map(em);
return ret;
}
@@ -6522,7 +6527,7 @@ int btrfs_read_sys_array(struct btrfs_root *root)
* but sb spans only this function. Add an explicit SetPageUptodate call
* to silence the warning eg. on PowerPC 64.
*/
- if (PAGE_CACHE_SIZE > BTRFS_SUPER_INFO_SIZE)
+ if (PAGE_SIZE > BTRFS_SUPER_INFO_SIZE)
SetPageUptodate(sb->pages[0]);
write_extent_buffer(sb, super_copy, 0, BTRFS_SUPER_INFO_SIZE);
@@ -6705,8 +6710,8 @@ int btrfs_init_dev_stats(struct btrfs_fs_info *fs_info)
int item_size;
struct btrfs_dev_stats_item *ptr;
- key.objectid = 0;
- key.type = BTRFS_DEV_STATS_KEY;
+ key.objectid = BTRFS_DEV_STATS_OBJECTID;
+ key.type = BTRFS_PERSISTENT_ITEM_KEY;
key.offset = device->devid;
ret = btrfs_search_slot(NULL, dev_root, &key, path, 0, 0);
if (ret) {
@@ -6753,8 +6758,8 @@ static int update_dev_stat_item(struct btrfs_trans_handle *trans,
int ret;
int i;
- key.objectid = 0;
- key.type = BTRFS_DEV_STATS_KEY;
+ key.objectid = BTRFS_DEV_STATS_OBJECTID;
+ key.type = BTRFS_PERSISTENT_ITEM_KEY;
key.offset = device->devid;
path = btrfs_alloc_path();
diff --git a/fs/btrfs/xattr.c b/fs/btrfs/xattr.c
index 6c68d6356..145d2b89e 100644
--- a/fs/btrfs/xattr.c
+++ b/fs/btrfs/xattr.c
@@ -249,7 +249,7 @@ int __btrfs_setxattr(struct btrfs_trans_handle *trans,
goto out;
inode_inc_iversion(inode);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
set_bit(BTRFS_INODE_COPY_EVERYTHING, &BTRFS_I(inode)->runtime_flags);
ret = btrfs_update_inode(trans, root, inode);
BUG_ON(ret);
@@ -260,16 +260,12 @@ out:
ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
{
- struct btrfs_key key, found_key;
+ struct btrfs_key key;
struct inode *inode = d_inode(dentry);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_path *path;
- struct extent_buffer *leaf;
- struct btrfs_dir_item *di;
- int ret = 0, slot;
+ int ret = 0;
size_t total_size = 0, size_left = size;
- unsigned long name_ptr;
- size_t name_len;
/*
* ok we want all objects associated with this id.
@@ -291,6 +287,13 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
goto err;
while (1) {
+ struct extent_buffer *leaf;
+ int slot;
+ struct btrfs_dir_item *di;
+ struct btrfs_key found_key;
+ u32 item_size;
+ u32 cur;
+
leaf = path->nodes[0];
slot = path->slots[0];
@@ -316,31 +319,45 @@ ssize_t btrfs_listxattr(struct dentry *dentry, char *buffer, size_t size)
if (found_key.type > BTRFS_XATTR_ITEM_KEY)
break;
if (found_key.type < BTRFS_XATTR_ITEM_KEY)
- goto next;
+ goto next_item;
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
- if (verify_dir_item(root, leaf, di))
- goto next;
-
- name_len = btrfs_dir_name_len(leaf, di);
- total_size += name_len + 1;
+ item_size = btrfs_item_size_nr(leaf, slot);
+ cur = 0;
+ while (cur < item_size) {
+ u16 name_len = btrfs_dir_name_len(leaf, di);
+ u16 data_len = btrfs_dir_data_len(leaf, di);
+ u32 this_len = sizeof(*di) + name_len + data_len;
+ unsigned long name_ptr = (unsigned long)(di + 1);
+
+ if (verify_dir_item(root, leaf, di)) {
+ ret = -EIO;
+ goto err;
+ }
- /* we are just looking for how big our buffer needs to be */
- if (!size)
- goto next;
+ total_size += name_len + 1;
+ /*
+ * We are just looking for how big our buffer needs to
+ * be.
+ */
+ if (!size)
+ goto next;
- if (!buffer || (name_len + 1) > size_left) {
- ret = -ERANGE;
- goto err;
- }
+ if (!buffer || (name_len + 1) > size_left) {
+ ret = -ERANGE;
+ goto err;
+ }
- name_ptr = (unsigned long)(di + 1);
- read_extent_buffer(leaf, buffer, name_ptr, name_len);
- buffer[name_len] = '\0';
+ read_extent_buffer(leaf, buffer, name_ptr, name_len);
+ buffer[name_len] = '\0';
- size_left -= name_len + 1;
- buffer += name_len + 1;
+ size_left -= name_len + 1;
+ buffer += name_len + 1;
next:
+ cur += this_len;
+ di = (struct btrfs_dir_item *)((char *)di + this_len);
+ }
+next_item:
path->slots[0]++;
}
ret = total_size;
diff --git a/fs/btrfs/zlib.c b/fs/btrfs/zlib.c
index 82990b8f8..88d274e8e 100644
--- a/fs/btrfs/zlib.c
+++ b/fs/btrfs/zlib.c
@@ -59,7 +59,7 @@ static struct list_head *zlib_alloc_workspace(void)
workspacesize = max(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
zlib_inflate_workspacesize());
workspace->strm.workspace = vmalloc(workspacesize);
- workspace->buf = kmalloc(PAGE_CACHE_SIZE, GFP_NOFS);
+ workspace->buf = kmalloc(PAGE_SIZE, GFP_NOFS);
if (!workspace->strm.workspace || !workspace->buf)
goto fail;
@@ -103,7 +103,7 @@ static int zlib_compress_pages(struct list_head *ws,
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
- in_page = find_get_page(mapping, start >> PAGE_CACHE_SHIFT);
+ in_page = find_get_page(mapping, start >> PAGE_SHIFT);
data_in = kmap(in_page);
out_page = alloc_page(GFP_NOFS | __GFP_HIGHMEM);
@@ -117,8 +117,8 @@ static int zlib_compress_pages(struct list_head *ws,
workspace->strm.next_in = data_in;
workspace->strm.next_out = cpage_out;
- workspace->strm.avail_out = PAGE_CACHE_SIZE;
- workspace->strm.avail_in = min(len, PAGE_CACHE_SIZE);
+ workspace->strm.avail_out = PAGE_SIZE;
+ workspace->strm.avail_in = min(len, PAGE_SIZE);
while (workspace->strm.total_in < len) {
ret = zlib_deflate(&workspace->strm, Z_SYNC_FLUSH);
@@ -156,7 +156,7 @@ static int zlib_compress_pages(struct list_head *ws,
cpage_out = kmap(out_page);
pages[nr_pages] = out_page;
nr_pages++;
- workspace->strm.avail_out = PAGE_CACHE_SIZE;
+ workspace->strm.avail_out = PAGE_SIZE;
workspace->strm.next_out = cpage_out;
}
/* we're all done */
@@ -170,14 +170,14 @@ static int zlib_compress_pages(struct list_head *ws,
bytes_left = len - workspace->strm.total_in;
kunmap(in_page);
- page_cache_release(in_page);
+ put_page(in_page);
- start += PAGE_CACHE_SIZE;
+ start += PAGE_SIZE;
in_page = find_get_page(mapping,
- start >> PAGE_CACHE_SHIFT);
+ start >> PAGE_SHIFT);
data_in = kmap(in_page);
workspace->strm.avail_in = min(bytes_left,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
workspace->strm.next_in = data_in;
}
}
@@ -205,7 +205,7 @@ out:
if (in_page) {
kunmap(in_page);
- page_cache_release(in_page);
+ put_page(in_page);
}
return ret;
}
@@ -223,18 +223,18 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
size_t total_out = 0;
unsigned long page_in_index = 0;
unsigned long page_out_index = 0;
- unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_CACHE_SIZE);
+ unsigned long total_pages_in = DIV_ROUND_UP(srclen, PAGE_SIZE);
unsigned long buf_start;
unsigned long pg_offset;
data_in = kmap(pages_in[page_in_index]);
workspace->strm.next_in = data_in;
- workspace->strm.avail_in = min_t(size_t, srclen, PAGE_CACHE_SIZE);
+ workspace->strm.avail_in = min_t(size_t, srclen, PAGE_SIZE);
workspace->strm.total_in = 0;
workspace->strm.total_out = 0;
workspace->strm.next_out = workspace->buf;
- workspace->strm.avail_out = PAGE_CACHE_SIZE;
+ workspace->strm.avail_out = PAGE_SIZE;
pg_offset = 0;
/* If it's deflate, and it's got no preset dictionary, then
@@ -274,7 +274,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
}
workspace->strm.next_out = workspace->buf;
- workspace->strm.avail_out = PAGE_CACHE_SIZE;
+ workspace->strm.avail_out = PAGE_SIZE;
if (workspace->strm.avail_in == 0) {
unsigned long tmp;
@@ -288,7 +288,7 @@ static int zlib_decompress_biovec(struct list_head *ws, struct page **pages_in,
workspace->strm.next_in = data_in;
tmp = srclen - workspace->strm.total_in;
workspace->strm.avail_in = min(tmp,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
}
}
if (ret != Z_STREAM_END)
@@ -325,7 +325,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
workspace->strm.total_in = 0;
workspace->strm.next_out = workspace->buf;
- workspace->strm.avail_out = PAGE_CACHE_SIZE;
+ workspace->strm.avail_out = PAGE_SIZE;
workspace->strm.total_out = 0;
/* If it's deflate, and it's got no preset dictionary, then
we can tell zlib to skip the adler32 check. */
@@ -368,8 +368,8 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
else
buf_offset = 0;
- bytes = min(PAGE_CACHE_SIZE - pg_offset,
- PAGE_CACHE_SIZE - buf_offset);
+ bytes = min(PAGE_SIZE - pg_offset,
+ PAGE_SIZE - buf_offset);
bytes = min(bytes, bytes_left);
kaddr = kmap_atomic(dest_page);
@@ -380,7 +380,7 @@ static int zlib_decompress(struct list_head *ws, unsigned char *data_in,
bytes_left -= bytes;
next:
workspace->strm.next_out = workspace->buf;
- workspace->strm.avail_out = PAGE_CACHE_SIZE;
+ workspace->strm.avail_out = PAGE_SIZE;
}
if (ret != Z_STREAM_END && bytes_left != 0)
diff --git a/fs/buffer.c b/fs/buffer.c
index e1632abb4..af0d9a82a 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -129,7 +129,7 @@ __clear_page_buffers(struct page *page)
{
ClearPagePrivate(page);
set_page_private(page, 0);
- page_cache_release(page);
+ put_page(page);
}
static void buffer_io_error(struct buffer_head *bh, char *msg)
@@ -207,7 +207,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
struct page *page;
int all_mapped = 1;
- index = block >> (PAGE_CACHE_SHIFT - bd_inode->i_blkbits);
+ index = block >> (PAGE_SHIFT - bd_inode->i_blkbits);
page = find_get_page_flags(bd_mapping, index, FGP_ACCESSED);
if (!page)
goto out;
@@ -245,7 +245,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block)
}
out_unlock:
spin_unlock(&bd_mapping->private_lock);
- page_cache_release(page);
+ put_page(page);
out:
return ret;
}
@@ -621,17 +621,17 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode);
* If warn is true, then emit a warning if the page is not uptodate and has
* not been truncated.
*
- * The caller must hold mem_cgroup_begin_page_stat() lock.
+ * The caller must hold lock_page_memcg().
*/
static void __set_page_dirty(struct page *page, struct address_space *mapping,
- struct mem_cgroup *memcg, int warn)
+ int warn)
{
unsigned long flags;
spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(warn && !PageUptodate(page));
- account_page_dirtied(page, mapping, memcg);
+ account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
@@ -666,7 +666,6 @@ static void __set_page_dirty(struct page *page, struct address_space *mapping,
int __set_page_dirty_buffers(struct page *page)
{
int newly_dirty;
- struct mem_cgroup *memcg;
struct address_space *mapping = page_mapping(page);
if (unlikely(!mapping))
@@ -683,17 +682,17 @@ int __set_page_dirty_buffers(struct page *page)
} while (bh != head);
}
/*
- * Use mem_group_begin_page_stat() to keep PageDirty synchronized with
- * per-memcg dirty page counters.
+ * Lock out page->mem_cgroup migration to keep PageDirty
+ * synchronized with per-memcg dirty page counters.
*/
- memcg = mem_cgroup_begin_page_stat(page);
+ lock_page_memcg(page);
newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
if (newly_dirty)
- __set_page_dirty(page, mapping, memcg, 1);
+ __set_page_dirty(page, mapping, 1);
- mem_cgroup_end_page_stat(memcg);
+ unlock_page_memcg(page);
if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
@@ -1041,7 +1040,7 @@ done:
ret = (block < end_block) ? 1 : -ENXIO;
failed:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return ret;
}
@@ -1167,15 +1166,14 @@ void mark_buffer_dirty(struct buffer_head *bh)
if (!test_set_buffer_dirty(bh)) {
struct page *page = bh->b_page;
struct address_space *mapping = NULL;
- struct mem_cgroup *memcg;
- memcg = mem_cgroup_begin_page_stat(page);
+ lock_page_memcg(page);
if (!TestSetPageDirty(page)) {
mapping = page_mapping(page);
if (mapping)
- __set_page_dirty(page, mapping, memcg, 0);
+ __set_page_dirty(page, mapping, 0);
}
- mem_cgroup_end_page_stat(memcg);
+ unlock_page_memcg(page);
if (mapping)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
}
@@ -1535,7 +1533,7 @@ void block_invalidatepage(struct page *page, unsigned int offset,
/*
* Check for overflow
*/
- BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+ BUG_ON(stop > PAGE_SIZE || stop < length);
head = page_buffers(page);
bh = head;
@@ -1718,7 +1716,7 @@ static int __block_write_full_page(struct inode *inode, struct page *page,
blocksize = bh->b_size;
bbits = block_size_bits(blocksize);
- block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+ block = (sector_t)page->index << (PAGE_SHIFT - bbits);
last_block = (i_size_read(inode) - 1) >> bbits;
/*
@@ -1896,7 +1894,7 @@ EXPORT_SYMBOL(page_zero_new_buffers);
int __block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
{
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
struct inode *inode = page->mapping->host;
unsigned block_start, block_end;
@@ -1906,15 +1904,15 @@ int __block_write_begin(struct page *page, loff_t pos, unsigned len,
struct buffer_head *bh, *head, *wait[2], **wait_bh=wait;
BUG_ON(!PageLocked(page));
- BUG_ON(from > PAGE_CACHE_SIZE);
- BUG_ON(to > PAGE_CACHE_SIZE);
+ BUG_ON(from > PAGE_SIZE);
+ BUG_ON(to > PAGE_SIZE);
BUG_ON(from > to);
head = create_page_buffers(page, inode, 0);
blocksize = head->b_size;
bbits = block_size_bits(blocksize);
- block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+ block = (sector_t)page->index << (PAGE_SHIFT - bbits);
for(bh = head, block_start = 0; bh != head || !block_start;
block++, block_start=block_end, bh = bh->b_this_page) {
@@ -2022,7 +2020,7 @@ static int __block_commit_write(struct inode *inode, struct page *page,
int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
unsigned flags, struct page **pagep, get_block_t *get_block)
{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
int status;
@@ -2033,7 +2031,7 @@ int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len,
status = __block_write_begin(page, pos, len, get_block);
if (unlikely(status)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
page = NULL;
}
@@ -2049,7 +2047,7 @@ int block_write_end(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
unsigned start;
- start = pos & (PAGE_CACHE_SIZE - 1);
+ start = pos & (PAGE_SIZE - 1);
if (unlikely(copied < len)) {
/*
@@ -2101,7 +2099,7 @@ int generic_write_end(struct file *file, struct address_space *mapping,
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
@@ -2138,9 +2136,9 @@ int block_is_partially_uptodate(struct page *page, unsigned long from,
head = page_buffers(page);
blocksize = head->b_size;
- to = min_t(unsigned, PAGE_CACHE_SIZE - from, count);
+ to = min_t(unsigned, PAGE_SIZE - from, count);
to = from + to;
- if (from < blocksize && to > PAGE_CACHE_SIZE - blocksize)
+ if (from < blocksize && to > PAGE_SIZE - blocksize)
return 0;
bh = head;
@@ -2183,7 +2181,7 @@ int block_read_full_page(struct page *page, get_block_t *get_block)
blocksize = head->b_size;
bbits = block_size_bits(blocksize);
- iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+ iblock = (sector_t)page->index << (PAGE_SHIFT - bbits);
lblock = (i_size_read(inode)+blocksize-1) >> bbits;
bh = head;
nr = 0;
@@ -2297,16 +2295,16 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
unsigned zerofrom, offset, len;
int err = 0;
- index = pos >> PAGE_CACHE_SHIFT;
- offset = pos & ~PAGE_CACHE_MASK;
+ index = pos >> PAGE_SHIFT;
+ offset = pos & ~PAGE_MASK;
- while (index > (curidx = (curpos = *bytes)>>PAGE_CACHE_SHIFT)) {
- zerofrom = curpos & ~PAGE_CACHE_MASK;
+ while (index > (curidx = (curpos = *bytes)>>PAGE_SHIFT)) {
+ zerofrom = curpos & ~PAGE_MASK;
if (zerofrom & (blocksize-1)) {
*bytes |= (blocksize-1);
(*bytes)++;
}
- len = PAGE_CACHE_SIZE - zerofrom;
+ len = PAGE_SIZE - zerofrom;
err = pagecache_write_begin(file, mapping, curpos, len,
AOP_FLAG_UNINTERRUPTIBLE,
@@ -2331,7 +2329,7 @@ static int cont_expand_zero(struct file *file, struct address_space *mapping,
/* page covers the boundary, find the boundary offset */
if (index == curidx) {
- zerofrom = curpos & ~PAGE_CACHE_MASK;
+ zerofrom = curpos & ~PAGE_MASK;
/* if we will expand the thing last block will be filled */
if (offset <= zerofrom) {
goto out;
@@ -2377,7 +2375,7 @@ int cont_write_begin(struct file *file, struct address_space *mapping,
if (err)
return err;
- zerofrom = *bytes & ~PAGE_CACHE_MASK;
+ zerofrom = *bytes & ~PAGE_MASK;
if (pos+len > *bytes && zerofrom & (blocksize-1)) {
*bytes |= (blocksize-1);
(*bytes)++;
@@ -2432,10 +2430,10 @@ int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf,
}
/* page is wholly or partially inside EOF */
- if (((page->index + 1) << PAGE_CACHE_SHIFT) > size)
- end = size & ~PAGE_CACHE_MASK;
+ if (((page->index + 1) << PAGE_SHIFT) > size)
+ end = size & ~PAGE_MASK;
else
- end = PAGE_CACHE_SIZE;
+ end = PAGE_SIZE;
ret = __block_write_begin(page, 0, end, get_block);
if (!ret)
@@ -2510,8 +2508,8 @@ int nobh_write_begin(struct address_space *mapping,
int ret = 0;
int is_mapped_to_disk = 1;
- index = pos >> PAGE_CACHE_SHIFT;
- from = pos & (PAGE_CACHE_SIZE - 1);
+ index = pos >> PAGE_SHIFT;
+ from = pos & (PAGE_SIZE - 1);
to = from + len;
page = grab_cache_page_write_begin(mapping, index, flags);
@@ -2545,7 +2543,7 @@ int nobh_write_begin(struct address_space *mapping,
goto out_release;
}
- block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+ block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
/*
* We loop across all blocks in the page, whether or not they are
@@ -2553,7 +2551,7 @@ int nobh_write_begin(struct address_space *mapping,
* page is fully mapped-to-disk.
*/
for (block_start = 0, block_in_page = 0, bh = head;
- block_start < PAGE_CACHE_SIZE;
+ block_start < PAGE_SIZE;
block_in_page++, block_start += blocksize, bh = bh->b_this_page) {
int create;
@@ -2625,7 +2623,7 @@ failed:
out_release:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
*pagep = NULL;
return ret;
@@ -2655,7 +2653,7 @@ int nobh_write_end(struct file *file, struct address_space *mapping,
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
while (head) {
bh = head;
@@ -2677,7 +2675,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
- const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ const pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned offset;
int ret;
@@ -2686,7 +2684,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
goto out;
/* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_CACHE_SIZE-1);
+ offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
/*
* The page may have dirty, unmapped buffers. For example,
@@ -2709,7 +2707,7 @@ int nobh_writepage(struct page *page, get_block_t *get_block,
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ zero_user_segment(page, offset, PAGE_SIZE);
out:
ret = mpage_writepage(page, get_block, wbc);
if (ret == -EAGAIN)
@@ -2722,8 +2720,8 @@ EXPORT_SYMBOL(nobh_writepage);
int nobh_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
- pgoff_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ pgoff_t index = from >> PAGE_SHIFT;
+ unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize;
sector_t iblock;
unsigned length, pos;
@@ -2740,7 +2738,7 @@ int nobh_truncate_page(struct address_space *mapping,
return 0;
length = blocksize - length;
- iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
page = grab_cache_page(mapping, index);
err = -ENOMEM;
@@ -2750,7 +2748,7 @@ int nobh_truncate_page(struct address_space *mapping,
if (page_has_buffers(page)) {
has_buffers:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return block_truncate_page(mapping, from, get_block);
}
@@ -2774,7 +2772,7 @@ has_buffers:
if (!PageUptodate(page)) {
err = mapping->a_ops->readpage(NULL, page);
if (err) {
- page_cache_release(page);
+ put_page(page);
goto out;
}
lock_page(page);
@@ -2791,7 +2789,7 @@ has_buffers:
unlock:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
out:
return err;
}
@@ -2800,8 +2798,8 @@ EXPORT_SYMBOL(nobh_truncate_page);
int block_truncate_page(struct address_space *mapping,
loff_t from, get_block_t *get_block)
{
- pgoff_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ pgoff_t index = from >> PAGE_SHIFT;
+ unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize;
sector_t iblock;
unsigned length, pos;
@@ -2818,7 +2816,7 @@ int block_truncate_page(struct address_space *mapping,
return 0;
length = blocksize - length;
- iblock = (sector_t)index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ iblock = (sector_t)index << (PAGE_SHIFT - inode->i_blkbits);
page = grab_cache_page(mapping, index);
err = -ENOMEM;
@@ -2867,7 +2865,7 @@ int block_truncate_page(struct address_space *mapping,
unlock:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
out:
return err;
}
@@ -2881,7 +2879,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
{
struct inode * const inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
- const pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ const pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned offset;
/* Is the page fully inside i_size? */
@@ -2890,14 +2888,14 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
end_buffer_async_write);
/* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_CACHE_SIZE-1);
+ offset = i_size & (PAGE_SIZE-1);
if (page->index >= end_index+1 || !offset) {
/*
* The page may have dirty, unmapped buffers. For example,
* they may have been added in ext3_writepage(). Make them
* freeable here, so the page does not leak.
*/
- do_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ do_invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
return 0; /* don't care */
}
@@ -2909,7 +2907,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ zero_user_segment(page, offset, PAGE_SIZE);
return __block_write_full_page(inode, page, get_block, wbc,
end_buffer_async_write);
}
diff --git a/fs/cachefiles/daemon.c b/fs/cachefiles/daemon.c
index 452e98dd7..1ee54ffd3 100644
--- a/fs/cachefiles/daemon.c
+++ b/fs/cachefiles/daemon.c
@@ -162,6 +162,8 @@ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer,
size_t buflen, loff_t *pos)
{
struct cachefiles_cache *cache = file->private_data;
+ unsigned long long b_released;
+ unsigned f_released;
char buffer[256];
int n;
@@ -174,6 +176,8 @@ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer,
cachefiles_has_space(cache, 0, 0);
/* summarise */
+ f_released = atomic_xchg(&cache->f_released, 0);
+ b_released = atomic_long_xchg(&cache->b_released, 0);
clear_bit(CACHEFILES_STATE_CHANGED, &cache->flags);
n = snprintf(buffer, sizeof(buffer),
@@ -183,15 +187,18 @@ static ssize_t cachefiles_daemon_read(struct file *file, char __user *_buffer,
" fstop=%llx"
" brun=%llx"
" bcull=%llx"
- " bstop=%llx",
+ " bstop=%llx"
+ " freleased=%x"
+ " breleased=%llx",
test_bit(CACHEFILES_CULLING, &cache->flags) ? '1' : '0',
(unsigned long long) cache->frun,
(unsigned long long) cache->fcull,
(unsigned long long) cache->fstop,
(unsigned long long) cache->brun,
(unsigned long long) cache->bcull,
- (unsigned long long) cache->bstop
- );
+ (unsigned long long) cache->bstop,
+ f_released,
+ b_released);
if (n > buflen)
return -EMSGSIZE;
diff --git a/fs/cachefiles/interface.c b/fs/cachefiles/interface.c
index 675a3332d..861d611b8 100644
--- a/fs/cachefiles/interface.c
+++ b/fs/cachefiles/interface.c
@@ -291,15 +291,8 @@ static void cachefiles_drop_object(struct fscache_object *_object)
}
/* note that the object is now inactive */
- if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags)) {
- write_lock(&cache->active_lock);
- if (!test_and_clear_bit(CACHEFILES_OBJECT_ACTIVE,
- &object->flags))
- BUG();
- rb_erase(&object->active_node, &cache->active_nodes);
- wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
- write_unlock(&cache->active_lock);
- }
+ if (test_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags))
+ cachefiles_mark_object_inactive(cache, object);
dput(object->dentry);
object->dentry = NULL;
diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h
index 9c4b737a5..2fcde1a34 100644
--- a/fs/cachefiles/internal.h
+++ b/fs/cachefiles/internal.h
@@ -66,6 +66,8 @@ struct cachefiles_cache {
struct rb_root active_nodes; /* active nodes (can't be culled) */
rwlock_t active_lock; /* lock for active_nodes */
atomic_t gravecounter; /* graveyard uniquifier */
+ atomic_t f_released; /* number of objects released lately */
+ atomic_long_t b_released; /* number of blocks released lately */
unsigned frun_percent; /* when to stop culling (% files) */
unsigned fcull_percent; /* when to start culling (% files) */
unsigned fstop_percent; /* when to stop allocating (% files) */
@@ -157,6 +159,8 @@ extern char *cachefiles_cook_key(const u8 *raw, int keylen, uint8_t type);
/*
* namei.c
*/
+extern void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
+ struct cachefiles_object *object);
extern int cachefiles_delete_object(struct cachefiles_cache *cache,
struct cachefiles_object *object);
extern int cachefiles_walk_to_object(struct cachefiles_object *parent,
diff --git a/fs/cachefiles/namei.c b/fs/cachefiles/namei.c
index 1c2334c16..4ae75006e 100644
--- a/fs/cachefiles/namei.c
+++ b/fs/cachefiles/namei.c
@@ -258,6 +258,28 @@ requeue:
}
/*
+ * Mark an object as being inactive.
+ */
+void cachefiles_mark_object_inactive(struct cachefiles_cache *cache,
+ struct cachefiles_object *object)
+{
+ write_lock(&cache->active_lock);
+ rb_erase(&object->active_node, &cache->active_nodes);
+ clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
+ write_unlock(&cache->active_lock);
+
+ wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
+
+ /* This object can now be culled, so we need to let the daemon know
+ * that there is something it can remove if it needs to.
+ */
+ atomic_long_add(d_backing_inode(object->dentry)->i_blocks,
+ &cache->b_released);
+ if (atomic_inc_return(&cache->f_released))
+ cachefiles_state_changed(cache);
+}
+
+/*
* delete an object representation from the cache
* - file backed objects are unlinked
* - directory backed objects are stuffed into the graveyard for userspace to
@@ -684,11 +706,7 @@ mark_active_timed_out:
check_error:
_debug("check error %d", ret);
- write_lock(&cache->active_lock);
- rb_erase(&object->active_node, &cache->active_nodes);
- clear_bit(CACHEFILES_OBJECT_ACTIVE, &object->flags);
- wake_up_bit(&object->flags, CACHEFILES_OBJECT_ACTIVE);
- write_unlock(&cache->active_lock);
+ cachefiles_mark_object_inactive(cache, object);
release_dentry:
dput(object->dentry);
object->dentry = NULL;
diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c
index c0f3da392..afbdc4189 100644
--- a/fs/cachefiles/rdwr.c
+++ b/fs/cachefiles/rdwr.c
@@ -194,10 +194,10 @@ static void cachefiles_read_copier(struct fscache_operation *_op)
error = -EIO;
}
- page_cache_release(monitor->back_page);
+ put_page(monitor->back_page);
fscache_end_io(op, monitor->netfs_page, error);
- page_cache_release(monitor->netfs_page);
+ put_page(monitor->netfs_page);
fscache_retrieval_complete(op, 1);
fscache_put_retrieval(op);
kfree(monitor);
@@ -288,8 +288,8 @@ monitor_backing_page:
_debug("- monitor add");
/* install the monitor */
- page_cache_get(monitor->netfs_page);
- page_cache_get(backpage);
+ get_page(monitor->netfs_page);
+ get_page(backpage);
monitor->back_page = backpage;
monitor->monitor.private = backpage;
add_page_wait_queue(backpage, &monitor->monitor);
@@ -310,7 +310,7 @@ backing_page_already_present:
_debug("- present");
if (newpage) {
- page_cache_release(newpage);
+ put_page(newpage);
newpage = NULL;
}
@@ -342,7 +342,7 @@ success:
out:
if (backpage)
- page_cache_release(backpage);
+ put_page(backpage);
if (monitor) {
fscache_put_retrieval(monitor->op);
kfree(monitor);
@@ -363,7 +363,7 @@ io_error:
goto out;
nomem_page:
- page_cache_release(newpage);
+ put_page(newpage);
nomem_monitor:
fscache_put_retrieval(monitor->op);
kfree(monitor);
@@ -530,7 +530,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
netpage->index, cachefiles_gfp);
if (ret < 0) {
if (ret == -EEXIST) {
- page_cache_release(netpage);
+ put_page(netpage);
fscache_retrieval_complete(op, 1);
continue;
}
@@ -538,10 +538,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
}
/* install a monitor */
- page_cache_get(netpage);
+ get_page(netpage);
monitor->netfs_page = netpage;
- page_cache_get(backpage);
+ get_page(backpage);
monitor->back_page = backpage;
monitor->monitor.private = backpage;
add_page_wait_queue(backpage, &monitor->monitor);
@@ -555,10 +555,10 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
unlock_page(backpage);
}
- page_cache_release(backpage);
+ put_page(backpage);
backpage = NULL;
- page_cache_release(netpage);
+ put_page(netpage);
netpage = NULL;
continue;
@@ -603,7 +603,7 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
netpage->index, cachefiles_gfp);
if (ret < 0) {
if (ret == -EEXIST) {
- page_cache_release(netpage);
+ put_page(netpage);
fscache_retrieval_complete(op, 1);
continue;
}
@@ -612,14 +612,14 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
copy_highpage(netpage, backpage);
- page_cache_release(backpage);
+ put_page(backpage);
backpage = NULL;
fscache_mark_page_cached(op, netpage);
/* the netpage is unlocked and marked up to date here */
fscache_end_io(op, netpage, 0);
- page_cache_release(netpage);
+ put_page(netpage);
netpage = NULL;
fscache_retrieval_complete(op, 1);
continue;
@@ -632,11 +632,11 @@ static int cachefiles_read_backing_file(struct cachefiles_object *object,
out:
/* tidy up */
if (newpage)
- page_cache_release(newpage);
+ put_page(newpage);
if (netpage)
- page_cache_release(netpage);
+ put_page(netpage);
if (backpage)
- page_cache_release(backpage);
+ put_page(backpage);
if (monitor) {
fscache_put_retrieval(op);
kfree(monitor);
@@ -644,7 +644,7 @@ out:
list_for_each_entry_safe(netpage, _n, list, lru) {
list_del(&netpage->lru);
- page_cache_release(netpage);
+ put_page(netpage);
fscache_retrieval_complete(op, 1);
}
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 19adeb0ef..4801571f5 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -143,7 +143,7 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
inode = page->mapping->host;
ci = ceph_inode(inode);
- if (offset != 0 || length != PAGE_CACHE_SIZE) {
+ if (offset != 0 || length != PAGE_SIZE) {
dout("%p invalidatepage %p idx %lu partial dirty page %u~%u\n",
inode, page, page->index, offset, length);
return;
@@ -175,8 +175,8 @@ static void ceph_invalidatepage(struct page *page, unsigned int offset,
static int ceph_releasepage(struct page *page, gfp_t g)
{
- struct inode *inode = page->mapping ? page->mapping->host : NULL;
- dout("%p releasepage %p idx %lu\n", inode, page, page->index);
+ dout("%p releasepage %p idx %lu\n", page->mapping->host,
+ page, page->index);
WARN_ON(PageDirty(page));
/* Can we release the page from the cache? */
@@ -197,10 +197,10 @@ static int readpage_nounlock(struct file *filp, struct page *page)
&ceph_inode_to_client(inode)->client->osdc;
int err = 0;
u64 off = page_offset(page);
- u64 len = PAGE_CACHE_SIZE;
+ u64 len = PAGE_SIZE;
if (off >= i_size_read(inode)) {
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
return 0;
}
@@ -212,7 +212,7 @@ static int readpage_nounlock(struct file *filp, struct page *page)
*/
if (off == 0)
return -EINVAL;
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
return 0;
}
@@ -234,9 +234,9 @@ static int readpage_nounlock(struct file *filp, struct page *page)
ceph_fscache_readpage_cancel(inode, page);
goto out;
}
- if (err < PAGE_CACHE_SIZE)
+ if (err < PAGE_SIZE)
/* zero fill remainder of page */
- zero_user_segment(page, err, PAGE_CACHE_SIZE);
+ zero_user_segment(page, err, PAGE_SIZE);
else
flush_dcache_page(page);
@@ -276,12 +276,12 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
for (i = 0; i < num_pages; i++) {
struct page *page = osd_data->pages[i];
- if (rc < 0 && rc != ENOENT)
+ if (rc < 0 && rc != -ENOENT)
goto unlock;
- if (bytes < (int)PAGE_CACHE_SIZE) {
+ if (bytes < (int)PAGE_SIZE) {
/* zero (remainder of) page */
int s = bytes < 0 ? 0 : bytes;
- zero_user_segment(page, s, PAGE_CACHE_SIZE);
+ zero_user_segment(page, s, PAGE_SIZE);
}
dout("finish_read %p uptodate %p idx %lu\n", inode, page,
page->index);
@@ -290,8 +290,8 @@ static void finish_read(struct ceph_osd_request *req, struct ceph_msg *msg)
ceph_readpage_to_fscache(inode, page);
unlock:
unlock_page(page);
- page_cache_release(page);
- bytes -= PAGE_CACHE_SIZE;
+ put_page(page);
+ bytes -= PAGE_SIZE;
}
kfree(osd_data->pages);
}
@@ -336,7 +336,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
if (max && nr_pages == max)
break;
}
- len = nr_pages << PAGE_CACHE_SHIFT;
+ len = nr_pages << PAGE_SHIFT;
dout("start_read %p nr_pages %d is %lld~%lld\n", inode, nr_pages,
off, len);
vino = ceph_vino(inode);
@@ -364,7 +364,7 @@ static int start_read(struct inode *inode, struct list_head *page_list, int max)
if (add_to_page_cache_lru(page, &inode->i_data, page->index,
GFP_KERNEL)) {
ceph_fscache_uncache_page(inode, page);
- page_cache_release(page);
+ put_page(page);
dout("start_read %p add_to_page_cache failed %p\n",
inode, page);
nr_pages = i;
@@ -415,8 +415,8 @@ static int ceph_readpages(struct file *file, struct address_space *mapping,
if (rc == 0)
goto out;
- if (fsc->mount_options->rsize >= PAGE_CACHE_SIZE)
- max = (fsc->mount_options->rsize + PAGE_CACHE_SIZE - 1)
+ if (fsc->mount_options->rsize >= PAGE_SIZE)
+ max = (fsc->mount_options->rsize + PAGE_SIZE - 1)
>> PAGE_SHIFT;
dout("readpages %p file %p nr_pages %d max %d\n", inode,
@@ -484,7 +484,7 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
long writeback_stat;
u64 truncate_size;
u32 truncate_seq;
- int err = 0, len = PAGE_CACHE_SIZE;
+ int err = 0, len = PAGE_SIZE;
dout("writepage %p idx %lu\n", page, page->index);
@@ -606,71 +606,71 @@ static void writepages_finish(struct ceph_osd_request *req,
struct inode *inode = req->r_inode;
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_osd_data *osd_data;
- unsigned wrote;
struct page *page;
- int num_pages;
- int i;
+ int num_pages, total_pages = 0;
+ int i, j;
+ int rc = req->r_result;
struct ceph_snap_context *snapc = req->r_snapc;
struct address_space *mapping = inode->i_mapping;
- int rc = req->r_result;
- u64 bytes = req->r_ops[0].extent.length;
struct ceph_fs_client *fsc = ceph_inode_to_client(inode);
- long writeback_stat;
- unsigned issued = ceph_caps_issued(ci);
+ bool remove_page;
- osd_data = osd_req_op_extent_osd_data(req, 0);
- BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
- num_pages = calc_pages_for((u64)osd_data->alignment,
- (u64)osd_data->length);
- if (rc >= 0) {
- /*
- * Assume we wrote the pages we originally sent. The
- * osd might reply with fewer pages if our writeback
- * raced with a truncation and was adjusted at the osd,
- * so don't believe the reply.
- */
- wrote = num_pages;
- } else {
- wrote = 0;
+
+ dout("writepages_finish %p rc %d\n", inode, rc);
+ if (rc < 0)
mapping_set_error(mapping, rc);
- }
- dout("writepages_finish %p rc %d bytes %llu wrote %d (pages)\n",
- inode, rc, bytes, wrote);
- /* clean all pages */
- for (i = 0; i < num_pages; i++) {
- page = osd_data->pages[i];
- BUG_ON(!page);
- WARN_ON(!PageUptodate(page));
+ /*
+ * We lost the cache cap, need to truncate the page before
+ * it is unlocked, otherwise we'd truncate it later in the
+ * page truncation thread, possibly losing some data that
+ * raced its way in
+ */
+ remove_page = !(ceph_caps_issued(ci) &
+ (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO));
- writeback_stat =
- atomic_long_dec_return(&fsc->writeback_count);
- if (writeback_stat <
- CONGESTION_OFF_THRESH(fsc->mount_options->congestion_kb))
- clear_bdi_congested(&fsc->backing_dev_info,
- BLK_RW_ASYNC);
+ /* clean all pages */
+ for (i = 0; i < req->r_num_ops; i++) {
+ if (req->r_ops[i].op != CEPH_OSD_OP_WRITE)
+ break;
- ceph_put_snap_context(page_snap_context(page));
- page->private = 0;
- ClearPagePrivate(page);
- dout("unlocking %d %p\n", i, page);
- end_page_writeback(page);
+ osd_data = osd_req_op_extent_osd_data(req, i);
+ BUG_ON(osd_data->type != CEPH_OSD_DATA_TYPE_PAGES);
+ num_pages = calc_pages_for((u64)osd_data->alignment,
+ (u64)osd_data->length);
+ total_pages += num_pages;
+ for (j = 0; j < num_pages; j++) {
+ page = osd_data->pages[j];
+ BUG_ON(!page);
+ WARN_ON(!PageUptodate(page));
+
+ if (atomic_long_dec_return(&fsc->writeback_count) <
+ CONGESTION_OFF_THRESH(
+ fsc->mount_options->congestion_kb))
+ clear_bdi_congested(&fsc->backing_dev_info,
+ BLK_RW_ASYNC);
+
+ ceph_put_snap_context(page_snap_context(page));
+ page->private = 0;
+ ClearPagePrivate(page);
+ dout("unlocking %p\n", page);
+ end_page_writeback(page);
+
+ if (remove_page)
+ generic_error_remove_page(inode->i_mapping,
+ page);
- /*
- * We lost the cache cap, need to truncate the page before
- * it is unlocked, otherwise we'd truncate it later in the
- * page truncation thread, possibly losing some data that
- * raced its way in
- */
- if ((issued & (CEPH_CAP_FILE_CACHE|CEPH_CAP_FILE_LAZYIO)) == 0)
- generic_error_remove_page(inode->i_mapping, page);
+ unlock_page(page);
+ }
+ dout("writepages_finish %p wrote %llu bytes cleaned %d pages\n",
+ inode, osd_data->length, rc >= 0 ? num_pages : 0);
- unlock_page(page);
+ ceph_release_pages(osd_data->pages, num_pages);
}
- dout("%p wrote+cleaned %d pages\n", inode, wrote);
- ceph_put_wrbuffer_cap_refs(ci, num_pages, snapc);
- ceph_release_pages(osd_data->pages, num_pages);
+ ceph_put_wrbuffer_cap_refs(ci, total_pages, snapc);
+
+ osd_data = osd_req_op_extent_osd_data(req, 0);
if (osd_data->pages_from_pool)
mempool_free(osd_data->pages,
ceph_sb_to_client(inode->i_sb)->wb_pagevec_pool);
@@ -725,9 +725,9 @@ static int ceph_writepages_start(struct address_space *mapping,
}
if (fsc->mount_options->wsize && fsc->mount_options->wsize < wsize)
wsize = fsc->mount_options->wsize;
- if (wsize < PAGE_CACHE_SIZE)
- wsize = PAGE_CACHE_SIZE;
- max_pages_ever = wsize >> PAGE_CACHE_SHIFT;
+ if (wsize < PAGE_SIZE)
+ wsize = PAGE_SIZE;
+ max_pages_ever = wsize >> PAGE_SHIFT;
pagevec_init(&pvec, 0);
@@ -737,8 +737,8 @@ static int ceph_writepages_start(struct address_space *mapping,
end = -1;
dout(" cyclic, start at %lu\n", start);
} else {
- start = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ start = wbc->range_start >> PAGE_SHIFT;
+ end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
should_loop = 0;
@@ -778,17 +778,15 @@ retry:
while (!done && index <= end) {
unsigned i;
int first;
- pgoff_t next;
- int pvec_pages, locked_pages;
- struct page **pages = NULL;
+ pgoff_t strip_unit_end = 0;
+ int num_ops = 0, op_idx;
+ int pvec_pages, locked_pages = 0;
+ struct page **pages = NULL, **data_pages;
mempool_t *pool = NULL; /* Becomes non-null if mempool used */
struct page *page;
int want;
- u64 offset, len;
- long writeback_stat;
+ u64 offset = 0, len = 0;
- next = 0;
- locked_pages = 0;
max_pages = max_pages_ever;
get_more_pages:
@@ -824,8 +822,8 @@ get_more_pages:
unlock_page(page);
break;
}
- if (next && (page->index != next)) {
- dout("not consecutive %p\n", page);
+ if (strip_unit_end && (page->index > strip_unit_end)) {
+ dout("end of strip unit %p\n", page);
unlock_page(page);
break;
}
@@ -867,36 +865,31 @@ get_more_pages:
/*
* We have something to write. If this is
* the first locked page this time through,
- * allocate an osd request and a page array
- * that it will use.
+ * calculate max possinle write size and
+ * allocate a page array
*/
if (locked_pages == 0) {
- BUG_ON(pages);
+ u64 objnum;
+ u64 objoff;
+
/* prepare async write request */
offset = (u64)page_offset(page);
len = wsize;
- req = ceph_osdc_new_request(&fsc->client->osdc,
- &ci->i_layout, vino,
- offset, &len, 0,
- do_sync ? 2 : 1,
- CEPH_OSD_OP_WRITE,
- CEPH_OSD_FLAG_WRITE |
- CEPH_OSD_FLAG_ONDISK,
- snapc, truncate_seq,
- truncate_size, true);
- if (IS_ERR(req)) {
- rc = PTR_ERR(req);
+
+ rc = ceph_calc_file_object_mapping(&ci->i_layout,
+ offset, len,
+ &objnum, &objoff,
+ &len);
+ if (rc < 0) {
unlock_page(page);
break;
}
- if (do_sync)
- osd_req_op_init(req, 1,
- CEPH_OSD_OP_STARTSYNC, 0);
-
- req->r_callback = writepages_finish;
- req->r_inode = inode;
+ num_ops = 1 + do_sync;
+ strip_unit_end = page->index +
+ ((len - 1) >> PAGE_SHIFT);
+ BUG_ON(pages);
max_pages = calc_pages_for(0, (u64)len);
pages = kmalloc(max_pages * sizeof (*pages),
GFP_NOFS);
@@ -905,6 +898,20 @@ get_more_pages:
pages = mempool_alloc(pool, GFP_NOFS);
BUG_ON(!pages);
}
+
+ len = 0;
+ } else if (page->index !=
+ (offset + len) >> PAGE_SHIFT) {
+ if (num_ops >= (pool ? CEPH_OSD_SLAB_OPS :
+ CEPH_OSD_MAX_OPS)) {
+ redirty_page_for_writepage(wbc, page);
+ unlock_page(page);
+ break;
+ }
+
+ num_ops++;
+ offset = (u64)page_offset(page);
+ len = 0;
}
/* note position of first page in pvec */
@@ -913,18 +920,16 @@ get_more_pages:
dout("%p will write page %p idx %lu\n",
inode, page, page->index);
- writeback_stat =
- atomic_long_inc_return(&fsc->writeback_count);
- if (writeback_stat > CONGESTION_ON_THRESH(
+ if (atomic_long_inc_return(&fsc->writeback_count) >
+ CONGESTION_ON_THRESH(
fsc->mount_options->congestion_kb)) {
set_bdi_congested(&fsc->backing_dev_info,
BLK_RW_ASYNC);
}
- set_page_writeback(page);
pages[locked_pages] = page;
locked_pages++;
- next = page->index + 1;
+ len += PAGE_SIZE;
}
/* did we get anything? */
@@ -944,38 +949,119 @@ get_more_pages:
/* shift unused pages over in the pvec... we
* will need to release them below. */
for (j = i; j < pvec_pages; j++) {
- dout(" pvec leftover page %p\n",
- pvec.pages[j]);
+ dout(" pvec leftover page %p\n", pvec.pages[j]);
pvec.pages[j-i+first] = pvec.pages[j];
}
pvec.nr -= i-first;
}
- /* Format the osd request message and submit the write */
+new_request:
offset = page_offset(pages[0]);
- len = (u64)locked_pages << PAGE_CACHE_SHIFT;
- if (snap_size == -1) {
- len = min(len, (u64)i_size_read(inode) - offset);
- /* writepages_finish() clears writeback pages
- * according to the data length, so make sure
- * data length covers all locked pages */
- len = max(len, 1 +
- ((u64)(locked_pages - 1) << PAGE_CACHE_SHIFT));
- } else {
- len = min(len, snap_size - offset);
+ len = wsize;
+
+ req = ceph_osdc_new_request(&fsc->client->osdc,
+ &ci->i_layout, vino,
+ offset, &len, 0, num_ops,
+ CEPH_OSD_OP_WRITE,
+ CEPH_OSD_FLAG_WRITE |
+ CEPH_OSD_FLAG_ONDISK,
+ snapc, truncate_seq,
+ truncate_size, false);
+ if (IS_ERR(req)) {
+ req = ceph_osdc_new_request(&fsc->client->osdc,
+ &ci->i_layout, vino,
+ offset, &len, 0,
+ min(num_ops,
+ CEPH_OSD_SLAB_OPS),
+ CEPH_OSD_OP_WRITE,
+ CEPH_OSD_FLAG_WRITE |
+ CEPH_OSD_FLAG_ONDISK,
+ snapc, truncate_seq,
+ truncate_size, true);
+ BUG_ON(IS_ERR(req));
}
- dout("writepages got %d pages at %llu~%llu\n",
- locked_pages, offset, len);
+ BUG_ON(len < page_offset(pages[locked_pages - 1]) +
+ PAGE_SIZE - offset);
+
+ req->r_callback = writepages_finish;
+ req->r_inode = inode;
- osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0,
+ /* Format the osd request message and submit the write */
+ len = 0;
+ data_pages = pages;
+ op_idx = 0;
+ for (i = 0; i < locked_pages; i++) {
+ u64 cur_offset = page_offset(pages[i]);
+ if (offset + len != cur_offset) {
+ if (op_idx + do_sync + 1 == req->r_num_ops)
+ break;
+ osd_req_op_extent_dup_last(req, op_idx,
+ cur_offset - offset);
+ dout("writepages got pages at %llu~%llu\n",
+ offset, len);
+ osd_req_op_extent_osd_data_pages(req, op_idx,
+ data_pages, len, 0,
!!pool, false);
+ osd_req_op_extent_update(req, op_idx, len);
- pages = NULL; /* request message now owns the pages array */
- pool = NULL;
+ len = 0;
+ offset = cur_offset;
+ data_pages = pages + i;
+ op_idx++;
+ }
- /* Update the write op length in case we changed it */
+ set_page_writeback(pages[i]);
+ len += PAGE_SIZE;
+ }
+
+ if (snap_size != -1) {
+ len = min(len, snap_size - offset);
+ } else if (i == locked_pages) {
+ /* writepages_finish() clears writeback pages
+ * according to the data length, so make sure
+ * data length covers all locked pages */
+ u64 min_len = len + 1 - PAGE_SIZE;
+ len = min(len, (u64)i_size_read(inode) - offset);
+ len = max(len, min_len);
+ }
+ dout("writepages got pages at %llu~%llu\n", offset, len);
+
+ osd_req_op_extent_osd_data_pages(req, op_idx, data_pages, len,
+ 0, !!pool, false);
+ osd_req_op_extent_update(req, op_idx, len);
- osd_req_op_extent_update(req, 0, len);
+ if (do_sync) {
+ op_idx++;
+ osd_req_op_init(req, op_idx, CEPH_OSD_OP_STARTSYNC, 0);
+ }
+ BUG_ON(op_idx + 1 != req->r_num_ops);
+
+ pool = NULL;
+ if (i < locked_pages) {
+ BUG_ON(num_ops <= req->r_num_ops);
+ num_ops -= req->r_num_ops;
+ num_ops += do_sync;
+ locked_pages -= i;
+
+ /* allocate new pages array for next request */
+ data_pages = pages;
+ pages = kmalloc(locked_pages * sizeof (*pages),
+ GFP_NOFS);
+ if (!pages) {
+ pool = fsc->wb_pagevec_pool;
+ pages = mempool_alloc(pool, GFP_NOFS);
+ BUG_ON(!pages);
+ }
+ memcpy(pages, data_pages + i,
+ locked_pages * sizeof(*pages));
+ memset(data_pages + i, 0,
+ locked_pages * sizeof(*pages));
+ } else {
+ BUG_ON(num_ops != req->r_num_ops);
+ index = pages[i - 1]->index + 1;
+ /* request message now owns the pages array */
+ pages = NULL;
+ }
vino = ceph_vino(inode);
ceph_osdc_build_request(req, offset, snapc, vino.snap,
@@ -985,9 +1071,10 @@ get_more_pages:
BUG_ON(rc);
req = NULL;
- /* continue? */
- index = next;
- wbc->nr_to_write -= locked_pages;
+ wbc->nr_to_write -= i;
+ if (pages)
+ goto new_request;
+
if (wbc->nr_to_write <= 0)
done = 1;
@@ -1048,8 +1135,8 @@ static int ceph_update_writeable_page(struct file *file,
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- loff_t page_off = pos & PAGE_CACHE_MASK;
- int pos_in_page = pos & ~PAGE_CACHE_MASK;
+ loff_t page_off = pos & PAGE_MASK;
+ int pos_in_page = pos & ~PAGE_MASK;
int end_in_page = pos_in_page + len;
loff_t i_size;
int r;
@@ -1104,7 +1191,7 @@ retry_locked:
}
/* full page? */
- if (pos_in_page == 0 && len == PAGE_CACHE_SIZE)
+ if (pos_in_page == 0 && len == PAGE_SIZE)
return 0;
/* past end of file? */
@@ -1112,12 +1199,12 @@ retry_locked:
if (page_off >= i_size ||
(pos_in_page == 0 && (pos+len) >= i_size &&
- end_in_page - pos_in_page != PAGE_CACHE_SIZE)) {
+ end_in_page - pos_in_page != PAGE_SIZE)) {
dout(" zeroing %p 0 - %d and %d - %d\n",
- page, pos_in_page, end_in_page, (int)PAGE_CACHE_SIZE);
+ page, pos_in_page, end_in_page, (int)PAGE_SIZE);
zero_user_segments(page,
0, pos_in_page,
- end_in_page, PAGE_CACHE_SIZE);
+ end_in_page, PAGE_SIZE);
return 0;
}
@@ -1141,7 +1228,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = file_inode(file);
struct page *page;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
int r;
do {
@@ -1155,7 +1242,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
r = ceph_update_writeable_page(file, pos, len, page);
if (r < 0)
- page_cache_release(page);
+ put_page(page);
else
*pagep = page;
} while (r == -EAGAIN);
@@ -1172,7 +1259,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = file_inode(file);
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned from = pos & (PAGE_SIZE - 1);
int check_cap = 0;
dout("write_end file %p inode %p page %p %d~%d (%d)\n", file,
@@ -1192,7 +1279,7 @@ static int ceph_write_end(struct file *file, struct address_space *mapping,
set_page_dirty(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (check_cap)
ceph_check_caps(ceph_inode(inode), CHECK_CAPS_AUTHONLY, NULL);
@@ -1235,11 +1322,11 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
struct ceph_inode_info *ci = ceph_inode(inode);
struct ceph_file_info *fi = vma->vm_file->private_data;
struct page *pinned_page = NULL;
- loff_t off = vmf->pgoff << PAGE_CACHE_SHIFT;
+ loff_t off = vmf->pgoff << PAGE_SHIFT;
int want, got, ret;
dout("filemap_fault %p %llx.%llx %llu~%zd trying to get caps\n",
- inode, ceph_vinop(inode), off, (size_t)PAGE_CACHE_SIZE);
+ inode, ceph_vinop(inode), off, (size_t)PAGE_SIZE);
if (fi->fmode & CEPH_FILE_MODE_LAZY)
want = CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO;
else
@@ -1256,7 +1343,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
}
dout("filemap_fault %p %llu~%zd got cap refs on %s\n",
- inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got));
+ inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got));
if ((got & (CEPH_CAP_FILE_CACHE | CEPH_CAP_FILE_LAZYIO)) ||
ci->i_inline_version == CEPH_INLINE_NONE)
@@ -1265,16 +1352,16 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
ret = -EAGAIN;
dout("filemap_fault %p %llu~%zd dropping cap refs on %s ret %d\n",
- inode, off, (size_t)PAGE_CACHE_SIZE, ceph_cap_string(got), ret);
+ inode, off, (size_t)PAGE_SIZE, ceph_cap_string(got), ret);
if (pinned_page)
- page_cache_release(pinned_page);
+ put_page(pinned_page);
ceph_put_cap_refs(ci, got);
if (ret != -EAGAIN)
return ret;
/* read inline data */
- if (off >= PAGE_CACHE_SIZE) {
+ if (off >= PAGE_SIZE) {
/* does not support inline data > PAGE_SIZE */
ret = VM_FAULT_SIGBUS;
} else {
@@ -1291,12 +1378,12 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
CEPH_STAT_CAP_INLINE_DATA, true);
if (ret1 < 0 || off >= i_size_read(inode)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
ret = VM_FAULT_SIGBUS;
goto out;
}
- if (ret1 < PAGE_CACHE_SIZE)
- zero_user_segment(page, ret1, PAGE_CACHE_SIZE);
+ if (ret1 < PAGE_SIZE)
+ zero_user_segment(page, ret1, PAGE_SIZE);
else
flush_dcache_page(page);
SetPageUptodate(page);
@@ -1305,7 +1392,7 @@ static int ceph_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
}
out:
dout("filemap_fault %p %llu~%zd read inline data ret %d\n",
- inode, off, (size_t)PAGE_CACHE_SIZE, ret);
+ inode, off, (size_t)PAGE_SIZE, ret);
return ret;
}
@@ -1343,10 +1430,10 @@ static int ceph_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
}
}
- if (off + PAGE_CACHE_SIZE <= size)
- len = PAGE_CACHE_SIZE;
+ if (off + PAGE_SIZE <= size)
+ len = PAGE_SIZE;
else
- len = size & ~PAGE_CACHE_MASK;
+ len = size & ~PAGE_MASK;
dout("page_mkwrite %p %llx.%llx %llu~%zd getting caps i_size %llu\n",
inode, ceph_vinop(inode), off, len, size);
@@ -1432,7 +1519,7 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
return;
if (PageUptodate(page)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return;
}
}
@@ -1447,14 +1534,14 @@ void ceph_fill_inline_data(struct inode *inode, struct page *locked_page,
}
if (page != locked_page) {
- if (len < PAGE_CACHE_SIZE)
- zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ if (len < PAGE_SIZE)
+ zero_user_segment(page, len, PAGE_SIZE);
else
flush_dcache_page(page);
SetPageUptodate(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
}
@@ -1491,7 +1578,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
from_pagecache = true;
lock_page(page);
} else {
- page_cache_release(page);
+ put_page(page);
page = NULL;
}
}
@@ -1499,8 +1586,8 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
if (page) {
len = i_size_read(inode);
- if (len > PAGE_CACHE_SIZE)
- len = PAGE_CACHE_SIZE;
+ if (len > PAGE_SIZE)
+ len = PAGE_SIZE;
} else {
page = __page_cache_alloc(GFP_NOFS);
if (!page) {
@@ -1522,7 +1609,7 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
ceph_vino(inode), 0, &len, 0, 1,
CEPH_OSD_OP_CREATE,
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
- ceph_empty_snapc, 0, 0, false);
+ NULL, 0, 0, false);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto out;
@@ -1540,9 +1627,8 @@ int ceph_uninline_data(struct file *filp, struct page *locked_page)
ceph_vino(inode), 0, &len, 1, 3,
CEPH_OSD_OP_WRITE,
CEPH_OSD_FLAG_ONDISK | CEPH_OSD_FLAG_WRITE,
- ceph_empty_snapc,
- ci->i_truncate_seq, ci->i_truncate_size,
- false);
+ NULL, ci->i_truncate_seq,
+ ci->i_truncate_size, false);
if (IS_ERR(req)) {
err = PTR_ERR(req);
goto out;
@@ -1584,7 +1670,7 @@ out:
if (page && page != locked_page) {
if (from_pagecache) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
} else
__free_pages(page, 0);
}
@@ -1663,8 +1749,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
goto out;
}
- rd_req = ceph_osdc_alloc_request(&fsc->client->osdc,
- ceph_empty_snapc,
+ rd_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL,
1, false, GFP_NOFS);
if (!rd_req) {
err = -ENOMEM;
@@ -1678,8 +1763,7 @@ static int __ceph_pool_perm_get(struct ceph_inode_info *ci, u32 pool)
"%llx.00000000", ci->i_vino.ino);
rd_req->r_base_oid.name_len = strlen(rd_req->r_base_oid.name);
- wr_req = ceph_osdc_alloc_request(&fsc->client->osdc,
- ceph_empty_snapc,
+ wr_req = ceph_osdc_alloc_request(&fsc->client->osdc, NULL,
1, false, GFP_NOFS);
if (!wr_req) {
err = -ENOMEM;
diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c
index 6fe0ad26a..cfaeef18c 100644
--- a/fs/ceph/caps.c
+++ b/fs/ceph/caps.c
@@ -991,7 +991,7 @@ static int send_cap_msg(struct ceph_mds_session *session,
u32 seq, u64 flush_tid, u64 oldest_flush_tid,
u32 issue_seq, u32 mseq, u64 size, u64 max_size,
struct timespec *mtime, struct timespec *atime,
- u64 time_warp_seq,
+ struct timespec *ctime, u64 time_warp_seq,
kuid_t uid, kgid_t gid, umode_t mode,
u64 xattr_version,
struct ceph_buffer *xattrs_buf,
@@ -1042,6 +1042,8 @@ static int send_cap_msg(struct ceph_mds_session *session,
ceph_encode_timespec(&fc->mtime, mtime);
if (atime)
ceph_encode_timespec(&fc->atime, atime);
+ if (ctime)
+ ceph_encode_timespec(&fc->ctime, ctime);
fc->time_warp_seq = cpu_to_le32(time_warp_seq);
fc->uid = cpu_to_le32(from_kuid(&init_user_ns, uid));
@@ -1116,7 +1118,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
int held, revoking, dropping, keep;
u64 seq, issue_seq, mseq, time_warp_seq, follows;
u64 size, max_size;
- struct timespec mtime, atime;
+ struct timespec mtime, atime, ctime;
int wake = 0;
umode_t mode;
kuid_t uid;
@@ -1180,6 +1182,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
ci->i_requested_max_size = max_size;
mtime = inode->i_mtime;
atime = inode->i_atime;
+ ctime = inode->i_ctime;
time_warp_seq = ci->i_time_warp_seq;
uid = inode->i_uid;
gid = inode->i_gid;
@@ -1198,7 +1201,7 @@ static int __send_cap(struct ceph_mds_client *mdsc, struct ceph_cap *cap,
ret = send_cap_msg(session, ceph_vino(inode).ino, cap_id,
op, keep, want, flushing, seq,
flush_tid, oldest_flush_tid, issue_seq, mseq,
- size, max_size, &mtime, &atime, time_warp_seq,
+ size, max_size, &mtime, &atime, &ctime, time_warp_seq,
uid, gid, mode, xattr_version, xattr_blob,
follows, inline_data);
if (ret < 0) {
@@ -1320,7 +1323,7 @@ retry:
capsnap->dirty, 0, capsnap->flush_tid, 0,
0, mseq, capsnap->size, 0,
&capsnap->mtime, &capsnap->atime,
- capsnap->time_warp_seq,
+ &capsnap->ctime, capsnap->time_warp_seq,
capsnap->uid, capsnap->gid, capsnap->mode,
capsnap->xattr_version, capsnap->xattr_blob,
capsnap->follows, capsnap->inline_data);
@@ -2507,7 +2510,7 @@ int ceph_get_caps(struct ceph_inode_info *ci, int need, int want,
*pinned_page = page;
break;
}
- page_cache_release(page);
+ put_page(page);
}
/*
* drop cap refs first because getattr while
diff --git a/fs/ceph/dir.c b/fs/ceph/dir.c
index fd11fb231..4fb2bbc2a 100644
--- a/fs/ceph/dir.c
+++ b/fs/ceph/dir.c
@@ -38,7 +38,7 @@ int ceph_init_dentry(struct dentry *dentry)
if (dentry->d_fsdata)
return 0;
- di = kmem_cache_alloc(ceph_dentry_cachep, GFP_KERNEL | __GFP_ZERO);
+ di = kmem_cache_zalloc(ceph_dentry_cachep, GFP_KERNEL);
if (!di)
return -ENOMEM; /* oh well */
@@ -68,23 +68,6 @@ out_unlock:
return 0;
}
-struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry)
-{
- struct inode *inode = NULL;
-
- if (!dentry)
- return NULL;
-
- spin_lock(&dentry->d_lock);
- if (!IS_ROOT(dentry)) {
- inode = d_inode(dentry->d_parent);
- ihold(inode);
- }
- spin_unlock(&dentry->d_lock);
- return inode;
-}
-
-
/*
* for readdir, we encode the directory frag and offset within that
* frag into f_pos.
@@ -146,7 +129,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
struct inode *dir = d_inode(parent);
struct dentry *dentry, *last = NULL;
struct ceph_dentry_info *di;
- unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry *);
+ unsigned nsize = PAGE_SIZE / sizeof(struct dentry *);
int err = 0;
loff_t ptr_pos = 0;
struct ceph_readdir_cache_control cache_ctl = {};
@@ -171,7 +154,7 @@ static int __dcache_readdir(struct file *file, struct dir_context *ctx,
}
err = -EAGAIN;
- pgoff = ptr_pos >> PAGE_CACHE_SHIFT;
+ pgoff = ptr_pos >> PAGE_SHIFT;
if (!cache_ctl.page || pgoff != page_index(cache_ctl.page)) {
ceph_readdir_cache_release(&cache_ctl);
cache_ctl.page = find_lock_page(&dir->i_data, pgoff);
@@ -624,6 +607,7 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
struct ceph_mds_client *mdsc = fsc->mdsc;
struct ceph_mds_request *req;
int op;
+ int mask;
int err;
dout("lookup %p dentry %p '%pd'\n",
@@ -666,8 +650,12 @@ static struct dentry *ceph_lookup(struct inode *dir, struct dentry *dentry,
return ERR_CAST(req);
req->r_dentry = dget(dentry);
req->r_num_caps = 2;
- /* we only need inode linkage */
- req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INODE);
+
+ mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
+ if (ceph_security_xattr_wanted(dir))
+ mask |= CEPH_CAP_XATTR_SHARED;
+ req->r_args.getattr.mask = cpu_to_le32(mask);
+
req->r_locked_dir = dir;
err = ceph_mdsc_do_request(mdsc, NULL, req);
err = ceph_handle_snapdir(req, dentry, err);
@@ -1095,6 +1083,7 @@ static int dir_lease_is_valid(struct inode *dir, struct dentry *dentry)
static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
{
int valid = 0;
+ struct dentry *parent;
struct inode *dir;
if (flags & LOOKUP_RCU)
@@ -1103,7 +1092,8 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
dout("d_revalidate %p '%pd' inode %p offset %lld\n", dentry,
dentry, d_inode(dentry), ceph_dentry(dentry)->offset);
- dir = ceph_get_dentry_parent_inode(dentry);
+ parent = dget_parent(dentry);
+ dir = d_inode(parent);
/* always trust cached snapped dentries, snapdir dentry */
if (ceph_snap(dir) != CEPH_NOSNAP) {
@@ -1121,13 +1111,48 @@ static int ceph_d_revalidate(struct dentry *dentry, unsigned int flags)
valid = 1;
}
+ if (!valid) {
+ struct ceph_mds_client *mdsc =
+ ceph_sb_to_client(dir->i_sb)->mdsc;
+ struct ceph_mds_request *req;
+ int op, mask, err;
+
+ op = ceph_snap(dir) == CEPH_SNAPDIR ?
+ CEPH_MDS_OP_LOOKUPSNAP : CEPH_MDS_OP_LOOKUP;
+ req = ceph_mdsc_create_request(mdsc, op, USE_ANY_MDS);
+ if (!IS_ERR(req)) {
+ req->r_dentry = dget(dentry);
+ req->r_num_caps = 2;
+
+ mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
+ if (ceph_security_xattr_wanted(dir))
+ mask |= CEPH_CAP_XATTR_SHARED;
+ req->r_args.getattr.mask = mask;
+
+ req->r_locked_dir = dir;
+ err = ceph_mdsc_do_request(mdsc, NULL, req);
+ if (err == 0 || err == -ENOENT) {
+ if (dentry == req->r_dentry) {
+ valid = !d_unhashed(dentry);
+ } else {
+ d_invalidate(req->r_dentry);
+ err = -EAGAIN;
+ }
+ }
+ ceph_mdsc_put_request(req);
+ dout("d_revalidate %p lookup result=%d\n",
+ dentry, err);
+ }
+ }
+
dout("d_revalidate %p %s\n", dentry, valid ? "valid" : "invalid");
if (valid) {
ceph_dentry_lru_touch(dentry);
} else {
ceph_dir_clear_complete(dir);
}
- iput(dir);
+
+ dput(parent);
return valid;
}
diff --git a/fs/ceph/export.c b/fs/ceph/export.c
index 3b3172357..6e72c9816 100644
--- a/fs/ceph/export.c
+++ b/fs/ceph/export.c
@@ -71,12 +71,18 @@ static struct dentry *__fh_to_dentry(struct super_block *sb, u64 ino)
inode = ceph_find_inode(sb, vino);
if (!inode) {
struct ceph_mds_request *req;
+ int mask;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPINO,
USE_ANY_MDS);
if (IS_ERR(req))
return ERR_CAST(req);
+ mask = CEPH_STAT_CAP_INODE;
+ if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
+ mask |= CEPH_CAP_XATTR_SHARED;
+ req->r_args.getattr.mask = cpu_to_le32(mask);
+
req->r_ino1 = vino;
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
@@ -128,6 +134,7 @@ static struct dentry *__get_parent(struct super_block *sb,
struct ceph_mds_request *req;
struct inode *inode;
struct dentry *dentry;
+ int mask;
int err;
req = ceph_mdsc_create_request(mdsc, CEPH_MDS_OP_LOOKUPPARENT,
@@ -144,6 +151,12 @@ static struct dentry *__get_parent(struct super_block *sb,
.snap = CEPH_NOSNAP,
};
}
+
+ mask = CEPH_STAT_CAP_INODE;
+ if (ceph_security_xattr_wanted(d_inode(sb->s_root)))
+ mask |= CEPH_CAP_XATTR_SHARED;
+ req->r_args.getattr.mask = cpu_to_le32(mask);
+
req->r_num_caps = 1;
err = ceph_mdsc_do_request(mdsc, NULL, req);
inode = req->r_target_inode;
diff --git a/fs/ceph/file.c b/fs/ceph/file.c
index eb9028e8c..a79f92698 100644
--- a/fs/ceph/file.c
+++ b/fs/ceph/file.c
@@ -157,7 +157,7 @@ static int ceph_init_file(struct inode *inode, struct file *file, int fmode)
case S_IFDIR:
dout("init_file %p %p 0%o (regular)\n", inode, file,
inode->i_mode);
- cf = kmem_cache_alloc(ceph_file_cachep, GFP_KERNEL | __GFP_ZERO);
+ cf = kmem_cache_zalloc(ceph_file_cachep, GFP_KERNEL);
if (cf == NULL) {
ceph_put_fmode(ceph_inode(inode), fmode); /* clean up */
return -ENOMEM;
@@ -300,6 +300,7 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
struct ceph_mds_request *req;
struct dentry *dn;
struct ceph_acls_info acls = {};
+ int mask;
int err;
dout("atomic_open %p dentry %p '%pd' %s flags %d mode 0%o\n",
@@ -335,6 +336,12 @@ int ceph_atomic_open(struct inode *dir, struct dentry *dentry,
acls.pagelist = NULL;
}
}
+
+ mask = CEPH_STAT_CAP_INODE | CEPH_CAP_AUTH_SHARED;
+ if (ceph_security_xattr_wanted(dir))
+ mask |= CEPH_CAP_XATTR_SHARED;
+ req->r_args.open.mask = cpu_to_le32(mask);
+
req->r_locked_dir = dir; /* caller holds dir->i_mutex */
err = ceph_mdsc_do_request(mdsc,
(flags & (O_CREAT|O_TRUNC)) ? dir : NULL,
@@ -459,7 +466,7 @@ more:
ret += zlen;
}
- didpages = (page_align + ret) >> PAGE_CACHE_SHIFT;
+ didpages = (page_align + ret) >> PAGE_SHIFT;
pos += ret;
read = pos - off;
left -= ret;
@@ -725,7 +732,6 @@ static void ceph_aio_retry_work(struct work_struct *work)
ret = ceph_osdc_start_request(req->r_osdc, req, false);
out:
if (ret < 0) {
- BUG_ON(ret == -EOLDSNAPC);
req->r_result = ret;
ceph_aio_complete_req(req, NULL);
}
@@ -783,7 +789,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
int num_pages = 0;
int flags;
int ret;
- struct timespec mtime = CURRENT_TIME;
+ struct timespec mtime = current_fs_time(inode->i_sb);
size_t count = iov_iter_count(iter);
loff_t pos = iocb->ki_pos;
bool write = iov_iter_rw(iter) == WRITE;
@@ -800,8 +806,8 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
if (write) {
ret = invalidate_inode_pages2_range(inode->i_mapping,
- pos >> PAGE_CACHE_SHIFT,
- (pos + count) >> PAGE_CACHE_SHIFT);
+ pos >> PAGE_SHIFT,
+ (pos + count) >> PAGE_SHIFT);
if (ret < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret);
@@ -866,7 +872,7 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
* may block.
*/
truncate_inode_pages_range(inode->i_mapping, pos,
- (pos+len) | (PAGE_CACHE_SIZE - 1));
+ (pos+len) | (PAGE_SIZE - 1));
osd_req_op_init(req, 1, CEPH_OSD_OP_STARTSYNC, 0);
}
@@ -949,7 +955,6 @@ ceph_direct_read_write(struct kiocb *iocb, struct iov_iter *iter,
ret = ceph_osdc_start_request(req->r_osdc,
req, false);
if (ret < 0) {
- BUG_ON(ret == -EOLDSNAPC);
req->r_result = ret;
ceph_aio_complete_req(req, NULL);
}
@@ -988,7 +993,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
int flags;
int check_caps = 0;
int ret;
- struct timespec mtime = CURRENT_TIME;
+ struct timespec mtime = current_fs_time(inode->i_sb);
size_t count = iov_iter_count(from);
if (ceph_snap(file_inode(file)) != CEPH_NOSNAP)
@@ -1001,8 +1006,8 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
return ret;
ret = invalidate_inode_pages2_range(inode->i_mapping,
- pos >> PAGE_CACHE_SHIFT,
- (pos + count) >> PAGE_CACHE_SHIFT);
+ pos >> PAGE_SHIFT,
+ (pos + count) >> PAGE_SHIFT);
if (ret < 0)
dout("invalidate_inode_pages2_range returned %d\n", ret);
@@ -1031,7 +1036,7 @@ ceph_sync_write(struct kiocb *iocb, struct iov_iter *from, loff_t pos,
* write from beginning of first page,
* regardless of io alignment
*/
- num_pages = (len + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ num_pages = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
pages = ceph_alloc_page_vector(num_pages, GFP_KERNEL);
if (IS_ERR(pages)) {
@@ -1154,7 +1159,7 @@ again:
dout("aio_read %p %llx.%llx dropping cap refs on %s = %d\n",
inode, ceph_vinop(inode), ceph_cap_string(got), (int)ret);
if (pinned_page) {
- page_cache_release(pinned_page);
+ put_page(pinned_page);
pinned_page = NULL;
}
ceph_put_cap_refs(ci, got);
@@ -1183,10 +1188,10 @@ again:
if (retry_op == READ_INLINE) {
BUG_ON(ret > 0 || read > 0);
if (iocb->ki_pos < i_size &&
- iocb->ki_pos < PAGE_CACHE_SIZE) {
+ iocb->ki_pos < PAGE_SIZE) {
loff_t end = min_t(loff_t, i_size,
iocb->ki_pos + len);
- end = min_t(loff_t, end, PAGE_CACHE_SIZE);
+ end = min_t(loff_t, end, PAGE_SIZE);
if (statret < end)
zero_user_segment(page, statret, end);
ret = copy_page_to_iter(page,
@@ -1458,21 +1463,21 @@ static inline void ceph_zero_partial_page(
struct inode *inode, loff_t offset, unsigned size)
{
struct page *page;
- pgoff_t index = offset >> PAGE_CACHE_SHIFT;
+ pgoff_t index = offset >> PAGE_SHIFT;
page = find_lock_page(inode->i_mapping, index);
if (page) {
wait_on_page_writeback(page);
- zero_user(page, offset & (PAGE_CACHE_SIZE - 1), size);
+ zero_user(page, offset & (PAGE_SIZE - 1), size);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
}
static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
loff_t length)
{
- loff_t nearly = round_up(offset, PAGE_CACHE_SIZE);
+ loff_t nearly = round_up(offset, PAGE_SIZE);
if (offset < nearly) {
loff_t size = nearly - offset;
if (length < size)
@@ -1481,8 +1486,8 @@ static void ceph_zero_pagecache_range(struct inode *inode, loff_t offset,
offset += size;
length -= size;
}
- if (length >= PAGE_CACHE_SIZE) {
- loff_t size = round_down(length, PAGE_CACHE_SIZE);
+ if (length >= PAGE_SIZE) {
+ loff_t size = round_down(length, PAGE_SIZE);
truncate_pagecache_range(inode, offset, offset + size - 1);
offset += size;
length -= size;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 5849b88bb..edfade037 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -549,6 +549,10 @@ int ceph_fill_file_size(struct inode *inode, int issued,
if (ceph_seq_cmp(truncate_seq, ci->i_truncate_seq) > 0 ||
(truncate_seq == ci->i_truncate_seq && size > inode->i_size)) {
dout("size %lld -> %llu\n", inode->i_size, size);
+ if (size > 0 && S_ISDIR(inode->i_mode)) {
+ pr_err("fill_file_size non-zero size for directory\n");
+ size = 0;
+ }
i_size_write(inode, size);
inode->i_blocks = (size + (1<<9) - 1) >> 9;
ci->i_reported_size = size;
@@ -977,13 +981,8 @@ out_unlock:
/*
* splice a dentry to an inode.
* caller must hold directory i_mutex for this to be safe.
- *
- * we will only rehash the resulting dentry if @prehash is
- * true; @prehash will be set to false (for the benefit of
- * the caller) if we fail.
*/
-static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
- bool *prehash)
+static struct dentry *splice_dentry(struct dentry *dn, struct inode *in)
{
struct dentry *realdn;
@@ -996,8 +995,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
if (IS_ERR(realdn)) {
pr_err("splice_dentry error %ld %p inode %p ino %llx.%llx\n",
PTR_ERR(realdn), dn, in, ceph_vinop(in));
- if (prehash)
- *prehash = false; /* don't rehash on error */
dn = realdn; /* note realdn contains the error */
goto out;
} else if (realdn) {
@@ -1013,8 +1010,6 @@ static struct dentry *splice_dentry(struct dentry *dn, struct inode *in,
dout("dn %p attached to %p ino %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)));
}
- if ((!prehash || *prehash) && d_unhashed(dn))
- d_rehash(dn);
out:
return dn;
}
@@ -1247,10 +1242,8 @@ retry_lookup:
dout("d_delete %p\n", dn);
d_delete(dn);
} else {
- dout("d_instantiate %p NULL\n", dn);
- d_instantiate(dn, NULL);
if (have_lease && d_unhashed(dn))
- d_rehash(dn);
+ d_add(dn, NULL);
update_dentry_lease(dn, rinfo->dlease,
session,
req->r_request_started);
@@ -1262,7 +1255,7 @@ retry_lookup:
if (d_really_is_negative(dn)) {
ceph_dir_clear_ordered(dir);
ihold(in);
- dn = splice_dentry(dn, in, &have_lease);
+ dn = splice_dentry(dn, in);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
@@ -1272,6 +1265,7 @@ retry_lookup:
dout(" %p links to %p %llx.%llx, not %llx.%llx\n",
dn, d_inode(dn), ceph_vinop(d_inode(dn)),
ceph_vinop(in));
+ d_invalidate(dn);
have_lease = false;
}
@@ -1292,7 +1286,7 @@ retry_lookup:
dout(" linking snapped dir %p to dn %p\n", in, dn);
ceph_dir_clear_ordered(dir);
ihold(in);
- dn = splice_dentry(dn, in, NULL);
+ dn = splice_dentry(dn, in);
if (IS_ERR(dn)) {
err = PTR_ERR(dn);
goto done;
@@ -1344,7 +1338,7 @@ void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl)
{
if (ctl->page) {
kunmap(ctl->page);
- page_cache_release(ctl->page);
+ put_page(ctl->page);
ctl->page = NULL;
}
}
@@ -1354,21 +1348,26 @@ static int fill_readdir_cache(struct inode *dir, struct dentry *dn,
struct ceph_mds_request *req)
{
struct ceph_inode_info *ci = ceph_inode(dir);
- unsigned nsize = PAGE_CACHE_SIZE / sizeof(struct dentry*);
+ unsigned nsize = PAGE_SIZE / sizeof(struct dentry*);
unsigned idx = ctl->index % nsize;
pgoff_t pgoff = ctl->index / nsize;
if (!ctl->page || pgoff != page_index(ctl->page)) {
ceph_readdir_cache_release(ctl);
- ctl->page = grab_cache_page(&dir->i_data, pgoff);
+ if (idx == 0)
+ ctl->page = grab_cache_page(&dir->i_data, pgoff);
+ else
+ ctl->page = find_lock_page(&dir->i_data, pgoff);
if (!ctl->page) {
ctl->index = -1;
- return -ENOMEM;
+ return idx == 0 ? -ENOMEM : 0;
}
/* reading/filling the cache are serialized by
* i_mutex, no need to use page lock */
unlock_page(ctl->page);
ctl->dentries = kmap(ctl->page);
+ if (idx == 0)
+ memset(ctl->dentries, 0, PAGE_SIZE);
}
if (req->r_dir_release_cnt == atomic64_read(&ci->i_release_count) &&
@@ -1391,7 +1390,7 @@ int ceph_readdir_prepopulate(struct ceph_mds_request *req,
struct qstr dname;
struct dentry *dn;
struct inode *in;
- int err = 0, ret, i;
+ int err = 0, skipped = 0, ret, i;
struct inode *snapdir = NULL;
struct ceph_mds_request_head *rhead = req->r_request->front.iov_base;
struct ceph_dentry_info *di;
@@ -1503,7 +1502,17 @@ retry_lookup:
}
if (d_really_is_negative(dn)) {
- struct dentry *realdn = splice_dentry(dn, in, NULL);
+ struct dentry *realdn;
+
+ if (ceph_security_xattr_deadlock(in)) {
+ dout(" skip splicing dn %p to inode %p"
+ " (security xattr deadlock)\n", dn, in);
+ iput(in);
+ skipped++;
+ goto next_item;
+ }
+
+ realdn = splice_dentry(dn, in);
if (IS_ERR(realdn)) {
err = PTR_ERR(realdn);
d_drop(dn);
@@ -1520,7 +1529,7 @@ retry_lookup:
req->r_session,
req->r_request_started);
- if (err == 0 && cache_ctl.index >= 0) {
+ if (err == 0 && skipped == 0 && cache_ctl.index >= 0) {
ret = fill_readdir_cache(d_inode(parent), dn,
&cache_ctl, req);
if (ret < 0)
@@ -1531,7 +1540,7 @@ next_item:
dput(dn);
}
out:
- if (err == 0) {
+ if (err == 0 && skipped == 0) {
req->r_did_prepopulate = true;
req->r_readdir_cache_idx = cache_ctl.index;
}
@@ -1961,7 +1970,7 @@ int ceph_setattr(struct dentry *dentry, struct iattr *attr)
if (dirtied) {
inode_dirty_flags = __ceph_mark_dirty_caps(ci, dirtied,
&prealloc_cf);
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
}
release &= issued;
diff --git a/fs/ceph/mds_client.c b/fs/ceph/mds_client.c
index 911d64d86..85b8517f1 100644
--- a/fs/ceph/mds_client.c
+++ b/fs/ceph/mds_client.c
@@ -386,9 +386,7 @@ void ceph_put_mds_session(struct ceph_mds_session *s)
atomic_read(&s->s_ref), atomic_read(&s->s_ref)-1);
if (atomic_dec_and_test(&s->s_ref)) {
if (s->s_auth.authorizer)
- ceph_auth_destroy_authorizer(
- s->s_mdsc->fsc->client->monc.auth,
- s->s_auth.authorizer);
+ ceph_auth_destroy_authorizer(s->s_auth.authorizer);
kfree(s);
}
}
@@ -1610,7 +1608,7 @@ again:
while (!list_empty(&tmp_list)) {
if (!msg) {
msg = ceph_msg_new(CEPH_MSG_CLIENT_CAPRELEASE,
- PAGE_CACHE_SIZE, GFP_NOFS, false);
+ PAGE_SIZE, GFP_NOFS, false);
if (!msg)
goto out_err;
head = msg->front.iov_base;
@@ -1729,7 +1727,7 @@ ceph_mdsc_create_request(struct ceph_mds_client *mdsc, int op, int mode)
init_completion(&req->r_safe_completion);
INIT_LIST_HEAD(&req->r_unsafe_item);
- req->r_stamp = CURRENT_TIME;
+ req->r_stamp = current_fs_time(mdsc->fsc->sb);
req->r_op = op;
req->r_direct_mode = mode;
@@ -2540,6 +2538,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
/* insert trace into our cache */
mutex_lock(&req->r_fill_mutex);
+ current->journal_info = req;
err = ceph_fill_trace(mdsc->fsc->sb, req, req->r_session);
if (err == 0) {
if (result == 0 && (req->r_op == CEPH_MDS_OP_READDIR ||
@@ -2547,6 +2546,7 @@ static void handle_reply(struct ceph_mds_session *session, struct ceph_msg *msg)
ceph_readdir_prepopulate(req, req->r_session);
ceph_unreserve_caps(mdsc, &req->r_caps_reservation);
}
+ current->journal_info = NULL;
mutex_unlock(&req->r_fill_mutex);
up_read(&mdsc->snap_rwsem);
@@ -3764,7 +3764,6 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
dout("handle_map epoch %u len %d\n", epoch, (int)maplen);
/* do we need it? */
- ceph_monc_got_mdsmap(&mdsc->fsc->client->monc, epoch);
mutex_lock(&mdsc->mutex);
if (mdsc->mdsmap && epoch <= mdsc->mdsmap->m_epoch) {
dout("handle_map epoch %u <= our %u\n",
@@ -3791,6 +3790,8 @@ void ceph_mdsc_handle_map(struct ceph_mds_client *mdsc, struct ceph_msg *msg)
mdsc->fsc->sb->s_maxbytes = mdsc->mdsmap->m_max_file_size;
__wake_requests(mdsc, &mdsc->waiting_for_map);
+ ceph_monc_got_map(&mdsc->fsc->client->monc, CEPH_SUB_MDSMAP,
+ mdsc->mdsmap->m_epoch);
mutex_unlock(&mdsc->mutex);
schedule_delayed(mdsc);
@@ -3897,7 +3898,7 @@ static struct ceph_auth_handshake *get_authorizer(struct ceph_connection *con,
struct ceph_auth_handshake *auth = &s->s_auth;
if (force_new && auth->authorizer) {
- ceph_auth_destroy_authorizer(ac, auth->authorizer);
+ ceph_auth_destroy_authorizer(auth->authorizer);
auth->authorizer = NULL;
}
if (!auth->authorizer) {
diff --git a/fs/ceph/mds_client.h b/fs/ceph/mds_client.h
index 37712ccff..ee69a537d 100644
--- a/fs/ceph/mds_client.h
+++ b/fs/ceph/mds_client.h
@@ -97,7 +97,7 @@ struct ceph_mds_reply_info_parsed {
/*
* cap releases are batched and sent to the MDS en masse.
*/
-#define CEPH_CAPS_PER_RELEASE ((PAGE_CACHE_SIZE - \
+#define CEPH_CAPS_PER_RELEASE ((PAGE_SIZE - \
sizeof(struct ceph_mds_cap_release)) / \
sizeof(struct ceph_mds_cap_item))
diff --git a/fs/ceph/snap.c b/fs/ceph/snap.c
index 4aa7122a8..9caaa7ffc 100644
--- a/fs/ceph/snap.c
+++ b/fs/ceph/snap.c
@@ -296,8 +296,6 @@ static int cmpu64_rev(const void *a, const void *b)
}
-struct ceph_snap_context *ceph_empty_snapc;
-
/*
* build the snap context for a given realm.
*/
@@ -987,17 +985,3 @@ out:
up_write(&mdsc->snap_rwsem);
return;
}
-
-int __init ceph_snap_init(void)
-{
- ceph_empty_snapc = ceph_create_snap_context(0, GFP_NOFS);
- if (!ceph_empty_snapc)
- return -ENOMEM;
- ceph_empty_snapc->seq = 1;
- return 0;
-}
-
-void ceph_snap_exit(void)
-{
- ceph_put_snap_context(ceph_empty_snapc);
-}
diff --git a/fs/ceph/super.c b/fs/ceph/super.c
index ca4d5e845..f12d5e295 100644
--- a/fs/ceph/super.c
+++ b/fs/ceph/super.c
@@ -439,8 +439,8 @@ static int ceph_show_options(struct seq_file *m, struct dentry *root)
if (fsopt->flags & CEPH_MOUNT_OPT_DIRSTAT)
seq_puts(m, ",dirstat");
- if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES) == 0)
- seq_puts(m, ",norbytes");
+ if ((fsopt->flags & CEPH_MOUNT_OPT_RBYTES))
+ seq_puts(m, ",rbytes");
if (fsopt->flags & CEPH_MOUNT_OPT_NOASYNCREADDIR)
seq_puts(m, ",noasyncreaddir");
if ((fsopt->flags & CEPH_MOUNT_OPT_DCACHE) == 0)
@@ -530,7 +530,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
goto fail;
}
fsc->client->extra_mon_dispatch = extra_mon_dispatch;
- fsc->client->monc.want_mdsmap = 1;
+ ceph_monc_want_map(&fsc->client->monc, CEPH_SUB_MDSMAP, 0, true);
fsc->mount_options = fsopt;
@@ -560,7 +560,7 @@ static struct ceph_fs_client *create_fs_client(struct ceph_mount_options *fsopt,
/* set up mempools */
err = -ENOMEM;
- page_count = fsc->mount_options->wsize >> PAGE_CACHE_SHIFT;
+ page_count = fsc->mount_options->wsize >> PAGE_SHIFT;
size = sizeof (struct page *) * (page_count ? page_count : 1);
fsc->wb_pagevec_pool = mempool_create_kmalloc_pool(10, size);
if (!fsc->wb_pagevec_pool)
@@ -793,22 +793,20 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
struct dentry *root;
int first = 0; /* first vfsmount for this super_block */
- dout("mount start\n");
+ dout("mount start %p\n", fsc);
mutex_lock(&fsc->client->mount_mutex);
- err = __ceph_open_session(fsc->client, started);
- if (err < 0)
- goto out;
+ if (!fsc->sb->s_root) {
+ err = __ceph_open_session(fsc->client, started);
+ if (err < 0)
+ goto out;
- dout("mount opening root\n");
- root = open_root_dentry(fsc, "", started);
- if (IS_ERR(root)) {
- err = PTR_ERR(root);
- goto out;
- }
- if (fsc->sb->s_root) {
- dput(root);
- } else {
+ dout("mount opening root\n");
+ root = open_root_dentry(fsc, "", started);
+ if (IS_ERR(root)) {
+ err = PTR_ERR(root);
+ goto out;
+ }
fsc->sb->s_root = root;
first = 1;
@@ -818,6 +816,7 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
}
if (path[0] == 0) {
+ root = fsc->sb->s_root;
dget(root);
} else {
dout("mount opening base mountpoint\n");
@@ -833,16 +832,14 @@ static struct dentry *ceph_real_mount(struct ceph_fs_client *fsc,
mutex_unlock(&fsc->client->mount_mutex);
return root;
-out:
- mutex_unlock(&fsc->client->mount_mutex);
- return ERR_PTR(err);
-
fail:
if (first) {
dput(fsc->sb->s_root);
fsc->sb->s_root = NULL;
}
- goto out;
+out:
+ mutex_unlock(&fsc->client->mount_mutex);
+ return ERR_PTR(err);
}
static int ceph_set_super(struct super_block *s, void *data)
@@ -915,13 +912,13 @@ static int ceph_register_bdi(struct super_block *sb,
int err;
/* set ra_pages based on rasize mount option? */
- if (fsc->mount_options->rasize >= PAGE_CACHE_SIZE)
+ if (fsc->mount_options->rasize >= PAGE_SIZE)
fsc->backing_dev_info.ra_pages =
- (fsc->mount_options->rasize + PAGE_CACHE_SIZE - 1)
+ (fsc->mount_options->rasize + PAGE_SIZE - 1)
>> PAGE_SHIFT;
else
fsc->backing_dev_info.ra_pages =
- VM_MAX_READAHEAD * 1024 / PAGE_CACHE_SIZE;
+ VM_MAX_READAHEAD * 1024 / PAGE_SIZE;
err = bdi_register(&fsc->backing_dev_info, NULL, "ceph-%ld",
atomic_long_inc_return(&bdi_seq));
@@ -1042,19 +1039,14 @@ static int __init init_ceph(void)
ceph_flock_init();
ceph_xattr_init();
- ret = ceph_snap_init();
- if (ret)
- goto out_xattr;
ret = register_filesystem(&ceph_fs_type);
if (ret)
- goto out_snap;
+ goto out_xattr;
pr_info("loaded (mds proto %d)\n", CEPH_MDSC_PROTOCOL);
return 0;
-out_snap:
- ceph_snap_exit();
out_xattr:
ceph_xattr_exit();
destroy_caches();
@@ -1066,7 +1058,6 @@ static void __exit exit_ceph(void)
{
dout("exit_ceph\n");
unregister_filesystem(&ceph_fs_type);
- ceph_snap_exit();
ceph_xattr_exit();
destroy_caches();
}
diff --git a/fs/ceph/super.h b/fs/ceph/super.h
index 9c458eb52..e705c4d61 100644
--- a/fs/ceph/super.h
+++ b/fs/ceph/super.h
@@ -37,8 +37,7 @@
#define CEPH_MOUNT_OPT_FSCACHE (1<<10) /* use fscache */
#define CEPH_MOUNT_OPT_NOPOOLPERM (1<<11) /* no pool permission check */
-#define CEPH_MOUNT_OPT_DEFAULT (CEPH_MOUNT_OPT_RBYTES | \
- CEPH_MOUNT_OPT_DCACHE)
+#define CEPH_MOUNT_OPT_DEFAULT CEPH_MOUNT_OPT_DCACHE
#define ceph_set_mount_opt(fsc, opt) \
(fsc)->mount_options->flags |= CEPH_MOUNT_OPT_##opt;
@@ -469,7 +468,7 @@ static inline struct inode *ceph_find_inode(struct super_block *sb,
#define CEPH_I_POOL_PERM (1 << 4) /* pool rd/wr bits are valid */
#define CEPH_I_POOL_RD (1 << 5) /* can read from pool */
#define CEPH_I_POOL_WR (1 << 6) /* can write to pool */
-
+#define CEPH_I_SEC_INITED (1 << 7) /* security initialized */
static inline void __ceph_dir_set_complete(struct ceph_inode_info *ci,
long long release_count,
@@ -721,7 +720,6 @@ static inline int default_congestion_kb(void)
/* snap.c */
-extern struct ceph_snap_context *ceph_empty_snapc;
struct ceph_snap_realm *ceph_lookup_snap_realm(struct ceph_mds_client *mdsc,
u64 ino);
extern void ceph_get_snap_realm(struct ceph_mds_client *mdsc,
@@ -738,8 +736,6 @@ extern void ceph_queue_cap_snap(struct ceph_inode_info *ci);
extern int __ceph_finish_cap_snap(struct ceph_inode_info *ci,
struct ceph_cap_snap *capsnap);
extern void ceph_cleanup_empty_realms(struct ceph_mds_client *mdsc);
-extern int ceph_snap_init(void);
-extern void ceph_snap_exit(void);
/*
* a cap_snap is "pending" if it is still awaiting an in-progress
@@ -808,6 +804,20 @@ extern void __init ceph_xattr_init(void);
extern void ceph_xattr_exit(void);
extern const struct xattr_handler *ceph_xattr_handlers[];
+#ifdef CONFIG_SECURITY
+extern bool ceph_security_xattr_deadlock(struct inode *in);
+extern bool ceph_security_xattr_wanted(struct inode *in);
+#else
+static inline bool ceph_security_xattr_deadlock(struct inode *in)
+{
+ return false;
+}
+static inline bool ceph_security_xattr_wanted(struct inode *in)
+{
+ return false;
+}
+#endif
+
/* acl.c */
struct ceph_acls_info {
void *default_acl;
@@ -947,7 +957,6 @@ extern void ceph_dentry_lru_touch(struct dentry *dn);
extern void ceph_dentry_lru_del(struct dentry *dn);
extern void ceph_invalidate_dentry_lease(struct dentry *dentry);
extern unsigned ceph_dentry_hash(struct inode *dir, struct dentry *dn);
-extern struct inode *ceph_get_dentry_parent_inode(struct dentry *dentry);
extern void ceph_readdir_cache_release(struct ceph_readdir_cache_control *ctl);
/*
diff --git a/fs/ceph/xattr.c b/fs/ceph/xattr.c
index 819163d83..9410abdef 100644
--- a/fs/ceph/xattr.c
+++ b/fs/ceph/xattr.c
@@ -714,31 +714,62 @@ void __ceph_build_xattrs_blob(struct ceph_inode_info *ci)
}
}
+static inline int __get_request_mask(struct inode *in) {
+ struct ceph_mds_request *req = current->journal_info;
+ int mask = 0;
+ if (req && req->r_target_inode == in) {
+ if (req->r_op == CEPH_MDS_OP_LOOKUP ||
+ req->r_op == CEPH_MDS_OP_LOOKUPINO ||
+ req->r_op == CEPH_MDS_OP_LOOKUPPARENT ||
+ req->r_op == CEPH_MDS_OP_GETATTR) {
+ mask = le32_to_cpu(req->r_args.getattr.mask);
+ } else if (req->r_op == CEPH_MDS_OP_OPEN ||
+ req->r_op == CEPH_MDS_OP_CREATE) {
+ mask = le32_to_cpu(req->r_args.open.mask);
+ }
+ }
+ return mask;
+}
+
ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
size_t size)
{
struct ceph_inode_info *ci = ceph_inode(inode);
- int err;
struct ceph_inode_xattr *xattr;
struct ceph_vxattr *vxattr = NULL;
+ int req_mask;
+ int err;
if (!ceph_is_valid_xattr(name))
return -ENODATA;
/* let's see if a virtual xattr was requested */
vxattr = ceph_match_vxattr(inode, name);
- if (vxattr && !(vxattr->exists_cb && !vxattr->exists_cb(ci))) {
- err = vxattr->getxattr_cb(ci, value, size);
+ if (vxattr) {
+ err = -ENODATA;
+ if (!(vxattr->exists_cb && !vxattr->exists_cb(ci)))
+ err = vxattr->getxattr_cb(ci, value, size);
return err;
}
+ req_mask = __get_request_mask(inode);
+
spin_lock(&ci->i_ceph_lock);
dout("getxattr %p ver=%lld index_ver=%lld\n", inode,
ci->i_xattrs.version, ci->i_xattrs.index_version);
if (ci->i_xattrs.version == 0 ||
- !__ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1)) {
+ !((req_mask & CEPH_CAP_XATTR_SHARED) ||
+ __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 1))) {
spin_unlock(&ci->i_ceph_lock);
+
+ /* security module gets xattr while filling trace */
+ if (current->journal_info != NULL) {
+ pr_warn_ratelimited("sync getxattr %p "
+ "during filling trace\n", inode);
+ return -EBUSY;
+ }
+
/* get xattrs from mds (if we don't already have them) */
err = ceph_do_getattr(inode, CEPH_STAT_CAP_XATTR, true);
if (err)
@@ -765,6 +796,9 @@ ssize_t __ceph_getxattr(struct inode *inode, const char *name, void *value,
memcpy(value, xattr->val, xattr->val_len);
+ if (current->journal_info != NULL &&
+ !strncmp(name, XATTR_SECURITY_PREFIX, XATTR_SECURITY_PREFIX_LEN))
+ ci->i_ceph_flags |= CEPH_I_SEC_INITED;
out:
spin_unlock(&ci->i_ceph_lock);
return err;
@@ -999,7 +1033,7 @@ retry:
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
&prealloc_cf);
ci->i_xattrs.dirty = true;
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
}
spin_unlock(&ci->i_ceph_lock);
@@ -1015,7 +1049,15 @@ do_sync:
do_sync_unlocked:
if (lock_snap_rwsem)
up_read(&mdsc->snap_rwsem);
- err = ceph_sync_setxattr(dentry, name, value, size, flags);
+
+ /* security module set xattr while filling trace */
+ if (current->journal_info != NULL) {
+ pr_warn_ratelimited("sync setxattr %p "
+ "during filling trace\n", inode);
+ err = -EBUSY;
+ } else {
+ err = ceph_sync_setxattr(dentry, name, value, size, flags);
+ }
out:
ceph_free_cap_flush(prealloc_cf);
kfree(newname);
@@ -1136,7 +1178,7 @@ retry:
dirty = __ceph_mark_dirty_caps(ci, CEPH_CAP_XATTR_EXCL,
&prealloc_cf);
ci->i_xattrs.dirty = true;
- inode->i_ctime = CURRENT_TIME;
+ inode->i_ctime = current_fs_time(inode->i_sb);
spin_unlock(&ci->i_ceph_lock);
if (lock_snap_rwsem)
up_read(&mdsc->snap_rwsem);
@@ -1164,3 +1206,25 @@ int ceph_removexattr(struct dentry *dentry, const char *name)
return __ceph_removexattr(dentry, name);
}
+
+#ifdef CONFIG_SECURITY
+bool ceph_security_xattr_wanted(struct inode *in)
+{
+ return in->i_security != NULL;
+}
+
+bool ceph_security_xattr_deadlock(struct inode *in)
+{
+ struct ceph_inode_info *ci;
+ bool ret;
+ if (in->i_security == NULL)
+ return false;
+ ci = ceph_inode(in);
+ spin_lock(&ci->i_ceph_lock);
+ ret = !(ci->i_ceph_flags & CEPH_I_SEC_INITED) &&
+ !(ci->i_xattrs.version > 0 &&
+ __ceph_caps_issued_mask(ci, CEPH_CAP_XATTR_SHARED, 0));
+ spin_unlock(&ci->i_ceph_lock);
+ return ret;
+}
+#endif
diff --git a/fs/cifs/cifs_debug.c b/fs/cifs/cifs_debug.c
index 50b268483..788e19195 100644
--- a/fs/cifs/cifs_debug.c
+++ b/fs/cifs/cifs_debug.c
@@ -255,7 +255,6 @@ static const struct file_operations cifs_debug_data_proc_fops = {
static ssize_t cifs_stats_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
- char c;
bool bv;
int rc;
struct list_head *tmp1, *tmp2, *tmp3;
@@ -263,11 +262,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
struct cifs_ses *ses;
struct cifs_tcon *tcon;
- rc = get_user(c, buffer);
- if (rc)
- return rc;
-
- if (strtobool(&c, &bv) == 0) {
+ rc = kstrtobool_from_user(buffer, count, &bv);
+ if (rc == 0) {
#ifdef CONFIG_CIFS_STATS2
atomic_set(&totBufAllocCount, 0);
atomic_set(&totSmBufAllocCount, 0);
@@ -290,6 +286,8 @@ static ssize_t cifs_stats_proc_write(struct file *file,
}
}
spin_unlock(&cifs_tcp_ses_lock);
+ } else {
+ return rc;
}
return count;
@@ -433,17 +431,17 @@ static int cifsFYI_proc_open(struct inode *inode, struct file *file)
static ssize_t cifsFYI_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- char c;
+ char c[2] = { '\0' };
bool bv;
int rc;
- rc = get_user(c, buffer);
+ rc = get_user(c[0], buffer);
if (rc)
return rc;
- if (strtobool(&c, &bv) == 0)
+ if (strtobool(c, &bv) == 0)
cifsFYI = bv;
- else if ((c > '1') && (c <= '9'))
- cifsFYI = (int) (c - '0'); /* see cifs_debug.h for meanings */
+ else if ((c[0] > '1') && (c[0] <= '9'))
+ cifsFYI = (int) (c[0] - '0'); /* see cifs_debug.h for meanings */
return count;
}
@@ -471,20 +469,12 @@ static int cifs_linux_ext_proc_open(struct inode *inode, struct file *file)
static ssize_t cifs_linux_ext_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
- char c;
- bool bv;
int rc;
- rc = get_user(c, buffer);
+ rc = kstrtobool_from_user(buffer, count, &linuxExtEnabled);
if (rc)
return rc;
- rc = strtobool(&c, &bv);
- if (rc)
- return rc;
-
- linuxExtEnabled = bv;
-
return count;
}
@@ -511,20 +501,12 @@ static int cifs_lookup_cache_proc_open(struct inode *inode, struct file *file)
static ssize_t cifs_lookup_cache_proc_write(struct file *file,
const char __user *buffer, size_t count, loff_t *ppos)
{
- char c;
- bool bv;
int rc;
- rc = get_user(c, buffer);
+ rc = kstrtobool_from_user(buffer, count, &lookupCacheEnabled);
if (rc)
return rc;
- rc = strtobool(&c, &bv);
- if (rc)
- return rc;
-
- lookupCacheEnabled = bv;
-
return count;
}
@@ -551,20 +533,12 @@ static int traceSMB_proc_open(struct inode *inode, struct file *file)
static ssize_t traceSMB_proc_write(struct file *file, const char __user *buffer,
size_t count, loff_t *ppos)
{
- char c;
- bool bv;
int rc;
- rc = get_user(c, buffer);
+ rc = kstrtobool_from_user(buffer, count, &traceSMB);
if (rc)
return rc;
- rc = strtobool(&c, &bv);
- if (rc)
- return rc;
-
- traceSMB = bv;
-
return count;
}
@@ -622,7 +596,6 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
int rc;
unsigned int flags;
char flags_string[12];
- char c;
bool bv;
if ((count < 1) || (count > 11))
@@ -635,11 +608,10 @@ static ssize_t cifs_security_flags_proc_write(struct file *file,
if (count < 3) {
/* single char or single char followed by null */
- c = flags_string[0];
- if (strtobool(&c, &bv) == 0) {
+ if (strtobool(flags_string, &bv) == 0) {
global_secflags = bv ? CIFSSEC_MAX : CIFSSEC_DEF;
return count;
- } else if (!isdigit(c)) {
+ } else if (!isdigit(flags_string[0])) {
cifs_dbg(VFS, "Invalid SecurityFlags: %s\n",
flags_string);
return -EINVAL;
diff --git a/fs/cifs/cifs_debug.h b/fs/cifs/cifs_debug.h
index 66cf0f9ff..c611ca233 100644
--- a/fs/cifs/cifs_debug.h
+++ b/fs/cifs/cifs_debug.h
@@ -25,7 +25,7 @@
void cifs_dump_mem(char *label, void *data, int length);
void cifs_dump_detail(void *);
void cifs_dump_mids(struct TCP_Server_Info *);
-extern int traceSMB; /* flag which enables the function below */
+extern bool traceSMB; /* flag which enables the function below */
void dump_smb(void *, int);
#define CIFS_INFO 0x01
#define CIFS_RC 0x02
diff --git a/fs/cifs/cifsencrypt.c b/fs/cifs/cifsencrypt.c
index e682b36a2..4897dacf8 100644
--- a/fs/cifs/cifsencrypt.c
+++ b/fs/cifs/cifsencrypt.c
@@ -33,6 +33,7 @@
#include <linux/ctype.h>
#include <linux/random.h>
#include <linux/highmem.h>
+#include <crypto/skcipher.h>
static int
cifs_crypto_shash_md5_allocate(struct TCP_Server_Info *server)
@@ -789,38 +790,46 @@ int
calc_seckey(struct cifs_ses *ses)
{
int rc;
- struct crypto_blkcipher *tfm_arc4;
+ struct crypto_skcipher *tfm_arc4;
struct scatterlist sgin, sgout;
- struct blkcipher_desc desc;
+ struct skcipher_request *req;
unsigned char sec_key[CIFS_SESS_KEY_SIZE]; /* a nonce */
get_random_bytes(sec_key, CIFS_SESS_KEY_SIZE);
- tfm_arc4 = crypto_alloc_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
+ tfm_arc4 = crypto_alloc_skcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_arc4)) {
rc = PTR_ERR(tfm_arc4);
cifs_dbg(VFS, "could not allocate crypto API arc4\n");
return rc;
}
- desc.tfm = tfm_arc4;
-
- rc = crypto_blkcipher_setkey(tfm_arc4, ses->auth_key.response,
+ rc = crypto_skcipher_setkey(tfm_arc4, ses->auth_key.response,
CIFS_SESS_KEY_SIZE);
if (rc) {
cifs_dbg(VFS, "%s: Could not set response as a key\n",
__func__);
- return rc;
+ goto out_free_cipher;
+ }
+
+ req = skcipher_request_alloc(tfm_arc4, GFP_KERNEL);
+ if (!req) {
+ rc = -ENOMEM;
+ cifs_dbg(VFS, "could not allocate crypto API arc4 request\n");
+ goto out_free_cipher;
}
sg_init_one(&sgin, sec_key, CIFS_SESS_KEY_SIZE);
sg_init_one(&sgout, ses->ntlmssp->ciphertext, CIFS_CPHTXT_SIZE);
- rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, CIFS_CPHTXT_SIZE);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sgin, &sgout, CIFS_CPHTXT_SIZE, NULL);
+
+ rc = crypto_skcipher_encrypt(req);
+ skcipher_request_free(req);
if (rc) {
cifs_dbg(VFS, "could not encrypt session key rc: %d\n", rc);
- crypto_free_blkcipher(tfm_arc4);
- return rc;
+ goto out_free_cipher;
}
/* make secondary_key/nonce as session key */
@@ -828,7 +837,8 @@ calc_seckey(struct cifs_ses *ses)
/* and make len as that of session key only */
ses->auth_key.len = CIFS_SESS_KEY_SIZE;
- crypto_free_blkcipher(tfm_arc4);
+out_free_cipher:
+ crypto_free_skcipher(tfm_arc4);
return rc;
}
diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c
index 2eea40353..89201564c 100644
--- a/fs/cifs/cifsfs.c
+++ b/fs/cifs/cifsfs.c
@@ -54,10 +54,10 @@
#endif
int cifsFYI = 0;
-int traceSMB = 0;
+bool traceSMB;
bool enable_oplocks = true;
-unsigned int linuxExtEnabled = 1;
-unsigned int lookupCacheEnabled = 1;
+bool linuxExtEnabled = true;
+bool lookupCacheEnabled = true;
unsigned int global_secflags = CIFSSEC_DEF;
/* unsigned int ntlmv2_support = 0; */
unsigned int sign_CIFS_PDUs = 1;
@@ -642,9 +642,7 @@ cifs_get_root(struct smb_vol *vol, struct super_block *sb)
while (*s && *s != sep)
s++;
- inode_lock(dir);
- child = lookup_one_len(p, dentry, s - p);
- inode_unlock(dir);
+ child = lookup_one_len_unlocked(p, dentry, s - p);
dput(dentry);
dentry = child;
} while (!IS_ERR(dentry));
@@ -964,7 +962,7 @@ static int cifs_clone_file_range(struct file *src_file, loff_t off,
cifs_dbg(FYI, "about to flush pages\n");
/* should we flush first and last page first */
truncate_inode_pages_range(&target_inode->i_data, destoff,
- PAGE_CACHE_ALIGN(destoff + len)-1);
+ PAGE_ALIGN(destoff + len)-1);
if (target_tcon->ses->server->ops->duplicate_extents)
rc = target_tcon->ses->server->ops->duplicate_extents(xid,
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index a25b2513f..f2cc0b3d1 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -714,7 +714,7 @@ compare_mid(__u16 mid, const struct smb_hdr *smb)
*
* Note that this might make for "interesting" allocation problems during
* writeback however as we have to allocate an array of pointers for the
- * pages. A 16M write means ~32kb page array with PAGE_CACHE_SIZE == 4096.
+ * pages. A 16M write means ~32kb page array with PAGE_SIZE == 4096.
*
* For reads, there is a similar problem as we need to allocate an array
* of kvecs to handle the receive, though that should only need to be done
@@ -733,7 +733,7 @@ compare_mid(__u16 mid, const struct smb_hdr *smb)
/*
* The default wsize is 1M. find_get_pages seems to return a maximum of 256
- * pages in a single call. With PAGE_CACHE_SIZE == 4k, this means we can fill
+ * pages in a single call. With PAGE_SIZE == 4k, this means we can fill
* a single wsize request with a single call.
*/
#define CIFS_DEFAULT_IOSIZE (1024 * 1024)
@@ -1596,11 +1596,11 @@ GLOBAL_EXTERN atomic_t midCount;
/* Misc globals */
GLOBAL_EXTERN bool enable_oplocks; /* enable or disable oplocks */
-GLOBAL_EXTERN unsigned int lookupCacheEnabled;
+GLOBAL_EXTERN bool lookupCacheEnabled;
GLOBAL_EXTERN unsigned int global_secflags; /* if on, session setup sent
with more secure ntlmssp2 challenge/resp */
GLOBAL_EXTERN unsigned int sign_CIFS_PDUs; /* enable smb packet signing */
-GLOBAL_EXTERN unsigned int linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
+GLOBAL_EXTERN bool linuxExtEnabled;/*enable Linux/Unix CIFS extensions*/
GLOBAL_EXTERN unsigned int CIFSMaxBufSize; /* max size not including hdr */
GLOBAL_EXTERN unsigned int cifs_min_rcv; /* min size of big ntwrk buf pool */
GLOBAL_EXTERN unsigned int cifs_min_small; /* min size of small buf pool */
diff --git a/fs/cifs/cifssmb.c b/fs/cifs/cifssmb.c
index 76fcb5029..a894bf809 100644
--- a/fs/cifs/cifssmb.c
+++ b/fs/cifs/cifssmb.c
@@ -1929,17 +1929,17 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
wsize = server->ops->wp_retry_size(inode);
if (wsize < rest_len) {
- nr_pages = wsize / PAGE_CACHE_SIZE;
+ nr_pages = wsize / PAGE_SIZE;
if (!nr_pages) {
rc = -ENOTSUPP;
break;
}
- cur_len = nr_pages * PAGE_CACHE_SIZE;
- tailsz = PAGE_CACHE_SIZE;
+ cur_len = nr_pages * PAGE_SIZE;
+ tailsz = PAGE_SIZE;
} else {
- nr_pages = DIV_ROUND_UP(rest_len, PAGE_CACHE_SIZE);
+ nr_pages = DIV_ROUND_UP(rest_len, PAGE_SIZE);
cur_len = rest_len;
- tailsz = rest_len - (nr_pages - 1) * PAGE_CACHE_SIZE;
+ tailsz = rest_len - (nr_pages - 1) * PAGE_SIZE;
}
wdata2 = cifs_writedata_alloc(nr_pages, cifs_writev_complete);
@@ -1957,7 +1957,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
wdata2->sync_mode = wdata->sync_mode;
wdata2->nr_pages = nr_pages;
wdata2->offset = page_offset(wdata2->pages[0]);
- wdata2->pagesz = PAGE_CACHE_SIZE;
+ wdata2->pagesz = PAGE_SIZE;
wdata2->tailsz = tailsz;
wdata2->bytes = cur_len;
@@ -1975,7 +1975,7 @@ cifs_writev_requeue(struct cifs_writedata *wdata)
if (rc != 0 && rc != -EAGAIN) {
SetPageError(wdata2->pages[j]);
end_page_writeback(wdata2->pages[j]);
- page_cache_release(wdata2->pages[j]);
+ put_page(wdata2->pages[j]);
}
}
@@ -2018,7 +2018,7 @@ cifs_writev_complete(struct work_struct *work)
else if (wdata->result < 0)
SetPageError(page);
end_page_writeback(page);
- page_cache_release(page);
+ put_page(page);
}
if (wdata->result != -EAGAIN)
mapping_set_error(inode->i_mapping, wdata->result);
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index a763cd3d9..6f62ac821 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3630,7 +3630,7 @@ try_mount_again:
cifs_sb->rsize = server->ops->negotiate_rsize(tcon, volume_info);
/* tune readahead according to rsize */
- cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_CACHE_SIZE;
+ cifs_sb->bdi.ra_pages = cifs_sb->rsize / PAGE_SIZE;
remote_path_check:
#ifdef CONFIG_CIFS_DFS_UPCALL
diff --git a/fs/cifs/file.c b/fs/cifs/file.c
index ff882aeac..c03d07446 100644
--- a/fs/cifs/file.c
+++ b/fs/cifs/file.c
@@ -1833,7 +1833,7 @@ refind_writable:
static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
{
struct address_space *mapping = page->mapping;
- loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ loff_t offset = (loff_t)page->index << PAGE_SHIFT;
char *write_data;
int rc = -EFAULT;
int bytes_written = 0;
@@ -1849,7 +1849,7 @@ static int cifs_partialpagewrite(struct page *page, unsigned from, unsigned to)
write_data = kmap(page);
write_data += from;
- if ((to > PAGE_CACHE_SIZE) || (from > to)) {
+ if ((to > PAGE_SIZE) || (from > to)) {
kunmap(page);
return -EIO;
}
@@ -1902,7 +1902,7 @@ wdata_alloc_and_fillpages(pgoff_t tofind, struct address_space *mapping,
* find_get_pages_tag seems to return a max of 256 on each
* iteration, so we must call it several times in order to
* fill the array or the wsize is effectively limited to
- * 256 * PAGE_CACHE_SIZE.
+ * 256 * PAGE_SIZE.
*/
*found_pages = 0;
pages = wdata->pages;
@@ -1991,7 +1991,7 @@ wdata_prepare_pages(struct cifs_writedata *wdata, unsigned int found_pages,
/* put any pages we aren't going to use */
for (i = nr_pages; i < found_pages; i++) {
- page_cache_release(wdata->pages[i]);
+ put_page(wdata->pages[i]);
wdata->pages[i] = NULL;
}
@@ -2009,11 +2009,11 @@ wdata_send_pages(struct cifs_writedata *wdata, unsigned int nr_pages,
wdata->sync_mode = wbc->sync_mode;
wdata->nr_pages = nr_pages;
wdata->offset = page_offset(wdata->pages[0]);
- wdata->pagesz = PAGE_CACHE_SIZE;
+ wdata->pagesz = PAGE_SIZE;
wdata->tailsz = min(i_size_read(mapping->host) -
page_offset(wdata->pages[nr_pages - 1]),
- (loff_t)PAGE_CACHE_SIZE);
- wdata->bytes = ((nr_pages - 1) * PAGE_CACHE_SIZE) + wdata->tailsz;
+ (loff_t)PAGE_SIZE);
+ wdata->bytes = ((nr_pages - 1) * PAGE_SIZE) + wdata->tailsz;
if (wdata->cfile != NULL)
cifsFileInfo_put(wdata->cfile);
@@ -2047,15 +2047,15 @@ static int cifs_writepages(struct address_space *mapping,
* If wsize is smaller than the page cache size, default to writing
* one page at a time via cifs_writepage
*/
- if (cifs_sb->wsize < PAGE_CACHE_SIZE)
+ if (cifs_sb->wsize < PAGE_SIZE)
return generic_writepages(mapping, wbc);
if (wbc->range_cyclic) {
index = mapping->writeback_index; /* Start from prev offset */
end = -1;
} else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ index = wbc->range_start >> PAGE_SHIFT;
+ end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = true;
scanned = true;
@@ -2071,7 +2071,7 @@ retry:
if (rc)
break;
- tofind = min((wsize / PAGE_CACHE_SIZE) - 1, end - index) + 1;
+ tofind = min((wsize / PAGE_SIZE) - 1, end - index) + 1;
wdata = wdata_alloc_and_fillpages(tofind, mapping, end, &index,
&found_pages);
@@ -2111,7 +2111,7 @@ retry:
else
SetPageError(wdata->pages[i]);
end_page_writeback(wdata->pages[i]);
- page_cache_release(wdata->pages[i]);
+ put_page(wdata->pages[i]);
}
if (rc != -EAGAIN)
mapping_set_error(mapping, rc);
@@ -2154,7 +2154,7 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
xid = get_xid();
/* BB add check for wbc flags */
- page_cache_get(page);
+ get_page(page);
if (!PageUptodate(page))
cifs_dbg(FYI, "ppw - page not up to date\n");
@@ -2170,7 +2170,7 @@ cifs_writepage_locked(struct page *page, struct writeback_control *wbc)
*/
set_page_writeback(page);
retry_write:
- rc = cifs_partialpagewrite(page, 0, PAGE_CACHE_SIZE);
+ rc = cifs_partialpagewrite(page, 0, PAGE_SIZE);
if (rc == -EAGAIN && wbc->sync_mode == WB_SYNC_ALL)
goto retry_write;
else if (rc == -EAGAIN)
@@ -2180,7 +2180,7 @@ retry_write:
else
SetPageUptodate(page);
end_page_writeback(page);
- page_cache_release(page);
+ put_page(page);
free_xid(xid);
return rc;
}
@@ -2214,12 +2214,12 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
if (copied == len)
SetPageUptodate(page);
ClearPageChecked(page);
- } else if (!PageUptodate(page) && copied == PAGE_CACHE_SIZE)
+ } else if (!PageUptodate(page) && copied == PAGE_SIZE)
SetPageUptodate(page);
if (!PageUptodate(page)) {
char *page_data;
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned offset = pos & (PAGE_SIZE - 1);
unsigned int xid;
xid = get_xid();
@@ -2248,7 +2248,7 @@ static int cifs_write_end(struct file *file, struct address_space *mapping,
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return rc;
}
@@ -3286,9 +3286,9 @@ cifs_readv_complete(struct work_struct *work)
(rdata->result == -EAGAIN && got_bytes))
cifs_readpage_to_fscache(rdata->mapping->host, page);
- got_bytes -= min_t(unsigned int, PAGE_CACHE_SIZE, got_bytes);
+ got_bytes -= min_t(unsigned int, PAGE_SIZE, got_bytes);
- page_cache_release(page);
+ put_page(page);
rdata->pages[i] = NULL;
}
kref_put(&rdata->refcount, cifs_readdata_release);
@@ -3307,21 +3307,21 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
/* determine the eof that the server (probably) has */
eof = CIFS_I(rdata->mapping->host)->server_eof;
- eof_index = eof ? (eof - 1) >> PAGE_CACHE_SHIFT : 0;
+ eof_index = eof ? (eof - 1) >> PAGE_SHIFT : 0;
cifs_dbg(FYI, "eof=%llu eof_index=%lu\n", eof, eof_index);
rdata->got_bytes = 0;
- rdata->tailsz = PAGE_CACHE_SIZE;
+ rdata->tailsz = PAGE_SIZE;
for (i = 0; i < nr_pages; i++) {
struct page *page = rdata->pages[i];
- if (len >= PAGE_CACHE_SIZE) {
+ if (len >= PAGE_SIZE) {
/* enough data to fill the page */
iov.iov_base = kmap(page);
- iov.iov_len = PAGE_CACHE_SIZE;
+ iov.iov_len = PAGE_SIZE;
cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
i, page->index, iov.iov_base, iov.iov_len);
- len -= PAGE_CACHE_SIZE;
+ len -= PAGE_SIZE;
} else if (len > 0) {
/* enough for partial page, fill and zero the rest */
iov.iov_base = kmap(page);
@@ -3329,7 +3329,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
cifs_dbg(FYI, "%u: idx=%lu iov_base=%p iov_len=%zu\n",
i, page->index, iov.iov_base, iov.iov_len);
memset(iov.iov_base + len,
- '\0', PAGE_CACHE_SIZE - len);
+ '\0', PAGE_SIZE - len);
rdata->tailsz = len;
len = 0;
} else if (page->index > eof_index) {
@@ -3341,12 +3341,12 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
* to prevent the VFS from repeatedly attempting to
* fill them until the writes are flushed.
*/
- zero_user(page, 0, PAGE_CACHE_SIZE);
+ zero_user(page, 0, PAGE_SIZE);
lru_cache_add_file(page);
flush_dcache_page(page);
SetPageUptodate(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
rdata->pages[i] = NULL;
rdata->nr_pages--;
continue;
@@ -3354,7 +3354,7 @@ cifs_readpages_read_into_pages(struct TCP_Server_Info *server,
/* no need to hold page hostage */
lru_cache_add_file(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
rdata->pages[i] = NULL;
rdata->nr_pages--;
continue;
@@ -3402,8 +3402,8 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
}
/* move first page to the tmplist */
- *offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
- *bytes = PAGE_CACHE_SIZE;
+ *offset = (loff_t)page->index << PAGE_SHIFT;
+ *bytes = PAGE_SIZE;
*nr_pages = 1;
list_move_tail(&page->lru, tmplist);
@@ -3415,7 +3415,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
break;
/* would this page push the read over the rsize? */
- if (*bytes + PAGE_CACHE_SIZE > rsize)
+ if (*bytes + PAGE_SIZE > rsize)
break;
__SetPageLocked(page);
@@ -3424,7 +3424,7 @@ readpages_get_pages(struct address_space *mapping, struct list_head *page_list,
break;
}
list_move_tail(&page->lru, tmplist);
- (*bytes) += PAGE_CACHE_SIZE;
+ (*bytes) += PAGE_SIZE;
expected_index++;
(*nr_pages)++;
}
@@ -3493,7 +3493,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
* reach this point however since we set ra_pages to 0 when the
* rsize is smaller than a cache page.
*/
- if (unlikely(rsize < PAGE_CACHE_SIZE)) {
+ if (unlikely(rsize < PAGE_SIZE)) {
add_credits_and_wake_if(server, credits, 0);
return 0;
}
@@ -3512,7 +3512,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
list_del(&page->lru);
lru_cache_add_file(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
rc = -ENOMEM;
add_credits_and_wake_if(server, credits, 0);
@@ -3524,7 +3524,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
rdata->offset = offset;
rdata->bytes = bytes;
rdata->pid = pid;
- rdata->pagesz = PAGE_CACHE_SIZE;
+ rdata->pagesz = PAGE_SIZE;
rdata->read_into_pages = cifs_readpages_read_into_pages;
rdata->credits = credits;
@@ -3542,7 +3542,7 @@ static int cifs_readpages(struct file *file, struct address_space *mapping,
page = rdata->pages[i];
lru_cache_add_file(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
/* Fallback to the readpage in error/reconnect cases */
kref_put(&rdata->refcount, cifs_readdata_release);
@@ -3577,7 +3577,7 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
read_data = kmap(page);
/* for reads over a certain size could initiate async read ahead */
- rc = cifs_read(file, read_data, PAGE_CACHE_SIZE, poffset);
+ rc = cifs_read(file, read_data, PAGE_SIZE, poffset);
if (rc < 0)
goto io_error;
@@ -3587,8 +3587,8 @@ static int cifs_readpage_worker(struct file *file, struct page *page,
file_inode(file)->i_atime =
current_fs_time(file_inode(file)->i_sb);
- if (PAGE_CACHE_SIZE > rc)
- memset(read_data + rc, 0, PAGE_CACHE_SIZE - rc);
+ if (PAGE_SIZE > rc)
+ memset(read_data + rc, 0, PAGE_SIZE - rc);
flush_dcache_page(page);
SetPageUptodate(page);
@@ -3608,7 +3608,7 @@ read_complete:
static int cifs_readpage(struct file *file, struct page *page)
{
- loff_t offset = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ loff_t offset = (loff_t)page->index << PAGE_SHIFT;
int rc = -EACCES;
unsigned int xid;
@@ -3679,8 +3679,8 @@ static int cifs_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
int oncethru = 0;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- loff_t offset = pos & (PAGE_CACHE_SIZE - 1);
+ pgoff_t index = pos >> PAGE_SHIFT;
+ loff_t offset = pos & (PAGE_SIZE - 1);
loff_t page_start = pos & PAGE_MASK;
loff_t i_size;
struct page *page;
@@ -3703,7 +3703,7 @@ start:
* the server. If the write is short, we'll end up doing a sync write
* instead.
*/
- if (len == PAGE_CACHE_SIZE)
+ if (len == PAGE_SIZE)
goto out;
/*
@@ -3718,7 +3718,7 @@ start:
(offset == 0 && (pos + len) >= i_size)) {
zero_user_segments(page, 0, offset,
offset + len,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
/*
* PageChecked means that the parts of the page
* to which we're not writing are considered up
@@ -3737,7 +3737,7 @@ start:
* do a sync write instead since PG_uptodate isn't set.
*/
cifs_readpage_worker(file, page, &page_start);
- page_cache_release(page);
+ put_page(page);
oncethru = 1;
goto start;
} else {
@@ -3764,7 +3764,7 @@ static void cifs_invalidate_page(struct page *page, unsigned int offset,
{
struct cifsInodeInfo *cifsi = CIFS_I(page->mapping->host);
- if (offset == 0 && length == PAGE_CACHE_SIZE)
+ if (offset == 0 && length == PAGE_SIZE)
cifs_fscache_invalidate_page(page, &cifsi->vfs_inode);
}
@@ -3772,7 +3772,7 @@ static int cifs_launder_page(struct page *page)
{
int rc = 0;
loff_t range_start = page_offset(page);
- loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+ loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c
index aeb26dbfa..5f9ad5c42 100644
--- a/fs/cifs/inode.c
+++ b/fs/cifs/inode.c
@@ -59,7 +59,7 @@ static void cifs_set_ops(struct inode *inode)
/* check if server can support readpages */
if (cifs_sb_master_tcon(cifs_sb)->ses->server->maxBuf <
- PAGE_CACHE_SIZE + MAX_CIFS_HDR_SIZE)
+ PAGE_SIZE + MAX_CIFS_HDR_SIZE)
inode->i_data.a_ops = &cifs_addr_ops_smallbuf;
else
inode->i_data.a_ops = &cifs_addr_ops;
@@ -2019,8 +2019,8 @@ int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry,
static int cifs_truncate_page(struct address_space *mapping, loff_t from)
{
- pgoff_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE - 1);
+ pgoff_t index = from >> PAGE_SHIFT;
+ unsigned offset = from & (PAGE_SIZE - 1);
struct page *page;
int rc = 0;
@@ -2028,9 +2028,9 @@ static int cifs_truncate_page(struct address_space *mapping, loff_t from)
if (!page)
return -ENOMEM;
- zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ zero_user_segment(page, offset, PAGE_SIZE);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return rc;
}
diff --git a/fs/cifs/sess.c b/fs/cifs/sess.c
index 59727e32e..af0ec2d5a 100644
--- a/fs/cifs/sess.c
+++ b/fs/cifs/sess.c
@@ -400,19 +400,27 @@ int build_ntlmssp_auth_blob(unsigned char *pbuffer,
sec_blob->LmChallengeResponse.MaximumLength = 0;
sec_blob->NtChallengeResponse.BufferOffset = cpu_to_le32(tmp - pbuffer);
- rc = setup_ntlmv2_rsp(ses, nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
- goto setup_ntlmv2_ret;
+ if (ses->user_name != NULL) {
+ rc = setup_ntlmv2_rsp(ses, nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLMSSP authentication\n", rc);
+ goto setup_ntlmv2_ret;
+ }
+ memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+
+ sec_blob->NtChallengeResponse.Length =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ sec_blob->NtChallengeResponse.MaximumLength =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ } else {
+ /*
+ * don't send an NT Response for anonymous access
+ */
+ sec_blob->NtChallengeResponse.Length = 0;
+ sec_blob->NtChallengeResponse.MaximumLength = 0;
}
- memcpy(tmp, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- ses->auth_key.len - CIFS_SESS_KEY_SIZE);
- tmp += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
-
- sec_blob->NtChallengeResponse.Length =
- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
- sec_blob->NtChallengeResponse.MaximumLength =
- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
if (ses->domainName == NULL) {
sec_blob->DomainName.BufferOffset = cpu_to_le32(tmp - pbuffer);
@@ -670,20 +678,24 @@ sess_auth_lanman(struct sess_data *sess_data)
pSMB->req.hdr.Flags2 &= ~SMBFLG2_UNICODE;
- /* no capabilities flags in old lanman negotiation */
- pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
-
- /* Calculate hash with password and copy into bcc_ptr.
- * Encryption Key (stored as in cryptkey) gets used if the
- * security mode bit in Negottiate Protocol response states
- * to use challenge/response method (i.e. Password bit is 1).
- */
- rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
- ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
- true : false, lnm_session_key);
-
- memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ if (ses->user_name != NULL) {
+ /* no capabilities flags in old lanman negotiation */
+ pSMB->old_req.PasswordLength = cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+
+ /* Calculate hash with password and copy into bcc_ptr.
+ * Encryption Key (stored as in cryptkey) gets used if the
+ * security mode bit in Negottiate Protocol response states
+ * to use challenge/response method (i.e. Password bit is 1).
+ */
+ rc = calc_lanman_hash(ses->password, ses->server->cryptkey,
+ ses->server->sec_mode & SECMODE_PW_ENCRYPT ?
+ true : false, lnm_session_key);
+
+ memcpy(bcc_ptr, (char *)lnm_session_key, CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ } else {
+ pSMB->old_req.PasswordLength = 0;
+ }
/*
* can not sign if LANMAN negotiated so no need
@@ -769,26 +781,31 @@ sess_auth_ntlm(struct sess_data *sess_data)
capabilities = cifs_ssetup_hdr(ses, pSMB);
pSMB->req_no_secext.Capabilities = cpu_to_le32(capabilities);
- pSMB->req_no_secext.CaseInsensitivePasswordLength =
- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
- pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(CIFS_AUTH_RESP_SIZE);
-
- /* calculate ntlm response and session key */
- rc = setup_ntlm_response(ses, sess_data->nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLM authentication\n",
- rc);
- goto out;
- }
+ if (ses->user_name != NULL) {
+ pSMB->req_no_secext.CaseInsensitivePasswordLength =
+ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+ pSMB->req_no_secext.CaseSensitivePasswordLength =
+ cpu_to_le16(CIFS_AUTH_RESP_SIZE);
+
+ /* calculate ntlm response and session key */
+ rc = setup_ntlm_response(ses, sess_data->nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLM authentication\n",
+ rc);
+ goto out;
+ }
- /* copy ntlm response */
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- CIFS_AUTH_RESP_SIZE);
- bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ /* copy ntlm response */
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ CIFS_AUTH_RESP_SIZE);
+ bcc_ptr += CIFS_AUTH_RESP_SIZE;
+ } else {
+ pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
+ pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
+ }
if (ses->capabilities & CAP_UNICODE) {
/* unicode strings must be word aligned */
@@ -878,22 +895,26 @@ sess_auth_ntlmv2(struct sess_data *sess_data)
/* LM2 password would be here if we supported it */
pSMB->req_no_secext.CaseInsensitivePasswordLength = 0;
- /* calculate nlmv2 response and session key */
- rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
- if (rc) {
- cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
- goto out;
- }
+ if (ses->user_name != NULL) {
+ /* calculate nlmv2 response and session key */
+ rc = setup_ntlmv2_rsp(ses, sess_data->nls_cp);
+ if (rc) {
+ cifs_dbg(VFS, "Error %d during NTLMv2 authentication\n", rc);
+ goto out;
+ }
- memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
- ses->auth_key.len - CIFS_SESS_KEY_SIZE);
- bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
+ memcpy(bcc_ptr, ses->auth_key.response + CIFS_SESS_KEY_SIZE,
+ ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ bcc_ptr += ses->auth_key.len - CIFS_SESS_KEY_SIZE;
- /* set case sensitive password length after tilen may get
- * assigned, tilen is 0 otherwise.
- */
- pSMB->req_no_secext.CaseSensitivePasswordLength =
- cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ /* set case sensitive password length after tilen may get
+ * assigned, tilen is 0 otherwise.
+ */
+ pSMB->req_no_secext.CaseSensitivePasswordLength =
+ cpu_to_le16(ses->auth_key.len - CIFS_SESS_KEY_SIZE);
+ } else {
+ pSMB->req_no_secext.CaseSensitivePasswordLength = 0;
+ }
if (ses->capabilities & CAP_UNICODE) {
if (sess_data->iov[0].iov_len % 2) {
diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h
index bc0bb9c34..0ffa18094 100644
--- a/fs/cifs/smb2glob.h
+++ b/fs/cifs/smb2glob.h
@@ -44,6 +44,7 @@
#define SMB2_OP_DELETE 7
#define SMB2_OP_HARDLINK 8
#define SMB2_OP_SET_EOF 9
+#define SMB2_OP_RMDIR 10
/* Used when constructing chained read requests. */
#define CHAINED_REQUEST 1
diff --git a/fs/cifs/smb2inode.c b/fs/cifs/smb2inode.c
index 899bbc86f..4f0231e68 100644
--- a/fs/cifs/smb2inode.c
+++ b/fs/cifs/smb2inode.c
@@ -80,6 +80,10 @@ smb2_open_op_close(const unsigned int xid, struct cifs_tcon *tcon,
* SMB2_open() call.
*/
break;
+ case SMB2_OP_RMDIR:
+ tmprc = SMB2_rmdir(xid, tcon, fid.persistent_fid,
+ fid.volatile_fid);
+ break;
case SMB2_OP_RENAME:
tmprc = SMB2_rename(xid, tcon, fid.persistent_fid,
fid.volatile_fid, (__le16 *)data);
@@ -191,8 +195,8 @@ smb2_rmdir(const unsigned int xid, struct cifs_tcon *tcon, const char *name,
struct cifs_sb_info *cifs_sb)
{
return smb2_open_op_close(xid, tcon, cifs_sb, name, DELETE, FILE_OPEN,
- CREATE_NOT_FILE | CREATE_DELETE_ON_CLOSE,
- NULL, SMB2_OP_DELETE);
+ CREATE_NOT_FILE,
+ NULL, SMB2_OP_RMDIR);
}
int
diff --git a/fs/cifs/smb2pdu.c b/fs/cifs/smb2pdu.c
index 42e1f440e..8f38e33d3 100644
--- a/fs/cifs/smb2pdu.c
+++ b/fs/cifs/smb2pdu.c
@@ -2575,6 +2575,22 @@ SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
}
int
+SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid)
+{
+ __u8 delete_pending = 1;
+ void *data;
+ unsigned int size;
+
+ data = &delete_pending;
+ size = 1; /* sizeof __u8 */
+
+ return send_set_info(xid, tcon, persistent_fid, volatile_fid,
+ current->tgid, FILE_DISPOSITION_INFORMATION, 1, &data,
+ &size);
+}
+
+int
SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid, __le16 *target_file)
{
diff --git a/fs/cifs/smb2proto.h b/fs/cifs/smb2proto.h
index 4f07dc936..eb2cde2f6 100644
--- a/fs/cifs/smb2proto.h
+++ b/fs/cifs/smb2proto.h
@@ -141,6 +141,8 @@ extern int SMB2_query_directory(const unsigned int xid, struct cifs_tcon *tcon,
extern int SMB2_rename(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
__le16 *target_file);
+extern int SMB2_rmdir(const unsigned int xid, struct cifs_tcon *tcon,
+ u64 persistent_fid, u64 volatile_fid);
extern int SMB2_set_hardlink(const unsigned int xid, struct cifs_tcon *tcon,
u64 persistent_fid, u64 volatile_fid,
__le16 *target_file);
diff --git a/fs/cifs/smbencrypt.c b/fs/cifs/smbencrypt.c
index a4232ec4f..699b78681 100644
--- a/fs/cifs/smbencrypt.c
+++ b/fs/cifs/smbencrypt.c
@@ -23,6 +23,7 @@
Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*/
+#include <crypto/skcipher.h>
#include <linux/module.h>
#include <linux/slab.h>
#include <linux/fs.h>
@@ -70,31 +71,42 @@ smbhash(unsigned char *out, const unsigned char *in, unsigned char *key)
{
int rc;
unsigned char key2[8];
- struct crypto_blkcipher *tfm_des;
+ struct crypto_skcipher *tfm_des;
struct scatterlist sgin, sgout;
- struct blkcipher_desc desc;
+ struct skcipher_request *req;
str_to_key(key, key2);
- tfm_des = crypto_alloc_blkcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
+ tfm_des = crypto_alloc_skcipher("ecb(des)", 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(tfm_des)) {
rc = PTR_ERR(tfm_des);
cifs_dbg(VFS, "could not allocate des crypto API\n");
goto smbhash_err;
}
- desc.tfm = tfm_des;
+ req = skcipher_request_alloc(tfm_des, GFP_KERNEL);
+ if (!req) {
+ rc = -ENOMEM;
+ cifs_dbg(VFS, "could not allocate des crypto API\n");
+ goto smbhash_free_skcipher;
+ }
- crypto_blkcipher_setkey(tfm_des, key2, 8);
+ crypto_skcipher_setkey(tfm_des, key2, 8);
sg_init_one(&sgin, in, 8);
sg_init_one(&sgout, out, 8);
- rc = crypto_blkcipher_encrypt(&desc, &sgout, &sgin, 8);
+ skcipher_request_set_callback(req, 0, NULL, NULL);
+ skcipher_request_set_crypt(req, &sgin, &sgout, 8, NULL);
+
+ rc = crypto_skcipher_encrypt(req);
if (rc)
cifs_dbg(VFS, "could not encrypt crypt key rc: %d\n", rc);
- crypto_free_blkcipher(tfm_des);
+ skcipher_request_free(req);
+
+smbhash_free_skcipher:
+ crypto_free_skcipher(tfm_des);
smbhash_err:
return rc;
}
diff --git a/fs/compat_ioctl.c b/fs/compat_ioctl.c
index 6402eaf8a..bd01b92aa 100644
--- a/fs/compat_ioctl.c
+++ b/fs/compat_ioctl.c
@@ -1040,28 +1040,6 @@ COMPATIBLE_IOCTL(PPPIOCGL2TPSTATS)
/* PPPOX */
COMPATIBLE_IOCTL(PPPOEIOCSFWD)
COMPATIBLE_IOCTL(PPPOEIOCDFWD)
-/* ppdev */
-COMPATIBLE_IOCTL(PPSETMODE)
-COMPATIBLE_IOCTL(PPRSTATUS)
-COMPATIBLE_IOCTL(PPRCONTROL)
-COMPATIBLE_IOCTL(PPWCONTROL)
-COMPATIBLE_IOCTL(PPFCONTROL)
-COMPATIBLE_IOCTL(PPRDATA)
-COMPATIBLE_IOCTL(PPWDATA)
-COMPATIBLE_IOCTL(PPCLAIM)
-COMPATIBLE_IOCTL(PPRELEASE)
-COMPATIBLE_IOCTL(PPYIELD)
-COMPATIBLE_IOCTL(PPEXCL)
-COMPATIBLE_IOCTL(PPDATADIR)
-COMPATIBLE_IOCTL(PPNEGOT)
-COMPATIBLE_IOCTL(PPWCTLONIRQ)
-COMPATIBLE_IOCTL(PPCLRIRQ)
-COMPATIBLE_IOCTL(PPSETPHASE)
-COMPATIBLE_IOCTL(PPGETMODES)
-COMPATIBLE_IOCTL(PPGETMODE)
-COMPATIBLE_IOCTL(PPGETPHASE)
-COMPATIBLE_IOCTL(PPGETFLAGS)
-COMPATIBLE_IOCTL(PPSETFLAGS)
/* Big A */
/* sparc only */
/* Big Q for sound/OSS */
diff --git a/fs/configfs/dir.c b/fs/configfs/dir.c
index f419519ec..ea59c891f 100644
--- a/fs/configfs/dir.c
+++ b/fs/configfs/dir.c
@@ -432,14 +432,9 @@ static int configfs_attach_attr(struct configfs_dirent * sd, struct dentry * den
(sd->s_type & CONFIGFS_ITEM_BIN_ATTR) ?
configfs_init_bin_file :
configfs_init_file);
- if (error) {
+ if (error)
configfs_put(sd);
- return error;
- }
-
- d_rehash(dentry);
-
- return 0;
+ return error;
}
static struct dentry * configfs_lookup(struct inode *dir,
@@ -701,23 +696,29 @@ static int populate_groups(struct config_group *group)
{
struct config_group *new_group;
int ret = 0;
- int i;
-
- if (group->default_groups) {
- for (i = 0; group->default_groups[i]; i++) {
- new_group = group->default_groups[i];
- ret = create_default_group(group, new_group);
- if (ret) {
- detach_groups(group);
- break;
- }
+ list_for_each_entry(new_group, &group->default_groups, group_entry) {
+ ret = create_default_group(group, new_group);
+ if (ret) {
+ detach_groups(group);
+ break;
}
}
return ret;
}
+void configfs_remove_default_groups(struct config_group *group)
+{
+ struct config_group *g, *n;
+
+ list_for_each_entry_safe(g, n, &group->default_groups, group_entry) {
+ list_del(&g->group_entry);
+ config_item_put(&g->cg_item);
+ }
+}
+EXPORT_SYMBOL(configfs_remove_default_groups);
+
/*
* All of link_obj/unlink_obj/link_group/unlink_group require that
* subsys->su_mutex is held.
@@ -766,15 +767,10 @@ static void link_obj(struct config_item *parent_item, struct config_item *item)
static void unlink_group(struct config_group *group)
{
- int i;
struct config_group *new_group;
- if (group->default_groups) {
- for (i = 0; group->default_groups[i]; i++) {
- new_group = group->default_groups[i];
- unlink_group(new_group);
- }
- }
+ list_for_each_entry(new_group, &group->default_groups, group_entry)
+ unlink_group(new_group);
group->cg_subsys = NULL;
unlink_obj(&group->cg_item);
@@ -782,7 +778,6 @@ static void unlink_group(struct config_group *group)
static void link_group(struct config_group *parent_group, struct config_group *group)
{
- int i;
struct config_group *new_group;
struct configfs_subsystem *subsys = NULL; /* gcc is a turd */
@@ -796,12 +791,8 @@ static void link_group(struct config_group *parent_group, struct config_group *g
BUG();
group->cg_subsys = subsys;
- if (group->default_groups) {
- for (i = 0; group->default_groups[i]; i++) {
- new_group = group->default_groups[i];
- link_group(group, new_group);
- }
- }
+ list_for_each_entry(new_group, &group->default_groups, group_entry)
+ link_group(group, new_group);
}
/*
diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
index cee087d8f..03d124ae2 100644
--- a/fs/configfs/inode.c
+++ b/fs/configfs/inode.c
@@ -75,7 +75,8 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
sd_iattr->ia_mode = sd->s_mode;
sd_iattr->ia_uid = GLOBAL_ROOT_UID;
sd_iattr->ia_gid = GLOBAL_ROOT_GID;
- sd_iattr->ia_atime = sd_iattr->ia_mtime = sd_iattr->ia_ctime = CURRENT_TIME;
+ sd_iattr->ia_atime = sd_iattr->ia_mtime =
+ sd_iattr->ia_ctime = current_fs_time(inode->i_sb);
sd->s_iattr = sd_iattr;
}
/* attributes were changed atleast once in past */
@@ -111,7 +112,8 @@ int configfs_setattr(struct dentry * dentry, struct iattr * iattr)
static inline void set_default_inode_attr(struct inode * inode, umode_t mode)
{
inode->i_mode = mode;
- inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_atime = inode->i_mtime =
+ inode->i_ctime = current_fs_time(inode->i_sb);
}
static inline void set_inode_attr(struct inode * inode, struct iattr * iattr)
@@ -195,13 +197,21 @@ int configfs_create(struct dentry * dentry, umode_t mode, void (*init)(struct in
return -ENOMEM;
p_inode = d_inode(dentry->d_parent);
- p_inode->i_mtime = p_inode->i_ctime = CURRENT_TIME;
+ p_inode->i_mtime = p_inode->i_ctime = current_fs_time(p_inode->i_sb);
configfs_set_inode_lock_class(sd, inode);
init(inode);
- d_instantiate(dentry, inode);
- if (S_ISDIR(mode) || S_ISLNK(mode))
+ if (S_ISDIR(mode) || S_ISLNK(mode)) {
+ /*
+ * ->symlink(), ->mkdir(), configfs_register_subsystem() or
+ * create_default_group() - already hashed.
+ */
+ d_instantiate(dentry, inode);
dget(dentry); /* pin link and directory dentries in core */
+ } else {
+ /* ->lookup() */
+ d_add(dentry, inode);
+ }
return error;
}
diff --git a/fs/configfs/item.c b/fs/configfs/item.c
index b863a09cd..8b2a99404 100644
--- a/fs/configfs/item.c
+++ b/fs/configfs/item.c
@@ -182,6 +182,7 @@ void config_group_init(struct config_group *group)
{
config_item_init(&group->cg_item);
INIT_LIST_HEAD(&group->cg_children);
+ INIT_LIST_HEAD(&group->default_groups);
}
EXPORT_SYMBOL(config_group_init);
diff --git a/fs/configfs/mount.c b/fs/configfs/mount.c
index a8f3b589a..cfd91320e 100644
--- a/fs/configfs/mount.c
+++ b/fs/configfs/mount.c
@@ -71,8 +71,8 @@ static int configfs_fill_super(struct super_block *sb, void *data, int silent)
struct inode *inode;
struct dentry *root;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = CONFIGFS_MAGIC;
sb->s_op = &configfs_ops;
sb->s_time_gran = 1;
diff --git a/fs/cramfs/README b/fs/cramfs/README
index 445d1c2d7..9d4e7ea31 100644
--- a/fs/cramfs/README
+++ b/fs/cramfs/README
@@ -86,26 +86,26 @@ Block Size
(Block size in cramfs refers to the size of input data that is
compressed at a time. It's intended to be somewhere around
-PAGE_CACHE_SIZE for cramfs_readpage's convenience.)
+PAGE_SIZE for cramfs_readpage's convenience.)
The superblock ought to indicate the block size that the fs was
written for, since comments in <linux/pagemap.h> indicate that
-PAGE_CACHE_SIZE may grow in future (if I interpret the comment
+PAGE_SIZE may grow in future (if I interpret the comment
correctly).
-Currently, mkcramfs #define's PAGE_CACHE_SIZE as 4096 and uses that
-for blksize, whereas Linux-2.3.39 uses its PAGE_CACHE_SIZE, which in
+Currently, mkcramfs #define's PAGE_SIZE as 4096 and uses that
+for blksize, whereas Linux-2.3.39 uses its PAGE_SIZE, which in
turn is defined as PAGE_SIZE (which can be as large as 32KB on arm).
This discrepancy is a bug, though it's not clear which should be
changed.
-One option is to change mkcramfs to take its PAGE_CACHE_SIZE from
+One option is to change mkcramfs to take its PAGE_SIZE from
<asm/page.h>. Personally I don't like this option, but it does
require the least amount of change: just change `#define
-PAGE_CACHE_SIZE (4096)' to `#include <asm/page.h>'. The disadvantage
+PAGE_SIZE (4096)' to `#include <asm/page.h>'. The disadvantage
is that the generated cramfs cannot always be shared between different
kernels, not even necessarily kernels of the same architecture if
-PAGE_CACHE_SIZE is subject to change between kernel versions
+PAGE_SIZE is subject to change between kernel versions
(currently possible with arm and ia64).
The remaining options try to make cramfs more sharable.
@@ -126,22 +126,22 @@ size. The options are:
1. Always 4096 bytes.
2. Writer chooses blocksize; kernel adapts but rejects blocksize >
- PAGE_CACHE_SIZE.
+ PAGE_SIZE.
3. Writer chooses blocksize; kernel adapts even to blocksize >
- PAGE_CACHE_SIZE.
+ PAGE_SIZE.
It's easy enough to change the kernel to use a smaller value than
-PAGE_CACHE_SIZE: just make cramfs_readpage read multiple blocks.
+PAGE_SIZE: just make cramfs_readpage read multiple blocks.
-The cost of option 1 is that kernels with a larger PAGE_CACHE_SIZE
+The cost of option 1 is that kernels with a larger PAGE_SIZE
value don't get as good compression as they can.
The cost of option 2 relative to option 1 is that the code uses
variables instead of #define'd constants. The gain is that people
-with kernels having larger PAGE_CACHE_SIZE can make use of that if
+with kernels having larger PAGE_SIZE can make use of that if
they don't mind their cramfs being inaccessible to kernels with
-smaller PAGE_CACHE_SIZE values.
+smaller PAGE_SIZE values.
Option 3 is easy to implement if we don't mind being CPU-inefficient:
e.g. get readpage to decompress to a buffer of size MAX_BLKSIZE (which
diff --git a/fs/cramfs/inode.c b/fs/cramfs/inode.c
index b862bc219..3a32ddf98 100644
--- a/fs/cramfs/inode.c
+++ b/fs/cramfs/inode.c
@@ -137,7 +137,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
* page cache and dentry tree anyway..
*
* This also acts as a way to guarantee contiguous areas of up to
- * BLKS_PER_BUF*PAGE_CACHE_SIZE, so that the caller doesn't need to
+ * BLKS_PER_BUF*PAGE_SIZE, so that the caller doesn't need to
* worry about end-of-buffer issues even when decompressing a full
* page cache.
*/
@@ -152,7 +152,7 @@ static struct inode *get_cramfs_inode(struct super_block *sb,
*/
#define BLKS_PER_BUF_SHIFT (2)
#define BLKS_PER_BUF (1 << BLKS_PER_BUF_SHIFT)
-#define BUFFER_SIZE (BLKS_PER_BUF*PAGE_CACHE_SIZE)
+#define BUFFER_SIZE (BLKS_PER_BUF*PAGE_SIZE)
static unsigned char read_buffers[READ_BUFFERS][BUFFER_SIZE];
static unsigned buffer_blocknr[READ_BUFFERS];
@@ -173,8 +173,8 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
if (!len)
return NULL;
- blocknr = offset >> PAGE_CACHE_SHIFT;
- offset &= PAGE_CACHE_SIZE - 1;
+ blocknr = offset >> PAGE_SHIFT;
+ offset &= PAGE_SIZE - 1;
/* Check if an existing buffer already has the data.. */
for (i = 0; i < READ_BUFFERS; i++) {
@@ -184,14 +184,14 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
continue;
if (blocknr < buffer_blocknr[i])
continue;
- blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_CACHE_SHIFT;
+ blk_offset = (blocknr - buffer_blocknr[i]) << PAGE_SHIFT;
blk_offset += offset;
if (blk_offset + len > BUFFER_SIZE)
continue;
return read_buffers[i] + blk_offset;
}
- devsize = mapping->host->i_size >> PAGE_CACHE_SHIFT;
+ devsize = mapping->host->i_size >> PAGE_SHIFT;
/* Ok, read in BLKS_PER_BUF pages completely first. */
for (i = 0; i < BLKS_PER_BUF; i++) {
@@ -213,7 +213,7 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
wait_on_page_locked(page);
if (!PageUptodate(page)) {
/* asynchronous error */
- page_cache_release(page);
+ put_page(page);
pages[i] = NULL;
}
}
@@ -229,12 +229,12 @@ static void *cramfs_read(struct super_block *sb, unsigned int offset, unsigned i
struct page *page = pages[i];
if (page) {
- memcpy(data, kmap(page), PAGE_CACHE_SIZE);
+ memcpy(data, kmap(page), PAGE_SIZE);
kunmap(page);
- page_cache_release(page);
+ put_page(page);
} else
- memset(data, 0, PAGE_CACHE_SIZE);
- data += PAGE_CACHE_SIZE;
+ memset(data, 0, PAGE_SIZE);
+ data += PAGE_SIZE;
}
return read_buffers[buffer] + offset;
}
@@ -353,7 +353,7 @@ static int cramfs_statfs(struct dentry *dentry, struct kstatfs *buf)
u64 id = huge_encode_dev(sb->s_bdev->bd_dev);
buf->f_type = CRAMFS_MAGIC;
- buf->f_bsize = PAGE_CACHE_SIZE;
+ buf->f_bsize = PAGE_SIZE;
buf->f_blocks = CRAMFS_SB(sb)->blocks;
buf->f_bfree = 0;
buf->f_bavail = 0;
@@ -496,7 +496,7 @@ static int cramfs_readpage(struct file *file, struct page *page)
int bytes_filled;
void *pgdata;
- maxblock = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ maxblock = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
bytes_filled = 0;
pgdata = kmap(page);
@@ -516,14 +516,14 @@ static int cramfs_readpage(struct file *file, struct page *page)
if (compr_len == 0)
; /* hole */
- else if (unlikely(compr_len > (PAGE_CACHE_SIZE << 1))) {
+ else if (unlikely(compr_len > (PAGE_SIZE << 1))) {
pr_err("bad compressed blocksize %u\n",
compr_len);
goto err;
} else {
mutex_lock(&read_mutex);
bytes_filled = cramfs_uncompress_block(pgdata,
- PAGE_CACHE_SIZE,
+ PAGE_SIZE,
cramfs_read(sb, start_offset, compr_len),
compr_len);
mutex_unlock(&read_mutex);
@@ -532,7 +532,7 @@ static int cramfs_readpage(struct file *file, struct page *page)
}
}
- memset(pgdata + bytes_filled, 0, PAGE_CACHE_SIZE - bytes_filled);
+ memset(pgdata + bytes_filled, 0, PAGE_SIZE - bytes_filled);
flush_dcache_page(page);
kunmap(page);
SetPageUptodate(page);
diff --git a/fs/crypto/Kconfig b/fs/crypto/Kconfig
new file mode 100644
index 000000000..92348faf9
--- /dev/null
+++ b/fs/crypto/Kconfig
@@ -0,0 +1,18 @@
+config FS_ENCRYPTION
+ tristate "FS Encryption (Per-file encryption)"
+ depends on BLOCK
+ select CRYPTO
+ select CRYPTO_AES
+ select CRYPTO_CBC
+ select CRYPTO_ECB
+ select CRYPTO_XTS
+ select CRYPTO_CTS
+ select CRYPTO_CTR
+ select CRYPTO_SHA256
+ select KEYS
+ select ENCRYPTED_KEYS
+ help
+ Enable encryption of files and directories. This
+ feature is similar to ecryptfs, but it is more memory
+ efficient since it avoids caching the encrypted and
+ decrypted pages in the page cache.
diff --git a/fs/crypto/Makefile b/fs/crypto/Makefile
new file mode 100644
index 000000000..f17684c48
--- /dev/null
+++ b/fs/crypto/Makefile
@@ -0,0 +1,3 @@
+obj-$(CONFIG_FS_ENCRYPTION) += fscrypto.o
+
+fscrypto-y := crypto.o fname.o policy.o keyinfo.o
diff --git a/fs/crypto/crypto.c b/fs/crypto/crypto.c
new file mode 100644
index 000000000..2fc8c43ce
--- /dev/null
+++ b/fs/crypto/crypto.c
@@ -0,0 +1,568 @@
+/*
+ * This contains encryption functions for per-file encryption.
+ *
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility
+ *
+ * Written by Michael Halcrow, 2014.
+ *
+ * Filename encryption additions
+ * Uday Savagaonkar, 2014
+ * Encryption policy handling additions
+ * Ildar Muslukhov, 2014
+ * Add fscrypt_pullback_bio_page()
+ * Jaegeuk Kim, 2015.
+ *
+ * This has not yet undergone a rigorous security audit.
+ *
+ * The usage of AES-XTS should conform to recommendations in NIST
+ * Special Publication 800-38E and IEEE P1619/D16.
+ */
+
+#include <linux/pagemap.h>
+#include <linux/mempool.h>
+#include <linux/module.h>
+#include <linux/scatterlist.h>
+#include <linux/ratelimit.h>
+#include <linux/bio.h>
+#include <linux/dcache.h>
+#include <linux/namei.h>
+#include <linux/fscrypto.h>
+#include <linux/ecryptfs.h>
+
+static unsigned int num_prealloc_crypto_pages = 32;
+static unsigned int num_prealloc_crypto_ctxs = 128;
+
+module_param(num_prealloc_crypto_pages, uint, 0444);
+MODULE_PARM_DESC(num_prealloc_crypto_pages,
+ "Number of crypto pages to preallocate");
+module_param(num_prealloc_crypto_ctxs, uint, 0444);
+MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
+ "Number of crypto contexts to preallocate");
+
+static mempool_t *fscrypt_bounce_page_pool = NULL;
+
+static LIST_HEAD(fscrypt_free_ctxs);
+static DEFINE_SPINLOCK(fscrypt_ctx_lock);
+
+static struct workqueue_struct *fscrypt_read_workqueue;
+static DEFINE_MUTEX(fscrypt_init_mutex);
+
+static struct kmem_cache *fscrypt_ctx_cachep;
+struct kmem_cache *fscrypt_info_cachep;
+
+/**
+ * fscrypt_release_ctx() - Releases an encryption context
+ * @ctx: The encryption context to release.
+ *
+ * If the encryption context was allocated from the pre-allocated pool, returns
+ * it to that pool. Else, frees it.
+ *
+ * If there's a bounce page in the context, this frees that.
+ */
+void fscrypt_release_ctx(struct fscrypt_ctx *ctx)
+{
+ unsigned long flags;
+
+ if (ctx->flags & FS_WRITE_PATH_FL && ctx->w.bounce_page) {
+ mempool_free(ctx->w.bounce_page, fscrypt_bounce_page_pool);
+ ctx->w.bounce_page = NULL;
+ }
+ ctx->w.control_page = NULL;
+ if (ctx->flags & FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
+ kmem_cache_free(fscrypt_ctx_cachep, ctx);
+ } else {
+ spin_lock_irqsave(&fscrypt_ctx_lock, flags);
+ list_add(&ctx->free_list, &fscrypt_free_ctxs);
+ spin_unlock_irqrestore(&fscrypt_ctx_lock, flags);
+ }
+}
+EXPORT_SYMBOL(fscrypt_release_ctx);
+
+/**
+ * fscrypt_get_ctx() - Gets an encryption context
+ * @inode: The inode for which we are doing the crypto
+ * @gfp_flags: The gfp flag for memory allocation
+ *
+ * Allocates and initializes an encryption context.
+ *
+ * Return: An allocated and initialized encryption context on success; error
+ * value or NULL otherwise.
+ */
+struct fscrypt_ctx *fscrypt_get_ctx(struct inode *inode, gfp_t gfp_flags)
+{
+ struct fscrypt_ctx *ctx = NULL;
+ struct fscrypt_info *ci = inode->i_crypt_info;
+ unsigned long flags;
+
+ if (ci == NULL)
+ return ERR_PTR(-ENOKEY);
+
+ /*
+ * We first try getting the ctx from a free list because in
+ * the common case the ctx will have an allocated and
+ * initialized crypto tfm, so it's probably a worthwhile
+ * optimization. For the bounce page, we first try getting it
+ * from the kernel allocator because that's just about as fast
+ * as getting it from a list and because a cache of free pages
+ * should generally be a "last resort" option for a filesystem
+ * to be able to do its job.
+ */
+ spin_lock_irqsave(&fscrypt_ctx_lock, flags);
+ ctx = list_first_entry_or_null(&fscrypt_free_ctxs,
+ struct fscrypt_ctx, free_list);
+ if (ctx)
+ list_del(&ctx->free_list);
+ spin_unlock_irqrestore(&fscrypt_ctx_lock, flags);
+ if (!ctx) {
+ ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, gfp_flags);
+ if (!ctx)
+ return ERR_PTR(-ENOMEM);
+ ctx->flags |= FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
+ } else {
+ ctx->flags &= ~FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
+ }
+ ctx->flags &= ~FS_WRITE_PATH_FL;
+ return ctx;
+}
+EXPORT_SYMBOL(fscrypt_get_ctx);
+
+/**
+ * fscrypt_complete() - The completion callback for page encryption
+ * @req: The asynchronous encryption request context
+ * @res: The result of the encryption operation
+ */
+static void fscrypt_complete(struct crypto_async_request *req, int res)
+{
+ struct fscrypt_completion_result *ecr = req->data;
+
+ if (res == -EINPROGRESS)
+ return;
+ ecr->res = res;
+ complete(&ecr->completion);
+}
+
+typedef enum {
+ FS_DECRYPT = 0,
+ FS_ENCRYPT,
+} fscrypt_direction_t;
+
+static int do_page_crypto(struct inode *inode,
+ fscrypt_direction_t rw, pgoff_t index,
+ struct page *src_page, struct page *dest_page,
+ gfp_t gfp_flags)
+{
+ u8 xts_tweak[FS_XTS_TWEAK_SIZE];
+ struct skcipher_request *req = NULL;
+ DECLARE_FS_COMPLETION_RESULT(ecr);
+ struct scatterlist dst, src;
+ struct fscrypt_info *ci = inode->i_crypt_info;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
+ int res = 0;
+
+ req = skcipher_request_alloc(tfm, gfp_flags);
+ if (!req) {
+ printk_ratelimited(KERN_ERR
+ "%s: crypto_request_alloc() failed\n",
+ __func__);
+ return -ENOMEM;
+ }
+
+ skcipher_request_set_callback(
+ req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ fscrypt_complete, &ecr);
+
+ BUILD_BUG_ON(FS_XTS_TWEAK_SIZE < sizeof(index));
+ memcpy(xts_tweak, &index, sizeof(index));
+ memset(&xts_tweak[sizeof(index)], 0,
+ FS_XTS_TWEAK_SIZE - sizeof(index));
+
+ sg_init_table(&dst, 1);
+ sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
+ sg_init_table(&src, 1);
+ sg_set_page(&src, src_page, PAGE_SIZE, 0);
+ skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE,
+ xts_tweak);
+ if (rw == FS_DECRYPT)
+ res = crypto_skcipher_decrypt(req);
+ else
+ res = crypto_skcipher_encrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ BUG_ON(req->base.data != &ecr);
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+ skcipher_request_free(req);
+ if (res) {
+ printk_ratelimited(KERN_ERR
+ "%s: crypto_skcipher_encrypt() returned %d\n",
+ __func__, res);
+ return res;
+ }
+ return 0;
+}
+
+static struct page *alloc_bounce_page(struct fscrypt_ctx *ctx, gfp_t gfp_flags)
+{
+ ctx->w.bounce_page = mempool_alloc(fscrypt_bounce_page_pool, gfp_flags);
+ if (ctx->w.bounce_page == NULL)
+ return ERR_PTR(-ENOMEM);
+ ctx->flags |= FS_WRITE_PATH_FL;
+ return ctx->w.bounce_page;
+}
+
+/**
+ * fscypt_encrypt_page() - Encrypts a page
+ * @inode: The inode for which the encryption should take place
+ * @plaintext_page: The page to encrypt. Must be locked.
+ * @gfp_flags: The gfp flag for memory allocation
+ *
+ * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
+ * encryption context.
+ *
+ * Called on the page write path. The caller must call
+ * fscrypt_restore_control_page() on the returned ciphertext page to
+ * release the bounce buffer and the encryption context.
+ *
+ * Return: An allocated page with the encrypted content on success. Else, an
+ * error value or NULL.
+ */
+struct page *fscrypt_encrypt_page(struct inode *inode,
+ struct page *plaintext_page, gfp_t gfp_flags)
+{
+ struct fscrypt_ctx *ctx;
+ struct page *ciphertext_page = NULL;
+ int err;
+
+ BUG_ON(!PageLocked(plaintext_page));
+
+ ctx = fscrypt_get_ctx(inode, gfp_flags);
+ if (IS_ERR(ctx))
+ return (struct page *)ctx;
+
+ /* The encryption operation will require a bounce page. */
+ ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
+ if (IS_ERR(ciphertext_page))
+ goto errout;
+
+ ctx->w.control_page = plaintext_page;
+ err = do_page_crypto(inode, FS_ENCRYPT, plaintext_page->index,
+ plaintext_page, ciphertext_page,
+ gfp_flags);
+ if (err) {
+ ciphertext_page = ERR_PTR(err);
+ goto errout;
+ }
+ SetPagePrivate(ciphertext_page);
+ set_page_private(ciphertext_page, (unsigned long)ctx);
+ lock_page(ciphertext_page);
+ return ciphertext_page;
+
+errout:
+ fscrypt_release_ctx(ctx);
+ return ciphertext_page;
+}
+EXPORT_SYMBOL(fscrypt_encrypt_page);
+
+/**
+ * f2crypt_decrypt_page() - Decrypts a page in-place
+ * @page: The page to decrypt. Must be locked.
+ *
+ * Decrypts page in-place using the ctx encryption context.
+ *
+ * Called from the read completion callback.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int fscrypt_decrypt_page(struct page *page)
+{
+ BUG_ON(!PageLocked(page));
+
+ return do_page_crypto(page->mapping->host,
+ FS_DECRYPT, page->index, page, page, GFP_NOFS);
+}
+EXPORT_SYMBOL(fscrypt_decrypt_page);
+
+int fscrypt_zeroout_range(struct inode *inode, pgoff_t lblk,
+ sector_t pblk, unsigned int len)
+{
+ struct fscrypt_ctx *ctx;
+ struct page *ciphertext_page = NULL;
+ struct bio *bio;
+ int ret, err = 0;
+
+ BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
+
+ ctx = fscrypt_get_ctx(inode, GFP_NOFS);
+ if (IS_ERR(ctx))
+ return PTR_ERR(ctx);
+
+ ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT);
+ if (IS_ERR(ciphertext_page)) {
+ err = PTR_ERR(ciphertext_page);
+ goto errout;
+ }
+
+ while (len--) {
+ err = do_page_crypto(inode, FS_ENCRYPT, lblk,
+ ZERO_PAGE(0), ciphertext_page,
+ GFP_NOFS);
+ if (err)
+ goto errout;
+
+ bio = bio_alloc(GFP_NOWAIT, 1);
+ if (!bio) {
+ err = -ENOMEM;
+ goto errout;
+ }
+ bio->bi_bdev = inode->i_sb->s_bdev;
+ bio->bi_iter.bi_sector =
+ pblk << (inode->i_sb->s_blocksize_bits - 9);
+ ret = bio_add_page(bio, ciphertext_page,
+ inode->i_sb->s_blocksize, 0);
+ if (ret != inode->i_sb->s_blocksize) {
+ /* should never happen! */
+ WARN_ON(1);
+ bio_put(bio);
+ err = -EIO;
+ goto errout;
+ }
+ err = submit_bio_wait(WRITE, bio);
+ if ((err == 0) && bio->bi_error)
+ err = -EIO;
+ bio_put(bio);
+ if (err)
+ goto errout;
+ lblk++;
+ pblk++;
+ }
+ err = 0;
+errout:
+ fscrypt_release_ctx(ctx);
+ return err;
+}
+EXPORT_SYMBOL(fscrypt_zeroout_range);
+
+/*
+ * Validate dentries for encrypted directories to make sure we aren't
+ * potentially caching stale data after a key has been added or
+ * removed.
+ */
+static int fscrypt_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct dentry *dir;
+ struct fscrypt_info *ci;
+ int dir_has_key, cached_with_key;
+
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ dir = dget_parent(dentry);
+ if (!d_inode(dir)->i_sb->s_cop->is_encrypted(d_inode(dir))) {
+ dput(dir);
+ return 0;
+ }
+
+ ci = d_inode(dir)->i_crypt_info;
+ if (ci && ci->ci_keyring_key &&
+ (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED) |
+ (1 << KEY_FLAG_DEAD))))
+ ci = NULL;
+
+ /* this should eventually be an flag in d_flags */
+ spin_lock(&dentry->d_lock);
+ cached_with_key = dentry->d_flags & DCACHE_ENCRYPTED_WITH_KEY;
+ spin_unlock(&dentry->d_lock);
+ dir_has_key = (ci != NULL);
+ dput(dir);
+
+ /*
+ * If the dentry was cached without the key, and it is a
+ * negative dentry, it might be a valid name. We can't check
+ * if the key has since been made available due to locking
+ * reasons, so we fail the validation so ext4_lookup() can do
+ * this check.
+ *
+ * We also fail the validation if the dentry was created with
+ * the key present, but we no longer have the key, or vice versa.
+ */
+ if ((!cached_with_key && d_is_negative(dentry)) ||
+ (!cached_with_key && dir_has_key) ||
+ (cached_with_key && !dir_has_key))
+ return 0;
+ return 1;
+}
+
+const struct dentry_operations fscrypt_d_ops = {
+ .d_revalidate = fscrypt_d_revalidate,
+};
+EXPORT_SYMBOL(fscrypt_d_ops);
+
+/*
+ * Call fscrypt_decrypt_page on every single page, reusing the encryption
+ * context.
+ */
+static void completion_pages(struct work_struct *work)
+{
+ struct fscrypt_ctx *ctx =
+ container_of(work, struct fscrypt_ctx, r.work);
+ struct bio *bio = ctx->r.bio;
+ struct bio_vec *bv;
+ int i;
+
+ bio_for_each_segment_all(bv, bio, i) {
+ struct page *page = bv->bv_page;
+ int ret = fscrypt_decrypt_page(page);
+
+ if (ret) {
+ WARN_ON_ONCE(1);
+ SetPageError(page);
+ } else {
+ SetPageUptodate(page);
+ }
+ unlock_page(page);
+ }
+ fscrypt_release_ctx(ctx);
+ bio_put(bio);
+}
+
+void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio)
+{
+ INIT_WORK(&ctx->r.work, completion_pages);
+ ctx->r.bio = bio;
+ queue_work(fscrypt_read_workqueue, &ctx->r.work);
+}
+EXPORT_SYMBOL(fscrypt_decrypt_bio_pages);
+
+void fscrypt_pullback_bio_page(struct page **page, bool restore)
+{
+ struct fscrypt_ctx *ctx;
+ struct page *bounce_page;
+
+ /* The bounce data pages are unmapped. */
+ if ((*page)->mapping)
+ return;
+
+ /* The bounce data page is unmapped. */
+ bounce_page = *page;
+ ctx = (struct fscrypt_ctx *)page_private(bounce_page);
+
+ /* restore control page */
+ *page = ctx->w.control_page;
+
+ if (restore)
+ fscrypt_restore_control_page(bounce_page);
+}
+EXPORT_SYMBOL(fscrypt_pullback_bio_page);
+
+void fscrypt_restore_control_page(struct page *page)
+{
+ struct fscrypt_ctx *ctx;
+
+ ctx = (struct fscrypt_ctx *)page_private(page);
+ set_page_private(page, (unsigned long)NULL);
+ ClearPagePrivate(page);
+ unlock_page(page);
+ fscrypt_release_ctx(ctx);
+}
+EXPORT_SYMBOL(fscrypt_restore_control_page);
+
+static void fscrypt_destroy(void)
+{
+ struct fscrypt_ctx *pos, *n;
+
+ list_for_each_entry_safe(pos, n, &fscrypt_free_ctxs, free_list)
+ kmem_cache_free(fscrypt_ctx_cachep, pos);
+ INIT_LIST_HEAD(&fscrypt_free_ctxs);
+ mempool_destroy(fscrypt_bounce_page_pool);
+ fscrypt_bounce_page_pool = NULL;
+}
+
+/**
+ * fscrypt_initialize() - allocate major buffers for fs encryption.
+ *
+ * We only call this when we start accessing encrypted files, since it
+ * results in memory getting allocated that wouldn't otherwise be used.
+ *
+ * Return: Zero on success, non-zero otherwise.
+ */
+int fscrypt_initialize(void)
+{
+ int i, res = -ENOMEM;
+
+ if (fscrypt_bounce_page_pool)
+ return 0;
+
+ mutex_lock(&fscrypt_init_mutex);
+ if (fscrypt_bounce_page_pool)
+ goto already_initialized;
+
+ for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
+ struct fscrypt_ctx *ctx;
+
+ ctx = kmem_cache_zalloc(fscrypt_ctx_cachep, GFP_NOFS);
+ if (!ctx)
+ goto fail;
+ list_add(&ctx->free_list, &fscrypt_free_ctxs);
+ }
+
+ fscrypt_bounce_page_pool =
+ mempool_create_page_pool(num_prealloc_crypto_pages, 0);
+ if (!fscrypt_bounce_page_pool)
+ goto fail;
+
+already_initialized:
+ mutex_unlock(&fscrypt_init_mutex);
+ return 0;
+fail:
+ fscrypt_destroy();
+ mutex_unlock(&fscrypt_init_mutex);
+ return res;
+}
+EXPORT_SYMBOL(fscrypt_initialize);
+
+/**
+ * fscrypt_init() - Set up for fs encryption.
+ */
+static int __init fscrypt_init(void)
+{
+ fscrypt_read_workqueue = alloc_workqueue("fscrypt_read_queue",
+ WQ_HIGHPRI, 0);
+ if (!fscrypt_read_workqueue)
+ goto fail;
+
+ fscrypt_ctx_cachep = KMEM_CACHE(fscrypt_ctx, SLAB_RECLAIM_ACCOUNT);
+ if (!fscrypt_ctx_cachep)
+ goto fail_free_queue;
+
+ fscrypt_info_cachep = KMEM_CACHE(fscrypt_info, SLAB_RECLAIM_ACCOUNT);
+ if (!fscrypt_info_cachep)
+ goto fail_free_ctx;
+
+ return 0;
+
+fail_free_ctx:
+ kmem_cache_destroy(fscrypt_ctx_cachep);
+fail_free_queue:
+ destroy_workqueue(fscrypt_read_workqueue);
+fail:
+ return -ENOMEM;
+}
+module_init(fscrypt_init)
+
+/**
+ * fscrypt_exit() - Shutdown the fs encryption system
+ */
+static void __exit fscrypt_exit(void)
+{
+ fscrypt_destroy();
+
+ if (fscrypt_read_workqueue)
+ destroy_workqueue(fscrypt_read_workqueue);
+ kmem_cache_destroy(fscrypt_ctx_cachep);
+ kmem_cache_destroy(fscrypt_info_cachep);
+}
+module_exit(fscrypt_exit);
+
+MODULE_LICENSE("GPL");
diff --git a/fs/f2fs/crypto_fname.c b/fs/crypto/fname.c
index ab377d496..5d6d49113 100644
--- a/fs/f2fs/crypto_fname.c
+++ b/fs/crypto/fname.c
@@ -1,46 +1,32 @@
/*
- * linux/fs/f2fs/crypto_fname.c
- *
- * Copied from linux/fs/ext4/crypto.c
+ * This contains functions for filename crypto management
*
* Copyright (C) 2015, Google, Inc.
* Copyright (C) 2015, Motorola Mobility
*
- * This contains functions for filename crypto management in f2fs
- *
* Written by Uday Savagaonkar, 2014.
- *
- * Adjust f2fs dentry structure
- * Jaegeuk Kim, 2015.
+ * Modified by Jaegeuk Kim, 2015.
*
* This has not yet undergone a rigorous security audit.
*/
-#include <crypto/hash.h>
-#include <crypto/sha.h>
+
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
-#include <linux/crypto.h>
-#include <linux/gfp.h>
-#include <linux/kernel.h>
-#include <linux/key.h>
-#include <linux/list.h>
-#include <linux/mempool.h>
-#include <linux/random.h>
#include <linux/scatterlist.h>
-#include <linux/spinlock_types.h>
-#include <linux/f2fs_fs.h>
#include <linux/ratelimit.h>
+#include <linux/fscrypto.h>
-#include "f2fs.h"
-#include "f2fs_crypto.h"
-#include "xattr.h"
+static u32 size_round_up(size_t size, size_t blksize)
+{
+ return ((size + blksize - 1) / blksize) * blksize;
+}
/**
- * f2fs_dir_crypt_complete() -
+ * dir_crypt_complete() -
*/
-static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res)
+static void dir_crypt_complete(struct crypto_async_request *req, int res)
{
- struct f2fs_completion_result *ecr = req->data;
+ struct fscrypt_completion_result *ecr = req->data;
if (res == -EINPROGRESS)
return;
@@ -48,45 +34,35 @@ static void f2fs_dir_crypt_complete(struct crypto_async_request *req, int res)
complete(&ecr->completion);
}
-bool f2fs_valid_filenames_enc_mode(uint32_t mode)
-{
- return (mode == F2FS_ENCRYPTION_MODE_AES_256_CTS);
-}
-
-static unsigned max_name_len(struct inode *inode)
-{
- return S_ISLNK(inode->i_mode) ? inode->i_sb->s_blocksize :
- F2FS_NAME_LEN;
-}
-
/**
- * f2fs_fname_encrypt() -
+ * fname_encrypt() -
*
* This function encrypts the input filename, and returns the length of the
* ciphertext. Errors are returned as negative numbers. We trust the caller to
* allocate sufficient memory to oname string.
*/
-static int f2fs_fname_encrypt(struct inode *inode,
- const struct qstr *iname, struct f2fs_str *oname)
+static int fname_encrypt(struct inode *inode,
+ const struct qstr *iname, struct fscrypt_str *oname)
{
u32 ciphertext_len;
- struct ablkcipher_request *req = NULL;
- DECLARE_F2FS_COMPLETION_RESULT(ecr);
- struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct skcipher_request *req = NULL;
+ DECLARE_FS_COMPLETION_RESULT(ecr);
+ struct fscrypt_info *ci = inode->i_crypt_info;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
- char iv[F2FS_CRYPTO_BLOCK_SIZE];
+ char iv[FS_CRYPTO_BLOCK_SIZE];
struct scatterlist src_sg, dst_sg;
- int padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK);
+ int padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
char *workbuf, buf[32], *alloc_buf = NULL;
- unsigned lim = max_name_len(inode);
+ unsigned lim;
+ lim = inode->i_sb->s_cop->max_namelen(inode);
if (iname->len <= 0 || iname->len > lim)
return -EIO;
- ciphertext_len = (iname->len < F2FS_CRYPTO_BLOCK_SIZE) ?
- F2FS_CRYPTO_BLOCK_SIZE : iname->len;
- ciphertext_len = f2fs_fname_crypto_round_up(ciphertext_len, padding);
+ ciphertext_len = (iname->len < FS_CRYPTO_BLOCK_SIZE) ?
+ FS_CRYPTO_BLOCK_SIZE : iname->len;
+ ciphertext_len = size_round_up(ciphertext_len, padding);
ciphertext_len = (ciphertext_len > lim) ? lim : ciphertext_len;
if (ciphertext_len <= sizeof(buf)) {
@@ -99,16 +75,16 @@ static int f2fs_fname_encrypt(struct inode *inode,
}
/* Allocate request */
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n", __func__);
kfree(alloc_buf);
return -ENOMEM;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- f2fs_dir_crypt_complete, &ecr);
+ dir_crypt_complete, &ecr);
/* Copy the input */
memcpy(workbuf, iname->name, iname->len);
@@ -116,79 +92,78 @@ static int f2fs_fname_encrypt(struct inode *inode,
memset(workbuf + iname->len, 0, ciphertext_len - iname->len);
/* Initialize IV */
- memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE);
+ memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
/* Create encryption request */
sg_init_one(&src_sg, workbuf, ciphertext_len);
sg_init_one(&dst_sg, oname->name, ciphertext_len);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
- res = crypto_ablkcipher_encrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
- BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
kfree(alloc_buf);
- ablkcipher_request_free(req);
- if (res < 0) {
+ skcipher_request_free(req);
+ if (res < 0)
printk_ratelimited(KERN_ERR
"%s: Error (error code %d)\n", __func__, res);
- }
+
oname->len = ciphertext_len;
return res;
}
/*
- * f2fs_fname_decrypt()
+ * fname_decrypt()
* This function decrypts the input filename, and returns
* the length of the plaintext.
* Errors are returned as negative numbers.
* We trust the caller to allocate sufficient memory to oname string.
*/
-static int f2fs_fname_decrypt(struct inode *inode,
- const struct f2fs_str *iname, struct f2fs_str *oname)
+static int fname_decrypt(struct inode *inode,
+ const struct fscrypt_str *iname,
+ struct fscrypt_str *oname)
{
- struct ablkcipher_request *req = NULL;
- DECLARE_F2FS_COMPLETION_RESULT(ecr);
+ struct skcipher_request *req = NULL;
+ DECLARE_FS_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
- struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct fscrypt_info *ci = inode->i_crypt_info;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
- char iv[F2FS_CRYPTO_BLOCK_SIZE];
- unsigned lim = max_name_len(inode);
+ char iv[FS_CRYPTO_BLOCK_SIZE];
+ unsigned lim;
+ lim = inode->i_sb->s_cop->max_namelen(inode);
if (iname->len <= 0 || iname->len > lim)
return -EIO;
/* Allocate request */
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n", __func__);
return -ENOMEM;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- f2fs_dir_crypt_complete, &ecr);
+ dir_crypt_complete, &ecr);
/* Initialize IV */
- memset(iv, 0, F2FS_CRYPTO_BLOCK_SIZE);
+ memset(iv, 0, FS_CRYPTO_BLOCK_SIZE);
/* Create decryption request */
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
- res = crypto_ablkcipher_decrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
+ res = crypto_skcipher_decrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
- BUG_ON(req->base.data != &ecr);
wait_for_completion(&ecr.completion);
res = ecr.res;
}
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(KERN_ERR
- "%s: Error in f2fs_fname_decrypt (error code %d)\n",
- __func__, res);
+ "%s: Error (error code %d)\n", __func__, res);
return res;
}
@@ -200,7 +175,7 @@ static const char *lookup_table =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,";
/**
- * f2fs_fname_encode_digest() -
+ * digest_encode() -
*
* Encodes the input digest using characters from the set [a-zA-Z0-9_+].
* The encoded string is roughly 4/3 times the size of the input string.
@@ -249,148 +224,152 @@ static int digest_decode(const char *src, int len, char *dst)
return cp - dst;
}
-/**
- * f2fs_fname_crypto_round_up() -
- *
- * Return: The next multiple of block size
- */
-u32 f2fs_fname_crypto_round_up(u32 size, u32 blksize)
+u32 fscrypt_fname_encrypted_size(struct inode *inode, u32 ilen)
{
- return ((size + blksize - 1) / blksize) * blksize;
+ int padding = 32;
+ struct fscrypt_info *ci = inode->i_crypt_info;
+
+ if (ci)
+ padding = 4 << (ci->ci_flags & FS_POLICY_FLAGS_PAD_MASK);
+ if (ilen < FS_CRYPTO_BLOCK_SIZE)
+ ilen = FS_CRYPTO_BLOCK_SIZE;
+ return size_round_up(ilen, padding);
}
+EXPORT_SYMBOL(fscrypt_fname_encrypted_size);
/**
- * f2fs_fname_crypto_alloc_obuff() -
+ * fscrypt_fname_crypto_alloc_obuff() -
*
* Allocates an output buffer that is sufficient for the crypto operation
* specified by the context and the direction.
*/
-int f2fs_fname_crypto_alloc_buffer(struct inode *inode,
- u32 ilen, struct f2fs_str *crypto_str)
+int fscrypt_fname_alloc_buffer(struct inode *inode,
+ u32 ilen, struct fscrypt_str *crypto_str)
{
- unsigned int olen;
- int padding = 16;
- struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
+ unsigned int olen = fscrypt_fname_encrypted_size(inode, ilen);
- if (ci)
- padding = 4 << (ci->ci_flags & F2FS_POLICY_FLAGS_PAD_MASK);
- if (padding < F2FS_CRYPTO_BLOCK_SIZE)
- padding = F2FS_CRYPTO_BLOCK_SIZE;
- olen = f2fs_fname_crypto_round_up(ilen, padding);
crypto_str->len = olen;
- if (olen < F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2)
- olen = F2FS_FNAME_CRYPTO_DIGEST_SIZE * 2;
- /* Allocated buffer can hold one more character to null-terminate the
- * string */
+ if (olen < FS_FNAME_CRYPTO_DIGEST_SIZE * 2)
+ olen = FS_FNAME_CRYPTO_DIGEST_SIZE * 2;
+ /*
+ * Allocated buffer can hold one more character to null-terminate the
+ * string
+ */
crypto_str->name = kmalloc(olen + 1, GFP_NOFS);
if (!(crypto_str->name))
return -ENOMEM;
return 0;
}
+EXPORT_SYMBOL(fscrypt_fname_alloc_buffer);
/**
- * f2fs_fname_crypto_free_buffer() -
+ * fscrypt_fname_crypto_free_buffer() -
*
* Frees the buffer allocated for crypto operation.
*/
-void f2fs_fname_crypto_free_buffer(struct f2fs_str *crypto_str)
+void fscrypt_fname_free_buffer(struct fscrypt_str *crypto_str)
{
if (!crypto_str)
return;
kfree(crypto_str->name);
crypto_str->name = NULL;
}
+EXPORT_SYMBOL(fscrypt_fname_free_buffer);
/**
- * f2fs_fname_disk_to_usr() - converts a filename from disk space to user space
+ * fscrypt_fname_disk_to_usr() - converts a filename from disk space to user
+ * space
*/
-int f2fs_fname_disk_to_usr(struct inode *inode,
- f2fs_hash_t *hash,
- const struct f2fs_str *iname,
- struct f2fs_str *oname)
+int fscrypt_fname_disk_to_usr(struct inode *inode,
+ u32 hash, u32 minor_hash,
+ const struct fscrypt_str *iname,
+ struct fscrypt_str *oname)
{
const struct qstr qname = FSTR_TO_QSTR(iname);
char buf[24];
int ret;
- if (is_dot_dotdot(&qname)) {
+ if (fscrypt_is_dot_dotdot(&qname)) {
oname->name[0] = '.';
oname->name[iname->len - 1] = '.';
oname->len = iname->len;
return oname->len;
}
- if (F2FS_I(inode)->i_crypt_info)
- return f2fs_fname_decrypt(inode, iname, oname);
+ if (iname->len < FS_CRYPTO_BLOCK_SIZE)
+ return -EUCLEAN;
- if (iname->len <= F2FS_FNAME_CRYPTO_DIGEST_SIZE) {
+ if (inode->i_crypt_info)
+ return fname_decrypt(inode, iname, oname);
+
+ if (iname->len <= FS_FNAME_CRYPTO_DIGEST_SIZE) {
ret = digest_encode(iname->name, iname->len, oname->name);
oname->len = ret;
return ret;
}
if (hash) {
- memcpy(buf, hash, 4);
- memset(buf + 4, 0, 4);
- } else
+ memcpy(buf, &hash, 4);
+ memcpy(buf + 4, &minor_hash, 4);
+ } else {
memset(buf, 0, 8);
+ }
memcpy(buf + 8, iname->name + iname->len - 16, 16);
oname->name[0] = '_';
ret = digest_encode(buf, 24, oname->name + 1);
oname->len = ret + 1;
return ret + 1;
}
+EXPORT_SYMBOL(fscrypt_fname_disk_to_usr);
/**
- * f2fs_fname_usr_to_disk() - converts a filename from user space to disk space
+ * fscrypt_fname_usr_to_disk() - converts a filename from user space to disk
+ * space
*/
-int f2fs_fname_usr_to_disk(struct inode *inode,
+int fscrypt_fname_usr_to_disk(struct inode *inode,
const struct qstr *iname,
- struct f2fs_str *oname)
+ struct fscrypt_str *oname)
{
- int res;
- struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
-
- if (is_dot_dotdot(iname)) {
+ if (fscrypt_is_dot_dotdot(iname)) {
oname->name[0] = '.';
oname->name[iname->len - 1] = '.';
oname->len = iname->len;
return oname->len;
}
-
- if (ci) {
- res = f2fs_fname_encrypt(inode, iname, oname);
- return res;
- }
- /* Without a proper key, a user is not allowed to modify the filenames
+ if (inode->i_crypt_info)
+ return fname_encrypt(inode, iname, oname);
+ /*
+ * Without a proper key, a user is not allowed to modify the filenames
* in a directory. Consequently, a user space name cannot be mapped to
- * a disk-space name */
+ * a disk-space name
+ */
return -EACCES;
}
+EXPORT_SYMBOL(fscrypt_fname_usr_to_disk);
-int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname,
- int lookup, struct f2fs_filename *fname)
+int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname,
+ int lookup, struct fscrypt_name *fname)
{
- struct f2fs_crypt_info *ci;
int ret = 0, bigname = 0;
- memset(fname, 0, sizeof(struct f2fs_filename));
+ memset(fname, 0, sizeof(struct fscrypt_name));
fname->usr_fname = iname;
- if (!f2fs_encrypted_inode(dir) || is_dot_dotdot(iname)) {
+ if (!dir->i_sb->s_cop->is_encrypted(dir) ||
+ fscrypt_is_dot_dotdot(iname)) {
fname->disk_name.name = (unsigned char *)iname->name;
fname->disk_name.len = iname->len;
return 0;
}
- ret = f2fs_get_encryption_info(dir);
- if (ret)
+ ret = get_crypt_info(dir);
+ if (ret && ret != -EOPNOTSUPP)
return ret;
- ci = F2FS_I(dir)->i_crypt_info;
- if (ci) {
- ret = f2fs_fname_crypto_alloc_buffer(dir, iname->len,
- &fname->crypto_buf);
+
+ if (dir->i_crypt_info) {
+ ret = fscrypt_fname_alloc_buffer(dir, iname->len,
+ &fname->crypto_buf);
if (ret < 0)
return ret;
- ret = f2fs_fname_encrypt(dir, iname, &fname->crypto_buf);
+ ret = fname_encrypt(dir, iname, &fname->crypto_buf);
if (ret < 0)
goto errout;
fname->disk_name.name = fname->crypto_buf.name;
@@ -400,18 +379,19 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname,
if (!lookup)
return -EACCES;
- /* We don't have the key and we are doing a lookup; decode the
+ /*
+ * We don't have the key and we are doing a lookup; decode the
* user-supplied name
*/
if (iname->name[0] == '_')
bigname = 1;
- if ((bigname && (iname->len != 33)) ||
- (!bigname && (iname->len > 43)))
+ if ((bigname && (iname->len != 33)) || (!bigname && (iname->len > 43)))
return -ENOENT;
fname->crypto_buf.name = kmalloc(32, GFP_KERNEL);
if (fname->crypto_buf.name == NULL)
return -ENOMEM;
+
ret = digest_decode(iname->name + bigname, iname->len - bigname,
fname->crypto_buf.name);
if (ret < 0) {
@@ -421,20 +401,24 @@ int f2fs_fname_setup_filename(struct inode *dir, const struct qstr *iname,
fname->crypto_buf.len = ret;
if (bigname) {
memcpy(&fname->hash, fname->crypto_buf.name, 4);
+ memcpy(&fname->minor_hash, fname->crypto_buf.name + 4, 4);
} else {
fname->disk_name.name = fname->crypto_buf.name;
fname->disk_name.len = fname->crypto_buf.len;
}
return 0;
+
errout:
- f2fs_fname_crypto_free_buffer(&fname->crypto_buf);
+ fscrypt_fname_free_buffer(&fname->crypto_buf);
return ret;
}
+EXPORT_SYMBOL(fscrypt_setup_filename);
-void f2fs_fname_free_filename(struct f2fs_filename *fname)
+void fscrypt_free_filename(struct fscrypt_name *fname)
{
kfree(fname->crypto_buf.name);
fname->crypto_buf.name = NULL;
fname->usr_fname = NULL;
fname->disk_name.name = NULL;
}
+EXPORT_SYMBOL(fscrypt_free_filename);
diff --git a/fs/crypto/keyinfo.c b/fs/crypto/keyinfo.c
new file mode 100644
index 000000000..1ac263edd
--- /dev/null
+++ b/fs/crypto/keyinfo.c
@@ -0,0 +1,304 @@
+/*
+ * key management facility for FS encryption support.
+ *
+ * Copyright (C) 2015, Google, Inc.
+ *
+ * This contains encryption key functions.
+ *
+ * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
+ */
+
+#include <keys/encrypted-type.h>
+#include <keys/user-type.h>
+#include <linux/random.h>
+#include <linux/scatterlist.h>
+#include <uapi/linux/keyctl.h>
+#include <linux/fscrypto.h>
+
+static void derive_crypt_complete(struct crypto_async_request *req, int rc)
+{
+ struct fscrypt_completion_result *ecr = req->data;
+
+ if (rc == -EINPROGRESS)
+ return;
+
+ ecr->res = rc;
+ complete(&ecr->completion);
+}
+
+/**
+ * derive_key_aes() - Derive a key using AES-128-ECB
+ * @deriving_key: Encryption key used for derivation.
+ * @source_key: Source key to which to apply derivation.
+ * @derived_key: Derived key.
+ *
+ * Return: Zero on success; non-zero otherwise.
+ */
+static int derive_key_aes(u8 deriving_key[FS_AES_128_ECB_KEY_SIZE],
+ u8 source_key[FS_AES_256_XTS_KEY_SIZE],
+ u8 derived_key[FS_AES_256_XTS_KEY_SIZE])
+{
+ int res = 0;
+ struct skcipher_request *req = NULL;
+ DECLARE_FS_COMPLETION_RESULT(ecr);
+ struct scatterlist src_sg, dst_sg;
+ struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
+
+ if (IS_ERR(tfm)) {
+ res = PTR_ERR(tfm);
+ tfm = NULL;
+ goto out;
+ }
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
+ if (!req) {
+ res = -ENOMEM;
+ goto out;
+ }
+ skcipher_request_set_callback(req,
+ CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
+ derive_crypt_complete, &ecr);
+ res = crypto_skcipher_setkey(tfm, deriving_key,
+ FS_AES_128_ECB_KEY_SIZE);
+ if (res < 0)
+ goto out;
+
+ sg_init_one(&src_sg, source_key, FS_AES_256_XTS_KEY_SIZE);
+ sg_init_one(&dst_sg, derived_key, FS_AES_256_XTS_KEY_SIZE);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg,
+ FS_AES_256_XTS_KEY_SIZE, NULL);
+ res = crypto_skcipher_encrypt(req);
+ if (res == -EINPROGRESS || res == -EBUSY) {
+ wait_for_completion(&ecr.completion);
+ res = ecr.res;
+ }
+out:
+ skcipher_request_free(req);
+ crypto_free_skcipher(tfm);
+ return res;
+}
+
+static int validate_user_key(struct fscrypt_info *crypt_info,
+ struct fscrypt_context *ctx, u8 *raw_key,
+ u8 *prefix, int prefix_size)
+{
+ u8 *full_key_descriptor;
+ struct key *keyring_key;
+ struct fscrypt_key *master_key;
+ const struct user_key_payload *ukp;
+ int full_key_len = prefix_size + (FS_KEY_DESCRIPTOR_SIZE * 2) + 1;
+ int res;
+
+ full_key_descriptor = kmalloc(full_key_len, GFP_NOFS);
+ if (!full_key_descriptor)
+ return -ENOMEM;
+
+ memcpy(full_key_descriptor, prefix, prefix_size);
+ sprintf(full_key_descriptor + prefix_size,
+ "%*phN", FS_KEY_DESCRIPTOR_SIZE,
+ ctx->master_key_descriptor);
+ full_key_descriptor[full_key_len - 1] = '\0';
+ keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
+ kfree(full_key_descriptor);
+ if (IS_ERR(keyring_key))
+ return PTR_ERR(keyring_key);
+
+ if (keyring_key->type != &key_type_logon) {
+ printk_once(KERN_WARNING
+ "%s: key type must be logon\n", __func__);
+ res = -ENOKEY;
+ goto out;
+ }
+ down_read(&keyring_key->sem);
+ ukp = user_key_payload(keyring_key);
+ if (ukp->datalen != sizeof(struct fscrypt_key)) {
+ res = -EINVAL;
+ up_read(&keyring_key->sem);
+ goto out;
+ }
+ master_key = (struct fscrypt_key *)ukp->data;
+ BUILD_BUG_ON(FS_AES_128_ECB_KEY_SIZE != FS_KEY_DERIVATION_NONCE_SIZE);
+
+ if (master_key->size != FS_AES_256_XTS_KEY_SIZE) {
+ printk_once(KERN_WARNING
+ "%s: key size incorrect: %d\n",
+ __func__, master_key->size);
+ res = -ENOKEY;
+ up_read(&keyring_key->sem);
+ goto out;
+ }
+ res = derive_key_aes(ctx->nonce, master_key->raw, raw_key);
+ up_read(&keyring_key->sem);
+ if (res)
+ goto out;
+
+ crypt_info->ci_keyring_key = keyring_key;
+ return 0;
+out:
+ key_put(keyring_key);
+ return res;
+}
+
+static void put_crypt_info(struct fscrypt_info *ci)
+{
+ if (!ci)
+ return;
+
+ key_put(ci->ci_keyring_key);
+ crypto_free_skcipher(ci->ci_ctfm);
+ kmem_cache_free(fscrypt_info_cachep, ci);
+}
+
+int get_crypt_info(struct inode *inode)
+{
+ struct fscrypt_info *crypt_info;
+ struct fscrypt_context ctx;
+ struct crypto_skcipher *ctfm;
+ const char *cipher_str;
+ u8 raw_key[FS_MAX_KEY_SIZE];
+ u8 mode;
+ int res;
+
+ res = fscrypt_initialize();
+ if (res)
+ return res;
+
+ if (!inode->i_sb->s_cop->get_context)
+ return -EOPNOTSUPP;
+retry:
+ crypt_info = ACCESS_ONCE(inode->i_crypt_info);
+ if (crypt_info) {
+ if (!crypt_info->ci_keyring_key ||
+ key_validate(crypt_info->ci_keyring_key) == 0)
+ return 0;
+ fscrypt_put_encryption_info(inode, crypt_info);
+ goto retry;
+ }
+
+ res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
+ if (res < 0) {
+ if (!fscrypt_dummy_context_enabled(inode))
+ return res;
+ ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
+ ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
+ ctx.flags = 0;
+ } else if (res != sizeof(ctx)) {
+ return -EINVAL;
+ }
+ res = 0;
+
+ crypt_info = kmem_cache_alloc(fscrypt_info_cachep, GFP_NOFS);
+ if (!crypt_info)
+ return -ENOMEM;
+
+ crypt_info->ci_flags = ctx.flags;
+ crypt_info->ci_data_mode = ctx.contents_encryption_mode;
+ crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
+ crypt_info->ci_ctfm = NULL;
+ crypt_info->ci_keyring_key = NULL;
+ memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
+ sizeof(crypt_info->ci_master_key));
+ if (S_ISREG(inode->i_mode))
+ mode = crypt_info->ci_data_mode;
+ else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
+ mode = crypt_info->ci_filename_mode;
+ else
+ BUG();
+
+ switch (mode) {
+ case FS_ENCRYPTION_MODE_AES_256_XTS:
+ cipher_str = "xts(aes)";
+ break;
+ case FS_ENCRYPTION_MODE_AES_256_CTS:
+ cipher_str = "cts(cbc(aes))";
+ break;
+ default:
+ printk_once(KERN_WARNING
+ "%s: unsupported key mode %d (ino %u)\n",
+ __func__, mode, (unsigned) inode->i_ino);
+ res = -ENOKEY;
+ goto out;
+ }
+ if (fscrypt_dummy_context_enabled(inode)) {
+ memset(raw_key, 0x42, FS_AES_256_XTS_KEY_SIZE);
+ goto got_key;
+ }
+
+ res = validate_user_key(crypt_info, &ctx, raw_key,
+ FS_KEY_DESC_PREFIX, FS_KEY_DESC_PREFIX_SIZE);
+ if (res && inode->i_sb->s_cop->key_prefix) {
+ u8 *prefix = NULL;
+ int prefix_size, res2;
+
+ prefix_size = inode->i_sb->s_cop->key_prefix(inode, &prefix);
+ res2 = validate_user_key(crypt_info, &ctx, raw_key,
+ prefix, prefix_size);
+ if (res2) {
+ if (res2 == -ENOKEY)
+ res = -ENOKEY;
+ goto out;
+ }
+ } else if (res) {
+ goto out;
+ }
+got_key:
+ ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
+ if (!ctfm || IS_ERR(ctfm)) {
+ res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
+ printk(KERN_DEBUG
+ "%s: error %d (inode %u) allocating crypto tfm\n",
+ __func__, res, (unsigned) inode->i_ino);
+ goto out;
+ }
+ crypt_info->ci_ctfm = ctfm;
+ crypto_skcipher_clear_flags(ctfm, ~0);
+ crypto_skcipher_set_flags(ctfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ res = crypto_skcipher_setkey(ctfm, raw_key, fscrypt_key_size(mode));
+ if (res)
+ goto out;
+
+ memzero_explicit(raw_key, sizeof(raw_key));
+ if (cmpxchg(&inode->i_crypt_info, NULL, crypt_info) != NULL) {
+ put_crypt_info(crypt_info);
+ goto retry;
+ }
+ return 0;
+
+out:
+ if (res == -ENOKEY)
+ res = 0;
+ put_crypt_info(crypt_info);
+ memzero_explicit(raw_key, sizeof(raw_key));
+ return res;
+}
+
+void fscrypt_put_encryption_info(struct inode *inode, struct fscrypt_info *ci)
+{
+ struct fscrypt_info *prev;
+
+ if (ci == NULL)
+ ci = ACCESS_ONCE(inode->i_crypt_info);
+ if (ci == NULL)
+ return;
+
+ prev = cmpxchg(&inode->i_crypt_info, ci, NULL);
+ if (prev != ci)
+ return;
+
+ put_crypt_info(ci);
+}
+EXPORT_SYMBOL(fscrypt_put_encryption_info);
+
+int fscrypt_get_encryption_info(struct inode *inode)
+{
+ struct fscrypt_info *ci = inode->i_crypt_info;
+
+ if (!ci ||
+ (ci->ci_keyring_key &&
+ (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED) |
+ (1 << KEY_FLAG_DEAD)))))
+ return get_crypt_info(inode);
+ return 0;
+}
+EXPORT_SYMBOL(fscrypt_get_encryption_info);
diff --git a/fs/crypto/policy.c b/fs/crypto/policy.c
new file mode 100644
index 000000000..0f9961eed
--- /dev/null
+++ b/fs/crypto/policy.c
@@ -0,0 +1,229 @@
+/*
+ * Encryption policy functions for per-file encryption support.
+ *
+ * Copyright (C) 2015, Google, Inc.
+ * Copyright (C) 2015, Motorola Mobility.
+ *
+ * Written by Michael Halcrow, 2015.
+ * Modified by Jaegeuk Kim, 2015.
+ */
+
+#include <linux/random.h>
+#include <linux/string.h>
+#include <linux/fscrypto.h>
+
+static int inode_has_encryption_context(struct inode *inode)
+{
+ if (!inode->i_sb->s_cop->get_context)
+ return 0;
+ return (inode->i_sb->s_cop->get_context(inode, NULL, 0L) > 0);
+}
+
+/*
+ * check whether the policy is consistent with the encryption context
+ * for the inode
+ */
+static int is_encryption_context_consistent_with_policy(struct inode *inode,
+ const struct fscrypt_policy *policy)
+{
+ struct fscrypt_context ctx;
+ int res;
+
+ if (!inode->i_sb->s_cop->get_context)
+ return 0;
+
+ res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
+ if (res != sizeof(ctx))
+ return 0;
+
+ return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (ctx.flags == policy->flags) &&
+ (ctx.contents_encryption_mode ==
+ policy->contents_encryption_mode) &&
+ (ctx.filenames_encryption_mode ==
+ policy->filenames_encryption_mode));
+}
+
+static int create_encryption_context_from_policy(struct inode *inode,
+ const struct fscrypt_policy *policy)
+{
+ struct fscrypt_context ctx;
+ int res;
+
+ if (!inode->i_sb->s_cop->set_context)
+ return -EOPNOTSUPP;
+
+ if (inode->i_sb->s_cop->prepare_context) {
+ res = inode->i_sb->s_cop->prepare_context(inode);
+ if (res)
+ return res;
+ }
+
+ ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
+ memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE);
+
+ if (!fscrypt_valid_contents_enc_mode(
+ policy->contents_encryption_mode)) {
+ printk(KERN_WARNING
+ "%s: Invalid contents encryption mode %d\n", __func__,
+ policy->contents_encryption_mode);
+ return -EINVAL;
+ }
+
+ if (!fscrypt_valid_filenames_enc_mode(
+ policy->filenames_encryption_mode)) {
+ printk(KERN_WARNING
+ "%s: Invalid filenames encryption mode %d\n", __func__,
+ policy->filenames_encryption_mode);
+ return -EINVAL;
+ }
+
+ if (policy->flags & ~FS_POLICY_FLAGS_VALID)
+ return -EINVAL;
+
+ ctx.contents_encryption_mode = policy->contents_encryption_mode;
+ ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
+ ctx.flags = policy->flags;
+ BUILD_BUG_ON(sizeof(ctx.nonce) != FS_KEY_DERIVATION_NONCE_SIZE);
+ get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+
+ return inode->i_sb->s_cop->set_context(inode, &ctx, sizeof(ctx), NULL);
+}
+
+int fscrypt_process_policy(struct inode *inode,
+ const struct fscrypt_policy *policy)
+{
+ if (policy->version != 0)
+ return -EINVAL;
+
+ if (!inode_has_encryption_context(inode)) {
+ if (!inode->i_sb->s_cop->empty_dir)
+ return -EOPNOTSUPP;
+ if (!inode->i_sb->s_cop->empty_dir(inode))
+ return -ENOTEMPTY;
+ return create_encryption_context_from_policy(inode, policy);
+ }
+
+ if (is_encryption_context_consistent_with_policy(inode, policy))
+ return 0;
+
+ printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
+ __func__);
+ return -EINVAL;
+}
+EXPORT_SYMBOL(fscrypt_process_policy);
+
+int fscrypt_get_policy(struct inode *inode, struct fscrypt_policy *policy)
+{
+ struct fscrypt_context ctx;
+ int res;
+
+ if (!inode->i_sb->s_cop->get_context ||
+ !inode->i_sb->s_cop->is_encrypted(inode))
+ return -ENODATA;
+
+ res = inode->i_sb->s_cop->get_context(inode, &ctx, sizeof(ctx));
+ if (res != sizeof(ctx))
+ return -ENODATA;
+ if (ctx.format != FS_ENCRYPTION_CONTEXT_FORMAT_V1)
+ return -EINVAL;
+
+ policy->version = 0;
+ policy->contents_encryption_mode = ctx.contents_encryption_mode;
+ policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
+ policy->flags = ctx.flags;
+ memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
+ FS_KEY_DESCRIPTOR_SIZE);
+ return 0;
+}
+EXPORT_SYMBOL(fscrypt_get_policy);
+
+int fscrypt_has_permitted_context(struct inode *parent, struct inode *child)
+{
+ struct fscrypt_info *parent_ci, *child_ci;
+ int res;
+
+ if ((parent == NULL) || (child == NULL)) {
+ printk(KERN_ERR "parent %p child %p\n", parent, child);
+ BUG_ON(1);
+ }
+
+ /* no restrictions if the parent directory is not encrypted */
+ if (!parent->i_sb->s_cop->is_encrypted(parent))
+ return 1;
+ /* if the child directory is not encrypted, this is always a problem */
+ if (!parent->i_sb->s_cop->is_encrypted(child))
+ return 0;
+ res = fscrypt_get_encryption_info(parent);
+ if (res)
+ return 0;
+ res = fscrypt_get_encryption_info(child);
+ if (res)
+ return 0;
+ parent_ci = parent->i_crypt_info;
+ child_ci = child->i_crypt_info;
+ if (!parent_ci && !child_ci)
+ return 1;
+ if (!parent_ci || !child_ci)
+ return 0;
+
+ return (memcmp(parent_ci->ci_master_key,
+ child_ci->ci_master_key,
+ FS_KEY_DESCRIPTOR_SIZE) == 0 &&
+ (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
+ (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
+ (parent_ci->ci_flags == child_ci->ci_flags));
+}
+EXPORT_SYMBOL(fscrypt_has_permitted_context);
+
+/**
+ * fscrypt_inherit_context() - Sets a child context from its parent
+ * @parent: Parent inode from which the context is inherited.
+ * @child: Child inode that inherits the context from @parent.
+ * @fs_data: private data given by FS.
+ * @preload: preload child i_crypt_info
+ *
+ * Return: Zero on success, non-zero otherwise
+ */
+int fscrypt_inherit_context(struct inode *parent, struct inode *child,
+ void *fs_data, bool preload)
+{
+ struct fscrypt_context ctx;
+ struct fscrypt_info *ci;
+ int res;
+
+ if (!parent->i_sb->s_cop->set_context)
+ return -EOPNOTSUPP;
+
+ res = fscrypt_get_encryption_info(parent);
+ if (res < 0)
+ return res;
+
+ ci = parent->i_crypt_info;
+ if (ci == NULL)
+ return -ENOKEY;
+
+ ctx.format = FS_ENCRYPTION_CONTEXT_FORMAT_V1;
+ if (fscrypt_dummy_context_enabled(parent)) {
+ ctx.contents_encryption_mode = FS_ENCRYPTION_MODE_AES_256_XTS;
+ ctx.filenames_encryption_mode = FS_ENCRYPTION_MODE_AES_256_CTS;
+ ctx.flags = 0;
+ memset(ctx.master_key_descriptor, 0x42, FS_KEY_DESCRIPTOR_SIZE);
+ res = 0;
+ } else {
+ ctx.contents_encryption_mode = ci->ci_data_mode;
+ ctx.filenames_encryption_mode = ci->ci_filename_mode;
+ ctx.flags = ci->ci_flags;
+ memcpy(ctx.master_key_descriptor, ci->ci_master_key,
+ FS_KEY_DESCRIPTOR_SIZE);
+ }
+ get_random_bytes(ctx.nonce, FS_KEY_DERIVATION_NONCE_SIZE);
+ res = parent->i_sb->s_cop->set_context(child, &ctx,
+ sizeof(ctx), fs_data);
+ if (res)
+ return res;
+ return preload ? fscrypt_get_encryption_info(child): 0;
+}
+EXPORT_SYMBOL(fscrypt_inherit_context);
diff --git a/fs/dax.c b/fs/dax.c
index bbb2ad783..75ba46d82 100644
--- a/fs/dax.c
+++ b/fs/dax.c
@@ -286,8 +286,13 @@ ssize_t dax_do_io(struct kiocb *iocb, struct inode *inode,
if ((flags & DIO_LOCKING) && iov_iter_rw(iter) == READ)
inode_unlock(inode);
- if ((retval > 0) && end_io)
- end_io(iocb, pos, retval, bh.b_private);
+ if (end_io) {
+ int err;
+
+ err = end_io(iocb, pos, retval, bh.b_private);
+ if (err)
+ retval = err;
+ }
if (!(flags & DIO_SKIP_DIO_COUNT))
inode_dio_end(inode);
@@ -318,7 +323,7 @@ static int dax_load_hole(struct address_space *mapping, struct page *page,
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
if (vmf->pgoff >= size) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return VM_FAULT_SIGBUS;
}
@@ -346,7 +351,7 @@ static int copy_user_bh(struct page *to, struct inode *inode,
}
#define NO_SECTOR -1
-#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_CACHE_SHIFT))
+#define DAX_PMD_INDEX(page_index) (page_index & (PMD_MASK >> PAGE_SHIFT))
static int dax_radix_entry(struct address_space *mapping, pgoff_t index,
sector_t sector, bool pmd_entry, bool dirty)
@@ -501,8 +506,8 @@ int dax_writeback_mapping_range(struct address_space *mapping,
if (!mapping->nrexceptional || wbc->sync_mode != WB_SYNC_ALL)
return 0;
- start_index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end_index = wbc->range_end >> PAGE_CACHE_SHIFT;
+ start_index = wbc->range_start >> PAGE_SHIFT;
+ end_index = wbc->range_end >> PAGE_SHIFT;
pmd_index = DAX_PMD_INDEX(start_index);
rcu_read_lock();
@@ -637,12 +642,12 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
page = find_get_page(mapping, vmf->pgoff);
if (page) {
if (!lock_page_or_retry(page, vma->vm_mm, vmf->flags)) {
- page_cache_release(page);
+ put_page(page);
return VM_FAULT_RETRY;
}
if (unlikely(page->mapping != mapping)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
goto repeat;
}
size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
@@ -706,10 +711,10 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
if (page) {
unmap_mapping_range(mapping, vmf->pgoff << PAGE_SHIFT,
- PAGE_CACHE_SIZE, 0);
+ PAGE_SIZE, 0);
delete_from_page_cache(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
page = NULL;
}
@@ -742,7 +747,7 @@ int __dax_fault(struct vm_area_struct *vma, struct vm_fault *vmf,
unlock_page:
if (page) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
goto out;
}
@@ -1089,7 +1094,7 @@ EXPORT_SYMBOL_GPL(dax_pfn_mkwrite);
* you are truncating a file, the helper function dax_truncate_page() may be
* more convenient.
*
- * We work in terms of PAGE_CACHE_SIZE here for commonality with
+ * We work in terms of PAGE_SIZE here for commonality with
* block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
* took care of disposing of the unnecessary blocks. Even if the filesystem
* block size is smaller than PAGE_SIZE, we have to zero the rest of the page
@@ -1099,18 +1104,18 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
get_block_t get_block)
{
struct buffer_head bh;
- pgoff_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ pgoff_t index = from >> PAGE_SHIFT;
+ unsigned offset = from & (PAGE_SIZE-1);
int err;
/* Block boundary? Nothing to do */
if (!length)
return 0;
- BUG_ON((offset + length) > PAGE_CACHE_SIZE);
+ BUG_ON((offset + length) > PAGE_SIZE);
memset(&bh, 0, sizeof(bh));
bh.b_bdev = inode->i_sb->s_bdev;
- bh.b_size = PAGE_CACHE_SIZE;
+ bh.b_size = PAGE_SIZE;
err = get_block(inode, index, &bh, 0);
if (err < 0)
return err;
@@ -1118,7 +1123,7 @@ int dax_zero_page_range(struct inode *inode, loff_t from, unsigned length,
struct block_device *bdev = bh.b_bdev;
struct blk_dax_ctl dax = {
.sector = to_sector(&bh, inode),
- .size = PAGE_CACHE_SIZE,
+ .size = PAGE_SIZE,
};
if (dax_map_atomic(bdev, &dax) < 0)
@@ -1141,7 +1146,7 @@ EXPORT_SYMBOL_GPL(dax_zero_page_range);
* Similar to block_truncate_page(), this function can be called by a
* filesystem when it is truncating a DAX file to handle the partial page.
*
- * We work in terms of PAGE_CACHE_SIZE here for commonality with
+ * We work in terms of PAGE_SIZE here for commonality with
* block_truncate_page(), but we could go down to PAGE_SIZE if the filesystem
* took care of disposing of the unnecessary blocks. Even if the filesystem
* block size is smaller than PAGE_SIZE, we have to zero the rest of the page
@@ -1149,7 +1154,7 @@ EXPORT_SYMBOL_GPL(dax_zero_page_range);
*/
int dax_truncate_page(struct inode *inode, loff_t from, get_block_t get_block)
{
- unsigned length = PAGE_CACHE_ALIGN(from) - from;
+ unsigned length = PAGE_ALIGN(from) - from;
return dax_zero_page_range(inode, from, length, get_block);
}
EXPORT_SYMBOL_GPL(dax_truncate_page);
diff --git a/fs/dcache.c b/fs/dcache.c
index 6bdabe2fc..0dd0237f8 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1749,13 +1749,12 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
unsigned add_flags = d_flags_for_inode(inode);
spin_lock(&dentry->d_lock);
- if (inode)
- hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
+ hlist_add_head(&dentry->d_u.d_alias, &inode->i_dentry);
raw_write_seqcount_begin(&dentry->d_seq);
__d_set_inode_and_type(dentry, inode, add_flags);
raw_write_seqcount_end(&dentry->d_seq);
+ __fsnotify_d_instantiate(dentry);
spin_unlock(&dentry->d_lock);
- fsnotify_d_instantiate(dentry, inode);
}
/**
@@ -1776,91 +1775,16 @@ static void __d_instantiate(struct dentry *dentry, struct inode *inode)
void d_instantiate(struct dentry *entry, struct inode * inode)
{
BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
- if (inode)
+ if (inode) {
spin_lock(&inode->i_lock);
- __d_instantiate(entry, inode);
- if (inode)
+ __d_instantiate(entry, inode);
spin_unlock(&inode->i_lock);
+ }
security_d_instantiate(entry, inode);
}
EXPORT_SYMBOL(d_instantiate);
/**
- * d_instantiate_unique - instantiate a non-aliased dentry
- * @entry: dentry to instantiate
- * @inode: inode to attach to this dentry
- *
- * Fill in inode information in the entry. On success, it returns NULL.
- * If an unhashed alias of "entry" already exists, then we return the
- * aliased dentry instead and drop one reference to inode.
- *
- * Note that in order to avoid conflicts with rename() etc, the caller
- * had better be holding the parent directory semaphore.
- *
- * This also assumes that the inode count has been incremented
- * (or otherwise set) by the caller to indicate that it is now
- * in use by the dcache.
- */
-static struct dentry *__d_instantiate_unique(struct dentry *entry,
- struct inode *inode)
-{
- struct dentry *alias;
- int len = entry->d_name.len;
- const char *name = entry->d_name.name;
- unsigned int hash = entry->d_name.hash;
-
- if (!inode) {
- __d_instantiate(entry, NULL);
- return NULL;
- }
-
- hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
- /*
- * Don't need alias->d_lock here, because aliases with
- * d_parent == entry->d_parent are not subject to name or
- * parent changes, because the parent inode i_mutex is held.
- */
- if (alias->d_name.hash != hash)
- continue;
- if (alias->d_parent != entry->d_parent)
- continue;
- if (alias->d_name.len != len)
- continue;
- if (dentry_cmp(alias, name, len))
- continue;
- __dget(alias);
- return alias;
- }
-
- __d_instantiate(entry, inode);
- return NULL;
-}
-
-struct dentry *d_instantiate_unique(struct dentry *entry, struct inode *inode)
-{
- struct dentry *result;
-
- BUG_ON(!hlist_unhashed(&entry->d_u.d_alias));
-
- if (inode)
- spin_lock(&inode->i_lock);
- result = __d_instantiate_unique(entry, inode);
- if (inode)
- spin_unlock(&inode->i_lock);
-
- if (!result) {
- security_d_instantiate(entry, inode);
- return NULL;
- }
-
- BUG_ON(!d_unhashed(result));
- iput(inode);
- return result;
-}
-
-EXPORT_SYMBOL(d_instantiate_unique);
-
-/**
* d_instantiate_no_diralias - instantiate a non-aliased dentry
* @entry: dentry to complete
* @inode: inode to attach to this dentry
@@ -2440,6 +2364,86 @@ void d_rehash(struct dentry * entry)
}
EXPORT_SYMBOL(d_rehash);
+
+/* inode->i_lock held if inode is non-NULL */
+
+static inline void __d_add(struct dentry *dentry, struct inode *inode)
+{
+ if (inode) {
+ __d_instantiate(dentry, inode);
+ spin_unlock(&inode->i_lock);
+ }
+ security_d_instantiate(dentry, inode);
+ d_rehash(dentry);
+}
+
+/**
+ * d_add - add dentry to hash queues
+ * @entry: dentry to add
+ * @inode: The inode to attach to this dentry
+ *
+ * This adds the entry to the hash queues and initializes @inode.
+ * The entry was actually filled in earlier during d_alloc().
+ */
+
+void d_add(struct dentry *entry, struct inode *inode)
+{
+ if (inode)
+ spin_lock(&inode->i_lock);
+ __d_add(entry, inode);
+}
+EXPORT_SYMBOL(d_add);
+
+/**
+ * d_exact_alias - find and hash an exact unhashed alias
+ * @entry: dentry to add
+ * @inode: The inode to go with this dentry
+ *
+ * If an unhashed dentry with the same name/parent and desired
+ * inode already exists, hash and return it. Otherwise, return
+ * NULL.
+ *
+ * Parent directory should be locked.
+ */
+struct dentry *d_exact_alias(struct dentry *entry, struct inode *inode)
+{
+ struct dentry *alias;
+ int len = entry->d_name.len;
+ const char *name = entry->d_name.name;
+ unsigned int hash = entry->d_name.hash;
+
+ spin_lock(&inode->i_lock);
+ hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
+ /*
+ * Don't need alias->d_lock here, because aliases with
+ * d_parent == entry->d_parent are not subject to name or
+ * parent changes, because the parent inode i_mutex is held.
+ */
+ if (alias->d_name.hash != hash)
+ continue;
+ if (alias->d_parent != entry->d_parent)
+ continue;
+ if (alias->d_name.len != len)
+ continue;
+ if (dentry_cmp(alias, name, len))
+ continue;
+ spin_lock(&alias->d_lock);
+ if (!d_unhashed(alias)) {
+ spin_unlock(&alias->d_lock);
+ alias = NULL;
+ } else {
+ __dget_dlock(alias);
+ _d_rehash(alias);
+ spin_unlock(&alias->d_lock);
+ }
+ spin_unlock(&inode->i_lock);
+ return alias;
+ }
+ spin_unlock(&inode->i_lock);
+ return NULL;
+}
+EXPORT_SYMBOL(d_exact_alias);
+
/**
* dentry_update_name_case - update case insensitive dentry with a new name
* @dentry: dentry to be updated
@@ -2776,10 +2780,9 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
BUG_ON(!d_unhashed(dentry));
- if (!inode) {
- __d_instantiate(dentry, NULL);
+ if (!inode)
goto out;
- }
+
spin_lock(&inode->i_lock);
if (S_ISDIR(inode->i_mode)) {
struct dentry *new = __d_find_any_alias(inode);
@@ -2813,12 +2816,8 @@ struct dentry *d_splice_alias(struct inode *inode, struct dentry *dentry)
return new;
}
}
- /* already taking inode->i_lock, so d_add() by hand */
- __d_instantiate(dentry, inode);
- spin_unlock(&inode->i_lock);
out:
- security_d_instantiate(dentry, inode);
- d_rehash(dentry);
+ __d_add(dentry, inode);
return NULL;
}
EXPORT_SYMBOL(d_splice_alias);
diff --git a/fs/devpts/inode.c b/fs/devpts/inode.c
index 655f21f99..0b2954d71 100644
--- a/fs/devpts/inode.c
+++ b/fs/devpts/inode.c
@@ -128,6 +128,7 @@ static const match_table_t tokens = {
struct pts_fs_info {
struct ida allocated_ptys;
struct pts_mount_opts mount_opts;
+ struct super_block *sb;
struct dentry *ptmx_dentry;
};
@@ -358,7 +359,7 @@ static const struct super_operations devpts_sops = {
.show_options = devpts_show_options,
};
-static void *new_pts_fs_info(void)
+static void *new_pts_fs_info(struct super_block *sb)
{
struct pts_fs_info *fsi;
@@ -369,6 +370,7 @@ static void *new_pts_fs_info(void)
ida_init(&fsi->allocated_ptys);
fsi->mount_opts.mode = DEVPTS_DEFAULT_MODE;
fsi->mount_opts.ptmxmode = DEVPTS_DEFAULT_PTMX_MODE;
+ fsi->sb = sb;
return fsi;
}
@@ -384,7 +386,7 @@ devpts_fill_super(struct super_block *s, void *data, int silent)
s->s_op = &devpts_sops;
s->s_time_gran = 1;
- s->s_fs_info = new_pts_fs_info();
+ s->s_fs_info = new_pts_fs_info(s);
if (!s->s_fs_info)
goto fail;
@@ -524,17 +526,14 @@ static struct file_system_type devpts_fs_type = {
* to the System V naming convention
*/
-int devpts_new_index(struct inode *ptmx_inode)
+int devpts_new_index(struct pts_fs_info *fsi)
{
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
- struct pts_fs_info *fsi;
int index;
int ida_ret;
- if (!sb)
+ if (!fsi)
return -ENODEV;
- fsi = DEVPTS_SB(sb);
retry:
if (!ida_pre_get(&fsi->allocated_ptys, GFP_KERNEL))
return -ENOMEM;
@@ -564,11 +563,8 @@ retry:
return index;
}
-void devpts_kill_index(struct inode *ptmx_inode, int idx)
+void devpts_kill_index(struct pts_fs_info *fsi, int idx)
{
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
- struct pts_fs_info *fsi = DEVPTS_SB(sb);
-
mutex_lock(&allocated_ptys_lock);
ida_remove(&fsi->allocated_ptys, idx);
pty_count--;
@@ -578,21 +574,25 @@ void devpts_kill_index(struct inode *ptmx_inode, int idx)
/*
* pty code needs to hold extra references in case of last /dev/tty close
*/
-
-void devpts_add_ref(struct inode *ptmx_inode)
+struct pts_fs_info *devpts_get_ref(struct inode *ptmx_inode, struct file *file)
{
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+ struct super_block *sb;
+ struct pts_fs_info *fsi;
+
+ sb = pts_sb_from_inode(ptmx_inode);
+ if (!sb)
+ return NULL;
+ fsi = DEVPTS_SB(sb);
+ if (!fsi)
+ return NULL;
atomic_inc(&sb->s_active);
- ihold(ptmx_inode);
+ return fsi;
}
-void devpts_del_ref(struct inode *ptmx_inode)
+void devpts_put_ref(struct pts_fs_info *fsi)
{
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
-
- iput(ptmx_inode);
- deactivate_super(sb);
+ deactivate_super(fsi->sb);
}
/**
@@ -604,22 +604,20 @@ void devpts_del_ref(struct inode *ptmx_inode)
*
* The created inode is returned. Remove it from /dev/pts/ by devpts_pty_kill.
*/
-struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
- void *priv)
+struct dentry *devpts_pty_new(struct pts_fs_info *fsi, int index, void *priv)
{
struct dentry *dentry;
- struct super_block *sb = pts_sb_from_inode(ptmx_inode);
+ struct super_block *sb;
struct inode *inode;
struct dentry *root;
- struct pts_fs_info *fsi;
struct pts_mount_opts *opts;
char s[12];
- if (!sb)
+ if (!fsi)
return ERR_PTR(-ENODEV);
+ sb = fsi->sb;
root = sb->s_root;
- fsi = DEVPTS_SB(sb);
opts = &fsi->mount_opts;
inode = new_inode(sb);
@@ -630,25 +628,21 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
inode->i_uid = opts->setuid ? opts->uid : current_fsuid();
inode->i_gid = opts->setgid ? opts->gid : current_fsgid();
inode->i_mtime = inode->i_atime = inode->i_ctime = CURRENT_TIME;
- init_special_inode(inode, S_IFCHR|opts->mode, device);
- inode->i_private = priv;
+ init_special_inode(inode, S_IFCHR|opts->mode, MKDEV(UNIX98_PTY_SLAVE_MAJOR, index));
sprintf(s, "%d", index);
- inode_lock(d_inode(root));
-
dentry = d_alloc_name(root, s);
if (dentry) {
+ dentry->d_fsdata = priv;
d_add(dentry, inode);
fsnotify_create(d_inode(root), dentry);
} else {
iput(inode);
- inode = ERR_PTR(-ENOMEM);
+ dentry = ERR_PTR(-ENOMEM);
}
- inode_unlock(d_inode(root));
-
- return inode;
+ return dentry;
}
/**
@@ -657,24 +651,10 @@ struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index,
*
* Returns whatever was passed as priv in devpts_pty_new for a given inode.
*/
-void *devpts_get_priv(struct inode *pts_inode)
+void *devpts_get_priv(struct dentry *dentry)
{
- struct dentry *dentry;
- void *priv = NULL;
-
- BUG_ON(pts_inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
-
- /* Ensure dentry has not been deleted by devpts_pty_kill() */
- dentry = d_find_alias(pts_inode);
- if (!dentry)
- return NULL;
-
- if (pts_inode->i_sb->s_magic == DEVPTS_SUPER_MAGIC)
- priv = pts_inode->i_private;
-
- dput(dentry);
-
- return priv;
+ WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC);
+ return dentry->d_fsdata;
}
/**
@@ -683,24 +663,14 @@ void *devpts_get_priv(struct inode *pts_inode)
*
* This is an inverse operation of devpts_pty_new.
*/
-void devpts_pty_kill(struct inode *inode)
+void devpts_pty_kill(struct dentry *dentry)
{
- struct super_block *sb = pts_sb_from_inode(inode);
- struct dentry *root = sb->s_root;
- struct dentry *dentry;
+ WARN_ON_ONCE(dentry->d_sb->s_magic != DEVPTS_SUPER_MAGIC);
- BUG_ON(inode->i_rdev == MKDEV(TTYAUX_MAJOR, PTMX_MINOR));
-
- inode_lock(d_inode(root));
-
- dentry = d_find_alias(inode);
-
- drop_nlink(inode);
+ dentry->d_fsdata = NULL;
+ drop_nlink(dentry->d_inode);
d_delete(dentry);
dput(dentry); /* d_alloc_name() in devpts_pty_new() */
- dput(dentry); /* d_find_alias above */
-
- inode_unlock(d_inode(root));
}
static int __init init_devpts_fs(void)
diff --git a/fs/direct-io.c b/fs/direct-io.c
index d6a9012d4..472037732 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -172,7 +172,7 @@ static inline int dio_refill_pages(struct dio *dio, struct dio_submit *sdio)
*/
if (dio->page_errors == 0)
dio->page_errors = ret;
- page_cache_get(page);
+ get_page(page);
dio->pages[0] = page;
sdio->head = 0;
sdio->tail = 1;
@@ -253,8 +253,13 @@ static ssize_t dio_complete(struct dio *dio, loff_t offset, ssize_t ret,
if (ret == 0)
ret = transferred;
- if (dio->end_io && dio->result)
- dio->end_io(dio->iocb, offset, transferred, dio->private);
+ if (dio->end_io) {
+ int err;
+
+ err = dio->end_io(dio->iocb, offset, ret, dio->private);
+ if (err)
+ ret = err;
+ }
if (!(dio->flags & DIO_SKIP_DIO_COUNT))
inode_dio_end(dio->inode);
@@ -419,7 +424,7 @@ static inline void dio_bio_submit(struct dio *dio, struct dio_submit *sdio)
static inline void dio_cleanup(struct dio *dio, struct dio_submit *sdio)
{
while (sdio->head < sdio->tail)
- page_cache_release(dio->pages[sdio->head++]);
+ put_page(dio->pages[sdio->head++]);
}
/*
@@ -445,7 +450,8 @@ static struct bio *dio_await_one(struct dio *dio)
__set_current_state(TASK_UNINTERRUPTIBLE);
dio->waiter = current;
spin_unlock_irqrestore(&dio->bio_lock, flags);
- if (!blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
+ if (!(dio->iocb->ki_flags & IOCB_HIPRI) ||
+ !blk_poll(bdev_get_queue(dio->bio_bdev), dio->bio_cookie))
io_schedule();
/* wake up sets us TASK_RUNNING */
spin_lock_irqsave(&dio->bio_lock, flags);
@@ -481,7 +487,7 @@ static int dio_bio_complete(struct dio *dio, struct bio *bio)
if (dio->rw == READ && !PageCompound(page) &&
dio->should_dirty)
set_page_dirty_lock(page);
- page_cache_release(page);
+ put_page(page);
}
err = bio->bi_error;
bio_put(bio);
@@ -690,7 +696,7 @@ static inline int dio_bio_add_page(struct dio_submit *sdio)
*/
if ((sdio->cur_page_len + sdio->cur_page_offset) == PAGE_SIZE)
sdio->pages_in_io--;
- page_cache_get(sdio->cur_page);
+ get_page(sdio->cur_page);
sdio->final_block_in_bio = sdio->cur_page_block +
(sdio->cur_page_len >> sdio->blkbits);
ret = 0;
@@ -804,13 +810,13 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page,
*/
if (sdio->cur_page) {
ret = dio_send_cur_page(dio, sdio, map_bh);
- page_cache_release(sdio->cur_page);
+ put_page(sdio->cur_page);
sdio->cur_page = NULL;
if (ret)
return ret;
}
- page_cache_get(page); /* It is in dio */
+ get_page(page); /* It is in dio */
sdio->cur_page = page;
sdio->cur_page_offset = offset;
sdio->cur_page_len = len;
@@ -824,7 +830,7 @@ out:
if (sdio->boundary) {
ret = dio_send_cur_page(dio, sdio, map_bh);
dio_bio_submit(dio, sdio);
- page_cache_release(sdio->cur_page);
+ put_page(sdio->cur_page);
sdio->cur_page = NULL;
}
return ret;
@@ -941,7 +947,7 @@ static int do_direct_IO(struct dio *dio, struct dio_submit *sdio,
ret = get_more_blocks(dio, sdio, map_bh);
if (ret) {
- page_cache_release(page);
+ put_page(page);
goto out;
}
if (!buffer_mapped(map_bh))
@@ -982,7 +988,7 @@ do_holes:
/* AKPM: eargh, -ENOTBLK is a hack */
if (dio->rw & WRITE) {
- page_cache_release(page);
+ put_page(page);
return -ENOTBLK;
}
@@ -995,7 +1001,7 @@ do_holes:
if (sdio->block_in_file >=
i_size_aligned >> blkbits) {
/* We hit eof */
- page_cache_release(page);
+ put_page(page);
goto out;
}
zero_user(page, from, 1 << blkbits);
@@ -1035,7 +1041,7 @@ do_holes:
sdio->next_block_for_io,
map_bh);
if (ret) {
- page_cache_release(page);
+ put_page(page);
goto out;
}
sdio->next_block_for_io += this_chunk_blocks;
@@ -1051,7 +1057,7 @@ next_block:
}
/* Drop the ref which was taken in get_user_pages() */
- page_cache_release(page);
+ put_page(page);
}
out:
return ret;
@@ -1275,7 +1281,7 @@ do_blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
ret2 = dio_send_cur_page(dio, &sdio, &map_bh);
if (retval == 0)
retval = ret2;
- page_cache_release(sdio.cur_page);
+ put_page(sdio.cur_page);
sdio.cur_page = NULL;
}
if (sdio.bio)
diff --git a/fs/dlm/config.c b/fs/dlm/config.c
index 8e294fbba..1669f6291 100644
--- a/fs/dlm/config.c
+++ b/fs/dlm/config.c
@@ -343,24 +343,20 @@ static struct config_group *make_cluster(struct config_group *g,
struct dlm_cluster *cl = NULL;
struct dlm_spaces *sps = NULL;
struct dlm_comms *cms = NULL;
- void *gps = NULL;
cl = kzalloc(sizeof(struct dlm_cluster), GFP_NOFS);
- gps = kcalloc(3, sizeof(struct config_group *), GFP_NOFS);
sps = kzalloc(sizeof(struct dlm_spaces), GFP_NOFS);
cms = kzalloc(sizeof(struct dlm_comms), GFP_NOFS);
- if (!cl || !gps || !sps || !cms)
+ if (!cl || !sps || !cms)
goto fail;
config_group_init_type_name(&cl->group, name, &cluster_type);
config_group_init_type_name(&sps->ss_group, "spaces", &spaces_type);
config_group_init_type_name(&cms->cs_group, "comms", &comms_type);
- cl->group.default_groups = gps;
- cl->group.default_groups[0] = &sps->ss_group;
- cl->group.default_groups[1] = &cms->cs_group;
- cl->group.default_groups[2] = NULL;
+ configfs_add_default_group(&sps->ss_group, &cl->group);
+ configfs_add_default_group(&cms->cs_group, &cl->group);
cl->cl_tcp_port = dlm_config.ci_tcp_port;
cl->cl_buffer_size = dlm_config.ci_buffer_size;
@@ -383,7 +379,6 @@ static struct config_group *make_cluster(struct config_group *g,
fail:
kfree(cl);
- kfree(gps);
kfree(sps);
kfree(cms);
return ERR_PTR(-ENOMEM);
@@ -392,14 +387,8 @@ static struct config_group *make_cluster(struct config_group *g,
static void drop_cluster(struct config_group *g, struct config_item *i)
{
struct dlm_cluster *cl = config_item_to_cluster(i);
- struct config_item *tmp;
- int j;
- for (j = 0; cl->group.default_groups[j]; j++) {
- tmp = &cl->group.default_groups[j]->cg_item;
- cl->group.default_groups[j] = NULL;
- config_item_put(tmp);
- }
+ configfs_remove_default_groups(&cl->group);
space_list = NULL;
comm_list = NULL;
@@ -410,7 +399,6 @@ static void drop_cluster(struct config_group *g, struct config_item *i)
static void release_cluster(struct config_item *i)
{
struct dlm_cluster *cl = config_item_to_cluster(i);
- kfree(cl->group.default_groups);
kfree(cl);
}
@@ -418,21 +406,17 @@ static struct config_group *make_space(struct config_group *g, const char *name)
{
struct dlm_space *sp = NULL;
struct dlm_nodes *nds = NULL;
- void *gps = NULL;
sp = kzalloc(sizeof(struct dlm_space), GFP_NOFS);
- gps = kcalloc(2, sizeof(struct config_group *), GFP_NOFS);
nds = kzalloc(sizeof(struct dlm_nodes), GFP_NOFS);
- if (!sp || !gps || !nds)
+ if (!sp || !nds)
goto fail;
config_group_init_type_name(&sp->group, name, &space_type);
- config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type);
- sp->group.default_groups = gps;
- sp->group.default_groups[0] = &nds->ns_group;
- sp->group.default_groups[1] = NULL;
+ config_group_init_type_name(&nds->ns_group, "nodes", &nodes_type);
+ configfs_add_default_group(&nds->ns_group, &sp->group);
INIT_LIST_HEAD(&sp->members);
mutex_init(&sp->members_lock);
@@ -441,7 +425,6 @@ static struct config_group *make_space(struct config_group *g, const char *name)
fail:
kfree(sp);
- kfree(gps);
kfree(nds);
return ERR_PTR(-ENOMEM);
}
@@ -449,24 +432,16 @@ static struct config_group *make_space(struct config_group *g, const char *name)
static void drop_space(struct config_group *g, struct config_item *i)
{
struct dlm_space *sp = config_item_to_space(i);
- struct config_item *tmp;
- int j;
/* assert list_empty(&sp->members) */
- for (j = 0; sp->group.default_groups[j]; j++) {
- tmp = &sp->group.default_groups[j]->cg_item;
- sp->group.default_groups[j] = NULL;
- config_item_put(tmp);
- }
-
+ configfs_remove_default_groups(&sp->group);
config_item_put(i);
}
static void release_space(struct config_item *i)
{
struct dlm_space *sp = config_item_to_space(i);
- kfree(sp->group.default_groups);
kfree(sp);
}
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 3a37bd3f9..1ab012a27 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -124,7 +124,10 @@ struct connection {
struct connection *othercon;
struct work_struct rwork; /* Receive workqueue */
struct work_struct swork; /* Send workqueue */
- void (*orig_error_report)(struct sock *sk);
+ void (*orig_error_report)(struct sock *);
+ void (*orig_data_ready)(struct sock *);
+ void (*orig_state_change)(struct sock *);
+ void (*orig_write_space)(struct sock *);
};
#define sock2con(x) ((struct connection *)(x)->sk_user_data)
@@ -467,16 +470,24 @@ int dlm_lowcomms_connect_node(int nodeid)
static void lowcomms_error_report(struct sock *sk)
{
- struct connection *con = sock2con(sk);
+ struct connection *con;
struct sockaddr_storage saddr;
+ int buflen;
+ void (*orig_report)(struct sock *) = NULL;
- if (nodeid_to_addr(con->nodeid, &saddr, NULL, false)) {
+ read_lock_bh(&sk->sk_callback_lock);
+ con = sock2con(sk);
+ if (con == NULL)
+ goto out;
+
+ orig_report = con->orig_error_report;
+ if (con->sock == NULL ||
+ kernel_getpeername(con->sock, (struct sockaddr *)&saddr, &buflen)) {
printk_ratelimited(KERN_ERR "dlm: node %d: socket error "
"sending to node %d, port %d, "
"sk_err=%d/%d\n", dlm_our_nodeid(),
con->nodeid, dlm_config.ci_tcp_port,
sk->sk_err, sk->sk_err_soft);
- return;
} else if (saddr.ss_family == AF_INET) {
struct sockaddr_in *sin4 = (struct sockaddr_in *)&saddr;
@@ -499,22 +510,54 @@ static void lowcomms_error_report(struct sock *sk)
dlm_config.ci_tcp_port, sk->sk_err,
sk->sk_err_soft);
}
- con->orig_error_report(sk);
+out:
+ read_unlock_bh(&sk->sk_callback_lock);
+ if (orig_report)
+ orig_report(sk);
+}
+
+/* Note: sk_callback_lock must be locked before calling this function. */
+static void save_callbacks(struct connection *con, struct sock *sk)
+{
+ lock_sock(sk);
+ con->orig_data_ready = sk->sk_data_ready;
+ con->orig_state_change = sk->sk_state_change;
+ con->orig_write_space = sk->sk_write_space;
+ con->orig_error_report = sk->sk_error_report;
+ release_sock(sk);
+}
+
+static void restore_callbacks(struct connection *con, struct sock *sk)
+{
+ write_lock_bh(&sk->sk_callback_lock);
+ lock_sock(sk);
+ sk->sk_user_data = NULL;
+ sk->sk_data_ready = con->orig_data_ready;
+ sk->sk_state_change = con->orig_state_change;
+ sk->sk_write_space = con->orig_write_space;
+ sk->sk_error_report = con->orig_error_report;
+ release_sock(sk);
+ write_unlock_bh(&sk->sk_callback_lock);
}
/* Make a socket active */
static void add_sock(struct socket *sock, struct connection *con)
{
+ struct sock *sk = sock->sk;
+
+ write_lock_bh(&sk->sk_callback_lock);
con->sock = sock;
+ sk->sk_user_data = con;
+ if (!test_bit(CF_IS_OTHERCON, &con->flags))
+ save_callbacks(con, sk);
/* Install a data_ready callback */
- con->sock->sk->sk_data_ready = lowcomms_data_ready;
- con->sock->sk->sk_write_space = lowcomms_write_space;
- con->sock->sk->sk_state_change = lowcomms_state_change;
- con->sock->sk->sk_user_data = con;
- con->sock->sk->sk_allocation = GFP_NOFS;
- con->orig_error_report = con->sock->sk->sk_error_report;
- con->sock->sk->sk_error_report = lowcomms_error_report;
+ sk->sk_data_ready = lowcomms_data_ready;
+ sk->sk_write_space = lowcomms_write_space;
+ sk->sk_state_change = lowcomms_state_change;
+ sk->sk_allocation = GFP_NOFS;
+ sk->sk_error_report = lowcomms_error_report;
+ write_unlock_bh(&sk->sk_callback_lock);
}
/* Add the port number to an IPv6 or 4 sockaddr and return the address
@@ -549,6 +592,8 @@ static void close_connection(struct connection *con, bool and_other,
mutex_lock(&con->sock_mutex);
if (con->sock) {
+ if (!test_bit(CF_IS_OTHERCON, &con->flags))
+ restore_callbacks(con, con->sock->sk);
sock_release(con->sock);
con->sock = NULL;
}
@@ -595,7 +640,7 @@ static int receive_from_sock(struct connection *con)
con->rx_page = alloc_page(GFP_ATOMIC);
if (con->rx_page == NULL)
goto out_resched;
- cbuf_init(&con->cb, PAGE_CACHE_SIZE);
+ cbuf_init(&con->cb, PAGE_SIZE);
}
/*
@@ -612,7 +657,7 @@ static int receive_from_sock(struct connection *con)
* buffer and the start of the currently used section (cb.base)
*/
if (cbuf_data(&con->cb) >= con->cb.base) {
- iov[0].iov_len = PAGE_CACHE_SIZE - cbuf_data(&con->cb);
+ iov[0].iov_len = PAGE_SIZE - cbuf_data(&con->cb);
iov[1].iov_len = con->cb.base;
iov[1].iov_base = page_address(con->rx_page);
nvec = 2;
@@ -630,7 +675,7 @@ static int receive_from_sock(struct connection *con)
ret = dlm_process_incoming_buffer(con->nodeid,
page_address(con->rx_page),
con->cb.base, con->cb.len,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
if (ret == -EBADMSG) {
log_print("lowcomms: addr=%p, base=%u, len=%u, read=%d",
page_address(con->rx_page), con->cb.base,
@@ -1190,6 +1235,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
if (result < 0) {
log_print("Failed to set SO_REUSEADDR on socket: %d", result);
}
+ sock->sk->sk_user_data = con;
+
con->rx_action = tcp_accept_from_sock;
con->connect_action = tcp_connect_to_sock;
@@ -1271,6 +1318,7 @@ static int sctp_listen_for_all(void)
if (result < 0)
log_print("Could not set SCTP NODELAY error %d\n", result);
+ write_lock_bh(&sock->sk->sk_callback_lock);
/* Init con struct */
sock->sk->sk_user_data = con;
con->sock = sock;
@@ -1278,6 +1326,8 @@ static int sctp_listen_for_all(void)
con->rx_action = sctp_accept_from_sock;
con->connect_action = sctp_connect_to_sock;
+ write_unlock_bh(&sock->sk->sk_callback_lock);
+
/* Bind to all addresses. */
if (sctp_bind_addrs(con, dlm_config.ci_tcp_port))
goto create_delsock;
@@ -1366,7 +1416,7 @@ void *dlm_lowcomms_get_buffer(int nodeid, int len, gfp_t allocation, char **ppc)
spin_lock(&con->writequeue_lock);
e = list_entry(con->writequeue.prev, struct writequeue_entry, list);
if ((&e->list == &con->writequeue) ||
- (PAGE_CACHE_SIZE - e->end < len)) {
+ (PAGE_SIZE - e->end < len)) {
e = NULL;
} else {
offset = e->end;
diff --git a/fs/ecryptfs/crypto.c b/fs/ecryptfs/crypto.c
index 80d690149..d09cb4cdd 100644
--- a/fs/ecryptfs/crypto.c
+++ b/fs/ecryptfs/crypto.c
@@ -23,6 +23,8 @@
* 02111-1307, USA.
*/
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/fs.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
@@ -30,7 +32,6 @@
#include <linux/compiler.h>
#include <linux/key.h>
#include <linux/namei.h>
-#include <linux/crypto.h>
#include <linux/file.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
@@ -74,6 +75,19 @@ void ecryptfs_from_hex(char *dst, char *src, int dst_size)
}
}
+static int ecryptfs_hash_digest(struct crypto_shash *tfm,
+ char *src, int len, char *dst)
+{
+ SHASH_DESC_ON_STACK(desc, tfm);
+ int err;
+
+ desc->tfm = tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+ err = crypto_shash_digest(desc, src, len, dst);
+ shash_desc_zero(desc);
+ return err;
+}
+
/**
* ecryptfs_calculate_md5 - calculates the md5 of @src
* @dst: Pointer to 16 bytes of allocated memory
@@ -88,45 +102,26 @@ static int ecryptfs_calculate_md5(char *dst,
struct ecryptfs_crypt_stat *crypt_stat,
char *src, int len)
{
- struct scatterlist sg;
- struct hash_desc desc = {
- .tfm = crypt_stat->hash_tfm,
- .flags = CRYPTO_TFM_REQ_MAY_SLEEP
- };
+ struct crypto_shash *tfm;
int rc = 0;
mutex_lock(&crypt_stat->cs_hash_tfm_mutex);
- sg_init_one(&sg, (u8 *)src, len);
- if (!desc.tfm) {
- desc.tfm = crypto_alloc_hash(ECRYPTFS_DEFAULT_HASH, 0,
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(desc.tfm)) {
- rc = PTR_ERR(desc.tfm);
+ tfm = crypt_stat->hash_tfm;
+ if (!tfm) {
+ tfm = crypto_alloc_shash(ECRYPTFS_DEFAULT_HASH, 0, 0);
+ if (IS_ERR(tfm)) {
+ rc = PTR_ERR(tfm);
ecryptfs_printk(KERN_ERR, "Error attempting to "
"allocate crypto context; rc = [%d]\n",
rc);
goto out;
}
- crypt_stat->hash_tfm = desc.tfm;
- }
- rc = crypto_hash_init(&desc);
- if (rc) {
- printk(KERN_ERR
- "%s: Error initializing crypto hash; rc = [%d]\n",
- __func__, rc);
- goto out;
+ crypt_stat->hash_tfm = tfm;
}
- rc = crypto_hash_update(&desc, &sg, len);
+ rc = ecryptfs_hash_digest(tfm, src, len, dst);
if (rc) {
printk(KERN_ERR
- "%s: Error updating crypto hash; rc = [%d]\n",
- __func__, rc);
- goto out;
- }
- rc = crypto_hash_final(&desc, dst);
- if (rc) {
- printk(KERN_ERR
- "%s: Error finalizing crypto hash; rc = [%d]\n",
+ "%s: Error computing crypto hash; rc = [%d]\n",
__func__, rc);
goto out;
}
@@ -234,10 +229,8 @@ void ecryptfs_destroy_crypt_stat(struct ecryptfs_crypt_stat *crypt_stat)
{
struct ecryptfs_key_sig *key_sig, *key_sig_tmp;
- if (crypt_stat->tfm)
- crypto_free_ablkcipher(crypt_stat->tfm);
- if (crypt_stat->hash_tfm)
- crypto_free_hash(crypt_stat->hash_tfm);
+ crypto_free_skcipher(crypt_stat->tfm);
+ crypto_free_shash(crypt_stat->hash_tfm);
list_for_each_entry_safe(key_sig, key_sig_tmp,
&crypt_stat->keysig_list, crypt_stat_list) {
list_del(&key_sig->crypt_stat_list);
@@ -293,7 +286,7 @@ int virt_to_scatterlist(const void *addr, int size, struct scatterlist *sg,
pg = virt_to_page(addr);
offset = offset_in_page(addr);
sg_set_page(&sg[i], pg, 0, offset);
- remainder_of_page = PAGE_CACHE_SIZE - offset;
+ remainder_of_page = PAGE_SIZE - offset;
if (size >= remainder_of_page) {
sg[i].length = remainder_of_page;
addr += remainder_of_page;
@@ -342,7 +335,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
struct scatterlist *src_sg, int size,
unsigned char *iv, int op)
{
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
struct extent_crypt_result ecr;
int rc = 0;
@@ -358,20 +351,20 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
init_completion(&ecr.completion);
mutex_lock(&crypt_stat->cs_tfm_mutex);
- req = ablkcipher_request_alloc(crypt_stat->tfm, GFP_NOFS);
+ req = skcipher_request_alloc(crypt_stat->tfm, GFP_NOFS);
if (!req) {
mutex_unlock(&crypt_stat->cs_tfm_mutex);
rc = -ENOMEM;
goto out;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
extent_crypt_complete, &ecr);
/* Consider doing this once, when the file is opened */
if (!(crypt_stat->flags & ECRYPTFS_KEY_SET)) {
- rc = crypto_ablkcipher_setkey(crypt_stat->tfm, crypt_stat->key,
- crypt_stat->key_size);
+ rc = crypto_skcipher_setkey(crypt_stat->tfm, crypt_stat->key,
+ crypt_stat->key_size);
if (rc) {
ecryptfs_printk(KERN_ERR,
"Error setting key; rc = [%d]\n",
@@ -383,9 +376,9 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
crypt_stat->flags |= ECRYPTFS_KEY_SET;
}
mutex_unlock(&crypt_stat->cs_tfm_mutex);
- ablkcipher_request_set_crypt(req, src_sg, dst_sg, size, iv);
- rc = op == ENCRYPT ? crypto_ablkcipher_encrypt(req) :
- crypto_ablkcipher_decrypt(req);
+ skcipher_request_set_crypt(req, src_sg, dst_sg, size, iv);
+ rc = op == ENCRYPT ? crypto_skcipher_encrypt(req) :
+ crypto_skcipher_decrypt(req);
if (rc == -EINPROGRESS || rc == -EBUSY) {
struct extent_crypt_result *ecr = req->base.data;
@@ -394,7 +387,7 @@ static int crypt_scatterlist(struct ecryptfs_crypt_stat *crypt_stat,
reinit_completion(&ecr->completion);
}
out:
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
return rc;
}
@@ -407,7 +400,7 @@ static loff_t lower_offset_for_page(struct ecryptfs_crypt_stat *crypt_stat,
struct page *page)
{
return ecryptfs_lower_header_size(crypt_stat) +
- ((loff_t)page->index << PAGE_CACHE_SHIFT);
+ ((loff_t)page->index << PAGE_SHIFT);
}
/**
@@ -435,7 +428,7 @@ static int crypt_extent(struct ecryptfs_crypt_stat *crypt_stat,
size_t extent_size = crypt_stat->extent_size;
int rc;
- extent_base = (((loff_t)page_index) * (PAGE_CACHE_SIZE / extent_size));
+ extent_base = (((loff_t)page_index) * (PAGE_SIZE / extent_size));
rc = ecryptfs_derive_iv(extent_iv, crypt_stat,
(extent_base + extent_offset));
if (rc) {
@@ -505,7 +498,7 @@ int ecryptfs_encrypt_page(struct page *page)
}
for (extent_offset = 0;
- extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
+ extent_offset < (PAGE_SIZE / crypt_stat->extent_size);
extent_offset++) {
rc = crypt_extent(crypt_stat, enc_extent_page, page,
extent_offset, ENCRYPT);
@@ -519,7 +512,7 @@ int ecryptfs_encrypt_page(struct page *page)
lower_offset = lower_offset_for_page(crypt_stat, page);
enc_extent_virt = kmap(enc_extent_page);
rc = ecryptfs_write_lower(ecryptfs_inode, enc_extent_virt, lower_offset,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
kunmap(enc_extent_page);
if (rc < 0) {
ecryptfs_printk(KERN_ERR,
@@ -567,7 +560,7 @@ int ecryptfs_decrypt_page(struct page *page)
lower_offset = lower_offset_for_page(crypt_stat, page);
page_virt = kmap(page);
- rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_CACHE_SIZE,
+ rc = ecryptfs_read_lower(page_virt, lower_offset, PAGE_SIZE,
ecryptfs_inode);
kunmap(page);
if (rc < 0) {
@@ -578,7 +571,7 @@ int ecryptfs_decrypt_page(struct page *page)
}
for (extent_offset = 0;
- extent_offset < (PAGE_CACHE_SIZE / crypt_stat->extent_size);
+ extent_offset < (PAGE_SIZE / crypt_stat->extent_size);
extent_offset++) {
rc = crypt_extent(crypt_stat, page, page,
extent_offset, DECRYPT);
@@ -622,7 +615,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
crypt_stat->cipher, "cbc");
if (rc)
goto out_unlock;
- crypt_stat->tfm = crypto_alloc_ablkcipher(full_alg_name, 0, 0);
+ crypt_stat->tfm = crypto_alloc_skcipher(full_alg_name, 0, 0);
if (IS_ERR(crypt_stat->tfm)) {
rc = PTR_ERR(crypt_stat->tfm);
crypt_stat->tfm = NULL;
@@ -631,7 +624,7 @@ int ecryptfs_init_crypt_ctx(struct ecryptfs_crypt_stat *crypt_stat)
full_alg_name);
goto out_free;
}
- crypto_ablkcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ crypto_skcipher_set_flags(crypt_stat->tfm, CRYPTO_TFM_REQ_WEAK_KEY);
rc = 0;
out_free:
kfree(full_alg_name);
@@ -666,11 +659,11 @@ void ecryptfs_set_default_sizes(struct ecryptfs_crypt_stat *crypt_stat)
if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR)
crypt_stat->metadata_size = ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
else {
- if (PAGE_CACHE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
+ if (PAGE_SIZE <= ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE)
crypt_stat->metadata_size =
ECRYPTFS_MINIMUM_HEADER_EXTENT_SIZE;
else
- crypt_stat->metadata_size = PAGE_CACHE_SIZE;
+ crypt_stat->metadata_size = PAGE_SIZE;
}
}
@@ -1449,7 +1442,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
ECRYPTFS_VALIDATE_HEADER_SIZE);
if (rc) {
/* metadata is not in the file header, so try xattrs */
- memset(page_virt, 0, PAGE_CACHE_SIZE);
+ memset(page_virt, 0, PAGE_SIZE);
rc = ecryptfs_read_xattr_region(page_virt, ecryptfs_inode);
if (rc) {
printk(KERN_DEBUG "Valid eCryptfs headers not found in "
@@ -1482,7 +1475,7 @@ int ecryptfs_read_metadata(struct dentry *ecryptfs_dentry)
}
out:
if (page_virt) {
- memset(page_virt, 0, PAGE_CACHE_SIZE);
+ memset(page_virt, 0, PAGE_SIZE);
kmem_cache_free(ecryptfs_header_cache, page_virt);
}
return rc;
@@ -1499,16 +1492,14 @@ out:
*/
static int
ecryptfs_encrypt_filename(struct ecryptfs_filename *filename,
- struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
{
int rc = 0;
filename->encrypted_filename = NULL;
filename->encrypted_filename_size = 0;
- if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
- || (mount_crypt_stat && (mount_crypt_stat->flags
- & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) {
+ if (mount_crypt_stat && (mount_crypt_stat->flags
+ & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)) {
size_t packet_size;
size_t remaining_bytes;
@@ -1591,7 +1582,7 @@ out:
* event, regardless of whether this function succeeds for fails.
*/
static int
-ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
+ecryptfs_process_key_cipher(struct crypto_skcipher **key_tfm,
char *cipher_name, size_t *key_size)
{
char dummy_key[ECRYPTFS_MAX_KEY_BYTES];
@@ -1609,21 +1600,18 @@ ecryptfs_process_key_cipher(struct crypto_blkcipher **key_tfm,
"ecb");
if (rc)
goto out;
- *key_tfm = crypto_alloc_blkcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
+ *key_tfm = crypto_alloc_skcipher(full_alg_name, 0, CRYPTO_ALG_ASYNC);
if (IS_ERR(*key_tfm)) {
rc = PTR_ERR(*key_tfm);
printk(KERN_ERR "Unable to allocate crypto cipher with name "
"[%s]; rc = [%d]\n", full_alg_name, rc);
goto out;
}
- crypto_blkcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
- if (*key_size == 0) {
- struct blkcipher_alg *alg = crypto_blkcipher_alg(*key_tfm);
-
- *key_size = alg->max_keysize;
- }
+ crypto_skcipher_set_flags(*key_tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ if (*key_size == 0)
+ *key_size = crypto_skcipher_default_keysize(*key_tfm);
get_random_bytes(dummy_key, *key_size);
- rc = crypto_blkcipher_setkey(*key_tfm, dummy_key, *key_size);
+ rc = crypto_skcipher_setkey(*key_tfm, dummy_key, *key_size);
if (rc) {
printk(KERN_ERR "Error attempting to set key of size [%zd] for "
"cipher [%s]; rc = [%d]\n", *key_size, full_alg_name,
@@ -1660,8 +1648,7 @@ int ecryptfs_destroy_crypto(void)
list_for_each_entry_safe(key_tfm, key_tfm_tmp, &key_tfm_list,
key_tfm_list) {
list_del(&key_tfm->key_tfm_list);
- if (key_tfm->key_tfm)
- crypto_free_blkcipher(key_tfm->key_tfm);
+ crypto_free_skcipher(key_tfm->key_tfm);
kmem_cache_free(ecryptfs_key_tfm_cache, key_tfm);
}
mutex_unlock(&key_tfm_list_mutex);
@@ -1747,7 +1734,7 @@ int ecryptfs_tfm_exists(char *cipher_name, struct ecryptfs_key_tfm **key_tfm)
* Searches for cached item first, and creates new if not found.
* Returns 0 on success, non-zero if adding new cipher failed
*/
-int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
+int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_skcipher **tfm,
struct mutex **tfm_mutex,
char *cipher_name)
{
@@ -1944,7 +1931,6 @@ out:
int ecryptfs_encrypt_and_encode_filename(
char **encoded_name,
size_t *encoded_name_size,
- struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
const char *name, size_t name_size)
{
@@ -1953,9 +1939,8 @@ int ecryptfs_encrypt_and_encode_filename(
(*encoded_name) = NULL;
(*encoded_name_size) = 0;
- if ((crypt_stat && (crypt_stat->flags & ECRYPTFS_ENCRYPT_FILENAMES))
- || (mount_crypt_stat && (mount_crypt_stat->flags
- & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES))) {
+ if (mount_crypt_stat && (mount_crypt_stat->flags
+ & ECRYPTFS_GLOBAL_ENCRYPT_FILENAMES)) {
struct ecryptfs_filename *filename;
filename = kzalloc(sizeof(*filename), GFP_KERNEL);
@@ -1968,8 +1953,7 @@ int ecryptfs_encrypt_and_encode_filename(
}
filename->filename = (char *)name;
filename->filename_size = name_size;
- rc = ecryptfs_encrypt_filename(filename, crypt_stat,
- mount_crypt_stat);
+ rc = ecryptfs_encrypt_filename(filename, mount_crypt_stat);
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt "
"filename; rc = [%d]\n", __func__, rc);
@@ -1980,11 +1964,9 @@ int ecryptfs_encrypt_and_encode_filename(
NULL, &encoded_name_no_prefix_size,
filename->encrypted_filename,
filename->encrypted_filename_size);
- if ((crypt_stat && (crypt_stat->flags
- & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
- || (mount_crypt_stat
+ if (mount_crypt_stat
&& (mount_crypt_stat->flags
- & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)))
+ & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))
(*encoded_name_size) =
(ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE
+ encoded_name_no_prefix_size);
@@ -2002,11 +1984,9 @@ int ecryptfs_encrypt_and_encode_filename(
kfree(filename);
goto out;
}
- if ((crypt_stat && (crypt_stat->flags
- & ECRYPTFS_ENCFN_USE_MOUNT_FNEK))
- || (mount_crypt_stat
+ if (mount_crypt_stat
&& (mount_crypt_stat->flags
- & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK))) {
+ & ECRYPTFS_GLOBAL_ENCFN_USE_MOUNT_FNEK)) {
memcpy((*encoded_name),
ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX,
ECRYPTFS_FNEK_ENCRYPTED_FILENAME_PREFIX_SIZE);
@@ -2120,7 +2100,7 @@ out:
int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat)
{
- struct blkcipher_desc desc;
+ struct crypto_skcipher *tfm;
struct mutex *tfm_mutex;
size_t cipher_blocksize;
int rc;
@@ -2130,7 +2110,7 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
return 0;
}
- rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&tfm, &tfm_mutex,
mount_crypt_stat->global_default_fn_cipher_name);
if (unlikely(rc)) {
(*namelen) = 0;
@@ -2138,7 +2118,7 @@ int ecryptfs_set_f_namelen(long *namelen, long lower_namelen,
}
mutex_lock(tfm_mutex);
- cipher_blocksize = crypto_blkcipher_blocksize(desc.tfm);
+ cipher_blocksize = crypto_skcipher_blocksize(tfm);
mutex_unlock(tfm_mutex);
/* Return an exact amount for the common cases */
diff --git a/fs/ecryptfs/ecryptfs_kernel.h b/fs/ecryptfs/ecryptfs_kernel.h
index 7b39260c7..d123fbaa2 100644
--- a/fs/ecryptfs/ecryptfs_kernel.h
+++ b/fs/ecryptfs/ecryptfs_kernel.h
@@ -28,6 +28,7 @@
#ifndef ECRYPTFS_KERNEL_H
#define ECRYPTFS_KERNEL_H
+#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
#include <linux/fs.h>
@@ -38,7 +39,6 @@
#include <linux/nsproxy.h>
#include <linux/backing-dev.h>
#include <linux/ecryptfs.h>
-#include <linux/crypto.h>
#define ECRYPTFS_DEFAULT_IV_BYTES 16
#define ECRYPTFS_DEFAULT_EXTENT_SIZE 4096
@@ -233,9 +233,9 @@ struct ecryptfs_crypt_stat {
size_t extent_shift;
unsigned int extent_mask;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat;
- struct crypto_ablkcipher *tfm;
- struct crypto_hash *hash_tfm; /* Crypto context for generating
- * the initialization vectors */
+ struct crypto_skcipher *tfm;
+ struct crypto_shash *hash_tfm; /* Crypto context for generating
+ * the initialization vectors */
unsigned char cipher[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
unsigned char key[ECRYPTFS_MAX_KEY_BYTES];
unsigned char root_iv[ECRYPTFS_MAX_IV_BYTES];
@@ -309,7 +309,7 @@ struct ecryptfs_global_auth_tok {
* keeps a list of crypto API contexts around to use when needed.
*/
struct ecryptfs_key_tfm {
- struct crypto_blkcipher *key_tfm;
+ struct crypto_skcipher *key_tfm;
size_t key_size;
struct mutex key_tfm_mutex;
struct list_head key_tfm_list;
@@ -569,7 +569,6 @@ int ecryptfs_fill_zeros(struct file *file, loff_t new_length);
int ecryptfs_encrypt_and_encode_filename(
char **encoded_name,
size_t *encoded_name_size,
- struct ecryptfs_crypt_stat *crypt_stat,
struct ecryptfs_mount_crypt_stat *mount_crypt_stat,
const char *name, size_t name_size);
struct dentry *ecryptfs_lower_dentry(struct dentry *this_dentry);
@@ -659,7 +658,7 @@ ecryptfs_add_new_key_tfm(struct ecryptfs_key_tfm **key_tfm, char *cipher_name,
int ecryptfs_init_crypto(void);
int ecryptfs_destroy_crypto(void);
int ecryptfs_tfm_exists(char *cipher_name, struct ecryptfs_key_tfm **key_tfm);
-int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_blkcipher **tfm,
+int ecryptfs_get_tfm_and_mutex_for_cipher_name(struct crypto_skcipher **tfm,
struct mutex **tfm_mutex,
char *cipher_name);
int ecryptfs_keyring_auth_tok_for_sig(struct key **auth_tok_key,
diff --git a/fs/ecryptfs/file.c b/fs/ecryptfs/file.c
index feef8a9c4..f02404052 100644
--- a/fs/ecryptfs/file.c
+++ b/fs/ecryptfs/file.c
@@ -112,7 +112,6 @@ static int ecryptfs_readdir(struct file *file, struct dir_context *ctx)
.sb = inode->i_sb,
};
lower_file = ecryptfs_file_to_lower(file);
- lower_file->f_pos = ctx->pos;
rc = iterate_dir(lower_file, &buf.ctx);
ctx->pos = buf.ctx.pos;
if (rc < 0)
@@ -223,14 +222,6 @@ static int ecryptfs_open(struct inode *inode, struct file *file)
}
ecryptfs_set_file_lower(
file, ecryptfs_inode_to_private(inode)->lower_file);
- if (d_is_dir(ecryptfs_dentry)) {
- ecryptfs_printk(KERN_DEBUG, "This is a directory\n");
- mutex_lock(&crypt_stat->cs_mutex);
- crypt_stat->flags &= ~(ECRYPTFS_ENCRYPTED);
- mutex_unlock(&crypt_stat->cs_mutex);
- rc = 0;
- goto out;
- }
rc = read_or_initialize_metadata(ecryptfs_dentry);
if (rc)
goto out_put;
@@ -247,6 +238,45 @@ out:
return rc;
}
+/**
+ * ecryptfs_dir_open
+ * @inode: inode speciying file to open
+ * @file: Structure to return filled in
+ *
+ * Opens the file specified by inode.
+ *
+ * Returns zero on success; non-zero otherwise
+ */
+static int ecryptfs_dir_open(struct inode *inode, struct file *file)
+{
+ struct dentry *ecryptfs_dentry = file->f_path.dentry;
+ /* Private value of ecryptfs_dentry allocated in
+ * ecryptfs_lookup() */
+ struct ecryptfs_file_info *file_info;
+ struct file *lower_file;
+
+ /* Released in ecryptfs_release or end of function if failure */
+ file_info = kmem_cache_zalloc(ecryptfs_file_info_cache, GFP_KERNEL);
+ ecryptfs_set_file_private(file, file_info);
+ if (unlikely(!file_info)) {
+ ecryptfs_printk(KERN_ERR,
+ "Error attempting to allocate memory\n");
+ return -ENOMEM;
+ }
+ lower_file = dentry_open(ecryptfs_dentry_to_lower_path(ecryptfs_dentry),
+ file->f_flags, current_cred());
+ if (IS_ERR(lower_file)) {
+ printk(KERN_ERR "%s: Error attempting to initialize "
+ "the lower file for the dentry with name "
+ "[%pd]; rc = [%ld]\n", __func__,
+ ecryptfs_dentry, PTR_ERR(lower_file));
+ kmem_cache_free(ecryptfs_file_info_cache, file_info);
+ return PTR_ERR(lower_file);
+ }
+ ecryptfs_set_file_lower(file, lower_file);
+ return 0;
+}
+
static int ecryptfs_flush(struct file *file, fl_owner_t td)
{
struct file *lower_file = ecryptfs_file_to_lower(file);
@@ -267,6 +297,19 @@ static int ecryptfs_release(struct inode *inode, struct file *file)
return 0;
}
+static int ecryptfs_dir_release(struct inode *inode, struct file *file)
+{
+ fput(ecryptfs_file_to_lower(file));
+ kmem_cache_free(ecryptfs_file_info_cache,
+ ecryptfs_file_to_private(file));
+ return 0;
+}
+
+static loff_t ecryptfs_dir_llseek(struct file *file, loff_t offset, int whence)
+{
+ return vfs_llseek(ecryptfs_file_to_lower(file), offset, whence);
+}
+
static int
ecryptfs_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
@@ -346,20 +389,16 @@ const struct file_operations ecryptfs_dir_fops = {
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
#endif
- .open = ecryptfs_open,
- .flush = ecryptfs_flush,
- .release = ecryptfs_release,
+ .open = ecryptfs_dir_open,
+ .release = ecryptfs_dir_release,
.fsync = ecryptfs_fsync,
- .fasync = ecryptfs_fasync,
- .splice_read = generic_file_splice_read,
- .llseek = default_llseek,
+ .llseek = ecryptfs_dir_llseek,
};
const struct file_operations ecryptfs_main_fops = {
.llseek = generic_file_llseek,
.read_iter = ecryptfs_read_update_atime,
.write_iter = generic_file_write_iter,
- .iterate = ecryptfs_readdir,
.unlocked_ioctl = ecryptfs_unlocked_ioctl,
#ifdef CONFIG_COMPAT
.compat_ioctl = ecryptfs_compat_ioctl,
diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c
index 4e685ac10..224b49e71 100644
--- a/fs/ecryptfs/inode.c
+++ b/fs/ecryptfs/inode.c
@@ -29,7 +29,6 @@
#include <linux/dcache.h>
#include <linux/namei.h>
#include <linux/mount.h>
-#include <linux/crypto.h>
#include <linux/fs_stack.h>
#include <linux/slab.h>
#include <linux/xattr.h>
@@ -397,11 +396,9 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
int rc = 0;
lower_dir_dentry = ecryptfs_dentry_to_lower(ecryptfs_dentry->d_parent);
- inode_lock(d_inode(lower_dir_dentry));
- lower_dentry = lookup_one_len(ecryptfs_dentry->d_name.name,
+ lower_dentry = lookup_one_len_unlocked(ecryptfs_dentry->d_name.name,
lower_dir_dentry,
ecryptfs_dentry->d_name.len);
- inode_unlock(d_inode(lower_dir_dentry));
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -419,18 +416,16 @@ static struct dentry *ecryptfs_lookup(struct inode *ecryptfs_dir_inode,
dput(lower_dentry);
rc = ecryptfs_encrypt_and_encode_filename(
&encrypted_and_encoded_name, &encrypted_and_encoded_name_size,
- NULL, mount_crypt_stat, ecryptfs_dentry->d_name.name,
+ mount_crypt_stat, ecryptfs_dentry->d_name.name,
ecryptfs_dentry->d_name.len);
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt and encode "
"filename; rc = [%d]\n", __func__, rc);
goto out;
}
- inode_lock(d_inode(lower_dir_dentry));
- lower_dentry = lookup_one_len(encrypted_and_encoded_name,
+ lower_dentry = lookup_one_len_unlocked(encrypted_and_encoded_name,
lower_dir_dentry,
encrypted_and_encoded_name_size);
- inode_unlock(d_inode(lower_dir_dentry));
if (IS_ERR(lower_dentry)) {
rc = PTR_ERR(lower_dentry);
ecryptfs_printk(KERN_DEBUG, "%s: lookup_one_len() returned "
@@ -502,7 +497,6 @@ static int ecryptfs_symlink(struct inode *dir, struct dentry *dentry,
dir->i_sb)->mount_crypt_stat;
rc = ecryptfs_encrypt_and_encode_filename(&encoded_symname,
&encoded_symlen,
- NULL,
mount_crypt_stat, symname,
strlen(symname));
if (rc)
@@ -769,10 +763,10 @@ static int truncate_upper(struct dentry *dentry, struct iattr *ia,
} else { /* ia->ia_size < i_size_read(inode) */
/* We're chopping off all the pages down to the page
* in which ia->ia_size is located. Fill in the end of
- * that page from (ia->ia_size & ~PAGE_CACHE_MASK) to
- * PAGE_CACHE_SIZE with zeros. */
- size_t num_zeros = (PAGE_CACHE_SIZE
- - (ia->ia_size & ~PAGE_CACHE_MASK));
+ * that page from (ia->ia_size & ~PAGE_MASK) to
+ * PAGE_SIZE with zeros. */
+ size_t num_zeros = (PAGE_SIZE
+ - (ia->ia_size & ~PAGE_MASK));
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
truncate_setsize(inode, ia->ia_size);
diff --git a/fs/ecryptfs/keystore.c b/fs/ecryptfs/keystore.c
index 6bd67e201..3cf1546dc 100644
--- a/fs/ecryptfs/keystore.c
+++ b/fs/ecryptfs/keystore.c
@@ -25,11 +25,12 @@
* 02111-1307, USA.
*/
+#include <crypto/hash.h>
+#include <crypto/skcipher.h>
#include <linux/string.h>
#include <linux/pagemap.h>
#include <linux/key.h>
#include <linux/random.h>
-#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include "ecryptfs_kernel.h"
@@ -601,12 +602,13 @@ struct ecryptfs_write_tag_70_packet_silly_stack {
struct ecryptfs_auth_tok *auth_tok;
struct scatterlist src_sg[2];
struct scatterlist dst_sg[2];
- struct blkcipher_desc desc;
+ struct crypto_skcipher *skcipher_tfm;
+ struct skcipher_request *skcipher_req;
char iv[ECRYPTFS_MAX_IV_BYTES];
char hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
char tmp_hash[ECRYPTFS_TAG_70_DIGEST_SIZE];
- struct hash_desc hash_desc;
- struct scatterlist hash_sg;
+ struct crypto_shash *hash_tfm;
+ struct shash_desc *hash_desc;
};
/**
@@ -629,14 +631,12 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
struct key *auth_tok_key = NULL;
int rc = 0;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s) {
printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
"[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
- rc = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
- s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
(*packet_size) = 0;
rc = ecryptfs_find_auth_tok_for_sig(
&auth_tok_key,
@@ -649,7 +649,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
goto out;
}
rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(
- &s->desc.tfm,
+ &s->skcipher_tfm,
&s->tfm_mutex, mount_crypt_stat->global_default_fn_cipher_name);
if (unlikely(rc)) {
printk(KERN_ERR "Internal error whilst attempting to get "
@@ -658,7 +658,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
goto out;
}
mutex_lock(s->tfm_mutex);
- s->block_size = crypto_blkcipher_blocksize(s->desc.tfm);
+ s->block_size = crypto_skcipher_blocksize(s->skcipher_tfm);
/* Plus one for the \0 separator between the random prefix
* and the plaintext filename */
s->num_rand_bytes = (ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES + 1);
@@ -691,6 +691,19 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
rc = -EINVAL;
goto out_unlock;
}
+
+ s->skcipher_req = skcipher_request_alloc(s->skcipher_tfm, GFP_KERNEL);
+ if (!s->skcipher_req) {
+ printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+ "skcipher_request_alloc for %s\n", __func__,
+ crypto_skcipher_driver_name(s->skcipher_tfm));
+ rc = -ENOMEM;
+ goto out_unlock;
+ }
+
+ skcipher_request_set_callback(s->skcipher_req,
+ CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+
s->block_aligned_filename = kzalloc(s->block_aligned_filename_size,
GFP_KERNEL);
if (!s->block_aligned_filename) {
@@ -700,7 +713,6 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
rc = -ENOMEM;
goto out_unlock;
}
- s->i = 0;
dest[s->i++] = ECRYPTFS_TAG_70_PACKET_TYPE;
rc = ecryptfs_write_packet_length(&dest[s->i],
(ECRYPTFS_SIG_SIZE
@@ -738,40 +750,36 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
"password tokens\n", __func__);
goto out_free_unlock;
}
- sg_init_one(
- &s->hash_sg,
- (u8 *)s->auth_tok->token.password.session_key_encryption_key,
- s->auth_tok->token.password.session_key_encryption_key_bytes);
- s->hash_desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- s->hash_desc.tfm = crypto_alloc_hash(ECRYPTFS_TAG_70_DIGEST, 0,
- CRYPTO_ALG_ASYNC);
- if (IS_ERR(s->hash_desc.tfm)) {
- rc = PTR_ERR(s->hash_desc.tfm);
+ s->hash_tfm = crypto_alloc_shash(ECRYPTFS_TAG_70_DIGEST, 0, 0);
+ if (IS_ERR(s->hash_tfm)) {
+ rc = PTR_ERR(s->hash_tfm);
printk(KERN_ERR "%s: Error attempting to "
"allocate hash crypto context; rc = [%d]\n",
__func__, rc);
goto out_free_unlock;
}
- rc = crypto_hash_init(&s->hash_desc);
- if (rc) {
- printk(KERN_ERR
- "%s: Error initializing crypto hash; rc = [%d]\n",
- __func__, rc);
- goto out_release_free_unlock;
- }
- rc = crypto_hash_update(
- &s->hash_desc, &s->hash_sg,
- s->auth_tok->token.password.session_key_encryption_key_bytes);
- if (rc) {
- printk(KERN_ERR
- "%s: Error updating crypto hash; rc = [%d]\n",
- __func__, rc);
+
+ s->hash_desc = kmalloc(sizeof(*s->hash_desc) +
+ crypto_shash_descsize(s->hash_tfm), GFP_KERNEL);
+ if (!s->hash_desc) {
+ printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+ "kmalloc [%zd] bytes\n", __func__,
+ sizeof(*s->hash_desc) +
+ crypto_shash_descsize(s->hash_tfm));
+ rc = -ENOMEM;
goto out_release_free_unlock;
}
- rc = crypto_hash_final(&s->hash_desc, s->hash);
+
+ s->hash_desc->tfm = s->hash_tfm;
+ s->hash_desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ rc = crypto_shash_digest(s->hash_desc,
+ (u8 *)s->auth_tok->token.password.session_key_encryption_key,
+ s->auth_tok->token.password.session_key_encryption_key_bytes,
+ s->hash);
if (rc) {
printk(KERN_ERR
- "%s: Error finalizing crypto hash; rc = [%d]\n",
+ "%s: Error computing crypto hash; rc = [%d]\n",
__func__, rc);
goto out_release_free_unlock;
}
@@ -780,27 +788,12 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
s->hash[(s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)];
if ((s->j % ECRYPTFS_TAG_70_DIGEST_SIZE)
== (ECRYPTFS_TAG_70_DIGEST_SIZE - 1)) {
- sg_init_one(&s->hash_sg, (u8 *)s->hash,
- ECRYPTFS_TAG_70_DIGEST_SIZE);
- rc = crypto_hash_init(&s->hash_desc);
- if (rc) {
- printk(KERN_ERR
- "%s: Error initializing crypto hash; "
- "rc = [%d]\n", __func__, rc);
- goto out_release_free_unlock;
- }
- rc = crypto_hash_update(&s->hash_desc, &s->hash_sg,
- ECRYPTFS_TAG_70_DIGEST_SIZE);
+ rc = crypto_shash_digest(s->hash_desc, (u8 *)s->hash,
+ ECRYPTFS_TAG_70_DIGEST_SIZE,
+ s->tmp_hash);
if (rc) {
printk(KERN_ERR
- "%s: Error updating crypto hash; "
- "rc = [%d]\n", __func__, rc);
- goto out_release_free_unlock;
- }
- rc = crypto_hash_final(&s->hash_desc, s->tmp_hash);
- if (rc) {
- printk(KERN_ERR
- "%s: Error finalizing crypto hash; "
+ "%s: Error computing crypto hash; "
"rc = [%d]\n", __func__, rc);
goto out_release_free_unlock;
}
@@ -834,10 +827,8 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
* of the IV here, so we just use 0's for the IV. Note the
* constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES
* >= ECRYPTFS_MAX_IV_BYTES. */
- memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
- s->desc.info = s->iv;
- rc = crypto_blkcipher_setkey(
- s->desc.tfm,
+ rc = crypto_skcipher_setkey(
+ s->skcipher_tfm,
s->auth_tok->token.password.session_key_encryption_key,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
if (rc < 0) {
@@ -850,8 +841,9 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
goto out_release_free_unlock;
}
- rc = crypto_blkcipher_encrypt_iv(&s->desc, s->dst_sg, s->src_sg,
- s->block_aligned_filename_size);
+ skcipher_request_set_crypt(s->skcipher_req, s->src_sg, s->dst_sg,
+ s->block_aligned_filename_size, s->iv);
+ rc = crypto_skcipher_encrypt(s->skcipher_req);
if (rc) {
printk(KERN_ERR "%s: Error attempting to encrypt filename; "
"rc = [%d]\n", __func__, rc);
@@ -861,7 +853,7 @@ ecryptfs_write_tag_70_packet(char *dest, size_t *remaining_bytes,
(*packet_size) = s->i;
(*remaining_bytes) -= (*packet_size);
out_release_free_unlock:
- crypto_free_hash(s->hash_desc.tfm);
+ crypto_free_shash(s->hash_tfm);
out_free_unlock:
kzfree(s->block_aligned_filename);
out_unlock:
@@ -871,6 +863,8 @@ out:
up_write(&(auth_tok_key->sem));
key_put(auth_tok_key);
}
+ skcipher_request_free(s->skcipher_req);
+ kzfree(s->hash_desc);
kfree(s);
return rc;
}
@@ -888,7 +882,8 @@ struct ecryptfs_parse_tag_70_packet_silly_stack {
struct ecryptfs_auth_tok *auth_tok;
struct scatterlist src_sg[2];
struct scatterlist dst_sg[2];
- struct blkcipher_desc desc;
+ struct crypto_skcipher *skcipher_tfm;
+ struct skcipher_request *skcipher_req;
char fnek_sig_hex[ECRYPTFS_SIG_SIZE_HEX + 1];
char iv[ECRYPTFS_MAX_IV_BYTES];
char cipher_string[ECRYPTFS_MAX_CIPHER_NAME_SIZE + 1];
@@ -922,14 +917,12 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
(*packet_size) = 0;
(*filename_size) = 0;
(*filename) = NULL;
- s = kmalloc(sizeof(*s), GFP_KERNEL);
+ s = kzalloc(sizeof(*s), GFP_KERNEL);
if (!s) {
printk(KERN_ERR "%s: Out of memory whilst trying to kmalloc "
"[%zd] bytes of kernel memory\n", __func__, sizeof(*s));
- rc = -ENOMEM;
- goto out;
+ return -ENOMEM;
}
- s->desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
if (max_packet_size < ECRYPTFS_TAG_70_MIN_METADATA_SIZE) {
printk(KERN_WARNING "%s: max_packet_size is [%zd]; it must be "
"at least [%d]\n", __func__, max_packet_size,
@@ -992,7 +985,7 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
rc);
goto out;
}
- rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->desc.tfm,
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&s->skcipher_tfm,
&s->tfm_mutex,
s->cipher_string);
if (unlikely(rc)) {
@@ -1030,12 +1023,23 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
__func__, rc, s->block_aligned_filename_size);
goto out_free_unlock;
}
+
+ s->skcipher_req = skcipher_request_alloc(s->skcipher_tfm, GFP_KERNEL);
+ if (!s->skcipher_req) {
+ printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+ "skcipher_request_alloc for %s\n", __func__,
+ crypto_skcipher_driver_name(s->skcipher_tfm));
+ rc = -ENOMEM;
+ goto out_free_unlock;
+ }
+
+ skcipher_request_set_callback(s->skcipher_req,
+ CRYPTO_TFM_REQ_MAY_SLEEP, NULL, NULL);
+
/* The characters in the first block effectively do the job of
* the IV here, so we just use 0's for the IV. Note the
* constraint that ECRYPTFS_FILENAME_MIN_RANDOM_PREPEND_BYTES
* >= ECRYPTFS_MAX_IV_BYTES. */
- memset(s->iv, 0, ECRYPTFS_MAX_IV_BYTES);
- s->desc.info = s->iv;
/* TODO: Support other key modules than passphrase for
* filename encryption */
if (s->auth_tok->token_type != ECRYPTFS_PASSWORD) {
@@ -1044,8 +1048,8 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
"password tokens\n", __func__);
goto out_free_unlock;
}
- rc = crypto_blkcipher_setkey(
- s->desc.tfm,
+ rc = crypto_skcipher_setkey(
+ s->skcipher_tfm,
s->auth_tok->token.password.session_key_encryption_key,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
if (rc < 0) {
@@ -1058,14 +1062,14 @@ ecryptfs_parse_tag_70_packet(char **filename, size_t *filename_size,
mount_crypt_stat->global_default_fn_cipher_key_bytes);
goto out_free_unlock;
}
- rc = crypto_blkcipher_decrypt_iv(&s->desc, s->dst_sg, s->src_sg,
- s->block_aligned_filename_size);
+ skcipher_request_set_crypt(s->skcipher_req, s->src_sg, s->dst_sg,
+ s->block_aligned_filename_size, s->iv);
+ rc = crypto_skcipher_decrypt(s->skcipher_req);
if (rc) {
printk(KERN_ERR "%s: Error attempting to decrypt filename; "
"rc = [%d]\n", __func__, rc);
goto out_free_unlock;
}
- s->i = 0;
while (s->decrypted_filename[s->i] != '\0'
&& s->i < s->block_aligned_filename_size)
s->i++;
@@ -1108,6 +1112,7 @@ out:
up_write(&(auth_tok_key->sem));
key_put(auth_tok_key);
}
+ skcipher_request_free(s->skcipher_req);
kfree(s);
return rc;
}
@@ -1667,9 +1672,8 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
struct scatterlist dst_sg[2];
struct scatterlist src_sg[2];
struct mutex *tfm_mutex;
- struct blkcipher_desc desc = {
- .flags = CRYPTO_TFM_REQ_MAY_SLEEP
- };
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req = NULL;
int rc = 0;
if (unlikely(ecryptfs_verbosity > 0)) {
@@ -1680,7 +1684,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
auth_tok->token.password.session_key_encryption_key,
auth_tok->token.password.session_key_encryption_key_bytes);
}
- rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&tfm, &tfm_mutex,
crypt_stat->cipher);
if (unlikely(rc)) {
printk(KERN_ERR "Internal error whilst attempting to get "
@@ -1711,8 +1715,20 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
goto out;
}
mutex_lock(tfm_mutex);
- rc = crypto_blkcipher_setkey(
- desc.tfm, auth_tok->token.password.session_key_encryption_key,
+ req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ mutex_unlock(tfm_mutex);
+ printk(KERN_ERR "%s: Out of kernel memory whilst attempting to "
+ "skcipher_request_alloc for %s\n", __func__,
+ crypto_skcipher_driver_name(tfm));
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
+ NULL, NULL);
+ rc = crypto_skcipher_setkey(
+ tfm, auth_tok->token.password.session_key_encryption_key,
crypt_stat->key_size);
if (unlikely(rc < 0)) {
mutex_unlock(tfm_mutex);
@@ -1720,8 +1736,10 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
rc = -EINVAL;
goto out;
}
- rc = crypto_blkcipher_decrypt(&desc, dst_sg, src_sg,
- auth_tok->session_key.encrypted_key_size);
+ skcipher_request_set_crypt(req, src_sg, dst_sg,
+ auth_tok->session_key.encrypted_key_size,
+ NULL);
+ rc = crypto_skcipher_decrypt(req);
mutex_unlock(tfm_mutex);
if (unlikely(rc)) {
printk(KERN_ERR "Error decrypting; rc = [%d]\n", rc);
@@ -1738,6 +1756,7 @@ decrypt_passphrase_encrypted_session_key(struct ecryptfs_auth_tok *auth_tok,
crypt_stat->key_size);
}
out:
+ skcipher_request_free(req);
return rc;
}
@@ -1779,7 +1798,7 @@ int ecryptfs_parse_packet_set(struct ecryptfs_crypt_stat *crypt_stat,
* added the our &auth_tok_list */
next_packet_is_auth_tok_packet = 1;
while (next_packet_is_auth_tok_packet) {
- size_t max_packet_size = ((PAGE_CACHE_SIZE - 8) - i);
+ size_t max_packet_size = ((PAGE_SIZE - 8) - i);
switch (src[i]) {
case ECRYPTFS_TAG_3_PACKET_TYPE:
@@ -2191,16 +2210,14 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
size_t max_packet_size;
struct ecryptfs_mount_crypt_stat *mount_crypt_stat =
crypt_stat->mount_crypt_stat;
- struct blkcipher_desc desc = {
- .tfm = NULL,
- .flags = CRYPTO_TFM_REQ_MAY_SLEEP
- };
+ struct crypto_skcipher *tfm;
+ struct skcipher_request *req;
int rc = 0;
(*packet_size) = 0;
ecryptfs_from_hex(key_rec->sig, auth_tok->token.password.signature,
ECRYPTFS_SIG_SIZE);
- rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&desc.tfm, &tfm_mutex,
+ rc = ecryptfs_get_tfm_and_mutex_for_cipher_name(&tfm, &tfm_mutex,
crypt_stat->cipher);
if (unlikely(rc)) {
printk(KERN_ERR "Internal error whilst attempting to get "
@@ -2209,12 +2226,11 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
goto out;
}
if (mount_crypt_stat->global_default_cipher_key_size == 0) {
- struct blkcipher_alg *alg = crypto_blkcipher_alg(desc.tfm);
-
printk(KERN_WARNING "No key size specified at mount; "
- "defaulting to [%d]\n", alg->max_keysize);
+ "defaulting to [%d]\n",
+ crypto_skcipher_default_keysize(tfm));
mount_crypt_stat->global_default_cipher_key_size =
- alg->max_keysize;
+ crypto_skcipher_default_keysize(tfm);
}
if (crypt_stat->key_size == 0)
crypt_stat->key_size =
@@ -2284,20 +2300,36 @@ write_tag_3_packet(char *dest, size_t *remaining_bytes,
goto out;
}
mutex_lock(tfm_mutex);
- rc = crypto_blkcipher_setkey(desc.tfm, session_key_encryption_key,
- crypt_stat->key_size);
+ rc = crypto_skcipher_setkey(tfm, session_key_encryption_key,
+ crypt_stat->key_size);
if (rc < 0) {
mutex_unlock(tfm_mutex);
ecryptfs_printk(KERN_ERR, "Error setting key for crypto "
"context; rc = [%d]\n", rc);
goto out;
}
+
+ req = skcipher_request_alloc(tfm, GFP_KERNEL);
+ if (!req) {
+ mutex_unlock(tfm_mutex);
+ ecryptfs_printk(KERN_ERR, "Out of kernel memory whilst "
+ "attempting to skcipher_request_alloc for "
+ "%s\n", crypto_skcipher_driver_name(tfm));
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ skcipher_request_set_callback(req, CRYPTO_TFM_REQ_MAY_SLEEP,
+ NULL, NULL);
+
rc = 0;
ecryptfs_printk(KERN_DEBUG, "Encrypting [%zd] bytes of the key\n",
crypt_stat->key_size);
- rc = crypto_blkcipher_encrypt(&desc, dst_sg, src_sg,
- (*key_rec).enc_key_size);
+ skcipher_request_set_crypt(req, src_sg, dst_sg,
+ (*key_rec).enc_key_size, NULL);
+ rc = crypto_skcipher_encrypt(req);
mutex_unlock(tfm_mutex);
+ skcipher_request_free(req);
if (rc) {
printk(KERN_ERR "Error encrypting; rc = [%d]\n", rc);
goto out;
diff --git a/fs/ecryptfs/main.c b/fs/ecryptfs/main.c
index e25b6b06b..1698132d0 100644
--- a/fs/ecryptfs/main.c
+++ b/fs/ecryptfs/main.c
@@ -29,7 +29,6 @@
#include <linux/module.h>
#include <linux/namei.h>
#include <linux/skbuff.h>
-#include <linux/crypto.h>
#include <linux/mount.h>
#include <linux/pagemap.h>
#include <linux/key.h>
@@ -696,12 +695,12 @@ static struct ecryptfs_cache_info {
{
.cache = &ecryptfs_header_cache,
.name = "ecryptfs_headers",
- .size = PAGE_CACHE_SIZE,
+ .size = PAGE_SIZE,
},
{
.cache = &ecryptfs_xattr_cache,
.name = "ecryptfs_xattr_cache",
- .size = PAGE_CACHE_SIZE,
+ .size = PAGE_SIZE,
},
{
.cache = &ecryptfs_key_record_cache,
@@ -819,7 +818,7 @@ static int __init ecryptfs_init(void)
{
int rc;
- if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_CACHE_SIZE) {
+ if (ECRYPTFS_DEFAULT_EXTENT_SIZE > PAGE_SIZE) {
rc = -EINVAL;
ecryptfs_printk(KERN_ERR, "The eCryptfs extent size is "
"larger than the host's page size, and so "
@@ -827,7 +826,7 @@ static int __init ecryptfs_init(void)
"default eCryptfs extent size is [%u] bytes; "
"the page size is [%lu] bytes.\n",
ECRYPTFS_DEFAULT_EXTENT_SIZE,
- (unsigned long)PAGE_CACHE_SIZE);
+ (unsigned long)PAGE_SIZE);
goto out;
}
rc = ecryptfs_init_kmem_caches();
diff --git a/fs/ecryptfs/mmap.c b/fs/ecryptfs/mmap.c
index c6ced4cbf..e6b1d8095 100644
--- a/fs/ecryptfs/mmap.c
+++ b/fs/ecryptfs/mmap.c
@@ -30,7 +30,6 @@
#include <linux/page-flags.h>
#include <linux/mount.h>
#include <linux/file.h>
-#include <linux/crypto.h>
#include <linux/scatterlist.h>
#include <linux/slab.h>
#include <asm/unaligned.h>
@@ -123,7 +122,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
struct ecryptfs_crypt_stat *crypt_stat)
{
loff_t extent_num_in_page = 0;
- loff_t num_extents_per_page = (PAGE_CACHE_SIZE
+ loff_t num_extents_per_page = (PAGE_SIZE
/ crypt_stat->extent_size);
int rc = 0;
@@ -139,7 +138,7 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
char *page_virt;
page_virt = kmap_atomic(page);
- memset(page_virt, 0, PAGE_CACHE_SIZE);
+ memset(page_virt, 0, PAGE_SIZE);
/* TODO: Support more than one header extent */
if (view_extent_num == 0) {
size_t written;
@@ -165,8 +164,8 @@ ecryptfs_copy_up_encrypted_with_header(struct page *page,
- crypt_stat->metadata_size);
rc = ecryptfs_read_lower_page_segment(
- page, (lower_offset >> PAGE_CACHE_SHIFT),
- (lower_offset & ~PAGE_CACHE_MASK),
+ page, (lower_offset >> PAGE_SHIFT),
+ (lower_offset & ~PAGE_MASK),
crypt_stat->extent_size, page->mapping->host);
if (rc) {
printk(KERN_ERR "%s: Error attempting to read "
@@ -199,7 +198,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
if (!crypt_stat || !(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
rc = ecryptfs_read_lower_page_segment(page, page->index, 0,
- PAGE_CACHE_SIZE,
+ PAGE_SIZE,
page->mapping->host);
} else if (crypt_stat->flags & ECRYPTFS_VIEW_AS_ENCRYPTED) {
if (crypt_stat->flags & ECRYPTFS_METADATA_IN_XATTR) {
@@ -216,7 +215,7 @@ static int ecryptfs_readpage(struct file *file, struct page *page)
} else {
rc = ecryptfs_read_lower_page_segment(
- page, page->index, 0, PAGE_CACHE_SIZE,
+ page, page->index, 0, PAGE_SIZE,
page->mapping->host);
if (rc) {
printk(KERN_ERR "Error reading page; rc = "
@@ -251,12 +250,12 @@ static int fill_zeros_to_end_of_page(struct page *page, unsigned int to)
struct inode *inode = page->mapping->host;
int end_byte_in_page;
- if ((i_size_read(inode) / PAGE_CACHE_SIZE) != page->index)
+ if ((i_size_read(inode) / PAGE_SIZE) != page->index)
goto out;
- end_byte_in_page = i_size_read(inode) % PAGE_CACHE_SIZE;
+ end_byte_in_page = i_size_read(inode) % PAGE_SIZE;
if (to > end_byte_in_page)
end_byte_in_page = to;
- zero_user_segment(page, end_byte_in_page, PAGE_CACHE_SIZE);
+ zero_user_segment(page, end_byte_in_page, PAGE_SIZE);
out:
return 0;
}
@@ -280,7 +279,7 @@ static int ecryptfs_write_begin(struct file *file,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
loff_t prev_page_end_size;
int rc = 0;
@@ -290,14 +289,14 @@ static int ecryptfs_write_begin(struct file *file,
return -ENOMEM;
*pagep = page;
- prev_page_end_size = ((loff_t)index << PAGE_CACHE_SHIFT);
+ prev_page_end_size = ((loff_t)index << PAGE_SHIFT);
if (!PageUptodate(page)) {
struct ecryptfs_crypt_stat *crypt_stat =
&ecryptfs_inode_to_private(mapping->host)->crypt_stat;
if (!(crypt_stat->flags & ECRYPTFS_ENCRYPTED)) {
rc = ecryptfs_read_lower_page_segment(
- page, index, 0, PAGE_CACHE_SIZE, mapping->host);
+ page, index, 0, PAGE_SIZE, mapping->host);
if (rc) {
printk(KERN_ERR "%s: Error attempting to read "
"lower page segment; rc = [%d]\n",
@@ -323,7 +322,7 @@ static int ecryptfs_write_begin(struct file *file,
SetPageUptodate(page);
} else {
rc = ecryptfs_read_lower_page_segment(
- page, index, 0, PAGE_CACHE_SIZE,
+ page, index, 0, PAGE_SIZE,
mapping->host);
if (rc) {
printk(KERN_ERR "%s: Error reading "
@@ -337,9 +336,9 @@ static int ecryptfs_write_begin(struct file *file,
} else {
if (prev_page_end_size
>= i_size_read(page->mapping->host)) {
- zero_user(page, 0, PAGE_CACHE_SIZE);
+ zero_user(page, 0, PAGE_SIZE);
SetPageUptodate(page);
- } else if (len < PAGE_CACHE_SIZE) {
+ } else if (len < PAGE_SIZE) {
rc = ecryptfs_decrypt_page(page);
if (rc) {
printk(KERN_ERR "%s: Error decrypting "
@@ -372,11 +371,11 @@ static int ecryptfs_write_begin(struct file *file,
* of page? Zero it out. */
if ((i_size_read(mapping->host) == prev_page_end_size)
&& (pos != 0))
- zero_user(page, 0, PAGE_CACHE_SIZE);
+ zero_user(page, 0, PAGE_SIZE);
out:
if (unlikely(rc)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
*pagep = NULL;
}
return rc;
@@ -438,7 +437,7 @@ static int ecryptfs_write_inode_size_to_xattr(struct inode *ecryptfs_inode)
}
inode_lock(lower_inode);
size = lower_inode->i_op->getxattr(lower_dentry, ECRYPTFS_XATTR_NAME,
- xattr_virt, PAGE_CACHE_SIZE);
+ xattr_virt, PAGE_SIZE);
if (size < 0)
size = 8;
put_unaligned_be64(i_size_read(ecryptfs_inode), xattr_virt);
@@ -480,8 +479,8 @@ static int ecryptfs_write_end(struct file *file,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ pgoff_t index = pos >> PAGE_SHIFT;
+ unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + copied;
struct inode *ecryptfs_inode = mapping->host;
struct ecryptfs_crypt_stat *crypt_stat =
@@ -501,7 +500,7 @@ static int ecryptfs_write_end(struct file *file,
goto out;
}
if (!PageUptodate(page)) {
- if (copied < PAGE_CACHE_SIZE) {
+ if (copied < PAGE_SIZE) {
rc = 0;
goto out;
}
@@ -534,7 +533,7 @@ static int ecryptfs_write_end(struct file *file,
rc = copied;
out:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return rc;
}
diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c
index 09fe62227..158a3a39f 100644
--- a/fs/ecryptfs/read_write.c
+++ b/fs/ecryptfs/read_write.c
@@ -74,7 +74,7 @@ int ecryptfs_write_lower_page_segment(struct inode *ecryptfs_inode,
loff_t offset;
int rc;
- offset = ((((loff_t)page_for_lower->index) << PAGE_CACHE_SHIFT)
+ offset = ((((loff_t)page_for_lower->index) << PAGE_SHIFT)
+ offset_in_page);
virt = kmap(page_for_lower);
rc = ecryptfs_write_lower(ecryptfs_inode, virt, offset, size);
@@ -123,9 +123,9 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
else
pos = offset;
while (pos < (offset + size)) {
- pgoff_t ecryptfs_page_idx = (pos >> PAGE_CACHE_SHIFT);
- size_t start_offset_in_page = (pos & ~PAGE_CACHE_MASK);
- size_t num_bytes = (PAGE_CACHE_SIZE - start_offset_in_page);
+ pgoff_t ecryptfs_page_idx = (pos >> PAGE_SHIFT);
+ size_t start_offset_in_page = (pos & ~PAGE_MASK);
+ size_t num_bytes = (PAGE_SIZE - start_offset_in_page);
loff_t total_remaining_bytes = ((offset + size) - pos);
if (fatal_signal_pending(current)) {
@@ -165,7 +165,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
* Fill in zero values to the end of the page */
memset(((char *)ecryptfs_page_virt
+ start_offset_in_page), 0,
- PAGE_CACHE_SIZE - start_offset_in_page);
+ PAGE_SIZE - start_offset_in_page);
}
/* pos >= offset, we are now writing the data request */
@@ -186,7 +186,7 @@ int ecryptfs_write(struct inode *ecryptfs_inode, char *data, loff_t offset,
ecryptfs_page,
start_offset_in_page,
data_offset);
- page_cache_release(ecryptfs_page);
+ put_page(ecryptfs_page);
if (rc) {
printk(KERN_ERR "%s: Error encrypting "
"page; rc = [%d]\n", __func__, rc);
@@ -262,7 +262,7 @@ int ecryptfs_read_lower_page_segment(struct page *page_for_ecryptfs,
loff_t offset;
int rc;
- offset = ((((loff_t)page_index) << PAGE_CACHE_SHIFT) + offset_in_page);
+ offset = ((((loff_t)page_index) << PAGE_SHIFT) + offset_in_page);
virt = kmap(page_for_ecryptfs);
rc = ecryptfs_read_lower(virt, offset, size, ecryptfs_inode);
if (rc > 0)
diff --git a/fs/ecryptfs/super.c b/fs/ecryptfs/super.c
index afa1b81c3..77a486d3a 100644
--- a/fs/ecryptfs/super.c
+++ b/fs/ecryptfs/super.c
@@ -29,7 +29,6 @@
#include <linux/slab.h>
#include <linux/seq_file.h>
#include <linux/file.h>
-#include <linux/crypto.h>
#include <linux/statfs.h>
#include <linux/magic.h>
#include "ecryptfs_kernel.h"
diff --git a/fs/efivarfs/super.c b/fs/efivarfs/super.c
index dd029d13e..553c5d2db 100644
--- a/fs/efivarfs/super.c
+++ b/fs/efivarfs/super.c
@@ -197,8 +197,8 @@ static int efivarfs_fill_super(struct super_block *sb, void *data, int silent)
efivarfs_sb = sb;
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = EFIVARFS_MAGIC;
sb->s_op = &efivarfs_ops;
sb->s_d_op = &efivarfs_d_ops;
diff --git a/fs/eventfd.c b/fs/eventfd.c
index ed70cf9fd..1231cd199 100644
--- a/fs/eventfd.c
+++ b/fs/eventfd.c
@@ -121,8 +121,46 @@ static unsigned int eventfd_poll(struct file *file, poll_table *wait)
u64 count;
poll_wait(file, &ctx->wqh, wait);
- smp_rmb();
- count = ctx->count;
+
+ /*
+ * All writes to ctx->count occur within ctx->wqh.lock. This read
+ * can be done outside ctx->wqh.lock because we know that poll_wait
+ * takes that lock (through add_wait_queue) if our caller will sleep.
+ *
+ * The read _can_ therefore seep into add_wait_queue's critical
+ * section, but cannot move above it! add_wait_queue's spin_lock acts
+ * as an acquire barrier and ensures that the read be ordered properly
+ * against the writes. The following CAN happen and is safe:
+ *
+ * poll write
+ * ----------------- ------------
+ * lock ctx->wqh.lock (in poll_wait)
+ * count = ctx->count
+ * __add_wait_queue
+ * unlock ctx->wqh.lock
+ * lock ctx->qwh.lock
+ * ctx->count += n
+ * if (waitqueue_active)
+ * wake_up_locked_poll
+ * unlock ctx->qwh.lock
+ * eventfd_poll returns 0
+ *
+ * but the following, which would miss a wakeup, cannot happen:
+ *
+ * poll write
+ * ----------------- ------------
+ * count = ctx->count (INVALID!)
+ * lock ctx->qwh.lock
+ * ctx->count += n
+ * **waitqueue_active is false**
+ * **no wake_up_locked_poll!**
+ * unlock ctx->qwh.lock
+ * lock ctx->wqh.lock (in poll_wait)
+ * __add_wait_queue
+ * unlock ctx->wqh.lock
+ * eventfd_poll returns 0
+ */
+ count = READ_ONCE(ctx->count);
if (count > 0)
events |= POLLIN;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index cde60741c..8a74a2a52 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -1616,7 +1616,7 @@ static int ep_poll(struct eventpoll *ep, struct epoll_event __user *events,
{
int res = 0, eavail, timed_out = 0;
unsigned long flags;
- long slack = 0;
+ u64 slack = 0;
wait_queue_t wait;
ktime_t expires, *to = NULL;
diff --git a/fs/exec.c b/fs/exec.c
index 2cc69a450..2dece9a70 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -56,6 +56,7 @@
#include <linux/pipe_fs_i.h>
#include <linux/oom.h>
#include <linux/compat.h>
+#include <linux/vmalloc.h>
#include <trace/events/fs.h>
@@ -201,8 +202,12 @@ static struct page *get_arg_page(struct linux_binprm *bprm, unsigned long pos,
return NULL;
}
#endif
- ret = get_user_pages(current, bprm->mm, pos,
- 1, write, 1, &page, NULL);
+ /*
+ * We are doing an exec(). 'current' is the process
+ * doing the exec and bprm->mm is the new process's mm.
+ */
+ ret = get_user_pages_remote(current, bprm->mm, pos, 1, write,
+ 1, &page, NULL);
if (ret <= 0)
return NULL;
@@ -836,6 +841,97 @@ int kernel_read(struct file *file, loff_t offset,
EXPORT_SYMBOL(kernel_read);
+int kernel_read_file(struct file *file, void **buf, loff_t *size,
+ loff_t max_size, enum kernel_read_file_id id)
+{
+ loff_t i_size, pos;
+ ssize_t bytes = 0;
+ int ret;
+
+ if (!S_ISREG(file_inode(file)->i_mode) || max_size < 0)
+ return -EINVAL;
+
+ ret = security_kernel_read_file(file, id);
+ if (ret)
+ return ret;
+
+ i_size = i_size_read(file_inode(file));
+ if (max_size > 0 && i_size > max_size)
+ return -EFBIG;
+ if (i_size <= 0)
+ return -EINVAL;
+
+ *buf = vmalloc(i_size);
+ if (!*buf)
+ return -ENOMEM;
+
+ pos = 0;
+ while (pos < i_size) {
+ bytes = kernel_read(file, pos, (char *)(*buf) + pos,
+ i_size - pos);
+ if (bytes < 0) {
+ ret = bytes;
+ goto out;
+ }
+
+ if (bytes == 0)
+ break;
+ pos += bytes;
+ }
+
+ if (pos != i_size) {
+ ret = -EIO;
+ goto out;
+ }
+
+ ret = security_kernel_post_read_file(file, *buf, i_size, id);
+ if (!ret)
+ *size = pos;
+
+out:
+ if (ret < 0) {
+ vfree(*buf);
+ *buf = NULL;
+ }
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kernel_read_file);
+
+int kernel_read_file_from_path(char *path, void **buf, loff_t *size,
+ loff_t max_size, enum kernel_read_file_id id)
+{
+ struct file *file;
+ int ret;
+
+ if (!path || !*path)
+ return -EINVAL;
+
+ file = filp_open(path, O_RDONLY, 0);
+ if (IS_ERR(file))
+ return PTR_ERR(file);
+
+ ret = kernel_read_file(file, buf, size, max_size, id);
+ fput(file);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kernel_read_file_from_path);
+
+int kernel_read_file_from_fd(int fd, void **buf, loff_t *size, loff_t max_size,
+ enum kernel_read_file_id id)
+{
+ struct fd f = fdget(fd);
+ int ret = -EBADF;
+
+ if (!f.file)
+ goto out;
+
+ ret = kernel_read_file(f.file, buf, size, max_size, id);
+out:
+ fdput(f);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kernel_read_file_from_fd);
+
ssize_t read_code(struct file *file, unsigned long addr, loff_t pos, size_t len)
{
ssize_t res = vfs_read(file, (void __user *)addr, len, &pos);
diff --git a/fs/exofs/dir.c b/fs/exofs/dir.c
index e5bb2abf7..547b93cbe 100644
--- a/fs/exofs/dir.c
+++ b/fs/exofs/dir.c
@@ -41,16 +41,16 @@ static inline unsigned exofs_chunk_size(struct inode *inode)
static inline void exofs_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
static unsigned exofs_last_byte(struct inode *inode, unsigned long page_nr)
{
loff_t last_byte = inode->i_size;
- last_byte -= page_nr << PAGE_CACHE_SHIFT;
- if (last_byte > PAGE_CACHE_SIZE)
- last_byte = PAGE_CACHE_SIZE;
+ last_byte -= page_nr << PAGE_SHIFT;
+ if (last_byte > PAGE_SIZE)
+ last_byte = PAGE_SIZE;
return last_byte;
}
@@ -85,13 +85,13 @@ static void exofs_check_page(struct page *page)
unsigned chunk_size = exofs_chunk_size(dir);
char *kaddr = page_address(page);
unsigned offs, rec_len;
- unsigned limit = PAGE_CACHE_SIZE;
+ unsigned limit = PAGE_SIZE;
struct exofs_dir_entry *p;
char *error;
/* if the page is the last one in the directory */
- if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
- limit = dir->i_size & ~PAGE_CACHE_MASK;
+ if ((dir->i_size >> PAGE_SHIFT) == page->index) {
+ limit = dir->i_size & ~PAGE_MASK;
if (limit & (chunk_size - 1))
goto Ebadsize;
if (!limit)
@@ -138,7 +138,7 @@ bad_entry:
EXOFS_ERR(
"ERROR [exofs_check_page]: bad entry in directory(0x%lx): %s - "
"offset=%lu, inode=0x%llu, rec_len=%d, name_len=%d\n",
- dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+ dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
_LLU(le64_to_cpu(p->inode_no)),
rec_len, p->name_len);
goto fail;
@@ -147,7 +147,7 @@ Eend:
EXOFS_ERR("ERROR [exofs_check_page]: "
"entry in directory(0x%lx) spans the page boundary"
"offset=%lu, inode=0x%llx\n",
- dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+ dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
_LLU(le64_to_cpu(p->inode_no)));
fail:
SetPageChecked(page);
@@ -237,8 +237,8 @@ exofs_readdir(struct file *file, struct dir_context *ctx)
{
loff_t pos = ctx->pos;
struct inode *inode = file_inode(file);
- unsigned int offset = pos & ~PAGE_CACHE_MASK;
- unsigned long n = pos >> PAGE_CACHE_SHIFT;
+ unsigned int offset = pos & ~PAGE_MASK;
+ unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(exofs_chunk_size(inode)-1);
int need_revalidate = (file->f_version != inode->i_version);
@@ -254,7 +254,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx)
if (IS_ERR(page)) {
EXOFS_ERR("ERROR: bad page in directory(0x%lx)\n",
inode->i_ino);
- ctx->pos += PAGE_CACHE_SIZE - offset;
+ ctx->pos += PAGE_SIZE - offset;
return PTR_ERR(page);
}
kaddr = page_address(page);
@@ -262,7 +262,7 @@ exofs_readdir(struct file *file, struct dir_context *ctx)
if (offset) {
offset = exofs_validate_entry(kaddr, offset,
chunk_mask);
- ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset;
+ ctx->pos = (n<<PAGE_SHIFT) + offset;
}
file->f_version = inode->i_version;
need_revalidate = 0;
@@ -449,7 +449,7 @@ int exofs_add_link(struct dentry *dentry, struct inode *inode)
kaddr = page_address(page);
dir_end = kaddr + exofs_last_byte(dir, n);
de = (struct exofs_dir_entry *)kaddr;
- kaddr += PAGE_CACHE_SIZE - reclen;
+ kaddr += PAGE_SIZE - reclen;
while ((char *)de <= kaddr) {
if ((char *)de == dir_end) {
name_len = 0;
@@ -602,7 +602,7 @@ int exofs_make_empty(struct inode *inode, struct inode *parent)
kunmap_atomic(kaddr);
err = exofs_commit_chunk(page, 0, chunk_size);
fail:
- page_cache_release(page);
+ put_page(page);
return err;
}
diff --git a/fs/exofs/inode.c b/fs/exofs/inode.c
index 9eaf595ae..49e1bd00b 100644
--- a/fs/exofs/inode.c
+++ b/fs/exofs/inode.c
@@ -317,7 +317,7 @@ static int read_exec(struct page_collect *pcol)
if (!pcol->ios) {
int ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, true,
- pcol->pg_first << PAGE_CACHE_SHIFT,
+ pcol->pg_first << PAGE_SHIFT,
pcol->length, &pcol->ios);
if (ret)
@@ -383,7 +383,7 @@ static int readpage_strip(void *data, struct page *page)
struct inode *inode = pcol->inode;
struct exofs_i_info *oi = exofs_i(inode);
loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ pgoff_t end_index = i_size >> PAGE_SHIFT;
size_t len;
int ret;
@@ -397,9 +397,9 @@ static int readpage_strip(void *data, struct page *page)
pcol->that_locked_page = page;
if (page->index < end_index)
- len = PAGE_CACHE_SIZE;
+ len = PAGE_SIZE;
else if (page->index == end_index)
- len = i_size & ~PAGE_CACHE_MASK;
+ len = i_size & ~PAGE_MASK;
else
len = 0;
@@ -442,8 +442,8 @@ try_again:
goto fail;
}
- if (len != PAGE_CACHE_SIZE)
- zero_user(page, len, PAGE_CACHE_SIZE - len);
+ if (len != PAGE_SIZE)
+ zero_user(page, len, PAGE_SIZE - len);
EXOFS_DBGMSG2(" readpage_strip(0x%lx, 0x%lx) len=0x%zx\n",
inode->i_ino, page->index, len);
@@ -609,7 +609,7 @@ static void __r4w_put_page(void *priv, struct page *page)
if ((pcol->that_locked_page != page) && (ZERO_PAGE(0) != page)) {
EXOFS_DBGMSG2("index=0x%lx\n", page->index);
- page_cache_release(page);
+ put_page(page);
return;
}
EXOFS_DBGMSG2("that_locked_page index=0x%lx\n",
@@ -633,7 +633,7 @@ static int write_exec(struct page_collect *pcol)
BUG_ON(pcol->ios);
ret = ore_get_rw_state(&pcol->sbi->layout, &oi->oc, false,
- pcol->pg_first << PAGE_CACHE_SHIFT,
+ pcol->pg_first << PAGE_SHIFT,
pcol->length, &pcol->ios);
if (unlikely(ret))
goto err;
@@ -696,7 +696,7 @@ static int writepage_strip(struct page *page,
struct inode *inode = pcol->inode;
struct exofs_i_info *oi = exofs_i(inode);
loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ pgoff_t end_index = i_size >> PAGE_SHIFT;
size_t len;
int ret;
@@ -708,9 +708,9 @@ static int writepage_strip(struct page *page,
if (page->index < end_index)
/* in this case, the page is within the limits of the file */
- len = PAGE_CACHE_SIZE;
+ len = PAGE_SIZE;
else {
- len = i_size & ~PAGE_CACHE_MASK;
+ len = i_size & ~PAGE_MASK;
if (page->index > end_index || !len) {
/* in this case, the page is outside the limits
@@ -790,10 +790,10 @@ static int exofs_writepages(struct address_space *mapping,
long start, end, expected_pages;
int ret;
- start = wbc->range_start >> PAGE_CACHE_SHIFT;
+ start = wbc->range_start >> PAGE_SHIFT;
end = (wbc->range_end == LLONG_MAX) ?
start + mapping->nrpages :
- wbc->range_end >> PAGE_CACHE_SHIFT;
+ wbc->range_end >> PAGE_SHIFT;
if (start || end)
expected_pages = end - start + 1;
@@ -881,15 +881,15 @@ int exofs_write_begin(struct file *file, struct address_space *mapping,
}
/* read modify write */
- if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
+ if (!PageUptodate(page) && (len != PAGE_SIZE)) {
loff_t i_size = i_size_read(mapping->host);
- pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ pgoff_t end_index = i_size >> PAGE_SHIFT;
size_t rlen;
if (page->index < end_index)
- rlen = PAGE_CACHE_SIZE;
+ rlen = PAGE_SIZE;
else if (page->index == end_index)
- rlen = i_size & ~PAGE_CACHE_MASK;
+ rlen = i_size & ~PAGE_MASK;
else
rlen = 0;
diff --git a/fs/exofs/namei.c b/fs/exofs/namei.c
index c20d77df2..622a686bb 100644
--- a/fs/exofs/namei.c
+++ b/fs/exofs/namei.c
@@ -292,11 +292,11 @@ static int exofs_rename(struct inode *old_dir, struct dentry *old_dentry,
out_dir:
if (dir_de) {
kunmap(dir_page);
- page_cache_release(dir_page);
+ put_page(dir_page);
}
out_old:
kunmap(old_page);
- page_cache_release(old_page);
+ put_page(old_page);
out:
return err;
}
diff --git a/fs/ext2/dir.c b/fs/ext2/dir.c
index 0c6638b40..7ff6fcfa6 100644
--- a/fs/ext2/dir.c
+++ b/fs/ext2/dir.c
@@ -37,7 +37,7 @@ static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
{
unsigned len = le16_to_cpu(dlen);
-#if (PAGE_CACHE_SIZE >= 65536)
+#if (PAGE_SIZE >= 65536)
if (len == EXT2_MAX_REC_LEN)
return 1 << 16;
#endif
@@ -46,7 +46,7 @@ static inline unsigned ext2_rec_len_from_disk(__le16 dlen)
static inline __le16 ext2_rec_len_to_disk(unsigned len)
{
-#if (PAGE_CACHE_SIZE >= 65536)
+#if (PAGE_SIZE >= 65536)
if (len == (1 << 16))
return cpu_to_le16(EXT2_MAX_REC_LEN);
else
@@ -67,7 +67,7 @@ static inline unsigned ext2_chunk_size(struct inode *inode)
static inline void ext2_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
/*
@@ -79,9 +79,9 @@ ext2_last_byte(struct inode *inode, unsigned long page_nr)
{
unsigned last_byte = inode->i_size;
- last_byte -= page_nr << PAGE_CACHE_SHIFT;
- if (last_byte > PAGE_CACHE_SIZE)
- last_byte = PAGE_CACHE_SIZE;
+ last_byte -= page_nr << PAGE_SHIFT;
+ if (last_byte > PAGE_SIZE)
+ last_byte = PAGE_SIZE;
return last_byte;
}
@@ -118,12 +118,12 @@ static void ext2_check_page(struct page *page, int quiet)
char *kaddr = page_address(page);
u32 max_inumber = le32_to_cpu(EXT2_SB(sb)->s_es->s_inodes_count);
unsigned offs, rec_len;
- unsigned limit = PAGE_CACHE_SIZE;
+ unsigned limit = PAGE_SIZE;
ext2_dirent *p;
char *error;
- if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
- limit = dir->i_size & ~PAGE_CACHE_MASK;
+ if ((dir->i_size >> PAGE_SHIFT) == page->index) {
+ limit = dir->i_size & ~PAGE_MASK;
if (limit & (chunk_size - 1))
goto Ebadsize;
if (!limit)
@@ -176,7 +176,7 @@ bad_entry:
if (!quiet)
ext2_error(sb, __func__, "bad entry in directory #%lu: : %s - "
"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
- dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+ dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
(unsigned long) le32_to_cpu(p->inode),
rec_len, p->name_len);
goto fail;
@@ -186,7 +186,7 @@ Eend:
ext2_error(sb, "ext2_check_page",
"entry in directory #%lu spans the page boundary"
"offset=%lu, inode=%lu",
- dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+ dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
(unsigned long) le32_to_cpu(p->inode));
}
fail:
@@ -287,8 +287,8 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
loff_t pos = ctx->pos;
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
- unsigned int offset = pos & ~PAGE_CACHE_MASK;
- unsigned long n = pos >> PAGE_CACHE_SHIFT;
+ unsigned int offset = pos & ~PAGE_MASK;
+ unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(ext2_chunk_size(inode)-1);
unsigned char *types = NULL;
@@ -309,14 +309,14 @@ ext2_readdir(struct file *file, struct dir_context *ctx)
ext2_error(sb, __func__,
"bad page in #%lu",
inode->i_ino);
- ctx->pos += PAGE_CACHE_SIZE - offset;
+ ctx->pos += PAGE_SIZE - offset;
return PTR_ERR(page);
}
kaddr = page_address(page);
if (unlikely(need_revalidate)) {
if (offset) {
offset = ext2_validate_entry(kaddr, offset, chunk_mask);
- ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset;
+ ctx->pos = (n<<PAGE_SHIFT) + offset;
}
file->f_version = inode->i_version;
need_revalidate = 0;
@@ -406,7 +406,7 @@ struct ext2_dir_entry_2 *ext2_find_entry (struct inode * dir,
if (++n >= npages)
n = 0;
/* next page is past the blocks we've got */
- if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
+ if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) {
ext2_error(dir->i_sb, __func__,
"dir %lu size %lld exceeds block count %llu",
dir->i_ino, dir->i_size,
@@ -511,7 +511,7 @@ int ext2_add_link (struct dentry *dentry, struct inode *inode)
kaddr = page_address(page);
dir_end = kaddr + ext2_last_byte(dir, n);
de = (ext2_dirent *)kaddr;
- kaddr += PAGE_CACHE_SIZE - reclen;
+ kaddr += PAGE_SIZE - reclen;
while ((char *)de <= kaddr) {
if ((char *)de == dir_end) {
/* We hit i_size */
@@ -655,7 +655,7 @@ int ext2_make_empty(struct inode *inode, struct inode *parent)
kunmap_atomic(kaddr);
err = ext2_commit_chunk(page, 0, chunk_size);
fail:
- page_cache_release(page);
+ put_page(page);
return err;
}
diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h
index 4c69c94ca..170939f37 100644
--- a/fs/ext2/ext2.h
+++ b/fs/ext2/ext2.h
@@ -61,6 +61,8 @@ struct ext2_block_alloc_info {
#define rsv_start rsv_window._rsv_start
#define rsv_end rsv_window._rsv_end
+struct mb_cache;
+
/*
* second extended-fs super-block data in memory
*/
@@ -111,6 +113,7 @@ struct ext2_sb_info {
* of the mount options.
*/
spinlock_t s_lock;
+ struct mb_cache *s_mb_cache;
};
static inline spinlock_t *
diff --git a/fs/ext2/namei.c b/fs/ext2/namei.c
index 7a2be8f7f..d34843925 100644
--- a/fs/ext2/namei.c
+++ b/fs/ext2/namei.c
@@ -398,7 +398,7 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
ext2_set_link(old_inode, dir_de, dir_page, new_dir, 0);
else {
kunmap(dir_page);
- page_cache_release(dir_page);
+ put_page(dir_page);
}
inode_dec_link_count(old_dir);
}
@@ -408,11 +408,11 @@ static int ext2_rename (struct inode * old_dir, struct dentry * old_dentry,
out_dir:
if (dir_de) {
kunmap(dir_page);
- page_cache_release(dir_page);
+ put_page(dir_page);
}
out_old:
kunmap(old_page);
- page_cache_release(old_page);
+ put_page(old_page);
out:
return err;
}
diff --git a/fs/ext2/super.c b/fs/ext2/super.c
index 2a188413a..b78caf25f 100644
--- a/fs/ext2/super.c
+++ b/fs/ext2/super.c
@@ -131,7 +131,10 @@ static void ext2_put_super (struct super_block * sb)
dquot_disable(sb, -1, DQUOT_USAGE_ENABLED | DQUOT_LIMITS_ENABLED);
- ext2_xattr_put_super(sb);
+ if (sbi->s_mb_cache) {
+ ext2_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (!(sb->s_flags & MS_RDONLY)) {
struct ext2_super_block *es = sbi->s_es;
@@ -1104,6 +1107,14 @@ static int ext2_fill_super(struct super_block *sb, void *data, int silent)
ext2_msg(sb, KERN_ERR, "error: insufficient memory");
goto failed_mount3;
}
+
+#ifdef CONFIG_EXT2_FS_XATTR
+ sbi->s_mb_cache = ext2_xattr_create_cache();
+ if (!sbi->s_mb_cache) {
+ ext2_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ goto failed_mount3;
+ }
+#endif
/*
* set up enough so that it can read an inode
*/
@@ -1149,6 +1160,8 @@ cantfind_ext2:
sb->s_id);
goto failed_mount;
failed_mount3:
+ if (sbi->s_mb_cache)
+ ext2_xattr_destroy_cache(sbi->s_mb_cache);
percpu_counter_destroy(&sbi->s_freeblocks_counter);
percpu_counter_destroy(&sbi->s_freeinodes_counter);
percpu_counter_destroy(&sbi->s_dirs_counter);
@@ -1555,20 +1568,17 @@ MODULE_ALIAS_FS("ext2");
static int __init init_ext2_fs(void)
{
- int err = init_ext2_xattr();
- if (err)
- return err;
+ int err;
+
err = init_inodecache();
if (err)
- goto out1;
+ return err;
err = register_filesystem(&ext2_fs_type);
if (err)
goto out;
return 0;
out:
destroy_inodecache();
-out1:
- exit_ext2_xattr();
return err;
}
@@ -1576,7 +1586,6 @@ static void __exit exit_ext2_fs(void)
{
unregister_filesystem(&ext2_fs_type);
destroy_inodecache();
- exit_ext2_xattr();
}
MODULE_AUTHOR("Remy Card and others");
diff --git a/fs/ext2/xattr.c b/fs/ext2/xattr.c
index f57a7aba3..1a5e3bff0 100644
--- a/fs/ext2/xattr.c
+++ b/fs/ext2/xattr.c
@@ -90,14 +90,12 @@
static int ext2_xattr_set2(struct inode *, struct buffer_head *,
struct ext2_xattr_header *);
-static int ext2_xattr_cache_insert(struct buffer_head *);
+static int ext2_xattr_cache_insert(struct mb_cache *, struct buffer_head *);
static struct buffer_head *ext2_xattr_cache_find(struct inode *,
struct ext2_xattr_header *);
static void ext2_xattr_rehash(struct ext2_xattr_header *,
struct ext2_xattr_entry *);
-static struct mb_cache *ext2_xattr_cache;
-
static const struct xattr_handler *ext2_xattr_handler_map[] = {
[EXT2_XATTR_INDEX_USER] = &ext2_xattr_user_handler,
#ifdef CONFIG_EXT2_FS_POSIX_ACL
@@ -152,6 +150,7 @@ ext2_xattr_get(struct inode *inode, int name_index, const char *name,
size_t name_len, size;
char *end;
int error;
+ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
ea_idebug(inode, "name=%d.%s, buffer=%p, buffer_size=%ld",
name_index, name, buffer, (long)buffer_size);
@@ -196,7 +195,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_get",
goto found;
entry = next;
}
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
error = -ENODATA;
goto cleanup;
@@ -209,7 +208,7 @@ found:
le16_to_cpu(entry->e_value_offs) + size > inode->i_sb->s_blocksize)
goto bad_block;
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
if (buffer) {
error = -ERANGE;
@@ -247,6 +246,7 @@ ext2_xattr_list(struct dentry *dentry, char *buffer, size_t buffer_size)
char *end;
size_t rest = buffer_size;
int error;
+ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
ea_idebug(inode, "buffer=%p, buffer_size=%ld",
buffer, (long)buffer_size);
@@ -281,7 +281,7 @@ bad_block: ext2_error(inode->i_sb, "ext2_xattr_list",
goto bad_block;
entry = next;
}
- if (ext2_xattr_cache_insert(bh))
+ if (ext2_xattr_cache_insert(ext2_mb_cache, bh))
ea_idebug(inode, "cache insert failed");
/* list the attribute names */
@@ -483,22 +483,23 @@ bad_block: ext2_error(sb, "ext2_xattr_set",
/* Here we know that we can set the new attribute. */
if (header) {
- struct mb_cache_entry *ce;
-
/* assert(header == HDR(bh)); */
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev,
- bh->b_blocknr);
lock_buffer(bh);
if (header->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(header->h_hash);
+
ea_bdebug(bh, "modifying in-place");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect modified block
+ */
+ mb_cache_entry_delete_block(EXT2_SB(sb)->s_mb_cache,
+ hash, bh->b_blocknr);
+
/* keep the buffer locked while modifying it. */
} else {
int offset;
- if (ce)
- mb_cache_entry_release(ce);
unlock_buffer(bh);
ea_bdebug(bh, "cloning");
header = kmalloc(bh->b_size, GFP_KERNEL);
@@ -626,6 +627,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
struct super_block *sb = inode->i_sb;
struct buffer_head *new_bh = NULL;
int error;
+ struct mb_cache *ext2_mb_cache = EXT2_SB(sb)->s_mb_cache;
if (header) {
new_bh = ext2_xattr_cache_find(inode, header);
@@ -653,7 +655,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
don't need to change the reference count. */
new_bh = old_bh;
get_bh(new_bh);
- ext2_xattr_cache_insert(new_bh);
+ ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
} else {
/* We need to allocate a new block */
ext2_fsblk_t goal = ext2_group_first_block_no(sb,
@@ -674,7 +676,7 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
memcpy(new_bh->b_data, header, new_bh->b_size);
set_buffer_uptodate(new_bh);
unlock_buffer(new_bh);
- ext2_xattr_cache_insert(new_bh);
+ ext2_xattr_cache_insert(ext2_mb_cache, new_bh);
ext2_xattr_update_super_block(sb);
}
@@ -707,19 +709,21 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
error = 0;
if (old_bh && old_bh != new_bh) {
- struct mb_cache_entry *ce;
-
/*
* If there was an old block and we are no longer using it,
* release the old block.
*/
- ce = mb_cache_entry_get(ext2_xattr_cache, old_bh->b_bdev,
- old_bh->b_blocknr);
lock_buffer(old_bh);
if (HDR(old_bh)->h_refcount == cpu_to_le32(1)) {
+ __u32 hash = le32_to_cpu(HDR(old_bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext2_xattr_set2() to reliably detect freed block
+ */
+ mb_cache_entry_delete_block(ext2_mb_cache,
+ hash, old_bh->b_blocknr);
/* Free the old block. */
- if (ce)
- mb_cache_entry_free(ce);
ea_bdebug(old_bh, "freeing");
ext2_free_blocks(inode, old_bh->b_blocknr, 1);
mark_inode_dirty(inode);
@@ -730,8 +734,6 @@ ext2_xattr_set2(struct inode *inode, struct buffer_head *old_bh,
} else {
/* Decrement the refcount only. */
le32_add_cpu(&HDR(old_bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
dquot_free_block_nodirty(inode, 1);
mark_inode_dirty(inode);
mark_buffer_dirty(old_bh);
@@ -757,7 +759,6 @@ void
ext2_xattr_delete_inode(struct inode *inode)
{
struct buffer_head *bh = NULL;
- struct mb_cache_entry *ce;
down_write(&EXT2_I(inode)->xattr_sem);
if (!EXT2_I(inode)->i_file_acl)
@@ -777,19 +778,22 @@ ext2_xattr_delete_inode(struct inode *inode)
EXT2_I(inode)->i_file_acl);
goto cleanup;
}
- ce = mb_cache_entry_get(ext2_xattr_cache, bh->b_bdev, bh->b_blocknr);
lock_buffer(bh);
if (HDR(bh)->h_refcount == cpu_to_le32(1)) {
- if (ce)
- mb_cache_entry_free(ce);
+ __u32 hash = le32_to_cpu(HDR(bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for ext2_xattr_set2() to
+ * reliably detect freed block
+ */
+ mb_cache_entry_delete_block(EXT2_SB(inode->i_sb)->s_mb_cache,
+ hash, bh->b_blocknr);
ext2_free_blocks(inode, EXT2_I(inode)->i_file_acl, 1);
get_bh(bh);
bforget(bh);
unlock_buffer(bh);
} else {
le32_add_cpu(&HDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
ea_bdebug(bh, "refcount now=%d",
le32_to_cpu(HDR(bh)->h_refcount));
unlock_buffer(bh);
@@ -806,18 +810,6 @@ cleanup:
}
/*
- * ext2_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext2_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-
-/*
* ext2_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -826,28 +818,20 @@ ext2_xattr_put_super(struct super_block *sb)
* Returns 0, or a negative error number on failure.
*/
static int
-ext2_xattr_cache_insert(struct buffer_head *bh)
+ext2_xattr_cache_insert(struct mb_cache *cache, struct buffer_head *bh)
{
__u32 hash = le32_to_cpu(HDR(bh)->h_hash);
- struct mb_cache_entry *ce;
int error;
- ce = mb_cache_entry_alloc(ext2_xattr_cache, GFP_NOFS);
- if (!ce)
- return -ENOMEM;
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb_cache_entry_create(cache, GFP_NOFS, hash, bh->b_blocknr, 1);
if (error) {
- mb_cache_entry_free(ce);
if (error == -EBUSY) {
ea_bdebug(bh, "already in cache (%d cache entries)",
atomic_read(&ext2_xattr_cache->c_entry_count));
error = 0;
}
- } else {
- ea_bdebug(bh, "inserting [%x] (%d cache entries)", (int)hash,
- atomic_read(&ext2_xattr_cache->c_entry_count));
- mb_cache_entry_release(ce);
- }
+ } else
+ ea_bdebug(bh, "inserting [%x]", (int)hash);
return error;
}
@@ -904,22 +888,16 @@ ext2_xattr_cache_find(struct inode *inode, struct ext2_xattr_header *header)
{
__u32 hash = le32_to_cpu(header->h_hash);
struct mb_cache_entry *ce;
+ struct mb_cache *ext2_mb_cache = EXT2_SB(inode->i_sb)->s_mb_cache;
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
again:
- ce = mb_cache_entry_find_first(ext2_xattr_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb_cache_entry_find_first(ext2_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
-
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
ext2_error(inode->i_sb, "ext2_xattr_cache_find",
@@ -927,7 +905,21 @@ again:
inode->i_ino, (unsigned long) ce->e_block);
} else {
lock_buffer(bh);
- if (le32_to_cpu(HDR(bh)->h_refcount) >
+ /*
+ * We have to be careful about races with freeing or
+ * rehashing of xattr block. Once we hold buffer lock
+ * xattr block's state is stable so we can check
+ * whether the block got freed / rehashed or not.
+ * Since we unhash mbcache entry under buffer lock when
+ * freeing / rehashing xattr block, checking whether
+ * entry is still hashed is reliable.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list)) {
+ mb_cache_entry_put(ext2_mb_cache, ce);
+ unlock_buffer(bh);
+ brelse(bh);
+ goto again;
+ } else if (le32_to_cpu(HDR(bh)->h_refcount) >
EXT2_XATTR_REFCOUNT_MAX) {
ea_idebug(inode, "block %ld refcount %d>%d",
(unsigned long) ce->e_block,
@@ -936,13 +928,14 @@ again:
} else if (!ext2_xattr_cmp(header, HDR(bh))) {
ea_bdebug(bh, "b_count=%d",
atomic_read(&(bh->b_count)));
- mb_cache_entry_release(ce);
+ mb_cache_entry_touch(ext2_mb_cache, ce);
+ mb_cache_entry_put(ext2_mb_cache, ce);
return bh;
}
unlock_buffer(bh);
brelse(bh);
}
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb_cache_entry_find_next(ext2_mb_cache, ce);
}
return NULL;
}
@@ -1015,17 +1008,15 @@ static void ext2_xattr_rehash(struct ext2_xattr_header *header,
#undef BLOCK_HASH_SHIFT
-int __init
-init_ext2_xattr(void)
+#define HASH_BUCKET_BITS 10
+
+struct mb_cache *ext2_xattr_create_cache(void)
{
- ext2_xattr_cache = mb_cache_create("ext2_xattr", 6);
- if (!ext2_xattr_cache)
- return -ENOMEM;
- return 0;
+ return mb_cache_create(HASH_BUCKET_BITS);
}
-void
-exit_ext2_xattr(void)
+void ext2_xattr_destroy_cache(struct mb_cache *cache)
{
- mb_cache_destroy(ext2_xattr_cache);
+ if (cache)
+ mb_cache_destroy(cache);
}
diff --git a/fs/ext2/xattr.h b/fs/ext2/xattr.h
index 60edf2986..6f82ab1b0 100644
--- a/fs/ext2/xattr.h
+++ b/fs/ext2/xattr.h
@@ -53,6 +53,8 @@ struct ext2_xattr_entry {
#define EXT2_XATTR_SIZE(size) \
(((size) + EXT2_XATTR_ROUND) & ~EXT2_XATTR_ROUND)
+struct mb_cache;
+
# ifdef CONFIG_EXT2_FS_XATTR
extern const struct xattr_handler ext2_xattr_user_handler;
@@ -65,10 +67,9 @@ extern int ext2_xattr_get(struct inode *, int, const char *, void *, size_t);
extern int ext2_xattr_set(struct inode *, int, const char *, const void *, size_t, int);
extern void ext2_xattr_delete_inode(struct inode *);
-extern void ext2_xattr_put_super(struct super_block *);
-extern int init_ext2_xattr(void);
-extern void exit_ext2_xattr(void);
+extern struct mb_cache *ext2_xattr_create_cache(void);
+extern void ext2_xattr_destroy_cache(struct mb_cache *cache);
extern const struct xattr_handler *ext2_xattr_handlers[];
@@ -93,19 +94,7 @@ ext2_xattr_delete_inode(struct inode *inode)
{
}
-static inline void
-ext2_xattr_put_super(struct super_block *sb)
-{
-}
-
-static inline int
-init_ext2_xattr(void)
-{
- return 0;
-}
-
-static inline void
-exit_ext2_xattr(void)
+static inline void ext2_xattr_destroy_cache(struct mb_cache *cache)
{
}
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index 25634c353..6a6c27373 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -18,11 +18,9 @@
* Special Publication 800-38E and IEEE P1619/D16.
*/
-#include <crypto/hash.h>
-#include <crypto/sha.h>
+#include <crypto/skcipher.h>
#include <keys/user-type.h>
#include <keys/encrypted-type.h>
-#include <linux/crypto.h>
#include <linux/ecryptfs.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
@@ -94,7 +92,8 @@ void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx)
* Return: An allocated and initialized encryption context on success; error
* value or NULL otherwise.
*/
-struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
+struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode,
+ gfp_t gfp_flags)
{
struct ext4_crypto_ctx *ctx = NULL;
int res = 0;
@@ -121,7 +120,7 @@ struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode)
list_del(&ctx->free_list);
spin_unlock_irqrestore(&ext4_crypto_ctx_lock, flags);
if (!ctx) {
- ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, GFP_NOFS);
+ ctx = kmem_cache_zalloc(ext4_crypto_ctx_cachep, gfp_flags);
if (!ctx) {
res = -ENOMEM;
goto out;
@@ -258,25 +257,26 @@ static int ext4_page_crypto(struct inode *inode,
ext4_direction_t rw,
pgoff_t index,
struct page *src_page,
- struct page *dest_page)
+ struct page *dest_page,
+ gfp_t gfp_flags)
{
u8 xts_tweak[EXT4_XTS_TWEAK_SIZE];
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct scatterlist dst, src;
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, gfp_flags);
if (!req) {
printk_ratelimited(KERN_ERR
"%s: crypto_request_alloc() failed\n",
__func__);
return -ENOMEM;
}
- ablkcipher_request_set_callback(
+ skcipher_request_set_callback(
req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
ext4_crypt_complete, &ecr);
@@ -286,33 +286,34 @@ static int ext4_page_crypto(struct inode *inode,
EXT4_XTS_TWEAK_SIZE - sizeof(index));
sg_init_table(&dst, 1);
- sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
+ sg_set_page(&dst, dest_page, PAGE_SIZE, 0);
sg_init_table(&src, 1);
- sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
- ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
- xts_tweak);
+ sg_set_page(&src, src_page, PAGE_SIZE, 0);
+ skcipher_request_set_crypt(req, &src, &dst, PAGE_SIZE,
+ xts_tweak);
if (rw == EXT4_DECRYPT)
- res = crypto_ablkcipher_decrypt(req);
+ res = crypto_skcipher_decrypt(req);
else
- res = crypto_ablkcipher_encrypt(req);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res) {
printk_ratelimited(
KERN_ERR
- "%s: crypto_ablkcipher_encrypt() returned %d\n",
+ "%s: crypto_skcipher_encrypt() returned %d\n",
__func__, res);
return res;
}
return 0;
}
-static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx)
+static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx,
+ gfp_t gfp_flags)
{
- ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, GFP_NOWAIT);
+ ctx->w.bounce_page = mempool_alloc(ext4_bounce_page_pool, gfp_flags);
if (ctx->w.bounce_page == NULL)
return ERR_PTR(-ENOMEM);
ctx->flags |= EXT4_WRITE_PATH_FL;
@@ -335,7 +336,8 @@ static struct page *alloc_bounce_page(struct ext4_crypto_ctx *ctx)
* error value or NULL.
*/
struct page *ext4_encrypt(struct inode *inode,
- struct page *plaintext_page)
+ struct page *plaintext_page,
+ gfp_t gfp_flags)
{
struct ext4_crypto_ctx *ctx;
struct page *ciphertext_page = NULL;
@@ -343,17 +345,17 @@ struct page *ext4_encrypt(struct inode *inode,
BUG_ON(!PageLocked(plaintext_page));
- ctx = ext4_get_crypto_ctx(inode);
+ ctx = ext4_get_crypto_ctx(inode, gfp_flags);
if (IS_ERR(ctx))
return (struct page *) ctx;
/* The encryption operation will require a bounce page. */
- ciphertext_page = alloc_bounce_page(ctx);
+ ciphertext_page = alloc_bounce_page(ctx, gfp_flags);
if (IS_ERR(ciphertext_page))
goto errout;
ctx->w.control_page = plaintext_page;
err = ext4_page_crypto(inode, EXT4_ENCRYPT, plaintext_page->index,
- plaintext_page, ciphertext_page);
+ plaintext_page, ciphertext_page, gfp_flags);
if (err) {
ciphertext_page = ERR_PTR(err);
errout:
@@ -381,8 +383,8 @@ int ext4_decrypt(struct page *page)
{
BUG_ON(!PageLocked(page));
- return ext4_page_crypto(page->mapping->host,
- EXT4_DECRYPT, page->index, page, page);
+ return ext4_page_crypto(page->mapping->host, EXT4_DECRYPT,
+ page->index, page, page, GFP_NOFS);
}
int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
@@ -399,13 +401,13 @@ int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
(unsigned long) inode->i_ino, lblk, len);
#endif
- BUG_ON(inode->i_sb->s_blocksize != PAGE_CACHE_SIZE);
+ BUG_ON(inode->i_sb->s_blocksize != PAGE_SIZE);
- ctx = ext4_get_crypto_ctx(inode);
+ ctx = ext4_get_crypto_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
return PTR_ERR(ctx);
- ciphertext_page = alloc_bounce_page(ctx);
+ ciphertext_page = alloc_bounce_page(ctx, GFP_NOWAIT);
if (IS_ERR(ciphertext_page)) {
err = PTR_ERR(ciphertext_page);
goto errout;
@@ -413,11 +415,12 @@ int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
while (len--) {
err = ext4_page_crypto(inode, EXT4_ENCRYPT, lblk,
- ZERO_PAGE(0), ciphertext_page);
+ ZERO_PAGE(0), ciphertext_page,
+ GFP_NOFS);
if (err)
goto errout;
- bio = bio_alloc(GFP_KERNEL, 1);
+ bio = bio_alloc(GFP_NOWAIT, 1);
if (!bio) {
err = -ENOMEM;
goto errout;
diff --git a/fs/ext4/crypto_fname.c b/fs/ext4/crypto_fname.c
index 2fbef8a14..1a2f36040 100644
--- a/fs/ext4/crypto_fname.c
+++ b/fs/ext4/crypto_fname.c
@@ -11,11 +11,9 @@
*
*/
-#include <crypto/hash.h>
-#include <crypto/sha.h>
+#include <crypto/skcipher.h>
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
-#include <linux/crypto.h>
#include <linux/gfp.h>
#include <linux/kernel.h>
#include <linux/key.h>
@@ -65,10 +63,10 @@ static int ext4_fname_encrypt(struct inode *inode,
struct ext4_str *oname)
{
u32 ciphertext_len;
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[EXT4_CRYPTO_BLOCK_SIZE];
struct scatterlist src_sg, dst_sg;
@@ -95,14 +93,14 @@ static int ext4_fname_encrypt(struct inode *inode,
}
/* Allocate request */
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(
KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
kfree(alloc_buf);
return -ENOMEM;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
ext4_dir_crypt_complete, &ecr);
@@ -117,14 +115,14 @@ static int ext4_fname_encrypt(struct inode *inode,
/* Create encryption request */
sg_init_one(&src_sg, workbuf, ciphertext_len);
sg_init_one(&dst_sg, oname->name, ciphertext_len);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
- res = crypto_ablkcipher_encrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, ciphertext_len, iv);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
kfree(alloc_buf);
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(
KERN_ERR "%s: Error (error code %d)\n", __func__, res);
@@ -145,11 +143,11 @@ static int ext4_fname_decrypt(struct inode *inode,
struct ext4_str *oname)
{
struct ext4_str tmp_in[2], tmp_out[1];
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
struct ext4_crypt_info *ci = EXT4_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
+ struct crypto_skcipher *tfm = ci->ci_ctfm;
int res = 0;
char iv[EXT4_CRYPTO_BLOCK_SIZE];
unsigned lim = max_name_len(inode);
@@ -162,13 +160,13 @@ static int ext4_fname_decrypt(struct inode *inode,
tmp_out[0].name = oname->name;
/* Allocate request */
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
printk_ratelimited(
KERN_ERR "%s: crypto_request_alloc() failed\n", __func__);
return -ENOMEM;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
ext4_dir_crypt_complete, &ecr);
@@ -178,13 +176,13 @@ static int ext4_fname_decrypt(struct inode *inode,
/* Create encryption request */
sg_init_one(&src_sg, iname->name, iname->len);
sg_init_one(&dst_sg, oname->name, oname->len);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
- res = crypto_ablkcipher_decrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg, iname->len, iv);
+ res = crypto_skcipher_decrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
- ablkcipher_request_free(req);
+ skcipher_request_free(req);
if (res < 0) {
printk_ratelimited(
KERN_ERR "%s: Error in ext4_fname_encrypt (error code %d)\n",
diff --git a/fs/ext4/crypto_key.c b/fs/ext4/crypto_key.c
index 9a16d1e75..0129d688d 100644
--- a/fs/ext4/crypto_key.c
+++ b/fs/ext4/crypto_key.c
@@ -8,6 +8,7 @@
* Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
*/
+#include <crypto/skcipher.h>
#include <keys/encrypted-type.h>
#include <keys/user-type.h>
#include <linux/random.h>
@@ -41,45 +42,42 @@ static int ext4_derive_key_aes(char deriving_key[EXT4_AES_128_ECB_KEY_SIZE],
char derived_key[EXT4_AES_256_XTS_KEY_SIZE])
{
int res = 0;
- struct ablkcipher_request *req = NULL;
+ struct skcipher_request *req = NULL;
DECLARE_EXT4_COMPLETION_RESULT(ecr);
struct scatterlist src_sg, dst_sg;
- struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
- 0);
+ struct crypto_skcipher *tfm = crypto_alloc_skcipher("ecb(aes)", 0, 0);
if (IS_ERR(tfm)) {
res = PTR_ERR(tfm);
tfm = NULL;
goto out;
}
- crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
+ crypto_skcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
+ req = skcipher_request_alloc(tfm, GFP_NOFS);
if (!req) {
res = -ENOMEM;
goto out;
}
- ablkcipher_request_set_callback(req,
+ skcipher_request_set_callback(req,
CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
derive_crypt_complete, &ecr);
- res = crypto_ablkcipher_setkey(tfm, deriving_key,
- EXT4_AES_128_ECB_KEY_SIZE);
+ res = crypto_skcipher_setkey(tfm, deriving_key,
+ EXT4_AES_128_ECB_KEY_SIZE);
if (res < 0)
goto out;
sg_init_one(&src_sg, source_key, EXT4_AES_256_XTS_KEY_SIZE);
sg_init_one(&dst_sg, derived_key, EXT4_AES_256_XTS_KEY_SIZE);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
- EXT4_AES_256_XTS_KEY_SIZE, NULL);
- res = crypto_ablkcipher_encrypt(req);
+ skcipher_request_set_crypt(req, &src_sg, &dst_sg,
+ EXT4_AES_256_XTS_KEY_SIZE, NULL);
+ res = crypto_skcipher_encrypt(req);
if (res == -EINPROGRESS || res == -EBUSY) {
wait_for_completion(&ecr.completion);
res = ecr.res;
}
out:
- if (req)
- ablkcipher_request_free(req);
- if (tfm)
- crypto_free_ablkcipher(tfm);
+ skcipher_request_free(req);
+ crypto_free_skcipher(tfm);
return res;
}
@@ -90,7 +88,7 @@ void ext4_free_crypt_info(struct ext4_crypt_info *ci)
if (ci->ci_keyring_key)
key_put(ci->ci_keyring_key);
- crypto_free_ablkcipher(ci->ci_ctfm);
+ crypto_free_skcipher(ci->ci_ctfm);
kmem_cache_free(ext4_crypt_info_cachep, ci);
}
@@ -122,7 +120,7 @@ int _ext4_get_encryption_info(struct inode *inode)
struct ext4_encryption_context ctx;
const struct user_key_payload *ukp;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
- struct crypto_ablkcipher *ctfm;
+ struct crypto_skcipher *ctfm;
const char *cipher_str;
char raw_key[EXT4_MAX_KEY_SIZE];
char mode;
@@ -237,7 +235,7 @@ retry:
if (res)
goto out;
got_key:
- ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
+ ctfm = crypto_alloc_skcipher(cipher_str, 0, 0);
if (!ctfm || IS_ERR(ctfm)) {
res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
printk(KERN_DEBUG
@@ -246,11 +244,11 @@ got_key:
goto out;
}
crypt_info->ci_ctfm = ctfm;
- crypto_ablkcipher_clear_flags(ctfm, ~0);
- crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
+ crypto_skcipher_clear_flags(ctfm, ~0);
+ crypto_tfm_set_flags(crypto_skcipher_tfm(ctfm),
CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_ablkcipher_setkey(ctfm, raw_key,
- ext4_encryption_key_size(mode));
+ res = crypto_skcipher_setkey(ctfm, raw_key,
+ ext4_encryption_key_size(mode));
if (res)
goto out;
memzero_explicit(raw_key, sizeof(raw_key));
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 33f5e2a50..561d7308b 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -155,13 +155,13 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
err = ext4_map_blocks(NULL, inode, &map, 0);
if (err > 0) {
pgoff_t index = map.m_pblk >>
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ (PAGE_SHIFT - inode->i_blkbits);
if (!ra_has_index(&file->f_ra, index))
page_cache_sync_readahead(
sb->s_bdev->bd_inode->i_mapping,
&file->f_ra, file,
index, 1);
- file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
+ file->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
if (IS_ERR(bh)) {
err = PTR_ERR(bh);
@@ -285,7 +285,7 @@ errout:
static inline int is_32bit_api(void)
{
#ifdef CONFIG_COMPAT
- return is_compat_task();
+ return in_compat_syscall();
#else
return (BITS_PER_LONG == 32);
#endif
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index b213449a5..349afebe2 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -42,6 +42,18 @@
*/
/*
+ * with AGGRESSIVE_CHECK allocator runs consistency checks over
+ * structures. these checks slow things down a lot
+ */
+#define AGGRESSIVE_CHECK__
+
+/*
+ * with DOUBLE_CHECK defined mballoc creates persistent in-core
+ * bitmaps, maintains and uses them to check for double allocations
+ */
+#define DOUBLE_CHECK__
+
+/*
* Define EXT4FS_DEBUG to produce debug messages
*/
#undef EXT4FS_DEBUG
@@ -182,9 +194,9 @@ typedef struct ext4_io_end {
struct bio *bio; /* Linked list of completed
* bios covering the extent */
unsigned int flag; /* unwritten or not */
+ atomic_t count; /* reference counter */
loff_t offset; /* offset in the file */
ssize_t size; /* size of the extent */
- atomic_t count; /* reference counter */
} ext4_io_end_t;
struct ext4_io_submit {
@@ -1047,13 +1059,8 @@ struct ext4_inode_info {
* transaction reserved
*/
struct list_head i_rsv_conversion_list;
- /*
- * Completed IOs that need unwritten extents handling and don't have
- * transaction reserved
- */
- atomic_t i_ioend_count; /* Number of outstanding io_end structs */
- atomic_t i_unwritten; /* Nr. of inflight conversions pending */
struct work_struct i_rsv_conversion_work;
+ atomic_t i_unwritten; /* Nr. of inflight conversions pending */
spinlock_t i_block_reservation_lock;
@@ -1527,25 +1534,6 @@ static inline int ext4_valid_inum(struct super_block *sb, unsigned long ino)
ino <= le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count));
}
-static inline void ext4_set_io_unwritten_flag(struct inode *inode,
- struct ext4_io_end *io_end)
-{
- if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
- io_end->flag |= EXT4_IO_END_UNWRITTEN;
- atomic_inc(&EXT4_I(inode)->i_unwritten);
- }
-}
-
-static inline ext4_io_end_t *ext4_inode_aio(struct inode *inode)
-{
- return inode->i_private;
-}
-
-static inline void ext4_inode_aio_set(struct inode *inode, ext4_io_end_t *io)
-{
- inode->i_private = io;
-}
-
/*
* Inode dynamic state flags
*/
@@ -1996,7 +1984,7 @@ ext4_rec_len_from_disk(__le16 dlen, unsigned blocksize)
{
unsigned len = le16_to_cpu(dlen);
-#if (PAGE_CACHE_SIZE >= 65536)
+#if (PAGE_SIZE >= 65536)
if (len == EXT4_MAX_REC_LEN || len == 0)
return blocksize;
return (len & 65532) | ((len & 3) << 16);
@@ -2009,7 +1997,7 @@ static inline __le16 ext4_rec_len_to_disk(unsigned len, unsigned blocksize)
{
if ((len > blocksize) || (blocksize > (1 << 18)) || (len & 3))
BUG();
-#if (PAGE_CACHE_SIZE >= 65536)
+#if (PAGE_SIZE >= 65536)
if (len < 65536)
return cpu_to_le16(len);
if (len == blocksize) {
@@ -2317,11 +2305,13 @@ extern struct kmem_cache *ext4_crypt_info_cachep;
bool ext4_valid_contents_enc_mode(uint32_t mode);
uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size);
extern struct workqueue_struct *ext4_read_workqueue;
-struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode);
+struct ext4_crypto_ctx *ext4_get_crypto_ctx(struct inode *inode,
+ gfp_t gfp_flags);
void ext4_release_crypto_ctx(struct ext4_crypto_ctx *ctx);
void ext4_restore_control_page(struct page *data_page);
struct page *ext4_encrypt(struct inode *inode,
- struct page *plaintext_page);
+ struct page *plaintext_page,
+ gfp_t gfp_flags);
int ext4_decrypt(struct page *page);
int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
@@ -2529,12 +2519,14 @@ extern int ext4_trim_fs(struct super_block *, struct fstrim_range *);
int ext4_inode_is_fast_symlink(struct inode *inode);
struct buffer_head *ext4_getblk(handle_t *, struct inode *, ext4_lblk_t, int);
struct buffer_head *ext4_bread(handle_t *, struct inode *, ext4_lblk_t, int);
-int ext4_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create);
int ext4_get_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create);
+ struct buffer_head *bh_result, int create);
+int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create);
int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int create);
int ext4_walk_page_buffers(handle_t *handle,
@@ -2582,6 +2574,9 @@ extern void ext4_da_update_reserve_space(struct inode *inode,
int used, int quota_claim);
extern int ext4_issue_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
+extern int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+ unsigned int map_len,
+ struct extent_status *result);
/* indirect.c */
extern int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
@@ -3308,15 +3303,33 @@ static inline void ext4_inode_resume_unlocked_dio(struct inode *inode)
#define EXT4_WQ_HASH_SZ 37
#define ext4_ioend_wq(v) (&ext4__ioend_wq[((unsigned long)(v)) %\
EXT4_WQ_HASH_SZ])
-#define ext4_aio_mutex(v) (&ext4__aio_mutex[((unsigned long)(v)) %\
- EXT4_WQ_HASH_SZ])
extern wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-extern struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
#define EXT4_RESIZING 0
extern int ext4_resize_begin(struct super_block *sb);
extern void ext4_resize_end(struct super_block *sb);
+static inline void ext4_set_io_unwritten_flag(struct inode *inode,
+ struct ext4_io_end *io_end)
+{
+ if (!(io_end->flag & EXT4_IO_END_UNWRITTEN)) {
+ io_end->flag |= EXT4_IO_END_UNWRITTEN;
+ atomic_inc(&EXT4_I(inode)->i_unwritten);
+ }
+}
+
+static inline void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
+{
+ struct inode *inode = io_end->inode;
+
+ if (io_end->flag & EXT4_IO_END_UNWRITTEN) {
+ io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
+ /* Wake up anyone waiting on unwritten extent conversion */
+ if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
+ wake_up_all(ext4_ioend_wq(inode));
+ }
+}
+
#endif /* __KERNEL__ */
#define EFSBADCRC EBADMSG /* Bad CRC detected */
diff --git a/fs/ext4/ext4_crypto.h b/fs/ext4/ext4_crypto.h
index ac7d4e813..1f73c2971 100644
--- a/fs/ext4/ext4_crypto.h
+++ b/fs/ext4/ext4_crypto.h
@@ -77,7 +77,7 @@ struct ext4_crypt_info {
char ci_data_mode;
char ci_filename_mode;
char ci_flags;
- struct crypto_ablkcipher *ci_ctfm;
+ struct crypto_skcipher *ci_ctfm;
struct key *ci_keyring_key;
char ci_master_key[EXT4_KEY_DESCRIPTOR_SIZE];
};
diff --git a/fs/ext4/ext4_extents.h b/fs/ext4/ext4_extents.h
index 3c9381547..8ecf84b8f 100644
--- a/fs/ext4/ext4_extents.h
+++ b/fs/ext4/ext4_extents.h
@@ -11,7 +11,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
*/
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 3753ceb0b..95bf4679a 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -15,7 +15,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
*/
@@ -1736,6 +1736,12 @@ ext4_can_extents_be_merged(struct inode *inode, struct ext4_extent *ex1,
*/
if (ext1_ee_len + ext2_ee_len > EXT_INIT_MAX_LEN)
return 0;
+ /*
+ * The check for IO to unwritten extent is somewhat racy as we
+ * increment i_unwritten / set EXT4_STATE_DIO_UNWRITTEN only after
+ * dropping i_data_sem. But reserved blocks should save us in that
+ * case.
+ */
if (ext4_ext_is_unwritten(ex1) &&
(ext4_test_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN) ||
atomic_read(&EXT4_I(inode)->i_unwritten) ||
@@ -2293,59 +2299,69 @@ static int ext4_fill_fiemap_extents(struct inode *inode,
}
/*
- * ext4_ext_put_gap_in_cache:
- * calculate boundaries of the gap that the requested block fits into
- * and cache this gap
+ * ext4_ext_determine_hole - determine hole around given block
+ * @inode: inode we lookup in
+ * @path: path in extent tree to @lblk
+ * @lblk: pointer to logical block around which we want to determine hole
+ *
+ * Determine hole length (and start if easily possible) around given logical
+ * block. We don't try too hard to find the beginning of the hole but @path
+ * actually points to extent before @lblk, we provide it.
+ *
+ * The function returns the length of a hole starting at @lblk. We update @lblk
+ * to the beginning of the hole if we managed to find it.
*/
-static void
-ext4_ext_put_gap_in_cache(struct inode *inode, struct ext4_ext_path *path,
- ext4_lblk_t block)
+static ext4_lblk_t ext4_ext_determine_hole(struct inode *inode,
+ struct ext4_ext_path *path,
+ ext4_lblk_t *lblk)
{
int depth = ext_depth(inode);
- ext4_lblk_t len;
- ext4_lblk_t lblock;
struct ext4_extent *ex;
- struct extent_status es;
+ ext4_lblk_t len;
ex = path[depth].p_ext;
if (ex == NULL) {
/* there is no extent yet, so gap is [0;-] */
- lblock = 0;
+ *lblk = 0;
len = EXT_MAX_BLOCKS;
- ext_debug("cache gap(whole file):");
- } else if (block < le32_to_cpu(ex->ee_block)) {
- lblock = block;
- len = le32_to_cpu(ex->ee_block) - block;
- ext_debug("cache gap(before): %u [%u:%u]",
- block,
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex));
- } else if (block >= le32_to_cpu(ex->ee_block)
+ } else if (*lblk < le32_to_cpu(ex->ee_block)) {
+ len = le32_to_cpu(ex->ee_block) - *lblk;
+ } else if (*lblk >= le32_to_cpu(ex->ee_block)
+ ext4_ext_get_actual_len(ex)) {
ext4_lblk_t next;
- lblock = le32_to_cpu(ex->ee_block)
- + ext4_ext_get_actual_len(ex);
+ *lblk = le32_to_cpu(ex->ee_block) + ext4_ext_get_actual_len(ex);
next = ext4_ext_next_allocated_block(path);
- ext_debug("cache gap(after): [%u:%u] %u",
- le32_to_cpu(ex->ee_block),
- ext4_ext_get_actual_len(ex),
- block);
- BUG_ON(next == lblock);
- len = next - lblock;
+ BUG_ON(next == *lblk);
+ len = next - *lblk;
} else {
BUG();
}
+ return len;
+}
- ext4_es_find_delayed_extent_range(inode, lblock, lblock + len - 1, &es);
+/*
+ * ext4_ext_put_gap_in_cache:
+ * calculate boundaries of the gap that the requested block fits into
+ * and cache this gap
+ */
+static void
+ext4_ext_put_gap_in_cache(struct inode *inode, ext4_lblk_t hole_start,
+ ext4_lblk_t hole_len)
+{
+ struct extent_status es;
+
+ ext4_es_find_delayed_extent_range(inode, hole_start,
+ hole_start + hole_len - 1, &es);
if (es.es_len) {
/* There's delayed extent containing lblock? */
- if (es.es_lblk <= lblock)
+ if (es.es_lblk <= hole_start)
return;
- len = min(es.es_lblk - lblock, len);
+ hole_len = min(es.es_lblk - hole_start, hole_len);
}
- ext_debug(" -> %u:%u\n", lblock, len);
- ext4_es_insert_extent(inode, lblock, len, ~0, EXTENT_STATUS_HOLE);
+ ext_debug(" -> %u:%u\n", hole_start, hole_len);
+ ext4_es_insert_extent(inode, hole_start, hole_len, ~0,
+ EXTENT_STATUS_HOLE);
}
/*
@@ -3927,7 +3943,7 @@ get_reserved_cluster_alloc(struct inode *inode, ext4_lblk_t lblk_start,
static int
convert_initialized_extent(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
- struct ext4_ext_path **ppath, int flags,
+ struct ext4_ext_path **ppath,
unsigned int allocated)
{
struct ext4_ext_path *path = *ppath;
@@ -4007,7 +4023,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
struct ext4_ext_path *path = *ppath;
int ret = 0;
int err = 0;
- ext4_io_end_t *io = ext4_inode_aio(inode);
ext_debug("ext4_ext_handle_unwritten_extents: inode %lu, logical "
"block %llu, max_blocks %u, flags %x, allocated %u\n",
@@ -4030,15 +4045,6 @@ ext4_ext_handle_unwritten_extents(handle_t *handle, struct inode *inode,
flags | EXT4_GET_BLOCKS_CONVERT);
if (ret <= 0)
goto out;
- /*
- * Flag the inode(non aio case) or end_io struct (aio case)
- * that this IO needs to conversion to written when IO is
- * completed
- */
- if (io)
- ext4_set_io_unwritten_flag(inode, io);
- else
- ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
map->m_flags |= EXT4_MAP_UNWRITTEN;
goto out;
}
@@ -4283,9 +4289,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
unsigned int allocated = 0, offset = 0;
unsigned int allocated_clusters = 0;
struct ext4_allocation_request ar;
- ext4_io_end_t *io = ext4_inode_aio(inode);
ext4_lblk_t cluster_offset;
- int set_unwritten = 0;
bool map_from_cluster = false;
ext_debug("blocks %u/%u requested for inode %lu\n",
@@ -4347,7 +4351,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
allocated = convert_initialized_extent(
handle, inode, map, &path,
- flags, allocated);
+ allocated);
goto out2;
} else if (!ext4_ext_is_unwritten(ex))
goto out;
@@ -4368,11 +4372,22 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
* we couldn't try to create block if create flag is zero
*/
if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+ ext4_lblk_t hole_start, hole_len;
+
+ hole_start = map->m_lblk;
+ hole_len = ext4_ext_determine_hole(inode, path, &hole_start);
/*
* put just found gap into cache to speed up
* subsequent requests
*/
- ext4_ext_put_gap_in_cache(inode, path, map->m_lblk);
+ ext4_ext_put_gap_in_cache(inode, hole_start, hole_len);
+
+ /* Update hole_len to reflect hole size after map->m_lblk */
+ if (hole_start != map->m_lblk)
+ hole_len -= map->m_lblk - hole_start;
+ map->m_pblk = 0;
+ map->m_len = min_t(unsigned int, map->m_len, hole_len);
+
goto out2;
}
@@ -4482,15 +4497,6 @@ got_allocated_blocks:
if (flags & EXT4_GET_BLOCKS_UNWRIT_EXT){
ext4_ext_mark_unwritten(&newex);
map->m_flags |= EXT4_MAP_UNWRITTEN;
- /*
- * io_end structure was created for every IO write to an
- * unwritten extent. To avoid unnecessary conversion,
- * here we flag the IO that really needs the conversion.
- * For non asycn direct IO case, flag the inode state
- * that we need to perform conversion when IO is done.
- */
- if (flags & EXT4_GET_BLOCKS_PRE_IO)
- set_unwritten = 1;
}
err = 0;
@@ -4501,14 +4507,6 @@ got_allocated_blocks:
err = ext4_ext_insert_extent(handle, inode, &path,
&newex, flags);
- if (!err && set_unwritten) {
- if (io)
- ext4_set_io_unwritten_flag(inode, io);
- else
- ext4_set_inode_state(inode,
- EXT4_STATE_DIO_UNWRITTEN);
- }
-
if (err && free_on_err) {
int fb_flags = flags & EXT4_GET_BLOCKS_DELALLOC_RESERVE ?
EXT4_FREE_BLOCKS_NO_QUOT_UPDATE : 0;
diff --git a/fs/ext4/extents_status.c b/fs/ext4/extents_status.c
index ac748b3af..e38b987ac 100644
--- a/fs/ext4/extents_status.c
+++ b/fs/ext4/extents_status.c
@@ -823,8 +823,8 @@ out:
es->es_lblk = es1->es_lblk;
es->es_len = es1->es_len;
es->es_pblk = es1->es_pblk;
- if (!ext4_es_is_referenced(es))
- ext4_es_set_referenced(es);
+ if (!ext4_es_is_referenced(es1))
+ ext4_es_set_referenced(es1);
stats->es_stats_cache_hits++;
} else {
stats->es_stats_cache_misses++;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 38847f38b..fa2208bae 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -93,31 +93,29 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(iocb->ki_filp);
- struct mutex *aio_mutex = NULL;
struct blk_plug plug;
int o_direct = iocb->ki_flags & IOCB_DIRECT;
+ int unaligned_aio = 0;
int overwrite = 0;
ssize_t ret;
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto out;
+
/*
- * Unaligned direct AIO must be serialized; see comment above
- * In the case of O_APPEND, assume that we must always serialize
+ * Unaligned direct AIO must be serialized among each other as zeroing
+ * of partial blocks of two competing unaligned AIOs can result in data
+ * corruption.
*/
- if (o_direct &&
- ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
+ if (o_direct && ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS) &&
!is_sync_kiocb(iocb) &&
- (iocb->ki_flags & IOCB_APPEND ||
- ext4_unaligned_aio(inode, from, iocb->ki_pos))) {
- aio_mutex = ext4_aio_mutex(inode);
- mutex_lock(aio_mutex);
+ ext4_unaligned_aio(inode, from, iocb->ki_pos)) {
+ unaligned_aio = 1;
ext4_unwritten_wait(inode);
}
- inode_lock(inode);
- ret = generic_write_checks(iocb, from);
- if (ret <= 0)
- goto out;
-
/*
* If we have encountered a bitmap-format file, the size limit
* is smaller than s_maxbytes, which is for extent-mapped files.
@@ -139,7 +137,7 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
blk_start_plug(&plug);
/* check whether we do a DIO overwrite or not */
- if (ext4_should_dioread_nolock(inode) && !aio_mutex &&
+ if (ext4_should_dioread_nolock(inode) && !unaligned_aio &&
!file->f_mapping->nrpages && pos + length <= i_size_read(inode)) {
struct ext4_map_blocks map;
unsigned int blkbits = inode->i_blkbits;
@@ -181,14 +179,10 @@ ext4_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
if (o_direct)
blk_finish_plug(&plug);
- if (aio_mutex)
- mutex_unlock(aio_mutex);
return ret;
out:
inode_unlock(inode);
- if (aio_mutex)
- mutex_unlock(aio_mutex);
return ret;
}
@@ -421,7 +415,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
*/
static int ext4_find_unwritten_pgoff(struct inode *inode,
int whence,
- struct ext4_map_blocks *map,
+ ext4_lblk_t end_blk,
loff_t *offset)
{
struct pagevec pvec;
@@ -436,10 +430,10 @@ static int ext4_find_unwritten_pgoff(struct inode *inode,
blkbits = inode->i_sb->s_blocksize_bits;
startoff = *offset;
lastoff = startoff;
- endoff = (loff_t)(map->m_lblk + map->m_len) << blkbits;
+ endoff = (loff_t)end_blk << blkbits;
- index = startoff >> PAGE_CACHE_SHIFT;
- end = endoff >> PAGE_CACHE_SHIFT;
+ index = startoff >> PAGE_SHIFT;
+ end = endoff >> PAGE_SHIFT;
pagevec_init(&pvec, 0);
do {
@@ -554,12 +548,11 @@ out:
static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_map_blocks map;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t dataoff, isize;
int blkbits;
- int ret = 0;
+ int ret;
inode_lock(inode);
@@ -576,41 +569,32 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
dataoff = offset;
do {
- map.m_lblk = last;
- map.m_len = end - last + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
- if (last != start)
- dataoff = (loff_t)last << blkbits;
- break;
+ ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+ if (ret <= 0) {
+ /* No extent found -> no data */
+ if (ret == 0)
+ ret = -ENXIO;
+ inode_unlock(inode);
+ return ret;
}
- /*
- * If there is a delay extent at this offset,
- * it will be as a data.
- */
- ext4_es_find_delayed_extent_range(inode, last, last, &es);
- if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
- if (last != start)
- dataoff = (loff_t)last << blkbits;
+ last = es.es_lblk;
+ if (last != start)
+ dataoff = (loff_t)last << blkbits;
+ if (!ext4_es_is_unwritten(&es))
break;
- }
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- int unwritten;
- unwritten = ext4_find_unwritten_pgoff(inode, SEEK_DATA,
- &map, &dataoff);
- if (unwritten)
- break;
- }
-
- last++;
+ if (ext4_find_unwritten_pgoff(inode, SEEK_DATA,
+ es.es_lblk + es.es_len, &dataoff))
+ break;
+ last += es.es_len;
dataoff = (loff_t)last << blkbits;
+ cond_resched();
} while (last <= end);
inode_unlock(inode);
@@ -627,12 +611,11 @@ static loff_t ext4_seek_data(struct file *file, loff_t offset, loff_t maxsize)
static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
{
struct inode *inode = file->f_mapping->host;
- struct ext4_map_blocks map;
struct extent_status es;
ext4_lblk_t start, last, end;
loff_t holeoff, isize;
int blkbits;
- int ret = 0;
+ int ret;
inode_lock(inode);
@@ -649,44 +632,30 @@ static loff_t ext4_seek_hole(struct file *file, loff_t offset, loff_t maxsize)
holeoff = offset;
do {
- map.m_lblk = last;
- map.m_len = end - last + 1;
- ret = ext4_map_blocks(NULL, inode, &map, 0);
- if (ret > 0 && !(map.m_flags & EXT4_MAP_UNWRITTEN)) {
- last += ret;
- holeoff = (loff_t)last << blkbits;
- continue;
+ ret = ext4_get_next_extent(inode, last, end - last + 1, &es);
+ if (ret < 0) {
+ inode_unlock(inode);
+ return ret;
}
-
- /*
- * If there is a delay extent at this offset,
- * we will skip this extent.
- */
- ext4_es_find_delayed_extent_range(inode, last, last, &es);
- if (es.es_len != 0 && in_range(last, es.es_lblk, es.es_len)) {
- last = es.es_lblk + es.es_len;
- holeoff = (loff_t)last << blkbits;
- continue;
+ /* Found a hole? */
+ if (ret == 0 || es.es_lblk > last) {
+ if (last != start)
+ holeoff = (loff_t)last << blkbits;
+ break;
}
-
/*
* If there is a unwritten extent at this offset,
* it will be as a data or a hole according to page
* cache that has data or not.
*/
- if (map.m_flags & EXT4_MAP_UNWRITTEN) {
- int unwritten;
- unwritten = ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
- &map, &holeoff);
- if (!unwritten) {
- last += ret;
- holeoff = (loff_t)last << blkbits;
- continue;
- }
- }
+ if (ext4_es_is_unwritten(&es) &&
+ ext4_find_unwritten_pgoff(inode, SEEK_HOLE,
+ last + es.es_len, &holeoff))
+ break;
- /* find a hole */
- break;
+ last += es.es_len;
+ holeoff = (loff_t)last << blkbits;
+ cond_resched();
} while (last <= end);
inode_unlock(inode);
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index acc0ad56b..3da4cf8d1 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -787,7 +787,7 @@ struct inode *__ext4_new_inode(handle_t *handle, struct inode *dir,
sbi = EXT4_SB(sb);
/*
- * Initalize owners and quota early so that we don't have to account
+ * Initialize owners and quota early so that we don't have to account
* for quota initialization worst case in standard inode creating
* transaction
*/
@@ -1150,25 +1150,20 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
unsigned long max_ino = le32_to_cpu(EXT4_SB(sb)->s_es->s_inodes_count);
ext4_group_t block_group;
int bit;
- struct buffer_head *bitmap_bh;
+ struct buffer_head *bitmap_bh = NULL;
struct inode *inode = NULL;
- long err = -EIO;
+ int err = -EFSCORRUPTED;
- /* Error cases - e2fsck has already cleaned up for us */
- if (ino > max_ino) {
- ext4_warning(sb, "bad orphan ino %lu! e2fsck was run?", ino);
- err = -EFSCORRUPTED;
- goto error;
- }
+ if (ino < EXT4_FIRST_INO(sb) || ino > max_ino)
+ goto bad_orphan;
block_group = (ino - 1) / EXT4_INODES_PER_GROUP(sb);
bit = (ino - 1) % EXT4_INODES_PER_GROUP(sb);
bitmap_bh = ext4_read_inode_bitmap(sb, block_group);
if (IS_ERR(bitmap_bh)) {
- err = PTR_ERR(bitmap_bh);
- ext4_warning(sb, "inode bitmap error %ld for orphan %lu",
- ino, err);
- goto error;
+ ext4_error(sb, "inode bitmap error %ld for orphan %lu",
+ ino, PTR_ERR(bitmap_bh));
+ return (struct inode *) bitmap_bh;
}
/* Having the inode bit set should be a 100% indicator that this
@@ -1179,15 +1174,21 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
goto bad_orphan;
inode = ext4_iget(sb, ino);
- if (IS_ERR(inode))
- goto iget_failed;
+ if (IS_ERR(inode)) {
+ err = PTR_ERR(inode);
+ ext4_error(sb, "couldn't read orphan inode %lu (err %d)",
+ ino, err);
+ return inode;
+ }
/*
- * If the orphans has i_nlinks > 0 then it should be able to be
- * truncated, otherwise it won't be removed from the orphan list
- * during processing and an infinite loop will result.
+ * If the orphans has i_nlinks > 0 then it should be able to
+ * be truncated, otherwise it won't be removed from the orphan
+ * list during processing and an infinite loop will result.
+ * Similarly, it must not be a bad inode.
*/
- if (inode->i_nlink && !ext4_can_truncate(inode))
+ if ((inode->i_nlink && !ext4_can_truncate(inode)) ||
+ is_bad_inode(inode))
goto bad_orphan;
if (NEXT_ORPHAN(inode) > max_ino)
@@ -1195,29 +1196,25 @@ struct inode *ext4_orphan_get(struct super_block *sb, unsigned long ino)
brelse(bitmap_bh);
return inode;
-iget_failed:
- err = PTR_ERR(inode);
- inode = NULL;
bad_orphan:
- ext4_warning(sb, "bad orphan inode %lu! e2fsck was run?", ino);
- printk(KERN_WARNING "ext4_test_bit(bit=%d, block=%llu) = %d\n",
- bit, (unsigned long long)bitmap_bh->b_blocknr,
- ext4_test_bit(bit, bitmap_bh->b_data));
- printk(KERN_WARNING "inode=%p\n", inode);
+ ext4_error(sb, "bad orphan inode %lu", ino);
+ if (bitmap_bh)
+ printk(KERN_ERR "ext4_test_bit(bit=%d, block=%llu) = %d\n",
+ bit, (unsigned long long)bitmap_bh->b_blocknr,
+ ext4_test_bit(bit, bitmap_bh->b_data));
if (inode) {
- printk(KERN_WARNING "is_bad_inode(inode)=%d\n",
+ printk(KERN_ERR "is_bad_inode(inode)=%d\n",
is_bad_inode(inode));
- printk(KERN_WARNING "NEXT_ORPHAN(inode)=%u\n",
+ printk(KERN_ERR "NEXT_ORPHAN(inode)=%u\n",
NEXT_ORPHAN(inode));
- printk(KERN_WARNING "max_ino=%lu\n", max_ino);
- printk(KERN_WARNING "i_nlink=%u\n", inode->i_nlink);
+ printk(KERN_ERR "max_ino=%lu\n", max_ino);
+ printk(KERN_ERR "i_nlink=%u\n", inode->i_nlink);
/* Avoid freeing blocks if we got a bad deleted inode */
if (inode->i_nlink == 0)
inode->i_blocks = 0;
iput(inode);
}
brelse(bitmap_bh);
-error:
return ERR_PTR(err);
}
diff --git a/fs/ext4/indirect.c b/fs/ext4/indirect.c
index 355ef9c36..3027fa681 100644
--- a/fs/ext4/indirect.c
+++ b/fs/ext4/indirect.c
@@ -555,8 +555,23 @@ int ext4_ind_map_blocks(handle_t *handle, struct inode *inode,
goto got_it;
}
- /* Next simple case - plain lookup or failed read of indirect block */
- if ((flags & EXT4_GET_BLOCKS_CREATE) == 0 || err == -EIO)
+ /* Next simple case - plain lookup failed */
+ if ((flags & EXT4_GET_BLOCKS_CREATE) == 0) {
+ unsigned epb = inode->i_sb->s_blocksize / sizeof(u32);
+ int i;
+
+ /* Count number blocks in a subtree under 'partial' */
+ count = 1;
+ for (i = 0; partial + i != chain + depth - 1; i++)
+ count *= epb;
+ /* Fill in size of a hole we found */
+ map->m_pblk = 0;
+ map->m_len = min_t(unsigned int, map->m_len, count);
+ goto cleanup;
+ }
+
+ /* Failed read of indirect block */
+ if (err == -EIO)
goto cleanup;
/*
@@ -693,21 +708,21 @@ retry:
}
if (IS_DAX(inode))
ret = dax_do_io(iocb, inode, iter, offset,
- ext4_get_block, NULL, 0);
+ ext4_dio_get_block, NULL, 0);
else
ret = __blockdev_direct_IO(iocb, inode,
inode->i_sb->s_bdev, iter,
- offset, ext4_get_block, NULL,
- NULL, 0);
+ offset, ext4_dio_get_block,
+ NULL, NULL, 0);
inode_dio_end(inode);
} else {
locked:
if (IS_DAX(inode))
ret = dax_do_io(iocb, inode, iter, offset,
- ext4_get_block, NULL, DIO_LOCKING);
+ ext4_dio_get_block, NULL, DIO_LOCKING);
else
ret = blockdev_direct_IO(iocb, inode, iter, offset,
- ext4_get_block);
+ ext4_dio_get_block);
if (unlikely(iov_iter_rw(iter) == WRITE && ret < 0)) {
loff_t isize = i_size_read(inode);
diff --git a/fs/ext4/inline.c b/fs/ext4/inline.c
index dfe3b9baf..7bc6c855c 100644
--- a/fs/ext4/inline.c
+++ b/fs/ext4/inline.c
@@ -482,7 +482,7 @@ static int ext4_read_inline_page(struct inode *inode, struct page *page)
ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
flush_dcache_page(page);
kunmap_atomic(kaddr);
- zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ zero_user_segment(page, len, PAGE_SIZE);
SetPageUptodate(page);
brelse(iloc.bh);
@@ -507,7 +507,7 @@ int ext4_readpage_inline(struct inode *inode, struct page *page)
if (!page->index)
ret = ext4_read_inline_page(inode, page);
else if (!PageUptodate(page)) {
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
}
@@ -581,9 +581,10 @@ retry:
if (ret)
goto out;
- if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(page, from, to, ext4_get_block_write);
- else
+ if (ext4_should_dioread_nolock(inode)) {
+ ret = __block_write_begin(page, from, to,
+ ext4_get_block_unwritten);
+ } else
ret = __block_write_begin(page, from, to, ext4_get_block);
if (!ret && ext4_should_journal_data(inode)) {
@@ -594,7 +595,7 @@ retry:
if (ret) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
page = NULL;
ext4_orphan_add(handle, inode);
up_write(&EXT4_I(inode)->xattr_sem);
@@ -620,7 +621,7 @@ retry:
out:
if (page) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
if (sem_held)
up_write(&EXT4_I(inode)->xattr_sem);
@@ -689,7 +690,7 @@ int ext4_try_to_write_inline_data(struct address_space *mapping,
if (!ext4_has_inline_data(inode)) {
ret = 0;
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
goto out_up_read;
}
@@ -814,7 +815,7 @@ static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
if (ret) {
up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
ext4_truncate_failed_write(inode);
return ret;
}
@@ -828,7 +829,7 @@ out:
up_read(&EXT4_I(inode)->xattr_sem);
if (page) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
return ret;
}
@@ -918,7 +919,7 @@ retry_journal:
out_release_page:
up_read(&EXT4_I(inode)->xattr_sem);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
out_journal:
ext4_journal_stop(handle);
out:
@@ -946,7 +947,7 @@ int ext4_da_write_inline_data_end(struct inode *inode, loff_t pos,
i_size_changed = 1;
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
/*
* Don't mark the inode dirty under page lock. First, it unnecessarily
@@ -1696,7 +1697,6 @@ int ext4_delete_inline_entry(handle_t *handle,
if (err)
goto out;
- BUFFER_TRACE(bh, "call ext4_handle_dirty_metadata");
err = ext4_mark_inode_dirty(handle, dir);
if (unlikely(err))
goto out;
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index e6218cbc8..250c2df04 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -216,7 +216,6 @@ void ext4_evict_inode(struct inode *inode)
}
truncate_inode_pages_final(&inode->i_data);
- WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
goto no_delete;
}
@@ -228,8 +227,6 @@ void ext4_evict_inode(struct inode *inode)
ext4_begin_ordered_truncate(inode, 0);
truncate_inode_pages_final(&inode->i_data);
- WARN_ON(atomic_read(&EXT4_I(inode)->i_ioend_count));
-
/*
* Protect us against freezing - iput() caller didn't have to have any
* protection against it
@@ -458,13 +455,13 @@ static void ext4_map_blocks_es_recheck(handle_t *handle,
* Otherwise, call with ext4_ind_map_blocks() to handle indirect mapping
* based files
*
- * On success, it returns the number of blocks being mapped or allocated.
- * if create==0 and the blocks are pre-allocated and unwritten block,
- * the result buffer head is unmapped. If the create ==1, it will make sure
- * the buffer head is mapped.
+ * On success, it returns the number of blocks being mapped or allocated. if
+ * create==0 and the blocks are pre-allocated and unwritten, the resulting @map
+ * is marked as unwritten. If the create == 1, it will mark @map as mapped.
*
* It returns 0 if plain look up failed (blocks have not been allocated), in
- * that case, buffer head is unmapped
+ * that case, @map is returned as unmapped but we still do fill map->m_len to
+ * indicate the length of a hole starting at map->m_lblk.
*
* It returns the error in case of allocation failure.
*/
@@ -507,6 +504,11 @@ int ext4_map_blocks(handle_t *handle, struct inode *inode,
retval = map->m_len;
map->m_len = retval;
} else if (ext4_es_is_delayed(&es) || ext4_es_is_hole(&es)) {
+ map->m_pblk = 0;
+ retval = es.es_len - (map->m_lblk - es.es_lblk);
+ if (retval > map->m_len)
+ retval = map->m_len;
+ map->m_len = retval;
retval = 0;
} else {
BUG_ON(1);
@@ -682,6 +684,21 @@ out_sem:
ret = check_block_validity(inode, map);
if (ret != 0)
return ret;
+
+ /*
+ * Inodes with freshly allocated blocks where contents will be
+ * visible after transaction commit must be on transaction's
+ * ordered data list.
+ */
+ if (map->m_flags & EXT4_MAP_NEW &&
+ !(map->m_flags & EXT4_MAP_UNWRITTEN) &&
+ !(flags & EXT4_GET_BLOCKS_ZERO) &&
+ !IS_NOQUOTA(inode) &&
+ ext4_should_order_data(inode)) {
+ ret = ext4_jbd2_file_inode(handle, inode);
+ if (ret)
+ return ret;
+ }
}
return retval;
}
@@ -714,16 +731,11 @@ static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
cmpxchg(&bh->b_state, old_state, new_state) != old_state));
}
-/* Maximum number of blocks we map for direct IO at once. */
-#define DIO_MAX_BLOCKS 4096
-
static int _ext4_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh, int flags)
{
- handle_t *handle = ext4_journal_current_handle();
struct ext4_map_blocks map;
- int ret = 0, started = 0;
- int dio_credits;
+ int ret = 0;
if (ext4_has_inline_data(inode))
return -ERANGE;
@@ -731,33 +743,14 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
- if (flags && !handle) {
- /* Direct IO write... */
- if (map.m_len > DIO_MAX_BLOCKS)
- map.m_len = DIO_MAX_BLOCKS;
- dio_credits = ext4_chunk_trans_blocks(inode, map.m_len);
- handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS,
- dio_credits);
- if (IS_ERR(handle)) {
- ret = PTR_ERR(handle);
- return ret;
- }
- started = 1;
- }
-
- ret = ext4_map_blocks(handle, inode, &map, flags);
+ ret = ext4_map_blocks(ext4_journal_current_handle(), inode, &map,
+ flags);
if (ret > 0) {
- ext4_io_end_t *io_end = ext4_inode_aio(inode);
-
map_bh(bh, inode->i_sb, map.m_pblk);
ext4_update_bh_state(bh, map.m_flags);
- if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
- set_buffer_defer_completion(bh);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
ret = 0;
}
- if (started)
- ext4_journal_stop(handle);
return ret;
}
@@ -769,6 +762,153 @@ int ext4_get_block(struct inode *inode, sector_t iblock,
}
/*
+ * Get block function used when preparing for buffered write if we require
+ * creating an unwritten extent if blocks haven't been allocated. The extent
+ * will be converted to written after the IO is complete.
+ */
+int ext4_get_block_unwritten(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ ext4_debug("ext4_get_block_unwritten: inode %lu, create flag %d\n",
+ inode->i_ino, create);
+ return _ext4_get_block(inode, iblock, bh_result,
+ EXT4_GET_BLOCKS_IO_CREATE_EXT);
+}
+
+/* Maximum number of blocks we map for direct IO at once. */
+#define DIO_MAX_BLOCKS 4096
+
+/*
+ * Get blocks function for the cases that need to start a transaction -
+ * generally difference cases of direct IO and DAX IO. It also handles retries
+ * in case of ENOSPC.
+ */
+static int ext4_get_block_trans(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int flags)
+{
+ int dio_credits;
+ handle_t *handle;
+ int retries = 0;
+ int ret;
+
+ /* Trim mapping request to maximum we can map at once for DIO */
+ if (bh_result->b_size >> inode->i_blkbits > DIO_MAX_BLOCKS)
+ bh_result->b_size = DIO_MAX_BLOCKS << inode->i_blkbits;
+ dio_credits = ext4_chunk_trans_blocks(inode,
+ bh_result->b_size >> inode->i_blkbits);
+retry:
+ handle = ext4_journal_start(inode, EXT4_HT_MAP_BLOCKS, dio_credits);
+ if (IS_ERR(handle))
+ return PTR_ERR(handle);
+
+ ret = _ext4_get_block(inode, iblock, bh_result, flags);
+ ext4_journal_stop(handle);
+
+ if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
+ goto retry;
+ return ret;
+}
+
+/* Get block function for DIO reads and writes to inodes without extents */
+int ext4_dio_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh, int create)
+{
+ /* We don't expect handle for direct IO */
+ WARN_ON_ONCE(ext4_journal_current_handle());
+
+ if (!create)
+ return _ext4_get_block(inode, iblock, bh, 0);
+ return ext4_get_block_trans(inode, iblock, bh, EXT4_GET_BLOCKS_CREATE);
+}
+
+/*
+ * Get block function for AIO DIO writes when we create unwritten extent if
+ * blocks are not allocated yet. The extent will be converted to written
+ * after IO is complete.
+ */
+static int ext4_dio_get_block_unwritten_async(struct inode *inode,
+ sector_t iblock, struct buffer_head *bh_result, int create)
+{
+ int ret;
+
+ /* We don't expect handle for direct IO */
+ WARN_ON_ONCE(ext4_journal_current_handle());
+
+ ret = ext4_get_block_trans(inode, iblock, bh_result,
+ EXT4_GET_BLOCKS_IO_CREATE_EXT);
+
+ /*
+ * When doing DIO using unwritten extents, we need io_end to convert
+ * unwritten extents to written on IO completion. We allocate io_end
+ * once we spot unwritten extent and store it in b_private. Generic
+ * DIO code keeps b_private set and furthermore passes the value to
+ * our completion callback in 'private' argument.
+ */
+ if (!ret && buffer_unwritten(bh_result)) {
+ if (!bh_result->b_private) {
+ ext4_io_end_t *io_end;
+
+ io_end = ext4_init_io_end(inode, GFP_KERNEL);
+ if (!io_end)
+ return -ENOMEM;
+ bh_result->b_private = io_end;
+ ext4_set_io_unwritten_flag(inode, io_end);
+ }
+ set_buffer_defer_completion(bh_result);
+ }
+
+ return ret;
+}
+
+/*
+ * Get block function for non-AIO DIO writes when we create unwritten extent if
+ * blocks are not allocated yet. The extent will be converted to written
+ * after IO is complete from ext4_ext_direct_IO() function.
+ */
+static int ext4_dio_get_block_unwritten_sync(struct inode *inode,
+ sector_t iblock, struct buffer_head *bh_result, int create)
+{
+ int ret;
+
+ /* We don't expect handle for direct IO */
+ WARN_ON_ONCE(ext4_journal_current_handle());
+
+ ret = ext4_get_block_trans(inode, iblock, bh_result,
+ EXT4_GET_BLOCKS_IO_CREATE_EXT);
+
+ /*
+ * Mark inode as having pending DIO writes to unwritten extents.
+ * ext4_ext_direct_IO() checks this flag and converts extents to
+ * written.
+ */
+ if (!ret && buffer_unwritten(bh_result))
+ ext4_set_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
+
+ return ret;
+}
+
+static int ext4_dio_get_block_overwrite(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ int ret;
+
+ ext4_debug("ext4_dio_get_block_overwrite: inode %lu, create flag %d\n",
+ inode->i_ino, create);
+ /* We don't expect handle for direct IO */
+ WARN_ON_ONCE(ext4_journal_current_handle());
+
+ ret = _ext4_get_block(inode, iblock, bh_result, 0);
+ /*
+ * Blocks should have been preallocated! ext4_file_write_iter() checks
+ * that.
+ */
+ WARN_ON_ONCE(!buffer_mapped(bh_result) || buffer_unwritten(bh_result));
+
+ return ret;
+}
+
+
+/*
* `handle' can be NULL if create is zero
*/
struct buffer_head *ext4_getblk(handle_t *handle, struct inode *inode,
@@ -930,7 +1070,7 @@ int do_journal_get_write_access(handle_t *handle,
static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
get_block_t *get_block)
{
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned from = pos & (PAGE_SIZE - 1);
unsigned to = from + len;
struct inode *inode = page->mapping->host;
unsigned block_start, block_end;
@@ -942,15 +1082,15 @@ static int ext4_block_write_begin(struct page *page, loff_t pos, unsigned len,
bool decrypt = false;
BUG_ON(!PageLocked(page));
- BUG_ON(from > PAGE_CACHE_SIZE);
- BUG_ON(to > PAGE_CACHE_SIZE);
+ BUG_ON(from > PAGE_SIZE);
+ BUG_ON(to > PAGE_SIZE);
BUG_ON(from > to);
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
head = page_buffers(page);
bbits = ilog2(blocksize);
- block = (sector_t)page->index << (PAGE_CACHE_SHIFT - bbits);
+ block = (sector_t)page->index << (PAGE_SHIFT - bbits);
for (bh = head, block_start = 0; bh != head || !block_start;
block++, block_start = block_end, bh = bh->b_this_page) {
@@ -1032,8 +1172,8 @@ static int ext4_write_begin(struct file *file, struct address_space *mapping,
* we allocate blocks but write fails for some reason
*/
needed_blocks = ext4_writepage_trans_blocks(inode) + 1;
- index = pos >> PAGE_CACHE_SHIFT;
- from = pos & (PAGE_CACHE_SIZE - 1);
+ index = pos >> PAGE_SHIFT;
+ from = pos & (PAGE_SIZE - 1);
to = from + len;
if (ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) {
@@ -1061,7 +1201,7 @@ retry_grab:
retry_journal:
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
if (IS_ERR(handle)) {
- page_cache_release(page);
+ put_page(page);
return PTR_ERR(handle);
}
@@ -1069,7 +1209,7 @@ retry_journal:
if (page->mapping != mapping) {
/* The page got truncated from under us */
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
ext4_journal_stop(handle);
goto retry_grab;
}
@@ -1079,13 +1219,14 @@ retry_journal:
#ifdef CONFIG_EXT4_FS_ENCRYPTION
if (ext4_should_dioread_nolock(inode))
ret = ext4_block_write_begin(page, pos, len,
- ext4_get_block_write);
+ ext4_get_block_unwritten);
else
ret = ext4_block_write_begin(page, pos, len,
ext4_get_block);
#else
if (ext4_should_dioread_nolock(inode))
- ret = __block_write_begin(page, pos, len, ext4_get_block_write);
+ ret = __block_write_begin(page, pos, len,
+ ext4_get_block_unwritten);
else
ret = __block_write_begin(page, pos, len, ext4_get_block);
#endif
@@ -1124,7 +1265,7 @@ retry_journal:
if (ret == -ENOSPC &&
ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_journal;
- page_cache_release(page);
+ put_page(page);
return ret;
}
*pagep = page;
@@ -1163,15 +1304,6 @@ static int ext4_write_end(struct file *file,
int i_size_changed = 0;
trace_ext4_write_end(inode, pos, len, copied);
- if (ext4_test_inode_state(inode, EXT4_STATE_ORDERED_MODE)) {
- ret = ext4_jbd2_file_inode(handle, inode);
- if (ret) {
- unlock_page(page);
- page_cache_release(page);
- goto errout;
- }
- }
-
if (ext4_has_inline_data(inode)) {
ret = ext4_write_inline_data_end(inode, pos, len,
copied, page);
@@ -1187,7 +1319,7 @@ static int ext4_write_end(struct file *file,
*/
i_size_changed = ext4_update_inode_size(inode, pos + copied);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
@@ -1271,7 +1403,7 @@ static int ext4_journalled_write_end(struct file *file,
int size_changed = 0;
trace_ext4_journalled_write_end(inode, pos, len, copied);
- from = pos & (PAGE_CACHE_SIZE - 1);
+ from = pos & (PAGE_SIZE - 1);
to = from + len;
BUG_ON(!ext4_handle_valid(handle));
@@ -1295,7 +1427,7 @@ static int ext4_journalled_write_end(struct file *file,
ext4_set_inode_state(inode, EXT4_STATE_JDATA);
EXT4_I(inode)->i_datasync_tid = handle->h_transaction->t_tid;
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (old_size < pos)
pagecache_isize_extended(inode, old_size, pos);
@@ -1409,7 +1541,7 @@ static void ext4_da_page_release_reservation(struct page *page,
int num_clusters;
ext4_fsblk_t lblk;
- BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+ BUG_ON(stop > PAGE_SIZE || stop < length);
head = page_buffers(page);
bh = head;
@@ -1425,7 +1557,7 @@ static void ext4_da_page_release_reservation(struct page *page,
clear_buffer_delay(bh);
} else if (contiguous_blks) {
lblk = page->index <<
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ (PAGE_SHIFT - inode->i_blkbits);
lblk += (curr_off >> inode->i_blkbits) -
contiguous_blks;
ext4_es_remove_extent(inode, lblk, contiguous_blks);
@@ -1435,7 +1567,7 @@ static void ext4_da_page_release_reservation(struct page *page,
} while ((bh = bh->b_this_page) != head);
if (contiguous_blks) {
- lblk = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ lblk = page->index << (PAGE_SHIFT - inode->i_blkbits);
lblk += (curr_off >> inode->i_blkbits) - contiguous_blks;
ext4_es_remove_extent(inode, lblk, contiguous_blks);
}
@@ -1444,7 +1576,7 @@ static void ext4_da_page_release_reservation(struct page *page,
* need to release the reserved space for that cluster. */
num_clusters = EXT4_NUM_B2C(sbi, to_release);
while (num_clusters > 0) {
- lblk = (page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits)) +
+ lblk = (page->index << (PAGE_SHIFT - inode->i_blkbits)) +
((num_clusters - 1) << sbi->s_cluster_bits);
if (sbi->s_cluster_ratio == 1 ||
!ext4_find_delalloc_cluster(inode, lblk))
@@ -1491,8 +1623,8 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
end = mpd->next_page - 1;
if (invalidate) {
ext4_lblk_t start, last;
- start = index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
- last = end << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ start = index << (PAGE_SHIFT - inode->i_blkbits);
+ last = end << (PAGE_SHIFT - inode->i_blkbits);
ext4_es_remove_extent(inode, start, last - start + 1);
}
@@ -1508,7 +1640,7 @@ static void mpage_release_unused_pages(struct mpage_da_data *mpd,
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
if (invalidate) {
- block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ block_invalidatepage(page, 0, PAGE_SIZE);
ClearPageUptodate(page);
}
unlock_page(page);
@@ -1879,10 +2011,10 @@ static int ext4_writepage(struct page *page,
trace_ext4_writepage(page);
size = i_size_read(inode);
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
+ if (page->index == size >> PAGE_SHIFT)
+ len = size & ~PAGE_MASK;
else
- len = PAGE_CACHE_SIZE;
+ len = PAGE_SIZE;
page_bufs = page_buffers(page);
/*
@@ -1906,7 +2038,7 @@ static int ext4_writepage(struct page *page,
ext4_bh_delay_or_unwritten)) {
redirty_page_for_writepage(wbc, page);
if ((current->flags & PF_MEMALLOC) ||
- (inode->i_sb->s_blocksize == PAGE_CACHE_SIZE)) {
+ (inode->i_sb->s_blocksize == PAGE_SIZE)) {
/*
* For memory cleaning there's no point in writing only
* some buffers. So just bail out. Warn if we came here
@@ -1948,10 +2080,10 @@ static int mpage_submit_page(struct mpage_da_data *mpd, struct page *page)
int err;
BUG_ON(page->index != mpd->first_page);
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
+ if (page->index == size >> PAGE_SHIFT)
+ len = size & ~PAGE_MASK;
else
- len = PAGE_CACHE_SIZE;
+ len = PAGE_SIZE;
clear_page_dirty_for_io(page);
err = ext4_bio_write_page(&mpd->io_submit, page, len, mpd->wbc, false);
if (!err)
@@ -2085,7 +2217,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
int nr_pages, i;
struct inode *inode = mpd->inode;
struct buffer_head *head, *bh;
- int bpp_bits = PAGE_CACHE_SHIFT - inode->i_blkbits;
+ int bpp_bits = PAGE_SHIFT - inode->i_blkbits;
pgoff_t start, end;
ext4_lblk_t lblk;
sector_t pblock;
@@ -2146,7 +2278,7 @@ static int mpage_map_and_submit_buffers(struct mpage_da_data *mpd)
* supports blocksize < pagesize as we will try to
* convert potentially unmapped parts of inode.
*/
- mpd->io_submit.io_end->size += PAGE_CACHE_SIZE;
+ mpd->io_submit.io_end->size += PAGE_SIZE;
/* Page fully mapped - let IO run! */
err = mpage_submit_page(mpd, page);
if (err < 0) {
@@ -2298,7 +2430,7 @@ update_disksize:
* Update on-disk size after IO is submitted. Races with
* truncate are avoided by checking i_size under i_data_sem.
*/
- disksize = ((loff_t)mpd->first_page) << PAGE_CACHE_SHIFT;
+ disksize = ((loff_t)mpd->first_page) << PAGE_SHIFT;
if (disksize > EXT4_I(inode)->i_disksize) {
int err2;
loff_t i_size;
@@ -2434,7 +2566,7 @@ static int mpage_prepare_extent_to_map(struct mpage_da_data *mpd)
mpd->next_page = page->index + 1;
/* Add all dirty buffers to mpd */
lblk = ((ext4_lblk_t)page->index) <<
- (PAGE_CACHE_SHIFT - blkbits);
+ (PAGE_SHIFT - blkbits);
head = page_buffers(page);
err = mpage_process_page_bufs(mpd, head, head, lblk);
if (err <= 0)
@@ -2519,7 +2651,7 @@ static int ext4_writepages(struct address_space *mapping,
* We may need to convert up to one extent per block in
* the page and we may dirty the inode.
*/
- rsv_blocks = 1 + (PAGE_CACHE_SIZE >> inode->i_blkbits);
+ rsv_blocks = 1 + (PAGE_SIZE >> inode->i_blkbits);
}
/*
@@ -2550,8 +2682,8 @@ static int ext4_writepages(struct address_space *mapping,
mpd.first_page = writeback_index;
mpd.last_page = -1;
} else {
- mpd.first_page = wbc->range_start >> PAGE_CACHE_SHIFT;
- mpd.last_page = wbc->range_end >> PAGE_CACHE_SHIFT;
+ mpd.first_page = wbc->range_start >> PAGE_SHIFT;
+ mpd.last_page = wbc->range_end >> PAGE_SHIFT;
}
mpd.inode = inode;
@@ -2710,7 +2842,7 @@ static int ext4_da_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
handle_t *handle;
- index = pos >> PAGE_CACHE_SHIFT;
+ index = pos >> PAGE_SHIFT;
if (ext4_nonda_switch(inode->i_sb)) {
*fsdata = (void *)FALL_BACK_TO_NONDELALLOC;
@@ -2753,7 +2885,7 @@ retry_journal:
handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE,
ext4_da_write_credits(inode, pos, len));
if (IS_ERR(handle)) {
- page_cache_release(page);
+ put_page(page);
return PTR_ERR(handle);
}
@@ -2761,7 +2893,7 @@ retry_journal:
if (page->mapping != mapping) {
/* The page got truncated from under us */
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
ext4_journal_stop(handle);
goto retry_grab;
}
@@ -2789,7 +2921,7 @@ retry_journal:
ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry_journal;
- page_cache_release(page);
+ put_page(page);
return ret;
}
@@ -2837,7 +2969,7 @@ static int ext4_da_write_end(struct file *file,
len, copied, page, fsdata);
trace_ext4_da_write_end(inode, pos, len, copied);
- start = pos & (PAGE_CACHE_SIZE - 1);
+ start = pos & (PAGE_SIZE - 1);
end = start + copied - 1;
/*
@@ -3059,7 +3191,7 @@ static int __ext4_journalled_invalidatepage(struct page *page,
/*
* If it's a full truncate we just forget about the pending dirtying
*/
- if (offset == 0 && length == PAGE_CACHE_SIZE)
+ if (offset == 0 && length == PAGE_SIZE)
ClearPageChecked(page);
return jbd2_journal_invalidatepage(journal, page, offset, length);
@@ -3088,37 +3220,6 @@ static int ext4_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
-/*
- * ext4_get_block used when preparing for a DIO write or buffer write.
- * We allocate an uinitialized extent if blocks haven't been allocated.
- * The extent will be converted to initialized after the IO is complete.
- */
-int ext4_get_block_write(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- ext4_debug("ext4_get_block_write: inode %lu, create flag %d\n",
- inode->i_ino, create);
- return _ext4_get_block(inode, iblock, bh_result,
- EXT4_GET_BLOCKS_IO_CREATE_EXT);
-}
-
-static int ext4_get_block_overwrite(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- int ret;
-
- ext4_debug("ext4_get_block_overwrite: inode %lu, create flag %d\n",
- inode->i_ino, create);
- ret = _ext4_get_block(inode, iblock, bh_result, 0);
- /*
- * Blocks should have been preallocated! ext4_file_write_iter() checks
- * that.
- */
- WARN_ON_ONCE(!buffer_mapped(bh_result));
-
- return ret;
-}
-
#ifdef CONFIG_FS_DAX
int ext4_dax_mmap_get_block(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
@@ -3179,13 +3280,12 @@ out:
WARN_ON_ONCE(ret == 0 && create);
if (ret > 0) {
map_bh(bh_result, inode->i_sb, map.m_pblk);
- bh_result->b_state = (bh_result->b_state & ~EXT4_MAP_FLAGS) |
- map.m_flags;
/*
* At least for now we have to clear BH_New so that DAX code
* doesn't attempt to zero blocks again in a racy way.
*/
- bh_result->b_state &= ~(1 << BH_New);
+ map.m_flags &= ~EXT4_MAP_NEW;
+ ext4_update_bh_state(bh_result, map.m_flags);
bh_result->b_size = map.m_len << inode->i_blkbits;
ret = 0;
}
@@ -3193,24 +3293,32 @@ out:
}
#endif
-static void ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
+static int ext4_end_io_dio(struct kiocb *iocb, loff_t offset,
ssize_t size, void *private)
{
- ext4_io_end_t *io_end = iocb->private;
+ ext4_io_end_t *io_end = private;
/* if not async direct IO just return */
if (!io_end)
- return;
+ return 0;
ext_debug("ext4_end_io_dio(): io_end 0x%p "
"for inode %lu, iocb 0x%p, offset %llu, size %zd\n",
- iocb->private, io_end->inode->i_ino, iocb, offset,
- size);
+ io_end, io_end->inode->i_ino, iocb, offset, size);
- iocb->private = NULL;
+ /*
+ * Error during AIO DIO. We cannot convert unwritten extents as the
+ * data was not written. Just clear the unwritten flag and drop io_end.
+ */
+ if (size <= 0) {
+ ext4_clear_io_unwritten_flag(io_end);
+ size = 0;
+ }
io_end->offset = offset;
io_end->size = size;
ext4_put_io_end(io_end);
+
+ return 0;
}
/*
@@ -3243,7 +3351,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
get_block_t *get_block_func = NULL;
int dio_flags = 0;
loff_t final_size = offset + count;
- ext4_io_end_t *io_end = NULL;
/* Use the old path for reads and writes beyond i_size. */
if (iov_iter_rw(iter) != WRITE || final_size > inode->i_size)
@@ -3268,16 +3375,17 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
/*
* We could direct write to holes and fallocate.
*
- * Allocated blocks to fill the hole are marked as
- * unwritten to prevent parallel buffered read to expose
- * the stale data before DIO complete the data IO.
+ * Allocated blocks to fill the hole are marked as unwritten to prevent
+ * parallel buffered read to expose the stale data before DIO complete
+ * the data IO.
*
- * As to previously fallocated extents, ext4 get_block will
- * just simply mark the buffer mapped but still keep the
- * extents unwritten.
+ * As to previously fallocated extents, ext4 get_block will just simply
+ * mark the buffer mapped but still keep the extents unwritten.
*
- * For non AIO case, we will convert those unwritten extents
- * to written after return back from blockdev_direct_IO.
+ * For non AIO case, we will convert those unwritten extents to written
+ * after return back from blockdev_direct_IO. That way we save us from
+ * allocating io_end structure and also the overhead of offloading
+ * the extent convertion to a workqueue.
*
* For async DIO, the conversion needs to be deferred when the
* IO is completed. The ext4 end_io callback function will be
@@ -3285,30 +3393,13 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* case, we allocate an io_end structure to hook to the iocb.
*/
iocb->private = NULL;
- if (overwrite) {
- get_block_func = ext4_get_block_overwrite;
+ if (overwrite)
+ get_block_func = ext4_dio_get_block_overwrite;
+ else if (is_sync_kiocb(iocb)) {
+ get_block_func = ext4_dio_get_block_unwritten_sync;
+ dio_flags = DIO_LOCKING;
} else {
- ext4_inode_aio_set(inode, NULL);
- if (!is_sync_kiocb(iocb)) {
- io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end) {
- ret = -ENOMEM;
- goto retake_lock;
- }
- /*
- * Grab reference for DIO. Will be dropped in
- * ext4_end_io_dio()
- */
- iocb->private = ext4_get_io_end(io_end);
- /*
- * we save the io structure for current async direct
- * IO, so that later ext4_map_blocks() could flag the
- * io structure whether there is a unwritten extents
- * needs to be converted when IO is completed.
- */
- ext4_inode_aio_set(inode, io_end);
- }
- get_block_func = ext4_get_block_write;
+ get_block_func = ext4_dio_get_block_unwritten_async;
dio_flags = DIO_LOCKING;
}
#ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -3323,27 +3414,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
get_block_func,
ext4_end_io_dio, NULL, dio_flags);
- /*
- * Put our reference to io_end. This can free the io_end structure e.g.
- * in sync IO case or in case of error. It can even perform extent
- * conversion if all bios we submitted finished before we got here.
- * Note that in that case iocb->private can be already set to NULL
- * here.
- */
- if (io_end) {
- ext4_inode_aio_set(inode, NULL);
- ext4_put_io_end(io_end);
- /*
- * When no IO was submitted ext4_end_io_dio() was not
- * called so we have to put iocb's reference.
- */
- if (ret <= 0 && ret != -EIOCBQUEUED && iocb->private) {
- WARN_ON(iocb->private != io_end);
- WARN_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
- ext4_put_io_end(io_end);
- iocb->private = NULL;
- }
- }
if (ret > 0 && !overwrite && ext4_test_inode_state(inode,
EXT4_STATE_DIO_UNWRITTEN)) {
int err;
@@ -3358,7 +3428,6 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
ext4_clear_inode_state(inode, EXT4_STATE_DIO_UNWRITTEN);
}
-retake_lock:
if (iov_iter_rw(iter) == WRITE)
inode_dio_end(inode);
/* take i_mutex locking again if we do a ovewrite dio */
@@ -3491,8 +3560,8 @@ void ext4_set_aops(struct inode *inode)
static int __ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length)
{
- ext4_fsblk_t index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ ext4_fsblk_t index = from >> PAGE_SHIFT;
+ unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize, pos;
ext4_lblk_t iblock;
struct inode *inode = mapping->host;
@@ -3500,14 +3569,14 @@ static int __ext4_block_zero_page_range(handle_t *handle,
struct page *page;
int err = 0;
- page = find_or_create_page(mapping, from >> PAGE_CACHE_SHIFT,
+ page = find_or_create_page(mapping, from >> PAGE_SHIFT,
mapping_gfp_constraint(mapping, ~__GFP_FS));
if (!page)
return -ENOMEM;
blocksize = inode->i_sb->s_blocksize;
- iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+ iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
@@ -3549,7 +3618,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
ext4_encrypted_inode(inode)) {
/* We expect the key to be set. */
BUG_ON(!ext4_has_encryption_key(inode));
- BUG_ON(blocksize != PAGE_CACHE_SIZE);
+ BUG_ON(blocksize != PAGE_SIZE);
WARN_ON_ONCE(ext4_decrypt(page));
}
}
@@ -3573,7 +3642,7 @@ static int __ext4_block_zero_page_range(handle_t *handle,
unlock:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return err;
}
@@ -3588,7 +3657,7 @@ static int ext4_block_zero_page_range(handle_t *handle,
struct address_space *mapping, loff_t from, loff_t length)
{
struct inode *inode = mapping->host;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned max = blocksize - (offset & (blocksize - 1));
@@ -3613,7 +3682,7 @@ static int ext4_block_zero_page_range(handle_t *handle,
static int ext4_block_truncate_page(handle_t *handle,
struct address_space *mapping, loff_t from)
{
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned offset = from & (PAGE_SIZE-1);
unsigned length;
unsigned blocksize;
struct inode *inode = mapping->host;
@@ -3751,7 +3820,7 @@ int ext4_punch_hole(struct inode *inode, loff_t offset, loff_t length)
*/
if (offset + length > inode->i_size) {
length = inode->i_size +
- PAGE_CACHE_SIZE - (inode->i_size & (PAGE_CACHE_SIZE - 1)) -
+ PAGE_SIZE - (inode->i_size & (PAGE_SIZE - 1)) -
offset;
}
@@ -4826,23 +4895,23 @@ static void ext4_wait_for_tail_page_commit(struct inode *inode)
tid_t commit_tid = 0;
int ret;
- offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+ offset = inode->i_size & (PAGE_SIZE - 1);
/*
* All buffers in the last page remain valid? Then there's nothing to
- * do. We do the check mainly to optimize the common PAGE_CACHE_SIZE ==
+ * do. We do the check mainly to optimize the common PAGE_SIZE ==
* blocksize case
*/
- if (offset > PAGE_CACHE_SIZE - (1 << inode->i_blkbits))
+ if (offset > PAGE_SIZE - (1 << inode->i_blkbits))
return;
while (1) {
page = find_lock_page(inode->i_mapping,
- inode->i_size >> PAGE_CACHE_SHIFT);
+ inode->i_size >> PAGE_SHIFT);
if (!page)
return;
ret = __ext4_journalled_invalidatepage(page, offset,
- PAGE_CACHE_SIZE - offset);
+ PAGE_SIZE - offset);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (ret != -EBUSY)
return;
commit_tid = 0;
@@ -5481,10 +5550,10 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
goto out;
}
- if (page->index == size >> PAGE_CACHE_SHIFT)
- len = size & ~PAGE_CACHE_MASK;
+ if (page->index == size >> PAGE_SHIFT)
+ len = size & ~PAGE_MASK;
else
- len = PAGE_CACHE_SIZE;
+ len = PAGE_SIZE;
/*
* Return if we have all the buffers mapped. This avoids the need to do
* journal_start/journal_stop which can block and take a long time
@@ -5502,7 +5571,7 @@ int ext4_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
unlock_page(page);
/* OK, we need to fill the hole... */
if (ext4_should_dioread_nolock(inode))
- get_block = ext4_get_block_write;
+ get_block = ext4_get_block_unwritten;
else
get_block = ext4_get_block;
retry_alloc:
@@ -5515,7 +5584,7 @@ retry_alloc:
ret = block_page_mkwrite(vma, vmf, get_block);
if (!ret && ext4_should_journal_data(inode)) {
if (ext4_walk_page_buffers(handle, page_buffers(page), 0,
- PAGE_CACHE_SIZE, NULL, do_journal_get_write_access)) {
+ PAGE_SIZE, NULL, do_journal_get_write_access)) {
unlock_page(page);
ret = VM_FAULT_SIGBUS;
ext4_journal_stop(handle);
@@ -5545,3 +5614,70 @@ int ext4_filemap_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
return err;
}
+
+/*
+ * Find the first extent at or after @lblk in an inode that is not a hole.
+ * Search for @map_len blocks at most. The extent is returned in @result.
+ *
+ * The function returns 1 if we found an extent. The function returns 0 in
+ * case there is no extent at or after @lblk and in that case also sets
+ * @result->es_len to 0. In case of error, the error code is returned.
+ */
+int ext4_get_next_extent(struct inode *inode, ext4_lblk_t lblk,
+ unsigned int map_len, struct extent_status *result)
+{
+ struct ext4_map_blocks map;
+ struct extent_status es = {};
+ int ret;
+
+ map.m_lblk = lblk;
+ map.m_len = map_len;
+
+ /*
+ * For non-extent based files this loop may iterate several times since
+ * we do not determine full hole size.
+ */
+ while (map.m_len > 0) {
+ ret = ext4_map_blocks(NULL, inode, &map, 0);
+ if (ret < 0)
+ return ret;
+ /* There's extent covering m_lblk? Just return it. */
+ if (ret > 0) {
+ int status;
+
+ ext4_es_store_pblock(result, map.m_pblk);
+ result->es_lblk = map.m_lblk;
+ result->es_len = map.m_len;
+ if (map.m_flags & EXT4_MAP_UNWRITTEN)
+ status = EXTENT_STATUS_UNWRITTEN;
+ else
+ status = EXTENT_STATUS_WRITTEN;
+ ext4_es_store_status(result, status);
+ return 1;
+ }
+ ext4_es_find_delayed_extent_range(inode, map.m_lblk,
+ map.m_lblk + map.m_len - 1,
+ &es);
+ /* Is delalloc data before next block in extent tree? */
+ if (es.es_len && es.es_lblk < map.m_lblk + map.m_len) {
+ ext4_lblk_t offset = 0;
+
+ if (es.es_lblk < lblk)
+ offset = lblk - es.es_lblk;
+ result->es_lblk = es.es_lblk + offset;
+ ext4_es_store_pblock(result,
+ ext4_es_pblock(&es) + offset);
+ result->es_len = es.es_len - offset;
+ ext4_es_store_status(result, ext4_es_status(&es));
+
+ return 1;
+ }
+ /* There's a hole at m_lblk, advance us after it */
+ map.m_lblk += map.m_len;
+ map_len -= map.m_len;
+ map.m_len = map_len;
+ cond_resched();
+ }
+ result->es_len = 0;
+ return 0;
+}
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index eae5917c5..0acf8cacb 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -365,7 +365,7 @@ static int ext4_ioctl_setproject(struct file *filp, __u32 projid)
struct dquot *transfer_to[MAXQUOTAS] = { };
transfer_to[PRJQUOTA] = dqget(sb, make_kqid_projid(kprojid));
- if (transfer_to[PRJQUOTA]) {
+ if (!IS_ERR(transfer_to[PRJQUOTA])) {
err = __dquot_transfer(inode, transfer_to);
dqput(transfer_to[PRJQUOTA]);
if (err)
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 4424b7bf8..9d26fa218 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -11,7 +11,7 @@
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
- * You should have received a copy of the GNU General Public Licens
+ * You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-
*/
@@ -119,7 +119,7 @@ MODULE_PARM_DESC(mballoc_debug, "Debugging level for ext4's mballoc");
*
*
* one block each for bitmap and buddy information. So for each group we
- * take up 2 blocks. A page can contain blocks_per_page (PAGE_CACHE_SIZE /
+ * take up 2 blocks. A page can contain blocks_per_page (PAGE_SIZE /
* blocksize) blocks. So it can have information regarding groups_per_page
* which is blocks_per_page/2
*
@@ -807,7 +807,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
*
* one block each for bitmap and buddy information.
* So for each group we take up 2 blocks. A page can
- * contain blocks_per_page (PAGE_CACHE_SIZE / blocksize) blocks.
+ * contain blocks_per_page (PAGE_SIZE / blocksize) blocks.
* So it can have information regarding groups_per_page which
* is blocks_per_page/2
*
@@ -815,7 +815,7 @@ static void mb_regenerate_buddy(struct ext4_buddy *e4b)
* for this page; do not hold this lock when calling this routine!
*/
-static int ext4_mb_init_cache(struct page *page, char *incore)
+static int ext4_mb_init_cache(struct page *page, char *incore, gfp_t gfp)
{
ext4_group_t ngroups;
int blocksize;
@@ -839,7 +839,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
sb = inode->i_sb;
ngroups = ext4_get_groups_count(sb);
blocksize = 1 << inode->i_blkbits;
- blocks_per_page = PAGE_CACHE_SIZE / blocksize;
+ blocks_per_page = PAGE_SIZE / blocksize;
groups_per_page = blocks_per_page >> 1;
if (groups_per_page == 0)
@@ -848,7 +848,7 @@ static int ext4_mb_init_cache(struct page *page, char *incore)
/* allocate buffer_heads to read bitmaps */
if (groups_per_page > 1) {
i = sizeof(struct buffer_head *) * groups_per_page;
- bh = kzalloc(i, GFP_NOFS);
+ bh = kzalloc(i, gfp);
if (bh == NULL) {
err = -ENOMEM;
goto out;
@@ -983,7 +983,7 @@ out:
* are on the same page e4b->bd_buddy_page is NULL and return value is 0.
*/
static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
- ext4_group_t group, struct ext4_buddy *e4b)
+ ext4_group_t group, struct ext4_buddy *e4b, gfp_t gfp)
{
struct inode *inode = EXT4_SB(sb)->s_buddy_cache;
int block, pnum, poff;
@@ -993,7 +993,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
e4b->bd_buddy_page = NULL;
e4b->bd_bitmap_page = NULL;
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ blocks_per_page = PAGE_SIZE / sb->s_blocksize;
/*
* the buddy cache inode stores the block bitmap
* and buddy information in consecutive blocks.
@@ -1002,7 +1002,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block = group * 2;
pnum = block / blocks_per_page;
poff = block % blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
@@ -1016,7 +1016,7 @@ static int ext4_mb_get_buddy_page_lock(struct super_block *sb,
block++;
pnum = block / blocks_per_page;
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (!page)
return -ENOMEM;
BUG_ON(page->mapping != inode->i_mapping);
@@ -1028,11 +1028,11 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
{
if (e4b->bd_bitmap_page) {
unlock_page(e4b->bd_bitmap_page);
- page_cache_release(e4b->bd_bitmap_page);
+ put_page(e4b->bd_bitmap_page);
}
if (e4b->bd_buddy_page) {
unlock_page(e4b->bd_buddy_page);
- page_cache_release(e4b->bd_buddy_page);
+ put_page(e4b->bd_buddy_page);
}
}
@@ -1042,7 +1042,7 @@ static void ext4_mb_put_buddy_page_lock(struct ext4_buddy *e4b)
* calling this routine!
*/
static noinline_for_stack
-int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
+int ext4_mb_init_group(struct super_block *sb, ext4_group_t group, gfp_t gfp)
{
struct ext4_group_info *this_grp;
@@ -1062,7 +1062,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
* The call to ext4_mb_get_buddy_page_lock will mark the
* page accessed.
*/
- ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b);
+ ret = ext4_mb_get_buddy_page_lock(sb, group, &e4b, gfp);
if (ret || !EXT4_MB_GRP_NEED_INIT(this_grp)) {
/*
* somebody initialized the group
@@ -1072,7 +1072,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
}
page = e4b.bd_bitmap_page;
- ret = ext4_mb_init_cache(page, NULL);
+ ret = ext4_mb_init_cache(page, NULL, gfp);
if (ret)
goto err;
if (!PageUptodate(page)) {
@@ -1091,7 +1091,7 @@ int ext4_mb_init_group(struct super_block *sb, ext4_group_t group)
}
/* init buddy cache */
page = e4b.bd_buddy_page;
- ret = ext4_mb_init_cache(page, e4b.bd_bitmap);
+ ret = ext4_mb_init_cache(page, e4b.bd_bitmap, gfp);
if (ret)
goto err;
if (!PageUptodate(page)) {
@@ -1109,8 +1109,8 @@ err:
* calling this routine!
*/
static noinline_for_stack int
-ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
- struct ext4_buddy *e4b)
+ext4_mb_load_buddy_gfp(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b, gfp_t gfp)
{
int blocks_per_page;
int block;
@@ -1125,7 +1125,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
might_sleep();
mb_debug(1, "load group %u\n", group);
- blocks_per_page = PAGE_CACHE_SIZE / sb->s_blocksize;
+ blocks_per_page = PAGE_SIZE / sb->s_blocksize;
grp = ext4_get_group_info(sb, group);
e4b->bd_blkbits = sb->s_blocksize_bits;
@@ -1140,7 +1140,7 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
* we need full data about the group
* to make a good selection
*/
- ret = ext4_mb_init_group(sb, group);
+ ret = ext4_mb_init_group(sb, group, gfp);
if (ret)
return ret;
}
@@ -1167,12 +1167,12 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
* is yet to initialize the same. So
* wait for it to initialize.
*/
- page_cache_release(page);
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ put_page(page);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, NULL);
+ ret = ext4_mb_init_cache(page, NULL, gfp);
if (ret) {
unlock_page(page);
goto err;
@@ -1203,12 +1203,13 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
page = find_get_page_flags(inode->i_mapping, pnum, FGP_ACCESSED);
if (page == NULL || !PageUptodate(page)) {
if (page)
- page_cache_release(page);
- page = find_or_create_page(inode->i_mapping, pnum, GFP_NOFS);
+ put_page(page);
+ page = find_or_create_page(inode->i_mapping, pnum, gfp);
if (page) {
BUG_ON(page->mapping != inode->i_mapping);
if (!PageUptodate(page)) {
- ret = ext4_mb_init_cache(page, e4b->bd_bitmap);
+ ret = ext4_mb_init_cache(page, e4b->bd_bitmap,
+ gfp);
if (ret) {
unlock_page(page);
goto err;
@@ -1237,28 +1238,35 @@ ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
err:
if (page)
- page_cache_release(page);
+ put_page(page);
if (e4b->bd_bitmap_page)
- page_cache_release(e4b->bd_bitmap_page);
+ put_page(e4b->bd_bitmap_page);
if (e4b->bd_buddy_page)
- page_cache_release(e4b->bd_buddy_page);
+ put_page(e4b->bd_buddy_page);
e4b->bd_buddy = NULL;
e4b->bd_bitmap = NULL;
return ret;
}
+static int ext4_mb_load_buddy(struct super_block *sb, ext4_group_t group,
+ struct ext4_buddy *e4b)
+{
+ return ext4_mb_load_buddy_gfp(sb, group, e4b, GFP_NOFS);
+}
+
static void ext4_mb_unload_buddy(struct ext4_buddy *e4b)
{
if (e4b->bd_bitmap_page)
- page_cache_release(e4b->bd_bitmap_page);
+ put_page(e4b->bd_bitmap_page);
if (e4b->bd_buddy_page)
- page_cache_release(e4b->bd_buddy_page);
+ put_page(e4b->bd_buddy_page);
}
static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
{
int order = 1;
+ int bb_incr = 1 << (e4b->bd_blkbits - 1);
void *bb;
BUG_ON(e4b->bd_bitmap == e4b->bd_buddy);
@@ -1271,7 +1279,8 @@ static int mb_find_order_for_block(struct ext4_buddy *e4b, int block)
/* this block is part of buddy of order 'order' */
return order;
}
- bb += 1 << (e4b->bd_blkbits - order);
+ bb += bb_incr;
+ bb_incr >>= 1;
order++;
}
return 0;
@@ -2045,7 +2054,7 @@ static int ext4_mb_good_group(struct ext4_allocation_context *ac,
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- int ret = ext4_mb_init_group(ac->ac_sb, group);
+ int ret = ext4_mb_init_group(ac->ac_sb, group, GFP_NOFS);
if (ret)
return ret;
}
@@ -2576,7 +2585,7 @@ int ext4_mb_init(struct super_block *sb)
{
struct ext4_sb_info *sbi = EXT4_SB(sb);
unsigned i, j;
- unsigned offset;
+ unsigned offset, offset_incr;
unsigned max;
int ret;
@@ -2605,11 +2614,13 @@ int ext4_mb_init(struct super_block *sb)
i = 1;
offset = 0;
+ offset_incr = 1 << (sb->s_blocksize_bits - 1);
max = sb->s_blocksize << 2;
do {
sbi->s_mb_offsets[i] = offset;
sbi->s_mb_maxs[i] = max;
- offset += 1 << (sb->s_blocksize_bits - i);
+ offset += offset_incr;
+ offset_incr = offset_incr >> 1;
max = max >> 1;
i++;
} while (i <= sb->s_blocksize_bits + 1);
@@ -2826,8 +2837,8 @@ static void ext4_free_data_callback(struct super_block *sb,
/* No more items in the per group rb tree
* balance refcounts from ext4_mb_free_metadata()
*/
- page_cache_release(e4b.bd_buddy_page);
- page_cache_release(e4b.bd_bitmap_page);
+ put_page(e4b.bd_buddy_page);
+ put_page(e4b.bd_bitmap_page);
}
ext4_unlock_group(sb, entry->efd_group);
kmem_cache_free(ext4_free_data_cachep, entry);
@@ -4378,9 +4389,9 @@ static int ext4_mb_release_context(struct ext4_allocation_context *ac)
ext4_mb_put_pa(ac, ac->ac_sb, pa);
}
if (ac->ac_bitmap_page)
- page_cache_release(ac->ac_bitmap_page);
+ put_page(ac->ac_bitmap_page);
if (ac->ac_buddy_page)
- page_cache_release(ac->ac_buddy_page);
+ put_page(ac->ac_buddy_page);
if (ac->ac_flags & EXT4_MB_HINT_GROUP_ALLOC)
mutex_unlock(&ac->ac_lg->lg_mutex);
ext4_mb_collect_stats(ac);
@@ -4592,8 +4603,8 @@ ext4_mb_free_metadata(handle_t *handle, struct ext4_buddy *e4b,
* otherwise we'll refresh it from
* on-disk bitmap and lose not-yet-available
* blocks */
- page_cache_get(e4b->bd_buddy_page);
- page_cache_get(e4b->bd_bitmap_page);
+ get_page(e4b->bd_buddy_page);
+ get_page(e4b->bd_bitmap_page);
}
while (*n) {
parent = *n;
@@ -4695,16 +4706,6 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
}
/*
- * We need to make sure we don't reuse the freed block until
- * after the transaction is committed, which we can do by
- * treating the block as metadata, below. We make an
- * exception if the inode is to be written in writeback mode
- * since writeback mode has weak data consistency guarantees.
- */
- if (!ext4_should_writeback_data(inode))
- flags |= EXT4_FREE_BLOCKS_METADATA;
-
- /*
* If the extent to be freed does not begin on a cluster
* boundary, we need to deal with partial clusters at the
* beginning and end of the extent. Normally we will free
@@ -4738,14 +4739,13 @@ void ext4_free_blocks(handle_t *handle, struct inode *inode,
if (!bh && (flags & EXT4_FREE_BLOCKS_FORGET)) {
int i;
+ int is_metadata = flags & EXT4_FREE_BLOCKS_METADATA;
for (i = 0; i < count; i++) {
cond_resched();
- bh = sb_find_get_block(inode->i_sb, block + i);
- if (!bh)
- continue;
- ext4_forget(handle, flags & EXT4_FREE_BLOCKS_METADATA,
- inode, bh, block + i);
+ if (is_metadata)
+ bh = sb_find_get_block(inode->i_sb, block + i);
+ ext4_forget(handle, is_metadata, inode, bh, block + i);
}
}
@@ -4815,16 +4815,23 @@ do_more:
#endif
trace_ext4_mballoc_free(sb, inode, block_group, bit, count_clusters);
- err = ext4_mb_load_buddy(sb, block_group, &e4b);
+ /* __GFP_NOFAIL: retry infinitely, ignore TIF_MEMDIE and memcg limit. */
+ err = ext4_mb_load_buddy_gfp(sb, block_group, &e4b,
+ GFP_NOFS|__GFP_NOFAIL);
if (err)
goto error_return;
- if ((flags & EXT4_FREE_BLOCKS_METADATA) && ext4_handle_valid(handle)) {
+ /*
+ * We need to make sure we don't reuse the freed block until after the
+ * transaction is committed. We make an exception if the inode is to be
+ * written in writeback mode since writeback mode has weak data
+ * consistency guarantees.
+ */
+ if (ext4_handle_valid(handle) &&
+ ((flags & EXT4_FREE_BLOCKS_METADATA) ||
+ !ext4_should_writeback_data(inode))) {
struct ext4_free_data *new_entry;
/*
- * blocks being freed are metadata. these blocks shouldn't
- * be used until this transaction is committed
- *
* We use __GFP_NOFAIL because ext4_free_blocks() is not allowed
* to fail.
*/
@@ -5217,7 +5224,7 @@ int ext4_trim_fs(struct super_block *sb, struct fstrim_range *range)
grp = ext4_get_group_info(sb, group);
/* We only do this if the grp has never been initialized */
if (unlikely(EXT4_MB_GRP_NEED_INIT(grp))) {
- ret = ext4_mb_init_group(sb, group);
+ ret = ext4_mb_init_group(sb, group, GFP_NOFS);
if (ret)
break;
}
diff --git a/fs/ext4/mballoc.h b/fs/ext4/mballoc.h
index d634e183b..3ef1df6ae 100644
--- a/fs/ext4/mballoc.h
+++ b/fs/ext4/mballoc.h
@@ -23,18 +23,6 @@
#include "ext4.h"
/*
- * with AGGRESSIVE_CHECK allocator runs consistency checks over
- * structures. these checks slow things down a lot
- */
-#define AGGRESSIVE_CHECK__
-
-/*
- * with DOUBLE_CHECK defined mballoc creates persistent in-core
- * bitmaps, maintains and uses them to check for double allocations
- */
-#define DOUBLE_CHECK__
-
-/*
*/
#ifdef CONFIG_EXT4_DEBUG
extern ushort ext4_mballoc_debug;
diff --git a/fs/ext4/migrate.c b/fs/ext4/migrate.c
index a4651894c..364ea4d4a 100644
--- a/fs/ext4/migrate.c
+++ b/fs/ext4/migrate.c
@@ -361,7 +361,7 @@ static int ext4_ext_swap_inode_data(handle_t *handle, struct inode *inode,
* blocks.
*
* While converting to extents we need not
- * update the orignal inode i_blocks for extent blocks
+ * update the original inode i_blocks for extent blocks
* via quota APIs. The quota update happened via tmp_inode already.
*/
spin_lock(&inode->i_lock);
diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c
index 0a512aa81..24445275d 100644
--- a/fs/ext4/mmp.c
+++ b/fs/ext4/mmp.c
@@ -91,21 +91,22 @@ static int read_mmp_block(struct super_block *sb, struct buffer_head **bh,
submit_bh(READ_SYNC | REQ_META | REQ_PRIO, *bh);
wait_on_buffer(*bh);
if (!buffer_uptodate(*bh)) {
- brelse(*bh);
- *bh = NULL;
ret = -EIO;
goto warn_exit;
}
-
mmp = (struct mmp_struct *)((*bh)->b_data);
- if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC)
+ if (le32_to_cpu(mmp->mmp_magic) != EXT4_MMP_MAGIC) {
ret = -EFSCORRUPTED;
- else if (!ext4_mmp_csum_verify(sb, mmp))
+ goto warn_exit;
+ }
+ if (!ext4_mmp_csum_verify(sb, mmp)) {
ret = -EFSBADCRC;
- else
- return 0;
-
+ goto warn_exit;
+ }
+ return 0;
warn_exit:
+ brelse(*bh);
+ *bh = NULL;
ext4_warning(sb, "Error %d while reading MMP block %llu",
ret, mmp_block);
return ret;
@@ -181,15 +182,13 @@ static int kmmpd(void *data)
EXT4_FEATURE_INCOMPAT_MMP)) {
ext4_warning(sb, "kmmpd being stopped since MMP feature"
" has been disabled.");
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
+ goto exit_thread;
}
if (sb->s_flags & MS_RDONLY) {
ext4_warning(sb, "kmmpd being stopped since filesystem "
"has been remounted as readonly.");
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
+ goto exit_thread;
}
diff = jiffies - last_update_time;
@@ -211,9 +210,7 @@ static int kmmpd(void *data)
if (retval) {
ext4_error(sb, "error reading MMP data: %d",
retval);
-
- EXT4_SB(sb)->s_mmp_tsk = NULL;
- goto failed;
+ goto exit_thread;
}
mmp_check = (struct mmp_struct *)(bh_check->b_data);
@@ -225,7 +222,9 @@ static int kmmpd(void *data)
"The filesystem seems to have been"
" multiply mounted.");
ext4_error(sb, "abort");
- goto failed;
+ put_bh(bh_check);
+ retval = -EBUSY;
+ goto exit_thread;
}
put_bh(bh_check);
}
@@ -248,7 +247,8 @@ static int kmmpd(void *data)
retval = write_mmp_block(sb, bh);
-failed:
+exit_thread:
+ EXT4_SB(sb)->s_mmp_tsk = NULL;
kfree(data);
brelse(bh);
return retval;
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index 796ff0eaf..325cef48b 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -156,7 +156,7 @@ mext_page_double_lock(struct inode *inode1, struct inode *inode2,
page[1] = grab_cache_page_write_begin(mapping[1], index2, fl);
if (!page[1]) {
unlock_page(page[0]);
- page_cache_release(page[0]);
+ put_page(page[0]);
return -ENOMEM;
}
/*
@@ -192,7 +192,7 @@ mext_page_mkuptodate(struct page *page, unsigned from, unsigned to)
create_empty_buffers(page, blocksize, 0);
head = page_buffers(page);
- block = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ block = (sector_t)page->index << (PAGE_SHIFT - inode->i_blkbits);
for (bh = head, block_start = 0; bh != head || !block_start;
block++, block_start = block_end, bh = bh->b_this_page) {
block_end = block_start + blocksize;
@@ -268,7 +268,7 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
int i, err2, jblocks, retries = 0;
int replaced_count = 0;
int from = data_offset_in_page << orig_inode->i_blkbits;
- int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+ int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
struct super_block *sb = orig_inode->i_sb;
struct buffer_head *bh = NULL;
@@ -404,9 +404,9 @@ data_copy:
unlock_pages:
unlock_page(pagep[0]);
- page_cache_release(pagep[0]);
+ put_page(pagep[0]);
unlock_page(pagep[1]);
- page_cache_release(pagep[1]);
+ put_page(pagep[1]);
stop_journal:
ext4_journal_stop(handle);
if (*err == -ENOSPC &&
@@ -561,7 +561,7 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
struct inode *orig_inode = file_inode(o_filp);
struct inode *donor_inode = file_inode(d_filp);
struct ext4_ext_path *path = NULL;
- int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
+ int blocks_per_page = PAGE_SIZE >> orig_inode->i_blkbits;
ext4_lblk_t o_end, o_start = orig_blk;
ext4_lblk_t d_start = donor_blk;
int ret;
@@ -655,9 +655,9 @@ ext4_move_extents(struct file *o_filp, struct file *d_filp, __u64 orig_blk,
if (o_end - o_start < cur_len)
cur_len = o_end - o_start;
- orig_page_index = o_start >> (PAGE_CACHE_SHIFT -
+ orig_page_index = o_start >> (PAGE_SHIFT -
orig_inode->i_blkbits);
- donor_page_index = d_start >> (PAGE_CACHE_SHIFT -
+ donor_page_index = d_start >> (PAGE_SHIFT -
donor_inode->i_blkbits);
offset_in_page = o_start % blocks_per_page;
if (cur_len > blocks_per_page- offset_in_page)
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 48e4b8907..fdd151f91 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -2828,7 +2828,7 @@ int ext4_orphan_add(handle_t *handle, struct inode *inode)
* list entries can cause panics at unmount time.
*/
mutex_lock(&sbi->s_orphan_lock);
- list_del(&EXT4_I(inode)->i_orphan);
+ list_del_init(&EXT4_I(inode)->i_orphan);
mutex_unlock(&sbi->s_orphan_lock);
}
}
diff --git a/fs/ext4/page-io.c b/fs/ext4/page-io.c
index 090b34986..e4fc8ea45 100644
--- a/fs/ext4/page-io.c
+++ b/fs/ext4/page-io.c
@@ -23,6 +23,7 @@
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/mm.h>
+#include <linux/backing-dev.h>
#include "ext4_jbd2.h"
#include "xattr.h"
@@ -128,9 +129,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
BUG_ON(io_end->flag & EXT4_IO_END_UNWRITTEN);
WARN_ON(io_end->handle);
- if (atomic_dec_and_test(&EXT4_I(io_end->inode)->i_ioend_count))
- wake_up_all(ext4_ioend_wq(io_end->inode));
-
for (bio = io_end->bio; bio; bio = next_bio) {
next_bio = bio->bi_private;
ext4_finish_bio(bio);
@@ -139,16 +137,6 @@ static void ext4_release_io_end(ext4_io_end_t *io_end)
kmem_cache_free(io_end_cachep, io_end);
}
-static void ext4_clear_io_unwritten_flag(ext4_io_end_t *io_end)
-{
- struct inode *inode = io_end->inode;
-
- io_end->flag &= ~EXT4_IO_END_UNWRITTEN;
- /* Wake up anyone waiting on unwritten extent conversion */
- if (atomic_dec_and_test(&EXT4_I(inode)->i_unwritten))
- wake_up_all(ext4_ioend_wq(inode));
-}
-
/*
* Check a range of space and convert unwritten extents to written. Note that
* we are protected from truncate touching same part of extent tree by the
@@ -265,7 +253,6 @@ ext4_io_end_t *ext4_init_io_end(struct inode *inode, gfp_t flags)
{
ext4_io_end_t *io = kmem_cache_zalloc(io_end_cachep, flags);
if (io) {
- atomic_inc(&EXT4_I(inode)->i_ioend_count);
io->inode = inode;
INIT_LIST_HEAD(&io->list);
atomic_set(&io->count, 1);
@@ -446,8 +433,8 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- if (len < PAGE_CACHE_SIZE)
- zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ if (len < PAGE_SIZE)
+ zero_user_segment(page, len, PAGE_SIZE);
/*
* In the first loop we prepare and mark buffers to submit. We have to
* mark all buffers in the page before submitting so that
@@ -484,9 +471,20 @@ int ext4_bio_write_page(struct ext4_io_submit *io,
if (ext4_encrypted_inode(inode) && S_ISREG(inode->i_mode) &&
nr_to_submit) {
- data_page = ext4_encrypt(inode, page);
+ gfp_t gfp_flags = GFP_NOFS;
+
+ retry_encrypt:
+ data_page = ext4_encrypt(inode, page, gfp_flags);
if (IS_ERR(data_page)) {
ret = PTR_ERR(data_page);
+ if (ret == -ENOMEM && wbc->sync_mode == WB_SYNC_ALL) {
+ if (io->io_bio) {
+ ext4_io_submit(io);
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ }
+ gfp_flags |= __GFP_NOFAIL;
+ goto retry_encrypt;
+ }
data_page = NULL;
goto out;
}
diff --git a/fs/ext4/readpage.c b/fs/ext4/readpage.c
index 5dc5e9506..dc54a4b60 100644
--- a/fs/ext4/readpage.c
+++ b/fs/ext4/readpage.c
@@ -23,7 +23,7 @@
*
* then this code just gives up and calls the buffer_head-based read function.
* It does handle a page which has holes at the end - that is a common case:
- * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
+ * the end-of-file on blocksize < PAGE_SIZE setups.
*
*/
@@ -140,7 +140,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
struct inode *inode = mapping->host;
const unsigned blkbits = inode->i_blkbits;
- const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+ const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
const unsigned blocksize = 1 << blkbits;
sector_t block_in_file;
sector_t last_block;
@@ -173,7 +173,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
if (page_has_buffers(page))
goto confused;
- block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+ block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
last_block = block_in_file + nr_pages * blocks_per_page;
last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
if (last_block > last_block_in_file)
@@ -217,7 +217,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
set_error_page:
SetPageError(page);
zero_user_segment(page, 0,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
unlock_page(page);
goto next_page;
}
@@ -250,7 +250,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
}
if (first_hole != blocks_per_page) {
zero_user_segment(page, first_hole << blkbits,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
if (first_hole == 0) {
SetPageUptodate(page);
unlock_page(page);
@@ -279,7 +279,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
if (ext4_encrypted_inode(inode) &&
S_ISREG(inode->i_mode)) {
- ctx = ext4_get_crypto_ctx(inode);
+ ctx = ext4_get_crypto_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
goto set_error_page;
}
@@ -319,7 +319,7 @@ int ext4_mpage_readpages(struct address_space *mapping,
unlock_page(page);
next_page:
if (pages)
- page_cache_release(page);
+ put_page(page);
}
BUG_ON(pages && !list_empty(pages));
if (bio)
diff --git a/fs/ext4/super.c b/fs/ext4/super.c
index a76ca677f..304c712db 100644
--- a/fs/ext4/super.c
+++ b/fs/ext4/super.c
@@ -55,7 +55,6 @@
static struct ext4_lazy_init *ext4_li_info;
static struct mutex ext4_li_mtx;
-static int ext4_mballoc_ready;
static struct ratelimit_state ext4_mount_msg_ratelimit;
static int ext4_load_journal(struct super_block *, struct ext4_super_block *,
@@ -844,7 +843,6 @@ static void ext4_put_super(struct super_block *sb)
ext4_release_system_zone(sb);
ext4_mb_release(sb);
ext4_ext_release(sb);
- ext4_xattr_put_super(sb);
if (!(sb->s_flags & MS_RDONLY)) {
ext4_clear_feature_journal_needs_recovery(sb);
@@ -944,7 +942,6 @@ static struct inode *ext4_alloc_inode(struct super_block *sb)
spin_lock_init(&ei->i_completed_io_lock);
ei->i_sync_tid = 0;
ei->i_datasync_tid = 0;
- atomic_set(&ei->i_ioend_count, 0);
atomic_set(&ei->i_unwritten, 0);
INIT_WORK(&ei->i_rsv_conversion_work, ext4_end_io_rsv_work);
#ifdef CONFIG_EXT4_FS_ENCRYPTION
@@ -1116,6 +1113,7 @@ static ssize_t ext4_quota_write(struct super_block *sb, int type,
static int ext4_quota_enable(struct super_block *sb, int type, int format_id,
unsigned int flags);
static int ext4_enable_quotas(struct super_block *sb);
+static int ext4_get_next_id(struct super_block *sb, struct kqid *qid);
static struct dquot **ext4_get_dquots(struct inode *inode)
{
@@ -1132,6 +1130,7 @@ static const struct dquot_operations ext4_quota_operations = {
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
.get_projid = ext4_get_projid,
+ .get_next_id = ext4_get_next_id,
};
static const struct quotactl_ops ext4_qctl_operations = {
@@ -1141,7 +1140,8 @@ static const struct quotactl_ops ext4_qctl_operations = {
.get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
- .set_dqblk = dquot_set_dqblk
+ .set_dqblk = dquot_set_dqblk,
+ .get_nextdqblk = dquot_get_next_dqblk,
};
#endif
@@ -1425,9 +1425,9 @@ static const struct mount_opts {
{Opt_err_ro, EXT4_MOUNT_ERRORS_RO, MOPT_SET | MOPT_CLEAR_ERR},
{Opt_err_cont, EXT4_MOUNT_ERRORS_CONT, MOPT_SET | MOPT_CLEAR_ERR},
{Opt_data_err_abort, EXT4_MOUNT_DATA_ERR_ABORT,
- MOPT_NO_EXT2 | MOPT_SET},
+ MOPT_NO_EXT2},
{Opt_data_err_ignore, EXT4_MOUNT_DATA_ERR_ABORT,
- MOPT_NO_EXT2 | MOPT_CLEAR},
+ MOPT_NO_EXT2},
{Opt_barrier, EXT4_MOUNT_BARRIER, MOPT_SET},
{Opt_nobarrier, EXT4_MOUNT_BARRIER, MOPT_CLEAR},
{Opt_noauto_da_alloc, EXT4_MOUNT_NO_AUTO_DA_ALLOC, MOPT_SET},
@@ -1705,6 +1705,10 @@ static int handle_mount_opt(struct super_block *sb, char *opt, int token,
ext4_msg(sb, KERN_INFO, "dax option not supported");
return -1;
#endif
+ } else if (token == Opt_data_err_abort) {
+ sbi->s_mount_opt |= m->mount_opt;
+ } else if (token == Opt_data_err_ignore) {
+ sbi->s_mount_opt &= ~m->mount_opt;
} else {
if (!args->from)
arg = 1;
@@ -1781,7 +1785,7 @@ static int parse_options(char *options, struct super_block *sb,
int blocksize =
BLOCK_SIZE << le32_to_cpu(sbi->s_es->s_log_block_size);
- if (blocksize < PAGE_CACHE_SIZE) {
+ if (blocksize < PAGE_SIZE) {
ext4_msg(sb, KERN_ERR, "can't mount with "
"dioread_nolock if block size != PAGE_SIZE");
return 0;
@@ -1914,6 +1918,8 @@ static int _ext4_show_options(struct seq_file *seq, struct super_block *sb,
SEQ_OPTS_PRINT("init_itable=%u", sbi->s_li_wait_mult);
if (nodefs || sbi->s_max_dir_size_kb)
SEQ_OPTS_PRINT("max_dir_size_kb=%u", sbi->s_max_dir_size_kb);
+ if (test_opt(sb, DATA_ERR_ABORT))
+ SEQ_OPTS_PUTS("data_err=abort");
ext4_show_quota_options(seq, sb);
return 0;
@@ -3796,16 +3802,14 @@ static int ext4_fill_super(struct super_block *sb, void *data, int silent)
sbi->s_journal->j_commit_callback = ext4_journal_commit_callback;
no_journal:
- if (ext4_mballoc_ready) {
- sbi->s_mb_cache = ext4_xattr_create_cache(sb->s_id);
- if (!sbi->s_mb_cache) {
- ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
- goto failed_mount_wq;
- }
+ sbi->s_mb_cache = ext4_xattr_create_cache();
+ if (!sbi->s_mb_cache) {
+ ext4_msg(sb, KERN_ERR, "Failed to create an mb_cache");
+ goto failed_mount_wq;
}
if ((DUMMY_ENCRYPTION_ENABLED(sbi) || ext4_has_feature_encrypt(sb)) &&
- (blocksize != PAGE_CACHE_SIZE)) {
+ (blocksize != PAGE_SIZE)) {
ext4_msg(sb, KERN_ERR,
"Unsupported blocksize for fs encryption");
goto failed_mount_wq;
@@ -4027,6 +4031,10 @@ failed_mount4:
if (EXT4_SB(sb)->rsv_conversion_wq)
destroy_workqueue(EXT4_SB(sb)->rsv_conversion_wq);
failed_mount_wq:
+ if (sbi->s_mb_cache) {
+ ext4_xattr_destroy_cache(sbi->s_mb_cache);
+ sbi->s_mb_cache = NULL;
+ }
if (sbi->s_journal) {
jbd2_journal_destroy(sbi->s_journal);
sbi->s_journal = NULL;
@@ -5267,6 +5275,17 @@ out:
return len;
}
+static int ext4_get_next_id(struct super_block *sb, struct kqid *qid)
+{
+ const struct quota_format_ops *ops;
+
+ if (!sb_has_quota_loaded(sb, qid->type))
+ return -ESRCH;
+ ops = sb_dqopt(sb)->ops[qid->type];
+ if (!ops || !ops->get_next_id)
+ return -ENOSYS;
+ return dquot_get_next_id(sb, qid);
+}
#endif
static struct dentry *ext4_mount(struct file_system_type *fs_type, int flags,
@@ -5342,7 +5361,6 @@ MODULE_ALIAS_FS("ext4");
/* Shared across all ext4 file systems */
wait_queue_head_t ext4__ioend_wq[EXT4_WQ_HASH_SZ];
-struct mutex ext4__aio_mutex[EXT4_WQ_HASH_SZ];
static int __init ext4_init_fs(void)
{
@@ -5355,10 +5373,8 @@ static int __init ext4_init_fs(void)
/* Build-time check for flags consistency */
ext4_check_flag_values();
- for (i = 0; i < EXT4_WQ_HASH_SZ; i++) {
- mutex_init(&ext4__aio_mutex[i]);
+ for (i = 0; i < EXT4_WQ_HASH_SZ; i++)
init_waitqueue_head(&ext4__ioend_wq[i]);
- }
err = ext4_init_es();
if (err)
@@ -5379,8 +5395,6 @@ static int __init ext4_init_fs(void)
err = ext4_init_mballoc();
if (err)
goto out2;
- else
- ext4_mballoc_ready = 1;
err = init_inodecache();
if (err)
goto out1;
@@ -5396,7 +5410,6 @@ out:
unregister_as_ext3();
destroy_inodecache();
out1:
- ext4_mballoc_ready = 0;
ext4_exit_mballoc();
out2:
ext4_exit_sysfs();
diff --git a/fs/ext4/symlink.c b/fs/ext4/symlink.c
index 6f7ee30a8..75ed5c2f0 100644
--- a/fs/ext4/symlink.c
+++ b/fs/ext4/symlink.c
@@ -80,12 +80,12 @@ static const char *ext4_encrypted_get_link(struct dentry *dentry,
if (res <= plen)
paddr[res] = '\0';
if (cpage)
- page_cache_release(cpage);
+ put_page(cpage);
set_delayed_call(done, kfree_link, paddr);
return paddr;
errout:
if (cpage)
- page_cache_release(cpage);
+ put_page(cpage);
kfree(paddr);
return ERR_PTR(res);
}
diff --git a/fs/ext4/xattr.c b/fs/ext4/xattr.c
index a95151e87..e79bd32b9 100644
--- a/fs/ext4/xattr.c
+++ b/fs/ext4/xattr.c
@@ -230,6 +230,27 @@ ext4_xattr_check_block(struct inode *inode, struct buffer_head *bh)
return error;
}
+static int
+__xattr_check_inode(struct inode *inode, struct ext4_xattr_ibody_header *header,
+ void *end, const char *function, unsigned int line)
+{
+ struct ext4_xattr_entry *entry = IFIRST(header);
+ int error = -EFSCORRUPTED;
+
+ if (((void *) header >= end) ||
+ (header->h_magic != le32_to_cpu(EXT4_XATTR_MAGIC)))
+ goto errout;
+ error = ext4_xattr_check_names(entry, end, entry);
+errout:
+ if (error)
+ __ext4_error_inode(inode, function, line, 0,
+ "corrupted in-inode xattr");
+ return error;
+}
+
+#define xattr_check_inode(inode, header, end) \
+ __xattr_check_inode((inode), (header), (end), __func__, __LINE__)
+
static inline int
ext4_xattr_check_entry(struct ext4_xattr_entry *entry, size_t size)
{
@@ -341,7 +362,7 @@ ext4_xattr_ibody_get(struct inode *inode, int name_index, const char *name,
header = IHDR(inode, raw_inode);
entry = IFIRST(header);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(entry, end, entry);
+ error = xattr_check_inode(inode, header, end);
if (error)
goto cleanup;
error = ext4_xattr_find_entry(&entry, name_index, name,
@@ -477,7 +498,7 @@ ext4_xattr_ibody_list(struct dentry *dentry, char *buffer, size_t buffer_size)
raw_inode = ext4_raw_inode(&iloc);
header = IHDR(inode, raw_inode);
end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
- error = ext4_xattr_check_names(IFIRST(header), end, IFIRST(header));
+ error = xattr_check_inode(inode, header, end);
if (error)
goto cleanup;
error = ext4_xattr_list_entries(dentry, IFIRST(header),
@@ -545,30 +566,44 @@ static void
ext4_xattr_release_block(handle_t *handle, struct inode *inode,
struct buffer_head *bh)
{
- struct mb_cache_entry *ce = NULL;
- int error = 0;
struct mb_cache *ext4_mb_cache = EXT4_GET_MB_CACHE(inode);
+ u32 hash, ref;
+ int error = 0;
- ce = mb_cache_entry_get(ext4_mb_cache, bh->b_bdev, bh->b_blocknr);
BUFFER_TRACE(bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bh);
if (error)
goto out;
lock_buffer(bh);
- if (BHDR(bh)->h_refcount == cpu_to_le32(1)) {
+ hash = le32_to_cpu(BHDR(bh)->h_hash);
+ ref = le32_to_cpu(BHDR(bh)->h_refcount);
+ if (ref == 1) {
ea_bdebug(bh, "refcount now=0; freeing");
- if (ce)
- mb_cache_entry_free(ce);
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect freed block
+ */
+ mb_cache_entry_delete_block(ext4_mb_cache, hash, bh->b_blocknr);
get_bh(bh);
unlock_buffer(bh);
ext4_free_blocks(handle, inode, bh, 0, 1,
EXT4_FREE_BLOCKS_METADATA |
EXT4_FREE_BLOCKS_FORGET);
} else {
- le32_add_cpu(&BHDR(bh)->h_refcount, -1);
- if (ce)
- mb_cache_entry_release(ce);
+ ref--;
+ BHDR(bh)->h_refcount = cpu_to_le32(ref);
+ if (ref == EXT4_XATTR_REFCOUNT_MAX - 1) {
+ struct mb_cache_entry *ce;
+
+ ce = mb_cache_entry_get(ext4_mb_cache, hash,
+ bh->b_blocknr);
+ if (ce) {
+ ce->e_reusable = 1;
+ mb_cache_entry_put(ext4_mb_cache, ce);
+ }
+ }
+
/*
* Beware of this ugliness: Releasing of xattr block references
* from different inodes can race and so we have to protect
@@ -790,8 +825,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
if (i->value && i->value_len > sb->s_blocksize)
return -ENOSPC;
if (s->base) {
- ce = mb_cache_entry_get(ext4_mb_cache, bs->bh->b_bdev,
- bs->bh->b_blocknr);
BUFFER_TRACE(bs->bh, "get_write_access");
error = ext4_journal_get_write_access(handle, bs->bh);
if (error)
@@ -799,10 +832,15 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
lock_buffer(bs->bh);
if (header(s->base)->h_refcount == cpu_to_le32(1)) {
- if (ce) {
- mb_cache_entry_free(ce);
- ce = NULL;
- }
+ __u32 hash = le32_to_cpu(BHDR(bs->bh)->h_hash);
+
+ /*
+ * This must happen under buffer lock for
+ * ext4_xattr_block_set() to reliably detect modified
+ * block
+ */
+ mb_cache_entry_delete_block(ext4_mb_cache, hash,
+ bs->bh->b_blocknr);
ea_bdebug(bs->bh, "modifying in-place");
error = ext4_xattr_set_entry(i, s);
if (!error) {
@@ -826,10 +864,6 @@ ext4_xattr_block_set(handle_t *handle, struct inode *inode,
int offset = (char *)s->here - bs->bh->b_data;
unlock_buffer(bs->bh);
- if (ce) {
- mb_cache_entry_release(ce);
- ce = NULL;
- }
ea_bdebug(bs->bh, "cloning");
s->base = kmalloc(bs->bh->b_size, GFP_NOFS);
error = -ENOMEM;
@@ -872,6 +906,8 @@ inserted:
if (new_bh == bs->bh)
ea_bdebug(new_bh, "keeping");
else {
+ u32 ref;
+
/* The old block is released after updating
the inode. */
error = dquot_alloc_block(inode,
@@ -884,9 +920,40 @@ inserted:
if (error)
goto cleanup_dquot;
lock_buffer(new_bh);
- le32_add_cpu(&BHDR(new_bh)->h_refcount, 1);
+ /*
+ * We have to be careful about races with
+ * freeing, rehashing or adding references to
+ * xattr block. Once we hold buffer lock xattr
+ * block's state is stable so we can check
+ * whether the block got freed / rehashed or
+ * not. Since we unhash mbcache entry under
+ * buffer lock when freeing / rehashing xattr
+ * block, checking whether entry is still
+ * hashed is reliable. Same rules hold for
+ * e_reusable handling.
+ */
+ if (hlist_bl_unhashed(&ce->e_hash_list) ||
+ !ce->e_reusable) {
+ /*
+ * Undo everything and check mbcache
+ * again.
+ */
+ unlock_buffer(new_bh);
+ dquot_free_block(inode,
+ EXT4_C2B(EXT4_SB(sb),
+ 1));
+ brelse(new_bh);
+ mb_cache_entry_put(ext4_mb_cache, ce);
+ ce = NULL;
+ new_bh = NULL;
+ goto inserted;
+ }
+ ref = le32_to_cpu(BHDR(new_bh)->h_refcount) + 1;
+ BHDR(new_bh)->h_refcount = cpu_to_le32(ref);
+ if (ref >= EXT4_XATTR_REFCOUNT_MAX)
+ ce->e_reusable = 0;
ea_bdebug(new_bh, "reusing; refcount now=%d",
- le32_to_cpu(BHDR(new_bh)->h_refcount));
+ ref);
unlock_buffer(new_bh);
error = ext4_handle_dirty_xattr_block(handle,
inode,
@@ -894,7 +961,8 @@ inserted:
if (error)
goto cleanup_dquot;
}
- mb_cache_entry_release(ce);
+ mb_cache_entry_touch(ext4_mb_cache, ce);
+ mb_cache_entry_put(ext4_mb_cache, ce);
ce = NULL;
} else if (bs->bh && s->base == bs->bh->b_data) {
/* We were modifying this block in-place. */
@@ -959,7 +1027,7 @@ getblk_failed:
cleanup:
if (ce)
- mb_cache_entry_release(ce);
+ mb_cache_entry_put(ext4_mb_cache, ce);
brelse(new_bh);
if (!(bs->bh && s->base == bs->bh->b_data))
kfree(s->base);
@@ -993,8 +1061,7 @@ int ext4_xattr_ibody_find(struct inode *inode, struct ext4_xattr_info *i,
is->s.here = is->s.first;
is->s.end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size;
if (ext4_test_inode_state(inode, EXT4_STATE_XATTR)) {
- error = ext4_xattr_check_names(IFIRST(header), is->s.end,
- IFIRST(header));
+ error = xattr_check_inode(inode, header, is->s.end);
if (error)
return error;
/* Find the named attribute. */
@@ -1070,6 +1137,17 @@ static int ext4_xattr_ibody_set(handle_t *handle, struct inode *inode,
return 0;
}
+static int ext4_xattr_value_same(struct ext4_xattr_search *s,
+ struct ext4_xattr_info *i)
+{
+ void *value;
+
+ if (le32_to_cpu(s->here->e_value_size) != i->value_len)
+ return 0;
+ value = ((void *)s->base) + le16_to_cpu(s->here->e_value_offs);
+ return !memcmp(value, i->value, i->value_len);
+}
+
/*
* ext4_xattr_set_handle()
*
@@ -1146,6 +1224,13 @@ ext4_xattr_set_handle(handle_t *handle, struct inode *inode, int name_index,
else if (!bs.s.not_found)
error = ext4_xattr_block_set(handle, inode, &i, &bs);
} else {
+ error = 0;
+ /* Xattr value did not change? Save us some work and bail out */
+ if (!is.s.not_found && ext4_xattr_value_same(&is.s, &i))
+ goto cleanup;
+ if (!bs.s.not_found && ext4_xattr_value_same(&bs.s, &i))
+ goto cleanup;
+
error = ext4_xattr_ibody_set(handle, inode, &i, &is);
if (!error && !bs.s.not_found) {
i.value = NULL;
@@ -1291,6 +1376,10 @@ retry:
last = entry;
total_ino = sizeof(struct ext4_xattr_ibody_header);
+ error = xattr_check_inode(inode, header, end);
+ if (error)
+ goto cleanup;
+
free = ext4_xattr_free_space(last, &min_offs, base, &total_ino);
if (free >= new_extra_isize) {
entry = IFIRST(header);
@@ -1512,17 +1601,6 @@ cleanup:
}
/*
- * ext4_xattr_put_super()
- *
- * This is called when a file system is unmounted.
- */
-void
-ext4_xattr_put_super(struct super_block *sb)
-{
- mb_cache_shrink(sb->s_bdev);
-}
-
-/*
* ext4_xattr_cache_insert()
*
* Create a new entry in the extended attribute cache, and insert
@@ -1533,26 +1611,19 @@ ext4_xattr_put_super(struct super_block *sb)
static void
ext4_xattr_cache_insert(struct mb_cache *ext4_mb_cache, struct buffer_head *bh)
{
- __u32 hash = le32_to_cpu(BHDR(bh)->h_hash);
- struct mb_cache_entry *ce;
+ struct ext4_xattr_header *header = BHDR(bh);
+ __u32 hash = le32_to_cpu(header->h_hash);
+ int reusable = le32_to_cpu(header->h_refcount) <
+ EXT4_XATTR_REFCOUNT_MAX;
int error;
- ce = mb_cache_entry_alloc(ext4_mb_cache, GFP_NOFS);
- if (!ce) {
- ea_bdebug(bh, "out of memory");
- return;
- }
- error = mb_cache_entry_insert(ce, bh->b_bdev, bh->b_blocknr, hash);
+ error = mb_cache_entry_create(ext4_mb_cache, GFP_NOFS, hash,
+ bh->b_blocknr, reusable);
if (error) {
- mb_cache_entry_free(ce);
- if (error == -EBUSY) {
+ if (error == -EBUSY)
ea_bdebug(bh, "already in cache");
- error = 0;
- }
- } else {
+ } else
ea_bdebug(bh, "inserting [%x]", (int)hash);
- mb_cache_entry_release(ce);
- }
}
/*
@@ -1614,33 +1685,20 @@ ext4_xattr_cache_find(struct inode *inode, struct ext4_xattr_header *header,
if (!header->h_hash)
return NULL; /* never share */
ea_idebug(inode, "looking for cached blocks [%x]", (int)hash);
-again:
- ce = mb_cache_entry_find_first(ext4_mb_cache, inode->i_sb->s_bdev,
- hash);
+ ce = mb_cache_entry_find_first(ext4_mb_cache, hash);
while (ce) {
struct buffer_head *bh;
- if (IS_ERR(ce)) {
- if (PTR_ERR(ce) == -EAGAIN)
- goto again;
- break;
- }
bh = sb_bread(inode->i_sb, ce->e_block);
if (!bh) {
EXT4_ERROR_INODE(inode, "block %lu read error",
(unsigned long) ce->e_block);
- } else if (le32_to_cpu(BHDR(bh)->h_refcount) >=
- EXT4_XATTR_REFCOUNT_MAX) {
- ea_idebug(inode, "block %lu refcount %d>=%d",
- (unsigned long) ce->e_block,
- le32_to_cpu(BHDR(bh)->h_refcount),
- EXT4_XATTR_REFCOUNT_MAX);
} else if (ext4_xattr_cmp(header, BHDR(bh)) == 0) {
*pce = ce;
return bh;
}
brelse(bh);
- ce = mb_cache_entry_find_next(ce, inode->i_sb->s_bdev, hash);
+ ce = mb_cache_entry_find_next(ext4_mb_cache, ce);
}
return NULL;
}
@@ -1716,9 +1774,9 @@ static void ext4_xattr_rehash(struct ext4_xattr_header *header,
#define HASH_BUCKET_BITS 10
struct mb_cache *
-ext4_xattr_create_cache(char *name)
+ext4_xattr_create_cache(void)
{
- return mb_cache_create(name, HASH_BUCKET_BITS);
+ return mb_cache_create(HASH_BUCKET_BITS);
}
void ext4_xattr_destroy_cache(struct mb_cache *cache)
diff --git a/fs/ext4/xattr.h b/fs/ext4/xattr.h
index ddc095776..69dd3e656 100644
--- a/fs/ext4/xattr.h
+++ b/fs/ext4/xattr.h
@@ -108,7 +108,6 @@ extern int ext4_xattr_set(struct inode *, int, const char *, const void *, size_
extern int ext4_xattr_set_handle(handle_t *, struct inode *, int, const char *, const void *, size_t, int);
extern void ext4_xattr_delete_inode(handle_t *, struct inode *);
-extern void ext4_xattr_put_super(struct super_block *);
extern int ext4_expand_extra_isize_ea(struct inode *inode, int new_extra_isize,
struct ext4_inode *raw_inode, handle_t *handle);
@@ -124,7 +123,7 @@ extern int ext4_xattr_ibody_inline_set(handle_t *handle, struct inode *inode,
struct ext4_xattr_info *i,
struct ext4_xattr_ibody_find *is);
-extern struct mb_cache *ext4_xattr_create_cache(char *name);
+extern struct mb_cache *ext4_xattr_create_cache(void);
extern void ext4_xattr_destroy_cache(struct mb_cache *);
#ifdef CONFIG_EXT4_FS_SECURITY
diff --git a/fs/f2fs/Kconfig b/fs/f2fs/Kconfig
index b0a9dc929..1f8982a95 100644
--- a/fs/f2fs/Kconfig
+++ b/fs/f2fs/Kconfig
@@ -1,6 +1,8 @@
config F2FS_FS
tristate "F2FS filesystem support"
depends on BLOCK
+ select CRYPTO
+ select CRYPTO_CRC32
help
F2FS is based on Log-structured File System (LFS), which supports
versatile "flash-friendly" features. The design has been focused on
@@ -76,15 +78,7 @@ config F2FS_FS_ENCRYPTION
bool "F2FS Encryption"
depends on F2FS_FS
depends on F2FS_FS_XATTR
- select CRYPTO_AES
- select CRYPTO_CBC
- select CRYPTO_ECB
- select CRYPTO_XTS
- select CRYPTO_CTS
- select CRYPTO_CTR
- select CRYPTO_SHA256
- select KEYS
- select ENCRYPTED_KEYS
+ select FS_ENCRYPTION
help
Enable encryption of f2fs files and directories. This
feature is similar to ecryptfs, but it is more memory
diff --git a/fs/f2fs/Makefile b/fs/f2fs/Makefile
index 08e101ed9..ca949ea7c 100644
--- a/fs/f2fs/Makefile
+++ b/fs/f2fs/Makefile
@@ -7,5 +7,3 @@ f2fs-$(CONFIG_F2FS_STAT_FS) += debug.o
f2fs-$(CONFIG_F2FS_FS_XATTR) += xattr.o
f2fs-$(CONFIG_F2FS_FS_POSIX_ACL) += acl.o
f2fs-$(CONFIG_F2FS_IO_TRACE) += trace.o
-f2fs-$(CONFIG_F2FS_FS_ENCRYPTION) += crypto_policy.o crypto.o \
- crypto_key.o crypto_fname.o
diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index 3842af954..0955312e5 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -39,7 +39,7 @@ repeat:
cond_resched();
goto repeat;
}
- f2fs_wait_on_page_writeback(page, META);
+ f2fs_wait_on_page_writeback(page, META, true);
SetPageUptodate(page);
return page;
}
@@ -56,7 +56,8 @@ static struct page *__get_meta_page(struct f2fs_sb_info *sbi, pgoff_t index,
.sbi = sbi,
.type = META,
.rw = READ_SYNC | REQ_META | REQ_PRIO,
- .blk_addr = index,
+ .old_blkaddr = index,
+ .new_blkaddr = index,
.encrypted_page = NULL,
};
@@ -143,7 +144,6 @@ bool is_valid_blkaddr(struct f2fs_sb_info *sbi, block_t blkaddr, int type)
int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
int type, bool sync)
{
- block_t prev_blk_addr = 0;
struct page *page;
block_t blkno = start;
struct f2fs_io_info fio = {
@@ -152,10 +152,12 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
.rw = sync ? (READ_SYNC | REQ_META | REQ_PRIO) : READA,
.encrypted_page = NULL,
};
+ struct blk_plug plug;
if (unlikely(type == META_POR))
fio.rw &= ~REQ_META;
+ blk_start_plug(&plug);
for (; nrpages-- > 0; blkno++) {
if (!is_valid_blkaddr(sbi, blkno, type))
@@ -167,27 +169,24 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
NAT_BLOCK_OFFSET(NM_I(sbi)->max_nid)))
blkno = 0;
/* get nat block addr */
- fio.blk_addr = current_nat_addr(sbi,
+ fio.new_blkaddr = current_nat_addr(sbi,
blkno * NAT_ENTRY_PER_BLOCK);
break;
case META_SIT:
/* get sit block addr */
- fio.blk_addr = current_sit_addr(sbi,
+ fio.new_blkaddr = current_sit_addr(sbi,
blkno * SIT_ENTRY_PER_BLOCK);
- if (blkno != start && prev_blk_addr + 1 != fio.blk_addr)
- goto out;
- prev_blk_addr = fio.blk_addr;
break;
case META_SSA:
case META_CP:
case META_POR:
- fio.blk_addr = blkno;
+ fio.new_blkaddr = blkno;
break;
default:
BUG();
}
- page = grab_cache_page(META_MAPPING(sbi), fio.blk_addr);
+ page = grab_cache_page(META_MAPPING(sbi), fio.new_blkaddr);
if (!page)
continue;
if (PageUptodate(page)) {
@@ -196,11 +195,13 @@ int ra_meta_pages(struct f2fs_sb_info *sbi, block_t start, int nrpages,
}
fio.page = page;
+ fio.old_blkaddr = fio.new_blkaddr;
f2fs_submit_page_mbio(&fio);
f2fs_put_page(page, 0);
}
out:
f2fs_submit_merged_bio(sbi, META, READ);
+ blk_finish_plug(&plug);
return blkno - start;
}
@@ -232,13 +233,17 @@ static int f2fs_write_meta_page(struct page *page,
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
- f2fs_wait_on_page_writeback(page, META);
write_meta_page(sbi, page);
dec_page_count(sbi, F2FS_DIRTY_META);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, META, WRITE);
+
unlock_page(page);
- if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, META, WRITE);
+
return 0;
redirty_out:
@@ -252,13 +257,13 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
long diff, written;
- trace_f2fs_writepages(mapping->host, wbc, META);
-
/* collect a number of dirty meta pages and write together */
if (wbc->for_kupdate ||
get_pages(sbi, F2FS_DIRTY_META) < nr_pages_to_skip(sbi, META))
goto skip_write;
+ trace_f2fs_writepages(mapping->host, wbc, META);
+
/* if mounting is failed, skip writing node pages */
mutex_lock(&sbi->cp_mutex);
diff = nr_pages_to_write(sbi, META, wbc);
@@ -269,6 +274,7 @@ static int f2fs_write_meta_pages(struct address_space *mapping,
skip_write:
wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_META);
+ trace_f2fs_writepages(mapping->host, wbc, META);
return 0;
}
@@ -276,15 +282,18 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
long nr_to_write)
{
struct address_space *mapping = META_MAPPING(sbi);
- pgoff_t index = 0, end = LONG_MAX, prev = LONG_MAX;
+ pgoff_t index = 0, end = ULONG_MAX, prev = ULONG_MAX;
struct pagevec pvec;
long nwritten = 0;
struct writeback_control wbc = {
.for_reclaim = 0,
};
+ struct blk_plug plug;
pagevec_init(&pvec, 0);
+ blk_start_plug(&plug);
+
while (index <= end) {
int i, nr_pages;
nr_pages = pagevec_lookup_tag(&pvec, mapping, &index,
@@ -296,7 +305,7 @@ long sync_meta_pages(struct f2fs_sb_info *sbi, enum page_type type,
for (i = 0; i < nr_pages; i++) {
struct page *page = pvec.pages[i];
- if (prev == LONG_MAX)
+ if (prev == ULONG_MAX)
prev = page->index - 1;
if (nr_to_write != LONG_MAX && page->index != prev + 1) {
pagevec_release(&pvec);
@@ -315,6 +324,9 @@ continue_unlock:
goto continue_unlock;
}
+ f2fs_wait_on_page_writeback(page, META, true);
+
+ BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -334,6 +346,8 @@ stop:
if (nwritten)
f2fs_submit_merged_bio(sbi, type, WRITE);
+ blk_finish_plug(&plug);
+
return nwritten;
}
@@ -621,7 +635,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
goto invalid_cp1;
crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
- if (!f2fs_crc_valid(crc, cp_block, crc_offset))
+ if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset))
goto invalid_cp1;
pre_version = cur_cp_version(cp_block);
@@ -636,7 +650,7 @@ static struct page *validate_checkpoint(struct f2fs_sb_info *sbi,
goto invalid_cp2;
crc = le32_to_cpu(*((__le32 *)((unsigned char *)cp_block + crc_offset)));
- if (!f2fs_crc_valid(crc, cp_block, crc_offset))
+ if (!f2fs_crc_valid(sbi, crc, cp_block, crc_offset))
goto invalid_cp2;
cur_version = cur_cp_version(cp_block);
@@ -696,6 +710,10 @@ int get_valid_checkpoint(struct f2fs_sb_info *sbi)
cp_block = (struct f2fs_checkpoint *)page_address(cur_page);
memcpy(sbi->ckpt, cp_block, blk_size);
+ /* Sanity checking of checkpoint */
+ if (sanity_check_ckpt(sbi))
+ goto fail_no_cp;
+
if (cp_blks <= 1)
goto done;
@@ -902,7 +920,7 @@ static void wait_on_all_pages_writeback(struct f2fs_sb_info *sbi)
if (!get_pages(sbi, F2FS_WRITEBACK))
break;
- io_schedule();
+ io_schedule_timeout(5*HZ);
}
finish_wait(&sbi->cp_wait, &wait);
}
@@ -921,6 +939,9 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
int cp_payload_blks = __cp_payload(sbi);
block_t discard_blk = NEXT_FREE_BLKADDR(sbi, curseg);
bool invalidate = false;
+ struct super_block *sb = sbi->sb;
+ struct curseg_info *seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
+ u64 kbytes_written;
/*
* This avoids to conduct wrong roll-forward operations and uses
@@ -1008,7 +1029,7 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
get_sit_bitmap(sbi, __bitmap_ptr(sbi, SIT_BITMAP));
get_nat_bitmap(sbi, __bitmap_ptr(sbi, NAT_BITMAP));
- crc32 = f2fs_crc32(ckpt, le32_to_cpu(ckpt->checksum_offset));
+ crc32 = f2fs_crc32(sbi, ckpt, le32_to_cpu(ckpt->checksum_offset));
*((__le32 *)((unsigned char *)ckpt +
le32_to_cpu(ckpt->checksum_offset)))
= cpu_to_le32(crc32);
@@ -1034,6 +1055,14 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
write_data_summaries(sbi, start_blk);
start_blk += data_sum_blocks;
+
+ /* Record write statistics in the hot node summary */
+ kbytes_written = sbi->kbytes_written;
+ if (sb->s_bdev->bd_part)
+ kbytes_written += BD_PART_WRITTEN(sbi);
+
+ seg_i->journal->info.kbytes_written = cpu_to_le64(kbytes_written);
+
if (__remain_node_summaries(cpc->reason)) {
write_node_summaries(sbi, start_blk);
start_blk += NR_CURSEG_NODE_TYPE;
@@ -1048,8 +1077,8 @@ static int do_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
if (unlikely(f2fs_cp_error(sbi)))
return -EIO;
- filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LONG_MAX);
- filemap_fdatawait_range(META_MAPPING(sbi), 0, LONG_MAX);
+ filemap_fdatawait_range(NODE_MAPPING(sbi), 0, LLONG_MAX);
+ filemap_fdatawait_range(META_MAPPING(sbi), 0, LLONG_MAX);
/* update user_block_counts */
sbi->last_valid_block_count = sbi->total_valid_block_count;
@@ -1112,9 +1141,7 @@ int write_checkpoint(struct f2fs_sb_info *sbi, struct cp_control *cpc)
trace_f2fs_write_checkpoint(sbi->sb, cpc->reason, "finish block_ops");
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
- f2fs_submit_merged_bio(sbi, META, WRITE);
+ f2fs_flush_merged_bios(sbi);
/*
* update checkpoint pack index
diff --git a/fs/f2fs/crypto.c b/fs/f2fs/crypto.c
deleted file mode 100644
index 4a62ef14e..000000000
--- a/fs/f2fs/crypto.c
+++ /dev/null
@@ -1,491 +0,0 @@
-/*
- * linux/fs/f2fs/crypto.c
- *
- * Copied from linux/fs/ext4/crypto.c
- *
- * Copyright (C) 2015, Google, Inc.
- * Copyright (C) 2015, Motorola Mobility
- *
- * This contains encryption functions for f2fs
- *
- * Written by Michael Halcrow, 2014.
- *
- * Filename encryption additions
- * Uday Savagaonkar, 2014
- * Encryption policy handling additions
- * Ildar Muslukhov, 2014
- * Remove ext4_encrypted_zeroout(),
- * add f2fs_restore_and_release_control_page()
- * Jaegeuk Kim, 2015.
- *
- * This has not yet undergone a rigorous security audit.
- *
- * The usage of AES-XTS should conform to recommendations in NIST
- * Special Publication 800-38E and IEEE P1619/D16.
- */
-#include <crypto/hash.h>
-#include <crypto/sha.h>
-#include <keys/user-type.h>
-#include <keys/encrypted-type.h>
-#include <linux/crypto.h>
-#include <linux/ecryptfs.h>
-#include <linux/gfp.h>
-#include <linux/kernel.h>
-#include <linux/key.h>
-#include <linux/list.h>
-#include <linux/mempool.h>
-#include <linux/module.h>
-#include <linux/mutex.h>
-#include <linux/random.h>
-#include <linux/scatterlist.h>
-#include <linux/spinlock_types.h>
-#include <linux/f2fs_fs.h>
-#include <linux/ratelimit.h>
-#include <linux/bio.h>
-
-#include "f2fs.h"
-#include "xattr.h"
-
-/* Encryption added and removed here! (L: */
-
-static unsigned int num_prealloc_crypto_pages = 32;
-static unsigned int num_prealloc_crypto_ctxs = 128;
-
-module_param(num_prealloc_crypto_pages, uint, 0444);
-MODULE_PARM_DESC(num_prealloc_crypto_pages,
- "Number of crypto pages to preallocate");
-module_param(num_prealloc_crypto_ctxs, uint, 0444);
-MODULE_PARM_DESC(num_prealloc_crypto_ctxs,
- "Number of crypto contexts to preallocate");
-
-static mempool_t *f2fs_bounce_page_pool;
-
-static LIST_HEAD(f2fs_free_crypto_ctxs);
-static DEFINE_SPINLOCK(f2fs_crypto_ctx_lock);
-
-static struct workqueue_struct *f2fs_read_workqueue;
-static DEFINE_MUTEX(crypto_init);
-
-static struct kmem_cache *f2fs_crypto_ctx_cachep;
-struct kmem_cache *f2fs_crypt_info_cachep;
-
-/**
- * f2fs_release_crypto_ctx() - Releases an encryption context
- * @ctx: The encryption context to release.
- *
- * If the encryption context was allocated from the pre-allocated pool, returns
- * it to that pool. Else, frees it.
- *
- * If there's a bounce page in the context, this frees that.
- */
-void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *ctx)
-{
- unsigned long flags;
-
- if (ctx->flags & F2FS_WRITE_PATH_FL && ctx->w.bounce_page) {
- mempool_free(ctx->w.bounce_page, f2fs_bounce_page_pool);
- ctx->w.bounce_page = NULL;
- }
- ctx->w.control_page = NULL;
- if (ctx->flags & F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL) {
- kmem_cache_free(f2fs_crypto_ctx_cachep, ctx);
- } else {
- spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags);
- list_add(&ctx->free_list, &f2fs_free_crypto_ctxs);
- spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags);
- }
-}
-
-/**
- * f2fs_get_crypto_ctx() - Gets an encryption context
- * @inode: The inode for which we are doing the crypto
- *
- * Allocates and initializes an encryption context.
- *
- * Return: An allocated and initialized encryption context on success; error
- * value or NULL otherwise.
- */
-struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *inode)
-{
- struct f2fs_crypto_ctx *ctx = NULL;
- unsigned long flags;
- struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
-
- if (ci == NULL)
- return ERR_PTR(-ENOKEY);
-
- /*
- * We first try getting the ctx from a free list because in
- * the common case the ctx will have an allocated and
- * initialized crypto tfm, so it's probably a worthwhile
- * optimization. For the bounce page, we first try getting it
- * from the kernel allocator because that's just about as fast
- * as getting it from a list and because a cache of free pages
- * should generally be a "last resort" option for a filesystem
- * to be able to do its job.
- */
- spin_lock_irqsave(&f2fs_crypto_ctx_lock, flags);
- ctx = list_first_entry_or_null(&f2fs_free_crypto_ctxs,
- struct f2fs_crypto_ctx, free_list);
- if (ctx)
- list_del(&ctx->free_list);
- spin_unlock_irqrestore(&f2fs_crypto_ctx_lock, flags);
- if (!ctx) {
- ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_NOFS);
- if (!ctx)
- return ERR_PTR(-ENOMEM);
- ctx->flags |= F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
- } else {
- ctx->flags &= ~F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL;
- }
- ctx->flags &= ~F2FS_WRITE_PATH_FL;
- return ctx;
-}
-
-/*
- * Call f2fs_decrypt on every single page, reusing the encryption
- * context.
- */
-static void completion_pages(struct work_struct *work)
-{
- struct f2fs_crypto_ctx *ctx =
- container_of(work, struct f2fs_crypto_ctx, r.work);
- struct bio *bio = ctx->r.bio;
- struct bio_vec *bv;
- int i;
-
- bio_for_each_segment_all(bv, bio, i) {
- struct page *page = bv->bv_page;
- int ret = f2fs_decrypt(ctx, page);
-
- if (ret) {
- WARN_ON_ONCE(1);
- SetPageError(page);
- } else
- SetPageUptodate(page);
- unlock_page(page);
- }
- f2fs_release_crypto_ctx(ctx);
- bio_put(bio);
-}
-
-void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *ctx, struct bio *bio)
-{
- INIT_WORK(&ctx->r.work, completion_pages);
- ctx->r.bio = bio;
- queue_work(f2fs_read_workqueue, &ctx->r.work);
-}
-
-static void f2fs_crypto_destroy(void)
-{
- struct f2fs_crypto_ctx *pos, *n;
-
- list_for_each_entry_safe(pos, n, &f2fs_free_crypto_ctxs, free_list)
- kmem_cache_free(f2fs_crypto_ctx_cachep, pos);
- INIT_LIST_HEAD(&f2fs_free_crypto_ctxs);
- if (f2fs_bounce_page_pool)
- mempool_destroy(f2fs_bounce_page_pool);
- f2fs_bounce_page_pool = NULL;
-}
-
-/**
- * f2fs_crypto_initialize() - Set up for f2fs encryption.
- *
- * We only call this when we start accessing encrypted files, since it
- * results in memory getting allocated that wouldn't otherwise be used.
- *
- * Return: Zero on success, non-zero otherwise.
- */
-int f2fs_crypto_initialize(void)
-{
- int i, res = -ENOMEM;
-
- if (f2fs_bounce_page_pool)
- return 0;
-
- mutex_lock(&crypto_init);
- if (f2fs_bounce_page_pool)
- goto already_initialized;
-
- for (i = 0; i < num_prealloc_crypto_ctxs; i++) {
- struct f2fs_crypto_ctx *ctx;
-
- ctx = kmem_cache_zalloc(f2fs_crypto_ctx_cachep, GFP_KERNEL);
- if (!ctx)
- goto fail;
- list_add(&ctx->free_list, &f2fs_free_crypto_ctxs);
- }
-
- /* must be allocated at the last step to avoid race condition above */
- f2fs_bounce_page_pool =
- mempool_create_page_pool(num_prealloc_crypto_pages, 0);
- if (!f2fs_bounce_page_pool)
- goto fail;
-
-already_initialized:
- mutex_unlock(&crypto_init);
- return 0;
-fail:
- f2fs_crypto_destroy();
- mutex_unlock(&crypto_init);
- return res;
-}
-
-/**
- * f2fs_exit_crypto() - Shutdown the f2fs encryption system
- */
-void f2fs_exit_crypto(void)
-{
- f2fs_crypto_destroy();
-
- if (f2fs_read_workqueue)
- destroy_workqueue(f2fs_read_workqueue);
- if (f2fs_crypto_ctx_cachep)
- kmem_cache_destroy(f2fs_crypto_ctx_cachep);
- if (f2fs_crypt_info_cachep)
- kmem_cache_destroy(f2fs_crypt_info_cachep);
-}
-
-int __init f2fs_init_crypto(void)
-{
- int res = -ENOMEM;
-
- f2fs_read_workqueue = alloc_workqueue("f2fs_crypto", WQ_HIGHPRI, 0);
- if (!f2fs_read_workqueue)
- goto fail;
-
- f2fs_crypto_ctx_cachep = KMEM_CACHE(f2fs_crypto_ctx,
- SLAB_RECLAIM_ACCOUNT);
- if (!f2fs_crypto_ctx_cachep)
- goto fail;
-
- f2fs_crypt_info_cachep = KMEM_CACHE(f2fs_crypt_info,
- SLAB_RECLAIM_ACCOUNT);
- if (!f2fs_crypt_info_cachep)
- goto fail;
-
- return 0;
-fail:
- f2fs_exit_crypto();
- return res;
-}
-
-void f2fs_restore_and_release_control_page(struct page **page)
-{
- struct f2fs_crypto_ctx *ctx;
- struct page *bounce_page;
-
- /* The bounce data pages are unmapped. */
- if ((*page)->mapping)
- return;
-
- /* The bounce data page is unmapped. */
- bounce_page = *page;
- ctx = (struct f2fs_crypto_ctx *)page_private(bounce_page);
-
- /* restore control page */
- *page = ctx->w.control_page;
-
- f2fs_restore_control_page(bounce_page);
-}
-
-void f2fs_restore_control_page(struct page *data_page)
-{
- struct f2fs_crypto_ctx *ctx =
- (struct f2fs_crypto_ctx *)page_private(data_page);
-
- set_page_private(data_page, (unsigned long)NULL);
- ClearPagePrivate(data_page);
- unlock_page(data_page);
- f2fs_release_crypto_ctx(ctx);
-}
-
-/**
- * f2fs_crypt_complete() - The completion callback for page encryption
- * @req: The asynchronous encryption request context
- * @res: The result of the encryption operation
- */
-static void f2fs_crypt_complete(struct crypto_async_request *req, int res)
-{
- struct f2fs_completion_result *ecr = req->data;
-
- if (res == -EINPROGRESS)
- return;
- ecr->res = res;
- complete(&ecr->completion);
-}
-
-typedef enum {
- F2FS_DECRYPT = 0,
- F2FS_ENCRYPT,
-} f2fs_direction_t;
-
-static int f2fs_page_crypto(struct f2fs_crypto_ctx *ctx,
- struct inode *inode,
- f2fs_direction_t rw,
- pgoff_t index,
- struct page *src_page,
- struct page *dest_page)
-{
- u8 xts_tweak[F2FS_XTS_TWEAK_SIZE];
- struct ablkcipher_request *req = NULL;
- DECLARE_F2FS_COMPLETION_RESULT(ecr);
- struct scatterlist dst, src;
- struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
- struct crypto_ablkcipher *tfm = ci->ci_ctfm;
- int res = 0;
-
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
- if (!req) {
- printk_ratelimited(KERN_ERR
- "%s: crypto_request_alloc() failed\n",
- __func__);
- return -ENOMEM;
- }
- ablkcipher_request_set_callback(
- req, CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- f2fs_crypt_complete, &ecr);
-
- BUILD_BUG_ON(F2FS_XTS_TWEAK_SIZE < sizeof(index));
- memcpy(xts_tweak, &index, sizeof(index));
- memset(&xts_tweak[sizeof(index)], 0,
- F2FS_XTS_TWEAK_SIZE - sizeof(index));
-
- sg_init_table(&dst, 1);
- sg_set_page(&dst, dest_page, PAGE_CACHE_SIZE, 0);
- sg_init_table(&src, 1);
- sg_set_page(&src, src_page, PAGE_CACHE_SIZE, 0);
- ablkcipher_request_set_crypt(req, &src, &dst, PAGE_CACHE_SIZE,
- xts_tweak);
- if (rw == F2FS_DECRYPT)
- res = crypto_ablkcipher_decrypt(req);
- else
- res = crypto_ablkcipher_encrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- BUG_ON(req->base.data != &ecr);
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
- ablkcipher_request_free(req);
- if (res) {
- printk_ratelimited(KERN_ERR
- "%s: crypto_ablkcipher_encrypt() returned %d\n",
- __func__, res);
- return res;
- }
- return 0;
-}
-
-static struct page *alloc_bounce_page(struct f2fs_crypto_ctx *ctx)
-{
- ctx->w.bounce_page = mempool_alloc(f2fs_bounce_page_pool, GFP_NOWAIT);
- if (ctx->w.bounce_page == NULL)
- return ERR_PTR(-ENOMEM);
- ctx->flags |= F2FS_WRITE_PATH_FL;
- return ctx->w.bounce_page;
-}
-
-/**
- * f2fs_encrypt() - Encrypts a page
- * @inode: The inode for which the encryption should take place
- * @plaintext_page: The page to encrypt. Must be locked.
- *
- * Allocates a ciphertext page and encrypts plaintext_page into it using the ctx
- * encryption context.
- *
- * Called on the page write path. The caller must call
- * f2fs_restore_control_page() on the returned ciphertext page to
- * release the bounce buffer and the encryption context.
- *
- * Return: An allocated page with the encrypted content on success. Else, an
- * error value or NULL.
- */
-struct page *f2fs_encrypt(struct inode *inode,
- struct page *plaintext_page)
-{
- struct f2fs_crypto_ctx *ctx;
- struct page *ciphertext_page = NULL;
- int err;
-
- BUG_ON(!PageLocked(plaintext_page));
-
- ctx = f2fs_get_crypto_ctx(inode);
- if (IS_ERR(ctx))
- return (struct page *)ctx;
-
- /* The encryption operation will require a bounce page. */
- ciphertext_page = alloc_bounce_page(ctx);
- if (IS_ERR(ciphertext_page))
- goto err_out;
-
- ctx->w.control_page = plaintext_page;
- err = f2fs_page_crypto(ctx, inode, F2FS_ENCRYPT, plaintext_page->index,
- plaintext_page, ciphertext_page);
- if (err) {
- ciphertext_page = ERR_PTR(err);
- goto err_out;
- }
-
- SetPagePrivate(ciphertext_page);
- set_page_private(ciphertext_page, (unsigned long)ctx);
- lock_page(ciphertext_page);
- return ciphertext_page;
-
-err_out:
- f2fs_release_crypto_ctx(ctx);
- return ciphertext_page;
-}
-
-/**
- * f2fs_decrypt() - Decrypts a page in-place
- * @ctx: The encryption context.
- * @page: The page to decrypt. Must be locked.
- *
- * Decrypts page in-place using the ctx encryption context.
- *
- * Called from the read completion callback.
- *
- * Return: Zero on success, non-zero otherwise.
- */
-int f2fs_decrypt(struct f2fs_crypto_ctx *ctx, struct page *page)
-{
- BUG_ON(!PageLocked(page));
-
- return f2fs_page_crypto(ctx, page->mapping->host,
- F2FS_DECRYPT, page->index, page, page);
-}
-
-/*
- * Convenience function which takes care of allocating and
- * deallocating the encryption context
- */
-int f2fs_decrypt_one(struct inode *inode, struct page *page)
-{
- struct f2fs_crypto_ctx *ctx = f2fs_get_crypto_ctx(inode);
- int ret;
-
- if (IS_ERR(ctx))
- return PTR_ERR(ctx);
- ret = f2fs_decrypt(ctx, page);
- f2fs_release_crypto_ctx(ctx);
- return ret;
-}
-
-bool f2fs_valid_contents_enc_mode(uint32_t mode)
-{
- return (mode == F2FS_ENCRYPTION_MODE_AES_256_XTS);
-}
-
-/**
- * f2fs_validate_encryption_key_size() - Validate the encryption key size
- * @mode: The key mode.
- * @size: The key size to validate.
- *
- * Return: The validated key size for @mode. Zero if invalid.
- */
-uint32_t f2fs_validate_encryption_key_size(uint32_t mode, uint32_t size)
-{
- if (size == f2fs_encryption_key_size(mode))
- return size;
- return 0;
-}
diff --git a/fs/f2fs/crypto_key.c b/fs/f2fs/crypto_key.c
deleted file mode 100644
index 5de2d866a..000000000
--- a/fs/f2fs/crypto_key.c
+++ /dev/null
@@ -1,254 +0,0 @@
-/*
- * linux/fs/f2fs/crypto_key.c
- *
- * Copied from linux/fs/f2fs/crypto_key.c
- *
- * Copyright (C) 2015, Google, Inc.
- *
- * This contains encryption key functions for f2fs
- *
- * Written by Michael Halcrow, Ildar Muslukhov, and Uday Savagaonkar, 2015.
- */
-#include <keys/encrypted-type.h>
-#include <keys/user-type.h>
-#include <linux/random.h>
-#include <linux/scatterlist.h>
-#include <uapi/linux/keyctl.h>
-#include <crypto/hash.h>
-#include <linux/f2fs_fs.h>
-
-#include "f2fs.h"
-#include "xattr.h"
-
-static void derive_crypt_complete(struct crypto_async_request *req, int rc)
-{
- struct f2fs_completion_result *ecr = req->data;
-
- if (rc == -EINPROGRESS)
- return;
-
- ecr->res = rc;
- complete(&ecr->completion);
-}
-
-/**
- * f2fs_derive_key_aes() - Derive a key using AES-128-ECB
- * @deriving_key: Encryption key used for derivatio.
- * @source_key: Source key to which to apply derivation.
- * @derived_key: Derived key.
- *
- * Return: Zero on success; non-zero otherwise.
- */
-static int f2fs_derive_key_aes(char deriving_key[F2FS_AES_128_ECB_KEY_SIZE],
- char source_key[F2FS_AES_256_XTS_KEY_SIZE],
- char derived_key[F2FS_AES_256_XTS_KEY_SIZE])
-{
- int res = 0;
- struct ablkcipher_request *req = NULL;
- DECLARE_F2FS_COMPLETION_RESULT(ecr);
- struct scatterlist src_sg, dst_sg;
- struct crypto_ablkcipher *tfm = crypto_alloc_ablkcipher("ecb(aes)", 0,
- 0);
-
- if (IS_ERR(tfm)) {
- res = PTR_ERR(tfm);
- tfm = NULL;
- goto out;
- }
- crypto_ablkcipher_set_flags(tfm, CRYPTO_TFM_REQ_WEAK_KEY);
- req = ablkcipher_request_alloc(tfm, GFP_NOFS);
- if (!req) {
- res = -ENOMEM;
- goto out;
- }
- ablkcipher_request_set_callback(req,
- CRYPTO_TFM_REQ_MAY_BACKLOG | CRYPTO_TFM_REQ_MAY_SLEEP,
- derive_crypt_complete, &ecr);
- res = crypto_ablkcipher_setkey(tfm, deriving_key,
- F2FS_AES_128_ECB_KEY_SIZE);
- if (res < 0)
- goto out;
-
- sg_init_one(&src_sg, source_key, F2FS_AES_256_XTS_KEY_SIZE);
- sg_init_one(&dst_sg, derived_key, F2FS_AES_256_XTS_KEY_SIZE);
- ablkcipher_request_set_crypt(req, &src_sg, &dst_sg,
- F2FS_AES_256_XTS_KEY_SIZE, NULL);
- res = crypto_ablkcipher_encrypt(req);
- if (res == -EINPROGRESS || res == -EBUSY) {
- BUG_ON(req->base.data != &ecr);
- wait_for_completion(&ecr.completion);
- res = ecr.res;
- }
-out:
- if (req)
- ablkcipher_request_free(req);
- if (tfm)
- crypto_free_ablkcipher(tfm);
- return res;
-}
-
-static void f2fs_free_crypt_info(struct f2fs_crypt_info *ci)
-{
- if (!ci)
- return;
-
- key_put(ci->ci_keyring_key);
- crypto_free_ablkcipher(ci->ci_ctfm);
- kmem_cache_free(f2fs_crypt_info_cachep, ci);
-}
-
-void f2fs_free_encryption_info(struct inode *inode, struct f2fs_crypt_info *ci)
-{
- struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_crypt_info *prev;
-
- if (ci == NULL)
- ci = ACCESS_ONCE(fi->i_crypt_info);
- if (ci == NULL)
- return;
- prev = cmpxchg(&fi->i_crypt_info, ci, NULL);
- if (prev != ci)
- return;
-
- f2fs_free_crypt_info(ci);
-}
-
-int _f2fs_get_encryption_info(struct inode *inode)
-{
- struct f2fs_inode_info *fi = F2FS_I(inode);
- struct f2fs_crypt_info *crypt_info;
- char full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE +
- (F2FS_KEY_DESCRIPTOR_SIZE * 2) + 1];
- struct key *keyring_key = NULL;
- struct f2fs_encryption_key *master_key;
- struct f2fs_encryption_context ctx;
- const struct user_key_payload *ukp;
- struct crypto_ablkcipher *ctfm;
- const char *cipher_str;
- char raw_key[F2FS_MAX_KEY_SIZE];
- char mode;
- int res;
-
- res = f2fs_crypto_initialize();
- if (res)
- return res;
-retry:
- crypt_info = ACCESS_ONCE(fi->i_crypt_info);
- if (crypt_info) {
- if (!crypt_info->ci_keyring_key ||
- key_validate(crypt_info->ci_keyring_key) == 0)
- return 0;
- f2fs_free_encryption_info(inode, crypt_info);
- goto retry;
- }
-
- res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
- F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
- &ctx, sizeof(ctx), NULL);
- if (res < 0)
- return res;
- else if (res != sizeof(ctx))
- return -EINVAL;
- res = 0;
-
- crypt_info = kmem_cache_alloc(f2fs_crypt_info_cachep, GFP_NOFS);
- if (!crypt_info)
- return -ENOMEM;
-
- crypt_info->ci_flags = ctx.flags;
- crypt_info->ci_data_mode = ctx.contents_encryption_mode;
- crypt_info->ci_filename_mode = ctx.filenames_encryption_mode;
- crypt_info->ci_ctfm = NULL;
- crypt_info->ci_keyring_key = NULL;
- memcpy(crypt_info->ci_master_key, ctx.master_key_descriptor,
- sizeof(crypt_info->ci_master_key));
- if (S_ISREG(inode->i_mode))
- mode = crypt_info->ci_data_mode;
- else if (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))
- mode = crypt_info->ci_filename_mode;
- else
- BUG();
-
- switch (mode) {
- case F2FS_ENCRYPTION_MODE_AES_256_XTS:
- cipher_str = "xts(aes)";
- break;
- case F2FS_ENCRYPTION_MODE_AES_256_CTS:
- cipher_str = "cts(cbc(aes))";
- break;
- default:
- printk_once(KERN_WARNING
- "f2fs: unsupported key mode %d (ino %u)\n",
- mode, (unsigned) inode->i_ino);
- res = -ENOKEY;
- goto out;
- }
-
- memcpy(full_key_descriptor, F2FS_KEY_DESC_PREFIX,
- F2FS_KEY_DESC_PREFIX_SIZE);
- sprintf(full_key_descriptor + F2FS_KEY_DESC_PREFIX_SIZE,
- "%*phN", F2FS_KEY_DESCRIPTOR_SIZE,
- ctx.master_key_descriptor);
- full_key_descriptor[F2FS_KEY_DESC_PREFIX_SIZE +
- (2 * F2FS_KEY_DESCRIPTOR_SIZE)] = '\0';
- keyring_key = request_key(&key_type_logon, full_key_descriptor, NULL);
- if (IS_ERR(keyring_key)) {
- res = PTR_ERR(keyring_key);
- keyring_key = NULL;
- goto out;
- }
- crypt_info->ci_keyring_key = keyring_key;
- BUG_ON(keyring_key->type != &key_type_logon);
- ukp = user_key_payload(keyring_key);
- if (ukp->datalen != sizeof(struct f2fs_encryption_key)) {
- res = -EINVAL;
- goto out;
- }
- master_key = (struct f2fs_encryption_key *)ukp->data;
- BUILD_BUG_ON(F2FS_AES_128_ECB_KEY_SIZE !=
- F2FS_KEY_DERIVATION_NONCE_SIZE);
- BUG_ON(master_key->size != F2FS_AES_256_XTS_KEY_SIZE);
- res = f2fs_derive_key_aes(ctx.nonce, master_key->raw,
- raw_key);
- if (res)
- goto out;
-
- ctfm = crypto_alloc_ablkcipher(cipher_str, 0, 0);
- if (!ctfm || IS_ERR(ctfm)) {
- res = ctfm ? PTR_ERR(ctfm) : -ENOMEM;
- printk(KERN_DEBUG
- "%s: error %d (inode %u) allocating crypto tfm\n",
- __func__, res, (unsigned) inode->i_ino);
- goto out;
- }
- crypt_info->ci_ctfm = ctfm;
- crypto_ablkcipher_clear_flags(ctfm, ~0);
- crypto_tfm_set_flags(crypto_ablkcipher_tfm(ctfm),
- CRYPTO_TFM_REQ_WEAK_KEY);
- res = crypto_ablkcipher_setkey(ctfm, raw_key,
- f2fs_encryption_key_size(mode));
- if (res)
- goto out;
-
- memzero_explicit(raw_key, sizeof(raw_key));
- if (cmpxchg(&fi->i_crypt_info, NULL, crypt_info) != NULL) {
- f2fs_free_crypt_info(crypt_info);
- goto retry;
- }
- return 0;
-
-out:
- if (res == -ENOKEY && !S_ISREG(inode->i_mode))
- res = 0;
-
- f2fs_free_crypt_info(crypt_info);
- memzero_explicit(raw_key, sizeof(raw_key));
- return res;
-}
-
-int f2fs_has_encryption_key(struct inode *inode)
-{
- struct f2fs_inode_info *fi = F2FS_I(inode);
-
- return (fi->i_crypt_info != NULL);
-}
diff --git a/fs/f2fs/crypto_policy.c b/fs/f2fs/crypto_policy.c
deleted file mode 100644
index 596f02490..000000000
--- a/fs/f2fs/crypto_policy.c
+++ /dev/null
@@ -1,210 +0,0 @@
-/*
- * copied from linux/fs/ext4/crypto_policy.c
- *
- * Copyright (C) 2015, Google, Inc.
- * Copyright (C) 2015, Motorola Mobility.
- *
- * This contains encryption policy functions for f2fs with some modifications
- * to support f2fs-specific xattr APIs.
- *
- * Written by Michael Halcrow, 2015.
- * Modified by Jaegeuk Kim, 2015.
- */
-#include <linux/random.h>
-#include <linux/string.h>
-#include <linux/types.h>
-#include <linux/f2fs_fs.h>
-
-#include "f2fs.h"
-#include "xattr.h"
-
-static int f2fs_inode_has_encryption_context(struct inode *inode)
-{
- int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
- F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, NULL, 0, NULL);
- return (res > 0);
-}
-
-/*
- * check whether the policy is consistent with the encryption context
- * for the inode
- */
-static int f2fs_is_encryption_context_consistent_with_policy(
- struct inode *inode, const struct f2fs_encryption_policy *policy)
-{
- struct f2fs_encryption_context ctx;
- int res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
- F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
- sizeof(ctx), NULL);
-
- if (res != sizeof(ctx))
- return 0;
-
- return (memcmp(ctx.master_key_descriptor, policy->master_key_descriptor,
- F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
- (ctx.flags == policy->flags) &&
- (ctx.contents_encryption_mode ==
- policy->contents_encryption_mode) &&
- (ctx.filenames_encryption_mode ==
- policy->filenames_encryption_mode));
-}
-
-static int f2fs_create_encryption_context_from_policy(
- struct inode *inode, const struct f2fs_encryption_policy *policy)
-{
- struct f2fs_encryption_context ctx;
-
- ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1;
- memcpy(ctx.master_key_descriptor, policy->master_key_descriptor,
- F2FS_KEY_DESCRIPTOR_SIZE);
-
- if (!f2fs_valid_contents_enc_mode(policy->contents_encryption_mode)) {
- printk(KERN_WARNING
- "%s: Invalid contents encryption mode %d\n", __func__,
- policy->contents_encryption_mode);
- return -EINVAL;
- }
-
- if (!f2fs_valid_filenames_enc_mode(policy->filenames_encryption_mode)) {
- printk(KERN_WARNING
- "%s: Invalid filenames encryption mode %d\n", __func__,
- policy->filenames_encryption_mode);
- return -EINVAL;
- }
-
- if (policy->flags & ~F2FS_POLICY_FLAGS_VALID)
- return -EINVAL;
-
- ctx.contents_encryption_mode = policy->contents_encryption_mode;
- ctx.filenames_encryption_mode = policy->filenames_encryption_mode;
- ctx.flags = policy->flags;
- BUILD_BUG_ON(sizeof(ctx.nonce) != F2FS_KEY_DERIVATION_NONCE_SIZE);
- get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE);
-
- return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
- F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
- sizeof(ctx), NULL, XATTR_CREATE);
-}
-
-int f2fs_process_policy(const struct f2fs_encryption_policy *policy,
- struct inode *inode)
-{
- if (policy->version != 0)
- return -EINVAL;
-
- if (!S_ISDIR(inode->i_mode))
- return -EINVAL;
-
- if (!f2fs_inode_has_encryption_context(inode)) {
- if (!f2fs_empty_dir(inode))
- return -ENOTEMPTY;
- return f2fs_create_encryption_context_from_policy(inode,
- policy);
- }
-
- if (f2fs_is_encryption_context_consistent_with_policy(inode, policy))
- return 0;
-
- printk(KERN_WARNING "%s: Policy inconsistent with encryption context\n",
- __func__);
- return -EINVAL;
-}
-
-int f2fs_get_policy(struct inode *inode, struct f2fs_encryption_policy *policy)
-{
- struct f2fs_encryption_context ctx;
- int res;
-
- if (!f2fs_encrypted_inode(inode))
- return -ENODATA;
-
- res = f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
- F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
- &ctx, sizeof(ctx), NULL);
- if (res != sizeof(ctx))
- return -ENODATA;
- if (ctx.format != F2FS_ENCRYPTION_CONTEXT_FORMAT_V1)
- return -EINVAL;
-
- policy->version = 0;
- policy->contents_encryption_mode = ctx.contents_encryption_mode;
- policy->filenames_encryption_mode = ctx.filenames_encryption_mode;
- policy->flags = ctx.flags;
- memcpy(&policy->master_key_descriptor, ctx.master_key_descriptor,
- F2FS_KEY_DESCRIPTOR_SIZE);
- return 0;
-}
-
-int f2fs_is_child_context_consistent_with_parent(struct inode *parent,
- struct inode *child)
-{
- struct f2fs_crypt_info *parent_ci, *child_ci;
- int res;
-
- if ((parent == NULL) || (child == NULL)) {
- pr_err("parent %p child %p\n", parent, child);
- BUG_ON(1);
- }
-
- /* no restrictions if the parent directory is not encrypted */
- if (!f2fs_encrypted_inode(parent))
- return 1;
- /* if the child directory is not encrypted, this is always a problem */
- if (!f2fs_encrypted_inode(child))
- return 0;
- res = f2fs_get_encryption_info(parent);
- if (res)
- return 0;
- res = f2fs_get_encryption_info(child);
- if (res)
- return 0;
- parent_ci = F2FS_I(parent)->i_crypt_info;
- child_ci = F2FS_I(child)->i_crypt_info;
- if (!parent_ci && !child_ci)
- return 1;
- if (!parent_ci || !child_ci)
- return 0;
-
- return (memcmp(parent_ci->ci_master_key,
- child_ci->ci_master_key,
- F2FS_KEY_DESCRIPTOR_SIZE) == 0 &&
- (parent_ci->ci_data_mode == child_ci->ci_data_mode) &&
- (parent_ci->ci_filename_mode == child_ci->ci_filename_mode) &&
- (parent_ci->ci_flags == child_ci->ci_flags));
-}
-
-/**
- * f2fs_inherit_context() - Sets a child context from its parent
- * @parent: Parent inode from which the context is inherited.
- * @child: Child inode that inherits the context from @parent.
- *
- * Return: Zero on success, non-zero otherwise
- */
-int f2fs_inherit_context(struct inode *parent, struct inode *child,
- struct page *ipage)
-{
- struct f2fs_encryption_context ctx;
- struct f2fs_crypt_info *ci;
- int res;
-
- res = f2fs_get_encryption_info(parent);
- if (res < 0)
- return res;
-
- ci = F2FS_I(parent)->i_crypt_info;
- if (ci == NULL)
- return -ENOKEY;
-
- ctx.format = F2FS_ENCRYPTION_CONTEXT_FORMAT_V1;
-
- ctx.contents_encryption_mode = ci->ci_data_mode;
- ctx.filenames_encryption_mode = ci->ci_filename_mode;
- ctx.flags = ci->ci_flags;
- memcpy(ctx.master_key_descriptor, ci->ci_master_key,
- F2FS_KEY_DESCRIPTOR_SIZE);
-
- get_random_bytes(ctx.nonce, F2FS_KEY_DERIVATION_NONCE_SIZE);
- return f2fs_setxattr(child, F2FS_XATTR_INDEX_ENCRYPTION,
- F2FS_XATTR_NAME_ENCRYPTION_CONTEXT, &ctx,
- sizeof(ctx), ipage, XATTR_CREATE);
-}
diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 44802599f..1ca31c8e2 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -34,9 +34,9 @@ static void f2fs_read_end_io(struct bio *bio)
if (f2fs_bio_encrypted(bio)) {
if (bio->bi_error) {
- f2fs_release_crypto_ctx(bio->bi_private);
+ fscrypt_release_ctx(bio->bi_private);
} else {
- f2fs_end_io_crypto_work(bio->bi_private, bio);
+ fscrypt_decrypt_bio_pages(bio->bi_private, bio);
return;
}
}
@@ -64,7 +64,7 @@ static void f2fs_write_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
struct page *page = bvec->bv_page;
- f2fs_restore_and_release_control_page(&page);
+ fscrypt_pullback_bio_page(&page, true);
if (unlikely(bio->bi_error)) {
set_bit(AS_EIO, &page->mapping->flags);
@@ -74,8 +74,7 @@ static void f2fs_write_end_io(struct bio *bio)
dec_page_count(sbi, F2FS_WRITEBACK);
}
- if (!get_pages(sbi, F2FS_WRITEBACK) &&
- !list_empty(&sbi->cp_wait.task_list))
+ if (!get_pages(sbi, F2FS_WRITEBACK) && wq_has_sleeper(&sbi->cp_wait))
wake_up(&sbi->cp_wait);
bio_put(bio);
@@ -115,8 +114,54 @@ static void __submit_merged_bio(struct f2fs_bio_info *io)
io->bio = NULL;
}
-void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
- enum page_type type, int rw)
+static bool __has_merged_page(struct f2fs_bio_info *io, struct inode *inode,
+ struct page *page, nid_t ino)
+{
+ struct bio_vec *bvec;
+ struct page *target;
+ int i;
+
+ if (!io->bio)
+ return false;
+
+ if (!inode && !page && !ino)
+ return true;
+
+ bio_for_each_segment_all(bvec, io->bio, i) {
+
+ if (bvec->bv_page->mapping)
+ target = bvec->bv_page;
+ else
+ target = fscrypt_control_page(bvec->bv_page);
+
+ if (inode && inode == target->mapping->host)
+ return true;
+ if (page && page == target)
+ return true;
+ if (ino && ino == ino_of_node(target))
+ return true;
+ }
+
+ return false;
+}
+
+static bool has_merged_page(struct f2fs_sb_info *sbi, struct inode *inode,
+ struct page *page, nid_t ino,
+ enum page_type type)
+{
+ enum page_type btype = PAGE_TYPE_OF_BIO(type);
+ struct f2fs_bio_info *io = &sbi->write_io[btype];
+ bool ret;
+
+ down_read(&io->io_rwsem);
+ ret = __has_merged_page(io, inode, page, ino);
+ up_read(&io->io_rwsem);
+ return ret;
+}
+
+static void __f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type, int rw)
{
enum page_type btype = PAGE_TYPE_OF_BIO(type);
struct f2fs_bio_info *io;
@@ -125,6 +170,9 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
down_write(&io->io_rwsem);
+ if (!__has_merged_page(io, inode, page, ino))
+ goto out;
+
/* change META to META_FLUSH in the checkpoint procedure */
if (type >= META_FLUSH) {
io->fio.type = META_FLUSH;
@@ -134,9 +182,31 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
io->fio.rw = WRITE_FLUSH_FUA | REQ_META | REQ_PRIO;
}
__submit_merged_bio(io);
+out:
up_write(&io->io_rwsem);
}
+void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi, enum page_type type,
+ int rw)
+{
+ __f2fs_submit_merged_bio(sbi, NULL, NULL, 0, type, rw);
+}
+
+void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *sbi,
+ struct inode *inode, struct page *page,
+ nid_t ino, enum page_type type, int rw)
+{
+ if (has_merged_page(sbi, inode, page, ino, type))
+ __f2fs_submit_merged_bio(sbi, inode, page, ino, type, rw);
+}
+
+void f2fs_flush_merged_bios(struct f2fs_sb_info *sbi)
+{
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ f2fs_submit_merged_bio(sbi, NODE, WRITE);
+ f2fs_submit_merged_bio(sbi, META, WRITE);
+}
+
/*
* Fill the locked page with data located in the block address.
* Return unlocked page.
@@ -144,15 +214,16 @@ void f2fs_submit_merged_bio(struct f2fs_sb_info *sbi,
int f2fs_submit_page_bio(struct f2fs_io_info *fio)
{
struct bio *bio;
- struct page *page = fio->encrypted_page ? fio->encrypted_page : fio->page;
+ struct page *page = fio->encrypted_page ?
+ fio->encrypted_page : fio->page;
trace_f2fs_submit_page_bio(page, fio);
f2fs_trace_ios(fio, 0);
/* Allocate a new bio */
- bio = __bio_alloc(fio->sbi, fio->blk_addr, 1, is_read_io(fio->rw));
+ bio = __bio_alloc(fio->sbi, fio->new_blkaddr, 1, is_read_io(fio->rw));
- if (bio_add_page(bio, page, PAGE_CACHE_SIZE, 0) < PAGE_CACHE_SIZE) {
+ if (bio_add_page(bio, page, PAGE_SIZE, 0) < PAGE_SIZE) {
bio_put(bio);
return -EFAULT;
}
@@ -171,33 +242,36 @@ void f2fs_submit_page_mbio(struct f2fs_io_info *fio)
io = is_read ? &sbi->read_io : &sbi->write_io[btype];
- verify_block_addr(sbi, fio->blk_addr);
+ if (fio->old_blkaddr != NEW_ADDR)
+ verify_block_addr(sbi, fio->old_blkaddr);
+ verify_block_addr(sbi, fio->new_blkaddr);
down_write(&io->io_rwsem);
if (!is_read)
inc_page_count(sbi, F2FS_WRITEBACK);
- if (io->bio && (io->last_block_in_bio != fio->blk_addr - 1 ||
+ if (io->bio && (io->last_block_in_bio != fio->new_blkaddr - 1 ||
io->fio.rw != fio->rw))
__submit_merged_bio(io);
alloc_new:
if (io->bio == NULL) {
int bio_blocks = MAX_BIO_BLOCKS(sbi);
- io->bio = __bio_alloc(sbi, fio->blk_addr, bio_blocks, is_read);
+ io->bio = __bio_alloc(sbi, fio->new_blkaddr,
+ bio_blocks, is_read);
io->fio = *fio;
}
bio_page = fio->encrypted_page ? fio->encrypted_page : fio->page;
- if (bio_add_page(io->bio, bio_page, PAGE_CACHE_SIZE, 0) <
- PAGE_CACHE_SIZE) {
+ if (bio_add_page(io->bio, bio_page, PAGE_SIZE, 0) <
+ PAGE_SIZE) {
__submit_merged_bio(io);
goto alloc_new;
}
- io->last_block_in_bio = fio->blk_addr;
+ io->last_block_in_bio = fio->new_blkaddr;
f2fs_trace_ios(fio, 0);
up_write(&io->io_rwsem);
@@ -217,7 +291,7 @@ void set_data_blkaddr(struct dnode_of_data *dn)
struct page *node_page = dn->node_page;
unsigned int ofs_in_node = dn->ofs_in_node;
- f2fs_wait_on_page_writeback(node_page, NODE);
+ f2fs_wait_on_page_writeback(node_page, NODE, true);
rn = F2FS_NODE(node_page);
@@ -228,6 +302,13 @@ void set_data_blkaddr(struct dnode_of_data *dn)
dn->node_changed = true;
}
+void f2fs_update_data_blkaddr(struct dnode_of_data *dn, block_t blkaddr)
+{
+ dn->data_blkaddr = blkaddr;
+ set_data_blkaddr(dn);
+ f2fs_update_extent_cache(dn);
+}
+
int reserve_new_block(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
@@ -325,13 +406,13 @@ got_it:
* see, f2fs_add_link -> get_new_data_page -> init_inode_metadata.
*/
if (dn.data_blkaddr == NEW_ADDR) {
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
unlock_page(page);
return page;
}
- fio.blk_addr = dn.data_blkaddr;
+ fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
fio.page = page;
err = f2fs_submit_page_bio(&fio);
if (err)
@@ -436,7 +517,7 @@ struct page *get_new_data_page(struct inode *inode,
goto got_it;
if (dn.data_blkaddr == NEW_ADDR) {
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
} else {
f2fs_put_page(page, 1);
@@ -449,8 +530,8 @@ struct page *get_new_data_page(struct inode *inode,
}
got_it:
if (new_i_size && i_size_read(inode) <
- ((loff_t)(index + 1) << PAGE_CACHE_SHIFT)) {
- i_size_write(inode, ((loff_t)(index + 1) << PAGE_CACHE_SHIFT));
+ ((loff_t)(index + 1) << PAGE_SHIFT)) {
+ i_size_write(inode, ((loff_t)(index + 1) << PAGE_SHIFT));
/* Only the directory inode sets new_i_size */
set_inode_flag(F2FS_I(inode), FI_UPDATE_DIR);
}
@@ -460,7 +541,6 @@ got_it:
static int __allocate_data_block(struct dnode_of_data *dn)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(dn->inode);
- struct f2fs_inode_info *fi = F2FS_I(dn->inode);
struct f2fs_summary sum;
struct node_info ni;
int seg = CURSEG_WARM_DATA;
@@ -488,75 +568,41 @@ alloc:
set_data_blkaddr(dn);
/* update i_size */
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
dn->ofs_in_node;
- if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT))
+ if (i_size_read(dn->inode) < ((loff_t)(fofs + 1) << PAGE_SHIFT))
i_size_write(dn->inode,
- ((loff_t)(fofs + 1) << PAGE_CACHE_SHIFT));
+ ((loff_t)(fofs + 1) << PAGE_SHIFT));
return 0;
}
-static int __allocate_data_blocks(struct inode *inode, loff_t offset,
- size_t count)
+ssize_t f2fs_preallocate_blocks(struct kiocb *iocb, struct iov_iter *from)
{
- struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
- struct dnode_of_data dn;
- u64 start = F2FS_BYTES_TO_BLK(offset);
- u64 len = F2FS_BYTES_TO_BLK(count);
- bool allocated = false;
- u64 end_offset;
- int err = 0;
-
- while (len) {
- f2fs_lock_op(sbi);
-
- /* When reading holes, we need its node page */
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, start, ALLOC_NODE);
- if (err)
- goto out;
-
- allocated = false;
- end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
-
- while (dn.ofs_in_node < end_offset && len) {
- block_t blkaddr;
-
- if (unlikely(f2fs_cp_error(sbi))) {
- err = -EIO;
- goto sync_out;
- }
-
- blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
- if (blkaddr == NULL_ADDR || blkaddr == NEW_ADDR) {
- err = __allocate_data_block(&dn);
- if (err)
- goto sync_out;
- allocated = true;
- }
- len--;
- start++;
- dn.ofs_in_node++;
- }
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct f2fs_map_blocks map;
+ ssize_t ret = 0;
- if (allocated)
- sync_inode_page(&dn);
+ map.m_lblk = F2FS_BYTES_TO_BLK(iocb->ki_pos);
+ map.m_len = F2FS_BLK_ALIGN(iov_iter_count(from));
+ map.m_next_pgofs = NULL;
- f2fs_put_dnode(&dn);
- f2fs_unlock_op(sbi);
+ if (f2fs_encrypted_inode(inode))
+ return 0;
- f2fs_balance_fs(sbi, allocated);
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_DIO);
}
- return err;
-
-sync_out:
- if (allocated)
- sync_inode_page(&dn);
- f2fs_put_dnode(&dn);
-out:
- f2fs_unlock_op(sbi);
- f2fs_balance_fs(sbi, allocated);
- return err;
+ if (iocb->ki_pos + iov_iter_count(from) > MAX_INLINE_DATA) {
+ ret = f2fs_convert_inline_inode(inode);
+ if (ret)
+ return ret;
+ }
+ if (!f2fs_has_inline_data(inode))
+ return f2fs_map_blocks(inode, &map, 1, F2FS_GET_BLOCK_PRE_AIO);
+ return ret;
}
/*
@@ -587,13 +633,14 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
/* it only supports block size == page size */
pgofs = (pgoff_t)map->m_lblk;
- if (f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
+ if (!create && f2fs_lookup_extent_cache(inode, pgofs, &ei)) {
map->m_pblk = ei.blk + pgofs - ei.fofs;
map->m_len = min((pgoff_t)maxblocks, ei.fofs + ei.len - pgofs);
map->m_flags = F2FS_MAP_MAPPED;
goto out;
}
+next_dnode:
if (create)
f2fs_lock_op(sbi);
@@ -601,52 +648,78 @@ int f2fs_map_blocks(struct inode *inode, struct f2fs_map_blocks *map,
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, pgofs, mode);
if (err) {
- if (err == -ENOENT)
+ if (err == -ENOENT) {
err = 0;
+ if (map->m_next_pgofs)
+ *map->m_next_pgofs =
+ get_next_page_offset(&dn, pgofs);
+ }
goto unlock_out;
}
- if (dn.data_blkaddr == NEW_ADDR || dn.data_blkaddr == NULL_ADDR) {
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
+
+next_block:
+ blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
+
+ if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
if (create) {
if (unlikely(f2fs_cp_error(sbi))) {
err = -EIO;
- goto put_out;
+ goto sync_out;
+ }
+ if (flag == F2FS_GET_BLOCK_PRE_AIO) {
+ if (blkaddr == NULL_ADDR)
+ err = reserve_new_block(&dn);
+ } else {
+ err = __allocate_data_block(&dn);
}
- err = __allocate_data_block(&dn);
if (err)
- goto put_out;
+ goto sync_out;
allocated = true;
map->m_flags = F2FS_MAP_NEW;
+ blkaddr = dn.data_blkaddr;
} else {
+ if (flag == F2FS_GET_BLOCK_FIEMAP &&
+ blkaddr == NULL_ADDR) {
+ if (map->m_next_pgofs)
+ *map->m_next_pgofs = pgofs + 1;
+ }
if (flag != F2FS_GET_BLOCK_FIEMAP ||
- dn.data_blkaddr != NEW_ADDR) {
+ blkaddr != NEW_ADDR) {
if (flag == F2FS_GET_BLOCK_BMAP)
err = -ENOENT;
- goto put_out;
+ goto sync_out;
}
-
- /*
- * preallocated unwritten block should be mapped
- * for fiemap.
- */
- if (dn.data_blkaddr == NEW_ADDR)
- map->m_flags = F2FS_MAP_UNWRITTEN;
}
}
- map->m_flags |= F2FS_MAP_MAPPED;
- map->m_pblk = dn.data_blkaddr;
- map->m_len = 1;
+ if (map->m_len == 0) {
+ /* preallocated unwritten block should be mapped for fiemap. */
+ if (blkaddr == NEW_ADDR)
+ map->m_flags |= F2FS_MAP_UNWRITTEN;
+ map->m_flags |= F2FS_MAP_MAPPED;
+
+ map->m_pblk = blkaddr;
+ map->m_len = 1;
+ } else if ((map->m_pblk != NEW_ADDR &&
+ blkaddr == (map->m_pblk + ofs)) ||
+ (map->m_pblk == NEW_ADDR && blkaddr == NEW_ADDR) ||
+ flag == F2FS_GET_BLOCK_PRE_DIO ||
+ flag == F2FS_GET_BLOCK_PRE_AIO) {
+ ofs++;
+ map->m_len++;
+ } else {
+ goto sync_out;
+ }
- end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
dn.ofs_in_node++;
pgofs++;
-get_next:
- if (map->m_len >= maxblocks)
- goto sync_out;
+ if (map->m_len < maxblocks) {
+ if (dn.ofs_in_node < end_offset)
+ goto next_block;
- if (dn.ofs_in_node >= end_offset) {
if (allocated)
sync_inode_page(&dn);
f2fs_put_dnode(&dn);
@@ -654,62 +727,14 @@ get_next:
if (create) {
f2fs_unlock_op(sbi);
f2fs_balance_fs(sbi, allocated);
- f2fs_lock_op(sbi);
}
allocated = false;
-
- set_new_dnode(&dn, inode, NULL, NULL, 0);
- err = get_dnode_of_data(&dn, pgofs, mode);
- if (err) {
- if (err == -ENOENT)
- err = 0;
- goto unlock_out;
- }
-
- end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
- }
-
- blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
-
- if (blkaddr == NEW_ADDR || blkaddr == NULL_ADDR) {
- if (create) {
- if (unlikely(f2fs_cp_error(sbi))) {
- err = -EIO;
- goto sync_out;
- }
- err = __allocate_data_block(&dn);
- if (err)
- goto sync_out;
- allocated = true;
- map->m_flags |= F2FS_MAP_NEW;
- blkaddr = dn.data_blkaddr;
- } else {
- /*
- * we only merge preallocated unwritten blocks
- * for fiemap.
- */
- if (flag != F2FS_GET_BLOCK_FIEMAP ||
- blkaddr != NEW_ADDR)
- goto sync_out;
- }
- }
-
- /* Give more consecutive addresses for the readahead */
- if ((map->m_pblk != NEW_ADDR &&
- blkaddr == (map->m_pblk + ofs)) ||
- (map->m_pblk == NEW_ADDR &&
- blkaddr == NEW_ADDR)) {
- ofs++;
- dn.ofs_in_node++;
- pgofs++;
- map->m_len++;
- goto get_next;
+ goto next_dnode;
}
sync_out:
if (allocated)
sync_inode_page(&dn);
-put_out:
f2fs_put_dnode(&dn);
unlock_out:
if (create) {
@@ -722,13 +747,15 @@ out:
}
static int __get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh, int create, int flag)
+ struct buffer_head *bh, int create, int flag,
+ pgoff_t *next_pgofs)
{
struct f2fs_map_blocks map;
int ret;
map.m_lblk = iblock;
map.m_len = bh->b_size >> inode->i_blkbits;
+ map.m_next_pgofs = next_pgofs;
ret = f2fs_map_blocks(inode, &map, create, flag);
if (!ret) {
@@ -740,16 +767,18 @@ static int __get_data_block(struct inode *inode, sector_t iblock,
}
static int get_data_block(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create, int flag)
+ struct buffer_head *bh_result, int create, int flag,
+ pgoff_t *next_pgofs)
{
- return __get_data_block(inode, iblock, bh_result, create, flag);
+ return __get_data_block(inode, iblock, bh_result, create,
+ flag, next_pgofs);
}
static int get_data_block_dio(struct inode *inode, sector_t iblock,
struct buffer_head *bh_result, int create)
{
return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_DIO);
+ F2FS_GET_BLOCK_DIO, NULL);
}
static int get_data_block_bmap(struct inode *inode, sector_t iblock,
@@ -760,7 +789,7 @@ static int get_data_block_bmap(struct inode *inode, sector_t iblock,
return -EFBIG;
return __get_data_block(inode, iblock, bh_result, create,
- F2FS_GET_BLOCK_BMAP);
+ F2FS_GET_BLOCK_BMAP, NULL);
}
static inline sector_t logical_to_blk(struct inode *inode, loff_t offset)
@@ -778,6 +807,7 @@ int f2fs_fiemap(struct inode *inode, struct fiemap_extent_info *fieinfo,
{
struct buffer_head map_bh;
sector_t start_blk, last_blk;
+ pgoff_t next_pgofs;
loff_t isize;
u64 logical = 0, phys = 0, size = 0;
u32 flags = 0;
@@ -813,14 +843,15 @@ next:
map_bh.b_size = len;
ret = get_data_block(inode, start_blk, &map_bh, 0,
- F2FS_GET_BLOCK_FIEMAP);
+ F2FS_GET_BLOCK_FIEMAP, &next_pgofs);
if (ret)
goto out;
/* HOLE */
if (!buffer_mapped(&map_bh)) {
+ start_blk = next_pgofs;
/* Go through holes util pass the EOF */
- if (blk_to_logical(inode, start_blk++) < isize)
+ if (blk_to_logical(inode, start_blk) < isize)
goto prep_next;
/* Found a hole beyond isize means no more extents.
* Note that the premise is that filesystems don't
@@ -888,6 +919,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
map.m_lblk = 0;
map.m_len = 0;
map.m_flags = 0;
+ map.m_next_pgofs = NULL;
for (page_idx = 0; nr_pages; page_idx++, nr_pages--) {
@@ -926,7 +958,7 @@ static int f2fs_mpage_readpages(struct address_space *mapping,
map.m_len = last_block - block_in_file;
if (f2fs_map_blocks(inode, &map, 0,
- F2FS_GET_BLOCK_READ))
+ F2FS_GET_BLOCK_READ))
goto set_error_page;
}
got_it:
@@ -939,7 +971,7 @@ got_it:
goto confused;
}
} else {
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
SetPageUptodate(page);
unlock_page(page);
goto next_page;
@@ -955,12 +987,12 @@ submit_and_realloc:
bio = NULL;
}
if (bio == NULL) {
- struct f2fs_crypto_ctx *ctx = NULL;
+ struct fscrypt_ctx *ctx = NULL;
if (f2fs_encrypted_inode(inode) &&
S_ISREG(inode->i_mode)) {
- ctx = f2fs_get_crypto_ctx(inode);
+ ctx = fscrypt_get_ctx(inode, GFP_NOFS);
if (IS_ERR(ctx))
goto set_error_page;
@@ -973,7 +1005,7 @@ submit_and_realloc:
min_t(int, nr_pages, BIO_MAX_PAGES));
if (!bio) {
if (ctx)
- f2fs_release_crypto_ctx(ctx);
+ fscrypt_release_ctx(ctx);
goto set_error_page;
}
bio->bi_bdev = bdev;
@@ -989,7 +1021,7 @@ submit_and_realloc:
goto next_page;
set_error_page:
SetPageError(page);
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
unlock_page(page);
goto next_page;
confused:
@@ -1000,7 +1032,7 @@ confused:
unlock_page(page);
next_page:
if (pages)
- page_cache_release(page);
+ put_page(page);
}
BUG_ON(pages && !list_empty(pages));
if (bio)
@@ -1051,23 +1083,33 @@ int do_write_data_page(struct f2fs_io_info *fio)
if (err)
return err;
- fio->blk_addr = dn.data_blkaddr;
+ fio->old_blkaddr = dn.data_blkaddr;
/* This page is already truncated */
- if (fio->blk_addr == NULL_ADDR) {
+ if (fio->old_blkaddr == NULL_ADDR) {
ClearPageUptodate(page);
goto out_writepage;
}
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
+ gfp_t gfp_flags = GFP_NOFS;
/* wait for GCed encrypted page writeback */
f2fs_wait_on_encrypted_page_writeback(F2FS_I_SB(inode),
- fio->blk_addr);
-
- fio->encrypted_page = f2fs_encrypt(inode, fio->page);
+ fio->old_blkaddr);
+retry_encrypt:
+ fio->encrypted_page = fscrypt_encrypt_page(inode, fio->page,
+ gfp_flags);
if (IS_ERR(fio->encrypted_page)) {
err = PTR_ERR(fio->encrypted_page);
+ if (err == -ENOMEM) {
+ /* flush pending ios and wait for a while */
+ f2fs_flush_merged_bios(F2FS_I_SB(inode));
+ congestion_wait(BLK_RW_ASYNC, HZ/50);
+ gfp_flags |= __GFP_NOFAIL;
+ err = 0;
+ goto retry_encrypt;
+ }
goto out_writepage;
}
}
@@ -1078,7 +1120,7 @@ int do_write_data_page(struct f2fs_io_info *fio)
* If current allocation needs SSR,
* it had better in-place writes for updated data.
*/
- if (unlikely(fio->blk_addr != NEW_ADDR &&
+ if (unlikely(fio->old_blkaddr != NEW_ADDR &&
!is_cold_data(page) &&
!IS_ATOMIC_WRITTEN_PAGE(page) &&
need_inplace_update(inode))) {
@@ -1087,8 +1129,6 @@ int do_write_data_page(struct f2fs_io_info *fio)
trace_f2fs_do_write_data_page(page, IPU);
} else {
write_data_page(&dn, fio);
- set_data_blkaddr(&dn);
- f2fs_update_extent_cache(&dn);
trace_f2fs_do_write_data_page(page, OPU);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
if (page->index == 0)
@@ -1106,7 +1146,7 @@ static int f2fs_write_data_page(struct page *page,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
loff_t i_size = i_size_read(inode);
const pgoff_t end_index = ((unsigned long long) i_size)
- >> PAGE_CACHE_SHIFT;
+ >> PAGE_SHIFT;
unsigned offset = 0;
bool need_balance_fs = false;
int err = 0;
@@ -1127,11 +1167,11 @@ static int f2fs_write_data_page(struct page *page,
* If the offset is out-of-range of file size,
* this page does not have to be written to disk.
*/
- offset = i_size & (PAGE_CACHE_SIZE - 1);
+ offset = i_size & (PAGE_SIZE - 1);
if ((page->index >= end_index + 1) || !offset)
goto out;
- zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ zero_user_segment(page, offset, PAGE_SIZE);
write:
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto redirty_out;
@@ -1176,12 +1216,18 @@ out:
inode_dec_dirty_pages(inode);
if (err)
ClearPageUptodate(page);
+
+ if (wbc->for_reclaim) {
+ f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, DATA, WRITE);
+ remove_dirty_inode(inode);
+ }
+
unlock_page(page);
f2fs_balance_fs(sbi, need_balance_fs);
- if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi))) {
+
+ if (unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, DATA, WRITE);
- remove_dirty_inode(inode);
- }
+
return 0;
redirty_out:
@@ -1231,8 +1277,8 @@ next:
cycled = 0;
end = -1;
} else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ index = wbc->range_start >> PAGE_SHIFT;
+ end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
cycled = 1; /* ignore range_cyclic tests */
@@ -1281,7 +1327,8 @@ continue_unlock:
if (PageWriteback(page)) {
if (wbc->sync_mode != WB_SYNC_NONE)
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page,
+ DATA, true);
else
goto continue_unlock;
}
@@ -1338,8 +1385,6 @@ static int f2fs_write_data_pages(struct address_space *mapping,
int ret;
long diff;
- trace_f2fs_writepages(mapping->host, wbc, DATA);
-
/* deal with chardevs and other special file */
if (!mapping->a_ops->writepage)
return 0;
@@ -1361,14 +1406,16 @@ static int f2fs_write_data_pages(struct address_space *mapping,
if (unlikely(is_sbi_flag_set(sbi, SBI_POR_DOING)))
goto skip_write;
+ trace_f2fs_writepages(mapping->host, wbc, DATA);
+
diff = nr_pages_to_write(sbi, DATA, wbc);
- if (!S_ISDIR(inode->i_mode)) {
+ if (!S_ISDIR(inode->i_mode) && wbc->sync_mode == WB_SYNC_ALL) {
mutex_lock(&sbi->writepages);
locked = true;
}
ret = f2fs_write_cache_pages(mapping, wbc, __f2fs_writepage, mapping);
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
if (locked)
mutex_unlock(&sbi->writepages);
@@ -1379,6 +1426,7 @@ static int f2fs_write_data_pages(struct address_space *mapping,
skip_write:
wbc->pages_skipped += get_dirty_pages(inode);
+ trace_f2fs_writepages(mapping->host, wbc, DATA);
return 0;
}
@@ -1405,8 +1453,16 @@ static int prepare_write_begin(struct f2fs_sb_info *sbi,
struct extent_info ei;
int err = 0;
+ /*
+ * we already allocated all the blocks, so we don't need to get
+ * the block addresses when there is no need to fill the page.
+ */
+ if (!f2fs_has_inline_data(inode) && !f2fs_encrypted_inode(inode) &&
+ len == PAGE_SIZE)
+ return 0;
+
if (f2fs_has_inline_data(inode) ||
- (pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
+ (pos & PAGE_MASK) >= i_size_read(inode)) {
f2fs_lock_op(sbi);
locked = true;
}
@@ -1424,7 +1480,8 @@ restart:
if (pos + len <= MAX_INLINE_DATA) {
read_inline_data(page, ipage);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
- sync_inode_page(&dn);
+ if (inode->i_nlink)
+ set_inline_node(ipage);
} else {
err = f2fs_convert_inline_page(&dn, page);
if (err)
@@ -1438,13 +1495,9 @@ restart:
if (f2fs_lookup_extent_cache(inode, index, &ei)) {
dn.data_blkaddr = ei.blk + index - ei.fofs;
} else {
- bool restart = false;
-
/* hole case */
err = get_dnode_of_data(&dn, index, LOOKUP_NODE);
- if (err || (!err && dn.data_blkaddr == NULL_ADDR))
- restart = true;
- if (restart) {
+ if (err || (!err && dn.data_blkaddr == NULL_ADDR)) {
f2fs_put_dnode(&dn);
f2fs_lock_op(sbi);
locked = true;
@@ -1471,7 +1524,7 @@ static int f2fs_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct page *page = NULL;
- pgoff_t index = ((unsigned long long) pos) >> PAGE_CACHE_SHIFT;
+ pgoff_t index = ((unsigned long long) pos) >> PAGE_SHIFT;
bool need_balance = false;
block_t blkaddr = NULL_ADDR;
int err = 0;
@@ -1513,34 +1566,35 @@ repeat:
}
}
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, DATA, false);
/* wait for GCed encrypted page writeback */
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
f2fs_wait_on_encrypted_page_writeback(sbi, blkaddr);
- if (len == PAGE_CACHE_SIZE)
+ if (len == PAGE_SIZE)
goto out_update;
if (PageUptodate(page))
goto out_clear;
- if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
- unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+ if ((pos & PAGE_MASK) >= i_size_read(inode)) {
+ unsigned start = pos & (PAGE_SIZE - 1);
unsigned end = start + len;
/* Reading beyond i_size is simple: memset to zero */
- zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
+ zero_user_segments(page, 0, start, end, PAGE_SIZE);
goto out_update;
}
if (blkaddr == NEW_ADDR) {
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
} else {
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
.rw = READ_SYNC,
- .blk_addr = blkaddr,
+ .old_blkaddr = blkaddr,
+ .new_blkaddr = blkaddr,
.page = page,
.encrypted_page = NULL,
};
@@ -1560,7 +1614,7 @@ repeat:
/* avoid symlink page */
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode)) {
- err = f2fs_decrypt_one(inode, page);
+ err = fscrypt_decrypt_page(page);
if (err)
goto fail;
}
@@ -1591,7 +1645,6 @@ static int f2fs_write_end(struct file *file,
if (pos + copied > i_size_read(inode)) {
i_size_write(inode, pos + copied);
mark_inode_dirty(inode);
- update_inode_page(inode);
}
f2fs_put_page(page, 1);
@@ -1616,34 +1669,21 @@ static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
static ssize_t f2fs_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
loff_t offset)
{
- struct file *file = iocb->ki_filp;
- struct address_space *mapping = file->f_mapping;
+ struct address_space *mapping = iocb->ki_filp->f_mapping;
struct inode *inode = mapping->host;
size_t count = iov_iter_count(iter);
int err;
- /* we don't need to use inline_data strictly */
- err = f2fs_convert_inline_inode(inode);
+ err = check_direct_IO(inode, iter, offset);
if (err)
return err;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
return 0;
- err = check_direct_IO(inode, iter, offset);
- if (err)
- return err;
-
trace_f2fs_direct_IO_enter(inode, offset, count, iov_iter_rw(iter));
- if (iov_iter_rw(iter) == WRITE) {
- err = __allocate_data_blocks(inode, offset, count);
- if (err)
- goto out;
- }
-
err = blockdev_direct_IO(iocb, inode, iter, offset, get_data_block_dio);
-out:
if (err < 0 && iov_iter_rw(iter) == WRITE)
f2fs_write_failed(mapping, offset + count);
@@ -1659,7 +1699,7 @@ void f2fs_invalidate_page(struct page *page, unsigned int offset,
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
if (inode->i_ino >= F2FS_ROOT_INO(sbi) &&
- (offset % PAGE_CACHE_SIZE || length != PAGE_CACHE_SIZE))
+ (offset % PAGE_SIZE || length != PAGE_SIZE))
return;
if (PageDirty(page)) {
diff --git a/fs/f2fs/debug.c b/fs/f2fs/debug.c
index 4fb6ef88a..f4a61a5ff 100644
--- a/fs/f2fs/debug.c
+++ b/fs/f2fs/debug.c
@@ -164,7 +164,7 @@ static void update_mem_info(struct f2fs_sb_info *sbi)
/* build curseg */
si->base_mem += sizeof(struct curseg_info) * NR_CURSEG_TYPE;
- si->base_mem += PAGE_CACHE_SIZE * NR_CURSEG_TYPE;
+ si->base_mem += PAGE_SIZE * NR_CURSEG_TYPE;
/* build dirty segmap */
si->base_mem += sizeof(struct dirty_seglist_info);
@@ -201,9 +201,9 @@ get_cache:
si->page_mem = 0;
npages = NODE_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
npages = META_MAPPING(sbi)->nrpages;
- si->page_mem += (unsigned long long)npages << PAGE_CACHE_SHIFT;
+ si->page_mem += (unsigned long long)npages << PAGE_SHIFT;
}
static int stat_show(struct seq_file *s, void *v)
diff --git a/fs/f2fs/dir.c b/fs/f2fs/dir.c
index 30e6b6563..af819571b 100644
--- a/fs/f2fs/dir.c
+++ b/fs/f2fs/dir.c
@@ -17,8 +17,8 @@
static unsigned long dir_blocks(struct inode *inode)
{
- return ((unsigned long long) (i_size_read(inode) + PAGE_CACHE_SIZE - 1))
- >> PAGE_CACHE_SHIFT;
+ return ((unsigned long long) (i_size_read(inode) + PAGE_SIZE - 1))
+ >> PAGE_SHIFT;
}
static unsigned int dir_buckets(unsigned int level, int dir_level)
@@ -77,7 +77,7 @@ static unsigned long dir_block_index(unsigned int level,
}
static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
- struct f2fs_filename *fname,
+ struct fscrypt_name *fname,
f2fs_hash_t namehash,
int *max_slots,
struct page **res_page)
@@ -103,15 +103,15 @@ static struct f2fs_dir_entry *find_in_block(struct page *dentry_page,
return de;
}
-struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *fname,
+struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *fname,
f2fs_hash_t namehash, int *max_slots,
struct f2fs_dentry_ptr *d)
{
struct f2fs_dir_entry *de;
unsigned long bit_pos = 0;
int max_len = 0;
- struct f2fs_str de_name = FSTR_INIT(NULL, 0);
- struct f2fs_str *name = &fname->disk_name;
+ struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
+ struct fscrypt_str *name = &fname->disk_name;
if (max_slots)
*max_slots = 0;
@@ -157,7 +157,7 @@ found:
static struct f2fs_dir_entry *find_in_level(struct inode *dir,
unsigned int level,
- struct f2fs_filename *fname,
+ struct fscrypt_name *fname,
struct page **res_page)
{
struct qstr name = FSTR_TO_QSTR(&fname->disk_name);
@@ -218,12 +218,12 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
struct f2fs_dir_entry *de = NULL;
unsigned int max_depth;
unsigned int level;
- struct f2fs_filename fname;
+ struct fscrypt_name fname;
int err;
*res_page = NULL;
- err = f2fs_fname_setup_filename(dir, child, 1, &fname);
+ err = fscrypt_setup_filename(dir, child, 1, &fname);
if (err)
return NULL;
@@ -251,7 +251,7 @@ struct f2fs_dir_entry *f2fs_find_entry(struct inode *dir,
break;
}
out:
- f2fs_fname_free_filename(&fname);
+ fscrypt_free_filename(&fname);
return de;
}
@@ -296,7 +296,7 @@ void f2fs_set_link(struct inode *dir, struct f2fs_dir_entry *de,
{
enum page_type type = f2fs_has_inline_dentry(dir) ? NODE : DATA;
lock_page(page);
- f2fs_wait_on_page_writeback(page, type);
+ f2fs_wait_on_page_writeback(page, type, true);
de->ino = cpu_to_le32(inode->i_ino);
set_de_type(de, inode->i_mode);
f2fs_dentry_kunmap(dir, page);
@@ -311,7 +311,7 @@ static void init_dent_inode(const struct qstr *name, struct page *ipage)
{
struct f2fs_inode *ri;
- f2fs_wait_on_page_writeback(ipage, NODE);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
/* copy name info. to this inode page */
ri = F2FS_INODE(ipage);
@@ -341,24 +341,14 @@ int update_dent_inode(struct inode *inode, struct inode *to,
void do_make_empty_dir(struct inode *inode, struct inode *parent,
struct f2fs_dentry_ptr *d)
{
- struct f2fs_dir_entry *de;
-
- de = &d->dentry[0];
- de->name_len = cpu_to_le16(1);
- de->hash_code = 0;
- de->ino = cpu_to_le32(inode->i_ino);
- memcpy(d->filename[0], ".", 1);
- set_de_type(de, inode->i_mode);
+ struct qstr dot = QSTR_INIT(".", 1);
+ struct qstr dotdot = QSTR_INIT("..", 2);
- de = &d->dentry[1];
- de->hash_code = 0;
- de->name_len = cpu_to_le16(2);
- de->ino = cpu_to_le32(parent->i_ino);
- memcpy(d->filename[1], "..", 2);
- set_de_type(de, parent->i_mode);
+ /* update dirent of "." */
+ f2fs_update_dentry(inode->i_ino, inode->i_mode, d, &dot, 0, 0);
- test_and_set_bit_le(0, (void *)d->bitmap);
- test_and_set_bit_le(1, (void *)d->bitmap);
+ /* update dirent of ".." */
+ f2fs_update_dentry(parent->i_ino, parent->i_mode, d, &dotdot, 0, 1);
}
static int make_empty_dir(struct inode *inode,
@@ -413,7 +403,7 @@ struct page *init_inode_metadata(struct inode *inode, struct inode *dir,
goto put_error;
if (f2fs_encrypted_inode(dir) && f2fs_may_encrypt(inode)) {
- err = f2fs_inherit_context(dir, inode, page);
+ err = fscrypt_inherit_context(dir, inode, page, false);
if (err)
goto put_error;
}
@@ -511,8 +501,12 @@ void f2fs_update_dentry(nid_t ino, umode_t mode, struct f2fs_dentry_ptr *d,
memcpy(d->filename[bit_pos], name->name, name->len);
de->ino = cpu_to_le32(ino);
set_de_type(de, mode);
- for (i = 0; i < slots; i++)
+ for (i = 0; i < slots; i++) {
test_and_set_bit_le(bit_pos + i, (void *)d->bitmap);
+ /* avoid wrong garbage data for readdir */
+ if (i)
+ (de + i)->name_len = 0;
+ }
}
/*
@@ -532,11 +526,11 @@ int __f2fs_add_link(struct inode *dir, const struct qstr *name,
struct f2fs_dentry_block *dentry_blk = NULL;
struct f2fs_dentry_ptr d;
struct page *page = NULL;
- struct f2fs_filename fname;
+ struct fscrypt_name fname;
struct qstr new_name;
int slots, err;
- err = f2fs_fname_setup_filename(dir, name, 0, &fname);
+ err = fscrypt_setup_filename(dir, name, 0, &fname);
if (err)
return err;
@@ -598,7 +592,7 @@ start:
++level;
goto start;
add_dentry:
- f2fs_wait_on_page_writeback(dentry_page, DATA);
+ f2fs_wait_on_page_writeback(dentry_page, DATA, true);
if (inode) {
down_write(&F2FS_I(inode)->i_sem);
@@ -635,7 +629,7 @@ fail:
kunmap(dentry_page);
f2fs_put_page(dentry_page, 1);
out:
- f2fs_fname_free_filename(&fname);
+ fscrypt_free_filename(&fname);
f2fs_update_time(F2FS_I_SB(dir), REQ_TIME);
return err;
}
@@ -709,7 +703,7 @@ void f2fs_delete_entry(struct f2fs_dir_entry *dentry, struct page *page,
return f2fs_delete_inline_entry(dentry, page, dir, inode);
lock_page(page);
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, DATA, true);
dentry_blk = page_address(page);
bit_pos = dentry - dentry_blk->dentry;
@@ -777,12 +771,12 @@ bool f2fs_empty_dir(struct inode *dir)
}
bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
- unsigned int start_pos, struct f2fs_str *fstr)
+ unsigned int start_pos, struct fscrypt_str *fstr)
{
unsigned char d_type = DT_UNKNOWN;
unsigned int bit_pos;
struct f2fs_dir_entry *de = NULL;
- struct f2fs_str de_name = FSTR_INIT(NULL, 0);
+ struct fscrypt_str de_name = FSTR_INIT(NULL, 0);
bit_pos = ((unsigned long)ctx->pos % d->max);
@@ -792,6 +786,12 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
break;
de = &d->dentry[bit_pos];
+ if (de->name_len == 0) {
+ bit_pos++;
+ ctx->pos = start_pos + bit_pos;
+ continue;
+ }
+
if (de->file_type < F2FS_FT_MAX)
d_type = f2fs_filetype_table[de->file_type];
else
@@ -810,8 +810,9 @@ bool f2fs_fill_dentries(struct dir_context *ctx, struct f2fs_dentry_ptr *d,
memcpy(de_name.name, d->filename[bit_pos], de_name.len);
- ret = f2fs_fname_disk_to_usr(d->inode, &de->hash_code,
- &de_name, fstr);
+ ret = fscrypt_fname_disk_to_usr(d->inode,
+ (u32)de->hash_code, 0,
+ &de_name, fstr);
kfree(de_name.name);
if (ret < 0)
return true;
@@ -839,16 +840,15 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
struct file_ra_state *ra = &file->f_ra;
unsigned int n = ((unsigned long)ctx->pos / NR_DENTRY_IN_BLOCK);
struct f2fs_dentry_ptr d;
- struct f2fs_str fstr = FSTR_INIT(NULL, 0);
+ struct fscrypt_str fstr = FSTR_INIT(NULL, 0);
int err = 0;
if (f2fs_encrypted_inode(inode)) {
- err = f2fs_get_encryption_info(inode);
- if (err)
+ err = fscrypt_get_encryption_info(inode);
+ if (err && err != -ENOKEY)
return err;
- err = f2fs_fname_crypto_alloc_buffer(inode, F2FS_NAME_LEN,
- &fstr);
+ err = fscrypt_fname_alloc_buffer(inode, F2FS_NAME_LEN, &fstr);
if (err < 0)
return err;
}
@@ -888,14 +888,14 @@ static int f2fs_readdir(struct file *file, struct dir_context *ctx)
f2fs_put_page(dentry_page, 1);
}
out:
- f2fs_fname_crypto_free_buffer(&fstr);
+ fscrypt_fname_free_buffer(&fstr);
return err;
}
static int f2fs_dir_open(struct inode *inode, struct file *filp)
{
if (f2fs_encrypted_inode(inode))
- return f2fs_get_encryption_info(inode) ? -EACCES : 0;
+ return fscrypt_get_encryption_info(inode) ? -EACCES : 0;
return 0;
}
diff --git a/fs/f2fs/extent_cache.c b/fs/f2fs/extent_cache.c
index ccd5c636d..c859bb044 100644
--- a/fs/f2fs/extent_cache.c
+++ b/fs/f2fs/extent_cache.c
@@ -33,6 +33,7 @@ static struct extent_node *__attach_extent_node(struct f2fs_sb_info *sbi,
en->ei = *ei;
INIT_LIST_HEAD(&en->list);
+ en->et = et;
rb_link_node(&en->rb_node, parent, p);
rb_insert_color(&en->rb_node, &et->root);
@@ -50,6 +51,24 @@ static void __detach_extent_node(struct f2fs_sb_info *sbi,
if (et->cached_en == en)
et->cached_en = NULL;
+ kmem_cache_free(extent_node_slab, en);
+}
+
+/*
+ * Flow to release an extent_node:
+ * 1. list_del_init
+ * 2. __detach_extent_node
+ * 3. kmem_cache_free.
+ */
+static void __release_extent_node(struct f2fs_sb_info *sbi,
+ struct extent_tree *et, struct extent_node *en)
+{
+ spin_lock(&sbi->extent_lock);
+ f2fs_bug_on(sbi, list_empty(&en->list));
+ list_del_init(&en->list);
+ spin_unlock(&sbi->extent_lock);
+
+ __detach_extent_node(sbi, et, en);
}
static struct extent_tree *__grab_extent_tree(struct inode *inode)
@@ -129,7 +148,7 @@ static struct extent_node *__init_extent_tree(struct f2fs_sb_info *sbi,
}
static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
- struct extent_tree *et, bool free_all)
+ struct extent_tree *et)
{
struct rb_node *node, *next;
struct extent_node *en;
@@ -139,18 +158,7 @@ static unsigned int __free_extent_tree(struct f2fs_sb_info *sbi,
while (node) {
next = rb_next(node);
en = rb_entry(node, struct extent_node, rb_node);
-
- if (free_all) {
- spin_lock(&sbi->extent_lock);
- if (!list_empty(&en->list))
- list_del_init(&en->list);
- spin_unlock(&sbi->extent_lock);
- }
-
- if (free_all || list_empty(&en->list)) {
- __detach_extent_node(sbi, et, en);
- kmem_cache_free(extent_node_slab, en);
- }
+ __release_extent_node(sbi, et, en);
node = next;
}
@@ -232,9 +240,10 @@ static bool f2fs_lookup_extent_tree(struct inode *inode, pgoff_t pgofs,
if (en) {
*ei = en->ei;
spin_lock(&sbi->extent_lock);
- if (!list_empty(&en->list))
+ if (!list_empty(&en->list)) {
list_move_tail(&en->list, &sbi->extent_list);
- et->cached_en = en;
+ et->cached_en = en;
+ }
spin_unlock(&sbi->extent_lock);
ret = true;
}
@@ -329,7 +338,6 @@ lookup_neighbors:
static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
struct extent_tree *et, struct extent_info *ei,
- struct extent_node **den,
struct extent_node *prev_ex,
struct extent_node *next_ex)
{
@@ -342,20 +350,25 @@ static struct extent_node *__try_merge_extent_node(struct f2fs_sb_info *sbi,
}
if (next_ex && __is_front_mergeable(ei, &next_ex->ei)) {
- if (en) {
- __detach_extent_node(sbi, et, prev_ex);
- *den = prev_ex;
- }
+ if (en)
+ __release_extent_node(sbi, et, prev_ex);
next_ex->ei.fofs = ei->fofs;
next_ex->ei.blk = ei->blk;
next_ex->ei.len += ei->len;
en = next_ex;
}
- if (en) {
- __try_update_largest_extent(et, en);
+ if (!en)
+ return NULL;
+
+ __try_update_largest_extent(et, en);
+
+ spin_lock(&sbi->extent_lock);
+ if (!list_empty(&en->list)) {
+ list_move_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
}
+ spin_unlock(&sbi->extent_lock);
return en;
}
@@ -391,7 +404,12 @@ do_insert:
return NULL;
__try_update_largest_extent(et, en);
+
+ /* update in global extent list */
+ spin_lock(&sbi->extent_lock);
+ list_add_tail(&en->list, &sbi->extent_list);
et->cached_en = en;
+ spin_unlock(&sbi->extent_lock);
return en;
}
@@ -479,7 +497,7 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
if (parts)
__try_update_largest_extent(et, en);
else
- __detach_extent_node(sbi, et, en);
+ __release_extent_node(sbi, et, en);
/*
* if original extent is split into zero or two parts, extent
@@ -490,31 +508,15 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
insert_p = NULL;
insert_parent = NULL;
}
-
- /* update in global extent list */
- spin_lock(&sbi->extent_lock);
- if (!parts && !list_empty(&en->list))
- list_del(&en->list);
- if (en1)
- list_add_tail(&en1->list, &sbi->extent_list);
- spin_unlock(&sbi->extent_lock);
-
- /* release extent node */
- if (!parts)
- kmem_cache_free(extent_node_slab, en);
-
en = next_en;
}
/* 3. update extent in extent cache */
if (blkaddr) {
- struct extent_node *den = NULL;
set_extent_info(&ei, fofs, blkaddr, len);
- en1 = __try_merge_extent_node(sbi, et, &ei, &den,
- prev_en, next_en);
- if (!en1)
- en1 = __insert_extent_tree(sbi, et, &ei,
+ if (!__try_merge_extent_node(sbi, et, &ei, prev_en, next_en))
+ __insert_extent_tree(sbi, et, &ei,
insert_p, insert_parent);
/* give up extent_cache, if split and small updates happen */
@@ -524,24 +526,10 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
et->largest.len = 0;
set_inode_flag(F2FS_I(inode), FI_NO_EXTENT);
}
-
- spin_lock(&sbi->extent_lock);
- if (en1) {
- if (list_empty(&en1->list))
- list_add_tail(&en1->list, &sbi->extent_list);
- else
- list_move_tail(&en1->list, &sbi->extent_list);
- }
- if (den && !list_empty(&den->list))
- list_del(&den->list);
- spin_unlock(&sbi->extent_lock);
-
- if (den)
- kmem_cache_free(extent_node_slab, den);
}
if (is_inode_flag_set(F2FS_I(inode), FI_NO_EXTENT))
- __free_extent_tree(sbi, et, true);
+ __free_extent_tree(sbi, et);
write_unlock(&et->lock);
@@ -550,14 +538,10 @@ static unsigned int f2fs_update_extent_tree_range(struct inode *inode,
unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
{
- struct extent_tree *treevec[EXT_TREE_VEC_SIZE];
struct extent_tree *et, *next;
- struct extent_node *en, *tmp;
- unsigned long ino = F2FS_ROOT_INO(sbi);
- unsigned int found;
+ struct extent_node *en;
unsigned int node_cnt = 0, tree_cnt = 0;
int remained;
- bool do_free = false;
if (!test_opt(sbi, EXTENT_CACHE))
return 0;
@@ -572,10 +556,10 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
list_for_each_entry_safe(et, next, &sbi->zombie_list, list) {
if (atomic_read(&et->node_cnt)) {
write_lock(&et->lock);
- node_cnt += __free_extent_tree(sbi, et, true);
+ node_cnt += __free_extent_tree(sbi, et);
write_unlock(&et->lock);
}
-
+ f2fs_bug_on(sbi, atomic_read(&et->node_cnt));
list_del_init(&et->list);
radix_tree_delete(&sbi->extent_tree_root, et->ino);
kmem_cache_free(extent_tree_slab, et);
@@ -585,6 +569,7 @@ unsigned int f2fs_shrink_extent_tree(struct f2fs_sb_info *sbi, int nr_shrink)
if (node_cnt + tree_cnt >= nr_shrink)
goto unlock_out;
+ cond_resched();
}
up_write(&sbi->extent_tree_lock);
@@ -596,42 +581,29 @@ free_node:
remained = nr_shrink - (node_cnt + tree_cnt);
spin_lock(&sbi->extent_lock);
- list_for_each_entry_safe(en, tmp, &sbi->extent_list, list) {
- if (!remained--)
+ for (; remained > 0; remained--) {
+ if (list_empty(&sbi->extent_list))
break;
- list_del_init(&en->list);
- do_free = true;
- }
- spin_unlock(&sbi->extent_lock);
-
- if (do_free == false)
- goto unlock_out;
-
- /*
- * reset ino for searching victims from beginning of global extent tree.
- */
- ino = F2FS_ROOT_INO(sbi);
-
- while ((found = radix_tree_gang_lookup(&sbi->extent_tree_root,
- (void **)treevec, ino, EXT_TREE_VEC_SIZE))) {
- unsigned i;
-
- ino = treevec[found - 1]->ino + 1;
- for (i = 0; i < found; i++) {
- struct extent_tree *et = treevec[i];
+ en = list_first_entry(&sbi->extent_list,
+ struct extent_node, list);
+ et = en->et;
+ if (!write_trylock(&et->lock)) {
+ /* refresh this extent node's position in extent list */
+ list_move_tail(&en->list, &sbi->extent_list);
+ continue;
+ }
- if (!atomic_read(&et->node_cnt))
- continue;
+ list_del_init(&en->list);
+ spin_unlock(&sbi->extent_lock);
- if (write_trylock(&et->lock)) {
- node_cnt += __free_extent_tree(sbi, et, false);
- write_unlock(&et->lock);
- }
+ __detach_extent_node(sbi, et, en);
- if (node_cnt + tree_cnt >= nr_shrink)
- goto unlock_out;
- }
+ write_unlock(&et->lock);
+ node_cnt++;
+ spin_lock(&sbi->extent_lock);
}
+ spin_unlock(&sbi->extent_lock);
+
unlock_out:
up_write(&sbi->extent_tree_lock);
out:
@@ -650,7 +622,7 @@ unsigned int f2fs_destroy_extent_node(struct inode *inode)
return 0;
write_lock(&et->lock);
- node_cnt = __free_extent_tree(sbi, et, true);
+ node_cnt = __free_extent_tree(sbi, et);
write_unlock(&et->lock);
return node_cnt;
@@ -701,19 +673,21 @@ bool f2fs_lookup_extent_cache(struct inode *inode, pgoff_t pgofs,
void f2fs_update_extent_cache(struct dnode_of_data *dn)
{
- struct f2fs_inode_info *fi = F2FS_I(dn->inode);
pgoff_t fofs;
+ block_t blkaddr;
if (!f2fs_may_extent_tree(dn->inode))
return;
- f2fs_bug_on(F2FS_I_SB(dn->inode), dn->data_blkaddr == NEW_ADDR);
-
+ if (dn->data_blkaddr == NEW_ADDR)
+ blkaddr = NULL_ADDR;
+ else
+ blkaddr = dn->data_blkaddr;
- fofs = start_bidx_of_node(ofs_of_node(dn->node_page), fi) +
- dn->ofs_in_node;
+ fofs = start_bidx_of_node(ofs_of_node(dn->node_page), dn->inode) +
+ dn->ofs_in_node;
- if (f2fs_update_extent_tree_range(dn->inode, fofs, dn->data_blkaddr, 1))
+ if (f2fs_update_extent_tree_range(dn->inode, fofs, blkaddr, 1))
sync_inode_page(dn);
}
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index ff79054c6..2effb79b5 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -22,10 +22,11 @@
#include <linux/vmalloc.h>
#include <linux/bio.h>
#include <linux/blkdev.h>
+#include <linux/fscrypto.h>
+#include <crypto/hash.h>
#ifdef CONFIG_F2FS_CHECK_FS
#define f2fs_bug_on(sbi, condition) BUG_ON(condition)
-#define f2fs_down_write(x, y) down_write_nest_lock(x, y)
#else
#define f2fs_bug_on(sbi, condition) \
do { \
@@ -34,7 +35,6 @@
set_sbi_flag(sbi, SBI_NEED_FSCK); \
} \
} while (0)
-#define f2fs_down_write(x, y) down_write(x)
#endif
/*
@@ -84,27 +84,6 @@ struct f2fs_mount_info {
#define F2FS_CLEAR_FEATURE(sb, mask) \
F2FS_SB(sb)->raw_super->feature &= ~cpu_to_le32(mask)
-#define CRCPOLY_LE 0xedb88320
-
-static inline __u32 f2fs_crc32(void *buf, size_t len)
-{
- unsigned char *p = (unsigned char *)buf;
- __u32 crc = F2FS_SUPER_MAGIC;
- int i;
-
- while (len--) {
- crc ^= *p++;
- for (i = 0; i < 8; i++)
- crc = (crc >> 1) ^ ((crc & 1) ? CRCPOLY_LE : 0);
- }
- return crc;
-}
-
-static inline bool f2fs_crc_valid(__u32 blk_crc, void *buf, size_t buf_size)
-{
- return f2fs_crc32(buf, buf_size) == blk_crc;
-}
-
/*
* For checkpoint manager
*/
@@ -183,37 +162,37 @@ struct fsync_inode_entry {
block_t last_inode; /* block address locating the last inode */
};
-#define nats_in_cursum(sum) (le16_to_cpu(sum->n_nats))
-#define sits_in_cursum(sum) (le16_to_cpu(sum->n_sits))
+#define nats_in_cursum(jnl) (le16_to_cpu(jnl->n_nats))
+#define sits_in_cursum(jnl) (le16_to_cpu(jnl->n_sits))
-#define nat_in_journal(sum, i) (sum->nat_j.entries[i].ne)
-#define nid_in_journal(sum, i) (sum->nat_j.entries[i].nid)
-#define sit_in_journal(sum, i) (sum->sit_j.entries[i].se)
-#define segno_in_journal(sum, i) (sum->sit_j.entries[i].segno)
+#define nat_in_journal(jnl, i) (jnl->nat_j.entries[i].ne)
+#define nid_in_journal(jnl, i) (jnl->nat_j.entries[i].nid)
+#define sit_in_journal(jnl, i) (jnl->sit_j.entries[i].se)
+#define segno_in_journal(jnl, i) (jnl->sit_j.entries[i].segno)
-#define MAX_NAT_JENTRIES(sum) (NAT_JOURNAL_ENTRIES - nats_in_cursum(sum))
-#define MAX_SIT_JENTRIES(sum) (SIT_JOURNAL_ENTRIES - sits_in_cursum(sum))
+#define MAX_NAT_JENTRIES(jnl) (NAT_JOURNAL_ENTRIES - nats_in_cursum(jnl))
+#define MAX_SIT_JENTRIES(jnl) (SIT_JOURNAL_ENTRIES - sits_in_cursum(jnl))
-static inline int update_nats_in_cursum(struct f2fs_summary_block *rs, int i)
+static inline int update_nats_in_cursum(struct f2fs_journal *journal, int i)
{
- int before = nats_in_cursum(rs);
- rs->n_nats = cpu_to_le16(before + i);
+ int before = nats_in_cursum(journal);
+ journal->n_nats = cpu_to_le16(before + i);
return before;
}
-static inline int update_sits_in_cursum(struct f2fs_summary_block *rs, int i)
+static inline int update_sits_in_cursum(struct f2fs_journal *journal, int i)
{
- int before = sits_in_cursum(rs);
- rs->n_sits = cpu_to_le16(before + i);
+ int before = sits_in_cursum(journal);
+ journal->n_sits = cpu_to_le16(before + i);
return before;
}
-static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
- int type)
+static inline bool __has_cursum_space(struct f2fs_journal *journal,
+ int size, int type)
{
if (type == NAT_JOURNAL)
- return size <= MAX_NAT_JENTRIES(sum);
- return size <= MAX_SIT_JENTRIES(sum);
+ return size <= MAX_NAT_JENTRIES(journal);
+ return size <= MAX_SIT_JENTRIES(journal);
}
/*
@@ -233,12 +212,9 @@ static inline bool __has_cursum_space(struct f2fs_summary_block *sum, int size,
#define F2FS_IOC_WRITE_CHECKPOINT _IO(F2FS_IOCTL_MAGIC, 7)
#define F2FS_IOC_DEFRAGMENT _IO(F2FS_IOCTL_MAGIC, 8)
-#define F2FS_IOC_SET_ENCRYPTION_POLICY \
- _IOR('f', 19, struct f2fs_encryption_policy)
-#define F2FS_IOC_GET_ENCRYPTION_PWSALT \
- _IOW('f', 20, __u8[16])
-#define F2FS_IOC_GET_ENCRYPTION_POLICY \
- _IOW('f', 21, struct f2fs_encryption_policy)
+#define F2FS_IOC_SET_ENCRYPTION_POLICY FS_IOC_SET_ENCRYPTION_POLICY
+#define F2FS_IOC_GET_ENCRYPTION_POLICY FS_IOC_GET_ENCRYPTION_POLICY
+#define F2FS_IOC_GET_ENCRYPTION_PWSALT FS_IOC_GET_ENCRYPTION_PWSALT
/*
* should be same as XFS_IOC_GOINGDOWN.
@@ -268,25 +244,6 @@ struct f2fs_defragment {
* For INODE and NODE manager
*/
/* for directory operations */
-struct f2fs_str {
- unsigned char *name;
- u32 len;
-};
-
-struct f2fs_filename {
- const struct qstr *usr_fname;
- struct f2fs_str disk_name;
- f2fs_hash_t hash;
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- struct f2fs_str crypto_buf;
-#endif
-};
-
-#define FSTR_INIT(n, l) { .name = n, .len = l }
-#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len)
-#define fname_name(p) ((p)->disk_name.name)
-#define fname_len(p) ((p)->disk_name.len)
-
struct f2fs_dentry_ptr {
struct inode *inode;
const void *bitmap;
@@ -354,6 +311,7 @@ struct extent_node {
struct rb_node rb_node; /* rb node located in rb-tree */
struct list_head list; /* node in global extent list of sbi */
struct extent_info ei; /* extent info */
+ struct extent_tree *et; /* extent tree pointer */
};
struct extent_tree {
@@ -382,6 +340,7 @@ struct f2fs_map_blocks {
block_t m_lblk;
unsigned int m_len;
unsigned int m_flags;
+ pgoff_t *m_next_pgofs; /* point next possible non-hole pgofs */
};
/* for flag in get_data_block */
@@ -389,6 +348,8 @@ struct f2fs_map_blocks {
#define F2FS_GET_BLOCK_DIO 1
#define F2FS_GET_BLOCK_FIEMAP 2
#define F2FS_GET_BLOCK_BMAP 3
+#define F2FS_GET_BLOCK_PRE_DIO 4
+#define F2FS_GET_BLOCK_PRE_AIO 5
/*
* i_advise uses FADVISE_XXX_BIT. We can add additional hints later.
@@ -410,15 +371,6 @@ struct f2fs_map_blocks {
#define file_enc_name(inode) is_file(inode, FADVISE_ENC_NAME_BIT)
#define file_set_enc_name(inode) set_file(inode, FADVISE_ENC_NAME_BIT)
-/* Encryption algorithms */
-#define F2FS_ENCRYPTION_MODE_INVALID 0
-#define F2FS_ENCRYPTION_MODE_AES_256_XTS 1
-#define F2FS_ENCRYPTION_MODE_AES_256_GCM 2
-#define F2FS_ENCRYPTION_MODE_AES_256_CBC 3
-#define F2FS_ENCRYPTION_MODE_AES_256_CTS 4
-
-#include "f2fs_crypto.h"
-
#define DEF_DIR_LEVEL 0
struct f2fs_inode_info {
@@ -442,13 +394,7 @@ struct f2fs_inode_info {
struct list_head dirty_list; /* linked in global dirty list */
struct list_head inmem_pages; /* inmemory pages managed by f2fs */
struct mutex inmem_lock; /* lock for inmemory pages */
-
struct extent_tree *extent_tree; /* cached extent_tree entry */
-
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- /* Encryption params */
- struct f2fs_crypt_info *i_crypt_info;
-#endif
};
static inline void get_extent_info(struct extent_info *ext,
@@ -515,6 +461,7 @@ struct f2fs_nm_info {
nid_t next_scan_nid; /* the next nid to be scanned */
unsigned int ram_thresh; /* control the memory footprint */
unsigned int ra_nid_pages; /* # of nid pages to be readaheaded */
+ unsigned int dirty_nats_ratio; /* control dirty nats ratio threshold */
/* NAT cache management */
struct radix_tree_root nat_root;/* root of the nat entry cache */
@@ -549,6 +496,8 @@ struct dnode_of_data {
unsigned int ofs_in_node; /* data offset in the node page */
bool inode_page_locked; /* inode page is locked or not */
bool node_changed; /* is node block changed */
+ char cur_level; /* level of hole node page */
+ char max_level; /* level of current page located */
block_t data_blkaddr; /* block address of the node block */
};
@@ -679,6 +628,7 @@ enum page_type {
META_FLUSH,
INMEM, /* the below types are used by tracepoints only. */
INMEM_DROP,
+ INMEM_REVOKE,
IPU,
OPU,
};
@@ -687,7 +637,8 @@ struct f2fs_io_info {
struct f2fs_sb_info *sbi; /* f2fs_sb_info pointer */
enum page_type type; /* contains DATA/NODE/META/META_FLUSH */
int rw; /* contains R/RS/W/WS with REQ_META/REQ_PRIO */
- block_t blk_addr; /* block address to be written */
+ block_t new_blkaddr; /* new block address to be written */
+ block_t old_blkaddr; /* old block address before Cow */
struct page *page; /* page to be written */
struct page *encrypted_page; /* encrypted page */
};
@@ -729,6 +680,10 @@ enum {
MAX_TIME,
};
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+#define F2FS_KEY_DESC_PREFIX "f2fs:"
+#define F2FS_KEY_DESC_PREFIX_SIZE 5
+#endif
struct f2fs_sb_info {
struct super_block *sb; /* pointer to VFS super block */
struct proc_dir_entry *s_proc; /* proc entry */
@@ -736,6 +691,10 @@ struct f2fs_sb_info {
int valid_super_block; /* valid super block no */
int s_flag; /* flags for sbi */
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ u8 key_prefix[F2FS_KEY_DESC_PREFIX_SIZE];
+ u8 key_prefix_size;
+#endif
/* for node-related operations */
struct f2fs_nm_info *nm_info; /* node manager */
struct inode *node_inode; /* cache node blocks */
@@ -844,8 +803,22 @@ struct f2fs_sb_info {
struct list_head s_list;
struct mutex umount_mutex;
unsigned int shrinker_run_no;
+
+ /* For write statistics */
+ u64 sectors_written_start;
+ u64 kbytes_written;
+
+ /* Reference to checksum algorithm driver via cryptoapi */
+ struct crypto_shash *s_chksum_driver;
};
+/* For write statistics. Suppose sector size is 512 bytes,
+ * and the return value is in kbytes. s is of struct f2fs_sb_info.
+ */
+#define BD_PART_WRITTEN(s) \
+(((u64)part_stat_read(s->sb->s_bdev->bd_part, sectors[1]) - \
+ s->sectors_written_start) >> 1)
+
static inline void f2fs_update_time(struct f2fs_sb_info *sbi, int type)
{
sbi->last_time[type] = jiffies;
@@ -874,6 +847,29 @@ static inline bool is_idle(struct f2fs_sb_info *sbi)
/*
* Inline functions
*/
+static inline u32 f2fs_crc32(struct f2fs_sb_info *sbi, const void *address,
+ unsigned int length)
+{
+ SHASH_DESC_ON_STACK(shash, sbi->s_chksum_driver);
+ u32 *ctx = (u32 *)shash_desc_ctx(shash);
+ int err;
+
+ shash->tfm = sbi->s_chksum_driver;
+ shash->flags = 0;
+ *ctx = F2FS_SUPER_MAGIC;
+
+ err = crypto_shash_update(shash, address, length);
+ BUG_ON(err);
+
+ return *ctx;
+}
+
+static inline bool f2fs_crc_valid(struct f2fs_sb_info *sbi, __u32 blk_crc,
+ void *buf, size_t buf_size)
+{
+ return f2fs_crc32(sbi, buf, buf_size) == blk_crc;
+}
+
static inline struct f2fs_inode_info *F2FS_I(struct inode *inode)
{
return container_of(inode, struct f2fs_inode_info, vfs_inode);
@@ -1006,7 +1002,7 @@ static inline void f2fs_unlock_op(struct f2fs_sb_info *sbi)
static inline void f2fs_lock_all(struct f2fs_sb_info *sbi)
{
- f2fs_down_write(&sbi->cp_rwsem, &sbi->cp_mutex);
+ down_write(&sbi->cp_rwsem);
}
static inline void f2fs_unlock_all(struct f2fs_sb_info *sbi)
@@ -1306,7 +1302,7 @@ static inline void f2fs_put_page(struct page *page, int unlock)
f2fs_bug_on(F2FS_P_SB(page), !PageLocked(page));
unlock_page(page);
}
- page_cache_release(page);
+ put_page(page);
}
static inline void f2fs_put_dnode(struct dnode_of_data *dn)
@@ -1525,9 +1521,9 @@ static inline int f2fs_has_inline_xattr(struct inode *inode)
return is_inode_flag_set(F2FS_I(inode), FI_INLINE_XATTR);
}
-static inline unsigned int addrs_per_inode(struct f2fs_inode_info *fi)
+static inline unsigned int addrs_per_inode(struct inode *inode)
{
- if (f2fs_has_inline_xattr(&fi->vfs_inode))
+ if (f2fs_has_inline_xattr(inode))
return DEF_ADDRS_PER_INODE - F2FS_INLINE_XATTR_ADDRS;
return DEF_ADDRS_PER_INODE;
}
@@ -1681,10 +1677,10 @@ static inline void *f2fs_kvzalloc(size_t size, gfp_t flags)
(F2FS_I(i)->i_acl_mode) : ((i)->i_mode))
/* get offset of first page in next direct node */
-#define PGOFS_OF_NEXT_DNODE(pgofs, fi) \
- ((pgofs < ADDRS_PER_INODE(fi)) ? ADDRS_PER_INODE(fi) : \
- (pgofs - ADDRS_PER_INODE(fi) + ADDRS_PER_BLOCK) / \
- ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi))
+#define PGOFS_OF_NEXT_DNODE(pgofs, inode) \
+ ((pgofs < ADDRS_PER_INODE(inode)) ? ADDRS_PER_INODE(inode) : \
+ (pgofs - ADDRS_PER_INODE(inode) + ADDRS_PER_BLOCK) / \
+ ADDRS_PER_BLOCK * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode))
/*
* file.c
@@ -1723,10 +1719,10 @@ struct dentry *f2fs_get_parent(struct dentry *child);
extern unsigned char f2fs_filetype_table[F2FS_FT_MAX];
void set_de_type(struct f2fs_dir_entry *, umode_t);
-struct f2fs_dir_entry *find_target_dentry(struct f2fs_filename *,
+struct f2fs_dir_entry *find_target_dentry(struct fscrypt_name *,
f2fs_hash_t, int *, struct f2fs_dentry_ptr *);
bool f2fs_fill_dentries(struct dir_context *, struct f2fs_dentry_ptr *,
- unsigned int, struct f2fs_str *);
+ unsigned int, struct fscrypt_str *);
void do_make_empty_dir(struct inode *, struct inode *,
struct f2fs_dentry_ptr *);
struct page *init_inode_metadata(struct inode *, struct inode *,
@@ -1763,6 +1759,7 @@ int f2fs_commit_super(struct f2fs_sb_info *, bool);
int f2fs_sync_fs(struct super_block *, int);
extern __printf(3, 4)
void f2fs_msg(struct super_block *, const char *, const char *, ...);
+int sanity_check_ckpt(struct f2fs_sb_info *sbi);
/*
* hash.c
@@ -1780,6 +1777,7 @@ int need_dentry_mark(struct f2fs_sb_info *, nid_t);
bool is_checkpointed_node(struct f2fs_sb_info *, nid_t);
bool need_inode_block_update(struct f2fs_sb_info *, nid_t);
void get_node_info(struct f2fs_sb_info *, nid_t, struct node_info *);
+pgoff_t get_next_page_offset(struct dnode_of_data *, pgoff_t);
int get_dnode_of_data(struct dnode_of_data *, pgoff_t, int);
int truncate_inode_blocks(struct inode *, pgoff_t);
int truncate_xattr_node(struct inode *, struct page *);
@@ -1811,7 +1809,8 @@ void destroy_node_manager_caches(void);
* segment.c
*/
void register_inmem_page(struct inode *, struct page *);
-int commit_inmem_pages(struct inode *, bool);
+void drop_inmem_pages(struct inode *);
+int commit_inmem_pages(struct inode *);
void f2fs_balance_fs(struct f2fs_sb_info *, bool);
void f2fs_balance_fs_bg(struct f2fs_sb_info *);
int f2fs_issue_flush(struct f2fs_sb_info *);
@@ -1832,16 +1831,17 @@ void write_meta_page(struct f2fs_sb_info *, struct page *);
void write_node_page(unsigned int, struct f2fs_io_info *);
void write_data_page(struct dnode_of_data *, struct f2fs_io_info *);
void rewrite_data_page(struct f2fs_io_info *);
+void __f2fs_replace_block(struct f2fs_sb_info *, struct f2fs_summary *,
+ block_t, block_t, bool, bool);
void f2fs_replace_block(struct f2fs_sb_info *, struct dnode_of_data *,
- block_t, block_t, unsigned char, bool);
+ block_t, block_t, unsigned char, bool, bool);
void allocate_data_block(struct f2fs_sb_info *, struct page *,
block_t, block_t *, struct f2fs_summary *, int);
-void f2fs_wait_on_page_writeback(struct page *, enum page_type);
+void f2fs_wait_on_page_writeback(struct page *, enum page_type, bool);
void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *, block_t);
void write_data_summaries(struct f2fs_sb_info *, block_t);
void write_node_summaries(struct f2fs_sb_info *, block_t);
-int lookup_journal_in_cursum(struct f2fs_summary_block *,
- int, unsigned int, int);
+int lookup_journal_in_cursum(struct f2fs_journal *, int, unsigned int, int);
void flush_sit_entries(struct f2fs_sb_info *, struct cp_control *);
int build_segment_manager(struct f2fs_sb_info *);
void destroy_segment_manager(struct f2fs_sb_info *);
@@ -1881,11 +1881,16 @@ void destroy_checkpoint_caches(void);
* data.c
*/
void f2fs_submit_merged_bio(struct f2fs_sb_info *, enum page_type, int);
+void f2fs_submit_merged_bio_cond(struct f2fs_sb_info *, struct inode *,
+ struct page *, nid_t, enum page_type, int);
+void f2fs_flush_merged_bios(struct f2fs_sb_info *);
int f2fs_submit_page_bio(struct f2fs_io_info *);
void f2fs_submit_page_mbio(struct f2fs_io_info *);
void set_data_blkaddr(struct dnode_of_data *);
+void f2fs_update_data_blkaddr(struct dnode_of_data *, block_t);
int reserve_new_block(struct dnode_of_data *);
int f2fs_get_block(struct dnode_of_data *, pgoff_t);
+ssize_t f2fs_preallocate_blocks(struct kiocb *, struct iov_iter *);
int f2fs_reserve_block(struct dnode_of_data *, pgoff_t);
struct page *get_read_data_page(struct inode *, pgoff_t, int, bool);
struct page *find_data_page(struct inode *, pgoff_t);
@@ -1902,7 +1907,7 @@ int f2fs_release_page(struct page *, gfp_t);
*/
int start_gc_thread(struct f2fs_sb_info *);
void stop_gc_thread(struct f2fs_sb_info *);
-block_t start_bidx_of_node(unsigned int, struct f2fs_inode_info *);
+block_t start_bidx_of_node(unsigned int, struct inode *);
int f2fs_gc(struct f2fs_sb_info *, bool);
void build_gc_manager(struct f2fs_sb_info *);
@@ -2093,7 +2098,7 @@ int f2fs_convert_inline_inode(struct inode *);
int f2fs_write_inline_data(struct inode *, struct page *);
bool recover_inline_data(struct inode *, struct page *);
struct f2fs_dir_entry *find_in_inline_dir(struct inode *,
- struct f2fs_filename *, struct page **);
+ struct fscrypt_name *, struct page **);
struct f2fs_dir_entry *f2fs_parent_inline_dir(struct inode *, struct page **);
int make_empty_inline_dir(struct inode *inode, struct inode *, struct page *);
int f2fs_add_inline_entry(struct inode *, const struct qstr *, struct inode *,
@@ -2102,7 +2107,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *, struct page *,
struct inode *, struct inode *);
bool f2fs_empty_inline_dir(struct inode *);
int f2fs_read_inline_dir(struct file *, struct dir_context *,
- struct f2fs_str *);
+ struct fscrypt_str *);
int f2fs_inline_data_fiemap(struct inode *,
struct fiemap_extent_info *, __u64, __u64);
@@ -2132,13 +2137,9 @@ void destroy_extent_cache(void);
/*
* crypto support
*/
-static inline int f2fs_encrypted_inode(struct inode *inode)
+static inline bool f2fs_encrypted_inode(struct inode *inode)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
return file_is_encrypt(inode);
-#else
- return 0;
-#endif
}
static inline void f2fs_set_encrypted_inode(struct inode *inode)
@@ -2150,20 +2151,12 @@ static inline void f2fs_set_encrypted_inode(struct inode *inode)
static inline bool f2fs_bio_encrypted(struct bio *bio)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- return unlikely(bio->bi_private != NULL);
-#else
- return false;
-#endif
+ return bio->bi_private != NULL;
}
static inline int f2fs_sb_has_crypto(struct super_block *sb)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
return F2FS_HAS_FEATURE(sb, F2FS_FEATURE_ENCRYPT);
-#else
- return 0;
-#endif
}
static inline bool f2fs_may_encrypt(struct inode *inode)
@@ -2177,86 +2170,28 @@ static inline bool f2fs_may_encrypt(struct inode *inode)
#endif
}
-/* crypto_policy.c */
-int f2fs_is_child_context_consistent_with_parent(struct inode *,
- struct inode *);
-int f2fs_inherit_context(struct inode *, struct inode *, struct page *);
-int f2fs_process_policy(const struct f2fs_encryption_policy *, struct inode *);
-int f2fs_get_policy(struct inode *, struct f2fs_encryption_policy *);
-
-/* crypt.c */
-extern struct kmem_cache *f2fs_crypt_info_cachep;
-bool f2fs_valid_contents_enc_mode(uint32_t);
-uint32_t f2fs_validate_encryption_key_size(uint32_t, uint32_t);
-struct f2fs_crypto_ctx *f2fs_get_crypto_ctx(struct inode *);
-void f2fs_release_crypto_ctx(struct f2fs_crypto_ctx *);
-struct page *f2fs_encrypt(struct inode *, struct page *);
-int f2fs_decrypt(struct f2fs_crypto_ctx *, struct page *);
-int f2fs_decrypt_one(struct inode *, struct page *);
-void f2fs_end_io_crypto_work(struct f2fs_crypto_ctx *, struct bio *);
-
-/* crypto_key.c */
-void f2fs_free_encryption_info(struct inode *, struct f2fs_crypt_info *);
-int _f2fs_get_encryption_info(struct inode *inode);
-
-/* crypto_fname.c */
-bool f2fs_valid_filenames_enc_mode(uint32_t);
-u32 f2fs_fname_crypto_round_up(u32, u32);
-int f2fs_fname_crypto_alloc_buffer(struct inode *, u32, struct f2fs_str *);
-int f2fs_fname_disk_to_usr(struct inode *, f2fs_hash_t *,
- const struct f2fs_str *, struct f2fs_str *);
-int f2fs_fname_usr_to_disk(struct inode *, const struct qstr *,
- struct f2fs_str *);
-
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
-void f2fs_restore_and_release_control_page(struct page **);
-void f2fs_restore_control_page(struct page *);
-
-int __init f2fs_init_crypto(void);
-int f2fs_crypto_initialize(void);
-void f2fs_exit_crypto(void);
-
-int f2fs_has_encryption_key(struct inode *);
-
-static inline int f2fs_get_encryption_info(struct inode *inode)
-{
- struct f2fs_crypt_info *ci = F2FS_I(inode)->i_crypt_info;
-
- if (!ci ||
- (ci->ci_keyring_key &&
- (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
- (1 << KEY_FLAG_REVOKED) |
- (1 << KEY_FLAG_DEAD)))))
- return _f2fs_get_encryption_info(inode);
- return 0;
-}
-
-void f2fs_fname_crypto_free_buffer(struct f2fs_str *);
-int f2fs_fname_setup_filename(struct inode *, const struct qstr *,
- int lookup, struct f2fs_filename *);
-void f2fs_fname_free_filename(struct f2fs_filename *);
-#else
-static inline void f2fs_restore_and_release_control_page(struct page **p) { }
-static inline void f2fs_restore_control_page(struct page *p) { }
-
-static inline int __init f2fs_init_crypto(void) { return 0; }
-static inline void f2fs_exit_crypto(void) { }
-
-static inline int f2fs_has_encryption_key(struct inode *i) { return 0; }
-static inline int f2fs_get_encryption_info(struct inode *i) { return 0; }
-static inline void f2fs_fname_crypto_free_buffer(struct f2fs_str *p) { }
-
-static inline int f2fs_fname_setup_filename(struct inode *dir,
- const struct qstr *iname,
- int lookup, struct f2fs_filename *fname)
-{
- memset(fname, 0, sizeof(struct f2fs_filename));
- fname->usr_fname = iname;
- fname->disk_name.name = (unsigned char *)iname->name;
- fname->disk_name.len = iname->len;
- return 0;
-}
-
-static inline void f2fs_fname_free_filename(struct f2fs_filename *fname) { }
+#ifndef CONFIG_F2FS_FS_ENCRYPTION
+#define fscrypt_set_d_op(i)
+#define fscrypt_get_ctx fscrypt_notsupp_get_ctx
+#define fscrypt_release_ctx fscrypt_notsupp_release_ctx
+#define fscrypt_encrypt_page fscrypt_notsupp_encrypt_page
+#define fscrypt_decrypt_page fscrypt_notsupp_decrypt_page
+#define fscrypt_decrypt_bio_pages fscrypt_notsupp_decrypt_bio_pages
+#define fscrypt_pullback_bio_page fscrypt_notsupp_pullback_bio_page
+#define fscrypt_restore_control_page fscrypt_notsupp_restore_control_page
+#define fscrypt_zeroout_range fscrypt_notsupp_zeroout_range
+#define fscrypt_process_policy fscrypt_notsupp_process_policy
+#define fscrypt_get_policy fscrypt_notsupp_get_policy
+#define fscrypt_has_permitted_context fscrypt_notsupp_has_permitted_context
+#define fscrypt_inherit_context fscrypt_notsupp_inherit_context
+#define fscrypt_get_encryption_info fscrypt_notsupp_get_encryption_info
+#define fscrypt_put_encryption_info fscrypt_notsupp_put_encryption_info
+#define fscrypt_setup_filename fscrypt_notsupp_setup_filename
+#define fscrypt_free_filename fscrypt_notsupp_free_filename
+#define fscrypt_fname_encrypted_size fscrypt_notsupp_fname_encrypted_size
+#define fscrypt_fname_alloc_buffer fscrypt_notsupp_fname_alloc_buffer
+#define fscrypt_fname_free_buffer fscrypt_notsupp_fname_free_buffer
+#define fscrypt_fname_disk_to_usr fscrypt_notsupp_fname_disk_to_usr
+#define fscrypt_fname_usr_to_disk fscrypt_notsupp_fname_usr_to_disk
#endif
#endif
diff --git a/fs/f2fs/f2fs_crypto.h b/fs/f2fs/f2fs_crypto.h
deleted file mode 100644
index c2c1c2b63..000000000
--- a/fs/f2fs/f2fs_crypto.h
+++ /dev/null
@@ -1,151 +0,0 @@
-/*
- * linux/fs/f2fs/f2fs_crypto.h
- *
- * Copied from linux/fs/ext4/ext4_crypto.h
- *
- * Copyright (C) 2015, Google, Inc.
- *
- * This contains encryption header content for f2fs
- *
- * Written by Michael Halcrow, 2015.
- * Modified by Jaegeuk Kim, 2015.
- */
-#ifndef _F2FS_CRYPTO_H
-#define _F2FS_CRYPTO_H
-
-#include <linux/fs.h>
-
-#define F2FS_KEY_DESCRIPTOR_SIZE 8
-
-/* Policy provided via an ioctl on the topmost directory */
-struct f2fs_encryption_policy {
- char version;
- char contents_encryption_mode;
- char filenames_encryption_mode;
- char flags;
- char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE];
-} __attribute__((__packed__));
-
-#define F2FS_ENCRYPTION_CONTEXT_FORMAT_V1 1
-#define F2FS_KEY_DERIVATION_NONCE_SIZE 16
-
-#define F2FS_POLICY_FLAGS_PAD_4 0x00
-#define F2FS_POLICY_FLAGS_PAD_8 0x01
-#define F2FS_POLICY_FLAGS_PAD_16 0x02
-#define F2FS_POLICY_FLAGS_PAD_32 0x03
-#define F2FS_POLICY_FLAGS_PAD_MASK 0x03
-#define F2FS_POLICY_FLAGS_VALID 0x03
-
-/**
- * Encryption context for inode
- *
- * Protector format:
- * 1 byte: Protector format (1 = this version)
- * 1 byte: File contents encryption mode
- * 1 byte: File names encryption mode
- * 1 byte: Flags
- * 8 bytes: Master Key descriptor
- * 16 bytes: Encryption Key derivation nonce
- */
-struct f2fs_encryption_context {
- char format;
- char contents_encryption_mode;
- char filenames_encryption_mode;
- char flags;
- char master_key_descriptor[F2FS_KEY_DESCRIPTOR_SIZE];
- char nonce[F2FS_KEY_DERIVATION_NONCE_SIZE];
-} __attribute__((__packed__));
-
-/* Encryption parameters */
-#define F2FS_XTS_TWEAK_SIZE 16
-#define F2FS_AES_128_ECB_KEY_SIZE 16
-#define F2FS_AES_256_GCM_KEY_SIZE 32
-#define F2FS_AES_256_CBC_KEY_SIZE 32
-#define F2FS_AES_256_CTS_KEY_SIZE 32
-#define F2FS_AES_256_XTS_KEY_SIZE 64
-#define F2FS_MAX_KEY_SIZE 64
-
-#define F2FS_KEY_DESC_PREFIX "f2fs:"
-#define F2FS_KEY_DESC_PREFIX_SIZE 5
-
-struct f2fs_encryption_key {
- __u32 mode;
- char raw[F2FS_MAX_KEY_SIZE];
- __u32 size;
-} __attribute__((__packed__));
-
-struct f2fs_crypt_info {
- char ci_data_mode;
- char ci_filename_mode;
- char ci_flags;
- struct crypto_ablkcipher *ci_ctfm;
- struct key *ci_keyring_key;
- char ci_master_key[F2FS_KEY_DESCRIPTOR_SIZE];
-};
-
-#define F2FS_CTX_REQUIRES_FREE_ENCRYPT_FL 0x00000001
-#define F2FS_WRITE_PATH_FL 0x00000002
-
-struct f2fs_crypto_ctx {
- union {
- struct {
- struct page *bounce_page; /* Ciphertext page */
- struct page *control_page; /* Original page */
- } w;
- struct {
- struct bio *bio;
- struct work_struct work;
- } r;
- struct list_head free_list; /* Free list */
- };
- char flags; /* Flags */
-};
-
-struct f2fs_completion_result {
- struct completion completion;
- int res;
-};
-
-#define DECLARE_F2FS_COMPLETION_RESULT(ecr) \
- struct f2fs_completion_result ecr = { \
- COMPLETION_INITIALIZER((ecr).completion), 0 }
-
-static inline int f2fs_encryption_key_size(int mode)
-{
- switch (mode) {
- case F2FS_ENCRYPTION_MODE_AES_256_XTS:
- return F2FS_AES_256_XTS_KEY_SIZE;
- case F2FS_ENCRYPTION_MODE_AES_256_GCM:
- return F2FS_AES_256_GCM_KEY_SIZE;
- case F2FS_ENCRYPTION_MODE_AES_256_CBC:
- return F2FS_AES_256_CBC_KEY_SIZE;
- case F2FS_ENCRYPTION_MODE_AES_256_CTS:
- return F2FS_AES_256_CTS_KEY_SIZE;
- default:
- BUG();
- }
- return 0;
-}
-
-#define F2FS_FNAME_NUM_SCATTER_ENTRIES 4
-#define F2FS_CRYPTO_BLOCK_SIZE 16
-#define F2FS_FNAME_CRYPTO_DIGEST_SIZE 32
-
-/**
- * For encrypted symlinks, the ciphertext length is stored at the beginning
- * of the string in little-endian format.
- */
-struct f2fs_encrypted_symlink_data {
- __le16 len;
- char encrypted_path[1];
-} __attribute__((__packed__));
-
-/**
- * This function is used to calculate the disk space required to
- * store a filename of length l in encrypted symlink format.
- */
-static inline u32 encrypted_symlink_data_len(u32 l)
-{
- return (l + sizeof(struct f2fs_encrypted_symlink_data) - 1);
-}
-#endif /* _F2FS_CRYPTO_H */
diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c
index 5a322bc00..90d1157a0 100644
--- a/fs/f2fs/file.c
+++ b/fs/f2fs/file.c
@@ -74,11 +74,11 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
goto mapped;
/* page is wholly or partially inside EOF */
- if (((loff_t)(page->index + 1) << PAGE_CACHE_SHIFT) >
+ if (((loff_t)(page->index + 1) << PAGE_SHIFT) >
i_size_read(inode)) {
unsigned offset;
- offset = i_size_read(inode) & ~PAGE_CACHE_MASK;
- zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ offset = i_size_read(inode) & ~PAGE_MASK;
+ zero_user_segment(page, offset, PAGE_SIZE);
}
set_page_dirty(page);
SetPageUptodate(page);
@@ -86,7 +86,7 @@ static int f2fs_vm_page_mkwrite(struct vm_area_struct *vma,
trace_f2fs_vm_page_mkwrite(page, DATA);
mapped:
/* fill the page */
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, DATA, false);
/* wait for GCed encrypted page writeback */
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
@@ -301,7 +301,7 @@ static pgoff_t __get_first_dirty_index(struct address_space *mapping,
pagevec_init(&pvec, 0);
nr_pages = pagevec_lookup_tag(&pvec, mapping, &pgofs,
PAGECACHE_TAG_DIRTY, 1);
- pgofs = nr_pages ? pvec.pages[0]->index : LONG_MAX;
+ pgofs = nr_pages ? pvec.pages[0]->index : ULONG_MAX;
pagevec_release(&pvec);
return pgofs;
}
@@ -346,11 +346,11 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
goto found;
}
- pgofs = (pgoff_t)(offset >> PAGE_CACHE_SHIFT);
+ pgofs = (pgoff_t)(offset >> PAGE_SHIFT);
dirty = __get_first_dirty_index(inode->i_mapping, pgofs, whence);
- for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
+ for (; data_ofs < isize; data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
set_new_dnode(&dn, inode, NULL, NULL, 0);
err = get_dnode_of_data(&dn, pgofs, LOOKUP_NODE_RA);
if (err && err != -ENOENT) {
@@ -358,20 +358,19 @@ static loff_t f2fs_seek_block(struct file *file, loff_t offset, int whence)
} else if (err == -ENOENT) {
/* direct node does not exists */
if (whence == SEEK_DATA) {
- pgofs = PGOFS_OF_NEXT_DNODE(pgofs,
- F2FS_I(inode));
+ pgofs = get_next_page_offset(&dn, pgofs);
continue;
} else {
goto found;
}
}
- end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
/* find data/hole in dnode block */
for (; dn.ofs_in_node < end_offset;
dn.ofs_in_node++, pgofs++,
- data_ofs = (loff_t)pgofs << PAGE_CACHE_SHIFT) {
+ data_ofs = (loff_t)pgofs << PAGE_SHIFT) {
block_t blkaddr;
blkaddr = datablock_addr(dn.node_page, dn.ofs_in_node);
@@ -422,7 +421,7 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
int err;
if (f2fs_encrypted_inode(inode)) {
- err = f2fs_get_encryption_info(inode);
+ err = fscrypt_get_encryption_info(inode);
if (err)
return 0;
if (!f2fs_encrypted_inode(inode))
@@ -442,14 +441,22 @@ static int f2fs_file_mmap(struct file *file, struct vm_area_struct *vma)
static int f2fs_file_open(struct inode *inode, struct file *filp)
{
int ret = generic_file_open(inode, filp);
+ struct dentry *dir;
if (!ret && f2fs_encrypted_inode(inode)) {
- ret = f2fs_get_encryption_info(inode);
+ ret = fscrypt_get_encryption_info(inode);
if (ret)
return -EACCES;
- if (!f2fs_encrypted_inode(inode))
+ if (!fscrypt_has_encryption_key(inode))
return -ENOKEY;
}
+ dir = dget_parent(file_dentry(filp));
+ if (f2fs_encrypted_inode(d_inode(dir)) &&
+ !fscrypt_has_permitted_context(d_inode(dir), inode)) {
+ dput(dir);
+ return -EPERM;
+ }
+ dput(dir);
return ret;
}
@@ -484,7 +491,7 @@ int truncate_data_blocks_range(struct dnode_of_data *dn, int count)
* we will invalidate all blkaddr in the whole range.
*/
fofs = start_bidx_of_node(ofs_of_node(dn->node_page),
- F2FS_I(dn->inode)) + ofs;
+ dn->inode) + ofs;
f2fs_update_extent_cache_range(dn, fofs, 0, len);
dec_valid_block_count(sbi, dn->inode, nr_free);
sync_inode_page(dn);
@@ -505,8 +512,8 @@ void truncate_data_blocks(struct dnode_of_data *dn)
static int truncate_partial_data_page(struct inode *inode, u64 from,
bool cache_only)
{
- unsigned offset = from & (PAGE_CACHE_SIZE - 1);
- pgoff_t index = from >> PAGE_CACHE_SHIFT;
+ unsigned offset = from & (PAGE_SIZE - 1);
+ pgoff_t index = from >> PAGE_SHIFT;
struct address_space *mapping = inode->i_mapping;
struct page *page;
@@ -525,9 +532,10 @@ static int truncate_partial_data_page(struct inode *inode, u64 from,
if (IS_ERR(page))
return 0;
truncate_out:
- f2fs_wait_on_page_writeback(page, DATA);
- zero_user(page, offset, PAGE_CACHE_SIZE - offset);
- if (!cache_only || !f2fs_encrypted_inode(inode) || !S_ISREG(inode->i_mode))
+ f2fs_wait_on_page_writeback(page, DATA, true);
+ zero_user(page, offset, PAGE_SIZE - offset);
+ if (!cache_only || !f2fs_encrypted_inode(inode) ||
+ !S_ISREG(inode->i_mode))
set_page_dirty(page);
f2fs_put_page(page, 1);
return 0;
@@ -572,7 +580,7 @@ int truncate_blocks(struct inode *inode, u64 from, bool lock)
goto out;
}
- count = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ count = ADDRS_PER_PAGE(dn.node_page, inode);
count -= dn.ofs_in_node;
f2fs_bug_on(sbi, count < 0);
@@ -675,7 +683,7 @@ int f2fs_setattr(struct dentry *dentry, struct iattr *attr)
if (attr->ia_valid & ATTR_SIZE) {
if (f2fs_encrypted_inode(inode) &&
- f2fs_get_encryption_info(inode))
+ fscrypt_get_encryption_info(inode))
return -EACCES;
if (attr->ia_size <= i_size_read(inode)) {
@@ -747,7 +755,7 @@ static int fill_zero(struct inode *inode, pgoff_t index,
if (IS_ERR(page))
return PTR_ERR(page);
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, DATA, true);
zero_user(page, start, len);
set_page_dirty(page);
f2fs_put_page(page, 1);
@@ -772,7 +780,7 @@ int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end)
return err;
}
- end_offset = ADDRS_PER_PAGE(dn.node_page, F2FS_I(inode));
+ end_offset = ADDRS_PER_PAGE(dn.node_page, inode);
count = min(end_offset - dn.ofs_in_node, pg_end - pg_start);
f2fs_bug_on(F2FS_I_SB(inode), count == 0 || count > end_offset);
@@ -795,11 +803,11 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
- pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+ pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
+ pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
- off_start = offset & (PAGE_CACHE_SIZE - 1);
- off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+ off_start = offset & (PAGE_SIZE - 1);
+ off_end = (offset + len) & (PAGE_SIZE - 1);
if (pg_start == pg_end) {
ret = fill_zero(inode, pg_start, off_start,
@@ -809,7 +817,7 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
} else {
if (off_start) {
ret = fill_zero(inode, pg_start++, off_start,
- PAGE_CACHE_SIZE - off_start);
+ PAGE_SIZE - off_start);
if (ret)
return ret;
}
@@ -826,8 +834,8 @@ static int punch_hole(struct inode *inode, loff_t offset, loff_t len)
f2fs_balance_fs(sbi, true);
- blk_start = (loff_t)pg_start << PAGE_CACHE_SHIFT;
- blk_end = (loff_t)pg_end << PAGE_CACHE_SHIFT;
+ blk_start = (loff_t)pg_start << PAGE_SHIFT;
+ blk_end = (loff_t)pg_end << PAGE_SHIFT;
truncate_inode_pages_range(mapping, blk_start,
blk_end - 1);
@@ -858,10 +866,8 @@ static int __exchange_data_block(struct inode *inode, pgoff_t src,
} else {
new_addr = dn.data_blkaddr;
if (!is_checkpointed_data(sbi, new_addr)) {
- dn.data_blkaddr = NULL_ADDR;
/* do not invalidate this block address */
- set_data_blkaddr(&dn);
- f2fs_update_extent_cache(&dn);
+ f2fs_update_data_blkaddr(&dn, NULL_ADDR);
do_replace = true;
}
f2fs_put_dnode(&dn);
@@ -888,7 +894,7 @@ static int __exchange_data_block(struct inode *inode, pgoff_t src,
get_node_info(sbi, dn.nid, &ni);
f2fs_replace_block(sbi, &dn, dn.data_blkaddr, new_addr,
- ni.version, true);
+ ni.version, true, false);
f2fs_put_dnode(&dn);
} else {
struct page *psrc, *pdst;
@@ -896,7 +902,7 @@ static int __exchange_data_block(struct inode *inode, pgoff_t src,
psrc = get_lock_data_page(inode, src, true);
if (IS_ERR(psrc))
return PTR_ERR(psrc);
- pdst = get_new_data_page(inode, NULL, dst, false);
+ pdst = get_new_data_page(inode, NULL, dst, true);
if (IS_ERR(pdst)) {
f2fs_put_page(psrc, 1);
return PTR_ERR(pdst);
@@ -912,9 +918,7 @@ static int __exchange_data_block(struct inode *inode, pgoff_t src,
err_out:
if (!get_dnode_of_data(&dn, src, LOOKUP_NODE)) {
- dn.data_blkaddr = new_addr;
- set_data_blkaddr(&dn);
- f2fs_update_extent_cache(&dn);
+ f2fs_update_data_blkaddr(&dn, new_addr);
f2fs_put_dnode(&dn);
}
return ret;
@@ -954,8 +958,8 @@ static int f2fs_collapse_range(struct inode *inode, loff_t offset, loff_t len)
if (ret)
return ret;
- pg_start = offset >> PAGE_CACHE_SHIFT;
- pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
+ pg_start = offset >> PAGE_SHIFT;
+ pg_end = (offset + len) >> PAGE_SHIFT;
/* write out all dirty pages from offset */
ret = filemap_write_and_wait_range(inode->i_mapping, offset, LLONG_MAX);
@@ -1006,11 +1010,11 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
truncate_pagecache_range(inode, offset, offset + len - 1);
- pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
- pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+ pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
+ pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
- off_start = offset & (PAGE_CACHE_SIZE - 1);
- off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+ off_start = offset & (PAGE_SIZE - 1);
+ off_end = (offset + len) & (PAGE_SIZE - 1);
if (pg_start == pg_end) {
ret = fill_zero(inode, pg_start, off_start,
@@ -1024,12 +1028,12 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
} else {
if (off_start) {
ret = fill_zero(inode, pg_start++, off_start,
- PAGE_CACHE_SIZE - off_start);
+ PAGE_SIZE - off_start);
if (ret)
return ret;
new_size = max_t(loff_t, new_size,
- (loff_t)pg_start << PAGE_CACHE_SHIFT);
+ (loff_t)pg_start << PAGE_SHIFT);
}
for (index = pg_start; index < pg_end; index++) {
@@ -1054,18 +1058,13 @@ static int f2fs_zero_range(struct inode *inode, loff_t offset, loff_t len,
if (dn.data_blkaddr != NEW_ADDR) {
invalidate_blocks(sbi, dn.data_blkaddr);
-
- dn.data_blkaddr = NEW_ADDR;
- set_data_blkaddr(&dn);
-
- dn.data_blkaddr = NULL_ADDR;
- f2fs_update_extent_cache(&dn);
+ f2fs_update_data_blkaddr(&dn, NEW_ADDR);
}
f2fs_put_dnode(&dn);
f2fs_unlock_op(sbi);
new_size = max_t(loff_t, new_size,
- (loff_t)(index + 1) << PAGE_CACHE_SHIFT);
+ (loff_t)(index + 1) << PAGE_SHIFT);
}
if (off_end) {
@@ -1122,8 +1121,8 @@ static int f2fs_insert_range(struct inode *inode, loff_t offset, loff_t len)
truncate_pagecache(inode, offset);
- pg_start = offset >> PAGE_CACHE_SHIFT;
- pg_end = (offset + len) >> PAGE_CACHE_SHIFT;
+ pg_start = offset >> PAGE_SHIFT;
+ pg_end = (offset + len) >> PAGE_SHIFT;
delta = pg_end - pg_start;
nrpages = (i_size_read(inode) + PAGE_SIZE - 1) / PAGE_SIZE;
@@ -1163,11 +1162,11 @@ static int expand_inode_data(struct inode *inode, loff_t offset,
f2fs_balance_fs(sbi, true);
- pg_start = ((unsigned long long) offset) >> PAGE_CACHE_SHIFT;
- pg_end = ((unsigned long long) offset + len) >> PAGE_CACHE_SHIFT;
+ pg_start = ((unsigned long long) offset) >> PAGE_SHIFT;
+ pg_end = ((unsigned long long) offset + len) >> PAGE_SHIFT;
- off_start = offset & (PAGE_CACHE_SIZE - 1);
- off_end = (offset + len) & (PAGE_CACHE_SIZE - 1);
+ off_start = offset & (PAGE_SIZE - 1);
+ off_end = (offset + len) & (PAGE_SIZE - 1);
f2fs_lock_op(sbi);
@@ -1185,12 +1184,12 @@ noalloc:
if (pg_start == pg_end)
new_size = offset + len;
else if (index == pg_start && off_start)
- new_size = (loff_t)(index + 1) << PAGE_CACHE_SHIFT;
+ new_size = (loff_t)(index + 1) << PAGE_SHIFT;
else if (index == pg_end)
- new_size = ((loff_t)index << PAGE_CACHE_SHIFT) +
+ new_size = ((loff_t)index << PAGE_SHIFT) +
off_end;
else
- new_size += PAGE_CACHE_SIZE;
+ new_size += PAGE_SIZE;
}
if (!(mode & FALLOC_FL_KEEP_SIZE) &&
@@ -1257,7 +1256,7 @@ static int f2fs_release_file(struct inode *inode, struct file *filp)
{
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
- commit_inmem_pages(inode, true);
+ drop_inmem_pages(inode);
if (f2fs_is_volatile_file(inode)) {
set_inode_flag(F2FS_I(inode), FI_DROP_CACHE);
filemap_fdatawrite(inode->i_mapping);
@@ -1381,7 +1380,7 @@ static int f2fs_ioc_commit_atomic_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
- ret = commit_inmem_pages(inode, false);
+ ret = commit_inmem_pages(inode);
if (ret) {
set_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
goto err_out;
@@ -1444,7 +1443,7 @@ static int f2fs_ioc_abort_volatile_write(struct file *filp)
if (f2fs_is_atomic_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_ATOMIC_FILE);
- commit_inmem_pages(inode, true);
+ drop_inmem_pages(inode);
}
if (f2fs_is_volatile_file(inode)) {
clear_inode_flag(F2FS_I(inode), FI_VOLATILE_FILE);
@@ -1539,39 +1538,30 @@ static bool uuid_is_nonzero(__u8 u[16])
static int f2fs_ioc_set_encryption_policy(struct file *filp, unsigned long arg)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- struct f2fs_encryption_policy policy;
+ struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
- if (copy_from_user(&policy, (struct f2fs_encryption_policy __user *)arg,
- sizeof(policy)))
+ if (copy_from_user(&policy, (struct fscrypt_policy __user *)arg,
+ sizeof(policy)))
return -EFAULT;
f2fs_update_time(F2FS_I_SB(inode), REQ_TIME);
- return f2fs_process_policy(&policy, inode);
-#else
- return -EOPNOTSUPP;
-#endif
+ return fscrypt_process_policy(inode, &policy);
}
static int f2fs_ioc_get_encryption_policy(struct file *filp, unsigned long arg)
{
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- struct f2fs_encryption_policy policy;
+ struct fscrypt_policy policy;
struct inode *inode = file_inode(filp);
int err;
- err = f2fs_get_policy(inode, &policy);
+ err = fscrypt_get_policy(inode, &policy);
if (err)
return err;
- if (copy_to_user((struct f2fs_encryption_policy __user *)arg, &policy,
- sizeof(policy)))
+ if (copy_to_user((struct fscrypt_policy __user *)arg, &policy, sizeof(policy)))
return -EFAULT;
return 0;
-#else
- return -EOPNOTSUPP;
-#endif
}
static int f2fs_ioc_get_encryption_pwsalt(struct file *filp, unsigned long arg)
@@ -1652,7 +1642,7 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
struct f2fs_defragment *range)
{
struct inode *inode = file_inode(filp);
- struct f2fs_map_blocks map;
+ struct f2fs_map_blocks map = { .m_next_pgofs = NULL };
struct extent_info ei;
pgoff_t pg_start, pg_end;
unsigned int blk_per_seg = sbi->blocks_per_seg;
@@ -1666,8 +1656,8 @@ static int f2fs_defragment_range(struct f2fs_sb_info *sbi,
if (need_inplace_update(inode))
return -EINVAL;
- pg_start = range->start >> PAGE_CACHE_SHIFT;
- pg_end = (range->start + range->len) >> PAGE_CACHE_SHIFT;
+ pg_start = range->start >> PAGE_SHIFT;
+ pg_end = (range->start + range->len) >> PAGE_SHIFT;
f2fs_balance_fs(sbi, true);
@@ -1784,7 +1774,7 @@ clear_out:
out:
inode_unlock(inode);
if (!err)
- range->len = (u64)total << PAGE_CACHE_SHIFT;
+ range->len = (u64)total << PAGE_SHIFT;
return err;
}
@@ -1878,14 +1868,32 @@ long f2fs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg)
static ssize_t f2fs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- struct inode *inode = file_inode(iocb->ki_filp);
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file);
+ ssize_t ret;
if (f2fs_encrypted_inode(inode) &&
- !f2fs_has_encryption_key(inode) &&
- f2fs_get_encryption_info(inode))
+ !fscrypt_has_encryption_key(inode) &&
+ fscrypt_get_encryption_info(inode))
return -EACCES;
- return generic_file_write_iter(iocb, from);
+ inode_lock(inode);
+ ret = generic_write_checks(iocb, from);
+ if (ret > 0) {
+ ret = f2fs_preallocate_blocks(iocb, from);
+ if (!ret)
+ ret = __generic_file_write_iter(iocb, from);
+ }
+ inode_unlock(inode);
+
+ if (ret > 0) {
+ ssize_t err;
+
+ err = generic_write_sync(file, iocb->ki_pos - ret, ret);
+ if (err < 0)
+ ret = err;
+ }
+ return ret;
}
#ifdef CONFIG_COMPAT
diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index f610c2a9b..b0051a978 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -245,6 +245,18 @@ static inline unsigned int get_gc_cost(struct f2fs_sb_info *sbi,
return get_cb_cost(sbi, segno);
}
+static unsigned int count_bits(const unsigned long *addr,
+ unsigned int offset, unsigned int len)
+{
+ unsigned int end = offset + len, sum = 0;
+
+ while (offset < end) {
+ if (test_bit(offset++, addr))
+ ++sum;
+ }
+ return sum;
+}
+
/*
* This function is called from two paths.
* One is garbage collection and the other is SSR segment selection.
@@ -258,9 +270,9 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
{
struct dirty_seglist_info *dirty_i = DIRTY_I(sbi);
struct victim_sel_policy p;
- unsigned int secno, max_cost;
+ unsigned int secno, max_cost, last_victim;
unsigned int last_segment = MAIN_SEGS(sbi);
- int nsearched = 0;
+ unsigned int nsearched = 0;
mutex_lock(&dirty_i->seglist_lock);
@@ -273,6 +285,7 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
if (p.max_search == 0)
goto out;
+ last_victim = sbi->last_victim[p.gc_mode];
if (p.alloc_mode == LFS && gc_type == FG_GC) {
p.min_segno = check_bg_victims(sbi);
if (p.min_segno != NULL_SEGNO)
@@ -295,27 +308,35 @@ static int get_victim_by_default(struct f2fs_sb_info *sbi,
}
p.offset = segno + p.ofs_unit;
- if (p.ofs_unit > 1)
+ if (p.ofs_unit > 1) {
p.offset -= segno % p.ofs_unit;
+ nsearched += count_bits(p.dirty_segmap,
+ p.offset - p.ofs_unit,
+ p.ofs_unit);
+ } else {
+ nsearched++;
+ }
+
secno = GET_SECNO(sbi, segno);
if (sec_usage_check(sbi, secno))
- continue;
+ goto next;
if (gc_type == BG_GC && test_bit(secno, dirty_i->victim_secmap))
- continue;
+ goto next;
cost = get_gc_cost(sbi, segno, &p);
if (p.min_cost > cost) {
p.min_segno = segno;
p.min_cost = cost;
- } else if (unlikely(cost == max_cost)) {
- continue;
}
-
- if (nsearched++ >= p.max_search) {
- sbi->last_victim[p.gc_mode] = segno;
+next:
+ if (nsearched >= p.max_search) {
+ if (!sbi->last_victim[p.gc_mode] && segno <= last_victim)
+ sbi->last_victim[p.gc_mode] = last_victim + 1;
+ else
+ sbi->last_victim[p.gc_mode] = segno + 1;
break;
}
}
@@ -399,7 +420,7 @@ static int check_valid_map(struct f2fs_sb_info *sbi,
* On validity, copy that node with cold status, otherwise (invalid node)
* ignore that.
*/
-static int gc_node_segment(struct f2fs_sb_info *sbi,
+static void gc_node_segment(struct f2fs_sb_info *sbi,
struct f2fs_summary *sum, unsigned int segno, int gc_type)
{
bool initial = true;
@@ -419,7 +440,7 @@ next_step:
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
- return 0;
+ return;
if (check_valid_map(sbi, segno, off) == 0)
continue;
@@ -446,7 +467,7 @@ next_step:
/* set page dirty and write it */
if (gc_type == FG_GC) {
- f2fs_wait_on_page_writeback(node_page, NODE);
+ f2fs_wait_on_page_writeback(node_page, NODE, true);
set_page_dirty(node_page);
} else {
if (!PageWriteback(node_page))
@@ -460,20 +481,6 @@ next_step:
initial = false;
goto next_step;
}
-
- if (gc_type == FG_GC) {
- struct writeback_control wbc = {
- .sync_mode = WB_SYNC_ALL,
- .nr_to_write = LONG_MAX,
- .for_reclaim = 0,
- };
- sync_node_pages(sbi, 0, &wbc);
-
- /* return 1 only if FG_GC succefully reclaimed one */
- if (get_valid_blocks(sbi, segno, 1) == 0)
- return 1;
- }
- return 0;
}
/*
@@ -483,7 +490,7 @@ next_step:
* as indirect or double indirect node blocks, are given, it must be a caller's
* bug.
*/
-block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
+block_t start_bidx_of_node(unsigned int node_ofs, struct inode *inode)
{
unsigned int indirect_blks = 2 * NIDS_PER_BLOCK + 4;
unsigned int bidx;
@@ -500,7 +507,7 @@ block_t start_bidx_of_node(unsigned int node_ofs, struct f2fs_inode_info *fi)
int dec = (node_ofs - indirect_blks - 3) / (NIDS_PER_BLOCK + 1);
bidx = node_ofs - 5 - dec;
}
- return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(fi);
+ return bidx * ADDRS_PER_BLOCK + ADDRS_PER_INODE(inode);
}
static bool is_alive(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
@@ -546,6 +553,7 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
struct f2fs_summary sum;
struct node_info ni;
struct page *page;
+ block_t newaddr;
int err;
/* do not read out */
@@ -567,21 +575,24 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
* don't cache encrypted data into meta inode until previous dirty
* data were writebacked to avoid racing between GC and flush.
*/
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, DATA, true);
get_node_info(fio.sbi, dn.nid, &ni);
set_summary(&sum, dn.nid, dn.ofs_in_node, ni.version);
/* read page */
fio.page = page;
- fio.blk_addr = dn.data_blkaddr;
+ fio.new_blkaddr = fio.old_blkaddr = dn.data_blkaddr;
- fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi),
- fio.blk_addr,
- FGP_LOCK|FGP_CREAT,
- GFP_NOFS);
- if (!fio.encrypted_page)
- goto put_out;
+ allocate_data_block(fio.sbi, NULL, fio.old_blkaddr, &newaddr,
+ &sum, CURSEG_COLD_DATA);
+
+ fio.encrypted_page = pagecache_get_page(META_MAPPING(fio.sbi), newaddr,
+ FGP_LOCK | FGP_CREAT, GFP_NOFS);
+ if (!fio.encrypted_page) {
+ err = -ENOMEM;
+ goto recover_block;
+ }
err = f2fs_submit_page_bio(&fio);
if (err)
@@ -590,33 +601,39 @@ static void move_encrypted_block(struct inode *inode, block_t bidx)
/* write page */
lock_page(fio.encrypted_page);
- if (unlikely(!PageUptodate(fio.encrypted_page)))
+ if (unlikely(!PageUptodate(fio.encrypted_page))) {
+ err = -EIO;
goto put_page_out;
- if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi)))
+ }
+ if (unlikely(fio.encrypted_page->mapping != META_MAPPING(fio.sbi))) {
+ err = -EIO;
goto put_page_out;
+ }
set_page_dirty(fio.encrypted_page);
- f2fs_wait_on_page_writeback(fio.encrypted_page, DATA);
+ f2fs_wait_on_page_writeback(fio.encrypted_page, DATA, true);
if (clear_page_dirty_for_io(fio.encrypted_page))
dec_page_count(fio.sbi, F2FS_DIRTY_META);
set_page_writeback(fio.encrypted_page);
/* allocate block address */
- f2fs_wait_on_page_writeback(dn.node_page, NODE);
- allocate_data_block(fio.sbi, NULL, fio.blk_addr,
- &fio.blk_addr, &sum, CURSEG_COLD_DATA);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
+
fio.rw = WRITE_SYNC;
+ fio.new_blkaddr = newaddr;
f2fs_submit_page_mbio(&fio);
- dn.data_blkaddr = fio.blk_addr;
- set_data_blkaddr(&dn);
- f2fs_update_extent_cache(&dn);
+ f2fs_update_data_blkaddr(&dn, newaddr);
set_inode_flag(F2FS_I(inode), FI_APPEND_WRITE);
if (page->index == 0)
set_inode_flag(F2FS_I(inode), FI_FIRST_BLOCK_WRITTEN);
put_page_out:
f2fs_put_page(fio.encrypted_page, 1);
+recover_block:
+ if (err)
+ __f2fs_replace_block(fio.sbi, &sum, newaddr, fio.old_blkaddr,
+ true, true);
put_out:
f2fs_put_dnode(&dn);
out:
@@ -645,7 +662,7 @@ static void move_data_page(struct inode *inode, block_t bidx, int gc_type)
.encrypted_page = NULL,
};
set_page_dirty(page);
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, DATA, true);
if (clear_page_dirty_for_io(page))
inode_dec_dirty_pages(inode);
set_cold_data(page);
@@ -663,7 +680,7 @@ out:
* If the parent node is not valid or the data block address is different,
* the victim data block is ignored.
*/
-static int gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
+static void gc_data_segment(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
struct gc_inode_list *gc_list, unsigned int segno, int gc_type)
{
struct super_block *sb = sbi->sb;
@@ -686,7 +703,7 @@ next_step:
/* stop BG_GC if there is not enough free sections. */
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, 0))
- return 0;
+ return;
if (check_valid_map(sbi, segno, off) == 0)
continue;
@@ -719,7 +736,7 @@ next_step:
continue;
}
- start_bidx = start_bidx_of_node(nofs, F2FS_I(inode));
+ start_bidx = start_bidx_of_node(nofs, inode);
data_page = get_read_data_page(inode,
start_bidx + ofs_in_node, READA, true);
if (IS_ERR(data_page)) {
@@ -735,7 +752,7 @@ next_step:
/* phase 3 */
inode = find_gc_inode(gc_list, dni.ino);
if (inode) {
- start_bidx = start_bidx_of_node(nofs, F2FS_I(inode))
+ start_bidx = start_bidx_of_node(nofs, inode)
+ ofs_in_node;
if (f2fs_encrypted_inode(inode) && S_ISREG(inode->i_mode))
move_encrypted_block(inode, start_bidx);
@@ -747,15 +764,6 @@ next_step:
if (++phase < 4)
goto next_step;
-
- if (gc_type == FG_GC) {
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
-
- /* return 1 only if FG_GC succefully reclaimed one */
- if (get_valid_blocks(sbi, segno, 1) == 0)
- return 1;
- }
- return 0;
}
static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
@@ -771,53 +779,92 @@ static int __get_victim(struct f2fs_sb_info *sbi, unsigned int *victim,
return ret;
}
-static int do_garbage_collect(struct f2fs_sb_info *sbi, unsigned int segno,
+static int do_garbage_collect(struct f2fs_sb_info *sbi,
+ unsigned int start_segno,
struct gc_inode_list *gc_list, int gc_type)
{
struct page *sum_page;
struct f2fs_summary_block *sum;
struct blk_plug plug;
- int nfree = 0;
+ unsigned int segno = start_segno;
+ unsigned int end_segno = start_segno + sbi->segs_per_sec;
+ int seg_freed = 0;
+ unsigned char type = IS_DATASEG(get_seg_entry(sbi, segno)->type) ?
+ SUM_TYPE_DATA : SUM_TYPE_NODE;
- /* read segment summary of victim */
- sum_page = get_sum_page(sbi, segno);
+ /* readahead multi ssa blocks those have contiguous address */
+ if (sbi->segs_per_sec > 1)
+ ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno),
+ sbi->segs_per_sec, META_SSA, true);
+
+ /* reference all summary page */
+ while (segno < end_segno) {
+ sum_page = get_sum_page(sbi, segno++);
+ unlock_page(sum_page);
+ }
blk_start_plug(&plug);
- sum = page_address(sum_page);
+ for (segno = start_segno; segno < end_segno; segno++) {
+ /* find segment summary of victim */
+ sum_page = find_get_page(META_MAPPING(sbi),
+ GET_SUM_BLOCK(sbi, segno));
+ f2fs_bug_on(sbi, !PageUptodate(sum_page));
+ f2fs_put_page(sum_page, 0);
- /*
- * this is to avoid deadlock:
- * - lock_page(sum_page) - f2fs_replace_block
- * - check_valid_map() - mutex_lock(sentry_lock)
- * - mutex_lock(sentry_lock) - change_curseg()
- * - lock_page(sum_page)
- */
- unlock_page(sum_page);
-
- switch (GET_SUM_TYPE((&sum->footer))) {
- case SUM_TYPE_NODE:
- nfree = gc_node_segment(sbi, sum->entries, segno, gc_type);
- break;
- case SUM_TYPE_DATA:
- nfree = gc_data_segment(sbi, sum->entries, gc_list,
- segno, gc_type);
- break;
+ sum = page_address(sum_page);
+ f2fs_bug_on(sbi, type != GET_SUM_TYPE((&sum->footer)));
+
+ /*
+ * this is to avoid deadlock:
+ * - lock_page(sum_page) - f2fs_replace_block
+ * - check_valid_map() - mutex_lock(sentry_lock)
+ * - mutex_lock(sentry_lock) - change_curseg()
+ * - lock_page(sum_page)
+ */
+
+ if (type == SUM_TYPE_NODE)
+ gc_node_segment(sbi, sum->entries, segno, gc_type);
+ else
+ gc_data_segment(sbi, sum->entries, gc_list, segno,
+ gc_type);
+
+ stat_inc_seg_count(sbi, type, gc_type);
+
+ f2fs_put_page(sum_page, 0);
+ }
+
+ if (gc_type == FG_GC) {
+ if (type == SUM_TYPE_NODE) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .for_reclaim = 0,
+ };
+ sync_node_pages(sbi, 0, &wbc);
+ } else {
+ f2fs_submit_merged_bio(sbi, DATA, WRITE);
+ }
}
+
blk_finish_plug(&plug);
- stat_inc_seg_count(sbi, GET_SUM_TYPE((&sum->footer)), gc_type);
+ if (gc_type == FG_GC) {
+ while (start_segno < end_segno)
+ if (get_valid_blocks(sbi, start_segno++, 1) == 0)
+ seg_freed++;
+ }
+
stat_inc_call_count(sbi->stat_info);
- f2fs_put_page(sum_page, 0);
- return nfree;
+ return seg_freed;
}
int f2fs_gc(struct f2fs_sb_info *sbi, bool sync)
{
- unsigned int segno, i;
+ unsigned int segno;
int gc_type = sync ? FG_GC : BG_GC;
- int sec_freed = 0;
+ int sec_freed = 0, seg_freed;
int ret = -EINVAL;
struct cp_control cpc;
struct gc_inode_list gc_list = {
@@ -838,30 +885,24 @@ gc_more:
if (gc_type == BG_GC && has_not_enough_free_secs(sbi, sec_freed)) {
gc_type = FG_GC;
+ /*
+ * If there is no victim and no prefree segment but still not
+ * enough free sections, we should flush dent/node blocks and do
+ * garbage collections.
+ */
if (__get_victim(sbi, &segno, gc_type) || prefree_segments(sbi))
write_checkpoint(sbi, &cpc);
+ else if (has_not_enough_free_secs(sbi, 0))
+ write_checkpoint(sbi, &cpc);
}
if (segno == NULL_SEGNO && !__get_victim(sbi, &segno, gc_type))
goto stop;
ret = 0;
- /* readahead multi ssa blocks those have contiguous address */
- if (sbi->segs_per_sec > 1)
- ra_meta_pages(sbi, GET_SUM_BLOCK(sbi, segno), sbi->segs_per_sec,
- META_SSA, true);
-
- for (i = 0; i < sbi->segs_per_sec; i++) {
- /*
- * for FG_GC case, halt gcing left segments once failed one
- * of segments in selected section to avoid long latency.
- */
- if (!do_garbage_collect(sbi, segno + i, &gc_list, gc_type) &&
- gc_type == FG_GC)
- break;
- }
+ seg_freed = do_garbage_collect(sbi, segno, &gc_list, gc_type);
- if (i == sbi->segs_per_sec && gc_type == FG_GC)
+ if (gc_type == FG_GC && seg_freed == sbi->segs_per_sec)
sec_freed++;
if (gc_type == FG_GC)
diff --git a/fs/f2fs/inline.c b/fs/f2fs/inline.c
index c3f0b7d4c..a2fbe6f42 100644
--- a/fs/f2fs/inline.c
+++ b/fs/f2fs/inline.c
@@ -51,7 +51,7 @@ void read_inline_data(struct page *page, struct page *ipage)
f2fs_bug_on(F2FS_P_SB(page), page->index);
- zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
/* Copy the whole inline data block */
src_addr = inline_data_addr(ipage);
@@ -71,7 +71,7 @@ bool truncate_inline_inode(struct page *ipage, u64 from)
addr = inline_data_addr(ipage);
- f2fs_wait_on_page_writeback(ipage, NODE);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
memset(addr + from, 0, MAX_INLINE_DATA - from);
return true;
@@ -93,7 +93,7 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
}
if (page->index)
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
else
read_inline_data(page, ipage);
@@ -105,7 +105,6 @@ int f2fs_read_inline_data(struct inode *inode, struct page *page)
int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
{
- void *src_addr, *dst_addr;
struct f2fs_io_info fio = {
.sbi = F2FS_I_SB(dn->inode),
.type = DATA,
@@ -115,8 +114,6 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
};
int dirty, err;
- f2fs_bug_on(F2FS_I_SB(dn->inode), page->index);
-
if (!f2fs_exist_data(dn->inode))
goto clear_out;
@@ -124,21 +121,9 @@ int f2fs_convert_inline_page(struct dnode_of_data *dn, struct page *page)
if (err)
return err;
- f2fs_wait_on_page_writeback(page, DATA);
-
- if (PageUptodate(page))
- goto no_update;
-
- zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+ f2fs_bug_on(F2FS_P_SB(page), PageWriteback(page));
- /* Copy the whole inline data block */
- src_addr = inline_data_addr(dn->inode_page);
- dst_addr = kmap_atomic(page);
- memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
- flush_dcache_page(page);
- kunmap_atomic(dst_addr);
- SetPageUptodate(page);
-no_update:
+ read_inline_data(page, dn->inode_page);
set_page_dirty(page);
/* clear dirty state */
@@ -146,11 +131,9 @@ no_update:
/* write data page to try to make data consistent */
set_page_writeback(page);
- fio.blk_addr = dn->data_blkaddr;
+ fio.old_blkaddr = dn->data_blkaddr;
write_data_page(dn, &fio);
- set_data_blkaddr(dn);
- f2fs_update_extent_cache(dn);
- f2fs_wait_on_page_writeback(page, DATA);
+ f2fs_wait_on_page_writeback(page, DATA, true);
if (dirty)
inode_dec_dirty_pages(dn->inode);
@@ -159,6 +142,7 @@ no_update:
/* clear inline data and flag after data writeback */
truncate_inline_inode(dn->inode_page, 0);
+ clear_inline_node(dn->inode_page);
clear_out:
stat_dec_inline_inode(dn->inode);
f2fs_clear_inline_inode(dn->inode);
@@ -223,7 +207,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
f2fs_bug_on(F2FS_I_SB(inode), page->index);
- f2fs_wait_on_page_writeback(dn.inode_page, NODE);
+ f2fs_wait_on_page_writeback(dn.inode_page, NODE, true);
src_addr = kmap_atomic(page);
dst_addr = inline_data_addr(dn.inode_page);
memcpy(dst_addr, src_addr, MAX_INLINE_DATA);
@@ -233,6 +217,7 @@ int f2fs_write_inline_data(struct inode *inode, struct page *page)
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
sync_inode_page(&dn);
+ clear_inline_node(dn.inode_page);
f2fs_put_dnode(&dn);
return 0;
}
@@ -261,7 +246,7 @@ process_inline:
ipage = get_node_page(sbi, inode->i_ino);
f2fs_bug_on(sbi, IS_ERR(ipage));
- f2fs_wait_on_page_writeback(ipage, NODE);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
src_addr = inline_data_addr(npage);
dst_addr = inline_data_addr(ipage);
@@ -292,7 +277,7 @@ process_inline:
}
struct f2fs_dir_entry *find_in_inline_dir(struct inode *dir,
- struct f2fs_filename *fname, struct page **res_page)
+ struct fscrypt_name *fname, struct page **res_page)
{
struct f2fs_sb_info *sbi = F2FS_SB(dir->i_sb);
struct f2fs_inline_dentry *inline_dentry;
@@ -389,8 +374,8 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
if (err)
goto out;
- f2fs_wait_on_page_writeback(page, DATA);
- zero_user_segment(page, MAX_INLINE_DATA, PAGE_CACHE_SIZE);
+ f2fs_wait_on_page_writeback(page, DATA, true);
+ zero_user_segment(page, MAX_INLINE_DATA, PAGE_SIZE);
dentry_blk = kmap_atomic(page);
@@ -420,8 +405,8 @@ static int f2fs_convert_inline_dir(struct inode *dir, struct page *ipage,
stat_dec_inline_dir(dir);
clear_inode_flag(F2FS_I(dir), FI_INLINE_DENTRY);
- if (i_size_read(dir) < PAGE_CACHE_SIZE) {
- i_size_write(dir, PAGE_CACHE_SIZE);
+ if (i_size_read(dir) < PAGE_SIZE) {
+ i_size_write(dir, PAGE_SIZE);
set_inode_flag(F2FS_I(dir), FI_UPDATE_DIR);
}
@@ -469,7 +454,7 @@ int f2fs_add_inline_entry(struct inode *dir, const struct qstr *name,
}
}
- f2fs_wait_on_page_writeback(ipage, NODE);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
name_hash = f2fs_dentry_hash(name);
make_dentry_ptr(NULL, &d, (void *)dentry_blk, 2);
@@ -507,7 +492,7 @@ void f2fs_delete_inline_entry(struct f2fs_dir_entry *dentry, struct page *page,
int i;
lock_page(page);
- f2fs_wait_on_page_writeback(page, NODE);
+ f2fs_wait_on_page_writeback(page, NODE, true);
inline_dentry = inline_data_addr(page);
bit_pos = dentry - inline_dentry->dentry;
@@ -550,7 +535,7 @@ bool f2fs_empty_inline_dir(struct inode *dir)
}
int f2fs_read_inline_dir(struct file *file, struct dir_context *ctx,
- struct f2fs_str *fstr)
+ struct fscrypt_str *fstr)
{
struct inode *inode = file_inode(file);
struct f2fs_inline_dentry *inline_dentry = NULL;
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 2adeff26b..cb269c46a 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -83,7 +83,7 @@ static void __recover_inline_status(struct inode *inode, struct page *ipage)
while (start < end) {
if (*start++) {
- f2fs_wait_on_page_writeback(ipage, NODE);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
set_inode_flag(F2FS_I(inode), FI_DATA_EXIST);
set_raw_inline(F2FS_I(inode), F2FS_INODE(ipage));
@@ -227,7 +227,7 @@ int update_inode(struct inode *inode, struct page *node_page)
{
struct f2fs_inode *ri;
- f2fs_wait_on_page_writeback(node_page, NODE);
+ f2fs_wait_on_page_writeback(node_page, NODE, true);
ri = F2FS_INODE(node_page);
@@ -263,6 +263,10 @@ int update_inode(struct inode *inode, struct page *node_page)
set_cold_node(inode, node_page);
clear_inode_flag(F2FS_I(inode), FI_DIRTY_INODE);
+ /* deleted inode */
+ if (inode->i_nlink == 0)
+ clear_inline_node(node_page);
+
return set_page_dirty(node_page);
}
@@ -320,7 +324,7 @@ void f2fs_evict_inode(struct inode *inode)
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
- commit_inmem_pages(inode, true);
+ drop_inmem_pages(inode);
trace_f2fs_evict_inode(inode);
truncate_inode_pages_final(&inode->i_data);
@@ -385,10 +389,7 @@ no_delete:
}
}
out_clear:
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- if (fi->i_crypt_info)
- f2fs_free_encryption_info(inode, fi->i_crypt_info);
-#endif
+ fscrypt_put_encryption_info(inode, NULL);
clear_inode(inode);
}
diff --git a/fs/f2fs/namei.c b/fs/f2fs/namei.c
index 7e9e38769..013e57932 100644
--- a/fs/f2fs/namei.c
+++ b/fs/f2fs/namei.c
@@ -169,7 +169,7 @@ static int f2fs_link(struct dentry *old_dentry, struct inode *dir,
int err;
if (f2fs_encrypted_inode(dir) &&
- !f2fs_is_child_context_consistent_with_parent(dir, inode))
+ !fscrypt_has_permitted_context(dir, inode))
return -EPERM;
f2fs_balance_fs(sbi, true);
@@ -260,6 +260,22 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
struct page *page;
nid_t ino;
int err = 0;
+ unsigned int root_ino = F2FS_ROOT_INO(F2FS_I_SB(dir));
+
+ if (f2fs_encrypted_inode(dir)) {
+ int res = fscrypt_get_encryption_info(dir);
+
+ /*
+ * DCACHE_ENCRYPTED_WITH_KEY is set if the dentry is
+ * created while the directory was encrypted and we
+ * don't have access to the key.
+ */
+ if (fscrypt_has_encryption_key(dir))
+ fscrypt_set_encrypted_dentry(dentry);
+ fscrypt_set_d_op(dentry);
+ if (res && res != -ENOKEY)
+ return ERR_PTR(res);
+ }
if (dentry->d_name.len > F2FS_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -276,15 +292,29 @@ static struct dentry *f2fs_lookup(struct inode *dir, struct dentry *dentry,
if (IS_ERR(inode))
return ERR_CAST(inode);
+ if ((dir->i_ino == root_ino) && f2fs_has_inline_dots(dir)) {
+ err = __recover_dot_dentries(dir, root_ino);
+ if (err)
+ goto err_out;
+ }
+
if (f2fs_has_inline_dots(inode)) {
err = __recover_dot_dentries(inode, dir->i_ino);
if (err)
goto err_out;
}
+ if (!IS_ERR(inode) && f2fs_encrypted_inode(dir) &&
+ (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
+ !fscrypt_has_permitted_context(dir, inode)) {
+ bool nokey = f2fs_encrypted_inode(inode) &&
+ !fscrypt_has_encryption_key(inode);
+ err = nokey ? -ENOKEY : -EPERM;
+ goto err_out;
+ }
return d_splice_alias(inode, dentry);
err_out:
- iget_failed(inode);
+ iput(inode);
return ERR_PTR(err);
}
@@ -345,13 +375,23 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
struct f2fs_sb_info *sbi = F2FS_I_SB(dir);
struct inode *inode;
size_t len = strlen(symname);
- size_t p_len;
- char *p_str;
- struct f2fs_str disk_link = FSTR_INIT(NULL, 0);
- struct f2fs_encrypted_symlink_data *sd = NULL;
+ struct fscrypt_str disk_link = FSTR_INIT((char *)symname, len + 1);
+ struct fscrypt_symlink_data *sd = NULL;
int err;
- if (len > dir->i_sb->s_blocksize)
+ if (f2fs_encrypted_inode(dir)) {
+ err = fscrypt_get_encryption_info(dir);
+ if (err)
+ return err;
+
+ if (!fscrypt_has_encryption_key(dir))
+ return -EPERM;
+
+ disk_link.len = (fscrypt_fname_encrypted_size(dir, len) +
+ sizeof(struct fscrypt_symlink_data));
+ }
+
+ if (disk_link.len > dir->i_sb->s_blocksize)
return -ENAMETOOLONG;
inode = f2fs_new_inode(dir, S_IFLNK | S_IRWXUGO);
@@ -374,42 +414,36 @@ static int f2fs_symlink(struct inode *dir, struct dentry *dentry,
f2fs_unlock_op(sbi);
alloc_nid_done(sbi, inode->i_ino);
- if (f2fs_encrypted_inode(dir)) {
+ if (f2fs_encrypted_inode(inode)) {
struct qstr istr = QSTR_INIT(symname, len);
+ struct fscrypt_str ostr;
- err = f2fs_get_encryption_info(inode);
- if (err)
+ sd = kzalloc(disk_link.len, GFP_NOFS);
+ if (!sd) {
+ err = -ENOMEM;
goto err_out;
+ }
- err = f2fs_fname_crypto_alloc_buffer(inode, len, &disk_link);
+ err = fscrypt_get_encryption_info(inode);
if (err)
goto err_out;
- err = f2fs_fname_usr_to_disk(inode, &istr, &disk_link);
- if (err < 0)
- goto err_out;
-
- p_len = encrypted_symlink_data_len(disk_link.len) + 1;
-
- if (p_len > dir->i_sb->s_blocksize) {
- err = -ENAMETOOLONG;
+ if (!fscrypt_has_encryption_key(inode)) {
+ err = -EPERM;
goto err_out;
}
- sd = kzalloc(p_len, GFP_NOFS);
- if (!sd) {
- err = -ENOMEM;
+ ostr.name = sd->encrypted_path;
+ ostr.len = disk_link.len;
+ err = fscrypt_fname_usr_to_disk(inode, &istr, &ostr);
+ if (err < 0)
goto err_out;
- }
- memcpy(sd->encrypted_path, disk_link.name, disk_link.len);
- sd->len = cpu_to_le16(disk_link.len);
- p_str = (char *)sd;
- } else {
- p_len = len + 1;
- p_str = (char *)symname;
+
+ sd->len = cpu_to_le16(ostr.len);
+ disk_link.name = (char *)sd;
}
- err = page_symlink(inode, p_str, p_len);
+ err = page_symlink(inode, disk_link.name, disk_link.len);
err_out:
d_instantiate(dentry, inode);
@@ -425,7 +459,8 @@ err_out:
* performance regression.
*/
if (!err) {
- filemap_write_and_wait_range(inode->i_mapping, 0, p_len - 1);
+ filemap_write_and_wait_range(inode->i_mapping, 0,
+ disk_link.len - 1);
if (IS_DIRSYNC(dir))
f2fs_sync_fs(sbi->sb, 1);
@@ -434,7 +469,6 @@ err_out:
}
kfree(sd);
- f2fs_fname_crypto_free_buffer(&disk_link);
return err;
out:
handle_failed_inode(inode);
@@ -582,7 +616,7 @@ out:
static int f2fs_tmpfile(struct inode *dir, struct dentry *dentry, umode_t mode)
{
if (f2fs_encrypted_inode(dir)) {
- int err = f2fs_get_encryption_info(dir);
+ int err = fscrypt_get_encryption_info(dir);
if (err)
return err;
}
@@ -608,11 +642,11 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
struct f2fs_dir_entry *old_dir_entry = NULL;
struct f2fs_dir_entry *old_entry;
struct f2fs_dir_entry *new_entry;
+ bool is_old_inline = f2fs_has_inline_dentry(old_dir);
int err = -ENOENT;
if ((old_dir != new_dir) && f2fs_encrypted_inode(new_dir) &&
- !f2fs_is_child_context_consistent_with_parent(new_dir,
- old_inode)) {
+ !fscrypt_has_permitted_context(new_dir, old_inode)) {
err = -EPERM;
goto out;
}
@@ -654,8 +688,9 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
if (err)
goto put_out_dir;
- if (update_dent_inode(old_inode, new_inode,
- &new_dentry->d_name)) {
+ err = update_dent_inode(old_inode, new_inode,
+ &new_dentry->d_name);
+ if (err) {
release_orphan_inode(sbi);
goto put_out_dir;
}
@@ -693,6 +728,26 @@ static int f2fs_rename(struct inode *old_dir, struct dentry *old_dentry,
inc_nlink(new_dir);
update_inode_page(new_dir);
}
+
+ /*
+ * old entry and new entry can locate in the same inline
+ * dentry in inode, when attaching new entry in inline dentry,
+ * it could force inline dentry conversion, after that,
+ * old_entry and old_page will point to wrong address, in
+ * order to avoid this, let's do the check and update here.
+ */
+ if (is_old_inline && !f2fs_has_inline_dentry(old_dir)) {
+ f2fs_put_page(old_page, 0);
+ old_page = NULL;
+
+ old_entry = f2fs_find_entry(old_dir,
+ &old_dentry->d_name, &old_page);
+ if (!old_entry) {
+ err = -EIO;
+ f2fs_unlock_op(sbi);
+ goto out_whiteout;
+ }
+ }
}
down_write(&F2FS_I(old_inode)->i_sem);
@@ -771,11 +826,9 @@ static int f2fs_cross_rename(struct inode *old_dir, struct dentry *old_dentry,
int err = -ENOENT;
if ((f2fs_encrypted_inode(old_dir) || f2fs_encrypted_inode(new_dir)) &&
- (old_dir != new_dir) &&
- (!f2fs_is_child_context_consistent_with_parent(new_dir,
- old_inode) ||
- !f2fs_is_child_context_consistent_with_parent(old_dir,
- new_inode)))
+ (old_dir != new_dir) &&
+ (!fscrypt_has_permitted_context(new_dir, old_inode) ||
+ !fscrypt_has_permitted_context(old_dir, new_inode)))
return -EPERM;
old_entry = f2fs_find_entry(old_dir, &old_dentry->d_name, &old_page);
@@ -937,16 +990,15 @@ static int f2fs_rename2(struct inode *old_dir, struct dentry *old_dentry,
return f2fs_rename(old_dir, old_dentry, new_dir, new_dentry, flags);
}
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
static const char *f2fs_encrypted_get_link(struct dentry *dentry,
struct inode *inode,
struct delayed_call *done)
{
struct page *cpage = NULL;
char *caddr, *paddr = NULL;
- struct f2fs_str cstr = FSTR_INIT(NULL, 0);
- struct f2fs_str pstr = FSTR_INIT(NULL, 0);
- struct f2fs_encrypted_symlink_data *sd;
+ struct fscrypt_str cstr = FSTR_INIT(NULL, 0);
+ struct fscrypt_str pstr = FSTR_INIT(NULL, 0);
+ struct fscrypt_symlink_data *sd;
loff_t size = min_t(loff_t, i_size_read(inode), PAGE_SIZE - 1);
u32 max_size = inode->i_sb->s_blocksize;
int res;
@@ -954,7 +1006,7 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
if (!dentry)
return ERR_PTR(-ECHILD);
- res = f2fs_get_encryption_info(inode);
+ res = fscrypt_get_encryption_info(inode);
if (res)
return ERR_PTR(res);
@@ -965,7 +1017,8 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
caddr[size] = 0;
/* Symlink is encrypted */
- sd = (struct f2fs_encrypted_symlink_data *)caddr;
+ sd = (struct fscrypt_symlink_data *)caddr;
+ cstr.name = sd->encrypted_path;
cstr.len = le16_to_cpu(sd->len);
/* this is broken symlink case */
@@ -973,29 +1026,20 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
res = -ENOENT;
goto errout;
}
- cstr.name = kmalloc(cstr.len, GFP_NOFS);
- if (!cstr.name) {
- res = -ENOMEM;
- goto errout;
- }
- memcpy(cstr.name, sd->encrypted_path, cstr.len);
- if ((cstr.len + sizeof(struct f2fs_encrypted_symlink_data) - 1) >
- max_size) {
+ if ((cstr.len + sizeof(struct fscrypt_symlink_data) - 1) > max_size) {
/* Symlink data on the disk is corrupted */
res = -EIO;
goto errout;
}
- res = f2fs_fname_crypto_alloc_buffer(inode, cstr.len, &pstr);
+ res = fscrypt_fname_alloc_buffer(inode, cstr.len, &pstr);
if (res)
goto errout;
- res = f2fs_fname_disk_to_usr(inode, NULL, &cstr, &pstr);
+ res = fscrypt_fname_disk_to_usr(inode, 0, 0, &cstr, &pstr);
if (res < 0)
goto errout;
- kfree(cstr.name);
-
/* this is broken symlink case */
if (unlikely(pstr.name[0] == 0)) {
res = -ENOENT;
@@ -1007,13 +1051,12 @@ static const char *f2fs_encrypted_get_link(struct dentry *dentry,
/* Null-terminate the name */
paddr[res] = '\0';
- page_cache_release(cpage);
+ put_page(cpage);
set_delayed_call(done, kfree_link, paddr);
return paddr;
errout:
- kfree(cstr.name);
- f2fs_fname_crypto_free_buffer(&pstr);
- page_cache_release(cpage);
+ fscrypt_fname_free_buffer(&pstr);
+ put_page(cpage);
return ERR_PTR(res);
}
@@ -1029,7 +1072,6 @@ const struct inode_operations f2fs_encrypted_symlink_inode_operations = {
.removexattr = generic_removexattr,
#endif
};
-#endif
const struct inode_operations f2fs_dir_inode_operations = {
.create = f2fs_create,
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 342597a58..1a33de9d8 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -46,11 +46,11 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
*/
if (type == FREE_NIDS) {
mem_size = (nm_i->fcnt * sizeof(struct free_nid)) >>
- PAGE_CACHE_SHIFT;
+ PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == NAT_ENTRIES) {
mem_size = (nm_i->nat_cnt * sizeof(struct nat_entry)) >>
- PAGE_CACHE_SHIFT;
+ PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 2);
} else if (type == DIRTY_DENTS) {
if (sbi->sb->s_bdi->wb.dirty_exceeded)
@@ -62,13 +62,13 @@ bool available_free_memory(struct f2fs_sb_info *sbi, int type)
for (i = 0; i <= UPDATE_INO; i++)
mem_size += (sbi->im[i].ino_num *
- sizeof(struct ino_entry)) >> PAGE_CACHE_SHIFT;
+ sizeof(struct ino_entry)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else if (type == EXTENT_CACHE) {
mem_size = (atomic_read(&sbi->total_ext_tree) *
sizeof(struct extent_tree) +
atomic_read(&sbi->total_ext_node) *
- sizeof(struct extent_node)) >> PAGE_CACHE_SHIFT;
+ sizeof(struct extent_node)) >> PAGE_SHIFT;
res = mem_size < ((avail_ram * nm_i->ram_thresh / 100) >> 1);
} else {
if (!sbi->sb->s_bdi->wb.dirty_exceeded)
@@ -121,7 +121,7 @@ static struct page *get_next_nat_page(struct f2fs_sb_info *sbi, nid_t nid)
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
- memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+ memcpy(dst_addr, src_addr, PAGE_SIZE);
set_page_dirty(dst_page);
f2fs_put_page(src_page, 1);
@@ -257,15 +257,20 @@ static struct nat_entry *grab_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid)
return new;
}
-static void cache_nat_entry(struct f2fs_nm_info *nm_i, nid_t nid,
+static void cache_nat_entry(struct f2fs_sb_info *sbi, nid_t nid,
struct f2fs_nat_entry *ne)
{
+ struct f2fs_nm_info *nm_i = NM_I(sbi);
struct nat_entry *e;
e = __lookup_nat_cache(nm_i, nid);
if (!e) {
e = grab_nat_entry(nm_i, nid);
node_info_from_raw_nat(&e->ni, ne);
+ } else {
+ f2fs_bug_on(sbi, nat_get_ino(e) != ne->ino ||
+ nat_get_blkaddr(e) != ne->block_addr ||
+ nat_get_version(e) != ne->version);
}
}
@@ -354,7 +359,7 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
nid_t start_nid = START_NID(nid);
struct f2fs_nat_block *nat_blk;
struct page *page = NULL;
@@ -371,23 +376,20 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
ni->ino = nat_get_ino(e);
ni->blk_addr = nat_get_blkaddr(e);
ni->version = nat_get_version(e);
- }
- up_read(&nm_i->nat_tree_lock);
- if (e)
+ up_read(&nm_i->nat_tree_lock);
return;
+ }
memset(&ne, 0, sizeof(struct f2fs_nat_entry));
- down_write(&nm_i->nat_tree_lock);
-
/* Check current segment summary */
- mutex_lock(&curseg->curseg_mutex);
- i = lookup_journal_in_cursum(sum, NAT_JOURNAL, nid, 0);
+ down_read(&curseg->journal_rwsem);
+ i = lookup_journal_in_cursum(journal, NAT_JOURNAL, nid, 0);
if (i >= 0) {
- ne = nat_in_journal(sum, i);
+ ne = nat_in_journal(journal, i);
node_info_from_raw_nat(ni, &ne);
}
- mutex_unlock(&curseg->curseg_mutex);
+ up_read(&curseg->journal_rwsem);
if (i >= 0)
goto cache;
@@ -398,19 +400,52 @@ void get_node_info(struct f2fs_sb_info *sbi, nid_t nid, struct node_info *ni)
node_info_from_raw_nat(ni, &ne);
f2fs_put_page(page, 1);
cache:
+ up_read(&nm_i->nat_tree_lock);
/* cache nat entry */
- cache_nat_entry(NM_I(sbi), nid, &ne);
+ down_write(&nm_i->nat_tree_lock);
+ cache_nat_entry(sbi, nid, &ne);
up_write(&nm_i->nat_tree_lock);
}
+pgoff_t get_next_page_offset(struct dnode_of_data *dn, pgoff_t pgofs)
+{
+ const long direct_index = ADDRS_PER_INODE(dn->inode);
+ const long direct_blks = ADDRS_PER_BLOCK;
+ const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
+ unsigned int skipped_unit = ADDRS_PER_BLOCK;
+ int cur_level = dn->cur_level;
+ int max_level = dn->max_level;
+ pgoff_t base = 0;
+
+ if (!dn->max_level)
+ return pgofs + 1;
+
+ while (max_level-- > cur_level)
+ skipped_unit *= NIDS_PER_BLOCK;
+
+ switch (dn->max_level) {
+ case 3:
+ base += 2 * indirect_blks;
+ case 2:
+ base += 2 * direct_blks;
+ case 1:
+ base += direct_index;
+ break;
+ default:
+ f2fs_bug_on(F2FS_I_SB(dn->inode), 1);
+ }
+
+ return ((pgofs - base) / skipped_unit + 1) * skipped_unit + base;
+}
+
/*
* The maximum depth is four.
* Offset[0] will have raw inode offset.
*/
-static int get_node_path(struct f2fs_inode_info *fi, long block,
+static int get_node_path(struct inode *inode, long block,
int offset[4], unsigned int noffset[4])
{
- const long direct_index = ADDRS_PER_INODE(fi);
+ const long direct_index = ADDRS_PER_INODE(inode);
const long direct_blks = ADDRS_PER_BLOCK;
const long dptrs_per_blk = NIDS_PER_BLOCK;
const long indirect_blks = ADDRS_PER_BLOCK * NIDS_PER_BLOCK;
@@ -495,10 +530,10 @@ int get_dnode_of_data(struct dnode_of_data *dn, pgoff_t index, int mode)
int offset[4];
unsigned int noffset[4];
nid_t nids[4];
- int level, i;
+ int level, i = 0;
int err = 0;
- level = get_node_path(F2FS_I(dn->inode), index, offset, noffset);
+ level = get_node_path(dn->inode, index, offset, noffset);
nids[0] = dn->inode->i_ino;
npage[0] = dn->inode_page;
@@ -585,6 +620,10 @@ release_pages:
release_out:
dn->inode_page = NULL;
dn->node_page = NULL;
+ if (err == -ENOENT) {
+ dn->cur_level = i;
+ dn->max_level = level;
+ }
return err;
}
@@ -792,7 +831,7 @@ int truncate_inode_blocks(struct inode *inode, pgoff_t from)
trace_f2fs_truncate_inode_blocks_enter(inode, from);
- level = get_node_path(F2FS_I(inode), from, offset, noffset);
+ level = get_node_path(inode, from, offset, noffset);
restart:
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
@@ -861,7 +900,7 @@ skip_partial:
f2fs_put_page(page, 1);
goto restart;
}
- f2fs_wait_on_page_writeback(page, NODE);
+ f2fs_wait_on_page_writeback(page, NODE, true);
ri->i_nid[offset[0] - NODE_DIR1_BLOCK] = 0;
set_page_dirty(page);
unlock_page(page);
@@ -976,7 +1015,7 @@ struct page *new_node_page(struct dnode_of_data *dn,
new_ni.ino = dn->inode->i_ino;
set_node_addr(sbi, &new_ni, NEW_ADDR, false);
- f2fs_wait_on_page_writeback(page, NODE);
+ f2fs_wait_on_page_writeback(page, NODE, true);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(dn->inode, page);
SetPageUptodate(page);
@@ -1029,7 +1068,7 @@ static int read_node_page(struct page *page, int rw)
if (PageUptodate(page))
return LOCKED_PAGE;
- fio.blk_addr = ni.blk_addr;
+ fio.new_blkaddr = fio.old_blkaddr = ni.blk_addr;
return f2fs_submit_page_bio(&fio);
}
@@ -1045,12 +1084,11 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
return;
f2fs_bug_on(sbi, check_nid_range(sbi, nid));
- apage = find_get_page(NODE_MAPPING(sbi), nid);
- if (apage && PageUptodate(apage)) {
- f2fs_put_page(apage, 0);
+ rcu_read_lock();
+ apage = radix_tree_lookup(&NODE_MAPPING(sbi)->page_tree, nid);
+ rcu_read_unlock();
+ if (apage)
return;
- }
- f2fs_put_page(apage, 0);
apage = grab_cache_page(NODE_MAPPING(sbi), nid);
if (!apage)
@@ -1063,7 +1101,7 @@ void ra_node_page(struct f2fs_sb_info *sbi, nid_t nid)
/*
* readahead MAX_RA_NODE number of node pages.
*/
-void ra_node_pages(struct page *parent, int start)
+static void ra_node_pages(struct page *parent, int start)
{
struct f2fs_sb_info *sbi = F2FS_P_SB(parent);
struct blk_plug plug;
@@ -1083,7 +1121,7 @@ void ra_node_pages(struct page *parent, int start)
blk_finish_plug(&plug);
}
-struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
+static struct page *__get_node_page(struct f2fs_sb_info *sbi, pgoff_t nid,
struct page *parent, int start)
{
struct page *page;
@@ -1154,19 +1192,57 @@ void sync_inode_page(struct dnode_of_data *dn)
dn->node_changed = ret ? true: false;
}
+static void flush_inline_data(struct f2fs_sb_info *sbi, nid_t ino)
+{
+ struct inode *inode;
+ struct page *page;
+
+ /* should flush inline_data before evict_inode */
+ inode = ilookup(sbi->sb, ino);
+ if (!inode)
+ return;
+
+ page = pagecache_get_page(inode->i_mapping, 0, FGP_NOWAIT, 0);
+ if (!page)
+ goto iput_out;
+
+ if (!trylock_page(page))
+ goto release_out;
+
+ if (!PageUptodate(page))
+ goto page_out;
+
+ if (!PageDirty(page))
+ goto page_out;
+
+ if (!clear_page_dirty_for_io(page))
+ goto page_out;
+
+ if (!f2fs_write_inline_data(inode, page))
+ inode_dec_dirty_pages(inode);
+ else
+ set_page_dirty(page);
+page_out:
+ unlock_page(page);
+release_out:
+ f2fs_put_page(page, 0);
+iput_out:
+ iput(inode);
+}
+
int sync_node_pages(struct f2fs_sb_info *sbi, nid_t ino,
struct writeback_control *wbc)
{
pgoff_t index, end;
struct pagevec pvec;
int step = ino ? 2 : 0;
- int nwritten = 0, wrote = 0;
+ int nwritten = 0;
pagevec_init(&pvec, 0);
next_step:
index = 0;
- end = LONG_MAX;
+ end = ULONG_MAX;
while (index <= end) {
int i, nr_pages;
@@ -1203,6 +1279,7 @@ next_step:
* If an fsync mode,
* we should not skip writing node pages.
*/
+lock_node:
if (ino && ino_of_node(page) == ino)
lock_page(page);
else if (!trylock_page(page))
@@ -1221,6 +1298,17 @@ continue_unlock:
goto continue_unlock;
}
+ /* flush inline_data */
+ if (!ino && is_inline_node(page)) {
+ clear_inline_node(page);
+ unlock_page(page);
+ flush_inline_data(sbi, ino_of_node(page));
+ goto lock_node;
+ }
+
+ f2fs_wait_on_page_writeback(page, NODE, true);
+
+ BUG_ON(PageWriteback(page));
if (!clear_page_dirty_for_io(page))
goto continue_unlock;
@@ -1238,8 +1326,6 @@ continue_unlock:
if (NODE_MAPPING(sbi)->a_ops->writepage(page, wbc))
unlock_page(page);
- else
- wrote++;
if (--wbc->nr_to_write == 0)
break;
@@ -1257,15 +1343,12 @@ continue_unlock:
step++;
goto next_step;
}
-
- if (wrote)
- f2fs_submit_merged_bio(sbi, NODE, WRITE);
return nwritten;
}
int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
{
- pgoff_t index = 0, end = LONG_MAX;
+ pgoff_t index = 0, end = ULONG_MAX;
struct pagevec pvec;
int ret2 = 0, ret = 0;
@@ -1287,7 +1370,7 @@ int wait_on_node_pages_writeback(struct f2fs_sb_info *sbi, nid_t ino)
continue;
if (ino && ino_of_node(page) == ino) {
- f2fs_wait_on_page_writeback(page, NODE);
+ f2fs_wait_on_page_writeback(page, NODE, true);
if (TestClearPageError(page))
ret = -EIO;
}
@@ -1326,8 +1409,6 @@ static int f2fs_write_node_page(struct page *page,
if (unlikely(f2fs_cp_error(sbi)))
goto redirty_out;
- f2fs_wait_on_page_writeback(page, NODE);
-
/* get old block addr of this node page */
nid = nid_of_node(page);
f2fs_bug_on(sbi, page->index != nid);
@@ -1351,14 +1432,18 @@ static int f2fs_write_node_page(struct page *page,
}
set_page_writeback(page);
- fio.blk_addr = ni.blk_addr;
+ fio.old_blkaddr = ni.blk_addr;
write_node_page(nid, &fio);
- set_node_addr(sbi, &ni, fio.blk_addr, is_fsync_dnode(page));
+ set_node_addr(sbi, &ni, fio.new_blkaddr, is_fsync_dnode(page));
dec_page_count(sbi, F2FS_DIRTY_NODES);
up_read(&sbi->node_write);
+
+ if (wbc->for_reclaim)
+ f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, NODE, WRITE);
+
unlock_page(page);
- if (wbc->for_reclaim || unlikely(f2fs_cp_error(sbi)))
+ if (unlikely(f2fs_cp_error(sbi)))
f2fs_submit_merged_bio(sbi, NODE, WRITE);
return 0;
@@ -1374,8 +1459,6 @@ static int f2fs_write_node_pages(struct address_space *mapping,
struct f2fs_sb_info *sbi = F2FS_M_SB(mapping);
long diff;
- trace_f2fs_writepages(mapping->host, wbc, NODE);
-
/* balancing f2fs's metadata in background */
f2fs_balance_fs_bg(sbi);
@@ -1383,6 +1466,8 @@ static int f2fs_write_node_pages(struct address_space *mapping,
if (get_pages(sbi, F2FS_DIRTY_NODES) < nr_pages_to_skip(sbi, NODE))
goto skip_write;
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
+
diff = nr_pages_to_write(sbi, NODE, wbc);
wbc->sync_mode = WB_SYNC_NONE;
sync_node_pages(sbi, 0, wbc);
@@ -1391,6 +1476,7 @@ static int f2fs_write_node_pages(struct address_space *mapping,
skip_write:
wbc->pages_skipped += get_pages(sbi, F2FS_DIRTY_NODES);
+ trace_f2fs_writepages(mapping->host, wbc, NODE);
return 0;
}
@@ -1526,7 +1612,7 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
int i = 0;
nid_t nid = nm_i->next_scan_nid;
@@ -1558,16 +1644,18 @@ static void build_free_nids(struct f2fs_sb_info *sbi)
nm_i->next_scan_nid = nid;
/* find free nids from current sum_pages */
- mutex_lock(&curseg->curseg_mutex);
- for (i = 0; i < nats_in_cursum(sum); i++) {
- block_t addr = le32_to_cpu(nat_in_journal(sum, i).block_addr);
- nid = le32_to_cpu(nid_in_journal(sum, i));
+ down_read(&curseg->journal_rwsem);
+ for (i = 0; i < nats_in_cursum(journal); i++) {
+ block_t addr;
+
+ addr = le32_to_cpu(nat_in_journal(journal, i).block_addr);
+ nid = le32_to_cpu(nid_in_journal(journal, i));
if (addr == NULL_ADDR)
add_free_nid(sbi, nid, true);
else
remove_free_nid(nm_i, nid);
}
- mutex_unlock(&curseg->curseg_mutex);
+ up_read(&curseg->journal_rwsem);
up_read(&nm_i->nat_tree_lock);
ra_meta_pages(sbi, NAT_BLOCK_OFFSET(nm_i->next_scan_nid),
@@ -1703,7 +1791,7 @@ void recover_inline_xattr(struct inode *inode, struct page *page)
src_addr = inline_xattr_addr(page);
inline_size = inline_xattr_size(inode);
- f2fs_wait_on_page_writeback(ipage, NODE);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
memcpy(dst_addr, src_addr, inline_size);
update_inode:
update_inode(inode, ipage);
@@ -1831,16 +1919,16 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
int i;
- mutex_lock(&curseg->curseg_mutex);
- for (i = 0; i < nats_in_cursum(sum); i++) {
+ down_write(&curseg->journal_rwsem);
+ for (i = 0; i < nats_in_cursum(journal); i++) {
struct nat_entry *ne;
struct f2fs_nat_entry raw_ne;
- nid_t nid = le32_to_cpu(nid_in_journal(sum, i));
+ nid_t nid = le32_to_cpu(nid_in_journal(journal, i));
- raw_ne = nat_in_journal(sum, i);
+ raw_ne = nat_in_journal(journal, i);
ne = __lookup_nat_cache(nm_i, nid);
if (!ne) {
@@ -1849,8 +1937,8 @@ static void remove_nats_in_journal(struct f2fs_sb_info *sbi)
}
__set_nat_cache_dirty(nm_i, ne);
}
- update_nats_in_cursum(sum, -i);
- mutex_unlock(&curseg->curseg_mutex);
+ update_nats_in_cursum(journal, -i);
+ up_write(&curseg->journal_rwsem);
}
static void __adjust_nat_entry_set(struct nat_entry_set *nes,
@@ -1875,7 +1963,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
struct nat_entry_set *set)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
bool to_journal = true;
struct f2fs_nat_block *nat_blk;
@@ -1887,11 +1975,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
* #1, flush nat entries to journal in current hot data summary block.
* #2, flush nat entries to nat page.
*/
- if (!__has_cursum_space(sum, set->entry_cnt, NAT_JOURNAL))
+ if (!__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
to_journal = false;
if (to_journal) {
- mutex_lock(&curseg->curseg_mutex);
+ down_write(&curseg->journal_rwsem);
} else {
page = get_next_nat_page(sbi, start_nid);
nat_blk = page_address(page);
@@ -1908,11 +1996,11 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
continue;
if (to_journal) {
- offset = lookup_journal_in_cursum(sum,
+ offset = lookup_journal_in_cursum(journal,
NAT_JOURNAL, nid, 1);
f2fs_bug_on(sbi, offset < 0);
- raw_ne = &nat_in_journal(sum, offset);
- nid_in_journal(sum, offset) = cpu_to_le32(nid);
+ raw_ne = &nat_in_journal(journal, offset);
+ nid_in_journal(journal, offset) = cpu_to_le32(nid);
} else {
raw_ne = &nat_blk->entries[nid - start_nid];
}
@@ -1924,7 +2012,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info *sbi,
}
if (to_journal)
- mutex_unlock(&curseg->curseg_mutex);
+ up_write(&curseg->journal_rwsem);
else
f2fs_put_page(page, 1);
@@ -1941,7 +2029,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
{
struct f2fs_nm_info *nm_i = NM_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
struct nat_entry_set *setvec[SETVEC_SIZE];
struct nat_entry_set *set, *tmp;
unsigned int found;
@@ -1958,7 +2046,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
* entries, remove all entries from journal and merge them
* into nat entry set.
*/
- if (!__has_cursum_space(sum, nm_i->dirty_nat_cnt, NAT_JOURNAL))
+ if (!__has_cursum_space(journal, nm_i->dirty_nat_cnt, NAT_JOURNAL))
remove_nats_in_journal(sbi);
while ((found = __gang_lookup_nat_set(nm_i,
@@ -1967,7 +2055,7 @@ void flush_nat_entries(struct f2fs_sb_info *sbi)
set_idx = setvec[found - 1]->set + 1;
for (idx = 0; idx < found; idx++)
__adjust_nat_entry_set(setvec[idx], &sets,
- MAX_NAT_JENTRIES(sum));
+ MAX_NAT_JENTRIES(journal));
}
/* flush dirty nats in nat entry set */
@@ -2000,6 +2088,7 @@ static int init_node_manager(struct f2fs_sb_info *sbi)
nm_i->nat_cnt = 0;
nm_i->ram_thresh = DEF_RAM_THRESHOLD;
nm_i->ra_nid_pages = DEF_RA_NID_PAGES;
+ nm_i->dirty_nats_ratio = DEF_DIRTY_NAT_RATIO_THRESHOLD;
INIT_RADIX_TREE(&nm_i->free_nid_root, GFP_ATOMIC);
INIT_LIST_HEAD(&nm_i->free_nid_list);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index d4d1f636f..1f4f9d456 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -25,6 +25,9 @@
/* control the memory footprint threshold (10MB per 1GB ram) */
#define DEF_RAM_THRESHOLD 10
+/* control dirty nats ratio threshold (default: 10% over max nid count) */
+#define DEF_DIRTY_NAT_RATIO_THRESHOLD 10
+
/* vector size for gang look-up from nat cache that consists of radix tree */
#define NATVEC_SIZE 64
#define SETVEC_SIZE 32
@@ -117,6 +120,12 @@ static inline void raw_nat_from_node_info(struct f2fs_nat_entry *raw_ne,
raw_ne->version = ni->version;
}
+static inline bool excess_dirty_nats(struct f2fs_sb_info *sbi)
+{
+ return NM_I(sbi)->dirty_nat_cnt >= NM_I(sbi)->max_nid *
+ NM_I(sbi)->dirty_nats_ratio / 100;
+}
+
enum mem_type {
FREE_NIDS, /* indicates the free nid list */
NAT_ENTRIES, /* indicates the cached nat entry */
@@ -321,7 +330,7 @@ static inline int set_nid(struct page *p, int off, nid_t nid, bool i)
{
struct f2fs_node *rn = F2FS_NODE(p);
- f2fs_wait_on_page_writeback(p, NODE);
+ f2fs_wait_on_page_writeback(p, NODE, true);
if (i)
rn->i.i_nid[off - NODE_DIR1_BLOCK] = cpu_to_le32(nid);
@@ -370,6 +379,21 @@ static inline int is_node(struct page *page, int type)
#define is_fsync_dnode(page) is_node(page, FSYNC_BIT_SHIFT)
#define is_dent_dnode(page) is_node(page, DENT_BIT_SHIFT)
+static inline int is_inline_node(struct page *page)
+{
+ return PageChecked(page);
+}
+
+static inline void set_inline_node(struct page *page)
+{
+ SetPageChecked(page);
+}
+
+static inline void clear_inline_node(struct page *page)
+{
+ ClearPageChecked(page);
+}
+
static inline void set_cold_node(struct inode *inode, struct page *page)
{
struct f2fs_node *rn = F2FS_NODE(page);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 589b20b86..011942f94 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -350,8 +350,7 @@ got_it:
inode = dn->inode;
}
- bidx = start_bidx_of_node(offset, F2FS_I(inode)) +
- le16_to_cpu(sum.ofs_in_node);
+ bidx = start_bidx_of_node(offset, inode) + le16_to_cpu(sum.ofs_in_node);
/*
* if inode page is locked, unlock temporarily, but its reference
@@ -386,10 +385,9 @@ truncate_out:
static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
struct page *page, block_t blkaddr)
{
- struct f2fs_inode_info *fi = F2FS_I(inode);
- unsigned int start, end;
struct dnode_of_data dn;
struct node_info ni;
+ unsigned int start, end;
int err = 0, recovered = 0;
/* step 1: recover xattr */
@@ -409,8 +407,8 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
goto out;
/* step 3: recover data indices */
- start = start_bidx_of_node(ofs_of_node(page), fi);
- end = start + ADDRS_PER_PAGE(page, fi);
+ start = start_bidx_of_node(ofs_of_node(page), inode);
+ end = start + ADDRS_PER_PAGE(page, inode);
set_new_dnode(&dn, inode, NULL, NULL, 0);
@@ -418,7 +416,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
if (err)
goto out;
- f2fs_wait_on_page_writeback(dn.node_page, NODE);
+ f2fs_wait_on_page_writeback(dn.node_page, NODE, true);
get_node_info(sbi, dn.nid, &ni);
f2fs_bug_on(sbi, ni.ino != ino_of_node(page));
@@ -467,7 +465,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct inode *inode,
/* write dummy data page */
f2fs_replace_block(sbi, &dn, src, dest,
- ni.version, false);
+ ni.version, false, false);
recovered++;
}
}
@@ -593,7 +591,7 @@ out:
/* truncate meta pages to be used by the recovery */
truncate_inode_pages_range(META_MAPPING(sbi),
- (loff_t)MAIN_BLKADDR(sbi) << PAGE_CACHE_SHIFT, -1);
+ (loff_t)MAIN_BLKADDR(sbi) << PAGE_SHIFT, -1);
if (err) {
truncate_inode_pages_final(NODE_MAPPING(sbi));
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5904a411c..540669d69 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -191,70 +191,145 @@ void register_inmem_page(struct inode *inode, struct page *page)
trace_f2fs_register_inmem_page(page, INMEM);
}
-int commit_inmem_pages(struct inode *inode, bool abort)
+static int __revoke_inmem_pages(struct inode *inode,
+ struct list_head *head, bool drop, bool recover)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct inmem_pages *cur, *tmp;
+ int err = 0;
+
+ list_for_each_entry_safe(cur, tmp, head, list) {
+ struct page *page = cur->page;
+
+ if (drop)
+ trace_f2fs_commit_inmem_page(page, INMEM_DROP);
+
+ lock_page(page);
+
+ if (recover) {
+ struct dnode_of_data dn;
+ struct node_info ni;
+
+ trace_f2fs_commit_inmem_page(page, INMEM_REVOKE);
+
+ set_new_dnode(&dn, inode, NULL, NULL, 0);
+ if (get_dnode_of_data(&dn, page->index, LOOKUP_NODE)) {
+ err = -EAGAIN;
+ goto next;
+ }
+ get_node_info(sbi, dn.nid, &ni);
+ f2fs_replace_block(sbi, &dn, dn.data_blkaddr,
+ cur->old_addr, ni.version, true, true);
+ f2fs_put_dnode(&dn);
+ }
+next:
+ ClearPageUptodate(page);
+ set_page_private(page, 0);
+ ClearPageUptodate(page);
+ f2fs_put_page(page, 1);
+
+ list_del(&cur->list);
+ kmem_cache_free(inmem_entry_slab, cur);
+ dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ }
+ return err;
+}
+
+void drop_inmem_pages(struct inode *inode)
+{
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+
+ mutex_lock(&fi->inmem_lock);
+ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
+ mutex_unlock(&fi->inmem_lock);
+}
+
+static int __commit_inmem_pages(struct inode *inode,
+ struct list_head *revoke_list)
{
struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
struct f2fs_inode_info *fi = F2FS_I(inode);
struct inmem_pages *cur, *tmp;
- bool submit_bio = false;
struct f2fs_io_info fio = {
.sbi = sbi,
.type = DATA,
.rw = WRITE_SYNC | REQ_PRIO,
.encrypted_page = NULL,
};
+ bool submit_bio = false;
int err = 0;
- /*
- * The abort is true only when f2fs_evict_inode is called.
- * Basically, the f2fs_evict_inode doesn't produce any data writes, so
- * that we don't need to call f2fs_balance_fs.
- * Otherwise, f2fs_gc in f2fs_balance_fs can wait forever until this
- * inode becomes free by iget_locked in f2fs_iget.
- */
- if (!abort) {
- f2fs_balance_fs(sbi, true);
- f2fs_lock_op(sbi);
- }
-
- mutex_lock(&fi->inmem_lock);
list_for_each_entry_safe(cur, tmp, &fi->inmem_pages, list) {
- lock_page(cur->page);
- if (!abort) {
- if (cur->page->mapping == inode->i_mapping) {
- set_page_dirty(cur->page);
- f2fs_wait_on_page_writeback(cur->page, DATA);
- if (clear_page_dirty_for_io(cur->page))
- inode_dec_dirty_pages(inode);
- trace_f2fs_commit_inmem_page(cur->page, INMEM);
- fio.page = cur->page;
- err = do_write_data_page(&fio);
- if (err) {
- unlock_page(cur->page);
- break;
- }
- clear_cold_data(cur->page);
- submit_bio = true;
+ struct page *page = cur->page;
+
+ lock_page(page);
+ if (page->mapping == inode->i_mapping) {
+ trace_f2fs_commit_inmem_page(page, INMEM);
+
+ set_page_dirty(page);
+ f2fs_wait_on_page_writeback(page, DATA, true);
+ if (clear_page_dirty_for_io(page))
+ inode_dec_dirty_pages(inode);
+
+ fio.page = page;
+ err = do_write_data_page(&fio);
+ if (err) {
+ unlock_page(page);
+ break;
}
- } else {
- ClearPageUptodate(cur->page);
- trace_f2fs_commit_inmem_page(cur->page, INMEM_DROP);
+
+ /* record old blkaddr for revoking */
+ cur->old_addr = fio.old_blkaddr;
+
+ clear_cold_data(page);
+ submit_bio = true;
}
- set_page_private(cur->page, 0);
- ClearPagePrivate(cur->page);
- f2fs_put_page(cur->page, 1);
+ unlock_page(page);
+ list_move_tail(&cur->list, revoke_list);
+ }
- list_del(&cur->list);
- kmem_cache_free(inmem_entry_slab, cur);
- dec_page_count(F2FS_I_SB(inode), F2FS_INMEM_PAGES);
+ if (submit_bio)
+ f2fs_submit_merged_bio_cond(sbi, inode, NULL, 0, DATA, WRITE);
+
+ if (!err)
+ __revoke_inmem_pages(inode, revoke_list, false, false);
+
+ return err;
+}
+
+int commit_inmem_pages(struct inode *inode)
+{
+ struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+ struct f2fs_inode_info *fi = F2FS_I(inode);
+ struct list_head revoke_list;
+ int err;
+
+ INIT_LIST_HEAD(&revoke_list);
+ f2fs_balance_fs(sbi, true);
+ f2fs_lock_op(sbi);
+
+ mutex_lock(&fi->inmem_lock);
+ err = __commit_inmem_pages(inode, &revoke_list);
+ if (err) {
+ int ret;
+ /*
+ * try to revoke all committed pages, but still we could fail
+ * due to no memory or other reason, if that happened, EAGAIN
+ * will be returned, which means in such case, transaction is
+ * already not integrity, caller should use journal to do the
+ * recovery or rewrite & commit last transaction. For other
+ * error number, revoking was done by filesystem itself.
+ */
+ ret = __revoke_inmem_pages(inode, &revoke_list, false, true);
+ if (ret)
+ err = ret;
+
+ /* drop all uncommitted pages */
+ __revoke_inmem_pages(inode, &fi->inmem_pages, true, false);
}
mutex_unlock(&fi->inmem_lock);
- if (!abort) {
- f2fs_unlock_op(sbi);
- if (submit_bio)
- f2fs_submit_merged_bio(sbi, DATA, WRITE);
- }
+ f2fs_unlock_op(sbi);
return err;
}
@@ -291,11 +366,17 @@ void f2fs_balance_fs_bg(struct f2fs_sb_info *sbi)
/* checkpoint is the only way to shrink partial cached entries */
if (!available_free_memory(sbi, NAT_ENTRIES) ||
- excess_prefree_segs(sbi) ||
!available_free_memory(sbi, INO_ENTRIES) ||
+ excess_prefree_segs(sbi) ||
+ excess_dirty_nats(sbi) ||
(is_idle(sbi) && f2fs_time_over(sbi, CP_TIME))) {
- if (test_opt(sbi, DATA_FLUSH))
+ if (test_opt(sbi, DATA_FLUSH)) {
+ struct blk_plug plug;
+
+ blk_start_plug(&plug);
sync_dirty_inodes(sbi, FILE_INODE);
+ blk_finish_plug(&plug);
+ }
f2fs_sync_fs(sbi->sb, true);
stat_inc_bg_cp_count(sbi->stat_info);
}
@@ -502,7 +583,7 @@ static int f2fs_issue_discard(struct f2fs_sb_info *sbi,
bool discard_next_dnode(struct f2fs_sb_info *sbi, block_t blkaddr)
{
- int err = -ENOTSUPP;
+ int err = -EOPNOTSUPP;
if (test_opt(sbi, DISCARD)) {
struct seg_entry *se = get_seg_entry(sbi,
@@ -804,12 +885,12 @@ int npages_for_summary_flush(struct f2fs_sb_info *sbi, bool for_ra)
}
}
- sum_in_page = (PAGE_CACHE_SIZE - 2 * SUM_JOURNAL_SIZE -
+ sum_in_page = (PAGE_SIZE - 2 * SUM_JOURNAL_SIZE -
SUM_FOOTER_SIZE) / SUMMARY_SIZE;
if (valid_sum_count <= sum_in_page)
return 1;
else if ((valid_sum_count - sum_in_page) <=
- (PAGE_CACHE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
+ (PAGE_SIZE - SUM_FOOTER_SIZE) / SUMMARY_SIZE)
return 2;
return 3;
}
@@ -828,9 +909,9 @@ void update_meta_page(struct f2fs_sb_info *sbi, void *src, block_t blk_addr)
void *dst = page_address(page);
if (src)
- memcpy(dst, src, PAGE_CACHE_SIZE);
+ memcpy(dst, src, PAGE_SIZE);
else
- memset(dst, 0, PAGE_CACHE_SIZE);
+ memset(dst, 0, PAGE_SIZE);
set_page_dirty(page);
f2fs_put_page(page, 1);
}
@@ -841,6 +922,31 @@ static void write_sum_page(struct f2fs_sb_info *sbi,
update_meta_page(sbi, (void *)sum_blk, blk_addr);
}
+static void write_current_sum_page(struct f2fs_sb_info *sbi,
+ int type, block_t blk_addr)
+{
+ struct curseg_info *curseg = CURSEG_I(sbi, type);
+ struct page *page = grab_meta_page(sbi, blk_addr);
+ struct f2fs_summary_block *src = curseg->sum_blk;
+ struct f2fs_summary_block *dst;
+
+ dst = (struct f2fs_summary_block *)page_address(page);
+
+ mutex_lock(&curseg->curseg_mutex);
+
+ down_read(&curseg->journal_rwsem);
+ memcpy(&dst->journal, curseg->journal, SUM_JOURNAL_SIZE);
+ up_read(&curseg->journal_rwsem);
+
+ memcpy(dst->entries, src->entries, SUM_ENTRY_SIZE);
+ memcpy(&dst->footer, &src->footer, SUM_FOOTER_SIZE);
+
+ mutex_unlock(&curseg->curseg_mutex);
+
+ set_page_dirty(page);
+ f2fs_put_page(page, 1);
+}
+
static int is_next_segment_free(struct f2fs_sb_info *sbi, int type)
{
struct curseg_info *curseg = CURSEG_I(sbi, type);
@@ -873,9 +979,8 @@ static void get_new_segment(struct f2fs_sb_info *sbi,
if (!new_sec && ((*newseg + 1) % sbi->segs_per_sec)) {
segno = find_next_zero_bit(free_i->free_segmap,
- MAIN_SEGS(sbi), *newseg + 1);
- if (segno - *newseg < sbi->segs_per_sec -
- (*newseg % sbi->segs_per_sec))
+ (hint + 1) * sbi->segs_per_sec, *newseg + 1);
+ if (segno < (hint + 1) * sbi->segs_per_sec)
goto got_it;
}
find_other_zone:
@@ -1280,8 +1385,8 @@ static void do_write_page(struct f2fs_summary *sum, struct f2fs_io_info *fio)
{
int type = __get_segment_type(fio->page, fio->type);
- allocate_data_block(fio->sbi, fio->page, fio->blk_addr,
- &fio->blk_addr, sum, type);
+ allocate_data_block(fio->sbi, fio->page, fio->old_blkaddr,
+ &fio->new_blkaddr, sum, type);
/* writeout dirty page into bdev */
f2fs_submit_page_mbio(fio);
@@ -1293,7 +1398,8 @@ void write_meta_page(struct f2fs_sb_info *sbi, struct page *page)
.sbi = sbi,
.type = META,
.rw = WRITE_SYNC | REQ_META | REQ_PRIO,
- .blk_addr = page->index,
+ .old_blkaddr = page->index,
+ .new_blkaddr = page->index,
.page = page,
.encrypted_page = NULL,
};
@@ -1323,19 +1429,19 @@ void write_data_page(struct dnode_of_data *dn, struct f2fs_io_info *fio)
get_node_info(sbi, dn->nid, &ni);
set_summary(&sum, dn->nid, dn->ofs_in_node, ni.version);
do_write_page(&sum, fio);
- dn->data_blkaddr = fio->blk_addr;
+ f2fs_update_data_blkaddr(dn, fio->new_blkaddr);
}
void rewrite_data_page(struct f2fs_io_info *fio)
{
+ fio->new_blkaddr = fio->old_blkaddr;
stat_inc_inplace_blocks(fio->sbi);
f2fs_submit_page_mbio(fio);
}
-static void __f2fs_replace_block(struct f2fs_sb_info *sbi,
- struct f2fs_summary *sum,
+void __f2fs_replace_block(struct f2fs_sb_info *sbi, struct f2fs_summary *sum,
block_t old_blkaddr, block_t new_blkaddr,
- bool recover_curseg)
+ bool recover_curseg, bool recover_newaddr)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg;
@@ -1378,7 +1484,7 @@ static void __f2fs_replace_block(struct f2fs_sb_info *sbi,
curseg->next_blkoff = GET_BLKOFF_FROM_SEG0(sbi, new_blkaddr);
__add_sum_entry(sbi, type, sum);
- if (!recover_curseg)
+ if (!recover_curseg || recover_newaddr)
update_sit_entry(sbi, new_blkaddr, 1);
if (GET_SEGNO(sbi, old_blkaddr) != NULL_SEGNO)
update_sit_entry(sbi, old_blkaddr, -1);
@@ -1402,66 +1508,30 @@ static void __f2fs_replace_block(struct f2fs_sb_info *sbi,
void f2fs_replace_block(struct f2fs_sb_info *sbi, struct dnode_of_data *dn,
block_t old_addr, block_t new_addr,
- unsigned char version, bool recover_curseg)
+ unsigned char version, bool recover_curseg,
+ bool recover_newaddr)
{
struct f2fs_summary sum;
set_summary(&sum, dn->nid, dn->ofs_in_node, version);
- __f2fs_replace_block(sbi, &sum, old_addr, new_addr, recover_curseg);
+ __f2fs_replace_block(sbi, &sum, old_addr, new_addr,
+ recover_curseg, recover_newaddr);
- dn->data_blkaddr = new_addr;
- set_data_blkaddr(dn);
- f2fs_update_extent_cache(dn);
-}
-
-static inline bool is_merged_page(struct f2fs_sb_info *sbi,
- struct page *page, enum page_type type)
-{
- enum page_type btype = PAGE_TYPE_OF_BIO(type);
- struct f2fs_bio_info *io = &sbi->write_io[btype];
- struct bio_vec *bvec;
- struct page *target;
- int i;
-
- down_read(&io->io_rwsem);
- if (!io->bio) {
- up_read(&io->io_rwsem);
- return false;
- }
-
- bio_for_each_segment_all(bvec, io->bio, i) {
-
- if (bvec->bv_page->mapping) {
- target = bvec->bv_page;
- } else {
- struct f2fs_crypto_ctx *ctx;
-
- /* encrypted page */
- ctx = (struct f2fs_crypto_ctx *)page_private(
- bvec->bv_page);
- target = ctx->w.control_page;
- }
-
- if (page == target) {
- up_read(&io->io_rwsem);
- return true;
- }
- }
-
- up_read(&io->io_rwsem);
- return false;
+ f2fs_update_data_blkaddr(dn, new_addr);
}
void f2fs_wait_on_page_writeback(struct page *page,
- enum page_type type)
+ enum page_type type, bool ordered)
{
if (PageWriteback(page)) {
struct f2fs_sb_info *sbi = F2FS_P_SB(page);
- if (is_merged_page(sbi, page, type))
- f2fs_submit_merged_bio(sbi, type, WRITE);
- wait_on_page_writeback(page);
+ f2fs_submit_merged_bio_cond(sbi, NULL, page, 0, type, WRITE);
+ if (ordered)
+ wait_on_page_writeback(page);
+ else
+ wait_for_stable_page(page);
}
}
@@ -1477,7 +1547,7 @@ void f2fs_wait_on_encrypted_page_writeback(struct f2fs_sb_info *sbi,
cpage = find_lock_page(META_MAPPING(sbi), blkaddr);
if (cpage) {
- f2fs_wait_on_page_writeback(cpage, DATA);
+ f2fs_wait_on_page_writeback(cpage, DATA, true);
f2fs_put_page(cpage, 1);
}
}
@@ -1498,12 +1568,11 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
/* Step 1: restore nat cache */
seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
- memcpy(&seg_i->sum_blk->n_nats, kaddr, SUM_JOURNAL_SIZE);
+ memcpy(seg_i->journal, kaddr, SUM_JOURNAL_SIZE);
/* Step 2: restore sit cache */
seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
- memcpy(&seg_i->sum_blk->n_sits, kaddr + SUM_JOURNAL_SIZE,
- SUM_JOURNAL_SIZE);
+ memcpy(seg_i->journal, kaddr + SUM_JOURNAL_SIZE, SUM_JOURNAL_SIZE);
offset = 2 * SUM_JOURNAL_SIZE;
/* Step 3: restore summary entries */
@@ -1527,7 +1596,7 @@ static int read_compacted_summaries(struct f2fs_sb_info *sbi)
s = (struct f2fs_summary *)(kaddr + offset);
seg_i->sum_blk->entries[j] = *s;
offset += SUMMARY_SIZE;
- if (offset + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
+ if (offset + SUMMARY_SIZE <= PAGE_SIZE -
SUM_FOOTER_SIZE)
continue;
@@ -1599,7 +1668,14 @@ static int read_normal_summaries(struct f2fs_sb_info *sbi, int type)
/* set uncompleted segment to curseg */
curseg = CURSEG_I(sbi, type);
mutex_lock(&curseg->curseg_mutex);
- memcpy(curseg->sum_blk, sum, PAGE_CACHE_SIZE);
+
+ /* update journal info */
+ down_write(&curseg->journal_rwsem);
+ memcpy(curseg->journal, &sum->journal, SUM_JOURNAL_SIZE);
+ up_write(&curseg->journal_rwsem);
+
+ memcpy(curseg->sum_blk->entries, sum->entries, SUM_ENTRY_SIZE);
+ memcpy(&curseg->sum_blk->footer, &sum->footer, SUM_FOOTER_SIZE);
curseg->next_segno = segno;
reset_curseg(sbi, type, 0);
curseg->alloc_type = ckpt->alloc_type[type];
@@ -1654,13 +1730,12 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
/* Step 1: write nat cache */
seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
- memcpy(kaddr, &seg_i->sum_blk->n_nats, SUM_JOURNAL_SIZE);
+ memcpy(kaddr, seg_i->journal, SUM_JOURNAL_SIZE);
written_size += SUM_JOURNAL_SIZE;
/* Step 2: write sit cache */
seg_i = CURSEG_I(sbi, CURSEG_COLD_DATA);
- memcpy(kaddr + written_size, &seg_i->sum_blk->n_sits,
- SUM_JOURNAL_SIZE);
+ memcpy(kaddr + written_size, seg_i->journal, SUM_JOURNAL_SIZE);
written_size += SUM_JOURNAL_SIZE;
/* Step 3: write summary entries */
@@ -1682,7 +1757,7 @@ static void write_compacted_summaries(struct f2fs_sb_info *sbi, block_t blkaddr)
*summary = seg_i->sum_blk->entries[j];
written_size += SUMMARY_SIZE;
- if (written_size + SUMMARY_SIZE <= PAGE_CACHE_SIZE -
+ if (written_size + SUMMARY_SIZE <= PAGE_SIZE -
SUM_FOOTER_SIZE)
continue;
@@ -1706,12 +1781,8 @@ static void write_normal_summaries(struct f2fs_sb_info *sbi,
else
end = type + NR_CURSEG_NODE_TYPE;
- for (i = type; i < end; i++) {
- struct curseg_info *sum = CURSEG_I(sbi, i);
- mutex_lock(&sum->curseg_mutex);
- write_sum_page(sbi, sum->sum_blk, blkaddr + (i - type));
- mutex_unlock(&sum->curseg_mutex);
- }
+ for (i = type; i < end; i++)
+ write_current_sum_page(sbi, i, blkaddr + (i - type));
}
void write_data_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
@@ -1727,24 +1798,24 @@ void write_node_summaries(struct f2fs_sb_info *sbi, block_t start_blk)
write_normal_summaries(sbi, start_blk, CURSEG_HOT_NODE);
}
-int lookup_journal_in_cursum(struct f2fs_summary_block *sum, int type,
+int lookup_journal_in_cursum(struct f2fs_journal *journal, int type,
unsigned int val, int alloc)
{
int i;
if (type == NAT_JOURNAL) {
- for (i = 0; i < nats_in_cursum(sum); i++) {
- if (le32_to_cpu(nid_in_journal(sum, i)) == val)
+ for (i = 0; i < nats_in_cursum(journal); i++) {
+ if (le32_to_cpu(nid_in_journal(journal, i)) == val)
return i;
}
- if (alloc && __has_cursum_space(sum, 1, NAT_JOURNAL))
- return update_nats_in_cursum(sum, 1);
+ if (alloc && __has_cursum_space(journal, 1, NAT_JOURNAL))
+ return update_nats_in_cursum(journal, 1);
} else if (type == SIT_JOURNAL) {
- for (i = 0; i < sits_in_cursum(sum); i++)
- if (le32_to_cpu(segno_in_journal(sum, i)) == val)
+ for (i = 0; i < sits_in_cursum(journal); i++)
+ if (le32_to_cpu(segno_in_journal(journal, i)) == val)
return i;
- if (alloc && __has_cursum_space(sum, 1, SIT_JOURNAL))
- return update_sits_in_cursum(sum, 1);
+ if (alloc && __has_cursum_space(journal, 1, SIT_JOURNAL))
+ return update_sits_in_cursum(journal, 1);
}
return -1;
}
@@ -1773,7 +1844,7 @@ static struct page *get_next_sit_page(struct f2fs_sb_info *sbi,
src_addr = page_address(src_page);
dst_addr = page_address(dst_page);
- memcpy(dst_addr, src_addr, PAGE_CACHE_SIZE);
+ memcpy(dst_addr, src_addr, PAGE_SIZE);
set_page_dirty(dst_page);
f2fs_put_page(src_page, 1);
@@ -1848,20 +1919,22 @@ static void add_sits_in_set(struct f2fs_sb_info *sbi)
static void remove_sits_in_journal(struct f2fs_sb_info *sbi)
{
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
int i;
- for (i = sits_in_cursum(sum) - 1; i >= 0; i--) {
+ down_write(&curseg->journal_rwsem);
+ for (i = 0; i < sits_in_cursum(journal); i++) {
unsigned int segno;
bool dirtied;
- segno = le32_to_cpu(segno_in_journal(sum, i));
+ segno = le32_to_cpu(segno_in_journal(journal, i));
dirtied = __mark_sit_entry_dirty(sbi, segno);
if (!dirtied)
add_sit_entry(segno, &SM_I(sbi)->sit_entry_set);
}
- update_sits_in_cursum(sum, -sits_in_cursum(sum));
+ update_sits_in_cursum(journal, -i);
+ up_write(&curseg->journal_rwsem);
}
/*
@@ -1873,13 +1946,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
struct sit_info *sit_i = SIT_I(sbi);
unsigned long *bitmap = sit_i->dirty_sentries_bitmap;
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
struct sit_entry_set *ses, *tmp;
struct list_head *head = &SM_I(sbi)->sit_entry_set;
bool to_journal = true;
struct seg_entry *se;
- mutex_lock(&curseg->curseg_mutex);
mutex_lock(&sit_i->sentry_lock);
if (!sit_i->dirty_sentries)
@@ -1896,7 +1968,7 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
* entries, remove all entries from journal and add and account
* them in sit entry set.
*/
- if (!__has_cursum_space(sum, sit_i->dirty_sentries, SIT_JOURNAL))
+ if (!__has_cursum_space(journal, sit_i->dirty_sentries, SIT_JOURNAL))
remove_sits_in_journal(sbi);
/*
@@ -1913,10 +1985,12 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
unsigned int segno = start_segno;
if (to_journal &&
- !__has_cursum_space(sum, ses->entry_cnt, SIT_JOURNAL))
+ !__has_cursum_space(journal, ses->entry_cnt, SIT_JOURNAL))
to_journal = false;
- if (!to_journal) {
+ if (to_journal) {
+ down_write(&curseg->journal_rwsem);
+ } else {
page = get_next_sit_page(sbi, start_segno);
raw_sit = page_address(page);
}
@@ -1934,13 +2008,13 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
}
if (to_journal) {
- offset = lookup_journal_in_cursum(sum,
+ offset = lookup_journal_in_cursum(journal,
SIT_JOURNAL, segno, 1);
f2fs_bug_on(sbi, offset < 0);
- segno_in_journal(sum, offset) =
+ segno_in_journal(journal, offset) =
cpu_to_le32(segno);
seg_info_to_raw_sit(se,
- &sit_in_journal(sum, offset));
+ &sit_in_journal(journal, offset));
} else {
sit_offset = SIT_ENTRY_OFFSET(sit_i, segno);
seg_info_to_raw_sit(se,
@@ -1952,7 +2026,9 @@ void flush_sit_entries(struct f2fs_sb_info *sbi, struct cp_control *cpc)
ses->entry_cnt--;
}
- if (!to_journal)
+ if (to_journal)
+ up_write(&curseg->journal_rwsem);
+ else
f2fs_put_page(page, 1);
f2fs_bug_on(sbi, ses->entry_cnt);
@@ -1967,7 +2043,6 @@ out:
add_discard_addrs(sbi, cpc);
}
mutex_unlock(&sit_i->sentry_lock);
- mutex_unlock(&curseg->curseg_mutex);
set_prefree_as_free_segments(sbi);
}
@@ -2096,9 +2171,14 @@ static int build_curseg(struct f2fs_sb_info *sbi)
for (i = 0; i < NR_CURSEG_TYPE; i++) {
mutex_init(&array[i].curseg_mutex);
- array[i].sum_blk = kzalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ array[i].sum_blk = kzalloc(PAGE_SIZE, GFP_KERNEL);
if (!array[i].sum_blk)
return -ENOMEM;
+ init_rwsem(&array[i].journal_rwsem);
+ array[i].journal = kzalloc(sizeof(struct f2fs_journal),
+ GFP_KERNEL);
+ if (!array[i].journal)
+ return -ENOMEM;
array[i].segno = NULL_SEGNO;
array[i].next_blkoff = 0;
}
@@ -2109,11 +2189,11 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
{
struct sit_info *sit_i = SIT_I(sbi);
struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_COLD_DATA);
- struct f2fs_summary_block *sum = curseg->sum_blk;
+ struct f2fs_journal *journal = curseg->journal;
int sit_blk_cnt = SIT_BLK_CNT(sbi);
unsigned int i, start, end;
unsigned int readed, start_blk = 0;
- int nrpages = MAX_BIO_BLOCKS(sbi);
+ int nrpages = MAX_BIO_BLOCKS(sbi) * 8;
do {
readed = ra_meta_pages(sbi, start_blk, nrpages, META_SIT, true);
@@ -2127,16 +2207,16 @@ static void build_sit_entries(struct f2fs_sb_info *sbi)
struct f2fs_sit_entry sit;
struct page *page;
- mutex_lock(&curseg->curseg_mutex);
- for (i = 0; i < sits_in_cursum(sum); i++) {
- if (le32_to_cpu(segno_in_journal(sum, i))
+ down_read(&curseg->journal_rwsem);
+ for (i = 0; i < sits_in_cursum(journal); i++) {
+ if (le32_to_cpu(segno_in_journal(journal, i))
== start) {
- sit = sit_in_journal(sum, i);
- mutex_unlock(&curseg->curseg_mutex);
+ sit = sit_in_journal(journal, i);
+ up_read(&curseg->journal_rwsem);
goto got_it;
}
}
- mutex_unlock(&curseg->curseg_mutex);
+ up_read(&curseg->journal_rwsem);
page = get_current_sit_page(sbi, start);
sit_blk = (struct f2fs_sit_block *)page_address(page);
@@ -2371,8 +2451,10 @@ static void destroy_curseg(struct f2fs_sb_info *sbi)
if (!array)
return;
SM_I(sbi)->curseg_array = NULL;
- for (i = 0; i < NR_CURSEG_TYPE; i++)
+ for (i = 0; i < NR_CURSEG_TYPE; i++) {
kfree(array[i].sum_blk);
+ kfree(array[i].journal);
+ }
kfree(array);
}
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index ee44d346e..975c33df6 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -183,7 +183,7 @@ struct segment_allocation {
* this value is set in page as a private data which indicate that
* the page is atomically written, and it is in inmem_pages list.
*/
-#define ATOMIC_WRITTEN_PAGE 0x0000ffff
+#define ATOMIC_WRITTEN_PAGE ((unsigned long)-1)
#define IS_ATOMIC_WRITTEN_PAGE(page) \
(page_private(page) == (unsigned long)ATOMIC_WRITTEN_PAGE)
@@ -191,6 +191,7 @@ struct segment_allocation {
struct inmem_pages {
struct list_head list;
struct page *page;
+ block_t old_addr; /* for revoking when fail to commit */
};
struct sit_info {
@@ -257,6 +258,8 @@ struct victim_selection {
struct curseg_info {
struct mutex curseg_mutex; /* lock for consistency */
struct f2fs_summary_block *sum_blk; /* cached summary block */
+ struct rw_semaphore journal_rwsem; /* protect journal area */
+ struct f2fs_journal *journal; /* cached journal info */
unsigned char alloc_type; /* current allocation type */
unsigned int segno; /* current segment number */
unsigned short next_blkoff; /* next block offset to write */
diff --git a/fs/f2fs/super.c b/fs/f2fs/super.c
index 013a62b2f..e11385bbd 100644
--- a/fs/f2fs/super.c
+++ b/fs/f2fs/super.c
@@ -126,6 +126,19 @@ static unsigned char *__struct_ptr(struct f2fs_sb_info *sbi, int struct_type)
return NULL;
}
+static ssize_t lifetime_write_kbytes_show(struct f2fs_attr *a,
+ struct f2fs_sb_info *sbi, char *buf)
+{
+ struct super_block *sb = sbi->sb;
+
+ if (!sb->s_bdev->bd_part)
+ return snprintf(buf, PAGE_SIZE, "0\n");
+
+ return snprintf(buf, PAGE_SIZE, "%llu\n",
+ (unsigned long long)(sbi->kbytes_written +
+ BD_PART_WRITTEN(sbi)));
+}
+
static ssize_t f2fs_sbi_show(struct f2fs_attr *a,
struct f2fs_sb_info *sbi, char *buf)
{
@@ -204,6 +217,9 @@ static struct f2fs_attr f2fs_attr_##_name = { \
f2fs_sbi_show, f2fs_sbi_store, \
offsetof(struct struct_name, elname))
+#define F2FS_GENERAL_RO_ATTR(name) \
+static struct f2fs_attr f2fs_attr_##name = __ATTR(name, 0444, name##_show, NULL)
+
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_min_sleep_time, min_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_max_sleep_time, max_sleep_time);
F2FS_RW_ATTR(GC_THREAD, f2fs_gc_kthread, gc_no_gc_sleep_time, no_gc_sleep_time);
@@ -216,10 +232,12 @@ F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_ipu_util, min_ipu_util);
F2FS_RW_ATTR(SM_INFO, f2fs_sm_info, min_fsync_blocks, min_fsync_blocks);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ram_thresh, ram_thresh);
F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, ra_nid_pages, ra_nid_pages);
+F2FS_RW_ATTR(NM_INFO, f2fs_nm_info, dirty_nats_ratio, dirty_nats_ratio);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, max_victim_search, max_victim_search);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, dir_level, dir_level);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, cp_interval, interval_time[CP_TIME]);
F2FS_RW_ATTR(F2FS_SBI, f2fs_sb_info, idle_interval, interval_time[REQ_TIME]);
+F2FS_GENERAL_RO_ATTR(lifetime_write_kbytes);
#define ATTR_LIST(name) (&f2fs_attr_##name.attr)
static struct attribute *f2fs_attrs[] = {
@@ -237,8 +255,10 @@ static struct attribute *f2fs_attrs[] = {
ATTR_LIST(dir_level),
ATTR_LIST(ram_thresh),
ATTR_LIST(ra_nid_pages),
+ ATTR_LIST(dirty_nats_ratio),
ATTR_LIST(cp_interval),
ATTR_LIST(idle_interval),
+ ATTR_LIST(lifetime_write_kbytes),
NULL,
};
@@ -450,10 +470,6 @@ static struct inode *f2fs_alloc_inode(struct super_block *sb)
/* Will be used by directory only */
fi->i_dir_level = F2FS_SB(sb)->dir_level;
-
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- fi->i_crypt_info = NULL;
-#endif
return &fi->vfs_inode;
}
@@ -474,7 +490,7 @@ static int f2fs_drop_inode(struct inode *inode)
/* some remained atomic pages should discarded */
if (f2fs_is_atomic_file(inode))
- commit_inmem_pages(inode, true);
+ drop_inmem_pages(inode);
/* should remain fi->extent_tree for writepage */
f2fs_destroy_extent_node(inode);
@@ -487,11 +503,7 @@ static int f2fs_drop_inode(struct inode *inode)
sb_end_intwrite(inode->i_sb);
-#ifdef CONFIG_F2FS_FS_ENCRYPTION
- if (F2FS_I(inode)->i_crypt_info)
- f2fs_free_encryption_info(inode,
- F2FS_I(inode)->i_crypt_info);
-#endif
+ fscrypt_put_encryption_info(inode, NULL);
spin_lock(&inode->i_lock);
atomic_dec(&inode->i_count);
}
@@ -562,6 +574,10 @@ static void f2fs_put_super(struct super_block *sb)
f2fs_leave_shrinker(sbi);
mutex_unlock(&sbi->umount_mutex);
+ /* our cp_error case, we can wait for any writeback page */
+ if (get_pages(sbi, F2FS_WRITEBACK))
+ f2fs_flush_merged_bios(sbi);
+
iput(sbi->node_inode);
iput(sbi->meta_inode);
@@ -574,6 +590,8 @@ static void f2fs_put_super(struct super_block *sb)
wait_for_completion(&sbi->s_kobj_unregister);
sb->s_fs_info = NULL;
+ if (sbi->s_chksum_driver)
+ crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi->raw_super);
kfree(sbi);
}
@@ -766,8 +784,6 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
bool need_stop_gc = false;
bool no_extent_cache = !test_opt(sbi, EXTENT_CACHE);
- sync_filesystem(sb);
-
/*
* Save the old mount options in case we
* need to restore them.
@@ -775,6 +791,13 @@ static int f2fs_remount(struct super_block *sb, int *flags, char *data)
org_mount_opt = sbi->mount_opt;
active_logs = sbi->active_logs;
+ if (*flags & MS_RDONLY) {
+ set_opt(sbi, FASTBOOT);
+ set_sbi_flag(sbi, SBI_IS_DIRTY);
+ }
+
+ sync_filesystem(sb);
+
sbi->mount_opt.opt = 0;
default_options(sbi);
@@ -862,6 +885,48 @@ static struct super_operations f2fs_sops = {
.remount_fs = f2fs_remount,
};
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+static int f2fs_get_context(struct inode *inode, void *ctx, size_t len)
+{
+ return f2fs_getxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
+ F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
+ ctx, len, NULL);
+}
+
+static int f2fs_key_prefix(struct inode *inode, u8 **key)
+{
+ *key = F2FS_I_SB(inode)->key_prefix;
+ return F2FS_I_SB(inode)->key_prefix_size;
+}
+
+static int f2fs_set_context(struct inode *inode, const void *ctx, size_t len,
+ void *fs_data)
+{
+ return f2fs_setxattr(inode, F2FS_XATTR_INDEX_ENCRYPTION,
+ F2FS_XATTR_NAME_ENCRYPTION_CONTEXT,
+ ctx, len, fs_data, XATTR_CREATE);
+}
+
+static unsigned f2fs_max_namelen(struct inode *inode)
+{
+ return S_ISLNK(inode->i_mode) ?
+ inode->i_sb->s_blocksize : F2FS_NAME_LEN;
+}
+
+static struct fscrypt_operations f2fs_cryptops = {
+ .get_context = f2fs_get_context,
+ .key_prefix = f2fs_key_prefix,
+ .set_context = f2fs_set_context,
+ .is_encrypted = f2fs_encrypted_inode,
+ .empty_dir = f2fs_empty_dir,
+ .max_namelen = f2fs_max_namelen,
+};
+#else
+static struct fscrypt_operations f2fs_cryptops = {
+ .is_encrypted = f2fs_encrypted_inode,
+};
+#endif
+
static struct inode *f2fs_nfs_get_inode(struct super_block *sb,
u64 ino, u32 generation)
{
@@ -1055,10 +1120,10 @@ static int sanity_check_raw_super(struct super_block *sb,
}
/* Currently, support only 4KB page cache size */
- if (F2FS_BLKSIZE != PAGE_CACHE_SIZE) {
+ if (F2FS_BLKSIZE != PAGE_SIZE) {
f2fs_msg(sb, KERN_INFO,
"Invalid page_cache_size (%lu), supports only 4KB\n",
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
return 1;
}
@@ -1117,7 +1182,7 @@ static int sanity_check_raw_super(struct super_block *sb,
return 0;
}
-static int sanity_check_ckpt(struct f2fs_sb_info *sbi)
+int sanity_check_ckpt(struct f2fs_sb_info *sbi)
{
unsigned int total, fsmeta;
struct f2fs_super_block *raw_super = F2FS_RAW_SUPER(sbi);
@@ -1173,6 +1238,12 @@ static void init_sb_info(struct f2fs_sb_info *sbi)
INIT_LIST_HEAD(&sbi->s_list);
mutex_init(&sbi->umount_mutex);
+
+#ifdef CONFIG_F2FS_FS_ENCRYPTION
+ memcpy(sbi->key_prefix, F2FS_KEY_DESC_PREFIX,
+ F2FS_KEY_DESC_PREFIX_SIZE);
+ sbi->key_prefix_size = F2FS_KEY_DESC_PREFIX_SIZE;
+#endif
}
/*
@@ -1269,6 +1340,7 @@ static int f2fs_fill_super(struct super_block *sb, void *data, int silent)
bool retry = true, need_fsck = false;
char *options = NULL;
int recovery, i, valid_super_block;
+ struct curseg_info *seg_i;
try_onemore:
err = -EINVAL;
@@ -1281,6 +1353,15 @@ try_onemore:
if (!sbi)
return -ENOMEM;
+ /* Load the checksum driver */
+ sbi->s_chksum_driver = crypto_alloc_shash("crc32", 0, 0);
+ if (IS_ERR(sbi->s_chksum_driver)) {
+ f2fs_msg(sb, KERN_ERR, "Cannot load crc32 driver.");
+ err = PTR_ERR(sbi->s_chksum_driver);
+ sbi->s_chksum_driver = NULL;
+ goto free_sbi;
+ }
+
/* set a block size */
if (unlikely(!sb_set_blocksize(sb, F2FS_BLKSIZE))) {
f2fs_msg(sb, KERN_ERR, "unable to set blocksize");
@@ -1312,6 +1393,7 @@ try_onemore:
get_random_bytes(&sbi->s_next_generation, sizeof(u32));
sb->s_op = &f2fs_sops;
+ sb->s_cop = &f2fs_cryptops;
sb->s_xattr = f2fs_xattr_handlers;
sb->s_export_op = &f2fs_export_ops;
sb->s_magic = F2FS_SUPER_MAGIC;
@@ -1360,13 +1442,6 @@ try_onemore:
goto free_meta_inode;
}
- /* sanity checking of checkpoint */
- err = -EINVAL;
- if (sanity_check_ckpt(sbi)) {
- f2fs_msg(sb, KERN_ERR, "Invalid F2FS checkpoint");
- goto free_cp;
- }
-
sbi->total_valid_node_count =
le32_to_cpu(sbi->ckpt->valid_node_count);
sbi->total_valid_inode_count =
@@ -1399,6 +1474,17 @@ try_onemore:
goto free_nm;
}
+ /* For write statistics */
+ if (sb->s_bdev->bd_part)
+ sbi->sectors_written_start =
+ (u64)part_stat_read(sb->s_bdev->bd_part, sectors[1]);
+
+ /* Read accumulated write IO statistics if exists */
+ seg_i = CURSEG_I(sbi, CURSEG_HOT_NODE);
+ if (__exist_node_summaries(sbi))
+ sbi->kbytes_written =
+ le64_to_cpu(seg_i->journal->info.kbytes_written);
+
build_gc_manager(sbi);
/* get an inode for node space */
@@ -1493,8 +1579,10 @@ try_onemore:
/* recover broken superblock */
if (recovery && !f2fs_readonly(sb) && !bdev_read_only(sb->s_bdev)) {
- f2fs_msg(sb, KERN_INFO, "Recover invalid superblock");
- f2fs_commit_super(sbi, true);
+ err = f2fs_commit_super(sbi, true);
+ f2fs_msg(sb, KERN_INFO,
+ "Try to recover %dth superblock, ret: %ld",
+ sbi->valid_super_block ? 1 : 2, err);
}
f2fs_update_time(sbi, CP_TIME);
@@ -1523,7 +1611,6 @@ free_nm:
destroy_node_manager(sbi);
free_sm:
destroy_segment_manager(sbi);
-free_cp:
kfree(sbi->ckpt);
free_meta_inode:
make_bad_inode(sbi->meta_inode);
@@ -1533,6 +1620,8 @@ free_options:
free_sb_buf:
kfree(raw_super);
free_sbi:
+ if (sbi->s_chksum_driver)
+ crypto_free_shash(sbi->s_chksum_driver);
kfree(sbi);
/* give only one another chance */
@@ -1612,13 +1701,9 @@ static int __init init_f2fs_fs(void)
err = -ENOMEM;
goto free_extent_cache;
}
- err = f2fs_init_crypto();
- if (err)
- goto free_kset;
-
err = register_shrinker(&f2fs_shrinker_info);
if (err)
- goto free_crypto;
+ goto free_kset;
err = register_filesystem(&f2fs_fs_type);
if (err)
@@ -1633,8 +1718,6 @@ free_filesystem:
unregister_filesystem(&f2fs_fs_type);
free_shrinker:
unregister_shrinker(&f2fs_shrinker_info);
-free_crypto:
- f2fs_exit_crypto();
free_kset:
kset_unregister(f2fs_kset);
free_extent_cache:
@@ -1657,7 +1740,6 @@ static void __exit exit_f2fs_fs(void)
f2fs_destroy_root_stats();
unregister_shrinker(&f2fs_shrinker_info);
unregister_filesystem(&f2fs_fs_type);
- f2fs_exit_crypto();
destroy_extent_cache();
destroy_checkpoint_caches();
destroy_segment_manager_caches();
diff --git a/fs/f2fs/trace.c b/fs/f2fs/trace.c
index 145fb659a..562ce0821 100644
--- a/fs/f2fs/trace.c
+++ b/fs/f2fs/trace.c
@@ -29,7 +29,8 @@ static inline void __print_last_io(void)
last_io.major, last_io.minor,
last_io.pid, "----------------",
last_io.type,
- last_io.fio.rw, last_io.fio.blk_addr,
+ last_io.fio.rw,
+ last_io.fio.new_blkaddr,
last_io.len);
memset(&last_io, 0, sizeof(last_io));
}
@@ -101,7 +102,8 @@ void f2fs_trace_ios(struct f2fs_io_info *fio, int flush)
last_io.pid == pid &&
last_io.type == __file_type(inode, pid) &&
last_io.fio.rw == fio->rw &&
- last_io.fio.blk_addr + last_io.len == fio->blk_addr) {
+ last_io.fio.new_blkaddr + last_io.len ==
+ fio->new_blkaddr) {
last_io.len++;
return;
}
diff --git a/fs/f2fs/xattr.c b/fs/f2fs/xattr.c
index 10f1e784f..06a72dc01 100644
--- a/fs/f2fs/xattr.c
+++ b/fs/f2fs/xattr.c
@@ -300,7 +300,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
if (ipage) {
inline_addr = inline_xattr_addr(ipage);
- f2fs_wait_on_page_writeback(ipage, NODE);
+ f2fs_wait_on_page_writeback(ipage, NODE, true);
} else {
page = get_node_page(sbi, inode->i_ino);
if (IS_ERR(page)) {
@@ -308,7 +308,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
return PTR_ERR(page);
}
inline_addr = inline_xattr_addr(page);
- f2fs_wait_on_page_writeback(page, NODE);
+ f2fs_wait_on_page_writeback(page, NODE, true);
}
memcpy(inline_addr, txattr_addr, inline_size);
f2fs_put_page(page, 1);
@@ -329,7 +329,7 @@ static inline int write_all_xattrs(struct inode *inode, __u32 hsize,
return PTR_ERR(xpage);
}
f2fs_bug_on(sbi, new_nid);
- f2fs_wait_on_page_writeback(xpage, NODE);
+ f2fs_wait_on_page_writeback(xpage, NODE, true);
} else {
struct dnode_of_data dn;
set_new_dnode(&dn, inode, NULL, NULL, new_nid);
diff --git a/fs/f2fs/xattr.h b/fs/f2fs/xattr.h
index 79dccc825..f990de20c 100644
--- a/fs/f2fs/xattr.h
+++ b/fs/f2fs/xattr.h
@@ -126,7 +126,8 @@ extern ssize_t f2fs_listxattr(struct dentry *, char *, size_t);
#define f2fs_xattr_handlers NULL
static inline int f2fs_setxattr(struct inode *inode, int index,
- const char *name, const void *value, size_t size, int flags)
+ const char *name, const void *value, size_t size,
+ struct page *page, int flags)
{
return -EOPNOTSUPP;
}
diff --git a/fs/fat/Kconfig b/fs/fat/Kconfig
index 182f9ffe2..3ff1772f6 100644
--- a/fs/fat/Kconfig
+++ b/fs/fat/Kconfig
@@ -93,8 +93,24 @@ config FAT_DEFAULT_IOCHARSET
that most of your FAT filesystems use, and can be overridden
with the "iocharset" mount option for FAT filesystems.
Note that "utf8" is not recommended for FAT filesystems.
- If unsure, you shouldn't set "utf8" here.
+ If unsure, you shouldn't set "utf8" here - select the next option
+ instead if you would like to use UTF-8 encoded file names by default.
See <file:Documentation/filesystems/vfat.txt> for more information.
Enable any character sets you need in File Systems/Native Language
Support.
+
+config FAT_DEFAULT_UTF8
+ bool "Enable FAT UTF-8 option by default"
+ depends on VFAT_FS
+ default n
+ help
+ Set this if you would like to have "utf8" mount option set
+ by default when mounting FAT filesystems.
+
+ Even if you say Y here can always disable UTF-8 for
+ particular mount by adding "utf8=0" to mount options.
+
+ Say Y if you use UTF-8 encoding for file names, N otherwise.
+
+ See <file:Documentation/filesystems/vfat.txt> for more information.
diff --git a/fs/fat/inode.c b/fs/fat/inode.c
index a55990521..226281068 100644
--- a/fs/fat/inode.c
+++ b/fs/fat/inode.c
@@ -1127,7 +1127,7 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
}
opts->name_check = 'n';
opts->quiet = opts->showexec = opts->sys_immutable = opts->dotsOK = 0;
- opts->utf8 = opts->unicode_xlate = 0;
+ opts->unicode_xlate = 0;
opts->numtail = 1;
opts->usefree = opts->nocase = 0;
opts->tz_set = 0;
@@ -1135,6 +1135,8 @@ static int parse_options(struct super_block *sb, char *options, int is_vfat,
opts->errors = FAT_ERRORS_RO;
*debug = 0;
+ opts->utf8 = IS_ENABLED(CONFIG_FAT_DEFAULT_UTF8) && is_vfat;
+
if (!options)
goto out;
diff --git a/fs/freevxfs/vxfs_immed.c b/fs/freevxfs/vxfs_immed.c
index cb84f0fcc..bfc780c68 100644
--- a/fs/freevxfs/vxfs_immed.c
+++ b/fs/freevxfs/vxfs_immed.c
@@ -66,11 +66,11 @@ static int
vxfs_immed_readpage(struct file *fp, struct page *pp)
{
struct vxfs_inode_info *vip = VXFS_INO(pp->mapping->host);
- u_int64_t offset = (u_int64_t)pp->index << PAGE_CACHE_SHIFT;
+ u_int64_t offset = (u_int64_t)pp->index << PAGE_SHIFT;
caddr_t kaddr;
kaddr = kmap(pp);
- memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_CACHE_SIZE);
+ memcpy(kaddr, vip->vii_immed.vi_immed + offset, PAGE_SIZE);
kunmap(pp);
flush_dcache_page(pp);
diff --git a/fs/freevxfs/vxfs_lookup.c b/fs/freevxfs/vxfs_lookup.c
index 1cff72df0..a49e0cfbb 100644
--- a/fs/freevxfs/vxfs_lookup.c
+++ b/fs/freevxfs/vxfs_lookup.c
@@ -45,7 +45,7 @@
/*
* Number of VxFS blocks per page.
*/
-#define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_CACHE_SIZE / (sbp)->s_blocksize))
+#define VXFS_BLOCK_PER_PAGE(sbp) ((PAGE_SIZE / (sbp)->s_blocksize))
static struct dentry * vxfs_lookup(struct inode *, struct dentry *, unsigned int);
@@ -175,7 +175,7 @@ vxfs_inode_by_name(struct inode *dip, struct dentry *dp)
if (de) {
ino = de->d_ino;
kunmap(pp);
- page_cache_release(pp);
+ put_page(pp);
}
return (ino);
@@ -255,8 +255,8 @@ vxfs_readdir(struct file *fp, struct dir_context *ctx)
nblocks = dir_blocks(ip);
pblocks = VXFS_BLOCK_PER_PAGE(sbp);
- page = pos >> PAGE_CACHE_SHIFT;
- offset = pos & ~PAGE_CACHE_MASK;
+ page = pos >> PAGE_SHIFT;
+ offset = pos & ~PAGE_MASK;
block = (u_long)(pos >> sbp->s_blocksize_bits) % pblocks;
for (; page < npages; page++, block = 0) {
@@ -289,7 +289,7 @@ vxfs_readdir(struct file *fp, struct dir_context *ctx)
continue;
offset = (char *)de - kaddr;
- ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2;
+ ctx->pos = ((page << PAGE_SHIFT) | offset) + 2;
if (!dir_emit(ctx, de->d_name, de->d_namelen,
de->d_ino, DT_UNKNOWN)) {
vxfs_put_page(pp);
@@ -301,6 +301,6 @@ vxfs_readdir(struct file *fp, struct dir_context *ctx)
vxfs_put_page(pp);
offset = 0;
}
- ctx->pos = ((page << PAGE_CACHE_SHIFT) | offset) + 2;
+ ctx->pos = ((page << PAGE_SHIFT) | offset) + 2;
return 0;
}
diff --git a/fs/freevxfs/vxfs_subr.c b/fs/freevxfs/vxfs_subr.c
index 5d318c44f..e806694d4 100644
--- a/fs/freevxfs/vxfs_subr.c
+++ b/fs/freevxfs/vxfs_subr.c
@@ -50,7 +50,7 @@ inline void
vxfs_put_page(struct page *pp)
{
kunmap(pp);
- page_cache_release(pp);
+ put_page(pp);
}
/**
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index fee81e876..592cea54c 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -33,7 +33,7 @@
/*
* 4MB minimal write chunk size
*/
-#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_CACHE_SHIFT - 10))
+#define MIN_WRITEBACK_PAGES (4096UL >> (PAGE_SHIFT - 10))
struct wb_completion {
atomic_t cnt;
diff --git a/fs/fscache/page.c b/fs/fscache/page.c
index 6b35fc486..3078b679f 100644
--- a/fs/fscache/page.c
+++ b/fs/fscache/page.c
@@ -113,7 +113,7 @@ try_again:
wake_up_bit(&cookie->flags, 0);
if (xpage)
- page_cache_release(xpage);
+ put_page(xpage);
__fscache_uncache_page(cookie, page);
return true;
@@ -164,7 +164,7 @@ static void fscache_end_page_write(struct fscache_object *object,
}
spin_unlock(&object->lock);
if (xpage)
- page_cache_release(xpage);
+ put_page(xpage);
}
/*
@@ -884,7 +884,7 @@ void fscache_invalidate_writes(struct fscache_cookie *cookie)
spin_unlock(&cookie->stores_lock);
for (i = n - 1; i >= 0; i--)
- page_cache_release(results[i]);
+ put_page(results[i]);
}
_leave("");
@@ -982,7 +982,7 @@ int __fscache_write_page(struct fscache_cookie *cookie,
radix_tree_tag_set(&cookie->stores, page->index,
FSCACHE_COOKIE_PENDING_TAG);
- page_cache_get(page);
+ get_page(page);
/* we only want one writer at a time, but we do need to queue new
* writers after exclusive ops */
@@ -1026,7 +1026,7 @@ submit_failed:
radix_tree_delete(&cookie->stores, page->index);
spin_unlock(&cookie->stores_lock);
wake_cookie = __fscache_unuse_cookie(cookie);
- page_cache_release(page);
+ put_page(page);
ret = -ENOBUFS;
goto nobufs;
diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index ebb5e3745..cbece1221 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -897,7 +897,7 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
return err;
}
- page_cache_get(newpage);
+ get_page(newpage);
if (!(buf->flags & PIPE_BUF_FLAG_LRU))
lru_cache_add_file(newpage);
@@ -912,12 +912,12 @@ static int fuse_try_move_page(struct fuse_copy_state *cs, struct page **pagep)
if (err) {
unlock_page(newpage);
- page_cache_release(newpage);
+ put_page(newpage);
return err;
}
unlock_page(oldpage);
- page_cache_release(oldpage);
+ put_page(oldpage);
cs->len = 0;
return 0;
@@ -951,7 +951,7 @@ static int fuse_ref_page(struct fuse_copy_state *cs, struct page *page,
fuse_copy_finish(cs);
buf = cs->pipebufs;
- page_cache_get(page);
+ get_page(page);
buf->page = page;
buf->offset = offset;
buf->len = count;
@@ -1435,7 +1435,7 @@ out_unlock:
out:
for (; page_nr < cs.nr_segs; page_nr++)
- page_cache_release(bufs[page_nr].page);
+ put_page(bufs[page_nr].page);
kfree(bufs);
return ret;
@@ -1632,8 +1632,8 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
goto out_up_killsb;
mapping = inode->i_mapping;
- index = outarg.offset >> PAGE_CACHE_SHIFT;
- offset = outarg.offset & ~PAGE_CACHE_MASK;
+ index = outarg.offset >> PAGE_SHIFT;
+ offset = outarg.offset & ~PAGE_MASK;
file_size = i_size_read(inode);
end = outarg.offset + outarg.size;
if (end > file_size) {
@@ -1652,13 +1652,13 @@ static int fuse_notify_store(struct fuse_conn *fc, unsigned int size,
if (!page)
goto out_iput;
- this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+ this_num = min_t(unsigned, num, PAGE_SIZE - offset);
err = fuse_copy_page(cs, &page, offset, this_num, 0);
if (!err && offset == 0 &&
- (this_num == PAGE_CACHE_SIZE || file_size == end))
+ (this_num == PAGE_SIZE || file_size == end))
SetPageUptodate(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (err)
goto out_iput;
@@ -1697,7 +1697,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
size_t total_len = 0;
int num_pages;
- offset = outarg->offset & ~PAGE_CACHE_MASK;
+ offset = outarg->offset & ~PAGE_MASK;
file_size = i_size_read(inode);
num = outarg->size;
@@ -1720,7 +1720,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
req->page_descs[0].offset = offset;
req->end = fuse_retrieve_end;
- index = outarg->offset >> PAGE_CACHE_SHIFT;
+ index = outarg->offset >> PAGE_SHIFT;
while (num && req->num_pages < num_pages) {
struct page *page;
@@ -1730,7 +1730,7 @@ static int fuse_retrieve(struct fuse_conn *fc, struct inode *inode,
if (!page)
break;
- this_num = min_t(unsigned, num, PAGE_CACHE_SIZE - offset);
+ this_num = min_t(unsigned, num, PAGE_SIZE - offset);
req->pages[req->num_pages] = page;
req->page_descs[req->num_pages].length = this_num;
req->num_pages++;
diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index 416108b42..dcad5e210 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -348,7 +348,7 @@ static bool fuse_range_is_writeback(struct inode *inode, pgoff_t idx_from,
pgoff_t curr_index;
BUG_ON(req->inode != inode);
- curr_index = req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ curr_index = req->misc.write.in.offset >> PAGE_SHIFT;
if (idx_from < curr_index + req->num_pages &&
curr_index <= idx_to) {
found = true;
@@ -683,11 +683,11 @@ static void fuse_short_read(struct fuse_req *req, struct inode *inode,
* present there.
*/
int i;
- int start_idx = num_read >> PAGE_CACHE_SHIFT;
- size_t off = num_read & (PAGE_CACHE_SIZE - 1);
+ int start_idx = num_read >> PAGE_SHIFT;
+ size_t off = num_read & (PAGE_SIZE - 1);
for (i = start_idx; i < req->num_pages; i++) {
- zero_user_segment(req->pages[i], off, PAGE_CACHE_SIZE);
+ zero_user_segment(req->pages[i], off, PAGE_SIZE);
off = 0;
}
} else {
@@ -704,7 +704,7 @@ static int fuse_do_readpage(struct file *file, struct page *page)
struct fuse_req *req;
size_t num_read;
loff_t pos = page_offset(page);
- size_t count = PAGE_CACHE_SIZE;
+ size_t count = PAGE_SIZE;
u64 attr_ver;
int err;
@@ -789,7 +789,7 @@ static void fuse_readpages_end(struct fuse_conn *fc, struct fuse_req *req)
else
SetPageError(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
if (req->ff)
fuse_file_put(req->ff, false);
@@ -800,7 +800,7 @@ static void fuse_send_readpages(struct fuse_req *req, struct file *file)
struct fuse_file *ff = file->private_data;
struct fuse_conn *fc = ff->fc;
loff_t pos = page_offset(req->pages[0]);
- size_t count = req->num_pages << PAGE_CACHE_SHIFT;
+ size_t count = req->num_pages << PAGE_SHIFT;
req->out.argpages = 1;
req->out.page_zeroing = 1;
@@ -836,7 +836,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
if (req->num_pages &&
(req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
- (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_read ||
+ (req->num_pages + 1) * PAGE_SIZE > fc->max_read ||
req->pages[req->num_pages - 1]->index + 1 != page->index)) {
int nr_alloc = min_t(unsigned, data->nr_pages,
FUSE_MAX_PAGES_PER_REQ);
@@ -858,7 +858,7 @@ static int fuse_readpages_fill(void *_data, struct page *page)
return -EIO;
}
- page_cache_get(page);
+ get_page(page);
req->pages[req->num_pages] = page;
req->page_descs[req->num_pages].length = PAGE_SIZE;
req->num_pages++;
@@ -1003,17 +1003,17 @@ static size_t fuse_send_write_pages(struct fuse_req *req, struct file *file,
for (i = 0; i < req->num_pages; i++) {
struct page *page = req->pages[i];
- if (!req->out.h.error && !offset && count >= PAGE_CACHE_SIZE)
+ if (!req->out.h.error && !offset && count >= PAGE_SIZE)
SetPageUptodate(page);
- if (count > PAGE_CACHE_SIZE - offset)
- count -= PAGE_CACHE_SIZE - offset;
+ if (count > PAGE_SIZE - offset)
+ count -= PAGE_SIZE - offset;
else
count = 0;
offset = 0;
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
return res;
@@ -1024,7 +1024,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
struct iov_iter *ii, loff_t pos)
{
struct fuse_conn *fc = get_fuse_conn(mapping->host);
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned offset = pos & (PAGE_SIZE - 1);
size_t count = 0;
int err;
@@ -1034,8 +1034,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
do {
size_t tmp;
struct page *page;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- size_t bytes = min_t(size_t, PAGE_CACHE_SIZE - offset,
+ pgoff_t index = pos >> PAGE_SHIFT;
+ size_t bytes = min_t(size_t, PAGE_SIZE - offset,
iov_iter_count(ii));
bytes = min_t(size_t, bytes, fc->max_write - count);
@@ -1059,7 +1059,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
iov_iter_advance(ii, tmp);
if (!tmp) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
bytes = min(bytes, iov_iter_single_seg_count(ii));
goto again;
}
@@ -1072,7 +1072,7 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
count += tmp;
pos += tmp;
offset += tmp;
- if (offset == PAGE_CACHE_SIZE)
+ if (offset == PAGE_SIZE)
offset = 0;
if (!fc->big_writes)
@@ -1086,8 +1086,8 @@ static ssize_t fuse_fill_write_pages(struct fuse_req *req,
static inline unsigned fuse_wr_pages(loff_t pos, size_t len)
{
return min_t(unsigned,
- ((pos + len - 1) >> PAGE_CACHE_SHIFT) -
- (pos >> PAGE_CACHE_SHIFT) + 1,
+ ((pos + len - 1) >> PAGE_SHIFT) -
+ (pos >> PAGE_SHIFT) + 1,
FUSE_MAX_PAGES_PER_REQ);
}
@@ -1205,8 +1205,8 @@ static ssize_t fuse_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
invalidate_mapping_pages(file->f_mapping,
- pos >> PAGE_CACHE_SHIFT,
- endbyte >> PAGE_CACHE_SHIFT);
+ pos >> PAGE_SHIFT,
+ endbyte >> PAGE_SHIFT);
written += written_buffered;
iocb->ki_pos = pos + written_buffered;
@@ -1247,6 +1247,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
size_t *nbytesp, int write)
{
size_t nbytes = 0; /* # bytes already packed in req */
+ ssize_t ret = 0;
/* Special case for kernel I/O: can copy directly into the buffer */
if (ii->type & ITER_KVEC) {
@@ -1266,13 +1267,12 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
while (nbytes < *nbytesp && req->num_pages < req->max_pages) {
unsigned npages;
size_t start;
- ssize_t ret = iov_iter_get_pages(ii,
- &req->pages[req->num_pages],
+ ret = iov_iter_get_pages(ii, &req->pages[req->num_pages],
*nbytesp - nbytes,
req->max_pages - req->num_pages,
&start);
if (ret < 0)
- return ret;
+ break;
iov_iter_advance(ii, ret);
nbytes += ret;
@@ -1295,7 +1295,7 @@ static int fuse_get_user_pages(struct fuse_req *req, struct iov_iter *ii,
*nbytesp = nbytes;
- return 0;
+ return ret < 0 ? ret : 0;
}
static inline int fuse_iter_npages(const struct iov_iter *ii_p)
@@ -1315,10 +1315,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
size_t nmax = write ? fc->max_write : fc->max_read;
loff_t pos = *ppos;
size_t count = iov_iter_count(iter);
- pgoff_t idx_from = pos >> PAGE_CACHE_SHIFT;
- pgoff_t idx_to = (pos + count - 1) >> PAGE_CACHE_SHIFT;
+ pgoff_t idx_from = pos >> PAGE_SHIFT;
+ pgoff_t idx_to = (pos + count - 1) >> PAGE_SHIFT;
ssize_t res = 0;
struct fuse_req *req;
+ int err = 0;
if (io->async)
req = fuse_get_req_for_background(fc, fuse_iter_npages(iter));
@@ -1339,11 +1340,9 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
size_t nres;
fl_owner_t owner = current->files;
size_t nbytes = min(count, nmax);
- int err = fuse_get_user_pages(req, iter, &nbytes, write);
- if (err) {
- res = err;
+ err = fuse_get_user_pages(req, iter, &nbytes, write);
+ if (err && !nbytes)
break;
- }
if (write)
nres = fuse_send_write(req, io, pos, nbytes, owner);
@@ -1353,11 +1352,11 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (!io->async)
fuse_release_user_pages(req, !write);
if (req->out.h.error) {
- if (!res)
- res = req->out.h.error;
+ err = req->out.h.error;
break;
} else if (nres > nbytes) {
- res = -EIO;
+ res = 0;
+ err = -EIO;
break;
}
count -= nres;
@@ -1381,7 +1380,7 @@ ssize_t fuse_direct_io(struct fuse_io_priv *io, struct iov_iter *iter,
if (res > 0)
*ppos = pos;
- return res;
+ return res > 0 ? res : err;
}
EXPORT_SYMBOL_GPL(fuse_direct_io);
@@ -1467,7 +1466,7 @@ __acquires(fc->lock)
{
struct fuse_inode *fi = get_fuse_inode(req->inode);
struct fuse_write_in *inarg = &req->misc.write.in;
- __u64 data_size = req->num_pages * PAGE_CACHE_SIZE;
+ __u64 data_size = req->num_pages * PAGE_SIZE;
if (!fc->connected)
goto out_free;
@@ -1728,7 +1727,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
list_del(&new_req->writepages_entry);
list_for_each_entry(old_req, &fi->writepages, writepages_entry) {
BUG_ON(old_req->inode != new_req->inode);
- curr_index = old_req->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ curr_index = old_req->misc.write.in.offset >> PAGE_SHIFT;
if (curr_index <= page->index &&
page->index < curr_index + old_req->num_pages) {
found = true;
@@ -1743,7 +1742,7 @@ static bool fuse_writepage_in_flight(struct fuse_req *new_req,
new_req->num_pages = 1;
for (tmp = old_req; tmp != NULL; tmp = tmp->misc.write.next) {
BUG_ON(tmp->inode != new_req->inode);
- curr_index = tmp->misc.write.in.offset >> PAGE_CACHE_SHIFT;
+ curr_index = tmp->misc.write.in.offset >> PAGE_SHIFT;
if (tmp->num_pages == 1 &&
curr_index == page->index) {
old_req = tmp;
@@ -1800,7 +1799,7 @@ static int fuse_writepages_fill(struct page *page,
if (req && req->num_pages &&
(is_writeback || req->num_pages == FUSE_MAX_PAGES_PER_REQ ||
- (req->num_pages + 1) * PAGE_CACHE_SIZE > fc->max_write ||
+ (req->num_pages + 1) * PAGE_SIZE > fc->max_write ||
data->orig_pages[req->num_pages - 1]->index + 1 != page->index)) {
fuse_writepages_send(data);
data->req = NULL;
@@ -1925,7 +1924,7 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
struct fuse_conn *fc = get_fuse_conn(file_inode(file));
struct page *page;
loff_t fsize;
@@ -1939,15 +1938,15 @@ static int fuse_write_begin(struct file *file, struct address_space *mapping,
fuse_wait_on_page_writeback(mapping->host, page->index);
- if (PageUptodate(page) || len == PAGE_CACHE_SIZE)
+ if (PageUptodate(page) || len == PAGE_SIZE)
goto success;
/*
* Check if the start this page comes after the end of file, in which
* case the readpage can be optimized away.
*/
fsize = i_size_read(mapping->host);
- if (fsize <= (pos & PAGE_CACHE_MASK)) {
- size_t off = pos & ~PAGE_CACHE_MASK;
+ if (fsize <= (pos & PAGE_MASK)) {
+ size_t off = pos & ~PAGE_MASK;
if (off)
zero_user_segment(page, 0, off);
goto success;
@@ -1961,7 +1960,7 @@ success:
cleanup:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
error:
return err;
}
@@ -1974,16 +1973,16 @@ static int fuse_write_end(struct file *file, struct address_space *mapping,
if (!PageUptodate(page)) {
/* Zero any unwritten bytes at the end of the page */
- size_t endoff = (pos + copied) & ~PAGE_CACHE_MASK;
+ size_t endoff = (pos + copied) & ~PAGE_MASK;
if (endoff)
- zero_user_segment(page, endoff, PAGE_CACHE_SIZE);
+ zero_user_segment(page, endoff, PAGE_SIZE);
SetPageUptodate(page);
}
fuse_write_update_size(inode, pos + copied);
set_page_dirty(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return copied;
}
diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4d69d5c0b..1ce67668a 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -339,11 +339,11 @@ int fuse_reverse_inval_inode(struct super_block *sb, u64 nodeid,
fuse_invalidate_attr(inode);
if (offset >= 0) {
- pg_start = offset >> PAGE_CACHE_SHIFT;
+ pg_start = offset >> PAGE_SHIFT;
if (len <= 0)
pg_end = -1;
else
- pg_end = (offset + len - 1) >> PAGE_CACHE_SHIFT;
+ pg_end = (offset + len - 1) >> PAGE_SHIFT;
invalidate_inode_pages2_range(inode->i_mapping,
pg_start, pg_end);
}
@@ -864,7 +864,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
process_init_limits(fc, arg);
if (arg->minor >= 6) {
- ra_pages = arg->max_readahead / PAGE_CACHE_SIZE;
+ ra_pages = arg->max_readahead / PAGE_SIZE;
if (arg->flags & FUSE_ASYNC_READ)
fc->async_read = 1;
if (!(arg->flags & FUSE_POSIX_LOCKS))
@@ -901,7 +901,7 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
} else {
- ra_pages = fc->max_read / PAGE_CACHE_SIZE;
+ ra_pages = fc->max_read / PAGE_SIZE;
fc->no_lock = 1;
fc->no_flock = 1;
}
@@ -922,7 +922,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
arg->major = FUSE_KERNEL_VERSION;
arg->minor = FUSE_KERNEL_MINOR_VERSION;
- arg->max_readahead = fc->bdi.ra_pages * PAGE_CACHE_SIZE;
+ arg->max_readahead = fc->bdi.ra_pages * PAGE_SIZE;
arg->flags |= FUSE_ASYNC_READ | FUSE_POSIX_LOCKS | FUSE_ATOMIC_O_TRUNC |
FUSE_EXPORT_SUPPORT | FUSE_BIG_WRITES | FUSE_DONT_MASK |
FUSE_SPLICE_WRITE | FUSE_SPLICE_MOVE | FUSE_SPLICE_READ |
@@ -955,7 +955,7 @@ static int fuse_bdi_init(struct fuse_conn *fc, struct super_block *sb)
int err;
fc->bdi.name = "fuse";
- fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
+ fc->bdi.ra_pages = (VM_MAX_READAHEAD * 1024) / PAGE_SIZE;
/* fuse does it's own writeback accounting */
fc->bdi.capabilities = BDI_CAP_NO_ACCT_WB | BDI_CAP_STRICTLIMIT;
@@ -1053,8 +1053,8 @@ static int fuse_fill_super(struct super_block *sb, void *data, int silent)
goto err;
#endif
} else {
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
}
sb->s_magic = FUSE_SUPER_MAGIC;
sb->s_op = &fuse_super_operations;
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index 93f07465e..1bbbee945 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -101,7 +101,7 @@ static int gfs2_writepage_common(struct page *page,
struct gfs2_inode *ip = GFS2_I(inode);
struct gfs2_sbd *sdp = GFS2_SB(inode);
loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned offset;
if (gfs2_assert_withdraw(sdp, gfs2_glock_is_held_excl(ip->i_gl)))
@@ -109,9 +109,9 @@ static int gfs2_writepage_common(struct page *page,
if (current->journal_info)
goto redirty;
/* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_CACHE_SIZE-1);
+ offset = i_size & (PAGE_SIZE-1);
if (page->index > end_index || (page->index == end_index && !offset)) {
- page->mapping->a_ops->invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ page->mapping->a_ops->invalidatepage(page, 0, PAGE_SIZE);
goto out;
}
return 1;
@@ -238,7 +238,7 @@ static int gfs2_write_jdata_pagevec(struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct gfs2_sbd *sdp = GFS2_SB(inode);
- unsigned nrblocks = nr_pages * (PAGE_CACHE_SIZE/inode->i_sb->s_blocksize);
+ unsigned nrblocks = nr_pages * (PAGE_SIZE/inode->i_sb->s_blocksize);
int i;
int ret;
@@ -366,8 +366,8 @@ static int gfs2_write_cache_jdata(struct address_space *mapping,
cycled = 0;
end = -1;
} else {
- index = wbc->range_start >> PAGE_CACHE_SHIFT;
- end = wbc->range_end >> PAGE_CACHE_SHIFT;
+ index = wbc->range_start >> PAGE_SHIFT;
+ end = wbc->range_end >> PAGE_SHIFT;
if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX)
range_whole = 1;
cycled = 1; /* ignore range_cyclic tests */
@@ -458,7 +458,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
* so we need to supply one here. It doesn't happen often.
*/
if (unlikely(page->index)) {
- zero_user(page, 0, PAGE_CACHE_SIZE);
+ zero_user(page, 0, PAGE_SIZE);
SetPageUptodate(page);
return 0;
}
@@ -471,7 +471,7 @@ static int stuffed_readpage(struct gfs2_inode *ip, struct page *page)
if (dsize > (dibh->b_size - sizeof(struct gfs2_dinode)))
dsize = (dibh->b_size - sizeof(struct gfs2_dinode));
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
- memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
+ memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
kunmap_atomic(kaddr);
flush_dcache_page(page);
brelse(dibh);
@@ -560,8 +560,8 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
unsigned size)
{
struct address_space *mapping = ip->i_inode.i_mapping;
- unsigned long index = *pos / PAGE_CACHE_SIZE;
- unsigned offset = *pos & (PAGE_CACHE_SIZE - 1);
+ unsigned long index = *pos / PAGE_SIZE;
+ unsigned offset = *pos & (PAGE_SIZE - 1);
unsigned copied = 0;
unsigned amt;
struct page *page;
@@ -569,15 +569,15 @@ int gfs2_internal_read(struct gfs2_inode *ip, char *buf, loff_t *pos,
do {
amt = size - copied;
- if (offset + size > PAGE_CACHE_SIZE)
- amt = PAGE_CACHE_SIZE - offset;
+ if (offset + size > PAGE_SIZE)
+ amt = PAGE_SIZE - offset;
page = read_cache_page(mapping, index, __gfs2_readpage, NULL);
if (IS_ERR(page))
return PTR_ERR(page);
p = kmap_atomic(page);
memcpy(buf + copied, p + offset, amt);
kunmap_atomic(p);
- page_cache_release(page);
+ put_page(page);
copied += amt;
index++;
offset = 0;
@@ -651,8 +651,8 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
unsigned requested = 0;
int alloc_required;
int error = 0;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ pgoff_t index = pos >> PAGE_SHIFT;
+ unsigned from = pos & (PAGE_SIZE - 1);
struct page *page;
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &ip->i_gh);
@@ -697,7 +697,7 @@ static int gfs2_write_begin(struct file *file, struct address_space *mapping,
rblocks += gfs2_rg_blocks(ip, requested);
error = gfs2_trans_begin(sdp, rblocks,
- PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+ PAGE_SIZE/sdp->sd_sb.sb_bsize);
if (error)
goto out_trans_fail;
@@ -727,7 +727,7 @@ out:
return 0;
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
gfs2_trans_end(sdp);
if (pos + len > ip->i_inode.i_size)
@@ -827,7 +827,7 @@ static int gfs2_stuffed_write_end(struct inode *inode, struct buffer_head *dibh,
if (!PageUptodate(page))
SetPageUptodate(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (copied) {
if (inode->i_size < to)
@@ -877,7 +877,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_inode *m_ip = GFS2_I(sdp->sd_statfs_inode);
struct buffer_head *dibh;
- unsigned int from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned int from = pos & (PAGE_SIZE - 1);
unsigned int to = from + len;
int ret;
struct gfs2_trans *tr = current->journal_info;
@@ -888,7 +888,7 @@ static int gfs2_write_end(struct file *file, struct address_space *mapping,
ret = gfs2_meta_inode_buffer(ip, &dibh);
if (unlikely(ret)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
goto failed;
}
@@ -992,7 +992,7 @@ static void gfs2_invalidatepage(struct page *page, unsigned int offset,
{
struct gfs2_sbd *sdp = GFS2_SB(page->mapping->host);
unsigned int stop = offset + length;
- int partial_page = (offset || length < PAGE_CACHE_SIZE);
+ int partial_page = (offset || length < PAGE_SIZE);
struct buffer_head *bh, *head;
unsigned long pos = 0;
@@ -1082,7 +1082,7 @@ static ssize_t gfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* the first place, mapping->nr_pages will always be zero.
*/
if (mapping->nrpages) {
- loff_t lstart = offset & (PAGE_CACHE_SIZE - 1);
+ loff_t lstart = offset & ~(PAGE_SIZE - 1);
loff_t len = iov_iter_count(iter);
loff_t end = PAGE_ALIGN(offset + len) - 1;
diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c
index 0860f0b5b..24ce1cdd4 100644
--- a/fs/gfs2/bmap.c
+++ b/fs/gfs2/bmap.c
@@ -75,7 +75,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
dsize = dibh->b_size - sizeof(struct gfs2_dinode);
memcpy(kaddr, dibh->b_data + sizeof(struct gfs2_dinode), dsize);
- memset(kaddr + dsize, 0, PAGE_CACHE_SIZE - dsize);
+ memset(kaddr + dsize, 0, PAGE_SIZE - dsize);
kunmap(page);
SetPageUptodate(page);
@@ -98,7 +98,7 @@ static int gfs2_unstuffer_page(struct gfs2_inode *ip, struct buffer_head *dibh,
if (release) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
return 0;
@@ -932,8 +932,8 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
{
struct inode *inode = mapping->host;
struct gfs2_inode *ip = GFS2_I(inode);
- unsigned long index = from >> PAGE_CACHE_SHIFT;
- unsigned offset = from & (PAGE_CACHE_SIZE-1);
+ unsigned long index = from >> PAGE_SHIFT;
+ unsigned offset = from & (PAGE_SIZE-1);
unsigned blocksize, iblock, length, pos;
struct buffer_head *bh;
struct page *page;
@@ -945,7 +945,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
blocksize = inode->i_sb->s_blocksize;
length = blocksize - (offset & (blocksize - 1));
- iblock = index << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+ iblock = index << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
if (!page_has_buffers(page))
create_empty_buffers(page, blocksize, 0);
@@ -989,7 +989,7 @@ static int gfs2_block_truncate_page(struct address_space *mapping, loff_t from)
mark_buffer_dirty(bh);
unlock:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return err;
}
diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c
index 6a9259230..4a01f30e9 100644
--- a/fs/gfs2/dir.c
+++ b/fs/gfs2/dir.c
@@ -798,7 +798,7 @@ static int get_first_leaf(struct gfs2_inode *dip, u32 index,
int error;
error = get_leaf_nr(dip, index, &leaf_no);
- if (!error)
+ if (!IS_ERR_VALUE(error))
error = get_leaf(dip, leaf_no, bh_out);
return error;
@@ -1014,7 +1014,7 @@ static int dir_split_leaf(struct inode *inode, const struct qstr *name)
index = name->hash >> (32 - dip->i_depth);
error = get_leaf_nr(dip, index, &leaf_no);
- if (error)
+ if (IS_ERR_VALUE(error))
return error;
/* Get the old leaf block */
@@ -1660,7 +1660,7 @@ struct inode *gfs2_dir_search(struct inode *dir, const struct qstr *name,
brelse(bh);
if (fail_on_exist)
return ERR_PTR(-EEXIST);
- inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino, 0);
+ inode = gfs2_inode_lookup(dir->i_sb, dtype, addr, formal_ino);
if (!IS_ERR(inode))
GFS2_I(inode)->i_rahead = rahead;
return inode;
diff --git a/fs/gfs2/export.c b/fs/gfs2/export.c
index 5d15e9498..d5bda8513 100644
--- a/fs/gfs2/export.c
+++ b/fs/gfs2/export.c
@@ -137,7 +137,7 @@ static struct dentry *gfs2_get_dentry(struct super_block *sb,
struct gfs2_sbd *sdp = sb->s_fs_info;
struct inode *inode;
- inode = gfs2_ilookup(sb, inum->no_addr, 0);
+ inode = gfs2_ilookup(sb, inum->no_addr);
if (inode) {
if (GFS2_I(inode)->i_no_formal_ino != inum->no_formal_ino) {
iput(inode);
diff --git a/fs/gfs2/file.c b/fs/gfs2/file.c
index c9384f932..208efc70a 100644
--- a/fs/gfs2/file.c
+++ b/fs/gfs2/file.c
@@ -354,8 +354,8 @@ static int gfs2_allocate_page_backing(struct page *page)
{
struct inode *inode = page->mapping->host;
struct buffer_head bh;
- unsigned long size = PAGE_CACHE_SIZE;
- u64 lblock = page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ unsigned long size = PAGE_SIZE;
+ u64 lblock = page->index << (PAGE_SHIFT - inode->i_blkbits);
do {
bh.b_state = 0;
@@ -386,7 +386,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
struct gfs2_sbd *sdp = GFS2_SB(inode);
struct gfs2_alloc_parms ap = { .aflags = 0, };
unsigned long last_index;
- u64 pos = page->index << PAGE_CACHE_SHIFT;
+ u64 pos = page->index << PAGE_SHIFT;
unsigned int data_blocks, ind_blocks, rblocks;
struct gfs2_holder gh;
loff_t size;
@@ -401,7 +401,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out;
- gfs2_size_hint(vma->vm_file, pos, PAGE_CACHE_SIZE);
+ gfs2_size_hint(vma->vm_file, pos, PAGE_SIZE);
gfs2_holder_init(ip->i_gl, LM_ST_EXCLUSIVE, 0, &gh);
ret = gfs2_glock_nq(&gh);
@@ -411,7 +411,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
set_bit(GLF_DIRTY, &ip->i_gl->gl_flags);
set_bit(GIF_SW_PAGED, &ip->i_flags);
- if (!gfs2_write_alloc_required(ip, pos, PAGE_CACHE_SIZE)) {
+ if (!gfs2_write_alloc_required(ip, pos, PAGE_SIZE)) {
lock_page(page);
if (!PageUptodate(page) || page->mapping != inode->i_mapping) {
ret = -EAGAIN;
@@ -424,7 +424,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
if (ret)
goto out_unlock;
- gfs2_write_calc_reserv(ip, PAGE_CACHE_SIZE, &data_blocks, &ind_blocks);
+ gfs2_write_calc_reserv(ip, PAGE_SIZE, &data_blocks, &ind_blocks);
ap.target = data_blocks + ind_blocks;
ret = gfs2_quota_lock_check(ip, &ap);
if (ret)
@@ -447,7 +447,7 @@ static int gfs2_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
lock_page(page);
ret = -EINVAL;
size = i_size_read(inode);
- last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+ last_index = (size - 1) >> PAGE_SHIFT;
/* Check page index against inode size */
if (size == 0 || (page->index > last_index))
goto out_trans_end;
@@ -873,7 +873,7 @@ static long __gfs2_fallocate(struct file *file, int mode, loff_t offset, loff_t
rblocks += data_blocks ? data_blocks : 1;
error = gfs2_trans_begin(sdp, rblocks,
- PAGE_CACHE_SIZE/sdp->sd_sb.sb_bsize);
+ PAGE_SIZE/sdp->sd_sb.sb_bsize);
if (error)
goto out_trans_fail;
diff --git a/fs/gfs2/glock.c b/fs/gfs2/glock.c
index a4ff7b56f..6539131c5 100644
--- a/fs/gfs2/glock.c
+++ b/fs/gfs2/glock.c
@@ -572,17 +572,24 @@ static void delete_work_func(struct work_struct *work)
struct inode *inode;
u64 no_addr = gl->gl_name.ln_number;
+ /* If someone's using this glock to create a new dinode, the block must
+ have been freed by another node, then re-used, in which case our
+ iopen callback is too late after the fact. Ignore it. */
+ if (test_bit(GLF_INODE_CREATING, &gl->gl_flags))
+ goto out;
+
ip = gl->gl_object;
/* Note: Unsafe to dereference ip as we don't hold right refs/locks */
if (ip)
- inode = gfs2_ilookup(sdp->sd_vfs, no_addr, 1);
+ inode = gfs2_ilookup(sdp->sd_vfs, no_addr);
else
inode = gfs2_lookup_by_inum(sdp, no_addr, NULL, GFS2_BLKST_UNLINKED);
if (inode && !IS_ERR(inode)) {
d_prune_aliases(inode);
iput(inode);
}
+out:
gfs2_glock_put(gl);
}
@@ -1015,6 +1022,7 @@ void gfs2_glock_dq(struct gfs2_holder *gh)
handle_callback(gl, LM_ST_UNLOCKED, 0, false);
list_del_init(&gh->gh_list);
+ clear_bit(HIF_HOLDER, &gh->gh_iflags);
if (find_first_holder(gl) == NULL) {
if (glops->go_unlock) {
GLOCK_BUG_ON(gl, test_and_set_bit(GLF_LOCK, &gl->gl_flags));
diff --git a/fs/gfs2/incore.h b/fs/gfs2/incore.h
index 845fb09cc..a6a3389a0 100644
--- a/fs/gfs2/incore.h
+++ b/fs/gfs2/incore.h
@@ -328,6 +328,7 @@ enum {
GLF_LRU = 13,
GLF_OBJECT = 14, /* Used only for tracing */
GLF_BLOCKING = 15,
+ GLF_INODE_CREATING = 16, /* Inode creation occurring */
};
struct gfs2_glock {
diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c
index 352f95876..bb30f9a72 100644
--- a/fs/gfs2/inode.c
+++ b/fs/gfs2/inode.c
@@ -37,61 +37,9 @@
#include "super.h"
#include "glops.h"
-struct gfs2_skip_data {
- u64 no_addr;
- int skipped;
- int non_block;
-};
-
-static int iget_test(struct inode *inode, void *opaque)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_skip_data *data = opaque;
-
- if (ip->i_no_addr == data->no_addr) {
- if (data->non_block &&
- inode->i_state & (I_FREEING|I_CLEAR|I_WILL_FREE)) {
- data->skipped = 1;
- return 0;
- }
- return 1;
- }
- return 0;
-}
-
-static int iget_set(struct inode *inode, void *opaque)
-{
- struct gfs2_inode *ip = GFS2_I(inode);
- struct gfs2_skip_data *data = opaque;
-
- if (data->skipped)
- return -ENOENT;
- inode->i_ino = (unsigned long)(data->no_addr);
- ip->i_no_addr = data->no_addr;
- return 0;
-}
-
-struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int non_block)
+struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr)
{
- unsigned long hash = (unsigned long)no_addr;
- struct gfs2_skip_data data;
-
- data.no_addr = no_addr;
- data.skipped = 0;
- data.non_block = non_block;
- return ilookup5(sb, hash, iget_test, &data);
-}
-
-static struct inode *gfs2_iget(struct super_block *sb, u64 no_addr,
- int non_block)
-{
- struct gfs2_skip_data data;
- unsigned long hash = (unsigned long)no_addr;
-
- data.no_addr = no_addr;
- data.skipped = 0;
- data.non_block = non_block;
- return iget5_locked(sb, hash, iget_test, iget_set, &data);
+ return ilookup(sb, (unsigned long)no_addr);
}
/**
@@ -132,21 +80,21 @@ static void gfs2_set_iop(struct inode *inode)
* @sb: The super block
* @no_addr: The inode number
* @type: The type of the inode
- * non_block: Can we block on inodes that are being freed?
*
* Returns: A VFS inode, or an error
*/
struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned int type,
- u64 no_addr, u64 no_formal_ino, int non_block)
+ u64 no_addr, u64 no_formal_ino)
{
struct inode *inode;
struct gfs2_inode *ip;
struct gfs2_glock *io_gl = NULL;
int error;
- inode = gfs2_iget(sb, no_addr, non_block);
+ inode = iget_locked(sb, (unsigned long)no_addr);
ip = GFS2_I(inode);
+ ip->i_no_addr = no_addr;
if (!inode)
return ERR_PTR(-ENOMEM);
@@ -221,7 +169,7 @@ struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
if (error)
goto fail;
- inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0, 1);
+ inode = gfs2_inode_lookup(sb, DT_UNKNOWN, no_addr, 0);
if (IS_ERR(inode))
goto fail;
@@ -592,7 +540,7 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
struct inode *inode = NULL;
struct gfs2_inode *dip = GFS2_I(dir), *ip;
struct gfs2_sbd *sdp = GFS2_SB(&dip->i_inode);
- struct gfs2_glock *io_gl;
+ struct gfs2_glock *io_gl = NULL;
int error, free_vfs_inode = 1;
u32 aflags = 0;
unsigned blocks = 1;
@@ -729,6 +677,8 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
if (error)
goto fail_gunlock2;
+ BUG_ON(test_and_set_bit(GLF_INODE_CREATING, &io_gl->gl_flags));
+
error = gfs2_glock_nq_init(io_gl, LM_ST_SHARED, GL_EXACT, &ip->i_iopen_gh);
if (error)
goto fail_gunlock2;
@@ -771,12 +721,15 @@ static int gfs2_create_inode(struct inode *dir, struct dentry *dentry,
}
gfs2_glock_dq_uninit(ghs);
gfs2_glock_dq_uninit(ghs + 1);
+ clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
return error;
fail_gunlock3:
gfs2_glock_dq_uninit(&ip->i_iopen_gh);
gfs2_glock_put(io_gl);
fail_gunlock2:
+ if (io_gl)
+ clear_bit(GLF_INODE_CREATING, &io_gl->gl_flags);
gfs2_glock_dq_uninit(ghs + 1);
fail_free_inode:
if (ip->i_gl)
diff --git a/fs/gfs2/inode.h b/fs/gfs2/inode.h
index ba4d9492d..e1af0d4aa 100644
--- a/fs/gfs2/inode.h
+++ b/fs/gfs2/inode.h
@@ -94,12 +94,11 @@ err:
}
extern struct inode *gfs2_inode_lookup(struct super_block *sb, unsigned type,
- u64 no_addr, u64 no_formal_ino,
- int non_block);
+ u64 no_addr, u64 no_formal_ino);
extern struct inode *gfs2_lookup_by_inum(struct gfs2_sbd *sdp, u64 no_addr,
u64 *no_formal_ino,
unsigned int blktype);
-extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr, int nonblock);
+extern struct inode *gfs2_ilookup(struct super_block *sb, u64 no_addr);
extern int gfs2_inode_refresh(struct gfs2_inode *ip);
diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c
index e137d96f1..0448524c1 100644
--- a/fs/gfs2/meta_io.c
+++ b/fs/gfs2/meta_io.c
@@ -124,7 +124,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
if (mapping == NULL)
mapping = &sdp->sd_aspace;
- shift = PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift;
+ shift = PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift;
index = blkno >> shift; /* convert block to page */
bufnum = blkno - (index << shift); /* block buf index within page */
@@ -154,7 +154,7 @@ struct buffer_head *gfs2_getbuf(struct gfs2_glock *gl, u64 blkno, int create)
map_bh(bh, sdp->sd_vfs, blkno);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return bh;
}
diff --git a/fs/gfs2/ops_fstype.c b/fs/gfs2/ops_fstype.c
index dbed9e243..49b0bff18 100644
--- a/fs/gfs2/ops_fstype.c
+++ b/fs/gfs2/ops_fstype.c
@@ -454,7 +454,7 @@ static int gfs2_lookup_root(struct super_block *sb, struct dentry **dptr,
struct dentry *dentry;
struct inode *inode;
- inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0, 0);
+ inode = gfs2_inode_lookup(sb, DT_DIR, no_addr, 0);
if (IS_ERR(inode)) {
fs_err(sdp, "can't read in %s inode: %ld\n", name, PTR_ERR(inode));
return PTR_ERR(inode);
diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c
index a39891344..ce7d69a2f 100644
--- a/fs/gfs2/quota.c
+++ b/fs/gfs2/quota.c
@@ -701,7 +701,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
unsigned to_write = bytes, pg_off = off;
int done = 0;
- blk = index << (PAGE_CACHE_SHIFT - sdp->sd_sb.sb_bsize_shift);
+ blk = index << (PAGE_SHIFT - sdp->sd_sb.sb_bsize_shift);
boff = off % bsize;
page = find_or_create_page(mapping, index, GFP_NOFS);
@@ -753,13 +753,13 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index,
flush_dcache_page(page);
kunmap_atomic(kaddr);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return 0;
unlock_out:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return -EIO;
}
@@ -773,13 +773,13 @@ static int gfs2_write_disk_quota(struct gfs2_inode *ip, struct gfs2_quota *qp,
nbytes = sizeof(struct gfs2_quota);
- pg_beg = loc >> PAGE_CACHE_SHIFT;
- pg_off = loc % PAGE_CACHE_SIZE;
+ pg_beg = loc >> PAGE_SHIFT;
+ pg_off = loc % PAGE_SIZE;
/* If the quota straddles a page boundary, split the write in two */
- if ((pg_off + nbytes) > PAGE_CACHE_SIZE) {
+ if ((pg_off + nbytes) > PAGE_SIZE) {
pg_oflow = 1;
- overflow = (pg_off + nbytes) - PAGE_CACHE_SIZE;
+ overflow = (pg_off + nbytes) - PAGE_SIZE;
}
ptr = qp;
diff --git a/fs/gfs2/rgrp.c b/fs/gfs2/rgrp.c
index 07c0265aa..99a0bdac8 100644
--- a/fs/gfs2/rgrp.c
+++ b/fs/gfs2/rgrp.c
@@ -918,9 +918,8 @@ static int read_rindex_entry(struct gfs2_inode *ip)
goto fail;
rgd->rd_gl->gl_object = rgd;
- rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_CACHE_MASK;
- rgd->rd_gl->gl_vm.end = PAGE_CACHE_ALIGN((rgd->rd_addr +
- rgd->rd_length) * bsize) - 1;
+ rgd->rd_gl->gl_vm.start = (rgd->rd_addr * bsize) & PAGE_MASK;
+ rgd->rd_gl->gl_vm.end = PAGE_ALIGN((rgd->rd_addr + rgd->rd_length) * bsize) - 1;
rgd->rd_rgl = (struct gfs2_rgrp_lvb *)rgd->rd_gl->gl_lksb.sb_lvbptr;
rgd->rd_flags &= ~(GFS2_RDF_UPTODATE | GFS2_RDF_PREFERRED);
if (rgd->rd_data > sdp->sd_max_rg_data)
diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c
index 8f960a51a..f8a0cd821 100644
--- a/fs/gfs2/super.c
+++ b/fs/gfs2/super.c
@@ -1551,12 +1551,16 @@ static void gfs2_evict_inode(struct inode *inode)
goto out_truncate;
}
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_wait(&ip->i_iopen_gh);
- gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE, &ip->i_iopen_gh);
- error = gfs2_glock_nq(&ip->i_iopen_gh);
- if (error)
- goto out_truncate;
+ if (ip->i_iopen_gh.gh_gl &&
+ test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ gfs2_holder_reinit(LM_ST_EXCLUSIVE, LM_FLAG_TRY_1CB | GL_NOCACHE,
+ &ip->i_iopen_gh);
+ error = gfs2_glock_nq(&ip->i_iopen_gh);
+ if (error)
+ goto out_truncate;
+ }
/* Case 1 starts here */
@@ -1606,11 +1610,13 @@ out_unlock:
if (gfs2_rs_active(&ip->i_res))
gfs2_rs_deltree(&ip->i_res);
- if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
- ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
- gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ if (ip->i_iopen_gh.gh_gl) {
+ if (test_bit(HIF_HOLDER, &ip->i_iopen_gh.gh_iflags)) {
+ ip->i_iopen_gh.gh_flags |= GL_NOCACHE;
+ gfs2_glock_dq_wait(&ip->i_iopen_gh);
+ }
+ gfs2_holder_uninit(&ip->i_iopen_gh);
}
- gfs2_holder_uninit(&ip->i_iopen_gh);
gfs2_glock_dq_uninit(&gh);
if (error && error != GLR_TRYFAILED && error != -EROFS)
fs_warn(sdp, "gfs2_evict_inode: %d\n", error);
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index 221719eac..d77d844b6 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -278,14 +278,14 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
mapping = tree->inode->i_mapping;
off = (loff_t)cnid * tree->node_size;
- block = off >> PAGE_CACHE_SHIFT;
- node->page_offset = off & ~PAGE_CACHE_MASK;
+ block = off >> PAGE_SHIFT;
+ node->page_offset = off & ~PAGE_MASK;
for (i = 0; i < tree->pages_per_bnode; i++) {
page = read_mapping_page(mapping, block++, NULL);
if (IS_ERR(page))
goto fail;
if (PageError(page)) {
- page_cache_release(page);
+ put_page(page);
goto fail;
}
node->page[i] = page;
@@ -401,7 +401,7 @@ void hfs_bnode_free(struct hfs_bnode *node)
for (i = 0; i < node->tree->pages_per_bnode; i++)
if (node->page[i])
- page_cache_release(node->page[i]);
+ put_page(node->page[i]);
kfree(node);
}
@@ -429,11 +429,11 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
pagep = node->page;
memset(kmap(*pagep) + node->page_offset, 0,
- min((int)PAGE_CACHE_SIZE, (int)tree->node_size));
+ min((int)PAGE_SIZE, (int)tree->node_size));
set_page_dirty(*pagep);
kunmap(*pagep);
for (i = 1; i < tree->pages_per_bnode; i++) {
- memset(kmap(*++pagep), 0, PAGE_CACHE_SIZE);
+ memset(kmap(*++pagep), 0, PAGE_SIZE);
set_page_dirty(*pagep);
kunmap(*pagep);
}
diff --git a/fs/hfs/btree.c b/fs/hfs/btree.c
index 1ab19e660..37cdd955e 100644
--- a/fs/hfs/btree.c
+++ b/fs/hfs/btree.c
@@ -116,14 +116,14 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id, btree_keycmp ke
}
tree->node_size_shift = ffs(size) - 1;
- tree->pages_per_bnode = (tree->node_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ tree->pages_per_bnode = (tree->node_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
kunmap(page);
- page_cache_release(page);
+ put_page(page);
return tree;
fail_page:
- page_cache_release(page);
+ put_page(page);
free_inode:
tree->inode->i_mapping->a_ops = &hfs_aops;
iput(tree->inode);
@@ -257,9 +257,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
off = off16;
off += node->page_offset;
- pagep = node->page + (off >> PAGE_CACHE_SHIFT);
+ pagep = node->page + (off >> PAGE_SHIFT);
data = kmap(*pagep);
- off &= ~PAGE_CACHE_MASK;
+ off &= ~PAGE_MASK;
idx = 0;
for (;;) {
@@ -279,7 +279,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
}
}
}
- if (++off >= PAGE_CACHE_SIZE) {
+ if (++off >= PAGE_SIZE) {
kunmap(*pagep);
data = kmap(*++pagep);
off = 0;
@@ -302,9 +302,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
len = hfs_brec_lenoff(node, 0, &off16);
off = off16;
off += node->page_offset;
- pagep = node->page + (off >> PAGE_CACHE_SHIFT);
+ pagep = node->page + (off >> PAGE_SHIFT);
data = kmap(*pagep);
- off &= ~PAGE_CACHE_MASK;
+ off &= ~PAGE_MASK;
}
}
@@ -348,9 +348,9 @@ void hfs_bmap_free(struct hfs_bnode *node)
len = hfs_brec_lenoff(node, 0, &off);
}
off += node->page_offset + nidx / 8;
- page = node->page[off >> PAGE_CACHE_SHIFT];
+ page = node->page[off >> PAGE_SHIFT];
data = kmap(page);
- off &= ~PAGE_CACHE_MASK;
+ off &= ~PAGE_MASK;
m = 1 << (~nidx & 7);
byte = data[off];
if (!(byte & m)) {
diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c
index 6686bf39a..cb1e5faa2 100644
--- a/fs/hfs/inode.c
+++ b/fs/hfs/inode.c
@@ -91,8 +91,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
if (!tree)
return 0;
- if (tree->node_size >= PAGE_CACHE_SIZE) {
- nidx = page->index >> (tree->node_size_shift - PAGE_CACHE_SHIFT);
+ if (tree->node_size >= PAGE_SIZE) {
+ nidx = page->index >> (tree->node_size_shift - PAGE_SHIFT);
spin_lock(&tree->hash_lock);
node = hfs_bnode_findhash(tree, nidx);
if (!node)
@@ -105,8 +105,8 @@ static int hfs_releasepage(struct page *page, gfp_t mask)
}
spin_unlock(&tree->hash_lock);
} else {
- nidx = page->index << (PAGE_CACHE_SHIFT - tree->node_size_shift);
- i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift);
+ nidx = page->index << (PAGE_SHIFT - tree->node_size_shift);
+ i = 1 << (PAGE_SHIFT - tree->node_size_shift);
spin_lock(&tree->hash_lock);
do {
node = hfs_bnode_findhash(tree, nidx++);
diff --git a/fs/hfsplus/bitmap.c b/fs/hfsplus/bitmap.c
index d29544515..c0ae274c0 100644
--- a/fs/hfsplus/bitmap.c
+++ b/fs/hfsplus/bitmap.c
@@ -13,7 +13,7 @@
#include "hfsplus_fs.h"
#include "hfsplus_raw.h"
-#define PAGE_CACHE_BITS (PAGE_CACHE_SIZE * 8)
+#define PAGE_CACHE_BITS (PAGE_SIZE * 8)
int hfsplus_block_allocate(struct super_block *sb, u32 size,
u32 offset, u32 *max)
diff --git a/fs/hfsplus/bnode.c b/fs/hfsplus/bnode.c
index 63924662a..ce014ceb8 100644
--- a/fs/hfsplus/bnode.c
+++ b/fs/hfsplus/bnode.c
@@ -24,16 +24,16 @@ void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
int l;
off += node->page_offset;
- pagep = node->page + (off >> PAGE_CACHE_SHIFT);
- off &= ~PAGE_CACHE_MASK;
+ pagep = node->page + (off >> PAGE_SHIFT);
+ off &= ~PAGE_MASK;
- l = min_t(int, len, PAGE_CACHE_SIZE - off);
+ l = min_t(int, len, PAGE_SIZE - off);
memcpy(buf, kmap(*pagep) + off, l);
kunmap(*pagep);
while ((len -= l) != 0) {
buf += l;
- l = min_t(int, len, PAGE_CACHE_SIZE);
+ l = min_t(int, len, PAGE_SIZE);
memcpy(buf, kmap(*++pagep), l);
kunmap(*pagep);
}
@@ -77,17 +77,17 @@ void hfs_bnode_write(struct hfs_bnode *node, void *buf, int off, int len)
int l;
off += node->page_offset;
- pagep = node->page + (off >> PAGE_CACHE_SHIFT);
- off &= ~PAGE_CACHE_MASK;
+ pagep = node->page + (off >> PAGE_SHIFT);
+ off &= ~PAGE_MASK;
- l = min_t(int, len, PAGE_CACHE_SIZE - off);
+ l = min_t(int, len, PAGE_SIZE - off);
memcpy(kmap(*pagep) + off, buf, l);
set_page_dirty(*pagep);
kunmap(*pagep);
while ((len -= l) != 0) {
buf += l;
- l = min_t(int, len, PAGE_CACHE_SIZE);
+ l = min_t(int, len, PAGE_SIZE);
memcpy(kmap(*++pagep), buf, l);
set_page_dirty(*pagep);
kunmap(*pagep);
@@ -107,16 +107,16 @@ void hfs_bnode_clear(struct hfs_bnode *node, int off, int len)
int l;
off += node->page_offset;
- pagep = node->page + (off >> PAGE_CACHE_SHIFT);
- off &= ~PAGE_CACHE_MASK;
+ pagep = node->page + (off >> PAGE_SHIFT);
+ off &= ~PAGE_MASK;
- l = min_t(int, len, PAGE_CACHE_SIZE - off);
+ l = min_t(int, len, PAGE_SIZE - off);
memset(kmap(*pagep) + off, 0, l);
set_page_dirty(*pagep);
kunmap(*pagep);
while ((len -= l) != 0) {
- l = min_t(int, len, PAGE_CACHE_SIZE);
+ l = min_t(int, len, PAGE_SIZE);
memset(kmap(*++pagep), 0, l);
set_page_dirty(*pagep);
kunmap(*pagep);
@@ -136,20 +136,20 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
tree = src_node->tree;
src += src_node->page_offset;
dst += dst_node->page_offset;
- src_page = src_node->page + (src >> PAGE_CACHE_SHIFT);
- src &= ~PAGE_CACHE_MASK;
- dst_page = dst_node->page + (dst >> PAGE_CACHE_SHIFT);
- dst &= ~PAGE_CACHE_MASK;
+ src_page = src_node->page + (src >> PAGE_SHIFT);
+ src &= ~PAGE_MASK;
+ dst_page = dst_node->page + (dst >> PAGE_SHIFT);
+ dst &= ~PAGE_MASK;
if (src == dst) {
- l = min_t(int, len, PAGE_CACHE_SIZE - src);
+ l = min_t(int, len, PAGE_SIZE - src);
memcpy(kmap(*dst_page) + src, kmap(*src_page) + src, l);
kunmap(*src_page);
set_page_dirty(*dst_page);
kunmap(*dst_page);
while ((len -= l) != 0) {
- l = min_t(int, len, PAGE_CACHE_SIZE);
+ l = min_t(int, len, PAGE_SIZE);
memcpy(kmap(*++dst_page), kmap(*++src_page), l);
kunmap(*src_page);
set_page_dirty(*dst_page);
@@ -161,12 +161,12 @@ void hfs_bnode_copy(struct hfs_bnode *dst_node, int dst,
do {
src_ptr = kmap(*src_page) + src;
dst_ptr = kmap(*dst_page) + dst;
- if (PAGE_CACHE_SIZE - src < PAGE_CACHE_SIZE - dst) {
- l = PAGE_CACHE_SIZE - src;
+ if (PAGE_SIZE - src < PAGE_SIZE - dst) {
+ l = PAGE_SIZE - src;
src = 0;
dst += l;
} else {
- l = PAGE_CACHE_SIZE - dst;
+ l = PAGE_SIZE - dst;
src += l;
dst = 0;
}
@@ -195,11 +195,11 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
dst += node->page_offset;
if (dst > src) {
src += len - 1;
- src_page = node->page + (src >> PAGE_CACHE_SHIFT);
- src = (src & ~PAGE_CACHE_MASK) + 1;
+ src_page = node->page + (src >> PAGE_SHIFT);
+ src = (src & ~PAGE_MASK) + 1;
dst += len - 1;
- dst_page = node->page + (dst >> PAGE_CACHE_SHIFT);
- dst = (dst & ~PAGE_CACHE_MASK) + 1;
+ dst_page = node->page + (dst >> PAGE_SHIFT);
+ dst = (dst & ~PAGE_MASK) + 1;
if (src == dst) {
while (src < len) {
@@ -208,7 +208,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
set_page_dirty(*dst_page);
kunmap(*dst_page);
len -= src;
- src = PAGE_CACHE_SIZE;
+ src = PAGE_SIZE;
src_page--;
dst_page--;
}
@@ -226,32 +226,32 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
dst_ptr = kmap(*dst_page) + dst;
if (src < dst) {
l = src;
- src = PAGE_CACHE_SIZE;
+ src = PAGE_SIZE;
dst -= l;
} else {
l = dst;
src -= l;
- dst = PAGE_CACHE_SIZE;
+ dst = PAGE_SIZE;
}
l = min(len, l);
memmove(dst_ptr - l, src_ptr - l, l);
kunmap(*src_page);
set_page_dirty(*dst_page);
kunmap(*dst_page);
- if (dst == PAGE_CACHE_SIZE)
+ if (dst == PAGE_SIZE)
dst_page--;
else
src_page--;
} while ((len -= l));
}
} else {
- src_page = node->page + (src >> PAGE_CACHE_SHIFT);
- src &= ~PAGE_CACHE_MASK;
- dst_page = node->page + (dst >> PAGE_CACHE_SHIFT);
- dst &= ~PAGE_CACHE_MASK;
+ src_page = node->page + (src >> PAGE_SHIFT);
+ src &= ~PAGE_MASK;
+ dst_page = node->page + (dst >> PAGE_SHIFT);
+ dst &= ~PAGE_MASK;
if (src == dst) {
- l = min_t(int, len, PAGE_CACHE_SIZE - src);
+ l = min_t(int, len, PAGE_SIZE - src);
memmove(kmap(*dst_page) + src,
kmap(*src_page) + src, l);
kunmap(*src_page);
@@ -259,7 +259,7 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
kunmap(*dst_page);
while ((len -= l) != 0) {
- l = min_t(int, len, PAGE_CACHE_SIZE);
+ l = min_t(int, len, PAGE_SIZE);
memmove(kmap(*++dst_page),
kmap(*++src_page), l);
kunmap(*src_page);
@@ -272,13 +272,13 @@ void hfs_bnode_move(struct hfs_bnode *node, int dst, int src, int len)
do {
src_ptr = kmap(*src_page) + src;
dst_ptr = kmap(*dst_page) + dst;
- if (PAGE_CACHE_SIZE - src <
- PAGE_CACHE_SIZE - dst) {
- l = PAGE_CACHE_SIZE - src;
+ if (PAGE_SIZE - src <
+ PAGE_SIZE - dst) {
+ l = PAGE_SIZE - src;
src = 0;
dst += l;
} else {
- l = PAGE_CACHE_SIZE - dst;
+ l = PAGE_SIZE - dst;
src += l;
dst = 0;
}
@@ -444,14 +444,14 @@ static struct hfs_bnode *__hfs_bnode_create(struct hfs_btree *tree, u32 cnid)
mapping = tree->inode->i_mapping;
off = (loff_t)cnid << tree->node_size_shift;
- block = off >> PAGE_CACHE_SHIFT;
- node->page_offset = off & ~PAGE_CACHE_MASK;
+ block = off >> PAGE_SHIFT;
+ node->page_offset = off & ~PAGE_MASK;
for (i = 0; i < tree->pages_per_bnode; block++, i++) {
page = read_mapping_page(mapping, block, NULL);
if (IS_ERR(page))
goto fail;
if (PageError(page)) {
- page_cache_release(page);
+ put_page(page);
goto fail;
}
node->page[i] = page;
@@ -569,7 +569,7 @@ void hfs_bnode_free(struct hfs_bnode *node)
for (i = 0; i < node->tree->pages_per_bnode; i++)
if (node->page[i])
- page_cache_release(node->page[i]);
+ put_page(node->page[i]);
kfree(node);
}
@@ -597,11 +597,11 @@ struct hfs_bnode *hfs_bnode_create(struct hfs_btree *tree, u32 num)
pagep = node->page;
memset(kmap(*pagep) + node->page_offset, 0,
- min_t(int, PAGE_CACHE_SIZE, tree->node_size));
+ min_t(int, PAGE_SIZE, tree->node_size));
set_page_dirty(*pagep);
kunmap(*pagep);
for (i = 1; i < tree->pages_per_bnode; i++) {
- memset(kmap(*++pagep), 0, PAGE_CACHE_SIZE);
+ memset(kmap(*++pagep), 0, PAGE_SIZE);
set_page_dirty(*pagep);
kunmap(*pagep);
}
diff --git a/fs/hfsplus/btree.c b/fs/hfsplus/btree.c
index 3345c7553..d9d1a36ba 100644
--- a/fs/hfsplus/btree.c
+++ b/fs/hfsplus/btree.c
@@ -236,15 +236,15 @@ struct hfs_btree *hfs_btree_open(struct super_block *sb, u32 id)
tree->node_size_shift = ffs(size) - 1;
tree->pages_per_bnode =
- (tree->node_size + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+ (tree->node_size + PAGE_SIZE - 1) >>
+ PAGE_SHIFT;
kunmap(page);
- page_cache_release(page);
+ put_page(page);
return tree;
fail_page:
- page_cache_release(page);
+ put_page(page);
free_inode:
tree->inode->i_mapping->a_ops = &hfsplus_aops;
iput(tree->inode);
@@ -380,9 +380,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
off = off16;
off += node->page_offset;
- pagep = node->page + (off >> PAGE_CACHE_SHIFT);
+ pagep = node->page + (off >> PAGE_SHIFT);
data = kmap(*pagep);
- off &= ~PAGE_CACHE_MASK;
+ off &= ~PAGE_MASK;
idx = 0;
for (;;) {
@@ -403,7 +403,7 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
}
}
}
- if (++off >= PAGE_CACHE_SIZE) {
+ if (++off >= PAGE_SIZE) {
kunmap(*pagep);
data = kmap(*++pagep);
off = 0;
@@ -426,9 +426,9 @@ struct hfs_bnode *hfs_bmap_alloc(struct hfs_btree *tree)
len = hfs_brec_lenoff(node, 0, &off16);
off = off16;
off += node->page_offset;
- pagep = node->page + (off >> PAGE_CACHE_SHIFT);
+ pagep = node->page + (off >> PAGE_SHIFT);
data = kmap(*pagep);
- off &= ~PAGE_CACHE_MASK;
+ off &= ~PAGE_MASK;
}
}
@@ -475,9 +475,9 @@ void hfs_bmap_free(struct hfs_bnode *node)
len = hfs_brec_lenoff(node, 0, &off);
}
off += node->page_offset + nidx / 8;
- page = node->page[off >> PAGE_CACHE_SHIFT];
+ page = node->page[off >> PAGE_SHIFT];
data = kmap(page);
- off &= ~PAGE_CACHE_MASK;
+ off &= ~PAGE_MASK;
m = 1 << (~nidx & 7);
byte = data[off];
if (!(byte & m)) {
diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c
index 1a6394cdb..b28f39865 100644
--- a/fs/hfsplus/inode.c
+++ b/fs/hfsplus/inode.c
@@ -87,9 +87,9 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
}
if (!tree)
return 0;
- if (tree->node_size >= PAGE_CACHE_SIZE) {
+ if (tree->node_size >= PAGE_SIZE) {
nidx = page->index >>
- (tree->node_size_shift - PAGE_CACHE_SHIFT);
+ (tree->node_size_shift - PAGE_SHIFT);
spin_lock(&tree->hash_lock);
node = hfs_bnode_findhash(tree, nidx);
if (!node)
@@ -103,8 +103,8 @@ static int hfsplus_releasepage(struct page *page, gfp_t mask)
spin_unlock(&tree->hash_lock);
} else {
nidx = page->index <<
- (PAGE_CACHE_SHIFT - tree->node_size_shift);
- i = 1 << (PAGE_CACHE_SHIFT - tree->node_size_shift);
+ (PAGE_SHIFT - tree->node_size_shift);
+ i = 1 << (PAGE_SHIFT - tree->node_size_shift);
spin_lock(&tree->hash_lock);
do {
node = hfs_bnode_findhash(tree, nidx++);
diff --git a/fs/hfsplus/super.c b/fs/hfsplus/super.c
index 5d54490a1..c35911362 100644
--- a/fs/hfsplus/super.c
+++ b/fs/hfsplus/super.c
@@ -438,7 +438,7 @@ static int hfsplus_fill_super(struct super_block *sb, void *data, int silent)
err = -EFBIG;
last_fs_block = sbi->total_blocks - 1;
last_fs_page = (last_fs_block << sbi->alloc_blksz_shift) >>
- PAGE_CACHE_SHIFT;
+ PAGE_SHIFT;
if ((last_fs_block > (sector_t)(~0ULL) >> (sbi->alloc_blksz_shift - 9)) ||
(last_fs_page > (pgoff_t)(~0ULL))) {
diff --git a/fs/hfsplus/xattr.c b/fs/hfsplus/xattr.c
index ab01530b4..70e445ff0 100644
--- a/fs/hfsplus/xattr.c
+++ b/fs/hfsplus/xattr.c
@@ -220,7 +220,7 @@ check_attr_tree_state_again:
index = 0;
written = 0;
- for (; written < node_size; index++, written += PAGE_CACHE_SIZE) {
+ for (; written < node_size; index++, written += PAGE_SIZE) {
void *kaddr;
page = read_mapping_page(mapping, index, NULL);
@@ -231,11 +231,11 @@ check_attr_tree_state_again:
kaddr = kmap_atomic(page);
memcpy(kaddr, buf + written,
- min_t(size_t, PAGE_CACHE_SIZE, node_size - written));
+ min_t(size_t, PAGE_SIZE, node_size - written));
kunmap_atomic(kaddr);
set_page_dirty(page);
- page_cache_release(page);
+ put_page(page);
}
hfsplus_mark_inode_dirty(attr_file, HFSPLUS_I_ATTR_DIRTY);
diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c
index d1abbee28..7016653f3 100644
--- a/fs/hostfs/hostfs_kern.c
+++ b/fs/hostfs/hostfs_kern.c
@@ -410,12 +410,12 @@ static int hostfs_writepage(struct page *page, struct writeback_control *wbc)
struct inode *inode = mapping->host;
char *buffer;
loff_t base = page_offset(page);
- int count = PAGE_CACHE_SIZE;
- int end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ int count = PAGE_SIZE;
+ int end_index = inode->i_size >> PAGE_SHIFT;
int err;
if (page->index >= end_index)
- count = inode->i_size & (PAGE_CACHE_SIZE-1);
+ count = inode->i_size & (PAGE_SIZE-1);
buffer = kmap(page);
@@ -447,7 +447,7 @@ static int hostfs_readpage(struct file *file, struct page *page)
buffer = kmap(page);
bytes_read = read_file(FILE_HOSTFS_I(file)->fd, &start, buffer,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
if (bytes_read < 0) {
ClearPageUptodate(page);
SetPageError(page);
@@ -455,7 +455,7 @@ static int hostfs_readpage(struct file *file, struct page *page)
goto out;
}
- memset(buffer + bytes_read, 0, PAGE_CACHE_SIZE - bytes_read);
+ memset(buffer + bytes_read, 0, PAGE_SIZE - bytes_read);
ClearPageError(page);
SetPageUptodate(page);
@@ -471,7 +471,7 @@ static int hostfs_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned flags,
struct page **pagep, void **fsdata)
{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
*pagep = grab_cache_page_write_begin(mapping, index, flags);
if (!*pagep)
@@ -485,14 +485,14 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
void *buffer;
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned from = pos & (PAGE_SIZE - 1);
int err;
buffer = kmap(page);
err = write_file(FILE_HOSTFS_I(file)->fd, &pos, buffer + from, copied);
kunmap(page);
- if (!PageUptodate(page) && err == PAGE_CACHE_SIZE)
+ if (!PageUptodate(page) && err == PAGE_SIZE)
SetPageUptodate(page);
/*
@@ -502,7 +502,7 @@ static int hostfs_write_end(struct file *file, struct address_space *mapping,
if (err > 0 && (pos > inode->i_size))
inode->i_size = pos;
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return err;
}
diff --git a/fs/hpfs/super.c b/fs/hpfs/super.c
index 458cf4630..82067ca22 100644
--- a/fs/hpfs/super.c
+++ b/fs/hpfs/super.c
@@ -15,6 +15,7 @@
#include <linux/sched.h>
#include <linux/bitmap.h>
#include <linux/slab.h>
+#include <linux/seq_file.h>
/* Mark the filesystem dirty, so that chkdsk checks it when os/2 booted */
@@ -453,10 +454,6 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
int lowercase, eas, chk, errs, chkdsk, timeshift;
int o;
struct hpfs_sb_info *sbi = hpfs_sb(s);
- char *new_opts = kstrdup(data, GFP_KERNEL);
-
- if (!new_opts)
- return -ENOMEM;
sync_filesystem(s);
@@ -493,17 +490,44 @@ static int hpfs_remount_fs(struct super_block *s, int *flags, char *data)
if (!(*flags & MS_RDONLY)) mark_dirty(s, 1);
- replace_mount_options(s, new_opts);
-
hpfs_unlock(s);
return 0;
out_err:
hpfs_unlock(s);
- kfree(new_opts);
return -EINVAL;
}
+static int hpfs_show_options(struct seq_file *seq, struct dentry *root)
+{
+ struct hpfs_sb_info *sbi = hpfs_sb(root->d_sb);
+
+ seq_printf(seq, ",uid=%u", from_kuid_munged(&init_user_ns, sbi->sb_uid));
+ seq_printf(seq, ",gid=%u", from_kgid_munged(&init_user_ns, sbi->sb_gid));
+ seq_printf(seq, ",umask=%03o", (~sbi->sb_mode & 0777));
+ if (sbi->sb_lowercase)
+ seq_printf(seq, ",case=lower");
+ if (!sbi->sb_chk)
+ seq_printf(seq, ",check=none");
+ if (sbi->sb_chk == 2)
+ seq_printf(seq, ",check=strict");
+ if (!sbi->sb_err)
+ seq_printf(seq, ",errors=continue");
+ if (sbi->sb_err == 2)
+ seq_printf(seq, ",errors=panic");
+ if (!sbi->sb_chkdsk)
+ seq_printf(seq, ",chkdsk=no");
+ if (sbi->sb_chkdsk == 2)
+ seq_printf(seq, ",chkdsk=always");
+ if (!sbi->sb_eas)
+ seq_printf(seq, ",eas=no");
+ if (sbi->sb_eas == 1)
+ seq_printf(seq, ",eas=ro");
+ if (sbi->sb_timeshift)
+ seq_printf(seq, ",timeshift=%d", sbi->sb_timeshift);
+ return 0;
+}
+
/* Super operations */
static const struct super_operations hpfs_sops =
@@ -514,7 +538,7 @@ static const struct super_operations hpfs_sops =
.put_super = hpfs_put_super,
.statfs = hpfs_statfs,
.remount_fs = hpfs_remount_fs,
- .show_options = generic_show_options,
+ .show_options = hpfs_show_options,
};
static int hpfs_fill_super(struct super_block *s, void *options, int silent)
@@ -537,8 +561,6 @@ static int hpfs_fill_super(struct super_block *s, void *options, int silent)
int o;
- save_mount_options(s, options);
-
sbi = kzalloc(sizeof(*sbi), GFP_KERNEL);
if (!sbi) {
return -ENOMEM;
diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c
index e1f465a38..4ea71eba4 100644
--- a/fs/hugetlbfs/inode.c
+++ b/fs/hugetlbfs/inode.c
@@ -213,12 +213,12 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
int i, chunksize;
/* Find which 4k chunk and offset with in that chunk */
- i = offset >> PAGE_CACHE_SHIFT;
- offset = offset & ~PAGE_CACHE_MASK;
+ i = offset >> PAGE_SHIFT;
+ offset = offset & ~PAGE_MASK;
while (size) {
size_t n;
- chunksize = PAGE_CACHE_SIZE;
+ chunksize = PAGE_SIZE;
if (offset)
chunksize -= offset;
if (chunksize > size)
@@ -237,7 +237,7 @@ hugetlbfs_read_actor(struct page *page, unsigned long offset,
/*
* Support for read() - Find the page attached to f_mapping and copy out the
* data. Its *very* similar to do_generic_mapping_read(), we can't use that
- * since it has PAGE_CACHE_SIZE assumptions.
+ * since it has PAGE_SIZE assumptions.
*/
static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
@@ -285,7 +285,7 @@ static ssize_t hugetlbfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
* We have the page, copy it to user space buffer.
*/
copied = hugetlbfs_read_actor(page, offset, to, nr);
- page_cache_release(page);
+ put_page(page);
}
offset += copied;
retval += copied;
diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c
index f311bf084..2e4e834d1 100644
--- a/fs/isofs/compress.c
+++ b/fs/isofs/compress.c
@@ -26,7 +26,7 @@
#include "zisofs.h"
/* This should probably be global. */
-static char zisofs_sink_page[PAGE_CACHE_SIZE];
+static char zisofs_sink_page[PAGE_SIZE];
/*
* This contains the zlib memory allocation and the mutex for the
@@ -70,11 +70,11 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
for ( i = 0 ; i < pcount ; i++ ) {
if (!pages[i])
continue;
- memset(page_address(pages[i]), 0, PAGE_CACHE_SIZE);
+ memset(page_address(pages[i]), 0, PAGE_SIZE);
flush_dcache_page(pages[i]);
SetPageUptodate(pages[i]);
}
- return ((loff_t)pcount) << PAGE_CACHE_SHIFT;
+ return ((loff_t)pcount) << PAGE_SHIFT;
}
/* Because zlib is not thread-safe, do all the I/O at the top. */
@@ -121,11 +121,11 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start,
if (pages[curpage]) {
stream.next_out = page_address(pages[curpage])
+ poffset;
- stream.avail_out = PAGE_CACHE_SIZE - poffset;
+ stream.avail_out = PAGE_SIZE - poffset;
poffset = 0;
} else {
stream.next_out = (void *)&zisofs_sink_page;
- stream.avail_out = PAGE_CACHE_SIZE;
+ stream.avail_out = PAGE_SIZE;
}
}
if (!stream.avail_in) {
@@ -220,14 +220,14 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
* pages with the data we have anyway...
*/
start_off = page_offset(pages[full_page]);
- end_off = min_t(loff_t, start_off + PAGE_CACHE_SIZE, inode->i_size);
+ end_off = min_t(loff_t, start_off + PAGE_SIZE, inode->i_size);
cstart_block = start_off >> zisofs_block_shift;
cend_block = (end_off + (1 << zisofs_block_shift) - 1)
>> zisofs_block_shift;
- WARN_ON(start_off - (full_page << PAGE_CACHE_SHIFT) !=
- ((cstart_block << zisofs_block_shift) & PAGE_CACHE_MASK));
+ WARN_ON(start_off - (full_page << PAGE_SHIFT) !=
+ ((cstart_block << zisofs_block_shift) & PAGE_MASK));
/* Find the pointer to this specific chunk */
/* Note: we're not using isonum_731() here because the data is known aligned */
@@ -260,10 +260,10 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
ret = zisofs_uncompress_block(inode, block_start, block_end,
pcount, pages, poffset, &err);
poffset += ret;
- pages += poffset >> PAGE_CACHE_SHIFT;
- pcount -= poffset >> PAGE_CACHE_SHIFT;
- full_page -= poffset >> PAGE_CACHE_SHIFT;
- poffset &= ~PAGE_CACHE_MASK;
+ pages += poffset >> PAGE_SHIFT;
+ pcount -= poffset >> PAGE_SHIFT;
+ full_page -= poffset >> PAGE_SHIFT;
+ poffset &= ~PAGE_MASK;
if (err) {
brelse(bh);
@@ -282,7 +282,7 @@ static int zisofs_fill_pages(struct inode *inode, int full_page, int pcount,
if (poffset && *pages) {
memset(page_address(*pages) + poffset, 0,
- PAGE_CACHE_SIZE - poffset);
+ PAGE_SIZE - poffset);
flush_dcache_page(*pages);
SetPageUptodate(*pages);
}
@@ -302,12 +302,12 @@ static int zisofs_readpage(struct file *file, struct page *page)
int i, pcount, full_page;
unsigned int zisofs_block_shift = ISOFS_I(inode)->i_format_parm[1];
unsigned int zisofs_pages_per_cblock =
- PAGE_CACHE_SHIFT <= zisofs_block_shift ?
- (1 << (zisofs_block_shift - PAGE_CACHE_SHIFT)) : 0;
+ PAGE_SHIFT <= zisofs_block_shift ?
+ (1 << (zisofs_block_shift - PAGE_SHIFT)) : 0;
struct page *pages[max_t(unsigned, zisofs_pages_per_cblock, 1)];
pgoff_t index = page->index, end_index;
- end_index = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ end_index = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
/*
* If this page is wholly outside i_size we just return zero;
* do_generic_file_read() will handle this for us
@@ -318,7 +318,7 @@ static int zisofs_readpage(struct file *file, struct page *page)
return 0;
}
- if (PAGE_CACHE_SHIFT <= zisofs_block_shift) {
+ if (PAGE_SHIFT <= zisofs_block_shift) {
/* We have already been given one page, this is the one
we must do. */
full_page = index & (zisofs_pages_per_cblock - 1);
@@ -351,7 +351,7 @@ static int zisofs_readpage(struct file *file, struct page *page)
kunmap(pages[i]);
unlock_page(pages[i]);
if (i != full_page)
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
}
diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c
index bcd2d41b3..131dedc92 100644
--- a/fs/isofs/inode.c
+++ b/fs/isofs/inode.c
@@ -1021,7 +1021,7 @@ int isofs_get_blocks(struct inode *inode, sector_t iblock,
* the page with useless information without generating any
* I/O errors.
*/
- if (b_off > ((inode->i_size + PAGE_CACHE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
+ if (b_off > ((inode->i_size + PAGE_SIZE - 1) >> ISOFS_BUFFER_BITS(inode))) {
printk(KERN_DEBUG "%s: block >= EOF (%lu, %llu)\n",
__func__, b_off,
(unsigned long long)inode->i_size);
diff --git a/fs/isofs/rock.c b/fs/isofs/rock.c
index 5384ceb35..98b3eb7d8 100644
--- a/fs/isofs/rock.c
+++ b/fs/isofs/rock.c
@@ -203,6 +203,8 @@ int get_rock_ridge_filename(struct iso_directory_record *de,
int retnamlen = 0;
int truncate = 0;
int ret = 0;
+ char *p;
+ int len;
if (!ISOFS_SB(inode->i_sb)->s_rock)
return 0;
@@ -267,12 +269,17 @@ repeat:
rr->u.NM.flags);
break;
}
- if ((strlen(retname) + rr->len - 5) >= 254) {
+ len = rr->len - 5;
+ if (retnamlen + len >= 254) {
truncate = 1;
break;
}
- strncat(retname, rr->u.NM.name, rr->len - 5);
- retnamlen += rr->len - 5;
+ p = memchr(rr->u.NM.name, '\0', len);
+ if (unlikely(p))
+ len = p - rr->u.NM.name;
+ memcpy(retname + retnamlen, rr->u.NM.name, len);
+ retnamlen += len;
+ retname[retnamlen] = '\0';
break;
case SIG('R', 'E'):
kfree(rs.buffer);
diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c
index 36345fefa..2ad98d6e1 100644
--- a/fs/jbd2/commit.c
+++ b/fs/jbd2/commit.c
@@ -81,11 +81,11 @@ static void release_buffer_page(struct buffer_head *bh)
if (!trylock_page(page))
goto nope;
- page_cache_get(page);
+ get_page(page);
__brelse(bh);
try_to_free_buffers(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return;
nope:
@@ -131,14 +131,12 @@ static int journal_submit_commit_record(journal_t *journal,
if (is_journal_aborted(journal))
return 0;
- bh = jbd2_journal_get_descriptor_buffer(journal);
+ bh = jbd2_journal_get_descriptor_buffer(commit_transaction,
+ JBD2_COMMIT_BLOCK);
if (!bh)
return 1;
tmp = (struct commit_header *)bh->b_data;
- tmp->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
- tmp->h_blocktype = cpu_to_be32(JBD2_COMMIT_BLOCK);
- tmp->h_sequence = cpu_to_be32(commit_transaction->t_tid);
tmp->h_commit_sec = cpu_to_be64(now.tv_sec);
tmp->h_commit_nsec = cpu_to_be32(now.tv_nsec);
@@ -222,7 +220,7 @@ static int journal_submit_data_buffers(journal_t *journal,
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
mapping = jinode->i_vfs_inode->i_mapping;
- set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
/*
* submit the inode data buffers. We use writepage
@@ -236,8 +234,8 @@ static int journal_submit_data_buffers(journal_t *journal,
ret = err;
spin_lock(&journal->j_list_lock);
J_ASSERT(jinode->i_transaction == commit_transaction);
- clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
- smp_mb__after_atomic();
+ jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
spin_unlock(&journal->j_list_lock);
@@ -258,7 +256,7 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
/* For locking, see the comment in journal_submit_data_buffers() */
spin_lock(&journal->j_list_lock);
list_for_each_entry(jinode, &commit_transaction->t_inode_list, i_list) {
- set_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
+ jinode->i_flags |= JI_COMMIT_RUNNING;
spin_unlock(&journal->j_list_lock);
err = filemap_fdatawait(jinode->i_vfs_inode->i_mapping);
if (err) {
@@ -274,8 +272,8 @@ static int journal_finish_inode_data_buffers(journal_t *journal,
ret = err;
}
spin_lock(&journal->j_list_lock);
- clear_bit(__JI_COMMIT_RUNNING, &jinode->i_flags);
- smp_mb__after_atomic();
+ jinode->i_flags &= ~JI_COMMIT_RUNNING;
+ smp_mb();
wake_up_bit(&jinode->i_flags, __JI_COMMIT_RUNNING);
}
@@ -319,22 +317,6 @@ static void write_tag_block(journal_t *j, journal_block_tag_t *tag,
tag->t_blocknr_high = cpu_to_be32((block >> 31) >> 1);
}
-static void jbd2_descr_block_csum_set(journal_t *j,
- struct buffer_head *bh)
-{
- struct jbd2_journal_block_tail *tail;
- __u32 csum;
-
- if (!jbd2_journal_has_csum_v2or3(j))
- return;
-
- tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
- sizeof(struct jbd2_journal_block_tail));
- tail->t_checksum = 0;
- csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
- tail->t_checksum = cpu_to_be32(csum);
-}
-
static void jbd2_block_tag_csum_set(journal_t *j, journal_block_tag_t *tag,
struct buffer_head *bh, __u32 sequence)
{
@@ -379,7 +361,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
ktime_t start_time;
u64 commit_time;
char *tagp = NULL;
- journal_header_t *header;
journal_block_tag_t *tag = NULL;
int space_left = 0;
int first_tag = 0;
@@ -554,8 +535,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd2_journal_abort(journal, err);
blk_start_plug(&plug);
- jbd2_journal_write_revoke_records(journal, commit_transaction,
- &log_bufs, WRITE_SYNC);
+ jbd2_journal_write_revoke_records(commit_transaction, &log_bufs);
jbd_debug(3, "JBD2: commit phase 2b\n");
@@ -616,7 +596,9 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(4, "JBD2: get descriptor\n");
- descriptor = jbd2_journal_get_descriptor_buffer(journal);
+ descriptor = jbd2_journal_get_descriptor_buffer(
+ commit_transaction,
+ JBD2_DESCRIPTOR_BLOCK);
if (!descriptor) {
jbd2_journal_abort(journal, -EIO);
continue;
@@ -625,11 +607,6 @@ void jbd2_journal_commit_transaction(journal_t *journal)
jbd_debug(4, "JBD2: got buffer %llu (%p)\n",
(unsigned long long)descriptor->b_blocknr,
descriptor->b_data);
- header = (journal_header_t *)descriptor->b_data;
- header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
- header->h_blocktype = cpu_to_be32(JBD2_DESCRIPTOR_BLOCK);
- header->h_sequence = cpu_to_be32(commit_transaction->t_tid);
-
tagp = &descriptor->b_data[sizeof(journal_header_t)];
space_left = descriptor->b_size -
sizeof(journal_header_t);
@@ -721,7 +698,7 @@ void jbd2_journal_commit_transaction(journal_t *journal)
tag->t_flags |= cpu_to_be16(JBD2_FLAG_LAST_TAG);
- jbd2_descr_block_csum_set(journal, descriptor);
+ jbd2_descriptor_block_csum_set(journal, descriptor);
start_journal_io:
for (i = 0; i < bufs; i++) {
struct buffer_head *bh = wbuf[i];
diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c
index 624a57a9c..435f0b26a 100644
--- a/fs/jbd2/journal.c
+++ b/fs/jbd2/journal.c
@@ -805,10 +805,13 @@ int jbd2_journal_bmap(journal_t *journal, unsigned long blocknr,
* But we don't bother doing that, so there will be coherency problems with
* mmaps of blockdevs which hold live JBD-controlled filesystems.
*/
-struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
+struct buffer_head *
+jbd2_journal_get_descriptor_buffer(transaction_t *transaction, int type)
{
+ journal_t *journal = transaction->t_journal;
struct buffer_head *bh;
unsigned long long blocknr;
+ journal_header_t *header;
int err;
err = jbd2_journal_next_log_block(journal, &blocknr);
@@ -821,12 +824,31 @@ struct buffer_head *jbd2_journal_get_descriptor_buffer(journal_t *journal)
return NULL;
lock_buffer(bh);
memset(bh->b_data, 0, journal->j_blocksize);
+ header = (journal_header_t *)bh->b_data;
+ header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
+ header->h_blocktype = cpu_to_be32(type);
+ header->h_sequence = cpu_to_be32(transaction->t_tid);
set_buffer_uptodate(bh);
unlock_buffer(bh);
BUFFER_TRACE(bh, "return this buffer");
return bh;
}
+void jbd2_descriptor_block_csum_set(journal_t *j, struct buffer_head *bh)
+{
+ struct jbd2_journal_block_tail *tail;
+ __u32 csum;
+
+ if (!jbd2_journal_has_csum_v2or3(j))
+ return;
+
+ tail = (struct jbd2_journal_block_tail *)(bh->b_data + j->j_blocksize -
+ sizeof(struct jbd2_journal_block_tail));
+ tail->t_checksum = 0;
+ csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
+ tail->t_checksum = cpu_to_be32(csum);
+}
+
/*
* Return tid of the oldest transaction in the journal and block in the journal
* where the transaction starts.
@@ -2199,7 +2221,7 @@ void jbd2_journal_ack_err(journal_t *journal)
int jbd2_journal_blocks_per_page(struct inode *inode)
{
- return 1 << (PAGE_CACHE_SHIFT - inode->i_sb->s_blocksize_bits);
+ return 1 << (PAGE_SHIFT - inode->i_sb->s_blocksize_bits);
}
/*
@@ -2572,7 +2594,7 @@ void jbd2_journal_release_jbd_inode(journal_t *journal,
restart:
spin_lock(&journal->j_list_lock);
/* Is commit writing out inode - we have to wait */
- if (test_bit(__JI_COMMIT_RUNNING, &jinode->i_flags)) {
+ if (jinode->i_flags & JI_COMMIT_RUNNING) {
wait_queue_head_t *wq;
DEFINE_WAIT_BIT(wait, &jinode->i_flags, __JI_COMMIT_RUNNING);
wq = bit_waitqueue(&jinode->i_flags, __JI_COMMIT_RUNNING);
diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c
index 7f277e49f..08a456b96 100644
--- a/fs/jbd2/recovery.c
+++ b/fs/jbd2/recovery.c
@@ -174,8 +174,7 @@ static int jread(struct buffer_head **bhp, journal_t *journal,
return 0;
}
-static int jbd2_descr_block_csum_verify(journal_t *j,
- void *buf)
+static int jbd2_descriptor_block_csum_verify(journal_t *j, void *buf)
{
struct jbd2_journal_block_tail *tail;
__be32 provided;
@@ -522,8 +521,8 @@ static int do_one_pass(journal_t *journal,
descr_csum_size =
sizeof(struct jbd2_journal_block_tail);
if (descr_csum_size > 0 &&
- !jbd2_descr_block_csum_verify(journal,
- bh->b_data)) {
+ !jbd2_descriptor_block_csum_verify(journal,
+ bh->b_data)) {
printk(KERN_ERR "JBD2: Invalid checksum "
"recovering block %lu in log\n",
next_log_block);
@@ -811,26 +810,6 @@ static int do_one_pass(journal_t *journal,
return err;
}
-static int jbd2_revoke_block_csum_verify(journal_t *j,
- void *buf)
-{
- struct jbd2_journal_revoke_tail *tail;
- __be32 provided;
- __u32 calculated;
-
- if (!jbd2_journal_has_csum_v2or3(j))
- return 1;
-
- tail = (struct jbd2_journal_revoke_tail *)(buf + j->j_blocksize -
- sizeof(struct jbd2_journal_revoke_tail));
- provided = tail->r_checksum;
- tail->r_checksum = 0;
- calculated = jbd2_chksum(j, j->j_csum_seed, buf, j->j_blocksize);
- tail->r_checksum = provided;
-
- return provided == cpu_to_be32(calculated);
-}
-
/* Scan a revoke record, marking all blocks mentioned as revoked. */
static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
@@ -846,11 +825,11 @@ static int scan_revoke_records(journal_t *journal, struct buffer_head *bh,
offset = sizeof(jbd2_journal_revoke_header_t);
rcount = be32_to_cpu(header->r_count);
- if (!jbd2_revoke_block_csum_verify(journal, header))
+ if (!jbd2_descriptor_block_csum_verify(journal, header))
return -EFSBADCRC;
if (jbd2_journal_has_csum_v2or3(journal))
- csum_size = sizeof(struct jbd2_journal_revoke_tail);
+ csum_size = sizeof(struct jbd2_journal_block_tail);
if (rcount > journal->j_blocksize - csum_size)
return -EINVAL;
max = rcount;
diff --git a/fs/jbd2/revoke.c b/fs/jbd2/revoke.c
index 705ae5778..91171dc35 100644
--- a/fs/jbd2/revoke.c
+++ b/fs/jbd2/revoke.c
@@ -122,11 +122,11 @@ struct jbd2_revoke_table_s
#ifdef __KERNEL__
-static void write_one_revoke_record(journal_t *, transaction_t *,
+static void write_one_revoke_record(transaction_t *,
struct list_head *,
struct buffer_head **, int *,
- struct jbd2_revoke_record_s *, int);
-static void flush_descriptor(journal_t *, struct buffer_head *, int, int);
+ struct jbd2_revoke_record_s *);
+static void flush_descriptor(journal_t *, struct buffer_head *, int);
#endif
/* Utility functions to maintain the revoke table */
@@ -519,11 +519,10 @@ void jbd2_journal_switch_revoke_table(journal_t *journal)
* Write revoke records to the journal for all entries in the current
* revoke hash, deleting the entries as we go.
*/
-void jbd2_journal_write_revoke_records(journal_t *journal,
- transaction_t *transaction,
- struct list_head *log_bufs,
- int write_op)
+void jbd2_journal_write_revoke_records(transaction_t *transaction,
+ struct list_head *log_bufs)
{
+ journal_t *journal = transaction->t_journal;
struct buffer_head *descriptor;
struct jbd2_revoke_record_s *record;
struct jbd2_revoke_table_s *revoke;
@@ -544,16 +543,15 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
while (!list_empty(hash_list)) {
record = (struct jbd2_revoke_record_s *)
hash_list->next;
- write_one_revoke_record(journal, transaction, log_bufs,
- &descriptor, &offset,
- record, write_op);
+ write_one_revoke_record(transaction, log_bufs,
+ &descriptor, &offset, record);
count++;
list_del(&record->hash);
kmem_cache_free(jbd2_revoke_record_cache, record);
}
}
if (descriptor)
- flush_descriptor(journal, descriptor, offset, write_op);
+ flush_descriptor(journal, descriptor, offset);
jbd_debug(1, "Wrote %d revoke records\n", count);
}
@@ -562,18 +560,16 @@ void jbd2_journal_write_revoke_records(journal_t *journal,
* block if the old one is full or if we have not already created one.
*/
-static void write_one_revoke_record(journal_t *journal,
- transaction_t *transaction,
+static void write_one_revoke_record(transaction_t *transaction,
struct list_head *log_bufs,
struct buffer_head **descriptorp,
int *offsetp,
- struct jbd2_revoke_record_s *record,
- int write_op)
+ struct jbd2_revoke_record_s *record)
{
+ journal_t *journal = transaction->t_journal;
int csum_size = 0;
struct buffer_head *descriptor;
int sz, offset;
- journal_header_t *header;
/* If we are already aborting, this all becomes a noop. We
still need to go round the loop in
@@ -587,7 +583,7 @@ static void write_one_revoke_record(journal_t *journal,
/* Do we need to leave space at the end for a checksum? */
if (jbd2_journal_has_csum_v2or3(journal))
- csum_size = sizeof(struct jbd2_journal_revoke_tail);
+ csum_size = sizeof(struct jbd2_journal_block_tail);
if (jbd2_has_feature_64bit(journal))
sz = 8;
@@ -597,19 +593,16 @@ static void write_one_revoke_record(journal_t *journal,
/* Make sure we have a descriptor with space left for the record */
if (descriptor) {
if (offset + sz > journal->j_blocksize - csum_size) {
- flush_descriptor(journal, descriptor, offset, write_op);
+ flush_descriptor(journal, descriptor, offset);
descriptor = NULL;
}
}
if (!descriptor) {
- descriptor = jbd2_journal_get_descriptor_buffer(journal);
+ descriptor = jbd2_journal_get_descriptor_buffer(transaction,
+ JBD2_REVOKE_BLOCK);
if (!descriptor)
return;
- header = (journal_header_t *)descriptor->b_data;
- header->h_magic = cpu_to_be32(JBD2_MAGIC_NUMBER);
- header->h_blocktype = cpu_to_be32(JBD2_REVOKE_BLOCK);
- header->h_sequence = cpu_to_be32(transaction->t_tid);
/* Record it so that we can wait for IO completion later */
BUFFER_TRACE(descriptor, "file in log_bufs");
@@ -630,21 +623,6 @@ static void write_one_revoke_record(journal_t *journal,
*offsetp = offset;
}
-static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
-{
- struct jbd2_journal_revoke_tail *tail;
- __u32 csum;
-
- if (!jbd2_journal_has_csum_v2or3(j))
- return;
-
- tail = (struct jbd2_journal_revoke_tail *)(bh->b_data + j->j_blocksize -
- sizeof(struct jbd2_journal_revoke_tail));
- tail->r_checksum = 0;
- csum = jbd2_chksum(j, j->j_csum_seed, bh->b_data, j->j_blocksize);
- tail->r_checksum = cpu_to_be32(csum);
-}
-
/*
* Flush a revoke descriptor out to the journal. If we are aborting,
* this is a noop; otherwise we are generating a buffer which needs to
@@ -654,7 +632,7 @@ static void jbd2_revoke_csum_set(journal_t *j, struct buffer_head *bh)
static void flush_descriptor(journal_t *journal,
struct buffer_head *descriptor,
- int offset, int write_op)
+ int offset)
{
jbd2_journal_revoke_header_t *header;
@@ -665,12 +643,12 @@ static void flush_descriptor(journal_t *journal,
header = (jbd2_journal_revoke_header_t *)descriptor->b_data;
header->r_count = cpu_to_be32(offset);
- jbd2_revoke_csum_set(journal, descriptor);
+ jbd2_descriptor_block_csum_set(journal, descriptor);
set_buffer_jwrite(descriptor);
BUFFER_TRACE(descriptor, "write");
set_buffer_dirty(descriptor);
- write_dirty_buffer(descriptor, write_op);
+ write_dirty_buffer(descriptor, WRITE_SYNC);
}
#endif
diff --git a/fs/jbd2/transaction.c b/fs/jbd2/transaction.c
index 081dff087..67c103867 100644
--- a/fs/jbd2/transaction.c
+++ b/fs/jbd2/transaction.c
@@ -966,14 +966,8 @@ repeat:
if (!frozen_buffer) {
JBUFFER_TRACE(jh, "allocate memory for buffer");
jbd_unlock_bh_state(bh);
- frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
- if (!frozen_buffer) {
- printk(KERN_ERR "%s: OOM for frozen_buffer\n",
- __func__);
- JBUFFER_TRACE(jh, "oom!");
- error = -ENOMEM;
- goto out;
- }
+ frozen_buffer = jbd2_alloc(jh2bh(jh)->b_size,
+ GFP_NOFS | __GFP_NOFAIL);
goto repeat;
}
jh->b_frozen_data = frozen_buffer;
@@ -1226,15 +1220,9 @@ int jbd2_journal_get_undo_access(handle_t *handle, struct buffer_head *bh)
goto out;
repeat:
- if (!jh->b_committed_data) {
- committed_data = jbd2_alloc(jh2bh(jh)->b_size, GFP_NOFS);
- if (!committed_data) {
- printk(KERN_ERR "%s: No memory for committed data\n",
- __func__);
- err = -ENOMEM;
- goto out;
- }
- }
+ if (!jh->b_committed_data)
+ committed_data = jbd2_alloc(jh2bh(jh)->b_size,
+ GFP_NOFS|__GFP_NOFAIL);
jbd_lock_bh_state(bh);
if (!jh->b_committed_data) {
@@ -2275,7 +2263,7 @@ int jbd2_journal_invalidatepage(journal_t *journal,
struct buffer_head *head, *bh, *next;
unsigned int stop = offset + length;
unsigned int curr_off = 0;
- int partial_page = (offset || length < PAGE_CACHE_SIZE);
+ int partial_page = (offset || length < PAGE_SIZE);
int may_free = 1;
int ret = 0;
@@ -2284,7 +2272,7 @@ int jbd2_journal_invalidatepage(journal_t *journal,
if (!page_has_buffers(page))
return 0;
- BUG_ON(stop > PAGE_CACHE_SIZE || stop < length);
+ BUG_ON(stop > PAGE_SIZE || stop < length);
/* We will potentially be playing with lists other than just the
* data lists (especially for journaled data mode), so be
diff --git a/fs/jffs2/debug.c b/fs/jffs2/debug.c
index 1090eb64b..9d26b1b9f 100644
--- a/fs/jffs2/debug.c
+++ b/fs/jffs2/debug.c
@@ -95,15 +95,15 @@ __jffs2_dbg_fragtree_paranoia_check_nolock(struct jffs2_inode_info *f)
rather than mucking around with actually reading the node
and checking the compression type, which is the real way
to tell a hole node. */
- if (frag->ofs & (PAGE_CACHE_SIZE-1) && frag_prev(frag)
- && frag_prev(frag)->size < PAGE_CACHE_SIZE && frag_prev(frag)->node) {
+ if (frag->ofs & (PAGE_SIZE-1) && frag_prev(frag)
+ && frag_prev(frag)->size < PAGE_SIZE && frag_prev(frag)->node) {
JFFS2_ERROR("REF_PRISTINE node at 0x%08x had a previous non-hole frag in the same page. Tell dwmw2.\n",
ref_offset(fn->raw));
bitched = 1;
}
- if ((frag->ofs+frag->size) & (PAGE_CACHE_SIZE-1) && frag_next(frag)
- && frag_next(frag)->size < PAGE_CACHE_SIZE && frag_next(frag)->node) {
+ if ((frag->ofs+frag->size) & (PAGE_SIZE-1) && frag_next(frag)
+ && frag_next(frag)->size < PAGE_SIZE && frag_next(frag)->node) {
JFFS2_ERROR("REF_PRISTINE node at 0x%08x (%08x-%08x) had a following non-hole frag in the same page. Tell dwmw2.\n",
ref_offset(fn->raw), frag->ofs, frag->ofs+frag->size);
bitched = 1;
diff --git a/fs/jffs2/file.c b/fs/jffs2/file.c
index cad86bac3..0e62dec3e 100644
--- a/fs/jffs2/file.c
+++ b/fs/jffs2/file.c
@@ -87,14 +87,15 @@ static int jffs2_do_readpage_nolock (struct inode *inode, struct page *pg)
int ret;
jffs2_dbg(2, "%s(): ino #%lu, page at offset 0x%lx\n",
- __func__, inode->i_ino, pg->index << PAGE_CACHE_SHIFT);
+ __func__, inode->i_ino, pg->index << PAGE_SHIFT);
BUG_ON(!PageLocked(pg));
pg_buf = kmap(pg);
/* FIXME: Can kmap fail? */
- ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_CACHE_SHIFT, PAGE_CACHE_SIZE);
+ ret = jffs2_read_inode_range(c, f, pg_buf, pg->index << PAGE_SHIFT,
+ PAGE_SIZE);
if (ret) {
ClearPageUptodate(pg);
@@ -137,8 +138,8 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
struct page *pg;
struct inode *inode = mapping->host;
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
- uint32_t pageofs = index << PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
+ uint32_t pageofs = index << PAGE_SHIFT;
int ret = 0;
pg = grab_cache_page_write_begin(mapping, index, flags);
@@ -230,7 +231,7 @@ static int jffs2_write_begin(struct file *filp, struct address_space *mapping,
out_page:
unlock_page(pg);
- page_cache_release(pg);
+ put_page(pg);
return ret;
}
@@ -245,14 +246,14 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
struct jffs2_inode_info *f = JFFS2_INODE_INFO(inode);
struct jffs2_sb_info *c = JFFS2_SB_INFO(inode->i_sb);
struct jffs2_raw_inode *ri;
- unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned start = pos & (PAGE_SIZE - 1);
unsigned end = start + copied;
unsigned aligned_start = start & ~3;
int ret = 0;
uint32_t writtenlen = 0;
jffs2_dbg(1, "%s(): ino #%lu, page at 0x%lx, range %d-%d, flags %lx\n",
- __func__, inode->i_ino, pg->index << PAGE_CACHE_SHIFT,
+ __func__, inode->i_ino, pg->index << PAGE_SHIFT,
start, end, pg->flags);
/* We need to avoid deadlock with page_cache_read() in
@@ -261,7 +262,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
to re-lock it. */
BUG_ON(!PageUptodate(pg));
- if (end == PAGE_CACHE_SIZE) {
+ if (end == PAGE_SIZE) {
/* When writing out the end of a page, write out the
_whole_ page. This helps to reduce the number of
nodes in files which have many short writes, like
@@ -275,7 +276,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
jffs2_dbg(1, "%s(): Allocation of raw inode failed\n",
__func__);
unlock_page(pg);
- page_cache_release(pg);
+ put_page(pg);
return -ENOMEM;
}
@@ -292,7 +293,7 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
kmap(pg);
ret = jffs2_write_inode_range(c, f, ri, page_address(pg) + aligned_start,
- (pg->index << PAGE_CACHE_SHIFT) + aligned_start,
+ (pg->index << PAGE_SHIFT) + aligned_start,
end - aligned_start, &writtenlen);
kunmap(pg);
@@ -329,6 +330,6 @@ static int jffs2_write_end(struct file *filp, struct address_space *mapping,
jffs2_dbg(1, "%s() returning %d\n",
__func__, writtenlen > 0 ? writtenlen : ret);
unlock_page(pg);
- page_cache_release(pg);
+ put_page(pg);
return writtenlen > 0 ? writtenlen : ret;
}
diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c
index bead25ae8..ae2ebb26b 100644
--- a/fs/jffs2/fs.c
+++ b/fs/jffs2/fs.c
@@ -586,8 +586,8 @@ int jffs2_do_fill_super(struct super_block *sb, void *data, int silent)
goto out_root;
sb->s_maxbytes = 0xFFFFFFFF;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = JFFS2_SUPER_MAGIC;
if (!(sb->s_flags & MS_RDONLY))
jffs2_start_garbage_collect_thread(c);
@@ -685,7 +685,7 @@ unsigned char *jffs2_gc_fetch_page(struct jffs2_sb_info *c,
struct inode *inode = OFNI_EDONI_2SFFJ(f);
struct page *pg;
- pg = read_cache_page(inode->i_mapping, offset >> PAGE_CACHE_SHIFT,
+ pg = read_cache_page(inode->i_mapping, offset >> PAGE_SHIFT,
(void *)jffs2_do_readpage_unlock, inode);
if (IS_ERR(pg))
return (void *)pg;
@@ -701,7 +701,7 @@ void jffs2_gc_release_page(struct jffs2_sb_info *c,
struct page *pg = (void *)*priv;
kunmap(pg);
- page_cache_release(pg);
+ put_page(pg);
}
static int jffs2_flash_setup(struct jffs2_sb_info *c) {
diff --git a/fs/jffs2/gc.c b/fs/jffs2/gc.c
index 95d5880a6..9ed0f26cf 100644
--- a/fs/jffs2/gc.c
+++ b/fs/jffs2/gc.c
@@ -134,37 +134,59 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
if (mutex_lock_interruptible(&c->alloc_sem))
return -EINTR;
+
for (;;) {
+ /* We can't start doing GC until we've finished checking
+ the node CRCs etc. */
+ int bucket, want_ino;
+
spin_lock(&c->erase_completion_lock);
if (!c->unchecked_size)
break;
-
- /* We can't start doing GC yet. We haven't finished checking
- the node CRCs etc. Do it now. */
-
- /* checked_ino is protected by the alloc_sem */
- if (c->checked_ino > c->highest_ino && xattr) {
- pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
- c->unchecked_size);
- jffs2_dbg_dump_block_lists_nolock(c);
- spin_unlock(&c->erase_completion_lock);
- mutex_unlock(&c->alloc_sem);
- return -ENOSPC;
- }
-
spin_unlock(&c->erase_completion_lock);
if (!xattr)
xattr = jffs2_verify_xattr(c);
spin_lock(&c->inocache_lock);
+ /* Instead of doing the inodes in numeric order, doing a lookup
+ * in the hash for each possible number, just walk the hash
+ * buckets of *existing* inodes. This means that we process
+ * them out-of-order, but it can be a lot faster if there's
+ * a sparse inode# space. Which there often is. */
+ want_ino = c->check_ino;
+ for (bucket = c->check_ino % c->inocache_hashsize ; bucket < c->inocache_hashsize; bucket++) {
+ for (ic = c->inocache_list[bucket]; ic; ic = ic->next) {
+ if (ic->ino < want_ino)
+ continue;
+
+ if (ic->state != INO_STATE_CHECKEDABSENT &&
+ ic->state != INO_STATE_PRESENT)
+ goto got_next; /* with inocache_lock held */
+
+ jffs2_dbg(1, "Skipping ino #%u already checked\n",
+ ic->ino);
+ }
+ want_ino = 0;
+ }
- ic = jffs2_get_ino_cache(c, c->checked_ino++);
+ /* Point c->check_ino past the end of the last bucket. */
+ c->check_ino = ((c->highest_ino + c->inocache_hashsize + 1) &
+ ~c->inocache_hashsize) - 1;
- if (!ic) {
- spin_unlock(&c->inocache_lock);
- continue;
- }
+ spin_unlock(&c->inocache_lock);
+
+ pr_crit("Checked all inodes but still 0x%x bytes of unchecked space?\n",
+ c->unchecked_size);
+ jffs2_dbg_dump_block_lists_nolock(c);
+ mutex_unlock(&c->alloc_sem);
+ return -ENOSPC;
+
+ got_next:
+ /* For next time round the loop, we want c->checked_ino to indicate
+ * the *next* one we want to check. And since we're walking the
+ * buckets rather than doing it sequentially, it's: */
+ c->check_ino = ic->ino + c->inocache_hashsize;
if (!ic->pino_nlink) {
jffs2_dbg(1, "Skipping check of ino #%d with nlink/pino zero\n",
@@ -176,8 +198,6 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
switch(ic->state) {
case INO_STATE_CHECKEDABSENT:
case INO_STATE_PRESENT:
- jffs2_dbg(1, "Skipping ino #%u already checked\n",
- ic->ino);
spin_unlock(&c->inocache_lock);
continue;
@@ -196,7 +216,7 @@ int jffs2_garbage_collect_pass(struct jffs2_sb_info *c)
ic->ino);
/* We need to come back again for the _same_ inode. We've
made no progress in this case, but that should be OK */
- c->checked_ino--;
+ c->check_ino = ic->ino;
mutex_unlock(&c->alloc_sem);
sleep_on_spinunlock(&c->inocache_wq, &c->inocache_lock);
@@ -532,7 +552,7 @@ static int jffs2_garbage_collect_live(struct jffs2_sb_info *c, struct jffs2_era
goto upnout;
}
/* We found a datanode. Do the GC */
- if((start >> PAGE_CACHE_SHIFT) < ((end-1) >> PAGE_CACHE_SHIFT)) {
+ if((start >> PAGE_SHIFT) < ((end-1) >> PAGE_SHIFT)) {
/* It crosses a page boundary. Therefore, it must be a hole. */
ret = jffs2_garbage_collect_hole(c, jeb, f, fn, start, end);
} else {
@@ -1172,8 +1192,8 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
struct jffs2_node_frag *frag;
uint32_t min, max;
- min = start & ~(PAGE_CACHE_SIZE-1);
- max = min + PAGE_CACHE_SIZE;
+ min = start & ~(PAGE_SIZE-1);
+ max = min + PAGE_SIZE;
frag = jffs2_lookup_node_frag(&f->fragtree, start);
@@ -1331,7 +1351,7 @@ static int jffs2_garbage_collect_dnode(struct jffs2_sb_info *c, struct jffs2_era
cdatalen = min_t(uint32_t, alloclen - sizeof(ri), end - offset);
datalen = end - offset;
- writebuf = pg_ptr + (offset & (PAGE_CACHE_SIZE -1));
+ writebuf = pg_ptr + (offset & (PAGE_SIZE -1));
comprtype = jffs2_compress(c, f, writebuf, &comprbuf, &datalen, &cdatalen);
diff --git a/fs/jffs2/jffs2_fs_sb.h b/fs/jffs2/jffs2_fs_sb.h
index 046fee8b6..778275f48 100644
--- a/fs/jffs2/jffs2_fs_sb.h
+++ b/fs/jffs2/jffs2_fs_sb.h
@@ -49,7 +49,7 @@ struct jffs2_sb_info {
struct mtd_info *mtd;
uint32_t highest_ino;
- uint32_t checked_ino;
+ uint32_t check_ino; /* *NEXT* inode to be checked */
unsigned int flags;
diff --git a/fs/jffs2/nodelist.c b/fs/jffs2/nodelist.c
index 9a5449bc3..b86c78d17 100644
--- a/fs/jffs2/nodelist.c
+++ b/fs/jffs2/nodelist.c
@@ -90,7 +90,7 @@ uint32_t jffs2_truncate_fragtree(struct jffs2_sb_info *c, struct rb_root *list,
/* If the last fragment starts at the RAM page boundary, it is
* REF_PRISTINE irrespective of its size. */
- if (frag->node && (frag->ofs & (PAGE_CACHE_SIZE - 1)) == 0) {
+ if (frag->node && (frag->ofs & (PAGE_SIZE - 1)) == 0) {
dbg_fragtree2("marking the last fragment 0x%08x-0x%08x REF_PRISTINE.\n",
frag->ofs, frag->ofs + frag->size);
frag->node->raw->flash_offset = ref_offset(frag->node->raw) | REF_PRISTINE;
@@ -237,7 +237,7 @@ static int jffs2_add_frag_to_fragtree(struct jffs2_sb_info *c, struct rb_root *r
If so, both 'this' and the new node get marked REF_NORMAL so
the GC can take a look.
*/
- if (lastend && (lastend-1) >> PAGE_CACHE_SHIFT == newfrag->ofs >> PAGE_CACHE_SHIFT) {
+ if (lastend && (lastend-1) >> PAGE_SHIFT == newfrag->ofs >> PAGE_SHIFT) {
if (this->node)
mark_ref_normal(this->node->raw);
mark_ref_normal(newfrag->node->raw);
@@ -382,7 +382,7 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
/* If we now share a page with other nodes, mark either previous
or next node REF_NORMAL, as appropriate. */
- if (newfrag->ofs & (PAGE_CACHE_SIZE-1)) {
+ if (newfrag->ofs & (PAGE_SIZE-1)) {
struct jffs2_node_frag *prev = frag_prev(newfrag);
mark_ref_normal(fn->raw);
@@ -391,7 +391,7 @@ int jffs2_add_full_dnode_to_inode(struct jffs2_sb_info *c, struct jffs2_inode_in
mark_ref_normal(prev->node->raw);
}
- if ((newfrag->ofs+newfrag->size) & (PAGE_CACHE_SIZE-1)) {
+ if ((newfrag->ofs+newfrag->size) & (PAGE_SIZE-1)) {
struct jffs2_node_frag *next = frag_next(newfrag);
if (next) {
diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c
index b6bd4affd..cda0774c2 100644
--- a/fs/jffs2/nodemgmt.c
+++ b/fs/jffs2/nodemgmt.c
@@ -846,8 +846,8 @@ int jffs2_thread_should_wake(struct jffs2_sb_info *c)
return 1;
if (c->unchecked_size) {
- jffs2_dbg(1, "jffs2_thread_should_wake(): unchecked_size %d, checked_ino #%d\n",
- c->unchecked_size, c->checked_ino);
+ jffs2_dbg(1, "jffs2_thread_should_wake(): unchecked_size %d, check_ino #%d\n",
+ c->unchecked_size, c->check_ino);
return 1;
}
diff --git a/fs/jffs2/wbuf.c b/fs/jffs2/wbuf.c
index 5a3da3f52..b25d28a21 100644
--- a/fs/jffs2/wbuf.c
+++ b/fs/jffs2/wbuf.c
@@ -1183,22 +1183,20 @@ void jffs2_dirty_trigger(struct jffs2_sb_info *c)
int jffs2_nand_flash_setup(struct jffs2_sb_info *c)
{
- struct nand_ecclayout *oinfo = c->mtd->ecclayout;
-
if (!c->mtd->oobsize)
return 0;
/* Cleanmarker is out-of-band, so inline size zero */
c->cleanmarker_size = 0;
- if (!oinfo || oinfo->oobavail == 0) {
+ if (c->mtd->oobavail == 0) {
pr_err("inconsistent device description\n");
return -EINVAL;
}
jffs2_dbg(1, "using OOB on NAND\n");
- c->oobavail = oinfo->oobavail;
+ c->oobavail = c->mtd->oobavail;
/* Initialise write buffer */
init_rwsem(&c->wbuf_sem);
diff --git a/fs/jffs2/write.c b/fs/jffs2/write.c
index b634de4c8..7fb187ab2 100644
--- a/fs/jffs2/write.c
+++ b/fs/jffs2/write.c
@@ -172,8 +172,8 @@ struct jffs2_full_dnode *jffs2_write_dnode(struct jffs2_sb_info *c, struct jffs2
beginning of a page and runs to the end of the file, or if
it's a hole node, mark it REF_PRISTINE, else REF_NORMAL.
*/
- if ((je32_to_cpu(ri->dsize) >= PAGE_CACHE_SIZE) ||
- ( ((je32_to_cpu(ri->offset)&(PAGE_CACHE_SIZE-1))==0) &&
+ if ((je32_to_cpu(ri->dsize) >= PAGE_SIZE) ||
+ ( ((je32_to_cpu(ri->offset)&(PAGE_SIZE-1))==0) &&
(je32_to_cpu(ri->dsize)+je32_to_cpu(ri->offset) == je32_to_cpu(ri->isize)))) {
flash_ofs |= REF_PRISTINE;
} else {
@@ -366,7 +366,8 @@ int jffs2_write_inode_range(struct jffs2_sb_info *c, struct jffs2_inode_info *f,
break;
}
mutex_lock(&f->sem);
- datalen = min_t(uint32_t, writelen, PAGE_CACHE_SIZE - (offset & (PAGE_CACHE_SIZE-1)));
+ datalen = min_t(uint32_t, writelen,
+ PAGE_SIZE - (offset & (PAGE_SIZE-1)));
cdatalen = min_t(uint32_t, alloclen - sizeof(*ri), datalen);
comprtype = jffs2_compress(c, f, buf, &comprbuf, &datalen, &cdatalen);
diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c
index a3eb316b1..b60e015cc 100644
--- a/fs/jfs/jfs_metapage.c
+++ b/fs/jfs/jfs_metapage.c
@@ -80,7 +80,7 @@ static inline void lock_metapage(struct metapage *mp)
static struct kmem_cache *metapage_cache;
static mempool_t *metapage_mempool;
-#define MPS_PER_PAGE (PAGE_CACHE_SIZE >> L2PSIZE)
+#define MPS_PER_PAGE (PAGE_SIZE >> L2PSIZE)
#if MPS_PER_PAGE > 1
@@ -316,7 +316,7 @@ static void last_write_complete(struct page *page)
struct metapage *mp;
unsigned int offset;
- for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+ for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
mp = page_to_mp(page, offset);
if (mp && test_bit(META_io, &mp->flag)) {
if (mp->lsn)
@@ -366,12 +366,12 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
int bad_blocks = 0;
page_start = (sector_t)page->index <<
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ (PAGE_SHIFT - inode->i_blkbits);
BUG_ON(!PageLocked(page));
BUG_ON(PageWriteback(page));
set_page_writeback(page);
- for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+ for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
mp = page_to_mp(page, offset);
if (!mp || !test_bit(META_dirty, &mp->flag))
@@ -416,7 +416,7 @@ static int metapage_writepage(struct page *page, struct writeback_control *wbc)
bio = NULL;
} else
inc_io(page);
- xlen = (PAGE_CACHE_SIZE - offset) >> inode->i_blkbits;
+ xlen = (PAGE_SIZE - offset) >> inode->i_blkbits;
pblock = metapage_get_blocks(inode, lblock, &xlen);
if (!pblock) {
printk(KERN_ERR "JFS: metapage_get_blocks failed\n");
@@ -485,7 +485,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
struct inode *inode = page->mapping->host;
struct bio *bio = NULL;
int block_offset;
- int blocks_per_page = PAGE_CACHE_SIZE >> inode->i_blkbits;
+ int blocks_per_page = PAGE_SIZE >> inode->i_blkbits;
sector_t page_start; /* address of page in fs blocks */
sector_t pblock;
int xlen;
@@ -494,7 +494,7 @@ static int metapage_readpage(struct file *fp, struct page *page)
BUG_ON(!PageLocked(page));
page_start = (sector_t)page->index <<
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ (PAGE_SHIFT - inode->i_blkbits);
block_offset = 0;
while (block_offset < blocks_per_page) {
@@ -542,7 +542,7 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
int ret = 1;
int offset;
- for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+ for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
mp = page_to_mp(page, offset);
if (!mp)
@@ -568,7 +568,7 @@ static int metapage_releasepage(struct page *page, gfp_t gfp_mask)
static void metapage_invalidatepage(struct page *page, unsigned int offset,
unsigned int length)
{
- BUG_ON(offset || length < PAGE_CACHE_SIZE);
+ BUG_ON(offset || length < PAGE_SIZE);
BUG_ON(PageWriteback(page));
@@ -599,10 +599,10 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
inode->i_ino, lblock, absolute);
l2bsize = inode->i_blkbits;
- l2BlocksPerPage = PAGE_CACHE_SHIFT - l2bsize;
+ l2BlocksPerPage = PAGE_SHIFT - l2bsize;
page_index = lblock >> l2BlocksPerPage;
page_offset = (lblock - (page_index << l2BlocksPerPage)) << l2bsize;
- if ((page_offset + size) > PAGE_CACHE_SIZE) {
+ if ((page_offset + size) > PAGE_SIZE) {
jfs_err("MetaData crosses page boundary!!");
jfs_err("lblock = %lx, size = %d", lblock, size);
dump_stack();
@@ -621,7 +621,7 @@ struct metapage *__get_metapage(struct inode *inode, unsigned long lblock,
mapping = inode->i_mapping;
}
- if (new && (PSIZE == PAGE_CACHE_SIZE)) {
+ if (new && (PSIZE == PAGE_SIZE)) {
page = grab_cache_page(mapping, page_index);
if (!page) {
jfs_err("grab_cache_page failed!");
@@ -693,7 +693,7 @@ unlock:
void grab_metapage(struct metapage * mp)
{
jfs_info("grab_metapage: mp = 0x%p", mp);
- page_cache_get(mp->page);
+ get_page(mp->page);
lock_page(mp->page);
mp->count++;
lock_metapage(mp);
@@ -706,12 +706,12 @@ void force_metapage(struct metapage *mp)
jfs_info("force_metapage: mp = 0x%p", mp);
set_bit(META_forcewrite, &mp->flag);
clear_bit(META_sync, &mp->flag);
- page_cache_get(page);
+ get_page(page);
lock_page(page);
set_page_dirty(page);
write_one_page(page, 1);
clear_bit(META_forcewrite, &mp->flag);
- page_cache_release(page);
+ put_page(page);
}
void hold_metapage(struct metapage *mp)
@@ -726,7 +726,7 @@ void put_metapage(struct metapage *mp)
unlock_page(mp->page);
return;
}
- page_cache_get(mp->page);
+ get_page(mp->page);
mp->count++;
lock_metapage(mp);
unlock_page(mp->page);
@@ -746,7 +746,7 @@ void release_metapage(struct metapage * mp)
assert(mp->count);
if (--mp->count || mp->nohomeok) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return;
}
@@ -764,13 +764,13 @@ void release_metapage(struct metapage * mp)
drop_metapage(page, mp);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
void __invalidate_metapages(struct inode *ip, s64 addr, int len)
{
sector_t lblock;
- int l2BlocksPerPage = PAGE_CACHE_SHIFT - ip->i_blkbits;
+ int l2BlocksPerPage = PAGE_SHIFT - ip->i_blkbits;
int BlocksPerPage = 1 << l2BlocksPerPage;
/* All callers are interested in block device's mapping */
struct address_space *mapping =
@@ -788,7 +788,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
page = find_lock_page(mapping, lblock >> l2BlocksPerPage);
if (!page)
continue;
- for (offset = 0; offset < PAGE_CACHE_SIZE; offset += PSIZE) {
+ for (offset = 0; offset < PAGE_SIZE; offset += PSIZE) {
mp = page_to_mp(page, offset);
if (!mp)
continue;
@@ -803,7 +803,7 @@ void __invalidate_metapages(struct inode *ip, s64 addr, int len)
remove_from_logsync(mp);
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
}
diff --git a/fs/jfs/jfs_metapage.h b/fs/jfs/jfs_metapage.h
index 337e9e51a..a869fb4a2 100644
--- a/fs/jfs/jfs_metapage.h
+++ b/fs/jfs/jfs_metapage.h
@@ -106,7 +106,7 @@ static inline void metapage_nohomeok(struct metapage *mp)
lock_page(page);
if (!mp->nohomeok++) {
mark_metapage_dirty(mp);
- page_cache_get(page);
+ get_page(page);
wait_on_page_writeback(page);
}
unlock_page(page);
@@ -128,7 +128,7 @@ static inline void metapage_wait_for_io(struct metapage *mp)
static inline void _metapage_homeok(struct metapage *mp)
{
if (!--mp->nohomeok)
- page_cache_release(mp->page);
+ put_page(mp->page);
}
static inline void metapage_homeok(struct metapage *mp)
diff --git a/fs/jfs/super.c b/fs/jfs/super.c
index 4f5d85ba8..78d599198 100644
--- a/fs/jfs/super.c
+++ b/fs/jfs/super.c
@@ -596,7 +596,7 @@ static int jfs_fill_super(struct super_block *sb, void *data, int silent)
* Page cache is indexed by long.
* I would use MAX_LFS_FILESIZE, but it's only half as big
*/
- sb->s_maxbytes = min(((u64) PAGE_CACHE_SIZE << 32) - 1,
+ sb->s_maxbytes = min(((u64) PAGE_SIZE << 32) - 1,
(u64)sb->s_maxbytes);
#endif
sb->s_time_gran = 1;
diff --git a/fs/kernfs/dir.c b/fs/kernfs/dir.c
index 996b7742c..37f9678ae 100644
--- a/fs/kernfs/dir.c
+++ b/fs/kernfs/dir.c
@@ -44,28 +44,122 @@ static int kernfs_name_locked(struct kernfs_node *kn, char *buf, size_t buflen)
return strlcpy(buf, kn->parent ? kn->name : "/", buflen);
}
-static char * __must_check kernfs_path_locked(struct kernfs_node *kn, char *buf,
- size_t buflen)
+/* kernfs_node_depth - compute depth from @from to @to */
+static size_t kernfs_depth(struct kernfs_node *from, struct kernfs_node *to)
{
- char *p = buf + buflen;
- int len;
+ size_t depth = 0;
- *--p = '\0';
+ while (to->parent && to != from) {
+ depth++;
+ to = to->parent;
+ }
+ return depth;
+}
- do {
- len = strlen(kn->name);
- if (p - buf < len + 1) {
- buf[0] = '\0';
- p = NULL;
- break;
- }
- p -= len;
- memcpy(p, kn->name, len);
- *--p = '/';
- kn = kn->parent;
- } while (kn && kn->parent);
+static struct kernfs_node *kernfs_common_ancestor(struct kernfs_node *a,
+ struct kernfs_node *b)
+{
+ size_t da, db;
+ struct kernfs_root *ra = kernfs_root(a), *rb = kernfs_root(b);
+
+ if (ra != rb)
+ return NULL;
+
+ da = kernfs_depth(ra->kn, a);
+ db = kernfs_depth(rb->kn, b);
+
+ while (da > db) {
+ a = a->parent;
+ da--;
+ }
+ while (db > da) {
+ b = b->parent;
+ db--;
+ }
+
+ /* worst case b and a will be the same at root */
+ while (b != a) {
+ b = b->parent;
+ a = a->parent;
+ }
+
+ return a;
+}
+
+/**
+ * kernfs_path_from_node_locked - find a pseudo-absolute path to @kn_to,
+ * where kn_from is treated as root of the path.
+ * @kn_from: kernfs node which should be treated as root for the path
+ * @kn_to: kernfs node to which path is needed
+ * @buf: buffer to copy the path into
+ * @buflen: size of @buf
+ *
+ * We need to handle couple of scenarios here:
+ * [1] when @kn_from is an ancestor of @kn_to at some level
+ * kn_from: /n1/n2/n3
+ * kn_to: /n1/n2/n3/n4/n5
+ * result: /n4/n5
+ *
+ * [2] when @kn_from is on a different hierarchy and we need to find common
+ * ancestor between @kn_from and @kn_to.
+ * kn_from: /n1/n2/n3/n4
+ * kn_to: /n1/n2/n5
+ * result: /../../n5
+ * OR
+ * kn_from: /n1/n2/n3/n4/n5 [depth=5]
+ * kn_to: /n1/n2/n3 [depth=3]
+ * result: /../..
+ *
+ * return value: length of the string. If greater than buflen,
+ * then contents of buf are undefined. On error, -1 is returned.
+ */
+static int kernfs_path_from_node_locked(struct kernfs_node *kn_to,
+ struct kernfs_node *kn_from,
+ char *buf, size_t buflen)
+{
+ struct kernfs_node *kn, *common;
+ const char parent_str[] = "/..";
+ size_t depth_from, depth_to, len = 0, nlen = 0;
+ char *p;
+ int i;
+
+ if (!kn_from)
+ kn_from = kernfs_root(kn_to)->kn;
+
+ if (kn_from == kn_to)
+ return strlcpy(buf, "/", buflen);
+
+ common = kernfs_common_ancestor(kn_from, kn_to);
+ if (WARN_ON(!common))
+ return -1;
+
+ depth_to = kernfs_depth(common, kn_to);
+ depth_from = kernfs_depth(common, kn_from);
+
+ if (buf)
+ buf[0] = '\0';
+
+ for (i = 0; i < depth_from; i++)
+ len += strlcpy(buf + len, parent_str,
+ len < buflen ? buflen - len : 0);
+
+ /* Calculate how many bytes we need for the rest */
+ for (kn = kn_to; kn != common; kn = kn->parent)
+ nlen += strlen(kn->name) + 1;
+
+ if (len + nlen >= buflen)
+ return len + nlen;
+
+ p = buf + len + nlen;
+ *p = '\0';
+ for (kn = kn_to; kn != common; kn = kn->parent) {
+ size_t tmp = strlen(kn->name);
+ p -= tmp;
+ memcpy(p, kn->name, tmp);
+ *(--p) = '/';
+ }
- return p;
+ return len + nlen;
}
/**
@@ -115,6 +209,34 @@ size_t kernfs_path_len(struct kernfs_node *kn)
}
/**
+ * kernfs_path_from_node - build path of node @to relative to @from.
+ * @from: parent kernfs_node relative to which we need to build the path
+ * @to: kernfs_node of interest
+ * @buf: buffer to copy @to's path into
+ * @buflen: size of @buf
+ *
+ * Builds @to's path relative to @from in @buf. @from and @to must
+ * be on the same kernfs-root. If @from is not parent of @to, then a relative
+ * path (which includes '..'s) as needed to reach from @from to @to is
+ * returned.
+ *
+ * If @buf isn't long enough, the return value will be greater than @buflen
+ * and @buf contents are undefined.
+ */
+int kernfs_path_from_node(struct kernfs_node *to, struct kernfs_node *from,
+ char *buf, size_t buflen)
+{
+ unsigned long flags;
+ int ret;
+
+ spin_lock_irqsave(&kernfs_rename_lock, flags);
+ ret = kernfs_path_from_node_locked(to, from, buf, buflen);
+ spin_unlock_irqrestore(&kernfs_rename_lock, flags);
+ return ret;
+}
+EXPORT_SYMBOL_GPL(kernfs_path_from_node);
+
+/**
* kernfs_path - build full path of a given node
* @kn: kernfs_node of interest
* @buf: buffer to copy @kn's name into
@@ -127,13 +249,12 @@ size_t kernfs_path_len(struct kernfs_node *kn)
*/
char *kernfs_path(struct kernfs_node *kn, char *buf, size_t buflen)
{
- unsigned long flags;
- char *p;
+ int ret;
- spin_lock_irqsave(&kernfs_rename_lock, flags);
- p = kernfs_path_locked(kn, buf, buflen);
- spin_unlock_irqrestore(&kernfs_rename_lock, flags);
- return p;
+ ret = kernfs_path_from_node(kn, NULL, buf, buflen);
+ if (ret < 0 || ret >= buflen)
+ return NULL;
+ return buf;
}
EXPORT_SYMBOL_GPL(kernfs_path);
@@ -164,17 +285,25 @@ void pr_cont_kernfs_name(struct kernfs_node *kn)
void pr_cont_kernfs_path(struct kernfs_node *kn)
{
unsigned long flags;
- char *p;
+ int sz;
spin_lock_irqsave(&kernfs_rename_lock, flags);
- p = kernfs_path_locked(kn, kernfs_pr_cont_buf,
- sizeof(kernfs_pr_cont_buf));
- if (p)
- pr_cont("%s", p);
- else
- pr_cont("<name too long>");
+ sz = kernfs_path_from_node_locked(kn, NULL, kernfs_pr_cont_buf,
+ sizeof(kernfs_pr_cont_buf));
+ if (sz < 0) {
+ pr_cont("(error)");
+ goto out;
+ }
+
+ if (sz >= sizeof(kernfs_pr_cont_buf)) {
+ pr_cont("(name too long)");
+ goto out;
+ }
+ pr_cont("%s", kernfs_pr_cont_buf);
+
+out:
spin_unlock_irqrestore(&kernfs_rename_lock, flags);
}
@@ -691,15 +820,22 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
const unsigned char *path,
const void *ns)
{
- static char path_buf[PATH_MAX]; /* protected by kernfs_mutex */
- size_t len = strlcpy(path_buf, path, PATH_MAX);
- char *p = path_buf;
- char *name;
+ size_t len;
+ char *p, *name;
lockdep_assert_held(&kernfs_mutex);
- if (len >= PATH_MAX)
+ /* grab kernfs_rename_lock to piggy back on kernfs_pr_cont_buf */
+ spin_lock_irq(&kernfs_rename_lock);
+
+ len = strlcpy(kernfs_pr_cont_buf, path, sizeof(kernfs_pr_cont_buf));
+
+ if (len >= sizeof(kernfs_pr_cont_buf)) {
+ spin_unlock_irq(&kernfs_rename_lock);
return NULL;
+ }
+
+ p = kernfs_pr_cont_buf;
while ((name = strsep(&p, "/")) && parent) {
if (*name == '\0')
@@ -707,6 +843,8 @@ static struct kernfs_node *kernfs_walk_ns(struct kernfs_node *parent,
parent = kernfs_find_ns(parent, name, ns);
}
+ spin_unlock_irq(&kernfs_rename_lock);
+
return parent;
}
diff --git a/fs/kernfs/mount.c b/fs/kernfs/mount.c
index 8eaf41718..3d670a367 100644
--- a/fs/kernfs/mount.c
+++ b/fs/kernfs/mount.c
@@ -14,6 +14,8 @@
#include <linux/magic.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
+#include <linux/namei.h>
+#include <linux/seq_file.h>
#include "kernfs-internal.h"
@@ -39,6 +41,19 @@ static int kernfs_sop_show_options(struct seq_file *sf, struct dentry *dentry)
return 0;
}
+static int kernfs_sop_show_path(struct seq_file *sf, struct dentry *dentry)
+{
+ struct kernfs_node *node = dentry->d_fsdata;
+ struct kernfs_root *root = kernfs_root(node);
+ struct kernfs_syscall_ops *scops = root->syscall_ops;
+
+ if (scops && scops->show_path)
+ return scops->show_path(sf, node, root);
+
+ seq_dentry(sf, dentry, " \t\n\\");
+ return 0;
+}
+
const struct super_operations kernfs_sops = {
.statfs = simple_statfs,
.drop_inode = generic_delete_inode,
@@ -46,6 +61,7 @@ const struct super_operations kernfs_sops = {
.remount_fs = kernfs_sop_remount_fs,
.show_options = kernfs_sop_show_options,
+ .show_path = kernfs_sop_show_path,
};
/**
@@ -62,6 +78,74 @@ struct kernfs_root *kernfs_root_from_sb(struct super_block *sb)
return NULL;
}
+/*
+ * find the next ancestor in the path down to @child, where @parent was the
+ * ancestor whose descendant we want to find.
+ *
+ * Say the path is /a/b/c/d. @child is d, @parent is NULL. We return the root
+ * node. If @parent is b, then we return the node for c.
+ * Passing in d as @parent is not ok.
+ */
+static struct kernfs_node *find_next_ancestor(struct kernfs_node *child,
+ struct kernfs_node *parent)
+{
+ if (child == parent) {
+ pr_crit_once("BUG in find_next_ancestor: called with parent == child");
+ return NULL;
+ }
+
+ while (child->parent != parent) {
+ if (!child->parent)
+ return NULL;
+ child = child->parent;
+ }
+
+ return child;
+}
+
+/**
+ * kernfs_node_dentry - get a dentry for the given kernfs_node
+ * @kn: kernfs_node for which a dentry is needed
+ * @sb: the kernfs super_block
+ */
+struct dentry *kernfs_node_dentry(struct kernfs_node *kn,
+ struct super_block *sb)
+{
+ struct dentry *dentry;
+ struct kernfs_node *knparent = NULL;
+
+ BUG_ON(sb->s_op != &kernfs_sops);
+
+ dentry = dget(sb->s_root);
+
+ /* Check if this is the root kernfs_node */
+ if (!kn->parent)
+ return dentry;
+
+ knparent = find_next_ancestor(kn, NULL);
+ if (WARN_ON(!knparent))
+ return ERR_PTR(-EINVAL);
+
+ do {
+ struct dentry *dtmp;
+ struct kernfs_node *kntmp;
+
+ if (kn == knparent)
+ return dentry;
+ kntmp = find_next_ancestor(kn, knparent);
+ if (WARN_ON(!kntmp))
+ return ERR_PTR(-EINVAL);
+ mutex_lock(&d_inode(dentry)->i_mutex);
+ dtmp = lookup_one_len(kntmp->name, dentry, strlen(kntmp->name));
+ mutex_unlock(&d_inode(dentry)->i_mutex);
+ dput(dentry);
+ if (IS_ERR(dtmp))
+ return dtmp;
+ knparent = kntmp;
+ dentry = dtmp;
+ } while (true);
+}
+
static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
{
struct kernfs_super_info *info = kernfs_info(sb);
@@ -69,8 +153,8 @@ static int kernfs_fill_super(struct super_block *sb, unsigned long magic)
struct dentry *root;
info->sb = sb;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = magic;
sb->s_op = &kernfs_sops;
sb->s_time_gran = 1;
diff --git a/fs/libfs.c b/fs/libfs.c
index 0ca80b2af..f3fa82ce9 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -25,7 +25,7 @@ int simple_getattr(struct vfsmount *mnt, struct dentry *dentry,
{
struct inode *inode = d_inode(dentry);
generic_fillattr(inode, stat);
- stat->blocks = inode->i_mapping->nrpages << (PAGE_CACHE_SHIFT - 9);
+ stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9);
return 0;
}
EXPORT_SYMBOL(simple_getattr);
@@ -33,7 +33,7 @@ EXPORT_SYMBOL(simple_getattr);
int simple_statfs(struct dentry *dentry, struct kstatfs *buf)
{
buf->f_type = dentry->d_sb->s_magic;
- buf->f_bsize = PAGE_CACHE_SIZE;
+ buf->f_bsize = PAGE_SIZE;
buf->f_namelen = NAME_MAX;
return 0;
}
@@ -395,7 +395,7 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
struct page *page;
pgoff_t index;
- index = pos >> PAGE_CACHE_SHIFT;
+ index = pos >> PAGE_SHIFT;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
@@ -403,10 +403,10 @@ int simple_write_begin(struct file *file, struct address_space *mapping,
*pagep = page;
- if (!PageUptodate(page) && (len != PAGE_CACHE_SIZE)) {
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ if (!PageUptodate(page) && (len != PAGE_SIZE)) {
+ unsigned from = pos & (PAGE_SIZE - 1);
- zero_user_segments(page, 0, from, from + len, PAGE_CACHE_SIZE);
+ zero_user_segments(page, 0, from, from + len, PAGE_SIZE);
}
return 0;
}
@@ -442,7 +442,7 @@ int simple_write_end(struct file *file, struct address_space *mapping,
/* zero the stale part of the page if we did a short copy */
if (copied < len) {
- unsigned from = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned from = pos & (PAGE_SIZE - 1);
zero_user(page, from + copied, len - copied);
}
@@ -458,7 +458,7 @@ int simple_write_end(struct file *file, struct address_space *mapping,
set_page_dirty(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return copied;
}
@@ -477,8 +477,8 @@ int simple_fill_super(struct super_block *s, unsigned long magic,
struct dentry *dentry;
int i;
- s->s_blocksize = PAGE_CACHE_SIZE;
- s->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ s->s_blocksize = PAGE_SIZE;
+ s->s_blocksize_bits = PAGE_SHIFT;
s->s_magic = magic;
s->s_op = &simple_super_operations;
s->s_time_gran = 1;
@@ -994,12 +994,12 @@ int generic_check_addressable(unsigned blocksize_bits, u64 num_blocks)
{
u64 last_fs_block = num_blocks - 1;
u64 last_fs_page =
- last_fs_block >> (PAGE_CACHE_SHIFT - blocksize_bits);
+ last_fs_block >> (PAGE_SHIFT - blocksize_bits);
if (unlikely(num_blocks == 0))
return 0;
- if ((blocksize_bits < 9) || (blocksize_bits > PAGE_CACHE_SHIFT))
+ if ((blocksize_bits < 9) || (blocksize_bits > PAGE_SHIFT))
return -EINVAL;
if ((last_fs_block > (sector_t)(~0ULL) >> (blocksize_bits - 9)) ||
diff --git a/fs/logfs/dev_bdev.c b/fs/logfs/dev_bdev.c
index a709d80c8..cc26f8f21 100644
--- a/fs/logfs/dev_bdev.c
+++ b/fs/logfs/dev_bdev.c
@@ -64,7 +64,7 @@ static void writeseg_end_io(struct bio *bio)
bio_for_each_segment_all(bvec, bio, i) {
end_page_writeback(bvec->bv_page);
- page_cache_release(bvec->bv_page);
+ put_page(bvec->bv_page);
}
bio_put(bio);
if (atomic_dec_and_test(&super->s_pending_writes))
diff --git a/fs/logfs/dev_mtd.c b/fs/logfs/dev_mtd.c
index 9c5014494..b76a62b19 100644
--- a/fs/logfs/dev_mtd.c
+++ b/fs/logfs/dev_mtd.c
@@ -46,9 +46,9 @@ static int loffs_mtd_write(struct super_block *sb, loff_t ofs, size_t len,
BUG_ON((ofs >= mtd->size) || (len > mtd->size - ofs));
BUG_ON(ofs != (ofs >> super->s_writeshift) << super->s_writeshift);
- BUG_ON(len > PAGE_CACHE_SIZE);
- page_start = ofs & PAGE_CACHE_MASK;
- page_end = PAGE_CACHE_ALIGN(ofs + len) - 1;
+ BUG_ON(len > PAGE_SIZE);
+ page_start = ofs & PAGE_MASK;
+ page_end = PAGE_ALIGN(ofs + len) - 1;
ret = mtd_write(mtd, ofs, len, &retlen, buf);
if (ret || (retlen != len))
return -EIO;
@@ -82,7 +82,7 @@ static int logfs_mtd_erase_mapping(struct super_block *sb, loff_t ofs,
if (!page)
continue;
memset(page_address(page), 0xFF, PAGE_SIZE);
- page_cache_release(page);
+ put_page(page);
}
return 0;
}
@@ -195,7 +195,7 @@ static int __logfs_mtd_writeseg(struct super_block *sb, u64 ofs, pgoff_t index,
err = loffs_mtd_write(sb, page->index << PAGE_SHIFT, PAGE_SIZE,
page_address(page));
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (err)
return err;
}
diff --git a/fs/logfs/dir.c b/fs/logfs/dir.c
index 542468e9b..ddbed2be5 100644
--- a/fs/logfs/dir.c
+++ b/fs/logfs/dir.c
@@ -183,7 +183,7 @@ static struct page *logfs_get_dd_page(struct inode *dir, struct dentry *dentry)
if (name->len != be16_to_cpu(dd->namelen) ||
memcmp(name->name, dd->name, name->len)) {
kunmap_atomic(dd);
- page_cache_release(page);
+ put_page(page);
continue;
}
@@ -238,7 +238,7 @@ static int logfs_unlink(struct inode *dir, struct dentry *dentry)
return PTR_ERR(page);
}
index = page->index;
- page_cache_release(page);
+ put_page(page);
mutex_lock(&super->s_dirop_mutex);
logfs_add_transaction(dir, ta);
@@ -316,7 +316,7 @@ static int logfs_readdir(struct file *file, struct dir_context *ctx)
be16_to_cpu(dd->namelen),
be64_to_cpu(dd->ino), dd->type);
kunmap(page);
- page_cache_release(page);
+ put_page(page);
if (full)
break;
}
@@ -349,7 +349,7 @@ static struct dentry *logfs_lookup(struct inode *dir, struct dentry *dentry,
dd = kmap_atomic(page);
ino = be64_to_cpu(dd->ino);
kunmap_atomic(dd);
- page_cache_release(page);
+ put_page(page);
inode = logfs_iget(dir->i_sb, ino);
if (IS_ERR(inode))
@@ -392,7 +392,7 @@ static int logfs_write_dir(struct inode *dir, struct dentry *dentry,
err = logfs_write_buf(dir, page, WF_LOCK);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (!err)
grow_dir(dir, index);
return err;
@@ -561,7 +561,7 @@ static int logfs_get_dd(struct inode *dir, struct dentry *dentry,
map = kmap_atomic(page);
memcpy(dd, map, sizeof(*dd));
kunmap_atomic(map);
- page_cache_release(page);
+ put_page(page);
return 0;
}
diff --git a/fs/logfs/file.c b/fs/logfs/file.c
index 61eaeb1b6..f01ddfb1a 100644
--- a/fs/logfs/file.c
+++ b/fs/logfs/file.c
@@ -15,21 +15,21 @@ static int logfs_write_begin(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct page *page;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
*pagep = page;
- if ((len == PAGE_CACHE_SIZE) || PageUptodate(page))
+ if ((len == PAGE_SIZE) || PageUptodate(page))
return 0;
- if ((pos & PAGE_CACHE_MASK) >= i_size_read(inode)) {
- unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+ if ((pos & PAGE_MASK) >= i_size_read(inode)) {
+ unsigned start = pos & (PAGE_SIZE - 1);
unsigned end = start + len;
/* Reading beyond i_size is simple: memset to zero */
- zero_user_segments(page, 0, start, end, PAGE_CACHE_SIZE);
+ zero_user_segments(page, 0, start, end, PAGE_SIZE);
return 0;
}
return logfs_readpage_nolock(page);
@@ -41,11 +41,11 @@ static int logfs_write_end(struct file *file, struct address_space *mapping,
{
struct inode *inode = mapping->host;
pgoff_t index = page->index;
- unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned start = pos & (PAGE_SIZE - 1);
unsigned end = start + copied;
int ret = 0;
- BUG_ON(PAGE_CACHE_SIZE != inode->i_sb->s_blocksize);
+ BUG_ON(PAGE_SIZE != inode->i_sb->s_blocksize);
BUG_ON(page->index > I3_BLOCKS);
if (copied < len) {
@@ -61,8 +61,8 @@ static int logfs_write_end(struct file *file, struct address_space *mapping,
if (copied == 0)
goto out; /* FIXME: do we need to update inode? */
- if (i_size_read(inode) < (index << PAGE_CACHE_SHIFT) + end) {
- i_size_write(inode, (index << PAGE_CACHE_SHIFT) + end);
+ if (i_size_read(inode) < (index << PAGE_SHIFT) + end) {
+ i_size_write(inode, (index << PAGE_SHIFT) + end);
mark_inode_dirty_sync(inode);
}
@@ -75,7 +75,7 @@ static int logfs_write_end(struct file *file, struct address_space *mapping,
}
out:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return ret ? ret : copied;
}
@@ -118,7 +118,7 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
loff_t i_size = i_size_read(inode);
- pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
+ pgoff_t end_index = i_size >> PAGE_SHIFT;
unsigned offset;
u64 bix;
level_t level;
@@ -142,7 +142,7 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc)
return __logfs_writepage(page);
/* Is the page fully outside i_size? (truncate in progress) */
- offset = i_size & (PAGE_CACHE_SIZE-1);
+ offset = i_size & (PAGE_SIZE-1);
if (bix > end_index || offset == 0) {
unlock_page(page);
return 0; /* don't care */
@@ -155,7 +155,7 @@ static int logfs_writepage(struct page *page, struct writeback_control *wbc)
* the page size, the remaining memory is zeroed when mapped, and
* writes to that region are not written out to the file."
*/
- zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ zero_user_segment(page, offset, PAGE_SIZE);
return __logfs_writepage(page);
}
diff --git a/fs/logfs/readwrite.c b/fs/logfs/readwrite.c
index 20973c9e5..3fb8c6d67 100644
--- a/fs/logfs/readwrite.c
+++ b/fs/logfs/readwrite.c
@@ -281,7 +281,7 @@ static struct page *logfs_get_read_page(struct inode *inode, u64 bix,
static void logfs_put_read_page(struct page *page)
{
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
static void logfs_lock_write_page(struct page *page)
@@ -323,7 +323,7 @@ repeat:
return NULL;
err = add_to_page_cache_lru(page, mapping, index, GFP_NOFS);
if (unlikely(err)) {
- page_cache_release(page);
+ put_page(page);
if (err == -EEXIST)
goto repeat;
return NULL;
@@ -342,7 +342,7 @@ static void logfs_unlock_write_page(struct page *page)
static void logfs_put_write_page(struct page *page)
{
logfs_unlock_write_page(page);
- page_cache_release(page);
+ put_page(page);
}
static struct page *logfs_get_page(struct inode *inode, u64 bix, level_t level,
@@ -562,7 +562,7 @@ static void indirect_free_block(struct super_block *sb,
if (PagePrivate(page)) {
ClearPagePrivate(page);
- page_cache_release(page);
+ put_page(page);
set_page_private(page, 0);
}
__free_block(sb, block);
@@ -655,7 +655,7 @@ static void alloc_data_block(struct inode *inode, struct page *page)
block->page = page;
SetPagePrivate(page);
- page_cache_get(page);
+ get_page(page);
set_page_private(page, (unsigned long) block);
block->ops = &indirect_block_ops;
@@ -709,7 +709,7 @@ static u64 block_get_pointer(struct page *page, int index)
static int logfs_read_empty(struct page *page)
{
- zero_user_segment(page, 0, PAGE_CACHE_SIZE);
+ zero_user_segment(page, 0, PAGE_SIZE);
return 0;
}
@@ -1660,7 +1660,7 @@ static int truncate_data_block(struct inode *inode, struct page *page,
if (err)
return err;
- zero_user_segment(page, size - pageofs, PAGE_CACHE_SIZE);
+ zero_user_segment(page, size - pageofs, PAGE_SIZE);
return logfs_segment_write(inode, page, shadow);
}
@@ -1919,7 +1919,7 @@ static void move_page_to_inode(struct inode *inode, struct page *page)
block->page = NULL;
if (PagePrivate(page)) {
ClearPagePrivate(page);
- page_cache_release(page);
+ put_page(page);
set_page_private(page, 0);
}
}
@@ -1940,7 +1940,7 @@ static void move_inode_to_page(struct page *page, struct inode *inode)
if (!PagePrivate(page)) {
SetPagePrivate(page);
- page_cache_get(page);
+ get_page(page);
set_page_private(page, (unsigned long) block);
}
@@ -1971,7 +1971,7 @@ int logfs_read_inode(struct inode *inode)
logfs_disk_to_inode(di, inode);
kunmap_atomic(di);
move_page_to_inode(inode, page);
- page_cache_release(page);
+ put_page(page);
return 0;
}
diff --git a/fs/logfs/segment.c b/fs/logfs/segment.c
index d270e4b2a..1efd6055f 100644
--- a/fs/logfs/segment.c
+++ b/fs/logfs/segment.c
@@ -90,9 +90,9 @@ int __logfs_buf_write(struct logfs_area *area, u64 ofs, void *buf, size_t len,
if (!PagePrivate(page)) {
SetPagePrivate(page);
- page_cache_get(page);
+ get_page(page);
}
- page_cache_release(page);
+ put_page(page);
buf += copylen;
len -= copylen;
@@ -117,9 +117,9 @@ static void pad_partial_page(struct logfs_area *area)
memset(page_address(page) + offset, 0xff, len);
if (!PagePrivate(page)) {
SetPagePrivate(page);
- page_cache_get(page);
+ get_page(page);
}
- page_cache_release(page);
+ put_page(page);
}
}
@@ -129,20 +129,20 @@ static void pad_full_pages(struct logfs_area *area)
struct logfs_super *super = logfs_super(sb);
u64 ofs = dev_ofs(sb, area->a_segno, area->a_used_bytes);
u32 len = super->s_segsize - area->a_used_bytes;
- pgoff_t index = PAGE_CACHE_ALIGN(ofs) >> PAGE_CACHE_SHIFT;
- pgoff_t no_indizes = len >> PAGE_CACHE_SHIFT;
+ pgoff_t index = PAGE_ALIGN(ofs) >> PAGE_SHIFT;
+ pgoff_t no_indizes = len >> PAGE_SHIFT;
struct page *page;
while (no_indizes) {
page = get_mapping_page(sb, index, 0);
BUG_ON(!page); /* FIXME: reserve a pool */
SetPageUptodate(page);
- memset(page_address(page), 0xff, PAGE_CACHE_SIZE);
+ memset(page_address(page), 0xff, PAGE_SIZE);
if (!PagePrivate(page)) {
SetPagePrivate(page);
- page_cache_get(page);
+ get_page(page);
}
- page_cache_release(page);
+ put_page(page);
index++;
no_indizes--;
}
@@ -411,7 +411,7 @@ int wbuf_read(struct super_block *sb, u64 ofs, size_t len, void *buf)
if (IS_ERR(page))
return PTR_ERR(page);
memcpy(buf, page_address(page) + offset, copylen);
- page_cache_release(page);
+ put_page(page);
buf += copylen;
len -= copylen;
@@ -499,7 +499,7 @@ static void move_btree_to_page(struct inode *inode, struct page *page,
if (!PagePrivate(page)) {
SetPagePrivate(page);
- page_cache_get(page);
+ get_page(page);
set_page_private(page, (unsigned long) block);
}
block->ops = &indirect_block_ops;
@@ -554,7 +554,7 @@ void move_page_to_btree(struct page *page)
if (PagePrivate(page)) {
ClearPagePrivate(page);
- page_cache_release(page);
+ put_page(page);
set_page_private(page, 0);
}
block->ops = &btree_block_ops;
@@ -723,9 +723,9 @@ void freeseg(struct super_block *sb, u32 segno)
continue;
if (PagePrivate(page)) {
ClearPagePrivate(page);
- page_cache_release(page);
+ put_page(page);
}
- page_cache_release(page);
+ put_page(page);
}
}
diff --git a/fs/logfs/super.c b/fs/logfs/super.c
index 54360293b..5751082db 100644
--- a/fs/logfs/super.c
+++ b/fs/logfs/super.c
@@ -48,7 +48,7 @@ void emergency_read_end(struct page *page)
if (page == emergency_page)
mutex_unlock(&emergency_mutex);
else
- page_cache_release(page);
+ put_page(page);
}
static void dump_segfile(struct super_block *sb)
@@ -206,7 +206,7 @@ static int write_one_sb(struct super_block *sb,
logfs_set_segment_erased(sb, segno, ec, 0);
logfs_write_ds(sb, ds, segno, ec);
err = super->s_devops->write_sb(sb, page);
- page_cache_release(page);
+ put_page(page);
return err;
}
@@ -366,24 +366,24 @@ static struct page *find_super_block(struct super_block *sb)
return NULL;
last = super->s_devops->find_last_sb(sb, &super->s_sb_ofs[1]);
if (!last || IS_ERR(last)) {
- page_cache_release(first);
+ put_page(first);
return NULL;
}
if (!logfs_check_ds(page_address(first))) {
- page_cache_release(last);
+ put_page(last);
return first;
}
/* First one didn't work, try the second superblock */
if (!logfs_check_ds(page_address(last))) {
- page_cache_release(first);
+ put_page(first);
return last;
}
/* Neither worked, sorry folks */
- page_cache_release(first);
- page_cache_release(last);
+ put_page(first);
+ put_page(last);
return NULL;
}
@@ -425,7 +425,7 @@ static int __logfs_read_sb(struct super_block *sb)
super->s_data_levels = ds->ds_data_levels;
super->s_total_levels = super->s_ifile_levels + super->s_iblock_levels
+ super->s_data_levels;
- page_cache_release(page);
+ put_page(page);
return 0;
}
diff --git a/fs/mbcache.c b/fs/mbcache.c
index 187477ded..eccda3a02 100644
--- a/fs/mbcache.c
+++ b/fs/mbcache.c
@@ -1,858 +1,433 @@
-/*
- * linux/fs/mbcache.c
- * (C) 2001-2002 Andreas Gruenbacher, <a.gruenbacher@computer.org>
- */
-
-/*
- * Filesystem Meta Information Block Cache (mbcache)
- *
- * The mbcache caches blocks of block devices that need to be located
- * by their device/block number, as well as by other criteria (such
- * as the block's contents).
- *
- * There can only be one cache entry in a cache per device and block number.
- * Additional indexes need not be unique in this sense. The number of
- * additional indexes (=other criteria) can be hardwired at compile time
- * or specified at cache create time.
- *
- * Each cache entry is of fixed size. An entry may be `valid' or `invalid'
- * in the cache. A valid entry is in the main hash tables of the cache,
- * and may also be in the lru list. An invalid entry is not in any hashes
- * or lists.
- *
- * A valid cache entry is only in the lru list if no handles refer to it.
- * Invalid cache entries will be freed when the last handle to the cache
- * entry is released. Entries that cannot be freed immediately are put
- * back on the lru list.
- */
-
-/*
- * Lock descriptions and usage:
- *
- * Each hash chain of both the block and index hash tables now contains
- * a built-in lock used to serialize accesses to the hash chain.
- *
- * Accesses to global data structures mb_cache_list and mb_cache_lru_list
- * are serialized via the global spinlock mb_cache_spinlock.
- *
- * Each mb_cache_entry contains a spinlock, e_entry_lock, to serialize
- * accesses to its local data, such as e_used and e_queued.
- *
- * Lock ordering:
- *
- * Each block hash chain's lock has the highest lock order, followed by an
- * index hash chain's lock, mb_cache_bg_lock (used to implement mb_cache_entry's
- * lock), and mb_cach_spinlock, with the lowest order. While holding
- * either a block or index hash chain lock, a thread can acquire an
- * mc_cache_bg_lock, which in turn can also acquire mb_cache_spinlock.
- *
- * Synchronization:
- *
- * Since both mb_cache_entry_get and mb_cache_entry_find scan the block and
- * index hash chian, it needs to lock the corresponding hash chain. For each
- * mb_cache_entry within the chain, it needs to lock the mb_cache_entry to
- * prevent either any simultaneous release or free on the entry and also
- * to serialize accesses to either the e_used or e_queued member of the entry.
- *
- * To avoid having a dangling reference to an already freed
- * mb_cache_entry, an mb_cache_entry is only freed when it is not on a
- * block hash chain and also no longer being referenced, both e_used,
- * and e_queued are 0's. When an mb_cache_entry is explicitly freed it is
- * first removed from a block hash chain.
- */
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-
-#include <linux/hash.h>
-#include <linux/fs.h>
-#include <linux/mm.h>
+#include <linux/spinlock.h>
#include <linux/slab.h>
-#include <linux/sched.h>
+#include <linux/list.h>
#include <linux/list_bl.h>
+#include <linux/module.h>
+#include <linux/sched.h>
+#include <linux/workqueue.h>
#include <linux/mbcache.h>
-#include <linux/init.h>
-#include <linux/blockgroup_lock.h>
-#include <linux/log2.h>
-
-#ifdef MB_CACHE_DEBUG
-# define mb_debug(f...) do { \
- printk(KERN_DEBUG f); \
- printk("\n"); \
- } while (0)
-#define mb_assert(c) do { if (!(c)) \
- printk(KERN_ERR "assertion " #c " failed\n"); \
- } while(0)
-#else
-# define mb_debug(f...) do { } while(0)
-# define mb_assert(c) do { } while(0)
-#endif
-#define mb_error(f...) do { \
- printk(KERN_ERR f); \
- printk("\n"); \
- } while(0)
-
-#define MB_CACHE_WRITER ((unsigned short)~0U >> 1)
-
-#define MB_CACHE_ENTRY_LOCK_BITS ilog2(NR_BG_LOCKS)
-#define MB_CACHE_ENTRY_LOCK_INDEX(ce) \
- (hash_long((unsigned long)ce, MB_CACHE_ENTRY_LOCK_BITS))
-
-static DECLARE_WAIT_QUEUE_HEAD(mb_cache_queue);
-static struct blockgroup_lock *mb_cache_bg_lock;
-static struct kmem_cache *mb_cache_kmem_cache;
-
-MODULE_AUTHOR("Andreas Gruenbacher <a.gruenbacher@computer.org>");
-MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
-MODULE_LICENSE("GPL");
-
-EXPORT_SYMBOL(mb_cache_create);
-EXPORT_SYMBOL(mb_cache_shrink);
-EXPORT_SYMBOL(mb_cache_destroy);
-EXPORT_SYMBOL(mb_cache_entry_alloc);
-EXPORT_SYMBOL(mb_cache_entry_insert);
-EXPORT_SYMBOL(mb_cache_entry_release);
-EXPORT_SYMBOL(mb_cache_entry_free);
-EXPORT_SYMBOL(mb_cache_entry_get);
-#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
-EXPORT_SYMBOL(mb_cache_entry_find_first);
-EXPORT_SYMBOL(mb_cache_entry_find_next);
-#endif
/*
- * Global data: list of all mbcache's, lru list, and a spinlock for
- * accessing cache data structures on SMP machines. The lru list is
- * global across all mbcaches.
+ * Mbcache is a simple key-value store. Keys need not be unique, however
+ * key-value pairs are expected to be unique (we use this fact in
+ * mb_cache_entry_delete_block()).
+ *
+ * Ext2 and ext4 use this cache for deduplication of extended attribute blocks.
+ * They use hash of a block contents as a key and block number as a value.
+ * That's why keys need not be unique (different xattr blocks may end up having
+ * the same hash). However block number always uniquely identifies a cache
+ * entry.
+ *
+ * We provide functions for creation and removal of entries, search by key,
+ * and a special "delete entry with given key-value pair" operation. Fixed
+ * size hash table is used for fast key lookups.
*/
-static LIST_HEAD(mb_cache_list);
-static LIST_HEAD(mb_cache_lru_list);
-static DEFINE_SPINLOCK(mb_cache_spinlock);
-
-static inline void
-__spin_lock_mb_cache_entry(struct mb_cache_entry *ce)
-{
- spin_lock(bgl_lock_ptr(mb_cache_bg_lock,
- MB_CACHE_ENTRY_LOCK_INDEX(ce)));
-}
-
-static inline void
-__spin_unlock_mb_cache_entry(struct mb_cache_entry *ce)
-{
- spin_unlock(bgl_lock_ptr(mb_cache_bg_lock,
- MB_CACHE_ENTRY_LOCK_INDEX(ce)));
-}
-
-static inline int
-__mb_cache_entry_is_block_hashed(struct mb_cache_entry *ce)
-{
- return !hlist_bl_unhashed(&ce->e_block_list);
-}
+struct mb_cache {
+ /* Hash table of entries */
+ struct hlist_bl_head *c_hash;
+ /* log2 of hash table size */
+ int c_bucket_bits;
+ /* Maximum entries in cache to avoid degrading hash too much */
+ int c_max_entries;
+ /* Protects c_list, c_entry_count */
+ spinlock_t c_list_lock;
+ struct list_head c_list;
+ /* Number of entries in cache */
+ unsigned long c_entry_count;
+ struct shrinker c_shrink;
+ /* Work for shrinking when the cache has too many entries */
+ struct work_struct c_shrink_work;
+};
+static struct kmem_cache *mb_entry_cache;
-static inline void
-__mb_cache_entry_unhash_block(struct mb_cache_entry *ce)
-{
- if (__mb_cache_entry_is_block_hashed(ce))
- hlist_bl_del_init(&ce->e_block_list);
-}
+static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ unsigned int nr_to_scan);
-static inline int
-__mb_cache_entry_is_index_hashed(struct mb_cache_entry *ce)
+static inline struct hlist_bl_head *mb_cache_entry_head(struct mb_cache *cache,
+ u32 key)
{
- return !hlist_bl_unhashed(&ce->e_index.o_list);
+ return &cache->c_hash[hash_32(key, cache->c_bucket_bits)];
}
-static inline void
-__mb_cache_entry_unhash_index(struct mb_cache_entry *ce)
-{
- if (__mb_cache_entry_is_index_hashed(ce))
- hlist_bl_del_init(&ce->e_index.o_list);
-}
+/*
+ * Number of entries to reclaim synchronously when there are too many entries
+ * in cache
+ */
+#define SYNC_SHRINK_BATCH 64
/*
- * __mb_cache_entry_unhash_unlock()
- *
- * This function is called to unhash both the block and index hash
- * chain.
- * It assumes both the block and index hash chain is locked upon entry.
- * It also unlock both hash chains both exit
+ * mb_cache_entry_create - create entry in cache
+ * @cache - cache where the entry should be created
+ * @mask - gfp mask with which the entry should be allocated
+ * @key - key of the entry
+ * @block - block that contains data
+ * @reusable - is the block reusable by other inodes?
+ *
+ * Creates entry in @cache with key @key and records that data is stored in
+ * block @block. The function returns -EBUSY if entry with the same key
+ * and for the same block already exists in cache. Otherwise 0 is returned.
*/
-static inline void
-__mb_cache_entry_unhash_unlock(struct mb_cache_entry *ce)
+int mb_cache_entry_create(struct mb_cache *cache, gfp_t mask, u32 key,
+ sector_t block, bool reusable)
{
- __mb_cache_entry_unhash_index(ce);
- hlist_bl_unlock(ce->e_index_hash_p);
- __mb_cache_entry_unhash_block(ce);
- hlist_bl_unlock(ce->e_block_hash_p);
+ struct mb_cache_entry *entry, *dup;
+ struct hlist_bl_node *dup_node;
+ struct hlist_bl_head *head;
+
+ /* Schedule background reclaim if there are too many entries */
+ if (cache->c_entry_count >= cache->c_max_entries)
+ schedule_work(&cache->c_shrink_work);
+ /* Do some sync reclaim if background reclaim cannot keep up */
+ if (cache->c_entry_count >= 2*cache->c_max_entries)
+ mb_cache_shrink(cache, SYNC_SHRINK_BATCH);
+
+ entry = kmem_cache_alloc(mb_entry_cache, mask);
+ if (!entry)
+ return -ENOMEM;
+
+ INIT_LIST_HEAD(&entry->e_list);
+ /* One ref for hash, one ref returned */
+ atomic_set(&entry->e_refcnt, 1);
+ entry->e_key = key;
+ entry->e_block = block;
+ entry->e_reusable = reusable;
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(dup, dup_node, head, e_hash_list) {
+ if (dup->e_key == key && dup->e_block == block) {
+ hlist_bl_unlock(head);
+ kmem_cache_free(mb_entry_cache, entry);
+ return -EBUSY;
+ }
+ }
+ hlist_bl_add_head(&entry->e_hash_list, head);
+ hlist_bl_unlock(head);
+
+ spin_lock(&cache->c_list_lock);
+ list_add_tail(&entry->e_list, &cache->c_list);
+ /* Grab ref for LRU list */
+ atomic_inc(&entry->e_refcnt);
+ cache->c_entry_count++;
+ spin_unlock(&cache->c_list_lock);
+
+ return 0;
}
+EXPORT_SYMBOL(mb_cache_entry_create);
-static void
-__mb_cache_entry_forget(struct mb_cache_entry *ce, gfp_t gfp_mask)
+void __mb_cache_entry_free(struct mb_cache_entry *entry)
{
- struct mb_cache *cache = ce->e_cache;
-
- mb_assert(!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt)));
- kmem_cache_free(cache->c_entry_cache, ce);
- atomic_dec(&cache->c_entry_count);
+ kmem_cache_free(mb_entry_cache, entry);
}
+EXPORT_SYMBOL(__mb_cache_entry_free);
-static void
-__mb_cache_entry_release(struct mb_cache_entry *ce)
+static struct mb_cache_entry *__entry_find(struct mb_cache *cache,
+ struct mb_cache_entry *entry,
+ u32 key)
{
- /* First lock the entry to serialize access to its local data. */
- __spin_lock_mb_cache_entry(ce);
- /* Wake up all processes queuing for this cache entry. */
- if (ce->e_queued)
- wake_up_all(&mb_cache_queue);
- if (ce->e_used >= MB_CACHE_WRITER)
- ce->e_used -= MB_CACHE_WRITER;
- /*
- * Make sure that all cache entries on lru_list have
- * both e_used and e_qued of 0s.
- */
- ce->e_used--;
- if (!(ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))) {
- if (!__mb_cache_entry_is_block_hashed(ce)) {
- __spin_unlock_mb_cache_entry(ce);
- goto forget;
+ struct mb_cache_entry *old_entry = entry;
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ if (entry && !hlist_bl_unhashed(&entry->e_hash_list))
+ node = entry->e_hash_list.next;
+ else
+ node = hlist_bl_first(head);
+ while (node) {
+ entry = hlist_bl_entry(node, struct mb_cache_entry,
+ e_hash_list);
+ if (entry->e_key == key && entry->e_reusable) {
+ atomic_inc(&entry->e_refcnt);
+ goto out;
}
- /*
- * Need access to lru list, first drop entry lock,
- * then reacquire the lock in the proper order.
- */
- spin_lock(&mb_cache_spinlock);
- if (list_empty(&ce->e_lru_list))
- list_add_tail(&ce->e_lru_list, &mb_cache_lru_list);
- spin_unlock(&mb_cache_spinlock);
+ node = node->next;
}
- __spin_unlock_mb_cache_entry(ce);
- return;
-forget:
- mb_assert(list_empty(&ce->e_lru_list));
- __mb_cache_entry_forget(ce, GFP_KERNEL);
+ entry = NULL;
+out:
+ hlist_bl_unlock(head);
+ if (old_entry)
+ mb_cache_entry_put(cache, old_entry);
+
+ return entry;
}
/*
- * mb_cache_shrink_scan() memory pressure callback
- *
- * This function is called by the kernel memory management when memory
- * gets low.
+ * mb_cache_entry_find_first - find the first entry in cache with given key
+ * @cache: cache where we should search
+ * @key: key to look for
*
- * @shrink: (ignored)
- * @sc: shrink_control passed from reclaim
- *
- * Returns the number of objects freed.
+ * Search in @cache for entry with key @key. Grabs reference to the first
+ * entry found and returns the entry.
*/
-static unsigned long
-mb_cache_shrink_scan(struct shrinker *shrink, struct shrink_control *sc)
+struct mb_cache_entry *mb_cache_entry_find_first(struct mb_cache *cache,
+ u32 key)
{
- LIST_HEAD(free_list);
- struct mb_cache_entry *entry, *tmp;
- int nr_to_scan = sc->nr_to_scan;
- gfp_t gfp_mask = sc->gfp_mask;
- unsigned long freed = 0;
-
- mb_debug("trying to free %d entries", nr_to_scan);
- spin_lock(&mb_cache_spinlock);
- while ((nr_to_scan-- > 0) && !list_empty(&mb_cache_lru_list)) {
- struct mb_cache_entry *ce =
- list_entry(mb_cache_lru_list.next,
- struct mb_cache_entry, e_lru_list);
- list_del_init(&ce->e_lru_list);
- if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt))
- continue;
- spin_unlock(&mb_cache_spinlock);
- /* Prevent any find or get operation on the entry */
- hlist_bl_lock(ce->e_block_hash_p);
- hlist_bl_lock(ce->e_index_hash_p);
- /* Ignore if it is touched by a find/get */
- if (ce->e_used || ce->e_queued || atomic_read(&ce->e_refcnt) ||
- !list_empty(&ce->e_lru_list)) {
- hlist_bl_unlock(ce->e_index_hash_p);
- hlist_bl_unlock(ce->e_block_hash_p);
- spin_lock(&mb_cache_spinlock);
- continue;
- }
- __mb_cache_entry_unhash_unlock(ce);
- list_add_tail(&ce->e_lru_list, &free_list);
- spin_lock(&mb_cache_spinlock);
- }
- spin_unlock(&mb_cache_spinlock);
-
- list_for_each_entry_safe(entry, tmp, &free_list, e_lru_list) {
- __mb_cache_entry_forget(entry, gfp_mask);
- freed++;
- }
- return freed;
+ return __entry_find(cache, NULL, key);
}
+EXPORT_SYMBOL(mb_cache_entry_find_first);
-static unsigned long
-mb_cache_shrink_count(struct shrinker *shrink, struct shrink_control *sc)
+/*
+ * mb_cache_entry_find_next - find next entry in cache with the same
+ * @cache: cache where we should search
+ * @entry: entry to start search from
+ *
+ * Finds next entry in the hash chain which has the same key as @entry.
+ * If @entry is unhashed (which can happen when deletion of entry races
+ * with the search), finds the first entry in the hash chain. The function
+ * drops reference to @entry and returns with a reference to the found entry.
+ */
+struct mb_cache_entry *mb_cache_entry_find_next(struct mb_cache *cache,
+ struct mb_cache_entry *entry)
{
- struct mb_cache *cache;
- unsigned long count = 0;
-
- spin_lock(&mb_cache_spinlock);
- list_for_each_entry(cache, &mb_cache_list, c_cache_list) {
- mb_debug("cache %s (%d)", cache->c_name,
- atomic_read(&cache->c_entry_count));
- count += atomic_read(&cache->c_entry_count);
- }
- spin_unlock(&mb_cache_spinlock);
-
- return vfs_pressure_ratio(count);
+ return __entry_find(cache, entry, entry->e_key);
}
-
-static struct shrinker mb_cache_shrinker = {
- .count_objects = mb_cache_shrink_count,
- .scan_objects = mb_cache_shrink_scan,
- .seeks = DEFAULT_SEEKS,
-};
+EXPORT_SYMBOL(mb_cache_entry_find_next);
/*
- * mb_cache_create() create a new cache
- *
- * All entries in one cache are equal size. Cache entries may be from
- * multiple devices. If this is the first mbcache created, registers
- * the cache with kernel memory management. Returns NULL if no more
- * memory was available.
- *
- * @name: name of the cache (informal)
- * @bucket_bits: log2(number of hash buckets)
+ * mb_cache_entry_get - get a cache entry by block number (and key)
+ * @cache - cache we work with
+ * @key - key of block number @block
+ * @block - block number
*/
-struct mb_cache *
-mb_cache_create(const char *name, int bucket_bits)
+struct mb_cache_entry *mb_cache_entry_get(struct mb_cache *cache, u32 key,
+ sector_t block)
{
- int n, bucket_count = 1 << bucket_bits;
- struct mb_cache *cache = NULL;
-
- if (!mb_cache_bg_lock) {
- mb_cache_bg_lock = kmalloc(sizeof(struct blockgroup_lock),
- GFP_KERNEL);
- if (!mb_cache_bg_lock)
- return NULL;
- bgl_lock_init(mb_cache_bg_lock);
- }
-
- cache = kmalloc(sizeof(struct mb_cache), GFP_KERNEL);
- if (!cache)
- return NULL;
- cache->c_name = name;
- atomic_set(&cache->c_entry_count, 0);
- cache->c_bucket_bits = bucket_bits;
- cache->c_block_hash = kmalloc(bucket_count *
- sizeof(struct hlist_bl_head), GFP_KERNEL);
- if (!cache->c_block_hash)
- goto fail;
- for (n=0; n<bucket_count; n++)
- INIT_HLIST_BL_HEAD(&cache->c_block_hash[n]);
- cache->c_index_hash = kmalloc(bucket_count *
- sizeof(struct hlist_bl_head), GFP_KERNEL);
- if (!cache->c_index_hash)
- goto fail;
- for (n=0; n<bucket_count; n++)
- INIT_HLIST_BL_HEAD(&cache->c_index_hash[n]);
- if (!mb_cache_kmem_cache) {
- mb_cache_kmem_cache = kmem_cache_create(name,
- sizeof(struct mb_cache_entry), 0,
- SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
- if (!mb_cache_kmem_cache)
- goto fail2;
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb_cache_entry *entry;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_block == block) {
+ atomic_inc(&entry->e_refcnt);
+ goto out;
+ }
}
- cache->c_entry_cache = mb_cache_kmem_cache;
-
- /*
- * Set an upper limit on the number of cache entries so that the hash
- * chains won't grow too long.
- */
- cache->c_max_entries = bucket_count << 4;
-
- spin_lock(&mb_cache_spinlock);
- list_add(&cache->c_cache_list, &mb_cache_list);
- spin_unlock(&mb_cache_spinlock);
- return cache;
-
-fail2:
- kfree(cache->c_index_hash);
-
-fail:
- kfree(cache->c_block_hash);
- kfree(cache);
- return NULL;
+ entry = NULL;
+out:
+ hlist_bl_unlock(head);
+ return entry;
}
+EXPORT_SYMBOL(mb_cache_entry_get);
-
-/*
- * mb_cache_shrink()
- *
- * Removes all cache entries of a device from the cache. All cache entries
- * currently in use cannot be freed, and thus remain in the cache. All others
- * are freed.
+/* mb_cache_entry_delete_block - remove information about block from cache
+ * @cache - cache we work with
+ * @key - key of block @block
+ * @block - block number
*
- * @bdev: which device's cache entries to shrink
+ * Remove entry from cache @cache with key @key with data stored in @block.
*/
-void
-mb_cache_shrink(struct block_device *bdev)
+void mb_cache_entry_delete_block(struct mb_cache *cache, u32 key,
+ sector_t block)
{
- LIST_HEAD(free_list);
- struct list_head *l;
- struct mb_cache_entry *ce, *tmp;
-
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
- while (!list_is_last(l, &mb_cache_lru_list)) {
- l = l->next;
- ce = list_entry(l, struct mb_cache_entry, e_lru_list);
- if (ce->e_bdev == bdev) {
- list_del_init(&ce->e_lru_list);
- if (ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt))
- continue;
- spin_unlock(&mb_cache_spinlock);
- /*
- * Prevent any find or get operation on the entry.
- */
- hlist_bl_lock(ce->e_block_hash_p);
- hlist_bl_lock(ce->e_index_hash_p);
- /* Ignore if it is touched by a find/get */
- if (ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt) ||
- !list_empty(&ce->e_lru_list)) {
- hlist_bl_unlock(ce->e_index_hash_p);
- hlist_bl_unlock(ce->e_block_hash_p);
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
- continue;
+ struct hlist_bl_node *node;
+ struct hlist_bl_head *head;
+ struct mb_cache_entry *entry;
+
+ head = mb_cache_entry_head(cache, key);
+ hlist_bl_lock(head);
+ hlist_bl_for_each_entry(entry, node, head, e_hash_list) {
+ if (entry->e_key == key && entry->e_block == block) {
+ /* We keep hash list reference to keep entry alive */
+ hlist_bl_del_init(&entry->e_hash_list);
+ hlist_bl_unlock(head);
+ spin_lock(&cache->c_list_lock);
+ if (!list_empty(&entry->e_list)) {
+ list_del_init(&entry->e_list);
+ cache->c_entry_count--;
+ atomic_dec(&entry->e_refcnt);
}
- __mb_cache_entry_unhash_unlock(ce);
- mb_assert(!(ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt)));
- list_add_tail(&ce->e_lru_list, &free_list);
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
+ spin_unlock(&cache->c_list_lock);
+ mb_cache_entry_put(cache, entry);
+ return;
}
}
- spin_unlock(&mb_cache_spinlock);
-
- list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
- __mb_cache_entry_forget(ce, GFP_KERNEL);
- }
+ hlist_bl_unlock(head);
}
+EXPORT_SYMBOL(mb_cache_entry_delete_block);
-
-/*
- * mb_cache_destroy()
+/* mb_cache_entry_touch - cache entry got used
+ * @cache - cache the entry belongs to
+ * @entry - entry that got used
*
- * Shrinks the cache to its minimum possible size (hopefully 0 entries),
- * and then destroys it. If this was the last mbcache, un-registers the
- * mbcache from kernel memory management.
+ * Marks entry as used to give hit higher chances of surviving in cache.
*/
-void
-mb_cache_destroy(struct mb_cache *cache)
+void mb_cache_entry_touch(struct mb_cache *cache,
+ struct mb_cache_entry *entry)
{
- LIST_HEAD(free_list);
- struct mb_cache_entry *ce, *tmp;
-
- spin_lock(&mb_cache_spinlock);
- list_for_each_entry_safe(ce, tmp, &mb_cache_lru_list, e_lru_list) {
- if (ce->e_cache == cache)
- list_move_tail(&ce->e_lru_list, &free_list);
- }
- list_del(&cache->c_cache_list);
- spin_unlock(&mb_cache_spinlock);
-
- list_for_each_entry_safe(ce, tmp, &free_list, e_lru_list) {
- list_del_init(&ce->e_lru_list);
- /*
- * Prevent any find or get operation on the entry.
- */
- hlist_bl_lock(ce->e_block_hash_p);
- hlist_bl_lock(ce->e_index_hash_p);
- mb_assert(!(ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt)));
- __mb_cache_entry_unhash_unlock(ce);
- __mb_cache_entry_forget(ce, GFP_KERNEL);
- }
-
- if (atomic_read(&cache->c_entry_count) > 0) {
- mb_error("cache %s: %d orphaned entries",
- cache->c_name,
- atomic_read(&cache->c_entry_count));
- }
-
- if (list_empty(&mb_cache_list)) {
- kmem_cache_destroy(mb_cache_kmem_cache);
- mb_cache_kmem_cache = NULL;
- }
- kfree(cache->c_index_hash);
- kfree(cache->c_block_hash);
- kfree(cache);
+ entry->e_referenced = 1;
}
+EXPORT_SYMBOL(mb_cache_entry_touch);
-/*
- * mb_cache_entry_alloc()
- *
- * Allocates a new cache entry. The new entry will not be valid initially,
- * and thus cannot be looked up yet. It should be filled with data, and
- * then inserted into the cache using mb_cache_entry_insert(). Returns NULL
- * if no more memory was available.
- */
-struct mb_cache_entry *
-mb_cache_entry_alloc(struct mb_cache *cache, gfp_t gfp_flags)
+static unsigned long mb_cache_count(struct shrinker *shrink,
+ struct shrink_control *sc)
{
- struct mb_cache_entry *ce;
-
- if (atomic_read(&cache->c_entry_count) >= cache->c_max_entries) {
- struct list_head *l;
-
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
- while (!list_is_last(l, &mb_cache_lru_list)) {
- l = l->next;
- ce = list_entry(l, struct mb_cache_entry, e_lru_list);
- if (ce->e_cache == cache) {
- list_del_init(&ce->e_lru_list);
- if (ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt))
- continue;
- spin_unlock(&mb_cache_spinlock);
- /*
- * Prevent any find or get operation on the
- * entry.
- */
- hlist_bl_lock(ce->e_block_hash_p);
- hlist_bl_lock(ce->e_index_hash_p);
- /* Ignore if it is touched by a find/get */
- if (ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt) ||
- !list_empty(&ce->e_lru_list)) {
- hlist_bl_unlock(ce->e_index_hash_p);
- hlist_bl_unlock(ce->e_block_hash_p);
- l = &mb_cache_lru_list;
- spin_lock(&mb_cache_spinlock);
- continue;
- }
- mb_assert(list_empty(&ce->e_lru_list));
- mb_assert(!(ce->e_used || ce->e_queued ||
- atomic_read(&ce->e_refcnt)));
- __mb_cache_entry_unhash_unlock(ce);
- goto found;
- }
- }
- spin_unlock(&mb_cache_spinlock);
- }
+ struct mb_cache *cache = container_of(shrink, struct mb_cache,
+ c_shrink);
- ce = kmem_cache_alloc(cache->c_entry_cache, gfp_flags);
- if (!ce)
- return NULL;
- atomic_inc(&cache->c_entry_count);
- INIT_LIST_HEAD(&ce->e_lru_list);
- INIT_HLIST_BL_NODE(&ce->e_block_list);
- INIT_HLIST_BL_NODE(&ce->e_index.o_list);
- ce->e_cache = cache;
- ce->e_queued = 0;
- atomic_set(&ce->e_refcnt, 0);
-found:
- ce->e_block_hash_p = &cache->c_block_hash[0];
- ce->e_index_hash_p = &cache->c_index_hash[0];
- ce->e_used = 1 + MB_CACHE_WRITER;
- return ce;
+ return cache->c_entry_count;
}
-
-/*
- * mb_cache_entry_insert()
- *
- * Inserts an entry that was allocated using mb_cache_entry_alloc() into
- * the cache. After this, the cache entry can be looked up, but is not yet
- * in the lru list as the caller still holds a handle to it. Returns 0 on
- * success, or -EBUSY if a cache entry for that device + inode exists
- * already (this may happen after a failed lookup, but when another process
- * has inserted the same cache entry in the meantime).
- *
- * @bdev: device the cache entry belongs to
- * @block: block number
- * @key: lookup key
- */
-int
-mb_cache_entry_insert(struct mb_cache_entry *ce, struct block_device *bdev,
- sector_t block, unsigned int key)
+/* Shrink number of entries in cache */
+static unsigned long mb_cache_shrink(struct mb_cache *cache,
+ unsigned int nr_to_scan)
{
- struct mb_cache *cache = ce->e_cache;
- unsigned int bucket;
- struct hlist_bl_node *l;
- struct hlist_bl_head *block_hash_p;
- struct hlist_bl_head *index_hash_p;
- struct mb_cache_entry *lce;
-
- mb_assert(ce);
- bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
- cache->c_bucket_bits);
- block_hash_p = &cache->c_block_hash[bucket];
- hlist_bl_lock(block_hash_p);
- hlist_bl_for_each_entry(lce, l, block_hash_p, e_block_list) {
- if (lce->e_bdev == bdev && lce->e_block == block) {
- hlist_bl_unlock(block_hash_p);
- return -EBUSY;
+ struct mb_cache_entry *entry;
+ struct hlist_bl_head *head;
+ unsigned int shrunk = 0;
+
+ spin_lock(&cache->c_list_lock);
+ while (nr_to_scan-- && !list_empty(&cache->c_list)) {
+ entry = list_first_entry(&cache->c_list,
+ struct mb_cache_entry, e_list);
+ if (entry->e_referenced) {
+ entry->e_referenced = 0;
+ list_move_tail(&cache->c_list, &entry->e_list);
+ continue;
}
+ list_del_init(&entry->e_list);
+ cache->c_entry_count--;
+ /*
+ * We keep LRU list reference so that entry doesn't go away
+ * from under us.
+ */
+ spin_unlock(&cache->c_list_lock);
+ head = mb_cache_entry_head(cache, entry->e_key);
+ hlist_bl_lock(head);
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+ }
+ hlist_bl_unlock(head);
+ if (mb_cache_entry_put(cache, entry))
+ shrunk++;
+ cond_resched();
+ spin_lock(&cache->c_list_lock);
}
- mb_assert(!__mb_cache_entry_is_block_hashed(ce));
- __mb_cache_entry_unhash_block(ce);
- __mb_cache_entry_unhash_index(ce);
- ce->e_bdev = bdev;
- ce->e_block = block;
- ce->e_block_hash_p = block_hash_p;
- ce->e_index.o_key = key;
- hlist_bl_add_head(&ce->e_block_list, block_hash_p);
- hlist_bl_unlock(block_hash_p);
- bucket = hash_long(key, cache->c_bucket_bits);
- index_hash_p = &cache->c_index_hash[bucket];
- hlist_bl_lock(index_hash_p);
- ce->e_index_hash_p = index_hash_p;
- hlist_bl_add_head(&ce->e_index.o_list, index_hash_p);
- hlist_bl_unlock(index_hash_p);
- return 0;
-}
+ spin_unlock(&cache->c_list_lock);
+ return shrunk;
+}
-/*
- * mb_cache_entry_release()
- *
- * Release a handle to a cache entry. When the last handle to a cache entry
- * is released it is either freed (if it is invalid) or otherwise inserted
- * in to the lru list.
- */
-void
-mb_cache_entry_release(struct mb_cache_entry *ce)
+static unsigned long mb_cache_scan(struct shrinker *shrink,
+ struct shrink_control *sc)
{
- __mb_cache_entry_release(ce);
+ int nr_to_scan = sc->nr_to_scan;
+ struct mb_cache *cache = container_of(shrink, struct mb_cache,
+ c_shrink);
+ return mb_cache_shrink(cache, nr_to_scan);
}
+/* We shrink 1/X of the cache when we have too many entries in it */
+#define SHRINK_DIVISOR 16
-/*
- * mb_cache_entry_free()
- *
- */
-void
-mb_cache_entry_free(struct mb_cache_entry *ce)
+static void mb_cache_shrink_worker(struct work_struct *work)
{
- mb_assert(ce);
- mb_assert(list_empty(&ce->e_lru_list));
- hlist_bl_lock(ce->e_index_hash_p);
- __mb_cache_entry_unhash_index(ce);
- hlist_bl_unlock(ce->e_index_hash_p);
- hlist_bl_lock(ce->e_block_hash_p);
- __mb_cache_entry_unhash_block(ce);
- hlist_bl_unlock(ce->e_block_hash_p);
- __mb_cache_entry_release(ce);
+ struct mb_cache *cache = container_of(work, struct mb_cache,
+ c_shrink_work);
+ mb_cache_shrink(cache, cache->c_max_entries / SHRINK_DIVISOR);
}
-
/*
- * mb_cache_entry_get()
+ * mb_cache_create - create cache
+ * @bucket_bits: log2 of the hash table size
*
- * Get a cache entry by device / block number. (There can only be one entry
- * in the cache per device and block.) Returns NULL if no such cache entry
- * exists. The returned cache entry is locked for exclusive access ("single
- * writer").
+ * Create cache for keys with 2^bucket_bits hash entries.
*/
-struct mb_cache_entry *
-mb_cache_entry_get(struct mb_cache *cache, struct block_device *bdev,
- sector_t block)
+struct mb_cache *mb_cache_create(int bucket_bits)
{
- unsigned int bucket;
- struct hlist_bl_node *l;
- struct mb_cache_entry *ce;
- struct hlist_bl_head *block_hash_p;
-
- bucket = hash_long((unsigned long)bdev + (block & 0xffffffff),
- cache->c_bucket_bits);
- block_hash_p = &cache->c_block_hash[bucket];
- /* First serialize access to the block corresponding hash chain. */
- hlist_bl_lock(block_hash_p);
- hlist_bl_for_each_entry(ce, l, block_hash_p, e_block_list) {
- mb_assert(ce->e_block_hash_p == block_hash_p);
- if (ce->e_bdev == bdev && ce->e_block == block) {
- /*
- * Prevent a free from removing the entry.
- */
- atomic_inc(&ce->e_refcnt);
- hlist_bl_unlock(block_hash_p);
- __spin_lock_mb_cache_entry(ce);
- atomic_dec(&ce->e_refcnt);
- if (ce->e_used > 0) {
- DEFINE_WAIT(wait);
- while (ce->e_used > 0) {
- ce->e_queued++;
- prepare_to_wait(&mb_cache_queue, &wait,
- TASK_UNINTERRUPTIBLE);
- __spin_unlock_mb_cache_entry(ce);
- schedule();
- __spin_lock_mb_cache_entry(ce);
- ce->e_queued--;
- }
- finish_wait(&mb_cache_queue, &wait);
- }
- ce->e_used += 1 + MB_CACHE_WRITER;
- __spin_unlock_mb_cache_entry(ce);
+ struct mb_cache *cache;
+ int bucket_count = 1 << bucket_bits;
+ int i;
- if (!list_empty(&ce->e_lru_list)) {
- spin_lock(&mb_cache_spinlock);
- list_del_init(&ce->e_lru_list);
- spin_unlock(&mb_cache_spinlock);
- }
- if (!__mb_cache_entry_is_block_hashed(ce)) {
- __mb_cache_entry_release(ce);
- return NULL;
- }
- return ce;
- }
+ if (!try_module_get(THIS_MODULE))
+ return NULL;
+
+ cache = kzalloc(sizeof(struct mb_cache), GFP_KERNEL);
+ if (!cache)
+ goto err_out;
+ cache->c_bucket_bits = bucket_bits;
+ cache->c_max_entries = bucket_count << 4;
+ INIT_LIST_HEAD(&cache->c_list);
+ spin_lock_init(&cache->c_list_lock);
+ cache->c_hash = kmalloc(bucket_count * sizeof(struct hlist_bl_head),
+ GFP_KERNEL);
+ if (!cache->c_hash) {
+ kfree(cache);
+ goto err_out;
}
- hlist_bl_unlock(block_hash_p);
- return NULL;
-}
+ for (i = 0; i < bucket_count; i++)
+ INIT_HLIST_BL_HEAD(&cache->c_hash[i]);
-#if !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0)
+ cache->c_shrink.count_objects = mb_cache_count;
+ cache->c_shrink.scan_objects = mb_cache_scan;
+ cache->c_shrink.seeks = DEFAULT_SEEKS;
+ register_shrinker(&cache->c_shrink);
-static struct mb_cache_entry *
-__mb_cache_entry_find(struct hlist_bl_node *l, struct hlist_bl_head *head,
- struct block_device *bdev, unsigned int key)
-{
+ INIT_WORK(&cache->c_shrink_work, mb_cache_shrink_worker);
- /* The index hash chain is alredy acquire by caller. */
- while (l != NULL) {
- struct mb_cache_entry *ce =
- hlist_bl_entry(l, struct mb_cache_entry,
- e_index.o_list);
- mb_assert(ce->e_index_hash_p == head);
- if (ce->e_bdev == bdev && ce->e_index.o_key == key) {
- /*
- * Prevent a free from removing the entry.
- */
- atomic_inc(&ce->e_refcnt);
- hlist_bl_unlock(head);
- __spin_lock_mb_cache_entry(ce);
- atomic_dec(&ce->e_refcnt);
- ce->e_used++;
- /* Incrementing before holding the lock gives readers
- priority over writers. */
- if (ce->e_used >= MB_CACHE_WRITER) {
- DEFINE_WAIT(wait);
-
- while (ce->e_used >= MB_CACHE_WRITER) {
- ce->e_queued++;
- prepare_to_wait(&mb_cache_queue, &wait,
- TASK_UNINTERRUPTIBLE);
- __spin_unlock_mb_cache_entry(ce);
- schedule();
- __spin_lock_mb_cache_entry(ce);
- ce->e_queued--;
- }
- finish_wait(&mb_cache_queue, &wait);
- }
- __spin_unlock_mb_cache_entry(ce);
- if (!list_empty(&ce->e_lru_list)) {
- spin_lock(&mb_cache_spinlock);
- list_del_init(&ce->e_lru_list);
- spin_unlock(&mb_cache_spinlock);
- }
- if (!__mb_cache_entry_is_block_hashed(ce)) {
- __mb_cache_entry_release(ce);
- return ERR_PTR(-EAGAIN);
- }
- return ce;
- }
- l = l->next;
- }
- hlist_bl_unlock(head);
+ return cache;
+
+err_out:
+ module_put(THIS_MODULE);
return NULL;
}
-
+EXPORT_SYMBOL(mb_cache_create);
/*
- * mb_cache_entry_find_first()
- *
- * Find the first cache entry on a given device with a certain key in
- * an additional index. Additional matches can be found with
- * mb_cache_entry_find_next(). Returns NULL if no match was found. The
- * returned cache entry is locked for shared access ("multiple readers").
+ * mb_cache_destroy - destroy cache
+ * @cache: the cache to destroy
*
- * @cache: the cache to search
- * @bdev: the device the cache entry should belong to
- * @key: the key in the index
+ * Free all entries in cache and cache itself. Caller must make sure nobody
+ * (except shrinker) can reach @cache when calling this.
*/
-struct mb_cache_entry *
-mb_cache_entry_find_first(struct mb_cache *cache, struct block_device *bdev,
- unsigned int key)
+void mb_cache_destroy(struct mb_cache *cache)
{
- unsigned int bucket = hash_long(key, cache->c_bucket_bits);
- struct hlist_bl_node *l;
- struct mb_cache_entry *ce = NULL;
- struct hlist_bl_head *index_hash_p;
-
- index_hash_p = &cache->c_index_hash[bucket];
- hlist_bl_lock(index_hash_p);
- if (!hlist_bl_empty(index_hash_p)) {
- l = hlist_bl_first(index_hash_p);
- ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
- } else
- hlist_bl_unlock(index_hash_p);
- return ce;
-}
+ struct mb_cache_entry *entry, *next;
+ unregister_shrinker(&cache->c_shrink);
-/*
- * mb_cache_entry_find_next()
- *
- * Find the next cache entry on a given device with a certain key in an
- * additional index. Returns NULL if no match could be found. The previous
- * entry is atomatically released, so that mb_cache_entry_find_next() can
- * be called like this:
- *
- * entry = mb_cache_entry_find_first();
- * while (entry) {
- * ...
- * entry = mb_cache_entry_find_next(entry, ...);
- * }
- *
- * @prev: The previous match
- * @bdev: the device the cache entry should belong to
- * @key: the key in the index
- */
-struct mb_cache_entry *
-mb_cache_entry_find_next(struct mb_cache_entry *prev,
- struct block_device *bdev, unsigned int key)
-{
- struct mb_cache *cache = prev->e_cache;
- unsigned int bucket = hash_long(key, cache->c_bucket_bits);
- struct hlist_bl_node *l;
- struct mb_cache_entry *ce;
- struct hlist_bl_head *index_hash_p;
-
- index_hash_p = &cache->c_index_hash[bucket];
- mb_assert(prev->e_index_hash_p == index_hash_p);
- hlist_bl_lock(index_hash_p);
- mb_assert(!hlist_bl_empty(index_hash_p));
- l = prev->e_index.o_list.next;
- ce = __mb_cache_entry_find(l, index_hash_p, bdev, key);
- __mb_cache_entry_release(prev);
- return ce;
+ /*
+ * We don't bother with any locking. Cache must not be used at this
+ * point.
+ */
+ list_for_each_entry_safe(entry, next, &cache->c_list, e_list) {
+ if (!hlist_bl_unhashed(&entry->e_hash_list)) {
+ hlist_bl_del_init(&entry->e_hash_list);
+ atomic_dec(&entry->e_refcnt);
+ } else
+ WARN_ON(1);
+ list_del(&entry->e_list);
+ WARN_ON(atomic_read(&entry->e_refcnt) != 1);
+ mb_cache_entry_put(cache, entry);
+ }
+ kfree(cache->c_hash);
+ kfree(cache);
+ module_put(THIS_MODULE);
}
+EXPORT_SYMBOL(mb_cache_destroy);
-#endif /* !defined(MB_CACHE_INDEXES_COUNT) || (MB_CACHE_INDEXES_COUNT > 0) */
-
-static int __init init_mbcache(void)
+static int __init mbcache_init(void)
{
- register_shrinker(&mb_cache_shrinker);
+ mb_entry_cache = kmem_cache_create("mbcache",
+ sizeof(struct mb_cache_entry), 0,
+ SLAB_RECLAIM_ACCOUNT|SLAB_MEM_SPREAD, NULL);
+ BUG_ON(!mb_entry_cache);
return 0;
}
-static void __exit exit_mbcache(void)
+static void __exit mbcache_exit(void)
{
- unregister_shrinker(&mb_cache_shrinker);
+ kmem_cache_destroy(mb_entry_cache);
}
-module_init(init_mbcache)
-module_exit(exit_mbcache)
+module_init(mbcache_init)
+module_exit(mbcache_exit)
+MODULE_AUTHOR("Jan Kara <jack@suse.cz>");
+MODULE_DESCRIPTION("Meta block cache (for extended attributes)");
+MODULE_LICENSE("GPL");
diff --git a/fs/minix/dir.c b/fs/minix/dir.c
index d19ac2581..33957c07c 100644
--- a/fs/minix/dir.c
+++ b/fs/minix/dir.c
@@ -28,7 +28,7 @@ const struct file_operations minix_dir_operations = {
static inline void dir_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
/*
@@ -38,10 +38,10 @@ static inline void dir_put_page(struct page *page)
static unsigned
minix_last_byte(struct inode *inode, unsigned long page_nr)
{
- unsigned last_byte = PAGE_CACHE_SIZE;
+ unsigned last_byte = PAGE_SIZE;
- if (page_nr == (inode->i_size >> PAGE_CACHE_SHIFT))
- last_byte = inode->i_size & (PAGE_CACHE_SIZE - 1);
+ if (page_nr == (inode->i_size >> PAGE_SHIFT))
+ last_byte = inode->i_size & (PAGE_SIZE - 1);
return last_byte;
}
@@ -92,8 +92,8 @@ static int minix_readdir(struct file *file, struct dir_context *ctx)
if (pos >= inode->i_size)
return 0;
- offset = pos & ~PAGE_CACHE_MASK;
- n = pos >> PAGE_CACHE_SHIFT;
+ offset = pos & ~PAGE_MASK;
+ n = pos >> PAGE_SHIFT;
for ( ; n < npages; n++, offset = 0) {
char *p, *kaddr, *limit;
@@ -229,7 +229,7 @@ int minix_add_link(struct dentry *dentry, struct inode *inode)
lock_page(page);
kaddr = (char*)page_address(page);
dir_end = kaddr + minix_last_byte(dir, n);
- limit = kaddr + PAGE_CACHE_SIZE - sbi->s_dirsize;
+ limit = kaddr + PAGE_SIZE - sbi->s_dirsize;
for (p = kaddr; p <= limit; p = minix_next_entry(p, sbi)) {
de = (minix_dirent *)p;
de3 = (minix3_dirent *)p;
@@ -327,7 +327,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
}
kaddr = kmap_atomic(page);
- memset(kaddr, 0, PAGE_CACHE_SIZE);
+ memset(kaddr, 0, PAGE_SIZE);
if (sbi->s_version == MINIX_V3) {
minix3_dirent *de3 = (minix3_dirent *)kaddr;
@@ -350,7 +350,7 @@ int minix_make_empty(struct inode *inode, struct inode *dir)
err = dir_commit_chunk(page, 0, 2 * sbi->s_dirsize);
fail:
- page_cache_release(page);
+ put_page(page);
return err;
}
diff --git a/fs/minix/namei.c b/fs/minix/namei.c
index a795a11e5..2887d1d95 100644
--- a/fs/minix/namei.c
+++ b/fs/minix/namei.c
@@ -243,11 +243,11 @@ static int minix_rename(struct inode * old_dir, struct dentry *old_dentry,
out_dir:
if (dir_de) {
kunmap(dir_page);
- page_cache_release(dir_page);
+ put_page(dir_page);
}
out_old:
kunmap(old_page);
- page_cache_release(old_page);
+ put_page(old_page);
out:
return err;
}
diff --git a/fs/mpage.c b/fs/mpage.c
index 1480d3a18..eedc644b7 100644
--- a/fs/mpage.c
+++ b/fs/mpage.c
@@ -24,6 +24,7 @@
#include <linux/highmem.h>
#include <linux/prefetch.h>
#include <linux/mpage.h>
+#include <linux/mm_inline.h>
#include <linux/writeback.h>
#include <linux/backing-dev.h>
#include <linux/pagevec.h>
@@ -106,7 +107,7 @@ map_buffer_to_page(struct page *page, struct buffer_head *bh, int page_block)
* don't make any buffers if there is only one buffer on
* the page and the page just needs to be set up to date
*/
- if (inode->i_blkbits == PAGE_CACHE_SHIFT &&
+ if (inode->i_blkbits == PAGE_SHIFT &&
buffer_uptodate(bh)) {
SetPageUptodate(page);
return;
@@ -144,7 +145,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
{
struct inode *inode = page->mapping->host;
const unsigned blkbits = inode->i_blkbits;
- const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+ const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
const unsigned blocksize = 1 << blkbits;
sector_t block_in_file;
sector_t last_block;
@@ -161,7 +162,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
if (page_has_buffers(page))
goto confused;
- block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+ block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
last_block = block_in_file + nr_pages * blocks_per_page;
last_block_in_file = (i_size_read(inode) + blocksize - 1) >> blkbits;
if (last_block > last_block_in_file)
@@ -248,7 +249,7 @@ do_mpage_readpage(struct bio *bio, struct page *page, unsigned nr_pages,
}
if (first_hole != blocks_per_page) {
- zero_user_segment(page, first_hole << blkbits, PAGE_CACHE_SIZE);
+ zero_user_segment(page, first_hole << blkbits, PAGE_SIZE);
if (first_hole == 0) {
SetPageUptodate(page);
unlock_page(page);
@@ -330,7 +331,7 @@ confused:
*
* then this code just gives up and calls the buffer_head-based read function.
* It does handle a page which has holes at the end - that is a common case:
- * the end-of-file on blocksize < PAGE_CACHE_SIZE setups.
+ * the end-of-file on blocksize < PAGE_SIZE setups.
*
* BH_Boundary explanation:
*
@@ -366,7 +367,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
map_bh.b_state = 0;
map_bh.b_size = 0;
for (page_idx = 0; page_idx < nr_pages; page_idx++) {
- struct page *page = list_entry(pages->prev, struct page, lru);
+ struct page *page = lru_to_page(pages);
prefetchw(&page->flags);
list_del(&page->lru);
@@ -379,7 +380,7 @@ mpage_readpages(struct address_space *mapping, struct list_head *pages,
&first_logical_block,
get_block, gfp);
}
- page_cache_release(page);
+ put_page(page);
}
BUG_ON(!list_empty(pages));
if (bio)
@@ -471,7 +472,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
struct inode *inode = page->mapping->host;
const unsigned blkbits = inode->i_blkbits;
unsigned long end_index;
- const unsigned blocks_per_page = PAGE_CACHE_SIZE >> blkbits;
+ const unsigned blocks_per_page = PAGE_SIZE >> blkbits;
sector_t last_block;
sector_t block_in_file;
sector_t blocks[MAX_BUF_PER_PAGE];
@@ -541,7 +542,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
* The page has no buffers: map it to disk
*/
BUG_ON(!PageUptodate(page));
- block_in_file = (sector_t)page->index << (PAGE_CACHE_SHIFT - blkbits);
+ block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits);
last_block = (i_size - 1) >> blkbits;
map_bh.b_page = page;
for (page_block = 0; page_block < blocks_per_page; ) {
@@ -573,7 +574,7 @@ static int __mpage_writepage(struct page *page, struct writeback_control *wbc,
first_unmapped = page_block;
page_is_mapped:
- end_index = i_size >> PAGE_CACHE_SHIFT;
+ end_index = i_size >> PAGE_SHIFT;
if (page->index >= end_index) {
/*
* The page straddles i_size. It must be zeroed out on each
@@ -583,11 +584,11 @@ page_is_mapped:
* is zeroed when mapped, and writes to that region are not
* written out to the file."
*/
- unsigned offset = i_size & (PAGE_CACHE_SIZE - 1);
+ unsigned offset = i_size & (PAGE_SIZE - 1);
if (page->index > end_index || !offset)
goto confused;
- zero_user_segment(page, offset, PAGE_CACHE_SIZE);
+ zero_user_segment(page, offset, PAGE_SIZE);
}
/*
diff --git a/fs/namei.c b/fs/namei.c
index 9c590e0f6..30145f8f2 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1220,8 +1220,8 @@ static int follow_managed(struct path *path, struct nameidata *nd)
if (need_mntput && path->mnt == mnt)
mntput(path->mnt);
- if (ret == -EISDIR)
- ret = 0;
+ if (ret == -EISDIR || !ret)
+ ret = 1;
if (need_mntput)
nd->flags |= LOOKUP_JUMPED;
if (unlikely(ret < 0))
@@ -1444,40 +1444,26 @@ static int follow_dotdot(struct nameidata *nd)
* This looks up the name in dcache, possibly revalidates the old dentry and
* allocates a new one if not found or not valid. In the need_lookup argument
* returns whether i_op->lookup is necessary.
- *
- * dir->d_inode->i_mutex must be held
*/
-static struct dentry *lookup_dcache(struct qstr *name, struct dentry *dir,
- unsigned int flags, bool *need_lookup)
+static struct dentry *lookup_dcache(const struct qstr *name,
+ struct dentry *dir,
+ unsigned int flags)
{
struct dentry *dentry;
int error;
- *need_lookup = false;
dentry = d_lookup(dir, name);
if (dentry) {
if (dentry->d_flags & DCACHE_OP_REVALIDATE) {
error = d_revalidate(dentry, flags);
if (unlikely(error <= 0)) {
- if (error < 0) {
- dput(dentry);
- return ERR_PTR(error);
- } else {
+ if (!error)
d_invalidate(dentry);
- dput(dentry);
- dentry = NULL;
- }
+ dput(dentry);
+ return ERR_PTR(error);
}
}
}
-
- if (!dentry) {
- dentry = d_alloc(dir, name);
- if (unlikely(!dentry))
- return ERR_PTR(-ENOMEM);
-
- *need_lookup = true;
- }
return dentry;
}
@@ -1506,45 +1492,44 @@ static struct dentry *lookup_real(struct inode *dir, struct dentry *dentry,
return dentry;
}
-static struct dentry *__lookup_hash(struct qstr *name,
+static struct dentry *__lookup_hash(const struct qstr *name,
struct dentry *base, unsigned int flags)
{
- bool need_lookup;
- struct dentry *dentry;
+ struct dentry *dentry = lookup_dcache(name, base, flags);
- dentry = lookup_dcache(name, base, flags, &need_lookup);
- if (!need_lookup)
+ if (dentry)
return dentry;
+ dentry = d_alloc(base, name);
+ if (unlikely(!dentry))
+ return ERR_PTR(-ENOMEM);
+
return lookup_real(base->d_inode, dentry, flags);
}
-/*
- * It's more convoluted than I'd like it to be, but... it's still fairly
- * small and for now I'd prefer to have fast path as straight as possible.
- * It _is_ time-critical.
- */
static int lookup_fast(struct nameidata *nd,
struct path *path, struct inode **inode,
unsigned *seqp)
{
struct vfsmount *mnt = nd->path.mnt;
struct dentry *dentry, *parent = nd->path.dentry;
- int need_reval = 1;
int status = 1;
int err;
/*
* Rename seqlock is not required here because in the off chance
- * of a false negative due to a concurrent rename, we're going to
- * do the non-racy lookup, below.
+ * of a false negative due to a concurrent rename, the caller is
+ * going to fall back to non-racy lookup.
*/
if (nd->flags & LOOKUP_RCU) {
unsigned seq;
bool negative;
dentry = __d_lookup_rcu(parent, &nd->last, &seq);
- if (!dentry)
- goto unlazy;
+ if (unlikely(!dentry)) {
+ if (unlazy_walk(nd, NULL, 0))
+ return -ECHILD;
+ return 0;
+ }
/*
* This sequence count validates that the inode matches
@@ -1552,7 +1537,7 @@ static int lookup_fast(struct nameidata *nd,
*/
*inode = d_backing_inode(dentry);
negative = d_is_negative(dentry);
- if (read_seqcount_retry(&dentry->d_seq, seq))
+ if (unlikely(read_seqcount_retry(&dentry->d_seq, seq)))
return -ECHILD;
/*
@@ -1562,81 +1547,89 @@ static int lookup_fast(struct nameidata *nd,
* The memory barrier in read_seqcount_begin of child is
* enough, we can use __read_seqcount_retry here.
*/
- if (__read_seqcount_retry(&parent->d_seq, nd->seq))
+ if (unlikely(__read_seqcount_retry(&parent->d_seq, nd->seq)))
return -ECHILD;
*seqp = seq;
- if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE)) {
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
status = d_revalidate(dentry, nd->flags);
- if (unlikely(status <= 0)) {
- if (status != -ECHILD)
- need_reval = 0;
- goto unlazy;
- }
+ if (unlikely(status <= 0)) {
+ if (unlazy_walk(nd, dentry, seq))
+ return -ECHILD;
+ if (status == -ECHILD)
+ status = d_revalidate(dentry, nd->flags);
+ } else {
+ /*
+ * Note: do negative dentry check after revalidation in
+ * case that drops it.
+ */
+ if (unlikely(negative))
+ return -ENOENT;
+ path->mnt = mnt;
+ path->dentry = dentry;
+ if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
+ return 1;
+ if (unlazy_walk(nd, dentry, seq))
+ return -ECHILD;
}
- /*
- * Note: do negative dentry check after revalidation in
- * case that drops it.
- */
- if (negative)
- return -ENOENT;
- path->mnt = mnt;
- path->dentry = dentry;
- if (likely(__follow_mount_rcu(nd, path, inode, seqp)))
- return 0;
-unlazy:
- if (unlazy_walk(nd, dentry, seq))
- return -ECHILD;
} else {
dentry = __d_lookup(parent, &nd->last);
+ if (unlikely(!dentry))
+ return 0;
+ if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE))
+ status = d_revalidate(dentry, nd->flags);
}
-
- if (unlikely(!dentry))
- goto need_lookup;
-
- if (unlikely(dentry->d_flags & DCACHE_OP_REVALIDATE) && need_reval)
- status = d_revalidate(dentry, nd->flags);
if (unlikely(status <= 0)) {
- if (status < 0) {
- dput(dentry);
- return status;
- }
- d_invalidate(dentry);
+ if (!status)
+ d_invalidate(dentry);
dput(dentry);
- goto need_lookup;
+ return status;
}
-
if (unlikely(d_is_negative(dentry))) {
dput(dentry);
return -ENOENT;
}
+
path->mnt = mnt;
path->dentry = dentry;
err = follow_managed(path, nd);
- if (likely(!err))
+ if (likely(err > 0))
*inode = d_backing_inode(path->dentry);
return err;
-
-need_lookup:
- return 1;
}
/* Fast lookup failed, do it the slow way */
-static int lookup_slow(struct nameidata *nd, struct path *path)
+static struct dentry *lookup_slow(const struct qstr *name,
+ struct dentry *dir,
+ unsigned int flags)
{
- struct dentry *dentry, *parent;
-
- parent = nd->path.dentry;
- BUG_ON(nd->inode != parent->d_inode);
-
- inode_lock(parent->d_inode);
- dentry = __lookup_hash(&nd->last, parent, nd->flags);
- inode_unlock(parent->d_inode);
- if (IS_ERR(dentry))
- return PTR_ERR(dentry);
- path->mnt = nd->path.mnt;
- path->dentry = dentry;
- return follow_managed(path, nd);
+ struct dentry *dentry;
+ inode_lock(dir->d_inode);
+ dentry = d_lookup(dir, name);
+ if (unlikely(dentry)) {
+ if ((dentry->d_flags & DCACHE_OP_REVALIDATE) &&
+ !(flags & LOOKUP_NO_REVAL)) {
+ int error = d_revalidate(dentry, flags);
+ if (unlikely(error <= 0)) {
+ if (!error)
+ d_invalidate(dentry);
+ dput(dentry);
+ dentry = ERR_PTR(error);
+ }
+ }
+ if (dentry) {
+ inode_unlock(dir->d_inode);
+ return dentry;
+ }
+ }
+ dentry = d_alloc(dir, name);
+ if (unlikely(!dentry)) {
+ inode_unlock(dir->d_inode);
+ return ERR_PTR(-ENOMEM);
+ }
+ dentry = lookup_real(dir->d_inode, dentry, flags);
+ inode_unlock(dir->d_inode);
+ return dentry;
}
static inline int may_lookup(struct nameidata *nd)
@@ -1740,18 +1733,25 @@ static int walk_component(struct nameidata *nd, int flags)
return err;
}
err = lookup_fast(nd, &path, &inode, &seq);
- if (unlikely(err)) {
+ if (unlikely(err <= 0)) {
if (err < 0)
return err;
-
- err = lookup_slow(nd, &path);
- if (err < 0)
+ path.dentry = lookup_slow(&nd->last, nd->path.dentry,
+ nd->flags);
+ if (IS_ERR(path.dentry))
+ return PTR_ERR(path.dentry);
+
+ path.mnt = nd->path.mnt;
+ err = follow_managed(&path, nd);
+ if (unlikely(err < 0))
return err;
+ if (unlikely(d_is_negative(path.dentry))) {
+ path_to_nameidata(&path, nd);
+ return -ENOENT;
+ }
+
seq = 0; /* we are already out of RCU mode */
- err = -ENOENT;
- if (d_is_negative(path.dentry))
- goto out_path_put;
inode = d_backing_inode(path.dentry);
}
@@ -1764,10 +1764,6 @@ static int walk_component(struct nameidata *nd, int flags)
nd->inode = inode;
nd->seq = seq;
return 0;
-
-out_path_put:
- path_to_nameidata(&path, nd);
- return err;
}
/*
@@ -2271,6 +2267,33 @@ int vfs_path_lookup(struct dentry *dentry, struct vfsmount *mnt,
EXPORT_SYMBOL(vfs_path_lookup);
/**
+ * lookup_hash - lookup single pathname component on already hashed name
+ * @name: name and hash to lookup
+ * @base: base directory to lookup from
+ *
+ * The name must have been verified and hashed (see lookup_one_len()). Using
+ * this after just full_name_hash() is unsafe.
+ *
+ * This function also doesn't check for search permission on base directory.
+ *
+ * Use lookup_one_len_unlocked() instead, unless you really know what you are
+ * doing.
+ *
+ * Do not hold i_mutex; this helper takes i_mutex if necessary.
+ */
+struct dentry *lookup_hash(const struct qstr *name, struct dentry *base)
+{
+ struct dentry *ret;
+
+ ret = lookup_dcache(name, base, 0);
+ if (!ret)
+ ret = lookup_slow(name, base, 0);
+
+ return ret;
+}
+EXPORT_SYMBOL(lookup_hash);
+
+/**
* lookup_one_len - filesystem helper to lookup single pathname component
* @name: pathname component to lookup
* @base: base directory to lookup from
@@ -2341,7 +2364,6 @@ struct dentry *lookup_one_len_unlocked(const char *name,
struct qstr this;
unsigned int c;
int err;
- struct dentry *ret;
this.name = name;
this.len = len;
@@ -2373,22 +2395,7 @@ struct dentry *lookup_one_len_unlocked(const char *name,
if (err)
return ERR_PTR(err);
- /*
- * __d_lookup() is used to try to get a quick answer and avoid the
- * mutex. A false-negative does no harm.
- */
- ret = __d_lookup(base, &this);
- if (ret && unlikely(ret->d_flags & DCACHE_OP_REVALIDATE)) {
- dput(ret);
- ret = NULL;
- }
- if (ret)
- return ret;
-
- inode_lock(base->d_inode);
- ret = __lookup_hash(&this, base, 0);
- inode_unlock(base->d_inode);
- return ret;
+ return lookup_hash(&this, base);
}
EXPORT_SYMBOL(lookup_one_len_unlocked);
@@ -2465,31 +2472,21 @@ mountpoint_last(struct nameidata *nd, struct path *path)
if (error)
return error;
dentry = dget(nd->path.dentry);
- goto done;
- }
-
- inode_lock(dir->d_inode);
- dentry = d_lookup(dir, &nd->last);
- if (!dentry) {
- /*
- * No cached dentry. Mounted dentries are pinned in the cache,
- * so that means that this dentry is probably a symlink or the
- * path doesn't actually point to a mounted dentry.
- */
- dentry = d_alloc(dir, &nd->last);
+ } else {
+ dentry = d_lookup(dir, &nd->last);
if (!dentry) {
- inode_unlock(dir->d_inode);
- return -ENOMEM;
- }
- dentry = lookup_real(dir->d_inode, dentry, nd->flags);
- if (IS_ERR(dentry)) {
- inode_unlock(dir->d_inode);
- return PTR_ERR(dentry);
+ /*
+ * No cached dentry. Mounted dentries are pinned in the
+ * cache, so that means that this dentry is probably
+ * a symlink or the path doesn't actually point
+ * to a mounted dentry.
+ */
+ dentry = lookup_slow(&nd->last, dir,
+ nd->flags | LOOKUP_NO_REVAL);
+ if (IS_ERR(dentry))
+ return PTR_ERR(dentry);
}
}
- inode_unlock(dir->d_inode);
-
-done:
if (d_is_negative(dentry)) {
dput(dentry);
return -ENOENT;
@@ -2968,22 +2965,10 @@ no_open:
dentry = lookup_real(dir, dentry, nd->flags);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
-
- if (create_error) {
- int open_flag = op->open_flag;
-
- error = create_error;
- if ((open_flag & O_EXCL)) {
- if (!dentry->d_inode)
- goto out;
- } else if (!dentry->d_inode) {
- goto out;
- } else if ((open_flag & O_TRUNC) &&
- d_is_reg(dentry)) {
- goto out;
- }
- /* will fail later, go on to get the right error */
- }
+ }
+ if (create_error && !dentry->d_inode) {
+ error = create_error;
+ goto out;
}
looked_up:
path->dentry = dentry;
@@ -3018,16 +3003,22 @@ static int lookup_open(struct nameidata *nd, struct path *path,
struct inode *dir_inode = dir->d_inode;
struct dentry *dentry;
int error;
- bool need_lookup;
+ bool need_lookup = false;
*opened &= ~FILE_CREATED;
- dentry = lookup_dcache(&nd->last, dir, nd->flags, &need_lookup);
+ dentry = lookup_dcache(&nd->last, dir, nd->flags);
if (IS_ERR(dentry))
return PTR_ERR(dentry);
- /* Cached positive dentry: will open in f_op->open */
- if (!need_lookup && dentry->d_inode)
+ if (!dentry) {
+ dentry = d_alloc(dir, &nd->last);
+ if (unlikely(!dentry))
+ return -ENOMEM;
+ need_lookup = true;
+ } else if (dentry->d_inode) {
+ /* Cached positive dentry: will open in f_op->open */
goto out_no_open;
+ }
if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
return atomic_open(nd, dentry, path, file, op, got_write,
@@ -3111,13 +3102,14 @@ static int do_last(struct nameidata *nd,
nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
/* we _can_ be in RCU mode here */
error = lookup_fast(nd, &path, &inode, &seq);
- if (likely(!error))
+ if (likely(error > 0))
goto finish_lookup;
if (error < 0)
return error;
BUG_ON(nd->inode != dir->d_inode);
+ BUG_ON(nd->flags & LOOKUP_RCU);
} else {
/* create side of things */
/*
@@ -3172,12 +3164,6 @@ retry_lookup:
}
/*
- * create/update audit record if it already exists.
- */
- if (d_is_positive(path.dentry))
- audit_inode(nd->name, path.dentry, 0);
-
- /*
* If atomic_open() acquired write access it is dropped now due to
* possible mount and symlink following (this might be optimized away if
* necessary...)
@@ -3187,6 +3173,16 @@ retry_lookup:
got_write = false;
}
+ if (unlikely(d_is_negative(path.dentry))) {
+ path_to_nameidata(&path, nd);
+ return -ENOENT;
+ }
+
+ /*
+ * create/update audit record if it already exists.
+ */
+ audit_inode(nd->name, path.dentry, 0);
+
if (unlikely((open_flag & (O_EXCL | O_CREAT)) == (O_EXCL | O_CREAT))) {
path_to_nameidata(&path, nd);
return -EEXIST;
@@ -3196,12 +3192,7 @@ retry_lookup:
if (unlikely(error < 0))
return error;
- BUG_ON(nd->flags & LOOKUP_RCU);
seq = 0; /* out of RCU mode, so the value doesn't matter */
- if (unlikely(d_is_negative(path.dentry))) {
- path_to_nameidata(&path, nd);
- return -ENOENT;
- }
inode = d_backing_inode(path.dentry);
finish_lookup:
if (nd->depth)
@@ -3707,31 +3698,6 @@ SYSCALL_DEFINE2(mkdir, const char __user *, pathname, umode_t, mode)
return sys_mkdirat(AT_FDCWD, pathname, mode);
}
-/*
- * The dentry_unhash() helper will try to drop the dentry early: we
- * should have a usage count of 1 if we're the only user of this
- * dentry, and if that is true (possibly after pruning the dcache),
- * then we drop the dentry now.
- *
- * A low-level filesystem can, if it choses, legally
- * do a
- *
- * if (!d_unhashed(dentry))
- * return -EBUSY;
- *
- * if it cannot handle the case of removing a directory
- * that is still in use by something else..
- */
-void dentry_unhash(struct dentry *dentry)
-{
- shrink_dcache_parent(dentry);
- spin_lock(&dentry->d_lock);
- if (dentry->d_lockref.count == 1)
- __d_drop(dentry);
- spin_unlock(&dentry->d_lock);
-}
-EXPORT_SYMBOL(dentry_unhash);
-
int vfs_rmdir(struct inode *dir, struct dentry *dentry)
{
int error = may_delete(dir, dentry, 1);
@@ -4258,7 +4224,11 @@ int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
bool new_is_dir = false;
unsigned max_links = new_dir->i_sb->s_max_links;
- if (source == target)
+ /*
+ * Check source == target.
+ * On overlayfs need to look at underlying inodes.
+ */
+ if (vfs_select_inode(old_dentry, 0) == vfs_select_inode(new_dentry, 0))
return 0;
error = may_delete(old_dir, old_dentry, is_dir);
diff --git a/fs/ncpfs/dir.c b/fs/ncpfs/dir.c
index b7f8eaeea..bfdad003e 100644
--- a/fs/ncpfs/dir.c
+++ b/fs/ncpfs/dir.c
@@ -510,7 +510,7 @@ static int ncp_readdir(struct file *file, struct dir_context *ctx)
kunmap(ctl.page);
SetPageUptodate(ctl.page);
unlock_page(ctl.page);
- page_cache_release(ctl.page);
+ put_page(ctl.page);
ctl.page = NULL;
}
ctl.idx = 0;
@@ -520,7 +520,7 @@ invalid_cache:
if (ctl.page) {
kunmap(ctl.page);
unlock_page(ctl.page);
- page_cache_release(ctl.page);
+ put_page(ctl.page);
ctl.page = NULL;
}
ctl.cache = cache;
@@ -554,14 +554,14 @@ finished:
kunmap(ctl.page);
SetPageUptodate(ctl.page);
unlock_page(ctl.page);
- page_cache_release(ctl.page);
+ put_page(ctl.page);
}
if (page) {
cache->head = ctl.head;
kunmap(page);
SetPageUptodate(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
out:
return result;
@@ -649,7 +649,7 @@ ncp_fill_cache(struct file *file, struct dir_context *ctx,
kunmap(ctl.page);
SetPageUptodate(ctl.page);
unlock_page(ctl.page);
- page_cache_release(ctl.page);
+ put_page(ctl.page);
}
ctl.cache = NULL;
ctl.idx -= NCP_DIRCACHE_SIZE;
diff --git a/fs/ncpfs/ncplib_kernel.h b/fs/ncpfs/ncplib_kernel.h
index 5233fbc17..17cfb743b 100644
--- a/fs/ncpfs/ncplib_kernel.h
+++ b/fs/ncpfs/ncplib_kernel.h
@@ -191,7 +191,7 @@ struct ncp_cache_head {
int eof;
};
-#define NCP_DIRCACHE_SIZE ((int)(PAGE_CACHE_SIZE/sizeof(struct dentry *)))
+#define NCP_DIRCACHE_SIZE ((int)(PAGE_SIZE/sizeof(struct dentry *)))
union ncp_dir_cache {
struct ncp_cache_head head;
struct dentry *dentry[NCP_DIRCACHE_SIZE];
diff --git a/fs/nfs/blocklayout/blocklayout.c b/fs/nfs/blocklayout/blocklayout.c
index ddd0138f4..17a42e4eb 100644
--- a/fs/nfs/blocklayout/blocklayout.c
+++ b/fs/nfs/blocklayout/blocklayout.c
@@ -231,7 +231,7 @@ bl_read_pagelist(struct nfs_pgio_header *header)
size_t bytes_left = header->args.count;
unsigned int pg_offset = header->args.pgbase, pg_len;
struct page **pages = header->args.pages;
- int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+ int pg_index = header->args.pgbase >> PAGE_SHIFT;
const bool is_dio = (header->dreq != NULL);
struct blk_plug plug;
int i;
@@ -263,13 +263,13 @@ bl_read_pagelist(struct nfs_pgio_header *header)
}
if (is_dio) {
- if (pg_offset + bytes_left > PAGE_CACHE_SIZE)
- pg_len = PAGE_CACHE_SIZE - pg_offset;
+ if (pg_offset + bytes_left > PAGE_SIZE)
+ pg_len = PAGE_SIZE - pg_offset;
else
pg_len = bytes_left;
} else {
BUG_ON(pg_offset != 0);
- pg_len = PAGE_CACHE_SIZE;
+ pg_len = PAGE_SIZE;
}
if (is_hole(&be)) {
@@ -339,9 +339,9 @@ static void bl_write_cleanup(struct work_struct *work)
if (likely(!hdr->pnfs_error)) {
struct pnfs_block_layout *bl = BLK_LSEG2EXT(hdr->lseg);
- u64 start = hdr->args.offset & (loff_t)PAGE_CACHE_MASK;
+ u64 start = hdr->args.offset & (loff_t)PAGE_MASK;
u64 end = (hdr->args.offset + hdr->args.count +
- PAGE_CACHE_SIZE - 1) & (loff_t)PAGE_CACHE_MASK;
+ PAGE_SIZE - 1) & (loff_t)PAGE_MASK;
ext_tree_mark_written(bl, start >> SECTOR_SHIFT,
(end - start) >> SECTOR_SHIFT);
@@ -373,7 +373,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
loff_t offset = header->args.offset;
size_t count = header->args.count;
struct page **pages = header->args.pages;
- int pg_index = header->args.pgbase >> PAGE_CACHE_SHIFT;
+ int pg_index = header->args.pgbase >> PAGE_SHIFT;
unsigned int pg_len;
struct blk_plug plug;
int i;
@@ -392,7 +392,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
blk_start_plug(&plug);
/* we always write out the whole page */
- offset = offset & (loff_t)PAGE_CACHE_MASK;
+ offset = offset & (loff_t)PAGE_MASK;
isect = offset >> SECTOR_SHIFT;
for (i = pg_index; i < header->page_array.npages; i++) {
@@ -408,7 +408,7 @@ bl_write_pagelist(struct nfs_pgio_header *header, int sync)
extent_length = be.be_length - (isect - be.be_f_offset);
}
- pg_len = PAGE_CACHE_SIZE;
+ pg_len = PAGE_SIZE;
bio = do_add_page_to_bio(bio, header->page_array.npages - i,
WRITE, isect, pages[i], &map, &be,
bl_end_io_write, par,
@@ -446,8 +446,8 @@ static void bl_free_layout_hdr(struct pnfs_layout_hdr *lo)
kfree(bl);
}
-static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
- gfp_t gfp_flags)
+static struct pnfs_layout_hdr *__bl_alloc_layout_hdr(struct inode *inode,
+ gfp_t gfp_flags, bool is_scsi_layout)
{
struct pnfs_block_layout *bl;
@@ -460,9 +460,22 @@ static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
bl->bl_ext_ro = RB_ROOT;
spin_lock_init(&bl->bl_ext_lock);
+ bl->bl_scsi_layout = is_scsi_layout;
return &bl->bl_layout;
}
+static struct pnfs_layout_hdr *bl_alloc_layout_hdr(struct inode *inode,
+ gfp_t gfp_flags)
+{
+ return __bl_alloc_layout_hdr(inode, gfp_flags, false);
+}
+
+static struct pnfs_layout_hdr *sl_alloc_layout_hdr(struct inode *inode,
+ gfp_t gfp_flags)
+{
+ return __bl_alloc_layout_hdr(inode, gfp_flags, true);
+}
+
static void bl_free_lseg(struct pnfs_layout_segment *lseg)
{
dprintk("%s enter\n", __func__);
@@ -743,7 +756,7 @@ bl_set_layoutdriver(struct nfs_server *server, const struct nfs_fh *fh)
static bool
is_aligned_req(struct nfs_pageio_descriptor *pgio,
- struct nfs_page *req, unsigned int alignment)
+ struct nfs_page *req, unsigned int alignment, bool is_write)
{
/*
* Always accept buffered writes, higher layers take care of the
@@ -758,7 +771,8 @@ is_aligned_req(struct nfs_pageio_descriptor *pgio,
if (IS_ALIGNED(req->wb_bytes, alignment))
return true;
- if (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode)) {
+ if (is_write &&
+ (req_offset(req) + req->wb_bytes == i_size_read(pgio->pg_inode))) {
/*
* If the write goes up to the inode size, just write
* the full page. Data past the inode size is
@@ -775,7 +789,7 @@ is_aligned_req(struct nfs_pageio_descriptor *pgio,
static void
bl_pg_init_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
- if (!is_aligned_req(pgio, req, SECTOR_SIZE)) {
+ if (!is_aligned_req(pgio, req, SECTOR_SIZE, false)) {
nfs_pageio_reset_read_mds(pgio);
return;
}
@@ -791,7 +805,7 @@ static size_t
bl_pg_test_read(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- if (!is_aligned_req(pgio, req, SECTOR_SIZE))
+ if (!is_aligned_req(pgio, req, SECTOR_SIZE, false))
return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
@@ -806,7 +820,7 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
pgoff_t end;
/* Optimize common case that writes from 0 to end of file */
- end = DIV_ROUND_UP(i_size_read(inode), PAGE_CACHE_SIZE);
+ end = DIV_ROUND_UP(i_size_read(inode), PAGE_SIZE);
if (end != inode->i_mapping->nrpages) {
rcu_read_lock();
end = page_cache_next_hole(mapping, idx + 1, ULONG_MAX);
@@ -814,9 +828,9 @@ static u64 pnfs_num_cont_bytes(struct inode *inode, pgoff_t idx)
}
if (!end)
- return i_size_read(inode) - (idx << PAGE_CACHE_SHIFT);
+ return i_size_read(inode) - (idx << PAGE_SHIFT);
else
- return (end - idx) << PAGE_CACHE_SHIFT;
+ return (end - idx) << PAGE_SHIFT;
}
static void
@@ -824,7 +838,7 @@ bl_pg_init_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *req)
{
u64 wb_size;
- if (!is_aligned_req(pgio, req, PAGE_SIZE)) {
+ if (!is_aligned_req(pgio, req, PAGE_SIZE, true)) {
nfs_pageio_reset_write_mds(pgio);
return;
}
@@ -846,7 +860,7 @@ static size_t
bl_pg_test_write(struct nfs_pageio_descriptor *pgio, struct nfs_page *prev,
struct nfs_page *req)
{
- if (!is_aligned_req(pgio, req, PAGE_SIZE))
+ if (!is_aligned_req(pgio, req, PAGE_SIZE, true))
return 0;
return pnfs_generic_pg_test(pgio, prev, req);
}
@@ -888,22 +902,53 @@ static struct pnfs_layoutdriver_type blocklayout_type = {
.sync = pnfs_generic_sync,
};
+static struct pnfs_layoutdriver_type scsilayout_type = {
+ .id = LAYOUT_SCSI,
+ .name = "LAYOUT_SCSI",
+ .owner = THIS_MODULE,
+ .flags = PNFS_LAYOUTRET_ON_SETATTR |
+ PNFS_READ_WHOLE_PAGE,
+ .read_pagelist = bl_read_pagelist,
+ .write_pagelist = bl_write_pagelist,
+ .alloc_layout_hdr = sl_alloc_layout_hdr,
+ .free_layout_hdr = bl_free_layout_hdr,
+ .alloc_lseg = bl_alloc_lseg,
+ .free_lseg = bl_free_lseg,
+ .return_range = bl_return_range,
+ .prepare_layoutcommit = bl_prepare_layoutcommit,
+ .cleanup_layoutcommit = bl_cleanup_layoutcommit,
+ .set_layoutdriver = bl_set_layoutdriver,
+ .alloc_deviceid_node = bl_alloc_deviceid_node,
+ .free_deviceid_node = bl_free_deviceid_node,
+ .pg_read_ops = &bl_pg_read_ops,
+ .pg_write_ops = &bl_pg_write_ops,
+ .sync = pnfs_generic_sync,
+};
+
+
static int __init nfs4blocklayout_init(void)
{
int ret;
dprintk("%s: NFSv4 Block Layout Driver Registering...\n", __func__);
- ret = pnfs_register_layoutdriver(&blocklayout_type);
+ ret = bl_init_pipefs();
if (ret)
goto out;
- ret = bl_init_pipefs();
+
+ ret = pnfs_register_layoutdriver(&blocklayout_type);
if (ret)
- goto out_unregister;
+ goto out_cleanup_pipe;
+
+ ret = pnfs_register_layoutdriver(&scsilayout_type);
+ if (ret)
+ goto out_unregister_block;
return 0;
-out_unregister:
+out_unregister_block:
pnfs_unregister_layoutdriver(&blocklayout_type);
+out_cleanup_pipe:
+ bl_cleanup_pipefs();
out:
return ret;
}
@@ -913,8 +958,9 @@ static void __exit nfs4blocklayout_exit(void)
dprintk("%s: NFSv4 Block Layout Driver Unregistering...\n",
__func__);
- bl_cleanup_pipefs();
+ pnfs_unregister_layoutdriver(&scsilayout_type);
pnfs_unregister_layoutdriver(&blocklayout_type);
+ bl_cleanup_pipefs();
}
MODULE_ALIAS("nfs-layouttype4-3");
diff --git a/fs/nfs/blocklayout/blocklayout.h b/fs/nfs/blocklayout/blocklayout.h
index c556640dc..18e6fd0b9 100644
--- a/fs/nfs/blocklayout/blocklayout.h
+++ b/fs/nfs/blocklayout/blocklayout.h
@@ -40,8 +40,8 @@
#include "../pnfs.h"
#include "../netns.h"
-#define PAGE_CACHE_SECTORS (PAGE_CACHE_SIZE >> SECTOR_SHIFT)
-#define PAGE_CACHE_SECTOR_SHIFT (PAGE_CACHE_SHIFT - SECTOR_SHIFT)
+#define PAGE_CACHE_SECTORS (PAGE_SIZE >> SECTOR_SHIFT)
+#define PAGE_CACHE_SECTOR_SHIFT (PAGE_SHIFT - SECTOR_SHIFT)
#define SECTOR_SIZE (1 << SECTOR_SHIFT)
struct pnfs_block_dev;
@@ -55,7 +55,6 @@ struct pnfs_block_dev;
*/
#define PNFS_BLOCK_UUID_LEN 128
-
struct pnfs_block_volume {
enum pnfs_block_volume_type type;
union {
@@ -82,6 +81,13 @@ struct pnfs_block_volume {
u32 volumes_count;
u32 volumes[PNFS_BLOCK_MAX_DEVICES];
} stripe;
+ struct {
+ enum scsi_code_set code_set;
+ enum scsi_designator_type designator_type;
+ int designator_len;
+ u8 designator[256];
+ u64 pr_key;
+ } scsi;
};
};
@@ -106,6 +112,9 @@ struct pnfs_block_dev {
struct block_device *bdev;
u64 disk_offset;
+ u64 pr_key;
+ bool pr_registered;
+
bool (*map)(struct pnfs_block_dev *dev, u64 offset,
struct pnfs_block_dev_map *map);
};
@@ -131,6 +140,7 @@ struct pnfs_block_layout {
struct rb_root bl_ext_rw;
struct rb_root bl_ext_ro;
spinlock_t bl_ext_lock; /* Protects list manipulation */
+ bool bl_scsi_layout;
};
static inline struct pnfs_block_layout *
@@ -182,6 +192,6 @@ void ext_tree_mark_committed(struct nfs4_layoutcommit_args *arg, int status);
dev_t bl_resolve_deviceid(struct nfs_server *server,
struct pnfs_block_volume *b, gfp_t gfp_mask);
int __init bl_init_pipefs(void);
-void __exit bl_cleanup_pipefs(void);
+void bl_cleanup_pipefs(void);
#endif /* FS_NFS_NFS4BLOCKLAYOUT_H */
diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c
index a861bbdfe..e5b896752 100644
--- a/fs/nfs/blocklayout/dev.c
+++ b/fs/nfs/blocklayout/dev.c
@@ -1,11 +1,12 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/sunrpc/svc.h>
#include <linux/blkdev.h>
#include <linux/nfs4.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>
+#include <linux/pr.h>
#include "blocklayout.h"
@@ -21,6 +22,17 @@ bl_free_device(struct pnfs_block_dev *dev)
bl_free_device(&dev->children[i]);
kfree(dev->children);
} else {
+ if (dev->pr_registered) {
+ const struct pr_ops *ops =
+ dev->bdev->bd_disk->fops->pr_ops;
+ int error;
+
+ error = ops->pr_register(dev->bdev, dev->pr_key, 0,
+ false);
+ if (error)
+ pr_err("failed to unregister PR key.\n");
+ }
+
if (dev->bdev)
blkdev_put(dev->bdev, FMODE_READ | FMODE_WRITE);
}
@@ -113,6 +125,24 @@ nfs4_block_decode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
for (i = 0; i < b->stripe.volumes_count; i++)
b->stripe.volumes[i] = be32_to_cpup(p++);
break;
+ case PNFS_BLOCK_VOLUME_SCSI:
+ p = xdr_inline_decode(xdr, 4 + 4 + 4);
+ if (!p)
+ return -EIO;
+ b->scsi.code_set = be32_to_cpup(p++);
+ b->scsi.designator_type = be32_to_cpup(p++);
+ b->scsi.designator_len = be32_to_cpup(p++);
+ p = xdr_inline_decode(xdr, b->scsi.designator_len);
+ if (!p)
+ return -EIO;
+ if (b->scsi.designator_len > 256)
+ return -EIO;
+ memcpy(&b->scsi.designator, p, b->scsi.designator_len);
+ p = xdr_inline_decode(xdr, 8);
+ if (!p)
+ return -EIO;
+ p = xdr_decode_hyper(p, &b->scsi.pr_key);
+ break;
default:
dprintk("unknown volume type!\n");
return -EIO;
@@ -216,6 +246,116 @@ bl_parse_simple(struct nfs_server *server, struct pnfs_block_dev *d,
return 0;
}
+static bool
+bl_validate_designator(struct pnfs_block_volume *v)
+{
+ switch (v->scsi.designator_type) {
+ case PS_DESIGNATOR_EUI64:
+ if (v->scsi.code_set != PS_CODE_SET_BINARY)
+ return false;
+
+ if (v->scsi.designator_len != 8 &&
+ v->scsi.designator_len != 10 &&
+ v->scsi.designator_len != 16)
+ return false;
+
+ return true;
+ case PS_DESIGNATOR_NAA:
+ if (v->scsi.code_set != PS_CODE_SET_BINARY)
+ return false;
+
+ if (v->scsi.designator_len != 8 &&
+ v->scsi.designator_len != 16)
+ return false;
+
+ return true;
+ case PS_DESIGNATOR_T10:
+ case PS_DESIGNATOR_NAME:
+ pr_err("pNFS: unsupported designator "
+ "(code set %d, type %d, len %d.\n",
+ v->scsi.code_set,
+ v->scsi.designator_type,
+ v->scsi.designator_len);
+ return false;
+ default:
+ pr_err("pNFS: invalid designator "
+ "(code set %d, type %d, len %d.\n",
+ v->scsi.code_set,
+ v->scsi.designator_type,
+ v->scsi.designator_len);
+ return false;
+ }
+}
+
+static int
+bl_parse_scsi(struct nfs_server *server, struct pnfs_block_dev *d,
+ struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
+{
+ struct pnfs_block_volume *v = &volumes[idx];
+ const struct pr_ops *ops;
+ const char *devname;
+ int error;
+
+ if (!bl_validate_designator(v))
+ return -EINVAL;
+
+ switch (v->scsi.designator_len) {
+ case 8:
+ devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%8phN",
+ v->scsi.designator);
+ break;
+ case 12:
+ devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%12phN",
+ v->scsi.designator);
+ break;
+ case 16:
+ devname = kasprintf(GFP_KERNEL, "/dev/disk/by-id/wwn-0x%16phN",
+ v->scsi.designator);
+ break;
+ default:
+ return -EINVAL;
+ }
+
+ d->bdev = blkdev_get_by_path(devname, FMODE_READ, NULL);
+ if (IS_ERR(d->bdev)) {
+ pr_warn("pNFS: failed to open device %s (%ld)\n",
+ devname, PTR_ERR(d->bdev));
+ kfree(devname);
+ return PTR_ERR(d->bdev);
+ }
+
+ kfree(devname);
+
+ d->len = i_size_read(d->bdev->bd_inode);
+ d->map = bl_map_simple;
+ d->pr_key = v->scsi.pr_key;
+
+ pr_info("pNFS: using block device %s (reservation key 0x%llx)\n",
+ d->bdev->bd_disk->disk_name, d->pr_key);
+
+ ops = d->bdev->bd_disk->fops->pr_ops;
+ if (!ops) {
+ pr_err("pNFS: block device %s does not support reservations.",
+ d->bdev->bd_disk->disk_name);
+ error = -EINVAL;
+ goto out_blkdev_put;
+ }
+
+ error = ops->pr_register(d->bdev, 0, d->pr_key, true);
+ if (error) {
+ pr_err("pNFS: failed to register key for block device %s.",
+ d->bdev->bd_disk->disk_name);
+ goto out_blkdev_put;
+ }
+
+ d->pr_registered = true;
+ return 0;
+
+out_blkdev_put:
+ blkdev_put(d->bdev, FMODE_READ);
+ return error;
+}
+
static int
bl_parse_slice(struct nfs_server *server, struct pnfs_block_dev *d,
struct pnfs_block_volume *volumes, int idx, gfp_t gfp_mask)
@@ -303,6 +443,8 @@ bl_parse_deviceid(struct nfs_server *server, struct pnfs_block_dev *d,
return bl_parse_concat(server, d, volumes, idx, gfp_mask);
case PNFS_BLOCK_VOLUME_STRIPE:
return bl_parse_stripe(server, d, volumes, idx, gfp_mask);
+ case PNFS_BLOCK_VOLUME_SCSI:
+ return bl_parse_scsi(server, d, volumes, idx, gfp_mask);
default:
dprintk("unsupported volume type: %d\n", volumes[idx].type);
return -EIO;
diff --git a/fs/nfs/blocklayout/extent_tree.c b/fs/nfs/blocklayout/extent_tree.c
index 35ab51c04..720b3ff55 100644
--- a/fs/nfs/blocklayout/extent_tree.c
+++ b/fs/nfs/blocklayout/extent_tree.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/vmalloc.h>
@@ -462,10 +462,12 @@ out:
return err;
}
-static size_t ext_tree_layoutupdate_size(size_t count)
+static size_t ext_tree_layoutupdate_size(struct pnfs_block_layout *bl, size_t count)
{
- return sizeof(__be32) /* number of entries */ +
- PNFS_BLOCK_EXTENT_SIZE * count;
+ if (bl->bl_scsi_layout)
+ return sizeof(__be32) + PNFS_SCSI_RANGE_SIZE * count;
+ else
+ return sizeof(__be32) + PNFS_BLOCK_EXTENT_SIZE * count;
}
static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
@@ -483,6 +485,23 @@ static void ext_tree_free_commitdata(struct nfs4_layoutcommit_args *arg,
}
}
+static __be32 *encode_block_extent(struct pnfs_block_extent *be, __be32 *p)
+{
+ p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
+ NFS4_DEVICEID4_SIZE);
+ p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+ p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+ p = xdr_encode_hyper(p, 0LL);
+ *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
+ return p;
+}
+
+static __be32 *encode_scsi_range(struct pnfs_block_extent *be, __be32 *p)
+{
+ p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
+ return xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
+}
+
static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
size_t buffer_size, size_t *count)
{
@@ -496,19 +515,16 @@ static int ext_tree_encode_commit(struct pnfs_block_layout *bl, __be32 *p,
continue;
(*count)++;
- if (ext_tree_layoutupdate_size(*count) > buffer_size) {
+ if (ext_tree_layoutupdate_size(bl, *count) > buffer_size) {
/* keep counting.. */
ret = -ENOSPC;
continue;
}
- p = xdr_encode_opaque_fixed(p, be->be_device->deviceid.data,
- NFS4_DEVICEID4_SIZE);
- p = xdr_encode_hyper(p, be->be_f_offset << SECTOR_SHIFT);
- p = xdr_encode_hyper(p, be->be_length << SECTOR_SHIFT);
- p = xdr_encode_hyper(p, 0LL);
- *p++ = cpu_to_be32(PNFS_BLOCK_READWRITE_DATA);
-
+ if (bl->bl_scsi_layout)
+ p = encode_scsi_range(be, p);
+ else
+ p = encode_block_extent(be, p);
be->be_tag = EXTENT_COMMITTING;
}
spin_unlock(&bl->bl_ext_lock);
@@ -537,7 +553,7 @@ retry:
if (unlikely(ret)) {
ext_tree_free_commitdata(arg, buffer_size);
- buffer_size = ext_tree_layoutupdate_size(count);
+ buffer_size = ext_tree_layoutupdate_size(bl, count);
count = 0;
arg->layoutupdate_pages =
@@ -556,7 +572,7 @@ retry:
}
*start_p = cpu_to_be32(count);
- arg->layoutupdate_len = ext_tree_layoutupdate_size(count);
+ arg->layoutupdate_len = ext_tree_layoutupdate_size(bl, count);
if (unlikely(arg->layoutupdate_pages != &arg->layoutupdate_page)) {
void *p = start_p, *end = p + arg->layoutupdate_len;
diff --git a/fs/nfs/blocklayout/rpc_pipefs.c b/fs/nfs/blocklayout/rpc_pipefs.c
index dbe5839cd..9fb067a6f 100644
--- a/fs/nfs/blocklayout/rpc_pipefs.c
+++ b/fs/nfs/blocklayout/rpc_pipefs.c
@@ -281,7 +281,7 @@ out:
return ret;
}
-void __exit bl_cleanup_pipefs(void)
+void bl_cleanup_pipefs(void)
{
rpc_pipefs_notifier_unregister(&nfs4blocklayout_block);
unregister_pernet_subsys(&nfs4blocklayout_net_ops);
diff --git a/fs/nfs/callback.h b/fs/nfs/callback.h
index ff8195bd7..5fe1cecbf 100644
--- a/fs/nfs/callback.h
+++ b/fs/nfs/callback.h
@@ -37,10 +37,11 @@ enum nfs4_callback_opnum {
OP_CB_ILLEGAL = 10044,
};
+struct nfs4_slot;
struct cb_process_state {
__be32 drc_status;
struct nfs_client *clp;
- u32 slotid;
+ struct nfs4_slot *slot;
u32 minorversion;
struct net *net;
};
diff --git a/fs/nfs/callback_proc.c b/fs/nfs/callback_proc.c
index f0939d097..7c9fbf504 100644
--- a/fs/nfs/callback_proc.c
+++ b/fs/nfs/callback_proc.c
@@ -354,47 +354,38 @@ out:
* a single outstanding callback request at a time.
*/
static __be32
-validate_seqid(struct nfs4_slot_table *tbl, struct cb_sequenceargs * args)
+validate_seqid(const struct nfs4_slot_table *tbl, const struct nfs4_slot *slot,
+ const struct cb_sequenceargs * args)
{
- struct nfs4_slot *slot;
-
- dprintk("%s enter. slotid %u seqid %u\n",
- __func__, args->csa_slotid, args->csa_sequenceid);
+ dprintk("%s enter. slotid %u seqid %u, slot table seqid: %u\n",
+ __func__, args->csa_slotid, args->csa_sequenceid, slot->seq_nr);
- if (args->csa_slotid >= NFS41_BC_MAX_CALLBACKS)
+ if (args->csa_slotid > tbl->server_highest_slotid)
return htonl(NFS4ERR_BADSLOT);
- slot = tbl->slots + args->csa_slotid;
- dprintk("%s slot table seqid: %u\n", __func__, slot->seq_nr);
-
- /* Normal */
- if (likely(args->csa_sequenceid == slot->seq_nr + 1))
- goto out_ok;
-
/* Replay */
if (args->csa_sequenceid == slot->seq_nr) {
dprintk("%s seqid %u is a replay\n",
__func__, args->csa_sequenceid);
+ if (nfs4_test_locked_slot(tbl, slot->slot_nr))
+ return htonl(NFS4ERR_DELAY);
/* Signal process_op to set this error on next op */
if (args->csa_cachethis == 0)
return htonl(NFS4ERR_RETRY_UNCACHED_REP);
- /* The ca_maxresponsesize_cached is 0 with no DRC */
- else if (args->csa_cachethis == 1)
- return htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+ /* Liar! We never allowed you to set csa_cachethis != 0 */
+ return htonl(NFS4ERR_SEQ_FALSE_RETRY);
}
/* Wraparound */
- if (args->csa_sequenceid == 1 && (slot->seq_nr + 1) == 0) {
- slot->seq_nr = 1;
- goto out_ok;
- }
+ if (unlikely(slot->seq_nr == 0xFFFFFFFFU)) {
+ if (args->csa_sequenceid == 1)
+ return htonl(NFS4_OK);
+ } else if (likely(args->csa_sequenceid == slot->seq_nr + 1))
+ return htonl(NFS4_OK);
/* Misordered request */
return htonl(NFS4ERR_SEQ_MISORDERED);
-out_ok:
- tbl->highest_used_slotid = args->csa_slotid;
- return htonl(NFS4_OK);
}
/*
@@ -473,6 +464,12 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
tbl = &clp->cl_session->bc_slot_table;
slot = tbl->slots + args->csa_slotid;
+ /* Set up res before grabbing the spinlock */
+ memcpy(&res->csr_sessionid, &args->csa_sessionid,
+ sizeof(res->csr_sessionid));
+ res->csr_sequenceid = args->csa_sequenceid;
+ res->csr_slotid = args->csa_slotid;
+
spin_lock(&tbl->slot_tbl_lock);
/* state manager is resetting the session */
if (test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state)) {
@@ -485,18 +482,28 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
goto out_unlock;
}
- memcpy(&res->csr_sessionid, &args->csa_sessionid,
- sizeof(res->csr_sessionid));
- res->csr_sequenceid = args->csa_sequenceid;
- res->csr_slotid = args->csa_slotid;
- res->csr_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
- res->csr_target_highestslotid = NFS41_BC_MAX_CALLBACKS - 1;
+ status = htonl(NFS4ERR_BADSLOT);
+ slot = nfs4_lookup_slot(tbl, args->csa_slotid);
+ if (IS_ERR(slot))
+ goto out_unlock;
+
+ res->csr_highestslotid = tbl->server_highest_slotid;
+ res->csr_target_highestslotid = tbl->target_highest_slotid;
- status = validate_seqid(tbl, args);
+ status = validate_seqid(tbl, slot, args);
if (status)
goto out_unlock;
+ if (!nfs4_try_to_lock_slot(tbl, slot)) {
+ status = htonl(NFS4ERR_DELAY);
+ goto out_unlock;
+ }
+ cps->slot = slot;
- cps->slotid = args->csa_slotid;
+ /* The ca_maxresponsesize_cached is 0 with no DRC */
+ if (args->csa_cachethis != 0) {
+ status = htonl(NFS4ERR_REP_TOO_BIG_TO_CACHE);
+ goto out_unlock;
+ }
/*
* Check for pending referring calls. If a match is found, a
@@ -513,7 +520,7 @@ __be32 nfs4_callback_sequence(struct cb_sequenceargs *args,
* If CB_SEQUENCE returns an error, then the state of the slot
* (sequence ID, cached reply) MUST NOT change.
*/
- slot->seq_nr++;
+ slot->seq_nr = args->csa_sequenceid;
out_unlock:
spin_unlock(&tbl->slot_tbl_lock);
diff --git a/fs/nfs/callback_xdr.c b/fs/nfs/callback_xdr.c
index 646cdac73..976c90608 100644
--- a/fs/nfs/callback_xdr.c
+++ b/fs/nfs/callback_xdr.c
@@ -752,7 +752,8 @@ preprocess_nfs41_op(int nop, unsigned int op_nr, struct callback_op **op)
return htonl(NFS_OK);
}
-static void nfs4_callback_free_slot(struct nfs4_session *session)
+static void nfs4_callback_free_slot(struct nfs4_session *session,
+ struct nfs4_slot *slot)
{
struct nfs4_slot_table *tbl = &session->bc_slot_table;
@@ -761,15 +762,17 @@ static void nfs4_callback_free_slot(struct nfs4_session *session)
* Let the state manager know callback processing done.
* A single slot, so highest used slotid is either 0 or -1
*/
- tbl->highest_used_slotid = NFS4_NO_SLOT;
+ nfs4_free_slot(tbl, slot);
nfs4_slot_tbl_drain_complete(tbl);
spin_unlock(&tbl->slot_tbl_lock);
}
static void nfs4_cb_free_slot(struct cb_process_state *cps)
{
- if (cps->slotid != NFS4_NO_SLOT)
- nfs4_callback_free_slot(cps->clp->cl_session);
+ if (cps->slot) {
+ nfs4_callback_free_slot(cps->clp->cl_session, cps->slot);
+ cps->slot = NULL;
+ }
}
#else /* CONFIG_NFS_V4_1 */
@@ -893,7 +896,6 @@ static __be32 nfs4_callback_compound(struct svc_rqst *rqstp, void *argp, void *r
struct cb_process_state cps = {
.drc_status = 0,
.clp = NULL,
- .slotid = NFS4_NO_SLOT,
.net = SVC_NET(rqstp),
};
unsigned int nops = 0;
diff --git a/fs/nfs/client.c b/fs/nfs/client.c
index d6d5d2a48..0c96528db 100644
--- a/fs/nfs/client.c
+++ b/fs/nfs/client.c
@@ -736,7 +736,7 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->rsize = max_rpc_payload;
if (server->rsize > NFS_MAX_FILE_IO_SIZE)
server->rsize = NFS_MAX_FILE_IO_SIZE;
- server->rpages = (server->rsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ server->rpages = (server->rsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
server->backing_dev_info.name = "nfs";
server->backing_dev_info.ra_pages = server->rpages * NFS_MAX_READAHEAD;
@@ -745,13 +745,13 @@ static void nfs_server_set_fsinfo(struct nfs_server *server,
server->wsize = max_rpc_payload;
if (server->wsize > NFS_MAX_FILE_IO_SIZE)
server->wsize = NFS_MAX_FILE_IO_SIZE;
- server->wpages = (server->wsize + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ server->wpages = (server->wsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
server->wtmult = nfs_block_bits(fsinfo->wtmult, NULL);
server->dtsize = nfs_block_size(fsinfo->dtpref, NULL);
- if (server->dtsize > PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES)
- server->dtsize = PAGE_CACHE_SIZE * NFS_MAX_READDIR_PAGES;
+ if (server->dtsize > PAGE_SIZE * NFS_MAX_READDIR_PAGES)
+ server->dtsize = PAGE_SIZE * NFS_MAX_READDIR_PAGES;
if (server->dtsize > server->rsize)
server->dtsize = server->rsize;
diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
index 7ded17764..33eb81738 100644
--- a/fs/nfs/dir.c
+++ b/fs/nfs/dir.c
@@ -707,7 +707,7 @@ void cache_page_release(nfs_readdir_descriptor_t *desc)
{
if (!desc->page->mapping)
nfs_readdir_clear_array(desc->page);
- page_cache_release(desc->page);
+ put_page(desc->page);
desc->page = NULL;
}
@@ -1360,19 +1360,15 @@ struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned in
dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
- res = ERR_PTR(-ENAMETOOLONG);
- if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
- goto out;
+ if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
+ return ERR_PTR(-ENAMETOOLONG);
/*
* If we're doing an exclusive create, optimize away the lookup
* but don't hash the dentry.
*/
- if (nfs_is_exclusive_create(dir, flags)) {
- d_instantiate(dentry, NULL);
- res = NULL;
- goto out;
- }
+ if (nfs_is_exclusive_create(dir, flags))
+ return NULL;
res = ERR_PTR(-ENOMEM);
fhandle = nfs_alloc_fhandle();
@@ -1927,7 +1923,7 @@ int nfs_symlink(struct inode *dir, struct dentry *dentry, const char *symname)
* add_to_page_cache_lru() grabs an extra page refcount.
* Drop it here to avoid leaking this page later.
*/
- page_cache_release(page);
+ put_page(page);
} else
__free_page(page);
diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c
index 7a0cfd326..c93826e4a 100644
--- a/fs/nfs/direct.c
+++ b/fs/nfs/direct.c
@@ -269,7 +269,7 @@ static void nfs_direct_release_pages(struct page **pages, unsigned int npages)
{
unsigned int i;
for (i = 0; i < npages; i++)
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
@@ -1003,7 +1003,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
iov_iter_count(iter));
pos = iocb->ki_pos;
- end = (pos + iov_iter_count(iter) - 1) >> PAGE_CACHE_SHIFT;
+ end = (pos + iov_iter_count(iter) - 1) >> PAGE_SHIFT;
inode_lock(inode);
@@ -1013,7 +1013,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
if (mapping->nrpages) {
result = invalidate_inode_pages2_range(mapping,
- pos >> PAGE_CACHE_SHIFT, end);
+ pos >> PAGE_SHIFT, end);
if (result)
goto out_unlock;
}
@@ -1042,7 +1042,7 @@ ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter)
if (mapping->nrpages) {
invalidate_inode_pages2_range(mapping,
- pos >> PAGE_CACHE_SHIFT, end);
+ pos >> PAGE_SHIFT, end);
}
inode_unlock(inode);
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 748bb813b..be01095b9 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -233,7 +233,7 @@ EXPORT_SYMBOL_GPL(nfs_file_mmap);
* nfs_file_write() that a write error occurred, and hence cause it to
* fall back to doing a synchronous write.
*/
-int
+static int
nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
{
struct nfs_open_context *ctx = nfs_file_open_context(file);
@@ -263,9 +263,8 @@ nfs_file_fsync_commit(struct file *file, loff_t start, loff_t end, int datasync)
out:
return ret;
}
-EXPORT_SYMBOL_GPL(nfs_file_fsync_commit);
-static int
+int
nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
{
int ret;
@@ -273,13 +272,15 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
trace_nfs_fsync_enter(inode);
- nfs_inode_dio_wait(inode);
+ inode_dio_wait(inode);
do {
ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
if (ret != 0)
break;
inode_lock(inode);
ret = nfs_file_fsync_commit(file, start, end, datasync);
+ if (!ret)
+ ret = pnfs_sync_inode(inode, !!datasync);
inode_unlock(inode);
/*
* If nfs_file_fsync_commit detected a server reboot, then
@@ -293,6 +294,7 @@ nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
trace_nfs_fsync_exit(inode, ret);
return ret;
}
+EXPORT_SYMBOL_GPL(nfs_file_fsync);
/*
* Decide whether a read/modify/write cycle may be more efficient
@@ -318,7 +320,7 @@ static int nfs_want_read_modify_write(struct file *file, struct page *page,
loff_t pos, unsigned len)
{
unsigned int pglen = nfs_page_length(page);
- unsigned int offset = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned int offset = pos & (PAGE_SIZE - 1);
unsigned int end = offset + len;
if (pnfs_ld_read_whole_page(file->f_mapping->host)) {
@@ -349,7 +351,7 @@ static int nfs_write_begin(struct file *file, struct address_space *mapping,
struct page **pagep, void **fsdata)
{
int ret;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
int once_thru = 0;
@@ -368,7 +370,7 @@ start:
/*
* Wait for O_DIRECT to complete
*/
- nfs_inode_dio_wait(mapping->host);
+ inode_dio_wait(mapping->host);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
@@ -378,12 +380,12 @@ start:
ret = nfs_flush_incompatible(file, page);
if (ret) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
} else if (!once_thru &&
nfs_want_read_modify_write(file, page, pos, len)) {
once_thru = 1;
ret = nfs_readpage(file, page);
- page_cache_release(page);
+ put_page(page);
if (!ret)
goto start;
}
@@ -394,7 +396,7 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata)
{
- unsigned offset = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned offset = pos & (PAGE_SIZE - 1);
struct nfs_open_context *ctx = nfs_file_open_context(file);
int status;
@@ -411,20 +413,20 @@ static int nfs_write_end(struct file *file, struct address_space *mapping,
if (pglen == 0) {
zero_user_segments(page, 0, offset,
- end, PAGE_CACHE_SIZE);
+ end, PAGE_SIZE);
SetPageUptodate(page);
} else if (end >= pglen) {
- zero_user_segment(page, end, PAGE_CACHE_SIZE);
+ zero_user_segment(page, end, PAGE_SIZE);
if (offset == 0)
SetPageUptodate(page);
} else
- zero_user_segment(page, pglen, PAGE_CACHE_SIZE);
+ zero_user_segment(page, pglen, PAGE_SIZE);
}
status = nfs_updatepage(file, page, offset, copied);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (status < 0)
return status;
@@ -452,7 +454,7 @@ static void nfs_invalidate_page(struct page *page, unsigned int offset,
dfprintk(PAGECACHE, "NFS: invalidate_page(%p, %u, %u)\n",
page, offset, length);
- if (offset != 0 || length < PAGE_CACHE_SIZE)
+ if (offset != 0 || length < PAGE_SIZE)
return;
/* Cancel any unstarted writes on this page */
nfs_wb_page_cancel(page_file_mapping(page)->host, page);
diff --git a/fs/nfs/flexfilelayout/flexfilelayoutdev.c b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
index eb370460c..add0e5a70 100644
--- a/fs/nfs/flexfilelayout/flexfilelayoutdev.c
+++ b/fs/nfs/flexfilelayout/flexfilelayoutdev.c
@@ -418,6 +418,8 @@ nfs4_ff_layout_prepare_ds(struct pnfs_layout_segment *lseg, u32 ds_idx,
pnfs_error_mark_layout_for_return(ino, lseg);
} else
pnfs_error_mark_layout_for_return(ino, lseg);
+ ds = NULL;
+ goto out;
}
out_update_creds:
if (ff_layout_update_mirror_cred(mirror, ds))
diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c
index 847b678af..738c84a42 100644
--- a/fs/nfs/inode.c
+++ b/fs/nfs/inode.c
@@ -141,7 +141,7 @@ void nfs_evict_inode(struct inode *inode)
int nfs_sync_inode(struct inode *inode)
{
- nfs_inode_dio_wait(inode);
+ inode_dio_wait(inode);
return nfs_wb_all(inode);
}
EXPORT_SYMBOL_GPL(nfs_sync_inode);
diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h
index 9a547aa3e..f1d1d2c47 100644
--- a/fs/nfs/internal.h
+++ b/fs/nfs/internal.h
@@ -358,7 +358,7 @@ int nfs_mknod(struct inode *, struct dentry *, umode_t, dev_t);
int nfs_rename(struct inode *, struct dentry *, struct inode *, struct dentry *);
/* file.c */
-int nfs_file_fsync_commit(struct file *, loff_t, loff_t, int);
+int nfs_file_fsync(struct file *file, loff_t start, loff_t end, int datasync);
loff_t nfs_file_llseek(struct file *, loff_t, int);
ssize_t nfs_file_read(struct kiocb *, struct iov_iter *);
ssize_t nfs_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *,
@@ -515,10 +515,6 @@ extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry);
/* direct.c */
void nfs_init_cinfo_from_dreq(struct nfs_commit_info *cinfo,
struct nfs_direct_req *dreq);
-static inline void nfs_inode_dio_wait(struct inode *inode)
-{
- inode_dio_wait(inode);
-}
extern ssize_t nfs_dreq_bytes_left(struct nfs_direct_req *dreq);
/* nfs4proc.c */
@@ -642,11 +638,11 @@ unsigned int nfs_page_length(struct page *page)
if (i_size > 0) {
pgoff_t page_index = page_file_index(page);
- pgoff_t end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+ pgoff_t end_index = (i_size - 1) >> PAGE_SHIFT;
if (page_index < end_index)
- return PAGE_CACHE_SIZE;
+ return PAGE_SIZE;
if (page_index == end_index)
- return ((i_size - 1) & ~PAGE_CACHE_MASK) + 1;
+ return ((i_size - 1) & ~PAGE_MASK) + 1;
}
return 0;
}
diff --git a/fs/nfs/nfs4file.c b/fs/nfs/nfs4file.c
index 2a9ff14cf..d03905164 100644
--- a/fs/nfs/nfs4file.c
+++ b/fs/nfs/nfs4file.c
@@ -128,37 +128,6 @@ nfs4_file_flush(struct file *file, fl_owner_t id)
return vfs_fsync(file, 0);
}
-static int
-nfs4_file_fsync(struct file *file, loff_t start, loff_t end, int datasync)
-{
- int ret;
- struct inode *inode = file_inode(file);
-
- trace_nfs_fsync_enter(inode);
-
- nfs_inode_dio_wait(inode);
- do {
- ret = filemap_write_and_wait_range(inode->i_mapping, start, end);
- if (ret != 0)
- break;
- inode_lock(inode);
- ret = nfs_file_fsync_commit(file, start, end, datasync);
- if (!ret)
- ret = pnfs_sync_inode(inode, !!datasync);
- inode_unlock(inode);
- /*
- * If nfs_file_fsync_commit detected a server reboot, then
- * resend all dirty pages that might have been covered by
- * the NFS_CONTEXT_RESEND_WRITES flag
- */
- start = 0;
- end = LLONG_MAX;
- } while (ret == -EAGAIN);
-
- trace_nfs_fsync_exit(inode, ret);
- return ret;
-}
-
#ifdef CONFIG_NFS_V4_2
static loff_t nfs4_file_llseek(struct file *filep, loff_t offset, int whence)
{
@@ -266,7 +235,7 @@ const struct file_operations nfs4_file_operations = {
.open = nfs4_file_open,
.flush = nfs4_file_flush,
.release = nfs_file_release,
- .fsync = nfs4_file_fsync,
+ .fsync = nfs_file_fsync,
.lock = nfs_lock,
.flock = nfs_flock,
.splice_read = nfs_file_splice_read,
diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c
index 14881594d..327b8c34d 100644
--- a/fs/nfs/nfs4proc.c
+++ b/fs/nfs/nfs4proc.c
@@ -2461,14 +2461,15 @@ static int _nfs4_open_and_get_state(struct nfs4_opendata *opendata,
dentry = opendata->dentry;
if (d_really_is_negative(dentry)) {
- /* FIXME: Is this d_drop() ever needed? */
+ struct dentry *alias;
d_drop(dentry);
- dentry = d_add_unique(dentry, igrab(state->inode));
- if (dentry == NULL) {
- dentry = opendata->dentry;
- } else {
+ alias = d_exact_alias(dentry, state->inode);
+ if (!alias)
+ alias = d_splice_alias(igrab(state->inode), dentry);
+ /* d_splice_alias() can't fail here - it's a non-directory */
+ if (alias) {
dput(ctx->dentry);
- ctx->dentry = dentry;
+ ctx->dentry = dentry = alias;
}
nfs_set_verifier(dentry,
nfs_save_change_attribute(d_inode(opendata->dir)));
@@ -6782,13 +6783,26 @@ nfs41_same_server_scope(struct nfs41_server_scope *a,
return false;
}
+static void
+nfs4_bind_one_conn_to_session_done(struct rpc_task *task, void *calldata)
+{
+}
+
+static const struct rpc_call_ops nfs4_bind_one_conn_to_session_ops = {
+ .rpc_call_done = &nfs4_bind_one_conn_to_session_done,
+};
+
/*
- * nfs4_proc_bind_conn_to_session()
+ * nfs4_proc_bind_one_conn_to_session()
*
* The 4.1 client currently uses the same TCP connection for the
* fore and backchannel.
*/
-int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+static
+int nfs4_proc_bind_one_conn_to_session(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ struct nfs_client *clp,
+ struct rpc_cred *cred)
{
int status;
struct nfs41_bind_conn_to_session_args args = {
@@ -6803,6 +6817,14 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred
.rpc_resp = &res,
.rpc_cred = cred,
};
+ struct rpc_task_setup task_setup_data = {
+ .rpc_client = clnt,
+ .rpc_xprt = xprt,
+ .callback_ops = &nfs4_bind_one_conn_to_session_ops,
+ .rpc_message = &msg,
+ .flags = RPC_TASK_TIMEOUT,
+ };
+ struct rpc_task *task;
dprintk("--> %s\n", __func__);
@@ -6810,7 +6832,16 @@ int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred
if (!(clp->cl_session->flags & SESSION4_BACK_CHAN))
args.dir = NFS4_CDFC4_FORE;
- status = rpc_call_sync(clp->cl_rpcclient, &msg, RPC_TASK_TIMEOUT);
+ /* Do not set the backchannel flag unless this is clnt->cl_xprt */
+ if (xprt != rcu_access_pointer(clnt->cl_xprt))
+ args.dir = NFS4_CDFC4_FORE;
+
+ task = rpc_run_task(&task_setup_data);
+ if (!IS_ERR(task)) {
+ status = task->tk_status;
+ rpc_put_task(task);
+ } else
+ status = PTR_ERR(task);
trace_nfs4_bind_conn_to_session(clp, status);
if (status == 0) {
if (memcmp(res.sessionid.data,
@@ -6837,6 +6868,31 @@ out:
return status;
}
+struct rpc_bind_conn_calldata {
+ struct nfs_client *clp;
+ struct rpc_cred *cred;
+};
+
+static int
+nfs4_proc_bind_conn_to_session_callback(struct rpc_clnt *clnt,
+ struct rpc_xprt *xprt,
+ void *calldata)
+{
+ struct rpc_bind_conn_calldata *p = calldata;
+
+ return nfs4_proc_bind_one_conn_to_session(clnt, xprt, p->clp, p->cred);
+}
+
+int nfs4_proc_bind_conn_to_session(struct nfs_client *clp, struct rpc_cred *cred)
+{
+ struct rpc_bind_conn_calldata data = {
+ .clp = clp,
+ .cred = cred,
+ };
+ return rpc_clnt_iterate_for_each_xprt(clp->cl_rpcclient,
+ nfs4_proc_bind_conn_to_session_callback, &data);
+}
+
/*
* Minimum set of SP4_MACH_CRED operations from RFC 5661 in the enforce map
* and operations we'd like to see to enable certain features in the allow map
@@ -7319,7 +7375,7 @@ static void nfs4_init_channel_attrs(struct nfs41_create_session_args *args)
args->bc_attrs.max_resp_sz = PAGE_SIZE;
args->bc_attrs.max_resp_sz_cached = 0;
args->bc_attrs.max_ops = NFS4_MAX_BACK_CHANNEL_OPS;
- args->bc_attrs.max_reqs = 1;
+ args->bc_attrs.max_reqs = NFS41_BC_MAX_CALLBACKS;
dprintk("%s: Back Channel : max_rqst_sz=%u max_resp_sz=%u "
"max_resp_sz_cached=%u max_ops=%u max_reqs=%u\n",
diff --git a/fs/nfs/nfs4session.c b/fs/nfs/nfs4session.c
index e23366eff..332d06e64 100644
--- a/fs/nfs/nfs4session.c
+++ b/fs/nfs/nfs4session.c
@@ -135,6 +135,43 @@ static struct nfs4_slot *nfs4_find_or_create_slot(struct nfs4_slot_table *tbl,
return ERR_PTR(-ENOMEM);
}
+static void nfs4_lock_slot(struct nfs4_slot_table *tbl,
+ struct nfs4_slot *slot)
+{
+ u32 slotid = slot->slot_nr;
+
+ __set_bit(slotid, tbl->used_slots);
+ if (slotid > tbl->highest_used_slotid ||
+ tbl->highest_used_slotid == NFS4_NO_SLOT)
+ tbl->highest_used_slotid = slotid;
+ slot->generation = tbl->generation;
+}
+
+/*
+ * nfs4_try_to_lock_slot - Given a slot try to allocate it
+ *
+ * Note: must be called with the slot_tbl_lock held.
+ */
+bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot)
+{
+ if (nfs4_test_locked_slot(tbl, slot->slot_nr))
+ return false;
+ nfs4_lock_slot(tbl, slot);
+ return true;
+}
+
+/*
+ * nfs4_lookup_slot - Find a slot but don't allocate it
+ *
+ * Note: must be called with the slot_tbl_lock held.
+ */
+struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid)
+{
+ if (slotid <= tbl->max_slotid)
+ return nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+ return ERR_PTR(-E2BIG);
+}
+
/*
* nfs4_alloc_slot - efficiently look for a free slot
*
@@ -153,18 +190,11 @@ struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl)
__func__, tbl->used_slots[0], tbl->highest_used_slotid,
tbl->max_slotid + 1);
slotid = find_first_zero_bit(tbl->used_slots, tbl->max_slotid + 1);
- if (slotid > tbl->max_slotid)
- goto out;
- ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
- if (IS_ERR(ret))
- goto out;
- __set_bit(slotid, tbl->used_slots);
- if (slotid > tbl->highest_used_slotid ||
- tbl->highest_used_slotid == NFS4_NO_SLOT)
- tbl->highest_used_slotid = slotid;
- ret->generation = tbl->generation;
-
-out:
+ if (slotid <= tbl->max_slotid) {
+ ret = nfs4_find_or_create_slot(tbl, slotid, 1, GFP_NOWAIT);
+ if (!IS_ERR(ret))
+ nfs4_lock_slot(tbl, ret);
+ }
dprintk("<-- %s used_slots=%04lx highest_used=%u slotid=%u\n",
__func__, tbl->used_slots[0], tbl->highest_used_slotid,
!IS_ERR(ret) ? ret->slot_nr : NFS4_NO_SLOT);
diff --git a/fs/nfs/nfs4session.h b/fs/nfs/nfs4session.h
index e3ea2c532..5b51298d1 100644
--- a/fs/nfs/nfs4session.h
+++ b/fs/nfs/nfs4session.h
@@ -77,6 +77,8 @@ extern int nfs4_setup_slot_table(struct nfs4_slot_table *tbl,
unsigned int max_reqs, const char *queue);
extern void nfs4_shutdown_slot_table(struct nfs4_slot_table *tbl);
extern struct nfs4_slot *nfs4_alloc_slot(struct nfs4_slot_table *tbl);
+extern struct nfs4_slot *nfs4_lookup_slot(struct nfs4_slot_table *tbl, u32 slotid);
+extern bool nfs4_try_to_lock_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_free_slot(struct nfs4_slot_table *tbl, struct nfs4_slot *slot);
extern void nfs4_slot_tbl_drain_complete(struct nfs4_slot_table *tbl);
bool nfs41_wake_and_assign_slot(struct nfs4_slot_table *tbl,
@@ -88,6 +90,12 @@ static inline bool nfs4_slot_tbl_draining(struct nfs4_slot_table *tbl)
return !!test_bit(NFS4_SLOT_TBL_DRAINING, &tbl->slot_tbl_state);
}
+static inline bool nfs4_test_locked_slot(const struct nfs4_slot_table *tbl,
+ u32 slotid)
+{
+ return !!test_bit(slotid, tbl->used_slots);
+}
+
#if defined(CONFIG_NFS_V4_1)
extern void nfs41_set_target_slotid(struct nfs4_slot_table *tbl,
u32 target_highest_slotid);
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 4e4441216..88474a4fc 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -5001,7 +5001,7 @@ static int decode_space_limit(struct xdr_stream *xdr,
blocksize = be32_to_cpup(p);
maxsize = (uint64_t)nblocks * (uint64_t)blocksize;
}
- maxsize >>= PAGE_CACHE_SHIFT;
+ maxsize >>= PAGE_SHIFT;
*pagemod_limit = min_t(u64, maxsize, ULONG_MAX);
return 0;
out_overflow:
diff --git a/fs/nfs/objlayout/objio_osd.c b/fs/nfs/objlayout/objio_osd.c
index 9aebffb40..049c1b1f2 100644
--- a/fs/nfs/objlayout/objio_osd.c
+++ b/fs/nfs/objlayout/objio_osd.c
@@ -486,7 +486,7 @@ static void __r4w_put_page(void *priv, struct page *page)
dprintk("%s: index=0x%lx\n", __func__,
(page == ZERO_PAGE(0)) ? -1UL : page->index);
if (ZERO_PAGE(0) != page)
- page_cache_release(page);
+ put_page(page);
return;
}
diff --git a/fs/nfs/pagelist.c b/fs/nfs/pagelist.c
index 8ce4f61cb..1f6db4231 100644
--- a/fs/nfs/pagelist.c
+++ b/fs/nfs/pagelist.c
@@ -342,7 +342,7 @@ nfs_create_request(struct nfs_open_context *ctx, struct page *page,
* update_nfs_request below if the region is not locked. */
req->wb_page = page;
req->wb_index = page_file_index(page);
- page_cache_get(page);
+ get_page(page);
req->wb_offset = offset;
req->wb_pgbase = offset;
req->wb_bytes = count;
@@ -392,7 +392,7 @@ static void nfs_clear_request(struct nfs_page *req)
struct nfs_lock_context *l_ctx = req->wb_lock_context;
if (page != NULL) {
- page_cache_release(page);
+ put_page(page);
req->wb_page = NULL;
}
if (l_ctx != NULL) {
@@ -904,7 +904,7 @@ static bool nfs_can_coalesce_requests(struct nfs_page *prev,
return false;
} else {
if (req->wb_pgbase != 0 ||
- prev->wb_pgbase + prev->wb_bytes != PAGE_CACHE_SIZE)
+ prev->wb_pgbase + prev->wb_bytes != PAGE_SIZE)
return false;
}
}
diff --git a/fs/nfs/pnfs.c b/fs/nfs/pnfs.c
index 2fa483e6d..89a5ef4df 100644
--- a/fs/nfs/pnfs.c
+++ b/fs/nfs/pnfs.c
@@ -841,7 +841,7 @@ send_layoutget(struct pnfs_layout_hdr *lo,
i_size = i_size_read(ino);
- lgp->args.minlength = PAGE_CACHE_SIZE;
+ lgp->args.minlength = PAGE_SIZE;
if (lgp->args.minlength > range->length)
lgp->args.minlength = range->length;
if (range->iomode == IOMODE_READ) {
@@ -1618,13 +1618,13 @@ lookup_again:
spin_unlock(&clp->cl_lock);
}
- pg_offset = arg.offset & ~PAGE_CACHE_MASK;
+ pg_offset = arg.offset & ~PAGE_MASK;
if (pg_offset) {
arg.offset -= pg_offset;
arg.length += pg_offset;
}
if (arg.length != NFS4_MAX_UINT64)
- arg.length = PAGE_CACHE_ALIGN(arg.length);
+ arg.length = PAGE_ALIGN(arg.length);
lseg = send_layoutget(lo, ctx, &arg, gfp_flags);
atomic_dec(&lo->plh_outstanding);
diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c
index 81ac6480f..776dccbc3 100644
--- a/fs/nfs/pnfs_nfs.c
+++ b/fs/nfs/pnfs_nfs.c
@@ -246,6 +246,23 @@ void pnfs_fetch_commit_bucket_list(struct list_head *pages,
}
+/* Helper function for pnfs_generic_commit_pagelist to catch an empty
+ * page list. This can happen when two commits race. */
+static bool
+pnfs_generic_commit_cancel_empty_pagelist(struct list_head *pages,
+ struct nfs_commit_data *data,
+ struct nfs_commit_info *cinfo)
+{
+ if (list_empty(pages)) {
+ if (atomic_dec_and_test(&cinfo->mds->rpcs_out))
+ wake_up_atomic_t(&cinfo->mds->rpcs_out);
+ nfs_commitdata_release(data);
+ return true;
+ }
+
+ return false;
+}
+
/* This follows nfs_commit_list pretty closely */
int
pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
@@ -280,6 +297,11 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
list_for_each_entry_safe(data, tmp, &list, pages) {
list_del_init(&data->pages);
if (data->ds_commit_index < 0) {
+ /* another commit raced with us */
+ if (pnfs_generic_commit_cancel_empty_pagelist(mds_pages,
+ data, cinfo))
+ continue;
+
nfs_init_commit(data, mds_pages, NULL, cinfo);
nfs_initiate_commit(NFS_CLIENT(inode), data,
NFS_PROTO(data->inode),
@@ -288,6 +310,12 @@ pnfs_generic_commit_pagelist(struct inode *inode, struct list_head *mds_pages,
LIST_HEAD(pages);
pnfs_fetch_commit_bucket_list(&pages, data, cinfo);
+
+ /* another commit raced with us */
+ if (pnfs_generic_commit_cancel_empty_pagelist(&pages,
+ data, cinfo))
+ continue;
+
nfs_init_commit(data, &pages, data->lseg, cinfo);
initiate_commit(data, how);
}
@@ -606,12 +634,22 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv,
dprintk("%s: DS %s: trying address %s\n",
__func__, ds->ds_remotestr, da->da_remotestr);
- clp = get_v3_ds_connect(mds_srv->nfs_client,
+ if (!IS_ERR(clp)) {
+ struct xprt_create xprt_args = {
+ .ident = XPRT_TRANSPORT_TCP,
+ .net = clp->cl_net,
+ .dstaddr = (struct sockaddr *)&da->da_addr,
+ .addrlen = da->da_addrlen,
+ .servername = clp->cl_hostname,
+ };
+ /* Add this address as an alias */
+ rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args,
+ rpc_clnt_test_and_add_xprt, NULL);
+ } else
+ clp = get_v3_ds_connect(mds_srv->nfs_client,
(struct sockaddr *)&da->da_addr,
da->da_addrlen, IPPROTO_TCP,
timeo, retrans, au_flavor);
- if (!IS_ERR(clp))
- break;
}
if (IS_ERR(clp)) {
diff --git a/fs/nfs/read.c b/fs/nfs/read.c
index eb31e23e7..6776d7a78 100644
--- a/fs/nfs/read.c
+++ b/fs/nfs/read.c
@@ -46,7 +46,7 @@ static void nfs_readhdr_free(struct nfs_pgio_header *rhdr)
static
int nfs_return_empty_page(struct page *page)
{
- zero_user(page, 0, PAGE_CACHE_SIZE);
+ zero_user(page, 0, PAGE_SIZE);
SetPageUptodate(page);
unlock_page(page);
return 0;
@@ -118,8 +118,8 @@ int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
unlock_page(page);
return PTR_ERR(new);
}
- if (len < PAGE_CACHE_SIZE)
- zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ if (len < PAGE_SIZE)
+ zero_user_segment(page, len, PAGE_SIZE);
nfs_pageio_init_read(&pgio, inode, false,
&nfs_async_read_completion_ops);
@@ -295,7 +295,7 @@ int nfs_readpage(struct file *file, struct page *page)
int error;
dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
- page, PAGE_CACHE_SIZE, page_file_index(page));
+ page, PAGE_SIZE, page_file_index(page));
nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
nfs_add_stats(inode, NFSIOS_READPAGES, 1);
@@ -361,8 +361,8 @@ readpage_async_filler(void *data, struct page *page)
if (IS_ERR(new))
goto out_error;
- if (len < PAGE_CACHE_SIZE)
- zero_user_segment(page, len, PAGE_CACHE_SIZE);
+ if (len < PAGE_SIZE)
+ zero_user_segment(page, len, PAGE_SIZE);
if (!nfs_pageio_add_request(desc->pgio, new)) {
nfs_list_remove_request(new);
nfs_readpage_release(new);
@@ -424,8 +424,8 @@ int nfs_readpages(struct file *filp, struct address_space *mapping,
pgm = &pgio.pg_mirrors[0];
NFS_I(inode)->read_io += pgm->pg_bytes_written;
- npages = (pgm->pg_bytes_written + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
+ npages = (pgm->pg_bytes_written + PAGE_SIZE - 1) >>
+ PAGE_SHIFT;
nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
put_nfs_open_context(desc.ctx);
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5754835a2..f5e613395 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -150,7 +150,7 @@ static void nfs_grow_file(struct page *page, unsigned int offset, unsigned int c
spin_lock(&inode->i_lock);
i_size = i_size_read(inode);
- end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+ end_index = (i_size - 1) >> PAGE_SHIFT;
if (i_size > 0 && page_file_index(page) < end_index)
goto out;
end = page_file_offset(page) + ((loff_t)offset+count);
@@ -1709,6 +1709,10 @@ nfs_commit_list(struct inode *inode, struct list_head *head, int how,
{
struct nfs_commit_data *data;
+ /* another commit raced with us */
+ if (list_empty(head))
+ return 0;
+
data = nfs_commitdata_alloc();
if (!data)
@@ -1942,7 +1946,7 @@ int nfs_wb_page_cancel(struct inode *inode, struct page *page)
int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder)
{
loff_t range_start = page_file_offset(page);
- loff_t range_end = range_start + (loff_t)(PAGE_CACHE_SIZE - 1);
+ loff_t range_end = range_start + (loff_t)(PAGE_SIZE - 1);
struct writeback_control wbc = {
.sync_mode = WB_SYNC_ALL,
.nr_to_write = 0,
diff --git a/fs/nfsd/Kconfig b/fs/nfsd/Kconfig
index a0b77fc1b..c9f583d7b 100644
--- a/fs/nfsd/Kconfig
+++ b/fs/nfsd/Kconfig
@@ -84,12 +84,30 @@ config NFSD_V4
If unsure, say N.
config NFSD_PNFS
- bool "NFSv4.1 server support for Parallel NFS (pNFS)"
- depends on NFSD_V4
+ bool
+
+config NFSD_BLOCKLAYOUT
+ bool "NFSv4.1 server support for pNFS block layouts"
+ depends on NFSD_V4 && BLOCK
+ select NFSD_PNFS
+ help
+ This option enables support for the exporting pNFS block layouts
+ in the kernel's NFS server. The pNFS block layout enables NFS
+ clients to directly perform I/O to block devices accesible to both
+ the server and the clients. See RFC 5663 for more details.
+
+ If unsure, say N.
+
+config NFSD_SCSILAYOUT
+ bool "NFSv4.1 server support for pNFS SCSI layouts"
+ depends on NFSD_V4 && BLOCK
+ select NFSD_PNFS
help
- This option enables support for the parallel NFS features of the
- minor version 1 of the NFSv4 protocol (RFC5661) in the kernel's NFS
- server.
+ This option enables support for the exporting pNFS SCSI layouts
+ in the kernel's NFS server. The pNFS SCSI layout enables NFS
+ clients to directly perform I/O to SCSI devices accesible to both
+ the server and the clients. See draft-ietf-nfsv4-scsi-layout for
+ more details.
If unsure, say N.
diff --git a/fs/nfsd/Makefile b/fs/nfsd/Makefile
index 9a6028e12..3ae5f3c77 100644
--- a/fs/nfsd/Makefile
+++ b/fs/nfsd/Makefile
@@ -17,4 +17,6 @@ nfsd-$(CONFIG_NFSD_V3) += nfs3proc.o nfs3xdr.o
nfsd-$(CONFIG_NFSD_V3_ACL) += nfs3acl.o
nfsd-$(CONFIG_NFSD_V4) += nfs4proc.o nfs4xdr.o nfs4state.o nfs4idmap.o \
nfs4acl.o nfs4callback.o nfs4recover.o
-nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_PNFS) += nfs4layouts.o
+nfsd-$(CONFIG_NFSD_BLOCKLAYOUT) += blocklayout.o blocklayoutxdr.o
+nfsd-$(CONFIG_NFSD_SCSILAYOUT) += blocklayout.o blocklayoutxdr.o
diff --git a/fs/nfsd/blocklayout.c b/fs/nfsd/blocklayout.c
index c29d9421b..e55b52426 100644
--- a/fs/nfsd/blocklayout.c
+++ b/fs/nfsd/blocklayout.c
@@ -1,11 +1,14 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/exportfs.h>
#include <linux/genhd.h>
#include <linux/slab.h>
+#include <linux/pr.h>
#include <linux/nfsd/debug.h>
+#include <scsi/scsi_proto.h>
+#include <scsi/scsi_common.h>
#include "blocklayoutxdr.h"
#include "pnfs.h"
@@ -13,37 +16,6 @@
#define NFSDDBG_FACILITY NFSDDBG_PNFS
-static int
-nfsd4_block_get_device_info_simple(struct super_block *sb,
- struct nfsd4_getdeviceinfo *gdp)
-{
- struct pnfs_block_deviceaddr *dev;
- struct pnfs_block_volume *b;
-
- dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
- sizeof(struct pnfs_block_volume), GFP_KERNEL);
- if (!dev)
- return -ENOMEM;
- gdp->gd_device = dev;
-
- dev->nr_volumes = 1;
- b = &dev->volumes[0];
-
- b->type = PNFS_BLOCK_VOLUME_SIMPLE;
- b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
- return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
- &b->simple.offset);
-}
-
-static __be32
-nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
- struct nfsd4_getdeviceinfo *gdp)
-{
- if (sb->s_bdev != sb->s_bdev->bd_contains)
- return nfserr_inval;
- return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
-}
-
static __be32
nfsd4_block_proc_layoutget(struct inode *inode, const struct svc_fh *fhp,
struct nfsd4_layoutget *args)
@@ -141,20 +113,13 @@ out_layoutunavailable:
}
static __be32
-nfsd4_block_proc_layoutcommit(struct inode *inode,
- struct nfsd4_layoutcommit *lcp)
+nfsd4_block_commit_blocks(struct inode *inode, struct nfsd4_layoutcommit *lcp,
+ struct iomap *iomaps, int nr_iomaps)
{
loff_t new_size = lcp->lc_last_wr + 1;
struct iattr iattr = { .ia_valid = 0 };
- struct iomap *iomaps;
- int nr_iomaps;
int error;
- nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
- lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
- if (nr_iomaps < 0)
- return nfserrno(nr_iomaps);
-
if (lcp->lc_mtime.tv_nsec == UTIME_NOW ||
timespec_compare(&lcp->lc_mtime, &inode->i_mtime) < 0)
lcp->lc_mtime = current_fs_time(inode->i_sb);
@@ -172,6 +137,54 @@ nfsd4_block_proc_layoutcommit(struct inode *inode,
return nfserrno(error);
}
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
+static int
+nfsd4_block_get_device_info_simple(struct super_block *sb,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ struct pnfs_block_deviceaddr *dev;
+ struct pnfs_block_volume *b;
+
+ dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+ sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+ gdp->gd_device = dev;
+
+ dev->nr_volumes = 1;
+ b = &dev->volumes[0];
+
+ b->type = PNFS_BLOCK_VOLUME_SIMPLE;
+ b->simple.sig_len = PNFS_BLOCK_UUID_LEN;
+ return sb->s_export_op->get_uuid(sb, b->simple.sig, &b->simple.sig_len,
+ &b->simple.offset);
+}
+
+static __be32
+nfsd4_block_proc_getdeviceinfo(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ if (sb->s_bdev != sb->s_bdev->bd_contains)
+ return nfserr_inval;
+ return nfserrno(nfsd4_block_get_device_info_simple(sb, gdp));
+}
+
+static __be32
+nfsd4_block_proc_layoutcommit(struct inode *inode,
+ struct nfsd4_layoutcommit *lcp)
+{
+ struct iomap *iomaps;
+ int nr_iomaps;
+
+ nr_iomaps = nfsd4_block_decode_layoutupdate(lcp->lc_up_layout,
+ lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ if (nr_iomaps < 0)
+ return nfserrno(nr_iomaps);
+
+ return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
const struct nfsd4_layout_ops bl_layout_ops = {
/*
* Pretend that we send notification to the client. This is a blatant
@@ -190,3 +203,206 @@ const struct nfsd4_layout_ops bl_layout_ops = {
.encode_layoutget = nfsd4_block_encode_layoutget,
.proc_layoutcommit = nfsd4_block_proc_layoutcommit,
};
+#endif /* CONFIG_NFSD_BLOCKLAYOUT */
+
+#ifdef CONFIG_NFSD_SCSILAYOUT
+static int nfsd4_scsi_identify_device(struct block_device *bdev,
+ struct pnfs_block_volume *b)
+{
+ struct request_queue *q = bdev->bd_disk->queue;
+ struct request *rq;
+ size_t bufflen = 252, len, id_len;
+ u8 *buf, *d, type, assoc;
+ int error;
+
+ buf = kzalloc(bufflen, GFP_KERNEL);
+ if (!buf)
+ return -ENOMEM;
+
+ rq = blk_get_request(q, READ, GFP_KERNEL);
+ if (IS_ERR(rq)) {
+ error = -ENOMEM;
+ goto out_free_buf;
+ }
+ blk_rq_set_block_pc(rq);
+
+ error = blk_rq_map_kern(q, rq, buf, bufflen, GFP_KERNEL);
+ if (error)
+ goto out_put_request;
+
+ rq->cmd[0] = INQUIRY;
+ rq->cmd[1] = 1;
+ rq->cmd[2] = 0x83;
+ rq->cmd[3] = bufflen >> 8;
+ rq->cmd[4] = bufflen & 0xff;
+ rq->cmd_len = COMMAND_SIZE(INQUIRY);
+
+ error = blk_execute_rq(rq->q, NULL, rq, 1);
+ if (error) {
+ pr_err("pNFS: INQUIRY 0x83 failed with: %x\n",
+ rq->errors);
+ goto out_put_request;
+ }
+
+ len = (buf[2] << 8) + buf[3] + 4;
+ if (len > bufflen) {
+ pr_err("pNFS: INQUIRY 0x83 response invalid (len = %zd)\n",
+ len);
+ goto out_put_request;
+ }
+
+ d = buf + 4;
+ for (d = buf + 4; d < buf + len; d += id_len + 4) {
+ id_len = d[3];
+ type = d[1] & 0xf;
+ assoc = (d[1] >> 4) & 0x3;
+
+ /*
+ * We only care about a EUI-64 and NAA designator types
+ * with LU association.
+ */
+ if (assoc != 0x00)
+ continue;
+ if (type != 0x02 && type != 0x03)
+ continue;
+ if (id_len != 8 && id_len != 12 && id_len != 16)
+ continue;
+
+ b->scsi.code_set = PS_CODE_SET_BINARY;
+ b->scsi.designator_type = type == 0x02 ?
+ PS_DESIGNATOR_EUI64 : PS_DESIGNATOR_NAA;
+ b->scsi.designator_len = id_len;
+ memcpy(b->scsi.designator, d + 4, id_len);
+
+ /*
+ * If we found a 8 or 12 byte descriptor continue on to
+ * see if a 16 byte one is available. If we find a
+ * 16 byte descriptor we're done.
+ */
+ if (id_len == 16)
+ break;
+ }
+
+out_put_request:
+ blk_put_request(rq);
+out_free_buf:
+ kfree(buf);
+ return error;
+}
+
+#define NFSD_MDS_PR_KEY 0x0100000000000000
+
+/*
+ * We use the client ID as a unique key for the reservations.
+ * This allows us to easily fence a client when recalls fail.
+ */
+static u64 nfsd4_scsi_pr_key(struct nfs4_client *clp)
+{
+ return ((u64)clp->cl_clientid.cl_boot << 32) | clp->cl_clientid.cl_id;
+}
+
+static int
+nfsd4_block_get_device_info_scsi(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ struct pnfs_block_deviceaddr *dev;
+ struct pnfs_block_volume *b;
+ const struct pr_ops *ops;
+ int error;
+
+ dev = kzalloc(sizeof(struct pnfs_block_deviceaddr) +
+ sizeof(struct pnfs_block_volume), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+ gdp->gd_device = dev;
+
+ dev->nr_volumes = 1;
+ b = &dev->volumes[0];
+
+ b->type = PNFS_BLOCK_VOLUME_SCSI;
+ b->scsi.pr_key = nfsd4_scsi_pr_key(clp);
+
+ error = nfsd4_scsi_identify_device(sb->s_bdev, b);
+ if (error)
+ return error;
+
+ ops = sb->s_bdev->bd_disk->fops->pr_ops;
+ if (!ops) {
+ pr_err("pNFS: device %s does not support PRs.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ error = ops->pr_register(sb->s_bdev, 0, NFSD_MDS_PR_KEY, true);
+ if (error) {
+ pr_err("pNFS: failed to register key for device %s.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ error = ops->pr_reserve(sb->s_bdev, NFSD_MDS_PR_KEY,
+ PR_EXCLUSIVE_ACCESS_REG_ONLY, 0);
+ if (error) {
+ pr_err("pNFS: failed to reserve device %s.\n",
+ sb->s_id);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+static __be32
+nfsd4_scsi_proc_getdeviceinfo(struct super_block *sb,
+ struct nfs4_client *clp,
+ struct nfsd4_getdeviceinfo *gdp)
+{
+ if (sb->s_bdev != sb->s_bdev->bd_contains)
+ return nfserr_inval;
+ return nfserrno(nfsd4_block_get_device_info_scsi(sb, clp, gdp));
+}
+static __be32
+nfsd4_scsi_proc_layoutcommit(struct inode *inode,
+ struct nfsd4_layoutcommit *lcp)
+{
+ struct iomap *iomaps;
+ int nr_iomaps;
+
+ nr_iomaps = nfsd4_scsi_decode_layoutupdate(lcp->lc_up_layout,
+ lcp->lc_up_len, &iomaps, 1 << inode->i_blkbits);
+ if (nr_iomaps < 0)
+ return nfserrno(nr_iomaps);
+
+ return nfsd4_block_commit_blocks(inode, lcp, iomaps, nr_iomaps);
+}
+
+static void
+nfsd4_scsi_fence_client(struct nfs4_layout_stateid *ls)
+{
+ struct nfs4_client *clp = ls->ls_stid.sc_client;
+ struct block_device *bdev = ls->ls_file->f_path.mnt->mnt_sb->s_bdev;
+
+ bdev->bd_disk->fops->pr_ops->pr_preempt(bdev, NFSD_MDS_PR_KEY,
+ nfsd4_scsi_pr_key(clp), 0, true);
+}
+
+const struct nfsd4_layout_ops scsi_layout_ops = {
+ /*
+ * Pretend that we send notification to the client. This is a blatant
+ * lie to force recent Linux clients to cache our device IDs.
+ * We rarely ever change the device ID, so the harm of leaking deviceids
+ * for a while isn't too bad. Unfortunately RFC5661 is a complete mess
+ * in this regard, but I filed errata 4119 for this a while ago, and
+ * hopefully the Linux client will eventually start caching deviceids
+ * without this again.
+ */
+ .notify_types =
+ NOTIFY_DEVICEID4_DELETE | NOTIFY_DEVICEID4_CHANGE,
+ .proc_getdeviceinfo = nfsd4_scsi_proc_getdeviceinfo,
+ .encode_getdeviceinfo = nfsd4_block_encode_getdeviceinfo,
+ .proc_layoutget = nfsd4_block_proc_layoutget,
+ .encode_layoutget = nfsd4_block_encode_layoutget,
+ .proc_layoutcommit = nfsd4_scsi_proc_layoutcommit,
+ .fence_client = nfsd4_scsi_fence_client,
+};
+#endif /* CONFIG_NFSD_SCSILAYOUT */
diff --git a/fs/nfsd/blocklayoutxdr.c b/fs/nfsd/blocklayoutxdr.c
index 6d834dc9b..6c3b316f9 100644
--- a/fs/nfsd/blocklayoutxdr.c
+++ b/fs/nfsd/blocklayoutxdr.c
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2014 Christoph Hellwig.
+ * Copyright (c) 2014-2016 Christoph Hellwig.
*/
#include <linux/sunrpc/svc.h>
#include <linux/exportfs.h>
@@ -53,6 +53,18 @@ nfsd4_block_encode_volume(struct xdr_stream *xdr, struct pnfs_block_volume *b)
p = xdr_encode_hyper(p, b->simple.offset);
p = xdr_encode_opaque(p, b->simple.sig, b->simple.sig_len);
break;
+ case PNFS_BLOCK_VOLUME_SCSI:
+ len = 4 + 4 + 4 + 4 + b->scsi.designator_len + 8;
+ p = xdr_reserve_space(xdr, len);
+ if (!p)
+ return -ETOOSMALL;
+
+ *p++ = cpu_to_be32(b->type);
+ *p++ = cpu_to_be32(b->scsi.code_set);
+ *p++ = cpu_to_be32(b->scsi.designator_type);
+ p = xdr_encode_opaque(p, b->scsi.designator, b->scsi.designator_len);
+ p = xdr_encode_hyper(p, b->scsi.pr_key);
+ break;
default:
return -ENOTSUPP;
}
@@ -93,18 +105,22 @@ nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
u32 block_size)
{
struct iomap *iomaps;
- u32 nr_iomaps, expected, i;
+ u32 nr_iomaps, i;
if (len < sizeof(u32)) {
dprintk("%s: extent array too small: %u\n", __func__, len);
return -EINVAL;
}
+ len -= sizeof(u32);
+ if (len % PNFS_BLOCK_EXTENT_SIZE) {
+ dprintk("%s: extent array invalid: %u\n", __func__, len);
+ return -EINVAL;
+ }
nr_iomaps = be32_to_cpup(p++);
- expected = sizeof(__be32) + nr_iomaps * PNFS_BLOCK_EXTENT_SIZE;
- if (len != expected) {
+ if (nr_iomaps != len / PNFS_BLOCK_EXTENT_SIZE) {
dprintk("%s: extent array size mismatch: %u/%u\n",
- __func__, len, expected);
+ __func__, len, nr_iomaps);
return -EINVAL;
}
@@ -155,3 +171,54 @@ fail:
kfree(iomaps);
return -EINVAL;
}
+
+int
+nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+ u32 block_size)
+{
+ struct iomap *iomaps;
+ u32 nr_iomaps, expected, i;
+
+ if (len < sizeof(u32)) {
+ dprintk("%s: extent array too small: %u\n", __func__, len);
+ return -EINVAL;
+ }
+
+ nr_iomaps = be32_to_cpup(p++);
+ expected = sizeof(__be32) + nr_iomaps * PNFS_SCSI_RANGE_SIZE;
+ if (len != expected) {
+ dprintk("%s: extent array size mismatch: %u/%u\n",
+ __func__, len, expected);
+ return -EINVAL;
+ }
+
+ iomaps = kcalloc(nr_iomaps, sizeof(*iomaps), GFP_KERNEL);
+ if (!iomaps) {
+ dprintk("%s: failed to allocate extent array\n", __func__);
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < nr_iomaps; i++) {
+ u64 val;
+
+ p = xdr_decode_hyper(p, &val);
+ if (val & (block_size - 1)) {
+ dprintk("%s: unaligned offset 0x%llx\n", __func__, val);
+ goto fail;
+ }
+ iomaps[i].offset = val;
+
+ p = xdr_decode_hyper(p, &val);
+ if (val & (block_size - 1)) {
+ dprintk("%s: unaligned length 0x%llx\n", __func__, val);
+ goto fail;
+ }
+ iomaps[i].length = val;
+ }
+
+ *iomapp = iomaps;
+ return nr_iomaps;
+fail:
+ kfree(iomaps);
+ return -EINVAL;
+}
diff --git a/fs/nfsd/blocklayoutxdr.h b/fs/nfsd/blocklayoutxdr.h
index 6de925fe8..397bc7563 100644
--- a/fs/nfsd/blocklayoutxdr.h
+++ b/fs/nfsd/blocklayoutxdr.h
@@ -15,6 +15,11 @@ struct pnfs_block_extent {
enum pnfs_block_extent_state es;
};
+struct pnfs_block_range {
+ u64 foff;
+ u64 len;
+};
+
/*
* Random upper cap for the uuid length to avoid unbounded allocation.
* Not actually limited by the protocol.
@@ -29,6 +34,13 @@ struct pnfs_block_volume {
u32 sig_len;
u8 sig[PNFS_BLOCK_UUID_LEN];
} simple;
+ struct {
+ enum scsi_code_set code_set;
+ enum scsi_designator_type designator_type;
+ int designator_len;
+ u8 designator[256];
+ u64 pr_key;
+ } scsi;
};
};
@@ -43,5 +55,7 @@ __be32 nfsd4_block_encode_layoutget(struct xdr_stream *xdr,
struct nfsd4_layoutget *lgp);
int nfsd4_block_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
u32 block_size);
+int nfsd4_scsi_decode_layoutupdate(__be32 *p, u32 len, struct iomap **iomapp,
+ u32 block_size);
#endif /* _NFSD_BLOCKLAYOUTXDR_H */
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 7b755b7f7..51c3b06e8 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -147,6 +147,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
{
__be32 nfserr;
u32 max_blocksize = svc_max_payload(rqstp);
+ unsigned long cnt = min(argp->count, max_blocksize);
dprintk("nfsd: READ(3) %s %lu bytes at %Lu\n",
SVCFH_fmt(&argp->fh),
@@ -157,7 +158,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
* 1 (status) + 22 (post_op_attr) + 1 (count) + 1 (eof)
* + 1 (xdr opaque byte count) = 26
*/
- resp->count = min(argp->count, max_blocksize);
+ resp->count = cnt;
svc_reserve_auth(rqstp, ((1 + NFS3_POST_OP_ATTR_WORDS + 3)<<2) + resp->count +4);
fh_copy(&resp->fh, &argp->fh);
@@ -167,8 +168,8 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
&resp->count);
if (nfserr == 0) {
struct inode *inode = d_inode(resp->fh.fh_dentry);
-
- resp->eof = (argp->offset + resp->count) >= inode->i_size;
+ resp->eof = nfsd_eof_on_read(cnt, resp->count, argp->offset,
+ inode->i_size);
}
RETURN_STATUS(nfserr);
diff --git a/fs/nfsd/nfs4layouts.c b/fs/nfsd/nfs4layouts.c
index ce2d010d3..825c7bc8d 100644
--- a/fs/nfsd/nfs4layouts.c
+++ b/fs/nfsd/nfs4layouts.c
@@ -1,6 +1,7 @@
/*
* Copyright (c) 2014 Christoph Hellwig.
*/
+#include <linux/blkdev.h>
#include <linux/kmod.h>
#include <linux/file.h>
#include <linux/jhash.h>
@@ -26,7 +27,12 @@ static const struct nfsd4_callback_ops nfsd4_cb_layout_ops;
static const struct lock_manager_operations nfsd4_layouts_lm_ops;
const struct nfsd4_layout_ops *nfsd4_layout_ops[LAYOUT_TYPE_MAX] = {
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
[LAYOUT_BLOCK_VOLUME] = &bl_layout_ops,
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ [LAYOUT_SCSI] = &scsi_layout_ops,
+#endif
};
/* pNFS device ID to export fsid mapping */
@@ -121,10 +127,24 @@ void nfsd4_setup_layout_type(struct svc_export *exp)
if (!(exp->ex_flags & NFSEXP_PNFS))
return;
+ /*
+ * Check if the file system supports exporting a block-like layout.
+ * If the block device supports reservations prefer the SCSI layout,
+ * otherwise advertise the block layout.
+ */
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
if (sb->s_export_op->get_uuid &&
sb->s_export_op->map_blocks &&
sb->s_export_op->commit_blocks)
exp->ex_layout_type = LAYOUT_BLOCK_VOLUME;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+ /* overwrite block layout selection if needed */
+ if (sb->s_export_op->map_blocks &&
+ sb->s_export_op->commit_blocks &&
+ sb->s_bdev && sb->s_bdev->bd_disk->fops->pr_ops)
+ exp->ex_layout_type = LAYOUT_SCSI;
+#endif
}
static void
@@ -590,8 +610,6 @@ nfsd4_cb_layout_fail(struct nfs4_layout_stateid *ls)
rpc_ntop((struct sockaddr *)&clp->cl_addr, addr_str, sizeof(addr_str));
- trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
-
printk(KERN_WARNING
"nfsd: client %s failed to respond to layout recall. "
" Fencing..\n", addr_str);
@@ -626,6 +644,7 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
container_of(cb, struct nfs4_layout_stateid, ls_recall);
struct nfsd_net *nn;
ktime_t now, cutoff;
+ const struct nfsd4_layout_ops *ops;
LIST_HEAD(reaplist);
@@ -661,7 +680,13 @@ nfsd4_cb_layout_done(struct nfsd4_callback *cb, struct rpc_task *task)
/*
* Unknown error or non-responding client, we'll need to fence.
*/
- nfsd4_cb_layout_fail(ls);
+ trace_layout_recall_fail(&ls->ls_stid.sc_stateid);
+
+ ops = nfsd4_layout_ops[ls->ls_layout_type];
+ if (ops->fence_client)
+ ops->fence_client(ls);
+ else
+ nfsd4_cb_layout_fail(ls);
return -1;
}
}
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index f8082c7cd..de1ff1d98 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -864,12 +864,10 @@ static __be32
nfsd4_secinfo(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
struct nfsd4_secinfo *secinfo)
{
- struct svc_fh resfh;
struct svc_export *exp;
struct dentry *dentry;
__be32 err;
- fh_init(&resfh, NFS4_FHSIZE);
err = fh_verify(rqstp, &cstate->current_fh, S_IFDIR, NFSD_MAY_EXEC);
if (err)
return err;
@@ -1270,8 +1268,10 @@ nfsd4_getdeviceinfo(struct svc_rqst *rqstp,
goto out;
nfserr = nfs_ok;
- if (gdp->gd_maxcount != 0)
- nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb, gdp);
+ if (gdp->gd_maxcount != 0) {
+ nfserr = ops->proc_getdeviceinfo(exp->ex_path.mnt->mnt_sb,
+ cstate->session->se_client, gdp);
+ }
gdp->gd_notify_types &= ops->notify_types;
out:
diff --git a/fs/nfsd/nfs4recover.c b/fs/nfsd/nfs4recover.c
index dc8ebecf5..66eaeb1e8 100644
--- a/fs/nfsd/nfs4recover.c
+++ b/fs/nfsd/nfs4recover.c
@@ -32,10 +32,10 @@
*
*/
+#include <crypto/hash.h>
#include <linux/file.h>
#include <linux/slab.h>
#include <linux/namei.h>
-#include <linux/crypto.h>
#include <linux/sched.h>
#include <linux/fs.h>
#include <linux/module.h>
@@ -104,29 +104,35 @@ static int
nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
{
struct xdr_netobj cksum;
- struct hash_desc desc;
- struct scatterlist sg;
+ struct crypto_shash *tfm;
int status;
dprintk("NFSD: nfs4_make_rec_clidname for %.*s\n",
clname->len, clname->data);
- desc.flags = CRYPTO_TFM_REQ_MAY_SLEEP;
- desc.tfm = crypto_alloc_hash("md5", 0, CRYPTO_ALG_ASYNC);
- if (IS_ERR(desc.tfm)) {
- status = PTR_ERR(desc.tfm);
+ tfm = crypto_alloc_shash("md5", 0, 0);
+ if (IS_ERR(tfm)) {
+ status = PTR_ERR(tfm);
goto out_no_tfm;
}
- cksum.len = crypto_hash_digestsize(desc.tfm);
+ cksum.len = crypto_shash_digestsize(tfm);
cksum.data = kmalloc(cksum.len, GFP_KERNEL);
if (cksum.data == NULL) {
status = -ENOMEM;
goto out;
}
- sg_init_one(&sg, clname->data, clname->len);
+ {
+ SHASH_DESC_ON_STACK(desc, tfm);
+
+ desc->tfm = tfm;
+ desc->flags = CRYPTO_TFM_REQ_MAY_SLEEP;
+
+ status = crypto_shash_digest(desc, clname->data, clname->len,
+ cksum.data);
+ shash_desc_zero(desc);
+ }
- status = crypto_hash_digest(&desc, &sg, sg.length, cksum.data);
if (status)
goto out;
@@ -135,7 +141,7 @@ nfs4_make_rec_clidname(char *dname, const struct xdr_netobj *clname)
status = 0;
out:
kfree(cksum.data);
- crypto_free_hash(desc.tfm);
+ crypto_free_shash(tfm);
out_no_tfm:
return status;
}
@@ -1260,6 +1266,7 @@ nfsd4_umh_cltrack_init(struct net *net)
/* XXX: The usermode helper s not working in container yet. */
if (net != &init_net) {
pr_warn("NFSD: attempt to initialize umh client tracking in a container ignored.\n");
+ kfree(grace_start);
return -EINVAL;
}
diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
index c484a2b6c..0462eeddf 100644
--- a/fs/nfsd/nfs4state.c
+++ b/fs/nfsd/nfs4state.c
@@ -2408,7 +2408,8 @@ nfsd4_exchange_id(struct svc_rqst *rqstp,
default: /* checked by xdr code */
WARN_ON_ONCE(1);
case SP4_SSV:
- return nfserr_encr_alg_unsupp;
+ status = nfserr_encr_alg_unsupp;
+ goto out_nolock;
}
/* Cases below refer to rfc 5661 section 18.35.4: */
@@ -2586,21 +2587,26 @@ static __be32 check_forechannel_attrs(struct nfsd4_channel_attrs *ca, struct nfs
return nfs_ok;
}
+/*
+ * Server's NFSv4.1 backchannel support is AUTH_SYS-only for now.
+ * These are based on similar macros in linux/sunrpc/msg_prot.h .
+ */
+#define RPC_MAX_HEADER_WITH_AUTH_SYS \
+ (RPC_CALLHDRSIZE + 2 * (2 + UNX_CALLSLACK))
+
+#define RPC_MAX_REPHEADER_WITH_AUTH_SYS \
+ (RPC_REPHDRSIZE + (2 + NUL_REPLYSLACK))
+
#define NFSD_CB_MAX_REQ_SZ ((NFS4_enc_cb_recall_sz + \
- RPC_MAX_HEADER_WITH_AUTH) * sizeof(__be32))
+ RPC_MAX_HEADER_WITH_AUTH_SYS) * sizeof(__be32))
#define NFSD_CB_MAX_RESP_SZ ((NFS4_dec_cb_recall_sz + \
- RPC_MAX_REPHEADER_WITH_AUTH) * sizeof(__be32))
+ RPC_MAX_REPHEADER_WITH_AUTH_SYS) * \
+ sizeof(__be32))
static __be32 check_backchannel_attrs(struct nfsd4_channel_attrs *ca)
{
ca->headerpadsz = 0;
- /*
- * These RPC_MAX_HEADER macros are overkill, especially since we
- * don't even do gss on the backchannel yet. But this is still
- * less than 1k. Tighten up this estimate in the unlikely event
- * it turns out to be a problem for some client:
- */
if (ca->maxreq_sz < NFSD_CB_MAX_REQ_SZ)
return nfserr_toosmall;
if (ca->maxresp_sz < NFSD_CB_MAX_RESP_SZ)
@@ -2710,10 +2716,9 @@ nfsd4_create_session(struct svc_rqst *rqstp,
goto out_free_conn;
}
status = nfs_ok;
- /*
- * We do not support RDMA or persistent sessions
- */
+ /* Persistent sessions are not supported */
cr_ses->flags &= ~SESSION4_PERSIST;
+ /* Upshifting from TCP to RDMA is not supported */
cr_ses->flags &= ~SESSION4_RDMA;
init_session(rqstp, new, conf, cr_ses);
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 1600ec470..9df898ba6 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -3063,7 +3063,7 @@ static __be32 nfsd4_encode_bind_conn_to_session(struct nfsd4_compoundres *resp,
p = xdr_encode_opaque_fixed(p, bcts->sessionid.data,
NFS4_MAX_SESSIONID_LEN);
*p++ = cpu_to_be32(bcts->dir);
- /* Sorry, we do not yet support RDMA over 4.1: */
+ /* Upshifting from TCP to RDMA is not supported */
*p++ = cpu_to_be32(0);
}
return nfserr;
@@ -3365,6 +3365,7 @@ static __be32 nfsd4_encode_splice_read(
struct xdr_stream *xdr = &resp->xdr;
struct xdr_buf *buf = xdr->buf;
u32 eof;
+ long len;
int space_left;
__be32 nfserr;
__be32 *p = xdr->p - 2;
@@ -3373,6 +3374,7 @@ static __be32 nfsd4_encode_splice_read(
if (xdr->end - xdr->p < 1)
return nfserr_resource;
+ len = maxcount;
nfserr = nfsd_splice_read(read->rd_rqstp, file,
read->rd_offset, &maxcount);
if (nfserr) {
@@ -3385,8 +3387,8 @@ static __be32 nfsd4_encode_splice_read(
return nfserr;
}
- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);
*(p++) = htonl(eof);
*(p++) = htonl(maxcount);
@@ -3456,14 +3458,15 @@ static __be32 nfsd4_encode_readv(struct nfsd4_compoundres *resp,
}
read->rd_vlen = v;
+ len = maxcount;
nfserr = nfsd_readv(file, read->rd_offset, resp->rqstp->rq_vec,
read->rd_vlen, &maxcount);
if (nfserr)
return nfserr;
xdr_truncate_encode(xdr, starting_len + 8 + ((maxcount+3)&~3));
- eof = (read->rd_offset + maxcount >=
- d_inode(read->rd_fhp->fh_dentry)->i_size);
+ eof = nfsd_eof_on_read(len, maxcount, read->rd_offset,
+ d_inode(read->rd_fhp->fh_dentry)->i_size);
tmp = htonl(eof);
write_bytes_to_xdr_buf(xdr->buf, starting_len , &tmp, 4);
diff --git a/fs/nfsd/pnfs.h b/fs/nfsd/pnfs.h
index d4c445367..7d073b9b1 100644
--- a/fs/nfsd/pnfs.h
+++ b/fs/nfsd/pnfs.h
@@ -21,6 +21,7 @@ struct nfsd4_layout_ops {
u32 notify_types;
__be32 (*proc_getdeviceinfo)(struct super_block *sb,
+ struct nfs4_client *clp,
struct nfsd4_getdeviceinfo *gdevp);
__be32 (*encode_getdeviceinfo)(struct xdr_stream *xdr,
struct nfsd4_getdeviceinfo *gdevp);
@@ -32,10 +33,17 @@ struct nfsd4_layout_ops {
__be32 (*proc_layoutcommit)(struct inode *inode,
struct nfsd4_layoutcommit *lcp);
+
+ void (*fence_client)(struct nfs4_layout_stateid *ls);
};
extern const struct nfsd4_layout_ops *nfsd4_layout_ops[];
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
extern const struct nfsd4_layout_ops bl_layout_ops;
+#endif
+#ifdef CONFIG_NFSD_SCSILAYOUT
+extern const struct nfsd4_layout_ops scsi_layout_ops;
+#endif
__be32 nfsd4_preprocess_layout_stateid(struct svc_rqst *rqstp,
struct nfsd4_compound_state *cstate, stateid_t *stateid,
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index 5d2a57e4c..d40010e4f 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -870,7 +870,7 @@ __be32 nfsd_readv(struct file *file, loff_t offset, struct kvec *vec, int vlen,
oldfs = get_fs();
set_fs(KERNEL_DS);
- host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset);
+ host_err = vfs_readv(file, (struct iovec __user *)vec, vlen, &offset, 0);
set_fs(oldfs);
return nfsd_finish_read(file, count, host_err);
}
@@ -957,7 +957,7 @@ nfsd_vfs_write(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
/* Write the data. */
oldfs = get_fs(); set_fs(KERNEL_DS);
- host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos);
+ host_err = vfs_writev(file, (struct iovec __user *)vec, vlen, &pos, 0);
set_fs(oldfs);
if (host_err < 0)
goto out_nfserr;
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index c11ba316f..2d573ec05 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -139,4 +139,23 @@ static inline int nfsd_create_is_exclusive(int createmode)
|| createmode == NFS4_CREATE_EXCLUSIVE4_1;
}
+static inline bool nfsd_eof_on_read(long requested, long read,
+ loff_t offset, loff_t size)
+{
+ /* We assume a short read means eof: */
+ if (requested > read)
+ return true;
+ /*
+ * A non-short read might also reach end of file. The spec
+ * still requires us to set eof in that case.
+ *
+ * Further operations may have modified the file size since
+ * the read, so the following check is not atomic with the read.
+ * We've only seen that cause a problem for a client in the case
+ * where the read returned a count of 0 without setting eof.
+ * That case was fixed by the addition of the above check.
+ */
+ return (offset + read >= size);
+}
+
#endif /* LINUX_NFSD_VFS_H */
diff --git a/fs/nilfs2/bmap.c b/fs/nilfs2/bmap.c
index 27f75bcbe..a9fb3636c 100644
--- a/fs/nilfs2/bmap.c
+++ b/fs/nilfs2/bmap.c
@@ -458,7 +458,7 @@ __u64 nilfs_bmap_data_get_key(const struct nilfs_bmap *bmap,
struct buffer_head *pbh;
__u64 key;
- key = page_index(bh->b_page) << (PAGE_CACHE_SHIFT -
+ key = page_index(bh->b_page) << (PAGE_SHIFT -
bmap->b_inode->i_blkbits);
for (pbh = page_buffers(bh->b_page); pbh != bh; pbh = pbh->b_this_page)
key++;
diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c
index a35ae35e6..e0c9daf9a 100644
--- a/fs/nilfs2/btnode.c
+++ b/fs/nilfs2/btnode.c
@@ -62,7 +62,7 @@ nilfs_btnode_create_block(struct address_space *btnc, __u64 blocknr)
set_buffer_uptodate(bh);
unlock_page(bh->b_page);
- page_cache_release(bh->b_page);
+ put_page(bh->b_page);
return bh;
}
@@ -128,7 +128,7 @@ found:
out_locked:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return err;
}
@@ -146,7 +146,7 @@ void nilfs_btnode_delete(struct buffer_head *bh)
pgoff_t index = page_index(page);
int still_dirty;
- page_cache_get(page);
+ get_page(page);
lock_page(page);
wait_on_page_writeback(page);
@@ -154,7 +154,7 @@ void nilfs_btnode_delete(struct buffer_head *bh)
still_dirty = PageDirty(page);
mapping = page->mapping;
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (!still_dirty && mapping)
invalidate_inode_pages2_range(mapping, index, index);
@@ -181,7 +181,7 @@ int nilfs_btnode_prepare_change_key(struct address_space *btnc,
obh = ctxt->bh;
ctxt->newbh = NULL;
- if (inode->i_blkbits == PAGE_CACHE_SHIFT) {
+ if (inode->i_blkbits == PAGE_SHIFT) {
lock_page(obh->b_page);
/*
* We cannot call radix_tree_preload for the kernels older
diff --git a/fs/nilfs2/dir.c b/fs/nilfs2/dir.c
index 6b8b92b19..e08f064e4 100644
--- a/fs/nilfs2/dir.c
+++ b/fs/nilfs2/dir.c
@@ -58,7 +58,7 @@ static inline unsigned nilfs_chunk_size(struct inode *inode)
static inline void nilfs_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
/*
@@ -69,9 +69,9 @@ static unsigned nilfs_last_byte(struct inode *inode, unsigned long page_nr)
{
unsigned last_byte = inode->i_size;
- last_byte -= page_nr << PAGE_CACHE_SHIFT;
- if (last_byte > PAGE_CACHE_SIZE)
- last_byte = PAGE_CACHE_SIZE;
+ last_byte -= page_nr << PAGE_SHIFT;
+ if (last_byte > PAGE_SIZE)
+ last_byte = PAGE_SIZE;
return last_byte;
}
@@ -109,12 +109,12 @@ static void nilfs_check_page(struct page *page)
unsigned chunk_size = nilfs_chunk_size(dir);
char *kaddr = page_address(page);
unsigned offs, rec_len;
- unsigned limit = PAGE_CACHE_SIZE;
+ unsigned limit = PAGE_SIZE;
struct nilfs_dir_entry *p;
char *error;
- if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
- limit = dir->i_size & ~PAGE_CACHE_MASK;
+ if ((dir->i_size >> PAGE_SHIFT) == page->index) {
+ limit = dir->i_size & ~PAGE_MASK;
if (limit & (chunk_size - 1))
goto Ebadsize;
if (!limit)
@@ -161,7 +161,7 @@ Espan:
bad_entry:
nilfs_error(sb, "nilfs_check_page", "bad entry in directory #%lu: %s - "
"offset=%lu, inode=%lu, rec_len=%d, name_len=%d",
- dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+ dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
(unsigned long) le64_to_cpu(p->inode),
rec_len, p->name_len);
goto fail;
@@ -170,7 +170,7 @@ Eend:
nilfs_error(sb, "nilfs_check_page",
"entry in directory #%lu spans the page boundary"
"offset=%lu, inode=%lu",
- dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs,
+ dir->i_ino, (page->index<<PAGE_SHIFT)+offs,
(unsigned long) le64_to_cpu(p->inode));
fail:
SetPageChecked(page);
@@ -256,8 +256,8 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx)
loff_t pos = ctx->pos;
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
- unsigned int offset = pos & ~PAGE_CACHE_MASK;
- unsigned long n = pos >> PAGE_CACHE_SHIFT;
+ unsigned int offset = pos & ~PAGE_MASK;
+ unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
/* unsigned chunk_mask = ~(nilfs_chunk_size(inode)-1); */
@@ -272,7 +272,7 @@ static int nilfs_readdir(struct file *file, struct dir_context *ctx)
if (IS_ERR(page)) {
nilfs_error(sb, __func__, "bad page in #%lu",
inode->i_ino);
- ctx->pos += PAGE_CACHE_SIZE - offset;
+ ctx->pos += PAGE_SIZE - offset;
return -EIO;
}
kaddr = page_address(page);
@@ -361,7 +361,7 @@ nilfs_find_entry(struct inode *dir, const struct qstr *qstr,
if (++n >= npages)
n = 0;
/* next page is past the blocks we've got */
- if (unlikely(n > (dir->i_blocks >> (PAGE_CACHE_SHIFT - 9)))) {
+ if (unlikely(n > (dir->i_blocks >> (PAGE_SHIFT - 9)))) {
nilfs_error(dir->i_sb, __func__,
"dir %lu size %lld exceeds block count %llu",
dir->i_ino, dir->i_size,
@@ -401,7 +401,7 @@ ino_t nilfs_inode_by_name(struct inode *dir, const struct qstr *qstr)
if (de) {
res = le64_to_cpu(de->inode);
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
return res;
}
@@ -460,7 +460,7 @@ int nilfs_add_link(struct dentry *dentry, struct inode *inode)
kaddr = page_address(page);
dir_end = kaddr + nilfs_last_byte(dir, n);
de = (struct nilfs_dir_entry *)kaddr;
- kaddr += PAGE_CACHE_SIZE - reclen;
+ kaddr += PAGE_SIZE - reclen;
while ((char *)de <= kaddr) {
if ((char *)de == dir_end) {
/* We hit i_size */
@@ -603,7 +603,7 @@ int nilfs_make_empty(struct inode *inode, struct inode *parent)
kunmap_atomic(kaddr);
nilfs_commit_chunk(page, mapping, 0, chunk_size);
fail:
- page_cache_release(page);
+ put_page(page);
return err;
}
diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c
index 748ca2389..0224b7826 100644
--- a/fs/nilfs2/gcinode.c
+++ b/fs/nilfs2/gcinode.c
@@ -115,7 +115,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff,
failed:
unlock_page(bh->b_page);
- page_cache_release(bh->b_page);
+ put_page(bh->b_page);
return err;
}
diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
index 21a1e2e0d..534631358 100644
--- a/fs/nilfs2/inode.c
+++ b/fs/nilfs2/inode.c
@@ -249,7 +249,7 @@ static int nilfs_set_page_dirty(struct page *page)
if (nr_dirty)
nilfs_set_file_dirty(inode, nr_dirty);
} else if (ret) {
- unsigned nr_dirty = 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ unsigned nr_dirty = 1 << (PAGE_SHIFT - inode->i_blkbits);
nilfs_set_file_dirty(inode, nr_dirty);
}
@@ -291,7 +291,7 @@ static int nilfs_write_end(struct file *file, struct address_space *mapping,
struct page *page, void *fsdata)
{
struct inode *inode = mapping->host;
- unsigned start = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned start = pos & (PAGE_SIZE - 1);
unsigned nr_dirty;
int err;
diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
index 1125f4023..f6982b915 100644
--- a/fs/nilfs2/mdt.c
+++ b/fs/nilfs2/mdt.c
@@ -110,7 +110,7 @@ static int nilfs_mdt_create_block(struct inode *inode, unsigned long block,
failed_bh:
unlock_page(bh->b_page);
- page_cache_release(bh->b_page);
+ put_page(bh->b_page);
brelse(bh);
failed_unlock:
@@ -170,7 +170,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff,
failed_bh:
unlock_page(bh->b_page);
- page_cache_release(bh->b_page);
+ put_page(bh->b_page);
brelse(bh);
failed:
return ret;
@@ -363,7 +363,7 @@ int nilfs_mdt_delete_block(struct inode *inode, unsigned long block)
int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
{
pgoff_t index = (pgoff_t)block >>
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ (PAGE_SHIFT - inode->i_blkbits);
struct page *page;
unsigned long first_block;
int ret = 0;
@@ -376,7 +376,7 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
wait_on_page_writeback(page);
first_block = (unsigned long)index <<
- (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ (PAGE_SHIFT - inode->i_blkbits);
if (page_has_buffers(page)) {
struct buffer_head *bh;
@@ -385,7 +385,7 @@ int nilfs_mdt_forget_block(struct inode *inode, unsigned long block)
}
still_dirty = PageDirty(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (still_dirty ||
invalidate_inode_pages2_range(inode->i_mapping, index, index) != 0)
@@ -578,7 +578,7 @@ int nilfs_mdt_freeze_buffer(struct inode *inode, struct buffer_head *bh)
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return 0;
}
@@ -597,7 +597,7 @@ nilfs_mdt_get_frozen_buffer(struct inode *inode, struct buffer_head *bh)
bh_frozen = nilfs_page_get_nth_block(page, n);
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
return bh_frozen;
}
diff --git a/fs/nilfs2/namei.c b/fs/nilfs2/namei.c
index 7ccdb961e..151bc19d4 100644
--- a/fs/nilfs2/namei.c
+++ b/fs/nilfs2/namei.c
@@ -431,11 +431,11 @@ static int nilfs_rename(struct inode *old_dir, struct dentry *old_dentry,
out_dir:
if (dir_de) {
kunmap(dir_page);
- page_cache_release(dir_page);
+ put_page(dir_page);
}
out_old:
kunmap(old_page);
- page_cache_release(old_page);
+ put_page(old_page);
out:
nilfs_transaction_abort(old_dir->i_sb);
return err;
diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
index 45d650add..489391561 100644
--- a/fs/nilfs2/page.c
+++ b/fs/nilfs2/page.c
@@ -50,7 +50,7 @@ __nilfs_get_page_block(struct page *page, unsigned long block, pgoff_t index,
if (!page_has_buffers(page))
create_empty_buffers(page, 1 << blkbits, b_state);
- first_block = (unsigned long)index << (PAGE_CACHE_SHIFT - blkbits);
+ first_block = (unsigned long)index << (PAGE_SHIFT - blkbits);
bh = nilfs_page_get_nth_block(page, block - first_block);
touch_buffer(bh);
@@ -64,7 +64,7 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
unsigned long b_state)
{
int blkbits = inode->i_blkbits;
- pgoff_t index = blkoff >> (PAGE_CACHE_SHIFT - blkbits);
+ pgoff_t index = blkoff >> (PAGE_SHIFT - blkbits);
struct page *page;
struct buffer_head *bh;
@@ -75,7 +75,7 @@ struct buffer_head *nilfs_grab_buffer(struct inode *inode,
bh = __nilfs_get_page_block(page, blkoff, index, blkbits, b_state);
if (unlikely(!bh)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return NULL;
}
return bh;
@@ -180,7 +180,7 @@ void nilfs_page_bug(struct page *page)
printk(KERN_CRIT "NILFS_PAGE_BUG(%p): cnt=%d index#=%llu flags=0x%lx "
"mapping=%p ino=%lu\n",
- page, atomic_read(&page->_count),
+ page, page_ref_count(page),
(unsigned long long)page->index, page->flags, m, ino);
if (page_has_buffers(page)) {
@@ -288,7 +288,7 @@ repeat:
__set_page_dirty_nobuffers(dpage);
unlock_page(dpage);
- page_cache_release(dpage);
+ put_page(dpage);
unlock_page(page);
}
pagevec_release(&pvec);
@@ -333,7 +333,7 @@ repeat:
WARN_ON(PageDirty(dpage));
nilfs_copy_page(dpage, page, 0);
unlock_page(dpage);
- page_cache_release(dpage);
+ put_page(dpage);
} else {
struct page *page2;
@@ -350,7 +350,7 @@ repeat:
if (unlikely(err < 0)) {
WARN_ON(err == -EEXIST);
page->mapping = NULL;
- page_cache_release(page); /* for cache */
+ put_page(page); /* for cache */
} else {
page->mapping = dmap;
dmap->nrpages++;
@@ -523,8 +523,8 @@ unsigned long nilfs_find_uncommitted_extent(struct inode *inode,
if (inode->i_mapping->nrpages == 0)
return 0;
- index = start_blk >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
- nblocks_in_page = 1U << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ index = start_blk >> (PAGE_SHIFT - inode->i_blkbits);
+ nblocks_in_page = 1U << (PAGE_SHIFT - inode->i_blkbits);
pagevec_init(&pvec, 0);
@@ -537,7 +537,7 @@ repeat:
if (length > 0 && pvec.pages[0]->index > index)
goto out;
- b = pvec.pages[0]->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ b = pvec.pages[0]->index << (PAGE_SHIFT - inode->i_blkbits);
i = 0;
do {
page = pvec.pages[i];
diff --git a/fs/nilfs2/recovery.c b/fs/nilfs2/recovery.c
index 9b4f205d1..5afa77fad 100644
--- a/fs/nilfs2/recovery.c
+++ b/fs/nilfs2/recovery.c
@@ -544,14 +544,14 @@ static int nilfs_recover_dsync_blocks(struct the_nilfs *nilfs,
blocksize, page, NULL);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
(*nr_salvaged_blocks)++;
goto next;
failed_page:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
failed_inode:
printk(KERN_WARNING
diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c
index 3b65adaae..4317f7256 100644
--- a/fs/nilfs2/segment.c
+++ b/fs/nilfs2/segment.c
@@ -2070,7 +2070,7 @@ static int nilfs_segctor_do_construct(struct nilfs_sc_info *sci, int mode)
goto failed_to_write;
if (nilfs_sc_cstage_get(sci) == NILFS_ST_DONE ||
- nilfs->ns_blocksize_bits != PAGE_CACHE_SHIFT) {
+ nilfs->ns_blocksize_bits != PAGE_SHIFT) {
/*
* At this point, we avoid double buffering
* for blocksize < pagesize because page dirty
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index 7521e11db..97768a137 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -74,7 +74,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
set_buffer_uptodate(bh);
- file_ofs = ((s64)page->index << PAGE_CACHE_SHIFT) +
+ file_ofs = ((s64)page->index << PAGE_SHIFT) +
bh_offset(bh);
read_lock_irqsave(&ni->size_lock, flags);
init_size = ni->initialized_size;
@@ -142,7 +142,7 @@ static void ntfs_end_buffer_async_read(struct buffer_head *bh, int uptodate)
u32 rec_size;
rec_size = ni->itype.index.block_size;
- recs = PAGE_CACHE_SIZE / rec_size;
+ recs = PAGE_SIZE / rec_size;
/* Should have been verified before we got here... */
BUG_ON(!recs);
local_irq_save(flags);
@@ -229,7 +229,7 @@ static int ntfs_read_block(struct page *page)
* fully truncated, truncate will throw it away as soon as we unlock
* it so no need to worry what we do with it.
*/
- iblock = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+ iblock = (s64)page->index << (PAGE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags);
lblock = (ni->allocated_size + blocksize - 1) >> blocksize_bits;
init_size = ni->initialized_size;
@@ -412,9 +412,9 @@ retry_readpage:
vi = page->mapping->host;
i_size = i_size_read(vi);
/* Is the page fully outside i_size? (truncate in progress) */
- if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT)) {
- zero_user(page, 0, PAGE_CACHE_SIZE);
+ if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
+ PAGE_SHIFT)) {
+ zero_user(page, 0, PAGE_SIZE);
ntfs_debug("Read outside i_size - truncated?");
goto done;
}
@@ -463,7 +463,7 @@ retry_readpage:
* ok to ignore the compressed flag here.
*/
if (unlikely(page->index > 0)) {
- zero_user(page, 0, PAGE_CACHE_SIZE);
+ zero_user(page, 0, PAGE_SIZE);
goto done;
}
if (!NInoAttr(ni))
@@ -509,7 +509,7 @@ retry_readpage:
le16_to_cpu(ctx->attr->data.resident.value_offset),
attr_len);
/* Zero the remainder of the page. */
- memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+ memset(addr + attr_len, 0, PAGE_SIZE - attr_len);
flush_dcache_page(page);
kunmap_atomic(addr);
put_unm_err_out:
@@ -599,7 +599,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
/* NOTE: Different naming scheme to ntfs_read_block()! */
/* The first block in the page. */
- block = (s64)page->index << (PAGE_CACHE_SHIFT - blocksize_bits);
+ block = (s64)page->index << (PAGE_SHIFT - blocksize_bits);
read_lock_irqsave(&ni->size_lock, flags);
i_size = i_size_read(vi);
@@ -674,7 +674,7 @@ static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
// in the inode.
// Again, for each page do:
// __set_page_dirty_buffers();
- // page_cache_release()
+ // put_page()
// We don't need to wait on the writes.
// Update iblock.
}
@@ -925,7 +925,7 @@ static int ntfs_write_mst_block(struct page *page,
ntfs_volume *vol = ni->vol;
u8 *kaddr;
unsigned int rec_size = ni->itype.index.block_size;
- ntfs_inode *locked_nis[PAGE_CACHE_SIZE / rec_size];
+ ntfs_inode *locked_nis[PAGE_SIZE / rec_size];
struct buffer_head *bh, *head, *tbh, *rec_start_bh;
struct buffer_head *bhs[MAX_BUF_PER_PAGE];
runlist_element *rl;
@@ -949,7 +949,7 @@ static int ntfs_write_mst_block(struct page *page,
(NInoAttr(ni) && ni->type == AT_INDEX_ALLOCATION)));
bh_size = vol->sb->s_blocksize;
bh_size_bits = vol->sb->s_blocksize_bits;
- max_bhs = PAGE_CACHE_SIZE / bh_size;
+ max_bhs = PAGE_SIZE / bh_size;
BUG_ON(!max_bhs);
BUG_ON(max_bhs > MAX_BUF_PER_PAGE);
@@ -961,13 +961,13 @@ static int ntfs_write_mst_block(struct page *page,
BUG_ON(!bh);
rec_size_bits = ni->itype.index.block_size_bits;
- BUG_ON(!(PAGE_CACHE_SIZE >> rec_size_bits));
+ BUG_ON(!(PAGE_SIZE >> rec_size_bits));
bhs_per_rec = rec_size >> bh_size_bits;
BUG_ON(!bhs_per_rec);
/* The first block in the page. */
rec_block = block = (sector_t)page->index <<
- (PAGE_CACHE_SHIFT - bh_size_bits);
+ (PAGE_SHIFT - bh_size_bits);
/* The first out of bounds block for the data size. */
dblock = (i_size_read(vi) + bh_size - 1) >> bh_size_bits;
@@ -1133,7 +1133,7 @@ lock_retry_remap:
unsigned long mft_no;
/* Get the mft record number. */
- mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
+ mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
>> rec_size_bits;
/* Check whether to write this mft record. */
tni = NULL;
@@ -1249,7 +1249,7 @@ do_mirror:
continue;
ofs = bh_offset(tbh);
/* Get the mft record number. */
- mft_no = (((s64)page->index << PAGE_CACHE_SHIFT) + ofs)
+ mft_no = (((s64)page->index << PAGE_SHIFT) + ofs)
>> rec_size_bits;
if (mft_no < vol->mftmirr_size)
ntfs_sync_mft_mirror(vol, mft_no,
@@ -1300,7 +1300,7 @@ done:
* Set page error if there is only one ntfs record in the page.
* Otherwise we would loose per-record granularity.
*/
- if (ni->itype.index.block_size == PAGE_CACHE_SIZE)
+ if (ni->itype.index.block_size == PAGE_SIZE)
SetPageError(page);
NVolSetErrors(vol);
}
@@ -1308,7 +1308,7 @@ done:
ntfs_debug("Page still contains one or more dirty ntfs "
"records. Redirtying the page starting at "
"record 0x%lx.", page->index <<
- (PAGE_CACHE_SHIFT - rec_size_bits));
+ (PAGE_SHIFT - rec_size_bits));
redirty_page_for_writepage(wbc, page);
unlock_page(page);
} else {
@@ -1365,13 +1365,13 @@ retry_writepage:
BUG_ON(!PageLocked(page));
i_size = i_size_read(vi);
/* Is the page fully outside i_size? (truncate in progress) */
- if (unlikely(page->index >= (i_size + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT)) {
+ if (unlikely(page->index >= (i_size + PAGE_SIZE - 1) >>
+ PAGE_SHIFT)) {
/*
* The page may have dirty, unmapped buffers. Make them
* freeable here, so the page does not leak.
*/
- block_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ block_invalidatepage(page, 0, PAGE_SIZE);
unlock_page(page);
ntfs_debug("Write outside i_size - truncated?");
return 0;
@@ -1414,10 +1414,10 @@ retry_writepage:
/* NInoNonResident() == NInoIndexAllocPresent() */
if (NInoNonResident(ni)) {
/* We have to zero every time due to mmap-at-end-of-file. */
- if (page->index >= (i_size >> PAGE_CACHE_SHIFT)) {
+ if (page->index >= (i_size >> PAGE_SHIFT)) {
/* The page straddles i_size. */
- unsigned int ofs = i_size & ~PAGE_CACHE_MASK;
- zero_user_segment(page, ofs, PAGE_CACHE_SIZE);
+ unsigned int ofs = i_size & ~PAGE_MASK;
+ zero_user_segment(page, ofs, PAGE_SIZE);
}
/* Handle mst protected attributes. */
if (NInoMstProtected(ni))
@@ -1500,7 +1500,7 @@ retry_writepage:
le16_to_cpu(ctx->attr->data.resident.value_offset),
addr, attr_len);
/* Zero out of bounds area in the page cache page. */
- memset(addr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+ memset(addr + attr_len, 0, PAGE_SIZE - attr_len);
kunmap_atomic(addr);
flush_dcache_page(page);
flush_dcache_mft_record_page(ctx->ntfs_ino);
diff --git a/fs/ntfs/aops.h b/fs/ntfs/aops.h
index caecc58f5..820d6eabf 100644
--- a/fs/ntfs/aops.h
+++ b/fs/ntfs/aops.h
@@ -40,7 +40,7 @@
static inline void ntfs_unmap_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
/**
@@ -49,7 +49,7 @@ static inline void ntfs_unmap_page(struct page *page)
* @index: index into the page cache for @mapping of the page to map
*
* Read a page from the page cache of the address space @mapping at position
- * @index, where @index is in units of PAGE_CACHE_SIZE, and not in bytes.
+ * @index, where @index is in units of PAGE_SIZE, and not in bytes.
*
* If the page is not in memory it is loaded from disk first using the readpage
* method defined in the address space operations of @mapping and the page is
diff --git a/fs/ntfs/attrib.c b/fs/ntfs/attrib.c
index 250ed5b20..44a39a099 100644
--- a/fs/ntfs/attrib.c
+++ b/fs/ntfs/attrib.c
@@ -152,7 +152,7 @@ int ntfs_map_runlist_nolock(ntfs_inode *ni, VCN vcn, ntfs_attr_search_ctx *ctx)
if (old_ctx.base_ntfs_ino && old_ctx.ntfs_ino !=
old_ctx.base_ntfs_ino) {
put_this_page = old_ctx.ntfs_ino->page;
- page_cache_get(put_this_page);
+ get_page(put_this_page);
}
/*
* Reinitialize the search context so we can lookup the
@@ -275,7 +275,7 @@ retry_map:
* the pieces anyway.
*/
if (put_this_page)
- page_cache_release(put_this_page);
+ put_page(put_this_page);
}
return err;
}
@@ -1660,7 +1660,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
memcpy(kaddr, (u8*)a +
le16_to_cpu(a->data.resident.value_offset),
attr_size);
- memset(kaddr + attr_size, 0, PAGE_CACHE_SIZE - attr_size);
+ memset(kaddr + attr_size, 0, PAGE_SIZE - attr_size);
kunmap_atomic(kaddr);
flush_dcache_page(page);
SetPageUptodate(page);
@@ -1748,7 +1748,7 @@ int ntfs_attr_make_non_resident(ntfs_inode *ni, const u32 data_size)
if (page) {
set_page_dirty(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
ntfs_debug("Done.");
return 0;
@@ -1835,7 +1835,7 @@ rl_err_out:
ntfs_free(rl);
page_err_out:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
if (err == -EINVAL)
err = -EIO;
@@ -2513,17 +2513,17 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
BUG_ON(NInoEncrypted(ni));
mapping = VFS_I(ni)->i_mapping;
/* Work out the starting index and page offset. */
- idx = ofs >> PAGE_CACHE_SHIFT;
- start_ofs = ofs & ~PAGE_CACHE_MASK;
+ idx = ofs >> PAGE_SHIFT;
+ start_ofs = ofs & ~PAGE_MASK;
/* Work out the ending index and page offset. */
end = ofs + cnt;
- end_ofs = end & ~PAGE_CACHE_MASK;
+ end_ofs = end & ~PAGE_MASK;
/* If the end is outside the inode size return -ESPIPE. */
if (unlikely(end > i_size_read(VFS_I(ni)))) {
ntfs_error(vol->sb, "Request exceeds end of attribute.");
return -ESPIPE;
}
- end >>= PAGE_CACHE_SHIFT;
+ end >>= PAGE_SHIFT;
/* If there is a first partial page, need to do it the slow way. */
if (start_ofs) {
page = read_mapping_page(mapping, idx, NULL);
@@ -2536,7 +2536,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
* If the last page is the same as the first page, need to
* limit the write to the end offset.
*/
- size = PAGE_CACHE_SIZE;
+ size = PAGE_SIZE;
if (idx == end)
size = end_ofs;
kaddr = kmap_atomic(page);
@@ -2544,7 +2544,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
flush_dcache_page(page);
kunmap_atomic(kaddr);
set_page_dirty(page);
- page_cache_release(page);
+ put_page(page);
balance_dirty_pages_ratelimited(mapping);
cond_resched();
if (idx == end)
@@ -2561,7 +2561,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
return -ENOMEM;
}
kaddr = kmap_atomic(page);
- memset(kaddr, val, PAGE_CACHE_SIZE);
+ memset(kaddr, val, PAGE_SIZE);
flush_dcache_page(page);
kunmap_atomic(kaddr);
/*
@@ -2585,7 +2585,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
set_page_dirty(page);
/* Finally unlock and release the page. */
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
balance_dirty_pages_ratelimited(mapping);
cond_resched();
}
@@ -2602,7 +2602,7 @@ int ntfs_attr_set(ntfs_inode *ni, const s64 ofs, const s64 cnt, const u8 val)
flush_dcache_page(page);
kunmap_atomic(kaddr);
set_page_dirty(page);
- page_cache_release(page);
+ put_page(page);
balance_dirty_pages_ratelimited(mapping);
cond_resched();
}
diff --git a/fs/ntfs/bitmap.c b/fs/ntfs/bitmap.c
index 0809cf876..ec130c588 100644
--- a/fs/ntfs/bitmap.c
+++ b/fs/ntfs/bitmap.c
@@ -67,8 +67,8 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
* Calculate the indices for the pages containing the first and last
* bits, i.e. @start_bit and @start_bit + @cnt - 1, respectively.
*/
- index = start_bit >> (3 + PAGE_CACHE_SHIFT);
- end_index = (start_bit + cnt - 1) >> (3 + PAGE_CACHE_SHIFT);
+ index = start_bit >> (3 + PAGE_SHIFT);
+ end_index = (start_bit + cnt - 1) >> (3 + PAGE_SHIFT);
/* Get the page containing the first bit (@start_bit). */
mapping = vi->i_mapping;
@@ -82,7 +82,7 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
kaddr = page_address(page);
/* Set @pos to the position of the byte containing @start_bit. */
- pos = (start_bit >> 3) & ~PAGE_CACHE_MASK;
+ pos = (start_bit >> 3) & ~PAGE_MASK;
/* Calculate the position of @start_bit in the first byte. */
bit = start_bit & 7;
@@ -108,7 +108,7 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
* Depending on @value, modify all remaining whole bytes in the page up
* to @cnt.
*/
- len = min_t(s64, cnt >> 3, PAGE_CACHE_SIZE - pos);
+ len = min_t(s64, cnt >> 3, PAGE_SIZE - pos);
memset(kaddr + pos, value ? 0xff : 0, len);
cnt -= len << 3;
@@ -132,7 +132,7 @@ int __ntfs_bitmap_set_bits_in_run(struct inode *vi, const s64 start_bit,
* Depending on @value, modify all remaining whole bytes in the
* page up to @cnt.
*/
- len = min_t(s64, cnt >> 3, PAGE_CACHE_SIZE);
+ len = min_t(s64, cnt >> 3, PAGE_SIZE);
memset(kaddr, value ? 0xff : 0, len);
cnt -= len << 3;
}
diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c
index f82498c35..f2b5e746f 100644
--- a/fs/ntfs/compress.c
+++ b/fs/ntfs/compress.c
@@ -104,16 +104,12 @@ static void zero_partial_compressed_page(struct page *page,
unsigned int kp_ofs;
ntfs_debug("Zeroing page region outside initialized size.");
- if (((s64)page->index << PAGE_CACHE_SHIFT) >= initialized_size) {
- /*
- * FIXME: Using clear_page() will become wrong when we get
- * PAGE_CACHE_SIZE != PAGE_SIZE but for now there is no problem.
- */
+ if (((s64)page->index << PAGE_SHIFT) >= initialized_size) {
clear_page(kp);
return;
}
- kp_ofs = initialized_size & ~PAGE_CACHE_MASK;
- memset(kp + kp_ofs, 0, PAGE_CACHE_SIZE - kp_ofs);
+ kp_ofs = initialized_size & ~PAGE_MASK;
+ memset(kp + kp_ofs, 0, PAGE_SIZE - kp_ofs);
return;
}
@@ -123,7 +119,7 @@ static void zero_partial_compressed_page(struct page *page,
static inline void handle_bounds_compressed_page(struct page *page,
const loff_t i_size, const s64 initialized_size)
{
- if ((page->index >= (initialized_size >> PAGE_CACHE_SHIFT)) &&
+ if ((page->index >= (initialized_size >> PAGE_SHIFT)) &&
(initialized_size < i_size))
zero_partial_compressed_page(page, initialized_size);
return;
@@ -160,7 +156,7 @@ static inline void handle_bounds_compressed_page(struct page *page,
* @xpage_done indicates whether the target page (@dest_pages[@xpage]) was
* completed during the decompression of the compression block (@cb_start).
*
- * Warning: This function *REQUIRES* PAGE_CACHE_SIZE >= 4096 or it will blow up
+ * Warning: This function *REQUIRES* PAGE_SIZE >= 4096 or it will blow up
* unpredicatbly! You have been warned!
*
* Note to hackers: This function may not sleep until it has finished accessing
@@ -241,7 +237,7 @@ return_error:
if (di == xpage)
*xpage_done = 1;
else
- page_cache_release(dp);
+ put_page(dp);
dest_pages[di] = NULL;
}
}
@@ -274,7 +270,7 @@ return_error:
cb = cb_sb_end;
/* Advance destination position to next sub-block. */
- *dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_CACHE_MASK;
+ *dest_ofs = (*dest_ofs + NTFS_SB_SIZE) & ~PAGE_MASK;
if (!*dest_ofs && (++*dest_index > dest_max_index))
goto return_overflow;
goto do_next_sb;
@@ -301,7 +297,7 @@ return_error:
/* Advance destination position to next sub-block. */
*dest_ofs += NTFS_SB_SIZE;
- if (!(*dest_ofs &= ~PAGE_CACHE_MASK)) {
+ if (!(*dest_ofs &= ~PAGE_MASK)) {
finalize_page:
/*
* First stage: add current page index to array of
@@ -335,7 +331,7 @@ do_next_tag:
*dest_ofs += nr_bytes;
}
/* We have finished the current sub-block. */
- if (!(*dest_ofs &= ~PAGE_CACHE_MASK))
+ if (!(*dest_ofs &= ~PAGE_MASK))
goto finalize_page;
goto do_next_sb;
}
@@ -462,7 +458,7 @@ return_overflow:
* have been written to so that we would lose data if we were to just overwrite
* them with the out-of-date uncompressed data.
*
- * FIXME: For PAGE_CACHE_SIZE > cb_size we are not doing the Right Thing(TM) at
+ * FIXME: For PAGE_SIZE > cb_size we are not doing the Right Thing(TM) at
* the end of the file I think. We need to detect this case and zero the out
* of bounds remainder of the page in question and mark it as handled. At the
* moment we would just return -EIO on such a page. This bug will only become
@@ -470,7 +466,7 @@ return_overflow:
* clusters so is probably not going to be seen by anyone. Still this should
* be fixed. (AIA)
*
- * FIXME: Again for PAGE_CACHE_SIZE > cb_size we are screwing up both in
+ * FIXME: Again for PAGE_SIZE > cb_size we are screwing up both in
* handling sparse and compressed cbs. (AIA)
*
* FIXME: At the moment we don't do any zeroing out in the case that
@@ -497,14 +493,14 @@ int ntfs_read_compressed_block(struct page *page)
u64 cb_size_mask = cb_size - 1UL;
VCN vcn;
LCN lcn;
- /* The first wanted vcn (minimum alignment is PAGE_CACHE_SIZE). */
- VCN start_vcn = (((s64)index << PAGE_CACHE_SHIFT) & ~cb_size_mask) >>
+ /* The first wanted vcn (minimum alignment is PAGE_SIZE). */
+ VCN start_vcn = (((s64)index << PAGE_SHIFT) & ~cb_size_mask) >>
vol->cluster_size_bits;
/*
* The first vcn after the last wanted vcn (minimum alignment is again
- * PAGE_CACHE_SIZE.
+ * PAGE_SIZE.
*/
- VCN end_vcn = ((((s64)(index + 1UL) << PAGE_CACHE_SHIFT) + cb_size - 1)
+ VCN end_vcn = ((((s64)(index + 1UL) << PAGE_SHIFT) + cb_size - 1)
& ~cb_size_mask) >> vol->cluster_size_bits;
/* Number of compression blocks (cbs) in the wanted vcn range. */
unsigned int nr_cbs = (end_vcn - start_vcn) << vol->cluster_size_bits
@@ -515,7 +511,7 @@ int ntfs_read_compressed_block(struct page *page)
* guarantees of start_vcn and end_vcn, no need to round up here.
*/
unsigned int nr_pages = (end_vcn - start_vcn) <<
- vol->cluster_size_bits >> PAGE_CACHE_SHIFT;
+ vol->cluster_size_bits >> PAGE_SHIFT;
unsigned int xpage, max_page, cur_page, cur_ofs, i;
unsigned int cb_clusters, cb_max_ofs;
int block, max_block, cb_max_page, bhs_size, nr_bhs, err = 0;
@@ -549,7 +545,7 @@ int ntfs_read_compressed_block(struct page *page)
* We have already been given one page, this is the one we must do.
* Once again, the alignment guarantees keep it simple.
*/
- offset = start_vcn << vol->cluster_size_bits >> PAGE_CACHE_SHIFT;
+ offset = start_vcn << vol->cluster_size_bits >> PAGE_SHIFT;
xpage = index - offset;
pages[xpage] = page;
/*
@@ -560,13 +556,13 @@ int ntfs_read_compressed_block(struct page *page)
i_size = i_size_read(VFS_I(ni));
initialized_size = ni->initialized_size;
read_unlock_irqrestore(&ni->size_lock, flags);
- max_page = ((i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT) -
+ max_page = ((i_size + PAGE_SIZE - 1) >> PAGE_SHIFT) -
offset;
/* Is the page fully outside i_size? (truncate in progress) */
if (xpage >= max_page) {
kfree(bhs);
kfree(pages);
- zero_user(page, 0, PAGE_CACHE_SIZE);
+ zero_user(page, 0, PAGE_SIZE);
ntfs_debug("Compressed read outside i_size - truncated?");
SetPageUptodate(page);
unlock_page(page);
@@ -591,7 +587,7 @@ int ntfs_read_compressed_block(struct page *page)
continue;
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
pages[i] = NULL;
}
}
@@ -735,9 +731,9 @@ lock_retry_remap:
ntfs_debug("Successfully read the compression block.");
/* The last page and maximum offset within it for the current cb. */
- cb_max_page = (cur_page << PAGE_CACHE_SHIFT) + cur_ofs + cb_size;
- cb_max_ofs = cb_max_page & ~PAGE_CACHE_MASK;
- cb_max_page >>= PAGE_CACHE_SHIFT;
+ cb_max_page = (cur_page << PAGE_SHIFT) + cur_ofs + cb_size;
+ cb_max_ofs = cb_max_page & ~PAGE_MASK;
+ cb_max_page >>= PAGE_SHIFT;
/* Catch end of file inside a compression block. */
if (cb_max_page > max_page)
@@ -753,16 +749,11 @@ lock_retry_remap:
for (; cur_page < cb_max_page; cur_page++) {
page = pages[cur_page];
if (page) {
- /*
- * FIXME: Using clear_page() will become wrong
- * when we get PAGE_CACHE_SIZE != PAGE_SIZE but
- * for now there is no problem.
- */
if (likely(!cur_ofs))
clear_page(page_address(page));
else
memset(page_address(page) + cur_ofs, 0,
- PAGE_CACHE_SIZE -
+ PAGE_SIZE -
cur_ofs);
flush_dcache_page(page);
kunmap(page);
@@ -771,10 +762,10 @@ lock_retry_remap:
if (cur_page == xpage)
xpage_done = 1;
else
- page_cache_release(page);
+ put_page(page);
pages[cur_page] = NULL;
}
- cb_pos += PAGE_CACHE_SIZE - cur_ofs;
+ cb_pos += PAGE_SIZE - cur_ofs;
cur_ofs = 0;
if (cb_pos >= cb_end)
break;
@@ -807,7 +798,7 @@ lock_retry_remap:
* synchronous io for the majority of pages.
* Or if we choose not to do the read-ahead/-behind stuff, we
* could just return block_read_full_page(pages[xpage]) as long
- * as PAGE_CACHE_SIZE <= cb_size.
+ * as PAGE_SIZE <= cb_size.
*/
if (cb_max_ofs)
cb_max_page--;
@@ -816,8 +807,8 @@ lock_retry_remap:
page = pages[cur_page];
if (page)
memcpy(page_address(page) + cur_ofs, cb_pos,
- PAGE_CACHE_SIZE - cur_ofs);
- cb_pos += PAGE_CACHE_SIZE - cur_ofs;
+ PAGE_SIZE - cur_ofs);
+ cb_pos += PAGE_SIZE - cur_ofs;
cur_ofs = 0;
if (cb_pos >= cb_end)
break;
@@ -850,10 +841,10 @@ lock_retry_remap:
if (cur2_page == xpage)
xpage_done = 1;
else
- page_cache_release(page);
+ put_page(page);
pages[cur2_page] = NULL;
}
- cb_pos2 += PAGE_CACHE_SIZE - cur_ofs2;
+ cb_pos2 += PAGE_SIZE - cur_ofs2;
cur_ofs2 = 0;
if (cb_pos2 >= cb_end)
break;
@@ -884,7 +875,7 @@ lock_retry_remap:
kunmap(page);
unlock_page(page);
if (prev_cur_page != xpage)
- page_cache_release(page);
+ put_page(page);
pages[prev_cur_page] = NULL;
}
}
@@ -914,7 +905,7 @@ lock_retry_remap:
kunmap(page);
unlock_page(page);
if (cur_page != xpage)
- page_cache_release(page);
+ put_page(page);
pages[cur_page] = NULL;
}
}
@@ -961,7 +952,7 @@ err_out:
kunmap(page);
unlock_page(page);
if (i != xpage)
- page_cache_release(page);
+ put_page(page);
}
}
kfree(pages);
diff --git a/fs/ntfs/dir.c b/fs/ntfs/dir.c
index b2eff5816..a18613579 100644
--- a/fs/ntfs/dir.c
+++ b/fs/ntfs/dir.c
@@ -315,11 +315,11 @@ found_it:
descend_into_child_node:
/*
* Convert vcn to index into the index allocation attribute in units
- * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+ * of PAGE_SIZE and map the page cache page, reading it from
* disk if necessary.
*/
page = ntfs_map_page(ia_mapping, vcn <<
- dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
+ dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
if (IS_ERR(page)) {
ntfs_error(sb, "Failed to map directory index page, error %ld.",
-PTR_ERR(page));
@@ -331,9 +331,9 @@ descend_into_child_node:
fast_descend_into_child_node:
/* Get to the index allocation block. */
ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
- dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
+ dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
/* Bounds checks. */
- if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+ if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
"inode 0x%lx or driver bug.", dir_ni->mft_no);
goto unm_err_out;
@@ -366,7 +366,7 @@ fast_descend_into_child_node:
goto unm_err_out;
}
index_end = (u8*)ia + dir_ni->itype.index.block_size;
- if (index_end > kaddr + PAGE_CACHE_SIZE) {
+ if (index_end > kaddr + PAGE_SIZE) {
ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
"0x%lx crosses page boundary. Impossible! "
"Cannot access! This is probably a bug in the "
@@ -559,9 +559,9 @@ found_it2:
/* If vcn is in the same page cache page as old_vcn we
* recycle the mapped page. */
if (old_vcn << vol->cluster_size_bits >>
- PAGE_CACHE_SHIFT == vcn <<
+ PAGE_SHIFT == vcn <<
vol->cluster_size_bits >>
- PAGE_CACHE_SHIFT)
+ PAGE_SHIFT)
goto fast_descend_into_child_node;
unlock_page(page);
ntfs_unmap_page(page);
@@ -793,11 +793,11 @@ found_it:
descend_into_child_node:
/*
* Convert vcn to index into the index allocation attribute in units
- * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+ * of PAGE_SIZE and map the page cache page, reading it from
* disk if necessary.
*/
page = ntfs_map_page(ia_mapping, vcn <<
- dir_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
+ dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
if (IS_ERR(page)) {
ntfs_error(sb, "Failed to map directory index page, error %ld.",
-PTR_ERR(page));
@@ -809,9 +809,9 @@ descend_into_child_node:
fast_descend_into_child_node:
/* Get to the index allocation block. */
ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
- dir_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
+ dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
/* Bounds checks. */
- if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+ if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
"inode 0x%lx or driver bug.", dir_ni->mft_no);
goto unm_err_out;
@@ -844,7 +844,7 @@ fast_descend_into_child_node:
goto unm_err_out;
}
index_end = (u8*)ia + dir_ni->itype.index.block_size;
- if (index_end > kaddr + PAGE_CACHE_SIZE) {
+ if (index_end > kaddr + PAGE_SIZE) {
ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
"0x%lx crosses page boundary. Impossible! "
"Cannot access! This is probably a bug in the "
@@ -968,9 +968,9 @@ found_it2:
/* If vcn is in the same page cache page as old_vcn we
* recycle the mapped page. */
if (old_vcn << vol->cluster_size_bits >>
- PAGE_CACHE_SHIFT == vcn <<
+ PAGE_SHIFT == vcn <<
vol->cluster_size_bits >>
- PAGE_CACHE_SHIFT)
+ PAGE_SHIFT)
goto fast_descend_into_child_node;
unlock_page(page);
ntfs_unmap_page(page);
@@ -1246,15 +1246,15 @@ skip_index_root:
goto iput_err_out;
}
/* Get the starting bit position in the current bitmap page. */
- cur_bmp_pos = bmp_pos & ((PAGE_CACHE_SIZE * 8) - 1);
- bmp_pos &= ~(u64)((PAGE_CACHE_SIZE * 8) - 1);
+ cur_bmp_pos = bmp_pos & ((PAGE_SIZE * 8) - 1);
+ bmp_pos &= ~(u64)((PAGE_SIZE * 8) - 1);
get_next_bmp_page:
ntfs_debug("Reading bitmap with page index 0x%llx, bit ofs 0x%llx",
- (unsigned long long)bmp_pos >> (3 + PAGE_CACHE_SHIFT),
+ (unsigned long long)bmp_pos >> (3 + PAGE_SHIFT),
(unsigned long long)bmp_pos &
- (unsigned long long)((PAGE_CACHE_SIZE * 8) - 1));
+ (unsigned long long)((PAGE_SIZE * 8) - 1));
bmp_page = ntfs_map_page(bmp_mapping,
- bmp_pos >> (3 + PAGE_CACHE_SHIFT));
+ bmp_pos >> (3 + PAGE_SHIFT));
if (IS_ERR(bmp_page)) {
ntfs_error(sb, "Reading index bitmap failed.");
err = PTR_ERR(bmp_page);
@@ -1270,9 +1270,9 @@ find_next_index_buffer:
* If we have reached the end of the bitmap page, get the next
* page, and put away the old one.
*/
- if (unlikely((cur_bmp_pos >> 3) >= PAGE_CACHE_SIZE)) {
+ if (unlikely((cur_bmp_pos >> 3) >= PAGE_SIZE)) {
ntfs_unmap_page(bmp_page);
- bmp_pos += PAGE_CACHE_SIZE * 8;
+ bmp_pos += PAGE_SIZE * 8;
cur_bmp_pos = 0;
goto get_next_bmp_page;
}
@@ -1285,8 +1285,8 @@ find_next_index_buffer:
ntfs_debug("Handling index buffer 0x%llx.",
(unsigned long long)bmp_pos + cur_bmp_pos);
/* If the current index buffer is in the same page we reuse the page. */
- if ((prev_ia_pos & (s64)PAGE_CACHE_MASK) !=
- (ia_pos & (s64)PAGE_CACHE_MASK)) {
+ if ((prev_ia_pos & (s64)PAGE_MASK) !=
+ (ia_pos & (s64)PAGE_MASK)) {
prev_ia_pos = ia_pos;
if (likely(ia_page != NULL)) {
unlock_page(ia_page);
@@ -1296,7 +1296,7 @@ find_next_index_buffer:
* Map the page cache page containing the current ia_pos,
* reading it from disk if necessary.
*/
- ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_CACHE_SHIFT);
+ ia_page = ntfs_map_page(ia_mapping, ia_pos >> PAGE_SHIFT);
if (IS_ERR(ia_page)) {
ntfs_error(sb, "Reading index allocation data failed.");
err = PTR_ERR(ia_page);
@@ -1307,10 +1307,10 @@ find_next_index_buffer:
kaddr = (u8*)page_address(ia_page);
}
/* Get the current index buffer. */
- ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_CACHE_MASK &
- ~(s64)(ndir->itype.index.block_size - 1)));
+ ia = (INDEX_ALLOCATION*)(kaddr + (ia_pos & ~PAGE_MASK &
+ ~(s64)(ndir->itype.index.block_size - 1)));
/* Bounds checks. */
- if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE)) {
+ if (unlikely((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE)) {
ntfs_error(sb, "Out of bounds check failed. Corrupt directory "
"inode 0x%lx or driver bug.", vdir->i_ino);
goto err_out;
@@ -1348,7 +1348,7 @@ find_next_index_buffer:
goto err_out;
}
index_end = (u8*)ia + ndir->itype.index.block_size;
- if (unlikely(index_end > kaddr + PAGE_CACHE_SIZE)) {
+ if (unlikely(index_end > kaddr + PAGE_SIZE)) {
ntfs_error(sb, "Index buffer (VCN 0x%llx) of directory inode "
"0x%lx crosses page boundary. Impossible! "
"Cannot access! This is probably a bug in the "
diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c
index bed4d427d..91117ada8 100644
--- a/fs/ntfs/file.c
+++ b/fs/ntfs/file.c
@@ -220,8 +220,8 @@ do_non_resident_extend:
m = NULL;
}
mapping = vi->i_mapping;
- index = old_init_size >> PAGE_CACHE_SHIFT;
- end_index = (new_init_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ index = old_init_size >> PAGE_SHIFT;
+ end_index = (new_init_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
do {
/*
* Read the page. If the page is not present, this will zero
@@ -233,7 +233,7 @@ do_non_resident_extend:
goto init_err_out;
}
if (unlikely(PageError(page))) {
- page_cache_release(page);
+ put_page(page);
err = -EIO;
goto init_err_out;
}
@@ -242,13 +242,13 @@ do_non_resident_extend:
* enough to make ntfs_writepage() work.
*/
write_lock_irqsave(&ni->size_lock, flags);
- ni->initialized_size = (s64)(index + 1) << PAGE_CACHE_SHIFT;
+ ni->initialized_size = (s64)(index + 1) << PAGE_SHIFT;
if (ni->initialized_size > new_init_size)
ni->initialized_size = new_init_size;
write_unlock_irqrestore(&ni->size_lock, flags);
/* Set the page dirty so it gets written out. */
set_page_dirty(page);
- page_cache_release(page);
+ put_page(page);
/*
* Play nice with the vm and the rest of the system. This is
* very much needed as we can potentially be modifying the
@@ -543,7 +543,7 @@ out:
err_out:
while (nr > 0) {
unlock_page(pages[--nr]);
- page_cache_release(pages[nr]);
+ put_page(pages[nr]);
}
goto out;
}
@@ -573,7 +573,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh)
* only partially being written to.
*
* If @nr_pages is greater than one, we are guaranteed that the cluster size is
- * greater than PAGE_CACHE_SIZE, that all pages in @pages are entirely inside
+ * greater than PAGE_SIZE, that all pages in @pages are entirely inside
* the same cluster and that they are the entirety of that cluster, and that
* the cluster is sparse, i.e. we need to allocate a cluster to fill the hole.
*
@@ -653,7 +653,7 @@ static int ntfs_prepare_pages_for_non_resident_write(struct page **pages,
u = 0;
do_next_page:
page = pages[u];
- bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+ bh_pos = (s64)page->index << PAGE_SHIFT;
bh = head = page_buffers(page);
do {
VCN cdelta;
@@ -810,11 +810,11 @@ map_buffer_cached:
kaddr = kmap_atomic(page);
if (bh_pos < pos) {
- pofs = bh_pos & ~PAGE_CACHE_MASK;
+ pofs = bh_pos & ~PAGE_MASK;
memset(kaddr + pofs, 0, pos - bh_pos);
}
if (bh_end > end) {
- pofs = end & ~PAGE_CACHE_MASK;
+ pofs = end & ~PAGE_MASK;
memset(kaddr + pofs, 0, bh_end - end);
}
kunmap_atomic(kaddr);
@@ -942,7 +942,7 @@ rl_not_mapped_enoent:
* unmapped. This can only happen when the cluster size is
* less than the page cache size.
*/
- if (unlikely(vol->cluster_size < PAGE_CACHE_SIZE)) {
+ if (unlikely(vol->cluster_size < PAGE_SIZE)) {
bh_cend = (bh_end + vol->cluster_size - 1) >>
vol->cluster_size_bits;
if ((bh_cend <= cpos || bh_cpos >= cend)) {
@@ -1208,7 +1208,7 @@ rl_not_mapped_enoent:
wait_on_buffer(bh);
if (likely(buffer_uptodate(bh))) {
page = bh->b_page;
- bh_pos = ((s64)page->index << PAGE_CACHE_SHIFT) +
+ bh_pos = ((s64)page->index << PAGE_SHIFT) +
bh_offset(bh);
/*
* If the buffer overflows the initialized size, need
@@ -1350,7 +1350,7 @@ rl_not_mapped_enoent:
bh = head = page_buffers(page);
do {
if (u == nr_pages &&
- ((s64)page->index << PAGE_CACHE_SHIFT) +
+ ((s64)page->index << PAGE_SHIFT) +
bh_offset(bh) >= end)
break;
if (!buffer_new(bh))
@@ -1422,7 +1422,7 @@ static inline int ntfs_commit_pages_after_non_resident_write(
bool partial;
page = pages[u];
- bh_pos = (s64)page->index << PAGE_CACHE_SHIFT;
+ bh_pos = (s64)page->index << PAGE_SHIFT;
bh = head = page_buffers(page);
partial = false;
do {
@@ -1639,7 +1639,7 @@ static int ntfs_commit_pages_after_write(struct page **pages,
if (end < attr_len)
memcpy(kaddr + end, kattr + end, attr_len - end);
/* Zero the region outside the end of the attribute value. */
- memset(kaddr + attr_len, 0, PAGE_CACHE_SIZE - attr_len);
+ memset(kaddr + attr_len, 0, PAGE_SIZE - attr_len);
flush_dcache_page(page);
SetPageUptodate(page);
}
@@ -1706,7 +1706,7 @@ static size_t ntfs_copy_from_user_iter(struct page **pages, unsigned nr_pages,
unsigned len, copied;
do {
- len = PAGE_CACHE_SIZE - ofs;
+ len = PAGE_SIZE - ofs;
if (len > bytes)
len = bytes;
copied = iov_iter_copy_from_user_atomic(*pages, &data, ofs,
@@ -1724,14 +1724,14 @@ out:
return total;
err:
/* Zero the rest of the target like __copy_from_user(). */
- len = PAGE_CACHE_SIZE - copied;
+ len = PAGE_SIZE - copied;
do {
if (len > bytes)
len = bytes;
zero_user(*pages, copied, len);
bytes -= len;
copied = 0;
- len = PAGE_CACHE_SIZE;
+ len = PAGE_SIZE;
} while (++pages < last_page);
goto out;
}
@@ -1787,8 +1787,8 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
* attributes.
*/
nr_pages = 1;
- if (vol->cluster_size > PAGE_CACHE_SIZE && NInoNonResident(ni))
- nr_pages = vol->cluster_size >> PAGE_CACHE_SHIFT;
+ if (vol->cluster_size > PAGE_SIZE && NInoNonResident(ni))
+ nr_pages = vol->cluster_size >> PAGE_SHIFT;
last_vcn = -1;
do {
VCN vcn;
@@ -1796,9 +1796,9 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
unsigned ofs, do_pages, u;
size_t copied;
- start_idx = idx = pos >> PAGE_CACHE_SHIFT;
- ofs = pos & ~PAGE_CACHE_MASK;
- bytes = PAGE_CACHE_SIZE - ofs;
+ start_idx = idx = pos >> PAGE_SHIFT;
+ ofs = pos & ~PAGE_MASK;
+ bytes = PAGE_SIZE - ofs;
do_pages = 1;
if (nr_pages > 1) {
vcn = pos >> vol->cluster_size_bits;
@@ -1832,7 +1832,7 @@ static ssize_t ntfs_perform_write(struct file *file, struct iov_iter *i,
if (lcn == LCN_HOLE) {
start_idx = (pos & ~(s64)
vol->cluster_size_mask)
- >> PAGE_CACHE_SHIFT;
+ >> PAGE_SHIFT;
bytes = vol->cluster_size - (pos &
vol->cluster_size_mask);
do_pages = nr_pages;
@@ -1871,12 +1871,12 @@ again:
if (unlikely(status)) {
do {
unlock_page(pages[--do_pages]);
- page_cache_release(pages[do_pages]);
+ put_page(pages[do_pages]);
} while (do_pages);
break;
}
}
- u = (pos >> PAGE_CACHE_SHIFT) - pages[0]->index;
+ u = (pos >> PAGE_SHIFT) - pages[0]->index;
copied = ntfs_copy_from_user_iter(pages + u, do_pages - u, ofs,
i, bytes);
ntfs_flush_dcache_pages(pages + u, do_pages - u);
@@ -1889,7 +1889,7 @@ again:
}
do {
unlock_page(pages[--do_pages]);
- page_cache_release(pages[do_pages]);
+ put_page(pages[do_pages]);
} while (do_pages);
if (unlikely(status < 0))
break;
@@ -1921,7 +1921,7 @@ again:
}
} while (iov_iter_count(i));
if (cached_page)
- page_cache_release(cached_page);
+ put_page(cached_page);
ntfs_debug("Done. Returning %s (written 0x%lx, status %li).",
written ? "written" : "status", (unsigned long)written,
(long)status);
diff --git a/fs/ntfs/index.c b/fs/ntfs/index.c
index 096c13569..0d645f357 100644
--- a/fs/ntfs/index.c
+++ b/fs/ntfs/index.c
@@ -272,11 +272,11 @@ done:
descend_into_child_node:
/*
* Convert vcn to index into the index allocation attribute in units
- * of PAGE_CACHE_SIZE and map the page cache page, reading it from
+ * of PAGE_SIZE and map the page cache page, reading it from
* disk if necessary.
*/
page = ntfs_map_page(ia_mapping, vcn <<
- idx_ni->itype.index.vcn_size_bits >> PAGE_CACHE_SHIFT);
+ idx_ni->itype.index.vcn_size_bits >> PAGE_SHIFT);
if (IS_ERR(page)) {
ntfs_error(sb, "Failed to map index page, error %ld.",
-PTR_ERR(page));
@@ -288,9 +288,9 @@ descend_into_child_node:
fast_descend_into_child_node:
/* Get to the index allocation block. */
ia = (INDEX_ALLOCATION*)(kaddr + ((vcn <<
- idx_ni->itype.index.vcn_size_bits) & ~PAGE_CACHE_MASK));
+ idx_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
/* Bounds checks. */
- if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_CACHE_SIZE) {
+ if ((u8*)ia < kaddr || (u8*)ia > kaddr + PAGE_SIZE) {
ntfs_error(sb, "Out of bounds check failed. Corrupt inode "
"0x%lx or driver bug.", idx_ni->mft_no);
goto unm_err_out;
@@ -323,7 +323,7 @@ fast_descend_into_child_node:
goto unm_err_out;
}
index_end = (u8*)ia + idx_ni->itype.index.block_size;
- if (index_end > kaddr + PAGE_CACHE_SIZE) {
+ if (index_end > kaddr + PAGE_SIZE) {
ntfs_error(sb, "Index buffer (VCN 0x%llx) of inode 0x%lx "
"crosses page boundary. Impossible! Cannot "
"access! This is probably a bug in the "
@@ -427,9 +427,9 @@ ia_done:
* the mapped page.
*/
if (old_vcn << vol->cluster_size_bits >>
- PAGE_CACHE_SHIFT == vcn <<
+ PAGE_SHIFT == vcn <<
vol->cluster_size_bits >>
- PAGE_CACHE_SHIFT)
+ PAGE_SHIFT)
goto fast_descend_into_child_node;
unlock_page(page);
ntfs_unmap_page(page);
diff --git a/fs/ntfs/inode.c b/fs/ntfs/inode.c
index d284f07ed..f40972d6d 100644
--- a/fs/ntfs/inode.c
+++ b/fs/ntfs/inode.c
@@ -868,12 +868,12 @@ skip_attr_list_load:
ni->itype.index.block_size);
goto unm_err_out;
}
- if (ni->itype.index.block_size > PAGE_CACHE_SIZE) {
+ if (ni->itype.index.block_size > PAGE_SIZE) {
ntfs_error(vi->i_sb, "Index block size (%u) > "
- "PAGE_CACHE_SIZE (%ld) is not "
+ "PAGE_SIZE (%ld) is not "
"supported. Sorry.",
ni->itype.index.block_size,
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
err = -EOPNOTSUPP;
goto unm_err_out;
}
@@ -1585,10 +1585,10 @@ static int ntfs_read_locked_index_inode(struct inode *base_vi, struct inode *vi)
"two.", ni->itype.index.block_size);
goto unm_err_out;
}
- if (ni->itype.index.block_size > PAGE_CACHE_SIZE) {
- ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_CACHE_SIZE "
+ if (ni->itype.index.block_size > PAGE_SIZE) {
+ ntfs_error(vi->i_sb, "Index block size (%u) > PAGE_SIZE "
"(%ld) is not supported. Sorry.",
- ni->itype.index.block_size, PAGE_CACHE_SIZE);
+ ni->itype.index.block_size, PAGE_SIZE);
err = -EOPNOTSUPP;
goto unm_err_out;
}
diff --git a/fs/ntfs/lcnalloc.c b/fs/ntfs/lcnalloc.c
index 1711b710b..27a24a42f 100644
--- a/fs/ntfs/lcnalloc.c
+++ b/fs/ntfs/lcnalloc.c
@@ -283,15 +283,15 @@ runlist_element *ntfs_cluster_alloc(ntfs_volume *vol, const VCN start_vcn,
ntfs_unmap_page(page);
}
page = ntfs_map_page(mapping, last_read_pos >>
- PAGE_CACHE_SHIFT);
+ PAGE_SHIFT);
if (IS_ERR(page)) {
err = PTR_ERR(page);
ntfs_error(vol->sb, "Failed to map page.");
goto out;
}
- buf_size = last_read_pos & ~PAGE_CACHE_MASK;
+ buf_size = last_read_pos & ~PAGE_MASK;
buf = page_address(page) + buf_size;
- buf_size = PAGE_CACHE_SIZE - buf_size;
+ buf_size = PAGE_SIZE - buf_size;
if (unlikely(last_read_pos + buf_size > i_size))
buf_size = i_size - last_read_pos;
buf_size <<= 3;
diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c
index c71de292c..9d71213ca 100644
--- a/fs/ntfs/logfile.c
+++ b/fs/ntfs/logfile.c
@@ -381,7 +381,7 @@ static int ntfs_check_and_load_restart_page(struct inode *vi,
* completely inside @rp, just copy it from there. Otherwise map all
* the required pages and copy the data from them.
*/
- size = PAGE_CACHE_SIZE - (pos & ~PAGE_CACHE_MASK);
+ size = PAGE_SIZE - (pos & ~PAGE_MASK);
if (size >= le32_to_cpu(rp->system_page_size)) {
memcpy(trp, rp, le32_to_cpu(rp->system_page_size));
} else {
@@ -394,8 +394,8 @@ static int ntfs_check_and_load_restart_page(struct inode *vi,
/* Copy the remaining data one page at a time. */
have_read = size;
to_read = le32_to_cpu(rp->system_page_size) - size;
- idx = (pos + size) >> PAGE_CACHE_SHIFT;
- BUG_ON((pos + size) & ~PAGE_CACHE_MASK);
+ idx = (pos + size) >> PAGE_SHIFT;
+ BUG_ON((pos + size) & ~PAGE_MASK);
do {
page = ntfs_map_page(vi->i_mapping, idx);
if (IS_ERR(page)) {
@@ -406,7 +406,7 @@ static int ntfs_check_and_load_restart_page(struct inode *vi,
err = -EIO;
goto err_out;
}
- size = min_t(int, to_read, PAGE_CACHE_SIZE);
+ size = min_t(int, to_read, PAGE_SIZE);
memcpy((u8*)trp + have_read, page_address(page), size);
ntfs_unmap_page(page);
have_read += size;
@@ -509,11 +509,11 @@ bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
* log page size if the page cache size is between the default log page
* size and twice that.
*/
- if (PAGE_CACHE_SIZE >= DefaultLogPageSize && PAGE_CACHE_SIZE <=
+ if (PAGE_SIZE >= DefaultLogPageSize && PAGE_SIZE <=
DefaultLogPageSize * 2)
log_page_size = DefaultLogPageSize;
else
- log_page_size = PAGE_CACHE_SIZE;
+ log_page_size = PAGE_SIZE;
log_page_mask = log_page_size - 1;
/*
* Use ntfs_ffs() instead of ffs() to enable the compiler to
@@ -539,7 +539,7 @@ bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
* to be empty.
*/
for (pos = 0; pos < size; pos <<= 1) {
- pgoff_t idx = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t idx = pos >> PAGE_SHIFT;
if (!page || page->index != idx) {
if (page)
ntfs_unmap_page(page);
@@ -550,7 +550,7 @@ bool ntfs_check_logfile(struct inode *log_vi, RESTART_PAGE_HEADER **rp)
goto err_out;
}
}
- kaddr = (u8*)page_address(page) + (pos & ~PAGE_CACHE_MASK);
+ kaddr = (u8*)page_address(page) + (pos & ~PAGE_MASK);
/*
* A non-empty block means the logfile is not empty while an
* empty block after a non-empty block has been encountered
diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c
index 3014a36a2..37b2501ca 100644
--- a/fs/ntfs/mft.c
+++ b/fs/ntfs/mft.c
@@ -61,16 +61,16 @@ static inline MFT_RECORD *map_mft_record_page(ntfs_inode *ni)
* here if the volume was that big...
*/
index = (u64)ni->mft_no << vol->mft_record_size_bits >>
- PAGE_CACHE_SHIFT;
- ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+ PAGE_SHIFT;
+ ofs = (ni->mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
i_size = i_size_read(mft_vi);
/* The maximum valid index into the page cache for $MFT's data. */
- end_index = i_size >> PAGE_CACHE_SHIFT;
+ end_index = i_size >> PAGE_SHIFT;
/* If the wanted index is out of bounds the mft record doesn't exist. */
if (unlikely(index >= end_index)) {
- if (index > end_index || (i_size & ~PAGE_CACHE_MASK) < ofs +
+ if (index > end_index || (i_size & ~PAGE_MASK) < ofs +
vol->mft_record_size) {
page = ERR_PTR(-ENOENT);
ntfs_error(vol->sb, "Attempt to read mft record 0x%lx, "
@@ -487,7 +487,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
}
/* Get the page containing the mirror copy of the mft record @m. */
page = ntfs_map_page(vol->mftmirr_ino->i_mapping, mft_no >>
- (PAGE_CACHE_SHIFT - vol->mft_record_size_bits));
+ (PAGE_SHIFT - vol->mft_record_size_bits));
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to map mft mirror page.");
err = PTR_ERR(page);
@@ -497,7 +497,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no,
BUG_ON(!PageUptodate(page));
ClearPageUptodate(page);
/* Offset of the mft mirror record inside the page. */
- page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+ page_ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
/* The address in the page of the mirror copy of the mft record @m. */
kmirr = page_address(page) + page_ofs;
/* Copy the mst protected mft record to the mirror. */
@@ -1178,8 +1178,8 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
for (; pass <= 2;) {
/* Cap size to pass_end. */
ofs = data_pos >> 3;
- page_ofs = ofs & ~PAGE_CACHE_MASK;
- size = PAGE_CACHE_SIZE - page_ofs;
+ page_ofs = ofs & ~PAGE_MASK;
+ size = PAGE_SIZE - page_ofs;
ll = ((pass_end + 7) >> 3) - ofs;
if (size > ll)
size = ll;
@@ -1190,7 +1190,7 @@ static int ntfs_mft_bitmap_find_and_alloc_free_rec_nolock(ntfs_volume *vol,
*/
if (size) {
page = ntfs_map_page(mftbmp_mapping,
- ofs >> PAGE_CACHE_SHIFT);
+ ofs >> PAGE_SHIFT);
if (IS_ERR(page)) {
ntfs_error(vol->sb, "Failed to read mft "
"bitmap, aborting.");
@@ -1328,13 +1328,13 @@ static int ntfs_mft_bitmap_extend_allocation_nolock(ntfs_volume *vol)
*/
ll = lcn >> 3;
page = ntfs_map_page(vol->lcnbmp_ino->i_mapping,
- ll >> PAGE_CACHE_SHIFT);
+ ll >> PAGE_SHIFT);
if (IS_ERR(page)) {
up_write(&mftbmp_ni->runlist.lock);
ntfs_error(vol->sb, "Failed to read from lcn bitmap.");
return PTR_ERR(page);
}
- b = (u8*)page_address(page) + (ll & ~PAGE_CACHE_MASK);
+ b = (u8*)page_address(page) + (ll & ~PAGE_MASK);
tb = 1 << (lcn & 7ull);
down_write(&vol->lcnbmp_lock);
if (*b != 0xff && !(*b & tb)) {
@@ -2103,14 +2103,14 @@ static int ntfs_mft_record_format(const ntfs_volume *vol, const s64 mft_no)
* The index into the page cache and the offset within the page cache
* page of the wanted mft record.
*/
- index = mft_no << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
- ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+ index = mft_no << vol->mft_record_size_bits >> PAGE_SHIFT;
+ ofs = (mft_no << vol->mft_record_size_bits) & ~PAGE_MASK;
/* The maximum valid index into the page cache for $MFT's data. */
i_size = i_size_read(mft_vi);
- end_index = i_size >> PAGE_CACHE_SHIFT;
+ end_index = i_size >> PAGE_SHIFT;
if (unlikely(index >= end_index)) {
if (unlikely(index > end_index || ofs + vol->mft_record_size >=
- (i_size & ~PAGE_CACHE_MASK))) {
+ (i_size & ~PAGE_MASK))) {
ntfs_error(vol->sb, "Tried to format non-existing mft "
"record 0x%llx.", (long long)mft_no);
return -ENOENT;
@@ -2515,8 +2515,8 @@ mft_rec_already_initialized:
* We now have allocated and initialized the mft record. Calculate the
* index of and the offset within the page cache page the record is in.
*/
- index = bit << vol->mft_record_size_bits >> PAGE_CACHE_SHIFT;
- ofs = (bit << vol->mft_record_size_bits) & ~PAGE_CACHE_MASK;
+ index = bit << vol->mft_record_size_bits >> PAGE_SHIFT;
+ ofs = (bit << vol->mft_record_size_bits) & ~PAGE_MASK;
/* Read, map, and pin the page containing the mft record. */
page = ntfs_map_page(vol->mft_ino->i_mapping, index);
if (IS_ERR(page)) {
diff --git a/fs/ntfs/ntfs.h b/fs/ntfs/ntfs.h
index c581e26a3..12de47b96 100644
--- a/fs/ntfs/ntfs.h
+++ b/fs/ntfs/ntfs.h
@@ -43,7 +43,7 @@ typedef enum {
NTFS_MAX_NAME_LEN = 255,
NTFS_MAX_ATTR_NAME_LEN = 255,
NTFS_MAX_CLUSTER_SIZE = 64 * 1024, /* 64kiB */
- NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_CACHE_SIZE,
+ NTFS_MAX_PAGES_PER_CLUSTER = NTFS_MAX_CLUSTER_SIZE / PAGE_SIZE,
} NTFS_CONSTANTS;
/* Global variables. */
diff --git a/fs/ntfs/super.c b/fs/ntfs/super.c
index 1b38abdaa..ecb49870a 100644
--- a/fs/ntfs/super.c
+++ b/fs/ntfs/super.c
@@ -823,14 +823,14 @@ static bool parse_ntfs_boot_sector(ntfs_volume *vol, const NTFS_BOOT_SECTOR *b)
ntfs_debug("vol->mft_record_size_bits = %i (0x%x)",
vol->mft_record_size_bits, vol->mft_record_size_bits);
/*
- * We cannot support mft record sizes above the PAGE_CACHE_SIZE since
+ * We cannot support mft record sizes above the PAGE_SIZE since
* we store $MFT/$DATA, the table of mft records in the page cache.
*/
- if (vol->mft_record_size > PAGE_CACHE_SIZE) {
+ if (vol->mft_record_size > PAGE_SIZE) {
ntfs_error(vol->sb, "Mft record size (%i) exceeds the "
- "PAGE_CACHE_SIZE on your system (%lu). "
+ "PAGE_SIZE on your system (%lu). "
"This is not supported. Sorry.",
- vol->mft_record_size, PAGE_CACHE_SIZE);
+ vol->mft_record_size, PAGE_SIZE);
return false;
}
/* We cannot support mft record sizes below the sector size. */
@@ -1096,7 +1096,7 @@ static bool check_mft_mirror(ntfs_volume *vol)
ntfs_debug("Entering.");
/* Compare contents of $MFT and $MFTMirr. */
- mrecs_per_page = PAGE_CACHE_SIZE / vol->mft_record_size;
+ mrecs_per_page = PAGE_SIZE / vol->mft_record_size;
BUG_ON(!mrecs_per_page);
BUG_ON(!vol->mftmirr_size);
mft_page = mirr_page = NULL;
@@ -1615,20 +1615,20 @@ static bool load_and_init_attrdef(ntfs_volume *vol)
if (!vol->attrdef)
goto iput_failed;
index = 0;
- max_index = i_size >> PAGE_CACHE_SHIFT;
- size = PAGE_CACHE_SIZE;
+ max_index = i_size >> PAGE_SHIFT;
+ size = PAGE_SIZE;
while (index < max_index) {
/* Read the attrdef table and copy it into the linear buffer. */
read_partial_attrdef_page:
page = ntfs_map_page(ino->i_mapping, index);
if (IS_ERR(page))
goto free_iput_failed;
- memcpy((u8*)vol->attrdef + (index++ << PAGE_CACHE_SHIFT),
+ memcpy((u8*)vol->attrdef + (index++ << PAGE_SHIFT),
page_address(page), size);
ntfs_unmap_page(page);
};
- if (size == PAGE_CACHE_SIZE) {
- size = i_size & ~PAGE_CACHE_MASK;
+ if (size == PAGE_SIZE) {
+ size = i_size & ~PAGE_MASK;
if (size)
goto read_partial_attrdef_page;
}
@@ -1684,20 +1684,20 @@ static bool load_and_init_upcase(ntfs_volume *vol)
if (!vol->upcase)
goto iput_upcase_failed;
index = 0;
- max_index = i_size >> PAGE_CACHE_SHIFT;
- size = PAGE_CACHE_SIZE;
+ max_index = i_size >> PAGE_SHIFT;
+ size = PAGE_SIZE;
while (index < max_index) {
/* Read the upcase table and copy it into the linear buffer. */
read_partial_upcase_page:
page = ntfs_map_page(ino->i_mapping, index);
if (IS_ERR(page))
goto iput_upcase_failed;
- memcpy((char*)vol->upcase + (index++ << PAGE_CACHE_SHIFT),
+ memcpy((char*)vol->upcase + (index++ << PAGE_SHIFT),
page_address(page), size);
ntfs_unmap_page(page);
};
- if (size == PAGE_CACHE_SIZE) {
- size = i_size & ~PAGE_CACHE_MASK;
+ if (size == PAGE_SIZE) {
+ size = i_size & ~PAGE_MASK;
if (size)
goto read_partial_upcase_page;
}
@@ -2471,14 +2471,14 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
down_read(&vol->lcnbmp_lock);
/*
* Convert the number of bits into bytes rounded up, then convert into
- * multiples of PAGE_CACHE_SIZE, rounding up so that if we have one
+ * multiples of PAGE_SIZE, rounding up so that if we have one
* full and one partial page max_index = 2.
*/
- max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT;
- /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
+ max_index = (((vol->nr_clusters + 7) >> 3) + PAGE_SIZE - 1) >>
+ PAGE_SHIFT;
+ /* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */
ntfs_debug("Reading $Bitmap, max_index = 0x%lx, max_size = 0x%lx.",
- max_index, PAGE_CACHE_SIZE / 4);
+ max_index, PAGE_SIZE / 4);
for (index = 0; index < max_index; index++) {
unsigned long *kaddr;
@@ -2491,7 +2491,7 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
if (IS_ERR(page)) {
ntfs_debug("read_mapping_page() error. Skipping "
"page (index 0x%lx).", index);
- nr_free -= PAGE_CACHE_SIZE * 8;
+ nr_free -= PAGE_SIZE * 8;
continue;
}
kaddr = kmap_atomic(page);
@@ -2503,9 +2503,9 @@ static s64 get_nr_free_clusters(ntfs_volume *vol)
* ntfs_readpage().
*/
nr_free -= bitmap_weight(kaddr,
- PAGE_CACHE_SIZE * BITS_PER_BYTE);
+ PAGE_SIZE * BITS_PER_BYTE);
kunmap_atomic(kaddr);
- page_cache_release(page);
+ put_page(page);
}
ntfs_debug("Finished reading $Bitmap, last index = 0x%lx.", index - 1);
/*
@@ -2547,9 +2547,9 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
pgoff_t index;
ntfs_debug("Entering.");
- /* Use multiples of 4 bytes, thus max_size is PAGE_CACHE_SIZE / 4. */
+ /* Use multiples of 4 bytes, thus max_size is PAGE_SIZE / 4. */
ntfs_debug("Reading $MFT/$BITMAP, max_index = 0x%lx, max_size = "
- "0x%lx.", max_index, PAGE_CACHE_SIZE / 4);
+ "0x%lx.", max_index, PAGE_SIZE / 4);
for (index = 0; index < max_index; index++) {
unsigned long *kaddr;
@@ -2562,7 +2562,7 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
if (IS_ERR(page)) {
ntfs_debug("read_mapping_page() error. Skipping "
"page (index 0x%lx).", index);
- nr_free -= PAGE_CACHE_SIZE * 8;
+ nr_free -= PAGE_SIZE * 8;
continue;
}
kaddr = kmap_atomic(page);
@@ -2574,9 +2574,9 @@ static unsigned long __get_nr_free_mft_records(ntfs_volume *vol,
* ntfs_readpage().
*/
nr_free -= bitmap_weight(kaddr,
- PAGE_CACHE_SIZE * BITS_PER_BYTE);
+ PAGE_SIZE * BITS_PER_BYTE);
kunmap_atomic(kaddr);
- page_cache_release(page);
+ put_page(page);
}
ntfs_debug("Finished reading $MFT/$BITMAP, last index = 0x%lx.",
index - 1);
@@ -2618,17 +2618,17 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
/* Type of filesystem. */
sfs->f_type = NTFS_SB_MAGIC;
/* Optimal transfer block size. */
- sfs->f_bsize = PAGE_CACHE_SIZE;
+ sfs->f_bsize = PAGE_SIZE;
/*
* Total data blocks in filesystem in units of f_bsize and since
* inodes are also stored in data blocs ($MFT is a file) this is just
* the total clusters.
*/
sfs->f_blocks = vol->nr_clusters << vol->cluster_size_bits >>
- PAGE_CACHE_SHIFT;
+ PAGE_SHIFT;
/* Free data blocks in filesystem in units of f_bsize. */
size = get_nr_free_clusters(vol) << vol->cluster_size_bits >>
- PAGE_CACHE_SHIFT;
+ PAGE_SHIFT;
if (size < 0LL)
size = 0LL;
/* Free blocks avail to non-superuser, same as above on NTFS. */
@@ -2639,11 +2639,11 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *sfs)
size = i_size_read(vol->mft_ino) >> vol->mft_record_size_bits;
/*
* Convert the maximum number of set bits into bytes rounded up, then
- * convert into multiples of PAGE_CACHE_SIZE, rounding up so that if we
+ * convert into multiples of PAGE_SIZE, rounding up so that if we
* have one full and one partial page max_index = 2.
*/
max_index = ((((mft_ni->initialized_size >> vol->mft_record_size_bits)
- + 7) >> 3) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ + 7) >> 3) + PAGE_SIZE - 1) >> PAGE_SHIFT;
read_unlock_irqrestore(&mft_ni->size_lock, flags);
/* Number of inodes in filesystem (at this point in time). */
sfs->f_files = size;
@@ -2765,15 +2765,15 @@ static int ntfs_fill_super(struct super_block *sb, void *opt, const int silent)
if (!parse_options(vol, (char*)opt))
goto err_out_now;
- /* We support sector sizes up to the PAGE_CACHE_SIZE. */
- if (bdev_logical_block_size(sb->s_bdev) > PAGE_CACHE_SIZE) {
+ /* We support sector sizes up to the PAGE_SIZE. */
+ if (bdev_logical_block_size(sb->s_bdev) > PAGE_SIZE) {
if (!silent)
ntfs_error(sb, "Device has unsupported sector size "
"(%i). The maximum supported sector "
"size on this architecture is %lu "
"bytes.",
bdev_logical_block_size(sb->s_bdev),
- PAGE_CACHE_SIZE);
+ PAGE_SIZE);
goto err_out_now;
}
/*
diff --git a/fs/ocfs2/Makefile b/fs/ocfs2/Makefile
index ce210d495..e27e65279 100644
--- a/fs/ocfs2/Makefile
+++ b/fs/ocfs2/Makefile
@@ -41,7 +41,8 @@ ocfs2-objs := \
quota_local.o \
quota_global.o \
xattr.o \
- acl.o
+ acl.o \
+ filecheck.o
ocfs2_stackglue-objs := stackglue.o
ocfs2_stack_o2cb-objs := stack_o2cb.o
diff --git a/fs/ocfs2/acl.c b/fs/ocfs2/acl.c
index 0cdf497c9..216243472 100644
--- a/fs/ocfs2/acl.c
+++ b/fs/ocfs2/acl.c
@@ -322,3 +322,90 @@ struct posix_acl *ocfs2_iop_get_acl(struct inode *inode, int type)
brelse(di_bh);
return acl;
}
+
+int ocfs2_acl_chmod(struct inode *inode, struct buffer_head *bh)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl;
+ int ret;
+
+ if (S_ISLNK(inode->i_mode))
+ return -EOPNOTSUPP;
+
+ if (!(osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL))
+ return 0;
+
+ acl = ocfs2_get_acl_nolock(inode, ACL_TYPE_ACCESS, bh);
+ if (IS_ERR(acl) || !acl)
+ return PTR_ERR(acl);
+ ret = __posix_acl_chmod(&acl, GFP_KERNEL, inode->i_mode);
+ if (ret)
+ return ret;
+ ret = ocfs2_set_acl(NULL, inode, NULL, ACL_TYPE_ACCESS,
+ acl, NULL, NULL);
+ posix_acl_release(acl);
+ return ret;
+}
+
+/*
+ * Initialize the ACLs of a new inode. If parent directory has default ACL,
+ * then clone to new inode. Called from ocfs2_mknod.
+ */
+int ocfs2_init_acl(handle_t *handle,
+ struct inode *inode,
+ struct inode *dir,
+ struct buffer_head *di_bh,
+ struct buffer_head *dir_bh,
+ struct ocfs2_alloc_context *meta_ac,
+ struct ocfs2_alloc_context *data_ac)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct posix_acl *acl = NULL;
+ int ret = 0, ret2;
+ umode_t mode;
+
+ if (!S_ISLNK(inode->i_mode)) {
+ if (osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) {
+ acl = ocfs2_get_acl_nolock(dir, ACL_TYPE_DEFAULT,
+ dir_bh);
+ if (IS_ERR(acl))
+ return PTR_ERR(acl);
+ }
+ if (!acl) {
+ mode = inode->i_mode & ~current_umask();
+ ret = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+ if (ret) {
+ mlog_errno(ret);
+ goto cleanup;
+ }
+ }
+ }
+ if ((osb->s_mount_opt & OCFS2_MOUNT_POSIX_ACL) && acl) {
+ if (S_ISDIR(inode->i_mode)) {
+ ret = ocfs2_set_acl(handle, inode, di_bh,
+ ACL_TYPE_DEFAULT, acl,
+ meta_ac, data_ac);
+ if (ret)
+ goto cleanup;
+ }
+ mode = inode->i_mode;
+ ret = __posix_acl_create(&acl, GFP_NOFS, &mode);
+ if (ret < 0)
+ return ret;
+
+ ret2 = ocfs2_acl_set_mode(inode, di_bh, handle, mode);
+ if (ret2) {
+ mlog_errno(ret2);
+ ret = ret2;
+ goto cleanup;
+ }
+ if (ret > 0) {
+ ret = ocfs2_set_acl(handle, inode,
+ di_bh, ACL_TYPE_ACCESS,
+ acl, meta_ac, data_ac);
+ }
+ }
+cleanup:
+ posix_acl_release(acl);
+ return ret;
+}
diff --git a/fs/ocfs2/acl.h b/fs/ocfs2/acl.h
index 3fce68d08..2783a75b3 100644
--- a/fs/ocfs2/acl.h
+++ b/fs/ocfs2/acl.h
@@ -35,5 +35,10 @@ int ocfs2_set_acl(handle_t *handle,
struct posix_acl *acl,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_alloc_context *data_ac);
+extern int ocfs2_acl_chmod(struct inode *, struct buffer_head *);
+extern int ocfs2_init_acl(handle_t *, struct inode *, struct inode *,
+ struct buffer_head *, struct buffer_head *,
+ struct ocfs2_alloc_context *,
+ struct ocfs2_alloc_context *);
#endif /* OCFS2_ACL_H */
diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c
index d002579c6..e361d1a0c 100644
--- a/fs/ocfs2/alloc.c
+++ b/fs/ocfs2/alloc.c
@@ -2516,21 +2516,6 @@ static int ocfs2_update_edge_lengths(handle_t *handle,
struct ocfs2_extent_block *eb;
u32 range;
- /*
- * In normal tree rotation process, we will never touch the
- * tree branch above subtree_index and ocfs2_extend_rotate_transaction
- * doesn't reserve the credits for them either.
- *
- * But we do have a special case here which will update the rightmost
- * records for all the bh in the path.
- * So we have to allocate extra credits and access them.
- */
- ret = ocfs2_extend_trans(handle, subtree_index);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
-
ret = ocfs2_journal_access_path(et->et_ci, handle, path);
if (ret) {
mlog_errno(ret);
@@ -2956,7 +2941,7 @@ static int __ocfs2_rotate_tree_left(handle_t *handle,
right_path->p_node[subtree_root].bh->b_blocknr,
right_path->p_tree_depth);
- ret = ocfs2_extend_rotate_transaction(handle, subtree_root,
+ ret = ocfs2_extend_rotate_transaction(handle, 0,
orig_credits, left_path);
if (ret) {
mlog_errno(ret);
@@ -3029,21 +3014,9 @@ static int ocfs2_remove_rightmost_path(handle_t *handle,
struct ocfs2_extent_block *eb;
struct ocfs2_extent_list *el;
-
ret = ocfs2_et_sanity_check(et);
if (ret)
goto out;
- /*
- * There's two ways we handle this depending on
- * whether path is the only existing one.
- */
- ret = ocfs2_extend_rotate_transaction(handle, 0,
- handle->h_buffer_credits,
- path);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
ret = ocfs2_journal_access_path(et->et_ci, handle, path);
if (ret) {
@@ -3641,6 +3614,14 @@ static int ocfs2_merge_rec_left(struct ocfs2_path *right_path,
*/
if (le16_to_cpu(right_rec->e_leaf_clusters) == 0 &&
le16_to_cpu(el->l_next_free_rec) == 1) {
+ /* extend credit for ocfs2_remove_rightmost_path */
+ ret = ocfs2_extend_rotate_transaction(handle, 0,
+ handle->h_buffer_credits,
+ right_path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
ret = ocfs2_remove_rightmost_path(handle, et,
right_path,
@@ -3679,6 +3660,14 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
BUG_ON(ctxt->c_contig_type == CONTIG_NONE);
if (ctxt->c_split_covers_rec && ctxt->c_has_empty_extent) {
+ /* extend credit for ocfs2_remove_rightmost_path */
+ ret = ocfs2_extend_rotate_transaction(handle, 0,
+ handle->h_buffer_credits,
+ path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
/*
* The merge code will need to create an empty
* extent to take the place of the newly
@@ -3727,6 +3716,15 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
*/
BUG_ON(!ocfs2_is_empty_extent(&el->l_recs[0]));
+ /* extend credit for ocfs2_remove_rightmost_path */
+ ret = ocfs2_extend_rotate_transaction(handle, 0,
+ handle->h_buffer_credits,
+ path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
/* The merge left us with an empty extent, remove it. */
ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
if (ret) {
@@ -3748,6 +3746,15 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
goto out;
}
+ /* extend credit for ocfs2_remove_rightmost_path */
+ ret = ocfs2_extend_rotate_transaction(handle, 0,
+ handle->h_buffer_credits,
+ path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
/*
* Error from this last rotate is not critical, so
@@ -3783,6 +3790,16 @@ static int ocfs2_try_to_merge_extent(handle_t *handle,
}
if (ctxt->c_split_covers_rec) {
+ /* extend credit for ocfs2_remove_rightmost_path */
+ ret = ocfs2_extend_rotate_transaction(handle, 0,
+ handle->h_buffer_credits,
+ path);
+ if (ret) {
+ mlog_errno(ret);
+ ret = 0;
+ goto out;
+ }
+
/*
* The merge may have left an empty extent in
* our leaf. Try to rotate it away.
@@ -5342,6 +5359,15 @@ static int ocfs2_truncate_rec(handle_t *handle,
struct ocfs2_extent_block *eb;
if (ocfs2_is_empty_extent(&el->l_recs[0]) && index > 0) {
+ /* extend credit for ocfs2_remove_rightmost_path */
+ ret = ocfs2_extend_rotate_transaction(handle, 0,
+ handle->h_buffer_credits,
+ path);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
ret = ocfs2_rotate_tree_left(handle, et, path, dealloc);
if (ret) {
mlog_errno(ret);
@@ -5928,16 +5954,6 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
ocfs2_journal_dirty(handle, tl_bh);
- /* TODO: Perhaps we can calculate the bulk of the
- * credits up front rather than extending like
- * this. */
- status = ocfs2_extend_trans(handle,
- OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
- if (status < 0) {
- mlog_errno(status);
- goto bail;
- }
-
rec = tl->tl_recs[i];
start_blk = ocfs2_clusters_to_blocks(data_alloc_inode->i_sb,
le32_to_cpu(rec.t_start));
@@ -5958,6 +5974,13 @@ static int ocfs2_replay_truncate_records(struct ocfs2_super *osb,
goto bail;
}
}
+
+ status = ocfs2_extend_trans(handle,
+ OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
i--;
}
@@ -6016,7 +6039,7 @@ int __ocfs2_flush_truncate_log(struct ocfs2_super *osb)
goto out_mutex;
}
- handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_UPDATE);
+ handle = ocfs2_start_trans(osb, OCFS2_TRUNCATE_LOG_FLUSH_ONE_REC);
if (IS_ERR(handle)) {
status = PTR_ERR(handle);
mlog_errno(status);
@@ -6079,7 +6102,7 @@ void ocfs2_schedule_truncate_log_flush(struct ocfs2_super *osb,
if (cancel)
cancel_delayed_work(&osb->osb_truncate_log_wq);
- queue_delayed_work(ocfs2_wq, &osb->osb_truncate_log_wq,
+ queue_delayed_work(osb->ocfs2_wq, &osb->osb_truncate_log_wq,
OCFS2_TRUNCATE_LOG_FLUSH_INTERVAL);
}
}
@@ -6253,7 +6276,7 @@ void ocfs2_truncate_log_shutdown(struct ocfs2_super *osb)
if (tl_inode) {
cancel_delayed_work(&osb->osb_truncate_log_wq);
- flush_workqueue(ocfs2_wq);
+ flush_workqueue(osb->ocfs2_wq);
status = ocfs2_flush_truncate_log(osb);
if (status < 0)
@@ -6648,7 +6671,7 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
{
int i;
struct page *page;
- unsigned int from, to = PAGE_CACHE_SIZE;
+ unsigned int from, to = PAGE_SIZE;
struct super_block *sb = inode->i_sb;
BUG_ON(!ocfs2_sparse_alloc(OCFS2_SB(sb)));
@@ -6656,21 +6679,21 @@ static void ocfs2_zero_cluster_pages(struct inode *inode, loff_t start,
if (numpages == 0)
goto out;
- to = PAGE_CACHE_SIZE;
+ to = PAGE_SIZE;
for(i = 0; i < numpages; i++) {
page = pages[i];
- from = start & (PAGE_CACHE_SIZE - 1);
- if ((end >> PAGE_CACHE_SHIFT) == page->index)
- to = end & (PAGE_CACHE_SIZE - 1);
+ from = start & (PAGE_SIZE - 1);
+ if ((end >> PAGE_SHIFT) == page->index)
+ to = end & (PAGE_SIZE - 1);
- BUG_ON(from > PAGE_CACHE_SIZE);
- BUG_ON(to > PAGE_CACHE_SIZE);
+ BUG_ON(from > PAGE_SIZE);
+ BUG_ON(to > PAGE_SIZE);
ocfs2_map_and_dirty_page(inode, handle, from, to, page, 1,
&phys);
- start = (page->index + 1) << PAGE_CACHE_SHIFT;
+ start = (page->index + 1) << PAGE_SHIFT;
}
out:
if (pages)
@@ -6689,7 +6712,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
numpages = 0;
last_page_bytes = PAGE_ALIGN(end);
- index = start >> PAGE_CACHE_SHIFT;
+ index = start >> PAGE_SHIFT;
do {
pages[numpages] = find_or_create_page(mapping, index, GFP_NOFS);
if (!pages[numpages]) {
@@ -6700,7 +6723,7 @@ int ocfs2_grab_pages(struct inode *inode, loff_t start, loff_t end,
numpages++;
index++;
- } while (index < (last_page_bytes >> PAGE_CACHE_SHIFT));
+ } while (index < (last_page_bytes >> PAGE_SHIFT));
out:
if (ret != 0) {
@@ -6927,8 +6950,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
* to do that now.
*/
if (!ocfs2_sparse_alloc(osb) &&
- PAGE_CACHE_SIZE < osb->s_clustersize)
- end = PAGE_CACHE_SIZE;
+ PAGE_SIZE < osb->s_clustersize)
+ end = PAGE_SIZE;
ret = ocfs2_grab_eof_pages(inode, 0, end, pages, &num_pages);
if (ret) {
@@ -6948,8 +6971,8 @@ int ocfs2_convert_inline_data_to_extents(struct inode *inode,
goto out_unlock;
}
- page_end = PAGE_CACHE_SIZE;
- if (PAGE_CACHE_SIZE > osb->s_clustersize)
+ page_end = PAGE_SIZE;
+ if (PAGE_SIZE > osb->s_clustersize)
page_end = osb->s_clustersize;
for (i = 0; i < num_pages; i++)
diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c
index cda0361e9..ad1577348 100644
--- a/fs/ocfs2/aops.c
+++ b/fs/ocfs2/aops.c
@@ -234,7 +234,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
size = i_size_read(inode);
- if (size > PAGE_CACHE_SIZE ||
+ if (size > PAGE_SIZE ||
size > ocfs2_max_inline_data_with_xattr(inode->i_sb, di)) {
ocfs2_error(inode->i_sb,
"Inode %llu has with inline data has bad size: %Lu\n",
@@ -247,7 +247,7 @@ int ocfs2_read_inline_data(struct inode *inode, struct page *page,
if (size)
memcpy(kaddr, di->id2.i_data.id_data, size);
/* Clear the remaining part of the page */
- memset(kaddr + size, 0, PAGE_CACHE_SIZE - size);
+ memset(kaddr + size, 0, PAGE_SIZE - size);
flush_dcache_page(page);
kunmap_atomic(kaddr);
@@ -282,7 +282,7 @@ static int ocfs2_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
struct ocfs2_inode_info *oi = OCFS2_I(inode);
- loff_t start = (loff_t)page->index << PAGE_CACHE_SHIFT;
+ loff_t start = (loff_t)page->index << PAGE_SHIFT;
int ret, unlock = 1;
trace_ocfs2_readpage((unsigned long long)oi->ip_blkno,
@@ -385,7 +385,7 @@ static int ocfs2_readpages(struct file *filp, struct address_space *mapping,
* drop out in that case as it's not worth handling here.
*/
last = list_entry(pages->prev, struct page, lru);
- start = (loff_t)last->index << PAGE_CACHE_SHIFT;
+ start = (loff_t)last->index << PAGE_SHIFT;
if (start >= i_size_read(inode))
goto out_unlock;
@@ -499,153 +499,6 @@ bail:
return status;
}
-/*
- * TODO: Make this into a generic get_blocks function.
- *
- * From do_direct_io in direct-io.c:
- * "So what we do is to permit the ->get_blocks function to populate
- * bh.b_size with the size of IO which is permitted at this offset and
- * this i_blkbits."
- *
- * This function is called directly from get_more_blocks in direct-io.c.
- *
- * called like this: dio->get_blocks(dio->inode, fs_startblk,
- * fs_count, map_bh, dio->rw == WRITE);
- */
-static int ocfs2_direct_IO_get_blocks(struct inode *inode, sector_t iblock,
- struct buffer_head *bh_result, int create)
-{
- int ret;
- u32 cpos = 0;
- int alloc_locked = 0;
- u64 p_blkno, inode_blocks, contig_blocks;
- unsigned int ext_flags;
- unsigned char blocksize_bits = inode->i_sb->s_blocksize_bits;
- unsigned long max_blocks = bh_result->b_size >> inode->i_blkbits;
- unsigned long len = bh_result->b_size;
- unsigned int clusters_to_alloc = 0, contig_clusters = 0;
-
- cpos = ocfs2_blocks_to_clusters(inode->i_sb, iblock);
-
- /* This function won't even be called if the request isn't all
- * nicely aligned and of the right size, so there's no need
- * for us to check any of that. */
-
- inode_blocks = ocfs2_blocks_for_bytes(inode->i_sb, i_size_read(inode));
-
- down_read(&OCFS2_I(inode)->ip_alloc_sem);
-
- /* This figures out the size of the next contiguous block, and
- * our logical offset */
- ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
- &contig_blocks, &ext_flags);
- up_read(&OCFS2_I(inode)->ip_alloc_sem);
-
- if (ret) {
- mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
- (unsigned long long)iblock);
- ret = -EIO;
- goto bail;
- }
-
- /* We should already CoW the refcounted extent in case of create. */
- BUG_ON(create && (ext_flags & OCFS2_EXT_REFCOUNTED));
-
- /* allocate blocks if no p_blkno is found, and create == 1 */
- if (!p_blkno && create) {
- ret = ocfs2_inode_lock(inode, NULL, 1);
- if (ret < 0) {
- mlog_errno(ret);
- goto bail;
- }
-
- alloc_locked = 1;
-
- down_write(&OCFS2_I(inode)->ip_alloc_sem);
-
- /* fill hole, allocate blocks can't be larger than the size
- * of the hole */
- clusters_to_alloc = ocfs2_clusters_for_bytes(inode->i_sb, len);
- contig_clusters = ocfs2_clusters_for_blocks(inode->i_sb,
- contig_blocks);
- if (clusters_to_alloc > contig_clusters)
- clusters_to_alloc = contig_clusters;
-
- /* allocate extent and insert them into the extent tree */
- ret = ocfs2_extend_allocation(inode, cpos,
- clusters_to_alloc, 0);
- if (ret < 0) {
- up_write(&OCFS2_I(inode)->ip_alloc_sem);
- mlog_errno(ret);
- goto bail;
- }
-
- ret = ocfs2_extent_map_get_blocks(inode, iblock, &p_blkno,
- &contig_blocks, &ext_flags);
- if (ret < 0) {
- up_write(&OCFS2_I(inode)->ip_alloc_sem);
- mlog(ML_ERROR, "get_blocks() failed iblock=%llu\n",
- (unsigned long long)iblock);
- ret = -EIO;
- goto bail;
- }
- set_buffer_new(bh_result);
- up_write(&OCFS2_I(inode)->ip_alloc_sem);
- }
-
- /*
- * get_more_blocks() expects us to describe a hole by clearing
- * the mapped bit on bh_result().
- *
- * Consider an unwritten extent as a hole.
- */
- if (p_blkno && !(ext_flags & OCFS2_EXT_UNWRITTEN))
- map_bh(bh_result, inode->i_sb, p_blkno);
- else
- clear_buffer_mapped(bh_result);
-
- /* make sure we don't map more than max_blocks blocks here as
- that's all the kernel will handle at this point. */
- if (max_blocks < contig_blocks)
- contig_blocks = max_blocks;
- bh_result->b_size = contig_blocks << blocksize_bits;
-bail:
- if (alloc_locked)
- ocfs2_inode_unlock(inode, 1);
- return ret;
-}
-
-/*
- * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
- * particularly interested in the aio/dio case. We use the rw_lock DLM lock
- * to protect io on one node from truncation on another.
- */
-static void ocfs2_dio_end_io(struct kiocb *iocb,
- loff_t offset,
- ssize_t bytes,
- void *private)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
- int level;
-
- /* this io's submitter should not have unlocked this before we could */
- BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
-
- if (ocfs2_iocb_is_unaligned_aio(iocb)) {
- ocfs2_iocb_clear_unaligned_aio(iocb);
-
- mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
- }
-
- /* Let rw unlock to be done later to protect append direct io write */
- if (offset + bytes <= i_size_read(inode)) {
- ocfs2_iocb_clear_rw_locked(iocb);
-
- level = ocfs2_iocb_rw_locked_level(iocb);
- ocfs2_rw_unlock(inode, level);
- }
-}
-
static int ocfs2_releasepage(struct page *page, gfp_t wait)
{
if (!page_has_buffers(page))
@@ -653,374 +506,17 @@ static int ocfs2_releasepage(struct page *page, gfp_t wait)
return try_to_free_buffers(page);
}
-static int ocfs2_is_overwrite(struct ocfs2_super *osb,
- struct inode *inode, loff_t offset)
-{
- int ret = 0;
- u32 v_cpos = 0;
- u32 p_cpos = 0;
- unsigned int num_clusters = 0;
- unsigned int ext_flags = 0;
-
- v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
- ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
- &num_clusters, &ext_flags);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
- }
-
- if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN))
- return 1;
-
- return 0;
-}
-
-static int ocfs2_direct_IO_zero_extend(struct ocfs2_super *osb,
- struct inode *inode, loff_t offset,
- u64 zero_len, int cluster_align)
-{
- u32 p_cpos = 0;
- u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
- unsigned int num_clusters = 0;
- unsigned int ext_flags = 0;
- int ret = 0;
-
- if (offset <= i_size_read(inode) || cluster_align)
- return 0;
-
- ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
- &ext_flags);
- if (ret < 0) {
- mlog_errno(ret);
- return ret;
- }
-
- if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
- u64 s = i_size_read(inode);
- sector_t sector = ((u64)p_cpos << (osb->s_clustersize_bits - 9)) +
- (do_div(s, osb->s_clustersize) >> 9);
-
- ret = blkdev_issue_zeroout(osb->sb->s_bdev, sector,
- zero_len >> 9, GFP_NOFS, false);
- if (ret < 0)
- mlog_errno(ret);
- }
-
- return ret;
-}
-
-static int ocfs2_direct_IO_extend_no_holes(struct ocfs2_super *osb,
- struct inode *inode, loff_t offset)
-{
- u64 zero_start, zero_len, total_zero_len;
- u32 p_cpos = 0, clusters_to_add;
- u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, i_size_read(inode));
- unsigned int num_clusters = 0;
- unsigned int ext_flags = 0;
- u32 size_div, offset_div;
- int ret = 0;
-
- {
- u64 o = offset;
- u64 s = i_size_read(inode);
-
- offset_div = do_div(o, osb->s_clustersize);
- size_div = do_div(s, osb->s_clustersize);
- }
-
- if (offset <= i_size_read(inode))
- return 0;
-
- clusters_to_add = ocfs2_bytes_to_clusters(inode->i_sb, offset) -
- ocfs2_bytes_to_clusters(inode->i_sb, i_size_read(inode));
- total_zero_len = offset - i_size_read(inode);
- if (clusters_to_add)
- total_zero_len -= offset_div;
-
- /* Allocate clusters to fill out holes, and this is only needed
- * when we add more than one clusters. Otherwise the cluster will
- * be allocated during direct IO */
- if (clusters_to_add > 1) {
- ret = ocfs2_extend_allocation(inode,
- OCFS2_I(inode)->ip_clusters,
- clusters_to_add - 1, 0);
- if (ret) {
- mlog_errno(ret);
- goto out;
- }
- }
-
- while (total_zero_len) {
- ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos, &num_clusters,
- &ext_flags);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
-
- zero_start = ocfs2_clusters_to_bytes(osb->sb, p_cpos) +
- size_div;
- zero_len = ocfs2_clusters_to_bytes(osb->sb, num_clusters) -
- size_div;
- zero_len = min(total_zero_len, zero_len);
-
- if (p_cpos && !(ext_flags & OCFS2_EXT_UNWRITTEN)) {
- ret = blkdev_issue_zeroout(osb->sb->s_bdev,
- zero_start >> 9, zero_len >> 9,
- GFP_NOFS, false);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- }
-
- total_zero_len -= zero_len;
- v_cpos += ocfs2_bytes_to_clusters(osb->sb, zero_len + size_div);
-
- /* Only at first iteration can be cluster not aligned.
- * So set size_div to 0 for the rest */
- size_div = 0;
- }
-
-out:
- return ret;
-}
-
-static ssize_t ocfs2_direct_IO_write(struct kiocb *iocb,
- struct iov_iter *iter,
- loff_t offset)
-{
- ssize_t ret = 0;
- ssize_t written = 0;
- bool orphaned = false;
- int is_overwrite = 0;
- struct file *file = iocb->ki_filp;
- struct inode *inode = file_inode(file)->i_mapping->host;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- struct buffer_head *di_bh = NULL;
- size_t count = iter->count;
- journal_t *journal = osb->journal->j_journal;
- u64 zero_len_head, zero_len_tail;
- int cluster_align_head, cluster_align_tail;
- loff_t final_size = offset + count;
- int append_write = offset >= i_size_read(inode) ? 1 : 0;
- unsigned int num_clusters = 0;
- unsigned int ext_flags = 0;
-
- {
- u64 o = offset;
- u64 s = i_size_read(inode);
-
- zero_len_head = do_div(o, 1 << osb->s_clustersize_bits);
- cluster_align_head = !zero_len_head;
-
- zero_len_tail = osb->s_clustersize -
- do_div(s, osb->s_clustersize);
- if ((offset - i_size_read(inode)) < zero_len_tail)
- zero_len_tail = offset - i_size_read(inode);
- cluster_align_tail = !zero_len_tail;
- }
-
- /*
- * when final_size > inode->i_size, inode->i_size will be
- * updated after direct write, so add the inode to orphan
- * dir first.
- */
- if (final_size > i_size_read(inode)) {
- ret = ocfs2_add_inode_to_orphan(osb, inode);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
- orphaned = true;
- }
-
- if (append_write) {
- ret = ocfs2_inode_lock(inode, NULL, 1);
- if (ret < 0) {
- mlog_errno(ret);
- goto clean_orphan;
- }
-
- /* zeroing out the previously allocated cluster tail
- * that but not zeroed */
- if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb))) {
- down_read(&OCFS2_I(inode)->ip_alloc_sem);
- ret = ocfs2_direct_IO_zero_extend(osb, inode, offset,
- zero_len_tail, cluster_align_tail);
- up_read(&OCFS2_I(inode)->ip_alloc_sem);
- } else {
- down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = ocfs2_direct_IO_extend_no_holes(osb, inode,
- offset);
- up_write(&OCFS2_I(inode)->ip_alloc_sem);
- }
- if (ret < 0) {
- mlog_errno(ret);
- ocfs2_inode_unlock(inode, 1);
- goto clean_orphan;
- }
-
- is_overwrite = ocfs2_is_overwrite(osb, inode, offset);
- if (is_overwrite < 0) {
- mlog_errno(is_overwrite);
- ret = is_overwrite;
- ocfs2_inode_unlock(inode, 1);
- goto clean_orphan;
- }
-
- ocfs2_inode_unlock(inode, 1);
- }
-
- written = __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
- offset, ocfs2_direct_IO_get_blocks,
- ocfs2_dio_end_io, NULL, 0);
- /* overwrite aio may return -EIOCBQUEUED, and it is not an error */
- if ((written < 0) && (written != -EIOCBQUEUED)) {
- loff_t i_size = i_size_read(inode);
-
- if (offset + count > i_size) {
- ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (ret < 0) {
- mlog_errno(ret);
- goto clean_orphan;
- }
-
- if (i_size == i_size_read(inode)) {
- ret = ocfs2_truncate_file(inode, di_bh,
- i_size);
- if (ret < 0) {
- if (ret != -ENOSPC)
- mlog_errno(ret);
-
- ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
- di_bh = NULL;
- goto clean_orphan;
- }
- }
-
- ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
- di_bh = NULL;
-
- ret = jbd2_journal_force_commit(journal);
- if (ret < 0)
- mlog_errno(ret);
- }
- } else if (written > 0 && append_write && !is_overwrite &&
- !cluster_align_head) {
- /* zeroing out the allocated cluster head */
- u32 p_cpos = 0;
- u32 v_cpos = ocfs2_bytes_to_clusters(osb->sb, offset);
-
- ret = ocfs2_inode_lock(inode, NULL, 0);
- if (ret < 0) {
- mlog_errno(ret);
- goto clean_orphan;
- }
-
- ret = ocfs2_get_clusters(inode, v_cpos, &p_cpos,
- &num_clusters, &ext_flags);
- if (ret < 0) {
- mlog_errno(ret);
- ocfs2_inode_unlock(inode, 0);
- goto clean_orphan;
- }
-
- BUG_ON(!p_cpos || (ext_flags & OCFS2_EXT_UNWRITTEN));
-
- ret = blkdev_issue_zeroout(osb->sb->s_bdev,
- (u64)p_cpos << (osb->s_clustersize_bits - 9),
- zero_len_head >> 9, GFP_NOFS, false);
- if (ret < 0)
- mlog_errno(ret);
-
- ocfs2_inode_unlock(inode, 0);
- }
-
-clean_orphan:
- if (orphaned) {
- int tmp_ret;
- int update_isize = written > 0 ? 1 : 0;
- loff_t end = update_isize ? offset + written : 0;
-
- tmp_ret = ocfs2_inode_lock(inode, &di_bh, 1);
- if (tmp_ret < 0) {
- ret = tmp_ret;
- mlog_errno(ret);
- goto out;
- }
-
- tmp_ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
- update_isize, end);
- if (tmp_ret < 0) {
- ocfs2_inode_unlock(inode, 1);
- ret = tmp_ret;
- mlog_errno(ret);
- brelse(di_bh);
- goto out;
- }
-
- ocfs2_inode_unlock(inode, 1);
- brelse(di_bh);
-
- tmp_ret = jbd2_journal_force_commit(journal);
- if (tmp_ret < 0) {
- ret = tmp_ret;
- mlog_errno(tmp_ret);
- }
- }
-
-out:
- if (ret >= 0)
- ret = written;
- return ret;
-}
-
-static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
- loff_t offset)
-{
- struct file *file = iocb->ki_filp;
- struct inode *inode = file_inode(file)->i_mapping->host;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- int full_coherency = !(osb->s_mount_opt &
- OCFS2_MOUNT_COHERENCY_BUFFERED);
-
- /*
- * Fallback to buffered I/O if we see an inode without
- * extents.
- */
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
- return 0;
-
- /* Fallback to buffered I/O if we are appending and
- * concurrent O_DIRECT writes are allowed.
- */
- if (i_size_read(inode) <= offset && !full_coherency)
- return 0;
-
- if (iov_iter_rw(iter) == READ)
- return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
- iter, offset,
- ocfs2_direct_IO_get_blocks,
- ocfs2_dio_end_io, NULL, 0);
- else
- return ocfs2_direct_IO_write(iocb, iter, offset);
-}
-
static void ocfs2_figure_cluster_boundaries(struct ocfs2_super *osb,
u32 cpos,
unsigned int *start,
unsigned int *end)
{
- unsigned int cluster_start = 0, cluster_end = PAGE_CACHE_SIZE;
+ unsigned int cluster_start = 0, cluster_end = PAGE_SIZE;
- if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits)) {
+ if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits)) {
unsigned int cpp;
- cpp = 1 << (PAGE_CACHE_SHIFT - osb->s_clustersize_bits);
+ cpp = 1 << (PAGE_SHIFT - osb->s_clustersize_bits);
cluster_start = cpos % cpp;
cluster_start = cluster_start << osb->s_clustersize_bits;
@@ -1188,13 +684,20 @@ next_bh:
return ret;
}
-#if (PAGE_CACHE_SIZE >= OCFS2_MAX_CLUSTERSIZE)
+#if (PAGE_SIZE >= OCFS2_MAX_CLUSTERSIZE)
#define OCFS2_MAX_CTXT_PAGES 1
#else
-#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_CACHE_SIZE)
+#define OCFS2_MAX_CTXT_PAGES (OCFS2_MAX_CLUSTERSIZE / PAGE_SIZE)
#endif
-#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_CACHE_SIZE / OCFS2_MIN_CLUSTERSIZE)
+#define OCFS2_MAX_CLUSTERS_PER_PAGE (PAGE_SIZE / OCFS2_MIN_CLUSTERSIZE)
+
+struct ocfs2_unwritten_extent {
+ struct list_head ue_node;
+ struct list_head ue_ip_node;
+ u32 ue_cpos;
+ u32 ue_phys;
+};
/*
* Describe the state of a single cluster to be written to.
@@ -1207,7 +710,7 @@ struct ocfs2_write_cluster_desc {
* filled.
*/
unsigned c_new;
- unsigned c_unwritten;
+ unsigned c_clear_unwritten;
unsigned c_needs_zero;
};
@@ -1219,6 +722,9 @@ struct ocfs2_write_ctxt {
/* First cluster allocated in a nonsparse extend */
u32 w_first_new_cpos;
+ /* Type of caller. Must be one of buffer, mmap, direct. */
+ ocfs2_write_type_t w_type;
+
struct ocfs2_write_cluster_desc w_desc[OCFS2_MAX_CLUSTERS_PER_PAGE];
/*
@@ -1267,6 +773,8 @@ struct ocfs2_write_ctxt {
struct buffer_head *w_di_bh;
struct ocfs2_cached_dealloc_ctxt w_dealloc;
+
+ struct list_head w_unwritten_list;
};
void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
@@ -1277,7 +785,7 @@ void ocfs2_unlock_and_free_pages(struct page **pages, int num_pages)
if (pages[i]) {
unlock_page(pages[i]);
mark_page_accessed(pages[i]);
- page_cache_release(pages[i]);
+ put_page(pages[i]);
}
}
}
@@ -1300,13 +808,30 @@ static void ocfs2_unlock_pages(struct ocfs2_write_ctxt *wc)
}
}
mark_page_accessed(wc->w_target_page);
- page_cache_release(wc->w_target_page);
+ put_page(wc->w_target_page);
}
ocfs2_unlock_and_free_pages(wc->w_pages, wc->w_num_pages);
}
-static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
+static void ocfs2_free_unwritten_list(struct inode *inode,
+ struct list_head *head)
{
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_unwritten_extent *ue = NULL, *tmp = NULL;
+
+ list_for_each_entry_safe(ue, tmp, head, ue_node) {
+ list_del(&ue->ue_node);
+ spin_lock(&oi->ip_lock);
+ list_del(&ue->ue_ip_node);
+ spin_unlock(&oi->ip_lock);
+ kfree(ue);
+ }
+}
+
+static void ocfs2_free_write_ctxt(struct inode *inode,
+ struct ocfs2_write_ctxt *wc)
+{
+ ocfs2_free_unwritten_list(inode, &wc->w_unwritten_list);
ocfs2_unlock_pages(wc);
brelse(wc->w_di_bh);
kfree(wc);
@@ -1314,7 +839,8 @@ static void ocfs2_free_write_ctxt(struct ocfs2_write_ctxt *wc)
static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
struct ocfs2_super *osb, loff_t pos,
- unsigned len, struct buffer_head *di_bh)
+ unsigned len, ocfs2_write_type_t type,
+ struct buffer_head *di_bh)
{
u32 cend;
struct ocfs2_write_ctxt *wc;
@@ -1329,13 +855,15 @@ static int ocfs2_alloc_write_ctxt(struct ocfs2_write_ctxt **wcp,
wc->w_clen = cend - wc->w_cpos + 1;
get_bh(di_bh);
wc->w_di_bh = di_bh;
+ wc->w_type = type;
- if (unlikely(PAGE_CACHE_SHIFT > osb->s_clustersize_bits))
+ if (unlikely(PAGE_SHIFT > osb->s_clustersize_bits))
wc->w_large_pages = 1;
else
wc->w_large_pages = 0;
ocfs2_init_dealloc_ctxt(&wc->w_dealloc);
+ INIT_LIST_HEAD(&wc->w_unwritten_list);
*wcp = wc;
@@ -1392,16 +920,17 @@ static void ocfs2_write_failure(struct inode *inode,
loff_t user_pos, unsigned user_len)
{
int i;
- unsigned from = user_pos & (PAGE_CACHE_SIZE - 1),
+ unsigned from = user_pos & (PAGE_SIZE - 1),
to = user_pos + user_len;
struct page *tmppage;
- ocfs2_zero_new_buffers(wc->w_target_page, from, to);
+ if (wc->w_target_page)
+ ocfs2_zero_new_buffers(wc->w_target_page, from, to);
for(i = 0; i < wc->w_num_pages; i++) {
tmppage = wc->w_pages[i];
- if (page_has_buffers(tmppage)) {
+ if (tmppage && page_has_buffers(tmppage)) {
if (ocfs2_should_order_data(inode))
ocfs2_jbd2_file_inode(wc->w_handle, inode);
@@ -1431,7 +960,7 @@ static int ocfs2_prepare_page_for_write(struct inode *inode, u64 *p_blkno,
(page_offset(page) <= user_pos));
if (page == wc->w_target_page) {
- map_from = user_pos & (PAGE_CACHE_SIZE - 1);
+ map_from = user_pos & (PAGE_SIZE - 1);
map_to = map_from + user_len;
if (new)
@@ -1505,7 +1034,7 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
struct inode *inode = mapping->host;
loff_t last_byte;
- target_index = user_pos >> PAGE_CACHE_SHIFT;
+ target_index = user_pos >> PAGE_SHIFT;
/*
* Figure out how many pages we'll be manipulating here. For
@@ -1524,18 +1053,20 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
*/
last_byte = max(user_pos + user_len, i_size_read(inode));
BUG_ON(last_byte < 1);
- end_index = ((last_byte - 1) >> PAGE_CACHE_SHIFT) + 1;
+ end_index = ((last_byte - 1) >> PAGE_SHIFT) + 1;
if ((start + wc->w_num_pages) > end_index)
wc->w_num_pages = end_index - start;
} else {
wc->w_num_pages = 1;
start = target_index;
}
+ end_index = (user_pos + user_len - 1) >> PAGE_SHIFT;
for(i = 0; i < wc->w_num_pages; i++) {
index = start + i;
- if (index == target_index && mmap_page) {
+ if (index >= target_index && index <= end_index &&
+ wc->w_type == OCFS2_WRITE_MMAP) {
/*
* ocfs2_pagemkwrite() is a little different
* and wants us to directly use the page
@@ -1551,9 +1082,14 @@ static int ocfs2_grab_pages_for_write(struct address_space *mapping,
goto out;
}
- page_cache_get(mmap_page);
+ get_page(mmap_page);
wc->w_pages[i] = mmap_page;
wc->w_target_locked = true;
+ } else if (index >= target_index && index <= end_index &&
+ wc->w_type == OCFS2_WRITE_DIRECT) {
+ /* Direct write has no mapping page. */
+ wc->w_pages[i] = NULL;
+ continue;
} else {
wc->w_pages[i] = find_or_create_page(mapping, index,
GFP_NOFS);
@@ -1578,19 +1114,20 @@ out:
* Prepare a single cluster for write one cluster into the file.
*/
static int ocfs2_write_cluster(struct address_space *mapping,
- u32 phys, unsigned int unwritten,
+ u32 *phys, unsigned int new,
+ unsigned int clear_unwritten,
unsigned int should_zero,
struct ocfs2_alloc_context *data_ac,
struct ocfs2_alloc_context *meta_ac,
struct ocfs2_write_ctxt *wc, u32 cpos,
loff_t user_pos, unsigned user_len)
{
- int ret, i, new;
- u64 v_blkno, p_blkno;
+ int ret, i;
+ u64 p_blkno;
struct inode *inode = mapping->host;
struct ocfs2_extent_tree et;
+ int bpc = ocfs2_clusters_to_blocks(inode->i_sb, 1);
- new = phys == 0 ? 1 : 0;
if (new) {
u32 tmp_pos;
@@ -1600,9 +1137,9 @@ static int ocfs2_write_cluster(struct address_space *mapping,
*/
tmp_pos = cpos;
ret = ocfs2_add_inode_data(OCFS2_SB(inode->i_sb), inode,
- &tmp_pos, 1, 0, wc->w_di_bh,
- wc->w_handle, data_ac,
- meta_ac, NULL);
+ &tmp_pos, 1, !clear_unwritten,
+ wc->w_di_bh, wc->w_handle,
+ data_ac, meta_ac, NULL);
/*
* This shouldn't happen because we must have already
* calculated the correct meta data allocation required. The
@@ -1619,11 +1156,11 @@ static int ocfs2_write_cluster(struct address_space *mapping,
mlog_errno(ret);
goto out;
}
- } else if (unwritten) {
+ } else if (clear_unwritten) {
ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode),
wc->w_di_bh);
ret = ocfs2_mark_extent_written(inode, &et,
- wc->w_handle, cpos, 1, phys,
+ wc->w_handle, cpos, 1, *phys,
meta_ac, &wc->w_dealloc);
if (ret < 0) {
mlog_errno(ret);
@@ -1631,30 +1168,33 @@ static int ocfs2_write_cluster(struct address_space *mapping,
}
}
- if (should_zero)
- v_blkno = ocfs2_clusters_to_blocks(inode->i_sb, cpos);
- else
- v_blkno = user_pos >> inode->i_sb->s_blocksize_bits;
-
/*
* The only reason this should fail is due to an inability to
* find the extent added.
*/
- ret = ocfs2_extent_map_get_blocks(inode, v_blkno, &p_blkno, NULL,
- NULL);
+ ret = ocfs2_get_clusters(inode, cpos, phys, NULL, NULL);
if (ret < 0) {
mlog(ML_ERROR, "Get physical blkno failed for inode %llu, "
- "at logical block %llu",
- (unsigned long long)OCFS2_I(inode)->ip_blkno,
- (unsigned long long)v_blkno);
+ "at logical cluster %u",
+ (unsigned long long)OCFS2_I(inode)->ip_blkno, cpos);
goto out;
}
- BUG_ON(p_blkno == 0);
+ BUG_ON(*phys == 0);
+
+ p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, *phys);
+ if (!should_zero)
+ p_blkno += (user_pos >> inode->i_sb->s_blocksize_bits) & (u64)(bpc - 1);
for(i = 0; i < wc->w_num_pages; i++) {
int tmpret;
+ /* This is the direct io target page. */
+ if (wc->w_pages[i] == NULL) {
+ p_blkno++;
+ continue;
+ }
+
tmpret = ocfs2_prepare_page_for_write(inode, &p_blkno, wc,
wc->w_pages[i], cpos,
user_pos, user_len,
@@ -1701,8 +1241,9 @@ static int ocfs2_write_cluster_by_desc(struct address_space *mapping,
if ((cluster_off + local_len) > osb->s_clustersize)
local_len = osb->s_clustersize - cluster_off;
- ret = ocfs2_write_cluster(mapping, desc->c_phys,
- desc->c_unwritten,
+ ret = ocfs2_write_cluster(mapping, &desc->c_phys,
+ desc->c_new,
+ desc->c_clear_unwritten,
desc->c_needs_zero,
data_ac, meta_ac,
wc, desc->c_cpos, pos, local_len);
@@ -1731,7 +1272,7 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
{
struct ocfs2_write_cluster_desc *desc;
- wc->w_target_from = pos & (PAGE_CACHE_SIZE - 1);
+ wc->w_target_from = pos & (PAGE_SIZE - 1);
wc->w_target_to = wc->w_target_from + len;
if (alloc == 0)
@@ -1768,8 +1309,68 @@ static void ocfs2_set_target_boundaries(struct ocfs2_super *osb,
&wc->w_target_to);
} else {
wc->w_target_from = 0;
- wc->w_target_to = PAGE_CACHE_SIZE;
+ wc->w_target_to = PAGE_SIZE;
+ }
+}
+
+/*
+ * Check if this extent is marked UNWRITTEN by direct io. If so, we need not to
+ * do the zero work. And should not to clear UNWRITTEN since it will be cleared
+ * by the direct io procedure.
+ * If this is a new extent that allocated by direct io, we should mark it in
+ * the ip_unwritten_list.
+ */
+static int ocfs2_unwritten_check(struct inode *inode,
+ struct ocfs2_write_ctxt *wc,
+ struct ocfs2_write_cluster_desc *desc)
+{
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_unwritten_extent *ue = NULL, *new = NULL;
+ int ret = 0;
+
+ if (!desc->c_needs_zero)
+ return 0;
+
+retry:
+ spin_lock(&oi->ip_lock);
+ /* Needs not to zero no metter buffer or direct. The one who is zero
+ * the cluster is doing zero. And he will clear unwritten after all
+ * cluster io finished. */
+ list_for_each_entry(ue, &oi->ip_unwritten_list, ue_ip_node) {
+ if (desc->c_cpos == ue->ue_cpos) {
+ BUG_ON(desc->c_new);
+ desc->c_needs_zero = 0;
+ desc->c_clear_unwritten = 0;
+ goto unlock;
+ }
}
+
+ if (wc->w_type != OCFS2_WRITE_DIRECT)
+ goto unlock;
+
+ if (new == NULL) {
+ spin_unlock(&oi->ip_lock);
+ new = kmalloc(sizeof(struct ocfs2_unwritten_extent),
+ GFP_NOFS);
+ if (new == NULL) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ goto retry;
+ }
+ /* This direct write will doing zero. */
+ new->ue_cpos = desc->c_cpos;
+ new->ue_phys = desc->c_phys;
+ desc->c_clear_unwritten = 0;
+ list_add_tail(&new->ue_ip_node, &oi->ip_unwritten_list);
+ list_add_tail(&new->ue_node, &wc->w_unwritten_list);
+ new = NULL;
+unlock:
+ spin_unlock(&oi->ip_lock);
+out:
+ if (new)
+ kfree(new);
+ return ret;
}
/*
@@ -1847,14 +1448,21 @@ static int ocfs2_populate_write_desc(struct inode *inode,
if (phys == 0) {
desc->c_new = 1;
desc->c_needs_zero = 1;
+ desc->c_clear_unwritten = 1;
*clusters_to_alloc = *clusters_to_alloc + 1;
}
if (ext_flags & OCFS2_EXT_UNWRITTEN) {
- desc->c_unwritten = 1;
+ desc->c_clear_unwritten = 1;
desc->c_needs_zero = 1;
}
+ ret = ocfs2_unwritten_check(inode, wc, desc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
num_clusters--;
}
@@ -2017,8 +1625,10 @@ static int ocfs2_expand_nonsparse_inode(struct inode *inode,
if (ret)
mlog_errno(ret);
- wc->w_first_new_cpos =
- ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
+ /* There is no wc if this is call from direct. */
+ if (wc)
+ wc->w_first_new_cpos =
+ ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode));
return ret;
}
@@ -2072,9 +1682,8 @@ out:
return ret;
}
-int ocfs2_write_begin_nolock(struct file *filp,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
+int ocfs2_write_begin_nolock(struct address_space *mapping,
+ loff_t pos, unsigned len, ocfs2_write_type_t type,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page)
{
@@ -2091,7 +1700,7 @@ int ocfs2_write_begin_nolock(struct file *filp,
int try_free = 1, ret1;
try_again:
- ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, di_bh);
+ ret = ocfs2_alloc_write_ctxt(&wc, osb, pos, len, type, di_bh);
if (ret) {
mlog_errno(ret);
return ret;
@@ -2110,14 +1719,17 @@ try_again:
}
}
- if (ocfs2_sparse_alloc(osb))
- ret = ocfs2_zero_tail(inode, di_bh, pos);
- else
- ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos, len,
- wc);
- if (ret) {
- mlog_errno(ret);
- goto out;
+ /* Direct io change i_size late, should not zero tail here. */
+ if (type != OCFS2_WRITE_DIRECT) {
+ if (ocfs2_sparse_alloc(osb))
+ ret = ocfs2_zero_tail(inode, di_bh, pos);
+ else
+ ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
+ len, wc);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
}
ret = ocfs2_check_range_for_refcount(inode, pos, len);
@@ -2148,7 +1760,7 @@ try_again:
(unsigned long long)OCFS2_I(inode)->ip_blkno,
(long long)i_size_read(inode),
le32_to_cpu(di->i_clusters),
- pos, len, flags, mmap_page,
+ pos, len, type, mmap_page,
clusters_to_alloc, extents_to_split);
/*
@@ -2178,17 +1790,17 @@ try_again:
credits = ocfs2_calc_extend_credits(inode->i_sb,
&di->id2.i_list);
-
- }
+ } else if (type == OCFS2_WRITE_DIRECT)
+ /* direct write needs not to start trans if no extents alloc. */
+ goto success;
/*
* We have to zero sparse allocated clusters, unwritten extent clusters,
* and non-sparse clusters we just extended. For non-sparse writes,
* we know zeros will only be needed in the first and/or last cluster.
*/
- if (clusters_to_alloc || extents_to_split ||
- (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
- wc->w_desc[wc->w_clen - 1].c_needs_zero)))
+ if (wc->w_clen && (wc->w_desc[0].c_needs_zero ||
+ wc->w_desc[wc->w_clen - 1].c_needs_zero))
cluster_of_pages = 1;
else
cluster_of_pages = 0;
@@ -2255,7 +1867,8 @@ try_again:
ocfs2_free_alloc_context(meta_ac);
success:
- *pagep = wc->w_target_page;
+ if (pagep)
+ *pagep = wc->w_target_page;
*fsdata = wc;
return 0;
out_quota:
@@ -2266,7 +1879,7 @@ out_commit:
ocfs2_commit_trans(osb, handle);
out:
- ocfs2_free_write_ctxt(wc);
+ ocfs2_free_write_ctxt(inode, wc);
if (data_ac) {
ocfs2_free_alloc_context(data_ac);
@@ -2318,8 +1931,8 @@ static int ocfs2_write_begin(struct file *file, struct address_space *mapping,
*/
down_write(&OCFS2_I(inode)->ip_alloc_sem);
- ret = ocfs2_write_begin_nolock(file, mapping, pos, len, flags, pagep,
- fsdata, di_bh, NULL);
+ ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_BUFFER,
+ pagep, fsdata, di_bh, NULL);
if (ret) {
mlog_errno(ret);
goto out_fail;
@@ -2368,7 +1981,7 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
struct page *page, void *fsdata)
{
int i, ret;
- unsigned from, to, start = pos & (PAGE_CACHE_SIZE - 1);
+ unsigned from, to, start = pos & (PAGE_SIZE - 1);
struct inode *inode = mapping->host;
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
struct ocfs2_write_ctxt *wc = fsdata;
@@ -2376,12 +1989,16 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
handle_t *handle = wc->w_handle;
struct page *tmppage;
- ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), wc->w_di_bh,
- OCFS2_JOURNAL_ACCESS_WRITE);
- if (ret) {
- copied = ret;
- mlog_errno(ret);
- goto out;
+ BUG_ON(!list_empty(&wc->w_unwritten_list));
+
+ if (handle) {
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode),
+ wc->w_di_bh, OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ copied = ret;
+ mlog_errno(ret);
+ goto out;
+ }
}
if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
@@ -2389,24 +2006,29 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
goto out_write_size;
}
- if (unlikely(copied < len)) {
+ if (unlikely(copied < len) && wc->w_target_page) {
if (!PageUptodate(wc->w_target_page))
copied = 0;
ocfs2_zero_new_buffers(wc->w_target_page, start+copied,
start+len);
}
- flush_dcache_page(wc->w_target_page);
+ if (wc->w_target_page)
+ flush_dcache_page(wc->w_target_page);
for(i = 0; i < wc->w_num_pages; i++) {
tmppage = wc->w_pages[i];
+ /* This is the direct io target page. */
+ if (tmppage == NULL)
+ continue;
+
if (tmppage == wc->w_target_page) {
from = wc->w_target_from;
to = wc->w_target_to;
- BUG_ON(from > PAGE_CACHE_SIZE ||
- to > PAGE_CACHE_SIZE ||
+ BUG_ON(from > PAGE_SIZE ||
+ to > PAGE_SIZE ||
to < from);
} else {
/*
@@ -2415,29 +2037,33 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
* to flush their entire range.
*/
from = 0;
- to = PAGE_CACHE_SIZE;
+ to = PAGE_SIZE;
}
if (page_has_buffers(tmppage)) {
- if (ocfs2_should_order_data(inode))
- ocfs2_jbd2_file_inode(wc->w_handle, inode);
+ if (handle && ocfs2_should_order_data(inode))
+ ocfs2_jbd2_file_inode(handle, inode);
block_commit_write(tmppage, from, to);
}
}
out_write_size:
- pos += copied;
- if (pos > i_size_read(inode)) {
- i_size_write(inode, pos);
- mark_inode_dirty(inode);
- }
- inode->i_blocks = ocfs2_inode_sector_count(inode);
- di->i_size = cpu_to_le64((u64)i_size_read(inode));
- inode->i_mtime = inode->i_ctime = CURRENT_TIME;
- di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
- di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
- ocfs2_update_inode_fsync_trans(handle, inode, 1);
- ocfs2_journal_dirty(handle, wc->w_di_bh);
+ /* Direct io do not update i_size here. */
+ if (wc->w_type != OCFS2_WRITE_DIRECT) {
+ pos += copied;
+ if (pos > i_size_read(inode)) {
+ i_size_write(inode, pos);
+ mark_inode_dirty(inode);
+ }
+ inode->i_blocks = ocfs2_inode_sector_count(inode);
+ di->i_size = cpu_to_le64((u64)i_size_read(inode));
+ inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ di->i_mtime = di->i_ctime = cpu_to_le64(inode->i_mtime.tv_sec);
+ di->i_mtime_nsec = di->i_ctime_nsec = cpu_to_le32(inode->i_mtime.tv_nsec);
+ ocfs2_update_inode_fsync_trans(handle, inode, 1);
+ }
+ if (handle)
+ ocfs2_journal_dirty(handle, wc->w_di_bh);
out:
/* unlock pages before dealloc since it needs acquiring j_trans_barrier
@@ -2447,7 +2073,8 @@ out:
*/
ocfs2_unlock_pages(wc);
- ocfs2_commit_trans(osb, handle);
+ if (handle)
+ ocfs2_commit_trans(osb, handle);
ocfs2_run_deallocs(osb, &wc->w_dealloc);
@@ -2472,6 +2099,360 @@ static int ocfs2_write_end(struct file *file, struct address_space *mapping,
return ret;
}
+struct ocfs2_dio_write_ctxt {
+ struct list_head dw_zero_list;
+ unsigned dw_zero_count;
+ int dw_orphaned;
+ pid_t dw_writer_pid;
+};
+
+static struct ocfs2_dio_write_ctxt *
+ocfs2_dio_alloc_write_ctx(struct buffer_head *bh, int *alloc)
+{
+ struct ocfs2_dio_write_ctxt *dwc = NULL;
+
+ if (bh->b_private)
+ return bh->b_private;
+
+ dwc = kmalloc(sizeof(struct ocfs2_dio_write_ctxt), GFP_NOFS);
+ if (dwc == NULL)
+ return NULL;
+ INIT_LIST_HEAD(&dwc->dw_zero_list);
+ dwc->dw_zero_count = 0;
+ dwc->dw_orphaned = 0;
+ dwc->dw_writer_pid = task_pid_nr(current);
+ bh->b_private = dwc;
+ *alloc = 1;
+
+ return dwc;
+}
+
+static void ocfs2_dio_free_write_ctx(struct inode *inode,
+ struct ocfs2_dio_write_ctxt *dwc)
+{
+ ocfs2_free_unwritten_list(inode, &dwc->dw_zero_list);
+ kfree(dwc);
+}
+
+/*
+ * TODO: Make this into a generic get_blocks function.
+ *
+ * From do_direct_io in direct-io.c:
+ * "So what we do is to permit the ->get_blocks function to populate
+ * bh.b_size with the size of IO which is permitted at this offset and
+ * this i_blkbits."
+ *
+ * This function is called directly from get_more_blocks in direct-io.c.
+ *
+ * called like this: dio->get_blocks(dio->inode, fs_startblk,
+ * fs_count, map_bh, dio->rw == WRITE);
+ */
+static int ocfs2_dio_get_block(struct inode *inode, sector_t iblock,
+ struct buffer_head *bh_result, int create)
+{
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_write_ctxt *wc;
+ struct ocfs2_write_cluster_desc *desc = NULL;
+ struct ocfs2_dio_write_ctxt *dwc = NULL;
+ struct buffer_head *di_bh = NULL;
+ u64 p_blkno;
+ loff_t pos = iblock << inode->i_sb->s_blocksize_bits;
+ unsigned len, total_len = bh_result->b_size;
+ int ret = 0, first_get_block = 0;
+
+ len = osb->s_clustersize - (pos & (osb->s_clustersize - 1));
+ len = min(total_len, len);
+
+ mlog(0, "get block of %lu at %llu:%u req %u\n",
+ inode->i_ino, pos, len, total_len);
+
+ /*
+ * Because we need to change file size in ocfs2_dio_end_io_write(), or
+ * we may need to add it to orphan dir. So can not fall to fast path
+ * while file size will be changed.
+ */
+ if (pos + total_len <= i_size_read(inode)) {
+ down_read(&oi->ip_alloc_sem);
+ /* This is the fast path for re-write. */
+ ret = ocfs2_get_block(inode, iblock, bh_result, create);
+
+ up_read(&oi->ip_alloc_sem);
+
+ if (buffer_mapped(bh_result) &&
+ !buffer_new(bh_result) &&
+ ret == 0)
+ goto out;
+
+ /* Clear state set by ocfs2_get_block. */
+ bh_result->b_state = 0;
+ }
+
+ dwc = ocfs2_dio_alloc_write_ctx(bh_result, &first_get_block);
+ if (unlikely(dwc == NULL)) {
+ ret = -ENOMEM;
+ mlog_errno(ret);
+ goto out;
+ }
+
+ if (ocfs2_clusters_for_bytes(inode->i_sb, pos + total_len) >
+ ocfs2_clusters_for_bytes(inode->i_sb, i_size_read(inode)) &&
+ !dwc->dw_orphaned) {
+ /*
+ * when we are going to alloc extents beyond file size, add the
+ * inode to orphan dir, so we can recall those spaces when
+ * system crashed during write.
+ */
+ ret = ocfs2_add_inode_to_orphan(osb, inode);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+ dwc->dw_orphaned = 1;
+ }
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ down_write(&oi->ip_alloc_sem);
+
+ if (first_get_block) {
+ if (ocfs2_sparse_alloc(OCFS2_SB(inode->i_sb)))
+ ret = ocfs2_zero_tail(inode, di_bh, pos);
+ else
+ ret = ocfs2_expand_nonsparse_inode(inode, di_bh, pos,
+ total_len, NULL);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto unlock;
+ }
+ }
+
+ ret = ocfs2_write_begin_nolock(inode->i_mapping, pos, len,
+ OCFS2_WRITE_DIRECT, NULL,
+ (void **)&wc, di_bh, NULL);
+ if (ret) {
+ mlog_errno(ret);
+ goto unlock;
+ }
+
+ desc = &wc->w_desc[0];
+
+ p_blkno = ocfs2_clusters_to_blocks(inode->i_sb, desc->c_phys);
+ BUG_ON(p_blkno == 0);
+ p_blkno += iblock & (u64)(ocfs2_clusters_to_blocks(inode->i_sb, 1) - 1);
+
+ map_bh(bh_result, inode->i_sb, p_blkno);
+ bh_result->b_size = len;
+ if (desc->c_needs_zero)
+ set_buffer_new(bh_result);
+
+ /* May sleep in end_io. It should not happen in a irq context. So defer
+ * it to dio work queue. */
+ set_buffer_defer_completion(bh_result);
+
+ if (!list_empty(&wc->w_unwritten_list)) {
+ struct ocfs2_unwritten_extent *ue = NULL;
+
+ ue = list_first_entry(&wc->w_unwritten_list,
+ struct ocfs2_unwritten_extent,
+ ue_node);
+ BUG_ON(ue->ue_cpos != desc->c_cpos);
+ /* The physical address may be 0, fill it. */
+ ue->ue_phys = desc->c_phys;
+
+ list_splice_tail_init(&wc->w_unwritten_list, &dwc->dw_zero_list);
+ dwc->dw_zero_count++;
+ }
+
+ ret = ocfs2_write_end_nolock(inode->i_mapping, pos, len, len, NULL, wc);
+ BUG_ON(ret != len);
+ ret = 0;
+unlock:
+ up_write(&oi->ip_alloc_sem);
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+out:
+ if (ret < 0)
+ ret = -EIO;
+ return ret;
+}
+
+static void ocfs2_dio_end_io_write(struct inode *inode,
+ struct ocfs2_dio_write_ctxt *dwc,
+ loff_t offset,
+ ssize_t bytes)
+{
+ struct ocfs2_cached_dealloc_ctxt dealloc;
+ struct ocfs2_extent_tree et;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ struct ocfs2_inode_info *oi = OCFS2_I(inode);
+ struct ocfs2_unwritten_extent *ue = NULL;
+ struct buffer_head *di_bh = NULL;
+ struct ocfs2_dinode *di;
+ struct ocfs2_alloc_context *data_ac = NULL;
+ struct ocfs2_alloc_context *meta_ac = NULL;
+ handle_t *handle = NULL;
+ loff_t end = offset + bytes;
+ int ret = 0, credits = 0, locked = 0;
+
+ ocfs2_init_dealloc_ctxt(&dealloc);
+
+ /* We do clear unwritten, delete orphan, change i_size here. If neither
+ * of these happen, we can skip all this. */
+ if (list_empty(&dwc->dw_zero_list) &&
+ end <= i_size_read(inode) &&
+ !dwc->dw_orphaned)
+ goto out;
+
+ /* ocfs2_file_write_iter will get i_mutex, so we need not lock if we
+ * are in that context. */
+ if (dwc->dw_writer_pid != task_pid_nr(current)) {
+ mutex_lock(&inode->i_mutex);
+ locked = 1;
+ }
+
+ ret = ocfs2_inode_lock(inode, &di_bh, 1);
+ if (ret < 0) {
+ mlog_errno(ret);
+ goto out;
+ }
+
+ down_write(&oi->ip_alloc_sem);
+
+ /* Delete orphan before acquire i_mutex. */
+ if (dwc->dw_orphaned) {
+ BUG_ON(dwc->dw_writer_pid != task_pid_nr(current));
+
+ end = end > i_size_read(inode) ? end : 0;
+
+ ret = ocfs2_del_inode_from_orphan(osb, inode, di_bh,
+ !!end, end);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+
+ di = (struct ocfs2_dinode *)di_bh;
+
+ ocfs2_init_dinode_extent_tree(&et, INODE_CACHE(inode), di_bh);
+
+ ret = ocfs2_lock_allocators(inode, &et, 0, dwc->dw_zero_count*2,
+ &data_ac, &meta_ac);
+ if (ret) {
+ mlog_errno(ret);
+ goto unlock;
+ }
+
+ credits = ocfs2_calc_extend_credits(inode->i_sb, &di->id2.i_list);
+
+ handle = ocfs2_start_trans(osb, credits);
+ if (IS_ERR(handle)) {
+ ret = PTR_ERR(handle);
+ mlog_errno(ret);
+ goto unlock;
+ }
+ ret = ocfs2_journal_access_di(handle, INODE_CACHE(inode), di_bh,
+ OCFS2_JOURNAL_ACCESS_WRITE);
+ if (ret) {
+ mlog_errno(ret);
+ goto commit;
+ }
+
+ list_for_each_entry(ue, &dwc->dw_zero_list, ue_node) {
+ ret = ocfs2_mark_extent_written(inode, &et, handle,
+ ue->ue_cpos, 1,
+ ue->ue_phys,
+ meta_ac, &dealloc);
+ if (ret < 0) {
+ mlog_errno(ret);
+ break;
+ }
+ }
+
+ if (end > i_size_read(inode)) {
+ ret = ocfs2_set_inode_size(handle, inode, di_bh, end);
+ if (ret < 0)
+ mlog_errno(ret);
+ }
+commit:
+ ocfs2_commit_trans(osb, handle);
+unlock:
+ up_write(&oi->ip_alloc_sem);
+ ocfs2_inode_unlock(inode, 1);
+ brelse(di_bh);
+out:
+ if (data_ac)
+ ocfs2_free_alloc_context(data_ac);
+ if (meta_ac)
+ ocfs2_free_alloc_context(meta_ac);
+ ocfs2_run_deallocs(osb, &dealloc);
+ if (locked)
+ mutex_unlock(&inode->i_mutex);
+ ocfs2_dio_free_write_ctx(inode, dwc);
+}
+
+/*
+ * ocfs2_dio_end_io is called by the dio core when a dio is finished. We're
+ * particularly interested in the aio/dio case. We use the rw_lock DLM lock
+ * to protect io on one node from truncation on another.
+ */
+static int ocfs2_dio_end_io(struct kiocb *iocb,
+ loff_t offset,
+ ssize_t bytes,
+ void *private)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ int level;
+
+ if (bytes <= 0)
+ return 0;
+
+ /* this io's submitter should not have unlocked this before we could */
+ BUG_ON(!ocfs2_iocb_is_rw_locked(iocb));
+
+ if (private)
+ ocfs2_dio_end_io_write(inode, private, offset, bytes);
+
+ ocfs2_iocb_clear_rw_locked(iocb);
+
+ level = ocfs2_iocb_rw_locked_level(iocb);
+ ocfs2_rw_unlock(inode, level);
+ return 0;
+}
+
+static ssize_t ocfs2_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
+ loff_t offset)
+{
+ struct file *file = iocb->ki_filp;
+ struct inode *inode = file_inode(file)->i_mapping->host;
+ struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
+ loff_t end = offset + iter->count;
+ get_block_t *get_block;
+
+ /*
+ * Fallback to buffered I/O if we see an inode without
+ * extents.
+ */
+ if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL)
+ return 0;
+
+ /* Fallback to buffered I/O if we do not support append dio. */
+ if (end > i_size_read(inode) && !ocfs2_supports_append_dio(osb))
+ return 0;
+
+ if (iov_iter_rw(iter) == READ)
+ get_block = ocfs2_get_block;
+ else
+ get_block = ocfs2_dio_get_block;
+
+ return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev,
+ iter, offset, get_block,
+ ocfs2_dio_end_io, NULL, 0);
+}
+
const struct address_space_operations ocfs2_aops = {
.readpage = ocfs2_readpage,
.readpages = ocfs2_readpages,
diff --git a/fs/ocfs2/aops.h b/fs/ocfs2/aops.h
index 24e496d6b..b1c9f28a5 100644
--- a/fs/ocfs2/aops.h
+++ b/fs/ocfs2/aops.h
@@ -47,9 +47,14 @@ int ocfs2_write_end_nolock(struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
struct page *page, void *fsdata);
-int ocfs2_write_begin_nolock(struct file *filp,
- struct address_space *mapping,
- loff_t pos, unsigned len, unsigned flags,
+typedef enum {
+ OCFS2_WRITE_BUFFER = 0,
+ OCFS2_WRITE_DIRECT,
+ OCFS2_WRITE_MMAP,
+} ocfs2_write_type_t;
+
+int ocfs2_write_begin_nolock(struct address_space *mapping,
+ loff_t pos, unsigned len, ocfs2_write_type_t type,
struct page **pagep, void **fsdata,
struct buffer_head *di_bh, struct page *mmap_page);
@@ -79,7 +84,6 @@ static inline void ocfs2_iocb_set_rw_locked(struct kiocb *iocb, int level)
enum ocfs2_iocb_lock_bits {
OCFS2_IOCB_RW_LOCK = 0,
OCFS2_IOCB_RW_LOCK_LEVEL,
- OCFS2_IOCB_UNALIGNED_IO,
OCFS2_IOCB_NUM_LOCKS
};
@@ -88,11 +92,4 @@ enum ocfs2_iocb_lock_bits {
#define ocfs2_iocb_rw_locked_level(iocb) \
test_bit(OCFS2_IOCB_RW_LOCK_LEVEL, (unsigned long *)&iocb->private)
-#define ocfs2_iocb_set_unaligned_aio(iocb) \
- set_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
-#define ocfs2_iocb_clear_unaligned_aio(iocb) \
- clear_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
-#define ocfs2_iocb_is_unaligned_aio(iocb) \
- test_bit(OCFS2_IOCB_UNALIGNED_IO, (unsigned long *)&iocb->private)
-
#endif /* OCFS2_FILE_H */
diff --git a/fs/ocfs2/cluster/heartbeat.c b/fs/ocfs2/cluster/heartbeat.c
index a2370e2c7..1934abb6b 100644
--- a/fs/ocfs2/cluster/heartbeat.c
+++ b/fs/ocfs2/cluster/heartbeat.c
@@ -287,7 +287,6 @@ struct o2hb_bio_wait_ctxt {
static void o2hb_write_timeout(struct work_struct *work)
{
int failed, quorum;
- unsigned long flags;
struct o2hb_region *reg =
container_of(work, struct o2hb_region,
hr_write_timeout_work.work);
@@ -297,14 +296,14 @@ static void o2hb_write_timeout(struct work_struct *work)
jiffies_to_msecs(jiffies - reg->hr_last_timeout_start));
if (o2hb_global_heartbeat_active()) {
- spin_lock_irqsave(&o2hb_live_lock, flags);
+ spin_lock(&o2hb_live_lock);
if (test_bit(reg->hr_region_num, o2hb_quorum_region_bitmap))
set_bit(reg->hr_region_num, o2hb_failed_region_bitmap);
failed = bitmap_weight(o2hb_failed_region_bitmap,
O2NM_MAX_REGIONS);
quorum = bitmap_weight(o2hb_quorum_region_bitmap,
O2NM_MAX_REGIONS);
- spin_unlock_irqrestore(&o2hb_live_lock, flags);
+ spin_unlock(&o2hb_live_lock);
mlog(ML_HEARTBEAT, "Number of regions %d, failed regions %d\n",
quorum, failed);
@@ -418,13 +417,13 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
bio->bi_private = wc;
bio->bi_end_io = o2hb_bio_end_io;
- vec_start = (cs << bits) % PAGE_CACHE_SIZE;
+ vec_start = (cs << bits) % PAGE_SIZE;
while(cs < max_slots) {
current_page = cs / spp;
page = reg->hr_slot_data[current_page];
- vec_len = min(PAGE_CACHE_SIZE - vec_start,
- (max_slots-cs) * (PAGE_CACHE_SIZE/spp) );
+ vec_len = min(PAGE_SIZE - vec_start,
+ (max_slots-cs) * (PAGE_SIZE/spp) );
mlog(ML_HB_BIO, "page %d, vec_len = %u, vec_start = %u\n",
current_page, vec_len, vec_start);
@@ -432,7 +431,7 @@ static struct bio *o2hb_setup_one_bio(struct o2hb_region *reg,
len = bio_add_page(bio, page, vec_len, vec_start);
if (len != vec_len) break;
- cs += vec_len / (PAGE_CACHE_SIZE/spp);
+ cs += vec_len / (PAGE_SIZE/spp);
vec_start = 0;
}
@@ -1577,7 +1576,7 @@ static ssize_t o2hb_region_dev_show(struct config_item *item, char *page)
static void o2hb_init_region_params(struct o2hb_region *reg)
{
- reg->hr_slots_per_page = PAGE_CACHE_SIZE >> reg->hr_block_bits;
+ reg->hr_slots_per_page = PAGE_SIZE >> reg->hr_block_bits;
reg->hr_timeout_ms = O2HB_REGION_TIMEOUT_MS;
mlog(ML_HEARTBEAT, "hr_start_block = %llu, hr_blocks = %u\n",
@@ -2425,11 +2424,10 @@ EXPORT_SYMBOL_GPL(o2hb_check_node_heartbeating);
int o2hb_check_node_heartbeating_no_sem(u8 node_num)
{
unsigned long testing_map[BITS_TO_LONGS(O2NM_MAX_NODES)];
- unsigned long flags;
- spin_lock_irqsave(&o2hb_live_lock, flags);
+ spin_lock(&o2hb_live_lock);
o2hb_fill_node_map_from_callback(testing_map, sizeof(testing_map));
- spin_unlock_irqrestore(&o2hb_live_lock, flags);
+ spin_unlock(&o2hb_live_lock);
if (!test_bit(node_num, testing_map)) {
mlog(ML_HEARTBEAT,
"node (%u) does not have heartbeating enabled.\n",
diff --git a/fs/ocfs2/cluster/nodemanager.c b/fs/ocfs2/cluster/nodemanager.c
index ebe543894..b17d180bd 100644
--- a/fs/ocfs2/cluster/nodemanager.c
+++ b/fs/ocfs2/cluster/nodemanager.c
@@ -630,7 +630,6 @@ static void o2nm_cluster_release(struct config_item *item)
{
struct o2nm_cluster *cluster = to_o2nm_cluster(item);
- kfree(cluster->cl_group.default_groups);
kfree(cluster);
}
@@ -666,7 +665,6 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
struct o2nm_cluster *cluster = NULL;
struct o2nm_node_group *ns = NULL;
struct config_group *o2hb_group = NULL, *ret = NULL;
- void *defs = NULL;
/* this runs under the parent dir's i_mutex; there can be only
* one caller in here at a time */
@@ -675,20 +673,18 @@ static struct config_group *o2nm_cluster_group_make_group(struct config_group *g
cluster = kzalloc(sizeof(struct o2nm_cluster), GFP_KERNEL);
ns = kzalloc(sizeof(struct o2nm_node_group), GFP_KERNEL);
- defs = kcalloc(3, sizeof(struct config_group *), GFP_KERNEL);
o2hb_group = o2hb_alloc_hb_set();
- if (cluster == NULL || ns == NULL || o2hb_group == NULL || defs == NULL)
+ if (cluster == NULL || ns == NULL || o2hb_group == NULL)
goto out;
config_group_init_type_name(&cluster->cl_group, name,
&o2nm_cluster_type);
+ configfs_add_default_group(&ns->ns_group, &cluster->cl_group);
+
config_group_init_type_name(&ns->ns_group, "node",
&o2nm_node_group_type);
+ configfs_add_default_group(o2hb_group, &cluster->cl_group);
- cluster->cl_group.default_groups = defs;
- cluster->cl_group.default_groups[0] = &ns->ns_group;
- cluster->cl_group.default_groups[1] = o2hb_group;
- cluster->cl_group.default_groups[2] = NULL;
rwlock_init(&cluster->cl_nodes_lock);
cluster->cl_node_ip_tree = RB_ROOT;
cluster->cl_reconnect_delay_ms = O2NET_RECONNECT_DELAY_MS_DEFAULT;
@@ -704,7 +700,6 @@ out:
kfree(cluster);
kfree(ns);
o2hb_free_hb_set(o2hb_group);
- kfree(defs);
ret = ERR_PTR(-ENOMEM);
}
@@ -714,18 +709,11 @@ out:
static void o2nm_cluster_group_drop_item(struct config_group *group, struct config_item *item)
{
struct o2nm_cluster *cluster = to_o2nm_cluster(item);
- int i;
- struct config_item *killme;
BUG_ON(o2nm_single_cluster != cluster);
o2nm_single_cluster = NULL;
- for (i = 0; cluster->cl_group.default_groups[i]; i++) {
- killme = &cluster->cl_group.default_groups[i]->cg_item;
- cluster->cl_group.default_groups[i] = NULL;
- config_item_put(killme);
- }
-
+ configfs_remove_default_groups(&cluster->cl_group);
config_item_put(item);
}
diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h
index 68c607e63..004f2cbe8 100644
--- a/fs/ocfs2/dlm/dlmcommon.h
+++ b/fs/ocfs2/dlm/dlmcommon.h
@@ -282,6 +282,7 @@ static inline void __dlm_set_joining_node(struct dlm_ctxt *dlm,
#define DLM_LOCK_RES_DROPPING_REF 0x00000040
#define DLM_LOCK_RES_BLOCK_DIRTY 0x00001000
#define DLM_LOCK_RES_SETREF_INPROG 0x00002000
+#define DLM_LOCK_RES_RECOVERY_WAITING 0x00004000
/* max milliseconds to wait to sync up a network failure with a node death */
#define DLM_NODE_DEATH_WAIT_MAX (5 * 1000)
@@ -451,6 +452,7 @@ enum {
DLM_QUERY_REGION = 519,
DLM_QUERY_NODEINFO = 520,
DLM_BEGIN_EXIT_DOMAIN_MSG = 521,
+ DLM_DEREF_LOCKRES_DONE = 522,
};
struct dlm_reco_node_data
@@ -545,7 +547,7 @@ struct dlm_master_requery
* };
*
* from ../cluster/tcp.h
- * NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg))
+ * O2NET_MAX_PAYLOAD_BYTES (4096 - sizeof(net_msg))
* (roughly 4080 bytes)
* and sizeof(dlm_migratable_lockres) = 112 bytes
* and sizeof(dlm_migratable_lock) = 16 bytes
@@ -586,7 +588,7 @@ struct dlm_migratable_lockres
/* from above, 128 bytes
* for some undetermined future use */
-#define DLM_MIG_LOCKRES_RESERVED (NET_MAX_PAYLOAD_BYTES - \
+#define DLM_MIG_LOCKRES_RESERVED (O2NET_MAX_PAYLOAD_BYTES - \
DLM_MIG_LOCKRES_MAX_LEN)
struct dlm_create_lock
@@ -782,6 +784,20 @@ struct dlm_deref_lockres
u8 name[O2NM_MAX_NAME_LEN];
};
+enum {
+ DLM_DEREF_RESPONSE_DONE = 0,
+ DLM_DEREF_RESPONSE_INPROG = 1,
+};
+
+struct dlm_deref_lockres_done {
+ u32 pad1;
+ u16 pad2;
+ u8 node_idx;
+ u8 namelen;
+
+ u8 name[O2NM_MAX_NAME_LEN];
+};
+
static inline enum dlm_status
__dlm_lockres_state_to_status(struct dlm_lock_resource *res)
{
@@ -789,7 +805,8 @@ __dlm_lockres_state_to_status(struct dlm_lock_resource *res)
assert_spin_locked(&res->spinlock);
- if (res->state & DLM_LOCK_RES_RECOVERING)
+ if (res->state & (DLM_LOCK_RES_RECOVERING|
+ DLM_LOCK_RES_RECOVERY_WAITING))
status = DLM_RECOVERING;
else if (res->state & DLM_LOCK_RES_MIGRATING)
status = DLM_MIGRATING;
@@ -968,6 +985,8 @@ int dlm_assert_master_handler(struct o2net_msg *msg, u32 len, void *data,
void dlm_assert_master_post_handler(int status, void *data, void *ret_data);
int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
+int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data);
int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data,
void **ret_data);
int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
@@ -1009,6 +1028,7 @@ static inline void __dlm_wait_on_lockres(struct dlm_lock_resource *res)
{
__dlm_wait_on_lockres_flags(res, (DLM_LOCK_RES_IN_PROGRESS|
DLM_LOCK_RES_RECOVERING|
+ DLM_LOCK_RES_RECOVERY_WAITING|
DLM_LOCK_RES_MIGRATING));
}
diff --git a/fs/ocfs2/dlm/dlmconvert.c b/fs/ocfs2/dlm/dlmconvert.c
index f90931335..cdeafb4e7 100644
--- a/fs/ocfs2/dlm/dlmconvert.c
+++ b/fs/ocfs2/dlm/dlmconvert.c
@@ -212,6 +212,12 @@ grant:
if (lock->lksb->flags & DLM_LKSB_PUT_LVB)
memcpy(res->lvb, lock->lksb->lvb, DLM_LVB_LEN);
+ /*
+ * Move the lock to the tail because it may be the only lock which has
+ * an invalid lvb.
+ */
+ list_move_tail(&lock->list, &res->granted);
+
status = DLM_NORMAL;
*call_ast = 1;
goto unlock_exit;
diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c
index 2ee7fe747..12e064b8b 100644
--- a/fs/ocfs2/dlm/dlmdomain.c
+++ b/fs/ocfs2/dlm/dlmdomain.c
@@ -132,10 +132,13 @@ static DECLARE_WAIT_QUEUE_HEAD(dlm_domain_events);
* - Message DLM_QUERY_NODEINFO added to allow online node removes
* New in version 1.2:
* - Message DLM_BEGIN_EXIT_DOMAIN_MSG added to mark start of exit domain
+ * New in version 1.3:
+ * - Message DLM_DEREF_LOCKRES_DONE added to inform non-master that the
+ * refmap is cleared
*/
static const struct dlm_protocol_version dlm_protocol = {
.pv_major = 1,
- .pv_minor = 2,
+ .pv_minor = 3,
};
#define DLM_DOMAIN_BACKOFF_MS 200
@@ -1396,7 +1399,7 @@ static int dlm_send_join_cancels(struct dlm_ctxt *dlm,
unsigned int map_size)
{
int status, tmpstat;
- unsigned int node;
+ int node;
if (map_size != (BITS_TO_LONGS(O2NM_MAX_NODES) *
sizeof(unsigned long))) {
@@ -1853,7 +1856,13 @@ static int dlm_register_domain_handlers(struct dlm_ctxt *dlm)
sizeof(struct dlm_exit_domain),
dlm_begin_exit_domain_handler,
dlm, NULL, &dlm->dlm_domain_handlers);
+ if (status)
+ goto bail;
+ status = o2net_register_handler(DLM_DEREF_LOCKRES_DONE, dlm->key,
+ sizeof(struct dlm_deref_lockres_done),
+ dlm_deref_lockres_done_handler,
+ dlm, NULL, &dlm->dlm_domain_handlers);
bail:
if (status)
dlm_unregister_domain_handlers(dlm);
diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c
index 9477d6e1d..13719d3f3 100644
--- a/fs/ocfs2/dlm/dlmmaster.c
+++ b/fs/ocfs2/dlm/dlmmaster.c
@@ -2278,7 +2278,7 @@ int dlm_drop_lockres_ref(struct dlm_ctxt *dlm, struct dlm_lock_resource *res)
dlm_print_one_lock_resource(res);
BUG();
}
- return ret;
+ return ret ? ret : r;
}
int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
@@ -2345,7 +2345,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
res->lockname.len, res->lockname.name, node);
dlm_print_one_lock_resource(res);
}
- ret = 0;
+ ret = DLM_DEREF_RESPONSE_DONE;
goto done;
}
@@ -2365,7 +2365,7 @@ int dlm_deref_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
spin_unlock(&dlm->work_lock);
queue_work(dlm->dlm_worker, &dlm->dispatched_work);
- return 0;
+ return DLM_DEREF_RESPONSE_INPROG;
done:
if (res)
@@ -2375,6 +2375,124 @@ done:
return ret;
}
+int dlm_deref_lockres_done_handler(struct o2net_msg *msg, u32 len, void *data,
+ void **ret_data)
+{
+ struct dlm_ctxt *dlm = data;
+ struct dlm_deref_lockres_done *deref
+ = (struct dlm_deref_lockres_done *)msg->buf;
+ struct dlm_lock_resource *res = NULL;
+ char *name;
+ unsigned int namelen;
+ int ret = -EINVAL;
+ u8 node;
+ unsigned int hash;
+
+ if (!dlm_grab(dlm))
+ return 0;
+
+ name = deref->name;
+ namelen = deref->namelen;
+ node = deref->node_idx;
+
+ if (namelen > DLM_LOCKID_NAME_MAX) {
+ mlog(ML_ERROR, "Invalid name length!");
+ goto done;
+ }
+ if (deref->node_idx >= O2NM_MAX_NODES) {
+ mlog(ML_ERROR, "Invalid node number: %u\n", node);
+ goto done;
+ }
+
+ hash = dlm_lockid_hash(name, namelen);
+
+ spin_lock(&dlm->spinlock);
+ res = __dlm_lookup_lockres_full(dlm, name, namelen, hash);
+ if (!res) {
+ spin_unlock(&dlm->spinlock);
+ mlog(ML_ERROR, "%s:%.*s: bad lockres name\n",
+ dlm->name, namelen, name);
+ goto done;
+ }
+
+ spin_lock(&res->spinlock);
+ BUG_ON(!(res->state & DLM_LOCK_RES_DROPPING_REF));
+ if (!list_empty(&res->purge)) {
+ mlog(0, "%s: Removing res %.*s from purgelist\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ list_del_init(&res->purge);
+ dlm_lockres_put(res);
+ dlm->purge_count--;
+ }
+
+ if (!__dlm_lockres_unused(res)) {
+ mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ __dlm_print_one_lock_resource(res);
+ BUG();
+ }
+
+ __dlm_unhash_lockres(dlm, res);
+
+ spin_lock(&dlm->track_lock);
+ if (!list_empty(&res->tracking))
+ list_del_init(&res->tracking);
+ else {
+ mlog(ML_ERROR, "%s: Resource %.*s not on the Tracking list\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ __dlm_print_one_lock_resource(res);
+ }
+ spin_unlock(&dlm->track_lock);
+
+ /* lockres is not in the hash now. drop the flag and wake up
+ * any processes waiting in dlm_get_lock_resource.
+ */
+ res->state &= ~DLM_LOCK_RES_DROPPING_REF;
+ spin_unlock(&res->spinlock);
+ wake_up(&res->wq);
+
+ dlm_lockres_put(res);
+
+ spin_unlock(&dlm->spinlock);
+
+ ret = 0;
+
+done:
+ dlm_put(dlm);
+ return ret;
+}
+
+static void dlm_drop_lockres_ref_done(struct dlm_ctxt *dlm,
+ struct dlm_lock_resource *res, u8 node)
+{
+ struct dlm_deref_lockres_done deref;
+ int ret = 0, r;
+ const char *lockname;
+ unsigned int namelen;
+
+ lockname = res->lockname.name;
+ namelen = res->lockname.len;
+ BUG_ON(namelen > O2NM_MAX_NAME_LEN);
+
+ memset(&deref, 0, sizeof(deref));
+ deref.node_idx = dlm->node_num;
+ deref.namelen = namelen;
+ memcpy(deref.name, lockname, namelen);
+
+ ret = o2net_send_message(DLM_DEREF_LOCKRES_DONE, dlm->key,
+ &deref, sizeof(deref), node, &r);
+ if (ret < 0) {
+ mlog(ML_ERROR, "%s: res %.*s, error %d send DEREF DONE "
+ " to node %u\n", dlm->name, namelen,
+ lockname, ret, node);
+ } else if (r < 0) {
+ /* ignore the error */
+ mlog(ML_ERROR, "%s: res %.*s, DEREF to node %u got %d\n",
+ dlm->name, namelen, lockname, node, r);
+ dlm_print_one_lock_resource(res);
+ }
+}
+
static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
{
struct dlm_ctxt *dlm;
@@ -2395,6 +2513,8 @@ static void dlm_deref_lockres_worker(struct dlm_work_item *item, void *data)
}
spin_unlock(&res->spinlock);
+ dlm_drop_lockres_ref_done(dlm, res, node);
+
if (cleared) {
mlog(0, "%s:%.*s node %u ref dropped in dispatch\n",
dlm->name, res->lockname.len, res->lockname.name, node);
@@ -2432,7 +2552,8 @@ static int dlm_is_lockres_migrateable(struct dlm_ctxt *dlm,
return 0;
/* delay migration when the lockres is in RECOCERING state */
- if (res->state & DLM_LOCK_RES_RECOVERING)
+ if (res->state & (DLM_LOCK_RES_RECOVERING|
+ DLM_LOCK_RES_RECOVERY_WAITING))
return 0;
if (res->owner != dlm->node_num)
diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c
index 23d0ab881..f6b313898 100644
--- a/fs/ocfs2/dlm/dlmrecovery.c
+++ b/fs/ocfs2/dlm/dlmrecovery.c
@@ -1403,12 +1403,24 @@ int dlm_mig_lockres_handler(struct o2net_msg *msg, u32 len, void *data,
* and RECOVERY flag changed when it completes. */
hash = dlm_lockid_hash(mres->lockname, mres->lockname_len);
spin_lock(&dlm->spinlock);
- res = __dlm_lookup_lockres(dlm, mres->lockname, mres->lockname_len,
+ res = __dlm_lookup_lockres_full(dlm, mres->lockname, mres->lockname_len,
hash);
if (res) {
/* this will get a ref on res */
/* mark it as recovering/migrating and hash it */
spin_lock(&res->spinlock);
+ if (res->state & DLM_LOCK_RES_DROPPING_REF) {
+ mlog(0, "%s: node is attempting to migrate "
+ "lockres %.*s, but marked as dropping "
+ " ref!\n", dlm->name,
+ mres->lockname_len, mres->lockname);
+ ret = -EINVAL;
+ spin_unlock(&res->spinlock);
+ spin_unlock(&dlm->spinlock);
+ dlm_lockres_put(res);
+ goto leave;
+ }
+
if (mres->flags & DLM_MRES_RECOVERY) {
res->state |= DLM_LOCK_RES_RECOVERING;
} else {
@@ -2162,6 +2174,13 @@ static void dlm_finish_local_lockres_recovery(struct dlm_ctxt *dlm,
for (i = 0; i < DLM_HASH_BUCKETS; i++) {
bucket = dlm_lockres_hash(dlm, i);
hlist_for_each_entry(res, bucket, hash_node) {
+ if (res->state & DLM_LOCK_RES_RECOVERY_WAITING) {
+ spin_lock(&res->spinlock);
+ res->state &= ~DLM_LOCK_RES_RECOVERY_WAITING;
+ spin_unlock(&res->spinlock);
+ wake_up(&res->wq);
+ }
+
if (!(res->state & DLM_LOCK_RES_RECOVERING))
continue;
@@ -2299,6 +2318,7 @@ static void dlm_free_dead_locks(struct dlm_ctxt *dlm,
res->lockname.len, res->lockname.name, freed, dead_node);
__dlm_print_one_lock_resource(res);
}
+ res->state |= DLM_LOCK_RES_RECOVERY_WAITING;
dlm_lockres_clear_refmap_bit(dlm, res, dead_node);
} else if (test_bit(dead_node, res->refmap)) {
mlog(0, "%s:%.*s: dead node %u had a ref, but had "
@@ -2376,14 +2396,16 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node)
dlm_revalidate_lvb(dlm, res, dead_node);
if (res->owner == dead_node) {
if (res->state & DLM_LOCK_RES_DROPPING_REF) {
- mlog(ML_NOTICE, "%s: res %.*s, Skip "
- "recovery as it is being freed\n",
- dlm->name, res->lockname.len,
- res->lockname.name);
- } else
- dlm_move_lockres_to_recovery_list(dlm,
- res);
-
+ mlog(0, "%s:%.*s: owned by "
+ "dead node %u, this node was "
+ "dropping its ref when it died. "
+ "continue, dropping the flag.\n",
+ dlm->name, res->lockname.len,
+ res->lockname.name, dead_node);
+ }
+ res->state &= ~DLM_LOCK_RES_DROPPING_REF;
+ dlm_move_lockres_to_recovery_list(dlm,
+ res);
} else if (res->owner == dlm->node_num) {
dlm_free_dead_locks(dlm, res, dead_node);
__dlm_lockres_calc_usage(dlm, res);
diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c
index c5f6c241e..68d239ba0 100644
--- a/fs/ocfs2/dlm/dlmthread.c
+++ b/fs/ocfs2/dlm/dlmthread.c
@@ -106,7 +106,8 @@ int __dlm_lockres_unused(struct dlm_lock_resource *res)
if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY)
return 0;
- if (res->state & DLM_LOCK_RES_RECOVERING)
+ if (res->state & (DLM_LOCK_RES_RECOVERING|
+ DLM_LOCK_RES_RECOVERY_WAITING))
return 0;
/* Another node has this resource with this node as the master */
@@ -202,6 +203,13 @@ static void dlm_purge_lockres(struct dlm_ctxt *dlm,
dlm->purge_count--;
}
+ if (!master && ret != 0) {
+ mlog(0, "%s: deref %.*s in progress or master goes down\n",
+ dlm->name, res->lockname.len, res->lockname.name);
+ spin_unlock(&res->spinlock);
+ return;
+ }
+
if (!__dlm_lockres_unused(res)) {
mlog(ML_ERROR, "%s: res %.*s in use after deref\n",
dlm->name, res->lockname.len, res->lockname.name);
@@ -700,7 +708,8 @@ static int dlm_thread(void *data)
* dirty for a short while. */
BUG_ON(res->state & DLM_LOCK_RES_MIGRATING);
if (res->state & (DLM_LOCK_RES_IN_PROGRESS |
- DLM_LOCK_RES_RECOVERING)) {
+ DLM_LOCK_RES_RECOVERING |
+ DLM_LOCK_RES_RECOVERY_WAITING)) {
/* move it to the tail and keep going */
res->state &= ~DLM_LOCK_RES_DIRTY;
spin_unlock(&res->spinlock);
diff --git a/fs/ocfs2/dlmfs/dlmfs.c b/fs/ocfs2/dlmfs/dlmfs.c
index 03768bb3a..47b3b2d4e 100644
--- a/fs/ocfs2/dlmfs/dlmfs.c
+++ b/fs/ocfs2/dlmfs/dlmfs.c
@@ -571,8 +571,8 @@ static int dlmfs_fill_super(struct super_block * sb,
int silent)
{
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = DLMFS_MAGIC;
sb->s_op = &dlmfs_ops;
sb->s_root = d_make_root(dlmfs_get_root_inode(sb));
diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c
index 7cb38fdca..59cce53c9 100644
--- a/fs/ocfs2/file.c
+++ b/fs/ocfs2/file.c
@@ -770,14 +770,14 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
{
struct address_space *mapping = inode->i_mapping;
struct page *page;
- unsigned long index = abs_from >> PAGE_CACHE_SHIFT;
+ unsigned long index = abs_from >> PAGE_SHIFT;
handle_t *handle;
int ret = 0;
unsigned zero_from, zero_to, block_start, block_end;
struct ocfs2_dinode *di = (struct ocfs2_dinode *)di_bh->b_data;
BUG_ON(abs_from >= abs_to);
- BUG_ON(abs_to > (((u64)index + 1) << PAGE_CACHE_SHIFT));
+ BUG_ON(abs_to > (((u64)index + 1) << PAGE_SHIFT));
BUG_ON(abs_from & (inode->i_blkbits - 1));
handle = ocfs2_zero_start_ordered_transaction(inode, di_bh);
@@ -794,10 +794,10 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
}
/* Get the offsets within the page that we want to zero */
- zero_from = abs_from & (PAGE_CACHE_SIZE - 1);
- zero_to = abs_to & (PAGE_CACHE_SIZE - 1);
+ zero_from = abs_from & (PAGE_SIZE - 1);
+ zero_to = abs_to & (PAGE_SIZE - 1);
if (!zero_to)
- zero_to = PAGE_CACHE_SIZE;
+ zero_to = PAGE_SIZE;
trace_ocfs2_write_zero_page(
(unsigned long long)OCFS2_I(inode)->ip_blkno,
@@ -851,7 +851,7 @@ static int ocfs2_write_zero_page(struct inode *inode, u64 abs_from,
out_unlock:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
out_commit_trans:
if (handle)
ocfs2_commit_trans(OCFS2_SB(inode->i_sb), handle);
@@ -959,7 +959,7 @@ static int ocfs2_zero_extend_range(struct inode *inode, u64 range_start,
BUG_ON(range_start >= range_end);
while (zero_pos < range_end) {
- next_pos = (zero_pos & PAGE_CACHE_MASK) + PAGE_CACHE_SIZE;
+ next_pos = (zero_pos & PAGE_MASK) + PAGE_SIZE;
if (next_pos > range_end)
next_pos = range_end;
rc = ocfs2_write_zero_page(inode, zero_pos, next_pos, di_bh);
@@ -1268,20 +1268,20 @@ bail_unlock_rw:
if (size_change)
ocfs2_rw_unlock(inode, 1);
bail:
- brelse(bh);
/* Release quota pointers in case we acquired them */
for (qtype = 0; qtype < OCFS2_MAXQUOTAS; qtype++)
dqput(transfer_to[qtype]);
if (!status && attr->ia_valid & ATTR_MODE) {
- status = posix_acl_chmod(inode, inode->i_mode);
+ status = ocfs2_acl_chmod(inode, bh);
if (status < 0)
mlog_errno(status);
}
if (inode_locked)
ocfs2_inode_unlock(inode, 1);
+ brelse(bh);
return status;
}
@@ -1381,44 +1381,6 @@ out:
return ret;
}
-/*
- * Will look for holes and unwritten extents in the range starting at
- * pos for count bytes (inclusive).
- */
-static int ocfs2_check_range_for_holes(struct inode *inode, loff_t pos,
- size_t count)
-{
- int ret = 0;
- unsigned int extent_flags;
- u32 cpos, clusters, extent_len, phys_cpos;
- struct super_block *sb = inode->i_sb;
-
- cpos = pos >> OCFS2_SB(sb)->s_clustersize_bits;
- clusters = ocfs2_clusters_for_bytes(sb, pos + count) - cpos;
-
- while (clusters) {
- ret = ocfs2_get_clusters(inode, cpos, &phys_cpos, &extent_len,
- &extent_flags);
- if (ret < 0) {
- mlog_errno(ret);
- goto out;
- }
-
- if (phys_cpos == 0 || (extent_flags & OCFS2_EXT_UNWRITTEN)) {
- ret = 1;
- break;
- }
-
- if (extent_len > clusters)
- extent_len = clusters;
-
- clusters -= extent_len;
- cpos += extent_len;
- }
-out:
- return ret;
-}
-
static int ocfs2_write_remove_suid(struct inode *inode)
{
int ret;
@@ -2129,18 +2091,12 @@ out:
static int ocfs2_prepare_inode_for_write(struct file *file,
loff_t pos,
- size_t count,
- int appending,
- int *direct_io,
- int *has_refcount)
+ size_t count)
{
int ret = 0, meta_level = 0;
struct dentry *dentry = file->f_path.dentry;
struct inode *inode = d_inode(dentry);
loff_t end;
- struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
- int full_coherency = !(osb->s_mount_opt &
- OCFS2_MOUNT_COHERENCY_BUFFERED);
/*
* We start with a read level meta lock and only jump to an ex
@@ -2189,10 +2145,6 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
pos,
count,
&meta_level);
- if (has_refcount)
- *has_refcount = 1;
- if (direct_io)
- *direct_io = 0;
}
if (ret < 0) {
@@ -2200,67 +2152,12 @@ static int ocfs2_prepare_inode_for_write(struct file *file,
goto out_unlock;
}
- /*
- * Skip the O_DIRECT checks if we don't need
- * them.
- */
- if (!direct_io || !(*direct_io))
- break;
-
- /*
- * There's no sane way to do direct writes to an inode
- * with inline data.
- */
- if (OCFS2_I(inode)->ip_dyn_features & OCFS2_INLINE_DATA_FL) {
- *direct_io = 0;
- break;
- }
-
- /*
- * Allowing concurrent direct writes means
- * i_size changes wouldn't be synchronized, so
- * one node could wind up truncating another
- * nodes writes.
- */
- if (end > i_size_read(inode) && !full_coherency) {
- *direct_io = 0;
- break;
- }
-
- /*
- * Fallback to old way if the feature bit is not set.
- */
- if (end > i_size_read(inode) &&
- !ocfs2_supports_append_dio(osb)) {
- *direct_io = 0;
- break;
- }
-
- /*
- * We don't fill holes during direct io, so
- * check for them here. If any are found, the
- * caller will have to retake some cluster
- * locks and initiate the io as buffered.
- */
- ret = ocfs2_check_range_for_holes(inode, pos, count);
- if (ret == 1) {
- /*
- * Fallback to old way if the feature bit is not set.
- * Otherwise try dio first and then complete the rest
- * request through buffer io.
- */
- if (!ocfs2_supports_append_dio(osb))
- *direct_io = 0;
- ret = 0;
- } else if (ret < 0)
- mlog_errno(ret);
break;
}
out_unlock:
trace_ocfs2_prepare_inode_for_write(OCFS2_I(inode)->ip_blkno,
- pos, appending, count,
- direct_io, has_refcount);
+ pos, count);
if (meta_level >= 0)
ocfs2_inode_unlock(inode, meta_level);
@@ -2272,18 +2169,16 @@ out:
static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
struct iov_iter *from)
{
- int direct_io, appending, rw_level;
- int can_do_direct, has_refcount = 0;
+ int direct_io, rw_level;
ssize_t written = 0;
ssize_t ret;
- size_t count = iov_iter_count(from), orig_count;
+ size_t count = iov_iter_count(from);
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct ocfs2_super *osb = OCFS2_SB(inode->i_sb);
int full_coherency = !(osb->s_mount_opt &
OCFS2_MOUNT_COHERENCY_BUFFERED);
- int unaligned_dio = 0;
- int dropped_dio = 0;
+ void *saved_ki_complete = NULL;
int append_write = ((iocb->ki_pos + count) >=
i_size_read(inode) ? 1 : 0);
@@ -2296,12 +2191,10 @@ static ssize_t ocfs2_file_write_iter(struct kiocb *iocb,
if (count == 0)
return 0;
- appending = iocb->ki_flags & IOCB_APPEND ? 1 : 0;
direct_io = iocb->ki_flags & IOCB_DIRECT ? 1 : 0;
inode_lock(inode);
-relock:
/*
* Concurrent O_DIRECT writes are allowed with
* mount_option "coherency=buffered".
@@ -2334,7 +2227,6 @@ relock:
ocfs2_inode_unlock(inode, 1);
}
- orig_count = iov_iter_count(from);
ret = generic_write_checks(iocb, from);
if (ret <= 0) {
if (ret)
@@ -2343,41 +2235,18 @@ relock:
}
count = ret;
- can_do_direct = direct_io;
- ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count, appending,
- &can_do_direct, &has_refcount);
+ ret = ocfs2_prepare_inode_for_write(file, iocb->ki_pos, count);
if (ret < 0) {
mlog_errno(ret);
goto out;
}
- if (direct_io && !is_sync_kiocb(iocb))
- unaligned_dio = ocfs2_is_io_unaligned(inode, count, iocb->ki_pos);
-
- /*
- * We can't complete the direct I/O as requested, fall back to
- * buffered I/O.
- */
- if (direct_io && !can_do_direct) {
- ocfs2_rw_unlock(inode, rw_level);
-
- rw_level = -1;
-
- direct_io = 0;
- iocb->ki_flags &= ~IOCB_DIRECT;
- iov_iter_reexpand(from, orig_count);
- dropped_dio = 1;
- goto relock;
- }
-
- if (unaligned_dio) {
+ if (direct_io && !is_sync_kiocb(iocb) &&
+ ocfs2_is_io_unaligned(inode, count, iocb->ki_pos)) {
/*
- * Wait on previous unaligned aio to complete before
- * proceeding.
+ * Make it a sync io if it's an unaligned aio.
*/
- mutex_lock(&OCFS2_I(inode)->ip_unaligned_aio);
- /* Mark the iocb as needing an unlock in ocfs2_dio_end_io */
- ocfs2_iocb_set_unaligned_aio(iocb);
+ saved_ki_complete = xchg(&iocb->ki_complete, NULL);
}
/* communicate with ocfs2_dio_end_io */
@@ -2398,14 +2267,13 @@ relock:
*/
if ((written == -EIOCBQUEUED) || (!ocfs2_iocb_is_rw_locked(iocb))) {
rw_level = -1;
- unaligned_dio = 0;
}
if (unlikely(written <= 0))
- goto no_sync;
+ goto out;
if (((file->f_flags & O_DSYNC) && !direct_io) ||
- IS_SYNC(inode) || dropped_dio) {
+ IS_SYNC(inode)) {
ret = filemap_fdatawrite_range(file->f_mapping,
iocb->ki_pos - written,
iocb->ki_pos - 1);
@@ -2424,13 +2292,10 @@ relock:
iocb->ki_pos - 1);
}
-no_sync:
- if (unaligned_dio && ocfs2_iocb_is_unaligned_aio(iocb)) {
- ocfs2_iocb_clear_unaligned_aio(iocb);
- mutex_unlock(&OCFS2_I(inode)->ip_unaligned_aio);
- }
-
out:
+ if (saved_ki_complete)
+ xchg(&iocb->ki_complete, saved_ki_complete);
+
if (rw_level != -1)
ocfs2_rw_unlock(inode, rw_level);
diff --git a/fs/ocfs2/filecheck.c b/fs/ocfs2/filecheck.c
new file mode 100644
index 000000000..2cabbcf2f
--- /dev/null
+++ b/fs/ocfs2/filecheck.c
@@ -0,0 +1,606 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * filecheck.c
+ *
+ * Code which implements online file check.
+ *
+ * Copyright (C) 2016 SuSE. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+#include <linux/list.h>
+#include <linux/spinlock.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/kmod.h>
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/sysfs.h>
+#include <linux/sysctl.h>
+#include <cluster/masklog.h>
+
+#include "ocfs2.h"
+#include "ocfs2_fs.h"
+#include "stackglue.h"
+#include "inode.h"
+
+#include "filecheck.h"
+
+
+/* File check error strings,
+ * must correspond with error number in header file.
+ */
+static const char * const ocfs2_filecheck_errs[] = {
+ "SUCCESS",
+ "FAILED",
+ "INPROGRESS",
+ "READONLY",
+ "INJBD",
+ "INVALIDINO",
+ "BLOCKECC",
+ "BLOCKNO",
+ "VALIDFLAG",
+ "GENERATION",
+ "UNSUPPORTED"
+};
+
+static DEFINE_SPINLOCK(ocfs2_filecheck_sysfs_lock);
+static LIST_HEAD(ocfs2_filecheck_sysfs_list);
+
+struct ocfs2_filecheck {
+ struct list_head fc_head; /* File check entry list head */
+ spinlock_t fc_lock;
+ unsigned int fc_max; /* Maximum number of entry in list */
+ unsigned int fc_size; /* Current entry count in list */
+ unsigned int fc_done; /* Finished entry count in list */
+};
+
+struct ocfs2_filecheck_sysfs_entry { /* sysfs entry per mounting */
+ struct list_head fs_list;
+ atomic_t fs_count;
+ struct super_block *fs_sb;
+ struct kset *fs_devicekset;
+ struct kset *fs_fcheckkset;
+ struct ocfs2_filecheck *fs_fcheck;
+};
+
+#define OCFS2_FILECHECK_MAXSIZE 100
+#define OCFS2_FILECHECK_MINSIZE 10
+
+/* File check operation type */
+enum {
+ OCFS2_FILECHECK_TYPE_CHK = 0, /* Check a file(inode) */
+ OCFS2_FILECHECK_TYPE_FIX, /* Fix a file(inode) */
+ OCFS2_FILECHECK_TYPE_SET = 100 /* Set entry list maximum size */
+};
+
+struct ocfs2_filecheck_entry {
+ struct list_head fe_list;
+ unsigned long fe_ino;
+ unsigned int fe_type;
+ unsigned int fe_done:1;
+ unsigned int fe_status:31;
+};
+
+struct ocfs2_filecheck_args {
+ unsigned int fa_type;
+ union {
+ unsigned long fa_ino;
+ unsigned int fa_len;
+ };
+};
+
+static const char *
+ocfs2_filecheck_error(int errno)
+{
+ if (!errno)
+ return ocfs2_filecheck_errs[errno];
+
+ BUG_ON(errno < OCFS2_FILECHECK_ERR_START ||
+ errno > OCFS2_FILECHECK_ERR_END);
+ return ocfs2_filecheck_errs[errno - OCFS2_FILECHECK_ERR_START + 1];
+}
+
+static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf);
+static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count);
+static struct kobj_attribute ocfs2_attr_filecheck_chk =
+ __ATTR(check, S_IRUSR | S_IWUSR,
+ ocfs2_filecheck_show,
+ ocfs2_filecheck_store);
+static struct kobj_attribute ocfs2_attr_filecheck_fix =
+ __ATTR(fix, S_IRUSR | S_IWUSR,
+ ocfs2_filecheck_show,
+ ocfs2_filecheck_store);
+static struct kobj_attribute ocfs2_attr_filecheck_set =
+ __ATTR(set, S_IRUSR | S_IWUSR,
+ ocfs2_filecheck_show,
+ ocfs2_filecheck_store);
+
+static int ocfs2_filecheck_sysfs_wait(atomic_t *p)
+{
+ schedule();
+ return 0;
+}
+
+static void
+ocfs2_filecheck_sysfs_free(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+ struct ocfs2_filecheck_entry *p;
+
+ if (!atomic_dec_and_test(&entry->fs_count))
+ wait_on_atomic_t(&entry->fs_count, ocfs2_filecheck_sysfs_wait,
+ TASK_UNINTERRUPTIBLE);
+
+ spin_lock(&entry->fs_fcheck->fc_lock);
+ while (!list_empty(&entry->fs_fcheck->fc_head)) {
+ p = list_first_entry(&entry->fs_fcheck->fc_head,
+ struct ocfs2_filecheck_entry, fe_list);
+ list_del(&p->fe_list);
+ BUG_ON(!p->fe_done); /* To free a undone file check entry */
+ kfree(p);
+ }
+ spin_unlock(&entry->fs_fcheck->fc_lock);
+
+ kset_unregister(entry->fs_fcheckkset);
+ kset_unregister(entry->fs_devicekset);
+ kfree(entry->fs_fcheck);
+ kfree(entry);
+}
+
+static void
+ocfs2_filecheck_sysfs_add(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+ spin_lock(&ocfs2_filecheck_sysfs_lock);
+ list_add_tail(&entry->fs_list, &ocfs2_filecheck_sysfs_list);
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+}
+
+static int ocfs2_filecheck_sysfs_del(const char *devname)
+{
+ struct ocfs2_filecheck_sysfs_entry *p;
+
+ spin_lock(&ocfs2_filecheck_sysfs_lock);
+ list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
+ if (!strcmp(p->fs_sb->s_id, devname)) {
+ list_del(&p->fs_list);
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ ocfs2_filecheck_sysfs_free(p);
+ return 0;
+ }
+ }
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ return 1;
+}
+
+static void
+ocfs2_filecheck_sysfs_put(struct ocfs2_filecheck_sysfs_entry *entry)
+{
+ if (atomic_dec_and_test(&entry->fs_count))
+ wake_up_atomic_t(&entry->fs_count);
+}
+
+static struct ocfs2_filecheck_sysfs_entry *
+ocfs2_filecheck_sysfs_get(const char *devname)
+{
+ struct ocfs2_filecheck_sysfs_entry *p = NULL;
+
+ spin_lock(&ocfs2_filecheck_sysfs_lock);
+ list_for_each_entry(p, &ocfs2_filecheck_sysfs_list, fs_list) {
+ if (!strcmp(p->fs_sb->s_id, devname)) {
+ atomic_inc(&p->fs_count);
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ return p;
+ }
+ }
+ spin_unlock(&ocfs2_filecheck_sysfs_lock);
+ return NULL;
+}
+
+int ocfs2_filecheck_create_sysfs(struct super_block *sb)
+{
+ int ret = 0;
+ struct kset *device_kset = NULL;
+ struct kset *fcheck_kset = NULL;
+ struct ocfs2_filecheck *fcheck = NULL;
+ struct ocfs2_filecheck_sysfs_entry *entry = NULL;
+ struct attribute **attrs = NULL;
+ struct attribute_group attrgp;
+
+ if (!ocfs2_kset)
+ return -ENOMEM;
+
+ attrs = kmalloc(sizeof(struct attribute *) * 4, GFP_NOFS);
+ if (!attrs) {
+ ret = -ENOMEM;
+ goto error;
+ } else {
+ attrs[0] = &ocfs2_attr_filecheck_chk.attr;
+ attrs[1] = &ocfs2_attr_filecheck_fix.attr;
+ attrs[2] = &ocfs2_attr_filecheck_set.attr;
+ attrs[3] = NULL;
+ memset(&attrgp, 0, sizeof(attrgp));
+ attrgp.attrs = attrs;
+ }
+
+ fcheck = kmalloc(sizeof(struct ocfs2_filecheck), GFP_NOFS);
+ if (!fcheck) {
+ ret = -ENOMEM;
+ goto error;
+ } else {
+ INIT_LIST_HEAD(&fcheck->fc_head);
+ spin_lock_init(&fcheck->fc_lock);
+ fcheck->fc_max = OCFS2_FILECHECK_MINSIZE;
+ fcheck->fc_size = 0;
+ fcheck->fc_done = 0;
+ }
+
+ if (strlen(sb->s_id) <= 0) {
+ mlog(ML_ERROR,
+ "Cannot get device basename when create filecheck sysfs\n");
+ ret = -ENODEV;
+ goto error;
+ }
+
+ device_kset = kset_create_and_add(sb->s_id, NULL, &ocfs2_kset->kobj);
+ if (!device_kset) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ fcheck_kset = kset_create_and_add("filecheck", NULL,
+ &device_kset->kobj);
+ if (!fcheck_kset) {
+ ret = -ENOMEM;
+ goto error;
+ }
+
+ ret = sysfs_create_group(&fcheck_kset->kobj, &attrgp);
+ if (ret)
+ goto error;
+
+ entry = kmalloc(sizeof(struct ocfs2_filecheck_sysfs_entry), GFP_NOFS);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto error;
+ } else {
+ atomic_set(&entry->fs_count, 1);
+ entry->fs_sb = sb;
+ entry->fs_devicekset = device_kset;
+ entry->fs_fcheckkset = fcheck_kset;
+ entry->fs_fcheck = fcheck;
+ ocfs2_filecheck_sysfs_add(entry);
+ }
+
+ kfree(attrs);
+ return 0;
+
+error:
+ kfree(attrs);
+ kfree(entry);
+ kfree(fcheck);
+ kset_unregister(fcheck_kset);
+ kset_unregister(device_kset);
+ return ret;
+}
+
+int ocfs2_filecheck_remove_sysfs(struct super_block *sb)
+{
+ return ocfs2_filecheck_sysfs_del(sb->s_id);
+}
+
+static int
+ocfs2_filecheck_erase_entries(struct ocfs2_filecheck_sysfs_entry *ent,
+ unsigned int count);
+static int
+ocfs2_filecheck_adjust_max(struct ocfs2_filecheck_sysfs_entry *ent,
+ unsigned int len)
+{
+ int ret;
+
+ if ((len < OCFS2_FILECHECK_MINSIZE) || (len > OCFS2_FILECHECK_MAXSIZE))
+ return -EINVAL;
+
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ if (len < (ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done)) {
+ mlog(ML_ERROR,
+ "Cannot set online file check maximum entry number "
+ "to %u due to too many pending entries(%u)\n",
+ len, ent->fs_fcheck->fc_size - ent->fs_fcheck->fc_done);
+ ret = -EBUSY;
+ } else {
+ if (len < ent->fs_fcheck->fc_size)
+ BUG_ON(!ocfs2_filecheck_erase_entries(ent,
+ ent->fs_fcheck->fc_size - len));
+
+ ent->fs_fcheck->fc_max = len;
+ ret = 0;
+ }
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+
+ return ret;
+}
+
+#define OCFS2_FILECHECK_ARGS_LEN 24
+static int
+ocfs2_filecheck_args_get_long(const char *buf, size_t count,
+ unsigned long *val)
+{
+ char buffer[OCFS2_FILECHECK_ARGS_LEN];
+
+ memcpy(buffer, buf, count);
+ buffer[count] = '\0';
+
+ if (kstrtoul(buffer, 0, val))
+ return 1;
+
+ return 0;
+}
+
+static int
+ocfs2_filecheck_type_parse(const char *name, unsigned int *type)
+{
+ if (!strncmp(name, "fix", 4))
+ *type = OCFS2_FILECHECK_TYPE_FIX;
+ else if (!strncmp(name, "check", 6))
+ *type = OCFS2_FILECHECK_TYPE_CHK;
+ else if (!strncmp(name, "set", 4))
+ *type = OCFS2_FILECHECK_TYPE_SET;
+ else
+ return 1;
+
+ return 0;
+}
+
+static int
+ocfs2_filecheck_args_parse(const char *name, const char *buf, size_t count,
+ struct ocfs2_filecheck_args *args)
+{
+ unsigned long val = 0;
+ unsigned int type;
+
+ /* too short/long args length */
+ if ((count < 1) || (count >= OCFS2_FILECHECK_ARGS_LEN))
+ return 1;
+
+ if (ocfs2_filecheck_type_parse(name, &type))
+ return 1;
+ if (ocfs2_filecheck_args_get_long(buf, count, &val))
+ return 1;
+
+ if (val <= 0)
+ return 1;
+
+ args->fa_type = type;
+ if (type == OCFS2_FILECHECK_TYPE_SET)
+ args->fa_len = (unsigned int)val;
+ else
+ args->fa_ino = val;
+
+ return 0;
+}
+
+static ssize_t ocfs2_filecheck_show(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ char *buf)
+{
+
+ ssize_t ret = 0, total = 0, remain = PAGE_SIZE;
+ unsigned int type;
+ struct ocfs2_filecheck_entry *p;
+ struct ocfs2_filecheck_sysfs_entry *ent;
+
+ if (ocfs2_filecheck_type_parse(attr->attr.name, &type))
+ return -EINVAL;
+
+ ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
+ if (!ent) {
+ mlog(ML_ERROR,
+ "Cannot get the corresponding entry via device basename %s\n",
+ kobj->name);
+ return -ENODEV;
+ }
+
+ if (type == OCFS2_FILECHECK_TYPE_SET) {
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ total = snprintf(buf, remain, "%u\n", ent->fs_fcheck->fc_max);
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+ goto exit;
+ }
+
+ ret = snprintf(buf, remain, "INO\t\tDONE\tERROR\n");
+ total += ret;
+ remain -= ret;
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
+ if (p->fe_type != type)
+ continue;
+
+ ret = snprintf(buf + total, remain, "%lu\t\t%u\t%s\n",
+ p->fe_ino, p->fe_done,
+ ocfs2_filecheck_error(p->fe_status));
+ if (ret < 0) {
+ total = ret;
+ break;
+ }
+ if (ret == remain) {
+ /* snprintf() didn't fit */
+ total = -E2BIG;
+ break;
+ }
+ total += ret;
+ remain -= ret;
+ }
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+
+exit:
+ ocfs2_filecheck_sysfs_put(ent);
+ return total;
+}
+
+static int
+ocfs2_filecheck_erase_entry(struct ocfs2_filecheck_sysfs_entry *ent)
+{
+ struct ocfs2_filecheck_entry *p;
+
+ list_for_each_entry(p, &ent->fs_fcheck->fc_head, fe_list) {
+ if (p->fe_done) {
+ list_del(&p->fe_list);
+ kfree(p);
+ ent->fs_fcheck->fc_size--;
+ ent->fs_fcheck->fc_done--;
+ return 1;
+ }
+ }
+
+ return 0;
+}
+
+static int
+ocfs2_filecheck_erase_entries(struct ocfs2_filecheck_sysfs_entry *ent,
+ unsigned int count)
+{
+ unsigned int i = 0;
+ unsigned int ret = 0;
+
+ while (i++ < count) {
+ if (ocfs2_filecheck_erase_entry(ent))
+ ret++;
+ else
+ break;
+ }
+
+ return (ret == count ? 1 : 0);
+}
+
+static void
+ocfs2_filecheck_done_entry(struct ocfs2_filecheck_sysfs_entry *ent,
+ struct ocfs2_filecheck_entry *entry)
+{
+ entry->fe_done = 1;
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ ent->fs_fcheck->fc_done++;
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+}
+
+static unsigned int
+ocfs2_filecheck_handle(struct super_block *sb,
+ unsigned long ino, unsigned int flags)
+{
+ unsigned int ret = OCFS2_FILECHECK_ERR_SUCCESS;
+ struct inode *inode = NULL;
+ int rc;
+
+ inode = ocfs2_iget(OCFS2_SB(sb), ino, flags, 0);
+ if (IS_ERR(inode)) {
+ rc = (int)(-(long)inode);
+ if (rc >= OCFS2_FILECHECK_ERR_START &&
+ rc < OCFS2_FILECHECK_ERR_END)
+ ret = rc;
+ else
+ ret = OCFS2_FILECHECK_ERR_FAILED;
+ } else
+ iput(inode);
+
+ return ret;
+}
+
+static void
+ocfs2_filecheck_handle_entry(struct ocfs2_filecheck_sysfs_entry *ent,
+ struct ocfs2_filecheck_entry *entry)
+{
+ if (entry->fe_type == OCFS2_FILECHECK_TYPE_CHK)
+ entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+ entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_CHK);
+ else if (entry->fe_type == OCFS2_FILECHECK_TYPE_FIX)
+ entry->fe_status = ocfs2_filecheck_handle(ent->fs_sb,
+ entry->fe_ino, OCFS2_FI_FLAG_FILECHECK_FIX);
+ else
+ entry->fe_status = OCFS2_FILECHECK_ERR_UNSUPPORTED;
+
+ ocfs2_filecheck_done_entry(ent, entry);
+}
+
+static ssize_t ocfs2_filecheck_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count)
+{
+ struct ocfs2_filecheck_args args;
+ struct ocfs2_filecheck_entry *entry;
+ struct ocfs2_filecheck_sysfs_entry *ent;
+ ssize_t ret = 0;
+
+ if (count == 0)
+ return count;
+
+ if (ocfs2_filecheck_args_parse(attr->attr.name, buf, count, &args)) {
+ mlog(ML_ERROR, "Invalid arguments for online file check\n");
+ return -EINVAL;
+ }
+
+ ent = ocfs2_filecheck_sysfs_get(kobj->parent->name);
+ if (!ent) {
+ mlog(ML_ERROR,
+ "Cannot get the corresponding entry via device basename %s\n",
+ kobj->parent->name);
+ return -ENODEV;
+ }
+
+ if (args.fa_type == OCFS2_FILECHECK_TYPE_SET) {
+ ret = ocfs2_filecheck_adjust_max(ent, args.fa_len);
+ goto exit;
+ }
+
+ entry = kmalloc(sizeof(struct ocfs2_filecheck_entry), GFP_NOFS);
+ if (!entry) {
+ ret = -ENOMEM;
+ goto exit;
+ }
+
+ spin_lock(&ent->fs_fcheck->fc_lock);
+ if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
+ (ent->fs_fcheck->fc_done == 0)) {
+ mlog(ML_ERROR,
+ "Cannot do more file check "
+ "since file check queue(%u) is full now\n",
+ ent->fs_fcheck->fc_max);
+ ret = -EBUSY;
+ kfree(entry);
+ } else {
+ if ((ent->fs_fcheck->fc_size >= ent->fs_fcheck->fc_max) &&
+ (ent->fs_fcheck->fc_done > 0)) {
+ /* Delete the oldest entry which was done,
+ * make sure the entry size in list does
+ * not exceed maximum value
+ */
+ BUG_ON(!ocfs2_filecheck_erase_entry(ent));
+ }
+
+ entry->fe_ino = args.fa_ino;
+ entry->fe_type = args.fa_type;
+ entry->fe_done = 0;
+ entry->fe_status = OCFS2_FILECHECK_ERR_INPROGRESS;
+ list_add_tail(&entry->fe_list, &ent->fs_fcheck->fc_head);
+ ent->fs_fcheck->fc_size++;
+ }
+ spin_unlock(&ent->fs_fcheck->fc_lock);
+
+ if (!ret)
+ ocfs2_filecheck_handle_entry(ent, entry);
+
+exit:
+ ocfs2_filecheck_sysfs_put(ent);
+ return (!ret ? count : ret);
+}
diff --git a/fs/ocfs2/filecheck.h b/fs/ocfs2/filecheck.h
new file mode 100644
index 000000000..e5cd002a2
--- /dev/null
+++ b/fs/ocfs2/filecheck.h
@@ -0,0 +1,49 @@
+/* -*- mode: c; c-basic-offset: 8; -*-
+ * vim: noexpandtab sw=8 ts=8 sts=0:
+ *
+ * filecheck.h
+ *
+ * Online file check.
+ *
+ * Copyright (C) 2016 SuSE. All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License as published by the Free Software Foundation, version 2.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ */
+
+
+#ifndef FILECHECK_H
+#define FILECHECK_H
+
+#include <linux/types.h>
+#include <linux/list.h>
+
+
+/* File check errno */
+enum {
+ OCFS2_FILECHECK_ERR_SUCCESS = 0, /* Success */
+ OCFS2_FILECHECK_ERR_FAILED = 1000, /* Other failure */
+ OCFS2_FILECHECK_ERR_INPROGRESS, /* In progress */
+ OCFS2_FILECHECK_ERR_READONLY, /* Read only */
+ OCFS2_FILECHECK_ERR_INJBD, /* Buffer in jbd */
+ OCFS2_FILECHECK_ERR_INVALIDINO, /* Invalid ino */
+ OCFS2_FILECHECK_ERR_BLOCKECC, /* Block ecc */
+ OCFS2_FILECHECK_ERR_BLOCKNO, /* Block number */
+ OCFS2_FILECHECK_ERR_VALIDFLAG, /* Inode valid flag */
+ OCFS2_FILECHECK_ERR_GENERATION, /* Inode generation */
+ OCFS2_FILECHECK_ERR_UNSUPPORTED /* Unsupported */
+};
+
+#define OCFS2_FILECHECK_ERR_START OCFS2_FILECHECK_ERR_FAILED
+#define OCFS2_FILECHECK_ERR_END OCFS2_FILECHECK_ERR_UNSUPPORTED
+
+int ocfs2_filecheck_create_sysfs(struct super_block *sb);
+int ocfs2_filecheck_remove_sysfs(struct super_block *sb);
+
+#endif /* FILECHECK_H */
diff --git a/fs/ocfs2/inode.c b/fs/ocfs2/inode.c
index 36294446d..12f4a9e98 100644
--- a/fs/ocfs2/inode.c
+++ b/fs/ocfs2/inode.c
@@ -53,6 +53,7 @@
#include "xattr.h"
#include "refcounttree.h"
#include "ocfs2_trace.h"
+#include "filecheck.h"
#include "buffer_head_io.h"
@@ -74,6 +75,14 @@ static int ocfs2_truncate_for_delete(struct ocfs2_super *osb,
struct inode *inode,
struct buffer_head *fe_bh);
+static int ocfs2_filecheck_read_inode_block_full(struct inode *inode,
+ struct buffer_head **bh,
+ int flags, int type);
+static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
+ struct buffer_head *bh);
+static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
+ struct buffer_head *bh);
+
void ocfs2_set_inode_flags(struct inode *inode)
{
unsigned int flags = OCFS2_I(inode)->ip_attr;
@@ -127,6 +136,7 @@ struct inode *ocfs2_ilookup(struct super_block *sb, u64 blkno)
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
int sysfile_type)
{
+ int rc = 0;
struct inode *inode = NULL;
struct super_block *sb = osb->sb;
struct ocfs2_find_inode_args args;
@@ -161,12 +171,17 @@ struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 blkno, unsigned flags,
}
trace_ocfs2_iget5_locked(inode->i_state);
if (inode->i_state & I_NEW) {
- ocfs2_read_locked_inode(inode, &args);
+ rc = ocfs2_read_locked_inode(inode, &args);
unlock_new_inode(inode);
}
if (is_bad_inode(inode)) {
iput(inode);
- inode = ERR_PTR(-ESTALE);
+ if ((flags & OCFS2_FI_FLAG_FILECHECK_CHK) ||
+ (flags & OCFS2_FI_FLAG_FILECHECK_FIX))
+ /* Return OCFS2_FILECHECK_ERR_XXX related errno */
+ inode = ERR_PTR(rc);
+ else
+ inode = ERR_PTR(-ESTALE);
goto bail;
}
@@ -410,7 +425,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
struct ocfs2_super *osb;
struct ocfs2_dinode *fe;
struct buffer_head *bh = NULL;
- int status, can_lock;
+ int status, can_lock, lock_level = 0;
u32 generation = 0;
status = -EINVAL;
@@ -478,7 +493,7 @@ static int ocfs2_read_locked_inode(struct inode *inode,
mlog_errno(status);
return status;
}
- status = ocfs2_inode_lock(inode, NULL, 0);
+ status = ocfs2_inode_lock(inode, NULL, lock_level);
if (status) {
make_bad_inode(inode);
mlog_errno(status);
@@ -495,16 +510,32 @@ static int ocfs2_read_locked_inode(struct inode *inode,
}
if (can_lock) {
- status = ocfs2_read_inode_block_full(inode, &bh,
- OCFS2_BH_IGNORE_CACHE);
+ if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK)
+ status = ocfs2_filecheck_read_inode_block_full(inode,
+ &bh, OCFS2_BH_IGNORE_CACHE, 0);
+ else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX)
+ status = ocfs2_filecheck_read_inode_block_full(inode,
+ &bh, OCFS2_BH_IGNORE_CACHE, 1);
+ else
+ status = ocfs2_read_inode_block_full(inode,
+ &bh, OCFS2_BH_IGNORE_CACHE);
} else {
status = ocfs2_read_blocks_sync(osb, args->fi_blkno, 1, &bh);
/*
* If buffer is in jbd, then its checksum may not have been
* computed as yet.
*/
- if (!status && !buffer_jbd(bh))
- status = ocfs2_validate_inode_block(osb->sb, bh);
+ if (!status && !buffer_jbd(bh)) {
+ if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_CHK)
+ status = ocfs2_filecheck_validate_inode_block(
+ osb->sb, bh);
+ else if (args->fi_flags & OCFS2_FI_FLAG_FILECHECK_FIX)
+ status = ocfs2_filecheck_repair_inode_block(
+ osb->sb, bh);
+ else
+ status = ocfs2_validate_inode_block(
+ osb->sb, bh);
+ }
}
if (status < 0) {
mlog_errno(status);
@@ -532,11 +563,24 @@ static int ocfs2_read_locked_inode(struct inode *inode,
BUG_ON(args->fi_blkno != le64_to_cpu(fe->i_blkno));
+ if (buffer_dirty(bh) && !buffer_jbd(bh)) {
+ if (can_lock) {
+ ocfs2_inode_unlock(inode, lock_level);
+ lock_level = 1;
+ ocfs2_inode_lock(inode, NULL, lock_level);
+ }
+ status = ocfs2_write_block(osb, bh, INODE_CACHE(inode));
+ if (status < 0) {
+ mlog_errno(status);
+ goto bail;
+ }
+ }
+
status = 0;
bail:
if (can_lock)
- ocfs2_inode_unlock(inode, 0);
+ ocfs2_inode_unlock(inode, lock_level);
if (status < 0)
make_bad_inode(inode);
@@ -1126,6 +1170,9 @@ static void ocfs2_clear_inode(struct inode *inode)
mlog_bug_on_msg(!list_empty(&oi->ip_io_markers),
"Clear inode of %llu, inode has io markers\n",
(unsigned long long)oi->ip_blkno);
+ mlog_bug_on_msg(!list_empty(&oi->ip_unwritten_list),
+ "Clear inode of %llu, inode has unwritten extents\n",
+ (unsigned long long)oi->ip_blkno);
ocfs2_extent_map_trunc(inode, 0);
@@ -1397,6 +1444,169 @@ bail:
return rc;
}
+static int ocfs2_filecheck_validate_inode_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int rc = 0;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+ trace_ocfs2_filecheck_validate_inode_block(
+ (unsigned long long)bh->b_blocknr);
+
+ BUG_ON(!buffer_uptodate(bh));
+
+ /*
+ * Call ocfs2_validate_meta_ecc() first since it has ecc repair
+ * function, but we should not return error immediately when ecc
+ * validation fails, because the reason is quite likely the invalid
+ * inode number inputed.
+ */
+ rc = ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check);
+ if (rc) {
+ mlog(ML_ERROR,
+ "Filecheck: checksum failed for dinode %llu\n",
+ (unsigned long long)bh->b_blocknr);
+ rc = -OCFS2_FILECHECK_ERR_BLOCKECC;
+ }
+
+ if (!OCFS2_IS_VALID_DINODE(di)) {
+ mlog(ML_ERROR,
+ "Filecheck: invalid dinode #%llu: signature = %.*s\n",
+ (unsigned long long)bh->b_blocknr, 7, di->i_signature);
+ rc = -OCFS2_FILECHECK_ERR_INVALIDINO;
+ goto bail;
+ } else if (rc)
+ goto bail;
+
+ if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+ mlog(ML_ERROR,
+ "Filecheck: invalid dinode #%llu: i_blkno is %llu\n",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ rc = -OCFS2_FILECHECK_ERR_BLOCKNO;
+ goto bail;
+ }
+
+ if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+ mlog(ML_ERROR,
+ "Filecheck: invalid dinode #%llu: OCFS2_VALID_FL "
+ "not set\n",
+ (unsigned long long)bh->b_blocknr);
+ rc = -OCFS2_FILECHECK_ERR_VALIDFLAG;
+ goto bail;
+ }
+
+ if (le32_to_cpu(di->i_fs_generation) !=
+ OCFS2_SB(sb)->fs_generation) {
+ mlog(ML_ERROR,
+ "Filecheck: invalid dinode #%llu: fs_generation is %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(di->i_fs_generation));
+ rc = -OCFS2_FILECHECK_ERR_GENERATION;
+ goto bail;
+ }
+
+bail:
+ return rc;
+}
+
+static int ocfs2_filecheck_repair_inode_block(struct super_block *sb,
+ struct buffer_head *bh)
+{
+ int changed = 0;
+ struct ocfs2_dinode *di = (struct ocfs2_dinode *)bh->b_data;
+
+ if (!ocfs2_filecheck_validate_inode_block(sb, bh))
+ return 0;
+
+ trace_ocfs2_filecheck_repair_inode_block(
+ (unsigned long long)bh->b_blocknr);
+
+ if (ocfs2_is_hard_readonly(OCFS2_SB(sb)) ||
+ ocfs2_is_soft_readonly(OCFS2_SB(sb))) {
+ mlog(ML_ERROR,
+ "Filecheck: cannot repair dinode #%llu "
+ "on readonly filesystem\n",
+ (unsigned long long)bh->b_blocknr);
+ return -OCFS2_FILECHECK_ERR_READONLY;
+ }
+
+ if (buffer_jbd(bh)) {
+ mlog(ML_ERROR,
+ "Filecheck: cannot repair dinode #%llu, "
+ "its buffer is in jbd\n",
+ (unsigned long long)bh->b_blocknr);
+ return -OCFS2_FILECHECK_ERR_INJBD;
+ }
+
+ if (!OCFS2_IS_VALID_DINODE(di)) {
+ /* Cannot fix invalid inode block */
+ return -OCFS2_FILECHECK_ERR_INVALIDINO;
+ }
+
+ if (!(di->i_flags & cpu_to_le32(OCFS2_VALID_FL))) {
+ /* Cannot just add VALID_FL flag back as a fix,
+ * need more things to check here.
+ */
+ return -OCFS2_FILECHECK_ERR_VALIDFLAG;
+ }
+
+ if (le64_to_cpu(di->i_blkno) != bh->b_blocknr) {
+ di->i_blkno = cpu_to_le64(bh->b_blocknr);
+ changed = 1;
+ mlog(ML_ERROR,
+ "Filecheck: reset dinode #%llu: i_blkno to %llu\n",
+ (unsigned long long)bh->b_blocknr,
+ (unsigned long long)le64_to_cpu(di->i_blkno));
+ }
+
+ if (le32_to_cpu(di->i_fs_generation) !=
+ OCFS2_SB(sb)->fs_generation) {
+ di->i_fs_generation = cpu_to_le32(OCFS2_SB(sb)->fs_generation);
+ changed = 1;
+ mlog(ML_ERROR,
+ "Filecheck: reset dinode #%llu: fs_generation to %u\n",
+ (unsigned long long)bh->b_blocknr,
+ le32_to_cpu(di->i_fs_generation));
+ }
+
+ if (changed || ocfs2_validate_meta_ecc(sb, bh->b_data, &di->i_check)) {
+ ocfs2_compute_meta_ecc(sb, bh->b_data, &di->i_check);
+ mark_buffer_dirty(bh);
+ mlog(ML_ERROR,
+ "Filecheck: reset dinode #%llu: compute meta ecc\n",
+ (unsigned long long)bh->b_blocknr);
+ }
+
+ return 0;
+}
+
+static int
+ocfs2_filecheck_read_inode_block_full(struct inode *inode,
+ struct buffer_head **bh,
+ int flags, int type)
+{
+ int rc;
+ struct buffer_head *tmp = *bh;
+
+ if (!type) /* Check inode block */
+ rc = ocfs2_read_blocks(INODE_CACHE(inode),
+ OCFS2_I(inode)->ip_blkno,
+ 1, &tmp, flags,
+ ocfs2_filecheck_validate_inode_block);
+ else /* Repair inode block */
+ rc = ocfs2_read_blocks(INODE_CACHE(inode),
+ OCFS2_I(inode)->ip_blkno,
+ 1, &tmp, flags,
+ ocfs2_filecheck_repair_inode_block);
+
+ /* If ocfs2_read_blocks() got us a new bh, pass it up. */
+ if (!rc && !*bh)
+ *bh = tmp;
+
+ return rc;
+}
+
int ocfs2_read_inode_block_full(struct inode *inode, struct buffer_head **bh,
int flags)
{
diff --git a/fs/ocfs2/inode.h b/fs/ocfs2/inode.h
index aac8b86f3..d8f3fc8d2 100644
--- a/fs/ocfs2/inode.h
+++ b/fs/ocfs2/inode.h
@@ -43,9 +43,6 @@ struct ocfs2_inode_info
/* protects extended attribute changes on this inode */
struct rw_semaphore ip_xattr_sem;
- /* Number of outstanding AIO's which are not page aligned */
- struct mutex ip_unaligned_aio;
-
/* These fields are protected by ip_lock */
spinlock_t ip_lock;
u32 ip_open_count;
@@ -57,6 +54,9 @@ struct ocfs2_inode_info
u32 ip_flags; /* see below */
u32 ip_attr; /* inode attributes */
+ /* Record unwritten extents during direct io. */
+ struct list_head ip_unwritten_list;
+
/* protected by recovery_lock. */
struct inode *ip_next_orphan;
@@ -139,6 +139,9 @@ int ocfs2_drop_inode(struct inode *inode);
/* Flags for ocfs2_iget() */
#define OCFS2_FI_FLAG_SYSFILE 0x1
#define OCFS2_FI_FLAG_ORPHAN_RECOVERY 0x2
+#define OCFS2_FI_FLAG_FILECHECK_CHK 0x4
+#define OCFS2_FI_FLAG_FILECHECK_FIX 0x8
+
struct inode *ocfs2_ilookup(struct super_block *sb, u64 feoff);
struct inode *ocfs2_iget(struct ocfs2_super *osb, u64 feoff, unsigned flags,
int sysfile_type);
diff --git a/fs/ocfs2/journal.c b/fs/ocfs2/journal.c
index 61b833b72..e607419cd 100644
--- a/fs/ocfs2/journal.c
+++ b/fs/ocfs2/journal.c
@@ -231,7 +231,7 @@ void ocfs2_recovery_exit(struct ocfs2_super *osb)
/* At this point, we know that no more recovery threads can be
* launched, so wait for any recovery completion work to
* complete. */
- flush_workqueue(ocfs2_wq);
+ flush_workqueue(osb->ocfs2_wq);
/*
* Now that recovery is shut down, and the osb is about to be
@@ -1326,7 +1326,7 @@ static void ocfs2_queue_recovery_completion(struct ocfs2_journal *journal,
spin_lock(&journal->j_lock);
list_add_tail(&item->lri_list, &journal->j_la_cleanups);
- queue_work(ocfs2_wq, &journal->j_recovery_work);
+ queue_work(journal->j_osb->ocfs2_wq, &journal->j_recovery_work);
spin_unlock(&journal->j_lock);
}
@@ -1968,7 +1968,7 @@ static void ocfs2_orphan_scan_work(struct work_struct *work)
mutex_lock(&os->os_lock);
ocfs2_queue_orphan_scan(osb);
if (atomic_read(&os->os_state) == ORPHAN_SCAN_ACTIVE)
- queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+ queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
ocfs2_orphan_scan_timeout());
mutex_unlock(&os->os_lock);
}
@@ -2008,7 +2008,7 @@ void ocfs2_orphan_scan_start(struct ocfs2_super *osb)
atomic_set(&os->os_state, ORPHAN_SCAN_INACTIVE);
else {
atomic_set(&os->os_state, ORPHAN_SCAN_ACTIVE);
- queue_delayed_work(ocfs2_wq, &os->os_orphan_scan_work,
+ queue_delayed_work(osb->ocfs2_wq, &os->os_orphan_scan_work,
ocfs2_orphan_scan_timeout());
}
}
diff --git a/fs/ocfs2/localalloc.c b/fs/ocfs2/localalloc.c
index 7d62c43a2..fe0d1f957 100644
--- a/fs/ocfs2/localalloc.c
+++ b/fs/ocfs2/localalloc.c
@@ -386,7 +386,7 @@ void ocfs2_shutdown_local_alloc(struct ocfs2_super *osb)
struct ocfs2_dinode *alloc = NULL;
cancel_delayed_work(&osb->la_enable_wq);
- flush_workqueue(ocfs2_wq);
+ flush_workqueue(osb->ocfs2_wq);
if (osb->local_alloc_state == OCFS2_LA_UNUSED)
goto out;
@@ -1085,7 +1085,7 @@ static int ocfs2_recalc_la_window(struct ocfs2_super *osb,
} else {
osb->local_alloc_state = OCFS2_LA_DISABLED;
}
- queue_delayed_work(ocfs2_wq, &osb->la_enable_wq,
+ queue_delayed_work(osb->ocfs2_wq, &osb->la_enable_wq,
OCFS2_LA_ENABLE_INTERVAL);
goto out_unlock;
}
diff --git a/fs/ocfs2/mmap.c b/fs/ocfs2/mmap.c
index 77ebc2bc1..71545ad46 100644
--- a/fs/ocfs2/mmap.c
+++ b/fs/ocfs2/mmap.c
@@ -65,13 +65,13 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
struct inode *inode = file_inode(file);
struct address_space *mapping = inode->i_mapping;
loff_t pos = page_offset(page);
- unsigned int len = PAGE_CACHE_SIZE;
+ unsigned int len = PAGE_SIZE;
pgoff_t last_index;
struct page *locked_page = NULL;
void *fsdata;
loff_t size = i_size_read(inode);
- last_index = (size - 1) >> PAGE_CACHE_SHIFT;
+ last_index = (size - 1) >> PAGE_SHIFT;
/*
* There are cases that lead to the page no longer bebongs to the
@@ -102,10 +102,10 @@ static int __ocfs2_page_mkwrite(struct file *file, struct buffer_head *di_bh,
* because the "write" would invalidate their data.
*/
if (page->index == last_index)
- len = ((size - 1) & ~PAGE_CACHE_MASK) + 1;
+ len = ((size - 1) & ~PAGE_MASK) + 1;
- ret = ocfs2_write_begin_nolock(file, mapping, pos, len, 0, &locked_page,
- &fsdata, di_bh, page);
+ ret = ocfs2_write_begin_nolock(mapping, pos, len, OCFS2_WRITE_MMAP,
+ &locked_page, &fsdata, di_bh, page);
if (ret) {
if (ret != -ENOSPC)
mlog_errno(ret);
diff --git a/fs/ocfs2/namei.c b/fs/ocfs2/namei.c
index 6b3e87189..a8f1225e6 100644
--- a/fs/ocfs2/namei.c
+++ b/fs/ocfs2/namei.c
@@ -259,7 +259,6 @@ static int ocfs2_mknod(struct inode *dir,
struct ocfs2_dir_lookup_result lookup = { NULL, };
sigset_t oldset;
int did_block_signals = 0;
- struct posix_acl *default_acl = NULL, *acl = NULL;
struct ocfs2_dentry_lock *dl = NULL;
trace_ocfs2_mknod(dir, dentry, dentry->d_name.len, dentry->d_name.name,
@@ -367,12 +366,6 @@ static int ocfs2_mknod(struct inode *dir,
goto leave;
}
- status = posix_acl_create(dir, &inode->i_mode, &default_acl, &acl);
- if (status) {
- mlog_errno(status);
- goto leave;
- }
-
handle = ocfs2_start_trans(osb, ocfs2_mknod_credits(osb->sb,
S_ISDIR(mode),
xattr_credits));
@@ -421,16 +414,8 @@ static int ocfs2_mknod(struct inode *dir,
inc_nlink(dir);
}
- if (default_acl) {
- status = ocfs2_set_acl(handle, inode, new_fe_bh,
- ACL_TYPE_DEFAULT, default_acl,
- meta_ac, data_ac);
- }
- if (!status && acl) {
- status = ocfs2_set_acl(handle, inode, new_fe_bh,
- ACL_TYPE_ACCESS, acl,
- meta_ac, data_ac);
- }
+ status = ocfs2_init_acl(handle, inode, dir, new_fe_bh, parent_fe_bh,
+ meta_ac, data_ac);
if (status < 0) {
mlog_errno(status);
@@ -472,10 +457,6 @@ static int ocfs2_mknod(struct inode *dir,
d_instantiate(dentry, inode);
status = 0;
leave:
- if (default_acl)
- posix_acl_release(default_acl);
- if (acl)
- posix_acl_release(acl);
if (status < 0 && did_quota_inode)
dquot_free_inode(inode);
if (handle)
diff --git a/fs/ocfs2/ocfs2.h b/fs/ocfs2/ocfs2.h
index 7a0126267..e63af7ddf 100644
--- a/fs/ocfs2/ocfs2.h
+++ b/fs/ocfs2/ocfs2.h
@@ -464,6 +464,14 @@ struct ocfs2_super
struct ocfs2_refcount_tree *osb_ref_tree_lru;
struct mutex system_file_mutex;
+
+ /*
+ * OCFS2 needs to schedule several different types of work which
+ * require cluster locking, disk I/O, recovery waits, etc. Since these
+ * types of work tend to be heavy we avoid using the kernel events
+ * workqueue and schedule on our own.
+ */
+ struct workqueue_struct *ocfs2_wq;
};
#define OCFS2_SB(sb) ((struct ocfs2_super *)(sb)->s_fs_info)
@@ -814,10 +822,10 @@ static inline unsigned int ocfs2_page_index_to_clusters(struct super_block *sb,
u32 clusters = pg_index;
unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
- if (unlikely(PAGE_CACHE_SHIFT > cbits))
- clusters = pg_index << (PAGE_CACHE_SHIFT - cbits);
- else if (PAGE_CACHE_SHIFT < cbits)
- clusters = pg_index >> (cbits - PAGE_CACHE_SHIFT);
+ if (unlikely(PAGE_SHIFT > cbits))
+ clusters = pg_index << (PAGE_SHIFT - cbits);
+ else if (PAGE_SHIFT < cbits)
+ clusters = pg_index >> (cbits - PAGE_SHIFT);
return clusters;
}
@@ -831,10 +839,10 @@ static inline pgoff_t ocfs2_align_clusters_to_page_index(struct super_block *sb,
unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
pgoff_t index = clusters;
- if (PAGE_CACHE_SHIFT > cbits) {
- index = (pgoff_t)clusters >> (PAGE_CACHE_SHIFT - cbits);
- } else if (PAGE_CACHE_SHIFT < cbits) {
- index = (pgoff_t)clusters << (cbits - PAGE_CACHE_SHIFT);
+ if (PAGE_SHIFT > cbits) {
+ index = (pgoff_t)clusters >> (PAGE_SHIFT - cbits);
+ } else if (PAGE_SHIFT < cbits) {
+ index = (pgoff_t)clusters << (cbits - PAGE_SHIFT);
}
return index;
@@ -845,8 +853,8 @@ static inline unsigned int ocfs2_pages_per_cluster(struct super_block *sb)
unsigned int cbits = OCFS2_SB(sb)->s_clustersize_bits;
unsigned int pages_per_cluster = 1;
- if (PAGE_CACHE_SHIFT < cbits)
- pages_per_cluster = 1 << (cbits - PAGE_CACHE_SHIFT);
+ if (PAGE_SHIFT < cbits)
+ pages_per_cluster = 1 << (cbits - PAGE_SHIFT);
return pages_per_cluster;
}
diff --git a/fs/ocfs2/ocfs2_trace.h b/fs/ocfs2/ocfs2_trace.h
index 6cb019b7c..f8f5fc5e6 100644
--- a/fs/ocfs2/ocfs2_trace.h
+++ b/fs/ocfs2/ocfs2_trace.h
@@ -1450,28 +1450,20 @@ DEFINE_OCFS2_ULL_ULL_ULL_EVENT(ocfs2_remove_inode_range);
TRACE_EVENT(ocfs2_prepare_inode_for_write,
TP_PROTO(unsigned long long ino, unsigned long long saved_pos,
- int appending, unsigned long count,
- int *direct_io, int *has_refcount),
- TP_ARGS(ino, saved_pos, appending, count, direct_io, has_refcount),
+ unsigned long count),
+ TP_ARGS(ino, saved_pos, count),
TP_STRUCT__entry(
__field(unsigned long long, ino)
__field(unsigned long long, saved_pos)
- __field(int, appending)
__field(unsigned long, count)
- __field(int, direct_io)
- __field(int, has_refcount)
),
TP_fast_assign(
__entry->ino = ino;
__entry->saved_pos = saved_pos;
- __entry->appending = appending;
__entry->count = count;
- __entry->direct_io = direct_io ? *direct_io : -1;
- __entry->has_refcount = has_refcount ? *has_refcount : -1;
),
- TP_printk("%llu %llu %d %lu %d %d", __entry->ino,
- __entry->saved_pos, __entry->appending, __entry->count,
- __entry->direct_io, __entry->has_refcount)
+ TP_printk("%llu %llu %lu", __entry->ino,
+ __entry->saved_pos, __entry->count)
);
DEFINE_OCFS2_INT_EVENT(generic_file_aio_read_ret);
@@ -1540,6 +1532,8 @@ DEFINE_OCFS2_ULL_INT_EVENT(ocfs2_read_locked_inode);
DEFINE_OCFS2_INT_INT_EVENT(ocfs2_check_orphan_recovery_state);
DEFINE_OCFS2_ULL_EVENT(ocfs2_validate_inode_block);
+DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_validate_inode_block);
+DEFINE_OCFS2_ULL_EVENT(ocfs2_filecheck_repair_inode_block);
TRACE_EVENT(ocfs2_inode_is_valid_to_delete,
TP_PROTO(void *task, void *dc_task, unsigned long long ino,
@@ -2035,6 +2029,8 @@ DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_release_dquot);
DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_acquire_dquot);
+DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_get_next_id);
+
DEFINE_OCFS2_UINT_INT_EVENT(ocfs2_mark_dquot_dirty);
/* End of trace events for fs/ocfs2/quota_global.c. */
diff --git a/fs/ocfs2/quota_global.c b/fs/ocfs2/quota_global.c
index 9c9dd30bc..ab6a6cdcf 100644
--- a/fs/ocfs2/quota_global.c
+++ b/fs/ocfs2/quota_global.c
@@ -726,7 +726,7 @@ static int ocfs2_release_dquot(struct dquot *dquot)
dqgrab(dquot);
/* First entry on list -> queue work */
if (llist_add(&OCFS2_DQUOT(dquot)->list, &osb->dquot_drop_list))
- queue_work(ocfs2_wq, &osb->dquot_drop_work);
+ queue_work(osb->ocfs2_wq, &osb->dquot_drop_work);
goto out;
}
status = ocfs2_lock_global_qf(oinfo, 1);
@@ -860,6 +860,37 @@ out:
return status;
}
+static int ocfs2_get_next_id(struct super_block *sb, struct kqid *qid)
+{
+ int type = qid->type;
+ struct ocfs2_mem_dqinfo *info = sb_dqinfo(sb, type)->dqi_priv;
+ int status = 0;
+
+ trace_ocfs2_get_next_id(from_kqid(&init_user_ns, *qid), type);
+ if (!sb_has_quota_loaded(sb, type)) {
+ status = -ESRCH;
+ goto out;
+ }
+ status = ocfs2_lock_global_qf(info, 0);
+ if (status < 0)
+ goto out;
+ status = ocfs2_qinfo_lock(info, 0);
+ if (status < 0)
+ goto out_global;
+ status = qtree_get_next_id(&info->dqi_gi, qid);
+ ocfs2_qinfo_unlock(info, 0);
+out_global:
+ ocfs2_unlock_global_qf(info, 0);
+out:
+ /*
+ * Avoid logging ENOENT since it just means there isn't next ID and
+ * ESRCH which means quota isn't enabled for the filesystem.
+ */
+ if (status && status != -ENOENT && status != -ESRCH)
+ mlog_errno(status);
+ return status;
+}
+
static int ocfs2_mark_dquot_dirty(struct dquot *dquot)
{
unsigned long mask = (1 << (DQ_LASTSET_B + QIF_ILIMITS_B)) |
@@ -968,4 +999,5 @@ const struct dquot_operations ocfs2_quota_operations = {
.write_info = ocfs2_write_info,
.alloc_dquot = ocfs2_alloc_dquot,
.destroy_dquot = ocfs2_destroy_dquot,
+ .get_next_id = ocfs2_get_next_id,
};
diff --git a/fs/ocfs2/refcounttree.c b/fs/ocfs2/refcounttree.c
index 3eff031aa..92bbe93bf 100644
--- a/fs/ocfs2/refcounttree.c
+++ b/fs/ocfs2/refcounttree.c
@@ -2937,16 +2937,16 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
end = i_size_read(inode);
while (offset < end) {
- page_index = offset >> PAGE_CACHE_SHIFT;
- map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
+ page_index = offset >> PAGE_SHIFT;
+ map_end = ((loff_t)page_index + 1) << PAGE_SHIFT;
if (map_end > end)
map_end = end;
/* from, to is the offset within the page. */
- from = offset & (PAGE_CACHE_SIZE - 1);
- to = PAGE_CACHE_SIZE;
- if (map_end & (PAGE_CACHE_SIZE - 1))
- to = map_end & (PAGE_CACHE_SIZE - 1);
+ from = offset & (PAGE_SIZE - 1);
+ to = PAGE_SIZE;
+ if (map_end & (PAGE_SIZE - 1))
+ to = map_end & (PAGE_SIZE - 1);
page = find_or_create_page(mapping, page_index, GFP_NOFS);
if (!page) {
@@ -2956,10 +2956,10 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
}
/*
- * In case PAGE_CACHE_SIZE <= CLUSTER_SIZE, This page
+ * In case PAGE_SIZE <= CLUSTER_SIZE, This page
* can't be dirtied before we CoW it out.
*/
- if (PAGE_CACHE_SIZE <= OCFS2_SB(sb)->s_clustersize)
+ if (PAGE_SIZE <= OCFS2_SB(sb)->s_clustersize)
BUG_ON(PageDirty(page));
if (!PageUptodate(page)) {
@@ -2987,7 +2987,7 @@ int ocfs2_duplicate_clusters_by_page(handle_t *handle,
mark_page_accessed(page);
unlock:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
page = NULL;
offset = map_end;
if (ret)
@@ -3165,8 +3165,8 @@ int ocfs2_cow_sync_writeback(struct super_block *sb,
}
while (offset < end) {
- page_index = offset >> PAGE_CACHE_SHIFT;
- map_end = ((loff_t)page_index + 1) << PAGE_CACHE_SHIFT;
+ page_index = offset >> PAGE_SHIFT;
+ map_end = ((loff_t)page_index + 1) << PAGE_SHIFT;
if (map_end > end)
map_end = end;
@@ -3182,7 +3182,7 @@ int ocfs2_cow_sync_writeback(struct super_block *sb,
mark_page_accessed(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
page = NULL;
offset = map_end;
if (ret)
@@ -4248,20 +4248,12 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
struct inode *inode = d_inode(old_dentry);
struct buffer_head *old_bh = NULL;
struct inode *new_orphan_inode = NULL;
- struct posix_acl *default_acl, *acl;
- umode_t mode;
if (!ocfs2_refcount_tree(OCFS2_SB(inode->i_sb)))
return -EOPNOTSUPP;
- mode = inode->i_mode;
- error = posix_acl_create(dir, &mode, &default_acl, &acl);
- if (error) {
- mlog_errno(error);
- return error;
- }
- error = ocfs2_create_inode_in_orphan(dir, mode,
+ error = ocfs2_create_inode_in_orphan(dir, inode->i_mode,
&new_orphan_inode);
if (error) {
mlog_errno(error);
@@ -4300,16 +4292,11 @@ static int ocfs2_reflink(struct dentry *old_dentry, struct inode *dir,
/* If the security isn't preserved, we need to re-initialize them. */
if (!preserve) {
error = ocfs2_init_security_and_acl(dir, new_orphan_inode,
- &new_dentry->d_name,
- default_acl, acl);
+ &new_dentry->d_name);
if (error)
mlog_errno(error);
}
out:
- if (default_acl)
- posix_acl_release(default_acl);
- if (acl)
- posix_acl_release(acl);
if (!error) {
error = ocfs2_mv_orphaned_inode_to_new(dir, new_orphan_inode,
new_dentry);
diff --git a/fs/ocfs2/resize.c b/fs/ocfs2/resize.c
index 576b9a048..18451e0fa 100644
--- a/fs/ocfs2/resize.c
+++ b/fs/ocfs2/resize.c
@@ -196,7 +196,7 @@ static int update_backups(struct inode * inode, u32 clusters, char *data)
for (i = 0; i < OCFS2_MAX_BACKUP_SUPERBLOCKS; i++) {
blkno = ocfs2_backup_super_blkno(inode->i_sb, i);
cluster = ocfs2_blocks_to_clusters(inode->i_sb, blkno);
- if (cluster > clusters)
+ if (cluster >= clusters)
break;
ret = ocfs2_read_blocks_sync(osb, blkno, 1, &backup);
diff --git a/fs/ocfs2/stackglue.c b/fs/ocfs2/stackglue.c
index 5d965e83b..13219ed73 100644
--- a/fs/ocfs2/stackglue.c
+++ b/fs/ocfs2/stackglue.c
@@ -629,7 +629,8 @@ static struct attribute_group ocfs2_attr_group = {
.attrs = ocfs2_attrs,
};
-static struct kset *ocfs2_kset;
+struct kset *ocfs2_kset;
+EXPORT_SYMBOL_GPL(ocfs2_kset);
static void ocfs2_sysfs_exit(void)
{
diff --git a/fs/ocfs2/stackglue.h b/fs/ocfs2/stackglue.h
index 66334a30c..f2dce10fa 100644
--- a/fs/ocfs2/stackglue.h
+++ b/fs/ocfs2/stackglue.h
@@ -298,4 +298,6 @@ void ocfs2_stack_glue_set_max_proto_version(struct ocfs2_protocol_version *max_p
int ocfs2_stack_glue_register(struct ocfs2_stack_plugin *plugin);
void ocfs2_stack_glue_unregister(struct ocfs2_stack_plugin *plugin);
+extern struct kset *ocfs2_kset;
+
#endif /* STACKGLUE_H */
diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c
index faa136509..d7cae3327 100644
--- a/fs/ocfs2/super.c
+++ b/fs/ocfs2/super.c
@@ -74,17 +74,12 @@
#include "suballoc.h"
#include "buffer_head_io.h"
+#include "filecheck.h"
static struct kmem_cache *ocfs2_inode_cachep;
struct kmem_cache *ocfs2_dquot_cachep;
struct kmem_cache *ocfs2_qf_chunk_cachep;
-/* OCFS2 needs to schedule several different types of work which
- * require cluster locking, disk I/O, recovery waits, etc. Since these
- * types of work tend to be heavy we avoid using the kernel events
- * workqueue and schedule on our own. */
-struct workqueue_struct *ocfs2_wq = NULL;
-
static struct dentry *ocfs2_debugfs_root;
MODULE_AUTHOR("Oracle");
@@ -236,6 +231,7 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
struct ocfs2_recovery_map *rm = osb->recovery_map;
struct ocfs2_orphan_scan *os = &osb->osb_orphan_scan;
int i, out = 0;
+ unsigned long flags;
out += snprintf(buf + out, len - out,
"%10s => Id: %-s Uuid: %-s Gen: 0x%X Label: %-s\n",
@@ -271,14 +267,14 @@ static int ocfs2_osb_dump(struct ocfs2_super *osb, char *buf, int len)
cconn->cc_version.pv_minor);
}
- spin_lock(&osb->dc_task_lock);
+ spin_lock_irqsave(&osb->dc_task_lock, flags);
out += snprintf(buf + out, len - out,
"%10s => Pid: %d Count: %lu WakeSeq: %lu "
"WorkSeq: %lu\n", "DownCnvt",
(osb->dc_task ? task_pid_nr(osb->dc_task) : -1),
osb->blocked_lock_count, osb->dc_wake_sequence,
osb->dc_work_sequence);
- spin_unlock(&osb->dc_task_lock);
+ spin_unlock_irqrestore(&osb->dc_task_lock, flags);
spin_lock(&osb->osb_lock);
out += snprintf(buf + out, len - out, "%10s => Pid: %d Nodes:",
@@ -609,8 +605,8 @@ static unsigned long long ocfs2_max_file_offset(unsigned int bbits,
/*
* We might be limited by page cache size.
*/
- if (bytes > PAGE_CACHE_SIZE) {
- bytes = PAGE_CACHE_SIZE;
+ if (bytes > PAGE_SIZE) {
+ bytes = PAGE_SIZE;
trim = 1;
/*
* Shift by 31 here so that we don't get larger than
@@ -1204,6 +1200,9 @@ static int ocfs2_fill_super(struct super_block *sb, void *data, int silent)
/* Start this when the mount is almost sure of being successful */
ocfs2_orphan_scan_start(osb);
+ /* Create filecheck sysfile /sys/fs/ocfs2/<devname>/filecheck */
+ ocfs2_filecheck_create_sysfs(sb);
+
return status;
read_super_error:
@@ -1608,33 +1607,25 @@ static int __init ocfs2_init(void)
if (status < 0)
goto out2;
- ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
- if (!ocfs2_wq) {
- status = -ENOMEM;
- goto out3;
- }
-
ocfs2_debugfs_root = debugfs_create_dir("ocfs2", NULL);
if (!ocfs2_debugfs_root) {
status = -ENOMEM;
mlog(ML_ERROR, "Unable to create ocfs2 debugfs root.\n");
- goto out4;
+ goto out3;
}
ocfs2_set_locking_protocol();
status = register_quota_format(&ocfs2_quota_format);
if (status < 0)
- goto out4;
+ goto out3;
status = register_filesystem(&ocfs2_fs_type);
if (!status)
return 0;
unregister_quota_format(&ocfs2_quota_format);
-out4:
- destroy_workqueue(ocfs2_wq);
- debugfs_remove(ocfs2_debugfs_root);
out3:
+ debugfs_remove(ocfs2_debugfs_root);
ocfs2_free_mem_caches();
out2:
exit_ocfs2_uptodate_cache();
@@ -1645,11 +1636,6 @@ out1:
static void __exit ocfs2_exit(void)
{
- if (ocfs2_wq) {
- flush_workqueue(ocfs2_wq);
- destroy_workqueue(ocfs2_wq);
- }
-
unregister_quota_format(&ocfs2_quota_format);
debugfs_remove(ocfs2_debugfs_root);
@@ -1667,6 +1653,7 @@ static void ocfs2_put_super(struct super_block *sb)
ocfs2_sync_blockdev(sb);
ocfs2_dismount_volume(sb, 0);
+ ocfs2_filecheck_remove_sysfs(sb);
}
static int ocfs2_statfs(struct dentry *dentry, struct kstatfs *buf)
@@ -1739,8 +1726,8 @@ static void ocfs2_inode_init_once(void *data)
spin_lock_init(&oi->ip_lock);
ocfs2_extent_map_init(&oi->vfs_inode);
INIT_LIST_HEAD(&oi->ip_io_markers);
+ INIT_LIST_HEAD(&oi->ip_unwritten_list);
oi->ip_dir_start_lookup = 0;
- mutex_init(&oi->ip_unaligned_aio);
init_rwsem(&oi->ip_alloc_sem);
init_rwsem(&oi->ip_xattr_sem);
mutex_init(&oi->ip_io_mutex);
@@ -2343,6 +2330,12 @@ static int ocfs2_initialize_super(struct super_block *sb,
}
cleancache_init_shared_fs(sb);
+ osb->ocfs2_wq = create_singlethread_workqueue("ocfs2_wq");
+ if (!osb->ocfs2_wq) {
+ status = -ENOMEM;
+ mlog_errno(status);
+ }
+
bail:
return status;
}
@@ -2530,6 +2523,12 @@ static void ocfs2_delete_osb(struct ocfs2_super *osb)
{
/* This function assumes that the caller has the main osb resource */
+ /* ocfs2_initializer_super have already created this workqueue */
+ if (osb->ocfs2_wq) {
+ flush_workqueue(osb->ocfs2_wq);
+ destroy_workqueue(osb->ocfs2_wq);
+ }
+
ocfs2_free_slot_info(osb);
kfree(osb->osb_orphan_wipes);
diff --git a/fs/ocfs2/super.h b/fs/ocfs2/super.h
index b477d0b1c..b023e4f3d 100644
--- a/fs/ocfs2/super.h
+++ b/fs/ocfs2/super.h
@@ -26,8 +26,6 @@
#ifndef OCFS2_SUPER_H
#define OCFS2_SUPER_H
-extern struct workqueue_struct *ocfs2_wq;
-
int ocfs2_publish_get_mount_state(struct ocfs2_super *osb,
int node_num);
diff --git a/fs/ocfs2/xattr.c b/fs/ocfs2/xattr.c
index 7d3d979f5..f19b7381a 100644
--- a/fs/ocfs2/xattr.c
+++ b/fs/ocfs2/xattr.c
@@ -7216,12 +7216,10 @@ out:
*/
int ocfs2_init_security_and_acl(struct inode *dir,
struct inode *inode,
- const struct qstr *qstr,
- struct posix_acl *default_acl,
- struct posix_acl *acl)
+ const struct qstr *qstr)
{
- struct buffer_head *dir_bh = NULL;
int ret = 0;
+ struct buffer_head *dir_bh = NULL;
ret = ocfs2_init_security_get(inode, dir, qstr, NULL);
if (ret) {
@@ -7234,11 +7232,9 @@ int ocfs2_init_security_and_acl(struct inode *dir,
mlog_errno(ret);
goto leave;
}
-
- if (!ret && default_acl)
- ret = ocfs2_iop_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
- if (!ret && acl)
- ret = ocfs2_iop_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ ret = ocfs2_init_acl(NULL, inode, dir, NULL, dir_bh, NULL, NULL);
+ if (ret)
+ mlog_errno(ret);
ocfs2_inode_unlock(dir, 0);
brelse(dir_bh);
diff --git a/fs/ocfs2/xattr.h b/fs/ocfs2/xattr.h
index f10d5b93c..1633cc15e 100644
--- a/fs/ocfs2/xattr.h
+++ b/fs/ocfs2/xattr.h
@@ -94,7 +94,5 @@ int ocfs2_reflink_xattrs(struct inode *old_inode,
bool preserve_security);
int ocfs2_init_security_and_acl(struct inode *dir,
struct inode *inode,
- const struct qstr *qstr,
- struct posix_acl *default_acl,
- struct posix_acl *acl);
+ const struct qstr *qstr);
#endif /* OCFS2_XATTR_H */
diff --git a/fs/open.c b/fs/open.c
index e6933ee33..abb0b1bfa 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -845,16 +845,12 @@ EXPORT_SYMBOL(file_path);
int vfs_open(const struct path *path, struct file *file,
const struct cred *cred)
{
- struct dentry *dentry = path->dentry;
- struct inode *inode = dentry->d_inode;
+ struct inode *inode = vfs_select_inode(path->dentry, file->f_flags);
- file->f_path = *path;
- if (dentry->d_flags & DCACHE_OP_SELECT_INODE) {
- inode = dentry->d_op->d_select_inode(dentry, file->f_flags);
- if (IS_ERR(inode))
- return PTR_ERR(inode);
- }
+ if (IS_ERR(inode))
+ return PTR_ERR(inode);
+ file->f_path = *path;
return do_dentry_open(file, inode, NULL, cred);
}
diff --git a/fs/orangefs/Kconfig b/fs/orangefs/Kconfig
new file mode 100644
index 000000000..1554c0248
--- /dev/null
+++ b/fs/orangefs/Kconfig
@@ -0,0 +1,6 @@
+config ORANGEFS_FS
+ tristate "ORANGEFS (Powered by PVFS) support"
+ select FS_POSIX_ACL
+ help
+ Orange is a parallel file system designed for use on high end
+ computing (HEC) systems.
diff --git a/fs/orangefs/Makefile b/fs/orangefs/Makefile
new file mode 100644
index 000000000..a9d6a968f
--- /dev/null
+++ b/fs/orangefs/Makefile
@@ -0,0 +1,10 @@
+#
+# Makefile for the ORANGEFS filesystem.
+#
+
+obj-$(CONFIG_ORANGEFS_FS) += orangefs.o
+
+orangefs-objs := acl.o file.o orangefs-cache.o orangefs-utils.o xattr.o \
+ dcache.o inode.o orangefs-sysfs.o orangefs-mod.o super.o \
+ devorangefs-req.o namei.o symlink.o dir.o orangefs-bufmap.o \
+ orangefs-debugfs.o waitqueue.o
diff --git a/fs/orangefs/acl.c b/fs/orangefs/acl.c
new file mode 100644
index 000000000..03f89dbb2
--- /dev/null
+++ b/fs/orangefs/acl.c
@@ -0,0 +1,175 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/fs_struct.h>
+
+struct posix_acl *orangefs_get_acl(struct inode *inode, int type)
+{
+ struct posix_acl *acl;
+ int ret;
+ char *key = NULL, *value = NULL;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ key = ORANGEFS_XATTR_NAME_ACL_ACCESS;
+ break;
+ case ACL_TYPE_DEFAULT:
+ key = ORANGEFS_XATTR_NAME_ACL_DEFAULT;
+ break;
+ default:
+ gossip_err("orangefs_get_acl: bogus value of type %d\n", type);
+ return ERR_PTR(-EINVAL);
+ }
+ /*
+ * Rather than incurring a network call just to determine the exact
+ * length of the attribute, I just allocate a max length to save on
+ * the network call. Conceivably, we could pass NULL to
+ * orangefs_inode_getxattr() to probe the length of the value, but
+ * I don't do that for now.
+ */
+ value = kmalloc(ORANGEFS_MAX_XATTR_VALUELEN, GFP_KERNEL);
+ if (value == NULL)
+ return ERR_PTR(-ENOMEM);
+
+ gossip_debug(GOSSIP_ACL_DEBUG,
+ "inode %pU, key %s, type %d\n",
+ get_khandle_from_ino(inode),
+ key,
+ type);
+ ret = orangefs_inode_getxattr(inode,
+ "",
+ key,
+ value,
+ ORANGEFS_MAX_XATTR_VALUELEN);
+ /* if the key exists, convert it to an in-memory rep */
+ if (ret > 0) {
+ acl = posix_acl_from_xattr(&init_user_ns, value, ret);
+ } else if (ret == -ENODATA || ret == -ENOSYS) {
+ acl = NULL;
+ } else {
+ gossip_err("inode %pU retrieving acl's failed with error %d\n",
+ get_khandle_from_ino(inode),
+ ret);
+ acl = ERR_PTR(ret);
+ }
+ /* kfree(NULL) is safe, so don't worry if value ever got used */
+ kfree(value);
+ return acl;
+}
+
+int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ int error = 0;
+ void *value = NULL;
+ size_t size = 0;
+ const char *name = NULL;
+
+ switch (type) {
+ case ACL_TYPE_ACCESS:
+ name = ORANGEFS_XATTR_NAME_ACL_ACCESS;
+ if (acl) {
+ umode_t mode = inode->i_mode;
+ /*
+ * can we represent this with the traditional file
+ * mode permission bits?
+ */
+ error = posix_acl_equiv_mode(acl, &mode);
+ if (error < 0) {
+ gossip_err("%s: posix_acl_equiv_mode err: %d\n",
+ __func__,
+ error);
+ return error;
+ }
+
+ if (inode->i_mode != mode)
+ SetModeFlag(orangefs_inode);
+ inode->i_mode = mode;
+ mark_inode_dirty_sync(inode);
+ if (error == 0)
+ acl = NULL;
+ }
+ break;
+ case ACL_TYPE_DEFAULT:
+ name = ORANGEFS_XATTR_NAME_ACL_DEFAULT;
+ break;
+ default:
+ gossip_err("%s: invalid type %d!\n", __func__, type);
+ return -EINVAL;
+ }
+
+ gossip_debug(GOSSIP_ACL_DEBUG,
+ "%s: inode %pU, key %s type %d\n",
+ __func__, get_khandle_from_ino(inode),
+ name,
+ type);
+
+ if (acl) {
+ size = posix_acl_xattr_size(acl->a_count);
+ value = kmalloc(size, GFP_KERNEL);
+ if (!value)
+ return -ENOMEM;
+
+ error = posix_acl_to_xattr(&init_user_ns, acl, value, size);
+ if (error < 0)
+ goto out;
+ }
+
+ gossip_debug(GOSSIP_ACL_DEBUG,
+ "%s: name %s, value %p, size %zd, acl %p\n",
+ __func__, name, value, size, acl);
+ /*
+ * Go ahead and set the extended attribute now. NOTE: Suppose acl
+ * was NULL, then value will be NULL and size will be 0 and that
+ * will xlate to a removexattr. However, we don't want removexattr
+ * complain if attributes does not exist.
+ */
+ error = orangefs_inode_setxattr(inode, "", name, value, size, 0);
+
+out:
+ kfree(value);
+ if (!error)
+ set_cached_acl(inode, type, acl);
+ return error;
+}
+
+int orangefs_init_acl(struct inode *inode, struct inode *dir)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct posix_acl *default_acl, *acl;
+ umode_t mode = inode->i_mode;
+ int error = 0;
+
+ ClearModeFlag(orangefs_inode);
+
+ error = posix_acl_create(dir, &mode, &default_acl, &acl);
+ if (error)
+ return error;
+
+ if (default_acl) {
+ error = orangefs_set_acl(inode, default_acl, ACL_TYPE_DEFAULT);
+ posix_acl_release(default_acl);
+ }
+
+ if (acl) {
+ if (!error)
+ error = orangefs_set_acl(inode, acl, ACL_TYPE_ACCESS);
+ posix_acl_release(acl);
+ }
+
+ /* If mode of the inode was changed, then do a forcible ->setattr */
+ if (mode != inode->i_mode) {
+ SetModeFlag(orangefs_inode);
+ inode->i_mode = mode;
+ orangefs_flush_inode(inode);
+ }
+
+ return error;
+}
diff --git a/fs/orangefs/dcache.c b/fs/orangefs/dcache.c
new file mode 100644
index 000000000..5dfc4f3cf
--- /dev/null
+++ b/fs/orangefs/dcache.c
@@ -0,0 +1,138 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * Implementation of dentry (directory cache) functions.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+
+/* Returns 1 if dentry can still be trusted, else 0. */
+static int orangefs_revalidate_lookup(struct dentry *dentry)
+{
+ struct dentry *parent_dentry = dget_parent(dentry);
+ struct inode *parent_inode = parent_dentry->d_inode;
+ struct orangefs_inode_s *parent = ORANGEFS_I(parent_inode);
+ struct inode *inode = dentry->d_inode;
+ struct orangefs_kernel_op_s *new_op;
+ int ret = 0;
+ int err = 0;
+
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: attempting lookup.\n", __func__);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP);
+ if (!new_op)
+ goto out_put_parent;
+
+ new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
+ new_op->upcall.req.lookup.parent_refn = parent->refn;
+ strncpy(new_op->upcall.req.lookup.d_name,
+ dentry->d_name.name,
+ ORANGEFS_NAME_MAX);
+
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s:%s:%d interrupt flag [%d]\n",
+ __FILE__,
+ __func__,
+ __LINE__,
+ get_interruptible_flag(parent_inode));
+
+ err = service_operation(new_op, "orangefs_lookup",
+ get_interruptible_flag(parent_inode));
+
+ /* Positive dentry: reject if error or not the same inode. */
+ if (inode) {
+ if (err) {
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s:%s:%d lookup failure.\n",
+ __FILE__, __func__, __LINE__);
+ goto out_drop;
+ }
+ if (!match_handle(new_op->downcall.resp.lookup.refn.khandle,
+ inode)) {
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s:%s:%d no match.\n",
+ __FILE__, __func__, __LINE__);
+ goto out_drop;
+ }
+
+ /* Negative dentry: reject if success or error other than ENOENT. */
+ } else {
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: negative dentry.\n",
+ __func__);
+ if (!err || err != -ENOENT) {
+ if (new_op->downcall.status != 0)
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s:%s:%d lookup failure.\n",
+ __FILE__, __func__, __LINE__);
+ goto out_drop;
+ }
+ }
+
+ ret = 1;
+out_release_op:
+ op_release(new_op);
+out_put_parent:
+ dput(parent_dentry);
+ return ret;
+out_drop:
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d revalidate failed\n",
+ __FILE__, __func__, __LINE__);
+ goto out_release_op;
+}
+
+/*
+ * Verify that dentry is valid.
+ *
+ * Should return 1 if dentry can still be trusted, else 0.
+ */
+static int orangefs_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ int ret;
+
+ if (flags & LOOKUP_RCU)
+ return -ECHILD;
+
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s: called on dentry %p.\n",
+ __func__, dentry);
+
+ /* skip root handle lookups. */
+ if (dentry->d_inode && is_root_handle(dentry->d_inode))
+ return 1;
+
+ /*
+ * If this passes, the positive dentry still exists or the negative
+ * dentry still does not exist.
+ */
+ if (!orangefs_revalidate_lookup(dentry))
+ return 0;
+
+ /* We do not need to continue with negative dentries. */
+ if (!dentry->d_inode)
+ goto out;
+
+ /* Now we must perform a getattr to validate the inode contents. */
+
+ ret = orangefs_inode_check_changed(dentry->d_inode);
+ if (ret < 0) {
+ gossip_debug(GOSSIP_DCACHE_DEBUG, "%s:%s:%d getattr failure.\n",
+ __FILE__, __func__, __LINE__);
+ return 0;
+ }
+ if (ret == 0)
+ return 0;
+
+out:
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s: negative dentry or positive dentry and inode valid.\n",
+ __func__);
+ return 1;
+}
+
+const struct dentry_operations orangefs_dentry_operations = {
+ .d_revalidate = orangefs_d_revalidate,
+};
diff --git a/fs/orangefs/devorangefs-req.c b/fs/orangefs/devorangefs-req.c
new file mode 100644
index 000000000..db170beba
--- /dev/null
+++ b/fs/orangefs/devorangefs-req.c
@@ -0,0 +1,943 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * Changes by Acxiom Corporation to add protocol version to kernel
+ * communication, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-dev-proto.h"
+#include "orangefs-bufmap.h"
+
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+/* this file implements the /dev/pvfs2-req device node */
+
+static int open_access_count;
+
+#define DUMP_DEVICE_ERROR() \
+do { \
+ gossip_err("*****************************************************\n");\
+ gossip_err("ORANGEFS Device Error: You cannot open the device file "); \
+ gossip_err("\n/dev/%s more than once. Please make sure that\nthere " \
+ "are no ", ORANGEFS_REQDEVICE_NAME); \
+ gossip_err("instances of a program using this device\ncurrently " \
+ "running. (You must verify this!)\n"); \
+ gossip_err("For example, you can use the lsof program as follows:\n");\
+ gossip_err("'lsof | grep %s' (run this as root)\n", \
+ ORANGEFS_REQDEVICE_NAME); \
+ gossip_err(" open_access_count = %d\n", open_access_count); \
+ gossip_err("*****************************************************\n");\
+} while (0)
+
+static int hash_func(__u64 tag, int table_size)
+{
+ return do_div(tag, (unsigned int)table_size);
+}
+
+static void orangefs_devreq_add_op(struct orangefs_kernel_op_s *op)
+{
+ int index = hash_func(op->tag, hash_table_size);
+
+ list_add_tail(&op->list, &htable_ops_in_progress[index]);
+}
+
+/*
+ * find the op with this tag and remove it from the in progress
+ * hash table.
+ */
+static struct orangefs_kernel_op_s *orangefs_devreq_remove_op(__u64 tag)
+{
+ struct orangefs_kernel_op_s *op, *next;
+ int index;
+
+ index = hash_func(tag, hash_table_size);
+
+ spin_lock(&htable_ops_in_progress_lock);
+ list_for_each_entry_safe(op,
+ next,
+ &htable_ops_in_progress[index],
+ list) {
+ if (op->tag == tag && !op_state_purged(op) &&
+ !op_state_given_up(op)) {
+ list_del_init(&op->list);
+ spin_unlock(&htable_ops_in_progress_lock);
+ return op;
+ }
+ }
+
+ spin_unlock(&htable_ops_in_progress_lock);
+ return NULL;
+}
+
+/* Returns whether any FS are still pending remounted */
+static int mark_all_pending_mounts(void)
+{
+ int unmounted = 1;
+ struct orangefs_sb_info_s *orangefs_sb = NULL;
+
+ spin_lock(&orangefs_superblocks_lock);
+ list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
+ /* All of these file system require a remount */
+ orangefs_sb->mount_pending = 1;
+ unmounted = 0;
+ }
+ spin_unlock(&orangefs_superblocks_lock);
+ return unmounted;
+}
+
+/*
+ * Determine if a given file system needs to be remounted or not
+ * Returns -1 on error
+ * 0 if already mounted
+ * 1 if needs remount
+ */
+static int fs_mount_pending(__s32 fsid)
+{
+ int mount_pending = -1;
+ struct orangefs_sb_info_s *orangefs_sb = NULL;
+
+ spin_lock(&orangefs_superblocks_lock);
+ list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
+ if (orangefs_sb->fs_id == fsid) {
+ mount_pending = orangefs_sb->mount_pending;
+ break;
+ }
+ }
+ spin_unlock(&orangefs_superblocks_lock);
+ return mount_pending;
+}
+
+static int orangefs_devreq_open(struct inode *inode, struct file *file)
+{
+ int ret = -EINVAL;
+
+ if (!(file->f_flags & O_NONBLOCK)) {
+ gossip_err("%s: device cannot be opened in blocking mode\n",
+ __func__);
+ goto out;
+ }
+ ret = -EACCES;
+ gossip_debug(GOSSIP_DEV_DEBUG, "client-core: opening device\n");
+ mutex_lock(&devreq_mutex);
+
+ if (open_access_count == 0) {
+ open_access_count = 1;
+ ret = 0;
+ } else {
+ DUMP_DEVICE_ERROR();
+ }
+ mutex_unlock(&devreq_mutex);
+
+out:
+
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "pvfs2-client-core: open device complete (ret = %d)\n",
+ ret);
+ return ret;
+}
+
+/* Function for read() callers into the device */
+static ssize_t orangefs_devreq_read(struct file *file,
+ char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct orangefs_kernel_op_s *op, *temp;
+ __s32 proto_ver = ORANGEFS_KERNEL_PROTO_VERSION;
+ static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
+ struct orangefs_kernel_op_s *cur_op = NULL;
+ unsigned long ret;
+
+ /* We do not support blocking IO. */
+ if (!(file->f_flags & O_NONBLOCK)) {
+ gossip_err("%s: blocking read from client-core.\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ /*
+ * The client will do an ioctl to find MAX_DEV_REQ_UPSIZE, then
+ * always read with that size buffer.
+ */
+ if (count != MAX_DEV_REQ_UPSIZE) {
+ gossip_err("orangefs: client-core tried to read wrong size\n");
+ return -EINVAL;
+ }
+
+restart:
+ /* Get next op (if any) from top of list. */
+ spin_lock(&orangefs_request_list_lock);
+ list_for_each_entry_safe(op, temp, &orangefs_request_list, list) {
+ __s32 fsid;
+ /* This lock is held past the end of the loop when we break. */
+ spin_lock(&op->lock);
+ if (unlikely(op_state_purged(op) || op_state_given_up(op))) {
+ spin_unlock(&op->lock);
+ continue;
+ }
+
+ fsid = fsid_of_op(op);
+ if (fsid != ORANGEFS_FS_ID_NULL) {
+ int ret;
+ /* Skip ops whose filesystem needs to be mounted. */
+ ret = fs_mount_pending(fsid);
+ if (ret == 1) {
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: mount pending, skipping op tag "
+ "%llu %s\n",
+ __func__,
+ llu(op->tag),
+ get_opname_string(op));
+ spin_unlock(&op->lock);
+ continue;
+ /*
+ * Skip ops whose filesystem we don't know about unless
+ * it is being mounted.
+ */
+ /* XXX: is there a better way to detect this? */
+ } else if (ret == -1 &&
+ !(op->upcall.type ==
+ ORANGEFS_VFS_OP_FS_MOUNT ||
+ op->upcall.type ==
+ ORANGEFS_VFS_OP_GETATTR)) {
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "orangefs: skipping op tag %llu %s\n",
+ llu(op->tag), get_opname_string(op));
+ gossip_err(
+ "orangefs: ERROR: fs_mount_pending %d\n",
+ fsid);
+ spin_unlock(&op->lock);
+ continue;
+ }
+ }
+ /*
+ * Either this op does not pertain to a filesystem, is mounting
+ * a filesystem, or pertains to a mounted filesystem. Let it
+ * through.
+ */
+ cur_op = op;
+ break;
+ }
+
+ /*
+ * At this point we either have a valid op and can continue or have not
+ * found an op and must ask the client to try again later.
+ */
+ if (!cur_op) {
+ spin_unlock(&orangefs_request_list_lock);
+ return -EAGAIN;
+ }
+
+ gossip_debug(GOSSIP_DEV_DEBUG, "%s: reading op tag %llu %s\n",
+ __func__,
+ llu(cur_op->tag),
+ get_opname_string(cur_op));
+
+ /*
+ * Such an op should never be on the list in the first place. If so, we
+ * will abort.
+ */
+ if (op_state_in_progress(cur_op) || op_state_serviced(cur_op)) {
+ gossip_err("orangefs: ERROR: Current op already queued.\n");
+ list_del_init(&cur_op->list);
+ spin_unlock(&cur_op->lock);
+ spin_unlock(&orangefs_request_list_lock);
+ return -EAGAIN;
+ }
+
+ list_del_init(&cur_op->list);
+ spin_unlock(&orangefs_request_list_lock);
+
+ spin_unlock(&cur_op->lock);
+
+ /* Push the upcall out. */
+ ret = copy_to_user(buf, &proto_ver, sizeof(__s32));
+ if (ret != 0)
+ goto error;
+ ret = copy_to_user(buf+sizeof(__s32), &magic, sizeof(__s32));
+ if (ret != 0)
+ goto error;
+ ret = copy_to_user(buf+2 * sizeof(__s32), &cur_op->tag, sizeof(__u64));
+ if (ret != 0)
+ goto error;
+ ret = copy_to_user(buf+2*sizeof(__s32)+sizeof(__u64), &cur_op->upcall,
+ sizeof(struct orangefs_upcall_s));
+ if (ret != 0)
+ goto error;
+
+ spin_lock(&htable_ops_in_progress_lock);
+ spin_lock(&cur_op->lock);
+ if (unlikely(op_state_given_up(cur_op))) {
+ spin_unlock(&cur_op->lock);
+ spin_unlock(&htable_ops_in_progress_lock);
+ complete(&cur_op->waitq);
+ goto restart;
+ }
+
+ /*
+ * Set the operation to be in progress and move it between lists since
+ * it has been sent to the client.
+ */
+ set_op_state_inprogress(cur_op);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: 1 op:%s: op_state:%d: process:%s:\n",
+ __func__,
+ get_opname_string(cur_op),
+ cur_op->op_state,
+ current->comm);
+ orangefs_devreq_add_op(cur_op);
+ spin_unlock(&cur_op->lock);
+ spin_unlock(&htable_ops_in_progress_lock);
+
+ /* The client only asks to read one size buffer. */
+ return MAX_DEV_REQ_UPSIZE;
+error:
+ /*
+ * We were unable to copy the op data to the client. Put the op back in
+ * list. If client has crashed, the op will be purged later when the
+ * device is released.
+ */
+ gossip_err("orangefs: Failed to copy data to user space\n");
+ spin_lock(&orangefs_request_list_lock);
+ spin_lock(&cur_op->lock);
+ if (likely(!op_state_given_up(cur_op))) {
+ set_op_state_waiting(cur_op);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: 2 op:%s: op_state:%d: process:%s:\n",
+ __func__,
+ get_opname_string(cur_op),
+ cur_op->op_state,
+ current->comm);
+ list_add(&cur_op->list, &orangefs_request_list);
+ spin_unlock(&cur_op->lock);
+ } else {
+ spin_unlock(&cur_op->lock);
+ complete(&cur_op->waitq);
+ }
+ spin_unlock(&orangefs_request_list_lock);
+ return -EFAULT;
+}
+
+/*
+ * Function for writev() callers into the device.
+ *
+ * Userspace should have written:
+ * - __u32 version
+ * - __u32 magic
+ * - __u64 tag
+ * - struct orangefs_downcall_s
+ * - trailer buffer (in the case of READDIR operations)
+ */
+static ssize_t orangefs_devreq_write_iter(struct kiocb *iocb,
+ struct iov_iter *iter)
+{
+ ssize_t ret;
+ struct orangefs_kernel_op_s *op = NULL;
+ struct {
+ __u32 version;
+ __u32 magic;
+ __u64 tag;
+ } head;
+ int total = ret = iov_iter_count(iter);
+ int n;
+ int downcall_size = sizeof(struct orangefs_downcall_s);
+ int head_size = sizeof(head);
+
+ gossip_debug(GOSSIP_DEV_DEBUG, "%s: total:%d: ret:%zd:\n",
+ __func__,
+ total,
+ ret);
+
+ if (total < MAX_DEV_REQ_DOWNSIZE) {
+ gossip_err("%s: total:%d: must be at least:%u:\n",
+ __func__,
+ total,
+ (unsigned int) MAX_DEV_REQ_DOWNSIZE);
+ return -EFAULT;
+ }
+
+ n = copy_from_iter(&head, head_size, iter);
+ if (n < head_size) {
+ gossip_err("%s: failed to copy head.\n", __func__);
+ return -EFAULT;
+ }
+
+ if (head.version < ORANGEFS_MINIMUM_USERSPACE_VERSION) {
+ gossip_err("%s: userspace claims version"
+ "%d, minimum version required: %d.\n",
+ __func__,
+ head.version,
+ ORANGEFS_MINIMUM_USERSPACE_VERSION);
+ return -EPROTO;
+ }
+
+ if (head.magic != ORANGEFS_DEVREQ_MAGIC) {
+ gossip_err("Error: Device magic number does not match.\n");
+ return -EPROTO;
+ }
+
+ /* remove the op from the in progress hash table */
+ op = orangefs_devreq_remove_op(head.tag);
+ if (!op) {
+ gossip_err("WARNING: No one's waiting for tag %llu\n",
+ llu(head.tag));
+ return ret;
+ }
+
+ n = copy_from_iter(&op->downcall, downcall_size, iter);
+ if (n != downcall_size) {
+ gossip_err("%s: failed to copy downcall.\n", __func__);
+ goto Efault;
+ }
+
+ if (op->downcall.status)
+ goto wakeup;
+
+ /*
+ * We've successfully peeled off the head and the downcall.
+ * Something has gone awry if total doesn't equal the
+ * sum of head_size, downcall_size and trailer_size.
+ */
+ if ((head_size + downcall_size + op->downcall.trailer_size) != total) {
+ gossip_err("%s: funky write, head_size:%d"
+ ": downcall_size:%d: trailer_size:%lld"
+ ": total size:%d:\n",
+ __func__,
+ head_size,
+ downcall_size,
+ op->downcall.trailer_size,
+ total);
+ goto Efault;
+ }
+
+ /* Only READDIR operations should have trailers. */
+ if ((op->downcall.type != ORANGEFS_VFS_OP_READDIR) &&
+ (op->downcall.trailer_size != 0)) {
+ gossip_err("%s: %x operation with trailer.",
+ __func__,
+ op->downcall.type);
+ goto Efault;
+ }
+
+ /* READDIR operations should always have trailers. */
+ if ((op->downcall.type == ORANGEFS_VFS_OP_READDIR) &&
+ (op->downcall.trailer_size == 0)) {
+ gossip_err("%s: %x operation with no trailer.",
+ __func__,
+ op->downcall.type);
+ goto Efault;
+ }
+
+ if (op->downcall.type != ORANGEFS_VFS_OP_READDIR)
+ goto wakeup;
+
+ op->downcall.trailer_buf =
+ vmalloc(op->downcall.trailer_size);
+ if (op->downcall.trailer_buf == NULL) {
+ gossip_err("%s: failed trailer vmalloc.\n",
+ __func__);
+ goto Enomem;
+ }
+ memset(op->downcall.trailer_buf, 0, op->downcall.trailer_size);
+ n = copy_from_iter(op->downcall.trailer_buf,
+ op->downcall.trailer_size,
+ iter);
+ if (n != op->downcall.trailer_size) {
+ gossip_err("%s: failed to copy trailer.\n", __func__);
+ vfree(op->downcall.trailer_buf);
+ goto Efault;
+ }
+
+wakeup:
+ /*
+ * Return to vfs waitqueue, and back to service_operation
+ * through wait_for_matching_downcall.
+ */
+ spin_lock(&op->lock);
+ if (unlikely(op_is_cancel(op))) {
+ spin_unlock(&op->lock);
+ put_cancel(op);
+ } else if (unlikely(op_state_given_up(op))) {
+ spin_unlock(&op->lock);
+ complete(&op->waitq);
+ } else {
+ set_op_state_serviced(op);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: op:%s: op_state:%d: process:%s:\n",
+ __func__,
+ get_opname_string(op),
+ op->op_state,
+ current->comm);
+ spin_unlock(&op->lock);
+ }
+ return ret;
+
+Efault:
+ op->downcall.status = -(ORANGEFS_ERROR_BIT | 9);
+ ret = -EFAULT;
+ goto wakeup;
+
+Enomem:
+ op->downcall.status = -(ORANGEFS_ERROR_BIT | 8);
+ ret = -ENOMEM;
+ goto wakeup;
+}
+
+/*
+ * NOTE: gets called when the last reference to this device is dropped.
+ * Using the open_access_count variable, we enforce a reference count
+ * on this file so that it can be opened by only one process at a time.
+ * the devreq_mutex is used to make sure all i/o has completed
+ * before we call orangefs_bufmap_finalize, and similar such tricky
+ * situations
+ */
+static int orangefs_devreq_release(struct inode *inode, struct file *file)
+{
+ int unmounted = 0;
+
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s:pvfs2-client-core: exiting, closing device\n",
+ __func__);
+
+ mutex_lock(&devreq_mutex);
+ orangefs_bufmap_finalize();
+
+ open_access_count = -1;
+
+ unmounted = mark_all_pending_mounts();
+ gossip_debug(GOSSIP_DEV_DEBUG, "ORANGEFS Device Close: Filesystem(s) %s\n",
+ (unmounted ? "UNMOUNTED" : "MOUNTED"));
+
+ purge_waiting_ops();
+ purge_inprogress_ops();
+
+ orangefs_bufmap_run_down();
+
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "pvfs2-client-core: device close complete\n");
+ open_access_count = 0;
+ mutex_unlock(&devreq_mutex);
+ return 0;
+}
+
+int is_daemon_in_service(void)
+{
+ int in_service;
+
+ /*
+ * What this function does is checks if client-core is alive
+ * based on the access count we maintain on the device.
+ */
+ mutex_lock(&devreq_mutex);
+ in_service = open_access_count == 1 ? 0 : -EIO;
+ mutex_unlock(&devreq_mutex);
+ return in_service;
+}
+
+bool __is_daemon_in_service(void)
+{
+ return open_access_count == 1;
+}
+
+static inline long check_ioctl_command(unsigned int command)
+{
+ /* Check for valid ioctl codes */
+ if (_IOC_TYPE(command) != ORANGEFS_DEV_MAGIC) {
+ gossip_err("device ioctl magic numbers don't match! Did you rebuild pvfs2-client-core/libpvfs2? [cmd %x, magic %x != %x]\n",
+ command,
+ _IOC_TYPE(command),
+ ORANGEFS_DEV_MAGIC);
+ return -EINVAL;
+ }
+ /* and valid ioctl commands */
+ if (_IOC_NR(command) >= ORANGEFS_DEV_MAXNR || _IOC_NR(command) <= 0) {
+ gossip_err("Invalid ioctl command number [%d >= %d]\n",
+ _IOC_NR(command), ORANGEFS_DEV_MAXNR);
+ return -ENOIOCTLCMD;
+ }
+ return 0;
+}
+
+static long dispatch_ioctl_command(unsigned int command, unsigned long arg)
+{
+ static __s32 magic = ORANGEFS_DEVREQ_MAGIC;
+ static __s32 max_up_size = MAX_DEV_REQ_UPSIZE;
+ static __s32 max_down_size = MAX_DEV_REQ_DOWNSIZE;
+ struct ORANGEFS_dev_map_desc user_desc;
+ int ret = 0;
+ struct dev_mask_info_s mask_info = { 0 };
+ struct dev_mask2_info_s mask2_info = { 0, 0 };
+ int upstream_kmod = 1;
+ struct orangefs_sb_info_s *orangefs_sb;
+
+ /* mtmoore: add locking here */
+
+ switch (command) {
+ case ORANGEFS_DEV_GET_MAGIC:
+ return ((put_user(magic, (__s32 __user *) arg) == -EFAULT) ?
+ -EIO :
+ 0);
+ case ORANGEFS_DEV_GET_MAX_UPSIZE:
+ return ((put_user(max_up_size,
+ (__s32 __user *) arg) == -EFAULT) ?
+ -EIO :
+ 0);
+ case ORANGEFS_DEV_GET_MAX_DOWNSIZE:
+ return ((put_user(max_down_size,
+ (__s32 __user *) arg) == -EFAULT) ?
+ -EIO :
+ 0);
+ case ORANGEFS_DEV_MAP:
+ ret = copy_from_user(&user_desc,
+ (struct ORANGEFS_dev_map_desc __user *)
+ arg,
+ sizeof(struct ORANGEFS_dev_map_desc));
+ /* WTF -EIO and not -EFAULT? */
+ return ret ? -EIO : orangefs_bufmap_initialize(&user_desc);
+ case ORANGEFS_DEV_REMOUNT_ALL:
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: got ORANGEFS_DEV_REMOUNT_ALL\n",
+ __func__);
+
+ /*
+ * remount all mounted orangefs volumes to regain the lost
+ * dynamic mount tables (if any) -- NOTE: this is done
+ * without keeping the superblock list locked due to the
+ * upcall/downcall waiting. also, the request mutex is
+ * used to ensure that no operations will be serviced until
+ * all of the remounts are serviced (to avoid ops between
+ * mounts to fail)
+ */
+ ret = mutex_lock_interruptible(&request_mutex);
+ if (ret < 0)
+ return ret;
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: priority remount in progress\n",
+ __func__);
+ spin_lock(&orangefs_superblocks_lock);
+ list_for_each_entry(orangefs_sb, &orangefs_superblocks, list) {
+ /*
+ * We have to drop the spinlock, so entries can be
+ * removed. They can't be freed, though, so we just
+ * keep the forward pointers and zero the back ones -
+ * that way we can get to the rest of the list.
+ */
+ if (!orangefs_sb->list.prev)
+ continue;
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: Remounting SB %p\n",
+ __func__,
+ orangefs_sb);
+
+ spin_unlock(&orangefs_superblocks_lock);
+ ret = orangefs_remount(orangefs_sb);
+ spin_lock(&orangefs_superblocks_lock);
+ if (ret) {
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "SB %p remount failed\n",
+ orangefs_sb);
+ break;
+ }
+ }
+ spin_unlock(&orangefs_superblocks_lock);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: priority remount complete\n",
+ __func__);
+ mutex_unlock(&request_mutex);
+ return ret;
+
+ case ORANGEFS_DEV_UPSTREAM:
+ ret = copy_to_user((void __user *)arg,
+ &upstream_kmod,
+ sizeof(upstream_kmod));
+
+ if (ret != 0)
+ return -EIO;
+ else
+ return ret;
+
+ case ORANGEFS_DEV_CLIENT_MASK:
+ ret = copy_from_user(&mask2_info,
+ (void __user *)arg,
+ sizeof(struct dev_mask2_info_s));
+
+ if (ret != 0)
+ return -EIO;
+
+ client_debug_mask.mask1 = mask2_info.mask1_value;
+ client_debug_mask.mask2 = mask2_info.mask2_value;
+
+ pr_info("%s: client debug mask has been been received "
+ ":%llx: :%llx:\n",
+ __func__,
+ (unsigned long long)client_debug_mask.mask1,
+ (unsigned long long)client_debug_mask.mask2);
+
+ return ret;
+
+ case ORANGEFS_DEV_CLIENT_STRING:
+ ret = copy_from_user(&client_debug_array_string,
+ (void __user *)arg,
+ ORANGEFS_MAX_DEBUG_STRING_LEN);
+ /*
+ * The real client-core makes an effort to ensure
+ * that actual strings that aren't too long to fit in
+ * this buffer is what we get here. We're going to use
+ * string functions on the stuff we got, so we'll make
+ * this extra effort to try and keep from
+ * flowing out of this buffer when we use the string
+ * functions, even if somehow the stuff we end up
+ * with here is garbage.
+ */
+ client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN - 1] =
+ '\0';
+
+ if (ret != 0) {
+ pr_info("%s: CLIENT_STRING: copy_from_user failed\n",
+ __func__);
+ return -EIO;
+ }
+
+ pr_info("%s: client debug array string has been received.\n",
+ __func__);
+
+ if (!help_string_initialized) {
+
+ /* Free the "we don't know yet" default string... */
+ kfree(debug_help_string);
+
+ /* build a proper debug help string */
+ if (orangefs_prepare_debugfs_help_string(0)) {
+ gossip_err("%s: no debug help string \n",
+ __func__);
+ return -EIO;
+ }
+
+ /* Replace the boilerplate boot-time debug-help file. */
+ debugfs_remove(help_file_dentry);
+
+ help_file_dentry =
+ debugfs_create_file(
+ ORANGEFS_KMOD_DEBUG_HELP_FILE,
+ 0444,
+ debug_dir,
+ debug_help_string,
+ &debug_help_fops);
+
+ if (!help_file_dentry) {
+ gossip_err("%s: debugfs_create_file failed for"
+ " :%s:!\n",
+ __func__,
+ ORANGEFS_KMOD_DEBUG_HELP_FILE);
+ return -EIO;
+ }
+ }
+
+ debug_mask_to_string(&client_debug_mask, 1);
+
+ debugfs_remove(client_debug_dentry);
+
+ orangefs_client_debug_init();
+
+ help_string_initialized++;
+
+ return ret;
+
+ case ORANGEFS_DEV_DEBUG:
+ ret = copy_from_user(&mask_info,
+ (void __user *)arg,
+ sizeof(mask_info));
+
+ if (ret != 0)
+ return -EIO;
+
+ if (mask_info.mask_type == KERNEL_MASK) {
+ if ((mask_info.mask_value == 0)
+ && (kernel_mask_set_mod_init)) {
+ /*
+ * the kernel debug mask was set when the
+ * kernel module was loaded; don't override
+ * it if the client-core was started without
+ * a value for ORANGEFS_KMODMASK.
+ */
+ return 0;
+ }
+ debug_mask_to_string(&mask_info.mask_value,
+ mask_info.mask_type);
+ gossip_debug_mask = mask_info.mask_value;
+ pr_info("%s: kernel debug mask has been modified to "
+ ":%s: :%llx:\n",
+ __func__,
+ kernel_debug_string,
+ (unsigned long long)gossip_debug_mask);
+ } else if (mask_info.mask_type == CLIENT_MASK) {
+ debug_mask_to_string(&mask_info.mask_value,
+ mask_info.mask_type);
+ pr_info("%s: client debug mask has been modified to"
+ ":%s: :%llx:\n",
+ __func__,
+ client_debug_string,
+ llu(mask_info.mask_value));
+ } else {
+ gossip_lerr("Invalid mask type....\n");
+ return -EINVAL;
+ }
+
+ return ret;
+
+ default:
+ return -ENOIOCTLCMD;
+ }
+ return -ENOIOCTLCMD;
+}
+
+static long orangefs_devreq_ioctl(struct file *file,
+ unsigned int command, unsigned long arg)
+{
+ long ret;
+
+ /* Check for properly constructed commands */
+ ret = check_ioctl_command(command);
+ if (ret < 0)
+ return (int)ret;
+
+ return (int)dispatch_ioctl_command(command, arg);
+}
+
+#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
+
+/* Compat structure for the ORANGEFS_DEV_MAP ioctl */
+struct ORANGEFS_dev_map_desc32 {
+ compat_uptr_t ptr;
+ __s32 total_size;
+ __s32 size;
+ __s32 count;
+};
+
+static unsigned long translate_dev_map26(unsigned long args, long *error)
+{
+ struct ORANGEFS_dev_map_desc32 __user *p32 = (void __user *)args;
+ /*
+ * Depending on the architecture, allocate some space on the
+ * user-call-stack based on our expected layout.
+ */
+ struct ORANGEFS_dev_map_desc __user *p =
+ compat_alloc_user_space(sizeof(*p));
+ compat_uptr_t addr;
+
+ *error = 0;
+ /* get the ptr from the 32 bit user-space */
+ if (get_user(addr, &p32->ptr))
+ goto err;
+ /* try to put that into a 64-bit layout */
+ if (put_user(compat_ptr(addr), &p->ptr))
+ goto err;
+ /* copy the remaining fields */
+ if (copy_in_user(&p->total_size, &p32->total_size, sizeof(__s32)))
+ goto err;
+ if (copy_in_user(&p->size, &p32->size, sizeof(__s32)))
+ goto err;
+ if (copy_in_user(&p->count, &p32->count, sizeof(__s32)))
+ goto err;
+ return (unsigned long)p;
+err:
+ *error = -EFAULT;
+ return 0;
+}
+
+/*
+ * 32 bit user-space apps' ioctl handlers when kernel modules
+ * is compiled as a 64 bit one
+ */
+static long orangefs_devreq_compat_ioctl(struct file *filp, unsigned int cmd,
+ unsigned long args)
+{
+ long ret;
+ unsigned long arg = args;
+
+ /* Check for properly constructed commands */
+ ret = check_ioctl_command(cmd);
+ if (ret < 0)
+ return ret;
+ if (cmd == ORANGEFS_DEV_MAP) {
+ /*
+ * convert the arguments to what we expect internally
+ * in kernel space
+ */
+ arg = translate_dev_map26(args, &ret);
+ if (ret < 0) {
+ gossip_err("Could not translate dev map\n");
+ return ret;
+ }
+ }
+ /* no other ioctl requires translation */
+ return dispatch_ioctl_command(cmd, arg);
+}
+
+#endif /* CONFIG_COMPAT is in .config */
+
+/* the assigned character device major number */
+static int orangefs_dev_major;
+
+/*
+ * Initialize orangefs device specific state:
+ * Must be called at module load time only
+ */
+int orangefs_dev_init(void)
+{
+ /* register orangefs-req device */
+ orangefs_dev_major = register_chrdev(0,
+ ORANGEFS_REQDEVICE_NAME,
+ &orangefs_devreq_file_operations);
+ if (orangefs_dev_major < 0) {
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "Failed to register /dev/%s (error %d)\n",
+ ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
+ return orangefs_dev_major;
+ }
+
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "*** /dev/%s character device registered ***\n",
+ ORANGEFS_REQDEVICE_NAME);
+ gossip_debug(GOSSIP_DEV_DEBUG, "'mknod /dev/%s c %d 0'.\n",
+ ORANGEFS_REQDEVICE_NAME, orangefs_dev_major);
+ return 0;
+}
+
+void orangefs_dev_cleanup(void)
+{
+ unregister_chrdev(orangefs_dev_major, ORANGEFS_REQDEVICE_NAME);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "*** /dev/%s character device unregistered ***\n",
+ ORANGEFS_REQDEVICE_NAME);
+}
+
+static unsigned int orangefs_devreq_poll(struct file *file,
+ struct poll_table_struct *poll_table)
+{
+ int poll_revent_mask = 0;
+
+ poll_wait(file, &orangefs_request_list_waitq, poll_table);
+
+ if (!list_empty(&orangefs_request_list))
+ poll_revent_mask |= POLL_IN;
+ return poll_revent_mask;
+}
+
+const struct file_operations orangefs_devreq_file_operations = {
+ .owner = THIS_MODULE,
+ .read = orangefs_devreq_read,
+ .write_iter = orangefs_devreq_write_iter,
+ .open = orangefs_devreq_open,
+ .release = orangefs_devreq_release,
+ .unlocked_ioctl = orangefs_devreq_ioctl,
+
+#ifdef CONFIG_COMPAT /* CONFIG_COMPAT is in .config */
+ .compat_ioctl = orangefs_devreq_compat_ioctl,
+#endif
+ .poll = orangefs_devreq_poll
+};
diff --git a/fs/orangefs/dir.c b/fs/orangefs/dir.c
new file mode 100644
index 000000000..324f0af40
--- /dev/null
+++ b/fs/orangefs/dir.c
@@ -0,0 +1,396 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+/*
+ * decode routine used by kmod to deal with the blob sent from
+ * userspace for readdirs. The blob contains zero or more of these
+ * sub-blobs:
+ * __u32 - represents length of the character string that follows.
+ * string - between 1 and ORANGEFS_NAME_MAX bytes long.
+ * padding - (if needed) to cause the __u32 plus the string to be
+ * eight byte aligned.
+ * khandle - sizeof(khandle) bytes.
+ */
+static long decode_dirents(char *ptr, size_t size,
+ struct orangefs_readdir_response_s *readdir)
+{
+ int i;
+ struct orangefs_readdir_response_s *rd =
+ (struct orangefs_readdir_response_s *) ptr;
+ char *buf = ptr;
+ int khandle_size = sizeof(struct orangefs_khandle);
+ size_t offset = offsetof(struct orangefs_readdir_response_s,
+ dirent_array);
+ /* 8 reflects eight byte alignment */
+ int smallest_blob = khandle_size + 8;
+ __u32 len;
+ int aligned_len;
+ int sizeof_u32 = sizeof(__u32);
+ long ret;
+
+ gossip_debug(GOSSIP_DIR_DEBUG, "%s: size:%zu:\n", __func__, size);
+
+ /* size is = offset on empty dirs, > offset on non-empty dirs... */
+ if (size < offset) {
+ gossip_err("%s: size:%zu: offset:%zu:\n",
+ __func__,
+ size,
+ offset);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ if ((size == offset) && (readdir->orangefs_dirent_outcount != 0)) {
+ gossip_err("%s: size:%zu: dirent_outcount:%d:\n",
+ __func__,
+ size,
+ readdir->orangefs_dirent_outcount);
+ ret = -EINVAL;
+ goto out;
+ }
+
+ readdir->token = rd->token;
+ readdir->orangefs_dirent_outcount = rd->orangefs_dirent_outcount;
+ readdir->dirent_array = kcalloc(readdir->orangefs_dirent_outcount,
+ sizeof(*readdir->dirent_array),
+ GFP_KERNEL);
+ if (readdir->dirent_array == NULL) {
+ gossip_err("%s: kcalloc failed.\n", __func__);
+ ret = -ENOMEM;
+ goto out;
+ }
+
+ buf += offset;
+ size -= offset;
+
+ for (i = 0; i < readdir->orangefs_dirent_outcount; i++) {
+ if (size < smallest_blob) {
+ gossip_err("%s: size:%zu: smallest_blob:%d:\n",
+ __func__,
+ size,
+ smallest_blob);
+ ret = -EINVAL;
+ goto free;
+ }
+
+ len = *(__u32 *)buf;
+ if ((len < 1) || (len > ORANGEFS_NAME_MAX)) {
+ gossip_err("%s: len:%d:\n", __func__, len);
+ ret = -EINVAL;
+ goto free;
+ }
+
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: size:%zu: len:%d:\n",
+ __func__,
+ size,
+ len);
+
+ readdir->dirent_array[i].d_name = buf + sizeof_u32;
+ readdir->dirent_array[i].d_length = len;
+
+ /*
+ * Calculate "aligned" length of this string and its
+ * associated __u32 descriptor.
+ */
+ aligned_len = ((sizeof_u32 + len + 1) + 7) & ~7;
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: aligned_len:%d:\n",
+ __func__,
+ aligned_len);
+
+ /*
+ * The end of the blob should coincide with the end
+ * of the last sub-blob.
+ */
+ if (size < aligned_len + khandle_size) {
+ gossip_err("%s: ran off the end of the blob.\n",
+ __func__);
+ ret = -EINVAL;
+ goto free;
+ }
+ size -= aligned_len + khandle_size;
+
+ buf += aligned_len;
+
+ readdir->dirent_array[i].khandle =
+ *(struct orangefs_khandle *) buf;
+ buf += khandle_size;
+ }
+ ret = buf - ptr;
+ gossip_debug(GOSSIP_DIR_DEBUG, "%s: returning:%ld:\n", __func__, ret);
+ goto out;
+
+free:
+ kfree(readdir->dirent_array);
+ readdir->dirent_array = NULL;
+
+out:
+ return ret;
+}
+
+/*
+ * Read directory entries from an instance of an open directory.
+ */
+static int orangefs_readdir(struct file *file, struct dir_context *ctx)
+{
+ int ret = 0;
+ int buffer_index;
+ /*
+ * ptoken supports Orangefs' distributed directory logic, added
+ * in 2.9.2.
+ */
+ __u64 *ptoken = file->private_data;
+ __u64 pos = 0;
+ ino_t ino = 0;
+ struct dentry *dentry = file->f_path.dentry;
+ struct orangefs_kernel_op_s *new_op = NULL;
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(dentry->d_inode);
+ struct orangefs_readdir_response_s readdir_response;
+ void *dents_buf;
+ int i = 0;
+ int len = 0;
+ ino_t current_ino = 0;
+ char *current_entry = NULL;
+ long bytes_decoded;
+
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: ctx->pos:%lld, ptoken = %llu\n",
+ __func__,
+ lld(ctx->pos),
+ llu(*ptoken));
+
+ pos = (__u64) ctx->pos;
+
+ /* are we done? */
+ if (pos == ORANGEFS_READDIR_END) {
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "Skipping to termination path\n");
+ return 0;
+ }
+
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "orangefs_readdir called on %s (pos=%llu)\n",
+ dentry->d_name.name, llu(pos));
+
+ memset(&readdir_response, 0, sizeof(readdir_response));
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_READDIR);
+ if (!new_op)
+ return -ENOMEM;
+
+ /*
+ * Only the indices are shared. No memory is actually shared, but the
+ * mechanism is used.
+ */
+ new_op->uses_shared_memory = 1;
+ new_op->upcall.req.readdir.refn = orangefs_inode->refn;
+ new_op->upcall.req.readdir.max_dirent_count =
+ ORANGEFS_MAX_DIRENT_COUNT_READDIR;
+
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: upcall.req.readdir.refn.khandle: %pU\n",
+ __func__,
+ &new_op->upcall.req.readdir.refn.khandle);
+
+ new_op->upcall.req.readdir.token = *ptoken;
+
+get_new_buffer_index:
+ buffer_index = orangefs_readdir_index_get();
+ if (buffer_index < 0) {
+ ret = buffer_index;
+ gossip_lerr("orangefs_readdir: orangefs_readdir_index_get() failure (%d)\n",
+ ret);
+ goto out_free_op;
+ }
+ new_op->upcall.req.readdir.buf_index = buffer_index;
+
+ ret = service_operation(new_op,
+ "orangefs_readdir",
+ get_interruptible_flag(dentry->d_inode));
+
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "Readdir downcall status is %d. ret:%d\n",
+ new_op->downcall.status,
+ ret);
+
+ orangefs_readdir_index_put(buffer_index);
+
+ if (ret == -EAGAIN && op_state_purged(new_op)) {
+ /* Client-core indices are invalid after it restarted. */
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: Getting new buffer_index for retry of readdir..\n",
+ __func__);
+ goto get_new_buffer_index;
+ }
+
+ if (ret == -EIO && op_state_purged(new_op)) {
+ gossip_err("%s: Client is down. Aborting readdir call.\n",
+ __func__);
+ goto out_free_op;
+ }
+
+ if (ret < 0 || new_op->downcall.status != 0) {
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "Readdir request failed. Status:%d\n",
+ new_op->downcall.status);
+ if (ret >= 0)
+ ret = new_op->downcall.status;
+ goto out_free_op;
+ }
+
+ dents_buf = new_op->downcall.trailer_buf;
+ if (dents_buf == NULL) {
+ gossip_err("Invalid NULL buffer in readdir response\n");
+ ret = -ENOMEM;
+ goto out_free_op;
+ }
+
+ bytes_decoded = decode_dirents(dents_buf, new_op->downcall.trailer_size,
+ &readdir_response);
+ if (bytes_decoded < 0) {
+ ret = bytes_decoded;
+ gossip_err("Could not decode readdir from buffer %d\n", ret);
+ goto out_vfree;
+ }
+
+ if (bytes_decoded != new_op->downcall.trailer_size) {
+ gossip_err("orangefs_readdir: # bytes decoded (%ld) "
+ "!= trailer size (%ld)\n",
+ bytes_decoded,
+ (long)new_op->downcall.trailer_size);
+ ret = -EINVAL;
+ goto out_destroy_handle;
+ }
+
+ /*
+ * orangefs doesn't actually store dot and dot-dot, but
+ * we need to have them represented.
+ */
+ if (pos == 0) {
+ ino = get_ino_from_khandle(dentry->d_inode);
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: calling dir_emit of \".\" with pos = %llu\n",
+ __func__,
+ llu(pos));
+ ret = dir_emit(ctx, ".", 1, ino, DT_DIR);
+ pos += 1;
+ }
+
+ if (pos == 1) {
+ ino = get_parent_ino_from_dentry(dentry);
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: calling dir_emit of \"..\" with pos = %llu\n",
+ __func__,
+ llu(pos));
+ ret = dir_emit(ctx, "..", 2, ino, DT_DIR);
+ pos += 1;
+ }
+
+ /*
+ * we stored ORANGEFS_ITERATE_NEXT in ctx->pos last time around
+ * to prevent "finding" dot and dot-dot on any iteration
+ * other than the first.
+ */
+ if (ctx->pos == ORANGEFS_ITERATE_NEXT)
+ ctx->pos = 0;
+
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: dirent_outcount:%d:\n",
+ __func__,
+ readdir_response.orangefs_dirent_outcount);
+ for (i = ctx->pos;
+ i < readdir_response.orangefs_dirent_outcount;
+ i++) {
+ len = readdir_response.dirent_array[i].d_length;
+ current_entry = readdir_response.dirent_array[i].d_name;
+ current_ino = orangefs_khandle_to_ino(
+ &readdir_response.dirent_array[i].khandle);
+
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "calling dir_emit for %s with len %d"
+ ", ctx->pos %ld\n",
+ current_entry,
+ len,
+ (unsigned long)ctx->pos);
+ /*
+ * type is unknown. We don't return object type
+ * in the dirent_array. This leaves getdents
+ * clueless about type.
+ */
+ ret =
+ dir_emit(ctx, current_entry, len, current_ino, DT_UNKNOWN);
+ if (!ret)
+ break;
+ ctx->pos++;
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "%s: ctx->pos:%lld\n",
+ __func__,
+ lld(ctx->pos));
+
+ }
+
+ /*
+ * we ran all the way through the last batch, set up for
+ * getting another batch...
+ */
+ if (ret) {
+ *ptoken = readdir_response.token;
+ ctx->pos = ORANGEFS_ITERATE_NEXT;
+ }
+
+ /*
+ * Did we hit the end of the directory?
+ */
+ if (readdir_response.token == ORANGEFS_READDIR_END) {
+ gossip_debug(GOSSIP_DIR_DEBUG,
+ "End of dir detected; setting ctx->pos to ORANGEFS_READDIR_END.\n");
+ ctx->pos = ORANGEFS_READDIR_END;
+ }
+
+out_destroy_handle:
+ /* kfree(NULL) is safe */
+ kfree(readdir_response.dirent_array);
+out_vfree:
+ gossip_debug(GOSSIP_DIR_DEBUG, "vfree %p\n", dents_buf);
+ vfree(dents_buf);
+out_free_op:
+ op_release(new_op);
+ gossip_debug(GOSSIP_DIR_DEBUG, "orangefs_readdir returning %d\n", ret);
+ return ret;
+}
+
+static int orangefs_dir_open(struct inode *inode, struct file *file)
+{
+ __u64 *ptoken;
+
+ file->private_data = kmalloc(sizeof(__u64), GFP_KERNEL);
+ if (!file->private_data)
+ return -ENOMEM;
+
+ ptoken = file->private_data;
+ *ptoken = ORANGEFS_READDIR_START;
+ return 0;
+}
+
+static int orangefs_dir_release(struct inode *inode, struct file *file)
+{
+ orangefs_flush_inode(inode);
+ kfree(file->private_data);
+ return 0;
+}
+
+/** ORANGEFS implementation of VFS directory operations */
+const struct file_operations orangefs_dir_operations = {
+ .read = generic_read_dir,
+ .iterate = orangefs_readdir,
+ .open = orangefs_dir_open,
+ .release = orangefs_dir_release,
+};
diff --git a/fs/orangefs/downcall.h b/fs/orangefs/downcall.h
new file mode 100644
index 000000000..66b99210f
--- /dev/null
+++ b/fs/orangefs/downcall.h
@@ -0,0 +1,133 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * Definitions of downcalls used in Linux kernel module.
+ */
+
+#ifndef __DOWNCALL_H
+#define __DOWNCALL_H
+
+/*
+ * Sanitized the device-client core interaction
+ * for clean 32-64 bit usage
+ */
+struct orangefs_io_response {
+ __s64 amt_complete;
+};
+
+struct orangefs_lookup_response {
+ struct orangefs_object_kref refn;
+};
+
+struct orangefs_create_response {
+ struct orangefs_object_kref refn;
+};
+
+struct orangefs_symlink_response {
+ struct orangefs_object_kref refn;
+};
+
+struct orangefs_getattr_response {
+ struct ORANGEFS_sys_attr_s attributes;
+ char link_target[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_mkdir_response {
+ struct orangefs_object_kref refn;
+};
+
+/*
+ * duplication of some system interface structures so that I don't have
+ * to allocate extra memory
+ */
+struct orangefs_dirent {
+ char *d_name;
+ int d_length;
+ struct orangefs_khandle khandle;
+};
+
+struct orangefs_statfs_response {
+ __s64 block_size;
+ __s64 blocks_total;
+ __s64 blocks_avail;
+ __s64 files_total;
+ __s64 files_avail;
+};
+
+struct orangefs_fs_mount_response {
+ __s32 fs_id;
+ __s32 id;
+ struct orangefs_khandle root_khandle;
+};
+
+/* the getxattr response is the attribute value */
+struct orangefs_getxattr_response {
+ __s32 val_sz;
+ __s32 __pad1;
+ char val[ORANGEFS_MAX_XATTR_VALUELEN];
+};
+
+/* the listxattr response is an array of attribute names */
+struct orangefs_listxattr_response {
+ __s32 returned_count;
+ __s32 __pad1;
+ __u64 token;
+ char key[ORANGEFS_MAX_XATTR_LISTLEN * ORANGEFS_MAX_XATTR_NAMELEN];
+ __s32 keylen;
+ __s32 __pad2;
+ __s32 lengths[ORANGEFS_MAX_XATTR_LISTLEN];
+};
+
+struct orangefs_param_response {
+ __s64 value;
+};
+
+#define PERF_COUNT_BUF_SIZE 4096
+struct orangefs_perf_count_response {
+ char buffer[PERF_COUNT_BUF_SIZE];
+};
+
+#define FS_KEY_BUF_SIZE 4096
+struct orangefs_fs_key_response {
+ __s32 fs_keylen;
+ __s32 __pad1;
+ char fs_key[FS_KEY_BUF_SIZE];
+};
+
+struct orangefs_downcall_s {
+ __s32 type;
+ __s32 status;
+ /* currently trailer is used only by readdir */
+ __s64 trailer_size;
+ char *trailer_buf;
+
+ union {
+ struct orangefs_io_response io;
+ struct orangefs_lookup_response lookup;
+ struct orangefs_create_response create;
+ struct orangefs_symlink_response sym;
+ struct orangefs_getattr_response getattr;
+ struct orangefs_mkdir_response mkdir;
+ struct orangefs_statfs_response statfs;
+ struct orangefs_fs_mount_response fs_mount;
+ struct orangefs_getxattr_response getxattr;
+ struct orangefs_listxattr_response listxattr;
+ struct orangefs_param_response param;
+ struct orangefs_perf_count_response perf_count;
+ struct orangefs_fs_key_response fs_key;
+ } resp;
+};
+
+struct orangefs_readdir_response_s {
+ __u64 token;
+ __u64 directory_version;
+ __u32 __pad2;
+ __u32 orangefs_dirent_outcount;
+ struct orangefs_dirent *dirent_array;
+};
+
+#endif /* __DOWNCALL_H */
diff --git a/fs/orangefs/file.c b/fs/orangefs/file.c
new file mode 100644
index 000000000..ae92795ed
--- /dev/null
+++ b/fs/orangefs/file.c
@@ -0,0 +1,717 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * Linux VFS file operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+#include <linux/fs.h>
+#include <linux/pagemap.h>
+
+/*
+ * Copy to client-core's address space from the buffers specified
+ * by the iovec upto total_size bytes.
+ * NOTE: the iovector can either contain addresses which
+ * can futher be kernel-space or user-space addresses.
+ * or it can pointers to struct page's
+ */
+static int precopy_buffers(int buffer_index,
+ struct iov_iter *iter,
+ size_t total_size)
+{
+ int ret = 0;
+ /*
+ * copy data from application/kernel by pulling it out
+ * of the iovec.
+ */
+
+
+ if (total_size) {
+ ret = orangefs_bufmap_copy_from_iovec(iter,
+ buffer_index,
+ total_size);
+ if (ret < 0)
+ gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
+ __func__,
+ (long)ret);
+ }
+
+ if (ret < 0)
+ gossip_err("%s: Failed to copy-in buffers. Please make sure that the pvfs2-client is running. %ld\n",
+ __func__,
+ (long)ret);
+ return ret;
+}
+
+/*
+ * Copy from client-core's address space to the buffers specified
+ * by the iovec upto total_size bytes.
+ * NOTE: the iovector can either contain addresses which
+ * can futher be kernel-space or user-space addresses.
+ * or it can pointers to struct page's
+ */
+static int postcopy_buffers(int buffer_index,
+ struct iov_iter *iter,
+ size_t total_size)
+{
+ int ret = 0;
+ /*
+ * copy data to application/kernel by pushing it out to
+ * the iovec. NOTE; target buffers can be addresses or
+ * struct page pointers.
+ */
+ if (total_size) {
+ ret = orangefs_bufmap_copy_to_iovec(iter,
+ buffer_index,
+ total_size);
+ if (ret < 0)
+ gossip_err("%s: Failed to copy-out buffers. Please make sure that the pvfs2-client is running (%ld)\n",
+ __func__,
+ (long)ret);
+ }
+ return ret;
+}
+
+/*
+ * Post and wait for the I/O upcall to finish
+ */
+static ssize_t wait_for_direct_io(enum ORANGEFS_io_type type, struct inode *inode,
+ loff_t *offset, struct iov_iter *iter,
+ size_t total_size, loff_t readahead_size)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
+ struct orangefs_kernel_op_s *new_op = NULL;
+ struct iov_iter saved = *iter;
+ int buffer_index = -1;
+ ssize_t ret;
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_FILE_IO);
+ if (!new_op)
+ return -ENOMEM;
+
+ /* synchronous I/O */
+ new_op->upcall.req.io.readahead_size = readahead_size;
+ new_op->upcall.req.io.io_type = type;
+ new_op->upcall.req.io.refn = orangefs_inode->refn;
+
+populate_shared_memory:
+ /* get a shared buffer index */
+ buffer_index = orangefs_bufmap_get();
+ if (buffer_index < 0) {
+ ret = buffer_index;
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s: orangefs_bufmap_get failure (%zd)\n",
+ __func__, ret);
+ goto out;
+ }
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): GET op %p -> buffer_index %d\n",
+ __func__,
+ handle,
+ new_op,
+ buffer_index);
+
+ new_op->uses_shared_memory = 1;
+ new_op->upcall.req.io.buf_index = buffer_index;
+ new_op->upcall.req.io.count = total_size;
+ new_op->upcall.req.io.offset = *offset;
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): offset: %llu total_size: %zd\n",
+ __func__,
+ handle,
+ llu(*offset),
+ total_size);
+ /*
+ * Stage 1: copy the buffers into client-core's address space
+ * precopy_buffers only pertains to writes.
+ */
+ if (type == ORANGEFS_IO_WRITE) {
+ ret = precopy_buffers(buffer_index,
+ iter,
+ total_size);
+ if (ret < 0)
+ goto out;
+ }
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): Calling post_io_request with tag (%llu)\n",
+ __func__,
+ handle,
+ llu(new_op->tag));
+
+ /* Stage 2: Service the I/O operation */
+ ret = service_operation(new_op,
+ type == ORANGEFS_IO_WRITE ?
+ "file_write" :
+ "file_read",
+ get_interruptible_flag(inode));
+
+ /*
+ * If service_operation() returns -EAGAIN #and# the operation was
+ * purged from orangefs_request_list or htable_ops_in_progress, then
+ * we know that the client was restarted, causing the shared memory
+ * area to be wiped clean. To restart a write operation in this
+ * case, we must re-copy the data from the user's iovec to a NEW
+ * shared memory location. To restart a read operation, we must get
+ * a new shared memory location.
+ */
+ if (ret == -EAGAIN && op_state_purged(new_op)) {
+ orangefs_bufmap_put(buffer_index);
+ buffer_index = -1;
+ if (type == ORANGEFS_IO_WRITE)
+ *iter = saved;
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s:going to repopulate_shared_memory.\n",
+ __func__);
+ goto populate_shared_memory;
+ }
+
+ if (ret < 0) {
+ if (ret == -EINTR) {
+ /*
+ * We can't return EINTR if any data was written,
+ * it's not POSIX. It is minimally acceptable
+ * to give a partial write, the way NFS does.
+ *
+ * It would be optimal to return all or nothing,
+ * but if a userspace write is bigger than
+ * an IO buffer, and the interrupt occurs
+ * between buffer writes, that would not be
+ * possible.
+ */
+ switch (new_op->op_state - OP_VFS_STATE_GIVEN_UP) {
+ /*
+ * If the op was waiting when the interrupt
+ * occurred, then the client-core did not
+ * trigger the write.
+ */
+ case OP_VFS_STATE_WAITING:
+ if (*offset == 0)
+ ret = -EINTR;
+ else
+ ret = 0;
+ break;
+ /*
+ * If the op was in progress when the interrupt
+ * occurred, then the client-core was able to
+ * trigger the write.
+ */
+ case OP_VFS_STATE_INPROGR:
+ ret = total_size;
+ break;
+ default:
+ gossip_err("%s: unexpected op state :%d:.\n",
+ __func__,
+ new_op->op_state);
+ ret = 0;
+ break;
+ }
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s: got EINTR, state:%d: %p\n",
+ __func__,
+ new_op->op_state,
+ new_op);
+ } else {
+ gossip_err("%s: error in %s handle %pU, returning %zd\n",
+ __func__,
+ type == ORANGEFS_IO_READ ?
+ "read from" : "write to",
+ handle, ret);
+ }
+ if (orangefs_cancel_op_in_progress(new_op))
+ return ret;
+
+ goto out;
+ }
+
+ /*
+ * Stage 3: Post copy buffers from client-core's address space
+ * postcopy_buffers only pertains to reads.
+ */
+ if (type == ORANGEFS_IO_READ) {
+ ret = postcopy_buffers(buffer_index,
+ iter,
+ new_op->downcall.resp.io.amt_complete);
+ if (ret < 0)
+ goto out;
+ }
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): Amount %s, returned by the sys-io call:%d\n",
+ __func__,
+ handle,
+ type == ORANGEFS_IO_READ ? "read" : "written",
+ (int)new_op->downcall.resp.io.amt_complete);
+
+ ret = new_op->downcall.resp.io.amt_complete;
+
+out:
+ if (buffer_index >= 0) {
+ orangefs_bufmap_put(buffer_index);
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): PUT buffer_index %d\n",
+ __func__, handle, buffer_index);
+ buffer_index = -1;
+ }
+ op_release(new_op);
+ return ret;
+}
+
+/*
+ * Common entry point for read/write/readv/writev
+ * This function will dispatch it to either the direct I/O
+ * or buffered I/O path depending on the mount options and/or
+ * augmented/extended metadata attached to the file.
+ * Note: File extended attributes override any mount options.
+ */
+static ssize_t do_readv_writev(enum ORANGEFS_io_type type, struct file *file,
+ loff_t *offset, struct iov_iter *iter)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_khandle *handle = &orangefs_inode->refn.khandle;
+ size_t count = iov_iter_count(iter);
+ ssize_t total_count = 0;
+ ssize_t ret = -EINVAL;
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s-BEGIN(%pU): count(%d) after estimate_max_iovecs.\n",
+ __func__,
+ handle,
+ (int)count);
+
+ if (type == ORANGEFS_IO_WRITE) {
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): proceeding with offset : %llu, "
+ "size %d\n",
+ __func__,
+ handle,
+ llu(*offset),
+ (int)count);
+ }
+
+ if (count == 0) {
+ ret = 0;
+ goto out;
+ }
+
+ while (iov_iter_count(iter)) {
+ size_t each_count = iov_iter_count(iter);
+ size_t amt_complete;
+
+ /* how much to transfer in this loop iteration */
+ if (each_count > orangefs_bufmap_size_query())
+ each_count = orangefs_bufmap_size_query();
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): size of each_count(%d)\n",
+ __func__,
+ handle,
+ (int)each_count);
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): BEFORE wait_for_io: offset is %d\n",
+ __func__,
+ handle,
+ (int)*offset);
+
+ ret = wait_for_direct_io(type, inode, offset, iter,
+ each_count, 0);
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): return from wait_for_io:%d\n",
+ __func__,
+ handle,
+ (int)ret);
+
+ if (ret < 0)
+ goto out;
+
+ *offset += ret;
+ total_count += ret;
+ amt_complete = ret;
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): AFTER wait_for_io: offset is %d\n",
+ __func__,
+ handle,
+ (int)*offset);
+
+ /*
+ * if we got a short I/O operations,
+ * fall out and return what we got so far
+ */
+ if (amt_complete < each_count)
+ break;
+ } /*end while */
+
+out:
+ if (total_count > 0)
+ ret = total_count;
+ if (ret > 0) {
+ if (type == ORANGEFS_IO_READ) {
+ file_accessed(file);
+ } else {
+ SetMtimeFlag(orangefs_inode);
+ inode->i_mtime = CURRENT_TIME;
+ mark_inode_dirty_sync(inode);
+ }
+ }
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): Value(%d) returned.\n",
+ __func__,
+ handle,
+ (int)ret);
+
+ return ret;
+}
+
+/*
+ * Read data from a specified offset in a file (referenced by inode).
+ * Data may be placed either in a user or kernel buffer.
+ */
+ssize_t orangefs_inode_read(struct inode *inode,
+ struct iov_iter *iter,
+ loff_t *offset,
+ loff_t readahead_size)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ size_t count = iov_iter_count(iter);
+ size_t bufmap_size;
+ ssize_t ret = -EINVAL;
+
+ g_orangefs_stats.reads++;
+
+ bufmap_size = orangefs_bufmap_size_query();
+ if (count > bufmap_size) {
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s: count is too large (%zd/%zd)!\n",
+ __func__, count, bufmap_size);
+ return -EINVAL;
+ }
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU) %zd@%llu\n",
+ __func__,
+ &orangefs_inode->refn.khandle,
+ count,
+ llu(*offset));
+
+ ret = wait_for_direct_io(ORANGEFS_IO_READ, inode, offset, iter,
+ count, readahead_size);
+ if (ret > 0)
+ *offset += ret;
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s(%pU): Value(%zd) returned.\n",
+ __func__,
+ &orangefs_inode->refn.khandle,
+ ret);
+
+ return ret;
+}
+
+static ssize_t orangefs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ loff_t pos = *(&iocb->ki_pos);
+ ssize_t rc = 0;
+
+ BUG_ON(iocb->private);
+
+ gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_read_iter\n");
+
+ g_orangefs_stats.reads++;
+
+ rc = do_readv_writev(ORANGEFS_IO_READ, file, &pos, iter);
+ iocb->ki_pos = pos;
+
+ return rc;
+}
+
+static ssize_t orangefs_file_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct file *file = iocb->ki_filp;
+ loff_t pos;
+ ssize_t rc;
+
+ BUG_ON(iocb->private);
+
+ gossip_debug(GOSSIP_FILE_DEBUG, "orangefs_file_write_iter\n");
+
+ mutex_lock(&file->f_mapping->host->i_mutex);
+
+ /* Make sure generic_write_checks sees an up to date inode size. */
+ if (file->f_flags & O_APPEND) {
+ rc = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
+ if (rc == -ESTALE)
+ rc = -EIO;
+ if (rc) {
+ gossip_err("%s: orangefs_inode_getattr failed, "
+ "rc:%zd:.\n", __func__, rc);
+ goto out;
+ }
+ }
+
+ if (file->f_pos > i_size_read(file->f_mapping->host))
+ orangefs_i_size_write(file->f_mapping->host, file->f_pos);
+
+ rc = generic_write_checks(iocb, iter);
+
+ if (rc <= 0) {
+ gossip_err("%s: generic_write_checks failed, rc:%zd:.\n",
+ __func__, rc);
+ goto out;
+ }
+
+ /*
+ * if we are appending, generic_write_checks would have updated
+ * pos to the end of the file, so we will wait till now to set
+ * pos...
+ */
+ pos = *(&iocb->ki_pos);
+
+ rc = do_readv_writev(ORANGEFS_IO_WRITE,
+ file,
+ &pos,
+ iter);
+ if (rc < 0) {
+ gossip_err("%s: do_readv_writev failed, rc:%zd:.\n",
+ __func__, rc);
+ goto out;
+ }
+
+ iocb->ki_pos = pos;
+ g_orangefs_stats.writes++;
+
+out:
+
+ mutex_unlock(&file->f_mapping->host->i_mutex);
+ return rc;
+}
+
+/*
+ * Perform a miscellaneous operation on a file.
+ */
+static long orangefs_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
+{
+ int ret = -ENOTTY;
+ __u64 val = 0;
+ unsigned long uval;
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "orangefs_ioctl: called with cmd %d\n",
+ cmd);
+
+ /*
+ * we understand some general ioctls on files, such as the immutable
+ * and append flags
+ */
+ if (cmd == FS_IOC_GETFLAGS) {
+ val = 0;
+ ret = orangefs_inode_getxattr(file_inode(file),
+ ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+ "user.pvfs2.meta_hint",
+ &val, sizeof(val));
+ if (ret < 0 && ret != -ENODATA)
+ return ret;
+ else if (ret == -ENODATA)
+ val = 0;
+ uval = val;
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "orangefs_ioctl: FS_IOC_GETFLAGS: %llu\n",
+ (unsigned long long)uval);
+ return put_user(uval, (int __user *)arg);
+ } else if (cmd == FS_IOC_SETFLAGS) {
+ ret = 0;
+ if (get_user(uval, (int __user *)arg))
+ return -EFAULT;
+ /*
+ * ORANGEFS_MIRROR_FL is set internally when the mirroring mode
+ * is turned on for a file. The user is not allowed to turn
+ * on this bit, but the bit is present if the user first gets
+ * the flags and then updates the flags with some new
+ * settings. So, we ignore it in the following edit. bligon.
+ */
+ if ((uval & ~ORANGEFS_MIRROR_FL) &
+ (~(FS_IMMUTABLE_FL | FS_APPEND_FL | FS_NOATIME_FL))) {
+ gossip_err("orangefs_ioctl: the FS_IOC_SETFLAGS only supports setting one of FS_IMMUTABLE_FL|FS_APPEND_FL|FS_NOATIME_FL\n");
+ return -EINVAL;
+ }
+ val = uval;
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "orangefs_ioctl: FS_IOC_SETFLAGS: %llu\n",
+ (unsigned long long)val);
+ ret = orangefs_inode_setxattr(file_inode(file),
+ ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+ "user.pvfs2.meta_hint",
+ &val, sizeof(val), 0);
+ }
+
+ return ret;
+}
+
+/*
+ * Memory map a region of a file.
+ */
+static int orangefs_file_mmap(struct file *file, struct vm_area_struct *vma)
+{
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "orangefs_file_mmap: called on %s\n",
+ (file ?
+ (char *)file->f_path.dentry->d_name.name :
+ (char *)"Unknown"));
+
+ /* set the sequential readahead hint */
+ vma->vm_flags |= VM_SEQ_READ;
+ vma->vm_flags &= ~VM_RAND_READ;
+
+ /* Use readonly mmap since we cannot support writable maps. */
+ return generic_file_readonly_mmap(file, vma);
+}
+
+#define mapping_nrpages(idata) ((idata)->nrpages)
+
+/*
+ * Called to notify the module that there are no more references to
+ * this file (i.e. no processes have it open).
+ *
+ * \note Not called when each file is closed.
+ */
+static int orangefs_file_release(struct inode *inode, struct file *file)
+{
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "orangefs_file_release: called on %s\n",
+ file->f_path.dentry->d_name.name);
+
+ orangefs_flush_inode(inode);
+
+ /*
+ * remove all associated inode pages from the page cache and mmap
+ * readahead cache (if any); this forces an expensive refresh of
+ * data for the next caller of mmap (or 'get_block' accesses)
+ */
+ if (file->f_path.dentry->d_inode &&
+ file->f_path.dentry->d_inode->i_mapping &&
+ mapping_nrpages(&file->f_path.dentry->d_inode->i_data))
+ truncate_inode_pages(file->f_path.dentry->d_inode->i_mapping,
+ 0);
+ return 0;
+}
+
+/*
+ * Push all data for a specific file onto permanent storage.
+ */
+static int orangefs_fsync(struct file *file,
+ loff_t start,
+ loff_t end,
+ int datasync)
+{
+ int ret = -EINVAL;
+ struct orangefs_inode_s *orangefs_inode =
+ ORANGEFS_I(file->f_path.dentry->d_inode);
+ struct orangefs_kernel_op_s *new_op = NULL;
+
+ /* required call */
+ filemap_write_and_wait_range(file->f_mapping, start, end);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_FSYNC);
+ if (!new_op)
+ return -ENOMEM;
+ new_op->upcall.req.fsync.refn = orangefs_inode->refn;
+
+ ret = service_operation(new_op,
+ "orangefs_fsync",
+ get_interruptible_flag(file->f_path.dentry->d_inode));
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "orangefs_fsync got return value of %d\n",
+ ret);
+
+ op_release(new_op);
+
+ orangefs_flush_inode(file->f_path.dentry->d_inode);
+ return ret;
+}
+
+/*
+ * Change the file pointer position for an instance of an open file.
+ *
+ * \note If .llseek is overriden, we must acquire lock as described in
+ * Documentation/filesystems/Locking.
+ *
+ * Future upgrade could support SEEK_DATA and SEEK_HOLE but would
+ * require much changes to the FS
+ */
+static loff_t orangefs_file_llseek(struct file *file, loff_t offset, int origin)
+{
+ int ret = -EINVAL;
+ struct inode *inode = file_inode(file);
+
+ if (origin == SEEK_END) {
+ /*
+ * revalidate the inode's file size.
+ * NOTE: We are only interested in file size here,
+ * so we set mask accordingly.
+ */
+ ret = orangefs_inode_getattr(file->f_mapping->host, 0, 1);
+ if (ret == -ESTALE)
+ ret = -EIO;
+ if (ret) {
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "%s:%s:%d calling make bad inode\n",
+ __FILE__,
+ __func__,
+ __LINE__);
+ return ret;
+ }
+ }
+
+ gossip_debug(GOSSIP_FILE_DEBUG,
+ "orangefs_file_llseek: offset is %ld | origin is %d"
+ " | inode size is %lu\n",
+ (long)offset,
+ origin,
+ (unsigned long)i_size_read(inode));
+
+ return generic_file_llseek(file, offset, origin);
+}
+
+/*
+ * Support local locks (locks that only this kernel knows about)
+ * if Orangefs was mounted -o local_lock.
+ */
+static int orangefs_lock(struct file *filp, int cmd, struct file_lock *fl)
+{
+ int rc = -EINVAL;
+
+ if (ORANGEFS_SB(filp->f_inode->i_sb)->flags & ORANGEFS_OPT_LOCAL_LOCK) {
+ if (cmd == F_GETLK) {
+ rc = 0;
+ posix_test_lock(filp, fl);
+ } else {
+ rc = posix_lock_file(filp, fl, NULL);
+ }
+ }
+
+ return rc;
+}
+
+/** ORANGEFS implementation of VFS file operations */
+const struct file_operations orangefs_file_operations = {
+ .llseek = orangefs_file_llseek,
+ .read_iter = orangefs_file_read_iter,
+ .write_iter = orangefs_file_write_iter,
+ .lock = orangefs_lock,
+ .unlocked_ioctl = orangefs_ioctl,
+ .mmap = orangefs_file_mmap,
+ .open = generic_file_open,
+ .release = orangefs_file_release,
+ .fsync = orangefs_fsync,
+};
diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c
new file mode 100644
index 000000000..85640e955
--- /dev/null
+++ b/fs/orangefs/inode.c
@@ -0,0 +1,461 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * Linux VFS inode operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+static int read_one_page(struct page *page)
+{
+ int ret;
+ int max_block;
+ ssize_t bytes_read = 0;
+ struct inode *inode = page->mapping->host;
+ const __u32 blocksize = PAGE_SIZE; /* inode->i_blksize */
+ const __u32 blockbits = PAGE_SHIFT; /* inode->i_blkbits */
+ struct iov_iter to;
+ struct bio_vec bv = {.bv_page = page, .bv_len = PAGE_SIZE};
+
+ iov_iter_bvec(&to, ITER_BVEC | READ, &bv, 1, PAGE_SIZE);
+
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "orangefs_readpage called with page %p\n",
+ page);
+
+ max_block = ((inode->i_size / blocksize) + 1);
+
+ if (page->index < max_block) {
+ loff_t blockptr_offset = (((loff_t) page->index) << blockbits);
+
+ bytes_read = orangefs_inode_read(inode,
+ &to,
+ &blockptr_offset,
+ inode->i_size);
+ }
+ /* this will only zero remaining unread portions of the page data */
+ iov_iter_zero(~0U, &to);
+ /* takes care of potential aliasing */
+ flush_dcache_page(page);
+ if (bytes_read < 0) {
+ ret = bytes_read;
+ SetPageError(page);
+ } else {
+ SetPageUptodate(page);
+ if (PageError(page))
+ ClearPageError(page);
+ ret = 0;
+ }
+ /* unlock the page after the ->readpage() routine completes */
+ unlock_page(page);
+ return ret;
+}
+
+static int orangefs_readpage(struct file *file, struct page *page)
+{
+ return read_one_page(page);
+}
+
+static int orangefs_readpages(struct file *file,
+ struct address_space *mapping,
+ struct list_head *pages,
+ unsigned nr_pages)
+{
+ int page_idx;
+ int ret;
+
+ gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_readpages called\n");
+
+ for (page_idx = 0; page_idx < nr_pages; page_idx++) {
+ struct page *page;
+
+ page = list_entry(pages->prev, struct page, lru);
+ list_del(&page->lru);
+ if (!add_to_page_cache(page,
+ mapping,
+ page->index,
+ GFP_KERNEL)) {
+ ret = read_one_page(page);
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "failure adding page to cache, read_one_page returned: %d\n",
+ ret);
+ } else {
+ put_page(page);
+ }
+ }
+ BUG_ON(!list_empty(pages));
+ return 0;
+}
+
+static void orangefs_invalidatepage(struct page *page,
+ unsigned int offset,
+ unsigned int length)
+{
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "orangefs_invalidatepage called on page %p "
+ "(offset is %u)\n",
+ page,
+ offset);
+
+ ClearPageUptodate(page);
+ ClearPageMappedToDisk(page);
+ return;
+
+}
+
+static int orangefs_releasepage(struct page *page, gfp_t foo)
+{
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "orangefs_releasepage called on page %p\n",
+ page);
+ return 0;
+}
+
+/*
+ * Having a direct_IO entry point in the address_space_operations
+ * struct causes the kernel to allows us to use O_DIRECT on
+ * open. Nothing will ever call this thing, but in the future we
+ * will need to be able to use O_DIRECT on open in order to support
+ * AIO. Modeled after NFS, they do this too.
+ */
+/*
+ * static ssize_t orangefs_direct_IO(int rw,
+ * struct kiocb *iocb,
+ * struct iov_iter *iter,
+ * loff_t offset)
+ *{
+ * gossip_debug(GOSSIP_INODE_DEBUG,
+ * "orangefs_direct_IO: %s\n",
+ * iocb->ki_filp->f_path.dentry->d_name.name);
+ *
+ * return -EINVAL;
+ *}
+ */
+
+struct backing_dev_info orangefs_backing_dev_info = {
+ .name = "orangefs",
+ .ra_pages = 0,
+ .capabilities = BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_WRITEBACK,
+};
+
+/** ORANGEFS2 implementation of address space operations */
+const struct address_space_operations orangefs_address_operations = {
+ .readpage = orangefs_readpage,
+ .readpages = orangefs_readpages,
+ .invalidatepage = orangefs_invalidatepage,
+ .releasepage = orangefs_releasepage,
+/* .direct_IO = orangefs_direct_IO */
+};
+
+static int orangefs_setattr_size(struct inode *inode, struct iattr *iattr)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op;
+ loff_t orig_size;
+ int ret = -EINVAL;
+
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "%s: %pU: Handle is %pU | fs_id %d | size is %llu\n",
+ __func__,
+ get_khandle_from_ino(inode),
+ &orangefs_inode->refn.khandle,
+ orangefs_inode->refn.fs_id,
+ iattr->ia_size);
+
+ /* Ensure that we have a up to date size, so we know if it changed. */
+ ret = orangefs_inode_getattr(inode, 0, 1);
+ if (ret == -ESTALE)
+ ret = -EIO;
+ if (ret) {
+ gossip_err("%s: orangefs_inode_getattr failed, ret:%d:.\n",
+ __func__, ret);
+ return ret;
+ }
+ orig_size = i_size_read(inode);
+
+ truncate_setsize(inode, iattr->ia_size);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_TRUNCATE);
+ if (!new_op)
+ return -ENOMEM;
+
+ new_op->upcall.req.truncate.refn = orangefs_inode->refn;
+ new_op->upcall.req.truncate.size = (__s64) iattr->ia_size;
+
+ ret = service_operation(new_op, __func__,
+ get_interruptible_flag(inode));
+
+ /*
+ * the truncate has no downcall members to retrieve, but
+ * the status value tells us if it went through ok or not
+ */
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "orangefs: orangefs_truncate got return value of %d\n",
+ ret);
+
+ op_release(new_op);
+
+ if (ret != 0)
+ return ret;
+
+ if (orig_size != i_size_read(inode))
+ iattr->ia_valid |= ATTR_CTIME | ATTR_MTIME;
+
+ return ret;
+}
+
+/*
+ * Change attributes of an object referenced by dentry.
+ */
+int orangefs_setattr(struct dentry *dentry, struct iattr *iattr)
+{
+ int ret = -EINVAL;
+ struct inode *inode = dentry->d_inode;
+
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "orangefs_setattr: called on %s\n",
+ dentry->d_name.name);
+
+ ret = inode_change_ok(inode, iattr);
+ if (ret)
+ goto out;
+
+ if ((iattr->ia_valid & ATTR_SIZE) &&
+ iattr->ia_size != i_size_read(inode)) {
+ ret = orangefs_setattr_size(inode, iattr);
+ if (ret)
+ goto out;
+ }
+
+ setattr_copy(inode, iattr);
+ mark_inode_dirty(inode);
+
+ ret = orangefs_inode_setattr(inode, iattr);
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "orangefs_setattr: inode_setattr returned %d\n",
+ ret);
+
+ if (!ret && (iattr->ia_valid & ATTR_MODE))
+ /* change mod on a file that has ACLs */
+ ret = posix_acl_chmod(inode, inode->i_mode);
+
+out:
+ gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_setattr: returning %d\n", ret);
+ return ret;
+}
+
+/*
+ * Obtain attributes of an object given a dentry
+ */
+int orangefs_getattr(struct vfsmount *mnt,
+ struct dentry *dentry,
+ struct kstat *kstat)
+{
+ int ret = -ENOENT;
+ struct inode *inode = dentry->d_inode;
+ struct orangefs_inode_s *orangefs_inode = NULL;
+
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "orangefs_getattr: called on %s\n",
+ dentry->d_name.name);
+
+ ret = orangefs_inode_getattr(inode, 0, 1);
+ if (ret == 0) {
+ generic_fillattr(inode, kstat);
+
+ /* override block size reported to stat */
+ orangefs_inode = ORANGEFS_I(inode);
+ kstat->blksize = orangefs_inode->blksize;
+ }
+ return ret;
+}
+
+int orangefs_permission(struct inode *inode, int mask)
+{
+ int ret;
+
+ if (mask & MAY_NOT_BLOCK)
+ return -ECHILD;
+
+ gossip_debug(GOSSIP_INODE_DEBUG, "%s: refreshing\n", __func__);
+
+ /* Make sure the permission (and other common attrs) are up to date. */
+ ret = orangefs_inode_getattr(inode, 0, 0);
+ if (ret < 0)
+ return ret;
+
+ return generic_permission(inode, mask);
+}
+
+/* ORANGEDS2 implementation of VFS inode operations for files */
+struct inode_operations orangefs_file_inode_operations = {
+ .get_acl = orangefs_get_acl,
+ .set_acl = orangefs_set_acl,
+ .setattr = orangefs_setattr,
+ .getattr = orangefs_getattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .listxattr = orangefs_listxattr,
+ .removexattr = generic_removexattr,
+ .permission = orangefs_permission,
+};
+
+static int orangefs_init_iops(struct inode *inode)
+{
+ inode->i_mapping->a_ops = &orangefs_address_operations;
+
+ switch (inode->i_mode & S_IFMT) {
+ case S_IFREG:
+ inode->i_op = &orangefs_file_inode_operations;
+ inode->i_fop = &orangefs_file_operations;
+ inode->i_blkbits = PAGE_SHIFT;
+ break;
+ case S_IFLNK:
+ inode->i_op = &orangefs_symlink_inode_operations;
+ break;
+ case S_IFDIR:
+ inode->i_op = &orangefs_dir_inode_operations;
+ inode->i_fop = &orangefs_dir_operations;
+ break;
+ default:
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "%s: unsupported mode\n",
+ __func__);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+/*
+ * Given a ORANGEFS object identifier (fsid, handle), convert it into a ino_t type
+ * that will be used as a hash-index from where the handle will
+ * be searched for in the VFS hash table of inodes.
+ */
+static inline ino_t orangefs_handle_hash(struct orangefs_object_kref *ref)
+{
+ if (!ref)
+ return 0;
+ return orangefs_khandle_to_ino(&(ref->khandle));
+}
+
+/*
+ * Called to set up an inode from iget5_locked.
+ */
+static int orangefs_set_inode(struct inode *inode, void *data)
+{
+ struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
+ ORANGEFS_I(inode)->refn.fs_id = ref->fs_id;
+ ORANGEFS_I(inode)->refn.khandle = ref->khandle;
+ return 0;
+}
+
+/*
+ * Called to determine if handles match.
+ */
+static int orangefs_test_inode(struct inode *inode, void *data)
+{
+ struct orangefs_object_kref *ref = (struct orangefs_object_kref *) data;
+ struct orangefs_inode_s *orangefs_inode = NULL;
+
+ orangefs_inode = ORANGEFS_I(inode);
+ return (!ORANGEFS_khandle_cmp(&(orangefs_inode->refn.khandle), &(ref->khandle))
+ && orangefs_inode->refn.fs_id == ref->fs_id);
+}
+
+/*
+ * Front-end to lookup the inode-cache maintained by the VFS using the ORANGEFS
+ * file handle.
+ *
+ * @sb: the file system super block instance.
+ * @ref: The ORANGEFS object for which we are trying to locate an inode structure.
+ */
+struct inode *orangefs_iget(struct super_block *sb, struct orangefs_object_kref *ref)
+{
+ struct inode *inode = NULL;
+ unsigned long hash;
+ int error;
+
+ hash = orangefs_handle_hash(ref);
+ inode = iget5_locked(sb, hash, orangefs_test_inode, orangefs_set_inode, ref);
+ if (!inode || !(inode->i_state & I_NEW))
+ return inode;
+
+ error = orangefs_inode_getattr(inode, 1, 0);
+ if (error) {
+ iget_failed(inode);
+ return ERR_PTR(error);
+ }
+
+ inode->i_ino = hash; /* needed for stat etc */
+ orangefs_init_iops(inode);
+ unlock_new_inode(inode);
+
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "iget handle %pU, fsid %d hash %ld i_ino %lu\n",
+ &ref->khandle,
+ ref->fs_id,
+ hash,
+ inode->i_ino);
+
+ return inode;
+}
+
+/*
+ * Allocate an inode for a newly created file and insert it into the inode hash.
+ */
+struct inode *orangefs_new_inode(struct super_block *sb, struct inode *dir,
+ int mode, dev_t dev, struct orangefs_object_kref *ref)
+{
+ unsigned long hash = orangefs_handle_hash(ref);
+ struct inode *inode;
+ int error;
+
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "%s:(sb is %p | MAJOR(dev)=%u | MINOR(dev)=%u mode=%o)\n",
+ __func__,
+ sb,
+ MAJOR(dev),
+ MINOR(dev),
+ mode);
+
+ inode = new_inode(sb);
+ if (!inode)
+ return NULL;
+
+ orangefs_set_inode(inode, ref);
+ inode->i_ino = hash; /* needed for stat etc */
+
+ error = orangefs_inode_getattr(inode, 1, 0);
+ if (error)
+ goto out_iput;
+
+ orangefs_init_iops(inode);
+
+ inode->i_mode = mode;
+ inode->i_uid = current_fsuid();
+ inode->i_gid = current_fsgid();
+ inode->i_atime = inode->i_mtime = inode->i_ctime = CURRENT_TIME;
+ inode->i_size = PAGE_SIZE;
+ inode->i_rdev = dev;
+
+ error = insert_inode_locked4(inode, hash, orangefs_test_inode, ref);
+ if (error < 0)
+ goto out_iput;
+
+ gossip_debug(GOSSIP_INODE_DEBUG,
+ "Initializing ACL's for inode %pU\n",
+ get_khandle_from_ino(inode));
+ orangefs_init_acl(inode, dir);
+ return inode;
+
+out_iput:
+ iput(inode);
+ return ERR_PTR(error);
+}
diff --git a/fs/orangefs/namei.c b/fs/orangefs/namei.c
new file mode 100644
index 000000000..5a60c508a
--- /dev/null
+++ b/fs/orangefs/namei.c
@@ -0,0 +1,462 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * Linux VFS namei operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+
+/*
+ * Get a newly allocated inode to go with a negative dentry.
+ */
+static int orangefs_create(struct inode *dir,
+ struct dentry *dentry,
+ umode_t mode,
+ bool exclusive)
+{
+ struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+ struct orangefs_kernel_op_s *new_op;
+ struct inode *inode;
+ int ret;
+
+ gossip_debug(GOSSIP_NAME_DEBUG, "%s: %s\n",
+ __func__,
+ dentry->d_name.name);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_CREATE);
+ if (!new_op)
+ return -ENOMEM;
+
+ new_op->upcall.req.create.parent_refn = parent->refn;
+
+ fill_default_sys_attrs(new_op->upcall.req.create.attributes,
+ ORANGEFS_TYPE_METAFILE, mode);
+
+ strncpy(new_op->upcall.req.create.d_name,
+ dentry->d_name.name, ORANGEFS_NAME_MAX);
+
+ ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: %s: handle:%pU: fsid:%d: new_op:%p: ret:%d:\n",
+ __func__,
+ dentry->d_name.name,
+ &new_op->downcall.resp.create.refn.khandle,
+ new_op->downcall.resp.create.refn.fs_id,
+ new_op,
+ ret);
+
+ if (ret < 0)
+ goto out;
+
+ inode = orangefs_new_inode(dir->i_sb, dir, S_IFREG | mode, 0,
+ &new_op->downcall.resp.create.refn);
+ if (IS_ERR(inode)) {
+ gossip_err("%s: Failed to allocate inode for file :%s:\n",
+ __func__,
+ dentry->d_name.name);
+ ret = PTR_ERR(inode);
+ goto out;
+ }
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: Assigned inode :%pU: for file :%s:\n",
+ __func__,
+ get_khandle_from_ino(inode),
+ dentry->d_name.name);
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: dentry instantiated for %s\n",
+ __func__,
+ dentry->d_name.name);
+
+ SetMtimeFlag(parent);
+ dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+ mark_inode_dirty_sync(dir);
+ ret = 0;
+out:
+ op_release(new_op);
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: %s: returning %d\n",
+ __func__,
+ dentry->d_name.name,
+ ret);
+ return ret;
+}
+
+/*
+ * Attempt to resolve an object name (dentry->d_name), parent handle, and
+ * fsid into a handle for the object.
+ */
+static struct dentry *orangefs_lookup(struct inode *dir, struct dentry *dentry,
+ unsigned int flags)
+{
+ struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+ struct orangefs_kernel_op_s *new_op;
+ struct inode *inode;
+ struct dentry *res;
+ int ret = -EINVAL;
+
+ /*
+ * in theory we could skip a lookup here (if the intent is to
+ * create) in order to avoid a potentially failed lookup, but
+ * leaving it in can skip a valid lookup and try to create a file
+ * that already exists (e.g. the vfs already handles checking for
+ * -EEXIST on O_EXCL opens, which is broken if we skip this lookup
+ * in the create path)
+ */
+ gossip_debug(GOSSIP_NAME_DEBUG, "%s called on %s\n",
+ __func__, dentry->d_name.name);
+
+ if (dentry->d_name.len > (ORANGEFS_NAME_MAX - 1))
+ return ERR_PTR(-ENAMETOOLONG);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_LOOKUP);
+ if (!new_op)
+ return ERR_PTR(-ENOMEM);
+
+ new_op->upcall.req.lookup.sym_follow = ORANGEFS_LOOKUP_LINK_NO_FOLLOW;
+
+ gossip_debug(GOSSIP_NAME_DEBUG, "%s:%s:%d using parent %pU\n",
+ __FILE__,
+ __func__,
+ __LINE__,
+ &parent->refn.khandle);
+ new_op->upcall.req.lookup.parent_refn = parent->refn;
+
+ strncpy(new_op->upcall.req.lookup.d_name, dentry->d_name.name,
+ ORANGEFS_NAME_MAX);
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: doing lookup on %s under %pU,%d\n",
+ __func__,
+ new_op->upcall.req.lookup.d_name,
+ &new_op->upcall.req.lookup.parent_refn.khandle,
+ new_op->upcall.req.lookup.parent_refn.fs_id);
+
+ ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "Lookup Got %pU, fsid %d (ret=%d)\n",
+ &new_op->downcall.resp.lookup.refn.khandle,
+ new_op->downcall.resp.lookup.refn.fs_id,
+ ret);
+
+ if (ret < 0) {
+ if (ret == -ENOENT) {
+ /*
+ * if no inode was found, add a negative dentry to
+ * dcache anyway; if we don't, we don't hold expected
+ * lookup semantics and we most noticeably break
+ * during directory renames.
+ *
+ * however, if the operation failed or exited, do not
+ * add the dentry (e.g. in the case that a touch is
+ * issued on a file that already exists that was
+ * interrupted during this lookup -- no need to add
+ * another negative dentry for an existing file)
+ */
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "orangefs_lookup: Adding *negative* dentry "
+ "%p for %s\n",
+ dentry,
+ dentry->d_name.name);
+
+ d_add(dentry, NULL);
+ res = NULL;
+ goto out;
+ }
+
+ /* must be a non-recoverable error */
+ res = ERR_PTR(ret);
+ goto out;
+ }
+
+ inode = orangefs_iget(dir->i_sb, &new_op->downcall.resp.lookup.refn);
+ if (IS_ERR(inode)) {
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "error %ld from iget\n", PTR_ERR(inode));
+ res = ERR_CAST(inode);
+ goto out;
+ }
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s:%s:%d "
+ "Found good inode [%lu] with count [%d]\n",
+ __FILE__,
+ __func__,
+ __LINE__,
+ inode->i_ino,
+ (int)atomic_read(&inode->i_count));
+
+ /* update dentry/inode pair into dcache */
+ res = d_splice_alias(inode, dentry);
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "Lookup success (inode ct = %d)\n",
+ (int)atomic_read(&inode->i_count));
+out:
+ op_release(new_op);
+ return res;
+}
+
+/* return 0 on success; non-zero otherwise */
+static int orangefs_unlink(struct inode *dir, struct dentry *dentry)
+{
+ struct inode *inode = dentry->d_inode;
+ struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+ struct orangefs_kernel_op_s *new_op;
+ int ret;
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: called on %s\n"
+ " (inode %pU): Parent is %pU | fs_id %d\n",
+ __func__,
+ dentry->d_name.name,
+ get_khandle_from_ino(inode),
+ &parent->refn.khandle,
+ parent->refn.fs_id);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_REMOVE);
+ if (!new_op)
+ return -ENOMEM;
+
+ new_op->upcall.req.remove.parent_refn = parent->refn;
+ strncpy(new_op->upcall.req.remove.d_name, dentry->d_name.name,
+ ORANGEFS_NAME_MAX);
+
+ ret = service_operation(new_op, "orangefs_unlink",
+ get_interruptible_flag(inode));
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: service_operation returned:%d:\n",
+ __func__,
+ ret);
+
+ op_release(new_op);
+
+ if (!ret) {
+ drop_nlink(inode);
+
+ SetMtimeFlag(parent);
+ dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+ mark_inode_dirty_sync(dir);
+ }
+ return ret;
+}
+
+static int orangefs_symlink(struct inode *dir,
+ struct dentry *dentry,
+ const char *symname)
+{
+ struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+ struct orangefs_kernel_op_s *new_op;
+ struct inode *inode;
+ int mode = 755;
+ int ret;
+
+ gossip_debug(GOSSIP_NAME_DEBUG, "%s: called\n", __func__);
+
+ if (!symname)
+ return -EINVAL;
+
+ if (strlen(symname)+1 > ORANGEFS_NAME_MAX)
+ return -ENAMETOOLONG;
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_SYMLINK);
+ if (!new_op)
+ return -ENOMEM;
+
+ new_op->upcall.req.sym.parent_refn = parent->refn;
+
+ fill_default_sys_attrs(new_op->upcall.req.sym.attributes,
+ ORANGEFS_TYPE_SYMLINK,
+ mode);
+
+ strncpy(new_op->upcall.req.sym.entry_name,
+ dentry->d_name.name,
+ ORANGEFS_NAME_MAX);
+ strncpy(new_op->upcall.req.sym.target, symname, ORANGEFS_NAME_MAX);
+
+ ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "Symlink Got ORANGEFS handle %pU on fsid %d (ret=%d)\n",
+ &new_op->downcall.resp.sym.refn.khandle,
+ new_op->downcall.resp.sym.refn.fs_id, ret);
+
+ if (ret < 0) {
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: failed with error code %d\n",
+ __func__, ret);
+ goto out;
+ }
+
+ inode = orangefs_new_inode(dir->i_sb, dir, S_IFLNK | mode, 0,
+ &new_op->downcall.resp.sym.refn);
+ if (IS_ERR(inode)) {
+ gossip_err
+ ("*** Failed to allocate orangefs symlink inode\n");
+ ret = PTR_ERR(inode);
+ goto out;
+ }
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "Assigned symlink inode new number of %pU\n",
+ get_khandle_from_ino(inode));
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "Inode (Symlink) %pU -> %s\n",
+ get_khandle_from_ino(inode),
+ dentry->d_name.name);
+
+ SetMtimeFlag(parent);
+ dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+ mark_inode_dirty_sync(dir);
+ ret = 0;
+out:
+ op_release(new_op);
+ return ret;
+}
+
+static int orangefs_mkdir(struct inode *dir, struct dentry *dentry, umode_t mode)
+{
+ struct orangefs_inode_s *parent = ORANGEFS_I(dir);
+ struct orangefs_kernel_op_s *new_op;
+ struct inode *inode;
+ int ret;
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_MKDIR);
+ if (!new_op)
+ return -ENOMEM;
+
+ new_op->upcall.req.mkdir.parent_refn = parent->refn;
+
+ fill_default_sys_attrs(new_op->upcall.req.mkdir.attributes,
+ ORANGEFS_TYPE_DIRECTORY, mode);
+
+ strncpy(new_op->upcall.req.mkdir.d_name,
+ dentry->d_name.name, ORANGEFS_NAME_MAX);
+
+ ret = service_operation(new_op, __func__, get_interruptible_flag(dir));
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "Mkdir Got ORANGEFS handle %pU on fsid %d\n",
+ &new_op->downcall.resp.mkdir.refn.khandle,
+ new_op->downcall.resp.mkdir.refn.fs_id);
+
+ if (ret < 0) {
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "%s: failed with error code %d\n",
+ __func__, ret);
+ goto out;
+ }
+
+ inode = orangefs_new_inode(dir->i_sb, dir, S_IFDIR | mode, 0,
+ &new_op->downcall.resp.mkdir.refn);
+ if (IS_ERR(inode)) {
+ gossip_err("*** Failed to allocate orangefs dir inode\n");
+ ret = PTR_ERR(inode);
+ goto out;
+ }
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "Assigned dir inode new number of %pU\n",
+ get_khandle_from_ino(inode));
+
+ d_instantiate(dentry, inode);
+ unlock_new_inode(inode);
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "Inode (Directory) %pU -> %s\n",
+ get_khandle_from_ino(inode),
+ dentry->d_name.name);
+
+ /*
+ * NOTE: we have no good way to keep nlink consistent for directories
+ * across clients; keep constant at 1.
+ */
+ SetMtimeFlag(parent);
+ dir->i_mtime = dir->i_ctime = current_fs_time(dir->i_sb);
+ mark_inode_dirty_sync(dir);
+out:
+ op_release(new_op);
+ return ret;
+}
+
+static int orangefs_rename(struct inode *old_dir,
+ struct dentry *old_dentry,
+ struct inode *new_dir,
+ struct dentry *new_dentry)
+{
+ struct orangefs_kernel_op_s *new_op;
+ int ret;
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "orangefs_rename: called (%s/%s => %s/%s) ct=%d\n",
+ old_dentry->d_parent->d_name.name,
+ old_dentry->d_name.name,
+ new_dentry->d_parent->d_name.name,
+ new_dentry->d_name.name,
+ d_count(new_dentry));
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_RENAME);
+ if (!new_op)
+ return -EINVAL;
+
+ new_op->upcall.req.rename.old_parent_refn = ORANGEFS_I(old_dir)->refn;
+ new_op->upcall.req.rename.new_parent_refn = ORANGEFS_I(new_dir)->refn;
+
+ strncpy(new_op->upcall.req.rename.d_old_name,
+ old_dentry->d_name.name,
+ ORANGEFS_NAME_MAX);
+ strncpy(new_op->upcall.req.rename.d_new_name,
+ new_dentry->d_name.name,
+ ORANGEFS_NAME_MAX);
+
+ ret = service_operation(new_op,
+ "orangefs_rename",
+ get_interruptible_flag(old_dentry->d_inode));
+
+ gossip_debug(GOSSIP_NAME_DEBUG,
+ "orangefs_rename: got downcall status %d\n",
+ ret);
+
+ if (new_dentry->d_inode)
+ new_dentry->d_inode->i_ctime = CURRENT_TIME;
+
+ op_release(new_op);
+ return ret;
+}
+
+/* ORANGEFS implementation of VFS inode operations for directories */
+struct inode_operations orangefs_dir_inode_operations = {
+ .lookup = orangefs_lookup,
+ .get_acl = orangefs_get_acl,
+ .set_acl = orangefs_set_acl,
+ .create = orangefs_create,
+ .unlink = orangefs_unlink,
+ .symlink = orangefs_symlink,
+ .mkdir = orangefs_mkdir,
+ .rmdir = orangefs_unlink,
+ .rename = orangefs_rename,
+ .setattr = orangefs_setattr,
+ .getattr = orangefs_getattr,
+ .setxattr = generic_setxattr,
+ .getxattr = generic_getxattr,
+ .removexattr = generic_removexattr,
+ .listxattr = orangefs_listxattr,
+ .permission = orangefs_permission,
+};
diff --git a/fs/orangefs/orangefs-bufmap.c b/fs/orangefs/orangefs-bufmap.c
new file mode 100644
index 000000000..75375e90a
--- /dev/null
+++ b/fs/orangefs/orangefs-bufmap.c
@@ -0,0 +1,556 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+struct slot_map {
+ int c;
+ wait_queue_head_t q;
+ int count;
+ unsigned long *map;
+};
+
+static struct slot_map rw_map = {
+ .c = -1,
+ .q = __WAIT_QUEUE_HEAD_INITIALIZER(rw_map.q)
+};
+static struct slot_map readdir_map = {
+ .c = -1,
+ .q = __WAIT_QUEUE_HEAD_INITIALIZER(readdir_map.q)
+};
+
+
+static void install(struct slot_map *m, int count, unsigned long *map)
+{
+ spin_lock(&m->q.lock);
+ m->c = m->count = count;
+ m->map = map;
+ wake_up_all_locked(&m->q);
+ spin_unlock(&m->q.lock);
+}
+
+static void mark_killed(struct slot_map *m)
+{
+ spin_lock(&m->q.lock);
+ m->c -= m->count + 1;
+ spin_unlock(&m->q.lock);
+}
+
+static void run_down(struct slot_map *m)
+{
+ DEFINE_WAIT(wait);
+ spin_lock(&m->q.lock);
+ if (m->c != -1) {
+ for (;;) {
+ if (likely(list_empty(&wait.task_list)))
+ __add_wait_queue_tail(&m->q, &wait);
+ set_current_state(TASK_UNINTERRUPTIBLE);
+
+ if (m->c == -1)
+ break;
+
+ spin_unlock(&m->q.lock);
+ schedule();
+ spin_lock(&m->q.lock);
+ }
+ __remove_wait_queue(&m->q, &wait);
+ __set_current_state(TASK_RUNNING);
+ }
+ m->map = NULL;
+ spin_unlock(&m->q.lock);
+}
+
+static void put(struct slot_map *m, int slot)
+{
+ int v;
+ spin_lock(&m->q.lock);
+ __clear_bit(slot, m->map);
+ v = ++m->c;
+ if (unlikely(v == 1)) /* no free slots -> one free slot */
+ wake_up_locked(&m->q);
+ else if (unlikely(v == -1)) /* finished dying */
+ wake_up_all_locked(&m->q);
+ spin_unlock(&m->q.lock);
+}
+
+static int wait_for_free(struct slot_map *m)
+{
+ long left = slot_timeout_secs * HZ;
+ DEFINE_WAIT(wait);
+
+ do {
+ long n = left, t;
+ if (likely(list_empty(&wait.task_list)))
+ __add_wait_queue_tail_exclusive(&m->q, &wait);
+ set_current_state(TASK_INTERRUPTIBLE);
+
+ if (m->c > 0)
+ break;
+
+ if (m->c < 0) {
+ /* we are waiting for map to be installed */
+ /* it would better be there soon, or we go away */
+ if (n > ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ)
+ n = ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS * HZ;
+ }
+ spin_unlock(&m->q.lock);
+ t = schedule_timeout(n);
+ spin_lock(&m->q.lock);
+ if (unlikely(!t) && n != left && m->c < 0)
+ left = t;
+ else
+ left = t + (left - n);
+ if (unlikely(signal_pending(current)))
+ left = -EINTR;
+ } while (left > 0);
+
+ if (!list_empty(&wait.task_list))
+ list_del(&wait.task_list);
+ else if (left <= 0 && waitqueue_active(&m->q))
+ __wake_up_locked_key(&m->q, TASK_INTERRUPTIBLE, NULL);
+ __set_current_state(TASK_RUNNING);
+
+ if (likely(left > 0))
+ return 0;
+
+ return left < 0 ? -EINTR : -ETIMEDOUT;
+}
+
+static int get(struct slot_map *m)
+{
+ int res = 0;
+ spin_lock(&m->q.lock);
+ if (unlikely(m->c <= 0))
+ res = wait_for_free(m);
+ if (likely(!res)) {
+ m->c--;
+ res = find_first_zero_bit(m->map, m->count);
+ __set_bit(res, m->map);
+ }
+ spin_unlock(&m->q.lock);
+ return res;
+}
+
+/* used to describe mapped buffers */
+struct orangefs_bufmap_desc {
+ void *uaddr; /* user space address pointer */
+ struct page **page_array; /* array of mapped pages */
+ int array_count; /* size of above arrays */
+ struct list_head list_link;
+};
+
+static struct orangefs_bufmap {
+ int desc_size;
+ int desc_shift;
+ int desc_count;
+ int total_size;
+ int page_count;
+
+ struct page **page_array;
+ struct orangefs_bufmap_desc *desc_array;
+
+ /* array to track usage of buffer descriptors */
+ unsigned long *buffer_index_array;
+
+ /* array to track usage of buffer descriptors for readdir */
+#define N DIV_ROUND_UP(ORANGEFS_READDIR_DEFAULT_DESC_COUNT, BITS_PER_LONG)
+ unsigned long readdir_index_array[N];
+#undef N
+} *__orangefs_bufmap;
+
+static DEFINE_SPINLOCK(orangefs_bufmap_lock);
+
+static void
+orangefs_bufmap_unmap(struct orangefs_bufmap *bufmap)
+{
+ int i;
+
+ for (i = 0; i < bufmap->page_count; i++)
+ put_page(bufmap->page_array[i]);
+}
+
+static void
+orangefs_bufmap_free(struct orangefs_bufmap *bufmap)
+{
+ kfree(bufmap->page_array);
+ kfree(bufmap->desc_array);
+ kfree(bufmap->buffer_index_array);
+ kfree(bufmap);
+}
+
+/*
+ * XXX: Can the size and shift change while the caller gives up the
+ * XXX: lock between calling this and doing something useful?
+ */
+
+int orangefs_bufmap_size_query(void)
+{
+ struct orangefs_bufmap *bufmap;
+ int size = 0;
+ spin_lock(&orangefs_bufmap_lock);
+ bufmap = __orangefs_bufmap;
+ if (bufmap)
+ size = bufmap->desc_size;
+ spin_unlock(&orangefs_bufmap_lock);
+ return size;
+}
+
+int orangefs_bufmap_shift_query(void)
+{
+ struct orangefs_bufmap *bufmap;
+ int shift = 0;
+ spin_lock(&orangefs_bufmap_lock);
+ bufmap = __orangefs_bufmap;
+ if (bufmap)
+ shift = bufmap->desc_shift;
+ spin_unlock(&orangefs_bufmap_lock);
+ return shift;
+}
+
+static DECLARE_WAIT_QUEUE_HEAD(bufmap_waitq);
+static DECLARE_WAIT_QUEUE_HEAD(readdir_waitq);
+
+/*
+ * orangefs_get_bufmap_init
+ *
+ * If bufmap_init is 1, then the shared memory system, including the
+ * buffer_index_array, is available. Otherwise, it is not.
+ *
+ * returns the value of bufmap_init
+ */
+int orangefs_get_bufmap_init(void)
+{
+ return __orangefs_bufmap ? 1 : 0;
+}
+
+
+static struct orangefs_bufmap *
+orangefs_bufmap_alloc(struct ORANGEFS_dev_map_desc *user_desc)
+{
+ struct orangefs_bufmap *bufmap;
+
+ bufmap = kzalloc(sizeof(*bufmap), GFP_KERNEL);
+ if (!bufmap)
+ goto out;
+
+ bufmap->total_size = user_desc->total_size;
+ bufmap->desc_count = user_desc->count;
+ bufmap->desc_size = user_desc->size;
+ bufmap->desc_shift = ilog2(bufmap->desc_size);
+
+ bufmap->buffer_index_array =
+ kzalloc(DIV_ROUND_UP(bufmap->desc_count, BITS_PER_LONG), GFP_KERNEL);
+ if (!bufmap->buffer_index_array) {
+ gossip_err("orangefs: could not allocate %d buffer indices\n",
+ bufmap->desc_count);
+ goto out_free_bufmap;
+ }
+
+ bufmap->desc_array =
+ kcalloc(bufmap->desc_count, sizeof(struct orangefs_bufmap_desc),
+ GFP_KERNEL);
+ if (!bufmap->desc_array) {
+ gossip_err("orangefs: could not allocate %d descriptors\n",
+ bufmap->desc_count);
+ goto out_free_index_array;
+ }
+
+ bufmap->page_count = bufmap->total_size / PAGE_SIZE;
+
+ /* allocate storage to track our page mappings */
+ bufmap->page_array =
+ kcalloc(bufmap->page_count, sizeof(struct page *), GFP_KERNEL);
+ if (!bufmap->page_array)
+ goto out_free_desc_array;
+
+ return bufmap;
+
+out_free_desc_array:
+ kfree(bufmap->desc_array);
+out_free_index_array:
+ kfree(bufmap->buffer_index_array);
+out_free_bufmap:
+ kfree(bufmap);
+out:
+ return NULL;
+}
+
+static int
+orangefs_bufmap_map(struct orangefs_bufmap *bufmap,
+ struct ORANGEFS_dev_map_desc *user_desc)
+{
+ int pages_per_desc = bufmap->desc_size / PAGE_SIZE;
+ int offset = 0, ret, i;
+
+ /* map the pages */
+ ret = get_user_pages_fast((unsigned long)user_desc->ptr,
+ bufmap->page_count, 1, bufmap->page_array);
+
+ if (ret < 0)
+ return ret;
+
+ if (ret != bufmap->page_count) {
+ gossip_err("orangefs error: asked for %d pages, only got %d.\n",
+ bufmap->page_count, ret);
+
+ for (i = 0; i < ret; i++) {
+ SetPageError(bufmap->page_array[i]);
+ put_page(bufmap->page_array[i]);
+ }
+ return -ENOMEM;
+ }
+
+ /*
+ * ideally we want to get kernel space pointers for each page, but
+ * we can't kmap that many pages at once if highmem is being used.
+ * so instead, we just kmap/kunmap the page address each time the
+ * kaddr is needed.
+ */
+ for (i = 0; i < bufmap->page_count; i++)
+ flush_dcache_page(bufmap->page_array[i]);
+
+ /* build a list of available descriptors */
+ for (offset = 0, i = 0; i < bufmap->desc_count; i++) {
+ bufmap->desc_array[i].page_array = &bufmap->page_array[offset];
+ bufmap->desc_array[i].array_count = pages_per_desc;
+ bufmap->desc_array[i].uaddr =
+ (user_desc->ptr + (i * pages_per_desc * PAGE_SIZE));
+ offset += pages_per_desc;
+ }
+
+ return 0;
+}
+
+/*
+ * orangefs_bufmap_initialize()
+ *
+ * initializes the mapped buffer interface
+ *
+ * returns 0 on success, -errno on failure
+ */
+int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc)
+{
+ struct orangefs_bufmap *bufmap;
+ int ret = -EINVAL;
+
+ gossip_debug(GOSSIP_BUFMAP_DEBUG,
+ "orangefs_bufmap_initialize: called (ptr ("
+ "%p) sz (%d) cnt(%d).\n",
+ user_desc->ptr,
+ user_desc->size,
+ user_desc->count);
+
+ /*
+ * sanity check alignment and size of buffer that caller wants to
+ * work with
+ */
+ if (PAGE_ALIGN((unsigned long)user_desc->ptr) !=
+ (unsigned long)user_desc->ptr) {
+ gossip_err("orangefs error: memory alignment (front). %p\n",
+ user_desc->ptr);
+ goto out;
+ }
+
+ if (PAGE_ALIGN(((unsigned long)user_desc->ptr + user_desc->total_size))
+ != (unsigned long)(user_desc->ptr + user_desc->total_size)) {
+ gossip_err("orangefs error: memory alignment (back).(%p + %d)\n",
+ user_desc->ptr,
+ user_desc->total_size);
+ goto out;
+ }
+
+ if (user_desc->total_size != (user_desc->size * user_desc->count)) {
+ gossip_err("orangefs error: user provided an oddly sized buffer: (%d, %d, %d)\n",
+ user_desc->total_size,
+ user_desc->size,
+ user_desc->count);
+ goto out;
+ }
+
+ if ((user_desc->size % PAGE_SIZE) != 0) {
+ gossip_err("orangefs error: bufmap size not page size divisible (%d).\n",
+ user_desc->size);
+ goto out;
+ }
+
+ ret = -ENOMEM;
+ bufmap = orangefs_bufmap_alloc(user_desc);
+ if (!bufmap)
+ goto out;
+
+ ret = orangefs_bufmap_map(bufmap, user_desc);
+ if (ret)
+ goto out_free_bufmap;
+
+
+ spin_lock(&orangefs_bufmap_lock);
+ if (__orangefs_bufmap) {
+ spin_unlock(&orangefs_bufmap_lock);
+ gossip_err("orangefs: error: bufmap already initialized.\n");
+ ret = -EINVAL;
+ goto out_unmap_bufmap;
+ }
+ __orangefs_bufmap = bufmap;
+ install(&rw_map,
+ bufmap->desc_count,
+ bufmap->buffer_index_array);
+ install(&readdir_map,
+ ORANGEFS_READDIR_DEFAULT_DESC_COUNT,
+ bufmap->readdir_index_array);
+ spin_unlock(&orangefs_bufmap_lock);
+
+ gossip_debug(GOSSIP_BUFMAP_DEBUG,
+ "orangefs_bufmap_initialize: exiting normally\n");
+ return 0;
+
+out_unmap_bufmap:
+ orangefs_bufmap_unmap(bufmap);
+out_free_bufmap:
+ orangefs_bufmap_free(bufmap);
+out:
+ return ret;
+}
+
+/*
+ * orangefs_bufmap_finalize()
+ *
+ * shuts down the mapped buffer interface and releases any resources
+ * associated with it
+ *
+ * no return value
+ */
+void orangefs_bufmap_finalize(void)
+{
+ struct orangefs_bufmap *bufmap = __orangefs_bufmap;
+ if (!bufmap)
+ return;
+ gossip_debug(GOSSIP_BUFMAP_DEBUG, "orangefs_bufmap_finalize: called\n");
+ mark_killed(&rw_map);
+ mark_killed(&readdir_map);
+ gossip_debug(GOSSIP_BUFMAP_DEBUG,
+ "orangefs_bufmap_finalize: exiting normally\n");
+}
+
+void orangefs_bufmap_run_down(void)
+{
+ struct orangefs_bufmap *bufmap = __orangefs_bufmap;
+ if (!bufmap)
+ return;
+ run_down(&rw_map);
+ run_down(&readdir_map);
+ spin_lock(&orangefs_bufmap_lock);
+ __orangefs_bufmap = NULL;
+ spin_unlock(&orangefs_bufmap_lock);
+ orangefs_bufmap_unmap(bufmap);
+ orangefs_bufmap_free(bufmap);
+}
+
+/*
+ * orangefs_bufmap_get()
+ *
+ * gets a free mapped buffer descriptor, will sleep until one becomes
+ * available if necessary
+ *
+ * returns slot on success, -errno on failure
+ */
+int orangefs_bufmap_get(void)
+{
+ return get(&rw_map);
+}
+
+/*
+ * orangefs_bufmap_put()
+ *
+ * returns a mapped buffer descriptor to the collection
+ *
+ * no return value
+ */
+void orangefs_bufmap_put(int buffer_index)
+{
+ put(&rw_map, buffer_index);
+}
+
+/*
+ * orangefs_readdir_index_get()
+ *
+ * gets a free descriptor, will sleep until one becomes
+ * available if necessary.
+ * Although the readdir buffers are not mapped into kernel space
+ * we could do that at a later point of time. Regardless, these
+ * indices are used by the client-core.
+ *
+ * returns slot on success, -errno on failure
+ */
+int orangefs_readdir_index_get(void)
+{
+ return get(&readdir_map);
+}
+
+void orangefs_readdir_index_put(int buffer_index)
+{
+ put(&readdir_map, buffer_index);
+}
+
+/*
+ * we've been handed an iovec, we need to copy it to
+ * the shared memory descriptor at "buffer_index".
+ */
+int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter,
+ int buffer_index,
+ size_t size)
+{
+ struct orangefs_bufmap_desc *to;
+ int i;
+
+ gossip_debug(GOSSIP_BUFMAP_DEBUG,
+ "%s: buffer_index:%d: size:%zu:\n",
+ __func__, buffer_index, size);
+
+ to = &__orangefs_bufmap->desc_array[buffer_index];
+ for (i = 0; size; i++) {
+ struct page *page = to->page_array[i];
+ size_t n = size;
+ if (n > PAGE_SIZE)
+ n = PAGE_SIZE;
+ n = copy_page_from_iter(page, 0, n, iter);
+ if (!n)
+ return -EFAULT;
+ size -= n;
+ }
+ return 0;
+
+}
+
+/*
+ * we've been handed an iovec, we need to fill it from
+ * the shared memory descriptor at "buffer_index".
+ */
+int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter,
+ int buffer_index,
+ size_t size)
+{
+ struct orangefs_bufmap_desc *from;
+ int i;
+
+ from = &__orangefs_bufmap->desc_array[buffer_index];
+ gossip_debug(GOSSIP_BUFMAP_DEBUG,
+ "%s: buffer_index:%d: size:%zu:\n",
+ __func__, buffer_index, size);
+
+
+ for (i = 0; size; i++) {
+ struct page *page = from->page_array[i];
+ size_t n = size;
+ if (n > PAGE_SIZE)
+ n = PAGE_SIZE;
+ n = copy_page_to_iter(page, 0, n, iter);
+ if (!n)
+ return -EFAULT;
+ size -= n;
+ }
+ return 0;
+}
diff --git a/fs/orangefs/orangefs-bufmap.h b/fs/orangefs/orangefs-bufmap.h
new file mode 100644
index 000000000..71f64f405
--- /dev/null
+++ b/fs/orangefs/orangefs-bufmap.h
@@ -0,0 +1,36 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#ifndef __ORANGEFS_BUFMAP_H
+#define __ORANGEFS_BUFMAP_H
+
+int orangefs_bufmap_size_query(void);
+
+int orangefs_bufmap_shift_query(void);
+
+int orangefs_bufmap_initialize(struct ORANGEFS_dev_map_desc *user_desc);
+
+void orangefs_bufmap_finalize(void);
+
+void orangefs_bufmap_run_down(void);
+
+int orangefs_bufmap_get(void);
+
+void orangefs_bufmap_put(int buffer_index);
+
+int orangefs_readdir_index_get(void);
+
+void orangefs_readdir_index_put(int buffer_index);
+
+int orangefs_bufmap_copy_from_iovec(struct iov_iter *iter,
+ int buffer_index,
+ size_t size);
+
+int orangefs_bufmap_copy_to_iovec(struct iov_iter *iter,
+ int buffer_index,
+ size_t size);
+
+#endif /* __ORANGEFS_BUFMAP_H */
diff --git a/fs/orangefs/orangefs-cache.c b/fs/orangefs/orangefs-cache.c
new file mode 100644
index 000000000..900a2e38e
--- /dev/null
+++ b/fs/orangefs/orangefs-cache.c
@@ -0,0 +1,161 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+
+/* tags assigned to kernel upcall operations */
+static __u64 next_tag_value;
+static DEFINE_SPINLOCK(next_tag_value_lock);
+
+/* the orangefs memory caches */
+
+/* a cache for orangefs upcall/downcall operations */
+static struct kmem_cache *op_cache;
+
+int op_cache_initialize(void)
+{
+ op_cache = kmem_cache_create("orangefs_op_cache",
+ sizeof(struct orangefs_kernel_op_s),
+ 0,
+ ORANGEFS_CACHE_CREATE_FLAGS,
+ NULL);
+
+ if (!op_cache) {
+ gossip_err("Cannot create orangefs_op_cache\n");
+ return -ENOMEM;
+ }
+
+ /* initialize our atomic tag counter */
+ spin_lock(&next_tag_value_lock);
+ next_tag_value = 100;
+ spin_unlock(&next_tag_value_lock);
+ return 0;
+}
+
+int op_cache_finalize(void)
+{
+ kmem_cache_destroy(op_cache);
+ return 0;
+}
+
+char *get_opname_string(struct orangefs_kernel_op_s *new_op)
+{
+ if (new_op) {
+ __s32 type = new_op->upcall.type;
+
+ if (type == ORANGEFS_VFS_OP_FILE_IO)
+ return "OP_FILE_IO";
+ else if (type == ORANGEFS_VFS_OP_LOOKUP)
+ return "OP_LOOKUP";
+ else if (type == ORANGEFS_VFS_OP_CREATE)
+ return "OP_CREATE";
+ else if (type == ORANGEFS_VFS_OP_GETATTR)
+ return "OP_GETATTR";
+ else if (type == ORANGEFS_VFS_OP_REMOVE)
+ return "OP_REMOVE";
+ else if (type == ORANGEFS_VFS_OP_MKDIR)
+ return "OP_MKDIR";
+ else if (type == ORANGEFS_VFS_OP_READDIR)
+ return "OP_READDIR";
+ else if (type == ORANGEFS_VFS_OP_READDIRPLUS)
+ return "OP_READDIRPLUS";
+ else if (type == ORANGEFS_VFS_OP_SETATTR)
+ return "OP_SETATTR";
+ else if (type == ORANGEFS_VFS_OP_SYMLINK)
+ return "OP_SYMLINK";
+ else if (type == ORANGEFS_VFS_OP_RENAME)
+ return "OP_RENAME";
+ else if (type == ORANGEFS_VFS_OP_STATFS)
+ return "OP_STATFS";
+ else if (type == ORANGEFS_VFS_OP_TRUNCATE)
+ return "OP_TRUNCATE";
+ else if (type == ORANGEFS_VFS_OP_MMAP_RA_FLUSH)
+ return "OP_MMAP_RA_FLUSH";
+ else if (type == ORANGEFS_VFS_OP_FS_MOUNT)
+ return "OP_FS_MOUNT";
+ else if (type == ORANGEFS_VFS_OP_FS_UMOUNT)
+ return "OP_FS_UMOUNT";
+ else if (type == ORANGEFS_VFS_OP_GETXATTR)
+ return "OP_GETXATTR";
+ else if (type == ORANGEFS_VFS_OP_SETXATTR)
+ return "OP_SETXATTR";
+ else if (type == ORANGEFS_VFS_OP_LISTXATTR)
+ return "OP_LISTXATTR";
+ else if (type == ORANGEFS_VFS_OP_REMOVEXATTR)
+ return "OP_REMOVEXATTR";
+ else if (type == ORANGEFS_VFS_OP_PARAM)
+ return "OP_PARAM";
+ else if (type == ORANGEFS_VFS_OP_PERF_COUNT)
+ return "OP_PERF_COUNT";
+ else if (type == ORANGEFS_VFS_OP_CANCEL)
+ return "OP_CANCEL";
+ else if (type == ORANGEFS_VFS_OP_FSYNC)
+ return "OP_FSYNC";
+ else if (type == ORANGEFS_VFS_OP_FSKEY)
+ return "OP_FSKEY";
+ }
+ return "OP_UNKNOWN?";
+}
+
+void orangefs_new_tag(struct orangefs_kernel_op_s *op)
+{
+ spin_lock(&next_tag_value_lock);
+ op->tag = next_tag_value++;
+ if (next_tag_value == 0)
+ next_tag_value = 100;
+ spin_unlock(&next_tag_value_lock);
+}
+
+struct orangefs_kernel_op_s *op_alloc(__s32 type)
+{
+ struct orangefs_kernel_op_s *new_op = NULL;
+
+ new_op = kmem_cache_zalloc(op_cache, GFP_KERNEL);
+ if (new_op) {
+ INIT_LIST_HEAD(&new_op->list);
+ spin_lock_init(&new_op->lock);
+ init_completion(&new_op->waitq);
+
+ new_op->upcall.type = ORANGEFS_VFS_OP_INVALID;
+ new_op->downcall.type = ORANGEFS_VFS_OP_INVALID;
+ new_op->downcall.status = -1;
+
+ new_op->op_state = OP_VFS_STATE_UNKNOWN;
+
+ /* initialize the op specific tag and upcall credentials */
+ orangefs_new_tag(new_op);
+ new_op->upcall.type = type;
+ new_op->attempts = 0;
+ gossip_debug(GOSSIP_CACHE_DEBUG,
+ "Alloced OP (%p: %llu %s)\n",
+ new_op,
+ llu(new_op->tag),
+ get_opname_string(new_op));
+
+ new_op->upcall.uid = from_kuid(current_user_ns(),
+ current_fsuid());
+
+ new_op->upcall.gid = from_kgid(current_user_ns(),
+ current_fsgid());
+ } else {
+ gossip_err("op_alloc: kmem_cache_zalloc failed!\n");
+ }
+ return new_op;
+}
+
+void op_release(struct orangefs_kernel_op_s *orangefs_op)
+{
+ if (orangefs_op) {
+ gossip_debug(GOSSIP_CACHE_DEBUG,
+ "Releasing OP (%p: %llu)\n",
+ orangefs_op,
+ llu(orangefs_op->tag));
+ kmem_cache_free(op_cache, orangefs_op);
+ } else {
+ gossip_err("NULL pointer in op_release\n");
+ }
+}
diff --git a/fs/orangefs/orangefs-debug.h b/fs/orangefs/orangefs-debug.h
new file mode 100644
index 000000000..387db17cd
--- /dev/null
+++ b/fs/orangefs/orangefs-debug.h
@@ -0,0 +1,92 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/* This file just defines debugging masks to be used with the gossip
+ * logging utility. All debugging masks for ORANGEFS are kept here to make
+ * sure we don't have collisions.
+ */
+
+#ifndef __ORANGEFS_DEBUG_H
+#define __ORANGEFS_DEBUG_H
+
+#ifdef __KERNEL__
+#include <linux/types.h>
+#else
+#include <stdint.h>
+#endif
+
+#define GOSSIP_NO_DEBUG (__u64)0
+
+#define GOSSIP_SUPER_DEBUG ((__u64)1 << 0)
+#define GOSSIP_INODE_DEBUG ((__u64)1 << 1)
+#define GOSSIP_FILE_DEBUG ((__u64)1 << 2)
+#define GOSSIP_DIR_DEBUG ((__u64)1 << 3)
+#define GOSSIP_UTILS_DEBUG ((__u64)1 << 4)
+#define GOSSIP_WAIT_DEBUG ((__u64)1 << 5)
+#define GOSSIP_ACL_DEBUG ((__u64)1 << 6)
+#define GOSSIP_DCACHE_DEBUG ((__u64)1 << 7)
+#define GOSSIP_DEV_DEBUG ((__u64)1 << 8)
+#define GOSSIP_NAME_DEBUG ((__u64)1 << 9)
+#define GOSSIP_BUFMAP_DEBUG ((__u64)1 << 10)
+#define GOSSIP_CACHE_DEBUG ((__u64)1 << 11)
+#define GOSSIP_DEBUGFS_DEBUG ((__u64)1 << 12)
+#define GOSSIP_XATTR_DEBUG ((__u64)1 << 13)
+#define GOSSIP_INIT_DEBUG ((__u64)1 << 14)
+#define GOSSIP_SYSFS_DEBUG ((__u64)1 << 15)
+
+#define GOSSIP_MAX_NR 16
+#define GOSSIP_MAX_DEBUG (((__u64)1 << GOSSIP_MAX_NR) - 1)
+
+/*function prototypes*/
+__u64 ORANGEFS_kmod_eventlog_to_mask(const char *event_logging);
+__u64 ORANGEFS_debug_eventlog_to_mask(const char *event_logging);
+char *ORANGEFS_debug_mask_to_eventlog(__u64 mask);
+char *ORANGEFS_kmod_mask_to_eventlog(__u64 mask);
+
+/* a private internal type */
+struct __keyword_mask_s {
+ const char *keyword;
+ __u64 mask_val;
+};
+
+/*
+ * Map all kmod keywords to kmod debug masks here. Keep this
+ * structure "packed":
+ *
+ * "all" is always last...
+ *
+ * keyword mask_val index
+ * foo 1 0
+ * bar 2 1
+ * baz 4 2
+ * qux 8 3
+ * . . .
+ */
+static struct __keyword_mask_s s_kmod_keyword_mask_map[] = {
+ {"super", GOSSIP_SUPER_DEBUG},
+ {"inode", GOSSIP_INODE_DEBUG},
+ {"file", GOSSIP_FILE_DEBUG},
+ {"dir", GOSSIP_DIR_DEBUG},
+ {"utils", GOSSIP_UTILS_DEBUG},
+ {"wait", GOSSIP_WAIT_DEBUG},
+ {"acl", GOSSIP_ACL_DEBUG},
+ {"dcache", GOSSIP_DCACHE_DEBUG},
+ {"dev", GOSSIP_DEV_DEBUG},
+ {"name", GOSSIP_NAME_DEBUG},
+ {"bufmap", GOSSIP_BUFMAP_DEBUG},
+ {"cache", GOSSIP_CACHE_DEBUG},
+ {"debugfs", GOSSIP_DEBUGFS_DEBUG},
+ {"xattr", GOSSIP_XATTR_DEBUG},
+ {"init", GOSSIP_INIT_DEBUG},
+ {"sysfs", GOSSIP_SYSFS_DEBUG},
+ {"none", GOSSIP_NO_DEBUG},
+ {"all", GOSSIP_MAX_DEBUG}
+};
+
+static const int num_kmod_keyword_mask_map = (int)
+ (sizeof(s_kmod_keyword_mask_map) / sizeof(struct __keyword_mask_s));
+
+#endif /* __ORANGEFS_DEBUG_H */
diff --git a/fs/orangefs/orangefs-debugfs.c b/fs/orangefs/orangefs-debugfs.c
new file mode 100644
index 000000000..1714a737d
--- /dev/null
+++ b/fs/orangefs/orangefs-debugfs.c
@@ -0,0 +1,454 @@
+/*
+ * What: /sys/kernel/debug/orangefs/debug-help
+ * Date: June 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * List of client and kernel debug keywords.
+ *
+ *
+ * What: /sys/kernel/debug/orangefs/client-debug
+ * Date: June 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Debug setting for "the client", the userspace
+ * helper for the kernel module.
+ *
+ *
+ * What: /sys/kernel/debug/orangefs/kernel-debug
+ * Date: June 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Debug setting for the orangefs kernel module.
+ *
+ * Any of the keywords, or comma-separated lists
+ * of keywords, from debug-help can be catted to
+ * client-debug or kernel-debug.
+ *
+ * "none", "all" and "verbose" are special keywords
+ * for client-debug. Setting client-debug to "all"
+ * is kind of like trying to drink water from a
+ * fire hose, "verbose" triggers most of the same
+ * output except for the constant flow of output
+ * from the main wait loop.
+ *
+ * "none" and "all" are similar settings for kernel-debug
+ * no need for a "verbose".
+ */
+#include <linux/debugfs.h>
+#include <linux/slab.h>
+
+#include <linux/uaccess.h>
+
+#include "orangefs-debugfs.h"
+#include "protocol.h"
+#include "orangefs-kernel.h"
+
+static int orangefs_debug_disabled = 1;
+
+static int orangefs_debug_help_open(struct inode *, struct file *);
+
+const struct file_operations debug_help_fops = {
+ .open = orangefs_debug_help_open,
+ .read = seq_read,
+ .release = seq_release,
+ .llseek = seq_lseek,
+};
+
+static void *help_start(struct seq_file *, loff_t *);
+static void *help_next(struct seq_file *, void *, loff_t *);
+static void help_stop(struct seq_file *, void *);
+static int help_show(struct seq_file *, void *);
+
+static const struct seq_operations help_debug_ops = {
+ .start = help_start,
+ .next = help_next,
+ .stop = help_stop,
+ .show = help_show,
+};
+
+/*
+ * Used to protect data in ORANGEFS_KMOD_DEBUG_FILE and
+ * ORANGEFS_KMOD_DEBUG_FILE.
+ */
+static DEFINE_MUTEX(orangefs_debug_lock);
+
+int orangefs_debug_open(struct inode *, struct file *);
+
+static ssize_t orangefs_debug_read(struct file *,
+ char __user *,
+ size_t,
+ loff_t *);
+
+static ssize_t orangefs_debug_write(struct file *,
+ const char __user *,
+ size_t,
+ loff_t *);
+
+static const struct file_operations kernel_debug_fops = {
+ .open = orangefs_debug_open,
+ .read = orangefs_debug_read,
+ .write = orangefs_debug_write,
+ .llseek = generic_file_llseek,
+};
+
+/*
+ * initialize kmod debug operations, create orangefs debugfs dir and
+ * ORANGEFS_KMOD_DEBUG_HELP_FILE.
+ */
+int orangefs_debugfs_init(void)
+{
+
+ int rc = -ENOMEM;
+
+ debug_dir = debugfs_create_dir("orangefs", NULL);
+ if (!debug_dir) {
+ pr_info("%s: debugfs_create_dir failed.\n", __func__);
+ goto out;
+ }
+
+ help_file_dentry = debugfs_create_file(ORANGEFS_KMOD_DEBUG_HELP_FILE,
+ 0444,
+ debug_dir,
+ debug_help_string,
+ &debug_help_fops);
+ if (!help_file_dentry) {
+ pr_info("%s: debugfs_create_file failed.\n", __func__);
+ goto out;
+ }
+
+ orangefs_debug_disabled = 0;
+ rc = 0;
+
+out:
+
+ return rc;
+}
+
+void orangefs_debugfs_cleanup(void)
+{
+ debugfs_remove_recursive(debug_dir);
+}
+
+/* open ORANGEFS_KMOD_DEBUG_HELP_FILE */
+static int orangefs_debug_help_open(struct inode *inode, struct file *file)
+{
+ int rc = -ENODEV;
+ int ret;
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "orangefs_debug_help_open: start\n");
+
+ if (orangefs_debug_disabled)
+ goto out;
+
+ ret = seq_open(file, &help_debug_ops);
+ if (ret)
+ goto out;
+
+ ((struct seq_file *)(file->private_data))->private = inode->i_private;
+
+ rc = 0;
+
+out:
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "orangefs_debug_help_open: rc:%d:\n",
+ rc);
+ return rc;
+}
+
+/*
+ * I think start always gets called again after stop. Start
+ * needs to return NULL when it is done. The whole "payload"
+ * in this case is a single (long) string, so by the second
+ * time we get to start (pos = 1), we're done.
+ */
+static void *help_start(struct seq_file *m, loff_t *pos)
+{
+ void *payload = NULL;
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_start: start\n");
+
+ if (*pos == 0)
+ payload = m->private;
+
+ return payload;
+}
+
+static void *help_next(struct seq_file *m, void *v, loff_t *pos)
+{
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_next: start\n");
+
+ return NULL;
+}
+
+static void help_stop(struct seq_file *m, void *p)
+{
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_stop: start\n");
+}
+
+static int help_show(struct seq_file *m, void *v)
+{
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "help_show: start\n");
+
+ seq_puts(m, v);
+
+ return 0;
+}
+
+/*
+ * initialize the kernel-debug file.
+ */
+int orangefs_kernel_debug_init(void)
+{
+ int rc = -ENOMEM;
+ struct dentry *ret;
+ char *k_buffer = NULL;
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
+
+ k_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+ if (!k_buffer)
+ goto out;
+
+ if (strlen(kernel_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
+ strcpy(k_buffer, kernel_debug_string);
+ strcat(k_buffer, "\n");
+ } else {
+ strcpy(k_buffer, "none\n");
+ pr_info("%s: overflow 1!\n", __func__);
+ }
+
+ ret = debugfs_create_file(ORANGEFS_KMOD_DEBUG_FILE,
+ 0444,
+ debug_dir,
+ k_buffer,
+ &kernel_debug_fops);
+ if (!ret) {
+ pr_info("%s: failed to create %s.\n",
+ __func__,
+ ORANGEFS_KMOD_DEBUG_FILE);
+ goto out;
+ }
+
+ rc = 0;
+
+out:
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+ return rc;
+}
+
+/*
+ * initialize the client-debug file.
+ */
+int orangefs_client_debug_init(void)
+{
+
+ int rc = -ENOMEM;
+ char *c_buffer = NULL;
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: start\n", __func__);
+
+ c_buffer = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+ if (!c_buffer)
+ goto out;
+
+ if (strlen(client_debug_string) + 1 < ORANGEFS_MAX_DEBUG_STRING_LEN) {
+ strcpy(c_buffer, client_debug_string);
+ strcat(c_buffer, "\n");
+ } else {
+ strcpy(c_buffer, "none\n");
+ pr_info("%s: overflow! 2\n", __func__);
+ }
+
+ client_debug_dentry = debugfs_create_file(ORANGEFS_CLIENT_DEBUG_FILE,
+ 0444,
+ debug_dir,
+ c_buffer,
+ &kernel_debug_fops);
+ if (!client_debug_dentry) {
+ pr_info("%s: failed to create updated %s.\n",
+ __func__,
+ ORANGEFS_CLIENT_DEBUG_FILE);
+ goto out;
+ }
+
+ rc = 0;
+
+out:
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+ return rc;
+}
+
+/* open ORANGEFS_KMOD_DEBUG_FILE or ORANGEFS_CLIENT_DEBUG_FILE.*/
+int orangefs_debug_open(struct inode *inode, struct file *file)
+{
+ int rc = -ENODEV;
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "%s: orangefs_debug_disabled: %d\n",
+ __func__,
+ orangefs_debug_disabled);
+
+ if (orangefs_debug_disabled)
+ goto out;
+
+ rc = 0;
+ mutex_lock(&orangefs_debug_lock);
+ file->private_data = inode->i_private;
+ mutex_unlock(&orangefs_debug_lock);
+
+out:
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "orangefs_debug_open: rc: %d\n",
+ rc);
+ return rc;
+}
+
+static ssize_t orangefs_debug_read(struct file *file,
+ char __user *ubuf,
+ size_t count,
+ loff_t *ppos)
+{
+ char *buf;
+ int sprintf_ret;
+ ssize_t read_ret = -ENOMEM;
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG, "orangefs_debug_read: start\n");
+
+ buf = kmalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+ if (!buf)
+ goto out;
+
+ mutex_lock(&orangefs_debug_lock);
+ sprintf_ret = sprintf(buf, "%s", (char *)file->private_data);
+ mutex_unlock(&orangefs_debug_lock);
+
+ read_ret = simple_read_from_buffer(ubuf, count, ppos, buf, sprintf_ret);
+
+ kfree(buf);
+
+out:
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "orangefs_debug_read: ret: %zu\n",
+ read_ret);
+
+ return read_ret;
+}
+
+static ssize_t orangefs_debug_write(struct file *file,
+ const char __user *ubuf,
+ size_t count,
+ loff_t *ppos)
+{
+ char *buf;
+ int rc = -EFAULT;
+ size_t silly = 0;
+ char *debug_string;
+ struct orangefs_kernel_op_s *new_op = NULL;
+ struct client_debug_mask c_mask = { NULL, 0, 0 };
+
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "orangefs_debug_write: %s\n",
+ file->f_path.dentry->d_name.name);
+
+ /*
+ * Thwart users who try to jamb a ridiculous number
+ * of bytes into the debug file...
+ */
+ if (count > ORANGEFS_MAX_DEBUG_STRING_LEN + 1) {
+ silly = count;
+ count = ORANGEFS_MAX_DEBUG_STRING_LEN + 1;
+ }
+
+ buf = kzalloc(ORANGEFS_MAX_DEBUG_STRING_LEN, GFP_KERNEL);
+ if (!buf)
+ goto out;
+
+ if (copy_from_user(buf, ubuf, count - 1)) {
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "%s: copy_from_user failed!\n",
+ __func__);
+ goto out;
+ }
+
+ /*
+ * Map the keyword string from userspace into a valid debug mask.
+ * The mapping process involves mapping the human-inputted string
+ * into a valid mask, and then rebuilding the string from the
+ * verified valid mask.
+ *
+ * A service operation is required to set a new client-side
+ * debug mask.
+ */
+ if (!strcmp(file->f_path.dentry->d_name.name,
+ ORANGEFS_KMOD_DEBUG_FILE)) {
+ debug_string_to_mask(buf, &gossip_debug_mask, 0);
+ debug_mask_to_string(&gossip_debug_mask, 0);
+ debug_string = kernel_debug_string;
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "New kernel debug string is %s\n",
+ kernel_debug_string);
+ } else {
+ /* Can't reset client debug mask if client is not running. */
+ if (is_daemon_in_service()) {
+ pr_info("%s: Client not running :%d:\n",
+ __func__,
+ is_daemon_in_service());
+ goto out;
+ }
+
+ debug_string_to_mask(buf, &c_mask, 1);
+ debug_mask_to_string(&c_mask, 1);
+ debug_string = client_debug_string;
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_PARAM);
+ if (!new_op) {
+ pr_info("%s: op_alloc failed!\n", __func__);
+ goto out;
+ }
+
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_TWO_MASK_VALUES;
+ new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_SET;
+ memset(new_op->upcall.req.param.s_value,
+ 0,
+ ORANGEFS_MAX_DEBUG_STRING_LEN);
+ sprintf(new_op->upcall.req.param.s_value,
+ "%llx %llx\n",
+ c_mask.mask1,
+ c_mask.mask2);
+
+ /* service_operation returns 0 on success... */
+ rc = service_operation(new_op,
+ "orangefs_param",
+ ORANGEFS_OP_INTERRUPTIBLE);
+
+ if (rc)
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "%s: service_operation failed! rc:%d:\n",
+ __func__,
+ rc);
+
+ op_release(new_op);
+ }
+
+ mutex_lock(&orangefs_debug_lock);
+ memset(file->f_inode->i_private, 0, ORANGEFS_MAX_DEBUG_STRING_LEN);
+ sprintf((char *)file->f_inode->i_private, "%s\n", debug_string);
+ mutex_unlock(&orangefs_debug_lock);
+
+ *ppos += count;
+ if (silly)
+ rc = silly;
+ else
+ rc = count;
+
+out:
+ gossip_debug(GOSSIP_DEBUGFS_DEBUG,
+ "orangefs_debug_write: rc: %d\n",
+ rc);
+ kfree(buf);
+ return rc;
+}
diff --git a/fs/orangefs/orangefs-debugfs.h b/fs/orangefs/orangefs-debugfs.h
new file mode 100644
index 000000000..e4828c0e3
--- /dev/null
+++ b/fs/orangefs/orangefs-debugfs.h
@@ -0,0 +1,3 @@
+int orangefs_debugfs_init(void);
+int orangefs_kernel_debug_init(void);
+void orangefs_debugfs_cleanup(void);
diff --git a/fs/orangefs/orangefs-dev-proto.h b/fs/orangefs/orangefs-dev-proto.h
new file mode 100644
index 000000000..9eac9d9a3
--- /dev/null
+++ b/fs/orangefs/orangefs-dev-proto.h
@@ -0,0 +1,62 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#ifndef _ORANGEFS_DEV_PROTO_H
+#define _ORANGEFS_DEV_PROTO_H
+
+/*
+ * types and constants shared between user space and kernel space for
+ * device interaction using a common protocol
+ */
+
+/*
+ * valid orangefs kernel operation types
+ */
+#define ORANGEFS_VFS_OP_INVALID 0xFF000000
+#define ORANGEFS_VFS_OP_FILE_IO 0xFF000001
+#define ORANGEFS_VFS_OP_LOOKUP 0xFF000002
+#define ORANGEFS_VFS_OP_CREATE 0xFF000003
+#define ORANGEFS_VFS_OP_GETATTR 0xFF000004
+#define ORANGEFS_VFS_OP_REMOVE 0xFF000005
+#define ORANGEFS_VFS_OP_MKDIR 0xFF000006
+#define ORANGEFS_VFS_OP_READDIR 0xFF000007
+#define ORANGEFS_VFS_OP_SETATTR 0xFF000008
+#define ORANGEFS_VFS_OP_SYMLINK 0xFF000009
+#define ORANGEFS_VFS_OP_RENAME 0xFF00000A
+#define ORANGEFS_VFS_OP_STATFS 0xFF00000B
+#define ORANGEFS_VFS_OP_TRUNCATE 0xFF00000C
+#define ORANGEFS_VFS_OP_MMAP_RA_FLUSH 0xFF00000D
+#define ORANGEFS_VFS_OP_FS_MOUNT 0xFF00000E
+#define ORANGEFS_VFS_OP_FS_UMOUNT 0xFF00000F
+#define ORANGEFS_VFS_OP_GETXATTR 0xFF000010
+#define ORANGEFS_VFS_OP_SETXATTR 0xFF000011
+#define ORANGEFS_VFS_OP_LISTXATTR 0xFF000012
+#define ORANGEFS_VFS_OP_REMOVEXATTR 0xFF000013
+#define ORANGEFS_VFS_OP_PARAM 0xFF000014
+#define ORANGEFS_VFS_OP_PERF_COUNT 0xFF000015
+#define ORANGEFS_VFS_OP_CANCEL 0xFF00EE00
+#define ORANGEFS_VFS_OP_FSYNC 0xFF00EE01
+#define ORANGEFS_VFS_OP_FSKEY 0xFF00EE02
+#define ORANGEFS_VFS_OP_READDIRPLUS 0xFF00EE03
+
+/*
+ * Misc constants. Please retain them as multiples of 8!
+ * Otherwise 32-64 bit interactions will be messed up :)
+ */
+#define ORANGEFS_MAX_DEBUG_STRING_LEN 0x00000400
+#define ORANGEFS_MAX_DEBUG_ARRAY_LEN 0x00000800
+
+/*
+ * The maximum number of directory entries in a single request is 96.
+ * XXX: Why can this not be higher. The client-side code can handle up to 512.
+ * XXX: What happens if we expect more than the client can return?
+ */
+#define ORANGEFS_MAX_DIRENT_COUNT_READDIR 96
+
+#include "upcall.h"
+#include "downcall.h"
+
+#endif
diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h
new file mode 100644
index 000000000..a9925e296
--- /dev/null
+++ b/fs/orangefs/orangefs-kernel.h
@@ -0,0 +1,623 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * The ORANGEFS Linux kernel support allows ORANGEFS volumes to be mounted and
+ * accessed through the Linux VFS (i.e. using standard I/O system calls).
+ * This support is only needed on clients that wish to mount the file system.
+ *
+ */
+
+/*
+ * Declarations and macros for the ORANGEFS Linux kernel support.
+ */
+
+#ifndef __ORANGEFSKERNEL_H
+#define __ORANGEFSKERNEL_H
+
+#include <linux/kernel.h>
+#include <linux/moduleparam.h>
+#include <linux/statfs.h>
+#include <linux/backing-dev.h>
+#include <linux/device.h>
+#include <linux/mpage.h>
+#include <linux/namei.h>
+#include <linux/errno.h>
+#include <linux/init.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+#include <linux/fs.h>
+#include <linux/vmalloc.h>
+
+#include <linux/aio.h>
+#include <linux/posix_acl.h>
+#include <linux/posix_acl_xattr.h>
+#include <linux/compat.h>
+#include <linux/mount.h>
+#include <linux/uaccess.h>
+#include <linux/atomic.h>
+#include <linux/uio.h>
+#include <linux/sched.h>
+#include <linux/mm.h>
+#include <linux/wait.h>
+#include <linux/dcache.h>
+#include <linux/pagemap.h>
+#include <linux/poll.h>
+#include <linux/rwsem.h>
+#include <linux/xattr.h>
+#include <linux/exportfs.h>
+
+#include <asm/unaligned.h>
+
+#include "orangefs-dev-proto.h"
+
+#ifdef ORANGEFS_KERNEL_DEBUG
+#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 10
+#else
+#define ORANGEFS_DEFAULT_OP_TIMEOUT_SECS 20
+#endif
+
+#define ORANGEFS_BUFMAP_WAIT_TIMEOUT_SECS 30
+
+#define ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS 900 /* 15 minutes */
+
+#define ORANGEFS_REQDEVICE_NAME "pvfs2-req"
+
+#define ORANGEFS_DEVREQ_MAGIC 0x20030529
+#define ORANGEFS_LINK_MAX 0x000000FF
+#define ORANGEFS_PURGE_RETRY_COUNT 0x00000005
+#define ORANGEFS_MAX_NUM_OPTIONS 0x00000004
+#define ORANGEFS_MAX_MOUNT_OPT_LEN 0x00000080
+#define ORANGEFS_MAX_FSKEY_LEN 64
+
+#define MAX_DEV_REQ_UPSIZE (2 * sizeof(__s32) + \
+sizeof(__u64) + sizeof(struct orangefs_upcall_s))
+#define MAX_DEV_REQ_DOWNSIZE (2 * sizeof(__s32) + \
+sizeof(__u64) + sizeof(struct orangefs_downcall_s))
+
+/*
+ * valid orangefs kernel operation states
+ *
+ * unknown - op was just initialized
+ * waiting - op is on request_list (upward bound)
+ * inprogr - op is in progress (waiting for downcall)
+ * serviced - op has matching downcall; ok
+ * purged - op has to start a timer since client-core
+ * exited uncleanly before servicing op
+ * given up - submitter has given up waiting for it
+ */
+enum orangefs_vfs_op_states {
+ OP_VFS_STATE_UNKNOWN = 0,
+ OP_VFS_STATE_WAITING = 1,
+ OP_VFS_STATE_INPROGR = 2,
+ OP_VFS_STATE_SERVICED = 4,
+ OP_VFS_STATE_PURGED = 8,
+ OP_VFS_STATE_GIVEN_UP = 16,
+};
+
+/*
+ * An array of client_debug_mask will be built to hold debug keyword/mask
+ * values fetched from userspace.
+ */
+struct client_debug_mask {
+ char *keyword;
+ __u64 mask1;
+ __u64 mask2;
+};
+
+/*
+ * orangefs kernel memory related flags
+ */
+
+#if ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB))
+#define ORANGEFS_CACHE_CREATE_FLAGS SLAB_RED_ZONE
+#else
+#define ORANGEFS_CACHE_CREATE_FLAGS 0
+#endif /* ((defined ORANGEFS_KERNEL_DEBUG) && (defined CONFIG_DEBUG_SLAB)) */
+
+/* orangefs xattr and acl related defines */
+#define ORANGEFS_XATTR_INDEX_POSIX_ACL_ACCESS 1
+#define ORANGEFS_XATTR_INDEX_POSIX_ACL_DEFAULT 2
+#define ORANGEFS_XATTR_INDEX_TRUSTED 3
+#define ORANGEFS_XATTR_INDEX_DEFAULT 4
+
+#define ORANGEFS_XATTR_NAME_ACL_ACCESS XATTR_NAME_POSIX_ACL_ACCESS
+#define ORANGEFS_XATTR_NAME_ACL_DEFAULT XATTR_NAME_POSIX_ACL_DEFAULT
+#define ORANGEFS_XATTR_NAME_TRUSTED_PREFIX "trusted."
+#define ORANGEFS_XATTR_NAME_DEFAULT_PREFIX ""
+
+/* these functions are defined in orangefs-utils.c */
+int orangefs_prepare_cdm_array(char *debug_array_string);
+int orangefs_prepare_debugfs_help_string(int);
+
+/* defined in orangefs-debugfs.c */
+int orangefs_client_debug_init(void);
+
+void debug_string_to_mask(char *, void *, int);
+void do_c_mask(int, char *, struct client_debug_mask **);
+void do_k_mask(int, char *, __u64 **);
+
+void debug_mask_to_string(void *, int);
+void do_k_string(void *, int);
+void do_c_string(void *, int);
+int check_amalgam_keyword(void *, int);
+int keyword_is_amalgam(char *);
+
+/*these variables are defined in orangefs-mod.c */
+extern char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+extern char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+extern char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+extern unsigned int kernel_mask_set_mod_init;
+
+extern int orangefs_init_acl(struct inode *inode, struct inode *dir);
+extern const struct xattr_handler *orangefs_xattr_handlers[];
+
+extern struct posix_acl *orangefs_get_acl(struct inode *inode, int type);
+extern int orangefs_set_acl(struct inode *inode, struct posix_acl *acl, int type);
+
+/*
+ * Redefine xtvec structure so that we could move helper functions out of
+ * the define
+ */
+struct xtvec {
+ __kernel_off_t xtv_off; /* must be off_t */
+ __kernel_size_t xtv_len; /* must be size_t */
+};
+
+/*
+ * orangefs data structures
+ */
+struct orangefs_kernel_op_s {
+ enum orangefs_vfs_op_states op_state;
+ __u64 tag;
+
+ /*
+ * Set uses_shared_memory to non zero if this operation uses
+ * shared memory. If true, then a retry on the op must also
+ * get a new shared memory buffer and re-populate it.
+ * Cancels don't care - it only matters for service_operation()
+ * retry logics and cancels don't go through it anymore. It
+ * safely stays non-zero when we use it as slot_to_free.
+ */
+ union {
+ int uses_shared_memory;
+ int slot_to_free;
+ };
+
+ struct orangefs_upcall_s upcall;
+ struct orangefs_downcall_s downcall;
+
+ struct completion waitq;
+ spinlock_t lock;
+
+ int attempts;
+
+ struct list_head list;
+};
+
+#define set_op_state_waiting(op) ((op)->op_state = OP_VFS_STATE_WAITING)
+#define set_op_state_inprogress(op) ((op)->op_state = OP_VFS_STATE_INPROGR)
+#define set_op_state_given_up(op) ((op)->op_state = OP_VFS_STATE_GIVEN_UP)
+static inline void set_op_state_serviced(struct orangefs_kernel_op_s *op)
+{
+ op->op_state = OP_VFS_STATE_SERVICED;
+ complete(&op->waitq);
+}
+
+#define op_state_waiting(op) ((op)->op_state & OP_VFS_STATE_WAITING)
+#define op_state_in_progress(op) ((op)->op_state & OP_VFS_STATE_INPROGR)
+#define op_state_serviced(op) ((op)->op_state & OP_VFS_STATE_SERVICED)
+#define op_state_purged(op) ((op)->op_state & OP_VFS_STATE_PURGED)
+#define op_state_given_up(op) ((op)->op_state & OP_VFS_STATE_GIVEN_UP)
+#define op_is_cancel(op) ((op)->upcall.type == ORANGEFS_VFS_OP_CANCEL)
+
+void op_release(struct orangefs_kernel_op_s *op);
+
+extern void orangefs_bufmap_put(int);
+static inline void put_cancel(struct orangefs_kernel_op_s *op)
+{
+ orangefs_bufmap_put(op->slot_to_free);
+ op_release(op);
+}
+
+static inline void set_op_state_purged(struct orangefs_kernel_op_s *op)
+{
+ spin_lock(&op->lock);
+ if (unlikely(op_is_cancel(op))) {
+ list_del_init(&op->list);
+ spin_unlock(&op->lock);
+ put_cancel(op);
+ } else {
+ op->op_state |= OP_VFS_STATE_PURGED;
+ complete(&op->waitq);
+ spin_unlock(&op->lock);
+ }
+}
+
+/* per inode private orangefs info */
+struct orangefs_inode_s {
+ struct orangefs_object_kref refn;
+ char link_target[ORANGEFS_NAME_MAX];
+ __s64 blksize;
+ /*
+ * Reading/Writing Extended attributes need to acquire the appropriate
+ * reader/writer semaphore on the orangefs_inode_s structure.
+ */
+ struct rw_semaphore xattr_sem;
+
+ struct inode vfs_inode;
+ sector_t last_failed_block_index_read;
+
+ /*
+ * State of in-memory attributes not yet flushed to disk associated
+ * with this object
+ */
+ unsigned long pinode_flags;
+};
+
+#define P_ATIME_FLAG 0
+#define P_MTIME_FLAG 1
+#define P_CTIME_FLAG 2
+#define P_MODE_FLAG 3
+
+#define ClearAtimeFlag(pinode) clear_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
+#define SetAtimeFlag(pinode) set_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
+#define AtimeFlag(pinode) test_bit(P_ATIME_FLAG, &(pinode)->pinode_flags)
+
+#define ClearMtimeFlag(pinode) clear_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
+#define SetMtimeFlag(pinode) set_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
+#define MtimeFlag(pinode) test_bit(P_MTIME_FLAG, &(pinode)->pinode_flags)
+
+#define ClearCtimeFlag(pinode) clear_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
+#define SetCtimeFlag(pinode) set_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
+#define CtimeFlag(pinode) test_bit(P_CTIME_FLAG, &(pinode)->pinode_flags)
+
+#define ClearModeFlag(pinode) clear_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
+#define SetModeFlag(pinode) set_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
+#define ModeFlag(pinode) test_bit(P_MODE_FLAG, &(pinode)->pinode_flags)
+
+/* per superblock private orangefs info */
+struct orangefs_sb_info_s {
+ struct orangefs_khandle root_khandle;
+ __s32 fs_id;
+ int id;
+ int flags;
+#define ORANGEFS_OPT_INTR 0x01
+#define ORANGEFS_OPT_LOCAL_LOCK 0x02
+ char devname[ORANGEFS_MAX_SERVER_ADDR_LEN];
+ struct super_block *sb;
+ int mount_pending;
+ struct list_head list;
+};
+
+/*
+ * structure that holds the state of any async I/O operation issued
+ * through the VFS. Needed especially to handle cancellation requests
+ * or even completion notification so that the VFS client-side daemon
+ * can free up its vfs_request slots.
+ */
+struct orangefs_kiocb_s {
+ /* the pointer to the task that initiated the AIO */
+ struct task_struct *tsk;
+
+ /* pointer to the kiocb that kicked this operation */
+ struct kiocb *kiocb;
+
+ /* buffer index that was used for the I/O */
+ struct orangefs_bufmap *bufmap;
+ int buffer_index;
+
+ /* orangefs kernel operation type */
+ struct orangefs_kernel_op_s *op;
+
+ /* The user space buffers from/to which I/O is being staged */
+ struct iovec *iov;
+
+ /* number of elements in the iovector */
+ unsigned long nr_segs;
+
+ /* set to indicate the type of the operation */
+ int rw;
+
+ /* file offset */
+ loff_t offset;
+
+ /* and the count in bytes */
+ size_t bytes_to_be_copied;
+
+ ssize_t bytes_copied;
+ int needs_cleanup;
+};
+
+struct orangefs_stats {
+ unsigned long cache_hits;
+ unsigned long cache_misses;
+ unsigned long reads;
+ unsigned long writes;
+};
+
+extern struct orangefs_stats g_orangefs_stats;
+
+/*
+ * NOTE: See Documentation/filesystems/porting for information
+ * on implementing FOO_I and properly accessing fs private data
+ */
+static inline struct orangefs_inode_s *ORANGEFS_I(struct inode *inode)
+{
+ return container_of(inode, struct orangefs_inode_s, vfs_inode);
+}
+
+static inline struct orangefs_sb_info_s *ORANGEFS_SB(struct super_block *sb)
+{
+ return (struct orangefs_sb_info_s *) sb->s_fs_info;
+}
+
+/* ino_t descends from "unsigned long", 8 bytes, 64 bits. */
+static inline ino_t orangefs_khandle_to_ino(struct orangefs_khandle *khandle)
+{
+ union {
+ unsigned char u[8];
+ __u64 ino;
+ } ihandle;
+
+ ihandle.u[0] = khandle->u[0] ^ khandle->u[4];
+ ihandle.u[1] = khandle->u[1] ^ khandle->u[5];
+ ihandle.u[2] = khandle->u[2] ^ khandle->u[6];
+ ihandle.u[3] = khandle->u[3] ^ khandle->u[7];
+ ihandle.u[4] = khandle->u[12] ^ khandle->u[8];
+ ihandle.u[5] = khandle->u[13] ^ khandle->u[9];
+ ihandle.u[6] = khandle->u[14] ^ khandle->u[10];
+ ihandle.u[7] = khandle->u[15] ^ khandle->u[11];
+
+ return ihandle.ino;
+}
+
+static inline struct orangefs_khandle *get_khandle_from_ino(struct inode *inode)
+{
+ return &(ORANGEFS_I(inode)->refn.khandle);
+}
+
+static inline __s32 get_fsid_from_ino(struct inode *inode)
+{
+ return ORANGEFS_I(inode)->refn.fs_id;
+}
+
+static inline ino_t get_ino_from_khandle(struct inode *inode)
+{
+ struct orangefs_khandle *khandle;
+ ino_t ino;
+
+ khandle = get_khandle_from_ino(inode);
+ ino = orangefs_khandle_to_ino(khandle);
+ return ino;
+}
+
+static inline ino_t get_parent_ino_from_dentry(struct dentry *dentry)
+{
+ return get_ino_from_khandle(dentry->d_parent->d_inode);
+}
+
+static inline int is_root_handle(struct inode *inode)
+{
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s: root handle: %pU, this handle: %pU:\n",
+ __func__,
+ &ORANGEFS_SB(inode->i_sb)->root_khandle,
+ get_khandle_from_ino(inode));
+
+ if (ORANGEFS_khandle_cmp(&(ORANGEFS_SB(inode->i_sb)->root_khandle),
+ get_khandle_from_ino(inode)))
+ return 0;
+ else
+ return 1;
+}
+
+static inline int match_handle(struct orangefs_khandle resp_handle,
+ struct inode *inode)
+{
+ gossip_debug(GOSSIP_DCACHE_DEBUG,
+ "%s: one handle: %pU, another handle:%pU:\n",
+ __func__,
+ &resp_handle,
+ get_khandle_from_ino(inode));
+
+ if (ORANGEFS_khandle_cmp(&resp_handle, get_khandle_from_ino(inode)))
+ return 0;
+ else
+ return 1;
+}
+
+/*
+ * defined in orangefs-cache.c
+ */
+int op_cache_initialize(void);
+int op_cache_finalize(void);
+struct orangefs_kernel_op_s *op_alloc(__s32 type);
+void orangefs_new_tag(struct orangefs_kernel_op_s *op);
+char *get_opname_string(struct orangefs_kernel_op_s *new_op);
+
+int orangefs_inode_cache_initialize(void);
+int orangefs_inode_cache_finalize(void);
+
+/*
+ * defined in orangefs-mod.c
+ */
+void purge_inprogress_ops(void);
+
+/*
+ * defined in waitqueue.c
+ */
+void purge_waiting_ops(void);
+
+/*
+ * defined in super.c
+ */
+struct dentry *orangefs_mount(struct file_system_type *fst,
+ int flags,
+ const char *devname,
+ void *data);
+
+void orangefs_kill_sb(struct super_block *sb);
+int orangefs_remount(struct orangefs_sb_info_s *);
+
+int fsid_key_table_initialize(void);
+void fsid_key_table_finalize(void);
+
+/*
+ * defined in inode.c
+ */
+__u32 convert_to_orangefs_mask(unsigned long lite_mask);
+struct inode *orangefs_new_inode(struct super_block *sb,
+ struct inode *dir,
+ int mode,
+ dev_t dev,
+ struct orangefs_object_kref *ref);
+
+int orangefs_setattr(struct dentry *dentry, struct iattr *iattr);
+
+int orangefs_getattr(struct vfsmount *mnt,
+ struct dentry *dentry,
+ struct kstat *kstat);
+
+int orangefs_permission(struct inode *inode, int mask);
+
+/*
+ * defined in xattr.c
+ */
+int orangefs_setxattr(struct dentry *dentry,
+ const char *name,
+ const void *value,
+ size_t size,
+ int flags);
+
+ssize_t orangefs_getxattr(struct dentry *dentry,
+ const char *name,
+ void *buffer,
+ size_t size);
+
+ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size);
+
+/*
+ * defined in namei.c
+ */
+struct inode *orangefs_iget(struct super_block *sb,
+ struct orangefs_object_kref *ref);
+
+ssize_t orangefs_inode_read(struct inode *inode,
+ struct iov_iter *iter,
+ loff_t *offset,
+ loff_t readahead_size);
+
+/*
+ * defined in devorangefs-req.c
+ */
+int orangefs_dev_init(void);
+void orangefs_dev_cleanup(void);
+int is_daemon_in_service(void);
+bool __is_daemon_in_service(void);
+
+/*
+ * defined in orangefs-utils.c
+ */
+__s32 fsid_of_op(struct orangefs_kernel_op_s *op);
+
+int orangefs_flush_inode(struct inode *inode);
+
+ssize_t orangefs_inode_getxattr(struct inode *inode,
+ const char *prefix,
+ const char *name,
+ void *buffer,
+ size_t size);
+
+int orangefs_inode_setxattr(struct inode *inode,
+ const char *prefix,
+ const char *name,
+ const void *value,
+ size_t size,
+ int flags);
+
+int orangefs_inode_getattr(struct inode *inode, int new, int size);
+
+int orangefs_inode_check_changed(struct inode *inode);
+
+int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr);
+
+void orangefs_make_bad_inode(struct inode *inode);
+
+int orangefs_unmount_sb(struct super_block *sb);
+
+bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op);
+
+int orangefs_normalize_to_errno(__s32 error_code);
+
+extern struct mutex devreq_mutex;
+extern struct mutex request_mutex;
+extern int debug;
+extern int op_timeout_secs;
+extern int slot_timeout_secs;
+extern struct list_head orangefs_superblocks;
+extern spinlock_t orangefs_superblocks_lock;
+extern struct list_head orangefs_request_list;
+extern spinlock_t orangefs_request_list_lock;
+extern wait_queue_head_t orangefs_request_list_waitq;
+extern struct list_head *htable_ops_in_progress;
+extern spinlock_t htable_ops_in_progress_lock;
+extern int hash_table_size;
+
+extern const struct address_space_operations orangefs_address_operations;
+extern struct backing_dev_info orangefs_backing_dev_info;
+extern struct inode_operations orangefs_file_inode_operations;
+extern const struct file_operations orangefs_file_operations;
+extern struct inode_operations orangefs_symlink_inode_operations;
+extern struct inode_operations orangefs_dir_inode_operations;
+extern const struct file_operations orangefs_dir_operations;
+extern const struct dentry_operations orangefs_dentry_operations;
+extern const struct file_operations orangefs_devreq_file_operations;
+
+extern wait_queue_head_t orangefs_bufmap_init_waitq;
+
+/*
+ * misc convenience macros
+ */
+
+#define ORANGEFS_OP_INTERRUPTIBLE 1 /* service_operation() is interruptible */
+#define ORANGEFS_OP_PRIORITY 2 /* service_operation() is high priority */
+#define ORANGEFS_OP_CANCELLATION 4 /* this is a cancellation */
+#define ORANGEFS_OP_NO_MUTEX 8 /* don't acquire request_mutex */
+#define ORANGEFS_OP_ASYNC 16 /* Queue it, but don't wait */
+
+int service_operation(struct orangefs_kernel_op_s *op,
+ const char *op_name,
+ int flags);
+
+#define get_interruptible_flag(inode) \
+ ((ORANGEFS_SB(inode->i_sb)->flags & ORANGEFS_OPT_INTR) ? \
+ ORANGEFS_OP_INTERRUPTIBLE : 0)
+
+#define fill_default_sys_attrs(sys_attr, type, mode) \
+do { \
+ sys_attr.owner = from_kuid(current_user_ns(), current_fsuid()); \
+ sys_attr.group = from_kgid(current_user_ns(), current_fsgid()); \
+ sys_attr.perms = ORANGEFS_util_translate_mode(mode); \
+ sys_attr.mtime = 0; \
+ sys_attr.atime = 0; \
+ sys_attr.ctime = 0; \
+ sys_attr.mask = ORANGEFS_ATTR_SYS_ALL_SETABLE; \
+} while (0)
+
+static inline void orangefs_i_size_write(struct inode *inode, loff_t i_size)
+{
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ mutex_lock(&inode->i_mutex);
+#endif
+ i_size_write(inode, i_size);
+#if BITS_PER_LONG == 32 && defined(CONFIG_SMP)
+ mutex_unlock(&inode->i_mutex);
+#endif
+}
+
+#endif /* __ORANGEFSKERNEL_H */
diff --git a/fs/orangefs/orangefs-mod.c b/fs/orangefs/orangefs-mod.c
new file mode 100644
index 000000000..6f072a8c0
--- /dev/null
+++ b/fs/orangefs/orangefs-mod.c
@@ -0,0 +1,293 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * Changes by Acxiom Corporation to add proc file handler for pvfs2 client
+ * parameters, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-debugfs.h"
+#include "orangefs-sysfs.h"
+
+/* ORANGEFS_VERSION is a ./configure define */
+#ifndef ORANGEFS_VERSION
+#define ORANGEFS_VERSION "upstream"
+#endif
+
+/*
+ * global variables declared here
+ */
+
+/* array of client debug keyword/mask values */
+struct client_debug_mask *cdm_array;
+int cdm_element_count;
+
+char kernel_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN] = "none";
+char client_debug_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+char client_debug_array_string[ORANGEFS_MAX_DEBUG_STRING_LEN];
+
+char *debug_help_string;
+int help_string_initialized;
+struct dentry *help_file_dentry;
+struct dentry *client_debug_dentry;
+struct dentry *debug_dir;
+int client_verbose_index;
+int client_all_index;
+struct orangefs_stats g_orangefs_stats;
+
+/* the size of the hash tables for ops in progress */
+int hash_table_size = 509;
+
+static ulong module_parm_debug_mask;
+__u64 gossip_debug_mask;
+struct client_debug_mask client_debug_mask = { NULL, 0, 0 };
+unsigned int kernel_mask_set_mod_init; /* implicitly false */
+int op_timeout_secs = ORANGEFS_DEFAULT_OP_TIMEOUT_SECS;
+int slot_timeout_secs = ORANGEFS_DEFAULT_SLOT_TIMEOUT_SECS;
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("ORANGEFS Development Team");
+MODULE_DESCRIPTION("The Linux Kernel VFS interface to ORANGEFS");
+MODULE_PARM_DESC(module_parm_debug_mask, "debugging level (see orangefs-debug.h for values)");
+MODULE_PARM_DESC(op_timeout_secs, "Operation timeout in seconds");
+MODULE_PARM_DESC(slot_timeout_secs, "Slot timeout in seconds");
+MODULE_PARM_DESC(hash_table_size,
+ "size of hash table for operations in progress");
+
+static struct file_system_type orangefs_fs_type = {
+ .name = "pvfs2",
+ .mount = orangefs_mount,
+ .kill_sb = orangefs_kill_sb,
+ .owner = THIS_MODULE,
+};
+
+module_param(hash_table_size, int, 0);
+module_param(module_parm_debug_mask, ulong, 0644);
+module_param(op_timeout_secs, int, 0);
+module_param(slot_timeout_secs, int, 0);
+
+/* synchronizes the request device file */
+DEFINE_MUTEX(devreq_mutex);
+
+/*
+ * Blocks non-priority requests from being queued for servicing. This
+ * could be used for protecting the request list data structure, but
+ * for now it's only being used to stall the op addition to the request
+ * list
+ */
+DEFINE_MUTEX(request_mutex);
+
+/* hash table for storing operations waiting for matching downcall */
+struct list_head *htable_ops_in_progress;
+DEFINE_SPINLOCK(htable_ops_in_progress_lock);
+
+/* list for queueing upcall operations */
+LIST_HEAD(orangefs_request_list);
+
+/* used to protect the above orangefs_request_list */
+DEFINE_SPINLOCK(orangefs_request_list_lock);
+
+/* used for incoming request notification */
+DECLARE_WAIT_QUEUE_HEAD(orangefs_request_list_waitq);
+
+static int __init orangefs_init(void)
+{
+ int ret = -1;
+ __u32 i = 0;
+
+ /* convert input debug mask to a 64-bit unsigned integer */
+ gossip_debug_mask = (unsigned long long) module_parm_debug_mask;
+
+ /*
+ * set the kernel's gossip debug string; invalid mask values will
+ * be ignored.
+ */
+ debug_mask_to_string(&gossip_debug_mask, 0);
+
+ /* remove any invalid values from the mask */
+ debug_string_to_mask(kernel_debug_string, &gossip_debug_mask, 0);
+
+ /*
+ * if the mask has a non-zero value, then indicate that the mask
+ * was set when the kernel module was loaded. The orangefs dev ioctl
+ * command will look at this boolean to determine if the kernel's
+ * debug mask should be overwritten when the client-core is started.
+ */
+ if (gossip_debug_mask != 0)
+ kernel_mask_set_mod_init = true;
+
+ pr_info("%s: called with debug mask: :%s: :%llx:\n",
+ __func__,
+ kernel_debug_string,
+ (unsigned long long)gossip_debug_mask);
+
+ ret = bdi_init(&orangefs_backing_dev_info);
+
+ if (ret)
+ return ret;
+
+ if (op_timeout_secs < 0)
+ op_timeout_secs = 0;
+
+ if (slot_timeout_secs < 0)
+ slot_timeout_secs = 0;
+
+ /* initialize global book keeping data structures */
+ ret = op_cache_initialize();
+ if (ret < 0)
+ goto err;
+
+ ret = orangefs_inode_cache_initialize();
+ if (ret < 0)
+ goto cleanup_op;
+
+ htable_ops_in_progress =
+ kcalloc(hash_table_size, sizeof(struct list_head), GFP_KERNEL);
+ if (!htable_ops_in_progress) {
+ gossip_err("Failed to initialize op hashtable");
+ ret = -ENOMEM;
+ goto cleanup_inode;
+ }
+
+ /* initialize a doubly linked at each hash table index */
+ for (i = 0; i < hash_table_size; i++)
+ INIT_LIST_HEAD(&htable_ops_in_progress[i]);
+
+ ret = fsid_key_table_initialize();
+ if (ret < 0)
+ goto cleanup_progress_table;
+
+ /*
+ * Build the contents of /sys/kernel/debug/orangefs/debug-help
+ * from the keywords in the kernel keyword/mask array.
+ *
+ * The keywords in the client keyword/mask array are
+ * unknown at boot time.
+ *
+ * orangefs_prepare_debugfs_help_string will be used again
+ * later to rebuild the debug-help file after the client starts
+ * and passes along the needed info. The argument signifies
+ * which time orangefs_prepare_debugfs_help_string is being
+ * called.
+ */
+ ret = orangefs_prepare_debugfs_help_string(1);
+ if (ret)
+ goto cleanup_key_table;
+
+ ret = orangefs_debugfs_init();
+ if (ret)
+ goto debugfs_init_failed;
+
+ ret = orangefs_kernel_debug_init();
+ if (ret)
+ goto kernel_debug_init_failed;
+
+ ret = orangefs_sysfs_init();
+ if (ret)
+ goto sysfs_init_failed;
+
+ /* Initialize the orangefsdev subsystem. */
+ ret = orangefs_dev_init();
+ if (ret < 0) {
+ gossip_err("%s: could not initialize device subsystem %d!\n",
+ __func__,
+ ret);
+ goto cleanup_device;
+ }
+
+ ret = register_filesystem(&orangefs_fs_type);
+ if (ret == 0) {
+ pr_info("orangefs: module version %s loaded\n", ORANGEFS_VERSION);
+ ret = 0;
+ goto out;
+ }
+
+ orangefs_sysfs_exit();
+
+cleanup_device:
+ orangefs_dev_cleanup();
+
+sysfs_init_failed:
+
+kernel_debug_init_failed:
+
+debugfs_init_failed:
+ orangefs_debugfs_cleanup();
+
+cleanup_key_table:
+ fsid_key_table_finalize();
+
+cleanup_progress_table:
+ kfree(htable_ops_in_progress);
+
+cleanup_inode:
+ orangefs_inode_cache_finalize();
+
+cleanup_op:
+ op_cache_finalize();
+
+err:
+ bdi_destroy(&orangefs_backing_dev_info);
+
+out:
+ return ret;
+}
+
+static void __exit orangefs_exit(void)
+{
+ int i = 0;
+ gossip_debug(GOSSIP_INIT_DEBUG, "orangefs: orangefs_exit called\n");
+
+ unregister_filesystem(&orangefs_fs_type);
+ orangefs_debugfs_cleanup();
+ orangefs_sysfs_exit();
+ fsid_key_table_finalize();
+ orangefs_dev_cleanup();
+ BUG_ON(!list_empty(&orangefs_request_list));
+ for (i = 0; i < hash_table_size; i++)
+ BUG_ON(!list_empty(&htable_ops_in_progress[i]));
+
+ orangefs_inode_cache_finalize();
+ op_cache_finalize();
+
+ kfree(htable_ops_in_progress);
+
+ bdi_destroy(&orangefs_backing_dev_info);
+
+ pr_info("orangefs: module version %s unloaded\n", ORANGEFS_VERSION);
+}
+
+/*
+ * What we do in this function is to walk the list of operations
+ * that are in progress in the hash table and mark them as purged as well.
+ */
+void purge_inprogress_ops(void)
+{
+ int i;
+
+ for (i = 0; i < hash_table_size; i++) {
+ struct orangefs_kernel_op_s *op;
+ struct orangefs_kernel_op_s *next;
+
+ spin_lock(&htable_ops_in_progress_lock);
+ list_for_each_entry_safe(op,
+ next,
+ &htable_ops_in_progress[i],
+ list) {
+ set_op_state_purged(op);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: op:%s: op_state:%d: process:%s:\n",
+ __func__,
+ get_opname_string(op),
+ op->op_state,
+ current->comm);
+ }
+ spin_unlock(&htable_ops_in_progress_lock);
+ }
+}
+
+module_init(orangefs_init);
+module_exit(orangefs_exit);
diff --git a/fs/orangefs/orangefs-sysfs.c b/fs/orangefs/orangefs-sysfs.c
new file mode 100644
index 000000000..5c03113e3
--- /dev/null
+++ b/fs/orangefs/orangefs-sysfs.c
@@ -0,0 +1,1772 @@
+/*
+ * Documentation/ABI/stable/orangefs-sysfs:
+ *
+ * What: /sys/fs/orangefs/perf_counter_reset
+ * Date: June 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * echo a 0 or a 1 into perf_counter_reset to
+ * reset all the counters in
+ * /sys/fs/orangefs/perf_counters
+ * except ones with PINT_PERF_PRESERVE set.
+ *
+ *
+ * What: /sys/fs/orangefs/perf_counters/...
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Counters and settings for various caches.
+ * Read only.
+ *
+ *
+ * What: /sys/fs/orangefs/perf_time_interval_secs
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Length of perf counter intervals in
+ * seconds.
+ *
+ *
+ * What: /sys/fs/orangefs/perf_history_size
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * The perf_counters cache statistics have N, or
+ * perf_history_size, samples. The default is
+ * one.
+ *
+ * Every perf_time_interval_secs the (first)
+ * samples are reset.
+ *
+ * If N is greater than one, the "current" set
+ * of samples is reset, and the samples from the
+ * other N-1 intervals remain available.
+ *
+ *
+ * What: /sys/fs/orangefs/op_timeout_secs
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Service operation timeout in seconds.
+ *
+ *
+ * What: /sys/fs/orangefs/slot_timeout_secs
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * "Slot" timeout in seconds. A "slot"
+ * is an indexed buffer in the shared
+ * memory segment used for communication
+ * between the kernel module and userspace.
+ * Slots are requested and waited for,
+ * the wait times out after slot_timeout_secs.
+ *
+ *
+ * What: /sys/fs/orangefs/acache/...
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Attribute cache configurable settings.
+ *
+ *
+ * What: /sys/fs/orangefs/ncache/...
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Name cache configurable settings.
+ *
+ *
+ * What: /sys/fs/orangefs/capcache/...
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Capability cache configurable settings.
+ *
+ *
+ * What: /sys/fs/orangefs/ccache/...
+ * Date: Jun 2015
+ * Contact: Mike Marshall <hubcap@omnibond.com>
+ * Description:
+ * Credential cache configurable settings.
+ *
+ */
+
+#include <linux/fs.h>
+#include <linux/kobject.h>
+#include <linux/string.h>
+#include <linux/sysfs.h>
+#include <linux/module.h>
+#include <linux/init.h>
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-sysfs.h"
+
+#define ORANGEFS_KOBJ_ID "orangefs"
+#define ACACHE_KOBJ_ID "acache"
+#define CAPCACHE_KOBJ_ID "capcache"
+#define CCACHE_KOBJ_ID "ccache"
+#define NCACHE_KOBJ_ID "ncache"
+#define PC_KOBJ_ID "pc"
+#define STATS_KOBJ_ID "stats"
+
+struct orangefs_obj {
+ struct kobject kobj;
+ int op_timeout_secs;
+ int perf_counter_reset;
+ int perf_history_size;
+ int perf_time_interval_secs;
+ int slot_timeout_secs;
+};
+
+struct acache_orangefs_obj {
+ struct kobject kobj;
+ int hard_limit;
+ int reclaim_percentage;
+ int soft_limit;
+ int timeout_msecs;
+};
+
+struct capcache_orangefs_obj {
+ struct kobject kobj;
+ int hard_limit;
+ int reclaim_percentage;
+ int soft_limit;
+ int timeout_secs;
+};
+
+struct ccache_orangefs_obj {
+ struct kobject kobj;
+ int hard_limit;
+ int reclaim_percentage;
+ int soft_limit;
+ int timeout_secs;
+};
+
+struct ncache_orangefs_obj {
+ struct kobject kobj;
+ int hard_limit;
+ int reclaim_percentage;
+ int soft_limit;
+ int timeout_msecs;
+};
+
+struct pc_orangefs_obj {
+ struct kobject kobj;
+ char *acache;
+ char *capcache;
+ char *ncache;
+};
+
+struct stats_orangefs_obj {
+ struct kobject kobj;
+ int reads;
+ int writes;
+};
+
+struct orangefs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct orangefs_obj *orangefs_obj,
+ struct orangefs_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct orangefs_obj *orangefs_obj,
+ struct orangefs_attribute *attr,
+ const char *buf,
+ size_t count);
+};
+
+struct acache_orangefs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct acache_orangefs_obj *acache_orangefs_obj,
+ struct acache_orangefs_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct acache_orangefs_obj *acache_orangefs_obj,
+ struct acache_orangefs_attribute *attr,
+ const char *buf,
+ size_t count);
+};
+
+struct capcache_orangefs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct capcache_orangefs_obj *capcache_orangefs_obj,
+ struct capcache_orangefs_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct capcache_orangefs_obj *capcache_orangefs_obj,
+ struct capcache_orangefs_attribute *attr,
+ const char *buf,
+ size_t count);
+};
+
+struct ccache_orangefs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ccache_orangefs_obj *ccache_orangefs_obj,
+ struct ccache_orangefs_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct ccache_orangefs_obj *ccache_orangefs_obj,
+ struct ccache_orangefs_attribute *attr,
+ const char *buf,
+ size_t count);
+};
+
+struct ncache_orangefs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct ncache_orangefs_obj *ncache_orangefs_obj,
+ struct ncache_orangefs_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct ncache_orangefs_obj *ncache_orangefs_obj,
+ struct ncache_orangefs_attribute *attr,
+ const char *buf,
+ size_t count);
+};
+
+struct pc_orangefs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct pc_orangefs_obj *pc_orangefs_obj,
+ struct pc_orangefs_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct pc_orangefs_obj *pc_orangefs_obj,
+ struct pc_orangefs_attribute *attr,
+ const char *buf,
+ size_t count);
+};
+
+struct stats_orangefs_attribute {
+ struct attribute attr;
+ ssize_t (*show)(struct stats_orangefs_obj *stats_orangefs_obj,
+ struct stats_orangefs_attribute *attr,
+ char *buf);
+ ssize_t (*store)(struct stats_orangefs_obj *stats_orangefs_obj,
+ struct stats_orangefs_attribute *attr,
+ const char *buf,
+ size_t count);
+};
+
+static ssize_t orangefs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct orangefs_attribute *attribute;
+ struct orangefs_obj *orangefs_obj;
+ int rc;
+
+ attribute = container_of(attr, struct orangefs_attribute, attr);
+ orangefs_obj = container_of(kobj, struct orangefs_obj, kobj);
+
+ if (!attribute->show) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->show(orangefs_obj, attribute, buf);
+
+out:
+ return rc;
+}
+
+static ssize_t orangefs_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct orangefs_attribute *attribute;
+ struct orangefs_obj *orangefs_obj;
+ int rc;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "orangefs_attr_store: start\n");
+
+ attribute = container_of(attr, struct orangefs_attribute, attr);
+ orangefs_obj = container_of(kobj, struct orangefs_obj, kobj);
+
+ if (!attribute->store) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->store(orangefs_obj, attribute, buf, len);
+
+out:
+ return rc;
+}
+
+static const struct sysfs_ops orangefs_sysfs_ops = {
+ .show = orangefs_attr_show,
+ .store = orangefs_attr_store,
+};
+
+static ssize_t acache_orangefs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct acache_orangefs_attribute *attribute;
+ struct acache_orangefs_obj *acache_orangefs_obj;
+ int rc;
+
+ attribute = container_of(attr, struct acache_orangefs_attribute, attr);
+ acache_orangefs_obj =
+ container_of(kobj, struct acache_orangefs_obj, kobj);
+
+ if (!attribute->show) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->show(acache_orangefs_obj, attribute, buf);
+
+out:
+ return rc;
+}
+
+static ssize_t acache_orangefs_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct acache_orangefs_attribute *attribute;
+ struct acache_orangefs_obj *acache_orangefs_obj;
+ int rc;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "acache_orangefs_attr_store: start\n");
+
+ attribute = container_of(attr, struct acache_orangefs_attribute, attr);
+ acache_orangefs_obj =
+ container_of(kobj, struct acache_orangefs_obj, kobj);
+
+ if (!attribute->store) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->store(acache_orangefs_obj, attribute, buf, len);
+
+out:
+ return rc;
+}
+
+static const struct sysfs_ops acache_orangefs_sysfs_ops = {
+ .show = acache_orangefs_attr_show,
+ .store = acache_orangefs_attr_store,
+};
+
+static ssize_t capcache_orangefs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct capcache_orangefs_attribute *attribute;
+ struct capcache_orangefs_obj *capcache_orangefs_obj;
+ int rc;
+
+ attribute =
+ container_of(attr, struct capcache_orangefs_attribute, attr);
+ capcache_orangefs_obj =
+ container_of(kobj, struct capcache_orangefs_obj, kobj);
+
+ if (!attribute->show) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->show(capcache_orangefs_obj, attribute, buf);
+
+out:
+ return rc;
+}
+
+static ssize_t capcache_orangefs_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct capcache_orangefs_attribute *attribute;
+ struct capcache_orangefs_obj *capcache_orangefs_obj;
+ int rc;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "capcache_orangefs_attr_store: start\n");
+
+ attribute =
+ container_of(attr, struct capcache_orangefs_attribute, attr);
+ capcache_orangefs_obj =
+ container_of(kobj, struct capcache_orangefs_obj, kobj);
+
+ if (!attribute->store) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->store(capcache_orangefs_obj, attribute, buf, len);
+
+out:
+ return rc;
+}
+
+static const struct sysfs_ops capcache_orangefs_sysfs_ops = {
+ .show = capcache_orangefs_attr_show,
+ .store = capcache_orangefs_attr_store,
+};
+
+static ssize_t ccache_orangefs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ccache_orangefs_attribute *attribute;
+ struct ccache_orangefs_obj *ccache_orangefs_obj;
+ int rc;
+
+ attribute =
+ container_of(attr, struct ccache_orangefs_attribute, attr);
+ ccache_orangefs_obj =
+ container_of(kobj, struct ccache_orangefs_obj, kobj);
+
+ if (!attribute->show) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->show(ccache_orangefs_obj, attribute, buf);
+
+out:
+ return rc;
+}
+
+static ssize_t ccache_orangefs_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct ccache_orangefs_attribute *attribute;
+ struct ccache_orangefs_obj *ccache_orangefs_obj;
+ int rc;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "ccache_orangefs_attr_store: start\n");
+
+ attribute =
+ container_of(attr, struct ccache_orangefs_attribute, attr);
+ ccache_orangefs_obj =
+ container_of(kobj, struct ccache_orangefs_obj, kobj);
+
+ if (!attribute->store) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->store(ccache_orangefs_obj, attribute, buf, len);
+
+out:
+ return rc;
+}
+
+static const struct sysfs_ops ccache_orangefs_sysfs_ops = {
+ .show = ccache_orangefs_attr_show,
+ .store = ccache_orangefs_attr_store,
+};
+
+static ssize_t ncache_orangefs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct ncache_orangefs_attribute *attribute;
+ struct ncache_orangefs_obj *ncache_orangefs_obj;
+ int rc;
+
+ attribute = container_of(attr, struct ncache_orangefs_attribute, attr);
+ ncache_orangefs_obj =
+ container_of(kobj, struct ncache_orangefs_obj, kobj);
+
+ if (!attribute->show) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->show(ncache_orangefs_obj, attribute, buf);
+
+out:
+ return rc;
+}
+
+static ssize_t ncache_orangefs_attr_store(struct kobject *kobj,
+ struct attribute *attr,
+ const char *buf,
+ size_t len)
+{
+ struct ncache_orangefs_attribute *attribute;
+ struct ncache_orangefs_obj *ncache_orangefs_obj;
+ int rc;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "ncache_orangefs_attr_store: start\n");
+
+ attribute = container_of(attr, struct ncache_orangefs_attribute, attr);
+ ncache_orangefs_obj =
+ container_of(kobj, struct ncache_orangefs_obj, kobj);
+
+ if (!attribute->store) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->store(ncache_orangefs_obj, attribute, buf, len);
+
+out:
+ return rc;
+}
+
+static const struct sysfs_ops ncache_orangefs_sysfs_ops = {
+ .show = ncache_orangefs_attr_show,
+ .store = ncache_orangefs_attr_store,
+};
+
+static ssize_t pc_orangefs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct pc_orangefs_attribute *attribute;
+ struct pc_orangefs_obj *pc_orangefs_obj;
+ int rc;
+
+ attribute = container_of(attr, struct pc_orangefs_attribute, attr);
+ pc_orangefs_obj =
+ container_of(kobj, struct pc_orangefs_obj, kobj);
+
+ if (!attribute->show) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->show(pc_orangefs_obj, attribute, buf);
+
+out:
+ return rc;
+}
+
+static const struct sysfs_ops pc_orangefs_sysfs_ops = {
+ .show = pc_orangefs_attr_show,
+};
+
+static ssize_t stats_orangefs_attr_show(struct kobject *kobj,
+ struct attribute *attr,
+ char *buf)
+{
+ struct stats_orangefs_attribute *attribute;
+ struct stats_orangefs_obj *stats_orangefs_obj;
+ int rc;
+
+ attribute = container_of(attr, struct stats_orangefs_attribute, attr);
+ stats_orangefs_obj =
+ container_of(kobj, struct stats_orangefs_obj, kobj);
+
+ if (!attribute->show) {
+ rc = -EIO;
+ goto out;
+ }
+
+ rc = attribute->show(stats_orangefs_obj, attribute, buf);
+
+out:
+ return rc;
+}
+
+static const struct sysfs_ops stats_orangefs_sysfs_ops = {
+ .show = stats_orangefs_attr_show,
+};
+
+static void orangefs_release(struct kobject *kobj)
+{
+ struct orangefs_obj *orangefs_obj;
+
+ orangefs_obj = container_of(kobj, struct orangefs_obj, kobj);
+ kfree(orangefs_obj);
+}
+
+static void acache_orangefs_release(struct kobject *kobj)
+{
+ struct acache_orangefs_obj *acache_orangefs_obj;
+
+ acache_orangefs_obj =
+ container_of(kobj, struct acache_orangefs_obj, kobj);
+ kfree(acache_orangefs_obj);
+}
+
+static void capcache_orangefs_release(struct kobject *kobj)
+{
+ struct capcache_orangefs_obj *capcache_orangefs_obj;
+
+ capcache_orangefs_obj =
+ container_of(kobj, struct capcache_orangefs_obj, kobj);
+ kfree(capcache_orangefs_obj);
+}
+
+static void ccache_orangefs_release(struct kobject *kobj)
+{
+ struct ccache_orangefs_obj *ccache_orangefs_obj;
+
+ ccache_orangefs_obj =
+ container_of(kobj, struct ccache_orangefs_obj, kobj);
+ kfree(ccache_orangefs_obj);
+}
+
+static void ncache_orangefs_release(struct kobject *kobj)
+{
+ struct ncache_orangefs_obj *ncache_orangefs_obj;
+
+ ncache_orangefs_obj =
+ container_of(kobj, struct ncache_orangefs_obj, kobj);
+ kfree(ncache_orangefs_obj);
+}
+
+static void pc_orangefs_release(struct kobject *kobj)
+{
+ struct pc_orangefs_obj *pc_orangefs_obj;
+
+ pc_orangefs_obj =
+ container_of(kobj, struct pc_orangefs_obj, kobj);
+ kfree(pc_orangefs_obj);
+}
+
+static void stats_orangefs_release(struct kobject *kobj)
+{
+ struct stats_orangefs_obj *stats_orangefs_obj;
+
+ stats_orangefs_obj =
+ container_of(kobj, struct stats_orangefs_obj, kobj);
+ kfree(stats_orangefs_obj);
+}
+
+static ssize_t sysfs_int_show(char *kobj_id, char *buf, void *attr)
+{
+ int rc = -EIO;
+ struct orangefs_attribute *orangefs_attr;
+ struct stats_orangefs_attribute *stats_orangefs_attr;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG, "sysfs_int_show: id:%s:\n", kobj_id);
+
+ if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) {
+ orangefs_attr = (struct orangefs_attribute *)attr;
+
+ if (!strcmp(orangefs_attr->attr.name, "op_timeout_secs")) {
+ rc = scnprintf(buf,
+ PAGE_SIZE,
+ "%d\n",
+ op_timeout_secs);
+ goto out;
+ } else if (!strcmp(orangefs_attr->attr.name,
+ "slot_timeout_secs")) {
+ rc = scnprintf(buf,
+ PAGE_SIZE,
+ "%d\n",
+ slot_timeout_secs);
+ goto out;
+ } else {
+ goto out;
+ }
+
+ } else if (!strcmp(kobj_id, STATS_KOBJ_ID)) {
+ stats_orangefs_attr = (struct stats_orangefs_attribute *)attr;
+
+ if (!strcmp(stats_orangefs_attr->attr.name, "reads")) {
+ rc = scnprintf(buf,
+ PAGE_SIZE,
+ "%lu\n",
+ g_orangefs_stats.reads);
+ goto out;
+ } else if (!strcmp(stats_orangefs_attr->attr.name, "writes")) {
+ rc = scnprintf(buf,
+ PAGE_SIZE,
+ "%lu\n",
+ g_orangefs_stats.writes);
+ goto out;
+ } else {
+ goto out;
+ }
+ }
+
+out:
+
+ return rc;
+}
+
+static ssize_t int_orangefs_show(struct orangefs_obj *orangefs_obj,
+ struct orangefs_attribute *attr,
+ char *buf)
+{
+ int rc;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "int_orangefs_show:start attr->attr.name:%s:\n",
+ attr->attr.name);
+
+ rc = sysfs_int_show(ORANGEFS_KOBJ_ID, buf, (void *) attr);
+
+ return rc;
+}
+
+static ssize_t int_stats_show(struct stats_orangefs_obj *stats_orangefs_obj,
+ struct stats_orangefs_attribute *attr,
+ char *buf)
+{
+ int rc;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "int_stats_show:start attr->attr.name:%s:\n",
+ attr->attr.name);
+
+ rc = sysfs_int_show(STATS_KOBJ_ID, buf, (void *) attr);
+
+ return rc;
+}
+
+static ssize_t int_store(struct orangefs_obj *orangefs_obj,
+ struct orangefs_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int rc = 0;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "int_store: start attr->attr.name:%s: buf:%s:\n",
+ attr->attr.name, buf);
+
+ if (!strcmp(attr->attr.name, "op_timeout_secs")) {
+ rc = kstrtoint(buf, 0, &op_timeout_secs);
+ goto out;
+ } else if (!strcmp(attr->attr.name, "slot_timeout_secs")) {
+ rc = kstrtoint(buf, 0, &slot_timeout_secs);
+ goto out;
+ } else {
+ goto out;
+ }
+
+out:
+ if (rc)
+ rc = -EINVAL;
+ else
+ rc = count;
+
+ return rc;
+}
+
+/*
+ * obtain attribute values from userspace with a service operation.
+ */
+static int sysfs_service_op_show(char *kobj_id, char *buf, void *attr)
+{
+ struct orangefs_kernel_op_s *new_op = NULL;
+ int rc = 0;
+ char *ser_op_type = NULL;
+ struct orangefs_attribute *orangefs_attr;
+ struct acache_orangefs_attribute *acache_attr;
+ struct capcache_orangefs_attribute *capcache_attr;
+ struct ccache_orangefs_attribute *ccache_attr;
+ struct ncache_orangefs_attribute *ncache_attr;
+ struct pc_orangefs_attribute *pc_attr;
+ __u32 op_alloc_type;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "sysfs_service_op_show: id:%s:\n",
+ kobj_id);
+
+ if (strcmp(kobj_id, PC_KOBJ_ID))
+ op_alloc_type = ORANGEFS_VFS_OP_PARAM;
+ else
+ op_alloc_type = ORANGEFS_VFS_OP_PERF_COUNT;
+
+ new_op = op_alloc(op_alloc_type);
+ if (!new_op)
+ return -ENOMEM;
+
+ /* Can't do a service_operation if the client is not running... */
+ rc = is_daemon_in_service();
+ if (rc) {
+ pr_info("%s: Client not running :%d:\n",
+ __func__,
+ is_daemon_in_service());
+ goto out;
+ }
+
+ if (strcmp(kobj_id, PC_KOBJ_ID))
+ new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_GET;
+
+ if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) {
+ orangefs_attr = (struct orangefs_attribute *)attr;
+
+ if (!strcmp(orangefs_attr->attr.name, "perf_history_size"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE;
+ else if (!strcmp(orangefs_attr->attr.name,
+ "perf_time_interval_secs"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS;
+ else if (!strcmp(orangefs_attr->attr.name,
+ "perf_counter_reset"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_PERF_RESET;
+
+ } else if (!strcmp(kobj_id, ACACHE_KOBJ_ID)) {
+ acache_attr = (struct acache_orangefs_attribute *)attr;
+
+ if (!strcmp(acache_attr->attr.name, "timeout_msecs"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS;
+
+ if (!strcmp(acache_attr->attr.name, "hard_limit"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT;
+
+ if (!strcmp(acache_attr->attr.name, "soft_limit"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT;
+
+ if (!strcmp(acache_attr->attr.name, "reclaim_percentage"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE;
+
+ } else if (!strcmp(kobj_id, CAPCACHE_KOBJ_ID)) {
+ capcache_attr = (struct capcache_orangefs_attribute *)attr;
+
+ if (!strcmp(capcache_attr->attr.name, "timeout_secs"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS;
+
+ if (!strcmp(capcache_attr->attr.name, "hard_limit"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT;
+
+ if (!strcmp(capcache_attr->attr.name, "soft_limit"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT;
+
+ if (!strcmp(capcache_attr->attr.name, "reclaim_percentage"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE;
+
+ } else if (!strcmp(kobj_id, CCACHE_KOBJ_ID)) {
+ ccache_attr = (struct ccache_orangefs_attribute *)attr;
+
+ if (!strcmp(ccache_attr->attr.name, "timeout_secs"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS;
+
+ if (!strcmp(ccache_attr->attr.name, "hard_limit"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT;
+
+ if (!strcmp(ccache_attr->attr.name, "soft_limit"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT;
+
+ if (!strcmp(ccache_attr->attr.name, "reclaim_percentage"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE;
+
+ } else if (!strcmp(kobj_id, NCACHE_KOBJ_ID)) {
+ ncache_attr = (struct ncache_orangefs_attribute *)attr;
+
+ if (!strcmp(ncache_attr->attr.name, "timeout_msecs"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS;
+
+ if (!strcmp(ncache_attr->attr.name, "hard_limit"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT;
+
+ if (!strcmp(ncache_attr->attr.name, "soft_limit"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT;
+
+ if (!strcmp(ncache_attr->attr.name, "reclaim_percentage"))
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE;
+
+ } else if (!strcmp(kobj_id, PC_KOBJ_ID)) {
+ pc_attr = (struct pc_orangefs_attribute *)attr;
+
+ if (!strcmp(pc_attr->attr.name, ACACHE_KOBJ_ID))
+ new_op->upcall.req.perf_count.type =
+ ORANGEFS_PERF_COUNT_REQUEST_ACACHE;
+
+ if (!strcmp(pc_attr->attr.name, CAPCACHE_KOBJ_ID))
+ new_op->upcall.req.perf_count.type =
+ ORANGEFS_PERF_COUNT_REQUEST_CAPCACHE;
+
+ if (!strcmp(pc_attr->attr.name, NCACHE_KOBJ_ID))
+ new_op->upcall.req.perf_count.type =
+ ORANGEFS_PERF_COUNT_REQUEST_NCACHE;
+
+ } else {
+ gossip_err("sysfs_service_op_show: unknown kobj_id:%s:\n",
+ kobj_id);
+ rc = -EINVAL;
+ goto out;
+ }
+
+
+ if (strcmp(kobj_id, PC_KOBJ_ID))
+ ser_op_type = "orangefs_param";
+ else
+ ser_op_type = "orangefs_perf_count";
+
+ /*
+ * The service_operation will return an errno return code on
+ * error, and zero on success.
+ */
+ rc = service_operation(new_op, ser_op_type, ORANGEFS_OP_INTERRUPTIBLE);
+
+out:
+ if (!rc) {
+ if (strcmp(kobj_id, PC_KOBJ_ID)) {
+ rc = scnprintf(buf,
+ PAGE_SIZE,
+ "%d\n",
+ (int)new_op->downcall.resp.param.value);
+ } else {
+ rc = scnprintf(
+ buf,
+ PAGE_SIZE,
+ "%s",
+ new_op->downcall.resp.perf_count.buffer);
+ }
+ }
+
+ op_release(new_op);
+
+ return rc;
+
+}
+
+static ssize_t service_orangefs_show(struct orangefs_obj *orangefs_obj,
+ struct orangefs_attribute *attr,
+ char *buf)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_show(ORANGEFS_KOBJ_ID, buf, (void *)attr);
+
+ return rc;
+}
+
+static ssize_t
+ service_acache_show(struct acache_orangefs_obj *acache_orangefs_obj,
+ struct acache_orangefs_attribute *attr,
+ char *buf)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_show(ACACHE_KOBJ_ID, buf, (void *)attr);
+
+ return rc;
+}
+
+static ssize_t service_capcache_show(struct capcache_orangefs_obj
+ *capcache_orangefs_obj,
+ struct capcache_orangefs_attribute *attr,
+ char *buf)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_show(CAPCACHE_KOBJ_ID, buf, (void *)attr);
+
+ return rc;
+}
+
+static ssize_t service_ccache_show(struct ccache_orangefs_obj
+ *ccache_orangefs_obj,
+ struct ccache_orangefs_attribute *attr,
+ char *buf)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_show(CCACHE_KOBJ_ID, buf, (void *)attr);
+
+ return rc;
+}
+
+static ssize_t
+ service_ncache_show(struct ncache_orangefs_obj *ncache_orangefs_obj,
+ struct ncache_orangefs_attribute *attr,
+ char *buf)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_show(NCACHE_KOBJ_ID, buf, (void *)attr);
+
+ return rc;
+}
+
+static ssize_t
+ service_pc_show(struct pc_orangefs_obj *pc_orangefs_obj,
+ struct pc_orangefs_attribute *attr,
+ char *buf)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_show(PC_KOBJ_ID, buf, (void *)attr);
+
+ return rc;
+}
+
+/*
+ * pass attribute values back to userspace with a service operation.
+ *
+ * We have to do a memory allocation, an sscanf and a service operation.
+ * And we have to evaluate what the user entered, to make sure the
+ * value is within the range supported by the attribute. So, there's
+ * a lot of return code checking and mapping going on here.
+ *
+ * We want to return 1 if we think everything went OK, and
+ * EINVAL if not.
+ */
+static int sysfs_service_op_store(char *kobj_id, const char *buf, void *attr)
+{
+ struct orangefs_kernel_op_s *new_op = NULL;
+ int val = 0;
+ int rc = 0;
+ struct orangefs_attribute *orangefs_attr;
+ struct acache_orangefs_attribute *acache_attr;
+ struct capcache_orangefs_attribute *capcache_attr;
+ struct ccache_orangefs_attribute *ccache_attr;
+ struct ncache_orangefs_attribute *ncache_attr;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG,
+ "sysfs_service_op_store: id:%s:\n",
+ kobj_id);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_PARAM);
+ if (!new_op)
+ return -EINVAL; /* sic */
+
+ /* Can't do a service_operation if the client is not running... */
+ rc = is_daemon_in_service();
+ if (rc) {
+ pr_info("%s: Client not running :%d:\n",
+ __func__,
+ is_daemon_in_service());
+ goto out;
+ }
+
+ /*
+ * The value we want to send back to userspace is in buf.
+ */
+ rc = kstrtoint(buf, 0, &val);
+ if (rc)
+ goto out;
+
+ if (!strcmp(kobj_id, ORANGEFS_KOBJ_ID)) {
+ orangefs_attr = (struct orangefs_attribute *)attr;
+
+ if (!strcmp(orangefs_attr->attr.name, "perf_history_size")) {
+ if (val > 0) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(orangefs_attr->attr.name,
+ "perf_time_interval_secs")) {
+ if (val > 0) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(orangefs_attr->attr.name,
+ "perf_counter_reset")) {
+ if ((val == 0) || (val == 1)) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_PERF_RESET;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ }
+
+ } else if (!strcmp(kobj_id, ACACHE_KOBJ_ID)) {
+ acache_attr = (struct acache_orangefs_attribute *)attr;
+
+ if (!strcmp(acache_attr->attr.name, "hard_limit")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(acache_attr->attr.name, "soft_limit")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(acache_attr->attr.name,
+ "reclaim_percentage")) {
+ if ((val > -1) && (val < 101)) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(acache_attr->attr.name, "timeout_msecs")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ }
+
+ } else if (!strcmp(kobj_id, CAPCACHE_KOBJ_ID)) {
+ capcache_attr = (struct capcache_orangefs_attribute *)attr;
+
+ if (!strcmp(capcache_attr->attr.name, "hard_limit")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(capcache_attr->attr.name, "soft_limit")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(capcache_attr->attr.name,
+ "reclaim_percentage")) {
+ if ((val > -1) && (val < 101)) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(capcache_attr->attr.name, "timeout_secs")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ }
+
+ } else if (!strcmp(kobj_id, CCACHE_KOBJ_ID)) {
+ ccache_attr = (struct ccache_orangefs_attribute *)attr;
+
+ if (!strcmp(ccache_attr->attr.name, "hard_limit")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(ccache_attr->attr.name, "soft_limit")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(ccache_attr->attr.name,
+ "reclaim_percentage")) {
+ if ((val > -1) && (val < 101)) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(ccache_attr->attr.name, "timeout_secs")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ }
+
+ } else if (!strcmp(kobj_id, NCACHE_KOBJ_ID)) {
+ ncache_attr = (struct ncache_orangefs_attribute *)attr;
+
+ if (!strcmp(ncache_attr->attr.name, "hard_limit")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(ncache_attr->attr.name, "soft_limit")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(ncache_attr->attr.name,
+ "reclaim_percentage")) {
+ if ((val > -1) && (val < 101)) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ } else if (!strcmp(ncache_attr->attr.name, "timeout_msecs")) {
+ if (val > -1) {
+ new_op->upcall.req.param.op =
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS;
+ } else {
+ rc = 0;
+ goto out;
+ }
+ }
+
+ } else {
+ gossip_err("sysfs_service_op_store: unknown kobj_id:%s:\n",
+ kobj_id);
+ rc = -EINVAL;
+ goto out;
+ }
+
+ new_op->upcall.req.param.type = ORANGEFS_PARAM_REQUEST_SET;
+
+ new_op->upcall.req.param.value = val;
+
+ /*
+ * The service_operation will return a errno return code on
+ * error, and zero on success.
+ */
+ rc = service_operation(new_op, "orangefs_param", ORANGEFS_OP_INTERRUPTIBLE);
+
+ if (rc < 0) {
+ gossip_err("sysfs_service_op_store: service op returned:%d:\n",
+ rc);
+ rc = 0;
+ } else {
+ rc = 1;
+ }
+
+out:
+ op_release(new_op);
+
+ if (rc == -ENOMEM || rc == 0)
+ rc = -EINVAL;
+
+ return rc;
+}
+
+static ssize_t
+ service_orangefs_store(struct orangefs_obj *orangefs_obj,
+ struct orangefs_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_store(ORANGEFS_KOBJ_ID, buf, (void *) attr);
+
+ /* rc should have an errno value if the service_op went bad. */
+ if (rc == 1)
+ rc = count;
+
+ return rc;
+}
+
+static ssize_t
+ service_acache_store(struct acache_orangefs_obj *acache_orangefs_obj,
+ struct acache_orangefs_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_store(ACACHE_KOBJ_ID, buf, (void *) attr);
+
+ /* rc should have an errno value if the service_op went bad. */
+ if (rc == 1)
+ rc = count;
+
+ return rc;
+}
+
+static ssize_t
+ service_capcache_store(struct capcache_orangefs_obj
+ *capcache_orangefs_obj,
+ struct capcache_orangefs_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_store(CAPCACHE_KOBJ_ID, buf, (void *) attr);
+
+ /* rc should have an errno value if the service_op went bad. */
+ if (rc == 1)
+ rc = count;
+
+ return rc;
+}
+
+static ssize_t service_ccache_store(struct ccache_orangefs_obj
+ *ccache_orangefs_obj,
+ struct ccache_orangefs_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_store(CCACHE_KOBJ_ID, buf, (void *) attr);
+
+ /* rc should have an errno value if the service_op went bad. */
+ if (rc == 1)
+ rc = count;
+
+ return rc;
+}
+
+static ssize_t
+ service_ncache_store(struct ncache_orangefs_obj *ncache_orangefs_obj,
+ struct ncache_orangefs_attribute *attr,
+ const char *buf,
+ size_t count)
+{
+ int rc = 0;
+
+ rc = sysfs_service_op_store(NCACHE_KOBJ_ID, buf, (void *) attr);
+
+ /* rc should have an errno value if the service_op went bad. */
+ if (rc == 1)
+ rc = count;
+
+ return rc;
+}
+
+static struct orangefs_attribute op_timeout_secs_attribute =
+ __ATTR(op_timeout_secs, 0664, int_orangefs_show, int_store);
+
+static struct orangefs_attribute slot_timeout_secs_attribute =
+ __ATTR(slot_timeout_secs, 0664, int_orangefs_show, int_store);
+
+static struct orangefs_attribute perf_counter_reset_attribute =
+ __ATTR(perf_counter_reset,
+ 0664,
+ service_orangefs_show,
+ service_orangefs_store);
+
+static struct orangefs_attribute perf_history_size_attribute =
+ __ATTR(perf_history_size,
+ 0664,
+ service_orangefs_show,
+ service_orangefs_store);
+
+static struct orangefs_attribute perf_time_interval_secs_attribute =
+ __ATTR(perf_time_interval_secs,
+ 0664,
+ service_orangefs_show,
+ service_orangefs_store);
+
+static struct attribute *orangefs_default_attrs[] = {
+ &op_timeout_secs_attribute.attr,
+ &slot_timeout_secs_attribute.attr,
+ &perf_counter_reset_attribute.attr,
+ &perf_history_size_attribute.attr,
+ &perf_time_interval_secs_attribute.attr,
+ NULL,
+};
+
+static struct kobj_type orangefs_ktype = {
+ .sysfs_ops = &orangefs_sysfs_ops,
+ .release = orangefs_release,
+ .default_attrs = orangefs_default_attrs,
+};
+
+static struct acache_orangefs_attribute acache_hard_limit_attribute =
+ __ATTR(hard_limit,
+ 0664,
+ service_acache_show,
+ service_acache_store);
+
+static struct acache_orangefs_attribute acache_reclaim_percent_attribute =
+ __ATTR(reclaim_percentage,
+ 0664,
+ service_acache_show,
+ service_acache_store);
+
+static struct acache_orangefs_attribute acache_soft_limit_attribute =
+ __ATTR(soft_limit,
+ 0664,
+ service_acache_show,
+ service_acache_store);
+
+static struct acache_orangefs_attribute acache_timeout_msecs_attribute =
+ __ATTR(timeout_msecs,
+ 0664,
+ service_acache_show,
+ service_acache_store);
+
+static struct attribute *acache_orangefs_default_attrs[] = {
+ &acache_hard_limit_attribute.attr,
+ &acache_reclaim_percent_attribute.attr,
+ &acache_soft_limit_attribute.attr,
+ &acache_timeout_msecs_attribute.attr,
+ NULL,
+};
+
+static struct kobj_type acache_orangefs_ktype = {
+ .sysfs_ops = &acache_orangefs_sysfs_ops,
+ .release = acache_orangefs_release,
+ .default_attrs = acache_orangefs_default_attrs,
+};
+
+static struct capcache_orangefs_attribute capcache_hard_limit_attribute =
+ __ATTR(hard_limit,
+ 0664,
+ service_capcache_show,
+ service_capcache_store);
+
+static struct capcache_orangefs_attribute capcache_reclaim_percent_attribute =
+ __ATTR(reclaim_percentage,
+ 0664,
+ service_capcache_show,
+ service_capcache_store);
+
+static struct capcache_orangefs_attribute capcache_soft_limit_attribute =
+ __ATTR(soft_limit,
+ 0664,
+ service_capcache_show,
+ service_capcache_store);
+
+static struct capcache_orangefs_attribute capcache_timeout_secs_attribute =
+ __ATTR(timeout_secs,
+ 0664,
+ service_capcache_show,
+ service_capcache_store);
+
+static struct attribute *capcache_orangefs_default_attrs[] = {
+ &capcache_hard_limit_attribute.attr,
+ &capcache_reclaim_percent_attribute.attr,
+ &capcache_soft_limit_attribute.attr,
+ &capcache_timeout_secs_attribute.attr,
+ NULL,
+};
+
+static struct kobj_type capcache_orangefs_ktype = {
+ .sysfs_ops = &capcache_orangefs_sysfs_ops,
+ .release = capcache_orangefs_release,
+ .default_attrs = capcache_orangefs_default_attrs,
+};
+
+static struct ccache_orangefs_attribute ccache_hard_limit_attribute =
+ __ATTR(hard_limit,
+ 0664,
+ service_ccache_show,
+ service_ccache_store);
+
+static struct ccache_orangefs_attribute ccache_reclaim_percent_attribute =
+ __ATTR(reclaim_percentage,
+ 0664,
+ service_ccache_show,
+ service_ccache_store);
+
+static struct ccache_orangefs_attribute ccache_soft_limit_attribute =
+ __ATTR(soft_limit,
+ 0664,
+ service_ccache_show,
+ service_ccache_store);
+
+static struct ccache_orangefs_attribute ccache_timeout_secs_attribute =
+ __ATTR(timeout_secs,
+ 0664,
+ service_ccache_show,
+ service_ccache_store);
+
+static struct attribute *ccache_orangefs_default_attrs[] = {
+ &ccache_hard_limit_attribute.attr,
+ &ccache_reclaim_percent_attribute.attr,
+ &ccache_soft_limit_attribute.attr,
+ &ccache_timeout_secs_attribute.attr,
+ NULL,
+};
+
+static struct kobj_type ccache_orangefs_ktype = {
+ .sysfs_ops = &ccache_orangefs_sysfs_ops,
+ .release = ccache_orangefs_release,
+ .default_attrs = ccache_orangefs_default_attrs,
+};
+
+static struct ncache_orangefs_attribute ncache_hard_limit_attribute =
+ __ATTR(hard_limit,
+ 0664,
+ service_ncache_show,
+ service_ncache_store);
+
+static struct ncache_orangefs_attribute ncache_reclaim_percent_attribute =
+ __ATTR(reclaim_percentage,
+ 0664,
+ service_ncache_show,
+ service_ncache_store);
+
+static struct ncache_orangefs_attribute ncache_soft_limit_attribute =
+ __ATTR(soft_limit,
+ 0664,
+ service_ncache_show,
+ service_ncache_store);
+
+static struct ncache_orangefs_attribute ncache_timeout_msecs_attribute =
+ __ATTR(timeout_msecs,
+ 0664,
+ service_ncache_show,
+ service_ncache_store);
+
+static struct attribute *ncache_orangefs_default_attrs[] = {
+ &ncache_hard_limit_attribute.attr,
+ &ncache_reclaim_percent_attribute.attr,
+ &ncache_soft_limit_attribute.attr,
+ &ncache_timeout_msecs_attribute.attr,
+ NULL,
+};
+
+static struct kobj_type ncache_orangefs_ktype = {
+ .sysfs_ops = &ncache_orangefs_sysfs_ops,
+ .release = ncache_orangefs_release,
+ .default_attrs = ncache_orangefs_default_attrs,
+};
+
+static struct pc_orangefs_attribute pc_acache_attribute =
+ __ATTR(acache,
+ 0664,
+ service_pc_show,
+ NULL);
+
+static struct pc_orangefs_attribute pc_capcache_attribute =
+ __ATTR(capcache,
+ 0664,
+ service_pc_show,
+ NULL);
+
+static struct pc_orangefs_attribute pc_ncache_attribute =
+ __ATTR(ncache,
+ 0664,
+ service_pc_show,
+ NULL);
+
+static struct attribute *pc_orangefs_default_attrs[] = {
+ &pc_acache_attribute.attr,
+ &pc_capcache_attribute.attr,
+ &pc_ncache_attribute.attr,
+ NULL,
+};
+
+static struct kobj_type pc_orangefs_ktype = {
+ .sysfs_ops = &pc_orangefs_sysfs_ops,
+ .release = pc_orangefs_release,
+ .default_attrs = pc_orangefs_default_attrs,
+};
+
+static struct stats_orangefs_attribute stats_reads_attribute =
+ __ATTR(reads,
+ 0664,
+ int_stats_show,
+ NULL);
+
+static struct stats_orangefs_attribute stats_writes_attribute =
+ __ATTR(writes,
+ 0664,
+ int_stats_show,
+ NULL);
+
+static struct attribute *stats_orangefs_default_attrs[] = {
+ &stats_reads_attribute.attr,
+ &stats_writes_attribute.attr,
+ NULL,
+};
+
+static struct kobj_type stats_orangefs_ktype = {
+ .sysfs_ops = &stats_orangefs_sysfs_ops,
+ .release = stats_orangefs_release,
+ .default_attrs = stats_orangefs_default_attrs,
+};
+
+static struct orangefs_obj *orangefs_obj;
+static struct acache_orangefs_obj *acache_orangefs_obj;
+static struct capcache_orangefs_obj *capcache_orangefs_obj;
+static struct ccache_orangefs_obj *ccache_orangefs_obj;
+static struct ncache_orangefs_obj *ncache_orangefs_obj;
+static struct pc_orangefs_obj *pc_orangefs_obj;
+static struct stats_orangefs_obj *stats_orangefs_obj;
+
+int orangefs_sysfs_init(void)
+{
+ int rc = -EINVAL;
+
+ gossip_debug(GOSSIP_SYSFS_DEBUG, "orangefs_sysfs_init: start\n");
+
+ /* create /sys/fs/orangefs. */
+ orangefs_obj = kzalloc(sizeof(*orangefs_obj), GFP_KERNEL);
+ if (!orangefs_obj)
+ goto out;
+
+ rc = kobject_init_and_add(&orangefs_obj->kobj,
+ &orangefs_ktype,
+ fs_kobj,
+ ORANGEFS_KOBJ_ID);
+
+ if (rc)
+ goto ofs_obj_bail;
+
+ kobject_uevent(&orangefs_obj->kobj, KOBJ_ADD);
+
+ /* create /sys/fs/orangefs/acache. */
+ acache_orangefs_obj = kzalloc(sizeof(*acache_orangefs_obj), GFP_KERNEL);
+ if (!acache_orangefs_obj) {
+ rc = -EINVAL;
+ goto ofs_obj_bail;
+ }
+
+ rc = kobject_init_and_add(&acache_orangefs_obj->kobj,
+ &acache_orangefs_ktype,
+ &orangefs_obj->kobj,
+ ACACHE_KOBJ_ID);
+
+ if (rc)
+ goto acache_obj_bail;
+
+ kobject_uevent(&acache_orangefs_obj->kobj, KOBJ_ADD);
+
+ /* create /sys/fs/orangefs/capcache. */
+ capcache_orangefs_obj =
+ kzalloc(sizeof(*capcache_orangefs_obj), GFP_KERNEL);
+ if (!capcache_orangefs_obj) {
+ rc = -EINVAL;
+ goto acache_obj_bail;
+ }
+
+ rc = kobject_init_and_add(&capcache_orangefs_obj->kobj,
+ &capcache_orangefs_ktype,
+ &orangefs_obj->kobj,
+ CAPCACHE_KOBJ_ID);
+ if (rc)
+ goto capcache_obj_bail;
+
+ kobject_uevent(&capcache_orangefs_obj->kobj, KOBJ_ADD);
+
+ /* create /sys/fs/orangefs/ccache. */
+ ccache_orangefs_obj =
+ kzalloc(sizeof(*ccache_orangefs_obj), GFP_KERNEL);
+ if (!ccache_orangefs_obj) {
+ rc = -EINVAL;
+ goto capcache_obj_bail;
+ }
+
+ rc = kobject_init_and_add(&ccache_orangefs_obj->kobj,
+ &ccache_orangefs_ktype,
+ &orangefs_obj->kobj,
+ CCACHE_KOBJ_ID);
+ if (rc)
+ goto ccache_obj_bail;
+
+ kobject_uevent(&ccache_orangefs_obj->kobj, KOBJ_ADD);
+
+ /* create /sys/fs/orangefs/ncache. */
+ ncache_orangefs_obj = kzalloc(sizeof(*ncache_orangefs_obj), GFP_KERNEL);
+ if (!ncache_orangefs_obj) {
+ rc = -EINVAL;
+ goto ccache_obj_bail;
+ }
+
+ rc = kobject_init_and_add(&ncache_orangefs_obj->kobj,
+ &ncache_orangefs_ktype,
+ &orangefs_obj->kobj,
+ NCACHE_KOBJ_ID);
+
+ if (rc)
+ goto ncache_obj_bail;
+
+ kobject_uevent(&ncache_orangefs_obj->kobj, KOBJ_ADD);
+
+ /* create /sys/fs/orangefs/perf_counters. */
+ pc_orangefs_obj = kzalloc(sizeof(*pc_orangefs_obj), GFP_KERNEL);
+ if (!pc_orangefs_obj) {
+ rc = -EINVAL;
+ goto ncache_obj_bail;
+ }
+
+ rc = kobject_init_and_add(&pc_orangefs_obj->kobj,
+ &pc_orangefs_ktype,
+ &orangefs_obj->kobj,
+ "perf_counters");
+
+ if (rc)
+ goto pc_obj_bail;
+
+ kobject_uevent(&pc_orangefs_obj->kobj, KOBJ_ADD);
+
+ /* create /sys/fs/orangefs/stats. */
+ stats_orangefs_obj = kzalloc(sizeof(*stats_orangefs_obj), GFP_KERNEL);
+ if (!stats_orangefs_obj) {
+ rc = -EINVAL;
+ goto pc_obj_bail;
+ }
+
+ rc = kobject_init_and_add(&stats_orangefs_obj->kobj,
+ &stats_orangefs_ktype,
+ &orangefs_obj->kobj,
+ STATS_KOBJ_ID);
+
+ if (rc)
+ goto stats_obj_bail;
+
+ kobject_uevent(&stats_orangefs_obj->kobj, KOBJ_ADD);
+ goto out;
+
+stats_obj_bail:
+ kobject_put(&stats_orangefs_obj->kobj);
+
+pc_obj_bail:
+ kobject_put(&pc_orangefs_obj->kobj);
+
+ncache_obj_bail:
+ kobject_put(&ncache_orangefs_obj->kobj);
+
+ccache_obj_bail:
+ kobject_put(&ccache_orangefs_obj->kobj);
+
+capcache_obj_bail:
+ kobject_put(&capcache_orangefs_obj->kobj);
+
+acache_obj_bail:
+ kobject_put(&acache_orangefs_obj->kobj);
+
+ofs_obj_bail:
+ kobject_put(&orangefs_obj->kobj);
+out:
+ return rc;
+}
+
+void orangefs_sysfs_exit(void)
+{
+ gossip_debug(GOSSIP_SYSFS_DEBUG, "orangefs_sysfs_exit: start\n");
+
+ kobject_put(&acache_orangefs_obj->kobj);
+ kobject_put(&capcache_orangefs_obj->kobj);
+ kobject_put(&ccache_orangefs_obj->kobj);
+ kobject_put(&ncache_orangefs_obj->kobj);
+ kobject_put(&pc_orangefs_obj->kobj);
+ kobject_put(&stats_orangefs_obj->kobj);
+
+ kobject_put(&orangefs_obj->kobj);
+}
diff --git a/fs/orangefs/orangefs-sysfs.h b/fs/orangefs/orangefs-sysfs.h
new file mode 100644
index 000000000..f0b76382d
--- /dev/null
+++ b/fs/orangefs/orangefs-sysfs.h
@@ -0,0 +1,2 @@
+extern int orangefs_sysfs_init(void);
+extern void orangefs_sysfs_exit(void);
diff --git a/fs/orangefs/orangefs-utils.c b/fs/orangefs/orangefs-utils.c
new file mode 100644
index 000000000..2d129b588
--- /dev/null
+++ b/fs/orangefs/orangefs-utils.c
@@ -0,0 +1,1052 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-dev-proto.h"
+#include "orangefs-bufmap.h"
+
+__s32 fsid_of_op(struct orangefs_kernel_op_s *op)
+{
+ __s32 fsid = ORANGEFS_FS_ID_NULL;
+
+ if (op) {
+ switch (op->upcall.type) {
+ case ORANGEFS_VFS_OP_FILE_IO:
+ fsid = op->upcall.req.io.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_LOOKUP:
+ fsid = op->upcall.req.lookup.parent_refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_CREATE:
+ fsid = op->upcall.req.create.parent_refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_GETATTR:
+ fsid = op->upcall.req.getattr.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_REMOVE:
+ fsid = op->upcall.req.remove.parent_refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_MKDIR:
+ fsid = op->upcall.req.mkdir.parent_refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_READDIR:
+ fsid = op->upcall.req.readdir.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_SETATTR:
+ fsid = op->upcall.req.setattr.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_SYMLINK:
+ fsid = op->upcall.req.sym.parent_refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_RENAME:
+ fsid = op->upcall.req.rename.old_parent_refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_STATFS:
+ fsid = op->upcall.req.statfs.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_TRUNCATE:
+ fsid = op->upcall.req.truncate.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_MMAP_RA_FLUSH:
+ fsid = op->upcall.req.ra_cache_flush.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_FS_UMOUNT:
+ fsid = op->upcall.req.fs_umount.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_GETXATTR:
+ fsid = op->upcall.req.getxattr.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_SETXATTR:
+ fsid = op->upcall.req.setxattr.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_LISTXATTR:
+ fsid = op->upcall.req.listxattr.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_REMOVEXATTR:
+ fsid = op->upcall.req.removexattr.refn.fs_id;
+ break;
+ case ORANGEFS_VFS_OP_FSYNC:
+ fsid = op->upcall.req.fsync.refn.fs_id;
+ break;
+ default:
+ break;
+ }
+ }
+ return fsid;
+}
+
+static int orangefs_inode_flags(struct ORANGEFS_sys_attr_s *attrs)
+{
+ int flags = 0;
+ if (attrs->flags & ORANGEFS_IMMUTABLE_FL)
+ flags |= S_IMMUTABLE;
+ else
+ flags &= ~S_IMMUTABLE;
+ if (attrs->flags & ORANGEFS_APPEND_FL)
+ flags |= S_APPEND;
+ else
+ flags &= ~S_APPEND;
+ if (attrs->flags & ORANGEFS_NOATIME_FL)
+ flags |= S_NOATIME;
+ else
+ flags &= ~S_NOATIME;
+ return flags;
+}
+
+static int orangefs_inode_perms(struct ORANGEFS_sys_attr_s *attrs)
+{
+ int perm_mode = 0;
+
+ if (attrs->perms & ORANGEFS_O_EXECUTE)
+ perm_mode |= S_IXOTH;
+ if (attrs->perms & ORANGEFS_O_WRITE)
+ perm_mode |= S_IWOTH;
+ if (attrs->perms & ORANGEFS_O_READ)
+ perm_mode |= S_IROTH;
+
+ if (attrs->perms & ORANGEFS_G_EXECUTE)
+ perm_mode |= S_IXGRP;
+ if (attrs->perms & ORANGEFS_G_WRITE)
+ perm_mode |= S_IWGRP;
+ if (attrs->perms & ORANGEFS_G_READ)
+ perm_mode |= S_IRGRP;
+
+ if (attrs->perms & ORANGEFS_U_EXECUTE)
+ perm_mode |= S_IXUSR;
+ if (attrs->perms & ORANGEFS_U_WRITE)
+ perm_mode |= S_IWUSR;
+ if (attrs->perms & ORANGEFS_U_READ)
+ perm_mode |= S_IRUSR;
+
+ if (attrs->perms & ORANGEFS_G_SGID)
+ perm_mode |= S_ISGID;
+ if (attrs->perms & ORANGEFS_U_SUID)
+ perm_mode |= S_ISUID;
+
+ return perm_mode;
+}
+
+/*
+ * NOTE: in kernel land, we never use the sys_attr->link_target for
+ * anything, so don't bother copying it into the sys_attr object here.
+ */
+static inline int copy_attributes_from_inode(struct inode *inode,
+ struct ORANGEFS_sys_attr_s *attrs,
+ struct iattr *iattr)
+{
+ umode_t tmp_mode;
+
+ if (!iattr || !inode || !attrs) {
+ gossip_err("NULL iattr (%p), inode (%p), attrs (%p) "
+ "in copy_attributes_from_inode!\n",
+ iattr,
+ inode,
+ attrs);
+ return -EINVAL;
+ }
+ /*
+ * We need to be careful to only copy the attributes out of the
+ * iattr object that we know are valid.
+ */
+ attrs->mask = 0;
+ if (iattr->ia_valid & ATTR_UID) {
+ attrs->owner = from_kuid(current_user_ns(), iattr->ia_uid);
+ attrs->mask |= ORANGEFS_ATTR_SYS_UID;
+ gossip_debug(GOSSIP_UTILS_DEBUG, "(UID) %d\n", attrs->owner);
+ }
+ if (iattr->ia_valid & ATTR_GID) {
+ attrs->group = from_kgid(current_user_ns(), iattr->ia_gid);
+ attrs->mask |= ORANGEFS_ATTR_SYS_GID;
+ gossip_debug(GOSSIP_UTILS_DEBUG, "(GID) %d\n", attrs->group);
+ }
+
+ if (iattr->ia_valid & ATTR_ATIME) {
+ attrs->mask |= ORANGEFS_ATTR_SYS_ATIME;
+ if (iattr->ia_valid & ATTR_ATIME_SET) {
+ attrs->atime = (time64_t)iattr->ia_atime.tv_sec;
+ attrs->mask |= ORANGEFS_ATTR_SYS_ATIME_SET;
+ }
+ }
+ if (iattr->ia_valid & ATTR_MTIME) {
+ attrs->mask |= ORANGEFS_ATTR_SYS_MTIME;
+ if (iattr->ia_valid & ATTR_MTIME_SET) {
+ attrs->mtime = (time64_t)iattr->ia_mtime.tv_sec;
+ attrs->mask |= ORANGEFS_ATTR_SYS_MTIME_SET;
+ }
+ }
+ if (iattr->ia_valid & ATTR_CTIME)
+ attrs->mask |= ORANGEFS_ATTR_SYS_CTIME;
+
+ /*
+ * ORANGEFS cannot set size with a setattr operation. Probably not likely
+ * to be requested through the VFS, but just in case, don't worry about
+ * ATTR_SIZE
+ */
+
+ if (iattr->ia_valid & ATTR_MODE) {
+ tmp_mode = iattr->ia_mode;
+ if (tmp_mode & (S_ISVTX)) {
+ if (is_root_handle(inode)) {
+ /*
+ * allow sticky bit to be set on root (since
+ * it shows up that way by default anyhow),
+ * but don't show it to the server
+ */
+ tmp_mode -= S_ISVTX;
+ } else {
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "User attempted to set sticky bit on non-root directory; returning EINVAL.\n");
+ return -EINVAL;
+ }
+ }
+
+ if (tmp_mode & (S_ISUID)) {
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "Attempting to set setuid bit (not supported); returning EINVAL.\n");
+ return -EINVAL;
+ }
+
+ attrs->perms = ORANGEFS_util_translate_mode(tmp_mode);
+ attrs->mask |= ORANGEFS_ATTR_SYS_PERM;
+ }
+
+ return 0;
+}
+
+static int orangefs_inode_type(enum orangefs_ds_type objtype)
+{
+ if (objtype == ORANGEFS_TYPE_METAFILE)
+ return S_IFREG;
+ else if (objtype == ORANGEFS_TYPE_DIRECTORY)
+ return S_IFDIR;
+ else if (objtype == ORANGEFS_TYPE_SYMLINK)
+ return S_IFLNK;
+ else
+ return -1;
+}
+
+static int orangefs_inode_is_stale(struct inode *inode, int new,
+ struct ORANGEFS_sys_attr_s *attrs, char *link_target)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ int type = orangefs_inode_type(attrs->objtype);
+ if (!new) {
+ /*
+ * If the inode type or symlink target have changed then this
+ * inode is stale.
+ */
+ if (type == -1 || !(inode->i_mode & type)) {
+ orangefs_make_bad_inode(inode);
+ return 1;
+ }
+ if (type == S_IFLNK && strncmp(orangefs_inode->link_target,
+ link_target, ORANGEFS_NAME_MAX)) {
+ orangefs_make_bad_inode(inode);
+ return 1;
+ }
+ }
+ return 0;
+}
+
+int orangefs_inode_getattr(struct inode *inode, int new, int size)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op;
+ loff_t inode_size, rounded_up_size;
+ int ret, type;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
+ get_khandle_from_ino(inode));
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
+ if (!new_op)
+ return -ENOMEM;
+ new_op->upcall.req.getattr.refn = orangefs_inode->refn;
+ new_op->upcall.req.getattr.mask = size ?
+ ORANGEFS_ATTR_SYS_ALL_NOHINT : ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE;
+
+ ret = service_operation(new_op, __func__,
+ get_interruptible_flag(inode));
+ if (ret != 0)
+ goto out;
+
+ type = orangefs_inode_type(new_op->
+ downcall.resp.getattr.attributes.objtype);
+ ret = orangefs_inode_is_stale(inode, new,
+ &new_op->downcall.resp.getattr.attributes,
+ new_op->downcall.resp.getattr.link_target);
+ if (ret) {
+ ret = -ESTALE;
+ goto out;
+ }
+
+ switch (type) {
+ case S_IFREG:
+ inode->i_flags = orangefs_inode_flags(&new_op->
+ downcall.resp.getattr.attributes);
+ if (size) {
+ inode_size = (loff_t)new_op->
+ downcall.resp.getattr.attributes.size;
+ rounded_up_size =
+ (inode_size + (4096 - (inode_size % 4096)));
+ inode->i_size = inode_size;
+ orangefs_inode->blksize =
+ new_op->downcall.resp.getattr.attributes.blksize;
+ spin_lock(&inode->i_lock);
+ inode->i_bytes = inode_size;
+ inode->i_blocks =
+ (unsigned long)(rounded_up_size / 512);
+ spin_unlock(&inode->i_lock);
+ }
+ break;
+ case S_IFDIR:
+ inode->i_size = PAGE_SIZE;
+ orangefs_inode->blksize = (1 << inode->i_blkbits);
+ spin_lock(&inode->i_lock);
+ inode_set_bytes(inode, inode->i_size);
+ spin_unlock(&inode->i_lock);
+ set_nlink(inode, 1);
+ break;
+ case S_IFLNK:
+ if (new) {
+ inode->i_size = (loff_t)strlen(new_op->
+ downcall.resp.getattr.link_target);
+ orangefs_inode->blksize = (1 << inode->i_blkbits);
+ ret = strscpy(orangefs_inode->link_target,
+ new_op->downcall.resp.getattr.link_target,
+ ORANGEFS_NAME_MAX);
+ if (ret == -E2BIG) {
+ ret = -EIO;
+ goto out;
+ }
+ inode->i_link = orangefs_inode->link_target;
+ }
+ break;
+ }
+
+ inode->i_uid = make_kuid(&init_user_ns, new_op->
+ downcall.resp.getattr.attributes.owner);
+ inode->i_gid = make_kgid(&init_user_ns, new_op->
+ downcall.resp.getattr.attributes.group);
+ inode->i_atime.tv_sec = (time64_t)new_op->
+ downcall.resp.getattr.attributes.atime;
+ inode->i_mtime.tv_sec = (time64_t)new_op->
+ downcall.resp.getattr.attributes.mtime;
+ inode->i_ctime.tv_sec = (time64_t)new_op->
+ downcall.resp.getattr.attributes.ctime;
+ inode->i_atime.tv_nsec = 0;
+ inode->i_mtime.tv_nsec = 0;
+ inode->i_ctime.tv_nsec = 0;
+
+ /* special case: mark the root inode as sticky */
+ inode->i_mode = type | (is_root_handle(inode) ? S_ISVTX : 0) |
+ orangefs_inode_perms(&new_op->downcall.resp.getattr.attributes);
+
+ ret = 0;
+out:
+ op_release(new_op);
+ return ret;
+}
+
+int orangefs_inode_check_changed(struct inode *inode)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op;
+ int ret;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: called on inode %pU\n", __func__,
+ get_khandle_from_ino(inode));
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_GETATTR);
+ if (!new_op)
+ return -ENOMEM;
+ new_op->upcall.req.getattr.refn = orangefs_inode->refn;
+ new_op->upcall.req.getattr.mask = ORANGEFS_ATTR_SYS_TYPE |
+ ORANGEFS_ATTR_SYS_LNK_TARGET;
+
+ ret = service_operation(new_op, __func__,
+ get_interruptible_flag(inode));
+ if (ret != 0)
+ goto out;
+
+ ret = orangefs_inode_is_stale(inode, 0,
+ &new_op->downcall.resp.getattr.attributes,
+ new_op->downcall.resp.getattr.link_target);
+out:
+ op_release(new_op);
+ return ret;
+}
+
+/*
+ * issues a orangefs setattr request to make sure the new attribute values
+ * take effect if successful. returns 0 on success; -errno otherwise
+ */
+int orangefs_inode_setattr(struct inode *inode, struct iattr *iattr)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op;
+ int ret;
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_SETATTR);
+ if (!new_op)
+ return -ENOMEM;
+
+ new_op->upcall.req.setattr.refn = orangefs_inode->refn;
+ ret = copy_attributes_from_inode(inode,
+ &new_op->upcall.req.setattr.attributes,
+ iattr);
+ if (ret >= 0) {
+ ret = service_operation(new_op, __func__,
+ get_interruptible_flag(inode));
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "orangefs_inode_setattr: returning %d\n",
+ ret);
+ }
+
+ op_release(new_op);
+
+ /*
+ * successful setattr should clear the atime, mtime and
+ * ctime flags.
+ */
+ if (ret == 0) {
+ ClearAtimeFlag(orangefs_inode);
+ ClearMtimeFlag(orangefs_inode);
+ ClearCtimeFlag(orangefs_inode);
+ ClearModeFlag(orangefs_inode);
+ }
+
+ return ret;
+}
+
+int orangefs_flush_inode(struct inode *inode)
+{
+ /*
+ * If it is a dirty inode, this function gets called.
+ * Gather all the information that needs to be setattr'ed
+ * Right now, this will only be used for mode, atime, mtime
+ * and/or ctime.
+ */
+ struct iattr wbattr;
+ int ret;
+ int mtime_flag;
+ int ctime_flag;
+ int atime_flag;
+ int mode_flag;
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+
+ memset(&wbattr, 0, sizeof(wbattr));
+
+ /*
+ * check inode flags up front, and clear them if they are set. This
+ * will prevent multiple processes from all trying to flush the same
+ * inode if they call close() simultaneously
+ */
+ mtime_flag = MtimeFlag(orangefs_inode);
+ ClearMtimeFlag(orangefs_inode);
+ ctime_flag = CtimeFlag(orangefs_inode);
+ ClearCtimeFlag(orangefs_inode);
+ atime_flag = AtimeFlag(orangefs_inode);
+ ClearAtimeFlag(orangefs_inode);
+ mode_flag = ModeFlag(orangefs_inode);
+ ClearModeFlag(orangefs_inode);
+
+ /* -- Lazy atime,mtime and ctime update --
+ * Note: all times are dictated by server in the new scheme
+ * and not by the clients
+ *
+ * Also mode updates are being handled now..
+ */
+
+ if (mtime_flag)
+ wbattr.ia_valid |= ATTR_MTIME;
+ if (ctime_flag)
+ wbattr.ia_valid |= ATTR_CTIME;
+ if (atime_flag)
+ wbattr.ia_valid |= ATTR_ATIME;
+
+ if (mode_flag) {
+ wbattr.ia_mode = inode->i_mode;
+ wbattr.ia_valid |= ATTR_MODE;
+ }
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "*********** orangefs_flush_inode: %pU "
+ "(ia_valid %d)\n",
+ get_khandle_from_ino(inode),
+ wbattr.ia_valid);
+ if (wbattr.ia_valid == 0) {
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "orangefs_flush_inode skipping setattr()\n");
+ return 0;
+ }
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "orangefs_flush_inode (%pU) writing mode %o\n",
+ get_khandle_from_ino(inode),
+ inode->i_mode);
+
+ ret = orangefs_inode_setattr(inode, &wbattr);
+
+ return ret;
+}
+
+int orangefs_unmount_sb(struct super_block *sb)
+{
+ int ret = -EINVAL;
+ struct orangefs_kernel_op_s *new_op = NULL;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "orangefs_unmount_sb called on sb %p\n",
+ sb);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_FS_UMOUNT);
+ if (!new_op)
+ return -ENOMEM;
+ new_op->upcall.req.fs_umount.id = ORANGEFS_SB(sb)->id;
+ new_op->upcall.req.fs_umount.fs_id = ORANGEFS_SB(sb)->fs_id;
+ strncpy(new_op->upcall.req.fs_umount.orangefs_config_server,
+ ORANGEFS_SB(sb)->devname,
+ ORANGEFS_MAX_SERVER_ADDR_LEN);
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "Attempting ORANGEFS Unmount via host %s\n",
+ new_op->upcall.req.fs_umount.orangefs_config_server);
+
+ ret = service_operation(new_op, "orangefs_fs_umount", 0);
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "orangefs_unmount: got return value of %d\n", ret);
+ if (ret)
+ sb = ERR_PTR(ret);
+ else
+ ORANGEFS_SB(sb)->mount_pending = 1;
+
+ op_release(new_op);
+ return ret;
+}
+
+void orangefs_make_bad_inode(struct inode *inode)
+{
+ if (is_root_handle(inode)) {
+ /*
+ * if this occurs, the pvfs2-client-core was killed but we
+ * can't afford to lose the inode operations and such
+ * associated with the root handle in any case.
+ */
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "*** NOT making bad root inode %pU\n",
+ get_khandle_from_ino(inode));
+ } else {
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "*** making bad inode %pU\n",
+ get_khandle_from_ino(inode));
+ make_bad_inode(inode);
+ }
+}
+
+/*
+ * The following is a very dirty hack that is now a permanent part of the
+ * ORANGEFS protocol. See protocol.h for more error definitions.
+ */
+
+/* The order matches include/orangefs-types.h in the OrangeFS source. */
+static int PINT_errno_mapping[] = {
+ 0, EPERM, ENOENT, EINTR, EIO, ENXIO, EBADF, EAGAIN, ENOMEM,
+ EFAULT, EBUSY, EEXIST, ENODEV, ENOTDIR, EISDIR, EINVAL, EMFILE,
+ EFBIG, ENOSPC, EROFS, EMLINK, EPIPE, EDEADLK, ENAMETOOLONG,
+ ENOLCK, ENOSYS, ENOTEMPTY, ELOOP, EWOULDBLOCK, ENOMSG, EUNATCH,
+ EBADR, EDEADLOCK, ENODATA, ETIME, ENONET, EREMOTE, ECOMM,
+ EPROTO, EBADMSG, EOVERFLOW, ERESTART, EMSGSIZE, EPROTOTYPE,
+ ENOPROTOOPT, EPROTONOSUPPORT, EOPNOTSUPP, EADDRINUSE,
+ EADDRNOTAVAIL, ENETDOWN, ENETUNREACH, ENETRESET, ENOBUFS,
+ ETIMEDOUT, ECONNREFUSED, EHOSTDOWN, EHOSTUNREACH, EALREADY,
+ EACCES, ECONNRESET, ERANGE
+};
+
+int orangefs_normalize_to_errno(__s32 error_code)
+{
+ __u32 i;
+
+ /* Success */
+ if (error_code == 0) {
+ return 0;
+ /*
+ * This shouldn't ever happen. If it does it should be fixed on the
+ * server.
+ */
+ } else if (error_code > 0) {
+ gossip_err("orangefs: error status receieved.\n");
+ gossip_err("orangefs: assuming error code is inverted.\n");
+ error_code = -error_code;
+ }
+
+ /*
+ * XXX: This is very bad since error codes from ORANGEFS may not be
+ * suitable for return into userspace.
+ */
+
+ /*
+ * Convert ORANGEFS error values into errno values suitable for return
+ * from the kernel.
+ */
+ if ((-error_code) & ORANGEFS_NON_ERRNO_ERROR_BIT) {
+ if (((-error_code) &
+ (ORANGEFS_ERROR_NUMBER_BITS|ORANGEFS_NON_ERRNO_ERROR_BIT|
+ ORANGEFS_ERROR_BIT)) == ORANGEFS_ECANCEL) {
+ /*
+ * cancellation error codes generally correspond to
+ * a timeout from the client's perspective
+ */
+ error_code = -ETIMEDOUT;
+ } else {
+ /* assume a default error code */
+ gossip_err("orangefs: warning: got error code without errno equivalent: %d.\n", error_code);
+ error_code = -EINVAL;
+ }
+
+ /* Convert ORANGEFS encoded errno values into regular errno values. */
+ } else if ((-error_code) & ORANGEFS_ERROR_BIT) {
+ i = (-error_code) & ~(ORANGEFS_ERROR_BIT|ORANGEFS_ERROR_CLASS_BITS);
+ if (i < sizeof(PINT_errno_mapping)/sizeof(*PINT_errno_mapping))
+ error_code = -PINT_errno_mapping[i];
+ else
+ error_code = -EINVAL;
+
+ /*
+ * Only ORANGEFS protocol error codes should ever come here. Otherwise
+ * there is a bug somewhere.
+ */
+ } else {
+ gossip_err("orangefs: orangefs_normalize_to_errno: got error code which is not from ORANGEFS.\n");
+ }
+ return error_code;
+}
+
+#define NUM_MODES 11
+__s32 ORANGEFS_util_translate_mode(int mode)
+{
+ int ret = 0;
+ int i = 0;
+ static int modes[NUM_MODES] = {
+ S_IXOTH, S_IWOTH, S_IROTH,
+ S_IXGRP, S_IWGRP, S_IRGRP,
+ S_IXUSR, S_IWUSR, S_IRUSR,
+ S_ISGID, S_ISUID
+ };
+ static int orangefs_modes[NUM_MODES] = {
+ ORANGEFS_O_EXECUTE, ORANGEFS_O_WRITE, ORANGEFS_O_READ,
+ ORANGEFS_G_EXECUTE, ORANGEFS_G_WRITE, ORANGEFS_G_READ,
+ ORANGEFS_U_EXECUTE, ORANGEFS_U_WRITE, ORANGEFS_U_READ,
+ ORANGEFS_G_SGID, ORANGEFS_U_SUID
+ };
+
+ for (i = 0; i < NUM_MODES; i++)
+ if (mode & modes[i])
+ ret |= orangefs_modes[i];
+
+ return ret;
+}
+#undef NUM_MODES
+
+/*
+ * After obtaining a string representation of the client's debug
+ * keywords and their associated masks, this function is called to build an
+ * array of these values.
+ */
+int orangefs_prepare_cdm_array(char *debug_array_string)
+{
+ int i;
+ int rc = -EINVAL;
+ char *cds_head = NULL;
+ char *cds_delimiter = NULL;
+ int keyword_len = 0;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+ /*
+ * figure out how many elements the cdm_array needs.
+ */
+ for (i = 0; i < strlen(debug_array_string); i++)
+ if (debug_array_string[i] == '\n')
+ cdm_element_count++;
+
+ if (!cdm_element_count) {
+ pr_info("No elements in client debug array string!\n");
+ goto out;
+ }
+
+ cdm_array =
+ kzalloc(cdm_element_count * sizeof(struct client_debug_mask),
+ GFP_KERNEL);
+ if (!cdm_array) {
+ pr_info("malloc failed for cdm_array!\n");
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ cds_head = debug_array_string;
+
+ for (i = 0; i < cdm_element_count; i++) {
+ cds_delimiter = strchr(cds_head, '\n');
+ *cds_delimiter = '\0';
+
+ keyword_len = strcspn(cds_head, " ");
+
+ cdm_array[i].keyword = kzalloc(keyword_len + 1, GFP_KERNEL);
+ if (!cdm_array[i].keyword) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ sscanf(cds_head,
+ "%s %llx %llx",
+ cdm_array[i].keyword,
+ (unsigned long long *)&(cdm_array[i].mask1),
+ (unsigned long long *)&(cdm_array[i].mask2));
+
+ if (!strcmp(cdm_array[i].keyword, ORANGEFS_VERBOSE))
+ client_verbose_index = i;
+
+ if (!strcmp(cdm_array[i].keyword, ORANGEFS_ALL))
+ client_all_index = i;
+
+ cds_head = cds_delimiter + 1;
+ }
+
+ rc = cdm_element_count;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: rc:%d:\n", __func__, rc);
+
+out:
+
+ return rc;
+
+}
+
+/*
+ * /sys/kernel/debug/orangefs/debug-help can be catted to
+ * see all the available kernel and client debug keywords.
+ *
+ * When the kernel boots, we have no idea what keywords the
+ * client supports, nor their associated masks.
+ *
+ * We pass through this function once at boot and stamp a
+ * boilerplate "we don't know" message for the client in the
+ * debug-help file. We pass through here again when the client
+ * starts and then we can fill out the debug-help file fully.
+ *
+ * The client might be restarted any number of times between
+ * reboots, we only build the debug-help file the first time.
+ */
+int orangefs_prepare_debugfs_help_string(int at_boot)
+{
+ int rc = -EINVAL;
+ int i;
+ int byte_count = 0;
+ char *client_title = "Client Debug Keywords:\n";
+ char *kernel_title = "Kernel Debug Keywords:\n";
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+ if (at_boot) {
+ byte_count += strlen(HELP_STRING_UNINITIALIZED);
+ client_title = HELP_STRING_UNINITIALIZED;
+ } else {
+ /*
+ * fill the client keyword/mask array and remember
+ * how many elements there were.
+ */
+ cdm_element_count =
+ orangefs_prepare_cdm_array(client_debug_array_string);
+ if (cdm_element_count <= 0)
+ goto out;
+
+ /* Count the bytes destined for debug_help_string. */
+ byte_count += strlen(client_title);
+
+ for (i = 0; i < cdm_element_count; i++) {
+ byte_count += strlen(cdm_array[i].keyword + 2);
+ if (byte_count >= DEBUG_HELP_STRING_SIZE) {
+ pr_info("%s: overflow 1!\n", __func__);
+ goto out;
+ }
+ }
+
+ gossip_debug(GOSSIP_UTILS_DEBUG,
+ "%s: cdm_element_count:%d:\n",
+ __func__,
+ cdm_element_count);
+ }
+
+ byte_count += strlen(kernel_title);
+ for (i = 0; i < num_kmod_keyword_mask_map; i++) {
+ byte_count +=
+ strlen(s_kmod_keyword_mask_map[i].keyword + 2);
+ if (byte_count >= DEBUG_HELP_STRING_SIZE) {
+ pr_info("%s: overflow 2!\n", __func__);
+ goto out;
+ }
+ }
+
+ /* build debug_help_string. */
+ debug_help_string = kzalloc(DEBUG_HELP_STRING_SIZE, GFP_KERNEL);
+ if (!debug_help_string) {
+ rc = -ENOMEM;
+ goto out;
+ }
+
+ strcat(debug_help_string, client_title);
+
+ if (!at_boot) {
+ for (i = 0; i < cdm_element_count; i++) {
+ strcat(debug_help_string, "\t");
+ strcat(debug_help_string, cdm_array[i].keyword);
+ strcat(debug_help_string, "\n");
+ }
+ }
+
+ strcat(debug_help_string, "\n");
+ strcat(debug_help_string, kernel_title);
+
+ for (i = 0; i < num_kmod_keyword_mask_map; i++) {
+ strcat(debug_help_string, "\t");
+ strcat(debug_help_string, s_kmod_keyword_mask_map[i].keyword);
+ strcat(debug_help_string, "\n");
+ }
+
+ rc = 0;
+
+out:
+
+ return rc;
+
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ */
+void debug_mask_to_string(void *mask, int type)
+{
+ int i;
+ int len = 0;
+ char *debug_string;
+ int element_count = 0;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+ if (type) {
+ debug_string = client_debug_string;
+ element_count = cdm_element_count;
+ } else {
+ debug_string = kernel_debug_string;
+ element_count = num_kmod_keyword_mask_map;
+ }
+
+ memset(debug_string, 0, ORANGEFS_MAX_DEBUG_STRING_LEN);
+
+ /*
+ * Some keywords, like "all" or "verbose", are amalgams of
+ * numerous other keywords. Make a special check for those
+ * before grinding through the whole mask only to find out
+ * later...
+ */
+ if (check_amalgam_keyword(mask, type))
+ goto out;
+
+ /* Build the debug string. */
+ for (i = 0; i < element_count; i++)
+ if (type)
+ do_c_string(mask, i);
+ else
+ do_k_string(mask, i);
+
+ len = strlen(debug_string);
+
+ if ((len) && (type))
+ client_debug_string[len - 1] = '\0';
+ else if (len)
+ kernel_debug_string[len - 1] = '\0';
+ else if (type)
+ strcpy(client_debug_string, "none");
+ else
+ strcpy(kernel_debug_string, "none");
+
+out:
+gossip_debug(GOSSIP_UTILS_DEBUG, "%s: string:%s:\n", __func__, debug_string);
+
+ return;
+
+}
+
+void do_k_string(void *k_mask, int index)
+{
+ __u64 *mask = (__u64 *) k_mask;
+
+ if (keyword_is_amalgam((char *) s_kmod_keyword_mask_map[index].keyword))
+ goto out;
+
+ if (*mask & s_kmod_keyword_mask_map[index].mask_val) {
+ if ((strlen(kernel_debug_string) +
+ strlen(s_kmod_keyword_mask_map[index].keyword))
+ < ORANGEFS_MAX_DEBUG_STRING_LEN - 1) {
+ strcat(kernel_debug_string,
+ s_kmod_keyword_mask_map[index].keyword);
+ strcat(kernel_debug_string, ",");
+ } else {
+ gossip_err("%s: overflow!\n", __func__);
+ strcpy(kernel_debug_string, ORANGEFS_ALL);
+ goto out;
+ }
+ }
+
+out:
+
+ return;
+}
+
+void do_c_string(void *c_mask, int index)
+{
+ struct client_debug_mask *mask = (struct client_debug_mask *) c_mask;
+
+ if (keyword_is_amalgam(cdm_array[index].keyword))
+ goto out;
+
+ if ((mask->mask1 & cdm_array[index].mask1) ||
+ (mask->mask2 & cdm_array[index].mask2)) {
+ if ((strlen(client_debug_string) +
+ strlen(cdm_array[index].keyword) + 1)
+ < ORANGEFS_MAX_DEBUG_STRING_LEN - 2) {
+ strcat(client_debug_string,
+ cdm_array[index].keyword);
+ strcat(client_debug_string, ",");
+ } else {
+ gossip_err("%s: overflow!\n", __func__);
+ strcpy(client_debug_string, ORANGEFS_ALL);
+ goto out;
+ }
+ }
+out:
+ return;
+}
+
+int keyword_is_amalgam(char *keyword)
+{
+ int rc = 0;
+
+ if ((!strcmp(keyword, ORANGEFS_ALL)) || (!strcmp(keyword, ORANGEFS_VERBOSE)))
+ rc = 1;
+
+ return rc;
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ *
+ * return 1 if we found an amalgam.
+ */
+int check_amalgam_keyword(void *mask, int type)
+{
+ __u64 *k_mask;
+ struct client_debug_mask *c_mask;
+ int k_all_index = num_kmod_keyword_mask_map - 1;
+ int rc = 0;
+
+ if (type) {
+ c_mask = (struct client_debug_mask *) mask;
+
+ if ((c_mask->mask1 == cdm_array[client_all_index].mask1) &&
+ (c_mask->mask2 == cdm_array[client_all_index].mask2)) {
+ strcpy(client_debug_string, ORANGEFS_ALL);
+ rc = 1;
+ goto out;
+ }
+
+ if ((c_mask->mask1 == cdm_array[client_verbose_index].mask1) &&
+ (c_mask->mask2 == cdm_array[client_verbose_index].mask2)) {
+ strcpy(client_debug_string, ORANGEFS_VERBOSE);
+ rc = 1;
+ goto out;
+ }
+
+ } else {
+ k_mask = (__u64 *) mask;
+
+ if (*k_mask >= s_kmod_keyword_mask_map[k_all_index].mask_val) {
+ strcpy(kernel_debug_string, ORANGEFS_ALL);
+ rc = 1;
+ goto out;
+ }
+ }
+
+out:
+
+ return rc;
+}
+
+/*
+ * kernel = type 0
+ * client = type 1
+ */
+void debug_string_to_mask(char *debug_string, void *mask, int type)
+{
+ char *unchecked_keyword;
+ int i;
+ char *strsep_fodder = kstrdup(debug_string, GFP_KERNEL);
+ char *original_pointer;
+ int element_count = 0;
+ struct client_debug_mask *c_mask;
+ __u64 *k_mask;
+
+ gossip_debug(GOSSIP_UTILS_DEBUG, "%s: start\n", __func__);
+
+ if (type) {
+ c_mask = (struct client_debug_mask *)mask;
+ element_count = cdm_element_count;
+ } else {
+ k_mask = (__u64 *)mask;
+ *k_mask = 0;
+ element_count = num_kmod_keyword_mask_map;
+ }
+
+ original_pointer = strsep_fodder;
+ while ((unchecked_keyword = strsep(&strsep_fodder, ",")))
+ if (strlen(unchecked_keyword)) {
+ for (i = 0; i < element_count; i++)
+ if (type)
+ do_c_mask(i,
+ unchecked_keyword,
+ &c_mask);
+ else
+ do_k_mask(i,
+ unchecked_keyword,
+ &k_mask);
+ }
+
+ kfree(original_pointer);
+}
+
+void do_c_mask(int i,
+ char *unchecked_keyword,
+ struct client_debug_mask **sane_mask)
+{
+
+ if (!strcmp(cdm_array[i].keyword, unchecked_keyword)) {
+ (**sane_mask).mask1 = (**sane_mask).mask1 | cdm_array[i].mask1;
+ (**sane_mask).mask2 = (**sane_mask).mask2 | cdm_array[i].mask2;
+ }
+}
+
+void do_k_mask(int i, char *unchecked_keyword, __u64 **sane_mask)
+{
+
+ if (!strcmp(s_kmod_keyword_mask_map[i].keyword, unchecked_keyword))
+ **sane_mask = (**sane_mask) |
+ s_kmod_keyword_mask_map[i].mask_val;
+}
diff --git a/fs/orangefs/protocol.h b/fs/orangefs/protocol.h
new file mode 100644
index 000000000..1efc6f8a5
--- /dev/null
+++ b/fs/orangefs/protocol.h
@@ -0,0 +1,455 @@
+#include <linux/kernel.h>
+#include <linux/types.h>
+#include <linux/spinlock_types.h>
+#include <linux/slab.h>
+#include <linux/ioctl.h>
+
+extern struct client_debug_mask *cdm_array;
+extern char *debug_help_string;
+extern int help_string_initialized;
+extern struct dentry *debug_dir;
+extern struct dentry *help_file_dentry;
+extern struct dentry *client_debug_dentry;
+extern const struct file_operations debug_help_fops;
+extern int client_all_index;
+extern int client_verbose_index;
+extern int cdm_element_count;
+#define DEBUG_HELP_STRING_SIZE 4096
+#define HELP_STRING_UNINITIALIZED \
+ "Client Debug Keywords are unknown until the first time\n" \
+ "the client is started after boot.\n"
+#define ORANGEFS_KMOD_DEBUG_HELP_FILE "debug-help"
+#define ORANGEFS_KMOD_DEBUG_FILE "kernel-debug"
+#define ORANGEFS_CLIENT_DEBUG_FILE "client-debug"
+#define ORANGEFS_VERBOSE "verbose"
+#define ORANGEFS_ALL "all"
+
+/* pvfs2-config.h ***********************************************************/
+#define ORANGEFS_VERSION_MAJOR 2
+#define ORANGEFS_VERSION_MINOR 9
+#define ORANGEFS_VERSION_SUB 0
+
+/* khandle stuff ***********************************************************/
+
+/*
+ * The 2.9 core will put 64 bit handles in here like this:
+ * 1234 0000 0000 5678
+ * The 3.0 and beyond cores will put 128 bit handles in here like this:
+ * 1234 5678 90AB CDEF
+ * The kernel module will always use the first four bytes and
+ * the last four bytes as an inum.
+ */
+struct orangefs_khandle {
+ unsigned char u[16];
+} __aligned(8);
+
+/*
+ * kernel version of an object ref.
+ */
+struct orangefs_object_kref {
+ struct orangefs_khandle khandle;
+ __s32 fs_id;
+ __s32 __pad1;
+};
+
+/*
+ * compare 2 khandles assumes little endian thus from large address to
+ * small address
+ */
+static inline int ORANGEFS_khandle_cmp(const struct orangefs_khandle *kh1,
+ const struct orangefs_khandle *kh2)
+{
+ int i;
+
+ for (i = 15; i >= 0; i--) {
+ if (kh1->u[i] > kh2->u[i])
+ return 1;
+ if (kh1->u[i] < kh2->u[i])
+ return -1;
+ }
+
+ return 0;
+}
+
+static inline void ORANGEFS_khandle_to(const struct orangefs_khandle *kh,
+ void *p, int size)
+{
+
+ memcpy(p, kh->u, 16);
+ memset(p + 16, 0, size - 16);
+
+}
+
+static inline void ORANGEFS_khandle_from(struct orangefs_khandle *kh,
+ void *p, int size)
+{
+ memset(kh, 0, 16);
+ memcpy(kh->u, p, 16);
+
+}
+
+/* pvfs2-types.h ************************************************************/
+typedef __u32 ORANGEFS_uid;
+typedef __u32 ORANGEFS_gid;
+typedef __s32 ORANGEFS_fs_id;
+typedef __u32 ORANGEFS_permissions;
+typedef __u64 ORANGEFS_time;
+typedef __s64 ORANGEFS_size;
+typedef __u64 ORANGEFS_flags;
+typedef __u64 ORANGEFS_ds_position;
+typedef __s32 ORANGEFS_error;
+typedef __s64 ORANGEFS_offset;
+
+#define ORANGEFS_SUPER_MAGIC 0x20030528
+
+/*
+ * ORANGEFS error codes are a signed 32-bit integer. Error codes are negative, but
+ * the sign is stripped before decoding.
+ */
+
+/* Bit 31 is not used since it is the sign. */
+
+/*
+ * Bit 30 specifies that this is a ORANGEFS error. A ORANGEFS error is either an
+ * encoded errno value or a ORANGEFS protocol error.
+ */
+#define ORANGEFS_ERROR_BIT (1 << 30)
+
+/*
+ * Bit 29 specifies that this is a ORANGEFS protocol error and not an encoded
+ * errno value.
+ */
+#define ORANGEFS_NON_ERRNO_ERROR_BIT (1 << 29)
+
+/*
+ * Bits 9, 8, and 7 specify the error class, which encodes the section of
+ * server code the error originated in for logging purposes. It is not used
+ * in the kernel except to be masked out.
+ */
+#define ORANGEFS_ERROR_CLASS_BITS 0x380
+
+/* Bits 6 - 0 are reserved for the actual error code. */
+#define ORANGEFS_ERROR_NUMBER_BITS 0x7f
+
+/* Encoded errno values decoded by PINT_errno_mapping in orangefs-utils.c. */
+
+/* Our own ORANGEFS protocol error codes. */
+#define ORANGEFS_ECANCEL (1|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_EDEVINIT (2|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_EDETAIL (3|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_EHOSTNTFD (4|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_EADDRNTFD (5|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_ENORECVR (6|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_ETRYAGAIN (7|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_ENOTPVFS (8|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+#define ORANGEFS_ESECURITY (9|ORANGEFS_NON_ERRNO_ERROR_BIT|ORANGEFS_ERROR_BIT)
+
+/* permission bits */
+#define ORANGEFS_O_EXECUTE (1 << 0)
+#define ORANGEFS_O_WRITE (1 << 1)
+#define ORANGEFS_O_READ (1 << 2)
+#define ORANGEFS_G_EXECUTE (1 << 3)
+#define ORANGEFS_G_WRITE (1 << 4)
+#define ORANGEFS_G_READ (1 << 5)
+#define ORANGEFS_U_EXECUTE (1 << 6)
+#define ORANGEFS_U_WRITE (1 << 7)
+#define ORANGEFS_U_READ (1 << 8)
+/* no ORANGEFS_U_VTX (sticky bit) */
+#define ORANGEFS_G_SGID (1 << 10)
+#define ORANGEFS_U_SUID (1 << 11)
+
+/* definition taken from stdint.h */
+#define INT32_MAX (2147483647)
+#define ORANGEFS_ITERATE_START (INT32_MAX - 1)
+#define ORANGEFS_ITERATE_END (INT32_MAX - 2)
+#define ORANGEFS_ITERATE_NEXT (INT32_MAX - 3)
+#define ORANGEFS_READDIR_START ORANGEFS_ITERATE_START
+#define ORANGEFS_READDIR_END ORANGEFS_ITERATE_END
+#define ORANGEFS_IMMUTABLE_FL FS_IMMUTABLE_FL
+#define ORANGEFS_APPEND_FL FS_APPEND_FL
+#define ORANGEFS_NOATIME_FL FS_NOATIME_FL
+#define ORANGEFS_MIRROR_FL 0x01000000ULL
+#define ORANGEFS_O_EXECUTE (1 << 0)
+#define ORANGEFS_FS_ID_NULL ((__s32)0)
+#define ORANGEFS_ATTR_SYS_UID (1 << 0)
+#define ORANGEFS_ATTR_SYS_GID (1 << 1)
+#define ORANGEFS_ATTR_SYS_PERM (1 << 2)
+#define ORANGEFS_ATTR_SYS_ATIME (1 << 3)
+#define ORANGEFS_ATTR_SYS_CTIME (1 << 4)
+#define ORANGEFS_ATTR_SYS_MTIME (1 << 5)
+#define ORANGEFS_ATTR_SYS_TYPE (1 << 6)
+#define ORANGEFS_ATTR_SYS_ATIME_SET (1 << 7)
+#define ORANGEFS_ATTR_SYS_MTIME_SET (1 << 8)
+#define ORANGEFS_ATTR_SYS_SIZE (1 << 20)
+#define ORANGEFS_ATTR_SYS_LNK_TARGET (1 << 24)
+#define ORANGEFS_ATTR_SYS_DFILE_COUNT (1 << 25)
+#define ORANGEFS_ATTR_SYS_DIRENT_COUNT (1 << 26)
+#define ORANGEFS_ATTR_SYS_BLKSIZE (1 << 28)
+#define ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT (1 << 29)
+#define ORANGEFS_ATTR_SYS_COMMON_ALL \
+ (ORANGEFS_ATTR_SYS_UID | \
+ ORANGEFS_ATTR_SYS_GID | \
+ ORANGEFS_ATTR_SYS_PERM | \
+ ORANGEFS_ATTR_SYS_ATIME | \
+ ORANGEFS_ATTR_SYS_CTIME | \
+ ORANGEFS_ATTR_SYS_MTIME | \
+ ORANGEFS_ATTR_SYS_TYPE)
+
+#define ORANGEFS_ATTR_SYS_ALL_SETABLE \
+(ORANGEFS_ATTR_SYS_COMMON_ALL-ORANGEFS_ATTR_SYS_TYPE)
+
+#define ORANGEFS_ATTR_SYS_ALL_NOHINT \
+ (ORANGEFS_ATTR_SYS_COMMON_ALL | \
+ ORANGEFS_ATTR_SYS_SIZE | \
+ ORANGEFS_ATTR_SYS_LNK_TARGET | \
+ ORANGEFS_ATTR_SYS_DFILE_COUNT | \
+ ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT | \
+ ORANGEFS_ATTR_SYS_DIRENT_COUNT | \
+ ORANGEFS_ATTR_SYS_BLKSIZE)
+
+#define ORANGEFS_ATTR_SYS_ALL_NOHINT_NOSIZE \
+ (ORANGEFS_ATTR_SYS_COMMON_ALL | \
+ ORANGEFS_ATTR_SYS_LNK_TARGET | \
+ ORANGEFS_ATTR_SYS_DFILE_COUNT | \
+ ORANGEFS_ATTR_SYS_MIRROR_COPIES_COUNT | \
+ ORANGEFS_ATTR_SYS_DIRENT_COUNT | \
+ ORANGEFS_ATTR_SYS_BLKSIZE)
+
+#define ORANGEFS_XATTR_REPLACE 0x2
+#define ORANGEFS_XATTR_CREATE 0x1
+#define ORANGEFS_MAX_SERVER_ADDR_LEN 256
+#define ORANGEFS_NAME_MAX 256
+/*
+ * max extended attribute name len as imposed by the VFS and exploited for the
+ * upcall request types.
+ * NOTE: Please retain them as multiples of 8 even if you wish to change them
+ * This is *NECESSARY* for supporting 32 bit user-space binaries on a 64-bit
+ * kernel. Due to implementation within DBPF, this really needs to be
+ * ORANGEFS_NAME_MAX, which it was the same value as, but no reason to let it
+ * break if that changes in the future.
+ */
+#define ORANGEFS_MAX_XATTR_NAMELEN ORANGEFS_NAME_MAX /* Not the same as
+ * XATTR_NAME_MAX defined
+ * by <linux/xattr.h>
+ */
+#define ORANGEFS_MAX_XATTR_VALUELEN 8192 /* Not the same as XATTR_SIZE_MAX
+ * defined by <linux/xattr.h>
+ */
+#define ORANGEFS_MAX_XATTR_LISTLEN 16 /* Not the same as XATTR_LIST_MAX
+ * defined by <linux/xattr.h>
+ */
+/*
+ * ORANGEFS I/O operation types, used in both system and server interfaces.
+ */
+enum ORANGEFS_io_type {
+ ORANGEFS_IO_READ = 1,
+ ORANGEFS_IO_WRITE = 2
+};
+
+/*
+ * If this enum is modified the server parameters related to the precreate pool
+ * batch and low threshold sizes may need to be modified to reflect this
+ * change.
+ */
+enum orangefs_ds_type {
+ ORANGEFS_TYPE_NONE = 0,
+ ORANGEFS_TYPE_METAFILE = (1 << 0),
+ ORANGEFS_TYPE_DATAFILE = (1 << 1),
+ ORANGEFS_TYPE_DIRECTORY = (1 << 2),
+ ORANGEFS_TYPE_SYMLINK = (1 << 3),
+ ORANGEFS_TYPE_DIRDATA = (1 << 4),
+ ORANGEFS_TYPE_INTERNAL = (1 << 5) /* for the server's private use */
+};
+
+/*
+ * ORANGEFS_certificate simply stores a buffer with the buffer size.
+ * The buffer can be converted to an OpenSSL X509 struct for use.
+ */
+struct ORANGEFS_certificate {
+ __u32 buf_size;
+ unsigned char *buf;
+};
+
+/*
+ * A credential identifies a user and is signed by the client/user
+ * private key.
+ */
+struct ORANGEFS_credential {
+ __u32 userid; /* user id */
+ __u32 num_groups; /* length of group_array */
+ __u32 *group_array; /* groups for which the user is a member */
+ char *issuer; /* alias of the issuing server */
+ __u64 timeout; /* seconds after epoch to time out */
+ __u32 sig_size; /* length of the signature in bytes */
+ unsigned char *signature; /* digital signature */
+ struct ORANGEFS_certificate certificate; /* user certificate buffer */
+};
+#define extra_size_ORANGEFS_credential (ORANGEFS_REQ_LIMIT_GROUPS * \
+ sizeof(__u32) + \
+ ORANGEFS_REQ_LIMIT_ISSUER + \
+ ORANGEFS_REQ_LIMIT_SIGNATURE + \
+ extra_size_ORANGEFS_certificate)
+
+/* This structure is used by the VFS-client interaction alone */
+struct ORANGEFS_keyval_pair {
+ char key[ORANGEFS_MAX_XATTR_NAMELEN];
+ __s32 key_sz; /* __s32 for portable, fixed-size structures */
+ __s32 val_sz;
+ char val[ORANGEFS_MAX_XATTR_VALUELEN];
+};
+
+/* pvfs2-sysint.h ***********************************************************/
+/* Describes attributes for a file, directory, or symlink. */
+struct ORANGEFS_sys_attr_s {
+ __u32 owner;
+ __u32 group;
+ __u32 perms;
+ __u64 atime;
+ __u64 mtime;
+ __u64 ctime;
+ __s64 size;
+
+ /* NOTE: caller must free if valid */
+ char *link_target;
+
+ /* Changed to __s32 so that size of structure does not change */
+ __s32 dfile_count;
+
+ /* Changed to __s32 so that size of structure does not change */
+ __s32 distr_dir_servers_initial;
+
+ /* Changed to __s32 so that size of structure does not change */
+ __s32 distr_dir_servers_max;
+
+ /* Changed to __s32 so that size of structure does not change */
+ __s32 distr_dir_split_size;
+
+ __u32 mirror_copies_count;
+
+ /* NOTE: caller must free if valid */
+ char *dist_name;
+
+ /* NOTE: caller must free if valid */
+ char *dist_params;
+
+ __s64 dirent_count;
+ enum orangefs_ds_type objtype;
+ __u64 flags;
+ __u32 mask;
+ __s64 blksize;
+};
+
+#define ORANGEFS_LOOKUP_LINK_NO_FOLLOW 0
+
+/* pint-dev.h ***************************************************************/
+
+/* parameter structure used in ORANGEFS_DEV_DEBUG ioctl command */
+struct dev_mask_info_s {
+ enum {
+ KERNEL_MASK,
+ CLIENT_MASK,
+ } mask_type;
+ __u64 mask_value;
+};
+
+struct dev_mask2_info_s {
+ __u64 mask1_value;
+ __u64 mask2_value;
+};
+
+/* pvfs2-util.h *************************************************************/
+__s32 ORANGEFS_util_translate_mode(int mode);
+
+/* pvfs2-debug.h ************************************************************/
+#include "orangefs-debug.h"
+
+/* pvfs2-internal.h *********************************************************/
+#define llu(x) (unsigned long long)(x)
+#define lld(x) (long long)(x)
+
+/* pint-dev-shared.h ********************************************************/
+#define ORANGEFS_DEV_MAGIC 'k'
+
+#define ORANGEFS_READDIR_DEFAULT_DESC_COUNT 5
+
+#define DEV_GET_MAGIC 0x1
+#define DEV_GET_MAX_UPSIZE 0x2
+#define DEV_GET_MAX_DOWNSIZE 0x3
+#define DEV_MAP 0x4
+#define DEV_REMOUNT_ALL 0x5
+#define DEV_DEBUG 0x6
+#define DEV_UPSTREAM 0x7
+#define DEV_CLIENT_MASK 0x8
+#define DEV_CLIENT_STRING 0x9
+#define DEV_MAX_NR 0xa
+
+/* supported ioctls, codes are with respect to user-space */
+enum {
+ ORANGEFS_DEV_GET_MAGIC = _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAGIC, __s32),
+ ORANGEFS_DEV_GET_MAX_UPSIZE =
+ _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAX_UPSIZE, __s32),
+ ORANGEFS_DEV_GET_MAX_DOWNSIZE =
+ _IOW(ORANGEFS_DEV_MAGIC, DEV_GET_MAX_DOWNSIZE, __s32),
+ ORANGEFS_DEV_MAP = _IO(ORANGEFS_DEV_MAGIC, DEV_MAP),
+ ORANGEFS_DEV_REMOUNT_ALL = _IO(ORANGEFS_DEV_MAGIC, DEV_REMOUNT_ALL),
+ ORANGEFS_DEV_DEBUG = _IOR(ORANGEFS_DEV_MAGIC, DEV_DEBUG, __s32),
+ ORANGEFS_DEV_UPSTREAM = _IOW(ORANGEFS_DEV_MAGIC, DEV_UPSTREAM, int),
+ ORANGEFS_DEV_CLIENT_MASK = _IOW(ORANGEFS_DEV_MAGIC,
+ DEV_CLIENT_MASK,
+ struct dev_mask2_info_s),
+ ORANGEFS_DEV_CLIENT_STRING = _IOW(ORANGEFS_DEV_MAGIC,
+ DEV_CLIENT_STRING,
+ char *),
+ ORANGEFS_DEV_MAXNR = DEV_MAX_NR,
+};
+
+/*
+ * version number for use in communicating between kernel space and user
+ * space. Zero signifies the upstream version of the kernel module.
+ */
+#define ORANGEFS_KERNEL_PROTO_VERSION 0
+#define ORANGEFS_MINIMUM_USERSPACE_VERSION 20903
+
+/*
+ * describes memory regions to map in the ORANGEFS_DEV_MAP ioctl.
+ * NOTE: See devorangefs-req.c for 32 bit compat structure.
+ * Since this structure has a variable-sized layout that is different
+ * on 32 and 64 bit platforms, we need to normalize to a 64 bit layout
+ * on such systems before servicing ioctl calls from user-space binaries
+ * that may be 32 bit!
+ */
+struct ORANGEFS_dev_map_desc {
+ void *ptr;
+ __s32 total_size;
+ __s32 size;
+ __s32 count;
+};
+
+/* gossip.h *****************************************************************/
+
+#ifdef GOSSIP_DISABLE_DEBUG
+#define gossip_debug(mask, fmt, ...) \
+do { \
+ if (0) \
+ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+} while (0)
+#else
+extern __u64 gossip_debug_mask;
+extern struct client_debug_mask client_debug_mask;
+
+/* try to avoid function call overhead by checking masks in macro */
+#define gossip_debug(mask, fmt, ...) \
+do { \
+ if (gossip_debug_mask & (mask)) \
+ printk(KERN_DEBUG fmt, ##__VA_ARGS__); \
+} while (0)
+#endif /* GOSSIP_DISABLE_DEBUG */
+
+/* do file and line number printouts w/ the GNU preprocessor */
+#define gossip_ldebug(mask, fmt, ...) \
+ gossip_debug(mask, "%s: " fmt, __func__, ##__VA_ARGS__)
+
+#define gossip_err pr_err
+#define gossip_lerr(fmt, ...) \
+ gossip_err("%s line %d: " fmt, \
+ __FILE__, __LINE__, ##__VA_ARGS__)
diff --git a/fs/orangefs/super.c b/fs/orangefs/super.c
new file mode 100644
index 000000000..b9da9a028
--- /dev/null
+++ b/fs/orangefs/super.c
@@ -0,0 +1,559 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+#include <linux/parser.h>
+
+/* a cache for orangefs-inode objects (i.e. orangefs inode private data) */
+static struct kmem_cache *orangefs_inode_cache;
+
+/* list for storing orangefs specific superblocks in use */
+LIST_HEAD(orangefs_superblocks);
+
+DEFINE_SPINLOCK(orangefs_superblocks_lock);
+
+enum {
+ Opt_intr,
+ Opt_acl,
+ Opt_local_lock,
+
+ Opt_err
+};
+
+static const match_table_t tokens = {
+ { Opt_acl, "acl" },
+ { Opt_intr, "intr" },
+ { Opt_local_lock, "local_lock" },
+ { Opt_err, NULL }
+};
+
+
+static int parse_mount_options(struct super_block *sb, char *options,
+ int silent)
+{
+ struct orangefs_sb_info_s *orangefs_sb = ORANGEFS_SB(sb);
+ substring_t args[MAX_OPT_ARGS];
+ char *p;
+
+ /*
+ * Force any potential flags that might be set from the mount
+ * to zero, ie, initialize to unset.
+ */
+ sb->s_flags &= ~MS_POSIXACL;
+ orangefs_sb->flags &= ~ORANGEFS_OPT_INTR;
+ orangefs_sb->flags &= ~ORANGEFS_OPT_LOCAL_LOCK;
+
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
+ continue;
+
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_acl:
+ sb->s_flags |= MS_POSIXACL;
+ break;
+ case Opt_intr:
+ orangefs_sb->flags |= ORANGEFS_OPT_INTR;
+ break;
+ case Opt_local_lock:
+ orangefs_sb->flags |= ORANGEFS_OPT_LOCAL_LOCK;
+ break;
+ default:
+ goto fail;
+ }
+ }
+
+ return 0;
+fail:
+ if (!silent)
+ gossip_err("Error: mount option [%s] is not supported.\n", p);
+ return -EINVAL;
+}
+
+static void orangefs_inode_cache_ctor(void *req)
+{
+ struct orangefs_inode_s *orangefs_inode = req;
+
+ inode_init_once(&orangefs_inode->vfs_inode);
+ init_rwsem(&orangefs_inode->xattr_sem);
+
+ orangefs_inode->vfs_inode.i_version = 1;
+}
+
+static struct inode *orangefs_alloc_inode(struct super_block *sb)
+{
+ struct orangefs_inode_s *orangefs_inode;
+
+ orangefs_inode = kmem_cache_alloc(orangefs_inode_cache, GFP_KERNEL);
+ if (orangefs_inode == NULL) {
+ gossip_err("Failed to allocate orangefs_inode\n");
+ return NULL;
+ }
+
+ /*
+ * We want to clear everything except for rw_semaphore and the
+ * vfs_inode.
+ */
+ memset(&orangefs_inode->refn.khandle, 0, 16);
+ orangefs_inode->refn.fs_id = ORANGEFS_FS_ID_NULL;
+ orangefs_inode->last_failed_block_index_read = 0;
+ memset(orangefs_inode->link_target, 0, sizeof(orangefs_inode->link_target));
+ orangefs_inode->pinode_flags = 0;
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "orangefs_alloc_inode: allocated %p\n",
+ &orangefs_inode->vfs_inode);
+ return &orangefs_inode->vfs_inode;
+}
+
+static void orangefs_destroy_inode(struct inode *inode)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "%s: deallocated %p destroying inode %pU\n",
+ __func__, orangefs_inode, get_khandle_from_ino(inode));
+
+ kmem_cache_free(orangefs_inode_cache, orangefs_inode);
+}
+
+/*
+ * NOTE: information filled in here is typically reflected in the
+ * output of the system command 'df'
+*/
+static int orangefs_statfs(struct dentry *dentry, struct kstatfs *buf)
+{
+ int ret = -ENOMEM;
+ struct orangefs_kernel_op_s *new_op = NULL;
+ int flags = 0;
+ struct super_block *sb = NULL;
+
+ sb = dentry->d_sb;
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "orangefs_statfs: called on sb %p (fs_id is %d)\n",
+ sb,
+ (int)(ORANGEFS_SB(sb)->fs_id));
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_STATFS);
+ if (!new_op)
+ return ret;
+ new_op->upcall.req.statfs.fs_id = ORANGEFS_SB(sb)->fs_id;
+
+ if (ORANGEFS_SB(sb)->flags & ORANGEFS_OPT_INTR)
+ flags = ORANGEFS_OP_INTERRUPTIBLE;
+
+ ret = service_operation(new_op, "orangefs_statfs", flags);
+
+ if (new_op->downcall.status < 0)
+ goto out_op_release;
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "%s: got %ld blocks available | "
+ "%ld blocks total | %ld block size | "
+ "%ld files total | %ld files avail\n",
+ __func__,
+ (long)new_op->downcall.resp.statfs.blocks_avail,
+ (long)new_op->downcall.resp.statfs.blocks_total,
+ (long)new_op->downcall.resp.statfs.block_size,
+ (long)new_op->downcall.resp.statfs.files_total,
+ (long)new_op->downcall.resp.statfs.files_avail);
+
+ buf->f_type = sb->s_magic;
+ memcpy(&buf->f_fsid, &ORANGEFS_SB(sb)->fs_id, sizeof(buf->f_fsid));
+ buf->f_bsize = new_op->downcall.resp.statfs.block_size;
+ buf->f_namelen = ORANGEFS_NAME_MAX;
+
+ buf->f_blocks = (sector_t) new_op->downcall.resp.statfs.blocks_total;
+ buf->f_bfree = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
+ buf->f_bavail = (sector_t) new_op->downcall.resp.statfs.blocks_avail;
+ buf->f_files = (sector_t) new_op->downcall.resp.statfs.files_total;
+ buf->f_ffree = (sector_t) new_op->downcall.resp.statfs.files_avail;
+ buf->f_frsize = sb->s_blocksize;
+
+out_op_release:
+ op_release(new_op);
+ gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_statfs: returning %d\n", ret);
+ return ret;
+}
+
+/*
+ * Remount as initiated by VFS layer. We just need to reparse the mount
+ * options, no need to signal pvfs2-client-core about it.
+ */
+static int orangefs_remount_fs(struct super_block *sb, int *flags, char *data)
+{
+ gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_remount_fs: called\n");
+ return parse_mount_options(sb, data, 1);
+}
+
+/*
+ * Remount as initiated by pvfs2-client-core on restart. This is used to
+ * repopulate mount information left from previous pvfs2-client-core.
+ *
+ * the idea here is that given a valid superblock, we're
+ * re-initializing the user space client with the initial mount
+ * information specified when the super block was first initialized.
+ * this is very different than the first initialization/creation of a
+ * superblock. we use the special service_priority_operation to make
+ * sure that the mount gets ahead of any other pending operation that
+ * is waiting for servicing. this means that the pvfs2-client won't
+ * fail to start several times for all other pending operations before
+ * the client regains all of the mount information from us.
+ * NOTE: this function assumes that the request_mutex is already acquired!
+ */
+int orangefs_remount(struct orangefs_sb_info_s *orangefs_sb)
+{
+ struct orangefs_kernel_op_s *new_op;
+ int ret = -EINVAL;
+
+ gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_remount: called\n");
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT);
+ if (!new_op)
+ return -ENOMEM;
+ strncpy(new_op->upcall.req.fs_mount.orangefs_config_server,
+ orangefs_sb->devname,
+ ORANGEFS_MAX_SERVER_ADDR_LEN);
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "Attempting ORANGEFS Remount via host %s\n",
+ new_op->upcall.req.fs_mount.orangefs_config_server);
+
+ /*
+ * we assume that the calling function has already acquired the
+ * request_mutex to prevent other operations from bypassing
+ * this one
+ */
+ ret = service_operation(new_op, "orangefs_remount",
+ ORANGEFS_OP_PRIORITY | ORANGEFS_OP_NO_MUTEX);
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "orangefs_remount: mount got return value of %d\n",
+ ret);
+ if (ret == 0) {
+ /*
+ * store the id assigned to this sb -- it's just a
+ * short-lived mapping that the system interface uses
+ * to map this superblock to a particular mount entry
+ */
+ orangefs_sb->id = new_op->downcall.resp.fs_mount.id;
+ orangefs_sb->mount_pending = 0;
+ }
+
+ op_release(new_op);
+ return ret;
+}
+
+int fsid_key_table_initialize(void)
+{
+ return 0;
+}
+
+void fsid_key_table_finalize(void)
+{
+}
+
+/* Called whenever the VFS dirties the inode in response to atime updates */
+static void orangefs_dirty_inode(struct inode *inode, int flags)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "orangefs_dirty_inode: %pU\n",
+ get_khandle_from_ino(inode));
+ SetAtimeFlag(orangefs_inode);
+}
+
+static const struct super_operations orangefs_s_ops = {
+ .alloc_inode = orangefs_alloc_inode,
+ .destroy_inode = orangefs_destroy_inode,
+ .dirty_inode = orangefs_dirty_inode,
+ .drop_inode = generic_delete_inode,
+ .statfs = orangefs_statfs,
+ .remount_fs = orangefs_remount_fs,
+ .show_options = generic_show_options,
+};
+
+static struct dentry *orangefs_fh_to_dentry(struct super_block *sb,
+ struct fid *fid,
+ int fh_len,
+ int fh_type)
+{
+ struct orangefs_object_kref refn;
+
+ if (fh_len < 5 || fh_type > 2)
+ return NULL;
+
+ ORANGEFS_khandle_from(&(refn.khandle), fid->raw, 16);
+ refn.fs_id = (u32) fid->raw[4];
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "fh_to_dentry: handle %pU, fs_id %d\n",
+ &refn.khandle,
+ refn.fs_id);
+
+ return d_obtain_alias(orangefs_iget(sb, &refn));
+}
+
+static int orangefs_encode_fh(struct inode *inode,
+ __u32 *fh,
+ int *max_len,
+ struct inode *parent)
+{
+ int len = parent ? 10 : 5;
+ int type = 1;
+ struct orangefs_object_kref refn;
+
+ if (*max_len < len) {
+ gossip_lerr("fh buffer is too small for encoding\n");
+ *max_len = len;
+ type = 255;
+ goto out;
+ }
+
+ refn = ORANGEFS_I(inode)->refn;
+ ORANGEFS_khandle_to(&refn.khandle, fh, 16);
+ fh[4] = refn.fs_id;
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "Encoding fh: handle %pU, fsid %u\n",
+ &refn.khandle,
+ refn.fs_id);
+
+
+ if (parent) {
+ refn = ORANGEFS_I(parent)->refn;
+ ORANGEFS_khandle_to(&refn.khandle, (char *) fh + 20, 16);
+ fh[9] = refn.fs_id;
+
+ type = 2;
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "Encoding parent: handle %pU, fsid %u\n",
+ &refn.khandle,
+ refn.fs_id);
+ }
+ *max_len = len;
+
+out:
+ return type;
+}
+
+static const struct export_operations orangefs_export_ops = {
+ .encode_fh = orangefs_encode_fh,
+ .fh_to_dentry = orangefs_fh_to_dentry,
+};
+
+static int orangefs_fill_sb(struct super_block *sb,
+ struct orangefs_fs_mount_response *fs_mount,
+ void *data, int silent)
+{
+ int ret = -EINVAL;
+ struct inode *root = NULL;
+ struct dentry *root_dentry = NULL;
+ struct orangefs_object_kref root_object;
+
+ /* alloc and init our private orangefs sb info */
+ sb->s_fs_info = kzalloc(sizeof(struct orangefs_sb_info_s), GFP_KERNEL);
+ if (!ORANGEFS_SB(sb))
+ return -ENOMEM;
+ ORANGEFS_SB(sb)->sb = sb;
+
+ ORANGEFS_SB(sb)->root_khandle = fs_mount->root_khandle;
+ ORANGEFS_SB(sb)->fs_id = fs_mount->fs_id;
+ ORANGEFS_SB(sb)->id = fs_mount->id;
+
+ if (data) {
+ ret = parse_mount_options(sb, data, silent);
+ if (ret)
+ return ret;
+ }
+
+ /* Hang the xattr handlers off the superblock */
+ sb->s_xattr = orangefs_xattr_handlers;
+ sb->s_magic = ORANGEFS_SUPER_MAGIC;
+ sb->s_op = &orangefs_s_ops;
+ sb->s_d_op = &orangefs_dentry_operations;
+
+ sb->s_blocksize = orangefs_bufmap_size_query();
+ sb->s_blocksize_bits = orangefs_bufmap_shift_query();
+ sb->s_maxbytes = MAX_LFS_FILESIZE;
+
+ root_object.khandle = ORANGEFS_SB(sb)->root_khandle;
+ root_object.fs_id = ORANGEFS_SB(sb)->fs_id;
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "get inode %pU, fsid %d\n",
+ &root_object.khandle,
+ root_object.fs_id);
+
+ root = orangefs_iget(sb, &root_object);
+ if (IS_ERR(root))
+ return PTR_ERR(root);
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "Allocated root inode [%p] with mode %x\n",
+ root,
+ root->i_mode);
+
+ /* allocates and places root dentry in dcache */
+ root_dentry = d_make_root(root);
+ if (!root_dentry)
+ return -ENOMEM;
+
+ sb->s_export_op = &orangefs_export_ops;
+ sb->s_root = root_dentry;
+ return 0;
+}
+
+struct dentry *orangefs_mount(struct file_system_type *fst,
+ int flags,
+ const char *devname,
+ void *data)
+{
+ int ret = -EINVAL;
+ struct super_block *sb = ERR_PTR(-EINVAL);
+ struct orangefs_kernel_op_s *new_op;
+ struct dentry *d = ERR_PTR(-EINVAL);
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "orangefs_mount: called with devname %s\n",
+ devname);
+
+ if (!devname) {
+ gossip_err("ERROR: device name not specified.\n");
+ return ERR_PTR(-EINVAL);
+ }
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_FS_MOUNT);
+ if (!new_op)
+ return ERR_PTR(-ENOMEM);
+
+ strncpy(new_op->upcall.req.fs_mount.orangefs_config_server,
+ devname,
+ ORANGEFS_MAX_SERVER_ADDR_LEN);
+
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "Attempting ORANGEFS Mount via host %s\n",
+ new_op->upcall.req.fs_mount.orangefs_config_server);
+
+ ret = service_operation(new_op, "orangefs_mount", 0);
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "orangefs_mount: mount got return value of %d\n", ret);
+ if (ret)
+ goto free_op;
+
+ if (new_op->downcall.resp.fs_mount.fs_id == ORANGEFS_FS_ID_NULL) {
+ gossip_err("ERROR: Retrieved null fs_id\n");
+ ret = -EINVAL;
+ goto free_op;
+ }
+
+ sb = sget(fst, NULL, set_anon_super, flags, NULL);
+
+ if (IS_ERR(sb)) {
+ d = ERR_CAST(sb);
+ goto free_op;
+ }
+
+ ret = orangefs_fill_sb(sb,
+ &new_op->downcall.resp.fs_mount, data,
+ flags & MS_SILENT ? 1 : 0);
+
+ if (ret) {
+ d = ERR_PTR(ret);
+ goto free_op;
+ }
+
+ /*
+ * on successful mount, store the devname and data
+ * used
+ */
+ strncpy(ORANGEFS_SB(sb)->devname,
+ devname,
+ ORANGEFS_MAX_SERVER_ADDR_LEN);
+
+ /* mount_pending must be cleared */
+ ORANGEFS_SB(sb)->mount_pending = 0;
+
+ /*
+ * finally, add this sb to our list of known orangefs
+ * sb's
+ */
+ gossip_debug(GOSSIP_SUPER_DEBUG,
+ "Adding SB %p to orangefs superblocks\n",
+ ORANGEFS_SB(sb));
+ spin_lock(&orangefs_superblocks_lock);
+ list_add_tail(&ORANGEFS_SB(sb)->list, &orangefs_superblocks);
+ spin_unlock(&orangefs_superblocks_lock);
+ op_release(new_op);
+ return dget(sb->s_root);
+
+free_op:
+ gossip_err("orangefs_mount: mount request failed with %d\n", ret);
+ if (ret == -EINVAL) {
+ gossip_err("Ensure that all orangefs-servers have the same FS configuration files\n");
+ gossip_err("Look at pvfs2-client-core log file (typically /tmp/pvfs2-client.log) for more details\n");
+ }
+
+ op_release(new_op);
+
+ return d;
+}
+
+void orangefs_kill_sb(struct super_block *sb)
+{
+ gossip_debug(GOSSIP_SUPER_DEBUG, "orangefs_kill_sb: called\n");
+
+ /* provided sb cleanup */
+ kill_anon_super(sb);
+
+ /*
+ * issue the unmount to userspace to tell it to remove the
+ * dynamic mount info it has for this superblock
+ */
+ orangefs_unmount_sb(sb);
+
+ /* remove the sb from our list of orangefs specific sb's */
+
+ spin_lock(&orangefs_superblocks_lock);
+ __list_del_entry(&ORANGEFS_SB(sb)->list); /* not list_del_init */
+ ORANGEFS_SB(sb)->list.prev = NULL;
+ spin_unlock(&orangefs_superblocks_lock);
+
+ /*
+ * make sure that ORANGEFS_DEV_REMOUNT_ALL loop that might've seen us
+ * gets completed before we free the dang thing.
+ */
+ mutex_lock(&request_mutex);
+ mutex_unlock(&request_mutex);
+
+ /* free the orangefs superblock private data */
+ kfree(ORANGEFS_SB(sb));
+}
+
+int orangefs_inode_cache_initialize(void)
+{
+ orangefs_inode_cache = kmem_cache_create("orangefs_inode_cache",
+ sizeof(struct orangefs_inode_s),
+ 0,
+ ORANGEFS_CACHE_CREATE_FLAGS,
+ orangefs_inode_cache_ctor);
+
+ if (!orangefs_inode_cache) {
+ gossip_err("Cannot create orangefs_inode_cache\n");
+ return -ENOMEM;
+ }
+ return 0;
+}
+
+int orangefs_inode_cache_finalize(void)
+{
+ kmem_cache_destroy(orangefs_inode_cache);
+ return 0;
+}
diff --git a/fs/orangefs/symlink.c b/fs/orangefs/symlink.c
new file mode 100644
index 000000000..6418dd638
--- /dev/null
+++ b/fs/orangefs/symlink.c
@@ -0,0 +1,19 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+struct inode_operations orangefs_symlink_inode_operations = {
+ .readlink = generic_readlink,
+ .get_link = simple_get_link,
+ .setattr = orangefs_setattr,
+ .getattr = orangefs_getattr,
+ .listxattr = orangefs_listxattr,
+ .setxattr = generic_setxattr,
+ .permission = orangefs_permission,
+};
diff --git a/fs/orangefs/upcall.h b/fs/orangefs/upcall.h
new file mode 100644
index 000000000..001b20239
--- /dev/null
+++ b/fs/orangefs/upcall.h
@@ -0,0 +1,246 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+#ifndef __UPCALL_H
+#define __UPCALL_H
+
+/*
+ * Sanitized this header file to fix
+ * 32-64 bit interaction issues between
+ * client-core and device
+ */
+struct orangefs_io_request_s {
+ __s32 __pad1;
+ __s32 buf_index;
+ __s32 count;
+ __s32 __pad2;
+ __s64 offset;
+ struct orangefs_object_kref refn;
+ enum ORANGEFS_io_type io_type;
+ __s32 readahead_size;
+};
+
+struct orangefs_lookup_request_s {
+ __s32 sym_follow;
+ __s32 __pad1;
+ struct orangefs_object_kref parent_refn;
+ char d_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_create_request_s {
+ struct orangefs_object_kref parent_refn;
+ struct ORANGEFS_sys_attr_s attributes;
+ char d_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_symlink_request_s {
+ struct orangefs_object_kref parent_refn;
+ struct ORANGEFS_sys_attr_s attributes;
+ char entry_name[ORANGEFS_NAME_MAX];
+ char target[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_getattr_request_s {
+ struct orangefs_object_kref refn;
+ __u32 mask;
+ __u32 __pad1;
+};
+
+struct orangefs_setattr_request_s {
+ struct orangefs_object_kref refn;
+ struct ORANGEFS_sys_attr_s attributes;
+};
+
+struct orangefs_remove_request_s {
+ struct orangefs_object_kref parent_refn;
+ char d_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_mkdir_request_s {
+ struct orangefs_object_kref parent_refn;
+ struct ORANGEFS_sys_attr_s attributes;
+ char d_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_readdir_request_s {
+ struct orangefs_object_kref refn;
+ __u64 token;
+ __s32 max_dirent_count;
+ __s32 buf_index;
+};
+
+struct orangefs_readdirplus_request_s {
+ struct orangefs_object_kref refn;
+ __u64 token;
+ __s32 max_dirent_count;
+ __u32 mask;
+ __s32 buf_index;
+ __s32 __pad1;
+};
+
+struct orangefs_rename_request_s {
+ struct orangefs_object_kref old_parent_refn;
+ struct orangefs_object_kref new_parent_refn;
+ char d_old_name[ORANGEFS_NAME_MAX];
+ char d_new_name[ORANGEFS_NAME_MAX];
+};
+
+struct orangefs_statfs_request_s {
+ __s32 fs_id;
+ __s32 __pad1;
+};
+
+struct orangefs_truncate_request_s {
+ struct orangefs_object_kref refn;
+ __s64 size;
+};
+
+struct orangefs_mmap_ra_cache_flush_request_s {
+ struct orangefs_object_kref refn;
+};
+
+struct orangefs_fs_mount_request_s {
+ char orangefs_config_server[ORANGEFS_MAX_SERVER_ADDR_LEN];
+};
+
+struct orangefs_fs_umount_request_s {
+ __s32 id;
+ __s32 fs_id;
+ char orangefs_config_server[ORANGEFS_MAX_SERVER_ADDR_LEN];
+};
+
+struct orangefs_getxattr_request_s {
+ struct orangefs_object_kref refn;
+ __s32 key_sz;
+ __s32 __pad1;
+ char key[ORANGEFS_MAX_XATTR_NAMELEN];
+};
+
+struct orangefs_setxattr_request_s {
+ struct orangefs_object_kref refn;
+ struct ORANGEFS_keyval_pair keyval;
+ __s32 flags;
+ __s32 __pad1;
+};
+
+struct orangefs_listxattr_request_s {
+ struct orangefs_object_kref refn;
+ __s32 requested_count;
+ __s32 __pad1;
+ __u64 token;
+};
+
+struct orangefs_removexattr_request_s {
+ struct orangefs_object_kref refn;
+ __s32 key_sz;
+ __s32 __pad1;
+ char key[ORANGEFS_MAX_XATTR_NAMELEN];
+};
+
+struct orangefs_op_cancel_s {
+ __u64 op_tag;
+};
+
+struct orangefs_fsync_request_s {
+ struct orangefs_object_kref refn;
+};
+
+enum orangefs_param_request_type {
+ ORANGEFS_PARAM_REQUEST_SET = 1,
+ ORANGEFS_PARAM_REQUEST_GET = 2
+};
+
+enum orangefs_param_request_op {
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_TIMEOUT_MSECS = 1,
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_HARD_LIMIT = 2,
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_SOFT_LIMIT = 3,
+ ORANGEFS_PARAM_REQUEST_OP_ACACHE_RECLAIM_PERCENTAGE = 4,
+ ORANGEFS_PARAM_REQUEST_OP_PERF_TIME_INTERVAL_SECS = 5,
+ ORANGEFS_PARAM_REQUEST_OP_PERF_HISTORY_SIZE = 6,
+ ORANGEFS_PARAM_REQUEST_OP_PERF_RESET = 7,
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_TIMEOUT_MSECS = 8,
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_HARD_LIMIT = 9,
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_SOFT_LIMIT = 10,
+ ORANGEFS_PARAM_REQUEST_OP_NCACHE_RECLAIM_PERCENTAGE = 11,
+ ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_TIMEOUT_MSECS = 12,
+ ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_HARD_LIMIT = 13,
+ ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_SOFT_LIMIT = 14,
+ ORANGEFS_PARAM_REQUEST_OP_STATIC_ACACHE_RECLAIM_PERCENTAGE = 15,
+ ORANGEFS_PARAM_REQUEST_OP_CLIENT_DEBUG = 16,
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_TIMEOUT_SECS = 17,
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_HARD_LIMIT = 18,
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_SOFT_LIMIT = 19,
+ ORANGEFS_PARAM_REQUEST_OP_CCACHE_RECLAIM_PERCENTAGE = 20,
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_TIMEOUT_SECS = 21,
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_HARD_LIMIT = 22,
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_SOFT_LIMIT = 23,
+ ORANGEFS_PARAM_REQUEST_OP_CAPCACHE_RECLAIM_PERCENTAGE = 24,
+ ORANGEFS_PARAM_REQUEST_OP_TWO_MASK_VALUES = 25,
+};
+
+struct orangefs_param_request_s {
+ enum orangefs_param_request_type type;
+ enum orangefs_param_request_op op;
+ __s64 value;
+ char s_value[ORANGEFS_MAX_DEBUG_STRING_LEN];
+};
+
+enum orangefs_perf_count_request_type {
+ ORANGEFS_PERF_COUNT_REQUEST_ACACHE = 1,
+ ORANGEFS_PERF_COUNT_REQUEST_NCACHE = 2,
+ ORANGEFS_PERF_COUNT_REQUEST_CAPCACHE = 3,
+};
+
+struct orangefs_perf_count_request_s {
+ enum orangefs_perf_count_request_type type;
+ __s32 __pad1;
+};
+
+struct orangefs_fs_key_request_s {
+ __s32 fsid;
+ __s32 __pad1;
+};
+
+struct orangefs_upcall_s {
+ __s32 type;
+ __u32 uid;
+ __u32 gid;
+ int pid;
+ int tgid;
+ /* Trailers unused but must be retained for protocol compatibility. */
+ __s64 trailer_size;
+ char *trailer_buf;
+
+ union {
+ struct orangefs_io_request_s io;
+ struct orangefs_lookup_request_s lookup;
+ struct orangefs_create_request_s create;
+ struct orangefs_symlink_request_s sym;
+ struct orangefs_getattr_request_s getattr;
+ struct orangefs_setattr_request_s setattr;
+ struct orangefs_remove_request_s remove;
+ struct orangefs_mkdir_request_s mkdir;
+ struct orangefs_readdir_request_s readdir;
+ struct orangefs_readdirplus_request_s readdirplus;
+ struct orangefs_rename_request_s rename;
+ struct orangefs_statfs_request_s statfs;
+ struct orangefs_truncate_request_s truncate;
+ struct orangefs_mmap_ra_cache_flush_request_s ra_cache_flush;
+ struct orangefs_fs_mount_request_s fs_mount;
+ struct orangefs_fs_umount_request_s fs_umount;
+ struct orangefs_getxattr_request_s getxattr;
+ struct orangefs_setxattr_request_s setxattr;
+ struct orangefs_listxattr_request_s listxattr;
+ struct orangefs_removexattr_request_s removexattr;
+ struct orangefs_op_cancel_s cancel;
+ struct orangefs_fsync_request_s fsync;
+ struct orangefs_param_request_s param;
+ struct orangefs_perf_count_request_s perf_count;
+ struct orangefs_fs_key_request_s fs_key;
+ } req;
+};
+
+#endif /* __UPCALL_H */
diff --git a/fs/orangefs/waitqueue.c b/fs/orangefs/waitqueue.c
new file mode 100644
index 000000000..31635bc30
--- /dev/null
+++ b/fs/orangefs/waitqueue.c
@@ -0,0 +1,357 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ * (C) 2011 Omnibond Systems
+ *
+ * Changes by Acxiom Corporation to implement generic service_operation()
+ * function, Copyright Acxiom Corporation, 2005.
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * In-kernel waitqueue operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+
+static int wait_for_matching_downcall(struct orangefs_kernel_op_s *, long, bool);
+static void orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *);
+
+/*
+ * What we do in this function is to walk the list of operations that are
+ * present in the request queue and mark them as purged.
+ * NOTE: This is called from the device close after client-core has
+ * guaranteed that no new operations could appear on the list since the
+ * client-core is anyway going to exit.
+ */
+void purge_waiting_ops(void)
+{
+ struct orangefs_kernel_op_s *op;
+
+ spin_lock(&orangefs_request_list_lock);
+ list_for_each_entry(op, &orangefs_request_list, list) {
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "pvfs2-client-core: purging op tag %llu %s\n",
+ llu(op->tag),
+ get_opname_string(op));
+ set_op_state_purged(op);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: op:%s: op_state:%d: process:%s:\n",
+ __func__,
+ get_opname_string(op),
+ op->op_state,
+ current->comm);
+ }
+ spin_unlock(&orangefs_request_list_lock);
+}
+
+/*
+ * submits a ORANGEFS operation and waits for it to complete
+ *
+ * Note op->downcall.status will contain the status of the operation (in
+ * errno format), whether provided by pvfs2-client or a result of failure to
+ * service the operation. If the caller wishes to distinguish, then
+ * op->state can be checked to see if it was serviced or not.
+ *
+ * Returns contents of op->downcall.status for convenience
+ */
+int service_operation(struct orangefs_kernel_op_s *op,
+ const char *op_name,
+ int flags)
+{
+ long timeout = MAX_SCHEDULE_TIMEOUT;
+ int ret = 0;
+
+ DEFINE_WAIT(wait_entry);
+
+ op->upcall.tgid = current->tgid;
+ op->upcall.pid = current->pid;
+
+retry_servicing:
+ op->downcall.status = 0;
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "%s: %s op:%p: process:%s: pid:%d:\n",
+ __func__,
+ op_name,
+ op,
+ current->comm,
+ current->pid);
+
+ /*
+ * If ORANGEFS_OP_NO_MUTEX was set in flags, we need to avoid
+ * acquiring the request_mutex because we're servicing a
+ * high priority remount operation and the request_mutex is
+ * already taken.
+ */
+ if (!(flags & ORANGEFS_OP_NO_MUTEX)) {
+ if (flags & ORANGEFS_OP_INTERRUPTIBLE)
+ ret = mutex_lock_interruptible(&request_mutex);
+ else
+ ret = mutex_lock_killable(&request_mutex);
+ /*
+ * check to see if we were interrupted while waiting for
+ * mutex
+ */
+ if (ret < 0) {
+ op->downcall.status = ret;
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "%s: service_operation interrupted.\n",
+ __func__);
+ return ret;
+ }
+ }
+
+ /* queue up the operation */
+ spin_lock(&orangefs_request_list_lock);
+ spin_lock(&op->lock);
+ set_op_state_waiting(op);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: op:%s: op_state:%d: process:%s:\n",
+ __func__,
+ get_opname_string(op),
+ op->op_state,
+ current->comm);
+ /* add high priority remount op to the front of the line. */
+ if (flags & ORANGEFS_OP_PRIORITY)
+ list_add(&op->list, &orangefs_request_list);
+ else
+ list_add_tail(&op->list, &orangefs_request_list);
+ spin_unlock(&op->lock);
+ wake_up_interruptible(&orangefs_request_list_waitq);
+ if (!__is_daemon_in_service()) {
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "%s:client core is NOT in service.\n",
+ __func__);
+ timeout = op_timeout_secs * HZ;
+ }
+ spin_unlock(&orangefs_request_list_lock);
+
+ if (!(flags & ORANGEFS_OP_NO_MUTEX))
+ mutex_unlock(&request_mutex);
+
+ ret = wait_for_matching_downcall(op, timeout,
+ flags & ORANGEFS_OP_INTERRUPTIBLE);
+
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "%s: wait_for_matching_downcall returned %d for %p\n",
+ __func__,
+ ret,
+ op);
+
+ /* got matching downcall; make sure status is in errno format */
+ if (!ret) {
+ spin_unlock(&op->lock);
+ op->downcall.status =
+ orangefs_normalize_to_errno(op->downcall.status);
+ ret = op->downcall.status;
+ goto out;
+ }
+
+ /* failed to get matching downcall */
+ if (ret == -ETIMEDOUT) {
+ gossip_err("%s: %s -- wait timed out; aborting attempt.\n",
+ __func__,
+ op_name);
+ }
+
+ /*
+ * remove a waiting op from the request list or
+ * remove an in-progress op from the in-progress list.
+ */
+ orangefs_clean_up_interrupted_operation(op);
+
+ op->downcall.status = ret;
+ /* retry if operation has not been serviced and if requested */
+ if (ret == -EAGAIN) {
+ op->attempts++;
+ timeout = op_timeout_secs * HZ;
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "orangefs: tag %llu (%s)"
+ " -- operation to be retried (%d attempt)\n",
+ llu(op->tag),
+ op_name,
+ op->attempts);
+
+ /*
+ * io ops (ops that use the shared memory buffer) have
+ * to be returned to their caller for a retry. Other ops
+ * can just be recycled here.
+ */
+ if (!op->uses_shared_memory)
+ goto retry_servicing;
+ }
+
+out:
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "%s: %s returning: %d for %p.\n",
+ __func__,
+ op_name,
+ ret,
+ op);
+ return ret;
+}
+
+/* This can get called on an I/O op if it had a bad service_operation. */
+bool orangefs_cancel_op_in_progress(struct orangefs_kernel_op_s *op)
+{
+ u64 tag = op->tag;
+ if (!op_state_in_progress(op))
+ return false;
+
+ op->slot_to_free = op->upcall.req.io.buf_index;
+ memset(&op->upcall, 0, sizeof(op->upcall));
+ memset(&op->downcall, 0, sizeof(op->downcall));
+ op->upcall.type = ORANGEFS_VFS_OP_CANCEL;
+ op->upcall.req.cancel.op_tag = tag;
+ op->downcall.type = ORANGEFS_VFS_OP_INVALID;
+ op->downcall.status = -1;
+ orangefs_new_tag(op);
+
+ spin_lock(&orangefs_request_list_lock);
+ /* orangefs_request_list_lock is enough of a barrier here */
+ if (!__is_daemon_in_service()) {
+ spin_unlock(&orangefs_request_list_lock);
+ return false;
+ }
+ spin_lock(&op->lock);
+ set_op_state_waiting(op);
+ gossip_debug(GOSSIP_DEV_DEBUG,
+ "%s: op:%s: op_state:%d: process:%s:\n",
+ __func__,
+ get_opname_string(op),
+ op->op_state,
+ current->comm);
+ list_add(&op->list, &orangefs_request_list);
+ spin_unlock(&op->lock);
+ spin_unlock(&orangefs_request_list_lock);
+
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "Attempting ORANGEFS operation cancellation of tag %llu\n",
+ llu(tag));
+ return true;
+}
+
+/*
+ * Change an op to the "given up" state and remove it from its list.
+ */
+static void
+ orangefs_clean_up_interrupted_operation(struct orangefs_kernel_op_s *op)
+{
+ /*
+ * handle interrupted cases depending on what state we were in when
+ * the interruption is detected.
+ *
+ * Called with op->lock held.
+ */
+
+ /*
+ * List manipulation code elsewhere will ignore ops that
+ * have been given up upon.
+ */
+ op->op_state |= OP_VFS_STATE_GIVEN_UP;
+
+ if (list_empty(&op->list)) {
+ /* caught copying to/from daemon */
+ BUG_ON(op_state_serviced(op));
+ spin_unlock(&op->lock);
+ wait_for_completion(&op->waitq);
+ } else if (op_state_waiting(op)) {
+ /*
+ * upcall hasn't been read; remove op from upcall request
+ * list.
+ */
+ spin_unlock(&op->lock);
+ spin_lock(&orangefs_request_list_lock);
+ list_del_init(&op->list);
+ spin_unlock(&orangefs_request_list_lock);
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "Interrupted: Removed op %p from request_list\n",
+ op);
+ } else if (op_state_in_progress(op)) {
+ /* op must be removed from the in progress htable */
+ spin_unlock(&op->lock);
+ spin_lock(&htable_ops_in_progress_lock);
+ list_del_init(&op->list);
+ spin_unlock(&htable_ops_in_progress_lock);
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "Interrupted: Removed op %p"
+ " from htable_ops_in_progress\n",
+ op);
+ } else {
+ spin_unlock(&op->lock);
+ gossip_err("interrupted operation is in a weird state 0x%x\n",
+ op->op_state);
+ }
+ reinit_completion(&op->waitq);
+}
+
+/*
+ * Sleeps on waitqueue waiting for matching downcall.
+ * If client-core finishes servicing, then we are good to go.
+ * else if client-core exits, we get woken up here, and retry with a timeout
+ *
+ * When this call returns to the caller, the specified op will no
+ * longer be in either the in_progress hash table or on the request list.
+ *
+ * Returns 0 on success and -errno on failure
+ * Errors are:
+ * EAGAIN in case we want the caller to requeue and try again..
+ * EINTR/EIO/ETIMEDOUT indicating we are done trying to service this
+ * operation since client-core seems to be exiting too often
+ * or if we were interrupted.
+ *
+ * Returns with op->lock taken.
+ */
+static int wait_for_matching_downcall(struct orangefs_kernel_op_s *op,
+ long timeout,
+ bool interruptible)
+{
+ long n;
+
+ /*
+ * There's a "schedule_timeout" inside of these wait
+ * primitives, during which the op is out of the hands of the
+ * user process that needs something done and is being
+ * manipulated by the client-core process.
+ */
+ if (interruptible)
+ n = wait_for_completion_interruptible_timeout(&op->waitq,
+ timeout);
+ else
+ n = wait_for_completion_killable_timeout(&op->waitq, timeout);
+
+ spin_lock(&op->lock);
+
+ if (op_state_serviced(op))
+ return 0;
+
+ if (unlikely(n < 0)) {
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "%s: operation interrupted, tag %llu, %p\n",
+ __func__,
+ llu(op->tag),
+ op);
+ return -EINTR;
+ }
+ if (op_state_purged(op)) {
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "%s: operation purged, tag %llu, %p, %d\n",
+ __func__,
+ llu(op->tag),
+ op,
+ op->attempts);
+ return (op->attempts < ORANGEFS_PURGE_RETRY_COUNT) ?
+ -EAGAIN :
+ -EIO;
+ }
+ /* must have timed out, then... */
+ gossip_debug(GOSSIP_WAIT_DEBUG,
+ "%s: operation timed out, tag %llu, %p, %d)\n",
+ __func__,
+ llu(op->tag),
+ op,
+ op->attempts);
+ return -ETIMEDOUT;
+}
diff --git a/fs/orangefs/xattr.c b/fs/orangefs/xattr.c
new file mode 100644
index 000000000..63a6280d8
--- /dev/null
+++ b/fs/orangefs/xattr.c
@@ -0,0 +1,530 @@
+/*
+ * (C) 2001 Clemson University and The University of Chicago
+ *
+ * See COPYING in top-level directory.
+ */
+
+/*
+ * Linux VFS extended attribute operations.
+ */
+
+#include "protocol.h"
+#include "orangefs-kernel.h"
+#include "orangefs-bufmap.h"
+#include <linux/posix_acl_xattr.h>
+#include <linux/xattr.h>
+
+
+#define SYSTEM_ORANGEFS_KEY "system.pvfs2."
+#define SYSTEM_ORANGEFS_KEY_LEN 13
+
+/*
+ * this function returns
+ * 0 if the key corresponding to name is not meant to be printed as part
+ * of a listxattr.
+ * 1 if the key corresponding to name is meant to be returned as part of
+ * a listxattr.
+ * The ones that start SYSTEM_ORANGEFS_KEY are the ones to avoid printing.
+ */
+static int is_reserved_key(const char *key, size_t size)
+{
+
+ if (size < SYSTEM_ORANGEFS_KEY_LEN)
+ return 1;
+
+ return strncmp(key, SYSTEM_ORANGEFS_KEY, SYSTEM_ORANGEFS_KEY_LEN) ? 1 : 0;
+}
+
+static inline int convert_to_internal_xattr_flags(int setxattr_flags)
+{
+ int internal_flag = 0;
+
+ if (setxattr_flags & XATTR_REPLACE) {
+ /* Attribute must exist! */
+ internal_flag = ORANGEFS_XATTR_REPLACE;
+ } else if (setxattr_flags & XATTR_CREATE) {
+ /* Attribute must not exist */
+ internal_flag = ORANGEFS_XATTR_CREATE;
+ }
+ return internal_flag;
+}
+
+
+/*
+ * Tries to get a specified key's attributes of a given
+ * file into a user-specified buffer. Note that the getxattr
+ * interface allows for the users to probe the size of an
+ * extended attribute by passing in a value of 0 to size.
+ * Thus our return value is always the size of the attribute
+ * unless the key does not exist for the file and/or if
+ * there were errors in fetching the attribute value.
+ */
+ssize_t orangefs_inode_getxattr(struct inode *inode, const char *prefix,
+ const char *name, void *buffer, size_t size)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op = NULL;
+ ssize_t ret = -ENOMEM;
+ ssize_t length = 0;
+ int fsuid;
+ int fsgid;
+
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "%s: prefix %s name %s, buffer_size %zd\n",
+ __func__, prefix, name, size);
+
+ if ((strlen(name) + strlen(prefix)) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+ gossip_err("Invalid key length (%d)\n",
+ (int)(strlen(name) + strlen(prefix)));
+ return -EINVAL;
+ }
+
+ fsuid = from_kuid(current_user_ns(), current_fsuid());
+ fsgid = from_kgid(current_user_ns(), current_fsgid());
+
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "getxattr on inode %pU, name %s "
+ "(uid %o, gid %o)\n",
+ get_khandle_from_ino(inode),
+ name,
+ fsuid,
+ fsgid);
+
+ down_read(&orangefs_inode->xattr_sem);
+
+ new_op = op_alloc(ORANGEFS_VFS_OP_GETXATTR);
+ if (!new_op)
+ goto out_unlock;
+
+ new_op->upcall.req.getxattr.refn = orangefs_inode->refn;
+ ret = snprintf((char *)new_op->upcall.req.getxattr.key,
+ ORANGEFS_MAX_XATTR_NAMELEN, "%s%s", prefix, name);
+
+ /*
+ * NOTE: Although keys are meant to be NULL terminated textual
+ * strings, I am going to explicitly pass the length just in case
+ * we change this later on...
+ */
+ new_op->upcall.req.getxattr.key_sz = ret + 1;
+
+ ret = service_operation(new_op, "orangefs_inode_getxattr",
+ get_interruptible_flag(inode));
+ if (ret != 0) {
+ if (ret == -ENOENT) {
+ ret = -ENODATA;
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "orangefs_inode_getxattr: inode %pU key %s"
+ " does not exist!\n",
+ get_khandle_from_ino(inode),
+ (char *)new_op->upcall.req.getxattr.key);
+ }
+ goto out_release_op;
+ }
+
+ /*
+ * Length returned includes null terminator.
+ */
+ length = new_op->downcall.resp.getxattr.val_sz;
+
+ /*
+ * Just return the length of the queried attribute.
+ */
+ if (size == 0) {
+ ret = length;
+ goto out_release_op;
+ }
+
+ /*
+ * Check to see if key length is > provided buffer size.
+ */
+ if (length > size) {
+ ret = -ERANGE;
+ goto out_release_op;
+ }
+
+ memcpy(buffer, new_op->downcall.resp.getxattr.val, length);
+ memset(buffer + length, 0, size - length);
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "orangefs_inode_getxattr: inode %pU "
+ "key %s key_sz %d, val_len %d\n",
+ get_khandle_from_ino(inode),
+ (char *)new_op->
+ upcall.req.getxattr.key,
+ (int)new_op->
+ upcall.req.getxattr.key_sz,
+ (int)ret);
+
+ ret = length;
+
+out_release_op:
+ op_release(new_op);
+out_unlock:
+ up_read(&orangefs_inode->xattr_sem);
+ return ret;
+}
+
+static int orangefs_inode_removexattr(struct inode *inode,
+ const char *prefix,
+ const char *name,
+ int flags)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op = NULL;
+ int ret = -ENOMEM;
+
+ down_write(&orangefs_inode->xattr_sem);
+ new_op = op_alloc(ORANGEFS_VFS_OP_REMOVEXATTR);
+ if (!new_op)
+ goto out_unlock;
+
+ new_op->upcall.req.removexattr.refn = orangefs_inode->refn;
+ /*
+ * NOTE: Although keys are meant to be NULL terminated
+ * textual strings, I am going to explicitly pass the
+ * length just in case we change this later on...
+ */
+ ret = snprintf((char *)new_op->upcall.req.removexattr.key,
+ ORANGEFS_MAX_XATTR_NAMELEN,
+ "%s%s",
+ (prefix ? prefix : ""),
+ name);
+ new_op->upcall.req.removexattr.key_sz = ret + 1;
+
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "orangefs_inode_removexattr: key %s, key_sz %d\n",
+ (char *)new_op->upcall.req.removexattr.key,
+ (int)new_op->upcall.req.removexattr.key_sz);
+
+ ret = service_operation(new_op,
+ "orangefs_inode_removexattr",
+ get_interruptible_flag(inode));
+ if (ret == -ENOENT) {
+ /*
+ * Request to replace a non-existent attribute is an error.
+ */
+ if (flags & XATTR_REPLACE)
+ ret = -ENODATA;
+ else
+ ret = 0;
+ }
+
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "orangefs_inode_removexattr: returning %d\n", ret);
+
+ op_release(new_op);
+out_unlock:
+ up_write(&orangefs_inode->xattr_sem);
+ return ret;
+}
+
+/*
+ * Tries to set an attribute for a given key on a file.
+ *
+ * Returns a -ve number on error and 0 on success. Key is text, but value
+ * can be binary!
+ */
+int orangefs_inode_setxattr(struct inode *inode, const char *prefix,
+ const char *name, const void *value, size_t size, int flags)
+{
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op;
+ int internal_flag = 0;
+ int ret = -ENOMEM;
+
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "%s: prefix %s, name %s, buffer_size %zd\n",
+ __func__, prefix, name, size);
+
+ if (size >= ORANGEFS_MAX_XATTR_VALUELEN ||
+ flags < 0) {
+ gossip_err("orangefs_inode_setxattr: bogus values of size(%d), flags(%d)\n",
+ (int)size,
+ flags);
+ return -EINVAL;
+ }
+
+ internal_flag = convert_to_internal_xattr_flags(flags);
+
+ if (prefix) {
+ if (strlen(name) + strlen(prefix) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+ gossip_err
+ ("orangefs_inode_setxattr: bogus key size (%d)\n",
+ (int)(strlen(name) + strlen(prefix)));
+ return -EINVAL;
+ }
+ } else {
+ if (strlen(name) >= ORANGEFS_MAX_XATTR_NAMELEN) {
+ gossip_err
+ ("orangefs_inode_setxattr: bogus key size (%d)\n",
+ (int)(strlen(name)));
+ return -EINVAL;
+ }
+ }
+
+ /* This is equivalent to a removexattr */
+ if (size == 0 && value == NULL) {
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "removing xattr (%s%s)\n",
+ prefix,
+ name);
+ return orangefs_inode_removexattr(inode, prefix, name, flags);
+ }
+
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "setxattr on inode %pU, name %s\n",
+ get_khandle_from_ino(inode),
+ name);
+
+ down_write(&orangefs_inode->xattr_sem);
+ new_op = op_alloc(ORANGEFS_VFS_OP_SETXATTR);
+ if (!new_op)
+ goto out_unlock;
+
+
+ new_op->upcall.req.setxattr.refn = orangefs_inode->refn;
+ new_op->upcall.req.setxattr.flags = internal_flag;
+ /*
+ * NOTE: Although keys are meant to be NULL terminated textual
+ * strings, I am going to explicitly pass the length just in
+ * case we change this later on...
+ */
+ ret = snprintf((char *)new_op->upcall.req.setxattr.keyval.key,
+ ORANGEFS_MAX_XATTR_NAMELEN,
+ "%s%s",
+ prefix, name);
+ new_op->upcall.req.setxattr.keyval.key_sz = ret + 1;
+ memcpy(new_op->upcall.req.setxattr.keyval.val, value, size);
+ new_op->upcall.req.setxattr.keyval.val_sz = size;
+
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "orangefs_inode_setxattr: key %s, key_sz %d "
+ " value size %zd\n",
+ (char *)new_op->upcall.req.setxattr.keyval.key,
+ (int)new_op->upcall.req.setxattr.keyval.key_sz,
+ size);
+
+ ret = service_operation(new_op,
+ "orangefs_inode_setxattr",
+ get_interruptible_flag(inode));
+
+ gossip_debug(GOSSIP_XATTR_DEBUG,
+ "orangefs_inode_setxattr: returning %d\n",
+ ret);
+
+ /* when request is serviced properly, free req op struct */
+ op_release(new_op);
+out_unlock:
+ up_write(&orangefs_inode->xattr_sem);
+ return ret;
+}
+
+/*
+ * Tries to get a specified object's keys into a user-specified buffer of a
+ * given size. Note that like the previous instances of xattr routines, this
+ * also allows you to pass in a NULL pointer and 0 size to probe the size for
+ * subsequent memory allocations. Thus our return value is always the size of
+ * all the keys unless there were errors in fetching the keys!
+ */
+ssize_t orangefs_listxattr(struct dentry *dentry, char *buffer, size_t size)
+{
+ struct inode *inode = dentry->d_inode;
+ struct orangefs_inode_s *orangefs_inode = ORANGEFS_I(inode);
+ struct orangefs_kernel_op_s *new_op;
+ __u64 token = ORANGEFS_ITERATE_START;
+ ssize_t ret = -ENOMEM;
+ ssize_t total = 0;
+ int count_keys = 0;
+ int key_size;
+ int i = 0;
+ int returned_count = 0;
+
+ if (size > 0 && buffer == NULL) {
+ gossip_err("%s: bogus NULL pointers\n", __func__);
+ return -EINVAL;
+ }
+
+ down_read(&orangefs_inode->xattr_sem);
+ new_op = op_alloc(ORANGEFS_VFS_OP_LISTXATTR);
+ if (!new_op)
+ goto out_unlock;
+
+ if (buffer && size > 0)
+ memset(buffer, 0, size);
+
+try_again:
+ key_size = 0;
+ new_op->upcall.req.listxattr.refn = orangefs_inode->refn;
+ new_op->upcall.req.listxattr.token = token;
+ new_op->upcall.req.listxattr.requested_count =
+ (size == 0) ? 0 : ORANGEFS_MAX_XATTR_LISTLEN;
+ ret = service_operation(new_op, __func__,
+ get_interruptible_flag(inode));
+ if (ret != 0)
+ goto done;
+
+ if (size == 0) {
+ /*
+ * This is a bit of a big upper limit, but I did not want to
+ * spend too much time getting this correct, since users end
+ * up allocating memory rather than us...
+ */
+ total = new_op->downcall.resp.listxattr.returned_count *
+ ORANGEFS_MAX_XATTR_NAMELEN;
+ goto done;
+ }
+
+ returned_count = new_op->downcall.resp.listxattr.returned_count;
+ if (returned_count < 0 ||
+ returned_count >= ORANGEFS_MAX_XATTR_LISTLEN) {
+ gossip_err("%s: impossible value for returned_count:%d:\n",
+ __func__,
+ returned_count);
+ ret = -EIO;
+ goto done;
+ }
+
+ /*
+ * Check to see how much can be fit in the buffer. Fit only whole keys.
+ */
+ for (i = 0; i < returned_count; i++) {
+ if (new_op->downcall.resp.listxattr.lengths[i] < 0 ||
+ new_op->downcall.resp.listxattr.lengths[i] >
+ ORANGEFS_MAX_XATTR_NAMELEN) {
+ gossip_err("%s: impossible value for lengths[%d]\n",
+ __func__,
+ new_op->downcall.resp.listxattr.lengths[i]);
+ ret = -EIO;
+ goto done;
+ }
+ if (total + new_op->downcall.resp.listxattr.lengths[i] > size)
+ goto done;
+
+ /*
+ * Since many dumb programs try to setxattr() on our reserved
+ * xattrs this is a feeble attempt at defeating those by not
+ * listing them in the output of listxattr.. sigh
+ */
+ if (is_reserved_key(new_op->downcall.resp.listxattr.key +
+ key_size,
+ new_op->downcall.resp.
+ listxattr.lengths[i])) {
+ gossip_debug(GOSSIP_XATTR_DEBUG, "Copying key %d -> %s\n",
+ i, new_op->downcall.resp.listxattr.key +
+ key_size);
+ memcpy(buffer + total,
+ new_op->downcall.resp.listxattr.key + key_size,
+ new_op->downcall.resp.listxattr.lengths[i]);
+ total += new_op->downcall.resp.listxattr.lengths[i];
+ count_keys++;
+ } else {
+ gossip_debug(GOSSIP_XATTR_DEBUG, "[RESERVED] key %d -> %s\n",
+ i, new_op->downcall.resp.listxattr.key +
+ key_size);
+ }
+ key_size += new_op->downcall.resp.listxattr.lengths[i];
+ }
+
+ /*
+ * Since the buffer was large enough, we might have to continue
+ * fetching more keys!
+ */
+ token = new_op->downcall.resp.listxattr.token;
+ if (token != ORANGEFS_ITERATE_END)
+ goto try_again;
+
+done:
+ gossip_debug(GOSSIP_XATTR_DEBUG, "%s: returning %d"
+ " [size of buffer %ld] (filled in %d keys)\n",
+ __func__,
+ ret ? (int)ret : (int)total,
+ (long)size,
+ count_keys);
+ op_release(new_op);
+ if (ret == 0)
+ ret = total;
+out_unlock:
+ up_read(&orangefs_inode->xattr_sem);
+ return ret;
+}
+
+static int orangefs_xattr_set_default(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ const char *name,
+ const void *buffer,
+ size_t size,
+ int flags)
+{
+ return orangefs_inode_setxattr(dentry->d_inode,
+ ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+ name,
+ buffer,
+ size,
+ flags);
+}
+
+static int orangefs_xattr_get_default(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ const char *name,
+ void *buffer,
+ size_t size)
+{
+ return orangefs_inode_getxattr(dentry->d_inode,
+ ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+ name,
+ buffer,
+ size);
+
+}
+
+static int orangefs_xattr_set_trusted(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ const char *name,
+ const void *buffer,
+ size_t size,
+ int flags)
+{
+ return orangefs_inode_setxattr(dentry->d_inode,
+ ORANGEFS_XATTR_NAME_TRUSTED_PREFIX,
+ name,
+ buffer,
+ size,
+ flags);
+}
+
+static int orangefs_xattr_get_trusted(const struct xattr_handler *handler,
+ struct dentry *dentry,
+ const char *name,
+ void *buffer,
+ size_t size)
+{
+ return orangefs_inode_getxattr(dentry->d_inode,
+ ORANGEFS_XATTR_NAME_TRUSTED_PREFIX,
+ name,
+ buffer,
+ size);
+}
+
+static struct xattr_handler orangefs_xattr_trusted_handler = {
+ .prefix = ORANGEFS_XATTR_NAME_TRUSTED_PREFIX,
+ .get = orangefs_xattr_get_trusted,
+ .set = orangefs_xattr_set_trusted,
+};
+
+static struct xattr_handler orangefs_xattr_default_handler = {
+ /*
+ * NOTE: this is set to be the empty string.
+ * so that all un-prefixed xattrs keys get caught
+ * here!
+ */
+ .prefix = ORANGEFS_XATTR_NAME_DEFAULT_PREFIX,
+ .get = orangefs_xattr_get_default,
+ .set = orangefs_xattr_set_default,
+};
+
+const struct xattr_handler *orangefs_xattr_handlers[] = {
+ &posix_acl_access_xattr_handler,
+ &posix_acl_default_xattr_handler,
+ &orangefs_xattr_trusted_handler,
+ &orangefs_xattr_default_handler,
+ NULL
+};
diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c
index d894e7cd9..cc514da6f 100644
--- a/fs/overlayfs/copy_up.c
+++ b/fs/overlayfs/copy_up.c
@@ -7,6 +7,7 @@
* the Free Software Foundation.
*/
+#include <linux/module.h>
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/file.h>
@@ -16,10 +17,41 @@
#include <linux/uaccess.h>
#include <linux/sched.h>
#include <linux/namei.h>
+#include <linux/fdtable.h>
+#include <linux/ratelimit.h>
#include "overlayfs.h"
#define OVL_COPY_UP_CHUNK_SIZE (1 << 20)
+static bool __read_mostly ovl_check_copy_up;
+module_param_named(check_copy_up, ovl_check_copy_up, bool,
+ S_IWUSR | S_IRUGO);
+MODULE_PARM_DESC(ovl_check_copy_up,
+ "Warn on copy-up when causing process also has a R/O fd open");
+
+static int ovl_check_fd(const void *data, struct file *f, unsigned int fd)
+{
+ const struct dentry *dentry = data;
+
+ if (f->f_inode == d_inode(dentry))
+ pr_warn_ratelimited("overlayfs: Warning: Copying up %pD, but open R/O on fd %u which will cease to be coherent [pid=%d %s]\n",
+ f, fd, current->pid, current->comm);
+ return 0;
+}
+
+/*
+ * Check the fds open by this process and warn if something like the following
+ * scenario is about to occur:
+ *
+ * fd1 = open("foo", O_RDONLY);
+ * fd2 = open("foo", O_RDWR);
+ */
+static void ovl_do_check_copy_up(struct dentry *dentry)
+{
+ if (ovl_check_copy_up)
+ iterate_fd(current->files, 0, ovl_check_fd, dentry);
+}
+
int ovl_copy_xattr(struct dentry *old, struct dentry *new)
{
ssize_t list_size, size, value_size = 0;
@@ -235,6 +267,7 @@ static int ovl_copy_up_locked(struct dentry *workdir, struct dentry *upperdir,
if (S_ISREG(stat->mode)) {
struct path upperpath;
+
ovl_path_upper(dentry, &upperpath);
BUG_ON(upperpath.dentry != NULL);
upperpath.dentry = newdentry;
@@ -309,6 +342,8 @@ int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry,
if (WARN_ON(!workdir))
return -EROFS;
+ ovl_do_check_copy_up(lowerpath->dentry);
+
ovl_path_upper(parent, &parentpath);
upperdir = parentpath.dentry;
diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c
index 52f6de5d4..b3fc0a35b 100644
--- a/fs/overlayfs/dir.c
+++ b/fs/overlayfs/dir.c
@@ -596,21 +596,25 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
{
struct dentry *upperdir = ovl_dentry_upper(dentry->d_parent);
struct inode *dir = upperdir->d_inode;
- struct dentry *upper = ovl_dentry_upper(dentry);
+ struct dentry *upper;
int err;
inode_lock_nested(dir, I_MUTEX_PARENT);
+ upper = lookup_one_len(dentry->d_name.name, upperdir,
+ dentry->d_name.len);
+ err = PTR_ERR(upper);
+ if (IS_ERR(upper))
+ goto out_unlock;
+
err = -ESTALE;
- if (upper->d_parent == upperdir) {
- /* Don't let d_delete() think it can reset d_inode */
- dget(upper);
+ if (upper == ovl_dentry_upper(dentry)) {
if (is_dir)
err = vfs_rmdir(dir, upper);
else
err = vfs_unlink(dir, upper, NULL);
- dput(upper);
ovl_dentry_version_inc(dentry->d_parent);
}
+ dput(upper);
/*
* Keeping this dentry hashed would mean having to release
@@ -620,6 +624,7 @@ static int ovl_remove_upper(struct dentry *dentry, bool is_dir)
*/
if (!err)
d_drop(dentry);
+out_unlock:
inode_unlock(dir);
return err;
@@ -714,7 +719,6 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
struct dentry *trap;
bool old_opaque;
bool new_opaque;
- bool new_create = false;
bool cleanup_whiteout = false;
bool overwrite = !(flags & RENAME_EXCHANGE);
bool is_dir = d_is_dir(old);
@@ -840,29 +844,38 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
trap = lock_rename(new_upperdir, old_upperdir);
- olddentry = ovl_dentry_upper(old);
- newdentry = ovl_dentry_upper(new);
- if (newdentry) {
+
+ olddentry = lookup_one_len(old->d_name.name, old_upperdir,
+ old->d_name.len);
+ err = PTR_ERR(olddentry);
+ if (IS_ERR(olddentry))
+ goto out_unlock;
+
+ err = -ESTALE;
+ if (olddentry != ovl_dentry_upper(old))
+ goto out_dput_old;
+
+ newdentry = lookup_one_len(new->d_name.name, new_upperdir,
+ new->d_name.len);
+ err = PTR_ERR(newdentry);
+ if (IS_ERR(newdentry))
+ goto out_dput_old;
+
+ err = -ESTALE;
+ if (ovl_dentry_upper(new)) {
if (opaquedir) {
- newdentry = opaquedir;
- opaquedir = NULL;
+ if (newdentry != opaquedir)
+ goto out_dput;
} else {
- dget(newdentry);
+ if (newdentry != ovl_dentry_upper(new))
+ goto out_dput;
}
} else {
- new_create = true;
- newdentry = lookup_one_len(new->d_name.name, new_upperdir,
- new->d_name.len);
- err = PTR_ERR(newdentry);
- if (IS_ERR(newdentry))
- goto out_unlock;
+ if (!d_is_negative(newdentry) &&
+ (!new_opaque || !ovl_is_whiteout(newdentry)))
+ goto out_dput;
}
- err = -ESTALE;
- if (olddentry->d_parent != old_upperdir)
- goto out_dput;
- if (newdentry->d_parent != new_upperdir)
- goto out_dput;
if (olddentry == trap)
goto out_dput;
if (newdentry == trap)
@@ -925,6 +938,8 @@ static int ovl_rename2(struct inode *olddir, struct dentry *old,
out_dput:
dput(newdentry);
+out_dput_old:
+ dput(olddentry);
out_unlock:
unlock_rename(new_upperdir, old_upperdir);
out_revert_creds:
diff --git a/fs/overlayfs/overlayfs.h b/fs/overlayfs/overlayfs.h
index 99b4168c3..6a7090f4a 100644
--- a/fs/overlayfs/overlayfs.h
+++ b/fs/overlayfs/overlayfs.h
@@ -166,6 +166,7 @@ extern const struct file_operations ovl_dir_operations;
int ovl_check_empty_dir(struct dentry *dentry, struct list_head *list);
void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list);
void ovl_cache_free(struct list_head *list);
+int ovl_check_d_type_supported(struct path *realpath);
/* inode.c */
int ovl_setattr(struct dentry *dentry, struct iattr *attr);
diff --git a/fs/overlayfs/readdir.c b/fs/overlayfs/readdir.c
index fdaf28f75..6ec1e43a9 100644
--- a/fs/overlayfs/readdir.c
+++ b/fs/overlayfs/readdir.c
@@ -36,13 +36,14 @@ struct ovl_dir_cache {
struct ovl_readdir_data {
struct dir_context ctx;
- bool is_merge;
+ bool is_lowest;
struct rb_root root;
struct list_head *list;
struct list_head middle;
struct ovl_cache_entry *first_maybe_whiteout;
int count;
int err;
+ bool d_type_supported;
};
struct ovl_dir_file {
@@ -139,9 +140,9 @@ static int ovl_cache_entry_add_rb(struct ovl_readdir_data *rdd,
return 0;
}
-static int ovl_fill_lower(struct ovl_readdir_data *rdd,
- const char *name, int namelen,
- loff_t offset, u64 ino, unsigned int d_type)
+static int ovl_fill_lowest(struct ovl_readdir_data *rdd,
+ const char *name, int namelen,
+ loff_t offset, u64 ino, unsigned int d_type)
{
struct ovl_cache_entry *p;
@@ -193,10 +194,10 @@ static int ovl_fill_merge(struct dir_context *ctx, const char *name,
container_of(ctx, struct ovl_readdir_data, ctx);
rdd->count++;
- if (!rdd->is_merge)
+ if (!rdd->is_lowest)
return ovl_cache_entry_add_rb(rdd, name, namelen, ino, d_type);
else
- return ovl_fill_lower(rdd, name, namelen, offset, ino, d_type);
+ return ovl_fill_lowest(rdd, name, namelen, offset, ino, d_type);
}
static int ovl_check_whiteouts(struct dentry *dir, struct ovl_readdir_data *rdd)
@@ -289,7 +290,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
.ctx.actor = ovl_fill_merge,
.list = list,
.root = RB_ROOT,
- .is_merge = false,
+ .is_lowest = false,
};
int idx, next;
@@ -306,7 +307,7 @@ static int ovl_dir_read_merged(struct dentry *dentry, struct list_head *list)
* allows offsets to be reasonably constant
*/
list_add(&rdd.middle, rdd.list);
- rdd.is_merge = true;
+ rdd.is_lowest = true;
err = ovl_dir_read(&realpath, &rdd);
list_del(&rdd.middle);
}
@@ -577,3 +578,39 @@ void ovl_cleanup_whiteouts(struct dentry *upper, struct list_head *list)
}
inode_unlock(upper->d_inode);
}
+
+static int ovl_check_d_type(struct dir_context *ctx, const char *name,
+ int namelen, loff_t offset, u64 ino,
+ unsigned int d_type)
+{
+ struct ovl_readdir_data *rdd =
+ container_of(ctx, struct ovl_readdir_data, ctx);
+
+ /* Even if d_type is not supported, DT_DIR is returned for . and .. */
+ if (!strncmp(name, ".", namelen) || !strncmp(name, "..", namelen))
+ return 0;
+
+ if (d_type != DT_UNKNOWN)
+ rdd->d_type_supported = true;
+
+ return 0;
+}
+
+/*
+ * Returns 1 if d_type is supported, 0 not supported/unknown. Negative values
+ * if error is encountered.
+ */
+int ovl_check_d_type_supported(struct path *realpath)
+{
+ int err;
+ struct ovl_readdir_data rdd = {
+ .ctx.actor = ovl_check_d_type,
+ .d_type_supported = false,
+ };
+
+ err = ovl_dir_read(realpath, &rdd);
+ if (err)
+ return err;
+
+ return rdd.d_type_supported;
+}
diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c
index 4399ea804..791235e03 100644
--- a/fs/overlayfs/super.c
+++ b/fs/overlayfs/super.c
@@ -411,9 +411,7 @@ static inline struct dentry *ovl_lookup_real(struct dentry *dir,
{
struct dentry *dentry;
- inode_lock(dir->d_inode);
- dentry = lookup_one_len(name->name, dir, name->len);
- inode_unlock(dir->d_inode);
+ dentry = lookup_hash(name, dir);
if (IS_ERR(dentry)) {
if (PTR_ERR(dentry) == -ENOENT)
@@ -969,7 +967,8 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
err = -EINVAL;
if (!ufs->config.lowerdir) {
- pr_err("overlayfs: missing 'lowerdir'\n");
+ if (!silent)
+ pr_err("overlayfs: missing 'lowerdir'\n");
goto out_free_config;
}
@@ -1061,6 +1060,21 @@ static int ovl_fill_super(struct super_block *sb, void *data, int silent)
sb->s_flags |= MS_RDONLY;
ufs->workdir = NULL;
}
+
+ /*
+ * Upper should support d_type, else whiteouts are visible.
+ * Given workdir and upper are on same fs, we can do
+ * iterate_dir() on workdir.
+ */
+ err = ovl_check_d_type_supported(&workpath);
+ if (err < 0)
+ goto out_put_workdir;
+
+ if (!err) {
+ pr_err("overlayfs: upper fs needs to support d_type.\n");
+ err = -EINVAL;
+ goto out_put_workdir;
+ }
}
err = -ENOMEM;
diff --git a/fs/pipe.c b/fs/pipe.c
index ab8dad3cc..0d3f5165c 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -134,7 +134,7 @@ static void anon_pipe_buf_release(struct pipe_inode_info *pipe,
if (page_count(page) == 1 && !pipe->tmp_page)
pipe->tmp_page = page;
else
- page_cache_release(page);
+ put_page(page);
}
/**
@@ -180,7 +180,7 @@ EXPORT_SYMBOL(generic_pipe_buf_steal);
*/
void generic_pipe_buf_get(struct pipe_inode_info *pipe, struct pipe_buffer *buf)
{
- page_cache_get(buf->page);
+ get_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_get);
@@ -211,7 +211,7 @@ EXPORT_SYMBOL(generic_pipe_buf_confirm);
void generic_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- page_cache_release(buf->page);
+ put_page(buf->page);
}
EXPORT_SYMBOL(generic_pipe_buf_release);
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 80e4bc69f..6bf0776cd 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -2159,6 +2159,7 @@ static const struct file_operations proc_map_files_operations = {
.llseek = default_llseek,
};
+#ifdef CONFIG_CHECKPOINT_RESTORE
struct timers_private {
struct pid *pid;
struct task_struct *task;
@@ -2257,6 +2258,73 @@ static const struct file_operations proc_timers_operations = {
.llseek = seq_lseek,
.release = seq_release_private,
};
+#endif
+
+static ssize_t timerslack_ns_write(struct file *file, const char __user *buf,
+ size_t count, loff_t *offset)
+{
+ struct inode *inode = file_inode(file);
+ struct task_struct *p;
+ u64 slack_ns;
+ int err;
+
+ err = kstrtoull_from_user(buf, count, 10, &slack_ns);
+ if (err < 0)
+ return err;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
+ task_lock(p);
+ if (slack_ns == 0)
+ p->timer_slack_ns = p->default_timer_slack_ns;
+ else
+ p->timer_slack_ns = slack_ns;
+ task_unlock(p);
+ } else
+ count = -EPERM;
+
+ put_task_struct(p);
+
+ return count;
+}
+
+static int timerslack_ns_show(struct seq_file *m, void *v)
+{
+ struct inode *inode = m->private;
+ struct task_struct *p;
+ int err = 0;
+
+ p = get_proc_task(inode);
+ if (!p)
+ return -ESRCH;
+
+ if (ptrace_may_access(p, PTRACE_MODE_ATTACH_FSCREDS)) {
+ task_lock(p);
+ seq_printf(m, "%llu\n", p->timer_slack_ns);
+ task_unlock(p);
+ } else
+ err = -EPERM;
+
+ put_task_struct(p);
+
+ return err;
+}
+
+static int timerslack_ns_open(struct inode *inode, struct file *filp)
+{
+ return single_open(filp, timerslack_ns_show, inode);
+}
+
+static const struct file_operations proc_pid_set_timerslack_ns_operations = {
+ .open = timerslack_ns_open,
+ .read = seq_read,
+ .write = timerslack_ns_write,
+ .llseek = seq_lseek,
+ .release = single_release,
+};
static int proc_pident_instantiate(struct inode *dir,
struct dentry *dentry, struct task_struct *task, const void *ptr)
@@ -2832,6 +2900,7 @@ static const struct pid_entry tgid_base_stuff[] = {
#ifdef CONFIG_CHECKPOINT_RESTORE
REG("timers", S_IRUGO, proc_timers_operations),
#endif
+ REG("timerslack_ns", S_IRUGO|S_IWUGO, proc_pid_set_timerslack_ns_operations),
};
static int proc_tgid_base_readdir(struct file *file, struct dir_context *ctx)
diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c
index df4661aba..83720460c 100644
--- a/fs/proc/meminfo.c
+++ b/fs/proc/meminfo.c
@@ -29,10 +29,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
unsigned long committed;
long cached;
long available;
- unsigned long pagecache;
- unsigned long wmark_low = 0;
unsigned long pages[NR_LRU_LISTS];
- struct zone *zone;
int lru;
/*
@@ -51,33 +48,7 @@ static int meminfo_proc_show(struct seq_file *m, void *v)
for (lru = LRU_BASE; lru < NR_LRU_LISTS; lru++)
pages[lru] = global_page_state(NR_LRU_BASE + lru);
- for_each_zone(zone)
- wmark_low += zone->watermark[WMARK_LOW];
-
- /*
- * Estimate the amount of memory available for userspace allocations,
- * without causing swapping.
- */
- available = i.freeram - totalreserve_pages;
-
- /*
- * Not all the page cache can be freed, otherwise the system will
- * start swapping. Assume at least half of the page cache, or the
- * low watermark worth of cache, needs to stay.
- */
- pagecache = pages[LRU_ACTIVE_FILE] + pages[LRU_INACTIVE_FILE];
- pagecache -= min(pagecache / 2, wmark_low);
- available += pagecache;
-
- /*
- * Part of the reclaimable slab consists of items that are in use,
- * and cannot be freed. Cap this estimate at the low watermark.
- */
- available += global_page_state(NR_SLAB_RECLAIMABLE) -
- min(global_page_state(NR_SLAB_RECLAIMABLE) / 2, wmark_low);
-
- if (available < 0)
- available = 0;
+ available = si_mem_available();
/*
* Tagged format, for easy grepping and expansion.
diff --git a/fs/proc/namespaces.c b/fs/proc/namespaces.c
index 276f12431..72cb26f85 100644
--- a/fs/proc/namespaces.c
+++ b/fs/proc/namespaces.c
@@ -28,6 +28,9 @@ static const struct proc_ns_operations *ns_entries[] = {
&userns_operations,
#endif
&mntns_operations,
+#ifdef CONFIG_CGROUPS
+ &cgroupns_operations,
+#endif
};
static const char *proc_ns_get_link(struct dentry *dentry,
diff --git a/fs/proc/page.c b/fs/proc/page.c
index b2855eea5..712f1b999 100644
--- a/fs/proc/page.c
+++ b/fs/proc/page.c
@@ -103,9 +103,9 @@ u64 stable_page_flags(struct page *page)
* pseudo flags for the well known (anonymous) memory mapped pages
*
* Note that page->_mapcount is overloaded in SLOB/SLUB/SLQB, so the
- * simple test in page_mapcount() is not enough.
+ * simple test in page_mapped() is not enough.
*/
- if (!PageSlab(page) && page_mapcount(page))
+ if (!PageSlab(page) && page_mapped(page))
u |= 1 << KPF_MMAP;
if (PageAnon(page))
u |= 1 << KPF_ANON;
@@ -148,6 +148,8 @@ u64 stable_page_flags(struct page *page)
*/
if (PageBuddy(page))
u |= 1 << KPF_BUDDY;
+ else if (page_count(page) == 0 && is_free_buddy_page(page))
+ u |= 1 << KPF_BUDDY;
if (PageBalloon(page))
u |= 1 << KPF_BALLOON;
@@ -158,6 +160,8 @@ u64 stable_page_flags(struct page *page)
u |= kpf_copy_bit(k, KPF_LOCKED, PG_locked);
u |= kpf_copy_bit(k, KPF_SLAB, PG_slab);
+ if (PageTail(page) && PageSlab(compound_head(page)))
+ u |= 1 << KPF_SLAB;
u |= kpf_copy_bit(k, KPF_ERROR, PG_error);
u |= kpf_copy_bit(k, KPF_DIRTY, PG_dirty);
diff --git a/fs/proc/task_mmu.c b/fs/proc/task_mmu.c
index 6c4f2fb26..c41eb7324 100644
--- a/fs/proc/task_mmu.c
+++ b/fs/proc/task_mmu.c
@@ -556,7 +556,7 @@ static void smaps_pte_entry(pte_t *pte, unsigned long addr,
if (radix_tree_exceptional_entry(page))
mss->swap += PAGE_SIZE;
else
- page_cache_release(page);
+ put_page(page);
return;
}
@@ -663,11 +663,20 @@ static void show_smap_vma_flags(struct seq_file *m, struct vm_area_struct *vma)
[ilog2(VM_MERGEABLE)] = "mg",
[ilog2(VM_UFFD_MISSING)]= "um",
[ilog2(VM_UFFD_WP)] = "uw",
+#ifdef CONFIG_X86_INTEL_MEMORY_PROTECTION_KEYS
+ /* These come out via ProtectionKey: */
+ [ilog2(VM_PKEY_BIT0)] = "",
+ [ilog2(VM_PKEY_BIT1)] = "",
+ [ilog2(VM_PKEY_BIT2)] = "",
+ [ilog2(VM_PKEY_BIT3)] = "",
+#endif
};
size_t i;
seq_puts(m, "VmFlags: ");
for (i = 0; i < BITS_PER_LONG; i++) {
+ if (!mnemonics[i][0])
+ continue;
if (vma->vm_flags & (1UL << i)) {
seq_printf(m, "%c%c ",
mnemonics[i][0], mnemonics[i][1]);
@@ -705,6 +714,10 @@ static int smaps_hugetlb_range(pte_t *pte, unsigned long hmask,
}
#endif /* HUGETLB_PAGE */
+void __weak arch_show_smap(struct seq_file *m, struct vm_area_struct *vma)
+{
+}
+
static int show_smap(struct seq_file *m, void *v, int is_pid)
{
struct vm_area_struct *vma = v;
@@ -786,6 +799,7 @@ static int show_smap(struct seq_file *m, void *v, int is_pid)
(vma->vm_flags & VM_LOCKED) ?
(unsigned long)(mss.pss >> (10 + PSS_SHIFT)) : 0);
+ arch_show_smap(m, vma);
show_smap_vma_flags(m, vma);
m_cache_vma(m, vma);
return 0;
diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c
index 4e61388ec..8afe10cf7 100644
--- a/fs/proc/vmcore.c
+++ b/fs/proc/vmcore.c
@@ -231,7 +231,9 @@ static ssize_t __read_vmcore(char *buffer, size_t buflen, loff_t *fpos,
list_for_each_entry(m, &vmcore_list, list) {
if (*fpos < m->offset + m->size) {
- tsz = min_t(size_t, m->offset + m->size - *fpos, buflen);
+ tsz = (size_t)min_t(unsigned long long,
+ m->offset + m->size - *fpos,
+ buflen);
start = m->paddr + *fpos - m->offset;
tmp = read_from_oldmem(buffer, tsz, &start, userbuf);
if (tmp < 0)
@@ -277,12 +279,12 @@ static int mmap_vmcore_fault(struct vm_area_struct *vma, struct vm_fault *vmf)
if (!page)
return VM_FAULT_OOM;
if (!PageUptodate(page)) {
- offset = (loff_t) index << PAGE_CACHE_SHIFT;
+ offset = (loff_t) index << PAGE_SHIFT;
buf = __va((page_to_pfn(page) << PAGE_SHIFT));
rc = __read_vmcore(buf, PAGE_SIZE, &offset, 0);
if (rc < 0) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return (rc == -ENOMEM) ? VM_FAULT_OOM : VM_FAULT_SIGBUS;
}
SetPageUptodate(page);
@@ -461,7 +463,8 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma)
if (start < m->offset + m->size) {
u64 paddr = 0;
- tsz = min_t(size_t, m->offset + m->size - start, size);
+ tsz = (size_t)min_t(unsigned long long,
+ m->offset + m->size - start, size);
paddr = m->paddr + start - m->offset;
if (vmcore_remap_oldmem_pfn(vma, vma->vm_start + len,
paddr >> PAGE_SHIFT, tsz,
diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c
index dc645b66c..45d611074 100644
--- a/fs/pstore/inode.c
+++ b/fs/pstore/inode.c
@@ -420,8 +420,8 @@ static int pstore_fill_super(struct super_block *sb, void *data, int silent)
pstore_sb = sb;
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = PSTOREFS_MAGIC;
sb->s_op = &pstore_ops;
sb->s_time_gran = 1;
diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c
index 319c3a60c..bd9812e83 100644
--- a/fs/pstore/ram.c
+++ b/fs/pstore/ram.c
@@ -55,8 +55,8 @@ static ulong ramoops_pmsg_size = MIN_MEM_SIZE;
module_param_named(pmsg_size, ramoops_pmsg_size, ulong, 0400);
MODULE_PARM_DESC(pmsg_size, "size of user space message log");
-static ulong mem_address;
-module_param(mem_address, ulong, 0400);
+static unsigned long long mem_address;
+module_param(mem_address, ullong, 0400);
MODULE_PARM_DESC(mem_address,
"start of reserved RAM used to store oops/panic logs");
diff --git a/fs/qnx6/dir.c b/fs/qnx6/dir.c
index e1f37278c..144ceda49 100644
--- a/fs/qnx6/dir.c
+++ b/fs/qnx6/dir.c
@@ -35,9 +35,9 @@ static struct page *qnx6_get_page(struct inode *dir, unsigned long n)
static unsigned last_entry(struct inode *inode, unsigned long page_nr)
{
unsigned long last_byte = inode->i_size;
- last_byte -= page_nr << PAGE_CACHE_SHIFT;
- if (last_byte > PAGE_CACHE_SIZE)
- last_byte = PAGE_CACHE_SIZE;
+ last_byte -= page_nr << PAGE_SHIFT;
+ if (last_byte > PAGE_SIZE)
+ last_byte = PAGE_SIZE;
return last_byte / QNX6_DIR_ENTRY_SIZE;
}
@@ -47,9 +47,9 @@ static struct qnx6_long_filename *qnx6_longname(struct super_block *sb,
{
struct qnx6_sb_info *sbi = QNX6_SB(sb);
u32 s = fs32_to_cpu(sbi, de->de_long_inode); /* in block units */
- u32 n = s >> (PAGE_CACHE_SHIFT - sb->s_blocksize_bits); /* in pages */
+ u32 n = s >> (PAGE_SHIFT - sb->s_blocksize_bits); /* in pages */
/* within page */
- u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_CACHE_MASK;
+ u32 offs = (s << sb->s_blocksize_bits) & ~PAGE_MASK;
struct address_space *mapping = sbi->longfile->i_mapping;
struct page *page = read_mapping_page(mapping, n, NULL);
if (IS_ERR(page))
@@ -115,8 +115,8 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
struct qnx6_sb_info *sbi = QNX6_SB(s);
loff_t pos = ctx->pos & ~(QNX6_DIR_ENTRY_SIZE - 1);
unsigned long npages = dir_pages(inode);
- unsigned long n = pos >> PAGE_CACHE_SHIFT;
- unsigned start = (pos & ~PAGE_CACHE_MASK) / QNX6_DIR_ENTRY_SIZE;
+ unsigned long n = pos >> PAGE_SHIFT;
+ unsigned start = (pos & ~PAGE_MASK) / QNX6_DIR_ENTRY_SIZE;
bool done = false;
ctx->pos = pos;
@@ -131,7 +131,7 @@ static int qnx6_readdir(struct file *file, struct dir_context *ctx)
if (IS_ERR(page)) {
pr_err("%s(): read failed\n", __func__);
- ctx->pos = (n + 1) << PAGE_CACHE_SHIFT;
+ ctx->pos = (n + 1) << PAGE_SHIFT;
return PTR_ERR(page);
}
de = ((struct qnx6_dir_entry *)page_address(page)) + start;
diff --git a/fs/qnx6/inode.c b/fs/qnx6/inode.c
index 47bb1de07..1192422a1 100644
--- a/fs/qnx6/inode.c
+++ b/fs/qnx6/inode.c
@@ -542,8 +542,8 @@ struct inode *qnx6_iget(struct super_block *sb, unsigned ino)
iget_failed(inode);
return ERR_PTR(-EIO);
}
- n = (ino - 1) >> (PAGE_CACHE_SHIFT - QNX6_INODE_SIZE_BITS);
- offs = (ino - 1) & (~PAGE_CACHE_MASK >> QNX6_INODE_SIZE_BITS);
+ n = (ino - 1) >> (PAGE_SHIFT - QNX6_INODE_SIZE_BITS);
+ offs = (ino - 1) & (~PAGE_MASK >> QNX6_INODE_SIZE_BITS);
mapping = sbi->inodes->i_mapping;
page = read_mapping_page(mapping, n, NULL);
if (IS_ERR(page)) {
diff --git a/fs/qnx6/qnx6.h b/fs/qnx6/qnx6.h
index d3fb2b698..f23b5c4a6 100644
--- a/fs/qnx6/qnx6.h
+++ b/fs/qnx6/qnx6.h
@@ -128,7 +128,7 @@ extern struct qnx6_super_block *qnx6_mmi_fill_super(struct super_block *s,
static inline void qnx6_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
extern unsigned qnx6_find_entry(int len, struct inode *dir, const char *name,
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index 850d17fa0..ff21980d0 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -411,6 +411,8 @@ int dquot_acquire(struct dquot *dquot)
ret = dqopt->ops[dquot->dq_id.type]->read_dqblk(dquot);
if (ret < 0)
goto out_iolock;
+ /* Make sure flags update is visible after dquot has been filled */
+ smp_mb__before_atomic();
set_bit(DQ_READ_B, &dquot->dq_flags);
/* Instantiate dquot if needed */
if (!test_bit(DQ_ACTIVE_B, &dquot->dq_flags) && !dquot->dq_off) {
@@ -427,6 +429,11 @@ int dquot_acquire(struct dquot *dquot)
goto out_iolock;
}
}
+ /*
+ * Make sure flags update is visible after on-disk struct has been
+ * allocated. Paired with smp_rmb() in dqget().
+ */
+ smp_mb__before_atomic();
set_bit(DQ_ACTIVE_B, &dquot->dq_flags);
out_iolock:
mutex_unlock(&dqopt->dqio_mutex);
@@ -887,6 +894,11 @@ we_slept:
goto out;
}
}
+ /*
+ * Make sure following reads see filled structure - paired with
+ * smp_mb__before_atomic() in dquot_acquire().
+ */
+ smp_rmb();
#ifdef CONFIG_QUOTA_DEBUG
BUG_ON(!dquot->dq_sb); /* Has somebody invalidated entry under us? */
#endif
@@ -2030,6 +2042,30 @@ int dquot_commit_info(struct super_block *sb, int type)
}
EXPORT_SYMBOL(dquot_commit_info);
+int dquot_get_next_id(struct super_block *sb, struct kqid *qid)
+{
+ struct quota_info *dqopt = sb_dqopt(sb);
+ int err;
+
+ mutex_lock(&dqopt->dqonoff_mutex);
+ if (!sb_has_quota_active(sb, qid->type)) {
+ err = -ESRCH;
+ goto out;
+ }
+ if (!dqopt->ops[qid->type]->get_next_id) {
+ err = -ENOSYS;
+ goto out;
+ }
+ mutex_lock(&dqopt->dqio_mutex);
+ err = dqopt->ops[qid->type]->get_next_id(sb, qid);
+ mutex_unlock(&dqopt->dqio_mutex);
+out:
+ mutex_unlock(&dqopt->dqonoff_mutex);
+
+ return err;
+}
+EXPORT_SYMBOL(dquot_get_next_id);
+
/*
* Definitions of diskquota operations.
*/
@@ -2041,6 +2077,7 @@ const struct dquot_operations dquot_operations = {
.write_info = dquot_commit_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
+ .get_next_id = dquot_get_next_id,
};
EXPORT_SYMBOL(dquot_operations);
@@ -2429,9 +2466,7 @@ int dquot_quota_on_mount(struct super_block *sb, char *qf_name,
struct dentry *dentry;
int error;
- inode_lock(d_inode(sb->s_root));
- dentry = lookup_one_len(qf_name, sb->s_root, strlen(qf_name));
- inode_unlock(d_inode(sb->s_root));
+ dentry = lookup_one_len_unlocked(qf_name, sb->s_root, strlen(qf_name));
if (IS_ERR(dentry))
return PTR_ERR(dentry);
@@ -2564,6 +2599,27 @@ int dquot_get_dqblk(struct super_block *sb, struct kqid qid,
}
EXPORT_SYMBOL(dquot_get_dqblk);
+int dquot_get_next_dqblk(struct super_block *sb, struct kqid *qid,
+ struct qc_dqblk *di)
+{
+ struct dquot *dquot;
+ int err;
+
+ if (!sb->dq_op->get_next_id)
+ return -ENOSYS;
+ err = sb->dq_op->get_next_id(sb, qid);
+ if (err < 0)
+ return err;
+ dquot = dqget(sb, *qid);
+ if (IS_ERR(dquot))
+ return PTR_ERR(dquot);
+ do_get_dqblk(dquot, di);
+ dqput(dquot);
+
+ return 0;
+}
+EXPORT_SYMBOL(dquot_get_next_dqblk);
+
#define VFS_QC_MASK \
(QC_SPACE | QC_SPC_SOFT | QC_SPC_HARD | \
QC_INO_COUNT | QC_INO_SOFT | QC_INO_HARD | \
@@ -2764,6 +2820,7 @@ const struct quotactl_ops dquot_quotactl_ops = {
.get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
+ .get_nextdqblk = dquot_get_next_dqblk,
.set_dqblk = dquot_set_dqblk
};
EXPORT_SYMBOL(dquot_quotactl_ops);
@@ -2775,6 +2832,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = {
.get_state = dquot_get_state,
.set_info = dquot_set_dqinfo,
.get_dqblk = dquot_get_dqblk,
+ .get_nextdqblk = dquot_get_next_dqblk,
.set_dqblk = dquot_set_dqblk
};
EXPORT_SYMBOL(dquot_quotactl_sysfile_ops);
diff --git a/fs/quota/quota.c b/fs/quota/quota.c
index 374636709..0f10ee989 100644
--- a/fs/quota/quota.c
+++ b/fs/quota/quota.c
@@ -79,7 +79,7 @@ unsigned int qtype_enforce_flag(int type)
return 0;
}
-static int quota_quotaon(struct super_block *sb, int type, int cmd, qid_t id,
+static int quota_quotaon(struct super_block *sb, int type, qid_t id,
struct path *path)
{
if (!sb->s_qcop->quota_on && !sb->s_qcop->quota_enable)
@@ -222,6 +222,34 @@ static int quota_getquota(struct super_block *sb, int type, qid_t id,
return 0;
}
+/*
+ * Return quota for next active quota >= this id, if any exists,
+ * otherwise return -ENOENT via ->get_nextdqblk
+ */
+static int quota_getnextquota(struct super_block *sb, int type, qid_t id,
+ void __user *addr)
+{
+ struct kqid qid;
+ struct qc_dqblk fdq;
+ struct if_nextdqblk idq;
+ int ret;
+
+ if (!sb->s_qcop->get_nextdqblk)
+ return -ENOSYS;
+ qid = make_kqid(current_user_ns(), type, id);
+ if (!qid_valid(qid))
+ return -EINVAL;
+ ret = sb->s_qcop->get_nextdqblk(sb, &qid, &fdq);
+ if (ret)
+ return ret;
+ /* struct if_nextdqblk is a superset of struct if_dqblk */
+ copy_to_if_dqblk((struct if_dqblk *)&idq, &fdq);
+ idq.dqb_id = from_kqid(current_user_ns(), qid);
+ if (copy_to_user(addr, &idq, sizeof(idq)))
+ return -EFAULT;
+ return 0;
+}
+
static void copy_from_if_dqblk(struct qc_dqblk *dst, struct if_dqblk *src)
{
dst->d_spc_hardlimit = qbtos(src->dqb_bhardlimit);
@@ -625,6 +653,34 @@ static int quota_getxquota(struct super_block *sb, int type, qid_t id,
return ret;
}
+/*
+ * Return quota for next active quota >= this id, if any exists,
+ * otherwise return -ENOENT via ->get_nextdqblk.
+ */
+static int quota_getnextxquota(struct super_block *sb, int type, qid_t id,
+ void __user *addr)
+{
+ struct fs_disk_quota fdq;
+ struct qc_dqblk qdq;
+ struct kqid qid;
+ qid_t id_out;
+ int ret;
+
+ if (!sb->s_qcop->get_nextdqblk)
+ return -ENOSYS;
+ qid = make_kqid(current_user_ns(), type, id);
+ if (!qid_valid(qid))
+ return -EINVAL;
+ ret = sb->s_qcop->get_nextdqblk(sb, &qid, &qdq);
+ if (ret)
+ return ret;
+ id_out = from_kqid(current_user_ns(), qid);
+ copy_to_xfs_dqblk(&fdq, &qdq, type, id_out);
+ if (copy_to_user(addr, &fdq, sizeof(fdq)))
+ return -EFAULT;
+ return ret;
+}
+
static int quota_rmxquota(struct super_block *sb, void __user *addr)
{
__u32 flags;
@@ -659,7 +715,7 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
switch (cmd) {
case Q_QUOTAON:
- return quota_quotaon(sb, type, cmd, id, path);
+ return quota_quotaon(sb, type, id, path);
case Q_QUOTAOFF:
return quota_quotaoff(sb, type);
case Q_GETFMT:
@@ -670,6 +726,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
return quota_setinfo(sb, type, addr);
case Q_GETQUOTA:
return quota_getquota(sb, type, id, addr);
+ case Q_GETNEXTQUOTA:
+ return quota_getnextquota(sb, type, id, addr);
case Q_SETQUOTA:
return quota_setquota(sb, type, id, addr);
case Q_SYNC:
@@ -690,6 +748,8 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
return quota_setxquota(sb, type, id, addr);
case Q_XGETQUOTA:
return quota_getxquota(sb, type, id, addr);
+ case Q_XGETNEXTQUOTA:
+ return quota_getnextxquota(sb, type, id, addr);
case Q_XQUOTASYNC:
if (sb->s_flags & MS_RDONLY)
return -EROFS;
@@ -705,6 +765,11 @@ static int do_quotactl(struct super_block *sb, int type, int cmd, qid_t id,
/* Return 1 if 'cmd' will block on frozen filesystem */
static int quotactl_cmd_write(int cmd)
{
+ /*
+ * We cannot allow Q_GETQUOTA and Q_GETNEXTQUOTA without write access
+ * as dquot_acquire() may allocate space for new structure and OCFS2
+ * needs to increment on-disk use count.
+ */
switch (cmd) {
case Q_GETFMT:
case Q_GETINFO:
@@ -712,6 +777,7 @@ static int quotactl_cmd_write(int cmd)
case Q_XGETQSTAT:
case Q_XGETQSTATV:
case Q_XGETQUOTA:
+ case Q_XGETNEXTQUOTA:
case Q_XQUOTASYNC:
return 0;
}
diff --git a/fs/quota/quota_tree.c b/fs/quota/quota_tree.c
index 58efb83de..0738972e8 100644
--- a/fs/quota/quota_tree.c
+++ b/fs/quota/quota_tree.c
@@ -22,10 +22,9 @@ MODULE_LICENSE("GPL");
#define __QUOTA_QT_PARANOIA
-static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth)
+static int __get_index(struct qtree_mem_dqinfo *info, qid_t id, int depth)
{
unsigned int epb = info->dqi_usable_bs >> 2;
- qid_t id = from_kqid(&init_user_ns, qid);
depth = info->dqi_qtree_depth - depth - 1;
while (depth--)
@@ -33,6 +32,13 @@ static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth)
return id % epb;
}
+static int get_index(struct qtree_mem_dqinfo *info, struct kqid qid, int depth)
+{
+ qid_t id = from_kqid(&init_user_ns, qid);
+
+ return __get_index(info, id, depth);
+}
+
/* Number of entries in one blocks */
static int qtree_dqstr_in_blk(struct qtree_mem_dqinfo *info)
{
@@ -668,3 +674,60 @@ int qtree_release_dquot(struct qtree_mem_dqinfo *info, struct dquot *dquot)
return 0;
}
EXPORT_SYMBOL(qtree_release_dquot);
+
+static int find_next_id(struct qtree_mem_dqinfo *info, qid_t *id,
+ unsigned int blk, int depth)
+{
+ char *buf = getdqbuf(info->dqi_usable_bs);
+ __le32 *ref = (__le32 *)buf;
+ ssize_t ret;
+ unsigned int epb = info->dqi_usable_bs >> 2;
+ unsigned int level_inc = 1;
+ int i;
+
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = depth; i < info->dqi_qtree_depth - 1; i++)
+ level_inc *= epb;
+
+ ret = read_blk(info, blk, buf);
+ if (ret < 0) {
+ quota_error(info->dqi_sb,
+ "Can't read quota tree block %u", blk);
+ goto out_buf;
+ }
+ for (i = __get_index(info, *id, depth); i < epb; i++) {
+ if (ref[i] == cpu_to_le32(0)) {
+ *id += level_inc;
+ continue;
+ }
+ if (depth == info->dqi_qtree_depth - 1) {
+ ret = 0;
+ goto out_buf;
+ }
+ ret = find_next_id(info, id, le32_to_cpu(ref[i]), depth + 1);
+ if (ret != -ENOENT)
+ break;
+ }
+ if (i == epb) {
+ ret = -ENOENT;
+ goto out_buf;
+ }
+out_buf:
+ kfree(buf);
+ return ret;
+}
+
+int qtree_get_next_id(struct qtree_mem_dqinfo *info, struct kqid *qid)
+{
+ qid_t id = from_kqid(&init_user_ns, *qid);
+ int ret;
+
+ ret = find_next_id(info, &id, QT_TREEOFF, 0);
+ if (ret < 0)
+ return ret;
+ *qid = make_kqid(&init_user_ns, qid->type, id);
+ return 0;
+}
+EXPORT_SYMBOL(qtree_get_next_id);
diff --git a/fs/quota/quota_v2.c b/fs/quota/quota_v2.c
index ed85d4f35..ca71bf881 100644
--- a/fs/quota/quota_v2.c
+++ b/fs/quota/quota_v2.c
@@ -304,6 +304,11 @@ static int v2_free_file_info(struct super_block *sb, int type)
return 0;
}
+static int v2_get_next_id(struct super_block *sb, struct kqid *qid)
+{
+ return qtree_get_next_id(sb_dqinfo(sb, qid->type)->dqi_priv, qid);
+}
+
static const struct quota_format_ops v2_format_ops = {
.check_quota_file = v2_check_quota_file,
.read_file_info = v2_read_file_info,
@@ -312,6 +317,7 @@ static const struct quota_format_ops v2_format_ops = {
.read_dqblk = v2_read_dquot,
.commit_dqblk = v2_write_dquot,
.release_dqblk = v2_release_dquot,
+ .get_next_id = v2_get_next_id,
};
static struct quota_format_type v2r0_quota_format = {
diff --git a/fs/ramfs/inode.c b/fs/ramfs/inode.c
index 38981b037..1ab6e6c2e 100644
--- a/fs/ramfs/inode.c
+++ b/fs/ramfs/inode.c
@@ -223,8 +223,8 @@ int ramfs_fill_super(struct super_block *sb, void *data, int silent)
return err;
sb->s_maxbytes = MAX_LFS_FILESIZE;
- sb->s_blocksize = PAGE_CACHE_SIZE;
- sb->s_blocksize_bits = PAGE_CACHE_SHIFT;
+ sb->s_blocksize = PAGE_SIZE;
+ sb->s_blocksize_bits = PAGE_SHIFT;
sb->s_magic = RAMFS_MAGIC;
sb->s_op = &ramfs_ops;
sb->s_time_gran = 1;
diff --git a/fs/read_write.c b/fs/read_write.c
index 9461d1264..0a43d7b25 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -717,12 +717,17 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to)
EXPORT_SYMBOL(iov_shorten);
static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, iter_fn_t fn)
+ loff_t *ppos, iter_fn_t fn, int flags)
{
struct kiocb kiocb;
ssize_t ret;
+ if (flags & ~RWF_HIPRI)
+ return -EOPNOTSUPP;
+
init_sync_kiocb(&kiocb, filp);
+ if (flags & RWF_HIPRI)
+ kiocb.ki_flags |= IOCB_HIPRI;
kiocb.ki_pos = *ppos;
ret = fn(&kiocb, iter);
@@ -733,10 +738,13 @@ static ssize_t do_iter_readv_writev(struct file *filp, struct iov_iter *iter,
/* Do it by hand, with file-ops */
static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
- loff_t *ppos, io_fn_t fn)
+ loff_t *ppos, io_fn_t fn, int flags)
{
ssize_t ret = 0;
+ if (flags & ~RWF_HIPRI)
+ return -EOPNOTSUPP;
+
while (iov_iter_count(iter)) {
struct iovec iovec = iov_iter_iovec(iter);
ssize_t nr;
@@ -837,7 +845,8 @@ out:
static ssize_t do_readv_writev(int type, struct file *file,
const struct iovec __user * uvector,
- unsigned long nr_segs, loff_t *pos)
+ unsigned long nr_segs, loff_t *pos,
+ int flags)
{
size_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
@@ -869,9 +878,9 @@ static ssize_t do_readv_writev(int type, struct file *file,
}
if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
+ ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
if (type != READ)
file_end_write(file);
@@ -888,40 +897,40 @@ out:
}
ssize_t vfs_readv(struct file *file, const struct iovec __user *vec,
- unsigned long vlen, loff_t *pos)
+ unsigned long vlen, loff_t *pos, int flags)
{
if (!(file->f_mode & FMODE_READ))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_READ))
return -EINVAL;
- return do_readv_writev(READ, file, vec, vlen, pos);
+ return do_readv_writev(READ, file, vec, vlen, pos, flags);
}
EXPORT_SYMBOL(vfs_readv);
ssize_t vfs_writev(struct file *file, const struct iovec __user *vec,
- unsigned long vlen, loff_t *pos)
+ unsigned long vlen, loff_t *pos, int flags)
{
if (!(file->f_mode & FMODE_WRITE))
return -EBADF;
if (!(file->f_mode & FMODE_CAN_WRITE))
return -EINVAL;
- return do_readv_writev(WRITE, file, vec, vlen, pos);
+ return do_readv_writev(WRITE, file, vec, vlen, pos, flags);
}
EXPORT_SYMBOL(vfs_writev);
-SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
- unsigned long, vlen)
+static ssize_t do_readv(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
loff_t pos = file_pos_read(f.file);
- ret = vfs_readv(f.file, vec, vlen, &pos);
+ ret = vfs_readv(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput_pos(f);
@@ -933,15 +942,15 @@ SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
return ret;
}
-SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
- unsigned long, vlen)
+static ssize_t do_writev(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret = -EBADF;
if (f.file) {
loff_t pos = file_pos_read(f.file);
- ret = vfs_writev(f.file, vec, vlen, &pos);
+ ret = vfs_writev(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
file_pos_write(f.file, pos);
fdput_pos(f);
@@ -959,10 +968,9 @@ static inline loff_t pos_from_hilo(unsigned long high, unsigned long low)
return (((loff_t)high << HALF_LONG_BITS) << HALF_LONG_BITS) | low;
}
-SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
- unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+static ssize_t do_preadv(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, loff_t pos, int flags)
{
- loff_t pos = pos_from_hilo(pos_h, pos_l);
struct fd f;
ssize_t ret = -EBADF;
@@ -973,7 +981,7 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
if (f.file) {
ret = -ESPIPE;
if (f.file->f_mode & FMODE_PREAD)
- ret = vfs_readv(f.file, vec, vlen, &pos);
+ ret = vfs_readv(f.file, vec, vlen, &pos, flags);
fdput(f);
}
@@ -983,10 +991,9 @@ SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
return ret;
}
-SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
- unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+static ssize_t do_pwritev(unsigned long fd, const struct iovec __user *vec,
+ unsigned long vlen, loff_t pos, int flags)
{
- loff_t pos = pos_from_hilo(pos_h, pos_l);
struct fd f;
ssize_t ret = -EBADF;
@@ -997,7 +1004,7 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
if (f.file) {
ret = -ESPIPE;
if (f.file->f_mode & FMODE_PWRITE)
- ret = vfs_writev(f.file, vec, vlen, &pos);
+ ret = vfs_writev(f.file, vec, vlen, &pos, flags);
fdput(f);
}
@@ -1007,11 +1014,64 @@ SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
return ret;
}
+SYSCALL_DEFINE3(readv, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen)
+{
+ return do_readv(fd, vec, vlen, 0);
+}
+
+SYSCALL_DEFINE3(writev, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen)
+{
+ return do_writev(fd, vec, vlen, 0);
+}
+
+SYSCALL_DEFINE5(preadv, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+{
+ loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+ return do_preadv(fd, vec, vlen, pos, 0);
+}
+
+SYSCALL_DEFINE6(preadv2, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
+ int, flags)
+{
+ loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+ if (pos == -1)
+ return do_readv(fd, vec, vlen, flags);
+
+ return do_preadv(fd, vec, vlen, pos, flags);
+}
+
+SYSCALL_DEFINE5(pwritev, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h)
+{
+ loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+ return do_pwritev(fd, vec, vlen, pos, 0);
+}
+
+SYSCALL_DEFINE6(pwritev2, unsigned long, fd, const struct iovec __user *, vec,
+ unsigned long, vlen, unsigned long, pos_l, unsigned long, pos_h,
+ int, flags)
+{
+ loff_t pos = pos_from_hilo(pos_h, pos_l);
+
+ if (pos == -1)
+ return do_writev(fd, vec, vlen, flags);
+
+ return do_pwritev(fd, vec, vlen, pos, flags);
+}
+
#ifdef CONFIG_COMPAT
static ssize_t compat_do_readv_writev(int type, struct file *file,
const struct compat_iovec __user *uvector,
- unsigned long nr_segs, loff_t *pos)
+ unsigned long nr_segs, loff_t *pos,
+ int flags)
{
compat_ssize_t tot_len;
struct iovec iovstack[UIO_FASTIOV];
@@ -1043,9 +1103,9 @@ static ssize_t compat_do_readv_writev(int type, struct file *file,
}
if (iter_fn)
- ret = do_iter_readv_writev(file, &iter, pos, iter_fn);
+ ret = do_iter_readv_writev(file, &iter, pos, iter_fn, flags);
else
- ret = do_loop_readv_writev(file, &iter, pos, fn);
+ ret = do_loop_readv_writev(file, &iter, pos, fn, flags);
if (type != READ)
file_end_write(file);
@@ -1063,7 +1123,7 @@ out:
static size_t compat_readv(struct file *file,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos)
+ unsigned long vlen, loff_t *pos, int flags)
{
ssize_t ret = -EBADF;
@@ -1074,7 +1134,7 @@ static size_t compat_readv(struct file *file,
if (!(file->f_mode & FMODE_CAN_READ))
goto out;
- ret = compat_do_readv_writev(READ, file, vec, vlen, pos);
+ ret = compat_do_readv_writev(READ, file, vec, vlen, pos, flags);
out:
if (ret > 0)
@@ -1083,9 +1143,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
- const struct compat_iovec __user *,vec,
- compat_ulong_t, vlen)
+static size_t do_compat_readv(compat_ulong_t fd,
+ const struct compat_iovec __user *vec,
+ compat_ulong_t vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret;
@@ -1094,16 +1154,24 @@ COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
if (!f.file)
return -EBADF;
pos = f.file->f_pos;
- ret = compat_readv(f.file, vec, vlen, &pos);
+ ret = compat_readv(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
f.file->f_pos = pos;
fdput_pos(f);
return ret;
+
+}
+
+COMPAT_SYSCALL_DEFINE3(readv, compat_ulong_t, fd,
+ const struct compat_iovec __user *,vec,
+ compat_ulong_t, vlen)
+{
+ return do_compat_readv(fd, vec, vlen, 0);
}
-static long __compat_sys_preadv64(unsigned long fd,
+static long do_compat_preadv64(unsigned long fd,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos)
+ unsigned long vlen, loff_t pos, int flags)
{
struct fd f;
ssize_t ret;
@@ -1115,7 +1183,7 @@ static long __compat_sys_preadv64(unsigned long fd,
return -EBADF;
ret = -ESPIPE;
if (f.file->f_mode & FMODE_PREAD)
- ret = compat_readv(f.file, vec, vlen, &pos);
+ ret = compat_readv(f.file, vec, vlen, &pos, flags);
fdput(f);
return ret;
}
@@ -1125,7 +1193,7 @@ COMPAT_SYSCALL_DEFINE4(preadv64, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos)
{
- return __compat_sys_preadv64(fd, vec, vlen, pos);
+ return do_compat_preadv64(fd, vec, vlen, pos, 0);
}
#endif
@@ -1135,12 +1203,25 @@ COMPAT_SYSCALL_DEFINE5(preadv, compat_ulong_t, fd,
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return __compat_sys_preadv64(fd, vec, vlen, pos);
+ return do_compat_preadv64(fd, vec, vlen, pos, 0);
+}
+
+COMPAT_SYSCALL_DEFINE6(preadv2, compat_ulong_t, fd,
+ const struct compat_iovec __user *,vec,
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high,
+ int, flags)
+{
+ loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+
+ if (pos == -1)
+ return do_compat_readv(fd, vec, vlen, flags);
+
+ return do_compat_preadv64(fd, vec, vlen, pos, flags);
}
static size_t compat_writev(struct file *file,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t *pos)
+ unsigned long vlen, loff_t *pos, int flags)
{
ssize_t ret = -EBADF;
@@ -1151,7 +1232,7 @@ static size_t compat_writev(struct file *file,
if (!(file->f_mode & FMODE_CAN_WRITE))
goto out;
- ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos);
+ ret = compat_do_readv_writev(WRITE, file, vec, vlen, pos, 0);
out:
if (ret > 0)
@@ -1160,9 +1241,9 @@ out:
return ret;
}
-COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
- const struct compat_iovec __user *, vec,
- compat_ulong_t, vlen)
+static size_t do_compat_writev(compat_ulong_t fd,
+ const struct compat_iovec __user* vec,
+ compat_ulong_t vlen, int flags)
{
struct fd f = fdget_pos(fd);
ssize_t ret;
@@ -1171,16 +1252,23 @@ COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
if (!f.file)
return -EBADF;
pos = f.file->f_pos;
- ret = compat_writev(f.file, vec, vlen, &pos);
+ ret = compat_writev(f.file, vec, vlen, &pos, flags);
if (ret >= 0)
f.file->f_pos = pos;
fdput_pos(f);
return ret;
}
-static long __compat_sys_pwritev64(unsigned long fd,
+COMPAT_SYSCALL_DEFINE3(writev, compat_ulong_t, fd,
+ const struct compat_iovec __user *, vec,
+ compat_ulong_t, vlen)
+{
+ return do_compat_writev(fd, vec, vlen, 0);
+}
+
+static long do_compat_pwritev64(unsigned long fd,
const struct compat_iovec __user *vec,
- unsigned long vlen, loff_t pos)
+ unsigned long vlen, loff_t pos, int flags)
{
struct fd f;
ssize_t ret;
@@ -1192,7 +1280,7 @@ static long __compat_sys_pwritev64(unsigned long fd,
return -EBADF;
ret = -ESPIPE;
if (f.file->f_mode & FMODE_PWRITE)
- ret = compat_writev(f.file, vec, vlen, &pos);
+ ret = compat_writev(f.file, vec, vlen, &pos, flags);
fdput(f);
return ret;
}
@@ -1202,7 +1290,7 @@ COMPAT_SYSCALL_DEFINE4(pwritev64, unsigned long, fd,
const struct compat_iovec __user *,vec,
unsigned long, vlen, loff_t, pos)
{
- return __compat_sys_pwritev64(fd, vec, vlen, pos);
+ return do_compat_pwritev64(fd, vec, vlen, pos, 0);
}
#endif
@@ -1212,8 +1300,21 @@ COMPAT_SYSCALL_DEFINE5(pwritev, compat_ulong_t, fd,
{
loff_t pos = ((loff_t)pos_high << 32) | pos_low;
- return __compat_sys_pwritev64(fd, vec, vlen, pos);
+ return do_compat_pwritev64(fd, vec, vlen, pos, 0);
+}
+
+COMPAT_SYSCALL_DEFINE6(pwritev2, compat_ulong_t, fd,
+ const struct compat_iovec __user *,vec,
+ compat_ulong_t, vlen, u32, pos_low, u32, pos_high, int, flags)
+{
+ loff_t pos = ((loff_t)pos_high << 32) | pos_low;
+
+ if (pos == -1)
+ return do_compat_writev(fd, vec, vlen, flags);
+
+ return do_compat_pwritev64(fd, vec, vlen, pos, flags);
}
+
#endif
static ssize_t do_sendfile(int out_fd, int in_fd, loff_t *ppos,
diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c
index 9424a4ba9..389773711 100644
--- a/fs/reiserfs/file.c
+++ b/fs/reiserfs/file.c
@@ -180,11 +180,11 @@ int reiserfs_commit_page(struct inode *inode, struct page *page,
int partial = 0;
unsigned blocksize;
struct buffer_head *bh, *head;
- unsigned long i_size_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ unsigned long i_size_index = inode->i_size >> PAGE_SHIFT;
int new;
int logit = reiserfs_file_data_log(inode);
struct super_block *s = inode->i_sb;
- int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
+ int bh_per_page = PAGE_SIZE / s->s_blocksize;
struct reiserfs_transaction_handle th;
int ret = 0;
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ae9e5b308..d5c2e9c86 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -386,7 +386,7 @@ static int _get_block_create_0(struct inode *inode, sector_t block,
goto finished;
}
/* read file tail into part of page */
- offset = (cpu_key_k_offset(&key) - 1) & (PAGE_CACHE_SIZE - 1);
+ offset = (cpu_key_k_offset(&key) - 1) & (PAGE_SIZE - 1);
copy_item_head(&tmp_ih, ih);
/*
@@ -587,10 +587,10 @@ static int convert_tail_for_hole(struct inode *inode,
return -EIO;
/* always try to read until the end of the block */
- tail_start = tail_offset & (PAGE_CACHE_SIZE - 1);
+ tail_start = tail_offset & (PAGE_SIZE - 1);
tail_end = (tail_start | (bh_result->b_size - 1)) + 1;
- index = tail_offset >> PAGE_CACHE_SHIFT;
+ index = tail_offset >> PAGE_SHIFT;
/*
* hole_page can be zero in case of direct_io, we are sure
* that we cannot get here if we write with O_DIRECT into tail page
@@ -629,7 +629,7 @@ static int convert_tail_for_hole(struct inode *inode,
unlock:
if (tail_page != hole_page) {
unlock_page(tail_page);
- page_cache_release(tail_page);
+ put_page(tail_page);
}
out:
return retval;
@@ -2189,11 +2189,11 @@ static int grab_tail_page(struct inode *inode,
* we want the page with the last byte in the file,
* not the page that will hold the next byte for appending
*/
- unsigned long index = (inode->i_size - 1) >> PAGE_CACHE_SHIFT;
+ unsigned long index = (inode->i_size - 1) >> PAGE_SHIFT;
unsigned long pos = 0;
unsigned long start = 0;
unsigned long blocksize = inode->i_sb->s_blocksize;
- unsigned long offset = (inode->i_size) & (PAGE_CACHE_SIZE - 1);
+ unsigned long offset = (inode->i_size) & (PAGE_SIZE - 1);
struct buffer_head *bh;
struct buffer_head *head;
struct page *page;
@@ -2251,7 +2251,7 @@ out:
unlock:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return error;
}
@@ -2265,7 +2265,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
{
struct reiserfs_transaction_handle th;
/* we want the offset for the first byte after the end of the file */
- unsigned long offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+ unsigned long offset = inode->i_size & (PAGE_SIZE - 1);
unsigned blocksize = inode->i_sb->s_blocksize;
unsigned length;
struct page *page = NULL;
@@ -2345,7 +2345,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
}
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
reiserfs_write_unlock(inode->i_sb);
@@ -2354,7 +2354,7 @@ int reiserfs_truncate_file(struct inode *inode, int update_timestamps)
out:
if (page) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
reiserfs_write_unlock(inode->i_sb);
@@ -2426,7 +2426,7 @@ research:
} else if (is_direct_le_ih(ih)) {
char *p;
p = page_address(bh_result->b_page);
- p += (byte_offset - 1) & (PAGE_CACHE_SIZE - 1);
+ p += (byte_offset - 1) & (PAGE_SIZE - 1);
copy_size = ih_item_len(ih) - pos_in_item;
fs_gen = get_generation(inode->i_sb);
@@ -2525,7 +2525,7 @@ static int reiserfs_write_full_page(struct page *page,
struct writeback_control *wbc)
{
struct inode *inode = page->mapping->host;
- unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+ unsigned long end_index = inode->i_size >> PAGE_SHIFT;
int error = 0;
unsigned long block;
sector_t last_block;
@@ -2535,7 +2535,7 @@ static int reiserfs_write_full_page(struct page *page,
int checked = PageChecked(page);
struct reiserfs_transaction_handle th;
struct super_block *s = inode->i_sb;
- int bh_per_page = PAGE_CACHE_SIZE / s->s_blocksize;
+ int bh_per_page = PAGE_SIZE / s->s_blocksize;
th.t_trans_id = 0;
/* no logging allowed when nonblocking or from PF_MEMALLOC */
@@ -2564,16 +2564,16 @@ static int reiserfs_write_full_page(struct page *page,
if (page->index >= end_index) {
unsigned last_offset;
- last_offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+ last_offset = inode->i_size & (PAGE_SIZE - 1);
/* no file contents in this page */
if (page->index >= end_index + 1 || !last_offset) {
unlock_page(page);
return 0;
}
- zero_user_segment(page, last_offset, PAGE_CACHE_SIZE);
+ zero_user_segment(page, last_offset, PAGE_SIZE);
}
bh = head;
- block = page->index << (PAGE_CACHE_SHIFT - s->s_blocksize_bits);
+ block = page->index << (PAGE_SHIFT - s->s_blocksize_bits);
last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
/* first map all the buffers, logging any direct items we find */
do {
@@ -2774,7 +2774,7 @@ static int reiserfs_write_begin(struct file *file,
*fsdata = (void *)(unsigned long)flags;
}
- index = pos >> PAGE_CACHE_SHIFT;
+ index = pos >> PAGE_SHIFT;
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
@@ -2822,7 +2822,7 @@ static int reiserfs_write_begin(struct file *file,
}
if (ret) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
/* Truncate allocated blocks */
reiserfs_truncate_failed_write(inode);
}
@@ -2909,7 +2909,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
else
th = NULL;
- start = pos & (PAGE_CACHE_SIZE - 1);
+ start = pos & (PAGE_SIZE - 1);
if (unlikely(copied < len)) {
if (!PageUptodate(page))
copied = 0;
@@ -2974,7 +2974,7 @@ out:
if (locked)
reiserfs_write_unlock(inode->i_sb);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (pos + len > inode->i_size)
reiserfs_truncate_failed_write(inode);
@@ -2996,7 +2996,7 @@ int reiserfs_commit_write(struct file *f, struct page *page,
unsigned from, unsigned to)
{
struct inode *inode = page->mapping->host;
- loff_t pos = ((loff_t) page->index << PAGE_CACHE_SHIFT) + to;
+ loff_t pos = ((loff_t) page->index << PAGE_SHIFT) + to;
int ret = 0;
int update_sd = 0;
struct reiserfs_transaction_handle *th = NULL;
@@ -3181,7 +3181,7 @@ static void reiserfs_invalidatepage(struct page *page, unsigned int offset,
struct inode *inode = page->mapping->host;
unsigned int curr_off = 0;
unsigned int stop = offset + length;
- int partial_page = (offset || length < PAGE_CACHE_SIZE);
+ int partial_page = (offset || length < PAGE_SIZE);
int ret = 1;
BUG_ON(!PageLocked(page));
diff --git a/fs/reiserfs/ioctl.c b/fs/reiserfs/ioctl.c
index 036a1fc0a..57045f423 100644
--- a/fs/reiserfs/ioctl.c
+++ b/fs/reiserfs/ioctl.c
@@ -203,7 +203,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
* __reiserfs_write_begin on that page. This will force a
* reiserfs_get_block to unpack the tail for us.
*/
- index = inode->i_size >> PAGE_CACHE_SHIFT;
+ index = inode->i_size >> PAGE_SHIFT;
mapping = inode->i_mapping;
page = grab_cache_page(mapping, index);
retval = -ENOMEM;
@@ -221,7 +221,7 @@ int reiserfs_unpack(struct inode *inode, struct file *filp)
out_unlock:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
out:
inode_unlock(inode);
diff --git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c
index 44c2bdced..2ace90e98 100644
--- a/fs/reiserfs/journal.c
+++ b/fs/reiserfs/journal.c
@@ -599,18 +599,18 @@ static int journal_list_still_alive(struct super_block *s,
* This does a check to see if the buffer belongs to one of these
* lost pages before doing the final put_bh. If page->mapping was
* null, it tries to free buffers on the page, which should make the
- * final page_cache_release drop the page from the lru.
+ * final put_page drop the page from the lru.
*/
static void release_buffer_page(struct buffer_head *bh)
{
struct page *page = bh->b_page;
if (!page->mapping && trylock_page(page)) {
- page_cache_get(page);
+ get_page(page);
put_bh(bh);
if (!page->mapping)
try_to_free_buffers(page);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
} else {
put_bh(bh);
}
diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c
index 24cbe0132..5feacd689 100644
--- a/fs/reiserfs/stree.c
+++ b/fs/reiserfs/stree.c
@@ -1342,7 +1342,7 @@ int reiserfs_delete_item(struct reiserfs_transaction_handle *th,
*/
data = kmap_atomic(un_bh->b_page);
- off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_CACHE_SIZE - 1));
+ off = ((le_ih_k_offset(&s_ih) - 1) & (PAGE_SIZE - 1));
memcpy(data + off,
ih_item_body(PATH_PLAST_BUFFER(path), &s_ih),
ret_value);
@@ -1511,7 +1511,7 @@ static void unmap_buffers(struct page *page, loff_t pos)
if (page) {
if (page_has_buffers(page)) {
- tail_index = pos & (PAGE_CACHE_SIZE - 1);
+ tail_index = pos & (PAGE_SIZE - 1);
cur_index = 0;
head = page_buffers(page);
bh = head;
diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c
index c0306ec8e..b8f2d1e8c 100644
--- a/fs/reiserfs/super.c
+++ b/fs/reiserfs/super.c
@@ -802,6 +802,7 @@ static const struct dquot_operations reiserfs_quota_operations = {
.write_info = reiserfs_write_info,
.alloc_dquot = dquot_alloc,
.destroy_dquot = dquot_destroy,
+ .get_next_id = dquot_get_next_id,
};
static const struct quotactl_ops reiserfs_qctl_operations = {
diff --git a/fs/reiserfs/tail_conversion.c b/fs/reiserfs/tail_conversion.c
index f41e19b4b..2d5489b0a 100644
--- a/fs/reiserfs/tail_conversion.c
+++ b/fs/reiserfs/tail_conversion.c
@@ -151,7 +151,7 @@ int direct2indirect(struct reiserfs_transaction_handle *th, struct inode *inode,
*/
if (up_to_date_bh) {
unsigned pgoff =
- (tail_offset + total_tail - 1) & (PAGE_CACHE_SIZE - 1);
+ (tail_offset + total_tail - 1) & (PAGE_SIZE - 1);
char *kaddr = kmap_atomic(up_to_date_bh->b_page);
memset(kaddr + pgoff, 0, blk_size - total_tail);
kunmap_atomic(kaddr);
@@ -271,7 +271,7 @@ int indirect2direct(struct reiserfs_transaction_handle *th,
* the page was locked and this part of the page was up to date when
* indirect2direct was called, so we know the bytes are still valid
*/
- tail = tail + (pos & (PAGE_CACHE_SIZE - 1));
+ tail = tail + (pos & (PAGE_SIZE - 1));
PATH_LAST_POSITION(path)++;
diff --git a/fs/reiserfs/xattr.c b/fs/reiserfs/xattr.c
index 57e0b2310..28f5f8b11 100644
--- a/fs/reiserfs/xattr.c
+++ b/fs/reiserfs/xattr.c
@@ -415,7 +415,7 @@ out:
static inline void reiserfs_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
static struct page *reiserfs_get_page(struct inode *dir, size_t n)
@@ -427,7 +427,7 @@ static struct page *reiserfs_get_page(struct inode *dir, size_t n)
* and an unlink/rmdir has just occurred - GFP_NOFS avoids this
*/
mapping_set_gfp_mask(mapping, GFP_NOFS);
- page = read_mapping_page(mapping, n >> PAGE_CACHE_SHIFT, NULL);
+ page = read_mapping_page(mapping, n >> PAGE_SHIFT, NULL);
if (!IS_ERR(page)) {
kmap(page);
if (PageError(page))
@@ -526,10 +526,10 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
while (buffer_pos < buffer_size || buffer_pos == 0) {
size_t chunk;
size_t skip = 0;
- size_t page_offset = (file_pos & (PAGE_CACHE_SIZE - 1));
+ size_t page_offset = (file_pos & (PAGE_SIZE - 1));
- if (buffer_size - buffer_pos > PAGE_CACHE_SIZE)
- chunk = PAGE_CACHE_SIZE;
+ if (buffer_size - buffer_pos > PAGE_SIZE)
+ chunk = PAGE_SIZE;
else
chunk = buffer_size - buffer_pos;
@@ -546,8 +546,8 @@ reiserfs_xattr_set_handle(struct reiserfs_transaction_handle *th,
struct reiserfs_xattr_header *rxh;
skip = file_pos = sizeof(struct reiserfs_xattr_header);
- if (chunk + skip > PAGE_CACHE_SIZE)
- chunk = PAGE_CACHE_SIZE - skip;
+ if (chunk + skip > PAGE_SIZE)
+ chunk = PAGE_SIZE - skip;
rxh = (struct reiserfs_xattr_header *)data;
rxh->h_magic = cpu_to_le32(REISERFS_XATTR_MAGIC);
rxh->h_hash = cpu_to_le32(xahash);
@@ -675,8 +675,8 @@ reiserfs_xattr_get(struct inode *inode, const char *name, void *buffer,
char *data;
size_t skip = 0;
- if (isize - file_pos > PAGE_CACHE_SIZE)
- chunk = PAGE_CACHE_SIZE;
+ if (isize - file_pos > PAGE_SIZE)
+ chunk = PAGE_SIZE;
else
chunk = isize - file_pos;
diff --git a/fs/select.c b/fs/select.c
index 79d0d4953..869293988 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -70,9 +70,9 @@ static long __estimate_accuracy(struct timespec *tv)
return slack;
}
-long select_estimate_accuracy(struct timespec *tv)
+u64 select_estimate_accuracy(struct timespec *tv)
{
- unsigned long ret;
+ u64 ret;
struct timespec now;
/*
@@ -402,7 +402,7 @@ int do_select(int n, fd_set_bits *fds, struct timespec *end_time)
struct poll_wqueues table;
poll_table *wait;
int retval, i, timed_out = 0;
- unsigned long slack = 0;
+ u64 slack = 0;
unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
unsigned long busy_end = 0;
@@ -784,7 +784,7 @@ static int do_poll(struct poll_list *list, struct poll_wqueues *wait,
poll_table* pt = &wait->pt;
ktime_t expire, *to = NULL;
int timed_out = 0, count = 0;
- unsigned long slack = 0;
+ u64 slack = 0;
unsigned int busy_flag = net_busy_loop_on() ? POLL_BUSY_LOOP : 0;
unsigned long busy_end = 0;
diff --git a/fs/seq_file.c b/fs/seq_file.c
index e85664b7c..19f532e7d 100644
--- a/fs/seq_file.c
+++ b/fs/seq_file.c
@@ -72,9 +72,10 @@ int seq_open(struct file *file, const struct seq_operations *op)
mutex_init(&p->lock);
p->op = op;
-#ifdef CONFIG_USER_NS
- p->user_ns = file->f_cred->user_ns;
-#endif
+
+ // No refcounting: the lifetime of 'p' is constrained
+ // to the lifetime of the file.
+ p->file = file;
/*
* Wrappers around seq_open(e.g. swaps_open) need to be
diff --git a/fs/splice.c b/fs/splice.c
index 3d8407c0d..0606690f9 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -88,7 +88,7 @@ out_unlock:
static void page_cache_pipe_buf_release(struct pipe_inode_info *pipe,
struct pipe_buffer *buf)
{
- page_cache_release(buf->page);
+ put_page(buf->page);
buf->flags &= ~PIPE_BUF_FLAG_LRU;
}
@@ -268,7 +268,7 @@ EXPORT_SYMBOL_GPL(splice_to_pipe);
void spd_release_page(struct splice_pipe_desc *spd, unsigned int i)
{
- page_cache_release(spd->pages[i]);
+ put_page(spd->pages[i]);
}
/*
@@ -328,9 +328,9 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
if (splice_grow_spd(pipe, &spd))
return -ENOMEM;
- index = *ppos >> PAGE_CACHE_SHIFT;
- loff = *ppos & ~PAGE_CACHE_MASK;
- req_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ index = *ppos >> PAGE_SHIFT;
+ loff = *ppos & ~PAGE_MASK;
+ req_pages = (len + loff + PAGE_SIZE - 1) >> PAGE_SHIFT;
nr_pages = min(req_pages, spd.nr_pages_max);
/*
@@ -365,7 +365,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
error = add_to_page_cache_lru(page, mapping, index,
mapping_gfp_constraint(mapping, GFP_KERNEL));
if (unlikely(error)) {
- page_cache_release(page);
+ put_page(page);
if (error == -EEXIST)
continue;
break;
@@ -385,7 +385,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
* Now loop over the map and see if we need to start IO on any
* pages, fill in the partial map, etc.
*/
- index = *ppos >> PAGE_CACHE_SHIFT;
+ index = *ppos >> PAGE_SHIFT;
nr_pages = spd.nr_pages;
spd.nr_pages = 0;
for (page_nr = 0; page_nr < nr_pages; page_nr++) {
@@ -397,7 +397,7 @@ __generic_file_splice_read(struct file *in, loff_t *ppos,
/*
* this_len is the max we'll use from this page
*/
- this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
+ this_len = min_t(unsigned long, len, PAGE_SIZE - loff);
page = spd.pages[page_nr];
if (PageReadahead(page))
@@ -426,7 +426,7 @@ retry_lookup:
error = -ENOMEM;
break;
}
- page_cache_release(spd.pages[page_nr]);
+ put_page(spd.pages[page_nr]);
spd.pages[page_nr] = page;
}
/*
@@ -456,7 +456,7 @@ fill_it:
* i_size must be checked after PageUptodate.
*/
isize = i_size_read(mapping->host);
- end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+ end_index = (isize - 1) >> PAGE_SHIFT;
if (unlikely(!isize || index > end_index))
break;
@@ -470,7 +470,7 @@ fill_it:
/*
* max good bytes in this page
*/
- plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+ plen = ((isize - 1) & ~PAGE_MASK) + 1;
if (plen <= loff)
break;
@@ -494,8 +494,8 @@ fill_it:
* we got, 'nr_pages' is how many pages are in the map.
*/
while (page_nr < nr_pages)
- page_cache_release(spd.pages[page_nr++]);
- in->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
+ put_page(spd.pages[page_nr++]);
+ in->f_ra.prev_pos = (loff_t)index << PAGE_SHIFT;
if (spd.nr_pages)
error = splice_to_pipe(pipe, &spd);
@@ -580,7 +580,7 @@ static ssize_t kernel_readv(struct file *file, const struct iovec *vec,
old_fs = get_fs();
set_fs(get_ds());
/* The cast to a user pointer is valid due to the set_fs() */
- res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos);
+ res = vfs_readv(file, (const struct iovec __user *)vec, vlen, &pos, 0);
set_fs(old_fs);
return res;
@@ -636,8 +636,8 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
goto shrink_ret;
}
- offset = *ppos & ~PAGE_CACHE_MASK;
- nr_pages = (len + offset + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ offset = *ppos & ~PAGE_MASK;
+ nr_pages = (len + offset + PAGE_SIZE - 1) >> PAGE_SHIFT;
for (i = 0; i < nr_pages && i < spd.nr_pages_max && len; i++) {
struct page *page;
@@ -647,7 +647,7 @@ ssize_t default_file_splice_read(struct file *in, loff_t *ppos,
if (!page)
goto err;
- this_len = min_t(size_t, len, PAGE_CACHE_SIZE - offset);
+ this_len = min_t(size_t, len, PAGE_SIZE - offset);
vec[i].iov_base = (void __user *) page_address(page);
vec[i].iov_len = this_len;
spd.pages[i] = page;
@@ -1144,6 +1144,9 @@ long do_splice_to(struct file *in, loff_t *ppos,
if (unlikely(ret < 0))
return ret;
+ if (unlikely(len > MAX_RW_COUNT))
+ len = MAX_RW_COUNT;
+
if (in->f_op->splice_read)
splice_read = in->f_op->splice_read;
else
diff --git a/fs/squashfs/block.c b/fs/squashfs/block.c
index 0cea9b923..2c2618410 100644
--- a/fs/squashfs/block.c
+++ b/fs/squashfs/block.c
@@ -181,11 +181,11 @@ int squashfs_read_data(struct super_block *sb, u64 index, int length,
in = min(bytes, msblk->devblksize - offset);
bytes -= in;
while (in) {
- if (pg_offset == PAGE_CACHE_SIZE) {
+ if (pg_offset == PAGE_SIZE) {
data = squashfs_next_page(output);
pg_offset = 0;
}
- avail = min_t(int, in, PAGE_CACHE_SIZE -
+ avail = min_t(int, in, PAGE_SIZE -
pg_offset);
memcpy(data + pg_offset, bh[k]->b_data + offset,
avail);
diff --git a/fs/squashfs/cache.c b/fs/squashfs/cache.c
index 1cb70a0b2..23813c078 100644
--- a/fs/squashfs/cache.c
+++ b/fs/squashfs/cache.c
@@ -30,7 +30,7 @@
* access the metadata and fragment caches.
*
* To avoid out of memory and fragmentation issues with vmalloc the cache
- * uses sequences of kmalloced PAGE_CACHE_SIZE buffers.
+ * uses sequences of kmalloced PAGE_SIZE buffers.
*
* It should be noted that the cache is not used for file datablocks, these
* are decompressed and cached in the page-cache in the normal way. The
@@ -231,7 +231,7 @@ void squashfs_cache_delete(struct squashfs_cache *cache)
/*
* Initialise cache allocating the specified number of entries, each of
* size block_size. To avoid vmalloc fragmentation issues each entry
- * is allocated as a sequence of kmalloced PAGE_CACHE_SIZE buffers.
+ * is allocated as a sequence of kmalloced PAGE_SIZE buffers.
*/
struct squashfs_cache *squashfs_cache_init(char *name, int entries,
int block_size)
@@ -255,7 +255,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
cache->unused = entries;
cache->entries = entries;
cache->block_size = block_size;
- cache->pages = block_size >> PAGE_CACHE_SHIFT;
+ cache->pages = block_size >> PAGE_SHIFT;
cache->pages = cache->pages ? cache->pages : 1;
cache->name = name;
cache->num_waiters = 0;
@@ -275,7 +275,7 @@ struct squashfs_cache *squashfs_cache_init(char *name, int entries,
}
for (j = 0; j < cache->pages; j++) {
- entry->data[j] = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ entry->data[j] = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (entry->data[j] == NULL) {
ERROR("Failed to allocate %s buffer\n", name);
goto cleanup;
@@ -314,10 +314,10 @@ int squashfs_copy_data(void *buffer, struct squashfs_cache_entry *entry,
return min(length, entry->length - offset);
while (offset < entry->length) {
- void *buff = entry->data[offset / PAGE_CACHE_SIZE]
- + (offset % PAGE_CACHE_SIZE);
+ void *buff = entry->data[offset / PAGE_SIZE]
+ + (offset % PAGE_SIZE);
int bytes = min_t(int, entry->length - offset,
- PAGE_CACHE_SIZE - (offset % PAGE_CACHE_SIZE));
+ PAGE_SIZE - (offset % PAGE_SIZE));
if (bytes >= remaining) {
memcpy(buffer, buff, remaining);
@@ -415,7 +415,7 @@ struct squashfs_cache_entry *squashfs_get_datablock(struct super_block *sb,
*/
void *squashfs_read_table(struct super_block *sb, u64 block, int length)
{
- int pages = (length + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+ int pages = (length + PAGE_SIZE - 1) >> PAGE_SHIFT;
int i, res;
void *table, *buffer, **data;
struct squashfs_page_actor *actor;
@@ -436,7 +436,7 @@ void *squashfs_read_table(struct super_block *sb, u64 block, int length)
goto failed2;
}
- for (i = 0; i < pages; i++, buffer += PAGE_CACHE_SIZE)
+ for (i = 0; i < pages; i++, buffer += PAGE_SIZE)
data[i] = buffer;
res = squashfs_read_data(sb, block, length |
diff --git a/fs/squashfs/decompressor.c b/fs/squashfs/decompressor.c
index e9034bf6e..d2bc13636 100644
--- a/fs/squashfs/decompressor.c
+++ b/fs/squashfs/decompressor.c
@@ -102,7 +102,7 @@ static void *get_comp_opts(struct super_block *sb, unsigned short flags)
* Read decompressor specific options from file system if present
*/
if (SQUASHFS_COMP_OPTS(flags)) {
- buffer = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ buffer = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (buffer == NULL) {
comp_opts = ERR_PTR(-ENOMEM);
goto out;
diff --git a/fs/squashfs/file.c b/fs/squashfs/file.c
index e5c968906..13d80947b 100644
--- a/fs/squashfs/file.c
+++ b/fs/squashfs/file.c
@@ -175,7 +175,7 @@ static long long read_indexes(struct super_block *sb, int n,
{
int err, i;
long long block = 0;
- __le32 *blist = kmalloc(PAGE_CACHE_SIZE, GFP_KERNEL);
+ __le32 *blist = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (blist == NULL) {
ERROR("read_indexes: Failed to allocate block_list\n");
@@ -183,7 +183,7 @@ static long long read_indexes(struct super_block *sb, int n,
}
while (n) {
- int blocks = min_t(int, n, PAGE_CACHE_SIZE >> 2);
+ int blocks = min_t(int, n, PAGE_SIZE >> 2);
err = squashfs_read_metadata(sb, blist, start_block,
offset, blocks << 2);
@@ -377,19 +377,19 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
void *pageaddr;
- int i, mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int i, mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
int start_index = page->index & ~mask, end_index = start_index | mask;
/*
* Loop copying datablock into pages. As the datablock likely covers
- * many PAGE_CACHE_SIZE pages (default block size is 128 KiB) explicitly
+ * many PAGE_SIZE pages (default block size is 128 KiB) explicitly
* grab the pages from the page cache, except for the page that we've
* been called to fill.
*/
for (i = start_index; i <= end_index && bytes > 0; i++,
- bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
+ bytes -= PAGE_SIZE, offset += PAGE_SIZE) {
struct page *push_page;
- int avail = buffer ? min_t(int, bytes, PAGE_CACHE_SIZE) : 0;
+ int avail = buffer ? min_t(int, bytes, PAGE_SIZE) : 0;
TRACE("bytes %d, i %d, available_bytes %d\n", bytes, i, avail);
@@ -404,14 +404,14 @@ void squashfs_copy_cache(struct page *page, struct squashfs_cache_entry *buffer,
pageaddr = kmap_atomic(push_page);
squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+ memset(pageaddr + avail, 0, PAGE_SIZE - avail);
kunmap_atomic(pageaddr);
flush_dcache_page(push_page);
SetPageUptodate(push_page);
skip_page:
unlock_page(push_page);
if (i != page->index)
- page_cache_release(push_page);
+ put_page(push_page);
}
}
@@ -454,7 +454,7 @@ static int squashfs_readpage(struct file *file, struct page *page)
{
struct inode *inode = page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int index = page->index >> (msblk->block_log - PAGE_CACHE_SHIFT);
+ int index = page->index >> (msblk->block_log - PAGE_SHIFT);
int file_end = i_size_read(inode) >> msblk->block_log;
int res;
void *pageaddr;
@@ -462,8 +462,8 @@ static int squashfs_readpage(struct file *file, struct page *page)
TRACE("Entered squashfs_readpage, page index %lx, start block %llx\n",
page->index, squashfs_i(inode)->start);
- if (page->index >= ((i_size_read(inode) + PAGE_CACHE_SIZE - 1) >>
- PAGE_CACHE_SHIFT))
+ if (page->index >= ((i_size_read(inode) + PAGE_SIZE - 1) >>
+ PAGE_SHIFT))
goto out;
if (index < file_end || squashfs_i(inode)->fragment_block ==
@@ -487,7 +487,7 @@ error_out:
SetPageError(page);
out:
pageaddr = kmap_atomic(page);
- memset(pageaddr, 0, PAGE_CACHE_SIZE);
+ memset(pageaddr, 0, PAGE_SIZE);
kunmap_atomic(pageaddr);
flush_dcache_page(page);
if (!PageError(page))
diff --git a/fs/squashfs/file_direct.c b/fs/squashfs/file_direct.c
index 43e7a7edd..cb485d8e0 100644
--- a/fs/squashfs/file_direct.c
+++ b/fs/squashfs/file_direct.c
@@ -30,8 +30,8 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
struct inode *inode = target_page->mapping->host;
struct squashfs_sb_info *msblk = inode->i_sb->s_fs_info;
- int file_end = (i_size_read(inode) - 1) >> PAGE_CACHE_SHIFT;
- int mask = (1 << (msblk->block_log - PAGE_CACHE_SHIFT)) - 1;
+ int file_end = (i_size_read(inode) - 1) >> PAGE_SHIFT;
+ int mask = (1 << (msblk->block_log - PAGE_SHIFT)) - 1;
int start_index = target_page->index & ~mask;
int end_index = start_index | mask;
int i, n, pages, missing_pages, bytes, res = -ENOMEM;
@@ -68,7 +68,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
if (PageUptodate(page[i])) {
unlock_page(page[i]);
- page_cache_release(page[i]);
+ put_page(page[i]);
page[i] = NULL;
missing_pages++;
}
@@ -96,10 +96,10 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
goto mark_errored;
/* Last page may have trailing bytes not filled */
- bytes = res % PAGE_CACHE_SIZE;
+ bytes = res % PAGE_SIZE;
if (bytes) {
pageaddr = kmap_atomic(page[pages - 1]);
- memset(pageaddr + bytes, 0, PAGE_CACHE_SIZE - bytes);
+ memset(pageaddr + bytes, 0, PAGE_SIZE - bytes);
kunmap_atomic(pageaddr);
}
@@ -109,7 +109,7 @@ int squashfs_readpage_block(struct page *target_page, u64 block, int bsize)
SetPageUptodate(page[i]);
unlock_page(page[i]);
if (page[i] != target_page)
- page_cache_release(page[i]);
+ put_page(page[i]);
}
kfree(actor);
@@ -127,7 +127,7 @@ mark_errored:
flush_dcache_page(page[i]);
SetPageError(page[i]);
unlock_page(page[i]);
- page_cache_release(page[i]);
+ put_page(page[i]);
}
out:
@@ -153,21 +153,21 @@ static int squashfs_read_cache(struct page *target_page, u64 block, int bsize,
}
for (n = 0; n < pages && bytes > 0; n++,
- bytes -= PAGE_CACHE_SIZE, offset += PAGE_CACHE_SIZE) {
- int avail = min_t(int, bytes, PAGE_CACHE_SIZE);
+ bytes -= PAGE_SIZE, offset += PAGE_SIZE) {
+ int avail = min_t(int, bytes, PAGE_SIZE);
if (page[n] == NULL)
continue;
pageaddr = kmap_atomic(page[n]);
squashfs_copy_data(pageaddr, buffer, offset, avail);
- memset(pageaddr + avail, 0, PAGE_CACHE_SIZE - avail);
+ memset(pageaddr + avail, 0, PAGE_SIZE - avail);
kunmap_atomic(pageaddr);
flush_dcache_page(page[n]);
SetPageUptodate(page[n]);
unlock_page(page[n]);
if (page[n] != target_page)
- page_cache_release(page[n]);
+ put_page(page[n]);
}
out:
diff --git a/fs/squashfs/lz4_wrapper.c b/fs/squashfs/lz4_wrapper.c
index c31e2bc9c..ff4468bd1 100644
--- a/fs/squashfs/lz4_wrapper.c
+++ b/fs/squashfs/lz4_wrapper.c
@@ -117,13 +117,13 @@ static int lz4_uncompress(struct squashfs_sb_info *msblk, void *strm,
data = squashfs_first_page(output);
buff = stream->output;
while (data) {
- if (bytes <= PAGE_CACHE_SIZE) {
+ if (bytes <= PAGE_SIZE) {
memcpy(data, buff, bytes);
break;
}
- memcpy(data, buff, PAGE_CACHE_SIZE);
- buff += PAGE_CACHE_SIZE;
- bytes -= PAGE_CACHE_SIZE;
+ memcpy(data, buff, PAGE_SIZE);
+ buff += PAGE_SIZE;
+ bytes -= PAGE_SIZE;
data = squashfs_next_page(output);
}
squashfs_finish_page(output);
diff --git a/fs/squashfs/lzo_wrapper.c b/fs/squashfs/lzo_wrapper.c
index 244b9fbff..934c17e96 100644
--- a/fs/squashfs/lzo_wrapper.c
+++ b/fs/squashfs/lzo_wrapper.c
@@ -102,13 +102,13 @@ static int lzo_uncompress(struct squashfs_sb_info *msblk, void *strm,
data = squashfs_first_page(output);
buff = stream->output;
while (data) {
- if (bytes <= PAGE_CACHE_SIZE) {
+ if (bytes <= PAGE_SIZE) {
memcpy(data, buff, bytes);
break;
} else {
- memcpy(data, buff, PAGE_CACHE_SIZE);
- buff += PAGE_CACHE_SIZE;
- bytes -= PAGE_CACHE_SIZE;
+ memcpy(data, buff, PAGE_SIZE);
+ buff += PAGE_SIZE;
+ bytes -= PAGE_SIZE;
data = squashfs_next_page(output);
}
}
diff --git a/fs/squashfs/page_actor.c b/fs/squashfs/page_actor.c
index 5a1c11f56..9b7b1b6a7 100644
--- a/fs/squashfs/page_actor.c
+++ b/fs/squashfs/page_actor.c
@@ -48,7 +48,7 @@ struct squashfs_page_actor *squashfs_page_actor_init(void **buffer,
if (actor == NULL)
return NULL;
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->length = length ? : pages * PAGE_SIZE;
actor->buffer = buffer;
actor->pages = pages;
actor->next_page = 0;
@@ -88,7 +88,7 @@ struct squashfs_page_actor *squashfs_page_actor_init_special(struct page **page,
if (actor == NULL)
return NULL;
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->length = length ? : pages * PAGE_SIZE;
actor->page = page;
actor->pages = pages;
actor->next_page = 0;
diff --git a/fs/squashfs/page_actor.h b/fs/squashfs/page_actor.h
index 26dd82008..98537eab2 100644
--- a/fs/squashfs/page_actor.h
+++ b/fs/squashfs/page_actor.h
@@ -24,7 +24,7 @@ static inline struct squashfs_page_actor *squashfs_page_actor_init(void **page,
if (actor == NULL)
return NULL;
- actor->length = length ? : pages * PAGE_CACHE_SIZE;
+ actor->length = length ? : pages * PAGE_SIZE;
actor->page = page;
actor->pages = pages;
actor->next_page = 0;
diff --git a/fs/squashfs/super.c b/fs/squashfs/super.c
index 5e79bfa4f..cf01e15a7 100644
--- a/fs/squashfs/super.c
+++ b/fs/squashfs/super.c
@@ -152,7 +152,7 @@ static int squashfs_fill_super(struct super_block *sb, void *data, int silent)
* Check the system page size is not larger than the filesystem
* block size (by default 128K). This is currently not supported.
*/
- if (PAGE_CACHE_SIZE > msblk->block_size) {
+ if (PAGE_SIZE > msblk->block_size) {
ERROR("Page size > filesystem block size (%d). This is "
"currently not supported!\n", msblk->block_size);
goto failed_mount;
diff --git a/fs/squashfs/symlink.c b/fs/squashfs/symlink.c
index dbcc2f54b..d688ef42a 100644
--- a/fs/squashfs/symlink.c
+++ b/fs/squashfs/symlink.c
@@ -48,10 +48,10 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page)
struct inode *inode = page->mapping->host;
struct super_block *sb = inode->i_sb;
struct squashfs_sb_info *msblk = sb->s_fs_info;
- int index = page->index << PAGE_CACHE_SHIFT;
+ int index = page->index << PAGE_SHIFT;
u64 block = squashfs_i(inode)->start;
int offset = squashfs_i(inode)->offset;
- int length = min_t(int, i_size_read(inode) - index, PAGE_CACHE_SIZE);
+ int length = min_t(int, i_size_read(inode) - index, PAGE_SIZE);
int bytes, copied;
void *pageaddr;
struct squashfs_cache_entry *entry;
@@ -94,7 +94,7 @@ static int squashfs_symlink_readpage(struct file *file, struct page *page)
copied = squashfs_copy_data(pageaddr + bytes, entry, offset,
length - bytes);
if (copied == length - bytes)
- memset(pageaddr + length, 0, PAGE_CACHE_SIZE - length);
+ memset(pageaddr + length, 0, PAGE_SIZE - length);
else
block = entry->next_index;
kunmap_atomic(pageaddr);
diff --git a/fs/squashfs/xz_wrapper.c b/fs/squashfs/xz_wrapper.c
index c609624e4..6bfaef73d 100644
--- a/fs/squashfs/xz_wrapper.c
+++ b/fs/squashfs/xz_wrapper.c
@@ -141,7 +141,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
stream->buf.in_pos = 0;
stream->buf.in_size = 0;
stream->buf.out_pos = 0;
- stream->buf.out_size = PAGE_CACHE_SIZE;
+ stream->buf.out_size = PAGE_SIZE;
stream->buf.out = squashfs_first_page(output);
do {
@@ -158,7 +158,7 @@ static int squashfs_xz_uncompress(struct squashfs_sb_info *msblk, void *strm,
stream->buf.out = squashfs_next_page(output);
if (stream->buf.out != NULL) {
stream->buf.out_pos = 0;
- total += PAGE_CACHE_SIZE;
+ total += PAGE_SIZE;
}
}
diff --git a/fs/squashfs/zlib_wrapper.c b/fs/squashfs/zlib_wrapper.c
index 8727caba6..2ec24d128 100644
--- a/fs/squashfs/zlib_wrapper.c
+++ b/fs/squashfs/zlib_wrapper.c
@@ -69,7 +69,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
int zlib_err, zlib_init = 0, k = 0;
z_stream *stream = strm;
- stream->avail_out = PAGE_CACHE_SIZE;
+ stream->avail_out = PAGE_SIZE;
stream->next_out = squashfs_first_page(output);
stream->avail_in = 0;
@@ -85,7 +85,7 @@ static int zlib_uncompress(struct squashfs_sb_info *msblk, void *strm,
if (stream->avail_out == 0) {
stream->next_out = squashfs_next_page(output);
if (stream->next_out != NULL)
- stream->avail_out = PAGE_CACHE_SIZE;
+ stream->avail_out = PAGE_SIZE;
}
if (!zlib_init) {
diff --git a/fs/sync.c b/fs/sync.c
index dd5d1711c..2a54c1f22 100644
--- a/fs/sync.c
+++ b/fs/sync.c
@@ -302,7 +302,7 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
goto out;
if (sizeof(pgoff_t) == 4) {
- if (offset >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
+ if (offset >= (0x100000000ULL << PAGE_SHIFT)) {
/*
* The range starts outside a 32 bit machine's
* pagecache addressing capabilities. Let it "succeed"
@@ -310,7 +310,7 @@ SYSCALL_DEFINE4(sync_file_range, int, fd, loff_t, offset, loff_t, nbytes,
ret = 0;
goto out;
}
- if (endbyte >= (0x100000000ULL << PAGE_CACHE_SHIFT)) {
+ if (endbyte >= (0x100000000ULL << PAGE_SHIFT)) {
/*
* Out to EOF
*/
diff --git a/fs/sysv/dir.c b/fs/sysv/dir.c
index 63c1bcb22..c0f0a3e64 100644
--- a/fs/sysv/dir.c
+++ b/fs/sysv/dir.c
@@ -30,7 +30,7 @@ const struct file_operations sysv_dir_operations = {
static inline void dir_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
static int dir_commit_chunk(struct page *page, loff_t pos, unsigned len)
@@ -73,8 +73,8 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
if (pos >= inode->i_size)
return 0;
- offset = pos & ~PAGE_CACHE_MASK;
- n = pos >> PAGE_CACHE_SHIFT;
+ offset = pos & ~PAGE_MASK;
+ n = pos >> PAGE_SHIFT;
for ( ; n < npages; n++, offset = 0) {
char *kaddr, *limit;
@@ -85,7 +85,7 @@ static int sysv_readdir(struct file *file, struct dir_context *ctx)
continue;
kaddr = (char *)page_address(page);
de = (struct sysv_dir_entry *)(kaddr+offset);
- limit = kaddr + PAGE_CACHE_SIZE - SYSV_DIRSIZE;
+ limit = kaddr + PAGE_SIZE - SYSV_DIRSIZE;
for ( ;(char*)de <= limit; de++, ctx->pos += sizeof(*de)) {
char *name = de->name;
@@ -146,7 +146,7 @@ struct sysv_dir_entry *sysv_find_entry(struct dentry *dentry, struct page **res_
if (!IS_ERR(page)) {
kaddr = (char*)page_address(page);
de = (struct sysv_dir_entry *) kaddr;
- kaddr += PAGE_CACHE_SIZE - SYSV_DIRSIZE;
+ kaddr += PAGE_SIZE - SYSV_DIRSIZE;
for ( ; (char *) de <= kaddr ; de++) {
if (!de->inode)
continue;
@@ -190,7 +190,7 @@ int sysv_add_link(struct dentry *dentry, struct inode *inode)
goto out;
kaddr = (char*)page_address(page);
de = (struct sysv_dir_entry *)kaddr;
- kaddr += PAGE_CACHE_SIZE - SYSV_DIRSIZE;
+ kaddr += PAGE_SIZE - SYSV_DIRSIZE;
while ((char *)de <= kaddr) {
if (!de->inode)
goto got_it;
@@ -261,7 +261,7 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
kmap(page);
base = (char*)page_address(page);
- memset(base, 0, PAGE_CACHE_SIZE);
+ memset(base, 0, PAGE_SIZE);
de = (struct sysv_dir_entry *) base;
de->inode = cpu_to_fs16(SYSV_SB(inode->i_sb), inode->i_ino);
@@ -273,7 +273,7 @@ int sysv_make_empty(struct inode *inode, struct inode *dir)
kunmap(page);
err = dir_commit_chunk(page, 0, 2 * SYSV_DIRSIZE);
fail:
- page_cache_release(page);
+ put_page(page);
return err;
}
@@ -296,7 +296,7 @@ int sysv_empty_dir(struct inode * inode)
kaddr = (char *)page_address(page);
de = (struct sysv_dir_entry *)kaddr;
- kaddr += PAGE_CACHE_SIZE-SYSV_DIRSIZE;
+ kaddr += PAGE_SIZE-SYSV_DIRSIZE;
for ( ;(char *)de <= kaddr; de++) {
if (!de->inode)
diff --git a/fs/sysv/namei.c b/fs/sysv/namei.c
index 11e83ed0b..90b60c03b 100644
--- a/fs/sysv/namei.c
+++ b/fs/sysv/namei.c
@@ -264,11 +264,11 @@ static int sysv_rename(struct inode * old_dir, struct dentry * old_dentry,
out_dir:
if (dir_de) {
kunmap(dir_page);
- page_cache_release(dir_page);
+ put_page(dir_page);
}
out_old:
kunmap(old_page);
- page_cache_release(old_page);
+ put_page(old_page);
out:
return err;
}
diff --git a/fs/ubifs/Makefile b/fs/ubifs/Makefile
index 2c6f0cb81..c54a24360 100644
--- a/fs/ubifs/Makefile
+++ b/fs/ubifs/Makefile
@@ -4,3 +4,4 @@ ubifs-y += shrinker.o journal.o file.o dir.o super.o sb.o io.o
ubifs-y += tnc.o master.o scan.o replay.o log.o commit.o gc.o orphan.o
ubifs-y += budget.o find.o tnc_commit.o compress.o lpt.o lprops.o
ubifs-y += recovery.o ioctl.o lpt_commit.o tnc_misc.o xattr.o debug.o
+ubifs-y += misc.o
diff --git a/fs/ubifs/file.c b/fs/ubifs/file.c
index 065c88f8e..446753d8a 100644
--- a/fs/ubifs/file.c
+++ b/fs/ubifs/file.c
@@ -121,7 +121,7 @@ static int do_readpage(struct page *page)
if (block >= beyond) {
/* Reading beyond inode */
SetPageChecked(page);
- memset(addr, 0, PAGE_CACHE_SIZE);
+ memset(addr, 0, PAGE_SIZE);
goto out;
}
@@ -223,7 +223,7 @@ static int write_begin_slow(struct address_space *mapping,
{
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
struct ubifs_budget_req req = { .new_page = 1 };
int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
struct page *page;
@@ -254,13 +254,13 @@ static int write_begin_slow(struct address_space *mapping,
}
if (!PageUptodate(page)) {
- if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE)
+ if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE)
SetPageChecked(page);
else {
err = do_readpage(page);
if (err) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
ubifs_release_budget(c, &req);
return err;
}
@@ -428,7 +428,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
struct inode *inode = mapping->host;
struct ubifs_info *c = inode->i_sb->s_fs_info;
struct ubifs_inode *ui = ubifs_inode(inode);
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
int uninitialized_var(err), appending = !!(pos + len > inode->i_size);
int skipped_read = 0;
struct page *page;
@@ -446,7 +446,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
if (!PageUptodate(page)) {
/* The page is not loaded from the flash */
- if (!(pos & ~PAGE_CACHE_MASK) && len == PAGE_CACHE_SIZE) {
+ if (!(pos & ~PAGE_MASK) && len == PAGE_SIZE) {
/*
* We change whole page so no need to load it. But we
* do not know whether this page exists on the media or
@@ -462,7 +462,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
err = do_readpage(page);
if (err) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return err;
}
}
@@ -494,7 +494,7 @@ static int ubifs_write_begin(struct file *file, struct address_space *mapping,
mutex_unlock(&ui->ui_mutex);
}
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return write_begin_slow(mapping, pos, len, pagep, flags);
}
@@ -549,12 +549,12 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
dbg_gen("ino %lu, pos %llu, pg %lu, len %u, copied %d, i_size %lld",
inode->i_ino, pos, page->index, len, copied, inode->i_size);
- if (unlikely(copied < len && len == PAGE_CACHE_SIZE)) {
+ if (unlikely(copied < len && len == PAGE_SIZE)) {
/*
* VFS copied less data to the page that it intended and
* declared in its '->write_begin()' call via the @len
* argument. If the page was not up-to-date, and @len was
- * @PAGE_CACHE_SIZE, the 'ubifs_write_begin()' function did
+ * @PAGE_SIZE, the 'ubifs_write_begin()' function did
* not load it from the media (for optimization reasons). This
* means that part of the page contains garbage. So read the
* page now.
@@ -593,7 +593,7 @@ static int ubifs_write_end(struct file *file, struct address_space *mapping,
out:
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
return copied;
}
@@ -621,10 +621,10 @@ static int populate_page(struct ubifs_info *c, struct page *page,
addr = zaddr = kmap(page);
- end_index = (i_size - 1) >> PAGE_CACHE_SHIFT;
+ end_index = (i_size - 1) >> PAGE_SHIFT;
if (!i_size || page->index > end_index) {
hole = 1;
- memset(addr, 0, PAGE_CACHE_SIZE);
+ memset(addr, 0, PAGE_SIZE);
goto out_hole;
}
@@ -673,7 +673,7 @@ static int populate_page(struct ubifs_info *c, struct page *page,
}
if (end_index == page->index) {
- int len = i_size & (PAGE_CACHE_SIZE - 1);
+ int len = i_size & (PAGE_SIZE - 1);
if (len && len < read)
memset(zaddr + len, 0, read - len);
@@ -773,7 +773,7 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
isize = i_size_read(inode);
if (isize == 0)
goto out_free;
- end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
+ end_index = ((isize - 1) >> PAGE_SHIFT);
for (page_idx = 1; page_idx < page_cnt; page_idx++) {
pgoff_t page_offset = offset + page_idx;
@@ -788,7 +788,7 @@ static int ubifs_do_bulk_read(struct ubifs_info *c, struct bu_info *bu,
if (!PageUptodate(page))
err = populate_page(c, page, bu, &n);
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
if (err)
break;
}
@@ -905,7 +905,7 @@ static int do_writepage(struct page *page, int len)
#ifdef UBIFS_DEBUG
struct ubifs_inode *ui = ubifs_inode(inode);
spin_lock(&ui->ui_lock);
- ubifs_assert(page->index <= ui->synced_i_size >> PAGE_CACHE_SHIFT);
+ ubifs_assert(page->index <= ui->synced_i_size >> PAGE_SHIFT);
spin_unlock(&ui->ui_lock);
#endif
@@ -1001,8 +1001,8 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
struct inode *inode = page->mapping->host;
struct ubifs_inode *ui = ubifs_inode(inode);
loff_t i_size = i_size_read(inode), synced_i_size;
- pgoff_t end_index = i_size >> PAGE_CACHE_SHIFT;
- int err, len = i_size & (PAGE_CACHE_SIZE - 1);
+ pgoff_t end_index = i_size >> PAGE_SHIFT;
+ int err, len = i_size & (PAGE_SIZE - 1);
void *kaddr;
dbg_gen("ino %lu, pg %lu, pg flags %#lx",
@@ -1021,7 +1021,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
/* Is the page fully inside @i_size? */
if (page->index < end_index) {
- if (page->index >= synced_i_size >> PAGE_CACHE_SHIFT) {
+ if (page->index >= synced_i_size >> PAGE_SHIFT) {
err = inode->i_sb->s_op->write_inode(inode, NULL);
if (err)
goto out_unlock;
@@ -1034,7 +1034,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
* with this.
*/
}
- return do_writepage(page, PAGE_CACHE_SIZE);
+ return do_writepage(page, PAGE_SIZE);
}
/*
@@ -1045,7 +1045,7 @@ static int ubifs_writepage(struct page *page, struct writeback_control *wbc)
* writes to that region are not written out to the file."
*/
kaddr = kmap_atomic(page);
- memset(kaddr + len, 0, PAGE_CACHE_SIZE - len);
+ memset(kaddr + len, 0, PAGE_SIZE - len);
flush_dcache_page(page);
kunmap_atomic(kaddr);
@@ -1138,7 +1138,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
truncate_setsize(inode, new_size);
if (offset) {
- pgoff_t index = new_size >> PAGE_CACHE_SHIFT;
+ pgoff_t index = new_size >> PAGE_SHIFT;
struct page *page;
page = find_lock_page(inode->i_mapping, index);
@@ -1157,9 +1157,9 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
clear_page_dirty_for_io(page);
if (UBIFS_BLOCKS_PER_PAGE_SHIFT)
offset = new_size &
- (PAGE_CACHE_SIZE - 1);
+ (PAGE_SIZE - 1);
err = do_writepage(page, offset);
- page_cache_release(page);
+ put_page(page);
if (err)
goto out_budg;
/*
@@ -1173,7 +1173,7 @@ static int do_truncation(struct ubifs_info *c, struct inode *inode,
* having to read it.
*/
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
}
}
@@ -1285,7 +1285,7 @@ static void ubifs_invalidatepage(struct page *page, unsigned int offset,
struct ubifs_info *c = inode->i_sb->s_fs_info;
ubifs_assert(PagePrivate(page));
- if (offset || length < PAGE_CACHE_SIZE)
+ if (offset || length < PAGE_SIZE)
/* Partial page remains dirty */
return;
diff --git a/fs/ubifs/misc.c b/fs/ubifs/misc.c
new file mode 100644
index 000000000..486a28449
--- /dev/null
+++ b/fs/ubifs/misc.c
@@ -0,0 +1,57 @@
+#include <linux/kernel.h>
+#include "ubifs.h"
+
+/* Normal UBIFS messages */
+void ubifs_msg(const struct ubifs_info *c, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_notice("UBIFS (ubi%d:%d): %pV\n",
+ c->vi.ubi_num, c->vi.vol_id, &vaf);
+
+ va_end(args);
+} \
+
+/* UBIFS error messages */
+void ubifs_err(const struct ubifs_info *c, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_err("UBIFS error (ubi%d:%d pid %d): %ps: %pV\n",
+ c->vi.ubi_num, c->vi.vol_id, current->pid,
+ __builtin_return_address(0),
+ &vaf);
+
+ va_end(args);
+} \
+
+/* UBIFS warning messages */
+void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...)
+{
+ struct va_format vaf;
+ va_list args;
+
+ va_start(args, fmt);
+
+ vaf.fmt = fmt;
+ vaf.va = &args;
+
+ pr_warn("UBIFS warning (ubi%d:%d pid %d): %ps: %pV\n",
+ c->vi.ubi_num, c->vi.vol_id, current->pid,
+ __builtin_return_address(0),
+ &vaf);
+
+ va_end(args);
+}
diff --git a/fs/ubifs/super.c b/fs/ubifs/super.c
index a233ba913..e98c24ee2 100644
--- a/fs/ubifs/super.c
+++ b/fs/ubifs/super.c
@@ -2237,12 +2237,12 @@ static int __init ubifs_init(void)
BUILD_BUG_ON(UBIFS_COMPR_TYPES_CNT > 4);
/*
- * We require that PAGE_CACHE_SIZE is greater-than-or-equal-to
+ * We require that PAGE_SIZE is greater-than-or-equal-to
* UBIFS_BLOCK_SIZE. It is assumed that both are powers of 2.
*/
- if (PAGE_CACHE_SIZE < UBIFS_BLOCK_SIZE) {
+ if (PAGE_SIZE < UBIFS_BLOCK_SIZE) {
pr_err("UBIFS error (pid %d): VFS page cache size is %u bytes, but UBIFS requires at least 4096 bytes",
- current->pid, (unsigned int)PAGE_CACHE_SIZE);
+ current->pid, (unsigned int)PAGE_SIZE);
return -EINVAL;
}
diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h
index a5697de76..4cd7e569c 100644
--- a/fs/ubifs/ubifs.h
+++ b/fs/ubifs/ubifs.h
@@ -42,36 +42,12 @@
/* Version of this UBIFS implementation */
#define UBIFS_VERSION 1
-/* Normal UBIFS messages */
-#define ubifs_msg(c, fmt, ...) \
- pr_notice("UBIFS (ubi%d:%d): " fmt "\n", \
- (c)->vi.ubi_num, (c)->vi.vol_id, ##__VA_ARGS__)
-/* UBIFS error messages */
-#define ubifs_err(c, fmt, ...) \
- pr_err("UBIFS error (ubi%d:%d pid %d): %s: " fmt "\n", \
- (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
- __func__, ##__VA_ARGS__)
-/* UBIFS warning messages */
-#define ubifs_warn(c, fmt, ...) \
- pr_warn("UBIFS warning (ubi%d:%d pid %d): %s: " fmt "\n", \
- (c)->vi.ubi_num, (c)->vi.vol_id, current->pid, \
- __func__, ##__VA_ARGS__)
-/*
- * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
- * object as an argument.
- */
-#define ubifs_errc(c, fmt, ...) \
- do { \
- if (!(c)->probing) \
- ubifs_err(c, fmt, ##__VA_ARGS__); \
- } while (0)
-
/* UBIFS file system VFS magic number */
#define UBIFS_SUPER_MAGIC 0x24051905
/* Number of UBIFS blocks per VFS page */
-#define UBIFS_BLOCKS_PER_PAGE (PAGE_CACHE_SIZE / UBIFS_BLOCK_SIZE)
-#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_CACHE_SHIFT - UBIFS_BLOCK_SHIFT)
+#define UBIFS_BLOCKS_PER_PAGE (PAGE_SIZE / UBIFS_BLOCK_SIZE)
+#define UBIFS_BLOCKS_PER_PAGE_SHIFT (PAGE_SHIFT - UBIFS_BLOCK_SHIFT)
/* "File system end of life" sequence number watermark */
#define SQNUM_WARN_WATERMARK 0xFFFFFFFF00000000ULL
@@ -1802,4 +1778,21 @@ int ubifs_decompress(const struct ubifs_info *c, const void *buf, int len,
#include "misc.h"
#include "key.h"
+/* Normal UBIFS messages */
+__printf(2, 3)
+void ubifs_msg(const struct ubifs_info *c, const char *fmt, ...);
+__printf(2, 3)
+void ubifs_err(const struct ubifs_info *c, const char *fmt, ...);
+__printf(2, 3)
+void ubifs_warn(const struct ubifs_info *c, const char *fmt, ...);
+/*
+ * A variant of 'ubifs_err()' which takes the UBIFS file-sytem description
+ * object as an argument.
+ */
+#define ubifs_errc(c, fmt, ...) \
+do { \
+ if (!(c)->probing) \
+ ubifs_err(c, fmt, ##__VA_ARGS__); \
+} while (0)
+
#endif /* !__UBIFS_H__ */
diff --git a/fs/ubifs/xattr.c b/fs/ubifs/xattr.c
index c7f4d434d..b043e0441 100644
--- a/fs/ubifs/xattr.c
+++ b/fs/ubifs/xattr.c
@@ -59,7 +59,6 @@
#include <linux/fs.h>
#include <linux/slab.h>
#include <linux/xattr.h>
-#include <linux/posix_acl_xattr.h>
/*
* Limit the number of extended attributes per inode so that the total size
diff --git a/fs/udf/dir.c b/fs/udf/dir.c
index 541d9c650..b51b371b8 100644
--- a/fs/udf/dir.c
+++ b/fs/udf/dir.c
@@ -45,7 +45,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
int block, iblock;
loff_t nf_pos;
int flen;
- unsigned char *fname = NULL;
+ unsigned char *fname = NULL, *copy_name = NULL;
unsigned char *nameptr;
uint16_t liu;
uint8_t lfi;
@@ -143,7 +143,15 @@ static int udf_readdir(struct file *file, struct dir_context *ctx)
if (poffset >= lfi) {
nameptr = (char *)(fibh.ebh->b_data + poffset - lfi);
} else {
- nameptr = fname;
+ if (!copy_name) {
+ copy_name = kmalloc(UDF_NAME_LEN,
+ GFP_NOFS);
+ if (!copy_name) {
+ ret = -ENOMEM;
+ goto out;
+ }
+ }
+ nameptr = copy_name;
memcpy(nameptr, fi->fileIdent + liu,
lfi - poffset);
memcpy(nameptr + lfi - poffset,
@@ -185,6 +193,7 @@ out:
brelse(fibh.sbh);
brelse(epos.bh);
kfree(fname);
+ kfree(copy_name);
return ret;
}
diff --git a/fs/udf/file.c b/fs/udf/file.c
index 1af98963d..877ba1c9b 100644
--- a/fs/udf/file.c
+++ b/fs/udf/file.c
@@ -46,7 +46,7 @@ static void __udf_adinicb_readpage(struct page *page)
kaddr = kmap(page);
memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr, inode->i_size);
- memset(kaddr + inode->i_size, 0, PAGE_CACHE_SIZE - inode->i_size);
+ memset(kaddr + inode->i_size, 0, PAGE_SIZE - inode->i_size);
flush_dcache_page(page);
SetPageUptodate(page);
kunmap(page);
@@ -87,14 +87,14 @@ static int udf_adinicb_write_begin(struct file *file,
{
struct page *page;
- if (WARN_ON_ONCE(pos >= PAGE_CACHE_SIZE))
+ if (WARN_ON_ONCE(pos >= PAGE_SIZE))
return -EIO;
page = grab_cache_page_write_begin(mapping, 0, flags);
if (!page)
return -ENOMEM;
*pagep = page;
- if (!PageUptodate(page) && len != PAGE_CACHE_SIZE)
+ if (!PageUptodate(page) && len != PAGE_SIZE)
__udf_adinicb_readpage(page);
return 0;
}
diff --git a/fs/udf/inode.c b/fs/udf/inode.c
index 166d3ed32..2dc461eeb 100644
--- a/fs/udf/inode.c
+++ b/fs/udf/inode.c
@@ -287,7 +287,7 @@ int udf_expand_file_adinicb(struct inode *inode)
if (!PageUptodate(page)) {
kaddr = kmap(page);
memset(kaddr + iinfo->i_lenAlloc, 0x00,
- PAGE_CACHE_SIZE - iinfo->i_lenAlloc);
+ PAGE_SIZE - iinfo->i_lenAlloc);
memcpy(kaddr, iinfo->i_ext.i_data + iinfo->i_lenEAttr,
iinfo->i_lenAlloc);
flush_dcache_page(page);
@@ -319,7 +319,7 @@ int udf_expand_file_adinicb(struct inode *inode)
inode->i_data.a_ops = &udf_adinicb_aops;
up_write(&iinfo->i_data_sem);
}
- page_cache_release(page);
+ put_page(page);
mark_inode_dirty(inode);
return err;
diff --git a/fs/udf/namei.c b/fs/udf/namei.c
index 42eafb91f..a2ba11eca 100644
--- a/fs/udf/namei.c
+++ b/fs/udf/namei.c
@@ -165,7 +165,7 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
struct fileIdentDesc *fi = NULL;
loff_t f_pos;
int block, flen;
- unsigned char *fname = NULL;
+ unsigned char *fname = NULL, *copy_name = NULL;
unsigned char *nameptr;
uint8_t lfi;
uint16_t liu;
@@ -236,7 +236,15 @@ static struct fileIdentDesc *udf_find_entry(struct inode *dir,
nameptr = (uint8_t *)(fibh->ebh->b_data +
poffset - lfi);
else {
- nameptr = fname;
+ if (!copy_name) {
+ copy_name = kmalloc(UDF_NAME_LEN,
+ GFP_NOFS);
+ if (!copy_name) {
+ fi = ERR_PTR(-ENOMEM);
+ goto out_err;
+ }
+ }
+ nameptr = copy_name;
memcpy(nameptr, fi->fileIdent + liu,
lfi - poffset);
memcpy(nameptr + lfi - poffset,
@@ -279,6 +287,7 @@ out_err:
out_ok:
brelse(epos.bh);
kfree(fname);
+ kfree(copy_name);
return fi;
}
@@ -291,7 +300,7 @@ static struct dentry *udf_lookup(struct inode *dir, struct dentry *dentry,
struct udf_fileident_bh fibh;
struct fileIdentDesc *fi;
- if (dentry->d_name.len > UDF_NAME_LEN - 2)
+ if (dentry->d_name.len > UDF_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
#ifdef UDF_RECOVERY
@@ -351,7 +360,7 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
struct udf_inode_info *dinfo;
fibh->sbh = fibh->ebh = NULL;
- name = kmalloc(UDF_NAME_LEN, GFP_NOFS);
+ name = kmalloc(UDF_NAME_LEN_CS0, GFP_NOFS);
if (!name) {
*err = -ENOMEM;
goto out_err;
@@ -362,8 +371,9 @@ static struct fileIdentDesc *udf_add_entry(struct inode *dir,
*err = -EINVAL;
goto out_err;
}
- namelen = udf_put_filename(sb, dentry->d_name.name, name,
- dentry->d_name.len);
+ namelen = udf_put_filename(sb, dentry->d_name.name,
+ dentry->d_name.len,
+ name, UDF_NAME_LEN_CS0);
if (!namelen) {
*err = -ENAMETOOLONG;
goto out_err;
@@ -914,7 +924,7 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
iinfo = UDF_I(inode);
down_write(&iinfo->i_data_sem);
- name = kmalloc(UDF_NAME_LEN, GFP_NOFS);
+ name = kmalloc(UDF_NAME_LEN_CS0, GFP_NOFS);
if (!name) {
err = -ENOMEM;
goto out_no_entry;
@@ -997,8 +1007,9 @@ static int udf_symlink(struct inode *dir, struct dentry *dentry,
}
if (pc->componentType == 5) {
- namelen = udf_put_filename(sb, compstart, name,
- symname - compstart);
+ namelen = udf_put_filename(sb, compstart,
+ symname - compstart,
+ name, UDF_NAME_LEN_CS0);
if (!namelen)
goto out_no_entry;
diff --git a/fs/udf/super.c b/fs/udf/super.c
index a522c15a0..36661acaf 100644
--- a/fs/udf/super.c
+++ b/fs/udf/super.c
@@ -887,18 +887,14 @@ static int udf_find_fileset(struct super_block *sb,
static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
{
struct primaryVolDesc *pvoldesc;
- struct ustr *instr, *outstr;
+ uint8_t *outstr;
struct buffer_head *bh;
uint16_t ident;
int ret = -ENOMEM;
- instr = kmalloc(sizeof(struct ustr), GFP_NOFS);
- if (!instr)
- return -ENOMEM;
-
- outstr = kmalloc(sizeof(struct ustr), GFP_NOFS);
+ outstr = kmalloc(128, GFP_NOFS);
if (!outstr)
- goto out1;
+ return -ENOMEM;
bh = udf_read_tagged(sb, block, block, &ident);
if (!bh) {
@@ -923,31 +919,25 @@ static int udf_load_pvoldesc(struct super_block *sb, sector_t block)
#endif
}
- if (!udf_build_ustr(instr, pvoldesc->volIdent, 32)) {
- ret = udf_CS0toUTF8(outstr, instr);
- if (ret < 0)
- goto out_bh;
+ ret = udf_dstrCS0toUTF8(outstr, 31, pvoldesc->volIdent, 32);
+ if (ret < 0)
+ goto out_bh;
- strncpy(UDF_SB(sb)->s_volume_ident, outstr->u_name,
- outstr->u_len > 31 ? 31 : outstr->u_len);
- udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident);
- }
+ strncpy(UDF_SB(sb)->s_volume_ident, outstr, ret);
+ udf_debug("volIdent[] = '%s'\n", UDF_SB(sb)->s_volume_ident);
- if (!udf_build_ustr(instr, pvoldesc->volSetIdent, 128)) {
- ret = udf_CS0toUTF8(outstr, instr);
- if (ret < 0)
- goto out_bh;
+ ret = udf_dstrCS0toUTF8(outstr, 127, pvoldesc->volSetIdent, 128);
+ if (ret < 0)
+ goto out_bh;
- udf_debug("volSetIdent[] = '%s'\n", outstr->u_name);
- }
+ outstr[ret] = 0;
+ udf_debug("volSetIdent[] = '%s'\n", outstr);
ret = 0;
out_bh:
brelse(bh);
out2:
kfree(outstr);
-out1:
- kfree(instr);
return ret;
}
@@ -2358,7 +2348,7 @@ static int udf_statfs(struct dentry *dentry, struct kstatfs *buf)
le32_to_cpu(lvidiu->numDirs)) : 0)
+ buf->f_bfree;
buf->f_ffree = buf->f_bfree;
- buf->f_namelen = UDF_NAME_LEN - 2;
+ buf->f_namelen = UDF_NAME_LEN;
buf->f_fsid.val[0] = (u32)id;
buf->f_fsid.val[1] = (u32)(id >> 32);
diff --git a/fs/udf/udfdecl.h b/fs/udf/udfdecl.h
index fa0044b6b..263829ef1 100644
--- a/fs/udf/udfdecl.h
+++ b/fs/udf/udfdecl.h
@@ -49,8 +49,8 @@ extern __printf(3, 4) void _udf_warn(struct super_block *sb,
#define UDF_EXTENT_FLAG_MASK 0xC0000000
#define UDF_NAME_PAD 4
-#define UDF_NAME_LEN 256
-#define UDF_PATH_LEN 1023
+#define UDF_NAME_LEN 254
+#define UDF_NAME_LEN_CS0 255
static inline size_t udf_file_entry_alloc_offset(struct inode *inode)
{
@@ -106,12 +106,6 @@ struct generic_desc {
__le32 volDescSeqNum;
};
-struct ustr {
- uint8_t u_cmpID;
- uint8_t u_name[UDF_NAME_LEN - 2];
- uint8_t u_len;
-};
-
/* super.c */
@@ -214,12 +208,11 @@ udf_get_lb_pblock(struct super_block *sb, struct kernel_lb_addr *loc,
}
/* unicode.c */
-extern int udf_get_filename(struct super_block *, uint8_t *, int, uint8_t *,
- int);
-extern int udf_put_filename(struct super_block *, const uint8_t *, uint8_t *,
- int);
-extern int udf_build_ustr(struct ustr *, dstring *, int);
-extern int udf_CS0toUTF8(struct ustr *, const struct ustr *);
+extern int udf_get_filename(struct super_block *, const uint8_t *, int,
+ uint8_t *, int);
+extern int udf_put_filename(struct super_block *, const uint8_t *, int,
+ uint8_t *, int);
+extern int udf_dstrCS0toUTF8(uint8_t *, int, const uint8_t *, int);
/* ialloc.c */
extern void udf_free_inode(struct inode *);
diff --git a/fs/udf/unicode.c b/fs/udf/unicode.c
index e788a05aa..695389a4f 100644
--- a/fs/udf/unicode.c
+++ b/fs/udf/unicode.c
@@ -28,199 +28,72 @@
#include "udf_sb.h"
-static int udf_translate_to_linux(uint8_t *, int, uint8_t *, int, uint8_t *,
- int);
-
-static int udf_char_to_ustr(struct ustr *dest, const uint8_t *src, int strlen)
-{
- if ((!dest) || (!src) || (!strlen) || (strlen > UDF_NAME_LEN - 2))
- return 0;
-
- memset(dest, 0, sizeof(struct ustr));
- memcpy(dest->u_name, src, strlen);
- dest->u_cmpID = 0x08;
- dest->u_len = strlen;
-
- return strlen;
-}
-
-/*
- * udf_build_ustr
- */
-int udf_build_ustr(struct ustr *dest, dstring *ptr, int size)
-{
- int usesize;
-
- if (!dest || !ptr || !size)
- return -1;
- BUG_ON(size < 2);
-
- usesize = min_t(size_t, ptr[size - 1], sizeof(dest->u_name));
- usesize = min(usesize, size - 2);
- dest->u_cmpID = ptr[0];
- dest->u_len = usesize;
- memcpy(dest->u_name, ptr + 1, usesize);
- memset(dest->u_name + usesize, 0, sizeof(dest->u_name) - usesize);
-
- return 0;
-}
-
-/*
- * udf_build_ustr_exact
- */
-static void udf_build_ustr_exact(struct ustr *dest, dstring *ptr, int exactsize)
+static int udf_uni2char_utf8(wchar_t uni,
+ unsigned char *out,
+ int boundlen)
{
- memset(dest, 0, sizeof(struct ustr));
- dest->u_cmpID = ptr[0];
- dest->u_len = exactsize - 1;
- memcpy(dest->u_name, ptr + 1, exactsize - 1);
-}
-
-/*
- * udf_CS0toUTF8
- *
- * PURPOSE
- * Convert OSTA Compressed Unicode to the UTF-8 equivalent.
- *
- * PRE-CONDITIONS
- * utf Pointer to UTF-8 output buffer.
- * ocu Pointer to OSTA Compressed Unicode input buffer
- * of size UDF_NAME_LEN bytes.
- * both of type "struct ustr *"
- *
- * POST-CONDITIONS
- * <return> >= 0 on success.
- *
- * HISTORY
- * November 12, 1997 - Andrew E. Mileski
- * Written, tested, and released.
- */
-int udf_CS0toUTF8(struct ustr *utf_o, const struct ustr *ocu_i)
-{
- const uint8_t *ocu;
- uint8_t cmp_id, ocu_len;
- int i;
-
- ocu_len = ocu_i->u_len;
- if (ocu_len == 0) {
- memset(utf_o, 0, sizeof(struct ustr));
- return 0;
- }
-
- cmp_id = ocu_i->u_cmpID;
- if (cmp_id != 8 && cmp_id != 16) {
- memset(utf_o, 0, sizeof(struct ustr));
- pr_err("unknown compression code (%d) stri=%s\n",
- cmp_id, ocu_i->u_name);
- return -EINVAL;
- }
-
- ocu = ocu_i->u_name;
- utf_o->u_len = 0;
- for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
-
- /* Expand OSTA compressed Unicode to Unicode */
- uint32_t c = ocu[i++];
- if (cmp_id == 16)
- c = (c << 8) | ocu[i++];
-
- /* Compress Unicode to UTF-8 */
- if (c < 0x80U)
- utf_o->u_name[utf_o->u_len++] = (uint8_t)c;
- else if (c < 0x800U) {
- if (utf_o->u_len > (UDF_NAME_LEN - 4))
- break;
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0xc0 | (c >> 6));
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0x80 | (c & 0x3f));
- } else {
- if (utf_o->u_len > (UDF_NAME_LEN - 5))
- break;
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0xe0 | (c >> 12));
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0x80 |
- ((c >> 6) & 0x3f));
- utf_o->u_name[utf_o->u_len++] =
- (uint8_t)(0x80 | (c & 0x3f));
- }
+ int u_len = 0;
+
+ if (boundlen <= 0)
+ return -ENAMETOOLONG;
+
+ if (uni < 0x80) {
+ out[u_len++] = (unsigned char)uni;
+ } else if (uni < 0x800) {
+ if (boundlen < 2)
+ return -ENAMETOOLONG;
+ out[u_len++] = (unsigned char)(0xc0 | (uni >> 6));
+ out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
+ } else {
+ if (boundlen < 3)
+ return -ENAMETOOLONG;
+ out[u_len++] = (unsigned char)(0xe0 | (uni >> 12));
+ out[u_len++] = (unsigned char)(0x80 | ((uni >> 6) & 0x3f));
+ out[u_len++] = (unsigned char)(0x80 | (uni & 0x3f));
}
- utf_o->u_cmpID = 8;
-
- return utf_o->u_len;
+ return u_len;
}
-/*
- *
- * udf_UTF8toCS0
- *
- * PURPOSE
- * Convert UTF-8 to the OSTA Compressed Unicode equivalent.
- *
- * DESCRIPTION
- * This routine is only called by udf_lookup().
- *
- * PRE-CONDITIONS
- * ocu Pointer to OSTA Compressed Unicode output
- * buffer of size UDF_NAME_LEN bytes.
- * utf Pointer to UTF-8 input buffer.
- * utf_len Length of UTF-8 input buffer in bytes.
- *
- * POST-CONDITIONS
- * <return> Zero on success.
- *
- * HISTORY
- * November 12, 1997 - Andrew E. Mileski
- * Written, tested, and released.
- */
-static int udf_UTF8toCS0(dstring *ocu, struct ustr *utf, int length)
+static int udf_char2uni_utf8(const unsigned char *in,
+ int boundlen,
+ wchar_t *uni)
{
- unsigned c, i, max_val, utf_char;
- int utf_cnt, u_len, u_ch;
-
- memset(ocu, 0, sizeof(dstring) * length);
- ocu[0] = 8;
- max_val = 0xffU;
- u_ch = 1;
-
-try_again:
- u_len = 0U;
- utf_char = 0U;
- utf_cnt = 0U;
- for (i = 0U; i < utf->u_len; i++) {
- /* Name didn't fit? */
- if (u_len + 1 + u_ch >= length)
- return 0;
+ unsigned int utf_char;
+ unsigned char c;
+ int utf_cnt, u_len;
- c = (uint8_t)utf->u_name[i];
+ utf_char = 0;
+ utf_cnt = 0;
+ for (u_len = 0; u_len < boundlen;) {
+ c = in[u_len++];
/* Complete a multi-byte UTF-8 character */
if (utf_cnt) {
- utf_char = (utf_char << 6) | (c & 0x3fU);
+ utf_char = (utf_char << 6) | (c & 0x3f);
if (--utf_cnt)
continue;
} else {
/* Check for a multi-byte UTF-8 character */
- if (c & 0x80U) {
+ if (c & 0x80) {
/* Start a multi-byte UTF-8 character */
- if ((c & 0xe0U) == 0xc0U) {
- utf_char = c & 0x1fU;
+ if ((c & 0xe0) == 0xc0) {
+ utf_char = c & 0x1f;
utf_cnt = 1;
- } else if ((c & 0xf0U) == 0xe0U) {
- utf_char = c & 0x0fU;
+ } else if ((c & 0xf0) == 0xe0) {
+ utf_char = c & 0x0f;
utf_cnt = 2;
- } else if ((c & 0xf8U) == 0xf0U) {
- utf_char = c & 0x07U;
+ } else if ((c & 0xf8) == 0xf0) {
+ utf_char = c & 0x07;
utf_cnt = 3;
- } else if ((c & 0xfcU) == 0xf8U) {
- utf_char = c & 0x03U;
+ } else if ((c & 0xfc) == 0xf8) {
+ utf_char = c & 0x03;
utf_cnt = 4;
- } else if ((c & 0xfeU) == 0xfcU) {
- utf_char = c & 0x01U;
+ } else if ((c & 0xfe) == 0xfc) {
+ utf_char = c & 0x01;
utf_cnt = 5;
} else {
- goto error_out;
+ utf_cnt = -1;
+ break;
}
continue;
} else {
@@ -228,97 +101,216 @@ try_again:
utf_char = c;
}
}
-
- /* Choose no compression if necessary */
- if (utf_char > max_val) {
- if (max_val == 0xffU) {
- max_val = 0xffffU;
- ocu[0] = (uint8_t)0x10U;
- u_ch = 2;
- goto try_again;
- }
- goto error_out;
- }
-
- if (max_val == 0xffffU)
- ocu[++u_len] = (uint8_t)(utf_char >> 8);
- ocu[++u_len] = (uint8_t)(utf_char & 0xffU);
+ *uni = utf_char;
+ break;
}
-
if (utf_cnt) {
-error_out:
- ocu[++u_len] = '?';
- printk(KERN_DEBUG pr_fmt("bad UTF-8 character\n"));
+ *uni = '?';
+ return -EINVAL;
}
+ return u_len;
+}
- ocu[length - 1] = (uint8_t)u_len + 1;
+#define ILLEGAL_CHAR_MARK '_'
+#define EXT_MARK '.'
+#define CRC_MARK '#'
+#define EXT_SIZE 5
+/* Number of chars we need to store generated CRC to make filename unique */
+#define CRC_LEN 5
- return u_len + 1;
+static int udf_name_conv_char(uint8_t *str_o, int str_o_max_len,
+ int *str_o_idx,
+ const uint8_t *str_i, int str_i_max_len,
+ int *str_i_idx,
+ int u_ch, int *needsCRC,
+ int (*conv_f)(wchar_t, unsigned char *, int),
+ int translate)
+{
+ uint32_t c;
+ int illChar = 0;
+ int len, gotch = 0;
+
+ for (; (!gotch) && (*str_i_idx < str_i_max_len); *str_i_idx += u_ch) {
+ if (*str_o_idx >= str_o_max_len) {
+ *needsCRC = 1;
+ return gotch;
+ }
+
+ /* Expand OSTA compressed Unicode to Unicode */
+ c = str_i[*str_i_idx];
+ if (u_ch > 1)
+ c = (c << 8) | str_i[*str_i_idx + 1];
+
+ if (translate && (c == '/' || c == 0))
+ illChar = 1;
+ else if (illChar)
+ break;
+ else
+ gotch = 1;
+ }
+ if (illChar) {
+ *needsCRC = 1;
+ c = ILLEGAL_CHAR_MARK;
+ gotch = 1;
+ }
+ if (gotch) {
+ len = conv_f(c, &str_o[*str_o_idx], str_o_max_len - *str_o_idx);
+ /* Valid character? */
+ if (len >= 0)
+ *str_o_idx += len;
+ else if (len == -ENAMETOOLONG) {
+ *needsCRC = 1;
+ gotch = 0;
+ } else {
+ str_o[(*str_o_idx)++] = '?';
+ *needsCRC = 1;
+ }
+ }
+ return gotch;
}
-static int udf_CS0toNLS(struct nls_table *nls, struct ustr *utf_o,
- const struct ustr *ocu_i)
+static int udf_name_from_CS0(uint8_t *str_o, int str_max_len,
+ const uint8_t *ocu, int ocu_len,
+ int (*conv_f)(wchar_t, unsigned char *, int),
+ int translate)
{
- const uint8_t *ocu;
- uint8_t cmp_id, ocu_len;
- int i, len;
+ uint32_t c;
+ uint8_t cmp_id;
+ int idx, len;
+ int u_ch;
+ int needsCRC = 0;
+ int ext_i_len, ext_max_len;
+ int str_o_len = 0; /* Length of resulting output */
+ int ext_o_len = 0; /* Extension output length */
+ int ext_crc_len = 0; /* Extension output length if used with CRC */
+ int i_ext = -1; /* Extension position in input buffer */
+ int o_crc = 0; /* Rightmost possible output pos for CRC+ext */
+ unsigned short valueCRC;
+ uint8_t ext[EXT_SIZE * NLS_MAX_CHARSET_SIZE + 1];
+ uint8_t crc[CRC_LEN];
+ if (str_max_len <= 0)
+ return 0;
- ocu_len = ocu_i->u_len;
if (ocu_len == 0) {
- memset(utf_o, 0, sizeof(struct ustr));
+ memset(str_o, 0, str_max_len);
return 0;
}
- cmp_id = ocu_i->u_cmpID;
+ cmp_id = ocu[0];
if (cmp_id != 8 && cmp_id != 16) {
- memset(utf_o, 0, sizeof(struct ustr));
- pr_err("unknown compression code (%d) stri=%s\n",
- cmp_id, ocu_i->u_name);
+ memset(str_o, 0, str_max_len);
+ pr_err("unknown compression code (%d)\n", cmp_id);
return -EINVAL;
}
+ u_ch = cmp_id >> 3;
- ocu = ocu_i->u_name;
- utf_o->u_len = 0;
- for (i = 0; (i < ocu_len) && (utf_o->u_len <= (UDF_NAME_LEN - 3));) {
- /* Expand OSTA compressed Unicode to Unicode */
- uint32_t c = ocu[i++];
- if (cmp_id == 16)
- c = (c << 8) | ocu[i++];
+ ocu++;
+ ocu_len--;
- len = nls->uni2char(c, &utf_o->u_name[utf_o->u_len],
- UDF_NAME_LEN - 2 - utf_o->u_len);
- /* Valid character? */
- if (len >= 0)
- utf_o->u_len += len;
- else
- utf_o->u_name[utf_o->u_len++] = '?';
+ if (ocu_len % u_ch) {
+ pr_err("incorrect filename length (%d)\n", ocu_len + 1);
+ return -EINVAL;
}
- utf_o->u_cmpID = 8;
- return utf_o->u_len;
+ if (translate) {
+ /* Look for extension */
+ for (idx = ocu_len - u_ch, ext_i_len = 0;
+ (idx >= 0) && (ext_i_len < EXT_SIZE);
+ idx -= u_ch, ext_i_len++) {
+ c = ocu[idx];
+ if (u_ch > 1)
+ c = (c << 8) | ocu[idx + 1];
+
+ if (c == EXT_MARK) {
+ if (ext_i_len)
+ i_ext = idx;
+ break;
+ }
+ }
+ if (i_ext >= 0) {
+ /* Convert extension */
+ ext_max_len = min_t(int, sizeof(ext), str_max_len);
+ ext[ext_o_len++] = EXT_MARK;
+ idx = i_ext + u_ch;
+ while (udf_name_conv_char(ext, ext_max_len, &ext_o_len,
+ ocu, ocu_len, &idx,
+ u_ch, &needsCRC,
+ conv_f, translate)) {
+ if ((ext_o_len + CRC_LEN) < str_max_len)
+ ext_crc_len = ext_o_len;
+ }
+ }
+ }
+
+ idx = 0;
+ while (1) {
+ if (translate && (idx == i_ext)) {
+ if (str_o_len > (str_max_len - ext_o_len))
+ needsCRC = 1;
+ break;
+ }
+
+ if (!udf_name_conv_char(str_o, str_max_len, &str_o_len,
+ ocu, ocu_len, &idx,
+ u_ch, &needsCRC, conv_f, translate))
+ break;
+
+ if (translate &&
+ (str_o_len <= (str_max_len - ext_o_len - CRC_LEN)))
+ o_crc = str_o_len;
+ }
+
+ if (translate) {
+ if (str_o_len <= 2 && str_o[0] == '.' &&
+ (str_o_len == 1 || str_o[1] == '.'))
+ needsCRC = 1;
+ if (needsCRC) {
+ str_o_len = o_crc;
+ valueCRC = crc_itu_t(0, ocu, ocu_len);
+ crc[0] = CRC_MARK;
+ crc[1] = hex_asc_upper_hi(valueCRC >> 8);
+ crc[2] = hex_asc_upper_lo(valueCRC >> 8);
+ crc[3] = hex_asc_upper_hi(valueCRC);
+ crc[4] = hex_asc_upper_lo(valueCRC);
+ len = min_t(int, CRC_LEN, str_max_len - str_o_len);
+ memcpy(&str_o[str_o_len], crc, len);
+ str_o_len += len;
+ ext_o_len = ext_crc_len;
+ }
+ if (ext_o_len > 0) {
+ memcpy(&str_o[str_o_len], ext, ext_o_len);
+ str_o_len += ext_o_len;
+ }
+ }
+
+ return str_o_len;
}
-static int udf_NLStoCS0(struct nls_table *nls, dstring *ocu, struct ustr *uni,
- int length)
+static int udf_name_to_CS0(uint8_t *ocu, int ocu_max_len,
+ const uint8_t *str_i, int str_len,
+ int (*conv_f)(const unsigned char *, int, wchar_t *))
{
- int len;
- unsigned i, max_val;
- uint16_t uni_char;
+ int i, len;
+ unsigned int max_val;
+ wchar_t uni_char;
int u_len, u_ch;
- memset(ocu, 0, sizeof(dstring) * length);
+ if (ocu_max_len <= 0)
+ return 0;
+
+ memset(ocu, 0, ocu_max_len);
ocu[0] = 8;
- max_val = 0xffU;
+ max_val = 0xff;
u_ch = 1;
try_again:
- u_len = 0U;
- for (i = 0U; i < uni->u_len; i++) {
+ u_len = 1;
+ for (i = 0; i < str_len; i++) {
/* Name didn't fit? */
- if (u_len + 1 + u_ch >= length)
+ if (u_len + u_ch > ocu_max_len)
return 0;
- len = nls->char2uni(&uni->u_name[i], uni->u_len - i, &uni_char);
+ len = conv_f(&str_i[i], str_len - i, &uni_char);
if (!len)
continue;
/* Invalid character, deal with it */
@@ -328,187 +320,77 @@ try_again:
}
if (uni_char > max_val) {
- max_val = 0xffffU;
- ocu[0] = (uint8_t)0x10U;
+ max_val = 0xffff;
+ ocu[0] = 0x10;
u_ch = 2;
goto try_again;
}
- if (max_val == 0xffffU)
- ocu[++u_len] = (uint8_t)(uni_char >> 8);
- ocu[++u_len] = (uint8_t)(uni_char & 0xffU);
+ if (max_val == 0xffff)
+ ocu[u_len++] = (uint8_t)(uni_char >> 8);
+ ocu[u_len++] = (uint8_t)(uni_char & 0xff);
i += len - 1;
}
- ocu[length - 1] = (uint8_t)u_len + 1;
- return u_len + 1;
+ return u_len;
}
-int udf_get_filename(struct super_block *sb, uint8_t *sname, int slen,
+int udf_dstrCS0toUTF8(uint8_t *utf_o, int o_len,
+ const uint8_t *ocu_i, int i_len)
+{
+ int s_len = 0;
+
+ if (i_len > 0) {
+ s_len = ocu_i[i_len - 1];
+ if (s_len >= i_len) {
+ pr_err("incorrect dstring lengths (%d/%d)\n",
+ s_len, i_len);
+ return -EINVAL;
+ }
+ }
+
+ return udf_name_from_CS0(utf_o, o_len, ocu_i, s_len,
+ udf_uni2char_utf8, 0);
+}
+
+int udf_get_filename(struct super_block *sb, const uint8_t *sname, int slen,
uint8_t *dname, int dlen)
{
- struct ustr *filename, *unifilename;
+ int (*conv_f)(wchar_t, unsigned char *, int);
int ret;
if (!slen)
return -EIO;
- filename = kmalloc(sizeof(struct ustr), GFP_NOFS);
- if (!filename)
- return -ENOMEM;
-
- unifilename = kmalloc(sizeof(struct ustr), GFP_NOFS);
- if (!unifilename) {
- ret = -ENOMEM;
- goto out1;
- }
+ if (dlen <= 0)
+ return 0;
- udf_build_ustr_exact(unifilename, sname, slen);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
- ret = udf_CS0toUTF8(filename, unifilename);
- if (ret < 0) {
- udf_debug("Failed in udf_get_filename: sname = %s\n",
- sname);
- goto out2;
- }
+ conv_f = udf_uni2char_utf8;
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
- ret = udf_CS0toNLS(UDF_SB(sb)->s_nls_map, filename,
- unifilename);
- if (ret < 0) {
- udf_debug("Failed in udf_get_filename: sname = %s\n",
- sname);
- goto out2;
- }
+ conv_f = UDF_SB(sb)->s_nls_map->uni2char;
} else
BUG();
- ret = udf_translate_to_linux(dname, dlen,
- filename->u_name, filename->u_len,
- unifilename->u_name, unifilename->u_len);
+ ret = udf_name_from_CS0(dname, dlen, sname, slen, conv_f, 1);
/* Zero length filename isn't valid... */
if (ret == 0)
ret = -EINVAL;
-out2:
- kfree(unifilename);
-out1:
- kfree(filename);
return ret;
}
-int udf_put_filename(struct super_block *sb, const uint8_t *sname,
- uint8_t *dname, int flen)
+int udf_put_filename(struct super_block *sb, const uint8_t *sname, int slen,
+ uint8_t *dname, int dlen)
{
- struct ustr unifilename;
- int namelen;
-
- if (!udf_char_to_ustr(&unifilename, sname, flen))
- return 0;
+ int (*conv_f)(const unsigned char *, int, wchar_t *);
if (UDF_QUERY_FLAG(sb, UDF_FLAG_UTF8)) {
- namelen = udf_UTF8toCS0(dname, &unifilename, UDF_NAME_LEN);
- if (!namelen)
- return 0;
+ conv_f = udf_char2uni_utf8;
} else if (UDF_QUERY_FLAG(sb, UDF_FLAG_NLS_MAP)) {
- namelen = udf_NLStoCS0(UDF_SB(sb)->s_nls_map, dname,
- &unifilename, UDF_NAME_LEN);
- if (!namelen)
- return 0;
+ conv_f = UDF_SB(sb)->s_nls_map->char2uni;
} else
- return 0;
+ BUG();
- return namelen;
+ return udf_name_to_CS0(dname, dlen, sname, slen, conv_f);
}
-#define ILLEGAL_CHAR_MARK '_'
-#define EXT_MARK '.'
-#define CRC_MARK '#'
-#define EXT_SIZE 5
-/* Number of chars we need to store generated CRC to make filename unique */
-#define CRC_LEN 5
-
-static int udf_translate_to_linux(uint8_t *newName, int newLen,
- uint8_t *udfName, int udfLen,
- uint8_t *fidName, int fidNameLen)
-{
- int index, newIndex = 0, needsCRC = 0;
- int extIndex = 0, newExtIndex = 0, hasExt = 0;
- unsigned short valueCRC;
- uint8_t curr;
-
- if (udfName[0] == '.' &&
- (udfLen == 1 || (udfLen == 2 && udfName[1] == '.'))) {
- needsCRC = 1;
- newIndex = udfLen;
- memcpy(newName, udfName, udfLen);
- } else {
- for (index = 0; index < udfLen; index++) {
- curr = udfName[index];
- if (curr == '/' || curr == 0) {
- needsCRC = 1;
- curr = ILLEGAL_CHAR_MARK;
- while (index + 1 < udfLen &&
- (udfName[index + 1] == '/' ||
- udfName[index + 1] == 0))
- index++;
- }
- if (curr == EXT_MARK &&
- (udfLen - index - 1) <= EXT_SIZE) {
- if (udfLen == index + 1)
- hasExt = 0;
- else {
- hasExt = 1;
- extIndex = index;
- newExtIndex = newIndex;
- }
- }
- if (newIndex < newLen)
- newName[newIndex++] = curr;
- else
- needsCRC = 1;
- }
- }
- if (needsCRC) {
- uint8_t ext[EXT_SIZE];
- int localExtIndex = 0;
-
- if (hasExt) {
- int maxFilenameLen;
- for (index = 0;
- index < EXT_SIZE && extIndex + index + 1 < udfLen;
- index++) {
- curr = udfName[extIndex + index + 1];
-
- if (curr == '/' || curr == 0) {
- needsCRC = 1;
- curr = ILLEGAL_CHAR_MARK;
- while (extIndex + index + 2 < udfLen &&
- (index + 1 < EXT_SIZE &&
- (udfName[extIndex + index + 2] == '/' ||
- udfName[extIndex + index + 2] == 0)))
- index++;
- }
- ext[localExtIndex++] = curr;
- }
- maxFilenameLen = newLen - CRC_LEN - localExtIndex;
- if (newIndex > maxFilenameLen)
- newIndex = maxFilenameLen;
- else
- newIndex = newExtIndex;
- } else if (newIndex > newLen - CRC_LEN)
- newIndex = newLen - CRC_LEN;
- newName[newIndex++] = CRC_MARK;
- valueCRC = crc_itu_t(0, fidName, fidNameLen);
- newName[newIndex++] = hex_asc_upper_hi(valueCRC >> 8);
- newName[newIndex++] = hex_asc_upper_lo(valueCRC >> 8);
- newName[newIndex++] = hex_asc_upper_hi(valueCRC);
- newName[newIndex++] = hex_asc_upper_lo(valueCRC);
-
- if (hasExt) {
- newName[newIndex++] = EXT_MARK;
- for (index = 0; index < localExtIndex; index++)
- newName[newIndex++] = ext[index];
- }
- }
-
- return newIndex;
-}
diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c
index dc5fae601..0447b949c 100644
--- a/fs/ufs/balloc.c
+++ b/fs/ufs/balloc.c
@@ -237,7 +237,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
sector_t newb, struct page *locked_page)
{
const unsigned blks_per_page =
- 1 << (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ 1 << (PAGE_SHIFT - inode->i_blkbits);
const unsigned mask = blks_per_page - 1;
struct address_space * const mapping = inode->i_mapping;
pgoff_t index, cur_index, last_index;
@@ -255,9 +255,9 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg,
cur_index = locked_page->index;
end = count + beg;
- last_index = end >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ last_index = end >> (PAGE_SHIFT - inode->i_blkbits);
for (i = beg; i < end; i = (i | mask) + 1) {
- index = i >> (PAGE_CACHE_SHIFT - inode->i_blkbits);
+ index = i >> (PAGE_SHIFT - inode->i_blkbits);
if (likely(cur_index != index)) {
page = ufs_get_locked_page(mapping, index);
diff --git a/fs/ufs/dir.c b/fs/ufs/dir.c
index 74f2e8028..0b1457292 100644
--- a/fs/ufs/dir.c
+++ b/fs/ufs/dir.c
@@ -62,7 +62,7 @@ static int ufs_commit_chunk(struct page *page, loff_t pos, unsigned len)
static inline void ufs_put_page(struct page *page)
{
kunmap(page);
- page_cache_release(page);
+ put_page(page);
}
ino_t ufs_inode_by_name(struct inode *dir, const struct qstr *qstr)
@@ -111,13 +111,13 @@ static void ufs_check_page(struct page *page)
struct super_block *sb = dir->i_sb;
char *kaddr = page_address(page);
unsigned offs, rec_len;
- unsigned limit = PAGE_CACHE_SIZE;
+ unsigned limit = PAGE_SIZE;
const unsigned chunk_mask = UFS_SB(sb)->s_uspi->s_dirblksize - 1;
struct ufs_dir_entry *p;
char *error;
- if ((dir->i_size >> PAGE_CACHE_SHIFT) == page->index) {
- limit = dir->i_size & ~PAGE_CACHE_MASK;
+ if ((dir->i_size >> PAGE_SHIFT) == page->index) {
+ limit = dir->i_size & ~PAGE_MASK;
if (limit & chunk_mask)
goto Ebadsize;
if (!limit)
@@ -170,7 +170,7 @@ Einumber:
bad_entry:
ufs_error (sb, "ufs_check_page", "bad entry in directory #%lu: %s - "
"offset=%lu, rec_len=%d, name_len=%d",
- dir->i_ino, error, (page->index<<PAGE_CACHE_SHIFT)+offs,
+ dir->i_ino, error, (page->index<<PAGE_SHIFT)+offs,
rec_len, ufs_get_de_namlen(sb, p));
goto fail;
Eend:
@@ -178,7 +178,7 @@ Eend:
ufs_error(sb, __func__,
"entry in directory #%lu spans the page boundary"
"offset=%lu",
- dir->i_ino, (page->index<<PAGE_CACHE_SHIFT)+offs);
+ dir->i_ino, (page->index<<PAGE_SHIFT)+offs);
fail:
SetPageChecked(page);
SetPageError(page);
@@ -211,9 +211,9 @@ ufs_last_byte(struct inode *inode, unsigned long page_nr)
{
unsigned last_byte = inode->i_size;
- last_byte -= page_nr << PAGE_CACHE_SHIFT;
- if (last_byte > PAGE_CACHE_SIZE)
- last_byte = PAGE_CACHE_SIZE;
+ last_byte -= page_nr << PAGE_SHIFT;
+ if (last_byte > PAGE_SIZE)
+ last_byte = PAGE_SIZE;
return last_byte;
}
@@ -341,7 +341,7 @@ int ufs_add_link(struct dentry *dentry, struct inode *inode)
kaddr = page_address(page);
dir_end = kaddr + ufs_last_byte(dir, n);
de = (struct ufs_dir_entry *)kaddr;
- kaddr += PAGE_CACHE_SIZE - reclen;
+ kaddr += PAGE_SIZE - reclen;
while ((char *)de <= kaddr) {
if ((char *)de == dir_end) {
/* We hit i_size */
@@ -432,8 +432,8 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
loff_t pos = ctx->pos;
struct inode *inode = file_inode(file);
struct super_block *sb = inode->i_sb;
- unsigned int offset = pos & ~PAGE_CACHE_MASK;
- unsigned long n = pos >> PAGE_CACHE_SHIFT;
+ unsigned int offset = pos & ~PAGE_MASK;
+ unsigned long n = pos >> PAGE_SHIFT;
unsigned long npages = dir_pages(inode);
unsigned chunk_mask = ~(UFS_SB(sb)->s_uspi->s_dirblksize - 1);
int need_revalidate = file->f_version != inode->i_version;
@@ -454,14 +454,14 @@ ufs_readdir(struct file *file, struct dir_context *ctx)
ufs_error(sb, __func__,
"bad page in #%lu",
inode->i_ino);
- ctx->pos += PAGE_CACHE_SIZE - offset;
+ ctx->pos += PAGE_SIZE - offset;
return -EIO;
}
kaddr = page_address(page);
if (unlikely(need_revalidate)) {
if (offset) {
offset = ufs_validate_entry(sb, kaddr, offset, chunk_mask);
- ctx->pos = (n<<PAGE_CACHE_SHIFT) + offset;
+ ctx->pos = (n<<PAGE_SHIFT) + offset;
}
file->f_version = inode->i_version;
need_revalidate = 0;
@@ -574,7 +574,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
kmap(page);
base = (char*)page_address(page);
- memset(base, 0, PAGE_CACHE_SIZE);
+ memset(base, 0, PAGE_SIZE);
de = (struct ufs_dir_entry *) base;
@@ -594,7 +594,7 @@ int ufs_make_empty(struct inode * inode, struct inode *dir)
err = ufs_commit_chunk(page, 0, chunk_size);
fail:
- page_cache_release(page);
+ put_page(page);
return err;
}
diff --git a/fs/ufs/inode.c b/fs/ufs/inode.c
index d897e169a..9f49431e7 100644
--- a/fs/ufs/inode.c
+++ b/fs/ufs/inode.c
@@ -1051,13 +1051,13 @@ static int ufs_alloc_lastblock(struct inode *inode, loff_t size)
lastfrag--;
lastpage = ufs_get_locked_page(mapping, lastfrag >>
- (PAGE_CACHE_SHIFT - inode->i_blkbits));
+ (PAGE_SHIFT - inode->i_blkbits));
if (IS_ERR(lastpage)) {
err = -EIO;
goto out;
}
- end = lastfrag & ((1 << (PAGE_CACHE_SHIFT - inode->i_blkbits)) - 1);
+ end = lastfrag & ((1 << (PAGE_SHIFT - inode->i_blkbits)) - 1);
bh = page_buffers(lastpage);
for (i = 0; i < end; ++i)
bh = bh->b_this_page;
diff --git a/fs/ufs/namei.c b/fs/ufs/namei.c
index acf4a3b61..a1559f762 100644
--- a/fs/ufs/namei.c
+++ b/fs/ufs/namei.c
@@ -305,7 +305,7 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
ufs_set_link(old_inode, dir_de, dir_page, new_dir, 0);
else {
kunmap(dir_page);
- page_cache_release(dir_page);
+ put_page(dir_page);
}
inode_dec_link_count(old_dir);
}
@@ -315,11 +315,11 @@ static int ufs_rename(struct inode *old_dir, struct dentry *old_dentry,
out_dir:
if (dir_de) {
kunmap(dir_page);
- page_cache_release(dir_page);
+ put_page(dir_page);
}
out_old:
kunmap(old_page);
- page_cache_release(old_page);
+ put_page(old_page);
out:
return err;
}
diff --git a/fs/ufs/util.c b/fs/ufs/util.c
index b6c2f94e0..a409e3e78 100644
--- a/fs/ufs/util.c
+++ b/fs/ufs/util.c
@@ -261,14 +261,14 @@ struct page *ufs_get_locked_page(struct address_space *mapping,
if (unlikely(page->mapping == NULL)) {
/* Truncate got there first */
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
page = NULL;
goto out;
}
if (!PageUptodate(page) || PageError(page)) {
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
printk(KERN_ERR "ufs_change_blocknr: "
"can not read page: ino %lu, index: %lu\n",
diff --git a/fs/ufs/util.h b/fs/ufs/util.h
index 954175928..b7fbf53db 100644
--- a/fs/ufs/util.h
+++ b/fs/ufs/util.h
@@ -283,7 +283,7 @@ extern struct page *ufs_get_locked_page(struct address_space *mapping,
static inline void ufs_put_locked_page(struct page *page)
{
unlock_page(page);
- page_cache_release(page);
+ put_page(page);
}
diff --git a/fs/xfs/Makefile b/fs/xfs/Makefile
index f64639176..3542d94fd 100644
--- a/fs/xfs/Makefile
+++ b/fs/xfs/Makefile
@@ -121,4 +121,5 @@ xfs-$(CONFIG_XFS_RT) += xfs_rtalloc.o
xfs-$(CONFIG_XFS_POSIX_ACL) += xfs_acl.o
xfs-$(CONFIG_SYSCTL) += xfs_sysctl.o
xfs-$(CONFIG_COMPAT) += xfs_ioctl32.o
-xfs-$(CONFIG_NFSD_PNFS) += xfs_pnfs.o
+xfs-$(CONFIG_NFSD_BLOCKLAYOUT) += xfs_pnfs.o
+xfs-$(CONFIG_NFSD_SCSILAYOUT) += xfs_pnfs.o
diff --git a/fs/xfs/libxfs/xfs_alloc_btree.c b/fs/xfs/libxfs/xfs_alloc_btree.c
index 444626ddb..d9b424252 100644
--- a/fs/xfs/libxfs/xfs_alloc_btree.c
+++ b/fs/xfs/libxfs/xfs_alloc_btree.c
@@ -118,8 +118,6 @@ xfs_allocbt_free_block(
xfs_extent_busy_insert(cur->bc_tp, be32_to_cpu(agf->agf_seqno), bno, 1,
XFS_EXTENT_BUSY_SKIP_DISCARD);
xfs_trans_agbtree_delta(cur->bc_tp, -1);
-
- xfs_trans_binval(cur->bc_tp, bp);
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_attr_sf.h b/fs/xfs/libxfs/xfs_attr_sf.h
index 919756e3b..90928bbe6 100644
--- a/fs/xfs/libxfs/xfs_attr_sf.h
+++ b/fs/xfs/libxfs/xfs_attr_sf.h
@@ -24,22 +24,6 @@
* Small attribute lists are packed as tightly as possible so as
* to fit into the literal area of the inode.
*/
-
-/*
- * Entries are packed toward the top as tight as possible.
- */
-typedef struct xfs_attr_shortform {
- struct xfs_attr_sf_hdr { /* constant-structure header block */
- __be16 totsize; /* total bytes in shortform list */
- __u8 count; /* count of active entries */
- } hdr;
- struct xfs_attr_sf_entry {
- __uint8_t namelen; /* actual length of name (no NULL) */
- __uint8_t valuelen; /* actual length of value (no NULL) */
- __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
- __uint8_t nameval[1]; /* name & value bytes concatenated */
- } list[1]; /* variable sized array */
-} xfs_attr_shortform_t;
typedef struct xfs_attr_sf_hdr xfs_attr_sf_hdr_t;
typedef struct xfs_attr_sf_entry xfs_attr_sf_entry_t;
diff --git a/fs/xfs/libxfs/xfs_bmap.c b/fs/xfs/libxfs/xfs_bmap.c
index ef00156f4..ce41d7fe7 100644
--- a/fs/xfs/libxfs/xfs_bmap.c
+++ b/fs/xfs/libxfs/xfs_bmap.c
@@ -477,10 +477,7 @@ xfs_bmap_check_leaf_extents(
}
block = XFS_BUF_TO_BLOCK(bp);
}
- if (bp_release) {
- bp_release = 0;
- xfs_trans_brelse(NULL, bp);
- }
+
return;
error0:
@@ -912,7 +909,7 @@ xfs_bmap_local_to_extents(
* We don't want to deal with the case of keeping inode data inline yet.
* So sending the data fork of a regular inode is invalid.
*/
- ASSERT(!(S_ISREG(ip->i_d.di_mode) && whichfork == XFS_DATA_FORK));
+ ASSERT(!(S_ISREG(VFS_I(ip)->i_mode) && whichfork == XFS_DATA_FORK));
ifp = XFS_IFORK_PTR(ip, whichfork);
ASSERT(XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_LOCAL);
@@ -1079,7 +1076,7 @@ xfs_bmap_add_attrfork_local(
if (ip->i_df.if_bytes <= XFS_IFORK_DSIZE(ip))
return 0;
- if (S_ISDIR(ip->i_d.di_mode)) {
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
memset(&dargs, 0, sizeof(dargs));
dargs.geo = ip->i_mount->m_dir_geo;
dargs.dp = ip;
@@ -1091,7 +1088,7 @@ xfs_bmap_add_attrfork_local(
return xfs_dir2_sf_to_block(&dargs);
}
- if (S_ISLNK(ip->i_d.di_mode))
+ if (S_ISLNK(VFS_I(ip)->i_mode))
return xfs_bmap_local_to_extents(tp, ip, firstblock, 1,
flags, XFS_DATA_FORK,
xfs_symlink_local_to_remote);
@@ -3745,11 +3742,11 @@ xfs_bmap_btalloc(
args.prod = align;
if ((args.mod = (xfs_extlen_t)do_mod(ap->offset, args.prod)))
args.mod = (xfs_extlen_t)(args.prod - args.mod);
- } else if (mp->m_sb.sb_blocksize >= PAGE_CACHE_SIZE) {
+ } else if (mp->m_sb.sb_blocksize >= PAGE_SIZE) {
args.prod = 1;
args.mod = 0;
} else {
- args.prod = PAGE_CACHE_SIZE >> mp->m_sb.sb_blocklog;
+ args.prod = PAGE_SIZE >> mp->m_sb.sb_blocklog;
if ((args.mod = (xfs_extlen_t)(do_mod(ap->offset, args.prod))))
args.mod = (xfs_extlen_t)(args.prod - args.mod);
}
@@ -4721,6 +4718,66 @@ error0:
}
/*
+ * When a delalloc extent is split (e.g., due to a hole punch), the original
+ * indlen reservation must be shared across the two new extents that are left
+ * behind.
+ *
+ * Given the original reservation and the worst case indlen for the two new
+ * extents (as calculated by xfs_bmap_worst_indlen()), split the original
+ * reservation fairly across the two new extents. If necessary, steal available
+ * blocks from a deleted extent to make up a reservation deficiency (e.g., if
+ * ores == 1). The number of stolen blocks is returned. The availability and
+ * subsequent accounting of stolen blocks is the responsibility of the caller.
+ */
+static xfs_filblks_t
+xfs_bmap_split_indlen(
+ xfs_filblks_t ores, /* original res. */
+ xfs_filblks_t *indlen1, /* ext1 worst indlen */
+ xfs_filblks_t *indlen2, /* ext2 worst indlen */
+ xfs_filblks_t avail) /* stealable blocks */
+{
+ xfs_filblks_t len1 = *indlen1;
+ xfs_filblks_t len2 = *indlen2;
+ xfs_filblks_t nres = len1 + len2; /* new total res. */
+ xfs_filblks_t stolen = 0;
+
+ /*
+ * Steal as many blocks as we can to try and satisfy the worst case
+ * indlen for both new extents.
+ */
+ while (nres > ores && avail) {
+ nres--;
+ avail--;
+ stolen++;
+ }
+
+ /*
+ * The only blocks available are those reserved for the original
+ * extent and what we can steal from the extent being removed.
+ * If this still isn't enough to satisfy the combined
+ * requirements for the two new extents, skim blocks off of each
+ * of the new reservations until they match what is available.
+ */
+ while (nres > ores) {
+ if (len1) {
+ len1--;
+ nres--;
+ }
+ if (nres == ores)
+ break;
+ if (len2) {
+ len2--;
+ nres--;
+ }
+ }
+
+ *indlen1 = len1;
+ *indlen2 = len2;
+
+ return stolen;
+}
+
+/*
* Called by xfs_bmapi to update file extent records and the btree
* after removing space (or undoing a delayed allocation).
*/
@@ -4984,28 +5041,29 @@ xfs_bmap_del_extent(
XFS_IFORK_NEXT_SET(ip, whichfork,
XFS_IFORK_NEXTENTS(ip, whichfork) + 1);
} else {
+ xfs_filblks_t stolen;
ASSERT(whichfork == XFS_DATA_FORK);
- temp = xfs_bmap_worst_indlen(ip, temp);
+
+ /*
+ * Distribute the original indlen reservation across the
+ * two new extents. Steal blocks from the deleted extent
+ * if necessary. Stealing blocks simply fudges the
+ * fdblocks accounting in xfs_bunmapi().
+ */
+ temp = xfs_bmap_worst_indlen(ip, got.br_blockcount);
+ temp2 = xfs_bmap_worst_indlen(ip, new.br_blockcount);
+ stolen = xfs_bmap_split_indlen(da_old, &temp, &temp2,
+ del->br_blockcount);
+ da_new = temp + temp2 - stolen;
+ del->br_blockcount -= stolen;
+
+ /*
+ * Set the reservation for each extent. Warn if either
+ * is zero as this can lead to delalloc problems.
+ */
+ WARN_ON_ONCE(!temp || !temp2);
xfs_bmbt_set_startblock(ep, nullstartblock((int)temp));
- temp2 = xfs_bmap_worst_indlen(ip, temp2);
new.br_startblock = nullstartblock((int)temp2);
- da_new = temp + temp2;
- while (da_new > da_old) {
- if (temp) {
- temp--;
- da_new--;
- xfs_bmbt_set_startblock(ep,
- nullstartblock((int)temp));
- }
- if (da_new == da_old)
- break;
- if (temp2) {
- temp2--;
- da_new--;
- new.br_startblock =
- nullstartblock((int)temp2);
- }
- }
}
trace_xfs_bmap_post_update(ip, *idx, state, _THIS_IP_);
xfs_iext_insert(ip, *idx + 1, 1, &new, state);
@@ -5210,7 +5268,7 @@ xfs_bunmapi(
* This is better than zeroing it.
*/
ASSERT(del.br_state == XFS_EXT_NORM);
- ASSERT(xfs_trans_get_block_res(tp) > 0);
+ ASSERT(tp->t_blk_res > 0);
/*
* If this spans a realtime extent boundary,
* chop it back to the start of the one we end at.
@@ -5241,7 +5299,7 @@ xfs_bunmapi(
del.br_startblock += mod;
} else if ((del.br_startoff == start &&
(del.br_state == XFS_EXT_UNWRITTEN ||
- xfs_trans_get_block_res(tp) == 0)) ||
+ tp->t_blk_res == 0)) ||
!xfs_sb_version_hasextflgbit(&mp->m_sb)) {
/*
* Can't make it unwritten. There isn't
@@ -5296,9 +5354,37 @@ xfs_bunmapi(
goto nodelete;
}
}
+
+ /*
+ * If it's the case where the directory code is running
+ * with no block reservation, and the deleted block is in
+ * the middle of its extent, and the resulting insert
+ * of an extent would cause transformation to btree format,
+ * then reject it. The calling code will then swap
+ * blocks around instead.
+ * We have to do this now, rather than waiting for the
+ * conversion to btree format, since the transaction
+ * will be dirty.
+ */
+ if (!wasdel && tp->t_blk_res == 0 &&
+ XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
+ XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
+ XFS_IFORK_MAXEXT(ip, whichfork) &&
+ del.br_startoff > got.br_startoff &&
+ del.br_startoff + del.br_blockcount <
+ got.br_startoff + got.br_blockcount) {
+ error = -ENOSPC;
+ goto error0;
+ }
+
+ /*
+ * Unreserve quota and update realtime free space, if
+ * appropriate. If delayed allocation, update the inode delalloc
+ * counter now and wait to update the sb counters as
+ * xfs_bmap_del_extent() might need to borrow some blocks.
+ */
if (wasdel) {
ASSERT(startblockval(del.br_startblock) > 0);
- /* Update realtime/data freespace, unreserve quota */
if (isrt) {
xfs_filblks_t rtexts;
@@ -5309,8 +5395,6 @@ xfs_bunmapi(
ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_RTBLKS);
} else {
- xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount,
- false);
(void)xfs_trans_reserve_quota_nblks(NULL,
ip, -((long)del.br_blockcount), 0,
XFS_QMOPT_RES_REGBLKS);
@@ -5321,32 +5405,16 @@ xfs_bunmapi(
XFS_BTCUR_BPRV_WASDEL;
} else if (cur)
cur->bc_private.b.flags &= ~XFS_BTCUR_BPRV_WASDEL;
- /*
- * If it's the case where the directory code is running
- * with no block reservation, and the deleted block is in
- * the middle of its extent, and the resulting insert
- * of an extent would cause transformation to btree format,
- * then reject it. The calling code will then swap
- * blocks around instead.
- * We have to do this now, rather than waiting for the
- * conversion to btree format, since the transaction
- * will be dirty.
- */
- if (!wasdel && xfs_trans_get_block_res(tp) == 0 &&
- XFS_IFORK_FORMAT(ip, whichfork) == XFS_DINODE_FMT_EXTENTS &&
- XFS_IFORK_NEXTENTS(ip, whichfork) >= /* Note the >= */
- XFS_IFORK_MAXEXT(ip, whichfork) &&
- del.br_startoff > got.br_startoff &&
- del.br_startoff + del.br_blockcount <
- got.br_startoff + got.br_blockcount) {
- error = -ENOSPC;
- goto error0;
- }
+
error = xfs_bmap_del_extent(ip, tp, &lastx, flist, cur, &del,
&tmp_logflags, whichfork);
logflags |= tmp_logflags;
if (error)
goto error0;
+
+ if (!isrt && wasdel)
+ xfs_mod_fdblocks(mp, (int64_t)del.br_blockcount, false);
+
bno = del.br_startoff - 1;
nodelete:
/*
diff --git a/fs/xfs/libxfs/xfs_bmap_btree.c b/fs/xfs/libxfs/xfs_bmap_btree.c
index 1637c37bf..6282f6e70 100644
--- a/fs/xfs/libxfs/xfs_bmap_btree.c
+++ b/fs/xfs/libxfs/xfs_bmap_btree.c
@@ -461,7 +461,7 @@ xfs_bmbt_alloc_block(
* reservation amount is insufficient then we may fail a
* block allocation here and corrupt the filesystem.
*/
- args.minleft = xfs_trans_get_block_res(args.tp);
+ args.minleft = args.tp->t_blk_res;
} else if (cur->bc_private.b.flist->xbf_low) {
args.type = XFS_ALLOCTYPE_START_BNO;
} else {
@@ -470,7 +470,7 @@ xfs_bmbt_alloc_block(
args.minlen = args.maxlen = args.prod = 1;
args.wasdel = cur->bc_private.b.flags & XFS_BTCUR_BPRV_WASDEL;
- if (!args.wasdel && xfs_trans_get_block_res(args.tp) == 0) {
+ if (!args.wasdel && args.tp->t_blk_res == 0) {
error = -ENOSPC;
goto error0;
}
@@ -531,7 +531,6 @@ xfs_bmbt_free_block(
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
xfs_trans_mod_dquot_byino(tp, ip, XFS_TRANS_DQ_BCOUNT, -1L);
- xfs_trans_binval(tp, bp);
return 0;
}
diff --git a/fs/xfs/libxfs/xfs_btree.c b/fs/xfs/libxfs/xfs_btree.c
index a0eb18ce3..1f88e1ce7 100644
--- a/fs/xfs/libxfs/xfs_btree.c
+++ b/fs/xfs/libxfs/xfs_btree.c
@@ -294,6 +294,21 @@ xfs_btree_sblock_verify_crc(
return true;
}
+static int
+xfs_btree_free_block(
+ struct xfs_btree_cur *cur,
+ struct xfs_buf *bp)
+{
+ int error;
+
+ error = cur->bc_ops->free_block(cur, bp);
+ if (!error) {
+ xfs_trans_binval(cur->bc_tp, bp);
+ XFS_BTREE_STATS_INC(cur, free);
+ }
+ return error;
+}
+
/*
* Delete the btree cursor.
*/
@@ -3209,6 +3224,7 @@ xfs_btree_kill_iroot(
int level;
int index;
int numrecs;
+ int error;
#ifdef DEBUG
union xfs_btree_ptr ptr;
int i;
@@ -3272,8 +3288,6 @@ xfs_btree_kill_iroot(
cpp = xfs_btree_ptr_addr(cur, 1, cblock);
#ifdef DEBUG
for (i = 0; i < numrecs; i++) {
- int error;
-
error = xfs_btree_check_ptr(cur, cpp, i, level - 1);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
@@ -3283,8 +3297,11 @@ xfs_btree_kill_iroot(
#endif
xfs_btree_copy_ptrs(cur, pp, cpp, numrecs);
- cur->bc_ops->free_block(cur, cbp);
- XFS_BTREE_STATS_INC(cur, free);
+ error = xfs_btree_free_block(cur, cbp);
+ if (error) {
+ XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
+ return error;
+ }
cur->bc_bufs[level - 1] = NULL;
be16_add_cpu(&block->bb_level, -1);
@@ -3317,14 +3334,12 @@ xfs_btree_kill_root(
*/
cur->bc_ops->set_root(cur, newroot, -1);
- error = cur->bc_ops->free_block(cur, bp);
+ error = xfs_btree_free_block(cur, bp);
if (error) {
XFS_BTREE_TRACE_CURSOR(cur, XBT_ERROR);
return error;
}
- XFS_BTREE_STATS_INC(cur, free);
-
cur->bc_bufs[level] = NULL;
cur->bc_ra[level] = 0;
cur->bc_nlevels--;
@@ -3830,10 +3845,9 @@ xfs_btree_delrec(
}
/* Free the deleted block. */
- error = cur->bc_ops->free_block(cur, rbp);
+ error = xfs_btree_free_block(cur, rbp);
if (error)
goto error0;
- XFS_BTREE_STATS_INC(cur, free);
/*
* If we joined with the left neighbor, set the buffer in the
diff --git a/fs/xfs/libxfs/xfs_da_format.h b/fs/xfs/libxfs/xfs_da_format.h
index b14bbd6bb..8d4d8bce4 100644
--- a/fs/xfs/libxfs/xfs_da_format.h
+++ b/fs/xfs/libxfs/xfs_da_format.h
@@ -641,6 +641,22 @@ xfs_dir2_block_leaf_p(struct xfs_dir2_block_tail *btp)
*/
#define XFS_ATTR_LEAF_MAPSIZE 3 /* how many freespace slots */
+/*
+ * Entries are packed toward the top as tight as possible.
+ */
+typedef struct xfs_attr_shortform {
+ struct xfs_attr_sf_hdr { /* constant-structure header block */
+ __be16 totsize; /* total bytes in shortform list */
+ __u8 count; /* count of active entries */
+ } hdr;
+ struct xfs_attr_sf_entry {
+ __uint8_t namelen; /* actual length of name (no NULL) */
+ __uint8_t valuelen; /* actual length of value (no NULL) */
+ __uint8_t flags; /* flags bits (see xfs_attr_leaf.h) */
+ __uint8_t nameval[1]; /* name & value bytes concatenated */
+ } list[1]; /* variable sized array */
+} xfs_attr_shortform_t;
+
typedef struct xfs_attr_leaf_map { /* RLE map of free bytes */
__be16 base; /* base of free region */
__be16 size; /* length of free region */
diff --git a/fs/xfs/libxfs/xfs_dir2.c b/fs/xfs/libxfs/xfs_dir2.c
index 2fb53a5c0..af0f9d171 100644
--- a/fs/xfs/libxfs/xfs_dir2.c
+++ b/fs/xfs/libxfs/xfs_dir2.c
@@ -176,7 +176,7 @@ xfs_dir_isempty(
{
xfs_dir2_sf_hdr_t *sfp;
- ASSERT(S_ISDIR(dp->i_d.di_mode));
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
if (dp->i_d.di_size == 0) /* might happen during shutdown. */
return 1;
if (dp->i_d.di_size > XFS_IFORK_DSIZE(dp))
@@ -231,7 +231,7 @@ xfs_dir_init(
struct xfs_da_args *args;
int error;
- ASSERT(S_ISDIR(dp->i_d.di_mode));
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
error = xfs_dir_ino_validate(tp->t_mountp, pdp->i_ino);
if (error)
return error;
@@ -266,7 +266,7 @@ xfs_dir_createname(
int rval;
int v; /* type-checking value */
- ASSERT(S_ISDIR(dp->i_d.di_mode));
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
if (inum) {
rval = xfs_dir_ino_validate(tp->t_mountp, inum);
if (rval)
@@ -364,7 +364,7 @@ xfs_dir_lookup(
int v; /* type-checking value */
int lock_mode;
- ASSERT(S_ISDIR(dp->i_d.di_mode));
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_lookup);
/*
@@ -443,7 +443,7 @@ xfs_dir_removename(
int rval;
int v; /* type-checking value */
- ASSERT(S_ISDIR(dp->i_d.di_mode));
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_remove);
args = kmem_zalloc(sizeof(*args), KM_SLEEP | KM_NOFS);
@@ -505,7 +505,7 @@ xfs_dir_replace(
int rval;
int v; /* type-checking value */
- ASSERT(S_ISDIR(dp->i_d.di_mode));
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
rval = xfs_dir_ino_validate(tp->t_mountp, inum);
if (rval)
diff --git a/fs/xfs/libxfs/xfs_dir2_node.c b/fs/xfs/libxfs/xfs_dir2_node.c
index 63ee03db7..75a557432 100644
--- a/fs/xfs/libxfs/xfs_dir2_node.c
+++ b/fs/xfs/libxfs/xfs_dir2_node.c
@@ -2235,6 +2235,9 @@ xfs_dir2_node_trim_free(
dp = args->dp;
tp = args->trans;
+
+ *rvalp = 0;
+
/*
* Read the freespace block.
*/
@@ -2255,7 +2258,6 @@ xfs_dir2_node_trim_free(
*/
if (freehdr.nused > 0) {
xfs_trans_brelse(tp, bp);
- *rvalp = 0;
return 0;
}
/*
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 66d702e6b..22297f9b0 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2403,8 +2403,8 @@ xfs_ialloc_compute_maxlevels(
maxleafents = (1LL << XFS_INO_AGINO_BITS(mp)) >>
XFS_INODES_PER_CHUNK_LOG;
- minleafrecs = mp->m_alloc_mnr[0];
- minnoderecs = mp->m_alloc_mnr[1];
+ minleafrecs = mp->m_inobt_mnr[0];
+ minnoderecs = mp->m_inobt_mnr[1];
maxblocks = (maxleafents + minleafrecs - 1) / minleafrecs;
for (level = 1; maxblocks > 1; level++)
maxblocks = (maxblocks + minnoderecs - 1) / minnoderecs;
diff --git a/fs/xfs/libxfs/xfs_ialloc_btree.c b/fs/xfs/libxfs/xfs_ialloc_btree.c
index c679f3c05..89c21d771 100644
--- a/fs/xfs/libxfs/xfs_ialloc_btree.c
+++ b/fs/xfs/libxfs/xfs_ialloc_btree.c
@@ -125,16 +125,8 @@ xfs_inobt_free_block(
struct xfs_btree_cur *cur,
struct xfs_buf *bp)
{
- xfs_fsblock_t fsbno;
- int error;
-
- fsbno = XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp));
- error = xfs_free_extent(cur->bc_tp, fsbno, 1);
- if (error)
- return error;
-
- xfs_trans_binval(cur->bc_tp, bp);
- return error;
+ return xfs_free_extent(cur->bc_tp,
+ XFS_DADDR_TO_FSB(cur->bc_mp, XFS_BUF_ADDR(bp)), 1);
}
STATIC int
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 1aabfda66..9d9559eb2 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -195,28 +195,50 @@ xfs_imap_to_bp(
}
void
-xfs_dinode_from_disk(
- xfs_icdinode_t *to,
- xfs_dinode_t *from)
+xfs_inode_from_disk(
+ struct xfs_inode *ip,
+ struct xfs_dinode *from)
{
- to->di_magic = be16_to_cpu(from->di_magic);
- to->di_mode = be16_to_cpu(from->di_mode);
- to->di_version = from ->di_version;
+ struct xfs_icdinode *to = &ip->i_d;
+ struct inode *inode = VFS_I(ip);
+
+
+ /*
+ * Convert v1 inodes immediately to v2 inode format as this is the
+ * minimum inode version format we support in the rest of the code.
+ */
+ to->di_version = from->di_version;
+ if (to->di_version == 1) {
+ set_nlink(inode, be16_to_cpu(from->di_onlink));
+ to->di_projid_lo = 0;
+ to->di_projid_hi = 0;
+ to->di_version = 2;
+ } else {
+ set_nlink(inode, be32_to_cpu(from->di_nlink));
+ to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
+ to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
+ }
+
to->di_format = from->di_format;
- to->di_onlink = be16_to_cpu(from->di_onlink);
to->di_uid = be32_to_cpu(from->di_uid);
to->di_gid = be32_to_cpu(from->di_gid);
- to->di_nlink = be32_to_cpu(from->di_nlink);
- to->di_projid_lo = be16_to_cpu(from->di_projid_lo);
- to->di_projid_hi = be16_to_cpu(from->di_projid_hi);
- memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
to->di_flushiter = be16_to_cpu(from->di_flushiter);
- to->di_atime.t_sec = be32_to_cpu(from->di_atime.t_sec);
- to->di_atime.t_nsec = be32_to_cpu(from->di_atime.t_nsec);
- to->di_mtime.t_sec = be32_to_cpu(from->di_mtime.t_sec);
- to->di_mtime.t_nsec = be32_to_cpu(from->di_mtime.t_nsec);
- to->di_ctime.t_sec = be32_to_cpu(from->di_ctime.t_sec);
- to->di_ctime.t_nsec = be32_to_cpu(from->di_ctime.t_nsec);
+
+ /*
+ * Time is signed, so need to convert to signed 32 bit before
+ * storing in inode timestamp which may be 64 bit. Otherwise
+ * a time before epoch is converted to a time long after epoch
+ * on 64 bit systems.
+ */
+ inode->i_atime.tv_sec = (int)be32_to_cpu(from->di_atime.t_sec);
+ inode->i_atime.tv_nsec = (int)be32_to_cpu(from->di_atime.t_nsec);
+ inode->i_mtime.tv_sec = (int)be32_to_cpu(from->di_mtime.t_sec);
+ inode->i_mtime.tv_nsec = (int)be32_to_cpu(from->di_mtime.t_nsec);
+ inode->i_ctime.tv_sec = (int)be32_to_cpu(from->di_ctime.t_sec);
+ inode->i_ctime.tv_nsec = (int)be32_to_cpu(from->di_ctime.t_nsec);
+ inode->i_generation = be32_to_cpu(from->di_gen);
+ inode->i_mode = be16_to_cpu(from->di_mode);
+
to->di_size = be64_to_cpu(from->di_size);
to->di_nblocks = be64_to_cpu(from->di_nblocks);
to->di_extsize = be32_to_cpu(from->di_extsize);
@@ -227,42 +249,96 @@ xfs_dinode_from_disk(
to->di_dmevmask = be32_to_cpu(from->di_dmevmask);
to->di_dmstate = be16_to_cpu(from->di_dmstate);
to->di_flags = be16_to_cpu(from->di_flags);
- to->di_gen = be32_to_cpu(from->di_gen);
if (to->di_version == 3) {
- to->di_changecount = be64_to_cpu(from->di_changecount);
+ inode->i_version = be64_to_cpu(from->di_changecount);
to->di_crtime.t_sec = be32_to_cpu(from->di_crtime.t_sec);
to->di_crtime.t_nsec = be32_to_cpu(from->di_crtime.t_nsec);
to->di_flags2 = be64_to_cpu(from->di_flags2);
- to->di_ino = be64_to_cpu(from->di_ino);
- to->di_lsn = be64_to_cpu(from->di_lsn);
- memcpy(to->di_pad2, from->di_pad2, sizeof(to->di_pad2));
- uuid_copy(&to->di_uuid, &from->di_uuid);
}
}
void
-xfs_dinode_to_disk(
- xfs_dinode_t *to,
- xfs_icdinode_t *from)
+xfs_inode_to_disk(
+ struct xfs_inode *ip,
+ struct xfs_dinode *to,
+ xfs_lsn_t lsn)
+{
+ struct xfs_icdinode *from = &ip->i_d;
+ struct inode *inode = VFS_I(ip);
+
+ to->di_magic = cpu_to_be16(XFS_DINODE_MAGIC);
+ to->di_onlink = 0;
+
+ to->di_version = from->di_version;
+ to->di_format = from->di_format;
+ to->di_uid = cpu_to_be32(from->di_uid);
+ to->di_gid = cpu_to_be32(from->di_gid);
+ to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
+ to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
+
+ memset(to->di_pad, 0, sizeof(to->di_pad));
+ to->di_atime.t_sec = cpu_to_be32(inode->i_atime.tv_sec);
+ to->di_atime.t_nsec = cpu_to_be32(inode->i_atime.tv_nsec);
+ to->di_mtime.t_sec = cpu_to_be32(inode->i_mtime.tv_sec);
+ to->di_mtime.t_nsec = cpu_to_be32(inode->i_mtime.tv_nsec);
+ to->di_ctime.t_sec = cpu_to_be32(inode->i_ctime.tv_sec);
+ to->di_ctime.t_nsec = cpu_to_be32(inode->i_ctime.tv_nsec);
+ to->di_nlink = cpu_to_be32(inode->i_nlink);
+ to->di_gen = cpu_to_be32(inode->i_generation);
+ to->di_mode = cpu_to_be16(inode->i_mode);
+
+ to->di_size = cpu_to_be64(from->di_size);
+ to->di_nblocks = cpu_to_be64(from->di_nblocks);
+ to->di_extsize = cpu_to_be32(from->di_extsize);
+ to->di_nextents = cpu_to_be32(from->di_nextents);
+ to->di_anextents = cpu_to_be16(from->di_anextents);
+ to->di_forkoff = from->di_forkoff;
+ to->di_aformat = from->di_aformat;
+ to->di_dmevmask = cpu_to_be32(from->di_dmevmask);
+ to->di_dmstate = cpu_to_be16(from->di_dmstate);
+ to->di_flags = cpu_to_be16(from->di_flags);
+
+ if (from->di_version == 3) {
+ to->di_changecount = cpu_to_be64(inode->i_version);
+ to->di_crtime.t_sec = cpu_to_be32(from->di_crtime.t_sec);
+ to->di_crtime.t_nsec = cpu_to_be32(from->di_crtime.t_nsec);
+ to->di_flags2 = cpu_to_be64(from->di_flags2);
+
+ to->di_ino = cpu_to_be64(ip->i_ino);
+ to->di_lsn = cpu_to_be64(lsn);
+ memset(to->di_pad2, 0, sizeof(to->di_pad2));
+ uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
+ to->di_flushiter = 0;
+ } else {
+ to->di_flushiter = cpu_to_be16(from->di_flushiter);
+ }
+}
+
+void
+xfs_log_dinode_to_disk(
+ struct xfs_log_dinode *from,
+ struct xfs_dinode *to)
{
to->di_magic = cpu_to_be16(from->di_magic);
to->di_mode = cpu_to_be16(from->di_mode);
- to->di_version = from ->di_version;
+ to->di_version = from->di_version;
to->di_format = from->di_format;
- to->di_onlink = cpu_to_be16(from->di_onlink);
+ to->di_onlink = 0;
to->di_uid = cpu_to_be32(from->di_uid);
to->di_gid = cpu_to_be32(from->di_gid);
to->di_nlink = cpu_to_be32(from->di_nlink);
to->di_projid_lo = cpu_to_be16(from->di_projid_lo);
to->di_projid_hi = cpu_to_be16(from->di_projid_hi);
memcpy(to->di_pad, from->di_pad, sizeof(to->di_pad));
+
to->di_atime.t_sec = cpu_to_be32(from->di_atime.t_sec);
to->di_atime.t_nsec = cpu_to_be32(from->di_atime.t_nsec);
to->di_mtime.t_sec = cpu_to_be32(from->di_mtime.t_sec);
to->di_mtime.t_nsec = cpu_to_be32(from->di_mtime.t_nsec);
to->di_ctime.t_sec = cpu_to_be32(from->di_ctime.t_sec);
to->di_ctime.t_nsec = cpu_to_be32(from->di_ctime.t_nsec);
+
to->di_size = cpu_to_be64(from->di_size);
to->di_nblocks = cpu_to_be64(from->di_nblocks);
to->di_extsize = cpu_to_be32(from->di_extsize);
@@ -367,13 +443,10 @@ xfs_iread(
!(mp->m_flags & XFS_MOUNT_IKEEP)) {
/* initialise the on-disk inode core */
memset(&ip->i_d, 0, sizeof(ip->i_d));
- ip->i_d.di_magic = XFS_DINODE_MAGIC;
- ip->i_d.di_gen = prandom_u32();
- if (xfs_sb_version_hascrc(&mp->m_sb)) {
+ VFS_I(ip)->i_generation = prandom_u32();
+ if (xfs_sb_version_hascrc(&mp->m_sb))
ip->i_d.di_version = 3;
- ip->i_d.di_ino = ip->i_ino;
- uuid_copy(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid);
- } else
+ else
ip->i_d.di_version = 2;
return 0;
}
@@ -403,7 +476,7 @@ xfs_iread(
* Otherwise, just get the truly permanent information.
*/
if (dip->di_mode) {
- xfs_dinode_from_disk(&ip->i_d, dip);
+ xfs_inode_from_disk(ip, dip);
error = xfs_iformat_fork(ip, dip);
if (error) {
#ifdef DEBUG
@@ -417,16 +490,10 @@ xfs_iread(
* Partial initialisation of the in-core inode. Just the bits
* that xfs_ialloc won't overwrite or relies on being correct.
*/
- ip->i_d.di_magic = be16_to_cpu(dip->di_magic);
ip->i_d.di_version = dip->di_version;
- ip->i_d.di_gen = be32_to_cpu(dip->di_gen);
+ VFS_I(ip)->i_generation = be32_to_cpu(dip->di_gen);
ip->i_d.di_flushiter = be16_to_cpu(dip->di_flushiter);
- if (dip->di_version == 3) {
- ip->i_d.di_ino = be64_to_cpu(dip->di_ino);
- uuid_copy(&ip->i_d.di_uuid, &dip->di_uuid);
- }
-
/*
* Make sure to pull in the mode here as well in
* case the inode is released without being used.
@@ -434,25 +501,10 @@ xfs_iread(
* the inode is already free and not try to mess
* with the uninitialized part of it.
*/
- ip->i_d.di_mode = 0;
- }
-
- /*
- * Automatically convert version 1 inode formats in memory to version 2
- * inode format. If the inode is modified, it will get logged and
- * rewritten as a version 2 inode. We can do this because we set the
- * superblock feature bit for v2 inodes unconditionally during mount
- * and it means the reast of the code can assume the inode version is 2
- * or higher.
- */
- if (ip->i_d.di_version == 1) {
- ip->i_d.di_version = 2;
- memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
- ip->i_d.di_nlink = ip->i_d.di_onlink;
- ip->i_d.di_onlink = 0;
- xfs_set_projid(ip, 0);
+ VFS_I(ip)->i_mode = 0;
}
+ ASSERT(ip->i_d.di_version >= 2);
ip->i_delayed_blks = 0;
/*
diff --git a/fs/xfs/libxfs/xfs_inode_buf.h b/fs/xfs/libxfs/xfs_inode_buf.h
index 9308c47f2..7c4dd321b 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.h
+++ b/fs/xfs/libxfs/xfs_inode_buf.h
@@ -20,7 +20,36 @@
struct xfs_inode;
struct xfs_dinode;
-struct xfs_icdinode;
+
+/*
+ * In memory representation of the XFS inode. This is held in the in-core struct
+ * xfs_inode and represents the current on disk values but the structure is not
+ * in on-disk format. That is, this structure is always translated to on-disk
+ * format specific structures at the appropriate time.
+ */
+struct xfs_icdinode {
+ __int8_t di_version; /* inode version */
+ __int8_t di_format; /* format of di_c data */
+ __uint16_t di_flushiter; /* incremented on flush */
+ __uint32_t di_uid; /* owner's user id */
+ __uint32_t di_gid; /* owner's group id */
+ __uint16_t di_projid_lo; /* lower part of owner's project id */
+ __uint16_t di_projid_hi; /* higher part of owner's project id */
+ xfs_fsize_t di_size; /* number of bytes in file */
+ xfs_rfsblock_t di_nblocks; /* # of direct & btree blocks used */
+ xfs_extlen_t di_extsize; /* basic/minimum extent size for file */
+ xfs_extnum_t di_nextents; /* number of extents in data fork */
+ xfs_aextnum_t di_anextents; /* number of extents in attribute fork*/
+ __uint8_t di_forkoff; /* attr fork offs, <<3 for 64b align */
+ __int8_t di_aformat; /* format of attr fork's data */
+ __uint32_t di_dmevmask; /* DMIG event mask */
+ __uint16_t di_dmstate; /* DMIG state info */
+ __uint16_t di_flags; /* random flags, XFS_DIFLAG_... */
+
+ __uint64_t di_flags2; /* more random flags */
+
+ xfs_ictimestamp_t di_crtime; /* time created */
+};
/*
* Inode location information. Stored in the inode and passed to
@@ -38,8 +67,11 @@ int xfs_imap_to_bp(struct xfs_mount *, struct xfs_trans *,
int xfs_iread(struct xfs_mount *, struct xfs_trans *,
struct xfs_inode *, uint);
void xfs_dinode_calc_crc(struct xfs_mount *, struct xfs_dinode *);
-void xfs_dinode_to_disk(struct xfs_dinode *to, struct xfs_icdinode *from);
-void xfs_dinode_from_disk(struct xfs_icdinode *to, struct xfs_dinode *from);
+void xfs_inode_to_disk(struct xfs_inode *ip, struct xfs_dinode *to,
+ xfs_lsn_t lsn);
+void xfs_inode_from_disk(struct xfs_inode *ip, struct xfs_dinode *from);
+void xfs_log_dinode_to_disk(struct xfs_log_dinode *from,
+ struct xfs_dinode *to);
#if defined(DEBUG)
void xfs_inobp_check(struct xfs_mount *, struct xfs_buf *);
diff --git a/fs/xfs/libxfs/xfs_inode_fork.c b/fs/xfs/libxfs/xfs_inode_fork.c
index 0defbd02f..11faf7df1 100644
--- a/fs/xfs/libxfs/xfs_inode_fork.c
+++ b/fs/xfs/libxfs/xfs_inode_fork.c
@@ -31,6 +31,7 @@
#include "xfs_error.h"
#include "xfs_trace.h"
#include "xfs_attr_sf.h"
+#include "xfs_da_format.h"
kmem_zone_t *xfs_ifork_zone;
@@ -120,7 +121,7 @@ xfs_iformat_fork(
return -EFSCORRUPTED;
}
- switch (ip->i_d.di_mode & S_IFMT) {
+ switch (VFS_I(ip)->i_mode & S_IFMT) {
case S_IFIFO:
case S_IFCHR:
case S_IFBLK:
diff --git a/fs/xfs/libxfs/xfs_log_format.h b/fs/xfs/libxfs/xfs_log_format.h
index 265314690..d54a8018b 100644
--- a/fs/xfs/libxfs/xfs_log_format.h
+++ b/fs/xfs/libxfs/xfs_log_format.h
@@ -290,6 +290,7 @@ typedef struct xfs_inode_log_format_64 {
__int32_t ilf_boffset; /* off of inode in buffer */
} xfs_inode_log_format_64_t;
+
/*
* Flags for xfs_trans_log_inode flags field.
*/
@@ -360,15 +361,15 @@ typedef struct xfs_ictimestamp {
} xfs_ictimestamp_t;
/*
- * NOTE: This structure must be kept identical to struct xfs_dinode
- * except for the endianness annotations.
+ * Define the format of the inode core that is logged. This structure must be
+ * kept identical to struct xfs_dinode except for the endianness annotations.
*/
-typedef struct xfs_icdinode {
+struct xfs_log_dinode {
__uint16_t di_magic; /* inode magic # = XFS_DINODE_MAGIC */
__uint16_t di_mode; /* mode and type of file */
__int8_t di_version; /* inode version */
__int8_t di_format; /* format of di_c data */
- __uint16_t di_onlink; /* old number of links to file */
+ __uint8_t di_pad3[2]; /* unused in v2/3 inodes */
__uint32_t di_uid; /* owner's user id */
__uint32_t di_gid; /* owner's group id */
__uint32_t di_nlink; /* number of links to file */
@@ -407,13 +408,13 @@ typedef struct xfs_icdinode {
uuid_t di_uuid; /* UUID of the filesystem */
/* structure must be padded to 64 bit alignment */
-} xfs_icdinode_t;
+};
-static inline uint xfs_icdinode_size(int version)
+static inline uint xfs_log_dinode_size(int version)
{
if (version == 3)
- return sizeof(struct xfs_icdinode);
- return offsetof(struct xfs_icdinode, di_next_unlinked);
+ return sizeof(struct xfs_log_dinode);
+ return offsetof(struct xfs_log_dinode, di_next_unlinked);
}
/*
@@ -495,6 +496,8 @@ enum xfs_blft {
XFS_BLFT_ATTR_LEAF_BUF,
XFS_BLFT_ATTR_RMT_BUF,
XFS_BLFT_SB_BUF,
+ XFS_BLFT_RTBITMAP_BUF,
+ XFS_BLFT_RTSUMMARY_BUF,
XFS_BLFT_MAX_BUF = (1 << XFS_BLFT_BITS),
};
diff --git a/fs/xfs/libxfs/xfs_quota_defs.h b/fs/xfs/libxfs/xfs_quota_defs.h
index f51078f1e..8eed51275 100644
--- a/fs/xfs/libxfs/xfs_quota_defs.h
+++ b/fs/xfs/libxfs/xfs_quota_defs.h
@@ -37,7 +37,7 @@ typedef __uint16_t xfs_qwarncnt_t;
#define XFS_DQ_PROJ 0x0002 /* project quota */
#define XFS_DQ_GROUP 0x0004 /* a group quota */
#define XFS_DQ_DIRTY 0x0008 /* dquot is dirty */
-#define XFS_DQ_FREEING 0x0010 /* dquot is beeing torn down */
+#define XFS_DQ_FREEING 0x0010 /* dquot is being torn down */
#define XFS_DQ_ALLTYPES (XFS_DQ_USER|XFS_DQ_PROJ|XFS_DQ_GROUP)
@@ -116,6 +116,7 @@ typedef __uint16_t xfs_qwarncnt_t;
#define XFS_QMOPT_DQREPAIR 0x0001000 /* repair dquot if damaged */
#define XFS_QMOPT_GQUOTA 0x0002000 /* group dquot requested */
#define XFS_QMOPT_ENOSPC 0x0004000 /* enospc instead of edquot (prj) */
+#define XFS_QMOPT_DQNEXT 0x0008000 /* return next dquot >= this ID */
/*
* flags to xfs_trans_mod_dquot to indicate which field needs to be
diff --git a/fs/xfs/libxfs/xfs_rtbitmap.c b/fs/xfs/libxfs/xfs_rtbitmap.c
index 9b59ffa1f..951c044e2 100644
--- a/fs/xfs/libxfs/xfs_rtbitmap.c
+++ b/fs/xfs/libxfs/xfs_rtbitmap.c
@@ -42,6 +42,31 @@
*/
/*
+ * Real time buffers need verifiers to avoid runtime warnings during IO.
+ * We don't have anything to verify, however, so these are just dummy
+ * operations.
+ */
+static void
+xfs_rtbuf_verify_read(
+ struct xfs_buf *bp)
+{
+ return;
+}
+
+static void
+xfs_rtbuf_verify_write(
+ struct xfs_buf *bp)
+{
+ return;
+}
+
+const struct xfs_buf_ops xfs_rtbuf_ops = {
+ .name = "rtbuf",
+ .verify_read = xfs_rtbuf_verify_read,
+ .verify_write = xfs_rtbuf_verify_write,
+};
+
+/*
* Get a buffer for the bitmap or summary file block specified.
* The buffer is returned read and locked.
*/
@@ -68,9 +93,12 @@ xfs_rtbuf_get(
ASSERT(map.br_startblock != NULLFSBLOCK);
error = xfs_trans_read_buf(mp, tp, mp->m_ddev_targp,
XFS_FSB_TO_DADDR(mp, map.br_startblock),
- mp->m_bsize, 0, &bp, NULL);
+ mp->m_bsize, 0, &bp, &xfs_rtbuf_ops);
if (error)
return error;
+
+ xfs_trans_buf_set_type(tp, bp, issum ? XFS_BLFT_RTSUMMARY_BUF
+ : XFS_BLFT_RTBITMAP_BUF);
*bpp = bp;
return 0;
}
@@ -983,7 +1011,7 @@ xfs_rtfree_extent(
mp->m_sb.sb_rextents) {
if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM))
mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
- *(__uint64_t *)&mp->m_rbmip->i_d.di_atime = 0;
+ *(__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime = 0;
xfs_trans_log_inode(tp, mp->m_rbmip, XFS_ILOG_CORE);
}
return 0;
diff --git a/fs/xfs/libxfs/xfs_sb.h b/fs/xfs/libxfs/xfs_sb.h
index b25bb9a34..961e6475a 100644
--- a/fs/xfs/libxfs/xfs_sb.h
+++ b/fs/xfs/libxfs/xfs_sb.h
@@ -27,7 +27,6 @@ extern struct xfs_perag *xfs_perag_get_tag(struct xfs_mount *, xfs_agnumber_t,
extern void xfs_perag_put(struct xfs_perag *pag);
extern int xfs_initialize_perag_data(struct xfs_mount *, xfs_agnumber_t);
-extern void xfs_sb_calc_crc(struct xfs_buf *bp);
extern void xfs_log_sb(struct xfs_trans *tp);
extern int xfs_sync_sb(struct xfs_mount *mp, bool wait);
extern void xfs_sb_mount_common(struct xfs_mount *mp, struct xfs_sb *sbp);
diff --git a/fs/xfs/libxfs/xfs_shared.h b/fs/xfs/libxfs/xfs_shared.h
index 15c3ceb84..81ac87083 100644
--- a/fs/xfs/libxfs/xfs_shared.h
+++ b/fs/xfs/libxfs/xfs_shared.h
@@ -53,6 +53,7 @@ extern const struct xfs_buf_ops xfs_dquot_buf_ra_ops;
extern const struct xfs_buf_ops xfs_sb_buf_ops;
extern const struct xfs_buf_ops xfs_sb_quiet_buf_ops;
extern const struct xfs_buf_ops xfs_symlink_buf_ops;
+extern const struct xfs_buf_ops xfs_rtbuf_ops;
/*
* Transaction types. Used to distinguish types of buffers. These never reach
diff --git a/fs/xfs/xfs_aops.c b/fs/xfs/xfs_aops.c
index a9ebabfe7..e49b2406d 100644
--- a/fs/xfs/xfs_aops.c
+++ b/fs/xfs/xfs_aops.c
@@ -36,6 +36,21 @@
#include <linux/pagevec.h>
#include <linux/writeback.h>
+/* flags for direct write completions */
+#define XFS_DIO_FLAG_UNWRITTEN (1 << 0)
+#define XFS_DIO_FLAG_APPEND (1 << 1)
+
+/*
+ * structure owned by writepages passed to individual writepage calls
+ */
+struct xfs_writepage_ctx {
+ struct xfs_bmbt_irec imap;
+ bool imap_valid;
+ unsigned int io_type;
+ struct xfs_ioend *ioend;
+ sector_t last_block;
+};
+
void
xfs_count_page_state(
struct page *page,
@@ -214,10 +229,12 @@ xfs_end_io(
struct xfs_inode *ip = XFS_I(ioend->io_inode);
int error = 0;
- if (XFS_FORCED_SHUTDOWN(ip->i_mount)) {
+ /*
+ * Set an error if the mount has shut down and proceed with end I/O
+ * processing so it can perform whatever cleanups are necessary.
+ */
+ if (XFS_FORCED_SHUTDOWN(ip->i_mount))
ioend->io_error = -EIO;
- goto done;
- }
/*
* For unwritten extents we need to issue transactions to convert a
@@ -265,7 +282,7 @@ xfs_alloc_ioend(
*/
atomic_set(&ioend->io_remaining, 1);
ioend->io_error = 0;
- ioend->io_list = NULL;
+ INIT_LIST_HEAD(&ioend->io_list);
ioend->io_type = type;
ioend->io_inode = inode;
ioend->io_buffer_head = NULL;
@@ -283,8 +300,7 @@ xfs_map_blocks(
struct inode *inode,
loff_t offset,
struct xfs_bmbt_irec *imap,
- int type,
- int nonblocking)
+ int type)
{
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
@@ -300,12 +316,7 @@ xfs_map_blocks(
if (type == XFS_IO_UNWRITTEN)
bmapi_flags |= XFS_BMAPI_IGSTATE;
- if (!xfs_ilock_nowait(ip, XFS_ILOCK_SHARED)) {
- if (nonblocking)
- return -EAGAIN;
- xfs_ilock(ip, XFS_ILOCK_SHARED);
- }
-
+ xfs_ilock(ip, XFS_ILOCK_SHARED);
ASSERT(ip->i_d.di_format != XFS_DINODE_FMT_BTREE ||
(ip->i_df.if_flags & XFS_IFEXTENTS));
ASSERT(offset <= mp->m_super->s_maxbytes);
@@ -341,7 +352,7 @@ xfs_map_blocks(
return 0;
}
-STATIC int
+STATIC bool
xfs_imap_valid(
struct inode *inode,
struct xfs_bmbt_irec *imap,
@@ -414,8 +425,7 @@ xfs_start_buffer_writeback(
STATIC void
xfs_start_page_writeback(
struct page *page,
- int clear_dirty,
- int buffers)
+ int clear_dirty)
{
ASSERT(PageLocked(page));
ASSERT(!PageWriteback(page));
@@ -434,10 +444,6 @@ xfs_start_page_writeback(
set_page_writeback_keepwrite(page);
unlock_page(page);
-
- /* If no buffers on the page are to be written, finish it here */
- if (!buffers)
- end_page_writeback(page);
}
static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
@@ -446,153 +452,101 @@ static inline int xfs_bio_add_buffer(struct bio *bio, struct buffer_head *bh)
}
/*
- * Submit all of the bios for all of the ioends we have saved up, covering the
- * initial writepage page and also any probed pages.
- *
- * Because we may have multiple ioends spanning a page, we need to start
- * writeback on all the buffers before we submit them for I/O. If we mark the
- * buffers as we got, then we can end up with a page that only has buffers
- * marked async write and I/O complete on can occur before we mark the other
- * buffers async write.
- *
- * The end result of this is that we trip a bug in end_page_writeback() because
- * we call it twice for the one page as the code in end_buffer_async_write()
- * assumes that all buffers on the page are started at the same time.
- *
- * The fix is two passes across the ioend list - one to start writeback on the
- * buffer_heads, and then submit them for I/O on the second pass.
+ * Submit all of the bios for an ioend. We are only passed a single ioend at a
+ * time; the caller is responsible for chaining prior to submission.
*
* If @fail is non-zero, it means that we have a situation where some part of
* the submission process has failed after we have marked paged for writeback
* and unlocked them. In this situation, we need to fail the ioend chain rather
* than submit it to IO. This typically only happens on a filesystem shutdown.
*/
-STATIC void
+STATIC int
xfs_submit_ioend(
struct writeback_control *wbc,
xfs_ioend_t *ioend,
- int fail)
+ int status)
{
- xfs_ioend_t *head = ioend;
- xfs_ioend_t *next;
struct buffer_head *bh;
struct bio *bio;
sector_t lastblock = 0;
- /* Pass 1 - start writeback */
- do {
- next = ioend->io_list;
- for (bh = ioend->io_buffer_head; bh; bh = bh->b_private)
- xfs_start_buffer_writeback(bh);
- } while ((ioend = next) != NULL);
+ /* Reserve log space if we might write beyond the on-disk inode size. */
+ if (!status &&
+ ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
+ status = xfs_setfilesize_trans_alloc(ioend);
+ /*
+ * If we are failing the IO now, just mark the ioend with an
+ * error and finish it. This will run IO completion immediately
+ * as there is only one reference to the ioend at this point in
+ * time.
+ */
+ if (status) {
+ ioend->io_error = status;
+ xfs_finish_ioend(ioend);
+ return status;
+ }
- /* Pass 2 - submit I/O */
- ioend = head;
- do {
- next = ioend->io_list;
- bio = NULL;
+ bio = NULL;
+ for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
- /*
- * If we are failing the IO now, just mark the ioend with an
- * error and finish it. This will run IO completion immediately
- * as there is only one reference to the ioend at this point in
- * time.
- */
- if (fail) {
- ioend->io_error = fail;
- xfs_finish_ioend(ioend);
- continue;
+ if (!bio) {
+retry:
+ bio = xfs_alloc_ioend_bio(bh);
+ } else if (bh->b_blocknr != lastblock + 1) {
+ xfs_submit_ioend_bio(wbc, ioend, bio);
+ goto retry;
}
- for (bh = ioend->io_buffer_head; bh; bh = bh->b_private) {
-
- if (!bio) {
- retry:
- bio = xfs_alloc_ioend_bio(bh);
- } else if (bh->b_blocknr != lastblock + 1) {
- xfs_submit_ioend_bio(wbc, ioend, bio);
- goto retry;
- }
-
- if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
- xfs_submit_ioend_bio(wbc, ioend, bio);
- goto retry;
- }
-
- lastblock = bh->b_blocknr;
- }
- if (bio)
+ if (xfs_bio_add_buffer(bio, bh) != bh->b_size) {
xfs_submit_ioend_bio(wbc, ioend, bio);
- xfs_finish_ioend(ioend);
- } while ((ioend = next) != NULL);
-}
-
-/*
- * Cancel submission of all buffer_heads so far in this endio.
- * Toss the endio too. Only ever called for the initial page
- * in a writepage request, so only ever one page.
- */
-STATIC void
-xfs_cancel_ioend(
- xfs_ioend_t *ioend)
-{
- xfs_ioend_t *next;
- struct buffer_head *bh, *next_bh;
-
- do {
- next = ioend->io_list;
- bh = ioend->io_buffer_head;
- do {
- next_bh = bh->b_private;
- clear_buffer_async_write(bh);
- /*
- * The unwritten flag is cleared when added to the
- * ioend. We're not submitting for I/O so mark the
- * buffer unwritten again for next time around.
- */
- if (ioend->io_type == XFS_IO_UNWRITTEN)
- set_buffer_unwritten(bh);
- unlock_buffer(bh);
- } while ((bh = next_bh) != NULL);
+ goto retry;
+ }
- mempool_free(ioend, xfs_ioend_pool);
- } while ((ioend = next) != NULL);
+ lastblock = bh->b_blocknr;
+ }
+ if (bio)
+ xfs_submit_ioend_bio(wbc, ioend, bio);
+ xfs_finish_ioend(ioend);
+ return 0;
}
/*
* Test to see if we've been building up a completion structure for
* earlier buffers -- if so, we try to append to this ioend if we
* can, otherwise we finish off any current ioend and start another.
- * Return true if we've finished the given ioend.
+ * Return the ioend we finished off so that the caller can submit it
+ * once it has finished processing the dirty page.
*/
STATIC void
xfs_add_to_ioend(
struct inode *inode,
struct buffer_head *bh,
xfs_off_t offset,
- unsigned int type,
- xfs_ioend_t **result,
- int need_ioend)
+ struct xfs_writepage_ctx *wpc,
+ struct list_head *iolist)
{
- xfs_ioend_t *ioend = *result;
-
- if (!ioend || need_ioend || type != ioend->io_type) {
- xfs_ioend_t *previous = *result;
-
- ioend = xfs_alloc_ioend(inode, type);
- ioend->io_offset = offset;
- ioend->io_buffer_head = bh;
- ioend->io_buffer_tail = bh;
- if (previous)
- previous->io_list = ioend;
- *result = ioend;
+ if (!wpc->ioend || wpc->io_type != wpc->ioend->io_type ||
+ bh->b_blocknr != wpc->last_block + 1 ||
+ offset != wpc->ioend->io_offset + wpc->ioend->io_size) {
+ struct xfs_ioend *new;
+
+ if (wpc->ioend)
+ list_add(&wpc->ioend->io_list, iolist);
+
+ new = xfs_alloc_ioend(inode, wpc->io_type);
+ new->io_offset = offset;
+ new->io_buffer_head = bh;
+ new->io_buffer_tail = bh;
+ wpc->ioend = new;
} else {
- ioend->io_buffer_tail->b_private = bh;
- ioend->io_buffer_tail = bh;
+ wpc->ioend->io_buffer_tail->b_private = bh;
+ wpc->ioend->io_buffer_tail = bh;
}
bh->b_private = NULL;
- ioend->io_size += bh->b_size;
+ wpc->ioend->io_size += bh->b_size;
+ wpc->last_block = bh->b_blocknr;
+ xfs_start_buffer_writeback(bh);
}
STATIC void
@@ -678,183 +632,6 @@ xfs_check_page_type(
return false;
}
-/*
- * Allocate & map buffers for page given the extent map. Write it out.
- * except for the original page of a writepage, this is called on
- * delalloc/unwritten pages only, for the original page it is possible
- * that the page has no mapping at all.
- */
-STATIC int
-xfs_convert_page(
- struct inode *inode,
- struct page *page,
- loff_t tindex,
- struct xfs_bmbt_irec *imap,
- xfs_ioend_t **ioendp,
- struct writeback_control *wbc)
-{
- struct buffer_head *bh, *head;
- xfs_off_t end_offset;
- unsigned long p_offset;
- unsigned int type;
- int len, page_dirty;
- int count = 0, done = 0, uptodate = 1;
- xfs_off_t offset = page_offset(page);
-
- if (page->index != tindex)
- goto fail;
- if (!trylock_page(page))
- goto fail;
- if (PageWriteback(page))
- goto fail_unlock_page;
- if (page->mapping != inode->i_mapping)
- goto fail_unlock_page;
- if (!xfs_check_page_type(page, (*ioendp)->io_type, false))
- goto fail_unlock_page;
-
- /*
- * page_dirty is initially a count of buffers on the page before
- * EOF and is decremented as we move each into a cleanable state.
- *
- * Derivation:
- *
- * End offset is the highest offset that this page should represent.
- * If we are on the last page, (end_offset & (PAGE_CACHE_SIZE - 1))
- * will evaluate non-zero and be less than PAGE_CACHE_SIZE and
- * hence give us the correct page_dirty count. On any other page,
- * it will be zero and in that case we need page_dirty to be the
- * count of buffers on the page.
- */
- end_offset = min_t(unsigned long long,
- (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT,
- i_size_read(inode));
-
- /*
- * If the current map does not span the entire page we are about to try
- * to write, then give up. The only way we can write a page that spans
- * multiple mappings in a single writeback iteration is via the
- * xfs_vm_writepage() function. Data integrity writeback requires the
- * entire page to be written in a single attempt, otherwise the part of
- * the page we don't write here doesn't get written as part of the data
- * integrity sync.
- *
- * For normal writeback, we also don't attempt to write partial pages
- * here as it simply means that write_cache_pages() will see it under
- * writeback and ignore the page until some point in the future, at
- * which time this will be the only page in the file that needs
- * writeback. Hence for more optimal IO patterns, we should always
- * avoid partial page writeback due to multiple mappings on a page here.
- */
- if (!xfs_imap_valid(inode, imap, end_offset))
- goto fail_unlock_page;
-
- len = 1 << inode->i_blkbits;
- p_offset = min_t(unsigned long, end_offset & (PAGE_CACHE_SIZE - 1),
- PAGE_CACHE_SIZE);
- p_offset = p_offset ? roundup(p_offset, len) : PAGE_CACHE_SIZE;
- page_dirty = p_offset / len;
-
- /*
- * The moment we find a buffer that doesn't match our current type
- * specification or can't be written, abort the loop and start
- * writeback. As per the above xfs_imap_valid() check, only
- * xfs_vm_writepage() can handle partial page writeback fully - we are
- * limited here to the buffers that are contiguous with the current
- * ioend, and hence a buffer we can't write breaks that contiguity and
- * we have to defer the rest of the IO to xfs_vm_writepage().
- */
- bh = head = page_buffers(page);
- do {
- if (offset >= end_offset)
- break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
- if (!(PageUptodate(page) || buffer_uptodate(bh))) {
- done = 1;
- break;
- }
-
- if (buffer_unwritten(bh) || buffer_delay(bh) ||
- buffer_mapped(bh)) {
- if (buffer_unwritten(bh))
- type = XFS_IO_UNWRITTEN;
- else if (buffer_delay(bh))
- type = XFS_IO_DELALLOC;
- else
- type = XFS_IO_OVERWRITE;
-
- /*
- * imap should always be valid because of the above
- * partial page end_offset check on the imap.
- */
- ASSERT(xfs_imap_valid(inode, imap, offset));
-
- lock_buffer(bh);
- if (type != XFS_IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, imap, offset);
- xfs_add_to_ioend(inode, bh, offset, type,
- ioendp, done);
-
- page_dirty--;
- count++;
- } else {
- done = 1;
- break;
- }
- } while (offset += len, (bh = bh->b_this_page) != head);
-
- if (uptodate && bh == head)
- SetPageUptodate(page);
-
- if (count) {
- if (--wbc->nr_to_write <= 0 &&
- wbc->sync_mode == WB_SYNC_NONE)
- done = 1;
- }
- xfs_start_page_writeback(page, !page_dirty, count);
-
- return done;
- fail_unlock_page:
- unlock_page(page);
- fail:
- return 1;
-}
-
-/*
- * Convert & write out a cluster of pages in the same extent as defined
- * by mp and following the start page.
- */
-STATIC void
-xfs_cluster_write(
- struct inode *inode,
- pgoff_t tindex,
- struct xfs_bmbt_irec *imap,
- xfs_ioend_t **ioendp,
- struct writeback_control *wbc,
- pgoff_t tlast)
-{
- struct pagevec pvec;
- int done = 0, i;
-
- pagevec_init(&pvec, 0);
- while (!done && tindex <= tlast) {
- unsigned len = min_t(pgoff_t, PAGEVEC_SIZE, tlast - tindex + 1);
-
- if (!pagevec_lookup(&pvec, inode->i_mapping, tindex, len))
- break;
-
- for (i = 0; i < pagevec_count(&pvec); i++) {
- done = xfs_convert_page(inode, pvec.pages[i], tindex++,
- imap, ioendp, wbc);
- if (done)
- break;
- }
-
- pagevec_release(&pvec);
- cond_resched();
- }
-}
-
STATIC void
xfs_vm_invalidatepage(
struct page *page,
@@ -927,11 +704,169 @@ next_buffer:
xfs_iunlock(ip, XFS_ILOCK_EXCL);
out_invalidate:
- xfs_vm_invalidatepage(page, 0, PAGE_CACHE_SIZE);
+ xfs_vm_invalidatepage(page, 0, PAGE_SIZE);
return;
}
/*
+ * We implement an immediate ioend submission policy here to avoid needing to
+ * chain multiple ioends and hence nest mempool allocations which can violate
+ * forward progress guarantees we need to provide. The current ioend we are
+ * adding buffers to is cached on the writepage context, and if the new buffer
+ * does not append to the cached ioend it will create a new ioend and cache that
+ * instead.
+ *
+ * If a new ioend is created and cached, the old ioend is returned and queued
+ * locally for submission once the entire page is processed or an error has been
+ * detected. While ioends are submitted immediately after they are completed,
+ * batching optimisations are provided by higher level block plugging.
+ *
+ * At the end of a writeback pass, there will be a cached ioend remaining on the
+ * writepage context that the caller will need to submit.
+ */
+static int
+xfs_writepage_map(
+ struct xfs_writepage_ctx *wpc,
+ struct writeback_control *wbc,
+ struct inode *inode,
+ struct page *page,
+ loff_t offset,
+ __uint64_t end_offset)
+{
+ LIST_HEAD(submit_list);
+ struct xfs_ioend *ioend, *next;
+ struct buffer_head *bh, *head;
+ ssize_t len = 1 << inode->i_blkbits;
+ int error = 0;
+ int count = 0;
+ int uptodate = 1;
+
+ bh = head = page_buffers(page);
+ offset = page_offset(page);
+ do {
+ if (offset >= end_offset)
+ break;
+ if (!buffer_uptodate(bh))
+ uptodate = 0;
+
+ /*
+ * set_page_dirty dirties all buffers in a page, independent
+ * of their state. The dirty state however is entirely
+ * meaningless for holes (!mapped && uptodate), so skip
+ * buffers covering holes here.
+ */
+ if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
+ wpc->imap_valid = false;
+ continue;
+ }
+
+ if (buffer_unwritten(bh)) {
+ if (wpc->io_type != XFS_IO_UNWRITTEN) {
+ wpc->io_type = XFS_IO_UNWRITTEN;
+ wpc->imap_valid = false;
+ }
+ } else if (buffer_delay(bh)) {
+ if (wpc->io_type != XFS_IO_DELALLOC) {
+ wpc->io_type = XFS_IO_DELALLOC;
+ wpc->imap_valid = false;
+ }
+ } else if (buffer_uptodate(bh)) {
+ if (wpc->io_type != XFS_IO_OVERWRITE) {
+ wpc->io_type = XFS_IO_OVERWRITE;
+ wpc->imap_valid = false;
+ }
+ } else {
+ if (PageUptodate(page))
+ ASSERT(buffer_mapped(bh));
+ /*
+ * This buffer is not uptodate and will not be
+ * written to disk. Ensure that we will put any
+ * subsequent writeable buffers into a new
+ * ioend.
+ */
+ wpc->imap_valid = false;
+ continue;
+ }
+
+ if (wpc->imap_valid)
+ wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+ offset);
+ if (!wpc->imap_valid) {
+ error = xfs_map_blocks(inode, offset, &wpc->imap,
+ wpc->io_type);
+ if (error)
+ goto out;
+ wpc->imap_valid = xfs_imap_valid(inode, &wpc->imap,
+ offset);
+ }
+ if (wpc->imap_valid) {
+ lock_buffer(bh);
+ if (wpc->io_type != XFS_IO_OVERWRITE)
+ xfs_map_at_offset(inode, bh, &wpc->imap, offset);
+ xfs_add_to_ioend(inode, bh, offset, wpc, &submit_list);
+ count++;
+ }
+
+ } while (offset += len, ((bh = bh->b_this_page) != head));
+
+ if (uptodate && bh == head)
+ SetPageUptodate(page);
+
+ ASSERT(wpc->ioend || list_empty(&submit_list));
+
+out:
+ /*
+ * On error, we have to fail the ioend here because we have locked
+ * buffers in the ioend. If we don't do this, we'll deadlock
+ * invalidating the page as that tries to lock the buffers on the page.
+ * Also, because we may have set pages under writeback, we have to make
+ * sure we run IO completion to mark the error state of the IO
+ * appropriately, so we can't cancel the ioend directly here. That means
+ * we have to mark this page as under writeback if we included any
+ * buffers from it in the ioend chain so that completion treats it
+ * correctly.
+ *
+ * If we didn't include the page in the ioend, the on error we can
+ * simply discard and unlock it as there are no other users of the page
+ * or it's buffers right now. The caller will still need to trigger
+ * submission of outstanding ioends on the writepage context so they are
+ * treated correctly on error.
+ */
+ if (count) {
+ xfs_start_page_writeback(page, !error);
+
+ /*
+ * Preserve the original error if there was one, otherwise catch
+ * submission errors here and propagate into subsequent ioend
+ * submissions.
+ */
+ list_for_each_entry_safe(ioend, next, &submit_list, io_list) {
+ int error2;
+
+ list_del_init(&ioend->io_list);
+ error2 = xfs_submit_ioend(wbc, ioend, error);
+ if (error2 && !error)
+ error = error2;
+ }
+ } else if (error) {
+ xfs_aops_discard_page(page);
+ ClearPageUptodate(page);
+ unlock_page(page);
+ } else {
+ /*
+ * We can end up here with no error and nothing to write if we
+ * race with a partial page truncate on a sub-page block sized
+ * filesystem. In that case we need to mark the page clean.
+ */
+ xfs_start_page_writeback(page, 1);
+ end_page_writeback(page);
+ }
+
+ mapping_set_error(page->mapping, error);
+ return error;
+}
+
+/*
* Write out a dirty page.
*
* For delalloc space on the page we need to allocate space and flush it.
@@ -940,22 +875,16 @@ out_invalidate:
* For any other dirty buffer heads on the page we should flush them.
*/
STATIC int
-xfs_vm_writepage(
+xfs_do_writepage(
struct page *page,
- struct writeback_control *wbc)
+ struct writeback_control *wbc,
+ void *data)
{
+ struct xfs_writepage_ctx *wpc = data;
struct inode *inode = page->mapping->host;
- struct buffer_head *bh, *head;
- struct xfs_bmbt_irec imap;
- xfs_ioend_t *ioend = NULL, *iohead = NULL;
loff_t offset;
- unsigned int type;
__uint64_t end_offset;
- pgoff_t end_index, last_index;
- ssize_t len;
- int err, imap_valid = 0, uptodate = 1;
- int count = 0;
- int nonblocking = 0;
+ pgoff_t end_index;
trace_xfs_writepage(inode, page, 0, 0);
@@ -982,12 +911,9 @@ xfs_vm_writepage(
if (WARN_ON_ONCE(current->flags & PF_FSTRANS))
goto redirty;
- /* Is this page beyond the end of the file? */
- offset = i_size_read(inode);
- end_index = offset >> PAGE_CACHE_SHIFT;
- last_index = (offset - 1) >> PAGE_CACHE_SHIFT;
-
/*
+ * Is this page beyond the end of the file?
+ *
* The page index is less than the end_index, adjust the end_offset
* to the highest offset that this page should represent.
* -----------------------------------------------------
@@ -998,8 +924,10 @@ xfs_vm_writepage(
* | desired writeback range | see else |
* ---------------------------------^------------------|
*/
+ offset = i_size_read(inode);
+ end_index = offset >> PAGE_SHIFT;
if (page->index < end_index)
- end_offset = (xfs_off_t)(page->index + 1) << PAGE_CACHE_SHIFT;
+ end_offset = (xfs_off_t)(page->index + 1) << PAGE_SHIFT;
else {
/*
* Check whether the page to write out is beyond or straddles
@@ -1012,7 +940,7 @@ xfs_vm_writepage(
* | | Straddles |
* ---------------------------------^-----------|--------|
*/
- unsigned offset_into_page = offset & (PAGE_CACHE_SIZE - 1);
+ unsigned offset_into_page = offset & (PAGE_SIZE - 1);
/*
* Skip the page if it is fully outside i_size, e.g. due to a
@@ -1043,158 +971,13 @@ xfs_vm_writepage(
* memory is zeroed when mapped, and writes to that region are
* not written out to the file."
*/
- zero_user_segment(page, offset_into_page, PAGE_CACHE_SIZE);
+ zero_user_segment(page, offset_into_page, PAGE_SIZE);
/* Adjust the end_offset to the end of file */
end_offset = offset;
}
- len = 1 << inode->i_blkbits;
-
- bh = head = page_buffers(page);
- offset = page_offset(page);
- type = XFS_IO_OVERWRITE;
-
- if (wbc->sync_mode == WB_SYNC_NONE)
- nonblocking = 1;
-
- do {
- int new_ioend = 0;
-
- if (offset >= end_offset)
- break;
- if (!buffer_uptodate(bh))
- uptodate = 0;
-
- /*
- * set_page_dirty dirties all buffers in a page, independent
- * of their state. The dirty state however is entirely
- * meaningless for holes (!mapped && uptodate), so skip
- * buffers covering holes here.
- */
- if (!buffer_mapped(bh) && buffer_uptodate(bh)) {
- imap_valid = 0;
- continue;
- }
-
- if (buffer_unwritten(bh)) {
- if (type != XFS_IO_UNWRITTEN) {
- type = XFS_IO_UNWRITTEN;
- imap_valid = 0;
- }
- } else if (buffer_delay(bh)) {
- if (type != XFS_IO_DELALLOC) {
- type = XFS_IO_DELALLOC;
- imap_valid = 0;
- }
- } else if (buffer_uptodate(bh)) {
- if (type != XFS_IO_OVERWRITE) {
- type = XFS_IO_OVERWRITE;
- imap_valid = 0;
- }
- } else {
- if (PageUptodate(page))
- ASSERT(buffer_mapped(bh));
- /*
- * This buffer is not uptodate and will not be
- * written to disk. Ensure that we will put any
- * subsequent writeable buffers into a new
- * ioend.
- */
- imap_valid = 0;
- continue;
- }
-
- if (imap_valid)
- imap_valid = xfs_imap_valid(inode, &imap, offset);
- if (!imap_valid) {
- /*
- * If we didn't have a valid mapping then we need to
- * put the new mapping into a separate ioend structure.
- * This ensures non-contiguous extents always have
- * separate ioends, which is particularly important
- * for unwritten extent conversion at I/O completion
- * time.
- */
- new_ioend = 1;
- err = xfs_map_blocks(inode, offset, &imap, type,
- nonblocking);
- if (err)
- goto error;
- imap_valid = xfs_imap_valid(inode, &imap, offset);
- }
- if (imap_valid) {
- lock_buffer(bh);
- if (type != XFS_IO_OVERWRITE)
- xfs_map_at_offset(inode, bh, &imap, offset);
- xfs_add_to_ioend(inode, bh, offset, type, &ioend,
- new_ioend);
- count++;
- }
-
- if (!iohead)
- iohead = ioend;
-
- } while (offset += len, ((bh = bh->b_this_page) != head));
-
- if (uptodate && bh == head)
- SetPageUptodate(page);
-
- xfs_start_page_writeback(page, 1, count);
-
- /* if there is no IO to be submitted for this page, we are done */
- if (!ioend)
- return 0;
-
- ASSERT(iohead);
-
- /*
- * Any errors from this point onwards need tobe reported through the IO
- * completion path as we have marked the initial page as under writeback
- * and unlocked it.
- */
- if (imap_valid) {
- xfs_off_t end_index;
-
- end_index = imap.br_startoff + imap.br_blockcount;
-
- /* to bytes */
- end_index <<= inode->i_blkbits;
-
- /* to pages */
- end_index = (end_index - 1) >> PAGE_CACHE_SHIFT;
-
- /* check against file size */
- if (end_index > last_index)
- end_index = last_index;
-
- xfs_cluster_write(inode, page->index + 1, &imap, &ioend,
- wbc, end_index);
- }
-
-
- /*
- * Reserve log space if we might write beyond the on-disk inode size.
- */
- err = 0;
- if (ioend->io_type != XFS_IO_UNWRITTEN && xfs_ioend_is_append(ioend))
- err = xfs_setfilesize_trans_alloc(ioend);
-
- xfs_submit_ioend(wbc, iohead, err);
-
- return 0;
-
-error:
- if (iohead)
- xfs_cancel_ioend(iohead);
-
- if (err == -EAGAIN)
- goto redirty;
-
- xfs_aops_discard_page(page);
- ClearPageUptodate(page);
- unlock_page(page);
- return err;
+ return xfs_writepage_map(wpc, wbc, inode, page, offset, end_offset);
redirty:
redirty_page_for_writepage(wbc, page);
@@ -1203,16 +986,40 @@ redirty:
}
STATIC int
+xfs_vm_writepage(
+ struct page *page,
+ struct writeback_control *wbc)
+{
+ struct xfs_writepage_ctx wpc = {
+ .io_type = XFS_IO_INVALID,
+ };
+ int ret;
+
+ ret = xfs_do_writepage(page, wbc, &wpc);
+ if (wpc.ioend)
+ ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
+ return ret;
+}
+
+STATIC int
xfs_vm_writepages(
struct address_space *mapping,
struct writeback_control *wbc)
{
+ struct xfs_writepage_ctx wpc = {
+ .io_type = XFS_IO_INVALID,
+ };
+ int ret;
+
xfs_iflags_clear(XFS_I(mapping->host), XFS_ITRUNCATED);
if (dax_mapping(mapping))
return dax_writeback_mapping_range(mapping,
xfs_find_bdev_for_inode(mapping->host), wbc);
- return generic_writepages(mapping, wbc);
+ ret = write_cache_pages(mapping, wbc, xfs_do_writepage, &wpc);
+ if (wpc.ioend)
+ ret = xfs_submit_ioend(wbc, wpc.ioend, ret);
+ return ret;
}
/*
@@ -1242,27 +1049,8 @@ xfs_vm_releasepage(
}
/*
- * When we map a DIO buffer, we may need to attach an ioend that describes the
- * type of write IO we are doing. This passes to the completion function the
- * operations it needs to perform. If the mapping is for an overwrite wholly
- * within the EOF then we don't need an ioend and so we don't allocate one.
- * This avoids the unnecessary overhead of allocating and freeing ioends for
- * workloads that don't require transactions on IO completion.
- *
- * If we get multiple mappings in a single IO, we might be mapping different
- * types. But because the direct IO can only have a single private pointer, we
- * need to ensure that:
- *
- * a) i) the ioend spans the entire region of unwritten mappings; or
- * ii) the ioend spans all the mappings that cross or are beyond EOF; and
- * b) if it contains unwritten extents, it is *permanently* marked as such
- *
- * We could do this by chaining ioends like buffered IO does, but we only
- * actually get one IO completion callback from the direct IO, and that spans
- * the entire IO regardless of how many mappings and IOs are needed to complete
- * the DIO. There is only going to be one reference to the ioend and its life
- * cycle is constrained by the DIO completion code. hence we don't need
- * reference counting here.
+ * When we map a DIO buffer, we may need to pass flags to
+ * xfs_end_io_direct_write to tell it what kind of write IO we are doing.
*
* Note that for DIO, an IO to the highest supported file block offset (i.e.
* 2^63 - 1FSB bytes) will result in the offset + count overflowing a signed 64
@@ -1270,68 +1058,26 @@ xfs_vm_releasepage(
* extending the file size. We won't know for sure until IO completion is run
* and the actual max write offset is communicated to the IO completion
* routine.
- *
- * For DAX page faults, we are preparing to never see unwritten extents here,
- * nor should we ever extend the inode size. Hence we will soon have nothing to
- * do here for this case, ensuring we don't have to provide an IO completion
- * callback to free an ioend that we don't actually need for a fault into the
- * page at offset (2^63 - 1FSB) bytes.
*/
-
static void
xfs_map_direct(
struct inode *inode,
struct buffer_head *bh_result,
struct xfs_bmbt_irec *imap,
- xfs_off_t offset,
- bool dax_fault)
+ xfs_off_t offset)
{
- struct xfs_ioend *ioend;
+ uintptr_t *flags = (uintptr_t *)&bh_result->b_private;
xfs_off_t size = bh_result->b_size;
- int type;
- if (ISUNWRITTEN(imap))
- type = XFS_IO_UNWRITTEN;
- else
- type = XFS_IO_OVERWRITE;
-
- trace_xfs_gbmap_direct(XFS_I(inode), offset, size, type, imap);
-
- if (dax_fault) {
- ASSERT(type == XFS_IO_OVERWRITE);
- trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
- imap);
- return;
- }
+ trace_xfs_get_blocks_map_direct(XFS_I(inode), offset, size,
+ ISUNWRITTEN(imap) ? XFS_IO_UNWRITTEN : XFS_IO_OVERWRITE, imap);
- if (bh_result->b_private) {
- ioend = bh_result->b_private;
- ASSERT(ioend->io_size > 0);
- ASSERT(offset >= ioend->io_offset);
- if (offset + size > ioend->io_offset + ioend->io_size)
- ioend->io_size = offset - ioend->io_offset + size;
-
- if (type == XFS_IO_UNWRITTEN && type != ioend->io_type)
- ioend->io_type = XFS_IO_UNWRITTEN;
-
- trace_xfs_gbmap_direct_update(XFS_I(inode), ioend->io_offset,
- ioend->io_size, ioend->io_type,
- imap);
- } else if (type == XFS_IO_UNWRITTEN ||
- offset + size > i_size_read(inode) ||
- offset + size < 0) {
- ioend = xfs_alloc_ioend(inode, type);
- ioend->io_offset = offset;
- ioend->io_size = size;
-
- bh_result->b_private = ioend;
+ if (ISUNWRITTEN(imap)) {
+ *flags |= XFS_DIO_FLAG_UNWRITTEN;
+ set_buffer_defer_completion(bh_result);
+ } else if (offset + size > i_size_read(inode) || offset + size < 0) {
+ *flags |= XFS_DIO_FLAG_APPEND;
set_buffer_defer_completion(bh_result);
-
- trace_xfs_gbmap_direct_new(XFS_I(inode), offset, size, type,
- imap);
- } else {
- trace_xfs_gbmap_direct_none(XFS_I(inode), offset, size, type,
- imap);
}
}
@@ -1502,9 +1248,12 @@ __xfs_get_blocks(
if (ISUNWRITTEN(&imap))
set_buffer_unwritten(bh_result);
/* direct IO needs special help */
- if (create && direct)
- xfs_map_direct(inode, bh_result, &imap, offset,
- dax_fault);
+ if (create && direct) {
+ if (dax_fault)
+ ASSERT(!ISUNWRITTEN(&imap));
+ else
+ xfs_map_direct(inode, bh_result, &imap, offset);
+ }
}
/*
@@ -1574,42 +1323,50 @@ xfs_get_blocks_dax_fault(
return __xfs_get_blocks(inode, iblock, bh_result, create, true, true);
}
-static void
-__xfs_end_io_direct_write(
- struct inode *inode,
- struct xfs_ioend *ioend,
+/*
+ * Complete a direct I/O write request.
+ *
+ * xfs_map_direct passes us some flags in the private data to tell us what to
+ * do. If no flags are set, then the write IO is an overwrite wholly within
+ * the existing allocated file size and so there is nothing for us to do.
+ *
+ * Note that in this case the completion can be called in interrupt context,
+ * whereas if we have flags set we will always be called in task context
+ * (i.e. from a workqueue).
+ */
+STATIC int
+xfs_end_io_direct_write(
+ struct kiocb *iocb,
loff_t offset,
- ssize_t size)
+ ssize_t size,
+ void *private)
{
- struct xfs_mount *mp = XFS_I(inode)->i_mount;
+ struct inode *inode = file_inode(iocb->ki_filp);
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ uintptr_t flags = (uintptr_t)private;
+ int error = 0;
- if (XFS_FORCED_SHUTDOWN(mp) || ioend->io_error)
- goto out_end_io;
+ trace_xfs_end_io_direct_write(ip, offset, size);
- /*
- * dio completion end_io functions are only called on writes if more
- * than 0 bytes was written.
- */
- ASSERT(size > 0);
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
- /*
- * The ioend only maps whole blocks, while the IO may be sector aligned.
- * Hence the ioend offset/size may not match the IO offset/size exactly.
- * Because we don't map overwrites within EOF into the ioend, the offset
- * may not match, but only if the endio spans EOF. Either way, write
- * the IO sizes into the ioend so that completion processing does the
- * right thing.
- */
- ASSERT(offset + size <= ioend->io_offset + ioend->io_size);
- ioend->io_size = size;
- ioend->io_offset = offset;
+ if (size <= 0)
+ return size;
/*
- * The ioend tells us whether we are doing unwritten extent conversion
+ * The flags tell us whether we are doing unwritten extent conversions
* or an append transaction that updates the on-disk file size. These
* cases are the only cases where we should *potentially* be needing
* to update the VFS inode size.
- *
+ */
+ if (flags == 0) {
+ ASSERT(offset + size <= i_size_read(inode));
+ return 0;
+ }
+
+ /*
* We need to update the in-core inode size here so that we don't end up
* with the on-disk inode size being outside the in-core inode size. We
* have no other method of updating EOF for AIO, so always do it here
@@ -1620,91 +1377,56 @@ __xfs_end_io_direct_write(
* here can result in EOF moving backwards and Bad Things Happen when
* that occurs.
*/
- spin_lock(&XFS_I(inode)->i_flags_lock);
+ spin_lock(&ip->i_flags_lock);
if (offset + size > i_size_read(inode))
i_size_write(inode, offset + size);
- spin_unlock(&XFS_I(inode)->i_flags_lock);
+ spin_unlock(&ip->i_flags_lock);
- /*
- * If we are doing an append IO that needs to update the EOF on disk,
- * do the transaction reserve now so we can use common end io
- * processing. Stashing the error (if there is one) in the ioend will
- * result in the ioend processing passing on the error if it is
- * possible as we can't return it from here.
- */
- if (ioend->io_type == XFS_IO_OVERWRITE)
- ioend->io_error = xfs_setfilesize_trans_alloc(ioend);
+ if (flags & XFS_DIO_FLAG_UNWRITTEN) {
+ trace_xfs_end_io_direct_write_unwritten(ip, offset, size);
-out_end_io:
- xfs_end_io(&ioend->io_work);
- return;
-}
+ error = xfs_iomap_write_unwritten(ip, offset, size);
+ } else if (flags & XFS_DIO_FLAG_APPEND) {
+ struct xfs_trans *tp;
-/*
- * Complete a direct I/O write request.
- *
- * The ioend structure is passed from __xfs_get_blocks() to tell us what to do.
- * If no ioend exists (i.e. @private == NULL) then the write IO is an overwrite
- * wholly within the EOF and so there is nothing for us to do. Note that in this
- * case the completion can be called in interrupt context, whereas if we have an
- * ioend we will always be called in task context (i.e. from a workqueue).
- */
-STATIC void
-xfs_end_io_direct_write(
- struct kiocb *iocb,
- loff_t offset,
- ssize_t size,
- void *private)
-{
- struct inode *inode = file_inode(iocb->ki_filp);
- struct xfs_ioend *ioend = private;
-
- trace_xfs_gbmap_direct_endio(XFS_I(inode), offset, size,
- ioend ? ioend->io_type : 0, NULL);
+ trace_xfs_end_io_direct_write_append(ip, offset, size);
- if (!ioend) {
- ASSERT(offset + size <= i_size_read(inode));
- return;
+ tp = xfs_trans_alloc(mp, XFS_TRANS_FSYNC_TS);
+ error = xfs_trans_reserve(tp, &M_RES(mp)->tr_fsyncts, 0, 0);
+ if (error) {
+ xfs_trans_cancel(tp);
+ return error;
+ }
+ error = xfs_setfilesize(ip, tp, offset, size);
}
- __xfs_end_io_direct_write(inode, ioend, offset, size);
+ return error;
}
-static inline ssize_t
-xfs_vm_do_dio(
- struct inode *inode,
+STATIC ssize_t
+xfs_vm_direct_IO(
struct kiocb *iocb,
struct iov_iter *iter,
- loff_t offset,
- void (*endio)(struct kiocb *iocb,
- loff_t offset,
- ssize_t size,
- void *private),
- int flags)
+ loff_t offset)
{
+ struct inode *inode = iocb->ki_filp->f_mapping->host;
+ dio_iodone_t *endio = NULL;
+ int flags = 0;
struct block_device *bdev;
- if (IS_DAX(inode))
+ if (iov_iter_rw(iter) == WRITE) {
+ endio = xfs_end_io_direct_write;
+ flags = DIO_ASYNC_EXTEND;
+ }
+
+ if (IS_DAX(inode)) {
return dax_do_io(iocb, inode, iter, offset,
xfs_get_blocks_direct, endio, 0);
+ }
bdev = xfs_find_bdev_for_inode(inode);
return __blockdev_direct_IO(iocb, inode, bdev, iter, offset,
- xfs_get_blocks_direct, endio, NULL, flags);
-}
-
-STATIC ssize_t
-xfs_vm_direct_IO(
- struct kiocb *iocb,
- struct iov_iter *iter,
- loff_t offset)
-{
- struct inode *inode = iocb->ki_filp->f_mapping->host;
-
- if (iov_iter_rw(iter) == WRITE)
- return xfs_vm_do_dio(inode, iocb, iter, offset,
- xfs_end_io_direct_write, DIO_ASYNC_EXTEND);
- return xfs_vm_do_dio(inode, iocb, iter, offset, NULL, 0);
+ xfs_get_blocks_direct, endio, NULL, flags);
}
/*
@@ -1753,9 +1475,10 @@ xfs_vm_write_failed(
loff_t block_offset;
loff_t block_start;
loff_t block_end;
- loff_t from = pos & (PAGE_CACHE_SIZE - 1);
+ loff_t from = pos & (PAGE_SIZE - 1);
loff_t to = from + len;
struct buffer_head *bh, *head;
+ struct xfs_mount *mp = XFS_I(inode)->i_mount;
/*
* The request pos offset might be 32 or 64 bit, this is all fine
@@ -1768,7 +1491,7 @@ xfs_vm_write_failed(
* start of the page by using shifts rather than masks the mismatch
* problem.
*/
- block_offset = (pos >> PAGE_CACHE_SHIFT) << PAGE_CACHE_SHIFT;
+ block_offset = (pos >> PAGE_SHIFT) << PAGE_SHIFT;
ASSERT(block_offset + from == pos);
@@ -1787,14 +1510,23 @@ xfs_vm_write_failed(
if (block_start >= to)
break;
- if (!buffer_delay(bh))
+ /*
+ * Process delalloc and unwritten buffers beyond EOF. We can
+ * encounter unwritten buffers in the event that a file has
+ * post-EOF unwritten extents and an extending write happens to
+ * fail (e.g., an unaligned write that also involves a delalloc
+ * to the same page).
+ */
+ if (!buffer_delay(bh) && !buffer_unwritten(bh))
continue;
- if (!buffer_new(bh) && block_offset < i_size_read(inode))
+ if (!xfs_mp_fail_writes(mp) && !buffer_new(bh) &&
+ block_offset < i_size_read(inode))
continue;
- xfs_vm_kill_delalloc_range(inode, block_offset,
- block_offset + bh->b_size);
+ if (buffer_delay(bh))
+ xfs_vm_kill_delalloc_range(inode, block_offset,
+ block_offset + bh->b_size);
/*
* This buffer does not contain data anymore. make sure anyone
@@ -1805,6 +1537,7 @@ xfs_vm_write_failed(
clear_buffer_mapped(bh);
clear_buffer_new(bh);
clear_buffer_dirty(bh);
+ clear_buffer_unwritten(bh);
}
}
@@ -1825,17 +1558,20 @@ xfs_vm_write_begin(
struct page **pagep,
void **fsdata)
{
- pgoff_t index = pos >> PAGE_CACHE_SHIFT;
+ pgoff_t index = pos >> PAGE_SHIFT;
struct page *page;
int status;
+ struct xfs_mount *mp = XFS_I(mapping->host)->i_mount;
- ASSERT(len <= PAGE_CACHE_SIZE);
+ ASSERT(len <= PAGE_SIZE);
page = grab_cache_page_write_begin(mapping, index, flags);
if (!page)
return -ENOMEM;
status = __block_write_begin(page, pos, len, xfs_get_blocks);
+ if (xfs_mp_fail_writes(mp))
+ status = -EIO;
if (unlikely(status)) {
struct inode *inode = mapping->host;
size_t isize = i_size_read(inode);
@@ -1848,13 +1584,15 @@ xfs_vm_write_begin(
* allocated in this write, not blocks that were previously
* written successfully.
*/
+ if (xfs_mp_fail_writes(mp))
+ isize = 0;
if (pos + len > isize) {
ssize_t start = max_t(ssize_t, pos, isize);
truncate_pagecache_range(inode, start, pos + len);
}
- page_cache_release(page);
+ put_page(page);
page = NULL;
}
@@ -1882,7 +1620,7 @@ xfs_vm_write_end(
{
int ret;
- ASSERT(len <= PAGE_CACHE_SIZE);
+ ASSERT(len <= PAGE_SIZE);
ret = generic_write_end(file, mapping, pos, len, copied, page, fsdata);
if (unlikely(ret < len)) {
@@ -1957,7 +1695,6 @@ xfs_vm_set_page_dirty(
loff_t end_offset;
loff_t offset;
int newly_dirty;
- struct mem_cgroup *memcg;
if (unlikely(!mapping))
return !TestSetPageDirty(page);
@@ -1978,10 +1715,10 @@ xfs_vm_set_page_dirty(
} while (bh != head);
}
/*
- * Use mem_group_begin_page_stat() to keep PageDirty synchronized with
- * per-memcg dirty page counters.
+ * Lock out page->mem_cgroup migration to keep PageDirty
+ * synchronized with per-memcg dirty page counters.
*/
- memcg = mem_cgroup_begin_page_stat(page);
+ lock_page_memcg(page);
newly_dirty = !TestSetPageDirty(page);
spin_unlock(&mapping->private_lock);
@@ -1992,13 +1729,13 @@ xfs_vm_set_page_dirty(
spin_lock_irqsave(&mapping->tree_lock, flags);
if (page->mapping) { /* Race with truncate? */
WARN_ON_ONCE(!PageUptodate(page));
- account_page_dirtied(page, mapping, memcg);
+ account_page_dirtied(page, mapping);
radix_tree_tag_set(&mapping->page_tree,
page_index(page), PAGECACHE_TAG_DIRTY);
}
spin_unlock_irqrestore(&mapping->tree_lock, flags);
}
- mem_cgroup_end_page_stat(memcg);
+ unlock_page_memcg(page);
if (newly_dirty)
__mark_inode_dirty(mapping->host, I_DIRTY_PAGES);
return newly_dirty;
diff --git a/fs/xfs/xfs_aops.h b/fs/xfs/xfs_aops.h
index a4343c63f..b4421177b 100644
--- a/fs/xfs/xfs_aops.h
+++ b/fs/xfs/xfs_aops.h
@@ -24,12 +24,14 @@ extern mempool_t *xfs_ioend_pool;
* Types of I/O for bmap clustering and I/O completion tracking.
*/
enum {
+ XFS_IO_INVALID, /* initial state */
XFS_IO_DELALLOC, /* covers delalloc region */
XFS_IO_UNWRITTEN, /* covers allocated but uninitialized data */
XFS_IO_OVERWRITE, /* covers already allocated extent */
};
#define XFS_IO_TYPES \
+ { XFS_IO_INVALID, "invalid" }, \
{ XFS_IO_DELALLOC, "delalloc" }, \
{ XFS_IO_UNWRITTEN, "unwritten" }, \
{ XFS_IO_OVERWRITE, "overwrite" }
@@ -39,7 +41,7 @@ enum {
* It can manage several multi-page bio's at once.
*/
typedef struct xfs_ioend {
- struct xfs_ioend *io_list; /* next ioend in chain */
+ struct list_head io_list; /* next ioend in chain */
unsigned int io_type; /* delalloc / unwritten */
int io_error; /* I/O error code */
atomic_t io_remaining; /* hold count */
diff --git a/fs/xfs/xfs_bmap_util.c b/fs/xfs/xfs_bmap_util.c
index 6c876012b..3b6309865 100644
--- a/fs/xfs/xfs_bmap_util.c
+++ b/fs/xfs/xfs_bmap_util.c
@@ -203,10 +203,12 @@ xfs_bmap_rtalloc(
ralen = MAXEXTLEN / mp->m_sb.sb_rextsize;
/*
- * Lock out other modifications to the RT bitmap inode.
+ * Lock out modifications to both the RT bitmap and summary inodes
*/
xfs_ilock(mp->m_rbmip, XFS_ILOCK_EXCL);
xfs_trans_ijoin(ap->tp, mp->m_rbmip, XFS_ILOCK_EXCL);
+ xfs_ilock(mp->m_rsumip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(ap->tp, mp->m_rsumip, XFS_ILOCK_EXCL);
/*
* If it's an allocation to an empty file at offset 0,
@@ -822,7 +824,7 @@ bool
xfs_can_free_eofblocks(struct xfs_inode *ip, bool force)
{
/* prealloc/delalloc exists only on regular files */
- if (!S_ISREG(ip->i_d.di_mode))
+ if (!S_ISREG(VFS_I(ip)->i_mode))
return false;
/*
@@ -1235,7 +1237,7 @@ xfs_free_file_space(
/* wait for the completion of any pending DIOs */
inode_dio_wait(VFS_I(ip));
- rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_CACHE_SIZE);
+ rounding = max_t(xfs_off_t, 1 << mp->m_sb.sb_blocklog, PAGE_SIZE);
ioffset = round_down(offset, rounding);
iendoffset = round_up(offset + len, rounding) - 1;
error = filemap_write_and_wait_range(VFS_I(ip)->i_mapping, ioffset,
@@ -1464,7 +1466,7 @@ xfs_shift_file_space(
if (error)
return error;
error = invalidate_inode_pages2_range(VFS_I(ip)->i_mapping,
- offset >> PAGE_CACHE_SHIFT, -1);
+ offset >> PAGE_SHIFT, -1);
if (error)
return error;
@@ -1727,7 +1729,7 @@ xfs_swap_extents(
xfs_lock_two_inodes(ip, tip, XFS_MMAPLOCK_EXCL);
/* Verify that both files have the same format */
- if ((ip->i_d.di_mode & S_IFMT) != (tip->i_d.di_mode & S_IFMT)) {
+ if ((VFS_I(ip)->i_mode & S_IFMT) != (VFS_I(tip)->i_mode & S_IFMT)) {
error = -EINVAL;
goto out_unlock;
}
diff --git a/fs/xfs/xfs_buf.c b/fs/xfs/xfs_buf.c
index 435c7de42..9a2191b91 100644
--- a/fs/xfs/xfs_buf.c
+++ b/fs/xfs/xfs_buf.c
@@ -650,7 +650,7 @@ xfs_buf_read_map(
if (bp) {
trace_xfs_buf_read(bp, flags, _RET_IP_);
- if (!XFS_BUF_ISDONE(bp)) {
+ if (!(bp->b_flags & XBF_DONE)) {
XFS_STATS_INC(target->bt_mount, xb_get_read);
bp->b_ops = ops;
_xfs_buf_read(bp, flags);
diff --git a/fs/xfs/xfs_buf.h b/fs/xfs/xfs_buf.h
index c75721acd..4eb89bd4e 100644
--- a/fs/xfs/xfs_buf.h
+++ b/fs/xfs/xfs_buf.h
@@ -302,6 +302,7 @@ extern void xfs_buf_iomove(xfs_buf_t *, size_t, size_t, void *,
/* Buffer Utility Routines */
extern void *xfs_buf_offset(struct xfs_buf *, size_t);
+extern void xfs_buf_stale(struct xfs_buf *bp);
/* Delayed Write Buffer Routines */
extern bool xfs_buf_delwri_queue(struct xfs_buf *, struct list_head *);
@@ -312,31 +313,6 @@ extern int xfs_buf_delwri_submit_nowait(struct list_head *);
extern int xfs_buf_init(void);
extern void xfs_buf_terminate(void);
-#define XFS_BUF_ZEROFLAGS(bp) \
- ((bp)->b_flags &= ~(XBF_READ|XBF_WRITE|XBF_ASYNC| \
- XBF_SYNCIO|XBF_FUA|XBF_FLUSH| \
- XBF_WRITE_FAIL))
-
-void xfs_buf_stale(struct xfs_buf *bp);
-#define XFS_BUF_UNSTALE(bp) ((bp)->b_flags &= ~XBF_STALE)
-#define XFS_BUF_ISSTALE(bp) ((bp)->b_flags & XBF_STALE)
-
-#define XFS_BUF_DONE(bp) ((bp)->b_flags |= XBF_DONE)
-#define XFS_BUF_UNDONE(bp) ((bp)->b_flags &= ~XBF_DONE)
-#define XFS_BUF_ISDONE(bp) ((bp)->b_flags & XBF_DONE)
-
-#define XFS_BUF_ASYNC(bp) ((bp)->b_flags |= XBF_ASYNC)
-#define XFS_BUF_UNASYNC(bp) ((bp)->b_flags &= ~XBF_ASYNC)
-#define XFS_BUF_ISASYNC(bp) ((bp)->b_flags & XBF_ASYNC)
-
-#define XFS_BUF_READ(bp) ((bp)->b_flags |= XBF_READ)
-#define XFS_BUF_UNREAD(bp) ((bp)->b_flags &= ~XBF_READ)
-#define XFS_BUF_ISREAD(bp) ((bp)->b_flags & XBF_READ)
-
-#define XFS_BUF_WRITE(bp) ((bp)->b_flags |= XBF_WRITE)
-#define XFS_BUF_UNWRITE(bp) ((bp)->b_flags &= ~XBF_WRITE)
-#define XFS_BUF_ISWRITE(bp) ((bp)->b_flags & XBF_WRITE)
-
/*
* These macros use the IO block map rather than b_bn. b_bn is now really
* just for the buffer cache index for cached buffers. As IO does not use b_bn
diff --git a/fs/xfs/xfs_buf_item.c b/fs/xfs/xfs_buf_item.c
index 7e986da34..99e91a0e5 100644
--- a/fs/xfs/xfs_buf_item.c
+++ b/fs/xfs/xfs_buf_item.c
@@ -431,7 +431,7 @@ xfs_buf_item_unpin(
if (freed && stale) {
ASSERT(bip->bli_flags & XFS_BLI_STALE);
ASSERT(xfs_buf_islocked(bp));
- ASSERT(XFS_BUF_ISSTALE(bp));
+ ASSERT(bp->b_flags & XBF_STALE);
ASSERT(bip->__bli_format.blf_flags & XFS_BLF_CANCEL);
trace_xfs_buf_item_unpin_stale(bip);
@@ -493,7 +493,7 @@ xfs_buf_item_unpin(
xfs_buf_hold(bp);
bp->b_flags |= XBF_ASYNC;
xfs_buf_ioerror(bp, -EIO);
- XFS_BUF_UNDONE(bp);
+ bp->b_flags &= ~XBF_DONE;
xfs_buf_stale(bp);
xfs_buf_ioend(bp);
}
@@ -1067,7 +1067,7 @@ xfs_buf_iodone_callbacks(
*/
if (XFS_FORCED_SHUTDOWN(mp)) {
xfs_buf_stale(bp);
- XFS_BUF_DONE(bp);
+ bp->b_flags |= XBF_DONE;
trace_xfs_buf_item_iodone(bp, _RET_IP_);
goto do_callbacks;
}
@@ -1090,7 +1090,7 @@ xfs_buf_iodone_callbacks(
* errors tend to affect the whole device and a failing log write
* will make us give up. But we really ought to do better here.
*/
- if (XFS_BUF_ISASYNC(bp)) {
+ if (bp->b_flags & XBF_ASYNC) {
ASSERT(bp->b_iodone != NULL);
trace_xfs_buf_item_iodone_async(bp, _RET_IP_);
@@ -1113,7 +1113,7 @@ xfs_buf_iodone_callbacks(
* sure to return the error to the caller of xfs_bwrite().
*/
xfs_buf_stale(bp);
- XFS_BUF_DONE(bp);
+ bp->b_flags |= XBF_DONE;
trace_xfs_buf_error_relse(bp, _RET_IP_);
diff --git a/fs/xfs/xfs_dir2_readdir.c b/fs/xfs/xfs_dir2_readdir.c
index 642d55d10..93b3ab0c5 100644
--- a/fs/xfs/xfs_dir2_readdir.c
+++ b/fs/xfs/xfs_dir2_readdir.c
@@ -665,7 +665,7 @@ xfs_readdir(
if (XFS_FORCED_SHUTDOWN(dp->i_mount))
return -EIO;
- ASSERT(S_ISDIR(dp->i_d.di_mode));
+ ASSERT(S_ISDIR(VFS_I(dp)->i_mode));
XFS_STATS_INC(dp->i_mount, xs_dir_getdents);
args.dp = dp;
diff --git a/fs/xfs/xfs_discard.c b/fs/xfs/xfs_discard.c
index e85a9519a..272c3f8b6 100644
--- a/fs/xfs/xfs_discard.c
+++ b/fs/xfs/xfs_discard.c
@@ -227,7 +227,7 @@ xfs_discard_extents(
GFP_NOFS, 0);
if (error && error != -EOPNOTSUPP) {
xfs_info(mp,
- "discard failed for extent [0x%llu,%u], error %d",
+ "discard failed for extent [0x%llx,%u], error %d",
(unsigned long long)busyp->bno,
busyp->length,
error);
diff --git a/fs/xfs/xfs_dquot.c b/fs/xfs/xfs_dquot.c
index 9c44d38dc..316b2a1bd 100644
--- a/fs/xfs/xfs_dquot.c
+++ b/fs/xfs/xfs_dquot.c
@@ -92,26 +92,28 @@ xfs_qm_adjust_dqlimits(
{
struct xfs_quotainfo *q = mp->m_quotainfo;
struct xfs_disk_dquot *d = &dq->q_core;
+ struct xfs_def_quota *defq;
int prealloc = 0;
ASSERT(d->d_id);
+ defq = xfs_get_defquota(dq, q);
- if (q->qi_bsoftlimit && !d->d_blk_softlimit) {
- d->d_blk_softlimit = cpu_to_be64(q->qi_bsoftlimit);
+ if (defq->bsoftlimit && !d->d_blk_softlimit) {
+ d->d_blk_softlimit = cpu_to_be64(defq->bsoftlimit);
prealloc = 1;
}
- if (q->qi_bhardlimit && !d->d_blk_hardlimit) {
- d->d_blk_hardlimit = cpu_to_be64(q->qi_bhardlimit);
+ if (defq->bhardlimit && !d->d_blk_hardlimit) {
+ d->d_blk_hardlimit = cpu_to_be64(defq->bhardlimit);
prealloc = 1;
}
- if (q->qi_isoftlimit && !d->d_ino_softlimit)
- d->d_ino_softlimit = cpu_to_be64(q->qi_isoftlimit);
- if (q->qi_ihardlimit && !d->d_ino_hardlimit)
- d->d_ino_hardlimit = cpu_to_be64(q->qi_ihardlimit);
- if (q->qi_rtbsoftlimit && !d->d_rtb_softlimit)
- d->d_rtb_softlimit = cpu_to_be64(q->qi_rtbsoftlimit);
- if (q->qi_rtbhardlimit && !d->d_rtb_hardlimit)
- d->d_rtb_hardlimit = cpu_to_be64(q->qi_rtbhardlimit);
+ if (defq->isoftlimit && !d->d_ino_softlimit)
+ d->d_ino_softlimit = cpu_to_be64(defq->isoftlimit);
+ if (defq->ihardlimit && !d->d_ino_hardlimit)
+ d->d_ino_hardlimit = cpu_to_be64(defq->ihardlimit);
+ if (defq->rtbsoftlimit && !d->d_rtb_softlimit)
+ d->d_rtb_softlimit = cpu_to_be64(defq->rtbsoftlimit);
+ if (defq->rtbhardlimit && !d->d_rtb_hardlimit)
+ d->d_rtb_hardlimit = cpu_to_be64(defq->rtbhardlimit);
if (prealloc)
xfs_dquot_set_prealloc_limits(dq);
@@ -232,7 +234,8 @@ xfs_qm_init_dquot_blk(
{
struct xfs_quotainfo *q = mp->m_quotainfo;
xfs_dqblk_t *d;
- int curid, i;
+ xfs_dqid_t curid;
+ int i;
ASSERT(tp);
ASSERT(xfs_buf_islocked(bp));
@@ -243,7 +246,6 @@ xfs_qm_init_dquot_blk(
* ID of the first dquot in the block - id's are zero based.
*/
curid = id - (id % q->qi_dqperchunk);
- ASSERT(curid >= 0);
memset(d, 0, BBTOB(q->qi_dqchunklen));
for (i = 0; i < q->qi_dqperchunk; i++, d++, curid++) {
d->dd_diskdq.d_magic = cpu_to_be16(XFS_DQUOT_MAGIC);
@@ -464,12 +466,13 @@ xfs_qm_dqtobp(
struct xfs_bmbt_irec map;
int nmaps = 1, error;
struct xfs_buf *bp;
- struct xfs_inode *quotip = xfs_dq_to_quota_inode(dqp);
+ struct xfs_inode *quotip;
struct xfs_mount *mp = dqp->q_mount;
xfs_dqid_t id = be32_to_cpu(dqp->q_core.d_id);
struct xfs_trans *tp = (tpp ? *tpp : NULL);
uint lock_mode;
+ quotip = xfs_quota_inode(dqp->q_mount, dqp->dq_flags);
dqp->q_fileoffset = (xfs_fileoff_t)id / mp->m_quotainfo->qi_dqperchunk;
lock_mode = xfs_ilock_data_map_shared(quotip);
@@ -685,6 +688,56 @@ error0:
}
/*
+ * Advance to the next id in the current chunk, or if at the
+ * end of the chunk, skip ahead to first id in next allocated chunk
+ * using the SEEK_DATA interface.
+ */
+int
+xfs_dq_get_next_id(
+ xfs_mount_t *mp,
+ uint type,
+ xfs_dqid_t *id,
+ loff_t eof)
+{
+ struct xfs_inode *quotip;
+ xfs_fsblock_t start;
+ loff_t offset;
+ uint lock;
+ xfs_dqid_t next_id;
+ int error = 0;
+
+ /* Simple advance */
+ next_id = *id + 1;
+
+ /* If new ID is within the current chunk, advancing it sufficed */
+ if (next_id % mp->m_quotainfo->qi_dqperchunk) {
+ *id = next_id;
+ return 0;
+ }
+
+ /* Nope, next_id is now past the current chunk, so find the next one */
+ start = (xfs_fsblock_t)next_id / mp->m_quotainfo->qi_dqperchunk;
+
+ quotip = xfs_quota_inode(mp, type);
+ lock = xfs_ilock_data_map_shared(quotip);
+
+ offset = __xfs_seek_hole_data(VFS_I(quotip), XFS_FSB_TO_B(mp, start),
+ eof, SEEK_DATA);
+ if (offset < 0)
+ error = offset;
+
+ xfs_iunlock(quotip, lock);
+
+ /* -ENXIO is essentially "no more data" */
+ if (error)
+ return (error == -ENXIO ? -ENOENT: error);
+
+ /* Convert next data offset back to a quota id */
+ *id = XFS_B_TO_FSB(mp, offset) * mp->m_quotainfo->qi_dqperchunk;
+ return 0;
+}
+
+/*
* Given the file system, inode OR id, and type (UDQUOT/GDQUOT), return a
* a locked dquot, doing an allocation (if requested) as needed.
* When both an inode and an id are given, the inode's id takes precedence.
@@ -704,6 +757,7 @@ xfs_qm_dqget(
struct xfs_quotainfo *qi = mp->m_quotainfo;
struct radix_tree_root *tree = xfs_dquot_tree(qi, type);
struct xfs_dquot *dqp;
+ loff_t eof = 0;
int error;
ASSERT(XFS_IS_QUOTA_RUNNING(mp));
@@ -731,6 +785,21 @@ xfs_qm_dqget(
}
#endif
+ /* Get the end of the quota file if we need it */
+ if (flags & XFS_QMOPT_DQNEXT) {
+ struct xfs_inode *quotip;
+ xfs_fileoff_t last;
+ uint lock_mode;
+
+ quotip = xfs_quota_inode(mp, type);
+ lock_mode = xfs_ilock_data_map_shared(quotip);
+ error = xfs_bmap_last_offset(quotip, &last, XFS_DATA_FORK);
+ xfs_iunlock(quotip, lock_mode);
+ if (error)
+ return error;
+ eof = XFS_FSB_TO_B(mp, last);
+ }
+
restart:
mutex_lock(&qi->qi_tree_lock);
dqp = radix_tree_lookup(tree, id);
@@ -744,6 +813,18 @@ restart:
goto restart;
}
+ /* uninit / unused quota found in radix tree, keep looking */
+ if (flags & XFS_QMOPT_DQNEXT) {
+ if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+ xfs_dqunlock(dqp);
+ mutex_unlock(&qi->qi_tree_lock);
+ error = xfs_dq_get_next_id(mp, type, &id, eof);
+ if (error)
+ return error;
+ goto restart;
+ }
+ }
+
dqp->q_nrefs++;
mutex_unlock(&qi->qi_tree_lock);
@@ -770,6 +851,13 @@ restart:
if (ip)
xfs_ilock(ip, XFS_ILOCK_EXCL);
+ /* If we are asked to find next active id, keep looking */
+ if (error == -ENOENT && (flags & XFS_QMOPT_DQNEXT)) {
+ error = xfs_dq_get_next_id(mp, type, &id, eof);
+ if (!error)
+ goto restart;
+ }
+
if (error)
return error;
@@ -820,6 +908,17 @@ restart:
qi->qi_dquots++;
mutex_unlock(&qi->qi_tree_lock);
+ /* If we are asked to find next active id, keep looking */
+ if (flags & XFS_QMOPT_DQNEXT) {
+ if (XFS_IS_DQUOT_UNINITIALIZED(dqp)) {
+ xfs_qm_dqput(dqp);
+ error = xfs_dq_get_next_id(mp, type, &id, eof);
+ if (error)
+ return error;
+ goto restart;
+ }
+ }
+
dqret:
ASSERT((ip == NULL) || xfs_isilocked(ip, XFS_ILOCK_EXCL));
trace_xfs_dqget_miss(dqp);
diff --git a/fs/xfs/xfs_export.c b/fs/xfs/xfs_export.c
index 652cd3c5b..a1b2dd828 100644
--- a/fs/xfs/xfs_export.c
+++ b/fs/xfs/xfs_export.c
@@ -152,7 +152,7 @@ xfs_nfs_get_inode(
return ERR_PTR(error);
}
- if (ip->i_d.di_gen != generation) {
+ if (VFS_I(ip)->i_generation != generation) {
IRELE(ip);
return ERR_PTR(-ESTALE);
}
@@ -246,7 +246,7 @@ const struct export_operations xfs_export_operations = {
.fh_to_parent = xfs_fs_fh_to_parent,
.get_parent = xfs_fs_get_parent,
.commit_metadata = xfs_fs_nfs_commit_metadata,
-#ifdef CONFIG_NFSD_PNFS
+#ifdef CONFIG_NFSD_BLOCKLAYOUT
.get_uuid = xfs_fs_get_uuid,
.map_blocks = xfs_fs_map_blocks,
.commit_blocks = xfs_fs_commit_blocks,
diff --git a/fs/xfs/xfs_file.c b/fs/xfs/xfs_file.c
index 52883ac3c..569938a4a 100644
--- a/fs/xfs/xfs_file.c
+++ b/fs/xfs/xfs_file.c
@@ -106,8 +106,8 @@ xfs_iozero(
unsigned offset, bytes;
void *fsdata;
- offset = (pos & (PAGE_CACHE_SIZE -1)); /* Within page */
- bytes = PAGE_CACHE_SIZE - offset;
+ offset = (pos & (PAGE_SIZE -1)); /* Within page */
+ bytes = PAGE_SIZE - offset;
if (bytes > count)
bytes = count;
@@ -156,9 +156,9 @@ xfs_update_prealloc_flags(
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
if (!(flags & XFS_PREALLOC_INVISIBLE)) {
- ip->i_d.di_mode &= ~S_ISUID;
- if (ip->i_d.di_mode & S_IXGRP)
- ip->i_d.di_mode &= ~S_ISGID;
+ VFS_I(ip)->i_mode &= ~S_ISUID;
+ if (VFS_I(ip)->i_mode & S_IXGRP)
+ VFS_I(ip)->i_mode &= ~S_ISGID;
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG);
}
@@ -799,8 +799,8 @@ xfs_file_dio_aio_write(
/* see generic_file_direct_write() for why this is necessary */
if (mapping->nrpages) {
invalidate_inode_pages2_range(mapping,
- pos >> PAGE_CACHE_SHIFT,
- end >> PAGE_CACHE_SHIFT);
+ pos >> PAGE_SHIFT,
+ end >> PAGE_SHIFT);
}
if (ret > 0) {
@@ -1207,9 +1207,9 @@ xfs_find_get_desired_pgoff(
pagevec_init(&pvec, 0);
- index = startoff >> PAGE_CACHE_SHIFT;
+ index = startoff >> PAGE_SHIFT;
endoff = XFS_FSB_TO_B(mp, map->br_startoff + map->br_blockcount);
- end = endoff >> PAGE_CACHE_SHIFT;
+ end = endoff >> PAGE_SHIFT;
do {
int want;
unsigned nr_pages;
@@ -1337,31 +1337,31 @@ out:
return found;
}
-STATIC loff_t
-xfs_seek_hole_data(
- struct file *file,
+/*
+ * caller must lock inode with xfs_ilock_data_map_shared,
+ * can we craft an appropriate ASSERT?
+ *
+ * end is because the VFS-level lseek interface is defined such that any
+ * offset past i_size shall return -ENXIO, but we use this for quota code
+ * which does not maintain i_size, and we want to SEEK_DATA past i_size.
+ */
+loff_t
+__xfs_seek_hole_data(
+ struct inode *inode,
loff_t start,
+ loff_t end,
int whence)
{
- struct inode *inode = file->f_mapping->host;
struct xfs_inode *ip = XFS_I(inode);
struct xfs_mount *mp = ip->i_mount;
loff_t uninitialized_var(offset);
- xfs_fsize_t isize;
xfs_fileoff_t fsbno;
- xfs_filblks_t end;
- uint lock;
+ xfs_filblks_t lastbno;
int error;
- if (XFS_FORCED_SHUTDOWN(mp))
- return -EIO;
-
- lock = xfs_ilock_data_map_shared(ip);
-
- isize = i_size_read(inode);
- if (start >= isize) {
+ if (start >= end) {
error = -ENXIO;
- goto out_unlock;
+ goto out_error;
}
/*
@@ -1369,22 +1369,22 @@ xfs_seek_hole_data(
* by fsbno to the end block of the file.
*/
fsbno = XFS_B_TO_FSBT(mp, start);
- end = XFS_B_TO_FSB(mp, isize);
+ lastbno = XFS_B_TO_FSB(mp, end);
for (;;) {
struct xfs_bmbt_irec map[2];
int nmap = 2;
unsigned int i;
- error = xfs_bmapi_read(ip, fsbno, end - fsbno, map, &nmap,
+ error = xfs_bmapi_read(ip, fsbno, lastbno - fsbno, map, &nmap,
XFS_BMAPI_ENTIRE);
if (error)
- goto out_unlock;
+ goto out_error;
/* No extents at given offset, must be beyond EOF */
if (nmap == 0) {
error = -ENXIO;
- goto out_unlock;
+ goto out_error;
}
for (i = 0; i < nmap; i++) {
@@ -1426,7 +1426,7 @@ xfs_seek_hole_data(
* hole at the end of any file).
*/
if (whence == SEEK_HOLE) {
- offset = isize;
+ offset = end;
break;
}
/*
@@ -1434,7 +1434,7 @@ xfs_seek_hole_data(
*/
ASSERT(whence == SEEK_DATA);
error = -ENXIO;
- goto out_unlock;
+ goto out_error;
}
ASSERT(i > 1);
@@ -1445,14 +1445,14 @@ xfs_seek_hole_data(
*/
fsbno = map[i - 1].br_startoff + map[i - 1].br_blockcount;
start = XFS_FSB_TO_B(mp, fsbno);
- if (start >= isize) {
+ if (start >= end) {
if (whence == SEEK_HOLE) {
- offset = isize;
+ offset = end;
break;
}
ASSERT(whence == SEEK_DATA);
error = -ENXIO;
- goto out_unlock;
+ goto out_error;
}
}
@@ -1464,7 +1464,39 @@ out:
* situation in particular.
*/
if (whence == SEEK_HOLE)
- offset = min_t(loff_t, offset, isize);
+ offset = min_t(loff_t, offset, end);
+
+ return offset;
+
+out_error:
+ return error;
+}
+
+STATIC loff_t
+xfs_seek_hole_data(
+ struct file *file,
+ loff_t start,
+ int whence)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct xfs_inode *ip = XFS_I(inode);
+ struct xfs_mount *mp = ip->i_mount;
+ uint lock;
+ loff_t offset, end;
+ int error = 0;
+
+ if (XFS_FORCED_SHUTDOWN(mp))
+ return -EIO;
+
+ lock = xfs_ilock_data_map_shared(ip);
+
+ end = i_size_read(inode);
+ offset = __xfs_seek_hole_data(inode, start, end, whence);
+ if (offset < 0) {
+ error = offset;
+ goto out_unlock;
+ }
+
offset = vfs_setpos(file, offset, inode->i_sb->s_maxbytes);
out_unlock:
diff --git a/fs/xfs/xfs_filestream.c b/fs/xfs/xfs_filestream.c
index c4c130f9b..a51353a1f 100644
--- a/fs/xfs/xfs_filestream.c
+++ b/fs/xfs/xfs_filestream.c
@@ -151,7 +151,7 @@ xfs_filestream_pick_ag(
xfs_agnumber_t ag, max_ag = NULLAGNUMBER;
int err, trylock, nscan;
- ASSERT(S_ISDIR(ip->i_d.di_mode));
+ ASSERT(S_ISDIR(VFS_I(ip)->i_mode));
/* 2% of an AG's blocks must be free for it to be chosen. */
minfree = mp->m_sb.sb_agblocks / 50;
@@ -319,7 +319,7 @@ xfs_filestream_lookup_ag(
xfs_agnumber_t startag, ag = NULLAGNUMBER;
struct xfs_mru_cache_elem *mru;
- ASSERT(S_ISREG(ip->i_d.di_mode));
+ ASSERT(S_ISREG(VFS_I(ip)->i_mode));
pip = xfs_filestream_get_parent(ip);
if (!pip)
diff --git a/fs/xfs/xfs_fsops.c b/fs/xfs/xfs_fsops.c
index ee3aaa0a5..ca0d3eb44 100644
--- a/fs/xfs/xfs_fsops.c
+++ b/fs/xfs/xfs_fsops.c
@@ -243,8 +243,8 @@ xfs_growfs_data_private(
agf->agf_roots[XFS_BTNUM_CNTi] = cpu_to_be32(XFS_CNT_BLOCK(mp));
agf->agf_levels[XFS_BTNUM_BNOi] = cpu_to_be32(1);
agf->agf_levels[XFS_BTNUM_CNTi] = cpu_to_be32(1);
- agf->agf_flfirst = 0;
- agf->agf_fllast = cpu_to_be32(XFS_AGFL_SIZE(mp) - 1);
+ agf->agf_flfirst = cpu_to_be32(1);
+ agf->agf_fllast = 0;
agf->agf_flcount = 0;
tmpsize = agsize - XFS_PREALLOC_BLOCKS(mp);
agf->agf_freeblks = cpu_to_be32(tmpsize);
diff --git a/fs/xfs/xfs_fsops.h b/fs/xfs/xfs_fsops.h
index 1b6a98b66..f32713f14 100644
--- a/fs/xfs/xfs_fsops.h
+++ b/fs/xfs/xfs_fsops.h
@@ -25,6 +25,5 @@ extern int xfs_fs_counts(xfs_mount_t *mp, xfs_fsop_counts_t *cnt);
extern int xfs_reserve_blocks(xfs_mount_t *mp, __uint64_t *inval,
xfs_fsop_resblks_t *outval);
extern int xfs_fs_goingdown(xfs_mount_t *mp, __uint32_t inflags);
-extern int xfs_fs_log_dummy(struct xfs_mount *mp);
#endif /* __XFS_FSOPS_H__ */
diff --git a/fs/xfs/xfs_icache.c b/fs/xfs/xfs_icache.c
index d7a490f24..bf2d60749 100644
--- a/fs/xfs/xfs_icache.c
+++ b/fs/xfs/xfs_icache.c
@@ -63,6 +63,9 @@ xfs_inode_alloc(
return NULL;
}
+ /* VFS doesn't initialise i_mode! */
+ VFS_I(ip)->i_mode = 0;
+
XFS_STATS_INC(mp, vn_active);
ASSERT(atomic_read(&ip->i_pincount) == 0);
ASSERT(!spin_is_locked(&ip->i_flags_lock));
@@ -79,7 +82,7 @@ xfs_inode_alloc(
memset(&ip->i_df, 0, sizeof(xfs_ifork_t));
ip->i_flags = 0;
ip->i_delayed_blks = 0;
- memset(&ip->i_d, 0, sizeof(xfs_icdinode_t));
+ memset(&ip->i_d, 0, sizeof(ip->i_d));
return ip;
}
@@ -98,7 +101,7 @@ void
xfs_inode_free(
struct xfs_inode *ip)
{
- switch (ip->i_d.di_mode & S_IFMT) {
+ switch (VFS_I(ip)->i_mode & S_IFMT) {
case S_IFREG:
case S_IFDIR:
case S_IFLNK:
@@ -135,6 +138,34 @@ xfs_inode_free(
}
/*
+ * When we recycle a reclaimable inode, we need to re-initialise the VFS inode
+ * part of the structure. This is made more complex by the fact we store
+ * information about the on-disk values in the VFS inode and so we can't just
+ * overwrite the values unconditionally. Hence we save the parameters we
+ * need to retain across reinitialisation, and rewrite them into the VFS inode
+ * after reinitialisation even if it fails.
+ */
+static int
+xfs_reinit_inode(
+ struct xfs_mount *mp,
+ struct inode *inode)
+{
+ int error;
+ uint32_t nlink = inode->i_nlink;
+ uint32_t generation = inode->i_generation;
+ uint64_t version = inode->i_version;
+ umode_t mode = inode->i_mode;
+
+ error = inode_init_always(mp->m_super, inode);
+
+ set_nlink(inode, nlink);
+ inode->i_generation = generation;
+ inode->i_version = version;
+ inode->i_mode = mode;
+ return error;
+}
+
+/*
* Check the validity of the inode we just found it the cache
*/
static int
@@ -185,7 +216,7 @@ xfs_iget_cache_hit(
/*
* If lookup is racing with unlink return an error immediately.
*/
- if (ip->i_d.di_mode == 0 && !(flags & XFS_IGET_CREATE)) {
+ if (VFS_I(ip)->i_mode == 0 && !(flags & XFS_IGET_CREATE)) {
error = -ENOENT;
goto out_error;
}
@@ -208,7 +239,7 @@ xfs_iget_cache_hit(
spin_unlock(&ip->i_flags_lock);
rcu_read_unlock();
- error = inode_init_always(mp->m_super, inode);
+ error = xfs_reinit_inode(mp, inode);
if (error) {
/*
* Re-initializing the inode failed, and we are in deep
@@ -295,7 +326,7 @@ xfs_iget_cache_miss(
trace_xfs_iget_miss(ip);
- if ((ip->i_d.di_mode == 0) && !(flags & XFS_IGET_CREATE)) {
+ if ((VFS_I(ip)->i_mode == 0) && !(flags & XFS_IGET_CREATE)) {
error = -ENOENT;
goto out_destroy;
}
@@ -444,7 +475,7 @@ again:
* If we have a real type for an on-disk inode, we can setup the inode
* now. If it's a new inode being created, xfs_ialloc will handle it.
*/
- if (xfs_iflags_test(ip, XFS_INEW) && ip->i_d.di_mode != 0)
+ if (xfs_iflags_test(ip, XFS_INEW) && VFS_I(ip)->i_mode != 0)
xfs_setup_existing_inode(ip);
return 0;
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index ceba1a83c..3cbc90317 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -57,9 +57,9 @@ kmem_zone_t *xfs_inode_zone;
*/
#define XFS_ITRUNC_MAX_EXTENTS 2
-STATIC int xfs_iflush_int(xfs_inode_t *, xfs_buf_t *);
-
-STATIC int xfs_iunlink_remove(xfs_trans_t *, xfs_inode_t *);
+STATIC int xfs_iflush_int(struct xfs_inode *, struct xfs_buf *);
+STATIC int xfs_iunlink(struct xfs_trans *, struct xfs_inode *);
+STATIC int xfs_iunlink_remove(struct xfs_trans *, struct xfs_inode *);
/*
* helper function to extract extent size hint from inode
@@ -766,6 +766,7 @@ xfs_ialloc(
uint flags;
int error;
struct timespec tv;
+ struct inode *inode;
/*
* Call the space management code to pick
@@ -791,6 +792,7 @@ xfs_ialloc(
if (error)
return error;
ASSERT(ip != NULL);
+ inode = VFS_I(ip);
/*
* We always convert v1 inodes to v2 now - we only support filesystems
@@ -800,20 +802,16 @@ xfs_ialloc(
if (ip->i_d.di_version == 1)
ip->i_d.di_version = 2;
- ip->i_d.di_mode = mode;
- ip->i_d.di_onlink = 0;
- ip->i_d.di_nlink = nlink;
- ASSERT(ip->i_d.di_nlink == nlink);
+ inode->i_mode = mode;
+ set_nlink(inode, nlink);
ip->i_d.di_uid = xfs_kuid_to_uid(current_fsuid());
ip->i_d.di_gid = xfs_kgid_to_gid(current_fsgid());
xfs_set_projid(ip, prid);
- memset(&(ip->i_d.di_pad[0]), 0, sizeof(ip->i_d.di_pad));
if (pip && XFS_INHERIT_GID(pip)) {
ip->i_d.di_gid = pip->i_d.di_gid;
- if ((pip->i_d.di_mode & S_ISGID) && S_ISDIR(mode)) {
- ip->i_d.di_mode |= S_ISGID;
- }
+ if ((VFS_I(pip)->i_mode & S_ISGID) && S_ISDIR(mode))
+ inode->i_mode |= S_ISGID;
}
/*
@@ -822,38 +820,29 @@ xfs_ialloc(
* (and only if the irix_sgid_inherit compatibility variable is set).
*/
if ((irix_sgid_inherit) &&
- (ip->i_d.di_mode & S_ISGID) &&
- (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid)))) {
- ip->i_d.di_mode &= ~S_ISGID;
- }
+ (inode->i_mode & S_ISGID) &&
+ (!in_group_p(xfs_gid_to_kgid(ip->i_d.di_gid))))
+ inode->i_mode &= ~S_ISGID;
ip->i_d.di_size = 0;
ip->i_d.di_nextents = 0;
ASSERT(ip->i_d.di_nblocks == 0);
tv = current_fs_time(mp->m_super);
- ip->i_d.di_mtime.t_sec = (__int32_t)tv.tv_sec;
- ip->i_d.di_mtime.t_nsec = (__int32_t)tv.tv_nsec;
- ip->i_d.di_atime = ip->i_d.di_mtime;
- ip->i_d.di_ctime = ip->i_d.di_mtime;
+ inode->i_mtime = tv;
+ inode->i_atime = tv;
+ inode->i_ctime = tv;
- /*
- * di_gen will have been taken care of in xfs_iread.
- */
ip->i_d.di_extsize = 0;
ip->i_d.di_dmevmask = 0;
ip->i_d.di_dmstate = 0;
ip->i_d.di_flags = 0;
if (ip->i_d.di_version == 3) {
- ASSERT(ip->i_d.di_ino == ino);
- ASSERT(uuid_equal(&ip->i_d.di_uuid, &mp->m_sb.sb_meta_uuid));
- ip->i_d.di_crc = 0;
- ip->i_d.di_changecount = 1;
- ip->i_d.di_lsn = 0;
+ inode->i_version = 1;
ip->i_d.di_flags2 = 0;
- memset(&(ip->i_d.di_pad2[0]), 0, sizeof(ip->i_d.di_pad2));
- ip->i_d.di_crtime = ip->i_d.di_mtime;
+ ip->i_d.di_crtime.t_sec = (__int32_t)tv.tv_sec;
+ ip->i_d.di_crtime.t_nsec = (__int32_t)tv.tv_nsec;
}
@@ -1092,35 +1081,24 @@ xfs_dir_ialloc(
}
/*
- * Decrement the link count on an inode & log the change.
- * If this causes the link count to go to zero, initiate the
- * logging activity required to truncate a file.
+ * Decrement the link count on an inode & log the change. If this causes the
+ * link count to go to zero, move the inode to AGI unlinked list so that it can
+ * be freed when the last active reference goes away via xfs_inactive().
*/
int /* error */
xfs_droplink(
xfs_trans_t *tp,
xfs_inode_t *ip)
{
- int error;
-
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
- ASSERT (ip->i_d.di_nlink > 0);
- ip->i_d.di_nlink--;
drop_nlink(VFS_I(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
- error = 0;
- if (ip->i_d.di_nlink == 0) {
- /*
- * We're dropping the last link to this file.
- * Move the on-disk inode to the AGI unlinked list.
- * From xfs_inactive() we will pull the inode from
- * the list and free it.
- */
- error = xfs_iunlink(tp, ip);
- }
- return error;
+ if (VFS_I(ip)->i_nlink)
+ return 0;
+
+ return xfs_iunlink(tp, ip);
}
/*
@@ -1134,8 +1112,6 @@ xfs_bumplink(
xfs_trans_ichgtime(tp, ip, XFS_ICHGTIME_CHG);
ASSERT(ip->i_d.di_version > 1);
- ASSERT(ip->i_d.di_nlink > 0 || (VFS_I(ip)->i_state & I_LINKABLE));
- ip->i_d.di_nlink++;
inc_nlink(VFS_I(ip));
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
return 0;
@@ -1393,7 +1369,6 @@ xfs_create_tmpfile(
*/
xfs_qm_vop_create_dqattach(tp, ip, udqp, gdqp, pdqp);
- ip->i_d.di_nlink--;
error = xfs_iunlink(tp, ip);
if (error)
goto out_trans_cancel;
@@ -1444,7 +1419,7 @@ xfs_link(
trace_xfs_link(tdp, target_name);
- ASSERT(!S_ISDIR(sip->i_d.di_mode));
+ ASSERT(!S_ISDIR(VFS_I(sip)->i_mode));
if (XFS_FORCED_SHUTDOWN(mp))
return -EIO;
@@ -1492,7 +1467,10 @@ xfs_link(
xfs_bmap_init(&free_list, &first_block);
- if (sip->i_d.di_nlink == 0) {
+ /*
+ * Handle initial link state of O_TMPFILE inode
+ */
+ if (VFS_I(sip)->i_nlink == 0) {
error = xfs_iunlink_remove(tp, sip);
if (error)
goto error_return;
@@ -1648,7 +1626,7 @@ xfs_release(
xfs_mount_t *mp = ip->i_mount;
int error;
- if (!S_ISREG(ip->i_d.di_mode) || (ip->i_d.di_mode == 0))
+ if (!S_ISREG(VFS_I(ip)->i_mode) || (VFS_I(ip)->i_mode == 0))
return 0;
/* If this is a read-only mount, don't do this (would generate I/O) */
@@ -1679,7 +1657,7 @@ xfs_release(
}
}
- if (ip->i_d.di_nlink == 0)
+ if (VFS_I(ip)->i_nlink == 0)
return 0;
if (xfs_can_free_eofblocks(ip, false)) {
@@ -1883,7 +1861,7 @@ xfs_inactive(
* If the inode is already free, then there can be nothing
* to clean up here.
*/
- if (ip->i_d.di_mode == 0) {
+ if (VFS_I(ip)->i_mode == 0) {
ASSERT(ip->i_df.if_real_bytes == 0);
ASSERT(ip->i_df.if_broot_bytes == 0);
return;
@@ -1895,7 +1873,7 @@ xfs_inactive(
if (mp->m_flags & XFS_MOUNT_RDONLY)
return;
- if (ip->i_d.di_nlink != 0) {
+ if (VFS_I(ip)->i_nlink != 0) {
/*
* force is true because we are evicting an inode from the
* cache. Post-eof blocks must be freed, lest we end up with
@@ -1907,7 +1885,7 @@ xfs_inactive(
return;
}
- if (S_ISREG(ip->i_d.di_mode) &&
+ if (S_ISREG(VFS_I(ip)->i_mode) &&
(ip->i_d.di_size != 0 || XFS_ISIZE(ip) != 0 ||
ip->i_d.di_nextents > 0 || ip->i_delayed_blks > 0))
truncate = 1;
@@ -1916,7 +1894,7 @@ xfs_inactive(
if (error)
return;
- if (S_ISLNK(ip->i_d.di_mode))
+ if (S_ISLNK(VFS_I(ip)->i_mode))
error = xfs_inactive_symlink(ip);
else if (truncate)
error = xfs_inactive_truncate(ip);
@@ -1952,16 +1930,21 @@ xfs_inactive(
}
/*
- * This is called when the inode's link count goes to 0.
- * We place the on-disk inode on a list in the AGI. It
- * will be pulled from this list when the inode is freed.
+ * This is called when the inode's link count goes to 0 or we are creating a
+ * tmpfile via O_TMPFILE. In the case of a tmpfile, @ignore_linkcount will be
+ * set to true as the link count is dropped to zero by the VFS after we've
+ * created the file successfully, so we have to add it to the unlinked list
+ * while the link count is non-zero.
+ *
+ * We place the on-disk inode on a list in the AGI. It will be pulled from this
+ * list when the inode is freed.
*/
-int
+STATIC int
xfs_iunlink(
- xfs_trans_t *tp,
- xfs_inode_t *ip)
+ struct xfs_trans *tp,
+ struct xfs_inode *ip)
{
- xfs_mount_t *mp;
+ xfs_mount_t *mp = tp->t_mountp;
xfs_agi_t *agi;
xfs_dinode_t *dip;
xfs_buf_t *agibp;
@@ -1971,10 +1954,7 @@ xfs_iunlink(
int offset;
int error;
- ASSERT(ip->i_d.di_nlink == 0);
- ASSERT(ip->i_d.di_mode != 0);
-
- mp = tp->t_mountp;
+ ASSERT(VFS_I(ip)->i_mode != 0);
/*
* Get the agi buffer first. It ensures lock ordering
@@ -2412,10 +2392,10 @@ xfs_ifree(
struct xfs_icluster xic = { 0 };
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ASSERT(ip->i_d.di_nlink == 0);
+ ASSERT(VFS_I(ip)->i_nlink == 0);
ASSERT(ip->i_d.di_nextents == 0);
ASSERT(ip->i_d.di_anextents == 0);
- ASSERT(ip->i_d.di_size == 0 || !S_ISREG(ip->i_d.di_mode));
+ ASSERT(ip->i_d.di_size == 0 || !S_ISREG(VFS_I(ip)->i_mode));
ASSERT(ip->i_d.di_nblocks == 0);
/*
@@ -2429,7 +2409,7 @@ xfs_ifree(
if (error)
return error;
- ip->i_d.di_mode = 0; /* mark incore inode as free */
+ VFS_I(ip)->i_mode = 0; /* mark incore inode as free */
ip->i_d.di_flags = 0;
ip->i_d.di_dmevmask = 0;
ip->i_d.di_forkoff = 0; /* mark the attr fork not in use */
@@ -2439,7 +2419,7 @@ xfs_ifree(
* Bump the generation count so no one will be confused
* by reincarnations of this inode.
*/
- ip->i_d.di_gen++;
+ VFS_I(ip)->i_generation++;
xfs_trans_log_inode(tp, ip, XFS_ILOG_CORE);
if (xic.deleted)
@@ -2526,7 +2506,7 @@ xfs_remove(
{
xfs_mount_t *mp = dp->i_mount;
xfs_trans_t *tp = NULL;
- int is_dir = S_ISDIR(ip->i_d.di_mode);
+ int is_dir = S_ISDIR(VFS_I(ip)->i_mode);
int error = 0;
xfs_bmap_free_t free_list;
xfs_fsblock_t first_block;
@@ -2580,8 +2560,8 @@ xfs_remove(
* If we're removing a directory perform some additional validation.
*/
if (is_dir) {
- ASSERT(ip->i_d.di_nlink >= 2);
- if (ip->i_d.di_nlink != 2) {
+ ASSERT(VFS_I(ip)->i_nlink >= 2);
+ if (VFS_I(ip)->i_nlink != 2) {
error = -ENOTEMPTY;
goto out_trans_cancel;
}
@@ -2771,7 +2751,7 @@ xfs_cross_rename(
if (dp1 != dp2) {
dp2_flags = XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
- if (S_ISDIR(ip2->i_d.di_mode)) {
+ if (S_ISDIR(VFS_I(ip2)->i_mode)) {
error = xfs_dir_replace(tp, ip2, &xfs_name_dotdot,
dp1->i_ino, first_block,
free_list, spaceres);
@@ -2779,7 +2759,7 @@ xfs_cross_rename(
goto out_trans_abort;
/* transfer ip2 ".." reference to dp1 */
- if (!S_ISDIR(ip1->i_d.di_mode)) {
+ if (!S_ISDIR(VFS_I(ip1)->i_mode)) {
error = xfs_droplink(tp, dp2);
if (error)
goto out_trans_abort;
@@ -2798,7 +2778,7 @@ xfs_cross_rename(
ip2_flags |= XFS_ICHGTIME_MOD | XFS_ICHGTIME_CHG;
}
- if (S_ISDIR(ip1->i_d.di_mode)) {
+ if (S_ISDIR(VFS_I(ip1)->i_mode)) {
error = xfs_dir_replace(tp, ip1, &xfs_name_dotdot,
dp2->i_ino, first_block,
free_list, spaceres);
@@ -2806,7 +2786,7 @@ xfs_cross_rename(
goto out_trans_abort;
/* transfer ip1 ".." reference to dp2 */
- if (!S_ISDIR(ip2->i_d.di_mode)) {
+ if (!S_ISDIR(VFS_I(ip2)->i_mode)) {
error = xfs_droplink(tp, dp1);
if (error)
goto out_trans_abort;
@@ -2903,7 +2883,7 @@ xfs_rename(
struct xfs_inode *inodes[__XFS_SORT_INODES];
int num_inodes = __XFS_SORT_INODES;
bool new_parent = (src_dp != target_dp);
- bool src_is_directory = S_ISDIR(src_ip->i_d.di_mode);
+ bool src_is_directory = S_ISDIR(VFS_I(src_ip)->i_mode);
int spaceres;
int error;
@@ -3032,12 +3012,12 @@ xfs_rename(
* target and source are directories and that target can be
* destroyed, or that neither is a directory.
*/
- if (S_ISDIR(target_ip->i_d.di_mode)) {
+ if (S_ISDIR(VFS_I(target_ip)->i_mode)) {
/*
* Make sure target dir is empty.
*/
if (!(xfs_dir_isempty(target_ip)) ||
- (target_ip->i_d.di_nlink > 2)) {
+ (VFS_I(target_ip)->i_nlink > 2)) {
error = -EEXIST;
goto out_trans_cancel;
}
@@ -3144,7 +3124,7 @@ xfs_rename(
* intermediate state on disk.
*/
if (wip) {
- ASSERT(VFS_I(wip)->i_nlink == 0 && wip->i_d.di_nlink == 0);
+ ASSERT(VFS_I(wip)->i_nlink == 0);
error = xfs_bumplink(tp, wip);
if (error)
goto out_bmap_cancel;
@@ -3225,13 +3205,14 @@ xfs_iflush_cluster(
* We need to check under the i_flags_lock for a valid inode
* here. Skip it if it is not valid or the wrong inode.
*/
- spin_lock(&ip->i_flags_lock);
- if (!ip->i_ino ||
+ spin_lock(&iq->i_flags_lock);
+ if (!iq->i_ino ||
+ __xfs_iflags_test(iq, XFS_ISTALE) ||
(XFS_INO_TO_AGINO(mp, iq->i_ino) & mask) != first_index) {
- spin_unlock(&ip->i_flags_lock);
+ spin_unlock(&iq->i_flags_lock);
continue;
}
- spin_unlock(&ip->i_flags_lock);
+ spin_unlock(&iq->i_flags_lock);
/*
* Do an un-protected check to see if the inode is dirty and
@@ -3313,7 +3294,7 @@ cluster_corrupt_out:
* mark it as stale and brelse.
*/
if (bp->b_iodone) {
- XFS_BUF_UNDONE(bp);
+ bp->b_flags &= ~XBF_DONE;
xfs_buf_stale(bp);
xfs_buf_ioerror(bp, -EIO);
xfs_buf_ioend(bp);
@@ -3347,7 +3328,7 @@ xfs_iflush(
struct xfs_buf **bpp)
{
struct xfs_mount *mp = ip->i_mount;
- struct xfs_buf *bp;
+ struct xfs_buf *bp = NULL;
struct xfs_dinode *dip;
int error;
@@ -3389,14 +3370,22 @@ xfs_iflush(
}
/*
- * Get the buffer containing the on-disk inode.
+ * Get the buffer containing the on-disk inode. We are doing a try-lock
+ * operation here, so we may get an EAGAIN error. In that case, we
+ * simply want to return with the inode still dirty.
+ *
+ * If we get any other error, we effectively have a corruption situation
+ * and we cannot flush the inode, so we treat it the same as failing
+ * xfs_iflush_int().
*/
error = xfs_imap_to_bp(mp, NULL, &ip->i_imap, &dip, &bp, XBF_TRYLOCK,
0);
- if (error || !bp) {
+ if (error == -EAGAIN) {
xfs_ifunlock(ip);
return error;
}
+ if (error)
+ goto corrupt_out;
/*
* First flush out the inode that xfs_iflush was called with.
@@ -3424,7 +3413,8 @@ xfs_iflush(
return 0;
corrupt_out:
- xfs_buf_relse(bp);
+ if (bp)
+ xfs_buf_relse(bp);
xfs_force_shutdown(mp, SHUTDOWN_CORRUPT_INCORE);
cluster_corrupt_out:
error = -EFSCORRUPTED;
@@ -3462,14 +3452,7 @@ xfs_iflush_int(
__func__, ip->i_ino, be16_to_cpu(dip->di_magic), dip);
goto corrupt_out;
}
- if (XFS_TEST_ERROR(ip->i_d.di_magic != XFS_DINODE_MAGIC,
- mp, XFS_ERRTAG_IFLUSH_2, XFS_RANDOM_IFLUSH_2)) {
- xfs_alert_tag(mp, XFS_PTAG_IFLUSH,
- "%s: Bad inode %Lu, ptr 0x%p, magic number 0x%x",
- __func__, ip->i_ino, ip, ip->i_d.di_magic);
- goto corrupt_out;
- }
- if (S_ISREG(ip->i_d.di_mode)) {
+ if (S_ISREG(VFS_I(ip)->i_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE),
@@ -3479,7 +3462,7 @@ xfs_iflush_int(
__func__, ip->i_ino, ip);
goto corrupt_out;
}
- } else if (S_ISDIR(ip->i_d.di_mode)) {
+ } else if (S_ISDIR(VFS_I(ip)->i_mode)) {
if (XFS_TEST_ERROR(
(ip->i_d.di_format != XFS_DINODE_FMT_EXTENTS) &&
(ip->i_d.di_format != XFS_DINODE_FMT_BTREE) &&
@@ -3523,12 +3506,11 @@ xfs_iflush_int(
ip->i_d.di_flushiter++;
/*
- * Copy the dirty parts of the inode into the on-disk
- * inode. We always copy out the core of the inode,
- * because if the inode is dirty at all the core must
- * be.
+ * Copy the dirty parts of the inode into the on-disk inode. We always
+ * copy out the core of the inode, because if the inode is dirty at all
+ * the core must be.
*/
- xfs_dinode_to_disk(dip, &ip->i_d);
+ xfs_inode_to_disk(ip, dip, iip->ili_item.li_lsn);
/* Wrap, we never let the log put out DI_MAX_FLUSH */
if (ip->i_d.di_flushiter == DI_MAX_FLUSH)
@@ -3580,10 +3562,6 @@ xfs_iflush_int(
*/
xfs_buf_attach_iodone(bp, xfs_iflush_done, &iip->ili_item);
- /* update the lsn in the on disk inode if required */
- if (ip->i_d.di_version == 3)
- dip->di_lsn = cpu_to_be64(iip->ili_item.li_lsn);
-
/* generate the checksum. */
xfs_dinode_calc_crc(mp, dip);
diff --git a/fs/xfs/xfs_inode.h b/fs/xfs/xfs_inode.h
index ca9e11989..43e1d51b1 100644
--- a/fs/xfs/xfs_inode.h
+++ b/fs/xfs/xfs_inode.h
@@ -63,7 +63,7 @@ typedef struct xfs_inode {
unsigned long i_flags; /* see defined flags below */
unsigned int i_delayed_blks; /* count of delay alloc blks */
- xfs_icdinode_t i_d; /* most of ondisk inode */
+ struct xfs_icdinode i_d; /* most of ondisk inode */
/* VFS inode */
struct inode i_vnode; /* embedded VFS inode */
@@ -88,7 +88,7 @@ static inline struct inode *VFS_I(struct xfs_inode *ip)
*/
static inline xfs_fsize_t XFS_ISIZE(struct xfs_inode *ip)
{
- if (S_ISREG(ip->i_d.di_mode))
+ if (S_ISREG(VFS_I(ip)->i_mode))
return i_size_read(VFS_I(ip));
return ip->i_d.di_size;
}
@@ -369,7 +369,7 @@ static inline int xfs_isiflocked(struct xfs_inode *ip)
*/
#define XFS_INHERIT_GID(pip) \
(((pip)->i_mount->m_flags & XFS_MOUNT_GRPID) || \
- ((pip)->i_d.di_mode & S_ISGID))
+ (VFS_I(pip)->i_mode & S_ISGID))
int xfs_release(struct xfs_inode *ip);
void xfs_inactive(struct xfs_inode *ip);
@@ -405,8 +405,6 @@ int xfs_ifree(struct xfs_trans *, xfs_inode_t *,
struct xfs_bmap_free *);
int xfs_itruncate_extents(struct xfs_trans **, struct xfs_inode *,
int, xfs_fsize_t);
-int xfs_iunlink(struct xfs_trans *, xfs_inode_t *);
-
void xfs_iext_realloc(xfs_inode_t *, int, int);
void xfs_iunpin_wait(xfs_inode_t *);
@@ -437,6 +435,8 @@ int xfs_update_prealloc_flags(struct xfs_inode *ip,
int xfs_zero_eof(struct xfs_inode *ip, xfs_off_t offset,
xfs_fsize_t isize, bool *did_zeroing);
int xfs_iozero(struct xfs_inode *ip, loff_t pos, size_t count);
+loff_t __xfs_seek_hole_data(struct inode *inode, loff_t start,
+ loff_t eof, int whence);
/* from xfs_iops.c */
diff --git a/fs/xfs/xfs_inode_item.c b/fs/xfs/xfs_inode_item.c
index d14b12b8c..c48b5b18d 100644
--- a/fs/xfs/xfs_inode_item.c
+++ b/fs/xfs/xfs_inode_item.c
@@ -135,7 +135,7 @@ xfs_inode_item_size(
*nvecs += 2;
*nbytes += sizeof(struct xfs_inode_log_format) +
- xfs_icdinode_size(ip->i_d.di_version);
+ xfs_log_dinode_size(ip->i_d.di_version);
xfs_inode_item_data_fork_size(iip, nvecs, nbytes);
if (XFS_IFORK_Q(ip))
@@ -322,6 +322,81 @@ xfs_inode_item_format_attr_fork(
}
}
+static void
+xfs_inode_to_log_dinode(
+ struct xfs_inode *ip,
+ struct xfs_log_dinode *to,
+ xfs_lsn_t lsn)
+{
+ struct xfs_icdinode *from = &ip->i_d;
+ struct inode *inode = VFS_I(ip);
+
+ to->di_magic = XFS_DINODE_MAGIC;
+
+ to->di_version = from->di_version;
+ to->di_format = from->di_format;
+ to->di_uid = from->di_uid;
+ to->di_gid = from->di_gid;
+ to->di_projid_lo = from->di_projid_lo;
+ to->di_projid_hi = from->di_projid_hi;
+
+ memset(to->di_pad, 0, sizeof(to->di_pad));
+ memset(to->di_pad3, 0, sizeof(to->di_pad3));
+ to->di_atime.t_sec = inode->i_atime.tv_sec;
+ to->di_atime.t_nsec = inode->i_atime.tv_nsec;
+ to->di_mtime.t_sec = inode->i_mtime.tv_sec;
+ to->di_mtime.t_nsec = inode->i_mtime.tv_nsec;
+ to->di_ctime.t_sec = inode->i_ctime.tv_sec;
+ to->di_ctime.t_nsec = inode->i_ctime.tv_nsec;
+ to->di_nlink = inode->i_nlink;
+ to->di_gen = inode->i_generation;
+ to->di_mode = inode->i_mode;
+
+ to->di_size = from->di_size;
+ to->di_nblocks = from->di_nblocks;
+ to->di_extsize = from->di_extsize;
+ to->di_nextents = from->di_nextents;
+ to->di_anextents = from->di_anextents;
+ to->di_forkoff = from->di_forkoff;
+ to->di_aformat = from->di_aformat;
+ to->di_dmevmask = from->di_dmevmask;
+ to->di_dmstate = from->di_dmstate;
+ to->di_flags = from->di_flags;
+
+ if (from->di_version == 3) {
+ to->di_changecount = inode->i_version;
+ to->di_crtime.t_sec = from->di_crtime.t_sec;
+ to->di_crtime.t_nsec = from->di_crtime.t_nsec;
+ to->di_flags2 = from->di_flags2;
+
+ to->di_ino = ip->i_ino;
+ to->di_lsn = lsn;
+ memset(to->di_pad2, 0, sizeof(to->di_pad2));
+ uuid_copy(&to->di_uuid, &ip->i_mount->m_sb.sb_meta_uuid);
+ to->di_flushiter = 0;
+ } else {
+ to->di_flushiter = from->di_flushiter;
+ }
+}
+
+/*
+ * Format the inode core. Current timestamp data is only in the VFS inode
+ * fields, so we need to grab them from there. Hence rather than just copying
+ * the XFS inode core structure, format the fields directly into the iovec.
+ */
+static void
+xfs_inode_item_format_core(
+ struct xfs_inode *ip,
+ struct xfs_log_vec *lv,
+ struct xfs_log_iovec **vecp)
+{
+ struct xfs_log_dinode *dic;
+
+ dic = xlog_prepare_iovec(lv, vecp, XLOG_REG_TYPE_ICORE);
+ xfs_inode_to_log_dinode(ip, dic, ip->i_itemp->ili_item.li_lsn);
+ xlog_finish_iovec(lv, *vecp, xfs_log_dinode_size(ip->i_d.di_version));
+}
+
/*
* This is called to fill in the vector of log iovecs for the given inode
* log item. It fills the first item with an inode log format structure,
@@ -351,10 +426,7 @@ xfs_inode_item_format(
ilf->ilf_size = 2; /* format + core */
xlog_finish_iovec(lv, vecp, sizeof(struct xfs_inode_log_format));
- xlog_copy_iovec(lv, &vecp, XLOG_REG_TYPE_ICORE,
- &ip->i_d,
- xfs_icdinode_size(ip->i_d.di_version));
-
+ xfs_inode_item_format_core(ip, lv, &vecp);
xfs_inode_item_format_data_fork(iip, ilf, lv, &vecp);
if (XFS_IFORK_Q(ip)) {
xfs_inode_item_format_attr_fork(iip, ilf, lv, &vecp);
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 478d04e07..bcb6c19ce 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -114,7 +114,7 @@ xfs_find_handle(
handle.ha_fid.fid_len = sizeof(xfs_fid_t) -
sizeof(handle.ha_fid.fid_len);
handle.ha_fid.fid_pad = 0;
- handle.ha_fid.fid_gen = ip->i_d.di_gen;
+ handle.ha_fid.fid_gen = inode->i_generation;
handle.ha_fid.fid_ino = ip->i_ino;
hsize = XFS_HSIZE(handle);
@@ -963,7 +963,7 @@ xfs_set_diflags(
di_flags |= XFS_DIFLAG_NODEFRAG;
if (xflags & FS_XFLAG_FILESTREAM)
di_flags |= XFS_DIFLAG_FILESTREAM;
- if (S_ISDIR(ip->i_d.di_mode)) {
+ if (S_ISDIR(VFS_I(ip)->i_mode)) {
if (xflags & FS_XFLAG_RTINHERIT)
di_flags |= XFS_DIFLAG_RTINHERIT;
if (xflags & FS_XFLAG_NOSYMLINKS)
@@ -972,7 +972,7 @@ xfs_set_diflags(
di_flags |= XFS_DIFLAG_EXTSZINHERIT;
if (xflags & FS_XFLAG_PROJINHERIT)
di_flags |= XFS_DIFLAG_PROJINHERIT;
- } else if (S_ISREG(ip->i_d.di_mode)) {
+ } else if (S_ISREG(VFS_I(ip)->i_mode)) {
if (xflags & FS_XFLAG_REALTIME)
di_flags |= XFS_DIFLAG_REALTIME;
if (xflags & FS_XFLAG_EXTSIZE)
@@ -1060,23 +1060,86 @@ xfs_ioctl_setattr_xflags(
}
/*
+ * If we are changing DAX flags, we have to ensure the file is clean and any
+ * cached objects in the address space are invalidated and removed. This
+ * requires us to lock out other IO and page faults similar to a truncate
+ * operation. The locks need to be held until the transaction has been committed
+ * so that the cache invalidation is atomic with respect to the DAX flag
+ * manipulation.
+ */
+static int
+xfs_ioctl_setattr_dax_invalidate(
+ struct xfs_inode *ip,
+ struct fsxattr *fa,
+ int *join_flags)
+{
+ struct inode *inode = VFS_I(ip);
+ int error;
+
+ *join_flags = 0;
+
+ /*
+ * It is only valid to set the DAX flag on regular files and
+ * directories on filesystems where the block size is equal to the page
+ * size. On directories it serves as an inherit hint.
+ */
+ if (fa->fsx_xflags & FS_XFLAG_DAX) {
+ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode)))
+ return -EINVAL;
+ if (ip->i_mount->m_sb.sb_blocksize != PAGE_SIZE)
+ return -EINVAL;
+ }
+
+ /* If the DAX state is not changing, we have nothing to do here. */
+ if ((fa->fsx_xflags & FS_XFLAG_DAX) && IS_DAX(inode))
+ return 0;
+ if (!(fa->fsx_xflags & FS_XFLAG_DAX) && !IS_DAX(inode))
+ return 0;
+
+ /* lock, flush and invalidate mapping in preparation for flag change */
+ xfs_ilock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
+ error = filemap_write_and_wait(inode->i_mapping);
+ if (error)
+ goto out_unlock;
+ error = invalidate_inode_pages2(inode->i_mapping);
+ if (error)
+ goto out_unlock;
+
+ *join_flags = XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL;
+ return 0;
+
+out_unlock:
+ xfs_iunlock(ip, XFS_MMAPLOCK_EXCL | XFS_IOLOCK_EXCL);
+ return error;
+
+}
+
+/*
* Set up the transaction structure for the setattr operation, checking that we
* have permission to do so. On success, return a clean transaction and the
* inode locked exclusively ready for further operation specific checks. On
* failure, return an error without modifying or locking the inode.
+ *
+ * The inode might already be IO locked on call. If this is the case, it is
+ * indicated in @join_flags and we take full responsibility for ensuring they
+ * are unlocked from now on. Hence if we have an error here, we still have to
+ * unlock them. Otherwise, once they are joined to the transaction, they will
+ * be unlocked on commit/cancel.
*/
static struct xfs_trans *
xfs_ioctl_setattr_get_trans(
- struct xfs_inode *ip)
+ struct xfs_inode *ip,
+ int join_flags)
{
struct xfs_mount *mp = ip->i_mount;
struct xfs_trans *tp;
- int error;
+ int error = -EROFS;
if (mp->m_flags & XFS_MOUNT_RDONLY)
- return ERR_PTR(-EROFS);
+ goto out_unlock;
+ error = -EIO;
if (XFS_FORCED_SHUTDOWN(mp))
- return ERR_PTR(-EIO);
+ goto out_unlock;
tp = xfs_trans_alloc(mp, XFS_TRANS_SETATTR_NOT_SIZE);
error = xfs_trans_reserve(tp, &M_RES(mp)->tr_ichange, 0, 0);
@@ -1084,7 +1147,8 @@ xfs_ioctl_setattr_get_trans(
goto out_cancel;
xfs_ilock(ip, XFS_ILOCK_EXCL);
- xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
+ xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL | join_flags);
+ join_flags = 0;
/*
* CAP_FOWNER overrides the following restrictions:
@@ -1104,6 +1168,9 @@ xfs_ioctl_setattr_get_trans(
out_cancel:
xfs_trans_cancel(tp);
+out_unlock:
+ if (join_flags)
+ xfs_iunlock(ip, join_flags);
return ERR_PTR(error);
}
@@ -1128,14 +1195,14 @@ xfs_ioctl_setattr_check_extsize(
{
struct xfs_mount *mp = ip->i_mount;
- if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(ip->i_d.di_mode))
+ if ((fa->fsx_xflags & FS_XFLAG_EXTSIZE) && !S_ISREG(VFS_I(ip)->i_mode))
return -EINVAL;
if ((fa->fsx_xflags & FS_XFLAG_EXTSZINHERIT) &&
- !S_ISDIR(ip->i_d.di_mode))
+ !S_ISDIR(VFS_I(ip)->i_mode))
return -EINVAL;
- if (S_ISREG(ip->i_d.di_mode) && ip->i_d.di_nextents &&
+ if (S_ISREG(VFS_I(ip)->i_mode) && ip->i_d.di_nextents &&
((ip->i_d.di_extsize << mp->m_sb.sb_blocklog) != fa->fsx_extsize))
return -EINVAL;
@@ -1202,6 +1269,7 @@ xfs_ioctl_setattr(
struct xfs_dquot *pdqp = NULL;
struct xfs_dquot *olddquot = NULL;
int code;
+ int join_flags = 0;
trace_xfs_ioctl_setattr(ip);
@@ -1225,7 +1293,18 @@ xfs_ioctl_setattr(
return code;
}
- tp = xfs_ioctl_setattr_get_trans(ip);
+ /*
+ * Changing DAX config may require inode locking for mapping
+ * invalidation. These need to be held all the way to transaction commit
+ * or cancel time, so need to be passed through to
+ * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
+ * appropriately.
+ */
+ code = xfs_ioctl_setattr_dax_invalidate(ip, fa, &join_flags);
+ if (code)
+ goto error_free_dquots;
+
+ tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
if (IS_ERR(tp)) {
code = PTR_ERR(tp);
goto error_free_dquots;
@@ -1256,9 +1335,9 @@ xfs_ioctl_setattr(
* successful return from chown()
*/
- if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+ if ((VFS_I(ip)->i_mode & (S_ISUID|S_ISGID)) &&
!capable_wrt_inode_uidgid(VFS_I(ip), CAP_FSETID))
- ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+ VFS_I(ip)->i_mode &= ~(S_ISUID|S_ISGID);
/* Change the ownerships and register project quota modifications */
if (xfs_get_projid(ip) != fa->fsx_projid) {
@@ -1341,6 +1420,7 @@ xfs_ioc_setxflags(
struct xfs_trans *tp;
struct fsxattr fa;
unsigned int flags;
+ int join_flags = 0;
int error;
if (copy_from_user(&flags, arg, sizeof(flags)))
@@ -1357,7 +1437,18 @@ xfs_ioc_setxflags(
if (error)
return error;
- tp = xfs_ioctl_setattr_get_trans(ip);
+ /*
+ * Changing DAX config may require inode locking for mapping
+ * invalidation. These need to be held all the way to transaction commit
+ * or cancel time, so need to be passed through to
+ * xfs_ioctl_setattr_get_trans() so it can apply them to the join call
+ * appropriately.
+ */
+ error = xfs_ioctl_setattr_dax_invalidate(ip, &fa, &join_flags);
+ if (error)
+ goto out_drop_write;
+
+ tp = xfs_ioctl_setattr_get_trans(ip, join_flags);
if (IS_ERR(tp)) {
error = PTR_ERR(tp);
goto out_drop_write;
diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c
index 76b71a1c6..fb7dc61f4 100644
--- a/fs/xfs/xfs_iops.c
+++ b/fs/xfs/xfs_iops.c
@@ -459,8 +459,8 @@ xfs_vn_getattr(
stat->size = XFS_ISIZE(ip);
stat->dev = inode->i_sb->s_dev;
- stat->mode = ip->i_d.di_mode;
- stat->nlink = ip->i_d.di_nlink;
+ stat->mode = inode->i_mode;
+ stat->nlink = inode->i_nlink;
stat->uid = inode->i_uid;
stat->gid = inode->i_gid;
stat->ino = ip->i_ino;
@@ -506,9 +506,6 @@ xfs_setattr_mode(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- ip->i_d.di_mode &= S_IFMT;
- ip->i_d.di_mode |= mode & ~S_IFMT;
-
inode->i_mode &= S_IFMT;
inode->i_mode |= mode & ~S_IFMT;
}
@@ -522,21 +519,12 @@ xfs_setattr_time(
ASSERT(xfs_isilocked(ip, XFS_ILOCK_EXCL));
- if (iattr->ia_valid & ATTR_ATIME) {
+ if (iattr->ia_valid & ATTR_ATIME)
inode->i_atime = iattr->ia_atime;
- ip->i_d.di_atime.t_sec = iattr->ia_atime.tv_sec;
- ip->i_d.di_atime.t_nsec = iattr->ia_atime.tv_nsec;
- }
- if (iattr->ia_valid & ATTR_CTIME) {
+ if (iattr->ia_valid & ATTR_CTIME)
inode->i_ctime = iattr->ia_ctime;
- ip->i_d.di_ctime.t_sec = iattr->ia_ctime.tv_sec;
- ip->i_d.di_ctime.t_nsec = iattr->ia_ctime.tv_nsec;
- }
- if (iattr->ia_valid & ATTR_MTIME) {
+ if (iattr->ia_valid & ATTR_MTIME)
inode->i_mtime = iattr->ia_mtime;
- ip->i_d.di_mtime.t_sec = iattr->ia_mtime.tv_sec;
- ip->i_d.di_mtime.t_nsec = iattr->ia_mtime.tv_nsec;
- }
}
int
@@ -661,9 +649,9 @@ xfs_setattr_nonsize(
* The set-user-ID and set-group-ID bits of a file will be
* cleared upon successful return from chown()
*/
- if ((ip->i_d.di_mode & (S_ISUID|S_ISGID)) &&
+ if ((inode->i_mode & (S_ISUID|S_ISGID)) &&
!capable(CAP_FSETID))
- ip->i_d.di_mode &= ~(S_ISUID|S_ISGID);
+ inode->i_mode &= ~(S_ISUID|S_ISGID);
/*
* Change the ownerships and register quota modifications
@@ -773,7 +761,7 @@ xfs_setattr_size(
ASSERT(xfs_isilocked(ip, XFS_IOLOCK_EXCL));
ASSERT(xfs_isilocked(ip, XFS_MMAPLOCK_EXCL));
- ASSERT(S_ISREG(ip->i_d.di_mode));
+ ASSERT(S_ISREG(inode->i_mode));
ASSERT((iattr->ia_valid & (ATTR_UID|ATTR_GID|ATTR_ATIME|ATTR_ATIME_SET|
ATTR_MTIME_SET|ATTR_KILL_PRIV|ATTR_TIMES_SET)) == 0);
@@ -991,21 +979,13 @@ xfs_vn_update_time(
}
xfs_ilock(ip, XFS_ILOCK_EXCL);
- if (flags & S_CTIME) {
+ if (flags & S_CTIME)
inode->i_ctime = *now;
- ip->i_d.di_ctime.t_sec = (__int32_t)now->tv_sec;
- ip->i_d.di_ctime.t_nsec = (__int32_t)now->tv_nsec;
- }
- if (flags & S_MTIME) {
+ if (flags & S_MTIME)
inode->i_mtime = *now;
- ip->i_d.di_mtime.t_sec = (__int32_t)now->tv_sec;
- ip->i_d.di_mtime.t_nsec = (__int32_t)now->tv_nsec;
- }
- if (flags & S_ATIME) {
+ if (flags & S_ATIME)
inode->i_atime = *now;
- ip->i_d.di_atime.t_sec = (__int32_t)now->tv_sec;
- ip->i_d.di_atime.t_nsec = (__int32_t)now->tv_nsec;
- }
+
xfs_trans_ijoin(tp, ip, XFS_ILOCK_EXCL);
xfs_trans_log_inode(tp, ip, XFS_ILOG_TIMESTAMP);
return xfs_trans_commit(tp);
@@ -1205,8 +1185,10 @@ xfs_diflags_to_iflags(
inode->i_flags |= S_SYNC;
if (flags & XFS_DIFLAG_NOATIME)
inode->i_flags |= S_NOATIME;
- if (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
- ip->i_d.di_flags2 & XFS_DIFLAG2_DAX)
+ if (S_ISREG(inode->i_mode) &&
+ ip->i_mount->m_sb.sb_blocksize == PAGE_SIZE &&
+ (ip->i_mount->m_flags & XFS_MOUNT_DAX ||
+ ip->i_d.di_flags2 & XFS_DIFLAG2_DAX))
inode->i_flags |= S_DAX;
}
@@ -1232,8 +1214,6 @@ xfs_setup_inode(
/* make the inode look hashed for the writeback code */
hlist_add_fake(&inode->i_hash);
- inode->i_mode = ip->i_d.di_mode;
- set_nlink(inode, ip->i_d.di_nlink);
inode->i_uid = xfs_uid_to_kuid(ip->i_d.di_uid);
inode->i_gid = xfs_gid_to_kgid(ip->i_d.di_gid);
@@ -1249,14 +1229,7 @@ xfs_setup_inode(
break;
}
- inode->i_generation = ip->i_d.di_gen;
i_size_write(inode, ip->i_d.di_size);
- inode->i_atime.tv_sec = ip->i_d.di_atime.t_sec;
- inode->i_atime.tv_nsec = ip->i_d.di_atime.t_nsec;
- inode->i_mtime.tv_sec = ip->i_d.di_mtime.t_sec;
- inode->i_mtime.tv_nsec = ip->i_d.di_mtime.t_nsec;
- inode->i_ctime.tv_sec = ip->i_d.di_ctime.t_sec;
- inode->i_ctime.tv_nsec = ip->i_d.di_ctime.t_nsec;
xfs_diflags_to_iflags(inode, ip);
ip->d_ops = ip->i_mount->m_nondir_inode_ops;
diff --git a/fs/xfs/xfs_itable.c b/fs/xfs/xfs_itable.c
index 930ebd86b..ce73eb346 100644
--- a/fs/xfs/xfs_itable.c
+++ b/fs/xfs/xfs_itable.c
@@ -57,6 +57,7 @@ xfs_bulkstat_one_int(
{
struct xfs_icdinode *dic; /* dinode core info pointer */
struct xfs_inode *ip; /* incore inode pointer */
+ struct inode *inode;
struct xfs_bstat *buf; /* return buffer */
int error = 0; /* error value */
@@ -77,30 +78,33 @@ xfs_bulkstat_one_int(
ASSERT(ip != NULL);
ASSERT(ip->i_imap.im_blkno != 0);
+ inode = VFS_I(ip);
dic = &ip->i_d;
/* xfs_iget returns the following without needing
* further change.
*/
- buf->bs_nlink = dic->di_nlink;
buf->bs_projid_lo = dic->di_projid_lo;
buf->bs_projid_hi = dic->di_projid_hi;
buf->bs_ino = ino;
- buf->bs_mode = dic->di_mode;
buf->bs_uid = dic->di_uid;
buf->bs_gid = dic->di_gid;
buf->bs_size = dic->di_size;
- buf->bs_atime.tv_sec = dic->di_atime.t_sec;
- buf->bs_atime.tv_nsec = dic->di_atime.t_nsec;
- buf->bs_mtime.tv_sec = dic->di_mtime.t_sec;
- buf->bs_mtime.tv_nsec = dic->di_mtime.t_nsec;
- buf->bs_ctime.tv_sec = dic->di_ctime.t_sec;
- buf->bs_ctime.tv_nsec = dic->di_ctime.t_nsec;
+
+ buf->bs_nlink = inode->i_nlink;
+ buf->bs_atime.tv_sec = inode->i_atime.tv_sec;
+ buf->bs_atime.tv_nsec = inode->i_atime.tv_nsec;
+ buf->bs_mtime.tv_sec = inode->i_mtime.tv_sec;
+ buf->bs_mtime.tv_nsec = inode->i_mtime.tv_nsec;
+ buf->bs_ctime.tv_sec = inode->i_ctime.tv_sec;
+ buf->bs_ctime.tv_nsec = inode->i_ctime.tv_nsec;
+ buf->bs_gen = inode->i_generation;
+ buf->bs_mode = inode->i_mode;
+
buf->bs_xflags = xfs_ip2xflags(ip);
buf->bs_extsize = dic->di_extsize << mp->m_sb.sb_blocklog;
buf->bs_extents = dic->di_nextents;
- buf->bs_gen = dic->di_gen;
memset(buf->bs_pad, 0, sizeof(buf->bs_pad));
buf->bs_dmevmask = dic->di_dmevmask;
buf->bs_dmstate = dic->di_dmstate;
diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h
index ec0e239a0..a8192dc79 100644
--- a/fs/xfs/xfs_linux.h
+++ b/fs/xfs/xfs_linux.h
@@ -135,7 +135,7 @@ typedef __u32 xfs_nlink_t;
* Size of block device i/o is parameterized here.
* Currently the system supports page-sized i/o.
*/
-#define BLKDEV_IOSHIFT PAGE_CACHE_SHIFT
+#define BLKDEV_IOSHIFT PAGE_SHIFT
#define BLKDEV_IOSIZE (1<<BLKDEV_IOSHIFT)
/* number of BB's per block device block */
#define BLKDEV_BB BTOBB(BLKDEV_IOSIZE)
diff --git a/fs/xfs/xfs_log.c b/fs/xfs/xfs_log.c
index 9c9a1c9bc..b49ccf5c1 100644
--- a/fs/xfs/xfs_log.c
+++ b/fs/xfs/xfs_log.c
@@ -1212,7 +1212,7 @@ xlog_iodone(xfs_buf_t *bp)
}
/* log I/O is always issued ASYNC */
- ASSERT(XFS_BUF_ISASYNC(bp));
+ ASSERT(bp->b_flags & XBF_ASYNC);
xlog_state_done_syncing(iclog, aborted);
/*
@@ -1864,9 +1864,8 @@ xlog_sync(
bp->b_io_length = BTOBB(count);
bp->b_fspriv = iclog;
- XFS_BUF_ZEROFLAGS(bp);
- XFS_BUF_ASYNC(bp);
- bp->b_flags |= XBF_SYNCIO;
+ bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
+ bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER) {
bp->b_flags |= XBF_FUA;
@@ -1893,12 +1892,11 @@ xlog_sync(
/* account for log which doesn't start at block #0 */
XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
+
/*
* Don't call xfs_bwrite here. We do log-syncs even when the filesystem
* is shutting down.
*/
- XFS_BUF_WRITE(bp);
-
error = xlog_bdstrat(bp);
if (error) {
xfs_buf_ioerror_alert(bp, "xlog_sync");
@@ -1910,9 +1908,8 @@ xlog_sync(
xfs_buf_associate_memory(bp,
(char *)&iclog->ic_header + count, split);
bp->b_fspriv = iclog;
- XFS_BUF_ZEROFLAGS(bp);
- XFS_BUF_ASYNC(bp);
- bp->b_flags |= XBF_SYNCIO;
+ bp->b_flags &= ~(XBF_FUA | XBF_FLUSH);
+ bp->b_flags |= (XBF_ASYNC | XBF_SYNCIO | XBF_WRITE);
if (log->l_mp->m_flags & XFS_MOUNT_BARRIER)
bp->b_flags |= XBF_FUA;
@@ -1921,7 +1918,6 @@ xlog_sync(
/* account for internal log which doesn't start at block #0 */
XFS_BUF_SET_ADDR(bp, XFS_BUF_ADDR(bp) + log->l_logBBstart);
- XFS_BUF_WRITE(bp);
error = xlog_bdstrat(bp);
if (error) {
xfs_buf_ioerror_alert(bp, "xlog_sync (split)");
@@ -2012,77 +2008,81 @@ xlog_print_tic_res(
uint ophdr_spc = ticket->t_res_num_ophdrs * (uint)sizeof(xlog_op_header_t);
/* match with XLOG_REG_TYPE_* in xfs_log.h */
- static char *res_type_str[XLOG_REG_TYPE_MAX] = {
- "bformat",
- "bchunk",
- "efi_format",
- "efd_format",
- "iformat",
- "icore",
- "iext",
- "ibroot",
- "ilocal",
- "iattr_ext",
- "iattr_broot",
- "iattr_local",
- "qformat",
- "dquot",
- "quotaoff",
- "LR header",
- "unmount",
- "commit",
- "trans header"
+#define REG_TYPE_STR(type, str) [XLOG_REG_TYPE_##type] = str
+ static char *res_type_str[XLOG_REG_TYPE_MAX + 1] = {
+ REG_TYPE_STR(BFORMAT, "bformat"),
+ REG_TYPE_STR(BCHUNK, "bchunk"),
+ REG_TYPE_STR(EFI_FORMAT, "efi_format"),
+ REG_TYPE_STR(EFD_FORMAT, "efd_format"),
+ REG_TYPE_STR(IFORMAT, "iformat"),
+ REG_TYPE_STR(ICORE, "icore"),
+ REG_TYPE_STR(IEXT, "iext"),
+ REG_TYPE_STR(IBROOT, "ibroot"),
+ REG_TYPE_STR(ILOCAL, "ilocal"),
+ REG_TYPE_STR(IATTR_EXT, "iattr_ext"),
+ REG_TYPE_STR(IATTR_BROOT, "iattr_broot"),
+ REG_TYPE_STR(IATTR_LOCAL, "iattr_local"),
+ REG_TYPE_STR(QFORMAT, "qformat"),
+ REG_TYPE_STR(DQUOT, "dquot"),
+ REG_TYPE_STR(QUOTAOFF, "quotaoff"),
+ REG_TYPE_STR(LRHEADER, "LR header"),
+ REG_TYPE_STR(UNMOUNT, "unmount"),
+ REG_TYPE_STR(COMMIT, "commit"),
+ REG_TYPE_STR(TRANSHDR, "trans header"),
+ REG_TYPE_STR(ICREATE, "inode create")
};
+#undef REG_TYPE_STR
+#define TRANS_TYPE_STR(type) [XFS_TRANS_##type] = #type
static char *trans_type_str[XFS_TRANS_TYPE_MAX] = {
- "SETATTR_NOT_SIZE",
- "SETATTR_SIZE",
- "INACTIVE",
- "CREATE",
- "CREATE_TRUNC",
- "TRUNCATE_FILE",
- "REMOVE",
- "LINK",
- "RENAME",
- "MKDIR",
- "RMDIR",
- "SYMLINK",
- "SET_DMATTRS",
- "GROWFS",
- "STRAT_WRITE",
- "DIOSTRAT",
- "WRITE_SYNC",
- "WRITEID",
- "ADDAFORK",
- "ATTRINVAL",
- "ATRUNCATE",
- "ATTR_SET",
- "ATTR_RM",
- "ATTR_FLAG",
- "CLEAR_AGI_BUCKET",
- "QM_SBCHANGE",
- "DUMMY1",
- "DUMMY2",
- "QM_QUOTAOFF",
- "QM_DQALLOC",
- "QM_SETQLIM",
- "QM_DQCLUSTER",
- "QM_QINOCREATE",
- "QM_QUOTAOFF_END",
- "FSYNC_TS",
- "GROWFSRT_ALLOC",
- "GROWFSRT_ZERO",
- "GROWFSRT_FREE",
- "SWAPEXT",
- "CHECKPOINT",
- "ICREATE",
- "CREATE_TMPFILE"
+ TRANS_TYPE_STR(SETATTR_NOT_SIZE),
+ TRANS_TYPE_STR(SETATTR_SIZE),
+ TRANS_TYPE_STR(INACTIVE),
+ TRANS_TYPE_STR(CREATE),
+ TRANS_TYPE_STR(CREATE_TRUNC),
+ TRANS_TYPE_STR(TRUNCATE_FILE),
+ TRANS_TYPE_STR(REMOVE),
+ TRANS_TYPE_STR(LINK),
+ TRANS_TYPE_STR(RENAME),
+ TRANS_TYPE_STR(MKDIR),
+ TRANS_TYPE_STR(RMDIR),
+ TRANS_TYPE_STR(SYMLINK),
+ TRANS_TYPE_STR(SET_DMATTRS),
+ TRANS_TYPE_STR(GROWFS),
+ TRANS_TYPE_STR(STRAT_WRITE),
+ TRANS_TYPE_STR(DIOSTRAT),
+ TRANS_TYPE_STR(WRITEID),
+ TRANS_TYPE_STR(ADDAFORK),
+ TRANS_TYPE_STR(ATTRINVAL),
+ TRANS_TYPE_STR(ATRUNCATE),
+ TRANS_TYPE_STR(ATTR_SET),
+ TRANS_TYPE_STR(ATTR_RM),
+ TRANS_TYPE_STR(ATTR_FLAG),
+ TRANS_TYPE_STR(CLEAR_AGI_BUCKET),
+ TRANS_TYPE_STR(SB_CHANGE),
+ TRANS_TYPE_STR(DUMMY1),
+ TRANS_TYPE_STR(DUMMY2),
+ TRANS_TYPE_STR(QM_QUOTAOFF),
+ TRANS_TYPE_STR(QM_DQALLOC),
+ TRANS_TYPE_STR(QM_SETQLIM),
+ TRANS_TYPE_STR(QM_DQCLUSTER),
+ TRANS_TYPE_STR(QM_QINOCREATE),
+ TRANS_TYPE_STR(QM_QUOTAOFF_END),
+ TRANS_TYPE_STR(FSYNC_TS),
+ TRANS_TYPE_STR(GROWFSRT_ALLOC),
+ TRANS_TYPE_STR(GROWFSRT_ZERO),
+ TRANS_TYPE_STR(GROWFSRT_FREE),
+ TRANS_TYPE_STR(SWAPEXT),
+ TRANS_TYPE_STR(CHECKPOINT),
+ TRANS_TYPE_STR(ICREATE),
+ TRANS_TYPE_STR(CREATE_TMPFILE)
};
+#undef TRANS_TYPE_STR
xfs_warn(mp, "xlog_write: reservation summary:");
xfs_warn(mp, " trans type = %s (%u)",
((ticket->t_trans_type <= 0 ||
ticket->t_trans_type > XFS_TRANS_TYPE_MAX) ?
- "bad-trans-type" : trans_type_str[ticket->t_trans_type-1]),
+ "bad-trans-type" : trans_type_str[ticket->t_trans_type]),
ticket->t_trans_type);
xfs_warn(mp, " unit res = %d bytes",
ticket->t_unit_res);
@@ -2101,7 +2101,7 @@ xlog_print_tic_res(
uint r_type = ticket->t_res_arr[i].r_type;
xfs_warn(mp, "region[%u]: %s - %u bytes", i,
((r_type <= 0 || r_type > XLOG_REG_TYPE_MAX) ?
- "bad-rtype" : res_type_str[r_type-1]),
+ "bad-rtype" : res_type_str[r_type]),
ticket->t_res_arr[i].r_len);
}
@@ -3979,7 +3979,7 @@ xfs_log_force_umount(
log->l_flags & XLOG_ACTIVE_RECOVERY) {
mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
if (mp->m_sb_bp)
- XFS_BUF_DONE(mp->m_sb_bp);
+ mp->m_sb_bp->b_flags |= XBF_DONE;
return 0;
}
@@ -4009,7 +4009,7 @@ xfs_log_force_umount(
spin_lock(&log->l_icloglock);
mp->m_flags |= XFS_MOUNT_FS_SHUTDOWN;
if (mp->m_sb_bp)
- XFS_BUF_DONE(mp->m_sb_bp);
+ mp->m_sb_bp->b_flags |= XBF_DONE;
/*
* Mark the log and the iclogs with IO error flags to prevent any
diff --git a/fs/xfs/xfs_log_recover.c b/fs/xfs/xfs_log_recover.c
index be5568839..396565f43 100644
--- a/fs/xfs/xfs_log_recover.c
+++ b/fs/xfs/xfs_log_recover.c
@@ -190,7 +190,7 @@ xlog_bread_noalign(
ASSERT(nbblks <= bp->b_length);
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
- XFS_BUF_READ(bp);
+ bp->b_flags |= XBF_READ;
bp->b_io_length = nbblks;
bp->b_error = 0;
@@ -275,7 +275,6 @@ xlog_bwrite(
ASSERT(nbblks <= bp->b_length);
XFS_BUF_SET_ADDR(bp, log->l_logBBstart + blk_no);
- XFS_BUF_ZEROFLAGS(bp);
xfs_buf_hold(bp);
xfs_buf_lock(bp);
bp->b_io_length = nbblks;
@@ -2538,6 +2537,13 @@ xlog_recover_validate_buf_type(
}
bp->b_ops = &xfs_sb_buf_ops;
break;
+#ifdef CONFIG_XFS_RT
+ case XFS_BLFT_RTBITMAP_BUF:
+ case XFS_BLFT_RTSUMMARY_BUF:
+ /* no magic numbers for verification of RT buffers */
+ bp->b_ops = &xfs_rtbuf_ops;
+ break;
+#endif /* CONFIG_XFS_RT */
default:
xfs_warn(mp, "Unknown buffer type %d!",
xfs_blft_from_flags(buf_f));
@@ -2858,7 +2864,7 @@ xfs_recover_inode_owner_change(
return -ENOMEM;
/* instantiate the inode */
- xfs_dinode_from_disk(&ip->i_d, dip);
+ xfs_inode_from_disk(ip, dip);
ASSERT(ip->i_d.di_version >= 3);
error = xfs_iformat_fork(ip, dip);
@@ -2904,7 +2910,7 @@ xlog_recover_inode_pass2(
int error;
int attr_index;
uint fields;
- xfs_icdinode_t *dicp;
+ struct xfs_log_dinode *ldip;
uint isize;
int need_free = 0;
@@ -2957,8 +2963,8 @@ xlog_recover_inode_pass2(
error = -EFSCORRUPTED;
goto out_release;
}
- dicp = item->ri_buf[1].i_addr;
- if (unlikely(dicp->di_magic != XFS_DINODE_MAGIC)) {
+ ldip = item->ri_buf[1].i_addr;
+ if (unlikely(ldip->di_magic != XFS_DINODE_MAGIC)) {
xfs_alert(mp,
"%s: Bad inode log record, rec ptr 0x%p, ino %Ld",
__func__, item, in_f->ilf_ino);
@@ -2994,13 +3000,13 @@ xlog_recover_inode_pass2(
* to skip replay when the on disk inode is newer than the log one
*/
if (!xfs_sb_version_hascrc(&mp->m_sb) &&
- dicp->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
+ ldip->di_flushiter < be16_to_cpu(dip->di_flushiter)) {
/*
* Deal with the wrap case, DI_MAX_FLUSH is less
* than smaller numbers
*/
if (be16_to_cpu(dip->di_flushiter) == DI_MAX_FLUSH &&
- dicp->di_flushiter < (DI_MAX_FLUSH >> 1)) {
+ ldip->di_flushiter < (DI_MAX_FLUSH >> 1)) {
/* do nothing */
} else {
trace_xfs_log_recover_inode_skip(log, in_f);
@@ -3010,13 +3016,13 @@ xlog_recover_inode_pass2(
}
/* Take the opportunity to reset the flush iteration count */
- dicp->di_flushiter = 0;
+ ldip->di_flushiter = 0;
- if (unlikely(S_ISREG(dicp->di_mode))) {
- if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
- (dicp->di_format != XFS_DINODE_FMT_BTREE)) {
+ if (unlikely(S_ISREG(ldip->di_mode))) {
+ if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+ (ldip->di_format != XFS_DINODE_FMT_BTREE)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(3)",
- XFS_ERRLEVEL_LOW, mp, dicp);
+ XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad regular inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
@@ -3024,12 +3030,12 @@ xlog_recover_inode_pass2(
error = -EFSCORRUPTED;
goto out_release;
}
- } else if (unlikely(S_ISDIR(dicp->di_mode))) {
- if ((dicp->di_format != XFS_DINODE_FMT_EXTENTS) &&
- (dicp->di_format != XFS_DINODE_FMT_BTREE) &&
- (dicp->di_format != XFS_DINODE_FMT_LOCAL)) {
+ } else if (unlikely(S_ISDIR(ldip->di_mode))) {
+ if ((ldip->di_format != XFS_DINODE_FMT_EXTENTS) &&
+ (ldip->di_format != XFS_DINODE_FMT_BTREE) &&
+ (ldip->di_format != XFS_DINODE_FMT_LOCAL)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(4)",
- XFS_ERRLEVEL_LOW, mp, dicp);
+ XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad dir inode log record, rec ptr 0x%p, "
"ino ptr = 0x%p, ino bp = 0x%p, ino %Ld",
@@ -3038,32 +3044,32 @@ xlog_recover_inode_pass2(
goto out_release;
}
}
- if (unlikely(dicp->di_nextents + dicp->di_anextents > dicp->di_nblocks)){
+ if (unlikely(ldip->di_nextents + ldip->di_anextents > ldip->di_nblocks)){
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(5)",
- XFS_ERRLEVEL_LOW, mp, dicp);
+ XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
"dino bp 0x%p, ino %Ld, total extents = %d, nblocks = %Ld",
__func__, item, dip, bp, in_f->ilf_ino,
- dicp->di_nextents + dicp->di_anextents,
- dicp->di_nblocks);
+ ldip->di_nextents + ldip->di_anextents,
+ ldip->di_nblocks);
error = -EFSCORRUPTED;
goto out_release;
}
- if (unlikely(dicp->di_forkoff > mp->m_sb.sb_inodesize)) {
+ if (unlikely(ldip->di_forkoff > mp->m_sb.sb_inodesize)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(6)",
- XFS_ERRLEVEL_LOW, mp, dicp);
+ XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad inode log record, rec ptr 0x%p, dino ptr 0x%p, "
"dino bp 0x%p, ino %Ld, forkoff 0x%x", __func__,
- item, dip, bp, in_f->ilf_ino, dicp->di_forkoff);
+ item, dip, bp, in_f->ilf_ino, ldip->di_forkoff);
error = -EFSCORRUPTED;
goto out_release;
}
- isize = xfs_icdinode_size(dicp->di_version);
+ isize = xfs_log_dinode_size(ldip->di_version);
if (unlikely(item->ri_buf[1].i_len > isize)) {
XFS_CORRUPTION_ERROR("xlog_recover_inode_pass2(7)",
- XFS_ERRLEVEL_LOW, mp, dicp);
+ XFS_ERRLEVEL_LOW, mp, ldip);
xfs_alert(mp,
"%s: Bad inode log record length %d, rec ptr 0x%p",
__func__, item->ri_buf[1].i_len, item);
@@ -3071,8 +3077,8 @@ xlog_recover_inode_pass2(
goto out_release;
}
- /* The core is in in-core format */
- xfs_dinode_to_disk(dip, dicp);
+ /* recover the log dinode inode into the on disk inode */
+ xfs_log_dinode_to_disk(ldip, dip);
/* the rest is in on-disk format */
if (item->ri_buf[1].i_len > isize) {
@@ -4402,8 +4408,8 @@ xlog_recover_process_one_iunlink(
if (error)
goto fail_iput;
- ASSERT(ip->i_d.di_nlink == 0);
- ASSERT(ip->i_d.di_mode != 0);
+ ASSERT(VFS_I(ip)->i_nlink == 0);
+ ASSERT(VFS_I(ip)->i_mode != 0);
/* setup for the next pass */
agino = be32_to_cpu(dip->di_next_unlinked);
@@ -4957,6 +4963,7 @@ xlog_do_recover(
xfs_daddr_t head_blk,
xfs_daddr_t tail_blk)
{
+ struct xfs_mount *mp = log->l_mp;
int error;
xfs_buf_t *bp;
xfs_sb_t *sbp;
@@ -4971,7 +4978,7 @@ xlog_do_recover(
/*
* If IO errors happened during recovery, bail out.
*/
- if (XFS_FORCED_SHUTDOWN(log->l_mp)) {
+ if (XFS_FORCED_SHUTDOWN(mp)) {
return -EIO;
}
@@ -4984,22 +4991,21 @@ xlog_do_recover(
* or iunlinks they will have some entries in the AIL; so we look at
* the AIL to determine how to set the tail_lsn.
*/
- xlog_assign_tail_lsn(log->l_mp);
+ xlog_assign_tail_lsn(mp);
/*
* Now that we've finished replaying all buffer and inode
* updates, re-read in the superblock and reverify it.
*/
- bp = xfs_getsb(log->l_mp, 0);
- XFS_BUF_UNDONE(bp);
- ASSERT(!(XFS_BUF_ISWRITE(bp)));
- XFS_BUF_READ(bp);
- XFS_BUF_UNASYNC(bp);
+ bp = xfs_getsb(mp, 0);
+ bp->b_flags &= ~(XBF_DONE | XBF_ASYNC);
+ ASSERT(!(bp->b_flags & XBF_WRITE));
+ bp->b_flags |= XBF_READ;
bp->b_ops = &xfs_sb_buf_ops;
error = xfs_buf_submit_wait(bp);
if (error) {
- if (!XFS_FORCED_SHUTDOWN(log->l_mp)) {
+ if (!XFS_FORCED_SHUTDOWN(mp)) {
xfs_buf_ioerror_alert(bp, __func__);
ASSERT(0);
}
@@ -5008,14 +5014,17 @@ xlog_do_recover(
}
/* Convert superblock from on-disk format */
- sbp = &log->l_mp->m_sb;
+ sbp = &mp->m_sb;
xfs_sb_from_disk(sbp, XFS_BUF_TO_SBP(bp));
- ASSERT(sbp->sb_magicnum == XFS_SB_MAGIC);
- ASSERT(xfs_sb_good_version(sbp));
- xfs_reinit_percpu_counters(log->l_mp);
-
xfs_buf_relse(bp);
+ /* re-initialise in-core superblock and geometry structures */
+ xfs_reinit_percpu_counters(mp);
+ error = xfs_initialize_perag(mp, sbp->sb_agcount, &mp->m_maxagi);
+ if (error) {
+ xfs_warn(mp, "Failed post-recovery per-ag init: %d", error);
+ return error;
+ }
xlog_recover_check_summary(log);
diff --git a/fs/xfs/xfs_mount.c b/fs/xfs/xfs_mount.c
index bb753b359..cfd4210dd 100644
--- a/fs/xfs/xfs_mount.c
+++ b/fs/xfs/xfs_mount.c
@@ -171,7 +171,7 @@ xfs_sb_validate_fsb_count(
ASSERT(sbp->sb_blocklog >= BBSHIFT);
/* Limited by ULONG_MAX of page cache index */
- if (nblocks >> (PAGE_CACHE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
+ if (nblocks >> (PAGE_SHIFT - sbp->sb_blocklog) > ULONG_MAX)
return -EFBIG;
return 0;
}
@@ -185,9 +185,6 @@ xfs_initialize_perag(
xfs_agnumber_t index;
xfs_agnumber_t first_initialised = 0;
xfs_perag_t *pag;
- xfs_agino_t agino;
- xfs_ino_t ino;
- xfs_sb_t *sbp = &mp->m_sb;
int error = -ENOMEM;
/*
@@ -230,22 +227,7 @@ xfs_initialize_perag(
radix_tree_preload_end();
}
- /*
- * If we mount with the inode64 option, or no inode overflows
- * the legacy 32-bit address space clear the inode32 option.
- */
- agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
- ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
-
- if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
- mp->m_flags |= XFS_MOUNT_32BITINODES;
- else
- mp->m_flags &= ~XFS_MOUNT_32BITINODES;
-
- if (mp->m_flags & XFS_MOUNT_32BITINODES)
- index = xfs_set_inode32(mp, agcount);
- else
- index = xfs_set_inode64(mp, agcount);
+ index = xfs_set_inode_alloc(mp, agcount);
if (maxagi)
*maxagi = index;
@@ -865,7 +847,7 @@ xfs_mountfs(
ASSERT(rip != NULL);
- if (unlikely(!S_ISDIR(rip->i_d.di_mode))) {
+ if (unlikely(!S_ISDIR(VFS_I(rip)->i_mode))) {
xfs_warn(mp, "corrupted root inode %llu: not a directory",
(unsigned long long)rip->i_ino);
xfs_iunlock(rip, XFS_ILOCK_EXCL);
@@ -1284,7 +1266,7 @@ xfs_getsb(
}
xfs_buf_hold(bp);
- ASSERT(XFS_BUF_ISDONE(bp));
+ ASSERT(bp->b_flags & XBF_DONE);
return bp;
}
diff --git a/fs/xfs/xfs_mount.h b/fs/xfs/xfs_mount.h
index b57098481..eafe257b3 100644
--- a/fs/xfs/xfs_mount.h
+++ b/fs/xfs/xfs_mount.h
@@ -147,6 +147,17 @@ typedef struct xfs_mount {
* to various other kinds of pain inflicted on the pNFS server.
*/
__uint32_t m_generation;
+
+#ifdef DEBUG
+ /*
+ * DEBUG mode instrumentation to test and/or trigger delayed allocation
+ * block killing in the event of failed writes. When enabled, all
+ * buffered writes are forced to fail. All delalloc blocks in the range
+ * of the write (including pre-existing delalloc blocks!) are tossed as
+ * part of the write failure error handling sequence.
+ */
+ bool m_fail_writes;
+#endif
} xfs_mount_t;
/*
@@ -166,9 +177,8 @@ typedef struct xfs_mount {
#define XFS_MOUNT_GRPID (1ULL << 9) /* group-ID assigned from directory */
#define XFS_MOUNT_NORECOVERY (1ULL << 10) /* no recovery - dirty fs */
#define XFS_MOUNT_DFLT_IOSIZE (1ULL << 12) /* set default i/o size */
-#define XFS_MOUNT_32BITINODES (1ULL << 14) /* do not create inodes above
- * 32 bits in size */
-#define XFS_MOUNT_SMALL_INUMS (1ULL << 15) /* users wants 32bit inodes */
+#define XFS_MOUNT_SMALL_INUMS (1ULL << 14) /* user wants 32bit inodes */
+#define XFS_MOUNT_32BITINODES (1ULL << 15) /* inode32 allocator active */
#define XFS_MOUNT_NOUUID (1ULL << 16) /* ignore uuid during mount */
#define XFS_MOUNT_BARRIER (1ULL << 17)
#define XFS_MOUNT_IKEEP (1ULL << 18) /* keep empty inode clusters*/
@@ -221,12 +231,12 @@ static inline unsigned long
xfs_preferred_iosize(xfs_mount_t *mp)
{
if (mp->m_flags & XFS_MOUNT_COMPAT_IOSIZE)
- return PAGE_CACHE_SIZE;
+ return PAGE_SIZE;
return (mp->m_swidth ?
(mp->m_swidth << mp->m_sb.sb_blocklog) :
((mp->m_flags & XFS_MOUNT_DFLT_IOSIZE) ?
(1 << (int)MAX(mp->m_readio_log, mp->m_writeio_log)) :
- PAGE_CACHE_SIZE));
+ PAGE_SIZE));
}
#define XFS_LAST_UNMOUNT_WAS_CLEAN(mp) \
@@ -264,6 +274,20 @@ xfs_daddr_to_agbno(struct xfs_mount *mp, xfs_daddr_t d)
return (xfs_agblock_t) do_div(ld, mp->m_sb.sb_agblocks);
}
+#ifdef DEBUG
+static inline bool
+xfs_mp_fail_writes(struct xfs_mount *mp)
+{
+ return mp->m_fail_writes;
+}
+#else
+static inline bool
+xfs_mp_fail_writes(struct xfs_mount *mp)
+{
+ return 0;
+}
+#endif
+
/*
* Per-ag incore structure, copies of information in agf and agi, to improve the
* performance of allocation group selection.
@@ -327,7 +351,6 @@ extern int xfs_mod_fdblocks(struct xfs_mount *mp, int64_t delta,
bool reserved);
extern int xfs_mod_frextents(struct xfs_mount *mp, int64_t delta);
-extern int xfs_mount_log_sb(xfs_mount_t *);
extern struct xfs_buf *xfs_getsb(xfs_mount_t *, int);
extern int xfs_readsb(xfs_mount_t *, int);
extern void xfs_freesb(xfs_mount_t *);
diff --git a/fs/xfs/xfs_ondisk.h b/fs/xfs/xfs_ondisk.h
new file mode 100644
index 000000000..184c44eff
--- /dev/null
+++ b/fs/xfs/xfs_ondisk.h
@@ -0,0 +1,117 @@
+/*
+ * Copyright (c) 2016 Oracle.
+ * All Rights Reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it would be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write the Free Software Foundation,
+ * Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+#ifndef __XFS_ONDISK_H
+#define __XFS_ONDISK_H
+
+#define XFS_CHECK_STRUCT_SIZE(structname, size) \
+ BUILD_BUG_ON_MSG(sizeof(structname) != (size), "XFS: sizeof(" \
+ #structname ") is wrong, expected " #size)
+
+static inline void __init
+xfs_check_ondisk_structs(void)
+{
+ /* ag/file structures */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_acl, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_acl_entry, 12);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agf, 224);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agfl, 36);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_agi, 336);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_key, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bmbt_rec, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_bmdr_block, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_btree_block, 72);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dinode, 176);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_disk_dquot, 104);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dqblk, 136);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dsb, 264);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dsymlink_hdr, 56);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_key, 4);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inobt_rec, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_timestamp, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_alloc_key_t, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_alloc_ptr_t, 4);
+ XFS_CHECK_STRUCT_SIZE(xfs_alloc_rec_t, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_inobt_ptr_t, 4);
+
+ /* dir/attr trees */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leaf_hdr, 80);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_leafblock, 88);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_attr3_rmt_hdr, 56);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da3_blkinfo, 56);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da3_intnode, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_da3_node_hdr, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_blk_hdr, 48);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_data_hdr, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_free_hdr, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf, 64);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dir3_leaf_hdr, 64);
+ XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_entry_t, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_hdr_t, 32);
+ XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_map_t, 4);
+ XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_local_t, 4);
+
+ /*
+ * m68k has problems with xfs_attr_leaf_name_remote_t, but we pad it to
+ * 4 bytes anyway so it's not obviously a problem. Hence for the moment
+ * we don't check this structure. This can be re-instated when the attr
+ * definitions are updated to use c99 VLA definitions.
+ *
+ XFS_CHECK_STRUCT_SIZE(xfs_attr_leaf_name_remote_t, 12);
+ */
+
+ XFS_CHECK_STRUCT_SIZE(xfs_attr_leafblock_t, 40);
+ XFS_CHECK_STRUCT_SIZE(xfs_attr_shortform_t, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_da_blkinfo_t, 12);
+ XFS_CHECK_STRUCT_SIZE(xfs_da_intnode_t, 16);
+ XFS_CHECK_STRUCT_SIZE(xfs_da_node_entry_t, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_da_node_hdr_t, 16);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_free_t, 4);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_hdr_t, 16);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_data_unused_t, 6);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_hdr_t, 16);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_free_t, 16);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_ino4_t, 4);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_ino8_t, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_inou_t, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_entry_t, 8);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_hdr_t, 16);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_t, 16);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_leaf_tail_t, 4);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_entry_t, 3);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_hdr_t, 10);
+ XFS_CHECK_STRUCT_SIZE(xfs_dir2_sf_off_t, 2);
+
+ /* log structures */
+ XFS_CHECK_STRUCT_SIZE(struct xfs_dq_logformat, 24);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_32, 28);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efd_log_format_64, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_32, 28);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_efi_log_format_64, 32);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_extent_32, 12);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_extent_64, 16);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_log_dinode, 176);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_icreate_log, 28);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_ictimestamp, 8);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_32, 52);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_inode_log_format_64, 56);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_qoff_logformat, 20);
+ XFS_CHECK_STRUCT_SIZE(struct xfs_trans_header, 16);
+}
+
+#endif /* __XFS_ONDISK_H */
diff --git a/fs/xfs/xfs_pnfs.c b/fs/xfs/xfs_pnfs.c
index ade236e90..51ddaf2c2 100644
--- a/fs/xfs/xfs_pnfs.c
+++ b/fs/xfs/xfs_pnfs.c
@@ -293,8 +293,8 @@ xfs_fs_commit_blocks(
* Make sure reads through the pagecache see the new data.
*/
error = invalidate_inode_pages2_range(inode->i_mapping,
- start >> PAGE_CACHE_SHIFT,
- (end - 1) >> PAGE_CACHE_SHIFT);
+ start >> PAGE_SHIFT,
+ (end - 1) >> PAGE_SHIFT);
WARN_ON_ONCE(error);
error = xfs_iomap_write_unwritten(ip, start, length);
diff --git a/fs/xfs/xfs_pnfs.h b/fs/xfs/xfs_pnfs.h
index 8147ac108..93f748539 100644
--- a/fs/xfs/xfs_pnfs.h
+++ b/fs/xfs/xfs_pnfs.h
@@ -1,7 +1,7 @@
#ifndef _XFS_PNFS_H
#define _XFS_PNFS_H 1
-#ifdef CONFIG_NFSD_PNFS
+#if defined(CONFIG_NFSD_BLOCKLAYOUT) || defined(CONFIG_NFSD_SCSILAYOUT)
int xfs_fs_get_uuid(struct super_block *sb, u8 *buf, u32 *len, u64 *offset);
int xfs_fs_map_blocks(struct inode *inode, loff_t offset, u64 length,
struct iomap *iomap, bool write, u32 *device_generation);
diff --git a/fs/xfs/xfs_qm.c b/fs/xfs/xfs_qm.c
index 532ab79d3..be125e175 100644
--- a/fs/xfs/xfs_qm.c
+++ b/fs/xfs/xfs_qm.c
@@ -560,6 +560,37 @@ xfs_qm_shrink_count(
return list_lru_shrink_count(&qi->qi_lru, sc);
}
+STATIC void
+xfs_qm_set_defquota(
+ xfs_mount_t *mp,
+ uint type,
+ xfs_quotainfo_t *qinf)
+{
+ xfs_dquot_t *dqp;
+ struct xfs_def_quota *defq;
+ int error;
+
+ error = xfs_qm_dqread(mp, 0, type, XFS_QMOPT_DOWARN, &dqp);
+
+ if (!error) {
+ xfs_disk_dquot_t *ddqp = &dqp->q_core;
+
+ defq = xfs_get_defquota(dqp, qinf);
+
+ /*
+ * Timers and warnings have been already set, let's just set the
+ * default limits for this quota type
+ */
+ defq->bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
+ defq->bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
+ defq->ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
+ defq->isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
+ defq->rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
+ defq->rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
+ xfs_qm_dqdestroy(dqp);
+ }
+}
+
/*
* This initializes all the quota information that's kept in the
* mount structure
@@ -606,19 +637,19 @@ xfs_qm_init_quotainfo(
* We try to get the limits from the superuser's limits fields.
* This is quite hacky, but it is standard quota practice.
*
- * We look at the USR dquot with id == 0 first, but if user quotas
- * are not enabled we goto the GRP dquot with id == 0.
- * We don't really care to keep separate default limits for user
- * and group quotas, at least not at this point.
- *
* Since we may not have done a quotacheck by this point, just read
* the dquot without attaching it to any hashtables or lists.
+ *
+ * Timers and warnings are globally set by the first timer found in
+ * user/group/proj quota types, otherwise a default value is used.
+ * This should be split into different fields per quota type.
*/
error = xfs_qm_dqread(mp, 0,
XFS_IS_UQUOTA_RUNNING(mp) ? XFS_DQ_USER :
(XFS_IS_GQUOTA_RUNNING(mp) ? XFS_DQ_GROUP :
XFS_DQ_PROJ),
XFS_QMOPT_DOWARN, &dqp);
+
if (!error) {
xfs_disk_dquot_t *ddqp = &dqp->q_core;
@@ -639,13 +670,6 @@ xfs_qm_init_quotainfo(
be16_to_cpu(ddqp->d_iwarns) : XFS_QM_IWARNLIMIT;
qinf->qi_rtbwarnlimit = ddqp->d_rtbwarns ?
be16_to_cpu(ddqp->d_rtbwarns) : XFS_QM_RTBWARNLIMIT;
- qinf->qi_bhardlimit = be64_to_cpu(ddqp->d_blk_hardlimit);
- qinf->qi_bsoftlimit = be64_to_cpu(ddqp->d_blk_softlimit);
- qinf->qi_ihardlimit = be64_to_cpu(ddqp->d_ino_hardlimit);
- qinf->qi_isoftlimit = be64_to_cpu(ddqp->d_ino_softlimit);
- qinf->qi_rtbhardlimit = be64_to_cpu(ddqp->d_rtb_hardlimit);
- qinf->qi_rtbsoftlimit = be64_to_cpu(ddqp->d_rtb_softlimit);
-
xfs_qm_dqdestroy(dqp);
} else {
qinf->qi_btimelimit = XFS_QM_BTIMELIMIT;
@@ -656,6 +680,13 @@ xfs_qm_init_quotainfo(
qinf->qi_rtbwarnlimit = XFS_QM_RTBWARNLIMIT;
}
+ if (XFS_IS_UQUOTA_RUNNING(mp))
+ xfs_qm_set_defquota(mp, XFS_DQ_USER, qinf);
+ if (XFS_IS_GQUOTA_RUNNING(mp))
+ xfs_qm_set_defquota(mp, XFS_DQ_GROUP, qinf);
+ if (XFS_IS_PQUOTA_RUNNING(mp))
+ xfs_qm_set_defquota(mp, XFS_DQ_PROJ, qinf);
+
qinf->qi_shrinker.count_objects = xfs_qm_shrink_count;
qinf->qi_shrinker.scan_objects = xfs_qm_shrink_scan;
qinf->qi_shrinker.seeks = DEFAULT_SEEKS;
diff --git a/fs/xfs/xfs_qm.h b/fs/xfs/xfs_qm.h
index 996a04064..2975a822e 100644
--- a/fs/xfs/xfs_qm.h
+++ b/fs/xfs/xfs_qm.h
@@ -53,6 +53,15 @@ extern struct kmem_zone *xfs_qm_dqtrxzone;
*/
#define XFS_DQUOT_CLUSTER_SIZE_FSB (xfs_filblks_t)1
+struct xfs_def_quota {
+ xfs_qcnt_t bhardlimit; /* default data blk hard limit */
+ xfs_qcnt_t bsoftlimit; /* default data blk soft limit */
+ xfs_qcnt_t ihardlimit; /* default inode count hard limit */
+ xfs_qcnt_t isoftlimit; /* default inode count soft limit */
+ xfs_qcnt_t rtbhardlimit; /* default realtime blk hard limit */
+ xfs_qcnt_t rtbsoftlimit; /* default realtime blk soft limit */
+};
+
/*
* Various quota information for individual filesystems.
* The mount structure keeps a pointer to this.
@@ -76,12 +85,9 @@ typedef struct xfs_quotainfo {
struct mutex qi_quotaofflock;/* to serialize quotaoff */
xfs_filblks_t qi_dqchunklen; /* # BBs in a chunk of dqs */
uint qi_dqperchunk; /* # ondisk dqs in above chunk */
- xfs_qcnt_t qi_bhardlimit; /* default data blk hard limit */
- xfs_qcnt_t qi_bsoftlimit; /* default data blk soft limit */
- xfs_qcnt_t qi_ihardlimit; /* default inode count hard limit */
- xfs_qcnt_t qi_isoftlimit; /* default inode count soft limit */
- xfs_qcnt_t qi_rtbhardlimit;/* default realtime blk hard limit */
- xfs_qcnt_t qi_rtbsoftlimit;/* default realtime blk soft limit */
+ struct xfs_def_quota qi_usr_default;
+ struct xfs_def_quota qi_grp_default;
+ struct xfs_def_quota qi_prj_default;
struct shrinker qi_shrinker;
} xfs_quotainfo_t;
@@ -104,15 +110,15 @@ xfs_dquot_tree(
}
static inline struct xfs_inode *
-xfs_dq_to_quota_inode(struct xfs_dquot *dqp)
+xfs_quota_inode(xfs_mount_t *mp, uint dq_flags)
{
- switch (dqp->dq_flags & XFS_DQ_ALLTYPES) {
+ switch (dq_flags & XFS_DQ_ALLTYPES) {
case XFS_DQ_USER:
- return dqp->q_mount->m_quotainfo->qi_uquotaip;
+ return mp->m_quotainfo->qi_uquotaip;
case XFS_DQ_GROUP:
- return dqp->q_mount->m_quotainfo->qi_gquotaip;
+ return mp->m_quotainfo->qi_gquotaip;
case XFS_DQ_PROJ:
- return dqp->q_mount->m_quotainfo->qi_pquotaip;
+ return mp->m_quotainfo->qi_pquotaip;
default:
ASSERT(0);
}
@@ -164,11 +170,27 @@ extern void xfs_qm_dqrele_all_inodes(struct xfs_mount *, uint);
/* quota ops */
extern int xfs_qm_scall_trunc_qfiles(struct xfs_mount *, uint);
-extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t,
- uint, struct qc_dqblk *);
+extern int xfs_qm_scall_getquota(struct xfs_mount *, xfs_dqid_t *,
+ uint, struct qc_dqblk *, uint);
extern int xfs_qm_scall_setqlim(struct xfs_mount *, xfs_dqid_t, uint,
struct qc_dqblk *);
extern int xfs_qm_scall_quotaon(struct xfs_mount *, uint);
extern int xfs_qm_scall_quotaoff(struct xfs_mount *, uint);
+static inline struct xfs_def_quota *
+xfs_get_defquota(struct xfs_dquot *dqp, struct xfs_quotainfo *qi)
+{
+ struct xfs_def_quota *defq;
+
+ if (XFS_QM_ISUDQ(dqp))
+ defq = &qi->qi_usr_default;
+ else if (XFS_QM_ISGDQ(dqp))
+ defq = &qi->qi_grp_default;
+ else {
+ ASSERT(XFS_QM_ISPDQ(dqp));
+ defq = &qi->qi_prj_default;
+ }
+ return defq;
+}
+
#endif /* __XFS_QM_H__ */
diff --git a/fs/xfs/xfs_qm_syscalls.c b/fs/xfs/xfs_qm_syscalls.c
index 3640c6e89..f4d0e0a8f 100644
--- a/fs/xfs/xfs_qm_syscalls.c
+++ b/fs/xfs/xfs_qm_syscalls.c
@@ -404,6 +404,7 @@ xfs_qm_scall_setqlim(
struct xfs_disk_dquot *ddq;
struct xfs_dquot *dqp;
struct xfs_trans *tp;
+ struct xfs_def_quota *defq;
int error;
xfs_qcnt_t hard, soft;
@@ -431,6 +432,8 @@ xfs_qm_scall_setqlim(
ASSERT(error != -ENOENT);
goto out_unlock;
}
+
+ defq = xfs_get_defquota(dqp, q);
xfs_dqunlock(dqp);
tp = xfs_trans_alloc(mp, XFS_TRANS_QM_SETQLIM);
@@ -458,8 +461,8 @@ xfs_qm_scall_setqlim(
ddq->d_blk_softlimit = cpu_to_be64(soft);
xfs_dquot_set_prealloc_limits(dqp);
if (id == 0) {
- q->qi_bhardlimit = hard;
- q->qi_bsoftlimit = soft;
+ defq->bhardlimit = hard;
+ defq->bsoftlimit = soft;
}
} else {
xfs_debug(mp, "blkhard %Ld < blksoft %Ld", hard, soft);
@@ -474,8 +477,8 @@ xfs_qm_scall_setqlim(
ddq->d_rtb_hardlimit = cpu_to_be64(hard);
ddq->d_rtb_softlimit = cpu_to_be64(soft);
if (id == 0) {
- q->qi_rtbhardlimit = hard;
- q->qi_rtbsoftlimit = soft;
+ defq->rtbhardlimit = hard;
+ defq->rtbsoftlimit = soft;
}
} else {
xfs_debug(mp, "rtbhard %Ld < rtbsoft %Ld", hard, soft);
@@ -491,8 +494,8 @@ xfs_qm_scall_setqlim(
ddq->d_ino_hardlimit = cpu_to_be64(hard);
ddq->d_ino_softlimit = cpu_to_be64(soft);
if (id == 0) {
- q->qi_ihardlimit = hard;
- q->qi_isoftlimit = soft;
+ defq->ihardlimit = hard;
+ defq->isoftlimit = soft;
}
} else {
xfs_debug(mp, "ihard %Ld < isoft %Ld", hard, soft);
@@ -635,9 +638,10 @@ out:
int
xfs_qm_scall_getquota(
struct xfs_mount *mp,
- xfs_dqid_t id,
+ xfs_dqid_t *id,
uint type,
- struct qc_dqblk *dst)
+ struct qc_dqblk *dst,
+ uint dqget_flags)
{
struct xfs_dquot *dqp;
int error;
@@ -647,7 +651,7 @@ xfs_qm_scall_getquota(
* we aren't passing the XFS_QMOPT_DOALLOC flag. If it doesn't
* exist, we'll get ENOENT back.
*/
- error = xfs_qm_dqget(mp, NULL, id, type, 0, &dqp);
+ error = xfs_qm_dqget(mp, NULL, *id, type, dqget_flags, &dqp);
if (error)
return error;
@@ -660,6 +664,9 @@ xfs_qm_scall_getquota(
goto out_put;
}
+ /* Fill in the ID we actually read from disk */
+ *id = be32_to_cpu(dqp->q_core.d_id);
+
memset(dst, 0, sizeof(*dst));
dst->d_spc_hardlimit =
XFS_FSB_TO_B(mp, be64_to_cpu(dqp->q_core.d_blk_hardlimit));
@@ -701,7 +708,7 @@ xfs_qm_scall_getquota(
if (((XFS_IS_UQUOTA_ENFORCED(mp) && type == XFS_DQ_USER) ||
(XFS_IS_GQUOTA_ENFORCED(mp) && type == XFS_DQ_GROUP) ||
(XFS_IS_PQUOTA_ENFORCED(mp) && type == XFS_DQ_PROJ)) &&
- id != 0) {
+ *id != 0) {
if ((dst->d_space > dst->d_spc_softlimit) &&
(dst->d_spc_softlimit > 0)) {
ASSERT(dst->d_spc_timer != 0);
diff --git a/fs/xfs/xfs_quotaops.c b/fs/xfs/xfs_quotaops.c
index 7795e0d01..f82d79a8c 100644
--- a/fs/xfs/xfs_quotaops.c
+++ b/fs/xfs/xfs_quotaops.c
@@ -231,14 +231,45 @@ xfs_fs_get_dqblk(
struct qc_dqblk *qdq)
{
struct xfs_mount *mp = XFS_M(sb);
+ xfs_dqid_t id;
if (!XFS_IS_QUOTA_RUNNING(mp))
return -ENOSYS;
if (!XFS_IS_QUOTA_ON(mp))
return -ESRCH;
- return xfs_qm_scall_getquota(mp, from_kqid(&init_user_ns, qid),
- xfs_quota_type(qid.type), qdq);
+ id = from_kqid(&init_user_ns, qid);
+ return xfs_qm_scall_getquota(mp, &id,
+ xfs_quota_type(qid.type), qdq, 0);
+}
+
+/* Return quota info for active quota >= this qid */
+STATIC int
+xfs_fs_get_nextdqblk(
+ struct super_block *sb,
+ struct kqid *qid,
+ struct qc_dqblk *qdq)
+{
+ int ret;
+ struct xfs_mount *mp = XFS_M(sb);
+ xfs_dqid_t id;
+
+ if (!XFS_IS_QUOTA_RUNNING(mp))
+ return -ENOSYS;
+ if (!XFS_IS_QUOTA_ON(mp))
+ return -ESRCH;
+
+ id = from_kqid(&init_user_ns, *qid);
+ ret = xfs_qm_scall_getquota(mp, &id,
+ xfs_quota_type(qid->type), qdq,
+ XFS_QMOPT_DQNEXT);
+ if (ret)
+ return ret;
+
+ /* ID may be different, so convert back what we got */
+ *qid = make_kqid(current_user_ns(), qid->type, id);
+ return 0;
+
}
STATIC int
@@ -267,5 +298,6 @@ const struct quotactl_ops xfs_quotactl_operations = {
.quota_disable = xfs_quota_disable,
.rm_xquota = xfs_fs_rm_xquota,
.get_dqblk = xfs_fs_get_dqblk,
+ .get_nextdqblk = xfs_fs_get_nextdqblk,
.set_dqblk = xfs_fs_set_dqblk,
};
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index be02a68b2..abf44435d 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -1272,7 +1272,7 @@ xfs_rtpick_extent(
ASSERT(xfs_isilocked(mp->m_rbmip, XFS_ILOCK_EXCL));
- seqp = (__uint64_t *)&mp->m_rbmip->i_d.di_atime;
+ seqp = (__uint64_t *)&VFS_I(mp->m_rbmip)->i_atime;
if (!(mp->m_rbmip->i_d.di_flags & XFS_DIFLAG_NEWRTBM)) {
mp->m_rbmip->i_d.di_flags |= XFS_DIFLAG_NEWRTBM;
*seqp = 0;
diff --git a/fs/xfs/xfs_super.c b/fs/xfs/xfs_super.c
index 59c9b7bd9..cedf601ca 100644
--- a/fs/xfs/xfs_super.c
+++ b/fs/xfs/xfs_super.c
@@ -45,6 +45,7 @@
#include "xfs_filestream.h"
#include "xfs_quota.h"
#include "xfs_sysfs.h"
+#include "xfs_ondisk.h"
#include <linux/namei.h>
#include <linux/init.h>
@@ -65,83 +66,85 @@ static struct kset *xfs_kset; /* top-level xfs sysfs dir */
static struct xfs_kobj xfs_dbg_kobj; /* global debug sysfs attrs */
#endif
-#define MNTOPT_LOGBUFS "logbufs" /* number of XFS log buffers */
-#define MNTOPT_LOGBSIZE "logbsize" /* size of XFS log buffers */
-#define MNTOPT_LOGDEV "logdev" /* log device */
-#define MNTOPT_RTDEV "rtdev" /* realtime I/O device */
-#define MNTOPT_BIOSIZE "biosize" /* log2 of preferred buffered io size */
-#define MNTOPT_WSYNC "wsync" /* safe-mode nfs compatible mount */
-#define MNTOPT_NOALIGN "noalign" /* turn off stripe alignment */
-#define MNTOPT_SWALLOC "swalloc" /* turn on stripe width allocation */
-#define MNTOPT_SUNIT "sunit" /* data volume stripe unit */
-#define MNTOPT_SWIDTH "swidth" /* data volume stripe width */
-#define MNTOPT_NOUUID "nouuid" /* ignore filesystem UUID */
-#define MNTOPT_MTPT "mtpt" /* filesystem mount point */
-#define MNTOPT_GRPID "grpid" /* group-ID from parent directory */
-#define MNTOPT_NOGRPID "nogrpid" /* group-ID from current process */
-#define MNTOPT_BSDGROUPS "bsdgroups" /* group-ID from parent directory */
-#define MNTOPT_SYSVGROUPS "sysvgroups" /* group-ID from current process */
-#define MNTOPT_ALLOCSIZE "allocsize" /* preferred allocation size */
-#define MNTOPT_NORECOVERY "norecovery" /* don't run XFS recovery */
-#define MNTOPT_BARRIER "barrier" /* use writer barriers for log write and
- * unwritten extent conversion */
-#define MNTOPT_NOBARRIER "nobarrier" /* .. disable */
-#define MNTOPT_64BITINODE "inode64" /* inodes can be allocated anywhere */
-#define MNTOPT_32BITINODE "inode32" /* inode allocation limited to
- * XFS_MAXINUMBER_32 */
-#define MNTOPT_IKEEP "ikeep" /* do not free empty inode clusters */
-#define MNTOPT_NOIKEEP "noikeep" /* free empty inode clusters */
-#define MNTOPT_LARGEIO "largeio" /* report large I/O sizes in stat() */
-#define MNTOPT_NOLARGEIO "nolargeio" /* do not report large I/O sizes
- * in stat(). */
-#define MNTOPT_ATTR2 "attr2" /* do use attr2 attribute format */
-#define MNTOPT_NOATTR2 "noattr2" /* do not use attr2 attribute format */
-#define MNTOPT_FILESTREAM "filestreams" /* use filestreams allocator */
-#define MNTOPT_QUOTA "quota" /* disk quotas (user) */
-#define MNTOPT_NOQUOTA "noquota" /* no quotas */
-#define MNTOPT_USRQUOTA "usrquota" /* user quota enabled */
-#define MNTOPT_GRPQUOTA "grpquota" /* group quota enabled */
-#define MNTOPT_PRJQUOTA "prjquota" /* project quota enabled */
-#define MNTOPT_UQUOTA "uquota" /* user quota (IRIX variant) */
-#define MNTOPT_GQUOTA "gquota" /* group quota (IRIX variant) */
-#define MNTOPT_PQUOTA "pquota" /* project quota (IRIX variant) */
-#define MNTOPT_UQUOTANOENF "uqnoenforce"/* user quota limit enforcement */
-#define MNTOPT_GQUOTANOENF "gqnoenforce"/* group quota limit enforcement */
-#define MNTOPT_PQUOTANOENF "pqnoenforce"/* project quota limit enforcement */
-#define MNTOPT_QUOTANOENF "qnoenforce" /* same as uqnoenforce */
-#define MNTOPT_DISCARD "discard" /* Discard unused blocks */
-#define MNTOPT_NODISCARD "nodiscard" /* Do not discard unused blocks */
-
-#define MNTOPT_DAX "dax" /* Enable direct access to bdev pages */
-
/*
* Table driven mount option parser.
- *
- * Currently only used for remount, but it will be used for mount
- * in the future, too.
*/
enum {
- Opt_barrier,
- Opt_nobarrier,
- Opt_inode64,
- Opt_inode32,
- Opt_err
+ Opt_logbufs, Opt_logbsize, Opt_logdev, Opt_rtdev, Opt_biosize,
+ Opt_wsync, Opt_noalign, Opt_swalloc, Opt_sunit, Opt_swidth, Opt_nouuid,
+ Opt_mtpt, Opt_grpid, Opt_nogrpid, Opt_bsdgroups, Opt_sysvgroups,
+ Opt_allocsize, Opt_norecovery, Opt_barrier, Opt_nobarrier,
+ Opt_inode64, Opt_inode32, Opt_ikeep, Opt_noikeep,
+ Opt_largeio, Opt_nolargeio, Opt_attr2, Opt_noattr2, Opt_filestreams,
+ Opt_quota, Opt_noquota, Opt_usrquota, Opt_grpquota, Opt_prjquota,
+ Opt_uquota, Opt_gquota, Opt_pquota,
+ Opt_uqnoenforce, Opt_gqnoenforce, Opt_pqnoenforce, Opt_qnoenforce,
+ Opt_discard, Opt_nodiscard, Opt_dax, Opt_err,
};
static const match_table_t tokens = {
- {Opt_barrier, "barrier"},
- {Opt_nobarrier, "nobarrier"},
- {Opt_inode64, "inode64"},
- {Opt_inode32, "inode32"},
- {Opt_err, NULL}
+ {Opt_logbufs, "logbufs=%u"}, /* number of XFS log buffers */
+ {Opt_logbsize, "logbsize=%s"}, /* size of XFS log buffers */
+ {Opt_logdev, "logdev=%s"}, /* log device */
+ {Opt_rtdev, "rtdev=%s"}, /* realtime I/O device */
+ {Opt_biosize, "biosize=%u"}, /* log2 of preferred buffered io size */
+ {Opt_wsync, "wsync"}, /* safe-mode nfs compatible mount */
+ {Opt_noalign, "noalign"}, /* turn off stripe alignment */
+ {Opt_swalloc, "swalloc"}, /* turn on stripe width allocation */
+ {Opt_sunit, "sunit=%u"}, /* data volume stripe unit */
+ {Opt_swidth, "swidth=%u"}, /* data volume stripe width */
+ {Opt_nouuid, "nouuid"}, /* ignore filesystem UUID */
+ {Opt_mtpt, "mtpt"}, /* filesystem mount point */
+ {Opt_grpid, "grpid"}, /* group-ID from parent directory */
+ {Opt_nogrpid, "nogrpid"}, /* group-ID from current process */
+ {Opt_bsdgroups, "bsdgroups"}, /* group-ID from parent directory */
+ {Opt_sysvgroups,"sysvgroups"}, /* group-ID from current process */
+ {Opt_allocsize, "allocsize=%s"},/* preferred allocation size */
+ {Opt_norecovery,"norecovery"}, /* don't run XFS recovery */
+ {Opt_barrier, "barrier"}, /* use writer barriers for log write and
+ * unwritten extent conversion */
+ {Opt_nobarrier, "nobarrier"}, /* .. disable */
+ {Opt_inode64, "inode64"}, /* inodes can be allocated anywhere */
+ {Opt_inode32, "inode32"}, /* inode allocation limited to
+ * XFS_MAXINUMBER_32 */
+ {Opt_ikeep, "ikeep"}, /* do not free empty inode clusters */
+ {Opt_noikeep, "noikeep"}, /* free empty inode clusters */
+ {Opt_largeio, "largeio"}, /* report large I/O sizes in stat() */
+ {Opt_nolargeio, "nolargeio"}, /* do not report large I/O sizes
+ * in stat(). */
+ {Opt_attr2, "attr2"}, /* do use attr2 attribute format */
+ {Opt_noattr2, "noattr2"}, /* do not use attr2 attribute format */
+ {Opt_filestreams,"filestreams"},/* use filestreams allocator */
+ {Opt_quota, "quota"}, /* disk quotas (user) */
+ {Opt_noquota, "noquota"}, /* no quotas */
+ {Opt_usrquota, "usrquota"}, /* user quota enabled */
+ {Opt_grpquota, "grpquota"}, /* group quota enabled */
+ {Opt_prjquota, "prjquota"}, /* project quota enabled */
+ {Opt_uquota, "uquota"}, /* user quota (IRIX variant) */
+ {Opt_gquota, "gquota"}, /* group quota (IRIX variant) */
+ {Opt_pquota, "pquota"}, /* project quota (IRIX variant) */
+ {Opt_uqnoenforce,"uqnoenforce"},/* user quota limit enforcement */
+ {Opt_gqnoenforce,"gqnoenforce"},/* group quota limit enforcement */
+ {Opt_pqnoenforce,"pqnoenforce"},/* project quota limit enforcement */
+ {Opt_qnoenforce, "qnoenforce"}, /* same as uqnoenforce */
+ {Opt_discard, "discard"}, /* Discard unused blocks */
+ {Opt_nodiscard, "nodiscard"}, /* Do not discard unused blocks */
+
+ {Opt_dax, "dax"}, /* Enable direct access to bdev pages */
+ {Opt_err, NULL},
};
STATIC int
-suffix_kstrtoint(char *s, unsigned int base, int *res)
+suffix_kstrtoint(const substring_t *s, unsigned int base, int *res)
{
int last, shift_left_factor = 0, _res;
- char *value = s;
+ char *value;
+ int ret = 0;
+
+ value = match_strdup(s);
+ if (!value)
+ return -ENOMEM;
last = strlen(value) - 1;
if (value[last] == 'K' || value[last] == 'k') {
@@ -157,10 +160,11 @@ suffix_kstrtoint(char *s, unsigned int base, int *res)
value[last] = '\0';
}
- if (kstrtoint(s, base, &_res))
- return -EINVAL;
+ if (kstrtoint(value, base, &_res))
+ ret = -EINVAL;
+ kfree(value);
*res = _res << shift_left_factor;
- return 0;
+ return ret;
}
/*
@@ -169,14 +173,19 @@ suffix_kstrtoint(char *s, unsigned int base, int *res)
*
* Note that this function leaks the various device name allocations on
* failure. The caller takes care of them.
+ *
+ * *sb is const because this is also used to test options on the remount
+ * path, and we don't want this to have any side effects at remount time.
+ * Today this function does not change *sb, but just to future-proof...
*/
STATIC int
xfs_parseargs(
struct xfs_mount *mp,
char *options)
{
- struct super_block *sb = mp->m_super;
- char *this_char, *value;
+ const struct super_block *sb = mp->m_super;
+ char *p;
+ substring_t args[MAX_OPT_ARGS];
int dsunit = 0;
int dswidth = 0;
int iosize = 0;
@@ -217,152 +226,152 @@ xfs_parseargs(
if (!options)
goto done;
- while ((this_char = strsep(&options, ",")) != NULL) {
- if (!*this_char)
+ while ((p = strsep(&options, ",")) != NULL) {
+ int token;
+
+ if (!*p)
continue;
- if ((value = strchr(this_char, '=')) != NULL)
- *value++ = 0;
- if (!strcmp(this_char, MNTOPT_LOGBUFS)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return -EINVAL;
- }
- if (kstrtoint(value, 10, &mp->m_logbufs))
- return -EINVAL;
- } else if (!strcmp(this_char, MNTOPT_LOGBSIZE)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return -EINVAL;
- }
- if (suffix_kstrtoint(value, 10, &mp->m_logbsize))
+ token = match_token(p, tokens, args);
+ switch (token) {
+ case Opt_logbufs:
+ if (match_int(args, &mp->m_logbufs))
return -EINVAL;
- } else if (!strcmp(this_char, MNTOPT_LOGDEV)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
+ break;
+ case Opt_logbsize:
+ if (suffix_kstrtoint(args, 10, &mp->m_logbsize))
return -EINVAL;
- }
- mp->m_logname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+ break;
+ case Opt_logdev:
+ mp->m_logname = match_strdup(args);
if (!mp->m_logname)
return -ENOMEM;
- } else if (!strcmp(this_char, MNTOPT_MTPT)) {
- xfs_warn(mp, "%s option not allowed on this system",
- this_char);
+ break;
+ case Opt_mtpt:
+ xfs_warn(mp, "%s option not allowed on this system", p);
return -EINVAL;
- } else if (!strcmp(this_char, MNTOPT_RTDEV)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return -EINVAL;
- }
- mp->m_rtname = kstrndup(value, MAXNAMELEN, GFP_KERNEL);
+ case Opt_rtdev:
+ mp->m_rtname = match_strdup(args);
if (!mp->m_rtname)
return -ENOMEM;
- } else if (!strcmp(this_char, MNTOPT_ALLOCSIZE) ||
- !strcmp(this_char, MNTOPT_BIOSIZE)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return -EINVAL;
- }
- if (suffix_kstrtoint(value, 10, &iosize))
+ break;
+ case Opt_allocsize:
+ case Opt_biosize:
+ if (suffix_kstrtoint(args, 10, &iosize))
return -EINVAL;
iosizelog = ffs(iosize) - 1;
- } else if (!strcmp(this_char, MNTOPT_GRPID) ||
- !strcmp(this_char, MNTOPT_BSDGROUPS)) {
+ break;
+ case Opt_grpid:
+ case Opt_bsdgroups:
mp->m_flags |= XFS_MOUNT_GRPID;
- } else if (!strcmp(this_char, MNTOPT_NOGRPID) ||
- !strcmp(this_char, MNTOPT_SYSVGROUPS)) {
+ break;
+ case Opt_nogrpid:
+ case Opt_sysvgroups:
mp->m_flags &= ~XFS_MOUNT_GRPID;
- } else if (!strcmp(this_char, MNTOPT_WSYNC)) {
+ break;
+ case Opt_wsync:
mp->m_flags |= XFS_MOUNT_WSYNC;
- } else if (!strcmp(this_char, MNTOPT_NORECOVERY)) {
+ break;
+ case Opt_norecovery:
mp->m_flags |= XFS_MOUNT_NORECOVERY;
- } else if (!strcmp(this_char, MNTOPT_NOALIGN)) {
+ break;
+ case Opt_noalign:
mp->m_flags |= XFS_MOUNT_NOALIGN;
- } else if (!strcmp(this_char, MNTOPT_SWALLOC)) {
+ break;
+ case Opt_swalloc:
mp->m_flags |= XFS_MOUNT_SWALLOC;
- } else if (!strcmp(this_char, MNTOPT_SUNIT)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
- return -EINVAL;
- }
- if (kstrtoint(value, 10, &dsunit))
- return -EINVAL;
- } else if (!strcmp(this_char, MNTOPT_SWIDTH)) {
- if (!value || !*value) {
- xfs_warn(mp, "%s option requires an argument",
- this_char);
+ break;
+ case Opt_sunit:
+ if (match_int(args, &dsunit))
return -EINVAL;
- }
- if (kstrtoint(value, 10, &dswidth))
+ break;
+ case Opt_swidth:
+ if (match_int(args, &dswidth))
return -EINVAL;
- } else if (!strcmp(this_char, MNTOPT_32BITINODE)) {
+ break;
+ case Opt_inode32:
mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
- } else if (!strcmp(this_char, MNTOPT_64BITINODE)) {
+ break;
+ case Opt_inode64:
mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
- } else if (!strcmp(this_char, MNTOPT_NOUUID)) {
+ break;
+ case Opt_nouuid:
mp->m_flags |= XFS_MOUNT_NOUUID;
- } else if (!strcmp(this_char, MNTOPT_BARRIER)) {
+ break;
+ case Opt_barrier:
mp->m_flags |= XFS_MOUNT_BARRIER;
- } else if (!strcmp(this_char, MNTOPT_NOBARRIER)) {
+ break;
+ case Opt_nobarrier:
mp->m_flags &= ~XFS_MOUNT_BARRIER;
- } else if (!strcmp(this_char, MNTOPT_IKEEP)) {
+ break;
+ case Opt_ikeep:
mp->m_flags |= XFS_MOUNT_IKEEP;
- } else if (!strcmp(this_char, MNTOPT_NOIKEEP)) {
+ break;
+ case Opt_noikeep:
mp->m_flags &= ~XFS_MOUNT_IKEEP;
- } else if (!strcmp(this_char, MNTOPT_LARGEIO)) {
+ break;
+ case Opt_largeio:
mp->m_flags &= ~XFS_MOUNT_COMPAT_IOSIZE;
- } else if (!strcmp(this_char, MNTOPT_NOLARGEIO)) {
+ break;
+ case Opt_nolargeio:
mp->m_flags |= XFS_MOUNT_COMPAT_IOSIZE;
- } else if (!strcmp(this_char, MNTOPT_ATTR2)) {
+ break;
+ case Opt_attr2:
mp->m_flags |= XFS_MOUNT_ATTR2;
- } else if (!strcmp(this_char, MNTOPT_NOATTR2)) {
+ break;
+ case Opt_noattr2:
mp->m_flags &= ~XFS_MOUNT_ATTR2;
mp->m_flags |= XFS_MOUNT_NOATTR2;
- } else if (!strcmp(this_char, MNTOPT_FILESTREAM)) {
+ break;
+ case Opt_filestreams:
mp->m_flags |= XFS_MOUNT_FILESTREAMS;
- } else if (!strcmp(this_char, MNTOPT_NOQUOTA)) {
+ break;
+ case Opt_noquota:
mp->m_qflags &= ~XFS_ALL_QUOTA_ACCT;
mp->m_qflags &= ~XFS_ALL_QUOTA_ENFD;
mp->m_qflags &= ~XFS_ALL_QUOTA_ACTIVE;
- } else if (!strcmp(this_char, MNTOPT_QUOTA) ||
- !strcmp(this_char, MNTOPT_UQUOTA) ||
- !strcmp(this_char, MNTOPT_USRQUOTA)) {
+ break;
+ case Opt_quota:
+ case Opt_uquota:
+ case Opt_usrquota:
mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE |
XFS_UQUOTA_ENFD);
- } else if (!strcmp(this_char, MNTOPT_QUOTANOENF) ||
- !strcmp(this_char, MNTOPT_UQUOTANOENF)) {
+ break;
+ case Opt_qnoenforce:
+ case Opt_uqnoenforce:
mp->m_qflags |= (XFS_UQUOTA_ACCT | XFS_UQUOTA_ACTIVE);
mp->m_qflags &= ~XFS_UQUOTA_ENFD;
- } else if (!strcmp(this_char, MNTOPT_PQUOTA) ||
- !strcmp(this_char, MNTOPT_PRJQUOTA)) {
+ break;
+ case Opt_pquota:
+ case Opt_prjquota:
mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE |
XFS_PQUOTA_ENFD);
- } else if (!strcmp(this_char, MNTOPT_PQUOTANOENF)) {
+ break;
+ case Opt_pqnoenforce:
mp->m_qflags |= (XFS_PQUOTA_ACCT | XFS_PQUOTA_ACTIVE);
mp->m_qflags &= ~XFS_PQUOTA_ENFD;
- } else if (!strcmp(this_char, MNTOPT_GQUOTA) ||
- !strcmp(this_char, MNTOPT_GRPQUOTA)) {
+ case Opt_gquota:
+ case Opt_grpquota:
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE |
XFS_GQUOTA_ENFD);
- } else if (!strcmp(this_char, MNTOPT_GQUOTANOENF)) {
+ break;
+ case Opt_gqnoenforce:
mp->m_qflags |= (XFS_GQUOTA_ACCT | XFS_GQUOTA_ACTIVE);
mp->m_qflags &= ~XFS_GQUOTA_ENFD;
- } else if (!strcmp(this_char, MNTOPT_DISCARD)) {
+ break;
+ case Opt_discard:
mp->m_flags |= XFS_MOUNT_DISCARD;
- } else if (!strcmp(this_char, MNTOPT_NODISCARD)) {
+ break;
+ case Opt_nodiscard:
mp->m_flags &= ~XFS_MOUNT_DISCARD;
+ break;
#ifdef CONFIG_FS_DAX
- } else if (!strcmp(this_char, MNTOPT_DAX)) {
+ case Opt_dax:
mp->m_flags |= XFS_MOUNT_DAX;
+ break;
#endif
- } else {
- xfs_warn(mp, "unknown mount option [%s].", this_char);
+ default:
+ xfs_warn(mp, "unknown mount option [%s].", p);
return -EINVAL;
}
}
@@ -461,25 +470,25 @@ xfs_showargs(
{
static struct proc_xfs_info xfs_info_set[] = {
/* the few simple ones we can get from the mount struct */
- { XFS_MOUNT_IKEEP, "," MNTOPT_IKEEP },
- { XFS_MOUNT_WSYNC, "," MNTOPT_WSYNC },
- { XFS_MOUNT_NOALIGN, "," MNTOPT_NOALIGN },
- { XFS_MOUNT_SWALLOC, "," MNTOPT_SWALLOC },
- { XFS_MOUNT_NOUUID, "," MNTOPT_NOUUID },
- { XFS_MOUNT_NORECOVERY, "," MNTOPT_NORECOVERY },
- { XFS_MOUNT_ATTR2, "," MNTOPT_ATTR2 },
- { XFS_MOUNT_FILESTREAMS, "," MNTOPT_FILESTREAM },
- { XFS_MOUNT_GRPID, "," MNTOPT_GRPID },
- { XFS_MOUNT_DISCARD, "," MNTOPT_DISCARD },
- { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_32BITINODE },
- { XFS_MOUNT_DAX, "," MNTOPT_DAX },
+ { XFS_MOUNT_IKEEP, ",ikeep" },
+ { XFS_MOUNT_WSYNC, ",wsync" },
+ { XFS_MOUNT_NOALIGN, ",noalign" },
+ { XFS_MOUNT_SWALLOC, ",swalloc" },
+ { XFS_MOUNT_NOUUID, ",nouuid" },
+ { XFS_MOUNT_NORECOVERY, ",norecovery" },
+ { XFS_MOUNT_ATTR2, ",attr2" },
+ { XFS_MOUNT_FILESTREAMS, ",filestreams" },
+ { XFS_MOUNT_GRPID, ",grpid" },
+ { XFS_MOUNT_DISCARD, ",discard" },
+ { XFS_MOUNT_SMALL_INUMS, ",inode32" },
+ { XFS_MOUNT_DAX, ",dax" },
{ 0, NULL }
};
static struct proc_xfs_info xfs_info_unset[] = {
/* the few simple ones we can get from the mount struct */
- { XFS_MOUNT_COMPAT_IOSIZE, "," MNTOPT_LARGEIO },
- { XFS_MOUNT_BARRIER, "," MNTOPT_NOBARRIER },
- { XFS_MOUNT_SMALL_INUMS, "," MNTOPT_64BITINODE },
+ { XFS_MOUNT_COMPAT_IOSIZE, ",largeio" },
+ { XFS_MOUNT_BARRIER, ",nobarrier" },
+ { XFS_MOUNT_SMALL_INUMS, ",inode64" },
{ 0, NULL }
};
struct proc_xfs_info *xfs_infop;
@@ -494,46 +503,46 @@ xfs_showargs(
}
if (mp->m_flags & XFS_MOUNT_DFLT_IOSIZE)
- seq_printf(m, "," MNTOPT_ALLOCSIZE "=%dk",
+ seq_printf(m, ",allocsize=%dk",
(int)(1 << mp->m_writeio_log) >> 10);
if (mp->m_logbufs > 0)
- seq_printf(m, "," MNTOPT_LOGBUFS "=%d", mp->m_logbufs);
+ seq_printf(m, ",logbufs=%d", mp->m_logbufs);
if (mp->m_logbsize > 0)
- seq_printf(m, "," MNTOPT_LOGBSIZE "=%dk", mp->m_logbsize >> 10);
+ seq_printf(m, ",logbsize=%dk", mp->m_logbsize >> 10);
if (mp->m_logname)
- seq_show_option(m, MNTOPT_LOGDEV, mp->m_logname);
+ seq_show_option(m, "logdev", mp->m_logname);
if (mp->m_rtname)
- seq_show_option(m, MNTOPT_RTDEV, mp->m_rtname);
+ seq_show_option(m, "rtdev", mp->m_rtname);
if (mp->m_dalign > 0)
- seq_printf(m, "," MNTOPT_SUNIT "=%d",
+ seq_printf(m, ",sunit=%d",
(int)XFS_FSB_TO_BB(mp, mp->m_dalign));
if (mp->m_swidth > 0)
- seq_printf(m, "," MNTOPT_SWIDTH "=%d",
+ seq_printf(m, ",swidth=%d",
(int)XFS_FSB_TO_BB(mp, mp->m_swidth));
if (mp->m_qflags & (XFS_UQUOTA_ACCT|XFS_UQUOTA_ENFD))
- seq_puts(m, "," MNTOPT_USRQUOTA);
+ seq_puts(m, ",usrquota");
else if (mp->m_qflags & XFS_UQUOTA_ACCT)
- seq_puts(m, "," MNTOPT_UQUOTANOENF);
+ seq_puts(m, ",uqnoenforce");
if (mp->m_qflags & XFS_PQUOTA_ACCT) {
if (mp->m_qflags & XFS_PQUOTA_ENFD)
- seq_puts(m, "," MNTOPT_PRJQUOTA);
+ seq_puts(m, ",prjquota");
else
- seq_puts(m, "," MNTOPT_PQUOTANOENF);
+ seq_puts(m, ",pqnoenforce");
}
if (mp->m_qflags & XFS_GQUOTA_ACCT) {
if (mp->m_qflags & XFS_GQUOTA_ENFD)
- seq_puts(m, "," MNTOPT_GRPQUOTA);
+ seq_puts(m, ",grpquota");
else
- seq_puts(m, "," MNTOPT_GQUOTANOENF);
+ seq_puts(m, ",gqnoenforce");
}
if (!(mp->m_qflags & XFS_ALL_QUOTA_ACCT))
- seq_puts(m, "," MNTOPT_NOQUOTA);
+ seq_puts(m, ",noquota");
return 0;
}
@@ -547,10 +556,10 @@ xfs_max_file_offset(
/* Figure out maximum filesize, on Linux this can depend on
* the filesystem blocksize (on 32 bit platforms).
* __block_write_begin does this in an [unsigned] long...
- * page->index << (PAGE_CACHE_SHIFT - bbits)
+ * page->index << (PAGE_SHIFT - bbits)
* So, for page sized blocks (4K on 32 bit platforms),
* this wraps at around 8Tb (hence MAX_LFS_FILESIZE which is
- * (((u64)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1)
+ * (((u64)PAGE_SIZE << (BITS_PER_LONG-1))-1)
* but for smaller blocksizes it is less (bbits = log2 bsize).
* Note1: get_block_t takes a long (implicit cast from above)
* Note2: The Large Block Device (LBD and HAVE_SECTOR_T) patch
@@ -561,10 +570,10 @@ xfs_max_file_offset(
#if BITS_PER_LONG == 32
# if defined(CONFIG_LBDAF)
ASSERT(sizeof(sector_t) == 8);
- pagefactor = PAGE_CACHE_SIZE;
+ pagefactor = PAGE_SIZE;
bitshift = BITS_PER_LONG;
# else
- pagefactor = PAGE_CACHE_SIZE >> (PAGE_CACHE_SHIFT - blockshift);
+ pagefactor = PAGE_SIZE >> (PAGE_SHIFT - blockshift);
# endif
#endif
@@ -572,23 +581,35 @@ xfs_max_file_offset(
}
/*
- * xfs_set_inode32() and xfs_set_inode64() are passed an agcount
- * because in the growfs case, mp->m_sb.sb_agcount is not updated
- * yet to the potentially higher ag count.
+ * Set parameters for inode allocation heuristics, taking into account
+ * filesystem size and inode32/inode64 mount options; i.e. specifically
+ * whether or not XFS_MOUNT_SMALL_INUMS is set.
+ *
+ * Inode allocation patterns are altered only if inode32 is requested
+ * (XFS_MOUNT_SMALL_INUMS), and the filesystem is sufficiently large.
+ * If altered, XFS_MOUNT_32BITINODES is set as well.
+ *
+ * An agcount independent of that in the mount structure is provided
+ * because in the growfs case, mp->m_sb.sb_agcount is not yet updated
+ * to the potentially higher ag count.
+ *
+ * Returns the maximum AG index which may contain inodes.
*/
xfs_agnumber_t
-xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
+xfs_set_inode_alloc(
+ struct xfs_mount *mp,
+ xfs_agnumber_t agcount)
{
- xfs_agnumber_t index = 0;
+ xfs_agnumber_t index;
xfs_agnumber_t maxagi = 0;
xfs_sb_t *sbp = &mp->m_sb;
xfs_agnumber_t max_metadata;
xfs_agino_t agino;
xfs_ino_t ino;
- xfs_perag_t *pag;
- /* Calculate how much should be reserved for inodes to meet
- * the max inode percentage.
+ /*
+ * Calculate how much should be reserved for inodes to meet
+ * the max inode percentage. Used only for inode32.
*/
if (mp->m_maxicount) {
__uint64_t icount;
@@ -602,54 +623,48 @@ xfs_set_inode32(struct xfs_mount *mp, xfs_agnumber_t agcount)
max_metadata = agcount;
}
+ /* Get the last possible inode in the filesystem */
agino = XFS_OFFBNO_TO_AGINO(mp, sbp->sb_agblocks - 1, 0);
+ ino = XFS_AGINO_TO_INO(mp, agcount - 1, agino);
+
+ /*
+ * If user asked for no more than 32-bit inodes, and the fs is
+ * sufficiently large, set XFS_MOUNT_32BITINODES if we must alter
+ * the allocator to accommodate the request.
+ */
+ if ((mp->m_flags & XFS_MOUNT_SMALL_INUMS) && ino > XFS_MAXINUMBER_32)
+ mp->m_flags |= XFS_MOUNT_32BITINODES;
+ else
+ mp->m_flags &= ~XFS_MOUNT_32BITINODES;
for (index = 0; index < agcount; index++) {
- ino = XFS_AGINO_TO_INO(mp, index, agino);
+ struct xfs_perag *pag;
- if (ino > XFS_MAXINUMBER_32) {
- pag = xfs_perag_get(mp, index);
- pag->pagi_inodeok = 0;
- pag->pagf_metadata = 0;
- xfs_perag_put(pag);
- continue;
- }
+ ino = XFS_AGINO_TO_INO(mp, index, agino);
pag = xfs_perag_get(mp, index);
- pag->pagi_inodeok = 1;
- maxagi++;
- if (index < max_metadata)
- pag->pagf_metadata = 1;
- xfs_perag_put(pag);
- }
- mp->m_flags |= (XFS_MOUNT_32BITINODES |
- XFS_MOUNT_SMALL_INUMS);
-
- return maxagi;
-}
-xfs_agnumber_t
-xfs_set_inode64(struct xfs_mount *mp, xfs_agnumber_t agcount)
-{
- xfs_agnumber_t index = 0;
-
- for (index = 0; index < agcount; index++) {
- struct xfs_perag *pag;
+ if (mp->m_flags & XFS_MOUNT_32BITINODES) {
+ if (ino > XFS_MAXINUMBER_32) {
+ pag->pagi_inodeok = 0;
+ pag->pagf_metadata = 0;
+ } else {
+ pag->pagi_inodeok = 1;
+ maxagi++;
+ if (index < max_metadata)
+ pag->pagf_metadata = 1;
+ else
+ pag->pagf_metadata = 0;
+ }
+ } else {
+ pag->pagi_inodeok = 1;
+ pag->pagf_metadata = 0;
+ }
- pag = xfs_perag_get(mp, index);
- pag->pagi_inodeok = 1;
- pag->pagf_metadata = 0;
xfs_perag_put(pag);
}
- /* There is no need for lock protection on m_flags,
- * the rw_semaphore of the VFS superblock is locked
- * during mount/umount/remount operations, so this is
- * enough to avoid concurency on the m_flags field
- */
- mp->m_flags &= ~(XFS_MOUNT_32BITINODES |
- XFS_MOUNT_SMALL_INUMS);
- return index;
+ return (mp->m_flags & XFS_MOUNT_32BITINODES) ? maxagi : agcount;
}
STATIC int
@@ -913,7 +928,7 @@ xfs_fs_alloc_inode(
/*
* Now that the generic code is guaranteed not to be accessing
- * the linux inode, we can reclaim the inode.
+ * the linux inode, we can inactivate and reclaim the inode.
*/
STATIC void
xfs_fs_destroy_inode(
@@ -923,9 +938,14 @@ xfs_fs_destroy_inode(
trace_xfs_destroy_inode(ip);
- XFS_STATS_INC(ip->i_mount, vn_reclaim);
+ ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
+ XFS_STATS_INC(ip->i_mount, vn_rele);
+ XFS_STATS_INC(ip->i_mount, vn_remove);
+
+ xfs_inactive(ip);
ASSERT(XFS_FORCED_SHUTDOWN(ip->i_mount) || ip->i_delayed_blks == 0);
+ XFS_STATS_INC(ip->i_mount, vn_reclaim);
/*
* We should never get here with one of the reclaim flags already set.
@@ -972,24 +992,6 @@ xfs_fs_inode_init_once(
"xfsino", ip->i_ino);
}
-STATIC void
-xfs_fs_evict_inode(
- struct inode *inode)
-{
- xfs_inode_t *ip = XFS_I(inode);
-
- ASSERT(!rwsem_is_locked(&ip->i_iolock.mr_lock));
-
- trace_xfs_evict_inode(ip);
-
- truncate_inode_pages_final(&inode->i_data);
- clear_inode(inode);
- XFS_STATS_INC(ip->i_mount, vn_rele);
- XFS_STATS_INC(ip->i_mount, vn_remove);
-
- xfs_inactive(ip);
-}
-
/*
* We do an unlocked check for XFS_IDONTCACHE here because we are already
* serialised against cache hits here via the inode->i_lock and igrab() in
@@ -1166,6 +1168,27 @@ xfs_quiesce_attr(
}
STATIC int
+xfs_test_remount_options(
+ struct super_block *sb,
+ struct xfs_mount *mp,
+ char *options)
+{
+ int error = 0;
+ struct xfs_mount *tmp_mp;
+
+ tmp_mp = kmem_zalloc(sizeof(*tmp_mp), KM_MAYFAIL);
+ if (!tmp_mp)
+ return -ENOMEM;
+
+ tmp_mp->m_super = sb;
+ error = xfs_parseargs(tmp_mp, options);
+ xfs_free_fsname(tmp_mp);
+ kfree(tmp_mp);
+
+ return error;
+}
+
+STATIC int
xfs_fs_remount(
struct super_block *sb,
int *flags,
@@ -1177,6 +1200,11 @@ xfs_fs_remount(
char *p;
int error;
+ /* First, check for complete junk; i.e. invalid options */
+ error = xfs_test_remount_options(sb, mp, options);
+ if (error)
+ return error;
+
sync_filesystem(sb);
while ((p = strsep(&options, ",")) != NULL) {
int token;
@@ -1193,10 +1221,12 @@ xfs_fs_remount(
mp->m_flags &= ~XFS_MOUNT_BARRIER;
break;
case Opt_inode64:
- mp->m_maxagi = xfs_set_inode64(mp, sbp->sb_agcount);
+ mp->m_flags &= ~XFS_MOUNT_SMALL_INUMS;
+ mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
break;
case Opt_inode32:
- mp->m_maxagi = xfs_set_inode32(mp, sbp->sb_agcount);
+ mp->m_flags |= XFS_MOUNT_SMALL_INUMS;
+ mp->m_maxagi = xfs_set_inode_alloc(mp, sbp->sb_agcount);
break;
default:
/*
@@ -1233,6 +1263,16 @@ xfs_fs_remount(
return -EINVAL;
}
+ if (XFS_SB_VERSION_NUM(sbp) == XFS_SB_VERSION_5 &&
+ xfs_sb_has_ro_compat_feature(sbp,
+ XFS_SB_FEAT_RO_COMPAT_UNKNOWN)) {
+ xfs_warn(mp,
+"ro->rw transition prohibited on unknown (0x%x) ro-compat filesystem",
+ (sbp->sb_features_ro_compat &
+ XFS_SB_FEAT_RO_COMPAT_UNKNOWN));
+ return -EINVAL;
+ }
+
mp->m_flags &= ~XFS_MOUNT_RDONLY;
/*
@@ -1344,9 +1384,8 @@ xfs_finish_flags(
*/
if (xfs_sb_version_hascrc(&mp->m_sb) &&
(mp->m_flags & XFS_MOUNT_NOATTR2)) {
- xfs_warn(mp,
-"Cannot mount a V5 filesystem as %s. %s is always enabled for V5 filesystems.",
- MNTOPT_NOATTR2, MNTOPT_ATTR2);
+ xfs_warn(mp, "Cannot mount a V5 filesystem as noattr2. "
+ "attr2 is always enabled for V5 filesystems.");
return -EINVAL;
}
@@ -1621,7 +1660,6 @@ xfs_fs_free_cached_objects(
static const struct super_operations xfs_super_operations = {
.alloc_inode = xfs_fs_alloc_inode,
.destroy_inode = xfs_fs_destroy_inode,
- .evict_inode = xfs_fs_evict_inode,
.drop_inode = xfs_fs_drop_inode,
.put_super = xfs_fs_put_super,
.sync_fs = xfs_fs_sync_fs,
@@ -1817,6 +1855,8 @@ init_xfs_fs(void)
{
int error;
+ xfs_check_ondisk_structs();
+
printk(KERN_INFO XFS_VERSION_STRING " with "
XFS_BUILD_OPTIONS " enabled\n");
diff --git a/fs/xfs/xfs_super.h b/fs/xfs/xfs_super.h
index 499058fea..2dfb1ce45 100644
--- a/fs/xfs/xfs_super.h
+++ b/fs/xfs/xfs_super.h
@@ -65,8 +65,8 @@ extern __uint64_t xfs_max_file_offset(unsigned int);
extern void xfs_flush_inodes(struct xfs_mount *mp);
extern void xfs_blkdev_issue_flush(struct xfs_buftarg *);
-extern xfs_agnumber_t xfs_set_inode32(struct xfs_mount *, xfs_agnumber_t agcount);
-extern xfs_agnumber_t xfs_set_inode64(struct xfs_mount *, xfs_agnumber_t agcount);
+extern xfs_agnumber_t xfs_set_inode_alloc(struct xfs_mount *,
+ xfs_agnumber_t agcount);
extern const struct export_operations xfs_export_operations;
extern const struct xattr_handler *xfs_xattr_handlers[];
diff --git a/fs/xfs/xfs_sysfs.c b/fs/xfs/xfs_sysfs.c
index 641d625eb..6ced4f143 100644
--- a/fs/xfs/xfs_sysfs.c
+++ b/fs/xfs/xfs_sysfs.c
@@ -18,10 +18,13 @@
#include "xfs.h"
#include "xfs_sysfs.h"
+#include "xfs_format.h"
#include "xfs_log_format.h"
+#include "xfs_trans_resv.h"
#include "xfs_log.h"
#include "xfs_log_priv.h"
#include "xfs_stats.h"
+#include "xfs_mount.h"
struct xfs_sysfs_attr {
struct attribute attr;
@@ -45,16 +48,6 @@ to_attr(struct attribute *attr)
#define ATTR_LIST(name) &xfs_sysfs_attr_##name.attr
-/*
- * xfs_mount kobject. This currently has no attributes and thus no need for show
- * and store helpers. The mp kobject serves as the per-mount parent object that
- * is identified by the fsname under sysfs.
- */
-
-struct kobj_type xfs_mp_ktype = {
- .release = xfs_sysfs_release,
-};
-
STATIC ssize_t
xfs_sysfs_object_show(
struct kobject *kobject,
@@ -83,6 +76,71 @@ static const struct sysfs_ops xfs_sysfs_ops = {
.store = xfs_sysfs_object_store,
};
+/*
+ * xfs_mount kobject. The mp kobject also serves as the per-mount parent object
+ * that is identified by the fsname under sysfs.
+ */
+
+static inline struct xfs_mount *
+to_mp(struct kobject *kobject)
+{
+ struct xfs_kobj *kobj = to_kobj(kobject);
+
+ return container_of(kobj, struct xfs_mount, m_kobj);
+}
+
+#ifdef DEBUG
+
+STATIC ssize_t
+fail_writes_store(
+ struct kobject *kobject,
+ const char *buf,
+ size_t count)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+ int ret;
+ int val;
+
+ ret = kstrtoint(buf, 0, &val);
+ if (ret)
+ return ret;
+
+ if (val == 1)
+ mp->m_fail_writes = true;
+ else if (val == 0)
+ mp->m_fail_writes = false;
+ else
+ return -EINVAL;
+
+ return count;
+}
+
+STATIC ssize_t
+fail_writes_show(
+ struct kobject *kobject,
+ char *buf)
+{
+ struct xfs_mount *mp = to_mp(kobject);
+
+ return snprintf(buf, PAGE_SIZE, "%d\n", mp->m_fail_writes ? 1 : 0);
+}
+XFS_SYSFS_ATTR_RW(fail_writes);
+
+#endif /* DEBUG */
+
+static struct attribute *xfs_mp_attrs[] = {
+#ifdef DEBUG
+ ATTR_LIST(fail_writes),
+#endif
+ NULL,
+};
+
+struct kobj_type xfs_mp_ktype = {
+ .release = xfs_sysfs_release,
+ .sysfs_ops = &xfs_sysfs_ops,
+ .default_attrs = xfs_mp_attrs,
+};
+
#ifdef DEBUG
/* debug */
diff --git a/fs/xfs/xfs_trace.h b/fs/xfs/xfs_trace.h
index 391d797cb..c8d584260 100644
--- a/fs/xfs/xfs_trace.h
+++ b/fs/xfs/xfs_trace.h
@@ -1296,11 +1296,7 @@ DEFINE_IOMAP_EVENT(xfs_map_blocks_found);
DEFINE_IOMAP_EVENT(xfs_map_blocks_alloc);
DEFINE_IOMAP_EVENT(xfs_get_blocks_found);
DEFINE_IOMAP_EVENT(xfs_get_blocks_alloc);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_new);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_update);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_none);
-DEFINE_IOMAP_EVENT(xfs_gbmap_direct_endio);
+DEFINE_IOMAP_EVENT(xfs_get_blocks_map_direct);
DECLARE_EVENT_CLASS(xfs_simple_io_class,
TP_PROTO(struct xfs_inode *ip, xfs_off_t offset, ssize_t count),
@@ -1340,6 +1336,9 @@ DEFINE_SIMPLE_IO_EVENT(xfs_unwritten_convert);
DEFINE_SIMPLE_IO_EVENT(xfs_get_blocks_notfound);
DEFINE_SIMPLE_IO_EVENT(xfs_setfilesize);
DEFINE_SIMPLE_IO_EVENT(xfs_zero_eof);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_unwritten);
+DEFINE_SIMPLE_IO_EVENT(xfs_end_io_direct_write_append);
DECLARE_EVENT_CLASS(xfs_itrunc_class,
TP_PROTO(struct xfs_inode *ip, xfs_fsize_t new_size),
diff --git a/fs/xfs/xfs_trans.c b/fs/xfs/xfs_trans.c
index 748b16aff..20c53666c 100644
--- a/fs/xfs/xfs_trans.c
+++ b/fs/xfs/xfs_trans.c
@@ -1028,6 +1028,8 @@ __xfs_trans_roll(
struct xfs_trans_res tres;
int error;
+ *committed = 0;
+
/*
* Ensure that the inode is always logged.
*/
@@ -1082,6 +1084,6 @@ xfs_trans_roll(
struct xfs_trans **tpp,
struct xfs_inode *dp)
{
- int committed = 0;
+ int committed;
return __xfs_trans_roll(tpp, dp, &committed);
}
diff --git a/fs/xfs/xfs_trans.h b/fs/xfs/xfs_trans.h
index 4643070d7..e7c49cf43 100644
--- a/fs/xfs/xfs_trans.h
+++ b/fs/xfs/xfs_trans.h
@@ -133,7 +133,6 @@ typedef struct xfs_trans {
* XFS transaction mechanism exported interfaces that are
* actually macros.
*/
-#define xfs_trans_get_block_res(tp) ((tp)->t_blk_res)
#define xfs_trans_set_sync(tp) ((tp)->t_flags |= XFS_TRANS_SYNC)
#if defined(DEBUG) || defined(XFS_WARN)
diff --git a/fs/xfs/xfs_trans_ail.c b/fs/xfs/xfs_trans_ail.c
index 4f18fd92c..d6c9c3e9e 100644
--- a/fs/xfs/xfs_trans_ail.c
+++ b/fs/xfs/xfs_trans_ail.c
@@ -497,6 +497,7 @@ xfsaild(
long tout = 0; /* milliseconds */
current->flags |= PF_MEMALLOC;
+ set_freezable();
while (!kthread_should_stop()) {
if (tout && tout <= 20)
@@ -519,14 +520,14 @@ xfsaild(
if (!xfs_ail_min(ailp) &&
ailp->xa_target == ailp->xa_target_prev) {
spin_unlock(&ailp->xa_lock);
- schedule();
+ freezable_schedule();
tout = 0;
continue;
}
spin_unlock(&ailp->xa_lock);
if (tout)
- schedule_timeout(msecs_to_jiffies(tout));
+ freezable_schedule_timeout(msecs_to_jiffies(tout));
__set_current_state(TASK_RUNNING);
diff --git a/fs/xfs/xfs_trans_buf.c b/fs/xfs/xfs_trans_buf.c
index 757984128..8ee29ca13 100644
--- a/fs/xfs/xfs_trans_buf.c
+++ b/fs/xfs/xfs_trans_buf.c
@@ -155,7 +155,7 @@ xfs_trans_get_buf_map(
ASSERT(xfs_buf_islocked(bp));
if (XFS_FORCED_SHUTDOWN(tp->t_mountp)) {
xfs_buf_stale(bp);
- XFS_BUF_DONE(bp);
+ bp->b_flags |= XBF_DONE;
}
ASSERT(bp->b_transp == tp);
@@ -518,7 +518,7 @@ xfs_trans_log_buf(xfs_trans_t *tp,
* inside the b_bdstrat callback so that this won't get written to
* disk.
*/
- XFS_BUF_DONE(bp);
+ bp->b_flags |= XBF_DONE;
ASSERT(atomic_read(&bip->bli_refcount) > 0);
bp->b_iodone = xfs_buf_iodone_callbacks;
@@ -534,8 +534,8 @@ xfs_trans_log_buf(xfs_trans_t *tp,
*/
if (bip->bli_flags & XFS_BLI_STALE) {
bip->bli_flags &= ~XFS_BLI_STALE;
- ASSERT(XFS_BUF_ISSTALE(bp));
- XFS_BUF_UNSTALE(bp);
+ ASSERT(bp->b_flags & XBF_STALE);
+ bp->b_flags &= ~XBF_STALE;
bip->__bli_format.blf_flags &= ~XFS_BLF_CANCEL;
}
@@ -600,7 +600,7 @@ xfs_trans_binval(
* If the buffer is already invalidated, then
* just return.
*/
- ASSERT(XFS_BUF_ISSTALE(bp));
+ ASSERT(bp->b_flags & XBF_STALE);
ASSERT(!(bip->bli_flags & (XFS_BLI_LOGGED | XFS_BLI_DIRTY)));
ASSERT(!(bip->__bli_format.blf_flags & XFS_BLF_INODE_BUF));
ASSERT(!(bip->__bli_format.blf_flags & XFS_BLFT_MASK));
diff --git a/fs/xfs/xfs_trans_dquot.c b/fs/xfs/xfs_trans_dquot.c
index 995170194..c3d547211 100644
--- a/fs/xfs/xfs_trans_dquot.c
+++ b/fs/xfs/xfs_trans_dquot.c
@@ -609,17 +609,20 @@ xfs_trans_dqresv(
xfs_qcnt_t total_count;
xfs_qcnt_t *resbcountp;
xfs_quotainfo_t *q = mp->m_quotainfo;
+ struct xfs_def_quota *defq;
xfs_dqlock(dqp);
+ defq = xfs_get_defquota(dqp, q);
+
if (flags & XFS_TRANS_DQ_RES_BLKS) {
hardlimit = be64_to_cpu(dqp->q_core.d_blk_hardlimit);
if (!hardlimit)
- hardlimit = q->qi_bhardlimit;
+ hardlimit = defq->bhardlimit;
softlimit = be64_to_cpu(dqp->q_core.d_blk_softlimit);
if (!softlimit)
- softlimit = q->qi_bsoftlimit;
+ softlimit = defq->bsoftlimit;
timer = be32_to_cpu(dqp->q_core.d_btimer);
warns = be16_to_cpu(dqp->q_core.d_bwarns);
warnlimit = dqp->q_mount->m_quotainfo->qi_bwarnlimit;
@@ -628,10 +631,10 @@ xfs_trans_dqresv(
ASSERT(flags & XFS_TRANS_DQ_RES_RTBLKS);
hardlimit = be64_to_cpu(dqp->q_core.d_rtb_hardlimit);
if (!hardlimit)
- hardlimit = q->qi_rtbhardlimit;
+ hardlimit = defq->rtbhardlimit;
softlimit = be64_to_cpu(dqp->q_core.d_rtb_softlimit);
if (!softlimit)
- softlimit = q->qi_rtbsoftlimit;
+ softlimit = defq->rtbsoftlimit;
timer = be32_to_cpu(dqp->q_core.d_rtbtimer);
warns = be16_to_cpu(dqp->q_core.d_rtbwarns);
warnlimit = dqp->q_mount->m_quotainfo->qi_rtbwarnlimit;
@@ -672,10 +675,10 @@ xfs_trans_dqresv(
warnlimit = dqp->q_mount->m_quotainfo->qi_iwarnlimit;
hardlimit = be64_to_cpu(dqp->q_core.d_ino_hardlimit);
if (!hardlimit)
- hardlimit = q->qi_ihardlimit;
+ hardlimit = defq->ihardlimit;
softlimit = be64_to_cpu(dqp->q_core.d_ino_softlimit);
if (!softlimit)
- softlimit = q->qi_isoftlimit;
+ softlimit = defq->isoftlimit;
if (hardlimit && total_count > hardlimit) {
xfs_quota_warn(mp, dqp, QUOTA_NL_IHARDWARN);
diff --git a/fs/xfs/xfs_trans_inode.c b/fs/xfs/xfs_trans_inode.c
index b97f1df91..11a3af08b 100644
--- a/fs/xfs/xfs_trans_inode.c
+++ b/fs/xfs/xfs_trans_inode.c
@@ -75,18 +75,10 @@ xfs_trans_ichgtime(
tv = current_fs_time(inode->i_sb);
- if ((flags & XFS_ICHGTIME_MOD) &&
- !timespec_equal(&inode->i_mtime, &tv)) {
+ if (flags & XFS_ICHGTIME_MOD)
inode->i_mtime = tv;
- ip->i_d.di_mtime.t_sec = tv.tv_sec;
- ip->i_d.di_mtime.t_nsec = tv.tv_nsec;
- }
- if ((flags & XFS_ICHGTIME_CHG) &&
- !timespec_equal(&inode->i_ctime, &tv)) {
+ if (flags & XFS_ICHGTIME_CHG)
inode->i_ctime = tv;
- ip->i_d.di_ctime.t_sec = tv.tv_sec;
- ip->i_d.di_ctime.t_nsec = tv.tv_nsec;
- }
}
/*
@@ -125,7 +117,7 @@ xfs_trans_log_inode(
*/
if (!(ip->i_itemp->ili_item.li_desc->lid_flags & XFS_LID_DIRTY) &&
IS_I_VERSION(VFS_I(ip))) {
- ip->i_d.di_changecount = ++VFS_I(ip)->i_version;
+ VFS_I(ip)->i_version++;
flags |= XFS_ILOG_CORE;
}