summaryrefslogtreecommitdiff
path: root/Documentation/filesystems
diff options
context:
space:
mode:
Diffstat (limited to 'Documentation/filesystems')
-rw-r--r--Documentation/filesystems/Locking4
-rw-r--r--Documentation/filesystems/aufs/README383
-rw-r--r--Documentation/filesystems/aufs/design/01intro.txt170
-rw-r--r--Documentation/filesystems/aufs/design/02struct.txt258
-rw-r--r--Documentation/filesystems/aufs/design/03atomic_open.txt85
-rw-r--r--Documentation/filesystems/aufs/design/03lookup.txt113
-rw-r--r--Documentation/filesystems/aufs/design/04branch.txt74
-rw-r--r--Documentation/filesystems/aufs/design/05wbr_policy.txt64
-rw-r--r--Documentation/filesystems/aufs/design/06fhsm.txt120
-rw-r--r--Documentation/filesystems/aufs/design/06mmap.txt72
-rw-r--r--Documentation/filesystems/aufs/design/06xattr.txt96
-rw-r--r--Documentation/filesystems/aufs/design/07export.txt58
-rw-r--r--Documentation/filesystems/aufs/design/08shwh.txt52
-rw-r--r--Documentation/filesystems/aufs/design/10dynop.txt47
-rw-r--r--Documentation/filesystems/automount-support.txt51
-rw-r--r--Documentation/filesystems/caching/backend-api.txt23
-rw-r--r--Documentation/filesystems/caching/fscache.txt7
-rw-r--r--Documentation/filesystems/dax.txt6
-rw-r--r--Documentation/filesystems/nfs/knfsd-stats.txt44
-rw-r--r--Documentation/filesystems/porting29
-rw-r--r--Documentation/filesystems/proc.txt3
-rw-r--r--Documentation/filesystems/quota.txt5
-rw-r--r--Documentation/filesystems/vfs.txt39
-rw-r--r--Documentation/filesystems/xfs.txt12
24 files changed, 112 insertions, 1703 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 0a926e2ba..6a34a0f4d 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -50,8 +50,8 @@ prototypes:
int (*rename2) (struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
- void * (*follow_link) (struct dentry *, struct nameidata *);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
+ const char *(*follow_link) (struct dentry *, void **);
+ void (*put_link) (struct inode *, void *);
void (*truncate) (struct inode *);
int (*permission) (struct inode *, int, unsigned int);
int (*get_acl)(struct inode *, int);
diff --git a/Documentation/filesystems/aufs/README b/Documentation/filesystems/aufs/README
deleted file mode 100644
index 27faa06fa..000000000
--- a/Documentation/filesystems/aufs/README
+++ /dev/null
@@ -1,383 +0,0 @@
-
-Aufs4 -- advanced multi layered unification filesystem version 4.x
-http://aufs.sf.net
-Junjiro R. Okajima
-
-
-0. Introduction
-----------------------------------------
-In the early days, aufs was entirely re-designed and re-implemented
-Unionfs Version 1.x series. Adding many original ideas, approaches,
-improvements and implementations, it becomes totally different from
-Unionfs while keeping the basic features.
-Recently, Unionfs Version 2.x series begin taking some of the same
-approaches to aufs1's.
-Unionfs is being developed by Professor Erez Zadok at Stony Brook
-University and his team.
-
-Aufs4 supports linux-4.0 and later, and for linux-3.x series try aufs3.
-If you want older kernel version support, try aufs2-2.6.git or
-aufs2-standalone.git repository, aufs1 from CVS on SourceForge.
-
-Note: it becomes clear that "Aufs was rejected. Let's give it up."
- According to Christoph Hellwig, linux rejects all union-type
- filesystems but UnionMount.
-<http://marc.info/?l=linux-kernel&m=123938533724484&w=2>
-
-PS. Al Viro seems to have a plan to merge aufs as well as overlayfs and
- UnionMount, and he pointed out an issue around a directory mutex
- lock and aufs addressed it. But it is still unsure whether aufs will
- be merged (or any other union solution).
-<http://marc.info/?l=linux-kernel&m=136312705029295&w=1>
-
-
-1. Features
-----------------------------------------
-- unite several directories into a single virtual filesystem. The member
- directory is called as a branch.
-- you can specify the permission flags to the branch, which are 'readonly',
- 'readwrite' and 'whiteout-able.'
-- by upper writable branch, internal copyup and whiteout, files/dirs on
- readonly branch are modifiable logically.
-- dynamic branch manipulation, add, del.
-- etc...
-
-Also there are many enhancements in aufs, such as:
-- test only the highest one for the directory permission (dirperm1)
-- copyup on open (coo=)
-- 'move' policy for copy-up between two writable branches, after
- checking free space.
-- xattr, acl
-- readdir(3) in userspace.
-- keep inode number by external inode number table
-- keep the timestamps of file/dir in internal copyup operation
-- seekable directory, supporting NFS readdir.
-- whiteout is hardlinked in order to reduce the consumption of inodes
- on branch
-- do not copyup, nor create a whiteout when it is unnecessary
-- revert a single systemcall when an error occurs in aufs
-- remount interface instead of ioctl
-- maintain /etc/mtab by an external command, /sbin/mount.aufs.
-- loopback mounted filesystem as a branch
-- kernel thread for removing a dir which has plenty of whiteouts
-- support copyup sparse file (a file which has a 'hole' in it)
-- default permission flags for branches
-- selectable permission flags for ro branch, whether whiteout can
- exist or not
-- export via NFS.
-- support <sysfs>/fs/aufs and <debugfs>/aufs.
-- support multiple writable branches, some policies to select one
- among multiple writable branches.
-- a new semantics for link(2) and rename(2) to support multiple
- writable branches.
-- no glibc changes are required.
-- pseudo hardlink (hardlink over branches)
-- allow a direct access manually to a file on branch, e.g. bypassing aufs.
- including NFS or remote filesystem branch.
-- userspace wrapper for pathconf(3)/fpathconf(3) with _PC_LINK_MAX.
-- and more...
-
-Currently these features are temporarily dropped from aufs4.
-See design/08plan.txt in detail.
-- nested mount, i.e. aufs as readonly no-whiteout branch of another aufs
- (robr)
-- statistics of aufs thread (/sys/fs/aufs/stat)
-
-Features or just an idea in the future (see also design/*.txt),
-- reorder the branch index without del/re-add.
-- permanent xino files for NFSD
-- an option for refreshing the opened files after add/del branches
-- light version, without branch manipulation. (unnecessary?)
-- copyup in userspace
-- inotify in userspace
-- readv/writev
-
-
-2. Download
-----------------------------------------
-There are three GIT trees for aufs4, aufs4-linux.git,
-aufs4-standalone.git, and aufs-util.git. Note that there is no "4" in
-"aufs-util.git."
-While the aufs-util is always necessary, you need either of aufs4-linux
-or aufs4-standalone.
-
-The aufs4-linux tree includes the whole linux mainline GIT tree,
-git://git.kernel.org/.../torvalds/linux.git.
-And you cannot select CONFIG_AUFS_FS=m for this version, eg. you cannot
-build aufs4 as an external kernel module.
-Several extra patches are not included in this tree. Only
-aufs4-standalone tree contains them. They are described in the later
-section "Configuration and Compilation."
-
-On the other hand, the aufs4-standalone tree has only aufs source files
-and necessary patches, and you can select CONFIG_AUFS_FS=m.
-But you need to apply all aufs patches manually.
-
-You will find GIT branches whose name is in form of "aufs4.x" where "x"
-represents the linux kernel version, "linux-4.x". For instance,
-"aufs4.0" is for linux-4.0. For latest "linux-4.x-rcN", use
-"aufs4.x-rcN" branch.
-
-o aufs4-linux tree
-$ git clone --reference /your/linux/git/tree \
- git://github.com/sfjro/aufs4-linux.git aufs4-linux.git
-- if you don't have linux GIT tree, then remove "--reference ..."
-$ cd aufs4-linux.git
-$ git checkout origin/aufs4.0
-
-Or You may want to directly git-pull aufs into your linux GIT tree, and
-leave the patch-work to GIT.
-$ cd /your/linux/git/tree
-$ git remote add aufs4 git://github.com/sfjro/aufs4-linux.git
-$ git fetch aufs4
-$ git checkout -b my4.0 v4.0
-$ (add your local change...)
-$ git pull aufs4 aufs4.0
-- now you have v4.0 + your_changes + aufs4.0 in your my4.0 branch.
-- you may need to solve some conflicts between your_changes and
- aufs4.0. in this case, git-rerere is recommended so that you can
- solve the similar conflicts automatically when you upgrade to 4.1 or
- later in the future.
-
-o aufs4-standalone tree
-$ git clone git://github.com/sfjro/aufs4-standalone.git aufs4-standalone.git
-$ cd aufs4-standalone.git
-$ git checkout origin/aufs4.0
-
-o aufs-util tree
-$ git clone git://git.code.sf.net/p/aufs/aufs-util aufs-util.git
-- note that the public aufs-util.git is on SourceForge instead of
- GitHUB.
-$ cd aufs-util.git
-$ git checkout origin/aufs4.0
-
-Note: The 4.x-rcN branch is to be used with `rc' kernel versions ONLY.
-The minor version number, 'x' in '4.x', of aufs may not always
-follow the minor version number of the kernel.
-Because changes in the kernel that cause the use of a new
-minor version number do not always require changes to aufs-util.
-
-Since aufs-util has its own minor version number, you may not be
-able to find a GIT branch in aufs-util for your kernel's
-exact minor version number.
-In this case, you should git-checkout the branch for the
-nearest lower number.
-
-For (an unreleased) example:
-If you are using "linux-4.10" and the "aufs4.10" branch
-does not exist in aufs-util repository, then "aufs4.9", "aufs4.8"
-or something numerically smaller is the branch for your kernel.
-
-Also you can view all branches by
- $ git branch -a
-
-
-3. Configuration and Compilation
-----------------------------------------
-Make sure you have git-checkout'ed the correct branch.
-
-For aufs4-linux tree,
-- enable CONFIG_AUFS_FS.
-- set other aufs configurations if necessary.
-
-For aufs4-standalone tree,
-There are several ways to build.
-
-1.
-- apply ./aufs4-kbuild.patch to your kernel source files.
-- apply ./aufs4-base.patch too.
-- apply ./aufs4-mmap.patch too.
-- apply ./aufs4-standalone.patch too, if you have a plan to set
- CONFIG_AUFS_FS=m. otherwise you don't need ./aufs4-standalone.patch.
-- copy ./{Documentation,fs,include/uapi/linux/aufs_type.h} files to your
- kernel source tree. Never copy $PWD/include/uapi/linux/Kbuild.
-- enable CONFIG_AUFS_FS, you can select either
- =m or =y.
-- and build your kernel as usual.
-- install the built kernel.
- Note: Since linux-3.9, every filesystem module requires an alias
- "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
- modules.aliases file if you set CONFIG_AUFS_FS=m.
-- install the header files too by "make headers_install" to the
- directory where you specify. By default, it is $PWD/usr.
- "make help" shows a brief note for headers_install.
-- and reboot your system.
-
-2.
-- module only (CONFIG_AUFS_FS=m).
-- apply ./aufs4-base.patch to your kernel source files.
-- apply ./aufs4-mmap.patch too.
-- apply ./aufs4-standalone.patch too.
-- build your kernel, don't forget "make headers_install", and reboot.
-- edit ./config.mk and set other aufs configurations if necessary.
- Note: You should read $PWD/fs/aufs/Kconfig carefully which describes
- every aufs configurations.
-- build the module by simple "make".
- Note: Since linux-3.9, every filesystem module requires an alias
- "fs-<fsname>". You should make sure that "fs-aufs" is listed in your
- modules.aliases file.
-- you can specify ${KDIR} make variable which points to your kernel
- source tree.
-- install the files
- + run "make install" to install the aufs module, or copy the built
- $PWD/aufs.ko to /lib/modules/... and run depmod -a (or reboot simply).
- + run "make install_headers" (instead of headers_install) to install
- the modified aufs header file (you can specify DESTDIR which is
- available in aufs standalone version's Makefile only), or copy
- $PWD/usr/include/linux/aufs_type.h to /usr/include/linux or wherever
- you like manually. By default, the target directory is $PWD/usr.
-- no need to apply aufs4-kbuild.patch, nor copying source files to your
- kernel source tree.
-
-Note: The header file aufs_type.h is necessary to build aufs-util
- as well as "make headers_install" in the kernel source tree.
- headers_install is subject to be forgotten, but it is essentially
- necessary, not only for building aufs-util.
- You may not meet problems without headers_install in some older
- version though.
-
-And then,
-- read README in aufs-util, build and install it
-- note that your distribution may contain an obsoleted version of
- aufs_type.h in /usr/include/linux or something. When you build aufs
- utilities, make sure that your compiler refers the correct aufs header
- file which is built by "make headers_install."
-- if you want to use readdir(3) in userspace or pathconf(3) wrapper,
- then run "make install_ulib" too. And refer to the aufs manual in
- detail.
-
-There are several other patches in aufs4-standalone.git. They are all
-optional. When you meet some problems, they will help you.
-- aufs4-loopback.patch
- Supports a nested loopback mount in a branch-fs. This patch is
- unnecessary until aufs produces a message like "you may want to try
- another patch for loopback file".
-- vfs-ino.patch
- Modifies a system global kernel internal function get_next_ino() in
- order to stop assigning 0 for an inode-number. Not directly related to
- aufs, but recommended generally.
-- tmpfs-idr.patch
- Keeps the tmpfs inode number as the lowest value. Effective to reduce
- the size of aufs XINO files for tmpfs branch. Also it prevents the
- duplication of inode number, which is important for backup tools and
- other utilities. When you find aufs XINO files for tmpfs branch
- growing too much, try this patch.
-
-
-4. Usage
-----------------------------------------
-At first, make sure aufs-util are installed, and please read the aufs
-manual, aufs.5 in aufs-util.git tree.
-$ man -l aufs.5
-
-And then,
-$ mkdir /tmp/rw /tmp/aufs
-# mount -t aufs -o br=/tmp/rw:${HOME} none /tmp/aufs
-
-Here is another example. The result is equivalent.
-# mount -t aufs -o br=/tmp/rw=rw:${HOME}=ro none /tmp/aufs
- Or
-# mount -t aufs -o br:/tmp/rw none /tmp/aufs
-# mount -o remount,append:${HOME} /tmp/aufs
-
-Then, you can see whole tree of your home dir through /tmp/aufs. If
-you modify a file under /tmp/aufs, the one on your home directory is
-not affected, instead the same named file will be newly created under
-/tmp/rw. And all of your modification to a file will be applied to
-the one under /tmp/rw. This is called the file based Copy on Write
-(COW) method.
-Aufs mount options are described in aufs.5.
-If you run chroot or something and make your aufs as a root directory,
-then you need to customize the shutdown script. See the aufs manual in
-detail.
-
-Additionally, there are some sample usages of aufs which are a
-diskless system with network booting, and LiveCD over NFS.
-See sample dir in CVS tree on SourceForge.
-
-
-5. Contact
-----------------------------------------
-When you have any problems or strange behaviour in aufs, please let me
-know with:
-- /proc/mounts (instead of the output of mount(8))
-- /sys/module/aufs/*
-- /sys/fs/aufs/* (if you have them)
-- /debug/aufs/* (if you have them)
-- linux kernel version
- if your kernel is not plain, for example modified by distributor,
- the url where i can download its source is necessary too.
-- aufs version which was printed at loading the module or booting the
- system, instead of the date you downloaded.
-- configuration (define/undefine CONFIG_AUFS_xxx)
-- kernel configuration or /proc/config.gz (if you have it)
-- behaviour which you think to be incorrect
-- actual operation, reproducible one is better
-- mailto: aufs-users at lists.sourceforge.net
-
-Usually, I don't watch the Public Areas(Bugs, Support Requests, Patches,
-and Feature Requests) on SourceForge. Please join and write to
-aufs-users ML.
-
-
-6. Acknowledgements
-----------------------------------------
-Thanks to everyone who has tried or is using aufs, and to whoever
-has reported a bug or given any feedback.
-
-Especially donators:
-Tomas Matejicek(slax.org) made a donation (much more than once).
- Since Apr 2010, Tomas M (the author of Slax and Linux Live
- scripts) is making "doubling" donations.
- Unfortunately I cannot list all of the donators, but I really
- appreciate.
- It ends Aug 2010, but the ordinary donation URL is still available.
- <http://sourceforge.net/donate/index.php?group_id=167503>
-Dai Itasaka made a donation (2007/8).
-Chuck Smith made a donation (2008/4, 10 and 12).
-Henk Schoneveld made a donation (2008/9).
-Chih-Wei Huang, ASUS, CTC donated Eee PC 4G (2008/10).
-Francois Dupoux made a donation (2008/11).
-Bruno Cesar Ribas and Luis Carlos Erpen de Bona, C3SL serves public
- aufs2 GIT tree (2009/2).
-William Grant made a donation (2009/3).
-Patrick Lane made a donation (2009/4).
-The Mail Archive (mail-archive.com) made donations (2009/5).
-Nippy Networks (Ed Wildgoose) made a donation (2009/7).
-New Dream Network, LLC (www.dreamhost.com) made a donation (2009/11).
-Pavel Pronskiy made a donation (2011/2).
-Iridium and Inmarsat satellite phone retailer (www.mailasail.com), Nippy
- Networks (Ed Wildgoose) made a donation for hardware (2011/3).
-Max Lekomcev (DOM-TV project) made a donation (2011/7, 12, 2012/3, 6 and
-11).
-Sam Liddicott made a donation (2011/9).
-Era Scarecrow made a donation (2013/4).
-Bor Ratajc made a donation (2013/4).
-Alessandro Gorreta made a donation (2013/4).
-POIRETTE Marc made a donation (2013/4).
-Alessandro Gorreta made a donation (2013/4).
-lauri kasvandik made a donation (2013/5).
-"pemasu from Finland" made a donation (2013/7).
-The Parted Magic Project made a donation (2013/9 and 11).
-Pavel Barta made a donation (2013/10).
-Nikolay Pertsev made a donation (2014/5).
-James B made a donation (2014/7).
-Stefano Di Biase made a donation (2014/8).
-Daniel Epellei made a donation (2015/1).
-
-Thank you very much.
-Donations are always, including future donations, very important and
-helpful for me to keep on developing aufs.
-
-
-7.
-----------------------------------------
-If you are an experienced user, no explanation is needed. Aufs is
-just a linux filesystem.
-
-
-Enjoy!
-
-# Local variables: ;
-# mode: text;
-# End: ;
diff --git a/Documentation/filesystems/aufs/design/01intro.txt b/Documentation/filesystems/aufs/design/01intro.txt
deleted file mode 100644
index a0194fe21..000000000
--- a/Documentation/filesystems/aufs/design/01intro.txt
+++ /dev/null
@@ -1,170 +0,0 @@
-
-# Copyright (C) 2005-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Introduction
-----------------------------------------
-
-aufs [ei ju: ef es] | [a u f s]
-1. abbrev. for "advanced multi-layered unification filesystem".
-2. abbrev. for "another unionfs".
-3. abbrev. for "auf das" in German which means "on the" in English.
- Ex. "Butter aufs Brot"(G) means "butter onto bread"(E).
- But "Filesystem aufs Filesystem" is hard to understand.
-
-AUFS is a filesystem with features:
-- multi layered stackable unification filesystem, the member directory
- is called as a branch.
-- branch permission and attribute, 'readonly', 'real-readonly',
- 'readwrite', 'whiteout-able', 'link-able whiteout', etc. and their
- combination.
-- internal "file copy-on-write".
-- logical deletion, whiteout.
-- dynamic branch manipulation, adding, deleting and changing permission.
-- allow bypassing aufs, user's direct branch access.
-- external inode number translation table and bitmap which maintains the
- persistent aufs inode number.
-- seekable directory, including NFS readdir.
-- file mapping, mmap and sharing pages.
-- pseudo-link, hardlink over branches.
-- loopback mounted filesystem as a branch.
-- several policies to select one among multiple writable branches.
-- revert a single systemcall when an error occurs in aufs.
-- and more...
-
-
-Multi Layered Stackable Unification Filesystem
-----------------------------------------------------------------------
-Most people already know what it is.
-It is a filesystem which unifies several directories and provides a
-merged single directory. When users access a file, the access will be
-passed/re-directed/converted (sorry, I am not sure which English word is
-correct) to the real file on the member filesystem. The member
-filesystem is called 'lower filesystem' or 'branch' and has a mode
-'readonly' and 'readwrite.' And the deletion for a file on the lower
-readonly branch is handled by creating 'whiteout' on the upper writable
-branch.
-
-On LKML, there have been discussions about UnionMount (Jan Blunck,
-Bharata B Rao and Valerie Aurora) and Unionfs (Erez Zadok). They took
-different approaches to implement the merged-view.
-The former tries putting it into VFS, and the latter implements as a
-separate filesystem.
-(If I misunderstand about these implementations, please let me know and
-I shall correct it. Because it is a long time ago when I read their
-source files last time).
-
-UnionMount's approach can be kept small, but it may be hard to share
-branches between several UnionMount since the whiteout in it is
-implemented in the inode on branch filesystem and always
-shared. According to Bharata's post, readdir does not seem to be
-finished yet.
-There are several missing features known in this implementation, such as
-- for users, the inode number may change silently. eg. copy-up.
-- link(2) may break by copy-up.
-- read(2) may get an obsoleted filedata (fstat(2) too).
-- fcntl(F_SETLK) may be broken by copy-up.
-- unnecessary copy-up may happen, for example mmap(MAP_PRIVATE) after
- open(O_RDWR).
-
-In linux-3.18, "overlay" filesystem (formerly known as "overlayfs") was
-merged into mainline. This is another implementation of UnionMount as a
-separated filesystem. All the limitations and known problems which
-UnionMount has are equally inherited by the "overlay" filesystem.
-
-Unionfs has a longer history. When I started implementing a stackable
-filesystem (Aug 2005), it already existed. It has virtual super_block,
-inode, dentry and file objects and they have an array pointing lower
-same kind objects. After contributing many patches for Unionfs, I
-re-started my project AUFS (Jun 2006).
-
-In AUFS, the structure of filesystem resembles to Unionfs, but I
-implemented my own ideas, approaches and enhancements and it became
-totally different one.
-
-Comparing DM snapshot and fs based implementation
-- the number of bytes to be copied between devices is much smaller.
-- the type of filesystem must be one and only.
-- the fs must be writable, no readonly fs, even for the lower original
- device. so the compression fs will not be usable. but if we use
- loopback mount, we may address this issue.
- for instance,
- mount /cdrom/squashfs.img /sq
- losetup /sq/ext2.img
- losetup /somewhere/cow
- dmsetup "snapshot /dev/loop0 /dev/loop1 ..."
-- it will be difficult (or needs more operations) to extract the
- difference between the original device and COW.
-- DM snapshot-merge may help a lot when users try merging. in the
- fs-layer union, users will use rsync(1).
-
-You may want to read my old paper "Filesystems in LiveCD"
-(http://aufs.sourceforge.net/aufs2/report/sq/sq.pdf).
-
-
-Several characters/aspects/persona of aufs
-----------------------------------------------------------------------
-
-Aufs has several characters, aspects or persona.
-1. a filesystem, callee of VFS helper
-2. sub-VFS, caller of VFS helper for branches
-3. a virtual filesystem which maintains persistent inode number
-4. reader/writer of files on branches such like an application
-
-1. Callee of VFS Helper
-As an ordinary linux filesystem, aufs is a callee of VFS. For instance,
-unlink(2) from an application reaches sys_unlink() kernel function and
-then vfs_unlink() is called. vfs_unlink() is one of VFS helper and it
-calls filesystem specific unlink operation. Actually aufs implements the
-unlink operation but it behaves like a redirector.
-
-2. Caller of VFS Helper for Branches
-aufs_unlink() passes the unlink request to the branch filesystem as if
-it were called from VFS. So the called unlink operation of the branch
-filesystem acts as usual. As a caller of VFS helper, aufs should handle
-every necessary pre/post operation for the branch filesystem.
-- acquire the lock for the parent dir on a branch
-- lookup in a branch
-- revalidate dentry on a branch
-- mnt_want_write() for a branch
-- vfs_unlink() for a branch
-- mnt_drop_write() for a branch
-- release the lock on a branch
-
-3. Persistent Inode Number
-One of the most important issues for a filesystem is to maintain inode
-numbers. This is particularly important to support exporting a
-filesystem via NFS. Aufs is a virtual filesystem which doesn't have a
-backend block device for its own. But some storage is necessary to
-keep and maintain the inode numbers. It may be a large space and may not
-suit to keep in memory. Aufs rents some space from its first writable
-branch filesystem (by default) and creates file(s) on it. These files
-are created by aufs internally and removed soon (currently) keeping
-opened.
-Note: Because these files are removed, they are totally gone after
- unmounting aufs. It means the inode numbers are not persistent
- across unmount or reboot. I have a plan to make them really
- persistent which will be important for aufs on NFS server.
-
-4. Read/Write Files Internally (copy-on-write)
-Because a branch can be readonly, when you write a file on it, aufs will
-"copy-up" it to the upper writable branch internally. And then write the
-originally requested thing to the file. Generally kernel doesn't
-open/read/write file actively. In aufs, even a single write may cause an
-internal "file copy". This behaviour is very similar to cp(1) command.
-
-Some people may think it is better to pass such work to user space
-helper, instead of doing in kernel space. Actually I am still thinking
-about it. But currently I have implemented it in kernel space.
diff --git a/Documentation/filesystems/aufs/design/02struct.txt b/Documentation/filesystems/aufs/design/02struct.txt
deleted file mode 100644
index b53a9778b..000000000
--- a/Documentation/filesystems/aufs/design/02struct.txt
+++ /dev/null
@@ -1,258 +0,0 @@
-
-# Copyright (C) 2005-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Basic Aufs Internal Structure
-
-Superblock/Inode/Dentry/File Objects
-----------------------------------------------------------------------
-Like an ordinary filesystem, aufs has its own
-superblock/inode/dentry/file objects. All these objects have a
-dynamically allocated array and store the same kind of pointers to the
-lower filesystem, branch.
-For example, when you build a union with one readwrite branch and one
-readonly, mounted /au, /rw and /ro respectively.
-- /au = /rw + /ro
-- /ro/fileA exists but /rw/fileA does not
-
-Aufs lookup operation finds /ro/fileA and gets dentry for that. These
-pointers are stored in an aufs dentry. The array in aufs dentry will be,
-- [0] = NULL (because /rw/fileA doesn't exist)
-- [1] = /ro/fileA
-
-This style of an array is essentially same to the aufs
-superblock/inode/dentry/file objects.
-
-Because aufs supports manipulating branches, ie. add/delete/change
-branches dynamically, each of these objects has its own generation. When
-branches are changed, the generation in the aufs superblock is
-incremented. The generation in each of the other objects is compared
-when the object is accessed. When the generation in an object is
-obsolete, aufs refreshes the internal array.
-
-
-Superblock
-----------------------------------------------------------------------
-Additionally aufs superblock has some data for policies to select one
-among multiple writable branches, XIB files, pseudo-links and kobject.
-See below in detail.
-About the policies which supports copy-down a directory, see
-wbr_policy.txt too.
-
-
-Branch and XINO(External Inode Number Translation Table)
-----------------------------------------------------------------------
-Every branch has its own xino (external inode number translation table)
-file. The xino file is created and unlinked by aufs internally. When two
-members of a union exist on the same filesystem, they share the single
-xino file.
-The struct of a xino file is simple, just a sequence of aufs inode
-numbers which is indexed by the lower inode number.
-In the above sample, assume the inode number of /ro/fileA is i111 and
-aufs assigns the inode number i999 for fileA. Then aufs writes 999 as
-4(8) bytes at 111 * 4(8) bytes offset in the xino file.
-
-When the inode numbers are not contiguous, the xino file will be sparse
-which has a hole in it and doesn't consume as much disk space as it
-might appear. If your branch filesystem consumes disk space for such
-holes, then you should specify 'xino=' option at mounting aufs.
-
-Aufs has a mount option to free the disk blocks for such holes in XINO
-files on tmpfs or ramdisk. But it is not so effective actually. If you
-meet a problem of disk shortage due to XINO files, then you should try
-"tmpfs-ino.patch" (and "vfs-ino.patch" too) in aufs4-standalone.git.
-The patch localizes the assignment inumbers per tmpfs-mount and avoid
-the holes in XINO files.
-
-Also a writable branch has three kinds of "whiteout bases". All these
-are existed when the branch is joined to aufs, and their names are
-whiteout-ed doubly, so that users will never see their names in aufs
-hierarchy.
-1. a regular file which will be hardlinked to all whiteouts.
-2. a directory to store a pseudo-link.
-3. a directory to store an "orphan"-ed file temporarily.
-
-1. Whiteout Base
- When you remove a file on a readonly branch, aufs handles it as a
- logical deletion and creates a whiteout on the upper writable branch
- as a hardlink of this file in order not to consume inode on the
- writable branch.
-2. Pseudo-link Dir
- See below, Pseudo-link.
-3. Step-Parent Dir
- When "fileC" exists on the lower readonly branch only and it is
- opened and removed with its parent dir, and then user writes
- something into it, then aufs copies-up fileC to this
- directory. Because there is no other dir to store fileC. After
- creating a file under this dir, the file is unlinked.
-
-Because aufs supports manipulating branches, ie. add/delete/change
-dynamically, a branch has its own id. When the branch order changes,
-aufs finds the new index by searching the branch id.
-
-
-Pseudo-link
-----------------------------------------------------------------------
-Assume "fileA" exists on the lower readonly branch only and it is
-hardlinked to "fileB" on the branch. When you write something to fileA,
-aufs copies-up it to the upper writable branch. Additionally aufs
-creates a hardlink under the Pseudo-link Directory of the writable
-branch. The inode of a pseudo-link is kept in aufs super_block as a
-simple list. If fileB is read after unlinking fileA, aufs returns
-filedata from the pseudo-link instead of the lower readonly
-branch. Because the pseudo-link is based upon the inode, to keep the
-inode number by xino (see above) is essentially necessary.
-
-All the hardlinks under the Pseudo-link Directory of the writable branch
-should be restored in a proper location later. Aufs provides a utility
-to do this. The userspace helpers are executed at remounting and unmounting
-aufs by default.
-While this utility is running, it puts aufs into the pseudo-link
-maintenance mode. In this mode, only the process which began the
-maintenance mode (and its child processes) is allowed to operate in
-aufs. Some other processes which are not related to the pseudo-link will
-be allowed to run too, but the rest have to return an error or wait
-until the maintenance mode ends. If a process already acquires an inode
-mutex (in VFS), it has to return an error.
-
-
-XIB(external inode number bitmap)
-----------------------------------------------------------------------
-In addition to the xino file per branch, aufs has an external inode number
-bitmap in a superblock object. It is also an internal file like a
-xino file.
-It is a simple bitmap to mark whether the aufs inode number is in-use or
-not.
-To reduce the file I/O, aufs prepares a single memory page to cache xib.
-
-As well as XINO files, aufs has a feature to truncate/refresh XIB to
-reduce the number of consumed disk blocks for these files.
-
-
-Virtual or Vertical Dir, and Readdir in Userspace
-----------------------------------------------------------------------
-In order to support multiple layers (branches), aufs readdir operation
-constructs a virtual dir block on memory. For readdir, aufs calls
-vfs_readdir() internally for each dir on branches, merges their entries
-with eliminating the whiteout-ed ones, and sets it to file (dir)
-object. So the file object has its entry list until it is closed. The
-entry list will be updated when the file position is zero and becomes
-obsoleted. This decision is made in aufs automatically.
-
-The dynamically allocated memory block for the name of entries has a
-unit of 512 bytes (by default) and stores the names contiguously (no
-padding). Another block for each entry is handled by kmem_cache too.
-While building dir blocks, aufs creates a hash list and judges whether
-the entry is whiteouted by its upper branch or already listed.
-The merged result is cached in the corresponding inode object and
-maintained by a customizable life-time option.
-
-Some people may call it can be a security hole or invite DoS attack
-since the opened and once readdir-ed dir (file object) holds its entry
-list and becomes a pressure for system memory. But I'd say it is similar
-to files under /proc or /sys. The virtual files in them also hold a
-memory page (generally) while they are opened. When an idea to reduce
-memory for them is introduced, it will be applied to aufs too.
-For those who really hate this situation, I've developed readdir(3)
-library which operates this merging in userspace. You just need to set
-LD_PRELOAD environment variable, and aufs will not consume any memory in
-kernel space for readdir(3).
-
-
-Workqueue
-----------------------------------------------------------------------
-Aufs sometimes requires privilege access to a branch. For instance,
-in copy-up/down operation. When a user process is going to make changes
-to a file which exists in the lower readonly branch only, and the mode
-of one of ancestor directories may not be writable by a user
-process. Here aufs copies-up the file with its ancestors and they may
-require privilege to set its owner/group/mode/etc.
-This is a typical case of an application character of aufs (see
-Introduction).
-
-Aufs uses workqueue synchronously for this case. It creates its own
-workqueue. The workqueue is a kernel thread and has privilege. Aufs
-passes the request to call mkdir or write (for example), and wait for
-its completion. This approach solves a problem of a signal handler
-simply.
-If aufs didn't adopt the workqueue and changed the privilege of the
-process, then the process may receive the unexpected SIGXFSZ or other
-signals.
-
-Also aufs uses the system global workqueue ("events" kernel thread) too
-for asynchronous tasks, such as handling inotify/fsnotify, re-creating a
-whiteout base, etc. This is unrelated to a privilege.
-Most of aufs operation tries acquiring a rw_semaphore for aufs
-superblock at the beginning, at the same time waits for the completion
-of all queued asynchronous tasks.
-
-
-Whiteout
-----------------------------------------------------------------------
-The whiteout in aufs is very similar to Unionfs's. That is represented
-by its filename. UnionMount takes an approach of a file mode, but I am
-afraid several utilities (find(1) or something) will have to support it.
-
-Basically the whiteout represents "logical deletion" which stops aufs from
-looking up further, but it also represents "dir is opaque" which also stops
-further lookup.
-
-In aufs, rmdir(2) and rename(2) for dir uses whiteout alternatively.
-In order to make several functions in a single systemcall to be
-revertible, aufs adopts an approach to rename a directory to a temporary
-unique whiteouted name.
-For example, in rename(2) dir where the target dir already existed, aufs
-renames the target dir to a temporary unique whiteouted name before the
-actual rename on a branch, and then handles other actions (make it opaque,
-update the attributes, etc). If an error happens in these actions, aufs
-simply renames the whiteouted name back and returns an error. If all are
-succeeded, aufs registers a function to remove the whiteouted unique
-temporary name completely and asynchronously to the system global
-workqueue.
-
-
-Copy-up
-----------------------------------------------------------------------
-It is a well-known feature or concept.
-When a user modifies a file on a readonly branch, aufs operates "copy-up"
-internally and makes the change to the new file on the upper writable branch.
-When the trigger systemcall does not update the timestamps of the parent
-dir, aufs reverts it after copy-up.
-
-
-Move-down (aufs3.9 and later)
-----------------------------------------------------------------------
-"Copy-up" is one of the essential features in aufs. It copies a file from
-the lower readonly branch to the upper writable branch when a user
-changes something about the file.
-"Move-down" is the opposite action of copy-up. Basically this action is
-run manually instead of automatically and internally.
-For design and implementation, aufs has to consider these issues.
-- whiteout for the file may exist on the lower branch.
-- ancestor directories may not exist on the lower branch.
-- diropq for the ancestor directories may exist on the upper branch.
-- free space on the lower branch will reduce.
-- another access to the file may happen during moving-down, including
- UDBA (see "Revalidate Dentry and UDBA").
-- the file should not be hard-linked nor pseudo-linked. they should be
- handled by auplink utility later.
-
-Sometimes users want to move-down a file from the upper writable branch
-to the lower readonly or writable branch. For instance,
-- the free space of the upper writable branch is going to run out.
-- create a new intermediate branch between the upper and lower branch.
-- etc.
-
-For this purpose, use "aumvdown" command in aufs-util.git.
diff --git a/Documentation/filesystems/aufs/design/03atomic_open.txt b/Documentation/filesystems/aufs/design/03atomic_open.txt
deleted file mode 100644
index 974b524f7..000000000
--- a/Documentation/filesystems/aufs/design/03atomic_open.txt
+++ /dev/null
@@ -1,85 +0,0 @@
-
-# Copyright (C) 2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Support for a branch who has its ->atomic_open()
-----------------------------------------------------------------------
-The filesystems which implement their own ->atomic_open() are not the majority. For
-example NFSv4 does, and aufs should call NFSv4 ->atomic_open,
-particularly for open(O_CREAT|O_EXCL, 0400) case. Other than
-->atomic_open(), NFSv4 returns an error for this open(2). While I am not
-sure whether all filesystems who have ->atomic_open() behave like this,
-but NFSv4 surely returns the error.
-
-In order to support ->atomic_open() for aufs, there are a few
-approaches.
-
-A. Introduce aufs_atomic_open()
- - calls one of VFS:do_last(), lookup_open() or atomic_open() for
- branch fs.
-B. Introduce aufs_atomic_open() calling create, open and chmod. this is
- an aufs user Pip Cet's approach
- - calls aufs_create(), VFS finish_open() and notify_change().
- - pass fake-mode to finish_open(), and then correct the mode by
- notify_change().
-C. Extend aufs_open() to call branch fs's ->atomic_open()
- - no aufs_atomic_open().
- - aufs_lookup() registers the TID to an aufs internal object.
- - aufs_create() does nothing when the matching TID is registered, but
- registers the mode.
- - aufs_open() calls branch fs's ->atomic_open() when the matching
- TID is registered.
-D. Extend aufs_open() to re-try branch fs's ->open() with superuser's
- credential
- - no aufs_atomic_open().
- - aufs_create() registers the TID to an internal object. this info
- represents "this process created this file just now."
- - when aufs gets EACCES from branch fs's ->open(), then confirm the
- registered TID and re-try open() with superuser's credential.
-
-Pros and cons for each approach.
-
-A.
- - straightforward but highly depends upon VFS internal.
- - the atomic behaviour is kept.
- - some of parameters such as nameidata are hard to reproduce for
- branch fs.
- - large overhead.
-B.
- - easy to implement.
- - the atomic behaviour is lost.
-C.
- - the atomic behaviour is kept.
- - dirty and tricky.
- - VFS checks whether the file is created correctly after calling
- ->create(), which means this approach doesn't work.
-D.
- - easy to implement.
- - the atomic behaviour is lost.
- - to open a file with superuser's credential and give it to a user
- process is a bad idea, since the file object keeps the credential
- in it. It may affect LSM or something. This approach doesn't work
- either.
-
-The approach A is ideal, but it is hard to implement. So here is a
-variation of A, which is to be implemented.
-
-A-1. Introduce aufs_atomic_open()
- - calls branch fs ->atomic_open() if exists. otherwise calls
- vfs_create() and finish_open().
- - the demerit is that the several checks after branch fs
- ->atomic_open() are lost. in the ordinary case, the checks are
- done by VFS:do_last(), lookup_open() and atomic_open(). some can
- be implemented in aufs, but not all I am afraid.
diff --git a/Documentation/filesystems/aufs/design/03lookup.txt b/Documentation/filesystems/aufs/design/03lookup.txt
deleted file mode 100644
index 3515c9228..000000000
--- a/Documentation/filesystems/aufs/design/03lookup.txt
+++ /dev/null
@@ -1,113 +0,0 @@
-
-# Copyright (C) 2005-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Lookup in a Branch
-----------------------------------------------------------------------
-Since aufs has a character of sub-VFS (see Introduction), it operates
-lookup for branches as VFS does. It may be a heavy work. But almost all
-lookup operation in aufs is the simplest case, ie. lookup only an entry
-directly connected to its parent. Digging down the directory hierarchy
-is unnecessary. VFS has a function lookup_one_len() for that use, and
-aufs calls it.
-
-When a branch is a remote filesystem, aufs basically relies upon its
-->d_revalidate(), also aufs forces the hardest revalidate tests for
-them.
-For d_revalidate, aufs implements three levels of revalidate tests. See
-"Revalidate Dentry and UDBA" in detail.
-
-
-Test Only the Highest One for the Directory Permission (dirperm1 option)
-----------------------------------------------------------------------
-Let's try case study.
-- aufs has two branches, upper readwrite and lower readonly.
- /au = /rw + /ro
-- "dirA" exists under /ro, but not under /rw, and its mode is 0700.
-- user invoked "chmod a+rx /au/dirA"
-- the internal copy-up is activated and "/rw/dirA" is created and its
- permission bits are set to world readable.
-- then "/au/dirA" becomes world readable?
-
-In this case, /ro/dirA is still 0700 since it exists in readonly branch,
-or it may be a natively readonly filesystem. If aufs respects the lower
-branch, it should not respond readdir request from other users. But user
-allowed it by chmod. Should aufs really reject showing the entries
-under /ro/dirA?
-
-To be honest, I don't have a good solution for this case. So aufs
-implements 'dirperm1' and 'nodirperm1' mount options, and leaves it to
-users.
-When dirperm1 is specified, aufs checks only the highest one for the
-directory permission, and shows the entries. Otherwise, as usual, checks
-every dir existing on all branches and rejects the request.
-
-As a side effect, the dirperm1 option improves the performance of aufs
-because the number of permission checks is reduced when there are many
-branches.
-
-
-Revalidate Dentry and UDBA (User's Direct Branch Access)
-----------------------------------------------------------------------
-Generally VFS helpers re-validate a dentry as a part of lookup.
-0. digging down the directory hierarchy.
-1. lock the parent dir by its i_mutex.
-2. lookup the final (child) entry.
-3. revalidate it.
-4. call the actual operation (create, unlink, etc.)
-5. unlock the parent dir
-
-If the filesystem implements its ->d_revalidate() (step 3), then it is
-called. Actually aufs implements it and checks the dentry on a branch is
-still valid.
-But it is not enough. Because aufs has to release the lock for the
-parent dir on a branch at the end of ->lookup() (step 2) and
-->d_revalidate() (step 3) while the i_mutex of the aufs dir is still
-held by VFS.
-If the file on a branch is changed directly, eg. bypassing aufs, after
-aufs released the lock, then the subsequent operation may cause
-something unpleasant result.
-
-This situation is a result of VFS architecture, ->lookup() and
-->d_revalidate() are separated. But I never say it is wrong. It is a good
-design from VFS's point of view. It is just not suitable for sub-VFS
-character in aufs.
-
-Aufs supports such a case by three levels of revalidation which are
-selectable by the user.
-1. Simple Revalidate
- In addition to the native flow in VFS, confirm the child-parent
- relationship on the branch just after locking the parent dir on the
- branch in the "actual operation" (step 4). When this validation
- fails, aufs returns EBUSY. ->d_revalidate() (step 3) in aufs still
- checks the validation of the dentry on branches.
-2. Monitor Changes Internally by Inotify/Fsnotify
- In addition to the above, in the "actual operation" (step 4) aufs re-looks up
- the dentry on the branch, and returns EBUSY if it finds different
- dentry.
- Additionally, aufs sets the inotify/fsnotify watch for every dir on branches
- during it is in cache. When the event is notified, aufs registers a
- function to kernel 'events' thread by schedule_work(). And the
- function sets some special status to the cached aufs dentry and inode
- private data. If they are not cached, then aufs has nothing to
- do. When the same file is accessed through aufs (step 0-3) later,
- aufs will detect the status and refresh all necessary data.
- In this mode, aufs has to ignore the event which is fired by aufs
- itself.
-3. No Extra Validation
- This is the simplest test and doesn't add any additional revalidation
- test, and skip the revalidation in step 4. It is useful and improves
- aufs performance when the system surely hides the aufs branches from users,
- by over-mounting something (or another method).
diff --git a/Documentation/filesystems/aufs/design/04branch.txt b/Documentation/filesystems/aufs/design/04branch.txt
deleted file mode 100644
index 940216e0d..000000000
--- a/Documentation/filesystems/aufs/design/04branch.txt
+++ /dev/null
@@ -1,74 +0,0 @@
-
-# Copyright (C) 2005-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Branch Manipulation
-
-Since aufs supports dynamic branch manipulation, ie. add/remove a branch
-and changing its permission/attribute, there are a lot of works to do.
-
-
-Add a Branch
-----------------------------------------------------------------------
-o Confirm the adding dir exists outside of aufs, including loopback
- mount, and its various attributes.
-o Initialize the xino file and whiteout bases if necessary.
- See struct.txt.
-
-o Check the owner/group/mode of the directory
- When the owner/group/mode of the adding directory differs from the
- existing branch, aufs issues a warning because it may impose a
- security risk.
- For example, when an upper writable branch has a world writable empty
- top directory, a malicious user can create any files on the writable
- branch directly, like copy-up and modify manually. If something like
- /etc/{passwd,shadow} exists on the lower readonly branch but not the upper
- writable branch, and the writable branch is world-writable, then a
- malicious guy may create /etc/passwd on the writable branch directly
- and the infected file will be valid in aufs.
- I am afraid it can be a security issue, but aufs can do nothing except
- producing a warning.
-
-
-Delete a Branch
-----------------------------------------------------------------------
-o Confirm the deleting branch is not busy
- To be general, there is one merit to adopt "remount" interface to
- manipulate branches. It is to discard caches. At deleting a branch,
- aufs checks the still cached (and connected) dentries and inodes. If
- there are any, then they are all in-use. An inode without its
- corresponding dentry can be alive alone (for example, inotify/fsnotify case).
-
- For the cached one, aufs checks whether the same named entry exists on
- other branches.
- If the cached one is a directory, because aufs provides a merged view
- to users, as long as one dir is left on any branch aufs can show the
- dir to users. In this case, the branch can be removed from aufs.
- Otherwise aufs rejects deleting the branch.
-
- If any file on the deleting branch is opened by aufs, then aufs
- rejects deleting.
-
-
-Modify the Permission of a Branch
-----------------------------------------------------------------------
-o Re-initialize or remove the xino file and whiteout bases if necessary.
- See struct.txt.
-
-o rw --> ro: Confirm the modifying branch is not busy
- Aufs rejects the request if any of these conditions are true.
- - a file on the branch is mmap-ed.
- - a regular file on the branch is opened for write and there is no
- same named entry on the upper branch.
diff --git a/Documentation/filesystems/aufs/design/05wbr_policy.txt b/Documentation/filesystems/aufs/design/05wbr_policy.txt
deleted file mode 100644
index aeb108734..000000000
--- a/Documentation/filesystems/aufs/design/05wbr_policy.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-
-# Copyright (C) 2005-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Policies to Select One among Multiple Writable Branches
-----------------------------------------------------------------------
-When the number of writable branches is more than one, aufs has to decide
-the target branch for file creation or copy-up. By default, the highest
-writable branch which has the parent (or ancestor) dir of the target
-file is chosen (top-down-parent policy).
-By user's request, aufs implements some other policies to select the
-writable branch, for file creation several policies, round-robin,
-most-free-space, and other policies. For copy-up, top-down-parent,
-bottom-up-parent, bottom-up and others.
-
-As expected, the round-robin policy selects the branch in circular. When
-you have two writable branches and create 10 new files, 5 files will be
-created for each branch. mkdir(2) systemcall is an exception. When you
-create 10 new directories, all will be created on the same branch.
-And the most-free-space policy selects the one which has most free
-space among the writable branches. The amount of free space will be
-checked by aufs internally, and users can specify its time interval.
-
-The policies for copy-up are simpler,
-top-down-parent is equivalent to the same named one in create policy,
-bottom-up-parent selects the writable branch where the parent dir
-exists and the nearest upper one from the copyup-source,
-bottom-up selects the nearest upper writable branch from the
-copyup-source, regardless the existence of the parent dir.
-
-There are some rules or exceptions to apply these policies.
-- If there is a readonly branch above the policy-selected branch and
- the parent dir is marked as opaque (a variation of whiteout), or the
- target (creating) file is whiteout-ed on the upper readonly branch,
- then the result of the policy is ignored and the target file will be
- created on the nearest upper writable branch than the readonly branch.
-- If there is a writable branch above the policy-selected branch and
- the parent dir is marked as opaque or the target file is whiteouted
- on the branch, then the result of the policy is ignored and the target
- file will be created on the highest one among the upper writable
- branches who has diropq or whiteout. In case of whiteout, aufs removes
- it as usual.
-- link(2) and rename(2) systemcalls are exceptions in every policy.
- They try selecting the branch where the source exists as possible
- since copyup a large file will take long time. If it can't be,
- ie. the branch where the source exists is readonly, then they will
- follow the copyup policy.
-- There is an exception for rename(2) when the target exists.
- If the rename target exists, aufs compares the index of the branches
- where the source and the target exists and selects the higher
- one. If the selected branch is readonly, then aufs follows the
- copyup policy.
diff --git a/Documentation/filesystems/aufs/design/06fhsm.txt b/Documentation/filesystems/aufs/design/06fhsm.txt
deleted file mode 100644
index 5928ed219..000000000
--- a/Documentation/filesystems/aufs/design/06fhsm.txt
+++ /dev/null
@@ -1,120 +0,0 @@
-
-# Copyright (C) 2011-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-
-File-based Hierarchical Storage Management (FHSM)
-----------------------------------------------------------------------
-Hierarchical Storage Management (or HSM) is a well-known feature in the
-storage world. Aufs provides this feature as file-based with multiple
-writable branches, based upon the principle of "Colder, the Lower".
-Here the word "colder" means that the less used files, and "lower" means
-that the position in the order of the stacked branches vertically.
-These multiple writable branches are prioritized, ie. the topmost one
-should be the fastest drive and be used heavily.
-
-o Characters in aufs FHSM story
-- aufs itself and a new branch attribute.
-- a new ioctl interface to move-down and to establish a connection with
- the daemon ("move-down" is a converse of "copy-up").
-- userspace tool and daemon.
-
-The userspace daemon establishes a connection with aufs and waits for
-the notification. The notified information is very similar to struct
-statfs containing the number of consumed blocks and inodes.
-When the consumed blocks/inodes of a branch exceeds the user-specified
-upper watermark, the daemon activates its move-down process until the
-consumed blocks/inodes reaches the user-specified lower watermark.
-
-The actual move-down is done by aufs based upon the request from
-user-space since we need to maintain the inode number and the internal
-pointer arrays in aufs.
-
-Currently aufs FHSM handles the regular files only. Additionally they
-must not be hard-linked nor pseudo-linked.
-
-
-o Cowork of aufs and the user-space daemon
- While the userspace daemon has an established connection, aufs sends a
- small notification to it whenever aufs writes something into the
- writable branch. But it may cost high since aufs issues statfs(2)
- internally. So user can specify a new option to cache the
- info. Actually the notification is controlled by these factors.
- + the specified cache time.
- + classified as "force" by aufs internally.
- Until the specified time expires, aufs doesn't send the info
- except the forced cases. When aufs decides to force, the info is always
- notified to userspace.
- For example, the number of free inodes is generally large enough and
- the shortage of it happens rarely. So aufs doesn't force the
- notification when creating a new file, directory and others. This is
- the typical case which aufs doesn't force.
- When aufs writes the actual filedata and the file consumes any new
- blocks, aufs forces the notification.
-
-
-o Interfaces in aufs
-- New branch attribute.
- + fhsm
- Specifies that the branch is managed by FHSM feature. In other words,
- participant in the FHSM.
- When nofhsm is set to the branch, it will not be the source/target
- branch of the move-down operation. This attribute is set
- independently from coo and moo attributes, and if you want full
- FHSM, you should specify them as well.
-- New mount option.
- + fhsm_sec
- Specifies a second to suppress many less important info to be
- notified.
-- New ioctl.
- + AUFS_CTL_FHSM_FD
- create a new file descriptor which userspace can read the notification
- (a subset of struct statfs) from aufs.
-- Module parameter 'brs'
- It has to be set to 1. Otherwise the new mount option 'fhsm' will not
- be set.
-- mount helpers /sbin/mount.aufs and /sbin/umount.aufs
- When there are two or more branches with fhsm attributes,
- /sbin/mount.aufs invokes the user-space daemon and /sbin/umount.aufs
- terminates it. As a result of remounting and branch-manipulation, the
- number of branches with fhsm attribute can be one. In this case,
- /sbin/mount.aufs will terminate the user-space daemon.
-
-
-Finally the operation is done as these steps in kernel-space.
-- make sure that,
- + no one else is using the file.
- + the file is not hard-linked.
- + the file is not pseudo-linked.
- + the file is a regular file.
- + the parent dir is not opaqued.
-- find the target writable branch.
-- make sure the file is not whiteout-ed by the upper (than the target)
- branch.
-- make the parent dir on the target branch.
-- mutex lock the inode on the branch.
-- unlink the whiteout on the target branch (if exists).
-- lookup and create the whiteout-ed temporary name on the target branch.
-- copy the file as the whiteout-ed temporary name on the target branch.
-- rename the whiteout-ed temporary name to the original name.
-- unlink the file on the source branch.
-- maintain the internal pointer array and the external inode number
- table (XINO).
-- maintain the timestamps and other attributes of the parent dir and the
- file.
-
-And of course, in every step, an error may happen. So the operation
-should restore the original file state after an error happens.
diff --git a/Documentation/filesystems/aufs/design/06mmap.txt b/Documentation/filesystems/aufs/design/06mmap.txt
deleted file mode 100644
index a42364eee..000000000
--- a/Documentation/filesystems/aufs/design/06mmap.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-
-# Copyright (C) 2005-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-mmap(2) -- File Memory Mapping
-----------------------------------------------------------------------
-In aufs, the file-mapped pages are handled by a branch fs directly, no
-interaction with aufs. It means aufs_mmap() calls the branch fs's
-->mmap().
-This approach is simple and good, but there is one problem.
-Under /proc, several entries show the mmapped files by its path (with
-device and inode number), and the printed path will be the path on the
-branch fs's instead of virtual aufs's.
-This is not a problem in most cases, but some utilities lsof(1) (and its
-user) may expect the path on aufs.
-
-To address this issue, aufs adds a new member called vm_prfile in struct
-vm_area_struct (and struct vm_region). The original vm_file points to
-the file on the branch fs in order to handle everything correctly as
-usual. The new vm_prfile points to a virtual file in aufs, and the
-show-functions in procfs refers to vm_prfile if it is set.
-Also we need to maintain several other places where touching vm_file
-such like
-- fork()/clone() copies vma and the reference count of vm_file is
- incremented.
-- merging vma maintains the ref count too.
-
-This is not a good approach. It just fakes the printed path. But it
-leaves all behaviour around f_mapping unchanged. This is surely an
-advantage.
-Actually aufs had adopted another complicated approach which calls
-generic_file_mmap() and handles struct vm_operations_struct. In this
-approach, aufs met a hard problem and I could not solve it without
-switching the approach.
-
-There may be one more approach, which is
-- bind-mount the branch-root onto the aufs-root internally
-- grab the new vfsmount (ie. struct mount)
-- lazy-umount the branch-root internally
-- in open(2) the aufs-file, open the branch-file with the hidden
- vfsmount (instead of the original branch's vfsmount)
-- ideally this "bind-mount and lazy-umount" should be done atomically,
- but it may be possible from userspace by the mount helper.
-
-Adding the internal hidden vfsmount and using it in opening a file, the
-file path under /proc will be printed correctly. This approach looks
-smarter, but is not possible I am afraid.
-- aufs-root may be bind-mount later. when it happens, another hidden
- vfsmount will be required.
-- it is hard to get the chance to bind-mount and lazy-umount
- + in kernel-space, FS can have vfsmount in open(2) via
- file->f_path, and aufs can know its vfsmount. But several locks are
- already acquired, and if aufs tries to bind-mount and lazy-umount
- here, then it may cause a deadlock.
- + in user-space, bind-mount doesn't invoke the mount helper.
-- since /proc shows dev and ino, aufs has to give vma these info. it
- means a new member vm_prinode will be necessary. this is essentially
- equivalent to vm_prfile described above.
-
-I have to give up this "looks-smarter" approach.
diff --git a/Documentation/filesystems/aufs/design/06xattr.txt b/Documentation/filesystems/aufs/design/06xattr.txt
deleted file mode 100644
index 8aad929b8..000000000
--- a/Documentation/filesystems/aufs/design/06xattr.txt
+++ /dev/null
@@ -1,96 +0,0 @@
-
-# Copyright (C) 2014-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
-
-
-Listing XATTR/EA and getting the value
-----------------------------------------------------------------------
-For the inode standard attributes (owner, group, timestamps, etc.), aufs
-shows the values from the topmost existing file. This behaviour is good
-for the non-dir entries since the behaviour exactly matches the shown
-information. But for the directories, aufs considers all the same named
-entries on the lower branches. Which means, if one of the lower entry
-rejects readdir call, then aufs returns an error even if the topmost
-entry allows it. This behaviour is necessary to respect the branch fs's
-security, but can make users confused since the user-visible standard
-attributes don't match the behaviour.
-To address this issue, aufs has a mount option called dirperm1 which
-checks the permission for the topmost entry only, and ignores the lower
-entry's permission.
-
-A similar issue can happen around XATTR.
-getxattr(2) and listxattr(2) families behave as if dirperm1 option is
-always set. Otherwise these very unpleasant situations would happen.
-- listxattr(2) may return the duplicated entries.
-- users may not be able to remove or reset the XATTR forever.
-
-
-XATTR/EA support in the internal (copy,move)-(up,down)
-----------------------------------------------------------------------
-Generally the extended attributes of inode are categorized as these.
-- "security" for LSM and capability.
-- "system" for posix ACL, 'acl' mount option is required for the branch
- fs generally.
-- "trusted" for userspace, CAP_SYS_ADMIN is required.
-- "user" for userspace, 'user_xattr' mount option is required for the
- branch fs generally.
-
-Moreover there are some other categories. Aufs handles these rather
-unpopular categories as the ordinary ones, ie. there is no special
-condition nor exception.
-
-In copy-up, the support for XATTR on the dst branch may differ from the
-src branch. In this case, the copy-up operation will get an error and
-the original user operation which triggered the copy-up will fail. It
-can happen that even all copy-up will fail.
-When both of src and dst branches support XATTR and if an error occurs
-during copying XATTR, then the copy-up should fail obviously. That is a
-good reason and aufs should return an error to userspace. But when only
-the src branch support that XATTR, aufs should not return an error.
-For example, the src branch supports ACL but the dst branch doesn't
-because the dst branch may natively un-support it or temporary
-un-support it due to "noacl" mount option. Of course, the dst branch fs
-may NOT return an error even if the XATTR is not supported. It is
-totally up to the branch fs.
-
-Anyway when the aufs internal copy-up gets an error from the dst branch
-fs, then aufs tries removing the just copied entry and returns the error
-to the userspace. The worst case of this situation will be all copy-up
-will fail.
-
-For the copy-up operation, there are two basic approaches.
-- copy the specified XATTR only (by category above), and return the
- error unconditionally if it happens.
-- copy all XATTR, and ignore the error on the specified category only.
-
-In order to support XATTR and to implement the correct behaviour, aufs
-chooses the latter approach and introduces some new branch attributes,
-"icexsec", "icexsys", "icextr", "icexusr", and "icexoth".
-They correspond to the XATTR namespaces (see above). Additionally, to be
-convenient, "icex" is also provided which means all "icex*" attributes
-are set (here the word "icex" stands for "ignore copy-error on XATTR").
-
-The meaning of these attributes is to ignore the error from setting
-XATTR on that branch.
-Note that aufs tries copying all XATTR unconditionally, and ignores the
-error from the dst branch according to the specified attributes.
-
-Some XATTR may have its default value. The default value may come from
-the parent dir or the environment. If the default value is set at the
-file creating-time, it will be overwritten by copy-up.
-Some contradiction may happen I am afraid.
-Do we need another attribute to stop copying XATTR? I am unsure. For
-now, aufs implements the branch attributes to ignore the error.
diff --git a/Documentation/filesystems/aufs/design/07export.txt b/Documentation/filesystems/aufs/design/07export.txt
deleted file mode 100644
index b25dd950c..000000000
--- a/Documentation/filesystems/aufs/design/07export.txt
+++ /dev/null
@@ -1,58 +0,0 @@
-
-# Copyright (C) 2005-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Export Aufs via NFS
-----------------------------------------------------------------------
-Here is an approach.
-- like xino/xib, add a new file 'xigen' which stores aufs inode
- generation.
-- iget_locked(): initialize aufs inode generation for a new inode, and
- store it in xigen file.
-- destroy_inode(): increment aufs inode generation and store it in xigen
- file. it is necessary even if it is not unlinked, because any data of
- inode may be changed by UDBA.
-- encode_fh(): for a root dir, simply return FILEID_ROOT. otherwise
- build file handle by
- + branch id (4 bytes)
- + superblock generation (4 bytes)
- + inode number (4 or 8 bytes)
- + parent dir inode number (4 or 8 bytes)
- + inode generation (4 bytes)
- + return value of exportfs_encode_fh() for the parent on a branch (4
- bytes)
- + file handle for a branch (by exportfs_encode_fh())
-- fh_to_dentry():
- + find the index of a branch from its id in handle, and check it
- still exists in aufs.
- + 1st level: get the inode number from handle and search it in cache.
- + 2nd level: if not found in cache, get the parent inode number from
- the handle and search it in cache. and then open the found parent
- dir, find the matching inode number by vfs_readdir() and get its
- name, and call lookup_one_len() for the target dentry.
- + 3rd level: if the parent dir is not cached, call
- exportfs_decode_fh() for a branch and get the parent on a branch,
- build a pathname of it, convert it to a pathname in aufs, call
- path_lookup(). now aufs gets a parent dir dentry, then handle it as
- the 2nd level.
- + to open the dir, aufs needs struct vfsmount. aufs keeps vfsmount
- for every branch, but not itself. to get this, (currently) aufs
- searches in current->nsproxy->mnt_ns list. it may not be a good
- idea, but I didn't get other approach.
- + test the generation of the gotten inode.
-- every inode operation: they may get EBUSY due to UDBA. in this case,
- convert it into ESTALE for NFSD.
-- readdir(): call lockdep_on/off() because filldir in NFSD calls
- lookup_one_len(), vfs_getattr(), encode_fh() and others.
diff --git a/Documentation/filesystems/aufs/design/08shwh.txt b/Documentation/filesystems/aufs/design/08shwh.txt
deleted file mode 100644
index a97a7987b..000000000
--- a/Documentation/filesystems/aufs/design/08shwh.txt
+++ /dev/null
@@ -1,52 +0,0 @@
-
-# Copyright (C) 2005-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Show Whiteout Mode (shwh)
-----------------------------------------------------------------------
-Generally aufs hides the name of whiteouts. But in some cases, to show
-them is very useful for users. For instance, creating a new middle layer
-(branch) by merging existing layers.
-
-(borrowing aufs1 HOW-TO from a user, Michael Towers)
-When you have three branches,
-- Bottom: 'system', squashfs (underlying base system), read-only
-- Middle: 'mods', squashfs, read-only
-- Top: 'overlay', ram (tmpfs), read-write
-
-The top layer is loaded at boot time and saved at shutdown, to preserve
-the changes made to the system during the session.
-When larger changes have been made, or smaller changes have accumulated,
-the size of the saved top layer data grows. At this point, it would be
-nice to be able to merge the two overlay branches ('mods' and 'overlay')
-and rewrite the 'mods' squashfs, clearing the top layer and thus
-restoring save and load speed.
-
-This merging is simplified by the use of another aufs mount, of just the
-two overlay branches using the 'shwh' option.
-# mount -t aufs -o ro,shwh,br:/livesys/overlay=ro+wh:/livesys/mods=rr+wh \
- aufs /livesys/merge_union
-
-A merged view of these two branches is then available at
-/livesys/merge_union, and the new feature is that the whiteouts are
-visible!
-Note that in 'shwh' mode the aufs mount must be 'ro', which will disable
-writing to all branches. Also the default mode for all branches is 'ro'.
-It is now possible to save the combined contents of the two overlay
-branches to a new squashfs, e.g.:
-# mksquashfs /livesys/merge_union /path/to/newmods.squash
-
-This new squashfs archive can be stored on the boot device and the
-initramfs will use it to replace the old one at the next boot.
diff --git a/Documentation/filesystems/aufs/design/10dynop.txt b/Documentation/filesystems/aufs/design/10dynop.txt
deleted file mode 100644
index 04c35f5ac..000000000
--- a/Documentation/filesystems/aufs/design/10dynop.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-
-# Copyright (C) 2010-2015 Junjiro R. Okajima
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program. If not, see <http://www.gnu.org/licenses/>.
-
-Dynamically customizable FS operations
-----------------------------------------------------------------------
-Generally FS operations (struct inode_operations, struct
-address_space_operations, struct file_operations, etc.) are defined as
-"static const", but it never means that FS have only one set of
-operation. Some FS have multiple sets of them. For instance, ext2 has
-three sets, one for XIP, for NOBH, and for normal.
-Since aufs overrides and redirects these operations, sometimes aufs has
-to change its behaviour according to the branch FS type. More importantly
-VFS acts differently if a function (member in the struct) is set or
-not. It means aufs should have several sets of operations and select one
-among them according to the branch FS definition.
-
-In order to solve this problem and not to affect the behaviour of VFS,
-aufs defines these operations dynamically. For instance, aufs defines
-dummy direct_IO function for struct address_space_operations, but it may
-not be set to the address_space_operations actually. When the branch FS
-doesn't have it, aufs doesn't set it to its address_space_operations
-while the function definition itself is still alive. So the behaviour
-itself will not change, and it will return an error when direct_IO is
-not set.
-
-The lifetime of these dynamically generated operation object is
-maintained by aufs branch object. When the branch is removed from aufs,
-the reference counter of the object is decremented. When it reaches
-zero, the dynamically generated operation object will be freed.
-
-This approach is designed to support AIO (io_submit), Direct I/O and
-XIP (DAX) mainly.
-Currently this approach is applied to address_space_operations for
-regular files only.
diff --git a/Documentation/filesystems/automount-support.txt b/Documentation/filesystems/automount-support.txt
index 7cac200e2..7eb762eb3 100644
--- a/Documentation/filesystems/automount-support.txt
+++ b/Documentation/filesystems/automount-support.txt
@@ -1,41 +1,15 @@
-Support is available for filesystems that wish to do automounting support (such
-as kAFS which can be found in fs/afs/). This facility includes allowing
-in-kernel mounts to be performed and mountpoint degradation to be
-requested. The latter can also be requested by userspace.
+Support is available for filesystems that wish to do automounting
+support (such as kAFS which can be found in fs/afs/ and NFS in
+fs/nfs/). This facility includes allowing in-kernel mounts to be
+performed and mountpoint degradation to be requested. The latter can
+also be requested by userspace.
======================
IN-KERNEL AUTOMOUNTING
======================
-A filesystem can now mount another filesystem on one of its directories by the
-following procedure:
-
- (1) Give the directory a follow_link() operation.
-
- When the directory is accessed, the follow_link op will be called, and
- it will be provided with the location of the mountpoint in the nameidata
- structure (vfsmount and dentry).
-
- (2) Have the follow_link() op do the following steps:
-
- (a) Call vfs_kern_mount() to call the appropriate filesystem to set up a
- superblock and gain a vfsmount structure representing it.
-
- (b) Copy the nameidata provided as an argument and substitute the dentry
- argument into the copy.
-
- (c) Call do_add_mount() to install the new vfsmount into the namespace's
- mountpoint tree, thus making it accessible to userspace. Use the
- nameidata set up in (b) as the destination.
-
- If the mountpoint will be automatically expired, then do_add_mount()
- should also be given the location of an expiration list (see further
- down).
-
- (d) Release the path in the nameidata argument and substitute in the new
- vfsmount and its root dentry. The ref counts on these will need
- incrementing.
+See section "Mount Traps" of Documentation/filesystems/autofs4.txt
Then from userspace, you can just do something like:
@@ -61,17 +35,18 @@ AUTOMATIC MOUNTPOINT EXPIRY
===========================
Automatic expiration of mountpoints is easy, provided you've mounted the
-mountpoint to be expired in the automounting procedure outlined above.
+mountpoint to be expired in the automounting procedure outlined separately.
To do expiration, you need to follow these steps:
- (3) Create at least one list off which the vfsmounts to be expired can be
- hung. Access to this list will be governed by the vfsmount_lock.
+ (1) Create at least one list off which the vfsmounts to be expired can be
+ hung.
- (4) In step (2c) above, the call to do_add_mount() should be provided with a
- pointer to this list. It will hang the vfsmount off of it if it succeeds.
+ (2) When a new mountpoint is created in the ->d_automount method, add
+ the mnt to the list using mnt_set_expiry()
+ mnt_set_expiry(newmnt, &afs_vfsmounts);
- (5) When you want mountpoints to be expired, call mark_mounts_for_expiry()
+ (3) When you want mountpoints to be expired, call mark_mounts_for_expiry()
with a pointer to this list. This will process the list, marking every
vfsmount thereon for potential expiry on the next call.
diff --git a/Documentation/filesystems/caching/backend-api.txt b/Documentation/filesystems/caching/backend-api.txt
index 277d1e810..c0bd56772 100644
--- a/Documentation/filesystems/caching/backend-api.txt
+++ b/Documentation/filesystems/caching/backend-api.txt
@@ -676,6 +676,29 @@ FS-Cache provides some utilities that a cache backend may make use of:
as possible.
+ (*) Indicate that a stale object was found and discarded:
+
+ void fscache_object_retrying_stale(struct fscache_object *object);
+
+ This is called to indicate that the lookup procedure found an object in
+ the cache that the netfs decided was stale. The object has been
+ discarded from the cache and the lookup will be performed again.
+
+
+ (*) Indicate that the caching backend killed an object:
+
+ void fscache_object_mark_killed(struct fscache_object *object,
+ enum fscache_why_object_killed why);
+
+ This is called to indicate that the cache backend preemptively killed an
+ object. The why parameter should be set to indicate the reason:
+
+ FSCACHE_OBJECT_IS_STALE - the object was stale and needs discarding.
+ FSCACHE_OBJECT_NO_SPACE - there was insufficient cache space
+ FSCACHE_OBJECT_WAS_RETIRED - the object was retired when relinquished.
+ FSCACHE_OBJECT_WAS_CULLED - the object was culled to make space.
+
+
(*) Get and release references on a retrieval record:
void fscache_get_retrieval(struct fscache_retrieval *op);
diff --git a/Documentation/filesystems/caching/fscache.txt b/Documentation/filesystems/caching/fscache.txt
index 770267af5..50f0a5757 100644
--- a/Documentation/filesystems/caching/fscache.txt
+++ b/Documentation/filesystems/caching/fscache.txt
@@ -284,8 +284,9 @@ proc files.
enq=N Number of times async ops queued for processing
can=N Number of async ops cancelled
rej=N Number of async ops rejected due to object lookup/create failure
+ ini=N Number of async ops initialised
dfr=N Number of async ops queued for deferred release
- rel=N Number of async ops released
+ rel=N Number of async ops released (should equal ini=N when idle)
gc=N Number of deferred-release async ops garbage collected
CacheOp alo=N Number of in-progress alloc_object() cache ops
luo=N Number of in-progress lookup_object() cache ops
@@ -303,6 +304,10 @@ proc files.
wrp=N Number of in-progress write_page() cache ops
ucp=N Number of in-progress uncache_page() cache ops
dsp=N Number of in-progress dissociate_pages() cache ops
+ CacheEv nsp=N Number of object lookups/creations rejected due to lack of space
+ stl=N Number of stale objects deleted
+ rtr=N Number of objects retired when relinquished
+ cul=N Number of objects culled
(*) /proc/fs/fscache/histogram
diff --git a/Documentation/filesystems/dax.txt b/Documentation/filesystems/dax.txt
index baf411186..7af2851d6 100644
--- a/Documentation/filesystems/dax.txt
+++ b/Documentation/filesystems/dax.txt
@@ -18,8 +18,10 @@ Usage
-----
If you have a block device which supports DAX, you can make a filesystem
-on it as usual. When mounting it, use the -o dax option manually
-or add 'dax' to the options in /etc/fstab.
+on it as usual. The DAX code currently only supports files with a block
+size equal to your kernel's PAGE_SIZE, so you may need to specify a block
+size when creating the filesystem. When mounting it, use the "-o dax"
+option on the command line or add 'dax' to the options in /etc/fstab.
Implementation Tips for Block Driver Writers
diff --git a/Documentation/filesystems/nfs/knfsd-stats.txt b/Documentation/filesystems/nfs/knfsd-stats.txt
index 64ced5149..1a5d82180 100644
--- a/Documentation/filesystems/nfs/knfsd-stats.txt
+++ b/Documentation/filesystems/nfs/knfsd-stats.txt
@@ -68,16 +68,10 @@ sockets-enqueued
rate of change for this counter is zero; significantly non-zero
values may indicate a performance limitation.
- This can happen either because there are too few nfsd threads in the
- thread pool for the NFS workload (the workload is thread-limited),
- or because the NFS workload needs more CPU time than is available in
- the thread pool (the workload is CPU-limited). In the former case,
- configuring more nfsd threads will probably improve the performance
- of the NFS workload. In the latter case, the sunrpc server layer is
- already choosing not to wake idle nfsd threads because there are too
- many nfsd threads which want to run but cannot, so configuring more
- nfsd threads will make no difference whatsoever. The overloads-avoided
- statistic (see below) can be used to distinguish these cases.
+ This can happen because there are too few nfsd threads in the thread
+ pool for the NFS workload (the workload is thread-limited), in which
+ case configuring more nfsd threads will probably improve the
+ performance of the NFS workload.
threads-woken
Counts how many times an idle nfsd thread is woken to try to
@@ -88,36 +82,6 @@ threads-woken
thing. The ideal rate of change for this counter will be close
to but less than the rate of change of the packets-arrived counter.
-overloads-avoided
- Counts how many times the sunrpc server layer chose not to wake an
- nfsd thread, despite the presence of idle nfsd threads, because
- too many nfsd threads had been recently woken but could not get
- enough CPU time to actually run.
-
- This statistic counts a circumstance where the sunrpc layer
- heuristically avoids overloading the CPU scheduler with too many
- runnable nfsd threads. The ideal rate of change for this counter
- is zero. Significant non-zero values indicate that the workload
- is CPU limited. Usually this is associated with heavy CPU usage
- on all the CPUs in the nfsd thread pool.
-
- If a sustained large overloads-avoided rate is detected on a pool,
- the top(1) utility should be used to check for the following
- pattern of CPU usage on all the CPUs associated with the given
- nfsd thread pool.
-
- - %us ~= 0 (as you're *NOT* running applications on your NFS server)
-
- - %wa ~= 0
-
- - %id ~= 0
-
- - %sy + %hi + %si ~= 100
-
- If this pattern is seen, configuring more nfsd threads will *not*
- improve the performance of the workload. If this pattern is not
- seen, then something more subtle is wrong.
-
threads-timedout
Counts how many times an nfsd thread triggered an idle timeout,
i.e. was not woken to handle any incoming network packets for
diff --git a/Documentation/filesystems/porting b/Documentation/filesystems/porting
index e69274de8..f24d1b833 100644
--- a/Documentation/filesystems/porting
+++ b/Documentation/filesystems/porting
@@ -379,10 +379,10 @@ may now be called in rcu-walk mode (nd->flags & LOOKUP_RCU). -ECHILD should be
returned if the filesystem cannot handle rcu-walk. See
Documentation/filesystems/vfs.txt for more details.
- permission and check_acl are inode permission checks that are called
-on many or all directory inodes on the way down a path walk (to check for
-exec permission). These must now be rcu-walk aware (flags & IPERM_FLAG_RCU).
-See Documentation/filesystems/vfs.txt for more details.
+ permission is an inode permission check that is called on many or all
+directory inodes on the way down a path walk (to check for exec permission). It
+must now be rcu-walk aware (mask & MAY_NOT_BLOCK). See
+Documentation/filesystems/vfs.txt for more details.
--
[mandatory]
@@ -483,3 +483,24 @@ in your dentry operations instead.
--
[mandatory]
->aio_read/->aio_write are gone. Use ->read_iter/->write_iter.
+--
+[recommended]
+ for embedded ("fast") symlinks just set inode->i_link to wherever the
+ symlink body is and use simple_follow_link() as ->follow_link().
+--
+[mandatory]
+ calling conventions for ->follow_link() have changed. Instead of returning
+ a cookie and using nd_set_link() to store the body to traverse, we return
+ the body to traverse and store the cookie using an explicit void ** argument.
+ nameidata isn't passed at all - nd_jump_link() doesn't need it and
+ nd_[gs]et_link() is gone.
+--
+[mandatory]
+ calling conventions for ->put_link() have changed. It gets an inode instead
+ of a dentry, it does not get nameidata at all and it gets called only when
+ the cookie is non-NULL. Note that the link body isn't available anymore, so
+ if you need it, store it as the cookie.
+--
+[mandatory]
+ __fd_install() & fd_install() can now sleep. Callers should not
+ hold a spinlock or other resources that do not allow a schedule.
diff --git a/Documentation/filesystems/proc.txt b/Documentation/filesystems/proc.txt
index c3b6b301d..6f7fafde0 100644
--- a/Documentation/filesystems/proc.txt
+++ b/Documentation/filesystems/proc.txt
@@ -205,7 +205,7 @@ asynchronous manner and the value may not be very precise. To see a precise
snapshot of a moment, you can see /proc/<pid>/smaps file and scan page table.
It's slow but very precise.
-Table 1-2: Contents of the status files (as of 3.20.0)
+Table 1-2: Contents of the status files (as of 4.1)
..............................................................................
Field Content
Name filename of the executable
@@ -235,6 +235,7 @@ Table 1-2: Contents of the status files (as of 3.20.0)
VmExe size of text segment
VmLib size of shared library code
VmPTE size of page table entries
+ VmPMD size of second level page tables
VmSwap size of swap usage (the number of referred swapents)
Threads number of threads
SigQ number of signals queued/max. number for queue
diff --git a/Documentation/filesystems/quota.txt b/Documentation/filesystems/quota.txt
index 5e8de25bf..29fc01552 100644
--- a/Documentation/filesystems/quota.txt
+++ b/Documentation/filesystems/quota.txt
@@ -32,7 +32,10 @@ The interface uses generic netlink framework (see
http://lwn.net/Articles/208755/ and http://people.suug.ch/~tgr/libnl/ for more
details about this layer). The name of the quota generic netlink interface
is "VFS_DQUOT". Definitions of constants below are in <linux/quota.h>.
- Currently, the interface supports only one message type QUOTA_NL_C_WARNING.
+Since the quota netlink protocol is not namespace aware, quota netlink messages
+are sent only in the initial network namespace.
+
+Currently, the interface supports only one message type QUOTA_NL_C_WARNING.
This command is used to send a notification about any of the above mentioned
events. Each message has six attributes. These are (type of the argument is
in parentheses):
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index 5d833b32b..5eb8456fc 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -350,8 +350,8 @@ struct inode_operations {
int (*rename2) (struct inode *, struct dentry *,
struct inode *, struct dentry *, unsigned int);
int (*readlink) (struct dentry *, char __user *,int);
- void * (*follow_link) (struct dentry *, struct nameidata *);
- void (*put_link) (struct dentry *, struct nameidata *, void *);
+ const char *(*follow_link) (struct dentry *, void **);
+ void (*put_link) (struct inode *, void *);
int (*permission) (struct inode *, int);
int (*get_acl)(struct inode *, int);
int (*setattr) (struct dentry *, struct iattr *);
@@ -436,16 +436,18 @@ otherwise noted.
follow_link: called by the VFS to follow a symbolic link to the
inode it points to. Only required if you want to support
- symbolic links. This method returns a void pointer cookie
- that is passed to put_link().
+ symbolic links. This method returns the symlink body
+ to traverse (and possibly resets the current position with
+ nd_jump_link()). If the body won't go away until the inode
+ is gone, nothing else is needed; if it needs to be otherwise
+ pinned, the data needed to release whatever was grabbed
+ must be stored in the void * variable passed by address to
+ the follow_link() instance.
put_link: called by the VFS to release resources allocated by
- follow_link(). The cookie returned by follow_link() is passed
- to this method as the last parameter. It is used by
- filesystems such as NFS where page cache is not stable
- (i.e. page that was installed when the symbolic link walk
- started might not be in the page cache at the end of the
- walk).
+ follow_link(). The cookie stored by follow_link() is passed
+ to this method as the last parameter; it is only called when
+ the cookie isn't NULL.
permission: called by the VFS to check for access rights on a POSIX-like
filesystem.
@@ -797,7 +799,7 @@ struct file_operations
----------------------
This describes how the VFS can manipulate an open file. As of kernel
-3.12, the following members are defined:
+4.1, the following members are defined:
struct file_operations {
struct module *owner;
@@ -811,8 +813,9 @@ struct file_operations {
long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long);
long (*compat_ioctl) (struct file *, unsigned int, unsigned long);
int (*mmap) (struct file *, struct vm_area_struct *);
+ int (*mremap)(struct file *, struct vm_area_struct *);
int (*open) (struct inode *, struct file *);
- int (*flush) (struct file *);
+ int (*flush) (struct file *, fl_owner_t id);
int (*release) (struct inode *, struct file *);
int (*fsync) (struct file *, loff_t, loff_t, int datasync);
int (*aio_fsync) (struct kiocb *, int datasync);
@@ -822,11 +825,15 @@ struct file_operations {
unsigned long (*get_unmapped_area)(struct file *, unsigned long, unsigned long, unsigned long, unsigned long);
int (*check_flags)(int);
int (*flock) (struct file *, int, struct file_lock *);
- ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, size_t, unsigned int);
- ssize_t (*splice_read)(struct file *, struct pipe_inode_info *, size_t, unsigned int);
- int (*setlease)(struct file *, long arg, struct file_lock **, void **);
- long (*fallocate)(struct file *, int mode, loff_t offset, loff_t len);
+ ssize_t (*splice_write)(struct pipe_inode_info *, struct file *, loff_t *, size_t, unsigned int);
+ ssize_t (*splice_read)(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
+ int (*setlease)(struct file *, long, struct file_lock **, void **);
+ long (*fallocate)(struct file *file, int mode, loff_t offset,
+ loff_t len);
void (*show_fdinfo)(struct seq_file *m, struct file *f);
+#ifndef CONFIG_MMU
+ unsigned (*mmap_capabilities)(struct file *);
+#endif
};
Again, all methods are called without any locks being held, unless
diff --git a/Documentation/filesystems/xfs.txt b/Documentation/filesystems/xfs.txt
index 5a5a05582..8146e9fd5 100644
--- a/Documentation/filesystems/xfs.txt
+++ b/Documentation/filesystems/xfs.txt
@@ -236,10 +236,10 @@ Removed Mount Options
Name Removed
---- -------
- delaylog/nodelaylog v3.20
- ihashsize v3.20
- irixsgid v3.20
- osyncisdsync/osyncisosync v3.20
+ delaylog/nodelaylog v4.0
+ ihashsize v4.0
+ irixsgid v4.0
+ osyncisdsync/osyncisosync v4.0
sysctls
@@ -346,5 +346,5 @@ Removed Sysctls
Name Removed
---- -------
- fs.xfs.xfsbufd_centisec v3.20
- fs.xfs.age_buffer_centisecs v3.20
+ fs.xfs.xfsbufd_centisec v4.0
+ fs.xfs.age_buffer_centisecs v4.0