From: Jan Blunck <jblunck@xxxxxxx> Add support for whiteout dentries to tmpfs. This includes adding support for whiteouts to d_genocide(), which is called to tear down pinned tmpfs dentries. Whiteouts have to be persistent, so they have a pinning extra ref count that needs to be dropped by d_genocide(). Signed-off-by: Jan Blunck <jblunck@xxxxxxx> Signed-off-by: David Woodhouse <dwmw2@xxxxxxxxxxxxx> Signed-off-by: Valerie Aurora <vaurora@xxxxxxxxxx> Cc: Hugh Dickins <hugh.dickins@xxxxxxxxxxxxx> Cc: linux-mm@xxxxxxxxx --- fs/dcache.c | 12 +++++ mm/shmem.c | 144 +++++++++++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 141 insertions(+), 15 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 06ba068..1974031 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2915,7 +2915,17 @@ resume: next = tmp->next; spin_lock_nested(&dentry->d_lock, DENTRY_D_LOCK_NESTED); - if (d_unhashed(dentry) || !dentry->d_inode) { + + /* Skip unhashed and negative dentries, but process positive + * dentries and whiteouts. A whiteout looks kind of like a + * negative dentry for purposes of lookup, but it has an extra + * pinning ref count because it can't be evicted like a + * negative dentry can. What we care about here is ref counts + * - and we need to drop the ref count on a whiteout before we + * can evict it. + */ + if (d_unhashed(dentry) || + (!dentry->d_inode && !d_is_whiteout(dentry))) { spin_unlock(&dentry->d_lock); continue; } diff --git a/mm/shmem.c b/mm/shmem.c index d221a1c..7e795e3 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -1865,6 +1865,76 @@ static int shmem_statfs(struct dentry *dentry, struct kstatfs *buf) return 0; } +static int shmem_rmdir(struct inode *dir, struct dentry *dentry); +static int shmem_unlink(struct inode *dir, struct dentry *dentry); + +/* + * This is the whiteout support for tmpfs. It uses one singleton whiteout + * inode per superblock thus it is very similar to shmem_link(). + */ +static int shmem_whiteout(struct inode *dir, struct dentry *old_dentry, + struct dentry *new_dentry) +{ + struct shmem_sb_info *sbinfo = SHMEM_SB(dir->i_sb); + struct dentry *dentry; + + if (!(dir->i_sb->s_flags & MS_WHITEOUT)) + return -EPERM; + + /* This gives us a proper initialized negative dentry */ + dentry = simple_lookup(dir, new_dentry, NULL); + if (dentry && IS_ERR(dentry)) + return PTR_ERR(dentry); + + /* + * No ordinary (disk based) filesystem counts whiteouts as inodes; + * but each new link needs a new dentry, pinning lowmem, and + * tmpfs dentries cannot be pruned until they are unlinked. + */ + if (sbinfo->max_inodes) { + spin_lock(&sbinfo->stat_lock); + if (!sbinfo->free_inodes) { + spin_unlock(&sbinfo->stat_lock); + return -ENOSPC; + } + sbinfo->free_inodes--; + spin_unlock(&sbinfo->stat_lock); + } + + if (old_dentry->d_inode) { + if (S_ISDIR(old_dentry->d_inode->i_mode)) + shmem_rmdir(dir, old_dentry); + else + shmem_unlink(dir, old_dentry); + } + + dir->i_size += BOGO_DIRENT_SIZE; + dir->i_ctime = dir->i_mtime = CURRENT_TIME; + /* Extra pinning count for the created dentry */ + dget(new_dentry); + spin_lock(&new_dentry->d_lock); + new_dentry->d_flags |= DCACHE_WHITEOUT; + spin_unlock(&new_dentry->d_lock); + return 0; +} + +static void shmem_d_instantiate(struct inode *dir, struct dentry *dentry, + struct inode *inode) +{ + if (d_is_whiteout(dentry)) { + /* Re-using an existing whiteout */ + shmem_free_inode(dir->i_sb); + if (S_ISDIR(inode->i_mode)) + inode->i_mode |= S_OPAQUE; + } else { + /* New dentry */ + dir->i_size += BOGO_DIRENT_SIZE; + dget(dentry); /* Extra count - pin the dentry in core */ + } + /* Will clear DCACHE_WHITEOUT flag */ + d_instantiate(dentry, inode); + +} /* * File creation. Allocate an inode, and we're done.. */ @@ -1894,10 +1964,8 @@ shmem_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev) #else error = 0; #endif - dir->i_size += BOGO_DIRENT_SIZE; dir->i_ctime = dir->i_mtime = CURRENT_TIME; - d_instantiate(dentry, inode); - dget(dentry); /* Extra count - pin the dentry in core */ + shmem_d_instantiate(dir, dentry, inode); } return error; } @@ -1935,12 +2003,10 @@ static int shmem_link(struct dentry *old_dentry, struct inode *dir, struct dentr if (ret) goto out; - dir->i_size += BOGO_DIRENT_SIZE; inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; inc_nlink(inode); ihold(inode); /* New dentry reference */ - dget(dentry); /* Extra pinning count for the created dentry */ - d_instantiate(dentry, inode); + shmem_d_instantiate(dir, dentry, inode); out: return ret; } @@ -1949,21 +2015,61 @@ static int shmem_unlink(struct inode *dir, struct dentry *dentry) { struct inode *inode = dentry->d_inode; - if (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode)) - shmem_free_inode(inode->i_sb); + if (d_is_whiteout(dentry) || (inode->i_nlink > 1 && !S_ISDIR(inode->i_mode))) + shmem_free_inode(dir->i_sb); + if (inode) { + inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; + drop_nlink(inode); + } dir->i_size -= BOGO_DIRENT_SIZE; - inode->i_ctime = dir->i_ctime = dir->i_mtime = CURRENT_TIME; - drop_nlink(inode); dput(dentry); /* Undo the count from "create" - this does all the work */ return 0; } +static void shmem_dir_unlink_whiteouts(struct inode *dir, struct dentry *dentry) +{ + if (!dentry->d_inode) + return; + + /* Remove whiteouts from logical empty directory */ + if (S_ISDIR(dentry->d_inode->i_mode) && + dentry->d_inode->i_sb->s_flags & MS_WHITEOUT) { + struct dentry *child, *next; + LIST_HEAD(list); + + spin_lock(&dentry->d_lock); + list_for_each_entry(child, &dentry->d_subdirs, d_u.d_child) { + spin_lock(&child->d_lock); + if (d_is_whiteout(child)) { + __d_drop(child); + if (!list_empty(&child->d_lru)) { + list_del(&child->d_lru); + dentry_stat.nr_unused--; + } + list_add(&child->d_lru, &list); + } + spin_unlock(&child->d_lock); + } + spin_unlock(&dentry->d_lock); + + list_for_each_entry_safe(child, next, &list, d_lru) { + spin_lock(&child->d_lock); + list_del_init(&child->d_lru); + spin_unlock(&child->d_lock); + + shmem_unlink(dentry->d_inode, child); + } + } +} + static int shmem_rmdir(struct inode *dir, struct dentry *dentry) { if (!simple_empty(dentry)) return -ENOTEMPTY; + /* Remove whiteouts from logical empty directory */ + shmem_dir_unlink_whiteouts(dir, dentry); drop_nlink(dentry->d_inode); drop_nlink(dir); return shmem_unlink(dir, dentry); @@ -1972,7 +2078,7 @@ static int shmem_rmdir(struct inode *dir, struct dentry *dentry) /* * The VFS layer already does all the dentry stuff for rename, * we just have to decrement the usage count for the target if - * it exists so that the VFS layer correctly free's it when it + * it exists so that the VFS layer correctly frees it when it * gets overwritten. */ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct inode *new_dir, struct dentry *new_dentry) @@ -1983,7 +2089,12 @@ static int shmem_rename(struct inode *old_dir, struct dentry *old_dentry, struct if (!simple_empty(new_dentry)) return -ENOTEMPTY; + if (d_is_whiteout(new_dentry)) + shmem_unlink(new_dir, new_dentry); + if (new_dentry->d_inode) { + /* Remove whiteouts from logical empty directory */ + shmem_dir_unlink_whiteouts(new_dir, new_dentry); (void) shmem_unlink(new_dir, new_dentry); if (they_are_dirs) drop_nlink(old_dir); @@ -2048,10 +2159,8 @@ static int shmem_symlink(struct inode *dir, struct dentry *dentry, const char *s unlock_page(page); page_cache_release(page); } - dir->i_size += BOGO_DIRENT_SIZE; dir->i_ctime = dir->i_mtime = CURRENT_TIME; - d_instantiate(dentry, inode); - dget(dentry); + shmem_d_instantiate(dir, dentry, inode); return 0; } @@ -2623,6 +2732,12 @@ int shmem_fill_super(struct super_block *sb, void *data, int silent) if (!root) goto failed_iput; sb->s_root = root; + +#ifdef CONFIG_TMPFS + if (!(sb->s_flags & MS_NOUSER)) + sb->s_flags |= MS_WHITEOUT; +#endif + return 0; failed_iput: @@ -2731,6 +2846,7 @@ static const struct inode_operations shmem_dir_inode_operations = { .rmdir = shmem_rmdir, .mknod = shmem_mknod, .rename = shmem_rename, + .whiteout = shmem_whiteout, #endif #ifdef CONFIG_TMPFS_XATTR .setxattr = shmem_setxattr, -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html