Re: [PATCH 1/2] Re introduces a DCACHE_DENTRY_UNHASHED flag

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Nov 30, 2016, at 11:18 PM, Alexey Lyashkov wrote:

> rehash process protected with d_seq and d_lock locks, but VFS have access to the
> d_hashed field without any locks sometimes. It produce errors with get cwd
> operations or fsnotify may report an unlink event sometimes. d_seq lock isn’t
> used to protect due possibility to sleep with holding locks, and d_lock is too
> hot to check a simple state, lets return a DCACHE_DENTRY_UNHASHED flag to
> ability to check a unhashed state without locks held.

While Lustre-wise the patch is ok, I guess,
I wonder if the underlying idea is sound?
Lockless access is inherently racy, so if you replace the list check with just
a bit check, it's still racy, just the window is a bit smaller, no?
Or are you only concerned about d_move's very nonatomic unhash/rehash?

> #include <pthread.h>
> #include <sys/stat.h>
> #include <stdlib.h>
> #include <assert.h>
> #include <unistd.h>
> #include <stdio.h>
> #include <errno.h>
> 
> void *thread_main(void *unused)
> {
>        int rc;
>        for (;;) {
>                rc = rename("/tmp/t-a", "/tmp/t-b");
>                assert(rc == 0);
>                rc = rename("/tmp/t-b", "/tmp/t-a");
>                assert(rc == 0);
>        }
> 
>        return NULL;
> }
> 
> int main(void) {
>        int rc, i;
>        pthread_t ptt;
> 
>        rmdir("/tmp/t-a");
>        rmdir("/tmp/t-b");
> 
>        rc = mkdir("/tmp/t-a", 0666);
>        assert(rc == 0);
> 
>        rc = chdir("/tmp/t-a");
>        assert(rc == 0);
> 
>        rc = pthread_create(&ptt, NULL, thread_main, NULL);
>        assert(rc == 0);
> 
>        for (i=0; ;i++) {
>                char buf[100], *b;
>                b = getcwd(buf, sizeof(buf));
>                if (b == NULL) {
>                        printf("getcwd failed on iter %d with %d\n", i, errno);
>                        break;
>                }
>        }
> 
>        return 0;
> }
> 
> Reported-by: Andrew Perepechko <anserper@xxxxxxxxx>
> Signed-off-by: Alexey Lyashkov <alexey.lyashkov@xxxxxxxxx>
> ---
> arch/powerpc/platforms/cell/spufs/inode.c          |    2 +-
> drivers/infiniband/hw/qib/qib_fs.c                 |    2 +-
> .../staging/lustre/lustre/llite/llite_internal.h   |    2 +-
> fs/configfs/inode.c                                |    2 +-
> fs/dcache.c                                        |   24 +++++++++++++-------
> fs/nfs/dir.c                                       |    2 +-
> include/linux/dcache.h                             |    6 +++--
> 7 files changed, 25 insertions(+), 15 deletions(-)
> 
> diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c
> index 5364d4a..cc8623d 100644
> --- a/arch/powerpc/platforms/cell/spufs/inode.c
> +++ b/arch/powerpc/platforms/cell/spufs/inode.c
> @@ -168,7 +168,7 @@ static void spufs_prune_dir(struct dentry *dir)
> 		spin_lock(&dentry->d_lock);
> 		if (simple_positive(dentry)) {
> 			dget_dlock(dentry);
> -			__d_drop(dentry);
> +			_d_drop(dentry);
> 			spin_unlock(&dentry->d_lock);
> 			simple_unlink(d_inode(dir), dentry);
> 			/* XXX: what was dcache_lock protecting here? Other
> diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c
> index f1e66ef..9b4c0e7 100644
> --- a/drivers/infiniband/hw/qib/qib_fs.c
> +++ b/drivers/infiniband/hw/qib/qib_fs.c
> @@ -440,7 +440,7 @@ static int remove_file(struct dentry *parent, char *name)
> 
> 	spin_lock(&tmp->d_lock);
> 	if (simple_positive(tmp)) {
> -		__d_drop(tmp);
> +		_d_drop(tmp);
> 		spin_unlock(&tmp->d_lock);
> 		simple_unlink(d_inode(parent), tmp);
> 	} else {
> diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h
> index 4bc5512..f230505 100644
> --- a/drivers/staging/lustre/lustre/llite/llite_internal.h
> +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h
> @@ -1353,7 +1353,7 @@ static inline void d_lustre_invalidate(struct dentry *dentry, int nested)
> 	 * it and busy inodes would be reported.
> 	 */
> 	if (d_count(dentry) == 0 && !(dentry->d_flags & DCACHE_DISCONNECTED))
> -		__d_drop(dentry);
> +		_d_drop(dentry);
> 	spin_unlock(&dentry->d_lock);
> }
> 
> diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c
> index ad718e5..9f531f7 100644
> --- a/fs/configfs/inode.c
> +++ b/fs/configfs/inode.c
> @@ -248,7 +248,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent)
> 		spin_lock(&dentry->d_lock);
> 		if (simple_positive(dentry)) {
> 			dget_dlock(dentry);
> -			__d_drop(dentry);
> +			_d_drop(dentry);
> 			spin_unlock(&dentry->d_lock);
> 			simple_unlink(d_inode(parent), dentry);
> 		} else
> diff --git a/fs/dcache.c b/fs/dcache.c
> index 5c7cc95..0863841 100644
> --- a/fs/dcache.c
> +++ b/fs/dcache.c
> @@ -438,7 +438,7 @@ static void dentry_lru_add(struct dentry *dentry)
>  */
> void __d_drop(struct dentry *dentry)
> {
> -	if (!d_unhashed(dentry)) {
> +	if (!hlist_bl_unhashed(&dentry->d_hash)) {
> 		struct hlist_bl_head *b;
> 		/*
> 		 * Hashed dentries are normally on the dentry hashtable,
> @@ -458,12 +458,18 @@ void __d_drop(struct dentry *dentry)
> 		write_seqcount_invalidate(&dentry->d_seq);
> 	}
> }
> -EXPORT_SYMBOL(__d_drop);
> +
> +void _d_drop(struct dentry *dentry)
> +{
> +	dentry->d_flags |= DCACHE_DENTRY_UNHASHED;
> +	__d_drop(dentry);
> +}
> +EXPORT_SYMBOL(_d_drop);
> 
> void d_drop(struct dentry *dentry)
> {
> 	spin_lock(&dentry->d_lock);
> -	__d_drop(dentry);
> +	_d_drop(dentry);
> 	spin_unlock(&dentry->d_lock);
> }
> EXPORT_SYMBOL(d_drop);
> @@ -530,7 +536,7 @@ static void __dentry_kill(struct dentry *dentry)
> 			d_lru_del(dentry);
> 	}
> 	/* if it was on the hash then remove it */
> -	__d_drop(dentry);
> +	_d_drop(dentry);
> 	dentry_unlist(dentry, parent);
> 	if (parent)
> 		spin_unlock(&parent->d_lock);
> @@ -1486,7 +1492,7 @@ static void check_and_drop(void *_data)
> 	struct detach_data *data = _data;
> 
> 	if (!data->mountpoint && !data->select.found)
> -		__d_drop(data->select.start);
> +		_d_drop(data->select.start);
> }
> 
> /**
> @@ -1601,7 +1607,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name)
> 	dentry->d_name.name = dname;
> 
> 	dentry->d_lockref.count = 1;
> -	dentry->d_flags = 0;
> +	dentry->d_flags = DCACHE_DENTRY_UNHASHED;
> 	spin_lock_init(&dentry->d_lock);
> 	seqcount_init(&dentry->d_seq);
> 	dentry->d_inode = NULL;
> @@ -1926,6 +1932,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected)
> 	spin_lock(&tmp->d_lock);
> 	__d_set_inode_and_type(tmp, inode, add_flags);
> 	hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry);
> +	tmp->d_flags &= ~DCACHE_DENTRY_UNHASHED;
> 	hlist_bl_lock(&tmp->d_sb->s_anon);
> 	hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon);
> 	hlist_bl_unlock(&tmp->d_sb->s_anon);
> @@ -2331,7 +2338,7 @@ void d_delete(struct dentry * dentry)
> 	}
> 
> 	if (!d_unhashed(dentry))
> -		__d_drop(dentry);
> +		_d_drop(dentry);
> 
> 	spin_unlock(&dentry->d_lock);
> 
> @@ -2342,7 +2349,8 @@ EXPORT_SYMBOL(d_delete);
> static void __d_rehash(struct dentry *entry)
> {
> 	struct hlist_bl_head *b = d_hash(entry->d_name.hash);
> -	BUG_ON(!d_unhashed(entry));
> +	BUG_ON(!hlist_bl_unhashed(&entry->d_hash));
> +	entry->d_flags &= ~DCACHE_DENTRY_UNHASHED;
> 	hlist_bl_lock(b);
> 	hlist_bl_add_head_rcu(&entry->d_hash, b);
> 	hlist_bl_unlock(b);
> diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c
> index 5f1af4c..a1911bb 100644
> --- a/fs/nfs/dir.c
> +++ b/fs/nfs/dir.c
> @@ -1891,7 +1891,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry)
> 		goto out;
> 	}
> 	if (!d_unhashed(dentry)) {
> -		__d_drop(dentry);
> +		_d_drop(dentry);
> 		need_rehash = 1;
> 	}
> 	spin_unlock(&dentry->d_lock);
> diff --git a/include/linux/dcache.h b/include/linux/dcache.h
> index 5beed7b..6b0b3d7 100644
> --- a/include/linux/dcache.h
> +++ b/include/linux/dcache.h
> @@ -213,6 +213,7 @@ struct dentry_operations {
> #define DCACHE_PAR_LOOKUP		0x10000000 /* being looked up (with parent locked shared) */
> #define DCACHE_DENTRY_CURSOR		0x20000000
> 
> +#define DCACHE_DENTRY_UNHASHED		0x40000000 /* dentry unhashed from tree with unlink */
> extern seqlock_t rename_lock;
> 
> /*
> @@ -221,7 +222,7 @@ extern seqlock_t rename_lock;
> extern void d_instantiate(struct dentry *, struct inode *);
> extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *);
> extern int d_instantiate_no_diralias(struct dentry *, struct inode *);
> -extern void __d_drop(struct dentry *dentry);
> +extern void _d_drop(struct dentry *dentry);
> extern void d_drop(struct dentry *dentry);
> extern void d_delete(struct dentry *);
> extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op);
> @@ -326,11 +327,12 @@ extern struct dentry *dget_parent(struct dentry *dentry);
>  *	@dentry: entry to check
>  *
>  *	Returns true if the dentry passed is not currently hashed.
> + *	hlist_bl_unhashed can't be used as race with d_move().

s/as/due to a/

>  */
> 
> static inline int d_unhashed(const struct dentry *dentry)
> {
> -	return hlist_bl_unhashed(&dentry->d_hash);
> +	return dentry->d_flags & DCACHE_DENTRY_UNHASHED;
> }
> 
> static inline int d_unlinked(const struct dentry *dentry)
> 
> --
> To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html

--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux