On Nov 30, 2016, at 11:18 PM, Alexey Lyashkov wrote: > rehash process protected with d_seq and d_lock locks, but VFS have access to the > d_hashed field without any locks sometimes. It produce errors with get cwd > operations or fsnotify may report an unlink event sometimes. d_seq lock isn’t > used to protect due possibility to sleep with holding locks, and d_lock is too > hot to check a simple state, lets return a DCACHE_DENTRY_UNHASHED flag to > ability to check a unhashed state without locks held. While Lustre-wise the patch is ok, I guess, I wonder if the underlying idea is sound? Lockless access is inherently racy, so if you replace the list check with just a bit check, it's still racy, just the window is a bit smaller, no? Or are you only concerned about d_move's very nonatomic unhash/rehash? > #include <pthread.h> > #include <sys/stat.h> > #include <stdlib.h> > #include <assert.h> > #include <unistd.h> > #include <stdio.h> > #include <errno.h> > > void *thread_main(void *unused) > { > int rc; > for (;;) { > rc = rename("/tmp/t-a", "/tmp/t-b"); > assert(rc == 0); > rc = rename("/tmp/t-b", "/tmp/t-a"); > assert(rc == 0); > } > > return NULL; > } > > int main(void) { > int rc, i; > pthread_t ptt; > > rmdir("/tmp/t-a"); > rmdir("/tmp/t-b"); > > rc = mkdir("/tmp/t-a", 0666); > assert(rc == 0); > > rc = chdir("/tmp/t-a"); > assert(rc == 0); > > rc = pthread_create(&ptt, NULL, thread_main, NULL); > assert(rc == 0); > > for (i=0; ;i++) { > char buf[100], *b; > b = getcwd(buf, sizeof(buf)); > if (b == NULL) { > printf("getcwd failed on iter %d with %d\n", i, errno); > break; > } > } > > return 0; > } > > Reported-by: Andrew Perepechko <anserper@xxxxxxxxx> > Signed-off-by: Alexey Lyashkov <alexey.lyashkov@xxxxxxxxx> > --- > arch/powerpc/platforms/cell/spufs/inode.c | 2 +- > drivers/infiniband/hw/qib/qib_fs.c | 2 +- > .../staging/lustre/lustre/llite/llite_internal.h | 2 +- > fs/configfs/inode.c | 2 +- > fs/dcache.c | 24 +++++++++++++------- > fs/nfs/dir.c | 2 +- > include/linux/dcache.h | 6 +++-- > 7 files changed, 25 insertions(+), 15 deletions(-) > > diff --git a/arch/powerpc/platforms/cell/spufs/inode.c b/arch/powerpc/platforms/cell/spufs/inode.c > index 5364d4a..cc8623d 100644 > --- a/arch/powerpc/platforms/cell/spufs/inode.c > +++ b/arch/powerpc/platforms/cell/spufs/inode.c > @@ -168,7 +168,7 @@ static void spufs_prune_dir(struct dentry *dir) > spin_lock(&dentry->d_lock); > if (simple_positive(dentry)) { > dget_dlock(dentry); > - __d_drop(dentry); > + _d_drop(dentry); > spin_unlock(&dentry->d_lock); > simple_unlink(d_inode(dir), dentry); > /* XXX: what was dcache_lock protecting here? Other > diff --git a/drivers/infiniband/hw/qib/qib_fs.c b/drivers/infiniband/hw/qib/qib_fs.c > index f1e66ef..9b4c0e7 100644 > --- a/drivers/infiniband/hw/qib/qib_fs.c > +++ b/drivers/infiniband/hw/qib/qib_fs.c > @@ -440,7 +440,7 @@ static int remove_file(struct dentry *parent, char *name) > > spin_lock(&tmp->d_lock); > if (simple_positive(tmp)) { > - __d_drop(tmp); > + _d_drop(tmp); > spin_unlock(&tmp->d_lock); > simple_unlink(d_inode(parent), tmp); > } else { > diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h > index 4bc5512..f230505 100644 > --- a/drivers/staging/lustre/lustre/llite/llite_internal.h > +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h > @@ -1353,7 +1353,7 @@ static inline void d_lustre_invalidate(struct dentry *dentry, int nested) > * it and busy inodes would be reported. > */ > if (d_count(dentry) == 0 && !(dentry->d_flags & DCACHE_DISCONNECTED)) > - __d_drop(dentry); > + _d_drop(dentry); > spin_unlock(&dentry->d_lock); > } > > diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c > index ad718e5..9f531f7 100644 > --- a/fs/configfs/inode.c > +++ b/fs/configfs/inode.c > @@ -248,7 +248,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) > spin_lock(&dentry->d_lock); > if (simple_positive(dentry)) { > dget_dlock(dentry); > - __d_drop(dentry); > + _d_drop(dentry); > spin_unlock(&dentry->d_lock); > simple_unlink(d_inode(parent), dentry); > } else > diff --git a/fs/dcache.c b/fs/dcache.c > index 5c7cc95..0863841 100644 > --- a/fs/dcache.c > +++ b/fs/dcache.c > @@ -438,7 +438,7 @@ static void dentry_lru_add(struct dentry *dentry) > */ > void __d_drop(struct dentry *dentry) > { > - if (!d_unhashed(dentry)) { > + if (!hlist_bl_unhashed(&dentry->d_hash)) { > struct hlist_bl_head *b; > /* > * Hashed dentries are normally on the dentry hashtable, > @@ -458,12 +458,18 @@ void __d_drop(struct dentry *dentry) > write_seqcount_invalidate(&dentry->d_seq); > } > } > -EXPORT_SYMBOL(__d_drop); > + > +void _d_drop(struct dentry *dentry) > +{ > + dentry->d_flags |= DCACHE_DENTRY_UNHASHED; > + __d_drop(dentry); > +} > +EXPORT_SYMBOL(_d_drop); > > void d_drop(struct dentry *dentry) > { > spin_lock(&dentry->d_lock); > - __d_drop(dentry); > + _d_drop(dentry); > spin_unlock(&dentry->d_lock); > } > EXPORT_SYMBOL(d_drop); > @@ -530,7 +536,7 @@ static void __dentry_kill(struct dentry *dentry) > d_lru_del(dentry); > } > /* if it was on the hash then remove it */ > - __d_drop(dentry); > + _d_drop(dentry); > dentry_unlist(dentry, parent); > if (parent) > spin_unlock(&parent->d_lock); > @@ -1486,7 +1492,7 @@ static void check_and_drop(void *_data) > struct detach_data *data = _data; > > if (!data->mountpoint && !data->select.found) > - __d_drop(data->select.start); > + _d_drop(data->select.start); > } > > /** > @@ -1601,7 +1607,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) > dentry->d_name.name = dname; > > dentry->d_lockref.count = 1; > - dentry->d_flags = 0; > + dentry->d_flags = DCACHE_DENTRY_UNHASHED; > spin_lock_init(&dentry->d_lock); > seqcount_init(&dentry->d_seq); > dentry->d_inode = NULL; > @@ -1926,6 +1932,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) > spin_lock(&tmp->d_lock); > __d_set_inode_and_type(tmp, inode, add_flags); > hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); > + tmp->d_flags &= ~DCACHE_DENTRY_UNHASHED; > hlist_bl_lock(&tmp->d_sb->s_anon); > hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); > hlist_bl_unlock(&tmp->d_sb->s_anon); > @@ -2331,7 +2338,7 @@ void d_delete(struct dentry * dentry) > } > > if (!d_unhashed(dentry)) > - __d_drop(dentry); > + _d_drop(dentry); > > spin_unlock(&dentry->d_lock); > > @@ -2342,7 +2349,8 @@ EXPORT_SYMBOL(d_delete); > static void __d_rehash(struct dentry *entry) > { > struct hlist_bl_head *b = d_hash(entry->d_name.hash); > - BUG_ON(!d_unhashed(entry)); > + BUG_ON(!hlist_bl_unhashed(&entry->d_hash)); > + entry->d_flags &= ~DCACHE_DENTRY_UNHASHED; > hlist_bl_lock(b); > hlist_bl_add_head_rcu(&entry->d_hash, b); > hlist_bl_unlock(b); > diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c > index 5f1af4c..a1911bb 100644 > --- a/fs/nfs/dir.c > +++ b/fs/nfs/dir.c > @@ -1891,7 +1891,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) > goto out; > } > if (!d_unhashed(dentry)) { > - __d_drop(dentry); > + _d_drop(dentry); > need_rehash = 1; > } > spin_unlock(&dentry->d_lock); > diff --git a/include/linux/dcache.h b/include/linux/dcache.h > index 5beed7b..6b0b3d7 100644 > --- a/include/linux/dcache.h > +++ b/include/linux/dcache.h > @@ -213,6 +213,7 @@ struct dentry_operations { > #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ > #define DCACHE_DENTRY_CURSOR 0x20000000 > > +#define DCACHE_DENTRY_UNHASHED 0x40000000 /* dentry unhashed from tree with unlink */ > extern seqlock_t rename_lock; > > /* > @@ -221,7 +222,7 @@ extern seqlock_t rename_lock; > extern void d_instantiate(struct dentry *, struct inode *); > extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); > extern int d_instantiate_no_diralias(struct dentry *, struct inode *); > -extern void __d_drop(struct dentry *dentry); > +extern void _d_drop(struct dentry *dentry); > extern void d_drop(struct dentry *dentry); > extern void d_delete(struct dentry *); > extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); > @@ -326,11 +327,12 @@ extern struct dentry *dget_parent(struct dentry *dentry); > * @dentry: entry to check > * > * Returns true if the dentry passed is not currently hashed. > + * hlist_bl_unhashed can't be used as race with d_move(). s/as/due to a/ > */ > > static inline int d_unhashed(const struct dentry *dentry) > { > - return hlist_bl_unhashed(&dentry->d_hash); > + return dentry->d_flags & DCACHE_DENTRY_UNHASHED; > } > > static inline int d_unlinked(const struct dentry *dentry) > > -- > To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html