rehash process protected with d_seq and d_lock locks, but VFS have access to the d_hashed field without any locks sometimes. It produce errors with get cwd operations or fsnotify may report an unlink event sometimes. d_seq lock isn’t used to protect due possibility to sleep with holding locks, and d_lock is too hot to check a simple state, lets return a DCACHE_DENTRY_UNHASHED flag to ability to check a unhashed state without locks held. #include <pthread.h> #include <sys/stat.h> #include <stdlib.h> #include <assert.h> #include <unistd.h> #include <stdio.h> #include <errno.h> void *thread_main(void *unused) { int rc; for (;;) { rc = rename("/tmp/t-a", "/tmp/t-b"); assert(rc == 0); rc = rename("/tmp/t-b", "/tmp/t-a"); assert(rc == 0); } return NULL; } int main(void) { int rc, i; pthread_t ptt; rmdir("/tmp/t-a"); rmdir("/tmp/t-b"); rc = mkdir("/tmp/t-a", 0666); assert(rc == 0); rc = chdir("/tmp/t-a"); assert(rc == 0); rc = pthread_create(&ptt, NULL, thread_main, NULL); assert(rc == 0); for (i=0; ;i++) { char buf[100], *b; b = getcwd(buf, sizeof(buf)); if (b == NULL) { printf("getcwd failed on iter %d with %d\n", i, errno); break; } } return 0; } Reported-by: Andrew Perepechko <anserper@xxxxxxxxx> Signed-off-by: Alexey Lyashkov <alexey.lyashkov@xxxxxxxxx> --- fs/configfs/inode.c | 2 +- fs/dcache.c | 24 ++++++++++++++++-------- fs/nfs/dir.c | 2 +- include/linux/dcache.h | 6 ++++-- 4 files changed, 22 insertions(+), 12 deletions(-) diff --git a/fs/configfs/inode.c b/fs/configfs/inode.c index ad718e5..9f531f7 100644 --- a/fs/configfs/inode.c +++ b/fs/configfs/inode.c @@ -248,7 +248,7 @@ void configfs_drop_dentry(struct configfs_dirent * sd, struct dentry * parent) spin_lock(&dentry->d_lock); if (simple_positive(dentry)) { dget_dlock(dentry); - __d_drop(dentry); + _d_drop(dentry); spin_unlock(&dentry->d_lock); simple_unlink(d_inode(parent), dentry); } else diff --git a/fs/dcache.c b/fs/dcache.c index 5c7cc95..0863841 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -438,7 +438,7 @@ static void dentry_lru_add(struct dentry *dentry) */ void __d_drop(struct dentry *dentry) { - if (!d_unhashed(dentry)) { + if (!hlist_bl_unhashed(&dentry->d_hash)) { struct hlist_bl_head *b; /* * Hashed dentries are normally on the dentry hashtable, @@ -458,12 +458,18 @@ void __d_drop(struct dentry *dentry) write_seqcount_invalidate(&dentry->d_seq); } } -EXPORT_SYMBOL(__d_drop); + +void _d_drop(struct dentry *dentry) +{ + dentry->d_flags |= DCACHE_DENTRY_UNHASHED; + __d_drop(dentry); +} +EXPORT_SYMBOL(_d_drop); void d_drop(struct dentry *dentry) { spin_lock(&dentry->d_lock); - __d_drop(dentry); + _d_drop(dentry); spin_unlock(&dentry->d_lock); } EXPORT_SYMBOL(d_drop); @@ -530,7 +536,7 @@ static void __dentry_kill(struct dentry *dentry) d_lru_del(dentry); } /* if it was on the hash then remove it */ - __d_drop(dentry); + _d_drop(dentry); dentry_unlist(dentry, parent); if (parent) spin_unlock(&parent->d_lock); @@ -1486,7 +1492,7 @@ static void check_and_drop(void *_data) struct detach_data *data = _data; if (!data->mountpoint && !data->select.found) - __d_drop(data->select.start); + _d_drop(data->select.start); } /** @@ -1601,7 +1607,7 @@ struct dentry *__d_alloc(struct super_block *sb, const struct qstr *name) dentry->d_name.name = dname; dentry->d_lockref.count = 1; - dentry->d_flags = 0; + dentry->d_flags = DCACHE_DENTRY_UNHASHED; spin_lock_init(&dentry->d_lock); seqcount_init(&dentry->d_seq); dentry->d_inode = NULL; @@ -1926,6 +1932,7 @@ static struct dentry *__d_obtain_alias(struct inode *inode, int disconnected) spin_lock(&tmp->d_lock); __d_set_inode_and_type(tmp, inode, add_flags); hlist_add_head(&tmp->d_u.d_alias, &inode->i_dentry); + tmp->d_flags &= ~DCACHE_DENTRY_UNHASHED; hlist_bl_lock(&tmp->d_sb->s_anon); hlist_bl_add_head(&tmp->d_hash, &tmp->d_sb->s_anon); hlist_bl_unlock(&tmp->d_sb->s_anon); @@ -2331,7 +2338,7 @@ void d_delete(struct dentry * dentry) } if (!d_unhashed(dentry)) - __d_drop(dentry); + _d_drop(dentry); spin_unlock(&dentry->d_lock); @@ -2342,7 +2349,8 @@ EXPORT_SYMBOL(d_delete); static void __d_rehash(struct dentry *entry) { struct hlist_bl_head *b = d_hash(entry->d_name.hash); - BUG_ON(!d_unhashed(entry)); + BUG_ON(!hlist_bl_unhashed(&entry->d_hash)); + entry->d_flags &= ~DCACHE_DENTRY_UNHASHED; hlist_bl_lock(b); hlist_bl_add_head_rcu(&entry->d_hash, b); hlist_bl_unlock(b); diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 5f1af4c..a1911bb 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1891,7 +1891,7 @@ int nfs_unlink(struct inode *dir, struct dentry *dentry) goto out; } if (!d_unhashed(dentry)) { - __d_drop(dentry); + _d_drop(dentry); need_rehash = 1; } spin_unlock(&dentry->d_lock); diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 5beed7b..6b0b3d7 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -213,6 +213,7 @@ struct dentry_operations { #define DCACHE_PAR_LOOKUP 0x10000000 /* being looked up (with parent locked shared) */ #define DCACHE_DENTRY_CURSOR 0x20000000 +#define DCACHE_DENTRY_UNHASHED 0x40000000 /* dentry unhashed from tree with unlink */ extern seqlock_t rename_lock; /* @@ -221,7 +222,7 @@ extern seqlock_t rename_lock; extern void d_instantiate(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern int d_instantiate_no_diralias(struct dentry *, struct inode *); -extern void __d_drop(struct dentry *dentry); +extern void _d_drop(struct dentry *dentry); extern void d_drop(struct dentry *dentry); extern void d_delete(struct dentry *); extern void d_set_d_op(struct dentry *dentry, const struct dentry_operations *op); @@ -326,11 +327,12 @@ extern struct dentry *dget_parent(struct dentry *dentry); * @dentry: entry to check * * Returns true if the dentry passed is not currently hashed. + * hlist_bl_unhashed can't be used as race with d_move(). */ static inline int d_unhashed(const struct dentry *dentry) { - return hlist_bl_unhashed(&dentry->d_hash); + return dentry->d_flags & DCACHE_DENTRY_UNHASHED; } static inline int d_unlinked(const struct dentry *dentry) -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html