On Wed, May 28, 2014 at 03:19:37PM +0100, Al Viro wrote: > OK, it's not ->i_lock, it's ->d_lock on parent being grabbed after that on > child, while d_walk() keeps taking them in opposite order. Hmm... > > In principle we could do the following: > * split dentry_kill() into the part that is taking locks and > the rest of it. > * in case of trylock failure have shrink_dentry_list() do > read_seqlock_excl(&rename_lock) (which will stabilize ->d_parent) and > take ->d_lock in the right order, drop rename_lock and call __dentry_kill(). > > AFAICS, that would kill the livelock for good. We still have ->i_lock > trylock failures to deal with, but those are less of a problem - d_walk() > won't step on ->i_lock at all. I'm going to grab a couple of hours of sleep > and try to put together something along those lines... OK, the warnings about averting your eyes very much apply; the thing below definitely needs more massage before it becomes acceptable (and no, it's not a single commit; I'm not that insane), but it changes behaviour in the way described above. Could you check if the livelock persists with it? No trace-generating code in there, so the logs should be compact enough... diff --git a/fs/dcache.c b/fs/dcache.c index 42ae01e..ed0cc62 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -441,42 +441,12 @@ void d_drop(struct dentry *dentry) } EXPORT_SYMBOL(d_drop); -/* - * Finish off a dentry we've decided to kill. - * dentry->d_lock must be held, returns with it unlocked. - * If ref is non-zero, then decrement the refcount too. - * Returns dentry requiring refcount drop, or NULL if we're done. - */ -static struct dentry * -dentry_kill(struct dentry *dentry, int unlock_on_failure) - __releases(dentry->d_lock) +static void __dentry_kill(struct dentry *dentry) { - struct inode *inode; struct dentry *parent = NULL; bool can_free = true; - - if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { - can_free = dentry->d_flags & DCACHE_MAY_FREE; - spin_unlock(&dentry->d_lock); - goto out; - } - - inode = dentry->d_inode; - if (inode && !spin_trylock(&inode->i_lock)) { -relock: - if (unlock_on_failure) { - spin_unlock(&dentry->d_lock); - cpu_relax(); - } - return dentry; /* try again with same dentry */ - } if (!IS_ROOT(dentry)) parent = dentry->d_parent; - if (parent && !spin_trylock(&parent->d_lock)) { - if (inode) - spin_unlock(&inode->i_lock); - goto relock; - } /* * The dentry is now unrecoverably dead to the world. @@ -520,10 +490,44 @@ relock: can_free = false; } spin_unlock(&dentry->d_lock); -out: if (likely(can_free)) dentry_free(dentry); +} + +/* + * Finish off a dentry we've decided to kill. + * dentry->d_lock must be held, returns with it unlocked. + * If ref is non-zero, then decrement the refcount too. + * Returns dentry requiring refcount drop, or NULL if we're done. + */ +static struct dentry * +dentry_kill(struct dentry *dentry, int unlock_on_failure) + __releases(dentry->d_lock) +{ + struct inode *inode = dentry->d_inode; + struct dentry *parent = NULL; + + if (inode && unlikely(!spin_trylock(&inode->i_lock))) + goto failed; + + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + goto failed; + } + } + + __dentry_kill(dentry); return parent; + +failed: + if (unlock_on_failure) { + spin_unlock(&dentry->d_lock); + cpu_relax(); + } + return dentry; /* try again with same dentry */ } /* @@ -797,6 +801,7 @@ static void shrink_dentry_list(struct list_head *list) struct dentry *dentry, *parent; while (!list_empty(list)) { + struct inode *inode; dentry = list_entry(list->prev, struct dentry, d_lru); spin_lock(&dentry->d_lock); /* @@ -815,23 +820,52 @@ static void shrink_dentry_list(struct list_head *list) continue; } - parent = dentry_kill(dentry, 0); - /* - * If dentry_kill returns NULL, we have nothing more to do. - */ - if (!parent) + + if (unlikely(dentry->d_flags & DCACHE_DENTRY_KILLED)) { + bool can_free = dentry->d_flags & DCACHE_MAY_FREE; + spin_unlock(&dentry->d_lock); + if (can_free) + dentry_free(dentry); continue; + } - if (unlikely(parent == dentry)) { - /* - * trylocks have failed and d_lock has been held the - * whole time, so it could not have been added to any - * other lists. Just add it back to the shrink list. - */ + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { d_shrink_add(dentry, list); spin_unlock(&dentry->d_lock); continue; } + + if (IS_ROOT(dentry)) { + __dentry_kill(dentry); + continue; + } + + parent = dentry->d_parent; + if (unlikely(!spin_trylock(&parent->d_lock))) { + if (inode) + spin_unlock(&inode->i_lock); + d_shrink_add(dentry, list); + spin_unlock(&dentry->d_lock); + read_seqlock_excl(&rename_lock); + parent = NULL; + if (!IS_ROOT(dentry)) { + parent = dentry->d_parent; + spin_lock(&parent->d_lock); + } + spin_lock(&dentry->d_lock); + read_sequnlock_excl(&rename_lock); + inode = dentry->d_inode; + if (inode && unlikely(!spin_trylock(&inode->i_lock))) { + if (parent) + spin_unlock(&parent->d_lock); + spin_unlock(&dentry->d_lock); + continue; + } + d_shrink_del(dentry); + } + + __dentry_kill(dentry); /* * We need to prune ancestors too. This is necessary to prevent * quadratic behavior of shrink_dcache_parent(), but is also -- To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html