__d_add() and __d_move() wake up waiters on dentry::d_wait from within the i_dir_seq write side critical section. This violates the PREEMPT_RT constraints because the wake up acquires wait_queue_head::lock, which is a "sleeping" spinlock on RT, while preemption is disabled. There is no requirement to do the wake up inside that section. __d_lookup_unhash() has cleared DCACHE_PAR_LOOKUP and dentry::d_wait and returned the now unreachable wait queue head pointer to the caller, so the actual wake up can be postponed until the i_dir_seq write side critical section is left. The only requirement is that dentry::d_lock is held across the whole sequence including the wake up. This is safe because: 1) The whole sequence including the wake up is protected by dentry::d_lock. 2) The waitqueue head is allocated by the caller on the stack and can't go away until the whole callchain completes. 3) If a queued waiter is woken by a spurious wake up, then it is blocked on dentry::d_lock before it can observe DCACHE_PAR_LOOKUP cleared and return from d_wait_lookup(). As the wake up is inside the dentry::d_lock held region, it is guaranteed that the waiter's waitq is dequeued from the waitqueue head before the waiter returns. Moving the wake up past the unlock of dentry::d_lock would allow the waiter to return with the on-stack waitq still enqueued due to a spurious wake up. 4) New waiters have to acquire dentry::d_lock before checking whether the DCACHE_PAR_LOOKUP flag is set. Move the wake up past end_dir_add(), which leaves the i_dir_seq write side critical section and enables preemption. For non-RT kernels there is no difference because preemption is still disabled due to dentry::d_lock being held, but it shortens the time between the wake up and unlocking dentry::d_lock, which reduces the contention for the woken up waiter.
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx> Signed-off-by: Sebastian Andrzej Siewior <bigeasy@xxxxxxxxxxxxx> --- fs/dcache.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/dcache.c b/fs/dcache.c index 6ef1f5c32bc0f..0b5fd3a17ff7c 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -2747,13 +2747,15 @@ EXPORT_SYMBOL(__d_lookup_done); static inline void __d_add(struct dentry *dentry, struct inode *inode) { + wait_queue_head_t *d_wait; struct inode *dir = NULL; unsigned n; + spin_lock(&dentry->d_lock); if (unlikely(d_in_lookup(dentry))) { dir = dentry->d_parent->d_inode; n = start_dir_add(dir); - wake_up_all(__d_lookup_unhash(dentry)); + d_wait = __d_lookup_unhash(dentry); } if (inode) { unsigned add_flags = d_flags_for_inode(inode); @@ -2764,8 +2766,10 @@ static inline void __d_add(struct dentry *dentry, struct inode *inode) fsnotify_update_flags(dentry); } __d_rehash(dentry); - if (dir) + if (dir) { end_dir_add(dir, n); + wake_up_all(d_wait); + } spin_unlock(&dentry->d_lock); if (inode) spin_unlock(&inode->i_lock); @@ -2912,6 +2916,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, bool exchange) { struct dentry *old_parent, *p; + wait_queue_head_t *d_wait; struct inode *dir = NULL; unsigned n; @@ -2942,7 +2947,7 @@ static void __d_move(struct dentry *dentry, struct dentry *target, if (unlikely(d_in_lookup(target))) { dir = target->d_parent->d_inode; n = start_dir_add(dir); - wake_up_all(__d_lookup_unhash(target)); + d_wait = __d_lookup_unhash(target); } write_seqcount_begin(&dentry->d_seq); @@ -2977,8 +2982,10 @@ static void __d_move(struct dentry *dentry, struct dentry *target, write_seqcount_end(&target->d_seq); write_seqcount_end(&dentry->d_seq); - if (dir) + if (dir) { end_dir_add(dir, n); + wake_up_all(d_wait); + } if (dentry->d_parent != old_parent) spin_unlock(&dentry->d_parent->d_lock); -- 2.36.1