If exp_deferred_qs is incorrectly set and leaks into the next expedited
grace period (GP), it may cause that GP to complete prematurely.

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxxxxx>
---
 kernel/rcu/tree_exp.h | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/kernel/rcu/tree_exp.h b/kernel/rcu/tree_exp.h
index a0e1e51c51c2..6dec21909b30 100644
--- a/kernel/rcu/tree_exp.h
+++ b/kernel/rcu/tree_exp.h
@@ -603,6 +603,18 @@ static void rcu_exp_handler(void *unused)
 	struct rcu_node *rnp = rdp->mynode;
 	struct task_struct *t = current;
 
+	/*
+	 * Note that there is a large group of race conditions that
+	 * can have caused this quiescent state to already have been
+	 * reported, so we really do need to check ->expmask first.
+	 */
+	raw_spin_lock_irqsave_rcu_node(rnp, flags);
+	if (!(rnp->expmask & rdp->grpmask)) {
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		return;
+	}
+	raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+
 	/*
 	 * First, the common case of not being in an RCU read-side
 	 * critical section.  If also enabled or idle, immediately
@@ -628,17 +640,10 @@ static void rcu_exp_handler(void *unused)
 	 * a future context switch.  Either way, if the expedited
 	 * grace period is still waiting on this CPU, set ->deferred_qs
 	 * so that the eventual quiescent state will be reported.
-	 * Note that there is a large group of race conditions that
-	 * can have caused this quiescent state to already have been
-	 * reported, so we really do need to check ->expmask.
 	 */
 	if (t->rcu_read_lock_nesting > 0) {
-		raw_spin_lock_irqsave_rcu_node(rnp, flags);
-		if (rnp->expmask & rdp->grpmask) {
-			rdp->exp_deferred_qs = true;
-			t->rcu_read_unlock_special.b.exp_hint = true;
-		}
-		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
+		rdp->exp_deferred_qs = true;
+		WRITE_ONCE(t->rcu_read_unlock_special.b.exp_hint, true);
 		return;
 	}
 
-- 
2.20.1