Work in progress, not for inclusion. This patch modified the RCU priority booster to explicitly sleep when there are no RCU readers in need of priority boosting. This should be a power-consumption improvement over the one-second polling cycle in the underlying RCU priority-boosting patch. Signed-off-by: Paul E. McKenney <paulmck@xxxxxxxxxxxxxxxxxx> --- include/linux/rcupreempt.h | 15 ++++++ kernel/rcupreempt.c | 102 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 115 insertions(+), 2 deletions(-) diff -urpNa -X dontdiff linux-2.6.22-G-boosttorture/include/linux/rcupreempt.h linux-2.6.22-H-boostsleep/include/linux/rcupreempt.h --- linux-2.6.22-G-boosttorture/include/linux/rcupreempt.h 2007-08-24 11:24:59.000000000 -0700 +++ linux-2.6.22-H-boostsleep/include/linux/rcupreempt.h 2007-08-24 18:12:41.000000000 -0700 @@ -60,6 +60,21 @@ enum rcu_boost_state { #define N_RCU_BOOST_STATE (RCU_BOOST_INVALID + 1) +/* + * RCU-booster state with respect to sleeping. The RCU booster + * sleeps when no task has recently been seen sleeping in an RCU + * read-side critical section, and is awakened when a new sleeper + * appears. + */ +enum rcu_booster_state { + RCU_BOOSTER_ACTIVE = 0, /* RCU booster actively scanning. */ + RCU_BOOSTER_DROWSY = 1, /* RCU booster is considering sleeping. */ + RCU_BOOSTER_SLEEPING = 2, /* RCU booster is asleep. */ + RCU_BOOSTER_INVALID = 3, /* For bogus state sightings. */ +}; + +#define N_RCU_BOOSTER_STATE (RCU_BOOSTER_INVALID + 1) + #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST */ #define call_rcu_bh(head, rcu) call_rcu(head, rcu) diff -urpNa -X dontdiff linux-2.6.22-G-boosttorture/kernel/rcupreempt.c linux-2.6.22-H-boostsleep/kernel/rcupreempt.c --- linux-2.6.22-G-boosttorture/kernel/rcupreempt.c 2007-08-27 15:42:57.000000000 -0700 +++ linux-2.6.22-H-boostsleep/kernel/rcupreempt.c 2007-08-27 15:42:37.000000000 -0700 @@ -108,6 +108,7 @@ struct rcu_boost_dat { unsigned long rbs_unboosted; #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS unsigned long rbs_stats[N_RCU_BOOST_DAT_EVENTS][N_RCU_BOOST_STATE]; + unsigned long rbs_qw_stats[N_RCU_BOOSTER_STATE]; #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */ }; #define RCU_BOOST_ELEMENTS 4 @@ -115,6 +116,10 @@ struct rcu_boost_dat { static int rcu_boost_idx = -1; /* invalid value for early RCU use. */ static DEFINE_PER_CPU(struct rcu_boost_dat, rcu_boost_dat[RCU_BOOST_ELEMENTS]); static struct task_struct *rcu_boost_task; +static DEFINE_SPINLOCK(rcu_boost_quiesce_lock); +static enum rcu_booster_state rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE; +static unsigned long rbs_qs_stats[2][N_RCU_BOOSTER_STATE]; +wait_queue_head_t rcu_booster_quiesce_wq; #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS @@ -171,6 +176,15 @@ static char *rcu_boost_state_error[] = { "? ?", /* unlock */ }; +/* Labels for RCU booster state printout. */ + +static char *rcu_booster_state_label[] = { + "Active", + "Drowsy", + "Sleeping", + "???", +}; + /* * Print out RCU booster task statistics at the specified interval. */ @@ -221,6 +235,14 @@ static void rcu_boost_dat_stat_print(voi cpu)[i].rbs_stats[event][state]; } } + for (state = 0; state < N_RCU_BOOSTER_STATE; state++) { + sum.rbs_qw_stats[state] = 0; + for_each_possible_cpu(cpu) + for (i = 0; i < RCU_BOOST_ELEMENTS; i++) + sum.rbs_qw_stats[state] += + per_cpu(rcu_boost_dat, + cpu)[i].rbs_qw_stats[state]; + } /* Print them out! */ @@ -240,6 +262,24 @@ static void rcu_boost_dat_stat_print(voi rcu_boost_state_event[event], buf); } + printk(KERN_INFO "RCU booster state: %s\n", + rcu_booster_quiesce_state >= 0 && + rcu_booster_quiesce_state < N_RCU_BOOSTER_STATE + ? rcu_booster_state_label[rcu_booster_quiesce_state] + : "???"); + i = 0; + for (state = 0; state < N_RCU_BOOSTER_STATE; state++) + i += sprintf(&buf[i], " %ld", rbs_qs_stats[0][state]); + printk(KERN_INFO "No tasks found: %s\n", buf); + i = 0; + for (state = 0; state < N_RCU_BOOSTER_STATE; state++) + i += sprintf(&buf[i], " %ld", rbs_qs_stats[1][state]); + printk(KERN_INFO "Tasks found: %s\n", buf); + i = 0; + for (state = 0; state < N_RCU_BOOSTER_STATE; state++) + i += sprintf(&buf[i], " %ld", sum.rbs_qw_stats[state]); + printk(KERN_INFO "Awaken opportunities: %s\n", buf); + /* Go away and don't come back for awhile. */ lastprint = xtime.tv_sec; @@ -293,6 +333,8 @@ static void init_rcu_boost_early(void) for (j = 0; j < N_RCU_BOOST_DAT_EVENTS; j++) for (k = 0; k < N_RCU_BOOST_STATE; k++) rbdp[i].rbs_stats[j][k] = 0; + for (j = 0; j < N_RCU_BOOSTER_STATE; j++) + rbdp[i].rbs_qw_stats[j] = 0; } #endif /* #ifdef CONFIG_PREEMPT_RCU_BOOST_STATS */ } @@ -378,10 +420,11 @@ static void rcu_unboost_prio(struct task /* * Boost all of the RCU-reader tasks on the specified list. */ -static void rcu_boost_one_reader_list(struct rcu_boost_dat *rbdp) +static int rcu_boost_one_reader_list(struct rcu_boost_dat *rbdp) { LIST_HEAD(list); unsigned long flags; + int retval = 0; struct task_struct *taskp; /* @@ -397,6 +440,7 @@ static void rcu_boost_one_reader_list(st list_splice_init(&rbdp->rbs_toboost, &list); list_splice_init(&rbdp->rbs_boosted, &list); while (!list_empty(&list)) { + retval = 1; /* * Pause for a bit before boosting each task. @@ -438,6 +482,36 @@ static void rcu_boost_one_reader_list(st list_add_tail(&taskp->rcub_entry, &rbdp->rbs_boosted); } spin_unlock_irqrestore(&rbdp->rbs_lock, flags); + return retval; +} + +/* + * Examine state to see if it is time to sleep. + */ +static void rcu_booster_try_sleep(int yo) +{ + spin_lock(&rcu_boost_quiesce_lock); + if (rcu_booster_quiesce_state < 0 || + rcu_booster_quiesce_state >= N_RCU_BOOSTER_STATE) + rcu_booster_quiesce_state = RCU_BOOST_INVALID; + rbs_qs_stats[yo != 0][rcu_booster_quiesce_state]++; + if (yo != 0) { + rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE; + } else { + if (rcu_booster_quiesce_state == RCU_BOOSTER_ACTIVE) { + rcu_booster_quiesce_state = RCU_BOOSTER_DROWSY; + } else if (rcu_booster_quiesce_state == RCU_BOOSTER_DROWSY) { + rcu_booster_quiesce_state = RCU_BOOSTER_SLEEPING; + spin_unlock(&rcu_boost_quiesce_lock); + __wait_event(rcu_booster_quiesce_wq, + rcu_booster_quiesce_state == + RCU_BOOSTER_ACTIVE); + spin_lock(&rcu_boost_quiesce_lock); + } else { + rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE; + } + } + spin_unlock(&rcu_boost_quiesce_lock); } /* @@ -448,15 +522,21 @@ static int rcu_booster(void *arg) { int cpu; struct sched_param sp = { .sched_priority = PREEMPT_RCU_BOOSTER_PRIO, }; + int yo = 0; sched_setscheduler(current, SCHED_RR, &sp); current->flags |= PF_NOFREEZE; + init_waitqueue_head(&rcu_booster_quiesce_wq); do { /* Advance the lists of tasks. */ rcu_boost_idx = (rcu_boost_idx + 1) % RCU_BOOST_ELEMENTS; + if (rcu_boost_idx == 0) { + rcu_booster_try_sleep(yo); + yo = 0; + } for_each_possible_cpu(cpu) { /* @@ -469,7 +549,7 @@ static int rcu_booster(void *arg) * nothing. */ - rcu_boost_one_reader_list(rcu_rbd_boosting(cpu)); + yo += rcu_boost_one_reader_list(rcu_rbd_boosting(cpu)); /* * Large SMP systems may need to sleep sometimes @@ -511,6 +591,23 @@ void init_rcu_boost_late(void) } /* + * Awaken the RCU priority booster if neecessary. + */ +static void rcu_preempt_wake(struct rcu_boost_dat *rbdp) +{ + spin_lock(&rcu_boost_quiesce_lock); + if (rcu_booster_quiesce_state >= N_RCU_BOOSTER_STATE) + rcu_booster_quiesce_state = RCU_BOOSTER_INVALID; + rbdp->rbs_qw_stats[rcu_booster_quiesce_state]++; + if (rcu_booster_quiesce_state == RCU_BOOSTER_SLEEPING) { + rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE; + wake_up(&rcu_booster_quiesce_wq); + } else if (rcu_booster_quiesce_state != RCU_BOOSTER_ACTIVE) + rcu_booster_quiesce_state = RCU_BOOSTER_ACTIVE; + spin_unlock(&rcu_boost_quiesce_lock); +} + +/* * Update task's RCU-boost state to reflect blocking in RCU read-side * critical section, so that the RCU-boost task can find it in case it * later needs its priority boosted. @@ -532,6 +629,7 @@ void __rcu_preempt_boost(void) } spin_lock(&rbdp->rbs_lock); rbdp->rbs_blocked++; + rcu_preempt_wake(rbdp); /* * Update state. We hold the lock and aren't yet on the list, - To unsubscribe from this list: send the line "unsubscribe linux-rt-users" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html