On Mon, 24 Oct 2016, Kent Overstreet wrote: > On Sun, Oct 23, 2016 at 06:57:26PM -0700, Davidlohr Bueso wrote: > > On Wed, 19 Oct 2016, Peter Zijlstra wrote: > > > > > Subject: sched: Better explain sleep/wakeup > > > From: Peter Zijlstra <peterz@xxxxxxxxxxxxx> > > > Date: Wed Oct 19 15:45:27 CEST 2016 > > > > > > There were a few questions wrt how sleep-wakeup works. Try and explain > > > it more. > > > > > > Requested-by: Will Deacon <will.deacon@xxxxxxx> > > > Signed-off-by: Peter Zijlstra (Intel) <peterz@xxxxxxxxxxxxx> > > > --- > > > include/linux/sched.h | 52 ++++++++++++++++++++++++++++++++++---------------- > > > kernel/sched/core.c | 15 +++++++------- > > > 2 files changed, 44 insertions(+), 23 deletions(-) > > > > > > --- a/include/linux/sched.h > > > +++ b/include/linux/sched.h > > > @@ -262,20 +262,9 @@ extern char ___assert_task_state[1 - 2*! > > > #define set_task_state(tsk, state_value) \ > > > do { \ > > > (tsk)->task_state_change = _THIS_IP_; \ > > > - smp_store_mb((tsk)->state, (state_value)); \ > > > + smp_store_mb((tsk)->state, (state_value)); \ > > > } while (0) > > > > > > -/* > > > - * set_current_state() includes a barrier so that the write of current->state > > > - * is correctly serialised wrt the caller's subsequent test of whether to > > > - * actually sleep: > > > - * > > > - * set_current_state(TASK_UNINTERRUPTIBLE); > > > - * if (do_i_need_to_sleep()) > > > - * schedule(); > > > - * > > > - * If the caller does not need such serialisation then use __set_current_state() > > > - */ > > > #define __set_current_state(state_value) \ > > > do { \ > > > current->task_state_change = _THIS_IP_; \ > > > @@ -284,11 +273,19 @@ extern char ___assert_task_state[1 - 2*! > > > #define set_current_state(state_value) \ > > > do { \ > > > current->task_state_change = _THIS_IP_; \ > > > - smp_store_mb(current->state, (state_value)); \ > > > + smp_store_mb(current->state, (state_value)); \ > > > } while (0) > > > > > > #else > > > > > > +/* > > > + * @tsk had better be current, or you get to keep the pieces. > > > > That reminds me we were getting rid of the set_task_state() calls. Bcache was > > pending, being only user in the kernel that doesn't actually use current; but > > instead breaks newly (yet blocked/uninterruptible) created garbage collection > > kthread. I cannot figure out why this is done (ie purposely accounting the > > load avg. Furthermore gc kicks in in very specific scenarios obviously, such > > as as by the allocator task, so I don't see why bcache gc should want to be > > interruptible. > > > > Kent, Jens, can we get rid of this? > > Here's a patch that just fixes the way gc gets woken up. Eric, you want to take > this? Sure, I'll put it up with my -rc2 pull request to Jens. A couple of sanity checks (for my understanding at least): Why does bch_data_insert_start() no longer need to call set_gc_sectors(op->c) now that bch_cache_set_alloc and bch_gc_thread do? Does bch_cache_set_alloc() even need to call set_gc_sectors since bch_gc_thread() does before calling bch_btree_gc? Also I'm curious, why change invalidate_needs_gc from a bitfield? -Eric > > -- >8 -- > Subject: [PATCH] bcache: Make gc wakeup saner > > This lets us ditch a set_task_state() call. > > Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx> > --- > drivers/md/bcache/bcache.h | 4 ++-- > drivers/md/bcache/btree.c | 39 ++++++++++++++++++++------------------- > drivers/md/bcache/btree.h | 3 +-- > drivers/md/bcache/request.c | 4 +--- > drivers/md/bcache/super.c | 2 ++ > 5 files changed, 26 insertions(+), 26 deletions(-) > > diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h > index 6b420a55c7..c3ea03c9a1 100644 > --- a/drivers/md/bcache/bcache.h > +++ b/drivers/md/bcache/bcache.h > @@ -425,7 +425,7 @@ struct cache { > * until a gc finishes - otherwise we could pointlessly burn a ton of > * cpu > */ > - unsigned invalidate_needs_gc:1; > + unsigned invalidate_needs_gc; > > bool discard; /* Get rid of? */ > > @@ -593,8 +593,8 @@ struct cache_set { > > /* Counts how many sectors bio_insert has added to the cache */ > atomic_t sectors_to_gc; > + wait_queue_head_t gc_wait; > > - wait_queue_head_t moving_gc_wait; > struct keybuf moving_gc_keys; > /* Number of moving GC bios in flight */ > struct semaphore moving_in_flight; > diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c > index 81d3db40cd..2efdce0724 100644 > --- a/drivers/md/bcache/btree.c > +++ b/drivers/md/bcache/btree.c > @@ -1757,32 +1757,34 @@ static void bch_btree_gc(struct cache_set *c) > bch_moving_gc(c); > } > > -static int bch_gc_thread(void *arg) > +static bool gc_should_run(struct cache_set *c) > { > - struct cache_set *c = arg; > struct cache *ca; > unsigned i; > > - while (1) { > -again: > - bch_btree_gc(c); > + for_each_cache(ca, c, i) > + if (ca->invalidate_needs_gc) > + return true; > > - set_current_state(TASK_INTERRUPTIBLE); > - if (kthread_should_stop()) > - break; > + if (atomic_read(&c->sectors_to_gc) < 0) > + return true; > > - mutex_lock(&c->bucket_lock); > + return false; > +} > > - for_each_cache(ca, c, i) > - if (ca->invalidate_needs_gc) { > - mutex_unlock(&c->bucket_lock); > - set_current_state(TASK_RUNNING); > - goto again; > - } > +static int bch_gc_thread(void *arg) > +{ > + struct cache_set *c = arg; > > - mutex_unlock(&c->bucket_lock); > + while (1) { > + wait_event_interruptible(c->gc_wait, > + kthread_should_stop() || gc_should_run(c)); > > - schedule(); > + if (kthread_should_stop()) > + break; > + > + set_gc_sectors(c); > + bch_btree_gc(c); > } > > return 0; > @@ -1790,11 +1792,10 @@ static int bch_gc_thread(void *arg) > > int bch_gc_thread_start(struct cache_set *c) > { > - c->gc_thread = kthread_create(bch_gc_thread, c, "bcache_gc"); > + c->gc_thread = kthread_run(bch_gc_thread, c, "bcache_gc"); > if (IS_ERR(c->gc_thread)) > return PTR_ERR(c->gc_thread); > > - set_task_state(c->gc_thread, TASK_INTERRUPTIBLE); > return 0; > } > > diff --git a/drivers/md/bcache/btree.h b/drivers/md/bcache/btree.h > index 5c391fa01b..9b80417cd5 100644 > --- a/drivers/md/bcache/btree.h > +++ b/drivers/md/bcache/btree.h > @@ -260,8 +260,7 @@ void bch_initial_mark_key(struct cache_set *, int, struct bkey *); > > static inline void wake_up_gc(struct cache_set *c) > { > - if (c->gc_thread) > - wake_up_process(c->gc_thread); > + wake_up(&c->gc_wait); > } > > #define MAP_DONE 0 > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c > index 40ffe5e424..a37c1776f2 100644 > --- a/drivers/md/bcache/request.c > +++ b/drivers/md/bcache/request.c > @@ -196,10 +196,8 @@ static void bch_data_insert_start(struct closure *cl) > struct data_insert_op *op = container_of(cl, struct data_insert_op, cl); > struct bio *bio = op->bio, *n; > > - if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) { > - set_gc_sectors(op->c); > + if (atomic_sub_return(bio_sectors(bio), &op->c->sectors_to_gc) < 0) > wake_up_gc(op->c); > - } > > if (op->bypass) > return bch_data_invalidate(cl); > diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c > index 849ad441cd..66669c8f41 100644 > --- a/drivers/md/bcache/super.c > +++ b/drivers/md/bcache/super.c > @@ -1491,6 +1491,7 @@ struct cache_set *bch_cache_set_alloc(struct cache_sb *sb) > mutex_init(&c->bucket_lock); > init_waitqueue_head(&c->btree_cache_wait); > init_waitqueue_head(&c->bucket_wait); > + init_waitqueue_head(&c->gc_wait); > sema_init(&c->uuid_write_mutex, 1); > > spin_lock_init(&c->btree_gc_time.lock); > @@ -1550,6 +1551,7 @@ static void run_cache_set(struct cache_set *c) > > for_each_cache(ca, c, i) > c->nbuckets += ca->sb.nbuckets; > + set_gc_sectors(c); > > if (CACHE_SYNC(&c->sb)) { > LIST_HEAD(journal); > -- > 2.9.3 > > -- > To unsubscribe from this list: send the line "unsubscribe linux-bcache" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > -- To unsubscribe from this list: send the line "unsubscribe linux-bcache" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html