Hi Vojtěch, Have you tested the patch below in SLE12-* when bcache is backed by md raid5/6? FYI: I was compared the drivers/md/bcache/io.c functions in the various branches here: https://github.com/openSUSE/kernel I compared the presence of bch_generic_make_request() (which the patch below removes). It looks like the branch SLE12-SP2 has the patch, but version before SLE12-SP2 and openSUSE-* do not (as they still have bch_generic_make_request). Since the patch does exist in SLE12-SP2, I'm guessing that is been tested, though I am curious if it has been tested specifically when being backed by md raid5/6 so that queue->limits->partial_stripes_expensive is nonzero. If you have and it is stable, then I want to get it to Kent and Jens for upstream integration. Thanks! -Eric -- Eric Wheeler, President eWheeler, Inc. dba Global Linux Security 888-LINUX26 (888-546-8926) Fax: 503-716-3878 PO Box 25107 www.GlobalLinuxSecurity.pro Linux since 1996! Portland, OR 97298 On Thu, 7 Jan 2016, Eric Wheeler wrote: > > From: Kent Overstreet Wed, 12 Aug 2015 00:07:13 -0700 > > The bcache driver has always accepted arbitrarily large bios and split > > them internally. Now that every driver must accept arbitrarily large > > bios this code isn't nessecary anymore. > > Hi Kent, > > Does your patch below make any kernel version assumptions about bio > splitting? That doesn't appear so based on the commit message, but > thought I'd double-check. > > If there is a kernel dependency, then, how far back should this be safe? > I'd like to apply to 3.17-rc1 like the other stable commits and stamp it > with Cc: stable@ so it will merge forward into 3.18 and onward if it fixes > my problem below: > > This question comes up because I'm getting the following > (discard-related?) backtrace and would like to try the patch. If it fixes > the problem I'll stamp it with stable and get it to Jens. Note that > bch_generic_make_request() doesn't even exist after this patch, so this > backtrace would at least be simplified and possibly fixed: > > [ 294.059255] [<ffffffffa049a20d>] bch_generic_make_request+0x15d/0x1f0 [bcache] > [ 294.059578] [<ffffffffa049a3b9>] __bch_submit_bbio+0x79/0x80 [bcache] > [ 294.059794] [<ffffffffa049a3eb>] bch_submit_bbio+0x2b/0x30 [bcache] > [ 294.059983] [<ffffffffa049fabb>] bch_data_insert_start+0xcb/0x5a0 [bcache] > [ 294.060169] [<ffffffffa049ffe1>] bch_data_insert+0x51/0xc0 [bcache] > [ 294.060354] [<ffffffffa04a0cc4>] cached_dev_make_request+0xbe4/0xdf0 [bcache] > [ 294.060652] [<ffffffff81317c20>] generic_make_request+0xe0/0x130 > [ 294.060830] [<ffffffff81317ce7>] submit_bio+0x77/0x150 > [ 294.061012] [<ffffffff81312906>] ? bio_alloc_bioset+0x1d6/0x330 > [ 294.061194] [<ffffffffa04cef90>] ? le64_dec+0x20/0x20 [dm_persistent_data] > [ 294.061377] [<ffffffffa04e49be>] __blkdev_issue_discard_async.constprop.59+0x17e/0x210 [dm_thin_pool] > [ 294.061695] [<ffffffffa04e55dc>] process_prepared_discard_passdown+0x9c/0x230 [dm_thin_pool] > [ 294.062007] [<ffffffff811ac73f>] ? mempool_free+0x2f/0x90 > [ 294.062207] [<ffffffffa04e0772>] process_prepared+0x92/0xc0 [dm_thin_pool] > [ 294.062404] [<ffffffffa04e5f78>] do_worker+0xb8/0x880 [dm_thin_pool] > [ 294.062606] [<ffffffff81014693>] ? __switch_to+0x1e3/0x580 > [ 294.062796] [<ffffffff810dc71c>] ? put_prev_task_fair+0x2c/0x40 > [ 294.062989] [<ffffffff810ba18d>] process_one_work+0x14d/0x420 > [ 294.063171] [<ffffffff810ba952>] worker_thread+0x112/0x520 > [ 294.063355] [<ffffffff810ba840>] ? rescuer_thread+0x3e0/0x3e0 > [ 294.063560] [<ffffffff810c06a8>] kthread+0xd8/0xf0 > [ 294.063737] [<ffffffff810c05d0>] ? kthread_create_on_node+0x1b0/0x1b0 > [ 294.063943] [<ffffffff816e4aa2>] ret_from_fork+0x42/0x70 > [ 294.064133] [<ffffffff810c05d0>] ? kthread_create_on_node+0x1b0/0x1b0 > > > Thanks! > > -Eric > > > Cc: linux-bcache@xxxxxxxxxxxxxxx > > Signed-off-by: Kent Overstreet <kent.overstreet@xxxxxxxxx> > > [dpark: add more description in commit message] > > Signed-off-by: Dongsu Park <dpark@xxxxxxxxxx> > > Signed-off-by: Ming Lin <ming.l@xxxxxxxxxxxxxxx> > > --- > > drivers/md/bcache/bcache.h | 18 -------- > > drivers/md/bcache/io.c | 101 +----------------------------------------- > > drivers/md/bcache/journal.c | 4 +- > > drivers/md/bcache/request.c | 16 +++---- > > drivers/md/bcache/super.c | 32 +------------ > > drivers/md/bcache/util.h | 5 ++- > > drivers/md/bcache/writeback.c | 4 +- > > 7 files changed, 18 insertions(+), 162 deletions(-) > > > > diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h > > index 04f7bc2..6b420a5 100644 > > --- a/drivers/md/bcache/bcache.h > > +++ b/drivers/md/bcache/bcache.h > > @@ -243,19 +243,6 @@ struct keybuf { > > DECLARE_ARRAY_ALLOCATOR(struct keybuf_key, freelist, KEYBUF_NR); > > }; > > > > -struct bio_split_pool { > > - struct bio_set *bio_split; > > - mempool_t *bio_split_hook; > > -}; > > - > > -struct bio_split_hook { > > - struct closure cl; > > - struct bio_split_pool *p; > > - struct bio *bio; > > - bio_end_io_t *bi_end_io; > > - void *bi_private; > > -}; > > - > > struct bcache_device { > > struct closure cl; > > > > @@ -288,8 +275,6 @@ struct bcache_device { > > int (*cache_miss)(struct btree *, struct search *, > > struct bio *, unsigned); > > int (*ioctl) (struct bcache_device *, fmode_t, unsigned, unsigned long); > > - > > - struct bio_split_pool bio_split_hook; > > }; > > > > struct io { > > @@ -454,8 +439,6 @@ struct cache { > > atomic_long_t meta_sectors_written; > > atomic_long_t btree_sectors_written; > > atomic_long_t sectors_written; > > - > > - struct bio_split_pool bio_split_hook; > > }; > > > > struct gc_stat { > > @@ -873,7 +856,6 @@ void bch_bbio_endio(struct cache_set *, struct bio *, int, const char *); > > void bch_bbio_free(struct bio *, struct cache_set *); > > struct bio *bch_bbio_alloc(struct cache_set *); > > > > -void bch_generic_make_request(struct bio *, struct bio_split_pool *); > > void __bch_submit_bbio(struct bio *, struct cache_set *); > > void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); > > > > diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c > > index bf6a9ca..86a0bb8 100644 > > --- a/drivers/md/bcache/io.c > > +++ b/drivers/md/bcache/io.c > > @@ -11,105 +11,6 @@ > > > > #include <linux/blkdev.h> > > > > -static unsigned bch_bio_max_sectors(struct bio *bio) > > -{ > > - struct request_queue *q = bdev_get_queue(bio->bi_bdev); > > - struct bio_vec bv; > > - struct bvec_iter iter; > > - unsigned ret = 0, seg = 0; > > - > > - if (bio->bi_rw & REQ_DISCARD) > > - return min(bio_sectors(bio), q->limits.max_discard_sectors); > > - > > - bio_for_each_segment(bv, bio, iter) { > > - struct bvec_merge_data bvm = { > > - .bi_bdev = bio->bi_bdev, > > - .bi_sector = bio->bi_iter.bi_sector, > > - .bi_size = ret << 9, > > - .bi_rw = bio->bi_rw, > > - }; > > - > > - if (seg == min_t(unsigned, BIO_MAX_PAGES, > > - queue_max_segments(q))) > > - break; > > - > > - if (q->merge_bvec_fn && > > - q->merge_bvec_fn(q, &bvm, &bv) < (int) bv.bv_len) > > - break; > > - > > - seg++; > > - ret += bv.bv_len >> 9; > > - } > > - > > - ret = min(ret, queue_max_sectors(q)); > > - > > - WARN_ON(!ret); > > - ret = max_t(int, ret, bio_iovec(bio).bv_len >> 9); > > - > > - return ret; > > -} > > - > > -static void bch_bio_submit_split_done(struct closure *cl) > > -{ > > - struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); > > - > > - s->bio->bi_end_io = s->bi_end_io; > > - s->bio->bi_private = s->bi_private; > > - bio_endio(s->bio, 0); > > - > > - closure_debug_destroy(&s->cl); > > - mempool_free(s, s->p->bio_split_hook); > > -} > > - > > -static void bch_bio_submit_split_endio(struct bio *bio, int error) > > -{ > > - struct closure *cl = bio->bi_private; > > - struct bio_split_hook *s = container_of(cl, struct bio_split_hook, cl); > > - > > - if (error) > > - clear_bit(BIO_UPTODATE, &s->bio->bi_flags); > > - > > - bio_put(bio); > > - closure_put(cl); > > -} > > - > > -void bch_generic_make_request(struct bio *bio, struct bio_split_pool *p) > > -{ > > - struct bio_split_hook *s; > > - struct bio *n; > > - > > - if (!bio_has_data(bio) && !(bio->bi_rw & REQ_DISCARD)) > > - goto submit; > > - > > - if (bio_sectors(bio) <= bch_bio_max_sectors(bio)) > > - goto submit; > > - > > - s = mempool_alloc(p->bio_split_hook, GFP_NOIO); > > - closure_init(&s->cl, NULL); > > - > > - s->bio = bio; > > - s->p = p; > > - s->bi_end_io = bio->bi_end_io; > > - s->bi_private = bio->bi_private; > > - bio_get(bio); > > - > > - do { > > - n = bio_next_split(bio, bch_bio_max_sectors(bio), > > - GFP_NOIO, s->p->bio_split); > > - > > - n->bi_end_io = bch_bio_submit_split_endio; > > - n->bi_private = &s->cl; > > - > > - closure_get(&s->cl); > > - generic_make_request(n); > > - } while (n != bio); > > - > > - continue_at(&s->cl, bch_bio_submit_split_done, NULL); > > - return; > > -submit: > > - generic_make_request(bio); > > -} > > - > > /* Bios with headers */ > > > > void bch_bbio_free(struct bio *bio, struct cache_set *c) > > @@ -139,7 +40,7 @@ void __bch_submit_bbio(struct bio *bio, struct cache_set *c) > > bio->bi_bdev = PTR_CACHE(c, &b->key, 0)->bdev; > > > > b->submit_time_us = local_clock_us(); > > - closure_bio_submit(bio, bio->bi_private, PTR_CACHE(c, &b->key, 0)); > > + closure_bio_submit(bio, bio->bi_private); > > } > > > > void bch_submit_bbio(struct bio *bio, struct cache_set *c, > > diff --git a/drivers/md/bcache/journal.c b/drivers/md/bcache/journal.c > > index 418607a..727ca9b 100644 > > --- a/drivers/md/bcache/journal.c > > +++ b/drivers/md/bcache/journal.c > > @@ -61,7 +61,7 @@ reread: left = ca->sb.bucket_size - offset; > > bio->bi_private = &cl; > > bch_bio_map(bio, data); > > > > - closure_bio_submit(bio, &cl, ca); > > + closure_bio_submit(bio, &cl); > > closure_sync(&cl); > > > > /* This function could be simpler now since we no longer write > > @@ -648,7 +648,7 @@ static void journal_write_unlocked(struct closure *cl) > > spin_unlock(&c->journal.lock); > > > > while ((bio = bio_list_pop(&list))) > > - closure_bio_submit(bio, cl, c->cache[0]); > > + closure_bio_submit(bio, cl); > > > > continue_at(cl, journal_write_done, NULL); > > } > > diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c > > index f292790..ab093a8 100644 > > --- a/drivers/md/bcache/request.c > > +++ b/drivers/md/bcache/request.c > > @@ -718,7 +718,7 @@ static void cached_dev_read_error(struct closure *cl) > > > > /* XXX: invalidate cache */ > > > > - closure_bio_submit(bio, cl, s->d); > > + closure_bio_submit(bio, cl); > > } > > > > continue_at(cl, cached_dev_cache_miss_done, NULL); > > @@ -841,7 +841,7 @@ static int cached_dev_cache_miss(struct btree *b, struct search *s, > > s->cache_miss = miss; > > s->iop.bio = cache_bio; > > bio_get(cache_bio); > > - closure_bio_submit(cache_bio, &s->cl, s->d); > > + closure_bio_submit(cache_bio, &s->cl); > > > > return ret; > > out_put: > > @@ -849,7 +849,7 @@ out_put: > > out_submit: > > miss->bi_end_io = request_endio; > > miss->bi_private = &s->cl; > > - closure_bio_submit(miss, &s->cl, s->d); > > + closure_bio_submit(miss, &s->cl); > > return ret; > > } > > > > @@ -914,7 +914,7 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) > > > > if (!(bio->bi_rw & REQ_DISCARD) || > > blk_queue_discard(bdev_get_queue(dc->bdev))) > > - closure_bio_submit(bio, cl, s->d); > > + closure_bio_submit(bio, cl); > > } else if (s->iop.writeback) { > > bch_writeback_add(dc); > > s->iop.bio = bio; > > @@ -929,12 +929,12 @@ static void cached_dev_write(struct cached_dev *dc, struct search *s) > > flush->bi_end_io = request_endio; > > flush->bi_private = cl; > > > > - closure_bio_submit(flush, cl, s->d); > > + closure_bio_submit(flush, cl); > > } > > } else { > > s->iop.bio = bio_clone_fast(bio, GFP_NOIO, dc->disk.bio_split); > > > > - closure_bio_submit(bio, cl, s->d); > > + closure_bio_submit(bio, cl); > > } > > > > closure_call(&s->iop.cl, bch_data_insert, NULL, cl); > > @@ -950,7 +950,7 @@ static void cached_dev_nodata(struct closure *cl) > > bch_journal_meta(s->iop.c, cl); > > > > /* If it's a flush, we send the flush to the backing device too */ > > - closure_bio_submit(bio, cl, s->d); > > + closure_bio_submit(bio, cl); > > > > continue_at(cl, cached_dev_bio_complete, NULL); > > } > > @@ -994,7 +994,7 @@ static void cached_dev_make_request(struct request_queue *q, struct bio *bio) > > !blk_queue_discard(bdev_get_queue(dc->bdev))) > > bio_endio(bio, 0); > > else > > - bch_generic_make_request(bio, &d->bio_split_hook); > > + generic_make_request(bio); > > } > > } > > > > diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c > > index 94980bf..db70c9e 100644 > > --- a/drivers/md/bcache/super.c > > +++ b/drivers/md/bcache/super.c > > @@ -59,29 +59,6 @@ struct workqueue_struct *bcache_wq; > > > > #define BTREE_MAX_PAGES (256 * 1024 / PAGE_SIZE) > > > > -static void bio_split_pool_free(struct bio_split_pool *p) > > -{ > > - if (p->bio_split_hook) > > - mempool_destroy(p->bio_split_hook); > > - > > - if (p->bio_split) > > - bioset_free(p->bio_split); > > -} > > - > > -static int bio_split_pool_init(struct bio_split_pool *p) > > -{ > > - p->bio_split = bioset_create(4, 0); > > - if (!p->bio_split) > > - return -ENOMEM; > > - > > - p->bio_split_hook = mempool_create_kmalloc_pool(4, > > - sizeof(struct bio_split_hook)); > > - if (!p->bio_split_hook) > > - return -ENOMEM; > > - > > - return 0; > > -} > > - > > /* Superblock */ > > > > static const char *read_super(struct cache_sb *sb, struct block_device *bdev, > > @@ -537,7 +514,7 @@ static void prio_io(struct cache *ca, uint64_t bucket, unsigned long rw) > > bio->bi_private = ca; > > bch_bio_map(bio, ca->disk_buckets); > > > > - closure_bio_submit(bio, &ca->prio, ca); > > + closure_bio_submit(bio, &ca->prio); > > closure_sync(cl); > > } > > > > @@ -757,7 +734,6 @@ static void bcache_device_free(struct bcache_device *d) > > put_disk(d->disk); > > } > > > > - bio_split_pool_free(&d->bio_split_hook); > > if (d->bio_split) > > bioset_free(d->bio_split); > > kvfree(d->full_dirty_stripes); > > @@ -804,7 +780,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size, > > return minor; > > > > if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) || > > - bio_split_pool_init(&d->bio_split_hook) || > > !(d->disk = alloc_disk(1))) { > > ida_simple_remove(&bcache_minor, minor); > > return -ENOMEM; > > @@ -1793,8 +1768,6 @@ void bch_cache_release(struct kobject *kobj) > > ca->set->cache[ca->sb.nr_this_dev] = NULL; > > } > > > > - bio_split_pool_free(&ca->bio_split_hook); > > - > > free_pages((unsigned long) ca->disk_buckets, ilog2(bucket_pages(ca))); > > kfree(ca->prio_buckets); > > vfree(ca->buckets); > > @@ -1839,8 +1812,7 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca) > > ca->sb.nbuckets)) || > > !(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) * > > 2, GFP_KERNEL)) || > > - !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) || > > - bio_split_pool_init(&ca->bio_split_hook)) > > + !(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca))) > > return -ENOMEM; > > > > ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca); > > diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h > > index 1d04c48..cf2cbc2 100644 > > --- a/drivers/md/bcache/util.h > > +++ b/drivers/md/bcache/util.h > > @@ -4,6 +4,7 @@ > > > > #include <linux/blkdev.h> > > #include <linux/errno.h> > > +#include <linux/blkdev.h> > > #include <linux/kernel.h> > > #include <linux/llist.h> > > #include <linux/ratelimit.h> > > @@ -570,10 +571,10 @@ static inline sector_t bdev_sectors(struct block_device *bdev) > > return bdev->bd_inode->i_size >> 9; > > } > > > > -#define closure_bio_submit(bio, cl, dev) \ > > +#define closure_bio_submit(bio, cl) \ > > do { \ > > closure_get(cl); \ > > - bch_generic_make_request(bio, &(dev)->bio_split_hook); \ > > + generic_make_request(bio); \ > > } while (0) > > > > uint64_t bch_crc64_update(uint64_t, const void *, size_t); > > diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c > > index f1986bc..ca38362 100644 > > --- a/drivers/md/bcache/writeback.c > > +++ b/drivers/md/bcache/writeback.c > > @@ -188,7 +188,7 @@ static void write_dirty(struct closure *cl) > > io->bio.bi_bdev = io->dc->bdev; > > io->bio.bi_end_io = dirty_endio; > > > > - closure_bio_submit(&io->bio, cl, &io->dc->disk); > > + closure_bio_submit(&io->bio, cl); > > > > continue_at(cl, write_dirty_finish, system_wq); > > } > > @@ -208,7 +208,7 @@ static void read_dirty_submit(struct closure *cl) > > { > > struct dirty_io *io = container_of(cl, struct dirty_io, cl); > > > > - closure_bio_submit(&io->bio, cl, &io->dc->disk); > > + closure_bio_submit(&io->bio, cl); > > > > continue_at(cl, write_dirty, system_wq); > > } > > -- > > 2.1.4 > > > > -- > > To unsubscribe from this list: send the line "unsubscribe linux-bcache" in > > the body of a message to majordomo@xxxxxxxxxxxxxxx > > More majordomo info at http://vger.kernel.org/majordomo-info.html > > > -- > To unsubscribe from this list: send the line "unsubscribe linux-bcache" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html >