On 07.01.2020 06:24, Martin K. Petersen wrote:
>
> Kirill,
>
> Sorry, the holiday break got in the way.
>
>> But I also worry about the NOFALLBACK case. There are block devices
>> which support write zeroes but can't allocate blocks (block allocation
>> is simply not applicable to them, say, all ordinary HDDs).
>
> Correct. We shouldn't go down this path unless a device is thinly
> provisioned (i.e. max_discard_sectors > 0).

(I assume this is a typo and you mean max_allocate_sectors, as below.)

>> But wouldn't it be a good thing to return EOPNOTSUPP right from
>> __blkdev_issue_write_zeroes() in case the block device can't allocate
>> blocks (q->limits.write_zeroes_can_allocate in the patch below)? This
>> is just a way to mark block devices which support write zeroes but for
>> which block allocation means nothing (it only wastes time).
>
> I don't like "write_zeroes_can_allocate" because that makes assumptions
> about WRITE ZEROES being the command of choice. I suggest we call it
> "max_allocate_sectors" to mirror "max_discard_sectors". I.e. put
> emphasis on the semantic operation and not the plumbing.

Hmm. Do you mean "bool max_allocate_sectors" or "unsigned int
max_allocate_sectors"? In the second case we have to turn every
dereference of q->limits.max_write_zeroes_sectors into a switch like
the ones below (this is a partial patch; only a few of the places are
converted, as examples):

diff --git a/block/blk-core.c b/block/blk-core.c
index 50a5de025d5e..4c45417838f7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -978,7 +978,8 @@ generic_make_request_checks(struct bio *bio)
 			goto not_supported;
 		break;
 	case REQ_OP_WRITE_ZEROES:
-		if (!q->limits.max_write_zeroes_sectors)
+		if (!blk_queue_get_max_write_zeroes_sectors(q,
+					bio->bi_opf & REQ_NOZERO))
 			goto not_supported;
 		break;
 	default:
@@ -1250,10 +1251,10 @@ EXPORT_SYMBOL(submit_bio);
 static int blk_cloned_rq_check_limits(struct request_queue *q,
 				      struct request *rq)
 {
-	if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, req_op(rq))) {
+	if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) {
 		printk(KERN_ERR "%s: over max size limit. (%u > %u)\n",
 			__func__, blk_rq_sectors(rq),
-			blk_queue_get_max_sectors(q, req_op(rq)));
+			blk_queue_get_max_sectors(q, rq->cmd_flags));
 		return -EIO;
 	}
 
diff --git a/block/blk-merge.c b/block/blk-merge.c
index 347782a24a35..0cdf7d0386c8 100644
--- a/block/blk-merge.c
+++ b/block/blk-merge.c
@@ -105,15 +105,22 @@ static struct bio *blk_bio_discard_split(struct request_queue *q,
 static struct bio *blk_bio_write_zeroes_split(struct request_queue *q,
 		struct bio *bio, struct bio_set *bs, unsigned *nsegs)
 {
+	unsigned int max_sectors;
+
+	if (bio->bi_opf & REQ_NOZERO)
+		max_sectors = q->limits.max_allocate_sectors;
+	else
+		max_sectors = q->limits.max_write_zeroes_sectors;
+
 	*nsegs = 0;
 
-	if (!q->limits.max_write_zeroes_sectors)
+	if (!max_sectors)
 		return NULL;
 
-	if (bio_sectors(bio) <= q->limits.max_write_zeroes_sectors)
+	if (bio_sectors(bio) <= max_sectors)
 		return NULL;
 
-	return bio_split(bio, q->limits.max_write_zeroes_sectors, GFP_NOIO, bs);
+	return bio_split(bio, max_sectors, GFP_NOIO, bs);
 }
 
 static struct bio *blk_bio_write_same_split(struct request_queue *q,
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 5f6dcc7a47bd..3f68a60cb196 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -506,6 +506,8 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
 					b->max_write_same_sectors);
 	t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
 					b->max_write_zeroes_sectors);
+	t->max_allocate_sectors = min(t->max_allocate_sectors,
+					b->max_allocate_sectors);
 	t->bounce_pfn = min_not_zero(t->bounce_pfn, b->bounce_pfn);
 
 	t->seg_boundary_mask = min_not_zero(t->seg_boundary_mask,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 9e3cd3394dd6..6219604a0c12 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -336,6 +336,7 @@ struct queue_limits {
 	unsigned int		max_hw_discard_sectors;
 	unsigned int		max_write_same_sectors;
 	unsigned int		max_write_zeroes_sectors;
+	unsigned int		max_allocate_sectors;
 	unsigned int		discard_granularity;
 	unsigned int		discard_alignment;
 
@@ -989,9 +989,19 @@ static inline struct bio_vec req_bvec(struct request *rq)
 	return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter);
 }
 
+static inline unsigned int blk_queue_get_max_write_zeroes_sectors(
+		struct request_queue *q, bool allocate)
+{
+	if (allocate)
+		return q->limits.max_allocate_sectors;
+	return q->limits.max_write_zeroes_sectors;
+}
+
 static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
-						     int op)
+						     unsigned int op_flags)
 {
+	int op = op_flags & REQ_OP_MASK;
+
 	if (unlikely(op == REQ_OP_DISCARD || op == REQ_OP_SECURE_ERASE))
 		return min(q->limits.max_discard_sectors,
 			   UINT_MAX >> SECTOR_SHIFT);
@@ -1000,7 +1010,8 @@ static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q,
 		return q->limits.max_write_same_sectors;
 
 	if (unlikely(op == REQ_OP_WRITE_ZEROES))
-		return q->limits.max_write_zeroes_sectors;
+		return blk_queue_get_max_write_zeroes_sectors(q,
+				op_flags & REQ_NOZERO);
 
 	return q->limits.max_sectors;
 }
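
For reference, here is roughly how the early -EOPNOTSUPP bail-out I asked
about could look in __blkdev_issue_write_zeroes() on top of the above.
This is only a sketch, not part of the partial patch: it assumes a
BLKDEV_ZERO_ALLOCATE flag at the blkdev_issue_zeroout() level mirroring
REQ_NOZERO, and reuses the blk_queue_get_max_write_zeroes_sectors()
helper introduced above.

/*
 * Sketch only: refuse the request early when the caller asks for
 * allocating write zeroes but the queue reports
 * max_allocate_sectors == 0, i.e. the device supports WRITE ZEROES
 * while block allocation means nothing for it (plain HDDs and the
 * like).
 */
static int __blkdev_issue_write_zeroes(struct block_device *bdev,
		sector_t sector, sector_t nr_sects, gfp_t gfp_mask,
		struct bio **biop, unsigned flags)
{
	struct request_queue *q = bdev_get_queue(bdev);
	unsigned int max_sectors;

	if (!q)
		return -ENXIO;

	/* Pick the limit that matches the requested semantics. */
	max_sectors = blk_queue_get_max_write_zeroes_sectors(q,
				flags & BLKDEV_ZERO_ALLOCATE);
	if (!max_sectors)
		return -EOPNOTSUPP;

	/*
	 * ... chain REQ_OP_WRITE_ZEROES bios (with REQ_NOZERO set when
	 * allocating) exactly as before, just capped at max_sectors ...
	 */
	return 0;
}

This way callers that pass the allocate flag get -EOPNOTSUPP immediately
instead of issuing write-zeroes bios that cannot allocate anything.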