On Thu, Oct 24, 2024 at 06:56:22AM +0200, Christoph Hellwig wrote:
> On Wed, Oct 23, 2024 at 03:15:19PM -0600, Uday Shankar wrote:
> > @@ -600,9 +600,7 @@ static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
> >  		if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
> >  			goto put_bio;
> >  		if (bytes + bv->bv_len > nr_iter)
> > -			goto put_bio;
> > -		if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
> > -			goto put_bio;
> > +			break;
> 
> So while this fixes NVMe, it actually breaks just about every SCSI
> driver as the code will easily exceed max_segment_size now, which the
> old code obeyed (although more by accident).

Looking at the existing code a bit more, it seems really confused: it
iterates over all segments in the iov_iter instead of using the proper
iterators that limit the walk to the actual size of the I/O, which I
think is the root cause of your problem.

Can you try the (untested) patch below?  It uses the proper block layer
helper to check the I/O layout using the bio iterator.  That handles
all block layer queue limits, and it does so on the actual iterator
instead of the potentially larger registration.

One change in behavior is that it now returns -EREMOTEIO for all limits
mismatches instead of a random mix of -EINVAL and -EREMOTEIO.

diff --git a/block/blk-map.c b/block/blk-map.c
index 0e1167b23934..ca2f2ff853da 100644
--- a/block/blk-map.c
+++ b/block/blk-map.c
@@ -561,57 +561,27 @@ EXPORT_SYMBOL(blk_rq_append_bio);
 /* Prepare bio for passthrough IO given ITER_BVEC iter */
 static int blk_rq_map_user_bvec(struct request *rq, const struct iov_iter *iter)
 {
-	struct request_queue *q = rq->q;
-	size_t nr_iter = iov_iter_count(iter);
-	size_t nr_segs = iter->nr_segs;
-	struct bio_vec *bvecs, *bvprvp = NULL;
-	const struct queue_limits *lim = &q->limits;
-	unsigned int nsegs = 0, bytes = 0;
+	const struct queue_limits *lim = &rq->q->limits;
+	unsigned int nsegs;
 	struct bio *bio;
-	size_t i;
 
-	if (!nr_iter || (nr_iter >> SECTOR_SHIFT) > queue_max_hw_sectors(q))
-		return -EINVAL;
-	if (nr_segs > queue_max_segments(q))
+	if (!iov_iter_count(iter))
 		return -EINVAL;
 
-	/* no iovecs to alloc, as we already have a BVEC iterator */
+	/* reuse the bvecs from the iterator instead of allocating new ones */
 	bio = blk_rq_map_bio_alloc(rq, 0, GFP_KERNEL);
-	if (bio == NULL)
+	if (!bio)
 		return -ENOMEM;
-
 	bio_iov_bvec_set(bio, (struct iov_iter *)iter);
-	blk_rq_bio_prep(rq, bio, nr_segs);
-
-	/* loop to perform a bunch of sanity checks */
-	bvecs = (struct bio_vec *)iter->bvec;
-	for (i = 0; i < nr_segs; i++) {
-		struct bio_vec *bv = &bvecs[i];
-
-		/*
-		 * If the queue doesn't support SG gaps and adding this
-		 * offset would create a gap, fallback to copy.
-		 */
-		if (bvprvp && bvec_gap_to_prev(lim, bvprvp, bv->bv_offset)) {
-			blk_mq_map_bio_put(bio);
-			return -EREMOTEIO;
-		}
-
-		/* check full condition */
-		if (nsegs >= nr_segs || bytes > UINT_MAX - bv->bv_len)
-			goto put_bio;
-		if (bytes + bv->bv_len > nr_iter)
-			goto put_bio;
-		if (bv->bv_offset + bv->bv_len > PAGE_SIZE)
-			goto put_bio;
-
-		nsegs++;
-		bytes += bv->bv_len;
-		bvprvp = bv;
+	/* check that the data layout matches the hardware restrictions */
+	if (bio_split_rw_at(bio, lim, &nsegs, lim->max_hw_sectors)) {
+		blk_mq_map_bio_put(bio);
+		return -EREMOTEIO;
 	}
 
+	blk_rq_bio_prep(rq, bio, nsegs);
 	return 0;
-put_bio:
-	blk_mq_map_bio_put(bio);
-	return -EINVAL;
 }
 
 /**
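
For context on why -EREMOTEIO is the interesting return value here:
blk_rq_map_user_iov() treats it as "fall back to copying the data"
rather than as a hard failure.  Roughly, the bvec branch in the caller
looks like this (a from-memory sketch, variable names may not match the
tree exactly):

	if (map_bvec) {
		ret = blk_rq_map_user_bvec(rq, iter);
		if (!ret)
			return 0;
		if (ret != -EREMOTEIO)
			goto fail;
		/* bvec mapping hit a limits mismatch, copy the data instead */
		copy = true;
	}

So with the patch above every limits mismatch ends up in the copy path,
while a genuine -EINVAL (e.g. an empty iterator) still fails the
request.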