Re: bvec_iter rewind API

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, Aug 28, 2018 at 07:33:25PM -0400, Kent Overstreet wrote:
> I just came across bi_done and your patch that added it -
> f9df1cd99e bio: add bvec_iter rewind API
> 
> I invite you to think through what happens to a bio that gets split by something
> further down the stack, and then rewound after completion:
> 
> To create the first half of the split, you just truncate the iterator by setting
> bio->bi_iter.bi_size to be the length you want. So if the original bio becomes
> the first half of the split, we've lost the original value for bi_size. After
> completion, bio_rewind_iter will rewind the bio to the start position the
> original code expects, but the bio will _not_ have the same size as it did when
> that code submitted it.
> 
> So if bio_rewind_iter() is being used because we want to restore the bio to what
> it was when we submitted it, this is bogus.
> 
> To create the second half of the split, we just advance the bio's iterator up to
> where we want the split to start. So this shouldn't prevent us from restoring
> the bio to the state it was in when it was created by rewinding it.
> 
> Except if you look at bio_split(), it sets bi_done to 0, and git blame reveals
> an interesting commit message...
> 
>     block: reset bi_iter.bi_done after splitting bio
>     
>     After the bio has been updated to represent the remaining sectors, reset
>     bi_done so bio_rewind_iter() does not rewind further than it should.
>     
>     This resolves a bio_integrity_process() failure on reads where the
>     original request was split.
> 
> *sigh*
> 
> The original code was bogus, and so was the fix. If a bio is split _AFTER_ a
> chunk of code in the stack sees it - and wants to restore it to the state it was
> it when it saw it after it completes - then not touching bi_done gives us the
> result we want. But if the bio was split _BEFORE_ that chunk of code sees it,
> rewinding it without ever touching bi_done will rewind it to be _LARGER_ than
> the bio that chunk of code saw.
> 
> So what should bi_done be? There is no correct answer - fundamentally, bi_done
> _cannot_ do what you're trying to do with it. Advancing or truncating an
> iterator destroys information, and stacked layers can manipulate bio iterators
> in arbitrary ways.
> 
> The correct way, the ONLY way, to restore a bio's iterator to a previous state
> after it's been submitted and other code has messed with it, is to save the
> entire iterator state: before you submit the bio, you save a copy of
> bio->bi_iter in your driver's private state, and then you restore it after
> completion.

I agree, and it isn't good to introduce extra update of .bi_done in fast path
too, and it just makes things complicated in concept.

Maybe we can do the following way, and looks it works in scsi_debug/dix
test.

diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 67b5fb861a51..839c332069a9 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -306,6 +306,8 @@ bool bio_integrity_prep(struct bio *bio)
 	if (bio_data_dir(bio) == WRITE) {
 		bio_integrity_process(bio, &bio->bi_iter,
 				      bi->profile->generate_fn);
+	} else {
+		bip->bio_iter = bio->bi_iter;
 	}
 	return true;
 
@@ -331,19 +333,13 @@ static void bio_integrity_verify_fn(struct work_struct *work)
 		container_of(work, struct bio_integrity_payload, bip_work);
 	struct bio *bio = bip->bip_bio;
 	struct blk_integrity *bi = blk_get_integrity(bio->bi_disk);
-	struct bvec_iter iter = bio->bi_iter;
 
 	/*
 	 * At the moment verify is called bio's iterator was advanced
 	 * during split and completion, we need to rewind iterator to
 	 * it's original position.
 	 */
-	if (bio_rewind_iter(bio, &iter, iter.bi_done)) {
-		bio->bi_status = bio_integrity_process(bio, &iter,
-						       bi->profile->verify_fn);
-	} else {
-		bio->bi_status = BLK_STS_IOERR;
-	}
+	bio->bi_status = bio_integrity_process(bio, &bip->bio_iter, bi->profile->verify_fn);
 
 	bio_integrity_free(bio);
 	bio_endio(bio);
diff --git a/block/bio.c b/block/bio.c
index b12966e415d3..529c1a65b1e8 100644
--- a/block/bio.c
+++ b/block/bio.c
@@ -283,6 +283,8 @@ void bio_init(struct bio *bio, struct bio_vec *table,
 	atomic_set(&bio->__bi_remaining, 1);
 	atomic_set(&bio->__bi_cnt, 1);
 
+	WARN_ON(max_vecs > UIO_MAXIOV);
+
 	bio->bi_io_vec = table;
 	bio->bi_max_vecs = max_vecs;
 }
@@ -1807,7 +1809,6 @@ struct bio *bio_split(struct bio *bio, int sectors,
 		bio_integrity_trim(split);
 
 	bio_advance(bio, split->bi_iter.bi_size);
-	bio->bi_iter.bi_done = 0;
 
 	if (bio_flagged(bio, BIO_TRACE_COMPLETION))
 		bio_set_flag(split, BIO_TRACE_COMPLETION);
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 51371740d2a8..14b4fa266357 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -170,27 +170,11 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter,
 {
 	iter->bi_sector += bytes >> 9;
 
-	if (bio_no_advance_iter(bio)) {
+	if (bio_no_advance_iter(bio))
 		iter->bi_size -= bytes;
-		iter->bi_done += bytes;
-	} else {
+	else
 		bvec_iter_advance(bio->bi_io_vec, iter, bytes);
 		/* TODO: It is reasonable to complete bio with error here. */
-	}
-}
-
-static inline bool bio_rewind_iter(struct bio *bio, struct bvec_iter *iter,
-		unsigned int bytes)
-{
-	iter->bi_sector -= bytes >> 9;
-
-	if (bio_no_advance_iter(bio)) {
-		iter->bi_size += bytes;
-		iter->bi_done -= bytes;
-		return true;
-	}
-
-	return bvec_iter_rewind(bio->bi_io_vec, iter, bytes);
 }
 
 #define __bio_for_each_segment(bvl, bio, iter, start)			\
@@ -353,6 +337,8 @@ struct bio_integrity_payload {
 	unsigned short		bip_max_vcnt;	/* integrity bio_vec slots */
 	unsigned short		bip_flags;	/* control flags */
 
+	struct bvec_iter	bio_iter;	/* for rewinding parent bio */
+
 	struct work_struct	bip_work;	/* I/O completion */
 
 	struct bio_vec		*bip_vec;
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index fe7a22dd133b..5ebfd0500fe7 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -38,11 +38,8 @@ struct bvec_iter {
 						   sectors */
 	unsigned int		bi_size;	/* residual I/O count */
 
-	unsigned int		bi_idx;		/* current index into bvl_vec */
-
-	unsigned int            bi_done;	/* number of bytes completed */
-
-	unsigned int            bi_bvec_done;	/* number of bytes completed in
+	unsigned int		bi_idx:10;	/* current index into bvl_vec */
+	unsigned int            bi_bvec_done:22;	/* number of bytes completed in
 						   current bvec */
 };
 
@@ -85,7 +82,6 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 		bytes -= len;
 		iter->bi_size -= len;
 		iter->bi_bvec_done += len;
-		iter->bi_done += len;
 
 		if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) {
 			iter->bi_bvec_done = 0;
@@ -95,30 +91,6 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv,
 	return true;
 }
 
-static inline bool bvec_iter_rewind(const struct bio_vec *bv,
-				     struct bvec_iter *iter,
-				     unsigned int bytes)
-{
-	while (bytes) {
-		unsigned len = min(bytes, iter->bi_bvec_done);
-
-		if (iter->bi_bvec_done == 0) {
-			if (WARN_ONCE(iter->bi_idx == 0,
-				      "Attempted to rewind iter beyond "
-				      "bvec's boundaries\n")) {
-				return false;
-			}
-			iter->bi_idx--;
-			iter->bi_bvec_done = __bvec_iter_bvec(bv, *iter)->bv_len;
-			continue;
-		}
-		bytes -= len;
-		iter->bi_size += len;
-		iter->bi_bvec_done -= len;
-	}
-	return true;
-}
-
 #define for_each_bvec(bvl, bio_vec, iter, start)			\
 	for (iter = (start);						\
 	     (iter).bi_size &&						\
Thanks,
Ming



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux