Re: [PATCH 5/8] dm: always setup ->orig_bio in alloc_io

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 4/13/22 08:00, Mike Snitzer wrote:
> On Tue, Apr 12 2022 at  6:38P -0400,
> Damien Le Moal <damien.lemoal@xxxxxxxxxxxxxxxxxx> wrote:
> 
>> On 4/13/22 05:52, Mike Snitzer wrote:
>>> On Tue, Apr 12 2022 at  4:56P -0400,
>>> Ming Lei <ming.lei@xxxxxxxxxx> wrote:
>>>
>>>> The current DM codes setup ->orig_bio after __map_bio() returns,
>>>> and not only cause kernel panic for dm zone, but also a bit ugly
>>>> and tricky, especially the waiting until ->orig_bio is set in
>>>> dm_submit_bio_remap().
>>>>
>>>> The reason is that one new bio is cloned from original FS bio to
>>>> represent the mapped part, which just serves io accounting.
>>>>
>>>> Now we have switched to bdev based io accounting interface, and we
>>>> can retrieve sectors/bio_op from both the real original bio and the
>>>> added fields of .sector_offset & .sectors easily, so the new cloned
>>>> bio isn't necessary any more.
>>>>
>>>> Not only fixes dm-zone's kernel panic, but also cleans up dm io
>>>> accounting & split a bit.
>>>
>>> You're conflating quite a few things here.  DM zone really has no
>>> business accessing io->orig_bio (dm-zone.c can just as easily inspect
>>> the tio->clone, because it hasn't been remapped yet it reflects the
>>> io->origin_bio, so there is no need to look at io->orig_bio) -- but
>>> yes I clearly broke things during the 5.18 merge and it needs fixing
>>> ASAP.
>>
>> Problem is that we need to look at the BIO op in submission AND completion
>> path to handle zone append requests. So looking at the clone on submission
>> is OK since its op is still the original one. But on the completion path,
>> the clone may now be a regular write emulating a zone append op. And
>> looking at the clone only does not allow detecting that zone append.
>>
>> We could have the orig_bio op saved in dm_io to avoid completely looking
>> at the orig_bio for detecting zone append.
> 
> Can you please try the following patch?

This works. I tested with a zoned nullblk + dm-crypt, forcing the zone
append emulation code to be used. I ran zonefs tests on top of that with
no issues. I will run btrfs tests too later today to exercise things a
little more.

> 
> Really sorry for breaking dm-zone.c; please teach this man how to test
> the basics of all things dm-zoned (is there a testsuite in the tools
> or something?).
> 
> Thanks,
> Mike
> 
> diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
> index c1ca9be4b79e..896127366000 100644
> --- a/drivers/md/dm-zone.c
> +++ b/drivers/md/dm-zone.c
> @@ -360,16 +360,20 @@ static int dm_update_zone_wp_offset(struct mapped_device *md, unsigned int zno,
>  	return 0;
>  }
>  
> +struct orig_bio_details {
> +	unsigned int op;
> +	unsigned int nr_sectors;
> +};
> +
>  /*
>   * First phase of BIO mapping for targets with zone append emulation:
>   * check all BIO that change a zone writer pointer and change zone
>   * append operations into regular write operations.
>   */
> -static bool dm_zone_map_bio_begin(struct mapped_device *md,
> -				  struct bio *orig_bio, struct bio *clone)
> +static bool dm_zone_map_bio_begin(struct mapped_device *md, unsigned int zno,
> +				  struct bio *clone)
>  {
>  	sector_t zsectors = blk_queue_zone_sectors(md->queue);
> -	unsigned int zno = bio_zone_no(orig_bio);
>  	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
>  
>  	/*
> @@ -384,7 +388,7 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
>  		WRITE_ONCE(md->zwp_offset[zno], zwp_offset);
>  	}
>  
> -	switch (bio_op(orig_bio)) {
> +	switch (bio_op(clone)) {
>  	case REQ_OP_ZONE_RESET:
>  	case REQ_OP_ZONE_FINISH:
>  		return true;
> @@ -401,9 +405,8 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
>  		 * target zone.
>  		 */
>  		clone->bi_opf = REQ_OP_WRITE | REQ_NOMERGE |
> -			(orig_bio->bi_opf & (~REQ_OP_MASK));
> -		clone->bi_iter.bi_sector =
> -			orig_bio->bi_iter.bi_sector + zwp_offset;
> +			(clone->bi_opf & (~REQ_OP_MASK));
> +		clone->bi_iter.bi_sector += zwp_offset;
>  		break;
>  	default:
>  		DMWARN_LIMIT("Invalid BIO operation");
> @@ -423,11 +426,10 @@ static bool dm_zone_map_bio_begin(struct mapped_device *md,
>   * data written to a zone. Note that at this point, the remapped clone BIO
>   * may already have completed, so we do not touch it.
>   */
> -static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
> -					struct bio *orig_bio,
> +static blk_status_t dm_zone_map_bio_end(struct mapped_device *md, unsigned int zno,
> +					struct orig_bio_details *orig_bio_details,
>  					unsigned int nr_sectors)
>  {
> -	unsigned int zno = bio_zone_no(orig_bio);
>  	unsigned int zwp_offset = READ_ONCE(md->zwp_offset[zno]);
>  
>  	/* The clone BIO may already have been completed and failed */
> @@ -435,7 +437,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
>  		return BLK_STS_IOERR;
>  
>  	/* Update the zone wp offset */
> -	switch (bio_op(orig_bio)) {
> +	switch (orig_bio_details->op) {
>  	case REQ_OP_ZONE_RESET:
>  		WRITE_ONCE(md->zwp_offset[zno], 0);
>  		return BLK_STS_OK;
> @@ -452,7 +454,7 @@ static blk_status_t dm_zone_map_bio_end(struct mapped_device *md,
>  		 * Check that the target did not truncate the write operation
>  		 * emulating a zone append.
>  		 */
> -		if (nr_sectors != bio_sectors(orig_bio)) {
> +		if (nr_sectors != orig_bio_details->nr_sectors) {
>  			DMWARN_LIMIT("Truncated write for zone append");
>  			return BLK_STS_IOERR;
>  		}
> @@ -488,7 +490,7 @@ static inline void dm_zone_unlock(struct request_queue *q,
>  	bio_clear_flag(clone, BIO_ZONE_WRITE_LOCKED);
>  }
>  
> -static bool dm_need_zone_wp_tracking(struct bio *orig_bio)
> +static bool dm_need_zone_wp_tracking(struct bio *clone)
>  {
>  	/*
>  	 * Special processing is not needed for operations that do not need the
> @@ -496,15 +498,15 @@ static bool dm_need_zone_wp_tracking(struct bio *orig_bio)
>  	 * zones and all operations that do not modify directly a sequential
>  	 * zone write pointer.
>  	 */
> -	if (op_is_flush(orig_bio->bi_opf) && !bio_sectors(orig_bio))
> +	if (op_is_flush(clone->bi_opf) && !bio_sectors(clone))
>  		return false;
> -	switch (bio_op(orig_bio)) {
> +	switch (bio_op(clone)) {
>  	case REQ_OP_WRITE_ZEROES:
>  	case REQ_OP_WRITE:
>  	case REQ_OP_ZONE_RESET:
>  	case REQ_OP_ZONE_FINISH:
>  	case REQ_OP_ZONE_APPEND:
> -		return bio_zone_is_seq(orig_bio);
> +		return bio_zone_is_seq(clone);
>  	default:
>  		return false;
>  	}
> @@ -519,8 +521,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
>  	struct dm_target *ti = tio->ti;
>  	struct mapped_device *md = io->md;
>  	struct request_queue *q = md->queue;
> -	struct bio *orig_bio = io->orig_bio;
>  	struct bio *clone = &tio->clone;
> +	struct orig_bio_details orig_bio_details;
>  	unsigned int zno;
>  	blk_status_t sts;
>  	int r;
> @@ -529,18 +531,21 @@ int dm_zone_map_bio(struct dm_target_io *tio)
>  	 * IOs that do not change a zone write pointer do not need
>  	 * any additional special processing.
>  	 */
> -	if (!dm_need_zone_wp_tracking(orig_bio))
> +	if (!dm_need_zone_wp_tracking(clone))
>  		return ti->type->map(ti, clone);
>  
>  	/* Lock the target zone */
> -	zno = bio_zone_no(orig_bio);
> +	zno = bio_zone_no(clone);
>  	dm_zone_lock(q, zno, clone);
>  
> +	orig_bio_details.nr_sectors = bio_sectors(clone);
> +	orig_bio_details.op = bio_op(clone);
> +
>  	/*
>  	 * Check that the bio and the target zone write pointer offset are
>  	 * both valid, and if the bio is a zone append, remap it to a write.
>  	 */
> -	if (!dm_zone_map_bio_begin(md, orig_bio, clone)) {
> +	if (!dm_zone_map_bio_begin(md, zno, clone)) {
>  		dm_zone_unlock(q, zno, clone);
>  		return DM_MAPIO_KILL;
>  	}
> @@ -560,7 +565,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
>  		 * The target submitted the clone BIO. The target zone will
>  		 * be unlocked on completion of the clone.
>  		 */
> -		sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr);
> +		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
> +					  *tio->len_ptr);
>  		break;
>  	case DM_MAPIO_REMAPPED:
>  		/*
> @@ -568,7 +574,8 @@ int dm_zone_map_bio(struct dm_target_io *tio)
>  		 * unlock the target zone here as the clone will not be
>  		 * submitted.
>  		 */
> -		sts = dm_zone_map_bio_end(md, orig_bio, *tio->len_ptr);
> +		sts = dm_zone_map_bio_end(md, zno, &orig_bio_details,
> +					  *tio->len_ptr);
>  		if (sts != BLK_STS_OK)
>  			dm_zone_unlock(q, zno, clone);
>  		break;
> 


-- 
Damien Le Moal
Western Digital Research



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux