Re: [PATCH v2] md: add block tracing for bio_remapping

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Fri, Nov 18, 2016 at 01:22:04PM +1100, Neil Brown wrote:
> 
> The block tracing infrastructure (accessed with blktrace/blkparse)
> supports the tracing of mapping bios from one device to another.
> This is currently used when a bio in a partition is mapped to the
> whole device, when bios are mapped by dm, and for mapping in md/raid5.
> Other md personalities do not include this tracing yet, so add it.
> 
> When a read-error is detected we redirect the request to a different device.
> This could justifiably be seen as a new mapping for the original bio,
> or a secondary mapping for the bio that errors.  This patch uses
> the second option.
> 
> When md is used under dm-raid, the mappings are not traced as we do
> not have access to the block device number of the parent.

Thanks, applied patches 1, 3 and 4.

Thanks,
Shaohua
 
> Signed-off-by: NeilBrown <neilb@xxxxxxxx>
> ---
> 
> This is the revised version based on discussions.
> It now uses the correct sector for linear and raid0, and the code for
> raid1/raid10 has been rearranged a bit.
> 
>  drivers/md/linear.c | 18 ++++++++++++------
>  drivers/md/raid0.c  | 13 ++++++++++---
>  drivers/md/raid1.c  | 26 ++++++++++++++++++++++++--
>  drivers/md/raid10.c | 29 +++++++++++++++++++++++++++--
>  4 files changed, 73 insertions(+), 13 deletions(-)
> 
> diff --git a/drivers/md/linear.c b/drivers/md/linear.c
> index 9c7d4f5483ea..5975c9915684 100644
> --- a/drivers/md/linear.c
> +++ b/drivers/md/linear.c
> @@ -21,6 +21,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/module.h>
>  #include <linux/slab.h>
> +#include <trace/events/block.h>
>  #include "md.h"
>  #include "linear.h"
>  
> @@ -227,22 +228,22 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
>  	}
>  
>  	do {
> -		tmp_dev = which_dev(mddev, bio->bi_iter.bi_sector);
> +		sector_t bio_sector = bio->bi_iter.bi_sector;
> +		tmp_dev = which_dev(mddev, bio_sector);
>  		start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
>  		end_sector = tmp_dev->end_sector;
>  		data_offset = tmp_dev->rdev->data_offset;
>  		bio->bi_bdev = tmp_dev->rdev->bdev;
>  
> -		if (unlikely(bio->bi_iter.bi_sector >= end_sector ||
> -			     bio->bi_iter.bi_sector < start_sector))
> +		if (unlikely(bio_sector >= end_sector ||
> +			     bio_sector < start_sector))
>  			goto out_of_bounds;
>  
>  		if (unlikely(bio_end_sector(bio) > end_sector)) {
>  			/* This bio crosses a device boundary, so we have to
>  			 * split it.
>  			 */
> -			split = bio_split(bio, end_sector -
> -					  bio->bi_iter.bi_sector,
> +			split = bio_split(bio, end_sector - bio_sector,
>  					  GFP_NOIO, fs_bio_set);
>  			bio_chain(split, bio);
>  		} else {
> @@ -256,8 +257,13 @@ static void linear_make_request(struct mddev *mddev, struct bio *bio)
>  			 !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
>  			/* Just ignore it */
>  			bio_endio(split);
> -		} else
> +		} else {
> +			if (mddev->gendisk)
> +				trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
> +						      split, disk_devt(mddev->gendisk),
> +						      bio_sector);
>  			generic_make_request(split);
> +		}
>  	} while (split != bio);
>  	return;
>  
> diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
> index b3ba77a3c3bc..e628f187e5ad 100644
> --- a/drivers/md/raid0.c
> +++ b/drivers/md/raid0.c
> @@ -21,6 +21,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/module.h>
>  #include <linux/slab.h>
> +#include <trace/events/block.h>
>  #include "md.h"
>  #include "raid0.h"
>  #include "raid5.h"
> @@ -463,7 +464,8 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
>  	}
>  
>  	do {
> -		sector_t sector = bio->bi_iter.bi_sector;
> +		sector_t bio_sector = bio->bi_iter.bi_sector;
> +		sector_t sector = bio_sector;
>  		unsigned chunk_sects = mddev->chunk_sectors;
>  
>  		unsigned sectors = chunk_sects -
> @@ -472,7 +474,7 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
>  			 : sector_div(sector, chunk_sects));
>  
>  		/* Restore due to sector_div */
> -		sector = bio->bi_iter.bi_sector;
> +		sector = bio_sector;
>  
>  		if (sectors < bio_sectors(bio)) {
>  			split = bio_split(bio, sectors, GFP_NOIO, fs_bio_set);
> @@ -491,8 +493,13 @@ static void raid0_make_request(struct mddev *mddev, struct bio *bio)
>  			 !blk_queue_discard(bdev_get_queue(split->bi_bdev)))) {
>  			/* Just ignore it */
>  			bio_endio(split);
> -		} else
> +		} else {
> +			if (mddev->gendisk)
> +				trace_block_bio_remap(bdev_get_queue(split->bi_bdev),
> +						      split, disk_devt(mddev->gendisk),
> +						      bio_sector);
>  			generic_make_request(split);
> +		}
>  	} while (split != bio);
>  }
>  
> diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
> index 9ac61cd85e5c..2dc1934925ec 100644
> --- a/drivers/md/raid1.c
> +++ b/drivers/md/raid1.c
> @@ -37,6 +37,7 @@
>  #include <linux/module.h>
>  #include <linux/seq_file.h>
>  #include <linux/ratelimit.h>
> +#include <trace/events/block.h>
>  #include "md.h"
>  #include "raid1.h"
>  #include "bitmap.h"
> @@ -1162,6 +1163,11 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
>  		bio_set_op_attrs(read_bio, op, do_sync);
>  		read_bio->bi_private = r1_bio;
>  
> +		if (mddev->gendisk)
> +			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
> +					      read_bio, disk_devt(mddev->gendisk),
> +					      r1_bio->sector);
> +
>  		if (max_sectors < r1_bio->sectors) {
>  			/* could not read all from this device, so we will
>  			 * need another r1_bio.
> @@ -1367,13 +1373,20 @@ static void raid1_make_request(struct mddev *mddev, struct bio * bio)
>  
>  		mbio->bi_iter.bi_sector	= (r1_bio->sector +
>  				   conf->mirrors[i].rdev->data_offset);
> -		mbio->bi_bdev = (void*)conf->mirrors[i].rdev;
> +		mbio->bi_bdev = conf->mirrors[i].rdev->bdev;
>  		mbio->bi_end_io	= raid1_end_write_request;
>  		bio_set_op_attrs(mbio, op, do_flush_fua | do_sync);
>  		mbio->bi_private = r1_bio;
>  
>  		atomic_inc(&r1_bio->remaining);
>  
> +		if (mddev->gendisk)
> +			trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
> +					      mbio, disk_devt(mddev->gendisk),
> +					      r1_bio->sector);
> +		/* flush_pending_writes() needs access to the rdev so...*/
> +		mbio->bi_bdev = (void*)conf->mirrors[i].rdev;
> +
>  		cb = blk_check_plugged(raid1_unplug, mddev, sizeof(*plug));
>  		if (cb)
>  			plug = container_of(cb, struct raid1_plug_cb, cb);
> @@ -2290,6 +2303,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>  	struct bio *bio;
>  	char b[BDEVNAME_SIZE];
>  	struct md_rdev *rdev;
> +	dev_t bio_dev;
> +	sector_t bio_sector;
>  
>  	clear_bit(R1BIO_ReadError, &r1_bio->state);
>  	/* we got a read error. Maybe the drive is bad.  Maybe just
> @@ -2303,6 +2318,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>  
>  	bio = r1_bio->bios[r1_bio->read_disk];
>  	bdevname(bio->bi_bdev, b);
> +	bio_dev = bio->bi_bdev->bd_dev;
> +	bio_sector = conf->mirrors[r1_bio->read_disk].rdev->data_offset + r1_bio->sector;
>  	bio_put(bio);
>  	r1_bio->bios[r1_bio->read_disk] = NULL;
>  
> @@ -2353,6 +2370,8 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>  			else
>  				mbio->bi_phys_segments++;
>  			spin_unlock_irq(&conf->device_lock);
> +			trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
> +					      bio, bio_dev, bio_sector);
>  			generic_make_request(bio);
>  			bio = NULL;
>  
> @@ -2367,8 +2386,11 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
>  				sectors_handled;
>  
>  			goto read_more;
> -		} else
> +		} else {
> +			trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
> +					      bio, bio_dev, bio_sector);
>  			generic_make_request(bio);
> +		}
>  	}
>  }
>  
> diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
> index 5290be3d5c26..c63041ec9415 100644
> --- a/drivers/md/raid10.c
> +++ b/drivers/md/raid10.c
> @@ -25,6 +25,7 @@
>  #include <linux/seq_file.h>
>  #include <linux/ratelimit.h>
>  #include <linux/kthread.h>
> +#include <trace/events/block.h>
>  #include "md.h"
>  #include "raid10.h"
>  #include "raid0.h"
> @@ -1165,6 +1166,10 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
>  		bio_set_op_attrs(read_bio, op, do_sync);
>  		read_bio->bi_private = r10_bio;
>  
> +		if (mddev->gendisk)
> +			trace_block_bio_remap(bdev_get_queue(read_bio->bi_bdev),
> +					      read_bio, disk_devt(mddev->gendisk),
> +					      r10_bio->sector);
>  		if (max_sectors < r10_bio->sectors) {
>  			/* Could not read all from this device, so we will
>  			 * need another r10_bio.
> @@ -1367,11 +1372,17 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
>  			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr+
>  					   choose_data_offset(r10_bio,
>  							      rdev));
> -			mbio->bi_bdev = (void*)rdev;
> +			mbio->bi_bdev = rdev->bdev;
>  			mbio->bi_end_io	= raid10_end_write_request;
>  			bio_set_op_attrs(mbio, op, do_sync | do_fua);
>  			mbio->bi_private = r10_bio;
>  
> +			if (conf->mddev->gendisk)
> +				trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
> +						      mbio, disk_devt(conf->mddev->gendisk),
> +						      r10_bio->sector);
> +			mbio->bi_bdev = (void*)rdev;
> +
>  			atomic_inc(&r10_bio->remaining);
>  
>  			cb = blk_check_plugged(raid10_unplug, mddev,
> @@ -1409,11 +1420,17 @@ static void __make_request(struct mddev *mddev, struct bio *bio)
>  			mbio->bi_iter.bi_sector	= (r10_bio->devs[i].addr +
>  					   choose_data_offset(
>  						   r10_bio, rdev));
> -			mbio->bi_bdev = (void*)rdev;
> +			mbio->bi_bdev = rdev->bdev;
>  			mbio->bi_end_io	= raid10_end_write_request;
>  			bio_set_op_attrs(mbio, op, do_sync | do_fua);
>  			mbio->bi_private = r10_bio;
>  
> +			if (conf->mddev->gendisk)
> +				trace_block_bio_remap(bdev_get_queue(mbio->bi_bdev),
> +						      mbio, disk_devt(conf->mddev->gendisk),
> +						      r10_bio->sector);
> +			mbio->bi_bdev = (void*)rdev;
> +
>  			atomic_inc(&r10_bio->remaining);
>  			spin_lock_irqsave(&conf->device_lock, flags);
>  			bio_list_add(&conf->pending_bio_list, mbio);
> @@ -2496,6 +2513,8 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
>  	char b[BDEVNAME_SIZE];
>  	unsigned long do_sync;
>  	int max_sectors;
> +	dev_t bio_dev;
> +	sector_t bio_last_sector;
>  
>  	/* we got a read error. Maybe the drive is bad.  Maybe just
>  	 * the block and we can fix it.
> @@ -2507,6 +2526,8 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
>  	 */
>  	bio = r10_bio->devs[slot].bio;
>  	bdevname(bio->bi_bdev, b);
> +	bio_dev = bio->bi_bdev->bd_dev;
> +	bio_last_sector = r10_bio->devs[slot].addr + rdev->data_offset + r10_bio->sectors;
>  	bio_put(bio);
>  	r10_bio->devs[slot].bio = NULL;
>  
> @@ -2546,6 +2567,10 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
>  	bio_set_op_attrs(bio, REQ_OP_READ, do_sync);
>  	bio->bi_private = r10_bio;
>  	bio->bi_end_io = raid10_end_read_request;
> +	trace_block_bio_remap(bdev_get_queue(bio->bi_bdev),
> +			      bio, bio_dev,
> +			      bio_last_sector - r10_bio->sectors);
> +
>  	if (max_sectors < r10_bio->sectors) {
>  		/* Drat - have to split this up more */
>  		struct bio *mbio = r10_bio->master_bio;
> -- 
> 2.10.2
> 


--
To unsubscribe from this list: send the line "unsubscribe linux-raid" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html



[Index of Archives]     [Linux RAID Wiki]     [ATA RAID]     [Linux SCSI Target Infrastructure]     [Linux Block]     [Linux IDE]     [Linux SCSI]     [Linux Hams]     [Device Mapper]     [Device Mapper Cryptographics]     [Kernel]     [Linux Admin]     [Linux Net]     [GFS]     [RPM]     [git]     [Yosemite Forum]


  Powered by Linux