Re: [PATCH v3 12/13] bcache: add io_disable to struct cached_dev

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On 01/14/2018 03:42 PM, Coly Li wrote:
> If a bcache device is configured to writeback mode, current code does not
> handle write I/O errors on backing devices properly.
> 
> In writeback mode, write request is written to cache device, and
> latter being flushed to backing device. If I/O failed when writing from
> cache device to the backing device, bcache code just ignores the error and
> upper layer code is NOT noticed that the backing device is broken.
> 
> This patch tries to handle backing device failure like how the cache device
> failure is handled,
> - Add a error counter 'io_errors' and error limit 'error_limit' in struct
>   cached_dev. Add another io_disable to struct cached_dev to disable I/Os
>   on the problematic backing device.
> - When I/O error happens on backing device, increase io_errors counter. And
>   if io_errors reaches error_limit, set cache_dev->io_disable to true, and
>   stop the bcache device.
> 
> The result is, if backing device is broken of disconnected, and I/O errors
> reach its error limit, backing device will be disabled and the associated
> bcache device will be removed from system.
> 
> Signed-off-by: Coly Li <colyli@xxxxxxx>
> Cc: Michael Lyle <mlyle@xxxxxxxx>
> Cc: Hannes Reinecke <hare@xxxxxxxx>
> Cc: Junhui Tang <tang.junhui@xxxxxxxxxx>
> ---
>  drivers/md/bcache/bcache.h  |  7 +++++++
>  drivers/md/bcache/io.c      | 14 ++++++++++++++
>  drivers/md/bcache/request.c | 14 ++++++++++++--
>  drivers/md/bcache/super.c   | 22 ++++++++++++++++++++++
>  drivers/md/bcache/sysfs.c   | 15 ++++++++++++++-
>  5 files changed, 69 insertions(+), 3 deletions(-)
> 
> diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
> index c41736960045..5a811959392d 100644
> --- a/drivers/md/bcache/bcache.h
> +++ b/drivers/md/bcache/bcache.h
> @@ -360,6 +360,7 @@ struct cached_dev {
>  	unsigned		sequential_cutoff;
>  	unsigned		readahead;
>  
> +	unsigned		io_disable:1;
>  	unsigned		verify:1;
>  	unsigned		bypass_torture_test:1;
>  
> @@ -379,6 +380,10 @@ struct cached_dev {
>  	unsigned		writeback_rate_i_term_inverse;
>  	unsigned		writeback_rate_p_term_inverse;
>  	unsigned		writeback_rate_minimum;
> +
> +#define DEFAULT_CACHED_DEV_ERROR_LIMIT 64
> +	atomic_t		io_errors;
> +	unsigned		error_limit;
>  };
>  
>  enum alloc_reserve {
> @@ -882,6 +887,7 @@ static inline void closure_bio_submit(struct cache_set *c,
>  
>  /* Forward declarations */
>  
> +void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio);
>  void bch_count_io_errors(struct cache *, blk_status_t, int, const char *);
>  void bch_bbio_count_io_errors(struct cache_set *, struct bio *,
>  			      blk_status_t, const char *);
> @@ -909,6 +915,7 @@ int bch_bucket_alloc_set(struct cache_set *, unsigned,
>  			 struct bkey *, int, bool);
>  bool bch_alloc_sectors(struct cache_set *, struct bkey *, unsigned,
>  		       unsigned, unsigned, bool);
> +bool bch_cached_dev_error(struct cached_dev *dc);
>  
>  __printf(2, 3)
>  bool bch_cache_set_error(struct cache_set *, const char *, ...);
> diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c
> index 8013ecbcdbda..7fac97ae036e 100644
> --- a/drivers/md/bcache/io.c
> +++ b/drivers/md/bcache/io.c
> @@ -50,6 +50,20 @@ void bch_submit_bbio(struct bio *bio, struct cache_set *c,
>  }
>  
>  /* IO errors */
> +void bch_count_backing_io_errors(struct cached_dev *dc, struct bio *bio)
> +{
> +	char buf[BDEVNAME_SIZE];
> +	unsigned errors;
> +
> +	WARN_ONCE(!dc, "NULL pointer of struct cached_dev");
> +
> +	errors = atomic_add_return(1, &dc->io_errors);
> +	if (errors < dc->error_limit)
> +		pr_err("%s: IO error on backing device, unrecoverable",
> +			bio_devname(bio, buf));
> +	else
> +		bch_cached_dev_error(dc);
> +}
>  
>  void bch_count_io_errors(struct cache *ca,
>  			 blk_status_t error,
> diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c
> index ad4cf71f7eab..386b388ce296 100644
> --- a/drivers/md/bcache/request.c
> +++ b/drivers/md/bcache/request.c
> @@ -637,6 +637,8 @@ static void backing_request_endio(struct bio *bio)
>  
>  	if (bio->bi_status) {
>  		struct search *s = container_of(cl, struct search, cl);
> +		struct cached_dev *dc = container_of(s->d,
> +						     struct cached_dev, disk);
>  		/*
>  		 * If a bio has REQ_PREFLUSH for writeback mode, it is
>  		 * speically assembled in cached_dev_write() for a non-zero
> @@ -657,6 +659,7 @@ static void backing_request_endio(struct bio *bio)
>  		}
>  		s->recoverable = false;
>  		/* should count I/O error for backing device here */
> +		bch_count_backing_io_errors(dc, bio);
>  	}
>  
>  	bio_put(bio);
> @@ -1067,8 +1070,14 @@ static void detatched_dev_end_io(struct bio *bio)
>  			    bio_data_dir(bio),
>  			    &ddip->d->disk->part0, ddip->start_time);
>  
> -	kfree(ddip);
> +	if (bio->bi_status) {
> +		struct cached_dev *dc = container_of(ddip->d,
> +						     struct cached_dev, disk);
> +		/* should count I/O error for backing device here */
> +		bch_count_backing_io_errors(dc, bio);
> +	}
>  
> +	kfree(ddip);
>  	bio->bi_end_io(bio);
>  }
>  
> @@ -1107,7 +1116,8 @@ static blk_qc_t cached_dev_make_request(struct request_queue *q,
>  	struct cached_dev *dc = container_of(d, struct cached_dev, disk);
>  	int rw = bio_data_dir(bio);
>  
> -	if (unlikely(d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags))) {
> +	if (unlikely((d->c && test_bit(CACHE_SET_IO_DISABLE, &d->c->flags)) ||
> +		     dc->io_disable)) {
>  		bio->bi_status = BLK_STS_IOERR;
>  		bio_endio(bio);
>  		return BLK_QC_T_NONE;
> diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
> index 08a0b541a4da..14fce3623770 100644
> --- a/drivers/md/bcache/super.c
> +++ b/drivers/md/bcache/super.c
> @@ -1188,6 +1188,10 @@ static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
>  		max(dc->disk.disk->queue->backing_dev_info->ra_pages,
>  		    q->backing_dev_info->ra_pages);
>  
> +	atomic_set(&dc->io_errors, 0);
> +	dc->io_disable = false;
> +	dc->error_limit = DEFAULT_CACHED_DEV_ERROR_LIMIT;
> +
>  	bch_cached_dev_request_init(dc);
>  	bch_cached_dev_writeback_init(dc);
>  	return 0;
> @@ -1339,6 +1343,24 @@ int bch_flash_dev_create(struct cache_set *c, uint64_t size)
>  	return flash_dev_run(c, u);
>  }
>  
> +bool bch_cached_dev_error(struct cached_dev *dc)
> +{
> +	char name[BDEVNAME_SIZE];
> +
> +	if (!dc || test_bit(BCACHE_DEV_CLOSING, &dc->disk.flags))
> +		return false;
> +
> +	dc->io_disable = true;
> +	/* make others know io_disable is true earlier */
> +	smp_mb();
> +
> +	pr_err("bcache: stop %s: too many IO errors on backing device %s\n",
> +		dc->disk.name, bdevname(dc->bdev, name));
> +
> +	bcache_device_stop(&dc->disk);
> +	return true;
> +}
> +
>  /* Cache set */
>  
>  __printf(2, 3)
> diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c
> index afb051bcfca1..7288927f2a47 100644
> --- a/drivers/md/bcache/sysfs.c
> +++ b/drivers/md/bcache/sysfs.c
> @@ -131,7 +131,9 @@ SHOW(__bch_cached_dev)
>  	var_print(writeback_delay);
>  	var_print(writeback_percent);
>  	sysfs_hprint(writeback_rate,	dc->writeback_rate.rate << 9);
> -
> +	sysfs_hprint(io_errors,		atomic_read(&dc->io_errors));
> +	sysfs_printf(io_error_limit,	"%i", dc->error_limit);
> +	sysfs_printf(io_disable,	"%i", dc->io_disable);
>  	var_print(writeback_rate_update_seconds);
>  	var_print(writeback_rate_i_term_inverse);
>  	var_print(writeback_rate_p_term_inverse);
> @@ -223,6 +225,14 @@ STORE(__cached_dev)
>  	d_strtoul(writeback_rate_i_term_inverse);
>  	d_strtoul_nonzero(writeback_rate_p_term_inverse);
>  
> +	sysfs_strtoul_clamp(io_error_limit, dc->error_limit, 0, INT_MAX);
> +
> +	if (attr == &sysfs_io_disable) {
> +		int v = strtoul_or_return(buf);
> +
> +		dc->io_disable = v ? 1 : 0;
> +	}
> +
>  	d_strtoi_h(sequential_cutoff);
>  	d_strtoi_h(readahead);
>  
> @@ -330,6 +340,9 @@ static struct attribute *bch_cached_dev_files[] = {
>  	&sysfs_writeback_rate_i_term_inverse,
>  	&sysfs_writeback_rate_p_term_inverse,
>  	&sysfs_writeback_rate_debug,
> +	&sysfs_errors,
> +	&sysfs_io_error_limit,
> +	&sysfs_io_disable,
>  	&sysfs_dirty_data,
>  	&sysfs_stripe_size,
>  	&sysfs_partial_stripes_expensive,
> 
Personally, I'm not a big fan of using smp_mb() and not proper locking.
But in this case it should be okay.

Reviewed-by: Hannes Reinecke <hare@xxxxxxxx>

Cheers,

Hannes
-- 
Dr. Hannes Reinecke		   Teamlead Storage & Networking
hare@xxxxxxx			               +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)



[Index of Archives]     [Linux RAID]     [Linux SCSI]     [Linux ATA RAID]     [IDE]     [Linux Wireless]     [Linux Kernel]     [ATH6KL]     [Linux Bluetooth]     [Linux Netdev]     [Kernel Newbies]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Device Mapper]

  Powered by Linux