On 09/27/2013 08:07 AM, Hannes Reinecke wrote: > On 09/27/2013 01:49 AM, Mike Snitzer wrote: >> On Thu, Sep 26 2013 at 7:22pm -0400, >> Alasdair G Kergon <agk@xxxxxxxxxx> wrote: >> >>> On Thu, Sep 26, 2013 at 10:47:13AM -0700, Frank Mayhar wrote: >>>> Launching it from ramdisk won't help, particularly, since it still goes >>>> through the block layer. The other stuff won't help if a (potentially >>>> unrelated) bug in the daemon happens to be being tickled at the same >>>> time, or if some dependency happens to be broken and _that's_ what's >>>> preventing the daemon from making progress. >>> >>> Then put more effort into debugging your daemon so it doesn't have >>> bugs that make it die? Implement the timeout in a robust independent >>> daemon if it's other code there that's unreliable? >>> >>>> And as far as lvm2 and multipath-tools, yeah, they cope okay in the kind >>>> of environments most people have, but that's not the kind of environment >>>> (or scale) we have to deal with. >>> >>> In what way are your requirements so different that a locked-into-memory >>> monitoring daemon cannot implement this timeout? >> >> Frank, I had a look at your patch. It leaves a lot to be desired, I was >> starting to clean it up but ultimately found myself agreeing with >> Alasdair's original point: that this policy should be implemented in the >> userspace daemon. >> > _Actually_ there is a way how this could be implemented properly: > implement a blk_timeout function. > > Thing is, every request_queue might have a timeout function > implemented, whose goal is to abort requests which are beyond that > timeout. EG SCSI uses that for the dev_loss_tmo mechanism. > > Multipath what with it being request-based could easily implement > the same mechanism, namely have to blk_timeout function which would > just re-arm the timeout in the default case, but abort any queued > I/O (after a timeout) if all paths are down. > > Hmm. I see to draft up a PoC. > And indeed, here it is. Completely untested, just to give you an idea what I was going on about. Let's see if I can put this to test somewhere... Cheers, Hannes -- Dr. Hannes Reinecke zSeries & Storage hare@xxxxxxx +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 5adede1..6801ac3 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -444,6 +444,61 @@ static int queue_if_no_path(struct multipath *m, unsigned queue_if_no_path, return 0; } +/* + * Block timeout callback, called from the block layer + * + * request_queue lock is held on entry. + * + * Return values: + * BLK_EH_RESET_TIMER if the request should be left running + * BLK_EH_NOT_HANDLED if the request is handled or terminated + * by the driver. + */ +enum blk_eh_timer_return abort_if_no_path(struct request *req) +{ + union map_info *info; + struct dm_mpath_io *mpio; + struct multipath *m; + unsigned long flags; + int rc = BLK_EH_RESET_TIMER; + int flush_ios = 0; + + info = dm_get_rq_mapinfo(req); + if (!info || !info->ptr) + return BLK_EH_NOT_HANDLED; + + mpio = info->ptr; + m = mpio->pgpath->pg->m; + /* + * Only abort request if: + * - queued_ios is not empty + * (protect against races with process_queued_ios) + * - queue_io is not set + * - no valid paths are found + */ + spin_lock_irqsave(&m->lock, flags); + if (!list_empty(&m->queued_ios) && + !m->queue_io && + !m->nr_valid_paths) { + list_del_init(&req->queuelist); + m->queue_size--; + m->queue_if_no_path = 0; + if (m->queue_size) + flush_ios = 1; + rc = BLK_EH_NOT_HANDLED; + } + spin_unlock_irqrestore(&m->lock, flags); + + if (rc == BLK_EH_NOT_HANDLED) { + mempool_free(mpio, m->mpio_pool); + dm_kill_unmapped_request(clone, -ETIMEOUT); + } + if (flush_ios) + queue_work(kmultipathd, &m->process_queue_ios); + + return rc; +} + /*----------------------------------------------------------------- * The multipath daemon is responsible for resubmitting queued ios. *---------------------------------------------------------------*/ @@ -790,6 +845,7 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) {0, 6, "invalid number of feature args"}, {1, 50, "pg_init_retries must be between 1 and 50"}, {0, 60000, "pg_init_delay_msecs must be between 0 and 60000"}, + {0, 65535, "no_path_timeout must be between 0 and 65535"}, }; r = dm_read_arg_group(_args, as, &argc, &ti->error); @@ -827,6 +883,13 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) continue; } + if (!strcasecmp(arg_name, "no_path_timeout") && + (argc >= 1)) { + r = dm_read_arg(_args + 3, as, &m->no_path_timeout, &ti->error); + argc--; + continue; + } + ti->error = "Unrecognised multipath feature request"; r = -EINVAL; } while (argc && !r); @@ -905,6 +968,12 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, goto bad; } + if (m->no_path_timeout) + dm_set_queue_timeout(ti, abort_if_no_path, + m->no_path_timeout * HZ); + else + dm_set_queue_timeout(ti, NULL, 0) + ti->num_flush_bios = 1; ti->num_discard_bios = 1; ti->num_write_same_bios = 1; diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9e39d2b..26bfad6 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1881,6 +1881,16 @@ static void dm_init_md_queue(struct mapped_device *md) blk_queue_merge_bvec(md->queue, dm_merge_bvec); } +static void dm_set_md_timeout(struct mapped_device *md, + rq_timed_out_fn *fn, unsigned int timeout) +{ + if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED) + return; + + blk_queue_rq_timed_out(md->queue, fn); + blk_queue_rq_timeout(md->queue, timeout); +} + /* * Allocate and initialise a blank device with a given minor. */ @@ -2790,6 +2800,13 @@ int dm_noflush_suspending(struct dm_target *ti) } EXPORT_SYMBOL_GPL(dm_noflush_suspending); +int dm_set_queue_timeout(struct dm_target *ti, rq_timed_out_fn *fn, + unsigned int timeout) +{ + return dm_set_md_timeout(dm_table_get_md(ti->table), fn, timeout); +} +EXPORT_SYMBOL_GPL(dm_set_queue_timeout); + struct dm_md_mempools *dm_alloc_md_mempools(unsigned type, unsigned integrity, unsigned per_bio_data_size) { struct dm_md_mempools *pools = kzalloc(sizeof(*pools), GFP_KERNEL); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 45b97da..c8df1ef 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -80,6 +80,8 @@ void dm_unlock_md_type(struct mapped_device *md); void dm_set_md_type(struct mapped_device *md, unsigned type); unsigned dm_get_md_type(struct mapped_device *md); struct target_type *dm_get_immutable_target_type(struct mapped_device *md); +void dm_set_queue_timeout(struct dm_table *t, rq_timed_out_fn *fn, + unsigned int timeout); int dm_setup_md_queue(struct mapped_device *md);
-- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel