I have seen this race condition earlier on my system. Problem is resolved after testing with this patch. Thanks. --Babu Moger > -----Original Message----- > From: Chandra Seetharaman [mailto:sekharan@xxxxxxxxxx] > Sent: Friday, June 05, 2009 7:05 PM > To: device-mapper development > Cc: Alasdair G Kergon; Moger, Babu > Subject: Re: [PATCH] Handle multipath paths in a path group > properly during pg_init > > Found a race condition where pg_init_in_progress being incremented when > queue_work() returns without queuing the work. > > Changed the code to increment pg_init_in_progress only when queue_work > returns success. > > Tested in 2.6.30-rc8 > > --------------------------------------------------- > From: Chandra Seetharaman <sekharan@xxxxxxxxxx> > > Fixed a problem affecting reinstatement of passive paths and another > problem with io lockup during device offline/online. > > Before we moved the hardware handler from dm to SCSI, it performed a > pg_init > for a path group and didn't maintain any state about each path in hardware > handler code. > > But in SCSI dh, such state is now maintained, as we want to fail I/O early > on a > path if it is not the active path. > > All the hardware handlers have a state now and set to active or some form > of > inactive. They have prep_fn() which uses this state to fail the I/O > without > it ever being sent to the device. > > So in effect when dm-multipath calls scsi_dh_activate(), activate is > sent to only one path and the "state" of that path is changed > appropriately > to "active" while other paths in the same path group are never changed > as they never got an "activate". > > In order make sure all the paths in a path group gets their state set > properly when a pg_init happens, we need to call scsi_dh_activate() on > all paths in a path group. > > Doing this at the hardware handler layer is not a good option as we > want the multipath layer to define the relationship between path and path > groups and not the hardware handler. > > Attached patch sends an "activate" on each path in a path group when a > path group is switched. It also sends an activate when a path is > reinstated. > > Signed-off-by: Chandra Seetharaman <sekharan@xxxxxxxxxx> > Signed-off-by: Alasdair G Kergon <agk@xxxxxxxxxx> > > --- > drivers/md/dm-mpath.c | 63 +++++++++++++++++++++----------------------- > ------ > 1 file changed, 26 insertions(+), 37 deletions(-) > > Index: linux-2.6.29/drivers/md/dm-mpath.c > =================================================================== > --- linux-2.6.29.orig/drivers/md/dm-mpath.c > +++ linux-2.6.29/drivers/md/dm-mpath.c > @@ -35,6 +35,7 @@ struct pgpath { > > struct dm_path path; > struct work_struct deactivate_path; > + struct work_struct activate_path; > }; > > #define path_to_pgpath(__pgp) container_of((__pgp), struct pgpath, path) > @@ -64,8 +65,6 @@ struct multipath { > spinlock_t lock; > > const char *hw_handler_name; > - struct work_struct activate_path; > - struct pgpath *pgpath_to_activate; > unsigned nr_priority_groups; > struct list_head priority_groups; > unsigned pg_init_required; /* pg_init needs calling? */ > @@ -128,6 +127,7 @@ static struct pgpath *alloc_pgpath(void) > if (pgpath) { > pgpath->is_active = 1; > INIT_WORK(&pgpath->deactivate_path, deactivate_path); > + INIT_WORK(&pgpath->activate_path, activate_path); > } > > return pgpath; > @@ -160,7 +160,6 @@ static struct priority_group *alloc_prio > > static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti) > { > - unsigned long flags; > struct pgpath *pgpath, *tmp; > struct multipath *m = ti->private; > > @@ -169,10 +168,6 @@ static void free_pgpaths(struct list_hea > if (m->hw_handler_name) > scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev)); > dm_put_device(ti, pgpath->path.dev); > - spin_lock_irqsave(&m->lock, flags); > - if (m->pgpath_to_activate == pgpath) > - m->pgpath_to_activate = NULL; > - spin_unlock_irqrestore(&m->lock, flags); > free_pgpath(pgpath); > } > } > @@ -202,7 +197,6 @@ static struct multipath *alloc_multipath > m->queue_io = 1; > INIT_WORK(&m->process_queued_ios, process_queued_ios); > INIT_WORK(&m->trigger_event, trigger_event); > - INIT_WORK(&m->activate_path, activate_path); > m->mpio_pool = mempool_create_slab_pool(MIN_IOS, _mpio_cache); > if (!m->mpio_pool) { > kfree(m); > @@ -427,8 +421,8 @@ static void process_queued_ios(struct wo > { > struct multipath *m = > container_of(work, struct multipath, process_queued_ios); > - struct pgpath *pgpath = NULL; > - unsigned init_required = 0, must_queue = 1; > + struct pgpath *pgpath = NULL, *tmp; > + unsigned must_queue = 1; > unsigned long flags; > > spin_lock_irqsave(&m->lock, flags); > @@ -446,19 +440,15 @@ static void process_queued_ios(struct wo > must_queue = 0; > > if (m->pg_init_required && !m->pg_init_in_progress && pgpath) { > - m->pgpath_to_activate = pgpath; > m->pg_init_count++; > m->pg_init_required = 0; > - m->pg_init_in_progress = 1; > - init_required = 1; > + list_for_each_entry(tmp, &pgpath->pg->pgpaths, list) { > + if (queue_work(kmpath_handlerd, &tmp->activate_path)) > + m->pg_init_in_progress++; > + } > } > - > out: > spin_unlock_irqrestore(&m->lock, flags); > - > - if (init_required) > - queue_work(kmpath_handlerd, &m->activate_path); > - > if (!must_queue) > dispatch_queued_ios(m); > } > @@ -924,9 +914,13 @@ static int reinstate_path(struct pgpath > > pgpath->is_active = 1; > > - m->current_pgpath = NULL; > - if (!m->nr_valid_paths++ && m->queue_size) > + if (!m->nr_valid_paths++ && m->queue_size) { > + m->current_pgpath = NULL; > queue_work(kmultipathd, &m->process_queued_ios); > + } else if (m->hw_handler_name && (m->current_pg == pgpath->pg)) { > + if (queue_work(kmpath_handlerd, &pgpath->activate_path)) > + m->pg_init_in_progress++; > + } > > dm_path_uevent(DM_UEVENT_PATH_REINSTATED, m->ti, > pgpath->path.dev->name, m->nr_valid_paths); > @@ -1102,35 +1096,30 @@ static void pg_init_done(struct dm_path > > spin_lock_irqsave(&m->lock, flags); > if (errors) { > - DMERR("Could not failover device. Error %d.", errors); > - m->current_pgpath = NULL; > - m->current_pg = NULL; > + if (pgpath == m->current_pgpath) { > + DMERR("Could not failover device. Error %d.", errors); > + m->current_pgpath = NULL; > + m->current_pg = NULL; > + } > } else if (!m->pg_init_required) { > m->queue_io = 0; > pg->bypassed = 0; > } > > - m->pg_init_in_progress = 0; > - queue_work(kmultipathd, &m->process_queued_ios); > + m->pg_init_in_progress--; > + if (!m->pg_init_in_progress) > + queue_work(kmultipathd, &m->process_queued_ios); > spin_unlock_irqrestore(&m->lock, flags); > } > > static void activate_path(struct work_struct *work) > { > int ret; > - struct multipath *m = > - container_of(work, struct multipath, activate_path); > - struct dm_path *path; > - unsigned long flags; > + struct pgpath *pgpath = > + container_of(work, struct pgpath, activate_path); > > - spin_lock_irqsave(&m->lock, flags); > - path = &m->pgpath_to_activate->path; > - m->pgpath_to_activate = NULL; > - spin_unlock_irqrestore(&m->lock, flags); > - if (!path) > - return; > - ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev)); > - pg_init_done(path, ret); > + ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev)); > + pg_init_done(&pgpath->path, ret); > } > > /* > > > -- dm-devel mailing list dm-devel@xxxxxxxxxx https://www.redhat.com/mailman/listinfo/dm-devel