From: Mike Christie <michaelc@xxxxxxxxxxx>

fc_remote_port_add calls fc_flush_work before matching and re-adding
a rport, but there is nothing that stops the class from queueing a
deletion right after the fc_flush_work call and before we grab the
host_lock. To fix this, this patch adds a cancel_work_sync call for
the stgt_delete_work when we have the rport partially set back up.

There is also a problem where fc_timeout_deleted_rport can begin the
rport timeout handling and move the rport to the rport_bindings list.
Then, when fc_timeout_deleted_rport drops the host_lock to call
fc_terminate_rport_io or dev_loss_tmo_callbk, fc_remote_port_add will
grab the lock, see the rport on the rport_bindings list, and add the
rport back while fc_timeout_deleted_rport is still running
fc_terminate_rport_io or dev_loss_tmo_callbk. This patch also fixes
that by adding cancel_delayed_work_sync calls for dev_loss_work in
this code path.

Note: this patch replaces the cancel_delayed_work/fc_flush_devloss
calls with cancel_delayed_work_sync. I think this should be more
efficient, because we no longer have to wait for all queued work to
complete, just for the work item itself and anything queued before it.

Signed-off-by: Mike Christie <michaelc@xxxxxxxxxxx>
---
 drivers/scsi/scsi_transport_fc.c |   66 ++++++++++++++++++--------------------
 1 files changed, 31 insertions(+), 35 deletions(-)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index 78486d5..d43f69a 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -2300,25 +2300,6 @@ fc_queue_devloss_work(struct Scsi_Host *shost, struct delayed_work *work,
 }
 
 /**
- * fc_flush_devloss - Flush a fc_host's devloss workqueue.
- * @shost:	Pointer to Scsi_Host bound to fc_host.
- */
-static void
-fc_flush_devloss(struct Scsi_Host *shost)
-{
-	if (!fc_host_devloss_work_q(shost)) {
-		printk(KERN_ERR
-			"ERROR: FC host '%s' attempted to flush work, "
-			"when no workqueue created.\n", shost->hostt->name);
-		dump_stack();
-		return;
-	}
-
-	flush_workqueue(fc_host_devloss_work_q(shost));
-}
-
-
-/**
  * fc_remove_host - called to terminate any fc_transport-related elements for a scsi host.
  * @shost:	Which &Scsi_Host
  *
@@ -2417,6 +2398,19 @@ fc_starget_delete(struct work_struct *work)
 	scsi_remove_target(&rport->dev);
 }
 
+/**
+ * fc_cancel_remote_port_delete - cancel rport deletion related work
+ * @rport:	rport to sync up
+ *
+ * This does not cancel rport_delete_work, because if that is queued
+ * the rport will have been destroyed when the sync completes.
+ */
+static void fc_cancel_remote_port_delete(struct fc_rport *rport)
+{
+	cancel_delayed_work_sync(&rport->fail_io_work);
+	cancel_delayed_work_sync(&rport->dev_loss_work);
+	cancel_work_sync(&rport->stgt_delete_work);
+}
 
 /**
  * fc_rport_final_delete - finish rport termination and delete it.
@@ -2450,10 +2444,9 @@ fc_rport_final_delete(struct work_struct *work)
 	spin_lock_irqsave(shost->host_lock, flags);
 	if (rport->flags & FC_RPORT_DEVLOSS_PENDING) {
 		spin_unlock_irqrestore(shost->host_lock, flags);
-		if (!cancel_delayed_work(&rport->fail_io_work))
-			fc_flush_devloss(shost);
-		if (!cancel_delayed_work(&rport->dev_loss_work))
-			fc_flush_devloss(shost);
+
+		fc_cancel_remote_port_delete(rport);
+
 		spin_lock_irqsave(shost->host_lock, flags);
 		rport->flags &= ~FC_RPORT_DEVLOSS_PENDING;
 	}
@@ -2718,10 +2711,7 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 			 * If they flush, the port_state will
 			 * be checked and will NOOP the function.
 			 */
-			if (!cancel_delayed_work(&rport->fail_io_work))
-				fc_flush_devloss(shost);
-			if (!cancel_delayed_work(&rport->dev_loss_work))
-				fc_flush_devloss(shost);
+			fc_cancel_remote_port_delete(rport);
 
 			spin_lock_irqsave(shost->host_lock, flags);
 
@@ -2785,13 +2775,25 @@ fc_remote_port_add(struct Scsi_Host *shost, int channel,
 			}
 
 			if (match) {
+				rport->port_state = FC_PORTSTATE_ONLINE;
+				spin_unlock_irqrestore(shost->host_lock, flags);
+				/*
+				 * A stgt delete could be queued after the
+				 * flush call, or a fc_timeout_deleted_rport
+				 * could be in a fc_terminate_rport_io or
+				 * dev_loss_tmo_callbk call so we must
+				 * cancel all work for the rport before
+				 * proceeding.
+				 */
+				fc_cancel_remote_port_delete(rport);
+
+				spin_lock_irqsave(shost->host_lock, flags);
 				memcpy(&rport->node_name, &ids->node_name,
 					sizeof(rport->node_name));
 				memcpy(&rport->port_name, &ids->port_name,
 					sizeof(rport->port_name));
 				rport->port_id = ids->port_id;
 				rport->roles = ids->roles;
-				rport->port_state = FC_PORTSTATE_ONLINE;
 				rport->flags &= ~FC_RPORT_FAST_FAIL_TIMEDOUT;
 
 				if (fci->f->dd_fcrport_size)
@@ -2990,19 +2992,13 @@ fc_remote_port_rolechg(struct fc_rport *rport, u32 roles)
 		 * machine state change will validate the
 		 * transaction.
 		 */
-		if (!cancel_delayed_work(&rport->fail_io_work))
-			fc_flush_devloss(shost);
-		if (!cancel_delayed_work(&rport->dev_loss_work))
-			fc_flush_devloss(shost);
+		fc_cancel_remote_port_delete(rport);
 
 		spin_lock_irqsave(shost->host_lock, flags);
 		rport->flags &= ~(FC_RPORT_FAST_FAIL_TIMEDOUT |
 				  FC_RPORT_DEVLOSS_PENDING);
 		spin_unlock_irqrestore(shost->host_lock, flags);
 
-		/* ensure any stgt delete functions are done */
-		fc_flush_work(shost);
-
 		/* initiate a scan of the target */
 		spin_lock_irqsave(shost->host_lock, flags);
 		rport->flags |= FC_RPORT_SCAN_PENDING;
-- 
1.7.2.2
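
For readers less familiar with the workqueue API, here is a minimal,
self-contained sketch (not part of the patch; the demo_wq, demo_dwork
and demo_fn names are made up for illustration) contrasting the old
cancel_delayed_work()/flush_workqueue() pattern with the
cancel_delayed_work_sync() call the patch switches to:

/*
 * Illustration only, not part of the patch above.  A standalone module
 * with its own single-threaded workqueue, standing in for the fc_host
 * devloss queue.
 */
#include <linux/module.h>
#include <linux/workqueue.h>
#include <linux/jiffies.h>

static struct workqueue_struct *demo_wq;	/* hypothetical queue */
static struct delayed_work demo_dwork;

static void demo_fn(struct work_struct *work)
{
	pr_info("demo: delayed work ran\n");
}

static int __init demo_init(void)
{
	demo_wq = create_singlethread_workqueue("demo_wq");
	if (!demo_wq)
		return -ENOMEM;

	INIT_DELAYED_WORK(&demo_dwork, demo_fn);
	queue_delayed_work(demo_wq, &demo_dwork, HZ);

	/*
	 * Old pattern: if cancel_delayed_work() fails, the work may
	 * already be queued or running, so the caller falls back to
	 * flushing the whole workqueue and waiting for every item on it.
	 */
	if (!cancel_delayed_work(&demo_dwork))
		flush_workqueue(demo_wq);

	/* Re-arm so the second pattern has something to operate on. */
	queue_delayed_work(demo_wq, &demo_dwork, HZ);

	/*
	 * New pattern: wait only for this specific work item, including
	 * a currently running execution, instead of draining everything
	 * else queued on demo_wq.
	 */
	cancel_delayed_work_sync(&demo_dwork);

	return 0;
}

static void __exit demo_exit(void)
{
	destroy_workqueue(demo_wq);
}

module_init(demo_init);
module_exit(demo_exit);
MODULE_LICENSE("GPL");

The point is the same as in the patch: the sync cancel serializes
against the one work item in question rather than waiting on the whole
devloss queue.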