I'll give it a go and see if this fixes a possibly related issue I was experiencing on my test system. Thank you for working this up. Mike James.Smart@xxxxxxxxxx wrote: > In the scenario that a link was broken, the devloss timer for each rport > was expire at roughly the same time, causing lots of "delete" workqueue items > being queued. Depth is dependent upon the number of rports that were on the link. > > The rport target remove calls were calling flush_scheduled_work(), which would > interrupt the stream, and start the next workqueue item, which did the same thing, > and so on until recursion depth was large. > > This fix stops the recursion in the initial delete path, and pushes it off to > a host-level work item that reaps the dead rports. > > James - this fix should be added to the final list of 2.6.15-rc5 patches you just posted. > > -- james s > > > > diff -upNr a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c > --- a/drivers/scsi/scsi_transport_fc.c 2005-10-17 12:24:01.000000000 -0400 > +++ b/drivers/scsi/scsi_transport_fc.c 2005-12-15 09:45:42.000000000 -0500 > @@ -106,6 +106,7 @@ static struct { > { FC_PORTSTATE_LINKDOWN, "Linkdown" }, > { FC_PORTSTATE_ERROR, "Error" }, > { FC_PORTSTATE_LOOPBACK, "Loopback" }, > + { FC_PORTSTATE_DELETED, "Deleted" }, > }; > fc_enum_name_search(port_state, fc_port_state, fc_port_state_names) > #define FC_PORTSTATE_MAX_NAMELEN 20 > @@ -212,6 +213,7 @@ fc_bitfield_name_search(remote_port_role > #define FC_MGMTSRVR_PORTID 0x00000a > > > +static void fc_shost_remove_rports(void *data); > static void fc_timeout_deleted_rport(void *data); > static void fc_scsi_scan_rport(void *data); > static void fc_rport_terminate(struct fc_rport *rport); > @@ -319,6 +321,8 @@ static int fc_host_setup(struct transpor > fc_host_next_rport_number(shost) = 0; > fc_host_next_target_id(shost) = 0; > > + fc_host_flags(shost) = 0; > + INIT_WORK(&fc_host_rport_del_work(shost), fc_shost_remove_rports, shost); > return 0; > } > > @@ -388,6 +392,7 @@ show_fc_rport_##field (struct class_devi > struct fc_internal *i = to_fc_internal(shost->transportt); \ > if ((i->f->get_rport_##field) && \ > !((rport->port_state == FC_PORTSTATE_BLOCKED) || \ > + (rport->port_state == FC_PORTSTATE_DELETED) || \ > (rport->port_state == FC_PORTSTATE_NOTPRESENT))) \ > i->f->get_rport_##field(rport); \ > return snprintf(buf, sz, format_string, cast rport->field); \ > @@ -403,6 +408,7 @@ store_fc_rport_##field(struct class_devi > struct Scsi_Host *shost = rport_to_shost(rport); \ > struct fc_internal *i = to_fc_internal(shost->transportt); \ > if ((rport->port_state == FC_PORTSTATE_BLOCKED) || \ > + (rport->port_state == FC_PORTSTATE_DELETED) || \ > (rport->port_state == FC_PORTSTATE_NOTPRESENT)) \ > return -EBUSY; \ > val = simple_strtoul(buf, NULL, 0); \ > @@ -520,6 +526,7 @@ store_fc_rport_dev_loss_tmo(struct class > struct Scsi_Host *shost = rport_to_shost(rport); > struct fc_internal *i = to_fc_internal(shost->transportt); > if ((rport->port_state == FC_PORTSTATE_BLOCKED) || > + (rport->port_state == FC_PORTSTATE_DELETED) || > (rport->port_state == FC_PORTSTATE_NOTPRESENT)) > return -EBUSY; > val = simple_strtoul(buf, NULL, 0); > @@ -1739,7 +1746,7 @@ fc_timeout_deleted_rport(void *data) > rport->maxframe_size = -1; > rport->supported_classes = FC_COS_UNSPECIFIED; > rport->roles = FC_RPORT_ROLE_UNKNOWN; > - rport->port_state = FC_PORTSTATE_NOTPRESENT; > + rport->port_state = FC_PORTSTATE_DELETED; > > /* remove the identifiers that aren't used in the consisting binding */ > switch (fc_host_tgtid_bind_type(shost)) { > @@ -1759,14 +1766,23 @@ fc_timeout_deleted_rport(void *data) > break; > } > > - spin_unlock_irqrestore(shost->host_lock, flags); > - > /* > * As this only occurs if the remote port (scsi target) > * went away and didn't come back - we'll remove > * all attached scsi devices. > + * > + * We'll schedule the shost work item to perform the actual removal > + * to avoid recursion in the different flush calls if we perform > + * the removal in each target - and there are lots of targets > + * whose timeouts fire at the same time. > */ > - fc_rport_tgt_remove(rport); > + > + if ( !(fc_host_flags(shost) & FC_SHOST_RPORT_DEL_SCHEDULED)) { > + fc_host_flags(shost) |= FC_SHOST_RPORT_DEL_SCHEDULED; > + scsi_queue_work(shost, &fc_host_rport_del_work(shost)); > + } > + > + spin_unlock_irqrestore(shost->host_lock, flags); > } > > /** > @@ -1788,6 +1804,41 @@ fc_scsi_scan_rport(void *data) > } > > > +/** > + * fc_shost_remove_rports - called to remove all rports that are marked > + * as in a deleted (not connected) state. > + * > + * @data: shost whose rports are to be looked at > + **/ > +static void > +fc_shost_remove_rports(void *data) > +{ > + struct Scsi_Host *shost = (struct Scsi_Host *)data; > + struct fc_rport *rport, *next_rport; > + unsigned long flags; > + > + spin_lock_irqsave(shost->host_lock, flags); > + while (fc_host_flags(shost) & FC_SHOST_RPORT_DEL_SCHEDULED) { > + > + fc_host_flags(shost) &= ~FC_SHOST_RPORT_DEL_SCHEDULED; > + > +restart_search: > + list_for_each_entry_safe(rport, next_rport, > + &fc_host_rport_bindings(shost), peers) { > + if (rport->port_state == FC_PORTSTATE_DELETED) { > + rport->port_state = FC_PORTSTATE_NOTPRESENT; > + spin_unlock_irqrestore(shost->host_lock, flags); > + fc_rport_tgt_remove(rport); > + spin_lock_irqsave(shost->host_lock, flags); > + goto restart_search; > + } > + } > + > + } > + spin_unlock_irqrestore(shost->host_lock, flags); > +} > + > + > MODULE_AUTHOR("Martin Hicks"); > MODULE_DESCRIPTION("FC Transport Attributes"); > MODULE_LICENSE("GPL"); > diff -upNr a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h > --- a/include/scsi/scsi_transport_fc.h 2005-10-17 10:03:47.000000000 -0400 > +++ b/include/scsi/scsi_transport_fc.h 2005-12-07 09:43:30.000000000 -0500 > @@ -78,6 +78,7 @@ enum fc_port_state { > FC_PORTSTATE_LINKDOWN, > FC_PORTSTATE_ERROR, > FC_PORTSTATE_LOOPBACK, > + FC_PORTSTATE_DELETED, > }; > > > @@ -324,8 +325,14 @@ struct fc_host_attrs { > struct list_head rport_bindings; > u32 next_rport_number; > u32 next_target_id; > + u8 flags; > + struct work_struct rport_del_work; > }; > > +/* values for struct fc_host_attrs "flags" field: */ > +#define FC_SHOST_RPORT_DEL_SCHEDULED 0x01 > + > + > #define fc_host_node_name(x) \ > (((struct fc_host_attrs *)(x)->shost_data)->node_name) > #define fc_host_port_name(x) \ > @@ -364,6 +371,10 @@ struct fc_host_attrs { > (((struct fc_host_attrs *)(x)->shost_data)->next_rport_number) > #define fc_host_next_target_id(x) \ > (((struct fc_host_attrs *)(x)->shost_data)->next_target_id) > +#define fc_host_flags(x) \ > + (((struct fc_host_attrs *)(x)->shost_data)->flags) > +#define fc_host_rport_del_work(x) \ > + (((struct fc_host_attrs *)(x)->shost_data)->rport_del_work) > > > /* The functions by which the transport class and the driver communicate */ > - > : send the line "unsubscribe linux-scsi" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > > - : send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html