Michael Reed wrote: > I'll give it a go and see if this fixes a possibly related issue I > was experiencing on my test system. > > Thank you for working this up. Well, this saves me some debugging. With your patch applied I no longer experience a console hang if more than about 8 targets disappear at one. THANK YOU. Mike > > Mike > > > James.Smart@xxxxxxxxxx wrote: >> In the scenario that a link was broken, the devloss timer for each rport >> was expire at roughly the same time, causing lots of "delete" workqueue items >> being queued. Depth is dependent upon the number of rports that were on the link. >> >> The rport target remove calls were calling flush_scheduled_work(), which would >> interrupt the stream, and start the next workqueue item, which did the same thing, >> and so on until recursion depth was large. >> >> This fix stops the recursion in the initial delete path, and pushes it off to >> a host-level work item that reaps the dead rports. >> >> James - this fix should be added to the final list of 2.6.15-rc5 patches you just posted. >> >> -- james s >> >> >> >> diff -upNr a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c >> --- a/drivers/scsi/scsi_transport_fc.c 2005-10-17 12:24:01.000000000 -0400 >> +++ b/drivers/scsi/scsi_transport_fc.c 2005-12-15 09:45:42.000000000 -0500 >> @@ -106,6 +106,7 @@ static struct { >> { FC_PORTSTATE_LINKDOWN, "Linkdown" }, >> { FC_PORTSTATE_ERROR, "Error" }, >> { FC_PORTSTATE_LOOPBACK, "Loopback" }, >> + { FC_PORTSTATE_DELETED, "Deleted" }, >> }; >> fc_enum_name_search(port_state, fc_port_state, fc_port_state_names) >> #define FC_PORTSTATE_MAX_NAMELEN 20 >> @@ -212,6 +213,7 @@ fc_bitfield_name_search(remote_port_role >> #define FC_MGMTSRVR_PORTID 0x00000a >> >> >> +static void fc_shost_remove_rports(void *data); >> static void fc_timeout_deleted_rport(void *data); >> static void fc_scsi_scan_rport(void *data); >> static void fc_rport_terminate(struct fc_rport *rport); >> @@ -319,6 +321,8 @@ static int fc_host_setup(struct transpor >> fc_host_next_rport_number(shost) = 0; >> fc_host_next_target_id(shost) = 0; >> >> + fc_host_flags(shost) = 0; >> + INIT_WORK(&fc_host_rport_del_work(shost), fc_shost_remove_rports, shost); >> return 0; >> } >> >> @@ -388,6 +392,7 @@ show_fc_rport_##field (struct class_devi >> struct fc_internal *i = to_fc_internal(shost->transportt); \ >> if ((i->f->get_rport_##field) && \ >> !((rport->port_state == FC_PORTSTATE_BLOCKED) || \ >> + (rport->port_state == FC_PORTSTATE_DELETED) || \ >> (rport->port_state == FC_PORTSTATE_NOTPRESENT))) \ >> i->f->get_rport_##field(rport); \ >> return snprintf(buf, sz, format_string, cast rport->field); \ >> @@ -403,6 +408,7 @@ store_fc_rport_##field(struct class_devi >> struct Scsi_Host *shost = rport_to_shost(rport); \ >> struct fc_internal *i = to_fc_internal(shost->transportt); \ >> if ((rport->port_state == FC_PORTSTATE_BLOCKED) || \ >> + (rport->port_state == FC_PORTSTATE_DELETED) || \ >> (rport->port_state == FC_PORTSTATE_NOTPRESENT)) \ >> return -EBUSY; \ >> val = simple_strtoul(buf, NULL, 0); \ >> @@ -520,6 +526,7 @@ store_fc_rport_dev_loss_tmo(struct class >> struct Scsi_Host *shost = rport_to_shost(rport); >> struct fc_internal *i = to_fc_internal(shost->transportt); >> if ((rport->port_state == FC_PORTSTATE_BLOCKED) || >> + (rport->port_state == FC_PORTSTATE_DELETED) || >> (rport->port_state == FC_PORTSTATE_NOTPRESENT)) >> return -EBUSY; >> val = simple_strtoul(buf, NULL, 0); >> @@ -1739,7 +1746,7 @@ fc_timeout_deleted_rport(void *data) >> rport->maxframe_size = -1; >> rport->supported_classes = FC_COS_UNSPECIFIED; >> rport->roles = FC_RPORT_ROLE_UNKNOWN; >> - rport->port_state = FC_PORTSTATE_NOTPRESENT; >> + rport->port_state = FC_PORTSTATE_DELETED; >> >> /* remove the identifiers that aren't used in the consisting binding */ >> switch (fc_host_tgtid_bind_type(shost)) { >> @@ -1759,14 +1766,23 @@ fc_timeout_deleted_rport(void *data) >> break; >> } >> >> - spin_unlock_irqrestore(shost->host_lock, flags); >> - >> /* >> * As this only occurs if the remote port (scsi target) >> * went away and didn't come back - we'll remove >> * all attached scsi devices. >> + * >> + * We'll schedule the shost work item to perform the actual removal >> + * to avoid recursion in the different flush calls if we perform >> + * the removal in each target - and there are lots of targets >> + * whose timeouts fire at the same time. >> */ >> - fc_rport_tgt_remove(rport); >> + >> + if ( !(fc_host_flags(shost) & FC_SHOST_RPORT_DEL_SCHEDULED)) { >> + fc_host_flags(shost) |= FC_SHOST_RPORT_DEL_SCHEDULED; >> + scsi_queue_work(shost, &fc_host_rport_del_work(shost)); >> + } >> + >> + spin_unlock_irqrestore(shost->host_lock, flags); >> } >> >> /** >> @@ -1788,6 +1804,41 @@ fc_scsi_scan_rport(void *data) >> } >> >> >> +/** >> + * fc_shost_remove_rports - called to remove all rports that are marked >> + * as in a deleted (not connected) state. >> + * >> + * @data: shost whose rports are to be looked at >> + **/ >> +static void >> +fc_shost_remove_rports(void *data) >> +{ >> + struct Scsi_Host *shost = (struct Scsi_Host *)data; >> + struct fc_rport *rport, *next_rport; >> + unsigned long flags; >> + >> + spin_lock_irqsave(shost->host_lock, flags); >> + while (fc_host_flags(shost) & FC_SHOST_RPORT_DEL_SCHEDULED) { >> + >> + fc_host_flags(shost) &= ~FC_SHOST_RPORT_DEL_SCHEDULED; >> + >> +restart_search: >> + list_for_each_entry_safe(rport, next_rport, >> + &fc_host_rport_bindings(shost), peers) { >> + if (rport->port_state == FC_PORTSTATE_DELETED) { >> + rport->port_state = FC_PORTSTATE_NOTPRESENT; >> + spin_unlock_irqrestore(shost->host_lock, flags); >> + fc_rport_tgt_remove(rport); >> + spin_lock_irqsave(shost->host_lock, flags); >> + goto restart_search; >> + } >> + } >> + >> + } >> + spin_unlock_irqrestore(shost->host_lock, flags); >> +} >> + >> + >> MODULE_AUTHOR("Martin Hicks"); >> MODULE_DESCRIPTION("FC Transport Attributes"); >> MODULE_LICENSE("GPL"); >> diff -upNr a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h >> --- a/include/scsi/scsi_transport_fc.h 2005-10-17 10:03:47.000000000 -0400 >> +++ b/include/scsi/scsi_transport_fc.h 2005-12-07 09:43:30.000000000 -0500 >> @@ -78,6 +78,7 @@ enum fc_port_state { >> FC_PORTSTATE_LINKDOWN, >> FC_PORTSTATE_ERROR, >> FC_PORTSTATE_LOOPBACK, >> + FC_PORTSTATE_DELETED, >> }; >> >> >> @@ -324,8 +325,14 @@ struct fc_host_attrs { >> struct list_head rport_bindings; >> u32 next_rport_number; >> u32 next_target_id; >> + u8 flags; >> + struct work_struct rport_del_work; >> }; >> >> +/* values for struct fc_host_attrs "flags" field: */ >> +#define FC_SHOST_RPORT_DEL_SCHEDULED 0x01 >> + >> + >> #define fc_host_node_name(x) \ >> (((struct fc_host_attrs *)(x)->shost_data)->node_name) >> #define fc_host_port_name(x) \ >> @@ -364,6 +371,10 @@ struct fc_host_attrs { >> (((struct fc_host_attrs *)(x)->shost_data)->next_rport_number) >> #define fc_host_next_target_id(x) \ >> (((struct fc_host_attrs *)(x)->shost_data)->next_target_id) >> +#define fc_host_flags(x) \ >> + (((struct fc_host_attrs *)(x)->shost_data)->flags) >> +#define fc_host_rport_del_work(x) \ >> + (((struct fc_host_attrs *)(x)->shost_data)->rport_del_work) >> >> >> /* The functions by which the transport class and the driver communicate */ >> - >> : send the line "unsubscribe linux-scsi" in >> the body of a message to majordomo@xxxxxxxxxxxxxxx >> More majordomo info at http://vger.kernel.org/majordomo-info.html >> >> > - > : send the line "unsubscribe linux-scsi" in > the body of a message to majordomo@xxxxxxxxxxxxxxx > More majordomo info at http://vger.kernel.org/majordomo-info.html > > - : send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html