Re: [PATCH] fix for fc transport recursion problem.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



I'll give it a go and see if this fixes a possibly related issue I
was experiencing on my test system.

Thank you for working this up.

Mike


James.Smart@xxxxxxxxxx wrote:
> In the scenario that a link was broken, the devloss timer for each rport
> was expire at roughly the same time, causing lots of "delete" workqueue items
> being queued. Depth is dependent upon the number of rports that were on the link.
> 
> The rport target remove calls were calling flush_scheduled_work(), which would
> interrupt the stream, and start the next workqueue item, which did the same thing,
> and so on until recursion depth was large.
> 
> This fix stops the recursion in the initial delete path, and pushes it off to
> a host-level work item that reaps the dead rports.
> 
> James - this fix should be added to the final list of 2.6.15-rc5 patches you just posted.
> 
> -- james s
> 
> 
> 
> diff -upNr a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
> --- a/drivers/scsi/scsi_transport_fc.c	2005-10-17 12:24:01.000000000 -0400
> +++ b/drivers/scsi/scsi_transport_fc.c	2005-12-15 09:45:42.000000000 -0500
> @@ -106,6 +106,7 @@ static struct {
>  	{ FC_PORTSTATE_LINKDOWN,	"Linkdown" },
>  	{ FC_PORTSTATE_ERROR,		"Error" },
>  	{ FC_PORTSTATE_LOOPBACK,	"Loopback" },
> +	{ FC_PORTSTATE_DELETED,		"Deleted" },
>  };
>  fc_enum_name_search(port_state, fc_port_state, fc_port_state_names)
>  #define FC_PORTSTATE_MAX_NAMELEN	20
> @@ -212,6 +213,7 @@ fc_bitfield_name_search(remote_port_role
>  #define FC_MGMTSRVR_PORTID		0x00000a
>  
>  
> +static void fc_shost_remove_rports(void  *data);
>  static void fc_timeout_deleted_rport(void *data);
>  static void fc_scsi_scan_rport(void *data);
>  static void fc_rport_terminate(struct fc_rport  *rport);
> @@ -319,6 +321,8 @@ static int fc_host_setup(struct transpor
>  	fc_host_next_rport_number(shost) = 0;
>  	fc_host_next_target_id(shost) = 0;
>  
> +	fc_host_flags(shost) = 0;
> +	INIT_WORK(&fc_host_rport_del_work(shost), fc_shost_remove_rports, shost);
>  	return 0;
>  }
>  
> @@ -388,6 +392,7 @@ show_fc_rport_##field (struct class_devi
>  	struct fc_internal *i = to_fc_internal(shost->transportt);	\
>  	if ((i->f->get_rport_##field) &&				\
>  	    !((rport->port_state == FC_PORTSTATE_BLOCKED) ||		\
> +	      (rport->port_state == FC_PORTSTATE_DELETED) ||		\
>  	      (rport->port_state == FC_PORTSTATE_NOTPRESENT)))		\
>  		i->f->get_rport_##field(rport);				\
>  	return snprintf(buf, sz, format_string, cast rport->field); 	\
> @@ -403,6 +408,7 @@ store_fc_rport_##field(struct class_devi
>  	struct Scsi_Host *shost = rport_to_shost(rport);		\
>  	struct fc_internal *i = to_fc_internal(shost->transportt);	\
>  	if ((rport->port_state == FC_PORTSTATE_BLOCKED) ||		\
> +	    (rport->port_state == FC_PORTSTATE_DELETED) ||		\
>  	    (rport->port_state == FC_PORTSTATE_NOTPRESENT))		\
>  		return -EBUSY;						\
>  	val = simple_strtoul(buf, NULL, 0);				\
> @@ -520,6 +526,7 @@ store_fc_rport_dev_loss_tmo(struct class
>  	struct Scsi_Host *shost = rport_to_shost(rport);
>  	struct fc_internal *i = to_fc_internal(shost->transportt);
>  	if ((rport->port_state == FC_PORTSTATE_BLOCKED) ||
> +	    (rport->port_state == FC_PORTSTATE_DELETED) ||
>  	    (rport->port_state == FC_PORTSTATE_NOTPRESENT))
>  		return -EBUSY;
>  	val = simple_strtoul(buf, NULL, 0);
> @@ -1739,7 +1746,7 @@ fc_timeout_deleted_rport(void  *data)
>  	rport->maxframe_size = -1;
>  	rport->supported_classes = FC_COS_UNSPECIFIED;
>  	rport->roles = FC_RPORT_ROLE_UNKNOWN;
> -	rport->port_state = FC_PORTSTATE_NOTPRESENT;
> +	rport->port_state = FC_PORTSTATE_DELETED;
>  
>  	/* remove the identifiers that aren't used in the consisting binding */
>  	switch (fc_host_tgtid_bind_type(shost)) {
> @@ -1759,14 +1766,23 @@ fc_timeout_deleted_rport(void  *data)
>  		break;
>  	}
>  
> -	spin_unlock_irqrestore(shost->host_lock, flags);
> -
>  	/*
>  	 * As this only occurs if the remote port (scsi target)
>  	 * went away and didn't come back - we'll remove
>  	 * all attached scsi devices.
> +	 *
> +	 * We'll schedule the shost work item to perform the actual removal
> +	 * to avoid recursion in the different flush calls if we perform
> +	 * the removal in each target - and there are lots of targets
> +	 * whose timeouts fire at the same time.
>  	 */
> -	fc_rport_tgt_remove(rport);
> +
> +	if ( !(fc_host_flags(shost) & FC_SHOST_RPORT_DEL_SCHEDULED)) {
> +		fc_host_flags(shost) |= FC_SHOST_RPORT_DEL_SCHEDULED;
> +		scsi_queue_work(shost, &fc_host_rport_del_work(shost));
> +	}
> +
> +	spin_unlock_irqrestore(shost->host_lock, flags);
>  }
>  
>  /**
> @@ -1788,6 +1804,41 @@ fc_scsi_scan_rport(void *data)
>  }
>  
>  
> +/**
> + * fc_shost_remove_rports - called to remove all rports that are marked
> + *                       as in a deleted (not connected) state.
> + * 
> + * @data:	shost whose rports are to be looked at
> + **/
> +static void
> +fc_shost_remove_rports(void  *data)
> +{
> +	struct Scsi_Host *shost = (struct Scsi_Host *)data;
> +	struct fc_rport *rport, *next_rport;
> +	unsigned long flags;
> +
> +	spin_lock_irqsave(shost->host_lock, flags);
> +	while (fc_host_flags(shost) & FC_SHOST_RPORT_DEL_SCHEDULED) {
> +
> +		fc_host_flags(shost) &= ~FC_SHOST_RPORT_DEL_SCHEDULED;
> +
> +restart_search:
> +		list_for_each_entry_safe(rport, next_rport,
> +				&fc_host_rport_bindings(shost), peers) {
> +			if (rport->port_state == FC_PORTSTATE_DELETED) {
> +				rport->port_state = FC_PORTSTATE_NOTPRESENT;
> +				spin_unlock_irqrestore(shost->host_lock, flags);
> +				fc_rport_tgt_remove(rport);
> +				spin_lock_irqsave(shost->host_lock, flags);
> +				goto restart_search;
> +			}
> +		}
> +
> +	}
> +	spin_unlock_irqrestore(shost->host_lock, flags);
> +}
> +
> +
>  MODULE_AUTHOR("Martin Hicks");
>  MODULE_DESCRIPTION("FC Transport Attributes");
>  MODULE_LICENSE("GPL");
> diff -upNr a/include/scsi/scsi_transport_fc.h b/include/scsi/scsi_transport_fc.h
> --- a/include/scsi/scsi_transport_fc.h	2005-10-17 10:03:47.000000000 -0400
> +++ b/include/scsi/scsi_transport_fc.h	2005-12-07 09:43:30.000000000 -0500
> @@ -78,6 +78,7 @@ enum fc_port_state {
>  	FC_PORTSTATE_LINKDOWN,
>  	FC_PORTSTATE_ERROR,
>  	FC_PORTSTATE_LOOPBACK,
> +	FC_PORTSTATE_DELETED,
>  };
>  
>  
> @@ -324,8 +325,14 @@ struct fc_host_attrs {
>  	struct list_head rport_bindings;
>  	u32 next_rport_number;
>  	u32 next_target_id;
> +	u8 flags;
> + 	struct work_struct rport_del_work;
>  };
>  
> +/* values for struct fc_host_attrs "flags" field: */
> +#define FC_SHOST_RPORT_DEL_SCHEDULED	0x01
> +
> +
>  #define fc_host_node_name(x) \
>  	(((struct fc_host_attrs *)(x)->shost_data)->node_name)
>  #define fc_host_port_name(x)	\
> @@ -364,6 +371,10 @@ struct fc_host_attrs {
>  	(((struct fc_host_attrs *)(x)->shost_data)->next_rport_number)
>  #define fc_host_next_target_id(x) \
>  	(((struct fc_host_attrs *)(x)->shost_data)->next_target_id)
> +#define fc_host_flags(x) \
> +	(((struct fc_host_attrs *)(x)->shost_data)->flags)
> +#define fc_host_rport_del_work(x) \
> +	(((struct fc_host_attrs *)(x)->shost_data)->rport_del_work)
>  
>  
>  /* The functions by which the transport class and the driver communicate */
> -
> : send the line "unsubscribe linux-scsi" in
> the body of a message to majordomo@xxxxxxxxxxxxxxx
> More majordomo info at  http://vger.kernel.org/majordomo-info.html
> 
> 
-
: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]
  Powered by Linux