Re: [PATCH 2/5] fusion: vmware bug fix prevent infinite retries

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Tue, 2007-01-09 at 12:55 -0800, Petr Vandrovec wrote:
> Adam Zimman wrote:
> > Adding VMware engineering... 
> > 
> > -----Original Message-----
> > From: Manon Goo [mailto:manon@xxxxxxxx] 
> > Sent: Tuesday, January 09, 2007 9:49 AM
> > To: Michael Reed; Moore, Eric; David Berghoff
> > Cc: James Bottomley; Adam Zimman; linux-scsi@xxxxxxxxxxxxxxx; Shirron, Stephen
> > Subject: Re: [PATCH 2/5] fusion: vmware bug fix prevent inifinite retries
> > 
> > Hmm .... why don't we make the whole thing configurable (David implemented this for us)?
> 
> In that case I would prefer going on with automatic detection of our 
> implementation - see first part of attached mpt-patch.diff I offered 
> when this all started.  As passing options to modules loaded by initrd 
> is not exactly trivial and varies across distributions, I would prefer 
> this runtime detection...
> 				Thanks,
> 					Petr Vandrovec
> 

The attached (untested) patch shows a VMware- and SCSI-transport-agnostic
approach which introduces a new host status (DID_QUALIFIED_REQUEUE) to
be used by mptscsih.c (and other LLDs) instead of DID_BUS_BUSY.  A host
status of DID_QUALIFIED_REQUEUE will cause scsi_decide_disposition to
return ADD_TO_MLQUEUE IFF the REQ_FAILFAST bit is not set in the
cmd_flags field of the SCSI command's request structure.

The approach depends on both VMware Linux guests not setting
REQ_FAILFAST and non-VMware Linux hosts with an IBM RDAC/MPP
multi-pathing driver doing so.  This requirement is not a problem for VMware
since its guest operating systems have no need to configure block device
multi-pathing.  This requirement shouldn't be a problem for the IBM
RDAC/MPP driver either since it should already be setting the
REQ_FAILFAST attribute of I/Os for which it is providing multi-pathing,
similar to what the Linux dm-multipath driver already does.

Ed Goggin

> > 
> > +/*
> > + *  cmd line parameters
> > + */
> > +static int mpt_mpi_busy;
> > +module_param(mpt_mpi_busy, int, 0);
> > +MODULE_PARM_DESC(mpt_mpi_busy, " MPT MPI busy workaround for VMWare ESX 
> > (default=0)");
> > +
> >  /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
> > 
> >  typedef struct _BIG_SENSE_BUF {
> > @@ -704,10 +711,13 @@
> >                         sc->resid=0;
> >                 case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:        /* 0x0040 */
> >                 case MPI_IOCSTATUS_SUCCESS:                     /* 0x0000 */
> > -                       if (scsi_status == MPI_SCSI_STATUS_BUSY)
> > +                       if ((scsi_status == MPI_SCSI_STATUS_BUSY) && 
> > !mpt_mpi_busy)
> >                                 sc->result = (DID_BUS_BUSY << 16) | 
> > scsi_status;
> > -                       else
> > +                       else {
> > +                                if (mpt_mpi_busy)
> > +                                        printk(KERN_INFO "MPT MPI ESX busy 
> > hack enabled ... waiting\n");
> >                                 sc->result = (DID_OK << 16) | scsi_status;
> > +                        }
> >                         if (scsi_state == 0) {
> >                                 ;
> >                         } else if (scsi_state & 
> > MPI_SCSI_STATE_AUTOSENSE_VALID) {
> plain text document attachment (mpt-patch.diff)
> Patch to fix timeouts during multi-path failover with LSILogic adapter on RedHat 4.0 Update 3 & 4.
> Patch is needed for other distributions as well - for example current Linus's 2.6.19 kernel needs it.
> 
> 
> diff -urN linux-2.6.9.orig/drivers/message/fusion/mptbase.c linux-2.6.9/drivers/message/fusion/mptbase.c
> --- linux-2.6.9.orig/drivers/message/fusion/mptbase.c	2006-12-15 11:13:47.000000000 -0800
> +++ linux-2.6.9/drivers/message/fusion/mptbase.c	2006-12-15 11:43:01.000000000 -0800
> @@ -1435,12 +1435,21 @@
>  		ioc->bus_type = SPI;
>  		/* 1030 Chip Fix. Disable Split transactions
>  		 * for PCIX. Set MOST bits to zero if Rev < C0( = 8).
> +		 *
> +		 * Also detect VMware's LSILogic emulation - it does
> +		 * not have PCI-X capability at offset 0x68 (and does
> +		 * not need disabling split transactions although it
> +		 * reports itself as revision 1).
>  		 */
>  		pci_read_config_byte(pdev, PCI_CLASS_REVISION, &revision);
>  		if (revision < C0_1030) {
> -			pci_read_config_byte(pdev, 0x6a, &pcixcmd);
> -			pcixcmd &= 0x8F;
> -			pci_write_config_byte(pdev, 0x6a, pcixcmd);
> +			if (pci_find_capability(pdev, PCI_CAP_ID_PCIX) == 0x68) {
> +				pci_read_config_byte(pdev, 0x6a, &pcixcmd);
> +				pcixcmd &= 0x8F;
> +				pci_write_config_byte(pdev, 0x6a, pcixcmd);
> +			} else {
> +				ioc->isVMware = 1;
> +			}
>  		}
>  	}
>  	else if (pdev->device == MPI_MANUFACTPAGE_DEVID_1030_53C1035) {
> diff -urN linux-2.6.9.orig/drivers/message/fusion/mptbase.h linux-2.6.9/drivers/message/fusion/mptbase.h
> --- linux-2.6.9.orig/drivers/message/fusion/mptbase.h	2006-12-15 11:13:47.000000000 -0800
> +++ linux-2.6.9/drivers/message/fusion/mptbase.h	2006-12-15 11:40:47.000000000 -0800
> @@ -673,7 +673,8 @@
>  	u8			 upload_fw;	/* If set, do a fw upload */
>  	u8			 reload_fw;	/* Force a FW Reload on next reset */
>  	u8			 NBShiftFactor;  /* NB Shift Factor based on Block Size (Facts)  */
> -	u8			 pad1[4];
> +	u8			 isVMware;
> +	u8			 pad1[3];
>  	int			 DoneCtx;
>  	int			 TaskCtx;
>  	int			 InternalCtx;
> diff -urN linux-2.6.9.orig/drivers/message/fusion/mptscsi.c linux-2.6.9/drivers/message/fusion/mptscsi.c
> --- linux-2.6.9.orig/drivers/message/fusion/mptscsi.c	2006-12-15 11:13:47.000000000 -0800
> +++ linux-2.6.9/drivers/message/fusion/mptscsi.c	2006-12-15 11:49:18.000000000 -0800
> @@ -773,7 +773,12 @@
>  			sc->resid=0;
>  		case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
>  		case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
> -			if (scsi_status == MPI_SCSI_STATUS_BUSY)
> +			/*
> +			 * In the case of emulated adapter busy status may be reported 
> +			 * for minutes when storage path switch occurs in the firmware.
> +			 * We definitely do not want to give up after standard timeout.
> +			 */
> +			if (scsi_status == MPI_SCSI_STATUS_BUSY && !ioc->isVMware)
>  				sc->result = (DID_BUS_BUSY << 16) | scsi_status;
>  			else
>  				sc->result = (DID_OK << 16) | scsi_status;
> @@ -810,6 +815,7 @@
>  				 * Not real sure here either so do nothing...  */
>  			}
>  
> +			/* Perhaps this wanted to test scsi_status and not sc->result? */
>  			if (sc->result == MPI_SCSI_STATUS_TASK_SET_FULL)
>  				mptscsih_report_queue_full(sc, pScsiReply, pScsiReq);
>  
diff -ru linux-2.6.20-rc3/drivers/message/fusion/mptscsih.c linux-2.6.20-rc3.vmware_patch/drivers/message/fusion/mptscsih.c
--- linux-2.6.20-rc3/drivers/message/fusion/mptscsih.c	2007-01-09 15:10:28.000019000 -0500
+++ linux-2.6.20-rc3.vmware_patch/drivers/message/fusion/mptscsih.c	2007-01-09 15:46:26.000105000 -0500
@@ -769,7 +769,7 @@
 		case MPI_IOCSTATUS_SCSI_RECOVERED_ERROR:	/* 0x0040 */
 		case MPI_IOCSTATUS_SUCCESS:			/* 0x0000 */
 			if (scsi_status == MPI_SCSI_STATUS_BUSY)
-				sc->result = (DID_BUS_BUSY << 16) | scsi_status;
+				sc->result = (DID_QUALIFIED_REQUEUE << 16) | scsi_status;
 			else
 				sc->result = (DID_OK << 16) | scsi_status;
 			if (scsi_state == 0) {
diff -ru linux-2.6.20-rc3/drivers/scsi/scsi_error.c linux-2.6.20-rc3.vmware_patch/drivers/scsi/scsi_error.c
--- linux-2.6.20-rc3/drivers/scsi/scsi_error.c	2007-01-09 14:40:16.000022000 -0500
+++ linux-2.6.20-rc3.vmware_patch/drivers/scsi/scsi_error.c	2007-01-09 15:55:48.000051000 -0500
@@ -1216,6 +1216,14 @@
 	case DID_IMM_RETRY:
 		return NEEDS_RETRY;
 
+	case DID_QUALIFIED_REQUEUE:
+		/*
+		 * Return immediately w/o requeue if the request 
+		 * indicates no retry.
+		 */
+		if (blk_noretry_request(scmd->request)) {
+			return SUCCESS;
+		}
 	case DID_REQUEUE:
 		return ADD_TO_MLQUEUE;
 
diff -ru linux-2.6.20-rc3/include/scsi/scsi.h linux-2.6.20-rc3.vmware_patch/include/scsi/scsi.h
--- linux-2.6.20-rc3/include/scsi/scsi.h	2007-01-09 15:07:30.000009000 -0500
+++ linux-2.6.20-rc3.vmware_patch/include/scsi/scsi.h	2007-01-09 15:45:41.000075000 -0500
@@ -309,6 +309,8 @@
 #define DID_IMM_RETRY   0x0c	/* Retry without decrementing retry count  */
 #define DID_REQUEUE	0x0d	/* Requeue command (no immediate retry) also
 				 * without decrementing the retry count	   */
+#define DID_QUALIFIED_REQUEUE 0x0e /* Requeue cmd w/o decr of retry count IFF
+				 * not blk_noretry_request		   */
 #define DRIVER_OK       0x00	/* Driver status                           */
 
 /*

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]
  Powered by Linux