[PATCH] Fix double free of MPT request frames.

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi, 

While testing scsi path failover for disks using MPT drivers we hit a
kernel oops on RHEL 5.1-64bit, while analyzing the problem we noticed
that this is due to a race present in the mpt scsi code path. The same
race seems to be present in the latest git kernel code too.

Here is the description of the race,
If a command has run for too long - which can happen often in the
failover case - the SCSI stack decides to abort that command and invokes
a device's abort handler. 
The code flow is...

mptscsih_abort ==> mptscsih_TMHandle (ABORT_TASK) ==>
mptscsih_IssueTaskMgmt (submits the command ) ==>
mptscsih_tm_wait_for_completion()

the mptscsih_tm_wait_for_completion code looks like this

do {
                spin_lock_irqsave(&ioc->FreeQlock, flags);
                if(hd->tmPending == 0) {
                        status = SUCCESS;
                        spin_unlock_irqrestore(&ioc->FreeQlock, flags);
                        break;
                }
                spin_unlock_irqrestore(&ioc->FreeQlock, flags);
                msleep(250);                        <<==================
        } while (--loop_count);

where its looping on hd->tmPending (SUCCESS) or until timeout expires
(FAILURE).

Every thing works fine in the success case, in the failure case we
return to mptscsih_IssueTaskMgmt, and call mpt_HardResetHandler, where
we throw all inflight commands (hard reset) and free request via
mpt_free_msg_frame. Which also seems fine.

Now notice in the loop above that in the last iteration we sleep for
250ms after the last read to tmPending, the TM command could complete
between this check of tmPending and before the adapter is hard reset. 
In such a case the request is double freed once when the command
completes (interrupt gets delivered and mpt_reply releases request
frame), and once after the mpt_HardResetHandler. This results in
corruption of  the linked list of empty request frames and causes a oops
on next list access.

The patch below attempts to fix this race by freeing the request_frame
before mpt_HardReset and doing that atomically after checking for
tmPending value. Also the mptscsih_taskmgmt_complete function returns
"1" irrespective of the tmPending value which results in always freeing
of the request frame from mpt_reply, so I have modified that function to
look at the tmPending value before returning. 

While I am at it, I have also added a BUG_ON statement in
mpt_free_msg_frame to check for double free, instead of silently
corrupting the list.

Please consider the patch below for inclusion.


Signed-off-by: Alok N Kataria <akataria@xxxxxxxxxx>
Cc: <stable@xxxxxxxxxx>
---

 drivers/message/fusion/mptbase.c  |   11 ++++-------
 drivers/message/fusion/mptbase.h  |   10 +++++++++-
 drivers/message/fusion/mptscsih.c |   25 +++++++++++++++++++++----
 3 files changed, 34 insertions(+), 12 deletions(-)


diff --git a/drivers/message/fusion/mptbase.c b/drivers/message/fusion/mptbase.c
index 5d496a9..a1637bf 100644
--- a/drivers/message/fusion/mptbase.c
+++ b/drivers/message/fusion/mptbase.c
@@ -974,7 +974,7 @@ mpt_put_msg_frame_hi_pri(u8 cb_idx, MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
 /**
- *	mpt_free_msg_frame - Place MPT request frame back on FreeQ.
+ *	__mpt_free_msg_frame - Place MPT request frame back on FreeQ.
  *	@ioc: Pointer to MPT adapter structure
  *	@mf: Pointer to MPT request frame
  *
@@ -982,18 +982,15 @@ mpt_put_msg_frame_hi_pri(u8 cb_idx, MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
  *	FreeQ.
  */
 void
-mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
+__mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
 {
-	unsigned long flags;
-
+	BUG_ON(mf->u.frame.linkage.arg1 == 0xdeadbeaf);
 	/*  Put Request back on FreeQ!  */
-	spin_lock_irqsave(&ioc->FreeQlock, flags);
 	mf->u.frame.linkage.arg1 = 0xdeadbeaf; /* signature to know if this mf is freed */
 	list_add_tail(&mf->u.frame.linkage.list, &ioc->FreeQ);
 #ifdef MFCNT
 	ioc->mfcnt--;
 #endif
-	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/
@@ -7612,7 +7609,7 @@ EXPORT_SYMBOL(mpt_device_driver_deregister);
 EXPORT_SYMBOL(mpt_get_msg_frame);
 EXPORT_SYMBOL(mpt_put_msg_frame);
 EXPORT_SYMBOL(mpt_put_msg_frame_hi_pri);
-EXPORT_SYMBOL(mpt_free_msg_frame);
+EXPORT_SYMBOL(__mpt_free_msg_frame);
 EXPORT_SYMBOL(mpt_add_sge);
 EXPORT_SYMBOL(mpt_send_handshake_request);
 EXPORT_SYMBOL(mpt_verify_adapter);
diff --git a/drivers/message/fusion/mptbase.h b/drivers/message/fusion/mptbase.h
index b3e981d..4e4ee53 100644
--- a/drivers/message/fusion/mptbase.h
+++ b/drivers/message/fusion/mptbase.h
@@ -906,7 +906,7 @@ extern void	 mpt_reset_deregister(u8 cb_idx);
 extern int	 mpt_device_driver_register(struct mpt_pci_driver * dd_cbfunc, u8 cb_idx);
 extern void	 mpt_device_driver_deregister(u8 cb_idx);
 extern MPT_FRAME_HDR	*mpt_get_msg_frame(u8 cb_idx, MPT_ADAPTER *ioc);
-extern void	 mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
+extern void	 __mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
 extern void	 mpt_put_msg_frame(u8 cb_idx, MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
 extern void	 mpt_put_msg_frame_hi_pri(u8 cb_idx, MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf);
 extern void	 mpt_add_sge(char *pAddr, u32 flagslength, dma_addr_t dma_addr);
@@ -924,6 +924,14 @@ extern int	 mptbase_sas_persist_operation(MPT_ADAPTER *ioc, u8 persist_opcode);
 extern int	 mpt_raid_phys_disk_pg0(MPT_ADAPTER *ioc, u8 phys_disk_num, pRaidPhysDiskPage0_t phys_disk);
 extern void     mpt_halt_firmware(MPT_ADAPTER *ioc);
 
+static inline void
+mpt_free_msg_frame(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf)
+{
+	unsigned long flags;
+	spin_lock_irqsave(&ioc->FreeQlock, flags);
+	__mpt_free_msg_frame(ioc, mf);
+	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
+}
 
 /*
  *  Public data decl's...
diff --git a/drivers/message/fusion/mptscsih.c b/drivers/message/fusion/mptscsih.c
index e62c6bc..afe7fc0 100644
--- a/drivers/message/fusion/mptscsih.c
+++ b/drivers/message/fusion/mptscsih.c
@@ -1719,6 +1719,18 @@ mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, i
 	}
 
 	if(mptscsih_tm_wait_for_completion(hd, timeout) == FAILED) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&ioc->FreeQlock, flags);
+		if(hd->tmPending == 0) {
+                        spin_unlock_irqrestore(&ioc->FreeQlock, flags);
+                        goto out_success;
+                }
+		__mpt_free_msg_frame(ioc, mf);
+		hd->tmPending = 0;
+		hd->tmState = TM_STATE_NONE;
+		spin_unlock_irqrestore(&ioc->FreeQlock, flags);
+
 		dfailprintk(ioc, printk(MYIOC_s_ERR_FMT "task management request TIMED OUT!"
 			" (hd %p, ioc %p, mf %p) \n", ioc->name, hd,
 			ioc, mf));
@@ -1727,9 +1739,10 @@ mptscsih_IssueTaskMgmt(MPT_SCSI_HOST *hd, u8 type, u8 channel, u8 id, int lun, i
 		retval = mpt_HardResetHandler(ioc, CAN_SLEEP);
 		dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "rc=%d \n",
 			 ioc->name, retval));
-		goto fail_out;
+		return FAILED;
 	}
 
+out_success:
 	/*
 	 * Handle success case, see if theres a non-zero ioc_status.
 	 */
@@ -2159,6 +2172,7 @@ mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *m
 	u16			 iocstatus;
 	u8			 tmType;
 	u32			 termination_count;
+	int			 retval = 1;
 
 	dtmprintk(ioc, printk(MYIOC_s_DEBUG_FMT "TaskMgmt completed (mf=%p,mr=%p)\n",
 	    ioc->name, mf, mr));
@@ -2235,12 +2249,15 @@ mptscsih_taskmgmt_complete(MPT_ADAPTER *ioc, MPT_FRAME_HDR *mf, MPT_FRAME_HDR *m
 
  out:
 	spin_lock_irqsave(&ioc->FreeQlock, flags);
-	hd->tmPending = 0;
-	hd->tmState = TM_STATE_NONE;
+	if (hd->tmPending) {
+		hd->tmPending = 0;
+		hd->tmState = TM_STATE_NONE;
+	} else
+		retval = 0;
 	hd->tm_iocstatus = iocstatus;
 	spin_unlock_irqrestore(&ioc->FreeQlock, flags);
 
-	return 1;
+	return retval;
 }
 
 /*=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=-=*/


--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]
  Powered by Linux