[PATCH V2] scsi: qla2xxx: Fix crashes in qla2x00_probe_one on probe failure

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Because of the shifting around of code in qla2x00_probe_one recently,
failures during adapter initialization can lead to problems, i.e. NULL
pointer crashes and doubly freed data structures which cause eventual
panics.

This V2 version makes the relevant memory free routines idempotent, so
repeat calls won't cause any harm. I also removed the problematic
probe_init_failed exit point as it is not needed.

Fixes: d64d6c5671db ("scsi: qla2xxx: Fix NULL pointer crash due to probe failure")
Signed-off-by: Bill Kuzeja <william.kuzeja@xxxxxxxxxxx>

---

Some of these issues are due to:

commit d64d6c5671db scsi: qla2xxx: Fix NULL pointer crash due to probe failure

where some frees were moved around, as well as the error exit from
a qla2x00_request_irqs failure.

This was a fix for:

commit d74595278f4a scsi: qla2xxx: Add multiple queue pair functionality.

which caused problems of its own.

To reproduce these issues, I run a test where I break the card early in
init, (and also through kprobe fault injection). This way, I've been able
to hit several different types of crashes, all started by failures of
various routines called throughout the probe.

The problematic routines that fail and their exit points are:

qla2x00_alloc_queues  => probe_init_failed
initialize_adapter    => probe_failed
kthread_create        => probe_failed
scsi_add_host         => probe_failed

Exit points are ordered in this way:

probe_init_failed:
        qla2x00_free_req_que(ha, req);
        ha->req_q_map[0] = NULL;
        clear_bit(0, ha->req_qid_map);
        qla2x00_free_rsp_que(ha, rsp);
        ha->rsp_q_map[0] = NULL;
        clear_bit(0, ha->rsp_qid_map);
        ha->max_req_queues = ha->max_rsp_queues = 0;

probe_failed:
        if (base_vha->timer_active)
                qla2x00_stop_timer(base_vha);
...
        qla2x00_free_device(base_vha);

        scsi_host_put(base_vha->host);

probe_hw_failed:
        qla2x00_mem_free(ha);
        qla2x00_free_req_que(ha, req);
        qla2x00_free_rsp_que(ha, rsp);
        qla2x00_clear_drv_active(ha);

Note that qla2x00_free_device calls qla2x00_mem_free and
qla2x00_free_rsp_que and qla2x00_free_req_que. So if we exit to
probe_failed or probe_init_failed, we'll end up calling these
routines multiple times.

These routines are not idempotent, I am making them so. This solves
most of the issues.

Also probe_init_failed is not needed. In the place that it is called,
ha->req_q_map[0] and ha->rsp_q_map[0] are not setup. And we now call
qla2x00_free_rsp_que/qla2x00_free_req_que below in probe_hw_failed.
I removed this exit point entirely.

Along the way I found that the return code for qla2x00_alloc_queues
never really causes us to exit out of the probe routine.

In order to fail...

        if (!qla2x00_alloc_queues(ha, req, rsp)) {

...we must return 0. However, internally, if this routine succeeds it
returns 1 and if it fails it returns -ENOMEM. So I am modifying
qla2x00_alloc_queues to fall in line with other return conventions
where zero is a success (and obviously have changed the probe routine
accordingly).

One more issue falls out of this case: when qla2x00_abort_all_cmds
is invoked from qla2x00_free_device and request queues are not
allocated (qla2x00_alloc_queues has not succeeded), we crash on a NULL
pointer (ha->req_q_map[que]). So check for this at the start of
qla2x00_abort_all_cmds and exit accordingly.

I've tested out these changes thoroughly failing initialization at
various times. I've also used kprobes to inject errors to force us
into various error paths.
---
 drivers/scsi/qla2xxx/qla_os.c | 59 +++++++++++++++++++++++++++----------------
 1 file changed, 37 insertions(+), 22 deletions(-)

diff --git a/drivers/scsi/qla2xxx/qla_os.c b/drivers/scsi/qla2xxx/qla_os.c
index afcb5567..3860bdfc 100644
--- a/drivers/scsi/qla2xxx/qla_os.c
+++ b/drivers/scsi/qla2xxx/qla_os.c
@@ -454,7 +454,7 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, struct req_que *req,
 	ha->req_q_map[0] = req;
 	set_bit(0, ha->rsp_qid_map);
 	set_bit(0, ha->req_qid_map);
-	return 1;
+	return 0;
 
 fail_qpair_map:
 	kfree(ha->base_qpair);
@@ -471,6 +471,9 @@ static int qla2x00_alloc_queues(struct qla_hw_data *ha, struct req_que *req,
 
 static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
 {
+	if (!ha->req_q_map)
+		return;
+
 	if (IS_QLAFX00(ha)) {
 		if (req && req->ring_fx00)
 			dma_free_coherent(&ha->pdev->dev,
@@ -481,14 +484,17 @@ static void qla2x00_free_req_que(struct qla_hw_data *ha, struct req_que *req)
 		(req->length + 1) * sizeof(request_t),
 		req->ring, req->dma);
 
-	if (req)
+	if (req) {
 		kfree(req->outstanding_cmds);
-
-	kfree(req);
+		kfree(req);
+	}
 }
 
 static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
 {
+	if (!ha->rsp_q_map)
+		return;
+
 	if (IS_QLAFX00(ha)) {
 		if (rsp && rsp->ring)
 			dma_free_coherent(&ha->pdev->dev,
@@ -499,7 +505,8 @@ static void qla2x00_free_rsp_que(struct qla_hw_data *ha, struct rsp_que *rsp)
 		(rsp->length + 1) * sizeof(response_t),
 		rsp->ring, rsp->dma);
 	}
-	kfree(rsp);
+	if (rsp)
+		kfree(rsp);
 }
 
 static void qla2x00_free_queues(struct qla_hw_data *ha)
@@ -1723,6 +1730,8 @@ uint32_t qla2x00_isp_reg_stat(struct qla_hw_data *ha)
 	struct qla_tgt_cmd *cmd;
 	uint8_t trace = 0;
 
+	if (!ha->req_q_map)
+		return;
 	spin_lock_irqsave(qp->qp_lock_ptr, flags);
 	req = qp->req;
 	for (cnt = 1; cnt < req->num_outstanding_cmds; cnt++) {
@@ -3095,14 +3104,14 @@ static void qla2x00_iocb_work_fn(struct work_struct *work)
 	/* Set up the irqs */
 	ret = qla2x00_request_irqs(ha, rsp);
 	if (ret)
-		goto probe_hw_failed;
+		goto probe_failed;
 
 	/* Alloc arrays of request and response ring ptrs */
-	if (!qla2x00_alloc_queues(ha, req, rsp)) {
+	if (qla2x00_alloc_queues(ha, req, rsp)) {
 		ql_log(ql_log_fatal, base_vha, 0x003d,
 		    "Failed to allocate memory for queue pointers..."
 		    "aborting.\n");
-		goto probe_init_failed;
+		goto probe_failed;
 	}
 
 	if (ha->mqenable && shost_use_blk_mq(host)) {
@@ -3387,15 +3396,6 @@ static void qla2x00_iocb_work_fn(struct work_struct *work)
 
 	return 0;
 
-probe_init_failed:
-	qla2x00_free_req_que(ha, req);
-	ha->req_q_map[0] = NULL;
-	clear_bit(0, ha->req_qid_map);
-	qla2x00_free_rsp_que(ha, rsp);
-	ha->rsp_q_map[0] = NULL;
-	clear_bit(0, ha->rsp_qid_map);
-	ha->max_req_queues = ha->max_rsp_queues = 0;
-
 probe_failed:
 	if (base_vha->timer_active)
 		qla2x00_stop_timer(base_vha);
@@ -4508,11 +4508,17 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, fc_port_t *fcport,
 	if (ha->init_cb)
 		dma_free_coherent(&ha->pdev->dev, ha->init_cb_size,
 			ha->init_cb, ha->init_cb_dma);
-	vfree(ha->optrom_buffer);
-	kfree(ha->nvram);
-	kfree(ha->npiv_info);
-	kfree(ha->swl);
-	kfree(ha->loop_id_map);
+
+	if (ha->optrom_buffer)
+		vfree(ha->optrom_buffer);
+	if (ha->nvram)
+		kfree(ha->nvram);
+	if (ha->npiv_info)
+		kfree(ha->npiv_info);
+	if (ha->swl)
+		kfree(ha->swl);
+	if (ha->loop_id_map)
+		kfree(ha->loop_id_map);
 
 	ha->srb_mempool = NULL;
 	ha->ctx_mempool = NULL;
@@ -4528,6 +4534,15 @@ void qla2x00_mark_device_lost(scsi_qla_host_t *vha, fc_port_t *fcport,
 	ha->ex_init_cb_dma = 0;
 	ha->async_pd = NULL;
 	ha->async_pd_dma = 0;
+	ha->loop_id_map = NULL;
+	ha->npiv_info = NULL;
+	ha->optrom_buffer = NULL;
+	ha->swl = NULL;
+	ha->nvram = NULL;
+	ha->mctp_dump = NULL;
+	ha->dcbx_tlv = NULL;
+	ha->xgmac_data = NULL;
+	ha->sfp_data = NULL;
 
 	ha->s_dma_pool = NULL;
 	ha->dl_dma_pool = NULL;
-- 
1.8.3.1




[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]
[Index of Archives]     [SCSI Target Devel]     [Linux SCSI Target Infrastructure]     [Kernel Newbies]     [IDE]     [Security]     [Git]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Linux ATA RAID]     [Linux IIO]     [Samba]     [Device Mapper]

  Powered by Linux