> -----Original Message----- > From: Tomas Henzl [mailto:thenzl@xxxxxxxxxx] > Sent: Friday, September 12, 2014 12:50 AM > To: Kashyap Desai > Cc: Sumit Saxena; linux-scsi@xxxxxxxxxxxxxxx; Martin K. Petersen; > Christoph > Hellwig; jbottomley@xxxxxxxxxxxxx; aradford > Subject: Re: [PATCH 10/11] megaraid_sas : MFI MPT linked list corruption > fix > > On 09/11/2014 08:41 PM, Kashyap Desai wrote: > > On Thu, Sep 11, 2014 at 5:53 PM, Tomas Henzl <thenzl@xxxxxxxxxx> > wrote: > >> On 09/11/2014 04:48 AM, Kashyap Desai wrote: > >>> On Wed, Sep 10, 2014 at 8:36 PM, Tomas Henzl <thenzl@xxxxxxxxxx> > wrote: > >>>> On 09/06/2014 03:25 PM, Sumit.Saxena@xxxxxxxxxxxxx wrote: > >>>>> Problem statement: > >>>>> MFI link list in megaraid_sas driver is used from mfi-mpt > >>>>> pass-through > commands. > >>>>> This list can be corrupted due to many possible race conditions in > >>>>> driver and eventually we may see kernel panic. > >>>>> > >>>>> One example - > >>>>> MFI frame is freed from calling process as driver send command via > >>>>> polling method and interrupt for that command comes after driver > >>>>> free mfi frame (actually even after some other context reuse the > >>>>> mfi frame). When driver receive MPT frame in ISR, driver will be > >>>>> using > the index of MFI and access that MFI frame and finally in-used MFI frame’s > list will be corrupted. > >>>>> > >>>>> High level description of new solution - Free MFI and MPT command > >>>>> from same context. > >>>>> Free both the command either from process (from where mfi-mpt > >>>>> pass-through was called) or from ISR context. Do not split freeing > >>>>> of MFI and MPT, because it creates the race condition which will do > MFI/MPT list corruption. > >>>>> > >>>>> Renamed the cmd_pool_lock which is used in instance as well as > fusion with below name. > >>>>> mfi_pool_lock and mpt_pool_lock to add more code readability. > >>>>> > >>>>> Signed-off-by: Sumit Saxena <sumit.saxena@xxxxxxxxxxxxx> > >>>>> Signed-off-by: Kashyap Desai <kashyap.desai@xxxxxxxxxxxxx> > >>>>> --- > >>>>> drivers/scsi/megaraid/megaraid_sas.h | 25 +++- > >>>>> drivers/scsi/megaraid/megaraid_sas_base.c | 196 > ++++++++++++++++++++-------- > >>>>> drivers/scsi/megaraid/megaraid_sas_fusion.c | 95 ++++++++++---- > >>>>> drivers/scsi/megaraid/megaraid_sas_fusion.h | 2 +- > >>>>> 4 files changed, 235 insertions(+), 83 deletions(-) > >>>>> > >>>>> diff --git a/drivers/scsi/megaraid/megaraid_sas.h > >>>>> b/drivers/scsi/megaraid/megaraid_sas.h > >>>>> index 156d4b9..f99db18 100644 > >>>>> --- a/drivers/scsi/megaraid/megaraid_sas.h > >>>>> +++ b/drivers/scsi/megaraid/megaraid_sas.h > >>>>> @@ -1016,6 +1016,12 @@ struct megasas_ctrl_info { > >>>>> > >>>>> #define VD_EXT_DEBUG 0 > >>>>> > >>>>> +enum MR_MFI_MPT_PTHR_FLAGS { > >>>>> + MFI_MPT_DETACHED = 0, > >>>>> + MFI_LIST_ADDED = 1, > >>>>> + MFI_MPT_ATTACHED = 2, > >>>>> +}; > >>>>> + > >>>>> /* Frame Type */ > >>>>> #define IO_FRAME 0 > >>>>> #define PTHRU_FRAME 1 > >>>>> @@ -1033,7 +1039,7 @@ struct megasas_ctrl_info { > >>>>> #define MEGASAS_IOCTL_CMD 0 > >>>>> #define MEGASAS_DEFAULT_CMD_TIMEOUT 90 > >>>>> #define MEGASAS_THROTTLE_QUEUE_DEPTH 16 > >>>>> - > >>>>> +#define MEGASAS_BLOCKED_CMD_TIMEOUT 60 > >>>>> /* > >>>>> * FW reports the maximum of number of commands that it can > accept (maximum > >>>>> * commands that can be outstanding) at any time. The driver must > >>>>> report a @@ -1652,7 +1658,7 @@ struct megasas_instance { > >>>>> struct megasas_cmd **cmd_list; > >>>>> struct list_head cmd_pool; > >>>>> /* used to sync fire the cmd to fw */ > >>>>> - spinlock_t cmd_pool_lock; > >>>>> + spinlock_t mfi_pool_lock; > >>>>> /* used to sync fire the cmd to fw */ > >>>>> spinlock_t hba_lock; > >>>>> /* used to synch producer, consumer ptrs in dpc */ @@ > >>>>> -1839,6 +1845,11 @@ struct megasas_cmd { > >>>>> > >>>>> struct list_head list; > >>>>> struct scsi_cmnd *scmd; > >>>>> + > >>>>> + void *mpt_pthr_cmd_blocked; > >>>>> + atomic_t mfi_mpt_pthr; > >>>>> + u8 is_wait_event; > >>>>> + > >>>>> struct megasas_instance *instance; > >>>>> union { > >>>>> struct { > >>>>> @@ -1927,4 +1938,14 @@ int > megasas_set_crash_dump_params(struct > >>>>> megasas_instance *instance, void > >>>>> megasas_free_host_crash_buffer(struct megasas_instance > *instance); > >>>>> void megasas_fusion_crash_dump_wq(struct work_struct *work); > >>>>> > >>>>> +void megasas_return_cmd_fusion(struct megasas_instance > *instance, > >>>>> + struct megasas_cmd_fusion *cmd); int > >>>>> +megasas_issue_blocked_cmd(struct megasas_instance *instance, > >>>>> + struct megasas_cmd *cmd, int timeout); void > >>>>> +__megasas_return_cmd(struct megasas_instance *instance, > >>>>> + struct megasas_cmd *cmd); > >>>>> + > >>>>> +void megasas_return_mfi_mpt_pthr(struct megasas_instance > *instance, > >>>>> + struct megasas_cmd *cmd_mfi, struct megasas_cmd_fusion > >>>>> +*cmd_fusion); > >>>>> + > >>>>> #endif /*LSI_MEGARAID_SAS_H */ > >>>>> diff --git a/drivers/scsi/megaraid/megaraid_sas_base.c > >>>>> b/drivers/scsi/megaraid/megaraid_sas_base.c > >>>>> index 086beee..50d69eb 100644 > >>>>> --- a/drivers/scsi/megaraid/megaraid_sas_base.c > >>>>> +++ b/drivers/scsi/megaraid/megaraid_sas_base.c > >>>>> @@ -210,43 +210,66 @@ struct megasas_cmd > *megasas_get_cmd(struct megasas_instance > >>>>> unsigned long flags; > >>>>> struct megasas_cmd *cmd = NULL; > >>>>> > >>>>> - spin_lock_irqsave(&instance->cmd_pool_lock, flags); > >>>>> + spin_lock_irqsave(&instance->mfi_pool_lock, flags); > >>>>> > >>>>> if (!list_empty(&instance->cmd_pool)) { > >>>>> cmd = list_entry((&instance->cmd_pool)->next, > >>>>> struct megasas_cmd, list); > >>>>> list_del_init(&cmd->list); > >>>>> + atomic_set(&cmd->mfi_mpt_pthr, MFI_MPT_DETACHED); > >>>>> } else { > >>>>> printk(KERN_ERR "megasas: Command pool empty!\n"); > >>>>> } > >>>>> > >>>>> - spin_unlock_irqrestore(&instance->cmd_pool_lock, flags); > >>>>> + spin_unlock_irqrestore(&instance->mfi_pool_lock, flags); > >>>>> return cmd; > >>>>> } > >>>>> > >>>>> /** > >>>>> - * megasas_return_cmd - Return a cmd to free command pool > >>>>> + * __megasas_return_cmd - Return a cmd to free command pool > >>>>> * @instance: Adapter soft state > >>>>> * @cmd: Command packet to be returned to free command > pool > >>>>> */ > >>>>> inline void > >>>>> -megasas_return_cmd(struct megasas_instance *instance, struct > >>>>> megasas_cmd *cmd) > >>>>> +__megasas_return_cmd(struct megasas_instance *instance, struct > >>>>> +megasas_cmd *cmd) > >>>>> { > >>>>> - unsigned long flags; > >>>>> - > >>>>> - spin_lock_irqsave(&instance->cmd_pool_lock, flags); > >>>>> + /* > >>>>> + * Don't go ahead and free the MFI frame, if corresponding > >>>>> + * MPT frame is not freed(valid for only fusion adapters). > >>>>> + * In case of MFI adapters, anyways for any allocated MFI > >>>>> + * frame will have cmd->mfi_mpt_mpthr set to > MFI_MPT_DETACHED > >>>>> + */ > >>>>> + if (atomic_read(&cmd->mfi_mpt_pthr) != MFI_MPT_DETACHED) > >>>>> + return; > >>>>> > >>>>> cmd->scmd = NULL; > >>>>> cmd->frame_count = 0; > >>>>> + cmd->is_wait_event = 0; > >>>>> + cmd->mpt_pthr_cmd_blocked = NULL; > >>>>> + > >>>>> if ((instance->pdev->device != PCI_DEVICE_ID_LSI_FUSION) && > >>>>> - (instance->pdev->device != PCI_DEVICE_ID_LSI_PLASMA) && > >>>>> (instance->pdev->device != PCI_DEVICE_ID_LSI_INVADER) && > >>>>> (instance->pdev->device != PCI_DEVICE_ID_LSI_FURY) && > >>>>> (reset_devices)) > >>>>> cmd->frame->hdr.cmd = MFI_CMD_INVALID; > >>>>> - list_add_tail(&cmd->list, &instance->cmd_pool); > >>>>> > >>>>> - spin_unlock_irqrestore(&instance->cmd_pool_lock, flags); > >>>>> + atomic_set(&cmd->mfi_mpt_pthr, MFI_LIST_ADDED); > >>>>> + list_add(&cmd->list, (&instance->cmd_pool)->next); } > >>>>> + > >>>>> +/** > >>>>> + * megasas_return_cmd - Return a cmd to free command pool > >>>>> + * @instance: Adapter soft state > >>>>> + * @cmd: Command packet to be returned to free command > pool > >>>>> + */ > >>>>> +inline void > >>>>> +megasas_return_cmd(struct megasas_instance *instance, struct > >>>>> +megasas_cmd *cmd) { > >>>>> + unsigned long flags; > >>>>> + > >>>>> + spin_lock_irqsave(&instance->mfi_pool_lock, flags); > >>>>> + __megasas_return_cmd(instance, cmd); > >>>>> + spin_unlock_irqrestore(&instance->mfi_pool_lock, flags); > >>>>> } > >>>>> > >>>>> > >>>>> @@ -925,13 +948,14 @@ megasas_issue_polled(struct > megasas_instance *instance, struct megasas_cmd *cmd) > >>>>> * Max wait time is MEGASAS_INTERNAL_CMD_WAIT_TIME secs > >>>>> * Used to issue ioctl commands. > >>>>> */ > >>>>> -static int > >>>>> +int > >>>>> megasas_issue_blocked_cmd(struct megasas_instance *instance, > >>>>> struct megasas_cmd *cmd, int timeout) { > >>>>> int ret = 0; > >>>>> cmd->cmd_status = ENODATA; > >>>>> > >>>>> + cmd->is_wait_event = 1; > >>>>> instance->instancet->issue_dcmd(instance, cmd); > >>>>> if (timeout) { > >>>>> ret = wait_event_timeout(instance->int_cmd_wait_q, > >>>>> @@ -1903,7 +1927,12 @@ out: > >>>>> new_affiliation_111, > >>>>> new_affiliation_111_h); > >>>>> } > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> return retval; > >>>>> } > >>>>> @@ -2070,7 +2099,11 @@ out: > >>>>> (MAX_LOGICAL_DRIVES + 1) * > >>>>> sizeof(struct > >>>>> MR_LD_VF_AFFILIATION), > >>>>> new_affiliation, > >>>>> new_affiliation_h); > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> return retval; > >>>>> } > >>>>> @@ -2530,7 +2563,12 @@ megasas_service_aen(struct > megasas_instance *instance, struct megasas_cmd *cmd) > >>>>> cmd->abort_aen = 0; > >>>>> > >>>>> instance->aen_cmd = NULL; > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> if ((instance->unload == 0) && > >>>>> ((instance->issuepend_done == 1))) { @@ -2906,7 > >>>>> +2944,8 @@ megasas_complete_cmd(struct megasas_instance > *instance, struct megasas_cmd *cmd, > >>>>> "failed, status = > >>>>> 0x%x.\n", > >>>>> > >>>>> cmd->frame->hdr.cmd_status); > >>>>> else { > >>>>> - megasas_return_cmd(instance, > >>>>> cmd); > >>>>> + > >>>>> megasas_return_mfi_mpt_pthr(instance, > >>>>> + cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> spin_unlock_irqrestore( > >>>>> > >>>>> instance->host->host_lock, > >>>>> flags); @@ -2914,7 > >>>>> +2953,8 @@ megasas_complete_cmd(struct megasas_instance > *instance, struct megasas_cmd *cmd, > >>>>> } > >>>>> } else > >>>>> instance->map_id++; > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> > >>>>> /* > >>>>> * Set fast path IO to ZERO. > >>>>> @@ -3070,7 +3110,7 @@ megasas_internal_reset_defer_cmds(struct > megasas_instance *instance) > >>>>> unsigned long flags; > >>>>> > >>>>> defer_index = 0; > >>>>> - spin_lock_irqsave(&instance->cmd_pool_lock, flags); > >>>>> + spin_lock_irqsave(&instance->mfi_pool_lock, flags); > >>>>> for (i = 0; i < max_cmd; i++) { > >>>>> cmd = instance->cmd_list[i]; > >>>>> if (cmd->sync_cmd == 1 || cmd->scmd) { @@ -3091,7 > >>>>> +3131,7 @@ megasas_internal_reset_defer_cmds(struct > megasas_instance *instance) > >>>>> &instance->internal_reset_pending_q); > >>>>> } > >>>>> } > >>>>> - spin_unlock_irqrestore(&instance->cmd_pool_lock, flags); > >>>>> + spin_unlock_irqrestore(&instance->mfi_pool_lock, flags); > >>>>> } > >>>>> > >>>>> > >>>>> @@ -3656,7 +3696,9 @@ int megasas_alloc_cmds(struct > megasas_instance *instance) > >>>>> int j; > >>>>> u32 max_cmd; > >>>>> struct megasas_cmd *cmd; > >>>>> + struct fusion_context *fusion; > >>>>> > >>>>> + fusion = instance->ctrl_context; > >>>>> max_cmd = instance->max_mfi_cmds; > >>>>> > >>>>> /* > >>>>> @@ -3689,13 +3731,11 @@ int megasas_alloc_cmds(struct > megasas_instance *instance) > >>>>> } > >>>>> } > >>>>> > >>>>> - /* > >>>>> - * Add all the commands to command pool (instance->cmd_pool) > >>>>> - */ > >>>>> for (i = 0; i < max_cmd; i++) { > >>>>> cmd = instance->cmd_list[i]; > >>>>> memset(cmd, 0, sizeof(struct megasas_cmd)); > >>>>> cmd->index = i; > >>>>> + atomic_set(&cmd->mfi_mpt_pthr, MFI_LIST_ADDED); > >>>>> cmd->scmd = NULL; > >>>>> cmd->instance = instance; > >>>>> > >>>>> @@ -3766,11 +3806,11 @@ megasas_get_pd_list(struct > megasas_instance *instance) > >>>>> dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); > >>>>> dcmd->sgl.sge32[0].length = cpu_to_le32(MEGASAS_MAX_PD * > >>>>> sizeof(struct MR_PD_LIST)); > >>>>> > >>>>> - if (!megasas_issue_polled(instance, cmd)) { > >>>>> - ret = 0; > >>>>> - } else { > >>>>> - ret = -1; > >>>>> - } > >>>>> + if (instance->ctrl_context && !instance->mask_interrupts) > >>>>> + ret = megasas_issue_blocked_cmd(instance, cmd, > >>>>> + MEGASAS_BLOCKED_CMD_TIMEOUT); > >>>>> + else > >>>>> + ret = megasas_issue_polled(instance, cmd); > >>>>> > >>>>> /* > >>>>> * the following function will get the instance PD LIST. > >>>>> @@ -3802,7 +3842,12 @@ megasas_get_pd_list(struct > megasas_instance *instance) > >>>>> pci_free_consistent(instance->pdev, > >>>>> MEGASAS_MAX_PD * sizeof(struct > >>>>> MR_PD_LIST), > >>>>> ci, ci_h); > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> return ret; > >>>>> } > >>>>> @@ -3861,11 +3906,12 @@ megasas_get_ld_list(struct > megasas_instance *instance) > >>>>> dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct > MR_LD_LIST)); > >>>>> dcmd->pad_0 = 0; > >>>>> > >>>>> - if (!megasas_issue_polled(instance, cmd)) { > >>>>> - ret = 0; > >>>>> - } else { > >>>>> - ret = -1; > >>>>> - } > >>>>> + if (instance->ctrl_context && !instance->mask_interrupts) > >>>>> + ret = megasas_issue_blocked_cmd(instance, cmd, > >>>>> + MEGASAS_BLOCKED_CMD_TIMEOUT); > >>>>> + else > >>>>> + ret = megasas_issue_polled(instance, cmd); > >>>>> + > >>>>> > >>>>> ld_count = le32_to_cpu(ci->ldCount); > >>>>> > >>>>> @@ -3888,7 +3934,11 @@ megasas_get_ld_list(struct > megasas_instance *instance) > >>>>> ci, > >>>>> ci_h); > >>>>> > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> return ret; > >>>>> } > >>>>> > >>>>> @@ -3949,12 +3999,11 @@ megasas_ld_list_query(struct > megasas_instance *instance, u8 query_type) > >>>>> dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct > MR_LD_TARGETID_LIST)); > >>>>> dcmd->pad_0 = 0; > >>>>> > >>>>> - if (!megasas_issue_polled(instance, cmd) && !dcmd->cmd_status) > { > >>>>> - ret = 0; > >>>>> - } else { > >>>>> - /* On failure, call older LD list DCMD */ > >>>>> - ret = 1; > >>>>> - } > >>>>> + if (instance->ctrl_context && !instance->mask_interrupts) > >>>>> + ret = megasas_issue_blocked_cmd(instance, cmd, > >>>>> + MEGASAS_BLOCKED_CMD_TIMEOUT); > >>>>> + else > >>>>> + ret = megasas_issue_polled(instance, cmd); > >>>>> > >>>>> tgtid_count = le32_to_cpu(ci->count); > >>>>> > >>>>> @@ -3970,7 +4019,11 @@ megasas_ld_list_query(struct > megasas_instance *instance, u8 query_type) > >>>>> pci_free_consistent(instance->pdev, sizeof(struct > MR_LD_TARGETID_LIST), > >>>>> ci, ci_h); > >>>>> > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> return ret; > >>>>> } > >>>>> @@ -4027,17 +4080,23 @@ megasas_get_ctrl_info(struct > megasas_instance *instance, > >>>>> dcmd->sgl.sge32[0].length = cpu_to_le32(sizeof(struct > megasas_ctrl_info)); > >>>>> dcmd->mbox.b[0] = 1; > >>>>> > >>>>> - if (!megasas_issue_polled(instance, cmd)) { > >>>>> - ret = 0; > >>>>> + if (instance->ctrl_context && !instance->mask_interrupts) > >>>>> + ret = megasas_issue_blocked_cmd(instance, cmd, > >>>>> + MEGASAS_BLOCKED_CMD_TIMEOUT); > >>>>> + else > >>>>> + ret = megasas_issue_polled(instance, cmd); > >>>>> + > >>>>> + if (!ret) > >>>>> memcpy(ctrl_info, ci, sizeof(struct > >>>>> megasas_ctrl_info)); > >>>>> - } else { > >>>>> - ret = -1; > >>>>> - } > >>>>> > >>>>> pci_free_consistent(instance->pdev, sizeof(struct > megasas_ctrl_info), > >>>>> ci, ci_h); > >>>>> > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> return ret; > >>>>> } > >>>>> > >>>>> @@ -4086,11 +4145,17 @@ int > megasas_set_crash_dump_params(struct megasas_instance *instance, > >>>>> dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(instance- > >crash_dump_h); > >>>>> dcmd->sgl.sge32[0].length = > cpu_to_le32(CRASH_DMA_BUF_SIZE); > >>>>> > >>>>> - if (!megasas_issue_polled(instance, cmd)) > >>>>> - ret = 0; > >>>>> + if (instance->ctrl_context && !instance->mask_interrupts) > >>>>> + ret = megasas_issue_blocked_cmd(instance, cmd, > >>>>> + MEGASAS_BLOCKED_CMD_TIMEOUT); > >>>>> else > >>>>> - ret = -1; > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + ret = megasas_issue_polled(instance, cmd); > >>>>> + > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> return ret; > >>>>> } > >>>>> > >>>>> @@ -4660,7 +4725,11 @@ megasas_get_seq_num(struct > megasas_instance *instance, > >>>>> pci_free_consistent(instance->pdev, sizeof(struct > megasas_evt_log_info), > >>>>> el_info, el_info_h); > >>>>> > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> return 0; > >>>>> } > >>>>> @@ -5015,7 +5084,7 @@ static int megasas_probe_one(struct > pci_dev *pdev, > >>>>> } > >>>>> fusion = instance->ctrl_context; > >>>>> INIT_LIST_HEAD(&fusion->cmd_pool); > >>>>> - spin_lock_init(&fusion->cmd_pool_lock); > >>>>> + spin_lock_init(&fusion->mpt_pool_lock); > >>>>> memset(fusion->load_balance_info, 0, > >>>>> sizeof(struct LD_LOAD_BALANCE_INFO) * > MAX_LOGICAL_DRIVES_EXT); > >>>>> } > >>>>> @@ -5084,7 +5153,7 @@ static int megasas_probe_one(struct > pci_dev *pdev, > >>>>> init_waitqueue_head(&instance->int_cmd_wait_q); > >>>>> init_waitqueue_head(&instance->abort_cmd_wait_q); > >>>>> > >>>>> - spin_lock_init(&instance->cmd_pool_lock); > >>>>> + spin_lock_init(&instance->mfi_pool_lock); > >>>>> spin_lock_init(&instance->hba_lock); > >>>>> spin_lock_init(&instance->completion_lock); > >>>>> > >>>>> @@ -5104,7 +5173,7 @@ static int megasas_probe_one(struct > pci_dev *pdev, > >>>>> instance->flag_ieee = 1; > >>>>> sema_init(&instance->ioctl_sem, > MEGASAS_SKINNY_INT_CMDS); > >>>>> } else > >>>>> - sema_init(&instance->ioctl_sem, MEGASAS_INT_CMDS); > >>>>> + sema_init(&instance->ioctl_sem, (MEGASAS_INT_CMDS - > >>>>> + 5)); > >>>>> > >>>>> megasas_dbg_lvl = 0; > >>>>> instance->flag = 0; > >>>>> @@ -5316,7 +5385,11 @@ static void megasas_flush_cache(struct > megasas_instance *instance) > >>>>> dev_err(&instance->pdev->dev, "Command timedout" > >>>>> " from %s\n", __func__); > >>>>> > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> return; > >>>>> } > >>>>> @@ -5363,7 +5436,11 @@ static void > megasas_shutdown_controller(struct megasas_instance *instance, > >>>>> dev_err(&instance->pdev->dev, "Command timedout" > >>>>> "from %s\n", __func__); > >>>>> > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> return; > >>>>> } > >>>>> @@ -6024,9 +6101,14 @@ megasas_mgmt_fw_ioctl(struct > megasas_instance *instance, > >>>>> > >>>>> le32_to_cpu(kern_sge32[i].length), > >>>>> kbuff_arr[i], > >>>>> > >>>>> le32_to_cpu(kern_sge32[i].phys_addr)); > >>>>> + kbuff_arr[i] = NULL; > >>>>> } > >>>>> > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> return error; > >>>>> } > >>>>> > >>>>> diff --git a/drivers/scsi/megaraid/megaraid_sas_fusion.c > >>>>> b/drivers/scsi/megaraid/megaraid_sas_fusion.c > >>>>> index a3de45a..e8f4f6c 100644 > >>>>> --- a/drivers/scsi/megaraid/megaraid_sas_fusion.c > >>>>> +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.c > >>>>> @@ -50,6 +50,7 @@ > >>>>> #include <scsi/scsi_cmnd.h> > >>>>> #include <scsi/scsi_device.h> > >>>>> #include <scsi/scsi_host.h> > >>>>> +#include <scsi/scsi_dbg.h> > >>>>> > >>>>> #include "megaraid_sas_fusion.h" > >>>>> #include "megaraid_sas.h" > >>>>> @@ -163,7 +164,7 @@ struct megasas_cmd_fusion > *megasas_get_cmd_fusion(struct megasas_instance > >>>>> (struct fusion_context *)instance->ctrl_context; > >>>>> struct megasas_cmd_fusion *cmd = NULL; > >>>>> > >>>>> - spin_lock_irqsave(&fusion->cmd_pool_lock, flags); > >>>>> + spin_lock_irqsave(&fusion->mpt_pool_lock, flags); > >>>>> > >>>>> if (!list_empty(&fusion->cmd_pool)) { > >>>>> cmd = list_entry((&fusion->cmd_pool)->next, > >>>>> @@ -173,7 +174,7 @@ struct megasas_cmd_fusion > *megasas_get_cmd_fusion(struct megasas_instance > >>>>> printk(KERN_ERR "megasas: Command pool (fusion) > empty!\n"); > >>>>> } > >>>>> > >>>>> - spin_unlock_irqrestore(&fusion->cmd_pool_lock, flags); > >>>>> + spin_unlock_irqrestore(&fusion->mpt_pool_lock, flags); > >>>>> return cmd; > >>>>> } > >>>>> > >>>>> @@ -182,21 +183,41 @@ struct megasas_cmd_fusion > *megasas_get_cmd_fusion(struct megasas_instance > >>>>> * @instance: Adapter soft state > >>>>> * @cmd: Command packet to be returned to free command > pool > >>>>> */ > >>>>> -static inline void > >>>>> -megasas_return_cmd_fusion(struct megasas_instance *instance, > >>>>> - struct megasas_cmd_fusion *cmd) > >>>>> +inline void megasas_return_cmd_fusion(struct megasas_instance > *instance, > >>>>> + struct megasas_cmd_fusion *cmd) > >>>>> { > >>>>> unsigned long flags; > >>>>> struct fusion_context *fusion = > >>>>> (struct fusion_context *)instance->ctrl_context; > >>>>> > >>>>> - spin_lock_irqsave(&fusion->cmd_pool_lock, flags); > >>>>> + spin_lock_irqsave(&fusion->mpt_pool_lock, flags); > >>>>> > >>>>> cmd->scmd = NULL; > >>>>> cmd->sync_cmd_idx = (u32)ULONG_MAX; > >>>>> - list_add_tail(&cmd->list, &fusion->cmd_pool); > >>>>> + list_add(&cmd->list, (&fusion->cmd_pool)->next); > >>>>> + > >>>>> + spin_unlock_irqrestore(&fusion->mpt_pool_lock, flags); } > >>>>> + > >>>>> +/** > >>>>> + * megasas_return_mfi_mpt_pthr - Return a mfi and mpt to free > command pool > >>>>> + * @instance: Adapter soft state > >>>>> + * @cmd_mfi: MFI Command packet to be returned to free > command pool > >>>>> + * @cmd_mpt: MPT Command packet to be returned to free > command pool > >>>>> + */ > >>>>> +inline void megasas_return_mfi_mpt_pthr(struct megasas_instance > *instance, > >>>>> + struct megasas_cmd *cmd_mfi, struct megasas_cmd_fusion > >>>>> +*cmd_fusion) { > >>>>> + unsigned long flags; > >>>>> > >>>>> - spin_unlock_irqrestore(&fusion->cmd_pool_lock, flags); > >>>>> + spin_lock_irqsave(&instance->mfi_pool_lock, flags); > >>>>> + megasas_return_cmd_fusion(instance, cmd_fusion); > >>>> Is the lock needed in this place for megasas_return_cmd_fusion (it > >>>> uses another lock inside) > >>> Issue what we are trying to fix is very unusual. Driver fetch the > >>> MFI frame and MPT frame from different pool. > >>> and link it via sync_cmd_idx. > >>> > >>> Driver send MFI-MPT Pass through command and do completion in sync > >>> (Just do polling on status) or async (complete from ISR via wakeup > >>> call) method. While completing in sync method (In Ideal case we > >>> should not get interrupt as caller do not expect it), there are > >>> cases (Due to some known workaround specific to MR) where interrupts > >>> are enabled and completion comes from ISR path as well. > >>> > >>> As current driver do not complete MFI and MPT at same time, we end > >>> up in link corruption because lots of places driver access frame via > >>> sync_cmd_idx. > >>> > >>> Since driver does not use common lock for MFI-MPT pool we have to > >>> still keep both the lock to avoid any issue with older > >>> controllers.(which are only MFI based and no MPT pool). I thought of > >>> doing with only mfi_pool_lock() for Fusion controller and completely > >>> remove mpt_pool_lock(). To do that, we need lots of code changes in > >>> driver just to support code around callback issue_dcmd(). > >>> There is legacy code for older controller which does not allow > >>> smooth changes to use single lock for both mpt and mfi frame, so we > >>> continue with both the lock as this is not coming under IO Path + > >>> adding code change like this will not cause much regression issues. > >> Yes, and all that is the reason why you have in __megasas_return_cmd > >> this test "if (atomic_read(&cmd->mfi_mpt_pthr) != > MFI_MPT_DETACHED) return;" > > This particular check is to avoid freeing mfi and mpt as a separate > command. > > If we hit this condition, it means... driver will later free this > > command so caller can skip this. > > > >> You free both linked mfi+mpt from this function, so when you are here > >> you know that no one else uses these two commands, correct? > >> That means you don't need to hold both locks when when freeing the > >> first command. > > We still operate in two different frame pool, so those individual > > frame need respective locks. > > Yes megasas_return_cmd_fusion uses mpt_pool_lock and > _megasas_return_cmd the mfi_pool_lock - that is OK. > I commented the fact that you take here the mfi_pool_lock for > megasas_return_cmd_fusion even though it has it's own mpt_pool_lock. Correct... just need to be careful here so will try it in next phase. > > As I already said I accept the code as it is. > > I will tomorrow check if the series is complete. Do you want to repost > any > patches? For this patch will only add comment in code for to-do item. We will send one more Resend patch series to make sure we address all comments for this patch series. > (I lost the overview a bit) and add he formal reviewed-by. Just to avoid confusion will resend whole patch series. In header will mentioned what has been changed from earlier version of the patch. + formal review by tag. > > Tomas > > > the > > We can either remove mpt lock completely and use mfi lock throughout > > to avoid this two level of locking. > > > > We can plan to remove mpt_pool_lock and use only mfi_pool_lock (with > > some rename of this variable) even if driver wants to grab mfi OR mpt > > frame use the single lock for both the pool... we just need to keep > > things working for older controllers as well. > > > > Let me mark this as To-do (probably will add comment in code) and once > > we have any progress we will post add-on patch for this. > > > > Kashyap > > > >> It's is not a bug holding both locks at once, it is just not necessary. > >> If you prefer the code as it is, I can accept it too. > >> > >> tomash > >> > >>> ~ Kashyap > >>> > >>>>> + if (atomic_read(&cmd_mfi->mfi_mpt_pthr) != > MFI_MPT_ATTACHED) > >>>>> + dev_err(&instance->pdev->dev, "Possible bug from %s > %d\n", > >>>>> + __func__, __LINE__); > >>>>> + atomic_set(&cmd_mfi->mfi_mpt_pthr, MFI_MPT_DETACHED); > >>>>> + __megasas_return_cmd(instance, cmd_mfi); > >>>> And the lock could be moved here? > >>> Covered in above inline reply. > >>> > >>>> Thanks, > >>>> Tomas > >>>> > >>>>> + spin_unlock_irqrestore(&instance->mfi_pool_lock, flags); > >>>>> } > >>>>> > >>>>> /** > >>>>> @@ -562,9 +583,11 @@ wait_and_poll(struct megasas_instance > >>>>> *instance, struct megasas_cmd *cmd, { > >>>>> int i; > >>>>> struct megasas_header *frame_hdr = &cmd->frame->hdr; > >>>>> + struct fusion_context *fusion; > >>>>> > >>>>> u32 msecs = seconds * 1000; > >>>>> > >>>>> + fusion = instance->ctrl_context; > >>>>> /* > >>>>> * Wait for cmd_status to change > >>>>> */ > >>>>> @@ -573,8 +596,12 @@ wait_and_poll(struct megasas_instance > *instance, struct megasas_cmd *cmd, > >>>>> msleep(20); > >>>>> } > >>>>> > >>>>> - if (frame_hdr->cmd_status == 0xff) > >>>>> + if (frame_hdr->cmd_status == 0xff) { > >>>>> + if (fusion) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> return -ETIME; > >>>>> + } > >>>>> > >>>>> return 0; > >>>>> } > >>>>> @@ -777,14 +804,17 @@ megasas_get_ld_map_info(struct > megasas_instance *instance) > >>>>> dcmd->sgl.sge32[0].phys_addr = cpu_to_le32(ci_h); > >>>>> dcmd->sgl.sge32[0].length = cpu_to_le32(size_map_info); > >>>>> > >>>>> - if (!megasas_issue_polled(instance, cmd)) > >>>>> - ret = 0; > >>>>> - else { > >>>>> - printk(KERN_ERR "megasas: Get LD Map Info Failed\n"); > >>>>> - ret = -1; > >>>>> - } > >>>>> + if (instance->ctrl_context && !instance->mask_interrupts) > >>>>> + ret = megasas_issue_blocked_cmd(instance, cmd, > >>>>> + MEGASAS_BLOCKED_CMD_TIMEOUT); > >>>>> + else > >>>>> + ret = megasas_issue_polled(instance, cmd); > >>>>> > >>>>> - megasas_return_cmd(instance, cmd); > >>>>> + if (instance->ctrl_context && cmd->mpt_pthr_cmd_blocked) > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd, > >>>>> + cmd->mpt_pthr_cmd_blocked); > >>>>> + else > >>>>> + megasas_return_cmd(instance, cmd); > >>>>> > >>>>> return ret; > >>>>> } > >>>>> @@ -2020,10 +2050,19 @@ complete_cmd_fusion(struct > megasas_instance *instance, u32 MSIxIndex) > >>>>> break; > >>>>> case MEGASAS_MPI2_FUNCTION_PASSTHRU_IO_REQUEST: > /*MFI command */ > >>>>> cmd_mfi = > >>>>> instance->cmd_list[cmd_fusion->sync_cmd_idx]; > >>>>> + > >>>>> + if (!cmd_mfi->mpt_pthr_cmd_blocked) { > >>>>> + if (megasas_dbg_lvl == 5) > >>>>> + dev_info(&instance->pdev->dev, > >>>>> + "freeing mfi/mpt > >>>>> pass-through " > >>>>> + "from %s %d\n", > >>>>> + __func__, __LINE__); > >>>>> + megasas_return_mfi_mpt_pthr(instance, > >>>>> cmd_mfi, > >>>>> + cmd_fusion); > >>>>> + } > >>>>> + > >>>>> megasas_complete_cmd(instance, cmd_mfi, > >>>>> DID_OK); > >>>>> cmd_fusion->flags = 0; > >>>>> - megasas_return_cmd_fusion(instance, > >>>>> cmd_fusion); > >>>>> - > >>>>> break; > >>>>> } > >>>>> > >>>>> @@ -2183,6 +2222,7 @@ build_mpt_mfi_pass_thru(struct > megasas_instance *instance, > >>>>> struct megasas_cmd_fusion *cmd; > >>>>> struct fusion_context *fusion; > >>>>> struct megasas_header *frame_hdr = &mfi_cmd->frame->hdr; > >>>>> + u32 opcode; > >>>>> > >>>>> cmd = megasas_get_cmd_fusion(instance); > >>>>> if (!cmd) > >>>>> @@ -2190,9 +2230,20 @@ build_mpt_mfi_pass_thru(struct > >>>>> megasas_instance *instance, > >>>>> > >>>>> /* Save the smid. To be used for returning the cmd */ > >>>>> mfi_cmd->context.smid = cmd->index; > >>>>> - > >>>>> cmd->sync_cmd_idx = mfi_cmd->index; > >>>>> > >>>>> + /* Set this only for Blocked commands */ > >>>>> + opcode = le32_to_cpu(mfi_cmd->frame->dcmd.opcode); > >>>>> + if ((opcode == MR_DCMD_LD_MAP_GET_INFO) > >>>>> + && (mfi_cmd->frame->dcmd.mbox.b[1] == 1)) > >>>>> + mfi_cmd->is_wait_event = 1; > >>>>> + > >>>>> + if (opcode == MR_DCMD_CTRL_EVENT_WAIT) > >>>>> + mfi_cmd->is_wait_event = 1; > >>>>> + > >>>>> + if (mfi_cmd->is_wait_event) > >>>>> + mfi_cmd->mpt_pthr_cmd_blocked = cmd; > >>>>> + > >>>>> /* > >>>>> * For cmds where the flag is set, store the flag and check > >>>>> * on completion. For cmds with this flag, don't call @@ > >>>>> -2281,6 +2332,7 @@ megasas_issue_dcmd_fusion(struct > megasas_instance *instance, > >>>>> printk(KERN_ERR "Couldn't issue MFI pass thru cmd\n"); > >>>>> return; > >>>>> } > >>>>> + atomic_set(&cmd->mfi_mpt_pthr, MFI_MPT_ATTACHED); > >>>>> instance->instancet->fire_cmd(instance, req_desc->u.low, > >>>>> req_desc->u.high, > >>>>> instance->reg_set); } @@ -2752,10 +2804,7 @@ int > >>>>> megasas_reset_fusion(struct Scsi_Host *shost, int iotimeout) > >>>>> > >>>>> cmd_list[cmd_fusion->sync_cmd_idx]; > >>>>> if > >>>>> (cmd_mfi->frame->dcmd.opcode == > >>>>> > >>>>> cpu_to_le32(MR_DCMD_LD_MAP_GET_INFO)) { > >>>>> - > >>>>> megasas_return_cmd(instance, > >>>>> - > >>>>> cmd_mfi); > >>>>> - > >>>>> megasas_return_cmd_fusion( > >>>>> - instance, > >>>>> cmd_fusion); > >>>>> + > >>>>> + megasas_return_mfi_mpt_pthr(instance, cmd_mfi, cmd_fusion); > >>>>> } else { > >>>>> req_desc = > >>>>> > >>>>> megasas_get_request_descriptor( diff --git > >>>>> a/drivers/scsi/megaraid/megaraid_sas_fusion.h > >>>>> b/drivers/scsi/megaraid/megaraid_sas_fusion.h > >>>>> index a72fa19..9822de2 100644 > >>>>> --- a/drivers/scsi/megaraid/megaraid_sas_fusion.h > >>>>> +++ b/drivers/scsi/megaraid/megaraid_sas_fusion.h > >>>>> @@ -800,7 +800,7 @@ struct fusion_context { > >>>>> struct megasas_cmd_fusion **cmd_list; > >>>>> struct list_head cmd_pool; > >>>>> > >>>>> - spinlock_t cmd_pool_lock; > >>>>> + spinlock_t mpt_pool_lock; > >>>>> > >>>>> dma_addr_t req_frames_desc_phys; > >>>>> u8 *req_frames_desc; > >>> > > > > -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html