On Thu, Mar 09, 2023 at 04:33:07PM -0600, Mike Christie wrote: > «Внимание! Данное письмо от внешнего адресата!» > > This fixes a bug where an initiator thinks a LUN_RESET has cleaned > up running commands when it hasn't. The bug was added in: > > commit 51ec502a3266 ("target: Delete tmr from list before processing") > > The problem occurs when: > > 1. We have N IO cmds running in the target layer spread over 2 sessions. > 2. The initiator sends a LUN_RESET for each session. > 3. session1's LUN_RESET loops over all the running commands from both > sessions and moves them to its local drain_task_list. > 4. session2's LUN_RESET does not see the LUN_RESET from session1 because > the commit above has it remove itself. session2 also does not see any > commands since the other reset moved them off the state lists. > 5. sessions2's LUN_RESET will then complete with a successful response. > 6. sessions2's inititor believes the running commands on its session are > now cleaned up due to the successful response and cleans up the running > commands from its side. It then restarts them. > 7. The commands do eventually complete on the backend and the target > starts to return aborted task statuses for them. The initiator will > either throw a invalid ITT error or might accidentally lookup a new task > if the ITT has been reallocated already. > > This fixes the bug by reverting the patch, and also serializes the > execution of LUN_RESETs and Preempt and Aborts. The latter is necessary > because it turns out the commit accidentally fixed a bug where if there > are 2 LUN RESETs executing they can see each other on the dev_tmr_list, > put the other one on their local drain list, then end up waiting on each > other resulting in a deadlock. If LUN_RESET is not in TMR list anymore there is no need to serialize core_tmr_drain_tmr_list. > > Fixes: 51ec502a3266 ("target: Delete tmr from list before processing") > Signed-off-by: Mike Christie <michael.christie@xxxxxxxxxx> > --- > drivers/target/target_core_device.c | 15 ++++++-- > drivers/target/target_core_tmr.c | 15 ++++---- > drivers/target/target_core_transport.c | 50 ++++++++++++++++++++++++-- > include/target/target_core_base.h | 5 ++- > 4 files changed, 74 insertions(+), 11 deletions(-) > > diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c > index f6e58410ec3f..c9f75ed1566b 100644 > --- a/drivers/target/target_core_device.c > +++ b/drivers/target/target_core_device.c > @@ -179,7 +179,16 @@ int transport_lookup_tmr_lun(struct se_cmd *se_cmd) > se_tmr->tmr_dev = rcu_dereference_raw(se_lun->lun_se_dev); > > spin_lock_irqsave(&se_tmr->tmr_dev->se_tmr_lock, flags); > - list_add_tail(&se_tmr->tmr_list, &se_tmr->tmr_dev->dev_tmr_list); > + switch (se_tmr->function) { > + case TMR_ABORT_TASK: > + list_add_tail(&se_tmr->tmr_list, > + &se_tmr->tmr_dev->generic_tmr_list); > + break; > + case TMR_LUN_RESET: > + list_add_tail(&se_tmr->tmr_list, > + &se_tmr->tmr_dev->lun_reset_tmr_list); > + break; > + } > spin_unlock_irqrestore(&se_tmr->tmr_dev->se_tmr_lock, flags); > > return 0; > @@ -761,7 +770,8 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) > dev->hba_index = hba->hba_index; > > INIT_LIST_HEAD(&dev->dev_sep_list); > - INIT_LIST_HEAD(&dev->dev_tmr_list); > + INIT_LIST_HEAD(&dev->generic_tmr_list); > + INIT_LIST_HEAD(&dev->lun_reset_tmr_list); > INIT_LIST_HEAD(&dev->delayed_cmd_list); > INIT_LIST_HEAD(&dev->qf_cmd_list); > spin_lock_init(&dev->delayed_cmd_lock); > @@ -782,6 +792,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) > spin_lock_init(&dev->t10_alua.lba_map_lock); > > INIT_WORK(&dev->delayed_cmd_work, target_do_delayed_work); > + mutex_init(&dev->lun_reset_mutex); > > dev->t10_wwn.t10_dev = dev; > /* > diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c > index 2b95b4550a63..88d2a7839876 100644 > --- a/drivers/target/target_core_tmr.c > +++ b/drivers/target/target_core_tmr.c > @@ -184,13 +184,11 @@ static void core_tmr_drain_tmr_list( > unsigned long flags; > bool rc; > /* > - * Release all pending and outgoing TMRs aside from the received > - * LUN_RESET tmr.. > + * Release all pending and outgoing TMRs except for LUN_RESETS. > */ > spin_lock_irqsave(&dev->se_tmr_lock, flags); > - if (tmr) > - list_del_init(&tmr->tmr_list); > - list_for_each_entry_safe(tmr_p, tmr_pp, &dev->dev_tmr_list, tmr_list) { > + list_for_each_entry_safe(tmr_p, tmr_pp, &dev->generic_tmr_list, > + tmr_list) { > cmd = tmr_p->task_cmd; > if (!cmd) { > pr_err("Unable to locate struct se_cmd for TMR\n"); > @@ -379,14 +377,19 @@ int core_tmr_lun_reset( > tmr_nacl->initiatorname); > } > } > + > + /* Serialize LUN RESET TMRs and preempt and aborts */ > + mutex_lock(&dev->lun_reset_mutex); > + > pr_debug("LUN_RESET: %s starting for [%s], tas: %d\n", > (preempt_and_abort_list) ? "Preempt" : "TMR", > dev->transport->name, tas); > - > core_tmr_drain_tmr_list(dev, tmr, preempt_and_abort_list); > core_tmr_drain_state_list(dev, prout_cmd, tmr_sess, tas, > preempt_and_abort_list); > > + mutex_unlock(&dev->lun_reset_mutex); > + > /* > * Clear any legacy SPC-2 reservation when called during > * LOGICAL UNIT RESET > diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c > index 1c23079a5d7f..3c732b1b5389 100644 > --- a/drivers/target/target_core_transport.c > +++ b/drivers/target/target_core_transport.c > @@ -3574,6 +3574,7 @@ static void target_tmr_work(struct work_struct *work) > struct se_cmd *cmd = container_of(work, struct se_cmd, work); > struct se_device *dev = cmd->se_dev; > struct se_tmr_req *tmr = cmd->se_tmr_req; > + bool sched_reset = false; > int ret; > > if (cmd->transport_state & CMD_T_ABORTED) > @@ -3596,6 +3597,22 @@ static void target_tmr_work(struct work_struct *work) > target_dev_ua_allocate(dev, 0x29, > ASCQ_29H_BUS_DEVICE_RESET_FUNCTION_OCCURRED); > } > + > + /* > + * If this is the last reset the device can be freed after we > + * run transport_cmd_check_stop_to_fabric. Figure out if there > + * are other resets that need to be scheduled while we know we > + * have a refcount on the device. > + */ > + spin_lock_irq(&dev->se_tmr_lock); tmr->tmr_list is removed from the list in the very end of se_cmd lifecycle so any number of LUN_RESETs can be in lun_reset_tmr_list. And all of them can be finished but not yet removed from the list. You may delete lun_reset here with nulling tmr->tmr_dev: + list_del_init(&cmd->se_tmr_req->tmr_list); + cmd->se_tmr_req->tmr_dev = NULL; Then the check below will be just + if (!list_empty(dev->lun_reset_tmr_list)) > + if (list_first_entry(&dev->lun_reset_tmr_list, > + struct se_tmr_req, tmr_list) != > + list_last_entry(&dev->lun_reset_tmr_list, > + struct se_tmr_req, tmr_list)) > + sched_reset = true; > + else > + dev->dev_flags &= ~DF_RESETTING_LUN; > + spin_unlock_irq(&dev->se_tmr_lock); > break; > case TMR_TARGET_WARM_RESET: > tmr->response = TMR_FUNCTION_REJECTED; > @@ -3617,15 +3634,26 @@ static void target_tmr_work(struct work_struct *work) > > transport_lun_remove_cmd(cmd); > transport_cmd_check_stop_to_fabric(cmd); > + > + if (!sched_reset) > + return; > + > + spin_lock_irq(&dev->se_tmr_lock); > + tmr = list_first_entry(&dev->lun_reset_tmr_list, struct se_tmr_req, > + tmr_list); And this list_first_entry will return the next LUN_RESET as you expected. > + spin_unlock_irq(&dev->se_tmr_lock); > + > + INIT_WORK(&tmr->task_cmd->work, target_tmr_work); > + schedule_work(&tmr->task_cmd->work); > return; > > aborted: > target_handle_abort(cmd); > } > > -int transport_generic_handle_tmr( > - struct se_cmd *cmd) > +int transport_generic_handle_tmr(struct se_cmd *cmd) > { > + struct se_device *dev = cmd->se_dev; > unsigned long flags; > bool aborted = false; > > @@ -3646,8 +3674,26 @@ int transport_generic_handle_tmr( > return 0; > } > > + spin_lock_irqsave(&dev->se_tmr_lock, flags); > + if (cmd->se_tmr_req->function == TMR_LUN_RESET) { > + /* > + * We only allow one reset to execute at a time to prevent > + * one reset waiting on another, and to make sure one reset > + * does not claim all the cmds causing the other reset to > + * return early. > + */ > + if (dev->dev_flags & DF_RESETTING_LUN) { > + spin_unlock_irqrestore(&dev->se_tmr_lock, flags); > + goto done; > + } > + > + dev->dev_flags |= DF_RESETTING_LUN; Not good choise of flag variable. It is used at configuration time and not under a lock. Configfs file dev/alias can be changed in any time and could race with LUN_RESET. > + } > + spin_unlock_irqrestore(&dev->se_tmr_lock, flags); > + > INIT_WORK(&cmd->work, target_tmr_work); > schedule_work(&cmd->work); > +done: > return 0; > } > EXPORT_SYMBOL(transport_generic_handle_tmr); > diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h > index bd299790e99c..0a5b51f8e5e8 100644 > --- a/include/target/target_core_base.h > +++ b/include/target/target_core_base.h > @@ -804,6 +804,7 @@ struct se_device { > #define DF_USING_UDEV_PATH 0x00000008 > #define DF_USING_ALIAS 0x00000010 > #define DF_READ_ONLY 0x00000020 > +#define DF_RESETTING_LUN 0x00000040 > u8 transport_flags; > /* Physical device queue depth */ > u32 queue_depth; > @@ -840,7 +841,8 @@ struct se_device { > /* Used for SPC-3 Persistent Reservations */ > struct t10_pr_registration *dev_pr_res_holder; > struct list_head dev_sep_list; > - struct list_head dev_tmr_list; > + struct list_head generic_tmr_list; > + struct list_head lun_reset_tmr_list; > struct work_struct qf_work_queue; > struct work_struct delayed_cmd_work; > struct list_head delayed_cmd_list; > @@ -872,6 +874,7 @@ struct se_device { > struct rcu_head rcu_head; > int queue_cnt; > struct se_device_queue *queues; > + struct mutex lun_reset_mutex; > }; > > struct target_opcode_descriptor { > -- > 2.31.1 > >