On 12/08/2015 01:48 AM, Himanshu Madhani wrote: > From: Quinn Tran <quinn.tran@xxxxxxxxxx> > > During LUN/Target reset, the TMR code attempt to intercept > cmds and try to aborted them. Current code assume cmds are > always intercepted at the back end device. The cleanup code > would issue a "queue_status() & check_stop_free()" to terminate > the command. However, when a cmd is intercepted at the front > end/Fabric layer, current code introduce premature free or > cause Fabric to double free. > > When command is intercepted at Fabric layer, it means a > check_stop_free(cmd_kref--) has been called. The extra > check_stop_free in the Lun Reset cleanup code causes early > free. When a cmd in the Fabric layer is completed, the normal > free code adds another another free which introduce a double free. > > To fix the issue: > - add a new flag/CMD_T_SENT_STATUS to track command that have > made it down to fabric layer after back end good/bad completion. > - if cmd reach Fabric Layer at Lun Reset time, add an extra > cmd_kref count to prevent premature free. > > Signed-off-by: Quinn Tran <quinn.tran@xxxxxxxxxx> > Signed-off-by: Himanshu Madhani <himanshu.madhani@xxxxxxxxxx> > --- > drivers/target/target_core_tmr.c | 33 +++++++++++++++++++++++++++++++- > drivers/target/target_core_transport.c | 30 +++++++++++++++++++++++++++++ > include/target/target_core_base.h | 1 + > 3 files changed, 63 insertions(+), 1 deletions(-) > > diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c > index 28fb301..41f8b57 100644 > --- a/drivers/target/target_core_tmr.c > +++ b/drivers/target/target_core_tmr.c > @@ -243,7 +243,9 @@ static void core_tmr_drain_state_list( > { > LIST_HEAD(drain_task_list); > struct se_cmd *cmd, *next; > - unsigned long flags; > + unsigned long flags, flags2; > + int rmkref; > + struct se_session *se_sess; > > /* > * Complete outstanding commands with TASK_ABORTED SAM status. > @@ -282,6 +284,16 @@ static void core_tmr_drain_state_list( > if (prout_cmd == cmd) > continue; > > + se_sess = cmd->se_sess; > + /* take an extra kref to prevent cmd free race condition. */ > + spin_lock_irqsave(&se_sess->sess_cmd_lock, flags2); > + if (!kref_get_unless_zero(&cmd->cmd_kref)) { > + /* cmd is already in the free process */ > + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags2); > + continue; > + } > + spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags2); > + > list_move_tail(&cmd->state_list, &drain_task_list); > cmd->state_active = false; > } > @@ -320,9 +332,28 @@ static void core_tmr_drain_state_list( > target_stop_cmd(cmd, &flags); > > cmd->transport_state |= CMD_T_ABORTED; > + > + /* CMD_T_SENT_STATUS: cmd is down in fabric layer. > + * A check stop has been called. Keep the extra kref > + * from above because core_tmr_handle_tas_abort will > + * generate another check_stop. > + * > + * !CMD_T_SENT_STATUS: cmd intercepted at back end. > + * Remove the extra kref from above because only > + * 1 check_stop is required or generated by > + * core_tmr_handle_tas_abort() > + */ > + rmkref = 0; > + if (!((cmd->t_state == TRANSPORT_COMPLETE) && > + (cmd->transport_state & CMD_T_SENT_STATUS))) > + rmkref = 1; > + > spin_unlock_irqrestore(&cmd->t_state_lock, flags); > > core_tmr_handle_tas_abort(tmr_nacl, cmd, tas); > + > + if (rmkref) > + target_put_sess_cmd(cmd); > } > } > > diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c > index 4fdcee2..cdd18bf 100644 > --- a/drivers/target/target_core_transport.c > +++ b/drivers/target/target_core_transport.c > @@ -639,9 +639,14 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) > static void target_complete_failure_work(struct work_struct *work) > { > struct se_cmd *cmd = container_of(work, struct se_cmd, work); > + unsigned long flags; > > transport_generic_request_failure(cmd, > TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE); > + > + spin_lock_irqsave(&cmd->t_state_lock, flags); > + cmd->transport_state |= CMD_T_SENT_STATUS; > + spin_unlock_irqrestore(&cmd->t_state_lock, flags); > } > > /* > @@ -1659,6 +1664,7 @@ void transport_generic_request_failure(struct se_cmd *cmd, > sense_reason_t sense_reason) > { > int ret = 0, post_ret = 0; > + unsigned long flags; > > pr_debug("-----[ Storage Engine Exception for cmd: %p ITT: 0x%08llx" > " CDB: 0x%02x\n", cmd, cmd->tag, cmd->t_task_cdb[0]); > @@ -1670,6 +1676,10 @@ void transport_generic_request_failure(struct se_cmd *cmd, > (cmd->transport_state & CMD_T_STOP) != 0, > (cmd->transport_state & CMD_T_SENT) != 0); > > + spin_lock_irqsave(&cmd->t_state_lock, flags); > + cmd->transport_state |= CMD_T_SENT_STATUS; > + spin_unlock_irqrestore(&cmd->t_state_lock, flags); > + > /* > * For SAM Task Attribute emulation for failed struct se_cmd > */ > @@ -1951,6 +1961,7 @@ static void transport_complete_task_attr(struct se_cmd *cmd) > static void transport_complete_qf(struct se_cmd *cmd) > { > int ret = 0; > + unsigned long flags; > > transport_complete_task_attr(cmd); > > @@ -1986,6 +1997,10 @@ out: > } > transport_lun_remove_cmd(cmd); > transport_cmd_check_stop_to_fabric(cmd); > + > + spin_lock_irqsave(&cmd->t_state_lock, flags); > + cmd->transport_state |= CMD_T_SENT_STATUS; > + spin_unlock_irqrestore(&cmd->t_state_lock, flags); > } > > static void transport_handle_queue_full( > @@ -2032,6 +2047,7 @@ static void target_complete_ok_work(struct work_struct *work) > { > struct se_cmd *cmd = container_of(work, struct se_cmd, work); > int ret; > + unsigned long flags; > > /* > * Check if we need to move delayed/dormant tasks from cmds on the > @@ -2060,6 +2076,10 @@ static void target_complete_ok_work(struct work_struct *work) > > transport_lun_remove_cmd(cmd); > transport_cmd_check_stop_to_fabric(cmd); > + > + spin_lock_irqsave(&cmd->t_state_lock, flags); > + cmd->transport_state |= CMD_T_SENT_STATUS; > + spin_unlock_irqrestore(&cmd->t_state_lock, flags); > return; > } > /* > @@ -2086,6 +2106,11 @@ static void target_complete_ok_work(struct work_struct *work) > > transport_lun_remove_cmd(cmd); > transport_cmd_check_stop_to_fabric(cmd); > + > + spin_lock_irqsave(&cmd->t_state_lock, flags); > + cmd->transport_state |= CMD_T_SENT_STATUS; > + spin_unlock_irqrestore(&cmd->t_state_lock, flags); > + > return; > } > } > @@ -2136,6 +2161,7 @@ queue_rsp: > ret = cmd->se_tfo->queue_status(cmd); > if (ret == -EAGAIN || ret == -ENOMEM) > goto queue_full; > + > break; > default: > break; > @@ -2143,6 +2169,10 @@ queue_rsp: > > transport_lun_remove_cmd(cmd); > transport_cmd_check_stop_to_fabric(cmd); > + > + spin_lock_irqsave(&cmd->t_state_lock, flags); > + cmd->transport_state |= CMD_T_SENT_STATUS; > + spin_unlock_irqrestore(&cmd->t_state_lock, flags); > return; > > queue_full: > diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h > index aabf0ac..efccd71 100644 > --- a/include/target/target_core_base.h > +++ b/include/target/target_core_base.h > @@ -490,6 +490,7 @@ struct se_cmd { > #define CMD_T_DEV_ACTIVE (1 << 7) > #define CMD_T_REQUEST_STOP (1 << 8) > #define CMD_T_BUSY (1 << 9) > +#define CMD_T_SENT_STATUS (1 << 10) > spinlock_t t_state_lock; > struct kref cmd_kref; > struct completion t_transport_stop_comp; > Same here: using bitops would save you taking the spinlock when modifying flags. Cheers, Hannes -- Dr. Hannes Reinecke zSeries & Storage hare@xxxxxxx +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) -- To unsubscribe from this list: send the line "unsubscribe linux-scsi" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html