On Fri, 2017-06-02 at 16:47 +0000, Bart Van Assche wrote: > On Thu, 2017-06-01 at 20:53 -0700, Nicholas A. Bellinger wrote: > > On Thu, 2017-06-01 at 15:35 +0000, bugzilla-daemon@xxxxxxxxxxxxxxxxxxx > > wrote: > > > https://bugzilla.kernel.org/show_bug.cgi?id=195963 > > > [ ... ] > > > Call Trace: > > > refcount_dec_and_test+0x11/0x20 > > > target_put_sess_cmd+0x14/0x30 [target_core_mod] > > > core_tmr_abort_task+0x140/0x1b0 [target_core_mod] > > > target_tmr_work+0x116/0x130 [target_core_mod] > > > process_one_work+0x1ca/0x3f0 > > > worker_thread+0x49/0x3b0 > > > kthread+0x109/0x140 > > > ret_from_fork+0x2a/0x40 > > > > Well, I'm not able to reproduce on target-pending/master with > > iscsi-test-cu --test=ALL --dataloss, or with the debug code to force > > ABORT_TASK + session shutdown to occur. > > > > I assume that MNC wasn't able to reproduce either on > > target-pending/master either, as he's been testing the same code-path to > > verify: > > > > commit 25cdda95fda78d22d44157da15aa7ea34be3c804 > > Author: Nicholas Bellinger <nab@xxxxxxxxxxxxxxx> > > Date: Wed May 24 21:47:09 2017 -0700 > > > > iscsi-target: Fix initial login PDU asynchronous socket close OOPs > > > > So are you sure you're not running with more of your out-of-tree code..? > > > > If not, what are the steps to reproduce..? > > Hello Nic, > > Yes, I'm sure that my pending patches were not responsible for triggering > this. After I encountered this warning for the first time I switched to the > git branch with Linus' tree, double checked there were no local modifications, > ran git clean -f -d -x, rebuilt the kernel, rebooted and repeated my test. The > command I ran to trigger this issue is as follows: > > for ((i=1;i<=2;i++)); do time ./iscsi-test-cu --dataloss --allow-sanitize iscsi://127.0.0.1/tgt1/$i iscsi://127.0.0.1/tgt2/$i; done > > libiscsi was granted access to both tgt1 and tgt2. LUN 1 uses the FILEIO > backend while LUN 2 uses the IBLOCK backend driver. > Great. I'm was able to reproduce and currently testing the following to avoid extra kref_put() when transport_cmd_finish_abort() has a se_cmd->cmd_ref reach zero outside of the parent caller. Since pre refcount_t kref code would only invoke a final kref_put() callback when zero was reached (and not warn for negative) this has not been reported before, and given se_cmd is pre-allocated memory it's not been a issue in practice. None the less, it's good refcount_sub_and_test() caught this. Care to verify on your end..? diff --git a/drivers/target/target_core_internal.h b/drivers/target/target_core_internal.h index 9ab7090..0912de7 100644 --- a/drivers/target/target_core_internal.h +++ b/drivers/target/target_core_internal.h @@ -136,7 +136,7 @@ struct se_node_acl *core_tpg_add_initiator_node_acl(struct se_portal_group *tpg, void release_se_kmem_caches(void); u32 scsi_get_new_index(scsi_index_t); void transport_subsystem_check_init(void); -void transport_cmd_finish_abort(struct se_cmd *, int); +int transport_cmd_finish_abort(struct se_cmd *, int); unsigned char *transport_dump_cmd_direction(struct se_cmd *); void transport_dump_dev_state(struct se_device *, char *, int *); void transport_dump_dev_info(struct se_device *, struct se_lun *, diff --git a/drivers/target/target_core_tmr.c b/drivers/target/target_core_tmr.c index dce1e1b..cf18cd1 100644 --- a/drivers/target/target_core_tmr.c +++ b/drivers/target/target_core_tmr.c @@ -75,7 +75,7 @@ void core_tmr_release_req(struct se_tmr_req *tmr) kfree(tmr); } -static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) +static int core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) { unsigned long flags; bool remove = true, send_tas; @@ -91,7 +91,7 @@ static void core_tmr_handle_tas_abort(struct se_cmd *cmd, int tas) transport_send_task_abort(cmd); } - transport_cmd_finish_abort(cmd, remove); + return transport_cmd_finish_abort(cmd, remove); } static int target_check_cdb_and_preempt(struct list_head *list, @@ -182,10 +182,11 @@ void core_tmr_abort_task( spin_unlock_irqrestore(&se_sess->sess_cmd_lock, flags); cancel_work_sync(&se_cmd->work); + transport_wait_for_tasks(se_cmd); - transport_cmd_finish_abort(se_cmd, true); - target_put_sess_cmd(se_cmd); + if (!transport_cmd_finish_abort(se_cmd, true)) + target_put_sess_cmd(se_cmd); printk("ABORT_TASK: Sending TMR_FUNCTION_COMPLETE for" " ref_tag: %llu\n", ref_tag); @@ -281,8 +282,8 @@ static void core_tmr_drain_tmr_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - transport_cmd_finish_abort(cmd, 1); - target_put_sess_cmd(cmd); + if (!transport_cmd_finish_abort(cmd, 1)) + target_put_sess_cmd(cmd); } } @@ -380,8 +381,8 @@ static void core_tmr_drain_state_list( cancel_work_sync(&cmd->work); transport_wait_for_tasks(cmd); - core_tmr_handle_tas_abort(cmd, tas); - target_put_sess_cmd(cmd); + if (!core_tmr_handle_tas_abort(cmd, tas)) + target_put_sess_cmd(cmd); } } diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index f16a789..3ddb1d2 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -651,9 +651,10 @@ static void transport_lun_remove_cmd(struct se_cmd *cmd) percpu_ref_put(&lun->lun_ref); } -void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) +int transport_cmd_finish_abort(struct se_cmd *cmd, int remove) { bool ack_kref = (cmd->se_cmd_flags & SCF_ACK_KREF); + int ret = 0; if (cmd->se_cmd_flags & SCF_SE_LUN_CMD) transport_lun_remove_cmd(cmd); @@ -665,9 +666,11 @@ void transport_cmd_finish_abort(struct se_cmd *cmd, int remove) cmd->se_tfo->aborted_task(cmd); if (transport_cmd_check_stop_to_fabric(cmd)) - return; + return 1; if (remove && ack_kref) - transport_put_cmd(cmd); + ret = transport_put_cmd(cmd); + + return ret; } static void target_complete_failure_work(struct work_struct *work) -- To unsubscribe from this list: send the line "unsubscribe target-devel" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html