Dear FUJITA-san, Forget to mention, signalfd doesn't work in my platform. pthread notification is used by default. 2011/7/19 Kiefer Chang <zapchang@xxxxxxxxx>: > Hi FUJITA-san, > > tgtd is blocked in the while loop after dealing with first abort_task_cmd: > > Jul 19 10:19:56 node tgtd: iscsi_tcp_release(398) conn=0x168765c8 fd=337 closed > Jul 19 10:19:57 node tgtd: iscsi_tcp_event_handler(167) connection > closed 0x1687a8b8 > Jul 19 10:19:57 node tgtd: conn_close(101) connection closed, 0x1687a8b8 1 > Jul 19 10:19:57 node tgtd: iscsi_tcp_release(398) conn=0x1687a8b8 fd=338 closed > Jul 19 10:24:17 node tgtd: iscsi_tm_execute(1407) 1 > Jul 19 10:24:17 node tgtd: abort_task_set(1153) found f0000048 0 > Jul 19 10:24:17 node tgtd: abort_cmd(1126) found f0000048 6 > Jul 19 10:24:32 node tgtd: iscsi_tcp_event_handler(167) connection > closed 0x166af268 > Jul 19 10:24:32 node tgtd: conn_close(101) connection closed, 0x166af268 3 > Jul 19 10:24:32 node tgtd: conn_close(107) sesson 0x166d9580 1 > Jul 19 10:24:32 node tgtd: conn_close(177) release task 0x1730aad0 0 flag 2, 3 > Jul 19 10:24:32 node tgtd: conn_close(177) release task 0x171ff570 > f0000048 flag 2, 3 > Jul 19 10:24:32 node tgtd: conn_close(184) 0x166af268 3 > Jul 19 10:24:32 node tgtd: conn_close(190) 0x166af268 3 > Jul 19 10:24:32 node tgtd: bs_thread_request_done(132) wrong wakeup > Jul 19 10:24:33 node tgtd: conn_close(190) 0x166af268 3 > Jul 19 10:24:35 node last message repeated 4 times > Jul 19 10:24:35 node tgtd: iscsi_scsi_cmd_done(1217) finish a task on > a closed conn, 0x171ff570 3 > Jul 19 10:24:35 node tgtd: tgt_event_modify(213) Cannot find event 246 > Jul 19 10:24:35 node tgtd: iscsi_event_modify(448) tgt_event_modify failed > Jul 19 10:24:35 node tgtd: conn_close(190) 0x166af268 2 > Jul 19 10:24:35 node tgtd: bs_thread_request_done(132) wrong wakeup > Jul 19 10:24:36 node tgtd: conn_close(190) 0x166af268 2 > Jul 19 10:24:36 node tgtd: bs_thread_request_done(132) wrong wakeup > Jul 19 10:24:36 node tgtd: conn_close(190) 0x166af268 2 > Jul 19 10:24:36 node tgtd: bs_thread_request_done(132) wrong wakeup > Jul 19 10:24:37 node tgtd: conn_close(190) 0x166af268 2 > Jul 19 10:24:37 node tgtd: bs_thread_request_done(132) wrong wakeup > Jul 19 10:24:37 node tgtd: conn_close(190) 0x166af268 2 > Jul 19 10:24:37 node tgtd: bs_thread_request_done(132) wrong wakeup > > Full log can be downloaded from: > http://dl.dropbox.com/u/8354750/tgtd/20110719/messages.gz > > > Thanks. > > > 2011/7/19 FUJITA Tomonori <fujita.tomonori@xxxxxxxxxxxxx>: >> On Mon, 18 Jul 2011 20:26:07 +0800 >> Kiefer Chang <zapchang@xxxxxxxxx> wrote: >> >>> I applied the patch. The symptom still exists. >>> I add 2 print statements in the patch. I can't find it in the log >>> after tgtd segfault. >>> Please download the log if you need: >>> http://dl.dropbox.com/u/8354750/tgtd/20110718/messages.gz >> >> Thanks for the testing. I'll investigate this cause but meanwhile can >> you try the following patch? >> >> diff --git a/usr/bs.c b/usr/bs.c >> index d72d090..8f56aee 100644 >> --- a/usr/bs.c >> +++ b/usr/bs.c >> @@ -122,7 +122,7 @@ out: >> pthread_exit(NULL); >> } >> >> -static void bs_thread_request_done(int fd, int events, void *data) >> +void bs_thread_request_done(int fd, int events, void *data) >> { >> struct scsi_cmd *cmd; >> int nr_events, ret; >> @@ -230,6 +230,8 @@ static int bs_init_signalfd(void) >> sigset_t mask; >> int ret; >> >> + return 1; >> + >> pthread_mutex_init(&finished_lock, NULL); >> >> sigemptyset(&mask); >> @@ -270,6 +272,7 @@ static int bs_init_notify_thread(void) >> goto close_command_fd; >> } >> >> + set_non_blocking(done_fd[0]); >> ret = tgt_event_add(done_fd[0], EPOLLIN, bs_thread_request_done, NULL); >> if (ret) { >> eprintf("failed to add epoll event\n"); >> diff --git a/usr/bs_thread.h b/usr/bs_thread.h >> index d460032..14456a7 100644 >> --- a/usr/bs_thread.h >> +++ b/usr/bs_thread.h >> @@ -29,3 +29,4 @@ extern int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn, >> extern void bs_thread_close(struct bs_thread_info *info); >> extern int bs_thread_cmd_submit(struct scsi_cmd *cmd); >> >> +extern void bs_wait_one_completion(void); >> diff --git a/usr/iscsi/conn.c b/usr/iscsi/conn.c >> index 53e719e..928a8a1 100644 >> --- a/usr/iscsi/conn.c >> +++ b/usr/iscsi/conn.c >> @@ -115,14 +115,14 @@ void conn_close(struct iscsi_connection *conn) >> if (task->conn != conn) >> continue; >> >> - eprintf("Forcing release of pending task %p %" PRIx64 "\n", >> - task, task->tag); >> + eprintf("Forcing release of pending task %p %" PRIx64 " %u\n", >> + task, task->tag, conn->refcount); >> list_del(&task->c_list); >> iscsi_free_task(task); >> } >> >> if (conn->tx_task) { >> - dprintf("Add current tx task to the tx list for removal " >> + eprintf("Add current tx task to the tx list for removal " >> "%p %" PRIx64 "\n", >> conn->tx_task, conn->tx_task->tag); >> list_add(&conn->tx_task->c_list, &conn->tx_clist); >> @@ -134,8 +134,8 @@ void conn_close(struct iscsi_connection *conn) >> >> op = task->req.opcode & ISCSI_OPCODE_MASK; >> >> - eprintf("Forcing release of tx task %p %" PRIx64 " %x\n", >> - task, task->tag, op); >> + eprintf("Forcing release of tx task %p %" PRIx64 " %x %u\n", >> + task, task->tag, op, conn->refcount); >> switch (op) { >> case ISCSI_OP_SCSI_CMD: >> /* >> @@ -155,14 +155,14 @@ void conn_close(struct iscsi_connection *conn) >> iscsi_free_task(task); >> break; >> default: >> - eprintf("%x\n", op); >> + eprintf("unknow op %x\n", op); >> break; >> } >> } >> >> if (conn->rx_task) { >> - eprintf("Forcing release of rx task %p %" PRIx64 "\n", >> - conn->rx_task, conn->rx_task->tag); >> + eprintf("Forcing release of rx task %p %" PRIx64 " %u\n", >> + conn->rx_task, conn->rx_task->tag, conn->refcount); >> iscsi_free_task(conn->rx_task); >> } >> conn->rx_task = NULL; >> @@ -173,10 +173,24 @@ void conn_close(struct iscsi_connection *conn) >> * This task is in SCSI. We need to wait for I/O >> * completion. >> */ >> + eprintf("release task %p %" PRIx64 " flag %lx, %u\n", >> + task, task->tag, task->flags, conn->refcount); >> + >> if (task_in_scsi(task)) >> continue; >> iscsi_free_task(task); >> } >> + >> + eprintf("%p %u\n", conn, conn->refcount); >> + >> + while (conn->refcount != 1) { >> + struct timeval t; >> + t.tv_sec = 1; >> + t.tv_usec = 0; >> + eprintf("%p %u\n", conn, conn->refcount); >> + bs_thread_request_done(0, 0, NULL); >> + select(0, NULL, NULL, NULL, &t); >> + } >> done: >> conn_put(conn); >> } >> diff --git a/usr/iscsi/iscsi_tcp.c b/usr/iscsi/iscsi_tcp.c >> index 1988b2f..c61faa1 100644 >> --- a/usr/iscsi/iscsi_tcp.c >> +++ b/usr/iscsi/iscsi_tcp.c >> @@ -164,7 +164,7 @@ static void iscsi_tcp_event_handler(int fd, int events, void *data) >> iscsi_tx_handler(conn); >> >> if (conn->state == STATE_CLOSE) { >> - dprintf("connection closed %p\n", conn); >> + eprintf("connection closed %p\n", conn); >> conn_close(conn); >> } >> } >> diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c >> index 22a21cc..59f88c4 100644 >> --- a/usr/iscsi/iscsid.c >> +++ b/usr/iscsi/iscsid.c >> @@ -1209,6 +1209,12 @@ static int iscsi_scsi_cmd_done(uint64_t nid, int result, struct scsi_cmd *scmd) >> struct iscsi_task *task = ITASK(scmd); >> uint32_t read_len = scsi_get_in_length(scmd); >> >> + if (result == TASK_ABORTED) { >> + list_del(&task->c_hlist); >> + iscsi_free_task(task); >> + return 0; >> + } >> + >> /* >> * Since the connection is closed we just free the task. >> * We could delay the closing of the conn in some cases and send >> @@ -1216,6 +1222,8 @@ static int iscsi_scsi_cmd_done(uint64_t nid, int result, struct scsi_cmd *scmd) >> * task got reassinged to another connection. >> */ >> if (task->conn->state == STATE_CLOSE) { >> + eprintf("finish a task on a closed conn, %p %u\n", >> + task, task->conn->refcount); >> iscsi_free_cmd_task(task); >> return 0; >> } >> @@ -1405,6 +1413,8 @@ static int iscsi_tm_execute(struct iscsi_task *task) >> struct iscsi_tm *req = (struct iscsi_tm *) &task->req; >> int fn = 0, err = 0; >> >> + eprintf("%x\n", req->flags & ISCSI_FLAG_TM_FUNC_MASK); >> + >> switch (req->flags & ISCSI_FLAG_TM_FUNC_MASK) { >> case ISCSI_TM_FUNC_ABORT_TASK: >> fn = ABORT_TASK; >> @@ -1412,15 +1422,11 @@ static int iscsi_tm_execute(struct iscsi_task *task) >> case ISCSI_TM_FUNC_ABORT_TASK_SET: >> fn = ABORT_TASK_SET; >> break; >> - case ISCSI_TM_FUNC_CLEAR_ACA: >> - fn = CLEAR_TASK_SET; >> - break; >> - case ISCSI_TM_FUNC_CLEAR_TASK_SET: >> - fn = CLEAR_ACA; >> - break; >> case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: >> fn = LOGICAL_UNIT_RESET; >> break; >> + case ISCSI_TM_FUNC_CLEAR_ACA: >> + case ISCSI_TM_FUNC_CLEAR_TASK_SET: >> case ISCSI_TM_FUNC_TARGET_WARM_RESET: >> case ISCSI_TM_FUNC_TARGET_COLD_RESET: >> case ISCSI_TM_FUNC_TASK_REASSIGN: >> diff --git a/usr/target.c b/usr/target.c >> index 5f04cf9..3e3d28b 100644 >> --- a/usr/target.c >> +++ b/usr/target.c >> @@ -1127,7 +1127,10 @@ static int abort_cmd(struct target* target, struct mgmt_req *mreq, >> cmd->mreq = mreq; >> err = -EBUSY; >> } else { >> - cmd->dev->cmd_done(target, cmd); >> + eprintf("found %" PRIx64 " %lx\n", cmd->tag, cmd->state); >> + cmd_hlist_remove(cmd); >> + list_del(&cmd->qlist); >> + >> target_cmd_io_done(cmd, TASK_ABORTED); >> } >> return err; >> > -- To unsubscribe from this list: send the line "unsubscribe stgt" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html