Hi fujita-san, Here is the result. Unfortunately, neither the core dump nor directly attaching gdb to tgtd shows a usable code trace. System log: http://dl.dropbox.com/u/8354750/tgtd/20110720/messages.gz Backtrace (tgtd built with make DEBUG=1): Core was generated by `./tgtd'. Program terminated with signal 11, Segmentation fault. #0 0x0000000000000000 in ?? () (gdb) bt #0 0x0000000000000000 in ?? () #1 0x00000000004177d1 in event_loop () at tgtd.c:400 #2 0x0000000000417e82 in main (argc=1, argv=0x7fff192e2bc8) at tgtd.c:574 By the way, if I use the O_DIRECT flag on all LUNs' backing stores, I found tgtd won't segfault during the same I/O test (1 day). I can't see any abort task command in the log. I can't understand why... Shouldn't lower performance cause command timeouts more easily? 2011/7/20 FUJITA Tomonori <fujita.tomonori@xxxxxxxxxxxxx>: > On Tue, 19 Jul 2011 14:57:45 +0800 > Kiefer Chang <zapchang@xxxxxxxxx> wrote: > >> Forget to mention, signalfd doesn't work in my platform. >> pthread notification is used by default. > > It should be fine. However, after reading your log, I found another > bug. > > Can you try the following patch (against the latest git)? > > Thanks a lot for the testing. 
> > diff --git a/usr/bs.c b/usr/bs.c > index d72d090..8f56aee 100644 > --- a/usr/bs.c > +++ b/usr/bs.c > @@ -122,7 +122,7 @@ out: > pthread_exit(NULL); > } > > -static void bs_thread_request_done(int fd, int events, void *data) > +void bs_thread_request_done(int fd, int events, void *data) > { > struct scsi_cmd *cmd; > int nr_events, ret; > @@ -230,6 +230,8 @@ static int bs_init_signalfd(void) > sigset_t mask; > int ret; > > + return 1; > + > pthread_mutex_init(&finished_lock, NULL); > > sigemptyset(&mask); > @@ -270,6 +272,7 @@ static int bs_init_notify_thread(void) > goto close_command_fd; > } > > + set_non_blocking(done_fd[0]); > ret = tgt_event_add(done_fd[0], EPOLLIN, bs_thread_request_done, NULL); > if (ret) { > eprintf("failed to add epoll event\n"); > diff --git a/usr/bs_thread.h b/usr/bs_thread.h > index d460032..14456a7 100644 > --- a/usr/bs_thread.h > +++ b/usr/bs_thread.h > @@ -29,3 +29,4 @@ extern int bs_thread_open(struct bs_thread_info *info, request_func_t *rfn, > extern void bs_thread_close(struct bs_thread_info *info); > extern int bs_thread_cmd_submit(struct scsi_cmd *cmd); > > +extern void bs_wait_one_completion(void); > diff --git a/usr/iscsi/conn.c b/usr/iscsi/conn.c > index 53e719e..3836655 100644 > --- a/usr/iscsi/conn.c > +++ b/usr/iscsi/conn.c > @@ -115,14 +115,14 @@ void conn_close(struct iscsi_connection *conn) > if (task->conn != conn) > continue; > > - eprintf("Forcing release of pending task %p %" PRIx64 "\n", > - task, task->tag); > + eprintf("Forcing release of pending task %p %" PRIx64 " %u\n", > + task, task->tag, conn->refcount); > list_del(&task->c_list); > iscsi_free_task(task); > } > > if (conn->tx_task) { > - dprintf("Add current tx task to the tx list for removal " > + eprintf("Add current tx task to the tx list for removal " > "%p %" PRIx64 "\n", > conn->tx_task, conn->tx_task->tag); > list_add(&conn->tx_task->c_list, &conn->tx_clist); > @@ -134,8 +134,8 @@ void conn_close(struct iscsi_connection *conn) > > op = 
task->req.opcode & ISCSI_OPCODE_MASK; > > - eprintf("Forcing release of tx task %p %" PRIx64 " %x\n", > - task, task->tag, op); > + eprintf("Forcing release of tx task %p %" PRIx64 " %x %u\n", > + task, task->tag, op, conn->refcount); > switch (op) { > case ISCSI_OP_SCSI_CMD: > /* > @@ -155,14 +155,14 @@ void conn_close(struct iscsi_connection *conn) > iscsi_free_task(task); > break; > default: > - eprintf("%x\n", op); > + eprintf("unknow op %x\n", op); > break; > } > } > > if (conn->rx_task) { > - eprintf("Forcing release of rx task %p %" PRIx64 "\n", > - conn->rx_task, conn->rx_task->tag); > + eprintf("Forcing release of rx task %p %" PRIx64 " %u\n", > + conn->rx_task, conn->rx_task->tag, conn->refcount); > iscsi_free_task(conn->rx_task); > } > conn->rx_task = NULL; > @@ -173,10 +173,25 @@ void conn_close(struct iscsi_connection *conn) > * This task is in SCSI. We need to wait for I/O > * completion. > */ > + eprintf("release task %p %x %" PRIx64 " flag %lx, %u\n", > + task, task->req.opcode & ISCSI_OPCODE_MASK, > + task->tag, task->flags, conn->refcount); > + > if (task_in_scsi(task)) > continue; > iscsi_free_task(task); > } > + > + eprintf("%p %u\n", conn, conn->refcount); > + > + while (conn->refcount != 1) { > + struct timeval t; > + t.tv_sec = 1; > + t.tv_usec = 0; > + eprintf("%p %u\n", conn, conn->refcount); > + bs_thread_request_done(0, 0, NULL); > + select(0, NULL, NULL, NULL, &t); > + } > done: > conn_put(conn); > } > diff --git a/usr/iscsi/iscsi_tcp.c b/usr/iscsi/iscsi_tcp.c > index 1988b2f..c61faa1 100644 > --- a/usr/iscsi/iscsi_tcp.c > +++ b/usr/iscsi/iscsi_tcp.c > @@ -164,7 +164,7 @@ static void iscsi_tcp_event_handler(int fd, int events, void *data) > iscsi_tx_handler(conn); > > if (conn->state == STATE_CLOSE) { > - dprintf("connection closed %p\n", conn); > + eprintf("connection closed %p\n", conn); > conn_close(conn); > } > } > diff --git a/usr/iscsi/iscsid.c b/usr/iscsi/iscsid.c > index 22a21cc..4cde178 100644 > --- a/usr/iscsi/iscsid.c > +++ 
b/usr/iscsi/iscsid.c > @@ -1209,6 +1209,12 @@ static int iscsi_scsi_cmd_done(uint64_t nid, int result, struct scsi_cmd *scmd) > struct iscsi_task *task = ITASK(scmd); > uint32_t read_len = scsi_get_in_length(scmd); > > + if (result == TASK_ABORTED) { > + list_del(&task->c_hlist); > + iscsi_free_task(task); > + return 0; > + } > + > /* > * Since the connection is closed we just free the task. > * We could delay the closing of the conn in some cases and send > @@ -1216,6 +1222,8 @@ static int iscsi_scsi_cmd_done(uint64_t nid, int result, struct scsi_cmd *scmd) > * task got reassinged to another connection. > */ > if (task->conn->state == STATE_CLOSE) { > + eprintf("finish a task on a closed conn, %p %u\n", > + task, task->conn->refcount); > iscsi_free_cmd_task(task); > return 0; > } > @@ -1376,6 +1384,13 @@ static int iscsi_tm_done(struct mgmt_req *mreq) > > task = (struct iscsi_task *) (unsigned long) mreq->mid; > > + if (task->conn->state == STATE_CLOSE) { > + eprintf("finish a tm task on a closed conn, %p %u\n", > + task, task->conn->refcount); > + iscsi_free_task(task); > + return 0; > + } > + > switch (mreq->result) { > case 0: > task->result = ISCSI_TMF_RSP_COMPLETE; > @@ -1405,6 +1420,8 @@ static int iscsi_tm_execute(struct iscsi_task *task) > struct iscsi_tm *req = (struct iscsi_tm *) &task->req; > int fn = 0, err = 0; > > + eprintf("%x\n", req->flags & ISCSI_FLAG_TM_FUNC_MASK); > + > switch (req->flags & ISCSI_FLAG_TM_FUNC_MASK) { > case ISCSI_TM_FUNC_ABORT_TASK: > fn = ABORT_TASK; > @@ -1412,15 +1429,11 @@ static int iscsi_tm_execute(struct iscsi_task *task) > case ISCSI_TM_FUNC_ABORT_TASK_SET: > fn = ABORT_TASK_SET; > break; > - case ISCSI_TM_FUNC_CLEAR_ACA: > - fn = CLEAR_TASK_SET; > - break; > - case ISCSI_TM_FUNC_CLEAR_TASK_SET: > - fn = CLEAR_ACA; > - break; > case ISCSI_TM_FUNC_LOGICAL_UNIT_RESET: > fn = LOGICAL_UNIT_RESET; > break; > + case ISCSI_TM_FUNC_CLEAR_ACA: > + case ISCSI_TM_FUNC_CLEAR_TASK_SET: > case ISCSI_TM_FUNC_TARGET_WARM_RESET: > case 
ISCSI_TM_FUNC_TARGET_COLD_RESET: > case ISCSI_TM_FUNC_TASK_REASSIGN: > diff --git a/usr/target.c b/usr/target.c > index 5f04cf9..5afce53 100644 > --- a/usr/target.c > +++ b/usr/target.c > @@ -1101,6 +1101,8 @@ void target_cmd_done(struct scsi_cmd *cmd) > struct mgmt_req *mreq; > > mreq = cmd->mreq; > + if (mreq) > + eprintf("%p %d\n", mreq, mreq->busy); > if (mreq && !--mreq->busy) { > mreq->result = mreq->function == ABORT_TASK ? -EEXIST : 0; > mreq->itn_id = cmd->cmd_itn_id; > @@ -1127,7 +1129,10 @@ static int abort_cmd(struct target* target, struct mgmt_req *mreq, > cmd->mreq = mreq; > err = -EBUSY; > } else { > - cmd->dev->cmd_done(target, cmd); > + eprintf("found %" PRIx64 " %lx\n", cmd->tag, cmd->state); > + cmd_hlist_remove(cmd); > + list_del(&cmd->qlist); > + > target_cmd_io_done(cmd, TASK_ABORTED); > } > return err; > @@ -1140,7 +1145,7 @@ static int abort_task_set(struct mgmt_req *mreq, struct target* target, > struct it_nexus *itn; > int i, err, count = 0; > > - eprintf("found %" PRIx64 " %d\n", tag, all); > + eprintf("aborting %" PRIx64 " %d, %p\n", tag, all, mreq); > > list_for_each_entry(itn, &target->it_nexus_list, nexus_siblings) { > for (i = 0; i < ARRAY_SIZE(itn->cmd_hash_list); i++) { > > -- To unsubscribe from this list: send the line "unsubscribe stgt" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html