Both the receiving thread and the bs worker threads access the hash of lists at it_nexus->cmd_hash_list. It may be the case that this is causing list corruption. Lock each individual list in the hash with a mutex. Using a recursive mutex, since abort_task_set both needs to hold the mutex, and __cmd_done (called from abort_cmd) also is calling cmd_hlist_remove, which also takes the mutex. Signed-off-by: Andy Grover <agrover@xxxxxxxxxx> --- Hi Kiefer, does this make a difference? Thanks -- Andy usr/scsi_cmnd.h | 3 +++ usr/target.c | 46 ++++++++++++++++++++++++++++++++++++++-------- usr/target.h | 7 ++++++- 3 files changed, 47 insertions(+), 9 deletions(-) diff --git a/usr/scsi_cmnd.h b/usr/scsi_cmnd.h index c764c4e..cd9272e 100644 --- a/usr/scsi_cmnd.h +++ b/usr/scsi_cmnd.h @@ -49,6 +49,9 @@ struct scsi_cmd { struct it_nexus *it_nexus; struct it_nexus_lu_info *itn_lu_info; + + /* backptr to what cmd_hash_entry we're on so we can lock properly */ + struct cmd_hash_entry *cur_hash_entry; }; #define scsi_cmnd_accessor(field, type) \ diff --git a/usr/target.c b/usr/target.c index 42443d0..ac26a99 100644 --- a/usr/target.c +++ b/usr/target.c @@ -29,6 +29,7 @@ #include <unistd.h> #include <sys/socket.h> #include <sys/time.h> +#include <pthread.h> #include "list.h" #include "util.h" @@ -269,6 +270,7 @@ int it_nexus_create(int tid, uint64_t itn_id, int host_no, char *info) struct scsi_lu *lu; struct it_nexus_lu_info *itn_lu; struct timeval tv; + pthread_mutexattr_t mutex_attr; dprintf("%d %" PRIu64 " %d\n", tid, itn_id, host_no); /* for reserve/release code */ @@ -308,8 +310,15 @@ int it_nexus_create(int tid, uint64_t itn_id, int host_no, char *info) &itn->it_nexus_lu_info_list); } - for (i = 0; i < ARRAY_SIZE(itn->cmd_hash_list); i++) - INIT_LIST_HEAD(&itn->cmd_hash_list[i]); + pthread_mutexattr_init(&mutex_attr); + pthread_mutexattr_settype(&mutex_attr, PTHREAD_MUTEX_RECURSIVE); + + for (i = 0; i < ARRAY_SIZE(itn->cmd_hash_list); i++) { + INIT_LIST_HEAD(&itn->cmd_hash_list[i].head); + pthread_mutex_init(&itn->cmd_hash_list[i].lock, &mutex_attr); + } + + pthread_mutexattr_destroy(&mutex_attr); list_add_tail(&itn->nexus_siblings, &target->it_nexus_list); @@ -324,6 +333,7 @@ int it_nexus_destroy(int tid, uint64_t itn_id) int i; struct it_nexus *itn; struct scsi_lu *lu; + struct cmd_hash_entry *entry; dprintf("%d %" PRIu64 "\n", tid, itn_id); @@ -331,9 +341,18 @@ int it_nexus_destroy(int tid, uint64_t itn_id) if (!itn) return -ENOENT; - for (i = 0; i < ARRAY_SIZE(itn->cmd_hash_list); i++) - if (!list_empty(&itn->cmd_hash_list[i])) + for (i = 0; i < ARRAY_SIZE(itn->cmd_hash_list); i++) { + entry = &itn->cmd_hash_list[i]; + pthread_mutex_lock(&entry->lock); + if (!list_empty(&entry->head)) { + pthread_mutex_unlock(&entry->lock); return -EBUSY; + } + pthread_mutex_unlock(&entry->lock); + } + + for (i = 0; i < ARRAY_SIZE(itn->cmd_hash_list); i++) + pthread_mutex_destroy(&itn->cmd_hash_list[i].lock); list_for_each_entry(lu, &itn->nexus_target->device_list, device_siblings) device_release(tid, itn_id, lu->lun, 0); @@ -357,13 +376,22 @@ static struct scsi_lu *device_lookup(struct target *target, uint64_t lun) static void cmd_hlist_insert(struct it_nexus *itn, struct scsi_cmd *cmd) { - struct list_head *list = &itn->cmd_hash_list[hashfn(cmd->tag)]; - list_add(&cmd->c_hlist, list); + struct cmd_hash_entry *entry = &itn->cmd_hash_list[hashfn(cmd->tag)]; + + pthread_mutex_lock(&entry->lock); + cmd->cur_hash_entry = entry; + list_add(&cmd->c_hlist, &entry->head); + pthread_mutex_unlock(&entry->lock); } static void cmd_hlist_remove(struct scsi_cmd *cmd) { + struct cmd_hash_entry *entry = cmd->cur_hash_entry; + + pthread_mutex_lock(&entry->lock); + cmd->cur_hash_entry = NULL; list_del(&cmd->c_hlist); + pthread_mutex_unlock(&entry->lock); } static void tgt_cmd_queue_init(struct tgt_cmd_queue *q) @@ -1116,8 +1144,9 @@ static int abort_task_set(struct mgmt_req *mreq, struct target* target, list_for_each_entry(itn, &target->it_nexus_list, nexus_siblings) { for (i = 0; i < ARRAY_SIZE(itn->cmd_hash_list); i++) { - struct list_head *list = &itn->cmd_hash_list[i]; - list_for_each_entry_safe(cmd, tmp, list, c_hlist) { + struct cmd_hash_entry *entry = &itn->cmd_hash_list[i]; + pthread_mutex_lock(&entry->lock); + list_for_each_entry_safe(cmd, tmp, &entry->head, c_hlist) { if ((all && itn->itn_id == itn_id) || (cmd->tag == tag && itn->itn_id == itn_id) || (lun && !memcmp(cmd->lun, lun, sizeof(cmd->lun)))) { @@ -1127,6 +1156,7 @@ static int abort_task_set(struct mgmt_req *mreq, struct target* target, count++; } } + pthread_mutex_unlock(&entry->lock); } } return count; diff --git a/usr/target.h b/usr/target.h index f1e51f3..23e149d 100644 --- a/usr/target.h +++ b/usr/target.h @@ -48,11 +48,16 @@ struct target { struct tgt_account account; }; +struct cmd_hash_entry { + struct list_head head; + pthread_mutex_t lock; +}; + struct it_nexus { uint64_t itn_id; long ctime; - struct list_head cmd_hash_list[1 << HASH_ORDER]; + struct cmd_hash_entry cmd_hash_list[1 << HASH_ORDER]; struct target *nexus_target; -- 1.7.1 -- To unsubscribe from this list: send the line "unsubscribe stgt" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html