Hi, I discovered a race condition when using async verify with the libaio engine. The code assumes that, because td->cur_depth is not 0, there is still I/O pending, and so it issues io_getevents even though the I/O is actually being verified by the asynchronous verify thread. This causes the code to hang. I have attached a patch to fix this issue. Thanks, -radha
diff --git a/io_u.c b/io_u.c index da9d950..9268b62 100644 --- a/io_u.c +++ b/io_u.c @@ -421,9 +421,10 @@ void put_io_u(struct thread_data *td, struct io_u *io_u) put_file_log(td, io_u->file); io_u->file = NULL; + if (io_u->in_cur_depth) + td->cur_depth--; flist_del_init(&io_u->list); flist_add(&io_u->list, &td->io_u_freelist); - td->cur_depth--; td_io_u_unlock(td); td_io_u_free_notify(td); } @@ -447,10 +448,10 @@ void requeue_io_u(struct thread_data *td, struct io_u **io_u) td->io_issues[__io_u->ddir]--; __io_u->flags &= ~IO_U_F_FLIGHT; - + if (__io_u->in_cur_depth) + td->cur_depth--; flist_del(&__io_u->list); flist_add_tail(&__io_u->list, &td->io_u_requeues); - td->cur_depth--; td_io_u_unlock(td); *io_u = NULL; } @@ -867,6 +868,7 @@ again: flist_del(&io_u->list); flist_add(&io_u->list, &td->io_u_busylist); td->cur_depth++; + io_u->in_cur_depth = 1; } td_io_u_unlock(td); diff --git a/ioengine.h b/ioengine.h index 3df0944..4f65dfb 100644 --- a/ioengine.h +++ b/ioengine.h @@ -72,6 +72,7 @@ struct io_u { * Callback for io completion */ int (*end_io)(struct thread_data *, struct io_u *); + int in_cur_depth; }; /* diff --git a/verify.c b/verify.c index faa5684..619bb78 100644 --- a/verify.c +++ b/verify.c @@ -437,6 +437,10 @@ int verify_io_u_async(struct thread_data *td, struct io_u *io_u) io_u->file = NULL; pthread_mutex_lock(&td->io_u_lock); + if (io_u->in_cur_depth) { + td->cur_depth--; + io_u->in_cur_depth = 0; + } flist_del(&io_u->list); flist_add_tail(&io_u->list, &td->verify_list); pthread_mutex_unlock(&td->io_u_lock);