On 2014-10-28 09:49, Ketor D wrote:
I cannot get the newly committed code from GitHub right now. Once I have the newest code, I will test it.
So here's another idea; it applies on top of current -git. Basically, it makes rbd wait for the oldest event, not just the first one in the array of all ios. This is the saner thing to do, as hopefully the oldest event will be the one to complete first. At least it has a much higher chance of being the right thing to do than just waiting on a random event.
Completely untested, so you might have to fiddle a bit with it to ensure that it actually works...
-- Jens Axboe
diff --git a/engines/rbd.c b/engines/rbd.c index cf7be0acd1e3..f3129044c430 100644 --- a/engines/rbd.c +++ b/engines/rbd.c @@ -20,6 +20,7 @@ struct rbd_data { rados_ioctx_t io_ctx; rbd_image_t image; struct io_u **aio_events; + struct io_u **sort_events; }; struct rbd_options { @@ -80,20 +81,19 @@ static int _fio_setup_rbd_data(struct thread_data *td, if (td->io_ops->data) return 0; - rbd_data = malloc(sizeof(struct rbd_data)); + rbd_data = calloc(1, sizeof(struct rbd_data)); if (!rbd_data) goto failed; - memset(rbd_data, 0, sizeof(struct rbd_data)); - - rbd_data->aio_events = malloc(td->o.iodepth * sizeof(struct io_u *)); + rbd_data->aio_events = calloc(td->o.iodepth, sizeof(struct io_u *)); if (!rbd_data->aio_events) goto failed; - memset(rbd_data->aio_events, 0, td->o.iodepth * sizeof(struct io_u *)); + rbd_data->sort_events = calloc(td->o.iodepth, sizeof(struct io_u *)); + if (!rbd_data->sort_events) + goto failed; *rbd_data_ptr = rbd_data; - return 0; failed: @@ -218,14 +218,32 @@ static inline int fri_check_complete(struct rbd_data *rbd_data, return 0; } +static int rbd_io_u_cmp(const void *p1, const void *p2) +{ + const struct io_u **a = (const struct io_u **) p1; + const struct io_u **b = (const struct io_u **) p2; + uint64_t at, bt; + + at = utime_since_now(&(*a)->start_time); + bt = utime_since_now(&(*b)->start_time); + + if (at < bt) + return -1; + else if (at == bt) + return 0; + else + return 1; +} + static int rbd_iter_events(struct thread_data *td, unsigned int *events, unsigned int min_evts, int wait) { struct rbd_data *rbd_data = td->io_ops->data; unsigned int this_events = 0; struct io_u *io_u; - int i; + int i, sort_idx; + sort_idx = 0; io_u_qiter(&td->io_u_all, io_u, i) { struct fio_rbd_iou *fri = io_u->engine_data; @@ -236,16 +254,39 @@ static int rbd_iter_events(struct thread_data *td, unsigned int *events, if (fri_check_complete(rbd_data, io_u, events)) this_events++; - else if (wait) { - rbd_aio_wait_for_complete(fri->completion); + else if 
(wait) + rbd_data->sort_events[sort_idx++] = io_u; - if (fri_check_complete(rbd_data, io_u, events)) - this_events++; - } if (*events >= min_evts) break; } + if (!wait || !sort_idx) + return this_events; + + qsort(rbd_data->sort_events, sort_idx, sizeof(struct io_u *), rbd_io_u_cmp); + for (i = 0; i < sort_idx; i++) { + struct fio_rbd_iou *fri; + + io_u = rbd_data->sort_events[i]; + fri = io_u->engine_data; + + if (fri_check_complete(rbd_data, io_u, events)) { + this_events++; + continue; + } + if (!wait) + continue; + + rbd_aio_wait_for_complete(fri->completion); + + if (fri_check_complete(rbd_data, io_u, events)) + this_events++; + + if (wait && *events >= min_evts) + wait = 0; + } + return this_events; } @@ -359,6 +400,7 @@ static void fio_rbd_cleanup(struct thread_data *td) if (rbd_data) { _fio_rbd_disconnect(rbd_data); free(rbd_data->aio_events); + free(rbd_data->sort_events); free(rbd_data); }