From: Niklas Cassel <niklas.cassel@xxxxxxx> Convert the stat code to report clat stats on a per priority granularity, rather than simply supporting high/low priority. This is made possible by using the new clat_prio_stat array (per ddir), together with the clat_prio_stat index which is saved in each io_u. The per priority samples are only printed when there are samples for more than one priority in the clat_prio_stat array. If there are only samples for one priority, that means that all I/Os where submitted using the same priority, so no need to print. For example, running the following fio command: fio --name=test --filename=/dev/sdc --direct=1 --runtime=60 --rw=randread \ --ioengine=io_uring --ioscheduler=mq-deadline --iodepth=32 --bs=32k \ --prioclass=2 --prio=7 --cmdprio_bssplit=32k/20/3/0:32k/10/1/4 Now results in the following output: test: (groupid=0, jobs=1): err= 0: pid=465655: Tue Feb 1 02:24:47 2022 read: IOPS=146, BW=4695KiB/s (4808kB/s)(276MiB/60239msec) slat (usec): min=18, max=335, avg=62.87, stdev=22.59 clat (msec): min=2, max=2135, avg=217.97, stdev=287.26 lat (msec): min=2, max=2135, avg=218.03, stdev=287.26 clat prio 2/7 (msec): min=3, max=606, avg=106.57, stdev=86.64 clat prio 3/0 (msec): min=10, max=2135, avg=664.94, stdev=339.42 clat prio 1/4 (msec): min=2, max=300, avg=52.29, stdev=42.52 clat percentiles (msec): | 1.00th=[ 8], 5.00th=[ 14], 10.00th=[ 19], 20.00th=[ 33], | 30.00th=[ 52], 40.00th=[ 77], 50.00th=[ 108], 60.00th=[ 144], | 70.00th=[ 192], 80.00th=[ 300], 90.00th=[ 684], 95.00th=[ 911], | 99.00th=[ 1234], 99.50th=[ 1318], 99.90th=[ 1687], 99.95th=[ 1770], | 99.99th=[ 2140] clat prio 2/7 (69.25% of IOs) percentiles (msec): | 1.00th=[ 7], 5.00th=[ 13], 10.00th=[ 17], 20.00th=[ 28], | 30.00th=[ 44], 40.00th=[ 64], 50.00th=[ 85], 60.00th=[ 111], | 70.00th=[ 140], 80.00th=[ 174], 90.00th=[ 226], 95.00th=[ 279], | 99.00th=[ 368], 99.50th=[ 418], 99.90th=[ 502], 99.95th=[ 567], | 99.99th=[ 609] clat prio 3/0 (20.91% of IOs) percentiles (msec): | 1.00th=[ 44], 5.00th=[ 138], 10.00th=[ 205], 20.00th=[ 347], | 30.00th=[ 464], 40.00th=[ 558], 50.00th=[ 659], 60.00th=[ 760], | 70.00th=[ 860], 80.00th=[ 961], 90.00th=[ 1099], 95.00th=[ 1217], | 99.00th=[ 1485], 99.50th=[ 1687], 99.90th=[ 1871], 99.95th=[ 2140], | 99.99th=[ 2140] clat prio 1/4 (9.84% of IOs) percentiles (msec): | 1.00th=[ 7], 5.00th=[ 10], 10.00th=[ 13], 20.00th=[ 18], | 30.00th=[ 24], 40.00th=[ 30], 50.00th=[ 39], 60.00th=[ 51], | 70.00th=[ 63], 80.00th=[ 84], 90.00th=[ 114], 95.00th=[ 136], | 99.00th=[ 188], 99.50th=[ 197], 99.90th=[ 300], 99.95th=[ 300], | 99.99th=[ 300] bw ( KiB/s): min= 3456, max= 5888, per=100.00%, avg=4697.60, stdev=472.38, samples=120 iops : min= 108, max= 184, avg=146.80, stdev=14.76, samples=120 lat (msec) : 4=0.11%, 10=2.57%, 20=8.67%, 50=18.21%, 100=18.34% lat (msec) : 250=28.87%, 500=9.41%, 750=5.22%, 1000=5.09%, 2000=3.50% lat (msec) : >=2000=0.01% cpu : usr=0.16%, sys=0.97%, ctx=17715, majf=0, minf=262 IO depths : 1=0.1%, 2=0.1%, 4=0.1%, 8=0.1%, 16=0.2%, 32=99.6%, >=64=0.0% submit : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.0%, 64=0.0%, >=64=0.0% complete : 0=0.0%, 4=100.0%, 8=0.0%, 16=0.0%, 32=0.1%, 64=0.0%, >=64=0.0% issued rwts: total=8839,0,0,0 short=0,0,0,0 dropped=0,0,0,0 latency : target=0, window=0, percentile=100.00%, depth=32 Signed-off-by: Niklas Cassel <niklas.cassel@xxxxxxx> --- client.c | 31 +++- engines/filecreate.c | 2 +- engines/filedelete.c | 2 +- engines/filestat.c | 2 +- io_u.c | 6 +- io_u.h | 2 - server.c | 40 ++++- server.h | 2 +- stat.c | 351 +++++++++++++++++++++++++++++++++---------- stat.h | 4 +- 10 files changed, 338 insertions(+), 104 deletions(-) diff --git a/client.c b/client.c index e5f6cfa7..61c6b930 100644 --- a/client.c +++ b/client.c @@ -1037,14 +1037,6 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src) dst->nr_block_infos = le64_to_cpu(src->nr_block_infos); for (i = 0; i < dst->nr_block_infos; i++) dst->block_infos[i] = le32_to_cpu(src->block_infos[i]); - for (i = 0; i < DDIR_RWDIR_CNT; i++) { - for (j = 0; j < FIO_IO_U_PLAT_NR; j++) { - dst->io_u_plat_high_prio[i][j] = le64_to_cpu(src->io_u_plat_high_prio[i][j]); - dst->io_u_plat_low_prio[i][j] = le64_to_cpu(src->io_u_plat_low_prio[i][j]); - } - convert_io_stat(&dst->clat_high_prio_stat[i], &src->clat_high_prio_stat[i]); - convert_io_stat(&dst->clat_low_prio_stat[i], &src->clat_low_prio_stat[i]); - } dst->ss_dur = le64_to_cpu(src->ss_dur); dst->ss_state = le32_to_cpu(src->ss_state); @@ -1054,6 +1046,19 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src) dst->ss_deviation.u.f = fio_uint64_to_double(le64_to_cpu(src->ss_deviation.u.i)); dst->ss_criterion.u.f = fio_uint64_to_double(le64_to_cpu(src->ss_criterion.u.i)); + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + dst->nr_clat_prio[i] = le32_to_cpu(src->nr_clat_prio[i]); + for (j = 0; j < dst->nr_clat_prio[i]; j++) { + for (k = 0; k < FIO_IO_U_PLAT_NR; k++) + dst->clat_prio[i][j].io_u_plat[k] = + le64_to_cpu(src->clat_prio[i][j].io_u_plat[k]); + convert_io_stat(&dst->clat_prio[i][j].clat_stat, + &src->clat_prio[i][j].clat_stat); + dst->clat_prio[i][j].ioprio = + le32_to_cpu(dst->clat_prio[i][j].ioprio); + } + } + if (dst->ss_state & FIO_SS_DATA) { for (i = 0; i < dst->ss_dur; i++ ) { dst->ss_iops_data[i] = le64_to_cpu(src->ss_iops_data[i]); @@ -1796,6 +1801,15 @@ int fio_handle_client(struct fio_client *client) case FIO_NET_CMD_TS: { struct cmd_ts_pdu *p = (struct cmd_ts_pdu *) cmd->payload; uint64_t offset; + int i; + + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + if (le32_to_cpu(p->ts.nr_clat_prio[i])) { + offset = le64_to_cpu(p->ts.clat_prio_offset[i]); + p->ts.clat_prio[i] = + (struct clat_prio_stat *)((char *)p + offset); + } + } dprint(FD_NET, "client: ts->ss_state = %u\n", (unsigned int) le32_to_cpu(p->ts.ss_state)); if (le32_to_cpu(p->ts.ss_state) & FIO_SS_DATA) { @@ -2156,6 +2170,7 @@ int fio_handle_clients(struct client_ops *ops) fio_client_json_fini(); + free_clat_prio_stats(&client_ts); free(pfds); return retval || error_clients; } diff --git a/engines/filecreate.c b/engines/filecreate.c index 4bb13c34..7884752d 100644 --- a/engines/filecreate.c +++ b/engines/filecreate.c @@ -49,7 +49,7 @@ static int open_file(struct thread_data *td, struct fio_file *f) uint64_t nsec; nsec = ntime_since_now(&start); - add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false); + add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0); } return 0; diff --git a/engines/filedelete.c b/engines/filedelete.c index e882ccf0..df388ac9 100644 --- a/engines/filedelete.c +++ b/engines/filedelete.c @@ -51,7 +51,7 @@ static int delete_file(struct thread_data *td, struct fio_file *f) uint64_t nsec; nsec = ntime_since_now(&start); - add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false); + add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0); } return 0; diff --git a/engines/filestat.c b/engines/filestat.c index 00311247..e587eb54 100644 --- a/engines/filestat.c +++ b/engines/filestat.c @@ -125,7 +125,7 @@ static int stat_file(struct thread_data *td, struct fio_file *f) uint64_t nsec; nsec = ntime_since_now(&start); - add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, false); + add_clat_sample(td, data->stat_ddir, nsec, 0, 0, 0, 0); } return 0; diff --git a/io_u.c b/io_u.c index 656b4610..059637e5 100644 --- a/io_u.c +++ b/io_u.c @@ -1595,7 +1595,7 @@ again: assert(io_u->flags & IO_U_F_FREE); io_u_clear(td, io_u, IO_U_F_FREE | IO_U_F_NO_FILE_PUT | IO_U_F_TRIMMED | IO_U_F_BARRIER | - IO_U_F_VER_LIST | IO_U_F_HIGH_PRIO); + IO_U_F_VER_LIST); io_u->error = 0; io_u->acct_ddir = -1; @@ -1890,7 +1890,7 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u, tnsec = ntime_since(&io_u->start_time, &icd->time); add_lat_sample(td, idx, tnsec, bytes, io_u->offset, - io_u->ioprio, io_u_is_high_prio(io_u)); + io_u->ioprio, io_u->clat_prio_index); if (td->flags & TD_F_PROFILE_OPS) { struct prof_io_ops *ops = &td->prof_io_ops; @@ -1912,7 +1912,7 @@ static void account_io_completion(struct thread_data *td, struct io_u *io_u, if (ddir_rw(idx)) { if (!td->o.disable_clat) { add_clat_sample(td, idx, llnsec, bytes, io_u->offset, - io_u->ioprio, io_u_is_high_prio(io_u)); + io_u->ioprio, io_u->clat_prio_index); io_u_mark_latency(td, llnsec); } diff --git a/io_u.h b/io_u.h index d88d5f2c..206e24fe 100644 --- a/io_u.h +++ b/io_u.h @@ -21,7 +21,6 @@ enum { IO_U_F_TRIMMED = 1 << 5, IO_U_F_BARRIER = 1 << 6, IO_U_F_VER_LIST = 1 << 7, - IO_U_F_HIGH_PRIO = 1 << 8, }; /* @@ -194,6 +193,5 @@ static inline enum fio_ddir acct_ddir(struct io_u *io_u) td_flags_clear((td), &(io_u->flags), (val)) #define io_u_set(td, io_u, val) \ td_flags_set((td), &(io_u)->flags, (val)) -#define io_u_is_high_prio(io_u) (io_u->flags & IO_U_F_HIGH_PRIO) #endif diff --git a/server.c b/server.c index 35f3ba34..098ddf93 100644 --- a/server.c +++ b/server.c @@ -1465,6 +1465,7 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs) { struct cmd_ts_pdu p; int i, j, k; + size_t clat_prio_stats_extra_size = 0; size_t ss_extra_size = 0; size_t extended_buf_size = 0; void *extended_buf; @@ -1581,16 +1582,13 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs) p.ts.cachehit = cpu_to_le64(ts->cachehit); p.ts.cachemiss = cpu_to_le64(ts->cachemiss); + convert_gs(&p.rs, rs); + for (i = 0; i < DDIR_RWDIR_CNT; i++) { - for (j = 0; j < FIO_IO_U_PLAT_NR; j++) { - p.ts.io_u_plat_high_prio[i][j] = cpu_to_le64(ts->io_u_plat_high_prio[i][j]); - p.ts.io_u_plat_low_prio[i][j] = cpu_to_le64(ts->io_u_plat_low_prio[i][j]); - } - convert_io_stat(&p.ts.clat_high_prio_stat[i], &ts->clat_high_prio_stat[i]); - convert_io_stat(&p.ts.clat_low_prio_stat[i], &ts->clat_low_prio_stat[i]); + if (ts->nr_clat_prio[i]) + clat_prio_stats_extra_size += ts->nr_clat_prio[i] * sizeof(*ts->clat_prio[i]); } - - convert_gs(&p.rs, rs); + extended_buf_size += clat_prio_stats_extra_size; dprint(FD_NET, "ts->ss_state = %d\n", ts->ss_state); if (ts->ss_state & FIO_SS_DATA) @@ -1612,6 +1610,32 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs) memcpy(extended_buf, &p, sizeof(p)); extended_buf_wp = (struct cmd_ts_pdu *)extended_buf + 1; + if (clat_prio_stats_extra_size) { + for (i = 0; i < DDIR_RWDIR_CNT; i++) { + struct clat_prio_stat *prio = (struct clat_prio_stat *) extended_buf_wp; + + for (j = 0; j < ts->nr_clat_prio[i]; j++) { + for (k = 0; k < FIO_IO_U_PLAT_NR; k++) + prio->io_u_plat[k] = + cpu_to_le64(ts->clat_prio[i][j].io_u_plat[k]); + convert_io_stat(&prio->clat_stat, + &ts->clat_prio[i][j].clat_stat); + prio->ioprio = cpu_to_le32(ts->clat_prio[i][j].ioprio); + prio++; + } + + if (ts->nr_clat_prio[i]) { + uint64_t offset = (char *)extended_buf_wp - (char *)extended_buf; + struct cmd_ts_pdu *ptr = extended_buf; + + ptr->ts.clat_prio_offset[i] = cpu_to_le64(offset); + ptr->ts.nr_clat_prio[i] = cpu_to_le32(ts->nr_clat_prio[i]); + } + + extended_buf_wp = prio; + } + } + if (ss_extra_size) { uint64_t *ss_iops, *ss_bw; uint64_t offset; diff --git a/server.h b/server.h index 25b6bbdc..27091f69 100644 --- a/server.h +++ b/server.h @@ -48,7 +48,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 95, + FIO_SERVER_VER = 96, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, FIO_SERVER_MAX_CMD_MB = 2048, diff --git a/stat.c b/stat.c index f783aed8..a6810d9b 100644 --- a/stat.c +++ b/stat.c @@ -265,6 +265,18 @@ static void show_clat_percentiles(uint64_t *io_u_plat, unsigned long long nr, free(ovals); } +static int get_nr_prios_with_samples(struct thread_stat *ts, enum fio_ddir ddir) +{ + int i, nr_prios_with_samples = 0; + + for (i = 0; i < ts->nr_clat_prio[ddir]; i++) { + if (ts->clat_prio[ddir][i].clat_stat.samples) + nr_prios_with_samples++; + } + + return nr_prios_with_samples; +} + bool calc_lat(struct io_stat *is, unsigned long long *min, unsigned long long *max, double *mean, double *dev) { @@ -511,7 +523,8 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts, unsigned long long min, max, bw, iops; double mean, dev; char *io_p, *bw_p, *bw_p_alt, *iops_p, *post_st = NULL; - int i2p; + int i2p, i; + const char *clat_type = ts->lat_percentiles ? "lat" : "clat"; if (ddir_sync(ddir)) { if (calc_lat(&ts->sync_stat, &min, &max, &mean, &dev)) { @@ -572,12 +585,22 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts, display_lat("clat", min, max, mean, dev, out); if (calc_lat(&ts->lat_stat[ddir], &min, &max, &mean, &dev)) display_lat(" lat", min, max, mean, dev, out); - if (calc_lat(&ts->clat_high_prio_stat[ddir], &min, &max, &mean, &dev)) { - display_lat(ts->lat_percentiles ? "high prio_lat" : "high prio_clat", - min, max, mean, dev, out); - if (calc_lat(&ts->clat_low_prio_stat[ddir], &min, &max, &mean, &dev)) - display_lat(ts->lat_percentiles ? "low prio_lat" : "low prio_clat", - min, max, mean, dev, out); + + /* Only print per prio stats if there are >= 2 prios with samples */ + if (get_nr_prios_with_samples(ts, ddir) >= 2) { + for (i = 0; i < ts->nr_clat_prio[ddir]; i++) { + if (calc_lat(&ts->clat_prio[ddir][i].clat_stat, &min, + &max, &mean, &dev)) { + char buf[64]; + + snprintf(buf, sizeof(buf), + "%s prio %u/%u", + clat_type, + ts->clat_prio[ddir][i].ioprio >> 13, + ts->clat_prio[ddir][i].ioprio & 7); + display_lat(buf, min, max, mean, dev, out); + } + } } if (ts->slat_percentiles && ts->slat_stat[ddir].samples > 0) @@ -597,8 +620,7 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts, ts->percentile_precision, "lat", out); if (ts->clat_percentiles || ts->lat_percentiles) { - const char *name = ts->lat_percentiles ? "lat" : "clat"; - char prio_name[32]; + char prio_name[64]; uint64_t samples; if (ts->lat_percentiles) @@ -606,25 +628,24 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts, else samples = ts->clat_stat[ddir].samples; - /* Only print this if some high and low priority stats were collected */ - if (ts->clat_high_prio_stat[ddir].samples > 0 && - ts->clat_low_prio_stat[ddir].samples > 0) - { - sprintf(prio_name, "high prio (%.2f%%) %s", - 100. * (double) ts->clat_high_prio_stat[ddir].samples / (double) samples, - name); - show_clat_percentiles(ts->io_u_plat_high_prio[ddir], - ts->clat_high_prio_stat[ddir].samples, - ts->percentile_list, - ts->percentile_precision, prio_name, out); - - sprintf(prio_name, "low prio (%.2f%%) %s", - 100. * (double) ts->clat_low_prio_stat[ddir].samples / (double) samples, - name); - show_clat_percentiles(ts->io_u_plat_low_prio[ddir], - ts->clat_low_prio_stat[ddir].samples, - ts->percentile_list, - ts->percentile_precision, prio_name, out); + /* Only print per prio stats if there are >= 2 prios with samples */ + if (get_nr_prios_with_samples(ts, ddir) >= 2) { + for (i = 0; i < ts->nr_clat_prio[ddir]; i++) { + uint64_t prio_samples = ts->clat_prio[ddir][i].clat_stat.samples; + + if (prio_samples > 0) { + snprintf(prio_name, sizeof(prio_name), + "%s prio %u/%u (%.2f%% of IOs)", + clat_type, + ts->clat_prio[ddir][i].ioprio >> 13, + ts->clat_prio[ddir][i].ioprio & 7, + 100. * (double) prio_samples / (double) samples); + show_clat_percentiles(ts->clat_prio[ddir][i].io_u_plat, + prio_samples, ts->percentile_list, + ts->percentile_precision, + prio_name, out); + } + } } } @@ -679,6 +700,7 @@ static void show_mixed_ddir_status(struct group_run_stats *rs, if (ts_lcl) show_ddir_status(rs, ts_lcl, DDIR_READ, out); + free_clat_prio_stats(ts_lcl); free(ts_lcl); } @@ -1353,6 +1375,7 @@ static void show_mixed_ddir_status_terse(struct thread_stat *ts, if (ts_lcl) show_ddir_status_terse(ts_lcl, rs, DDIR_READ, ver, out); + free_clat_prio_stats(ts_lcl); free(ts_lcl); } @@ -1537,6 +1560,7 @@ static void add_mixed_ddir_status_json(struct thread_stat *ts, if (ts_lcl) add_ddir_status_json(ts_lcl, rs, DDIR_READ, parent); + free_clat_prio_stats(ts_lcl); free(ts_lcl); } @@ -2038,6 +2062,175 @@ int alloc_clat_prio_stat_ddir(struct thread_stat *ts, enum fio_ddir ddir, return 0; } +static int grow_clat_prio_stat(struct thread_stat *dst, enum fio_ddir ddir) +{ + int curr_len = dst->nr_clat_prio[ddir]; + void *new_arr; + + new_arr = scalloc(curr_len + 1, sizeof(*dst->clat_prio[ddir])); + if (!new_arr) { + log_err("fio: failed to grow clat prio array\n"); + return 1; + } + + memcpy(new_arr, dst->clat_prio[ddir], + curr_len * sizeof(*dst->clat_prio[ddir])); + sfree(dst->clat_prio[ddir]); + + dst->clat_prio[ddir] = new_arr; + dst->clat_prio[ddir][curr_len].clat_stat.min_val = ULONG_MAX; + dst->nr_clat_prio[ddir]++; + + return 0; +} + +static int find_clat_prio_index(struct thread_stat *dst, enum fio_ddir ddir, + uint32_t ioprio) +{ + int i, nr_prios = dst->nr_clat_prio[ddir]; + + for (i = 0; i < nr_prios; i++) { + if (dst->clat_prio[ddir][i].ioprio == ioprio) + return i; + } + + return -1; +} + +static int alloc_or_get_clat_prio_index(struct thread_stat *dst, + enum fio_ddir ddir, uint32_t ioprio, + int *idx) +{ + int index = find_clat_prio_index(dst, ddir, ioprio); + + if (index == -1) { + index = dst->nr_clat_prio[ddir]; + + if (grow_clat_prio_stat(dst, ddir)) + return 1; + + dst->clat_prio[ddir][index].ioprio = ioprio; + } + + *idx = index; + + return 0; +} + +static int clat_prio_stats_copy(struct thread_stat *dst, struct thread_stat *src, + enum fio_ddir dst_ddir, enum fio_ddir src_ddir) +{ + size_t sz = sizeof(*src->clat_prio[src_ddir]) * + src->nr_clat_prio[src_ddir]; + + dst->clat_prio[dst_ddir] = smalloc(sz); + if (!dst->clat_prio[dst_ddir]) { + log_err("fio: failed to alloc clat prio array\n"); + return 1; + } + + memcpy(dst->clat_prio[dst_ddir], src->clat_prio[src_ddir], sz); + dst->nr_clat_prio[dst_ddir] = src->nr_clat_prio[src_ddir]; + + return 0; +} + +static int clat_prio_stat_add_samples(struct thread_stat *dst, + enum fio_ddir dst_ddir, uint32_t ioprio, + struct io_stat *io_stat, + uint64_t *io_u_plat) +{ + int i, dst_index; + + if (!io_stat->samples) + return 0; + + if (alloc_or_get_clat_prio_index(dst, dst_ddir, ioprio, &dst_index)) + return 1; + + sum_stat(&dst->clat_prio[dst_ddir][dst_index].clat_stat, io_stat, + false); + + for (i = 0; i < FIO_IO_U_PLAT_NR; i++) + dst->clat_prio[dst_ddir][dst_index].io_u_plat[i] += io_u_plat[i]; + + return 0; +} + +static int sum_clat_prio_stats_src_single_prio(struct thread_stat *dst, + struct thread_stat *src, + enum fio_ddir dst_ddir, + enum fio_ddir src_ddir) +{ + struct io_stat *io_stat; + uint64_t *io_u_plat; + + /* + * If src ts has no clat_prio_stat array, then all I/Os were submitted + * using src->ioprio. Thus, the global samples in src->clat_stat (or + * src->lat_stat) can be used as the 'per prio' samples for src->ioprio. + */ + assert(!src->clat_prio[src_ddir]); + assert(src->nr_clat_prio[src_ddir] == 0); + + if (src->lat_percentiles) { + io_u_plat = src->io_u_plat[FIO_LAT][src_ddir]; + io_stat = &src->lat_stat[src_ddir]; + } else { + io_u_plat = src->io_u_plat[FIO_CLAT][src_ddir]; + io_stat = &src->clat_stat[src_ddir]; + } + + return clat_prio_stat_add_samples(dst, dst_ddir, src->ioprio, io_stat, + io_u_plat); +} + +static int sum_clat_prio_stats_src_multi_prio(struct thread_stat *dst, + struct thread_stat *src, + enum fio_ddir dst_ddir, + enum fio_ddir src_ddir) +{ + int i; + + /* + * If src ts has a clat_prio_stat array, then there are multiple prios + * in use (i.e. src ts had cmdprio_percentage or cmdprio_bssplit set). + * The samples for the default prio will exist in the src->clat_prio + * array, just like the samples for any other prio. + */ + assert(src->clat_prio[src_ddir]); + assert(src->nr_clat_prio[src_ddir]); + + /* If the dst ts doesn't yet have a clat_prio array, simply memcpy. */ + if (!dst->clat_prio[dst_ddir]) + return clat_prio_stats_copy(dst, src, dst_ddir, src_ddir); + + /* The dst ts already has a clat_prio_array, add src stats into it. */ + for (i = 0; i < src->nr_clat_prio[src_ddir]; i++) { + struct io_stat *io_stat = &src->clat_prio[src_ddir][i].clat_stat; + uint64_t *io_u_plat = src->clat_prio[src_ddir][i].io_u_plat; + uint32_t ioprio = src->clat_prio[src_ddir][i].ioprio; + + if (clat_prio_stat_add_samples(dst, dst_ddir, ioprio, io_stat, io_u_plat)) + return 1; + } + + return 0; +} + +static int sum_clat_prio_stats(struct thread_stat *dst, struct thread_stat *src, + enum fio_ddir dst_ddir, enum fio_ddir src_ddir) +{ + if (dst->disable_prio_stat) + return 0; + + if (!src->clat_prio[src_ddir]) + return sum_clat_prio_stats_src_single_prio(dst, src, dst_ddir, + src_ddir); + + return sum_clat_prio_stats_src_multi_prio(dst, src, dst_ddir, src_ddir); +} + void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src) { int k, l, m; @@ -2045,12 +2238,11 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src) for (l = 0; l < DDIR_RWDIR_CNT; l++) { if (dst->unified_rw_rep != UNIFIED_MIXED) { sum_stat(&dst->clat_stat[l], &src->clat_stat[l], false); - sum_stat(&dst->clat_high_prio_stat[l], &src->clat_high_prio_stat[l], false); - sum_stat(&dst->clat_low_prio_stat[l], &src->clat_low_prio_stat[l], false); sum_stat(&dst->slat_stat[l], &src->slat_stat[l], false); sum_stat(&dst->lat_stat[l], &src->lat_stat[l], false); sum_stat(&dst->bw_stat[l], &src->bw_stat[l], true); sum_stat(&dst->iops_stat[l], &src->iops_stat[l], true); + sum_clat_prio_stats(dst, src, l, l); dst->io_bytes[l] += src->io_bytes[l]; @@ -2058,12 +2250,11 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src) dst->runtime[l] = src->runtime[l]; } else { sum_stat(&dst->clat_stat[0], &src->clat_stat[l], false); - sum_stat(&dst->clat_high_prio_stat[0], &src->clat_high_prio_stat[l], false); - sum_stat(&dst->clat_low_prio_stat[0], &src->clat_low_prio_stat[l], false); sum_stat(&dst->slat_stat[0], &src->slat_stat[l], false); sum_stat(&dst->lat_stat[0], &src->lat_stat[l], false); sum_stat(&dst->bw_stat[0], &src->bw_stat[l], true); sum_stat(&dst->iops_stat[0], &src->iops_stat[l], true); + sum_clat_prio_stats(dst, src, 0, l); dst->io_bytes[0] += src->io_bytes[l]; @@ -2117,19 +2308,6 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src) for (k = 0; k < FIO_IO_U_PLAT_NR; k++) dst->io_u_sync_plat[k] += src->io_u_sync_plat[k]; - for (k = 0; k < DDIR_RWDIR_CNT; k++) { - for (m = 0; m < FIO_IO_U_PLAT_NR; m++) { - if (dst->unified_rw_rep != UNIFIED_MIXED) { - dst->io_u_plat_high_prio[k][m] += src->io_u_plat_high_prio[k][m]; - dst->io_u_plat_low_prio[k][m] += src->io_u_plat_low_prio[k][m]; - } else { - dst->io_u_plat_high_prio[0][m] += src->io_u_plat_high_prio[k][m]; - dst->io_u_plat_low_prio[0][m] += src->io_u_plat_low_prio[k][m]; - } - - } - } - dst->total_run_time += src->total_run_time; dst->total_submit += src->total_submit; dst->total_complete += src->total_complete; @@ -2157,8 +2335,6 @@ void init_thread_stat_min_vals(struct thread_stat *ts) ts->lat_stat[i].min_val = ULONG_MAX; ts->bw_stat[i].min_val = ULONG_MAX; ts->iops_stat[i].min_val = ULONG_MAX; - ts->clat_high_prio_stat[i].min_val = ULONG_MAX; - ts->clat_low_prio_stat[i].min_val = ULONG_MAX; } ts->sync_stat.min_val = ULONG_MAX; } @@ -2517,6 +2693,12 @@ void __show_run_stats(void) log_info_flush(); free(runstats); + + /* free arrays allocated by sum_thread_stats(), if any */ + for (i = 0; i < nr_ts; i++) { + ts = &threadstats[i]; + free_clat_prio_stats(ts); + } free(threadstats); free(opt_lists); } @@ -2643,6 +2825,14 @@ static inline void add_stat_sample(struct io_stat *is, unsigned long long data) is->samples++; } +static inline void add_stat_prio_sample(struct clat_prio_stat *clat_prio, + unsigned short clat_prio_index, + unsigned long long nsec) +{ + if (clat_prio) + add_stat_sample(&clat_prio[clat_prio_index].clat_stat, nsec); +} + /* * Return a struct io_logs, which is added to the tail of the log * list for 'iolog'. @@ -2848,14 +3038,28 @@ static inline void reset_io_u_plat(uint64_t *io_u_plat) io_u_plat[i] = 0; } +static inline void reset_clat_prio_stats(struct thread_stat *ts) +{ + enum fio_ddir ddir; + int i; + + for (ddir = 0; ddir < DDIR_RWDIR_CNT; ddir++) { + if (!ts->clat_prio[ddir]) + continue; + + for (i = 0; i < ts->nr_clat_prio[ddir]; i++) { + reset_io_stat(&ts->clat_prio[ddir][i].clat_stat); + reset_io_u_plat(ts->clat_prio[ddir][i].io_u_plat); + } + } +} + void reset_io_stats(struct thread_data *td) { struct thread_stat *ts = &td->ts; int i, j; for (i = 0; i < DDIR_RWDIR_CNT; i++) { - reset_io_stat(&ts->clat_high_prio_stat[i]); - reset_io_stat(&ts->clat_low_prio_stat[i]); reset_io_stat(&ts->clat_stat[i]); reset_io_stat(&ts->slat_stat[i]); reset_io_stat(&ts->lat_stat[i]); @@ -2867,15 +3071,14 @@ void reset_io_stats(struct thread_data *td) ts->total_io_u[i] = 0; ts->short_io_u[i] = 0; ts->drop_io_u[i] = 0; - - reset_io_u_plat(ts->io_u_plat_high_prio[i]); - reset_io_u_plat(ts->io_u_plat_low_prio[i]); } for (i = 0; i < FIO_LAT_CNT; i++) for (j = 0; j < DDIR_RWDIR_CNT; j++) reset_io_u_plat(ts->io_u_plat[i][j]); + reset_clat_prio_stats(ts); + ts->total_io_u[DDIR_SYNC] = 0; reset_io_u_plat(ts->io_u_sync_plat); @@ -3028,22 +3231,21 @@ static inline void add_lat_percentile_sample(struct thread_stat *ts, ts->io_u_plat[lat][ddir][idx]++; } -static inline void add_lat_percentile_prio_sample(struct thread_stat *ts, - unsigned long long nsec, - enum fio_ddir ddir, - bool high_prio) +static inline void +add_lat_percentile_prio_sample(struct thread_stat *ts, unsigned long long nsec, + enum fio_ddir ddir, + unsigned short clat_prio_index) { unsigned int idx = plat_val_to_idx(nsec); - if (!high_prio) - ts->io_u_plat_low_prio[ddir][idx]++; - else - ts->io_u_plat_high_prio[ddir][idx]++; + if (ts->clat_prio[ddir]) + ts->clat_prio[ddir][clat_prio_index].io_u_plat[idx]++; } void add_clat_sample(struct thread_data *td, enum fio_ddir ddir, unsigned long long nsec, unsigned long long bs, - uint64_t offset, unsigned int ioprio, bool high_prio) + uint64_t offset, unsigned int ioprio, + unsigned short clat_prio_index) { const bool needs_lock = td_async_processing(td); unsigned long elapsed, this_window; @@ -3056,7 +3258,7 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir, add_stat_sample(&ts->clat_stat[ddir], nsec); /* - * When lat_percentiles=1 (default 0), the reported high/low priority + * When lat_percentiles=1 (default 0), the reported per priority * percentiles and stats are used for describing total latency values, * even though the variable names themselves start with clat_. * @@ -3064,12 +3266,9 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir, * lat_percentiles=0. add_lat_sample() will add the prio stat sample * when lat_percentiles=1. */ - if (!ts->lat_percentiles) { - if (high_prio) - add_stat_sample(&ts->clat_high_prio_stat[ddir], nsec); - else - add_stat_sample(&ts->clat_low_prio_stat[ddir], nsec); - } + if (!ts->lat_percentiles) + add_stat_prio_sample(ts->clat_prio[ddir], clat_prio_index, + nsec); if (td->clat_log) add_log_sample(td, td->clat_log, sample_val(nsec), ddir, bs, @@ -3084,7 +3283,7 @@ void add_clat_sample(struct thread_data *td, enum fio_ddir ddir, add_lat_percentile_sample(ts, nsec, ddir, FIO_CLAT); if (!ts->lat_percentiles) add_lat_percentile_prio_sample(ts, nsec, ddir, - high_prio); + clat_prio_index); } if (iolog && iolog->hist_msec) { @@ -3157,7 +3356,8 @@ void add_slat_sample(struct thread_data *td, enum fio_ddir ddir, void add_lat_sample(struct thread_data *td, enum fio_ddir ddir, unsigned long long nsec, unsigned long long bs, - uint64_t offset, unsigned int ioprio, bool high_prio) + uint64_t offset, unsigned int ioprio, + unsigned short clat_prio_index) { const bool needs_lock = td_async_processing(td); struct thread_stat *ts = &td->ts; @@ -3175,7 +3375,7 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir, offset, ioprio); /* - * When lat_percentiles=1 (default 0), the reported high/low priority + * When lat_percentiles=1 (default 0), the reported per priority * percentiles and stats are used for describing total latency values, * even though the variable names themselves start with clat_. * @@ -3186,12 +3386,9 @@ void add_lat_sample(struct thread_data *td, enum fio_ddir ddir, */ if (ts->lat_percentiles) { add_lat_percentile_sample(ts, nsec, ddir, FIO_LAT); - add_lat_percentile_prio_sample(ts, nsec, ddir, high_prio); - if (high_prio) - add_stat_sample(&ts->clat_high_prio_stat[ddir], nsec); - else - add_stat_sample(&ts->clat_low_prio_stat[ddir], nsec); - + add_lat_percentile_prio_sample(ts, nsec, ddir, clat_prio_index); + add_stat_prio_sample(ts->clat_prio[ddir], clat_prio_index, + nsec); } if (needs_lock) __td_io_u_unlock(td); diff --git a/stat.h b/stat.h index 6c86fa22..05228fd2 100644 --- a/stat.h +++ b/stat.h @@ -373,9 +373,9 @@ extern void update_rusage_stat(struct thread_data *); extern void clear_rusage_stat(struct thread_data *); extern void add_lat_sample(struct thread_data *, enum fio_ddir, unsigned long long, - unsigned long long, uint64_t, unsigned int, bool); + unsigned long long, uint64_t, unsigned int, unsigned short); extern void add_clat_sample(struct thread_data *, enum fio_ddir, unsigned long long, - unsigned long long, uint64_t, unsigned int, bool); + unsigned long long, uint64_t, unsigned int, unsigned short); extern void add_slat_sample(struct thread_data *, enum fio_ddir, unsigned long long, unsigned long long, uint64_t, unsigned int); extern void add_agg_sample(union io_sample_data, enum fio_ddir, unsigned long long); -- 2.34.1