The following changes since commit fba5c5ff89163062922c3e560e871c087f2177c3: smalloc: alloc failure cleanups (2013-01-29 22:29:09 +0100) are available in the git repository at: git://git.kernel.dk/fio.git master Huadong Liu (1): Add support for idletime profiling Jens Axboe (6): HOWTO: indentation fix Add unified_rw_reporting option configure: add SCHED_IDLE check idletime: style cleanups net: fix compile warning on Windows (and others) Add empty <netinet/tcp.h> for Windows Ken Raeburn (1): Fix bugs in [v]snprintf usage Steven Noonan (1): net engine: implement option "nodelay" for TCP sockets HOWTO | 43 +++- Makefile | 3 +- README | 3 + backend.c | 8 + client.c | 3 + configure | 20 ++ engines/falloc.c | 2 +- engines/net.c | 32 ++- eta.c | 43 +++- filesetup.c | 4 +- fio.1 | 8 + fio.h | 3 +- idletime.c | 475 ++++++++++++++++++++++++++++++++ idletime.h | 57 ++++ init.c | 19 ++- iolog.c | 2 +- log.c | 4 + options.c | 7 + os/os-linux.h | 8 + os/os-windows.h | 7 + os/windows/posix/include/netinet/tcp.h | 4 + server.c | 4 + server.h | 2 +- stat.c | 77 ++++-- stat.h | 2 + t/log.c | 2 + 26 files changed, 787 insertions(+), 55 deletions(-) create mode 100644 idletime.c create mode 100644 idletime.h create mode 100644 os/windows/posix/include/netinet/tcp.h --- Diff of recent changes: diff --git a/HOWTO b/HOWTO index d5be376..c46b883 100644 --- a/HOWTO +++ b/HOWTO @@ -272,17 +272,17 @@ filename=str Fio normally makes up a filename based on the job name, can specify a number of files by separating the names with a ':' colon. So if you wanted a job to open /dev/sda and /dev/sdb as the two working files, you would use - filename=/dev/sda:/dev/sdb. On Windows, disk devices are accessed - as \\.\PhysicalDrive0 for the first device, \\.\PhysicalDrive1 - for the second etc. - Note: Windows and FreeBSD prevent write access to areas of the disk - containing in-use data (e.g. filesystems). - If the wanted filename does need to include a colon, then escape that - with a '\' character. - For instance, if the filename is "/dev/dsk/foo@3,0:c", - then you would use filename="/dev/dsk/foo@3,0\:c". - '-' is a reserved name, meaning stdin or stdout. Which of the - two depends on the read/write direction set. + filename=/dev/sda:/dev/sdb. On Windows, disk devices are + accessed as \\.\PhysicalDrive0 for the first device, + \\.\PhysicalDrive1 for the second etc. Note: Windows and + FreeBSD prevent write access to areas of the disk containing + in-use data (e.g. filesystems). + If the wanted filename does need to include a colon, then + escape that with a '\' character. For instance, if the filename + is "/dev/dsk/foo@3,0:c", then you would use + filename="/dev/dsk/foo@3,0\:c". '-' is a reserved name, meaning + stdin or stdout. Which of the two depends on the read/write + direction set. opendir=str Tell fio to recursively add any file it can find in this directory and down the file system tree. @@ -353,6 +353,12 @@ kb_base=int The base unit for a kilobyte. The defacto base is 2^10, 1024. ten unit instead, for obvious reasons. Allow values are 1024 or 1000, with 1024 being the default. +unified_rw_reporting=bool Fio normally reports statistics on a per + data direction basis, meaning that read, write, and trim are + accounted and reported separately. If this option is set, + the fio will sum the results and report them as "mixed" + instead. + randrepeat=bool For random IO workloads, seed the generator in a predictable way so that results are repeatable across repetitions. @@ -1687,3 +1693,18 @@ write Write 'length' bytes beginning from 'offset' sync fsync() the file datasync fdatasync() the file trim trim the given file from the given 'offset' for 'length' bytes + + +9.0 CPU idleness profiling + +In some cases, we want to understand CPU overhead in a test. For example, +we test patches for the specific goodness of whether they reduce CPU usage. +fio implements a balloon approach to create a thread per CPU that runs at +idle priority, meaning that it only runs when nobody else needs the cpu. +By measuring the amount of work completed by the thread, idleness of each +CPU can be derived accordingly. + +An unit work is defined as touching a full page of unsigned characters. Mean +and standard deviation of time to complete an unit work is reported in "unit +work" section. Options can be chosen to report detailed percpu idleness or +overall system idleness by aggregating percpu stats. diff --git a/Makefile b/Makefile index 52ec4a2..a4a478d 100644 --- a/Makefile +++ b/Makefile @@ -28,7 +28,8 @@ SOURCE := gettime.c fio.c ioengines.c init.c stat.c log.c time.c filesetup.c \ engines/mmap.c engines/sync.c engines/null.c engines/net.c \ memalign.c server.c client.c iolog.c backend.c libfio.c flow.c \ json.c lib/zipf.c lib/axmap.c lib/lfsr.c gettime-thread.c \ - helpers.c lib/flist_sort.c lib/hweight.c lib/getrusage.c + helpers.c lib/flist_sort.c lib/hweight.c lib/getrusage.c \ + idletime.c ifdef CONFIG_64BIT_LLP64 CFLAGS += -DBITS_PER_LONG=32 diff --git a/README b/README index 7c4552d..c43b795 100644 --- a/README +++ b/README @@ -145,6 +145,9 @@ $ fio --max-jobs Maximum number of threads/processes to support --server=args Start backend server. See Client/Server section. --client=host Connect to specified backend. + --idle-prof=option Report cpu idleness on a system or percpu basis + (option=system,percpu) or run unit work + calibration only (option=calibrate). Any parameters following the options will be assumed to be job files, diff --git a/backend.c b/backend.c index 87810e8..218ae25 100644 --- a/backend.c +++ b/backend.c @@ -51,6 +51,7 @@ #include "memalign.h" #include "server.h" #include "lib/getrusage.h" +#include "idletime.h" static pthread_t disk_util_thread; static struct fio_mutex *disk_thread_mutex; @@ -1499,6 +1500,8 @@ static void run_threads(void) if (fio_gtod_offload && fio_start_gtod_thread()) return; + + fio_idle_prof_init(); set_sig_handlers(); @@ -1556,6 +1559,9 @@ static void run_threads(void) } } + /* start idle threads before io threads start to run */ + fio_idle_prof_start(); + set_genesis_time(); while (todo) { @@ -1718,6 +1724,8 @@ static void run_threads(void) usleep(10000); } + fio_idle_prof_stop(); + update_io_ticks(); fio_unpin_memory(); } diff --git a/client.c b/client.c index 6021e4a..0dc620d 100644 --- a/client.c +++ b/client.c @@ -595,6 +595,7 @@ static void convert_ts(struct thread_stat *dst, struct thread_stat *src) dst->groupid = le32_to_cpu(src->groupid); dst->pid = le32_to_cpu(src->pid); dst->members = le32_to_cpu(src->members); + dst->unified_rw_rep = le32_to_cpu(src->unified_rw_rep); for (i = 0; i < DDIR_RWDIR_CNT; i++) { convert_io_stat(&dst->clat_stat[i], &src->clat_stat[i]); @@ -667,6 +668,7 @@ static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src) dst->kb_base = le32_to_cpu(src->kb_base); dst->groupid = le32_to_cpu(src->groupid); + dst->unified_rw_rep = le32_to_cpu(src->unified_rw_rep); } static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd) @@ -687,6 +689,7 @@ static void handle_ts(struct fio_client *client, struct fio_net_cmd *cmd) client_ts.members++; client_ts.groupid = p->ts.groupid; + client_ts.unified_rw_rep = p->ts.unified_rw_rep; if (++sum_stat_nr == sum_stat_clients) { strcpy(client_ts.name, "All clients"); diff --git a/configure b/configure index 583e3ab..995b5df 100755 --- a/configure +++ b/configure @@ -200,6 +200,7 @@ CYGWIN*) output_sym "CONFIG_FDATASYNC" output_sym "CONFIG_GETTIMEOFDAY" output_sym "CONFIG_CLOCK_GETTIME" + output_sym "CONFIG_SCHED_IDLE" echo "CC=$CC" >> $config_host_mak echo "EXTFLAGS=$CFLAGS -include config-host.h -D_GNU_SOURCE" >> $config_host_mak exit 0 @@ -844,6 +845,22 @@ if compile_prog "" "" "RUSAGE_THREAD"; then fi echo "RUSAGE_THREAD $rusage_thread" +########################################## +# Check whether we have SCHED_IDLE +sched_idle="no" +cat > $TMPC << EOF +#include <sched.h> +int main(int argc, char **argv) +{ + struct sched_param p; + return sched_setscheduler(0, SCHED_IDLE, &p); +} +EOF +if compile_prog "" "" "SCHED_IDLE"; then + sched_idle="yes" +fi +echo "SCHED_IDLE $sched_idle" + ############################################################################# echo "# Automatically generated by configure - do not modify" > $config_host_mak @@ -947,6 +964,9 @@ fi if test "$rusage_thread" = "yes" ; then output_sym "CONFIG_RUSAGE_THREAD" fi +if test "$sched_idle" = "yes" ; then + output_sym "CONFIG_SCHED_IDLE" +fi echo "LIBS+=$LIBS" >> $config_host_mak echo "CC=$cc" >> $config_host_mak diff --git a/engines/falloc.c b/engines/falloc.c index 525a0aa..4654fe8 100644 --- a/engines/falloc.c +++ b/engines/falloc.c @@ -44,7 +44,7 @@ open_again: if (f->fd == -1) { char buf[FIO_VERROR_SIZE]; int __e = errno; - snprintf(buf, sizeof(buf) - 1, "open(%s)", f->file_name); + snprintf(buf, sizeof(buf), "open(%s)", f->file_name); td_verror(td, __e, buf); } diff --git a/engines/net.c b/engines/net.c index de7cdb5..d0f4fa0 100644 --- a/engines/net.c +++ b/engines/net.c @@ -11,6 +11,7 @@ #include <errno.h> #include <assert.h> #include <netinet/in.h> +#include <netinet/tcp.h> #include <arpa/inet.h> #include <netdb.h> #include <sys/poll.h> @@ -35,6 +36,7 @@ struct netio_options { unsigned int proto; unsigned int listen; unsigned int pingpong; + unsigned int nodelay; }; struct udp_close_msg { @@ -91,6 +93,12 @@ static struct fio_option options[] = { }, }, { + .name = "nodelay", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct netio_options, nodelay), + .help = "Use TCP_NODELAY on TCP connections", + }, + { .name = "listen", .type = FIO_OPT_STR_SET, .off1 = offsetof(struct netio_options, listen), @@ -448,7 +456,7 @@ static int fio_netio_connect(struct thread_data *td, struct fio_file *f) { struct netio_data *nd = td->io_ops->data; struct netio_options *o = td->eo; - int type, domain; + int type, domain, optval; if (o->proto == FIO_TYPE_TCP) { domain = AF_INET; @@ -471,6 +479,14 @@ static int fio_netio_connect(struct thread_data *td, struct fio_file *f) return 1; } + if (o->nodelay && o->proto == FIO_TYPE_TCP) { + optval = 1; + if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) { + log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno)); + return 1; + } + } + if (o->proto == FIO_TYPE_UDP) return 0; else if (o->proto == FIO_TYPE_TCP) { @@ -502,7 +518,7 @@ static int fio_netio_accept(struct thread_data *td, struct fio_file *f) struct netio_data *nd = td->io_ops->data; struct netio_options *o = td->eo; socklen_t socklen = sizeof(nd->addr); - int state; + int state, optval; if (o->proto == FIO_TYPE_UDP) { f->fd = nd->listenfd; @@ -523,6 +539,14 @@ static int fio_netio_accept(struct thread_data *td, struct fio_file *f) goto err; } + if (o->nodelay && o->proto == FIO_TYPE_TCP) { + optval = 1; + if (setsockopt(f->fd, IPPROTO_TCP, TCP_NODELAY, (void *) &optval, sizeof(int)) < 0) { + log_err("fio: cannot set TCP_NODELAY option on socket (%s), disable with 'nodelay=0'\n", strerror(errno)); + return 1; + } + } + reset_all_stats(td); td_set_runstate(td, state); return 0; @@ -743,12 +767,12 @@ static int fio_netio_setup_listen_inet(struct thread_data *td, short port) } opt = 1; - if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void*)&opt, sizeof(opt)) < 0) { + if (setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, (void *) &opt, sizeof(opt)) < 0) { td_verror(td, errno, "setsockopt"); return 1; } #ifdef SO_REUSEPORT - if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt)) < 0) { + if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, (void *) &opt, sizeof(opt)) < 0) { td_verror(td, errno, "setsockopt"); return 1; } diff --git a/eta.c b/eta.c index fdf55c5..39fe10f 100644 --- a/eta.c +++ b/eta.c @@ -226,7 +226,8 @@ static int thread_eta(struct thread_data *td) return eta_sec; } -static void calc_rate(unsigned long mtime, unsigned long long *io_bytes, +static void calc_rate(int unified_rw_rep, unsigned long mtime, + unsigned long long *io_bytes, unsigned long long *prev_io_bytes, unsigned int *rate) { int i; @@ -235,19 +236,32 @@ static void calc_rate(unsigned long mtime, unsigned long long *io_bytes, unsigned long long diff; diff = io_bytes[i] - prev_io_bytes[i]; - rate[i] = ((1000 * diff) / mtime) / 1024; + if (unified_rw_rep) { + rate[i] = 0; + rate[0] += ((1000 * diff) / mtime) / 1024; + } else + rate[i] = ((1000 * diff) / mtime) / 1024; prev_io_bytes[i] = io_bytes[i]; } } -static void calc_iops(unsigned long mtime, unsigned long long *io_iops, +static void calc_iops(int unified_rw_rep, unsigned long mtime, + unsigned long long *io_iops, unsigned long long *prev_io_iops, unsigned int *iops) { int i; for (i = 0; i < DDIR_RWDIR_CNT; i++) { - iops[i] = ((io_iops[i] - prev_io_iops[i]) * 1000) / mtime; + unsigned long long diff; + + diff = io_iops[i] - prev_io_iops[i]; + if (unified_rw_rep) { + iops[i] = 0; + iops[0] += (diff * 1000) / mtime; + } else + iops[i] = (diff * 1000) / mtime; + prev_io_iops[i] = io_iops[i]; } } @@ -259,7 +273,7 @@ static void calc_iops(unsigned long mtime, unsigned long long *io_iops, int calc_thread_status(struct jobs_eta *je, int force) { struct thread_data *td; - int i; + int i, unified_rw_rep; unsigned long rate_time, disp_time, bw_avg_time, *eta_secs; unsigned long long io_bytes[DDIR_RWDIR_CNT]; unsigned long long io_iops[DDIR_RWDIR_CNT]; @@ -293,7 +307,9 @@ int calc_thread_status(struct jobs_eta *je, int force) io_bytes[DDIR_READ] = io_bytes[DDIR_WRITE] = io_bytes[DDIR_TRIM] = 0; io_iops[DDIR_READ] = io_iops[DDIR_WRITE] = io_iops[DDIR_TRIM] = 0; bw_avg_time = ULONG_MAX; + unified_rw_rep = 0; for_each_td(td, i) { + unified_rw_rep += td->o.unified_rw_rep; if (is_power_of_2(td->o.kb_base)) je->is_pow2 = 1; if (td->o.bw_avg_time < bw_avg_time) @@ -339,9 +355,15 @@ int calc_thread_status(struct jobs_eta *je, int force) if (td->runstate > TD_RAMP) { int ddir; + for (ddir = DDIR_READ; ddir < DDIR_RWDIR_CNT; ddir++) { - io_bytes[ddir] += td->io_bytes[ddir]; - io_iops[ddir] += td->io_blocks[ddir]; + if (unified_rw_rep) { + io_bytes[0] += td->io_bytes[ddir]; + io_iops[0] += td->io_blocks[ddir]; + } else { + io_bytes[ddir] += td->io_bytes[ddir]; + io_iops[ddir] += td->io_blocks[ddir]; + } } } } @@ -367,7 +389,8 @@ int calc_thread_status(struct jobs_eta *je, int force) rate_time = mtime_since(&rate_prev_time, &now); if (write_bw_log && rate_time > bw_avg_time && !in_ramp_time(td)) { - calc_rate(rate_time, io_bytes, rate_io_bytes, je->rate); + calc_rate(unified_rw_rep, rate_time, io_bytes, rate_io_bytes, + je->rate); memcpy(&rate_prev_time, &now, sizeof(now)); add_agg_sample(je->rate[DDIR_READ], DDIR_READ, 0); add_agg_sample(je->rate[DDIR_WRITE], DDIR_WRITE, 0); @@ -382,8 +405,8 @@ int calc_thread_status(struct jobs_eta *je, int force) if (!force && disp_time < 900) return 0; - calc_rate(disp_time, io_bytes, disp_io_bytes, je->rate); - calc_iops(disp_time, io_iops, disp_io_iops, je->iops); + calc_rate(unified_rw_rep, disp_time, io_bytes, disp_io_bytes, je->rate); + calc_iops(unified_rw_rep, disp_time, io_iops, disp_io_iops, je->iops); memcpy(&disp_prev_time, &now, sizeof(now)); diff --git a/filesetup.c b/filesetup.c index 6f0a876..5aadf12 100644 --- a/filesetup.c +++ b/filesetup.c @@ -563,7 +563,7 @@ open_again: if (__e == EMFILE && file_close_shadow_fds(td)) goto open_again; - snprintf(buf, sizeof(buf) - 1, "open(%s)", f->file_name); + snprintf(buf, sizeof(buf), "open(%s)", f->file_name); if (__e == EINVAL && (flags & OS_O_DIRECT)) { log_err("fio: looks like your file system does not " \ @@ -1250,7 +1250,7 @@ static int recurse_dir(struct thread_data *td, const char *dirname) if (!D) { char buf[FIO_VERROR_SIZE]; - snprintf(buf, FIO_VERROR_SIZE - 1, "opendir(%s)", dirname); + snprintf(buf, FIO_VERROR_SIZE, "opendir(%s)", dirname); td_verror(td, errno, buf); return 1; } diff --git a/fio.1 b/fio.1 index 2f7728a..6b7e945 100644 --- a/fio.1 +++ b/fio.1 @@ -77,6 +77,9 @@ Background a fio server, writing the pid to the given pid file. .TP .BI \-\-client \fR=\fPhost Instead of running the jobs locally, send and run them on the given host. +.TP +.BI \-\-idle\-prof \fR=\fPoption +Report cpu idleness on a system or percpu basis (\fIoption\fP=system,percpu) or run unit work calibration only (\fIoption\fP=calibrate). .SH "JOB FILE FORMAT" Job files are in `ini' format. They consist of one or more job definitions, which begin with a job name in square brackets and @@ -240,6 +243,11 @@ The base unit for a kilobyte. The defacto base is 2^10, 1024. Storage manufacturers like to use 10^3 or 1000 as a base ten unit instead, for obvious reasons. Allow values are 1024 or 1000, with 1024 being the default. .TP +.BI unified_rw_reporting \fR=\fPbool +Fio normally reports statistics on a per data direction basis, meaning that +read, write, and trim are accounted and reported separately. If this option is +set, the fio will sum the results and report them as "mixed" instead. +.TP .BI randrepeat \fR=\fPbool Seed the random number generator in a predictable way so results are repeatable across runs. Default: true. diff --git a/fio.h b/fio.h index 9e20299..d18029a 100644 --- a/fio.h +++ b/fio.h @@ -237,6 +237,7 @@ struct thread_options { unsigned int disable_clat; unsigned int disable_slat; unsigned int disable_bw; + unsigned int unified_rw_rep; unsigned int gtod_reduce; unsigned int gtod_cpu; unsigned int gtod_offload; @@ -567,7 +568,7 @@ enum { int e = (err); \ (td)->error = e; \ if (!(td)->first_error) \ - snprintf(td->verror, sizeof(td->verror) - 1, "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg)); \ + snprintf(td->verror, sizeof(td->verror), "file:%s:%d, func=%s, error=%s", __FILE__, __LINE__, (func), (msg)); \ } while (0) diff --git a/idletime.c b/idletime.c new file mode 100644 index 0000000..244723f --- /dev/null +++ b/idletime.c @@ -0,0 +1,475 @@ +#include <math.h> +#include "json.h" +#include "idletime.h" + +static volatile struct idle_prof_common ipc; + +/* + * Get time to complete an unit work on a particular cpu. + * The minimum number in CALIBRATE_RUNS runs is returned. + */ +static double calibrate_unit(unsigned char *data) +{ + unsigned long t, i, j, k; + struct timeval tps; + double tunit = 0.0; + + for (i = 0; i < CALIBRATE_RUNS; i++) { + + fio_gettime(&tps, NULL); + /* scale for less variance */ + for (j = 0; j < CALIBRATE_SCALE; j++) { + /* unit of work */ + for (k=0; k < page_size; k++) { + data[(k + j) % page_size] = k % 256; + /* + * we won't see STOP here. this is to match + * the same statement in the profiling loop. + */ + if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) + return 0.0; + } + } + + t = utime_since_now(&tps); + if (!t) + continue; + + /* get the minimum time to complete CALIBRATE_SCALE units */ + if ((i == 0) || ((double)t < tunit)) + tunit = (double)t; + } + + return tunit / CALIBRATE_SCALE; +} + +static void *idle_prof_thread_fn(void *data) +{ + int retval; + unsigned long j, k; + struct idle_prof_thread *ipt = data; + + /* wait for all threads are spawned */ + pthread_mutex_lock(&ipt->init_lock); + + /* exit if any other thread failed to start */ + if (ipc.status == IDLE_PROF_STATUS_ABORT) + return NULL; + +#if defined(FIO_HAVE_CPU_AFFINITY) + os_cpu_mask_t cpu_mask; + memset(&cpu_mask, 0, sizeof(cpu_mask)); + fio_cpu_set(&cpu_mask, ipt->cpu); + + if ((retval=fio_setaffinity(gettid(), cpu_mask)) == -1) + log_err("fio: fio_setaffinity failed\n"); +#else + retval = -1; + log_err("fio: fio_setaffinity not supported\n"); +#endif + if (retval == -1) { + ipt->state = TD_EXITED; + pthread_mutex_unlock(&ipt->init_lock); + return NULL; + } + + ipt->cali_time = calibrate_unit(ipt->data); + + /* delay to set IDLE class till now for better calibration accuracy */ +#if defined(CONFIG_SCHED_IDLE) + if ((retval = fio_set_sched_idle())) + log_err("fio: fio_set_sched_idle failed\n"); +#else + retval = -1; + log_err("fio: fio_set_sched_idle not supported\n"); +#endif + if (retval == -1) { + ipt->state = TD_EXITED; + pthread_mutex_unlock(&ipt->init_lock); + return NULL; + } + + ipt->state = TD_INITIALIZED; + + /* signal the main thread that calibration is done */ + pthread_cond_signal(&ipt->cond); + pthread_mutex_unlock(&ipt->init_lock); + + /* wait for other calibration to finish */ + pthread_mutex_lock(&ipt->start_lock); + + /* exit if other threads failed to initialize */ + if (ipc.status == IDLE_PROF_STATUS_ABORT) + return NULL; + + /* exit if we are doing calibration only */ + if (ipc.status == IDLE_PROF_STATUS_CALI_STOP) + return NULL; + + fio_gettime(&ipt->tps, NULL); + ipt->state = TD_RUNNING; + + j = 0; + while (1) { + for (k = 0; k < page_size; k++) { + ipt->data[(k + j) % page_size] = k % 256; + if (ipc.status == IDLE_PROF_STATUS_PROF_STOP) { + fio_gettime(&ipt->tpe, NULL); + goto idle_prof_done; + } + } + j++; + } + +idle_prof_done: + + ipt->loops = j + (double) k / page_size; + ipt->state = TD_EXITED; + pthread_mutex_unlock(&ipt->start_lock); + + return NULL; +} + +/* calculate mean and standard deviation to complete an unit of work */ +static void calibration_stats(void) +{ + int i; + double sum = 0.0, var = 0.0; + struct idle_prof_thread *ipt; + + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + sum += ipt->cali_time; + } + + ipc.cali_mean = sum/ipc.nr_cpus; + + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + var += pow(ipt->cali_time-ipc.cali_mean, 2); + } + + ipc.cali_stddev = sqrt(var/(ipc.nr_cpus-1)); +} + +void fio_idle_prof_init(void) +{ + int i, ret; + struct timeval tp; + struct timespec ts; + pthread_attr_t tattr; + struct idle_prof_thread *ipt; + + ipc.nr_cpus = cpus_online(); + ipc.status = IDLE_PROF_STATUS_OK; + + if (ipc.opt == IDLE_PROF_OPT_NONE) + return; + + if ((ret = pthread_attr_init(&tattr))) { + log_err("fio: pthread_attr_init %s\n", strerror(ret)); + return; + } + if ((ret = pthread_attr_setscope(&tattr, PTHREAD_SCOPE_SYSTEM))) { + log_err("fio: pthread_attr_setscope %s\n", strerror(ret)); + return; + } + + ipc.ipts = malloc(ipc.nr_cpus * sizeof(struct idle_prof_thread)); + if (!ipc.ipts) { + log_err("fio: malloc failed\n"); + return; + } + + ipc.buf = malloc(ipc.nr_cpus * page_size); + if (!ipc.buf) { + log_err("fio: malloc failed\n"); + free(ipc.ipts); + return; + } + + /* + * profiling aborts on any single thread failure since the + * result won't be accurate if any cpu is not used. + */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + + ipt->cpu = i; + ipt->state = TD_NOT_CREATED; + ipt->data = (unsigned char *)(ipc.buf + page_size * i); + + if ((ret = pthread_mutex_init(&ipt->init_lock, NULL))) { + ipc.status = IDLE_PROF_STATUS_ABORT; + log_err("fio: pthread_mutex_init %s\n", strerror(ret)); + break; + } + + if ((ret = pthread_mutex_init(&ipt->start_lock, NULL))) { + ipc.status = IDLE_PROF_STATUS_ABORT; + log_err("fio: pthread_mutex_init %s\n", strerror(ret)); + break; + } + + if ((ret = pthread_cond_init(&ipt->cond, NULL))) { + ipc.status = IDLE_PROF_STATUS_ABORT; + log_err("fio: pthread_cond_init %s\n", strerror(ret)); + break; + } + + /* make sure all threads are spawned before they start */ + pthread_mutex_lock(&ipt->init_lock); + + /* make sure all threads finish init before profiling starts */ + pthread_mutex_lock(&ipt->start_lock); + + if ((ret = pthread_create(&ipt->thread, &tattr, idle_prof_thread_fn, ipt))) { + ipc.status = IDLE_PROF_STATUS_ABORT; + log_err("fio: pthread_create %s\n", strerror(ret)); + break; + } else + ipt->state = TD_CREATED; + + if ((ret = pthread_detach(ipt->thread))) { + /* log error and let the thread spin */ + log_err("fio: pthread_detatch %s\n", strerror(ret)); + } + } + + /* + * let good threads continue so that they can exit + * if errors on other threads occurred previously. + */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + pthread_mutex_unlock(&ipt->init_lock); + } + + if (ipc.status == IDLE_PROF_STATUS_ABORT) + return; + + /* wait for calibration to finish */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + pthread_mutex_lock(&ipt->init_lock); + while ((ipt->state != TD_EXITED) && + (ipt->state!=TD_INITIALIZED)) { + fio_gettime(&tp, NULL); + ts.tv_sec = tp.tv_sec + 1; + ts.tv_nsec = tp.tv_usec * 1000; + pthread_cond_timedwait(&ipt->cond, &ipt->init_lock, &ts); + } + pthread_mutex_unlock(&ipt->init_lock); + + /* + * any thread failed to initialize would abort other threads + * later after fio_idle_prof_start. + */ + if (ipt->state == TD_EXITED) + ipc.status = IDLE_PROF_STATUS_ABORT; + } + + if (ipc.status != IDLE_PROF_STATUS_ABORT) + calibration_stats(); + else + ipc.cali_mean = ipc.cali_stddev = 0.0; + + if (ipc.opt == IDLE_PROF_OPT_CALI) + ipc.status = IDLE_PROF_STATUS_CALI_STOP; +} + +void fio_idle_prof_start(void) +{ + int i; + struct idle_prof_thread *ipt; + + if (ipc.opt == IDLE_PROF_OPT_NONE) + return; + + /* unlock regardless abort is set or not */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + pthread_mutex_unlock(&ipt->start_lock); + } +} + +void fio_idle_prof_stop(void) +{ + int i; + uint64_t runt; + struct timeval tp; + struct timespec ts; + struct idle_prof_thread *ipt; + + if (ipc.opt == IDLE_PROF_OPT_NONE) + return; + + if (ipc.opt == IDLE_PROF_OPT_CALI) + return; + + ipc.status = IDLE_PROF_STATUS_PROF_STOP; + + /* wait for all threads to exit from profiling */ + for (i = 0; i < ipc.nr_cpus; i++) { + ipt = &ipc.ipts[i]; + pthread_mutex_lock(&ipt->start_lock); + while ((ipt->state != TD_EXITED) && + (ipt->state!=TD_NOT_CREATED)) { + fio_gettime(&tp, NULL); + ts.tv_sec = tp.tv_sec + 1; + ts.tv_nsec = tp.tv_usec * 1000; + /* timed wait in case a signal is not received */ + pthread_cond_timedwait(&ipt->cond, &ipt->start_lock, &ts); + } + pthread_mutex_unlock(&ipt->start_lock); + + /* calculate idleness */ + if (ipc.cali_mean != 0.0) { + runt = utime_since(&ipt->tps, &ipt->tpe); + ipt->idleness = ipt->loops * ipc.cali_mean / runt; + } else + ipt->idleness = 0.0; + } + + /* + * memory allocations are freed via explicit fio_idle_prof_cleanup + * after profiling stats are collected by apps. + */ +} + +/* + * return system idle percentage when cpu is -1; + * return one cpu idle percentage otherwise. + */ +static double fio_idle_prof_cpu_stat(int cpu) +{ + int i, nr_cpus = ipc.nr_cpus; + struct idle_prof_thread *ipt; + double p = 0.0; + + if (ipc.opt == IDLE_PROF_OPT_NONE) + return 0.0; + + if ((cpu >= nr_cpus) || (cpu < -1)) { + log_err("fio: idle profiling invalid cpu index\n"); + return 0.0; + } + + if (cpu == -1) { + for (i = 0; i < nr_cpus; i++) { + ipt = &ipc.ipts[i]; + p += ipt->idleness; + } + p /= nr_cpus; + } else { + ipt = &ipc.ipts[cpu]; + p = ipt->idleness; + } + + return p * 100.0; +} + +void fio_idle_prof_cleanup(void) +{ + if (ipc.ipts) { + free(ipc.ipts); + ipc.ipts = NULL; + } + + if (ipc.buf) { + free(ipc.buf); + ipc.buf = NULL; + } +} + +int fio_idle_prof_parse_opt(const char *args) +{ + ipc.opt = IDLE_PROF_OPT_NONE; /* default */ + + if (!args) { + log_err("fio: empty idle-prof option string\n"); + return -1; + } + +#if defined(FIO_HAVE_CPU_AFFINITY) && defined(CONFIG_SCHED_IDLE) + if (strcmp("calibrate", args) == 0) { + ipc.opt = IDLE_PROF_OPT_CALI; + fio_idle_prof_init(); + fio_idle_prof_start(); + fio_idle_prof_stop(); + show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL); + return 1; + } else if (strcmp("system", args) == 0) { + ipc.opt = IDLE_PROF_OPT_SYSTEM; + return 0; + } else if (strcmp("percpu", args) == 0) { + ipc.opt = IDLE_PROF_OPT_PERCPU; + return 0; + } else { + log_err("fio: incorrect idle-prof option\n", args); + return -1; + } +#else + log_err("fio: idle-prof not supported on this platform\n"); + return -1; +#endif +} + +void show_idle_prof_stats(int output, struct json_object *parent) +{ + int i, nr_cpus = ipc.nr_cpus; + struct json_object *tmp; + char s[MAX_CPU_STR_LEN]; + + if (output == FIO_OUTPUT_NORMAL) { + if (ipc.opt > IDLE_PROF_OPT_CALI) + log_info("\nCPU idleness:\n"); + else if (ipc.opt == IDLE_PROF_OPT_CALI) + log_info("CPU idleness:\n"); + + if (ipc.opt >= IDLE_PROF_OPT_SYSTEM) + log_info(" system: %3.2f%%\n", fio_idle_prof_cpu_stat(-1)); + + if (ipc.opt == IDLE_PROF_OPT_PERCPU) { + log_info(" percpu: %3.2f%%", fio_idle_prof_cpu_stat(0)); + for (i = 1; i < nr_cpus; i++) + log_info(", %3.2f%%", fio_idle_prof_cpu_stat(i)); + log_info("\n"); + } + + if (ipc.opt >= IDLE_PROF_OPT_CALI) { + log_info(" unit work: mean=%3.2fus,", ipc.cali_mean); + log_info(" stddev=%3.2f\n", ipc.cali_stddev); + } + + /* dynamic mem allocations can now be freed */ + if (ipc.opt != IDLE_PROF_OPT_NONE) + fio_idle_prof_cleanup(); + + return; + } + + if ((ipc.opt != IDLE_PROF_OPT_NONE) && (output == FIO_OUTPUT_JSON)) { + if (!parent) + return; + + tmp = json_create_object(); + if (!tmp) + return; + + json_object_add_value_object(parent, "cpu_idleness", tmp); + json_object_add_value_float(tmp, "system", fio_idle_prof_cpu_stat(-1)); + + if (ipc.opt == IDLE_PROF_OPT_PERCPU) { + for (i = 0; i < nr_cpus; i++) { + snprintf(s, MAX_CPU_STR_LEN, "cpu-%d", i); + json_object_add_value_float(tmp, s, fio_idle_prof_cpu_stat(i)); + } + } + + json_object_add_value_float(tmp, "unit_mean", ipc.cali_mean); + json_object_add_value_float(tmp, "unit_stddev", ipc.cali_stddev); + + fio_idle_prof_cleanup(); + } +} diff --git a/idletime.h b/idletime.h new file mode 100644 index 0000000..acb8407 --- /dev/null +++ b/idletime.h @@ -0,0 +1,57 @@ +#ifndef FIO_IDLETIME_H +#define FIO_IDLETIME_H + +#include "fio.h" + +#define CALIBRATE_RUNS 10 +#define CALIBRATE_SCALE 1000 +#define MAX_CPU_STR_LEN 32 + +enum { + IDLE_PROF_OPT_NONE, + IDLE_PROF_OPT_CALI, /* calibration only */ + IDLE_PROF_OPT_SYSTEM, + IDLE_PROF_OPT_PERCPU +}; + +enum { + IDLE_PROF_STATUS_OK, + IDLE_PROF_STATUS_CALI_STOP, + IDLE_PROF_STATUS_PROF_STOP, + IDLE_PROF_STATUS_ABORT +}; + +struct idle_prof_thread { + pthread_t thread; + int cpu; + int state; + struct timeval tps; + struct timeval tpe; + double cali_time; /* microseconds to finish a unit wrok */ + double loops; + double idleness; + unsigned char *data; /* bytes to be touched */ + pthread_cond_t cond; + pthread_mutex_t init_lock; + pthread_mutex_t start_lock; +}; + +struct idle_prof_common { + struct idle_prof_thread *ipts; + int nr_cpus; + int status; + int opt; + double cali_mean; + double cali_stddev; + void *buf; /* single data allocation for all threads */ +}; + +extern int fio_idle_prof_parse_opt(const char *); + +extern void fio_idle_prof_init(void); +extern void fio_idle_prof_start(void); +extern void fio_idle_prof_stop(void); + +extern void show_idle_prof_stats(int, struct json_object *); + +#endif diff --git a/init.c b/init.c index fca74fe..dfc5a8f 100644 --- a/init.c +++ b/init.c @@ -22,6 +22,7 @@ #include "verify.h" #include "profile.h" #include "server.h" +#include "idletime.h" #include "lib/getopt.h" @@ -213,6 +214,11 @@ static struct option l_opts[FIO_NR_OPTIONS] = { .val = 'T', }, { + .name = (char *) "idle-prof", + .has_arg = required_argument, + .val = 'I', + }, + { .name = NULL, }, }; @@ -621,7 +627,7 @@ static char *to_kmg(unsigned int val) p++; } while (*p); - snprintf(buf, 31, "%u%c", val, *p); + snprintf(buf, 32, "%u%c", val, *p); return buf; } @@ -1290,6 +1296,9 @@ static void usage(const char *name) printf(" --server=args\t\tStart a backend fio server\n"); printf(" --daemonize=pidfile\tBackground fio server, write pid to file\n"); printf(" --client=hostname\tTalk to remote backend fio server at hostname\n"); + printf(" --idle-prof=option\tReport cpu idleness on a system or percpu basis\n" + "\t\t\t(option=system,percpu) or run unit work\n" + "\t\t\tcalibration only (option=calibrate)\n"); printf("\nFio was written by Jens Axboe <jens.axboe@xxxxxxxxxx>"); printf("\n Jens Axboe <jaxboe@xxxxxxxxxxxx>\n"); } @@ -1611,6 +1620,14 @@ int parse_cmd_line(int argc, char *argv[]) case 'D': pid_file = strdup(optarg); break; + case 'I': + if ((ret = fio_idle_prof_parse_opt(optarg))) { + /* exit on error and calibration only */ + do_exit++; + if (ret == -1) + exit_val = 1; + } + break; case 'C': if (is_backend) { log_err("fio: can't be both client and server\n"); diff --git a/iolog.c b/iolog.c index 12f09d0..e4c1fef 100644 --- a/iolog.c +++ b/iolog.c @@ -534,7 +534,7 @@ void finish_log_named(struct thread_data *td, struct io_log *log, { char file_name[256], *p; - snprintf(file_name, 200, "%s_%s.log", prefix, postfix); + snprintf(file_name, sizeof(file_name), "%s_%s.log", prefix, postfix); p = basename(file_name); __finish_log(log, p); } diff --git a/log.c b/log.c index af974f8..08509b3 100644 --- a/log.c +++ b/log.c @@ -12,6 +12,7 @@ int log_valist(const char *str, va_list args) size_t len; len = vsnprintf(buffer, sizeof(buffer), str, args); + len = min(len, sizeof(buffer) - 1); if (log_syslog) syslog(LOG_INFO, "%s", buffer); @@ -40,6 +41,7 @@ int log_local(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); if (log_syslog) syslog(LOG_INFO, "%s", buffer); @@ -58,6 +60,7 @@ int log_info(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); if (is_backend) return fio_server_text_output(buffer, len); @@ -77,6 +80,7 @@ int log_err(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); if (is_backend) return fio_server_text_output(buffer, len); diff --git a/options.c b/options.c index 1009df3..799e77a 100644 --- a/options.c +++ b/options.c @@ -2497,6 +2497,13 @@ static struct fio_option options[FIO_MAX_OPTS] = { .verify = gtod_cpu_verify, }, { + .name = "unified_rw_reporting", + .type = FIO_OPT_BOOL, + .off1 = td_var_offset(unified_rw_rep), + .help = "Unify reporting across data direction", + .def = "0", + }, + { .name = "continue_on_error", .type = FIO_OPT_STR, .off1 = td_var_offset(continue_on_error), diff --git a/os/os-linux.h b/os/os-linux.h index 4e837da..c45f071 100644 --- a/os/os-linux.h +++ b/os/os-linux.h @@ -246,4 +246,12 @@ static inline int os_trim(int fd, unsigned long long start, return errno; } +#ifdef CONFIG_SCHED_IDLE +static inline int fio_set_sched_idle(void) +{ + struct sched_param p = { .sched_priority = 0, }; + return sched_setscheduler(gettid(), SCHED_IDLE, &p); +} +#endif + #endif diff --git a/os/os-windows.h b/os/os-windows.h index ef71dd7..98f9030 100644 --- a/os/os-windows.h +++ b/os/os-windows.h @@ -247,4 +247,11 @@ static inline int init_random_state(struct thread_data *td, unsigned long *rand_ } +static inline int fio_set_sched_idle(void) +{ + /* SetThreadPriority returns nonzero for success */ + return (SetThreadPriority(GetCurrentThread(), THREAD_PRIORITY_IDLE))? 0 : -1; +} + + #endif /* FIO_OS_WINDOWS_H */ diff --git a/os/windows/posix/include/netinet/tcp.h b/os/windows/posix/include/netinet/tcp.h new file mode 100644 index 0000000..1b48076 --- /dev/null +++ b/os/windows/posix/include/netinet/tcp.h @@ -0,0 +1,4 @@ +#ifndef NETINET_TCP_H +#define NET_INET_TCP_H + +#endif diff --git a/server.c b/server.c index 0cc3fad..ad78572 100644 --- a/server.c +++ b/server.c @@ -648,6 +648,7 @@ static void convert_gs(struct group_run_stats *dst, struct group_run_stats *src) dst->kb_base = cpu_to_le32(src->kb_base); dst->groupid = cpu_to_le32(src->groupid); + dst->unified_rw_rep = cpu_to_le32(src->unified_rw_rep); } /* @@ -669,8 +670,10 @@ void fio_server_send_ts(struct thread_stat *ts, struct group_run_stats *rs) p.ts.error = cpu_to_le32(ts->error); p.ts.groupid = cpu_to_le32(ts->groupid); + p.ts.unified_rw_rep = cpu_to_le32(ts->unified_rw_rep); p.ts.pid = cpu_to_le32(ts->pid); p.ts.members = cpu_to_le32(ts->members); + p.ts.unified_rw_rep = cpu_to_le32(ts->unified_rw_rep); for (i = 0; i < DDIR_RWDIR_CNT; i++) { convert_io_stat(&p.ts.clat_stat[i], &ts->clat_stat[i]); @@ -808,6 +811,7 @@ int fio_server_log(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); return fio_server_text_output(buffer, len); } diff --git a/server.h b/server.h index 3f1bde4..15b802b 100644 --- a/server.h +++ b/server.h @@ -36,7 +36,7 @@ struct fio_net_int_cmd { }; enum { - FIO_SERVER_VER = 9, + FIO_SERVER_VER = 10, FIO_SERVER_MAX_PDU = 1024, diff --git a/stat.c b/stat.c index 8e1034b..62eee9a 100644 --- a/stat.c +++ b/stat.c @@ -12,6 +12,7 @@ #include "lib/ieee754.h" #include "json.h" #include "lib/getrusage.h" +#include "idletime.h" void update_rusage_stat(struct thread_data *td) { @@ -277,9 +278,9 @@ void show_group_stats(struct group_run_stats *rs) p4 = num2str(rs->max_bw[i], 6, rs->kb_base, i2p); log_info("%s: io=%sB, aggrb=%sB/s, minb=%sB/s, maxb=%sB/s," - " mint=%llumsec, maxt=%llumsec\n", ddir_str[i], p1, p2, - p3, p4, rs->min_run[i], - rs->max_run[i]); + " mint=%llumsec, maxt=%llumsec\n", + rs->unified_rw_rep ? " MIXED" : ddir_str[i], + p1, p2, p3, p4, rs->min_run[i], rs->max_run[i]); free(p1); free(p2); @@ -381,8 +382,8 @@ static void show_ddir_status(struct group_run_stats *rs, struct thread_stat *ts, iops_p = num2str(iops, 6, 1, 0); log_info(" %s: io=%sB, bw=%sB/s, iops=%s, runt=%6llumsec\n", - ddir_str[ddir], io_p, bw_p, iops_p, - ts->runtime[ddir]); + rs->unified_rw_rep ? "mixed" : ddir_str[ddir], + io_p, bw_p, iops_p, ts->runtime[ddir]); free(io_p); free(bw_p); @@ -695,8 +696,12 @@ static void add_ddir_status_json(struct thread_stat *ts, assert(ddir_rw(ddir)); + if (ts->unified_rw_rep && ddir != DDIR_READ) + return; + dir_object = json_create_object(); - json_object_add_value_object(parent, ddirname[ddir], dir_object); + json_object_add_value_object(parent, + ts->unified_rw_rep ? "mixed" : ddirname[ddir], dir_object); iops = bw = 0; if (ts->runtime[ddir]) { @@ -748,7 +753,7 @@ static void add_ddir_status_json(struct thread_stat *ts, json_object_add_value_int(percentile_object, "0.00", 0); continue; } - snprintf(buf, sizeof(buf) - 1, "%2.2f", ts->percentile_list[i].u.f); + snprintf(buf, sizeof(buf), "%2.2f", ts->percentile_list[i].u.f); json_object_add_value_int(percentile_object, (const char *)buf, ovals[i]); } @@ -954,9 +959,9 @@ static struct json_object *show_thread_status_json(struct thread_stat *ts, for (i = 0; i < 7; i++) { char name[20]; if (i < 6) - snprintf(name, 19, "%d", 1 << i); + snprintf(name, 20, "%d", 1 << i); else - snprintf(name, 19, ">=%d", 1 << i); + snprintf(name, 20, ">=%d", 1 << i); json_object_add_value_float(tmp, (const char *)name, io_u_dist[i]); } @@ -1062,15 +1067,27 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src, int nr) int l, k; for (l = 0; l < DDIR_RWDIR_CNT; l++) { - sum_stat(&dst->clat_stat[l], &src->clat_stat[l], nr); - sum_stat(&dst->slat_stat[l], &src->slat_stat[l], nr); - sum_stat(&dst->lat_stat[l], &src->lat_stat[l], nr); - sum_stat(&dst->bw_stat[l], &src->bw_stat[l], nr); - - dst->io_bytes[l] += src->io_bytes[l]; - - if (dst->runtime[l] < src->runtime[l]) - dst->runtime[l] = src->runtime[l]; + if (!dst->unified_rw_rep) { + sum_stat(&dst->clat_stat[l], &src->clat_stat[l], nr); + sum_stat(&dst->slat_stat[l], &src->slat_stat[l], nr); + sum_stat(&dst->lat_stat[l], &src->lat_stat[l], nr); + sum_stat(&dst->bw_stat[l], &src->bw_stat[l], nr); + + dst->io_bytes[l] += src->io_bytes[l]; + + if (dst->runtime[l] < src->runtime[l]) + dst->runtime[l] = src->runtime[l]; + } else { + sum_stat(&dst->clat_stat[0], &src->clat_stat[l], nr); + sum_stat(&dst->slat_stat[0], &src->slat_stat[l], nr); + sum_stat(&dst->lat_stat[0], &src->lat_stat[l], nr); + sum_stat(&dst->bw_stat[0], &src->bw_stat[l], nr); + + dst->io_bytes[0] += src->io_bytes[l]; + + if (dst->runtime[0] < src->runtime[l]) + dst->runtime[0] = src->runtime[l]; + } } dst->usr_time += src->usr_time; @@ -1091,14 +1108,24 @@ void sum_thread_stats(struct thread_stat *dst, struct thread_stat *src, int nr) dst->io_u_lat_m[k] += src->io_u_lat_m[k]; for (k = 0; k < DDIR_RWDIR_CNT; k++) { - dst->total_io_u[k] += src->total_io_u[k]; - dst->short_io_u[k] += src->short_io_u[k]; + if (!dst->unified_rw_rep) { + dst->total_io_u[k] += src->total_io_u[k]; + dst->short_io_u[k] += src->short_io_u[k]; + } else { + dst->total_io_u[0] += src->total_io_u[k]; + dst->short_io_u[0] += src->short_io_u[k]; + } } for (k = 0; k < DDIR_RWDIR_CNT; k++) { int m; - for (m = 0; m < FIO_IO_U_PLAT_NR; m++) - dst->io_u_plat[k][m] += src->io_u_plat[k][m]; + + for (m = 0; m < FIO_IO_U_PLAT_NR; m++) { + if (!dst->unified_rw_rep) + dst->io_u_plat[k][m] += src->io_u_plat[k][m]; + else + dst->io_u_plat[0][m] += src->io_u_plat[k][m]; + } } dst->total_run_time += src->total_run_time; @@ -1210,6 +1237,7 @@ void show_run_stats(void) ts->pid = td->pid; ts->kb_base = td->o.kb_base; + ts->unified_rw_rep = td->o.unified_rw_rep; } else if (ts->kb_base != td->o.kb_base && !kb_base_warned) { log_info("fio: kb_base differs for jobs in group, using" " %u as the base\n", ts->kb_base); @@ -1239,6 +1267,7 @@ void show_run_stats(void) ts = &threadstats[i]; rs = &runstats[ts->groupid]; rs->kb_base = ts->kb_base; + rs->unified_rw_rep += ts->unified_rw_rep; for (j = 0; j < DDIR_RWDIR_CNT; j++) { if (!ts->runtime[j]) @@ -1307,6 +1336,8 @@ void show_run_stats(void) /* disk util stats, if any */ show_disk_util(1, root); + show_idle_prof_stats(FIO_OUTPUT_JSON, root); + json_print_object(root); log_info("\n"); json_free_object(root); @@ -1327,6 +1358,8 @@ void show_run_stats(void) else if (output_format == FIO_OUTPUT_NORMAL) show_disk_util(0, NULL); + show_idle_prof_stats(FIO_OUTPUT_NORMAL, NULL); + free(runstats); free(threadstats); } diff --git a/stat.h b/stat.h index 4ca8261..97186c1 100644 --- a/stat.h +++ b/stat.h @@ -8,6 +8,7 @@ struct group_run_stats { uint64_t agg[DDIR_RWDIR_CNT]; uint32_t kb_base; uint32_t groupid; + uint32_t unified_rw_rep; }; /* @@ -120,6 +121,7 @@ struct thread_stat { uint32_t pid; char description[FIO_JOBNAME_SIZE]; uint32_t members; + uint32_t unified_rw_rep; /* * bandwidth and latency stats diff --git a/t/log.c b/t/log.c index ac02303..76ae68e 100644 --- a/t/log.c +++ b/t/log.c @@ -10,6 +10,7 @@ int log_err(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); return fwrite(buffer, len, 1, stderr); } @@ -23,6 +24,7 @@ int log_info(const char *format, ...) va_start(args, format); len = vsnprintf(buffer, sizeof(buffer), format, args); va_end(args); + len = min(len, sizeof(buffer) - 1); return fwrite(buffer, len, 1, stdout); } -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html