The following changes since commit 5f2d43188c2d65674aaba6280e2a87107e5d7099: Merge branch 'fix/json/strdup_memory_leak' of https://github.com/dpronin/fio (2022-04-17 16:47:22 -0600) are available in the Git repository at: git://git.kernel.dk/fio.git master for you to fetch changes up to 6e594a2fa8388892dffb2ffc9b865689e2d67833: Merge branch 'global_dedup' of https://github.com/bardavid/fio (2022-04-29 16:30:50 -0600) ---------------------------------------------------------------- Bar David (2): Introducing support for generation of dedup buffers across jobs. The dedup buffers are spread evenly between the jobs that enabled the dedupe_global option adding an example for dedupe_global usage and DRR testing Jens Axboe (1): Merge branch 'global_dedup' of https://github.com/bardavid/fio HOWTO.rst | 6 +++++ backend.c | 5 ++++ cconv.c | 2 ++ dedupe.c | 46 +++++++++++++++++++++++++++++++++---- dedupe.h | 3 ++- examples/dedupe-global.fio | 57 ++++++++++++++++++++++++++++++++++++++++++++++ fio.1 | 9 ++++++++ init.c | 2 +- options.c | 10 ++++++++ server.h | 2 +- thread_options.h | 3 +++ 11 files changed, 138 insertions(+), 7 deletions(-) create mode 100644 examples/dedupe-global.fio --- Diff of recent changes: diff --git a/HOWTO.rst b/HOWTO.rst index a5fa432e..6a3e09f5 100644 --- a/HOWTO.rst +++ b/HOWTO.rst @@ -1749,6 +1749,12 @@ Buffers and memory Note that size needs to be explicitly provided and only 1 file per job is supported +.. option:: dedupe_global=bool + + This controls whether the deduplication buffers will be shared amongst + all jobs that have this option set. The buffers are spread evenly between + participating jobs. + .. option:: invalidate=bool Invalidate the buffer/page cache parts of the files to be used prior to diff --git a/backend.c b/backend.c index 317e4f6c..ffbb7e2a 100644 --- a/backend.c +++ b/backend.c @@ -2570,6 +2570,11 @@ int fio_backend(struct sk_out *sk_out) setup_log(&agg_io_log[DDIR_TRIM], &p, "agg-trim_bw.log"); } + if (init_global_dedupe_working_set_seeds()) { + log_err("fio: failed to initialize global dedupe working set\n"); + return 1; + } + startup_sem = fio_sem_init(FIO_SEM_LOCKED); if (!sk_out) is_local_backend = true; diff --git a/cconv.c b/cconv.c index 62d02e36..6c36afb7 100644 --- a/cconv.c +++ b/cconv.c @@ -305,6 +305,7 @@ void convert_thread_options_to_cpu(struct thread_options *o, o->dedupe_percentage = le32_to_cpu(top->dedupe_percentage); o->dedupe_mode = le32_to_cpu(top->dedupe_mode); o->dedupe_working_set_percentage = le32_to_cpu(top->dedupe_working_set_percentage); + o->dedupe_global = le32_to_cpu(top->dedupe_global); o->block_error_hist = le32_to_cpu(top->block_error_hist); o->replay_align = le32_to_cpu(top->replay_align); o->replay_scale = le32_to_cpu(top->replay_scale); @@ -513,6 +514,7 @@ void convert_thread_options_to_net(struct thread_options_pack *top, top->dedupe_percentage = cpu_to_le32(o->dedupe_percentage); top->dedupe_mode = cpu_to_le32(o->dedupe_mode); top->dedupe_working_set_percentage = cpu_to_le32(o->dedupe_working_set_percentage); + top->dedupe_global = cpu_to_le32(o->dedupe_global); top->block_error_hist = cpu_to_le32(o->block_error_hist); top->replay_align = cpu_to_le32(o->replay_align); top->replay_scale = cpu_to_le32(o->replay_scale); diff --git a/dedupe.c b/dedupe.c index fd116dfb..8214a786 100644 --- a/dedupe.c +++ b/dedupe.c @@ -1,13 +1,37 @@ #include "fio.h" -int init_dedupe_working_set_seeds(struct thread_data *td) +/** + * initializes the global dedup workset. + * this needs to be called after all jobs' seeds + * have been initialized + */ +int init_global_dedupe_working_set_seeds(void) { - unsigned long long i, j, num_seed_advancements; + int i; + struct thread_data *td; + + for_each_td(td, i) { + if (!td->o.dedupe_global) + continue; + + if (init_dedupe_working_set_seeds(td, 1)) + return 1; + } + + return 0; +} + +int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedup) +{ + int tindex; + struct thread_data *td_seed; + unsigned long long i, j, num_seed_advancements, pages_per_seed; struct frand_state dedupe_working_set_state = {0}; if (!td->o.dedupe_percentage || !(td->o.dedupe_mode == DEDUPE_MODE_WORKING_SET)) return 0; + tindex = td->thread_number - 1; num_seed_advancements = td->o.min_bs[DDIR_WRITE] / min_not_zero(td->o.min_bs[DDIR_WRITE], (unsigned long long) td->o.compress_chunk); /* @@ -20,9 +44,11 @@ int init_dedupe_working_set_seeds(struct thread_data *td) log_err("fio: could not allocate dedupe working set\n"); return 1; } + frand_copy(&dedupe_working_set_state, &td->buf_state); - for (i = 0; i < td->num_unique_pages; i++) { - frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state); + frand_copy(&td->dedupe_working_set_states[0], &dedupe_working_set_state); + pages_per_seed = max(td->num_unique_pages / thread_number, 1ull); + for (i = 1; i < td->num_unique_pages; i++) { /* * When compression is used the seed is advanced multiple times to * generate the buffer. We want to regenerate the same buffer when @@ -30,6 +56,18 @@ int init_dedupe_working_set_seeds(struct thread_data *td) */ for (j = 0; j < num_seed_advancements; j++) __get_next_seed(&dedupe_working_set_state); + + /* + * When global dedup is used, we rotate the seeds to allow + * generating same buffers across different jobs. Deduplication buffers + * are spread evenly across jobs participating in global dedupe + */ + if (global_dedup && i % pages_per_seed == 0) { + td_seed = tnumber_to_td(++tindex % thread_number); + frand_copy(&dedupe_working_set_state, &td_seed->buf_state); + } + + frand_copy(&td->dedupe_working_set_states[i], &dedupe_working_set_state); } return 0; diff --git a/dedupe.h b/dedupe.h index d4c4dc37..bd1f9c0c 100644 --- a/dedupe.h +++ b/dedupe.h @@ -1,6 +1,7 @@ #ifndef DEDUPE_H #define DEDUPE_H -int init_dedupe_working_set_seeds(struct thread_data *td); +int init_dedupe_working_set_seeds(struct thread_data *td, bool global_dedupe); +int init_global_dedupe_working_set_seeds(void); #endif diff --git a/examples/dedupe-global.fio b/examples/dedupe-global.fio new file mode 100644 index 00000000..edaaad55 --- /dev/null +++ b/examples/dedupe-global.fio @@ -0,0 +1,57 @@ +# Writing to 2 files that share the duplicate blocks. +# The dedupe working set is spread uniformly such that when +# each of the jobs choose to perform a dedup operation they will +# regenerate a buffer from the global space. +# If you test the dedup ratio on either file by itself the result +# is likely lower than if you test the ratio of the two files combined. +# +# Use `./t/fio-dedupe <file> -C 1 -c 1 -b 4096` to test the total +# data reduction ratio. +# +# +# Full example of test: +# $ ./fio ./examples/dedupe-global.fio +# +# Checking ratio on a and b individually: +# $ ./t/fio-dedupe a.0.0 -C 1 -c 1 -b 4096 +# +# $ Extents=25600, Unique extents=16817 Duplicated extents=5735 +# $ De-dupe ratio: 1:0.52 +# $ De-dupe working set at least: 22.40% +# $ Fio setting: dedupe_percentage=34 +# $ Unique capacity 33MB +# +# ./t/fio-dedupe b.0.0 -C 1 -c 1 -b 4096 +# $ Extents=25600, Unique extents=17009 Duplicated extents=5636 +# $ De-dupe ratio: 1:0.51 +# $ De-dupe working set at least: 22.02% +# $ Fio setting: dedupe_percentage=34 +# $ Unique capacity 34MB +# +# Combining files: +# $ cat a.0.0 > c.0.0 +# $ cat b.0.0 >> c.0.0 +# +# Checking data reduction ratio on combined file: +# $ ./t/fio-dedupe c.0.0 -C 1 -c 1 -b 4096 +# $ Extents=51200, Unique extents=25747 Duplicated extents=11028 +# $ De-dupe ratio: 1:0.99 +# $ De-dupe working set at least: 21.54% +# $ Fio setting: dedupe_percentage=50 +# $ Unique capacity 51MB +# +[global] +ioengine=libaio +iodepth=256 +size=100m +dedupe_mode=working_set +dedupe_global=1 +dedupe_percentage=50 +blocksize=4k +rw=write +buffer_compress_percentage=50 +dedupe_working_set_percentage=50 + +[a] + +[b] diff --git a/fio.1 b/fio.1 index a2ec836f..609947dc 100644 --- a/fio.1 +++ b/fio.1 @@ -1553,6 +1553,15 @@ Note that \fBsize\fR needs to be explicitly provided and only 1 file per job is supported .RE .TP +.BI dedupe_global \fR=\fPbool +This controls whether the deduplication buffers will be shared amongst +all jobs that have this option set. The buffers are spread evenly between +participating jobs. +.P +.RS +Note that \fBdedupe_mode\fR must be set to \fBworking_set\fR for this to work. +Can be used in combination with compression +.TP .BI invalidate \fR=\fPbool Invalidate the buffer/page cache parts of the files to be used prior to starting I/O if the platform and file type support it. Defaults to true. diff --git a/init.c b/init.c index 6f186051..f7d702f8 100644 --- a/init.c +++ b/init.c @@ -1541,7 +1541,7 @@ static int add_job(struct thread_data *td, const char *jobname, int job_add_num, if (fixup_options(td)) goto err; - if (init_dedupe_working_set_seeds(td)) + if (!td->o.dedupe_global && init_dedupe_working_set_seeds(td, 0)) goto err; /* diff --git a/options.c b/options.c index e06d9b66..3b83573b 100644 --- a/options.c +++ b/options.c @@ -4665,6 +4665,16 @@ struct fio_option fio_options[FIO_MAX_OPTS] = { .category = FIO_OPT_C_IO, .group = FIO_OPT_G_IO_BUF, }, + { + .name = "dedupe_global", + .lname = "Global deduplication", + .type = FIO_OPT_BOOL, + .off1 = offsetof(struct thread_options, dedupe_global), + .help = "Share deduplication buffers across jobs", + .def = "0", + .category = FIO_OPT_C_IO, + .group = FIO_OPT_G_IO_BUF, + }, { .name = "dedupe_mode", .lname = "Dedupe mode", diff --git a/server.h b/server.h index 0e62b6df..b0c5e2df 100644 --- a/server.h +++ b/server.h @@ -51,7 +51,7 @@ struct fio_net_cmd_reply { }; enum { - FIO_SERVER_VER = 96, + FIO_SERVER_VER = 97, FIO_SERVER_MAX_FRAGMENT_PDU = 1024, FIO_SERVER_MAX_CMD_MB = 2048, diff --git a/thread_options.h b/thread_options.h index 4162c42f..634070af 100644 --- a/thread_options.h +++ b/thread_options.h @@ -263,6 +263,7 @@ struct thread_options { unsigned int dedupe_percentage; unsigned int dedupe_mode; unsigned int dedupe_working_set_percentage; + unsigned int dedupe_global; unsigned int time_based; unsigned int disable_lat; unsigned int disable_clat; @@ -578,6 +579,7 @@ struct thread_options_pack { uint32_t dedupe_percentage; uint32_t dedupe_mode; uint32_t dedupe_working_set_percentage; + uint32_t dedupe_global; uint32_t time_based; uint32_t disable_lat; uint32_t disable_clat; @@ -596,6 +598,7 @@ struct thread_options_pack { uint32_t lat_percentiles; uint32_t slat_percentiles; uint32_t percentile_precision; + uint32_t pad5; fio_fp64_t percentile_list[FIO_IO_U_LIST_MAX_LEN]; uint8_t read_iolog_file[FIO_TOP_STR_MAX];