The following changes since commit d1c46c049cfba2028abc45246e2609bcee52d0f3: Add support for redirection replay of blktrace traces to another device (2010-08-31 21:20:47 +0200) are available in the git repository at: git://git.kernel.dk/fio.git master Jens Axboe (2): Add verify trim support Remove debug TRIM #error Makefile | 4 +- blktrace.c | 6 +++- fio.c | 1 + fio.h | 18 ++++++++++-- init.c | 1 + io_u.c | 59 +++++++++++++++++++++++++++++++++-------- ioengine.h | 1 + ioengines.c | 3 +- iolog.h | 14 ++++++++++ log.c | 16 ++++++++++- options.c | 45 +++++++++++++++++++++++++++++++- stat.c | 8 +++-- trim.c | 84 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ trim.h | 36 +++++++++++++++++++++++++ verify.c | 47 +++++++++++++++++++++++++++++++- 15 files changed, 315 insertions(+), 28 deletions(-) create mode 100644 trim.c create mode 100644 trim.h --- Diff of recent changes: diff --git a/Makefile b/Makefile index 288480f..9fec137 100644 --- a/Makefile +++ b/Makefile @@ -7,7 +7,7 @@ SCRIPTS = fio_generate_plots OBJS = gettime.o fio.o ioengines.o init.o stat.o log.o time.o filesetup.o \ eta.o verify.o memory.o io_u.o parse.o mutex.o options.o \ rbtree.o diskutil.o fifo.o blktrace.o smalloc.o filehash.o helpers.o \ - cgroup.o profile.o debug.o + cgroup.o profile.o debug.o trim.o OBJS += lib/rand.o OBJS += lib/flist_sort.o @@ -62,7 +62,7 @@ $(PROGS): depend all: depend $(PROGS) $(SCRIPTS) clean: - -rm -f .depend cscope.out $(OBJS) $(PROGS) core.* core + -rm -f .depend $(OBJS) $(PROGS) core.* core cscope: @cscope -b -R diff --git a/blktrace.c b/blktrace.c index 9ce4ae2..297a8a9 100644 --- a/blktrace.c +++ b/blktrace.c @@ -168,6 +168,7 @@ static void trace_add_open_event(struct thread_data *td, int fileno) struct io_piece *ipo; ipo = calloc(1, sizeof(*ipo)); + init_ipo(ipo); ipo->ddir = DDIR_INVAL; ipo->fileno = fileno; @@ -215,8 +216,8 @@ static void store_ipo(struct thread_data *td, unsigned long long offset, { struct io_piece *ipo = malloc(sizeof(*ipo)); - memset(ipo, 0, sizeof(*ipo)); - INIT_FLIST_HEAD(&ipo->list); + init_ipo(ipo); + /* * the 512 is wrong here, it should be the hardware sector size... */ @@ -256,6 +257,7 @@ static void handle_trace_discard(struct thread_data *td, struct blk_io_trace *t, { struct io_piece *ipo = malloc(sizeof(*ipo)); + init_ipo(ipo); trace_add_file(td, t->device); ios[DDIR_WRITE]++; diff --git a/fio.c b/fio.c index 1d20cf7..c8de2ee 100644 --- a/fio.c +++ b/fio.c @@ -1022,6 +1022,7 @@ static void *thread_main(void *data) INIT_FLIST_HEAD(&td->io_log_list); INIT_FLIST_HEAD(&td->io_hist_list); INIT_FLIST_HEAD(&td->verify_list); + INIT_FLIST_HEAD(&td->trim_list); pthread_mutex_init(&td->io_u_lock, NULL); td->io_hist_tree = RB_ROOT; diff --git a/fio.h b/fio.h index 729604d..e8c025d 100644 --- a/fio.h +++ b/fio.h @@ -120,8 +120,8 @@ struct thread_stat { unsigned int io_u_complete[FIO_IO_U_MAP_NR]; unsigned int io_u_lat_u[FIO_IO_U_LAT_U_NR]; unsigned int io_u_lat_m[FIO_IO_U_LAT_M_NR]; - unsigned long total_io_u[2]; - unsigned long short_io_u[2]; + unsigned long total_io_u[3]; + unsigned long short_io_u[3]; unsigned long total_submit; unsigned long total_complete; @@ -255,6 +255,10 @@ struct thread_options { unsigned int gtod_offload; enum fio_cs clocksource; unsigned int no_stall; + unsigned int trim_percentage; + unsigned int trim_batch; + unsigned int trim_zero; + unsigned long long trim_backlog; char *read_iolog_file; char *write_iolog_file; @@ -347,12 +351,14 @@ struct thread_data { char *sysfs_root; - unsigned long rand_seeds[6]; + unsigned long rand_seeds[7]; os_random_state_t bsrange_state; os_random_state_t verify_state; + os_random_state_t trim_state; unsigned int verify_batch; + unsigned int trim_batch; int shm_id; @@ -437,6 +443,12 @@ struct thread_data { struct flist_head io_log_list; /* + * For tracking/handling discards + */ + struct flist_head trim_list; + unsigned long trim_entries; + + /* * for fileservice, how often to switch to a new file */ unsigned int file_service_nr; diff --git a/init.c b/init.c index f0ee37a..fe4dbf2 100644 --- a/init.c +++ b/init.c @@ -447,6 +447,7 @@ void td_fill_rand_seeds(struct thread_data *td) os_random_seed(td->rand_seeds[3], &td->next_file_state); os_random_seed(td->rand_seeds[5], &td->file_size_state); + os_random_seed(td->rand_seeds[6], &td->trim_state); if (!td_random(td)) return; diff --git a/io_u.c b/io_u.c index 21a801f..ea0d46c 100644 --- a/io_u.c +++ b/io_u.c @@ -8,6 +8,7 @@ #include "fio.h" #include "hash.h" #include "verify.h" +#include "trim.h" #include "lib/rand.h" struct io_completion_data { @@ -982,21 +983,31 @@ again: return io_u; } -/* - * Return an io_u to be processed. Gets a buflen and offset, sets direction, - * etc. The returned io_u is fully ready to be prepped and submitted. - */ -struct io_u *get_io_u(struct thread_data *td) +static int check_get_trim(struct thread_data *td, struct io_u *io_u) { - struct fio_file *f; - struct io_u *io_u; + if (td->o.trim_backlog && td->trim_entries) { + int get_trim = 0; - io_u = __get_io_u(td); - if (!io_u) { - dprint(FD_IO, "__get_io_u failed\n"); - return NULL; + if (td->trim_batch) { + td->trim_batch--; + get_trim = 1; + } else if (!(td->io_hist_len % td->o.trim_backlog) && + td->last_ddir != DDIR_READ) { + td->trim_batch = td->o.trim_batch; + if (!td->trim_batch) + td->trim_batch = td->o.trim_backlog; + get_trim = 1; + } + + if (get_trim && !get_next_trim(td, io_u)) + return 1; } + return 0; +} + +static int check_get_verify(struct thread_data *td, struct io_u *io_u) +{ if (td->o.verify_backlog && td->io_hist_len) { int get_verify = 0; @@ -1012,9 +1023,32 @@ struct io_u *get_io_u(struct thread_data *td) } if (get_verify && !get_next_verify(td, io_u)) - goto out; + return 1; } + return 0; +} + +/* + * Return an io_u to be processed. Gets a buflen and offset, sets direction, + * etc. The returned io_u is fully ready to be prepped and submitted. + */ +struct io_u *get_io_u(struct thread_data *td) +{ + struct fio_file *f; + struct io_u *io_u; + + io_u = __get_io_u(td); + if (!io_u) { + dprint(FD_IO, "__get_io_u failed\n"); + return NULL; + } + + if (check_get_verify(td, io_u)) + goto out; + if (check_get_trim(td, io_u)) + goto out; + /* * from a requeue, io_u already setup */ @@ -1064,6 +1098,7 @@ struct io_u *get_io_u(struct thread_data *td) io_u->xfer_buflen = io_u->buflen; out: + assert(io_u->file); if (!td_io_prep(td, io_u)) { if (!td->o.disable_slat) fio_gettime(&io_u->start_time, NULL); diff --git a/ioengine.h b/ioengine.h index 389e95a..f6238f8 100644 --- a/ioengine.h +++ b/ioengine.h @@ -9,6 +9,7 @@ enum { IO_U_F_FREE_DEF = 1 << 2, IO_U_F_IN_CUR_DEPTH = 1 << 3, IO_U_F_BUSY_OK = 1 << 4, + IO_U_F_TRIMMED = 1 << 5, }; /* diff --git a/ioengines.c b/ioengines.c index f976efb..7df0aba 100644 --- a/ioengines.c +++ b/ioengines.c @@ -269,7 +269,8 @@ int td_io_queue(struct thread_data *td, struct io_u *io_u) if (ddir_rw(io_u->ddir)) { io_u_mark_depth(td, 1); td->ts.total_io_u[io_u->ddir]++; - } + } else if (io_u->ddir == DDIR_TRIM) + td->ts.total_io_u[2]++; } else if (ret == FIO_Q_QUEUED) { int r; diff --git a/iolog.h b/iolog.h index 2a97e28..c59e6aa 100644 --- a/iolog.h +++ b/iolog.h @@ -32,6 +32,12 @@ struct io_log { struct io_sample *log; }; +enum { + IP_F_ONRB = 1, + IP_F_ONLIST = 2, + IP_F_TRIMMED = 4, +}; + /* * When logging io actions, this matches a single sent io_u */ @@ -40,12 +46,14 @@ struct io_piece { struct rb_node rb_node; struct flist_head list; }; + struct flist_head trim_list; union { int fileno; struct fio_file *file; }; unsigned long long offset; unsigned long len; + unsigned long flags; enum fio_ddir ddir; union { unsigned long delay; @@ -95,4 +103,10 @@ extern struct io_log *agg_io_log[2]; extern int write_bw_log; extern void add_agg_sample(unsigned long, enum fio_ddir, unsigned int); +static inline void init_ipo(struct io_piece *ipo) +{ + memset(ipo, 0, sizeof(*ipo)); + INIT_FLIST_HEAD(&ipo->trim_list); +} + #endif diff --git a/log.c b/log.c index ce4ac9f..266dc06 100644 --- a/log.c +++ b/log.c @@ -9,6 +9,7 @@ #include "flist.h" #include "fio.h" #include "verify.h" +#include "trim.h" static const char iolog_ver2[] = "fio version 2 iolog"; @@ -115,6 +116,7 @@ int read_iolog_get(struct thread_data *td, struct io_u *io_u) ipo = flist_entry(td->io_log_list.next, struct io_piece, list); flist_del(&ipo->list); + remove_trim_entry(td, ipo); ret = ipo_special(td, ipo); if (ret < 0) { @@ -160,6 +162,7 @@ void prune_io_piece_log(struct thread_data *td) while ((n = rb_first(&td->io_hist_tree)) != NULL) { ipo = rb_entry(n, struct io_piece, rb_node); rb_erase(n, &td->io_hist_tree); + remove_trim_entry(td, ipo); td->io_hist_len--; free(ipo); } @@ -167,6 +170,7 @@ void prune_io_piece_log(struct thread_data *td) while (!flist_empty(&td->io_hist_list)) { ipo = flist_entry(td->io_hist_list.next, struct io_piece, list); flist_del(&ipo->list); + remove_trim_entry(td, ipo); td->io_hist_len--; free(ipo); } @@ -181,10 +185,16 @@ void log_io_piece(struct thread_data *td, struct io_u *io_u) struct io_piece *ipo, *__ipo; ipo = malloc(sizeof(struct io_piece)); + init_ipo(ipo); ipo->file = io_u->file; ipo->offset = io_u->offset; ipo->len = io_u->buflen; + if (io_u_should_trim(td, io_u)) { + flist_add_tail(&ipo->trim_list, &td->trim_list); + td->trim_entries++; + } + /* * We don't need to sort the entries, if: * @@ -203,6 +213,7 @@ void log_io_piece(struct thread_data *td, struct io_u *io_u) (file_randommap(td, ipo->file) || td->o.verify == VERIFY_NONE)) { INIT_FLIST_HEAD(&ipo->list); flist_add_tail(&ipo->list, &td->io_hist_list); + ipo->flags |= IP_F_ONLIST; td->io_hist_len++; return; } @@ -231,6 +242,7 @@ restart: assert(ipo->len == __ipo->len); td->io_hist_len--; rb_erase(parent, &td->io_hist_tree); + remove_trim_entry(td, __ipo); free(__ipo); goto restart; } @@ -238,6 +250,7 @@ restart: rb_link_node(&ipo->rb_node, parent, p); rb_insert_color(&ipo->rb_node, &td->io_hist_tree); + ipo->flags |= IP_F_ONRB; td->io_hist_len++; } @@ -345,8 +358,7 @@ static int read_iolog2(struct thread_data *td, FILE *f) * Make note of file */ ipo = malloc(sizeof(*ipo)); - memset(ipo, 0, sizeof(*ipo)); - INIT_FLIST_HEAD(&ipo->list); + init_ipo(ipo); ipo->ddir = rw; if (rw == DDIR_WAIT) { ipo->delay = offset; diff --git a/options.c b/options.c index 3d32c8e..e255e94 100644 --- a/options.c +++ b/options.c @@ -440,6 +440,16 @@ static int str_verify_cpus_allowed_cb(void *data, const char *input) } #endif +#ifdef FIO_HAVE_TRIM +static int str_verify_trim_cb(void *data, unsigned long long *val) +{ + struct thread_data *td = data; + + td->o.trim_percentage = *val; + return 0; +} +#endif + static int str_fst_cb(void *data, const char *str) { struct thread_data *td = data; @@ -1458,7 +1468,7 @@ static struct fio_option options[FIO_MAX_OPTS] = { .type = FIO_OPT_INT, .off1 = td_var_offset(verify_batch), .help = "Verify this number of IO blocks", - .parent = "verify_backlog", + .parent = "verify", }, #ifdef FIO_HAVE_CPU_AFFINITY { @@ -1469,6 +1479,39 @@ static struct fio_option options[FIO_MAX_OPTS] = { .parent = "verify_async", }, #endif +#ifdef FIO_HAVE_TRIM + { + .name = "trim_percentage", + .type = FIO_OPT_INT, + .cb = str_verify_trim_cb, + .maxval = 100, + .help = "Number of verify blocks to discard/trim", + .parent = "verify", + .def = "0", + }, + { + .name = "trim_verify_zero", + .type = FIO_OPT_INT, + .help = "Verify that trim/discarded blocks are returned as zeroes", + .off1 = td_var_offset(trim_zero), + .parent = "trim_percentage", + .def = "1", + }, + { + .name = "trim_backlog", + .type = FIO_OPT_STR_VAL, + .off1 = td_var_offset(trim_backlog), + .help = "Trim after this number of blocks are written", + .parent = "trim_percentage", + }, + { + .name = "trim_backlog_batch", + .type = FIO_OPT_INT, + .off1 = td_var_offset(trim_batch), + .help = "Trim this number of IO blocks", + .parent = "trim_percentage", + }, +#endif { .name = "write_iolog", .type = FIO_OPT_STR_STORE, diff --git a/stat.c b/stat.c index 8e9fba0..326b1f7 100644 --- a/stat.c +++ b/stat.c @@ -351,9 +351,11 @@ static void show_thread_status(struct thread_stat *ts, io_u_dist[1], io_u_dist[2], io_u_dist[3], io_u_dist[4], io_u_dist[5], io_u_dist[6]); - log_info(" issued r/w: total=%lu/%lu, short=%lu/%lu\n", + log_info(" issued r/w/d: total=%lu/%lu/%lu, short=%lu/%lu/%lu\n", ts->total_io_u[0], ts->total_io_u[1], - ts->short_io_u[0], ts->short_io_u[1]); + ts->total_io_u[2], + ts->short_io_u[0], ts->short_io_u[1], + ts->short_io_u[2]); stat_calc_lat_u(ts, io_u_lat_u); stat_calc_lat_m(ts, io_u_lat_m); show_latencies(io_u_lat_u, io_u_lat_m); @@ -615,7 +617,7 @@ void show_run_stats(void) ts->io_u_lat_m[k] += td->ts.io_u_lat_m[k]; - for (k = 0; k <= DDIR_WRITE; k++) { + for (k = 0; k <= 2; k++) { ts->total_io_u[k] += td->ts.total_io_u[k]; ts->short_io_u[k] += td->ts.short_io_u[k]; } diff --git a/trim.c b/trim.c new file mode 100644 index 0000000..cf42625 --- /dev/null +++ b/trim.c @@ -0,0 +1,84 @@ +/* + * TRIM/DISCARD support + */ +#include <unistd.h> +#include <fcntl.h> +#include <string.h> +#include <assert.h> +#include <pthread.h> + +#include "fio.h" +#include "trim.h" + +#ifdef FIO_HAVE_TRIM +int get_next_trim(struct thread_data *td, struct io_u *io_u) +{ + struct io_piece *ipo; + + /* + * this io_u is from a requeue, we already filled the offsets + */ + if (io_u->file) + return 0; + if (flist_empty(&td->trim_list)) + return 0; + + assert(td->trim_entries); + ipo = flist_entry(td->trim_list.next, struct io_piece, trim_list); + remove_trim_entry(td, ipo); + ipo->flags |= IP_F_TRIMMED; + + /* + * If not verifying that trimmed ranges return zeroed data, + * remove this from the to-read verify lists + */ + if (!td->o.trim_zero) { + if (ipo->flags & IP_F_ONLIST) + flist_del(&ipo->list); + else { + assert(ipo->flags & IP_F_ONRB); + rb_erase(&ipo->rb_node, &td->io_hist_tree); + } + td->io_hist_len--; + } + + io_u->offset = ipo->offset; + io_u->buflen = ipo->len; + io_u->file = ipo->file; + + if (!fio_file_open(io_u->file)) { + int r = td_io_open_file(td, io_u->file); + + if (r) { + dprint(FD_VERIFY, "failed file %s open\n", + io_u->file->file_name); + return 1; + } + } + + get_file(ipo->file); + assert(fio_file_open(io_u->file)); + io_u->ddir = DDIR_TRIM; + io_u->xfer_buf = NULL; + io_u->xfer_buflen = io_u->buflen; + + free(ipo); + dprint(FD_VERIFY, "get_next_trim: ret io_u %p\n", io_u); + return 0; +} + +int io_u_should_trim(struct thread_data *td, struct io_u *io_u) +{ + unsigned long long val; + long r; + + if (!td->o.trim_percentage) + return 0; + + r = os_random_long(&td->trim_state); + val = (OS_RAND_MAX / 100ULL); + val *= (unsigned long long) td->o.trim_percentage; + + return r <= val; +} +#endif diff --git a/trim.h b/trim.h new file mode 100644 index 0000000..6584606 --- /dev/null +++ b/trim.h @@ -0,0 +1,36 @@ +#ifndef FIO_TRIM_H +#define FIO_TRIM_H + +#include "fio.h" + +#ifdef FIO_HAVE_TRIM +extern int __must_check get_next_trim(struct thread_data *td, struct io_u *io_u); +extern int io_u_should_trim(struct thread_data *td, struct io_u *io_u); + +/* + * Determine whether a given io_u should be logged for verify or + * for discard + */ +static inline void remove_trim_entry(struct thread_data *td, struct io_piece *ipo) +{ + if (!flist_empty(&ipo->trim_list)) { + flist_del_init(&ipo->trim_list); + td->trim_entries--; + } +} + +#else +static inline int get_next_trim(struct thread_data *td, struct io_u *io_u) +{ + return 1; +} +static inline int io_u_should_trim(struct thread_data *td, struct io_u *io_u) +{ + return 0; +} +static inline void remove_trim_entry(struct thread_data *td, struct io_piece *ipo) +{ +} +#endif + +#endif diff --git a/verify.c b/verify.c index 7957bd4..073eec5 100644 --- a/verify.c +++ b/verify.c @@ -10,6 +10,7 @@ #include "fio.h" #include "verify.h" #include "smalloc.h" +#include "trim.h" #include "lib/rand.h" #include "crc/md5.h" @@ -470,6 +471,38 @@ int verify_io_u_async(struct thread_data *td, struct io_u *io_u) return 0; } +static int verify_trimmed_io_u(struct thread_data *td, struct io_u *io_u) +{ + static char zero_buf[1024]; + unsigned int this_len, len; + int ret = 0; + void *p; + + if (!td->o.trim_zero) + return 0; + + len = io_u->buflen; + p = io_u->buf; + do { + this_len = sizeof(zero_buf); + if (this_len > len) + this_len = len; + if (memcmp(p, zero_buf, this_len)) { + ret = EILSEQ; + break; + } + len -= this_len; + p += this_len; + } while (len); + + if (!ret) + return 0; + + log_err("trims: verify failed at file %s offset %llu, length %lu\n", + io_u->file->file_name, io_u->offset, io_u->buflen); + return ret; +} + int verify_io_u(struct thread_data *td, struct io_u *io_u) { struct verify_header *hdr; @@ -479,6 +512,10 @@ int verify_io_u(struct thread_data *td, struct io_u *io_u) if (td->o.verify == VERIFY_NULL || io_u->ddir != DDIR_READ) return 0; + if (io_u->flags & IO_U_F_TRIMMED) { + ret = verify_trimmed_io_u(td, io_u); + goto done; + } hdr_inc = io_u->buflen; if (td->o.verify_interval) @@ -570,6 +607,7 @@ int verify_io_u(struct thread_data *td, struct io_u *io_u) } } +done: if (ret && td->o.verify_fatal) td->terminate = 1; @@ -778,18 +816,21 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u) ipo = rb_entry(n, struct io_piece, rb_node); rb_erase(n, &td->io_hist_tree); - td->io_hist_len--; } else if (!flist_empty(&td->io_hist_list)) { ipo = flist_entry(td->io_hist_list.next, struct io_piece, list); - td->io_hist_len--; flist_del(&ipo->list); } if (ipo) { + td->io_hist_len--; + io_u->offset = ipo->offset; io_u->buflen = ipo->len; io_u->file = ipo->file; + if (ipo->flags & IP_F_TRIMMED) + io_u->flags |= IO_U_F_TRIMMED; + if (!fio_file_open(io_u->file)) { int r = td_io_open_file(td, io_u->file); @@ -805,6 +846,8 @@ int get_next_verify(struct thread_data *td, struct io_u *io_u) io_u->ddir = DDIR_READ; io_u->xfer_buf = io_u->buf; io_u->xfer_buflen = io_u->buflen; + + remove_trim_entry(td, ipo); free(ipo); dprint(FD_VERIFY, "get_next_verify: ret io_u %p\n", io_u); return 0; -- To unsubscribe from this list: send the line "unsubscribe fio" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html