Hi Jens,

I made changes to fio so that we reuse the already populated io_u buffer (when a non-random pattern is in use) during writes; only the header is recalculated for every I/O. The buffer gets populated once at the beginning, and as long as the subsequent I/Os using the same io_u structure are writes with the same or smaller block size, it is reused. If any subsequent I/O is a read, or has a block size greater than the pre-filled length, the buffer is invalidated and will be refilled on the next write.

Reason for this risky change: performance. I tested this change on a tmpfs (with no swap backing), with the following config file:

[sscan_write]
filename=/mytmpfs/datafile.tmp
rw=write
bs=64k
size=3G
ioengine=libaio
iodepth=1024
iodepth_low=512
runtime=10800
bwavgtime=5000
thread=1
do_verify=0
verify=meta
verify_pattern=0x55aaa55a
verify_interval=4k
continue_on_error=1

fio-1-41-6 gave 306MB/s, and with the new change throughput was 1546MB/s.

Side effects/risks: the risk with this fix is that if the buffer gets corrupted, the subsequent writes will also be corrupt. For both sequential writes and random writes (with verify, where the I/O log is replayed) we should be able to find the first I/O where the corruption started; and if the buffer itself is getting corrupted, there are bigger problems than this optimization.

Testing: I have tested this fix with sequential writes (with verify) and a random read/write mix (with verify). I think I have covered most of the cases, but please let me know if there is anything I have missed.

I have attached the patch along with this email. I think the performance improvement outweighs the risk associated with the fix, but I will let you decide whether you would like to pick it up.

thanks
-radha
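PS: To make the reuse rule concrete, here is a minimal standalone sketch of the idea. The names (buf_state, fill_or_reuse, invalidate_on_read) are illustrative only; the real change hooks into fio's fill_pattern() and get_io_u(), as the patch below shows.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

struct buf_state {
	char *buf;
	int filled;		/* pattern body already in the buffer? */
	size_t filled_len;	/* how many bytes of it are valid */
};

/* Write a one-byte pattern, unless a big-enough fill is already cached. */
static void fill_or_reuse(struct buf_state *b, unsigned char pat, size_t len)
{
	if (b->filled && b->filled_len >= len) {
		printf("write %6zu: reusing cached fill\n", len);
		return;
	}
	printf("write %6zu: filling buffer\n", len);
	memset(b->buf, pat, len);
	b->filled = 1;
	b->filled_len = len;
}

/* A read clobbers the buffer contents, so drop the cached-fill state. */
static void invalidate_on_read(struct buf_state *b)
{
	b->filled = 0;
	b->filled_len = 0;
}

int main(void)
{
	struct buf_state b = { .buf = malloc(65536) };

	fill_or_reuse(&b, 0x5a, 65536);	/* first write: fills buffer   */
	fill_or_reuse(&b, 0x5a, 4096);	/* smaller write: reuses fill  */
	invalidate_on_read(&b);		/* read into the same buffer   */
	fill_or_reuse(&b, 0x5a, 65536);	/* next write: refills buffer  */
	free(b.buf);
	return 0;
}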
diff --git a/fio.c b/fio.c
index 2dab64e..896f797 100644
--- a/fio.c
+++ b/fio.c
@@ -831,6 +831,13 @@ static int init_io_u(struct thread_data *td)
 
 			if (td_write(td) && !td->o.refill_buffers)
 				io_u_fill_buffer(td, io_u, max_bs);
+			else if (td_write(td) && td->o.verify_pattern_bytes) {
+				/*
+				 * Fill the buffer with the pattern if we are
+				 * going to be doing writes.
+				 */
+				fill_pattern(td, io_u->buf, max_bs, io_u);
+			}
 		}
 
 		io_u->index = i;
diff --git a/io_u.c b/io_u.c
index b2b7230..dc4473b 100644
--- a/io_u.c
+++ b/io_u.c
@@ -983,6 +983,14 @@ struct io_u *get_io_u(struct thread_data *td)
 			populate_verify_io_u(td, io_u);
 		else if (td->o.refill_buffers && io_u->ddir == DDIR_WRITE)
 			io_u_fill_buffer(td, io_u, io_u->xfer_buflen);
+		else if (io_u->ddir == DDIR_READ) {
+			/*
+			 * Reset the buf_filled parameters so next time if the
+			 * buffer is used for writes it is refilled.
+			 */
+			io_u->buf_filled = 0;
+			io_u->buf_filled_len = 0;
+		}
 	}
 
 	/*
diff --git a/ioengine.h b/ioengine.h
index 91dd429..b599b61 100644
--- a/ioengine.h
+++ b/ioengine.h
@@ -43,6 +43,13 @@ struct io_u {
 	unsigned long long offset;
 
 	/*
+	 * Parameters related to pre-filled buffers and
+	 * their size to handle variable block sizes.
+	 */
+	int buf_filled;
+	unsigned long buf_filled_len;
+
+	/*
 	 * IO engine state, may be different from above when we get
 	 * partial transfers / residual data counts
 	 */
diff --git a/verify.c b/verify.c
index 265bd55..73c1262 100644
--- a/verify.c
+++ b/verify.c
@@ -22,7 +22,7 @@
 #include "crc/sha512.h"
 #include "crc/sha1.h"
 
-static void fill_pattern(struct thread_data *td, void *p, unsigned int len)
+void fill_pattern(struct thread_data *td, void *p, unsigned int len, struct io_u *io_u)
 {
 	switch (td->o.verify_pattern_bytes) {
 	case 0:
@@ -30,13 +30,24 @@ static void fill_pattern(struct thread_data *td, void *p, unsigned int len)
 		fill_random_buf(p, len);
 		break;
 	case 1:
+		if (io_u->buf_filled && io_u->buf_filled_len >= len) {
+			dprint(FD_VERIFY, "using already filled verify pattern b=0 len=%u\n", len);
+			return;
+		}
 		dprint(FD_VERIFY, "fill verify pattern b=0 len=%u\n", len);
 		memset(p, td->o.verify_pattern[0], len);
+		io_u->buf_filled = 1;
+		io_u->buf_filled_len = len;
 		break;
 	default: {
 		unsigned int i = 0, size = 0;
 		unsigned char *b = p;
 
+		if (io_u->buf_filled && io_u->buf_filled_len >= len) {
+			dprint(FD_VERIFY, "using already filled verify pattern b=%d len=%u\n",
+				td->o.verify_pattern_bytes, len);
+			return;
+		}
 		dprint(FD_VERIFY, "fill verify pattern b=%d len=%u\n",
 			td->o.verify_pattern_bytes, len);
 
@@ -47,6 +58,8 @@ static void fill_pattern(struct thread_data *td, void *p, unsigned int len)
 			memcpy(b+i, td->o.verify_pattern, size);
 			i += size;
 		}
+		io_u->buf_filled = 1;
+		io_u->buf_filled_len = len;
 		break;
 	}
 	}
@@ -675,7 +688,7 @@ void populate_verify_io_u(struct thread_data *td, struct io_u *io_u)
 	if (td->o.verify == VERIFY_NULL)
 		return;
 
-	fill_pattern(td, p, io_u->buflen);
+	fill_pattern(td, p, io_u->buflen, io_u);
 
 	hdr_inc = io_u->buflen;
 	if (td->o.verify_interval)
diff --git a/verify.h b/verify.h
index a4a8cfe..29c4b46 100644
--- a/verify.h
+++ b/verify.h
@@ -69,6 +69,7 @@ extern void populate_verify_io_u(struct thread_data *, struct io_u *);
 extern int __must_check get_next_verify(struct thread_data *td, struct io_u *);
 extern int __must_check verify_io_u(struct thread_data *, struct io_u *);
 extern int verify_io_u_async(struct thread_data *, struct io_u *);
+extern void fill_pattern(struct thread_data *td, void *p, unsigned int len, struct io_u *io_u);
 
 /*
  * Async verify offload