David Laight <David.Laight@xxxxxxxxxx> wrote:

> > Move the iterator functions to a header file so that other operations that
> > need to scan over an iterator can be added. For instance, the rbd driver
> > could use this to scan a buffer to see if it is all zeros and libceph could
> > use this to generate a crc.
>
> These all look a bit big for being more generally inlined.
>
> I know you want to avoid the indirect call in the normal cases,
> but maybe it would be ok for other uses?

So you'd advocate for something like:

	size_t generic_iterate(struct iov_iter *iter, size_t len, void *priv, void *priv2,
			       iov_ustep_f ustep, iov_step_f step)
	{
		return iterate_and_advance2(iter, len, priv, priv2, ustep, step);
	}
	EXPORT_SYMBOL(generic_iterate);

in lib/iov_iter.c and then call that from the places that want to use it?

I tried benchmarking that (see attached patch - it needs to go on top of my
iov patches). Running the insmod thrice and then filtering out and sorting
the results:

	iov_kunit_benchmark_bvec: avg 3174 uS, stddev 68 uS
	iov_kunit_benchmark_bvec: avg 3176 uS, stddev 61 uS
	iov_kunit_benchmark_bvec: avg 3180 uS, stddev 64 uS
	iov_kunit_benchmark_bvec_outofline: avg 3678 uS, stddev 4 uS
	iov_kunit_benchmark_bvec_outofline: avg 3678 uS, stddev 5 uS
	iov_kunit_benchmark_bvec_outofline: avg 3679 uS, stddev 6 uS
	iov_kunit_benchmark_xarray: avg 3560 uS, stddev 5 uS
	iov_kunit_benchmark_xarray: avg 3560 uS, stddev 6 uS
	iov_kunit_benchmark_xarray: avg 3570 uS, stddev 16 uS
	iov_kunit_benchmark_xarray_outofline: avg 4125 uS, stddev 13 uS
	iov_kunit_benchmark_xarray_outofline: avg 4125 uS, stddev 2 uS
	iov_kunit_benchmark_xarray_outofline: avg 4125 uS, stddev 6 uS

It adds almost 16% overhead:

	(gdb) p 4125/3560.0
	$2 = 1.1587078651685394
	(gdb) p 3678/3174.0
	$3 = 1.1587901701323251

I'm guessing a lot of that is due to function pointer mitigations.
Now, part of the code size expansion can be mitigated by using, say, iterate_and_advance_kernel() if you know you aren't going to encounter user-backed iterators, or even using, say, iterate_bvec() if you know you're only going to see a specific iterator type. David --- iov_iter: Benchmark out of line generic iterator diff --git a/include/linux/iov_iter.h b/include/linux/iov_iter.h index 2ebb86c041b6..8f562e80473b 100644 --- a/include/linux/iov_iter.h +++ b/include/linux/iov_iter.h @@ -293,4 +293,7 @@ size_t iterate_and_advance_kernel(struct iov_iter *iter, size_t len, void *priv, return progress; } +size_t generic_iterate(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f ustep, iov_step_f step); + #endif /* _LINUX_IOV_ITER_H */ diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 8f7a10c4a295..f9643dd02676 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -1684,3 +1684,10 @@ ssize_t iov_iter_extract_pages(struct iov_iter *i, return -EFAULT; } EXPORT_SYMBOL_GPL(iov_iter_extract_pages); + +size_t generic_iterate(struct iov_iter *iter, size_t len, void *priv, void *priv2, + iov_ustep_f ustep, iov_step_f step) +{ + return iterate_and_advance2(iter, len, priv, priv2, ustep, step); +} +EXPORT_SYMBOL(generic_iterate); diff --git a/lib/kunit_iov_iter.c b/lib/kunit_iov_iter.c index cc9c64663a73..f208516a68c9 100644 --- a/lib/kunit_iov_iter.c +++ b/lib/kunit_iov_iter.c @@ -18,6 +18,7 @@ #include <linux/writeback.h> #include <linux/uio.h> #include <linux/bvec.h> +#include <linux/iov_iter.h> #include <kunit/test.h> MODULE_DESCRIPTION("iov_iter testing"); @@ -1571,6 +1572,124 @@ static void __init iov_kunit_benchmark_xarray(struct kunit *test) KUNIT_SUCCEED(); } +static noinline +size_t shovel_to_user_iter(void __user *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + if (should_fail_usercopy()) + return len; + if (access_ok(iter_to, len)) { + from += progress; + instrument_copy_to_user(iter_to, from, len); + len = 
raw_copy_to_user(iter_to, from, len); + } + return len; +} + +static noinline +size_t shovel_to_kernel_iter(void *iter_to, size_t progress, + size_t len, void *from, void *priv2) +{ + memcpy(iter_to, from + progress, len); + return 0; +} + +/* + * Time copying 256MiB through an ITER_BVEC with an out-of-line copier + * function. + */ +static void __init iov_kunit_benchmark_bvec_outofline(struct kunit *test) +{ + struct iov_iter iter; + struct bio_vec *bvec; + struct page *page; + unsigned int samples[IOV_KUNIT_NR_SAMPLES]; + ktime_t a, b; + ssize_t copied; + size_t size = 256 * 1024 * 1024, npages = size / PAGE_SIZE; + void *scratch; + int i; + + /* Allocate a page and tile it repeatedly in the buffer. */ + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + kunit_add_action_or_reset(test, iov_kunit_free_page, page); + + bvec = kunit_kmalloc_array(test, npages, sizeof(bvec[0]), GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, bvec); + for (i = 0; i < npages; i++) + bvec_set_page(&bvec[i], page, PAGE_SIZE, 0); + + /* Create a single large buffer to copy to/from. */ + scratch = iov_kunit_create_source(test, npages); + + /* Perform and time a bunch of copies. */ + kunit_info(test, "Benchmarking copy_to_iter() over BVEC:\n"); + for (i = 0; i < IOV_KUNIT_NR_SAMPLES; i++) { + iov_iter_bvec(&iter, ITER_DEST, bvec, npages, size); + a = ktime_get_real(); + copied = generic_iterate(&iter, size, scratch, NULL, + shovel_to_user_iter, + shovel_to_kernel_iter); + b = ktime_get_real(); + KUNIT_EXPECT_EQ(test, copied, size); + samples[i] = ktime_to_us(ktime_sub(b, a)); + } + + iov_kunit_benchmark_print_stats(test, samples); + KUNIT_SUCCEED(); +} + +/* + * Time copying 256MiB through an ITER_XARRAY with an out-of-line copier + * function. 
+ */ +static void __init iov_kunit_benchmark_xarray_outofline(struct kunit *test) +{ + struct iov_iter iter; + struct xarray *xarray; + struct page *page; + unsigned int samples[IOV_KUNIT_NR_SAMPLES]; + ktime_t a, b; + ssize_t copied; + size_t size = 256 * 1024 * 1024, npages = size / PAGE_SIZE; + void *scratch; + int i; + + /* Allocate a page and tile it repeatedly in the buffer. */ + page = alloc_page(GFP_KERNEL); + KUNIT_ASSERT_NOT_NULL(test, page); + kunit_add_action_or_reset(test, iov_kunit_free_page, page); + + xarray = iov_kunit_create_xarray(test); + + for (i = 0; i < npages; i++) { + void *x = xa_store(xarray, i, page, GFP_KERNEL); + + KUNIT_ASSERT_FALSE(test, xa_is_err(x)); + } + + /* Create a single large buffer to copy to/from. */ + scratch = iov_kunit_create_source(test, npages); + + /* Perform and time a bunch of copies. */ + kunit_info(test, "Benchmarking copy_to_iter() over XARRAY:\n"); + for (i = 0; i < IOV_KUNIT_NR_SAMPLES; i++) { + iov_iter_xarray(&iter, ITER_DEST, xarray, 0, size); + a = ktime_get_real(); + + copied = generic_iterate(&iter, size, scratch, NULL, + shovel_to_user_iter, + shovel_to_kernel_iter); + b = ktime_get_real(); + KUNIT_EXPECT_EQ(test, copied, size); + samples[i] = ktime_to_us(ktime_sub(b, a)); + } + + iov_kunit_benchmark_print_stats(test, samples); + KUNIT_SUCCEED(); +} + static struct kunit_case __refdata iov_kunit_cases[] = { KUNIT_CASE(iov_kunit_copy_to_ubuf), KUNIT_CASE(iov_kunit_copy_from_ubuf), @@ -1593,6 +1712,8 @@ static struct kunit_case __refdata iov_kunit_cases[] = { KUNIT_CASE(iov_kunit_benchmark_bvec), KUNIT_CASE(iov_kunit_benchmark_bvec_split), KUNIT_CASE(iov_kunit_benchmark_xarray), + KUNIT_CASE(iov_kunit_benchmark_bvec_outofline), + KUNIT_CASE(iov_kunit_benchmark_xarray_outofline), {} };