Hi Yu, Thanks. On 07/04/2014 02:21 PM, Chao Yu wrote: > Hi Jaegeuk, Gu, Changman > >> -----Original Message----- >> From: Jaegeuk Kim [mailto:jaegeuk@xxxxxxxxxx] >> Sent: Friday, July 04, 2014 1:36 PM >> To: Gu Zheng >> Cc: f2fs; fsdevel; 이창만; 俞 >> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block >> >> Well, how about testing with many ones in the bit streams? >> Thanks, >> >> On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote: >>> Hi Jaegeuk, Changman >>> >>> Just a simple test, not very sure it can address >>> our qualm. >>> >>> Bitmap size:216(the same as f2fs dentry_bits). >>> CPU: Intel i5 x86_64. >>> >>> Time counting based on tsc(the less the fast). >>> [Index of 1] find_next_bit_le test_bit_le >>> 0 20 117 >>> 1 20 114 >>> 2 20 113 >>> 3 20 139 >>> 4 22 121 >>> 5 22 118 >>> 6 22 115 >>> 8 22 112 >>> 9 22 106 >>> 10 22 105 >>> 11 22 100 >>> 16 22 98 >>> 48 22 97 >>> 80 27 95 >>> 104 27 92 >>> 136 32 95 >>> 160 32 92 >>> 184 32 90 >>> 200 27 87 >>> 208 35 84 >>> >>> According to the result, find_next_bit_le is always >>> better than test_bit_le, though there may be some >>> noise, but I think the result is clear. >>> Hope it can help us.:) >>> ps.The sample is attached too. >>> >>> Thanks, >>> Gu > > I hope this could provide some help for this patch. > > I modify Gu's code like this, and add few test case: > > static void test_bit_search_speed(void) > { > unsigned long flags; > uint64_t tsc_s_b1, tsc_s_e1, tsc_s_b2, tsc_s_e2; > int i, j, pos; > const void *bit_addr; > > local_irq_save(flags); > preempt_disable(); > > printk("find_next_bit test_bit_le\n"); > > for (i = 0; i < 24; i++) { > > bit_addr = &bitmaps[i]; > > tsc_s_b1 = rdtsc(); > > for (j = 0, pos = 0; j < 1000; j++, pos = 0) > while (pos < 216) > pos = find_next_bit_le(bit_addr, 216, pos) + 1; > > mb(); > tsc_s_e1 = rdtsc(); > tsc_s_e1 -= tsc_s_b1; > do_div(tsc_s_e1, 1000); > > tsc_s_b2 = rdtsc(); > > for (j = 0, pos = 0; j < 1000; j++, pos = 0) > while (pos < 216) > test_bit_le(pos++, bit_addr); > > mb(); > tsc_s_e2 = rdtsc(); > tsc_s_e2 -= tsc_s_b2; > do_div(tsc_s_e2, 1000); > > printk("%-16llu %-16llu\n", tsc_s_e1, tsc_s_e2); > } > > preempt_enable(); > local_irq_restore(flags); > } > case: 11111111 11111111 > {255, 255, 255, 255, 255, 255, 255, > 255, 255, 255, 255, 255, 255, 255, > 255, 255, 255, 255, 255, 255, 255, > 255, 255, 255, 255, 255, 255}, > case: 10101010 10101010 > {170, 170, 170, 170, 170, 170, 170, > 170, 170, 170, 170, 170, 170, 170, > 170, 170, 170, 170, 170, 170, 170, > 170, 170, 170, 170, 170, 170}, > case: 11111111 00000000 > {255, 0, 255, 0, 255, 0, 255, > 0, 255, 0, 255, 0, 255, 0, > 255, 0, 255, 0, 255, 0, 255, > 0, 255, 0, 255, 0, 255}, > case: 00001111 00001111 > {15, 15, 15, 15, 15, 15, 15, > 15, 15, 15, 15, 15, 15, 15, > 15, 15, 15, 15, 15, 15, 15, > 15, 15, 15, 15, 15, 15} > > and here are test result in my env. (Ubuntu vm, 768MB, i3-3220) > It seems find_next_bit works not so bad as I thought. > > find_next_bit test_bit_le > 73 4209 > 62 1271 > 69 1585 > 50 2031 > 67 2255 > 82 2261 > 52 4007 > 79 2159 > 50 2043 > 55 2215 > 53 2393 > 72 3784 > 76 1879 > 61 2562 > 70 2702 > 62 2489 > 56 2307 > 54 2063 > 51 2258 > 69 2712 > 4133 3989 -- case: 11111111 11111111 > 2370 3024 -- case: 10101010 10101010 > 2608 2413 -- case: 11111111 00000000 > 2457 2506 -- case: 00001111 00001111 The time cost of test_bit_le shakes violently, it should be very smooth according to the test case, maybe it has relation to your vm env. To Jaegeuk, Following test result is walking a bitmap via find_next_bit_le and test_bit_le. (Front 7 are random bitmaps, last four are cases from Yu, see attached sample for detail): find_next_bit_le test_bit_le 3615 3492 2640 3492 2431 3492 1957 3494 2160 3492 1933 3495 1096 3492 8078 3492 3732 3492 4237 3493 3824 3492 Thanks, Gu > > . >
#include <linux/module.h> __u8 bitmaps[11][27] = { {1, 123, 2, 3, 127, 200, 100, 123, 23, 13, 78, 42, 123, 50, 123, 56, 198, 0, 58, 22, 19, 123, 23, 13, 78, 42, 123}, {2, 0, 0, 0, 0, 0, 0, 123, 23, 13, 78, 42, 123, 50, 123, 56, 198, 0, 58, 22, 19, 23, 123, 2, 34, 123, 12}, {4, 0, 0, 0, 0, 0, 0, 123, 56, 198, 0, 58, 22, 19, 123, 23, 13, 78, 42, 123, 0, 0, 0, 34, 45, 29}, {8, 0, 0, 0, 0, 0, 0, 1, 123, 2, 3, 127, 200, 100, 123, 23, 13, 78, 42, 123, 50, 0, 0, 0, 0, 0, 0}, {16, 0, 0, 0, 0, 0, 0, 8, 78, 0, 0, 23, 0, 213, 0, 12, 0, 45, 0, 109, 111, 231, 11, 11, 88, 77, 99}, {32, 0, 0, 0, 0, 0, 0, 8, 78, 0, 0, 23, 0, 213, 0, 12, 0, 45, 0, 109, 111, 231, 11, 11, 88, 77, 99}, {64, 0, 0, 0, 0, 0, 0, 8, 78, 0, 0, 23, 0, 213, 0, 12, 0, 45, 0, 109, 111, 0, 0, 0, 0, 0, 0}, {255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255, 255}, {170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170, 170}, {255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255, 0, 255}, {15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15} }; uint64_t rdtsc(void) { uint32_t lo, hi; __asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi)); return (uint64_t)hi << 32 | lo; } static void test_bit_search_speed(void) { unsigned long flags; uint64_t tsc_s, tsc_diff; int i, j, pos; const void *bit_addr; local_irq_save(flags); preempt_disable(); printk("find_next_bit_le test_bit_le\n"); for (i = 0; i < 11; i++) { bit_addr = &bitmaps[i]; pos = 0; tsc_s = rdtsc(); for (j = 0; j < 1000; j++, pos = 0) while (pos < 216) pos = find_next_bit_le(bit_addr, 216, pos) + 1; mb(); tsc_diff = (rdtsc() - tsc_s)/1000; printk("%8llu", tsc_diff); pos = 0; tsc_s = rdtsc(); for (j = 0; j < 1000; j++, pos = 0) while (pos < 216) test_bit_le(pos++, bit_addr); mb(); tsc_diff = (rdtsc() - tsc_s)/1000; printk("%20llu \n", tsc_diff); } preempt_enable(); local_irq_restore(flags); } static int __init start_test(void) { test_bit_search_speed(); return 0; } static void __exit exit_test(void) { } module_init(start_test); module_exit(exit_test); MODULE_LICENSE("GPL"); MODULE_AUTHOR("Gu Zheng");