Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi Yu,
Thanks.
On 07/04/2014 02:21 PM, Chao Yu wrote:

> Hi Jaegeuk, Gu, Changman
> 
>> -----Original Message-----
>> From: Jaegeuk Kim [mailto:jaegeuk@xxxxxxxxxx]
>> Sent: Friday, July 04, 2014 1:36 PM
>> To: Gu Zheng
>> Cc: f2fs; fsdevel; 이창만; 俞
>> Subject: Re: [PATCH 3/4] f2fs: use find_next_bit_le rather than test_bit_le in, find_in_block
>>
>> Well, how about testing with many ones in the bit streams?
>> Thanks,
>>
>> On Thu, Jul 03, 2014 at 06:14:02PM +0800, Gu Zheng wrote:
>>> Hi Jaegeuk, Changman
>>>
>>> Just a simple test, not very sure it can address
>>> our qualm.
>>>
>>> Bitmap size:216(the same as f2fs dentry_bits).
>>> CPU: Intel i5 x86_64.
>>>
>>> Time counting based on tsc(the less the fast).
>>> [Index of 1]	find_next_bit_le	test_bit_le
>>> 0		20			117
>>> 1		20			114
>>> 2		20			113
>>> 3		20			139
>>> 4		22			121
>>> 5		22			118
>>> 6		22			115
>>> 8		22			112
>>> 9		22			106
>>> 10		22			105
>>> 11		22			100
>>> 16		22			98
>>> 48		22			97
>>> 80		27			95
>>> 104		27			92
>>> 136		32			95
>>> 160		32			92
>>> 184		32			90
>>> 200		27			87
>>> 208		35			84
>>>
>>> According to the result, find_next_bit_le is always
>>> better than test_bit_le, though there may be some
>>> noise, but I think the result is clear.
>>> Hope it can help us.:)
>>> ps.The sample is attached too.
>>>
>>> Thanks,
>>> Gu
> 
> I hope this could provide some help for this patch.
> 
> I modify Gu's code like this, and add few test case:
> 
> static void test_bit_search_speed(void)
> {
> 	unsigned long flags;
> 	uint64_t tsc_s_b1, tsc_s_e1, tsc_s_b2, tsc_s_e2;
> 	int i, j, pos;
> 	const void *bit_addr;
> 
> 	local_irq_save(flags);
> 	preempt_disable();
> 	
> 	printk("find_next_bit	test_bit_le\n");
> 
> 	for (i = 0; i < 24; i++) {
> 
> 		bit_addr = &bitmaps[i];
> 
> 		tsc_s_b1 = rdtsc();
> 
> 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> 			while (pos < 216)
> 				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
> 
> 		mb();
> 		tsc_s_e1 = rdtsc();
> 		tsc_s_e1 -= tsc_s_b1;
> 		do_div(tsc_s_e1, 1000);
> 
> 		tsc_s_b2 = rdtsc();
> 
> 		for (j = 0, pos = 0; j < 1000; j++, pos = 0)
> 			while (pos < 216)
> 				test_bit_le(pos++, bit_addr);
> 
> 		mb();
> 		tsc_s_e2 = rdtsc();
> 		tsc_s_e2 -= tsc_s_b2;
> 		do_div(tsc_s_e2, 1000);
> 
> 		printk("%-16llu %-16llu\n", tsc_s_e1, tsc_s_e2);
> 	}
> 
> 	preempt_enable();
> 	local_irq_restore(flags);
> }
> case: 11111111 11111111
> 		{255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255, 255,
> 		 255, 255, 255, 255, 255, 255},
> case: 10101010 10101010
> 		{170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170, 170,
> 		 170, 170, 170, 170, 170, 170},
> case: 11111111 00000000
> 		{255, 0, 255, 0, 255, 0, 255,
> 		 0, 255, 0, 255, 0, 255, 0,
> 		 255, 0, 255, 0, 255, 0, 255,
> 		 0, 255, 0, 255, 0, 255},
> case: 00001111 00001111
> 		 {15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15, 15,
> 		 15, 15, 15, 15, 15, 15}
> 
> and here are test result in my env. (Ubuntu vm, 768MB, i3-3220)
> It seems find_next_bit works not so bad as I thought.
> 
> find_next_bit    test_bit_le
> 73               4209
> 62               1271
> 69               1585
> 50               2031
> 67               2255
> 82               2261
> 52               4007
> 79               2159
> 50               2043
> 55               2215
> 53               2393
> 72               3784
> 76               1879
> 61               2562
> 70               2702
> 62               2489
> 56               2307
> 54               2063
> 51               2258
> 69               2712
> 4133             3989  -- case: 11111111 11111111
> 2370             3024  -- case: 10101010 10101010
> 2608             2413  -- case: 11111111 00000000
> 2457             2506  -- case: 00001111 00001111

The time cost of test_bit_le shakes violently, it should be
very smooth according to the test case, maybe it has relation
to your vm env.

To Jaegeuk,
Following test result is walking a bitmap via find_next_bit_le
and test_bit_le.
(Front 7 are random bitmaps, last four are cases from Yu, see
attached sample for detail):

find_next_bit_le    test_bit_le

     3615                3492 
     2640                3492 
     2431                3492 
     1957                3494 
     2160                3492 
     1933                3495 
     1096                3492 

     8078                3492 
     3732                3492 
     4237                3493 
     3824                3492

Thanks,
Gu 

> 
> .
> 


#include <linux/module.h>

__u8 bitmaps[11][27] = {
		{1, 123, 2, 3, 127, 200, 100,
		 123, 23, 13, 78, 42, 123, 50,
		 123, 56, 198, 0, 58, 22, 19,
		 123, 23, 13, 78, 42, 123},
		{2, 0, 0, 0, 0, 0, 0,
		 123, 23, 13, 78, 42, 123, 50,
		 123, 56, 198, 0, 58, 22, 19,
		 23, 123, 2, 34, 123, 12},
		{4, 0, 0, 0, 0, 0, 0,
		 123, 56, 198, 0, 58, 22, 19,
		 123, 23, 13, 78, 42, 123,
		 0, 0, 0, 34, 45, 29},
		{8, 0, 0, 0, 0, 0, 0,
		 1, 123, 2, 3, 127, 200, 100,
		 123, 23, 13, 78, 42, 123, 50,
		 0, 0, 0, 0, 0, 0},
		{16, 0, 0, 0, 0, 0, 0,
		 8, 78, 0, 0, 23, 0, 213,
		 0, 12, 0, 45, 0, 109, 111,
		 231, 11, 11, 88, 77, 99},
		{32, 0, 0, 0, 0, 0, 0,
		 8, 78, 0, 0, 23, 0, 213,
		 0, 12, 0, 45, 0, 109, 111,
		 231, 11, 11, 88, 77, 99},
		{64, 0, 0, 0, 0, 0, 0,
		 8, 78, 0, 0, 23, 0, 213,
		 0, 12, 0, 45, 0, 109, 111,
		 0, 0, 0, 0, 0, 0},
		{255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255, 255,
		 255, 255, 255, 255, 255, 255},
		{170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170, 170,
		 170, 170, 170, 170, 170, 170},
		{255, 0, 255, 0, 255, 0, 255,
		 0, 255, 0, 255, 0, 255, 0,
		 255, 0, 255, 0, 255, 0, 255,
		 0, 255, 0, 255, 0, 255},
		 {15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15, 15,
		 15, 15, 15, 15, 15, 15}
		};

uint64_t rdtsc(void) {
uint32_t lo, hi;
__asm__ __volatile__ ("rdtsc" : "=a" (lo), "=d" (hi));
return (uint64_t)hi << 32 | lo;
}

static void test_bit_search_speed(void)
{
	unsigned long flags;
	uint64_t tsc_s, tsc_diff;
	int i, j, pos;
	const void *bit_addr;

	local_irq_save(flags);
	preempt_disable();

	printk("find_next_bit_le    test_bit_le\n");

	for (i = 0; i < 11; i++) {
		bit_addr = &bitmaps[i];

		pos = 0;
		tsc_s = rdtsc();

		for (j = 0; j < 1000; j++, pos = 0)
			while (pos < 216)
				pos = find_next_bit_le(bit_addr, 216, pos) + 1;
		mb();
		tsc_diff = (rdtsc() - tsc_s)/1000;

		printk("%8llu", tsc_diff);

		pos = 0;
		tsc_s = rdtsc();

		for (j = 0; j < 1000; j++, pos = 0)
			while (pos < 216)
				test_bit_le(pos++, bit_addr);

		mb();
		tsc_diff = (rdtsc() - tsc_s)/1000;

		printk("%20llu \n", tsc_diff);
	}

	preempt_enable();
	local_irq_restore(flags);
}

static int __init start_test(void) {
	test_bit_search_speed();
	return 0;
}

static void __exit exit_test(void) {

}

module_init(start_test);
module_exit(exit_test);

MODULE_LICENSE("GPL");
MODULE_AUTHOR("Gu Zheng");

[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux