> -----Original Message----- > From: Wang, Yalin > Sent: Tuesday, February 03, 2015 3:04 PM > To: 'Andrew Morton' > Cc: 'Kirill A. Shutemov'; 'arnd@xxxxxxxx'; 'linux-arch@xxxxxxxxxxxxxxx'; > 'linux-kernel@xxxxxxxxxxxxxxx'; 'linux@xxxxxxxxxxxxxxxx'; 'linux-arm- > kernel@xxxxxxxxxxxxxxxxxxx' > Subject: RE: [RFC] change non-atomic bitops method > > > -----Original Message----- > > From: Andrew Morton [mailto:akpm@xxxxxxxxxxxxxxxxxxxx] > > Sent: Tuesday, February 03, 2015 2:39 PM > > To: Wang, Yalin > > Cc: 'Kirill A. Shutemov'; 'arnd@xxxxxxxx'; 'linux-arch@xxxxxxxxxxxxxxx'; > > 'linux-kernel@xxxxxxxxxxxxxxx'; 'linux@xxxxxxxxxxxxxxxx'; 'linux-arm- > > kernel@xxxxxxxxxxxxxxxxxxx' > > Subject: Re: [RFC] change non-atomic bitops method > > > > On Tue, 3 Feb 2015 13:42:45 +0800 "Wang, Yalin" > <Yalin.Wang@xxxxxxxxxxxxxx> > > wrote: > > > > > > ... > > > > > > #ifdef CHECK_BEFORE_SET > > > if (p[i] != times) > > > #endif > > > > > > ... > > > > > > ---- > > > One run on CPU0, reader thread run on CPU1, > > > Test result: > > > sudo ./cache_test > > > reader:8.426228173 > > > 8.672198335 > > > > > > With -DCHECK_BEFORE_SET > > > sudo ./cache_test_check > > > reader:7.537036819 > > > 10.799746531 > > > > > > > You aren't measuring the right thing. You should compare > > > > if (p[i] != x) > > p[i] = x; > > > > versus > > > > p[i] = x; > > > > and you should do this for two cases: > > > > a) p[i] == x > > > > b) p[i] != x > > > > > > The first code sequence will be slower when (p[i] != x) and faster when > > (p[i] == x). > > > > > > Next, we should instrument the kernel to work out the frequency of > > set_bit on an already-set bit. > > > > It is only with both these ratios that we can work out whether the > > patch is a net gain. My suspicion is that set_bit on an already-set > > bit is so rare that the patch will be a loss. > I see, let's change the test a little: > 1) > memset(p, 0, SIZE); > if (p[i] != 0) > p[i] = 0; // never called > > #sudo ./cache_test_check > 6.698153838 > reader:7.529402625 > > > 2) > memset(p, 0, SIZE); > if (p[i] == 0) > p[i] = 0; // always called > #sudo ./cache_test_check > reader:7.895421311 > 9.000889973 > > Thanks > > I make a change in kernel to test hit/miss ratio: --- diff --git a/fs/proc/meminfo.c b/fs/proc/meminfo.c index 80e4645..a82937b 100644 --- a/fs/proc/meminfo.c +++ b/fs/proc/meminfo.c @@ -2,6 +2,7 @@ #include <linux/hugetlb.h> #include <linux/init.h> #include <linux/kernel.h> +#include <linux/module.h> #include <linux/mm.h> #include <linux/mman.h> #include <linux/mmzone.h> @@ -15,6 +16,41 @@ #include <asm/pgtable.h> #include "internal.h" +atomic_t __set_bit_success_count = ATOMIC_INIT(0); +atomic_t __set_bit_miss_count = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(__set_bit_success_count); +EXPORT_SYMBOL_GPL(__set_bit_miss_count); + +atomic_t __clear_bit_success_count = ATOMIC_INIT(0); +atomic_t __clear_bit_miss_count = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(__clear_bit_success_count); +EXPORT_SYMBOL_GPL(__clear_bit_miss_count); + +atomic_t __test_and_set_bit_success_count = ATOMIC_INIT(0); +atomic_t __test_and_set_bit_miss_count = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(__test_and_set_bit_success_count); +EXPORT_SYMBOL_GPL(__test_and_set_bit_miss_count); + +atomic_t __test_and_clear_bit_success_count = ATOMIC_INIT(0); +atomic_t __test_and_clear_bit_miss_count = ATOMIC_INIT(0); +EXPORT_SYMBOL_GPL(__test_and_clear_bit_success_count); +EXPORT_SYMBOL_GPL(__test_and_clear_bit_miss_count); + +/* + * atomic bitops + */ +atomic_t set_bit_success_count = ATOMIC_INIT(0); +atomic_t set_bit_miss_count = ATOMIC_INIT(0); + +atomic_t clear_bit_success_count = ATOMIC_INIT(0); +atomic_t clear_bit_miss_count = ATOMIC_INIT(0); + +atomic_t test_and_set_bit_success_count = ATOMIC_INIT(0); +atomic_t test_and_set_bit_miss_count = ATOMIC_INIT(0); + +atomic_t test_and_clear_bit_success_count = ATOMIC_INIT(0); +atomic_t test_and_clear_bit_miss_count = ATOMIC_INIT(0); + void __attribute__((weak)) arch_report_meminfo(struct seq_file *m) { } @@ -165,6 +201,18 @@ static int meminfo_proc_show(struct seq_file *m, void *v) HPAGE_PMD_NR) #endif ); + seq_printf(m, "__set_bit_miss_count:%d __set_bit_success_count:%d\n" + "__clear_bit_miss_count:%d __clear_bit_success_count:%d\n" + "__test_and_set_bit_miss_count:%d __test_and_set_bit_success_count:%d\n" + "__test_and_clear_bit_miss_count:%d __test_and_clear_bit_success_count:%d\n", + atomic_read(&__set_bit_miss_count), atomic_read(&__set_bit_success_count), + atomic_read(&__clear_bit_miss_count), atomic_read(&__clear_bit_success_count), + + atomic_read(&__test_and_set_bit_miss_count), + atomic_read(&__test_and_set_bit_success_count), + + atomic_read(&__test_and_clear_bit_miss_count), + atomic_read(&__test_and_clear_bit_success_count)); hugetlb_report_meminfo(m); diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h index 697cc2b..1895133 100644 --- a/include/asm-generic/bitops/non-atomic.h +++ b/include/asm-generic/bitops/non-atomic.h @@ -2,7 +2,18 @@ #define _ASM_GENERIC_BITOPS_NON_ATOMIC_H_ #include <asm/types.h> +#include <asm/atomic.h> +extern atomic_t __set_bit_success_count; +extern atomic_t __set_bit_miss_count; +extern atomic_t __clear_bit_success_count; +extern atomic_t __clear_bit_miss_count; + +extern atomic_t __test_and_set_bit_success_count; +extern atomic_t __test_and_set_bit_miss_count; + +extern atomic_t __test_and_clear_bit_success_count; +extern atomic_t __test_and_clear_bit_miss_count; /** * __set_bit - Set a bit in memory * @nr: the bit to set @@ -17,7 +28,13 @@ static inline void __set_bit(int nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - *p |= mask; + if ((*p & mask) == 0) { + atomic_inc(&__set_bit_success_count); + *p |= mask; + } else { + atomic_inc(&__set_bit_miss_count); + } + } static inline void __clear_bit(int nr, volatile unsigned long *addr) @@ -25,7 +42,12 @@ static inline void __clear_bit(int nr, volatile unsigned long *addr) unsigned long mask = BIT_MASK(nr); unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); - *p &= ~mask; + if ((*p & mask) != 0) { + atomic_inc(&__clear_bit_success_count); + *p &= ~mask; + } else { + atomic_inc(&__clear_bit_miss_count); + } } /** @@ -60,7 +82,12 @@ static inline int __test_and_set_bit(int nr, volatile unsigned long *addr) unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); unsigned long old = *p; - *p = old | mask; + if ((old & mask) == 0) { + atomic_inc(&__test_and_set_bit_success_count); + *p = old | mask; + } else { + atomic_inc(&__test_and_set_bit_miss_count); + } return (old & mask) != 0; } @@ -79,7 +106,12 @@ static inline int __test_and_clear_bit(int nr, volatile unsigned long *addr) unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr); unsigned long old = *p; - *p = old & ~mask; + if ((old & mask) != 0) { + atomic_inc(&__test_and_clear_bit_success_count); + *p = old & ~mask; + } else { + atomic_inc(&__test_and_clear_bit_miss_count); + } return (old & mask) != 0; } --- After use the phone some time: root@D5303:/ # cat /proc/meminfo VmallocUsed: 10348 kB VmallocChunk: 75632 kB __set_bit_miss_count:10002 __set_bit_success_count:1096661 __clear_bit_miss_count:359484 __clear_bit_success_count:3674617 __test_and_set_bit_miss_count:7 __test_and_set_bit_success_count:221 __test_and_clear_bit_miss_count:924611 __test_and_clear_bit_success_count:193 __test_and_clear_bit_miss_count has a very high miss rate. In fact, I think set/clear/test_and_set(clear)_bit atomic version can also Be investigated to see its miss ratio, I have not tested the atomic version, Because it reside in different architectures. Thanks -- To unsubscribe from this list: send the line "unsubscribe linux-arch" in the body of a message to majordomo@xxxxxxxxxxxxxxx More majordomo info at http://vger.kernel.org/majordomo-info.html