[PATCH] percpu_counter : add percpu_counter_add_fast()

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Andrew

I based the following patch on linux-2.6; I don't know whether Christoph's
previous patch is in a git tree. I'll respin it eventually.

Thanks

[PATCH] percpu_counter : percpu_counter_add_fast()

The current way to change a percpu_counter is to call
percpu_counter_add(), which is a bit expensive.
(More than 40 instructions, possible false sharing, ...)

When we don't need to maintain the approximate value of the
percpu_counter (aka fbc->count), and don't need an "s64"-wide counter but
a regular "int" or "long" one, we can use this new function:
percpu_counter_add_fast()

This function is pretty fast:
- One instruction on x86 SMP, no register pressure.
- Safe in preempt-enabled contexts.
- No lock acquisition, less false sharing.

Users of this percpu_counter variant should no longer use
percpu_counter_read() or percpu_counter_read_positive(), only the
percpu_counter_sum{_positive}() variants.

Note: we could later add an irqsafe variant, still one instruction on x86
SMP...

Suggested-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Eric Dumazet <eric.dumazet@xxxxxxxxx>
CC: Christoph Lameter <cl@xxxxxxxxxxxxxxxxxxxx>
CC: Nick Piggin <npiggin@xxxxxxxxx>
CC: Dave Chinner <david@xxxxxxxxxxxxx>
---
 include/linux/percpu_counter.h |   36 +++++++++++++++++++++++++++----
 lib/percpu_counter.c           |   12 +++++-----
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 8a7d510..b9f4cc1 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -3,7 +3,9 @@
 /*
  * A simple "approximate counter" for use in ext2 and ext3 superblocks.
  *
- * WARNING: these things are HUGE.  4 kbytes per counter on 32-way P4.
+ * WARNING: these things are big.  sizeof(long) bytes per possible cpu per counter.
+ * For a 64 cpus 64bit machine :
+ *	64*8 (512) bytes + sizeof(struct percpu_counter)
  */
 
 #include <linux/spinlock.h>
@@ -21,7 +23,7 @@ struct percpu_counter {
 #ifdef CONFIG_HOTPLUG_CPU
 	struct list_head list;	/* All percpu_counters are on a list */
 #endif
-	s32 __percpu *counters;
+	long __percpu *counters;
 };
 
 extern int percpu_counter_batch;
@@ -38,7 +40,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 
 void percpu_counter_destroy(struct percpu_counter *fbc);
 void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
-void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
+void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, long batch);
 s64 __percpu_counter_sum(struct percpu_counter *fbc);
 int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);
 
@@ -47,6 +49,24 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 	__percpu_counter_add(fbc, amount, percpu_counter_batch);
 }
 
+/**
+ * percpu_counter_add_fast - fast variant of percpu_counter_add
+ * @fbc: pointer to percpu_counter
+ * @amount: value to add to counter
+ *
+ * Add amount to a percpu_counter object, without approximate (fbc->count)
+ * estimation / correction.
+ * Notes :
+ * - This fast version is limited to "long" counters, not "s64".
+ * - It is preempt safe, but not IRQ safe (on UP)
+ * - Use of percpu_counter_read{_positive}() is discouraged.
+ * - fbc->count accumulates the counters from offlined cpus.
+ */
+static inline void percpu_counter_add_fast(struct percpu_counter *fbc, long amount)
+{
+	this_cpu_add(*fbc->counters, amount);
+}
+
 static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
 {
 	s64 ret = __percpu_counter_sum(fbc);
@@ -118,7 +138,15 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
 }
 
 static inline void
-__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+percpu_counter_add_fast(struct percpu_counter *fbc, long amount)
+{
+	preempt_disable();
+	fbc->count += amount;
+	preempt_enable();
+}
+
+static inline void
+__percpu_counter_add(struct percpu_counter *fbc, s64 amount, long batch)
 {
 	percpu_counter_add(fbc, amount);
 }
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ec9048e..93d50a5 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -18,7 +18,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 
 	spin_lock(&fbc->lock);
 	for_each_possible_cpu(cpu) {
-		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
+		long *pcount = per_cpu_ptr(fbc->counters, cpu);
 		*pcount = 0;
 	}
 	fbc->count = amount;
@@ -26,10 +26,10 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
 }
 EXPORT_SYMBOL(percpu_counter_set);
 
-void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, long batch)
 {
 	s64 count;
-	s32 *pcount;
+	long *pcount;
 	int cpu = get_cpu();
 
 	pcount = per_cpu_ptr(fbc->counters, cpu);
@@ -58,7 +58,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
 	spin_lock(&fbc->lock);
 	ret = fbc->count;
 	for_each_online_cpu(cpu) {
-		s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
+		long *pcount = per_cpu_ptr(fbc->counters, cpu);
 		ret += *pcount;
 	}
 	spin_unlock(&fbc->lock);
@@ -72,7 +72,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
 	spin_lock_init(&fbc->lock);
 	lockdep_set_class(&fbc->lock, key);
 	fbc->count = amount;
-	fbc->counters = alloc_percpu(s32);
+	fbc->counters = alloc_percpu(long);
 	if (!fbc->counters)
 		return -ENOMEM;
 #ifdef CONFIG_HOTPLUG_CPU
@@ -123,7 +123,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
 	cpu = (unsigned long)hcpu;
 	mutex_lock(&percpu_counters_lock);
 	list_for_each_entry(fbc, &percpu_counters, list) {
-		s32 *pcount;
+		long *pcount;
 		unsigned long flags;
 
 		spin_lock_irqsave(&fbc->lock, flags);


--
To unsubscribe from this list: send the line "unsubscribe linux-fsdevel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[Index of Archives]     [Linux Ext4 Filesystem]     [Union Filesystem]     [Filesystem Testing]     [Ceph Users]     [Ecryptfs]     [AutoFS]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux Cachefs]     [Reiser Filesystem]     [Linux RAID]     [Samba]     [Device Mapper]     [CEPH Development]
  Powered by Linux