[PATCH] percpu_counter : add percpu_counter_add_fast()

From: Eric Dumazet
Date: Sat Oct 16 2010 - 10:19:33 EST


Andrew

I based the following patch on linux-2.6; I don't know whether Christoph's
previous patch is in a git tree. I'll respin it if necessary.

Thanks

[PATCH] percpu_counter : percpu_counter_add_fast()

The current way to change a percpu_counter is to call
percpu_counter_add(), which is a bit expensive.
(More than 40 instructions, possible false sharing, ...)

When we don't need to maintain the approximate value of the
percpu_counter (aka fbc->count), and don't need an "s64"-wide counter but
a regular "int" or "long" one, we can use this new function:
percpu_counter_add_fast()

This function is pretty fast:
- One instruction on x86 SMP, no register pressure.
- Safe in preempt-enabled contexts.
- No lock acquisition, less false sharing.

Users of this percpu_counter variant should no longer use
percpu_counter_read() or percpu_counter_read_positive(), because the
approximate value (fbc->count) is not maintained by the fast path; they
should use only the percpu_counter_sum{_positive}() variants.

Note: we could later add an irqsafe variant, still one instruction on x86
SMP...

Suggested-by: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Signed-off-by: Eric Dumazet <eric.dumazet@xxxxxxxxx>
CC: Christoph Lameter <cl@xxxxxxxxxxxxxxxxxxxx>
CC: Nick Piggin <npiggin@xxxxxxxxx>
CC: Dave Chinner <david@xxxxxxxxxxxxx>
---
include/linux/percpu_counter.h | 36 +++++++++++++++++++++++++++----
lib/percpu_counter.c | 12 +++++-----
2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/include/linux/percpu_counter.h b/include/linux/percpu_counter.h
index 8a7d510..b9f4cc1 100644
--- a/include/linux/percpu_counter.h
+++ b/include/linux/percpu_counter.h
@@ -3,7 +3,9 @@
/*
* A simple "approximate counter" for use in ext2 and ext3 superblocks.
*
- * WARNING: these things are HUGE. 4 kbytes per counter on 32-way P4.
+ * WARNING: these things are big. sizeof(long) bytes per possible cpu per counter.
+ * For a 64 cpus 64bit machine :
+ * 64*8 (512) bytes + sizeof(struct percpu_counter)
*/

#include <linux/spinlock.h>
@@ -21,7 +23,7 @@ struct percpu_counter {
#ifdef CONFIG_HOTPLUG_CPU
struct list_head list; /* All percpu_counters are on a list */
#endif
- s32 __percpu *counters;
+ long __percpu *counters;
};

extern int percpu_counter_batch;
@@ -38,7 +40,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,

void percpu_counter_destroy(struct percpu_counter *fbc);
void percpu_counter_set(struct percpu_counter *fbc, s64 amount);
-void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch);
+void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, long batch);
s64 __percpu_counter_sum(struct percpu_counter *fbc);
int percpu_counter_compare(struct percpu_counter *fbc, s64 rhs);

@@ -47,6 +49,24 @@ static inline void percpu_counter_add(struct percpu_counter *fbc, s64 amount)
__percpu_counter_add(fbc, amount, percpu_counter_batch);
}

+/**
+ * percpu_counter_add_fast - fast variant of percpu_counter_add
+ * @fbc: pointer to percpu_counter
+ * @amount: value to add to counter
+ *
+ * Add amount to a percpu_counter object, without approximate (fbc->count)
+ * estimation / correction.
+ * Notes :
+ * - This fast version is limited to "long" counters, not "s64".
+ * - It is preempt safe, but not IRQ safe (on UP)
+ * - Use of percpu_counter_read{_positive}() is discouraged.
+ * - fbc->count accumulates the counters from offlined cpus.
+ */
+static inline void percpu_counter_add_fast(struct percpu_counter *fbc, long amount)
+{
+ this_cpu_add(*fbc->counters, amount);
+}
+
static inline s64 percpu_counter_sum_positive(struct percpu_counter *fbc)
{
s64 ret = __percpu_counter_sum(fbc);
@@ -118,7 +138,15 @@ percpu_counter_add(struct percpu_counter *fbc, s64 amount)
}

static inline void
-__percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+percpu_counter_add_fast(struct percpu_counter *fbc, long amount)
+{
+ preempt_disable();
+ fbc->count += amount;
+ preempt_enable();
+}
+
+static inline void
+__percpu_counter_add(struct percpu_counter *fbc, s64 amount, long batch)
{
percpu_counter_add(fbc, amount);
}
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index ec9048e..93d50a5 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -18,7 +18,7 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)

spin_lock(&fbc->lock);
for_each_possible_cpu(cpu) {
- s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
+ long *pcount = per_cpu_ptr(fbc->counters, cpu);
*pcount = 0;
}
fbc->count = amount;
@@ -26,10 +26,10 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount)
}
EXPORT_SYMBOL(percpu_counter_set);

-void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch)
+void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, long batch)
{
s64 count;
- s32 *pcount;
+ long *pcount;
int cpu = get_cpu();

pcount = per_cpu_ptr(fbc->counters, cpu);
@@ -58,7 +58,7 @@ s64 __percpu_counter_sum(struct percpu_counter *fbc)
spin_lock(&fbc->lock);
ret = fbc->count;
for_each_online_cpu(cpu) {
- s32 *pcount = per_cpu_ptr(fbc->counters, cpu);
+ long *pcount = per_cpu_ptr(fbc->counters, cpu);
ret += *pcount;
}
spin_unlock(&fbc->lock);
@@ -72,7 +72,7 @@ int __percpu_counter_init(struct percpu_counter *fbc, s64 amount,
spin_lock_init(&fbc->lock);
lockdep_set_class(&fbc->lock, key);
fbc->count = amount;
- fbc->counters = alloc_percpu(s32);
+ fbc->counters = alloc_percpu(long);
if (!fbc->counters)
return -ENOMEM;
#ifdef CONFIG_HOTPLUG_CPU
@@ -123,7 +123,7 @@ static int __cpuinit percpu_counter_hotcpu_callback(struct notifier_block *nb,
cpu = (unsigned long)hcpu;
mutex_lock(&percpu_counters_lock);
list_for_each_entry(fbc, &percpu_counters, list) {
- s32 *pcount;
+ long *pcount;
unsigned long flags;

spin_lock_irqsave(&fbc->lock, flags);


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/