Re: [PATCH v5 0/3] lib: find_*_bit reimplementation

From: Yury Norov
Date: Sun Mar 08 2015 - 14:17:51 EST


It looks I screwed up with performance measurements.

At first, I measured performance of 'find_next_zero_bit', while wrote about
'find_next_bit'. I was really thinking it's not matter, so I'm sorry.

At second, to run test on userspace I got ffs and ffz from kernel. And it
looks like all performance difference is caused by them. Simple measurement
of performance of current 'find_next_bit' and 'find_next_zero_bit' shows the
same 35% difference, while the code is virtually identical. The trick is that
new implementation doesn't use ffz, that is slow for me (because I got generic
implementation), and so 'find_next_zero_bit' looks faster, while it's not.
In kernel, ffs and ffz are both fast. I'm sorry again.

When I ran this test in kernel mode, I found measurements weird. I tested it on
Core i7-3770@xxxxxxx and Core i7-2630QM@xxxxxxxx First machine did show noizly
but relatively equal results (I cannot attach it now), second one is more stable,
and it looks like performance degrade for about 20%.

find_next_bit find_next_zero_bit
old new old new
25 30 24 29
25 30 24 29
25 30 25 29
26 29 24 29
25 30 24 29
26 30 24 29
26 30 24 30
25 30 25 29
26 30 25 29
25 30 25 30

The module I use for testing is here.

To Alexey:
Could you be so kind to run this test on your Odroid? It's also interesting, is
there real difference between generic and platform implementations.

---
lib/Kconfig.debug | 8 +++
lib/Makefile | 1 +
lib/test_find_bit.c | 189 ++++++++++++++++++++++++++++++++++++++++++++++++++++
3 files changed, 198 insertions(+)
create mode 100644 lib/test_find_bit.c

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index c5cefb3..5d78dbb 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1669,6 +1669,14 @@ config DMA_API_DEBUG

If unsure, say N.

+config TEST_FIND_BIT
+ tristate "Test find_bit family performance"
+ default n
+ help
+ This builds the "test_find_bit".
+
+ If unsure, say N.
+
config TEST_LKM
tristate "Test module loading with 'hello world' module"
default n
diff --git a/lib/Makefile b/lib/Makefile
index d857965..9186da4 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -34,6 +34,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o
obj-y += kstrtox.o
obj-$(CONFIG_TEST_BPF) += test_bpf.o
obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o
obj-$(CONFIG_TEST_KASAN) += test_kasan.o
obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
obj-$(CONFIG_TEST_LKM) += test_module.o
diff --git a/lib/test_find_bit.c b/lib/test_find_bit.c
new file mode 100644
index 0000000..0c04022
--- /dev/null
+++ b/lib/test_find_bit.c
@@ -0,0 +1,189 @@
+/*
+ * This module tests 'find_next_bit' performance.
+ */
+
+#include <linux/bitops.h>
+#include <linux/init.h>
+#include <linux/random.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+
+#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG)
+
+unsigned long find_next_bit_old(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp &= (~0UL << offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if ((tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp &= (~0UL >> (BITS_PER_LONG - size));
+ if (tmp == 0UL) /* Are any bits set? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + __ffs(tmp);
+}
+
+unsigned long find_next_zero_bit_old(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ const unsigned long *p = addr + BITOP_WORD(offset);
+ unsigned long result = offset & ~(BITS_PER_LONG-1);
+ unsigned long tmp;
+
+ if (offset >= size)
+ return size;
+ size -= result;
+ offset %= BITS_PER_LONG;
+ if (offset) {
+ tmp = *(p++);
+ tmp |= ~0UL >> (BITS_PER_LONG - offset);
+ if (size < BITS_PER_LONG)
+ goto found_first;
+ if (~tmp)
+ goto found_middle;
+ size -= BITS_PER_LONG;
+ result += BITS_PER_LONG;
+ }
+ while (size & ~(BITS_PER_LONG-1)) {
+ if (~(tmp = *(p++)))
+ goto found_middle;
+ result += BITS_PER_LONG;
+ size -= BITS_PER_LONG;
+ }
+ if (!size)
+ return result;
+ tmp = *p;
+
+found_first:
+ tmp |= ~0UL << size;
+ if (tmp == ~0UL) /* Are any bits zero? */
+ return result + size; /* Nope. */
+found_middle:
+ return result + ffz(tmp);
+ }
+static unsigned long _find_next_bit(const unsigned long *addr,
+ unsigned long nbits, unsigned long start, unsigned long invert)
+{
+ unsigned long tmp;
+
+ if (!nbits || start >= nbits)
+ return nbits;
+
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
+
+ /* Handle 1st word. */
+ tmp &= BITMAP_FIRST_WORD_MASK(start);
+ start = round_down(start, BITS_PER_LONG);
+
+ while (!tmp) {
+ start += BITS_PER_LONG;
+ if (start >= nbits)
+ return nbits;
+
+ tmp = addr[start / BITS_PER_LONG] ^ invert;
+ }
+
+ return min(start + __ffs(tmp), nbits);
+}
+
+unsigned long find_next_bit_new(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr, size, offset, 0UL);
+}
+
+unsigned long find_next_zero_bit_new(const unsigned long *addr, unsigned long size,
+ unsigned long offset)
+{
+ return _find_next_bit(addr, size, offset, ~0UL);
+}
+
+static int __init test_module_init(void)
+{
+ long long start, old, new;
+ unsigned long *addr;
+ unsigned long ret, i, nbits = 30*1024*1024;
+
+ addr = alloc_pages_exact(nbits/8, GFP_KERNEL);
+ if (!addr) {
+ pr_warn("No mem\n");
+ return -ENOMEM;
+ }
+
+ for (i = 0; i < 10; i++) {
+ get_random_bytes(addr, nbits/8);
+
+ ret = 0;
+ start = jiffies;
+ while (ret < nbits)
+ ret = find_next_zero_bit_old(addr, nbits, ret + 1);
+
+ old = jiffies - start;
+
+ ret = 0;
+ start = jiffies;
+ while (ret < nbits)
+ ret = find_next_zero_bit_new(addr, nbits, ret + 1);
+
+ new = jiffies - start;
+ pr_warn("%lld\t%lld\n", old, new);
+ }
+
+ pr_warn("\n");
+
+ for (i = 0; i < 10; i++) {
+ get_random_bytes(addr, nbits/8);
+
+ ret = 0;
+ start = jiffies;
+ while (ret < nbits)
+ ret = find_next_bit_old(addr, nbits, ret + 1);
+
+ old = jiffies - start;
+
+ ret = 0;
+ start = jiffies;
+ while (ret < nbits)
+ ret = find_next_bit_new(addr, nbits, ret + 1);
+
+ new = jiffies - start;
+
+ pr_warn("%lld\t%lld\n", old, new);
+ }
+
+ free_pages_exact(addr, nbits/8);
+ return 0;
+}
+module_init(test_module_init);
+
+static void __exit test_module_exit(void) {}
+module_exit(test_module_exit);
+
+MODULE_AUTHOR("Yury Norov <yury.norov@xxxxxxxxx>");
+MODULE_LICENSE("GPL");
--
2.1.0

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/