Radix tree range entries
From: Matthew Wilcox
Date: Tue Jan 31 2017 - 11:59:33 EST
Hi Dan, Konstantin,
You've been interested in using range entries in the radix tree rather
than higher-order entries. I've built a range entry interface on top
of our current radix tree substrate, and it seems to be behaving itself
in the test suite. Would you care to try out this patch? Ideas for
other tests to add to the test suite would be appreciated too.
There may be some minor conflicts with the current Linus tree; the full
patch series can be found here:
http://git.infradead.org/users/willy/linux-dax.git/shortlog/refs/heads/idr-20170131
commit b6444c0557bc1ac5add3c8db66d26a7ba7ccd6a6
Author: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>
Date: Tue Jan 31 11:16:47 2017 -0500
radix-tree: Add range entry support
We have a confirmed customer in the shape of devm_memremap_pages(), so
add support for range entries. The documentation lists caveats on their
use, but none of these caveats apply to the one user.
Signed-off-by: Matthew Wilcox <mawilcox@xxxxxxxxxxxxx>
diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 8984f90..34d0910 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -386,6 +386,11 @@ int radix_tree_split(struct radix_tree_root *, unsigned long index,
unsigned new_order);
int radix_tree_join(struct radix_tree_root *, unsigned long index,
unsigned new_order, void *);
+int radix_tree_insert_range(struct radix_tree_root *, unsigned long start,
+ unsigned long len, void *entry);
+void *radix_tree_delete_range(struct radix_tree_root *, unsigned long start,
+ unsigned long len);
+
void **idr_get_free(struct radix_tree_root *, struct radix_tree_iter *,
gfp_t, int end);
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 2b1c0e2..348f825 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -884,27 +884,13 @@ static void radix_tree_free_nodes(struct radix_tree_node *node)
}
#ifdef CONFIG_RADIX_TREE_MULTIORDER
-static inline int insert_entries(struct radix_tree_node *node, void **slot,
- void *item, unsigned order, bool replace)
+static inline int __insert_entries(struct radix_tree_node *node,
+ void **slot, void *item, unsigned offset,
+ unsigned n, bool replace)
{
struct radix_tree_node *child;
- unsigned i, n, tag, offset, tags = 0;
+ unsigned i, tag, tags = 0;
- if (node) {
- if (order > node->shift)
- n = 1 << (order - node->shift);
- else
- n = 1;
- offset = get_slot_offset(node, slot);
- } else {
- n = 1;
- offset = 0;
- }
-
- if (n > 1) {
- offset = offset & ~(n - 1);
- slot = &node->slots[offset];
- }
child = node_to_entry(slot);
for (i = 0; i < n; i++) {
@@ -946,9 +932,29 @@ static inline int insert_entries(struct radix_tree_node *node, void **slot,
}
return n;
}
+
+static inline int insert_entries(struct radix_tree_node *node,
+ void **slot, void *item, unsigned order, bool replace)
+{
+ unsigned int n, offset;
+
+ if (node) {
+ /* Can only be true if there is a larger entry here */
+ if (order < node->shift)
+ return -EEXIST;
+ n = 1 << (order - node->shift);
+ offset = get_slot_offset(node, slot) & ~(n - 1);
+ slot = &node->slots[offset];
+ } else {
+ n = 1;
+ offset = 0;
+ }
+
+ return __insert_entries(node, slot, item, offset, n, replace);
+}
#else
-static inline int insert_entries(struct radix_tree_node *node, void **slot,
- void *item, unsigned order, bool replace)
+static inline int insert_entries(struct radix_tree_node *node,
+ void **slot, void *item, unsigned order, bool replace)
{
if (*slot)
return -EEXIST;
@@ -978,7 +984,8 @@ int __radix_tree_insert(struct radix_tree_root *root, unsigned long index,
void **slot;
int error;
- BUG_ON(radix_tree_is_internal_node(item));
+ if (radix_tree_is_internal_node(item))
+ return -EINVAL;
error = __radix_tree_create(root, index, order, &node, &slot);
if (error)
@@ -2131,6 +2138,140 @@ int radix_tree_tagged(const struct radix_tree_root *root, unsigned int tag)
}
EXPORT_SYMBOL(radix_tree_tagged);
+#ifdef CONFIG_RADIX_TREE_MULTIORDER
+/**
+ * radix_tree_insert_range - insert an entry for multiple indices
+ * @root: radix tree root
+ * @start: First index
+ * @len: Number of indices to cover
+ * @entry: Value to return
+ *
+ * Insert an entry to cover an entire range of indices from @start to
+ * @start + @n - 1.
+ *
+ * Range entries must be deleted with radix_tree_delete_range().
+ * Calling radix_tree_replace_slot() or __radix_tree_replace() may replace
+ * only part of the range; please write a radix_tree_replace_range() if
+ * you need this functionality.
+ * Tagging range entries may be confusing; setting a tag on one index
+ * may leave other indices within the range untagged, and clearing a tag
+ * on one index may leave other indices within the range still tagged.
+ * Iterating over range entries and gang lookups may return the same entry
+ * multiple times with different indices.
+ * Range entries may consume more memory than you would like; another kind
+ * of tree may be more suited to your needs.
+ *
+ * Return: 0 on success, -ENOMEM on memory allocation failure, -EEXISTS if
+ * another entry already exists in this range, -EINVAL if the parameters
+ * are nonsensical.
+ */
+int radix_tree_insert_range(struct radix_tree_root *root, unsigned long start,
+ unsigned long len, void *entry)
+{
+ unsigned long index = start;
+ unsigned long n, limit = start + len;
+ unsigned int shift, max, offset;
+ int error;
+ bool inserted = false;
+ struct radix_tree_node *node;
+ void **slot;
+
+ if (!len)
+ return -EINVAL;
+ if ((start + len) < start)
+ return -EINVAL;
+ if (radix_tree_is_internal_node(entry))
+ return -EINVAL;
+
+ max = __fls(start | len) + 1;
+ for (shift = 0; shift < max; shift += RADIX_TREE_MAP_SHIFT) {
+ offset = (index >> shift) & RADIX_TREE_MAP_MASK;
+ if (!offset)
+ continue;
+ n = RADIX_TREE_MAP_SIZE - offset;
+ if (n > ((limit - index) >> shift))
+ n = (limit - index) >> shift;
+ if (!n)
+ continue;
+
+ error = __radix_tree_create(root, index, shift, &node, &slot);
+ if (error)
+ goto error;
+ error = __insert_entries(node, slot, entry, offset, n, false);
+ if (error < 0)
+ goto error;
+ inserted = true;
+ index += (n << shift);
+ }
+
+ while (shift) {
+ shift -= RADIX_TREE_MAP_SHIFT;
+ n = ((limit - index) >> shift) & RADIX_TREE_MAP_MASK;
+ if (!n)
+ continue;
+ index += ((n - 1) << shift);
+ error = __radix_tree_create(root, index, shift, &node, &slot);
+ if (error)
+ goto error;
+ slot -= (n - 1);
+ error = __insert_entries(node, slot, entry, 0, n, false);
+ if (error < 0)
+ goto error;
+ inserted = true;
+ index += (1UL << shift);
+ }
+
+ return 0;
+
+ error:
+ if (inserted)
+ radix_tree_delete_range(root, start, len);
+ return error;
+}
+
+/**
+ * radix_tree_delete_range - delete an entry for multiple indices
+ * @root: radix tree root
+ * @start: First index
+ * @len: Number of indices covered
+ *
+ * Deletes a previously inserted entry that covers multiple indices
+ *
+ * Return: the deleted entry, or NULL if the entry was not present.
+ * May return ERR_PTR(-EINVAL) if the parameters are nonsensical.
+ */
+void *radix_tree_delete_range(struct radix_tree_root *root,
+ unsigned long start, unsigned long len)
+{
+ struct radix_tree_iter iter;
+ struct radix_tree_node *node;
+ void **slot, **this_slot, *entry = ERR_PTR(-EINVAL);
+
+ if (unlikely(!len))
+ return entry;
+ if (unlikely((start + len) < start))
+ return entry;
+
+ slot = radix_tree_iter_lookup(root, &iter, start);
+ entry = rcu_dereference_raw(*slot);
+ if (unlikely(!entry))
+ return entry;
+
+ do {
+ node = iter.node;
+ this_slot = slot;
+ slot = radix_tree_next_slot(slot, &iter,
+ RADIX_TREE_ITER_CONTIG);
+ if (!slot)
+ slot = radix_tree_next_chunk(root, &iter,
+ RADIX_TREE_ITER_CONTIG);
+ __radix_tree_delete(root, node, this_slot);
+ } while (slot && rcu_dereference_raw(*slot) == entry);
+
+ return entry;
+}
+#endif
+
/**
* idr_preload - preload for idr_alloc()
* @gfp_mask: allocation mask to use for preloading
diff --git a/tools/include/linux/log2.h b/tools/include/linux/log2.h
index 4144666..729b939 100644
--- a/tools/include/linux/log2.h
+++ b/tools/include/linux/log2.h
@@ -182,4 +182,20 @@ unsigned long __rounddown_pow_of_two(unsigned long n)
__rounddown_pow_of_two(n) \
)
+/**
+ * order_base_2 - calculate the (rounded up) base 2 order of the argument
+ * @n: parameter
+ *
+ * The first few values calculated by this routine:
+ * ob2(0) = 0
+ * ob2(1) = 0
+ * ob2(2) = 1
+ * ob2(3) = 2
+ * ob2(4) = 2
+ * ob2(5) = 3
+ * ... and so on.
+ */
+
+#define order_base_2(n) ilog2(roundup_pow_of_two(n))
+
#endif /* _TOOLS_LINUX_LOG2_H */
diff --git a/tools/testing/radix-tree/Makefile b/tools/testing/radix-tree/Makefile
index ecea846..72e293a 100644
--- a/tools/testing/radix-tree/Makefile
+++ b/tools/testing/radix-tree/Makefile
@@ -1,10 +1,11 @@
CFLAGS += -I. -I../../include -g -O2 -Wall -D_LGPL_SOURCE -fsanitize=address
LDFLAGS += -lpthread -lurcu
-TARGETS = main idr-test multiorder
+TARGETS = main idr-test multiorder range
CORE_OFILES := radix-tree.o idr.o linux.o test.o find_bit.o
OFILES = main.o $(CORE_OFILES) regression1.o regression2.o regression3.o \
- tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o
+ tag_check.o multiorder.o idr-test.o iteration_check.o benchmark.o \
+ range.o
ifdef BENCHMARK
CFLAGS += -DBENCHMARK=1
@@ -21,6 +22,9 @@ idr-test: idr-test.o $(CORE_OFILES)
multiorder: multiorder.o $(CORE_OFILES)
$(CC) $(CFLAGS) $(LDFLAGS) $^ -o multiorder
+range: range.o $(CORE_OFILES)
+ $(CC) $(CFLAGS) $(LDFLAGS) $^ -o range
+
clean:
$(RM) $(TARGETS) *.o radix-tree.c idr.c
diff --git a/tools/testing/radix-tree/main.c b/tools/testing/radix-tree/main.c
index b829127..64c960d 100644
--- a/tools/testing/radix-tree/main.c
+++ b/tools/testing/radix-tree/main.c
@@ -300,6 +300,10 @@ static void single_thread_tests(bool long_run)
rcu_barrier();
printv(2, "after multiorder_check: %d allocated, preempt %d\n",
nr_allocated, preempt_count);
+ range_checks();
+ rcu_barrier();
+ printv(2, "after range_check: %d allocated, preempt %d\n",
+ nr_allocated, preempt_count);
locate_check();
rcu_barrier();
printv(2, "after locate_check: %d allocated, preempt %d\n",
diff --git a/tools/testing/radix-tree/range.c b/tools/testing/radix-tree/range.c
new file mode 100644
index 0000000..cdc4cce
--- /dev/null
+++ b/tools/testing/radix-tree/range.c
@@ -0,0 +1,114 @@
+#include "test.h"
+
+int item_insert_range(struct radix_tree_root *root,
+ unsigned long start, unsigned long len)
+{
+ struct item *item = item_create(start, order_base_2(start + len));
+ int err = radix_tree_insert_range(root, start, len, item);
+ if (err)
+ free(item);
+ return err;
+}
+
+int item_remove_range(struct radix_tree_root *root,
+ unsigned long start, unsigned long len)
+{
+ struct item *item = radix_tree_delete_range(root, start, len);
+ if (item && !IS_ERR(item)) {
+ item_sanity(item, start);
+ free(item);
+ return 1;
+ }
+ return 0;
+}
+
+/* Check thoroughly around the inserted entries */
+void range_check_1(struct radix_tree_root *root,
+ unsigned long start, unsigned long len)
+{
+ unsigned long i;
+
+ assert(!item_insert_range(root, start, len));
+
+ for (i = 0; i < start; i++)
+ item_check_absent(root, i);
+ for (; i < start + len; i++)
+ item_check_present(root, i);
+ for (; i < start + 2 * len; i++)
+ item_check_absent(root, i);
+
+ assert(item_remove_range(root, start, len));
+ assert(radix_tree_empty(root));
+}
+
+/*
+ * For large values of start and len, range_check_1 takes too long; just
+ * check around the ends
+ */
+void range_check_2(struct radix_tree_root *root,
+ unsigned long start, unsigned long len)
+{
+ assert(!item_insert_range(root, start, len));
+
+ if (start > 0)
+ item_check_absent(root, start - 1);
+ item_check_present(root, start);
+ item_check_present(root, start + len - 1);
+ item_check_absent(root, start + len);
+
+ assert(item_remove_range(root, start, len));
+ assert(radix_tree_empty(root));
+}
+
+/*
+ * Check that we correctly fail to insert an entry over another one
+ */
+void range_check_3(struct radix_tree_root *root, unsigned long index)
+{
+ item_insert(root, index);
+ assert(item_insert_range(root, index, 1) == -EEXIST);
+ assert(item_insert_range(root, index, 2) == -EEXIST);
+ if (index > 0) {
+ assert(item_insert_range(root, index - 1, 2) == -EEXIST);
+ assert(item_insert_range(root, 0, index * 2) == -EEXIST);
+ assert(item_insert_range(root, 0, index + 1) == -EEXIST);
+ }
+ item_delete(root, index);
+ assert(radix_tree_empty(root));
+}
+
+void range_checks(void)
+{
+ RADIX_TREE(tree, GFP_KERNEL);
+ unsigned long start, len;
+
+ assert(item_insert_range(&tree, 0, 0) == -EINVAL);
+ assert(item_insert_range(&tree, 1, 0) == -EINVAL);
+ assert(item_insert_range(&tree, ~0UL, 5) == -EINVAL);
+
+ for (start = 0; start < 100; start++) {
+ for (len = 1; len < 100; len++) {
+ range_check_1(&tree, start, len);
+ }
+ }
+
+ range_check_2(&tree, 0, 1UL << 35);
+ range_check_2(&tree, 0, 1UL << 63);
+
+ range_check_3(&tree, 0);
+ range_check_3(&tree, 5);
+ range_check_3(&tree, 256);
+}
+
+int __weak main(void)
+{
+ radix_tree_init();
+ rcu_init();
+
+ range_checks();
+
+ rcu_barrier();
+ if (nr_allocated)
+ printf("nr_allocated = %d\n", nr_allocated);
+ return 0;
+}
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 4c42c6b..3b6f1b9 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -13,6 +13,7 @@ int __item_insert(struct radix_tree_root *root, struct item *item);
int item_insert(struct radix_tree_root *root, unsigned long index);
int item_insert_order(struct radix_tree_root *root, unsigned long index,
unsigned order);
+void item_sanity(struct item *, unsigned long index);
int item_delete(struct radix_tree_root *root, unsigned long index);
struct item *item_lookup(struct radix_tree_root *root, unsigned long index);
@@ -32,6 +33,7 @@ unsigned long find_item(struct radix_tree_root *, void *item);
void tag_check(void);
void multiorder_checks(void);
+void range_checks(void);
void iteration_test(unsigned order, unsigned duration);
void benchmark(void);
void idr_checks(void);