[PATCH] radix-tree: support locking of individual exception entries.

From: NeilBrown
Date: Sun Feb 28 2016 - 00:09:29 EST


The least significant bit of an exception entry is used as a lock flag.
A caller can:
- create a locked entry by simply adding an entry with this flag set
- lock an existing entry with radix_tree_lookup_lock(). This may return
NULL if the entry doesn't exists, or was deleted while waiting for
the lock. It may return a non-exception entry if that is what is
found. If it returns a locked entry then it has exclusive rights
to delete the entry.
- unlock an entry that is already locked. This will wake any waiters.

These must all be called with the radix tree locked (i.e. a spinlock held).
That spinlock is passed to radix_tree_lookup_lock() so that it can drop
the lock while waiting.

This is a "demonstration of concept". I haven't actually tested, only compiled.
A possible use case is for the exception entries used by DAX.

It is possible that some of the lookups can be optimised away in some
cases by storing a slot pointer. I wanted to keep it reasonable
simple until it was determined if it might be useful.

Signed-off-by: NeilBrown <neilb@xxxxxxxx>
Signed-off-by: Jan Kara <jack@xxxxxxx>
---
include/linux/radix-tree.h | 5 +++
lib/radix-tree.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 88 insertions(+)

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index 450c12b546b7..92138d30b95d 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -308,6 +308,11 @@ unsigned long radix_tree_range_tag_if_tagged(struct radix_tree_root *root,
int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag);
unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item);

+void *radix_tree_lookup_lock(struct radix_tree_root *root, unsigned long index,
+ wait_queue_head_t *wq, spinlock_t *lock);
+void radix_tree_unlock(struct radix_tree_root *root, unsigned long index,
+ wait_queue_head_t *wq);
+
static inline void radix_tree_preload_end(void)
{
preempt_enable();
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 37d4643ab5c0..a97231ab66f0 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -1500,3 +1500,86 @@ void __init radix_tree_init(void)
radix_tree_init_maxindex();
hotcpu_notifier(radix_tree_callback, 0);
}
+
+/*
+ * Exception entry locking
+ */
+static inline int slot_locked(void **v)
+{
+ unsigned long l = *(unsigned long *)v;
+ return l & 1;
+}
+
+static inline void *lock_slot(void **v)
+{
+ unsigned long *l = (unsigned long *)v;
+ return (void*)(*l |= 1);
+}
+
+static inline void * unlock_slot(void **v)
+{
+ unsigned long *l = (unsigned long *)v;
+ return (void*)(*l &= ~1UL);
+}
+
+/**
+ * radix_tree_lookup_lock - lookup and lock exceptional entry if found
+ * @root: radix tree root
+ * @index: index key
+ * @wq: waitqueue to wait for exceptional entry lock
+ * @lock: spinlock protecting the radix tree
+ *
+ * Lookup @index in the radix tree @root and if there is an exceptional
+ * entry at that location, lock it. If entry at @index is not exceptional,
+ * we just return it. We use @wq as a wait queue to wait for exceptional
+ * entry lock to be released. @lock is a spinlock protecting the radix
+ * tree which we assume is locked when entering this function and released
+ * while waiting for the exceptional entry lock.
+ */
+void *radix_tree_lookup_lock(struct radix_tree_root *root, unsigned long index,
+ wait_queue_head_t *wq, spinlock_t *lock)
+{
+ void *ret, **slot;
+ DEFINE_WAIT(wait);
+
+ for (;;) {
+ ret = __radix_tree_lookup(root, index, NULL, &slot);
+ if (!ret || !radix_tree_exceptional_entry(ret))
+ return ret;
+ if (!slot_locked(slot))
+ return lock_slot(slot);
+ prepare_to_wait(wq, &wait, TASK_UNINTERRUPTIBLE);
+ spin_unlock(lock);
+ schedule();
+ finish_wait(wq, &wait);
+ spin_lock(lock);
+ }
+}
+EXPORT_SYMBOL(radix_tree_lookup_lock);
+
+/**
+ * radix_tree_unlock - unlock exceptional radix tree entry
+ * @root: radix tree root
+ * @index: index key
+ * @wq: waitqueue to wake waiters for exceptional entry lock
+ *
+ * Unlock exceptional entry at @index in a radix tree @root and wake up
+ * waiters for it waiting in wait queue @wq. We expect the radix tree is
+ * locked against concurrent modifications.
+ */
+void radix_tree_unlock(struct radix_tree_root *root, unsigned long index,
+ wait_queue_head_t *wq)
+{
+ void *ret, **slot;
+
+ ret = __radix_tree_lookup(root, index, NULL, &slot);
+ if (WARN_ON_ONCE(!ret || !radix_tree_exceptional_entry(ret)))
+ return;
+ if (WARN_ON_ONCE(!slot_locked(slot)))
+ return;
+ unlock_slot(slot);
+
+ if (waitqueue_active(wq))
+ wake_up(wq);
+}
+EXPORT_SYMBOL(radix_tree_unlock);
--
2.6.2


--0OAP2g/MAC+5xKAE
Content-Type: text/x-patch; charset=us-ascii
Content-Disposition: attachment; filename="0001-radix-tree-Avoid-false-wakeups-when-waiting-for-exce.patch"