[HMM v13 11/18] mm/hmm/mirror: add range monitor helper to monitor CPU page table updates

From: Jérôme Glisse
Date: Fri Nov 18 2016 - 12:20:12 EST


Complement the hmm_vma_range_lock/unlock() mechanism with a range monitor that does
not block CPU page table invalidation and thus does not guarantee forward progress.
It is still useful, as in many situations concurrent CPU page table updates and CPU
snapshots take place in different regions of the virtual address space.
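
As an illustration, a rough driver-side sketch of the intended usage follows (the
example_* helpers and the device page table lock are hypothetical placeholders for
whatever mechanism a driver uses to build, serialize and commit its device page
table updates):

static int example_mirror_range(struct vm_area_struct *vma,
                                unsigned long start, unsigned long end)
{
        struct hmm_range range;

retry:
        /* With wait == true this only fails if HMM can not be set up for the mm. */
        if (!hmm_vma_range_monitor_start(&range, vma, start, end, true))
                return -ENOMEM;

        /* Walk the CPU page table and build the device page table transaction. */
        example_build_device_transaction(vma, start, end);

        /* Serialize against any other device page table update. */
        example_device_pt_lock();
        if (!hmm_vma_range_monitor_end(&range)) {
                /* The CPU page table changed under us, discard and retry. */
                example_device_pt_unlock();
                example_abort_device_transaction();
                goto retry;
        }
        example_commit_device_transaction();
        example_device_pt_unlock();

        return 0;
}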

Signed-off-by: Jérôme Glisse <jglisse@xxxxxxxxxx>
Signed-off-by: Jatin Kumar <jakumar@xxxxxxxxxx>
Signed-off-by: John Hubbard <jhubbard@xxxxxxxxxx>
Signed-off-by: Mark Hairgrove <mhairgrove@xxxxxxxxxx>
Signed-off-by: Sherry Cheung <SCheung@xxxxxxxxxx>
Signed-off-by: Subhash Gutti <sgutti@xxxxxxxxxx>
---
include/linux/hmm.h | 18 ++++++++++
mm/hmm.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
2 files changed, 112 insertions(+), 1 deletion(-)

diff --git a/include/linux/hmm.h b/include/linux/hmm.h
index c0b1c07..6571647 100644
--- a/include/linux/hmm.h
+++ b/include/linux/hmm.h
@@ -254,6 +254,24 @@ int hmm_vma_range_lock(struct hmm_range *range,
void hmm_vma_range_unlock(struct hmm_range *range);


+/*
+ * Monitoring a range allows tracking any CPU page table modification that can
+ * affect the range. It complements the hmm_vma_range_lock/unlock() mechanism
+ * as a non-blocking method for synchronizing the device page table with the
+ * CPU page table. See the function descriptions in mm/hmm.c for documentation.
+ *
+ * NOTE: AFTER A CALL TO hmm_vma_range_monitor_start() THAT RETURNED TRUE YOU
+ * MUST MAKE A CALL TO hmm_vma_range_monitor_end() BEFORE FREEING THE RANGE
+ * STRUCT OR BAD THINGS WILL HAPPEN !
+ */
+bool hmm_vma_range_monitor_start(struct hmm_range *range,
+ struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ bool wait);
+bool hmm_vma_range_monitor_end(struct hmm_range *range);
+
+
/* Below are for HMM internal use only ! Not to be use by device driver ! */
void hmm_mm_destroy(struct mm_struct *mm);

diff --git a/mm/hmm.c b/mm/hmm.c
index ee05419..746eb96 100644
--- a/mm/hmm.c
+++ b/mm/hmm.c
@@ -40,6 +40,7 @@ struct hmm {
spinlock_t lock;
struct list_head ranges;
struct list_head mirrors;
+ struct list_head monitors;
atomic_t sequence;
wait_queue_head_t wait_queue;
struct mmu_notifier mmu_notifier;
@@ -65,6 +66,7 @@ static struct hmm *hmm_register(struct mm_struct *mm)
return NULL;
init_waitqueue_head(&hmm->wait_queue);
atomic_set(&hmm->notifier_count, 0);
+ INIT_LIST_HEAD(&hmm->monitors);
INIT_LIST_HEAD(&hmm->mirrors);
atomic_set(&hmm->sequence, 0);
hmm->mmu_notifier.ops = NULL;
@@ -112,7 +114,7 @@ static void hmm_invalidate_range(struct hmm *hmm,
unsigned long start,
unsigned long end)
{
- struct hmm_range range, *tmp;
+ struct hmm_range range, *tmp, *next;
struct hmm_mirror *mirror;

/*
@@ -127,6 +129,13 @@ static void hmm_invalidate_range(struct hmm *hmm,
range.hmm = hmm;

spin_lock(&hmm->lock);
+ /* Remove any monitored range that overlaps the invalidated range */
+ list_for_each_entry_safe (tmp, next, &hmm->monitors, list) {
+ if (range.start >= tmp->end || range.end <= tmp->start)
+ continue;
+ /* This range is no longer valid */
+ list_del_init(&tmp->list);
+ }
list_for_each_entry (tmp, &hmm->ranges, list) {
if (range.start >= tmp->end || range.end <= tmp->start)
continue;
@@ -361,3 +370,87 @@ void hmm_vma_range_unlock(struct hmm_range *range)
wake_up(&hmm->wait_queue);
}
EXPORT_SYMBOL(hmm_vma_range_unlock);
+
+
+/*
+ * hmm_vma_range_monitor_start() - start monitoring of a range
+ * @range: pointer to the hmm_range struct used to monitor the range
+ * @vma: virtual memory area for the range
+ * @start: start address of the range to monitor (inclusive)
+ * @end: end address of the range to monitor (exclusive)
+ * @wait: wait for any pending CPU page table update to finish
+ * Returns: false if there is a pending CPU page table update, true otherwise
+ *
+ * The usage pattern of this function is:
+ * retry:
+ * hmm_vma_range_monitor_start(range, vma, start, end, true);
+ * // Do something that relies on stable CPU page table content and
+ * // prepare the device page table update transaction (do not commit yet)
+ * ...
+ * // Take the device driver lock that serializes device page table updates
+ * driver_lock_device_page_table_update();
+ * if (!hmm_vma_range_monitor_end(range)) {
+ * driver_unlock_device_page_table_update();
+ * // Abort the transaction you just built and clean up anything that
+ * // needs to be, taking care to avoid busy looping on retry.
+ * goto retry;
+ * }
+ * // Commit the device page table update
+ * driver_unlock_device_page_table_update();
+ */
+bool hmm_vma_range_monitor_start(struct hmm_range *range,
+ struct vm_area_struct *vma,
+ unsigned long start,
+ unsigned long end,
+ bool wait)
+{
+ BUG_ON(!vma);
+ BUG_ON(!range);
+
+ INIT_LIST_HEAD(&range->list);
+ range->hmm = hmm_register(vma->vm_mm);
+ if (!range->hmm)
+ return false;
+
+again:
+ spin_lock(&range->hmm->lock);
+ if (atomic_read(&range->hmm->notifier_count)) {
+ spin_unlock(&range->hmm->lock);
+ if (!wait)
+ return false;
+ /*
+ * FIXME: Wait for all active mmu_notifiers. We do this because
+ * we can not keep an hmm_range struct around while waiting for
+ * a range invalidation to finish. The mmu_notifier API needs to
+ * be updated to make this doable.
+ */
+ wait_event(range->hmm->wait_queue,
+ !atomic_read(&range->hmm->notifier_count));
+ goto again;
+ }
+ list_add_tail(&range->list, &range->hmm->monitors);
+ spin_unlock(&range->hmm->lock);
+ return true;
+}
+EXPORT_SYMBOL(hmm_vma_range_monitor_start);
+
+/*
+ * hmm_vma_range_monitor_end() - end monitoring of a range
+ * @range: range that was being monitored
+ * Returns: true if no invalidation happened since hmm_vma_range_monitor_start()
+ */
+bool hmm_vma_range_monitor_end(struct hmm_range *range)
+{
+ bool valid;
+
+ if (!range->hmm || list_empty(&range->list))
+ return false;
+
+ spin_lock(&range->hmm->lock);
+ valid = !list_empty(&range->list);
+ list_del_init(&range->list);
+ spin_unlock(&range->hmm->lock);
+
+ return valid;
+}
+EXPORT_SYMBOL(hmm_vma_range_monitor_end);
--
2.4.3