[RFT PATCH 10/13] kprobes: Make free_*insn_slot() mutex-less

From: Masami Hiramatsu
Date: Thu Jan 16 2020 - 09:46:05 EST


Rewrite the kprobe_insn_cache implementation so that free_*insn_slot()
does not acquire kprobe_insn_cache->mutex. This allows us to call it
from a call_rcu() callback function.
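
For example, once the mutex is gone, a slot can be released directly
from a call_rcu() callback, where sleeping locks are forbidden. A
minimal sketch (struct my_probe is a hypothetical container, not part
of this series):

struct my_probe {
	struct rcu_head rcu;
	kprobe_opcode_t *slot;	/* obtained from __get_insn_slot() */
};

static void my_probe_free_rcu(struct rcu_head *head)
{
	struct my_probe *p = container_of(head, struct my_probe, rcu);

	/*
	 * This runs in softirq context, where mutex_lock() must not be
	 * used. With this patch, __free_insn_slot() only takes
	 * rcu_read_lock(), so calling it from here becomes legal.
	 */
	__free_insn_slot(&kprobe_insn_slots, p->slot, 1);
	kfree(p);
}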

For this purpose, this introduces a flip-flop dirty generation in
kprobe_insn_cache. When __free_insn_slot() is called on a dirty slot
(one that can still be in use even after an RCU grace period), the
slot is marked as "current-generation" dirty. The garbage collector
(kprobe_insn_cache_gc()) flips the generation bit, waits for a safe
period via synchronize_rcu_tasks(), and then collects the
"previous-generation" dirty slots. As a result, it collects only the
dirty slots that were returned by __free_insn_slot() before the GC
started waiting; dirty slots returned during the safe period are
marked as new-generation dirty and survive until the next GC pass.
Since two GC passes never run concurrently, no more than two
generations are needed, so the generation bit is flipped instead of
being counted up.
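
The effect of the flip may be easier to see in a user-space toy model
(illustration only; the real code below uses smp_wmb()/smp_rmb(), and
synchronize_rcu_tasks() is reduced to a comment here):

#include <stdio.h>

enum { CLEAN, USED, DIRTY0, DIRTY1 };

static int generation;		/* flips 0 <-> 1, never counts up */
static int slot_used[8] = { [0 ... 7] = USED };

static void free_slot(int idx)	/* models __free_insn_slot() */
{
	/* mark with the current-generation dirty tag */
	slot_used[idx] = generation ? DIRTY1 : DIRTY0;
}

static void gc(void)		/* models kprobe_insn_cache_gc() */
{
	int old = generation ? DIRTY1 : DIRTY0;
	int i;

	generation ^= 1;	/* concurrent frees land in the other bin */
	/* ... synchronize_rcu_tasks() waits here in the kernel ... */
	for (i = 0; i < 8; i++)
		if (slot_used[i] == old)
			slot_used[i] = CLEAN;	/* safe to reuse */
}

int main(void)
{
	free_slot(0);		/* marked DIRTY0 (old generation) */
	gc();			/* collects slot 0 */
	free_slot(1);		/* marked DIRTY1; survives until next gc() */
	printf("%d %d\n", slot_used[0], slot_used[1]);	/* prints "0 3" */
	return 0;
}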

Signed-off-by: Masami Hiramatsu <mhiramat@xxxxxxxxxx>
---
 arch/powerpc/kernel/optprobes.c |   1 -
 include/linux/kprobes.h         |   2 +-
 kernel/kprobes.c                | 172 ++++++++++++++++++++++-----------------
 3 files changed, 96 insertions(+), 79 deletions(-)

diff --git a/arch/powerpc/kernel/optprobes.c b/arch/powerpc/kernel/optprobes.c
index 024f7aad1952..8304f3814515 100644
--- a/arch/powerpc/kernel/optprobes.c
+++ b/arch/powerpc/kernel/optprobes.c
@@ -53,7 +53,6 @@ struct kprobe_insn_cache kprobe_ppc_optinsn_slots = {
/* insn_size initialized later */
.alloc = __ppc_alloc_insn_page,
.free = __ppc_free_insn_page,
- .nr_garbage = 0,
};

/*
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 0f832817fca3..1cd53b7b8409 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -244,7 +244,7 @@ struct kprobe_insn_cache {
void (*free)(void *); /* free insn page */
struct list_head pages; /* list of kprobe_insn_page */
size_t insn_size; /* size of instruction slot */
- int nr_garbage;
+ int generation; /* dirty generation */
struct work_struct work;
};

diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 60ffc9d54d87..5c12eb7fa8e1 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -90,8 +90,7 @@ struct kprobe_insn_page {
struct rcu_head rcu;
kprobe_opcode_t *insns; /* Page of instruction slots */
struct kprobe_insn_cache *cache;
- int nused;
- int ngarbage;
+ atomic_t nused;
char slot_used[];
};

@@ -106,8 +105,9 @@ static int slots_per_page(struct kprobe_insn_cache *c)

enum kprobe_slot_state {
SLOT_CLEAN = 0,
- SLOT_DIRTY = 1,
- SLOT_USED = 2,
+ SLOT_USED = 1,
+ SLOT_DIRTY0 = 2,
+ SLOT_DIRTY1 = 3,
};

void __weak *alloc_insn_page(void)
@@ -126,7 +126,6 @@ struct kprobe_insn_cache kprobe_insn_slots = {
.free = free_insn_page,
.pages = LIST_HEAD_INIT(kprobe_insn_slots.pages),
.insn_size = MAX_INSN_SIZE,
- .nr_garbage = 0,
.work = __WORK_INITIALIZER(kprobe_insn_slots.work,
kprobe_insn_cache_gc),
};
@@ -137,6 +136,22 @@ static void kick_kprobe_insn_cache_gc(struct kprobe_insn_cache *c)
schedule_work(&c->work);
}

+static void *try_to_get_clean_slot(struct kprobe_insn_page *kip)
+{
+ struct kprobe_insn_cache *c = kip->cache;
+ int i;
+
+ for (i = 0; i < slots_per_page(c); i++) {
+ if (kip->slot_used[i] == SLOT_CLEAN) {
+ kip->slot_used[i] = SLOT_USED;
+ atomic_inc(&kip->nused);
+ return kip->insns + (i * c->insn_size);
+ }
+ }
+
+ return NULL;
+}
+
/**
* __get_insn_slot() - Find a slot on an executable page for an instruction.
* We allocate an executable page if there's no room on existing ones.
@@ -145,25 +160,20 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
{
struct kprobe_insn_page *kip;
kprobe_opcode_t *slot = NULL;
+ bool reclaimable = false;

/* Since the slot array is not protected by rcu, we need a mutex */
mutex_lock(&c->mutex);
list_for_each_entry(kip, &c->pages, list) {
- if (kip->nused < slots_per_page(c)) {
- int i;
- for (i = 0; i < slots_per_page(c); i++) {
- if (kip->slot_used[i] == SLOT_CLEAN) {
- kip->slot_used[i] = SLOT_USED;
- kip->nused++;
- slot = kip->insns + (i * c->insn_size);
- goto out;
- }
- }
- /* kip->nused is broken. Fix it. */
- kip->nused = slots_per_page(c);
- WARN_ON(1);
+ if (atomic_read(&kip->nused) < slots_per_page(c)) {
+ slot = try_to_get_clean_slot(kip);
+ if (slot)
+ goto out;
+ reclaimable = true;
}
}
+ if (reclaimable)
+ kick_kprobe_insn_cache_gc(c);

/* All out of space. Need to allocate a new page. */
kip = kmalloc(KPROBE_INSN_PAGE_SIZE(slots_per_page(c)), GFP_KERNEL);
@@ -183,8 +193,7 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
INIT_LIST_HEAD(&kip->list);
memset(kip->slot_used, SLOT_CLEAN, slots_per_page(c));
kip->slot_used[0] = SLOT_USED;
- kip->nused = 1;
- kip->ngarbage = 0;
+ atomic_set(&kip->nused, 1);
kip->cache = c;
list_add_rcu(&kip->list, &c->pages);
slot = kip->insns;
@@ -193,90 +202,106 @@ kprobe_opcode_t *__get_insn_slot(struct kprobe_insn_cache *c)
return slot;
}

-static void free_kprobe_insn_page(struct rcu_head *head)
+static void free_kprobe_insn_page_cb(struct rcu_head *head)
{
struct kprobe_insn_page *kip = container_of(head, typeof(*kip), rcu);

kfree(kip);
}

-/* Return 1 if all garbages are collected, otherwise 0. */
-static int collect_one_slot(struct kprobe_insn_page *kip, int idx)
+static void free_kprobe_insn_page(struct kprobe_insn_page *kip)
{
- kip->slot_used[idx] = SLOT_CLEAN;
- kip->nused--;
- if (kip->nused == 0) {
- /*
- * Page is no longer in use. Free it unless
- * it's the last one. We keep the last one
- * so as not to have to set it up again the
- * next time somebody inserts a probe.
- */
- if (!list_is_singular(&kip->list)) {
- list_del_rcu(&kip->list);
- kip->cache->free(kip->insns);
- call_rcu(&kip->rcu, free_kprobe_insn_page);
- }
- return 1;
+ if (WARN_ON_ONCE(atomic_read(&kip->nused) != 0))
+ return;
+ /*
+ * Page is no longer in use. Free it unless
+ * it's the last one. We keep the last one
+ * so as not to have to set it up again the
+ * next time somebody inserts a probe.
+ */
+ if (!list_is_singular(&kip->list)) {
+ list_del_rcu(&kip->list);
+ kip->cache->free(kip->insns);
+ call_rcu(&kip->rcu, free_kprobe_insn_page_cb);
}
- return 0;
}

void kprobe_insn_cache_gc(struct work_struct *work)
{
struct kprobe_insn_cache *c = container_of(work, typeof(*c), work);
struct kprobe_insn_page *kip, *next;
+ int dirtygen = c->generation ? SLOT_DIRTY1 : SLOT_DIRTY0;
+ int i, nr;

mutex_lock(&c->mutex);
+
+ c->generation ^= 1; /* flip generation (0->1, 1->0) */
+
+ /* Make sure the generation update is visible to __free_insn_slot() */
+ smp_wmb();
+
/* Ensure no-one is running on the garbages. */
synchronize_rcu_tasks();

list_for_each_entry_safe(kip, next, &c->pages, list) {
- int i;
- if (kip->ngarbage == 0)
- continue;
- kip->ngarbage = 0; /* we will collect all garbages */
+ nr = 0;
+ /* Reclaim previous generation dirty slots */
for (i = 0; i < slots_per_page(c); i++) {
- if (kip->slot_used[i] == SLOT_DIRTY &&
- collect_one_slot(kip, i))
- break;
+ if (kip->slot_used[i] == dirtygen)
+ kip->slot_used[i] = SLOT_CLEAN;
+ else if (kip->slot_used[i] != SLOT_CLEAN)
+ nr++;
}
+ if (!nr)
+ free_kprobe_insn_page(kip);
}
- c->nr_garbage = 0;
mutex_unlock(&c->mutex);
}

+static struct kprobe_insn_page *
+find_kprobe_insn_page(struct kprobe_insn_cache *c, unsigned long addr)
+{
+ struct kprobe_insn_page *kip;
+
+ list_for_each_entry_rcu(kip, &c->pages, list) {
+ if (addr >= (unsigned long)kip->insns &&
+ addr < (unsigned long)kip->insns + PAGE_SIZE)
+ return kip;
+ }
+ return NULL;
+}
+
void __free_insn_slot(struct kprobe_insn_cache *c,
kprobe_opcode_t *slot, int dirty)
{
struct kprobe_insn_page *kip;
+ int dirtygen;
long idx;

- mutex_lock(&c->mutex);
- list_for_each_entry(kip, &c->pages, list) {
+ rcu_read_lock();
+ kip = find_kprobe_insn_page(c, (unsigned long)slot);
+ if (kip) {
idx = ((long)slot - (long)kip->insns) /
(c->insn_size * sizeof(kprobe_opcode_t));
- if (idx >= 0 && idx < slots_per_page(c))
+ /* Check double free */
+ if (WARN_ON(kip->slot_used[idx] != SLOT_USED))
goto out;
- }
- /* Could not find this slot. */
- WARN_ON(1);
- kip = NULL;
+
+ /* Make sure to see the new generation (pairs with smp_wmb() in the GC) */
+ smp_rmb();
+
+ dirtygen = c->generation ? SLOT_DIRTY1 : SLOT_DIRTY0;
+ if (dirty)
+ kip->slot_used[idx] = dirtygen;
+ else
+ kip->slot_used[idx] = SLOT_CLEAN;
+
+ if (!atomic_dec_return(&kip->nused))
+ kick_kprobe_insn_cache_gc(c);
+ } else
+ WARN_ON(1); /* Not found: what happened? */
out:
- /* Mark and sweep: this may sleep */
- if (kip) {
- /* Check double free */
- WARN_ON(kip->slot_used[idx] != SLOT_USED);
- if (dirty) {
- kip->slot_used[idx] = SLOT_DIRTY;
- kip->ngarbage++;
- if (++c->nr_garbage > slots_per_page(c))
- kick_kprobe_insn_cache_gc(c);
- } else {
- collect_one_slot(kip, idx);
- }
- }
- mutex_unlock(&c->mutex);
+ rcu_read_unlock();
}

/*
@@ -286,17 +311,11 @@ void __free_insn_slot(struct kprobe_insn_cache *c,
*/
bool __is_insn_slot_addr(struct kprobe_insn_cache *c, unsigned long addr)
{
- struct kprobe_insn_page *kip;
bool ret = false;

rcu_read_lock();
- list_for_each_entry_rcu(kip, &c->pages, list) {
- if (addr >= (unsigned long)kip->insns &&
- addr < (unsigned long)kip->insns + PAGE_SIZE) {
- ret = true;
- break;
- }
- }
+ if (find_kprobe_insn_page(c, addr))
+ ret = true;
rcu_read_unlock();

return ret;
@@ -310,7 +329,6 @@ struct kprobe_insn_cache kprobe_optinsn_slots = {
.free = free_insn_page,
.pages = LIST_HEAD_INIT(kprobe_optinsn_slots.pages),
/* .insn_size is initialized later */
- .nr_garbage = 0,
.work = __WORK_INITIALIZER(kprobe_optinsn_slots.work,
kprobe_insn_cache_gc),
};