Re: [PATCH v4] kretprobe: percpu support

From: Masami Hiramatsu
Date: Thu Mar 05 2020 - 01:42:47 EST


Hi Luigi,

On Fri, 21 Feb 2020 13:16:57 -0800
Luigi Rizzo <lrizzo@xxxxxxxxxx> wrote:

> kretprobe uses a list protected by a single lock to allocate a
> kretprobe_instance in pre_handler_kretprobe(). This works poorly with
> concurrent calls.
>
> This patch offers a simplified fix: the percpu_instance flag indicates
> that kretprobe_instance's are per CPU, which makes allocation lock free.

I concern that your new percpu_instance flag silently hides the
maxactive parameter. At least we should return an error if maxactive
is some positive value but percpu_instance is true. Or, define
KRETPROBE_PERCPU_MAXACTIVE (< 0) and use it instead of new bool flag.

> As part of this patch, we factor out the code to allocate instances in
> get_pcpu_rp_instance() and get_rp_instance().
>
> At the moment we only allow one pending kretprobe per CPU. This can be
> extended to a small constant number of entries, but finding a free entry
> would either bring back the lock, or require scanning an array, which can
> be expensive (especially if callers block and migrate).

I think if you disables irq while scanning an array (that should be
a small array), you don't need to afraid of such racing (maybe we need
a pair of memory barriers).

Anyway, my concern is about introducing a new knob which conflict with
another knob. Implementation is good to me.

Thank you,


>
> Signed-off-by: Luigi Rizzo <lrizzo@xxxxxxxxxx>
> ---
> include/linux/kprobes.h | 8 +++-
> kernel/kprobes.c | 102 +++++++++++++++++++++++++++++++---------
> kernel/test_kprobes.c | 20 ++++++--
> 3 files changed, 101 insertions(+), 29 deletions(-)
>
> diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
> index 04bdaf01112cb..c5894f4c5c26c 100644
> --- a/include/linux/kprobes.h
> +++ b/include/linux/kprobes.h
> @@ -142,6 +142,8 @@ static inline int kprobe_ftrace(struct kprobe *p)
> * can be active concurrently.
> * nmissed - tracks the number of times the probed function's return was
> * ignored, due to maxactive being too low.
> + * percpu_instance - if set, uses one instance per cpu instead of allocating
> + * from the list protected by lock.
> *
> */
> struct kretprobe {
> @@ -151,8 +153,12 @@ struct kretprobe {
> int maxactive;
> int nmissed;
> size_t data_size;
> - struct hlist_head free_instances;
> + union {
> + struct kretprobe_instance __percpu *pcpu;
> + struct hlist_head free_instances;
> + };
> raw_spinlock_t lock;
> + bool percpu_instance;
> };
>
> struct kretprobe_instance {
> diff --git a/kernel/kprobes.c b/kernel/kprobes.c
> index 2625c241ac00f..46018a4a76178 100644
> --- a/kernel/kprobes.c
> +++ b/kernel/kprobes.c
> @@ -1184,6 +1184,10 @@ void recycle_rp_inst(struct kretprobe_instance *ri,
> hlist_del(&ri->hlist);
> INIT_HLIST_NODE(&ri->hlist);
> if (likely(rp)) {
> + if (rp->percpu_instance) {
> + ri->rp = NULL;
> + return;
> + }
> raw_spin_lock(&rp->lock);
> hlist_add_head(&ri->hlist, &rp->free_instances);
> raw_spin_unlock(&rp->lock);
> @@ -1274,6 +1278,11 @@ static inline void free_rp_inst(struct kretprobe *rp)
> struct kretprobe_instance *ri;
> struct hlist_node *next;
>
> + if (rp->percpu_instance) {
> + free_percpu(rp->pcpu);
> + return;
> + }
> +
> hlist_for_each_entry_safe(ri, next, &rp->free_instances, hlist) {
> hlist_del(&ri->hlist);
> kfree(ri);
> @@ -1851,6 +1860,46 @@ unsigned long __weak arch_deref_entry_point(void *entry)
> }
>
> #ifdef CONFIG_KRETPROBES
> +struct kretprobe_instance *get_pcpu_rp_instance(struct kretprobe *rp)
> +{
> + struct kretprobe_instance *ri;
> + unsigned long flags;
> +
> + local_irq_save(flags);
> + ri = this_cpu_ptr(rp->pcpu);
> + if (!ri || ri->rp) { /* already in use */
> + local_irq_restore(flags);
> + rp->nmissed++;
> + return NULL;
> + }
> + INIT_HLIST_NODE(&ri->hlist);
> + ri->rp = rp;
> + ri->task = current;
> + local_irq_restore(flags);
> + return ri;
> +}
> +
> +struct kretprobe_instance *get_rp_instance(struct kretprobe *rp)
> +{
> + struct kretprobe_instance *ri;
> + unsigned long flags;
> +
> + raw_spin_lock_irqsave(&rp->lock, flags);
> + if (hlist_empty(&rp->free_instances)) {
> + rp->nmissed++;
> + raw_spin_unlock_irqrestore(&rp->lock, flags);
> + return NULL;
> + }
> + ri = hlist_entry(rp->free_instances.first, struct kretprobe_instance,
> + hlist);
> + hlist_del(&ri->hlist);
> + raw_spin_unlock_irqrestore(&rp->lock, flags);
> +
> + ri->rp = rp;
> + ri->task = current;
> + return ri;
> +}
> +
> /*
> * This kprobe pre_handler is registered with every kretprobe. When probe
> * hits it will set up the return probe.
> @@ -1873,35 +1922,32 @@ static int pre_handler_kretprobe(struct kprobe *p, struct pt_regs *regs)
> }
>
> /* TODO: consider to only swap the RA after the last pre_handler fired */
> - hash = hash_ptr(current, KPROBE_HASH_BITS);
> - raw_spin_lock_irqsave(&rp->lock, flags);
> - if (!hlist_empty(&rp->free_instances)) {
> - ri = hlist_entry(rp->free_instances.first,
> - struct kretprobe_instance, hlist);
> - hlist_del(&ri->hlist);
> - raw_spin_unlock_irqrestore(&rp->lock, flags);
> -
> - ri->rp = rp;
> - ri->task = current;
> -
> - if (rp->entry_handler && rp->entry_handler(ri, regs)) {
> + if (rp->percpu_instance) {
> + ri = get_pcpu_rp_instance(rp);
> + } else {
> + ri = get_rp_instance(rp);
> + }
> + if (!ri)
> + return 0;
> + if (rp->entry_handler && rp->entry_handler(ri, regs)) {
> + if (rp->percpu_instance) {
> + ri->rp = NULL;
> + } else {
> raw_spin_lock_irqsave(&rp->lock, flags);
> hlist_add_head(&ri->hlist, &rp->free_instances);
> raw_spin_unlock_irqrestore(&rp->lock, flags);
> - return 0;
> }
> + return 0;
> + }
> + arch_prepare_kretprobe(ri, regs);
>
> - arch_prepare_kretprobe(ri, regs);
> + /* XXX(hch): why is there no hlist_move_head? */
> + INIT_HLIST_NODE(&ri->hlist);
> + hash = hash_ptr(current, KPROBE_HASH_BITS);
> + kretprobe_table_lock(hash, &flags);
> + hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
> + kretprobe_table_unlock(hash, &flags);
>
> - /* XXX(hch): why is there no hlist_move_head? */
> - INIT_HLIST_NODE(&ri->hlist);
> - kretprobe_table_lock(hash, &flags);
> - hlist_add_head(&ri->hlist, &kretprobe_inst_table[hash]);
> - kretprobe_table_unlock(hash, &flags);
> - } else {
> - rp->nmissed++;
> - raw_spin_unlock_irqrestore(&rp->lock, flags);
> - }
> return 0;
> }
> NOKPROBE_SYMBOL(pre_handler_kretprobe);
> @@ -1950,6 +1996,15 @@ int register_kretprobe(struct kretprobe *rp)
> rp->kp.post_handler = NULL;
> rp->kp.fault_handler = NULL;
>
> + if (rp->percpu_instance) {
> + rp->pcpu = __alloc_percpu(sizeof(*rp->pcpu) + rp->data_size,
> + __alignof__(*rp->pcpu));
> + if (rp->pcpu)
> + goto finalize;
> + free_rp_inst(rp);
> + return -ENOMEM;
> + }
> +
> /* Pre-allocate memory for max kretprobe instances */
> if (rp->maxactive <= 0) {
> #ifdef CONFIG_PREEMPTION
> @@ -1971,6 +2026,7 @@ int register_kretprobe(struct kretprobe *rp)
> hlist_add_head(&inst->hlist, &rp->free_instances);
> }
>
> +finalize:
> rp->nmissed = 0;
> /* Establish function entry probe point */
> ret = register_kprobe(&rp->kp);
> diff --git a/kernel/test_kprobes.c b/kernel/test_kprobes.c
> index 76c997fdbc9da..4ebe48301d743 100644
> --- a/kernel/test_kprobes.c
> +++ b/kernel/test_kprobes.c
> @@ -236,31 +236,36 @@ static struct kretprobe rp2 = {
> .kp.symbol_name = "kprobe_target2"
> };
>
> -static int test_kretprobes(void)
> +static int test_kretprobes(bool percpu_instance)
> {
> int ret;
> struct kretprobe *rps[2] = {&rp, &rp2};
> + const char *mode = percpu_instance ? "percpu " : "normal";
>
> /* addr and flags should be cleard for reusing kprobe. */
> rp.kp.addr = NULL;
> rp.kp.flags = 0;
> + rp.percpu_instance = percpu_instance;
> + rp2.kp.addr = NULL;
> + rp2.kp.flags = 0;
> + rp2.percpu_instance = percpu_instance;
> ret = register_kretprobes(rps, 2);
> if (ret < 0) {
> - pr_err("register_kretprobe returned %d\n", ret);
> + pr_err("register_kretprobe mode %s returned %d\n", mode, ret);
> return ret;
> }
>
> krph_val = 0;
> ret = target(rand1);
> if (krph_val != rand1) {
> - pr_err("kretprobe handler not called\n");
> + pr_err("kretprobe handler mode %s not called\n", mode);
> handler_errors++;
> }
>
> krph_val = 0;
> ret = target2(rand1);
> if (krph_val != rand1) {
> - pr_err("kretprobe handler2 not called\n");
> + pr_err("kretprobe handler2 mode %s not called\n", mode);
> handler_errors++;
> }
> unregister_kretprobes(rps, 2);
> @@ -297,7 +302,12 @@ int init_test_probes(void)
> errors++;
>
> num_tests++;
> - ret = test_kretprobes();
> + ret = test_kretprobes(false);
> + if (ret < 0)
> + errors++;
> +
> + num_tests++;
> + ret = test_kretprobes(true);
> if (ret < 0)
> errors++;
> #endif /* CONFIG_KRETPROBES */
> --
> 2.25.0.265.gbab2e86ba0-goog
>


--
Masami Hiramatsu <mhiramat@xxxxxxxxxx>