[PATCH v6 07/12] perf/x86: no counter allocation support

From: Wei Wang
Date: Thu Jun 06 2019 - 03:49:25 EST


In some cases, an event may be created without needing a counter
allocation. For example, an lbr event may be created by the host
only to help save/restore the lbr stack on the vCPU context switching.

This patch adds a new interface to allow users to create a perf event
without the need of counter assignment.

Signed-off-by: Wei Wang <wei.w.wang@xxxxxxxxx>
Cc: Andi Kleen <ak@xxxxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
---
arch/x86/events/core.c | 12 ++++++++++++
include/linux/perf_event.h | 13 +++++++++++++
kernel/events/core.c | 37 +++++++++++++++++++++++++------------
3 files changed, 50 insertions(+), 12 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index f315425..eebbd65 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -410,6 +410,9 @@ int x86_setup_perfctr(struct perf_event *event)
struct hw_perf_event *hwc = &event->hw;
u64 config;

+ if (is_no_counter_event(event))
+ return 0;
+
if (!is_sampling_event(event)) {
hwc->sample_period = x86_pmu.max_period;
hwc->last_period = hwc->sample_period;
@@ -1248,6 +1251,12 @@ static int x86_pmu_add(struct perf_event *event, int flags)
hwc = &event->hw;

n0 = cpuc->n_events;
+
+ if (is_no_counter_event(event)) {
+ n = n0;
+ goto done_collect;
+ }
+
ret = n = collect_events(cpuc, event, false);
if (ret < 0)
goto out;
@@ -1422,6 +1431,9 @@ static void x86_pmu_del(struct perf_event *event, int flags)
if (cpuc->txn_flags & PERF_PMU_TXN_ADD)
goto do_del;

+ if (is_no_counter_event(event))
+ goto do_del;
+
/*
* Not a TXN, therefore cleanup properly.
*/
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 0ab99c7..19e6593 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -528,6 +528,7 @@ typedef void (*perf_overflow_handler_t)(struct perf_event *,
*/
#define PERF_EV_CAP_SOFTWARE BIT(0)
#define PERF_EV_CAP_READ_ACTIVE_PKG BIT(1)
+#define PERF_EV_CAP_NO_COUNTER BIT(2)

#define SWEVENT_HLIST_BITS 8
#define SWEVENT_HLIST_SIZE (1 << SWEVENT_HLIST_BITS)
@@ -895,6 +896,13 @@ extern int perf_event_refresh(struct perf_event *event, int refresh);
extern void perf_event_update_userpage(struct perf_event *event);
extern int perf_event_release_kernel(struct perf_event *event);
extern struct perf_event *
+perf_event_create(struct perf_event_attr *attr,
+ int cpu,
+ struct task_struct *task,
+ perf_overflow_handler_t overflow_handler,
+ void *context,
+ bool counter_assignment);
+extern struct perf_event *
perf_event_create_kernel_counter(struct perf_event_attr *attr,
int cpu,
struct task_struct *task,
@@ -1032,6 +1040,11 @@ static inline bool is_sampling_event(struct perf_event *event)
return event->attr.sample_period != 0;
}

+static inline bool is_no_counter_event(struct perf_event *event)
+{
+ return !!(event->event_caps & PERF_EV_CAP_NO_COUNTER);
+}
+
/*
* Return 1 for a software event, 0 for a hardware event
*/
diff --git a/kernel/events/core.c b/kernel/events/core.c
index abbd4b3..70884df 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -11162,18 +11162,10 @@ SYSCALL_DEFINE5(perf_event_open,
return err;
}

-/**
- * perf_event_create_kernel_counter
- *
- * @attr: attributes of the counter to create
- * @cpu: cpu in which the counter is bound
- * @task: task to profile (NULL for percpu)
- */
-struct perf_event *
-perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
- struct task_struct *task,
- perf_overflow_handler_t overflow_handler,
- void *context)
+struct perf_event *perf_event_create(struct perf_event_attr *attr, int cpu,
+ struct task_struct *task,
+ perf_overflow_handler_t overflow_handler,
+ void *context, bool need_counter)
{
struct perf_event_context *ctx;
struct perf_event *event;
@@ -11193,6 +11185,9 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
/* Mark owner so we could distinguish it from user events. */
event->owner = TASK_TOMBSTONE;

+ if (!need_counter)
+ event->event_caps |= PERF_EV_CAP_NO_COUNTER;
+
ctx = find_get_context(event->pmu, task, event);
if (IS_ERR(ctx)) {
err = PTR_ERR(ctx);
@@ -11241,6 +11236,24 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
err:
return ERR_PTR(err);
}
+EXPORT_SYMBOL_GPL(perf_event_create);
+
+/**
+ * perf_event_create_kernel_counter
+ *
+ * @attr: attributes of the counter to create
+ * @cpu: cpu in which the counter is bound
+ * @task: task to profile (NULL for percpu)
+ */
+struct perf_event *
+perf_event_create_kernel_counter(struct perf_event_attr *attr, int cpu,
+ struct task_struct *task,
+ perf_overflow_handler_t overflow_handler,
+ void *context)
+{
+ return perf_event_create(attr, cpu, task, overflow_handler,
+ context, true);
+}
EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);

void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
--
2.7.4