[PATCH 7/8] perf/core: Introduce perf_prepare_header()

From: Namhyung Kim
Date: Wed Jan 18 2023 - 01:49:18 EST


Factor out perf_prepare_header() so that callers can invoke
perf_prepare_sample() without a header when one is not needed.

Also check filtered_sample_type in perf_prepare_sample() to avoid
duplicate work when it is called twice (or more).

Cc: linux-s390@xxxxxxxxxxxxxxx
Cc: x86@xxxxxxxxxx
Suggested-by: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Acked-by: Jiri Olsa <jolsa@xxxxxxxxxx>
Acked-by: Song Liu <song@xxxxxxxxxx>
Tested-by: Jiri Olsa <jolsa@xxxxxxxxxx>
Signed-off-by: Namhyung Kim <namhyung@xxxxxxxxxx>
---
arch/s390/kernel/perf_cpum_sf.c | 3 ++-
arch/x86/events/intel/ds.c | 3 ++-
include/linux/perf_event.h | 16 +++++++++++++-
kernel/events/core.c | 38 +++++++++++++++++++++------------
4 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/arch/s390/kernel/perf_cpum_sf.c b/arch/s390/kernel/perf_cpum_sf.c
index 332a49965130..fd02f8423243 100644
--- a/arch/s390/kernel/perf_cpum_sf.c
+++ b/arch/s390/kernel/perf_cpum_sf.c
@@ -671,7 +671,8 @@ static void cpumsf_output_event_pid(struct perf_event *event,
/* Protect callchain buffers, tasks */
rcu_read_lock();

- perf_prepare_sample(&header, data, event, regs);
+ perf_prepare_sample(data, event, regs);
+ perf_prepare_header(&header, data, event, regs);
if (perf_output_begin(&handle, data, event, header.size))
goto out;

diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
index 07c8a2cdc3ee..183efa914b99 100644
--- a/arch/x86/events/intel/ds.c
+++ b/arch/x86/events/intel/ds.c
@@ -807,7 +807,8 @@ int intel_pmu_drain_bts_buffer(void)
* the sample.
*/
rcu_read_lock();
- perf_prepare_sample(&header, &data, event, &regs);
+ perf_prepare_sample(&data, event, &regs);
+ perf_prepare_header(&header, &data, event, &regs);

if (perf_output_begin(&handle, &data, event,
header.size * (top - base - skip)))
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 7db0e9cc2682..d5628a7b5eaa 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1250,6 +1250,17 @@ static inline void perf_sample_save_brstack(struct perf_sample_data *data,
data->sample_flags |= PERF_SAMPLE_BRANCH_STACK;
}

+static inline u32 perf_sample_data_size(struct perf_sample_data *data,
+ struct perf_event *event)
+{
+ u32 size = sizeof(struct perf_event_header);
+
+ size += event->header_size + event->id_header_size;
+ size += data->dyn_size;
+
+ return size;
+}
+
/*
* Clear all bitfields in the perf_branch_entry.
* The to and from fields are not cleared because they are
@@ -1271,7 +1282,10 @@ extern void perf_output_sample(struct perf_output_handle *handle,
struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event);
-extern void perf_prepare_sample(struct perf_event_header *header,
+extern void perf_prepare_sample(struct perf_sample_data *data,
+ struct perf_event *event,
+ struct pt_regs *regs);
+extern void perf_prepare_header(struct perf_event_header *header,
struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs);
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 9cc55122188f..73c40ce84c48 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -7575,20 +7575,13 @@ static __always_inline u64 __cond_set(u64 flags, u64 s, u64 d)
return d * !!(flags & s);
}

-void perf_prepare_sample(struct perf_event_header *header,
- struct perf_sample_data *data,
+void perf_prepare_sample(struct perf_sample_data *data,
struct perf_event *event,
struct pt_regs *regs)
{
u64 sample_type = event->attr.sample_type;
u64 filtered_sample_type;

- header->type = PERF_RECORD_SAMPLE;
- header->size = sizeof(*header) + event->header_size + event->id_header_size;
-
- header->misc = 0;
- header->misc |= perf_misc_flags(regs);
-
/*
* Add the sample flags that are dependent to others. And clear the
* sample flags that have already been done by the PMU driver.
@@ -7602,6 +7595,12 @@ void perf_prepare_sample(struct perf_event_header *header,
PERF_SAMPLE_REGS_USER);
filtered_sample_type &= ~data->sample_flags;

+ if (filtered_sample_type == 0) {
+ /* Make sure it has the correct data->type for output */
+ data->type = event->attr.sample_type;
+ return;
+ }
+
__perf_event_header__init_id(data, event, filtered_sample_type);

if (filtered_sample_type & PERF_SAMPLE_IP) {
@@ -7653,9 +7652,10 @@ void perf_prepare_sample(struct perf_event_header *header,
* up the rest of the sample size.
*/
u16 stack_size = event->attr.sample_stack_user;
+ u16 header_size = perf_sample_data_size(data, event);
u16 size = sizeof(u64);

- stack_size = perf_sample_ustack_size(stack_size, header->size,
+ stack_size = perf_sample_ustack_size(stack_size, header_size,
data->regs_user.regs);

/*
@@ -7740,8 +7740,9 @@ void perf_prepare_sample(struct perf_event_header *header,

if (filtered_sample_type & PERF_SAMPLE_AUX) {
u64 size;
+ u16 header_size = perf_sample_data_size(data, event);

- header->size += sizeof(u64); /* size */
+ header_size += sizeof(u64); /* size */

/*
* Given the 16bit nature of header::size, an AUX sample can
@@ -7749,17 +7750,25 @@ void perf_prepare_sample(struct perf_event_header *header,
* Make sure this doesn't happen by using up to U16_MAX bytes
* per sample in total (rounded down to 8 byte boundary).
*/
- size = min_t(size_t, U16_MAX - header->size,
+ size = min_t(size_t, U16_MAX - header_size,
event->attr.aux_sample_size);
size = rounddown(size, 8);
size = perf_prepare_sample_aux(event, data, size);

- WARN_ON_ONCE(size + header->size > U16_MAX);
+ WARN_ON_ONCE(size + header_size > U16_MAX);
data->dyn_size += size + sizeof(u64); /* size above */
data->sample_flags |= PERF_SAMPLE_AUX;
}
+}

- header->size += data->dyn_size;
+void perf_prepare_header(struct perf_event_header *header,
+ struct perf_sample_data *data,
+ struct perf_event *event,
+ struct pt_regs *regs)
+{
+ header->type = PERF_RECORD_SAMPLE;
+ header->size = perf_sample_data_size(data, event);
+ header->misc = perf_misc_flags(regs);

/*
* If you're adding more sample types here, you likely need to do
@@ -7788,7 +7797,8 @@ __perf_event_output(struct perf_event *event,
/* protect the callchain buffers */
rcu_read_lock();

- perf_prepare_sample(&header, data, event, regs);
+ perf_prepare_sample(data, event, regs);
+ perf_prepare_header(&header, data, event, regs);

err = output_begin(&handle, data, event, header.size);
if (err)
--
2.39.0.314.g84b9a713c41-goog