Re: [BUG] perf/x86/intel/pebs: PEBS timestamps overwritten

From: Liang, Kan
Date: Wed Aug 24 2022 - 15:15:00 EST




On 2022-08-24 5:27 a.m., Peter Zijlstra wrote:
>
> This should be at least 3 patches, I think: the first one introducing
> the new field, and then follow-up patches making use of it.
>
> And yes as Ravi mentions there's the CALLCHAIN_EARLY hack that could be
> cleaned up as well, making it 4 or something.
>
> On Fri, Aug 05, 2022 at 09:36:37AM -0400, Liang, Kan wrote:
>
>> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
>> index 34be3bc5151a..a2c26eaeb0d9 100644
>> --- a/arch/x86/events/intel/ds.c
>> +++ b/arch/x86/events/intel/ds.c
>> @@ -1687,8 +1687,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
>> * We can only do this for the default trace clock.
>> */
>> if (x86_pmu.intel_cap.pebs_format >= 3 &&
>> - event->attr.use_clockid == 0)
>> + event->attr.use_clockid == 0) {
>
> Indent fail; please add: 'set cino=(0:0' to your .vimrc or figure out
> the equivalent for your editor of choice.
>
>> data->time = native_sched_clock_from_tsc(pebs->tsc);
>> + data->flags |= PERF_SAMPLE_DATA_TIME;
>> + }
>>
>> if (has_branch_stack(event))
>> data->br_stack = &cpuc->lbr_stack;
>
>> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
>> index da759560eec5..33054bf31fc1 100644
>> --- a/include/linux/perf_event.h
>> +++ b/include/linux/perf_event.h
>> @@ -999,6 +999,7 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
>> extern u64 perf_event_read_value(struct perf_event *event,
>> u64 *enabled, u64 *running);
>>
>> +#define PERF_SAMPLE_DATA_TIME 0x1
>>
>> struct perf_sample_data {
>> /*
>> @@ -1012,6 +1013,7 @@ struct perf_sample_data {
>> union perf_sample_weight weight;
>> u64 txn;
>> union perf_mem_data_src data_src;
>> + u64 flags;
>>
>> /*
>> * The other fields, optionally {set,used} by
>
> How about we call that 'sample_flags' instead and use PERF_SAMPLE_* as
> we already have, something like so:

True, I think we can use PERF_SAMPLE_* and avoid adding more flags.

I will implement some patches based on the suggestion.

Thanks,
Kan

>
>
> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> index ee8b9ecdc03b..b0ebbb1377b9 100644
> --- a/include/linux/perf_event.h
> +++ b/include/linux/perf_event.h
> @@ -1007,6 +1007,7 @@ struct perf_sample_data {
> * Fields set by perf_sample_data_init(), group so as to
> * minimize the cachelines touched.
> */
> + u64 sample_flags;
> u64 addr;
> struct perf_raw_record *raw;
> struct perf_branch_stack *br_stack;
> @@ -1056,6 +1057,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
> u64 addr, u64 period)
> {
> /* remaining struct members initialized in perf_prepare_sample() */
> + data->sample_flags = 0;
> data->addr = addr;
> data->raw = NULL;
> data->br_stack = NULL;
> diff --git a/kernel/events/core.c b/kernel/events/core.c
> index 2621fd24ad26..fed447f59024 100644
> --- a/kernel/events/core.c
> +++ b/kernel/events/core.c
> @@ -6792,15 +6792,21 @@ static void perf_aux_sample_output(struct perf_event *event,
> ring_buffer_put(rb);
> }
>
> -static void __perf_event_header__init_id(struct perf_event_header *header,
> - struct perf_sample_data *data,
> - struct perf_event *event)
> +static u64 __perf_event_header__init_id(struct perf_event_header *header,
> + struct perf_sample_data *data,
> + struct perf_event *event)
> {
> u64 sample_type = event->attr.sample_type;
>
> data->type = sample_type;
> header->size += event->id_header_size;
>
> + /*
> + * Clear the sample flags that have already been done by the
> + * PMU driver.
> + */
> + sample_type &= ~data->sample_flags;
> +
> if (sample_type & PERF_SAMPLE_TID) {
> /* namespace issues */
> data->tid_entry.pid = perf_event_pid(event, current);
> @@ -6820,6 +6826,8 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
> data->cpu_entry.cpu = raw_smp_processor_id();
> data->cpu_entry.reserved = 0;
> }
> +
> + return sample_type;
> }
>
> void perf_event_header__init_id(struct perf_event_header *header,
> @@ -7302,7 +7310,7 @@ void perf_prepare_sample(struct perf_event_header *header,
> struct perf_event *event,
> struct pt_regs *regs)
> {
> - u64 sample_type = event->attr.sample_type;
> + u64 sample_type;
>
> header->type = PERF_RECORD_SAMPLE;
> header->size = sizeof(*header) + event->header_size;
> @@ -7310,7 +7318,7 @@ void perf_prepare_sample(struct perf_event_header *header,
> header->misc = 0;
> header->misc |= perf_misc_flags(regs);
>
> - __perf_event_header__init_id(header, data, event);
> + sample_type = __perf_event_header__init_id(header, data, event);
>
> if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
> data->ip = perf_instruction_pointer(regs);