Re: [BUG] perf/x86/intel/pebs: PEBS timestamps overwritten

From: Stephane Eranian
Date: Wed Aug 24 2022 - 15:52:49 EST


On Wed, Aug 24, 2022 at 12:14 PM Liang, Kan <kan.liang@xxxxxxxxxxxxxxx> wrote:
>
>
>
> On 2022-08-24 5:27 a.m., Peter Zijlstra wrote:
> >
> > Should be 3 patches at the very least I think, the first one introducing
> > the new field and then follow up patches making use of it.
> >
> > And yes as Ravi mentions there's the CALLCHAIN_EARLY hack that could be
> > cleaned up as well, making it 4 or something.
> >
> > On Fri, Aug 05, 2022 at 09:36:37AM -0400, Liang, Kan wrote:
> >
> >> diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c
> >> index 34be3bc5151a..a2c26eaeb0d9 100644
> >> --- a/arch/x86/events/intel/ds.c
> >> +++ b/arch/x86/events/intel/ds.c
> >> @@ -1687,8 +1687,10 @@ static void setup_pebs_fixed_sample_data(struct perf_event *event,
> >> * We can only do this for the default trace clock.
> >> */
> >> if (x86_pmu.intel_cap.pebs_format >= 3 &&
> >> - event->attr.use_clockid == 0)
> >> + event->attr.use_clockid == 0) {
> >
> > Indent fail; please add: 'set cino=(0:0' to your .vimrc or figure out
> > the equivalent for your editor of choice.
> >
> >> data->time = native_sched_clock_from_tsc(pebs->tsc);
> >> + data->flags |= PERF_SAMPLE_DATA_TIME;
> >> + }
> >>
> >> if (has_branch_stack(event))
> >> data->br_stack = &cpuc->lbr_stack;
> >
> >> diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> >> index da759560eec5..33054bf31fc1 100644
> >> --- a/include/linux/perf_event.h
> >> +++ b/include/linux/perf_event.h
> >> @@ -999,6 +999,7 @@ int perf_event_read_local(struct perf_event *event, u64 *value,
> >> extern u64 perf_event_read_value(struct perf_event *event,
> >> u64 *enabled, u64 *running);
> >>
> >> +#define PERF_SAMPLE_DATA_TIME 0x1
> >>
> >> struct perf_sample_data {
> >> /*
> >> @@ -1012,6 +1013,7 @@ struct perf_sample_data {
> >> union perf_sample_weight weight;
> >> u64 txn;
> >> union perf_mem_data_src data_src;
> >> + u64 flags;
> >>
> >> /*
> >> * The other fields, optionally {set,used} by
> >
> > How about we call that 'sample_flags' instead and use PERF_SAMPLE_* as
> > we already have, something like so:
>
> True, I think we can use PERF_SAMPLE_* and avoid adding more flags.
>
> I will implement some patches based on the suggestion.
>
I agree with the approach as well. We reuse PERF_SAMPLE_*, which means it
automatically adjusts as we add more PERF_SAMPLE_* bits.
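
As a rough sketch (not an actual patch from this thread), the PEBS side
would then drop the new PERF_SAMPLE_DATA_TIME define and simply mark the
field with the existing PERF_SAMPLE_TIME bit, e.g. in
setup_pebs_fixed_sample_data():

	if (x86_pmu.intel_cap.pebs_format >= 3 &&
	    event->attr.use_clockid == 0) {
		/* Use the TSC captured by PEBS hardware for the timestamp. */
		data->time = native_sched_clock_from_tsc(pebs->tsc);
		/* Tell the generic code this field is already populated. */
		data->sample_flags |= PERF_SAMPLE_TIME;
	}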

> Thanks,
> Kan
>
> >
> >
> > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
> > index ee8b9ecdc03b..b0ebbb1377b9 100644
> > --- a/include/linux/perf_event.h
> > +++ b/include/linux/perf_event.h
> > @@ -1007,6 +1007,7 @@ struct perf_sample_data {
> > * Fields set by perf_sample_data_init(), group so as to
> > * minimize the cachelines touched.
> > */
> > + u64 sample_flags;
> > u64 addr;
> > struct perf_raw_record *raw;
> > struct perf_branch_stack *br_stack;
> > @@ -1056,6 +1057,7 @@ static inline void perf_sample_data_init(struct perf_sample_data *data,
> > u64 addr, u64 period)
> > {
> > /* remaining struct members initialized in perf_prepare_sample() */
> > + data->sample_flags = 0;
> > data->addr = addr;
> > data->raw = NULL;
> > data->br_stack = NULL;
> > diff --git a/kernel/events/core.c b/kernel/events/core.c
> > index 2621fd24ad26..fed447f59024 100644
> > --- a/kernel/events/core.c
> > +++ b/kernel/events/core.c
> > @@ -6792,15 +6792,21 @@ static void perf_aux_sample_output(struct perf_event *event,
> > ring_buffer_put(rb);
> > }
> >
> > -static void __perf_event_header__init_id(struct perf_event_header *header,
> > - struct perf_sample_data *data,
> > - struct perf_event *event)
> > +static u64 __perf_event_header__init_id(struct perf_event_header *header,
> > + struct perf_sample_data *data,
> > + struct perf_event *event)
> > {
> > u64 sample_type = event->attr.sample_type;
> >
> > data->type = sample_type;
> > header->size += event->id_header_size;
> >
> > + /*
> > + * Clear the sample flags that have already been done by the
> > + * PMU driver.
> > + */
> > + sample_type &= ~data->sample_flags;
> > +
> > if (sample_type & PERF_SAMPLE_TID) {
> > /* namespace issues */
> > data->tid_entry.pid = perf_event_pid(event, current);
> > @@ -6820,6 +6826,8 @@ static void __perf_event_header__init_id(struct perf_event_header *header,
> > data->cpu_entry.cpu = raw_smp_processor_id();
> > data->cpu_entry.reserved = 0;
> > }
> > +
> > + return sample_type;
> > }
> >
> > void perf_event_header__init_id(struct perf_event_header *header,
> > @@ -7302,7 +7310,7 @@ void perf_prepare_sample(struct perf_event_header *header,
> > struct perf_event *event,
> > struct pt_regs *regs)
> > {
> > - u64 sample_type = event->attr.sample_type;
> > + u64 sample_type;
> >
> > header->type = PERF_RECORD_SAMPLE;
> > header->size = sizeof(*header) + event->header_size;
> > @@ -7310,7 +7318,7 @@ void perf_prepare_sample(struct perf_event_header *header,
> > header->misc = 0;
> > header->misc |= perf_misc_flags(regs);
> >
> > - __perf_event_header__init_id(header, data, event);
> > + sample_type = __perf_event_header__init_id(header, data, event);
> >
> > if (sample_type & (PERF_SAMPLE_IP | PERF_SAMPLE_CODE_PAGE_SIZE))
> > data->ip = perf_instruction_pointer(regs);