Re: [PATCH 8/8] perf arm-spe: Set thread TID

From: Leo Yan
Date: Sun Jan 31 2021 - 08:01:03 EST


Hi James,

On Tue, Jan 19, 2021 at 04:46:58PM +0200, James Clark wrote:
> From: Leo Yan <leo.yan@xxxxxxxxxx>
>
> Set thread TID for SPE samples. Now that the context ID is saved
> in each record it can be used to set the TID for a sample.
>
> The context ID is only present in SPE data if the kernel is
> compiled with CONFIG_PID_IN_CONTEXTIDR and perf record is
> run as root. Otherwise the PID of the first process is assigned
> to each SPE sample.

I tested this patch series on a Hisilicon D06 and it outputs the
expected results. I am comfortable with the test results, given that
the two of us have tested on two different platforms.

Based on the discussion in thread [1], IIUC, there is a concern about
using CONTEXTIDR in non-root namespaces. Thus patch 08/08 is limited
to supporting PID tracing in the root namespace, so we have two
options:

Option 1: by merging patches 07/08 and 08/08, we can first support PID
tracing for the root namespace, and later extend it to support PID
tracing in containers (and in VMs).

Option 2: we can use a software method to establish the PID for SPE
trace, which can be based on the kernel's PERF_RECORD_SWITCH /
PERF_RECORD_SWITCH_CPU_WIDE events and on checking the context switch ip.

To be honest, I am a bit concerned that option 1 might introduce a
regression later, when we add PID support for containers (and VMs).
If you have a plan for option 1, I think it's good to record the current
limitation and the plan for the next step in the commit log, so we can
merge this patch now and extend it for containers later.

Otherwise, we need to consider how to implement the PID tracing with
option 2. If that is the case, we should first merge only patches
01 ~ 06 for data source enabling. What do you think about this?

> Signed-off-by: Leo Yan <leo.yan@xxxxxxxxxx>
> Signed-off-by: James Clark <james.clark@xxxxxxx>

Besides the technical question, you could add your "Co-developed-by"
tags to patches 06, 07 and 08/08, which you have taken the time to refine.

Thank you for your kind efforts.

[1] https://lore.kernel.org/patchwork/patch/1353286/

> Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
> Cc: Ingo Molnar <mingo@xxxxxxxxxx>
> Cc: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
> Cc: Mark Rutland <mark.rutland@xxxxxxx>
> Cc: Alexander Shishkin <alexander.shishkin@xxxxxxxxxxxxxxx>
> Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
> Cc: Namhyung Kim <namhyung@xxxxxxxxxx>
> Cc: John Garry <john.garry@xxxxxxxxxx>
> Cc: Will Deacon <will@xxxxxxxxxx>
> Cc: Mathieu Poirier <mathieu.poirier@xxxxxxxxxx>
> Cc: Al Grant <al.grant@xxxxxxx>
> Cc: Andre Przywara <andre.przywara@xxxxxxx>
> Cc: Wei Li <liwei391@xxxxxxxxxx>
> Cc: Tan Xiaojun <tanxiaojun@xxxxxxxxxx>
> Cc: Adrian Hunter <adrian.hunter@xxxxxxxxx>
> ---
> tools/perf/util/arm-spe.c | 75 ++++++++++++++++++++++++++-------------
> 1 file changed, 50 insertions(+), 25 deletions(-)
>
> diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c
> index 27a0b9dfe22d..9828fad7e516 100644
> --- a/tools/perf/util/arm-spe.c
> +++ b/tools/perf/util/arm-spe.c
> @@ -223,6 +223,46 @@ static inline u8 arm_spe_cpumode(struct arm_spe *spe, u64 ip)
> PERF_RECORD_MISC_USER;
> }
>
> +static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
> + struct auxtrace_queue *queue)
> +{
> + struct arm_spe_queue *speq = queue->priv;
> + pid_t tid;
> +
> + tid = machine__get_current_tid(spe->machine, speq->cpu);
> + if (tid != -1) {
> + speq->tid = tid;
> + thread__zput(speq->thread);
> + } else
> + speq->tid = queue->tid;
> +
> + if ((!speq->thread) && (speq->tid != -1)) {
> + speq->thread = machine__find_thread(spe->machine, -1,
> + speq->tid);
> + }
> +
> + if (speq->thread) {
> + speq->pid = speq->thread->pid_;
> + if (queue->cpu == -1)
> + speq->cpu = speq->thread->cpu;
> + }
> +}
> +
> +static int arm_spe_set_tid(struct arm_spe_queue *speq, pid_t tid)
> +{
> + int err;
> + struct arm_spe *spe = speq->spe;
> + struct auxtrace_queue *queue;
> +
> + err = machine__set_current_tid(spe->machine, speq->cpu, tid, tid);
> + if (err)
> + return err;
> +
> + queue = &speq->spe->queues.queue_array[speq->queue_nr];
> + arm_spe_set_pid_tid_cpu(speq->spe, queue);
> + return 0;
> +}
> +
> static void arm_spe_prep_sample(struct arm_spe *spe,
> struct arm_spe_queue *speq,
> union perf_event *event,
> @@ -431,6 +471,7 @@ static int arm_spe_sample(struct arm_spe_queue *speq)
> static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
> {
> struct arm_spe *spe = speq->spe;
> + const struct arm_spe_record *record;
> int ret;
>
> if (!spe->kernel_start)
> @@ -450,6 +491,11 @@ static int arm_spe_run_decoder(struct arm_spe_queue *speq, u64 *timestamp)
> if (ret < 0)
> continue;
>
> + record = &speq->decoder->record;
> + ret = arm_spe_set_tid(speq, record->context_id);
> + if (ret)
> + return ret;
> +
> ret = arm_spe_sample(speq);
> if (ret)
> return ret;
> @@ -500,6 +546,10 @@ static int arm_spe__setup_queue(struct arm_spe *spe,
>
> record = &speq->decoder->record;
>
> + ret = arm_spe_set_tid(speq, record->context_id);
> + if (ret)
> + return ret;
> +
> speq->timestamp = record->timestamp;
> ret = auxtrace_heap__add(&spe->heap, queue_nr, speq->timestamp);
> if (ret)
> @@ -552,31 +602,6 @@ static bool arm_spe__is_timeless_decoding(struct arm_spe *spe)
> return timeless_decoding;
> }
>
> -static void arm_spe_set_pid_tid_cpu(struct arm_spe *spe,
> - struct auxtrace_queue *queue)
> -{
> - struct arm_spe_queue *speq = queue->priv;
> - pid_t tid;
> -
> - tid = machine__get_current_tid(spe->machine, speq->cpu);
> - if (tid != -1) {
> - speq->tid = tid;
> - thread__zput(speq->thread);
> - } else
> - speq->tid = queue->tid;
> -
> - if ((!speq->thread) && (speq->tid != -1)) {
> - speq->thread = machine__find_thread(spe->machine, -1,
> - speq->tid);
> - }
> -
> - if (speq->thread) {
> - speq->pid = speq->thread->pid_;
> - if (queue->cpu == -1)
> - speq->cpu = speq->thread->cpu;
> - }
> -}
> -
> static int arm_spe_process_queues(struct arm_spe *spe, u64 timestamp)
> {
> unsigned int queue_nr;
> --
> 2.28.0
>