Re: [PATCH v3] perf mem: enable sampling loads and stores simultaneously

From: Stephane Eranian
Date: Wed Dec 17 2014 - 11:38:16 EST


On Wed, Dec 17, 2014 at 11:07 AM, Arnaldo Carvalho de Melo
<acme@xxxxxxxxxx> wrote:
> Em Wed, Dec 17, 2014 at 04:23:55PM +0100, Stephane Eranian escreveu:
>>
>> This patch modifies perf mem to default to sampling loads
>> and stores simultaneously. It could only do one or the other
>> before yet there was no hardware restriction preventing
>> simultaneous collection. With this patch, one run is sufficient
>> to collect both.
>>
>> It is still possible to sample only loads or stores by using the
>> -t option:
>> $ perf mem -t load rec
>> $ perf mem -t load rep
>> Or
>> $ perf mem -t store rec
>> $ perf mem -t store rep
>>
>> The perf report TUI will show one event at a time. The store
>> output will contain a Weight column which will be empty.
>>
>> In V2, we updated the man pages to reflect the change and
>> also simplify the initialization of the argv vector passed
>> to the cmd_*() functions as per LKML feedback.
>>
>> In V3, we fixed typos in the changelog.
>>
>> Signed-off-by: Stephane Eranian <eranian@xxxxxxxxxx>
>
> So here it goes a v4, just a minor change to avoid adding another global
> variable and make this operation parm like the other parms inside that
> struct perf_mem, attached goes the interdiff from your patch to this one
> here, ok?
>
So you want me to post V4 with this one? I am fine if you add yours
directly on top of mine.

Acked-by: Stephane Eranian <eranian@xxxxxxxxxx>

> diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
> index 1d78a4064da4..43310d8661fe 100644
> --- a/tools/perf/Documentation/perf-mem.txt
> +++ b/tools/perf/Documentation/perf-mem.txt
> @@ -12,11 +12,12 @@ SYNOPSIS
>
> DESCRIPTION
> -----------
> -"perf mem -t <TYPE> record" runs a command and gathers memory operation data
> +"perf mem record" runs a command and gathers memory operation data
> from it, into perf.data. Perf record options are accepted and are passed through.
>
> -"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
> -right set of options to display a memory access profile.
> +"perf mem report" displays the result. It invokes perf report with the
> +right set of options to display a memory access profile. By default, loads
> +and stores are sampled. Use the -t option to limit to loads or stores.
>
> Note that on Intel systems the memory latency reported is the use-latency,
> not the pure load (or store latency). Use latency includes any pipeline
> @@ -29,7 +30,7 @@ OPTIONS
>
> -t::
> --type=::
> - Select the memory operation type: load or store (default: load)
> + Select the memory operation type: load or store (default: load,store)
>
> -D::
> --dump-raw-samples=::
> diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
> index 24db6ffe2957..9b5663950a4d 100644
> --- a/tools/perf/builtin-mem.c
> +++ b/tools/perf/builtin-mem.c
> @@ -7,44 +7,47 @@
> #include "util/session.h"
> #include "util/data.h"
>
> -#define MEM_OPERATION_LOAD "load"
> -#define MEM_OPERATION_STORE "store"
> -
> -static const char *mem_operation = MEM_OPERATION_LOAD;
> +#define MEM_OPERATION_LOAD 0x1
> +#define MEM_OPERATION_STORE 0x2
>
> struct perf_mem {
> struct perf_tool tool;
> char const *input_name;
> bool hide_unresolved;
> bool dump_raw;
> + int operation;
> const char *cpu_list;
> DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> };
>
> -static int __cmd_record(int argc, const char **argv)
> +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
> {
> int rec_argc, i = 0, j;
> const char **rec_argv;
> - char event[64];
> int ret;
>
> - rec_argc = argc + 4;
> + rec_argc = argc + 7; /* max number of arguments */
> rec_argv = calloc(rec_argc + 1, sizeof(char *));
> if (!rec_argv)
> return -1;
>
> - rec_argv[i++] = strdup("record");
> - if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
> - rec_argv[i++] = strdup("-W");
> - rec_argv[i++] = strdup("-d");
> - rec_argv[i++] = strdup("-e");
> + rec_argv[i++] = "record";
>
> - if (strcmp(mem_operation, MEM_OPERATION_LOAD))
> - sprintf(event, "cpu/mem-stores/pp");
> - else
> - sprintf(event, "cpu/mem-loads/pp");
> + if (mem->operation & MEM_OPERATION_LOAD)
> + rec_argv[i++] = "-W";
> +
> + rec_argv[i++] = "-d";
> +
> + if (mem->operation & MEM_OPERATION_LOAD) {
> + rec_argv[i++] = "-e";
> + rec_argv[i++] = "cpu/mem-loads/pp";
> + }
> +
> + if (mem->operation & MEM_OPERATION_STORE) {
> + rec_argv[i++] = "-e";
> + rec_argv[i++] = "cpu/mem-stores/pp";
> + }
>
> - rec_argv[i++] = strdup(event);
> for (j = 1; j < argc; j++, i++)
> rec_argv[i] = argv[j];
>
> @@ -162,17 +165,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
> if (!rep_argv)
> return -1;
>
> - rep_argv[i++] = strdup("report");
> - rep_argv[i++] = strdup("--mem-mode");
> - rep_argv[i++] = strdup("-n"); /* display number of samples */
> + rep_argv[i++] = "report";
> + rep_argv[i++] = "--mem-mode";
> + rep_argv[i++] = "-n"; /* display number of samples */
>
> /*
> * there is no weight (cost) associated with stores, so don't print
> * the column
> */
> - if (strcmp(mem_operation, MEM_OPERATION_LOAD))
> - rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
> - "dso_daddr,tlb,locked");
> + if (!(mem->operation & MEM_OPERATION_LOAD))
> + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
> + "dso_daddr,tlb,locked";
>
> for (j = 1; j < argc; j++, i++)
> rep_argv[i] = argv[j];
> @@ -182,6 +185,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
> return ret;
> }
>
> +struct mem_mode {
> + const char *name;
> + int mode;
> +};
> +
> +#define MEM_OPT(n, m) \
> + { .name = n, .mode = (m) }
> +
> +#define MEM_END { .name = NULL }
> +
> +static const struct mem_mode mem_modes[]={
> + MEM_OPT("load", MEM_OPERATION_LOAD),
> + MEM_OPT("store", MEM_OPERATION_STORE),
> + MEM_END
> +};
> +
> +static int
> +parse_mem_ops(const struct option *opt, const char *str, int unset)
> +{
> + int *mode = (int *)opt->value;
> + const struct mem_mode *m;
> + char *s, *os = NULL, *p;
> + int ret = -1;
> +
> + if (unset)
> + return 0;
> +
> + /* str may be NULL in case no arg is passed to -t */
> + if (str) {
> + /* because str is read-only */
> + s = os = strdup(str);
> + if (!s)
> + return -1;
> +
> + /* reset mode */
> + *mode = 0;
> +
> + for (;;) {
> + p = strchr(s, ',');
> + if (p)
> + *p = '\0';
> +
> + for (m = mem_modes; m->name; m++) {
> + if (!strcasecmp(s, m->name))
> + break;
> + }
> + if (!m->name) {
> + fprintf(stderr, "unknown sampling op %s,"
> + " check man page\n", s);
> + goto error;
> + }
> +
> + *mode |= m->mode;
> +
> + if (!p)
> + break;
> +
> + s = p + 1;
> + }
> + }
> + ret = 0;
> +
> + if (*mode == 0)
> + *mode = MEM_OPERATION_LOAD;
> +error:
> + free(os);
> + return ret;
> +}
> +
> int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> {
> struct stat st;
> @@ -197,10 +269,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> .ordered_events = true,
> },
> .input_name = "perf.data",
> + /*
> + * default to both load an store sampling
> + */
> + .operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
> };
> const struct option mem_options[] = {
> - OPT_STRING('t', "type", &mem_operation,
> - "type", "memory operations(load/store)"),
> + OPT_CALLBACK('t', "type", &mem.operation,
> + "type", "memory operations(load,store) Default load,store",
> + parse_mem_ops),
> OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
> "dump raw samples in ASCII"),
> OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
> @@ -225,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
> mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
>
> - if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
> + if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
> usage_with_options(mem_usage, mem_options);
>
> if (!mem.input_name || !strlen(mem.input_name)) {
> @@ -236,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> }
>
> if (!strncmp(argv[0], "rec", 3))
> - return __cmd_record(argc, argv);
> + return __cmd_record(argc, argv, &mem);
> else if (!strncmp(argv[0], "rep", 3))
> return report_events(argc, argv, &mem);
> else
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/