Re: [PATCH v3] perf mem: enable sampling loads and stores simultaneously

From: Arnaldo Carvalho de Melo
Date: Wed Dec 17 2014 - 11:49:36 EST


Em Wed, Dec 17, 2014 at 11:38:11AM -0500, Stephane Eranian escreveu:
> On Wed, Dec 17, 2014 at 11:07 AM, Arnaldo Carvalho de Melo
> <acme@xxxxxxxxxx> wrote:
> > Em Wed, Dec 17, 2014 at 04:23:55PM +0100, Stephane Eranian escreveu:
> >>
> >> This patch modifies perf mem to default to sampling loads
> >> and stores simultaneously. It could only do one or the other
> >> before yet there was no hardware restriction preventing
> >> simultaneous collection. With this patch, one run is sufficient
> >> to collect both.
> >>
> >> It is still possible to sample only loads or stores by using the
> >> -t option:
> >> $ perf mem -t load rec
> >> $ perf mem -t load rep
> >> Or
> >> $ perf mem -t store rec
> >> $ perf mem -t store rep
> >>
> >> The perf report TUI will show one event at a time. The store
> >> output will contain a Weight column which will be empty.
> >>
> >> In V2, we updated the man pages to reflect the change and
> >> also simplify the initialization of the argv vector passed
> >> to the cmd_*() functions as per LKML feedback.
> >>
> >> In V3, we fixed typos in the changelog.
> >>
> >> Signed-off-by: Stephane Eranian <eranian@xxxxxxxxxx>
> >
> > So here it goes a v4, just a minor change to avoid adding another global
> > variable and make this operation parm like the other parms inside that
> > struct perf_mem, attached goes the interdiff from your patch to this one
> > here, ok?

> So you want me to post V4 with this one? I am fine if you add yours
> directly on top of mine.

Ok, then I'll do it + your ack, thanks for checking!

- Arnaldo

> Acked-by: Stephane Eranian <eranian@xxxxxxxxxx>
>
> > diff --git a/tools/perf/Documentation/perf-mem.txt b/tools/perf/Documentation/perf-mem.txt
> > index 1d78a4064da4..43310d8661fe 100644
> > --- a/tools/perf/Documentation/perf-mem.txt
> > +++ b/tools/perf/Documentation/perf-mem.txt
> > @@ -12,11 +12,12 @@ SYNOPSIS
> >
> > DESCRIPTION
> > -----------
> > -"perf mem -t <TYPE> record" runs a command and gathers memory operation data
> > +"perf mem record" runs a command and gathers memory operation data
> > from it, into perf.data. Perf record options are accepted and are passed through.
> >
> > -"perf mem -t <TYPE> report" displays the result. It invokes perf report with the
> > -right set of options to display a memory access profile.
> > +"perf mem report" displays the result. It invokes perf report with the
> > +right set of options to display a memory access profile. By default, loads
> > +and stores are sampled. Use the -t option to limit to loads or stores.
> >
> > Note that on Intel systems the memory latency reported is the use-latency,
> > not the pure load (or store latency). Use latency includes any pipeline
> > @@ -29,7 +30,7 @@ OPTIONS
> >
> > -t::
> > --type=::
> > - Select the memory operation type: load or store (default: load)
> > + Select the memory operation type: load or store (default: load,store)
> >
> > -D::
> > --dump-raw-samples=::
> > diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c
> > index 24db6ffe2957..9b5663950a4d 100644
> > --- a/tools/perf/builtin-mem.c
> > +++ b/tools/perf/builtin-mem.c
> > @@ -7,44 +7,47 @@
> > #include "util/session.h"
> > #include "util/data.h"
> >
> > -#define MEM_OPERATION_LOAD "load"
> > -#define MEM_OPERATION_STORE "store"
> > -
> > -static const char *mem_operation = MEM_OPERATION_LOAD;
> > +#define MEM_OPERATION_LOAD 0x1
> > +#define MEM_OPERATION_STORE 0x2
> >
> > struct perf_mem {
> > struct perf_tool tool;
> > char const *input_name;
> > bool hide_unresolved;
> > bool dump_raw;
> > + int operation;
> > const char *cpu_list;
> > DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS);
> > };
> >
> > -static int __cmd_record(int argc, const char **argv)
> > +static int __cmd_record(int argc, const char **argv, struct perf_mem *mem)
> > {
> > int rec_argc, i = 0, j;
> > const char **rec_argv;
> > - char event[64];
> > int ret;
> >
> > - rec_argc = argc + 4;
> > + rec_argc = argc + 7; /* max number of arguments */
> > rec_argv = calloc(rec_argc + 1, sizeof(char *));
> > if (!rec_argv)
> > return -1;
> >
> > - rec_argv[i++] = strdup("record");
> > - if (!strcmp(mem_operation, MEM_OPERATION_LOAD))
> > - rec_argv[i++] = strdup("-W");
> > - rec_argv[i++] = strdup("-d");
> > - rec_argv[i++] = strdup("-e");
> > + rec_argv[i++] = "record";
> >
> > - if (strcmp(mem_operation, MEM_OPERATION_LOAD))
> > - sprintf(event, "cpu/mem-stores/pp");
> > - else
> > - sprintf(event, "cpu/mem-loads/pp");
> > + if (mem->operation & MEM_OPERATION_LOAD)
> > + rec_argv[i++] = "-W";
> > +
> > + rec_argv[i++] = "-d";
> > +
> > + if (mem->operation & MEM_OPERATION_LOAD) {
> > + rec_argv[i++] = "-e";
> > + rec_argv[i++] = "cpu/mem-loads/pp";
> > + }
> > +
> > + if (mem->operation & MEM_OPERATION_STORE) {
> > + rec_argv[i++] = "-e";
> > + rec_argv[i++] = "cpu/mem-stores/pp";
> > + }
> >
> > - rec_argv[i++] = strdup(event);
> > for (j = 1; j < argc; j++, i++)
> > rec_argv[i] = argv[j];
> >
> > @@ -162,17 +165,17 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
> > if (!rep_argv)
> > return -1;
> >
> > - rep_argv[i++] = strdup("report");
> > - rep_argv[i++] = strdup("--mem-mode");
> > - rep_argv[i++] = strdup("-n"); /* display number of samples */
> > + rep_argv[i++] = "report";
> > + rep_argv[i++] = "--mem-mode";
> > + rep_argv[i++] = "-n"; /* display number of samples */
> >
> > /*
> > * there is no weight (cost) associated with stores, so don't print
> > * the column
> > */
> > - if (strcmp(mem_operation, MEM_OPERATION_LOAD))
> > - rep_argv[i++] = strdup("--sort=mem,sym,dso,symbol_daddr,"
> > - "dso_daddr,tlb,locked");
> > + if (!(mem->operation & MEM_OPERATION_LOAD))
> > + rep_argv[i++] = "--sort=mem,sym,dso,symbol_daddr,"
> > + "dso_daddr,tlb,locked";
> >
> > for (j = 1; j < argc; j++, i++)
> > rep_argv[i] = argv[j];
> > @@ -182,6 +185,75 @@ static int report_events(int argc, const char **argv, struct perf_mem *mem)
> > return ret;
> > }
> >
> > +struct mem_mode {
> > + const char *name;
> > + int mode;
> > +};
> > +
> > +#define MEM_OPT(n, m) \
> > + { .name = n, .mode = (m) }
> > +
> > +#define MEM_END { .name = NULL }
> > +
> > +static const struct mem_mode mem_modes[]={
> > + MEM_OPT("load", MEM_OPERATION_LOAD),
> > + MEM_OPT("store", MEM_OPERATION_STORE),
> > + MEM_END
> > +};
> > +
> > +static int
> > +parse_mem_ops(const struct option *opt, const char *str, int unset)
> > +{
> > + int *mode = (int *)opt->value;
> > + const struct mem_mode *m;
> > + char *s, *os = NULL, *p;
> > + int ret = -1;
> > +
> > + if (unset)
> > + return 0;
> > +
> > + /* str may be NULL in case no arg is passed to -t */
> > + if (str) {
> > + /* because str is read-only */
> > + s = os = strdup(str);
> > + if (!s)
> > + return -1;
> > +
> > + /* reset mode */
> > + *mode = 0;
> > +
> > + for (;;) {
> > + p = strchr(s, ',');
> > + if (p)
> > + *p = '\0';
> > +
> > + for (m = mem_modes; m->name; m++) {
> > + if (!strcasecmp(s, m->name))
> > + break;
> > + }
> > + if (!m->name) {
> > + fprintf(stderr, "unknown sampling op %s,"
> > + " check man page\n", s);
> > + goto error;
> > + }
> > +
> > + *mode |= m->mode;
> > +
> > + if (!p)
> > + break;
> > +
> > + s = p + 1;
> > + }
> > + }
> > + ret = 0;
> > +
> > + if (*mode == 0)
> > + *mode = MEM_OPERATION_LOAD;
> > +error:
> > + free(os);
> > + return ret;
> > +}
> > +
> > int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> > {
> > struct stat st;
> > @@ -197,10 +269,15 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> > .ordered_events = true,
> > },
> > .input_name = "perf.data",
> > + /*
> > + * default to both load an store sampling
> > + */
> > + .operation = MEM_OPERATION_LOAD | MEM_OPERATION_STORE,
> > };
> > const struct option mem_options[] = {
> > - OPT_STRING('t', "type", &mem_operation,
> > - "type", "memory operations(load/store)"),
> > + OPT_CALLBACK('t', "type", &mem.operation,
> > + "type", "memory operations(load,store) Default load,store",
> > + parse_mem_ops),
> > OPT_BOOLEAN('D', "dump-raw-samples", &mem.dump_raw,
> > "dump raw samples in ASCII"),
> > OPT_BOOLEAN('U', "hide-unresolved", &mem.hide_unresolved,
> > @@ -225,7 +302,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> > argc = parse_options_subcommand(argc, argv, mem_options, mem_subcommands,
> > mem_usage, PARSE_OPT_STOP_AT_NON_OPTION);
> >
> > - if (!argc || !(strncmp(argv[0], "rec", 3) || mem_operation))
> > + if (!argc || !(strncmp(argv[0], "rec", 3) || mem.operation))
> > usage_with_options(mem_usage, mem_options);
> >
> > if (!mem.input_name || !strlen(mem.input_name)) {
> > @@ -236,7 +313,7 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused)
> > }
> >
> > if (!strncmp(argv[0], "rec", 3))
> > - return __cmd_record(argc, argv);
> > + return __cmd_record(argc, argv, &mem);
> > else if (!strncmp(argv[0], "rep", 3))
> > return report_events(argc, argv, &mem);
> > else
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/