Re: [PATCH V8 08/25] perf tools: Add Intel BTS support

From: Arnaldo Carvalho de Melo
Date: Mon Aug 17 2015 - 11:53:01 EST


Em Fri, Jul 17, 2015 at 07:33:43PM +0300, Adrian Hunter escreveu:
> Intel BTS support fits within the new auxtrace infrastructure.
> Recording is supported by identifying the Intel BTS PMU, parsing
> options and setting up events.
>
> Decoding is supported by queuing up trace data by thread and then
> decoding synchronously delivering synthesized event samples into the
> session processing for tools to consume.

So, I am not able to reproduce the results from the last time I tried
it.

[root@zoo ~]# uname -r
4.2.0-rc5+

None of the DSOs are being resolved :-\ Same machine; I will try after lunch
with a tip/master built kernel. Right now I get the output below; perhaps it is
related to that "81649 instruction trace errors" message? I'll see what the
results are with tip/master; meanwhile what I have is at the tmp.perf/intel_pt
branch in my tree.

- Arnaldo

[root@zoo ~]# perf record --per-thread -e intel_bts// usleep 1
[ perf record: Woken up 1 times to write data ]
[ perf record: Captured and wrote 1.884 MB perf.data ]
[root@zoo ~]# perf evlist
intel_bts//
dummy:u
[root@zoo ~]# perf evlist -v
intel_bts//: type: 6, size: 112, { sample_period, sample_freq }: 1,
sample_type: IP|TID|IDENTIFIER, read_format: ID, disabled: 1,
enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1
dummy:u: type: 1, size: 112, config: 0x9, { sample_period, sample_freq
}: 1, sample_type: IP|TID|IDENTIFIER, read_format: ID, disabled: 1,
exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, enable_on_exec: 1,
task: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1
[root@zoo ~]#

Warning:
81649 instruction trace errors
# To display the perf.data header info, please use --header/--header-only options.
#
#
# Total Lost Samples: 0
#
# Samples: 0 of event 'intel_bts//'
# Event count (approx.): 0
#
# Overhead Command Shared Object Symbol
# ........ ....... ............. ......
#


# Samples: 0 of event 'dummy:u'
# Event count (approx.): 0
#
# Overhead Command Shared Object Symbol
# ........ ....... ............. ......
#


# Samples: 81K of event 'branches'
# Event count (approx.): 81649
#
# Overhead Command Shared Object Symbol
# ........ ....... ................ ......................
#
2.71% usleep [unknown] [.] 0x00007fa0ff695061
2.62% usleep [unknown] [.] 0x00007fa0ffb3de7e
1.96% usleep [unknown] [.] 0x00007fa0ffb2e726
1.92% usleep [unknown] [.] 0x00007fa0ff695086
1.60% usleep [unknown] [.] 0xffffffff811c91d0
1.48% usleep [unknown] [.] 0x00007fa0ffb3030d
1.24% usleep [unknown] [.] 0x00007fa0ff6950c7




> E.g:
>
> [root@zoo ~]# perf record --per-thread -e intel_bts// ls
> anaconda-ks.cfg b bin lib64 libexec new old perf.data
> perf.data.old stream_test tg.run
> [ perf record: Woken up 2 times to write data ]
> [ perf record: Captured and wrote 4.242 MB perf.data ]
> [root@zoo ~]# perf evlist
> intel_bts//
> dummy:u
> [root@zoo ~]# perf evlist -v
> intel_bts//: type: 7, size: 112, { sample_period, sample_freq }: 1,
> sample_type: IP|TID|IDENTIFIER, read_format: ID, disabled: 1,
> enable_on_exec: 1, sample_id_all: 1, exclude_guest: 1 dummy:u:
> type: 1, size: 112, config: 0x9, { sample_period, sample_freq }: 1,
> sample_type: IP|TID|IDENTIFIER, read_format: ID, disabled: 1,
> exclude_kernel: 1, exclude_hv: 1, mmap: 1, comm: 1, enable_on_exec: 1,
> task: 1, sample_id_all: 1, mmap2: 1, comm_exec: 1
> [root@zoo ~]# perf report --stdio
> # To display the perf.data header info, please use --header/--header-only options.
> #
> #
> # Total Lost Samples: 0
> #
> # Samples: 0 of event 'intel_bts//'
> # Event count (approx.): 0
> #
> # Overhead Command Shared Object Symbol
> # ........ ....... ............. ......
> #
>
> # Samples: 0 of event 'dummy:u'
> # Event count (approx.): 0
> #
> # Overhead Command Shared Object Symbol
> # ........ ....... ............. ......
> #
>
> # Samples: 184K of event 'branches'
> # Event count (approx.): 184522
> #
> # Overhead Command Shared Object Symbol
> # ........ ....... .................. ..............................................
> #
> 7.36% ls [kernel.kallsyms] [.] unmap_single_vma
> 4.69% ls ld-2.20.so [.] strcmp
> 4.66% ls libc-2.20.so [.] _dl_addr
> 3.43% ls [kernel.kallsyms] [.] change_protection
> 3.01% ls ld-2.20.so [.] do_lookup_x
> 2.18% ls [kernel.kallsyms] [.] filemap_map_pages
> 2.09% ls ld-2.20.so [.] _dl_name_match_p
> 1.93% ls ld-2.20.so [.] _dl_lookup_symbol_x
> 1.91% ls [kernel.kallsyms] [.] page_remove_rmap
> 1.72% ls [kernel.kallsyms] [.] do_set_pte
> 1.48% ls ld-2.20.so [.] _dl_relocate_object
> 1.34% ls [kernel.kallsyms] [.] mem_cgroup_begin_page_stat
> 1.11% ls [kernel.kallsyms] [.] page_add_file_rmap
> 1.00% ls [kernel.kallsyms] [.] mark_page_accessed
> 0.97% ls [kernel.kallsyms] [.] perf_event_aux
> 0.95% ls [kernel.kallsyms] [.] handle_mm_fault
> 0.94% ls [kernel.kallsyms] [.] get_page_from_freelist
> <SNIP>
> [root@zoo ~]# perf record --per-thread -e intel_bts//u ls
> <SNIP>
> [ perf record: Woken up 1 times to write data ]
> [ perf record: Captured and wrote 1.278 MB perf.data ]
> [root@zoo ~]# perf report --stdio
> <SNIP>
> # Samples: 55K of event 'branches:u'
> # Event count (approx.): 55165
> #
> # Overhead Command Shared Object Symbol
> # ........ ....... .................. ......................................
> #
> 15.69% ls ld-2.20.so [.] strcmp
> 15.58% ls libc-2.20.so [.] _dl_addr
> 10.05% ls ld-2.20.so [.] do_lookup_x
> 6.98% ls ld-2.20.so [.] _dl_name_match_p
> 6.46% ls ld-2.20.so [.] _dl_lookup_symbol_x
> 4.95% ls ld-2.20.so [.] _dl_relocate_object
> 2.96% ls ls [.] quotearg_buffer_restyled
> 2.78% ls libc-2.20.so [.] getenv
> 1.91% ls ld-2.20.so [.] _dl_cache_libcmp
> 1.76% ls libc-2.20.so [.] __memmove_sse2
> 1.75% ls ld-2.20.so [.] check_match.isra.0
> 1.47% ls ld-2.20.so [.] _dl_map_object_deps
> 1.27% ls ld-2.20.so [.] _dl_map_object_from_fd
> 1.17% ls ls [.] quote_name
> 1.16% ls ld-2.20.so [.] _dl_check_map_versions
> <SNIP>
> 0.19% ls [kernel.kallsyms] [.] entry_SYSCALL_64_fastpath
> 0.19% ls [kernel.kallsyms] [.] native_irq_return_iret
> 0.19% ls libc-2.20.so [.] _IO_file_xsputn@@GLIBC_2.2.5
> <SNIP>
>
> Signed-off-by: Adrian Hunter <adrian.hunter@xxxxxxxxx>
> ---
> tools/perf/Documentation/intel-bts.txt | 86 +++
> tools/perf/arch/x86/util/Build | 1 +
> tools/perf/arch/x86/util/auxtrace.c | 49 +-
> tools/perf/arch/x86/util/intel-bts.c | 458 ++++++++++++++++
> tools/perf/arch/x86/util/pmu.c | 3 +
> tools/perf/util/Build | 1 +
> tools/perf/util/auxtrace.c | 3 +
> tools/perf/util/auxtrace.h | 1 +
> tools/perf/util/intel-bts.c | 933 +++++++++++++++++++++++++++++++++
> tools/perf/util/intel-bts.h | 43 ++
> tools/perf/util/pmu.c | 4 -
> 11 files changed, 1576 insertions(+), 6 deletions(-)
> create mode 100644 tools/perf/Documentation/intel-bts.txt
> create mode 100644 tools/perf/arch/x86/util/intel-bts.c
> create mode 100644 tools/perf/util/intel-bts.c
> create mode 100644 tools/perf/util/intel-bts.h
>
> diff --git a/tools/perf/Documentation/intel-bts.txt b/tools/perf/Documentation/intel-bts.txt
> new file mode 100644
> index 000000000000..8bdc93bd7fdb
> --- /dev/null
> +++ b/tools/perf/Documentation/intel-bts.txt
> @@ -0,0 +1,86 @@
> +Intel Branch Trace Store
> +========================
> +
> +Overview
> +========
> +
> +Intel BTS could be regarded as a predecessor to Intel PT and has some
> +similarities because it can also identify every branch a program takes. A
> +notable difference is that Intel BTS has no timing information and as a
> +consequence the present implementation is limited to per-thread recording.
> +
> +While decoding Intel BTS does not require walking the object code, the object
> +code is still needed to pair up calls and returns correctly, consequently much
> +of the Intel PT documentation applies also to Intel BTS. Refer to the Intel PT
> +documentation and consider that the PMU 'intel_bts' can usually be used in
> +place of 'intel_pt' in the examples provided, with the proviso that per-thread
> +recording must also be stipulated i.e. the --per-thread option for
> +'perf record'.
> +
> +
> +perf record
> +===========
> +
> +new event
> +---------
> +
> +The Intel BTS kernel driver creates a new PMU for Intel BTS. The perf record
> +option is:
> +
> + -e intel_bts//
> +
> +Currently Intel BTS is limited to per-thread tracing so the --per-thread option
> +is also needed.
> +
> +
> +snapshot option
> +---------------
> +
> +The snapshot option is the same as for Intel PT (refer to the Intel PT documentation).
> +
> +
> +auxtrace mmap size option
> +-----------------------
> +
> +The mmap size option is the same as for Intel PT (refer to the Intel PT documentation).
> +
> +
> +perf script
> +===========
> +
> +By default, perf script will decode trace data found in the perf.data file.
> +This can be further controlled by the --itrace option. The --itrace option is
> +the same as for Intel PT (refer to the Intel PT documentation) except that
> +neither "instructions" events nor "transactions" events (and consequently
> +call chains) are supported.
> +
> +To disable trace decoding entirely, use the option --no-itrace.
> +
> +
> +dump option
> +-----------
> +
> +perf script has an option (-D) to "dump" the events i.e. display the binary
> +data.
> +
> +When -D is used, Intel BTS packets are displayed.
> +
> +To disable the display of Intel BTS packets, combine the -D option with
> +--no-itrace.
> +
> +
> +perf report
> +===========
> +
> +By default, perf report will decode trace data found in the perf.data file.
> +This can be further controlled by the --itrace option, exactly as for
> +perf script.
> +
> +
> +perf inject
> +===========
> +
> +perf inject also accepts the --itrace option in which case tracing data is
> +removed and replaced with the synthesized events. e.g.
> +
> + perf inject --itrace -i perf.data -o perf.data.new
> diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build
> index a8be9f9d0462..2c55e1b336c5 100644
> --- a/tools/perf/arch/x86/util/Build
> +++ b/tools/perf/arch/x86/util/Build
> @@ -10,3 +10,4 @@ libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o
>
> libperf-$(CONFIG_AUXTRACE) += auxtrace.o
> libperf-$(CONFIG_AUXTRACE) += intel-pt.o
> +libperf-$(CONFIG_AUXTRACE) += intel-bts.o
> diff --git a/tools/perf/arch/x86/util/auxtrace.c b/tools/perf/arch/x86/util/auxtrace.c
> index e7654b506312..7a7805583e3f 100644
> --- a/tools/perf/arch/x86/util/auxtrace.c
> +++ b/tools/perf/arch/x86/util/auxtrace.c
> @@ -13,11 +13,56 @@
> *
> */
>
> +#include <stdbool.h>
> +
> #include "../../util/header.h"
> +#include "../../util/debug.h"
> +#include "../../util/pmu.h"
> #include "../../util/auxtrace.h"
> #include "../../util/intel-pt.h"
> +#include "../../util/intel-bts.h"
> +#include "../../util/evlist.h"
> +
> +static
> +struct auxtrace_record *auxtrace_record__init_intel(struct perf_evlist *evlist,
> + int *err)
> +{
> + struct perf_pmu *intel_pt_pmu;
> + struct perf_pmu *intel_bts_pmu;
> + struct perf_evsel *evsel;
> + bool found_pt = false;
> + bool found_bts = false;
> +
> + intel_pt_pmu = perf_pmu__find(INTEL_PT_PMU_NAME);
> + intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
> +
> + if (evlist) {
> + evlist__for_each(evlist, evsel) {
> + if (intel_pt_pmu &&
> + evsel->attr.type == intel_pt_pmu->type)
> + found_pt = true;
> + if (intel_bts_pmu &&
> + evsel->attr.type == intel_bts_pmu->type)
> + found_bts = true;
> + }
> + }
> +
> + if (found_pt && found_bts) {
> + pr_err("intel_pt and intel_bts may not be used together\n");
> + *err = -EINVAL;
> + return NULL;
> + }
> +
> + if (found_pt)
> + return intel_pt_recording_init(err);
> +
> + if (found_bts)
> + return intel_bts_recording_init(err);
>
> -struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe_unused,
> + return NULL;
> +}
> +
> +struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist,
> int *err)
> {
> char buffer[64];
> @@ -32,7 +77,7 @@ struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist __maybe
> }
>
> if (!strncmp(buffer, "GenuineIntel,", 13))
> - return intel_pt_recording_init(err);
> + return auxtrace_record__init_intel(evlist, err);
>
> return NULL;
> }
> diff --git a/tools/perf/arch/x86/util/intel-bts.c b/tools/perf/arch/x86/util/intel-bts.c
> new file mode 100644
> index 000000000000..9b94ce520917
> --- /dev/null
> +++ b/tools/perf/arch/x86/util/intel-bts.c
> @@ -0,0 +1,458 @@
> +/*
> + * intel-bts.c: Intel Branch Trace Store support
> + * Copyright (c) 2013-2015, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#include <linux/kernel.h>
> +#include <linux/types.h>
> +#include <linux/bitops.h>
> +#include <linux/log2.h>
> +
> +#include "../../util/cpumap.h"
> +#include "../../util/evsel.h"
> +#include "../../util/evlist.h"
> +#include "../../util/session.h"
> +#include "../../util/util.h"
> +#include "../../util/pmu.h"
> +#include "../../util/debug.h"
> +#include "../../util/tsc.h"
> +#include "../../util/auxtrace.h"
> +#include "../../util/intel-bts.h"
> +
> +#define KiB(x) ((x) * 1024)
> +#define MiB(x) ((x) * 1024 * 1024)
> +#define KiB_MASK(x) (KiB(x) - 1)
> +#define MiB_MASK(x) (MiB(x) - 1)
> +
> +#define INTEL_BTS_DFLT_SAMPLE_SIZE KiB(4)
> +
> +#define INTEL_BTS_MAX_SAMPLE_SIZE KiB(60)
> +
> +struct intel_bts_snapshot_ref {
> + void *ref_buf;
> + size_t ref_offset;
> + bool wrapped;
> +};
> +
> +struct intel_bts_recording {
> + struct auxtrace_record itr;
> + struct perf_pmu *intel_bts_pmu;
> + struct perf_evlist *evlist;
> + bool snapshot_mode;
> + size_t snapshot_size;
> + int snapshot_ref_cnt;
> + struct intel_bts_snapshot_ref *snapshot_refs;
> +};
> +
> +struct branch {
> + u64 from;
> + u64 to;
> + u64 misc;
> +};
> +
> +static size_t intel_bts_info_priv_size(struct auxtrace_record *itr __maybe_unused)
> +{
> + return INTEL_BTS_AUXTRACE_PRIV_SIZE;
> +}
> +
> +static int intel_bts_info_fill(struct auxtrace_record *itr,
> + struct perf_session *session,
> + struct auxtrace_info_event *auxtrace_info,
> + size_t priv_size)
> +{
> + struct intel_bts_recording *btsr =
> + container_of(itr, struct intel_bts_recording, itr);
> + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
> + struct perf_event_mmap_page *pc;
> + struct perf_tsc_conversion tc = { .time_mult = 0, };
> + bool cap_user_time_zero = false;
> + int err;
> +
> + if (priv_size != INTEL_BTS_AUXTRACE_PRIV_SIZE)
> + return -EINVAL;
> +
> + if (!session->evlist->nr_mmaps)
> + return -EINVAL;
> +
> + pc = session->evlist->mmap[0].base;
> + if (pc) {
> + err = perf_read_tsc_conversion(pc, &tc);
> + if (err) {
> + if (err != -EOPNOTSUPP)
> + return err;
> + } else {
> + cap_user_time_zero = tc.time_mult != 0;
> + }
> + if (!cap_user_time_zero)
> + ui__warning("Intel BTS: TSC not available\n");
> + }
> +
> + auxtrace_info->type = PERF_AUXTRACE_INTEL_BTS;
> + auxtrace_info->priv[INTEL_BTS_PMU_TYPE] = intel_bts_pmu->type;
> + auxtrace_info->priv[INTEL_BTS_TIME_SHIFT] = tc.time_shift;
> + auxtrace_info->priv[INTEL_BTS_TIME_MULT] = tc.time_mult;
> + auxtrace_info->priv[INTEL_BTS_TIME_ZERO] = tc.time_zero;
> + auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO] = cap_user_time_zero;
> + auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE] = btsr->snapshot_mode;
> +
> + return 0;
> +}
> +
> +static int intel_bts_recording_options(struct auxtrace_record *itr,
> + struct perf_evlist *evlist,
> + struct record_opts *opts)
> +{
> + struct intel_bts_recording *btsr =
> + container_of(itr, struct intel_bts_recording, itr);
> + struct perf_pmu *intel_bts_pmu = btsr->intel_bts_pmu;
> + struct perf_evsel *evsel, *intel_bts_evsel = NULL;
> + const struct cpu_map *cpus = evlist->cpus;
> + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0;
> +
> + btsr->evlist = evlist;
> + btsr->snapshot_mode = opts->auxtrace_snapshot_mode;
> +
> + evlist__for_each(evlist, evsel) {
> + if (evsel->attr.type == intel_bts_pmu->type) {
> + if (intel_bts_evsel) {
> + pr_err("There may be only one " INTEL_BTS_PMU_NAME " event\n");
> + return -EINVAL;
> + }
> + evsel->attr.freq = 0;
> + evsel->attr.sample_period = 1;
> + intel_bts_evsel = evsel;
> + opts->full_auxtrace = true;
> + }
> + }
> +
> + if (opts->auxtrace_snapshot_mode && !opts->full_auxtrace) {
> + pr_err("Snapshot mode (-S option) requires " INTEL_BTS_PMU_NAME " PMU event (-e " INTEL_BTS_PMU_NAME ")\n");
> + return -EINVAL;
> + }
> +
> + if (!opts->full_auxtrace)
> + return 0;
> +
> + if (opts->full_auxtrace && !cpu_map__empty(cpus)) {
> + pr_err(INTEL_BTS_PMU_NAME " does not support per-cpu recording\n");
> + return -EINVAL;
> + }
> +
> + /* Set default sizes for snapshot mode */
> + if (opts->auxtrace_snapshot_mode) {
> + if (!opts->auxtrace_snapshot_size && !opts->auxtrace_mmap_pages) {
> + if (privileged) {
> + opts->auxtrace_mmap_pages = MiB(4) / page_size;
> + } else {
> + opts->auxtrace_mmap_pages = KiB(128) / page_size;
> + if (opts->mmap_pages == UINT_MAX)
> + opts->mmap_pages = KiB(256) / page_size;
> + }
> + } else if (!opts->auxtrace_mmap_pages && !privileged &&
> + opts->mmap_pages == UINT_MAX) {
> + opts->mmap_pages = KiB(256) / page_size;
> + }
> + if (!opts->auxtrace_snapshot_size)
> + opts->auxtrace_snapshot_size =
> + opts->auxtrace_mmap_pages * (size_t)page_size;
> + if (!opts->auxtrace_mmap_pages) {
> + size_t sz = opts->auxtrace_snapshot_size;
> +
> + sz = round_up(sz, page_size) / page_size;
> + opts->auxtrace_mmap_pages = roundup_pow_of_two(sz);
> + }
> + if (opts->auxtrace_snapshot_size >
> + opts->auxtrace_mmap_pages * (size_t)page_size) {
> + pr_err("Snapshot size %zu must not be greater than AUX area tracing mmap size %zu\n",
> + opts->auxtrace_snapshot_size,
> + opts->auxtrace_mmap_pages * (size_t)page_size);
> + return -EINVAL;
> + }
> + if (!opts->auxtrace_snapshot_size || !opts->auxtrace_mmap_pages) {
> + pr_err("Failed to calculate default snapshot size and/or AUX area tracing mmap pages\n");
> + return -EINVAL;
> + }
> + pr_debug2("Intel BTS snapshot size: %zu\n",
> + opts->auxtrace_snapshot_size);
> + }
> +
> + /* Set default sizes for full trace mode */
> + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) {
> + if (privileged) {
> + opts->auxtrace_mmap_pages = MiB(4) / page_size;
> + } else {
> + opts->auxtrace_mmap_pages = KiB(128) / page_size;
> + if (opts->mmap_pages == UINT_MAX)
> + opts->mmap_pages = KiB(256) / page_size;
> + }
> + }
> +
> + /* Validate auxtrace_mmap_pages */
> + if (opts->auxtrace_mmap_pages) {
> + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size;
> + size_t min_sz;
> +
> + if (opts->auxtrace_snapshot_mode)
> + min_sz = KiB(4);
> + else
> + min_sz = KiB(8);
> +
> + if (sz < min_sz || !is_power_of_2(sz)) {
> + pr_err("Invalid mmap size for Intel BTS: must be at least %zuKiB and a power of 2\n",
> + min_sz / 1024);
> + return -EINVAL;
> + }
> + }
> +
> + if (intel_bts_evsel) {
> + /*
> + * To obtain the auxtrace buffer file descriptor, the auxtrace event
> + * must come first.
> + */
> + perf_evlist__to_front(evlist, intel_bts_evsel);
> + /*
> + * In the case of per-cpu mmaps, we need the CPU on the
> + * AUX event.
> + */
> + if (!cpu_map__empty(cpus))
> + perf_evsel__set_sample_bit(intel_bts_evsel, CPU);
> + }
> +
> + /* Add dummy event to keep tracking */
> + if (opts->full_auxtrace) {
> + struct perf_evsel *tracking_evsel;
> + int err;
> +
> + err = parse_events(evlist, "dummy:u", NULL);
> + if (err)
> + return err;
> +
> + tracking_evsel = perf_evlist__last(evlist);
> +
> + perf_evlist__set_tracking_event(evlist, tracking_evsel);
> +
> + tracking_evsel->attr.freq = 0;
> + tracking_evsel->attr.sample_period = 1;
> + }
> +
> + return 0;
> +}
> +
> +static int intel_bts_parse_snapshot_options(struct auxtrace_record *itr,
> + struct record_opts *opts,
> + const char *str)
> +{
> + struct intel_bts_recording *btsr =
> + container_of(itr, struct intel_bts_recording, itr);
> + unsigned long long snapshot_size = 0;
> + char *endptr;
> +
> + if (str) {
> + snapshot_size = strtoull(str, &endptr, 0);
> + if (*endptr || snapshot_size > SIZE_MAX)
> + return -1;
> + }
> +
> + opts->auxtrace_snapshot_mode = true;
> + opts->auxtrace_snapshot_size = snapshot_size;
> +
> + btsr->snapshot_size = snapshot_size;
> +
> + return 0;
> +}
> +
> +static u64 intel_bts_reference(struct auxtrace_record *itr __maybe_unused)
> +{
> + return rdtsc();
> +}
> +
> +static int intel_bts_alloc_snapshot_refs(struct intel_bts_recording *btsr,
> + int idx)
> +{
> + const size_t sz = sizeof(struct intel_bts_snapshot_ref);
> + int cnt = btsr->snapshot_ref_cnt, new_cnt = cnt * 2;
> + struct intel_bts_snapshot_ref *refs;
> +
> + if (!new_cnt)
> + new_cnt = 16;
> +
> + while (new_cnt <= idx)
> + new_cnt *= 2;
> +
> + refs = calloc(new_cnt, sz);
> + if (!refs)
> + return -ENOMEM;
> +
> + memcpy(refs, btsr->snapshot_refs, cnt * sz);
> +
> + btsr->snapshot_refs = refs;
> + btsr->snapshot_ref_cnt = new_cnt;
> +
> + return 0;
> +}
> +
> +static void intel_bts_free_snapshot_refs(struct intel_bts_recording *btsr)
> +{
> + int i;
> +
> + for (i = 0; i < btsr->snapshot_ref_cnt; i++)
> + zfree(&btsr->snapshot_refs[i].ref_buf);
> + zfree(&btsr->snapshot_refs);
> +}
> +
> +static void intel_bts_recording_free(struct auxtrace_record *itr)
> +{
> + struct intel_bts_recording *btsr =
> + container_of(itr, struct intel_bts_recording, itr);
> +
> + intel_bts_free_snapshot_refs(btsr);
> + free(btsr);
> +}
> +
> +static int intel_bts_snapshot_start(struct auxtrace_record *itr)
> +{
> + struct intel_bts_recording *btsr =
> + container_of(itr, struct intel_bts_recording, itr);
> + struct perf_evsel *evsel;
> +
> + evlist__for_each(btsr->evlist, evsel) {
> + if (evsel->attr.type == btsr->intel_bts_pmu->type)
> + return perf_evlist__disable_event(btsr->evlist, evsel);
> + }
> + return -EINVAL;
> +}
> +
> +static int intel_bts_snapshot_finish(struct auxtrace_record *itr)
> +{
> + struct intel_bts_recording *btsr =
> + container_of(itr, struct intel_bts_recording, itr);
> + struct perf_evsel *evsel;
> +
> + evlist__for_each(btsr->evlist, evsel) {
> + if (evsel->attr.type == btsr->intel_bts_pmu->type)
> + return perf_evlist__enable_event(btsr->evlist, evsel);
> + }
> + return -EINVAL;
> +}
> +
> +static bool intel_bts_first_wrap(u64 *data, size_t buf_size)
> +{
> + int i, a, b;
> +
> + b = buf_size >> 3;
> + a = b - 512;
> + if (a < 0)
> + a = 0;
> +
> + for (i = a; i < b; i++) {
> + if (data[i])
> + return true;
> + }
> +
> + return false;
> +}
> +
> +static int intel_bts_find_snapshot(struct auxtrace_record *itr, int idx,
> + struct auxtrace_mmap *mm, unsigned char *data,
> + u64 *head, u64 *old)
> +{
> + struct intel_bts_recording *btsr =
> + container_of(itr, struct intel_bts_recording, itr);
> + bool wrapped;
> + int err;
> +
> + pr_debug3("%s: mmap index %d old head %zu new head %zu\n",
> + __func__, idx, (size_t)*old, (size_t)*head);
> +
> + if (idx >= btsr->snapshot_ref_cnt) {
> + err = intel_bts_alloc_snapshot_refs(btsr, idx);
> + if (err)
> + goto out_err;
> + }
> +
> + wrapped = btsr->snapshot_refs[idx].wrapped;
> + if (!wrapped && intel_bts_first_wrap((u64 *)data, mm->len)) {
> + btsr->snapshot_refs[idx].wrapped = true;
> + wrapped = true;
> + }
> +
> + /*
> + * In full trace mode 'head' continually increases. However in snapshot
> + * mode 'head' is an offset within the buffer. Here 'old' and 'head'
> + * are adjusted to match the full trace case which expects that 'old' is
> + * always less than 'head'.
> + */
> + if (wrapped) {
> + *old = *head;
> + *head += mm->len;
> + } else {
> + if (mm->mask)
> + *old &= mm->mask;
> + else
> + *old %= mm->len;
> + if (*old > *head)
> + *head += mm->len;
> + }
> +
> + pr_debug3("%s: wrap-around %sdetected, adjusted old head %zu adjusted new head %zu\n",
> + __func__, wrapped ? "" : "not ", (size_t)*old, (size_t)*head);
> +
> + return 0;
> +
> +out_err:
> + pr_err("%s: failed, error %d\n", __func__, err);
> + return err;
> +}
> +
> +static int intel_bts_read_finish(struct auxtrace_record *itr, int idx)
> +{
> + struct intel_bts_recording *btsr =
> + container_of(itr, struct intel_bts_recording, itr);
> + struct perf_evsel *evsel;
> +
> + evlist__for_each(btsr->evlist, evsel) {
> + if (evsel->attr.type == btsr->intel_bts_pmu->type)
> + return perf_evlist__enable_event_idx(btsr->evlist,
> + evsel, idx);
> + }
> + return -EINVAL;
> +}
> +
> +struct auxtrace_record *intel_bts_recording_init(int *err)
> +{
> + struct perf_pmu *intel_bts_pmu = perf_pmu__find(INTEL_BTS_PMU_NAME);
> + struct intel_bts_recording *btsr;
> +
> + if (!intel_bts_pmu)
> + return NULL;
> +
> + btsr = zalloc(sizeof(struct intel_bts_recording));
> + if (!btsr) {
> + *err = -ENOMEM;
> + return NULL;
> + }
> +
> + btsr->intel_bts_pmu = intel_bts_pmu;
> + btsr->itr.recording_options = intel_bts_recording_options;
> + btsr->itr.info_priv_size = intel_bts_info_priv_size;
> + btsr->itr.info_fill = intel_bts_info_fill;
> + btsr->itr.free = intel_bts_recording_free;
> + btsr->itr.snapshot_start = intel_bts_snapshot_start;
> + btsr->itr.snapshot_finish = intel_bts_snapshot_finish;
> + btsr->itr.find_snapshot = intel_bts_find_snapshot;
> + btsr->itr.parse_snapshot_options = intel_bts_parse_snapshot_options;
> + btsr->itr.reference = intel_bts_reference;
> + btsr->itr.read_finish = intel_bts_read_finish;
> + btsr->itr.alignment = sizeof(struct branch);
> + return &btsr->itr;
> +}
> diff --git a/tools/perf/arch/x86/util/pmu.c b/tools/perf/arch/x86/util/pmu.c
> index fd11cc3ce780..79fe07158d00 100644
> --- a/tools/perf/arch/x86/util/pmu.c
> +++ b/tools/perf/arch/x86/util/pmu.c
> @@ -3,6 +3,7 @@
> #include <linux/perf_event.h>
>
> #include "../../util/intel-pt.h"
> +#include "../../util/intel-bts.h"
> #include "../../util/pmu.h"
>
> struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused)
> @@ -10,6 +11,8 @@ struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu __mayb
> #ifdef HAVE_AUXTRACE_SUPPORT
> if (!strcmp(pmu->name, INTEL_PT_PMU_NAME))
> return intel_pt_pmu_default_config(pmu);
> + if (!strcmp(pmu->name, INTEL_BTS_PMU_NAME))
> + pmu->selectable = true;
> #endif
> return NULL;
> }
> diff --git a/tools/perf/util/Build b/tools/perf/util/Build
> index 7f5f2b6aff19..dc29c58cb300 100644
> --- a/tools/perf/util/Build
> +++ b/tools/perf/util/Build
> @@ -78,6 +78,7 @@ libperf-y += thread-stack.o
> libperf-$(CONFIG_AUXTRACE) += auxtrace.o
> libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/
> libperf-$(CONFIG_AUXTRACE) += intel-pt.o
> +libperf-$(CONFIG_AUXTRACE) += intel-bts.o
> libperf-y += parse-branch-options.o
>
> libperf-$(CONFIG_LIBELF) += symbol-elf.o
> diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c
> index 1b3cc297290d..3f3b40fa87c8 100644
> --- a/tools/perf/util/auxtrace.c
> +++ b/tools/perf/util/auxtrace.c
> @@ -48,6 +48,7 @@
> #include "parse-options.h"
>
> #include "intel-pt.h"
> +#include "intel-bts.h"
>
> int auxtrace_mmap__mmap(struct auxtrace_mmap *mm,
> struct auxtrace_mmap_params *mp,
> @@ -888,6 +889,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused,
> switch (type) {
> case PERF_AUXTRACE_INTEL_PT:
> return intel_pt_process_auxtrace_info(event, session);
> + case PERF_AUXTRACE_INTEL_BTS:
> + return intel_bts_process_auxtrace_info(event, session);
> case PERF_AUXTRACE_UNKNOWN:
> default:
> return -EINVAL;
> diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h
> index 7d12f33a3a06..bf72b77a588a 100644
> --- a/tools/perf/util/auxtrace.h
> +++ b/tools/perf/util/auxtrace.h
> @@ -40,6 +40,7 @@ struct events_stats;
> enum auxtrace_type {
> PERF_AUXTRACE_UNKNOWN,
> PERF_AUXTRACE_INTEL_PT,
> + PERF_AUXTRACE_INTEL_BTS,
> };
>
> enum itrace_period_type {
> diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c
> new file mode 100644
> index 000000000000..dce99cfb1309
> --- /dev/null
> +++ b/tools/perf/util/intel-bts.c
> @@ -0,0 +1,933 @@
> +/*
> + * intel-bts.c: Intel Branch Trace Store support
> + * Copyright (c) 2013-2015, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#include <endian.h>
> +#include <byteswap.h>
> +#include <linux/kernel.h>
> +#include <linux/types.h>
> +#include <linux/bitops.h>
> +#include <linux/log2.h>
> +
> +#include "cpumap.h"
> +#include "color.h"
> +#include "evsel.h"
> +#include "evlist.h"
> +#include "machine.h"
> +#include "session.h"
> +#include "util.h"
> +#include "thread.h"
> +#include "thread-stack.h"
> +#include "debug.h"
> +#include "tsc.h"
> +#include "auxtrace.h"
> +#include "intel-pt-decoder/intel-pt-insn-decoder.h"
> +#include "intel-bts.h"
> +
> +#define MAX_TIMESTAMP (~0ULL)
> +
> +#define INTEL_BTS_ERR_NOINSN 5
> +#define INTEL_BTS_ERR_LOST 9
> +
> +#if __BYTE_ORDER == __BIG_ENDIAN
> +#define le64_to_cpu bswap_64
> +#else
> +#define le64_to_cpu
> +#endif
> +
> +struct intel_bts {
> + struct auxtrace auxtrace;
> + struct auxtrace_queues queues;
> + struct auxtrace_heap heap;
> + u32 auxtrace_type;
> + struct perf_session *session;
> + struct machine *machine;
> + bool sampling_mode;
> + bool snapshot_mode;
> + bool data_queued;
> + u32 pmu_type;
> + struct perf_tsc_conversion tc;
> + bool cap_user_time_zero;
> + struct itrace_synth_opts synth_opts;
> + bool sample_branches;
> + u32 branches_filter;
> + u64 branches_sample_type;
> + u64 branches_id;
> + size_t branches_event_size;
> + bool synth_needs_swap;
> +};
> +
> +struct intel_bts_queue {
> + struct intel_bts *bts;
> + unsigned int queue_nr;
> + struct auxtrace_buffer *buffer;
> + bool on_heap;
> + bool done;
> + pid_t pid;
> + pid_t tid;
> + int cpu;
> + u64 time;
> + struct intel_pt_insn intel_pt_insn;
> + u32 sample_flags;
> +};
> +
> +struct branch {
> + u64 from;
> + u64 to;
> + u64 misc;
> +};
> +
> +static void intel_bts_dump(struct intel_bts *bts __maybe_unused,
> + unsigned char *buf, size_t len)
> +{
> + struct branch *branch;
> + size_t i, pos = 0, br_sz = sizeof(struct branch), sz;
> + const char *color = PERF_COLOR_BLUE;
> +
> + color_fprintf(stdout, color,
> + ". ... Intel BTS data: size %zu bytes\n",
> + len);
> +
> + while (len) {
> + if (len >= br_sz)
> + sz = br_sz;
> + else
> + sz = len;
> + printf(".");
> + color_fprintf(stdout, color, " %08x: ", pos);
> + for (i = 0; i < sz; i++)
> + color_fprintf(stdout, color, " %02x", buf[i]);
> + for (; i < br_sz; i++)
> + color_fprintf(stdout, color, " ");
> + if (len >= br_sz) {
> + branch = (struct branch *)buf;
> + color_fprintf(stdout, color, " %"PRIx64" -> %"PRIx64" %s\n",
> + le64_to_cpu(branch->from),
> + le64_to_cpu(branch->to),
> + le64_to_cpu(branch->misc) & 0x10 ?
> + "pred" : "miss");
> + } else {
> + color_fprintf(stdout, color, " Bad record!\n");
> + }
> + pos += sz;
> + buf += sz;
> + len -= sz;
> + }
> +}
> +
> +static void intel_bts_dump_event(struct intel_bts *bts, unsigned char *buf,
> + size_t len)
> +{
> + printf(".\n");
> + intel_bts_dump(bts, buf, len);
> +}
> +
> +/*
> + * Synthesize and deliver an INTEL_BTS_ERR_LOST auxtrace error event that
> + * takes its cpu, pid and tid from @sample.
> + *
> + * Returns 0 on success, otherwise the delivery error (which is also logged).
> + */
> +static int intel_bts_lost(struct intel_bts *bts, struct perf_sample *sample)
> +{
> +	union perf_event ev;
> +	int ret;
> +
> +	auxtrace_synth_error(&ev.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
> +			     INTEL_BTS_ERR_LOST, sample->cpu, sample->pid,
> +			     sample->tid, 0, "Lost trace data");
> +
> +	ret = perf_session__deliver_synth_event(bts->session, &ev, NULL);
> +	if (!ret)
> +		return 0;
> +
> +	pr_err("Intel BTS: failed to deliver error event, error %d\n", ret);
> +	return ret;
> +}
> +
> +/*
> + * Allocate zeroed per-queue decoder state for queue @queue_nr of @bts.
> + * pid, tid and cpu start out as -1.  Returns NULL if allocation fails.
> + */
> +static struct intel_bts_queue *intel_bts_alloc_queue(struct intel_bts *bts,
> +						     unsigned int queue_nr)
> +{
> +	struct intel_bts_queue *q = zalloc(sizeof(struct intel_bts_queue));
> +
> +	if (q) {
> +		q->bts = bts;
> +		q->queue_nr = queue_nr;
> +		q->pid = -1;
> +		q->tid = -1;
> +		q->cpu = -1;
> +	}
> +
> +	return q;
> +}
> +
> +/*
> + * Prepare one auxtrace queue for decoding: allocate its private state on
> + * first use and, outside sampling mode, put it on the decoder heap keyed by
> + * the reference of its first buffer.
> + *
> + * Returns 0 on success (including "nothing to do"), -ENOMEM if the private
> + * state cannot be allocated, or the error from auxtrace_heap__add().
> + */
> +static int intel_bts_setup_queue(struct intel_bts *bts,
> + struct auxtrace_queue *queue,
> + unsigned int queue_nr)
> +{
> + struct intel_bts_queue *btsq = queue->priv;
> +
> + /* A queue without buffers needs no state */
> + if (list_empty(&queue->head))
> + return 0;
> +
> + if (!btsq) {
> + btsq = intel_bts_alloc_queue(bts, queue_nr);
> + if (!btsq)
> + return -ENOMEM;
> + queue->priv = btsq;
> +
> + /* Keep the -1 default when the queue has no cpu */
> + if (queue->cpu != -1)
> + btsq->cpu = queue->cpu;
> + btsq->tid = queue->tid;
> + }
> +
> + if (bts->sampling_mode)
> + return 0;
> +
> + if (!btsq->on_heap && !btsq->buffer) {
> + int ret;
> +
> + btsq->buffer = auxtrace_buffer__next(queue, NULL);
> + if (!btsq->buffer)
> + return 0;
> +
> + /*
> + * NOTE(review): if this fails, ->buffer stays set while ->on_heap
> + * stays false, so a later call will not retry the add.
> + */
> + ret = auxtrace_heap__add(&bts->heap, queue_nr,
> + btsq->buffer->reference);
> + if (ret)
> + return ret;
> + btsq->on_heap = true;
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Run intel_bts_setup_queue() over every auxtrace queue, stopping at and
> + * returning the first non-zero result.
> + */
> +static int intel_bts_setup_queues(struct intel_bts *bts)
> +{
> +	unsigned int nr;
> +
> +	for (nr = 0; nr < bts->queues.nr_queues; nr++) {
> +		int err = intel_bts_setup_queue(bts,
> +						&bts->queues.queue_array[nr],
> +						nr);
> +
> +		if (err)
> +			return err;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Set up the queues again if new data has been queued since the last call;
> + * the new_data flag is cleared before doing so.
> + */
> +static inline int intel_bts_update_queues(struct intel_bts *bts)
> +{
> +	if (!bts->queues.new_data)
> +		return 0;
> +
> +	bts->queues.new_data = false;
> +	return intel_bts_setup_queues(bts);
> +}
> +
> +/*
> + * Find where buffer B stops repeating the tail of buffer A.
> + *
> + * Tails of A, starting no earlier than len_b bytes from its end and
> + * shrinking by one branch record per step, are compared with the start of
> + * B.  On the first match the position in B just past the matching bytes is
> + * returned; with no match the start of B is returned.
> + */
> +static unsigned char *intel_bts_find_overlap(unsigned char *buf_a, size_t len_a,
> +					     unsigned char *buf_b, size_t len_b)
> +{
> +	size_t start = len_a > len_b ? len_a - len_b : 0;
> +	size_t tail;
> +
> +	while (start < len_a) {
> +		tail = len_a - start;
> +		if (memcmp(buf_a + start, buf_b, tail) == 0)
> +			return buf_b + tail;
> +		start += sizeof(struct branch);
> +	}
> +
> +	return buf_b;
> +}
> +
> +/*
> + * Trim from buffer @b the leading data that repeats the end of the previous
> + * buffer on @queue, by setting b->use_data / b->use_size.
> + *
> + * Returns 0 on success or when @b is the first buffer on the queue.
> + */
> +static int intel_bts_do_fix_overlap(struct auxtrace_queue *queue,
> + struct auxtrace_buffer *b)
> +{
> + struct auxtrace_buffer *a;
> + void *start;
> +
> + /* No previous buffer to overlap with */
> + if (b->list.prev == &queue->head)
> + return 0;
> + a = list_entry(b->list.prev, struct auxtrace_buffer, list);
> + start = intel_bts_find_overlap(a->data, a->size, b->data, b->size);
> + /*
> + * NOTE(review): intel_bts_find_overlap() returns a pointer derived from
> + * b->data on every path, so this only triggers if b->data is NULL.
> + */
> + if (!start)
> + return -EINVAL;
> + b->use_size = b->data + b->size - start;
> + b->use_data = start;
> + return 0;
> +}
> +
> +/*
> + * Build a 'branches' sample from one BTS record and deliver it to the
> + * session.
> + *
> + * The sample ip is the branch source and addr is the branch target; flags
> + * and instruction length come from the queue state set up by
> + * intel_bts_get_branch_type().  With the inject option the sample is also
> + * serialized into the event via perf_event__synthesize_sample().
> + *
> + * Returns 0 on success or the synthesis/delivery error.
> + */
> +static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq,
> +					 struct branch *branch)
> +{
> +	int ret;
> +	struct intel_bts *bts = btsq->bts;
> +	union perf_event event;
> +	struct perf_sample sample = { .ip = 0, };
> +
> +	sample.ip = le64_to_cpu(branch->from);
> +	sample.pid = btsq->pid;
> +	sample.tid = btsq->tid;
> +	sample.addr = le64_to_cpu(branch->to);
> +	sample.id = btsq->bts->branches_id;
> +	sample.stream_id = btsq->bts->branches_id;
> +	sample.period = 1;
> +	sample.cpu = btsq->cpu;
> +	sample.flags = btsq->sample_flags;
> +	sample.insn_len = btsq->intel_pt_insn.length;
> +
> +	event.sample.header.type = PERF_RECORD_SAMPLE;
> +	/*
> +	 * Tag the sample with the cpumode of its ip instead of always
> +	 * claiming user space.  This is the same kernel/user classification
> +	 * that intel_bts_get_next_insn() uses to pick the maps to search.
> +	 */
> +	if (machine__kernel_ip(bts->machine, sample.ip))
> +		event.sample.header.misc = PERF_RECORD_MISC_KERNEL;
> +	else
> +		event.sample.header.misc = PERF_RECORD_MISC_USER;
> +	event.sample.header.size = sizeof(struct perf_event_header);
> +
> +	if (bts->synth_opts.inject) {
> +		event.sample.header.size = bts->branches_event_size;
> +		ret = perf_event__synthesize_sample(&event,
> +						    bts->branches_sample_type,
> +						    0, &sample,
> +						    bts->synth_needs_swap);
> +		if (ret)
> +			return ret;
> +	}
> +
> +	ret = perf_session__deliver_synth_event(bts->session, &event, &sample);
> +	if (ret)
> +		pr_err("Intel BTS: failed to deliver branch event, error %d\n",
> +		       ret);
> +
> +	return ret;
> +}
> +
> +/*
> + * Read and decode the instruction at address @ip in the queue's thread,
> + * storing the result in btsq->intel_pt_insn.
> + *
> + * Returns 0 on success and -1 if the thread or its map/dso cannot be found,
> + * the bytes cannot be read, or the instruction cannot be decoded.
> + */
> +static int intel_bts_get_next_insn(struct intel_bts_queue *btsq, u64 ip)
> +{
> + struct machine *machine = btsq->bts->machine;
> + struct thread *thread;
> + struct addr_location al;
> + unsigned char buf[1024];
> + size_t bufsz;
> + ssize_t len;
> + int x86_64;
> + uint8_t cpumode;
> + int err = -1;
> +
> + /* NOTE(review): assumes the maximum instruction size fits in buf */
> + bufsz = intel_pt_insn_max_size();
> +
> + /* Select kernel or user maps according to the address */
> + if (machine__kernel_ip(machine, ip))
> + cpumode = PERF_RECORD_MISC_KERNEL;
> + else
> + cpumode = PERF_RECORD_MISC_USER;
> +
> + thread = machine__find_thread(machine, -1, btsq->tid);
> + if (!thread)
> + return -1;
> +
> + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, ip, &al);
> + if (!al.map || !al.map->dso)
> + goto out_put;
> +
> + len = dso__data_read_addr(al.map->dso, al.map, machine, ip, buf, bufsz);
> + if (len <= 0)
> + goto out_put;
> +
> + /* Load maps to ensure dso->is_64_bit has been updated */
> + map__load(al.map, machine->symbol_filter);
> +
> + x86_64 = al.map->dso->is_64_bit;
> +
> + if (intel_pt_get_insn(buf, len, x86_64, &btsq->intel_pt_insn))
> + goto out_put;
> +
> + err = 0;
> +out_put:
> + /* Drop the reference taken by machine__find_thread() */
> + thread__put(thread);
> + return err;
> +}
> +
> +/*
> + * Synthesize and deliver an INTEL_BTS_ERR_NOINSN auxtrace error event for
> + * the given cpu, pid, tid and instruction address.
> + *
> + * Returns 0 on success, otherwise the delivery error (which is also logged).
> + */
> +static int intel_bts_synth_error(struct intel_bts *bts, int cpu, pid_t pid,
> +				 pid_t tid, u64 ip)
> +{
> +	union perf_event ev;
> +	int ret;
> +
> +	auxtrace_synth_error(&ev.auxtrace_error, PERF_AUXTRACE_ERROR_ITRACE,
> +			     INTEL_BTS_ERR_NOINSN, cpu, pid, tid, ip,
> +			     "Failed to get instruction");
> +
> +	ret = perf_session__deliver_synth_event(bts->session, &ev, NULL);
> +	if (!ret)
> +		return 0;
> +
> +	pr_err("Intel BTS: failed to deliver error event, error %d\n", ret);
> +	return ret;
> +}
> +
> +/*
> + * Work out the sample flags and instruction length for one branch record
> + * and store them in @btsq.
> + *
> + * A zero source with a non-zero target is reported as trace begin, a zero
> + * target as trace end.  Otherwise the instruction at the source address is
> + * decoded to classify the branch; if that fails the flags are cleared and,
> + * when error synthesis is enabled, an error event is delivered.
> + *
> + * The record fields are converted with le64_to_cpu() before being used as
> + * addresses, matching how the rest of this file reads branch records.
> + *
> + * Returns 0, or the error from delivering the synthesized error event.
> + */
> +static int intel_bts_get_branch_type(struct intel_bts_queue *btsq,
> +				     struct branch *branch)
> +{
> +	struct machine *machine = btsq->bts->machine;
> +	u64 from = le64_to_cpu(branch->from);
> +	u64 to = le64_to_cpu(branch->to);
> +	int err;
> +
> +	if (!from) {
> +		if (to)
> +			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
> +					     PERF_IP_FLAG_TRACE_BEGIN;
> +		else
> +			btsq->sample_flags = 0;
> +		btsq->intel_pt_insn.length = 0;
> +	} else if (!to) {
> +		btsq->sample_flags = PERF_IP_FLAG_BRANCH |
> +				     PERF_IP_FLAG_TRACE_END;
> +		btsq->intel_pt_insn.length = 0;
> +	} else {
> +		err = intel_bts_get_next_insn(btsq, from);
> +		if (err) {
> +			btsq->sample_flags = 0;
> +			btsq->intel_pt_insn.length = 0;
> +			if (!btsq->bts->synth_opts.errors)
> +				return 0;
> +			err = intel_bts_synth_error(btsq->bts, btsq->cpu,
> +						    btsq->pid, btsq->tid,
> +						    from);
> +			return err;
> +		}
> +		btsq->sample_flags = intel_pt_insn_type(btsq->intel_pt_insn.op);
> +		/* Check for an async branch into the kernel */
> +		if (!machine__kernel_ip(machine, from) &&
> +		    machine__kernel_ip(machine, to) &&
> +		    btsq->sample_flags != (PERF_IP_FLAG_BRANCH |
> +					   PERF_IP_FLAG_CALL |
> +					   PERF_IP_FLAG_SYSCALLRET))
> +			btsq->sample_flags = PERF_IP_FLAG_BRANCH |
> +					     PERF_IP_FLAG_CALL |
> +					     PERF_IP_FLAG_ASYNC |
> +					     PERF_IP_FLAG_INTERRUPT;
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Walk the branch records of one buffer and synthesize a sample for each
> + * record that passes the branch filter.
> + *
> + * The overlap-trimmed view (use_data/use_size) is used when it has been set
> + * up, otherwise the whole buffer.  Records with both addresses zero are
> + * skipped.  Nothing is done unless branch sampling is enabled.
> + *
> + * Returns 0 on success or the first sample synthesis error.
> + */
> +static int intel_bts_process_buffer(struct intel_bts_queue *btsq,
> +				    struct auxtrace_buffer *buffer)
> +{
> +	struct branch *branch;
> +	size_t sz, bsz = sizeof(struct branch);
> +	u32 filter = btsq->bts->branches_filter;
> +	int err = 0;
> +
> +	if (buffer->use_data) {
> +		sz = buffer->use_size;
> +		branch = buffer->use_data;
> +	} else {
> +		sz = buffer->size;
> +		branch = buffer->data;
> +	}
> +
> +	if (!btsq->bts->sample_branches)
> +		return 0;
> +
> +	/*
> +	 * Use >= so that the final complete record is processed too; only a
> +	 * trailing fragment shorter than one record is left out.
> +	 */
> +	for (; sz >= bsz; branch += 1, sz -= bsz) {
> +		if (!branch->from && !branch->to)
> +			continue;
> +		/*
> +		 * NOTE(review): the return value is not checked here; it is
> +		 * non-zero only when delivering an error event failed.
> +		 */
> +		intel_bts_get_branch_type(btsq, branch);
> +		if (filter && !(filter & btsq->sample_flags))
> +			continue;
> +		err = intel_bts_synth_branch_sample(btsq, branch);
> +		if (err)
> +			break;
> +	}
> +	return err;
> +}
> +
> +/*
> + * Process the next buffer of one queue.
> + *
> + * Returns a negative error code on failure, 1 when the queue has nothing
> + * (more) to process at this point, or the result of
> + * intel_bts_process_buffer() otherwise.  When another buffer follows,
> + * *@timestamp (if non-NULL) is set to that buffer's reference.
> + */
> +static int intel_bts_process_queue(struct intel_bts_queue *btsq, u64 *timestamp)
> +{
> + struct auxtrace_buffer *buffer = btsq->buffer, *old_buffer = buffer;
> + struct auxtrace_queue *queue;
> + struct thread *thread;
> + int err;
> +
> + if (btsq->done)
> + return 1;
> +
> + /* Resolve the pid from the thread the first time it is available */
> + if (btsq->pid == -1) {
> + thread = machine__find_thread(btsq->bts->machine, -1,
> + btsq->tid);
> + if (thread)
> + btsq->pid = thread->pid_;
> + } else {
> + thread = machine__findnew_thread(btsq->bts->machine, btsq->pid,
> + btsq->tid);
> + }
> +
> + queue = &btsq->bts->queues.queue_array[btsq->queue_nr];
> +
> + if (!buffer)
> + buffer = auxtrace_buffer__next(queue, NULL);
> +
> + if (!buffer) {
> + if (!btsq->bts->sampling_mode)
> + btsq->done = 1;
> + err = 1;
> + goto out_put;
> + }
> +
> + /* Currently there is no support for split buffers */
> + if (buffer->consecutive) {
> + err = -EINVAL;
> + goto out_put;
> + }
> +
> + /* Map or read the buffer contents on demand */
> + if (!buffer->data) {
> + int fd = perf_data_file__fd(btsq->bts->session->file);
> +
> + buffer->data = auxtrace_buffer__get_data(buffer, fd);
> + if (!buffer->data) {
> + err = -ENOMEM;
> + goto out_put;
> + }
> + }
> +
> + /*
> + * In snapshot mode successive buffers may repeat data, so trim it.
> + * NOTE(review): buffer->consecutive is always false here because of
> + * the check above, and any overlap error is reported as -ENOMEM
> + * rather than the value intel_bts_do_fix_overlap() returned.
> + */
> + if (btsq->bts->snapshot_mode && !buffer->consecutive &&
> + intel_bts_do_fix_overlap(queue, buffer)) {
> + err = -ENOMEM;
> + goto out_put;
> + }
> +
> + if (!btsq->bts->synth_opts.callchain && thread &&
> + (!old_buffer || btsq->bts->sampling_mode ||
> + (btsq->bts->snapshot_mode && !buffer->consecutive)))
> + thread_stack__set_trace_nr(thread, buffer->buffer_nr + 1);
> +
> + err = intel_bts_process_buffer(btsq, buffer);
> +
> + auxtrace_buffer__drop_data(buffer);
> +
> + /* Advance to the following buffer and report its reference */
> + btsq->buffer = auxtrace_buffer__next(queue, buffer);
> + if (btsq->buffer) {
> + if (timestamp)
> + *timestamp = btsq->buffer->reference;
> + } else {
> + if (!btsq->bts->sampling_mode)
> + btsq->done = 1;
> + }
> +out_put:
> + /* thread may be NULL here when the lookup above found nothing */
> + thread__put(thread);
> + return err;
> +}
> +
> +/*
> + * Keep processing buffers of @btsq until intel_bts_process_queue() reports
> + * that it is finished (positive return) or fails.
> + *
> + * Returns 0 once the queue is drained, or the negative error.
> + */
> +static int intel_bts_flush_queue(struct intel_bts_queue *btsq)
> +{
> +	u64 timestamp = 0;
> +	int err;
> +
> +	do {
> +		err = intel_bts_process_queue(btsq, &timestamp);
> +		if (err < 0)
> +			return err;
> +	} while (!err);
> +
> +	return 0;
> +}
> +
> +/*
> + * Flush the first queue whose state belongs to thread @tid.
> + *
> + * Returns the flush result, or 0 if no queue matches.
> + */
> +static int intel_bts_process_tid_exit(struct intel_bts *bts, pid_t tid)
> +{
> +	unsigned int nr;
> +
> +	for (nr = 0; nr < bts->queues.nr_queues; nr++) {
> +		struct intel_bts_queue *btsq = bts->queues.queue_array[nr].priv;
> +
> +		if (!btsq || btsq->tid != tid)
> +			continue;
> +
> +		return intel_bts_flush_queue(btsq);
> +	}
> +
> +	return 0;
> +}
> +
> +/*
> + * Process queued trace data in heap order until the heap is empty or its
> + * first entry has an ordinal later than @timestamp.
> + *
> + * Returns 0 on success or a negative error code.
> + */
> +static int intel_bts_process_queues(struct intel_bts *bts, u64 timestamp)
> +{
> + while (1) {
> + unsigned int queue_nr;
> + struct auxtrace_queue *queue;
> + struct intel_bts_queue *btsq;
> + u64 ts = 0;
> + int ret;
> +
> + if (!bts->heap.heap_cnt)
> + return 0;
> +
> + /* presumably heap_array[0] is the entry with the lowest ordinal */
> + if (bts->heap.heap_array[0].ordinal > timestamp)
> + return 0;
> +
> + queue_nr = bts->heap.heap_array[0].queue_nr;
> + queue = &bts->queues.queue_array[queue_nr];
> + btsq = queue->priv;
> +
> + auxtrace_heap__pop(&bts->heap);
> +
> + ret = intel_bts_process_queue(btsq, &ts);
> + if (ret < 0) {
> + /* Put the queue back before reporting the error */
> + auxtrace_heap__add(&bts->heap, queue_nr, ts);
> + return ret;
> + }
> +
> + if (!ret) {
> + /* More data follows: re-queue at the next buffer's reference */
> + ret = auxtrace_heap__add(&bts->heap, queue_nr, ts);
> + if (ret < 0)
> + return ret;
> + } else {
> + btsq->on_heap = false;
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Per-event hook of the BTS decoder.
> + *
> + * Converts the event time to a TSC value, picks up newly queued data and
> + * processes all queued trace data up to that timestamp.  On an exit event
> + * the exiting thread's queue is flushed, and a truncated AUX record is
> + * reported as lost trace data when error synthesis is enabled.
> + *
> + * Does nothing when dumping the trace.  Returns 0 on success, -EINVAL if
> + * the tool does not use ordered events, or an error from processing.
> + */
> +static int intel_bts_process_event(struct perf_session *session,
> +				   union perf_event *event,
> +				   struct perf_sample *sample,
> +				   struct perf_tool *tool)
> +{
> +	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
> +					     auxtrace);
> +	u64 timestamp;
> +	int err;
> +
> +	if (dump_trace)
> +		return 0;
> +
> +	if (!tool->ordered_events) {
> +		pr_err("Intel BTS requires ordered events\n");
> +		return -EINVAL;
> +	}
> +
> +	if (sample->time)
> +		timestamp = perf_time_to_tsc(sample->time, &bts->tc);
> +	else
> +		timestamp = 0;
> +
> +	err = intel_bts_update_queues(bts);
> +	if (err)
> +		return err;
> +
> +	err = intel_bts_process_queues(bts, timestamp);
> +	if (err)
> +		return err;
> +	if (event->header.type == PERF_RECORD_EXIT) {
> +		/*
> +		 * PERF_RECORD_EXIT carries { pid, ppid, tid, ptid, time }, so
> +		 * the tid has to be read through the fork member; the tid of
> +		 * the comm member sits at the ppid position of this record.
> +		 */
> +		err = intel_bts_process_tid_exit(bts, event->fork.tid);
> +		if (err)
> +			return err;
> +	}
> +
> +	if (event->header.type == PERF_RECORD_AUX &&
> +	    (event->aux.flags & PERF_AUX_FLAG_TRUNCATED) &&
> +	    bts->synth_opts.errors)
> +		err = intel_bts_lost(bts, sample);
> +
> +	return err;
> +}
> +
> +/*
> + * Handle an AUXTRACE event: unless the data was already queued up front or
> + * the decoder is in sampling mode, add the event's trace data to the
> + * queues, and dump it when dump_trace is set.
> + *
> + * Returns 0 on success, -errno if the file offset cannot be determined, or
> + * the error from auxtrace_queues__add_event().
> + */
> +static int intel_bts_process_auxtrace_event(struct perf_session *session,
> + union perf_event *event,
> + struct perf_tool *tool __maybe_unused)
> +{
> + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
> + auxtrace);
> +
> + if (bts->sampling_mode)
> + return 0;
> +
> + if (!bts->data_queued) {
> + struct auxtrace_buffer *buffer;
> + off_t data_offset;
> + int fd = perf_data_file__fd(session->file);
> + int err;
> +
> + /* A pipe has no seekable position, so use offset 0 */
> + if (perf_data_file__is_pipe(session->file)) {
> + data_offset = 0;
> + } else {
> + data_offset = lseek(fd, 0, SEEK_CUR);
> + if (data_offset == -1)
> + return -errno;
> + }
> +
> + err = auxtrace_queues__add_event(&bts->queues, session, event,
> + data_offset, &buffer);
> + if (err)
> + return err;
> +
> + /* Dump here now we have copied a piped trace out of the pipe */
> + if (dump_trace) {
> + if (auxtrace_buffer__get_data(buffer, fd)) {
> + intel_bts_dump_event(bts, buffer->data,
> + buffer->size);
> + auxtrace_buffer__put_data(buffer);
> + }
> + }
> + }
> +
> + return 0;
> +}
> +
> +/*
> + * Flush hook: process all remaining queued trace data.
> + *
> + * Nothing is done when dumping the trace or in sampling mode.  Returns
> + * -EINVAL if the tool does not use ordered events, otherwise the result of
> + * updating and processing the queues.
> + */
> +static int intel_bts_flush(struct perf_session *session __maybe_unused,
> +			   struct perf_tool *tool __maybe_unused)
> +{
> +	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
> +					     auxtrace);
> +	int err;
> +
> +	if (dump_trace)
> +		return 0;
> +	if (bts->sampling_mode)
> +		return 0;
> +	if (!tool->ordered_events)
> +		return -EINVAL;
> +
> +	err = intel_bts_update_queues(bts);
> +	if (err < 0)
> +		return err;
> +
> +	return intel_bts_process_queues(bts, MAX_TIMESTAMP);
> +}
> +
> +/* Release the per-queue decoder state in @priv; a NULL @priv is a no-op. */
> +static void intel_bts_free_queue(void *priv)
> +{
> +	/* free() accepts NULL, so no explicit check is needed */
> +	free(priv);
> +}
> +
> +/*
> + * Free the decoder state attached to every queue, clear the ->priv
> + * pointers, then free the auxtrace queues themselves.
> + */
> +static void intel_bts_free_events(struct perf_session *session)
> +{
> +	struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
> +					     auxtrace);
> +	struct auxtrace_queues *queues = &bts->queues;
> +	struct auxtrace_queue *queue;
> +	unsigned int nr;
> +
> +	for (nr = 0; nr < queues->nr_queues; nr++) {
> +		queue = &queues->queue_array[nr];
> +		intel_bts_free_queue(queue->priv);
> +		queue->priv = NULL;
> +	}
> +
> +	auxtrace_queues__free(queues);
> +}
> +
> +/*
> + * Tear down the BTS decoder attached to @session: free the heap, the queues
> + * and their state, detach the decoder from the session and free it.
> + */
> +static void intel_bts_free(struct perf_session *session)
> +{
> + struct intel_bts *bts = container_of(session->auxtrace, struct intel_bts,
> + auxtrace);
> +
> + auxtrace_heap__free(&bts->heap);
> + /* Must run before session->auxtrace is cleared: it looks bts up through it */
> + intel_bts_free_events(session);
> + session->auxtrace = NULL;
> + free(bts);
> +}
> +
> +/*
> + * Context for synthesizing attribute events: the dummy tool is what gets
> + * passed to the synthesis callback, which recovers the session from it with
> + * container_of().
> + */
> +struct intel_bts_synth {
> + struct perf_tool dummy_tool;
> + struct perf_session *session;
> +};
> +
> +/*
> + * Synthesis callback: deliver @event to the session stored alongside @tool
> + * in the enclosing struct intel_bts_synth.
> + */
> +static int intel_bts_event_synth(struct perf_tool *tool,
> +				 union perf_event *event,
> +				 struct perf_sample *sample __maybe_unused,
> +				 struct machine *machine __maybe_unused)
> +{
> +	struct intel_bts_synth *synth;
> +
> +	synth = container_of(tool, struct intel_bts_synth, dummy_tool);
> +
> +	return perf_session__deliver_synth_event(synth->session, event, NULL);
> +}
> +
> +/*
> + * Synthesize an attribute event for @attr with the single sample id @id and
> + * deliver it to @session through intel_bts_event_synth().
> + */
> +static int intel_bts_synth_event(struct perf_session *session,
> +				 struct perf_event_attr *attr, u64 id)
> +{
> +	struct intel_bts_synth synth;
> +
> +	memset(&synth, 0, sizeof(synth));
> +	synth.session = session;
> +
> +	return perf_event__synthesize_attr(&synth.dummy_tool, attr, 1, &id,
> +					   intel_bts_event_synth);
> +}
> +
> +/*
> + * Set up the synthesized 'branches' event.
> + *
> + * Finds the first selected event of the BTS PMU type that has sample ids,
> + * derives a hardware branch-instructions attribute from it and, when the
> + * branches option is set, synthesizes the attribute event and records its
> + * id, sample type and event size in @bts.
> + *
> + * Returns 0 on success (also when there is no BTS event) or the synthesis
> + * error.
> + */
> +static int intel_bts_synth_events(struct intel_bts *bts,
> + struct perf_session *session)
> +{
> + struct perf_evlist *evlist = session->evlist;
> + struct perf_evsel *evsel;
> + struct perf_event_attr attr;
> + bool found = false;
> + u64 id;
> + int err;
> +
> + evlist__for_each(evlist, evsel) {
> + if (evsel->attr.type == bts->pmu_type && evsel->ids) {
> + found = true;
> + break;
> + }
> + }
> +
> + if (!found) {
> + pr_debug("There are no selected events with Intel BTS data\n");
> + return 0;
> + }
> +
> + /* Start from the BTS event's settings, minus time and cpu */
> + memset(&attr, 0, sizeof(struct perf_event_attr));
> + attr.size = sizeof(struct perf_event_attr);
> + attr.type = PERF_TYPE_HARDWARE;
> + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK;
> + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID |
> + PERF_SAMPLE_PERIOD;
> + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME;
> + attr.sample_type &= ~(u64)PERF_SAMPLE_CPU;
> + attr.exclude_user = evsel->attr.exclude_user;
> + attr.exclude_kernel = evsel->attr.exclude_kernel;
> + attr.exclude_hv = evsel->attr.exclude_hv;
> + attr.exclude_host = evsel->attr.exclude_host;
> + attr.exclude_guest = evsel->attr.exclude_guest;
> + attr.sample_id_all = evsel->attr.sample_id_all;
> + attr.read_format = evsel->attr.read_format;
> +
> + /* Derive an id from the BTS event's first id, never using 0 */
> + id = evsel->id[0] + 1000000000;
> + if (!id)
> + id = 1;
> +
> + if (bts->synth_opts.branches) {
> + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS;
> + attr.sample_period = 1;
> + attr.sample_type |= PERF_SAMPLE_ADDR;
> + pr_debug("Synthesizing 'branches' event with id %" PRIu64 " sample type %#" PRIx64 "\n",
> + id, (u64)attr.sample_type);
> + err = intel_bts_synth_event(session, &attr, id);
> + if (err) {
> + pr_err("%s: failed to synthesize 'branches' event type\n",
> + __func__);
> + return err;
> + }
> + bts->sample_branches = true;
> + bts->branches_sample_type = attr.sample_type;
> + bts->branches_id = id;
> + /*
> + * We only use sample types from PERF_SAMPLE_MASK so we can use
> + * __perf_evsel__sample_size() here.
> + */
> + bts->branches_event_size = sizeof(struct sample_event) +
> + __perf_evsel__sample_size(attr.sample_type);
> + }
> +
> + bts->synth_needs_swap = evsel->needs_swap;
> +
> + return 0;
> +}
> +
> +/*
> + * Format strings used by intel_bts_print_info() to print each entry of the
> + * auxtrace info priv[] array, indexed by the INTEL_BTS_* enum values.
> + */
> +static const char * const intel_bts_info_fmts[] = {
> + [INTEL_BTS_PMU_TYPE] = " PMU Type %"PRId64"\n",
> + [INTEL_BTS_TIME_SHIFT] = " Time Shift %"PRIu64"\n",
> + [INTEL_BTS_TIME_MULT] = " Time Muliplier %"PRIu64"\n",
> + [INTEL_BTS_TIME_ZERO] = " Time Zero %"PRIu64"\n",
> + [INTEL_BTS_CAP_USER_TIME_ZERO] = " Cap Time Zero %"PRId64"\n",
> + [INTEL_BTS_SNAPSHOT_MODE] = " Snapshot mode %"PRId64"\n",
> +};
> +
> +/*
> + * When dumping the trace, print entries @start through @finish (inclusive)
> + * of @arr using the matching intel_bts_info_fmts[] format.
> + */
> +static void intel_bts_print_info(u64 *arr, int start, int finish)
> +{
> +	int idx = start;
> +
> +	if (!dump_trace)
> +		return;
> +
> +	while (idx <= finish) {
> +		fprintf(stdout, intel_bts_info_fmts[idx], arr[idx]);
> +		idx++;
> +	}
> +}
> +
> +/*
> + * NOTE(review): INTEL_BTS_AUXTRACE_PRIV_SIZE is a byte count (number of priv
> + * entries times sizeof(u64)), so using it as the element count makes this
> + * array sizeof(u64) times larger than the priv data - confirm the intent.
> + */
> +u64 intel_bts_auxtrace_info_priv[INTEL_BTS_AUXTRACE_PRIV_SIZE];
> +
> +/*
> + * Create the Intel BTS decoder from an AUXTRACE_INFO event and attach it to
> + * @session.
> + *
> + * The PMU type, time conversion parameters and snapshot mode are taken from
> + * the event's priv[] array.  When dumping the trace only the info is
> + * printed; otherwise the synthesis options are applied, the 'branches'
> + * event is set up and any indexed trace data is queued.
> + *
> + * Returns 0 on success, -EINVAL if the event is too small to hold the
> + * priv[] entries that are read, -ENOMEM on allocation failure, or the error
> + * from queue setup / event synthesis.
> + */
> +int intel_bts_process_auxtrace_info(union perf_event *event,
> +				    struct perf_session *session)
> +{
> +	struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info;
> +	/*
> +	 * priv[] is read up to and including index INTEL_BTS_SNAPSHOT_MODE,
> +	 * so that index plus one entries must be present.
> +	 */
> +	size_t min_sz = sizeof(u64) * (INTEL_BTS_SNAPSHOT_MODE + 1);
> +	struct intel_bts *bts;
> +	int err;
> +
> +	if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) +
> +					 min_sz)
> +		return -EINVAL;
> +
> +	bts = zalloc(sizeof(struct intel_bts));
> +	if (!bts)
> +		return -ENOMEM;
> +
> +	err = auxtrace_queues__init(&bts->queues);
> +	if (err)
> +		goto err_free;
> +
> +	bts->session = session;
> +	bts->machine = &session->machines.host; /* No kvm support */
> +	bts->auxtrace_type = auxtrace_info->type;
> +	bts->pmu_type = auxtrace_info->priv[INTEL_BTS_PMU_TYPE];
> +	bts->tc.time_shift = auxtrace_info->priv[INTEL_BTS_TIME_SHIFT];
> +	bts->tc.time_mult = auxtrace_info->priv[INTEL_BTS_TIME_MULT];
> +	bts->tc.time_zero = auxtrace_info->priv[INTEL_BTS_TIME_ZERO];
> +	bts->cap_user_time_zero =
> +			auxtrace_info->priv[INTEL_BTS_CAP_USER_TIME_ZERO];
> +	bts->snapshot_mode = auxtrace_info->priv[INTEL_BTS_SNAPSHOT_MODE];
> +
> +	bts->sampling_mode = false;
> +
> +	bts->auxtrace.process_event = intel_bts_process_event;
> +	bts->auxtrace.process_auxtrace_event = intel_bts_process_auxtrace_event;
> +	bts->auxtrace.flush_events = intel_bts_flush;
> +	bts->auxtrace.free_events = intel_bts_free_events;
> +	bts->auxtrace.free = intel_bts_free;
> +	session->auxtrace = &bts->auxtrace;
> +
> +	intel_bts_print_info(&auxtrace_info->priv[0], INTEL_BTS_PMU_TYPE,
> +			     INTEL_BTS_SNAPSHOT_MODE);
> +
> +	/* Dumping needs no synthesized events or queued data */
> +	if (dump_trace)
> +		return 0;
> +
> +	if (session->itrace_synth_opts && session->itrace_synth_opts->set)
> +		bts->synth_opts = *session->itrace_synth_opts;
> +	else
> +		itrace_synth_opts__set_default(&bts->synth_opts);
> +
> +	if (bts->synth_opts.calls)
> +		bts->branches_filter |= PERF_IP_FLAG_CALL | PERF_IP_FLAG_ASYNC |
> +					PERF_IP_FLAG_TRACE_END;
> +	if (bts->synth_opts.returns)
> +		bts->branches_filter |= PERF_IP_FLAG_RETURN |
> +					PERF_IP_FLAG_TRACE_BEGIN;
> +
> +	err = intel_bts_synth_events(bts, session);
> +	if (err)
> +		goto err_free_queues;
> +
> +	err = auxtrace_queues__process_index(&bts->queues, session);
> +	if (err)
> +		goto err_free_queues;
> +
> +	if (bts->queues.populated)
> +		bts->data_queued = true;
> +
> +	return 0;
> +
> +err_free_queues:
> +	auxtrace_queues__free(&bts->queues);
> +	session->auxtrace = NULL;
> +err_free:
> +	free(bts);
> +	return err;
> +}
> diff --git a/tools/perf/util/intel-bts.h b/tools/perf/util/intel-bts.h
> new file mode 100644
> index 000000000000..ca65e21b3e83
> --- /dev/null
> +++ b/tools/perf/util/intel-bts.h
> @@ -0,0 +1,43 @@
> +/*
> + * intel-bts.h: Intel Branch Trace Store (BTS) support
> + * Copyright (c) 2013-2014, Intel Corporation.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms and conditions of the GNU General Public License,
> + * version 2, as published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope it will be useful, but WITHOUT
> + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
> + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
> + * more details.
> + *
> + */
> +
> +#ifndef INCLUDE__PERF_INTEL_BTS_H__
> +#define INCLUDE__PERF_INTEL_BTS_H__
> +
> +/* Name of the Intel BTS PMU */
> +#define INTEL_BTS_PMU_NAME "intel_bts"
> +
> +/* Indices into the priv[] array of the Intel BTS auxtrace info event */
> +enum {
> + INTEL_BTS_PMU_TYPE,
> + INTEL_BTS_TIME_SHIFT,
> + INTEL_BTS_TIME_MULT,
> + INTEL_BTS_TIME_ZERO,
> + INTEL_BTS_CAP_USER_TIME_ZERO,
> + INTEL_BTS_SNAPSHOT_MODE,
> + INTEL_BTS_AUXTRACE_PRIV_MAX,
> +};
> +
> +/* Size in bytes of all priv[] entries */
> +#define INTEL_BTS_AUXTRACE_PRIV_SIZE (INTEL_BTS_AUXTRACE_PRIV_MAX * sizeof(u64))
> +
> +struct auxtrace_record;
> +struct perf_tool;
> +union perf_event;
> +struct perf_session;
> +
> +struct auxtrace_record *intel_bts_recording_init(int *err);
> +
> +/* Set up Intel BTS decoding for @session from an auxtrace info event */
> +int intel_bts_process_auxtrace_info(union perf_event *event,
> + struct perf_session *session);
> +
> +#endif
> diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c
> index b5ea26bc290d..52d569cda606 100644
> --- a/tools/perf/util/pmu.c
> +++ b/tools/perf/util/pmu.c
> @@ -462,10 +462,6 @@ static struct perf_pmu *pmu_lookup(const char *name)
> LIST_HEAD(aliases);
> __u32 type;
>
> - /* No support for intel_bts so disallow it */
> - if (!strcmp(name, "intel_bts"))
> - return NULL;
> -
> /*
> * The pmu data we store & need consists of the pmu
> * type value and format definitions. Load both right
> --
> 1.9.1
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/