From cc40bf9273f68814be59a3c2f908d9b3e34f4f22 Mon Sep 17 00:00:00 2001
From: Luming Yu
Date: Fri, 29 Sep 2017 21:50:11 +0800
Subject: [PATCH v1 1/9] early pt: Basic support for early Intel processor trace

Add basic support for early Intel Processor Trace (PT) with zero
dependencies on other technologies in the Linux kernel:

1. Per-cpu trace dump for basic-block-level code analysis.
2. Any code can be traced, including the tracer itself, right after
   tracing is enabled.

Signed-off-by: Luming Yu
---
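A minimal userspace consumer of this interface might look like the sketch
below (illustrative only, not part of this patch): it binds a file
descriptor to one cpu, mmaps that cpu's trace buffer read-only, stops the
trace via PT_GET_OFFSET and dumps the raw PT stream for offline decoding.
The PT_* ioctl numbers and the "simple-pt" device name are taken from
early_pt.c; since no uapi header is added here, a real tool would have to
carry its own copies of these definitions.

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <sys/ioctl.h>
#include <sys/mman.h>

/* Must match the definitions in arch/x86/events/intel/early_pt.c */
#define PT_SET_CPU	9901
#define PT_GET_SIZE	9902
#define PT_GET_OFFSET	9903

int main(int argc, char **argv)
{
	int cpu = argc > 1 ? atoi(argv[1]) : 0;
	int fd = open("/dev/simple-pt", O_RDONLY);
	unsigned int size, offset;
	void *buf;

	if (fd < 0) {
		perror("open /dev/simple-pt");
		return 1;
	}
	/* Bind this fd to one cpu's buffer, then query the buffer size. */
	if (ioctl(fd, PT_SET_CPU, cpu) < 0 ||
	    ioctl(fd, PT_GET_SIZE, &size) < 0) {
		perror("ioctl");
		return 1;
	}
	buf = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, 0);
	if (buf == MAP_FAILED) {
		perror("mmap");
		return 1;
	}
	/* PT_GET_OFFSET stops tracing on that cpu and reports the fill level. */
	if (ioctl(fd, PT_GET_OFFSET, &offset) < 0) {
		perror("ioctl PT_GET_OFFSET");
		return 1;
	}
	fwrite(buf, 1, offset, stdout);	/* raw PT stream for offline decoding */
	return 0;
}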
 arch/x86/events/Kconfig          |   6 +
 arch/x86/events/intel/Makefile   |   1 +
 arch/x86/events/intel/early_pt.c | 337 +++++++++++++++++++++++++++++++++++++++
 3 files changed, 344 insertions(+)
 create mode 100644 arch/x86/events/intel/early_pt.c

diff --git a/arch/x86/events/Kconfig b/arch/x86/events/Kconfig
index 98397db..4205918 100644
--- a/arch/x86/events/Kconfig
+++ b/arch/x86/events/Kconfig
@@ -32,5 +32,11 @@ config PERF_EVENTS_AMD_POWER
 	  Currently, it leverages X86_FEATURE_ACC_POWER
 	  (CPUID Fn8000_0007_EDX[12]) interface to calculate the
 	  average power consumption on Family 15h processors.
 
+config EARLY_PT
+	tristate "Intel early PT"
+	depends on CPU_SUP_INTEL
+	default n
+	---help---
+	  Basic early Intel Processor Trace support with per-cpu trace buffers.
 endmenu
diff --git a/arch/x86/events/intel/Makefile b/arch/x86/events/intel/Makefile
index e9d8520..d04d8ec 100644
--- a/arch/x86/events/intel/Makefile
+++ b/arch/x86/events/intel/Makefile
@@ -1,6 +1,7 @@
 obj-$(CONFIG_CPU_SUP_INTEL)		+= core.o bts.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= ds.o knc.o
 obj-$(CONFIG_CPU_SUP_INTEL)		+= lbr.o p4.o p6.o pt.o
+obj-$(CONFIG_EARLY_PT)			+= early_pt.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_RAPL)	+= intel-rapl-perf.o
 intel-rapl-perf-objs := rapl.o
 obj-$(CONFIG_PERF_EVENTS_INTEL_UNCORE)	+= intel-uncore.o
diff --git a/arch/x86/events/intel/early_pt.c b/arch/x86/events/intel/early_pt.c
new file mode 100644
index 0000000..67513ed
--- /dev/null
+++ b/arch/x86/events/intel/early_pt.c
@@ -0,0 +1,337 @@
+/* I can trace myself! */
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/miscdevice.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/percpu.h>
+#include <linux/smp.h>
+#include <linux/uaccess.h>
+#include <asm/msr.h>
+#include <asm/processor.h>
+
+#include "../perf_event.h"
+#include "pt.h"
+
+/* ioctl commands */
+#define PT_SET_CPU	9901
+#define PT_GET_SIZE	9902
+#define PT_GET_OFFSET	9903
+#define PT_STOP		9904
+#define PT_START	9905
+
+#define PT_ERROR	BIT_ULL(4)
+#define MTC_MASK	(0xf << 14)
+#define CYC_MASK	(0xf << 19)
+#define PSB_MASK	(0xf << 24)
+
+#define ADDR0_SHIFT	32
+#define ADDR1_SHIFT	36
+#define ADDR0_MASK	(0x1ULL << ADDR0_SHIFT)
+#define ADDR1_MASK	(0x1ULL << ADDR1_SHIFT)
+
+#define MSR_IA32_CR3_MATCH	0x00000572
+
+#define CYC_EN		BIT_ULL(1)
+#define MTC_EN		BIT_ULL(9)
+
+#define TOPA_STOP	BIT_ULL(4)
+#define TOPA_INT	BIT_ULL(2)
+#define TOPA_END	BIT_ULL(0)
+#define TOPA_SIZE_SHIFT	6
+
+/* 2^9 pages == 2 MB trace buffer per cpu */
+static int early_pt_buffer_order = 9;
+/* single range output, no ToPA */
+static bool single_range = true;
+
+static DEFINE_PER_CPU(bool, early_pt_running);
+static DEFINE_PER_CPU(u64, pt_offset);
+static DEFINE_PER_CPU(u64, pt_buffer_cpu);
+
+static bool has_cr3_match;
+static unsigned addr_cfg_max;
+static int pt_num_buffers;
+static unsigned psb_freq_mask = 0;
+static unsigned cyc_thresh_mask = 0;
+static unsigned mtc_freq_mask = 0;
+static unsigned addr_range_num = 0;
+
+static int early_pt_buffer_init(int cpu)
+{
+	u64 pt_buffer;
+
+	pt_buffer = per_cpu(pt_buffer_cpu, cpu);
+
+	if (!pt_buffer) {
+		pt_buffer = __get_free_pages(GFP_KERNEL |
+			__GFP_NOWARN | __GFP_ZERO, early_pt_buffer_order);
+		if (!pt_buffer) {
+			pr_err("cpu %d, cannot allocate %lu KB buffer\n", cpu,
+				(PAGE_SIZE << early_pt_buffer_order) / 1024);
+			return -ENOMEM;
+		}
+		per_cpu(pt_buffer_cpu, cpu) = pt_buffer;
+	} else
+		memset((void *)pt_buffer, 0, PAGE_SIZE <<
+			early_pt_buffer_order);
+
+	return 0;
+}
+
+/* Runs on every cpu via on_each_cpu(); arg is unused. */
+static void early_pt_buffer_exit(void *arg)
+{
+	int cpu = raw_smp_processor_id();
+
+	if (per_cpu(pt_buffer_cpu, cpu)) {
+		free_pages(per_cpu(pt_buffer_cpu, cpu), early_pt_buffer_order);
+		per_cpu(pt_buffer_cpu, cpu) = 0;
+	}
+}
+
+/*
+ * cpuid Intel PT detection and capability enumeration.
+ */
+static int early_pt_cpuid_caps(void)
+{
+	unsigned a, b, c, d;
+	unsigned a1, b1, c1, d1;
+
+	/*
+	 * cpuid leaf 0x14 enumerates PT
+	 */
+	cpuid(0, &a, &b, &c, &d);
+	if (a < 0x14) {
+		pr_info("No cpuid func 0x14 (for PT) available\n");
+		return -ENODEV;
+	}
+	cpuid_count(0x07, 0, &a, &b, &c, &d);
+	if ((b & BIT(25)) == 0) {
+		pr_info("No PT available\n");
+		return -ENODEV;
+	}
+	cpuid_count(0x14, 0, &a, &b, &c, &d);
+	if ((c & BIT(0)) == 0) {
+		pr_info("No ToPA available\n");
+		return -ENODEV;
+	}
+	has_cr3_match = !!(b & BIT(0));
+	if (b & BIT(2))
+		addr_cfg_max = 2;
+	if (!(c & BIT(1)))
+		pt_num_buffers = 1;
+	pt_num_buffers = min_t(unsigned, pt_num_buffers,
+			       (PAGE_SIZE / 8) - 1);
+	a1 = b1 = c1 = d1 = 0;
+	if (a >= 1)
+		cpuid_count(0x14, 1, &a1, &b1, &c1, &d1);
+	if (b & BIT(1)) {
+		mtc_freq_mask = (a1 >> 16) & 0xffff;
+		cyc_thresh_mask = b1 & 0xffff;
+		psb_freq_mask = (b1 >> 16) & 0xffff;
+		addr_range_num = a1 & 0x3;
+	}
+	return 0;
+}
+
+static int start_early_pt(void *arg)
+{
+	u64 val, oldval;
+	int cpu;
+
+	if (rdmsrl_safe(MSR_IA32_RTIT_CTL, &val) < 0) {
+		pr_info("start_early_pt: rdmsrl RTIT_CTL failed\n");
+		return -1;
+	}
+	oldval = val;
+
+	cpu = raw_smp_processor_id();
+	early_pt_buffer_init(cpu);
+
+	/*
+	 * Q: How should we handle PT that is already enabled?
+	 * A: Disable it, then re-enable it with our configuration.
+	 */
+	if (val & RTIT_CTL_TRACEEN)
+		wrmsrl_safe(MSR_IA32_RTIT_CTL, val & ~RTIT_CTL_TRACEEN);
+
+	if (wrmsrl_safe(MSR_IA32_RTIT_OUTPUT_BASE,
+			__pa(__this_cpu_read(pt_buffer_cpu))) < 0) {
+		pr_info("wrmsrl output base failed\n");
+		return -1;
+	}
+
+	if (single_range)
+		wrmsrl_safe(MSR_IA32_RTIT_OUTPUT_MASK,
+			((1ULL << (PAGE_SHIFT + early_pt_buffer_order)) - 1));
+	else
+		wrmsrl_safe(MSR_IA32_RTIT_OUTPUT_MASK, 0ULL);
+	wrmsrl_safe(MSR_IA32_RTIT_STATUS, 0ULL);
+
+	val &= ~(RTIT_CTL_TSC_EN | RTIT_CTL_OS | RTIT_CTL_USR | RTIT_CTL_CR3EN |
+		RTIT_CTL_DISRETC | RTIT_CTL_TOPA | RTIT_CTL_CYCLEACC |
+		RTIT_CTL_TRACEEN | RTIT_CTL_BRANCH_EN | RTIT_CTL_MTC_EN |
+		MTC_MASK | CYC_MASK | PSB_MASK | ADDR0_MASK | ADDR1_MASK);
+
+	/* enable trace */
+	val |= RTIT_CTL_TRACEEN;
+	val |= RTIT_CTL_BRANCH_EN;
+	val |= RTIT_CTL_TSC_EN;
+	val |= RTIT_CTL_OS;
+	val |= RTIT_CTL_USR;
+
+	if (wrmsrl_safe(MSR_IA32_RTIT_CTL, val) < 0) {
+		pr_info("early_pt start failed on cpu[%d]\n", cpu);
+		__this_cpu_write(early_pt_running, false);
+		return -1;
+	}
+
+	pr_info("early_pt started on cpu[%d]\n", cpu);
+	__this_cpu_write(early_pt_running, true);
+	return 0;
+}
+
+static void start_pt_no_return(void *arg)
+{
+	start_early_pt(arg);
+}
+
+static struct miscdevice early_pt_miscdev;
+
+static int early_pt_init(void)
+{
+	int err;
+
+	err = early_pt_cpuid_caps();
+	if (err < 0) {
+		pr_info("early_pt_init: no feature available\n");
+		return err;
+	}
+	on_each_cpu(start_pt_no_return, NULL, 0);
+	return 0;
+}
+
+static int late_pt_init(void)
+{
+	int err;
+	unsigned int i;
+
+	for_each_online_cpu(i) {
+		if (!per_cpu(early_pt_running, i)) {
+			pr_err("late_pt_init: failed, early pt not running\n");
+			return -ENODEV;
+		}
+	}
+
+	err = misc_register(&early_pt_miscdev);
+	if (err < 0) {
+		pr_err("misc_register early_pt_miscdev failed\n");
+		return err;
+	}
+	return 0;
+}
+
+static int stop_early_pt(void *arg)
+{
+	u64 offset;
+	u64 ctl, status;
+	int cpu;
+
+	cpu = raw_smp_processor_id();
+
+	if (!__this_cpu_read(early_pt_running))
+		return -1;
+	rdmsrl_safe(MSR_IA32_RTIT_CTL, &ctl);
+	rdmsrl_safe(MSR_IA32_RTIT_STATUS, &status);
+	if (!(ctl & RTIT_CTL_TRACEEN))
+		pr_debug("cpu %d, PT not enabled on stop, ctl %llx, status %llx\n",
+			cpu, ctl, status);
+	if (status & PT_ERROR) {
+		pr_info("cpu %d, error: status %llx\n", cpu, status);
+		wrmsrl_safe(MSR_IA32_RTIT_STATUS, 0);
+	}
+	wrmsrl_safe(MSR_IA32_RTIT_CTL, 0LL);
+	rdmsrl_safe(MSR_IA32_RTIT_OUTPUT_MASK, &offset);
+
+	/* Bits 63:32 of OUTPUT_MASK hold the current write offset. */
+	__this_cpu_write(pt_offset, (offset >> 32));
+	__this_cpu_write(early_pt_running, false);
+	pr_info("early_pt stopped on cpu[%d]\n", cpu);
+	return 0;
+}
+
+static void stop_pt_no_return(void *arg)
+{
+	stop_early_pt(arg);
+}
+
+static void early_pt_exit(void)
+{
+	on_each_cpu(stop_pt_no_return, NULL, 0);
+	on_each_cpu(early_pt_buffer_exit, NULL, 0);
+}
+
+core_initcall(early_pt_init);
+late_initcall(late_pt_init);
+
+static int early_pt_mmap(struct file *file, struct vm_area_struct *vma)
+{
+	unsigned long len = vma->vm_end - vma->vm_start;
+	int cpu = (long) file->private_data;
+	unsigned long buffer_size = PAGE_SIZE << early_pt_buffer_order;
+
+	vma->vm_flags &= ~VM_MAYWRITE;
+
+	if (len % PAGE_SIZE || len != buffer_size || vma->vm_pgoff)
+		return -EINVAL;
+
+	if (vma->vm_flags & VM_WRITE)
+		return -EPERM;
+
+	return remap_pfn_range(vma, vma->vm_start,
+		__pa(per_cpu(pt_buffer_cpu, cpu)) >> PAGE_SHIFT,
+		buffer_size,
+		vma->vm_page_prot);
+}
+
+static long early_pt_ioctl(struct file *file, unsigned int cmd,
+		unsigned long arg)
+{
+	unsigned long cpu;
+
+	switch (cmd) {
+	case PT_SET_CPU: {
+		cpu = arg;
+		if (cpu >= NR_CPUS || !cpu_online(cpu))
+			return -EINVAL;
+		file->private_data = (void *)cpu;
+		return 0;
+	}
+	case PT_GET_SIZE:
+		return put_user((PAGE_SIZE << early_pt_buffer_order),
+				(int *)arg);
+	case PT_GET_OFFSET: {
+		unsigned offset;
+
+		cpu = (unsigned long) file->private_data;
+		/* Stop tracing on that cpu so the reported offset is stable. */
+		smp_call_on_cpu(cpu, stop_early_pt, NULL, true);
+		offset = per_cpu(pt_offset, (long)file->private_data);
+		return put_user(offset, (int *)arg);
+	}
+	case PT_STOP:
+		early_pt_exit();
+		return 0;
+	case PT_START:
+		cpu = (unsigned long) file->private_data;
+		smp_call_on_cpu(cpu, start_early_pt, NULL, true);
+		return 0;
+	default:
+		return -ENOTTY;
+	}
+}
+
+static const struct file_operations early_pt_fops = {
+	.mmap		= early_pt_mmap,
+	.unlocked_ioctl	= early_pt_ioctl,
+	.llseek		= noop_llseek,
+};
+
+static struct miscdevice early_pt_miscdev = {
+	.minor	= MISC_DYNAMIC_MINOR,
+	.name	= "simple-pt",
+	.fops	= &early_pt_fops,
+};
-- 
2.7.5