[PATCH 3/4] perf, x86: Add Intel SandyBridge precise store support

From: Lin Ming
Date: Mon Jul 04 2011 - 03:57:31 EST


Implement the Intel precise memory store event for SandyBridge.

$ perf mem -t store record make -j8

<building kernel ..., monitoring memory store operations>

$ perf mem -t store report

Memory store operation statistics
=================================
data-cache hit: 8138
data-cache miss: 0
STLB hit: 8138
STLB miss: 0
Locked access: 0
Unlocked access: 8138

Signed-off-by: Lin Ming <ming.m.lin@xxxxxxxxx>
---
arch/x86/kernel/cpu/perf_event_intel.c | 3 +-
arch/x86/kernel/cpu/perf_event_intel_ds.c | 30 +++++++++++++++++++++++++++++
2 files changed, 32 insertions(+), 1 deletions(-)

diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c
index dde9041..eede1f3 100644
--- a/arch/x86/kernel/cpu/perf_event_intel.c
+++ b/arch/x86/kernel/cpu/perf_event_intel.c
@@ -1523,8 +1523,9 @@ static __init int intel_pmu_init(void)
/* UOPS_DISPATCHED.THREAD,c=1,i=1 to count stall cycles*/
intel_perfmon_event_map[PERF_COUNT_HW_STALLED_CYCLES_BACKEND] = 0x18001b1;

- /* Memory load latency */
+ /* Memory load latency and precise store */
intel_perfmon_event_map[PERF_COUNT_HW_MEM_LOAD] = 0x01cd;
+ intel_perfmon_event_map[PERF_COUNT_HW_MEM_STORE] = 0x02cd;

pr_cont("SandyBridge events, ");
break;
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index d2d3155..bd7289b 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -23,6 +23,26 @@ static u64 load_latency_data_source[] = {

#define LOAD_LATENCY_DATA_SOURCE_MASK 0x0FULL

+#define PRECISE_STORE_DCU_HIT (1ULL << 0)
+#define PRECISE_STORE_STLB_MISS (1ULL << 4)
+#define PRECISE_STORE_LOCKED_ACCESS (1ULL << 5)
+
+static u64 precise_store_data(u64 status)
+{
+ u64 extra = 0;
+
+ if (status & PRECISE_STORE_DCU_HIT)
+ extra |= MEM_STORE_DCU_HIT;
+
+ if (!(status & PRECISE_STORE_STLB_MISS))
+ extra |= MEM_STORE_STLB_HIT;
+
+ if (status & PRECISE_STORE_LOCKED_ACCESS)
+ extra |= MEM_STORE_LOCKED_ACCESS;
+
+ return extra;
+}
+
/* The maximal number of PEBS events: */
#define MAX_PEBS_EVENTS 4

@@ -637,6 +657,16 @@ static void __intel_pmu_pebs_event(struct perf_event *event,
LOAD_LATENCY_DATA_SOURCE_MASK];
}

+ if (event->attr.config == PERF_COUNT_HW_MEM_STORE) {
+ sample_type = event->attr.sample_type;
+
+ if (sample_type & PERF_SAMPLE_ADDR)
+ data.addr = pebs->dla;
+
+ if (sample_type & PERF_SAMPLE_EXTRA)
+ data.extra = precise_store_data(pebs->dse);
+ }
+
/*
* We use the interrupt regs as a base because the PEBS record
* does not contain a full regs set, specifically it seems to
--
1.7.5.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/