[PATCH 3/6] x86/intel_rdt: Intel Code Data Prioritization detection

From: Vikas Shivappa
Date: Sun Aug 23 2015 - 18:46:40 EST


This patch adds enumeration support for the Code Data Prioritization (CDP)
feature found in future Intel Xeon processors.

CDP is an extension to Cache Allocation that lets threads allocate a subset
of the L3 cache for code and data separately. The allocation is represented
by the code or data cache mask MSRs (IA32_L3_QOS_MASK_n). Each Class of
Service is associated with one dcache_mask MSR and one icache_mask
MSR. The association for a CLOSid 'n' is shown below:

data_mask_address(n) = base + (n << 1)
code_mask_address(n) = base + (n << 1) + 1

During scheduling, the kernel writes the CLOSid of the thread to
IA32_PQR_ASSOC_MSR.

This patch includes the enumeration routines, which cover CPUID
enumeration as well as probing MSR_IA32_PQOS_CFG.

Signed-off-by: Vikas Shivappa <vikas.shivappa@xxxxxxxxxxxxxxx>
---
arch/x86/include/asm/cpufeature.h | 5 +++-
arch/x86/include/asm/intel_rdt.h | 1 +
arch/x86/include/asm/rdt_common.h | 1 +
arch/x86/kernel/cpu/common.c | 1 +
arch/x86/kernel/cpu/intel_rdt.c | 58 ++++++++++++++++++++++++++-------------
5 files changed, 46 insertions(+), 20 deletions(-)

diff --git a/arch/x86/include/asm/cpufeature.h b/arch/x86/include/asm/cpufeature.h
index ae5ae9d..c0d435a 100644
--- a/arch/x86/include/asm/cpufeature.h
+++ b/arch/x86/include/asm/cpufeature.h
@@ -12,7 +12,7 @@
#include <asm/disabled-features.h>
#endif

-#define NCAPINTS 14 /* N 32-bit words worth of info */
+#define NCAPINTS 15 /* N 32-bit words worth of info */
#define NBUGINTS 1 /* N 32-bit bug flags */

/*
@@ -256,6 +256,9 @@
/* Intel-defined CPU features, CPUID level 0x00000010:0 (ebx), word 13 */
#define X86_FEATURE_CAT_L3 (13*32 + 1) /* Cache Allocation L3 */

+/* Intel-defined CPU QoS Sub-leaf, CPUID level 0x00000010:1 (ecx), word 14 */
+#define X86_FEATURE_CDP_L3 (14*32 + 2) /* Code data prioritization L3 */
+
/*
* BUG word(s)
*/
diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h
index 78df3d7..68f220e 100644
--- a/arch/x86/include/asm/intel_rdt.h
+++ b/arch/x86/include/asm/intel_rdt.h
@@ -16,6 +16,7 @@ extern void __intel_rdt_sched_in(void);

struct rdt_subsys_info {
unsigned long *closmap;
+ bool cdp_supported;
};

struct intel_rdt {
diff --git a/arch/x86/include/asm/rdt_common.h b/arch/x86/include/asm/rdt_common.h
index 01502c5..8b75128 100644
--- a/arch/x86/include/asm/rdt_common.h
+++ b/arch/x86/include/asm/rdt_common.h
@@ -2,6 +2,7 @@
#define _X86_RDT_H_

#define MSR_IA32_PQR_ASSOC 0x0c8f
+#define MSR_IA32_PQOS_CFG 0x0c81

/**
* struct intel_pqr_state - State cache for the PQR MSR
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index fd014f5..cf5f962 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -682,6 +682,7 @@ void get_cpu_cap(struct cpuinfo_x86 *c)
cpuid_count(0x00000010, 1, &eax, &ebx, &ecx, &edx);
c->x86_cache_max_closid = edx + 1;
c->x86_cache_max_cbm_len = eax + 1;
+ c->x86_capability[14] = ecx;
}
}

diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index b2752cc..b8dcb30 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -50,8 +50,30 @@ static cpumask_t rdt_cpumask;
#define rdt_for_each_child(pos_css, parent_ir) \
css_for_each_child((pos_css), &(parent_ir)->css)

+static inline bool msr_probe_rdt(unsigned int msr,u32 l, u32 h)
+{
+ u32 h_old, h_new, h_tmp;
+ u32 l_old, l_new, l_tmp;
+
+ if (rdmsr_safe(msr, &l_old, &h_old))
+ return false;
+
+ l_tmp = l_old ^ l;
+ h_tmp = h_old ^ h;
+ if (wrmsr_safe(msr, l_tmp, h_tmp) ||
+ rdmsr_safe(msr, &l_new, &h_new))
+ return false;
+
+ if ((l_tmp != l_new) || (h_tmp != h_new))
+ return false;
+
+ wrmsr_safe(msr, l_old, h_old);
+
+ return true;
+}
+
/*
- * hsw_probetest() - Have to do probe test for Intel haswell CPUs as it
+ * cache_alloc_hswprobe() - Have to do probe test for Intel haswell CPUs as it
* does not have CPUID enumeration support for Cache allocation.
*
* Probes by writing to the high 32 bits(CLOSid) of the IA32_PQR_MSR and
@@ -59,26 +81,11 @@ static cpumask_t rdt_cpumask;
* bitmask length on hsw. The minimum cache bitmask length allowed for
* HSW is 2 bits.
*/
-static inline bool hsw_probetest(void)
+static inline bool cache_alloc_hswprobe(void)
{
- u32 l, h_old, h_new, h_tmp;
-
- if (rdmsr_safe(MSR_IA32_PQR_ASSOC, &l, &h_old))
+ if (!msr_probe_rdt(MSR_IA32_PQR_ASSOC, 0, 0x1U))
return false;

- /*
- * Default value is always 0 if feature is present.
- */
- h_tmp = h_old ^ 0x1U;
- if (wrmsr_safe(MSR_IA32_PQR_ASSOC, l, h_tmp) ||
- rdmsr_safe(MSR_IA32_PQR_ASSOC, &l, &h_new))
- return false;
-
- if (h_tmp != h_new)
- return false;
-
- wrmsr_safe(MSR_IA32_PQR_ASSOC, l, h_old);
-
boot_cpu_data.x86_cache_max_closid = 4;
boot_cpu_data.x86_cache_max_cbm_len = 20;
min_bitmask_len = 2;
@@ -95,7 +102,16 @@ static inline bool cache_alloc_supported(struct cpuinfo_x86 *c)
* Probe test for Haswell CPUs.
*/
if (c->x86 == 0x6 && c->x86_model == 0x3f)
- return hsw_probetest();
+ return cache_alloc_hswprobe();
+
+ return false;
+}
+
+static inline bool cdp_supported(struct cpuinfo_x86 *c)
+{
+ if (cpu_has(c, X86_FEATURE_CDP_L3) &&
+ msr_probe_rdt(MSR_IA32_PQOS_CFG, 0x1U, 0))
+ return true;

return false;
}
@@ -499,6 +515,10 @@ static int __init intel_rdt_late_init(void)

static_key_slow_inc(&rdt_enable_key);
pr_info("Intel cache allocation enabled\n");
+ if (cdp_supported(c)) {
+ rdtss_info.cdp_supported = true;
+ pr_info("Intel code data prioritization enabled\n");
+ }
out_err:

return err;
--
1.9.1

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/