[RFC PATCH 10/12] perf/x86: Add ZMM in extended regs

From: kan . liang
Date: Fri Jun 13 2025 - 10:02:42 EST


From: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>

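Support the ZMM registers as extended registers. They are exposed as two
groups, ZMMH0-15 (XFEATURE_MASK_ZMM_Hi256) and ZMM16-31
(XFEATURE_MASK_Hi16_ZMM), and can be requested via
sample_ext_regs_intr/user.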

Only a PMU with the PERF_PMU_CAP_EXTENDED_REGS2 capability supports the
feature. The register values are retrieved via XSAVES.

Signed-off-by: Kan Liang <kan.liang@xxxxxxxxxxxxxxx>
---
arch/x86/events/core.c | 14 +++++++++
arch/x86/events/perf_event.h | 11 ++++++-
arch/x86/include/asm/perf_event.h | 2 ++
arch/x86/include/uapi/asm/perf_regs.h | 43 +++++++++++++++++++++++++--
arch/x86/kernel/perf_regs.c | 10 +++++++
5 files changed, 77 insertions(+), 3 deletions(-)

diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c
index 741e6dfd50a5..9bcef9a32dd2 100644
--- a/arch/x86/events/core.c
+++ b/arch/x86/events/core.c
@@ -437,6 +437,10 @@ static void x86_pmu_get_ext_regs(struct x86_perf_regs *perf_regs, u64 mask)
__x86_pmu_get_regs(XFEATURE_MASK_YMM, perf_regs->ymmh_regs, XSAVE_YMM_SIZE);
__x86_pmu_get_regs(XFEATURE_MASK_APX, perf_regs->apx_regs, sizeof(struct apx_state));
__x86_pmu_get_regs(XFEATURE_MASK_OPMASK, perf_regs->opmask_regs, sizeof(struct avx_512_opmask_state));
+ __x86_pmu_get_regs(XFEATURE_MASK_ZMM_Hi256, perf_regs->zmmh_regs,
+ sizeof(struct avx_512_zmm_uppers_state));
+ __x86_pmu_get_regs(XFEATURE_MASK_Hi16_ZMM, perf_regs->h16zmm_regs,
+ sizeof(struct avx_512_hi16_state));
}

static void release_ext_regs_buffers(void)
@@ -468,6 +472,10 @@ static void reserve_ext_regs_buffers(void)
size += sizeof(struct apx_state);
if (x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_OPMASK))
size += sizeof(struct avx_512_opmask_state);
+ if (x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_ZMMH))
+ size += sizeof(struct avx_512_zmm_uppers_state);
+ if (x86_pmu.ext_regs_mask & BIT_ULL(X86_EXT_REGS_H16ZMM))
+ size += sizeof(struct avx_512_hi16_state);

/* XSAVE feature requires 64-byte alignment. */
size += 64;
@@ -747,6 +755,8 @@ int x86_pmu_hw_config(struct perf_event *event)
check_ext_regs(X86_EXT_REGS_YMM);
check_ext_regs(X86_EXT_REGS_APX);
check_ext_regs(X86_EXT_REGS_OPMASK);
+ check_ext_regs(X86_EXT_REGS_ZMMH);
+ check_ext_regs(X86_EXT_REGS_H16ZMM);
}
}
return x86_setup_perfctr(event);
@@ -1852,6 +1862,10 @@ void x86_pmu_setup_regs_data(struct perf_event *event,
XFEATURE_MASK_APX, PERF_X86_EXT_REG_APX_SIZE);
init_ext_regs_data(X86_EXT_REGS_OPMASK, perf_regs->opmask_regs,
XFEATURE_MASK_OPMASK, PERF_X86_EXT_REG_OPMASK_SIZE);
+ init_ext_regs_data(X86_EXT_REGS_ZMMH, perf_regs->zmmh_regs,
+ XFEATURE_MASK_ZMM_Hi256, PERF_X86_EXT_REG_ZMMH_SIZE);
+ init_ext_regs_data(X86_EXT_REGS_H16ZMM, perf_regs->h16zmm_regs,
+ XFEATURE_MASK_Hi16_ZMM, PERF_X86_EXT_REG_H16ZMM_SIZE);

mask &= ~ignore_mask;
if (mask)
diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h
index c2626dcea1a0..93a65c529afe 100644
--- a/arch/x86/events/perf_event.h
+++ b/arch/x86/events/perf_event.h
@@ -692,6 +692,8 @@ enum {
X86_EXT_REGS_YMM,
X86_EXT_REGS_APX,
X86_EXT_REGS_OPMASK,
+ X86_EXT_REGS_ZMMH,
+ X86_EXT_REGS_H16ZMM,
};

#define PERF_PEBS_DATA_SOURCE_MAX 0x100
@@ -1324,7 +1326,7 @@ static inline u64 x86_pmu_get_event_config(struct perf_event *event)

static inline int get_num_ext_regs(u64 *ext_regs, unsigned int type)
{
- u64 mask;
+ u64 mask, mask2;

switch (type) {
case X86_EXT_REGS_YMM:
@@ -1336,6 +1338,13 @@ static inline int get_num_ext_regs(u64 *ext_regs, unsigned int type)
case X86_EXT_REGS_OPMASK:
mask = GENMASK_ULL(PERF_REG_X86_OPMASK7, PERF_REG_X86_OPMASK0);
return hweight64(ext_regs[0] & mask);
+ case X86_EXT_REGS_ZMMH:
+ mask = GENMASK_ULL(PERF_REG_X86_ZMMH15, PERF_REG_X86_ZMMH0);
+ return hweight64(ext_regs[0] & mask);
+ case X86_EXT_REGS_H16ZMM:
+ mask = GENMASK_ULL(PERF_REG_X86_EXT_REGS_64, PERF_REG_X86_ZMM16);
+ mask2 = GENMASK_ULL(PERF_REG_X86_ZMM31 - 64, 0);
+ return hweight64(ext_regs[0] & mask) + hweight64(ext_regs[1] & mask2);
default:
return 0;
}
diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h
index 4e971f38ff94..eb35ba9afbb4 100644
--- a/arch/x86/include/asm/perf_event.h
+++ b/arch/x86/include/asm/perf_event.h
@@ -596,6 +596,8 @@ struct x86_perf_regs {
u64 *ymmh_regs;
u64 *apx_regs;
u64 *opmask_regs;
+ u64 *zmmh_regs;
+ u64 *h16zmm_regs;
};

extern unsigned long perf_arch_instruction_pointer(struct pt_regs *regs);
diff --git a/arch/x86/include/uapi/asm/perf_regs.h b/arch/x86/include/uapi/asm/perf_regs.h
index b9ec58b98c5e..c43a025b0c01 100644
--- a/arch/x86/include/uapi/asm/perf_regs.h
+++ b/arch/x86/include/uapi/asm/perf_regs.h
@@ -102,15 +102,54 @@ enum perf_event_x86_ext_regs {
PERF_REG_X86_OPMASK6,
PERF_REG_X86_OPMASK7,

- PERF_REG_X86_EXT_REGS_MAX = PERF_REG_X86_OPMASK7,
+ /* ZMMH 0-15 Registers */
+ PERF_REG_X86_ZMMH0,
+ PERF_REG_X86_ZMMH1,
+ PERF_REG_X86_ZMMH2,
+ PERF_REG_X86_ZMMH3,
+ PERF_REG_X86_ZMMH4,
+ PERF_REG_X86_ZMMH5,
+ PERF_REG_X86_ZMMH6,
+ PERF_REG_X86_ZMMH7,
+ PERF_REG_X86_ZMMH8,
+ PERF_REG_X86_ZMMH9,
+ PERF_REG_X86_ZMMH10,
+ PERF_REG_X86_ZMMH11,
+ PERF_REG_X86_ZMMH12,
+ PERF_REG_X86_ZMMH13,
+ PERF_REG_X86_ZMMH14,
+ PERF_REG_X86_ZMMH15,
+
+ /* H16ZMM 16-31 Registers */
+ PERF_REG_X86_ZMM16,
+ PERF_REG_X86_ZMM17,
+ PERF_REG_X86_ZMM18,
+ PERF_REG_X86_ZMM19,
+ PERF_REG_X86_ZMM20,
+ PERF_REG_X86_ZMM21,
+ PERF_REG_X86_ZMM22,
+ PERF_REG_X86_ZMM23,
+ PERF_REG_X86_ZMM24,
+ PERF_REG_X86_ZMM25,
+ PERF_REG_X86_ZMM26,
+ PERF_REG_X86_ZMM27,
+ PERF_REG_X86_ZMM28,
+ PERF_REG_X86_ZMM29,
+ PERF_REG_X86_ZMM30,
+ PERF_REG_X86_ZMM31,
+
+ PERF_REG_X86_EXT_REGS_64 = PERF_REG_X86_ZMM23,
+ PERF_REG_X86_EXT_REGS_MAX = PERF_REG_X86_ZMM31,
};

enum perf_event_x86_ext_reg_size {
PERF_X86_EXT_REG_YMMH_SIZE = 2,
PERF_X86_EXT_REG_APX_SIZE = 1,
PERF_X86_EXT_REG_OPMASK_SIZE = 1,
+ PERF_X86_EXT_REG_ZMMH_SIZE = 4,
+ PERF_X86_EXT_REG_H16ZMM_SIZE = 8,

/* max of PERF_REG_X86_XXX_SIZE */
- PERF_X86_EXT_REG_SIZE_MAX = PERF_X86_EXT_REG_YMMH_SIZE,
+ PERF_X86_EXT_REG_SIZE_MAX = PERF_X86_EXT_REG_H16ZMM_SIZE,
};
#endif /* _ASM_X86_PERF_REGS_H */
diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c
index 34b94b846f00..d5721ea85c5d 100644
--- a/arch/x86/kernel/perf_regs.c
+++ b/arch/x86/kernel/perf_regs.c
@@ -92,6 +92,16 @@ static u64 perf_ext_reg_value(struct pt_regs *regs, int idx,
idx - PERF_REG_X86_OPMASK0,
perf_regs->opmask_regs,
PERF_X86_EXT_REG_OPMASK_SIZE);
+ case PERF_REG_X86_ZMMH0 ... PERF_REG_X86_ZMMH15:
+ return __perf_ext_reg_value(ext, ext_size,
+ idx - PERF_REG_X86_ZMMH0,
+ perf_regs->zmmh_regs,
+ PERF_X86_EXT_REG_ZMMH_SIZE);
+ case PERF_REG_X86_ZMM16 ... PERF_REG_X86_ZMM31:
+ return __perf_ext_reg_value(ext, ext_size,
+ idx - PERF_REG_X86_ZMM16,
+ perf_regs->h16zmm_regs,
+ PERF_X86_EXT_REG_H16ZMM_SIZE);
default:
WARN_ON_ONCE(1);
*ext_size = 0;
--
2.38.1