[PATCH v2 10/18] x86/resctrl: Allow arch to allocate memory needed in resctrl_arch_rmid_read()

From: James Morse
Date: Fri Jan 13 2023 - 13:03:13 EST


Depending on the number of monitors available, Arm's MPAM may need to
allocate a monitor prior to reading the counter value. Allocating a
contended resource may involve sleeping.

All callers of resctrl_arch_rmid_read() read the counter on more than
one domain. If the monitor is allocated globally, there is no need to
allocate and free it for each call to resctrl_arch_rmid_read().

Add arch hooks for this allocation, which need calling before
resctrl_arch_rmid_read(). The allocated monitor is passed to
resctrl_arch_rmid_read(), then freed again afterwards. The helper
can be called on any CPU, and can sleep.

Tested-by: Shaopeng Tan <tan.shaopeng@xxxxxxxxxxx>
Signed-off-by: James Morse <james.morse@xxxxxxx>
---
arch/x86/include/asm/resctrl.h | 11 +++++++
arch/x86/kernel/cpu/resctrl/internal.h | 1 +
arch/x86/kernel/cpu/resctrl/monitor.c | 40 +++++++++++++++++++++++---
include/linux/resctrl.h | 4 +--
4 files changed, 50 insertions(+), 6 deletions(-)

diff --git a/arch/x86/include/asm/resctrl.h b/arch/x86/include/asm/resctrl.h
index d589a82995ac..194a1570af7b 100644
--- a/arch/x86/include/asm/resctrl.h
+++ b/arch/x86/include/asm/resctrl.h
@@ -136,6 +136,17 @@ static inline u32 resctrl_arch_rmid_idx_encode(u32 closid, u32 rmid)
return rmid;
}

+/* x86 can always read an rmid, nothing needs allocating */
+struct rdt_resource;
+static inline int resctrl_arch_mon_ctx_alloc(struct rdt_resource *r, int evtid)
+{
+ might_sleep();
+ return 0;
+};
+
+static inline void resctrl_arch_mon_ctx_free(struct rdt_resource *r, int evtid,
+ int ctx) { };
+
void resctrl_cpu_detect(struct cpuinfo_x86 *c);

#else
diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h
index 1f90a10b75a1..e85e454bec72 100644
--- a/arch/x86/kernel/cpu/resctrl/internal.h
+++ b/arch/x86/kernel/cpu/resctrl/internal.h
@@ -88,6 +88,7 @@ struct rmid_read {
bool first;
int err;
u64 val;
+ int arch_mon_ctx;
};

extern bool rdt_alloc_capable;
diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c
index d6ae4b713801..4e248f4a5f59 100644
--- a/arch/x86/kernel/cpu/resctrl/monitor.c
+++ b/arch/x86/kernel/cpu/resctrl/monitor.c
@@ -15,6 +15,7 @@
* Software Developer Manual June 2016, volume 3, section 17.17.
*/

+#include <linux/cpu.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/slab.h>
@@ -236,7 +237,7 @@ static void __rmid_read(void *arg)

int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, u32 rmid, enum resctrl_event_id eventid,
- u64 *val)
+ u64 *val, int ignored)
{
struct rdt_hw_resource *hw_res = resctrl_to_arch_res(r);
struct rdt_hw_domain *hw_dom = resctrl_to_arch_dom(d);
@@ -285,9 +286,14 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
u32 idx_limit = resctrl_arch_system_num_rmid_idx();
struct rmid_entry *entry;
u32 idx, cur_idx = 1;
+ int arch_mon_ctx;
bool rmid_dirty;
u64 val = 0;

+ arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
+ if (arch_mon_ctx < 0)
+ return;
+
/*
* Skip RMID 0 and start from RMID 1 and check all the RMIDs that
* are marked as busy for occupancy < threshold. If the occupancy
@@ -301,7 +307,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)

entry = __rmid_entry(idx);
if (resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
- QOS_L3_OCCUP_EVENT_ID, &val)) {
+ QOS_L3_OCCUP_EVENT_ID, &val,
+ arch_mon_ctx)) {
rmid_dirty = true;
} else {
rmid_dirty = (val >= resctrl_rmid_realloc_threshold);
@@ -316,6 +323,8 @@ void __check_limbo(struct rdt_domain *d, bool force_free)
}
cur_idx = idx + 1;
}
+
+ resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);
}

bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d)
@@ -407,16 +416,22 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
{
struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3].r_resctrl;
struct rdt_domain *d;
+ int arch_mon_ctx;
u64 val = 0;
u32 idx;
int err;

idx = resctrl_arch_rmid_idx_encode(entry->closid, entry->rmid);

+ arch_mon_ctx = resctrl_arch_mon_ctx_alloc(r, QOS_L3_OCCUP_EVENT_ID);
+ if (arch_mon_ctx < 0)
+ return;
+
entry->busy = 0;
list_for_each_entry(d, &r->domains, list) {
err = resctrl_arch_rmid_read(r, d, entry->closid, entry->rmid,
- QOS_L3_OCCUP_EVENT_ID, &val);
+ QOS_L3_OCCUP_EVENT_ID, &val,
+ arch_mon_ctx);
if (err || val <= resctrl_rmid_realloc_threshold)
continue;

@@ -429,6 +444,7 @@ static void add_rmid_to_limbo(struct rmid_entry *entry)
set_bit(idx, d->rmid_busy_llc);
entry->busy++;
}
+ resctrl_arch_mon_ctx_free(r, QOS_L3_OCCUP_EVENT_ID, arch_mon_ctx);

if (entry->busy)
rmid_limbo_count++;
@@ -465,7 +481,7 @@ static int __mon_event_count(u32 closid, u32 rmid, struct rmid_read *rr)
resctrl_arch_reset_rmid(rr->r, rr->d, closid, rmid, rr->evtid);

rr->err = resctrl_arch_rmid_read(rr->r, rr->d, closid, rmid, rr->evtid,
- &tval);
+ &tval, rr->arch_mon_ctx);
if (rr->err)
return rr->err;

@@ -538,6 +554,9 @@ int mon_event_count(void *info)
int ret;

rdtgrp = rr->rgrp;
+ rr->arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr->r, rr->evtid);
+ if (rr->arch_mon_ctx < 0)
+ return rr->arch_mon_ctx;

ret = __mon_event_count(rdtgrp->closid, rdtgrp->mon.rmid, rr);

@@ -564,6 +583,8 @@ int mon_event_count(void *info)
if (ret == 0)
rr->err = 0;

+ resctrl_arch_mon_ctx_free(rr->r, rr->evtid, rr->arch_mon_ctx);
+
return 0;
}

@@ -700,11 +721,21 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
if (is_mbm_total_enabled()) {
rr.evtid = QOS_L3_MBM_TOTAL_EVENT_ID;
rr.val = 0;
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (rr.arch_mon_ctx < 0)
+ return;
+
__mon_event_count(closid, rmid, &rr);
+
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
rr.val = 0;
+ rr.arch_mon_ctx = resctrl_arch_mon_ctx_alloc(rr.r, rr.evtid);
+ if (rr.arch_mon_ctx < 0)
+ return;
+
__mon_event_count(closid, rmid, &rr);

/*
@@ -714,6 +745,7 @@ static void mbm_update(struct rdt_resource *r, struct rdt_domain *d,
*/
if (is_mba_sc(NULL))
mbm_bw_count(closid, rmid, &rr);
+ resctrl_arch_mon_ctx_free(rr.r, rr.evtid, rr.arch_mon_ctx);
}
}

diff --git a/include/linux/resctrl.h b/include/linux/resctrl.h
index 57d32c3ce06f..d90d3dca48e9 100644
--- a/include/linux/resctrl.h
+++ b/include/linux/resctrl.h
@@ -230,6 +230,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
* @rmid: rmid of the counter to read.
* @eventid: eventid to read, e.g. L3 occupancy.
* @val: result of the counter read in bytes.
+ * @arch_mon_ctx: An allocated context from resctrl_arch_mon_ctx_alloc().
*
* Call from process context on a CPU that belongs to domain @d.
*
@@ -238,8 +239,7 @@ void resctrl_offline_domain(struct rdt_resource *r, struct rdt_domain *d);
*/
int resctrl_arch_rmid_read(struct rdt_resource *r, struct rdt_domain *d,
u32 closid, u32 rmid, enum resctrl_event_id eventid,
- u64 *val);
-
+ u64 *val, int arch_mon_ctx);

/**
* resctrl_arch_reset_rmid() - Reset any private state associated with rmid
--
2.30.2