Re: [RFC PATCH 27/36] arm_mpam: Allow configuration to be applied and restored during cpu online

From: James Morse
Date: Fri Aug 08 2025 - 03:14:51 EST


Hi Ben,

On 28/07/2025 12:59, Ben Horgan wrote:
On 7/11/25 19:36, James Morse wrote:
When CPUs come online the original configuration should be restored.
Once the maximum partid is known, allocate an configuration array for
each component, and reprogram each RIS configuration from this.

The MPAM spec describes how multiple controls can interact. To prevent
this happening by accident, always reset controls that don't have a
valid configuration. This allows the same helper to be used for
configuration and reset.
diff --git a/drivers/platform/arm64/mpam/mpam_devices.c b/drivers/platform/arm64/mpam/
mpam_devices.c
index bb3695eb84e9..f3ecfda265d2 100644
--- a/drivers/platform/arm64/mpam/mpam_devices.c
+++ b/drivers/platform/arm64/mpam/mpam_devices.c
@@ -1000,10 +1041,38 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online)
*/
ris->in_reset_state = online;
}
- srcu_read_unlock(&mpam_srcu, idx);
mpam_mon_sel_outer_unlock(msc);
}
+static void mpam_reprogram_msc(struct mpam_msc *msc)
+{
+ int idx;
+ u16 partid;
+ bool reset;
+ struct mpam_config *cfg;
+ struct mpam_msc_ris *ris;
+
+ idx = srcu_read_lock(&mpam_srcu);
+ list_for_each_entry_rcu(ris, &msc->ris, msc_list) {
+ if (!mpam_is_enabled() && !ris->in_reset_state) {
+ mpam_touch_msc(msc, &mpam_reset_ris, ris);
+ ris->in_reset_state = true;
+ continue;
+ }
+
+ reset = true;
+ for (partid = 0; partid <= mpam_partid_max; partid++) {
Do we need to consider 'partid_max_lock' here?

The lock is only needed while those parameters can change, due to a race with mpam_register_requestor(). Once mpam_enabled()
has been called, the values can't change, so the lock is redundant.

In this case, its relying on mpam_cpu_online() only ever being the cpuhp callback once partid_max_published has been set.

I'll add a comment.


+ cfg = &ris->vmsc->comp->cfg[partid];
+ if (cfg->features)
+ reset = false;
+
+ mpam_reprogram_ris_partid(ris, partid, cfg);
+ }
+ ris->in_reset_state = reset;
+ }
+ srcu_read_unlock(&mpam_srcu, idx);
+}
+
static void _enable_percpu_irq(void *_irq)
{
int *irq = _irq;
@@ -1806,6 +1875,43 @@ static void mpam_unregister_irqs(void)
cpus_read_unlock();
}
+static void __destroy_component_cfg(struct mpam_component *comp)
+{
+ add_to_garbage(comp->cfg);
+}
+
+static int __allocate_component_cfg(struct mpam_component *comp)
+{
+ if (comp->cfg)
+ return 0;
+
+ comp->cfg = kcalloc(mpam_partid_max + 1, sizeof(*comp->cfg), GFP_KERNEL);
And here?

Aha, that is a good one - the configuration should be allocated after the partid values are fixed. Currently its done by
the same function, but not in the right order.


+ if (!comp->cfg)
+ return -ENOMEM;
+ init_garbage(comp->cfg);
+
+ return 0;
+}
+
+static int mpam_allocate_config(void)
+{
+ int err = 0;
+ struct mpam_class *class;
+ struct mpam_component *comp;
+
+ lockdep_assert_held(&mpam_list_lock);
+
+ list_for_each_entry(class, &mpam_classes, classes_list) {
+ list_for_each_entry(comp, &class->components, class_list) {
+ err = __allocate_component_cfg(comp);
+ if (err)
+ return err;
+ }
+ }
+
+ return 0;
+}
+
static void mpam_enable_once(void)
{
int err;
@@ -1817,12 +1923,21 @@ static void mpam_enable_once(void)
*/
cpus_read_lock();
mutex_lock(&mpam_list_lock);
- mpam_enable_merge_features(&mpam_classes);
+ do {
+ mpam_enable_merge_features(&mpam_classes);
- err = mpam_register_irqs();
- if (err)
- pr_warn("Failed to register irqs: %d\n", err);
+ err = mpam_allocate_config();
+ if (err) {
+ pr_err("Failed to allocate configuration arrays.\n");
+ break;
+ }
+ err = mpam_register_irqs();
+ if (err) {
+ pr_warn("Failed to register irqs: %d\n", err);
+ break;
+ }
+ } while (0);
mutex_unlock(&mpam_list_lock);
cpus_read_unlock();
@@ -1861,6 +1976,8 @@ static void mpam_reset_component_locked(struct mpam_component
*comp)
might_sleep();
lockdep_assert_cpus_held();
+ memset(comp->cfg, 0, (mpam_partid_max * sizeof(*comp->cfg)));
And here?

Same story, and the same bug - the disable path can be called and reach this before the partid size has been fixed. The
code that enables interrupts should pull that earlier in mpam_enable_once, which would simplify all of these.



Thanks,

James