Hi James,

On 8/22/25 16:30, James Morse wrote:
> cpuhp callbacks aren't the only time the MSC configuration may need to
> be reset. Resctrl has an API call to reset a class.
> If an MPAM error interrupt arrives it indicates the driver has
> misprogrammed an MSC. The safest thing to do is reset all the MSCs
> and disable MPAM.
>
> Add a helper to reset RIS via their class. Call this from mpam_disable(),
> which can be scheduled from the error interrupt handler.
>
> Signed-off-by: James Morse <james.morse@xxxxxxx>
> ---
>  drivers/resctrl/mpam_devices.c  | 62 +++++++++++++++++++++++++++++++++++++++++--
>  drivers/resctrl/mpam_internal.h |  1 +
>  2 files changed, 61 insertions(+), 2 deletions(-)
>
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index 759244966736..3516cbe8623e 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -915,8 +915,6 @@ static int mpam_reset_ris(void *arg)
>  	u16 partid, partid_max;
>  	struct mpam_msc_ris *ris = arg;
>
> -	mpam_assert_srcu_read_lock_held();
> -
>  	if (ris->in_reset_state)
>  		return 0;
>
> @@ -1569,6 +1567,66 @@ static void mpam_enable_once(void)
>  		mpam_partid_max + 1, mpam_pmg_max + 1);
>  }
>
> +static void mpam_reset_component_locked(struct mpam_component *comp)
> +{
> +	int idx;
> +	struct mpam_msc *msc;
> +	struct mpam_vmsc *vmsc;
> +	struct mpam_msc_ris *ris;
> +
> +	might_sleep();
> +	lockdep_assert_cpus_held();
> +
> +	idx = srcu_read_lock(&mpam_srcu);
> +	list_for_each_entry_rcu(vmsc, &comp->vmsc, comp_list) {
> +		msc = vmsc->msc;
> +
> +		list_for_each_entry_rcu(ris, &vmsc->ris, vmsc_list) {
> +			if (!ris->in_reset_state)
> +				mpam_touch_msc(msc, mpam_reset_ris, ris);
> +			ris->in_reset_state = true;
> +		}
> +	}
> +	srcu_read_unlock(&mpam_srcu, idx);
> +}
> +
> +static void mpam_reset_class_locked(struct mpam_class *class)
> +{
> +	int idx;
> +	struct mpam_component *comp;
> +
> +	lockdep_assert_cpus_held();
> +
> +	idx = srcu_read_lock(&mpam_srcu);
> +	list_for_each_entry_rcu(comp, &class->components, class_list)
> +		mpam_reset_component_locked(comp);
> +	srcu_read_unlock(&mpam_srcu, idx);
> +}
> +
> +static void mpam_reset_class(struct mpam_class *class)
> +{
> +	cpus_read_lock();
> +	mpam_reset_class_locked(class);
> +	cpus_read_unlock();
> +}
> +
> +/*
> + * Called in response to an error IRQ.
> + * All of MPAMs errors indicate a software bug, restore any modified
> + * controls to their reset values.
> + */
> +void mpam_disable(void)
> +{
> +	int idx;
> +	struct mpam_class *class;
> +
> +	idx = srcu_read_lock(&mpam_srcu);
> +	list_for_each_entry_srcu(class, &mpam_classes, classes_list,
> +				 srcu_read_lock_held(&mpam_srcu))

Why do you use list_for_each_entry_srcu() here when in other places you
use list_for_each_entry_rcu()?

> +		mpam_reset_class(class);
> +	srcu_read_unlock(&mpam_srcu, idx);
> +}
> +
>  /*
>   * Enable mpam once all devices have been probed.
>   * Scheduled by mpam_discovery_cpu_online() once all devices have been created.
> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
> index 466d670a01eb..b30fee2b7674 100644
> --- a/drivers/resctrl/mpam_internal.h
> +++ b/drivers/resctrl/mpam_internal.h
> @@ -281,6 +281,7 @@ extern u8 mpam_pmg_max;
>
>  /* Scheduled work callback to enable mpam once all MSC have been probed */
>  void mpam_enable(struct work_struct *work);
> +void mpam_disable(void);
>
>  int mpam_get_cpumask_from_cache_id(unsigned long cache_id, u32 cache_level,
>  				   cpumask_t *affinity);

Thanks,

Ben