Hi Babu/Tony, On 6/13/25 2:04 PM, Babu Moger wrote: > From: Tony Luck <tony.luck@xxxxxxxxx> ... > @@ -400,25 +400,27 @@ static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_ctrl_domain * > */ > static int arch_domain_mbm_alloc(u32 num_rmid, struct rdt_hw_mon_domain *hw_dom) > { > - size_t tsize; > - > - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) { > - tsize = sizeof(*hw_dom->arch_mbm_total); > - hw_dom->arch_mbm_total = kcalloc(num_rmid, tsize, GFP_KERNEL); > - if (!hw_dom->arch_mbm_total) > - return -ENOMEM; > - } > - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) { > - tsize = sizeof(*hw_dom->arch_mbm_local); > - hw_dom->arch_mbm_local = kcalloc(num_rmid, tsize, GFP_KERNEL); > - if (!hw_dom->arch_mbm_local) { > - kfree(hw_dom->arch_mbm_total); > - hw_dom->arch_mbm_total = NULL; > - return -ENOMEM; > - } > + size_t tsize = sizeof(*hw_dom->arch_mbm_states[0]); > + enum resctrl_event_id eventid; > + int idx; > + > + for_each_mbm_event_id(eventid) { > + if (!resctrl_is_mon_event_enabled(eventid)) > + continue; > + idx = MBM_STATE_IDX(eventid); > + hw_dom->arch_mbm_states[idx] = kcalloc(num_rmid, tsize, GFP_KERNEL); > + if (!hw_dom->arch_mbm_states[idx]) > + goto cleanup; > } > > return 0; > +cleanup: > + while (--idx >= 0) { (please see note about this pattern below) > + kfree(hw_dom->arch_mbm_states[idx]); > + hw_dom->arch_mbm_states[idx] = NULL; > + } > + > + return -ENOMEM; > } > > static int get_domain_id_from_scope(int cpu, enum resctrl_scope scope) > diff --git a/arch/x86/kernel/cpu/resctrl/internal.h b/arch/x86/kernel/cpu/resctrl/internal.h > index 5e3c41b36437..44ef0d94131e 100644 > --- a/arch/x86/kernel/cpu/resctrl/internal.h > +++ b/arch/x86/kernel/cpu/resctrl/internal.h > @@ -54,15 +54,16 @@ struct rdt_hw_ctrl_domain { > * struct rdt_hw_mon_domain - Arch private attributes of a set of CPUs that share > * a resource for a monitor function > * @d_resctrl: Properties exposed to the resctrl file system > - * 
@arch_mbm_total: arch private state for MBM total bandwidth > - * @arch_mbm_local: arch private state for MBM local bandwidth > + * @arch_mbm_states: arch private state for each MBM event Duplicate @arch_mbm_states > + * @arch_mbm_states: Per-event pointer to the MBM event's saved state. > + * An MBM event's state is an array of struct arch_mbm_state > + * indexed by RMID on x86 or combined CLOSID, RMID on Arm. The "or combined CLOSID, RMID on Arm" can be dropped from the x86 arch specific docs. > * > * Members of this structure are accessed via helpers that provide abstraction. > */ > struct rdt_hw_mon_domain { > struct rdt_mon_domain d_resctrl; > - struct arch_mbm_state *arch_mbm_total; > - struct arch_mbm_state *arch_mbm_local; > + struct arch_mbm_state *arch_mbm_states[QOS_NUM_L3_MBM_EVENTS]; > }; > > static inline struct rdt_hw_ctrl_domain *resctrl_to_arch_ctrl_dom(struct rdt_ctrl_domain *r) > diff --git a/arch/x86/kernel/cpu/resctrl/monitor.c b/arch/x86/kernel/cpu/resctrl/monitor.c > index 07f8ab097cbe..0add57b29a4d 100644 > --- a/arch/x86/kernel/cpu/resctrl/monitor.c > +++ b/arch/x86/kernel/cpu/resctrl/monitor.c > @@ -161,18 +161,14 @@ static struct arch_mbm_state *get_arch_mbm_state(struct rdt_hw_mon_domain *hw_do > u32 rmid, > enum resctrl_event_id eventid) > { > - switch (eventid) { > - case QOS_L3_OCCUP_EVENT_ID: > - return NULL; > - case QOS_L3_MBM_TOTAL_EVENT_ID: > - return &hw_dom->arch_mbm_total[rmid]; > - case QOS_L3_MBM_LOCAL_EVENT_ID: > - return &hw_dom->arch_mbm_local[rmid]; > - default: > - /* Never expect to get here */ > - WARN_ON_ONCE(1); > + struct arch_mbm_state *state; > + > + if (!resctrl_is_mbm_event(eventid)) > return NULL; > - } > + > + state = hw_dom->arch_mbm_states[MBM_STATE_IDX(eventid)]; > + > + return state ? 
&state[rmid] : NULL; > } > > void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, > @@ -201,14 +197,16 @@ void resctrl_arch_reset_rmid(struct rdt_resource *r, struct rdt_mon_domain *d, > void resctrl_arch_reset_rmid_all(struct rdt_resource *r, struct rdt_mon_domain *d) > { > struct rdt_hw_mon_domain *hw_dom = resctrl_to_arch_mon_dom(d); > - > - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) > - memset(hw_dom->arch_mbm_total, 0, > - sizeof(*hw_dom->arch_mbm_total) * r->num_rmid); > - > - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) > - memset(hw_dom->arch_mbm_local, 0, > - sizeof(*hw_dom->arch_mbm_local) * r->num_rmid); > + enum resctrl_event_id eventid; > + int idx; > + > + for_each_mbm_event_id(eventid) { > + if (!resctrl_is_mon_event_enabled(eventid)) > + continue; > + idx = MBM_STATE_IDX(eventid); > + memset(hw_dom->arch_mbm_states[idx], 0, > + sizeof(struct arch_mbm_state) * r->num_rmid); sizeof(struct arch_mbm_state) -> sizeof(*hw_dom->arch_mbm_states[0])? > + } > } > > static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr, unsigned int width) ... 
> void resctrl_offline_ctrl_domain(struct rdt_resource *r, struct rdt_ctrl_domain *d) > @@ -4085,32 +4081,34 @@ void resctrl_offline_mon_domain(struct rdt_resource *r, struct rdt_mon_domain *d > static int domain_setup_mon_state(struct rdt_resource *r, struct rdt_mon_domain *d) > { > u32 idx_limit = resctrl_arch_system_num_rmid_idx(); > - size_t tsize; > + size_t tsize = sizeof(*d->mbm_states[0]); > + enum resctrl_event_id eventid; > + int idx; > > if (resctrl_is_mon_event_enabled(QOS_L3_OCCUP_EVENT_ID)) { > d->rmid_busy_llc = bitmap_zalloc(idx_limit, GFP_KERNEL); > if (!d->rmid_busy_llc) > return -ENOMEM; > } > - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_TOTAL_EVENT_ID)) { > - tsize = sizeof(*d->mbm_total); > - d->mbm_total = kcalloc(idx_limit, tsize, GFP_KERNEL); > - if (!d->mbm_total) { > - bitmap_free(d->rmid_busy_llc); > - return -ENOMEM; > - } > - } > - if (resctrl_is_mon_event_enabled(QOS_L3_MBM_LOCAL_EVENT_ID)) { > - tsize = sizeof(*d->mbm_local); > - d->mbm_local = kcalloc(idx_limit, tsize, GFP_KERNEL); > - if (!d->mbm_local) { > - bitmap_free(d->rmid_busy_llc); > - kfree(d->mbm_total); > - return -ENOMEM; > - } > + > + for_each_mbm_event_id(eventid) { > + if (!resctrl_is_mon_event_enabled(eventid)) > + continue; > + idx = MBM_STATE_IDX(eventid); > + d->mbm_states[idx] = kcalloc(idx_limit, tsize, GFP_KERNEL); > + if (!d->mbm_states[idx]) > + goto cleanup; > } Looks like this cleanup pattern is a landmine that this series stepped on in patch #13. Any code added here that fails and then runs the "cleanup" code will end up either leaking memory or accessing an uninitialized variable. > > return 0; > +cleanup: > + bitmap_free(d->rmid_busy_llc); > + while (--idx >= 0) { > + kfree(d->mbm_states[idx]); > + d->mbm_states[idx] = NULL; > + } This pattern should be made safer by not relying on idx, or by ensuring here that idx is initialized correctly. > + > + return -ENOMEM; > } > Reinette