Re: [PATCH 28/33] arm_mpam: Track bandwidth counter state for overflow and power management

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



Hi James,

On 8/22/25 16:30, James Morse wrote:
> Bandwidth counters need to run continuously to correctly reflect the
> bandwidth.
> 
> The value read may be lower than the previous value read in the case
> of overflow and when the hardware is reset due to CPU hotplug.
> 
> Add struct mbwu_state to track the bandwidth counter to allow overflow
> and power management to be handled.
> 
> Signed-off-by: James Morse <james.morse@xxxxxxx>
> ---
>  drivers/resctrl/mpam_devices.c  | 163 +++++++++++++++++++++++++++++++-
>  drivers/resctrl/mpam_internal.h |  54 ++++++++---
>  2 files changed, 200 insertions(+), 17 deletions(-)
> 
> diff --git a/drivers/resctrl/mpam_devices.c b/drivers/resctrl/mpam_devices.c
> index 9ce771aaf671..11be34b54643 100644
> --- a/drivers/resctrl/mpam_devices.c
> +++ b/drivers/resctrl/mpam_devices.c
> @@ -1004,6 +1004,7 @@ static void gen_msmon_ctl_flt_vals(struct mon_read *m, u32 *ctl_val,
>  	*ctl_val |= MSMON_CFG_x_CTL_MATCH_PARTID;
>  
>  	*flt_val = FIELD_PREP(MSMON_CFG_MBWU_FLT_PARTID, ctx->partid);
> +	*flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_RWBW, ctx->opts);
>  	if (m->ctx->match_pmg) {
>  		*ctl_val |= MSMON_CFG_x_CTL_MATCH_PMG;
>  		*flt_val |= FIELD_PREP(MSMON_CFG_MBWU_FLT_PMG, ctx->pmg);
> @@ -1041,6 +1042,7 @@ static void clean_msmon_ctl_val(u32 *cur_ctl)
>  static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>  				     u32 flt_val)
>  {
> +	struct msmon_mbwu_state *mbwu_state;
>  	struct mpam_msc *msc = m->ris->vmsc->msc;
>  
>  	/*
> @@ -1059,20 +1061,32 @@ static void write_msmon_ctl_flt_vals(struct mon_read *m, u32 ctl_val,
>  		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val);
>  		mpam_write_monsel_reg(msc, MBWU, 0);
>  		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, ctl_val | MSMON_CFG_x_CTL_EN);
> +
> +		mbwu_state = &m->ris->mbwu_state[m->ctx->mon];
> +		if (mbwu_state)
> +			mbwu_state->prev_val = 0;
> +
>  		break;
>  	default:
>  		return;
>  	}
>  }
>  
> +static u64 mpam_msmon_overflow_val(struct mpam_msc_ris *ris)
> +{
> +	/* TODO: scaling, and long counters */
> +	return GENMASK_ULL(30, 0);
> +}
> +
>  /* Call with MSC lock held */
>  static void __ris_msmon_read(void *arg)
>  {
> -	u64 now;
>  	bool nrdy = false;
>  	struct mon_read *m = arg;
> +	u64 now, overflow_val = 0;
>  	struct mon_cfg *ctx = m->ctx;
>  	struct mpam_msc_ris *ris = m->ris;
> +	struct msmon_mbwu_state *mbwu_state;
>  	struct mpam_props *rprops = &ris->props;
>  	struct mpam_msc *msc = m->ris->vmsc->msc;
>  	u32 mon_sel, ctl_val, flt_val, cur_ctl, cur_flt;
> @@ -1100,11 +1114,30 @@ static void __ris_msmon_read(void *arg)
>  		now = mpam_read_monsel_reg(msc, CSU);
>  		if (mpam_has_feature(mpam_feat_msmon_csu_hw_nrdy, rprops))
>  			nrdy = now & MSMON___NRDY;
> +		now = FIELD_GET(MSMON___VALUE, now);
>  		break;
>  	case mpam_feat_msmon_mbwu:
>  		now = mpam_read_monsel_reg(msc, MBWU);
>  		if (mpam_has_feature(mpam_feat_msmon_mbwu_hw_nrdy, rprops))
>  			nrdy = now & MSMON___NRDY;
> +		now = FIELD_GET(MSMON___VALUE, now);
> +
> +		if (nrdy)
> +			break;
> +
> +		mbwu_state = &ris->mbwu_state[ctx->mon];
> +		if (!mbwu_state)
> +			break;
> +
> +		/* Add any pre-overflow value to the mbwu_state->val */
> +		if (mbwu_state->prev_val > now)
> +			overflow_val = mpam_msmon_overflow_val(ris) - mbwu_state->prev_val;
> +
> +		mbwu_state->prev_val = now;
> +		mbwu_state->correction += overflow_val;
> +
> +		/* Include bandwidth consumed before the last hardware reset */
> +		now += mbwu_state->correction;
>  		break;
>  	default:
>  		m->err = -EINVAL;
> @@ -1117,7 +1150,6 @@ static void __ris_msmon_read(void *arg)
>  		return;
>  	}
>  
> -	now = FIELD_GET(MSMON___VALUE, now);
>  	*m->val += now;
>  }
>  
> @@ -1329,6 +1361,72 @@ static int mpam_reprogram_ris(void *_arg)
>  	return 0;
>  }
>  
> +/* Call with MSC lock and outer mon_sel lock held */
> +static int mpam_restore_mbwu_state(void *_ris)
> +{
> +	int i;
> +	struct mon_read mwbu_arg;
> +	struct mpam_msc_ris *ris = _ris;
> +	struct mpam_msc *msc = ris->vmsc->msc;
> +
> +	mpam_mon_sel_outer_lock(msc);
> +
> +	for (i = 0; i < ris->props.num_mbwu_mon; i++) {
> +		if (ris->mbwu_state[i].enabled) {
> +			mwbu_arg.ris = ris;
> +			mwbu_arg.ctx = &ris->mbwu_state[i].cfg;
> +			mwbu_arg.type = mpam_feat_msmon_mbwu;
> +
> +			__ris_msmon_read(&mwbu_arg);
> +		}
> +	}
> +
> +	mpam_mon_sel_outer_unlock(msc);
> +
> +	return 0;
> +}
> +
> +/* Call with MSC lock and outer mon_sel lock held */
> +static int mpam_save_mbwu_state(void *arg)
> +{
> +	int i;
> +	u64 val;
> +	struct mon_cfg *cfg;
> +	u32 cur_flt, cur_ctl, mon_sel;
> +	struct mpam_msc_ris *ris = arg;
> +	struct msmon_mbwu_state *mbwu_state;
> +	struct mpam_msc *msc = ris->vmsc->msc;
> +
> +	for (i = 0; i < ris->props.num_mbwu_mon; i++) {
> +		mbwu_state = &ris->mbwu_state[i];
> +		cfg = &mbwu_state->cfg;
> +
> +		if (WARN_ON_ONCE(!mpam_mon_sel_inner_lock(msc)))
> +			return -EIO;
> +
> +		mon_sel = FIELD_PREP(MSMON_CFG_MON_SEL_MON_SEL, i) |
> +			  FIELD_PREP(MSMON_CFG_MON_SEL_RIS, ris->ris_idx);
> +		mpam_write_monsel_reg(msc, CFG_MON_SEL, mon_sel);
> +
> +		cur_flt = mpam_read_monsel_reg(msc, CFG_MBWU_FLT);
> +		cur_ctl = mpam_read_monsel_reg(msc, CFG_MBWU_CTL);
> +		mpam_write_monsel_reg(msc, CFG_MBWU_CTL, 0);
> +
> +		val = mpam_read_monsel_reg(msc, MBWU);
> +		mpam_write_monsel_reg(msc, MBWU, 0);
> +
> +		cfg->mon = i;
> +		cfg->pmg = FIELD_GET(MSMON_CFG_MBWU_FLT_PMG, cur_flt);
> +		cfg->match_pmg = FIELD_GET(MSMON_CFG_x_CTL_MATCH_PMG, cur_ctl);
> +		cfg->partid = FIELD_GET(MSMON_CFG_MBWU_FLT_PARTID, cur_flt);
> +		mbwu_state->correction += val;
> +		mbwu_state->enabled = FIELD_GET(MSMON_CFG_x_CTL_EN, cur_ctl);
> +		mpam_mon_sel_inner_unlock(msc);
> +	}
> +
> +	return 0;
> +}
> +
>  /*
>   * Called via smp_call_on_cpu() to prevent migration, while still being
>   * pre-emptible.
> @@ -1389,6 +1487,9 @@ static void mpam_reset_msc(struct mpam_msc *msc, bool online)
>  		 * for non-zero partid may be lost while the CPUs are offline.
>  		 */
>  		ris->in_reset_state = online;
> +
> +		if (mpam_is_enabled() && !online)
> +			mpam_touch_msc(msc, &mpam_save_mbwu_state, ris);
>  	}
>  	mpam_mon_sel_outer_unlock(msc);
>  }
> @@ -1423,6 +1524,9 @@ static void mpam_reprogram_msc(struct mpam_msc *msc)
>  			mpam_reprogram_ris_partid(ris, partid, cfg);
>  		}
>  		ris->in_reset_state = reset;
> +
> +		if (mpam_has_feature(mpam_feat_msmon_mbwu, &ris->props))
> +			mpam_touch_msc(msc, &mpam_restore_mbwu_state, ris);
>  	}
>  }
>  
> @@ -2291,11 +2395,35 @@ static void mpam_unregister_irqs(void)
>  
>  static void __destroy_component_cfg(struct mpam_component *comp)
>  {
> +	struct mpam_msc *msc;
> +	struct mpam_vmsc *vmsc;
> +	struct mpam_msc_ris *ris;
> +
> +	lockdep_assert_held(&mpam_list_lock);
> +
>  	add_to_garbage(comp->cfg);
> +	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
> +		msc = vmsc->msc;
> +
> +		mpam_mon_sel_outer_lock(msc);
> +		if (mpam_mon_sel_inner_lock(msc)) {
> +			list_for_each_entry(ris, &vmsc->ris, vmsc_list)
> +				add_to_garbage(ris->mbwu_state);
> +			mpam_mon_sel_inner_unlock(msc);
> +		}
> +		mpam_mon_sel_outer_lock(msc);
> +	}
>  }
>  
>  static int __allocate_component_cfg(struct mpam_component *comp)
>  {
> +	int err = 0;
> +	struct mpam_msc *msc;
> +	struct mpam_vmsc *vmsc;
> +	struct mpam_msc_ris *ris;
> +	struct msmon_mbwu_state *mbwu_state;
> +
> +	lockdep_assert_held(&mpam_list_lock);
>  	mpam_assert_partid_sizes_fixed();
>  
>  	if (comp->cfg)
> @@ -2306,6 +2434,37 @@ static int __allocate_component_cfg(struct mpam_component *comp)
>  		return -ENOMEM;
>  	init_garbage(comp->cfg);
>  
> +	list_for_each_entry(vmsc, &comp->vmsc, comp_list) {
> +		if (!vmsc->props.num_mbwu_mon)
> +			continue;
> +
> +		msc = vmsc->msc;
> +		mpam_mon_sel_outer_lock(msc);
> +		list_for_each_entry(ris, &vmsc->ris, vmsc_list) {
> +			if (!ris->props.num_mbwu_mon)
> +				continue;
> +
> +			mbwu_state = kcalloc(ris->props.num_mbwu_mon,
> +					     sizeof(*ris->mbwu_state),
> +					     GFP_KERNEL);
> +			if (!mbwu_state) {
> +				__destroy_component_cfg(comp);
> +				err = -ENOMEM;
> +				break;
> +			}
> +
> +			if (mpam_mon_sel_inner_lock(msc)) {
> +				init_garbage(mbwu_state);
> +				ris->mbwu_state = mbwu_state;
> +				mpam_mon_sel_inner_unlock(msc);
> +			}
> +		}
> +		mpam_mon_sel_outer_unlock(msc);
> +
> +		if (err)
> +			break;
> +	}
> +
>  	return 0;
>  }
>  
> diff --git a/drivers/resctrl/mpam_internal.h b/drivers/resctrl/mpam_internal.h
> index 76e406a2b0d1..9a50a5432f4a 100644
> --- a/drivers/resctrl/mpam_internal.h
> +++ b/drivers/resctrl/mpam_internal.h
> @@ -271,6 +271,42 @@ struct mpam_component {
>  	struct mpam_garbage	garbage;
>  };
>  
> +/* The values for MSMON_CFG_MBWU_FLT.RWBW */
> +enum mon_filter_options {
> +	COUNT_BOTH	= 0,
> +	COUNT_WRITE	= 1,
> +	COUNT_READ	= 2,
> +};
> +
> +struct mon_cfg {
> +	/* mon is wider than u16 to hold an out of range 'USE_RMID_IDX' */
> +	u32                     mon;
> +	u8                      pmg;
> +	bool                    match_pmg;
> +	u32                     partid;
> +	enum mon_filter_options opts;
> +};
> +
> +/*
> + * Changes to enabled and cfg are protected by the msc->lock.
> + * Changes to prev_val and correction are protected by the msc's mon_sel_lock.
> + */
> +struct msmon_mbwu_state {
> +	bool		enabled;
> +	struct mon_cfg	cfg;
> +
> +	/* The value last read from the hardware. Used to detect overflow. */
> +	u64		prev_val;
> +
> +	/*
> +	 * The value to add to the new reading to account for power management,
> +	 * and shifts to trigger the overflow interrupt.
> +	 */
> +	u64		correction;
> +
> +	struct mpam_garbage	garbage;
> +};
> +

These structures have ended up between struct mpam_component and struct
mpam_vmc. Move to somewhere more natural.

>  struct mpam_vmsc {
>  	/* member of mpam_component:vmsc_list */
>  	struct list_head	comp_list;
> @@ -306,22 +342,10 @@ struct mpam_msc_ris {
>  	/* parent: */
>  	struct mpam_vmsc	*vmsc;
>  
> -	struct mpam_garbage	garbage;
> -};
> +	/* msmon mbwu configuration is preserved over reset */
> +	struct msmon_mbwu_state	*mbwu_state;
>  
> -/* The values for MSMON_CFG_MBWU_FLT.RWBW */
> -enum mon_filter_options {
> -	COUNT_BOTH	= 0,
> -	COUNT_WRITE	= 1,
> -	COUNT_READ	= 2,
> -};
> -
> -struct mon_cfg {
> -	u16                     mon;
> -	u8                      pmg;
> -	bool                    match_pmg;
> -	u32                     partid;
> -	enum mon_filter_options opts;

Choose where this enum and structure go in the previous patch.
> +	struct mpam_garbage	garbage;
>  };
>  
>  static inline int mpam_alloc_csu_mon(struct mpam_class *class)

Thanks,

Ben





[Index of Archives]     [Linux IBM ACPI]     [Linux Power Management]     [Linux Kernel]     [Linux Laptop]     [Kernel Newbies]     [Share Photos]     [Security]     [Netfilter]     [Bugtraq]     [Yosemite News]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]
  Powered by Linux