Re: [PATCH v5 3/8] cxl/edac: Add CXL memory device patrol scrub control feature

[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

 



On Thu, May 15, 2025 at 12:59:19PM +0100, shiju.jose@xxxxxxxxxx wrote:
> From: Shiju Jose <shiju.jose@xxxxxxxxxx>
> 
> CXL spec 3.2 section 8.2.10.9.11.1 describes the device patrol scrub
> control feature. The device patrol scrub proactively locates and makes
> corrections to errors in regular cycle.
> 

snip

> +
> +static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx,
> +				u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle)
> +{
> +	struct cxl_mailbox *cxl_mbox;
> +	u8 min_scrub_cycle = U8_MAX;
> +	struct cxl_memdev *cxlmd;
> +	int i, ret;
> +
> +	if (cxl_ps_ctx->cxlr) {
> +		struct cxl_region *cxlr = cxl_ps_ctx->cxlr;
> +		struct cxl_region_params *p = &cxlr->params;

This function and the next, have a big if { } wrapper around
cxlr existence. Can this logic be reversed -

ie, declare cxl_region and cxl_region_params in the header and
then do something like - 

	if (!cxl_ps_ctx->cxlr) {
		cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox;
		return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags, min_cycle);
	}

	cxlr =  cxl_ps_ctx->cxlr;
	p = &cxlr->params;

Then all this code below can shift left.

> +
> +		struct rw_semaphore *region_lock __free(rwsem_read_release) =
> +			rwsem_read_intr_acquire(&cxl_region_rwsem);
> +		if (!region_lock)
> +			return -EINTR;
> +
> +		for (i = 0; i < p->nr_targets; i++) {
> +			struct cxl_endpoint_decoder *cxled = p->targets[i];
> +
> +			cxlmd = cxled_to_memdev(cxled);
> +			cxl_mbox = &cxlmd->cxlds->cxl_mbox;
> +			ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle,
> +						       flags, min_cycle);
> +			if (ret)
> +				return ret;
> +
> +			if (min_cycle)
> +				min_scrub_cycle =
> +					min(*min_cycle, min_scrub_cycle);
> +		}
> +
> +		if (min_cycle)
> +			*min_cycle = min_scrub_cycle;
> +
> +		return 0;
> +	}
> +	cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox;
> +
> +	return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags, min_cycle);
> +}
> +
> +static int cxl_scrub_set_attrbs(struct device *dev,
> +				struct cxl_patrol_scrub_context *cxl_ps_ctx,
> +				u8 cycle, u8 flags)
> +{
> +	struct cxl_scrub_wr_attrbs wr_attrbs;
> +	struct cxl_mailbox *cxl_mbox;
> +	struct cxl_memdev *cxlmd;
> +	int ret, i;
> +
> +	wr_attrbs.scrub_cycle_hours = cycle;
> +	wr_attrbs.scrub_flags = flags;
> +
> +	if (cxl_ps_ctx->cxlr) {
> +		struct cxl_region *cxlr = cxl_ps_ctx->cxlr;
> +		struct cxl_region_params *p = &cxlr->params;

Similar to above function, but more work in the !cxlr case. Maybe a goto.


> +
> +		struct rw_semaphore *region_lock __free(rwsem_read_release) =
> +			rwsem_read_intr_acquire(&cxl_region_rwsem);
> +		if (!region_lock)
> +			return -EINTR;
> +
> +		for (i = 0; i < p->nr_targets; i++) {
> +			struct cxl_endpoint_decoder *cxled = p->targets[i];
> +
> +			cxlmd = cxled_to_memdev(cxled);
> +			cxl_mbox = &cxlmd->cxlds->cxl_mbox;
> +			ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
> +					      cxl_ps_ctx->set_version, &wr_attrbs,
> +					      sizeof(wr_attrbs),
> +					      CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET,
> +					      0, NULL);
> +			if (ret)
> +				return ret;
> +
> +			if (cycle != cxlmd->cur_scrub_cycle) {
> +				if (cxlmd->cur_region_id != -1)
> +					dev_info(dev,
> +						 "Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n",
> +						 cxlmd->cur_scrub_cycle,
> +						 cxlmd->cur_region_id, cxlr->id,
> +						 cycle);
> +
> +				cxlmd->cur_scrub_cycle = cycle;
> +				cxlmd->cur_region_id = cxlr->id;
> +			}
> +		}
> +
> +		return 0;
> +	}
> +
> +	cxlmd = cxl_ps_ctx->cxlmd;
> +	cxl_mbox = &cxlmd->cxlds->cxl_mbox;
> +	ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID,
> +			      cxl_ps_ctx->set_version, &wr_attrbs,
> +			      sizeof(wr_attrbs),
> +			      CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0,
> +			      NULL);
> +	if (ret)
> +		return ret;
> +
> +	if (cycle != cxlmd->cur_scrub_cycle) {
> +		if (cxlmd->cur_region_id != -1)
> +			dev_info(dev,
> +				 "Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n",
> +				 cxlmd->cur_scrub_cycle, cxlmd->cur_region_id,
> +				 cycle);
> +
> +		cxlmd->cur_scrub_cycle = cycle;
> +		cxlmd->cur_region_id = -1;
> +	}
> +
> +	return 0;
> +}
> +

skip

> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h
> index 3ec6b906371b..685957b312ea 100644
> --- a/drivers/cxl/cxlmem.h
> +++ b/drivers/cxl/cxlmem.h
> @@ -45,6 +45,8 @@
>   * @endpoint: connection to the CXL port topology for this memory device
>   * @id: id number of this memdev instance.
>   * @depth: endpoint port depth
> + * @cur_scrub_cycle: current scrub cycle set for this device
> + * @cur_region_id: id number of a backed region (if any) for which current scrub cycle set
>   */
>  struct cxl_memdev {
>  	struct device dev;
> @@ -56,6 +58,10 @@ struct cxl_memdev {
>  	struct cxl_port *endpoint;
>  	int id;
>  	int depth;
> +#ifdef CONFIG_CXL_EDAC_SCRUB
> +	u8 cur_scrub_cycle;
> +	int cur_region_id;
> +#endif
>  };


Why the cur_ prefix?  Seems like it's just 'the' scrub cycle.

How about:

s/cur_scrub_cycle/scrub_cycle
s/cur_region_id/scrub_region_id

That also makes it clear that the region_id is related to the scrub.

Somewhere later cur_region_id gets compared to -1 a few times.
Perhaps add a define for that like #define CXL_SCRUB_NO_REGION -1

>  
>  static inline struct cxl_memdev *to_cxl_memdev(struct device *dev)
> @@ -853,6 +859,16 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd);
>  int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa);
>  int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa);
>  
> +#if IS_ENABLED(CONFIG_CXL_EDAC_MEM_FEATURES)

that's the one I mentioned in cover letter that can be #ifdef

> +int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd);
> +int devm_cxl_region_edac_register(struct cxl_region *cxlr);
> +#else
> +static inline int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd)
> +{ return 0; }
> +static inline int devm_cxl_region_edac_register(struct cxl_region *cxlr)
> +{ return 0; }
> +#endif
> +
>  #ifdef CONFIG_CXL_SUSPEND
>  void cxl_mem_active_inc(void);
>  void cxl_mem_active_dec(void);
> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c
> index 9675243bd05b..6e6777b7bafb 100644
> --- a/drivers/cxl/mem.c
> +++ b/drivers/cxl/mem.c
> @@ -180,6 +180,10 @@ static int cxl_mem_probe(struct device *dev)
>  			return rc;
>  	}
>  
> +	rc = devm_cxl_memdev_edac_register(cxlmd);
> +	if (rc)
> +		dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc);
> +
>  	/*
>  	 * The kernel may be operating out of CXL memory on this device,
>  	 * there is no spec defined way to determine whether this device
> -- 
> 2.43.0
> 
> 




[Index of Archives]     [Kernel Newbies]     [Security]     [Netfilter]     [Bugtraq]     [Linux FS]     [Yosemite Forum]     [MIPS Linux]     [ARM Linux]     [Linux Security]     [Linux RAID]     [Samba]     [Video 4 Linux]     [Device Mapper]     [Linux Resources]

  Powered by Linux