On Thu, May 15, 2025 at 12:59:19PM +0100, shiju.jose@xxxxxxxxxx wrote: > From: Shiju Jose <shiju.jose@xxxxxxxxxx> > > CXL spec 3.2 section 8.2.10.9.11.1 describes the device patrol scrub > control feature. The device patrol scrub proactively locates and makes > corrections to errors in regular cycle. > snip > + > +static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, > + u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle) > +{ > + struct cxl_mailbox *cxl_mbox; > + u8 min_scrub_cycle = U8_MAX; > + struct cxl_memdev *cxlmd; > + int i, ret; > + > + if (cxl_ps_ctx->cxlr) { > + struct cxl_region *cxlr = cxl_ps_ctx->cxlr; > + struct cxl_region_params *p = &cxlr->params; This function and the next, have a big if { } wrapper around cxlr existence. Can this logic be reversed - ie, declare cxl_region and cxl_region_params in the header and then do something like - if (!cxl_ps_ctx->cxlr) { cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox; return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags, min_cycle); } cxlr = cxl_ps_ctx->cxlr; p = &cxlr->params; Then all this code below can shift left. > + > + struct rw_semaphore *region_lock __free(rwsem_read_release) = > + rwsem_read_intr_acquire(&cxl_region_rwsem); > + if (!region_lock) > + return -EINTR; > + > + for (i = 0; i < p->nr_targets; i++) { > + struct cxl_endpoint_decoder *cxled = p->targets[i]; > + > + cxlmd = cxled_to_memdev(cxled); > + cxl_mbox = &cxlmd->cxlds->cxl_mbox; > + ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, > + flags, min_cycle); > + if (ret) > + return ret; > + > + if (min_cycle) > + min_scrub_cycle = > + min(*min_cycle, min_scrub_cycle); > + } > + > + if (min_cycle) > + *min_cycle = min_scrub_cycle; > + > + return 0; > + } > + cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox; > + > + return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags, min_cycle); > +} > + > +static int cxl_scrub_set_attrbs(struct device *dev, > + struct cxl_patrol_scrub_context *cxl_ps_ctx, > + u8 cycle, u8 flags) > +{ > + struct cxl_scrub_wr_attrbs wr_attrbs; > + struct cxl_mailbox *cxl_mbox; > + struct cxl_memdev *cxlmd; > + int ret, i; > + > + wr_attrbs.scrub_cycle_hours = cycle; > + wr_attrbs.scrub_flags = flags; > + > + if (cxl_ps_ctx->cxlr) { > + struct cxl_region *cxlr = cxl_ps_ctx->cxlr; > + struct cxl_region_params *p = &cxlr->params; Similar to above function, but more work in the !cxlr case. Maybe a goto. > + > + struct rw_semaphore *region_lock __free(rwsem_read_release) = > + rwsem_read_intr_acquire(&cxl_region_rwsem); > + if (!region_lock) > + return -EINTR; > + > + for (i = 0; i < p->nr_targets; i++) { > + struct cxl_endpoint_decoder *cxled = p->targets[i]; > + > + cxlmd = cxled_to_memdev(cxled); > + cxl_mbox = &cxlmd->cxlds->cxl_mbox; > + ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID, > + cxl_ps_ctx->set_version, &wr_attrbs, > + sizeof(wr_attrbs), > + CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, > + 0, NULL); > + if (ret) > + return ret; > + > + if (cycle != cxlmd->cur_scrub_cycle) { > + if (cxlmd->cur_region_id != -1) > + dev_info(dev, > + "Device scrub rate(%d hours) set by region%d rate overwritten by region%d scrub rate(%d hours)\n", > + cxlmd->cur_scrub_cycle, > + cxlmd->cur_region_id, cxlr->id, > + cycle); > + > + cxlmd->cur_scrub_cycle = cycle; > + cxlmd->cur_region_id = cxlr->id; > + } > + } > + > + return 0; > + } > + > + cxlmd = cxl_ps_ctx->cxlmd; > + cxl_mbox = &cxlmd->cxlds->cxl_mbox; > + ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID, > + cxl_ps_ctx->set_version, &wr_attrbs, > + sizeof(wr_attrbs), > + CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, 0, > + NULL); > + if (ret) > + return ret; > + > + if (cycle != cxlmd->cur_scrub_cycle) { > + if (cxlmd->cur_region_id != -1) > + dev_info(dev, > + "Device scrub rate(%d hours) set by region%d rate overwritten with device local scrub rate(%d hours)\n", > + cxlmd->cur_scrub_cycle, cxlmd->cur_region_id, > + cycle); > + > + cxlmd->cur_scrub_cycle = cycle; > + cxlmd->cur_region_id = -1; > + } > + > + return 0; > +} > + skip > diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h > index 3ec6b906371b..685957b312ea 100644 > --- a/drivers/cxl/cxlmem.h > +++ b/drivers/cxl/cxlmem.h > @@ -45,6 +45,8 @@ > * @endpoint: connection to the CXL port topology for this memory device > * @id: id number of this memdev instance. > * @depth: endpoint port depth > + * @cur_scrub_cycle: current scrub cycle set for this device > + * @cur_region_id: id number of a backed region (if any) for which current scrub cycle set > */ > struct cxl_memdev { > struct device dev; > @@ -56,6 +58,10 @@ struct cxl_memdev { > struct cxl_port *endpoint; > int id; > int depth; > +#ifdef CONFIG_CXL_EDAC_SCRUB > + u8 cur_scrub_cycle; > + int cur_region_id; > +#endif > }; Why the cur_ prefix? Seems like it's just 'the' scrub cycle. How about: s/cur_scrub_cycle/scrub_cycle s/cur_region_id/scrub_region_id That also makes it clear that the region_id is related to the scrub. Somewhere later cur_region_id gets compared to -1 a few times. Perhaps add a define for that like #define CXL_SCRUB_NO_REGION -1 > > static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) > @@ -853,6 +859,16 @@ int cxl_trigger_poison_list(struct cxl_memdev *cxlmd); > int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa); > int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa); > > +#if IS_ENABLED(CONFIG_CXL_EDAC_MEM_FEATURES) that's the one I mentioned in cover letter that can be #ifdef > +int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd); > +int devm_cxl_region_edac_register(struct cxl_region *cxlr); > +#else > +static inline int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd) > +{ return 0; } > +static inline int devm_cxl_region_edac_register(struct cxl_region *cxlr) > +{ return 0; } > +#endif > + > #ifdef CONFIG_CXL_SUSPEND > void cxl_mem_active_inc(void); > void cxl_mem_active_dec(void); > diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c > index 9675243bd05b..6e6777b7bafb 100644 > --- a/drivers/cxl/mem.c > +++ b/drivers/cxl/mem.c > @@ -180,6 +180,10 @@ static int cxl_mem_probe(struct device *dev) > return rc; > } > > + rc = devm_cxl_memdev_edac_register(cxlmd); > + if (rc) > + dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", rc); > + > /* > * The kernel may be operating out of CXL memory on this device, > * there is no spec defined way to determine whether this device > -- > 2.43.0 > >