>-----Original Message----- >From: Alison Schofield <alison.schofield@xxxxxxxxx> >Sent: 20 May 2025 03:02 >To: Shiju Jose <shiju.jose@xxxxxxxxxx> >Cc: linux-cxl@xxxxxxxxxxxxxxx; dan.j.williams@xxxxxxxxx; Jonathan Cameron ><jonathan.cameron@xxxxxxxxxx>; dave.jiang@xxxxxxxxx; dave@xxxxxxxxxxxx; >vishal.l.verma@xxxxxxxxx; ira.weiny@xxxxxxxxx; linux-edac@xxxxxxxxxxxxxxx; >linux-doc@xxxxxxxxxxxxxxx; bp@xxxxxxxxx; tony.luck@xxxxxxxxx; >lenb@xxxxxxxxxx; Yazen.Ghannam@xxxxxxx; mchehab@xxxxxxxxxx; >nifan.cxl@xxxxxxxxx; Linuxarm <linuxarm@xxxxxxxxxx>; tanxiaofei ><tanxiaofei@xxxxxxxxxx>; Zengtao (B) <prime.zeng@xxxxxxxxxxxxx>; Roberto >Sassu <roberto.sassu@xxxxxxxxxx>; kangkang.shen@xxxxxxxxxxxxx; >wanghuiqiang <wanghuiqiang@xxxxxxxxxx> >Subject: Re: [PATCH v5 3/8] cxl/edac: Add CXL memory device patrol scrub >control feature > >On Thu, May 15, 2025 at 12:59:19PM +0100, shiju.jose@xxxxxxxxxx wrote: >> From: Shiju Jose <shiju.jose@xxxxxxxxxx> >> >> CXL spec 3.2 section 8.2.10.9.11.1 describes the device patrol scrub >> control feature. The device patrol scrub proactively locates and makes >> corrections to errors in regular cycle. >> > >snip > >> + >> +static int cxl_scrub_get_attrbs(struct cxl_patrol_scrub_context *cxl_ps_ctx, >> + u8 *cap, u16 *cycle, u8 *flags, u8 *min_cycle) { >> + struct cxl_mailbox *cxl_mbox; >> + u8 min_scrub_cycle = U8_MAX; >> + struct cxl_memdev *cxlmd; >> + int i, ret; >> + >> + if (cxl_ps_ctx->cxlr) { >> + struct cxl_region *cxlr = cxl_ps_ctx->cxlr; >> + struct cxl_region_params *p = &cxlr->params; > >This function and the next, have a big if { } wrapper around cxlr existence. Can >this logic be reversed - > >ie, declare cxl_region and cxl_region_params in the header and then do >something like - > > if (!cxl_ps_ctx->cxlr) { > cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox; > return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags, >min_cycle); > } > > cxlr = cxl_ps_ctx->cxlr; > p = &cxlr->params; > >Then all this code below can shift left. Changed. > >> + >> + struct rw_semaphore *region_lock >__free(rwsem_read_release) = >> + rwsem_read_intr_acquire(&cxl_region_rwsem); >> + if (!region_lock) >> + return -EINTR; >> + >> + for (i = 0; i < p->nr_targets; i++) { >> + struct cxl_endpoint_decoder *cxled = p->targets[i]; >> + >> + cxlmd = cxled_to_memdev(cxled); >> + cxl_mbox = &cxlmd->cxlds->cxl_mbox; >> + ret = cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, >> + flags, min_cycle); >> + if (ret) >> + return ret; >> + >> + if (min_cycle) >> + min_scrub_cycle = >> + min(*min_cycle, min_scrub_cycle); >> + } >> + >> + if (min_cycle) >> + *min_cycle = min_scrub_cycle; >> + >> + return 0; >> + } >> + cxl_mbox = &cxl_ps_ctx->cxlmd->cxlds->cxl_mbox; >> + >> + return cxl_mem_scrub_get_attrbs(cxl_mbox, cap, cycle, flags, >> +min_cycle); } >> + >> +static int cxl_scrub_set_attrbs(struct device *dev, >> + struct cxl_patrol_scrub_context *cxl_ps_ctx, >> + u8 cycle, u8 flags) >> +{ >> + struct cxl_scrub_wr_attrbs wr_attrbs; >> + struct cxl_mailbox *cxl_mbox; >> + struct cxl_memdev *cxlmd; >> + int ret, i; >> + >> + wr_attrbs.scrub_cycle_hours = cycle; >> + wr_attrbs.scrub_flags = flags; >> + >> + if (cxl_ps_ctx->cxlr) { >> + struct cxl_region *cxlr = cxl_ps_ctx->cxlr; >> + struct cxl_region_params *p = &cxlr->params; > >Similar to above function, but more work in the !cxlr case. Maybe a goto. Modified with helper functions. > > >> + >> + struct rw_semaphore *region_lock >__free(rwsem_read_release) = >> + rwsem_read_intr_acquire(&cxl_region_rwsem); >> + if (!region_lock) >> + return -EINTR; >> + >> + for (i = 0; i < p->nr_targets; i++) { >> + struct cxl_endpoint_decoder *cxled = p->targets[i]; >> + >> + cxlmd = cxled_to_memdev(cxled); >> + cxl_mbox = &cxlmd->cxlds->cxl_mbox; >> + ret = cxl_set_feature(cxl_mbox, >&CXL_FEAT_PATROL_SCRUB_UUID, >> + cxl_ps_ctx->set_version, >&wr_attrbs, >> + sizeof(wr_attrbs), >> + >CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, >> + 0, NULL); >> + if (ret) >> + return ret; >> + >> + if (cycle != cxlmd->cur_scrub_cycle) { >> + if (cxlmd->cur_region_id != -1) >> + dev_info(dev, >> + "Device scrub rate(%d hours) >set by region%d rate overwritten by region%d scrub rate(%d hours)\n", >> + cxlmd->cur_scrub_cycle, >> + cxlmd->cur_region_id, cxlr->id, >> + cycle); >> + >> + cxlmd->cur_scrub_cycle = cycle; >> + cxlmd->cur_region_id = cxlr->id; >> + } >> + } >> + >> + return 0; >> + } >> + >> + cxlmd = cxl_ps_ctx->cxlmd; >> + cxl_mbox = &cxlmd->cxlds->cxl_mbox; >> + ret = cxl_set_feature(cxl_mbox, &CXL_FEAT_PATROL_SCRUB_UUID, >> + cxl_ps_ctx->set_version, &wr_attrbs, >> + sizeof(wr_attrbs), >> + CXL_SET_FEAT_FLAG_DATA_SAVED_ACROSS_RESET, >0, >> + NULL); >> + if (ret) >> + return ret; >> + >> + if (cycle != cxlmd->cur_scrub_cycle) { >> + if (cxlmd->cur_region_id != -1) >> + dev_info(dev, >> + "Device scrub rate(%d hours) set by region%d >rate overwritten with device local scrub rate(%d hours)\n", >> + cxlmd->cur_scrub_cycle, cxlmd- >>cur_region_id, >> + cycle); >> + >> + cxlmd->cur_scrub_cycle = cycle; >> + cxlmd->cur_region_id = -1; >> + } >> + >> + return 0; >> +} >> + > >skip > >> diff --git a/drivers/cxl/cxlmem.h b/drivers/cxl/cxlmem.h index >> 3ec6b906371b..685957b312ea 100644 >> --- a/drivers/cxl/cxlmem.h >> +++ b/drivers/cxl/cxlmem.h >> @@ -45,6 +45,8 @@ >> * @endpoint: connection to the CXL port topology for this memory device >> * @id: id number of this memdev instance. >> * @depth: endpoint port depth >> + * @cur_scrub_cycle: current scrub cycle set for this device >> + * @cur_region_id: id number of a backed region (if any) for which >> + current scrub cycle set >> */ >> struct cxl_memdev { >> struct device dev; >> @@ -56,6 +58,10 @@ struct cxl_memdev { >> struct cxl_port *endpoint; >> int id; >> int depth; >> +#ifdef CONFIG_CXL_EDAC_SCRUB >> + u8 cur_scrub_cycle; >> + int cur_region_id; >> +#endif >> }; > > >Why the cur_ prefix? Seems like it's just 'the' scrub cycle. > >How about: > >s/cur_scrub_cycle/scrub_cycle >s/cur_region_id/scrub_region_id > >That also makes it clear that the region_id is related to the scrub. Changed. > >Somewhere later cur_region_id gets compared to -1 a few times. >Perhaps add a define for that like #define CXL_SCRUB_NO_REGION -1 Added and used CXL_SCRUB_NO_REGION. > >> >> static inline struct cxl_memdev *to_cxl_memdev(struct device *dev) @@ >> -853,6 +859,16 @@ int cxl_trigger_poison_list(struct cxl_memdev >> *cxlmd); int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa); >> int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa); >> >> +#if IS_ENABLED(CONFIG_CXL_EDAC_MEM_FEATURES) > >that's the one I mentioned in cover letter that can be #ifdef > >> +int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd); int >> +devm_cxl_region_edac_register(struct cxl_region *cxlr); #else static >> +inline int devm_cxl_memdev_edac_register(struct cxl_memdev *cxlmd) { >> +return 0; } static inline int devm_cxl_region_edac_register(struct >> +cxl_region *cxlr) { return 0; } #endif >> + >> #ifdef CONFIG_CXL_SUSPEND >> void cxl_mem_active_inc(void); >> void cxl_mem_active_dec(void); >> diff --git a/drivers/cxl/mem.c b/drivers/cxl/mem.c index >> 9675243bd05b..6e6777b7bafb 100644 >> --- a/drivers/cxl/mem.c >> +++ b/drivers/cxl/mem.c >> @@ -180,6 +180,10 @@ static int cxl_mem_probe(struct device *dev) >> return rc; >> } >> >> + rc = devm_cxl_memdev_edac_register(cxlmd); >> + if (rc) >> + dev_dbg(dev, "CXL memdev EDAC registration failed rc=%d\n", >rc); >> + >> /* >> * The kernel may be operating out of CXL memory on this device, >> * there is no spec defined way to determine whether this device >> -- >> 2.43.0 >> >> Thanks, Shiju