On 22/08/2025 11:42, Smita Koralahalli wrote: > Previously, dax_hmem deferred to CXL only when an immediate resource > intersection with a CXL window was detected. This left a gap: if cxl_acpi > or cxl_pci probing or region assembly had not yet started, hmem could > prematurely claim ranges. > > Fix this by introducing a dax_cxl_mode state machine and a deferred > work mechanism. > > The new workqueue delays consideration of Soft Reserved overlaps until > the CXL subsystem has had a chance to complete its discovery and region > assembly. This avoids premature iomem claims, eliminates race conditions > with async cxl_pci probe, and provides a cleaner handoff between hmem and > CXL resource management. > > Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx> > Signed-off-by: Dan Williams <dan.j.williams@xxxxxxxxx> > --- > drivers/dax/hmem/hmem.c | 72 +++++++++++++++++++++++++++++++++++++++-- > 1 file changed, 70 insertions(+), 2 deletions(-) > > diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c > index 7ada820cb177..90978518e5f4 100644 > --- a/drivers/dax/hmem/hmem.c > +++ b/drivers/dax/hmem/hmem.c > @@ -58,9 +58,45 @@ static void release_hmem(void *pdev) > platform_device_unregister(pdev); > } > > +static enum dax_cxl_mode { > + DAX_CXL_MODE_DEFER, > + DAX_CXL_MODE_REGISTER, The patch looks good overall, but I have one question for the community: should we retain the `DAX_CXL_MODE_REGISTER` enum value, given that it is for a feature we have never supported? The idea of having a 'register' mode as the last resort for 'Soft Reserved' memory might seem appealing, but it is not easy to implement. Instead, to avoid increasing driver complexity, I would prefer that when we encounter quirk/misconfiguration cases, we allow the user to reprogram or correct it. 
However, this is beyond the scope of the current patchset. Thanks, Zhijian > + DAX_CXL_MODE_DROP, > +} dax_cxl_mode; > + > +static int handle_deferred_cxl(struct device *host, int target_nid, > + const struct resource *res) > +{ > + if (region_intersects(res->start, resource_size(res), IORESOURCE_MEM, > + IORES_DESC_CXL) != REGION_DISJOINT) { > + if (dax_cxl_mode == DAX_CXL_MODE_DROP) > + dev_dbg(host, "dropping CXL range: %pr\n", res); > + } > + return 0; > +} > + > +struct dax_defer_work { > + struct platform_device *pdev; > + struct work_struct work; > +}; > + > +static void process_defer_work(struct work_struct *_work) > +{ > + struct dax_defer_work *work = container_of(_work, typeof(*work), work); > + struct platform_device *pdev = work->pdev; > + > + /* relies on cxl_acpi and cxl_pci having had a chance to load */ > + wait_for_device_probe(); > + > + dax_cxl_mode = DAX_CXL_MODE_DROP; > + > + walk_hmem_resources(&pdev->dev, handle_deferred_cxl); > +} > + > static int hmem_register_device(struct device *host, int target_nid, > const struct resource *res) > { > + struct dax_defer_work *work = dev_get_drvdata(host); > struct platform_device *pdev; > struct memregion_info info; > long id; > @@ -69,8 +105,18 @@ static int hmem_register_device(struct device *host, int target_nid, > if (IS_ENABLED(CONFIG_DEV_DAX_CXL) && > region_intersects(res->start, resource_size(res), IORESOURCE_MEM, > IORES_DESC_CXL) != REGION_DISJOINT) { > - dev_dbg(host, "deferring range to CXL: %pr\n", res); > - return 0; > + switch (dax_cxl_mode) { > + case DAX_CXL_MODE_DEFER: > + dev_dbg(host, "deferring range to CXL: %pr\n", res); > + schedule_work(&work->work); > + return 0; > + case DAX_CXL_MODE_REGISTER: > + dev_dbg(host, "registering CXL range: %pr\n", res); > + break; > + case DAX_CXL_MODE_DROP: > + dev_dbg(host, "dropping CXL range: %pr\n", res); > + return 0; > + } > } > > #ifdef CONFIG_EFI_SOFT_RESERVE > @@ -130,8 +176,30 @@ static int hmem_register_device(struct device *host, int 
target_nid, > return rc; > } > > +static void kill_defer_work(void *_work) > +{ > + struct dax_defer_work *work = container_of(_work, typeof(*work), work); > + > + cancel_work_sync(&work->work); > + kfree(work); > +} > + > static int dax_hmem_platform_probe(struct platform_device *pdev) > { > + struct dax_defer_work *work = kzalloc(sizeof(*work), GFP_KERNEL); > + int rc; > + > + if (!work) > + return -ENOMEM; > + > + work->pdev = pdev; > + INIT_WORK(&work->work, process_defer_work); > + > + rc = devm_add_action_or_reset(&pdev->dev, kill_defer_work, work); > + if (rc) > + return rc; > + > + platform_set_drvdata(pdev, work); > return walk_hmem_resources(&pdev->dev, hmem_register_device); > } >