Hi Smita, Nathan, Terry
I am struggling to understand whether this patch is truly necessary, or whether I
haven't fully grasped the scenario where it provides value. Without this patch applied,
on a QEMU VM with both HMEM and CXL.mem present, I observed no issues. (Are there
specific config options required to reproduce the problem?)
Here is the /proc/iomem without the patch:
180000000-1ffffffff : Soft Reserved ### 2 hmem nodes
180000000-1bfffffff : dax1.0
180000000-1bfffffff : System RAM (kmem)
1c0000000-1ffffffff : dax2.0
1c0000000-1ffffffff : System RAM (kmem)
5c0001128-5c00011b7 : port1
5d0000000-64fffffff : CXL Window 0 ### 1 CXL node
5d0000000-64fffffff : region0
5d0000000-64fffffff : dax0.0
5d0000000-64fffffff : System RAM (kmem)
On 04/06/2025 06:19, Smita Koralahalli wrote:
From: Nathan Fontenot <nathan.fontenot@xxxxxxx>
The DAX HMEM driver currently consumes all SOFT RESERVED iomem resources
during initialization. This interferes with the CXL driver’s ability to
create regions and trim overlapping SOFT RESERVED ranges before DAX uses
them.
When the commit message refers to the "HMEM driver", does it mean
`dax_hmem_platform_driver` or `dax_hmem_driver`? Whichever it is,
what is the impact if it consumes all SOFT RESERVED resources?
Since `hmem_register_device()` only creates HMEM devices for ranges
*without* `IORES_DESC_CXL` (a descriptor that can be set in cxl_acpi), cxl_core/cxl_dax
should still be able to create regions and DAX devices without conflicts.
To resolve this, defer the DAX driver's resource consumption if the
cxl_acpi driver is enabled. The DAX HMEM initialization skips walking the
iomem resource tree in this case. After CXL region creation completes,
any remaining SOFT RESERVED resources are explicitly registered with the
DAX driver by the CXL driver.
Conversely, with this patch applied, `cxl_region_softreserv_update()` attempts
to register new HMEM devices. This may cause duplicate registrations for the
same range (e.g., 0x180000000-0x1ffffffff), triggering warnings like:
[ 14.984108] kmem dax4.0: mapping0: 0x180000000-0x1ffffffff could not reserve region
[ 14.987204] kmem dax4.0: probe with driver kmem failed with error -16
This happens because the HMAT initialization has already registered these sub-ranges:
180000000-1bfffffff
1c0000000-1ffffffff
If I'm missing something, please correct me.
Thanks,
Zhijian
This sequencing ensures proper handling of overlaps and fixes hotplug
failures.
Co-developed-by: Nathan Fontenot <Nathan.Fontenot@xxxxxxx>
Signed-off-by: Nathan Fontenot <Nathan.Fontenot@xxxxxxx>
Co-developed-by: Terry Bowman <terry.bowman@xxxxxxx>
Signed-off-by: Terry Bowman <terry.bowman@xxxxxxx>
Signed-off-by: Smita Koralahalli <Smita.KoralahalliChannabasappa@xxxxxxx>
---
drivers/cxl/core/region.c | 10 +++++++++
drivers/dax/hmem/device.c | 43 ++++++++++++++++++++-------------------
drivers/dax/hmem/hmem.c | 3 ++-
include/linux/dax.h | 6 ++++++
4 files changed, 40 insertions(+), 22 deletions(-)
diff --git a/drivers/cxl/core/region.c b/drivers/cxl/core/region.c
index 3a5ca44d65f3..c6c0c7ba3b20 100644
--- a/drivers/cxl/core/region.c
+++ b/drivers/cxl/core/region.c
@@ -10,6 +10,7 @@
#include <linux/sort.h>
#include <linux/idr.h>
#include <linux/memory-tiers.h>
+#include <linux/dax.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
@@ -3553,6 +3554,11 @@ static struct resource *normalize_resource(struct resource *res)
return NULL;
}
+static int cxl_softreserv_mem_register(struct resource *res, void *unused)
+{
+ return hmem_register_device(phys_to_target_node(res->start), res);
+}
+
static int __cxl_region_softreserv_update(struct resource *soft,
void *_cxlr)
{
@@ -3590,6 +3596,10 @@ int cxl_region_softreserv_update(void)
__cxl_region_softreserv_update);
}
+ /* Now register any remaining SOFT RESERVES with DAX */
+ walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED, IORESOURCE_MEM,
+ 0, -1, NULL, cxl_softreserv_mem_register);
+
return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_region_softreserv_update, "CXL");
diff --git a/drivers/dax/hmem/device.c b/drivers/dax/hmem/device.c
index 59ad44761191..cc1ed7bbdb1a 100644
--- a/drivers/dax/hmem/device.c
+++ b/drivers/dax/hmem/device.c
@@ -8,7 +8,6 @@
static bool nohmem;
module_param_named(disable, nohmem, bool, 0444);
-static bool platform_initialized;
static DEFINE_MUTEX(hmem_resource_lock);
static struct resource hmem_active = {
.name = "HMEM devices",
@@ -35,9 +34,7 @@ EXPORT_SYMBOL_GPL(walk_hmem_resources);
static void __hmem_register_resource(int target_nid, struct resource *res)
{
- struct platform_device *pdev;
struct resource *new;
- int rc;
new = __request_region(&hmem_active, res->start, resource_size(res), "",
0);
@@ -47,21 +44,6 @@ static void __hmem_register_resource(int target_nid, struct resource *res)
}
new->desc = target_nid;
-
- if (platform_initialized)
- return;
-
- pdev = platform_device_alloc("hmem_platform", 0);
- if (!pdev) {
- pr_err_once("failed to register device-dax hmem_platform device\n");
- return;
- }
-
- rc = platform_device_add(pdev);
- if (rc)
- platform_device_put(pdev);
- else
- platform_initialized = true;
}
void hmem_register_resource(int target_nid, struct resource *res)
@@ -83,9 +65,28 @@ static __init int hmem_register_one(struct resource *res, void *data)
static __init int hmem_init(void)
{
- walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
- IORESOURCE_MEM, 0, -1, NULL, hmem_register_one);
- return 0;
+ struct platform_device *pdev;
+ int rc;
+
+ if (!IS_ENABLED(CONFIG_CXL_ACPI)) {
+ walk_iomem_res_desc(IORES_DESC_SOFT_RESERVED,
+ IORESOURCE_MEM, 0, -1, NULL,
+ hmem_register_one);
+ }
+
+ pdev = platform_device_alloc("hmem_platform", 0);
+ if (!pdev) {
+ pr_err("failed to register device-dax hmem_platform device\n");
+ return -1;
+ }
+
+ rc = platform_device_add(pdev);
+ if (rc) {
+ pr_err("failed to add device-dax hmem_platform device\n");
+ platform_device_put(pdev);
+ }
+
+ return rc;
}
/*
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index 3aedef5f1be1..a206b9b383e4 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -61,7 +61,7 @@ static void release_hmem(void *pdev)
platform_device_unregister(pdev);
}
-static int hmem_register_device(int target_nid, const struct resource *res)
+int hmem_register_device(int target_nid, const struct resource *res)
{
struct device *host = &dax_hmem_pdev->dev;
struct platform_device *pdev;
@@ -124,6 +124,7 @@ static int hmem_register_device(int target_nid, const struct resource *res)
platform_device_put(pdev);
return rc;
}
+EXPORT_SYMBOL_GPL(hmem_register_device);
static int dax_hmem_platform_probe(struct platform_device *pdev)
{
diff --git a/include/linux/dax.h b/include/linux/dax.h
index a4ad3708ea35..5052dca8b3bc 100644
--- a/include/linux/dax.h
+++ b/include/linux/dax.h
@@ -299,10 +299,16 @@ static inline int dax_mem2blk_err(int err)
#ifdef CONFIG_DEV_DAX_HMEM_DEVICES
void hmem_register_resource(int target_nid, struct resource *r);
+int hmem_register_device(int target_nid, const struct resource *res);
#else
static inline void hmem_register_resource(int target_nid, struct resource *r)
{
}
+
+static inline int hmem_register_device(int target_nid, const struct resource *res)
+{
+ return 0;
+}
#endif
typedef int (*walk_hmem_fn)(int target_nid, const struct resource *res);