[PATCH v3 15/15] iommu/amd: Add support for nested domain attach/detach

From: Suravee Suthikulpanit
Date: Thu Oct 09 2025 - 20:01:18 EST


Introduce set_dte_nested() to program guest translation settings in
the host DTE when attaches the nested domain to a device.

In addition, introduce struct amd_iommu_viommu, which stores reference to
the nest parent domain assigned during the call to struct
iommu_ops.viommu_init(). Information in the nest parent domain is needed
when setting up the DTE for nested translation.

Signed-off-by: Suravee Suthikulpanit <suravee.suthikulpanit@xxxxxxx>
---
drivers/iommu/amd/amd_iommu.h | 3 ++
drivers/iommu/amd/amd_iommu_types.h | 8 ++++
drivers/iommu/amd/iommu.c | 8 ++--
drivers/iommu/amd/nested.c | 66 +++++++++++++++++++++++++++++
4 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/drivers/iommu/amd/amd_iommu.h b/drivers/iommu/amd/amd_iommu.h
index cfb63de7732a..98351b0cb9a0 100644
--- a/drivers/iommu/amd/amd_iommu.h
+++ b/drivers/iommu/amd/amd_iommu.h
@@ -190,6 +190,9 @@ struct iommu_dev_data *search_dev_data(struct amd_iommu *iommu, u16 devid);
int amd_iommu_completion_wait(struct amd_iommu *iommu);

/* DTE */
+void amd_iommu_set_dte_v1(struct iommu_dev_data *dev_data,
+ struct protection_domain *domain, u16 domid,
+ struct dev_table_entry *new);
int amd_iommu_device_flush_dte(struct iommu_dev_data *dev_data);
void amd_iommu_update_dte256(struct amd_iommu *iommu,
struct iommu_dev_data *dev_data,
diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h
index a68b5c2fc0a2..683ee288c636 100644
--- a/drivers/iommu/amd/amd_iommu_types.h
+++ b/drivers/iommu/amd/amd_iommu_types.h
@@ -17,6 +17,7 @@
#include <linux/list.h>
#include <linux/spinlock.h>
#include <linux/pci.h>
+#include <linux/iommufd.h>
#include <linux/irqreturn.h>
#include <linux/io-pgtable.h>

@@ -420,6 +421,7 @@
#define DTE_FLAG_HAD (3ULL << 7)
#define DTE_MODE_MASK GENMASK_ULL(11, 9)
#define DTE_HOST_TRP GENMASK_ULL(51, 12)
+#define DTE_FLAG_PPR BIT_ULL(52)
#define DTE_FLAG_GIOV BIT_ULL(54)
#define DTE_FLAG_GV BIT_ULL(55)
#define DTE_GLX GENMASK_ULL(57, 56)
@@ -590,6 +592,11 @@ struct pdom_iommu_info {
u32 refcnt; /* Count of attached dev/pasid per domain/IOMMU */
};

+struct amd_iommu_viommu {
+ struct iommufd_viommu core;
+ struct protection_domain *parent; /* nest parent domain for this viommu */
+};
+
/*
* Structure defining one entry in the device table
*/
@@ -607,6 +614,7 @@ struct nested_domain {
struct iommu_domain domain; /* generic domain handle used by iommu core code */
u16 id; /* the domain id written to the device table */
struct iommu_hwpt_amd_guest gdte; /* Guest vIOMMU DTE */
+ struct amd_iommu_viommu *viommu; /* AMD hw-viommu this nested domain belong to */
};

/*
diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c
index 2a536d02aeab..013914fc8a4f 100644
--- a/drivers/iommu/amd/iommu.c
+++ b/drivers/iommu/amd/iommu.c
@@ -2044,9 +2044,9 @@ static void set_dte_gcr3_table(struct amd_iommu *iommu,
target->data[2] |= FIELD_PREP(DTE_GPT_LEVEL_MASK, GUEST_PGTABLE_4_LEVEL);
}

-static void set_dte_v1(struct iommu_dev_data *dev_data,
- struct protection_domain *domain, u16 domid,
- struct dev_table_entry *new)
+void amd_iommu_set_dte_v1(struct iommu_dev_data *dev_data,
+ struct protection_domain *domain, u16 domid,
+ struct dev_table_entry *new)
{
/*
* When SNP is enabled, we can only support TV=1 with non-zero domain ID.
@@ -2089,7 +2089,7 @@ static void set_dte_entry(struct amd_iommu *iommu,

old_domid = READ_ONCE(dte->data[1]) & DTE_DOMID_MASK;

- set_dte_v1(dev_data, domain, domid, &new);
+ amd_iommu_set_dte_v1(dev_data, domain, domid, &new);
set_dte_gcr3_table(iommu, dev_data, &new);

if (dev_data->ppr)
diff --git a/drivers/iommu/amd/nested.c b/drivers/iommu/amd/nested.c
index 3307c925d3c1..ca3d3001c87f 100644
--- a/drivers/iommu/amd/nested.c
+++ b/drivers/iommu/amd/nested.c
@@ -63,6 +63,7 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
const struct iommu_user_data *user_data)
{
int ret;
+ struct amd_iommu_viommu *aviommu = container_of(viommu, struct amd_iommu_viommu, core);
struct iommu_hwpt_amd_guest gdte;
struct nested_domain *ndom;

@@ -85,6 +86,7 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,

ndom->domain.ops = &nested_domain_ops;
ndom->domain.type = IOMMU_DOMAIN_NESTED;
+ ndom->viommu = aviommu;
memcpy(&ndom->gdte, &gdte, sizeof(gdte));

/*
@@ -111,6 +113,69 @@ amd_iommu_alloc_domain_nested(struct iommufd_viommu *viommu, u32 flags,
return ERR_PTR(ret);
}

+static void set_dte_nested(struct amd_iommu *iommu,
+ struct iommu_domain *dom,
+ struct iommu_dev_data *dev_data)
+{
+ struct protection_domain *parent;
+ struct dev_table_entry new = {0};
+ struct nested_domain *ndom = to_ndomain(dom);
+ struct iommu_hwpt_amd_guest *gdte = &ndom->gdte;
+
+ /*
+ * The nest parent domain is attached during the call to the
+ * struct iommu_ops.viommu_init(), which will be stored as part
+ * of the struct amd_iommu_viommu.parent.
+ */
+ if (WARN_ON(!ndom->viommu || !ndom->viommu->parent))
+ return;
+
+ parent = ndom->viommu->parent;
+ amd_iommu_make_clear_dte(dev_data, &new);
+
+ /*
+ * Use nested domain ID to program DTE.
+ * See amd_iommu_alloc_domain_nested().
+ */
+ amd_iommu_set_dte_v1(dev_data, parent, ndom->id, &new);
+
+ /* Guest PPR */
+ new.data[0] |= gdte->dte[0] & DTE_FLAG_PPR;
+
+ /* Guest translation stuff */
+ new.data[0] |= gdte->dte[0] & (DTE_GLX | DTE_FLAG_GV | DTE_FLAG_GIOV);
+
+ /* GCR3 table */
+ new.data[0] |= gdte->dte[0] & DTE_GCR3_14_12;
+ new.data[1] |= gdte->dte[1] & (DTE_GCR3_30_15 | DTE_GCR3_51_31);
+
+ /* Guest paging mode */
+ new.data[2] |= gdte->dte[2] & DTE_GPT_LEVEL_MASK;
+
+ amd_iommu_update_dte256(iommu, dev_data, &new);
+}
+
+static int nested_attach_device(struct iommu_domain *dom, struct device *dev)
+{
+ struct iommu_dev_data *dev_data = dev_iommu_priv_get(dev);
+ struct amd_iommu *iommu = get_amd_iommu_from_dev_data(dev_data);
+ int ret = 0;
+
+ if (WARN_ON(dom->type != IOMMU_DOMAIN_NESTED))
+ return -EINVAL;
+
+ mutex_lock(&dev_data->mutex);
+
+ /* Update device table entry */
+ set_dte_nested(iommu, dom, dev_data);
+ amd_iommu_device_flush_dte(dev_data);
+ amd_iommu_completion_wait(iommu);
+
+ mutex_unlock(&dev_data->mutex);
+
+ return ret;
+}
+
static void nested_domain_free(struct iommu_domain *dom)
{
struct nested_domain *ndom = to_ndomain(dom);
@@ -120,6 +185,7 @@ static void nested_domain_free(struct iommu_domain *dom)
}

static const struct iommu_domain_ops nested_domain_ops = {
+ .attach_dev = nested_attach_device,
.free = nested_domain_free,
};

--
2.34.1