[PATCH v9 08/21] iommu/io-pgtable-arm-v7s: Extend MediaTek 4GB Mode

From: Yong Wu
Date: Sat Aug 10 2019 - 04:00:20 EST


MediaTek extend the arm v7s descriptor to support the dram over 4GB.

In the mt2712 and mt8173, it's called "4GB mode", the physical address
is from 0x4000_0000 to 0x1_3fff_ffff, but from EMI point of view, it
is remapped to high address from 0x1_0000_0000 to 0x1_ffff_ffff, the
bit32 is always enabled. thus, in the M4U, we always enable the bit9
for all PTEs which means to enable bit32 of physical address. Here is
the detailed remap relationship in the "4GB mode":
CPU PA -> HW PA
0x4000_0000 0x1_4000_0000 (Add bit32)
0x8000_0000 0x1_8000_0000 ...
0xc000_0000 0x1_c000_0000 ...
0x1_0000_0000 0x1_0000_0000 (No change)

but in mt8183, M4U support the dram from 0x4000_0000 to 0x3_ffff_ffff
which isn't remaped. We extend the PTEs: the bit9 represent bit32 of
PA and the bit4 represent bit33 of PA. Meanwhile the iova still is
32bits.

The "4GB mode" is so special, it always add bit32 in paddr_to_iopte and
it only add bit32 while pa < 0x40000000UL in the iopte_to_paddr since the
valid pa is from 0x4000_0000 to 0x1_3fff_ffff. Thus I add a special macro
ARM_V7S_MTK_4GB_OAS(oas: 33) for "4GB mode".

Regarding whether the pagetable address could be over 4GB, the mt8183
support it while the previous mt8173 don't. thus keep it as is.

Signed-off-by: Yong Wu <yong.wu@xxxxxxxxxxxx>
---
drivers/iommu/io-pgtable-arm-v7s.c | 36 +++++++++++++++++++++++++++++-------
drivers/iommu/mtk_iommu.c | 23 +++++++++++++----------
drivers/iommu/mtk_iommu.h | 1 +
include/linux/io-pgtable.h | 9 +++++----
4 files changed, 48 insertions(+), 21 deletions(-)

diff --git a/drivers/iommu/io-pgtable-arm-v7s.c b/drivers/iommu/io-pgtable-arm-v7s.c
index 77cc1eb..ab12ef5 100644
--- a/drivers/iommu/io-pgtable-arm-v7s.c
+++ b/drivers/iommu/io-pgtable-arm-v7s.c
@@ -112,7 +112,9 @@
#define ARM_V7S_TEX_MASK 0x7
#define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT)

-#define ARM_V7S_ATTR_MTK_4GB BIT(9) /* MTK extend it for 4GB mode */
+/* MediaTek extend the two bits for PA 32bit/33bit */
+#define ARM_V7S_ATTR_MTK_PA_BIT32 BIT(9)
+#define ARM_V7S_ATTR_MTK_PA_BIT33 BIT(4)

/* *well, except for TEX on level 2 large pages, of course :( */
#define ARM_V7S_CONT_PAGE_TEX_SHIFT 6
@@ -179,13 +181,22 @@ static dma_addr_t __arm_v7s_dma_addr(void *pages)
static arm_v7s_iopte paddr_to_iopte(phys_addr_t paddr, int lvl,
struct io_pgtable_cfg *cfg)
{
- return paddr & ARM_V7S_LVL_MASK(lvl);
+ arm_v7s_iopte pte = paddr & ARM_V7S_LVL_MASK(lvl);
+
+ if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT) {
+ if ((paddr & BIT_ULL(32)) || cfg->oas == ARM_V7S_MTK_4GB_OAS)
+ pte |= ARM_V7S_ATTR_MTK_PA_BIT32;
+ if (paddr & BIT_ULL(33))
+ pte |= ARM_V7S_ATTR_MTK_PA_BIT33;
+ }
+ return pte;
}

static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
struct io_pgtable_cfg *cfg)
{
arm_v7s_iopte mask;
+ phys_addr_t paddr;

if (ARM_V7S_PTE_IS_TABLE(pte, lvl))
mask = ARM_V7S_TABLE_MASK;
@@ -194,7 +205,19 @@ static phys_addr_t iopte_to_paddr(arm_v7s_iopte pte, int lvl,
else
mask = ARM_V7S_LVL_MASK(lvl);

- return pte & mask;
+ paddr = pte & mask;
+ if (cfg->oas == 32 || !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT))
+ return paddr;
+
+ if (pte & ARM_V7S_ATTR_MTK_PA_BIT33)
+ paddr |= BIT_ULL(33);
+
+ /* Workaround for MTK 4GB Mode: Add BIT32 only when PA < 0x4000_0000.*/
+ if (cfg->oas == ARM_V7S_MTK_4GB_OAS && paddr < 0x40000000UL)
+ paddr |= BIT_ULL(32);
+ else if (pte & ARM_V7S_ATTR_MTK_PA_BIT32)
+ paddr |= BIT_ULL(32);
+ return paddr;
}

static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl,
@@ -315,9 +338,6 @@ static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl,
if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS))
pte |= ARM_V7S_ATTR_NS_SECTION;

- if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)
- pte |= ARM_V7S_ATTR_MTK_4GB;
-
return pte;
}

@@ -731,7 +751,9 @@ static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg,
{
struct arm_v7s_io_pgtable *data;

- if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS)
+ if (cfg->ias > ARM_V7S_ADDR_BITS ||
+ (cfg->oas > ARM_V7S_ADDR_BITS &&
+ !(cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_EXT)))
return NULL;

if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS |
diff --git a/drivers/iommu/mtk_iommu.c b/drivers/iommu/mtk_iommu.c
index 8300489..1a37db0 100644
--- a/drivers/iommu/mtk_iommu.c
+++ b/drivers/iommu/mtk_iommu.c
@@ -263,16 +263,20 @@ static int mtk_iommu_domain_finalise(struct mtk_iommu_domain *dom)
dom->cfg = (struct io_pgtable_cfg) {
.quirks = IO_PGTABLE_QUIRK_ARM_NS |
IO_PGTABLE_QUIRK_NO_PERMS |
- IO_PGTABLE_QUIRK_TLBI_ON_MAP,
+ IO_PGTABLE_QUIRK_TLBI_ON_MAP |
+ IO_PGTABLE_QUIRK_ARM_MTK_EXT,
.pgsize_bitmap = mtk_iommu_ops.pgsize_bitmap,
.ias = 32,
- .oas = 32,
.tlb = &mtk_iommu_gather_ops,
.iommu_dev = data->dev,
};

- if (data->enable_4GB)
- dom->cfg.quirks |= IO_PGTABLE_QUIRK_ARM_MTK_EXT;
+ if (!IS_ENABLED(CONFIG_PHYS_ADDR_T_64BIT))
+ dom->cfg.oas = 32;
+ else if (data->enable_4GB)
+ dom->cfg.oas = ARM_V7S_MTK_4GB_OAS;
+ else
+ dom->cfg.oas = 34;

dom->iop = alloc_io_pgtable_ops(ARM_V7S, &dom->cfg, data);
if (!dom->iop) {
@@ -363,8 +367,7 @@ static int mtk_iommu_map(struct iommu_domain *domain, unsigned long iova,
int ret;

spin_lock_irqsave(&dom->pgtlock, flags);
- ret = dom->iop->map(dom->iop, iova, paddr & DMA_BIT_MASK(32),
- size, prot);
+ ret = dom->iop->map(dom->iop, iova, paddr, size, prot);
spin_unlock_irqrestore(&dom->pgtlock, flags);

return ret;
@@ -393,7 +396,6 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
dma_addr_t iova)
{
struct mtk_iommu_domain *dom = to_mtk_domain(domain);
- struct mtk_iommu_data *data = mtk_iommu_get_m4u_data();
unsigned long flags;
phys_addr_t pa;

@@ -401,9 +403,6 @@ static phys_addr_t mtk_iommu_iova_to_phys(struct iommu_domain *domain,
pa = dom->iop->iova_to_phys(dom->iop, iova);
spin_unlock_irqrestore(&dom->pgtlock, flags);

- if (data->enable_4GB)
- pa |= BIT_ULL(32);
-
return pa;
}

@@ -594,6 +593,8 @@ static int mtk_iommu_probe(struct platform_device *pdev)

/* Whether the current dram is over 4GB */
data->enable_4GB = !!(max_pfn > (BIT_ULL(32) >> PAGE_SHIFT));
+ if (!data->plat_data->has_4gb_mode)
+ data->enable_4GB = false;

res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
data->base = devm_ioremap_resource(dev, res);
@@ -734,10 +735,12 @@ static int __maybe_unused mtk_iommu_resume(struct device *dev)

static const struct mtk_iommu_plat_data mt2712_data = {
.m4u_plat = M4U_MT2712,
+ .has_4gb_mode = true,
};

static const struct mtk_iommu_plat_data mt8173_data = {
.m4u_plat = M4U_MT8173,
+ .has_4gb_mode = true,
};

static const struct of_device_id mtk_iommu_of_ids[] = {
diff --git a/drivers/iommu/mtk_iommu.h b/drivers/iommu/mtk_iommu.h
index 9725b08..c281c01 100644
--- a/drivers/iommu/mtk_iommu.h
+++ b/drivers/iommu/mtk_iommu.h
@@ -34,6 +34,7 @@ enum mtk_iommu_plat {

struct mtk_iommu_plat_data {
enum mtk_iommu_plat m4u_plat;
+ bool has_4gb_mode;
};

struct mtk_iommu_domain;
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 915fb73..2733739 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -65,10 +65,9 @@ struct io_pgtable_cfg {
* (unmapped) entries but the hardware might do so anyway, perform
* TLB maintenance when mapping as well as when unmapping.
*
- * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) Set bit 9 in all
- * PTEs, for Mediatek IOMMUs which treat it as a 33rd address bit
- * when the SoC is in "4GB mode" and they can only access the high
- * remap of DRAM (0x1_00000000 to 0x1_ffffffff).
+ * IO_PGTABLE_QUIRK_ARM_MTK_EXT: (ARM v7s format) MediaTek IOMMUs extend
+ * to support up to 34 bits PA where the bit32 and bit33 are
+ * encoded in the bit9 and bit4 of the PTE respectively.
*
* IO_PGTABLE_QUIRK_NON_STRICT: Skip issuing synchronous leaf TLBIs
* on unmap, for DMA domains using the flush queue mechanism for
@@ -114,6 +113,8 @@ struct io_pgtable_cfg {
};
};

+#define ARM_V7S_MTK_4GB_OAS 33
+
/**
* struct io_pgtable_ops - Page table manipulation API for IOMMU drivers.
*
--
1.9.1