RE: [PATCH v3 6/6] dmaengine: fsl-edma: integrate TCD64 support for i.MX95

From: Joy Zou
Date: Thu Dec 21 2023 - 02:47:46 EST


Hi Frank,
> -----Original Message-----
> From: Frank Li <frank.li@xxxxxxx>
> Sent: 2023年11月28日 6:56
> To: Frank Li <frank.li@xxxxxxx>; vkoul@xxxxxxxxxx
> Cc: devicetree@xxxxxxxxxxxxxxx; dmaengine@xxxxxxxxxxxxxxx;
> imx@xxxxxxxxxxxxxxx; Joy Zou <joy.zou@xxxxxxx>;
> krzysztof.kozlowski+dt@xxxxxxxxxx; linux-kernel@xxxxxxxxxxxxxxx; Peng Fan
> <peng.fan@xxxxxxx>; robh+dt@xxxxxxxxxx; Shenwei Wang
> <shenwei.wang@xxxxxxx>
> Subject: [PATCH v3 6/6] dmaengine: fsl-edma: integrate TCD64 support for
> i.MX95
>
> In i.MX95's edma version 5, the TCD structure is extended to support 64-bit
> addresses for fields like saddr and daddr. To prevent code duplication, employ
> help macros to handle the fields, as the field names remain the same between
> TCD and TCD64.
>
> Change local variables related to TCD addresses from 'u32' to 'dma_addr_t'
> to accept 64-bit DMA addresses.
>
> Change 'vtcd' type to 'void *' to avoid direct use. Use helper macros to access
> the TCD fields correctly.
>
> Call 'dma_set_mask_and_coherent(64)' when TCD64 is supported.
>
> Signed-off-by: Frank Li <Frank.Li@xxxxxxx>
> ---
> drivers/dma/fsl-edma-common.c | 34 ++++---
> drivers/dma/fsl-edma-common.h | 165
> +++++++++++++++++++++++++++-------
> drivers/dma/fsl-edma-main.c | 14 +++
> 3 files changed, 170 insertions(+), 43 deletions(-)
>
> diff --git a/drivers/dma/fsl-edma-common.c
> b/drivers/dma/fsl-edma-common.c index 65f466ab9d4da..c8acff09308fd
> 100644
> --- a/drivers/dma/fsl-edma-common.c
> +++ b/drivers/dma/fsl-edma-common.c
> @@ -351,7 +351,7 @@ static size_t fsl_edma_desc_residue(struct
> fsl_edma_chan *fsl_chan, {
> struct fsl_edma_desc *edesc = fsl_chan->edesc;
> enum dma_transfer_direction dir = edesc->dirn;
> - dma_addr_t cur_addr, dma_addr;
> + dma_addr_t cur_addr, dma_addr, old_addr;
> size_t len, size;
> u32 nbytes = 0;
> int i;
> @@ -367,10 +367,16 @@ static size_t fsl_edma_desc_residue(struct
> fsl_edma_chan *fsl_chan,
> if (!in_progress)
> return len;
>
> - if (dir == DMA_MEM_TO_DEV)
> - cur_addr = edma_read_tcdreg(fsl_chan, saddr);
> - else
> - cur_addr = edma_read_tcdreg(fsl_chan, daddr);
> + /* 64bit read is not atomic, need read retry when high 32bit changed */
> + do {
> + if (dir == DMA_MEM_TO_DEV) {
> + old_addr = edma_read_tcdreg(fsl_chan, saddr);
> + cur_addr = edma_read_tcdreg(fsl_chan, saddr);
> + } else {
> + old_addr = edma_read_tcdreg(fsl_chan, daddr);
> + cur_addr = edma_read_tcdreg(fsl_chan, daddr);
> + }
> + } while (upper_32_bits(cur_addr) != upper_32_bits(old_addr));
>
> /* figure out the finished and calculate the residue */
> for (i = 0; i < fsl_chan->edesc->n_tcds; i++) { @@ -426,8 +432,7 @@
> enum dma_status fsl_edma_tx_status(struct dma_chan *chan,
> return fsl_chan->status;
> }
>
> -static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan,
> - struct fsl_edma_hw_tcd *tcd)
> +static void fsl_edma_set_tcd_regs(struct fsl_edma_chan *fsl_chan, void
> +*tcd)
> {
> u16 csr = 0;
>
> @@ -478,9 +483,9 @@ static void fsl_edma_set_tcd_regs(struct
> fsl_edma_chan *fsl_chan,
>
> static inline
> void fsl_edma_fill_tcd(struct fsl_edma_chan *fsl_chan,
> - struct fsl_edma_hw_tcd *tcd, u32 src, u32 dst,
> - u16 attr, u16 soff, u32 nbytes, u32 slast, u16 citer,
> - u16 biter, u16 doff, u32 dlast_sga, bool major_int,
> + struct fsl_edma_hw_tcd *tcd, dma_addr_t src, dma_addr_t
> dst,
> + u16 attr, u16 soff, u32 nbytes, dma_addr_t slast, u16 citer,
> + u16 biter, u16 doff, dma_addr_t dlast_sga, bool major_int,
> bool disable_req, bool enable_sg) {
> struct dma_slave_config *cfg = &fsl_chan->cfg; @@ -581,8 +586,9 @@
> struct dma_async_tx_descriptor *fsl_edma_prep_dma_cyclic(
> dma_addr_t dma_buf_next;
> bool major_int = true;
> int sg_len, i;
> - u32 src_addr, dst_addr, last_sg, nbytes;
> + dma_addr_t src_addr, dst_addr, last_sg;
> u16 soff, doff, iter;
> + u32 nbytes;
>
> if (!is_slave_direction(direction))
> return NULL;
> @@ -654,8 +660,9 @@ struct dma_async_tx_descriptor
> *fsl_edma_prep_slave_sg(
> struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
> struct fsl_edma_desc *fsl_desc;
> struct scatterlist *sg;
> - u32 src_addr, dst_addr, last_sg, nbytes;
> + dma_addr_t src_addr, dst_addr, last_sg;
> u16 soff, doff, iter;
> + u32 nbytes;
> int i;
>
> if (!is_slave_direction(direction))
> @@ -804,7 +811,8 @@ int fsl_edma_alloc_chan_resources(struct dma_chan
> *chan)
> struct fsl_edma_chan *fsl_chan = to_fsl_edma_chan(chan);
>
> fsl_chan->tcd_pool = dma_pool_create("tcd_pool", chan->device->dev,
> - sizeof(struct fsl_edma_hw_tcd),
> + fsl_edma_drvflags(fsl_chan) & FSL_EDMA_DRV_TCD64 ?
> + sizeof(struct fsl_edma_hw_tcd64) : sizeof(struct
> fsl_edma_hw_tcd),
> 32, 0);
> return 0;
> }
> diff --git a/drivers/dma/fsl-edma-common.h
> b/drivers/dma/fsl-edma-common.h index 4f39a548547a6..6afceb9fded1b
> 100644
> --- a/drivers/dma/fsl-edma-common.h
> +++ b/drivers/dma/fsl-edma-common.h
> @@ -87,6 +87,20 @@ struct fsl_edma_hw_tcd {
> __le16 biter;
> };
>
> +struct fsl_edma_hw_tcd64 {
> + __le64 saddr;
> + __le16 soff;
> + __le16 attr;
> + __le32 nbytes;
> + __le64 slast;
> + __le64 daddr;
> + __le64 dlast_sga;
> + __le16 doff;
> + __le16 citer;
> + __le16 csr;
> + __le16 biter;
> +} __packed;
> +
> struct fsl_edma3_ch_reg {
> __le32 ch_csr;
> __le32 ch_es;
> @@ -96,7 +110,10 @@ struct fsl_edma3_ch_reg {
> __le32 ch_mux;
> __le32 ch_mattr; /* edma4, reserved for edma3 */
> __le32 ch_reserved;
> - struct fsl_edma_hw_tcd tcd;
> + union {
> + struct fsl_edma_hw_tcd tcd;
> + struct fsl_edma_hw_tcd tcd64;
> + };
> } __packed;
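Should the type of the tcd64 member in this union be struct fsl_edma_hw_tcd64 rather than struct fsl_edma_hw_tcd? As written, both union members have the same 32-bit TCD type.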
BR
Joy Zou
>
> /*
> @@ -125,7 +142,7 @@ struct edma_regs {
>
> struct fsl_edma_sw_tcd {
> dma_addr_t ptcd;
> - struct fsl_edma_hw_tcd *vtcd;
> + void *vtcd;
> };
>
> struct fsl_edma_chan {
> @@ -144,7 +161,7 @@ struct fsl_edma_chan {
> u32 dma_dev_size;
> enum dma_data_direction dma_dir;
> char chan_name[32];
> - struct fsl_edma_hw_tcd __iomem *tcd;
> + void __iomem *tcd;
> void __iomem *mux_addr;
> u32 real_count;
> struct work_struct issue_worker;
> @@ -188,6 +205,7 @@ struct fsl_edma_desc {
> #define FSL_EDMA_DRV_CLEAR_DONE_E_SG BIT(13)
> /* Need clean CHn_CSR DONE before enable TCD's MAJORELINK */
> #define FSL_EDMA_DRV_CLEAR_DONE_E_LINK BIT(14)
> +#define FSL_EDMA_DRV_TCD64 BIT(15)
>
> #define FSL_EDMA_DRV_EDMA3 (FSL_EDMA_DRV_SPLIT_REG | \
> FSL_EDMA_DRV_BUS_8BYTE | \
> @@ -231,18 +249,61 @@ struct fsl_edma_engine {
> struct fsl_edma_chan chans[] __counted_by(n_chans);
> };
>
> -#define edma_read_tcdreg(chan, __name) \
> -(sizeof(chan->tcd->__name) == sizeof(u32) ? \
> - edma_readl(chan->edma, &chan->tcd->__name) : \
> - edma_readw(chan->edma, &chan->tcd->__name))
> +#define edma_read_tcdreg_c(chan, _tcd, __name) \
> +(sizeof((_tcd)->__name) == sizeof(u64) ? \
> + edma_readq(chan->edma, &(_tcd)->__name) : \
> + ((sizeof((_tcd)->__name) == sizeof(u32)) ? \
> + edma_readl(chan->edma, &(_tcd)->__name) : \
> + edma_readw(chan->edma, &(_tcd)->__name) \
> + ))
> +
> +#define edma_read_tcdreg(chan, __name)
> \
> +((fsl_edma_drvflags(chan) & FSL_EDMA_DRV_TCD64) ?
> \
> + edma_read_tcdreg_c(chan, ((struct fsl_edma_hw_tcd64 __iomem
> *)chan->tcd), __name) : \
> + edma_read_tcdreg_c(chan, ((struct fsl_edma_hw_tcd __iomem
> *)chan->tcd), __name) \
> +)
> +
> +#define edma_write_tcdreg_c(chan, _tcd, _val, __name) \
> +do { \
> + switch (sizeof(_tcd->__name)) { \
> + case sizeof(u64): \
> + edma_writeq(chan->edma, (u64 __force)_val, &_tcd->__name); \
> + break; \
> + case sizeof(u32): \
> + edma_writel(chan->edma, (u32 __force)_val, &_tcd->__name); \
> + break; \
> + case sizeof(u16): \
> + edma_writew(chan->edma, (u16 __force)_val, &_tcd->__name); \
> + break; \
> + case sizeof(u8): \
> + edma_writeb(chan->edma, (u8 __force)_val, &_tcd->__name); \
> + break; \
> + } \
> +} while (0)
>
> -#define edma_write_tcdreg(chan, val, __name) \
> -(sizeof(chan->tcd->__name) == sizeof(u32) ? \
> - edma_writel(chan->edma, (u32 __force)val, &chan->tcd->__name) : \
> - edma_writew(chan->edma, (u16 __force)val, &chan->tcd->__name))
> +#define edma_write_tcdreg(chan, val, __name)
> \
> +do { \
> + struct fsl_edma_hw_tcd64 __iomem *tcd64_r = (struct
> fsl_edma_hw_tcd64 __iomem *)chan->tcd; \
> + struct fsl_edma_hw_tcd __iomem *tcd_r = (struct fsl_edma_hw_tcd
> __iomem *)chan->tcd; \
> + \
> + if (fsl_edma_drvflags(chan) & FSL_EDMA_DRV_TCD64)
> \
> + edma_write_tcdreg_c(chan, tcd64_r, val, __name);
> \
> + else \
> + edma_write_tcdreg_c(chan, tcd_r, val, __name);
> \
> +} while (0)
>
> -#define edma_cp_tcd_to_reg(chan, __tcd, __name) \
> - edma_write_tcdreg(chan, __tcd->__name, __name)
> +#define edma_cp_tcd_to_reg(chan, __tcd, __name)
> \
> +do { \
> + struct fsl_edma_hw_tcd64 __iomem *tcd64_r = (struct
> fsl_edma_hw_tcd64 __iomem *)chan->tcd; \
> + struct fsl_edma_hw_tcd __iomem *tcd_r = (struct fsl_edma_hw_tcd
> __iomem *)chan->tcd; \
> + struct fsl_edma_hw_tcd64 *tcd64_m = (struct fsl_edma_hw_tcd64
> *)__tcd; \
> + struct fsl_edma_hw_tcd *tcd_m = (struct fsl_edma_hw_tcd *)__tcd;
> \
> + \
> + if (fsl_edma_drvflags(chan) & FSL_EDMA_DRV_TCD64)
> \
> + edma_write_tcdreg_c(chan, tcd64_r, tcd64_m->__name, __name);
> \
> + else \
> + edma_write_tcdreg_c(chan, tcd_r, tcd_m->__name, __name);
> \
> +} while (0)
>
> #define edma_readl_chreg(chan, __name) \
> edma_readl(chan->edma, \
> @@ -254,24 +315,41 @@ struct fsl_edma_engine {
> (void __iomem *)&(container_of(((__force void *)chan->tcd),\
> struct fsl_edma3_ch_reg, tcd)->__name))
>
> -#define fsl_edma_get_tcd(_chan, _tcd, _field) ((_tcd)->_field)
> -
> -#define fsl_edma_le_to_cpu(x) \
> -(sizeof(x) == sizeof(u32) ? le32_to_cpu((__force __le32)(x)) :
> le16_to_cpu((__force __le16)(x)))
> -
> -#define fsl_edma_get_tcd_to_cpu(_chan, _tcd, _field) \
> -fsl_edma_le_to_cpu(fsl_edma_get_tcd(_chan, _tcd, _field))
> +#define fsl_edma_get_tcd(_chan, _tcd, _field) \
> +(fsl_edma_drvflags(_chan) & FSL_EDMA_DRV_TCD64 ? (((struct
> fsl_edma_hw_tcd64 *)_tcd)->_field) : \
> + (((struct fsl_edma_hw_tcd *)_tcd)->_field))
> +
> +#define fsl_edma_le_to_cpu(x) \
> +(sizeof(x) == sizeof(u64) ? le64_to_cpu((__force __le64)(x)) : \
> + (sizeof(x) == sizeof(u32) ? le32_to_cpu((__force __le32)(x)) : \
> + le16_to_cpu((__force __le16)(x))))
> +
> +#define fsl_edma_get_tcd_to_cpu(_chan, _tcd, _field) \
> +(fsl_edma_drvflags(_chan) & FSL_EDMA_DRV_TCD64 ? \
> + fsl_edma_le_to_cpu(((struct fsl_edma_hw_tcd64 *)_tcd)->_field) : \
> + fsl_edma_le_to_cpu(((struct fsl_edma_hw_tcd *)_tcd)->_field))
> +
> +#define fsl_edma_set_tcd_to_le_c(_tcd, _val, _field) \
> +do { \
> + switch (sizeof((_tcd)->_field)) { \
> + case sizeof(u64): \
> + *(__force __le64 *)(&((_tcd)->_field)) = cpu_to_le64(_val); \
> + break; \
> + case sizeof(u32): \
> + *(__force __le32 *)(&((_tcd)->_field)) = cpu_to_le32(_val); \
> + break; \
> + case sizeof(u16): \
> + *(__force __le16 *)(&((_tcd)->_field)) = cpu_to_le16(_val); \
> + break; \
> + } \
> +} while (0)
>
> -#define fsl_edma_set_tcd_to_le(_fsl_chan, _tcd, _val, _field) \
> -do { \
> - switch (sizeof((_tcd)->_field)) { \
> - case sizeof(u32): \
> - *(__force __le32 *)(&((_tcd)->_field)) = cpu_to_le32(_val); \
> - break; \
> - case sizeof(u16): \
> - *(__force __le16 *)(&((_tcd)->_field)) = cpu_to_le16(_val); \
> - break; \
> - } \
> +#define fsl_edma_set_tcd_to_le(_chan, _tcd, _val, _field) \
> +do { \
> + if (fsl_edma_drvflags(_chan) & FSL_EDMA_DRV_TCD64) \
> + fsl_edma_set_tcd_to_le_c((struct fsl_edma_hw_tcd64 *)_tcd, _val,
> _field); \
> + else \
> + fsl_edma_set_tcd_to_le_c((struct fsl_edma_hw_tcd *)_tcd, _val,
> _field); \
> } while (0)
>
> /*
> @@ -280,6 +358,21 @@ do {
> \
> * For the big-endian IP module, the offset for 8-bit or 16-bit registers
> * should also be swapped opposite to that in little-endian IP.
> */
> +static inline u64 edma_readq(struct fsl_edma_engine *edma, void __iomem
> +*addr) {
> + u64 l, h;
> +
> + if (edma->big_endian) {
> + l = ioread32be(addr);
> + h = ioread32be(addr + 4);
> + } else {
> + l = ioread32(addr);
> + h = ioread32(addr + 4);
> + }
> +
> + return (h << 32) | l;
> +}
> +
> static inline u32 edma_readl(struct fsl_edma_engine *edma, void __iomem
> *addr) {
> if (edma->big_endian)
> @@ -325,6 +418,18 @@ static inline void edma_writel(struct
> fsl_edma_engine *edma,
> iowrite32(val, addr);
> }
>
> +static inline void edma_writeq(struct fsl_edma_engine *edma,
> + u64 val, void __iomem *addr)
> +{
> + if (edma->big_endian) {
> + iowrite32be(val & 0xFFFFFFFF, addr);
> + iowrite32be(val >> 32, addr + 4);
> + } else {
> + iowrite32(val & 0xFFFFFFFF, addr);
> + iowrite32(val >> 32, addr + 4);
> + }
> +}
> +
> static inline struct fsl_edma_chan *to_fsl_edma_chan(struct dma_chan *chan)
> {
> return container_of(chan, struct fsl_edma_chan, vchan.chan); diff --git
> a/drivers/dma/fsl-edma-main.c b/drivers/dma/fsl-edma-main.c index
> d767c89973b69..c2c0c3effc8cb 100644
> --- a/drivers/dma/fsl-edma-main.c
> +++ b/drivers/dma/fsl-edma-main.c
> @@ -364,6 +364,16 @@ static struct fsl_edma_drvdata imx93_data4 = {
> .setup_irq = fsl_edma3_irq_init,
> };
>
> +static struct fsl_edma_drvdata imx95_data5 = {
> + .flags = FSL_EDMA_DRV_HAS_CHMUX | FSL_EDMA_DRV_HAS_DMACLK |
> FSL_EDMA_DRV_EDMA4 |
> + FSL_EDMA_DRV_TCD64,
> + .chreg_space_sz = 0x8000,
> + .chreg_off = 0x10000,
> + .mux_off = 0x200,
> + .mux_skip = sizeof(u32),
> + .setup_irq = fsl_edma3_irq_init,
> +};
> +
> static const struct of_device_id fsl_edma_dt_ids[] = {
> { .compatible = "fsl,vf610-edma", .data = &vf610_data},
> { .compatible = "fsl,ls1028a-edma", .data = &ls1028a_data}, @@ -372,6
> +382,7 @@ static const struct of_device_id fsl_edma_dt_ids[] = {
> { .compatible = "fsl,imx8qm-adma", .data = &imx8qm_audio_data},
> { .compatible = "fsl,imx93-edma3", .data = &imx93_data3},
> { .compatible = "fsl,imx93-edma4", .data = &imx93_data4},
> + { .compatible = "fsl,imx95-edma5", .data = &imx95_data5},
> { /* sentinel */ }
> };
> MODULE_DEVICE_TABLE(of, fsl_edma_dt_ids); @@ -513,6 +524,9 @@ static
> int fsl_edma_probe(struct platform_device *pdev)
> return ret;
> }
>
> + if (drvdata->flags & FSL_EDMA_DRV_TCD64)
> + dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64));
> +
> INIT_LIST_HEAD(&fsl_edma->dma_dev.channels);
> for (i = 0; i < fsl_edma->n_chans; i++) {
> struct fsl_edma_chan *fsl_chan = &fsl_edma->chans[i];
> --
> 2.34.1