[PATCH 4/4] spi: cadence-quadspi: Add Xilinx Versal external DMA support

From: Sai Krishna Potthuri
Date: Fri Sep 24 2021 - 06:10:59 EST


Add support to read the data from the flash using external DMA.
Cadence Octal SPI Flash Controller has optional DMA peripheral interface
to communicate indirect mode of operations with external DMA.
Xilinx Versal OSPI has external DMA enabled, this will automatically
request the external DMA to fetch the data from SRAM. It supports only
reading the data from SRAM (DMA read) and doesn't support writing the
data to SRAM (DMA write).
Xilinx Versal OSPI read the data from the flash device using external DMA
and write the data to the flash device using software triggered
indirect mode.

Signed-off-by: Sai Krishna Potthuri <lakshmi.sai.krishna.potthuri@xxxxxxxxxx>
---
drivers/spi/spi-cadence-quadspi.c | 207 ++++++++++++++++++++++++++++--
1 file changed, 198 insertions(+), 9 deletions(-)

diff --git a/drivers/spi/spi-cadence-quadspi.c b/drivers/spi/spi-cadence-quadspi.c
index 32cba7830b58..5bdb1bae5c99 100644
--- a/drivers/spi/spi-cadence-quadspi.c
+++ b/drivers/spi/spi-cadence-quadspi.c
@@ -36,6 +36,7 @@
/* Quirks */
#define CQSPI_NEEDS_WR_DELAY BIT(0)
#define CQSPI_DISABLE_DAC_MODE BIT(1)
+#define CQSPI_SUPPORT_EXTERNAL_DMA BIT(2)

/* Capabilities */
#define CQSPI_SUPPORTS_OCTAL BIT(0)
@@ -83,12 +84,16 @@ struct cqspi_st {
u32 wr_delay;
bool use_direct_mode;
struct cqspi_flash_pdata f_pdata[CQSPI_MAX_CHIPSELECT];
+ bool use_dma_read;
u32 pd_dev_id;
};

struct cqspi_driver_platdata {
u32 hwcaps_mask;
u8 quirks;
+ int (*indirect_read_dma)(struct cqspi_flash_pdata *f_pdata,
+ u_char *rxbuf, loff_t from_addr, size_t n_rx);
+ u32 (*get_dma_status)(struct cqspi_st *cqspi);
};

/* Operation timeout value */
@@ -219,6 +224,8 @@ struct cqspi_driver_platdata {
#define CQSPI_REG_INDIRECTWRSTARTADDR 0x78
#define CQSPI_REG_INDIRECTWRBYTES 0x7C

+#define CQSPI_REG_INDTRIG_ADDRRANGE 0x80
+
#define CQSPI_REG_CMDADDRESS 0x94
#define CQSPI_REG_CMDREADDATALOWER 0xA0
#define CQSPI_REG_CMDREADDATAUPPER 0xA4
@@ -233,6 +240,23 @@ struct cqspi_driver_platdata {
#define CQSPI_REG_OP_EXT_WRITE_LSB 16
#define CQSPI_REG_OP_EXT_STIG_LSB 0

+#define CQSPI_REG_VERSAL_DMA_SRC_ADDR 0x1000
+
+#define CQSPI_REG_VERSAL_DMA_DST_ADDR 0x1800
+#define CQSPI_REG_VERSAL_DMA_DST_SIZE 0x1804
+
+#define CQSPI_REG_VERSAL_DMA_DST_CTRL 0x180C
+
+#define CQSPI_REG_VERSAL_DMA_DST_I_STS 0x1814
+#define CQSPI_REG_VERSAL_DMA_DST_I_EN 0x1818
+#define CQSPI_REG_VERSAL_DMA_DST_I_DIS 0x181C
+#define CQSPI_REG_VERSAL_DMA_DST_DONE_MASK BIT(1)
+
+#define CQSPI_REG_VERSAL_DMA_DST_ADDR_MSB 0x1828
+
+#define CQSPI_REG_VERSAL_DMA_DST_CTRL_VAL 0xF43FFA00
+#define CQSPI_REG_VERSAL_ADDRRANGE_WIDTH_VAL 0x6
+
/* Interrupt status bits */
#define CQSPI_REG_IRQ_MODE_ERR BIT(0)
#define CQSPI_REG_IRQ_UNDERFLOW BIT(1)
@@ -252,6 +276,9 @@ struct cqspi_driver_platdata {
CQSPI_REG_IRQ_UNDERFLOW)

#define CQSPI_IRQ_STATUS_MASK 0x1FFFF
+#define CQSPI_DMA_UNALIGN 0x3
+
+#define CQSPI_REG_VERSAL_DMA_VAL 0x602

static int cqspi_wait_for_bit(void __iomem *reg, const u32 mask, bool clr)
{
@@ -277,10 +304,26 @@ static u32 cqspi_get_rd_sram_level(struct cqspi_st *cqspi)
return reg & CQSPI_REG_SDRAMLEVEL_RD_MASK;
}

+static u32 cqspi_get_versal_dma_status(struct cqspi_st *cqspi)
+{
+ u32 dma_status;
+
+ dma_status = readl(cqspi->iobase +
+ CQSPI_REG_VERSAL_DMA_DST_I_STS);
+ writel(dma_status, cqspi->iobase +
+ CQSPI_REG_VERSAL_DMA_DST_I_STS);
+
+ return dma_status & CQSPI_REG_VERSAL_DMA_DST_DONE_MASK;
+}
+
static irqreturn_t cqspi_irq_handler(int this_irq, void *dev)
{
struct cqspi_st *cqspi = dev;
unsigned int irq_status;
+ struct device *device = &cqspi->pdev->dev;
+ const struct cqspi_driver_platdata *ddata;
+
+ ddata = of_device_get_match_data(device);

/* Read interrupt status */
irq_status = readl(cqspi->iobase + CQSPI_REG_IRQSTATUS);
@@ -288,6 +331,13 @@ static irqreturn_t cqspi_irq_handler(int this_irq, void *dev)
/* Clear interrupt */
writel(irq_status, cqspi->iobase + CQSPI_REG_IRQSTATUS);

+ if (cqspi->use_dma_read && ddata && ddata->get_dma_status) {
+ if (ddata->get_dma_status(cqspi)) {
+ complete(&cqspi->transfer_complete);
+ return IRQ_HANDLED;
+ }
+ }
+
irq_status &= CQSPI_IRQ_MASK_RD | CQSPI_IRQ_MASK_WR;

if (irq_status)
@@ -783,6 +833,131 @@ static int cqspi_indirect_read_execute(struct cqspi_flash_pdata *f_pdata,
return ret;
}

+static int cqspi_versal_indirect_read_dma(struct cqspi_flash_pdata *f_pdata,
+ u_char *rxbuf, loff_t from_addr,
+ size_t n_rx)
+{
+ struct cqspi_st *cqspi = f_pdata->cqspi;
+ struct device *dev = &cqspi->pdev->dev;
+ void __iomem *reg_base = cqspi->iobase;
+ u32 reg, bytes_to_dma;
+ loff_t addr = from_addr;
+ void *buf = rxbuf;
+ dma_addr_t dma_addr;
+ u8 bytes_rem;
+ int ret = 0;
+
+ bytes_rem = n_rx % 4;
+ bytes_to_dma = (n_rx - bytes_rem);
+
+ if (!bytes_to_dma)
+ goto nondmard;
+
+ ret = zynqmp_pm_ospi_mux_select(cqspi->pd_dev_id, PM_OSPI_MUX_SEL_DMA);
+ if (ret)
+ return ret;
+
+ reg = readl(cqspi->iobase + CQSPI_REG_CONFIG);
+ reg |= CQSPI_REG_CONFIG_DMA_MASK;
+ writel(reg, cqspi->iobase + CQSPI_REG_CONFIG);
+
+ dma_addr = dma_map_single(dev, rxbuf, bytes_to_dma, DMA_FROM_DEVICE);
+ if (dma_mapping_error(dev, dma_addr)) {
+ dev_err(dev, "dma mapping failed\n");
+ return -ENOMEM;
+ }
+
+ writel(from_addr, reg_base + CQSPI_REG_INDIRECTRDSTARTADDR);
+ writel(bytes_to_dma, reg_base + CQSPI_REG_INDIRECTRDBYTES);
+ writel(CQSPI_REG_VERSAL_ADDRRANGE_WIDTH_VAL,
+ reg_base + CQSPI_REG_INDTRIG_ADDRRANGE);
+
+ /* Clear all interrupts. */
+ writel(CQSPI_IRQ_STATUS_MASK, reg_base + CQSPI_REG_IRQSTATUS);
+
+ /* Enable DMA done interrupt */
+ writel(CQSPI_REG_VERSAL_DMA_DST_DONE_MASK,
+ reg_base + CQSPI_REG_VERSAL_DMA_DST_I_EN);
+
+ /* Default DMA periph configuration */
+ writel(CQSPI_REG_VERSAL_DMA_VAL, reg_base + CQSPI_REG_DMA);
+
+ /* Configure DMA Dst address */
+ writel(lower_32_bits(dma_addr),
+ reg_base + CQSPI_REG_VERSAL_DMA_DST_ADDR);
+ writel(upper_32_bits(dma_addr),
+ reg_base + CQSPI_REG_VERSAL_DMA_DST_ADDR_MSB);
+
+ /* Configure DMA Src address */
+ writel(cqspi->trigger_address, reg_base +
+ CQSPI_REG_VERSAL_DMA_SRC_ADDR);
+
+ /* Set DMA destination size */
+ writel(bytes_to_dma, reg_base + CQSPI_REG_VERSAL_DMA_DST_SIZE);
+
+ /* Set DMA destination control */
+ writel(CQSPI_REG_VERSAL_DMA_DST_CTRL_VAL,
+ reg_base + CQSPI_REG_VERSAL_DMA_DST_CTRL);
+
+ writel(CQSPI_REG_INDIRECTRD_START_MASK,
+ reg_base + CQSPI_REG_INDIRECTRD);
+
+ reinit_completion(&cqspi->transfer_complete);
+
+ if (!wait_for_completion_timeout(&cqspi->transfer_complete,
+ msecs_to_jiffies(CQSPI_READ_TIMEOUT_MS))) {
+ ret = -ETIMEDOUT;
+ goto failrd;
+ }
+
+ /* Disable DMA interrupt */
+ writel(0x0, cqspi->iobase + CQSPI_REG_VERSAL_DMA_DST_I_DIS);
+
+ /* Clear indirect completion status */
+ writel(CQSPI_REG_INDIRECTRD_DONE_MASK,
+ cqspi->iobase + CQSPI_REG_INDIRECTRD);
+ dma_unmap_single(dev, dma_addr, bytes_to_dma, DMA_FROM_DEVICE);
+
+ reg = readl(cqspi->iobase + CQSPI_REG_CONFIG);
+ reg &= ~CQSPI_REG_CONFIG_DMA_MASK;
+ writel(reg, cqspi->iobase + CQSPI_REG_CONFIG);
+
+ ret = zynqmp_pm_ospi_mux_select(cqspi->pd_dev_id,
+ PM_OSPI_MUX_SEL_LINEAR);
+ if (ret)
+ return ret;
+
+nondmard:
+ if (bytes_rem) {
+ addr += bytes_to_dma;
+ buf += bytes_to_dma;
+ ret = cqspi_indirect_read_execute(f_pdata, buf, addr,
+ bytes_rem);
+ if (ret)
+ return ret;
+ }
+
+ return 0;
+
+failrd:
+ /* Disable DMA interrupt */
+ writel(0x0, reg_base + CQSPI_REG_VERSAL_DMA_DST_I_DIS);
+
+ /* Cancel the indirect read */
+ writel(CQSPI_REG_INDIRECTWR_CANCEL_MASK,
+ reg_base + CQSPI_REG_INDIRECTRD);
+
+ dma_unmap_single(dev, dma_addr, bytes_to_dma, DMA_DEV_TO_MEM);
+
+ reg = readl(cqspi->iobase + CQSPI_REG_CONFIG);
+ reg &= ~CQSPI_REG_CONFIG_DMA_MASK;
+ writel(reg, cqspi->iobase + CQSPI_REG_CONFIG);
+
+ zynqmp_pm_ospi_mux_select(cqspi->pd_dev_id, PM_OSPI_MUX_SEL_LINEAR);
+
+ return ret;
+}
+
static int cqspi_write_setup(struct cqspi_flash_pdata *f_pdata,
const struct spi_mem_op *op)
{
@@ -1182,11 +1357,15 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
const struct spi_mem_op *op)
{
struct cqspi_st *cqspi = f_pdata->cqspi;
+ struct device *dev = &cqspi->pdev->dev;
+ const struct cqspi_driver_platdata *ddata;
loff_t from = op->addr.val;
size_t len = op->data.nbytes;
u_char *buf = op->data.buf.in;
+ u64 dma_align = (u64)(uintptr_t)buf;
int ret;

+ ddata = of_device_get_match_data(dev);
ret = cqspi_set_protocol(f_pdata, op);
if (ret)
return ret;
@@ -1198,6 +1377,10 @@ static ssize_t cqspi_read(struct cqspi_flash_pdata *f_pdata,
if (cqspi->use_direct_mode && ((from + len) <= cqspi->ahb_size))
return cqspi_direct_read_execute(f_pdata, buf, from, len);

+ if (cqspi->use_dma_read && ddata && ddata->indirect_read_dma &&
+ virt_addr_valid(buf) && ((dma_align & CQSPI_DMA_UNALIGN) == 0))
+ return ddata->indirect_read_dma(f_pdata, buf, from, len);
+
return cqspi_indirect_read_execute(f_pdata, buf, from, len);
}

@@ -1366,6 +1549,13 @@ static void cqspi_controller_init(struct cqspi_st *cqspi)
writel(reg, cqspi->iobase + CQSPI_REG_CONFIG);
}

+ /* Enable DMA interface */
+ if (cqspi->use_dma_read) {
+ reg = readl(cqspi->iobase + CQSPI_REG_CONFIG);
+ reg |= CQSPI_REG_CONFIG_DMA_MASK;
+ writel(reg, cqspi->iobase + CQSPI_REG_CONFIG);
+ }
+
cqspi_controller_enable(cqspi, 1);
}

@@ -1555,15 +1745,12 @@ static int cqspi_probe(struct platform_device *pdev)
master->mode_bits |= SPI_RX_OCTAL | SPI_TX_OCTAL;
if (!(ddata->quirks & CQSPI_DISABLE_DAC_MODE))
cqspi->use_direct_mode = true;
+ if (ddata->quirks & CQSPI_SUPPORT_EXTERNAL_DMA)
+ cqspi->use_dma_read = true;
+
if (of_device_is_compatible(pdev->dev.of_node,
- "xlnx,versal-ospi-1.0")) {
- ret = zynqmp_pm_ospi_mux_select(cqspi->pd_dev_id,
- PM_OSPI_MUX_SEL_LINEAR);
- if (ret) {
- dev_err(dev, "failed to select OSPI Mux.\n");
- goto probe_reset_failed;
- }
- }
+ "xlnx,versal-ospi-1.0"))
+ dma_set_mask(&pdev->dev, DMA_BIT_MASK(64));
}

ret = devm_request_irq(dev, irq, cqspi_irq_handler, 0,
@@ -1674,7 +1861,9 @@ static const struct cqspi_driver_platdata intel_lgm_qspi = {

static const struct cqspi_driver_platdata versal_ospi = {
.hwcaps_mask = CQSPI_SUPPORTS_OCTAL,
- .quirks = CQSPI_DISABLE_DAC_MODE,
+ .quirks = CQSPI_DISABLE_DAC_MODE | CQSPI_SUPPORT_EXTERNAL_DMA,
+ .indirect_read_dma = cqspi_versal_indirect_read_dma,
+ .get_dma_status = cqspi_get_versal_dma_status,
};

static const struct of_device_id cqspi_dt_ids[] = {
--
2.17.1