[PATCH 09/12] dmaengine: stm32-dma3: improve residue granularity

From: Amelie Delaunay
Date: Tue Apr 23 2024 - 08:36:46 EST


Implement own device_tx_status ops to compute the residue with a finer
granularity, up to bytes.
STM32 DMA3 has a bitfield, BNDT, in CxTR1 register which reflects the
number of bytes read from the source.
It also has a bitfield, FIFOL, in CxSR register which reflects the FIFO
level in units of programmed destination data width.
The channel is briefly suspended to get a coherent snapshot of registers.
It is possible to correct the fifo level when packing/unpacking is enabled
with destination increment.

Signed-off-by: Amelie Delaunay <amelie.delaunay@xxxxxxxxxxx>
---
drivers/dma/stm32/stm32-dma3.c | 165 ++++++++++++++++++++++++++++++++-
1 file changed, 163 insertions(+), 2 deletions(-)

diff --git a/drivers/dma/stm32/stm32-dma3.c b/drivers/dma/stm32/stm32-dma3.c
index 3d827c33150e..f15cc0129fc7 100644
--- a/drivers/dma/stm32/stm32-dma3.c
+++ b/drivers/dma/stm32/stm32-dma3.c
@@ -799,6 +799,134 @@ static void stm32_dma3_chan_reset(struct stm32_dma3_chan *chan)
writel_relaxed(ccr |= CCR_RESET, ddata->base + STM32_DMA3_CCR(chan->id));
}

+static int stm32_dma3_chan_get_curr_hwdesc(struct stm32_dma3_swdesc *swdesc, u32 cllr, u32 *residue)
+{
+ u32 i, lli_offset, next_lli_offset = cllr & CLLR_LA;
+
+ /* If cllr is null, it means it is either the last or single item */
+ if (!cllr)
+ return swdesc->lli_size - 1;
+
+ /* In cyclic mode, go fast and first check we are not on the last item */
+ if (swdesc->cyclic && next_lli_offset == (swdesc->lli[0].hwdesc_addr & CLLR_LA))
+ return swdesc->lli_size - 1;
+
+ /* As transfer is in progress, look backward from the last item */
+ for (i = swdesc->lli_size - 1; i > 0; i--) {
+ *residue += FIELD_GET(CBR1_BNDT, swdesc->lli[i].hwdesc->cbr1);
+ lli_offset = swdesc->lli[i].hwdesc_addr & CLLR_LA;
+ if (lli_offset == next_lli_offset)
+ return i - 1;
+ }
+
+ return -EINVAL;
+}
+
+static void stm32_dma3_chan_set_residue(struct stm32_dma3_chan *chan,
+ struct stm32_dma3_swdesc *swdesc,
+ struct dma_tx_state *txstate)
+{
+ struct stm32_dma3_ddata *ddata = to_stm32_dma3_ddata(chan);
+ struct device *dev = chan2dev(chan);
+ struct stm32_dma3_hwdesc *hwdesc;
+ u32 residue, curr_lli, csr, cdar, cbr1, cllr, bndt, fifol;
+ bool pack_unpack;
+ int ret;
+
+ csr = readl_relaxed(ddata->base + STM32_DMA3_CSR(chan->id));
+ if (!((csr & CSR_TCF) && (csr & CSR_IDLEF)) && chan->dma_status != DMA_PAUSED) {
+ /* Suspend current transfer to read registers for a snapshot */
+ writel_relaxed(swdesc->ccr | CCR_SUSP, ddata->base + STM32_DMA3_CCR(chan->id));
+ ret = readl_relaxed_poll_timeout_atomic(ddata->base + STM32_DMA3_CSR(chan->id), csr,
+ csr & (CSR_SUSPF | CSR_TCF), 1, 10);
+
+ if (ret || ((csr & CSR_TCF) && (csr & CSR_IDLEF))) {
+ writel_relaxed(CFCR_SUSPF, ddata->base + STM32_DMA3_CFCR(chan->id));
+ writel_relaxed(swdesc->ccr, ddata->base + STM32_DMA3_CCR(chan->id));
+ if (ret)
+ dev_err(dev, "Channel suspension timeout, csr=%08x\n", csr);
+ }
+ }
+
+ /* If channel is still active (CSR_IDLEF is not set), can't get a reliable residue */
+ if (!(csr & CSR_IDLEF)) {
+ dev_err(dev, "Can't get residue: channel still active, csr=%08x\n", csr);
+ return;
+ }
+
+ /*
+ * If channel is not suspended, but Idle and Transfer Complete are set,
+ * linked-list is over, no residue
+ */
+ if (!(csr & CSR_SUSPF) && (csr & CSR_TCF) && (csr & CSR_IDLEF))
+ return;
+
+ /* Read registers to have a snapshot */
+ cllr = readl_relaxed(ddata->base + STM32_DMA3_CLLR(chan->id));
+ cbr1 = readl_relaxed(ddata->base + STM32_DMA3_CBR1(chan->id));
+ cdar = readl_relaxed(ddata->base + STM32_DMA3_CDAR(chan->id));
+
+ /* Resume current transfer */
+ writel_relaxed(CFCR_SUSPF, ddata->base + STM32_DMA3_CFCR(chan->id));
+ writel_relaxed(swdesc->ccr, ddata->base + STM32_DMA3_CCR(chan->id));
+
+ /* Add current BNDT */
+ bndt = FIELD_GET(CBR1_BNDT, cbr1);
+ residue = bndt;
+
+ /* Get current hwdesc and cumulate residue of pending hwdesc BNDT */
+ ret = stm32_dma3_chan_get_curr_hwdesc(swdesc, cllr, &residue);
+ if (ret < 0) {
+ dev_err(chan2dev(chan), "Can't get residue: current hwdesc not found\n");
+ return;
+ }
+ curr_lli = ret;
+
+ /* Read current FIFO level - in units of programmed destination data width */
+ hwdesc = swdesc->lli[curr_lli].hwdesc;
+ fifol = FIELD_GET(CSR_FIFOL, csr) * (1 << FIELD_GET(CTR1_DDW_LOG2, hwdesc->ctr1));
+ /* If the FIFO contains as many bytes as its size, it can't contain more */
+ if (fifol == (1 << (chan->fifo_size + 1)))
+ goto skip_fifol_update;
+
+ /*
+ * In case of PACKING (Destination burst length > Source burst length) or UNPACKING
+ * (Source burst length > Destination burst length), bytes could be pending in the FIFO
+ * (to be packed up to Destination burst length or unpacked into Destination burst length
+ * chunks).
+ * BNDT is not reliable, as it reflects the number of bytes read from the source but not the
+ * number of bytes written to the destination.
+ * FIFOL is also not sufficient, because it reflects the number of available write beats in
+ * units of Destination data width but not the bytes not yet packed or unpacked.
+ * In case of Destination increment DINC, it is possible to compute the number of bytes in
+ * the FIFO:
+ * fifol_in_bytes = bytes_read - bytes_written.
+ */
+ pack_unpack = !!(FIELD_GET(CTR1_PAM, hwdesc->ctr1) == CTR1_PAM_PACK_UNPACK);
+ if (pack_unpack && (hwdesc->ctr1 & CTR1_DINC)) {
+ int bytes_read = FIELD_GET(CBR1_BNDT, hwdesc->cbr1) - bndt;
+ int bytes_written = cdar - hwdesc->cdar;
+
+ if (bytes_read > 0)
+ fifol = bytes_read - bytes_written;
+ }
+
+skip_fifol_update:
+ if (fifol) {
+ dev_dbg(chan2dev(chan), "%u byte(s) in the FIFO\n", fifol);
+ dma_set_in_flight_bytes(txstate, fifol);
+ /*
+ * Residue is already accurate for DMA_MEM_TO_DEV as BNDT reflects data read from
+ * the source memory buffer, so just need to add fifol to residue in case of
+ * DMA_DEV_TO_MEM transfer because these bytes are not yet written in destination
+ * memory buffer.
+ */
+ if (chan->dma_config.direction == DMA_DEV_TO_MEM)
+ residue += fifol;
+ }
+ dma_set_residue(txstate, residue);
+}
+
static int stm32_dma3_chan_stop(struct stm32_dma3_chan *chan)
{
struct stm32_dma3_ddata *ddata = to_stm32_dma3_ddata(chan);
@@ -1301,6 +1429,39 @@ static void stm32_dma3_synchronize(struct dma_chan *c)
vchan_synchronize(&chan->vchan);
}

+static enum dma_status stm32_dma3_tx_status(struct dma_chan *c, dma_cookie_t cookie,
+ struct dma_tx_state *txstate)
+{
+ struct stm32_dma3_chan *chan = to_stm32_dma3_chan(c);
+ struct stm32_dma3_swdesc *swdesc = NULL;
+ enum dma_status status;
+ unsigned long flags;
+ struct virt_dma_desc *vd;
+
+ status = dma_cookie_status(c, cookie, txstate);
+ if (status == DMA_COMPLETE)
+ return status;
+
+ if (!txstate)
+ return chan->dma_status;
+
+ spin_lock_irqsave(&chan->vchan.lock, flags);
+
+ vd = vchan_find_desc(&chan->vchan, cookie);
+ if (vd)
+ swdesc = to_stm32_dma3_swdesc(vd);
+ else if (chan->swdesc && chan->swdesc->vdesc.tx.cookie == cookie)
+ swdesc = chan->swdesc;
+
+ /* Get residue/in_flight_bytes only if a transfer is currently running (swdesc != NULL) */
+ if (swdesc)
+ stm32_dma3_chan_set_residue(chan, swdesc, txstate);
+
+ spin_unlock_irqrestore(&chan->vchan.lock, flags);
+
+ return chan->dma_status;
+}
+
static void stm32_dma3_issue_pending(struct dma_chan *c)
{
struct stm32_dma3_chan *chan = to_stm32_dma3_chan(c);
@@ -1497,7 +1658,7 @@ static int stm32_dma3_probe(struct platform_device *pdev)

dma_dev->descriptor_reuse = true;
dma_dev->max_sg_burst = STM32_DMA3_MAX_SEG_SIZE;
- dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
+ dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
dma_dev->device_alloc_chan_resources = stm32_dma3_alloc_chan_resources;
dma_dev->device_free_chan_resources = stm32_dma3_free_chan_resources;
dma_dev->device_prep_dma_memcpy = stm32_dma3_prep_dma_memcpy;
@@ -1509,7 +1670,7 @@ static int stm32_dma3_probe(struct platform_device *pdev)
dma_dev->device_resume = stm32_dma3_resume;
dma_dev->device_terminate_all = stm32_dma3_terminate_all;
dma_dev->device_synchronize = stm32_dma3_synchronize;
- dma_dev->device_tx_status = dma_cookie_status;
+ dma_dev->device_tx_status = stm32_dma3_tx_status;
dma_dev->device_issue_pending = stm32_dma3_issue_pending;

/* if dma_channels is not modified, get it from hwcfgr1 */
--
2.25.1