RE: [PATCH] usb: cdns3: Coherent memory allocation optimization

From: Sanket Parmar
Date: Tue Mar 09 2021 - 00:40:50 EST


Hi Peter,

> On 21-03-05 17:01:11, Sanket Parmar wrote:
> > Allocation of DMA coherent memory in atomic context using
> > dma_alloc_coherent() might fail on some platform. To fix it,
> > Replaced dma_alloc_coherent() with dma_pool API to allocate a
> > smaller chunk of DMA coherent memory for TRB rings.
> >
> > Also in cdns3_prepare_aligned_request_buf(), replaced
> > dma_alloc_coherent() with kmalloc and dma_map API to allocate
> > aligned request buffer of dynamic length.
> >
>
> You do two changes in one commit, would you please split this one as
> two patches?
>
> > Fixes: commit 7733f6c32e36 ("usb: cdns3: Add Cadence USB3 DRD Driver")
>
> "commit" is not needed
>
> > Reported by: Aswath Govindraju <a-govindraju@xxxxxx>
>
> Reported-by:
>
I have split the change into two patches.
New patch series has been posted already.

> > Signed-off-by: Sanket Parmar <sparmar@xxxxxxxxxxx>
> > ---
> > drivers/usb/cdns3/cdns3-gadget.c | 115 ++++++++++++++++++++++------------
> ---
> > drivers/usb/cdns3/cdns3-gadget.h | 3 +
> > 2 files changed, 71 insertions(+), 47 deletions(-)
> >
> > diff --git a/drivers/usb/cdns3/cdns3-gadget.c b/drivers/usb/cdns3/cdns3-
> gadget.c
> > index 582bfec..5fd6993 100644
> > --- a/drivers/usb/cdns3/cdns3-gadget.c
> > +++ b/drivers/usb/cdns3/cdns3-gadget.c
> > @@ -59,6 +59,7 @@
> > #include <linux/dma-mapping.h>
> > #include <linux/usb/gadget.h>
> > #include <linux/module.h>
> > +#include <linux/dmapool.h>
> > #include <linux/iopoll.h>
> >
> > #include "core.h"
> > @@ -190,29 +191,13 @@ dma_addr_t cdns3_trb_virt_to_dma(struct
> cdns3_endpoint *priv_ep,
> > return priv_ep->trb_pool_dma + offset;
> > }
> >
> > -static int cdns3_ring_size(struct cdns3_endpoint *priv_ep)
> > -{
> > - switch (priv_ep->type) {
> > - case USB_ENDPOINT_XFER_ISOC:
> > - return TRB_ISO_RING_SIZE;
> > - case USB_ENDPOINT_XFER_CONTROL:
> > - return TRB_CTRL_RING_SIZE;
> > - default:
> > - if (priv_ep->use_streams)
> > - return TRB_STREAM_RING_SIZE;
> > - else
> > - return TRB_RING_SIZE;
> > - }
> > -}
> > -
> > static void cdns3_free_trb_pool(struct cdns3_endpoint *priv_ep)
> > {
> > struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
> >
> > if (priv_ep->trb_pool) {
> > - dma_free_coherent(priv_dev->sysdev,
> > - cdns3_ring_size(priv_ep),
> > - priv_ep->trb_pool, priv_ep->trb_pool_dma);
> > + dma_pool_free(priv_dev->eps_dma_pool,
> > + priv_ep->trb_pool, priv_ep->trb_pool_dma);
> > priv_ep->trb_pool = NULL;
> > }
> > }
> > @@ -226,7 +211,7 @@ static void cdns3_free_trb_pool(struct
> cdns3_endpoint *priv_ep)
> > int cdns3_allocate_trb_pool(struct cdns3_endpoint *priv_ep)
> > {
> > struct cdns3_device *priv_dev = priv_ep->cdns3_dev;
> > - int ring_size = cdns3_ring_size(priv_ep);
> > + int ring_size = TRB_RING_SIZE;
>
> You will use the same size for TRB ring region for control/bulk/isoc
> endpoint.
>
As single DMA pool is used to allocate the ring buffer, different TRB ring
size is not possible for different EP. Hence, TRB_RING_SIZE(600 * 12 B) which fits
for all type of EPs is used.

> > int num_trbs = ring_size / TRB_SIZE;
> > struct cdns3_trb *link_trb;
> >
> > @@ -234,10 +219,10 @@ int cdns3_allocate_trb_pool(struct
> cdns3_endpoint *priv_ep)
> > cdns3_free_trb_pool(priv_ep);
> >
> > if (!priv_ep->trb_pool) {
> > - priv_ep->trb_pool = dma_alloc_coherent(priv_dev->sysdev,
> > - ring_size,
> > - &priv_ep->trb_pool_dma,
> > - GFP_DMA32 |
> GFP_ATOMIC);
> > + priv_ep->trb_pool = dma_pool_alloc(priv_dev-
> >eps_dma_pool,
> > + GFP_DMA32 | GFP_ATOMIC,
> > + &priv_ep->trb_pool_dma);
>
> dma_pool_alloc also allocates the whole thunk of TRB region. See the
> kernel-doc for dma_pool_create.
>
> * Given one of these pools, dma_pool_alloc()
> * may be used to allocate memory. Such memory will all have "consistent"

Yes, dma_pool_alloc allocates the whole chunk of TRB region for enabled EPs from the
pool. Currently the block size of the DMA pool is 7.2KB. So dma_pool_alloc allocates at least
7.2KB for TRB region per enabled EP.

> > +
> > if (!priv_ep->trb_pool)
> > return -ENOMEM;
> >
> > @@ -833,10 +818,26 @@ void cdns3_gadget_giveback(struct
> cdns3_endpoint *priv_ep,
> > usb_gadget_unmap_request_by_dev(priv_dev->sysdev, request,
> > priv_ep->dir);
> >
> > - if ((priv_req->flags & REQUEST_UNALIGNED) &&
> > - priv_ep->dir == USB_DIR_OUT && !request->status)
> > - memcpy(request->buf, priv_req->aligned_buf->buf,
> > - request->length);
> > + if ((priv_req->flags & REQUEST_UNALIGNED) && priv_req-
> >aligned_buf) {
> > + struct cdns3_aligned_buf *buf;
> > +
> > + buf = priv_req->aligned_buf;
> > + dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > + buf->dir);
> > + priv_req->flags &= ~REQUEST_UNALIGNED;
> > +
> > + if (priv_ep->dir == USB_DIR_OUT && !request->status) {
> > + memcpy(request->buf, priv_req->aligned_buf->buf,
> > + request->length);
> > + }
> > +
> > + trace_cdns3_free_aligned_request(priv_req);
> > + priv_req->aligned_buf->in_use = 0;
> > + queue_work(system_freezable_wq,
> > + &priv_dev->aligned_buf_wq);
> > + priv_req->aligned_buf = NULL;
> > +
> > + }
> >
> > priv_req->flags &= ~(REQUEST_PENDING | REQUEST_UNALIGNED);
> > /* All TRBs have finished, clear the counter */
> > @@ -898,8 +899,7 @@ static void cdns3_free_aligned_request_buf(struct
> work_struct *work)
> > * interrupts.
> > */
> > spin_unlock_irqrestore(&priv_dev->lock, flags);
> > - dma_free_coherent(priv_dev->sysdev, buf->size,
> > - buf->buf, buf->dma);
> > + kfree(buf->buf);
> > kfree(buf);
> > spin_lock_irqsave(&priv_dev->lock, flags);
> > }
> > @@ -925,27 +925,16 @@ static int
> cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> > if (!buf)
> > return -ENOMEM;
> >
> > - buf->size = priv_req->request.length;
> > + buf->size = usb_endpoint_dir_out(priv_ep->endpoint.desc) ?
> > + usb_ep_align(&(priv_ep->endpoint), priv_req-
> >request.length)
> > + : priv_req->request.length;
> >
> > - buf->buf = dma_alloc_coherent(priv_dev->sysdev,
> > - buf->size,
> > - &buf->dma,
> > - GFP_ATOMIC);
> > + buf->buf = kmalloc(buf->size, GFP_ATOMIC);
> > if (!buf->buf) {
> > kfree(buf);
> > return -ENOMEM;
> > }
> >
> > - if (priv_req->aligned_buf) {
> > - trace_cdns3_free_aligned_request(priv_req);
> > - priv_req->aligned_buf->in_use = 0;
> > - queue_work(system_freezable_wq,
> > - &priv_dev->aligned_buf_wq);
> > - }
> > -
> > - buf->in_use = 1;
> > - priv_req->aligned_buf = buf;
> > -
> > list_add_tail(&buf->list,
> > &priv_dev->aligned_buf_list);
> > }
> > @@ -955,6 +944,27 @@ static int
> cdns3_prepare_aligned_request_buf(struct cdns3_request *priv_req)
> > priv_req->request.length);
> > }
> >
> > + if (priv_req->aligned_buf) {
> > + trace_cdns3_free_aligned_request(priv_req);
> > + priv_req->aligned_buf->in_use = 0;
> > + queue_work(system_freezable_wq,
> > + &priv_dev->aligned_buf_wq);
> > + }
> > +
> > + buf->dir = priv_ep->dir ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
> > + buf->in_use = 1;
> > + priv_req->aligned_buf = buf;
> > +
> > + buf->dma = dma_map_single(priv_dev->sysdev, buf->buf, buf->size,
> > + buf->dir);
> > +
> > + if (dma_mapping_error(priv_dev->sysdev, buf->dma)) {
> > + dev_err(priv_dev->dev, "Failed to map buffer\n");
> > + kfree(buf->buf);
> > + kfree(buf);
> > + return -EFAULT;
> > + }
> > +
> > priv_req->flags |= REQUEST_UNALIGNED;
> > trace_cdns3_prepare_aligned_request(priv_req);
> >
> > @@ -3103,16 +3113,17 @@ static void cdns3_gadget_exit(struct cdns
> *cdns)
> > struct cdns3_aligned_buf *buf;
> >
> > buf = cdns3_next_align_buf(&priv_dev->aligned_buf_list);
> > - dma_free_coherent(priv_dev->sysdev, buf->size,
> > - buf->buf,
> > - buf->dma);
> > + dma_unmap_single(priv_dev->sysdev, buf->dma, buf->size,
> > + buf->dir);
> >
> > list_del(&buf->list);
> > + kfree(buf->buf);
> > kfree(buf);
> > }
> >
> > dma_free_coherent(priv_dev->sysdev, 8, priv_dev->setup_buf,
> > priv_dev->setup_dma);
> > + dma_pool_destroy(priv_dev->eps_dma_pool);
> >
> > kfree(priv_dev->zlp_buf);
> > usb_put_gadget(&priv_dev->gadget);
> > @@ -3185,6 +3196,14 @@ static int cdns3_gadget_start(struct cdns *cdns)
> > /* initialize endpoint container */
> > INIT_LIST_HEAD(&priv_dev->gadget.ep_list);
> > INIT_LIST_HEAD(&priv_dev->aligned_buf_list);
> > + priv_dev->eps_dma_pool = dma_pool_create("cdns3_eps_dma_pool",
> > + priv_dev->sysdev,
> > + TRB_RING_SIZE, 8, 0);
> > + if (!priv_dev->eps_dma_pool) {
> > + dev_err(priv_dev->dev, "Failed to create TRB dma pool\n");
> > + ret = -ENOMEM;
> > + goto err1;
> > + }
> >
> > ret = cdns3_init_eps(priv_dev);
> > if (ret) {
> > @@ -3235,6 +3254,8 @@ static int cdns3_gadget_start(struct cdns *cdns)
> > err2:
> > cdns3_free_all_eps(priv_dev);
> > err1:
> > + dma_pool_destroy(priv_dev->eps_dma_pool);
> > +
> > usb_put_gadget(&priv_dev->gadget);
> > cdns->gadget_dev = NULL;
> > return ret;
> > diff --git a/drivers/usb/cdns3/cdns3-gadget.h b/drivers/usb/cdns3/cdns3-
> gadget.h
> > index 21fa461..c5660f2 100644
> > --- a/drivers/usb/cdns3/cdns3-gadget.h
> > +++ b/drivers/usb/cdns3/cdns3-gadget.h
> > @@ -12,6 +12,7 @@
> > #ifndef __LINUX_CDNS3_GADGET
> > #define __LINUX_CDNS3_GADGET
> > #include <linux/usb/gadget.h>
> > +#include <linux/dma-direction.h>
> >
> > /*
> > * USBSS-DEV register interface.
> > @@ -1205,6 +1206,7 @@ struct cdns3_aligned_buf {
> > void *buf;
> > dma_addr_t dma;
> > u32 size;
> > + enum dma_data_direction dir;
> > unsigned in_use:1;
> > struct list_head list;
> > };
> > @@ -1298,6 +1300,7 @@ struct cdns3_device {
> >
> > struct cdns3_usb_regs __iomem *regs;
> >
> > + struct dma_pool *eps_dma_pool;
> > struct usb_ctrlrequest *setup_buf;
> > dma_addr_t setup_dma;
> > void *zlp_buf;
> > --
> > 1.7.1
> >
>
> I guess this issue may due to the size for DMA region is too small,
> try to enlarge the it (eg, CMA size).
>
> --
>
> Thanks,
> Peter Chen

--
Thanks,
Sanket Parmar