[PATCH 2/4] gpu: host1x: Implement job tracking using DMA fences

From: Mikko Perttunen
Date: Thu Jan 19 2023 - 08:11:36 EST


From: Mikko Perttunen <mperttunen@xxxxxxxxxx>

In anticipation of removal of the intr API, implement job tracking
using DMA fences instead. The main two things about this are
making cdma_update schedule the work since fence completion can
now be called from interrupt context, and some complication in
ensuring the callback is not running when we free the fence.

Signed-off-by: Mikko Perttunen <mperttunen@xxxxxxxxxx>
---
drivers/gpu/host1x/cdma.c | 14 +++++++--
drivers/gpu/host1x/cdma.h | 2 ++
drivers/gpu/host1x/hw/channel_hw.c | 48 +++++++++++++++++-------------
drivers/gpu/host1x/job.c | 12 ++++++--
include/linux/host1x.h | 6 ++--
5 files changed, 53 insertions(+), 29 deletions(-)

diff --git a/drivers/gpu/host1x/cdma.c b/drivers/gpu/host1x/cdma.c
index 103fda055394..bc821b0ed908 100644
--- a/drivers/gpu/host1x/cdma.c
+++ b/drivers/gpu/host1x/cdma.c
@@ -490,6 +490,15 @@ void host1x_cdma_update_sync_queue(struct host1x_cdma *cdma,
host1x_hw_cdma_resume(host1x, cdma, restart_addr);
}

+static void cdma_update_work(struct work_struct *work)
+{
+ struct host1x_cdma *cdma = container_of(work, struct host1x_cdma, update_work);
+
+ mutex_lock(&cdma->lock);
+ update_cdma_locked(cdma);
+ mutex_unlock(&cdma->lock);
+}
+
/*
* Create a cdma
*/
@@ -499,6 +508,7 @@ int host1x_cdma_init(struct host1x_cdma *cdma)

mutex_init(&cdma->lock);
init_completion(&cdma->complete);
+ INIT_WORK(&cdma->update_work, cdma_update_work);

INIT_LIST_HEAD(&cdma->sync_queue);

@@ -679,7 +689,5 @@ void host1x_cdma_end(struct host1x_cdma *cdma,
*/
void host1x_cdma_update(struct host1x_cdma *cdma)
{
- mutex_lock(&cdma->lock);
- update_cdma_locked(cdma);
- mutex_unlock(&cdma->lock);
+ schedule_work(&cdma->update_work);
}
diff --git a/drivers/gpu/host1x/cdma.h b/drivers/gpu/host1x/cdma.h
index 12c4327c4df0..7fd8168af4f9 100644
--- a/drivers/gpu/host1x/cdma.h
+++ b/drivers/gpu/host1x/cdma.h
@@ -11,6 +11,7 @@
#include <linux/sched.h>
#include <linux/completion.h>
#include <linux/list.h>
+#include <linux/workqueue.h>

struct host1x_syncpt;
struct host1x_userctx_timeout;
@@ -69,6 +70,7 @@ struct host1x_cdma {
struct buffer_timeout timeout; /* channel's timeout state/wq */
bool running;
bool torndown;
+ struct work_struct update_work;
};

#define cdma_to_channel(cdma) container_of(cdma, struct host1x_channel, cdma)
diff --git a/drivers/gpu/host1x/hw/channel_hw.c b/drivers/gpu/host1x/hw/channel_hw.c
index 732abe0750ff..8a3119fc5a77 100644
--- a/drivers/gpu/host1x/hw/channel_hw.c
+++ b/drivers/gpu/host1x/hw/channel_hw.c
@@ -278,6 +278,14 @@ static void channel_program_cdma(struct host1x_job *job)
#endif
}

+static void job_complete_callback(struct dma_fence *fence, struct dma_fence_cb *cb)
+{
+ struct host1x_job *job = container_of(cb, struct host1x_job, fence_cb);
+
+ /* Schedules CDMA update. */
+ host1x_cdma_update(&job->channel->cdma);
+}
+
static int channel_submit(struct host1x_job *job)
{
struct host1x_channel *ch = job->channel;
@@ -285,7 +293,6 @@ static int channel_submit(struct host1x_job *job)
u32 prev_max = 0;
u32 syncval;
int err;
- struct host1x_waitlist *completed_waiter = NULL;
struct host1x *host = dev_get_drvdata(ch->dev->parent);

trace_host1x_channel_submit(dev_name(ch->dev),
@@ -298,14 +305,7 @@ static int channel_submit(struct host1x_job *job)
/* get submit lock */
err = mutex_lock_interruptible(&ch->submitlock);
if (err)
- goto error;
-
- completed_waiter = kzalloc(sizeof(*completed_waiter), GFP_KERNEL);
- if (!completed_waiter) {
- mutex_unlock(&ch->submitlock);
- err = -ENOMEM;
- goto error;
- }
+ return err;

host1x_channel_set_streamid(ch);
host1x_enable_gather_filter(ch);
@@ -315,31 +315,37 @@ static int channel_submit(struct host1x_job *job)
err = host1x_cdma_begin(&ch->cdma, job);
if (err) {
mutex_unlock(&ch->submitlock);
- goto error;
+ return err;
}

channel_program_cdma(job);
syncval = host1x_syncpt_read_max(sp);

+ /*
+ * Create fence before submitting job to HW to avoid job completing
+ * before the fence is set up.
+ */
+ job->fence = host1x_fence_create(sp, syncval);
+ if (WARN(IS_ERR(job->fence), "Failed to create submit complete fence")) {
+ job->fence = NULL;
+ } else {
+ err = dma_fence_add_callback(job->fence, &job->fence_cb,
+ job_complete_callback);
+ }
+
/* end CDMA submit & stash pinned hMems into sync queue */
host1x_cdma_end(&ch->cdma, job);

trace_host1x_channel_submitted(dev_name(ch->dev), prev_max, syncval);

- /* schedule a submit complete interrupt */
- err = host1x_intr_add_action(host, sp, syncval,
- HOST1X_INTR_ACTION_SUBMIT_COMPLETE, ch,
- completed_waiter, &job->waiter);
- completed_waiter = NULL;
- WARN(err, "Failed to set submit complete interrupt");
-
mutex_unlock(&ch->submitlock);

- return 0;
+ if (err == -ENOENT)
+ host1x_cdma_update(&ch->cdma);
+ else
+ WARN(err, "Failed to set submit complete interrupt");

-error:
- kfree(completed_waiter);
- return err;
+ return 0;
}

static int host1x_channel_init(struct host1x_channel *ch, struct host1x *dev,
diff --git a/drivers/gpu/host1x/job.c b/drivers/gpu/host1x/job.c
index b2761aa03b95..3ed49e1fd933 100644
--- a/drivers/gpu/host1x/job.c
+++ b/drivers/gpu/host1x/job.c
@@ -88,9 +88,15 @@ static void job_free(struct kref *ref)
if (job->release)
job->release(job);

- if (job->waiter)
- host1x_intr_put_ref(job->syncpt->host, job->syncpt->id,
- job->waiter, false);
+ if (job->fence) {
+ /*
+ * remove_callback is atomic w.r.t. fence signaling, so
+ * after the call returns, we know that the callback is not
+ * in execution, and the fence can be safely freed.
+ */
+ dma_fence_remove_callback(job->fence, &job->fence_cb);
+ dma_fence_put(job->fence);
+ }

if (job->syncpt)
host1x_syncpt_put(job->syncpt);
diff --git a/include/linux/host1x.h b/include/linux/host1x.h
index dc55d9d3b94f..db6cf6f34361 100644
--- a/include/linux/host1x.h
+++ b/include/linux/host1x.h
@@ -8,6 +8,7 @@

#include <linux/device.h>
#include <linux/dma-direction.h>
+#include <linux/dma-fence.h>
#include <linux/spinlock.h>
#include <linux/types.h>

@@ -288,8 +289,9 @@ struct host1x_job {
u32 syncpt_incrs;
u32 syncpt_end;

- /* Completion waiter ref */
- void *waiter;
+ /* Completion fence for job tracking */
+ struct dma_fence *fence;
+ struct dma_fence_cb fence_cb;

/* Maximum time to wait for this job */
unsigned int timeout;
--
2.39.0