Re: [PATCH v3 05/10] drm/xe/xe_late_bind_fw: Load late binding firmware

From: Nilawar, Badal
Date: Thu Jun 19 2025 - 01:55:13 EST



On 19-06-2025 02:27, Daniele Ceraolo Spurio wrote:


On 6/18/2025 12:00 PM, Badal Nilawar wrote:
Load late binding firmware

v2:
  - s/EAGAIN/EBUSY/
  - Flush worker in suspend and driver unload (Daniele)
v3:
  - Use retry interval of 6s, in steps of 200ms, to allow
    other OS components release MEI CL handle (Sasha)

Signed-off-by: Badal Nilawar <badal.nilawar@xxxxxxxxx>
---
  drivers/gpu/drm/xe/xe_late_bind_fw.c       | 113 ++++++++++++++++++++-
  drivers/gpu/drm/xe/xe_late_bind_fw.h       |   1 +
  drivers/gpu/drm/xe/xe_late_bind_fw_types.h |   5 +
  3 files changed, 118 insertions(+), 1 deletion(-)

diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.c b/drivers/gpu/drm/xe/xe_late_bind_fw.c
index d416d6cc1fa2..54aa08c6bdfd 100644
--- a/drivers/gpu/drm/xe/xe_late_bind_fw.c
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.c
@@ -16,6 +16,20 @@
  #include "xe_late_bind_fw.h"
  #include "xe_pcode.h"
  #include "xe_pcode_api.h"
+#include "xe_pm.h"
+
+/*
+ * The component should load quite quickly in most cases, but it could take
+ * a bit. Using a very big timeout just to cover the worst case scenario
+ */
+#define LB_INIT_TIMEOUT_MS 20000
+
+/*
+ * Retry interval set to 6 seconds, in steps of 200 ms, to allow time for
+ * other OS components to release the MEI CL handle
+ */
+#define LB_FW_LOAD_RETRY_MAXCOUNT 30
+#define LB_FW_LOAD_RETRY_PAUSE_MS 200
    static const u32 fw_id_to_type[] = {
          [FAN_CONTROL_FW_ID] = CSC_LATE_BINDING_TYPE_FAN_CONTROL,
@@ -44,6 +58,85 @@ static int xe_late_bind_fw_num_fans(struct xe_late_bind *late_bind)
          return 0;
  }
  +static void xe_late_bind_wait_for_worker_completion(struct xe_late_bind *late_bind)
+{
+    struct xe_device *xe = late_bind_to_xe(late_bind);
+    struct xe_late_bind_fw *lbfw;
+    int fw_id;
+
+    for (fw_id = 0; fw_id < MAX_FW_ID; fw_id++) {
+        lbfw = &late_bind->late_bind_fw[fw_id];
+        if (lbfw->valid && late_bind->wq) {
+            drm_dbg(&xe->drm, "Flush work: load %s firmware\n",
+                fw_id_to_name[lbfw->id]);
+                flush_work(&lbfw->work);

incorrect indent here for flush_work

+        }
+    }
+}
+
+static void late_bind_work(struct work_struct *work)
+{
+    struct xe_late_bind_fw *lbfw = container_of(work, struct xe_late_bind_fw, work);
+    struct xe_late_bind *late_bind = container_of(lbfw, struct xe_late_bind,
+                              late_bind_fw[lbfw->id]);
+    struct xe_device *xe = late_bind_to_xe(late_bind);
+    int retry = LB_FW_LOAD_RETRY_MAXCOUNT;
+    int ret;
+    int slept;
+
+    /* we can queue this before the component is bound */
+    for (slept = 0; slept < LB_INIT_TIMEOUT_MS; slept += 100) {
+        if (late_bind->component.ops)
+            break;
+        msleep(100);
+    }
+
+    xe_pm_runtime_get(xe);
+    mutex_lock(&late_bind->mutex);
+
+    if (!late_bind->component.ops) {
+        drm_err(&xe->drm, "Late bind component not bound\n");
+        goto out;
+    }
+
+    drm_dbg(&xe->drm, "Load %s firmware\n", fw_id_to_name[lbfw->id]);
+
+    do {
+        ret = late_bind->component.ops->push_config(late_bind->component.mei_dev,
+                                lbfw->type, lbfw->flags,
+                                lbfw->payload, lbfw->payload_size);
+        if (!ret)
+            break;
+        msleep(LB_FW_LOAD_RETRY_PAUSE_MS);
+    } while (--retry && ret == -EBUSY);
+
+    if (ret)
+        drm_err(&xe->drm, "Load %s firmware failed with err %d\n",
+            fw_id_to_name[lbfw->id], ret);
+    else
+        drm_dbg(&xe->drm, "Load %s firmware successful\n",
+            fw_id_to_name[lbfw->id]);
+out:
+    mutex_unlock(&late_bind->mutex);
+    xe_pm_runtime_put(xe);
+}
+
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind)
+{
+    struct xe_late_bind_fw *lbfw;
+    int fw_id;
+
+    if (!late_bind->component_added)
+        return -EINVAL;

-ENODEV instead? This should only happen if we don't support late_bind or if the component was not built.

Sure.


+
+    for (fw_id = 0; fw_id < MAX_FW_ID; fw_id++) {
+        lbfw = &late_bind->late_bind_fw[fw_id];
+        if (lbfw->valid)
+            queue_work(late_bind->wq, &lbfw->work);
+    }
+    return 0;
+}
+
  static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
  {
      struct xe_device *xe = late_bind_to_xe(late_bind);
@@ -93,6 +186,7 @@ static int __xe_late_bind_fw_init(struct xe_late_bind *late_bind, u32 fw_id)
        memcpy(lb_fw->payload, fw->data, lb_fw->payload_size);
      release_firmware(fw);
+    INIT_WORK(&lb_fw->work, late_bind_work);
      lb_fw->valid = true;
        return 0;
@@ -103,11 +197,16 @@ static int xe_late_bind_fw_init(struct xe_late_bind *late_bind)
      int ret;
      int fw_id;
  +    late_bind->wq = alloc_ordered_workqueue("late-bind-ordered-wq", 0);
+    if (!late_bind->wq)
+        return -ENOMEM;
+
      for (fw_id = 0; fw_id < MAX_FW_ID; fw_id++) {
          ret = __xe_late_bind_fw_init(late_bind, fw_id);
          if (ret)
              return ret;
      }
+

nit: this newline could be moved to the patch that adds this code.

      return ret;
  }
  @@ -131,6 +230,8 @@ static void xe_late_bind_component_unbind(struct device *xe_kdev,
      struct xe_device *xe = kdev_to_xe_device(xe_kdev);
      struct xe_late_bind *late_bind = &xe->late_bind;
  +    xe_late_bind_wait_for_worker_completion(late_bind);

I don't see this called for full suspend (not rpm), even if follow up patches. Am I just not seeing it, or is it missing?

I missed this, I will address in follow up patch.

Badal


Daniele

+
      mutex_lock(&late_bind->mutex);
      late_bind->component.ops = NULL;
      mutex_unlock(&late_bind->mutex);
@@ -146,8 +247,14 @@ static void xe_late_bind_remove(void *arg)
      struct xe_late_bind *late_bind = arg;
      struct xe_device *xe = late_bind_to_xe(late_bind);
  +    xe_late_bind_wait_for_worker_completion(late_bind);
+
      component_del(xe->drm.dev, &xe_late_bind_component_ops);
      late_bind->component_added = false;
+    if (late_bind->wq) {
+        destroy_workqueue(late_bind->wq);
+        late_bind->wq = NULL;
+    }
      mutex_destroy(&late_bind->mutex);
  }
  @@ -184,5 +291,9 @@ int xe_late_bind_init(struct xe_late_bind *late_bind)
      if (err)
          return err;
  -    return xe_late_bind_fw_init(late_bind);
+    err = xe_late_bind_fw_init(late_bind);
+    if (err)
+        return err;
+
+    return xe_late_bind_fw_load(late_bind);
  }
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw.h b/drivers/gpu/drm/xe/xe_late_bind_fw.h
index 4c73571c3e62..28d56ed2bfdc 100644
--- a/drivers/gpu/drm/xe/xe_late_bind_fw.h
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw.h
@@ -11,5 +11,6 @@
  struct xe_late_bind;
    int xe_late_bind_init(struct xe_late_bind *late_bind);
+int xe_late_bind_fw_load(struct xe_late_bind *late_bind);
    #endif
diff --git a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
index c6fd33fd5484..d256f53d59e6 100644
--- a/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
+++ b/drivers/gpu/drm/xe/xe_late_bind_fw_types.h
@@ -9,6 +9,7 @@
  #include <linux/iosys-map.h>
  #include <linux/mutex.h>
  #include <linux/types.h>
+#include <linux/workqueue.h>
    #define MAX_PAYLOAD_SIZE (1024 * 4)
  @@ -38,6 +39,8 @@ struct xe_late_bind_fw {
      u8  payload[MAX_PAYLOAD_SIZE];
      /** @late_bind_fw.payload_size: late binding blob payload_size */
      size_t payload_size;
+    /** @late_bind_fw.work: worker to upload latebind blob */
+    struct work_struct work;
  };
    /**
@@ -66,6 +69,8 @@ struct xe_late_bind {
      struct mutex mutex;
      /** @late_bind.late_bind_fw: late binding firmware array */
      struct xe_late_bind_fw late_bind_fw[MAX_FW_ID];
+    /** @late_bind.wq: workqueue to submit request to download late bind blob */
+    struct workqueue_struct *wq;
  };
    #endif