Re: [PATCH v9 08/27] mailbox: Add Gunyah message queue mailbox

From: Elliot Berman
Date: Wed Feb 08 2023 - 15:46:59 EST




On 2/2/2023 1:59 AM, Srinivas Kandagatla wrote:


On 20/01/2023 22:46, Elliot Berman wrote:
Gunyah message queues are a unidirectional inter-VM pipe for messages up
to 1024 bytes. This driver supports pairing a receiver message queue and
a transmitter message queue to expose a single mailbox channel.

Signed-off-by: Elliot Berman <quic_eberman@xxxxxxxxxxx>
---

[snip]

+static irqreturn_t gh_msgq_rx_irq_handler(int irq, void *data)
+{
+    struct gh_msgq *msgq = data;
+    struct gh_msgq_rx_data rx_data;
+    unsigned long gh_err;
+    bool ready = true;
+
+    while (ready) {
+        gh_err = gh_hypercall_msgq_recv(msgq->rx_ghrsc->capid,
+                (uintptr_t)&rx_data.data, sizeof(rx_data.data),
You should probably use GH_MSGQ_MAX_MSG_SIZE instead of calling sizeof on every loop iteration.

+                &rx_data.length, &ready);
+        if (gh_err == GH_ERROR_OK) {
+            mbox_chan_received_data(gh_msgq_chan(msgq), &rx_data);
+        } else if (gh_err == GH_ERROR_MSGQUEUE_EMPTY) {
+            break;
+        } else {
+            pr_warn("Failed to receive data from msgq for %s: %zd\n",
+                msgq->mbox.dev ? dev_name(msgq->mbox.dev) : "", gh_err);
+            break;
+        }
+    }
+
+    return IRQ_HANDLED;
+}
How about making this more readable?
Also, the use of the gh_ prefix in local variable names is not really adding any value.
> while (ready) {
    err = gh_hypercall_msgq_recv(...);
    if (err) {
        if (err != GH_ERROR_MSGQUEUE_EMPTY)
                    dev_warn(msgq->mbox.dev, "Failed to receive data %zd\n", err);
                break;

               }
    mbox_chan_received_data(gh_msgq_chan(msgq), &rx_data);
}



Done

+
+static irqreturn_t gh_msgq_tx_irq_handler(int irq, void *data)
+{
+    struct gh_msgq *msgq = data;
+
+    mbox_chan_txdone(gh_msgq_chan(msgq), 0);
What is this irq for? Is it for tx done ack?


This IRQ is asserted when the message queue transitions from "full" to "space available". For the mailbox framework, it serves as the tx done ack.

+
+    return IRQ_HANDLED;
+}
+
+static void gh_msgq_txdone_tasklet(struct tasklet_struct *tasklet)
+{
+    struct gh_msgq *msgq = container_of(tasklet, struct gh_msgq, txdone_tasklet);
+
+    mbox_chan_txdone(gh_msgq_chan(msgq), msgq->last_ret);
+}
+
+static int gh_msgq_send_data(struct mbox_chan *chan, void *data)
+{
+    struct gh_msgq *msgq = mbox_chan_to_msgq(chan);
+    struct gh_msgq_tx_data *msgq_data = data;
+    u64 tx_flags = 0;
+    unsigned long ret;
+    bool ready;
+
+    if (msgq_data->push)
+        tx_flags |= GH_HYPERCALL_MSGQ_TX_FLAGS_PUSH;
+
+    ret = gh_hypercall_msgq_send(msgq->tx_ghrsc->capid, msgq_data->length,
+                    (uintptr_t)msgq_data->data, tx_flags, &ready);
+
+    /**
+     * unlikely because Linux tracks state of msgq and should not try to
+     * send message when msgq is full.
+     */
+    if (unlikely(ret == GH_ERROR_MSGQUEUE_FULL))
+        return -EAGAIN;
+
+    /**
+     * Propagate all other errors to client. If we return error to mailbox
+     * framework, then no other messages can be sent and nobody will know
+     * to retry this message.
+     */
+    msgq->last_ret = gh_remap_error(ret);
+
+    /**
+     * This message was successfully sent, but message queue isn't ready to
+     * receive more messages because it's now full. Mailbox framework
+     * requires that we only report that message was transmitted when
+     * we're ready to transmit another message. We'll get that in the form
+     * of tx IRQ once the other side starts to drain the msgq.
+     */
+    if (ret == GH_ERROR_OK && !ready)
+        return 0;
+
+    /**
+     * We can send more messages. Mailbox framework requires that tx done
+     * happens asynchronously to sending the message. Gunyah message queues
+     * tell us right away on the hypercall return whether we can send more
+     * messages. To work around this, defer the txdone to a tasklet.
+     */
+    tasklet_schedule(&msgq->txdone_tasklet);
+
+    return 0;
+}
+
+static struct mbox_chan_ops gh_msgq_ops = {
+    .send_data = gh_msgq_send_data,
+};
+
+/**
+ * gh_msgq_init() - Initialize a Gunyah message queue with an mbox_client
+ * @parent: optional, device parent used for the mailbox controller
+ * @msgq: Pointer to the gh_msgq to initialize
+ * @cl: A mailbox client to bind to the mailbox channel that the message queue creates
+ * @tx_ghrsc: optional, the transmission side of the message queue
+ * @rx_ghrsc: optional, the receiving side of the message queue
+ *
+ * At least one of tx_ghrsc and rx_ghrsc should be not NULL. Most message queue use cases come with
+ * a pair of message queues to facilitate bidirectional communication. When tx_ghrsc is set,
+ * the client can send messages with mbox_send_message(gh_msgq_chan(msgq), msg). When rx_ghrsc
+ * is set, the mbox_client should register an .rx_callback() and the message queue driver will
+ * push all available messages upon receiving the RX ready interrupt. The messages should be
+ * consumed or copied by the client right away as the gh_msgq_rx_data will be replaced/destroyed
+ * after the callback.
+ *
+ * Returns - 0 on success, negative otherwise
+ */
+int gh_msgq_init(struct device *parent, struct gh_msgq *msgq, struct mbox_client *cl,
+             struct gunyah_resource *tx_ghrsc, struct gunyah_resource *rx_ghrsc)
+{
+    int ret;
+
+    /* Must have at least a tx_ghrsc or rx_ghrsc and that they are the right device types */
+    if ((!tx_ghrsc && !rx_ghrsc) ||
+        (tx_ghrsc && tx_ghrsc->type != GUNYAH_RESOURCE_TYPE_MSGQ_TX) ||
+        (rx_ghrsc && rx_ghrsc->type != GUNYAH_RESOURCE_TYPE_MSGQ_RX))
+        return -EINVAL;
+
+    if (gh_api_version() != GUNYAH_API_V1) {
+        pr_warn("Unrecognized gunyah version: %u. Currently supported: %d\n",
+            gh_api_version(), GUNYAH_API_V1);
how about using dev_err here?

+        return -ENODEV;

-EOPNOTSUPP?


Done.

+    }
+
+    if (!gh_api_has_feature(GH_API_FEATURE_MSGQUEUE))
+        return -EOPNOTSUPP;
+
+    msgq->tx_ghrsc = tx_ghrsc;
+    msgq->rx_ghrsc = rx_ghrsc;
+
+    msgq->mbox.dev = parent;
+    msgq->mbox.ops = &gh_msgq_ops;
+    msgq->mbox.num_chans = 1;
+    msgq->mbox.chans = kcalloc(msgq->mbox.num_chans, sizeof(*msgq->mbox.chans), GFP_KERNEL);
+    if (!msgq->mbox.chans)
+        return -ENOMEM;

new line here would be nice.
+    msgq->mbox.txdone_irq = true;
+
+    if (msgq->tx_ghrsc) {
+        ret = request_irq(msgq->tx_ghrsc->irq, gh_msgq_tx_irq_handler, 0, "gh_msgq_tx",
+                msgq);
+        if (ret)
+            goto err_chans;
+    }
+
+    if (msgq->rx_ghrsc) {
+        ret = request_threaded_irq(msgq->rx_ghrsc->irq, NULL, gh_msgq_rx_irq_handler,
+                        IRQF_ONESHOT, "gh_msgq_rx", msgq);
+        if (ret)
+            goto err_tx_irq;
+    }
+
+    tasklet_setup(&msgq->txdone_tasklet, gh_msgq_txdone_tasklet);

AFAIU, this looks like duplicating what core already has with TXDONE_BY_POLL.

can we not use
txdone_poll = true
and implement last_tx_done callback to use hrtimer from the core to tick tx.


TXDONE_BY_POLL suits the case where the message queue is immediately ready to receive more data. In the other case, we have an interrupt to indicate when it can receive more data, and polling doesn't make sense. IMO, the IRQ handler should immediately tick the mailbox controller, and I don't know of a way to do that when using TXDONE_BY_POLL -- there isn't an interface to reset the timer, and mbox_chan_txdone() only works with TXDONE_BY_IRQ.

Thanks,
Elliot