Re: [PATCH v3 2/2] Bluetooth: hci_qca: wcn3990: Drop baudrate change vendor event

From: Matthias Kaehlcke
Date: Mon Apr 29 2019 - 19:31:41 EST


On Mon, Apr 29, 2019 at 04:21:31PM -0700, Matthias Kaehlcke wrote:
> Firmware download to the WCN3990 often fails with a 'TLV response size
> mismatch' error:
>
> [ 133.064659] Bluetooth: hci0: setting up wcn3990
> [ 133.489150] Bluetooth: hci0: QCA controller version 0x02140201
> [ 133.495245] Bluetooth: hci0: QCA Downloading qca/crbtfw21.tlv
> [ 133.507214] Bluetooth: hci0: QCA TLV response size mismatch
> [ 133.513265] Bluetooth: hci0: QCA Failed to download patch (-84)
>
> This is caused by a vendor event that corresponds to an earlier command
> to change the baudrate. The event is not processed in the context of the
> baudrate change and is later interpreted as response to the firmware
> download command (which is also a vendor command), but the driver detects
> that the event doesn't have the expected amount of associated data.
>
> More details:
>
> For the WCN3990 the vendor command for a baudrate change isn't sent as
> synchronous HCI command, because the controller sends the corresponding
> vendor event with the new baudrate. The event is received and decoded
> after the baudrate change of the host port.
>
> Identify the 'unused' event when it is received and don't add it to
> the queue of RX frames.
>
> Signed-off-by: Matthias Kaehlcke <mka@xxxxxxxxxxxx>
> ---
> Changes in v3:
> - rebased on latest bluetooth-next/master
> - removed barrier calls again, bit routines include barriers
>
> Changes in v2:
> - make QCA_DROP_VENDOR_EVENT an enum value and don't use BIT()
> - free skb in qca_recv_event()
> - add barriers to ensure qca_recv_event() sees updated flags
> - return -ETIMEDOUT instead of -EPROTO if the vendor event isn't
> received in time
> ---
> drivers/bluetooth/hci_qca.c | 56 +++++++++++++++++++++++++++++++++++--
> 1 file changed, 53 insertions(+), 3 deletions(-)
>
> diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
> index 57322c42bb2d..5b57d897d8b5 100644
> --- a/drivers/bluetooth/hci_qca.c
> +++ b/drivers/bluetooth/hci_qca.c
> @@ -30,6 +30,7 @@
>
> #include <linux/kernel.h>
> #include <linux/clk.h>
> +#include <linux/completion.h>
> #include <linux/debugfs.h>
> #include <linux/delay.h>
> #include <linux/device.h>
> @@ -66,6 +67,7 @@
>
> enum qca_flags {
> QCA_IBS_ENABLED,
> + QCA_DROP_VENDOR_EVENT,
> };
>
> /* HCI_IBS transmit side sleep protocol states */
> @@ -110,6 +112,7 @@ struct qca_data {
> struct work_struct ws_rx_vote_off;
> struct work_struct ws_tx_vote_off;
> unsigned long flags;
> + struct completion drop_ev_comp;
>
> /* For debugging purpose */
> u64 ibs_sent_wacks;
> @@ -491,6 +494,7 @@ static int qca_open(struct hci_uart *hu)
> INIT_WORK(&qca->ws_tx_vote_off, qca_wq_serial_tx_clock_vote_off);
>
> qca->hu = hu;
> + init_completion(&qca->drop_ev_comp);
>
> /* Assume we start with both sides asleep -- extra wakes OK */
> qca->tx_ibs_state = HCI_IBS_TX_ASLEEP;
> @@ -885,6 +889,35 @@ static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb)
> return hci_recv_frame(hdev, skb);
> }
>
> +static int qca_recv_event(struct hci_dev *hdev, struct sk_buff *skb)
> +{
> + struct hci_uart *hu = hci_get_drvdata(hdev);
> + struct qca_data *qca = hu->priv;
> +
> + if (test_bit(QCA_DROP_VENDOR_EVENT, &qca->flags)) {
> + struct hci_event_hdr *hdr = (void *)skb->data;
> +
> + /* For the WCN3990 the vendor command for a baudrate change
> + * isn't sent as synchronous HCI command, because the
> + * controller sends the corresponding vendor event with the
> + * new baudrate. The event is received and properly decoded
> + * after changing the baudrate of the host port. It needs to
> + * be dropped, otherwise it can be misinterpreted as
> + * response to a later firmware download command (also a
> + * vendor command).
> + */
> +
> + if (hdr->evt == HCI_EV_VENDOR)
> + complete(&qca->drop_ev_comp);
> +
> + kfree(skb);
> +
> + return 0;
> + }
> +
> + return hci_recv_frame(hdev, skb);
> +}
> +
> #define QCA_IBS_SLEEP_IND_EVENT \
> .type = HCI_IBS_SLEEP_IND, \
> .hlen = 0, \
> @@ -909,7 +942,7 @@ static int qca_recv_acl_data(struct hci_dev *hdev, struct sk_buff *skb)
> static const struct h4_recv_pkt qca_recv_pkts[] = {
> { H4_RECV_ACL, .recv = qca_recv_acl_data },
> { H4_RECV_SCO, .recv = hci_recv_frame },
> - { H4_RECV_EVENT, .recv = hci_recv_frame },
> + { H4_RECV_EVENT, .recv = qca_recv_event },
> { QCA_IBS_WAKE_IND_EVENT, .recv = qca_ibs_wake_ind },
> { QCA_IBS_WAKE_ACK_EVENT, .recv = qca_ibs_wake_ack },
> { QCA_IBS_SLEEP_IND_EVENT, .recv = qca_ibs_sleep_ind },
> @@ -1104,6 +1137,7 @@ static int qca_check_speeds(struct hci_uart *hu)
> static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
> {
> unsigned int speed, qca_baudrate;
> + struct qca_data *qca = hu->priv;
> int ret = 0;
>
> if (speed_type == QCA_INIT_SPEED) {
> @@ -1120,8 +1154,11 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
> /* Disable flow control for wcn3990 to deassert RTS while
> * changing the baudrate of chip and host.
> */
> - if (qca_is_wcn399x(soc_type))
> + if (qca_is_wcn399x(soc_type)) {
> hci_uart_set_flow_control(hu, true);
> + reinit_completion(&qca->drop_ev_comp);
> + set_bit(QCA_DROP_VENDOR_EVENT, &qca->flags);
> + }
>
> qca_baudrate = qca_get_baudrate_value(speed);
> bt_dev_dbg(hu->hdev, "Set UART speed to %d", speed);
> @@ -1132,8 +1169,21 @@ static int qca_set_speed(struct hci_uart *hu, enum qca_speed_type speed_type)
> host_set_baudrate(hu, speed);
>
> error:
> - if (qca_is_wcn399x(soc_type))
> + if (qca_is_wcn399x(soc_type)) {
> hci_uart_set_flow_control(hu, false);
> +
> + /* Wait for the controller to send the vendor event
> + * for the baudrate change command.
> + */
> + if (!wait_for_completion_timeout(&qca->drop_ev_comp,
> + msecs_to_jiffies(100))) {
> + bt_dev_err(hu->hdev,
> + "Failed to change controller baudrate\n");
> + ret = -ETIMEDOUT;
> + }
> +
> + clear_bit(QCA_DROP_VENDOR_EVENT, &qca->flags);
> + }
> }
>
> return ret;

The alternative to this patch could be a short delay in the right
place, as commented on v2
(https://lore.kernel.org/patchwork/patch/1048463/#1252739).

This series has been floating around for two months and hasn't
received any maintainer attention (or at least no replies). I don't
pretend it's a pretty solution, if maintainers prefer we can go for
the simpler option to add a delay plus a comment explaining why it is
needed. In any case let's do something, firmware download has been
broken since forever even though possible fixes are available :(

Thanks

Matthias