Re: [PATCH v3 lora-next 5/5] net: lora: sx125x sx1301: allow radio to register as a clk provider

From: Andreas FÃrber
Date: Mon Dec 31 2018 - 08:27:17 EST


Am 30.12.18 um 11:55 schrieb Andreas FÃrber:
> Am 29.12.18 um 21:16 schrieb Andreas FÃrber:
>> `sudo ip link set lora2 up` froze my system, with both
>> lora1 and lora2 being sx1301. [...]
>>
>> Investigating...
>
> I've bisected and confirmed that it was indeed this patch that regresses
> for one of my SX1301 concentrators.
[...]
> We never return from the sx125x_clkout_enable() performing the
> regmap_field_write() on our regmap_bus, which in turn uses a SPI regmap
> in sx1301_regmap_bus_read().
>
> A notable difference between my two concentrators is that the working
> one is using spi-gpio driver, the regressing one spi-sun6i.
>
> Two things stood out in spi-sun6i: It uses a completion (I do not run
> into its timeout warning!), and it uses clk_{get,set}_rate().
>
> Given that observed symptoms were CPU stalls, workqueue [freezes] and RCU
> problems, requiring a power-cycle to recover, I wonder whether we are
> running into some atomic/locking issue with clk_enable()? Is it valid at
> all to use SPI/regmap for clk_enable()? If it is, is there a known issue
> specific to spi-sun6i (A64) in 4.20.0?

I've now hacked together a test case: delaying the regmap operation to a
work queue (violating the .enable contract of a stable clk on return!)
and having our caller poll afterwards for the operation to finish. Guess
what, below gross hack makes it work again on both cards... :/

Is this hinting at an issue with spi-sun6i clk_get_rate()'s prepare_lock
vs. our clk_enable()'s enable_lock? I grep'ed around and spi-sun6i is
not the only SPI driver using clk_get_rate() in transfer_one...

Thanks for any hints,

Andreas


diff --git a/drivers/net/lora/sx125x.c b/drivers/net/lora/sx125x.c
index 597b882379ac..095ca40e5de7 100644
--- a/drivers/net/lora/sx125x.c
+++ b/drivers/net/lora/sx125x.c
@@ -11,6 +11,7 @@

#include <linux/clk.h>
#include <linux/clk-provider.h>
+#include <linux/delay.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/of_device.h>
@@ -49,6 +50,9 @@ struct sx125x_priv {
struct device *dev;
struct regmap *regmap;
struct regmap_field
*regmap_fields[ARRAY_SIZE(sx125x_regmap_fields)];
+
+ struct workqueue_struct *clk_wq;
+ struct work_struct clk_out_enable_work;
};

#define to_clkout(_hw) container_of(_hw, struct sx125x_priv, clkout_hw)
@@ -81,8 +85,17 @@ static int sx125x_clkout_enable(struct clk_hw *hw)
{
struct sx125x_priv *priv = to_clkout(hw);

+ dev_info(priv->dev, "enabling clkout...\n");
+ queue_work(priv->clk_wq, &priv->clk_out_enable_work);
+ return 0;
+}
+
+static void sx125x_clk_out_enable_work_handler(struct work_struct *ws)
+{
+ struct sx125x_priv *priv = container_of(ws, struct sx125x_priv,
clk_out_enable_work);
+
dev_info(priv->dev, "enabling clkout\n");
- return sx125x_field_write(priv, F_CLK_OUT, 1);
+ sx125x_field_write(priv, F_CLK_OUT, 1);
}

static void sx125x_clkout_disable(struct clk_hw *hw)
@@ -230,6 +243,9 @@ static int __maybe_unused sx125x_regmap_probe(struct
device *dev, struct regmap
}
}

+ priv->clk_wq = alloc_workqueue("sx127x_wq", WQ_FREEZABLE |
WQ_MEM_RECLAIM, 0);
+ INIT_WORK(&priv->clk_out_enable_work,
sx125x_clk_out_enable_work_handler);
+
dev_info(dev, "SX125x module probed\n");

return 0;
@@ -237,6 +253,10 @@ static int __maybe_unused
sx125x_regmap_probe(struct device *dev, struct regmap

static int __maybe_unused sx125x_regmap_remove(struct device *dev)
{
+ struct sx125x_priv *priv = dev_get_drvdata(dev);
+
+ destroy_workqueue(priv->clk_wq);
+
dev_info(dev, "SX125x module removed\n");

return 0;
diff --git a/drivers/net/lora/sx130x.c b/drivers/net/lora/sx130x.c
index 7ac7de9eda46..4ae6699d38ad 100644
--- a/drivers/net/lora/sx130x.c
+++ b/drivers/net/lora/sx130x.c
@@ -11,6 +11,7 @@

#include <linux/bitops.h>
#include <linux/clk.h>
+#include <linux/clk-provider.h>
#include <linux/delay.h>
#include <linux/firmware.h>
#include <linux/lora.h>
@@ -391,6 +392,10 @@ static int sx130x_loradev_open(struct net_device
*netdev)
goto err_clk_enable;
}

+ do {
+ usleep_range(100, 1000);
+ } while (!__clk_is_enabled(priv->clk32m));
+
ret = sx130x_field_write(priv, F_GLOBAL_EN, 1);
if (ret) {
dev_err(priv->dev, "enable global clocks failed (%d)\n",
ret);


--
SUSE Linux GmbH, Maxfeldstr. 5, 90409 NÃrnberg, Germany
GF: Felix ImendÃrffer, Jane Smithard, Graham Norton
HRB 21284 (AG NÃrnberg)