Re: [PATCH 1/4] watchdog: at91sam9_wdt: better watchdog support

From: boris brezillon
Date: Thu Oct 03 2013 - 13:56:48 EST


Hi Guenter,

On 03/10/2013 19:31, Guenter Roeck wrote:
On Thu, Oct 03, 2013 at 02:19:18PM +0200, Boris BREZILLON wrote:
The at91sam9 watchdog timer can only be configured once, and the current
implementation tries to configure it in a static way:
- 2 seconds timeout
- wdt restart every 500ms

If the timer has already been configured with different values, it returns an
error and do not create any watchdog device.

This is not critical if the watchdog is disabled, but if it has been enabled with
different timeout values it will lead to a SoC reset.

This patch series tries to address this issue by adapting the heartbeat value
according the WDT timer config:
- it first tries to configure the timer as requested.
- if it fails it fallbacks to the current config, adapting its heartbeat timer
to the needs

This patch series also move to a dynamically allocated at91wdt device instead
of the static instance.

It adds a new at91 wdt type: software. This new type make use of the at91 wdt
interrupt to trigger a software reboot.

Finally it adds several properties to the device tree bindings.

Signed-off-by: Boris BREZILLON <b.brezillon@xxxxxxxxxxx>
Hi Boris,

deeper dive this time ...

---
drivers/watchdog/at91sam9_wdt.c | 300 ++++++++++++++++++++++++++++-----------
1 file changed, 217 insertions(+), 83 deletions(-)

diff --git a/drivers/watchdog/at91sam9_wdt.c b/drivers/watchdog/at91sam9_wdt.c
index be37dde..8f44528 100644
--- a/drivers/watchdog/at91sam9_wdt.c
+++ b/drivers/watchdog/at91sam9_wdt.c
@@ -19,11 +19,13 @@
#include <linux/errno.h>
#include <linux/init.h>
+#include <linux/interrupt.h>
#include <linux/io.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/platform_device.h>
+#include <linux/reboot.h>
#include <linux/types.h>
#include <linux/watchdog.h>
#include <linux/jiffies.h>
@@ -31,22 +33,33 @@
#include <linux/bitops.h>
#include <linux/uaccess.h>
#include <linux/of.h>
+#include <linux/of_irq.h>
#include "at91sam9_wdt.h"
#define DRV_NAME "AT91SAM9 Watchdog"
-#define wdt_read(field) \
- __raw_readl(at91wdt_private.base + field)
-#define wdt_write(field, val) \
- __raw_writel((val), at91wdt_private.base + field)
+#define wdt_read(wdt, field) \
+ __raw_readl((wdt)->base + (field))
+#define wdt_write(wtd, field, val) \
+ __raw_writel((val), (wdt)->base + (field))
/* AT91SAM9 watchdog runs a 12bit counter @ 256Hz,
* use this to convert a watchdog
* value from/to milliseconds.
*/
-#define ms_to_ticks(t) (((t << 8) / 1000) - 1)
-#define ticks_to_ms(t) (((t + 1) * 1000) >> 8)
+#define ticks_to_hz_rounddown(t) ((((t) + 1) * HZ) >> 8)
+#define ticks_to_hz_roundup(t) (((((t) + 1) * HZ) + 255) >> 8)
+#define ticks_to_secs(t) (((t) + 1) >> 8)
+#define secs_to_ticks(s) (((s) << 8) - 1)
+
+#define WDT_MR_RESET 0x3FFF2FFF
+
+/* Watchdog max counter value in ticks */
+#define WDT_COUNTER_MAX_TICKS 0xFFF
+
+/* Watchdog max delta/value in secs */
+#define WDT_COUNTER_MAX_SECS ticks_to_secs(WDT_COUNTER_MAX_TICKS)
/* Hardware timeout in seconds */
#define WDT_HW_TIMEOUT 2
@@ -66,23 +79,40 @@ module_param(nowayout, bool, 0);
MODULE_PARM_DESC(nowayout, "Watchdog cannot be stopped once started "
"(default=" __MODULE_STRING(WATCHDOG_NOWAYOUT) ")");
-static struct watchdog_device at91_wdt_dev;
-static void at91_ping(unsigned long data);
-
-static struct {
+#define to_wdt(wdd) container_of(wdd, struct at91wdt, wdd)
+struct at91wdt {
+ struct watchdog_device wdd;
void __iomem *base;
unsigned long next_heartbeat; /* the next_heartbeat for the timer */
struct timer_list timer; /* The timer that pings the watchdog */
-} at91wdt_private;
+ u32 mr;
+ u32 mr_mask;
+ unsigned long heartbeat; /* WDT heartbeat in jiffies */
+ bool nowayout;
+ unsigned int irq;
+};
/* ......................................................................... */
+static irqreturn_t wdt_interrupt(int irq, void *dev_id)
+{
+ struct at91wdt *wdt = (struct at91wdt *)dev_id;
+
+ if (wdt_read(wdt, AT91_WDT_SR)) {
+ pr_crit("at91sam9 WDT software reset\n");
+ emergency_restart();
+ pr_crit("Reboot didn't ?????\n");
+ }
+
+ return IRQ_HANDLED;
+}
+
/*
* Reload the watchdog timer. (ie, pat the watchdog)
*/
-static inline void at91_wdt_reset(void)
+static inline void at91_wdt_reset(struct at91wdt *wdt)
{
- wdt_write(AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT);
+ wdt_write(wdt, AT91_WDT_CR, AT91_WDT_KEY | AT91_WDT_WDRSTT);
}
/*
@@ -90,26 +120,20 @@ static inline void at91_wdt_reset(void)
*/
static void at91_ping(unsigned long data)
{
- if (time_before(jiffies, at91wdt_private.next_heartbeat) ||
- (!watchdog_active(&at91_wdt_dev))) {
- at91_wdt_reset();
- mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
+ struct at91wdt *wdt = (struct at91wdt *)data;
+ if (time_before(jiffies, wdt->next_heartbeat) ||
+ !watchdog_active(&wdt->wdd)) {
+ at91_wdt_reset(wdt);
+ mod_timer(&wdt->timer, jiffies + wdt->heartbeat);
} else
pr_crit("I will reset your machine !\n");
}
static int at91_wdt_ping(struct watchdog_device *wdd)
{
+ struct at91wdt *wdt = to_wdt(wdd);
/* calculate when the next userspace timeout will be */
- at91wdt_private.next_heartbeat = jiffies + wdd->timeout * HZ;
- return 0;
-}
-
-static int at91_wdt_start(struct watchdog_device *wdd)
-{
- /* calculate the next userspace timeout and modify the timer */
- at91_wdt_ping(wdd);
- mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
+ wdt->next_heartbeat = jiffies + wdd->timeout * HZ;
return 0;
}
@@ -125,36 +149,84 @@ static int at91_wdt_set_timeout(struct watchdog_device *wdd, unsigned int new_ti
at91_wdt_set_timeout only sets the new timeout but does not update
next_heartbeat. That is ok if the timeout decreases, but if it is
increased (say, from 1 second to 1 minute) it might result in a missed
ping as userspace will subsequently believe that the new (larger)
timeout was accepted by the driver. So I think you should also call
at91_wdt_ping here or set wdt->next_heartbeat directly.
Absolutely, I'll call at91_wdt_ping (or start as it will be renamed).

return 0;
}
-/*
- * Set the watchdog time interval in 1/256Hz (write-once)
- * Counter is 12 bit.
- */
-static int at91_wdt_settimeout(unsigned int timeout)
+static int at91_wdt_init(struct platform_device *pdev, struct at91wdt *wdt)
{
- unsigned int reg;
- unsigned int mr;
-
- /* Check if disabled */
- mr = wdt_read(AT91_WDT_MR);
- if (mr & AT91_WDT_WDDIS) {
- pr_err("sorry, watchdog is disabled\n");
- return -EIO;
+ u32 tmp;
+ u32 delta;
+ u32 value;
+ int err;
+ u32 mask = wdt->mr_mask;
+ unsigned long min_heartbeat = 1;
+
+ tmp = wdt_read(wdt, AT91_WDT_MR);
+ if ((tmp & mask) != (wdt->mr & mask)) {
+ if (tmp == WDT_MR_RESET) {
+ wdt_write(wdt, AT91_WDT_MR, wdt->mr);
+ tmp = wdt_read(wdt, AT91_WDT_MR);
+ }
}
- /*
- * All counting occurs at SLOW_CLOCK / 128 = 256 Hz
- *
- * Since WDV is a 12-bit counter, the maximum period is
- * 4096 / 256 = 16 seconds.
- */
- reg = AT91_WDT_WDRSTEN /* causes watchdog reset */
- /* | AT91_WDT_WDRPROC causes processor reset only */
- | AT91_WDT_WDDBGHLT /* disabled in debug mode */
- | AT91_WDT_WDD /* restart at any time */
- | (timeout & AT91_WDT_WDV); /* timer value */
- wdt_write(AT91_WDT_MR, reg);
+ if (tmp & AT91_WDT_WDDIS) {
+ if (wdt->mr & AT91_WDT_WDDIS)
+ return 0;
+ dev_err(wdt->wdd.parent, "watchdog is disabled\n");
Took me a while to get why you use "wdt->wdd.parent" :).
I would just have passed the 'dev' argument. Your call, though.

I can't use wdt->wdd.dev, because the watchdog device is not registered yet,
but I can use &pdev->dev instead.
What do you prefer ?

+ return -EINVAL;
+ }
+
+ value = tmp & AT91_WDT_WDV;
+ delta = (tmp & AT91_WDT_WDD) >> 16;
+
+ if (delta < value)
+ min_heartbeat = ticks_to_hz_roundup(value - delta);
+
+ wdt->heartbeat = ticks_to_hz_rounddown(value);
+ if (!wdt->heartbeat) {
+ dev_err(wdt->wdd.parent,
+ "heartbeat is too small for the system to handle it correctly\n");
+ return -EINVAL;
+ }
+
+ if (wdt->heartbeat < min_heartbeat + 3) {
+ wdt->heartbeat = min_heartbeat;
+ dev_warn(wdt->wdd.parent,
+ "min heartbeat and max heartbeat might be too close for the system to handle it correctly\n");
+ if (wdt->heartbeat < 4)
+ dev_warn(wdt->wdd.parent,
+ "heartbeat might be too small for the system to handle it correctly\n");
+ } else
+ wdt->heartbeat -= 4;
That means heartbeat can be 0, correct (if min_heartbeat == 1) ?
Yes, this is a mistake. It should be

if (wdt->heartbeat < min_heartbeat + 4) {
...
}

Will that work ?
Absolutely not :)
I'll fix it.


+
+ if ((tmp & AT91_WDT_WDFIEN) && wdt->irq) {
+ err = request_irq(wdt->irq, wdt_interrupt,
+ IRQF_SHARED | IRQF_IRQPOLL,
+ pdev->name, wdt);
+ if (err)
+ return err;
+ }
+
+ if ((tmp & wdt->mr_mask) != (wdt->mr & wdt->mr_mask))
+ dev_warn(wdt->wdd.parent,
+ "watchdog already configured differently (mr = %x expecting %x)\n",
+ tmp & wdt->mr_mask, wdt->mr & wdt->mr_mask);
+
+ setup_timer(&wdt->timer, at91_ping, (unsigned long)wdt);
+ mod_timer(&wdt->timer, jiffies + wdt->heartbeat);
+
+ /* Try to set timeout from device tree first */
+ if (watchdog_init_timeout(&wdt->wdd, 0, &pdev->dev))
+ watchdog_init_timeout(&wdt->wdd, heartbeat, &pdev->dev);
+ watchdog_set_nowayout(&wdt->wdd, wdt->nowayout);
+ err = watchdog_register_device(&wdt->wdd);
+ if (err)
+ goto out_stop_timer;
+
+ wdt->next_heartbeat = jiffies + wdt->wdd.timeout * HZ;
return 0;
+
+out_stop_timer:
+ del_timer(&wdt->timer);
+ return err;
}
/* ......................................................................... */
@@ -167,63 +239,125 @@ static const struct watchdog_info at91_wdt_info = {
static const struct watchdog_ops at91_wdt_ops = {
.owner = THIS_MODULE,
- .start = at91_wdt_start,
+ .start = at91_wdt_ping,
.stop = at91_wdt_stop,
.ping = at91_wdt_ping,
Notpick, but you don't need both start and ping. If both are the same, providing
start is sufficient. Given this, it might be cleaner to name the function
at91_wdt_start and not provide ping.
I'll change this too.

Thanks, for catching these bugs.

Regards,

Boris

.set_timeout = at91_wdt_set_timeout,
};
-static struct watchdog_device at91_wdt_dev = {
- .info = &at91_wdt_info,
- .ops = &at91_wdt_ops,
- .timeout = WDT_HEARTBEAT,
- .min_timeout = 1,
- .max_timeout = 0xFFFF,
-};
+#if defined(CONFIG_OF)
+static int of_at91wdt_init(struct device_node *np, struct at91wdt *wdt)
+{
+ u32 min = 0;
+ u32 max = WDT_COUNTER_MAX_SECS;
+ const char *tmp;
+
+ /* Get the interrupts property */
+ wdt->irq = irq_of_parse_and_map(np, 0);
+ if (!wdt->irq)
+ dev_warn(wdt->wdd.parent, "failed to get IRQ from DT\n");
+
+ if (!of_property_read_u32_index(np, "atmel,max-heartbeat-sec", 0,
+ &max)) {
+ if (!max || max > WDT_COUNTER_MAX_SECS)
+ max = WDT_COUNTER_MAX_SECS;
+
+ if (!of_property_read_u32_index(np, "atmel,min-heartbeat-sec",
+ 0, &min)) {
+ if (min >= max)
+ min = max - 1;
+ }
+ }
+
+ min = secs_to_ticks(min);
+ max = secs_to_ticks(max);
+
+ wdt->mr_mask = 0x3FFFFFFF;
+ wdt->mr = 0;
+ if (!of_property_read_string(np, "atmel,watchdog-type", &tmp) &&
+ !strcmp(tmp, "software")) {
+ wdt->mr |= AT91_WDT_WDFIEN;
+ wdt->mr_mask &= ~AT91_WDT_WDRPROC;
+ } else
+ wdt->mr |= AT91_WDT_WDRSTEN;
+
+ if (!of_property_read_string(np, "atmel,reset-type", &tmp) &&
+ !strcmp(tmp, "proc"))
+ wdt->mr |= AT91_WDT_WDRPROC;
+
+ if (of_property_read_bool(np, "atmel,disable")) {
+ wdt->mr |= AT91_WDT_WDDIS;
+ wdt->mr_mask &= AT91_WDT_WDDIS;
+ }
+
+ if (of_property_read_bool(np, "atmel,idle-halt"))
+ wdt->mr |= AT91_WDT_WDIDLEHLT;
+
+ if (of_property_read_bool(np, "atmel,dbg-halt"))
+ wdt->mr |= AT91_WDT_WDDBGHLT;
+
+ wdt->mr |= max | ((max - min) << 16);
+
+ return 0;
+}
+#else
+static inline int of_at91wdt_init(struct device_node *np, struct at91wdt *wdt)
+{
+ return 0;
+}
+#endif
static int __init at91wdt_probe(struct platform_device *pdev)
{
struct resource *r;
- int res;
+ int err;
+ struct at91wdt *wdt;
- r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (!r)
- return -ENODEV;
- at91wdt_private.base = ioremap(r->start, resource_size(r));
- if (!at91wdt_private.base) {
- dev_err(&pdev->dev, "failed to map registers, aborting.\n");
+ wdt = devm_kzalloc(&pdev->dev, sizeof(*wdt), GFP_KERNEL);
+ if (!wdt)
return -ENOMEM;
- }
- at91_wdt_dev.parent = &pdev->dev;
- watchdog_init_timeout(&at91_wdt_dev, heartbeat, &pdev->dev);
- watchdog_set_nowayout(&at91_wdt_dev, nowayout);
+ wdt->mr = (WDT_HW_TIMEOUT * 256) | AT91_WDT_WDRSTEN | AT91_WDT_WDD |
+ AT91_WDT_WDDBGHLT | AT91_WDT_WDIDLEHLT;
+ wdt->mr_mask = 0x3FFFFFFF;
+ wdt->nowayout = nowayout;
+ wdt->wdd.parent = &pdev->dev;
+ wdt->wdd.info = &at91_wdt_info;
+ wdt->wdd.ops = &at91_wdt_ops;
+ wdt->wdd.timeout = WDT_HEARTBEAT;
+ wdt->wdd.min_timeout = 1;
+ wdt->wdd.max_timeout = 0xFFFF;
- /* Set watchdog */
- res = at91_wdt_settimeout(ms_to_ticks(WDT_HW_TIMEOUT * 1000));
- if (res)
- return res;
+ r = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+ wdt->base = devm_ioremap_resource(&pdev->dev, r);
+ if (IS_ERR(wdt->base))
+ return PTR_ERR(wdt->base);
+
+ if (pdev->dev.of_node) {
+ err = of_at91wdt_init(pdev->dev.of_node, wdt);
+ if (err)
+ return err;
+ }
- res = watchdog_register_device(&at91_wdt_dev);
- if (res)
- return res;
+ err = at91_wdt_init(pdev, wdt);
+ if (err)
+ return err;
- at91wdt_private.next_heartbeat = jiffies + at91_wdt_dev.timeout * HZ;
- setup_timer(&at91wdt_private.timer, at91_ping, 0);
- mod_timer(&at91wdt_private.timer, jiffies + WDT_TIMEOUT);
+ platform_set_drvdata(pdev, wdt);
pr_info("enabled (heartbeat=%d sec, nowayout=%d)\n",
- at91_wdt_dev.timeout, nowayout);
+ wdt->wdd.timeout, wdt->nowayout);
return 0;
}
static int __exit at91wdt_remove(struct platform_device *pdev)
{
- watchdog_unregister_device(&at91_wdt_dev);
+ struct at91wdt *wdt = platform_get_drvdata(pdev);
+ watchdog_unregister_device(&wdt->wdd);
pr_warn("I quit now, hardware will probably reboot!\n");
- del_timer(&at91wdt_private.timer);
+ del_timer(&wdt->timer);
return 0;
}
--
1.7.9.5

--
To unsubscribe from this list: send the line "unsubscribe linux-watchdog" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/