[PATCH 3.10 221/319] ipv6: move DAD and addrconf_verify processing to workqueue

From: Willy Tarreau
Date: Sun Feb 05 2017 - 15:08:43 EST


From: Hannes Frederic Sowa <hannes@xxxxxxxxxxxxxxxxxxx>

commit c15b1ccadb323ea50023e8f1cca2954129a62b51 upstream.

addrconf_join_solict and addrconf_join_anycast may cause actions which
need rtnl locked, especially on first address creation.

A new DAD state is introduced which defers processing of the initial
DAD processing into a workqueue.

To get rtnl lock we need to push the code paths which depend on those
calls up to workqueues, specifically addrconf_verify and the DAD
processing.

(v2)
addrconf_dad_failure needs to be queued up to the workqueue, too. This
patch introduces a new DAD state and stop the DAD processing in the
workqueue (this is because of the possible ipv6_del_addr processing
which removes the solicited multicast address from the device).

addrconf_verify_lock is removed, too. After the transition it is not
needed any more.

As we are not processing in bottom half anymore we need to be a bit more
careful about disabling bottom half out when we lock spin_locks which are also
used in bh.

Relevant backtrace:
[ 541.030090] RTNL: assertion failed at net/core/dev.c (4496)
[ 541.031143] CPU: 0 PID: 0 Comm: swapper/0 Tainted: G O 3.10.33-1-amd64-vyatta #1
[ 541.031145] Hardware name: Bochs Bochs, BIOS Bochs 01/01/2007
[ 541.031146] ffffffff8148a9f0 000000000000002f ffffffff813c98c1 ffff88007c4451f8
[ 541.031148] 0000000000000000 0000000000000000 ffffffff813d3540 ffff88007fc03d18
[ 541.031150] 0000880000000006 ffff88007c445000 ffffffffa0194160 0000000000000000
[ 541.031152] Call Trace:
[ 541.031153] <IRQ> [<ffffffff8148a9f0>] ? dump_stack+0xd/0x17
[ 541.031180] [<ffffffff813c98c1>] ? __dev_set_promiscuity+0x101/0x180
[ 541.031183] [<ffffffff813d3540>] ? __hw_addr_create_ex+0x60/0xc0
[ 541.031185] [<ffffffff813cfe1a>] ? __dev_set_rx_mode+0xaa/0xc0
[ 541.031189] [<ffffffff813d3a81>] ? __dev_mc_add+0x61/0x90
[ 541.031198] [<ffffffffa01dcf9c>] ? igmp6_group_added+0xfc/0x1a0 [ipv6]
[ 541.031208] [<ffffffff8111237b>] ? kmem_cache_alloc+0xcb/0xd0
[ 541.031212] [<ffffffffa01ddcd7>] ? ipv6_dev_mc_inc+0x267/0x300 [ipv6]
[ 541.031216] [<ffffffffa01c2fae>] ? addrconf_join_solict+0x2e/0x40 [ipv6]
[ 541.031219] [<ffffffffa01ba2e9>] ? ipv6_dev_ac_inc+0x159/0x1f0 [ipv6]
[ 541.031223] [<ffffffffa01c0772>] ? addrconf_join_anycast+0x92/0xa0 [ipv6]
[ 541.031226] [<ffffffffa01c311e>] ? __ipv6_ifa_notify+0x11e/0x1e0 [ipv6]
[ 541.031229] [<ffffffffa01c3213>] ? ipv6_ifa_notify+0x33/0x50 [ipv6]
[ 541.031233] [<ffffffffa01c36c8>] ? addrconf_dad_completed+0x28/0x100 [ipv6]
[ 541.031241] [<ffffffff81075c1d>] ? task_cputime+0x2d/0x50
[ 541.031244] [<ffffffffa01c38d6>] ? addrconf_dad_timer+0x136/0x150 [ipv6]
[ 541.031247] [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]
[ 541.031255] [<ffffffff8105313a>] ? call_timer_fn.isra.22+0x2a/0x90
[ 541.031258] [<ffffffffa01c37a0>] ? addrconf_dad_completed+0x100/0x100 [ipv6]

Hunks and backtrace stolen from a patch by Stephen Hemminger.

Reported-by: Stephen Hemminger <stephen@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Stephen Hemminger <stephen@xxxxxxxxxxxxxxxxxx>
Signed-off-by: Hannes Frederic Sowa <hannes@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Cc: <stable@xxxxxxxxxxxxxxx>
Cc: <stable@xxxxxxxxxxxxxxx>
[Mike Manning <mmanning@xxxxxxxxxxx>: resolved minor conflicts in addrconf.c]
Signed-off-by: Mike Manning <mmanning@xxxxxxxxxxx>
Signed-off-by: Willy Tarreau <w@xxxxxx>
---
include/net/if_inet6.h | 4 +-
net/ipv6/addrconf.c | 186 ++++++++++++++++++++++++++++++++++++-------------
2 files changed, 141 insertions(+), 49 deletions(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 3b558c6..a49b650 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -31,8 +31,10 @@
#define IF_PREFIX_AUTOCONF 0x02

enum {
+ INET6_IFADDR_STATE_PREDAD,
INET6_IFADDR_STATE_DAD,
INET6_IFADDR_STATE_POSTDAD,
+ INET6_IFADDR_STATE_ERRDAD,
INET6_IFADDR_STATE_UP,
INET6_IFADDR_STATE_DEAD,
};
@@ -58,7 +60,7 @@ struct inet6_ifaddr {
unsigned long cstamp; /* created timestamp */
unsigned long tstamp; /* updated timestamp */

- struct timer_list dad_timer;
+ struct delayed_work dad_work;

struct inet6_dev *idev;
struct rt6_info *rt;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 4ff6a9c..98dd353 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -139,10 +139,12 @@ static int ipv6_count_addresses(struct inet6_dev *idev);
static struct hlist_head inet6_addr_lst[IN6_ADDR_HSIZE];
static DEFINE_SPINLOCK(addrconf_hash_lock);

-static void addrconf_verify(unsigned long);
+static void addrconf_verify(void);
+static void addrconf_verify_rtnl(void);
+static void addrconf_verify_work(struct work_struct *);

-static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0);
-static DEFINE_SPINLOCK(addrconf_verify_lock);
+static struct workqueue_struct *addrconf_wq;
+static DECLARE_DELAYED_WORK(addr_chk_work, addrconf_verify_work);

static void addrconf_join_anycast(struct inet6_ifaddr *ifp);
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp);
@@ -157,7 +159,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx,
u32 flags, u32 noflags);

static void addrconf_dad_start(struct inet6_ifaddr *ifp);
-static void addrconf_dad_timer(unsigned long data);
+static void addrconf_dad_work(struct work_struct *w);
static void addrconf_dad_completed(struct inet6_ifaddr *ifp);
static void addrconf_dad_run(struct inet6_dev *idev);
static void addrconf_rs_timer(unsigned long data);
@@ -259,9 +261,9 @@ static void addrconf_del_rs_timer(struct inet6_dev *idev)
__in6_dev_put(idev);
}

-static void addrconf_del_dad_timer(struct inet6_ifaddr *ifp)
+static void addrconf_del_dad_work(struct inet6_ifaddr *ifp)
{
- if (del_timer(&ifp->dad_timer))
+ if (cancel_delayed_work(&ifp->dad_work))
__in6_ifa_put(ifp);
}

@@ -273,12 +275,12 @@ static void addrconf_mod_rs_timer(struct inet6_dev *idev,
mod_timer(&idev->rs_timer, jiffies + when);
}

-static void addrconf_mod_dad_timer(struct inet6_ifaddr *ifp,
- unsigned long when)
+static void addrconf_mod_dad_work(struct inet6_ifaddr *ifp,
+ unsigned long delay)
{
- if (!timer_pending(&ifp->dad_timer))
+ if (!delayed_work_pending(&ifp->dad_work))
in6_ifa_hold(ifp);
- mod_timer(&ifp->dad_timer, jiffies + when);
+ mod_delayed_work(addrconf_wq, &ifp->dad_work, delay);
}

static int snmp6_alloc_dev(struct inet6_dev *idev)
@@ -773,8 +775,9 @@ void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp)

in6_dev_put(ifp->idev);

- if (del_timer(&ifp->dad_timer))
- pr_notice("Timer is still running, when freeing ifa=%p\n", ifp);
+ if (cancel_delayed_work(&ifp->dad_work))
+ pr_notice("delayed DAD work was pending while freeing ifa=%p\n",
+ ifp);

if (ifp->state != INET6_IFADDR_STATE_DEAD) {
pr_warn("Freeing alive inet6 address %p\n", ifp);
@@ -866,8 +869,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen,

spin_lock_init(&ifa->lock);
spin_lock_init(&ifa->state_lock);
- setup_timer(&ifa->dad_timer, addrconf_dad_timer,
- (unsigned long)ifa);
+ INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work);
INIT_HLIST_NODE(&ifa->addr_lst);
ifa->scope = scope;
ifa->prefix_len = pfxlen;
@@ -927,6 +929,8 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
int deleted = 0, onlink = 0;
unsigned long expires = jiffies;

+ ASSERT_RTNL();
+
spin_lock_bh(&ifp->state_lock);
state = ifp->state;
ifp->state = INET6_IFADDR_STATE_DEAD;
@@ -991,7 +995,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp)
}
write_unlock_bh(&idev->lock);

- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);

ipv6_ifa_notify(RTM_DELADDR, ifp);

@@ -1614,7 +1618,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
{
if (ifp->flags&IFA_F_PERMANENT) {
spin_lock_bh(&ifp->lock);
- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);
ifp->flags |= IFA_F_TENTATIVE;
if (dad_failed)
ifp->flags |= IFA_F_DADFAILED;
@@ -1637,20 +1641,21 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
}
ipv6_del_addr(ifp);
#endif
- } else
+ } else {
ipv6_del_addr(ifp);
+ }
}

static int addrconf_dad_end(struct inet6_ifaddr *ifp)
{
int err = -ENOENT;

- spin_lock(&ifp->state_lock);
+ spin_lock_bh(&ifp->state_lock);
if (ifp->state == INET6_IFADDR_STATE_DAD) {
ifp->state = INET6_IFADDR_STATE_POSTDAD;
err = 0;
}
- spin_unlock(&ifp->state_lock);
+ spin_unlock_bh(&ifp->state_lock);

return err;
}
@@ -1683,7 +1688,12 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp)
}
}

- addrconf_dad_stop(ifp, 1);
+ spin_lock_bh(&ifp->state_lock);
+ /* transition from _POSTDAD to _ERRDAD */
+ ifp->state = INET6_IFADDR_STATE_ERRDAD;
+ spin_unlock_bh(&ifp->state_lock);
+
+ addrconf_mod_dad_work(ifp, 0);
}

/* Join to solicited addr multicast group. */
@@ -1692,6 +1702,8 @@ void addrconf_join_solict(struct net_device *dev, const struct in6_addr *addr)
{
struct in6_addr maddr;

+ ASSERT_RTNL();
+
if (dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;

@@ -1703,6 +1715,8 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
{
struct in6_addr maddr;

+ ASSERT_RTNL();
+
if (idev->dev->flags&(IFF_LOOPBACK|IFF_NOARP))
return;

@@ -1713,6 +1727,9 @@ void addrconf_leave_solict(struct inet6_dev *idev, const struct in6_addr *addr)
static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+
+ ASSERT_RTNL();
+
if (ifp->prefix_len == 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -1724,6 +1741,9 @@ static void addrconf_join_anycast(struct inet6_ifaddr *ifp)
static void addrconf_leave_anycast(struct inet6_ifaddr *ifp)
{
struct in6_addr addr;
+
+ ASSERT_RTNL();
+
if (ifp->prefix_len == 127) /* RFC 6164 */
return;
ipv6_addr_prefix(&addr, &ifp->addr, ifp->prefix_len);
@@ -2358,7 +2378,7 @@ ok:
}
#endif
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify();
}
}
inet6_prefix_notify(RTM_NEWPREFIX, in6_dev, pinfo);
@@ -2501,7 +2521,7 @@ static int inet6_addr_add(struct net *net, int ifindex, const struct in6_addr *p
*/
addrconf_dad_start(ifp);
in6_ifa_put(ifp);
- addrconf_verify(0);
+ addrconf_verify_rtnl();
return 0;
}

@@ -3082,7 +3102,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
hlist_for_each_entry_rcu(ifa, h, addr_lst) {
if (ifa->idev == idev) {
hlist_del_init_rcu(&ifa->addr_lst);
- addrconf_del_dad_timer(ifa);
+ addrconf_del_dad_work(ifa);
goto restart;
}
}
@@ -3122,7 +3142,7 @@ static int addrconf_ifdown(struct net_device *dev, int how)
while (!list_empty(&idev->addr_list)) {
ifa = list_first_entry(&idev->addr_list,
struct inet6_ifaddr, if_list);
- addrconf_del_dad_timer(ifa);
+ addrconf_del_dad_work(ifa);

list_del(&ifa->if_list);

@@ -3217,10 +3237,10 @@ static void addrconf_dad_kick(struct inet6_ifaddr *ifp)
rand_num = net_random() % (idev->cnf.rtr_solicit_delay ? : 1);

ifp->dad_probes = idev->cnf.dad_transmits;
- addrconf_mod_dad_timer(ifp, rand_num);
+ addrconf_mod_dad_work(ifp, rand_num);
}

-static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+static void addrconf_dad_begin(struct inet6_ifaddr *ifp)
{
struct inet6_dev *idev = ifp->idev;
struct net_device *dev = idev->dev;
@@ -3272,25 +3292,68 @@ out:
read_unlock_bh(&idev->lock);
}

-static void addrconf_dad_timer(unsigned long data)
+static void addrconf_dad_start(struct inet6_ifaddr *ifp)
+{
+ bool begin_dad = false;
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state != INET6_IFADDR_STATE_DEAD) {
+ ifp->state = INET6_IFADDR_STATE_PREDAD;
+ begin_dad = true;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (begin_dad)
+ addrconf_mod_dad_work(ifp, 0);
+}
+
+static void addrconf_dad_work(struct work_struct *w)
{
- struct inet6_ifaddr *ifp = (struct inet6_ifaddr *) data;
+ struct inet6_ifaddr *ifp = container_of(to_delayed_work(w),
+ struct inet6_ifaddr,
+ dad_work);
struct inet6_dev *idev = ifp->idev;
struct in6_addr mcaddr;

+ enum {
+ DAD_PROCESS,
+ DAD_BEGIN,
+ DAD_ABORT,
+ } action = DAD_PROCESS;
+
+ rtnl_lock();
+
+ spin_lock_bh(&ifp->state_lock);
+ if (ifp->state == INET6_IFADDR_STATE_PREDAD) {
+ action = DAD_BEGIN;
+ ifp->state = INET6_IFADDR_STATE_DAD;
+ } else if (ifp->state == INET6_IFADDR_STATE_ERRDAD) {
+ action = DAD_ABORT;
+ ifp->state = INET6_IFADDR_STATE_POSTDAD;
+ }
+ spin_unlock_bh(&ifp->state_lock);
+
+ if (action == DAD_BEGIN) {
+ addrconf_dad_begin(ifp);
+ goto out;
+ } else if (action == DAD_ABORT) {
+ addrconf_dad_stop(ifp, 1);
+ goto out;
+ }
+
if (!ifp->dad_probes && addrconf_dad_end(ifp))
goto out;

- write_lock(&idev->lock);
+ write_lock_bh(&idev->lock);
if (idev->dead || !(idev->if_flags & IF_READY)) {
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}

spin_lock(&ifp->lock);
if (ifp->state == INET6_IFADDR_STATE_DEAD) {
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);
goto out;
}

@@ -3301,7 +3364,7 @@ static void addrconf_dad_timer(unsigned long data)

ifp->flags &= ~(IFA_F_TENTATIVE|IFA_F_OPTIMISTIC|IFA_F_DADFAILED);
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);

addrconf_dad_completed(ifp);

@@ -3309,15 +3372,16 @@ static void addrconf_dad_timer(unsigned long data)
}

ifp->dad_probes--;
- addrconf_mod_dad_timer(ifp, ifp->idev->nd_parms->retrans_time);
+ addrconf_mod_dad_work(ifp, ifp->idev->nd_parms->retrans_time);
spin_unlock(&ifp->lock);
- write_unlock(&idev->lock);
+ write_unlock_bh(&idev->lock);

/* send a neighbour solicitation for our addr */
addrconf_addr_solict_mult(&ifp->addr, &mcaddr);
ndisc_send_ns(ifp->idev->dev, NULL, &ifp->addr, &mcaddr, &in6addr_any);
out:
in6_ifa_put(ifp);
+ rtnl_unlock();
}

static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
@@ -3325,7 +3389,7 @@ static void addrconf_dad_completed(struct inet6_ifaddr *ifp)
struct net_device *dev = ifp->idev->dev;
struct in6_addr lladdr;

- addrconf_del_dad_timer(ifp);
+ addrconf_del_dad_work(ifp);

/*
* Configure the address for reception. Now it is valid.
@@ -3557,23 +3621,23 @@ int ipv6_chk_home_addr(struct net *net, const struct in6_addr *addr)
* Periodic address status verification
*/

-static void addrconf_verify(unsigned long foo)
+static void addrconf_verify_rtnl(void)
{
unsigned long now, next, next_sec, next_sched;
struct inet6_ifaddr *ifp;
int i;

+ ASSERT_RTNL();
+
rcu_read_lock_bh();
- spin_lock(&addrconf_verify_lock);
now = jiffies;
next = round_jiffies_up(now + ADDR_CHECK_FREQUENCY);

- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);

for (i = 0; i < IN6_ADDR_HSIZE; i++) {
restart:
- hlist_for_each_entry_rcu_bh(ifp,
- &inet6_addr_lst[i], addr_lst) {
+ hlist_for_each_entry_rcu_bh(ifp, &inet6_addr_lst[i], addr_lst) {
unsigned long age;

if (ifp->flags & IFA_F_PERMANENT)
@@ -3664,13 +3728,22 @@ restart:

ADBG((KERN_DEBUG "now = %lu, schedule = %lu, rounded schedule = %lu => %lu\n",
now, next, next_sec, next_sched));
-
- addr_chk_timer.expires = next_sched;
- add_timer(&addr_chk_timer);
- spin_unlock(&addrconf_verify_lock);
+ mod_delayed_work(addrconf_wq, &addr_chk_work, next_sched - now);
rcu_read_unlock_bh();
}

+static void addrconf_verify_work(struct work_struct *w)
+{
+ rtnl_lock();
+ addrconf_verify_rtnl();
+ rtnl_unlock();
+}
+
+static void addrconf_verify(void)
+{
+ mod_delayed_work(addrconf_wq, &addr_chk_work, 0);
+}
+
static struct in6_addr *extract_addr(struct nlattr *addr, struct nlattr *local)
{
struct in6_addr *pfx = NULL;
@@ -3722,6 +3795,8 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,
clock_t expires;
unsigned long timeout;

+ ASSERT_RTNL();
+
if (!valid_lft || (prefered_lft > valid_lft))
return -EINVAL;

@@ -3755,7 +3830,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags,

addrconf_prefix_route(&ifp->addr, ifp->prefix_len, ifp->idev->dev,
expires, flags);
- addrconf_verify(0);
+ addrconf_verify_rtnl();

return 0;
}
@@ -4364,6 +4439,8 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
bool update_rs = false;
struct in6_addr ll_addr;

+ ASSERT_RTNL();
+
if (token == NULL)
return -EINVAL;
if (ipv6_addr_any(token))
@@ -4409,6 +4486,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token)
}

write_unlock_bh(&idev->lock);
+ addrconf_verify_rtnl();
return 0;
}

@@ -4610,6 +4688,9 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
{
struct net *net = dev_net(ifp->idev->dev);

+ if (event)
+ ASSERT_RTNL();
+
inet6_ifa_notify(event ? : RTM_NEWADDR, ifp);

switch (event) {
@@ -5138,6 +5219,12 @@ int __init addrconf_init(void)
if (err < 0)
goto out_addrlabel;

+ addrconf_wq = create_workqueue("ipv6_addrconf");
+ if (!addrconf_wq) {
+ err = -ENOMEM;
+ goto out_nowq;
+ }
+
/* The addrconf netdev notifier requires that loopback_dev
* has it's ipv6 private information allocated and setup
* before it can bring up and give link-local addresses
@@ -5168,7 +5255,7 @@ int __init addrconf_init(void)

register_netdevice_notifier(&ipv6_dev_notf);

- addrconf_verify(0);
+ addrconf_verify();

err = rtnl_af_register(&inet6_ops);
if (err < 0)
@@ -5199,6 +5286,8 @@ errout:
errout_af:
unregister_netdevice_notifier(&ipv6_dev_notf);
errlo:
+ destroy_workqueue(addrconf_wq);
+out_nowq:
unregister_pernet_subsys(&addrconf_ops);
out_addrlabel:
ipv6_addr_label_cleanup();
@@ -5234,7 +5323,8 @@ void addrconf_cleanup(void)
for (i = 0; i < IN6_ADDR_HSIZE; i++)
WARN_ON(!hlist_empty(&inet6_addr_lst[i]));
spin_unlock_bh(&addrconf_hash_lock);
-
- del_timer(&addr_chk_timer);
+ cancel_delayed_work(&addr_chk_work);
rtnl_unlock();
+
+ destroy_workqueue(addrconf_wq);
}
--
2.8.0.rc2.1.gbe9624a