Re: [PATCH net] ax25: Fix refcount leak issues of ax25_dev

From: Dan Carpenter
Date: Tue May 07 2024 - 04:08:29 EST


On Mon, May 06, 2024 at 11:18:06PM -0400, Lars Kellogg-Stedman wrote:
> On Sat, May 04, 2024 at 06:16:14PM GMT, Lars Kellogg-Stedman wrote:
> > My original patch corrected this by adding the call to netdev_hold()
> > right next to the ax25_cb_add() in ax25_rcv(), which solves this
> > problem. If it seems weird to have this login in ax25_rcv, we could move
> > it to ax25_accept, right around line 1430 [3]; that would look
> > something like:
>
> The same patch applies cleanly against the Raspberry Pi 6.6.30 kernel,
> and clears up the frequeny crashes I was experiencing in that
> environment as well.

I have reviewed this code some more. My theory is:

ax25_dev_device_up() <- sets refcount to 1
ax25_dev_device_down() <- set refcount to 0 and frees it

If the refcount is not 1 at ax25_dev_device_down() then something is
screwed up. So why do we even need more refcounting than that? But
apparently we do. I don't really understand networking that well so
maybe we can have lingering connections after the device is down.

So the next rule is if we set ax25->ax25_dev from NULL to non-NULL then
bump the refcount and decrement it if we set it back to NULL or we free
ax25. Right now that's happening in ax25_bind() and ax25_release(). And
also in ax25_kill_by_device() but not consistently.

But it needs to happen *everywhere* we set ax25->ax25_dev and we need to
decrement it when ax25 is freed in ax25_cb_put(). My patch is similar
to yours in that every ax25_rcv() will now bump the reference through
calling ax25_fillin_cb() or ax25_make_new(). The send path also bumps
the reference.

There are a few questions I don't know how to answer. I added two
-EBUSY paths to this patch. I'm not sure if this is correct.
Second, I don't understand the netdev_put(ax25_dev->dev, &s->dev_tracker);
stuff. Maybe that should be done in ax25_dev_hold/put().

This patch might not work because of the netdev_hold/put() thing...

I used the Smatch cross function database to show where ax25->ax25_dev
is set to NULL/non-NULL.

$ smdb.py where ax25_cb ax25_dev | grep -v "min-max"
net/ax25/ax25_route.c | ax25_rt_autobind | (struct ax25_cb)->ax25_dev | 0-u64max
net/ax25/af_ax25.c | ax25_kill_by_device | (struct ax25_cb)->ax25_dev | 0-u64max
net/ax25/af_ax25.c | ax25_fillin_cb | (struct ax25_cb)->ax25_dev | 0-u64max
net/ax25/af_ax25.c | ax25_setsockopt | (struct ax25_cb)->ax25_dev | 0-u64max
net/ax25/af_ax25.c | ax25_make_new | (struct ax25_cb)->ax25_dev | 0-u64max
net/ax25/af_ax25.c | ax25_bind | (struct ax25_cb)->ax25_dev | 4096-ptr_max
net/ax25/ax25_in.c | ax25_rcv | (struct ax25_cb)->ax25_dev | 0-u64max
net/ax25/ax25_out.c | ax25_send_frame | (struct ax25_cb)->ax25_dev | 0-u64max

regards,
dan carpenter

diff --git a/include/net/ax25.h b/include/net/ax25.h
index eb9cee8252c8..2cc94352b13d 100644
--- a/include/net/ax25.h
+++ b/include/net/ax25.h
@@ -275,25 +275,30 @@ static inline struct ax25_cb *sk_to_ax25(const struct sock *sk)
#define ax25_cb_hold(__ax25) \
refcount_inc(&((__ax25)->refcount))

-static __inline__ void ax25_cb_put(ax25_cb *ax25)
+static inline ax25_dev *ax25_dev_hold(ax25_dev *ax25_dev)
{
- if (refcount_dec_and_test(&ax25->refcount)) {
- kfree(ax25->digipeat);
- kfree(ax25);
- }
+ if (ax25_dev)
+ refcount_inc(&ax25_dev->refcount);
+ return ax25_dev;
}

-static inline void ax25_dev_hold(ax25_dev *ax25_dev)
+static inline void ax25_dev_put(ax25_dev *ax25_dev)
{
- refcount_inc(&ax25_dev->refcount);
+ if (ax25_dev && refcount_dec_and_test(&ax25_dev->refcount)) {
+ kfree(ax25_dev);
+ }
}

-static inline void ax25_dev_put(ax25_dev *ax25_dev)
+static __inline__ void ax25_cb_put(ax25_cb *ax25)
{
- if (refcount_dec_and_test(&ax25_dev->refcount)) {
- kfree(ax25_dev);
+ if (refcount_dec_and_test(&ax25->refcount)) {
+ if (ax25->ax25_dev)
+ ax25_dev_put(ax25->ax25_dev);
+ kfree(ax25->digipeat);
+ kfree(ax25);
}
}
+
static inline __be16 ax25_type_trans(struct sk_buff *skb, struct net_device *dev)
{
skb->dev = dev;
diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c
index 9169efb2f43a..4d1ab296d52c 100644
--- a/net/ax25/af_ax25.c
+++ b/net/ax25/af_ax25.c
@@ -92,6 +92,7 @@ static void ax25_kill_by_device(struct net_device *dev)
spin_unlock_bh(&ax25_list_lock);
ax25_disconnect(s, ENETUNREACH);
s->ax25_dev = NULL;
+ ax25_dev_put(ax25_dev);
ax25_cb_del(s);
spin_lock_bh(&ax25_list_lock);
goto again;
@@ -101,11 +102,8 @@ static void ax25_kill_by_device(struct net_device *dev)
lock_sock(sk);
ax25_disconnect(s, ENETUNREACH);
s->ax25_dev = NULL;
- if (sk->sk_socket) {
- netdev_put(ax25_dev->dev,
- &s->dev_tracker);
- ax25_dev_put(ax25_dev);
- }
+ netdev_put(ax25_dev->dev, &s->dev_tracker);
+ ax25_dev_put(ax25_dev);
ax25_cb_del(s);
release_sock(sk);
spin_lock_bh(&ax25_list_lock);
@@ -496,6 +494,7 @@ void ax25_fillin_cb(ax25_cb *ax25, ax25_dev *ax25_dev)
ax25->ax25_dev = ax25_dev;

if (ax25->ax25_dev != NULL) {
+ ax25_dev_hold(ax25->ax25_dev);
ax25_fillin_cb_from_dev(ax25, ax25_dev);
return;
}
@@ -685,6 +684,11 @@ static int ax25_setsockopt(struct socket *sock, int level, int optname,
break;
}

+ if (ax25->ax25_dev) {
+ rtnl_unlock();
+ res = -EBUSY;
+ break;
+ }
ax25->ax25_dev = ax25_dev_ax25dev(dev);
if (!ax25->ax25_dev) {
rtnl_unlock();
@@ -961,7 +965,7 @@ struct sock *ax25_make_new(struct sock *osk, struct ax25_dev *ax25_dev)
ax25->paclen = oax25->paclen;
ax25->window = oax25->window;

- ax25->ax25_dev = ax25_dev;
+ ax25->ax25_dev = ax25_dev_hold(ax25_dev);
ax25->source_addr = oax25->source_addr;

if (oax25->digipeat != NULL) {
@@ -995,6 +999,11 @@ static int ax25_release(struct socket *sock)
sock_orphan(sk);
ax25 = sk_to_ax25(sk);
ax25_dev = ax25->ax25_dev;
+ /*
+ * The ax25_destroy_socket() function decrements the reference but we
+ * need to keep a reference until the end of the function.
+ */
+ ax25_dev_hold(ax25_dev);

if (sk->sk_type == SOCK_SEQPACKET) {
switch (ax25->state) {
@@ -1147,6 +1156,12 @@ static int ax25_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)

if (ax25_dev) {
ax25_fillin_cb(ax25, ax25_dev);
+ /*
+ * both ax25_addr_ax25dev() and ax25_fillin_cb() take a
+ * reference but we only want to take one reference so drop
+ * the extra reference.
+ */
+ ax25_dev_put(ax25_dev);
netdev_hold(ax25_dev->dev, &ax25->dev_tracker, GFP_ATOMIC);
}

diff --git a/net/ax25/ax25_route.c b/net/ax25/ax25_route.c
index b7c4d656a94b..d7f6d9f4f20c 100644
--- a/net/ax25/ax25_route.c
+++ b/net/ax25/ax25_route.c
@@ -406,6 +406,10 @@ int ax25_rt_autobind(ax25_cb *ax25, ax25_address *addr)
ax25_route_lock_unuse();
return -EHOSTUNREACH;
}
+ if (ax25->ax25_dev) {
+ err = -EBUSY;
+ goto put;
+ }
if ((ax25->ax25_dev = ax25_dev_ax25dev(ax25_rt->dev)) == NULL) {
err = -EHOSTUNREACH;
goto put;