[PATCH 3.2 65/77] ipv6: add complete rcu protection around np->opt

From: Ben Hutchings
Date: Thu Dec 24 2015 - 10:56:11 EST


3.2.75-rc1 review patch. If anyone has any objections, please let me know.

------------------

From: Eric Dumazet <edumazet@xxxxxxxxxx>

[ Upstream commit 45f6fad84cc305103b28d73482b344d7f5b76f39 ]

This patch addresses multiple problems :

UDP/RAW sendmsg() need to get a stable struct ipv6_txoptions
while socket is not locked : Other threads can change np->opt
concurrently. Dmitry posted a syzkaller
(http://github.com/google/syzkaller) program desmonstrating
use-after-free.

Starting with TCP/DCCP lockless listeners, tcp_v6_syn_recv_sock()
and dccp_v6_request_recv_sock() also need to use RCU protection
to dereference np->opt once (before calling ipv6_dup_options())

This patch adds full RCU protection to np->opt

Reported-by: Dmitry Vyukov <dvyukov@xxxxxxxxxx>
Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx>
Acked-by: Hannes Frederic Sowa <hannes@xxxxxxxxxxxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
[bwh: Backported to 3.2:
- Drop changes to l2tp
- Fix an additional use of np->opt in tcp_v6_send_synack()
- Fold in commit 43264e0bd963 ("ipv6: remove unnecessary codes in tcp_ipv6.c")
- Adjust context]
Signed-off-by: Ben Hutchings <ben@xxxxxxxxxxxxxxx>
---
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -369,7 +369,7 @@ struct ipv6_pinfo {
struct ipv6_ac_socklist *ipv6_ac_list;
struct ipv6_fl_socklist *ipv6_fl_list;

- struct ipv6_txoptions *opt;
+ struct ipv6_txoptions __rcu *opt;
struct sk_buff *pktoptions;
struct sk_buff *rxpmtu;
struct {
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -191,6 +191,7 @@ extern rwlock_t ip6_ra_lock;
*/

struct ipv6_txoptions {
+ atomic_t refcnt;
/* Length of this structure */
int tot_len;

@@ -203,7 +204,7 @@ struct ipv6_txoptions {
struct ipv6_opt_hdr *dst0opt;
struct ipv6_rt_hdr *srcrt; /* Routing Header */
struct ipv6_opt_hdr *dst1opt;
-
+ struct rcu_head rcu;
/* Option buffer, as read by IPV6_PKTOPTIONS, starts here. */
};

@@ -229,6 +230,24 @@ struct ipv6_fl_socklist {
struct ip6_flowlabel *fl;
};

+static inline struct ipv6_txoptions *txopt_get(const struct ipv6_pinfo *np)
+{
+ struct ipv6_txoptions *opt;
+
+ rcu_read_lock();
+ opt = rcu_dereference(np->opt);
+ if (opt && !atomic_inc_not_zero(&opt->refcnt))
+ opt = NULL;
+ rcu_read_unlock();
+ return opt;
+}
+
+static inline void txopt_put(struct ipv6_txoptions *opt)
+{
+ if (opt && atomic_dec_and_test(&opt->refcnt))
+ kfree_rcu(opt, rcu);
+}
+
extern struct ip6_flowlabel *fl6_sock_lookup(struct sock *sk, __be32 label);
extern struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions * opt_space,
struct ip6_flowlabel * fl,
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -252,7 +252,9 @@ static int dccp_v6_send_response(struct
security_req_classify_flow(req, flowi6_to_flowi(&fl6));


- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();

dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
if (IS_ERR(dst)) {
@@ -269,7 +271,10 @@ static int dccp_v6_send_response(struct
&ireq6->loc_addr,
&ireq6->rmt_addr);
ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
- err = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ rcu_read_lock();
+ err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
+ rcu_read_unlock();
err = net_xmit_eval(err);
}

@@ -463,6 +468,7 @@ static struct sock *dccp_v6_request_recv
{
struct inet6_request_sock *ireq6 = inet6_rsk(req);
struct ipv6_pinfo *newnp, *np = inet6_sk(sk);
+ struct ipv6_txoptions *opt;
struct inet_sock *newinet;
struct dccp6_sock *newdp6;
struct sock *newsk;
@@ -586,13 +592,15 @@ static struct sock *dccp_v6_request_recv
* Yes, keeping reference count would be much more clever, but we make
* one more one thing there: reattach optmem to newsk.
*/
- if (np->opt != NULL)
- newnp->opt = ipv6_dup_options(newsk, np->opt);
-
+ opt = rcu_dereference(np->opt);
+ if (opt) {
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
+ }
inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt != NULL)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;

dccp_sync_mss(newsk, dst_mtu(dst));

@@ -844,6 +852,7 @@ static int dccp_v6_connect(struct sock *
struct ipv6_pinfo *np = inet6_sk(sk);
struct dccp_sock *dp = dccp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct flowi6 fl6;
struct dst_entry *dst;
int addr_type;
@@ -946,7 +955,8 @@ static int dccp_v6_connect(struct sock *
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);

dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
if (IS_ERR(dst)) {
@@ -966,9 +976,8 @@ static int dccp_v6_connect(struct sock *
__ip6_dst_store(sk, dst, NULL, NULL);

icsk->icsk_ext_hdr_len = 0;
- if (np->opt != NULL)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;

inet->inet_dport = usin->sin6_port;

--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -430,8 +430,11 @@ void inet6_destroy_sock(struct sock *sk)

/* Free tx options */

- if ((opt = xchg(&np->opt, NULL)) != NULL)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt, NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
}

EXPORT_SYMBOL_GPL(inet6_destroy_sock);
@@ -669,7 +672,10 @@ int inet6_sk_rebuild_header(struct sock
fl6.fl6_sport = inet->inet_sport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt),
+ &final);
+ rcu_read_unlock();

dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
if (IS_ERR(dst)) {
--- a/net/ipv6/datagram.c
+++ b/net/ipv6/datagram.c
@@ -166,8 +166,10 @@ ipv4_connected:

security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

- opt = flowlabel ? flowlabel->opt : np->opt;
+ rcu_read_lock();
+ opt = flowlabel ? flowlabel->opt : rcu_dereference(np->opt);
final_p = fl6_update_dst(&fl6, opt, &final);
+ rcu_read_unlock();

dst = ip6_dst_lookup_flow(sk, &fl6, final_p, true);
err = 0;
--- a/net/ipv6/exthdrs.c
+++ b/net/ipv6/exthdrs.c
@@ -748,6 +748,7 @@ ipv6_dup_options(struct sock *sk, struct
*((char**)&opt2->dst1opt) += dif;
if (opt2->srcrt)
*((char**)&opt2->srcrt) += dif;
+ atomic_set(&opt2->refcnt, 1);
}
return opt2;
}
@@ -812,7 +813,7 @@ ipv6_renew_options(struct sock *sk, stru
return ERR_PTR(-ENOBUFS);

memset(opt2, 0, tot_len);
-
+ atomic_set(&opt2->refcnt, 1);
opt2->tot_len = tot_len;
p = (char *)(opt2 + 1);

--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -66,7 +66,9 @@ struct dst_entry *inet6_csk_route_req(st
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();
ipv6_addr_copy(&fl6.saddr, &treq->loc_addr);
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
@@ -225,7 +227,9 @@ int inet6_csk_xmit(struct sk_buff *skb,
fl6.fl6_dport = inet->inet_dport;
security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ rcu_read_lock();
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
+ rcu_read_unlock();

dst = __inet6_csk_dst_check(sk, np->dst_cookie);

@@ -248,7 +252,8 @@ int inet6_csk_xmit(struct sk_buff *skb,
/* Restore final destination back after routing done */
ipv6_addr_copy(&fl6.daddr, &np->daddr);

- res = ip6_xmit(sk, skb, &fl6, np->opt, np->tclass);
+ res = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
rcu_read_unlock();
return res;
}
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -110,10 +110,12 @@ struct ipv6_txoptions *ipv6_update_optio
icsk->icsk_ext_hdr_len = opt->opt_flen + opt->opt_nflen;
icsk->icsk_sync_mss(sk, icsk->icsk_pmtu_cookie);
}
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
} else {
spin_lock(&sk->sk_dst_lock);
- opt = xchg(&inet6_sk(sk)->opt, opt);
+ opt = xchg((__force struct ipv6_txoptions **)&inet6_sk(sk)->opt,
+ opt);
spin_unlock(&sk->sk_dst_lock);
}
sk_dst_reset(sk);
@@ -213,9 +215,12 @@ static int do_ipv6_setsockopt(struct soc
sk->sk_socket->ops = &inet_dgram_ops;
sk->sk_family = PF_INET;
}
- opt = xchg(&np->opt, NULL);
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = xchg((__force struct ipv6_txoptions **)&np->opt,
+ NULL);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
pktopt = xchg(&np->pktoptions, NULL);
kfree_skb(pktopt);

@@ -384,7 +389,8 @@ static int do_ipv6_setsockopt(struct soc
if (optname != IPV6_RTHDR && !capable(CAP_NET_RAW))
break;

- opt = ipv6_renew_options(sk, np->opt, optname,
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ opt = ipv6_renew_options(sk, opt, optname,
(struct ipv6_opt_hdr __user *)optval,
optlen);
if (IS_ERR(opt)) {
@@ -413,8 +419,10 @@ static int do_ipv6_setsockopt(struct soc
retv = 0;
opt = ipv6_update_options(sk, opt);
sticky_done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}

@@ -467,6 +475,7 @@ sticky_done:
break;

memset(opt, 0, sizeof(*opt));
+ atomic_set(&opt->refcnt, 1);
opt->tot_len = sizeof(*opt) + optlen;
retv = -EFAULT;
if (copy_from_user(opt+1, optval, optlen))
@@ -483,8 +492,10 @@ update:
retv = 0;
opt = ipv6_update_options(sk, opt);
done:
- if (opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ if (opt) {
+ atomic_sub(opt->tot_len, &sk->sk_omem_alloc);
+ txopt_put(opt);
+ }
break;
}
case IPV6_UNICAST_HOPS:
@@ -1053,10 +1064,11 @@ static int do_ipv6_getsockopt(struct soc
case IPV6_RTHDR:
case IPV6_DSTOPTS:
{
+ struct ipv6_txoptions *opt;

lock_sock(sk);
- len = ipv6_getsockopt_sticky(sk, np->opt,
- optname, optval, len);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ len = ipv6_getsockopt_sticky(sk, opt, optname, optval, len);
release_sock(sk);
/* check if ipv6_getsockopt_sticky() returns err code */
if (len < 0)
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -724,6 +724,7 @@ static int rawv6_probe_proto_opt(struct
static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk,
struct msghdr *msg, size_t len)
{
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ipv6_txoptions opt_space;
struct sockaddr_in6 * sin6 = (struct sockaddr_in6 *) msg->msg_name;
struct in6_addr *daddr, *final_p, final;
@@ -830,8 +831,10 @@ static int rawv6_sendmsg(struct kiocb *i
if (!(opt->opt_nflen|opt->opt_flen))
opt = NULL;
}
- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -896,6 +899,7 @@ done:
dst_release(dst);
out:
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
return err<0?err:len;
do_confirm:
dst_confirm(dst);
--- a/net/ipv6/syncookies.c
+++ b/net/ipv6/syncookies.c
@@ -238,7 +238,7 @@ struct sock *cookie_v6_check(struct sock
memset(&fl6, 0, sizeof(fl6));
fl6.flowi6_proto = IPPROTO_TCP;
ipv6_addr_copy(&fl6.daddr, &ireq6->rmt_addr);
- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);
ipv6_addr_copy(&fl6.saddr, &ireq6->loc_addr);
fl6.flowi6_oif = sk->sk_bound_dev_if;
fl6.flowi6_mark = sk->sk_mark;
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -131,6 +131,7 @@ static int tcp_v6_connect(struct sock *s
struct ipv6_pinfo *np = inet6_sk(sk);
struct tcp_sock *tp = tcp_sk(sk);
struct in6_addr *saddr = NULL, *final_p, final;
+ struct ipv6_txoptions *opt;
struct rt6_info *rt;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -252,7 +253,8 @@ static int tcp_v6_connect(struct sock *s
fl6.fl6_dport = usin->sin6_port;
fl6.fl6_sport = inet->inet_sport;

- final_p = fl6_update_dst(&fl6, np->opt, &final);
+ opt = rcu_dereference_protected(np->opt, sock_owned_by_user(sk));
+ final_p = fl6_update_dst(&fl6, opt, &final);

security_sk_classify_flow(sk, flowi6_to_flowi(&fl6));

@@ -295,9 +297,9 @@ static int tcp_v6_connect(struct sock *s
}

icsk->icsk_ext_hdr_len = 0;
- if (np->opt)
- icsk->icsk_ext_hdr_len = (np->opt->opt_flen +
- np->opt->opt_nflen);
+ if (opt)
+ icsk->icsk_ext_hdr_len = opt->opt_flen +
+ opt->opt_nflen;

tp->rx_opt.mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr);

@@ -481,7 +483,6 @@ static int tcp_v6_send_synack(struct soc
struct inet6_request_sock *treq = inet6_rsk(req);
struct ipv6_pinfo *np = inet6_sk(sk);
struct sk_buff * skb;
- struct ipv6_txoptions *opt = NULL;
struct in6_addr * final_p, final;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -498,8 +499,7 @@ static int tcp_v6_send_synack(struct soc
fl6.fl6_sport = inet_rsk(req)->loc_port;
security_req_classify_flow(req, flowi6_to_flowi(&fl6));

- opt = np->opt;
- final_p = fl6_update_dst(&fl6, opt, &final);
+ final_p = fl6_update_dst(&fl6, rcu_dereference(np->opt), &final);

dst = ip6_dst_lookup_flow(sk, &fl6, final_p, false);
if (IS_ERR(dst)) {
@@ -513,13 +513,12 @@ static int tcp_v6_send_synack(struct soc
__tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr);

ipv6_addr_copy(&fl6.daddr, &treq->rmt_addr);
- err = ip6_xmit(sk, skb, &fl6, opt, np->tclass);
+ err = ip6_xmit(sk, skb, &fl6, rcu_dereference(np->opt),
+ np->tclass);
err = net_xmit_eval(err);
}

done:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
return err;
}
@@ -1408,7 +1407,6 @@ static struct sock * tcp_v6_syn_recv_soc
}

treq = inet6_rsk(req);
- opt = np->opt;

if (sk_acceptq_is_full(sk))
goto out_overflow;
@@ -1476,16 +1474,15 @@ static struct sock * tcp_v6_syn_recv_soc
but we make one more one thing there: reattach optmem
to newsk.
*/
+ opt = rcu_dereference(np->opt);
if (opt) {
- newnp->opt = ipv6_dup_options(newsk, opt);
- if (opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
+ opt = ipv6_dup_options(newsk, opt);
+ RCU_INIT_POINTER(newnp->opt, opt);
}
-
- inet_csk(newsk)->icsk_ext_hdr_len = 0;
- if (newnp->opt)
- inet_csk(newsk)->icsk_ext_hdr_len = (newnp->opt->opt_nflen +
- newnp->opt->opt_flen);
+ inet_csk(newsk)->icsk_ext_hdr_len = 0;
+ if (opt)
+ inet_csk(newsk)->icsk_ext_hdr_len = opt->opt_nflen +
+ opt->opt_flen;

tcp_mtup_init(newsk);
tcp_sync_mss(newsk, dst_mtu(dst));
@@ -1530,8 +1527,6 @@ static struct sock * tcp_v6_syn_recv_soc
out_overflow:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS);
out_nonewsk:
- if (opt && opt != np->opt)
- sock_kfree_s(sk, opt, opt->tot_len);
dst_release(dst);
out:
NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS);
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -954,6 +954,7 @@ int udpv6_sendmsg(struct kiocb *iocb, st
struct sockaddr_in6 *sin6 = (struct sockaddr_in6 *) msg->msg_name;
struct in6_addr *daddr, *final_p, final;
struct ipv6_txoptions *opt = NULL;
+ struct ipv6_txoptions *opt_to_free = NULL;
struct ip6_flowlabel *flowlabel = NULL;
struct flowi6 fl6;
struct dst_entry *dst;
@@ -1107,8 +1108,10 @@ do_udp_sendmsg:
opt = NULL;
connected = 0;
}
- if (opt == NULL)
- opt = np->opt;
+ if (!opt) {
+ opt = txopt_get(np);
+ opt_to_free = opt;
+ }
if (flowlabel)
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
@@ -1208,6 +1211,7 @@ do_append_data:
out:
dst_release(dst);
fl6_sock_release(flowlabel);
+ txopt_put(opt_to_free);
if (!err)
return len;
/*

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/