[PATCH 3.10 235/319] tcp: take care of truncations done by sk_filter()

From: Willy Tarreau
Date: Sun Feb 05 2017 - 14:24:39 EST


From: Eric Dumazet <edumazet@xxxxxxxxxx>

commit ac6e780070e30e4c35bd395acfe9191e6268bdd3 upstream.

With syzkaller's help, Marco Grassi found a bug in the TCP stack,
crashing in tcp_collapse().

The root cause is that sk_filter() can truncate the incoming skb,
but the TCP stack was not expecting this to happen.
It was probably expecting a simple DROP or ACCEPT behavior.

We first need to make sure no part of the TCP header can be removed.
Then we need to adjust TCP_SKB_CB(skb)->end_seq to account for the
bytes the filter trimmed off the end of the skb.

Many thanks to the syzkaller team and Marco for giving us a reproducer.

Signed-off-by: Eric Dumazet <edumazet@xxxxxxxxxx>
Reported-by: Marco Grassi <marco.gra@xxxxxxxxx>
Reported-by: Vladis Dronov <vdronov@xxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Willy Tarreau <w@xxxxxx>
---
include/linux/filter.h | 6 +++++-
include/net/tcp.h | 1 +
net/core/filter.c | 10 +++++-----
net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++++-
net/ipv6/tcp_ipv6.c | 6 ++++--
5 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index f65f5a6..c2bea01 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -36,7 +36,11 @@ static inline unsigned int sk_filter_len(const struct sk_filter *fp)
return fp->len * sizeof(struct sock_filter) + sizeof(*fp);
}

-extern int sk_filter(struct sock *sk, struct sk_buff *skb);
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap);
+static inline int sk_filter(struct sock *sk, struct sk_buff *skb)
+{
+ return sk_filter_trim_cap(sk, skb, 1);
+}
extern unsigned int sk_run_filter(const struct sk_buff *skb,
const struct sock_filter *filter);
extern int sk_unattached_filter_create(struct sk_filter **pfp,
diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1c5e037..79cd118 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1029,6 +1029,7 @@ static inline void tcp_prequeue_init(struct tcp_sock *tp)
}

extern bool tcp_prequeue(struct sock *sk, struct sk_buff *skb);
+int tcp_filter(struct sock *sk, struct sk_buff *skb);

#undef STATE_TRACE

diff --git a/net/core/filter.c b/net/core/filter.c
index c6c18d8..65f2a65 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -67,9 +67,10 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
}

/**
- * sk_filter - run a packet through a socket filter
+ * sk_filter_trim_cap - run a packet through a socket filter
* @sk: sock associated with &sk_buff
* @skb: buffer to filter
+ * @cap: limit on how short the eBPF program may trim the packet
*
* Run the filter code and then cut skb->data to correct size returned by
* sk_run_filter. If pkt_len is 0 we toss packet. If skb->len is smaller
@@ -78,7 +79,7 @@ static inline void *load_pointer(const struct sk_buff *skb, int k,
* be accepted or -EPERM if the packet should be tossed.
*
*/
-int sk_filter(struct sock *sk, struct sk_buff *skb)
+int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap)
{
int err;
struct sk_filter *filter;
@@ -99,14 +100,13 @@ int sk_filter(struct sock *sk, struct sk_buff *skb)
filter = rcu_dereference(sk->sk_filter);
if (filter) {
unsigned int pkt_len = SK_RUN_FILTER(filter, skb);
-
- err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM;
+ err = pkt_len ? pskb_trim(skb, max(cap, pkt_len)) : -EPERM;
}
rcu_read_unlock();

return err;
}
-EXPORT_SYMBOL(sk_filter);
+EXPORT_SYMBOL(sk_filter_trim_cap);

/**
* sk_run_filter - run a filter on a socket
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 5401fbf..6504a08 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -1959,6 +1959,21 @@ bool tcp_prequeue(struct sock *sk, struct sk_buff *skb)
}
EXPORT_SYMBOL(tcp_prequeue);

+int tcp_filter(struct sock *sk, struct sk_buff *skb)
+{
+ struct tcphdr *th = (struct tcphdr *)skb->data;
+ unsigned int eaten = skb->len;
+ int err;
+
+ err = sk_filter_trim_cap(sk, skb, th->doff * 4);
+ if (!err) {
+ eaten -= skb->len;
+ TCP_SKB_CB(skb)->end_seq -= eaten;
+ }
+ return err;
+}
+EXPORT_SYMBOL(tcp_filter);
+
/*
* From tcp_input.c
*/
@@ -2021,8 +2036,10 @@ process:
goto discard_and_relse;
nf_reset(skb);

- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard_and_relse;
+ th = (const struct tcphdr *)skb->data;
+ iph = ip_hdr(skb);

skb->dev = NULL;

diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d823738..70b10ed 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1330,7 +1330,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
goto discard;
#endif

- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard;

/*
@@ -1501,8 +1501,10 @@ process:
if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb))
goto discard_and_relse;

- if (sk_filter(sk, skb))
+ if (tcp_filter(sk, skb))
goto discard_and_relse;
+ th = (const struct tcphdr *)skb->data;
+ hdr = ipv6_hdr(skb);

skb->dev = NULL;

--
2.8.0.rc2.1.gbe9624a