[PATCH 4.19 15/17] udp: Improve load balancing for SO_REUSEPORT.

From: Greg Kroah-Hartman
Date: Thu Jul 30 2020 - 04:07:11 EST


From: Kuniyuki Iwashima <kuniyu@xxxxxxxxxxxx>

[ Upstream commit efc6b6f6c3113e8b203b9debfb72d81e0f3dcace ]

Currently, SO_REUSEPORT does not work well if connected sockets are in a
UDP reuseport group.

Then reuseport_has_conns() returns true and the result of
reuseport_select_sock() is discarded. Also, unconnected sockets have the
same score, hence only does the first unconnected socket in udp_hslot
always receive all packets sent to unconnected sockets.

So, the result of reuseport_select_sock() should be used for load
balancing.

The noteworthy point is that the unconnected sockets placed after
connected sockets in sock_reuseport.socks will receive more packets than
others because of the algorithm in reuseport_select_sock().

index | connected | reciprocal_scale | result
---------------------------------------------
0 | no | 20% | 40%
1 | no | 20% | 20%
2 | yes | 20% | 0%
3 | no | 20% | 40%
4 | yes | 20% | 0%

If most of the sockets are connected, this can be a problem, but it still
works better than now.

Fixes: acdcecc61285 ("udp: correct reuseport selection with connected sockets")
CC: Willem de Bruijn <willemb@xxxxxxxxxx>
Reviewed-by: Benjamin Herrenschmidt <benh@xxxxxxxxxx>
Signed-off-by: Kuniyuki Iwashima <kuniyu@xxxxxxxxxxxx>
Acked-by: Willem de Bruijn <willemb@xxxxxxxxxx>
Signed-off-by: David S. Miller <davem@xxxxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>
---
net/ipv4/udp.c | 15 +++++++++------
net/ipv6/udp.c | 15 +++++++++------
2 files changed, 18 insertions(+), 12 deletions(-)

--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -433,7 +433,7 @@ static struct sock *udp4_lib_lookup2(str
struct udp_hslot *hslot2,
struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *sk, *result, *reuseport_result;
int score, badness;
u32 hash = 0;

@@ -443,17 +443,20 @@ static struct sock *udp4_lib_lookup2(str
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
+ reuseport_result = NULL;
+
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp_ehashfn(net, daddr, hnum,
saddr, sport);
- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
+ reuseport_result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (reuseport_result && !reuseport_has_conns(sk, false))
+ return reuseport_result;
}
+
+ result = reuseport_result ? : sk;
badness = score;
- result = sk;
}
}
return result;
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -167,7 +167,7 @@ static struct sock *udp6_lib_lookup2(str
int dif, int sdif, bool exact_dif,
struct udp_hslot *hslot2, struct sk_buff *skb)
{
- struct sock *sk, *result;
+ struct sock *sk, *result, *reuseport_result;
int score, badness;
u32 hash = 0;

@@ -177,17 +177,20 @@ static struct sock *udp6_lib_lookup2(str
score = compute_score(sk, net, saddr, sport,
daddr, hnum, dif, sdif, exact_dif);
if (score > badness) {
+ reuseport_result = NULL;
+
if (sk->sk_reuseport &&
sk->sk_state != TCP_ESTABLISHED) {
hash = udp6_ehashfn(net, daddr, hnum,
saddr, sport);

- result = reuseport_select_sock(sk, hash, skb,
- sizeof(struct udphdr));
- if (result && !reuseport_has_conns(sk, false))
- return result;
+ reuseport_result = reuseport_select_sock(sk, hash, skb,
+ sizeof(struct udphdr));
+ if (reuseport_result && !reuseport_has_conns(sk, false))
+ return reuseport_result;
}
- result = sk;
+
+ result = reuseport_result ? : sk;
badness = score;
}
}