RE: [PATCH net v2] udp: ipv4: manipulate network header of NATed UDP GRO fraglist

From: Dongseok Yi
Date: Sun Jan 17 2021 - 18:57:00 EST


On 2021-01-16 02:12, Alexander Lobakin wrote:
> From: Dongseok Yi <dseok.yi@xxxxxxxxxxx>
> Date: Fri, 15 Jan 2021 22:20:35 +0900
>
> > UDP/IP header of UDP GROed frag_skbs are not updated even after NAT
> > forwarding. Only the header of head_skb from ip_finish_output_gso ->
> > skb_gso_segment is updated but following frag_skbs are not updated.
> >
> > A call path skb_mac_gso_segment -> inet_gso_segment ->
> > udp4_ufo_fragment -> __udp_gso_segment -> __udp_gso_segment_list
> > does not try to update UDP/IP header of the segment list but copy
> > only the MAC header.
> >
> > Update dport, daddr and checksums of each skb of the segment list
> > in __udp_gso_segment_list. It covers both SNAT and DNAT.
> >
> > Fixes: 9fd1ff5d2ac7 (udp: Support UDP fraglist GRO/GSO.)
> > Signed-off-by: Dongseok Yi <dseok.yi@xxxxxxxxxxx>
> > ---
> > v1:
> > Steffen Klassert said, there could be 2 options.
> > https://lore.kernel.org/patchwork/patch/1362257/
> > I was trying to write a quick fix, but it was not easy to forward
> > segmented list. Currently, assuming DNAT only.
> >
> > v2:
> > Per Steffen Klassert request, move the procedure from
> > udp4_ufo_fragment to __udp_gso_segment_list and support SNAT.
> >
> > To Alexander Lobakin, I've checked your email late. Just use this
> > patch as a reference. It support SNAT too, but does not support IPv6
> > yet. I cannot make IPv6 header changes in __udp_gso_segment_list due
> > to the file is in IPv4 directory.
>
> I used another approach, tried to make fraglist GRO closer to plain
> in terms of checksummming, as it is confusing to me why GSO packet
> should have CHECKSUM_UNNECESSARY. Just let Netfilter do its mangling,
> and then use classic UDP GSO magic at the end of segmentation.
> I also see the idea of explicit comparing and editing of IP and UDP
> headers right in __udp_gso_segment_list() rather unacceptable.

If I understand UDP GRO fraglist correctly, it keeps the length of
each skb of the fraglist. But your approach might change the lengths
by gso_size. What if each skb of the fraglist had different lengths?

For CHECKSUM_UNNECESSARY, GROed head_skb might have an invalid
checksum. But finally, the fraglist will be segmented to queue to
sk_receive_queue with head_skb. We could pass the GROed head_skb with
CHECKSUM_UNNECESSARY.

>
> Dongseok, Steffen, please test this WIP diff and tell if this one
> works for you, so I could clean up the code and make a patch.
> For me, it works now in any configurations, with and without
> checksum/GSO/fraglist offload.
>
> diff --git a/net/core/skbuff.c b/net/core/skbuff.c
> index c1a6f262636a..646a42e88e83 100644
> --- a/net/core/skbuff.c
> +++ b/net/core/skbuff.c
> @@ -3674,6 +3674,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> unsigned int offset)
> {
> struct sk_buff *list_skb = skb_shinfo(skb)->frag_list;
> + unsigned int doffset = skb->data - skb_mac_header(skb);
> unsigned int tnl_hlen = skb_tnl_header_len(skb);
> unsigned int delta_truesize = 0;
> unsigned int delta_len = 0;
> @@ -3681,7 +3682,7 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> struct sk_buff *nskb, *tmp;
> int err;
>
> - skb_push(skb, -skb_network_offset(skb) + offset);
> + skb_push(skb, doffset);
>
> skb_shinfo(skb)->frag_list = NULL;
>
> @@ -3716,12 +3717,11 @@ struct sk_buff *skb_segment_list(struct sk_buff *skb,
> delta_len += nskb->len;
> delta_truesize += nskb->truesize;
>
> - skb_push(nskb, -skb_network_offset(nskb) + offset);
> + skb_push(nskb, skb_headroom(nskb) - skb_headroom(skb));
>
> skb_release_head_state(nskb);
> - __copy_skb_header(nskb, skb);
> + __copy_skb_header(nskb, skb);
>
> - skb_headers_offset_update(nskb, skb_headroom(nskb) - skb_headroom(skb));
> skb_copy_from_linear_data_offset(skb, -tnl_hlen,
> nskb->data - tnl_hlen,
> offset + tnl_hlen);
> diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c
> index ff39e94781bf..61665fcd8c85 100644
> --- a/net/ipv4/udp_offload.c
> +++ b/net/ipv4/udp_offload.c
> @@ -190,13 +190,58 @@ EXPORT_SYMBOL(skb_udp_tunnel_segment);
> static struct sk_buff *__udp_gso_segment_list(struct sk_buff *skb,
> netdev_features_t features)
> {
> - unsigned int mss = skb_shinfo(skb)->gso_size;
> + struct sk_buff *seg;
> + struct udphdr *uh;
> + unsigned int mss;
> + __be16 newlen;
> + __sum16 check;
> +
> + mss = skb_shinfo(skb)->gso_size;
> + if (skb->len <= sizeof(*uh) + mss)
> + return ERR_PTR(-EINVAL);
>
> - skb = skb_segment_list(skb, features, skb_mac_header_len(skb));
> + skb_pull(skb, sizeof(*uh));
> +
> + skb = skb_segment_list(skb, features, skb->data - skb_mac_header(skb));
> if (IS_ERR(skb))
> return skb;
>
> - udp_hdr(skb)->len = htons(sizeof(struct udphdr) + mss);
> + seg = skb;
> + uh = udp_hdr(seg);
> +
> + /* compute checksum adjustment based on old length versus new */
> + newlen = htons(sizeof(*uh) + mss);
> + check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
> +
> + for (;;) {
> + if (!seg->next)
> + break;
> +
> + uh->len = newlen;
> + uh->check = check;
> +
> + if (seg->ip_summed == CHECKSUM_PARTIAL)
> + gso_reset_checksum(seg, ~check);
> + else
> + uh->check = gso_make_checksum(seg, ~check) ? :
> + CSUM_MANGLED_0;
> +
> + seg = seg->next;
> + uh = udp_hdr(seg);
> + }
> +
> + /* last packet can be partial gso_size, account for that in checksum */
> + newlen = htons(skb_tail_pointer(seg) - skb_transport_header(seg) +
> + seg->data_len);
> + check = csum16_add(csum16_sub(uh->check, uh->len), newlen);
> +
> + uh->len = newlen;
> + uh->check = check;
> +
> + if (seg->ip_summed == CHECKSUM_PARTIAL)
> + gso_reset_checksum(seg, ~check);
> + else
> + uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0;
>
> return skb;
> }
> @@ -602,27 +647,13 @@ INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
> const struct iphdr *iph = ip_hdr(skb);
> struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
>
> - if (NAPI_GRO_CB(skb)->is_flist) {
> - uh->len = htons(skb->len - nhoff);
> -
> - skb_shinfo(skb)->gso_type |= (SKB_GSO_FRAGLIST|SKB_GSO_UDP_L4);
> - skb_shinfo(skb)->gso_segs = NAPI_GRO_CB(skb)->count;
> -
> - if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
> - if (skb->csum_level < SKB_MAX_CSUM_LEVEL)
> - skb->csum_level++;
> - } else {
> - skb->ip_summed = CHECKSUM_UNNECESSARY;
> - skb->csum_level = 0;
> - }
> -
> - return 0;
> - }
> -
> if (uh->check)
> uh->check = ~udp_v4_check(skb->len - nhoff, iph->saddr,
> iph->daddr, 0);
>
> + if (NAPI_GRO_CB(skb)->is_flist)
> + skb_shinfo(skb)->gso_type |= SKB_GSO_FRAGLIST;
> +
> return udp_gro_complete(skb, nhoff, udp4_lib_lookup_skb);
> }
>