Re: [PATCH 1/2] r8169: Add XDP support for pass and drop actions

From: Heiner Kallweit
Date: Tue Sep 14 2021 - 12:23:12 EST


On 14.09.2021 11:31, Guilin Tang wrote:
> This commi implements simple xdp drop and pass in the r8169 driver
>
> Signed-off-by: Guilin Tang <tangguilin@xxxxxxxxxxxxx>
> ---
> drivers/net/ethernet/realtek/r8169_main.c | 99 +++++++++++++++++++++--
> 1 file changed, 94 insertions(+), 5 deletions(-)
>
> diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
> index d927211f8d2c..69bc3c68e73d 100644
> --- a/drivers/net/ethernet/realtek/r8169_main.c
> +++ b/drivers/net/ethernet/realtek/r8169_main.c
> @@ -30,6 +30,9 @@
> #include <linux/ipv6.h>
> #include <asm/unaligned.h>
> #include <net/ip6_checksum.h>
> +#include <net/xdp.h>
> +#include <linux/bpf.h>
> +#include <linux/bpf_trace.h>
>
> #include "r8169.h"
> #include "r8169_firmware.h"
> @@ -543,6 +546,12 @@ enum rtl_rx_desc_bit {
> #define RTL_GSO_MAX_SIZE_V2 64000
> #define RTL_GSO_MAX_SEGS_V2 64
>
> +/* XDP */
> +#define R8169_XDP_PASS 0
> +#define R8169_XDP_DROP BIT(0)
> +#define R8169_XDP_TX BIT(1)
> +#define R8169_XDP_REDIR BIT(2)
> +
> struct TxDesc {
> __le32 opts1;
> __le32 opts2;
> @@ -634,6 +643,8 @@ struct rtl8169_private {
> struct rtl_fw *rtl_fw;
>
> u32 ocp_base;
> + /*xdp bpf*/
> + struct bpf_prog *rtl_xdp;
> };
>
> typedef void (*rtl_generic_fct)(struct rtl8169_private *tp);
> @@ -3896,6 +3907,7 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
> tp->RxDescArray[i].addr = 0;
> tp->RxDescArray[i].opts1 = 0;
> }
> + tp->rtl_xdp = NULL;
> }
>
> static int rtl8169_rx_fill(struct rtl8169_private *tp)
> @@ -4501,10 +4513,44 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
> skb_checksum_none_assert(skb);
> }
>
> +static struct sk_buff *rtl8619_run_xdp(struct rtl8169_private *tp, struct bpf_prog *xdp_prog,
> + void *rx_buf, unsigned int pkt_size)
> +{

Why return type struct sk_buff * and not a normal int / errno ?

> + int result = R8169_XDP_PASS;
> + struct xdp_buff xdp;
> + u32 act;
> +
> + xdp.data = rx_buf;
> + xdp.data_end = xdp.data + pkt_size;
> + xdp_set_data_meta_invalid(&xdp);
> +
> + act = bpf_prog_run_xdp(xdp_prog, &xdp);
> + switch (act) {
> + case XDP_PASS:
> + break;
> + case XDP_TX:
> + case XDP_REDIRECT:
> + goto out_failure;
> + default:
> + bpf_warn_invalid_xdp_action(act);
> + fallthrough;
> + case XDP_ABORTED:
> +out_failure:
> + trace_xdp_exception(tp->dev, xdp_prog, act);
> + fallthrough;
> + case XDP_DROP:
> + result = R8169_XDP_DROP;
> + break;
> + }
> +
> + return ERR_PTR(-result);

Overriding errno's with own values isn't nice. If you need an errno,
use an errno.

> +}
> +
> static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget)
> {
> struct device *d = tp_to_dev(tp);
> int count;
> + struct bpf_prog *xdp_prog;
>
> for (count = 0; count < budget; count++, tp->cur_rx++) {
> unsigned int pkt_size, entry = tp->cur_rx % NUM_RX_DESC;
> @@ -4553,17 +4599,27 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, int budget
> goto release_descriptor;
> }
>
> + addr = le64_to_cpu(desc->addr);
> + rx_buf = page_address(tp->Rx_databuff[entry]);
> +
> + dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
> + prefetch(rx_buf);
> + //Determine whether to execute xdp
> + xdp_prog = READ_ONCE(tp->rtl_xdp);
> + if (xdp_prog) {
> + skb = rtl8619_run_xdp(tp, xdp_prog, (void *)rx_buf, pkt_size);

Why do you hijack the skb variable? In case of no error it's overwritten
a few lines later.

> + if (IS_ERR(skb)) {
> + dev->stats.rx_dropped++;
> + goto release_descriptor;
> + }
> + }
> +
> skb = napi_alloc_skb(&tp->napi, pkt_size);
> if (unlikely(!skb)) {
> dev->stats.rx_dropped++;
> goto release_descriptor;
> }
>
> - addr = le64_to_cpu(desc->addr);
> - rx_buf = page_address(tp->Rx_databuff[entry]);
> -
> - dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
> - prefetch(rx_buf);
> skb_copy_to_linear_data(skb, rx_buf, pkt_size);
> skb->tail += pkt_size;
> skb->len = pkt_size;
> @@ -4999,6 +5055,38 @@ static void rtl_remove_one(struct pci_dev *pdev)
> rtl_rar_set(tp, tp->dev->perm_addr);
> }
>
> +static int r8169_xdp_set(struct net_device *netdev, struct netdev_bpf *bpf)
> +{
> + struct rtl8169_private *tp = netdev_priv(netdev);
> + struct bpf_prog *prog = bpf->prog, *old_prog;
> + bool running = netif_running(netdev);
> + bool need_reset;
> +
> + need_reset = !!tp->rtl_xdp != !!prog;
> +

An explanation would be helpful why a reset is needed and what you
mean with reset. Using these functions outside their usual context
is at least risky.

> + if (need_reset && running)
> + rtl8169_close(netdev);
> +
> + old_prog = xchg(&tp->rtl_xdp, prog);
> + if (old_prog)
> + bpf_prog_put(old_prog);
> +
> + if (need_reset && running)
> + rtl_open(netdev);
> +
> + return 0;
> +}
> +
> +static int rtl8169_xdp(struct net_device *dev, struct netdev_bpf *xdp)
> +{
> + switch (xdp->command) {
> + case XDP_SETUP_PROG:
> + return r8169_xdp_set(dev, xdp);
> + default:
> + return -EINVAL;
> + }
> +}
> +
> static const struct net_device_ops rtl_netdev_ops = {
> .ndo_open = rtl_open,
> .ndo_stop = rtl8169_close,
> @@ -5013,6 +5101,7 @@ static const struct net_device_ops rtl_netdev_ops = {
> .ndo_set_mac_address = rtl_set_mac_address,
> .ndo_eth_ioctl = phy_do_ioctl_running,
> .ndo_set_rx_mode = rtl_set_rx_mode,
> + .ndo_bpf = rtl8169_xdp,
> #ifdef CONFIG_NET_POLL_CONTROLLER
> .ndo_poll_controller = rtl8169_netpoll,
> #endif
>