Re: [PATCH net-next] netdevsim: implement peer queue flow control
From: Jakub Kicinski
Date: Wed Jul 02 2025 - 17:07:27 EST
On Tue, 01 Jul 2025 11:10:56 -0700 Breno Leitao wrote:
> +static void nsim_wake_queue(struct net_device *net, struct netdev_queue *txq, void *unused)
> +{
> + if (!(netif_tx_queue_stopped(txq)))
> + return;
> + netif_tx_wake_queue(txq);
> +}
> +
> +static void nsim_wake_all_queues(struct net_device *dev, struct net_device *peer_dev)
> +{
> + netdev_for_each_tx_queue(peer_dev, nsim_wake_queue, NULL);
> + netdev_for_each_tx_queue(dev, nsim_wake_queue, NULL);
I think you can use netif_tx_wake_all_queues() directly in the caller
> +}
> +
> static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf, size_t count)
> {
> struct netdevsim *nsim, *peer;
> @@ -367,6 +380,9 @@ static ssize_t unlink_device_store(const struct bus_type *bus, const char *buf,
> RCU_INIT_POINTER(nsim->peer, NULL);
> RCU_INIT_POINTER(peer->peer, NULL);
>
> + synchronize_net();
> + nsim_wake_all_queues(dev, peer->netdev);
> +
> out_put_netns:
> put_net(ns);
> rtnl_unlock();
> -static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
> +static void nsim_start_peer_tx_queue(struct net_device *dev, struct nsim_rq *rq)
> +{
> + struct netdevsim *ns = netdev_priv(dev);
> + struct net_device *peer_dev;
> + struct netdevsim *peer_ns;
> + struct netdev_queue *txq;
> + u16 idx;
> +
> + idx = rq->napi.index;
> + peer_ns = rcu_dereference(ns->peer);
> + if (!peer_ns)
> + return;
> +
> + /* TX device */
> + peer_dev = peer_ns->netdev;
> +
> + if (dev->real_num_tx_queues != peer_dev->num_rx_queues)
> + return;
> +
> + txq = netdev_get_tx_queue(peer_dev, idx);
> + if (!(netif_tx_queue_stopped(txq)))
> + return;
> +
> + netif_tx_wake_queue(txq);
> +}
> +
> +static void nsim_stop_peer_tx_queue(struct net_device *dev, struct nsim_rq *rq,
> + u16 idx)
> +{
> + struct netdevsim *ns = netdev_priv(dev);
> + struct net_device *peer_dev;
> + struct netdevsim *peer_ns;
> +
> + peer_ns = rcu_dereference(ns->peer);
> + if (!peer_ns)
> + return;
> +
> + /* TX device */
> + peer_dev = peer_ns->netdev;
For the wake we need to find the peer, that's true. But stop happens in
the Tx path IOW we're coming from nsim_start_xmit() which had the
right Tx device already, right? The peer we'll get here was the original
dev that was padded to nsim_start_xmit(), we just need to pass it into
nsim_napi_rx().
> + /* If different queues size, do not stop, since it is not
> + * easy to find which TX queue is mapped here
> + */
> + if (dev->real_num_tx_queues != peer_dev->num_rx_queues)
> + return;
> +
> + netif_subqueue_try_stop(peer_dev, idx,
> + NSIM_RING_SIZE - skb_queue_len(&rq->skb_queue),
> + NSIM_RING_SIZE / 2);
> +}
> +
> +static int nsim_napi_rx(struct net_device *dev, struct nsim_rq *rq,
> + struct sk_buff *skb)
> {
> if (skb_queue_len(&rq->skb_queue) > NSIM_RING_SIZE) {
> + rcu_read_lock();
> + nsim_stop_peer_tx_queue(dev, rq, skb_get_queue_mapping(skb));
> + rcu_read_unlock();
> dev_kfree_skb_any(skb);
> return NET_RX_DROP;
> }
> @@ -51,7 +106,7 @@ static int nsim_napi_rx(struct nsim_rq *rq, struct sk_buff *skb)
> static int nsim_forward_skb(struct net_device *dev, struct sk_buff *skb,
> struct nsim_rq *rq)
> {
> - return __dev_forward_skb(dev, skb) ?: nsim_napi_rx(rq, skb);
> + return __dev_forward_skb(dev, skb) ?: nsim_napi_rx(dev, rq, skb);
> }
>
> static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev)
> @@ -351,6 +406,9 @@ static int nsim_rcv(struct nsim_rq *rq, int budget)
> dev_dstats_rx_dropped(dev);
> }
>
> + rcu_read_lock();
> + nsim_start_peer_tx_queue(dev, rq);
> + rcu_read_unlock();
> return i;
> }
>
>
> ---
> base-commit: f6e98f17ad6829c48573952ede3f52ed00c1377f
> change-id: 20250630-netdev_flow_control-2b2d37965377
>
> Best regards,
> --
> Breno Leitao <leitao@xxxxxxxxxx>
>