Re: [PATCH net-next 10/11] tap: accept an array of XDP buffs through sendmsg()

From: Jason Wang
Date: Thu Sep 06 2018 - 23:41:42 EST




On 2018å09æ07æ 02:00, Michael S. Tsirkin wrote:
On Thu, Sep 06, 2018 at 12:05:25PM +0800, Jason Wang wrote:
This patch implement TUN_MSG_PTR msg_control type. This type allows
the caller to pass an array of XDP buffs to tuntap through ptr field
of the tun_msg_control. Tap will build skb through those XDP buffers.

This will avoid lots of indirect calls thus improves the icache
utilization and allows to do XDP batched flushing when doing XDP
redirection.

Signed-off-by: Jason Wang <jasowang@xxxxxxxxxx>
---
drivers/net/tap.c | 73 +++++++++++++++++++++++++++++++++++++++++++++--
1 file changed, 71 insertions(+), 2 deletions(-)

diff --git a/drivers/net/tap.c b/drivers/net/tap.c
index 7996ed7cbf18..50eb7bf22225 100644
--- a/drivers/net/tap.c
+++ b/drivers/net/tap.c
@@ -1146,14 +1146,83 @@ static const struct file_operations tap_fops = {
#endif
};
+static int tap_get_user_xdp(struct tap_queue *q, struct xdp_buff *xdp)
+{
+ struct virtio_net_hdr *gso = xdp->data_hard_start + sizeof(int);
+ int buflen = *(int *)xdp->data_hard_start;
+ int vnet_hdr_len = 0;
+ struct tap_dev *tap;
+ struct sk_buff *skb;
+ int err, depth;
+
+ if (q->flags & IFF_VNET_HDR)
+ vnet_hdr_len = READ_ONCE(q->vnet_hdr_sz);
+
+ skb = build_skb(xdp->data_hard_start, buflen);
+ if (!skb) {
+ err = -ENOMEM;
+ goto err;
+ }
So fundamentally why is it called XDP?
We just build and skb, don't we?

The reason is that the function accepts a pointer to XDP. And also the for the future development, I think the name is ok:

- we will probably do XDP offloading in this function.
- we may have a chance to call lower device's ndo_xdp_xmit() in the future.

Thanks


+
+ skb_reserve(skb, xdp->data - xdp->data_hard_start);
+ skb_put(skb, xdp->data_end - xdp->data);
+
+ skb_set_network_header(skb, ETH_HLEN);
+ skb_reset_mac_header(skb);
+ skb->protocol = eth_hdr(skb)->h_proto;
+
+ if (vnet_hdr_len) {
+ err = virtio_net_hdr_to_skb(skb, gso, tap_is_little_endian(q));
+ if (err)
+ goto err_kfree;
+ }
+
+ skb_probe_transport_header(skb, ETH_HLEN);
+
+ /* Move network header to the right position for VLAN tagged packets */
+ if ((skb->protocol == htons(ETH_P_8021Q) ||
+ skb->protocol == htons(ETH_P_8021AD)) &&
+ __vlan_get_protocol(skb, skb->protocol, &depth) != 0)
+ skb_set_network_header(skb, depth);
+
+ rcu_read_lock();
+ tap = rcu_dereference(q->tap);
+ if (tap) {
+ skb->dev = tap->dev;
+ dev_queue_xmit(skb);
+ } else {
+ kfree_skb(skb);
+ }
+ rcu_read_unlock();
+
+ return 0;
+
+err_kfree:
+ kfree_skb(skb);
+err:
+ rcu_read_lock();
+ tap = rcu_dereference(q->tap);
+ if (tap && tap->count_tx_dropped)
+ tap->count_tx_dropped(tap);
+ rcu_read_unlock();
+ return err;
+}
+
static int tap_sendmsg(struct socket *sock, struct msghdr *m,
size_t total_len)
{
struct tap_queue *q = container_of(sock, struct tap_queue, sock);
struct tun_msg_ctl *ctl = m->msg_control;
+ struct xdp_buff *xdp;
+ int i;
- if (ctl && ctl->type != TUN_MSG_UBUF)
- return -EINVAL;
+ if (ctl && ((ctl->type & 0xF) == TUN_MSG_PTR)) {
+ for (i = 0; i < ctl->type >> 16; i++) {
+ xdp = &((struct xdp_buff *)ctl->ptr)[i];
+ tap_get_user_xdp(q, xdp);
+ }
+ return 0;
+ }
return tap_get_user(q, ctl ? ctl->ptr : NULL, &m->msg_iter,
m->msg_flags & MSG_DONTWAIT);
--
2.17.1