[RFC/PATCH] skb destructor chaining [was Re: network driver that uses skb destructor]

From: Muli Ben-Yehuda
Date: Thu Jan 01 2004 - 14:11:50 EST


On Tue, Dec 30, 2003 at 09:13:50AM +0000, Adrian Cox wrote:
> On Mon, 2003-12-29 at 17:24, Muli Ben-Yehuda wrote:
>
> > I wrote a patch to allow chaining of skb destructors, which was fairly
> > simple and would allow both the driver and the upper layers to set
> > their destructors. If there's any interet, I could probably resurrect
> > it.
>
> It's interesting to me, for my work with PCI backplane networking, as it
> would eliminate an extra packet copy on receive.

Here's the patch, against 2.6.0. Comments appreciated!

Description: We keep an array of three function pointers for the
destructors, and a counter of registered destructors. When the skb is
destroyted, we call the destructors in reverse order of
registration.

I went with a fixed number of destructors in order to not have to add
an allocation (and having to deal with the allocation possibly
failing...) to a fast path. If greater flexibility in the number of
descriptors is desired, we can use a pool or a slab cache of
destructors. We can also optimize for the common case of one
destructor, and as far as I can see, just two is enough for now - one
for the upper layers, one for the driver.

It's interesting to note how destructor chaining makes the af_unix
code simpler, since it doesn't have to worry about it itself anymore.

Patch compiles, boots, and scp's large files fine. For testing, I
also added a destructor to e100 and verified that every skb allocated
by it gets its destructor called (not included in the patch).

diff -Naur -X /home/muli/w/dontdiff 2.6.0/include/linux/skbuff.h 2.6.0-skb-dest/include/linux/skbuff.h
--- 2.6.0/include/linux/skbuff.h Wed Oct 8 19:55:08 2003
+++ 2.6.0-skb-dest/include/linux/skbuff.h Thu Jan 1 20:22:09 2004
@@ -27,6 +27,7 @@
#include <linux/highmem.h>
#include <linux/poll.h>
#include <linux/net.h>
+#include <asm/hardirq.h>

#define HAVE_ALLOC_SKB /* For the drivers to know */
#define HAVE_ALIGNABLE_SKB /* Ditto 8) */
@@ -147,6 +148,10 @@
skb_frag_t frags[MAX_SKB_FRAGS];
};

+typedef void (*skb_destructor_t) (struct sk_buff* skb);
+
+#define MAX_NUM_SKB_DESTRUCTORS 3
+
/**
* struct sk_buff - socket buffer
* @next: Next buffer in list
@@ -177,7 +182,8 @@
* @data: Data head pointer
* @tail: Tail pointer
* @end: End pointer
- * @destructor: Destruct function
+ * @numdests: The number of registered destructors
+ * @destructors: Destructors function array
* @nfmark: Can be used for communication between hooks
* @nfcache: Cache info
* @nfct: Associated connection, if any
@@ -241,7 +247,9 @@
unsigned short protocol,
security;

- void (*destructor)(struct sk_buff *skb);
+ skb_destructor_t destructors[MAX_NUM_SKB_DESTRUCTORS];
+ size_t numdests;
+
#ifdef CONFIG_NETFILTER
unsigned long nfmark;
__u32 nfcache;
@@ -995,19 +1003,57 @@
return (len < skb->len) ? __pskb_trim(skb, len) : 0;
}

+static inline void skb_init_destructors(struct sk_buff* skb)
+{
+ memset(skb->destructors, 0, sizeof(skb->destructors));
+ skb->numdests = 0;
+}
+
+static inline void skb_call_destructors(struct sk_buff* skb)
+{
+ int in_irq_warn = 1;
+
+ while (skb->numdests--) {
+ skb_destructor_t dest;
+ size_t idx = skb->numdests;
+
+ if (in_irq() && in_irq_warn) {
+ printk(KERN_WARNING "Warning: kfree_skb on "
+ "hard IRQ %p\n", NET_CALLER(skb));
+ in_irq_warn = 0;
+ }
+
+ dest = skb->destructors[idx];
+ BUG_ON(!dest);
+ dest(skb);
+
+ skb->destructors[idx] = NULL;
+ }
+
+ skb->numdests = 0;
+}
+
+static inline void skb_add_destructor(struct sk_buff* skb, skb_destructor_t dest)
+{
+ size_t idx = skb->numdests++;
+
+ BUG_ON(idx >= MAX_NUM_SKB_DESTRUCTORS);
+ BUG_ON(skb->destructors[idx]); /* if it's already set */
+
+ skb->destructors[idx] = dest;
+}
+
/**
* skb_orphan - orphan a buffer
* @skb: buffer to orphan
*
- * If a buffer currently has an owner then we call the owner's
- * destructor function and make the @skb unowned. The buffer continues
+ * If a buffer has any destructors registered, we call them. Then
+ * we make the @skb unowned. The buffer continues
* to exist but is no longer charged to its former owner.
*/
static inline void skb_orphan(struct sk_buff *skb)
{
- if (skb->destructor)
- skb->destructor(skb);
- skb->destructor = NULL;
+ skb_call_destructors(skb);
skb->sk = NULL;
}

diff -Naur -X /home/muli/w/dontdiff 2.6.0/include/net/sock.h 2.6.0-skb-dest/include/net/sock.h
--- 2.6.0/include/net/sock.h Tue Nov 25 05:44:51 2003
+++ 2.6.0-skb-dest/include/net/sock.h Tue Dec 30 16:05:08 2003
@@ -903,14 +903,14 @@
{
sock_hold(sk);
skb->sk = sk;
- skb->destructor = sock_wfree;
+ skb_add_destructor(skb, sock_wfree);
atomic_add(skb->truesize, &sk->sk_wmem_alloc);
}

static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb->sk = sk;
- skb->destructor = sock_rfree;
+ skb_add_destructor(skb, sock_rfree);
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
}

diff -Naur -X /home/muli/w/dontdiff 2.6.0/include/net/tcp.h 2.6.0-skb-dest/include/net/tcp.h
--- 2.6.0/include/net/tcp.h Tue Oct 28 13:11:07 2003
+++ 2.6.0-skb-dest/include/net/tcp.h Tue Dec 30 16:14:39 2003
@@ -1889,7 +1889,7 @@
static inline void tcp_set_owner_r(struct sk_buff *skb, struct sock *sk)
{
skb->sk = sk;
- skb->destructor = tcp_rfree;
+ skb_add_destructor(skb, tcp_rfree);
atomic_add(skb->truesize, &sk->sk_rmem_alloc);
sk->sk_forward_alloc -= skb->truesize;
}
diff -Naur -X /home/muli/w/dontdiff 2.6.0/net/core/skbuff.c 2.6.0-skb-dest/net/core/skbuff.c
--- 2.6.0/net/core/skbuff.c Thu Nov 6 22:23:45 2003
+++ 2.6.0-skb-dest/net/core/skbuff.c Tue Dec 30 16:18:42 2003
@@ -229,12 +229,9 @@
#ifdef CONFIG_XFRM
secpath_put(skb->sp);
#endif
- if(skb->destructor) {
- if (in_irq())
- printk(KERN_WARNING "Warning: kfree_skb on "
- "hard IRQ %p\n", NET_CALLER(skb));
- skb->destructor(skb);
- }
+
+ skb_call_destructors(skb);
+
#ifdef CONFIG_NETFILTER
nf_conntrack_put(skb->nfct);
#ifdef CONFIG_BRIDGE_NETFILTER
@@ -293,7 +290,7 @@
C(priority);
C(protocol);
C(security);
- n->destructor = NULL;
+ skb_init_destructors(n);
#ifdef CONFIG_NETFILTER
C(nfmark);
C(nfcache);
@@ -350,7 +347,7 @@
new->local_df = old->local_df;
new->pkt_type = old->pkt_type;
new->stamp = old->stamp;
- new->destructor = NULL;
+ skb_init_destructors(new);
new->security = old->security;
#ifdef CONFIG_NETFILTER
new->nfmark = old->nfmark;
diff -Naur -X /home/muli/w/dontdiff 2.6.0/net/unix/af_unix.c 2.6.0-skb-dest/net/unix/af_unix.c
--- 2.6.0/net/unix/af_unix.c Wed Oct 1 09:42:11 2003
+++ 2.6.0-skb-dest/net/unix/af_unix.c Thu Jan 1 20:10:01 2004
@@ -1142,7 +1142,6 @@
int i;

scm->fp = UNIXCB(skb).fp;
- skb->destructor = sock_wfree;
UNIXCB(skb).fp = NULL;

for (i=scm->fp->count-1; i>=0; i--)
@@ -1158,7 +1157,6 @@
/* Alas, it calls VFS */
/* So fscking what? fput() had been SMP-safe since the last Summer */
scm_destroy(&scm);
- sock_wfree(skb);
}

static void unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb)
@@ -1167,7 +1165,7 @@
for (i=scm->fp->count-1; i>=0; i--)
unix_inflight(scm->fp->fp[i]);
UNIXCB(skb).fp = scm->fp;
- skb->destructor = unix_destruct_fds;
+ skb_add_destructor(skb, unix_destruct_fds);
scm->fp = NULL;
}


--
Muli Ben-Yehuda
http://www.mulix.org | http://mulix.livejournal.com/

"the nucleus of linux oscillates my world" - gccbot@#offtopic

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/