Re: [patch v2, kernel version 3.2.1] net/ipv4/ip_gre: Ethernet multipoint GRE over IP

From: Štefan Gula
Date: Mon Jan 16 2012 - 18:12:12 EST


Dňa 16. januára 2012 21:28, Eric Dumazet <eric.dumazet@xxxxxxxxx> napísal/a:
> Le lundi 16 janvier 2012 à 20:45 +0100, Štefan Gula a écrit :
>> From: Stefan Gula <steweg@xxxxxxxxx>
>>
>> This patch is an extension for current Ethernet over GRE
>> implementation, which allows user to create virtual bridge (multipoint
>> VPN) and forward traffic based on Ethernet MAC address information in
>> it. It simulates the Bridge behavior learning mechanism, but instead
>> of learning port ID from which given MAC address comes, it learns IP
>> address of peer which encapsulated given packet. Multicast, Broadcast
>> and unknown-multicast traffic is send over network as multicast
>> encapsulated GRE packet, so one Ethernet multipoint GRE tunnel can be
>> represented as one single virtual switch on logical level and be also
>> represented as one multicast IPv4 address on network level.
>>
>> Signed-off-by: Stefan Gula <steweg@xxxxxxxxx>
>>
>> ---
>>
>> code was merged with latest bridge code and should be easily comparable
>>
>
> Please make sure it applies properly on net-next tree.
>
> That is mandatory.
>
>> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
>> linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h
>> --- linux-3.2.1-orig/include/net/ipip.h    2012-01-12 20:42:45.000000000 +0100
>> +++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100
>> @@ -27,6 +27,14 @@ struct ip_tunnel {
>> Â Â Â __u32 Â Â Â Â Â Â Â Â Â o_seqno; Â Â Â Â/* The last output seqno */
>>    int           hlen;      /* Precalculated GRE header length */
>>    int           mlink;
>> +#ifdef CONFIG_NET_IPGRE_BRIDGE
>> +#define GRETAP_BR_HASH_BITS 8
>> +#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS)
>> +   struct hlist_head    hash[GRETAP_BR_HASH_SIZE];
>> +   spinlock_t       Âhash_lock;
>> +   unsigned long      ageing_time;
>> +   struct timer_list    gc_timer;
>> +#endif
>>
>>    struct ip_tunnel_parm  parms;
>>
>> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
>> linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig
>> --- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100
>> +++ linux-3.2.1-my/net/ipv4/Kconfig  2012-01-16 12:37:00.000000000 +0100
>> @@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST
>> Â Â Â Â Network), but can be distributed all over the Internet. If you want
>> Â Â Â Â to do that, say Y here and to "IP multicast routing" below.
>>
>> +config NET_IPGRE_BRIDGE
>> + Â Â bool "IP: Ethernet over multipoint GRE over IP"
>> + Â Â depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST
>> + Â Â help
>> + Â Â Â Allows you to use multipoint GRE VPN as virtual switch and interconnect
>> + Â Â Â several L2 endpoints over L3 routed infrastructure. It is useful for
>> + Â Â Â creating multipoint L2 VPNs which can be later used inside bridge
>> + Â Â Â interfaces If you want to use. GRE multipoint L2 VPN feature say Y.
>> +
>> Âconfig IP_MROUTE
>> Â Â Â bool "IP: multicast routing"
>> Â Â Â depends on IP_MULTICAST
>> diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
>> linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c
>> --- linux-3.2.1-orig/net/ipv4/ip_gre.c    Â2012-01-12 20:42:45.000000000 +0100
>> +++ linux-3.2.1-my/net/ipv4/ip_gre.c Â2012-01-16 20:42:03.000000000 +0100
>> @@ -52,6 +52,11 @@
>> Â#include <net/ip6_route.h>
>> Â#endif
>>
>> +#ifdef CONFIG_NET_IPGRE_BRIDGE
>> +#include <linux/jhash.h>
>> +#include <asm/unaligned.h>
>> +#endif
>> +
>> Â/*
>> Â Â Problems & solutions
>> Â Â --------------------
>> @@ -134,6 +139,191 @@ struct ipgre_net {
>> Â Â Â struct net_device *fb_tunnel_dev;
>> Â};
>>
>> +#ifdef CONFIG_NET_IPGRE_BRIDGE
>> + Â Â /*
>> + Â Â Â* This part of code includes codes to enable L2 ethernet
>> + Â Â Â* switch virtualization over IP routed infrastructure with
>> + Â Â Â* utilization of multicast capable endpoint using Ethernet
>> + Â Â Â* over GRE
>> + Â Â Â*
>> + Â Â Â* Author: Stefan Gula
>> + Â Â Â* Signed-off-by: Stefan Gula <steweg@xxxxxxxxx>
>> + Â Â Â*/
>> +struct mac_addr {
>> +   unsigned char  addr[6];
>
> Did I mention : ETH_ALEN ?
>
>> +};
>> +
>> +struct ipgre_tap_bridge_entry {
>> +   struct hlist_node    hlist;
>> + Â Â u32 Â Â Â Â Â Â Â Â Â Â raddr;
>
>        __be32 raddr ?
>
>> +   struct mac_addr     addr;
>> +   struct net_device    *dev;
>> +   struct rcu_head     rcu;
>> +   unsigned long      updated;
>> +};
>> +
>> +static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly;
>> +static u32 ipgre_salt __read_mostly;
>> +
>> +int __net_init ipgre_tap_bridge_init(void)
>> +{
>> + Â Â ipgre_tap_bridge_cache = kmem_cache_create("ipgre_tap_bridge_cache",
>> + Â Â Â Â Â Â sizeof(struct ipgre_tap_bridge_entry),
>> + Â Â Â Â Â Â 0,
>> + Â Â Â Â Â Â SLAB_HWCACHE_ALIGN, NULL);
>> + Â Â if (!ipgre_tap_bridge_cache)
>> + Â Â Â Â Â Â return -ENOMEM;
>> + Â Â get_random_bytes(&ipgre_salt, sizeof(ipgre_salt));
>> + Â Â return 0;
>> +}
>> +
>> +void ipgre_tap_bridge_fini(void)
>> +{
>> + Â Â kmem_cache_destroy(ipgre_tap_bridge_cache);
>> +}
>> +
>> +static inline int ipgre_tap_bridge_hash(const unsigned char *mac)
>> +{
>> + Â Â u32 key = get_unaligned((u32 *)(mac + 2));
>> + Â Â return jhash_1word(key, ipgre_salt) & (GRETAP_BR_HASH_SIZE - 1);
>> +}
>> +
>> +static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel,
>> + Â Â Â Â Â Â const struct ipgre_tap_bridge_entry *entry)
>> +{
>> + Â Â return time_before_eq(entry->updated + tunnel->ageing_time,
>> + Â Â Â Â Â Â jiffies);
>> +}
>> +
>> +static void ipgre_tap_bridge_rcu_free(struct rcu_head *head)
>> +{
>> + Â Â struct ipgre_tap_bridge_entry *entry
>> + Â Â Â Â Â Â = container_of(head, struct ipgre_tap_bridge_entry, rcu);
>> + Â Â kmem_cache_free(ipgre_tap_bridge_cache, entry);
>> +}
>> +
>> +static inline void ipgre_tap_bridge_delete(struct
>> ipgre_tap_bridge_entry *entry)
>> +{
>> + Â Â hlist_del_rcu(&entry->hlist);
>> + Â Â call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free);
>> +}
>
> Did I mention : kfree_rcu() ?
>
>> +
>> +
>> +
>> +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find(
>> + Â Â struct hlist_head *head,
>> + Â Â const unsigned char *addr)
>> +{
>> + Â Â struct hlist_node *h;
>> + Â Â struct ipgre_tap_bridge_entry *entry;
>> + Â Â hlist_for_each_entry(entry, h, head, hlist) {
>> + Â Â Â Â Â Â if (!compare_ether_addr(entry->addr.addr, addr))
>> + Â Â Â Â Â Â Â Â Â Â return entry;
>> + Â Â }
>> + Â Â return NULL;
>> +}
>> +
>> +
>> +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find_rcu(
>> + Â Â struct hlist_head *head,
>> + Â Â const unsigned char *addr)
>> +{
>> + Â Â struct hlist_node *h;
>> + Â Â struct ipgre_tap_bridge_entry *entry;
>> + Â Â hlist_for_each_entry_rcu(entry, h, head, hlist) {
>> + Â Â Â Â Â Â if (!compare_ether_addr(entry->addr.addr, addr))
>> + Â Â Â Â Â Â Â Â Â Â return entry;
>> + Â Â }
>> + Â Â return NULL;
>> +}
>> +
>> +static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create(
>> + Â Â Â Â Â Â struct hlist_head *head,
>> + Â Â Â Â Â Â u32 source,
>
>        __be32 source,
>
>> + Â Â Â Â Â Â const unsigned char *addr, struct net_device *dev)
>> +{
>> + Â Â struct ipgre_tap_bridge_entry *entry;
>> + Â Â entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC);
>> + Â Â if (entry) {
>> + Â Â Â Â Â Â memcpy(entry->addr.addr, addr, ETH_ALEN);
>> + Â Â Â Â Â Â hlist_add_head_rcu(&entry->hlist, head);
>
> Thats bogus.
>
> You must init all entry fields _before_ inserting entry in hash table.
>
>> + Â Â Â Â Â Â entry->raddr = source;
>> + Â Â Â Â Â Â entry->dev = dev;
>> + Â Â Â Â Â Â entry->updated = jiffies;
>> + Â Â }
>> + Â Â return entry;
>> +}
>> +
>> +__be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
>> + Â Â const unsigned char *addr)
>> +{
>> + Â Â struct ipgre_tap_bridge_entry *entry;
>> + Â Â entry = __ipgre_tap_bridge_get(tunnel, addr);
>> + Â Â if (entry == NULL)
>> + Â Â Â Â Â Â return 0;
>> + Â Â else
>> + Â Â Â Â Â Â return entry->raddr;
>> +}
>> +
>> +#endif
>> Â/* Tunnel hash table */
>>
>> Â/*
>> @@ -563,6 +753,13 @@ static int ipgre_rcv(struct sk_buff *skb
>>    int  Âoffset = 4;
>> Â Â Â __be16 gre_proto;
>>
>> +#ifdef CONFIG_NET_IPGRE_BRIDGE
>> + Â Â u32 orig_source;
>
> Please run sparse on your code...  (make C=2 ...)
>
> orig_source is not u32, but __be32
>
>> + Â Â struct hlist_head *head;
>> + Â Â struct ipgre_tap_bridge_entry *entry;
>> + Â Â struct ethhdr *tethhdr;
>> +#endif
>> +
>> Â Â Â if (!pskb_may_pull(skb, 16))
>> Â Â Â Â Â Â Â goto drop_nolock;
>>
>> @@ -659,10 +856,39 @@ static int ipgre_rcv(struct sk_buff *skb
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â tunnel->dev->stats.rx_errors++;
>> Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â goto drop;
>> Â Â Â Â Â Â Â Â Â Â Â }
>> -
>> +#ifdef CONFIG_NET_IPGRE_BRIDGE
>> + Â Â Â Â Â Â Â Â Â Â orig_source = iph->saddr;
>> +#endif
>> Â Â Â Â Â Â Â Â Â Â Â iph = ip_hdr(skb);
>> Â Â Â Â Â Â Â Â Â Â Â skb->protocol = eth_type_trans(skb, tunnel->dev);
>> Â Â Â Â Â Â Â Â Â Â Â skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
>> +#ifdef CONFIG_NET_IPGRE_BRIDGE
>> + Â Â Â Â Â Â Â Â Â Â if (ipv4_is_multicast(tunnel->parms.iph.daddr)) {
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â tethhdr = eth_hdr(skb);
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â if (!is_multicast_ether_addr(
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â tethhdr->h_source)) {
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â head = &tunnel->hash[
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ipgre_tap_bridge_hash(
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â tethhdr->h_source)];
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â entry = ipgre_tap_bridge_find_rcu(head,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â tethhdr->h_source);
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â if (likely(entry)) {
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â entry->raddr = orig_source;
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â entry->updated = jiffies;
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â } else {
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â spin_lock_bh(&tunnel->hash_lock);
>
> You dont need the _bh() variant here, since we run from softirq handler.
>
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â if (!ipgre_tap_bridge_find(head,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â tethhdr->h_source))
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â ipgre_tap_bridge_create(
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âhead,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âorig_source,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âtethhdr->h_source,
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Âtunnel->dev);
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â spin_unlock_bh(&tunnel->hash_lock);
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â Â }
>> + Â Â Â Â Â Â Â Â Â Â Â Â Â Â }
>> + Â Â Â Â Â Â Â Â Â Â }
>> +#endif
>> Â Â Â Â Â Â Â }
>>
>> Â Â Â Â Â Â Â tstats = this_cpu_ptr(tunnel->dev->tstats);
>> @@ -716,7 +942,19 @@ static netdev_tx_t ipgre_tunnel_xmit(str
>> Â Â Â Â Â Â Â tiph = &tunnel->parms.iph;
>> Â Â Â }
>>
>> +#ifdef CONFIG_NET_IPGRE_BRIDGE
>> + Â Â rcu_read_lock();
>> + Â Â if ((dev->type == ARPHRD_ETHER) &&
>> + Â Â Â Â Â Â ipv4_is_multicast(tunnel->parms.iph.daddr))
>> + Â Â Â Â Â Â dst = ipgre_tap_bridge_get_raddr(tunnel,
>> + Â Â Â Â Â Â Â Â Â Â ((struct ethhdr *)skb->data)->h_dest);
>> + Â Â rcu_read_unlock();
>
> this rcu_read_lock()/rcu_read_unlock() pair should be done in
> ipgre_tap_bridge_get_raddr() instead... so we dont hit this for all
> packets.
>
>> + Â Â if (dst == 0)
>> + Â Â Â Â Â Â dst = tiph->daddr;
>> + Â Â if (dst == 0) {
>> +#else
>> Â Â Â if ((dst = tiph->daddr) == 0) {
>> +#endif
>> Â Â Â Â Â Â Â /* NBMA tunnel */
>>
>
> General comment :
>
> It would be nice this stuff is installed on a new "ip tunnel add"
> option...
>
> Hash table is 2048 bytes long... and an "ip tunnel " option would permit
> to size the hash table eventually, instead of fixed 256 slots.
>
>
>

I agree with you, but for the start of this feature I believe a static
number of slots is enough here - the same limitation exists in the original
Linux bridge code. I have hopefully merged all your comments, and here
is the newest patch:

diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/include/net/ipip.h linux-3.2.1-my/include/net/ipip.h
--- linux-3.2.1-orig/include/net/ipip.h 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/include/net/ipip.h 2012-01-16 11:17:01.000000000 +0100
@@ -27,6 +27,14 @@ struct ip_tunnel {
__u32 o_seqno; /* The last output seqno */
int hlen; /* Precalculated GRE header length */
int mlink;
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+#define GRETAP_BR_HASH_BITS 8
+#define GRETAP_BR_HASH_SIZE (1 << GRETAP_BR_HASH_BITS)
+ struct hlist_head hash[GRETAP_BR_HASH_SIZE];
+ spinlock_t hash_lock;
+ unsigned long ageing_time;
+ struct timer_list gc_timer;
+#endif

struct ip_tunnel_parm parms;

diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/net/ipv4/Kconfig linux-3.2.1-my/net/ipv4/Kconfig
--- linux-3.2.1-orig/net/ipv4/Kconfig 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/net/ipv4/Kconfig 2012-01-16 12:37:00.000000000 +0100
@@ -211,6 +211,15 @@ config NET_IPGRE_BROADCAST
Network), but can be distributed all over the Internet. If you want
to do that, say Y here and to "IP multicast routing" below.

+config NET_IPGRE_BRIDGE
+ bool "IP: Ethernet over multipoint GRE over IP"
+ depends on IP_MULTICAST && NET_IPGRE && NET_IPGRE_BROADCAST
+ help
+ Allows you to use multipoint GRE VPN as virtual switch and interconnect
+ several L2 endpoints over L3 routed infrastructure. It is useful for
+ creating multipoint L2 VPNs which can be later used inside bridge
+ interfaces. If you want to use the GRE multipoint L2 VPN feature, say Y.
+
config IP_MROUTE
bool "IP: multicast routing"
depends on IP_MULTICAST
diff -uprN -X linux-3.2.1-orig/Documentation/dontdiff
linux-3.2.1-orig/net/ipv4/ip_gre.c linux-3.2.1-my/net/ipv4/ip_gre.c
--- linux-3.2.1-orig/net/ipv4/ip_gre.c 2012-01-12 20:42:45.000000000 +0100
+++ linux-3.2.1-my/net/ipv4/ip_gre.c 2012-01-17 00:01:17.000000000 +0100
@@ -52,6 +52,11 @@
#include <net/ip6_route.h>
#endif

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+#include <linux/jhash.h>
+#include <asm/unaligned.h>
+#endif
+
/*
Problems & solutions
--------------------
@@ -134,6 +139,184 @@ struct ipgre_net {
struct net_device *fb_tunnel_dev;
};

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ /*
+ * This part of the code enables L2 Ethernet switch
+ * virtualization over an IP routed infrastructure by
+ * using multicast-capable endpoints and Ethernet
+ * over GRE.
+ *
+ * Author: Stefan Gula
+ * Signed-off-by: Stefan Gula <steweg@xxxxxxxxx>
+ */
+struct ipgre_tap_bridge_entry { /* one learned MAC -> remote peer mapping */
+ struct hlist_node hlist; /* link in one ip_tunnel->hash[] bucket */
+ __be32 raddr; /* IPv4 source address of the peer this MAC was seen from */
+ unsigned char addr[ETH_ALEN]; /* Ethernet MAC address, the lookup key */
+ struct net_device *dev; /* device handed to ipgre_tap_bridge_create() */
+ struct rcu_head rcu; /* used by ipgre_tap_bridge_delete() for the deferred free */
+ unsigned long updated; /* jiffies when the entry was created or last refreshed */
+};
+
+static struct kmem_cache *ipgre_tap_bridge_cache __read_mostly; /* slab cache for entries */
+static u32 ipgre_salt __read_mostly; /* random seed for ipgre_tap_bridge_hash() */
+
+/* Number of successful ipgre_tap_bridge_init() calls not yet undone. */
+static unsigned int ipgre_tap_bridge_users;
+
+/*
+ * ipgre_tap_bridge_init - take a reference on the bridge entry cache
+ *
+ * Creates the slab cache for struct ipgre_tap_bridge_entry and seeds the
+ * hash salt on first use. The function is called from ipgre_init_net(),
+ * i.e. once per network namespace, while the cache and the salt are
+ * file-scope globals: creating the cache on every call would register a
+ * second cache under the same name and overwrite the pointer to the first.
+ *
+ * NOTE(review): the plain counter assumes that per-net init/exit callbacks
+ * are serialized by the netns core - confirm.
+ *
+ * Returns 0 on success or -ENOMEM if the cache cannot be created.
+ */
+static int __net_init ipgre_tap_bridge_init(void)
+{
+	if (ipgre_tap_bridge_users == 0) {
+		ipgre_tap_bridge_cache = kmem_cache_create(
+				"ipgre_tap_bridge_cache",
+				sizeof(struct ipgre_tap_bridge_entry),
+				0, SLAB_HWCACHE_ALIGN, NULL);
+		if (!ipgre_tap_bridge_cache)
+			return -ENOMEM;
+		get_random_bytes(&ipgre_salt, sizeof(ipgre_salt));
+	}
+	ipgre_tap_bridge_users++;
+	return 0;
+}
+
+/*
+ * ipgre_tap_bridge_fini - drop a reference on the bridge entry cache
+ *
+ * Must pair with a successful ipgre_tap_bridge_init(). The cache is only
+ * destroyed when the last user goes away, so one namespace exiting does
+ * not pull the cache from under the others.
+ */
+static void ipgre_tap_bridge_fini(void)
+{
+	if (--ipgre_tap_bridge_users > 0)
+		return;
+
+	/*
+	 * Deleted entries are freed from RCU callbacks; wait for all of
+	 * them to run before the cache they belong to is destroyed.
+	 */
+	rcu_barrier();
+	kmem_cache_destroy(ipgre_tap_bridge_cache);
+	ipgre_tap_bridge_cache = NULL;
+}
+
+/*
+ * ipgre_tap_bridge_hash - map a MAC address to a hash bucket index
+ * @mac: pointer to the six-byte Ethernet address
+ *
+ * Returns an index in the range 0 .. GRETAP_BR_HASH_SIZE - 1.
+ */
+static inline int ipgre_tap_bridge_hash(const unsigned char *mac)
+{
+	/* Only the 32 bits starting at byte offset 2 of the MAC are hashed. */
+	const u32 *tail = (const u32 *)(mac + 2);
+	u32 mixed = jhash_1word(get_unaligned(tail), ipgre_salt);
+
+	return mixed & (GRETAP_BR_HASH_SIZE - 1);
+}
+
+/*
+ * ipgre_tap_bridge_has_expired - test whether an entry has aged out
+ * @tunnel: tunnel supplying the ageing_time
+ * @entry: entry whose last-update stamp is checked
+ *
+ * Returns non-zero once entry->updated + tunnel->ageing_time is no longer
+ * in the future relative to jiffies.
+ */
+static inline int ipgre_tap_bridge_has_expired(const struct ip_tunnel *tunnel,
+	const struct ipgre_tap_bridge_entry *entry)
+{
+	unsigned long deadline = entry->updated + tunnel->ageing_time;
+
+	return time_before_eq(deadline, jiffies);
+}
+
+/*
+ * RCU callback: hand an already unlinked entry back to the slab cache it
+ * was allocated from.
+ */
+static void ipgre_tap_bridge_rcu_free(struct rcu_head *head)
+{
+	struct ipgre_tap_bridge_entry *entry =
+		container_of(head, struct ipgre_tap_bridge_entry, rcu);
+
+	kmem_cache_free(ipgre_tap_bridge_cache, entry);
+}
+
+/*
+ * ipgre_tap_bridge_delete - unlink an entry and free it after a grace period
+ * @entry: entry currently linked in a tunnel hash bucket
+ *
+ * Entries are obtained with kmem_cache_alloc() from ipgre_tap_bridge_cache,
+ * so they are released with kmem_cache_free() instead of kfree_rcu(), which
+ * ends in kfree() and is meant for kmalloc() memory only.
+ * The callers in this file hold tunnel->hash_lock around the call.
+ */
+static inline void ipgre_tap_bridge_delete(
+	struct ipgre_tap_bridge_entry *entry)
+{
+	hlist_del_rcu(&entry->hlist);
+	call_rcu(&entry->rcu, ipgre_tap_bridge_rcu_free);
+}
+
+/*
+ * ipgre_tap_bridge_cleanup - gc_timer callback that ages out stale entries
+ * @_data: the owning struct ip_tunnel, passed as unsigned long
+ *
+ * Walks every hash bucket under tunnel->hash_lock, deletes the entries
+ * whose last update is at least ageing_time old and re-arms gc_timer for
+ * the earliest remaining expiry (at most one ageing_time from now).
+ */
+static void ipgre_tap_bridge_cleanup(unsigned long _data)
+{
+	struct ip_tunnel *tunnel = (struct ip_tunnel *)_data;
+	unsigned long max_age = tunnel->ageing_time;
+	unsigned long wakeup = jiffies + tunnel->ageing_time;
+	int bucket;
+
+	spin_lock(&tunnel->hash_lock);
+	for (bucket = 0; bucket < GRETAP_BR_HASH_SIZE; bucket++) {
+		struct ipgre_tap_bridge_entry *entry;
+		struct hlist_node *pos, *next;
+
+		hlist_for_each_entry_safe(entry, pos, next,
+				&tunnel->hash[bucket], hlist) {
+			unsigned long expires = entry->updated + max_age;
+
+			if (time_before_eq(expires, jiffies)) {
+				ipgre_tap_bridge_delete(entry);
+				continue;
+			}
+			/* Still alive: remember the soonest expiry. */
+			if (time_before(expires, wakeup))
+				wakeup = expires;
+		}
+	}
+	spin_unlock(&tunnel->hash_lock);
+	mod_timer(&tunnel->gc_timer, round_jiffies_up(wakeup));
+}
+
+/*
+ * ipgre_tap_bridge_flush - drop every learned entry of a tunnel
+ * @tunnel: tunnel whose hash table is emptied
+ *
+ * Called from ipgre_close(). All buckets are walked with
+ * tunnel->hash_lock held and bottom halves disabled.
+ */
+static void ipgre_tap_bridge_flush(struct ip_tunnel *tunnel)
+{
+	struct ipgre_tap_bridge_entry *entry;
+	struct hlist_node *pos, *next;
+	int bucket;
+
+	spin_lock_bh(&tunnel->hash_lock);
+	for (bucket = 0; bucket < GRETAP_BR_HASH_SIZE; bucket++)
+		hlist_for_each_entry_safe(entry, pos, next,
+				&tunnel->hash[bucket], hlist)
+			ipgre_tap_bridge_delete(entry);
+	spin_unlock_bh(&tunnel->hash_lock);
+}
+
+/*
+ * __ipgre_tap_bridge_get - RCU lookup of a live entry by MAC address
+ * @tunnel: tunnel whose hash table is searched
+ * @addr: Ethernet address to look for
+ *
+ * Returns the first entry in the bucket whose MAC equals @addr, or NULL
+ * if there is none or if that entry has already expired. The caller in
+ * this file, ipgre_tap_bridge_get_raddr(), invokes it inside
+ * rcu_read_lock().
+ */
+static struct ipgre_tap_bridge_entry *__ipgre_tap_bridge_get(
+	struct ip_tunnel *tunnel, const unsigned char *addr)
+{
+	struct hlist_head *bucket = &tunnel->hash[ipgre_tap_bridge_hash(addr)];
+	struct ipgre_tap_bridge_entry *entry;
+	struct hlist_node *pos;
+
+	hlist_for_each_entry_rcu(entry, pos, bucket, hlist) {
+		if (compare_ether_addr(entry->addr, addr))
+			continue;
+		/* Matching MAC: usable only while it has not aged out. */
+		if (unlikely(ipgre_tap_bridge_has_expired(tunnel, entry)))
+			return NULL;
+		return entry;
+	}
+
+	return NULL;
+}
+
+/*
+ * ipgre_tap_bridge_find - plain (non-RCU) search of one hash bucket
+ * @head: bucket to search
+ * @addr: Ethernet address to look for
+ *
+ * Returns the matching entry or NULL. The receive path calls this with
+ * tunnel->hash_lock held, right before creating a new entry.
+ */
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find(
+	struct hlist_head *head,
+	const unsigned char *addr)
+{
+	struct ipgre_tap_bridge_entry *cur;
+	struct ipgre_tap_bridge_entry *match = NULL;
+	struct hlist_node *pos;
+
+	hlist_for_each_entry(cur, pos, head, hlist) {
+		if (compare_ether_addr(cur->addr, addr) == 0) {
+			match = cur;
+			break;
+		}
+	}
+	return match;
+}
+
+
+/*
+ * ipgre_tap_bridge_find_rcu - RCU search of one hash bucket
+ * @head: bucket to search
+ * @addr: Ethernet address to look for
+ *
+ * Same lookup as ipgre_tap_bridge_find(), but walks the list with the
+ * RCU iterator and does not check entry expiry. Returns the matching
+ * entry or NULL.
+ */
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_find_rcu(
+	struct hlist_head *head,
+	const unsigned char *addr)
+{
+	struct ipgre_tap_bridge_entry *cur;
+	struct ipgre_tap_bridge_entry *match = NULL;
+	struct hlist_node *pos;
+
+	hlist_for_each_entry_rcu(cur, pos, head, hlist) {
+		if (compare_ether_addr(cur->addr, addr) == 0) {
+			match = cur;
+			break;
+		}
+	}
+	return match;
+}
+
+/*
+ * ipgre_tap_bridge_create - allocate an entry and add it to a bucket
+ * @head: hash bucket the new entry is added to
+ * @source: IPv4 address of the remote peer, in network byte order
+ * @addr: Ethernet address that was learned
+ * @dev: device stored in the entry
+ *
+ * Allocates with GFP_ATOMIC. Returns the new entry, or NULL if the
+ * allocation failed. The receive path calls this with tunnel->hash_lock
+ * held.
+ */
+static struct ipgre_tap_bridge_entry *ipgre_tap_bridge_create(
+	struct hlist_head *head,
+	__be32 source,
+	const unsigned char *addr, struct net_device *dev)
+{
+	struct ipgre_tap_bridge_entry *entry;
+
+	entry = kmem_cache_alloc(ipgre_tap_bridge_cache, GFP_ATOMIC);
+	if (!entry)
+		return NULL;
+
+	/* Fill in every field before the entry becomes visible to readers. */
+	memcpy(entry->addr, addr, ETH_ALEN);
+	entry->raddr = source;
+	entry->dev = dev;
+	entry->updated = jiffies;
+	hlist_add_head_rcu(&entry->hlist, head);
+
+	return entry;
+}
+
+/*
+ * ipgre_tap_bridge_get_raddr - look up the peer address learned for a MAC
+ * @tunnel: tunnel whose hash table is searched
+ * @addr: destination Ethernet address
+ *
+ * Does the lookup inside its own rcu_read_lock() section. Returns the
+ * stored peer address, or 0 when no unexpired entry exists for @addr.
+ */
+static __be32 ipgre_tap_bridge_get_raddr(struct ip_tunnel *tunnel,
+	const unsigned char *addr)
+{
+	struct ipgre_tap_bridge_entry *entry;
+	__be32 raddr = 0;
+
+	rcu_read_lock();
+	entry = __ipgre_tap_bridge_get(tunnel, addr);
+	if (entry)
+		raddr = entry->raddr;
+	rcu_read_unlock();
+
+	return raddr;
+}
+
+#endif
/* Tunnel hash table */

/*
@@ -563,6 +746,13 @@ static int ipgre_rcv(struct sk_buff *skb
int offset = 4;
__be16 gre_proto;

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ __be32 orig_source;
+ struct hlist_head *head;
+ struct ipgre_tap_bridge_entry *entry;
+ struct ethhdr *tethhdr;
+#endif
+
if (!pskb_may_pull(skb, 16))
goto drop_nolock;

@@ -659,10 +849,39 @@ static int ipgre_rcv(struct sk_buff *skb
tunnel->dev->stats.rx_errors++;
goto drop;
}
-
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ orig_source = iph->saddr;
+#endif
iph = ip_hdr(skb);
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ if (ipv4_is_multicast(tunnel->parms.iph.daddr)) {
+ tethhdr = eth_hdr(skb);
+ if (!is_multicast_ether_addr(
+ tethhdr->h_source)) {
+ head = &tunnel->hash[
+ ipgre_tap_bridge_hash(
+ tethhdr->h_source)];
+ entry = ipgre_tap_bridge_find_rcu(head,
+ tethhdr->h_source);
+ if (likely(entry)) {
+ entry->raddr = orig_source;
+ entry->updated = jiffies;
+ } else {
+ spin_lock(&tunnel->hash_lock);
+ if (!ipgre_tap_bridge_find(head,
+ tethhdr->h_source))
+ ipgre_tap_bridge_create(
+ head,
+ orig_source,
+ tethhdr->h_source,
+ tunnel->dev);
+ spin_unlock(&tunnel->hash_lock);
+ }
+ }
+ }
+#endif
}

tstats = this_cpu_ptr(tunnel->dev->tstats);
@@ -716,7 +935,17 @@ static netdev_tx_t ipgre_tunnel_xmit(str
tiph = &tunnel->parms.iph;
}

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+	/* Prefer a learned peer address; otherwise use the tunnel daddr. */
+	dst = 0;
+	if ((dev->type == ARPHRD_ETHER) &&
+	    ipv4_is_multicast(tunnel->parms.iph.daddr))
+		dst = ipgre_tap_bridge_get_raddr(tunnel,
+				((struct ethhdr *)skb->data)->h_dest);
+	if (dst == 0 && (dst = tiph->daddr) == 0) {
+#else
if ((dst = tiph->daddr) == 0) {
+#endif
/* NBMA tunnel */

if (skb_dst(skb) == NULL) {
@@ -1209,6 +1438,16 @@ static int ipgre_open(struct net_device
return -EADDRNOTAVAIL;
t->mlink = dev->ifindex;
ip_mc_inc_group(__in_dev_get_rtnl(dev), t->parms.iph.daddr);
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ if (t->dev->type == ARPHRD_ETHER) {
+ INIT_HLIST_HEAD(t->hash);
+ spin_lock_init(&t->hash_lock);
+ t->ageing_time = 300 * HZ;
+ setup_timer(&t->gc_timer, ipgre_tap_bridge_cleanup,
+ (unsigned long) t);
+ mod_timer(&t->gc_timer, jiffies + t->ageing_time);
+ }
+#endif
}
return 0;
}
@@ -1219,6 +1458,12 @@ static int ipgre_close(struct net_device

if (ipv4_is_multicast(t->parms.iph.daddr) && t->mlink) {
struct in_device *in_dev;
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ if (t->dev->type == ARPHRD_ETHER) {
+ ipgre_tap_bridge_flush(t);
+ del_timer_sync(&t->gc_timer);
+ }
+#endif
in_dev = inetdev_by_index(dev_net(dev), t->mlink);
if (in_dev)
ip_mc_dec_group(in_dev, t->parms.iph.daddr);
@@ -1341,6 +1586,12 @@ static int __net_init ipgre_init_net(str
struct ipgre_net *ign = net_generic(net, ipgre_net_id);
int err;

+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ err = ipgre_tap_bridge_init();
+ if (err)
+ goto err_out;
+#endif
+
ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip_tunnel), "gre0",
ipgre_tunnel_setup);
if (!ign->fb_tunnel_dev) {
@@ -1362,6 +1613,10 @@ static int __net_init ipgre_init_net(str
err_reg_dev:
ipgre_dev_free(ign->fb_tunnel_dev);
err_alloc_dev:
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ ipgre_tap_bridge_fini();
+err_out:
+#endif
return err;
}

@@ -1375,6 +1630,9 @@ static void __net_exit ipgre_exit_net(st
ipgre_destroy_tunnels(ign, &list);
unregister_netdevice_many(&list);
rtnl_unlock();
+#ifdef CONFIG_NET_IPGRE_BRIDGE
+ ipgre_tap_bridge_fini();
+#endif
}

static struct pernet_operations ipgre_net_ops = {
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/