[PATCH] Token Ring Source Routing Update for 2.3.39

From: Mike Phillips (phillips@okpcm.com)
Date: Tue Jan 11 2000 - 15:47:22 EST


Attached is an updated version of the source routing patch for token
ring that was submitted a couple of days ago. Compared with the previous
patch it contains a couple of clean-ups and has been updated against the
2.3.39 source tree.

Mike
Linux Token Ring Project
http://www.linuxtr.net

diff -ur linux-2.3.39.orig/drivers/net/net_init.c linux-2.3.39/drivers/net/net_init.c
--- linux-2.3.39.orig/drivers/net/net_init.c Tue Jan 11 13:48:54 2000
+++ linux-2.3.39/drivers/net/net_init.c Tue Jan 11 13:49:53 2000
@@ -450,6 +450,8 @@
 
         /* New-style flags. */
         dev->flags = IFF_BROADCAST | IFF_MULTICAST ;
+ tr_unlock(dev);
+ dev->rif_cache_lists=NULL;
 }
 
 struct net_device *init_trdev(struct net_device *dev, int sizeof_priv)
@@ -475,6 +477,7 @@
 void unregister_trdev(struct net_device *dev)
 {
         rtnl_lock();
+ tr_free_rif_cache(dev);
         unregister_netdevice(dev);
         rtnl_unlock();
 }
diff -ur linux-2.3.39.orig/include/linux/if_tr.h linux-2.3.39/include/linux/if_tr.h
--- linux-2.3.39.orig/include/linux/if_tr.h Tue Jan 11 13:48:43 2000
+++ linux-2.3.39/include/linux/if_tr.h Tue Jan 11 13:49:53 2000
@@ -1,4 +1,4 @@
-/*
+ /*
  * INET An implementation of the TCP/IP protocol suite for the LINUX
  * operating system. INET is implemented using the BSD Socket
  * interface as the means of communication with the user level.
@@ -9,7 +9,8 @@
  *
  * Author: Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
  * Donald Becker, <becker@super.org>
- * Peter De Schrijver, <stud11@cc4.kuleuven.ac.be>
+ * Peter De Schrijver, <stud11@cc4.kuleuven.ac.be>
+ * D.J. Barrow barrow_dj@yahoo.com
  *
  * This program is free software; you can redistribute it and/or
  * modify it under the terms of the GNU General Public License
@@ -19,46 +20,62 @@
 #ifndef _LINUX_IF_TR_H
 #define _LINUX_IF_TR_H
 
-
 /* IEEE 802.5 Token-Ring magic constants. The frame sizes omit the preamble
    and FCS/CRC (frame check sequence). */
 #define TR_ALEN 6 /* Octets in one ethernet addr */
-#define TR_HLEN (sizeof(struct trh_hdr)+sizeof(struct trllc))
+#define TR_HLEN (sizeof(struct trh_hdr)+sizeof(hdr_8022))
 #define AC 0x10
-#define LLC_FRAME 0x40
-#if 0
-#define ETH_HLEN 14 /* Total octets in header. */
-#define ETH_ZLEN 60 /* Min. octets in frame sans FCS */
-#define ETH_DATA_LEN 1500 /* Max. octets in payload */
-#define ETH_FRAME_LEN 1514 /* Max. octets in frame sans FCS */
-#endif
+#define LLC_FRAME 0x40
 
 
 /* LLC and SNAP constants */
+#define TR_IP_SAP 0xAA
+#define TR_IPX_SAP 0xE0
 #define EXTENDED_SAP 0xAA
 #define UI_CMD 0x03
 
+
 /* This is an Token-Ring frame header. */
-struct trh_hdr {
- __u8 ac; /* access control field */
- __u8 fc; /* frame control field */
- __u8 daddr[TR_ALEN]; /* destination address */
- __u8 saddr[TR_ALEN]; /* source address */
- __u16 rcf; /* route control field */
- __u16 rseg[8]; /* routing registers */
-};
+/* access control field */
+/* frame control field */
+/* destination address */
+/* source address */
+#define TR_HW_HDR \
+__u8 ac; \
+__u8 fc; \
+__u8 daddr[TR_ALEN]; \
+__u8 saddr[TR_ALEN];
+
+typedef struct
+{
+ TR_HW_HDR
+} tr_hw_hdr __attribute__((packed));
+
+typedef struct
+{
+ __u8 saddr[TR_ALEN];
+ unsigned short type;
+} tr_fake_header;
+#define TR_NUM_RSEGS 8
+
+/* route control field */
+/* routing registers */
+#define TR_RIF_INFO \
+__u16 rcf __attribute__((packed)); \
+__u16 rseg[TR_NUM_RSEGS] __attribute__((packed));
+
+typedef struct
+{
+ TR_RIF_INFO
+} tr_rif_info __attribute__((packed));
+#define TR_MAXRIFLEN sizeof(tr_rif_info)
+#define TR_MAX_NUM_HOPS (TR_NUM_RSEGS-1)
+
 
-/* This is an Token-Ring LLC structure */
-struct trllc {
- __u8 dsap; /* destination SAP */
- __u8 ssap; /* source SAP */
- __u8 llc; /* LLC control field */
- __u8 protid[3]; /* protocol id */
- __u16 ethertype; /* ether type field */
-};
 
 /* Token-Ring statistics collection data. */
-struct tr_statistics {
+struct tr_statistics
+{
         unsigned long rx_packets; /* total packets received */
         unsigned long tx_packets; /* total packets transmitted */
         unsigned long rx_bytes; /* total bytes received */
@@ -86,15 +103,173 @@
         unsigned long dummy1;
 };
 
-/* source routing stuff */
 
+#define LLC_8022 \
+__u8 dsap; \
+__u8 ssap; \
+__u8 llc;
+
+#define SNAP_8022 \
+__u8 protid[3]; \
+__u16 ethertype;
+
+typedef struct
+{
+ LLC_8022
+} llc_8022 __attribute__((packed));
+typedef struct
+{
+ SNAP_8022
+} snap_8022 __attribute__((packed));
+
+typedef struct
+{
+ LLC_8022
+ SNAP_8022
+} hdr_8022 __attribute__((packed));
+
+/* for backward compatibility mainly */
+struct trh_hdr
+{
+ TR_HW_HDR
+ TR_RIF_INFO
+} __attribute__((packed));
+
+struct trllc
+{
+ LLC_8022
+ SNAP_8022
+} __attribute__((packed));
+
+#define TR_RCF_FRAME2K 0x20
+/* #define TR_MAXRIFLEN 18 */
+
+/*
+ RFC 1749 stuff
+ */
+/* 0=individual addr 1=group/multicast addr */
+#define TR_IG_BIT 0x80
+/* FAI bit 0=functional address,1= indicates locally administered group address
+*/
+#define TR_FAI_BIT 0x80
+/* 0=universally administered 1=locally administered */
+#define TR_UL_BIT 0x40
+/* This isn't needed to set the broadcast bit in rcfs as the TR_IG_BIT is already set */
+#define IP_MCAST_FUNC_ADDR {0xC0,0x00,0x00,0x04,0x00,0x00}
+
+/* source routing stuff */
 #define TR_RII 0x80
 #define TR_RCF_DIR_BIT 0x80
+#define TR_RCF_LF_MASK 0x70
 #define TR_RCF_LEN_MASK 0x1f00
-#define TR_RCF_BROADCAST 0x8000 /* all-routes broadcast */
-#define TR_RCF_LIMITED_BROADCAST 0xC000 /* single-route broadcast */
-#define TR_RCF_FRAME2K 0x20
-#define TR_RCF_BROADCAST_MASK 0xC000
-#define TR_MAXRIFLEN 18
+#define TR_NORMAL 0x0 /* normal transmit */
+#define TR_NO_RCF 0x1 /* i.e. don't put an rcf in the header */
+#define TR_RCF_ALL_RINGS_BROADCAST 0x8000 /* all-routes broadcast: sent over every possible route */
+#define TR_RCF_LIMITED_BROADCAST 0xC000 /* single-route broadcast: hits every ring at most once */
+#define TR_RCF_BROADCAST_MASK 0xE000
+#define TR_RCF_CACHE_MASK ~(TR_RCF_BROADCAST_MASK)
+
+/* Should be in types.h but ... */
+#ifndef TRUE
+#define TRUE 1
+#endif
+#ifndef FALSE
+#define FALSE 0
+#endif
+#define LOCAL_RING -1
+
+/*
 
+ The format of each Routing Descriptor (RD) field is: ( RFC 1749 )
+
+ octet 1 octet 2
+ +---------------+---------------+
+ |r r r r r r r r r r r r i i i i|
+ +---------------+---------------+
+ <---- ring number ----> <----->
+ ^
+ |
+ bridge number --+ // should be bridge type
+
+ 0 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | B | LTH |D| LF | r |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+*/
+
+typedef struct tr_rif_cache_entry tr_rif_cache_entry;
+/* This list structure is fairly complex ( I got a headache writing it )
+ * ( & don't get put off by all the debug code in tr_add_rif_info. )
+ * It may not add much performance on networks with less than 100 nodes
+ * but it scales very well.
+ * The 2 doubly linked lists nearly completely eliminates any list walking.
+ * The idea behind the lists is as follows
+ * The most recently used list is needed to help
+ * maintain the least recently used list correctly.
+ * The least recently used list is used so that tr_add_rif_info
+ * doesn't have to walk the entire list to find out
+ * which member to replace in the cache when a line is full.
+ * The updated list is used because
+ * most of the packets received in tr_add_rif_info will be at the
+ * top of this list if they received packets from this mac address
+ * in the recent past thus avoiding list walking.
+ * The hint field added to struct sock means that
+ * no list walking is done on transmitted packets except to
+ * set up the initial hint. DJB.
+ * Obviously the touched list doesn't perform quite as well as
+ * the lru list, as the packets coming in are much more random.
+ * The hint_idx field is required to tell if a hint is valid
+ * this is done by updating an index each time we free a cache entry.
+ */
+typedef struct
+{
+ tr_rif_cache_entry *head;
+ tr_rif_cache_entry *tail;
+} tr_rif_dlist;
+
+typedef struct
+{
+ tr_rif_cache_entry *next;
+ tr_rif_cache_entry *prev;
+ long timestamp;
+} tr_rif_dentry;
+
+
+enum
+{
+ TR_USED=0,
+ TR_UPDATED,
+ TR_NUMDLISTS
+};
+
+struct tr_rif_cache_lists
+{
+ tr_rif_dlist dlist[TR_NUMDLISTS];
+ int num_entries;
+ int hint_idx;
+};
+
+struct tr_rif_cache_entry
+{
+ tr_rif_dentry dentry[TR_NUMDLISTS];
+ unsigned char addr[TR_ALEN];
+ TR_RIF_INFO
+} __attribute__((packed));
+
+#define RD_TO_RINGNUM(rd) (ntohs(rd)>>4)
+#define RD_TO_BRIDGE_TYPE(rd) (ntohs(rd)&0xf)
+
+#ifdef __KERNEL__
+void tr_free_rif_cache(struct net_device *dev);
+#define TR_DEBUG_LOCKS 0 /* 1: tr_lock() goes through tr_lock2(), which records the caller's file/line */
+#if TR_DEBUG_LOCKS
+extern int tr_lock2(struct net_device *dev,char *file,int line); /* type must match the definition in net/802/tr.c */
+#define tr_lock(dev) (tr_lock2((dev),__FILE__,__LINE__))
+#else
+#define tr_lock(dev) (test_and_set_bit(1,&(dev)->tr_lock)==0) /* try-lock: non-zero only if we took the lock */
+#endif
+#define tr_unlock(dev) (atomic_set(&(dev)->tr_lock,0))
+#endif /* __KERNEL__ */
 #endif /* _LINUX_IF_TR_H */
diff -ur linux-2.3.39.orig/include/linux/netdevice.h linux-2.3.39/include/linux/netdevice.h
--- linux-2.3.39.orig/include/linux/netdevice.h Tue Jan 11 13:48:43 2000
+++ linux-2.3.39/include/linux/netdevice.h Tue Jan 11 13:49:53 2000
@@ -173,7 +173,10 @@
  * FIXME: cleanup struct net_device such that network protocol info
  * moves out.
  */
-
+#if CONFIG_TR
+struct tr_rif_cache_lists;
+typedef struct tr_rif_cache_lists tr_rif_cache_lists;
+#endif
 struct net_device
 {
 
@@ -340,6 +343,10 @@
         /* Semi-private data. Keep it at the end of device struct. */
         rwlock_t fastpath_lock;
         struct dst_entry *fastpath[NETDEV_FASTROUTE_HMASK+1];
+#endif
+#if CONFIG_TR
+ atomic_t tr_lock;
+ tr_rif_cache_lists *rif_cache_lists;
 #endif
 };
 
diff -ur linux-2.3.39.orig/include/net/sock.h linux-2.3.39/include/net/sock.h
--- linux-2.3.39.orig/include/net/sock.h Tue Jan 11 13:48:46 2000
+++ linux-2.3.39/include/net/sock.h Tue Jan 11 13:49:53 2000
@@ -466,7 +466,13 @@
         struct proto *prot;
 
         unsigned short shutdown;
-
+#if CONFIG_TR
+ /* used to speed up source routing. */
+ void *hint;
+ /* hint_idx is needed so that we can tell whether the hint is valid */
+ /* i.e. the memory it points to hasn't been freed up */
+ int hint_idx;
+#endif
 #if defined(CONFIG_IPV6) || defined (CONFIG_IPV6_MODULE)
         union {
                 struct ipv6_pinfo af_inet6;
diff -ur linux-2.3.39.orig/net/802/sysctl_net_802.c linux-2.3.39/net/802/sysctl_net_802.c
--- linux-2.3.39.orig/net/802/sysctl_net_802.c Tue Jan 11 13:48:49 2000
+++ linux-2.3.39/net/802/sysctl_net_802.c Tue Jan 11 13:49:53 2000
@@ -19,9 +19,9 @@
 };
 
 #ifdef CONFIG_TR
-extern int sysctl_tr_rif_timeout;
+extern int sysctl_tr_rif_linked_list_width;
 ctl_table tr_table[] = {
- {NET_TR_RIF_TIMEOUT, "rif_timeout", &sysctl_tr_rif_timeout, sizeof(int),
+ {NET_TR_RIF_TIMEOUT, "rif_linked_list_width", &sysctl_tr_rif_linked_list_width, sizeof(int),
          0644, NULL, &proc_dointvec},
         {0}
 };
diff -ur linux-2.3.39.orig/net/802/tr.c linux-2.3.39/net/802/tr.c
--- linux-2.3.39.orig/net/802/tr.c Tue Jan 11 13:48:49 2000
+++ linux-2.3.39/net/802/tr.c Tue Jan 11 13:49:53 2000
@@ -6,6 +6,19 @@
  * as published by the Free Software Foundation; either version
  * 2 of the License, or (at your option) any later version.
  *
+ * May'99 D.J Barrow barrow_dj@yahoo.com
+ * added code cleanups, added smp safe locking
+ * removed broken sti/cli stuff, fixed
+ * race conditions & added limiting to the size of the
+ * rif_table.
+ * put rif caches in device structure where they should be
+ * to avoid smp lock collisions if a few token ring devices
+ * are going concurrently.
+ * Added hint field to sock struct & now using 2 doubly linked
+ * lists to minimise linked list walking, see if_tr.h for info.
+ * Added correct MTU fixups to transmitted rifs so that
+ * changing the MTU size is supported better.
+ *
  * Fixes: 3 Feb 97 Paul Norton <pnorton@cts.com> Minor routing fixes.
  * Added rif table to /proc/net/tr_rif and rif timeout to
  * /proc/sys/net/token-ring/rif_timeout.
@@ -30,288 +43,457 @@
 #include <linux/trdevice.h>
 #include <linux/skbuff.h>
 #include <linux/errno.h>
+#include <linux/string.h>
 #include <linux/timer.h>
 #include <linux/net.h>
 #include <linux/proc_fs.h>
 #include <linux/init.h>
 #include <net/arp.h>
+#include <asm/atomic.h>
+#include <asm/bitops.h>
+#include <linux/if_tr.h>
+#include <net/sock.h>
 
-static void tr_source_route(struct sk_buff *skb, struct trh_hdr *trh, struct net_device *dev);
-static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev);
-static void rif_check_expire(unsigned long dummy);
 
-#define TR_SR_DEBUG 0
 
-typedef struct rif_cache_s *rif_cache;
 
-/*
- * Each RIF entry we learn is kept this way
- */
-
-struct rif_cache_s {
- unsigned char addr[TR_ALEN];
- int iface;
- __u16 rcf;
- __u16 rseg[8];
- rif_cache next;
- unsigned long last_used;
- unsigned char local_ring;
-};
-
-#define RIF_TABLE_SIZE 32
-
-/*
- * We hash the RIF cache 32 ways. We do after all have to look it
- * up a lot.
- */
-
-rif_cache rif_table[RIF_TABLE_SIZE]={ NULL, };
 
-static spinlock_t rif_lock = SPIN_LOCK_UNLOCKED;
+#define TR_SR_DEBUG 0
+#define TR_SR_SANITY 0
+#define TR_DEBUG_TIME 0
+#define TR_SR_BROADCAST_DEBUG 0
+#define TR_SR_LISTPERF 0 /* Compile with this on if dubious about my claims DJB */
+#define TR_RIF_BUFF_SIZE 64
+#define TR_RIF_CACHE_LIST_SIZE 64
+#define TR_RIF_LINKED_LIST_WIDTH 8
+#if 0
+#define TR_RIF_CACHE_LIST_SIZE 4
+#define TR_RIF_LINKED_LIST_WIDTH 4
+#endif
+#if TR_DEBUG_LOCKS
 
-#define RIF_TIMEOUT 60*10*HZ
-#define RIF_CHECK_INTERVAL 60*HZ
+char lockbuff[256];
 
-/*
- * Garbage disposal timer.
- */
-
-static struct timer_list rif_timer;
+int tr_lock2(struct net_device *dev,char *file,int line)
+{
+ if(test_and_set_bit(1,&dev->tr_lock)==0) {
+ sprintf(lockbuff,"lockbuff %s %d\n",file,line);
+ return(TRUE);
+ }
+ return(FALSE);
+}
+#endif
 
-int sysctl_tr_rif_timeout = RIF_TIMEOUT;
+int sysctl_tr_rif_linked_list_width=TR_RIF_LINKED_LIST_WIDTH;
 
 /*
  * Put the headers on a token ring packet. Token ring source routing
  * makes this a little more exciting than on ethernet.
  */
-
-int tr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
- void *daddr, void *saddr, unsigned len)
-{
- struct trh_hdr *trh;
- int hdr_len;
 
- /*
- * Add the 802.2 SNAP header if IP as the IPv4 code calls
- * dev->hard_header directly.
- */
- if (type == ETH_P_IP || type == ETH_P_ARP)
+static hdr_8022 hdr_8022list[] =
+{
         {
- struct trllc *trllc=(struct trllc *)(trh+1);
+ EXTENDED_SAP, /* FOR ETH_P_IP & ARP */
+ EXTENDED_SAP,
+ UI_CMD,
+ {0,0,0},
+ },
+};
 
- hdr_len = sizeof(struct trh_hdr) + sizeof(struct trllc);
- trh = (struct trh_hdr *)skb_push(skb, hdr_len);
- trllc = (struct trllc *)(trh+1);
- trllc->dsap = trllc->ssap = EXTENDED_SAP;
- trllc->llc = UI_CMD;
- trllc->protid[0] = trllc->protid[1] = trllc->protid[2] = 0x00;
- trllc->ethertype = htons(type);
- }
- else
- {
- hdr_len = sizeof(struct trh_hdr);
- trh = (struct trh_hdr *)skb_push(skb, hdr_len);
- }
+void *tr_alloc(size_t size)
+{
+ void *retval=kmalloc(size,GFP_ATOMIC);
 
- trh->ac=AC;
- trh->fc=LLC_FRAME;
+ if(retval)
+ memset(retval,0,size);
+ return(retval);
+}
 
- if(saddr)
- memcpy(trh->saddr,saddr,dev->addr_len);
- else
- memcpy(trh->saddr,dev->dev_addr,dev->addr_len);
+/* Pick the rif cache line for macaddr. Only the low bits of the XOR survive the */
+/* mask, so the RII bit (0x80 of byte 0) of a source routed address is ignored. */
+__inline__ tr_rif_cache_lists *tr_hash(struct net_device *dev,
+	__u8 *macaddr)
+{
+	/* XOR bytewise: macaddr may point into packet data, where __u16 loads could be misaligned */
+	__u16 hash=(macaddr[0]^macaddr[1]^macaddr[2]^macaddr[3]^macaddr[4]^macaddr[5])&(TR_RIF_CACHE_LIST_SIZE-1);
+	return(&dev->rif_cache_lists[hash]);
+}
 
- /*
- * Build the destination and then source route the frame
- */
-
- if(daddr)
- {
- memcpy(trh->daddr,daddr,dev->addr_len);
- tr_source_route(skb,trh,dev);
- return(hdr_len);
- }
 
- return -hdr_len;
-}
-
 /*
- * A neighbour discovery of some species (eg arp) has completed. We
- * can now send the packet.
- */
-
-int tr_rebuild_header(struct sk_buff *skb)
+ * Sorry this is so complex, if you have the misfortune of having to debug this
+ * try preprocessing it first ( gcc -E ) DJB.
+ * If I didn't do it using macros I'd end up installing the same bug in several places
+ * P.S. gcc isn't stupid the stuff compiles good.
+ */
+#if TR_SR_SANITY
+/* My big ball of wax to check the list. */
+void tr_sanity_check_list(tr_rif_cache_lists *list,int index,char *name,int delta)
 {
- struct trh_hdr *trh=(struct trh_hdr *)skb->data;
- struct trllc *trllc=(struct trllc *)(skb->data+sizeof(struct trh_hdr));
- struct net_device *dev = skb->dev;
-
- /*
- * FIXME: We don't yet support IPv6 over token rings
- */
-
- if(trllc->ethertype != htons(ETH_P_IP)) {
- printk("tr_rebuild_header: Don't know how to resolve type %04X addresses ?\n",(unsigned int)htons(trllc->ethertype));
- return 0;
+ tr_rif_cache_entry *entry,*prev_entry=NULL,*next_entry,*entry2;
+ tr_rif_dlist *dlist=&list->dlist[index];
+ tr_rif_dentry *dentry,*next_dentry,*prev_dentry;
+ int entrycnt,num_entries=list->num_entries;
+ long now=jiffies;
+ int diff;
+ tr_rif_cache_entry *entry_stack[TR_RIF_LINKED_LIST_WIDTH+16];
+
+ if(delta==0) {
+ if(num_entries==0) {
+ if(list->dlist[0].head||list->dlist[0].tail||
+ list->dlist[1].head||list->dlist[1].tail)
+ panic("%s num_entries=0 & list not empty list=%p\n",name,list);
+ }
+ if(num_entries==1) {
+ if(list->dlist[0].head==NULL||
+ list->dlist[0].head!=list->dlist[0].tail||
+ list->dlist[0].tail!=list->dlist[1].head||
+ list->dlist[1].head!=list->dlist[1].tail)
+ panic("%s num_entries=1 & list inconsistent list=%p\n",name,list);
+ }
         }
-
-#ifdef CONFIG_INET
- if(arp_find(trh->daddr, skb)) {
- return 1;
+ for(entry=dlist->head,entrycnt=0;entry&&
+ entrycnt<=(num_entries+3)
+ ;entry=next_entry,entrycnt++) {
+ entry_stack[entrycnt]=entry;
+ dentry=&entry->dentry[index];
+ next_entry=dentry->next;
+ if(next_entry) {
+ next_dentry=&next_entry->dentry[index];
+ if(next_dentry->prev!=entry)
+ panic("%s index=%d doubly linked list inconsistency forward traversal dlist=%p entry=%p\n",name,index,dlist,entry);
+ if((now-dentry->timestamp)>((now-next_dentry->timestamp)))
+ panic("%s index=%d timestamp inconsistency dlist=%p\n",name,index,dlist);
+ }
+ prev_entry=entry;
         }
- else
-#endif
- {
- tr_source_route(skb,trh,dev);
- return 0;
+/* Have to allow for adding deleting entries */
+ diff=entrycnt-num_entries;
+ if(diff<-delta||diff>delta)
+ panic("%s index=%d entrycnt %d expected %d dlist=%p forward traversal\n",
+ name,index,entrycnt,num_entries,dlist);
+ next_entry=NULL;
+ for(entry=dlist->tail;entry&&
+ entrycnt<=(num_entries+1)
+ ;entry=prev_entry) {
+ if(entry!=entry_stack[--entrycnt])
+ panic("%s entrystack detected a fault dlist=%p entrycnt=%d"
+ "entry=%p entrystack[entrycnt]=%p\n",
+ name,dlist,entrycnt,entry,entry_stack[entrycnt]);
+ dentry=&entry->dentry[index];
+ prev_entry=dentry->prev;
+ if(prev_entry) {
+ prev_dentry=&prev_entry->dentry[index];
+ if(prev_dentry->next!=entry)
+ panic("%s index=%d doubly linked list inconsistency reverse traversal dlist=%p entry=%p\n",name,index,dlist,entry);
+ }
+ next_entry=entry;
         }
+ for(entry=dlist->head;entry;entry=entry->dentry[index].next)
+ for(entry2=dlist->head;entry2;entry2=entry2->dentry[index].next)
+ if(entry!=entry2&&memcmp(entry->addr,entry2->addr,TR_ALEN)==0)
+ panic("%s index=%d duplicate mac addrs dlist=%p\n"
+ "entry1=%p entry2=%p %02X:%02X:%02X:%02X:%02X:%02X\n"
+ ,name,index,dlist,
+ entry,entry2,
+ entry->addr[0],entry->addr[1],
+ entry->addr[2],entry->addr[3],
+ entry->addr[4],entry->addr[5]);
 }
-
-/*
- * Some of this is a bit hackish. We intercept RIF information
- * used for source routing. We also grab IP directly and don't feed
- * it via SNAP.
- */
-
-unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev)
-{
-
- struct trh_hdr *trh=(struct trh_hdr *)skb->data;
- struct trllc *trllc;
- unsigned riflen=0;
-
- skb->mac.raw = skb->data;
-
- if(trh->saddr[0] & TR_RII)
- riflen = (ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
+#endif
 
- trllc = (struct trllc *)(skb->data+sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
+#define TR_FIX_PREV_ENTRY_COMMON(dnext) \
+prev_entry=dentry->prev; \
+if(prev_entry) { \
+ prev_dentry=&prev_entry->dentry[index]; \
+ prev_dentry->next=(dnext); \
+}
+
+#define TR_REMOVE_DENTRY_COMMON \
+TR_FIX_PREV_ENTRY_COMMON(dentry->next) \
+next_entry=dentry->next; \
+if(next_entry) { \
+ next_dentry=&next_entry->dentry[index]; \
+ next_dentry->prev=prev_entry; \
+}
 
- skb_pull(skb,sizeof(struct trh_hdr)-TR_MAXRIFLEN+riflen);
+#define TR_ADD_TO_HEAD_COMMON \
+next_entry=dlist->head; \
+if(next_entry) { \
+ next_dentry=&next_entry->dentry[index]; \
+ next_dentry->prev=entry; \
+} \
+else \
+ dlist->tail=entry; \
+dentry->next=next_entry; \
+dentry->prev=NULL; \
+dlist->head=entry;
 
- if(*trh->daddr & 0x80)
- {
- if(!memcmp(trh->daddr,dev->broadcast,TR_ALEN))
- skb->pkt_type=PACKET_BROADCAST;
- else
- skb->pkt_type=PACKET_MULTICAST;
- }
- else if ( (trh->daddr[0] & 0x01) && (trh->daddr[1] & 0x00) && (trh->daddr[2] & 0x5E))
- {
- skb->pkt_type=PACKET_MULTICAST;
+__inline__ void tr_make_list_head(tr_rif_cache_lists *list,tr_rif_cache_entry *entry,int index)
+{
+ tr_rif_cache_entry *prev_entry,*next_entry;
+ tr_rif_dlist *dlist=&list->dlist[index];
+ tr_rif_dentry *dentry=&entry->dentry[index],*next_dentry,*prev_dentry;
+
+ if(dlist->head!=entry) {
+ TR_REMOVE_DENTRY_COMMON
+ if(dlist->tail==entry)
+ dlist->tail=prev_entry;
+ TR_ADD_TO_HEAD_COMMON
         }
- else if(dev->flags & IFF_PROMISC)
- {
- if(memcmp(trh->daddr, dev->dev_addr, TR_ALEN))
- skb->pkt_type=PACKET_OTHERHOST;
+#if TR_SR_SANITY
+ tr_sanity_check_list(list,index,"tr_make_list_head",0);
+#endif
+}
+
+__inline__ tr_rif_cache_entry *tr_remove_tail(tr_rif_cache_lists *list,int index)
+{
+ tr_rif_dlist *dlist=&list->dlist[index];
+ tr_rif_dentry *dentry,*prev_dentry;
+ tr_rif_cache_entry *entry=dlist->tail,*prev_entry;
+#if TR_SR_SANITY
+ tr_sanity_check_list(list,index,"tr_remove_tail before",1);
+#endif
+ if(entry) {
+ dentry=&entry->dentry[index];
+ TR_FIX_PREV_ENTRY_COMMON(NULL)
+ dlist->tail=prev_entry;
+ if(dlist->head==entry)
+ dlist->head=NULL;
         }
 
- if ((skb->pkt_type != PACKET_BROADCAST) &&
- (skb->pkt_type != PACKET_MULTICAST))
- tr_add_rif_info(trh,dev) ;
+#if TR_SR_SANITY
+ tr_sanity_check_list(list,index,"tr_remove_tail after",1);
+#endif
+ return(entry);
+}
+__inline__ void tr_remove_entry(tr_rif_cache_lists *list,tr_rif_cache_entry *entry,int index) /* unlink entry from list->dlist[index] */
+{
+ tr_rif_cache_entry *prev_entry,*next_entry;
+ tr_rif_dlist *dlist=&list->dlist[index];
+ tr_rif_dentry *dentry=&entry->dentry[index],*prev_dentry,*next_dentry;
+
+ TR_REMOVE_DENTRY_COMMON /* joins the two neighbours; leaves prev_entry/next_entry set, entry's own links untouched */
+ if(dlist->head==entry) /* entry was the head: its successor (maybe NULL) takes over */
+ dlist->head=next_entry;
+ if(dlist->tail==entry) /* entry was the tail: its predecessor (maybe NULL) takes over */
+ dlist->tail=prev_entry;
+#if TR_SR_SANITY
+ tr_sanity_check_list(list,index,"tr_remove_entry",1);
+#endif
+}
 
- /*
- * Strip the SNAP header from ARP packets since we don't
- * pass them through to the 802.2/SNAP layers.
- */
+/* Link entry in as the new head of list->dlist[index] and stamp it with now. */
+__inline__ void tr_add_to_head(tr_rif_cache_lists *list,tr_rif_cache_entry *entry,int index,long now)
+{
+	tr_rif_dlist *dlist=&list->dlist[index];
+	tr_rif_dentry *dentry=&entry->dentry[index],*next_dentry;
+	tr_rif_cache_entry *next_entry;
+
+	/* the macro also sets dlist->tail when the list was empty, so no extra tail fixup is needed */
+	TR_ADD_TO_HEAD_COMMON
+	dentry->timestamp=now;
+#if TR_SR_SANITY
+	tr_sanity_check_list(list,index,"tr_add_to_head",1);
+#endif
+}
 
- if (trllc->dsap == EXTENDED_SAP &&
- (trllc->ethertype == ntohs(ETH_P_IP) ||
- trllc->ethertype == ntohs(ETH_P_ARP)))
- {
- skb_pull(skb, sizeof(struct trllc));
- return trllc->ethertype;
- }
+__inline__ void tr_add_to_tail(tr_rif_cache_lists *list,tr_rif_cache_entry *entry,int index,long now) /* append entry to list->dlist[index] */
+{
+ tr_rif_dlist *dlist=&list->dlist[index];
+ tr_rif_dentry *dentry=&entry->dentry[index],*prev_dentry;
+ tr_rif_cache_entry *prev_entry;
+
+ prev_entry=dlist->tail;
+ if(prev_entry) {
+ prev_dentry=&prev_entry->dentry[index];
+ prev_dentry->next=entry;
+ dentry->timestamp=prev_dentry->timestamp; /* inherit the old tail's stamp so the new tail is not newer than its predecessor (tr_sanity_check_list checks this ordering) */
+ }
+ else {
+ dlist->head=entry; /* list was empty: entry is both head and tail */
+ dentry->timestamp=now;
+ }
+ dentry->prev=prev_entry;
+ dentry->next=NULL;
+ dlist->tail=entry;
+#if TR_SR_SANITY
+ tr_sanity_check_list(list,index,"tr_add_to_tail",1);
+#endif
+}
+/*
+ From RFC 1042
+ 0 1
+ 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ | B | LTH |D| LF | r |
+ +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+LF - Largest Frame: 3 bits
+
+The LF bits specify the maximum MTU supported by all
+bridges along a specific route. All multi-ring broadcast
+frames should be transmitted with a value at least as
+large as the supported MTU. The values used are:
+
+LF (binary) MAC MTU IP MTU
+
+ 000 552 508
+ 001 1064 1020
+ 010 2088 2044
+ 011 4136 4092
+ 100 8232 8188
+
+Even though the netdevice.h header file specifies
+dev->mtu as the interface's mtu, ethernet's mtu is set
+to 1500 as opposed to 1514, the size of an ethernet frame sans FCS.
+Therefore I'm taking MTU as being IP MTU.
+
+I added this code as I saw on mailing lists people changing
+MTU sizes.
+*/
 
- return ntohs(ETH_P_802_2);
+__inline__ __u16 tr_set_rcf_framesize(struct net_device *dev)
+{
+ unsigned mtu=dev->mtu;
+
+ if(mtu>=8188)
+ return(0x40);
+ if(mtu>=4092)
+ return(0x30);
+ if(mtu>=/*2044*/2000) /* to keep compatible with our current implementation */
+ return(0x20);
+ if(mtu>=1020)
+ return(0x10);
+ return(0x0);
 }
 
 /*
- * We try to do source routing...
- */
 
-static void tr_source_route(struct sk_buff *skb,struct trh_hdr *trh,struct net_device *dev)
+ *FIXME* I added this code in the hope that someone
+ would add code such that each source routed destination
+ can have a different MTU as recommended by rfc1042.
+ Currently we are ramming 2K packets down ethernet drivers throats
+ across token ring bridges,
+ I presume 802.2 compatible ethernet drivers are designed to put up
+ with this but we will break some of them if we start
+ ramming 8k packets down their throats.
+ Hopefully some nice thoughtful person will figure this one out
+ include/net/dst.h has a pmtu field
+ include/linux/route.h has a rt_mtu field
+ maybe it would be a starting point to see what these do.
+ */
+__inline__ unsigned tr_get_rcf_mtu(__u16 rcf)
 {
- int i, slack;
- unsigned int hash;
- rif_cache entry;
- unsigned char *olddata;
- unsigned char mcast_func_addr[] = {0xC0,0x00,0x00,0x04,0x00,0x00};
-
- spin_lock_bh(&rif_lock);
+ rcf=ntohs(rcf)&TR_RCF_LF_MASK;
 
- /*
- * Broadcasts are single route as stated in RFC 1042
- */
- if( (!memcmp(&(trh->daddr[0]),&(dev->broadcast[0]),TR_ALEN)) ||
- (!memcmp(&(trh->daddr[0]),&(mcast_func_addr[0]), TR_ALEN)) )
- {
- trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
- | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
- trh->saddr[0]|=TR_RII;
+ switch(rcf) {
+ case 0x40:
+ return(8188);
+ case 0x30:
+ return(4092);
+ case 0x20:
+ return(/*2044*/2000); /* to keep compatible with our current implementation */
+ case 0x10:
+ return(1020);
+ default:
+ return(508); /* Play it safe */
         }
- else
- {
- for(i=0,hash=0;i<TR_ALEN;hash+=trh->daddr[i++]);
- hash&=RIF_TABLE_SIZE-1;
- /*
- * Walk the hash table and look for an entry
- */
- for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->daddr[0]),TR_ALEN);entry=entry->next);
+}
 
+/*
+ * We try to do source routing...
+ */
+int tr_add_rif(struct sk_buff *skb,__u8 *daddr,struct net_device *dev)
+{
+ int rcflen=-1;
+ tr_rif_cache_lists *rif_cache_listtop;
+ tr_rif_cache_entry *entry;
+ struct sock *sk=skb->sk;
+ tr_rif_info *rif;
+#if TR_SR_LISTPERF
+ int perfcnt;
+#endif
+ if(tr_lock(dev)) {
+ rif_cache_listtop=tr_hash(dev,daddr);
+ entry=((sk&&rif_cache_listtop->hint_idx==sk->hint_idx) ? sk->hint:NULL);
+ if(entry==NULL||memcmp(entry->addr,daddr,TR_ALEN)) {
+ /*
+ * Walk the hash table and look for an entry
+ */
+ for(
+#if TR_SR_LISTPERF
+ perfcnt=0,
+#endif
+ entry=rif_cache_listtop->dlist[TR_USED].head;entry &&
+ memcmp(entry->addr,daddr,TR_ALEN);entry=entry->dentry[TR_USED].next
+#if TR_SR_LISTPERF
+ ,perfcnt++
+#endif
+ );
+ if(entry) {
+ if(sk) {
+ sk->hint=entry;
+ sk->hint_idx=rif_cache_listtop->hint_idx;
+ }
+ }
+#if TR_SR_LISTPERF
+ printk("tr_rif num entries walked=%d num entries=%d\n",perfcnt,
+ rif_cache_listtop->num_entries);
+#endif
+ }
+#if TR_SR_DEBUG
+ else
+ printk("Hint worked horray\n");
+#endif
                 /*
                  * If we found an entry we can route the frame.
                  */
- if(entry)
- {
+ if(entry) {
 #if TR_SR_DEBUG
-printk("source routing for %02X %02X %02X %02X %02X %02X\n",trh->daddr[0],
- trh->daddr[1],trh->daddr[2],trh->daddr[3],trh->daddr[4],trh->daddr[5]);
+ printk("source routing for %02X:%02X:%02X:%02X:%02X:%02X\n",daddr[0],
+ daddr[1],daddr[2],daddr[3],daddr[4],daddr[5]);
 #endif
- if(!entry->local_ring && (ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8)
- {
- trh->rcf=entry->rcf;
- memcpy(&trh->rseg[0],&entry->rseg[0],8*sizeof(unsigned short));
- trh->rcf^=htons(TR_RCF_DIR_BIT);
- trh->rcf&=htons(0x1fff); /* Issam Chehab <ichehab@madge1.demon.co.uk> */
-
- trh->saddr[0]|=TR_RII;
+ rcflen=(ntohs(entry->rcf) & TR_RCF_LEN_MASK) >> 8;
+ if(rcflen) {
+ rif=(tr_rif_info *)skb_push(skb,rcflen);
+ memcpy(rif,&entry->rcf,rcflen);
+ rif->rcf=htons((rif->rcf & __constant_htons(TR_RCF_DIR_BIT) ? 0:TR_RCF_DIR_BIT)|
+ tr_set_rcf_framesize(dev))|(rif->rcf & __constant_htons(TR_RCF_LEN_MASK));
 #if TR_SR_DEBUG
                                 printk("entry found with rcf %04x\n", entry->rcf);
- }
- else
- {
- printk("entry found but without rcf length, local=%02x\n", entry->local_ring);
 #endif
                         }
- entry->last_used=jiffies;
- }
- else
- {
- /*
- * Without the information we simply have to shout
- * on the wire. The replies should rapidly clean this
- * situation up.
- */
- trh->rcf=htons((((sizeof(trh->rcf)) << 8) & TR_RCF_LEN_MASK)
- | TR_RCF_FRAME2K | TR_RCF_LIMITED_BROADCAST);
- trh->saddr[0]|=TR_RII;
-#if TR_SR_DEBUG
- printk("no entry in rif table found - broadcasting frame\n");
-#endif
+ /* Are we the most recently used ??, if not fixup. */
+ tr_make_list_head(rif_cache_listtop,entry,TR_USED);
+ entry->dentry[TR_USED].timestamp=jiffies;
                 }
+ tr_unlock(dev);
         }
+ return(rcflen);
+}
 
- /* Compress the RIF here so we don't have to do it in the driver(s) */
- if (!(trh->saddr[0] & 0x80))
- slack = 18;
- else
- slack = 18 - ((ntohs(trh->rcf) & TR_RCF_LEN_MASK)>>8);
- olddata = skb->data;
- spin_unlock_bh(&rif_lock);
 
- skb_pull(skb, slack);
- memmove(skb->data, olddata, sizeof(struct trh_hdr) - slack);
+/* Print rcf and the ring-bridge-ring route into buffer; returns chars written (at most 50, plus NUL). */
+int sprintfbridgeinfo(char *buffer,tr_rif_info *rifinfo)
+{
+	int len, bridgecnt, curr_bridgeno, segment, bridge_type;
+
+	len=sprintf(buffer,"%04X", ntohs(rifinfo->rcf));
+	/* rif length in bytes / 2 = 1 + number of segments, so this is the index of the last segment */
+	bridgecnt=((ntohs(rifinfo->rcf) & TR_RCF_LEN_MASK) >> 9)-2;
+	if(bridgecnt>=TR_NUM_RSEGS) /* the 5-bit length can claim more segments than rseg[] holds */
+		bridgecnt=TR_NUM_RSEGS-1;
+	for(curr_bridgeno = 0; curr_bridgeno <= bridgecnt; curr_bridgeno++) {
+		segment=RD_TO_RINGNUM(rifinfo->rseg[curr_bridgeno]);
+		if(curr_bridgeno==0)
+			len+=sprintf(buffer+len," %03X",segment);
+		else {
+			bridge_type=RD_TO_BRIDGE_TYPE(rifinfo->rseg[curr_bridgeno-1]);
+			len+=sprintf(buffer+len,"-%01X-%03X",bridge_type,segment);
+		}
+	}
+	return(len);
 }
 
 /*
@@ -319,134 +501,336 @@
  * routing.
  */
  
-static void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev)
+void tr_add_rif_info(struct trh_hdr *trh, struct net_device *dev) /* records the rif of a received frame in dev's cache, keyed by trh->saddr */
 {
- int i;
- unsigned int hash, rii_p = 0;
- rif_cache entry;
-
-
- spin_lock_bh(&rif_lock);
-
- /*
- * Firstly see if the entry exists
- */
-
- if(trh->saddr[0] & TR_RII)
- {
- trh->saddr[0]&=0x7f;
- if (((ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8) > 2)
- {
- rii_p = 1;
- }
- }
 
- for(i=0,hash=0;i<TR_ALEN;hash+=trh->saddr[i++]);
- hash&=RIF_TABLE_SIZE-1;
- for(entry=rif_table[hash];entry && memcmp(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);entry=entry->next);
+ tr_rif_cache_lists *rif_cache_listtop=NULL;
+ tr_rif_cache_entry *entry=NULL,*delete_entry;
+ long now;
+ __u8 trh_saddr0;
+ int rcflen=0; /* rif length in bytes taken from the rcf; stays 0 when the RII bit is clear */
+ int fixupdated=FALSE; /* when TRUE, entry is moved to the head of the TR_UPDATED list at done: */
+#if TR_SR_LISTPERF
+ int perfcnt;
+#endif
 
- if(entry==NULL)
- {
 #if TR_SR_DEBUG
-printk("adding rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
- trh->saddr[0],trh->saddr[1],trh->saddr[2],
- trh->saddr[3],trh->saddr[4],trh->saddr[5],
- ntohs(trh->rcf));
+ char *debugstr;
+#endif
+#if TR_SR_BROADCAST_DEBUG
+ char broadcastbuff[TR_RIF_BUFF_SIZE];
+#endif
+ if(tr_lock(dev)) { /* the frame is silently ignored when the lock is not obtained */
+ /* is something else playing with the list */
+ if(trh->saddr[0] & TR_RII) {
+ rcflen=((ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8);
+#if TR_SR_BROADCAST_DEBUG
+ /* As can be seen from this debug code
+ * it is perfectly valid to allow broadcast
+ * frames into the routing table as they have
+ * perfectly valid rifs.
+ */
+ if(ntohs(trh->rcf)&TR_RCF_BROADCAST_MASK) {
+ sprintfbridgeinfo(broadcastbuff,(tr_rif_info *)&trh->rcf);
+ printk("broadcast received from %02X:%02X:%02X:%02X:%02X:%02X rif=%s\n",
+ trh->saddr[0],trh->saddr[1],trh->saddr[2],
+ trh->saddr[3],trh->saddr[4],trh->saddr[5],broadcastbuff);
+ }
 #endif
- /*
- * Allocate our new entry. A failure to allocate loses
- * use the information. This is harmless.
- *
- * FIXME: We ought to keep some kind of cache size
- * limiting and adjust the timers to suit.
- */
- entry=kmalloc(sizeof(struct rif_cache_s),GFP_ATOMIC);
-
- if(!entry)
- {
- printk(KERN_DEBUG "tr.c: Couldn't malloc rif cache entry !\n");
- spin_unlock_bh(&rif_lock);
- return;
- }
-
- memcpy(&(entry->addr[0]),&(trh->saddr[0]),TR_ALEN);
- entry->iface = dev->ifindex;
- entry->next=rif_table[hash];
- entry->last_used=jiffies;
- rif_table[hash]=entry;
-
- if (rii_p)
- {
- entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
- memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
- entry->local_ring = 0;
- trh->saddr[0]|=TR_RII; /* put the routing indicator back for tcpdump */
                 }
- else
- {
- entry->local_ring = 1;
+ if(dev->rif_cache_lists==NULL) {
+ if(rcflen<6) /* the lists are only allocated once a frame with a rif of 6 bytes or more is seen */
+ goto done;
+ else {
+ dev->rif_cache_lists=tr_alloc(TR_RIF_CACHE_LIST_SIZE*sizeof(tr_rif_cache_lists));
+ if(dev->rif_cache_lists==NULL)
+ goto fail;
+ }
                 }
- }
- else /* Y. Tahara added */
- {
+ now=jiffies;
+ trh_saddr0=trh->saddr[0]&(~TR_RII); /* remove routing info */
                 /*
- * Update existing entries
+ * Firstly see if the entry exists
                  */
- if (!entry->local_ring)
- if (entry->rcf != (trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK)) &&
- !(trh->rcf & htons(TR_RCF_BROADCAST_MASK)))
- {
+ rif_cache_listtop=tr_hash(dev,trh->saddr);
+ if(sysctl_tr_rif_linked_list_width<0)
+ sysctl_tr_rif_linked_list_width=0;
+ while(rif_cache_listtop->num_entries>sysctl_tr_rif_linked_list_width) { /* shrink the list to the sysctl width, dropping from the tail of the TR_USED list */
+ delete_entry=tr_remove_tail(rif_cache_listtop,TR_USED);
+ if(delete_entry) {
+ tr_remove_entry(rif_cache_listtop,delete_entry,TR_UPDATED);
+ kfree(delete_entry);
+ rif_cache_listtop->num_entries--;
+ rif_cache_listtop->hint_idx++;
+#if TR_SR_SANITY
+ tr_sanity_check_list(rif_cache_listtop,TR_UPDATED,"deleting entries",0);
+ tr_sanity_check_list(rif_cache_listtop,TR_USED,"deleting entries",0);
+#endif
+ }
+ else {
+ printk("%s linked list bug rif_cache_listtop=%p\n",dev->name,rif_cache_listtop);
+ break;
+ }
+ }
+ if(sysctl_tr_rif_linked_list_width==0) /* a width of 0 means nothing may be cached */
+ goto done;
+ fixupdated=TRUE;
+ for(
+#if TR_SR_LISTPERF
+ perfcnt=0,
+#endif
+ entry=rif_cache_listtop->dlist[TR_UPDATED].head;entry &&
+ (trh_saddr0!=entry->addr[0]||memcmp(&entry->addr[1],&trh->saddr[1],TR_ALEN-1))
+ ;entry=entry->dentry[TR_UPDATED].next
+#if TR_SR_LISTPERF
+ ,perfcnt++
+#endif
+ );
+#if TR_SR_LISTPERF
+ printk("tr_add_rif_entry num entries walked=%d num entries=%d\n"
+ ,perfcnt,rif_cache_listtop->num_entries);
+#endif
+#if TR_SR_DEBUG
+ debugstr="updating";
+#endif
+ if(entry) {
+#if !TR_DEBUG_TIME
+#if !TR_SR_SANITY
+ /* Try to keep good routes */
+ if(rcflen>((ntohs(entry->rcf) & TR_RCF_LEN_MASK)>>8)
+ ||((now-entry->dentry[TR_UPDATED].timestamp)<(120*HZ))) {
+
+ /* We avoid update the timestamp here
+ * so sanity timestamps are invalid in reality.
+ */
+ goto done;
+ }
+#endif
+#endif
+ }
+ else {
+ if(rif_cache_listtop->num_entries<sysctl_tr_rif_linked_list_width) { /* room left: allocate a fresh entry */
+ entry=tr_alloc(sizeof(tr_rif_cache_entry));
+ fixupdated=FALSE;
+ if(!entry)
+ goto fail;
+ tr_add_to_tail(rif_cache_listtop,entry,TR_USED,now);
+ tr_add_to_head(rif_cache_listtop,entry,TR_UPDATED,now);
+ rif_cache_listtop->num_entries++;
+#if TR_SR_SANITY
+ tr_sanity_check_list(rif_cache_listtop,TR_USED,"adding entries",0);
+ tr_sanity_check_list(rif_cache_listtop,TR_UPDATED,"adding entries",0);
+#endif
+#if TR_SR_DEBUG
+ debugstr="adding";
+#endif
+ }
+ else {
+ /* Entry wasn't in cache & therefore hardly
+ * deserves to be here, however if a nw reply
+ * comes down soon it'll stay.
+ * Remember on average hash_width/2 entries
+ * will be thrown out before us.
+ * the other values in the entry are still good
+ */
+ entry=rif_cache_listtop->dlist[TR_UPDATED].tail; /* list full: recycle the tail of the TR_UPDATED list */
+#if TR_SR_DEBUG
+ debugstr="invalidating old entry with";
+#endif
+ }
+ }
+ entry->dentry[TR_UPDATED].timestamp=now;
+ entry->addr[0]=trh_saddr0;
+ memcpy(&entry->addr[1],&trh->saddr[1],TR_ALEN-1);
 #if TR_SR_DEBUG
-printk("updating rif_entry: addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
- trh->saddr[0],trh->saddr[1],trh->saddr[2],
- trh->saddr[3],trh->saddr[4],trh->saddr[5],
- ntohs(trh->rcf));
+ printk("%s listtop=%p entry=%p : addr:%02X:%02X:%02X:%02X:%02X:%02X rcf:%04X\n",
+ debugstr,rif_cache_listtop,entry,
+ trh_saddr0,trh->saddr[1],trh->saddr[2],
+ trh->saddr[3],trh->saddr[4],trh->saddr[5],
+ ntohs(trh->rcf));
 #endif
- entry->rcf = trh->rcf & htons((unsigned short)~TR_RCF_BROADCAST_MASK);
- memcpy(&(entry->rseg[0]),&(trh->rseg[0]),8*sizeof(unsigned short));
- }
- entry->last_used=jiffies;
+
+ if (rcflen>=6) {
+ entry->rcf=trh->rcf&__constant_htons(TR_RCF_CACHE_MASK);
+ memcpy(entry->rseg,trh->rseg,rcflen-sizeof(entry->rcf)); /* NOTE(review): rcflen comes from the frame; no check against the size of entry->rseg is visible here - confirm */
+ }
+ else {
+ entry->rcf=0; /* an rcf of 0 is what tr_rif_proc_info prints as "local" */
+ }
+ goto done;
+ fail:
+ printk(KERN_DEBUG "kmalloc failed for tr_add_rif_info!\n");
+ done:
+ if(fixupdated)
+ tr_make_list_head(rif_cache_listtop,entry,TR_UPDATED);
+ tr_unlock(dev);
         }
- spin_unlock_bh(&rif_lock);
+ return;
 }
 
+
+
 /*
- * Scan the cache with a timer and see what we need to throw out.
+ * Create the Token Ring MAC header for an arbitrary protocol layer
+ *
+ * saddr=NULL means use device source address
+ * daddr=NULL means leave destination address (eg unresolved arp)
+ *
+ * NB as the TR_IG_BIT is set in the ip multicast functional addr
+ * ( C0:00:00:04:00:00 ) we do not have to explicitly check for it DJB.
  */
+int tr_header(struct sk_buff *skb, struct net_device *dev, unsigned short type,
+ void *daddr, void *saddr, unsigned len)
+{
+ hdr_8022 *ptr_8022=NULL,*tr_ptr_8022;
+ int hdr_len=0,rcflen=0;
+ tr_fake_header *fake_header;
+ int broadcast;
+ __u16 rcf=TR_NORMAL;
+ tr_hw_hdr *trh;
+ __u8 fc=0;
+
+ if(daddr==NULL) { /* destination not known yet: push a placeholder that tr_rebuild_header() reads back later */
+ fake_header=(tr_fake_header *)skb_push(skb,TR_HLEN);
+ memcpy(&fake_header->saddr,saddr ? saddr:dev->dev_addr,TR_ALEN);
+ fake_header->type=type;
+ return -TR_HLEN; /* This is a kludge, done because some code might expect this */
+ }
+
+ switch(type) {
+#if 0
+ /* raw ether proto=len
+ * should be callable from p8023.c
+ */
+ case ETH_P_802_3:
+ break;
+#endif
+ case ETH_P_ARP:
+ case ETH_P_IP:
+ ptr_8022=&hdr_8022list[0]; /* ARP and IP both use the first 802.2 header template */
+ break;
+ }
+ if(ptr_8022) {
+ fc=LLC_FRAME;
+ tr_ptr_8022=(hdr_8022 *)skb_push(skb,sizeof(*ptr_8022));
+ memcpy(tr_ptr_8022,ptr_8022,sizeof(*ptr_8022));
+ tr_ptr_8022->ethertype = htons(type);
+ hdr_len+=sizeof(*ptr_8022);
+ }
+ broadcast=(memcmp(daddr,dev->broadcast,TR_ALEN)==0);
+ if(ptr_8022)
+ {
+ if(broadcast)
+ rcf=(type==ETH_P_ARP ? TR_RCF_ALL_RINGS_BROADCAST : TR_RCF_LIMITED_BROADCAST); /* ARP broadcasts go to all rings, other broadcasts are limited */
+ else {
+ if(((__u8 *)daddr)[0]&TR_IG_BIT)
+ rcf=TR_RCF_LIMITED_BROADCAST; /* group addresses are sent as limited broadcasts */
+ }
+ if(rcf==TR_NORMAL) {
+ if(dev->rif_cache_lists)
+ rcflen=tr_add_rif(skb,daddr,dev); /* presumably -1 when no route to daddr is cached - see the check below */
+ if(dev->rif_cache_lists==NULL||rcflen==-1) {
+ rcf=TR_RCF_LIMITED_BROADCAST;
+ /* send the mac address to all rings */
+ }
+ }
+ if(rcf!=TR_NO_RCF&&rcf!=TR_NORMAL) {
+ *((__u16 *)skb_push(skb,sizeof(__u16)))=
+ (htons((sizeof(__u16) << 8)
+ | tr_set_rcf_framesize(dev) | rcf));
+ rcflen=sizeof(__u16); /* the rif consists of the 2 byte rcf only */
+ }
+ }
+ trh=(tr_hw_hdr *)skb_push(skb,sizeof(*trh));
+ memcpy(trh->daddr,broadcast ? dev->broadcast:daddr,TR_ALEN);
+ /* We can be called from rebuild header so we might overwrite saddr
+ * so we need memmove here instead of memcpy
+ */
+ memmove(trh->saddr,saddr ? saddr:dev->dev_addr,TR_ALEN);
+ if(rcflen)
+ trh->saddr[0]|=TR_RII; /* flag the presence of routing information in the source address */
+ trh->fc=fc;
+ trh->ac=AC;
+ hdr_len+=sizeof(tr_hw_hdr);
+ return(hdr_len+rcflen); /* 802.2 header (if any) + hardware header + rif length */
+}
 
-static void rif_check_expire(unsigned long dummy)
+
+/*
+ * A neighbour discovery of some species (eg arp) has completed. We
+ * can now send the packet.
+ */
+
+int tr_rebuild_header(struct sk_buff *skb)
+{
+ tr_fake_header *fake_header=(tr_fake_header *)skb->data; /* placeholder written by tr_header() when daddr was NULL */
+ __u8 daddr[TR_ALEN]; /* NOTE(review): only arp_find() is given this buffer to fill; it is never written when CONFIG_INET is off */
+
+ skb_pull(skb,TR_HLEN); /* drop the placeholder; fake_header is still read below */
+#ifdef CONFIG_INET
+ if(arp_find(daddr, skb)) { /* non-zero: presumably the address is not resolved yet - TODO confirm */
+ return 1;
+ }
+ else
+#endif
+ {
+ tr_header(skb,skb->dev,fake_header->type,&daddr,fake_header->saddr,0); /* build the real header with the saved type and source address */
+ return 0;
+ }
+}
+
+/*
+ * Some of this is a bit hackish. We intercept RIF information
+ * used for source routing. We also grab IP directly and don't feed
+ * it via SNAP.
+ */
+
+unsigned short tr_type_trans(struct sk_buff *skb, struct net_device *dev)
 {
- int i;
- unsigned long now=jiffies;
 
- spin_lock(&rif_lock);
+ struct trh_hdr *trh=(struct trh_hdr *)skb->data;
+ hdr_8022 *ptr_8022;
+ int tr_hdr_size=sizeof(tr_hw_hdr); /* grows by the rif length when the RII bit is set */
+
+ skb->mac.raw = skb->data;
         
- for(i=0; i < RIF_TABLE_SIZE;i++)
+ if(trh->saddr[0] & TR_RII)
+ tr_hdr_size+=(ntohs(trh->rcf) & TR_RCF_LEN_MASK) >> 8;
+ skb_pull(skb,tr_hdr_size);
+ ptr_8022 = (hdr_8022 *)skb->data; /* what follows the hardware header and rif */
+
+ if(*trh->daddr & TR_IG_BIT) {
+ if(!memcmp(trh->daddr,dev->broadcast,TR_ALEN))
+ skb->pkt_type=PACKET_BROADCAST;
+ else
+ skb->pkt_type=PACKET_MULTICAST;
+ }
+ else if ( (trh->daddr[0] & 0x01) && (trh->daddr[1] & 0x00) &&
+ (trh->daddr[2] & 0x5E)) /* NOTE(review): (trh->daddr[1] & 0x00) is always 0, so this branch is never taken - confirm the intended test */
         {
- rif_cache entry, *pentry=rif_table+i;
- while((entry=*pentry))
- {
- /*
- * Out it goes
- */
- if((now-entry->last_used) > sysctl_tr_rif_timeout)
- {
- *pentry=entry->next;
- kfree_s(entry,sizeof(struct rif_cache_s));
- }
- else
- pentry=&entry->next;
- }
+ skb->pkt_type=PACKET_MULTICAST;
+ }
+ else if(dev->flags & IFF_PROMISC) {
+ if(memcmp(trh->daddr, dev->dev_addr, TR_ALEN))
+ skb->pkt_type=PACKET_OTHERHOST; /* promiscuous mode: frame addressed to another station */
         }
-
- spin_unlock(&rif_lock);
 
+ if ((skb->pkt_type != PACKET_BROADCAST) &&
+ (skb->pkt_type != PACKET_MULTICAST))
+ tr_add_rif_info(trh,dev) ; /* only frames not classified as broadcast/multicast feed the rif cache */
         /*
- * Reset the timer
+ * Strip the SNAP header from ARP packets since we don't
+ * pass them through to the 802.2/SNAP layers.
          */
-
- mod_timer(&rif_timer, jiffies+sysctl_tr_rif_timeout);
 
+ if (ptr_8022->dsap == EXTENDED_SAP &&
+ (ptr_8022->ethertype == ntohs(ETH_P_IP) ||
+ ptr_8022->ethertype == ntohs(ETH_P_ARP))) {
+ skb_pull(skb, sizeof(*ptr_8022));
+ return ptr_8022->ethertype; /* returned exactly as stored in the frame */
+ }
+
+ return ntohs(ETH_P_802_2); /* everything else is reported as 802.2 */
 }
 
 /*
@@ -454,82 +838,111 @@
  * routing.
  */
  
+#ifdef CONFIG_PROC_FS
+/* for safety put curly brackets around this macro
+ * otherwise you may get confused elses etc.
+ * also make sure that there is a \n on the end of
+ * each printf otherwise the pretty printing might not
+ * be so pretty if the cache gets modified while printing
+ * to the procfs .
+ */
+#define trproc_printf(exittr,args...) do { /* single statement: safe in an unbraced if/else */ \
+splen=sprintf(spbuff,##args); /* format at the current write position */ \
+spoffset+=splen; \
+if(spoffset>offset) { /* only output past the requested offset is kept */ \
+ spbuff+=splen; \
+ currlen+=splen; \
+} \
+if(currlen>=length) /* requested length reached: leave via the given label */ \
+ goto exittr; } while(0)
+#endif
+
 #ifndef CONFIG_PROC_FS
-static int rif_get_info(char *buffer,char **start, off_t offset, int length) {}
+static int tr_rif_proc_info(char *buffer,char **start, off_t offset, int length) { return 0; } /* no procfs: report zero bytes instead of falling off the end of a non-void function */
 #else
-static int rif_get_info(char *buffer,char **start, off_t offset, int length)
+static int tr_rif_proc_info(char *buffer,char **start, off_t offset, int length) /* prints the rif cache of every device whose name starts with "tr"; returns the byte count */
 {
- int len=0;
- off_t begin=0;
- off_t pos=0;
- int size,i,j,rcf_len,segment,brdgnmb;
+ char *spbuff=*start=buffer;
+ off_t spoffset=0;
+ int currlen=0,splen;
+ tr_rif_cache_entry *entry,*local_entry=NULL;
+ tr_rif_cache_lists *rif_cache_lists;
+ int dir;
+ int rifcnt,listidx;
+ struct net_device *dev;
         unsigned long now=jiffies;
+ char rifbuff[TR_RIF_BUFF_SIZE],*rifstr;
+ int lock_acquired=FALSE;
+ read_lock(&dev_base_lock); /* taken before any trproc_printf: every exit passes through done1, which unlocks */
+ trproc_printf(done1," last last\n");
+ trproc_printf(done1," used updated\n");
+ trproc_printf(done1,"if TR address (secs) (secs) rcf routing segments\n");
+ trproc_printf(done1,"=======================================================================\n");
+ for (dev = dev_base; dev != NULL; dev = dev->next) {
+ if(strncmp(dev->name, "tr", 2) == 0) {
+ lock_acquired=tr_lock(dev);
+ if(lock_acquired) {
+ rif_cache_lists=dev->rif_cache_lists;
+ rifcnt=0; local_entry=NULL; /* per-device state: do not report a ring number taken from a previous device */
+ if(rif_cache_lists) {
+ for(listidx=0;listidx<TR_RIF_CACHE_LIST_SIZE;listidx++) {
+ for(entry=rif_cache_lists[listidx].dlist[TR_USED].head;
+ entry;entry=entry->dentry[TR_USED].next) {
+ rifcnt++;
+ if(entry->rcf==0) {
+ rifstr="local";
+ }
+ else {
+ local_entry=entry; /* a routed entry, used below to derive the local ring number */
+ rifstr=rifbuff;
+ sprintfbridgeinfo(rifbuff,(tr_rif_info *)&entry->rcf);
+ }
+ trproc_printf(done2,"%s %02X:%02X:%02X:%02X:%02X:%02X %8li %8li %s\n",
+ dev->name,entry->addr[0],entry->addr[1],entry->addr[2],
+ entry->addr[3],entry->addr[4],entry->addr[5],
+ (now-entry->dentry[TR_USED].timestamp)/HZ,
+ (now-entry->dentry[TR_UPDATED].timestamp)/HZ,rifstr);
+ }
+ }
+ }
+ else {
+ trproc_printf(done2,"%s currently has no rif_entries\n",dev->name);
+ }
 
- rif_cache entry;
-
- size=sprintf(buffer,
- "if TR address TTL rcf routing segments\n");
- pos+=size;
- len+=size;
-
- spin_lock_bh(&rif_lock);
- for(i=0;i < RIF_TABLE_SIZE;i++)
- {
- for(entry=rif_table[i];entry;entry=entry->next) {
- struct net_device *dev = __dev_get_by_index(entry->iface);
-
- size=sprintf(buffer+len,"%s %02X:%02X:%02X:%02X:%02X:%02X %7li ",
- dev?dev->name:"?",entry->addr[0],entry->addr[1],entry->addr[2],entry->addr[3],entry->addr[4],entry->addr[5],
- sysctl_tr_rif_timeout-(now-entry->last_used));
- len+=size;
- pos=begin+len;
- if (entry->local_ring)
- size=sprintf(buffer+len,"local\n");
- else {
- size=sprintf(buffer+len,"%04X", ntohs(entry->rcf));
- rcf_len = ((ntohs(entry->rcf) & TR_RCF_LEN_MASK)>>8)-2;
- if (rcf_len)
- rcf_len >>= 1;
- for(j = 1; j < rcf_len; j++) {
- if(j==1) {
- segment=ntohs(entry->rseg[j-1])>>4;
- len+=size;
- pos=begin+len;
- size=sprintf(buffer+len," %03X",segment);
- };
- segment=ntohs(entry->rseg[j])>>4;
- brdgnmb=ntohs(entry->rseg[j-1])&0x00f;
- len+=size;
- pos=begin+len;
- size=sprintf(buffer+len,"-%01X-%03X",brdgnmb,segment);
+ trproc_printf(done2,"\nstats for %s\n"
+ "rifcnt=%d rif mem used %ldk\n",
+ dev->name,rifcnt,
+ (long)(rif_cache_lists ? (TR_RIF_CACHE_LIST_SIZE*sizeof(tr_rif_cache_lists)
+ +((rifcnt*sizeof(tr_rif_cache_entry))))>>10:0));
+ if(local_entry) {
+ dir=(ntohs(local_entry->rcf)&TR_RCF_DIR_BIT ? TRUE:FALSE);
+ trproc_printf(done2,"local ring number=%03X\n",
+ RD_TO_RINGNUM(local_entry->rseg[dir ?
+ 0:(((ntohs(local_entry->rcf) & TR_RCF_LEN_MASK)>>9)-2)]));
                                 }
- len+=size;
- pos=begin+len;
- size=sprintf(buffer+len,"\n");
- }
- len+=size;
- pos=begin+len;
-
- if(pos<offset)
- {
- len=0;
- begin=pos;
+ lock_acquired=FALSE;
+ tr_unlock(dev);
                         }
- if(pos>offset+length)
- break;
- }
- if(pos>offset+length)
- break;
- }
- spin_unlock_bh(&rif_lock);
-
- *start=buffer+(offset-begin); /* Start of wanted data */
- len-=(offset-begin); /* Start slop */
- if(len>length)
- len=length; /* Ending slop */
- if (len<0)
- len=0;
- return len;
+ else {
+ trproc_printf(done1,"\n%s is busy no info available try later\n",dev->name);
+#if TR_DEBUG_LOCKS
+ trproc_printf(done1,"%s",lockbuff);
+#endif
+ }
+ }
+ }
+ done2: /* reached from inside the per-device section, possibly with the device lock still held */
+ if(dev&&lock_acquired)
+ tr_unlock(dev);
+ done1: /* only dev_base_lock is held here */
+ read_unlock(&dev_base_lock);
+ if(currlen>length) {
+ /* rewind to previous printf so that we are correctly
+ * aligned if we get called to print another page.
+ */
+ currlen-=splen;
+ }
+ return(currlen);
 }
 #endif
 
@@ -540,11 +953,27 @@
 
 void __init rif_init(struct net_proto *unused)
 {
- rif_timer.expires = RIF_TIMEOUT;
- rif_timer.data = 0L;
- rif_timer.function = rif_check_expire;
- init_timer(&rif_timer);
- add_timer(&rif_timer);
+ proc_net_create("tr_rif",0,tr_rif_proc_info); /* registers tr_rif_proc_info under the name "tr_rif"; the old expiry timer is no longer set up */
+}
 
- proc_net_create("tr_rif",0,rif_get_info);
+void tr_free_rif_cache(struct net_device *dev)
+{
+        /* Frees every cached rif entry of dev, then the list array itself. */
+        tr_rif_cache_lists *rif_cache_lists=dev->rif_cache_lists;
+        tr_rif_cache_entry *entry,*next;
+        int listidx;
+        if(rif_cache_lists==NULL)
+                return;
+        if(tr_lock(dev)) {
+                for(listidx=0;listidx <TR_RIF_CACHE_LIST_SIZE;listidx++)
+                        for(entry=rif_cache_lists[listidx].dlist[TR_USED].head;entry;entry=next) {
+                                next=entry->dentry[TR_USED].next; /* read the link before the entry is freed */
+                                kfree(entry);
+                        }
+                kfree(rif_cache_lists);
+                dev->rif_cache_lists=NULL;
+                tr_unlock(dev);
+        } else
+                printk("free rif cache failed for %s possibly busy\n",dev->name);
 }
+
diff -ur linux-2.3.39.orig/net/core/sock.c linux-2.3.39/net/core/sock.c
--- linux-2.3.39.orig/net/core/sock.c Tue Jan 11 13:48:50 2000
+++ linux-2.3.39/net/core/sock.c Tue Jan 11 13:49:53 2000
@@ -502,6 +502,9 @@
         if(sk && zero_it) {
                 memset(sk, 0, sizeof(struct sock));
                 sk->family = family;
+#if CONFIG_TR
+ sk->hint = NULL; /* user to speed up source routing */
+#endif
                 sock_lock_init(sk);
         }
 

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Jan 15 2000 - 21:00:18 EST