Re: Patch for faster interrupt handling.

Linus Torvalds (torvalds@transmeta.com)
Tue, 4 Aug 1998 00:24:14 -0700 (PDT)


Hi Andi,

On Tue, 4 Aug 1998, Andi Kleen wrote:
>
> This patch makes gcc generate better code in do_8259A_IRQ for the
> UniProcessor case. The problem is that gcc generates horrible code for
> long long on i386, and cached_irq_mask was a long long. It only needs
> to be 64 bits to handle all the 64 IO-APIC IRQs, but on UP, which supports
> only the 16 8259A interrupts, that is clearly not needed.

Well, the particular feature wasn't needed on SMP either: the bitmask in
"cached_irq_mask" is only relevant for the old-style irq's anyway now that
the io-apic interrupt handling has been cleaned up from the original "as
close to the old 8259 code as possible" code. So the high bits are
actually not needed, and haven't been used for some time.

As such, I'd rather just clean it up a bit more, and this is my first cut.
The code quality should be as good or better than with your patch, and it
doesn't need any SMP/UP tests because both are the same. I might have
overlooked something, but please give me feedback.

This also contains the fixes to hopefully make it work on
- UP (silly compiler bug workaround version II)
- plain Pentium (not "Pro" or "II") SMP (bootup sequence bug)

I've verified that it compiles UP, but haven't verified that it actually
does anything interesting, so caveat emptor. The code looks sane, and the
changes are pretty straightforward cleanups, but there might be a typo or
something.

Linus

-----
diff -u --recursive --new-file v2.1.114/linux/arch/i386/kernel/io_apic.c linux/arch/i386/kernel/io_apic.c
--- v2.1.114/linux/arch/i386/kernel/io_apic.c Mon Aug 3 17:48:26 1998
+++ linux/arch/i386/kernel/io_apic.c Mon Aug 3 23:57:29 1998
@@ -1108,9 +1108,8 @@
/*
* disable it in the 8259A:
*/
- cached_irq_mask |= 1 << i;
if (i < 16)
- set_8259A_irq_mask(i);
+ disable_8259A_irq(i);
}
}
}
diff -u --recursive --new-file v2.1.114/linux/arch/i386/kernel/irq.c linux/arch/i386/kernel/irq.c
--- v2.1.114/linux/arch/i386/kernel/irq.c Tue Jul 21 00:15:30 1998
+++ linux/arch/i386/kernel/irq.c Mon Aug 3 23:57:29 1998
@@ -48,27 +48,23 @@
/*
* About the IO-APIC, the architecture is 'merged' into our
* current irq architecture, seemlessly. (i hope). It is only
- * visible through 8 more hardware interrupt lines, but otherwise
- * drivers are unaffected. The main code is believed to be
- * NR_IRQS-safe (nothing anymore thinks we have 16
+ * visible through a few more more hardware interrupt lines, but
+ * otherwise drivers are unaffected. The main code is believed
+ * to be NR_IRQS-safe (nothing anymore thinks we have 16
* irq lines only), but there might be some places left ...
*/

/*
* This contains the irq mask for both 8259A irq controllers,
- * and on SMP the extended IO-APIC IRQs 16-23. The IO-APIC
- * uses this mask too, in probe_irq*().
- *
- * (0x0000ffff for NR_IRQS==16, 0x00ffffff for NR_IRQS=24)
*/
-#if NR_IRQS == 64
-unsigned long long cached_irq_mask = -1;
-#else
-unsigned long long cached_irq_mask = (((unsigned long long) 1)<<NR_IRQS)-1;
-#endif
+static unsigned int cached_irq_mask = 0xffff;
+
+#define __byte(x,y) (((unsigned char *)&(y))[x])
+#define __word(x,y) (((unsigned short *)&(y))[x])
+#define __long(x,y) (((unsigned int *)&(y))[x])

-#define cached_21 ((cached_irq_mask | io_apic_irqs) & 0xff)
-#define cached_A1 (((cached_irq_mask | io_apic_irqs) >> 8) & 0xff)
+#define cached_21 (__byte(0,cached_irq_mask))
+#define cached_A1 (__byte(1,cached_irq_mask))

spinlock_t irq_controller_lock;

@@ -81,18 +77,11 @@
* this 'mixed mode' IRQ handling costs us one more branch in do_IRQ,
* but we have _much_ higher compatibility and robustness this way.
*/
-
-/*
- * Default to all normal IRQ's _not_ using the IO APIC.
- *
- * To get IO-APIC interrupts we turn some of them into IO-APIC
- * interrupts during boot.
- */
unsigned long long io_apic_irqs = 0;

static void do_8259A_IRQ (unsigned int irq, int cpu, struct pt_regs * regs);
-static void enable_8259A_irq (unsigned int irq);
-static void disable_8259A_irq (unsigned int irq);
+static void enable_8259A_irq(unsigned int irq);
+void disable_8259A_irq(unsigned int irq);

/*
* Dummy controller type for unused interrupts
@@ -127,10 +116,10 @@
* These have to be protected by the irq controller spinlock
* before being called.
*/
-
-static inline void mask_8259A(unsigned int irq)
+void disable_8259A_irq(unsigned int irq)
{
- cached_irq_mask |= 1 << irq;
+ unsigned int mask = 1 << irq;
+ cached_irq_mask |= mask;
if (irq & 8) {
outb(cached_A1,0xA1);
} else {
@@ -138,9 +127,10 @@
}
}

-static inline void unmask_8259A(unsigned int irq)
+static void enable_8259A_irq(unsigned int irq)
{
- cached_irq_mask &= ~(1 << irq);
+ unsigned int mask = ~(1 << irq);
+ cached_irq_mask &= mask;
if (irq & 8) {
outb(cached_A1,0xA1);
} else {
@@ -148,21 +138,6 @@
}
}

-void set_8259A_irq_mask(unsigned int irq)
-{
- /*
- * (it might happen that we see IRQ>15 on a UP box, with SMP
- * emulation)
- */
- if (irq < 16) {
- if (irq & 8) {
- outb(cached_A1,0xA1);
- } else {
- outb(cached_21,0x21);
- }
- }
-}
-
/*
* This builds up the IRQ handler stubs using some ugly macros in irq.h
*
@@ -638,23 +613,7 @@
return status;
}

-/*
- * disable/enable_irq() wait for all irq contexts to finish
- * executing. Also it's recursive.
- */
-static void disable_8259A_irq(unsigned int irq)
-{
- cached_irq_mask |= 1 << irq;
- set_8259A_irq_mask(irq);
-}
-
-void enable_8259A_irq (unsigned int irq)
-{
- cached_irq_mask &= ~(1 << irq);
- set_8259A_irq_mask(irq);
-}
-
-int i8259A_irq_pending (unsigned int irq)
+int i8259A_irq_pending(unsigned int irq)
{
unsigned int mask = 1<<irq;

@@ -664,9 +623,9 @@
}


-void make_8259A_irq (unsigned int irq)
+void make_8259A_irq(unsigned int irq)
{
- io_apic_irqs &= ~(1<<irq);
+ __long(0,io_apic_irqs) &= ~(1<<irq);
irq_desc[irq].handler = &i8259A_irq_type;
disable_irq(irq);
enable_irq(irq);
@@ -705,7 +664,7 @@
if (handle_IRQ_event(irq, regs)) {
spin_lock(&irq_controller_lock);
if (!(irq_desc[irq].status &= IRQ_DISABLED))
- unmask_8259A(irq);
+ enable_8259A_irq(irq);
spin_unlock(&irq_controller_lock);
}

@@ -846,15 +805,12 @@
#ifdef __SMP__
if (IO_APIC_IRQ(irq)) {
/*
- * First disable it in the 8259A:
+ * If it was on a 8259, disable it there
+ * and move the "pendingness" onto the
+ * new irq descriptor.
*/
- cached_irq_mask |= 1 << irq;
if (irq < 16) {
- set_8259A_irq_mask(irq);
- /*
- * transport pending ISA IRQs to
- * the new descriptor
- */
+ disable_8259A_irq(irq);
if (i8259A_irq_pending(irq))
irq_desc[irq].events = 1;
}
diff -u --recursive --new-file v2.1.114/linux/arch/i386/kernel/irq.h linux/arch/i386/kernel/irq.h
--- v2.1.114/linux/arch/i386/kernel/irq.h Mon Aug 3 17:48:26 1998
+++ linux/arch/i386/kernel/irq.h Mon Aug 3 23:41:54 1998
@@ -56,7 +56,7 @@

void mask_irq(unsigned int irq);
void unmask_irq(unsigned int irq);
-void set_8259A_irq_mask (unsigned int irq);
+void disable_8259A_irq(unsigned int irq);
int i8259A_irq_pending (unsigned int irq);
void ack_APIC_irq (void);
void setup_IO_APIC (void);
@@ -67,7 +67,6 @@
void print_IO_APIC (void);

extern unsigned long long io_apic_irqs;
-extern unsigned long long cached_irq_mask;

#define IO_APIC_VECTOR(irq) irq_vector[irq]

diff -u --recursive --new-file v2.1.114/linux/include/asm-i386/spinlock.h linux/include/asm-i386/spinlock.h
--- v2.1.114/linux/include/asm-i386/spinlock.h Mon Aug 3 17:48:28 1998
+++ linux/include/asm-i386/spinlock.h Mon Aug 3 23:16:10 1998
@@ -10,8 +10,8 @@
/*
* Your basic spinlocks, allowing only a single CPU anywhere
*/
-typedef struct { } spinlock_t;
-#define SPIN_LOCK_UNLOCKED { }
+typedef struct { int gcc_is_bugggy; } spinlock_t;
+#define SPIN_LOCK_UNLOCKED { 0 }

#define spin_lock_init(lock) do { } while(0)
#define spin_lock(lock) do { } while(0)
diff -u --recursive --new-file v2.1.114/linux/include/linux/sched.h linux/include/linux/sched.h
--- v2.1.114/linux/include/linux/sched.h Mon Aug 3 17:48:28 1998
+++ linux/include/linux/sched.h Tue Aug 4 00:05:26 1998
@@ -322,16 +322,6 @@

#define DEF_PRIORITY (20*HZ/100) /* 200 ms time slices */

-/* Note: This is very ugly I admit. But some versions of gcc will
- * dump core when an empty structure constant is parsed at
- * the end of a large top level structure initialization. -DaveM
- */
-#ifdef __SMP__
-#define INIT_LOCKS SPIN_LOCK_UNLOCKED
-#else
-#define INIT_LOCKS
-#endif
-
/*
* INIT_TASK is used to set up the first task table, touch at
* your own risk!. Base=0, limit=0x1fffff (=2MB)
@@ -367,7 +357,7 @@
/* fs */ &init_fs, \
/* files */ &init_files, \
/* mm */ &init_mm, \
-/* signals */ INIT_LOCKS, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \
+/* signals */ SPIN_LOCK_UNLOCKED, &init_signals, {{0}}, {{0}}, NULL, &init_task.sigqueue, 0, 0, \
}

union task_union {
diff -u --recursive --new-file v2.1.114/linux/init/main.c linux/init/main.c
--- v2.1.114/linux/init/main.c Mon Aug 3 12:45:48 1998
+++ linux/init/main.c Mon Aug 3 13:54:07 1998
@@ -1123,11 +1123,12 @@
dquot_init_hash();
#endif
printk("POSIX conformance testing by UNIFIX\n");
- check_bugs();

#ifdef __SMP__
smp_init();
#endif
+
+ check_bugs();

#if defined(CONFIG_MTRR) /* Do this after SMP initialization */
/*

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.altern.org/andrebalsa/doc/lkml-faq.html