Re: New pentium bug workaround - please test..

Hans Lermen (lermen@elserv.ffm.fgan.de)
Thu, 20 Nov 1997 02:59:26 +0100 (MET)


On Wed, 19 Nov 1997, Linus Torvalds wrote:

> What we could just do, is to re-use the original page that contained the
> very original IDT - and keep modifying that original page through its
> original address.

Ok, done, tested, works. Patch for vanilla 2.0.32 is appended.

Hans
<lermen@fgan.de>

--- linux-2.0.32-clean/arch/i386/kernel/traps.c Tue Nov 18 10:25:34 1997
+++ linux-2.0.32-newf00f/arch/i386/kernel/traps.c Thu Nov 20 02:27:37 1997
@@ -348,33 +348,35 @@
pgd_t * pgd;
pmd_t * pmd;
pte_t * pte;
- unsigned long twopage;
- struct desc_struct *new_idt;
+ unsigned long page;
+ unsigned long idtpage = (unsigned long)idt;
+ struct desc_struct *alias_idt;

- printk("moving IDT ... ");
+ printk("alias mapping IDT readonly ... ");

- twopage = (unsigned long) vmalloc (2*PAGE_SIZE);
-
- new_idt = (void *)(twopage + 4096-7*8);
-
- memcpy(new_idt,idt,256*8);
+ /* just to get free address space */
+ page = (unsigned long) vmalloc (PAGE_SIZE);

+ alias_idt = (void *)(page + (idtpage & ~PAGE_MASK));
idt_descriptor.limit = 256*8-1;
- idt_descriptor.addr = VMALLOC_VMADDR(new_idt);
-
- __asm__ __volatile__("\tlidt %0": "=m" (idt_descriptor));
- idt = new_idt;
+ idt_descriptor.addr = VMALLOC_VMADDR(alias_idt);

/*
- * Unmap lower page:
+ * alias map the original idt to the alias page:
*/
- twopage = VMALLOC_VMADDR(twopage);
- pgd = pgd_offset(current->mm, twopage);
- pmd = pmd_offset(pgd, twopage);
- pte = pte_offset(pmd, twopage);
-
- pte_clear(pte);
+ page = VMALLOC_VMADDR(page);
+ pgd = pgd_offset(&init_mm, page);
+ pmd = pmd_offset(pgd, page);
+ pte = pte_offset(pmd, page);
+ /* give memory back to the pool, don't need it */
+ free_page(pte_page(*pte));
+ /* ... and set the readonly alias */
+ set_pte(pte, mk_pte(idtpage & PAGE_MASK, PAGE_KERNEL));
+ *pte = pte_wrprotect(*pte);
flush_tlb_all();
+
+ /* now we have the mapping ok, we can do LIDT */
+ __asm__ __volatile__("\tlidt %0": "=m" (idt_descriptor));

printk(" ... done\n");
}
--- linux-2.0.32-clean/arch/i386/kernel/head.S Tue Nov 18 10:25:34 1997
+++ linux-2.0.32-newf00f/arch/i386/kernel/head.S Thu Nov 20 00:54:42 1997
@@ -342,6 +342,11 @@
.long SYMBOL_NAME(init_user_stack)+4096
.long KERNEL_DS

+/* NOTE: keep __idt shortly after the '.org 0x6000' above.
+ It must fit completely within _one_ page */
+ENTRY(__idt)
+ .fill 256,8,0 # idt is uninitialized
+
/* This is the default interrupt "handler" :-) */
int_msg:
.asciz "Unknown interrupt\n"
@@ -377,9 +382,6 @@
idt_descr:
.word 256*8-1 # idt contains 256 entries
.long 0xc0000000+SYMBOL_NAME(__idt)
-
-ENTRY(__idt)
- .fill 256,8,0 # idt is uninitialized

ALIGN
.word 0
--- linux-2.0.32-clean/arch/i386/mm/fault.c Tue Nov 18 10:25:34 1997
+++ linux-2.0.32-newf00f/arch/i386/mm/fault.c Thu Nov 20 02:41:34 1997
@@ -21,128 +21,10 @@

extern void die_if_kernel(const char *,struct pt_regs *,long);

-asmlinkage void do_divide_error (struct pt_regs *, unsigned long);
-asmlinkage void do_debug (struct pt_regs *, unsigned long);
-asmlinkage void do_nmi (struct pt_regs *, unsigned long);
-asmlinkage void do_int3 (struct pt_regs *, unsigned long);
-asmlinkage void do_overflow (struct pt_regs *, unsigned long);
-asmlinkage void do_bounds (struct pt_regs *, unsigned long);
asmlinkage void do_invalid_op (struct pt_regs *, unsigned long);
-asmlinkage void do_general_protection (struct pt_regs *, unsigned long);

extern int pentium_f00f_bug;

-static int handle_intx_eip_adjust(struct pt_regs *regs)
-{
- unsigned char *addr, *csp = 0;
- int wrap = 0;
- int count = 8; /* only check for reasonable number of bytes
- * else we do it the save 'simple way' */
- unsigned long _eip;
-#define XX_WRAP(x) (wrap ? *((unsigned short *)&x) : x)
-
- /* We rely on being able to access the memory pointed to by cs:eip
- * and the bytes behind it up to the faulting instruction,
- * because we just got an exception for this instruction and
- * hence the memory should just be successfully accessed.
- * In case of crossing a page boundary or when accessing kernel space
- * we just do the simple fix (increase eip by one).
- * This assumption also obsoletes checking of segment limit.
- * ( should be veryfied, however, if this assumption is true )
- */
-
- if (regs->cs == KERNEL_CS) {
- /* not what we expect */
- regs->eip++;
- return 0;
- }
-
- if (regs->eflags & VM_MASK) {
- /* we have real mode type selector */
- wrap = 1;
- csp = (unsigned char *)((unsigned long)regs->cs << 4);
- }
- else if (regs->cs & 4) {
- /* we have a LDT selector */
- struct desc_struct *p, *ldt = current->ldt;
- if (!ldt)
- ldt = (struct desc_struct*) &default_ldt;
- p = ldt + (regs->cs >> 3);
- csp = (unsigned char *)((p->a >> 16) | ((p->b & 0xff) << 16) | (p->b & 0xFF000000));
- if (!(p->b & 0x400000))
- wrap = 1; /* 16-bit segment */
- }
-
- _eip = regs->eip;
- addr = csp+XX_WRAP(_eip);
- while (count-- > 0) {
- if ((unsigned long)addr >= TASK_SIZE) {
- /* accessing kernel space, do the simple case */
- regs->eip++;
- return 0;
- }
- switch (get_user(addr)) {
-
- case 0xCC: /* single byte INT3 */
- XX_WRAP(_eip)++;
- regs->eip = _eip;
- return 0;
-
- case 0xCD: /* two byte INT 3 */
- XX_WRAP(_eip)++;
- /* fall through */
- case 0xCE: /* INTO, single byte */
- XX_WRAP(_eip)++;
- if ( (regs->eflags & VM_MASK)
- && ((regs->eflags & IOPL_MASK) != IOPL_MASK)) {
- /* not allowed, do GP0 fault */
- do_general_protection(regs, 0);
- return -1;
- }
- regs->eip = _eip;
- return 0;
-
- /* the prefixes from the Intel patch */
- case 0xF2 ... 0xF3:
- case 0x2E:
- case 0x36:
- case 0x3E:
- case 0x26:
- case 0x64 ... 0x67:
- break; /* just skipping them */
-
- default:
- /* not what we handle here,
- * just doing the simple fix
- */
- regs->eip++;
- return 0;
- }
-
- if ( !(++XX_WRAP(_eip)) ) {
- /* we wrapped around */
- regs->eip++;
- return 0;
- }
-
- addr = csp+XX_WRAP(_eip);
- if ( !((unsigned long)addr & ~(PAGE_SIZE -1)) ) {
- /* we would cross page boundary, not good,
- * doing the simple fix
- */
- regs->eip++;
- return 0;
- }
- }
-
- /* if we come here something weird happened,
- * just doing the simple fix
- */
- regs->eip++;
- return 0;
-}
-
-
/*
* This routine handles page faults. It determines the address,
* and the problem, and then passes it off to one of the appropriate
@@ -245,22 +127,15 @@
*/
if ( pentium_f00f_bug ) {
unsigned long nr;
+ extern struct {
+ unsigned short limit;
+ unsigned long addr __attribute__((packed));
+ } idt_descriptor;

- nr = (address - TASK_SIZE - (unsigned long) idt) >> 3;
+ nr = (address - idt_descriptor.addr) >> 3;

- if (nr < 7) {
- static void (*handler[])(struct pt_regs *, unsigned long) = {
- do_divide_error, /* 0 - divide overflow */
- do_debug, /* 1 - debug trap */
- do_nmi, /* 2 - NMI */
- do_int3, /* 3 - int 3 */
- do_overflow, /* 4 - overflow */
- do_bounds, /* 5 - bound range */
- do_invalid_op }; /* 6 - invalid opcode */
- if ((nr == 3) || (nr == 4))
- if (handle_intx_eip_adjust(regs))
- return;
- handler[nr](regs, error_code);
+ if (nr == 6) {
+ do_invalid_op(regs, 0);
return;
}
}