patch for 2.1.103 ldt-related problems

Bill Hawes (whawes@star.net)
Sun, 24 May 1998 22:32:55 -0400


This is a multi-part message in MIME format.
--------------D6E59D8048D9105C22BE315F
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

After quite a bit of bug-tracking I've found the cause of the recently
reported invalid TSS double oops when running Wine. The symptoms were
that a selector in %fs referring to an ldt entry was causing first an
invalid TSS exception, which became a GP fault when the kernel attempted
to push and pop %fs.

The underlying problem turned out to be that when cloning a task that
used an ldt, the new clone wasn't being given its own ldt selector in
the gdt; instead the tss.ldt value was left set to the parent task's
selector. But if the parent task exited first and a new task used the
same task slot, the ldt selector in the gdt would be changed, leaving
the clone task with invalid selectors.

The attached patch fixes this and some other ldt-related problems, and
improves the reporting in show_registers that hopefully will make
tracking problems like this a little easier. The changes are:

(1) In kernel/fork.c, call copy_segments() with a NULL new_mm to
indicate we're setting up a clone task.

(2) In arch/i386/kernel/process.c, check for a NULL new_mm in
copy_segments and install the ldt descriptor in the gdt. Also, in
release_segments(), restore the default entry in the gdt so we don't
leave a descriptor to the now-released ldt memory.

(3) In arch/i386/kernel/ldt.c, add new error checking for the ldt
changes, and fix a memory leak if two clones both attempt to allocate an
ldt. The error checking code checks whether any segment registers refer
to the entry about to be changed, and builds a requirements mask for the
entry. It reports a warning if a change is made to an in-use entry. (I
haven't seen any of the warnings, so in practice this is probably very
rare.)

(4) in arch/i386/kernel/traps.c, print values for %fs and %gs, and show
the ldt, tss.ldt, LDT(nr), tr, and TSS(nr) values. The latter display
was what provided the essential clue to tracking this down -- when the
oops occurred, ldt and tss.ldt didn't match the LDT(nr) expected for the
task.

With the patch in place the previously 100% repeatable oops with Wine
has gone away, and other operations appear to correctly.

Regards,
Bill
--------------D6E59D8048D9105C22BE315F
Content-Type: text/plain; charset=us-ascii; name="arch386_ldt103-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="arch386_ldt103-patch"

--- linux-2.1.103/arch/i386/kernel/process.c.old Thu May 21 13:28:14 1998
+++ linux-2.1.103/arch/i386/kernel/process.c Sun May 24 21:57:49 1998
@@ -422,15 +422,20 @@

void release_segments(struct mm_struct *mm)
{
- void * ldt;
+ void * ldt = mm->segments;
+ int nr;

/* forget local segments */
__asm__ __volatile__("movl %w0,%%fs ; movl %w0,%%gs ; lldt %w0"
: /* no outputs */
: "r" (0));
current->tss.ldt = 0;
+ /*
+ * Set the GDT entry back to the default.
+ */
+ nr = current->tarray_ptr - &task[0];
+ set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY, &default_ldt, 1);

- ldt = mm->segments;
if (ldt) {
mm->segments = NULL;
vfree(ldt);
@@ -475,20 +480,30 @@
{
}

+/*
+ * If new_mm is NULL, we're being called to set up the LDT descriptor
+ * for a clone task. Each clone must have a separate entry in the GDT.
+ */
void copy_segments(int nr, struct task_struct *p, struct mm_struct *new_mm)
{
- int ldt_size = 1;
- void * ldt = &default_ldt;
struct mm_struct * old_mm = current->mm;
+ void * old_ldt = old_mm->segments, * ldt = old_ldt;
+ int ldt_size = LDT_ENTRIES;

p->tss.ldt = _LDT(nr);
- if (old_mm->segments) {
- new_mm->segments = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
- if (new_mm->segments) {
- ldt = new_mm->segments;
- ldt_size = LDT_ENTRIES;
- memcpy(ldt, old_mm->segments, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ if (old_ldt) {
+ if (new_mm) {
+ ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+ if (ldt) {
+ new_mm->segments = ldt;
+ memcpy(ldt, old_ldt, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ } else
+ goto no_ldt; /* N.B. report error? */
}
+ } else {
+ no_ldt:
+ ldt = &default_ldt;
+ ldt_size = 1;
}
set_ldt_desc(gdt+(nr<<1)+FIRST_LDT_ENTRY, ldt, ldt_size);
}
--- linux-2.1.103/arch/i386/kernel/ldt.c.old Tue May 5 11:23:28 1998
+++ linux-2.1.103/arch/i386/kernel/ldt.c Sun May 24 17:53:49 1998
@@ -16,6 +16,8 @@
#include <asm/system.h>
#include <asm/ldt.h>

+/* #define LDT_PARANOIA 1 */
+
static int read_ldt(void * ptr, unsigned long bytecount)
{
void * address = current->mm->segments;
@@ -33,23 +35,48 @@
return copy_to_user(ptr, address, size) ? -EFAULT : size;
}

-static int write_ldt(void * ptr, unsigned long bytecount, int oldmode)
+/*
+ * Define masks for error checking
+ */
+#define SEG_NEED_NZ 1
+#define SEG_IS_ZERO 1
+#define SEG_NEED_VALID 2
+#define SEG_IS_INVALID 2
+#define SEG_NEED_CODE 4
+#define SEG_IS_NOTCODE 4
+#define SEG_NEED_READ 8
+#define SEG_IS_NOTREAD 8
+#define SEG_NEED_WRITE 16
+#define SEG_IS_NOTWRITE 16
+
+static int write_ldt(void * ptr, unsigned long bytecount, int oldmode,
+ struct pt_regs *regs)
{
+ struct mm_struct * mm = current->mm;
+ void * ldt;
+ __u32 entry_1, entry_2, *lp;
+ __u16 selector, reg_fs, reg_gs;
+ unsigned int require = 0, attrib = 0;
+ char * which;
+ int error;
struct modify_ldt_ldt_s ldt_info;
- unsigned long *lp;
- struct mm_struct * mm;
- int error, i;

+ error = -EINVAL;
if (bytecount != sizeof(ldt_info))
- return -EINVAL;
- error = copy_from_user(&ldt_info, ptr, sizeof(ldt_info));
- if (error)
- return -EFAULT;
-
- if ((ldt_info.contents == 3 && (oldmode || ldt_info.seg_not_present == 0)) || ldt_info.entry_number >= LDT_ENTRIES)
- return -EINVAL;
-
- mm = current->mm;
+ goto out;
+ error = -EFAULT;
+ if (copy_from_user(&ldt_info, ptr, sizeof(ldt_info)))
+ goto out;
+
+ error = -EINVAL;
+ if (ldt_info.entry_number >= LDT_ENTRIES)
+ goto out;
+ if (ldt_info.contents == 3) {
+ if (oldmode)
+ goto out;
+ if (ldt_info.seg_not_present == 0)
+ goto out;
+ }

/*
* Horrible dependencies! Try to get rid of this. This is wrong,
@@ -62,60 +89,149 @@
* For no good reason except historical, the GDT index of the LDT
* is chosen to follow the index number in the task[] array.
*/
- if (!mm->segments) {
- for (i=1 ; i<NR_TASKS ; i++) {
- if (task[i] == current) {
- if (!(mm->segments = (void *) vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE)))
- return -ENOMEM;
- memset(mm->segments, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
- set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, mm->segments, LDT_ENTRIES);
- load_ldt(i);
- }
+ ldt = mm->segments;
+ if (!ldt) {
+ error = -ENOMEM;
+ ldt = vmalloc(LDT_ENTRIES*LDT_ENTRY_SIZE);
+ if (!ldt)
+ goto out;
+ memset(ldt, 0, LDT_ENTRIES*LDT_ENTRY_SIZE);
+ /*
+ * Make sure someone else hasn't allocated it for us ...
+ */
+ if (!mm->segments) {
+ int i = current->tarray_ptr - &task[0];
+ mm->segments = ldt;
+ set_ldt_desc(gdt+(i<<1)+FIRST_LDT_ENTRY, ldt, LDT_ENTRIES);
+ load_ldt(i);
+ if (mm->count > 1)
+ printk(KERN_WARNING
+ "LDT allocated for cloned task!\n");
+ } else {
+ vfree(ldt);
}
}
+
+ /*
+ * Check whether the entry to be changed is currently in use.
+ * If it is, we may need extra validation checks in case the
+ * kernel is forced to save and restore the selector.
+ *
+ * Note: we check the fs and gs values as well, as these are
+ * loaded by the signal code and during a task switch.
+ */
+ selector = (ldt_info.entry_number << 3) | 4;
+ __asm__("movw %%fs,%0" : "=r"(reg_fs));
+ __asm__("movw %%gs,%0" : "=r"(reg_gs));
+ which = NULL;
+
+ if (selector == (__u16) (regs->xcs & ~3)) {
+ which = "CS";
+ require = SEG_NEED_VALID | SEG_NEED_CODE | SEG_NEED_NZ;
+ }
+ else if (selector == (__u16) (regs->xss & ~3)) {
+ which = "SS";
+ require = SEG_NEED_VALID | SEG_NEED_WRITE | SEG_NEED_NZ;
+ }
+ else if (selector == (__u16) (regs->xds & ~3)) {
+ which = "DS";
+ require = SEG_NEED_VALID | SEG_NEED_READ | SEG_NEED_NZ;
+ }
+ else if (selector == (__u16) (regs->xes & ~3)) {
+ which = "ES";
+ require = SEG_NEED_VALID | SEG_NEED_READ | SEG_NEED_NZ;
+ }
+ else if (selector == (reg_fs & ~3)) {
+ which = "FS";
+ require = SEG_NEED_VALID | SEG_NEED_READ | SEG_NEED_NZ;
+ }
+ else if (selector == (reg_gs & ~3)) {
+ which = "GS";
+ require = SEG_NEED_VALID | SEG_NEED_READ | SEG_NEED_NZ;
+ }
+ if (which)
+ printk(KERN_WARNING "write_ldt: selector %04x in use by %s\n",
+ selector, which);

- lp = (unsigned long *) (LDT_ENTRY_SIZE * ldt_info.entry_number + (unsigned long) mm->segments);
+ lp = (__u32 *) ((selector & ~7) + (char *) ldt);
+
/* Allow LDTs to be cleared by the user. */
- if (ldt_info.base_addr == 0 && ldt_info.limit == 0
- && (oldmode ||
- ( ldt_info.contents == 0
- && ldt_info.read_exec_only == 1
- && ldt_info.seg_32bit == 0
- && ldt_info.limit_in_pages == 0
- && ldt_info.seg_not_present == 1
- && ldt_info.useable == 0 )) ) {
- *lp = 0;
- *(lp+1) = 0;
- return 0;
+ if (ldt_info.base_addr == 0 && ldt_info.limit == 0) {
+ if (oldmode ||
+ (ldt_info.contents == 0 &&
+ ldt_info.read_exec_only == 1 &&
+ ldt_info.seg_32bit == 0 &&
+ ldt_info.limit_in_pages == 0 &&
+ ldt_info.seg_not_present == 1 &&
+ ldt_info.useable == 0 )) {
+ entry_1 = 0;
+ entry_2 = 0;
+ attrib |= SEG_IS_ZERO;
+ goto out_check;
+ }
}
- *lp = ((ldt_info.base_addr & 0x0000ffff) << 16) |
+
+ entry_1 = ((ldt_info.base_addr & 0x0000ffff) << 16) |
(ldt_info.limit & 0x0ffff);
- *(lp+1) = (ldt_info.base_addr & 0xff000000) |
- ((ldt_info.base_addr & 0x00ff0000)>>16) |
+ entry_2 = (ldt_info.base_addr & 0xff000000) |
+ ((ldt_info.base_addr & 0x00ff0000) >> 16) |
(ldt_info.limit & 0xf0000) |
- (ldt_info.contents << 10) |
((ldt_info.read_exec_only ^ 1) << 9) |
+ (ldt_info.contents << 10) |
+ ((ldt_info.seg_not_present ^ 1) << 15) |
(ldt_info.seg_32bit << 22) |
(ldt_info.limit_in_pages << 23) |
- ((ldt_info.seg_not_present ^1) << 15) |
0x7000;
- if (!oldmode) *(lp+1) |= (ldt_info.useable << 20);
- return 0;
+ if (!oldmode)
+ entry_2 |= (ldt_info.useable << 20);
+
+ /* N.B. perform validation checks */
+ if (ldt_info.read_exec_only)
+ attrib |= SEG_IS_NOTWRITE;
+ if (!(ldt_info.contents & MODIFY_LDT_CONTENTS_CODE))
+ attrib |= SEG_IS_NOTCODE;
+
+ /*
+ * Check whether any attributes conflict with requirements.
+ */
+out_check:
+ if (require & attrib)
+ goto out_busy;
+
+ /* OK to change the entry ... */
+ *lp = entry_1;
+ *(lp+1) = entry_2;
+ error = 0;
+#ifdef LDT_PARANOIA
+printk("write_ldt: loaded %04x, entry=(%08x, %08x)\n",
+selector, entry_1, entry_2);
+#endif
+out:
+ return error;
+
+out_busy:
+ error = -EBUSY;
+ printk("write_ldt: can't change %04x to (%08x, %08x)\n",
+ selector, entry_1, entry_2);
+ goto out;
}

asmlinkage int sys_modify_ldt(int func, void *ptr, unsigned long bytecount)
{
- int ret;
+ int ret = -ENOSYS;

lock_kernel();
- if (func == 0)
+ switch (func) {
+ case 0:
ret = read_ldt(ptr, bytecount);
- else if (func == 1)
- ret = write_ldt(ptr, bytecount, 1);
- else if (func == 0x11)
- ret = write_ldt(ptr, bytecount, 0);
- else
- ret = -ENOSYS;
+ break;
+ case 1:
+ ret = write_ldt(ptr, bytecount, 1, (struct pt_regs *) &func);
+ break;
+ case 0x11:
+ ret = write_ldt(ptr, bytecount, 0, (struct pt_regs *) &func);
+ break;
+ }
unlock_kernel();
return ret;
}
--- linux-2.1.103/arch/i386/kernel/traps.c.old Tue May 5 11:20:01 1998
+++ linux-2.1.103/arch/i386/kernel/traps.c Sun May 24 17:37:52 1998
@@ -66,6 +66,10 @@
unlock_kernel(); \
}

+/*
+ * N.B. The use of %fs in these macros can cause problems
+ * if the selector is invalid ... use another register?
+ */
#define get_seg_byte(seg,addr) ({ \
register unsigned char __res; \
__asm__("pushl %%fs;movl %%ax,%%fs;movb %%fs:%2,%%al;popl %%fs" \
@@ -83,6 +87,21 @@
__asm__("movl %%fs,%%ax":"=a" (__res):); \
__res;})

+#define _gs() ({ \
+register unsigned short __res; \
+__asm__("movl %%gs,%%ax":"=a" (__res):); \
+__res;})
+
+#define _ldt() ({ \
+register unsigned short __res; \
+__asm__("sldt %%ax":"=a" (__res):); \
+__res;})
+
+#define _tr() ({ \
+register unsigned short __res; \
+__asm__("str %%ax":"=a" (__res):); \
+__res;})
+
void page_exception(void);

asmlinkage void divide_error(void);
@@ -129,17 +148,23 @@
esp = regs->esp;
ss = regs->xss & 0xffff;
}
+
printk("CPU: %d\nEIP: %04x:[<%08lx>]\nEFLAGS: %08lx\n",
smp_processor_id(), 0xffff & regs->xcs, regs->eip, regs->eflags);
printk("eax: %08lx ebx: %08lx ecx: %08lx edx: %08lx\n",
regs->eax, regs->ebx, regs->ecx, regs->edx);
printk("esi: %08lx edi: %08lx ebp: %08lx esp: %08lx\n",
regs->esi, regs->edi, regs->ebp, esp);
- printk("ds: %04x es: %04x ss: %04x\n",
- regs->xds & 0xffff, regs->xes & 0xffff, ss);
- store_TR(i);
- printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\nStack: ",
- current->comm, current->pid, 0xffff & i, 4096+(unsigned long)current);
+ printk("ds: %04x es: %04x ss: %04x fs: %04x gs: %04x\n",
+ regs->xds & 0xffff, regs->xes & 0xffff, ss, _fs(), _gs());
+
+ i = (current->tarray_ptr - &task[0]);
+ printk("Process %s (pid: %d, process nr: %d, stackpage=%08lx)\n",
+ current->comm, current->pid, i, 4096+(unsigned long)current);
+ printk("ldt: %04x tss.ldt: %04x LDT: %04x tr: %04x TSS: %04x\n",
+ _ldt(), current->tss.ldt, (__u16)_LDT(i), _tr(), (__u16)_TSS(i));
+
+ printk("Stack: ");
stack = (unsigned long *) esp;
for(i=0; i < kstack_depth_to_print; i++) {
if (((long) stack & 4095) == 0)
--- linux-2.1.103/kernel/fork.c.old Sun May 17 12:19:40 1998
+++ linux-2.1.103/kernel/fork.c Sun May 24 21:28:14 1998
@@ -310,6 +310,10 @@

if (clone_flags & CLONE_VM) {
mmget(current->mm);
+ /*
+ * Set up the LDT descriptor for the clone task.
+ */
+ copy_segments(nr, tsk, NULL);
SET_PAGE_DIR(tsk, current->mm->pgd);
return 0;
}

--------------D6E59D8048D9105C22BE315F--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu