patch oom 2.3.x

From: Andrea Arcangeli (andrea@suse.de)
Date: Sat Jan 08 2000 - 11:17:09 EST


o fixes init getting a SIGSEGV during oom.
o avoids sending SIGKILL to iopl'ed tasks and sends them a SIGTERM
        instead, so X won't screw up the console due to oom on IA32.
o makes the signal code react better to oom.
o updates a few places that don't know about the new handle_mm_fault
        interface yet (btw, in ptrace David prefers to send the SIGKILL
        due to oom to the traced `tsk` and not to the `current` tracer
        task; I left it to him to send an incremental patch if he wants.
        Actually I prefer the way it is done in my patch because it is
        obviously safe).
o removes the deprecated oom function.
o prevents the swap algorithm from reassigning mm->swap_cnt more than
        once per try.

diff -urN 2.3.36pre5/arch/alpha/kernel/signal.c 2.3.36pre5-oom/arch/alpha/kernel/signal.c
--- 2.3.36pre5/arch/alpha/kernel/signal.c Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/alpha/kernel/signal.c Mon Jan 3 19:00:05 2000
@@ -437,6 +437,8 @@
                 err |= __copy_to_user(frame->extramask, &set->sig[1],
                                       sizeof(frame->extramask));
         }
+ if (err)
+ goto give_sigsegv;
 
         /* Set up to return from userspace. If provided, use a stub
            already in userspace. */
@@ -499,6 +501,8 @@
         err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw,
                                 set->sig[0], oldsp);
         err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto give_sigsegv;
 
         /* Set up to return from userspace. If provided, use a stub
            already in userspace. */
diff -urN 2.3.36pre5/arch/alpha/mm/fault.c 2.3.36pre5-oom/arch/alpha/mm/fault.c
--- 2.3.36pre5/arch/alpha/mm/fault.c Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/alpha/mm/fault.c Mon Jan 3 19:00:05 2000
@@ -130,13 +130,13 @@
          * make sure we exit gracefully rather than endlessly redo
          * the fault.
          */
+survive:
         fault = handle_mm_fault(current, vma, address, cause > 0);
- up(&mm->mmap_sem);
-
         if (fault < 0)
                 goto out_of_memory;
         if (fault == 0)
                 goto do_sigbus;
+ up(&mm->mmap_sem);
 
         return;
 
@@ -177,13 +177,23 @@
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
- printk(KERN_ALERT "VM: killing process %s(%d)\n",
- current->comm, current->pid);
- if (!user_mode(regs))
- goto no_context;
- do_exit(SIGKILL);
+ if (current->pid == 1)
+ {
+ current->policy |= SCHED_YIELD;
+ schedule();
+ goto survive;
+ }
+ up(&mm->mmap_sem);
+ if (user_mode(regs))
+ {
+ printk(KERN_ALERT "VM: killing process %s(%d)\n",
+ current->comm, current->pid);
+ do_exit(SIGKILL);
+ }
+ goto no_context;
 
 do_sigbus:
+ up(&mm->mmap_sem);
         /*
          * Send a sigbus, regardless of whether we were in kernel
          * or user mode.
diff -urN 2.3.36pre5/arch/i386/kernel/signal.c 2.3.36pre5-oom/arch/i386/kernel/signal.c
--- 2.3.36pre5/arch/i386/kernel/signal.c Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/i386/kernel/signal.c Mon Jan 3 19:00:05 2000
@@ -419,13 +419,19 @@
                            ? current->exec_domain->signal_invmap[sig]
                            : sig),
                           &frame->sig);
+ if (err)
+ goto give_sigsegv;
 
         err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+ if (err)
+ goto give_sigsegv;
 
         if (_NSIG_WORDS > 1) {
                 err |= __copy_to_user(frame->extramask, &set->sig[1],
                                       sizeof(frame->extramask));
         }
+ if (err)
+ goto give_sigsegv;
 
         /* Set up to return from userspace. If provided, use a stub
            already in userspace. */
@@ -486,6 +492,8 @@
         err |= __put_user(&frame->info, &frame->pinfo);
         err |= __put_user(&frame->uc, &frame->puc);
         err |= __copy_to_user(&frame->info, info, sizeof(*info));
+ if (err)
+ goto give_sigsegv;
 
         /* Create the ucontext. */
         err |= __put_user(0, &frame->uc.uc_flags);
@@ -497,6 +505,8 @@
         err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
                                 regs, set->sig[0]);
         err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+ if (err)
+ goto give_sigsegv;
 
         /* Set up to return from userspace. If provided, use a stub
            already in userspace. */
diff -urN 2.3.36pre5/arch/i386/mm/fault.c 2.3.36pre5-oom/arch/i386/mm/fault.c
--- 2.3.36pre5/arch/i386/mm/fault.c Wed Nov 24 18:22:04 1999
+++ 2.3.36pre5-oom/arch/i386/mm/fault.c Mon Jan 3 19:00:05 2000
@@ -31,6 +31,7 @@
 {
         struct vm_area_struct * vma;
         unsigned long start = (unsigned long) addr;
+ int fault;
 
         if (!size)
                 return 1;
@@ -50,8 +51,12 @@
         start &= PAGE_MASK;
 
         for (;;) {
- if (handle_mm_fault(current, vma, start, 1) <= 0)
- goto bad_area;
+survive:
+ fault = handle_mm_fault(current, vma, start, 1);
+ if (!fault)
+ goto do_sigbus;
+ if (fault < 0)
+ goto out_of_memory;
                 if (!size)
                         break;
                 size--;
@@ -74,6 +79,19 @@
 
 bad_area:
         return 0;
+
+do_sigbus:
+ force_sig(SIGBUS, current);
+ goto bad_area;
+
+out_of_memory:
+ if (current->pid == 1)
+ {
+ current->policy |= SCHED_YIELD;
+ schedule();
+ goto survive;
+ }
+ goto bad_area;
 }
 
 static inline void handle_wp_test (void)
@@ -188,6 +206,7 @@
          * make sure we exit gracefully rather than endlessly redo
          * the fault.
          */
+survive:
         {
                 int fault = handle_mm_fault(tsk, vma, address, write);
                 if (fault < 0)
@@ -280,10 +299,33 @@
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
+ if (tsk->pid == 1)
+ {
+ tsk->policy |= SCHED_YIELD;
+ schedule();
+ goto survive;
+ }
         up(&mm->mmap_sem);
- printk("VM: killing process %s\n", tsk->comm);
         if (error_code & 4)
- do_exit(SIGKILL);
+ {
+ if (!((regs->eflags >> 12) & 3))
+ {
+ printk(KERN_ALERT "VM: killing process %s\n",
+ tsk->comm);
+ do_exit(SIGKILL);
+ }
+ else
+ {
+ /*
+ * The task is running with privileges and so we
+ * trust it and we give it a chance to die gracefully.
+ */
+ printk(KERN_ALERT "VM: terminating process %s\n",
+ tsk->comm);
+ force_sig(SIGTERM, current);
+ return;
+ }
+ }
         goto no_context;
 
 do_sigbus:
diff -urN 2.3.36pre5/fs/exec.c 2.3.36pre5-oom/fs/exec.c
--- 2.3.36pre5/fs/exec.c Mon Jan 3 18:56:24 2000
+++ 2.3.36pre5-oom/fs/exec.c Mon Jan 3 19:02:11 2000
@@ -277,13 +277,13 @@
         pmd = pmd_alloc(pgd, address);
         if (!pmd) {
                 __free_page(page);
- oom(tsk);
+ force_sig(SIGKILL, tsk);
                 return;
         }
         pte = pte_alloc(pmd, address);
         if (!pte) {
                 __free_page(page);
- oom(tsk);
+ force_sig(SIGKILL, tsk);
                 return;
         }
         if (!pte_none(*pte)) {
diff -urN 2.3.36pre5/include/linux/mm.h 2.3.36pre5-oom/include/linux/mm.h
--- 2.3.36pre5/include/linux/mm.h Mon Jan 3 18:56:25 2000
+++ 2.3.36pre5-oom/include/linux/mm.h Mon Jan 3 19:00:05 2000
@@ -400,7 +400,6 @@
                 unsigned int * zones_size, unsigned long zone_start_paddr);
 extern void mem_init(void);
 extern void show_mem(void);
-extern void oom(struct task_struct * tsk);
 extern void si_meminfo(struct sysinfo * val);
 extern void swapin_readahead(swp_entry_t);
 
diff -urN 2.3.36pre5/kernel/ptrace.c 2.3.36pre5-oom/kernel/ptrace.c
--- 2.3.36pre5/kernel/ptrace.c Sun Nov 21 03:20:20 1999
+++ 2.3.36pre5-oom/kernel/ptrace.c Mon Jan 3 19:01:02 2000
@@ -26,6 +26,7 @@
         unsigned long mapnr;
         unsigned long maddr;
         struct page *page;
+ int fault;
 
 repeat:
         pgdir = pgd_offset(vma->vm_mm, addr);
@@ -64,8 +65,12 @@
 
 fault_in_page:
         /* -1: out of memory. 0 - unmapped page */
- if (handle_mm_fault(tsk, vma, addr, write) > 0)
+ fault = handle_mm_fault(tsk, vma, addr, write);
+ if (fault > 0)
                 goto repeat;
+ if (fault < 0)
+ /* the out of memory has been triggered by the current task. */
+ force_sig(SIGKILL, current);
         return 0;
 
 bad_pgd:
diff -urN 2.3.36pre5/mm/memory.c 2.3.36pre5-oom/mm/memory.c
--- 2.3.36pre5/mm/memory.c Mon Jan 3 18:56:25 2000
+++ 2.3.36pre5-oom/mm/memory.c Mon Jan 3 19:00:05 2000
@@ -70,16 +70,6 @@
 mem_map_t * mem_map = NULL;
 
 /*
- * oom() prints a message (so that the user knows why the process died),
- * and gives the process an untrappable SIGKILL.
- */
-void oom(struct task_struct * task)
-{
- printk("\nOut of memory for %s.\n", task->comm);
- force_sig(SIGKILL, task);
-}
-
-/*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
diff -urN 2.3.36pre5/mm/vmscan.c 2.3.36pre5-oom/mm/vmscan.c
--- 2.3.36pre5/mm/vmscan.c Fri Dec 31 16:33:05 1999
+++ 2.3.36pre5-oom/mm/vmscan.c Mon Jan 3 19:00:05 2000
@@ -328,6 +328,7 @@
         struct task_struct * p;
         int counter;
         int __ret = 0;
+ int assign = 0;
 
         lock_kernel();
         /*
@@ -347,12 +348,9 @@
         counter = nr_threads / (priority+1);
         if (counter < 1)
                 counter = 1;
- if (counter > nr_threads)
- counter = nr_threads;
 
         for (; counter >= 0; counter--) {
- int assign = 0;
- int max_cnt = 0;
+ unsigned long max_cnt = 0;
                 struct mm_struct *best = NULL;
                 int pid = 0;
         select:
@@ -365,7 +363,7 @@
                          if (mm->rss <= 0)
                                 continue;
                         /* Refresh swap_cnt? */
- if (assign)
+ if (assign == 1)
                                 mm->swap_cnt = mm->rss;
                         if (mm->swap_cnt > max_cnt) {
                                 max_cnt = mm->swap_cnt;
@@ -374,6 +372,8 @@
                         }
                 }
                 read_unlock(&tasklist_lock);
+ if (assign == 1)
+ assign = 2;
                 if (!best) {
                         if (!assign) {
                                 assign = 1;

Andrea

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Sat Jan 15 2000 - 21:00:12 EST