[FIX] oopsen in procfs (2.3.12-pre*)

Alexander Viro (viro@math.psu.edu)
Wed, 28 Jul 1999 06:35:08 -0400 (EDT)


Linus, procfs handling of task->mm had several bad races + ugly
deadlock (my fault - down() is *not* a function to be used while holding
the tasklist_lock). I think that patch below fixes most of the problems
(at least simple tests are working fine). I also included the one-liner
for arch/i386/kernel/smp.c - the thing forgot to call cpu_init() if it
was built for SMP and found that it runs on UP box. Please, apply it.
TIA,
Al
Patch follows:
diff -urN linux-2.3.12-8/arch/i386/kernel/smp.c linux/arch/i386/kernel/smp.c
--- linux-2.3.12-8/arch/i386/kernel/smp.c Wed Jul 28 02:28:12 1999
+++ linux/arch/i386/kernel/smp.c Wed Jul 28 03:47:54 1999
@@ -1229,6 +1229,7 @@
io_apic_irqs = 0;
#endif
cpu_online_map = cpu_present_map;
+ cpu_init();
goto smp_done;
}

diff -urN linux-2.3.12-8/fs/proc/array.c linux/fs/proc/array.c
--- linux-2.3.12-8/fs/proc/array.c Wed Jul 28 02:28:19 1999
+++ linux/fs/proc/array.c Wed Jul 28 06:07:17 1999
@@ -42,6 +42,8 @@
* Alan Cox : security fixes.
* <Alan.Cox@linux.org>
*
+ * Al Viro : safe handling of mm_struct
+ *
*/

#include <linux/types.h>
@@ -386,21 +388,15 @@
return sprintf(buffer, "%s\n", saved_command_line);
}

-static unsigned long get_phys_addr(struct task_struct * p, unsigned long ptr)
+static unsigned long get_phys_addr(struct mm_struct * mm, unsigned long ptr)
{
pgd_t *page_dir;
pmd_t *page_middle;
pte_t pte;

- if (!p || !p->mm || ptr >= TASK_SIZE)
- return 0;
- /* Check for NULL pgd .. shouldn't happen! */
- if (!p->mm->pgd) {
- printk("get_phys_addr: pid %d has NULL pgd!\n", p->pid);
+ if (ptr >= TASK_SIZE)
return 0;
- }
-
- page_dir = pgd_offset(p->mm,ptr);
+ page_dir = pgd_offset(mm,ptr);
if (pgd_none(*page_dir))
return 0;
if (pgd_bad(*page_dir)) {
@@ -422,7 +418,7 @@
return pte_page(pte) + (ptr & ~PAGE_MASK);
}

-static int get_array(struct task_struct *p, unsigned long start, unsigned long end, char * buffer)
+static int get_array(struct mm_struct *mm, unsigned long start, unsigned long end, char * buffer)
{
unsigned long addr;
int size = 0, result = 0;
@@ -431,7 +427,7 @@
if (start >= end)
return result;
for (;;) {
- addr = get_phys_addr(p, start);
+ addr = get_phys_addr(mm, start);
if (!addr)
return result;
do {
@@ -451,29 +447,42 @@
return result;
}

-static int get_env(int pid, char * buffer)
+static struct mm_struct *get_mm(int pid)
{
struct task_struct *p;
+ struct mm_struct *mm = NULL;

read_lock(&tasklist_lock);
p = find_task_by_pid(pid);
- read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
+ if (p)
+ mm = p->mm;
+ if (mm)
+ atomic_inc(&mm->mm_users);
+ read_unlock(&tasklist_lock);
+ return mm;
+}

- if (!p || !p->mm)
- return 0;
- return get_array(p, p->mm->env_start, p->mm->env_end, buffer);
+
+static int get_env(int pid, char * buffer)
+{
+ struct mm_struct *mm = get_mm(pid);
+ int res = 0;
+ if (mm) {
+ res = get_array(mm, mm->env_start, mm->env_end, buffer);
+ mmput(mm);
+ }
+ return res;
}

static int get_arg(int pid, char * buffer)
{
- struct task_struct *p;
-
- read_lock(&tasklist_lock);
- p = find_task_by_pid(pid);
- read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
- if (!p || !p->mm)
- return 0;
- return get_array(p, p->mm->arg_start, p->mm->arg_end, buffer);
+ struct mm_struct *mm = get_mm(pid);
+ int res = 0;
+ if (mm) {
+ res = get_array(mm, mm->arg_start, mm->arg_end, buffer);
+ mmput(mm);
+ }
+ return res;
}

/*
@@ -740,48 +749,44 @@
return buffer;
}

-static inline char * task_mem(struct task_struct *p, char *buffer)
+static inline char * task_mem(struct mm_struct *mm, char *buffer)
{
- struct mm_struct * mm = p->mm;
-
- if (mm) {
- struct vm_area_struct * vma;
- unsigned long data = 0, stack = 0;
- unsigned long exec = 0, lib = 0;
-
- down(&mm->mmap_sem);
- for (vma = mm->mmap; vma; vma = vma->vm_next) {
- unsigned long len = (vma->vm_end - vma->vm_start) >> 10;
- if (!vma->vm_file) {
- data += len;
- if (vma->vm_flags & VM_GROWSDOWN)
- stack += len;
- continue;
- }
- if (vma->vm_flags & VM_WRITE)
+ struct vm_area_struct * vma;
+ unsigned long data = 0, stack = 0;
+ unsigned long exec = 0, lib = 0;
+
+ down(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ unsigned long len = (vma->vm_end - vma->vm_start) >> 10;
+ if (!vma->vm_file) {
+ data += len;
+ if (vma->vm_flags & VM_GROWSDOWN)
+ stack += len;
+ continue;
+ }
+ if (vma->vm_flags & VM_WRITE)
+ continue;
+ if (vma->vm_flags & VM_EXEC) {
+ exec += len;
+ if (vma->vm_flags & VM_EXECUTABLE)
continue;
- if (vma->vm_flags & VM_EXEC) {
- exec += len;
- if (vma->vm_flags & VM_EXECUTABLE)
- continue;
- lib += len;
- }
+ lib += len;
}
- buffer += sprintf(buffer,
- "VmSize:\t%8lu kB\n"
- "VmLck:\t%8lu kB\n"
- "VmRSS:\t%8lu kB\n"
- "VmData:\t%8lu kB\n"
- "VmStk:\t%8lu kB\n"
- "VmExe:\t%8lu kB\n"
- "VmLib:\t%8lu kB\n",
- mm->total_vm << (PAGE_SHIFT-10),
- mm->locked_vm << (PAGE_SHIFT-10),
- mm->rss << (PAGE_SHIFT-10),
- data - stack, stack,
- exec - lib, lib);
- up(&mm->mmap_sem);
}
+ buffer += sprintf(buffer,
+ "VmSize:\t%8lu kB\n"
+ "VmLck:\t%8lu kB\n"
+ "VmRSS:\t%8lu kB\n"
+ "VmData:\t%8lu kB\n"
+ "VmStk:\t%8lu kB\n"
+ "VmExe:\t%8lu kB\n"
+ "VmLib:\t%8lu kB\n",
+ mm->total_vm << (PAGE_SHIFT-10),
+ mm->locked_vm << (PAGE_SHIFT-10),
+ mm->rss << (PAGE_SHIFT-10),
+ data - stack, stack,
+ exec - lib, lib);
+ up(&mm->mmap_sem);
return buffer;
}

@@ -842,44 +847,61 @@
{
char * orig = buffer;
struct task_struct *tsk;
+ struct mm_struct *mm = NULL;

read_lock(&tasklist_lock);
tsk = find_task_by_pid(pid);
+ if (tsk)
+ mm = tsk->mm;
+ if (mm)
+ atomic_inc(&mm->mm_users);
read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
if (!tsk)
return 0;
buffer = task_name(tsk, buffer);
buffer = task_state(tsk, buffer);
- buffer = task_mem(tsk, buffer);
+ if (mm)
+ buffer = task_mem(mm, buffer);
buffer = task_sig(tsk, buffer);
buffer = task_cap(tsk, buffer);
+ if (mm)
+ mmput(mm);
return buffer - orig;
}

static int get_stat(int pid, char * buffer)
{
struct task_struct *tsk;
+ struct mm_struct *mm = NULL;
unsigned long vsize, eip, esp, wchan;
long priority, nice;
int tty_pgrp;
sigset_t sigign, sigcatch;
char state;
+ int res;

read_lock(&tasklist_lock);
tsk = find_task_by_pid(pid);
+ if (tsk)
+ mm = tsk->mm;
+ if (mm)
+ atomic_inc(&mm->mm_users);
read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
if (!tsk)
return 0;
state = *get_task_state(tsk);
vsize = eip = esp = 0;
- if (tsk->mm) {
- struct vm_area_struct *vma = tsk->mm->mmap;
+ if (mm) {
+ struct vm_area_struct *vma;
+ down(&mm->mmap_sem);
+ vma = mm->mmap;
while (vma) {
vsize += vma->vm_end - vma->vm_start;
vma = vma->vm_next;
}
eip = KSTK_EIP(tsk);
esp = KSTK_ESP(tsk);
+ up(&mm->mmap_sem);
}

wchan = get_wchan(tsk);
@@ -898,7 +920,7 @@
nice = tsk->priority;
nice = 20 - (nice * 20 + DEF_PRIORITY / 2) / DEF_PRIORITY;

- return sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
+ res = sprintf(buffer,"%d (%s) %c %d %d %d %d %d %lu %lu \
%lu %lu %lu %lu %lu %ld %ld %ld %ld %ld %ld %lu %lu %ld %lu %lu %lu %lu %lu \
%lu %lu %lu %lu %lu %lu %lu %lu %d %d\n",
pid,
@@ -924,11 +946,11 @@
tsk->it_real_value,
tsk->start_time,
vsize,
- tsk->mm ? tsk->mm->rss : 0, /* you might want to shift this left 3 */
+ mm ? mm->rss : 0, /* you might want to shift this left 3 */
tsk->rlim ? tsk->rlim[RLIMIT_RSS].rlim_cur : 0,
- tsk->mm ? tsk->mm->start_code : 0,
- tsk->mm ? tsk->mm->end_code : 0,
- tsk->mm ? tsk->mm->start_stack : 0,
+ mm ? mm->start_code : 0,
+ mm ? mm->end_code : 0,
+ mm ? mm->start_stack : 0,
esp,
eip,
/* The signal information here is obsolete.
@@ -944,6 +966,9 @@
tsk->cnswap,
tsk->exit_signal,
tsk->processor);
+ if (mm)
+ mmput(mm);
+ return res;
}

static inline void statm_pte_range(pmd_t * pmd, unsigned long address, unsigned long size,
@@ -1021,19 +1046,15 @@

static int get_statm(int pid, char * buffer)
{
- struct task_struct *tsk;
+ struct mm_struct *mm = get_mm(pid);
int size=0, resident=0, share=0, trs=0, lrs=0, drs=0, dt=0;

- read_lock(&tasklist_lock);
- tsk = find_task_by_pid(pid);
- read_unlock(&tasklist_lock); /* FIXME!! This should be done after the last use */
- if (!tsk)
- return 0;
- if (tsk->mm) {
- struct vm_area_struct * vma = tsk->mm->mmap;
-
+ if (mm) {
+ struct vm_area_struct * vma;
+ down(&mm->mmap_sem);
+ vma = mm->mmap;
while (vma) {
- pgd_t *pgd = pgd_offset(tsk->mm, vma->vm_start);
+ pgd_t *pgd = pgd_offset(mm, vma->vm_start);
int pages = 0, shared = 0, dirty = 0, total = 0;

statm_pgd_range(pgd, vma->vm_start, vma->vm_end, &pages, &shared, &dirty, &total);
@@ -1051,6 +1072,8 @@
drs += pages;
vma = vma->vm_next;
}
+ up(&mm->mmap_sem);
+ mmput(mm);
}
return sprintf(buffer,"%d %d %d %d %d %d %d\n",
size, resident, share, trs, lrs, drs, dt);
diff -urN linux-2.3.12-8/fs/proc/link.c linux/fs/proc/link.c
--- linux-2.3.12-8/fs/proc/link.c Wed Jul 28 02:28:19 1999
+++ linux/fs/proc/link.c Wed Jul 28 05:44:09 1999
@@ -82,63 +82,70 @@
ino &= 0x0000ffff;

result = ERR_PTR(-ENOENT);
- read_lock(&tasklist_lock);
- p = find_task_by_pid(pid);
- if (!p)
- goto out_unlock;

switch (ino) {
case PROC_PID_CWD:
- if (!p->fs || !p->fs->pwd)
- goto out_unlock;
- result = p->fs->pwd;
- goto out_dget;
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ if (p && p->fs && p->fs->pwd)
+ result = dget(p->fs->pwd);
+ read_unlock(&tasklist_lock);
+ break;

case PROC_PID_ROOT:
- if (!p->fs || !p->fs->root)
- goto out_unlock;
- result = p->fs->root;
- goto out_dget;
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ if (p && p->fs && p->fs->root)
+ result = dget(p->fs->root);
+ read_unlock(&tasklist_lock);
+ break;

case PROC_PID_EXE: {
+ struct mm_struct *mm = NULL;
struct vm_area_struct * vma;
- if (!p->mm)
- goto out_unlock;
- down(&p->mm->mmap_sem);
- vma = p->mm->mmap;
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ if (p)
+ mm = p->mm;
+ if (mm)
+ atomic_inc(&mm->mm_users);
+ read_unlock(&tasklist_lock);
+ if (!mm)
+ break;
+ down(&mm->mmap_sem);
+ vma = mm->mmap;
while (vma) {
if ((vma->vm_flags & VM_EXECUTABLE) &&
vma->vm_file) {
- result = vma->vm_file->f_dentry;
- up(&p->mm->mmap_sem);
- goto out_dget;
+ result = dget(vma->vm_file->f_dentry);
+ break;
}
vma = vma->vm_next;
}
- up(&p->mm->mmap_sem);
- goto out_unlock;
+ up(&mm->mmap_sem);
+ mmput(mm);
+ break;
}
default:
if (ino & PROC_PID_FD_DIR) {
struct file * file;
+ struct files_struct *files = NULL;
+ read_lock(&tasklist_lock);
+ p = find_task_by_pid(pid);
+ if (p)
+ files = p->files;
+ read_unlock(&tasklist_lock);
+ if (!files)
+ break;
ino &= 0x7fff;
- if (!p->files) /* shouldn't happen here */
- goto out_unlock;
- read_lock(&p->files->file_lock);
- file = fcheck_task(p, ino);
- if (!file || !file->f_dentry)
- goto out_unlock;
- result = file->f_dentry;
+ read_lock(&files->file_lock);
+ /* XXXXXXXXXXXXXXXXXXXXXXXXXXXXXX */
+ if (ino < files->max_fds &&
+ (file = files->fd[ino]) && file->f_dentry)
+ result = dget(file->f_dentry);
read_unlock(&p->files->file_lock);
- goto out_dget;
}
}
-out_dget:
- result = dget(result);
-
-out_unlock:
- read_unlock(&tasklist_lock);
-
out:
return result;
}

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/