[patch] 2.3.18ac10 /proc fixes [Re: [patch] wchan in 2.3.18ac6]

Andrea Arcangeli (andrea@suse.de)
Sun, 3 Oct 1999 17:14:50 +0200 (CEST)


On Mon, 20 Sep 1999, Andrea Arcangeli wrote:

>I don't like the kmalloc in array.c to copy the task struct. Not strictly
>for performances but because we should do the proper locking there.

It seems to me that kmalloc is preventing nothing else than potentially
allowing an user to sniff data from a random kernel page looking the
/proc/ data. This is really a minor problem as the door it too small to be
exploited (at least it was definitely too small with the 2.2.x timings). I
can't see no real need to go slow in /proc/.

There was a few other minor things like KSTK_* which doesn't need the mmap
lock and get_cpuid don't need to take the tasklist lock held while running
the slow sprintf for the same reason as above.

Then I fixed a SMP race in /proc/*/maps in read_maps which was following
the tsk->mm->mmap->vm_next list without holding the mmap_sem of the mm.
With the race properly fixed the volatile mmap trick is not necessary
anymore.

Patch against 2.3.18ac10.

--- 2.3.18ac10/fs/proc/array.c Thu Sep 30 17:00:27 1999
+++ 2.3.18ac10-proc/fs/proc/array.c Sun Oct 3 17:02:42 1999
@@ -48,6 +48,9 @@
*
* Chuck Lever : safe handling of task_struct
* <cel@monkey.org>
+ *
+ * Andrea Arcangeli : SMP race fixes and avoid not necessary copy
+ * of the task struct <andrea@suse.de>
*/

#include <linux/types.h>
@@ -959,7 +962,7 @@

static int get_stat(int pid, char * buffer)
{
- struct task_struct *tsk, *p;
+ struct task_struct *tsk;
struct mm_struct *mm = NULL;
unsigned long vsize, eip, esp, wchan;
long priority, nice;
@@ -968,10 +971,6 @@
char state;
int res = 0;

- tsk = kmalloc(sizeof(struct task_struct), GFP_KERNEL);
- if (!tsk)
- goto out;
-
/*
* Hold the tasklist_lock and extract address information
* we may need after the lock is released. We can't hold
@@ -979,23 +978,22 @@
* result in a deadlock.
*/
read_lock(&tasklist_lock);
- p = find_task_by_pid(pid);
- if (p) {
- memcpy(tsk, p, sizeof(struct task_struct));
-
- mm = p->mm;
+ tsk = find_task_by_pid(pid);
+ if (tsk)
+ {
+ mm = tsk->mm;
if (mm)
atomic_inc(&mm->mm_users);

- ppid = p->p_pptr->pid;
+ ppid = tsk->p_pptr->pid;

- spin_lock_irq(&p->sigmask_lock);
- collect_sigign_sigcatch(p, &sigign, &sigcatch);
- spin_unlock_irq(&p->sigmask_lock);
+ spin_lock_irq(&tsk->sigmask_lock);
+ collect_sigign_sigcatch(tsk, &sigign, &sigcatch);
+ spin_unlock_irq(&tsk->sigmask_lock);
}
read_unlock(&tasklist_lock);
- if (!p)
- goto free_out;
+ if (!tsk)
+ goto out;

state = *get_task_state(tsk);
vsize = eip = esp = 0;
@@ -1007,12 +1005,12 @@
vsize += vma->vm_end - vma->vm_start;
vma = vma->vm_next;
}
- eip = KSTK_EIP(p);
- esp = KSTK_ESP(p);
up(&mm->mmap_sem);
}

- wchan = get_wchan(p);
+ eip = KSTK_EIP(tsk);
+ esp = KSTK_ESP(tsk);
+ wchan = get_wchan(tsk);

/* scale priority and nice values from timeslices to -20..20 */
/* to make it look like a "normal" Unix priority/nice value */
@@ -1068,8 +1066,6 @@
tsk->exit_signal,
tsk->processor);

-free_out:
- kfree(tsk);
out:
if (mm)
mmput(mm);
@@ -1226,7 +1222,6 @@
char * destptr = buf, * buffer;
loff_t lineno;
ssize_t column, i;
- int volatile_task = 0;
long retval;

/*
@@ -1242,13 +1237,8 @@
p = find_task_by_pid(pid);
if (p) {
mm = p->mm;
- if (mm) {
+ if (mm)
atomic_inc(&mm->mm_users);
-
- /* Check whether the mmaps could change if we sleep */
- volatile_task = (p != current ||
- atomic_read(&mm->mm_users) > 2);
- }
}
read_unlock(&tasklist_lock);
if (!p)
@@ -1262,6 +1252,7 @@
lineno = *ppos >> MAPS_LINE_SHIFT;
column = *ppos & (MAPS_LINE_LENGTH-1);

+ down(&mm->mmap_sem);
/* quickly go to line "lineno" */
for (map = mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++)
continue;
@@ -1334,13 +1325,8 @@
/* done? */
if (count == 0)
break;
-
- /* By writing to user space, we might have slept.
- * Stop the loop, to avoid a race condition.
- */
- if (volatile_task)
- break;
}
+ up(&mm->mmap_sem);

/* encode f_pos */
*ppos = (lineno << MAPS_LINE_SHIFT) + column;
@@ -1367,6 +1353,7 @@
*/
read_lock(&tasklist_lock);
tsk = find_task_by_pid(pid);
+ read_unlock(&tasklist_lock);
if (tsk) {
len = sprintf(buffer,
"cpu %lu %lu\n",
@@ -1379,7 +1366,6 @@
tsk->per_cpu_utime[cpu_logical_map(i)],
tsk->per_cpu_stime[cpu_logical_map(i)]);
}
- read_unlock(&tasklist_lock);
return len;
}
#endif

Andrea

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/