Re: [patch] 2.3.18ac10 /proc fixes [Re: [patch] wchan in 2.3.18ac6]

Chuck Lever (cel@monkey.org)
Mon, 4 Oct 1999 13:07:21 -0400 (EDT)


thanks for taking this a step farther, andrea. my patch was only halfway
there, i didn't know how to take it the rest of the way. i agree with
alan, though, this is an exploitable way, on SMP, for looking at bits of
kernel memory, so maybe it's better to keep both copy operations.

get_cpuid is still "safe" in terms of not tripping over address
dereferences. however, it's not clear how we can guarantee that the
calculations it does will be correct unless some locking is done on those
fields. maybe the correct fix there is to add another lock to protect
just those fields?

On Sun, 3 Oct 1999, Andrea Arcangeli wrote:
> >I don't like the kmalloc in array.c to copy the task struct. Not strictly
> >for performances but because we should do the proper locking there.
>
> It seems to me that kmalloc is preventing nothing else than potentially
> allowing an user to sniff data from a random kernel page looking the
> /proc/ data. This is really a minor problem as the door it too small to be
> exploited (at least it was definitely too small with the 2.2.x timings). I
> can't see no real need to go slow in /proc/.
>
> There was a few other minor things like KSTK_* which doesn't need the mmap
> lock and get_cpuid don't need to take the tasklist lock held while running
> the slow sprintf for the same reason as above.
>
> Then I fixed a SMP race in /proc/*/maps in read_maps which was following
> the tsk->mm->mmap->vm_next list without holding the mmap_sem of the mm.
> With the race properly fixed the volatile mmap trick is not necessary
> anymore.
>
> Patch against 2.3.18ac10.
>
> --- 2.3.18ac10/fs/proc/array.c Thu Sep 30 17:00:27 1999
> +++ 2.3.18ac10-proc/fs/proc/array.c Sun Oct 3 17:02:42 1999
> @@ -48,6 +48,9 @@
> *
> * Chuck Lever : safe handling of task_struct
> * <cel@monkey.org>
> + *
> + * Andrea Arcangeli : SMP race fixes and avoid not necessary copy
> + * of the task struct <andrea@suse.de>
> */
>
> #include <linux/types.h>
> @@ -959,7 +962,7 @@
>
> static int get_stat(int pid, char * buffer)
> {
> - struct task_struct *tsk, *p;
> + struct task_struct *tsk;
> struct mm_struct *mm = NULL;
> unsigned long vsize, eip, esp, wchan;
> long priority, nice;
> @@ -968,10 +971,6 @@
> char state;
> int res = 0;
>
> - tsk = kmalloc(sizeof(struct task_struct), GFP_KERNEL);
> - if (!tsk)
> - goto out;
> -
> /*
> * Hold the tasklist_lock and extract address information
> * we may need after the lock is released. We can't hold
> @@ -979,23 +978,22 @@
> * result in a deadlock.
> */
> read_lock(&tasklist_lock);
> - p = find_task_by_pid(pid);
> - if (p) {
> - memcpy(tsk, p, sizeof(struct task_struct));
> -
> - mm = p->mm;
> + tsk = find_task_by_pid(pid);
> + if (tsk)
> + {
> + mm = tsk->mm;
> if (mm)
> atomic_inc(&mm->mm_users);
>
> - ppid = p->p_pptr->pid;
> + ppid = tsk->p_pptr->pid;
>
> - spin_lock_irq(&p->sigmask_lock);
> - collect_sigign_sigcatch(p, &sigign, &sigcatch);
> - spin_unlock_irq(&p->sigmask_lock);
> + spin_lock_irq(&tsk->sigmask_lock);
> + collect_sigign_sigcatch(tsk, &sigign, &sigcatch);
> + spin_unlock_irq(&tsk->sigmask_lock);
> }
> read_unlock(&tasklist_lock);
> - if (!p)
> - goto free_out;
> + if (!tsk)
> + goto out;
>
> state = *get_task_state(tsk);
> vsize = eip = esp = 0;
> @@ -1007,12 +1005,12 @@
> vsize += vma->vm_end - vma->vm_start;
> vma = vma->vm_next;
> }
> - eip = KSTK_EIP(p);
> - esp = KSTK_ESP(p);
> up(&mm->mmap_sem);
> }
>
> - wchan = get_wchan(p);
> + eip = KSTK_EIP(tsk);
> + esp = KSTK_ESP(tsk);
> + wchan = get_wchan(tsk);
>
> /* scale priority and nice values from timeslices to -20..20 */
> /* to make it look like a "normal" Unix priority/nice value */
> @@ -1068,8 +1066,6 @@
> tsk->exit_signal,
> tsk->processor);
>
> -free_out:
> - kfree(tsk);
> out:
> if (mm)
> mmput(mm);
> @@ -1226,7 +1222,6 @@
> char * destptr = buf, * buffer;
> loff_t lineno;
> ssize_t column, i;
> - int volatile_task = 0;
> long retval;
>
> /*
> @@ -1242,13 +1237,8 @@
> p = find_task_by_pid(pid);
> if (p) {
> mm = p->mm;
> - if (mm) {
> + if (mm)
> atomic_inc(&mm->mm_users);
> -
> - /* Check whether the mmaps could change if we sleep */
> - volatile_task = (p != current ||
> - atomic_read(&mm->mm_users) > 2);
> - }
> }
> read_unlock(&tasklist_lock);
> if (!p)
> @@ -1262,6 +1252,7 @@
> lineno = *ppos >> MAPS_LINE_SHIFT;
> column = *ppos & (MAPS_LINE_LENGTH-1);
>
> + down(&mm->mmap_sem);
> /* quickly go to line "lineno" */
> for (map = mm->mmap, i = 0; map && (i < lineno); map = map->vm_next, i++)
> continue;
> @@ -1334,13 +1325,8 @@
> /* done? */
> if (count == 0)
> break;
> -
> - /* By writing to user space, we might have slept.
> - * Stop the loop, to avoid a race condition.
> - */
> - if (volatile_task)
> - break;
> }
> + up(&mm->mmap_sem);
>
> /* encode f_pos */
> *ppos = (lineno << MAPS_LINE_SHIFT) + column;
> @@ -1367,6 +1353,7 @@
> */
> read_lock(&tasklist_lock);
> tsk = find_task_by_pid(pid);
> + read_unlock(&tasklist_lock);
> if (tsk) {
> len = sprintf(buffer,
> "cpu %lu %lu\n",
> @@ -1379,7 +1366,6 @@
> tsk->per_cpu_utime[cpu_logical_map(i)],
> tsk->per_cpu_stime[cpu_logical_map(i)]);
> }
> - read_unlock(&tasklist_lock);
> return len;
> }
> #endif
>
> Andrea
>
>
>
>
> -
> To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
> the body of a message to majordomo@vger.rutgers.edu
> Please read the FAQ at http://www.tux.org/lkml/
>

- Chuck Lever

--
corporate:	<chuckl@netscape.com>
personal:	<chucklever@netscape.net> or <cel@monkey.org>

The Linux Scalability project: http://www.citi.umich.edu/projects/linux-scalability/

- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo@vger.rutgers.edu Please read the FAQ at http://www.tux.org/lkml/