Re: [RFC,PATCH] cookie based proc_pid_readdir

From: Mika Penttilä
Date: Tue Jan 06 2004 - 10:13:23 EST


afaics, clist_head from f_version is leaked if not whole directory read.

--Mika



Manfred Spraul wrote:

proc_dir_readdir skips over tasks if tasks exit between the individual get_tgid_list calls. The only approach that guarantees that no tasks are skipped is to add a cookie into the task list and restart from that cookie.

Attached is a proof of concept patch that adds such cookies - please comment.

--
Manfred

------------------------------------------------------------------------

// $Header$
// Kernel Version:
// VERSION = 2
// PATCHLEVEL = 6
// SUBLEVEL = 0
// EXTRAVERSION =
--- 2.6/include/linux/list.h 2003-12-20 09:19:13.000000000 +0100
+++ build-2.6/include/linux/list.h 2004-01-06 12:26:32.000000000 +0100
@@ -39,6 +39,16 @@
} while (0)

/*
+ * special linked lists that can contain invisible
+ * members. Members with nonzero is_cookie should be
+ * skipped.
+ */
+struct clist_head {
+ struct list_head l;
+ int is_cookie;
+};
+
+/*
* Insert a new entry between two known consecutive entries. *
* This is only for internal list manipulation where we know
--- 2.6/include/linux/proc_fs.h 2003-10-25 20:42:42.000000000 +0200
+++ build-2.6/include/linux/proc_fs.h 2004-01-06 12:48:15.000000000 +0100
@@ -95,6 +95,7 @@
struct dentry *proc_pid_unhash(struct task_struct *p);
void proc_pid_flush(struct dentry *proc_dentry);
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir);
+loff_t proc_pid_llseek(struct file *file, loff_t offset, int origin);

extern struct proc_dir_entry *create_proc_entry(const char *name, mode_t mode,
struct proc_dir_entry *parent);
--- 2.6/include/linux/sched.h 2003-12-04 19:44:40.000000000 +0100
+++ build-2.6/include/linux/sched.h 2004-01-06 12:31:57.000000000 +0100
@@ -352,7 +352,7 @@
cpumask_t cpus_allowed;
unsigned int time_slice, first_time_slice;

- struct list_head tasks;
+ struct clist_head tasks;
struct list_head ptrace_children;
struct list_head ptrace_list;

@@ -719,18 +719,31 @@

#define REMOVE_LINKS(p) do { \
if (thread_group_leader(p)) \
- list_del_init(&(p)->tasks); \
+ list_del_init(&(p)->tasks.l); \
remove_parent(p); \
} while (0)

#define SET_LINKS(p) do { \
if (thread_group_leader(p)) \
- list_add_tail(&(p)->tasks,&init_task.tasks); \
+ list_add_tail(&(p)->tasks.l,&init_task.tasks.l); \
add_parent(p, (p)->parent); \
} while (0)

-#define next_task(p) list_entry((p)->tasks.next, struct task_struct, tasks)
-#define prev_task(p) list_entry((p)->tasks.prev, struct task_struct, tasks)
+static inline task_t * next_task(task_t *p)
+{
+ do {
+ p = list_entry((p)->tasks.l.next, struct task_struct, tasks.l);
+ } while(p->tasks.is_cookie);
+ return p;
+}
+
+static inline task_t * prev_task(task_t *p)
+{
+ do {
+ p = list_entry((p)->tasks.l.prev, struct task_struct, tasks.l);
+ } while(p->tasks.is_cookie);
+ return p;
+}

#define for_each_process(p) \
for (p = &init_task ; (p = next_task(p)) != &init_task ; )
--- 2.6/include/linux/init_task.h 2003-12-04 19:44:40.000000000 +0100
+++ build-2.6/include/linux/init_task.h 2004-01-06 12:32:51.000000000 +0100
@@ -75,7 +75,7 @@
.active_mm = &init_mm, \
.run_list = LIST_HEAD_INIT(tsk.run_list), \
.time_slice = HZ, \
- .tasks = LIST_HEAD_INIT(tsk.tasks), \
+ .tasks = { LIST_HEAD_INIT(tsk.tasks.l), 0}, \
.ptrace_children= LIST_HEAD_INIT(tsk.ptrace_children), \
.ptrace_list = LIST_HEAD_INIT(tsk.ptrace_list), \
.real_parent = &tsk, \
--- 2.6/fs/proc/base.c 2003-12-20 09:19:13.000000000 +0100
+++ build-2.6/fs/proc/base.c 2004-01-06 14:17:20.000000000 +0100
@@ -1627,33 +1627,6 @@
#define PROC_MAXPIDS 20

/*
- * Get a few tgid's to return for filldir - we need to hold the
- * tasklist lock while doing this, and we must release it before
- * we actually do the filldir itself, so we use a temp buffer..
- */
-static int get_tgid_list(int index, unsigned int *tgids)
-{
- struct task_struct *p;
- int nr_tgids = 0;
-
- index--;
- read_lock(&tasklist_lock);
- for_each_process(p) {
- int tgid = p->pid;
- if (!pid_alive(p))
- continue;
- if (--index >= 0)
- continue;
- tgids[nr_tgids] = tgid;
- nr_tgids++;
- if (nr_tgids >= PROC_MAXPIDS)
- break;
- }
- read_unlock(&tasklist_lock);
- return nr_tgids;
-}
-
-/*
* Get a few tid's to return for filldir - we need to hold the
* tasklist lock while doing this, and we must release it before
* we actually do the filldir itself, so we use a temp buffer..
@@ -1685,35 +1658,134 @@
return nr_tids;
}

+loff_t proc_pid_llseek(struct file *file, loff_t offset, int origin)
+{
+ long long retval;
+ struct inode *inode = file->f_dentry->d_inode->i_mapping->host;
+
+ down(&inode->i_sem);
+ switch (origin) {
+ case 2:
+ offset += inode->i_size;
+ break;
+ case 1:
+ offset += file->f_pos;
+ }
+ retval = -EINVAL;
+ if (offset>=0 && offset<=inode->i_sb->s_maxbytes) {
+ if (offset != file->f_pos) {
+ file->f_pos = offset;
+ if (file->f_version) {
+ struct clist_head *cl;
+ cl = (struct clist_head *)file->f_version;
+ write_lock_irq(&tasklist_lock);
+ list_del(&cl->l);
+ write_unlock_irq(&tasklist_lock);
+ kfree(cl);
+ file->f_version = 0;
+ }
+ }
+ retval = offset;
+ }
+ up(&inode->i_sem);
+ return retval;
+}
+
/* for the /proc/ directory itself, after non-process stuff has been done */
int proc_pid_readdir(struct file * filp, void * dirent, filldir_t filldir)
{
- unsigned int tgid_array[PROC_MAXPIDS];
- char buf[PROC_NUMBUF];
unsigned int nr = filp->f_pos - FIRST_PROCESS_ENTRY;
- unsigned int nr_tgids, i;
+ struct clist_head fcl; /* fallback if kmalloc fails - -ENOMEM is not permitted */
+ struct clist_head *pcl;

if (!nr) {
ino_t ino = fake_ino(0,PROC_TGID_INO);
if (filldir(dirent, "self", 4, filp->f_pos, ino, DT_LNK) < 0)
return 0;
filp->f_pos++;
- nr++;
+ } else {
+ nr--;
}

- nr_tgids = get_tgid_list(nr, tgid_array);
+ if (filp->f_version) {
+ pcl = (struct clist_head *)filp->f_version;
+ } else {
+ task_t *p;
+ pcl = kmalloc(sizeof(struct clist_head), GFP_KERNEL);
+ if (pcl)
+ filp->f_version = (unsigned long)pcl;
+ else
+ pcl = &fcl;
+
+ INIT_LIST_HEAD(&pcl->l);
+ pcl->is_cookie = 1;
+
+ write_lock_irq(&tasklist_lock);
+ p = &init_task;
+ while(nr) {
+ if (pid_alive(p))
+ nr--;
+ p = next_task(p);
+ if (p == &init_task) {
+ write_unlock_irq(&tasklist_lock);
+ if (pcl != &fcl) {
+ filp->f_version = 0;
+ kfree(pcl);
+ }
+ return 0;
+ }
+ }
+ list_add(&pcl->l, &p->tasks.l);
+ write_unlock_irq(&tasklist_lock);
+ }

- for (i = 0; i < nr_tgids; i++) {
- int tgid = tgid_array[i];
- ino_t ino = fake_ino(tgid,PROC_TGID_INO);
- unsigned long j = PROC_NUMBUF;
+ for (;;) {
+ unsigned int tgid;
+ int j;
+ char buf[PROC_NUMBUF];
+ ino_t ino;
+ task_t *p;
+ struct list_head *walk;
+
+ write_lock_irq(&tasklist_lock);
+ walk = &pcl->l;
+ do {
+ walk = walk->next;
+ p = list_entry(walk, struct task_struct, tasks.l);
+ } while (p->tasks.is_cookie);
+ if (p == &init_task) {
+ write_unlock_irq(&tasklist_lock);
+ break;
+ }
+ tgid = p->pid;
+ list_del(&pcl->l);
+ list_add(&pcl->l, &p->tasks.l);
+ write_unlock_irq(&tasklist_lock);
+
+ ino = fake_ino(tgid,PROC_TGID_INO);

+ j = PROC_NUMBUF;
do buf[--j] = '0' + (tgid % 10); while (tgid/=10);

- if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0)
+ if (filldir(dirent, buf+j, PROC_NUMBUF-j, filp->f_pos, ino, DT_DIR) < 0) {
+ write_lock_irq(&tasklist_lock);
+ walk = &pcl->l;
+ do {
+ walk = walk->prev;
+ p = list_entry(walk, struct task_struct, tasks.l);
+ } while (p->tasks.is_cookie);
+ list_del(&pcl->l);
+ list_add_tail(&pcl->l, &p->tasks.l);
+ write_unlock_irq(&tasklist_lock);
break;
+ }
filp->f_pos++;
}
+ if (pcl == &fcl) {
+ write_lock_irq(&tasklist_lock);
+ list_del(&fcl.l);
+ write_unlock_irq(&tasklist_lock);
+ }
return 0;
}

--- 2.6/fs/proc/root.c 2003-10-25 20:44:17.000000000 +0200
+++ build-2.6/fs/proc/root.c 2004-01-06 13:19:31.000000000 +0100
@@ -127,6 +127,7 @@
static struct file_operations proc_root_operations = {
.read = generic_read_dir,
.readdir = proc_root_readdir,
+ .llseek = proc_pid_llseek,
};

/*
--- 2.6/kernel/pid.c 2003-12-04 19:44:40.000000000 +0100
+++ build-2.6/kernel/pid.c 2004-01-06 12:29:34.000000000 +0100
@@ -255,7 +255,7 @@
attach_pid(thread, PIDTYPE_TGID, thread->tgid);
attach_pid(thread, PIDTYPE_PGID, leader->__pgrp);
attach_pid(thread, PIDTYPE_SID, thread->session);
- list_add_tail(&thread->tasks, &init_task.tasks);
+ list_add_tail(&thread->tasks.l, &init_task.tasks.l);

attach_pid(leader, PIDTYPE_PID, leader->pid);
attach_pid(leader, PIDTYPE_TGID, leader->tgid);



-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/