diff -urbN linux-2.5.7-futex/include/linux/futex.h linux-2.5.7-afutex/include/linux/futex.h
--- linux-2.5.7-futex/include/linux/futex.h	Tue Apr 9 16:19:59 2002
+++ linux-2.5.7-afutex/include/linux/futex.h	Tue Apr 9 12:28:36 2002
@@ -4,5 +4,7 @@
 /* Second argument to futex syscall */
 #define FUTEX_WAIT (0)
 #define FUTEX_WAKE (1)
+#define FUTEX_AWAIT (2)
+#define FUTEX_AWAKERS (3)
 
 #endif
diff -urbN linux-2.5.7-futex/kernel/exit.c linux-2.5.7-afutex/kernel/exit.c
--- linux-2.5.7-futex/kernel/exit.c	Mon Mar 18 15:37:10 2002
+++ linux-2.5.7-afutex/kernel/exit.c	Tue Apr 9 14:07:45 2002
@@ -496,6 +496,10 @@
 #ifdef CONFIG_BSD_PROCESS_ACCT
 	acct_process(code);
 #endif
+	{
+		extern void __exit_futex(struct task_struct*);
+		__exit_futex(tsk);
+	}
 	__exit_mm(tsk);
 
 	lock_kernel();
diff -urbN linux-2.5.7-futex/kernel/futex.c linux-2.5.7-afutex/kernel/futex.c
--- linux-2.5.7-futex/kernel/futex.c	Tue Apr 9 16:19:59 2002
+++ linux-2.5.7-afutex/kernel/futex.c	Wed Apr 10 09:54:10 2002
@@ -33,6 +33,7 @@
 #include
 #include
 #include
+#include <linux/slab.h> /* for kmalloc() */
 #include
 
 /* These mutexes are a very simple counter: the winner is the one who
@@ -49,15 +50,19 @@
    the relevent ones (hashed queues may be shared) */
 struct futex_q {
 	struct list_head list;
-	struct task_struct *task;
 	/* Page struct and offset within it. 
*/ struct page *page; unsigned int offset; + struct task_struct *task; + pid_t tgid; /* "0" if synchronous futex */ + void *uaddr; /* async wait on */ + int sig; /* signal to wakeup */ }; /* The key for the hash is the address + index + offset within page */ static struct list_head futex_queues[1<tgid = 0; + q->task = current; + q->page = page; + q->offset = offset; spin_lock(&futex_lock); - list_for_each_safe(i, next, head) { - struct futex_q *this = list_entry(i, struct futex_q, list); + list_add_tail(&q->list, head); + spin_unlock(&futex_lock); +} - if (this->page == page && this->offset == offset) { - list_del_init(i); - wake_up_process(this->task); - num_woken++; - if (num_woken >= num) break; - } +/* Return 1 if we were still queued (ie. 0 means we were woken) */ +static inline int unqueue_me(struct futex_q *q) +{ + int ret = 0; + spin_lock(&futex_lock); + if (!list_empty(&q->list)) { + list_del(&q->list); + ret = 1; } spin_unlock(&futex_lock); - return num_woken; + return ret; } /* Add at end to avoid starvation */ -static inline void queue_me(struct list_head *head, +static inline void queue_me_async(struct list_head *head, struct futex_q *q, struct page *page, - unsigned int offset) + unsigned int offset, + void *uaddr, + int sig) { + q->tgid = current->tgid; q->task = current; q->page = page; q->offset = offset; + q->uaddr = uaddr; + q->sig = sig; spin_lock(&futex_lock); list_add_tail(&q->list, head); spin_unlock(&futex_lock); } -/* Return 1 if we were still queued (ie. 
0 means we were woken) */
-static inline int unqueue_me(struct futex_q *q)
+/* Return 1 if we were still queued in wait queue and not in notify queue */
+static inline int unqueue_me_async(struct futex_q *q)
 {
 	int ret = 0;
 	spin_lock(&futex_lock);
-	if (!list_empty(&q->list)) {
+	if ((q->page != NULL) && (!list_empty(&q->list))) {
 		list_del(&q->list);
 		ret = 1;
 	}
@@ -120,6 +137,47 @@
 	return ret;
 }
 
+static int futex_wake(struct list_head *head,
+		      struct page *page,
+		      unsigned int offset,
+		      int num)
+{
+	/* Wakes waiters queued on (page, offset), stopping once num_woken >= num. */
+	extern int sys_kill(int,int);
+
+	struct list_head *i, *next;
+	int num_woken = 0;
+
+	spin_lock(&futex_lock);
+	list_for_each_safe(i, next, head) {
+		struct futex_q *this = list_entry(i, struct futex_q, list);
+
+		if (this->page == page && this->offset == offset) {
+			list_del_init(i);
+			if (this->tgid == 0) {
+				/* synchronous waiter (tgid == 0): wake its task directly */
+				wake_up_process(this->task);
+			} else {
+				/* move to notification queue, release page */
+				list_add_tail(&this->list, &notify_queue);
+				put_page(this->page);
+				this->page = NULL;
+
+				if (sys_kill(this->tgid,this->sig)) {
+					/* target is dead: drop the entry, do not count it */
+					list_del(&this->list);
+					kfree(this);
+					continue;
+				}
+			}
+			num_woken++;
+			if (num_woken >= num) break;
+		}
+	}
+	spin_unlock(&futex_lock);
+	return num_woken;
+}
+
 /* Get kernel address of the user page and pin it. */
 static struct page *pin_page(unsigned long page_start)
 {
@@ -158,8 +216,10 @@
 	if (*count != val) {
 		ret = -EWOULDBLOCK;
 		set_current_state(TASK_RUNNING);
+		kunmap(page);
 		goto out;
 	}
+	kunmap(page);
 	time = schedule_timeout(time);
 	if (time == 0) {
 		ret = -ETIMEDOUT;
@@ -170,21 +230,100 @@
 		goto out;
 	}
  out:
-	kunmap(page);
 	/* Were we woken up anyway? 
*/
 	if (!unqueue_me(&q))
 		return 0;
 	return ret;
 }
 
+static int futex_await(struct list_head *head,
+		       struct page *page,
+		       int offset,
+		       int val,
+		       void *uaddr,
+		       int sig)
+{
+	int *count;
+	struct futex_q *q = kmalloc(sizeof(struct futex_q),GFP_KERNEL);
+	int ret = 0;
+
+	if (!q)
+		return -ENOMEM;	/* non-zero return: sys_futex drops the page pin */
+	queue_me_async(head, q, page, offset, uaddr, sig);
+	count = kmap(page) + offset;
+	if (*count != val) {
+		/* value changed: fail unless a waker already moved q to notify_queue */
+		if (unqueue_me_async(q)) {
+			kfree(q);
+			ret = -EAGAIN;
+		}
+	}
+	kunmap(page);
+	return(ret);
+}
+
+static int futex_awaiters(void **uaddr, int num)
+{
+	struct list_head *i, *next;
+	int num_woken = 0;
+	int rc;
+
+	spin_lock(&futex_lock);
+	list_for_each_safe(i, next, &notify_queue) {
+		struct futex_q *this = list_entry(i, struct futex_q, list);
+
+		if (this->tgid == current->tgid) {
+			if (num_woken >= num)
+				goto out;
+			/* NOTE(review): put_user() can fault and sleep while futex_lock is held */
+			if ((rc = put_user(this->uaddr,&uaddr[num_woken]))) {
+				/* all notifications selected so far will be lost */
+				/* we could recreate them from **uaddr */
+				num_woken = rc;
+				break;
+			}
+			list_del(i);
+			kfree(this);
+			num_woken++;
+		}
+	}
+ out:
+	spin_unlock(&futex_lock);
+	return num_woken;
+}
+
+/* cleanup called from do_exit */
+void __exit_futex(struct task_struct *task)
+{
+	struct list_head *i, *next;
+	/* NOTE(review): entries store current->tgid but are matched against task->pid - confirm */
+	spin_lock(&futex_lock);
+	list_for_each_safe(i, next, &notify_queue) {
+		struct futex_q *this = list_entry(i, struct futex_q, list);
+
+		if (this->tgid == task->pid) {
+			list_del(i);
+			kfree(this);
+		}
+	}
+	spin_unlock(&futex_lock);
+}
+
 asmlinkage int sys_futex(void *uaddr, int op, int val, struct timespec *utime)
 {
 	int ret;
 	unsigned long pos_in_page;
 	struct list_head *head;
 	struct page *page;
-	unsigned long time = MAX_SCHEDULE_TIMEOUT;
+	unsigned long time;
 
+	/* first handle the special case commands */
+	if (op == FUTEX_AWAKERS) {
+		return futex_awaiters((void**)uaddr, val);
+	}
+
+	time = MAX_SCHEDULE_TIMEOUT;
+	/* FUTEX_AWAIT passes a signal number in utime, not a timespec pointer */
-	if (utime) {
+	if (utime && op != FUTEX_AWAIT) {
 		struct timespec t;
 		if (copy_from_user(&t, utime, sizeof(t)) != 0)
@@ 
-212,11 +351,17 @@
 	case FUTEX_WAKE:
 		ret = futex_wake(head, page, pos_in_page, val);
 		break;
+	case FUTEX_AWAIT:
+		ret = futex_await(head, page, pos_in_page, val, uaddr, (int)(long)utime);
+		if (!ret)
+			goto out;	/* don't release the page: the queued entry keeps the pin */
+		break;
 	default:
 		ret = -EINVAL;
 	}
 	put_page(page);
 
+out:
 	return ret;
 }
 