Re: [RFC][PATCH] fix move/migrate_pages() race on task struct

From: Christoph Lameter
Date: Fri Feb 24 2012 - 12:33:02 EST


On Fri, 24 Feb 2012, Dave Hansen wrote:

> > Is that all safe? If not then we need to take a refcount on the task
> > struct after all.
>
> Urg, no we can't sleep under an rcu_read_lock().

Ok so take a count and drop it before entering the main migration
function?


Signed-off-by: Christoph Lameter <cl@xxxxxxxxx>


---
mm/mempolicy.c | 12 +++++++-----
mm/migrate.c | 20 +++++++++++---------
2 files changed, 18 insertions(+), 14 deletions(-)

Index: linux-2.6/mm/mempolicy.c
===================================================================
--- linux-2.6.orig/mm/mempolicy.c 2012-02-24 04:10:01.621614996 -0600
+++ linux-2.6/mm/mempolicy.c 2012-02-24 05:01:43.621530156 -0600
@@ -1293,7 +1293,7 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
{

const struct cred *cred = current_cred(), *tcred;
struct mm_struct *mm = NULL;
- struct task_struct *task;
+ struct task_struct *task = NULL;
nodemask_t task_nodes;
int err;
nodemask_t *old;
@@ -1318,10 +1318,10 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
rcu_read_lock();
task = pid ? find_task_by_vpid(pid) : current;
if (!task) {
- rcu_read_unlock();
err = -ESRCH;
goto out;
}
+ get_task_struct(task);
mm = get_task_mm(task);
rcu_read_unlock();

@@ -1335,16 +1335,13 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- rcu_read_lock();
tcred = __task_cred(task);
if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
- rcu_read_unlock();
err = -EPERM;
goto out;
}
- rcu_read_unlock();

task_nodes = cpuset_mems_allowed(task);
/* Is the user allowed to access the target nodes? */
@@ -1362,9 +1359,14 @@ SYSCALL_DEFINE4(migrate_pages, pid_t, pi
if (err)
goto out;

+ put_task_struct(task);
+ task = NULL;
err = do_migrate_pages(mm, old, new,
capable(CAP_SYS_NICE) ? MPOL_MF_MOVE_ALL : MPOL_MF_MOVE);
out:
+ if (task)
+ put_task_struct(task);
+
if (mm)
mmput(mm);
NODEMASK_SCRATCH_FREE(scratch);
Index: linux-2.6/mm/migrate.c
===================================================================
--- linux-2.6.orig/mm/migrate.c 2012-02-24 04:10:01.609614993 -0600
+++ linux-2.6/mm/migrate.c 2012-02-24 05:07:39.493520424 -0600
@@ -1176,20 +1176,17 @@ set_status:
* Migrate an array of page address onto an array of nodes and fill
* the corresponding array of status.
*/
-static int do_pages_move(struct mm_struct *mm, struct task_struct *task,
+static int do_pages_move(struct mm_struct *mm, nodemask_t task_nodes,
unsigned long nr_pages,
const void __user * __user *pages,
const int __user *nodes,
int __user *status, int flags)
{
struct page_to_node *pm;
- nodemask_t task_nodes;
unsigned long chunk_nr_pages;
unsigned long chunk_start;
int err;

- task_nodes = cpuset_mems_allowed(task);
-
err = -ENOMEM;
pm = (struct page_to_node *)__get_free_page(GFP_KERNEL);
if (!pm)
@@ -1351,6 +1348,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
struct task_struct *task;
struct mm_struct *mm;
int err;
+ nodemask_t task_nodes;

/* Check flags */
if (flags & ~(MPOL_MF_MOVE|MPOL_MF_MOVE_ALL))
@@ -1366,6 +1364,7 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
rcu_read_unlock();
return -ESRCH;
}
+ get_task_struct(task);
mm = get_task_mm(task);
rcu_read_unlock();

@@ -1378,30 +1377,33 @@ SYSCALL_DEFINE6(move_pages, pid_t, pid,
* capabilities, superuser privileges or the same
* userid as the target process.
*/
- rcu_read_lock();
tcred = __task_cred(task);
if (cred->euid != tcred->suid && cred->euid != tcred->uid &&
cred->uid != tcred->suid && cred->uid != tcred->uid &&
!capable(CAP_SYS_NICE)) {
- rcu_read_unlock();
err = -EPERM;
goto out;
}
- rcu_read_unlock();

err = security_task_movememory(task);
if (err)
goto out;

+ task_nodes = cpuset_mems_allowed(task);
+ put_task_struct(task);
+ task = NULL;
+
if (nodes) {
- err = do_pages_move(mm, task, nr_pages, pages, nodes, status,
- flags);
+ err = do_pages_move(mm, task_nodes, nr_pages, pages, nodes,
+ status, flags);
} else {
err = do_pages_stat(mm, nr_pages, pages, status);
}

out:
mmput(mm);
+ if (task)
+ put_task_struct(task);
return err;
}

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/