[PATCH 14/38] Remove struct mm_struct::exe_file et al

From: Alexey Dobriyan
Date: Fri May 22 2009 - 00:58:57 EST


Commit 925d1c401fa6cfd0df5d2e37da8981494ccdec07 aka "procfs task exe symlink".
introduced struct mm_struct::exe_file and struct mm_struct::num_exe_file_vmas.

The rationale is weak: unifying MMU and no-MMU version of /proc/*/exe code.
For this a) struct mm_struct becomes bigger, b) mmap/munmap/exit become slower,
c) patch adds more code than removes in fact.

After commit 8feae13110d60cc6287afabc2887366b0eb226c2 aka
"NOMMU: Make VMAs per MM as for MMU-mode linux" no-MMU kernels also
maintain list of VMAs in ->mmap, so we can switch back for MMU version
of /proc/*/exe.

This also helps C/R, no need to save and restore ->exe_file and to count
additional references to check if there is a leak of struct file outside
group of checkpointed resources.

Signed-off-by: Alexey Dobriyan <adobriyan@xxxxxxxxx>
---
fs/exec.c | 2 -
fs/proc/base.c | 105 +++++++++++++---------------------------------
include/linux/mm.h | 12 -----
include/linux/mm_types.h | 6 ---
include/linux/proc_fs.h | 20 ---------
kernel/fork.c | 3 -
mm/mmap.c | 22 ++--------
mm/nommu.c | 16 +------
8 files changed, 36 insertions(+), 150 deletions(-)

diff --git a/fs/exec.c b/fs/exec.c
index 895823d..04fbe3e 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -938,8 +938,6 @@ int flush_old_exec(struct linux_binprm * bprm)
if (retval)
goto out;

- set_mm_exe_file(bprm->mm, bprm->file);
-
/*
* Release all of the old mmap stuff
*/
diff --git a/fs/proc/base.c b/fs/proc/base.c
index fb45615..4284cc6 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -201,6 +201,36 @@ static int proc_root_link(struct inode *inode, struct path *path)
return result;
}

+static int proc_exe_link(struct inode *inode, struct path *path)
+{
+ struct task_struct *tsk;
+ struct mm_struct *mm;
+ struct vm_area_struct *vma;
+
+ tsk = get_proc_task(inode);
+ if (!tsk)
+ return -ENOENT;
+ mm = get_task_mm(tsk);
+ put_task_struct(tsk);
+ if (!mm)
+ return -ENOENT;
+
+ down_read(&mm->mmap_sem);
+ for (vma = mm->mmap; vma; vma = vma->vm_next) {
+ if ((vma->vm_flags & VM_EXECUTABLE) && vma->vm_file) {
+ *path = vma->vm_file->f_path;
+ path_get(&vma->vm_file->f_path);
+
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return 0;
+ }
+ }
+ up_read(&mm->mmap_sem);
+ mmput(mm);
+ return -ENOENT;
+}
+
/*
* Return zero if current may access user memory in @task, -error if not.
*/
@@ -1251,81 +1281,6 @@ static const struct file_operations proc_pid_sched_operations = {

#endif

-/*
- * We added or removed a vma mapping the executable. The vmas are only mapped
- * during exec and are not mapped with the mmap system call.
- * Callers must hold down_write() on the mm's mmap_sem for these
- */
-void added_exe_file_vma(struct mm_struct *mm)
-{
- mm->num_exe_file_vmas++;
-}
-
-void removed_exe_file_vma(struct mm_struct *mm)
-{
- mm->num_exe_file_vmas--;
- if ((mm->num_exe_file_vmas == 0) && mm->exe_file){
- fput(mm->exe_file);
- mm->exe_file = NULL;
- }
-
-}
-
-void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
-{
- if (new_exe_file)
- get_file(new_exe_file);
- if (mm->exe_file)
- fput(mm->exe_file);
- mm->exe_file = new_exe_file;
- mm->num_exe_file_vmas = 0;
-}
-
-struct file *get_mm_exe_file(struct mm_struct *mm)
-{
- struct file *exe_file;
-
- /* We need mmap_sem to protect against races with removal of
- * VM_EXECUTABLE vmas */
- down_read(&mm->mmap_sem);
- exe_file = mm->exe_file;
- if (exe_file)
- get_file(exe_file);
- up_read(&mm->mmap_sem);
- return exe_file;
-}
-
-void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
-{
- /* It's safe to write the exe_file pointer without exe_file_lock because
- * this is called during fork when the task is not yet in /proc */
- newmm->exe_file = get_mm_exe_file(oldmm);
-}
-
-static int proc_exe_link(struct inode *inode, struct path *exe_path)
-{
- struct task_struct *task;
- struct mm_struct *mm;
- struct file *exe_file;
-
- task = get_proc_task(inode);
- if (!task)
- return -ENOENT;
- mm = get_task_mm(task);
- put_task_struct(task);
- if (!mm)
- return -ENOENT;
- exe_file = get_mm_exe_file(mm);
- mmput(mm);
- if (exe_file) {
- *exe_path = exe_file->f_path;
- path_get(&exe_file->f_path);
- fput(exe_file);
- return 0;
- } else
- return -ENOENT;
-}
-
static void *proc_pid_follow_link(struct dentry *dentry, struct nameidata *nd)
{
struct inode *inode = dentry->d_inode;
diff --git a/include/linux/mm.h b/include/linux/mm.h
index bff1f0d..b3b61a6 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1121,18 +1121,6 @@ extern void exit_mmap(struct mm_struct *);
extern int mm_take_all_locks(struct mm_struct *mm);
extern void mm_drop_all_locks(struct mm_struct *mm);

-#ifdef CONFIG_PROC_FS
-/* From fs/proc/base.c. callers must _not_ hold the mm's exe_file_lock */
-extern void added_exe_file_vma(struct mm_struct *mm);
-extern void removed_exe_file_vma(struct mm_struct *mm);
-#else
-static inline void added_exe_file_vma(struct mm_struct *mm)
-{}
-
-static inline void removed_exe_file_vma(struct mm_struct *mm)
-{}
-#endif /* CONFIG_PROC_FS */
-
extern int may_expand_vm(struct mm_struct *mm, unsigned long npages);
extern int install_special_mapping(struct mm_struct *mm,
unsigned long addr, unsigned long len,
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 0e80e26..90786ea 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -269,12 +269,6 @@ struct mm_struct {
*/
struct task_struct *owner;
#endif
-
-#ifdef CONFIG_PROC_FS
- /* store ref to file /proc/<pid>/exe symlink points to */
- struct file *exe_file;
- unsigned long num_exe_file_vmas;
-#endif
#ifdef CONFIG_MMU_NOTIFIER
struct mmu_notifier_mm *mmu_notifier_mm;
#endif
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index fbfa3d4..64ed076 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -188,12 +188,6 @@ extern void proc_net_remove(struct net *net, const char *name);
extern struct proc_dir_entry *proc_net_mkdir(struct net *net, const char *name,
struct proc_dir_entry *parent);

-/* While the {get|set|dup}_mm_exe_file functions are for mm_structs, they are
- * only needed to implement /proc/<pid>|self/exe so we define them here. */
-extern void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file);
-extern struct file *get_mm_exe_file(struct mm_struct *mm);
-extern void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm);
-
#else

#define proc_net_fops_create(net, name, mode, fops) ({ (void)(mode), NULL; })
@@ -240,20 +234,6 @@ static inline int pid_ns_prepare_proc(struct pid_namespace *ns)
static inline void pid_ns_release_proc(struct pid_namespace *ns)
{
}
-
-static inline void set_mm_exe_file(struct mm_struct *mm,
- struct file *new_exe_file)
-{}
-
-static inline struct file *get_mm_exe_file(struct mm_struct *mm)
-{
- return NULL;
-}
-
-static inline void dup_mm_exe_file(struct mm_struct *oldmm,
- struct mm_struct *newmm)
-{}
-
#endif /* CONFIG_PROC_FS */

#if !defined(CONFIG_PROC_KCORE)
diff --git a/kernel/fork.c b/kernel/fork.c
index b9e2edd..ed377ad 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -488,7 +488,6 @@ void mmput(struct mm_struct *mm)
if (atomic_dec_and_test(&mm->mm_users)) {
exit_aio(mm);
exit_mmap(mm);
- set_mm_exe_file(mm, NULL);
if (!list_empty(&mm->mmlist)) {
spin_lock(&mmlist_lock);
list_del(&mm->mmlist);
@@ -611,8 +610,6 @@ struct mm_struct *dup_mm(struct task_struct *tsk)
if (init_new_context(tsk, mm))
goto fail_nocontext;

- dup_mm_exe_file(oldmm, mm);
-
err = dup_mmap(mm, oldmm);
if (err)
goto free_pt;
diff --git a/mm/mmap.c b/mm/mmap.c
index 6b7b1a9..76faabc 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -232,11 +232,8 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma)
might_sleep();
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
- if (vma->vm_file) {
+ if (vma->vm_file)
fput(vma->vm_file);
- if (vma->vm_flags & VM_EXECUTABLE)
- removed_exe_file_vma(vma->vm_mm);
- }
mpol_put(vma_policy(vma));
kmem_cache_free(vm_area_cachep, vma);
return next;
@@ -633,11 +630,8 @@ again: remove_next = 1 + (end > next->vm_end);
spin_unlock(&mapping->i_mmap_lock);

if (remove_next) {
- if (file) {
+ if (file)
fput(file);
- if (next->vm_flags & VM_EXECUTABLE)
- removed_exe_file_vma(mm);
- }
mm->map_count--;
mpol_put(vma_policy(next));
kmem_cache_free(vm_area_cachep, next);
@@ -1192,8 +1186,6 @@ munmap_back:
error = file->f_op->mmap(file, vma);
if (error)
goto unmap_and_free_vma;
- if (vm_flags & VM_EXECUTABLE)
- added_exe_file_vma(mm);
} else if (vm_flags & VM_SHARED) {
error = shmem_zero_setup(vma);
if (error)
@@ -1849,11 +1841,8 @@ int split_vma(struct mm_struct * mm, struct vm_area_struct * vma,
}
vma_set_policy(new, pol);

- if (new->vm_file) {
+ if (new->vm_file)
get_file(new->vm_file);
- if (vma->vm_flags & VM_EXECUTABLE)
- added_exe_file_vma(mm);
- }

if (new->vm_ops && new->vm_ops->open)
new->vm_ops->open(new);
@@ -2200,11 +2189,8 @@ struct vm_area_struct *copy_vma(struct vm_area_struct **vmap,
new_vma->vm_start = addr;
new_vma->vm_end = addr + len;
new_vma->vm_pgoff = pgoff;
- if (new_vma->vm_file) {
+ if (new_vma->vm_file)
get_file(new_vma->vm_file);
- if (vma->vm_flags & VM_EXECUTABLE)
- added_exe_file_vma(mm);
- }
if (new_vma->vm_ops && new_vma->vm_ops->open)
new_vma->vm_ops->open(new_vma);
vma_link(mm, new_vma, prev, rb_link, rb_parent);
diff --git a/mm/nommu.c b/mm/nommu.c
index b571ef7..352aac5 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -721,11 +721,8 @@ static void delete_vma(struct mm_struct *mm, struct vm_area_struct *vma)
kenter("%p", vma);
if (vma->vm_ops && vma->vm_ops->close)
vma->vm_ops->close(vma);
- if (vma->vm_file) {
+ if (vma->vm_file)
fput(vma->vm_file);
- if (vma->vm_flags & VM_EXECUTABLE)
- removed_exe_file_vma(mm);
- }
put_nommu_region(vma->vm_region);
kmem_cache_free(vm_area_cachep, vma);
}
@@ -1218,10 +1215,6 @@ unsigned long do_mmap_pgoff(struct file *file,
get_file(file);
vma->vm_file = file;
get_file(file);
- if (vm_flags & VM_EXECUTABLE) {
- added_exe_file_vma(current->mm);
- vma->vm_mm = current->mm;
- }
}

down_write(&nommu_region_sem);
@@ -1359,11 +1352,8 @@ share:
error_put_region:
__put_nommu_region(region);
if (vma) {
- if (vma->vm_file) {
+ if (vma->vm_file)
fput(vma->vm_file);
- if (vma->vm_flags & VM_EXECUTABLE)
- removed_exe_file_vma(vma->vm_mm);
- }
kmem_cache_free(vm_area_cachep, vma);
}
kleave(" = %d [pr]", ret);
@@ -1375,8 +1365,6 @@ error:
fput(region->vm_file);
kmem_cache_free(vm_region_jar, region);
fput(vma->vm_file);
- if (vma->vm_flags & VM_EXECUTABLE)
- removed_exe_file_vma(vma->vm_mm);
kmem_cache_free(vm_area_cachep, vma);
kleave(" = %d", ret);
return ret;
--
1.5.6.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/