Re: [PATCH] freezer: configure user space process frozen along with kernel threads

From: Eric W. Biederman
Date: Wed Feb 20 2013 - 08:52:19 EST


Li Fei <fei.li@xxxxxxxxx> writes:

> There is well known issue that freezing will fail in case that fuse
> daemon is frozen firstly with some requests not handled, as the fuse
> usage task is waiting for the response from fuse daemon and can't be
> frozen. To solve the issue as above, make fuse daemon frozen after
> all user space processes frozen and during the kernel threads frozen
> phase.
>
> After discussion, at present it's generally agreed that:
> 1) It's only the fuse daemon itself know definitely that it needs
> and can be frozen together with kernel threads;
> 2) It's helpful to expose interface that user space processes can
> use to configure user space processes to be frozen together with
> kernel threads.
> More information can be found on https://lkml.org/lkml/2013/2/18/174.
>
> To support the requirement above, attribute /proc/<PID>/freeze_late
> is added, writing 1 to it will make the process to be frozen together
> with kernel threads, and writing 0 to it will make the process to be
> frozen together with user space processes.

There are no permission checks on this interface at all which
potentially could make freeze late loose all meaning. Certainly
operating without permission checks needs some justification.

Then I have the question how does this scale. Which processes are ok to
have the freeze late bit set?

But even more than that fuse is setup to allow unprivileged mounts and
mostly untrusted processes to act as filesystems, whith a nice kernel
wrapper. Now we are going to trust those filesystems with the ability
to stop the kernel from freezing?

That seems inherently wrong.

Why can't the fuse filesystem freeze when there are requests pending?

How do we maintain this? What happens if there is a process that the
fuse process does ipc with and that it depends on? Are two levels of
freezing enough?

I really can't see how this solution as presented is maintainable or
justified.

Eric

> Signed-off-by: Liu Chuansheng <chuansheng.liu@xxxxxxxxx>
> Signed-off-by: Wang Biao <biao.wang@xxxxxxxxx>
> Signed-off-by: Li Fei <fei.li@xxxxxxxxx>
> ---
> fs/proc/base.c | 70 +++++++++++++++++++++++++++++++++++++++++++++++
> include/linux/freezer.h | 6 ++++
> include/linux/sched.h | 2 +
> kernel/freezer.c | 29 +++++++++++++++++++-
> kernel/power/process.c | 2 +-
> 5 files changed, 107 insertions(+), 2 deletions(-)
>
> diff --git a/fs/proc/base.c b/fs/proc/base.c
> index 9b43ff77..8d54c79 100644
> --- a/fs/proc/base.c
> +++ b/fs/proc/base.c
> @@ -89,6 +89,9 @@
> #include <asm/hardwall.h>
> #endif
> #include <trace/events/oom.h>
> +#ifdef CONFIG_FREEZER
> +#include <linux/freezer.h>
> +#endif
> #include "internal.h"
> #include "fd.h"
>
> @@ -2486,6 +2489,70 @@ static int proc_pid_personality(struct seq_file *m, struct pid_namespace *ns,
> return err;
> }
>
> +#ifdef CONFIG_FREEZER
> +
> +static ssize_t freeze_late_read(struct file *file, char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct task_struct *task = get_proc_task(file->f_path.dentry->d_inode);
> + char buffer[PROC_NUMBUF];
> + int freeze_late;
> + size_t len;
> + if (!task)
> + return -ESRCH;
> + freeze_late = (task->flags & PF_FREEZER_LATE) ? 1 : 0;
> + len = snprintf(buffer, sizeof(buffer), "%d\n", freeze_late);
> + return simple_read_from_buffer(buf, count, ppos, buffer, len);
> +}
> +
> +static ssize_t freeze_late_write(struct file *file, const char __user *buf,
> + size_t count, loff_t *ppos)
> +{
> + struct task_struct *task;
> + char buffer[PROC_NUMBUF];
> + int freeze_late;
> + int err;
> +
> + memset(buffer, 0, sizeof(buffer));
> + if (count > sizeof(buffer) - 1)
> + count = sizeof(buffer) - 1;
> + if (copy_from_user(buffer, buf, count)) {
> + err = -EFAULT;
> + goto out;
> + }
> +
> + err = kstrtoint(strstrip(buffer), 0, &freeze_late);
> + if (err)
> + goto out;
> + if (freeze_late < FREEZE_LATE_MIN ||
> + freeze_late > FREEZE_LATE_MAX) {
> + err = -EINVAL;
> + goto out;
> + }
> +
> + task = get_proc_task(file->f_path.dentry->d_inode);
> + if (!task) {
> + err = -ESRCH;
> + goto out;
> + }
> +
> + if (freeze_late)
> + set_freeze_late_flag(task);
> + else
> + clear_freeze_late_flag(task);
> +
> +out:
> + return err < 0 ? err : count;
> +}
> +
> +static const struct file_operations proc_freeze_late_operations = {
> + .read = freeze_late_read,
> + .write = freeze_late_write,
> + .llseek = generic_file_llseek,
> +};
> +
> +#endif
> +
> /*
> * Thread groups
> */
> @@ -2582,6 +2649,9 @@ static const struct pid_entry tgid_base_stuff[] = {
> REG("gid_map", S_IRUGO|S_IWUSR, proc_gid_map_operations),
> REG("projid_map", S_IRUGO|S_IWUSR, proc_projid_map_operations),
> #endif
> +#ifdef CONFIG_FREEZER
> + REG("freeze_late", S_IRUGO|S_IWUSR, proc_freeze_late_operations),
> +#endif
> };
>
> static int proc_tgid_base_readdir(struct file * filp,
> diff --git a/include/linux/freezer.h b/include/linux/freezer.h
> index e4238ce..10b24f8 100644
> --- a/include/linux/freezer.h
> +++ b/include/linux/freezer.h
> @@ -51,6 +51,10 @@ static inline bool try_to_freeze(void)
>
> extern bool freeze_task(struct task_struct *p);
> extern bool set_freezable(void);
> +#define FREEZE_LATE_MIN 0
> +#define FREEZE_LATE_MAX 1
> +extern void set_freeze_late_flag(struct task_struct *p);
> +extern void clear_freeze_late_flag(struct task_struct *p);
>
> #ifdef CONFIG_CGROUP_FREEZER
> extern bool cgroup_freezing(struct task_struct *task);
> @@ -217,6 +221,8 @@ static inline void freezer_do_not_count(void) {}
> static inline void freezer_count(void) {}
> static inline int freezer_should_skip(struct task_struct *p) { return 0; }
> static inline void set_freezable(void) {}
> +static inline void set_freeze_late_flag(void) {}
> +static inline void clear_freeze_late_flag(void) {}
>
> #define freezable_schedule() schedule()
>
> diff --git a/include/linux/sched.h b/include/linux/sched.h
> index d211247..4b2a7ef 100644
> --- a/include/linux/sched.h
> +++ b/include/linux/sched.h
> @@ -1826,6 +1826,8 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut,
> #define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */
> #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */
> #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */
> +/* Threads to be frozen along with kernel threads */
> +#define PF_FREEZER_LATE 0x80000000
>
> /*
> * Only the _current_ task can read/write to tsk->flags, but other
> diff --git a/kernel/freezer.c b/kernel/freezer.c
> index c38893b..1469474 100644
> --- a/kernel/freezer.c
> +++ b/kernel/freezer.c
> @@ -39,7 +39,8 @@ bool freezing_slow_path(struct task_struct *p)
> if (pm_nosig_freezing || cgroup_freezing(p))
> return true;
>
> - if (pm_freezing && !(p->flags & PF_KTHREAD))
> + if (pm_freezing && !(p->flags & PF_KTHREAD) &&
> + !(p->flags & PF_FREEZE_LATE))
> return true;
>
> return false;
> @@ -162,3 +163,29 @@ bool set_freezable(void)
> return try_to_freeze();
> }
> EXPORT_SYMBOL(set_freezable);
> +
> +/**
> + * set_freeze_late_flag - make %p to be frozen late
> + *
> + * Make %p to be frozen by freezer along with kernel threads
> + */
> +void set_freeze_late_flag(struct task_struct *p)
> +{
> + spin_lock_irq(&freezer_lock);
> + p->flags |= PF_FREEZE_LATE;
> + spin_unlock_irq(&freezer_lock);
> +}
> +EXPORT_SYMBOL(set_freeze_late_flag);
> +
> +/**
> + * clear_freeze_late_flag - make %p to be frozen early
> + *
> + * Make %p to be frozen by freezer along with user space processes
> + */
> +void clear_freeze_late_flag(struct task_struct *p)
> +{
> + spin_lock_irq(&freezer_lock);
> + p->flags &= ~PF_FREEZE_LATE;
> + spin_unlock_irq(&freezer_lock);
> +}
> +EXPORT_SYMBOL(clear_freeze_late_flag);
> diff --git a/kernel/power/process.c b/kernel/power/process.c
> index d5a258b..1472308 100644
> --- a/kernel/power/process.c
> +++ b/kernel/power/process.c
> @@ -199,7 +199,7 @@ void thaw_kernel_threads(void)
>
> read_lock(&tasklist_lock);
> do_each_thread(g, p) {
> - if (p->flags & (PF_KTHREAD | PF_WQ_WORKER))
> + if (p->flags & (PF_KTHREAD | PF_WQ_WORKER | PF_FREEZE_LATE))
> __thaw_task(p);
> } while_each_thread(g, p);
> read_unlock(&tasklist_lock);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/