Re: [PATCH v2] ns: do not block exit_task_namespaces() for a long time

From: Myklebust, Trond
Date: Mon Jul 16 2012 - 12:53:22 EST


On Mon, 2012-07-16 at 19:39 +0300, Kirill A. Shutemov wrote:
> On Mon, Jul 16, 2012 at 03:39:36PM +0000, Myklebust, Trond wrote:
> > On Mon, 2012-07-16 at 18:09 +0300, Kirill A. Shutemov wrote:
> > > From: "Kirill A. Shutemov" <kirill.shutemov@xxxxxxxxxxxxxxx>
> > >
> > > When the last task in a namespace exits, we need to trigger freeing of
> > > the namespace. Currently, we call synchronize_rcu() and free_nsproxy()
> > > directly on the do_exit() path.
> > >
> > > On my machine synchronize_rcu() blocks for about 0.01 seconds. For
> > > comparison, a normal exit_group() syscall takes less than 0.0003 seconds.
> > >
> > > Let's offload synchronize_rcu() and free_nsproxy() to a workqueue.
> > >
> > > I also move synchronize_rcu() inside free_nsproxy(). This fixes a racy
> > > put_nsproxy(), which calls free_nsproxy() without synchronize_rcu().
> > > I guess it was missed during the switch to RCU (see cf7b708).
> > >
> > > Microbenchmark:
> > >
> > > : #define _GNU_SOURCE
> > > : #include <unistd.h>
> > > : #include <sched.h>
> > > : #include <stdlib.h>
> > > : #include <sys/wait.h>
> > > :
> > > : int
> > > : main(void)
> > > : {
> > > : int i;
> > > : for (i = 0; i < 1024; i++) {
> > > : if (fork()) {
> > > : wait(NULL);
> > > : continue;
> > > : }
> > > : unshare(CLONE_NEWIPC);
> > > : exit(0);
> > > : }
> > > : return 0;
> > > : }
> > >
> > > Before the patch:
> > >
> > > real 0m8.335s
> > > user 0m0.000s
> > > sys 0m0.265s
> > >
> > > After:
> > >
> > > real 0m0.569s
> > > user 0m0.001s
> > > sys 0m0.154s
> > >
> > > Signed-off-by: Kirill A. Shutemov <kirill.shutemov@xxxxxxxxxxxxxxx>
> > > Acked-by: Serge E. Hallyn <serge.hallyn@xxxxxxxxxx>
> > > ---
> > >
> > > v2:
> > > - Updated description.
> > >
> > > ---
> > > include/linux/nsproxy.h | 1 +
> > > kernel/nsproxy.c | 34 +++++++++++++++++++++++-----------
> > > 2 files changed, 24 insertions(+), 11 deletions(-)
> > >
> > > diff --git a/include/linux/nsproxy.h b/include/linux/nsproxy.h
> > > index cc37a55..1d26be7 100644
> > > --- a/include/linux/nsproxy.h
> > > +++ b/include/linux/nsproxy.h
> > > @@ -24,6 +24,7 @@ struct fs_struct;
> > > */
> > > struct nsproxy {
> > > atomic_t count;
> > > + struct work_struct free_nsproxy_work;
> > > struct uts_namespace *uts_ns;
> > > struct ipc_namespace *ipc_ns;
> > > struct mnt_namespace *mnt_ns;
> > > diff --git a/kernel/nsproxy.c b/kernel/nsproxy.c
> > > index b576f7f..ebc7d40 100644
> > > --- a/kernel/nsproxy.c
> > > +++ b/kernel/nsproxy.c
> > > @@ -41,13 +41,17 @@ struct nsproxy init_nsproxy = {
> > > #endif
> > > };
> > >
> > > +static void free_nsproxy_work(struct work_struct *work);
> > > +
> > > static inline struct nsproxy *create_nsproxy(void)
> > > {
> > > struct nsproxy *nsproxy;
> > >
> > > nsproxy = kmem_cache_alloc(nsproxy_cachep, GFP_KERNEL);
> > > - if (nsproxy)
> > > + if (nsproxy) {
> > > atomic_set(&nsproxy->count, 1);
> > > + INIT_WORK(&nsproxy->free_nsproxy_work, free_nsproxy_work);
> > > + }
> > > return nsproxy;
> > > }
> > >
> > > @@ -166,6 +170,14 @@ out:
> > >
> > > void free_nsproxy(struct nsproxy *ns)
> > > {
> > > + /*
> > > + * wait for others to get what they want from this nsproxy.
> > > + *
> > > + * cannot release this nsproxy via the call_rcu() since
> > > + * put_mnt_ns() will want to sleep
> > > + */
> > > + synchronize_rcu();
> > > +
> > > if (ns->mnt_ns)
> > > put_mnt_ns(ns->mnt_ns);
> > > if (ns->uts_ns)
> > > @@ -178,6 +190,14 @@ void free_nsproxy(struct nsproxy *ns)
> > > kmem_cache_free(nsproxy_cachep, ns);
> > > }
> > >
> > > +static void free_nsproxy_work(struct work_struct *work)
> > > +{
> > > + struct nsproxy *ns = container_of(work, struct nsproxy,
> > > + free_nsproxy_work);
> > > +
> > > + free_nsproxy(ns);
> > > +}
> > > +
> > > /*
> > > * Called from unshare. Unshare all the namespaces part of nsproxy.
> > > * On success, returns the new nsproxy.
> > > @@ -215,16 +235,8 @@ void switch_task_namespaces(struct task_struct *p, struct nsproxy *new)
> > >
> > > rcu_assign_pointer(p->nsproxy, new);
> > >
> > > - if (ns && atomic_dec_and_test(&ns->count)) {
> > > - /*
> > > - * wait for others to get what they want from this nsproxy.
> > > - *
> > > - * cannot release this nsproxy via the call_rcu() since
> > > - * put_mnt_ns() will want to sleep
> > > - */
> > > - synchronize_rcu();
> > > - free_nsproxy(ns);
> > > - }
> > > + if (ns && atomic_dec_and_test(&ns->count))
> > > + schedule_work(&ns->free_nsproxy_work);
> >
> > What's wrong with using call_rcu()? The above will cause a workqueue
> > thread to block for no good reason.
>
> See the comment above synchronize_rcu(): free_nsproxy() might sleep.
> A call_rcu() callback may be invoked from either softirq or process
> context, so we can't use it.

But a call_rcu() callback should be allowed to call schedule_work(). At least
that way you'd be able to get rid of the 0.01s synchronize_rcu() sleep inside
keventd.

--
Trond Myklebust
Linux NFS client maintainer

NetApp
Trond.Myklebust@xxxxxxxxxx
www.netapp.com

¢éì®&Þ~º&¶¬–+-±éÝ¥Šw®žË±Êâmébžìdz¹Þ)í…æèw*jg¬±¨¶‰šŽŠÝj/êäz¹ÞŠà2ŠÞ¨è­Ú&¢)ß«a¶Úþø®G«éh®æj:+v‰¨Šwè†Ù>Wš±êÞiÛaxPjØm¶Ÿÿà -»+ƒùdš_