updated patch for lockd, rpc

Bill Hawes (whawes@star.net)
Sat, 04 Oct 1997 11:42:51 -0400


This is a multi-part message in MIME format.
--------------140E4A41A3DEF3D6DEA6FBD2
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

I've attached an updated patch for the NFS lockd and sunrpc routines, and it
seems to work cleanly now. I can mount and unmount NFS without
complaints, the lockd and rpciod processes go up and down as they
should, and there's no disk corruption afterwards.

The main problem turned out to be that 2.1.56 made the socket wait
uninterruptible, so that the daemons couldn't be awakened at shutdown
time. In addition, lockd and rpciod were holding their original
parent's files open (typically this was /etc/fstab, /etc/mtab ...),
making the shutdown unclean.

In the course of tracking this down I found many latent bugs in lockd
and the sunrpc code -- races, memory leaks, double frees, etc. Hence
the patch is getting big, but hopefully I haven't introduced any new
problems. Please give it a thorough testing!

Regards,
Bill
--------------140E4A41A3DEF3D6DEA6FBD2
Content-Type: text/plain; charset=us-ascii; name="sunrpc_57-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="sunrpc_57-patch"

--- linux-2.1.57/net/sunrpc/svc.c.old Thu Mar 20 10:03:20 1997
+++ linux-2.1.57/net/sunrpc/svc.c Fri Oct 3 12:40:31 1997
@@ -62,8 +62,12 @@
serv->sv_program->pg_name,
serv->sv_nrthreads);

- if (--(serv->sv_nrthreads) != 0)
- return;
+ if (serv->sv_nrthreads) {
+ if (--(serv->sv_nrthreads) != 0)
+ return;
+ } else
+ printk("svc_destroy: no threads for serv=%p!\n", serv);
+
while ((svsk = serv->sv_allsocks) != NULL)
svc_delete_socket(svsk);

@@ -110,30 +114,31 @@
int
svc_create_thread(svc_thread_fn func, struct svc_serv *serv)
{
- struct svc_rqst *rqstp = 0;
- int error;
+ struct svc_rqst *rqstp;
+ int error = -ENOMEM;

- if (!(rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL)))
- return -ENOMEM;
+ rqstp = kmalloc(sizeof(*rqstp), GFP_KERNEL);
+ if (!rqstp)
+ goto out;

memset(rqstp, 0, sizeof(*rqstp));
if (!(rqstp->rq_argp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
|| !(rqstp->rq_resp = (u32 *) kmalloc(serv->sv_xdrsize, GFP_KERNEL))
- || !svc_init_buffer(&rqstp->rq_defbuf, serv->sv_bufsz)) {
- error = -ENOMEM;
- goto failure;
- }
+ || !svc_init_buffer(&rqstp->rq_defbuf, serv->sv_bufsz))
+ goto out_thread;

serv->sv_nrthreads++;
- if ((error = kernel_thread((int (*)(void *)) func, rqstp, 0)) < 0)
- goto failure;
-
rqstp->rq_server = serv;
- return 0;
+ error = kernel_thread((int (*)(void *)) func, rqstp, 0);
+ if (error < 0)
+ goto out_thread;
+ error = 0;
+out:
+ return error;

-failure:
+out_thread:
svc_exit_thread(rqstp);
- return error;
+ goto out;
}

/*
@@ -152,7 +157,8 @@
kfree(rqstp);

/* Release the server */
- svc_destroy(serv);
+ if (serv)
+ svc_destroy(serv);
}

/*
--- linux-2.1.57/net/sunrpc/sched.c.old Sat Sep 20 08:16:17 1997
+++ linux-2.1.57/net/sunrpc/sched.c Fri Oct 3 23:37:00 1997
@@ -56,7 +56,8 @@
*/
static struct wait_queue * rpciod_idle = NULL;
static struct wait_queue * rpciod_killer = NULL;
-static int rpciod_sema = 0;
+static struct semaphore rpciod_sema = MUTEX;
+static unsigned int rpciod_users = 0;
static pid_t rpciod_pid = 0;
static int rpc_inhibit = 0;

@@ -575,19 +576,36 @@
current->pid);
}

+/*
+ * Create a new task for the specified client. We have to
+ * clean up after an allocation failure, as the client may
+ * have specified "oneshot".
+ */
struct rpc_task *
rpc_new_task(struct rpc_clnt *clnt, rpc_action callback, int flags)
{
struct rpc_task *task;

- if (!(task = (struct rpc_task *) rpc_allocate(flags, sizeof(*task))))
- return NULL;
+ task = (struct rpc_task *) rpc_allocate(flags, sizeof(*task));
+ if (!task)
+ goto cleanup;

rpc_init_task(task, clnt, callback, flags);

dprintk("RPC: %4d allocated task\n", task->tk_pid);
task->tk_flags |= RPC_TASK_DYNAMIC;
+out:
return task;
+
+cleanup:
+ /* Check whether to release the client */
+ if (clnt) {
+ printk("rpc_new_task: failed, users=%d, oneshot=%d\n",
+ clnt->cl_users, clnt->cl_oneshot);
+ clnt->cl_users++; /* pretend we were used ... */
+ rpc_release_client(clnt);
+ }
+ goto out;
}

void
@@ -662,6 +680,9 @@
rpc_release_task(child);
}

+/*
+ * Note: rpc_new_task releases the client after a failure.
+ */
struct rpc_task *
rpc_new_child(struct rpc_clnt *clnt, struct rpc_task *parent)
{
@@ -715,11 +736,11 @@
unsigned long oldflags;
int rounds = 0;

+ MOD_INC_USE_COUNT;
lock_kernel();
rpciod_pid = current->pid;

- MOD_INC_USE_COUNT;
- /* exit_files(current); */
+ exit_files(current);
exit_mm(current);
current->blocked |= ~_S(SIGKILL);
current->session = 1;
@@ -727,13 +748,13 @@
sprintf(current->comm, "rpciod");

dprintk("RPC: rpciod starting (pid %d)\n", rpciod_pid);
- while (rpciod_sema) {
+ while (rpciod_users) {
if (signalled()) {
if (current->signal & _S(SIGKILL)) {
rpciod_killall();
} else {
printk("rpciod: ignoring signal (%d users)\n",
- rpciod_sema);
+ rpciod_users);
}
current->signal &= current->blocked;
}
@@ -783,23 +804,43 @@
void
rpciod_up(void)
{
- dprintk("rpciod_up pid %d sema %d\n", rpciod_pid, rpciod_sema);
- if (!(rpciod_sema++) || !rpciod_pid)
- kernel_thread(rpciod, &rpciod_killer, 0);
+ down(&rpciod_sema);
+ dprintk("rpciod_up: pid %d, users %d\n", rpciod_pid, rpciod_users);
+ rpciod_users++;
+ if (rpciod_pid)
+ goto out;
+ if (rpciod_users > 1)
+ printk("rpciod_up: no pid, %d users??\n", rpciod_users);
+
+ kernel_thread(rpciod, &rpciod_killer, 0);
+out:
+ up(&rpciod_sema);
}

void
rpciod_down(void)
{
- dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_sema);
- if (--rpciod_sema > 0)
- return;
+ down(&rpciod_sema);
+ dprintk("rpciod_down pid %d sema %d\n", rpciod_pid, rpciod_users);
+ if (rpciod_users) {
+ if (--rpciod_users)
+ goto out;
+ } else
+ printk("rpciod_down: pid=%d, no users??\n", rpciod_pid);
+
+ if (!rpciod_pid) {
+ printk("rpciod_down: Nothing to do!\n");
+ goto out;
+ }

- rpciod_sema = 0;
kill_proc(rpciod_pid, SIGKILL, 1);
while (rpciod_pid) {
- if (signalled())
- return;
+ if (signalled()) {
+ printk("rpciod_down: signalled, pid=%d\n", rpciod_pid);
+ break;
+ }
interruptible_sleep_on(&rpciod_killer);
}
+out:
+ up(&rpciod_sema);
}
--- linux-2.1.57/net/sunrpc/clnt.c.old Fri Sep 26 08:10:53 1997
+++ linux-2.1.57/net/sunrpc/clnt.c Fri Oct 3 22:08:41 1997
@@ -116,17 +116,23 @@

/*
* Properly shut down an RPC client, terminating all outstanding
- * requests.
+ * requests. Note that we must be certain that cl_oneshot and
+ * cl_dead are cleared, or else the client would be destroyed
+ * when the last task releases it.
*/
int
rpc_shutdown_client(struct rpc_clnt *clnt)
{
dprintk("RPC: shutting down %s client for %s\n",
- clnt->cl_protname, clnt->cl_server);
+ clnt->cl_protname, clnt->cl_server);
while (clnt->cl_users) {
- dprintk("sigmask %08lx\n", current->signal);
- dprintk("users %d\n", clnt->cl_users);
- clnt->cl_dead = 1;
+#ifdef RPC_DEBUG
+ printk("rpc_shutdown_client: client %s, tasks=%d\n",
+ clnt->cl_protname, clnt->cl_users);
+#endif
+ /* Don't let rpc_release_client destroy us */
+ clnt->cl_oneshot = 0;
+ clnt->cl_dead = 0;
rpc_killall_tasks(clnt);
sleep_on(&destroy_wait);
}
@@ -162,12 +168,16 @@
{
dprintk("RPC: rpc_release_client(%p, %d)\n",
clnt, clnt->cl_users);
- if (--(clnt->cl_users) == 0) {
- wake_up(&destroy_wait);
- if (clnt->cl_oneshot || clnt->cl_dead)
- rpc_destroy_client(clnt);
- }
- dprintk("RPC: rpc_release_client done\n");
+ if (clnt->cl_users) {
+ if (--(clnt->cl_users) > 0)
+ return;
+ } else
+ printk("rpc_release_client: %s client already free??\n",
+ clnt->cl_protname);
+
+ wake_up(&destroy_wait);
+ if (clnt->cl_oneshot || clnt->cl_dead)
+ rpc_destroy_client(clnt);
}

/*
@@ -205,10 +215,9 @@
if ((async = (flags & RPC_TASK_ASYNC)) != 0) {
if (!func)
func = rpc_default_callback;
- if (!(task = rpc_new_task(clnt, func, flags))) {
- current->blocked = oldmask;
- return -ENOMEM;
- }
+ status = -ENOMEM;
+ if (!(task = rpc_new_task(clnt, func, flags)))
+ goto out;
task->tk_calldata = data;
} else {
rpc_init_task(task, clnt, NULL, flags);
@@ -222,12 +231,13 @@
} else
async = 0;

+ status = 0;
if (!async) {
status = task->tk_status;
rpc_release_task(task);
- } else
- status = 0;
+ }

+out:
current->blocked = oldmask;
return status;
}
@@ -354,6 +364,7 @@

if ((task->tk_buffer = rpc_malloc(task, bufsiz)) != NULL)
return;
+ printk("RPC: buffer allocation failed for task %p\n", task);

if (1 || !signalled()) {
xprt_release(task);
--- linux-2.1.57/net/sunrpc/pmap_clnt.c.old Wed May 14 18:01:21 1997
+++ linux-2.1.57/net/sunrpc/pmap_clnt.c Fri Oct 3 14:19:05 1997
@@ -54,15 +54,16 @@
}
clnt->cl_binding = 1;

- task->tk_status = 0;
- if (!(pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot))) {
- task->tk_status = -EACCES;
+ task->tk_status = -EACCES; /* why set this? returns -EIO below */
+ if (!(pmap_clnt = pmap_create(clnt->cl_server, sap, map->pm_prot)))
goto bailout;
- }
- if (!(child = rpc_new_child(pmap_clnt, task))) {
- rpc_destroy_client(pmap_clnt);
+ task->tk_status = 0;
+
+ /*
+ * Note: rpc_new_child will release client after a failure.
+ */
+ if (!(child = rpc_new_child(pmap_clnt, task)))
goto bailout;
- }

/* Setup the call info struct */
rpc_call_setup(child, PMAP_GETPORT, map, &clnt->cl_port, 0);
--- linux-2.1.57/net/sunrpc/svcsock.c.old Sat Sep 20 08:16:18 1997
+++ linux-2.1.57/net/sunrpc/svcsock.c Fri Oct 3 21:47:48 1997
@@ -743,14 +743,18 @@
if ((svsk = svc_sock_dequeue(serv)) != NULL) {
enable_bh(NET_BH);
rqstp->rq_sock = svsk;
- svsk->sk_inuse++;
+ svsk->sk_inuse++; /* N.B. where is this decremented? */
} else {
/* No data pending. Go to sleep */
rqstp->rq_sock = NULL;
rqstp->rq_wait = NULL;
svc_serv_enqueue(serv, rqstp);

- current->state = TASK_UNINTERRUPTIBLE;
+ /*
+ * We have to be able to interrupt this wait
+ * to bring down the daemons ...
+ */
+ current->state = TASK_INTERRUPTIBLE;
add_wait_queue(&rqstp->rq_wait, &wait);
enable_bh(NET_BH);
schedule();
--- linux-2.1.57/fs/lockd/svc.c.old Wed Apr 16 00:47:24 1997
+++ linux-2.1.57/fs/lockd/svc.c Sat Oct 4 11:08:34 1997
@@ -42,11 +42,14 @@

extern struct svc_program nlmsvc_program;
struct nlmsvc_binding * nlmsvc_ops = NULL;
-static int nlmsvc_sema = 0;
-static int nlmsvc_pid = 0;
+static struct semaphore nlmsvc_sema = MUTEX;
+static unsigned int nlmsvc_users = 0;
+static pid_t nlmsvc_pid = 0;
unsigned long nlmsvc_grace_period = 0;
unsigned long nlmsvc_timeout = 0;

+static struct wait_queue * lockd_exit = NULL;
+
/*
* Currently the following can be set only at insmod time.
* Ideally, they would be accessible through the sysctl interface.
@@ -64,14 +67,15 @@
sigset_t oldsigmask;
int err = 0;

- lock_kernel();
/* Lock module and set up kernel thread */
MOD_INC_USE_COUNT;
- /* exit_files(current); */
+ lock_kernel();
+ exit_files(current);
exit_mm(current);
current->session = 1;
current->pgrp = 1;
sprintf(current->comm, "lockd");
+ nlmsvc_pid = current->pid;

/* kick rpciod */
rpciod_up();
@@ -94,14 +98,14 @@

nlmsvc_grace_period += jiffies;
nlmsvc_timeout = nlm_timeout * HZ;
- nlmsvc_pid = current->pid;

/*
* The main request loop. We don't terminate until the last
* NFS mount or NFS daemon has gone away, and we've been sent a
- * signal.
+ * signal, or else another process has taken over our job.
*/
- while (nlmsvc_sema || !signalled()) {
+ while ((nlmsvc_users || !signalled()) && nlmsvc_pid == current->pid)
+ {
if (signalled())
current->signal = 0;

@@ -156,8 +160,17 @@
nlmsvc_ops->exp_unlock();
}

- nlm_shutdown_hosts();
-
+ /*
+ * Check whether there's a new lockd process before
+ * shutting down the hosts and clearing the slot.
+ */
+ if (!nlmsvc_pid || current->pid == nlmsvc_pid) {
+ nlm_shutdown_hosts();
+ nlmsvc_pid = 0;
+ } else
+ printk("lockd: new process, skipping host shutdown\n");
+ wake_up(&lockd_exit);
+
/* Exit the RPC thread */
svc_exit_thread(rqstp);

@@ -166,7 +179,6 @@

/* Release module */
MOD_DEC_USE_COUNT;
- nlmsvc_pid = 0;
}

/*
@@ -185,42 +197,106 @@
return svc_create_socket(serv, protocol, &sin);
}

+/*
+ * Bring up the lockd process if it's not already up.
+ */
int
lockd_up(void)
{
struct svc_serv * serv;
- int error;
+ int error = 0;

- if (nlmsvc_pid || nlmsvc_sema++)
- return 0;
+ down(&nlmsvc_sema);
+ /*
+ * Unconditionally increment the user count ... this is
+ * the number of clients who _want_ a lockd process.
+ */
+ nlmsvc_users++;
+ /*
+ * Check whether we're already up and running.
+ */
+ if (nlmsvc_pid)
+ goto out;

- dprintk("lockd: creating service\n");
- if ((serv = svc_create(&nlmsvc_program, 0, NLMSVC_XDRSIZE)) == NULL)
- return -ENOMEM;
+ /*
+ * Sanity check: if there's no pid,
+ * we should be the first user ...
+ */
+ if (nlmsvc_users > 1)
+ printk("lockd_up: no pid, %d users??\n", nlmsvc_users);

- if ((error = lockd_makesock(serv, IPPROTO_UDP, 0)) < 0
+ error = -ENOMEM;
+ serv = svc_create(&nlmsvc_program, 0, NLMSVC_XDRSIZE);
+ if (!serv) {
+ printk("lockd_up: create service failed\n");
+ goto out;
+ }
+
+ if ((error = lockd_makesock(serv, IPPROTO_UDP, 0)) < 0
|| (error = lockd_makesock(serv, IPPROTO_TCP, 0)) < 0) {
- svc_destroy(serv);
- return error;
+ printk("lockd_up: makesock failed, error=%d\n", error);
+ goto destroy_and_out;
+ }
+
+ error = svc_create_thread(lockd, serv);
+ if (error) {
+ printk("lockd_up: create thread failed, error=%d\n", error);
+ goto destroy_and_out;
}

- if ((error = svc_create_thread(lockd, serv)) < 0)
- nlmsvc_sema--;
+ /*
+ * Wait briefly to verify that the lockd process has started.
+ */
+ current->state = TASK_INTERRUPTIBLE;
+ current->timeout = jiffies + 1;
+ schedule();
+ current->timeout = 0;
+ if (!nlmsvc_pid)
+ printk("lockd_up: Hmm, no lockd process yet\n");

- /* Release server */
+ /*
+ * Note: svc_serv structures have an initial use count of 1,
+ * so we exit through here on both success and failure.
+ */
+destroy_and_out:
svc_destroy(serv);
- return 0;
+out:
+ up(&nlmsvc_sema);
+ return error;
}

+/*
+ * Decrement the user count and bring down lockd if we're the last.
+ */
void
lockd_down(void)
{
- if (!nlmsvc_pid || --nlmsvc_sema > 0)
- return;
+ down(&nlmsvc_sema);
+ if (nlmsvc_users) {
+ if (--nlmsvc_users)
+ goto out;
+ } else
+ printk("lockd_down: no users! pid=%d\n", nlmsvc_pid);
+
+ if (!nlmsvc_pid) {
+ printk("lockd_down: nothing to do!\n");
+ goto out;
+ }

kill_proc(nlmsvc_pid, SIGKILL, 1);
- nlmsvc_sema = 0;
- nlmsvc_pid = 0;
+ /*
+ * Wait for the lockd process to exit, but since we're holding
+ * the lockd semaphore, we can't wait around forever ...
+ */
+ current->timeout = jiffies + 5 * HZ;
+ interruptible_sleep_on(&lockd_exit);
+ current->timeout = 0;
+ if (nlmsvc_pid) {
+ printk("lockd_down: lockd failed to exit, clearing pid\n");
+ nlmsvc_pid = 0;
+ }
+out:
+ up(&nlmsvc_sema);
}

#ifdef MODULE
@@ -235,6 +311,10 @@
int
init_module(void)
{
+ /* Init the static variables */
+ nlmsvc_sema = MUTEX;
+ nlmsvc_users = 0;
+ nlmsvc_pid = 0;
nlmxdr_init();
return 0;
}
--- linux-2.1.57/fs/lockd/clntproc.c.old Sat Jul 19 08:17:13 1997
+++ linux-2.1.57/fs/lockd/clntproc.c Sat Oct 4 11:06:09 1997
@@ -160,8 +160,9 @@
nlmclnt_grace_wait(struct nlm_host *host)
{
if (!host->h_reclaiming)
- current->timeout = 10 * HZ;
+ current->timeout = jiffies + 10 * HZ;
interruptible_sleep_on(&host->h_gracewait);
+ current->timeout = 0;
return signalled()? -ERESTARTSYS : 0;
}

@@ -178,9 +179,11 @@
sizeof(struct nlm_rqst));
if (call)
return call;
- current->timeout = 5 * HZ;
+ printk("nlmclnt_alloc_call: failed, waiting for memory\n");
+ current->timeout = jiffies + 5 * HZ;
current->state = TASK_INTERRUPTIBLE;
schedule();
+ current->timeout = 0;
}
return NULL;
}
@@ -232,6 +235,7 @@
/* Back off a little and try again */
current->timeout = jiffies + 15 * HZ;
interruptible_sleep_on(&host->h_gracewait);
+ current->timeout = 0;
} while (!signalled());

return -ERESTARTSYS;

--------------140E4A41A3DEF3D6DEA6FBD2--