Re: [PATCH] nfsd: Always lock state exclusively.

From: Jeff Layton
Date: Thu Jun 09 2016 - 06:13:59 EST


On Wed, 2016-06-08 at 22:55 -0400, Oleg Drokin wrote:
> It used to be the case that state had an rwlock that was locked for write
> by downgrades, but for read for upgrades (opens). Well, the problem is
> if there are two competing opens for the same state, they step on
> each other's toes, potentially leading to leaking file descriptors
> from the state structure, since access mode is a bitmap only set once.
> This patch converts st_rwsem to st_mutex and all users become exclusive,
> no sharing.
>
> Signed-off-by: Oleg Drokin <green@xxxxxxxxxxxxxx>
> ---
> This holds up well in my testing.
> I'll also try the other approach to see if it's any better.
>  fs/nfsd/nfs4state.c | 40 ++++++++++++++++++++--------------------
>  fs/nfsd/state.h     |  2 +-
>  2 files changed, 21 insertions(+), 21 deletions(-)
>
> diff --git a/fs/nfsd/nfs4state.c b/fs/nfsd/nfs4state.c
> index f5f82e1..c927d36 100644
> --- a/fs/nfsd/nfs4state.c
> +++ b/fs/nfsd/nfs4state.c
> @@ -3502,7 +3502,7 @@ init_open_stateid(struct nfs4_ol_stateid *stp, struct nfs4_file *fp,
>   stp->st_access_bmap = 0;
>   stp->st_deny_bmap = 0;
>   stp->st_openstp = NULL;
> - init_rwsem(&stp->st_rwsem);
> + mutex_init(&stp->st_mutex);
>   list_add(&stp->st_perstateowner, &oo->oo_owner.so_stateids);
>   list_add(&stp->st_perfile, &fp->fi_stateids);
>  
> @@ -4335,10 +4335,10 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>    */
>   if (stp) {
>   /* Stateid was found, this is an OPEN upgrade */
> - down_read(&stp->st_rwsem);
> + mutex_lock(&stp->st_mutex);
>   status = nfs4_upgrade_open(rqstp, fp, current_fh, stp, open);
>   if (status) {
> - up_read(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   goto out;
>   }
>   } else {
> @@ -4348,19 +4348,19 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>   if (swapstp) {
>   nfs4_put_stid(&stp->st_stid);
>   stp = swapstp;
> - down_read(&stp->st_rwsem);
> + mutex_lock(&stp->st_mutex);
>   status = nfs4_upgrade_open(rqstp, fp, current_fh,
>   stp, open);
>   if (status) {
> - up_read(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   goto out;
>   }
>   goto upgrade_out;
>   }
> - down_read(&stp->st_rwsem);
> + mutex_lock(&stp->st_mutex);
>   status = nfs4_get_vfs_file(rqstp, fp, current_fh, stp, open);
>   if (status) {
> - up_read(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   release_open_stateid(stp);
>   goto out;
>   }
> @@ -4372,7 +4372,7 @@ nfsd4_process_open2(struct svc_rqst *rqstp, struct svc_fh *current_fh, struct nf
>   }
>  upgrade_out:
>   nfs4_inc_and_copy_stateid(&open->op_stateid, &stp->st_stid);
> - up_read(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>  
>   if (nfsd4_has_session(&resp->cstate)) {
>   if (open->op_deleg_want & NFS4_SHARE_WANT_NO_DELEG) {
> @@ -4977,12 +4977,12 @@ static __be32 nfs4_seqid_op_checks(struct nfsd4_compound_state *cstate, stateid_
>    * revoked delegations are kept only for free_stateid.
>    */
>   return nfserr_bad_stateid;
> - down_write(&stp->st_rwsem);
> + mutex_lock(&stp->st_mutex);
>   status = check_stateid_generation(stateid, &stp->st_stid.sc_stateid, nfsd4_has_session(cstate));
>   if (status == nfs_ok)
>   status = nfs4_check_fh(current_fh, &stp->st_stid);
>   if (status != nfs_ok)
> - up_write(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   return status;
>  }
>  
> @@ -5030,7 +5030,7 @@ static __be32 nfs4_preprocess_confirmed_seqid_op(struct nfsd4_compound_state *cs
>   return status;
>   oo = openowner(stp->st_stateowner);
>   if (!(oo->oo_flags & NFS4_OO_CONFIRMED)) {
> - up_write(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   nfs4_put_stid(&stp->st_stid);
>   return nfserr_bad_stateid;
>   }
> @@ -5062,12 +5062,12 @@ nfsd4_open_confirm(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   oo = openowner(stp->st_stateowner);
>   status = nfserr_bad_stateid;
>   if (oo->oo_flags & NFS4_OO_CONFIRMED) {
> - up_write(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   goto put_stateid;
>   }
>   oo->oo_flags |= NFS4_OO_CONFIRMED;
>   nfs4_inc_and_copy_stateid(&oc->oc_resp_stateid, &stp->st_stid);
> - up_write(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   dprintk("NFSD: %s: success, seqid=%d stateid=" STATEID_FMT "\n",
>   __func__, oc->oc_seqid, STATEID_VAL(&stp->st_stid.sc_stateid));
>  
> @@ -5143,7 +5143,7 @@ nfsd4_open_downgrade(struct svc_rqst *rqstp,
>   nfs4_inc_and_copy_stateid(&od->od_stateid, &stp->st_stid);
>   status = nfs_ok;
>  put_stateid:
> - up_write(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   nfs4_put_stid(&stp->st_stid);
>  out:
>   nfsd4_bump_seqid(cstate, status);
> @@ -5196,7 +5196,7 @@ nfsd4_close(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   if (status)
>   goto out; 
>   nfs4_inc_and_copy_stateid(&close->cl_stateid, &stp->st_stid);
> - up_write(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>  
>   nfsd4_close_open_stateid(stp);
>  
> @@ -5422,7 +5422,7 @@ init_lock_stateid(struct nfs4_ol_stateid *stp, struct nfs4_lockowner *lo,
>   stp->st_access_bmap = 0;
>   stp->st_deny_bmap = open_stp->st_deny_bmap;
>   stp->st_openstp = open_stp;
> - init_rwsem(&stp->st_rwsem);
> + mutex_init(&stp->st_mutex);
>   list_add(&stp->st_locks, &open_stp->st_locks);
>   list_add(&stp->st_perstateowner, &lo->lo_owner.so_stateids);
>   spin_lock(&fp->fi_lock);
> @@ -5591,7 +5591,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   &open_stp, nn);
>   if (status)
>   goto out;
> - up_write(&open_stp->st_rwsem);
> + mutex_unlock(&open_stp->st_mutex);
>   open_sop = openowner(open_stp->st_stateowner);
>   status = nfserr_bad_stateid;
>   if (!same_clid(&open_sop->oo_owner.so_client->cl_clientid,
> @@ -5600,7 +5600,7 @@ nfsd4_lock(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>   status = lookup_or_create_lock_state(cstate, open_stp, lock,
>   &lock_stp, &new);
>   if (status == nfs_ok)
> - down_write(&lock_stp->st_rwsem);
> + mutex_lock(&lock_stp->st_mutex);
>   } else {
>   status = nfs4_preprocess_seqid_op(cstate,
>           lock->lk_old_lock_seqid,
> @@ -5704,7 +5704,7 @@ out:
>       seqid_mutating_err(ntohl(status)))
>   lock_sop->lo_owner.so_seqid++;
>  
> - up_write(&lock_stp->st_rwsem);
> + mutex_unlock(&lock_stp->st_mutex);
>  
>   /*
>    * If this is a new, never-before-used stateid, and we are
> @@ -5874,7 +5874,7 @@ nfsd4_locku(struct svc_rqst *rqstp, struct nfsd4_compound_state *cstate,
>  fput:
>   fput(filp);
>  put_stateid:
> - up_write(&stp->st_rwsem);
> + mutex_unlock(&stp->st_mutex);
>   nfs4_put_stid(&stp->st_stid);
>  out:
>   nfsd4_bump_seqid(cstate, status);
> diff --git a/fs/nfsd/state.h b/fs/nfsd/state.h
> index 986e51e..64053ea 100644
> --- a/fs/nfsd/state.h
> +++ b/fs/nfsd/state.h
> @@ -535,7 +535,7 @@ struct nfs4_ol_stateid {
>   unsigned char st_access_bmap;
>   unsigned char st_deny_bmap;
>   struct nfs4_ol_stateid *st_openstp;
> - struct rw_semaphore st_rwsem;
> + struct mutex st_mutex;
>  };
>  
>  static inline struct nfs4_ol_stateid *openlockstateid(struct nfs4_stid *s)

Looks legit. Eventually we might want to turn this back into a rwsem by
fixing and clarifying the locking around the st_access_bmap (and the
deny bmap), but for now I think this is a reasonable fix for the
problem Oleg found.

There is the potential for a minor perf hit here when there are racing
OPEN calls for the same inode, but I think that's acceptable for now.
We may also want to go ahead and send this to stable as well.

Reviewed-by: Jeff Layton <jlayton@xxxxxxxxxxxxxxx>