[RFC PATCH] nfs: Support posix_fadvise(POSIX_FADV_RANDOM) on nfs server.
From: Namjae Jeon
Date: Sat Jun 23 2012 - 06:47:35 EST
From: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
This patch disable readahead for a file on NFS Server when
posix_fadvise(fd..., POSIX_FADV_RANDOM) is called on NFS
Client.
Readahead on the file can be enabled again with
posix_fadvise(fd..., POSIX_FADV_NORMAL)
This patch is provides performance boost in a scenario like
traversing a directory with large number of files(e.g. ~5000 files),
showing thumbnail view for an image directory, file attributes of
various music files, preview dialog for video files etc.
To simulate the scenario we performed a traverse test on a directory
containing 5000 files. When OPEN/READDIR/STAT/READ(4k)
is called on all 5000 files, total time is reduced by 19.6 %.
Without this patch:
#> ./traverse_time huge_test_dir 4096
Traversed Path : huge_test_dir/ read_size = 4096
Total Time Taken : 22326 msec <--------
IMPORTANT TIMINGS
OPEN DIR TIME (15915 usec)
READ DIR TIME (8092117 usec)
STAT TIME (118791 usec)
OPEN TIME (1361520 usec)
CLOSE TIME (27134 usec)
READ TIME(12647454 usec)
ADVISE TIME(9066 usec)
MEMSET TIME(7 usec)
READ DATA : 20480000
READ SPEED: 1.54MB/sec
With this patch:
#> ./traverse_time huge_test_dir 4096
Traversed Path : huge_test_dir/ read_size = 4096
Total Time Taken : 17949 msec <--------
IMPORTANT TIMINGS
OPEN DIR TIME (15868 usec)
READ DIR TIME (7982147 usec)
STAT TIME (118780 usec)
OPEN TIME (1369376 usec)
CLOSE TIME (28216 usec)
READ TIME(8372115 usec)
ADVISE TIME(8923 usec)
MEMSET TIME(7 usec)
READ DATA : 20480000
READ SPEED(only read): 2.33MB/sec
Signed-off-by: Namjae Jeon <namjae.jeon@xxxxxxxxxxx>
Signed-off-by: Vivek Trivedi <t.vivek@xxxxxxxxxxx>
Signed-off-by: Amit Sahrawat <a.sahrawat@xxxxxxxxxxx>
---
fs/nfs/file.c | 3 +++
fs/nfs/nfs2xdr.c | 5 +++--
fs/nfs/nfs3xdr.c | 5 +++--
fs/nfs/nfs4xdr.c | 5 +++--
fs/nfsd/nfs3proc.c | 2 +-
fs/nfsd/nfs3xdr.c | 1 +
fs/nfsd/nfs4xdr.c | 5 +++--
fs/nfsd/nfsproc.c | 2 +-
fs/nfsd/nfsxdr.c | 1 +
fs/nfsd/vfs.c | 24 +++++++++++++++++++++---
fs/nfsd/vfs.h | 6 ++++--
fs/nfsd/xdr.h | 1 +
fs/nfsd/xdr3.h | 1 +
fs/nfsd/xdr4.h | 1 +
14 files changed, 47 insertions(+), 15 deletions(-)
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index a6708e6b..89fdc43 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -192,6 +192,7 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
struct dentry * dentry = iocb->ki_filp->f_path.dentry;
struct inode * inode = dentry->d_inode;
ssize_t result;
+ struct nfs_open_context *ctx = nfs_file_open_context(iocb->ki_filp);
if (iocb->ki_filp->f_flags & O_DIRECT)
return nfs_file_direct_read(iocb, iov, nr_segs, pos);
@@ -200,6 +201,8 @@ nfs_file_read(struct kiocb *iocb, const struct iovec *iov,
dentry->d_parent->d_name.name, dentry->d_name.name,
(unsigned long) iov_length(iov, nr_segs), (unsigned long) pos);
+ ctx->mode = iocb->ki_filp->f_mode;
+
result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping);
if (!result) {
result = generic_file_aio_read(iocb, iov, nr_segs, pos);
diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c
index d04f0df..76b3e48 100644
--- a/fs/nfs/nfs2xdr.c
+++ b/fs/nfs/nfs2xdr.c
@@ -44,7 +44,7 @@
#define NFS_removeargs_sz (NFS_fhandle_sz+NFS_filename_sz)
#define NFS_sattrargs_sz (NFS_fhandle_sz+NFS_sattr_sz)
#define NFS_readlinkargs_sz (NFS_fhandle_sz)
-#define NFS_readargs_sz (NFS_fhandle_sz+3)
+#define NFS_readargs_sz (NFS_fhandle_sz+4)
#define NFS_writeargs_sz (NFS_fhandle_sz+4)
#define NFS_createargs_sz (NFS_diropargs_sz+NFS_sattr_sz)
#define NFS_renameargs_sz (NFS_diropargs_sz+NFS_diropargs_sz)
@@ -612,10 +612,11 @@ static void encode_readargs(struct xdr_stream *xdr,
encode_fhandle(xdr, args->fh);
- p = xdr_reserve_space(xdr, 4 + 4 + 4);
+ p = xdr_reserve_space(xdr, 4 + 4 + 4 + 4);
*p++ = cpu_to_be32(offset);
*p++ = cpu_to_be32(count);
*p = cpu_to_be32(count);
+ *++p = cpu_to_be32(args->context->mode);
}
static void nfs2_xdr_enc_readargs(struct rpc_rqst *req,
diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c
index 6cbe894..d0876cb 100644
--- a/fs/nfs/nfs3xdr.c
+++ b/fs/nfs/nfs3xdr.c
@@ -49,7 +49,7 @@
#define NFS3_lookupargs_sz (NFS3_fh_sz+NFS3_filename_sz)
#define NFS3_accessargs_sz (NFS3_fh_sz+1)
#define NFS3_readlinkargs_sz (NFS3_fh_sz)
-#define NFS3_readargs_sz (NFS3_fh_sz+3)
+#define NFS3_readargs_sz (NFS3_fh_sz+4)
#define NFS3_writeargs_sz (NFS3_fh_sz+5)
#define NFS3_createargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
#define NFS3_mkdirargs_sz (NFS3_diropargs_sz+NFS3_sattr_sz)
@@ -951,9 +951,10 @@ static void encode_read3args(struct xdr_stream *xdr,
encode_nfs_fh3(xdr, args->fh);
- p = xdr_reserve_space(xdr, 8 + 4);
+ p = xdr_reserve_space(xdr, 8 + 4 + 4);
p = xdr_encode_hyper(p, args->offset);
*p = cpu_to_be32(args->count);
+ *++p = cpu_to_be32(args->context->mode);
}
static void nfs3_xdr_enc_read3args(struct rpc_rqst *req,
diff --git a/fs/nfs/nfs4xdr.c b/fs/nfs/nfs4xdr.c
index 610ebcc..fbdd8ce 100644
--- a/fs/nfs/nfs4xdr.c
+++ b/fs/nfs/nfs4xdr.c
@@ -188,7 +188,7 @@ static int nfs4_stat_to_errno(int);
#define decode_setattr_maxsz (op_decode_hdr_maxsz + \
nfs4_fattr_bitmap_maxsz)
#define encode_read_maxsz (op_encode_hdr_maxsz + \
- encode_stateid_maxsz + 3)
+ encode_stateid_maxsz + 4)
#define decode_read_maxsz (op_decode_hdr_maxsz + 2)
#define encode_readdir_maxsz (op_encode_hdr_maxsz + \
2 + encode_verifier_maxsz + 5)
@@ -1532,9 +1532,10 @@ static void encode_read(struct xdr_stream *xdr, const struct nfs_readargs *args,
encode_open_stateid(xdr, args->context, args->lock_context,
FMODE_READ, hdr->minorversion);
- p = reserve_space(xdr, 12);
+ p = reserve_space(xdr, 16);
p = xdr_encode_hyper(p, args->offset);
*p = cpu_to_be32(args->count);
+ *++p = cpu_to_be32(args->context->mode);
}
static void encode_readdir(struct xdr_stream *xdr, const struct nfs4_readdir_arg *readdir, struct rpc_rqst *req, struct compound_hdr *hdr)
diff --git a/fs/nfsd/nfs3proc.c b/fs/nfsd/nfs3proc.c
index 9095f3c..d599629 100644
--- a/fs/nfsd/nfs3proc.c
+++ b/fs/nfsd/nfs3proc.c
@@ -171,7 +171,7 @@ nfsd3_proc_read(struct svc_rqst *rqstp, struct nfsd3_readargs *argp,
nfserr = nfsd_read(rqstp, &resp->fh,
argp->offset,
rqstp->rq_vec, argp->vlen,
- &resp->count);
+ &resp->count, argp->f_mode);
if (nfserr == 0) {
struct inode *inode = resp->fh.fh_dentry->d_inode;
diff --git a/fs/nfsd/nfs3xdr.c b/fs/nfsd/nfs3xdr.c
index 43f46cd..d940bc9 100644
--- a/fs/nfsd/nfs3xdr.c
+++ b/fs/nfsd/nfs3xdr.c
@@ -345,6 +345,7 @@ nfs3svc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
v++;
}
args->vlen = v;
+ args->f_mode = ntohl(*p++);
return xdr_argsize_check(rqstp, p);
}
diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c
index 4949667..34cb726 100644
--- a/fs/nfsd/nfs4xdr.c
+++ b/fs/nfsd/nfs4xdr.c
@@ -875,9 +875,10 @@ nfsd4_decode_read(struct nfsd4_compoundargs *argp, struct nfsd4_read *read)
status = nfsd4_decode_stateid(argp, &read->rd_stateid);
if (status)
return status;
- READ_BUF(12);
+ READ_BUF(16);
READ64(read->rd_offset);
READ32(read->rd_length);
+ READ32(read->rd_f_mode);
DECODE_TAIL;
}
@@ -2958,7 +2959,7 @@ nfsd4_encode_read(struct nfsd4_compoundres *resp, __be32 nfserr,
nfserr = nfsd_read_file(read->rd_rqstp, read->rd_fhp, read->rd_filp,
read->rd_offset, resp->rqstp->rq_vec, read->rd_vlen,
- &maxcount);
+ &maxcount, read->rd_f_mode);
if (nfserr)
return nfserr;
diff --git a/fs/nfsd/nfsproc.c b/fs/nfsd/nfsproc.c
index e15dc45..8903975 100644
--- a/fs/nfsd/nfsproc.c
+++ b/fs/nfsd/nfsproc.c
@@ -147,7 +147,7 @@ nfsd_proc_read(struct svc_rqst *rqstp, struct nfsd_readargs *argp,
nfserr = nfsd_read(rqstp, fh_copy(&resp->fh, &argp->fh),
argp->offset,
rqstp->rq_vec, argp->vlen,
- &resp->count);
+ &resp->count, argp->f_mode);
if (nfserr) return nfserr;
return nfserrno(vfs_getattr(resp->fh.fh_export->ex_path.mnt,
diff --git a/fs/nfsd/nfsxdr.c b/fs/nfsd/nfsxdr.c
index 65ec595..3d76274 100644
--- a/fs/nfsd/nfsxdr.c
+++ b/fs/nfsd/nfsxdr.c
@@ -269,6 +269,7 @@ nfssvc_decode_readargs(struct svc_rqst *rqstp, __be32 *p,
v++;
}
args->vlen = v;
+ args->f_mode = ntohl(*p++);
return xdr_argsize_check(rqstp, p);
}
diff --git a/fs/nfsd/vfs.c b/fs/nfsd/vfs.c
index c8bd9c3..8fc82d7 100644
--- a/fs/nfsd/vfs.c
+++ b/fs/nfsd/vfs.c
@@ -1067,7 +1067,8 @@ out_nfserr:
* N.B. After this call fhp needs an fh_put
*/
__be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
- loff_t offset, struct kvec *vec, int vlen, unsigned long *count)
+ loff_t offset, struct kvec *vec, int vlen, unsigned long *count,
+ unsigned long f_mode)
{
struct file *file;
struct inode *inode;
@@ -1078,6 +1079,12 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
if (err)
return err;
+ if (f_mode & FMODE_RANDOM) {
+ spin_lock(&file->f_lock);
+ file->f_mode |= FMODE_RANDOM;
+ spin_unlock(&file->f_lock);
+ }
+
inode = file->f_path.dentry->d_inode;
/* Get readahead parameters */
@@ -1106,7 +1113,7 @@ __be32 nfsd_read(struct svc_rqst *rqstp, struct svc_fh *fhp,
__be32
nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
loff_t offset, struct kvec *vec, int vlen,
- unsigned long *count)
+ unsigned long *count, unsigned long f_mode)
{
__be32 err;
@@ -1115,9 +1122,20 @@ nfsd_read_file(struct svc_rqst *rqstp, struct svc_fh *fhp, struct file *file,
NFSD_MAY_READ|NFSD_MAY_OWNER_OVERRIDE);
if (err)
goto out;
+
+ if (f_mode & FMODE_RANDOM) {
+ spin_lock(&file->f_lock);
+ file->f_mode |= FMODE_RANDOM;
+ spin_unlock(&file->f_lock);
+ } else {
+ spin_lock(&file->f_lock);
+ file->f_mode &= ~FMODE_RANDOM;
+ spin_unlock(&file->f_lock);
+ }
+
err = nfsd_vfs_read(rqstp, fhp, file, offset, vec, vlen, count);
} else /* Note file may still be NULL in NFSv4 special stateid case: */
- err = nfsd_read(rqstp, fhp, offset, vec, vlen, count);
+ err = nfsd_read(rqstp, fhp, offset, vec, vlen, count, f_mode);
out:
return err;
}
diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h
index ec0611b..c181cdb 100644
--- a/fs/nfsd/vfs.h
+++ b/fs/nfsd/vfs.h
@@ -72,9 +72,11 @@ __be32 nfsd_open(struct svc_rqst *, struct svc_fh *, umode_t,
int, struct file **);
void nfsd_close(struct file *);
__be32 nfsd_read(struct svc_rqst *, struct svc_fh *,
- loff_t, struct kvec *, int, unsigned long *);
+ loff_t, struct kvec *, int, unsigned long *,
+ unsigned long);
__be32 nfsd_read_file(struct svc_rqst *, struct svc_fh *, struct file *,
- loff_t, struct kvec *, int, unsigned long *);
+ loff_t, struct kvec *, int, unsigned long *,
+ unsigned long);
__be32 nfsd_write(struct svc_rqst *, struct svc_fh *,struct file *,
loff_t, struct kvec *,int, unsigned long *, int *);
__be32 nfsd_readlink(struct svc_rqst *, struct svc_fh *,
diff --git a/fs/nfsd/xdr.h b/fs/nfsd/xdr.h
index 53b1863..520d5e6 100644
--- a/fs/nfsd/xdr.h
+++ b/fs/nfsd/xdr.h
@@ -27,6 +27,7 @@ struct nfsd_readargs {
__u32 offset;
__u32 count;
int vlen;
+ __u32 f_mode;
};
struct nfsd_writeargs {
diff --git a/fs/nfsd/xdr3.h b/fs/nfsd/xdr3.h
index 7df980e..15fdff4 100644
--- a/fs/nfsd/xdr3.h
+++ b/fs/nfsd/xdr3.h
@@ -32,6 +32,7 @@ struct nfsd3_readargs {
__u64 offset;
__u32 count;
int vlen;
+ __u32 f_mode;
};
struct nfsd3_writeargs {
diff --git a/fs/nfsd/xdr4.h b/fs/nfsd/xdr4.h
index acd127d..49d6ce3 100644
--- a/fs/nfsd/xdr4.h
+++ b/fs/nfsd/xdr4.h
@@ -270,6 +270,7 @@ struct nfsd4_read {
u32 rd_length; /* request */
int rd_vlen;
struct file *rd_filp;
+ u32 rd_f_mode; /* request */
struct svc_rqst *rd_rqstp; /* response */
struct svc_fh * rd_fhp; /* response */
--
1.7.9.5
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/