[PATCH] FUSE: add the async option for the flush/release operation

From: Enke Chen
Date: Tue Aug 09 2016 - 23:36:26 EST


Hi, Miklos:

This patch adds the async option for the flush/release operation in FUSE.

The async flush/release option allows a FUSE-based application to be terminated
without being blocked in the flush/release operation, even in the presence of
complex external interactions. In addition, the async operation can be more
efficient when a large number of FUSE-based files are involved.

---
Deadlock Example:

Process A is a multi-threaded application that interacts with Process B,
a FUSE-server.


               UNIX-domain socket
   App (A) ----------------------- FUSE-server (B)
      |                                   |
      |                                   |
      |                                   |
      +-----------------------------------+
               open/flush/release


When the FUSE-server receives open and flush/release operations from
Process A, it in turn interacts with Process A (e.g., to coordinate
shared memory allocation and de-allocation) over the connection-oriented
UNIX-domain socket.

A deadlock occurs when Process A is terminating:

1) As part of process termination (i.e., do_exit() in the kernel), it
would send "flush/release" to Process B, and wait for its reply due
to the synchronous nature of the operation.

2) When Process B receives the "flush/release" request, it would in turn
send a message to Process A (over the UNIX-domain channel) and wait
for its reply.

3) As Process A is terminating, it may not be able to reply to Process B,
resulting in a deadlock.

The async flush/release option offers a simple and robust solution to the
deadlock issue.

With the async flush/release operation, all the files and sockets in Process
A can be closed without being blocked. Closing Process A's end of the
UNIX-domain socket in turn un-blocks the operation that Process B is waiting
on over that socket.
---

Signed-off-by: Enke Chen <enkechen@xxxxxxxxx>

Version: 4.7.0_next_20160805

fs/fuse/file.c | 39 +++++++++++++++++++++++++++------------
fs/fuse/fuse_i.h | 4 ++++
fs/fuse/inode.c | 4 +++-
include/uapi/linux/fuse.h | 7 ++++++-
4 files changed, 40 insertions(+), 14 deletions(-)

diff --git a/fs/fuse/file.c b/fs/fuse/file.c
index f394aff..7dd144f 100644
--- a/fs/fuse/file.c
+++ b/fs/fuse/file.c
@@ -273,7 +273,8 @@ void fuse_release_common(struct file *file, int opcode)
* synchronous RELEASE is allowed (and desirable) in this case
* because the server can be trusted not to screw up.
*/
- fuse_file_put(ff, ff->fc->destroy_req != NULL);
+ fuse_file_put(ff, (ff->fc->destroy_req != NULL) &&
+ !ff->fc->async_flush);
}

static int fuse_open(struct inode *inode, struct file *file)
@@ -394,13 +395,19 @@ static void fuse_sync_writes(struct inode *inode)
fuse_release_nowrite(inode);
}

+static void fuse_flush_end(struct fuse_conn *fc, struct fuse_req *req)
+{
+ if (req->out.h.error == -ENOSYS)
+ fc->no_flush = 1;
+}
+
static int fuse_flush(struct file *file, fl_owner_t id)
{
struct inode *inode = file_inode(file);
struct fuse_conn *fc = get_fuse_conn(inode);
struct fuse_file *ff = file->private_data;
struct fuse_req *req;
- struct fuse_flush_in inarg;
+ struct fuse_flush_in *inarg;
int err;

if (is_bad_inode(inode))
@@ -423,20 +430,28 @@ static int fuse_flush(struct file *file, fl_owner_t id)

req = fuse_get_req_nofail_nopages(fc, file);
memset(&inarg, 0, sizeof(inarg));
- inarg.fh = ff->fh;
- inarg.lock_owner = fuse_lock_owner_id(fc, id);
+ inarg = &req->misc.flush_in;
+ inarg->fh = ff->fh;
+ inarg->lock_owner = fuse_lock_owner_id(fc, id);
req->in.h.opcode = FUSE_FLUSH;
req->in.h.nodeid = get_node_id(inode);
req->in.numargs = 1;
- req->in.args[0].size = sizeof(inarg);
- req->in.args[0].value = &inarg;
- __set_bit(FR_FORCE, &req->flags);
- fuse_request_send(fc, req);
- err = req->out.h.error;
- fuse_put_request(fc, req);
- if (err == -ENOSYS) {
- fc->no_flush = 1;
+ req->in.args[0].size = sizeof(struct fuse_flush_in);
+ req->in.args[0].value = inarg;
+ if (fc->async_flush) {
+ req->end = fuse_flush_end;
+ __set_bit(FR_BACKGROUND, &req->flags);
+ fuse_request_send_background(fc, req);
err = 0;
+ } else {
+ __set_bit(FR_FORCE, &req->flags);
+ fuse_request_send(fc, req);
+ err = req->out.h.error;
+ fuse_put_request(fc, req);
+ if (err == -ENOSYS) {
+ fc->no_flush = 1;
+ err = 0;
+ }
}
return err;
}
diff --git a/fs/fuse/fuse_i.h b/fs/fuse/fuse_i.h
index d98d8cc..f212cdd 100644
--- a/fs/fuse/fuse_i.h
+++ b/fs/fuse/fuse_i.h
@@ -350,6 +350,7 @@ struct fuse_req {
struct fuse_req *next;
} write;
struct fuse_notify_retrieve_in retrieve_in;
+ struct fuse_flush_in flush_in;
} misc;

/** page vector */
@@ -624,6 +625,9 @@ struct fuse_conn {
/** Is lseek not implemented by fs? */
unsigned no_lseek:1;

+ /** Does the filesystem want async flush? */
+ unsigned async_flush:1;
+
/** The number of requests waiting for completion */
atomic_t num_waiting;

diff --git a/fs/fuse/inode.c b/fs/fuse/inode.c
index 4e05b51..2d031b1 100644
--- a/fs/fuse/inode.c
+++ b/fs/fuse/inode.c
@@ -910,6 +910,8 @@ static void process_init_reply(struct fuse_conn *fc, struct fuse_req *req)
fc->writeback_cache = 1;
if (arg->flags & FUSE_PARALLEL_DIROPS)
fc->parallel_dirops = 1;
+ if (arg->flags & FUSE_ASYNC_FLUSH)
+ fc->async_flush = 1;
if (arg->time_gran && arg->time_gran <= 1000000000)
fc->sb->s_time_gran = arg->time_gran;
} else {
@@ -941,7 +943,7 @@ static void fuse_send_init(struct fuse_conn *fc, struct fuse_req *req)
FUSE_FLOCK_LOCKS | FUSE_HAS_IOCTL_DIR | FUSE_AUTO_INVAL_DATA |
FUSE_DO_READDIRPLUS | FUSE_READDIRPLUS_AUTO | FUSE_ASYNC_DIO |
FUSE_WRITEBACK_CACHE | FUSE_NO_OPEN_SUPPORT |
- FUSE_PARALLEL_DIROPS;
+ FUSE_PARALLEL_DIROPS | FUSE_ASYNC_FLUSH;
req->in.h.opcode = FUSE_INIT;
req->in.numargs = 1;
req->in.args[0].size = sizeof(*arg);
diff --git a/include/uapi/linux/fuse.h b/include/uapi/linux/fuse.h
index 27e1736..76087d3 100644
--- a/include/uapi/linux/fuse.h
+++ b/include/uapi/linux/fuse.h
@@ -108,6 +108,9 @@
*
* 7.25
* - add FUSE_PARALLEL_DIROPS
+ *
+ * 7.26
+ * - add FUSE_ASYNC_FLUSH
*/

#ifndef _LINUX_FUSE_H
@@ -143,7 +146,7 @@
#define FUSE_KERNEL_VERSION 7

/** Minor version number of this interface */
-#define FUSE_KERNEL_MINOR_VERSION 25
+#define FUSE_KERNEL_MINOR_VERSION 26

/** The node ID of the root inode */
#define FUSE_ROOT_ID 1
@@ -238,6 +241,7 @@ struct fuse_file_lock {
* FUSE_WRITEBACK_CACHE: use writeback cache for buffered writes
* FUSE_NO_OPEN_SUPPORT: kernel supports zero-message opens
* FUSE_PARALLEL_DIROPS: allow parallel lookups and readdir
+ * FUSE_ASYNC_FLUSH: asynchronous flush and release
*/
#define FUSE_ASYNC_READ (1 << 0)
#define FUSE_POSIX_LOCKS (1 << 1)
@@ -258,6 +262,7 @@ struct fuse_file_lock {
#define FUSE_WRITEBACK_CACHE (1 << 16)
#define FUSE_NO_OPEN_SUPPORT (1 << 17)
#define FUSE_PARALLEL_DIROPS (1 << 18)
+#define FUSE_ASYNC_FLUSH (1 << 19)

/**
* CUSE INIT request/reply flags