[RFC PATCH v2 1/1] pipe: busy wait for pipe

From: subhra mazumdar
Date: Tue Sep 25 2018 - 19:33:41 EST


Introduce a pipe_ll_usec field for pipes that indicates the number of
microseconds a thread should spin if the pipe is empty or full before
sleeping. This is similar to busy polling on network sockets. Workloads
like hackbench in pipe mode benefit significantly from this by avoiding
the sleep and wakeup overhead. Other similar use cases can benefit. A
tunable, pipe_busy_poll, is introduced to enable or disable busy waiting
via /proc. Its value specifies the spin duration in microseconds. The
default value is 0, indicating no spin.

Signed-off-by: subhra mazumdar <subhra.mazumdar@xxxxxxxxxx>
---
fs/pipe.c | 12 ++++++++++++
include/linux/pipe_fs_i.h | 2 ++
kernel/sysctl.c | 7 +++++++
3 files changed, 21 insertions(+)

diff --git a/fs/pipe.c b/fs/pipe.c
index bdc5d3c..35d805b 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -26,6 +26,7 @@

#include <linux/uaccess.h>
#include <asm/ioctls.h>
+#include <linux/sched/clock.h>

#include "internal.h"

@@ -40,6 +41,7 @@ unsigned int pipe_max_size = 1048576;
*/
unsigned long pipe_user_pages_hard;
unsigned long pipe_user_pages_soft = PIPE_DEF_BUFFERS * INR_OPEN_CUR;
+unsigned int pipe_busy_poll;

/*
* We use a start+len construction, which provides full use of the
@@ -106,6 +108,7 @@ void pipe_double_lock(struct pipe_inode_info *pipe1,
void pipe_wait(struct pipe_inode_info *pipe)
{
DEFINE_WAIT(wait);
+ u64 start;

/*
* Pipes are system-local resources, so sleeping on them
@@ -113,6 +116,10 @@ void pipe_wait(struct pipe_inode_info *pipe)
*/
prepare_to_wait(&pipe->wait, &wait, TASK_INTERRUPTIBLE);
pipe_unlock(pipe);
+ start = local_clock();
+ while (current->state != TASK_RUNNING &&
+ ((local_clock() - start) >> 10) < pipe->pipe_ll_usec)
+ cpu_relax();
schedule();
finish_wait(&pipe->wait, &wait);
pipe_lock(pipe);
@@ -825,6 +832,7 @@ static int do_pipe2(int __user *fildes, int flags)
struct file *files[2];
int fd[2];
int error;
+ struct pipe_inode_info *pipe;

error = __do_pipe_flags(fd, files, flags);
if (!error) {
@@ -838,6 +846,10 @@ static int do_pipe2(int __user *fildes, int flags)
fd_install(fd[0], files[0]);
fd_install(fd[1], files[1]);
}
+ pipe = files[0]->private_data;
+ pipe->pipe_ll_usec = pipe_busy_poll;
+ pipe = files[1]->private_data;
+ pipe->pipe_ll_usec = pipe_busy_poll;
}
return error;
}
diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h
index 5a3bb3b..73267d2 100644
--- a/include/linux/pipe_fs_i.h
+++ b/include/linux/pipe_fs_i.h
@@ -55,6 +55,7 @@ struct pipe_inode_info {
unsigned int waiting_writers;
unsigned int r_counter;
unsigned int w_counter;
+ unsigned int pipe_ll_usec;
struct page *tmp_page;
struct fasync_struct *fasync_readers;
struct fasync_struct *fasync_writers;
@@ -170,6 +171,7 @@ void pipe_double_lock(struct pipe_inode_info *, struct pipe_inode_info *);
extern unsigned int pipe_max_size;
extern unsigned long pipe_user_pages_hard;
extern unsigned long pipe_user_pages_soft;
+extern unsigned int pipe_busy_poll;

/* Drop the inode semaphore and wait for a pipe event, atomically */
void pipe_wait(struct pipe_inode_info *pipe);
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index cc02050..0e9ce0c 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1863,6 +1863,13 @@ static struct ctl_table fs_table[] = {
.proc_handler = proc_doulongvec_minmax,
},
{
+ .procname = "pipe-busy-poll",
+ .data = &pipe_busy_poll,
+ .maxlen = sizeof(unsigned int),
+ .mode = 0644,
+ .proc_handler = proc_dointvec_minmax,
+ },
+ {
.procname = "mount-max",
.data = &sysctl_mount_max,
.maxlen = sizeof(unsigned int),
--
2.9.3