[PATCH] ext2 xip: replace sendfile with splice

From: Carsten Otte
Date: Tue Jun 12 2007 - 02:48:48 EST


This patch removes xip_file_sendfile, and replaces it with
xip_file_splice_read. It passes the ltp testcase on a ext2 filesystem
that is mounted -o xip,rw.
I am not exactly happy with this patch, because it introduces waaaay too
much code duplication from fs/splice.c. I want to dig into making
generic_file_splice_read able to deal with xip files aswell. That would
allow to do the same for splice write.
If I cannot find a nicer solution, I'd vote for removing sendfile from
filemap_xip without replacement: a xip filesystem is intended to be used
for executables and libraries. This content is very rarely subject to
splice.

Signed-off-by: Carsten Otte <cotte@xxxxxxxxxx>
Signed-off-by: Jens Axboe <jens.axboe@xxxxxxxxxx>
---
fs/ext2/file.c | 2 +-
include/linux/fs.h | 4 +-
mm/filemap_xip.c | 206 ++++++++++++++++++++++++++++++++++++++++++++++++----
3 files changed, 192 insertions(+), 20 deletions(-)

diff --git a/fs/ext2/file.c b/fs/ext2/file.c
index 072a190..5f4a17c 100644
--- a/fs/ext2/file.c
+++ b/fs/ext2/file.c
@@ -70,7 +70,7 @@ const struct file_operations ext2_xip_file_operations = {
.open = generic_file_open,
.release = ext2_release_file,
.fsync = ext2_sync_file,
- .sendfile = xip_file_sendfile,
+ .splice_read = xip_file_splice_read,
};
#endif

diff --git a/include/linux/fs.h b/include/linux/fs.h
index 61ebf32..a5bf63e 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -1764,13 +1764,11 @@ extern int nonseekable_open(struct inode * inode, struct file * filp);
#ifdef CONFIG_FS_XIP
extern ssize_t xip_file_read(struct file *filp, char __user *buf, size_t len,
loff_t *ppos);
-extern ssize_t xip_file_sendfile(struct file *in_file, loff_t *ppos,
- size_t count, read_actor_t actor,
- void *target);
extern int xip_file_mmap(struct file * file, struct vm_area_struct * vma);
extern ssize_t xip_file_write(struct file *filp, const char __user *buf,
size_t len, loff_t *ppos);
extern int xip_truncate_page(struct address_space *mapping, loff_t from);
+extern ssize_t xip_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int);
#else
static inline int xip_truncate_page(struct address_space *mapping, loff_t from)
{
diff --git a/mm/filemap_xip.c b/mm/filemap_xip.c
index fa360e5..f1f3134 100644
--- a/mm/filemap_xip.c
+++ b/mm/filemap_xip.c
@@ -14,6 +14,7 @@
#include <linux/uio.h>
#include <linux/rmap.h>
#include <linux/sched.h>
+#include <linux/splice.h>
#include <asm/tlbflush.h>
#include "filemap.h"

@@ -92,7 +93,11 @@ do_xip_mapping_read(struct address_space *mapping,
if (unlikely(IS_ERR(page))) {
if (PTR_ERR(page) == -ENODATA) {
/* sparse */
- page = ZERO_PAGE(0);
+ page = xip_sparse_page();
+ if (!page) {
+ desc->error = -ENOMEM;
+ goto out;
+ }
} else {
desc->error = PTR_ERR(page);
goto out;
@@ -159,27 +164,196 @@ xip_file_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos)
}
EXPORT_SYMBOL_GPL(xip_file_read);

-ssize_t
-xip_file_sendfile(struct file *in_file, loff_t *ppos,
- size_t count, read_actor_t actor, void *target)
+static int xip_pipe_buf_steal(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
{
- read_descriptor_t desc;
+ return 1;
+}

- if (!count)
+static void xip_pipe_buf_release(struct pipe_inode_info *pipe,
+ struct pipe_buffer *buf)
+{
+ page_cache_release(buf->page);
+}
+
+static const struct pipe_buf_operations xip_pipe_buf_ops = {
+ .can_merge = 0,
+ .pin = generic_pipe_buf_pin,
+ .get = generic_pipe_buf_get,
+ .map = generic_pipe_buf_map,
+ .unmap = generic_pipe_buf_unmap,
+ .release = xip_pipe_buf_release,
+ .steal = xip_pipe_buf_steal,
+};
+
+static int
+__xip_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ struct address_space *mapping = in->f_mapping;
+ unsigned int loff, nr_pages;
+ struct page *pages[PIPE_BUFFERS];
+ struct partial_page partial[PIPE_BUFFERS];
+ struct page *page;
+ pgoff_t index, end_index;
+ loff_t isize;
+ size_t total_len;
+ int error, page_nr;
+ struct splice_pipe_desc spd = {
+ .pages = pages,
+ .partial = partial,
+ .flags = flags,
+ .ops = &xip_pipe_buf_ops,
+ };
+
+ index = *ppos >> PAGE_CACHE_SHIFT;
+ loff = *ppos & ~PAGE_CACHE_MASK;
+ nr_pages = (len + loff + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+
+ if (nr_pages > PIPE_BUFFERS)
+ nr_pages = PIPE_BUFFERS;
+
+ error = 0;
+ total_len = 0;
+
+ spd.nr_pages = 0;
+ while (spd.nr_pages < nr_pages) {
+ page = mapping->a_ops->get_xip_page(mapping,
+ index*(PAGE_SIZE/512), 0);
+ if (!page) {
+ error = -EIO;
+ break;
+ }
+ if (unlikely(IS_ERR(page))) {
+ if (PTR_ERR(page) == -ENODATA) {
+ /* sparse */
+ page = xip_sparse_page();
+ if (!page) {
+ error = -ENOMEM;
+ break;
+ }
+ } else {
+ error = PTR_ERR(page);
+ break;
+ }
+ }
+ page_cache_get(page);
+ pages[spd.nr_pages++] = page;
+ index++;
+ }
+
+ /*
+ * Now loop over the map and fill in the partial map
+ */
+ index = *ppos >> PAGE_CACHE_SHIFT;
+ nr_pages = spd.nr_pages;
+ spd.nr_pages = 0;
+ for (page_nr = 0; page_nr < nr_pages; page_nr++) {
+ unsigned int this_len;
+
+ if (!len)
+ break;
+
+ /*
+ * this_len is the max we'll use from this page
+ */
+ this_len = min_t(unsigned long, len, PAGE_CACHE_SIZE - loff);
+ page = pages[page_nr];
+ isize = i_size_read(mapping->host);
+ end_index = (isize - 1) >> PAGE_CACHE_SHIFT;
+ if (unlikely(!isize || index > end_index))
+ break;
+
+ /*
+ * if this is the last page, see if we need to shrink
+ * the length and stop
+ */
+ if (end_index == index) {
+ unsigned int plen;
+
+ /*
+ * max good bytes in this page
+ */
+ plen = ((isize - 1) & ~PAGE_CACHE_MASK) + 1;
+ if (plen <= loff)
+ break;
+
+ /*
+ * force quit after adding this page
+ */
+ this_len = min(this_len, plen - loff);
+ len = this_len;
+ }
+
+ partial[page_nr].offset = loff;
+ partial[page_nr].len = this_len;
+ len -= this_len;
+ loff = 0;
+ spd.nr_pages++;
+ index++;
+ }
+
+ if (spd.nr_pages)
+ return splice_to_pipe(pipe, &spd);
+
+ return error;
+}
+
+/**
+ * xip_file_splice_read - splice data from file to a pipe
+ * @in: file to splice from
+ * @pipe: pipe to splice to
+ * @len: number of bytes to splice
+ * @flags: splice modifier flags
+ *
+ * derived from generic_file_splice_read in fs/splice_read.c
+ */
+ssize_t xip_file_splice_read(struct file *in, loff_t *ppos,
+ struct pipe_inode_info *pipe, size_t len,
+ unsigned int flags)
+{
+ ssize_t spliced;
+ int ret;
+ loff_t isize, left;
+
+ BUG_ON(!in->f_mapping->a_ops->get_xip_page);
+
+ isize = i_size_read(in->f_mapping->host);
+ if (unlikely(*ppos >= isize))
return 0;

- desc.written = 0;
- desc.count = count;
- desc.arg.data = target;
- desc.error = 0;
+ left = isize - *ppos;
+ if (unlikely(left < len))
+ len = left;

- do_xip_mapping_read(in_file->f_mapping, &in_file->f_ra, in_file,
- ppos, &desc, actor);
- if (desc.written)
- return desc.written;
- return desc.error;
+ ret = 0;
+ spliced = 0;
+ while (len) {
+ ret = __xip_file_splice_read(in, ppos, pipe, len, flags);
+
+ if (ret < 0)
+ break;
+ else if (!ret) {
+ if (spliced)
+ break;
+ if (flags & SPLICE_F_NONBLOCK) {
+ ret = -EAGAIN;
+ break;
+ }
+ }
+
+ *ppos += ret;
+ len -= ret;
+ spliced += ret;
+ }
+
+ if (spliced)
+ return spliced;
+
+ return ret;
}
-EXPORT_SYMBOL_GPL(xip_file_sendfile);
+EXPORT_SYMBOL(xip_file_splice_read);

/*
* __xip_unmap is invoked from xip_unmap and
--
1.5.2.1.174.gcd03


--
Jens Axboe

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/