[PATCH] kiobuf bounce buffers for 2.3.19

Stephen C. Tweedie (sct@redhat.com)
Tue, 5 Oct 1999 12:25:46 +0100


Hi,

CONFIG_BIGMEM breaks raw IO on 2.3. Currently we just fail noisily by
doing a

	if (map && PageBIGMEM(map)) {
		err = -EIO;
		goto error;
	}

in the brw_kiovec code. The patch below adds generic kiobuf bounce
buffer support to address this, and also to allow kiobuf users to avoid
a number of other potential addressing problems in the future.

The basic idea is that you call setup_kiobuf_bounce_pages() with a
kiobuf and a gfp_mask as arguments, and a bounce table will be set up
with temporary bounce pages allocated according to the gfp_mask. If the
original kiobuf pages are already compatible with the gfp_mask, then no
corresponding bounce pages will be allocated.

This lets the caller set up bounce pages for ISA DMA or for normal
drivers capable of accessing any mapped page. Once we get to the point
of being able to address all of physical memory in the driver layers,
the same bounce buffer mechanism will allow us to create bounce pages
for addresses over 4G when necessary.
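
To make the calling sequence concrete, here is a rough sketch of how a
kiobuf user would drive the new functions; it simply mirrors what the
brw_kiovec changes below do. This is illustration only, not part of the
patch: the function name and the surrounding error handling are made up.

#include <linux/iobuf.h>

/* Sketch only: drive a kiobuf transfer through the bounce API.
 * A driver restricted to ISA DMA would pass GFP_KERNEL | GFP_DMA
 * instead of GFP_USER, so that any page its hardware cannot reach
 * gets a bounce page allocated in low memory. */
static int sketch_kiovec_io(int rw, struct kiobuf *iobuf)
{
	int err, transferred = 0;

	err = setup_kiobuf_bounce_pages(iobuf, GFP_USER);
	if (err)
		return err;

	/* Writes: fill the bounce pages from the caller's pages
	 * before the IO is started. */
	if (rw & WRITE)
		kiobuf_copy_bounce(iobuf, COPY_TO_BOUNCE, -1);

	/* ... submit the IO here, using iobuf->bouncelist[i] where it
	 * is non-zero and iobuf->pagelist[i] otherwise, and accumulate
	 * the number of bytes successfully transferred ... */

	/* Reads: copy back only the bytes which actually completed. */
	if (!(rw & WRITE))
		kiobuf_copy_bounce(iobuf, COPY_FROM_BOUNCE, transferred);

	clear_kiobuf_bounce_pages(iobuf);

	return transferred ? transferred : err;
}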

Currently the test_bounce_page() condition ignores the PG_Reserved bit.
Alan is proposing adding a PCI quirks mechanism which will allow me to
detect whether or not DMA into PCI space works on a given motherboard.
Once we have that, we can skip the bounce pages for DMA access to
memory-mapped PCI devices on motherboards whose support is known to be
stable, and keep creating them on demand for the rest.

There are a couple of points to think about, though: some applications
might prefer to give up if direct DMA is not supported rather than fall
back to slower transfers; and the hidden cost of bouncing all
PG_Reserved pages will be an unnecessary copy when doing raw IO to pages
that the kernel has marked reserved for things like sound driver DMA
buffers in physical memory. For now, we'll always create bounce pages
in such conditions, because unless the caller specifies GFP_BIGMEM, any
page in the PCI aperture is guaranteed either to be marked PG_BIGMEM or
to be beyond max_mapnr.
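
If a given caller would rather give up than take the extra copy, it can
simply test iobuf->bounced right after the setup call and refuse to
continue. A rough sketch follows; the -EIO policy is only an example,
not something the patch imposes:

static int sketch_refuse_bounce(struct kiobuf *iobuf)
{
	int err = setup_kiobuf_bounce_pages(iobuf, GFP_USER);
	if (err)
		return err;
	if (iobuf->bounced) {
		/* At least one page would have to be bounced; refuse
		 * rather than fall back to a slower, copied transfer. */
		clear_kiobuf_bounce_pages(iobuf);
		return -EIO;
	}
	return 0;
}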

--Stephen

----------------------------------------------------------------
--- fs/buffer.c.~1~ Fri Sep 3 18:29:38 1999
+++ fs/buffer.c Tue Oct 5 11:23:49 1999
@@ -1738,6 +1738,30 @@
}

/*
+ * Clean up the bounce buffers potentially used by brw_kiovec. All of
+ * the kiovec's bounce buffers must be cleared of temporarily allocated
+ * bounce pages, but only READ pages for whom IO completed successfully
+ * can actually be transferred back to user space.
+ */
+
+void cleanup_bounce_buffers(int rw, int nr, struct kiobuf *iovec[],
+ int transferred)
+{
+ int i;
+ for (i = 0; i < nr; i++) {
+ struct kiobuf *iobuf = iovec[i];
+ if (iobuf->bounced) {
+ if (transferred > 0 && !(rw & WRITE))
+ kiobuf_copy_bounce(iobuf, COPY_FROM_BOUNCE,
+ transferred);
+
+ clear_kiobuf_bounce_pages(iobuf);
+ }
+ transferred -= iobuf->length;
+ }
+}
+
+/*
* Start I/O on a physical range of kernel memory, defined by a vector
* of kiobuf structs (much like a user-space iovec list).
*
@@ -1763,6 +1787,7 @@
unsigned long blocknr;
struct kiobuf * iobuf = NULL;
unsigned long page;
+ unsigned long bounce;
struct page * map;
struct buffer_head *tmp, *bh[KIO_MAX_SECTORS];

@@ -1795,17 +1820,24 @@
bufind = bhind = transferred = err = 0;
for (i = 0; i < nr; i++) {
iobuf = iovec[i];
+ err = setup_kiobuf_bounce_pages(iobuf, GFP_USER);
+ if (err)
+ goto finished;
+ if (rw & WRITE)
+ kiobuf_copy_bounce(iobuf, COPY_TO_BOUNCE, -1);
+
offset = iobuf->offset;
length = iobuf->length;
dprintk ("iobuf %d %d %d\n", offset, length, size);

for (pageind = 0; pageind < iobuf->nr_pages; pageind++) {
- page = iobuf->pagelist[pageind];
- map = iobuf->maplist[pageind];
- if (map && PageBIGMEM(map)) {
- err = -EIO;
- goto error;
- }
+ map = iobuf->maplist[pageind];
+ bounce = iobuf->bouncelist[pageind];
+
+ if (bounce)
+ page = bounce;
+ else
+ page = iobuf->pagelist[pageind];

while (length > 0) {
blocknr = b[bufind++];
@@ -1868,6 +1900,9 @@

finished:
dprintk ("brw_kiovec: end (%d, %d)\n", transferred, err);
+
+ cleanup_bounce_buffers(rw, nr, iovec, transferred);
+
if (transferred)
return transferred;
return err;
@@ -1880,6 +1915,9 @@
__put_unused_buffer_head(bh[bhind]);
}
spin_unlock(&unused_list_lock);
+
+ clear_kiobuf_bounce_pages(iobuf);
+
goto finished;
}

--- fs/iobuf.c.~1~ Tue Aug 31 19:30:48 1999
+++ fs/iobuf.c Tue Oct 5 11:27:37 1999
@@ -9,6 +9,7 @@
#include <linux/iobuf.h>
#include <linux/malloc.h>
#include <linux/slab.h>
+#include <linux/bigmem.h>

static kmem_cache_t *kiobuf_cachep;

@@ -49,15 +50,31 @@
memset(iobuf, 0, sizeof(*iobuf));
init_waitqueue_head(&iobuf->wait_queue);
iobuf->end_io = simple_wakeup_kiobuf;
- iobuf->array_len = KIO_STATIC_PAGES;
- iobuf->pagelist = iobuf->page_array;
- iobuf->maplist = iobuf->map_array;
+ iobuf->array_len = KIO_STATIC_PAGES;
+ iobuf->pagelist = iobuf->page_array;
+ iobuf->maplist = iobuf->map_array;
+ iobuf->bouncelist = iobuf->bounce_array;
*bufp++ = iobuf;
}

return 0;
}

+void clear_kiobuf_bounce_pages(struct kiobuf *iobuf)
+{
+ int i;
+
+ if (!iobuf->bounced)
+ return;
+
+ for (i = 0; i < iobuf->nr_pages; i++) {
+ unsigned long page = iobuf->bouncelist[i];
+ if (page)
+ free_page(page);
+ }
+ iobuf->bounced = 0;
+}
+
void free_kiovec(int nr, struct kiobuf **bufp)
{
int i;
@@ -65,9 +82,9 @@

for (i = 0; i < nr; i++) {
iobuf = bufp[i];
+ clear_kiobuf_bounce_pages(iobuf);
if (iobuf->array_len > KIO_STATIC_PAGES) {
kfree (iobuf->pagelist);
- kfree (iobuf->maplist);
}
kmem_cache_free(kiobuf_cachep, bufp[i]);
}
@@ -75,42 +92,41 @@

int expand_kiobuf(struct kiobuf *iobuf, int wanted)
{
- unsigned long * pagelist;
+ unsigned long * pagelist, * bouncelist;
struct page ** maplist;

if (iobuf->array_len >= wanted)
return 0;
-
+
+ /*
+ * kmalloc enough space for the page, map and bounce lists all
+ * at once.
+ */
pagelist = (unsigned long *)
- kmalloc(wanted * sizeof(unsigned long), GFP_KERNEL);
+ kmalloc(3 * wanted * sizeof(unsigned long), GFP_KERNEL);
if (!pagelist)
return -ENOMEM;
-
- maplist = (struct page **)
- kmalloc(wanted * sizeof(struct page **), GFP_KERNEL);
- if (!maplist) {
- kfree(pagelist);
- return -ENOMEM;
- }

/* Did it grow while we waited? */
if (iobuf->array_len >= wanted) {
kfree(pagelist);
- kfree(maplist);
return 0;
}

- memcpy (pagelist, iobuf->pagelist, wanted * sizeof(unsigned long));
- memcpy (maplist, iobuf->maplist, wanted * sizeof(struct page **));
+ maplist = (struct page **) (pagelist + wanted);
+ bouncelist = pagelist + 2 * wanted;
+
+ memcpy (pagelist, iobuf->pagelist, wanted * sizeof(unsigned long));
+ memcpy (maplist, iobuf->maplist, wanted * sizeof(struct page **));
+ memcpy (bouncelist, iobuf->bouncelist, wanted * sizeof(unsigned long));

- if (iobuf->array_len > KIO_STATIC_PAGES) {
+ if (iobuf->array_len > KIO_STATIC_PAGES)
kfree (iobuf->pagelist);
- kfree (iobuf->maplist);
- }

- iobuf->pagelist = pagelist;
- iobuf->maplist = maplist;
- iobuf->array_len = wanted;
+ iobuf->pagelist = pagelist;
+ iobuf->maplist = maplist;
+ iobuf->bouncelist = bouncelist;
+ iobuf->array_len = wanted;
return 0;
}

@@ -134,3 +150,116 @@



+/*
+ * Test whether a given page from the bounce buffer matches the given
+ * gfp_mask. Return true if a bounce buffer is required for this
+ * page.
+ */
+
+static inline int test_bounce_page(unsigned long page,
+ struct page * map,
+ int gfp_mask)
+{
+ /* Unmapped pages from PCI memory or BIGMEM pages always need a
+ * bounce buffer unless the caller is prepared to accept
+ * GFP_BIGMEM pages. */
+
+ if (!map || PageBIGMEM(map) )
+ /* Careful, the following must return the right value
+ * even if CONFIG_BIGMEM is not set */
+ return !(gfp_mask & __GFP_BIGMEM);
+
+ /* A DMA-able page never needs a bounce buffer */
+ if (PageDMA(map))
+ return 0;
+
+ /* Otherwise it is a non-ISA-DMA-capable page and needs bounce
+ * buffers if GFP_DMA is requested */
+ return gfp_mask & __GFP_DMA;
+}
+
+int setup_kiobuf_bounce_pages(struct kiobuf *iobuf, int gfp_mask)
+{
+ int i;
+
+ clear_kiobuf_bounce_pages(iobuf);
+
+ for (i = 0; i < iobuf->nr_pages; i++) {
+ struct page *map = iobuf->maplist[i];
+ unsigned long page = iobuf->pagelist[i];
+ unsigned long bounce_page;
+
+ if (!test_bounce_page(page, map, gfp_mask)) {
+ iobuf->bouncelist[i] = 0;
+ continue;
+ }
+
+ bounce_page = __get_free_page(gfp_mask);
+ if (!bounce_page)
+ goto error;
+
+ iobuf->bouncelist[i] = bounce_page;
+ iobuf->bounced = 1;
+ }
+ return 0;
+
+ error:
+ clear_kiobuf_bounce_pages(iobuf);
+ return -ENOMEM;
+}
+
+/*
+ * Copy a bounce buffer. For completion of partially-failed read IOs,
+ * we need to be able to place an upper limit on the data successfully
+ * transferred from bounce buffers to the user's own buffers.
+ */
+
+void kiobuf_copy_bounce(struct kiobuf *iobuf, int direction, int max)
+{
+ int i;
+ int offset, length;
+
+ if (!iobuf->bounced)
+ return;
+
+ offset = iobuf->offset;
+ length = iobuf->length;
+ if (max >= 0 && length > max)
+ length = max;
+
+ i = 0;
+
+ if (offset > PAGE_SIZE) {
+ i = (offset >> PAGE_SHIFT);
+ offset &= ~PAGE_MASK;
+ }
+
+ for (; i < iobuf->nr_pages && length > 0; i++) {
+ unsigned long page = iobuf->pagelist[i];
+ unsigned long bounce_page = iobuf->bouncelist[i];
+ unsigned long kin, kout;
+ int pagelen = length;
+
+ if (bounce_page) {
+ if (pagelen > PAGE_SIZE)
+ pagelen = PAGE_SIZE;
+
+ if (direction == COPY_TO_BOUNCE) {
+ kin = kmap(page, KM_READ);
+ kout = kmap(bounce_page, KM_WRITE);
+ } else {
+ kin = kmap(bounce_page, KM_READ);
+ kout = kmap(page, KM_WRITE);
+ }
+
+ memcpy((char *) (kout+offset),
+ (char *) (kin+offset),
+ pagelen);
+ kunmap(kout, KM_WRITE);
+ kunmap(kin, KM_READ);
+ }
+
+ length -= pagelen;
+ offset = 0;
+ }
+}
--- include/linux/iobuf.h.~1~ Tue Oct 5 10:26:17 1999
+++ include/linux/iobuf.h Tue Oct 5 11:28:53 1999
@@ -43,12 +43,15 @@

unsigned long * pagelist;
struct page ** maplist;
+ unsigned long * bouncelist;

unsigned int locked : 1; /* If set, pages has been locked */
+ unsigned int bounced : 1; /* If set, bounce pages are set up */

/* Always embed enough struct pages for 64k of IO */
unsigned long page_array[KIO_STATIC_PAGES];
struct page * map_array[KIO_STATIC_PAGES];
+ unsigned long bounce_array[KIO_STATIC_PAGES];

/* Dynamic state for IO completion: */
atomic_t io_count; /* IOs still in progress */
@@ -71,6 +74,15 @@
void free_kiovec(int nr, struct kiobuf **);
int expand_kiobuf(struct kiobuf *, int);
void kiobuf_wait_for_io(struct kiobuf *);
+int setup_kiobuf_bounce_pages(struct kiobuf *, int gfp_mask);
+void clear_kiobuf_bounce_pages(struct kiobuf *);
+void kiobuf_copy_bounce(struct kiobuf *, int direction, int max);
+
+/* Direction codes for kiobuf_copy_bounce: */
+enum {
+ COPY_TO_BOUNCE,
+ COPY_FROM_BOUNCE
+};

/* fs/buffer.c */
