Non-blocking truncate_inode_pages (experimental)

Bill Hawes (whawes@star.net)
Fri, 27 Jun 1997 17:42:32 -0400


This is a multi-part message in MIME format.
--------------99FCE563082618478D2CDBDF
Content-Type: text/plain; charset=us-ascii
Content-Transfer-Encoding: 7bit

There seem to be a number of unprotected race conditions caused by the
fact that clear_inode may block under some circumstances. This is
especially true in the 2.0.xx tree and up through 2.1.42; it may not be
an issue after 2.1.43, but I'm not sure yet.

As one of the causes of clear_inode blocking is the call to
truncate_inode_pages, I've written a non-blocking replacement. It works by
requeueing any locked pages to a special inode, where they can be reaped
later by shrink_mmap. This avoids the need to wait for the page IO to
complete so that the truncate call doesn't have to block.

I've tested this under 2.1.42 by forcing it to requeue pages and
verifying that they get released by shrink_mmap. I've also tested it a
little under 2.0.30 with no problems.

With this patch in place all of the kernel calls to clear_inode that
first check that the inode isn't locked should never block, and
potentially nasty race conditions will be averted.

Comments welcome :-)

-Bill
--------------99FCE563082618478D2CDBDF
Content-Type: text/plain; charset=us-ascii; name="filemap_nb-patch"
Content-Transfer-Encoding: 7bit
Content-Disposition: inline; filename="filemap_nb-patch"

--- mm/filemap.c.old Tue May 20 13:16:23 1997
+++ mm/filemap.c Fri Jun 27 16:48:09 1997
@@ -62,26 +74,8 @@
* invalidate them).
*/
+static int remove_inode_pages(struct inode *, int);
void invalidate_inode_pages(struct inode * inode)
{
- struct page ** p;
- struct page * page;
-
- p = &inode->i_pages;
- while ((page = *p) != NULL) {
- if (PageLocked(page)) {
- p = &page->next;
- continue;
- }
- inode->i_nrpages--;
- if ((*p = page->next) != NULL)
- (*p)->prev = page->prev;
- page->dirty = 0;
- page->next = NULL;
- page->prev = NULL;
- remove_page_from_hash_queue(page);
- page->inode = NULL;
- __free_page(page);
- continue;
- }
+ remove_inode_pages(inode, 0);
}

@@ -90,5 +84,5 @@
* that are beyond that offset (and zeroing out partial pages).
*/
-void truncate_inode_pages(struct inode * inode, unsigned long start)
+static void old_truncate_inode_pages(struct inode * inode, unsigned long start)
{
struct page ** p;
@@ -128,4 +122,60 @@
}

+/*
+ * start == 0: use the non-blocking remove_inode_pages() with requeue=1; any other start: fall back to old_truncate_inode_pages().
+ */
+void truncate_inode_pages(struct inode * inode, unsigned long start)
+{
+ if (!start)
+ remove_inode_pages(inode, 1);
+ else
+ old_truncate_inode_pages(inode, start);
+}
+
+static struct inode init_inode = {0}; /* locked pages are requeued here by remove_inode_pages(); shrink_mmap() later calls remove_inode_pages() on it */
+
+/* Fallback when atomic_read() is not defined (2.0.xx support): read the counter by plain dereference. */
+#ifndef atomic_read
+#define atomic_read(p) (*(p))
+#endif
+
+/* Remove pages from an inode's page cache without blocking.
+ * Locked pages are left in place when requeue == 0, or moved onto the static init_inode when requeue != 0; unlocked pages are taken off both queues and passed to __free_page().
+ * Returns nonzero if at least one page had a reference count of exactly 1 just before __free_page() was called on it, and 0 otherwise.
+ */
+static int remove_inode_pages(struct inode *inode, int requeue)
+{
+ struct page *page, *next;
+ int locked, free=0;
+
+ for (page=inode->i_pages; page != NULL; page=next) {
+ next = page->next; /* saved first: the page is taken off this inode's queue below */
+
+ if ((locked = PageLocked(page)) && !requeue)
+ continue;
+ /*
+ * Remove the page from the hash queue and from the inode queue.
+ */
+ remove_page_from_hash_queue(page);
+ remove_page_from_inode_queue(page);
+
+ /*
+ * A locked page (only reached here when requeue is set) is re-added under init_inode at its existing offset; an unlocked page is released.
+ */
+ if (locked) {
+ add_page_to_inode_queue(&init_inode, page);
+ add_page_to_hash_queue(page, &init_inode, page->offset);
+printk("remove_inode_pages: requeued page count=%d pages=%ld\n",
+atomic_read(&page->count), init_inode.i_nrpages);
+ }
+ else {
+ page->dirty = 0; /* N.B. is this necessary?? */
+ free |= (atomic_read(&page->count) == 1);
+ __free_page(page);
+ }
+ }
+ return free;
+}
+
int shrink_mmap(int priority, int dma)
{
@@ -139,4 +189,13 @@
count_min = (limit<<1) >> (priority);

+ /*
+ * Check whether the static inode has any pages. If any are
+ * released, our work is already done ...
+ */
+ if (init_inode.i_nrpages && remove_inode_pages(&init_inode, 0)) {
+printk("shrink_mmap: after pages=%lu\n",init_inode.i_nrpages);
+ return 1;
+ }
+
page = mem_map + clock;
do {

--------------99FCE563082618478D2CDBDF--