Anybody wanting a little fun? -> nice buffer patch... ;-)))

Bernd Kaindl (bk@suse.de)
Sun, 11 Jul 1999 01:56:47 +0200 (MEST)


Hi boys and girls,

if you are reading this, you should not take it too seriously; this
patch may be complete trash and a nice joke, but it may also be a lot
of fun for you. ;)

Take it as my present - do with it anything you like...

> > > > The old buffer cache had this property, and it worked very well indeed.
> > > > The buffer cache would try to recycle itself in preference to growing,
> > > > so for file-intensive workloads we would naturally evict cached data in
> > > > preference to swapping, but for memory-intensive compute workloads we
> > > > would be more likely to swap unused VM pages out.

Yeah!

> > > >
> > > > --Stephen

Here is my fully-tested-rock-solid... - no, wrong... - my first
attempt at reassembling the old buffer code from the 2.2.0pre3
days... ;-)
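
To make the idea behind it clear, here is a toy userspace model of the
policy - just an illustration of the "recycle before growing" idea from
Stephen's mail above, *not* the code from the patch (all the names and
numbers in it are made up):

#include <stdio.h>

#define NBUF     8
#define MIN_FREE 2

struct toybuf {
	int busy;            /* stands in for b_count / BH_Dirty  */
	unsigned long lru;   /* stands in for the new b_lru_time  */
};

static struct toybuf cache[NBUF];
static int cached;
static int free_pages = 4;

static struct toybuf *refill(void)
{
	struct toybuf *best = NULL;
	int i;

	/* Memory is plentiful: grow the cache, like grow_buffers(). */
	if (free_pages > MIN_FREE && cached < NBUF) {
		free_pages--;
		return &cache[cached++];
	}
	/* Memory is tight: recycle the oldest unused buffer instead. */
	for (i = 0; i < cached; i++)
		if (!cache[i].busy && (!best || cache[i].lru < best->lru))
			best = &cache[i];
	return best;   /* NULL would mean: wake up bdflush and wait */
}

int main(void)
{
	unsigned long now;

	for (now = 1; now <= 10; now++) {
		struct toybuf *b = refill();
		if (!b)
			break;
		b->lru = now;   /* touched, as get_hash_table() now does */
	}
	printf("cached=%d, free_pages=%d\n", cached, free_pages);
	return 0;
}

That's the whole trick: once free memory gets tight, old clean buffers
get recycled instead of forcing unused VM pages out to swap.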

I don't know what this patch will do for you when you test it - you
run it at your own risk! Maybe, after running it for a while, you will
notice something wonderful, or...

Well,
I've tested it in combination with Andrea's OOM patches on a big 4-way
SMP machine and on a sluggish K63D IDE system, and it has not produced
a single failure so far...

Also,
I've recompiled kernels to benchmark it and ran I/O stress tests and
benchmarks with it, and it came out nearly comparable to 2.2.10 in
performance. In some tests it is a _little_ slower, in others much
_faster_...

I recommend that anybody who doesn't care about losing their data try
it out ;))))

Right now I'm running two 'make -j8 bzImage' jobs plus a build of
glibc; the load is around 10, half a dozen compilers are running, and
the CPU is still 50% idle - and that is the funny thing...

You can show your friends that you are recompiling the world and
still use Netscape as if nothing were happening. Granted: maybe you
should have a Celeron 333 MHz with 128 MB RAM and a Wide-SCSI disk...

- One Word: Fun...

have a nice day,

(-:B-e-r-n-d;-)

PS: If somebody could help me turn this into a serious patch, that
would be very nice.
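
PS2: For anybody who wants to review it: the subtle bit is the new
loop in get_hash_table(). It pins the buffer with b_count++ before
sleeping on a locked one, and after waking it re-checks dev/block/size,
because the buffer may have been reused for another block in the
meantime. Here is that pattern as a compilable userspace toy -
lookup(), wait_unlock() and the one-entry table are made-up stand-ins
for find_buffer(), __wait_on_buffer() and the real buffer hash:

struct buf { int dev, block, size, count, locked; };

static struct buf table[1];   /* stand-in for the buffer hash table */

static struct buf *lookup(int dev, int block, int size)
{
	struct buf *b = &table[0];
	if (b->dev == dev && b->block == block && b->size == size)
		return b;
	return 0;
}

static void wait_unlock(struct buf *b)
{
	b->locked = 0;   /* pretend the I/O completed while we slept */
}

static struct buf *get_buf(int dev, int block, int size)
{
	struct buf *b;

	for (;;) {
		b = lookup(dev, block, size);
		if (!b)
			break;
		b->count++;      /* pin it before we may sleep */
		if (!b->locked)
			break;
		wait_unlock(b);  /* the real code sleeps here */
		if (b->dev == dev && b->block == block && b->size == size)
			break;   /* still the same buffer: keep it */
		b->count--;      /* reused meanwhile: drop it, retry */
	}
	return b;
}

int main(void)
{
	struct buf want = { 1, 2, 4096, 0, 1 };
	table[0] = want;
	return get_buf(1, 2, 4096) ? 0 : 1;
}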

Here it is:

diff -rNu linux-2.2.10.sane/Makefile l-oldbuffer/Makefile
--- linux-2.2.10.sane/Makefile Sat May 29 03:10:19 1999
+++ l-oldbuffer/Makefile Sat Jul 10 14:04:57 1999
@@ -1,7 +1,7 @@
VERSION = 2
PATCHLEVEL = 2
SUBLEVEL = 10
-EXTRAVERSION =
+EXTRAVERSION = -oldbuf

ARCH := $(shell uname -m | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ -e s/arm.*/arm/ -e s/sa110/arm/)

diff -rNu linux-2.2.10.sane/fs/buffer.c l-oldbuffer/fs/buffer.c
--- linux-2.2.10.sane/fs/buffer.c Thu May 13 22:14:04 1999
+++ l-oldbuffer/fs/buffer.c Sat Jul 10 19:28:28 1999
@@ -474,7 +474,7 @@
bh->b_next_free = bh->b_prev_free = NULL;
}

-static void remove_from_queues(struct buffer_head * bh)
+static inline void remove_from_queues(struct buffer_head * bh)
{
if(bh->b_dev == B_FREE) {
remove_from_free_list(bh); /* Free list entries should not be
@@ -506,7 +506,7 @@
}
}

-static void insert_into_queues(struct buffer_head * bh)
+static inline void insert_into_queues(struct buffer_head * bh)
{
/* put at end of free list */
if(bh->b_dev == B_FREE) {
@@ -575,9 +575,21 @@
struct buffer_head * get_hash_table(kdev_t dev, int block, int size)
{
struct buffer_head * bh;
- bh = find_buffer(dev,block,size);
- if (bh)
+ for (;;) {
+ bh = find_buffer(dev,block,size);
+ if (!bh)
+ break;
bh->b_count++;
+ bh->b_lru_time = jiffies;
+ if (!buffer_locked(bh))
+ break;
+ __wait_on_buffer(bh);
+ if (bh->b_dev == dev &&
+ bh->b_blocknr == block &&
+ bh->b_size == size)
+ break;
+ bh->b_count--;
+ }
return bh;
}

@@ -651,15 +663,208 @@
}

/*
- * We used to try various strange things. Let's not.
+ * Find a candidate buffer to be reclaimed.
+ * N.B. Must search the entire BUF_LOCKED list rather than terminating
+ * when the first locked buffer is found. Buffers are unlocked at
+ * completion of IO, and under some conditions there may be (many)
+ * unlocked buffers after the first locked one.
*/
+static struct buffer_head *find_candidate(struct buffer_head *bh,
+ int *list_len, int size)
+{
+ if (!bh)
+ goto no_candidate;
+
+ for (; (*list_len) > 0; bh = bh->b_next_free, (*list_len)--) {
+ if (size != bh->b_size && !buffer_touched(bh)) {
+ /* This provides a mechanism for freeing blocks
+ * of other sizes, this is necessary now that we
+ * no longer have the lav code.
+ */
+ try_to_free_buffer(bh,&bh);
+ if (!bh)
+ break;
+ continue;
+ }
+ else if (!bh->b_count &&
+ !buffer_locked(bh) &&
+ !buffer_protected(bh) &&
+ !buffer_dirty(bh))
+ return bh;
+ }
+
+no_candidate:
+ return NULL;
+}
+
+#define buffer_over_max() ((buffermem >> PAGE_SHIFT) * 100 > \
+ buffer_mem.max_percent * num_physpages)
+
static void refill_freelist(int size)
{
- if (!grow_buffers(size)) {
+ struct buffer_head * bh, * next;
+ struct buffer_head * candidate[BUF_DIRTY];
+ int buffers[BUF_DIRTY];
+ int i;
+ int needed, obtained=0;
+
+ //refilled = 1;
+
+ /* We are going to try to locate this much memory. */
+ needed = bdf_prm.b_un.nrefill * size;
+
+ while ((nr_free_pages > freepages.min*2) &&
+ !buffer_over_max() &&
+ grow_buffers(size)) {
+ obtained += PAGE_SIZE;
+ if (obtained >= needed)
+ return;
+ }
+
+ /*
+ * Update the needed amount based on the number of potentially
+ * freeable buffers. We don't want to free more than one quarter
+ * of the available buffers.
+ */
+ i = (nr_buffers_type[BUF_CLEAN] + nr_buffers_type[BUF_LOCKED]) >> 2;
+ if (i < bdf_prm.b_un.nrefill) {
+ needed = i * size;
+ if (needed < PAGE_SIZE)
+ needed = PAGE_SIZE;
+ }
+
+ /*
+ * OK, we cannot grow the buffer cache, now try to get some
+ * from the lru list.
+ */
+repeat:
+ if (obtained >= needed)
+ return;
+
+ /*
+ * First set the candidate pointers to usable buffers. This
+ * should be quick nearly all of the time. N.B. There must be
+ * no blocking calls after setting up the candidate[] array!
+ */
+ for (i = BUF_CLEAN; i<BUF_DIRTY; i++) {
+ buffers[i] = nr_buffers_type[i];
+ candidate[i] = find_candidate(lru_list[i], &buffers[i], size);
+ }
+
+ /*
+ * Select the older of the available buffers until we reach our goal.
+ */
+ for (;;) {
+ i = BUF_CLEAN;
+ if (!candidate[BUF_CLEAN]) {
+ if (!candidate[BUF_LOCKED])
+ break;
+ i = BUF_LOCKED;
+ }
+ else if (candidate[BUF_LOCKED] &&
+ (candidate[BUF_LOCKED]->b_lru_time <
+ candidate[BUF_CLEAN ]->b_lru_time))
+ i = BUF_LOCKED;
+ /*
+ * Free the selected buffer and get the next candidate.
+ */
+ bh = candidate[i];
+ next = bh->b_next_free;
+
+ obtained += bh->b_size;
+ remove_from_queues(bh);
+ put_last_free(bh);
+ if (obtained >= needed)
+ return;
+
+ if (--buffers[i] && bh != next)
+ candidate[i] = find_candidate(next, &buffers[i], size);
+ else
+ candidate[i] = NULL;
+ }
+
+ /*
+ * If there are dirty buffers, do a non-blocking wake-up.
+ * This increases the chances of having buffers available
+ * for the next call ...
+ */
+ if (nr_buffers_type[BUF_DIRTY])
+ wakeup_bdflush(0);
+
+ /*
+ * Allocate buffers to reach half our goal, if possible.
+ * Since the allocation doesn't block, there's no reason
+ * to search the buffer lists again. Then return if there
+ * are _any_ free buffers.
+ */
+ while (obtained < (needed >> 1) &&
+ nr_free_pages > freepages.min + 5 &&
+ grow_buffers(size))
+ obtained += PAGE_SIZE;
+
+ if (free_list[BUFSIZE_INDEX(size)])
+ return;
+
+ /*
+ * If there are dirty buffers, wait while bdflush writes
+ * them out. The buffers become locked, but we can just
+ * wait for one to unlock ...
+ */
+ if (nr_buffers_type[BUF_DIRTY])
wakeup_bdflush(1);
- current->policy |= SCHED_YIELD;
- schedule();
+
+ /*
+ * In order to prevent a buffer shortage from exhausting
+ * the system's reserved pages, we force tasks to wait
+ * before using reserved pages for buffers. This is easily
+ * accomplished by waiting on an unused locked buffer.
+ */
+ if ((bh = lru_list[BUF_LOCKED]) != NULL) {
+ for (i = nr_buffers_type[BUF_LOCKED]; i--; bh = bh->b_next_free)
+ {
+ if (bh->b_size != size)
+ continue;
+ if (bh->b_count)
+ continue;
+ if (!buffer_locked(bh))
+ continue;
+ if (buffer_dirty(bh) || buffer_protected(bh))
+ continue;
+ if (MAJOR(bh->b_dev) == LOOP_MAJOR)
+ continue;
+ /*
+ * We've found an unused, locked, non-dirty buffer of
+ * the correct size. Claim it so no one else can,
+ * then wait for it to unlock.
+ */
+ bh->b_count++;
+ wait_on_buffer(bh);
+ bh->b_count--;
+ /*
+ * Loop back to harvest this (and maybe other) buffers.
+ */
+ goto repeat;
+ }
}
+
+ /*
+ * Convert a reserved page into buffers ... should happen only rarely.
+ */
+ if (grow_buffers(size)) {
+#ifdef BUFFER_DEBUG
+printk("refill_freelist: used reserve page\n");
+#endif
+ return;
+ }
+
+ /*
+ * System is _very_ low on memory ... sleep and try later.
+ */
+#ifdef BUFFER_DEBUG
+printk("refill_freelist: task %s waiting for buffers\n", current->comm);
+#endif
+ schedule();
+ goto repeat;
}

void init_buffer(struct buffer_head *bh, kdev_t dev, int block,
@@ -715,6 +920,7 @@
* and that it's unused (b_count=0), unlocked, and clean.
*/
init_buffer(bh, dev, block, end_buffer_io_sync, NULL);
+ bh->b_lru_time = jiffies;
bh->b_state=0;
insert_into_queues(bh);
return bh;
@@ -801,6 +1007,8 @@
*/
void __brelse(struct buffer_head * buf)
{
+ wait_on_buffer(buf);
+
/* If dirty, mark the time this buffer should be written back. */
set_writetime(buf, 0);
refile_buffer(buf);
@@ -1401,12 +1609,17 @@
return 1;
}

-/*
- * Can the buffer be thrown out?
- */
-#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
-#define buffer_busy(bh) ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))

+/* =========== Reduce the buffer memory ============= */
+
+static inline int buffer_waiting(struct buffer_head * bh)
+{
+ return waitqueue_active(&bh->b_wait);
+}
+
+#define BUFFER_BUSY_BITS ((1<<BH_Dirty) | (1<<BH_Lock) | (1<<BH_Protected))
+#define buffer_busy(bh) ((bh)->b_count || ((bh)->b_state & BUFFER_BUSY_BITS))
+
/*
* try_to_free_buffers() checks if all the buffers on this particular page
* are unused, and free's the page if so.
@@ -1416,36 +1629,81 @@
*/
int try_to_free_buffers(struct page * page_map)
{
- struct buffer_head * tmp, * bh = page_map->buffers;
+ struct buffer_head * tmp, * bh = page_map->buffers;

+ tmp = bh;
+ do {
+ struct buffer_head * p = tmp;
+
+ tmp = tmp->b_this_page;
+ if (!buffer_busy(p))
+ continue;
+
+ wakeup_bdflush(0);
+ return 0;
+ } while (tmp != bh);
+
+ tmp = bh;
+ do {
+ struct buffer_head * p = tmp;
+ tmp = tmp->b_this_page;
+ nr_buffers--;
+ remove_from_queues(p);
+ put_unused_buffer_head(p);
+ } while (tmp != bh);
+
+ /* Wake up anyone waiting for buffer heads */
+ wake_up(&buffer_wait);
+
+ /* And free the page */
+ buffermem -= PAGE_SIZE;
+ page_map->buffers = NULL;
+ __free_page(page_map);
+ return 1;
+}
+
+/*
+ * try_to_free_buffer() checks if all the buffers on this particular page
+ * are unused, and free's the page if so.
+ */
+int try_to_free_buffer(struct buffer_head * bh, struct buffer_head ** bhp)
+{
+ unsigned long page;
+ struct buffer_head * tmp, * p;
+
+ *bhp = bh;
+ page = (unsigned long) bh->b_data;
+ page &= PAGE_MASK;
tmp = bh;
do {
- struct buffer_head * p = tmp;
-
+ if (!tmp)
+ return 0;
+ if (tmp->b_count || buffer_protected(tmp) ||
+ buffer_dirty(tmp) || buffer_locked(tmp) ||
+ buffer_waiting(tmp))
+ return 0;
tmp = tmp->b_this_page;
- if (!buffer_busy(p))
- continue;
-
- wakeup_bdflush(0);
- return 0;
} while (tmp != bh);

tmp = bh;
do {
- struct buffer_head * p = tmp;
+ p = tmp;
tmp = tmp->b_this_page;
nr_buffers--;
+ if (p == *bhp) {
+ *bhp = p->b_prev_free;
+ if (p == *bhp) /* Was this the last in the list? */
+ *bhp = NULL;
+ }
remove_from_queues(p);
put_unused_buffer_head(p);
} while (tmp != bh);
-
/* Wake up anyone waiting for buffer heads */
wake_up(&buffer_wait);

- /* And free the page */
buffermem -= PAGE_SIZE;
- page_map->buffers = NULL;
- __free_page(page_map);
+ mem_map[MAP_NR(page)].buffers = NULL;
+ free_page(page);
return 1;
}

diff -rNu linux-2.2.10.sane/include/linux/fs.h l-oldbuffer/include/linux/fs.h
--- linux-2.2.10.sane/include/linux/fs.h Tue May 11 19:35:44 1999
+++ l-oldbuffer/include/linux/fs.h Sat Jul 10 21:47:44 1999
@@ -213,6 +213,8 @@
unsigned int b_list; /* List that this buffer appears */
unsigned long b_flushtime; /* Time when this (dirty) buffer
* should be written */
+ unsigned long b_lru_time; /* Time when this buffer was
+ * last used. */
struct wait_queue * b_wait;
struct buffer_head ** b_pprev; /* doubly linked list of hash-queue */
struct buffer_head * b_prev_free; /* doubly linked list of buffers */
@@ -256,6 +258,8 @@

#define buffer_page(bh) (mem_map + MAP_NR((bh)->b_data))
#define touch_buffer(bh) set_bit(PG_referenced, &buffer_page(bh)->flags)
+#define buffer_touched(bh) test_bit(PG_referenced, &buffer_page(bh)->flags)
+

#include <linux/pipe_fs_i.h>
#include <linux/minix_fs_i.h>

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/