[just another compromise] pre-patch-2.0.31-7

Dr. Werner Fink (werner@suse.de)
Fri, 22 Aug 1997 14:10:06 +0200


Hi,

I've adopted an old patch from DaveM (Hi) to allow swapping even if
we want a buffer page. To avoid heavy swapping I've added only a simple
condition (a compromise) in try_to_free_page to avoid a slowdown due to swap
I/O ... nothing more (no state fix included). Together with a few other
changes this should reduce swap I/O to the necessary amount but should
still swap if really needed.

The appended patch also includes your last patch.

I've added a simple look-ahead in find_candidate() to check a bit
further even if the first buffer is locked _and_ to avoid wasting CPU.

Currently I'm running this patch over a plain pre-patch-2.0.31-7
... hey it looks good :-) ... the `make -j's together with a
really fast bonnie.

Werner

---------------------------------------------------------------------------
diff -urN linux-2.0.31-linus/fs/buffer.c linux/fs/buffer.c
--- linux-2.0.31-linus/fs/buffer.c Mon Aug 18 13:58:51 1997
+++ linux/fs/buffer.c Fri Aug 22 11:54:58 1997
@@ -543,14 +543,11 @@
static inline int can_reclaim(struct buffer_head *bh, int size)
{
if (bh->b_count ||
- buffer_protected(bh) || buffer_locked(bh))
+ buffer_protected(bh) ||
+ buffer_locked(bh) ||
+ mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
+ buffer_dirty(bh))
return 0;
-
- if (mem_map[MAP_NR((unsigned long) bh->b_data)].count != 1 ||
- buffer_dirty(bh)) {
- /* WSH: don't attempt to refile here! */
- return 0;
- }

if (bh->b_size != size)
return 0;
@@ -559,13 +556,15 @@
}

/* find a candidate buffer to be reclaimed */
-static struct buffer_head *find_candidate(struct buffer_head *list,int *list_len,int size)
+static struct buffer_head *find_candidate(struct buffer_head *bh,
+ int *list_len, int size)
{
- struct buffer_head *bh;
+ int lookahead = 42;
+
+ if (!bh)
+ goto no_candidate;

- for (bh = list;
- bh && (*list_len) > 0;
- bh = bh->b_next_free, (*list_len)--) {
+ for (; (*list_len) > 0; bh = bh->b_next_free, (*list_len)--) {
if (size != bh->b_size) {
/* this provides a mechanism for freeing blocks
of other sizes, this is necessary now that we
@@ -573,23 +572,21 @@
try_to_free_buffer(bh,&bh,1);
if (!bh)
break;
+ lookahead = 42;
continue;
}
-
- if (buffer_locked(bh) &&
- (bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) {
- /* Buffers are written in the order they are placed
- on the locked list. If we encounter a locked
- buffer here, this means that the rest of them
- are also locked */
- (*list_len) = 0;
- return NULL;
+ else if (buffer_locked(bh) &&
+ (bh->b_list == BUF_LOCKED || bh->b_list == BUF_LOCKED1)) {
+ if (lookahead--) {
+ (*list_len) = 0;
+ goto no_candidate;
+ }
}
-
- if (can_reclaim(bh,size))
- return bh;
+ else if (can_reclaim(bh,size))
+ return bh;
}

+no_candidate:
return NULL;
}

@@ -662,6 +659,11 @@
}
goto repeat;
}
+
+ /* Dirty buffers should not overtake, wakeup_bdflush(1) calls
+ bdflush and sleeps, therefore kswapd does his important work. */
+ if (nr_buffers_type[BUF_DIRTY] > nr_buffers * bdf_prm.b_un.nfract/100)
+ wakeup_bdflush(1);

/* Too bad, that was not enough. Try a little harder to grow some. */

@@ -672,7 +674,6 @@
};
}

-#if 0
/*
* In order to protect our reserved pages,
* return now if we got any buffers.
@@ -681,10 +682,8 @@
return;

/* and repeat until we find something good */
- if (!grow_buffers(GFP_ATOMIC, size))
+ if (!grow_buffers(GFP_BUFFER, size))
wakeup_bdflush(1);
-#endif
- wakeup_bdflush(1);

/* decrease needed even if there is no success */
needed -= PAGE_SIZE;
@@ -1717,7 +1716,7 @@
* dirty buffers, then make the next write to a
* loop device to be a blocking write.
* This lets us block--which we _must_ do! */
- if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0) {
+ if (ndirty == 0 && nr_buffers_type[BUF_DIRTY] > 0 && wrta_cmd != WRITE) {
wrta_cmd = WRITE;
continue;
}
@@ -1725,7 +1724,7 @@

/* If there are still a lot of dirty buffers around, skip the sleep
and flush some more */
- if(nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
+ if(ndirty == 0 || nr_buffers_type[BUF_DIRTY] <= nr_buffers * bdf_prm.b_un.nfract/100) {
wake_up(&bdflush_done);
current->signal = 0;
interruptible_sleep_on(&bdflush_wait);
diff -urN linux-2.0.31-linus/include/linux/mm.h linux/include/linux/mm.h
--- linux-2.0.31-linus/include/linux/mm.h Mon Jul 21 21:52:45 1997
+++ linux/include/linux/mm.h Fri Aug 22 11:37:00 1997
@@ -295,7 +295,7 @@

/* filemap.c */
extern unsigned long page_unuse(unsigned long);
-extern int shrink_mmap(int, int);
+extern int shrink_mmap(int, int, int);
extern void truncate_inode_pages(struct inode *, unsigned long);

#define GFP_BUFFER 0x00
diff -urN linux-2.0.31-linus/mm/filemap.c linux/mm/filemap.c
--- linux-2.0.31-linus/mm/filemap.c Mon Aug 18 13:58:55 1997
+++ linux/mm/filemap.c Fri Aug 22 11:37:00 1997
@@ -114,7 +114,7 @@
}
}

-int shrink_mmap(int priority, int dma)
+int shrink_mmap(int priority, int dma, int can_do_io)
{
static int clock = 0;
struct page * page;
@@ -174,7 +174,7 @@
}

/* is it a buffer cache page? */
- if (bh && try_to_free_buffer(bh, &bh, 6))
+ if (can_do_io && bh && try_to_free_buffer(bh, &bh, 6))
return 1;
break;

diff -urN linux-2.0.31-linus/mm/page_alloc.c linux/mm/page_alloc.c
--- linux-2.0.31-linus/mm/page_alloc.c Mon Aug 18 13:58:55 1997
+++ linux/mm/page_alloc.c Fri Aug 22 11:37:00 1997
@@ -214,7 +214,7 @@
return 0;
}
restore_flags(flags);
- if (priority != GFP_BUFFER && try_to_free_page(priority, dma, 1))
+ if (try_to_free_page(priority, dma, 1))
goto repeat;
return 0;
}
diff -urN linux-2.0.31-linus/mm/vmscan.c linux/mm/vmscan.c
--- linux-2.0.31-linus/mm/vmscan.c Mon Aug 18 13:58:55 1997
+++ linux/mm/vmscan.c Fri Aug 22 12:42:21 1997
@@ -80,7 +80,7 @@
* have died while we slept).
*/
static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
- unsigned long address, pte_t * page_table, int dma, int wait)
+ unsigned long address, pte_t * page_table, int dma, int wait, int can_do_io)
{
pte_t pte;
unsigned long entry;
@@ -112,6 +112,8 @@
if (page_map->age)
return 0;
if (pte_dirty(pte)) {
+ if(!can_do_io)
+ return 0;
if (vma->vm_ops && vma->vm_ops->swapout) {
pid_t pid = tsk->pid;
vma->vm_mm->rss--;
@@ -169,7 +171,8 @@
*/

static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
- pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+ pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+ int can_do_io)
{
pte_t * pte;
unsigned long pmd_end;
@@ -191,7 +194,8 @@
do {
int result;
tsk->swap_address = address + PAGE_SIZE;
- result = try_to_swap_out(tsk, vma, address, pte, dma, wait);
+ result = try_to_swap_out(tsk, vma, address, pte, dma, wait,
+ can_do_io);
if (result)
return result;
address += PAGE_SIZE;
@@ -201,7 +205,8 @@
}

static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
- pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+ pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+ int can_do_io)
{
pmd_t * pmd;
unsigned long pgd_end;
@@ -221,7 +226,8 @@
end = pgd_end;

do {
- int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait);
+ int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait,
+ can_do_io);
if (result)
return result;
address = (address + PMD_SIZE) & PMD_MASK;
@@ -231,7 +237,7 @@
}

static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
- pgd_t *pgdir, unsigned long start, int dma, int wait)
+ pgd_t *pgdir, unsigned long start, int dma, int wait, int can_do_io)
{
unsigned long end;

@@ -242,7 +248,8 @@

end = vma->vm_end;
while (start < end) {
- int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait);
+ int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait,
+ can_do_io);
if (result)
return result;
start = (start + PGDIR_SIZE) & PGDIR_MASK;
@@ -251,7 +258,7 @@
return 0;
}

-static int swap_out_process(struct task_struct * p, int dma, int wait)
+static int swap_out_process(struct task_struct * p, int dma, int wait, int can_do_io)
{
unsigned long address;
struct vm_area_struct* vma;
@@ -272,7 +279,8 @@
address = vma->vm_start;

for (;;) {
- int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait);
+ int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait,
+ can_do_io);
if (result)
return result;
vma = vma->vm_next;
@@ -284,7 +292,7 @@
return 0;
}

-static int swap_out(unsigned int priority, int dma, int wait)
+static int swap_out(unsigned int priority, int dma, int wait, int can_do_io)
{
static int swap_task;
int loop, counter, shfrv;
@@ -357,7 +365,7 @@
}
if (!--p->swap_cnt)
swap_task++;
- switch (swap_out_process(p, dma, wait)) {
+ switch (swap_out_process(p, dma, wait, can_do_io)) {
case 0:
if (p->state == TASK_STOPPED)
/* Stopped task occupy nonused ram */
@@ -391,24 +399,30 @@
{
static int state = 0;
int i=6;
- int stop;
+ int stop, can_do_io;

/* we don't try as hard if we're not waiting.. */
stop = 3;
+ can_do_io = 1;
if (wait)
stop = 0;
+ if (priority == GFP_BUFFER) {
+ /* bdflush() should do the rest if we fail */
+ stop = 3;
+ can_do_io = 0;
+ }
switch (state) {
do {
case 0:
- if (shrink_mmap(i, dma))
+ if (shrink_mmap(i, dma, can_do_io))
return 1;
state = 1;
case 1:
- if (shm_swap(i, dma))
+ if (can_do_io && shm_swap(i, dma))
return 1;
state = 2;
default:
- if (swap_out(i, dma, wait))
+ if (swap_out(i, dma, wait, can_do_io))
return 1;
state = 0;
i--;