Re: buffers vs. pages vs. kernel speed

David S. Miller (davem@jenolan.rutgers.edu)
Thu, 12 Jun 1997 05:36:20 -0400


Date: Wed, 11 Jun 1997 13:59:14 +0200
From: "Dr. Werner Fink" <werner@suse.de>

> Do both. I suspect that the two may not be entirely unrelated.

Yep ... please Dave :-)

Let's try this one... this has:

1) buffer cache clean buffer out of memory fixes
2) handle multi-page allocations in low memory situations
(sans GFP_ATOMIC) more gracefully
3) Matthias's state machine fix
4) Debugging code added to try_to_free_page() for when it fails

I ran a kernel compile in 3 MB of RAM on a Sparc (which makes the
machine useless, _very_ useless), and for as far as it got (I let it
run for a day; I wasn't going to let it finish) not a single
try_to_free_page() call failed, not once.

You may not be as lucky, so if you get the debugging message, please
send me the output. Thanks.

Here is the patch; it should apply cleanly to 2.0.30 and pre-2.0.31-2:

--- linux/fs/buffer.c.~1~ Thu May 29 13:15:12 1997
+++ linux/fs/buffer.c Mon Jun 9 01:32:41 1997
@@ -670,7 +670,7 @@
}

/* and repeat until we find something good */
- if (grow_buffers(GFP_ATOMIC, size))
+ if (grow_buffers(GFP_BUFFER, size))
needed -= PAGE_SIZE;
else
wakeup_bdflush(1);
@@ -931,11 +931,16 @@
* This is critical. We can't swap out pages to get
* more buffer heads, because the swap-out may need
* more buffer-heads itself. Thus GFP_ATOMIC.
+ *
+ * This is no longer true, it is GFP_BUFFER again, the
+ * swapping code now knows not to perform I/O when that
+ * GFP level is specified... -DaveM
*/
+
/* we now use kmalloc() here instead of gfp as we want
to be able to easily release buffer heads - they
took up quite a bit of memory (tridge) */
- bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_ATOMIC);
+ bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_BUFFER);
if (bh) {
put_unused_buffer_head(bh);
nr_buffer_heads++;
--- linux/mm/vmscan.c.~1~ Mon Jun 2 17:25:12 1997
+++ linux/mm/vmscan.c Mon Jun 9 01:33:51 1997
@@ -68,7 +68,7 @@
* have died while we slept).
*/
static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
- unsigned long address, pte_t * page_table, int dma, int wait)
+ unsigned long address, pte_t * page_table, int dma, int wait, int can_do_io)
{
pte_t pte;
unsigned long entry;
@@ -100,6 +100,8 @@
if (page_map->age)
return 0;
if (pte_dirty(pte)) {
+ if(!can_do_io)
+ return 0;
if (vma->vm_ops && vma->vm_ops->swapout) {
pid_t pid = tsk->pid;
vma->vm_mm->rss--;
@@ -157,7 +159,8 @@
*/

static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
- pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+ pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+ int can_do_io)
{
pte_t * pte;
unsigned long pmd_end;
@@ -179,7 +182,7 @@
do {
int result;
tsk->swap_address = address + PAGE_SIZE;
- result = try_to_swap_out(tsk, vma, address, pte, dma, wait);
+ result = try_to_swap_out(tsk, vma, address, pte, dma, wait, can_do_io);
if (result)
return result;
address += PAGE_SIZE;
@@ -189,7 +192,8 @@
}

static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
- pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+ pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+ int can_do_io)
{
pmd_t * pmd;
unsigned long pgd_end;
@@ -209,7 +213,7 @@
end = pgd_end;

do {
- int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait);
+ int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait, can_do_io);
if (result)
return result;
address = (address + PMD_SIZE) & PMD_MASK;
@@ -219,7 +223,7 @@
}

static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
- pgd_t *pgdir, unsigned long start, int dma, int wait)
+ pgd_t *pgdir, unsigned long start, int dma, int wait, int can_do_io)
{
unsigned long end;

@@ -230,7 +234,7 @@

end = vma->vm_end;
while (start < end) {
- int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait);
+ int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait, can_do_io);
if (result)
return result;
start = (start + PGDIR_SIZE) & PGDIR_MASK;
@@ -239,7 +243,7 @@
return 0;
}

-static int swap_out_process(struct task_struct * p, int dma, int wait)
+static int swap_out_process(struct task_struct * p, int dma, int wait, int can_do_io)
{
unsigned long address;
struct vm_area_struct* vma;
@@ -260,7 +264,7 @@
address = vma->vm_start;

for (;;) {
- int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait);
+ int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait, can_do_io);
if (result)
return result;
vma = vma->vm_next;
@@ -272,7 +276,7 @@
return 0;
}

-static int swap_out(unsigned int priority, int dma, int wait)
+static int swap_out(unsigned int priority, int dma, int wait, int gfp_level)
{
static int swap_task;
int loop, counter;
@@ -311,7 +315,7 @@
}
if (!--p->swap_cnt)
swap_task++;
- switch (swap_out_process(p, dma, wait)) {
+ switch (swap_out_process(p, dma, wait, gfp_level != GFP_BUFFER)) {
case 0:
if (p->swap_cnt)
swap_task++;
@@ -330,33 +334,61 @@
* to be. This works out OK, because we now do proper aging on page
* contents.
*/
+#define TRIED_SMMAP 0x1
+#define TRIED_SSWAP 0x2
+#define TRIED_SWOUT 0x4
int try_to_free_page(int priority, int dma, int wait)
{
static int state = 0;
int i=6;
- int stop;
+ int stop, tried_mask = 0;
+ int old_stop, old_state;

/* we don't try as hard if we're not waiting.. */
stop = 3;
if (wait)
stop = 0;
+ old_stop = stop;
+ old_state = state;
switch (state) {
do {
case 0:
- if (shrink_mmap(i, dma))
+ /* Don't worry here for the GFP_BUFFER case, shrink_mmap never
+ * tries to write dirty things out...
+ */
+ tried_mask |= TRIED_SMMAP;
+ if (shrink_mmap(i, dma)) {
+ state = 1;
return 1;
- state = 1;
+ }
case 1:
- if (shm_swap(i, dma))
- return 1;
- state = 2;
+ /* shm_swap must always perform some I/O if it succeeds
+ * in finding things to free up, so don't waste any time
+ * if we are trying to get some buffer heads...
+ */
+ if (priority != GFP_BUFFER) {
+ tried_mask |= TRIED_SSWAP;
+ if(shm_swap(i, dma)) {
+ state = 2;
+ return 1;
+ }
+ }
default:
- if (swap_out(i, dma, wait))
+ tried_mask |= TRIED_SWOUT;
+ if (swap_out(i, dma, wait, priority)) {
+ state = 0;
return 1;
- state = 0;
+ }
i--;
} while ((i - stop) >= 0);
}
+ printk("try_to_free_page(%d,%d,%d): FAIL try(%s:%s:%s) state[o(%d):n(%d)] "
+ "stop[o(%d):n(%d)]\n", priority, dma, wait,
+ tried_mask & TRIED_SMMAP ? "shrink_mmap" : "",
+ tried_mask & TRIED_SSWAP ? "shm_swap" : "",
+ tried_mask & TRIED_SWOUT ? "swap_out" : "",
+ old_state, state, old_stop, stop);
+ state = 0;
return 0;
}