pre-2.0.31-2 + this patch works for me

Gerard Roudier (groudier@club-internet.fr)
Sun, 15 Jun 1997 11:30:59 +0200 (MET DST)


David,

I've added some very simple changes to your patch that seem to me
enough to fix the 2.0.30 memory problems.
With this patch applied, I've been unable to break the kernel doing heavy
disk I/O with mem=4M and a swap partition.
I didn't try other kernel services that can be affected by these changes.

In my opinion, the behaviour of Linux under memory stress is very
different with this patch, and it may trigger problems in other kernel
services I didn't exercise in my tests.

As I wrote in my previous mails on this topic, I think it is the
buffer-header allocation strategy that should be improved, and a new gfp
level seemed necessary to me.
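
To make the idea concrete, here is a minimal sketch of the allocation
fallback that the patch below applies in get_more_buffer_heads().  It is
an illustration only, not part of the patch, and the helper name
alloc_one_buffer_head() is invented for this example:

	/*
	 * Sketch: when the buffer heads are not needed for page I/O,
	 * swapping is still possible and we do not want to victimize
	 * buffers, so the GFP_BUFFER level is tried first; the new
	 * GFP_ATOMIC_IO level is only the fallback.
	 */
	static struct buffer_head * alloc_one_buffer_head(int page_io)
	{
		struct buffer_head * bh = NULL;

		if (!page_io)
			bh = (struct buffer_head *) kmalloc(sizeof(*bh), GFP_BUFFER);
		if (!bh)
			bh = (struct buffer_head *) kmalloc(sizeof(*bh), GFP_ATOMIC_IO);
		return bh;
	}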

--
Regards, Gerard.

Here's the patch (it is against pre-2.0.31-2 and does not incorporate other suggested patches that seem useless to me):

--- linux/fs/buffer.c.orig	Sat Jun 14 13:09:41 1997
+++ linux/fs/buffer.c	Sun Jun 15 09:58:11 1997
@@ -660,6 +660,7 @@
 		goto repeat;
 	}
 
+#if 0
 	/* Too bad, that was not enough. Try a little harder to grow some. */
 
 	if (nr_free_pages > min_free_pages + 5) {
@@ -668,9 +669,10 @@
 			goto repeat;
 		};
 	}
+#endif
 
 	/* and repeat until we find something good */
-	if (grow_buffers(GFP_ATOMIC, size))
+	if (grow_buffers(GFP_BUFFER, size))
 		needed -= PAGE_SIZE;
 	else
 		wakeup_bdflush(1);
@@ -922,7 +924,7 @@
 	wake_up(&buffer_wait);
 }
 
-static void get_more_buffer_heads(void)
+static void get_more_buffer_heads(int page_io)
 {
 	struct buffer_head * bh;
 
@@ -935,7 +937,19 @@
 		/* we now use kmalloc() here instead of gfp as we want
 		   to be able to easily release buffer heads - they
 		   took up quite a bit of memory (tridge) */
-		bh = (struct buffer_head *) kmalloc(sizeof(*bh),GFP_ATOMIC);
+		/*
+		 * If buffers heads are'nt for PAGE IO, swap is possible and
+		 * we donnot want to victimize buffers. In this situation,
+		 * we first try GFP_BUFFER.
+		 */
+
+		bh = 0;
+		if (!page_io)
+			bh = (struct buffer_head *)
+				kmalloc(sizeof(*bh),GFP_BUFFER);
+		if (!bh)
+			bh = (struct buffer_head *)
+				kmalloc(sizeof(*bh),GFP_ATOMIC_IO);
 		if (bh) {
 			put_unused_buffer_head(bh);
 			nr_buffer_heads++;
@@ -981,12 +995,12 @@
 	}
 }
 
-static struct buffer_head * get_unused_buffer_head(void)
+static struct buffer_head * get_unused_buffer_head(int page_io)
 {
 	struct buffer_head * bh;
 
 	recover_reusable_buffer_heads();
-	get_more_buffer_heads();
+	get_more_buffer_heads(page_io);
 	if (!unused_list)
 		return NULL;
 	bh = unused_list;
@@ -1001,7 +1015,7 @@
  * follow the buffers created.  Return NULL if unable to create more
  * buffers.
  */
-static struct buffer_head * create_buffers(unsigned long page, unsigned long size)
+static struct buffer_head * create_buffers(int page_io, unsigned long page, unsigned long size)
 {
 	struct buffer_head *bh, *head;
 	long offset;
@@ -1009,7 +1023,7 @@
 	head = NULL;
 	offset = PAGE_SIZE;
 	while ((offset -= size) >= 0) {
-		bh = get_unused_buffer_head();
+		bh = get_unused_buffer_head(page_io);
 		if (!bh)
 			goto no_grow;
 
@@ -1091,7 +1105,7 @@
 	 * They do _not_ show up in the buffer hash table!
 	 * They are _not_ registered in page->buffers either!
 	 */
-	bh = create_buffers(page_address(page), size);
+	bh = create_buffers(1, page_address(page), size);
 	if (!bh) {
 		clear_bit(PG_locked, &page->flags);
 		wake_up(&page->wait);
@@ -1307,7 +1321,7 @@
 
 	if (!(page = __get_free_page(pri)))
 		return 0;
-	bh = create_buffers(page, size);
+	bh = create_buffers(0, page, size);
 	if (!bh) {
 		free_page(page);
 		return 0;
--- linux/mm/page_alloc.c.orig	Fri Aug 16 23:07:08 1996
+++ linux/mm/page_alloc.c	Sun Jun 15 10:05:35 1997
@@ -214,7 +214,7 @@
 			return 0;
 		}
 	restore_flags(flags);
-	if (priority != GFP_BUFFER && try_to_free_page(priority, dma, 1))
+	if (try_to_free_page(priority, dma, 1))
 		goto repeat;
 	return 0;
 }
--- linux/mm/filemap.c.orig	Sat Jun 14 13:09:42 1997
+++ linux/mm/filemap.c	Sat Jun 14 18:31:51 1997
@@ -127,7 +127,7 @@
 	}
 }
 
-int shrink_mmap(int priority, int dma)
+int shrink_mmap(int priority, int gfp_level, int dma)
 {
 	static int clock = 0;
 	struct page * page;
@@ -183,7 +183,8 @@
 			}
 
 			/* is it a buffer cache page? */
-			if (bh && try_to_free_buffer(bh, &bh, 6))
+			if (bh && gfp_level != GFP_BUFFER &&
+			    try_to_free_buffer(bh, &bh, 6))
 				return 1;
 			break;
 
--- linux/mm/vmscan.c.orig	Sat Mar 22 22:40:35 1997
+++ linux/mm/vmscan.c	Sun Jun 15 09:25:22 1997
@@ -68,7 +68,7 @@
  * have died while we slept).
  */
 static inline int try_to_swap_out(struct task_struct * tsk, struct vm_area_struct* vma,
-	unsigned long address, pte_t * page_table, int dma, int wait)
+	unsigned long address, pte_t * page_table, int dma, int wait, int can_do_io)
 {
 	pte_t pte;
 	unsigned long entry;
@@ -100,6 +100,8 @@
 	if (page_map->age)
 		return 0;
 	if (pte_dirty(pte)) {
+		if(!can_do_io)
+			return 0;
 		if (vma->vm_ops && vma->vm_ops->swapout) {
 			pid_t pid = tsk->pid;
 			vma->vm_mm->rss--;
@@ -157,7 +159,8 @@
  */
 static inline int swap_out_pmd(struct task_struct * tsk, struct vm_area_struct * vma,
-	pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+	pmd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+	int can_do_io)
 {
 	pte_t * pte;
 	unsigned long pmd_end;
@@ -179,7 +182,7 @@
 	do {
 		int result;
 		tsk->swap_address = address + PAGE_SIZE;
-		result = try_to_swap_out(tsk, vma, address, pte, dma, wait);
+		result = try_to_swap_out(tsk, vma, address, pte, dma, wait, can_do_io);
 		if (result)
 			return result;
 		address += PAGE_SIZE;
@@ -189,7 +192,8 @@
 }
 
 static inline int swap_out_pgd(struct task_struct * tsk, struct vm_area_struct * vma,
-	pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait)
+	pgd_t *dir, unsigned long address, unsigned long end, int dma, int wait,
+	int can_do_io)
 {
 	pmd_t * pmd;
 	unsigned long pgd_end;
@@ -209,7 +213,7 @@
 		end = pgd_end;
 
 	do {
-		int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait);
+		int result = swap_out_pmd(tsk, vma, pmd, address, end, dma, wait, can_do_io);
 		if (result)
 			return result;
 		address = (address + PMD_SIZE) & PMD_MASK;
@@ -219,7 +223,7 @@
 }
 
 static int swap_out_vma(struct task_struct * tsk, struct vm_area_struct * vma,
-	pgd_t *pgdir, unsigned long start, int dma, int wait)
+	pgd_t *pgdir, unsigned long start, int dma, int wait, int can_do_io)
 {
 	unsigned long end;
 
@@ -230,7 +234,7 @@
 
 	end = vma->vm_end;
 	while (start < end) {
-		int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait);
+		int result = swap_out_pgd(tsk, vma, pgdir, start, end, dma, wait, can_do_io);
 		if (result)
 			return result;
 		start = (start + PGDIR_SIZE) & PGDIR_MASK;
@@ -239,7 +243,7 @@
 	return 0;
 }
 
-static int swap_out_process(struct task_struct * p, int dma, int wait)
+static int swap_out_process(struct task_struct * p, int dma, int wait, int can_do_io)
 {
 	unsigned long address;
 	struct vm_area_struct* vma;
@@ -260,7 +264,7 @@
 		address = vma->vm_start;
 
 	for (;;) {
-		int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait);
+		int result = swap_out_vma(p, vma, pgd_offset(p->mm, address), address, dma, wait, can_do_io);
 		if (result)
 			return result;
 		vma = vma->vm_next;
@@ -272,7 +276,7 @@
 	return 0;
 }
 
-static int swap_out(unsigned int priority, int dma, int wait)
+static int swap_out(unsigned int priority, int dma, int wait, int gfp_level)
 {
 	static int swap_task;
 	int loop, counter;
@@ -311,7 +315,7 @@
 		}
 		if (!--p->swap_cnt)
 			swap_task++;
-		switch (swap_out_process(p, dma, wait)) {
+		switch (swap_out_process(p, dma, wait, gfp_level != GFP_ATOMIC_IO)) {
 			case 0:
 				if (p->swap_cnt)
 					swap_task++;
@@ -343,15 +347,15 @@
 	switch (state) {
 		do {
 		case 0:
-			if (shrink_mmap(i, dma))
+			if (shrink_mmap(i, priority, dma))
 				return 1;
 			state = 1;
 		case 1:
-			if (shm_swap(i, dma))
+			if (priority != GFP_ATOMIC_IO && shm_swap(i, dma))
 				return 1;
 			state = 2;
 		default:
-			if (swap_out(i, dma, wait))
+			if (swap_out(i, dma, wait, priority))
 				return 1;
 			state = 0;
 		i--;
--- linux/include/linux/mm.h.orig	Sat Mar 22 22:40:28 1997
+++ linux/include/linux/mm.h	Sat Jun 14 18:58:08 1997
@@ -295,7 +295,7 @@
 
 /* filemap.c */
 extern unsigned long page_unuse(unsigned long);
-extern int shrink_mmap(int, int);
+extern int shrink_mmap(int, int, int);
 extern void truncate_inode_pages(struct inode *, unsigned long);
 
 #define GFP_BUFFER	0x00
@@ -304,6 +304,7 @@
 #define GFP_KERNEL	0x03
 #define GFP_NOBUFFER	0x04
 #define GFP_NFS		0x05
+#define GFP_ATOMIC_IO	0x06
 
 /* Flag - indicates that the buffer will be suitable for DMA.  Ignored on some
    platforms, used as appropriate on others */