Re: Shared mmap write-out (was: Re: patch cow-swapin)-- 2.0, 2.1 at least]

Andrea Arcangeli (andrea@e-mind.com)
Mon, 28 Sep 1998 21:50:42 +0200 (CEST)


On Sun, 27 Sep 1998, Ion Badulescu wrote:

>On Sun, 27 Sep 1998, Andrea Arcangeli wrote:
>
>> >No, I've no clue (yet) how to fix it, but ideas are always welcome. I don't
>> >know if 2.1 has the same behavior -- no box is running 2.1 right now.
>>
>> Yes bo is full with 2.1 too, but I have not read your source and so I
>> don't know if this is the right behavior.
>
>I just ran the same exact program on solaris 2.6, and sure enough solaris
>does the right thing:
>
>1. no fsync on open file descriptors when exec-ing
>2. no multiple sync-ing of the same data in the children (tested by
>replacing the execl() with a fsync(fd); exit(0); sequence).

Could you try again with this patch of mine, which I just developed, applied ;->? It
should completely avoid the extra syncing. Yowww!

diff -urN /home/andrea/devel/kernel-tree/linux-2.1.122/mm/filemap.c linux/mm/filemap.c
--- /home/andrea/devel/kernel-tree/linux-2.1.122/mm/filemap.c Tue Aug 25 20:19:58 1998
+++ linux/mm/filemap.c Mon Sep 28 21:43:47 1998
@@ -5,6 +5,10 @@
*/

/*
+ * update_shared_mappings(), 1998 Andrea Arcangeli
+ */
+
+/*
* This file handles the generic file mmap semantics used by
* most "normal" filesystems (but you don't /have/ to use this:
* the NFS filesystem used to do this differently, for example)
@@ -1221,6 +1225,75 @@
return mk_pte(page,vma->vm_page_prot);
}

+static void update_one_shared_mapping(struct vm_area_struct *shared,
+ unsigned long address, pte_t orig_pte)
+{
+ pgd_t *pgd;
+ pmd_t *pmd;
+ pte_t *pte;
+ struct semaphore * mmap_sem = &shared->vm_mm->mmap_sem;
+
+ down(mmap_sem);
+
+ pgd = pgd_offset(shared->vm_mm, address);
+ if (pgd_none(*pgd))
+ goto out;
+ if (pgd_bad(*pgd)) {
+ printk(KERN_ERR "update_shared_mappings: bad pgd (%08lx)\n",
+ pgd_val(*pgd));
+ pgd_clear(pgd);
+ goto out;
+ }
+
+ pmd = pmd_offset(pgd, address);
+ if (pmd_none(*pmd))
+ goto out;
+ if (pmd_bad(*pmd))
+ {
+ printk(KERN_ERR "update_shared_mappings: bad pmd (%08lx)\n",
+ pmd_val(*pmd));
+ pmd_clear(pmd);
+ goto out;
+ }
+
+ pte = pte_offset(pmd, address);
+
+ if (pte_val(pte_mkclean(pte_mkyoung(*pte))) !=
+ pte_val(pte_mkclean(pte_mkyoung(orig_pte))))
+ goto out;
+
+ flush_page_to_ram(page(pte));
+ flush_cache_page(shared, address);
+ set_pte(pte, pte_mkclean(*pte));
+ flush_tlb_page(shared, address);
+
+ out:
+ up(mmap_sem);
+}
+
+static void update_shared_mappings(struct vm_area_struct *this,
+ unsigned long address,
+ pte_t orig_pte)
+{
+ if (this->vm_flags & VM_SHARED)
+ {
+ struct file * filp = this->vm_file;
+ if (filp)
+ {
+ struct inode * inode = filp->f_dentry->d_inode;
+ struct vm_area_struct * shared;
+
+ for (shared = inode->i_mmap; shared;
+ shared = shared->vm_next_share)
+ {
+ if (shared == this)
+ continue;
+ update_one_shared_mapping(shared, address,
+ orig_pte);
+ }
+ }
+ }
+}

static inline int filemap_sync_pte(pte_t * ptep, struct vm_area_struct *vma,
unsigned long address, unsigned int flags)
@@ -1238,6 +1311,7 @@
flush_cache_page(vma, address);
set_pte(ptep, pte_mkclean(pte));
flush_tlb_page(vma, address);
+ update_shared_mappings(vma, address, pte);
page = pte_page(pte);
atomic_inc(&mem_map[MAP_NR(page)].count);
} else {
diff -urN /home/andrea/devel/kernel-tree/linux-2.1.122/mm/mmap.c linux/mm/mmap.c
--- /home/andrea/devel/kernel-tree/linux-2.1.122/mm/mmap.c Wed Aug 26 15:01:16 1998
+++ linux/mm/mmap.c Mon Sep 28 20:24:39 1998
@@ -498,9 +498,6 @@
free = free->vm_next;
freed = 1;

- mm->map_count--;
- remove_shared_vm_struct(mpnt);
-
st = addr < mpnt->vm_start ? mpnt->vm_start : addr;
end = addr+len;
end = end > mpnt->vm_end ? mpnt->vm_end : end;
@@ -508,6 +505,9 @@

if (mpnt->vm_ops && mpnt->vm_ops->unmap)
mpnt->vm_ops->unmap(mpnt, st, size);
+
+ mm->map_count--;
+ remove_shared_vm_struct(mpnt);

flush_cache_range(mm, st, end);
zap_page_range(mm, st, size);

Andrea[s] Arcangeli

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/