Re: [PATCH] Enable mprotect on huge pages

From: Zhang, Yanmin
Date: Mon Feb 27 2006 - 01:35:01 EST


On Mon, 2006-02-27 at 13:36, Zhang, Yanmin wrote:
> On Mon, 2006-02-27 at 07:09, David Gibson wrote:
> > On Fri, Feb 24, 2006 at 02:28:44PM -0800, Andrew Morton wrote:
> > > "Zhang, Yanmin" <yanmin_zhang@xxxxxxxxxxxxxxx> wrote:
> > > >
> > > > From: Zhang, Yanmin <yanmin.zhang@xxxxxxxxx>
> > > >
> > > > 2.6.16-rc3 uses hugetlb on-demand paging, but it doesn_t support hugetlb
> > > > mprotect. My patch against 2.6.16-rc3 enables this capability.

Based on David's comments, I worked out a new patch against 2.6.16-rc4.
Thank David.

Signed-off-by: Zhang Yanmin <yanmin.zhang@xxxxxxxxx>

I tested it on i386/x86_64/ia64. Who could help test it on other
platforms, such like PPC64?

---

diff -Nraup linux-2.6.16-rc4/include/linux/hugetlb.h linux-2.6.16-rc4_mprotect/include/linux/hugetlb.h
--- linux-2.6.16-rc4/include/linux/hugetlb.h 2006-02-22 19:19:04.000000000 +0800
+++ linux-2.6.16-rc4_mprotect/include/linux/hugetlb.h 2006-02-27 20:58:33.000000000 +0800
@@ -41,6 +41,8 @@ struct page *follow_huge_pmd(struct mm_s
pmd_t *pmd, int write);
int is_aligned_hugepage_range(unsigned long addr, unsigned long len);
int pmd_huge(pmd_t pmd);
+void hugetlb_change_protection(struct vm_area_struct *vma,
+ unsigned long address, unsigned long end, pgprot_t newprot);

#ifndef ARCH_HAS_HUGEPAGE_ONLY_RANGE
#define is_hugepage_only_range(mm, addr, len) 0
@@ -101,6 +103,8 @@ static inline unsigned long hugetlb_tota
#define free_huge_page(p) ({ (void)(p); BUG(); })
#define hugetlb_fault(mm, vma, addr, write) ({ BUG(); 0; })

+#define hugetlb_change_protection(vma, address, end, newprot)
+
#ifndef HPAGE_MASK
#define HPAGE_MASK PAGE_MASK /* Keep the compiler happy */
#define HPAGE_SIZE PAGE_SIZE
diff -Nraup linux-2.6.16-rc4/mm/hugetlb.c linux-2.6.16-rc4_mprotect/mm/hugetlb.c
--- linux-2.6.16-rc4/mm/hugetlb.c 2006-02-22 19:19:05.000000000 +0800
+++ linux-2.6.16-rc4_mprotect/mm/hugetlb.c 2006-02-27 20:57:17.000000000 +0800
@@ -572,3 +572,32 @@ int follow_hugetlb_page(struct mm_struct

return i;
}
+
+void hugetlb_change_protection(struct vm_area_struct *vma,
+ unsigned long address, unsigned long end, pgprot_t newprot)
+{
+ struct mm_struct *mm = vma->vm_mm;
+ unsigned long start = address;
+ pte_t *ptep;
+ pte_t pte;
+
+ BUG_ON(address >= end);
+ flush_cache_range(vma, address, end);
+
+ spin_lock(&mm->page_table_lock);
+ for (; address < end; address += HPAGE_SIZE) {
+ ptep = huge_pte_offset(mm, address);
+ if (!ptep)
+ continue;
+ if (!pte_none(*ptep)) {
+ pte = huge_ptep_get_and_clear(mm, address, ptep);
+ pte = pte_modify(pte, newprot);
+ set_huge_pte_at(mm, addr, ptep, pte);
+ lazy_mmu_prot_update(pte);
+ }
+ }
+ spin_unlock(&mm->page_table_lock);
+
+ flush_tlb_range(vma, start, end);
+}
+
diff -Nraup linux-2.6.16-rc4/mm/mprotect.c linux-2.6.16-rc4_mprotect/mm/mprotect.c
--- linux-2.6.16-rc4/mm/mprotect.c 2006-02-22 19:18:21.000000000 +0800
+++ linux-2.6.16-rc4_mprotect/mm/mprotect.c 2006-02-27 20:55:10.000000000 +0800
@@ -124,7 +124,7 @@ mprotect_fixup(struct vm_area_struct *vm
* a MAP_NORESERVE private mapping to writable will now reserve.
*/
if (newflags & VM_WRITE) {
- if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED|VM_HUGETLB))) {
+ if (!(oldflags & (VM_ACCOUNT|VM_WRITE|VM_SHARED))) {
charged = nrpages;
if (security_vm_enough_memory(charged))
return -ENOMEM;
@@ -166,7 +166,10 @@ success:
*/
vma->vm_flags = newflags;
vma->vm_page_prot = newprot;
- change_protection(vma, start, end, newprot);
+ if (is_vm_hugetlb_page(vma))
+ hugetlb_change_protection(vma, start, end, newprot);
+ else
+ change_protection(vma, start, end, newprot);
vm_stat_account(mm, oldflags, vma->vm_file, -nrpages);
vm_stat_account(mm, newflags, vma->vm_file, nrpages);
return 0;
@@ -240,11 +243,6 @@ sys_mprotect(unsigned long start, size_t

/* Here we know that vma->vm_start <= nstart < vma->vm_end. */

- if (is_vm_hugetlb_page(vma)) {
- error = -EACCES;
- goto out;
- }
-
newflags = vm_flags | (vma->vm_flags & ~(VM_READ | VM_WRITE | VM_EXEC));

/* newflags >> 4 shift VM_MAY% in place of VM_% */
@@ -260,6 +258,12 @@ sys_mprotect(unsigned long start, size_t
tmp = vma->vm_end;
if (tmp > end)
tmp = end;
+ if (is_vm_hugetlb_page(vma) &&
+ is_aligned_hugepage_range(nstart, tmp - nstart)) {
+ error = -EINVAL;
+ goto out;
+ }
+
error = mprotect_fixup(vma, &prev, nstart, tmp, newflags);
if (error)
goto out;


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/