[patch 04/26] Xen-paravirt_ops: Add pagetable accessors to pack and unpack pagetable entries

From: Jeremy Fitzhardinge
Date: Tue Feb 27 2007 - 03:45:23 EST


Add a set of accessors to pack, unpack and modify page table entries
(at all levels). This allows a paravirt implementation to control the
contents of pgd/pmd/pte entries. For example, Xen uses this to
convert the (pseudo-)physical address into a machine address when
populating a pagetable entry, and converting back to pphys address
when an entry is read.

Signed-off-by: Jeremy Fitzhardinge <jeremy@xxxxxxxxxxxxx>

---
arch/i386/kernel/paravirt.c | 130 +++++++++++++++++++++++++++++++------
arch/i386/kernel/vmlinux.lds.S | 3
include/asm-i386/page.h | 25 +++++--
include/asm-i386/paravirt.h | 68 ++++++++++++++++++-
include/asm-i386/pgtable-2level.h | 5 -
include/asm-i386/pgtable-3level.h | 41 ++++-------
6 files changed, 217 insertions(+), 55 deletions(-)

===================================================================
--- a/arch/i386/kernel/paravirt.c
+++ b/arch/i386/kernel/paravirt.c
@@ -395,38 +395,76 @@ static void native_flush_tlb_single(u32
}

#ifndef CONFIG_X86_PAE
-static void native_set_pte(pte_t *ptep, pte_t pteval)
+void native_set_pte(pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
}

-static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval)
+void native_set_pte_at(struct mm_struct *mm, u32 addr,
+ pte_t *ptep, pte_t pteval)
{
*ptep = pteval;
}

-static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
+void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
{
*pmdp = pmdval;
}

+unsigned long native_pte_val(pte_t pte)
+{
+ return pte.pte_low;
+}
+
+unsigned long native_pmd_val(pmd_t pmd)
+{
+ BUG();
+ return 0;
+}
+
+unsigned long native_pgd_val(pgd_t pgd)
+{
+ return pgd.pgd;
+}
+
+pte_t native_make_pte(unsigned long pte)
+{
+ return (pte_t){ pte };
+}
+
+pmd_t native_make_pmd(unsigned long pmd)
+{
+ BUG();
+}
+
+pgd_t native_make_pgd(unsigned long pgd)
+{
+ return (pgd_t){ pgd };
+}
+
+pte_t native_ptep_get_and_clear(pte_t *ptep)
+{
+ return __pte(xchg(&(ptep)->pte_low, 0));
+}
+
#else /* CONFIG_X86_PAE */

-static void native_set_pte(pte_t *ptep, pte_t pte)
+void native_set_pte(pte_t *ptep, pte_t pte)
{
ptep->pte_high = pte.pte_high;
smp_wmb();
ptep->pte_low = pte.pte_low;
}

-static void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
+void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte)
{
ptep->pte_high = pte.pte_high;
smp_wmb();
ptep->pte_low = pte.pte_low;
}

-static void native_set_pte_present(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte)
+void native_set_pte_present(struct mm_struct *mm, u32 addr,
+ pte_t *ptep, pte_t pte)
{
ptep->pte_low = 0;
smp_wmb();
@@ -435,35 +473,78 @@ static void native_set_pte_present(struc
ptep->pte_low = pte.pte_low;
}

-static void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
-{
- set_64bit((unsigned long long *)ptep,pte_val(pteval));
-}
-
-static void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
-{
- set_64bit((unsigned long long *)pmdp,pmd_val(pmdval));
-}
-
-static void native_set_pud(pud_t *pudp, pud_t pudval)
+void native_set_pte_atomic(pte_t *ptep, pte_t pteval)
+{
+ set_64bit((unsigned long long *)ptep,native_pte_val(pteval));
+}
+
+void native_set_pmd(pmd_t *pmdp, pmd_t pmdval)
+{
+ set_64bit((unsigned long long *)pmdp,native_pmd_val(pmdval));
+}
+
+void native_set_pud(pud_t *pudp, pud_t pudval)
{
*pudp = pudval;
}

-static void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *ptep)
+void native_pte_clear(struct mm_struct *mm, u32 addr, pte_t *ptep)
{
ptep->pte_low = 0;
smp_wmb();
ptep->pte_high = 0;
}

-static void native_pmd_clear(pmd_t *pmd)
+void native_pmd_clear(pmd_t *pmd)
{
u32 *tmp = (u32 *)pmd;
*tmp = 0;
smp_wmb();
*(tmp + 1) = 0;
}
+
+unsigned long long native_pte_val(pte_t pte)
+{
+ return pte.pte_low | ((unsigned long long)pte.pte_high << 32);
+}
+
+unsigned long long native_pmd_val(pmd_t pmd)
+{
+ return pmd.pmd;
+}
+
+unsigned long long native_pgd_val(pgd_t pgd)
+{
+ return pgd.pgd;
+}
+
+pte_t native_make_pte(unsigned long long pte)
+{
+ return (pte_t){ .pte_low = pte, .pte_high = pte >> 32 };
+}
+
+pmd_t native_make_pmd(unsigned long long pmd)
+{
+ return (pmd_t){ pmd };
+}
+
+pgd_t native_make_pgd(unsigned long long pgd)
+{
+ return (pgd_t){ pgd };
+}
+
+pte_t native_ptep_get_and_clear(pte_t *ptep)
+{
+ pte_t res;
+
+ /* xchg acts as a barrier before the setting of the high bits */
+ res.pte_low = xchg(&ptep->pte_low, 0);
+ res.pte_high = ptep->pte_high;
+ ptep->pte_high = 0;
+
+ return res;
+}
+
#endif /* CONFIG_X86_PAE */

/* These are in entry.S */
@@ -556,6 +637,9 @@ struct paravirt_ops paravirt_ops = {
.set_pmd = native_set_pmd,
.pte_update = paravirt_nop,
.pte_update_defer = paravirt_nop,
+
+ .ptep_get_and_clear = native_ptep_get_and_clear,
+
#ifdef CONFIG_X86_PAE
.set_pte_atomic = native_set_pte_atomic,
.set_pte_present = native_set_pte_present,
@@ -564,6 +648,14 @@ struct paravirt_ops paravirt_ops = {
.pmd_clear = native_pmd_clear,
#endif

+ .pte_val = native_pte_val,
+ .pmd_val = native_pmd_val,
+ .pgd_val = native_pgd_val,
+
+ .make_pte = native_make_pte,
+ .make_pmd = native_make_pmd,
+ .make_pgd = native_make_pgd,
+
.irq_enable_sysexit = native_irq_enable_sysexit,
.iret = native_iret,

===================================================================
--- a/arch/i386/kernel/vmlinux.lds.S
+++ b/arch/i386/kernel/vmlinux.lds.S
@@ -21,6 +21,9 @@
#include <asm/page.h>
#include <asm/cache.h>
#include <asm/boot.h>
+
+#undef ENTRY
+#undef ALIGN

OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386")
OUTPUT_ARCH(i386)
===================================================================
--- a/include/asm-i386/page.h
+++ b/include/asm-i386/page.h
@@ -11,7 +11,6 @@

#ifdef __KERNEL__
#ifndef __ASSEMBLY__
-

#ifdef CONFIG_X86_USE_3DNOW

@@ -48,20 +47,26 @@ typedef struct { unsigned long long pmd;
typedef struct { unsigned long long pmd; } pmd_t;
typedef struct { unsigned long long pgd; } pgd_t;
typedef struct { unsigned long long pgprot; } pgprot_t;
+#ifndef CONFIG_PARAVIRT
#define pmd_val(x) ((x).pmd)
#define pte_val(x) ((x).pte_low | ((unsigned long long)(x).pte_high << 32))
+#define __pte(x) ((pte_t) { .pte_low = (x), .pte_high = ((x) >> 32) } )
#define __pmd(x) ((pmd_t) { (x) } )
+#endif /* CONFIG_PARAVIRT */
#define HPAGE_SHIFT 21
#include <asm-generic/pgtable-nopud.h>
-#else
+#else /* !CONFIG_X86_PAE */
typedef struct { unsigned long pte_low; } pte_t;
typedef struct { unsigned long pgd; } pgd_t;
typedef struct { unsigned long pgprot; } pgprot_t;
#define boot_pte_t pte_t /* or would you rather have a typedef */
+#ifndef CONFIG_PARAVIRT
+#define __pte(x) ((pte_t) { (x) })
#define pte_val(x) ((x).pte_low)
+#endif
#define HPAGE_SHIFT 22
#include <asm-generic/pgtable-nopmd.h>
-#endif
+#endif /* CONFIG_X86_PAE */
#define PTE_MASK PAGE_MASK

#ifdef CONFIG_HUGETLB_PAGE
@@ -71,12 +76,13 @@ typedef struct { unsigned long pgprot; }
#define HAVE_ARCH_HUGETLB_UNMAPPED_AREA
#endif

+#define pgprot_val(x) ((x).pgprot)
+#define __pgprot(x) ((pgprot_t) { (x) } )
+
+#ifndef CONFIG_PARAVIRT
#define pgd_val(x) ((x).pgd)
-#define pgprot_val(x) ((x).pgprot)
-
-#define __pte(x) ((pte_t) { (x) } )
#define __pgd(x) ((pgd_t) { (x) } )
-#define __pgprot(x) ((pgprot_t) { (x) } )
+#endif

#endif /* !__ASSEMBLY__ */

@@ -143,6 +149,11 @@ extern int page_is_ram(unsigned long pag
#include <asm-generic/memory_model.h>
#include <asm-generic/page.h>

+#ifdef CONFIG_PARAVIRT
+/* After pte_t, etc, have been defined */
+#include <asm/paravirt.h>
+#endif
+
#ifndef CONFIG_COMPAT_VDSO
#define __HAVE_ARCH_GATE_AREA 1
#endif
===================================================================
--- a/include/asm-i386/paravirt.h
+++ b/include/asm-i386/paravirt.h
@@ -25,6 +25,8 @@
#define CLBR_ANY 0x7

#ifndef __ASSEMBLY__
+#include <linux/types.h>
+
struct thread_struct;
struct Xgt_desc_struct;
struct tss_struct;
@@ -140,12 +142,31 @@ struct paravirt_ops
void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval);
void (*pte_update)(struct mm_struct *mm, u32 addr, pte_t *ptep);
void (*pte_update_defer)(struct mm_struct *mm, u32 addr, pte_t *ptep);
+
+ pte_t (*ptep_get_and_clear)(pte_t *ptep);
+
#ifdef CONFIG_X86_PAE
void (*set_pte_atomic)(pte_t *ptep, pte_t pteval);
- void (*set_pte_present)(struct mm_struct *mm, unsigned long addr, pte_t *ptep, pte_t pte);
+ void (*set_pte_present)(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte);
void (*set_pud)(pud_t *pudp, pud_t pudval);
- void (*pte_clear)(struct mm_struct *mm, unsigned long addr, pte_t *ptep);
+ void (*pte_clear)(struct mm_struct *mm, u32 addr, pte_t *ptep);
void (*pmd_clear)(pmd_t *pmdp);
+
+ unsigned long long (*pte_val)(pte_t);
+ unsigned long long (*pmd_val)(pmd_t);
+ unsigned long long (*pgd_val)(pgd_t);
+
+ pte_t (*make_pte)(unsigned long long pte);
+ pmd_t (*make_pmd)(unsigned long long pmd);
+ pgd_t (*make_pgd)(unsigned long long pgd);
+#else
+ unsigned long (*pte_val)(pte_t);
+ unsigned long (*pmd_val)(pmd_t);
+ unsigned long (*pgd_val)(pgd_t);
+
+ pte_t (*make_pte)(unsigned long pte);
+ pmd_t (*make_pmd)(unsigned long pmd);
+ pgd_t (*make_pgd)(unsigned long pgd);
#endif

void (*set_lazy_mode)(int mode);
@@ -163,6 +184,24 @@ struct paravirt_ops
__attribute__((__section__(".paravirtprobe"))) = fn

extern struct paravirt_ops paravirt_ops;
+
+#ifdef CONFIG_X86_PAE
+unsigned long long native_pte_val(pte_t);
+unsigned long long native_pmd_val(pmd_t);
+unsigned long long native_pgd_val(pgd_t);
+
+pte_t native_make_pte(unsigned long long pte);
+pmd_t native_make_pmd(unsigned long long pmd);
+pgd_t native_make_pgd(unsigned long long pgd);
+#else
+unsigned long native_pte_val(pte_t);
+unsigned long native_pmd_val(pmd_t);
+unsigned long native_pgd_val(pgd_t);
+
+pte_t native_make_pte(unsigned long pte);
+pmd_t native_make_pmd(unsigned long pmd);
+pgd_t native_make_pgd(unsigned long pgd);
+#endif

#define paravirt_enabled() (paravirt_ops.paravirt_enabled)

@@ -215,6 +254,8 @@ static inline void __cpuid(unsigned int
#define read_cr4() paravirt_ops.read_cr4()
#define read_cr4_safe(x) paravirt_ops.read_cr4_safe()
#define write_cr4(x) paravirt_ops.write_cr4(x)
+
+#define raw_ptep_get_and_clear(xp) (paravirt_ops.ptep_get_and_clear(xp))

static inline void raw_safe_halt(void)
{
@@ -297,6 +338,17 @@ static inline void halt(void)
(paravirt_ops.write_idt_entry((dt), (entry), (low), (high)))
#define set_iopl_mask(mask) (paravirt_ops.set_iopl_mask(mask))

+#define __pte(x) paravirt_ops.make_pte(x)
+#define __pgd(x) paravirt_ops.make_pgd(x)
+
+#define pte_val(x) paravirt_ops.pte_val(x)
+#define pgd_val(x) paravirt_ops.pgd_val(x)
+
+#ifdef CONFIG_X86_PAE
+#define __pmd(x) paravirt_ops.make_pmd(x)
+#define pmd_val(x) paravirt_ops.pmd_val(x)
+#endif
+
/* The paravirtualized I/O functions */
static inline void slow_down_io(void) {
paravirt_ops.io_delay();
@@ -336,6 +388,18 @@ static inline void setup_secondary_clock
paravirt_ops.setup_secondary_clock();
}
#endif
+
+
+void native_set_pte(pte_t *ptep, pte_t pteval);
+void native_set_pte_at(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pteval);
+void native_set_pmd(pmd_t *pmdp, pmd_t pmdval);
+void native_set_pte_present(struct mm_struct *mm, u32 addr, pte_t *ptep, pte_t pte);
+void native_set_pte_atomic(pte_t *ptep, pte_t pteval);
+void native_set_pud(pud_t *pudp, pud_t pudval);
+void native_pte_clear(struct mm_struct *mm, u32 addr, pte_t *ptep);
+void native_pmd_clear(pmd_t *pmd);
+pte_t native_ptep_get_and_clear(pte_t *ptep);
+void native_nop(void);

#ifdef CONFIG_SMP
static inline void startup_ipi_hook(int phys_apicid, unsigned long start_eip,
===================================================================
--- a/include/asm-i386/pgtable-2level.h
+++ b/include/asm-i386/pgtable-2level.h
@@ -15,6 +15,7 @@
#define set_pte(pteptr, pteval) (*(pteptr) = pteval)
#define set_pte_at(mm,addr,ptep,pteval) set_pte(ptep,pteval)
#define set_pmd(pmdptr, pmdval) (*(pmdptr) = (pmdval))
+#define raw_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0))
#endif

#define set_pte_atomic(pteptr, pteval) set_pte(pteptr,pteval)
@@ -23,11 +24,9 @@
#define pte_clear(mm,addr,xp) do { set_pte_at(mm, addr, xp, __pte(0)); } while (0)
#define pmd_clear(xp) do { set_pmd(xp, __pmd(0)); } while (0)

-#define raw_ptep_get_and_clear(xp) __pte(xchg(&(xp)->pte_low, 0))
-
#define pte_page(x) pfn_to_page(pte_pfn(x))
#define pte_none(x) (!(x).pte_low)
-#define pte_pfn(x) ((unsigned long)(((x).pte_low >> PAGE_SHIFT)))
+#define pte_pfn(x) (pte_val(x) >> PAGE_SHIFT)
#define pfn_pte(pfn, prot) __pte(((pfn) << PAGE_SHIFT) | pgprot_val(prot))
#define pfn_pmd(pfn, prot) __pmd(((pfn) << PAGE_SHIFT) | pgprot_val(prot))

===================================================================
--- a/include/asm-i386/pgtable-3level.h
+++ b/include/asm-i386/pgtable-3level.h
@@ -98,6 +98,18 @@ static inline void pmd_clear(pmd_t *pmd)
smp_wmb();
*(tmp + 1) = 0;
}
+
+static inline pte_t raw_ptep_get_and_clear(pte_t *ptep)
+{
+ pte_t res;
+
+ /* xchg acts as a barrier before the setting of the high bits */
+ res.pte_low = xchg(&ptep->pte_low, 0);
+ res.pte_high = ptep->pte_high;
+ ptep->pte_high = 0;
+
+ return res;
+}
#endif

/*
@@ -119,18 +131,6 @@ static inline void pud_clear (pud_t * pu
#define pmd_offset(pud, address) ((pmd_t *) pud_page(*(pud)) + \
pmd_index(address))

-static inline pte_t raw_ptep_get_and_clear(pte_t *ptep)
-{
- pte_t res;
-
- /* xchg acts as a barrier before the setting of the high bits */
- res.pte_low = xchg(&ptep->pte_low, 0);
- res.pte_high = ptep->pte_high;
- ptep->pte_high = 0;
-
- return res;
-}
-
#define __HAVE_ARCH_PTE_SAME
static inline int pte_same(pte_t a, pte_t b)
{
@@ -146,28 +146,21 @@ static inline int pte_none(pte_t pte)

static inline unsigned long pte_pfn(pte_t pte)
{
- return (pte.pte_low >> PAGE_SHIFT) |
- (pte.pte_high << (32 - PAGE_SHIFT));
+ return pte_val(pte) >> PAGE_SHIFT;
}

extern unsigned long long __supported_pte_mask;

static inline pte_t pfn_pte(unsigned long page_nr, pgprot_t pgprot)
{
- pte_t pte;
-
- pte.pte_high = (page_nr >> (32 - PAGE_SHIFT)) | \
- (pgprot_val(pgprot) >> 32);
- pte.pte_high &= (__supported_pte_mask >> 32);
- pte.pte_low = ((page_nr << PAGE_SHIFT) | pgprot_val(pgprot)) & \
- __supported_pte_mask;
- return pte;
+ return __pte((((unsigned long long)page_nr << PAGE_SHIFT) |
+ pgprot_val(pgprot)) & __supported_pte_mask);
}

static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot)
{
- return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) | \
- pgprot_val(pgprot)) & __supported_pte_mask);
+ return __pmd((((unsigned long long)page_nr << PAGE_SHIFT) |
+ pgprot_val(pgprot)) & __supported_pte_mask);
}

/*

--

-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/