[patch 07/11] PAT x86: pat-conflict resolution using linear list

From: venkatesh . pallipadi
Date: Thu Jan 10 2008 - 13:49:47 EST


Straightforward port of pat-conflict.patch to x86 tree. Use a linear
list to keep track of all reserved region mappings.
Only UC access is allowed for RAM regions for now.

Signed-off-by: Venkatesh Pallipadi <venkatesh.pallipadi@xxxxxxxxx>
Signed-off-by: Suresh Siddha <suresh.b.siddha@xxxxxxxxx>
Index: linux-2.6.git/arch/x86/mm/ioremap_64.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/ioremap_64.c 2008-01-08 12:43:13.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/ioremap_64.c 2008-01-08 12:44:20.000000000 -0800
@@ -20,6 +20,7 @@
#include <asm/cacheflush.h>
#include <asm/proto.h>
#include <asm/e820.h>
+#include <asm/pat.h>

unsigned long __phys_addr(unsigned long x)
{
@@ -105,12 +106,23 @@
remove_vm_area((void *)(PAGE_MASK & (unsigned long) addr));
return NULL;
}
+
+ /* For plain ioremap() get the existing attributes. Otherwise
+ check against the existing ones */
+ if (reserve_mattr(phys_addr, phys_addr + size, flags,
+ flags ? NULL : &flags) < 0)
+ goto out;
+
if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
- area->flags &= 0xffffff;
- vunmap(addr);
- return NULL;
+ free_mattr(phys_addr, phys_addr + size, flags);
+ goto out;
}
return (__force void __iomem *) (offset + (char *)addr);
+
+out:
+ area->flags &= 0xffffff;
+ vunmap(addr);
+ return NULL;
}
EXPORT_SYMBOL(__ioremap);

@@ -178,8 +190,11 @@
}

/* Reset the direct mapping. Can block */
- if (p->flags >> 20)
- ioremap_change_attr(p->phys_addr, p->size, 0);
+ if (p->flags >> 20) {
+ free_mattr(p->phys_addr, p->phys_addr + get_vm_area_size(p),
+ p->flags>>20);
+ ioremap_change_attr(p->phys_addr, get_vm_area_size(p), 0);
+ }

/* Finally remove it */
o = remove_vm_area((void *)addr);
Index: linux-2.6.git/arch/x86/mm/pat.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/pat.c 2008-01-08 12:43:13.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/pat.c 2008-01-08 12:45:05.000000000 -0800
@@ -5,6 +5,9 @@
#include <asm/msr.h>
#include <asm/tlbflush.h>
#include <asm/processor.h>
+#include <asm/pgtable.h>
+#include <asm/pat.h>
+#include <asm/e820.h>

static u64 boot_pat_state;
int pat_wc_enabled = 0;
@@ -68,3 +71,116 @@
{
}

+/* The global memattr list keeps track of caching attributes for specific
+ physical memory areas. Conflicting caching attributes in different
+ mappings can cause CPU cache corruption. To avoid this we keep track.
+
+ The list is sorted and can contain multiple entries for each address
+ (this allows reference counting for overlapping areas). All the aliases
+ have the same cache attributes of course. Zero attributes are represented
+ as holes.
+
+ Currently the data structure is a list because the number of mappings
+ is right now expected to be relatively small. If this should be a problem
+ it could be changed to a rbtree or similar.
+
+ mattr_lock protects the whole list. */
+
+/* One reserved physical range and its caching attribute. */
+struct memattr {
+ u64 start; /* physical start of the range */
+ u64 end; /* physical end, exclusive (callers pass phys_addr + size) */
+ unsigned long attr; /* page cache attribute bits (e.g. _PAGE_PCD) */
+ struct list_head nd; /* link in mattr_list, sorted by address */
+};
+
+static LIST_HEAD(mattr_list);
+static DEFINE_SPINLOCK(mattr_lock); /* protects memattr list */
+
+/*
+ * reserve_mattr - claim a caching attribute for physical range [start, end).
+ * @attr: requested attribute bits (_PAGE_PCD, _PAGE_WC, ...); 0 = plain
+ * ioremap with no preference.
+ * @fattr: if non-NULL, receives the attribute actually in effect for the
+ * range (an existing overlapping reservation's attribute wins).
+ * A NULL @fattr means the caller insists on exactly @attr.
+ *
+ * Returns 0 on success, -EINVAL for disallowed requests on RAM, -EBUSY on
+ * a conflicting existing reservation, -ENOMEM on allocation failure.
+ */
+int reserve_mattr(u64 start, u64 end, unsigned long attr, unsigned long *fattr)
+{
+ struct memattr *ma = NULL, *ml;
+ int err = 0;
+
+ /* Default answer: the caller gets what it asked for. */
+ if (fattr)
+ *fattr = attr;
+
+ /* RAM regions are not tracked in the list; only UC access is
+ allowed for RAM for now (see patch description). */
+ if (is_memory_any_valid(start, end)) {
+ if (!is_memory_all_valid(start, end) && !fattr)
+ return -EINVAL;
+
+ if (attr & _PAGE_WC) {
+ if (!fattr)
+ return -EINVAL;
+ else
+ /* Downgrade write-combining to uncached. */
+ *fattr = _PAGE_PCD;
+ }
+
+ return 0;
+ }
+
+ /* Allocate outside the lock; may sleep (GFP_KERNEL). */
+ ma = kmalloc(sizeof(struct memattr), GFP_KERNEL);
+ if (!ma)
+ return -ENOMEM;
+ ma->start = start;
+ ma->end = end;
+ ma->attr = attr;
+
+ spin_lock(&mattr_lock);
+ list_for_each_entry(ml, &mattr_list, nd) {
+ if (ml->start <= start && ml->end >= end) {
+ /* Existing entry covers the range: adopt its
+ attribute when the caller is flexible ... */
+ if (fattr)
+ ma->attr = *fattr = ml->attr;
+
+ /* ... otherwise a differing attribute is a
+ conflict. */
+ if (!fattr && attr != ml->attr) {
+ printk(
+ KERN_DEBUG "%s:%d conflicting cache attribute %Lx-%Lx %lx<->%lx\n",
+ current->comm, current->pid,
+ start, end, attr, ml->attr);
+ err = -EBUSY;
+ break;
+ }
+ } else if (ml->start >= end) {
+ /* First entry entirely past us: insert before it
+ to keep the list sorted. ma now owned by list. */
+ list_add(&ma->nd, ml->nd.prev);
+ ma = NULL;
+ break;
+ }
+ }
+
+ /* NOTE(review): after a covering match the scan continues; if no
+ later entry starts at/after 'end' the alias is appended at the
+ tail below — confirm this preserves the sort order the list
+ comment promises. */
+ if (err)
+ kfree(ma);
+ else if (ma)
+ list_add_tail(&ma->nd, &mattr_list);
+
+ spin_unlock(&mattr_lock);
+ return err;
+}
+
+/*
+ * free_mattr - release a reservation made by reserve_mattr().
+ * Matching is by exact [start, end); @attr is only compared for a
+ * diagnostic. Returns 0 on success or for untracked RAM ranges.
+ * Returns -EBUSY when @attr != 0 and no matching entry exists; note
+ * that a bogus free with @attr == 0 succeeds silently (err starts 0).
+ */
+int free_mattr(u64 start, u64 end, unsigned long attr)
+{
+ struct memattr *ml;
+ int err = attr ? -EBUSY : 0;
+
+ /* RAM ranges were never entered into the list. */
+ if (is_memory_any_valid(start, end))
+ return 0;
+
+ spin_lock(&mattr_lock);
+ list_for_each_entry(ml, &mattr_list, nd) {
+ if (ml->start == start && ml->end == end) {
+ /* Caller's idea of the attribute should match
+ what was recorded; warn if not. */
+ if (ml->attr != attr)
+ printk(KERN_DEBUG
+ "%s:%d conflicting cache attributes on free %Lx-%Lx %lx<->%lx\n",
+ current->comm, current->pid, start, end, attr,ml->attr);
+ list_del(&ml->nd);
+ kfree(ml);
+ err = 0;
+ break;
+ }
+ }
+ spin_unlock(&mattr_lock);
+ if (err)
+ printk(KERN_DEBUG "%s:%d freeing invalid mattr %Lx-%Lx %lx\n",
+ current->comm, current->pid,
+ start, end, attr);
+ return err;
+}
+
Index: linux-2.6.git/include/asm-x86/pat.h
===================================================================
--- /dev/null 1970-01-01 00:00:00.000000000 +0000
+++ linux-2.6.git/include/asm-x86/pat.h 2008-01-08 12:44:20.000000000 -0800
@@ -0,0 +1,12 @@
+#ifndef _ASM_PAT_H
+#define _ASM_PAT_H 1
+
+#include <linux/types.h>
+
+/* Handle the page attribute table (PAT) of the CPU */
+
+/* Reserve a caching attribute for physical range [start, end); if fattr
+ is non-NULL it receives the attribute actually in effect. 0 on success,
+ negative errno on conflict/failure. */
+int reserve_mattr(u64 start, u64 end, unsigned long attr, unsigned long *fattr);
+/* Release a reservation matched by exact [start, end). */
+int free_mattr(u64 start, u64 end, unsigned long attr);
+
+#endif
+
Index: linux-2.6.git/arch/x86/mm/ioremap_32.c
===================================================================
--- linux-2.6.git.orig/arch/x86/mm/ioremap_32.c 2008-01-08 12:43:13.000000000 -0800
+++ linux-2.6.git/arch/x86/mm/ioremap_32.c 2008-01-08 12:44:20.000000000 -0800
@@ -17,6 +17,7 @@
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
#include <asm/pgtable.h>
+#include <asm/pat.h>

#define ISA_START_ADDRESS 0xa0000
#define ISA_END_ADDRESS 0x100000
@@ -26,6 +27,42 @@
*/

/*
+ * Fix up the linear direct mapping of the kernel to avoid cache attribute
+ * conflicts.
+ */
+/* Change the kernel direct-mapping attribute for [phys_addr,
+ phys_addr + size) to @flags (0 restores the default __PAGE_KERNEL).
+ Only memory below high_memory has a direct mapping to fix; higher
+ physical addresses are left alone and 0 is returned. Can block
+ (global_flush_tlb). */
+static int
+ioremap_change_attr(unsigned long phys_addr, unsigned long size,
+ unsigned long flags)
+{
+ unsigned long last_addr;
+ int err = 0;
+
+ /* Guaranteed to be > phys_addr, as per __ioremap() */
+ last_addr = phys_addr + size - 1;
+ if (last_addr < virt_to_phys(high_memory) - 1) {
+ unsigned long vaddr = (unsigned long)__va(phys_addr);
+ unsigned long npages;
+
+ phys_addr &= PAGE_MASK;
+
+ /* This might overflow and become zero.. */
+ last_addr = PAGE_ALIGN(last_addr);
+
+ /* .. but that's ok, because modulo-2**n arithmetic will make
+ * the page-aligned "last - first" come out right.
+ */
+ npages = (last_addr - phys_addr) >> PAGE_SHIFT;
+
+ err = change_page_attr_addr(vaddr, npages,
+ __pgprot(__PAGE_KERNEL|flags));
+ if (!err)
+ global_flush_tlb();
+ }
+
+ return err;
+}
+
+/*
* Remap an arbitrary physical address space into the kernel virtual
* address space. Needed when the kernel wants to access high addresses
* directly.
@@ -90,7 +127,25 @@
vunmap((void __force *) addr);
return NULL;
}
+
+ /*
+ * For plain ioremap() get the existing attributes. Otherwise
+ * check against the existing ones.
+ */
+ if (reserve_mattr(phys_addr, phys_addr + size, flags,
+ flags ? NULL : &flags) < 0)
+ goto out;
+
+ if (flags && ioremap_change_attr(phys_addr, size, flags) < 0) {
+ free_mattr(phys_addr, phys_addr + size, flags);
+ goto out;
+ }
return (void __iomem *) (offset + (char __iomem *)addr);
+
+out:
+ area->flags &= 0xffffff;
+ vunmap(addr);
+ return NULL;
}
EXPORT_SYMBOL(__ioremap);

@@ -118,36 +173,7 @@

void __iomem *ioremap_nocache (unsigned long phys_addr, unsigned long size)
{
- unsigned long last_addr;
- void __iomem *p = __ioremap(phys_addr, size, _PAGE_PCD);
- if (!p)
- return p;
-
- /* Guaranteed to be > phys_addr, as per __ioremap() */
- last_addr = phys_addr + size - 1;
-
- if (last_addr < virt_to_phys(high_memory) - 1) {
- struct page *ppage = virt_to_page(__va(phys_addr));
- unsigned long npages;
-
- phys_addr &= PAGE_MASK;
-
- /* This might overflow and become zero.. */
- last_addr = PAGE_ALIGN(last_addr);
-
- /* .. but that's ok, because modulo-2**n arithmetic will make
- * the page-aligned "last - first" come out right.
- */
- npages = (last_addr - phys_addr) >> PAGE_SHIFT;
-
- if (change_page_attr(ppage, npages, PAGE_KERNEL_NOCACHE) < 0) {
- iounmap(p);
- p = NULL;
- }
- global_flush_tlb();
- }
-
- return p;
+ return __ioremap(phys_addr, size, _PAGE_PCD);
}
EXPORT_SYMBOL(ioremap_nocache);

@@ -194,12 +220,11 @@
}

/* Reset the direct mapping. Can block */
- if ((p->flags >> 20) && p->phys_addr < virt_to_phys(high_memory) - 1) {
- change_page_attr(virt_to_page(__va(p->phys_addr)),
- get_vm_area_size(p) >> PAGE_SHIFT,
- PAGE_KERNEL);
- global_flush_tlb();
- }
+ if (p->flags >> 20) {
+ free_mattr(p->phys_addr, p->phys_addr + get_vm_area_size(p),
+ p->flags>>20);
+ ioremap_change_attr(p->phys_addr, get_vm_area_size(p), 0);
+ }

/* Finally remove it */
o = remove_vm_area((void *)addr);

--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/