[PATCH] x86: add __copy_user_page_nocache() optimized memcpy

From: Ingo Molnar
Date: Tue Feb 24 2009 - 11:38:12 EST


Add __copy_user_page_nocache(), a hardcoded 4K (page-sized) copy routine
that uses non-temporal stores; it is used for pagecache copies.
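
For illustration only - the function below is not part of this patch and
its name is made up: a page-sized __copy_from_user_nocache() is the kind
of call that now lands on the new routine:

	#include <linux/uaccess.h>

	/*
	 * Hypothetical caller: copy exactly one page of user data into
	 * a kernel page without polluting the CPU caches.  With this
	 * patch, a size of exactly PAGE_SIZE dispatches to
	 * __copy_user_page_nocache().
	 */
	static int example_copy_page_from_user(void *kernel_page,
					       const void __user *ubuf)
	{
		if (__copy_from_user_nocache(kernel_page, ubuf, PAGE_SIZE))
			return -EFAULT;
		return 0;
	}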

Signed-off-by: Ingo Molnar <mingo@xxxxxxx>
---
arch/x86/include/asm/uaccess_64.h | 10 +++--
arch/x86/lib/copy_user_nocache_64.S | 80 +++++++++++++++++++++++++++++++++++
2 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 987a2c1..71cbcda 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -188,6 +188,8 @@ __copy_to_user_inatomic(void __user *dst, const void *src, unsigned size)
extern long __copy_user_nocache(void *dst, const void __user *src,
unsigned size, int zerorest);

+extern long __copy_user_page_nocache(void *dst, const void __user *src);
+
static inline int __copy_from_user_nocache(void *dst, const void __user *src,
unsigned size)
{
@@ -198,8 +200,8 @@ static inline int __copy_from_user_nocache(void *dst, const void __user *src,
* non-temporal stores here. Smaller writes get handled
* via regular __copy_from_user():
*/
- if (likely(size >= PAGE_SIZE))
- return __copy_user_nocache(dst, src, size, 1);
+ if (likely(size == PAGE_SIZE))
+ return __copy_user_page_nocache(dst, src);
else
return __copy_from_user(dst, src, size);
}
@@ -208,8 +210,8 @@ static inline int __copy_from_user_inatomic_nocache(void *dst,
const void __user *src,
unsigned size)
{
- if (likely(size >= PAGE_SIZE))
- return __copy_user_nocache(dst, src, size, 0);
+ if (likely(size == PAGE_SIZE))
+ return __copy_user_page_nocache(dst, src);
else
return __copy_from_user_inatomic(dst, src, size);
}
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index cb0c112..387f08e 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -135,3 +135,83 @@ ENTRY(__copy_user_nocache)
.previous
CFI_ENDPROC
ENDPROC(__copy_user_nocache)
+
+/*
+ * __copy_user_page_nocache - Uncached memory copy of a single page using
+ * non-temporal stores.
+ *
+ * Used for page-sized (4K) writes, where the chance of repeat access to
+ * the same data is low.
+ */
+ENTRY(__copy_user_page_nocache)
+ CFI_STARTPROC
+
+ /*
+ * We'll do 64 iterations of 64 bytes each == 4096 bytes,
+ * hardcoded:
+ */
+ movl $64, %ecx
+
+1: movq 0*8(%rsi), %r8
+2: movq 1*8(%rsi), %r9
+3: movq 2*8(%rsi), %r10
+4: movq 3*8(%rsi), %r11
+
+5: movnti %r8, 0*8(%rdi)
+6: movnti %r9, 1*8(%rdi)
+7: movnti %r10, 2*8(%rdi)
+8: movnti %r11, 3*8(%rdi)
+
+9: movq 4*8(%rsi), %r8
+10: movq 5*8(%rsi), %r9
+11: movq 6*8(%rsi), %r10
+12: movq 7*8(%rsi), %r11
+
+13: movnti %r8, 4*8(%rdi)
+14: movnti %r9, 5*8(%rdi)
+15: movnti %r10, 6*8(%rdi)
+16: movnti %r11, 7*8(%rdi)
+
+ leaq 64(%rsi),%rsi
+ leaq 64(%rdi),%rdi
+
+ decl %ecx
+ jnz 1b
+
+ sfence
+
+ ret
+
+ .section .fixup,"ax"
+30: shll $6,%ecx
+ movl %ecx,%edx
+ jmp 60f
+40: xorl %edx, %edx
+ lea (%rdx,%rcx,8),%rdx
+ jmp 60f
+50: movl %ecx,%edx
+60: sfence
+ jmp copy_user_handle_tail
+ .previous
+
+ .section __ex_table,"a"
+ .quad 1b,30b
+ .quad 2b,30b
+ .quad 3b,30b
+ .quad 4b,30b
+ .quad 5b,30b
+ .quad 6b,30b
+ .quad 7b,30b
+ .quad 8b,30b
+ .quad 9b,30b
+ .quad 10b,30b
+ .quad 11b,30b
+ .quad 12b,30b
+ .quad 13b,30b
+ .quad 14b,30b
+ .quad 15b,30b
+ .quad 16b,30b
+ .previous
+
+ CFI_ENDPROC
+ENDPROC(__copy_user_page_nocache)
--
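
As a rough C-level illustration of the copy loop above (not part of the
patch - it leaves out the non-temporal movnti stores and the exception
table fixups for faulting user-space loads; it assumes 8-byte longs, as
on x86-64):

	/* 64 iterations of 64 bytes (8 quadwords) each == 4096 bytes */
	static void copy_page_nocache_sketch(void *dst, const void *src)
	{
		const unsigned long *s = src;
		unsigned long *d = dst;
		int i;

		for (i = 0; i < 64; i++) {
			d[0] = s[0]; d[1] = s[1]; d[2] = s[2]; d[3] = s[3];
			d[4] = s[4]; d[5] = s[5]; d[6] = s[6]; d[7] = s[7];
			s += 8;
			d += 8;
		}
	}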