arch/x86: memcpy() "confusion"

From: Sedat Dilek
Date: Sat Sep 26 2015 - 08:21:18 EST


Hi,

While still playing with llvmlinux patches against Linux v4.3-rc2+, I
wondered about the varied usage of memcpy() in the several string*.[ch]
files under arch/x86.

Just FYI: I am here on Ubuntu/precise AMD64.

The background is that my build breaks again due to commit (see [1])...
"x86, efi, kasan: #undef memset/memcpy/memmove per arch"
...with its undefs especially for memcpy.
( I have reverted it for now. )

There exists a LLVM PR18415 (see [2]) and llvmlinux has a workaround
(see [3]) for this issue.
I have attached an older alternative patch [4] from PaX Team.

So let's look at my build-log when building with CLANG v3.7...
...
CC arch/x86/kernel/setup.o
...
In file included from arch/x86/kernel/setup.c:96:
./arch/x86/include/asm/desc.h:121:2: error: implicit declaration of
function 'memcpy' [-Werror,-Wimplicit-function-declaration]
memcpy(&idt[entry], gate, sizeof(*gate));
^
1 error generated.
...

Investigating the source-code...

[ arch/x86/kernel/setup.c:96 ]
#include <asm/desc.h>

[ arch/x86/include/asm/desc.h:121 ]
static inline void native_write_idt_entry(gate_desc *idt, int entry,
const gate_desc *gate)
{
memcpy(&idt[entry], gate, sizeof(*gate)); <--- XXX: Line #121
}

Checking the includes...

$ grep ^#include arch/x86/kernel/setup.c | egrep 'efi|string'
#include <linux/efi.h>
#include <asm/efi.h> <--- XXX: undefs of commit 769a8089c1fd moved here

Furthermore Andrey comments in his patch...
[ arch/x86/include/asm/efi.h ]
...
+/*
+ * CONFIG_KASAN may redefine memset to __memset. __memset function is present
+ * only in kernel binary. Since the EFI stub linked into a separate binary it
+ * doesn't have __memset(). So we should use standard memset from
+ * arch/x86/boot/compressed/string.c. The same applies to memcpy and memmove.
+ */
+#undef memcpy
+#undef memset
+#undef memmove
...

This statement is confirmed in...

[ arch/x86/boot/string.h ]
...
/* Undef any of these macros coming from string_32.h. */
#undef memcpy
#undef memset
#undef memcmp
...
/*
* Access builtin version by default. If one needs to use optimized version,
* do "undef memcpy" in .c file and link against right string.c
*/
#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
#define memset(d,c,l) __builtin_memset(d,c,l)
#define memcmp __builtin_memcmp
...

More confirmation when looking at patch...
"x86, boot: Move optimized memcpy() 32/64 bit versions to
compressed/string.c"

My 1st question...
Is the file "arch/x86/boot/compressed/string.c" the central place for
memcpy(), or only for the "optimized" version?
BTW, why a .c file and not a header like arch/x86/boot/compressed/string.h?

2nd question...
When thinking of an alternative implementation like the one in [4] - which
file is the appropriate place for it?

Why do we have in arch/x86...
__builtin_memcpy() | __inline_memcpy() | __memcpy() | memcpy() ?
Some comments say "faster implementation".

Just curious...
Isn't that crying for a "simplification" or "centralization" of
memcpy() | memset() | memcmp() ?

More enlightenment! Thoughts?

Thanks in advance.

Hoping to get less confused,
- Sedat -

[1] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=769a8089c1fd2fe94c13e66fe6e03d7820953ee3
[2] https://llvm.org/bugs/show_bug.cgi?id=18415
[3] http://git.linuxfoundation.org/?p=llvmlinux.git;a=blob_plain;f=arch/x86_64/patches/boot-workaround-PR18415.patch
[4] http://git.linuxfoundation.org/?p=llvmlinux.git;a=blob_plain;f=arch/x86_64/patches/ARCHIVE/0026-Add-own-versions-of-memcpy-and-memset-for-compilatio.patch
[5] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/arch/x86/kernel/setup.c#n96
[6] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/tree/arch/x86/include/asm/desc.h#n121
[7] http://git.kernel.org/cgit/linux/kernel/git/torvalds/linux.git/commit/?id=820e8feca06ff744f60e5036c3178dde40b91afc

[ INVESTIGATIONS ]

$ cd arch/x86/

$ for p in __builtin_memcpy __inline_memcpy __memcpy memcpy ; do echo
[ $p ] ; LC_ALL=C grep $p -nr ./ | grep -v "Binary file" | grep string
| sort ; echo "" ; done

[ __builtin_memcpy ]
./boot/string.h:19:#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
./include/asm/string_32.h:182:#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
./include/asm/string_64.h:43: __ret =
__builtin_memcpy((dst), (src), __len); \

[ __inline_memcpy ]
./include/asm/string_64.h:52:#define memcpy(dst, src, len)
__inline_memcpy((dst), (src), (len))
./include/asm/string_64.h:9:static __always_inline void
*__inline_memcpy(void *to, const void *from, size_t n)

[ __memcpy ]
./include/asm/string_32.h:161:static inline void *__memcpy3d(void *to,
const void *from, size_t len)
./include/asm/string_32.h:164: return __memcpy(to, from, len);
./include/asm/string_32.h:171: : __memcpy3d((t), (f), (n)))
./include/asm/string_32.h:187: : __memcpy((t), (f), (n)))
./include/asm/string_32.h:194:#define memcpy(t, f, n) __memcpy((t), (f), (n))
./include/asm/string_32.h:32:static __always_inline void
*__memcpy(void *to, const void *from, size_t n)
./include/asm/string_64.h:30:extern void *__memcpy(void *to, const
void *from, size_t len);
./include/asm/string_64.h:41: __ret = __memcpy((dst), (src),
__len); \
./include/asm/string_64.h:77:#define memcpy(dst, src, len)
__memcpy(dst, src, len)

[ memcpy ]
./boot/compressed/string.c:19:static inline void *memcpy(void *dest,
const void *src, size_t n)
./boot/compressed/string.c:29:void *memcpy(void *dest, const void
*src, size_t n)
./boot/compressed/string.c:4:void *memcpy(void *dest, const void *src, size_t n)
./boot/string.h:15: * do "undef memcpy" in .c file and link against
right string.c
./boot/string.h:19:#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
./boot/string.h:5:#undef memcpy
./boot/string.h:9:void *memcpy(void *dst, const void *src, size_t len);
./include/asm/string_32.h:154:static inline void
*__constant_memcpy3d(void *to, const void *from, size_t len)
./include/asm/string_32.h:157: return __constant_memcpy(to, from, len);
./include/asm/string_32.h:158: return _mmx_memcpy(to, from, len);
./include/asm/string_32.h:161:static inline void *__memcpy3d(void *to,
const void *from, size_t len)
./include/asm/string_32.h:164: return __memcpy(to, from, len);
./include/asm/string_32.h:165: return _mmx_memcpy(to, from, len);
./include/asm/string_32.h:168:#define memcpy(t, f, n)
\
./include/asm/string_32.h:170: ? __constant_memcpy3d((t), (f), (n)) \
./include/asm/string_32.h:171: : __memcpy3d((t), (f), (n)))
./include/asm/string_32.h:182:#define memcpy(t, f, n) __builtin_memcpy(t, f, n)
./include/asm/string_32.h:184:#define memcpy(t, f, n)
\
./include/asm/string_32.h:186: ? __constant_memcpy((t), (f), (n)) \
./include/asm/string_32.h:187: : __memcpy((t), (f), (n)))
./include/asm/string_32.h:194:#define memcpy(t, f, n) __memcpy((t), (f), (n))
./include/asm/string_32.h:32:static __always_inline void
*__memcpy(void *to, const void *from, size_t n)
./include/asm/string_32.h:51:static __always_inline void
*__constant_memcpy(void *to, const void *from,
./include/asm/string_64.h:30:extern void *__memcpy(void *to, const
void *from, size_t len);
./include/asm/string_64.h:34:extern void *memcpy(void *to, const void
*from, size_t len);
./include/asm/string_64.h:36:#define memcpy(dst, src, len)
\
./include/asm/string_64.h:41: __ret = __memcpy((dst), (src),
__len); \
./include/asm/string_64.h:43: __ret =
__builtin_memcpy((dst), (src), __len); \
./include/asm/string_64.h:52:#define memcpy(dst, src, len)
__inline_memcpy((dst), (src), (len))
./include/asm/string_64.h:76:#undef memcpy
./include/asm/string_64.h:77:#define memcpy(dst, src, len)
__memcpy(dst, src, len)
./include/asm/string_64.h:9:static __always_inline void
*__inline_memcpy(void *to, const void *from, size_t n)
./lib/memcpy_64.S:44: * memcpy_erms() - enhanced fast string memcpy.
This is faster and

$ grep ^#undef -nr arch/x86 | egrep 'memcpy|memcmp|memset' | sort
arch/x86/boot/compressed/misc.c:101:#undef memcpy
arch/x86/boot/compressed/misc.c:108:#undef memset
arch/x86/boot/string.h:5:#undef memcpy
arch/x86/boot/string.h:6:#undef memset
arch/x86/boot/string.h:7:#undef memcmp
arch/x86/include/asm/string_64.h:76:#undef memcpy
arch/x86/kernel/x8664_ksyms_64.c:49:#undef memcpy
arch/x86/kernel/x8664_ksyms_64.c:50:#undef memset
arch/x86/lib/memcpy_32.c:4:#undef memcpy
arch/x86/lib/memcpy_32.c:5:#undef memset

[ / INVESTIGATIONS ]
From 81aa8c99b5cb439e97e8896319a29d8d25b916e3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Jan-Simon=20M=C3=B6ller?= <dl9pf@xxxxxx>
Date: Wed, 14 Nov 2012 12:46:46 +0100
Subject: [PATCH 22/39] x86: LLVMLinux: Add own versions of memcpy and memset
for compilation with clang
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When using clang to compile the kernel, things like memcpy and memset need to
be provided. With clang these are normally provided by the LLVM compiler-rt
library which isn't used with the Linux kernel.

Author: PaX Team <pageexec@xxxxxxxxxxx>
ML-Post: http://lists.cs.uiuc.edu/pipermail/llvm-commits/Week-of-Mon-20120507/142707.html
URL: http://llvm.linuxfoundation.org
Merge: Jan-Simon Möller <dl9pf@xxxxxx>
Signed-off-by: Jan-Simon Möller <dl9pf@xxxxxx>

---
arch/x86/boot/boot.h | 36 +++++++++++++++++++++++++++++++++---
arch/x86/boot/compressed/misc.c | 8 ++++----
arch/x86/boot/memory.c | 2 +-
arch/x86/boot/string.c | 6 +++---
4 files changed, 41 insertions(+), 11 deletions(-)

diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h
index 5b75319..e02f05d 100644
--- a/arch/x86/boot/boot.h
+++ b/arch/x86/boot/boot.h
@@ -30,6 +30,12 @@
#include <asm/processor-flags.h>
#include "ctype.h"

+#ifdef CONFIG_X86_32
+#define asmlinkage __attribute__((regparm(0)))
+#else
+#define asmlinkage
+#endif
+
/* Useful macros */
#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))

@@ -229,12 +235,36 @@ void copy_to_fs(addr_t dst, void *src, size_t len);
void *copy_from_fs(void *dst, addr_t src, size_t len);
void copy_to_gs(addr_t dst, void *src, size_t len);
void *copy_from_gs(void *dst, addr_t src, size_t len);
+
+#ifndef __clang__
void *memcpy(void *dst, void *src, size_t len);
void *memset(void *dst, int c, size_t len);

#define memcpy(d,s,l) __builtin_memcpy(d,s,l)
#define memset(d,c,l) __builtin_memset(d,c,l)

+#else
+static inline void *memcpy(void *d, const void *s, size_t l)
+{
+ int d0, d1, d2;
+ asm volatile("rep ; addr32 movsb\n\t"
+ : "=&c" (d0), "=&D" (d1), "=&S" (d2)
+ : "0" (l), "1" ((long)d), "2" ((long)s)
+ : "memory");
+ return d;
+}
+
+static inline void *memset(void *d, char c, size_t l)
+{
+ int d0, d1;
+ asm volatile("rep ; addr32 stosb\n\t"
+ : "=&c" (d0), "=&D" (d1)
+ : "0" (l), "1" (d), "a" (c)
+ : "memory");
+ return d;
+}
+#endif
+
/* a20.c */
int enable_a20(void);

@@ -350,9 +380,9 @@ int printf(const char *fmt, ...);
void initregs(struct biosregs *regs);

/* string.c */
-int strcmp(const char *str1, const char *str2);
-int strncmp(const char *cs, const char *ct, size_t count);
-size_t strnlen(const char *s, size_t maxlen);
+asmlinkage int strcmp(const char *str1, const char *str2);
+asmlinkage int strncmp(const char *cs, const char *ct, size_t count);
+asmlinkage size_t strnlen(const char *s, size_t maxlen);
unsigned int atou(const char *s);
unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base);

diff --git a/arch/x86/boot/compressed/misc.c b/arch/x86/boot/compressed/misc.c
index 0319c88..de331e5 100644
--- a/arch/x86/boot/compressed/misc.c
+++ b/arch/x86/boot/compressed/misc.c
@@ -109,8 +109,8 @@ static void error(char *m);
*/
struct boot_params *real_mode; /* Pointer to real-mode data */

-void *memset(void *s, int c, size_t n);
-void *memcpy(void *dest, const void *src, size_t n);
+asmlinkage void *memset(void *s, int c, size_t n);
+asmlinkage void *memcpy(void *dest, const void *src, size_t n);

#ifdef CONFIG_X86_64
#define memptr long
@@ -222,7 +222,7 @@ void __putstr(const char *s)
outb(0xff & (pos >> 1), vidport+1);
}

-void *memset(void *s, int c, size_t n)
+asmlinkage void *memset(void *s, int c, size_t n)
{
int i;
char *ss = s;
@@ -232,7 +232,7 @@ void *memset(void *s, int c, size_t n)
return s;
}
#ifdef CONFIG_X86_32
-void *memcpy(void *dest, const void *src, size_t n)
+asmlinkage void *memcpy(void *dest, const void *src, size_t n)
{
int d0, d1, d2;
asm volatile(
diff --git a/arch/x86/boot/memory.c b/arch/x86/boot/memory.c
index db75d07..65607e5 100644
--- a/arch/x86/boot/memory.c
+++ b/arch/x86/boot/memory.c
@@ -64,7 +64,7 @@ static int detect_memory_e820(void)
break;
}

- *desc++ = buf;
+ memcpy(desc++, &buf, sizeof(buf));
count++;
} while (ireg.ebx && count < ARRAY_SIZE(boot_params.e820_map));

diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c
index 574dedf..476ddea 100644
--- a/arch/x86/boot/string.c
+++ b/arch/x86/boot/string.c
@@ -14,7 +14,7 @@

#include "boot.h"

-int strcmp(const char *str1, const char *str2)
+asmlinkage int strcmp(const char *str1, const char *str2)
{
const unsigned char *s1 = (const unsigned char *)str1;
const unsigned char *s2 = (const unsigned char *)str2;
@@ -30,7 +30,7 @@ int strcmp(const char *str1, const char *str2)
return 0;
}

-int strncmp(const char *cs, const char *ct, size_t count)
+asmlinkage int strncmp(const char *cs, const char *ct, size_t count)
{
unsigned char c1, c2;

@@ -46,7 +46,7 @@ int strncmp(const char *cs, const char *ct, size_t count)
return 0;
}

-size_t strnlen(const char *s, size_t maxlen)
+asmlinkage size_t strnlen(const char *s, size_t maxlen)
{
const char *es = s;
while (*es && maxlen) {
--
1.8.1.2