Re: [PATCH] x86: combine memmove FSRM and ERMS alternatives

From: Borislav Petkov
Date: Sun Jan 15 2023 - 18:49:58 EST


On Sat, Jan 14, 2023 at 09:49:01PM +0100, Borislav Petkov wrote:
> The more I think about it, the more I like the copy_user_generic() idea
> but lemme see how ugly it gets...

Something like this. It doesn't build yet, but it should show more concretely
what I mean.

I definitely like the removal of the grafted ALTERNATIVEs in the asm code...
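
For reference, the __memmove() dispatch below is basically the same pattern
clear_page() already uses in arch/x86/include/asm/page_64.h (quoting from
memory here, so modulo the exact constraint list):

  static __always_inline void clear_page(void *page)
  {
          alternative_call_2(clear_page_orig,
                             clear_page_rep, X86_FEATURE_REP_GOOD,
                             clear_page_erms, X86_FEATURE_ERMS,
                             "=D" (page),
                             "0" (page)
                             : "cc", "memory", "rax", "rcx");
  }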

---

diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 888731ccf1f6..0b48ebd74e2a 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -76,8 +76,24 @@ void *__msan_memmove(void *dest, const void *src, size_t len);
#define memmove __msan_memmove
#else
void *memmove(void *dest, const void *src, size_t count);
+void *___memmove(void *dest, const void *src, size_t count);
+void *__memmove_fsrm(void *dest, const void *src, size_t count);
+void *__memmove_erms(void *dest, const void *src, size_t count);
#endif
-void *__memmove(void *dest, const void *src, size_t count);
+static __always_inline void *__memmove(void *dest, const void *src, size_t count)
+{
+ void *ret;
+
+ /* FSRM if present, ERMS without FSRM, otherwise the unrolled asm copy */
+ alternative_call_2(__memmove_fsrm,
+  __memmove_erms, ALT_NOT(X86_FEATURE_FSRM),
+  ___memmove, ALT_NOT(X86_FEATURE_ERMS),
+  ASM_OUTPUT2("=a" (ret), "=D" (dest), "=S" (src), "=d" (count)),
+  "1" (dest), "2" (src), "3" (count)
+  : "memory", "rcx", "r8", "r9", "r10", "r11");
+
+ return ret;
+}

int memcmp(const void *cs, const void *ct, size_t count);
size_t strlen(const char *s);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index 4f1a40a86534..039139fa5228 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -70,7 +70,7 @@ ifneq ($(CONFIG_GENERIC_CSUM),y)
lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o
endif
lib-y += clear_page_64.o copy_page_64.o
- lib-y += memmove_64.o memset_64.o
+ lib-y += memmove_64.o memmove.o memset_64.o
lib-y += copy_user_64.o
lib-y += cmpxchg16b_emu.o
endif
diff --git a/arch/x86/lib/memmove.c b/arch/x86/lib/memmove.c
new file mode 100644
index 000000000000..70dbf85dbd35
--- /dev/null
+++ b/arch/x86/lib/memmove.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/export.h>
+#include <linux/string.h>
+
+void *__memmove_fsrm(void *dest, const void *src, size_t count)
+{
+ void *ret = dest;
+
+ /* Possibly overlapping backward copy: punt to the asm version. */
+ if (dest > src)
+  return ___memmove(dest, src, count);
+
+ asm volatile("rep movsb"
+  : "+D" (dest), "+S" (src), "+c" (count)
+  : : "memory");
+
+ return ret;
+}
+EXPORT_SYMBOL(__memmove_fsrm);
+
+void *__memmove_erms(void *dest, const void *src, size_t count)
+{
+ /* rep movsb startup overhead is not worth it below 32 bytes. */
+ if (count < 32)
+  return ___memmove(dest, src, count);
+
+ return __memmove_fsrm(dest, src, count);
+}
+EXPORT_SYMBOL(__memmove_erms);
+
+/* The out-of-line memmove() which the asm alias used to provide. */
+void *memmove(void *dest, const void *src, size_t count)
+{
+ return __memmove(dest, src, count);
+}
+EXPORT_SYMBOL(memmove);
diff --git a/arch/x86/lib/memmove_64.S b/arch/x86/lib/memmove_64.S
index 02661861e5dd..81a89bd146a1 100644
--- a/arch/x86/lib/memmove_64.S
+++ b/arch/x86/lib/memmove_64.S
@@ -26,9 +26,12 @@
* Output:
* rax: dest
*/
-SYM_FUNC_START(__memmove)
+SYM_FUNC_START(___memmove)

+ /* Handle sizes of 32 bytes and up in the loops below */
mov %rdi, %rax
+ cmp $0x20, %rdx
+ jb 1f

/* Decide forward/backward copy mode */
cmp %rdi, %rsi
@@ -38,10 +41,7 @@ SYM_FUNC_START(__memmove)
cmp %rdi, %r8
jg 2f

- /* FSRM implies ERMS => no length checks, do the copy directly */
.Lmemmove_begin_forward:
- ALTERNATIVE "cmp $0x20, %rdx; jb 1f", "", X86_FEATURE_FSRM
- ALTERNATIVE "", "jmp .Lmemmove_erms", X86_FEATURE_ERMS

/*
* movsq instruction have many startup latency
@@ -207,13 +207,5 @@ SYM_FUNC_START(__memmove)
movb %r11b, (%rdi)
13:
RET
-
-.Lmemmove_erms:
- movq %rdx, %rcx
- rep movsb
- RET
-SYM_FUNC_END(__memmove)
-EXPORT_SYMBOL(__memmove)
-
-SYM_FUNC_ALIAS(memmove, __memmove)
-EXPORT_SYMBOL(memmove)
+SYM_FUNC_END(___memmove)
+EXPORT_SYMBOL(___memmove)

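A note on the rep movsb condition in __memmove_fsrm(): with DF=0 it copies
low to high, so it is only safe when dest < src or when the buffers don't
overlap; everything else has to punt to the backward-copy path in
___memmove(). A quick userspace sketch (hypothetical, not part of the patch)
of how a forward byte copy corrupts an overlapping dest > src move:

  #include <stdio.h>

  /* Same copy order as rep movsb with DF=0: low to high addresses. */
  static void forward_bytes(char *d, const char *s, size_t n)
  {
          for (size_t i = 0; i < n; i++)
                  d[i] = s[i];
  }

  int main(void)
  {
          char buf[8] = "abcdef";

          forward_bytes(buf + 2, buf, 4); /* dest > src, overlapping */
          printf("%s\n", buf);            /* "ababab", not "ababcd" */
          return 0;
  }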
--
Regards/Gruss,
Boris.

https://people.kernel.org/tglx/notes-about-netiquette