Re: [PATCH] Convert filldir[64]() from __put_user() to unsafe_put_user()

From: Linus Torvalds
Date: Tue Oct 08 2019 - 00:09:37 EST


On Mon, Oct 7, 2019 at 8:29 PM Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
>
> For x86? Sure, why not... Note, BTW, that for short constant-sized
> copies we *do* STAC/CLAC at the call site - see those
> __uaccess_begin_nospec();
> in raw_copy_{from,to}_user() in the switches...

Yeah, and that code almost never actually triggers in practice. The
code is pointless and dead.

The thing is, it's only ever used for the double-underscore versions,
and the ones that do use it almost never have constant sizes in the
first place.

And yes, there's like a couple of cases in the whole kernel.

Just remove those constant size cases. They are pointless and just
complicate our headers and slow down the compile for no good reason.

Try the attached patch, and then count the number of "rorx"
instructions in the kernel. Hint: not many. On my personal config,
this triggers 15 times in the whole kernel build (not counting
modules).

It's not worth it. The "speedup" from using __copy_{to,from}_user()
with the fancy inlining is negligible. All the cost is in the
STAC/CLAC anyway, so the code might as well be deleted.
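
For reference, this is roughly what the two raw copy helpers collapse to
once the constant-size switches are gone (a sketch of the end state, not
part of the attached patch):

static __always_inline __must_check unsigned long
raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
{
        /* copy_user_generic() does its own STAC/CLAC */
        return copy_user_generic(dst, (__force void *)src, size);
}

static __always_inline __must_check unsigned long
raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
{
        return copy_user_generic((__force void *)dst, src, size);
}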

> 1) cross-architecture user_access_begin_dont_use(): on everything
> except x86 it's empty, on x86 - __uaccess_begin_nospec().

No, just do a proper range check, and use user_access_begin().

Stop trying to optimize that range check away. It's a couple of fast
instructions.

The only ones who don't want the range check are the actual kernel
copy ones, but they don't want the user_access_begin() either.
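
For the filldir-style callers that means the familiar pattern: one ranged
user_access_begin() that does the access_ok() check plus the STAC,
unsafe_put_user() for the individual stores, and a fault label that drops
user access again. A minimal sketch (the helper name and argument list
here are illustrative, not the actual fs/readdir.c code):

static int put_dirent_header(struct linux_dirent64 __user *dirent,
                             u64 ino, s64 off, unsigned short reclen)
{
        if (!user_access_begin(dirent, reclen))  /* range check + STAC */
                return -EFAULT;
        unsafe_put_user(ino, &dirent->d_ino, efault_end);
        unsafe_put_user(off, &dirent->d_off, efault_end);
        unsafe_put_user(reclen, &dirent->d_reclen, efault_end);
        user_access_end();                       /* CLAC */
        return 0;

efault_end:
        user_access_end();
        return -EFAULT;
}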

> void *copy_mount_options(const void __user * data)
> {
>         unsigned offs, size;
>         char *copy;
>
>         if (!data)
>                 return NULL;
>
>         copy = kmalloc(PAGE_SIZE, GFP_KERNEL);
>         if (!copy)
>                 return ERR_PTR(-ENOMEM);
>
>         offs = (unsigned long)untagged_addr(data) & (PAGE_SIZE - 1);
>
>         if (copy_from_user(copy, data, PAGE_SIZE - offs)) {
>                 kfree(copy);
>                 return ERR_PTR(-EFAULT);
>         }
>         if (offs) {
>                 if (copy_from_user(copy + PAGE_SIZE - offs, data + PAGE_SIZE - offs, offs))
>                         memset(copy + PAGE_SIZE - offs, 0, offs);
>         }
>         return copy;
> }
>
> on the theory that any fault halfway through a page means a race with
> munmap/mprotect/etc. and we can just pretend we'd lost the race entirely.
> And to hell with exact_copy_from_user(), byte-by-byte copying, etc.

Looks reasonable.

Linus
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index 5cd1caa8bc65..db58c4436ce3 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -62,6 +62,8 @@ copy_to_user_mcsafe(void *to, const void *from, unsigned len)
         return ret;
 }
 
+#define marker(x) asm volatile("rorx $" #x ",%rax,%rdx")
+
 static __always_inline __must_check unsigned long
 raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
 {
@@ -72,30 +74,35 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
         switch (size) {
         case 1:
                 __uaccess_begin_nospec();
+                marker(1);
                 __get_user_asm_nozero(*(u8 *)dst, (u8 __user *)src,
                               ret, "b", "b", "=q", 1);
                 __uaccess_end();
                 return ret;
         case 2:
                 __uaccess_begin_nospec();
+                marker(2);
                 __get_user_asm_nozero(*(u16 *)dst, (u16 __user *)src,
                               ret, "w", "w", "=r", 2);
                 __uaccess_end();
                 return ret;
         case 4:
                 __uaccess_begin_nospec();
+                marker(4);
                 __get_user_asm_nozero(*(u32 *)dst, (u32 __user *)src,
                               ret, "l", "k", "=r", 4);
                 __uaccess_end();
                 return ret;
         case 8:
                 __uaccess_begin_nospec();
+                marker(8);
                 __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
                               ret, "q", "", "=r", 8);
                 __uaccess_end();
                 return ret;
         case 10:
                 __uaccess_begin_nospec();
+                marker(10);
                 __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
                               ret, "q", "", "=r", 10);
                 if (likely(!ret))
@@ -106,6 +113,7 @@ raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
                 return ret;
         case 16:
                 __uaccess_begin_nospec();
+                marker(16);
                 __get_user_asm_nozero(*(u64 *)dst, (u64 __user *)src,
                               ret, "q", "", "=r", 16);
                 if (likely(!ret))
@@ -129,30 +137,35 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
         switch (size) {
         case 1:
                 __uaccess_begin();
+                marker(51);
                 __put_user_asm(*(u8 *)src, (u8 __user *)dst,
                               ret, "b", "b", "iq", 1);
                 __uaccess_end();
                 return ret;
         case 2:
                 __uaccess_begin();
+                marker(52);
                 __put_user_asm(*(u16 *)src, (u16 __user *)dst,
                               ret, "w", "w", "ir", 2);
                 __uaccess_end();
                 return ret;
         case 4:
                 __uaccess_begin();
+                marker(54);
                 __put_user_asm(*(u32 *)src, (u32 __user *)dst,
                               ret, "l", "k", "ir", 4);
                 __uaccess_end();
                 return ret;
         case 8:
                 __uaccess_begin();
+                marker(58);
                 __put_user_asm(*(u64 *)src, (u64 __user *)dst,
                               ret, "q", "", "er", 8);
                 __uaccess_end();
                 return ret;
         case 10:
                 __uaccess_begin();
+                marker(60);
                 __put_user_asm(*(u64 *)src, (u64 __user *)dst,
                               ret, "q", "", "er", 10);
                 if (likely(!ret)) {
@@ -164,6 +177,7 @@ raw_copy_to_user(void __user *dst, const void *src, unsigned long size)
                 return ret;
         case 16:
                 __uaccess_begin();
+                marker(66);
                 __put_user_asm(*(u64 *)src, (u64 __user *)dst,
                               ret, "q", "", "er", 16);
                 if (likely(!ret)) {