Re: [PATCH] x86/uaccess: small optimization in unsafe_copy_to_user()

From: Eric Dumazet
Date: Fri Apr 16 2021 - 16:11:42 EST


On Fri, Apr 16, 2021 at 9:44 PM Al Viro <viro@xxxxxxxxxxxxxxxxxx> wrote:
>
> On Fri, Apr 16, 2021 at 12:24:13PM -0700, Eric Dumazet wrote:
> > From: Eric Dumazet <edumazet@xxxxxxxxxx>
> >
> > We have to loop only to copy u64 values.
> > After this first loop, we copy at most one u32, one u16 and one byte.
>
> Does it actually yield a better code?
>

Yes, my patch generates better code, on an actual kernel use-case

(net-next tree, look at put_cmsg())

5ca: 48 89 0f mov %rcx,(%rdi)
5cd: 89 77 08 mov %esi,0x8(%rdi)
5d0: 89 57 0c mov %edx,0xc(%rdi)
5d3: 48 83 c7 10 add $0x10,%rdi
5d7: 48 83 c1 f0 add $0xfffffffffffffff0,%rcx
5db: 48 83 f9 07 cmp $0x7,%rcx
5df: 76 40 jbe 621 <put_cmsg+0x111>
5e1: 66 66 66 66 66 66 2e 0f 1f 84 00 00 00 00 00 data16 data16 data16 data16 data16 nopw %cs:0x0(%rax,%rax,1)
5f0: 49 8b 10 mov (%r8),%rdx
5f3: 48 89 17 mov %rdx,(%rdi)
5f6: 48 83 c7 08 add $0x8,%rdi
5fa: 49 83 c0 08 add $0x8,%r8
5fe: 48 83 c1 f8 add $0xfffffffffffffff8,%rcx
602: 48 83 f9 07 cmp $0x7,%rcx
606: 77 e8 ja 5f0 <put_cmsg+0xe0>
608: eb 17 jmp 621 <put_cmsg+0x111>
60a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
610: 41 8b 10 mov (%r8),%edx
613: 89 17 mov %edx,(%rdi)
615: 48 83 c7 04 add $0x4,%rdi
619: 49 83 c0 04 add $0x4,%r8
61d: 48 83 c1 fc add $0xfffffffffffffffc,%rcx
621: 48 83 f9 03 cmp $0x3,%rcx
625: 77 e9 ja 610 <put_cmsg+0x100>
627: eb 1a jmp 643 <put_cmsg+0x133>
629: 0f 1f 80 00 00 00 00 nopl 0x0(%rax)
630: 41 0f b7 10 movzwl (%r8),%edx
634: 66 89 17 mov %dx,(%rdi)
637: 48 83 c7 02 add $0x2,%rdi
63b: 49 83 c0 02 add $0x2,%r8
63f: 48 83 c1 fe add $0xfffffffffffffffe,%rcx
643: 48 83 f9 01 cmp $0x1,%rcx
647: 77 e7 ja 630 <put_cmsg+0x120>
649: eb 15 jmp 660 <put_cmsg+0x150>
64b: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1)
650: 41 0f b6 08 movzbl (%r8),%ecx
654: 88 0f mov %cl,(%rdi)
656: 48 83 c7 01 add $0x1,%rdi
65a: 49 83 c0 01 add $0x1,%r8
65e: 31 c9 xor %ecx,%ecx
660: 48 85 c9 test %rcx,%rcx
663: 75 eb jne 650 <put_cmsg+0x140>


> FWIW, this
> void bar(unsigned);
> void foo(unsigned n)
> {
> while (n >= 8) {
> bar(n);
> n -= 8;
> }
> while (n >= 4) {
> bar(n);
> n -= 4;
> }
> while (n >= 2) {
> bar(n);
> n -= 2;
> }
> while (n >= 1) {
> bar(n);
> n -= 1;
> }
> }
>
> will compile (with -O2) to
> pushq %rbp
> pushq %rbx
> movl %edi, %ebx
> subq $8, %rsp
> cmpl $7, %edi
> jbe .L2
> movl %edi, %ebp
> .L3:
> movl %ebp, %edi
> subl $8, %ebp
> call bar@PLT
> cmpl $7, %ebp
> ja .L3
> andl $7, %ebx
> .L2:
> cmpl $3, %ebx
> jbe .L4
> movl %ebx, %edi
> andl $3, %ebx
> call bar@PLT
> .L4:
> cmpl $1, %ebx
> jbe .L5
> movl %ebx, %edi
> andl $1, %ebx
> call bar@PLT
> .L5:
> testl %ebx, %ebx
> je .L1
> addq $8, %rsp
> movl $1, %edi
> popq %rbx
> popq %rbp
> jmp bar@PLT
> .L1:
> addq $8, %rsp
> popq %rbx
> popq %rbp
> ret
>
> i.e. loop + if + if + if...