Re: [BUG] x86: failed to boot a kernel on a Ryzen machine

From: Satoru Takeuchi
Date: Mon Apr 24 2017 - 07:27:32 EST


At Mon, 24 Apr 2017 15:58:05 +0900,
Satoru Takeuchi wrote:
>
> [1 <text/plain; US-ASCII (7bit)>]
> Recently I bought a new Ryzen machine. When I tried to test v4.11-rc8 on it, it failed to boot
> with the following panic log.
>
> ```
> ...
> [ 0.227720] raid6: sse2x1 gen() 7985 MB/s
> [ 0.295709] raid6: sse2x1 xor() 8181 MB/s
> [ 0.363706] raid6: sse2x2 gen() 17531 MB/s
> [ 0.431699] raid6: sse2x2 xor() 11098 MB/s
> [ 0.499693] raid6: sse2x4 gen() 18509 MB/s
> [ 0.567688] raid6: sse2x4 xor() 10177 MB/s
> [ 0.571692] invalid opcode: 0000 [#1] SMP
> [ 0.572312] Modules linked in:
> [ 0.572822] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 4.11.0-rc8-ktest #1
> [ 0.573734] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Ubuntu-1.8.2-1ubuntu1 04/01/2014
> [ 0.575040] task: ffff8f791e1c0000 task.stack: ffff9c72c00d0000
> [ 0.575865] RIP: 0010:raid6_avx21_gen_syndrome+0x3d/0x120
> [ 0.576634] RSP: 0018:ffff9c72c00d3d70 EFLAGS: 00010246
> [ 0.577376] RAX: 0000000000000000 RBX: ffff9c72c00d3dc0 RCX: 00000000fffedb97
> [ 0.578327] RDX: 0000000000000000 RSI: 0000000000001000 RDI: 0000000000000012
> [ 0.579283] RBP: ffff9c72c00d3da0 R08: 0000000000000000 R09: 00000000000000cd
> [ 0.580243] R10: 00000000fffedb86 R11: ffffffffa617008d R12: 0000000000001000
> [ 0.581211] R13: ffff8f791e39e000 R14: ffff8f791e39f000 R15: 0000000000000012
> [ 0.582163] FS: 0000000000000000(0000) GS:ffff8f791fc00000(0000) knlGS:0000000000000000
> [ 0.583324] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033
> [ 0.584128] CR2: 0000000000000000 CR3: 000000001be09000 CR4: 00000000003006f0
> [ 0.585078] Call Trace:
> [ 0.594952] raid6_select_algo+0x116/0x30b
> [ 0.595592] ? libcrc32c_mod_init+0x2b/0x2b
> [ 0.596240] do_one_initcall+0x53/0x1a0
> [ 0.596843] ? parse_args+0x2cf/0x490
> [ 0.597421] kernel_init_freeable+0x182/0x21c
> [ 0.598077] ? rest_init+0x80/0x80
> [ 0.598626] kernel_init+0xe/0x100
> [ 0.599175] ret_from_fork+0x2c/0x40
> [ 0.599741] Code: 55 41 54 53 48 89 d3 48 8d 14 c5 00 00 00 00 41 89 ff 49 89 f4 48 83 ec 08 4c 8b 2c c3 4c 8b 74 13 08 48 89 55 d0 e8 53 ed a9 ff <c5> fd 6f 05 2b 2d 4e 00 c5 e5 ef db 4d 85 e4 48 8b 55 d0 0f 84
> [ 0.602215] RIP: raid6_avx21_gen_syndrome+0x3d/0x120 RSP: ffff9c72c00d3d70
> [ 0.603154] ---[ end trace 17ee01f86b8fc548 ]---
> [ 0.603850] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> [ 0.603850]
> [ 0.605276] ---[ end Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b
> ...
> ```
>
> This panic occurred in lib/raid6/avx2.c#raid6_avx21_gen_syndrome(), and this
> function consists of many AVX instructions.
>
> lib/raid6/avx2.c:
> ```
> /*
> * Plain AVX2 implementation
> */
> static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
> {
> u8 **dptr = (u8 **)ptrs;
> u8 *p, *q;
> int d, z, z0;
>
> z0 = disks - 3; /* Highest data disk */
> p = dptr[z0+1]; /* XOR parity */
> q = dptr[z0+2]; /* RS syndrome */
>
> kernel_fpu_begin();
>
> asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
> asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */
>
> for (d = 0; d < bytes; d += 32) {
> asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
> asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
> asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
> asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
> asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
> for (z = z0-2; z >= 0; z--) {
> asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
> asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
> asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
> asm volatile("vpand %ymm0,%ymm5,%ymm5");
> asm volatile("vpxor %ymm5,%ymm4,%ymm4");
> asm volatile("vpxor %ymm6,%ymm2,%ymm2");
> asm volatile("vpxor %ymm6,%ymm4,%ymm4");
> asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
> }
> asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
> asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
> asm volatile("vpand %ymm0,%ymm5,%ymm5");
> asm volatile("vpxor %ymm5,%ymm4,%ymm4");
> asm volatile("vpxor %ymm6,%ymm2,%ymm2");
> asm volatile("vpxor %ymm6,%ymm4,%ymm4");
>
> asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
> asm volatile("vpxor %ymm2,%ymm2,%ymm2");
> asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
> asm volatile("vpxor %ymm4,%ymm4,%ymm4");
> }
>
> asm volatile("sfence" : : : "memory");
> kernel_fpu_end();
> }
>
> ```
>
> This problem can be bypassed by the following patch.
>
> ```
> diff --git a/arch/x86/Makefile b/arch/x86/Makefile
> index 2d44933..b589097 100644
> --- a/arch/x86/Makefile
> +++ b/arch/x86/Makefile
> @@ -162,7 +162,7 @@ asinstr := $(call as-instr,fxsaveq (%rax),-DCONFIG_AS_FXSAVEQ=1)
> asinstr += $(call as-instr,pshufb %xmm0$(comma)%xmm0,-DCONFIG_AS_SSSE3=1)
> asinstr += $(call as-instr,crc32l %eax$(comma)%eax,-DCONFIG_AS_CRC32=1)
> avx_instr := $(call as-instr,vxorps %ymm0$(comma)%ymm1$(comma)%ymm2,-DCONFIG_AS_AVX=1)
> -avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
> +#avx2_instr :=$(call as-instr,vpbroadcastb %xmm0$(comma)%ymm1,-DCONFIG_AS_AVX2=1)
> avx512_instr :=$(call as-instr,vpmovm2b %k1$(comma)%zmm5,-DCONFIG_AS_AVX512=1)
> sha1_ni_instr :=$(call as-instr,sha1msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA1_NI=1)
> sha256_ni_instr :=$(call as-instr,sha256msg1 %xmm0$(comma)%xmm1,-DCONFIG_AS_SHA256_NI=1)
>
> ```
>
> I suspect that there is a problem with AVX2 handling either in the Linux kernel or on Ryzen.
>
> # Build environment
>
> ## Hardware
>
> CPU: Ryzen 1800x
>
> ## Software
>
> - distro: Ubuntu 16.04 x86_64
> - gcc: gcc (Ubuntu 5.4.0-6ubuntu1~16.04.4) 5.4.0 20160609
>
> ## Build target kernel version
>
> linux v4.11-rc8
>
> # Test environment
>
> ## Hardware
>
> A VM running on the above mentioned hardware.
>
> ## Software
>
> - distro: Ubuntu 16.04
>
> # Additional information
>
> - .config is attached in this mail
> - This problem also happens on v4.10
> - When I tested v4.10 on my previous Core i5 machine, it booted successfully.

Add "kvm@xxxxxxxxxxxxxxx" to this thread.

Here is some additional information. I suspect that this is a problem
related to virtualization.

I ran the following program, which simulates the kernel's
raid6_avx21_gen_syndrome(), on both the host and the guest.

```
#include <stdio.h>
#include <stdlib.h>
#include <errno.h>
#include <err.h>

#define PAGESIZE 4096
#define DISKS 4

typedef unsigned char u8;
/*
 * Must be exactly 64 bits wide: x1d[4] below has to span 32 bytes because it
 * is fetched with one 256-bit load.  "unsigned long" is only 32 bits on
 * ILP32/LLP64 ABIs, so use "unsigned long long", which is 64 bits everywhere.
 */
typedef unsigned long long u64;

/*
 * Constant table used by raid6_avx21_gen_syndrome(): 32 bytes, each 0x1d,
 * aligned to 32 bytes so it can be read with an aligned 256-bit move
 * (vmovdqa) into a ymm register.
 * NOTE(review): 0x1d is presumably the reduction byte of the RAID-6 GF(2^8)
 * polynomial -- confirm against the kernel's lib/raid6 sources.
 */
static const struct raid6_avx2_constants {
	u64 x1d[4];
} raid6_avx2_constants __attribute__((aligned(32))) = {
	{ 0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,
	  0x1d1d1d1d1d1d1d1dULL, 0x1d1d1d1d1d1d1d1dULL,},
};

/*
 * User-space copy of the kernel's raid6_avx21_gen_syndrome() quoted earlier
 * in this mail, with the kernel_fpu_begin()/kernel_fpu_end() calls removed.
 *
 * disks: total number of buffers in ptrs[], including the two outputs.
 *        ptrs[0 .. disks-3] are the data buffers (inputs),
 *        ptrs[disks-2] receives P and ptrs[disks-1] receives Q.
 *        dptr[z0-1] is read unconditionally, so disks must be at least 4.
 * bytes: length of every buffer; processed in 32-byte steps.
 * ptrs:  array of buffer pointers.  The aligned move forms (vmovdqa,
 *        vmovntdq) are used on them; the caller in this program passes
 *        page-aligned buffers.
 *
 * NOTE(review): the asm statements declare no ymm clobbers and carry state
 * in ymm0/ymm2-ymm6 from one statement to the next, so their order must not
 * be changed.
 * NOTE(review): d is an int while bytes is a size_t, so the loop condition
 * compares signed against unsigned.
 */
static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs)
{
u8 **dptr = (u8 **)ptrs;
u8 *p, *q;
int d, z, z0;

z0 = disks - 3; /* Highest data disk */
p = dptr[z0+1]; /* XOR parity */
q = dptr[z0+2]; /* RS syndrome */

/* ymm0 = 32 bytes of 0x1d; ymm3 = all zeroes, used as the compare operand. */
asm volatile("vmovdqa %0,%%ymm0" : : "m" (raid6_avx2_constants.x1d[0]));
asm volatile("vpxor %ymm3,%ymm3,%ymm3"); /* Zero temp */

/* One iteration handles the same 32-byte slice of every buffer. */
for (d = 0; d < bytes; d += 32) {
/* Seed both accumulators (ymm2 = P, ymm4 = Q) from the highest data disk. */
asm volatile("prefetchnta %0" : : "m" (dptr[z0][d]));
asm volatile("vmovdqa %0,%%ymm2" : : "m" (dptr[z0][d]));/* P[0] */
asm volatile("prefetchnta %0" : : "m" (dptr[z0-1][d]));
asm volatile("vmovdqa %ymm2,%ymm4");/* Q[0] */
/* ymm6 holds the next data slice to be folded in. */
asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z0-1][d]));
for (z = z0-2; z >= 0; z--) {
asm volatile("prefetchnta %0" : : "m" (dptr[z][d]));
/*
 * ymm5 = 0xff in every byte lane where Q is negative (top bit set),
 * then Q is doubled bytewise and 0x1d is XORed into exactly those
 * lanes; afterwards the pending data slice is XORed into P and Q.
 */
asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
asm volatile("vpand %ymm0,%ymm5,%ymm5");
asm volatile("vpxor %ymm5,%ymm4,%ymm4");
asm volatile("vpxor %ymm6,%ymm2,%ymm2");
asm volatile("vpxor %ymm6,%ymm4,%ymm4");
/* Load the slice of disk z for the next round. */
asm volatile("vmovdqa %0,%%ymm6" : : "m" (dptr[z][d]));
}
/* Same step once more for the slice still pending in ymm6. */
asm volatile("vpcmpgtb %ymm4,%ymm3,%ymm5");
asm volatile("vpaddb %ymm4,%ymm4,%ymm4");
asm volatile("vpand %ymm0,%ymm5,%ymm5");
asm volatile("vpxor %ymm5,%ymm4,%ymm4");
asm volatile("vpxor %ymm6,%ymm2,%ymm2");
asm volatile("vpxor %ymm6,%ymm4,%ymm4");

/* Store P and Q with non-temporal stores, then clear the accumulators. */
asm volatile("vmovntdq %%ymm2,%0" : "=m" (p[d]));
asm volatile("vpxor %ymm2,%ymm2,%ymm2");
asm volatile("vmovntdq %%ymm4,%0" : "=m" (q[d]));
asm volatile("vpxor %ymm4,%ymm4,%ymm4");
}

/* Fence after the non-temporal stores above. */
asm volatile("sfence" : : : "memory");
}

/* One page-aligned, page-sized buffer per disk: data disks first, then P, then Q. */
static char *data[DISKS];

/*
 * Allocate DISKS page-aligned buffers and run the AVX2 syndrome routine over
 * them once.  The buffer contents are never inspected; the program only
 * checks that the routine runs to completion.
 */
int main(void)
{
	int disk = 0;

	while (disk < DISKS) {
		/* posix_memalign() returns the error number instead of setting errno. */
		errno = posix_memalign((void **)&data[disk], PAGESIZE, PAGESIZE);
		if (errno != 0)
			err(EXIT_FAILURE, "posix_memalign() failed");
		disk++;
	}

	raid6_avx21_gen_syndrome(DISKS, PAGESIZE, (void **)data);
	exit(EXIT_SUCCESS);
}

```

```
$ cc -O2 -o test test.c
```

It succeeded on the host and failed with "Illegal instruction" on the guest.
The avx2 flag is present in /proc/cpuinfo on both the host and the guest.

Here is the disassembly of both the kernel code and the above-mentioned
user program.

kernel:
```
ffffffff8159a7b0 <raid6_avx21_gen_syndrome>:
ffffffff8159a7b0: 8d 47 fd lea -0x3(%rdi),%eax
ffffffff8159a7b3: 55 push %rbp
ffffffff8159a7b4: 48 98 cltq
ffffffff8159a7b6: 48 89 e5 mov %rsp,%rbp
ffffffff8159a7b9: 41 57 push %r15
ffffffff8159a7bb: 48 83 c0 01 add $0x1,%rax
ffffffff8159a7bf: 41 56 push %r14
ffffffff8159a7c1: 41 55 push %r13
ffffffff8159a7c3: 41 54 push %r12
ffffffff8159a7c5: 53 push %rbx
ffffffff8159a7c6: 48 89 d3 mov %rdx,%rbx
ffffffff8159a7c9: 48 8d 14 c5 00 00 00 lea 0x0(,%rax,8),%rdx
ffffffff8159a7d0: 00
ffffffff8159a7d1: 41 89 ff mov %edi,%r15d
ffffffff8159a7d4: 49 89 f4 mov %rsi,%r12
ffffffff8159a7d7: 48 83 ec 08 sub $0x8,%rsp
ffffffff8159a7db: 4c 8b 2c c3 mov (%rbx,%rax,8),%r13
ffffffff8159a7df: 4c 8b 74 13 08 mov 0x8(%rbx,%rdx,1),%r14
ffffffff8159a7e4: 48 89 55 d0 mov %rdx,-0x30(%rbp)
ffffffff8159a7e8: e8 53 ed a9 ff callq ffffffff81039540 <kernel_fpu_begin>
ffffffff8159a7ed: c5 fd 6f 05 2b 2d 4e vmovdqa 0x4e2d2b(%rip),%ymm0 # ffffffff81a7d520 <raid6_avx2_constants>
ffffffff8159a7f4: 00
ffffffff8159a7f5: c5 e5 ef db vpxor %ymm3,%ymm3,%ymm3
ffffffff8159a7f9: 4d 85 e4 test %r12,%r12
ffffffff8159a7fc: 48 8b 55 d0 mov -0x30(%rbp),%rdx
ffffffff8159a800: 0f 84 a7 00 00 00 je ffffffff8159a8ad <raid6_avx21_gen_syndrome+0xfd>
ffffffff8159a806: 45 8d 47 fb lea -0x5(%r15),%r8d
ffffffff8159a80a: 4c 8d 4c 13 f8 lea -0x8(%rbx,%rdx,1),%r9
ffffffff8159a80f: 48 8d 74 13 f0 lea -0x10(%rbx,%rdx,1),%rsi
ffffffff8159a814: 31 ff xor %edi,%edi
ffffffff8159a816: 31 d2 xor %edx,%edx
ffffffff8159a818: 48 89 f8 mov %rdi,%rax
ffffffff8159a81b: 49 03 01 add (%r9),%rax
ffffffff8159a81e: 0f 18 00 prefetchnta (%rax)
ffffffff8159a821: c5 fd 6f 10 vmovdqa (%rax),%ymm2
ffffffff8159a825: 48 89 f8 mov %rdi,%rax
ffffffff8159a828: 48 03 06 add (%rsi),%rax
ffffffff8159a82b: 0f 18 00 prefetchnta (%rax)
ffffffff8159a82e: c5 fd 6f e2 vmovdqa %ymm2,%ymm4
ffffffff8159a832: c5 fd 6f 30 vmovdqa (%rax),%ymm6
ffffffff8159a836: 45 85 c0 test %r8d,%r8d
ffffffff8159a839: 78 36 js ffffffff8159a871 <raid6_avx21_gen_syndrome+0xc1>
ffffffff8159a83b: 44 89 c0 mov %r8d,%eax
ffffffff8159a83e: 48 63 c8 movslq %eax,%rcx
ffffffff8159a841: 49 89 fa mov %rdi,%r10
ffffffff8159a844: 4c 03 14 cb add (%rbx,%rcx,8),%r10
ffffffff8159a848: 41 0f 18 02 prefetchnta (%r10)
ffffffff8159a84c: c5 e5 64 ec vpcmpgtb %ymm4,%ymm3,%ymm5
ffffffff8159a850: c5 dd fc e4 vpaddb %ymm4,%ymm4,%ymm4
ffffffff8159a854: c5 d5 db e8 vpand %ymm0,%ymm5,%ymm5
ffffffff8159a858: c5 dd ef e5 vpxor %ymm5,%ymm4,%ymm4
ffffffff8159a85c: c5 ed ef d6 vpxor %ymm6,%ymm2,%ymm2
ffffffff8159a860: c5 dd ef e6 vpxor %ymm6,%ymm4,%ymm4
ffffffff8159a864: c4 c1 7d 6f 32 vmovdqa (%r10),%ymm6
ffffffff8159a869: 83 e8 01 sub $0x1,%eax
ffffffff8159a86c: 83 f8 ff cmp $0xffffffff,%eax
ffffffff8159a86f: 75 cd jne ffffffff8159a83e <raid6_avx21_gen_syndrome+0x8e>
ffffffff8159a871: c5 e5 64 ec vpcmpgtb %ymm4,%ymm3,%ymm5
ffffffff8159a875: c5 dd fc e4 vpaddb %ymm4,%ymm4,%ymm4
ffffffff8159a879: c5 d5 db e8 vpand %ymm0,%ymm5,%ymm5
ffffffff8159a87d: c5 dd ef e5 vpxor %ymm5,%ymm4,%ymm4
ffffffff8159a881: c5 ed ef d6 vpxor %ymm6,%ymm2,%ymm2
ffffffff8159a885: c5 dd ef e6 vpxor %ymm6,%ymm4,%ymm4
ffffffff8159a889: c4 c1 7d e7 54 3d 00 vmovntdq %ymm2,0x0(%r13,%rdi,1)
ffffffff8159a890: c5 ed ef d2 vpxor %ymm2,%ymm2,%ymm2
ffffffff8159a894: c4 c1 7d e7 24 3e vmovntdq %ymm4,(%r14,%rdi,1)
ffffffff8159a89a: c5 dd ef e4 vpxor %ymm4,%ymm4,%ymm4
ffffffff8159a89e: 83 c2 20 add $0x20,%edx
ffffffff8159a8a1: 48 63 fa movslq %edx,%rdi
ffffffff8159a8a4: 4c 39 e7 cmp %r12,%rdi
ffffffff8159a8a7: 0f 82 6b ff ff ff jb ffffffff8159a818 <raid6_avx21_gen_syndrome+0x68>
ffffffff8159a8ad: 0f ae f8 sfence
ffffffff8159a8b0: e8 ab f2 a9 ff callq ffffffff81039b60 <kernel_fpu_end>
ffffffff8159a8b5: 48 83 c4 08 add $0x8,%rsp
ffffffff8159a8b9: 5b pop %rbx
ffffffff8159a8ba: 41 5c pop %r12
ffffffff8159a8bc: 41 5d pop %r13
ffffffff8159a8be: 41 5e pop %r14
ffffffff8159a8c0: 41 5f pop %r15
ffffffff8159a8c2: 5d pop %rbp
ffffffff8159a8c3: c3 retq
ffffffff8159a8c4: 66 90 xchg %ax,%ax
ffffffff8159a8c6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
ffffffff8159a8cd: 00 00 00
```

user:
```
...
0000000000400510 <main>:
400510: 55 push %rbp
400511: 53 push %rbx
400512: bb 80 10 60 00 mov $0x601080,%ebx
400517: 48 83 ec 08 sub $0x8,%rsp
40051b: e8 90 ff ff ff callq 4004b0 <__errno_location@plt>
400520: 48 89 c5 mov %rax,%rbp
400523: ba 00 10 00 00 mov $0x1000,%edx
400528: be 00 10 00 00 mov $0x1000,%esi
40052d: 48 89 df mov %rbx,%rdi
400530: e8 bb ff ff ff callq 4004f0 <posix_memalign@plt>
400535: 85 c0 test %eax,%eax
400537: 74 17 je 400550 <main+0x40>
400539: 89 45 00 mov %eax,0x0(%rbp)
40053c: be 84 07 40 00 mov $0x400784,%esi
400541: bf 01 00 00 00 mov $0x1,%edi
400546: 31 c0 xor %eax,%eax
400548: e8 83 ff ff ff callq 4004d0 <err@plt>
40054d: 0f 1f 00 nopl (%rax)
400550: 48 83 c3 08 add $0x8,%rbx
400554: c7 45 00 00 00 00 00 movl $0x0,0x0(%rbp)
40055b: 48 81 fb a0 10 60 00 cmp $0x6010a0,%rbx
400562: 75 bf jne 400523 <main+0x13>
400564: 48 8b 35 25 0b 20 00 mov 0x200b25(%rip),%rsi # 601090 <data+0x10>
40056b: 48 8b 0d 26 0b 20 00 mov 0x200b26(%rip),%rcx # 601098 <data+0x18>
400572: c5 fd 6f 05 26 02 00 vmovdqa 0x226(%rip),%ymm0 # 4007a0 <raid6_avx2_constants>
400579: 00
40057a: c5 e5 ef db vpxor %ymm3,%ymm3,%ymm3
40057e: 31 c0 xor %eax,%eax
400580: 48 89 c2 mov %rax,%rdx
400583: 48 03 15 fe 0a 20 00 add 0x200afe(%rip),%rdx # 601088 <data+0x8>
40058a: 0f 18 02 prefetchnta (%rdx)
40058d: c5 fd 6f 12 vmovdqa (%rdx),%ymm2
400591: 48 89 c2 mov %rax,%rdx
400594: 48 03 15 e5 0a 20 00 add 0x200ae5(%rip),%rdx # 601080 <data>
40059b: 0f 18 02 prefetchnta (%rdx)
40059e: c5 fd 6f e2 vmovdqa %ymm2,%ymm4
4005a2: c5 fd 6f 32 vmovdqa (%rdx),%ymm6
4005a6: c5 e5 64 ec vpcmpgtb %ymm4,%ymm3,%ymm5
4005aa: c5 dd fc e4 vpaddb %ymm4,%ymm4,%ymm4
4005ae: c5 d5 db e8 vpand %ymm0,%ymm5,%ymm5
4005b2: c5 dd ef e5 vpxor %ymm5,%ymm4,%ymm4
4005b6: c5 ed ef d6 vpxor %ymm6,%ymm2,%ymm2
4005ba: c5 dd ef e6 vpxor %ymm6,%ymm4,%ymm4
4005be: c5 fd e7 14 06 vmovntdq %ymm2,(%rsi,%rax,1)
4005c3: c5 ed ef d2 vpxor %ymm2,%ymm2,%ymm2
4005c7: c5 fd e7 24 01 vmovntdq %ymm4,(%rcx,%rax,1)
4005cc: c5 dd ef e4 vpxor %ymm4,%ymm4,%ymm4
4005d0: 48 83 c0 20 add $0x20,%rax
4005d4: 48 3d 00 10 00 00 cmp $0x1000,%rax
4005da: 75 a4 jne 400580 <main+0x70>
4005dc: 0f ae f8 sfence
4005df: 31 ff xor %edi,%edi
4005e1: e8 fa fe ff ff callq 4004e0 <exit@plt>
4005e6: 66 2e 0f 1f 84 00 00 nopw %cs:0x0(%rax,%rax,1)
4005ed: 00 00 00

...
```

Thanks,
Satoru