x86: XSAVE flags mystery

From: Tavis Ormandy
Date: Thu Jan 12 2023 - 13:05:25 EST


Hello, I have a mystery and wonder if it might be a kernel bug.

I've been trying to track down why the tests in a checkpointing library
were flaky. It turns out the xstate_bv flag appears to change
non-deterministically when using AVX instructions... but why is that?

Is this a bug? Is there some state being leaked across a context switch?

Here is a reproducer:

https://godbolt.org/z/h8nY4d6c4

I would expect this to loop forever, but if you click recompile (the
circle arrow icon at the bottom), notice that the number of tests changes
and that it does exit the loop.

I've also attached it for reference, please compile it with gcc -mavx.

I don't know, any ideas?

Tavis.

--
_o) $ lynx lock.cmpxchg8b.com
/\\ _o) _o) $ finger taviso@xxxxxxx
_\_V _( ) _( ) @taviso
#include <inttypes.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#include <immintrin.h>
#include <x86intrin.h>

#if !defined(__AVX__)
# error You must compile this with -mavx to get the needed intrinsics
#endif

/*
 * Legacy x87/SSE save image. The members add up to 512 bytes and the
 * type is 16-byte aligned; struct xsave_struct places it at offset 0.
 */
struct i387_fxsave_struct {
    uint16_t cwd;                       /* control word */
    uint16_t swd;                       /* status word */
    uint16_t twd;                       /* tag word */
    uint16_t fop;                       /* opcode of the last instruction */

    /* Two overlapping views of the same 16 bytes. */
    union {
        struct {                        /* 64-bit pointer view */
            uint64_t rip;               /* instruction pointer */
            uint64_t rdp;               /* data pointer */
        };
        struct {                        /* offset/selector view */
            uint32_t fip;               /* FPU IP offset */
            uint32_t fcs;               /* FPU IP selector */
            uint32_t foo;               /* FPU operand offset */
            uint32_t fos;               /* FPU operand selector */
        };
    };

    uint32_t mxcsr;                     /* MXCSR register state */
    uint32_t mxcsr_mask;                /* MXCSR mask */
    uint32_t st_space[32];              /* 128 bytes of x87 register data */
    uint32_t xmm_space[64];             /* 256 bytes of XMM register data */
    uint32_t padding[12];               /* 48 bytes of padding */

    /* Final 48 bytes, addressable under either name. */
    union {
        uint32_t padding1[12];
        uint32_t sw_reserved[12];
    };
} __attribute__((aligned(16)));

/*
 * 256-byte region that struct xsave_struct places after the XSAVE header.
 * Going by the name it holds the upper halves of the YMM registers.
 */
struct ymmh_struct {
    uint32_t ymmh_space[64];            /* 64 x 4 bytes = 256 bytes */
};

/*
 * 64-byte XSAVE header. Only xstate_bv is read by this program; the
 * remaining seven quadwords are carried as reserved space.
 */
struct xsave_hdr_struct {
    uint64_t xstate_bv;                 /* state-component bitmap written by XSAVE */
    uint64_t reserved1[2];
    uint64_t reserved2[5];
} __attribute__((packed));

/*
 * Buffer handed to _xsave(): the legacy image, then the XSAVE header,
 * then the ymmh region. Packed so the three parts are contiguous, and
 * aligned to 64 bytes.
 */
struct xsave_struct {
    struct i387_fxsave_struct i387;         /* legacy region */
    struct xsave_hdr_struct   xsave_hdr;    /* header holding xstate_bv */
    struct ymmh_struct        ymmh;         /* ymmh region */
} __attribute__((packed, aligned(64)));


/*
 * Reproducer: does XSAVE report a stable XSTATE_BV after VSQRTSS
 * followed by VZEROALL?
 *
 * Runs the sequence once to record a baseline XSTATE_BV, then repeats it
 * in a loop until a later XSAVE reports a different value. The mismatch
 * is printed to stderr and the program returns 0. If the value never
 * changes, the loop does not terminate.
 *
 * argc and argv are unused.
 */
int main(int argc, char **argv)
{
    /*
     * Requested-feature bitmap passed to _xsave(): bits 0 and 1, i.e. the
     * x87 and SSE state components (see the Intel SDM). Spelled in hex
     * because 0b literals are a GNU extension before C23.
     */
    enum { XSAVE_MASK = 0x3 };

    static struct xsave_struct initial = {0};
    static struct xsave_struct xsave = {0};
    register __m128 a = _mm_setzero_ps();
    register __m128 b = _mm_setzero_ps();
    register __m128 c = _mm_set1_ps(2);
    uint64_t count;

    (void)argc;
    (void)argv;

    // VSQRTSS followed by VZEROALL makes XSAVE non-deterministic.
    // Is this a bug?
    // Note: serializing doesn't seem to make a difference

    // Do a test execution just to record our XINUSE flags.
    //
    // AT&T operand order puts the destination last, so the output operand
    // (%0) must be the final one; otherwise the instruction would write
    // into a register the compiler was told is input-only.
    __asm__ volatile ("vsqrtss %2, %1, %0" : "=v"(c) : "v"(b), "v"(a));

    // Reset everything
    _mm256_zeroall();

    // Now fetch our XINUSE bitmap.
    _xsave(&initial, XSAVE_MASK);

    fprintf(stderr, "first execution, our flags: %010" PRIX64 "\n",
            initial.xsave_hdr.xstate_bv);

    for (count = 0;; count++) {
        // Same sequence as the baseline run above.
        __asm__ volatile ("vsqrtss %2, %1, %0" : "=v"(c) : "v"(b), "v"(a));
        _mm256_zeroall();
        _xsave(&xsave, XSAVE_MASK);

        if (xsave.xsave_hdr.xstate_bv != initial.xsave_hdr.xstate_bv) {
            fprintf(stderr,
                    "After %" PRIu64 " tests, our XINUSE was %010" PRIx64
                    " vs %010" PRIx64 "\n",
                    count,
                    xsave.xsave_hdr.xstate_bv,
                    initial.xsave_hdr.xstate_bv);
            break;
        }
    }

    return 0;
}