Re: [patch 4/9] x86/io: Speedup schedule out of I/O bitmap user

From: Andy Lutomirski
Date: Fri Nov 08 2019 - 19:24:53 EST


On 11/6/19 11:35 AM, Thomas Gleixner wrote:
> There is no requirement to update the TSS I/O bitmap when a thread using it is
> scheduled out and the incoming thread does not use it.
>
> For the permission check based on the TSS I/O bitmap the CPU calculates the memory
> location of the I/O bitmap by the address of the TSS and the io_bitmap_base member
> of the tss_struct. The easiest way to invalidate the I/O bitmap is to switch the
> offset to an address outside of the TSS limit.
>
> If an I/O instruction is issued from user space the TSS limit causes #GP to be
> raised in the same was as valid I/O bitmap with all bits set to 1 would do.
>
> This removes the extra work when an I/O bitmap using task is scheduled out
> and puts the burden on the rare I/O bitmap users when they are scheduled
> in.
>
> Signed-off-by: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
> ---
> arch/x86/include/asm/processor.h | 38 +++++++++++++++++--------
> arch/x86/kernel/cpu/common.c | 3 +
> arch/x86/kernel/doublefault.c | 2 -
> arch/x86/kernel/process.c | 59 +++++++++++++++++++++------------------
> 4 files changed, 61 insertions(+), 41 deletions(-)
>
> --- a/arch/x86/include/asm/processor.h
> +++ b/arch/x86/include/asm/processor.h
> @@ -330,8 +330,23 @@ struct x86_hw_tss {
> #define IO_BITMAP_BITS 65536
> #define IO_BITMAP_BYTES (IO_BITMAP_BITS/8)
> #define IO_BITMAP_LONGS (IO_BITMAP_BYTES/sizeof(long))
> -#define IO_BITMAP_OFFSET (offsetof(struct tss_struct, io_bitmap) - offsetof(struct tss_struct, x86_tss))
> -#define INVALID_IO_BITMAP_OFFSET 0x8000
> +
> +#define IO_BITMAP_OFFSET_VALID \
> + (offsetof(struct tss_struct, io_bitmap) - \
> + offsetof(struct tss_struct, x86_tss))
> +
> +/*
> + * sizeof(unsigned long) coming from an extra "long" at the end
> + * of the iobitmap.
> + *
> + * -1? seg base+limit should be pointing to the address of the
> + * last valid byte

What's with the '?'

> + */
> +#define __KERNEL_TSS_LIMIT \
> + (IO_BITMAP_OFFSET_VALID + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
> +
> +/* Base offset outside of TSS_LIMIT so unpriviledged IO causes #GP */
> +#define IO_BITMAP_OFFSET_INVALID (__KERNEL_TSS_LIMIT + 1)
>
> struct entry_stack {
> unsigned long words[64];
> @@ -350,6 +365,15 @@ struct tss_struct {
> struct x86_hw_tss x86_tss;
>
> /*
> + * Store the dirty size of the last io bitmap offender. The next
> + * one will have to do the cleanup as the switch out to a non
> + * io bitmap user will just set x86_tss.io_bitmap_base to a value
> + * outside of the TSS limit. So for sane tasks there is no need
> + * to actually touch the io_bitmap at all.
> + */
> + unsigned int io_bitmap_prev_max;

Hmm. I'm wondering if a clearer way to say this would be:

io_bitmap_max_allowed_byte: offset from the start of the io bitmap to
the first byte that might contain a zero bit. If we switch from a task
that uses ioperm() to one that does not, we will invalidate the io
bitmap by changing the offset. The next task that starts using the io
bitmap again will need to make sure it updates all the bytes through
io_bitmap_max_allowed_byte.

But your description is okay, too.

It's worth noting that, due to Meltdown, this patch leaks the io bitmap
of io bitmap-using tasks. I'm not sure I care.

> +
> + /*
> * The extra 1 is there because the CPU will access an
> * additional byte beyond the end of the IO permission
> * bitmap. The extra byte must be all 1 bits, and must
> @@ -360,16 +384,6 @@ struct tss_struct {
>
> DECLARE_PER_CPU_PAGE_ALIGNED(struct tss_struct, cpu_tss_rw);
>
> -/*
> - * sizeof(unsigned long) coming from an extra "long" at the end
> - * of the iobitmap.
> - *
> - * -1? seg base+limit should be pointing to the address of the
> - * last valid byte
> - */
> -#define __KERNEL_TSS_LIMIT \
> - (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1)
> -
> /* Per CPU interrupt stacks */
> struct irq_stack {
> char stack[IRQ_STACK_SIZE];
> --- a/arch/x86/kernel/cpu/common.c
> +++ b/arch/x86/kernel/cpu/common.c
> @@ -1863,7 +1863,8 @@ void cpu_init(void)
>
> /* Initialize the TSS. */
> tss_setup_ist(tss);
> - tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET;
> + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;
> + tss->io_bitmap_prev_max = 0;
> memset(tss->io_bitmap, 0xff, sizeof(tss->io_bitmap));
> set_tss_desc(cpu, &get_cpu_entry_area(cpu)->tss.x86_tss);
>
> --- a/arch/x86/kernel/doublefault.c
> +++ b/arch/x86/kernel/doublefault.c
> @@ -54,7 +54,7 @@ struct x86_hw_tss doublefault_tss __cach
> .sp0 = STACK_START,
> .ss0 = __KERNEL_DS,
> .ldt = 0,
> - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
> + .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
>
> .ip = (unsigned long) doublefault_fn,
> /* 0x2 bit is always set */
> --- a/arch/x86/kernel/process.c
> +++ b/arch/x86/kernel/process.c
> @@ -72,18 +72,9 @@
> #ifdef CONFIG_X86_32
> .ss0 = __KERNEL_DS,
> .ss1 = __KERNEL_CS,
> - .io_bitmap_base = INVALID_IO_BITMAP_OFFSET,
> #endif
> + .io_bitmap_base = IO_BITMAP_OFFSET_INVALID,
> },
> -#ifdef CONFIG_X86_32
> - /*
> - * Note that the .io_bitmap member must be extra-big. This is because
> - * the CPU will access an additional byte beyond the end of the IO
> - * permission bitmap. The extra byte must be all 1 bits, and must
> - * be within the limit.
> - */
> - .io_bitmap = { [0 ... IO_BITMAP_LONGS] = ~0 },
> -#endif
> };
> EXPORT_PER_CPU_SYMBOL(cpu_tss_rw);
>
> @@ -112,18 +103,18 @@ void exit_thread(struct task_struct *tsk
> struct thread_struct *t = &tsk->thread;
> unsigned long *bp = t->io_bitmap_ptr;
> struct fpu *fpu = &t->fpu;
> + struct tss_struct *tss;
>
> if (bp) {
> - struct tss_struct *tss = &per_cpu(cpu_tss_rw, get_cpu());
> + preempt_disable();
> + tss = this_cpu_ptr(&cpu_tss_rw);
>
> t->io_bitmap_ptr = NULL;
> - clear_thread_flag(TIF_IO_BITMAP);
> - /*
> - * Careful, clear this in the TSS too:
> - */
> - memset(tss->io_bitmap, 0xff, t->io_bitmap_max);
> t->io_bitmap_max = 0;
> - put_cpu();
> + clear_thread_flag(TIF_IO_BITMAP);
> + /* Invalidate the io bitmap base in the TSS */
> + tss->x86_tss.io_bitmap_base = IO_BITMAP_OFFSET_INVALID;

The first time I read this, I thought this code was all unnecessary.
But now I think I understand. How about a comment:

/*
* We're about to free the IO bitmap. We need to disable it in the TSS
* too so that switch_to() doesn't see an inconsistent state.
*/

In any case:

Acked-by: Andy Lutomirski <luto@xxxxxxxxxx>