[RFC v4][PATCH part-1 7/7] mm/asi: Implement PTI with ASI

From: Alexandre Chartre
Date: Mon May 04 2020 - 10:53:59 EST


ASI supersedes PTI. If both CONFIG_ADDRESS_SPACE_ISOLATION and
CONFIG_PAGE_TABLE_ISOLATION are set, then PTI is implemented using
ASI. For each user process, a "user" ASI is defined with the
PTI pagetable. The user ASI is used when running userland code; it
is exited when entering a syscall and re-entered when the syscall
returns to userland.

As with any ASI, interrupts/exceptions/NMIs will interrupt the
ASI, and the ASI will resume when the interrupt/exception/NMI has
completed. Faults won't abort the user ASI, as user faults are
handled by the kernel before returning to userland.

Signed-off-by: Alexandre Chartre <alexandre.chartre@xxxxxxxxxx>
---
arch/x86/entry/calling.h | 13 ++++++++++++-
arch/x86/entry/common.c | 29 ++++++++++++++++++++++++-----
arch/x86/entry/entry_64.S | 6 ++++++
arch/x86/include/asm/asi.h | 9 +++++++++
arch/x86/include/asm/tlbflush.h | 11 +++++++++--
arch/x86/mm/asi.c | 9 +++++++++
arch/x86/mm/pti.c | 28 ++++++++++++++++++++--------
include/linux/mm_types.h | 5 +++++
kernel/fork.c | 17 +++++++++++++++++
9 files changed, 111 insertions(+), 16 deletions(-)

diff --git a/arch/x86/entry/calling.h b/arch/x86/entry/calling.h
index ca23b79adecf..e452fce1435f 100644
--- a/arch/x86/entry/calling.h
+++ b/arch/x86/entry/calling.h
@@ -176,16 +176,27 @@ For 32-bit we have the following conventions - kernel is built with
#if defined(CONFIG_ADDRESS_SPACE_ISOLATION)

/*
- * For now, ASI is not compatible with PTI.
+ * ASI supersedes the entry points used by PTI. If both
+ * CONFIG_ADDRESS_SPACE_ISOLATION and CONFIG_PAGE_TABLE_ISOLATION are
+ * set then PTI is implemented using ASI.
*/

.macro SWITCH_TO_KERNEL_CR3 scratch_reg:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ ASI_INTERRUPT \scratch_reg
+.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_NOSTACK scratch_reg:req scratch_reg2:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ ASI_RESUME \scratch_reg
+.Lend_\@:
.endm

.macro SWITCH_TO_USER_CR3_STACK scratch_reg:req
+ ALTERNATIVE "jmp .Lend_\@", "", X86_FEATURE_PTI
+ ASI_RESUME \scratch_reg
+.Lend_\@:
.endm

.macro SAVE_AND_SWITCH_TO_KERNEL_CR3 scratch_reg:req save_reg:req
diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c
index 76735ec813e6..752b6672d455 100644
--- a/arch/x86/entry/common.c
+++ b/arch/x86/entry/common.c
@@ -35,6 +35,7 @@
#include <asm/nospec-branch.h>
#include <asm/io_bitmap.h>
#include <asm/syscall.h>
+#include <asm/asi.h>

#define CREATE_TRACE_POINTS
#include <trace/events/syscalls.h>
@@ -50,6 +51,13 @@ __visible inline void enter_from_user_mode(void)
static inline void enter_from_user_mode(void) {}
#endif

+static inline void syscall_enter(void)
+{
+ /* syscall enter has interrupted ASI, now exit ASI */
+ asi_exit(current->mm->user_asi);
+ enter_from_user_mode();
+}
+
static void do_audit_syscall_entry(struct pt_regs *regs, u32 arch)
{
#ifdef CONFIG_X86_64
@@ -225,6 +233,17 @@ __visible inline void prepare_exit_to_usermode(struct pt_regs *regs)
mds_user_clear_cpu_buffers();
}

+static inline void prepare_syscall_return(struct pt_regs *regs)
+{
+ prepare_exit_to_usermode(regs);
+
+ /*
+ * Syscall return will resume ASI, prepare resume to enter
+ * user ASI.
+ */
+ asi_deferred_enter(current->mm->user_asi);
+}
+
#define SYSCALL_EXIT_WORK_FLAGS \
(_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
_TIF_SINGLESTEP | _TIF_SYSCALL_TRACEPOINT)
@@ -276,7 +295,7 @@ __visible inline void syscall_return_slowpath(struct pt_regs *regs)
syscall_slow_exit_work(regs, cached_flags);

local_irq_disable();
- prepare_exit_to_usermode(regs);
+ prepare_syscall_return(regs);
}

#ifdef CONFIG_X86_64
@@ -284,7 +303,7 @@ __visible void do_syscall_64(unsigned long nr, struct pt_regs *regs)
{
struct thread_info *ti;

- enter_from_user_mode();
+ syscall_enter();
local_irq_enable();
ti = current_thread_info();
if (READ_ONCE(ti->flags) & _TIF_WORK_SYSCALL_ENTRY)
@@ -343,7 +362,7 @@ static __always_inline void do_syscall_32_irqs_on(struct pt_regs *regs)
/* Handles int $0x80 */
__visible void do_int80_syscall_32(struct pt_regs *regs)
{
- enter_from_user_mode();
+ syscall_enter();
local_irq_enable();
do_syscall_32_irqs_on(regs);
}
@@ -366,7 +385,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
*/
regs->ip = landing_pad;

- enter_from_user_mode();
+ syscall_enter();

local_irq_enable();

@@ -388,7 +407,7 @@ __visible long do_fast_syscall_32(struct pt_regs *regs)
/* User code screwed up. */
local_irq_disable();
regs->ax = -EFAULT;
- prepare_exit_to_usermode(regs);
+ prepare_syscall_return(regs);
return 0; /* Keep it simple: use IRET. */
}

diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ac47da63a29f..003c945dd6b0 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -627,6 +627,9 @@ ret_from_intr:
.Lretint_user:
mov %rsp,%rdi
call prepare_exit_to_usermode
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+ ASI_PREPARE_RESUME
+#endif
TRACE_IRQS_ON

SYM_INNER_LABEL(swapgs_restore_regs_and_return_to_usermode, SYM_L_GLOBAL)
@@ -1491,6 +1494,9 @@ SYM_CODE_START(nmi)
movq %rsp, %rdi
movq $-1, %rsi
call do_nmi
+#ifdef CONFIG_ADDRESS_SPACE_ISOLATION
+ ASI_PREPARE_RESUME
+#endif

/*
* Return back to user mode. We must *not* do the normal exit
diff --git a/arch/x86/include/asm/asi.h b/arch/x86/include/asm/asi.h
index b8d7b936cd19..ac0594d4f549 100644
--- a/arch/x86/include/asm/asi.h
+++ b/arch/x86/include/asm/asi.h
@@ -62,6 +62,10 @@ struct asi_tlb_state {
struct asi_tlb_pgtable tlb_pgtables[ASI_TLB_NR_DYN_ASIDS];
};

+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+#define ASI_PCID_PREFIX_USER 0x80 /* user ASI */
+#endif
+
struct asi_type {
int pcid_prefix; /* PCID prefix */
struct asi_tlb_state *tlb_state; /* percpu ASI TLB state */
@@ -139,6 +143,7 @@ void asi_schedule_out(struct task_struct *task);
void asi_schedule_in(struct task_struct *task);
bool asi_fault(struct pt_regs *regs, unsigned long error_code,
unsigned long address, enum asi_fault_origin fault_origin);
+void asi_deferred_enter(struct asi *asi);

extern struct asi *asi_create(struct asi_type *type);
extern void asi_destroy(struct asi *asi);
@@ -146,6 +151,10 @@ extern void asi_set_pagetable(struct asi *asi, pgd_t *pagetable);
extern int asi_enter(struct asi *asi);
extern void asi_exit(struct asi *asi);

+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+DECLARE_ASI_TYPE(user);
+#endif
+
static inline void asi_set_log_policy(struct asi *asi, int policy)
{
asi->fault_log_policy = policy;
diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h
index 241058ff63ba..db114deeb763 100644
--- a/arch/x86/include/asm/tlbflush.h
+++ b/arch/x86/include/asm/tlbflush.h
@@ -390,6 +390,8 @@ extern void initialize_tlbstate_and_flush(void);
*/
static inline void invalidate_user_asid(u16 asid)
{
+ struct asi_tlb_state *tlb_state;
+
/* There is no user ASID if address space separation is off */
if (!IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION))
return;
@@ -404,8 +406,13 @@ static inline void invalidate_user_asid(u16 asid)
if (!static_cpu_has(X86_FEATURE_PTI))
return;

- __set_bit(kern_pcid(asid),
- (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
+ if (IS_ENABLED(CONFIG_ADDRESS_SPACE_ISOLATION)) {
+ tlb_state = get_cpu_ptr(asi_type_user.tlb_state);
+ tlb_state->tlb_pgtables[asid].id = 0;
+ } else {
+ __set_bit(kern_pcid(asid),
+ (unsigned long *)this_cpu_ptr(&cpu_tlbstate.user_pcid_flush_mask));
+ }
}

/*
diff --git a/arch/x86/mm/asi.c b/arch/x86/mm/asi.c
index a4a5d35fb779..b63a0a883293 100644
--- a/arch/x86/mm/asi.c
+++ b/arch/x86/mm/asi.c
@@ -14,6 +14,10 @@
#include <asm/mmu_context.h>
#include <asm/tlbflush.h>

+#ifdef CONFIG_PAGE_TABLE_ISOLATION
+DEFINE_ASI_TYPE(user, ASI_PCID_PREFIX_USER, false);
+#endif
+
static void asi_log_fault(struct asi *asi, struct pt_regs *regs,
unsigned long error_code, unsigned long address,
enum asi_fault_origin fault_origin)
@@ -314,6 +318,11 @@ void asi_exit(struct asi *asi)
}
EXPORT_SYMBOL(asi_exit);

+void asi_deferred_enter(struct asi *asi)
+{
+ asi_switch_to_asi_cr3(asi, ASI_SWITCH_ON_RESUME);
+}
+
void asi_prepare_resume(void)
{
struct asi_session *asi_session;
diff --git a/arch/x86/mm/pti.c b/arch/x86/mm/pti.c
index 843aa10a4cb6..a1d09c163709 100644
--- a/arch/x86/mm/pti.c
+++ b/arch/x86/mm/pti.c
@@ -430,6 +430,18 @@ static void __init pti_clone_p4d(unsigned long addr)
*user_p4d = *kernel_p4d;
}

+static void __init pti_map_va(unsigned long va)
+{
+ phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
+ pte_t *target_pte;
+
+ target_pte = pti_user_pagetable_walk_pte(va);
+ if (WARN_ON(!target_pte))
+ return;
+
+ *target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL);
+}
+
/*
* Clone the CPU_ENTRY_AREA and associated data into the user space visible
* page table.
@@ -457,15 +469,15 @@ static void __init pti_clone_user_shared(void)
* is set up.
*/

- unsigned long va = (unsigned long)&per_cpu(cpu_tss_rw, cpu);
- phys_addr_t pa = per_cpu_ptr_to_phys((void *)va);
- pte_t *target_pte;
-
- target_pte = pti_user_pagetable_walk_pte(va);
- if (WARN_ON(!target_pte))
- return;
+ pti_map_va((unsigned long)&per_cpu(cpu_tss_rw, cpu));

- *target_pte = pfn_pte(pa >> PAGE_SHIFT, PAGE_KERNEL);
+ if (IS_ENABLED(CONFIG_ADDRESS_SPACE_ISOLATION)) {
+ /*
+ * Map the ASI session. We need to always be able
+ * to access the ASI session.
+ */
+ pti_map_va((unsigned long)&per_cpu(cpu_tlbstate, cpu));
+ }
}
}

diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 4aba6c0c2ba8..e2c6d63f39e5 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -25,6 +25,7 @@

struct address_space;
struct mem_cgroup;
+struct asi;

/*
* Each physical page in the system has a struct page associated with
@@ -534,6 +535,10 @@ struct mm_struct {
atomic_long_t hugetlb_usage;
#endif
struct work_struct async_put_work;
+#if defined(CONFIG_ADDRESS_SPACE_ISOLATION) && defined(CONFIG_PAGE_TABLE_ISOLATION)
+ /* ASI used for user address space */
+ struct asi *user_asi;
+#endif
} __randomize_layout;

/*
diff --git a/kernel/fork.c b/kernel/fork.c
index 8c700f881d92..f245f9a4c55d 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -101,6 +101,7 @@
#include <asm/mmu_context.h>
#include <asm/cacheflush.h>
#include <asm/tlbflush.h>
+#include <asm/asi.h>

#include <trace/events/sched.h>

@@ -698,6 +699,10 @@ void __mmdrop(struct mm_struct *mm)
mmu_notifier_subscriptions_destroy(mm);
check_mm(mm);
put_user_ns(mm->user_ns);
+ if (IS_ENABLED(CONFIG_ADDRESS_SPACE_ISOLATION) &&
+ IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ asi_destroy(mm->user_asi);
+ }
free_mm(mm);
}
EXPORT_SYMBOL_GPL(__mmdrop);
@@ -1049,6 +1054,18 @@ static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p,
if (init_new_context(p, mm))
goto fail_nocontext;

+ if (IS_ENABLED(CONFIG_ADDRESS_SPACE_ISOLATION) &&
+ IS_ENABLED(CONFIG_PAGE_TABLE_ISOLATION)) {
+ /*
+ * If we have PTI and ASI then use ASI to switch between
+ * user and kernel spaces, so create an ASI for this mm.
+ */
+ mm->user_asi = asi_create_user();
+ if (!mm->user_asi)
+ goto fail_nocontext;
+ asi_set_pagetable(mm->user_asi, kernel_to_user_pgdp(mm->pgd));
+ }
+
mm->user_ns = get_user_ns(user_ns);
return mm;

--
2.18.2