[RFC PATCH v2 18/19] x86/mm: Add PKS table soft mode

From: Rick Edgecombe
Date: Mon Aug 30 2021 - 20:01:10 EST


Some users may not want to treat errant page table writes as fatal, and
would prefer to just log the invalid access and continue. Add a "soft"
mode for this. Add a config to make always make this the default behavior,
and a config to enable it at boot in the absence of the new config.

After a single warning, the page tables will be writable, so no warnings
will be reported until the next reboot.

Signed-off-by: Rick Edgecombe <rick.p.edgecombe@xxxxxxxxx>
---
Documentation/admin-guide/kernel-parameters.txt | 4 ++++
arch/x86/include/asm/pgtable.h | 1 +
arch/x86/mm/pgtable.c | 16 +++++++++++++++-
arch/x86/mm/pkeys.c | 3 +++
mm/Kconfig | 12 ++++++++++++
5 files changed, 35 insertions(+), 1 deletion(-)

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index 7902fce7f1da..8bb290fee77f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4254,6 +4254,10 @@
nopti [X86-64]
Equivalent to pti=off

+ nopkstables [X86-64] Disable PKS page table protection
+
+ pkstablessoft [X86-64] Warn instead of oops on pks tables violations
+
pty.legacy_count=
[KNL] Number of legacy pty's. Overwrites compiled-in
default number.
diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h
index 871308c40dac..2e4b4308bd59 100644
--- a/arch/x86/include/asm/pgtable.h
+++ b/arch/x86/include/asm/pgtable.h
@@ -122,6 +122,7 @@ void pks_tables_check_boottime_disable(void);
void enable_pgtable_write(void);
void disable_pgtable_write(void);
bool pks_tables_inited(void);
+bool pks_tables_fault(unsigned long addr, bool write);
#else /* CONFIG_PKS_PG_TABLES */
static inline void pks_tables_check_boottime_disable(void) { }
static void enable_pgtable_write(void) { }
diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c
index 69b43097c9da..0dcbd976a91b 100644
--- a/arch/x86/mm/pgtable.c
+++ b/arch/x86/mm/pgtable.c
@@ -40,7 +40,7 @@ pgtable_t pte_alloc_one(struct mm_struct *mm)
#ifdef CONFIG_PKS_PG_TABLES
static struct grouped_page_cache gpc_pks;
static bool __ro_after_init pks_tables_inited_val;
-
+static bool __ro_after_init pks_tables_soft;

struct page *alloc_table_node(gfp_t gfp, int node)
{
@@ -971,6 +971,16 @@ bool pks_tables_inited(void)
return pks_tables_inited_val;
}

+bool pks_tables_fault(unsigned long addr, bool write)
+{
+ WARN(1, "Write to protected page table, exploit attempt?");
+ if (!pks_tables_soft)
+ return 0;
+
+ pks_abandon_protections(PKS_KEY_PG_TABLES);
+ return 1;
+}
+
static int __init pks_page_init(void)
{
/*
@@ -999,6 +1009,10 @@ __init void pks_tables_check_boottime_disable(void)
if (cmdline_find_option_bool(boot_command_line, "nopkstables"))
return;

+ if (IS_ENABLED(CONFIG_PKS_PG_TABLES_SOFT_ALWAYS) ||
+ cmdline_find_option_bool(boot_command_line, "pkstablessoft"))
+ pks_tables_soft = true;
+
/*
* PTI will want to allocate higher order page table pages, which the
* PKS table allocator doesn't support. So don't attempt to enable PKS
diff --git a/arch/x86/mm/pkeys.c b/arch/x86/mm/pkeys.c
index 48a390722c06..d8df2bb4bbd0 100644
--- a/arch/x86/mm/pkeys.c
+++ b/arch/x86/mm/pkeys.c
@@ -247,6 +247,9 @@ static const pks_key_callback pks_key_callbacks[PKS_KEY_NR_CONSUMERS] = {
#ifdef CONFIG_DEVMAP_ACCESS_PROTECTION
[PKS_KEY_PGMAP_PROTECTION] = pgmap_pks_fault_callback,
#endif
+#ifdef CONFIG_PKS_PG_TABLES
+ [PKS_KEY_PG_TABLES] = pks_tables_fault,
+#endif
};

bool handle_pks_key_callback(unsigned long address, bool write, u16 key)
diff --git a/mm/Kconfig b/mm/Kconfig
index 0f8e8595a396..1f4fc85cbd2c 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -851,6 +851,18 @@ config PKS_PG_TABLES
depends on !HIGHMEM && !X86_PAE && SPARSEMEM_VMEMMAP
depends on ARCH_HAS_SUPERVISOR_PKEYS

+config PKS_PG_TABLES_SOFT_ALWAYS
+ bool
+ default y
+ depends on PKS_PG_TABLES
+ help
+ This features enables PKS tables "soft" mode by default, such that
+ the first PKS table violation is logged and after that protections
+ are disabled. This is useful for cases where users would not like
+ to treat bugs that incorrectly modify page tables as fatal, but would
+ still like to get notifications of illegitimate attempts to modify
+ them.
+
config PERCPU_STATS
bool "Collect percpu memory statistics"
help
--
2.17.1