[PATCH v1 1/8] sbm: x86: page table arch hooks

From: Petr Tesarik
Date: Wed Feb 14 2024 - 06:36:21 EST


From: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx>

Add arch hooks for the x86 architecture and select CONFIG_HAVE_ARCH_SBM.

Implement arch_sbm_init(): Allocate an arch-specific state page and store
it as SBM instance private data. Map kernel text, static data, the current
task and the current thread stack into the sandbox page tables.

Implement arch_sbm_map_readonly() and arch_sbm_map_writable(): Set the PTEs
for the buffer's pages, allocating intermediate page tables as necessary.

Implement arch_sbm_destroy(): Walk the page table hierarchy and free all
page tables, including the page global directory.

Provide a trivial implementation of arch_sbm_exec() to avoid build
failures, but do not switch to the constructed page tables yet.

Signed-off-by: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx>
---
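For reference, a rough sketch of how a caller in the generic SBM core might
chain these arch hooks together. The run_in_sandbox() wrapper and its
parameters are illustrative only; the generic entry points are added
elsewhere in this series:

	/* illustrative sketch; assumes <linux/sbm.h> from this series */
	static int run_in_sandbox(sbm_func func, void *data, size_t len)
	{
		struct sbm sbm = { };
		struct sbm_buf buf = { .sbm_ptr = data, .size = len };
		int err;

		err = arch_sbm_init(&sbm);		/* build sandbox page tables */
		if (!err)
			err = arch_sbm_map_writable(&sbm, &buf);  /* map the buffer */
		if (!err)
			err = arch_sbm_exec(&sbm, func, data);	  /* run the callback */
		arch_sbm_destroy(&sbm);			/* free all page tables */
		return err;
	}
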
arch/x86/Kconfig             |   1 +
arch/x86/include/asm/sbm.h   |  29 ++++
arch/x86/kernel/Makefile     |   2 +
arch/x86/kernel/sbm/Makefile |  10 ++
arch/x86/kernel/sbm/core.c   | 248 +++++++++++++++++++++++++++++++++++
5 files changed, 290 insertions(+)
create mode 100644 arch/x86/include/asm/sbm.h
create mode 100644 arch/x86/kernel/sbm/Makefile
create mode 100644 arch/x86/kernel/sbm/core.c

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5edec175b9bf..41fa4ab84c15 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -188,6 +188,7 @@ config X86
select HAVE_ARCH_MMAP_RND_COMPAT_BITS if MMU && COMPAT
select HAVE_ARCH_COMPAT_MMAP_BASES if MMU && COMPAT
select HAVE_ARCH_PREL32_RELOCATIONS
+ select HAVE_ARCH_SBM
select HAVE_ARCH_SECCOMP_FILTER
select HAVE_ARCH_THREAD_STRUCT_WHITELIST
select HAVE_ARCH_STACKLEAK
diff --git a/arch/x86/include/asm/sbm.h b/arch/x86/include/asm/sbm.h
new file mode 100644
index 000000000000..01c8d357550b
--- /dev/null
+++ b/arch/x86/include/asm/sbm.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2023-2024 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx>
+ *
+ * SandBox Mode (SBM) declarations for the x86 architecture.
+ */
+#ifndef __ASM_SBM_H
+#define __ASM_SBM_H
+
+#if defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE)
+
+#include <asm/pgtable_types.h>
+
+/**
+ * struct x86_sbm_state - Run-time state of the sandbox environment.
+ * @pgd: Sandbox mode page global directory.
+ *
+ * One instance of this structure is allocated for each sandbox and stored as SBM
+ * instance private data.
+ */
+struct x86_sbm_state {
+ pgd_t *pgd;
+};
+
+#endif /* defined(CONFIG_HAVE_ARCH_SBM) && defined(CONFIG_SANDBOX_MODE) */
+
+#endif /* __ASM_SBM_H */
diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile
index 0000325ab98f..4ad63b7d13ee 100644
--- a/arch/x86/kernel/Makefile
+++ b/arch/x86/kernel/Makefile
@@ -150,6 +150,8 @@ obj-$(CONFIG_X86_CET) += cet.o

obj-$(CONFIG_X86_USER_SHADOW_STACK) += shstk.o

+obj-$(CONFIG_SANDBOX_MODE) += sbm/
+
###
# 64 bit specific files
ifeq ($(CONFIG_X86_64),y)
diff --git a/arch/x86/kernel/sbm/Makefile b/arch/x86/kernel/sbm/Makefile
new file mode 100644
index 000000000000..92d368b526cd
--- /dev/null
+++ b/arch/x86/kernel/sbm/Makefile
@@ -0,0 +1,10 @@
+# SPDX-License-Identifier: GPL-2.0
+#
+# Copyright (C) 2023-2024 Huawei Technologies Duesseldorf GmbH
+#
+# Author: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx>
+#
+# Makefile for the x86 SandBox Mode (SBM) implementation.
+#
+
+obj-y := core.o
diff --git a/arch/x86/kernel/sbm/core.c b/arch/x86/kernel/sbm/core.c
new file mode 100644
index 000000000000..b775e3b387b1
--- /dev/null
+++ b/arch/x86/kernel/sbm/core.c
@@ -0,0 +1,248 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (C) 2023-2024 Huawei Technologies Duesseldorf GmbH
+ *
+ * Author: Petr Tesarik <petr.tesarik1@xxxxxxxxxxxxxxxxxxx>
+ *
+ * SandBox Mode (SBM) implementation for the x86 architecture.
+ */
+
+#include <asm/pgtable.h>
+#include <asm/sbm.h>
+#include <asm/sections.h>
+#include <linux/mm.h>
+#include <linux/sbm.h>
+#include <linux/sched/task_stack.h>
+
+#define GFP_SBM_PGTABLE (GFP_KERNEL | __GFP_ZERO)
+#define PGD_ORDER get_order(sizeof(pgd_t) * PTRS_PER_PGD)
+
+static inline phys_addr_t page_to_ptval(struct page *page)
+{
+ return PFN_PHYS(page_to_pfn(page)) | _PAGE_TABLE;
+}
+
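+/*
+ * Create a PTE mapping of @addr to @pfn with protection @prot in the sandbox
+ * page tables, walking down from the PGD and allocating any missing
+ * intermediate page-table pages along the way.
+ */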
+static int map_page(struct x86_sbm_state *state, unsigned long addr,
+ unsigned long pfn, pgprot_t prot)
+{
+ struct page *page;
+ pgd_t *pgdp;
+ p4d_t *p4dp;
+ pud_t *pudp;
+ pmd_t *pmdp;
+ pte_t *ptep;
+
+ pgdp = pgd_offset_pgd(state->pgd, addr);
+ if (pgd_none(*pgdp)) {
+ page = alloc_page(GFP_SBM_PGTABLE);
+ if (!page)
+ return -ENOMEM;
+ set_pgd(pgdp, __pgd(page_to_ptval(page)));
+ p4dp = (p4d_t *)page_address(page) + p4d_index(addr);
+ } else
+ p4dp = p4d_offset(pgdp, addr);
+
+ if (p4d_none(*p4dp)) {
+ page = alloc_page(GFP_SBM_PGTABLE);
+ if (!page)
+ return -ENOMEM;
+ set_p4d(p4dp, __p4d(page_to_ptval(page)));
+ pudp = (pud_t *)page_address(page) + pud_index(addr);
+ } else
+ pudp = pud_offset(p4dp, addr);
+
+ if (pud_none(*pudp)) {
+ page = alloc_page(GFP_SBM_PGTABLE);
+ if (!page)
+ return -ENOMEM;
+ set_pud(pudp, __pud(page_to_ptval(page)));
+ pmdp = (pmd_t *)page_address(page) + pmd_index(addr);
+ } else
+ pmdp = pmd_offset(pudp, addr);
+
+ if (pmd_none(*pmdp)) {
+ page = alloc_page(GFP_SBM_PGTABLE);
+ if (!page)
+ return -ENOMEM;
+ set_pmd(pmdp, __pmd(page_to_ptval(page)));
+ ptep = (pte_t *)page_address(page) + pte_index(addr);
+ } else
+ ptep = pte_offset_kernel(pmdp, addr);
+
+ set_pte(ptep, pfn_pte(pfn, prot));
+ return 0;
+}
+
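+/*
+ * Map the virtual range [@start, @end) into the sandbox one page at a time,
+ * resolving vmalloc and module addresses through their backing pages and
+ * direct-map addresses through __pa().
+ */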
+static int map_range(struct x86_sbm_state *state, unsigned long start,
+ unsigned long end, pgprot_t prot)
+{
+ unsigned long pfn;
+ int err;
+
+ start = PAGE_ALIGN_DOWN(start);
+ while (start < end) {
+ if (is_vmalloc_or_module_addr((void *)start))
+ pfn = vmalloc_to_pfn((void *)start);
+ else
+ pfn = PHYS_PFN(__pa(start));
+ err = map_page(state, start, pfn, prot);
+ if (err)
+ return err;
+ start += PAGE_SIZE;
+ }
+
+ return 0;
+}
+
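+/* Hooks for the generic SBM code: map a buffer read-only or writable. */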
+int arch_sbm_map_readonly(struct sbm *sbm, const struct sbm_buf *buf)
+{
+ return map_range(sbm->private, (unsigned long)buf->sbm_ptr,
+ (unsigned long)buf->sbm_ptr + buf->size,
+ PAGE_READONLY);
+}
+
+int arch_sbm_map_writable(struct sbm *sbm, const struct sbm_buf *buf)
+{
+ return map_range(sbm->private, (unsigned long)buf->sbm_ptr,
+ (unsigned long)buf->sbm_ptr + buf->size,
+ PAGE_SHARED);
+}
+
+/* Map kernel text, data, rodata, BSS and static per-cpu sections. */
+static int map_kernel(struct x86_sbm_state *state)
+{
+ int __maybe_unused cpu;
+ int err;
+
+ err = map_range(state, (unsigned long)_stext, (unsigned long)_etext,
+ PAGE_READONLY_EXEC);
+ if (err)
+ return err;
+
+ err = map_range(state, (unsigned long)__entry_text_start,
+ (unsigned long)__entry_text_end, PAGE_KERNEL_ROX);
+ if (err)
+ return err;
+
+ err = map_range(state, (unsigned long)_sdata, (unsigned long)_edata,
+ PAGE_READONLY);
+ if (err)
+ return err;
+ err = map_range(state, (unsigned long)__bss_start,
+ (unsigned long)__bss_stop, PAGE_READONLY);
+ if (err)
+ return err;
+ err = map_range(state, (unsigned long)__start_rodata,
+ (unsigned long)__end_rodata, PAGE_READONLY);
+ if (err)
+ return err;
+
+#ifdef CONFIG_SMP
+ for_each_possible_cpu(cpu) {
+ unsigned long off = per_cpu_offset(cpu);
+
+ err = map_range(state, (unsigned long)__per_cpu_start + off,
+ (unsigned long)__per_cpu_end + off,
+ PAGE_READONLY);
+ if (err)
+ return err;
+ }
+#endif
+
+ return 0;
+}
+
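+/*
+ * Allocate the arch-specific state page and the sandbox PGD, then map the
+ * kernel sections, the current task_struct and the current thread stack
+ * read-only into the sandbox.
+ */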
+int arch_sbm_init(struct sbm *sbm)
+{
+ struct x86_sbm_state *state;
+ unsigned long stack;
+ int err;
+
+ BUILD_BUG_ON(sizeof(*state) > PAGE_SIZE);
+ state = (struct x86_sbm_state *)__get_free_page(GFP_KERNEL);
+ if (!state)
+ return -ENOMEM;
+ sbm->private = state;
+
+ state->pgd = (pgd_t *)__get_free_pages(GFP_SBM_PGTABLE, PGD_ORDER);
+ if (!state->pgd)
+ return -ENOMEM;
+
+ err = map_kernel(state);
+ if (err)
+ return err;
+
+ err = map_range(state, (unsigned long)current,
+ (unsigned long)(current + 1), PAGE_READONLY);
+ if (err)
+ return err;
+
+ stack = (unsigned long)task_stack_page(current);
+ err = map_range(state, stack, stack + THREAD_SIZE, PAGE_READONLY);
+ if (err)
+ return err;
+
+ return 0;
+}
+
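+/*
+ * Teardown helpers: walk each populated entry of a page-table level and free
+ * the tables below it.  The PTRS_PER_* > 1 checks skip levels that are folded
+ * into their parent and were never allocated as separate pages.
+ */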
+static void free_pmd(pmd_t *pmd)
+{
+ pmd_t *pmdp;
+
+ for (pmdp = pmd; pmdp < pmd + PTRS_PER_PMD; ++pmdp)
+ if (!pmd_none(*pmdp))
+ free_page(pmd_page_vaddr(*pmdp));
+ if (PTRS_PER_PMD > 1)
+ free_page((unsigned long)pmd);
+}
+
+static void free_pud(pud_t *pud)
+{
+ pud_t *pudp;
+
+ for (pudp = pud; pudp < pud + PTRS_PER_PUD; ++pudp)
+ if (!pud_none(*pudp))
+ free_pmd(pmd_offset(pudp, 0));
+ if (PTRS_PER_PUD > 1)
+ free_page((unsigned long)pud);
+}
+
+static void free_p4d(p4d_t *p4d)
+{
+ p4d_t *p4dp;
+
+ for (p4dp = p4d; p4dp < p4d + PTRS_PER_P4D; ++p4dp)
+ if (!p4d_none(*p4dp))
+ free_pud(pud_offset(p4dp, 0));
+ if (PTRS_PER_P4D > 1)
+ free_page((unsigned long)p4d);
+}
+
+static void free_pgd(pgd_t *pgd)
+{
+ pgd_t *pgdp;
+
+ for (pgdp = pgd; pgdp < pgd + PTRS_PER_PGD; ++pgdp)
+ if (!pgd_none(*pgdp))
+ free_p4d(p4d_offset(pgdp, 0));
+}
+
+void arch_sbm_destroy(struct sbm *sbm)
+{
+ struct x86_sbm_state *state = sbm->private;
+
+ if (!state)
+ return;
+
+ if (state->pgd) {
+ free_pgd(state->pgd);
+ free_pages((unsigned long)state->pgd, PGD_ORDER);
+ }
+ free_page((unsigned long)state);
+ sbm->private = NULL;
+}
+
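+/*
+ * Placeholder: run @func in normal kernel mode for now.  Switching to the
+ * sandbox page tables is added by a later patch.
+ */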
+int arch_sbm_exec(struct sbm *sbm, sbm_func func, void *args)
+{
+ return func(args);
+}
--
2.34.1