[RFC V2] mm: Generalize notify_page_fault()

From: Anshuman Khandual
Date: Tue Jun 04 2019 - 02:39:40 EST


Similar notify_page_fault() definitions are being used by architectures
duplicating much of the same code. This attempts to unify them into a
single implementation, generalize it and then move it to a common place.
kprobes_built_in() can detect CONFIG_KPROBES, hence notify_page_fault()
need not be wrapped again within CONFIG_KPROBES. Trap number argument can
now contain upto an 'unsigned int' accommodating all possible platforms.

Cc: linux-arm-kernel@xxxxxxxxxxxxxxxxxxx
Cc: linux-ia64@xxxxxxxxxxxxxxx
Cc: linuxppc-dev@xxxxxxxxxxxxxxxx
Cc: linux-s390@xxxxxxxxxxxxxxx
Cc: linux-sh@xxxxxxxxxxxxxxx
Cc: sparclinux@xxxxxxxxxxxxxxx
Cc: x86@xxxxxxxxxx
Cc: Andrew Morton <akpm@xxxxxxxxxxxxxxxxxxxx>
Cc: Michal Hocko <mhocko@xxxxxxxx>
Cc: Matthew Wilcox <willy@xxxxxxxxxxxxx>
Cc: Mark Rutland <mark.rutland@xxxxxxx>
Cc: Christophe Leroy <christophe.leroy@xxxxxx>
Cc: Stephen Rothwell <sfr@xxxxxxxxxxxxxxxx>
Cc: Andrey Konovalov <andreyknvl@xxxxxxxxxx>
Cc: Michael Ellerman <mpe@xxxxxxxxxxxxxx>
Cc: Paul Mackerras <paulus@xxxxxxxxx>
Cc: Russell King <linux@xxxxxxxxxxxxxxx>
Cc: Catalin Marinas <catalin.marinas@xxxxxxx>
Cc: Will Deacon <will.deacon@xxxxxxx>
Cc: Tony Luck <tony.luck@xxxxxxxxx>
Cc: Fenghua Yu <fenghua.yu@xxxxxxxxx>
Cc: Martin Schwidefsky <schwidefsky@xxxxxxxxxx>
Cc: Heiko Carstens <heiko.carstens@xxxxxxxxxx>
Cc: Yoshinori Sato <ysato@xxxxxxxxxxxxxxxxxxxx>
Cc: "David S. Miller" <davem@xxxxxxxxxxxxx>
Cc: Thomas Gleixner <tglx@xxxxxxxxxxxxx>
Cc: Peter Zijlstra <peterz@xxxxxxxxxxxxx>
Cc: Ingo Molnar <mingo@xxxxxxxxxx>
Cc: Andy Lutomirski <luto@xxxxxxxxxx>
Cc: Dave Hansen <dave.hansen@xxxxxxxxxxxxxxx>

Signed-off-by: Anshuman Khandual <anshuman.khandual@xxxxxxx>
---
Testing:

- Build and boot tested on arm64 and x86
- Build tested on some other archs (arm, sparc64, alpha, powerpc etc)

Changes in RFC V2:

- Changed generic notify_page_fault() per Mathew Wilcox
- Changed x86 to use new generic notify_page_fault()
- s/must not/need not/ in commit message per Matthew Wilcox

Changes in RFC V1: (https://patchwork.kernel.org/patch/10968273/)

arch/arm/mm/fault.c | 22 ----------------------
arch/arm64/mm/fault.c | 22 ----------------------
arch/ia64/mm/fault.c | 22 ----------------------
arch/powerpc/mm/fault.c | 23 ++---------------------
arch/s390/mm/fault.c | 16 +---------------
arch/sh/mm/fault.c | 14 --------------
arch/sparc/mm/fault_64.c | 16 +---------------
arch/x86/mm/fault.c | 21 ++-------------------
include/linux/mm.h | 1 +
mm/memory.c | 16 ++++++++++++++++
10 files changed, 23 insertions(+), 150 deletions(-)

diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c
index 58f69fa..1bc3b18 100644
--- a/arch/arm/mm/fault.c
+++ b/arch/arm/mm/fault.c
@@ -30,28 +30,6 @@

#ifdef CONFIG_MMU

-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
-{
- int ret = 0;
-
- if (!user_mode(regs)) {
- /* kprobe_running() needs smp_processor_id() */
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, fsr))
- ret = 1;
- preempt_enable();
- }
-
- return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int fsr)
-{
- return 0;
-}
-#endif
-
/*
* This is useful to dump out the page tables associated with
* 'addr' in mm 'mm'.
diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c
index a30818e..152f1f1 100644
--- a/arch/arm64/mm/fault.c
+++ b/arch/arm64/mm/fault.c
@@ -70,28 +70,6 @@ static inline const struct fault_info *esr_to_debug_fault_info(unsigned int esr)
return debug_fault_info + DBG_ESR_EVT(esr);
}

-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
-{
- int ret = 0;
-
- /* kprobe_running() needs smp_processor_id() */
- if (!user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, esr))
- ret = 1;
- preempt_enable();
- }
-
- return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, unsigned int esr)
-{
- return 0;
-}
-#endif
-
static void data_abort_decode(unsigned int esr)
{
pr_alert("Data abort info:\n");
diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c
index 5baeb02..64283d2 100644
--- a/arch/ia64/mm/fault.c
+++ b/arch/ia64/mm/fault.c
@@ -21,28 +21,6 @@

extern int die(char *, struct pt_regs *, long);

-#ifdef CONFIG_KPROBES
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
- int ret = 0;
-
- if (!user_mode(regs)) {
- /* kprobe_running() needs smp_processor_id() */
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, trap))
- ret = 1;
- preempt_enable();
- }
-
- return ret;
-}
-#else
-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
- return 0;
-}
-#endif
-
/*
* Return TRUE if ADDRESS points at a page in the kernel's mapped segment
* (inside region 5, on ia64) and that page is present.
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index b5d3578..5a0d71f 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -46,26 +46,6 @@
#include <asm/debug.h>
#include <asm/kup.h>

-static inline bool notify_page_fault(struct pt_regs *regs)
-{
- bool ret = false;
-
-#ifdef CONFIG_KPROBES
- /* kprobe_running() needs smp_processor_id() */
- if (!user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 11))
- ret = true;
- preempt_enable();
- }
-#endif /* CONFIG_KPROBES */
-
- if (unlikely(debugger_fault_handler(regs)))
- ret = true;
-
- return ret;
-}
-
/*
* Check whether the instruction inst is a store using
* an update addressing form which will update r1.
@@ -466,8 +446,9 @@ static int __do_page_fault(struct pt_regs *regs, unsigned long address,
int is_write = page_fault_is_write(error_code);
vm_fault_t fault, major = 0;
bool must_retry = false;
+ int kprobe_fault = notify_page_fault(regs, 11);

- if (notify_page_fault(regs))
+ if (unlikely(debugger_fault_handler(regs) || kprobe_fault))
return 0;

if (unlikely(page_fault_is_bad(error_code))) {
diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c
index c220399..d317263 100644
--- a/arch/s390/mm/fault.c
+++ b/arch/s390/mm/fault.c
@@ -67,20 +67,6 @@ static int __init fault_init(void)
}
early_initcall(fault_init);

-static inline int notify_page_fault(struct pt_regs *regs)
-{
- int ret = 0;
-
- /* kprobe_running() needs smp_processor_id() */
- if (kprobes_built_in() && !user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 14))
- ret = 1;
- preempt_enable();
- }
- return ret;
-}
-
/*
* Find out which address space caused the exception.
* Access register mode is impossible, ignore space == 3.
@@ -409,7 +395,7 @@ static inline vm_fault_t do_exception(struct pt_regs *regs, int access)
*/
clear_pt_regs_flag(regs, PIF_PER_TRAP);

- if (notify_page_fault(regs))
+ if (notify_page_fault(regs, 14))
return 0;

mm = tsk->mm;
diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c
index 6defd2c6..94bdfcb 100644
--- a/arch/sh/mm/fault.c
+++ b/arch/sh/mm/fault.c
@@ -24,20 +24,6 @@
#include <asm/tlbflush.h>
#include <asm/traps.h>

-static inline int notify_page_fault(struct pt_regs *regs, int trap)
-{
- int ret = 0;
-
- if (kprobes_built_in() && !user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, trap))
- ret = 1;
- preempt_enable();
- }
-
- return ret;
-}
-
static void
force_sig_info_fault(int si_signo, int si_code, unsigned long address,
struct task_struct *tsk)
diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c
index 8f8a604..e5557a1 100644
--- a/arch/sparc/mm/fault_64.c
+++ b/arch/sparc/mm/fault_64.c
@@ -38,20 +38,6 @@

int show_unhandled_signals = 1;

-static inline __kprobes int notify_page_fault(struct pt_regs *regs)
-{
- int ret = 0;
-
- /* kprobe_running() needs smp_processor_id() */
- if (kprobes_built_in() && !user_mode(regs)) {
- preempt_disable();
- if (kprobe_running() && kprobe_fault_handler(regs, 0))
- ret = 1;
- preempt_enable();
- }
- return ret;
-}
-
static void __kprobes unhandled_fault(unsigned long address,
struct task_struct *tsk,
struct pt_regs *regs)
@@ -285,7 +271,7 @@ asmlinkage void __kprobes do_sparc64_fault(struct pt_regs *regs)

fault_code = get_thread_fault_code();

- if (notify_page_fault(regs))
+ if (notify_page_fault(regs, 0))
goto exit_exception;

si_code = SEGV_MAPERR;
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 46df4c6..1790859 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -46,23 +46,6 @@ kmmio_fault(struct pt_regs *regs, unsigned long addr)
return 0;
}

-static nokprobe_inline int kprobes_fault(struct pt_regs *regs)
-{
- if (!kprobes_built_in())
- return 0;
- if (user_mode(regs))
- return 0;
- /*
- * To be potentially processing a kprobe fault and to be allowed to call
- * kprobe_running(), we have to be non-preemptible.
- */
- if (preemptible())
- return 0;
- if (!kprobe_running())
- return 0;
- return kprobe_fault_handler(regs, X86_TRAP_PF);
-}
-
/*
* Prefetch quirks:
*
@@ -1280,7 +1263,7 @@ do_kern_addr_fault(struct pt_regs *regs, unsigned long hw_error_code,
return;

/* kprobes don't want to hook the spurious faults: */
- if (kprobes_fault(regs))
+ if (notify_page_fault(regs, X86_TRAP_PF))
return;

/*
@@ -1311,7 +1294,7 @@ void do_user_addr_fault(struct pt_regs *regs,
mm = tsk->mm;

/* kprobes don't want to hook the spurious faults: */
- if (unlikely(kprobes_fault(regs)))
+ if (unlikely(notify_page_fault(regs, X86_TRAP_PF)))
return;

/*
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 0e8834a..c5a8dcf 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -1778,6 +1778,7 @@ static inline int pte_devmap(pte_t pte)
}
#endif

+int notify_page_fault(struct pt_regs *regs, unsigned int trap);
int vma_wants_writenotify(struct vm_area_struct *vma, pgprot_t vm_page_prot);

extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr,
diff --git a/mm/memory.c b/mm/memory.c
index ddf20bd..b6bae8f 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -52,6 +52,7 @@
#include <linux/pagemap.h>
#include <linux/memremap.h>
#include <linux/ksm.h>
+#include <linux/kprobes.h>
#include <linux/rmap.h>
#include <linux/export.h>
#include <linux/delayacct.h>
@@ -141,6 +142,21 @@ static int __init init_zero_pfn(void)
core_initcall(init_zero_pfn);


+int __kprobes notify_page_fault(struct pt_regs *regs, unsigned int trap)
+{
+ int ret = 0;
+
+ /*
+ * To be potentially processing a kprobe fault and to be allowed
+ * to call kprobe_running(), we have to be non-preemptible.
+ */
+ if (kprobes_built_in() && !preemptible() && !user_mode(regs)) {
+ if (kprobe_running() && kprobe_fault_handler(regs, trap))
+ ret = 1;
+ }
+ return ret;
+}
+
#if defined(SPLIT_RSS_COUNTING)

void sync_mm_rss(struct mm_struct *mm)
--
2.7.4