Re: [PATCH] mm: kmemleak: Skip check in kmemleak_*_phys when pfn bound is not ready

From: Patrick Wang
Date: Mon May 30 2022 - 09:51:43 EST

On 2022/5/30 10:27, Yee Lee wrote:
On Fri, 2022-05-27 at 21:39 +0800, patrick wang wrote:
On Fri, May 27, 2022 at 11:25 AM <yee.lee@xxxxxxxxxxxx> wrote:

From: Yee Lee <yee.lee@xxxxxxxxxxxx>

On some architectures (arm64), memblock allocates memory at boot
time, before the pfn boundaries (min_low_pfn/max_low_pfn) are ready.
The lowmem checks in kmemleak_*_phys() drop those blocks and cause
false leak alarms on common kernel objects.

Kmemleak output: (Qemu/arm64)
unreferenced object 0xffff0000c0170a00 (size 128):
  comm "swapper/0", pid 1, jiffies 4294892404 (age 126.208s)
  hex dump (first 32 bytes):
    62 61 73 65 00 00 00 00 00 00 00 00 00 00 00 00  base............
    00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00  ................
  backtrace:
    [<(____ptrval____)>] __kmalloc_track_caller+0x1b0/0x2e4
    [<(____ptrval____)>] kstrdup_const+0x8c/0xc4
    [<(____ptrval____)>] kvasprintf_const+0xbc/0xec
    [<(____ptrval____)>] kobject_set_name_vargs+0x58/0xe4
    [<(____ptrval____)>] kobject_add+0x84/0x100
    [<(____ptrval____)>] __of_attach_node_sysfs+0x78/0xec
    [<(____ptrval____)>] of_core_init+0x68/0x104
    [<(____ptrval____)>] driver_init+0x28/0x48
    [<(____ptrval____)>] do_basic_setup+0x14/0x28
    [<(____ptrval____)>] kernel_init_freeable+0x110/0x178
    [<(____ptrval____)>] kernel_init+0x20/0x1a0
    [<(____ptrval____)>] ret_from_fork+0x10/0x20
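
The check that drops these early registrations (the same lines appear
in the diff below; paraphrasing v5.18-era mm/kmemleak.c) is:

  void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
                                 gfp_t gfp)
  {
          /* On arm64 this runs for early memblock allocations while
           * max_low_pfn is still 0, so the block is never registered and
           * pointers into it later show up as "unreferenced" objects. */
          if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
                  kmemleak_alloc(__va(phys), size, min_count, gfp);
  }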

This patch relaxes the boundary check in the kmemleak_*_phys API
when max_low_pfn is uninitialized.

Fixes: 23c2d4 ("mm: kmemleak: take a full lowmem check in kmemleak_*_phys")
Signed-off-by: Yee Lee <yee.lee@xxxxxxxxxxxx>
---
mm/kmemleak.c | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index a182f5ddaf68..6b2af544aa0f 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1132,7 +1132,7 @@ EXPORT_SYMBOL(kmemleak_no_scan);
void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
			       gfp_t gfp)
{
- if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
+ if (!max_low_pfn || (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn))

Just skipping the check will bring back the possibility of a crash.
This situation seems to be beyond what these interfaces can handle,
since "min_low_pfn" and "max_low_pfn" depend on the architecture.

Yes, for the cases beyond the pfn guard, users have to take care of
the boundary themselves.
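
E.g. a hypothetical caller-side guard (illustration only, not from
either patch) that only registers ranges known to be mapped memory,
so that __va(phys) stays valid when kmemleak scans the block:

  if (memblock_is_map_memory(phys))
          kmemleak_alloc_phys(phys, size, 0, 0);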


Could we record these early calls and deal with them when it's
ready? Is this appropriate?

I have an implementation based on this approach. Could you please
help test it on your machine as well? And could someone take a look
or review it?



From 82cae75dfaa78f414faf85bb49133e95159c041a Mon Sep 17 00:00:00 2001
From: Patrick Wang <patrick.wang.shcn@xxxxxxxxx>
Date: Mon, 30 May 2022 18:38:23 +0800
Subject: [PATCH] mm: kmemleak: record early operations and handle later

The kmemleak_*_phys() interfaces use "min_low_pfn" and
"max_low_pfn" to check addresses. But on some architectures,
kmemleak_*_phys() is called before those two variables are
initialized. Record these early operations and handle them
once the pfn boundaries are initialized.

Signed-off-by: Patrick Wang <patrick.wang.shcn@xxxxxxxxx>
---
mm/kmemleak.c | 158 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 file changed, 158 insertions(+)

diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index a182f5ddaf68..a71e41b49ebc 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -164,6 +164,26 @@ struct kmemleak_object {
char comm[TASK_COMM_LEN]; /* executable name */
};
+/* maximum early recording count */
+#define EARLY_RECORDS 20
+
+/* early recording operation type */
+enum early_record_type {
+ record_alloc = 0,
+ record_free_part,
+ record_not_leak,
+ record_ignore
+};
+
+/* early recording operation */
+struct early_record_op {
+ enum early_record_type record_type;
+ phys_addr_t arg1;
+ size_t arg2;
+ int arg3;
+ gfp_t arg4;
+};
+
/* flag representing the memory block allocation status */
#define OBJECT_ALLOCATED (1 << 0)
/* flag set after the first reporting of an unreferenced object */
@@ -230,11 +250,26 @@ static int kmemleak_skip_disable;
/* If there are leaks that can be reported */
static bool kmemleak_found_leaks;
+/*
+ * Used to record early ops. Could recorded ops remain unhandled after
+ * initmem is freed? Not likely.
+ */
+static struct early_record_op early_record_ops[EARLY_RECORDS] __initdata;
+static int early_record_op_count;
+/* indicate if recorded ops are being handled */
+static bool early_record_in_handle;
+static DEFINE_RAW_SPINLOCK(early_record_lock);
+
static bool kmemleak_verbose;
module_param_named(verbose, kmemleak_verbose, bool, 0600);
static void kmemleak_disable(void);
+static void record_early_ops(enum early_record_type record_type,
+ phys_addr_t arg1, size_t arg2,
+ int arg3, gfp_t arg4) __init;
+static void handle_early_ops(void) __init;
+
/*
* Print a warning and dump the stack trace.
*/
@@ -1132,6 +1167,26 @@ EXPORT_SYMBOL(kmemleak_no_scan);
void __ref kmemleak_alloc_phys(phys_addr_t phys, size_t size, int min_count,
gfp_t gfp)
{
+ /* Not ready, record this operation. */
+ if (!max_low_pfn && !early_record_in_handle) {
+ /*
+ * record_early_ops is in the __init section; make sure
+ * its text is still executable.
+ */
+ if (core_kernel_text((unsigned long)record_early_ops))
+ record_early_ops(record_alloc, phys, size, min_count, gfp);
+ return;
+ }
+ /* Ready, handle recorded ops if they exist and are not being handled. */
+ else if (early_record_op_count && !early_record_in_handle) {
+ /*
+ * handle_early_ops is in the __init section; make sure
+ * its text is still executable.
+ */
+ if (core_kernel_text((unsigned long)handle_early_ops))
+ handle_early_ops();
+ }
+
if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
kmemleak_alloc(__va(phys), size, min_count, gfp);
}
@@ -1146,6 +1201,18 @@ EXPORT_SYMBOL(kmemleak_alloc_phys);
*/
void __ref kmemleak_free_part_phys(phys_addr_t phys, size_t size)
{
+ /* Not ready, record this operation. */
+ if (!max_low_pfn && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)record_early_ops))
+ record_early_ops(record_free_part, phys, size, 0, 0);
+ return;
+ }
+ /* Ready, handle recorded ops if they exist and are not being handled. */
+ else if (early_record_op_count && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)handle_early_ops))
+ handle_early_ops();
+ }
+
if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
kmemleak_free_part(__va(phys), size);
}
@@ -1158,6 +1225,18 @@ EXPORT_SYMBOL(kmemleak_free_part_phys);
*/
void __ref kmemleak_not_leak_phys(phys_addr_t phys)
{
+ /* Not ready, record this operation. */
+ if (!max_low_pfn && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)record_early_ops))
+ record_early_ops(record_not_leak, phys, 0, 0, 0);
+ return;
+ }
+ /* Ready, handle recorded ops if they exist and are not being handled. */
+ else if (early_record_op_count && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)handle_early_ops))
+ handle_early_ops();
+ }
+
if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
kmemleak_not_leak(__va(phys));
}
@@ -1170,11 +1249,90 @@ EXPORT_SYMBOL(kmemleak_not_leak_phys);
*/
void __ref kmemleak_ignore_phys(phys_addr_t phys)
{
+ /* Not ready, record this operation. */
+ if (!max_low_pfn && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)record_early_ops))
+ record_early_ops(record_ignore, phys, 0, 0, 0);
+ return;
+ }
+ /* Ready, handle recorded ops if they exist and are not being handled. */
+ else if (early_record_op_count && !early_record_in_handle) {
+ if (core_kernel_text((unsigned long)handle_early_ops))
+ handle_early_ops();
+ }
+
if (PHYS_PFN(phys) >= min_low_pfn && PHYS_PFN(phys) < max_low_pfn)
kmemleak_ignore(__va(phys));
}
EXPORT_SYMBOL(kmemleak_ignore_phys);
+/*
+ * Record an early operation into the early_record_ops array.
+ */
+static void __init record_early_ops(enum early_record_type record_type,
+ phys_addr_t arg1, size_t arg2,
+ int arg3, gfp_t arg4)
+{
+ struct early_record_op *op;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&early_record_lock, flags);
+ /* early_record_ops array full */
+ if (early_record_op_count >= EARLY_RECORDS)
+ goto out;
+
+ op = &early_record_ops[early_record_op_count];
+ op->record_type = record_type;
+ op->arg1 = arg1;
+ op->arg2 = arg2;
+ op->arg3 = arg3;
+ op->arg4 = arg4;
+
+ early_record_op_count++;
+out:
+ raw_spin_unlock_irqrestore(&early_record_lock, flags);
+}
+
+/*
+ * Handle all of the recorded operations.
+ */
+static void __init handle_early_ops(void)
+{
+ struct early_record_op *op = early_record_ops;
+ unsigned long flags;
+
+ raw_spin_lock_irqsave(&early_record_lock, flags);
+ /* Indicate operations are being handled. */
+ early_record_in_handle = true;
+
+ while (early_record_op_count) {
+ /* Deal with operations according to their type. */
+ switch (op->record_type) {
+ case record_alloc:
+ kmemleak_alloc_phys(op->arg1, op->arg2,
+ op->arg3, op->arg4);
+ break;
+ case record_free_part:
+ kmemleak_free_part_phys(op->arg1, op->arg2);
+ break;
+ case record_not_leak:
+ kmemleak_not_leak_phys(op->arg1);
+ break;
+ case record_ignore:
+ kmemleak_ignore_phys(op->arg1);
+ break;
+ default:
+ break;
+ }
+
+ early_record_op_count--;
+ op++;
+ }
+
+ early_record_in_handle = false;
+ raw_spin_unlock_irqrestore(&early_record_lock, flags);
+}
+
/*
* Update an object's checksum and return true if it was modified.
*/
--
2.25.1
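
For what it's worth, here is a minimal user-space sketch of the same
record-and-replay pattern (hypothetical names, only the alloc op, no
kernel dependencies; the patch above is the real implementation):

  #include <stdbool.h>
  #include <stdio.h>

  #define EARLY_RECORDS 20

  /* Stands in for "max_low_pfn != 0", i.e. the bounds are usable. */
  static bool ready;

  struct record_op {
          unsigned long phys;
          unsigned long size;
  };

  static struct record_op ops[EARLY_RECORDS];
  static int op_count;
  static bool in_replay;

  /* The real work, kmemleak_alloc() in the kernel patch. */
  static void do_alloc(unsigned long phys, unsigned long size)
  {
          printf("alloc: phys=0x%lx size=%lu\n", phys, size);
  }

  /* Drain the buffered calls once the bounds are usable. */
  static void replay(void)
  {
          in_replay = true;
          for (int i = 0; i < op_count; i++)
                  do_alloc(ops[i].phys, ops[i].size);
          op_count = 0;
          in_replay = false;
  }

  /* Mirrors kmemleak_alloc_phys(): buffer when called too early,
   * otherwise replay anything buffered and handle the current call. */
  static void alloc_phys(unsigned long phys, unsigned long size)
  {
          if (!ready && !in_replay) {
                  if (op_count < EARLY_RECORDS)
                          ops[op_count++] = (struct record_op){ phys, size };
                  return;
          }
          if (op_count && !in_replay)
                  replay();
          do_alloc(phys, size);
  }

  int main(void)
  {
          alloc_phys(0x1000, 128);  /* too early: recorded only */
          ready = true;             /* bounds become available */
          alloc_phys(0x2000, 64);   /* replays 0x1000, then handles 0x2000 */
          return 0;
  }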


Thanks,
Patrick