[RFC PATCH 2/4] mm: Add a debug interface to control the range of speculative numa fault

From: Baolin Wang
Date: Sun Dec 12 2021 - 06:32:42 EST


Add a debug interface to control the range of speculative numa fault,
which can be used to tune the performance or even close the speculative
numa fault window for some workloads.

Signed-off-by: Baolin Wang <baolin.wang@xxxxxxxxxxxxxxxxx>
---
mm/memory.c | 46 +++++++++++++++++++++++++++++++++++++++++++---
1 file changed, 43 insertions(+), 3 deletions(-)

diff --git a/mm/memory.c b/mm/memory.c
index 2c9ed63e4e23..a0f4a2a008cc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -4052,7 +4052,29 @@ vm_fault_t finish_fault(struct vm_fault *vmf)
static unsigned long fault_around_bytes __read_mostly =
rounddown_pow_of_two(65536);

+static unsigned long numa_around_bytes __read_mostly;
+
#ifdef CONFIG_DEBUG_FS
+static int numa_around_bytes_get(void *data, u64 *val)
+{
+ *val = numa_around_bytes;
+ return 0;
+}
+
+static int numa_around_bytes_set(void *data, u64 val)
+{
+ if (val / PAGE_SIZE > PTRS_PER_PTE)
+ return -EINVAL;
+ if (val > PAGE_SIZE)
+ numa_around_bytes = rounddown_pow_of_two(val);
+ else
+ numa_around_bytes = 0; /* any val <= PAGE_SIZE stores 0 (window disabled); also avoids undefined rounddown_pow_of_two(0) */
+ return 0;
+}
+DEFINE_DEBUGFS_ATTRIBUTE(numa_around_bytes_fops,
+ numa_around_bytes_get,
+ numa_around_bytes_set, "%llu\n");
+
static int fault_around_bytes_get(void *data, u64 *val)
{
*val = fault_around_bytes;
@@ -4080,6 +4102,8 @@ static int __init fault_around_debugfs(void)
{
debugfs_create_file_unsafe("fault_around_bytes", 0644, NULL, NULL,
&fault_around_bytes_fops);
+ debugfs_create_file_unsafe("numa_around_bytes", 0644, NULL, NULL,
+ &numa_around_bytes_fops);
return 0;
}
late_initcall(fault_around_debugfs);
@@ -4348,10 +4372,13 @@ static bool try_next_numa_page(struct vm_fault *vmf, unsigned int win_pages,
((win) & NUMA_FAULT_WINDOW_SIZE_MASK))

static inline unsigned int numa_fault_max_pages(struct vm_area_struct *vma,
- unsigned long fault_address)
+ unsigned long fault_address,
+ unsigned long numa_around_size)
{
+ unsigned long numa_around_addr =
+ (fault_address + numa_around_size) & PAGE_MASK;
unsigned long pmd_end_addr = (fault_address & PMD_MASK) + PMD_SIZE;
- unsigned long max_fault_addr = min_t(unsigned long, pmd_end_addr,
+ unsigned long max_fault_addr = min3(numa_around_addr, pmd_end_addr,
vma->vm_end);

return (max_fault_addr - fault_address - 1) >> PAGE_SHIFT;
@@ -4360,12 +4387,24 @@ static inline unsigned int numa_fault_max_pages(struct vm_area_struct *vma,
static unsigned int adjust_numa_fault_window(struct vm_area_struct *vma,
unsigned long fault_address)
{
+ unsigned long numa_around_size = READ_ONCE(numa_around_bytes);
unsigned long numafault_ahead = GET_NUMA_FAULT_INFO(vma);
unsigned long prev_start = NUMA_FAULT_WINDOW_START(numafault_ahead);
unsigned int prev_pages = NUMA_FAULT_WINDOW_SIZE(numafault_ahead);
unsigned long win_start;
unsigned int win_pages, max_fault_pages;

+ /*
+ * Shut down the proactive numa fault if the numa_around_bytes
+ * is set to 0.
+ */
+ if (!numa_around_size) {
+ if (numafault_ahead)
+ atomic_long_set(&vma->numafault_ahead_info,
+ NUMA_FAULT_INFO(0, 0));
+ return 0;
+ }
+
win_start = fault_address + PAGE_SIZE;

/*
@@ -4437,7 +4476,8 @@ static unsigned int adjust_numa_fault_window(struct vm_area_struct *vma,
* Make sure the size of ahead numa fault address is less than the
* size of current VMA or PMD.
*/
- max_fault_pages = numa_fault_max_pages(vma, fault_address);
+ max_fault_pages = numa_fault_max_pages(vma, fault_address,
+ numa_around_size);
if (win_pages > max_fault_pages)
win_pages = max_fault_pages;

--
2.27.0