Re: [PATCH 1/2] ring-buffer: Introducing ring-buffer mapping functions

From: Steven Rostedt
Date: Mon Mar 20 2023 - 21:45:27 EST


On Fri, 17 Mar 2023 14:33:09 +0000
Vincent Donnefort <vdonnefort@xxxxxxxxxx> wrote:

> Also, the meta-page being... a single page, this limits at the moment the
> number of pages in the ring-buffer that can be mapped: ~3MB on a 4K pages
> system.

I hate this limitation, so I fixed it ;-)

I added a meta_page_size field to the meta page, and user space can do:

	meta = mmap(NULL, page_size, PROT_READ, MAP_SHARED, fd, 0);
	if (meta == MAP_FAILED)
		pdie("mmap");

	map = meta;
	meta_len = map->meta_page_size;

	if (meta_len > page_size) {
		munmap(meta, page_size);
		meta = mmap(NULL, meta_len, PROT_READ, MAP_SHARED, fd, 0);
		if (meta == MAP_FAILED)
			pdie("mmap");
		map = meta;
	}

This appears to work (but I'm still testing it).

-- Steve

diff --git a/include/uapi/linux/trace_mmap.h b/include/uapi/linux/trace_mmap.h
index 24bcec754a35..12f3f7ee33d9 100644
--- a/include/uapi/linux/trace_mmap.h
+++ b/include/uapi/linux/trace_mmap.h
@@ -18,6 +18,7 @@ struct ring_buffer_meta_page {
__u32 reader_page;
__u32 nr_data_pages; /* doesn't take into account the reader_page */
__u32 data_page_head; /* index of data_pages[] */
+ __u32 meta_page_size; /* size of the meta page */
__u32 data_pages[];
};

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 10a17e78cfe6..77c92e4a7adc 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -526,6 +526,7 @@ struct ring_buffer_per_cpu {
u64 read_stamp;

int mapped;
+ int meta_order;
struct mutex mapping_lock;
unsigned long *page_ids; /* ID to addr */
struct ring_buffer_meta_page *meta_page;
@@ -5898,7 +5899,7 @@ int ring_buffer_read_page(struct trace_buffer *buffer,
EXPORT_SYMBOL_GPL(ring_buffer_read_page);

#define META_PAGE_MAX_PAGES \
- ((PAGE_SIZE - (offsetof(struct ring_buffer_meta_page, data_page_head))) >> 2)
+ ((PAGE_SIZE - (offsetof(struct ring_buffer_meta_page, data_pages))) >> 2)

static void rb_free_page_ids(struct ring_buffer_per_cpu *cpu_buffer)
{
@@ -5908,22 +5909,34 @@ static void rb_free_page_ids(struct ring_buffer_per_cpu *cpu_buffer)

static int rb_alloc_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
{
+ struct page *meta_pages;
+ int pages;
+ int order = 0;
+
if (cpu_buffer->meta_page)
return 0;

- if (cpu_buffer->nr_pages > META_PAGE_MAX_PAGES)
- return -E2BIG;
-
- cpu_buffer->meta_page = page_to_virt(alloc_page(GFP_USER));
- if (!cpu_buffer->meta_page)
+ if (cpu_buffer->nr_pages > META_PAGE_MAX_PAGES) {
+ /* Calculate how many more pages we need to hold indexes */
+ pages = DIV_ROUND_UP(cpu_buffer->nr_pages - META_PAGE_MAX_PAGES,
+ PAGE_SIZE / sizeof(u32));
+ /* Add back the meta_page itself */
+ pages++;
+ order = fls(pages) - 1;
+ }
+ meta_pages = alloc_pages(GFP_USER, order);
+ if (!meta_pages)
return -ENOMEM;

+ cpu_buffer->meta_page = page_to_virt(meta_pages);
+ cpu_buffer->meta_order = order;
+
return 0;
}

static void rb_free_meta_page(struct ring_buffer_per_cpu *cpu_buffer)
{
- free_page((unsigned long)cpu_buffer->meta_page);
+ free_pages((unsigned long)cpu_buffer->meta_page, cpu_buffer->meta_order);
cpu_buffer->meta_page = NULL;
}

@@ -5932,14 +5945,20 @@ static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
{
struct ring_buffer_meta_page *meta = cpu_buffer->meta_page;
struct buffer_page *first_page, *bpage;
+ int data_page_end;
int id = 0;

page_ids[id] = (unsigned long)cpu_buffer->reader_page->page;
cpu_buffer->reader_page->id = id++;

+ /* Calculate the last index of data_pages[] */
+ data_page_end = (1 << (cpu_buffer->meta_order + PAGE_SHIFT)) -
+ offsetof(struct ring_buffer_meta_page, data_pages);
+ data_page_end /= sizeof(u32);
+
first_page = bpage = rb_set_head_page(cpu_buffer);
do {
- if (id > META_PAGE_MAX_PAGES) {
+ if (id > data_page_end) {
WARN_ON(1);
break;
}
@@ -5960,6 +5979,7 @@ static void rb_setup_ids_meta_page(struct ring_buffer_per_cpu *cpu_buffer,
meta->pages_touched = 0;
meta->reader_page = cpu_buffer->reader_page->id;
meta->nr_data_pages = cpu_buffer->nr_pages;
+ meta->meta_page_size = 1 << (cpu_buffer->meta_order + PAGE_SHIFT);
meta->data_page_head = 0;
}

@@ -6092,10 +6112,12 @@ int ring_buffer_unmap(struct trace_buffer *buffer, int cpu)
/*
* +--------------+
* | meta page | pgoff=0
+ * | ... |
+ * | | pgoff=(1<<cpu_buffer->meta_order) - 1
* +--------------+
- * | data page1 | pgoff=1 page_ids=0
+ * | data page1 | page_ids=0
* +--------------+
- * | data page2 | pgoff=2 page_ids=1
+ * | data page2 | page_ids=1
* ...
*/
struct page *ring_buffer_map_fault(struct trace_buffer *buffer, int cpu,
@@ -6103,10 +6125,11 @@ struct page *ring_buffer_map_fault(struct trace_buffer *buffer, int cpu,
{
struct ring_buffer_per_cpu *cpu_buffer = buffer->buffers[cpu];

- if (!pgoff)
- return virt_to_page(cpu_buffer->meta_page);
+ if (pgoff < (1 << cpu_buffer->meta_order) + 1)
+ return virt_to_page((void *)cpu_buffer->meta_page + (pgoff << PAGE_SHIFT));
+
+ pgoff -= (1 << cpu_buffer->meta_order);

- pgoff--;
if (pgoff > cpu_buffer->nr_pages)
return NULL;