Re: [RFC][PATCH] ring-buffer: Have nested events still record running time stamp

From: Steven Rostedt
Date: Thu Jun 25 2020 - 14:12:13 EST


After running my ring buffer benchmark on this update, it went from
55ns to 85ns per event. A 30ns increase! At first I freaked out, but
then noticed that I unnecessarily take the time stamp counter twice,
which is the most expensive operation.

Here's a diff to my patch (I'll send a v2 later) that removes the
double taking of the time stamp.

It also removes the double testing of the absolute value. Which I have
another patch that consolidates that with the info add_timestamp field.
But that patch wont be folded into this.

-- Steve

diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index a5c3f1608de8..7370c483c6cd 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -2908,6 +2908,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
struct buffer_page *tail_page;
unsigned long tail, write, w, next;
u64 delta, before, after;
+ bool abs = false;

/* Don't let the compiler play games with cpu_buffer->tail_page */
tail_page = info->tail_page = READ_ONCE(cpu_buffer->tail_page);
@@ -2916,6 +2917,19 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
barrier();
before = READ_ONCE(cpu_buffer->before_stamp);
after = local64_read(&cpu_buffer->write_stamp);
+ barrier();
+ info->ts = rb_time_stamp(cpu_buffer->buffer);
+
+ if (ring_buffer_time_stamp_abs(cpu_buffer->buffer)) {
+ info->delta = info->ts;
+ abs = true;
+ } else {
+ info->delta = info->ts - after;
+ }
+
+ if (unlikely(test_time_stamp(info->delta)))
+ rb_handle_timestamp(cpu_buffer, info);
+
/*
* If preempting an event time update, we may need absolute timestamp.
* Don't bother if this is the start of a new page (w == 0).
@@ -2933,7 +2947,6 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
next = READ_ONCE(cpu_buffer->next_write);
WRITE_ONCE(cpu_buffer->next_write, w + info->length);

- info->ts = rb_time_stamp(cpu_buffer->buffer);
/*B*/ WRITE_ONCE(cpu_buffer->before_stamp, info->ts);

/*C*/ write = local_add_return(info->length, &tail_page->write);
@@ -3021,8 +3034,7 @@ __rb_reserve_next(struct ring_buffer_per_cpu *cpu_buffer,
* If this is the first commit on the page, then it has the same
* timestamp as the page itself.
*/
- if (unlikely(!tail && info->add_timestamp != RB_ADD_STAMP_FORCE &&
- !ring_buffer_time_stamp_abs(cpu_buffer->buffer)))
+ if (unlikely(!tail && info->add_timestamp != RB_ADD_STAMP_FORCE && !abs))
info->delta = 0;

/* We reserved something on the buffer */
@@ -3056,7 +3068,6 @@ rb_reserve_next_event(struct trace_buffer *buffer,
struct ring_buffer_event *event;
struct rb_event_info info;
int nr_loops = 0;
- u64 write_stamp;

rb_start_commit(cpu_buffer);
/* The commit page can not change after this */
@@ -3093,18 +3104,6 @@ rb_reserve_next_event(struct trace_buffer *buffer,
if (RB_WARN_ON(cpu_buffer, ++nr_loops > 1000))
goto out_fail;

- write_stamp = local64_read(&cpu_buffer->write_stamp);
- info.ts = rb_time_stamp(cpu_buffer->buffer);
-
- if (ring_buffer_time_stamp_abs(buffer)) {
- info.delta = info.ts;
- rb_handle_timestamp(cpu_buffer, &info);
- } else {
- info.delta = info.ts - write_stamp ;
- if (unlikely(test_time_stamp(info.delta)))
- rb_handle_timestamp(cpu_buffer, &info);
- }
-
event = __rb_reserve_next(cpu_buffer, &info);

if (unlikely(PTR_ERR(event) == -EAGAIN)) {