[PATCH 2/5 v2] ftrace: infrastructure for supporting binary record

From: Frederic Weisbecker
Date: Thu Mar 05 2009 - 00:11:35 EST


From: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>

Impact: save on memory for tracing

Current tracers are typically using a struct(like struct ftrace_entry,
struct ctx_switch_entry, struct special_entr etc...)to record a binary
event. These structs can only record a their own kind of events.
A new kind of tracer need a new struct and a lot of code too handle it.

So we need a generic binary record for events. This infrastructure
is for this purpose.

[fweisbec@xxxxxxxxx: rebase against latest -tip, v2: make it safe while sched
tracing as reported by Steven Rostedt]

Signed-off-by: Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
Signed-off-by: Frederic Weisbecker <fweisbec@xxxxxxxxx>
---
include/linux/ftrace.h | 3 +
kernel/trace/Kconfig | 6 +++
kernel/trace/Makefile | 1 +
kernel/trace/trace.c | 56 +++++++++++++++++++++++++++
kernel/trace/trace.h | 12 ++++++
kernel/trace/trace_bprintk.c | 87 ++++++++++++++++++++++++++++++++++++++++++
kernel/trace/trace_output.c | 76 ++++++++++++++++++++++++++++++++++++
7 files changed, 241 insertions(+), 0 deletions(-)
create mode 100644 kernel/trace/trace_bprintk.c

diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 1f69ac7..d663225 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -222,6 +222,9 @@ extern int ftrace_make_nop(struct module *mod,
*/
extern int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr);

+#ifdef CONFIG_TRACE_BPRINTK
+extern int trace_vbprintk(unsigned long ip, const char *fmt, va_list args);
+#endif

/* May be defined in arch */
extern int ftrace_arch_read_dyn_info(char *buf, int size);
diff --git a/kernel/trace/Kconfig b/kernel/trace/Kconfig
index 02bc5d6..2e84fc7 100644
--- a/kernel/trace/Kconfig
+++ b/kernel/trace/Kconfig
@@ -86,6 +86,12 @@ config FUNCTION_GRAPH_TRACER
This is done by setting the current return address on the current
task structure into a stack of calls.

+config TRACE_BPRINTK
+ bool "Binary printk for tracing"
+ default y
+ depends on TRACING
+ select BINARY_PRINTF
+
config IRQSOFF_TRACER
bool "Interrupts-off Latency Tracer"
default n
diff --git a/kernel/trace/Makefile b/kernel/trace/Makefile
index c931fe0..dba6157 100644
--- a/kernel/trace/Makefile
+++ b/kernel/trace/Makefile
@@ -22,6 +22,7 @@ obj-$(CONFIG_TRACING) += trace.o
obj-$(CONFIG_TRACING) += trace_clock.o
obj-$(CONFIG_TRACING) += trace_output.o
obj-$(CONFIG_TRACING) += trace_stat.o
+obj-$(CONFIG_TRACE_BPRINTK) += trace_bprintk.o
obj-$(CONFIG_CONTEXT_SWITCH_TRACER) += trace_sched_switch.o
obj-$(CONFIG_SYSPROF_TRACER) += trace_sysprof.o
obj-$(CONFIG_FUNCTION_TRACER) += trace_functions.o
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index c8abbb0..439ccdf 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3827,6 +3827,62 @@ int __ftrace_vprintk(unsigned long ip, const char *fmt, va_list ap)
}
EXPORT_SYMBOL_GPL(__ftrace_vprintk);

+/**
+ * trace_vbprintk - write binary msg to tracing buffer
+ *
+ * Caller must insure @fmt are valid when msg is in tracing buffer.
+ */
+int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
+{
+ static DEFINE_SPINLOCK(trace_buf_lock);
+ static u32 trace_buf[TRACE_BUF_SIZE];
+
+ struct ring_buffer_event *event;
+ struct trace_array *tr = &global_trace;
+ struct trace_array_cpu *data;
+ struct bprintk_entry *entry;
+ unsigned long flags;
+ int resched;
+ int cpu, len = 0, size, pc;
+
+ if (tracing_disabled || !trace_bprintk_enable)
+ return 0;
+
+ pc = preempt_count();
+ resched = ftrace_preempt_disable();
+ cpu = raw_smp_processor_id();
+ data = tr->data[cpu];
+
+ if (unlikely(atomic_read(&data->disabled)))
+ goto out;
+
+ spin_lock_irqsave(&trace_buf_lock, flags);
+ len = vbin_printf(trace_buf, TRACE_BUF_SIZE, fmt, args);
+
+ if (len > TRACE_BUF_SIZE || len < 0)
+ goto out_unlock;
+
+ size = sizeof(*entry) + sizeof(u32) * len;
+ event = trace_buffer_lock_reserve(tr, TRACE_BPRINTK, size, flags, pc);
+ if (!event)
+ goto out_unlock;
+ entry = ring_buffer_event_data(event);
+ entry->ip = ip;
+ entry->fmt = fmt;
+
+ memcpy(entry->buf, trace_buf, sizeof(u32) * len);
+ ring_buffer_unlock_commit(tr->buffer, event);
+
+out_unlock:
+ spin_unlock_irqrestore(&trace_buf_lock, flags);
+
+out:
+ ftrace_preempt_enable(resched);
+
+ return len;
+}
+EXPORT_SYMBOL_GPL(trace_vbprintk);
+
static int trace_panic_handler(struct notifier_block *this,
unsigned long event, void *unused)
{
diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h
index 561bb5c..0988c14 100644
--- a/kernel/trace/trace.h
+++ b/kernel/trace/trace.h
@@ -20,6 +20,7 @@ enum trace_type {
TRACE_WAKE,
TRACE_STACK,
TRACE_PRINT,
+ TRACE_BPRINTK,
TRACE_SPECIAL,
TRACE_MMIO_RW,
TRACE_MMIO_MAP,
@@ -124,6 +125,16 @@ struct print_entry {
char buf[];
};

+struct bprintk_entry {
+ struct trace_entry ent;
+ unsigned long ip;
+ const char *fmt;
+ u32 buf[];
+};
+#ifdef CONFIG_TRACE_BPRINTK
+extern int trace_bprintk_enable;
+#endif
+
#define TRACE_OLD_SIZE 88

struct trace_field_cont {
@@ -285,6 +296,7 @@ extern void __ftrace_bad_type(void);
IF_ASSIGN(var, ent, struct stack_entry, TRACE_STACK); \
IF_ASSIGN(var, ent, struct userstack_entry, TRACE_USER_STACK);\
IF_ASSIGN(var, ent, struct print_entry, TRACE_PRINT); \
+ IF_ASSIGN(var, ent, struct bprintk_entry, TRACE_BPRINTK);\
IF_ASSIGN(var, ent, struct special_entry, 0); \
IF_ASSIGN(var, ent, struct trace_mmiotrace_rw, \
TRACE_MMIO_RW); \
diff --git a/kernel/trace/trace_bprintk.c b/kernel/trace/trace_bprintk.c
new file mode 100644
index 0000000..1f8e532
--- /dev/null
+++ b/kernel/trace/trace_bprintk.c
@@ -0,0 +1,87 @@
+/*
+ * trace binary printk
+ *
+ * Copyright (C) 2008 Lai Jiangshan <laijs@xxxxxxxxxxxxxx>
+ *
+ */
+#include <linux/kernel.h>
+#include <linux/ftrace.h>
+#include <linux/string.h>
+#include <linux/ctype.h>
+#include <linux/list.h>
+#include <linux/mutex.h>
+#include <linux/slab.h>
+#include <linux/module.h>
+#include <linux/seq_file.h>
+#include <linux/fs.h>
+#include <linux/marker.h>
+#include <linux/uaccess.h>
+
+#include "trace.h"
+
+/* binary printk basic */
+static DEFINE_MUTEX(btrace_mutex);
+static int btrace_metadata_count;
+
+static inline void lock_btrace(void)
+{
+ mutex_lock(&btrace_mutex);
+}
+
+static inline void unlock_btrace(void)
+{
+ mutex_unlock(&btrace_mutex);
+}
+
+static void get_btrace_metadata(void)
+{
+ lock_btrace();
+ btrace_metadata_count++;
+ unlock_btrace();
+}
+
+static void put_btrace_metadata(void)
+{
+ lock_btrace();
+ btrace_metadata_count--;
+ unlock_btrace();
+}
+
+/* events tracer */
+int trace_bprintk_enable;
+
+static void start_bprintk_trace(struct trace_array *tr)
+{
+ get_btrace_metadata();
+ tracing_reset_online_cpus(tr);
+ trace_bprintk_enable = 1;
+}
+
+static void stop_bprintk_trace(struct trace_array *tr)
+{
+ trace_bprintk_enable = 0;
+ tracing_reset_online_cpus(tr);
+ put_btrace_metadata();
+}
+
+static int init_bprintk_trace(struct trace_array *tr)
+{
+ start_bprintk_trace(tr);
+ return 0;
+}
+
+static struct tracer bprintk_trace __read_mostly =
+{
+ .name = "events",
+ .init = init_bprintk_trace,
+ .reset = stop_bprintk_trace,
+ .start = start_bprintk_trace,
+ .stop = stop_bprintk_trace,
+};
+
+static __init int init_bprintk(void)
+{
+ return register_tracer(&bprintk_trace);
+}
+
+device_initcall(init_bprintk);
diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c
index 9fc8150..7897b5d 100644
--- a/kernel/trace/trace_output.c
+++ b/kernel/trace/trace_output.c
@@ -53,6 +53,26 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...)
return len;
}

+static int
+trace_seq_bprintf(struct trace_seq *s, const char *fmt, const u32 *binary)
+{
+ int len = (PAGE_SIZE - 1) - s->len;
+ int ret;
+
+ if (!len)
+ return 0;
+
+ ret = bstr_printf(s->buffer + s->len, len, fmt, binary);
+
+ /* If we can't write it all, don't bother writing anything */
+ if (ret >= len)
+ return 0;
+
+ s->len += ret;
+
+ return len;
+}
+
/**
* trace_seq_puts - trace sequence printing of simple string
* @s: trace sequence descriptor
@@ -887,6 +907,61 @@ static struct trace_event trace_print_event = {
.raw = trace_print_raw,
};

+/* TRACE_BPRINTK */
+static enum print_line_t
+trace_bprintk_print(struct trace_iterator *iter, int flags)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct bprintk_entry *field;
+
+ trace_assign_type(field, entry);
+
+ if (!seq_print_ip_sym(s, field->ip, flags))
+ goto partial;
+
+ if (!trace_seq_puts(s, ": "))
+ goto partial;
+
+ if (!trace_seq_bprintf(s, field->fmt, field->buf))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static enum print_line_t
+trace_bprintk_raw(struct trace_iterator *iter, int flags)
+{
+ struct trace_entry *entry = iter->ent;
+ struct trace_seq *s = &iter->seq;
+ struct bprintk_entry *field;
+
+ trace_assign_type(field, entry);
+
+ if (!trace_seq_printf(s, ": %lx : ", field->ip))
+ goto partial;
+
+ if (!trace_seq_bprintf(s, field->fmt, field->buf))
+ goto partial;
+
+ return TRACE_TYPE_HANDLED;
+
+ partial:
+ return TRACE_TYPE_PARTIAL_LINE;
+}
+
+static struct trace_event trace_bprintk_event = {
+ .type = TRACE_BPRINTK,
+ .trace = trace_bprintk_print,
+ .latency_trace = trace_bprintk_print,
+ .raw = trace_bprintk_raw,
+ .hex = trace_nop_print,
+ .binary = trace_nop_print,
+};
+
static struct trace_event *events[] __initdata = {
&trace_fn_event,
&trace_ctx_event,
@@ -895,6 +970,7 @@ static struct trace_event *events[] __initdata = {
&trace_stack_event,
&trace_user_stack_event,
&trace_print_event,
+ &trace_bprintk_event,
NULL
};

--
1.6.1


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/