Re: [PATCH] sched: Provide iowait counters

From: Steven Rostedt
Date: Mon Jul 20 2009 - 16:26:34 EST



On Mon, 20 Jul 2009, Peter Zijlstra wrote:
> >
> > How would the TRACE_EVENT look exactly? Would there be anything different
> > in the trace point location itself?
>
> Something like:
>
> TRACE_EVENT(sched_iowait,
>
>         TP_PROTO(struct task_struct *p, u64 time),
>
>         TP_ARGS(p, time),
>
>         TP_STRUCT__entry(
>                 __field(pid_t,  pid     )
>                 __field(u64,    time    )
>         ),
>
>         TP_fast_assign(
>                 __entry->pid    = p->pid;
>                 __entry->time   = time;
>
>                 __perf_count(time);
>         ),
>
>         TP_printk("task %d waited for IO for %Lu ns",
>                   __entry->pid, __entry->time)
> );
>
> Something like the below, except that its probably borken in interesting
> ways..
>
> ---
> include/trace/ftrace.h | 19 ++++++++++++++-----
> kernel/perf_counter.c | 6 +++---
> 2 files changed, 17 insertions(+), 8 deletions(-)
>
> diff --git a/include/trace/ftrace.h b/include/trace/ftrace.h
> index 1867553..7e550a2 100644
> --- a/include/trace/ftrace.h
> +++ b/include/trace/ftrace.h
> @@ -447,11 +447,20 @@ static inline int ftrace_get_offsets_##call( \
> #define TP_FMT(fmt, args...) fmt "\n", ##args
>
> #ifdef CONFIG_EVENT_PROFILE
> -#define _TRACE_PROFILE(call, proto, args) \
> +
> +#undef __perf_addr
> +#define __perf_addr(a) addr = (a)
> +
> +#undef __perf_count
> +#define __perf_count(c) count = (c)
> +
> +#define _TRACE_PROFILE(call, proto, args, assign) \
> static void ftrace_profile_##call(proto) \
> { \
> - extern void perf_tpcounter_event(int); \
> - perf_tpcounter_event(event_##call.id); \
> + extern void perf_tpcounter_event(int, u64, u64); \
> + u64 addr = 0, count = 1; \
> + { assign; } \
> + perf_tpcounter_event(event_##call.id, addr, count); \

The problem here is that the assign will also pull in the:

        TP_fast_assign(
                __entry->pid    = p->pid;
                __entry->time   = time;

part, so you will probably get compile errors from the __entry references.
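That is, with the above the profile stub would expand to something roughly
like this (untested, names just filled in from your macros to illustrate):

        static void ftrace_profile_sched_iowait(struct task_struct *p, u64 time)
        {
                extern void perf_tpcounter_event(int, u64, u64);
                u64 addr = 0, count = 1;
                {
                        __entry->pid    = p->pid;       /* no __entry in scope here */
                        __entry->time   = time;

                        count = (time);                 /* from __perf_count(time) */
                }
                perf_tpcounter_event(event_sched_iowait.id, addr, count);
        }

and there is no __entry in that function to assign to.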

What about doing this instead:

TRACE_EVENT(sched_iowait,

        TP_PROTO(struct task_struct *p, u64 time),

        TP_ARGS(p, time),

        TP_STRUCT__entry(
                __field(pid_t,  pid     )
                __field(u64,    time    )
        ),

        TP_fast_assign(
                __entry->pid    = p->pid;
                __entry->time   = time;

        ) __perf_count(time),

        TP_printk("task %d waited for IO for %Lu ns",
                  __entry->pid, __entry->time)
);

Then we could simply do:

#undef __perf_count
#define __perf_count(a)

[ do all the ftrace event work ]

#undef TP_fast_assign
#define TP_fast_assign(a...)

#undef __perf_count
#define __perf_count(c) count = (c);
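
With that, the normal ftrace code keeps the __entry assignments and simply
eats the __perf_count(), while the profile stub ends up with only the count
assignment, roughly (again untested, same illustrative names as above):

        static void ftrace_profile_sched_iowait(struct task_struct *p, u64 time)
        {
                extern void perf_tpcounter_event(int, u64, u64);
                u64 addr = 0, count = 1;
                count = (time);         /* from __perf_count(time) */
                perf_tpcounter_event(event_sched_iowait.id, addr, count);
        }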


-- Steve


> } \
> \
> static int ftrace_profile_enable_##call(struct ftrace_event_call *event_call) \
> @@ -476,7 +485,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
> .profile_disable = ftrace_profile_disable_##call,
>
> #else
> -#define _TRACE_PROFILE(call, proto, args)
> +#define _TRACE_PROFILE(call, proto, args, assign)
> #define _TRACE_PROFILE_INIT(call)
> #endif
>
> @@ -502,7 +511,7 @@ static void ftrace_profile_disable_##call(struct ftrace_event_call *event_call)\
>
> #undef TRACE_EVENT
> #define TRACE_EVENT(call, proto, args, tstruct, assign, print) \
> -_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args)) \
> +_TRACE_PROFILE(call, PARAMS(proto), PARAMS(args), PARAMS(assign)) \
> \
> static struct ftrace_event_call event_##call; \
> \
> diff --git a/kernel/perf_counter.c b/kernel/perf_counter.c
> index 5498890..879a6ce 100644
> --- a/kernel/perf_counter.c
> +++ b/kernel/perf_counter.c
> @@ -3680,17 +3680,17 @@ static const struct pmu perf_ops_task_clock = {
> };
>
> #ifdef CONFIG_EVENT_PROFILE
> -void perf_tpcounter_event(int event_id)
> +void perf_tpcounter_event(int event_id, u64 addr, u64 count)
> {
> struct perf_sample_data data = {
> .regs = get_irq_regs(),
> - .addr = 0,
> + .addr = addr,
> };
>
> if (!data.regs)
> data.regs = task_pt_regs(current);
>
> - do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, 1, 1, &data);
> + do_perf_swcounter_event(PERF_TYPE_TRACEPOINT, event_id, count, 1, &data);
> }
> EXPORT_SYMBOL_GPL(perf_tpcounter_event);
>