Re: [PATCH v7 3/3] binder: add transaction latency tracer

From: Todd Kjos
Date: Tue Aug 04 2020 - 11:29:12 EST


On Tue, Aug 4, 2020 at 6:59 AM Frankie Chang <Frankie.Chang@xxxxxxxxxxxx> wrote:
>
> From: "Frankie.Chang" <Frankie.Chang@xxxxxxxxxxxx>
>
> Record start/end timestamp for binder transaction.
> When transaction is completed or transaction is free,
> it would be checked if transaction latency over threshold
> (default 2 sec), if yes, printing related information for tracing.
>
> /* Implement details */
> - Add latency tracer module to monitor transaction
> by attaching to new tracepoints introduced
> when transactions are allocated and freed.
> The trace_binder_txn_latency_free would not be enabled
> by default. Monitoring which transaction is too slow to
> cause some of exceptions is important. So we hook the
> tracepoint to call the monitor function.
>
> - Since some of modules would trigger timeout NE
> if their binder transaction don't finish in time,
> such as audio timeout (5 sec), even BT command
> timeout (2 sec), etc.
> Therefore, setting the timeout threshold as default
> 2 seconds could be helpful to debug.
> But this timeout threshold is configurable, to let
> all users determine the more suitable threshold.
>
> - The reason why printing the related information to
> kernel information log but not trace buffer is that
> some abnormal transactions may be pending for a long
> time ago, they could not be recorded due to buffer
> limited.
>
> Signed-off-by: Frankie.Chang <Frankie.Chang@xxxxxxxxxxxx>

Acked-by: Todd Kjos <tkjos@xxxxxxxxxx>

> ---
> drivers/android/Kconfig | 8 +++
> drivers/android/Makefile | 1 +
> drivers/android/binder.c | 2 +
> drivers/android/binder_internal.h | 13 ++++
> drivers/android/binder_latency_tracer.c | 112 +++++++++++++++++++++++++++++++
> drivers/android/binder_trace.h | 26 ++++++-
> 6 files changed, 159 insertions(+), 3 deletions(-)
> create mode 100644 drivers/android/binder_latency_tracer.c
>
> diff --git a/drivers/android/Kconfig b/drivers/android/Kconfig
> index 6fdf2ab..91fff1e 100644
> --- a/drivers/android/Kconfig
> +++ b/drivers/android/Kconfig
> @@ -54,6 +54,14 @@ config ANDROID_BINDER_IPC_SELFTEST
> exhaustively with combinations of various buffer sizes and
> alignments.
>
> +config BINDER_TRANSACTION_LATENCY_TRACKING
> + tristate "Android Binder transaction tracking"
> + help
> + Used for track abnormal binder transaction which is over threshold,
> + when the transaction is done or be free, this transaction would be
> + checked whether it executed overtime.
> + If yes, printing out the detailed info.
> +
> endif # if ANDROID
>
> endmenu
> diff --git a/drivers/android/Makefile b/drivers/android/Makefile
> index c9d3d0c9..c2ffdb6 100644
> --- a/drivers/android/Makefile
> +++ b/drivers/android/Makefile
> @@ -4,3 +4,4 @@ ccflags-y += -I$(src) # needed for trace events
> obj-$(CONFIG_ANDROID_BINDERFS) += binderfs.o
> obj-$(CONFIG_ANDROID_BINDER_IPC) += binder.o binder_alloc.o
> obj-$(CONFIG_ANDROID_BINDER_IPC_SELFTEST) += binder_alloc_selftest.o
> +obj-$(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING) += binder_latency_tracer.o
> diff --git a/drivers/android/binder.c b/drivers/android/binder.c
> index 1bfadc2..22f7cc6 100644
> --- a/drivers/android/binder.c
> +++ b/drivers/android/binder.c
> @@ -2671,6 +2671,7 @@ static void binder_transaction(struct binder_proc *proc,
> return_error_line = __LINE__;
> goto err_alloc_t_failed;
> }
> + trace_binder_txn_latency_alloc(t);
> INIT_LIST_HEAD(&t->fd_fixups);
> binder_stats_created(BINDER_STAT_TRANSACTION);
> spin_lock_init(&t->lock);
> @@ -5159,6 +5160,7 @@ static void print_binder_transaction_ilocked(struct seq_file *m,
> to_proc ? to_proc->pid : 0,
> t->to_thread ? t->to_thread->pid : 0,
> t->code, t->flags, t->priority, t->need_reply);
> + trace_binder_txn_latency_info(m, t);
> spin_unlock(&t->lock);
>
> if (proc != to_proc) {
> diff --git a/drivers/android/binder_internal.h b/drivers/android/binder_internal.h
> index 5b65413..596db00 100644
> --- a/drivers/android/binder_internal.h
> +++ b/drivers/android/binder_internal.h
> @@ -13,6 +13,11 @@
> #include <linux/uidgid.h>
> #include "binder_alloc.h"
>
> +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> +#include <linux/rtc.h>
> +#include <linux/time.h>
> +#endif
> +
> struct binder_context {
> struct binder_node *binder_context_mgr_node;
> struct mutex context_mgr_node_lock;
> @@ -521,6 +526,14 @@ struct binder_transaction {
> * during thread teardown
> */
> spinlock_t lock;
> + /**
> + * @timestamp and @tv are used to record the time
> + * that the binder transaction startup
> + */
> +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> + struct timespec64 timestamp;
> + struct timeval tv;
> +#endif
> };
>
> /**
> diff --git a/drivers/android/binder_latency_tracer.c b/drivers/android/binder_latency_tracer.c
> new file mode 100644
> index 0000000..61ae75f
> --- /dev/null
> +++ b/drivers/android/binder_latency_tracer.c
> @@ -0,0 +1,112 @@
> +// SPDX-License-Identifier: GPL-2.0
> +/*
> + * Copyright (C) 2019 MediaTek Inc.
> + */
> +
> +#include <linux/module.h>
> +#include <uapi/linux/android/binder.h>
> +#include "binder_internal.h"
> +#include "binder_trace.h"
> +
> +/**
> + * The reason setting the binder_txn_latency_threshold to 2 sec
> + * is that most of timeout abort is greater or equal to 2 sec.
> + * Making it configurable to let all users determine which
> + * threshold is more suitable.
> + */
> +static uint32_t binder_txn_latency_threshold = 2;
> +module_param_named(threshold, binder_txn_latency_threshold,
> + uint, 0644);
> +
> +/*
> + * probe_binder_txn_latency_free - Output info of a delay transaction
> + * @t: pointer to the over-time transaction
> + */
> +void probe_binder_txn_latency_free(void *ignore, struct binder_transaction *t,
> + int from_proc, int from_thread,
> + int to_proc, int to_thread)
> +{
> + struct rtc_time tm;
> + struct timespec64 *startime;
> + struct timespec64 cur, sub_t;
> +
> + ktime_get_ts64(&cur);
> + startime = &t->timestamp;
> + sub_t = timespec64_sub(cur, *startime);
> +
> + /* if transaction time is over than binder_txn_latency_threshold (sec),
> + * show timeout warning log.
> + */
> + if (sub_t.tv_sec < binder_txn_latency_threshold)
> + return;
> +
> + rtc_time_to_tm(t->tv.tv_sec, &tm);
> +
> + pr_info_ratelimited("%d: from %d:%d to %d:%d",
> + t->debug_id, from_proc, from_thread,
> + to_proc, to_thread);
> +
> + pr_info_ratelimited(" total %u.%03ld s code %u start %lu.%03ld android %d-%02d-%02d %02d:%02d:%02d.%03lu\n",
> + (unsigned int)sub_t.tv_sec,
> + (sub_t.tv_nsec / NSEC_PER_MSEC),
> + t->code,
> + (unsigned long)startime->tv_sec,
> + (startime->tv_nsec / NSEC_PER_MSEC),
> + (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
> + tm.tm_hour, tm.tm_min, tm.tm_sec,
> + (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
> +}
> +
> +static void probe_binder_txn_latency_alloc(void *ignore,
> + struct binder_transaction *t)
> +{
> + struct timespec64 now;
> +
> + ktime_get_ts64(&t->timestamp);
> + ktime_get_real_ts64(&now);
> + t->tv.tv_sec = now.tv_sec;
> + t->tv.tv_sec -= (sys_tz.tz_minuteswest * 60);
> + t->tv.tv_usec = now.tv_nsec/1000;
> +}
> +
> +static void probe_binder_txn_latency_info(void *ignore, struct seq_file *m,
> + struct binder_transaction *t)
> +{
> + struct rtc_time tm;
> +
> + rtc_time_to_tm(t->tv.tv_sec, &tm);
> + seq_printf(m,
> + " start %lu.%06lu android %d-%02d-%02d %02d:%02d:%02d.%03lu",
> + (unsigned long)t->timestamp.tv_sec,
> + (t->timestamp.tv_nsec / NSEC_PER_USEC),
> + (tm.tm_year + 1900), (tm.tm_mon + 1), tm.tm_mday,
> + tm.tm_hour, tm.tm_min, tm.tm_sec,
> + (unsigned long)(t->tv.tv_usec / USEC_PER_MSEC));
> +}
> +
> +static int __init init_binder_latency_tracer(void)
> +{
> + register_trace_binder_txn_latency_free(
> + probe_binder_txn_latency_free, NULL);
> + register_trace_binder_txn_latency_alloc(
> + probe_binder_txn_latency_alloc, NULL);
> + register_trace_binder_txn_latency_info(
> + probe_binder_txn_latency_info, NULL);
> +
> + return 0;
> +}
> +
> +static void exit_binder_latency_tracer(void)
> +{
> + unregister_trace_binder_txn_latency_free(
> + probe_binder_txn_latency_free, NULL);
> + unregister_trace_binder_txn_latency_alloc(
> + probe_binder_txn_latency_alloc, NULL);
> + unregister_trace_binder_txn_latency_info(
> + probe_binder_txn_latency_info, NULL);
> +}
> +
> +module_init(init_binder_latency_tracer);
> +module_exit(exit_binder_latency_tracer);
> +
> +MODULE_LICENSE("GPL v2");
> diff --git a/drivers/android/binder_trace.h b/drivers/android/binder_trace.h
> index eb2c53c..be4fe4c 100644
> --- a/drivers/android/binder_trace.h
> +++ b/drivers/android/binder_trace.h
> @@ -95,6 +95,17 @@
> __entry->thread_todo)
> );
>
> +DECLARE_TRACE(binder_txn_latency_alloc,
> + TP_PROTO(struct binder_transaction *t),
> + TP_ARGS(t, e)
> +);
> +
> +DECLARE_TRACE(binder_txn_latency_info,
> + TP_PROTO(struct seq_file *m,
> + struct binder_transaction *t),
> + TP_ARGS(m, t)
> +);
> +
> TRACE_EVENT(binder_txn_latency_free,
> TP_PROTO(struct binder_transaction *t
> int from_proc, int from_thread
> @@ -108,6 +119,8 @@
> __field(int, to_thread)
> __field(unsigned int, code)
> __field(unsigned int, flags)
> + __field(unsigned long, start_sec)
> + __field(unsigned long, start_nsec)
> ),
> TP_fast_assign(
> __entry->debug_id = t->debug_id;
> @@ -117,11 +130,18 @@
> __entry->to_thread = to_thread;
> __entry->code = t->code;
> __entry->flags = t->flags;
> - ),
> - TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x",
> +#if IS_ENABLED(CONFIG_BINDER_TRANSACTION_LATENCY_TRACKING)
> + __entry->start_sec = t->timestamp.tv_sec;
> + __entry->start_nsec = t->timestamp.tv_nsec / NSEC_PER_MSEC;
> +#else
> + __entry->start_sec = 0;
> + __entry->start_nsec = 0;
> +#endif
> + ),
> + TP_printk("transaction=%d from %d:%d to %d:%d flags=0x%x code=0x%x start %lu.%03ld",
> __entry->debug_id, __entry->from_proc, __entry->from_thread,
> __entry->to_proc, __entry->to_thread, __entry->code,
> - __entry->flags)
> + __entry->flags, __entry->start_sec, __entry->start_nsec)
> );
>
> TRACE_EVENT(binder_transaction,
> --
> 1.7.9.5