Re: [Patch 11/12] ftrace plugin for kernel symbol tracing using HW Breakpoint interfaces - v4

From: K.Prasad
Date: Tue May 12 2009 - 10:20:21 EST


On Tue, May 12, 2009 at 12:14:29AM +0200, Frederic Weisbecker wrote:
> On Mon, May 11, 2009 at 05:25:02PM +0530, K.Prasad wrote:
> > This patch adds an ftrace plugin to detect and profile memory access over kernel
> > variables. It uses HW Breakpoint interfaces to 'watch memory addresses.
> >
> > +void ksym_collect_stats(unsigned long hbp_hit_addr)
> > +{
> > + struct hlist_node *node;
> > + struct trace_ksym *entry;
> > +
> > + rcu_read_lock();
> > + hlist_for_each_entry_rcu(entry, node, &ksym_filter_head, ksym_hlist) {
> > + if ((entry->ksym_addr == hbp_hit_addr) &&
> > + (entry->counter <= MAX_UL_INT)) {
> > + entry->counter++;
> > + break;
> > + }
> > + }
> > + rcu_read_unlock();
>
>
>
> rcu looks a good idea to maintain your list.
>

True, and there weren't many choices either. The earlier implementations
with mutex/spin_lock turned out to be incorrect in their own way (mutexes
cannot be used inside exception handler context, while spinlocks led to a
potential circular dependency).

> > +static int process_new_ksym_entry(char *ksymname, int op, unsigned long addr)
> > +{
> > + struct trace_ksym *entry;
> > + int ret;
> > +
> > + if (ksym_filter_entry_count >= KSYM_TRACER_MAX) {
> > + printk(KERN_ERR "ksym_tracer: Maximum limit:(%d) reached. No"
> > + " new requests for tracing can be accepted now.\n",
> > + KSYM_TRACER_MAX);
> > + return -ENOSPC;
> > + }
> > +
> > + entry = kzalloc(sizeof(struct trace_ksym), GFP_KERNEL);
> > + if (!entry)
> > + return -ENOMEM;
> > +
> > + entry->ksym_hbp = kzalloc(sizeof(struct hw_breakpoint), GFP_KERNEL);
> > + if (!entry->ksym_hbp) {
> > + kfree(entry);
> > + return -ENOMEM;
> > + }
> > +
> > + entry->ksym_hbp->info.name = ksymname;
> > + entry->ksym_hbp->info.type = op;
> > + entry->ksym_addr = entry->ksym_hbp->info.address = addr;
> > +#ifdef CONFIG_X86
> > + entry->ksym_hbp->info.len = HW_BREAKPOINT_LEN_4;
> > +#endif
> > + entry->ksym_hbp->triggered = (void *)ksym_hbp_handler;
> > +
> > + ret = register_kernel_hw_breakpoint(entry->ksym_hbp);
> > + if (ret < 0) {
> > + printk(KERN_INFO "ksym_tracer request failed. Try again"
> > + " later!!\n");
> > + kfree(entry->ksym_hbp);
> > + kfree(entry);
> > + return -EAGAIN;
> > + }
> > + hlist_add_head_rcu(&(entry->ksym_hlist), &ksym_filter_head);
>
>
> And then ksym_tracer_mutex protect concurrent writers.
>
>

Yes, it synchronises read/write operations over the list pointed to by
ksym_filter_head.

> > + ksym_filter_entry_count++;
> > +
> > + return 0;
> > +}
> > +
> > +static ssize_t ksym_trace_filter_read(struct file *filp, char __user *ubuf,
> > + size_t count, loff_t *ppos)
> > +{
> > + struct trace_ksym *entry;
> > + struct hlist_node *node;
> > + char buf[KSYM_FILTER_ENTRY_LEN * KSYM_TRACER_MAX];
> > + ssize_t ret, cnt = 0;
> > +
> > + mutex_lock(&ksym_tracer_mutex);
> > +
> > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
>
>
>
> And here you don't use the rcu version.
> I guess it's fine since you're protected by the writer lock...
>

I couldn't use RCU here because simple_read_from_buffer() is
not atomic, hence the mutex.

> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt, "%s:",
> > + entry->ksym_hbp->info.name);
> > + if (entry->ksym_hbp->info.type == HW_BREAKPOINT_WRITE)
> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > + "-w-\n");
> > + else if (entry->ksym_hbp->info.type == HW_BREAKPOINT_RW)
> > + cnt += snprintf(&buf[cnt], KSYM_FILTER_ENTRY_LEN - cnt,
> > + "rw-\n");
> > + }
> > + ret = simple_read_from_buffer(ubuf, count, ppos, buf, strlen(buf));
> > + mutex_unlock(&ksym_tracer_mutex);
> > +
> > + return ret;
> > +}
> > +
> > +static ssize_t ksym_trace_filter_write(struct file *file,
> > + const char __user *buffer,
> > + size_t count, loff_t *ppos)
> > +{
> > + struct trace_ksym *entry;
> > + struct hlist_node *node;
> > + char *input_string, *ksymname = NULL;
> > + unsigned long ksym_addr = 0;
> > + int ret, op, changed = 0;
> > +
> > + /* Ignore echo "" > ksym_trace_filter */
> > + if (count == 0)
> > + return 0;
> > +
> > + input_string = kzalloc(count, GFP_KERNEL);
> > + if (!input_string)
> > + return -ENOMEM;
> > +
> > + if (copy_from_user(input_string, buffer, count)) {
> > + kfree(input_string);
> > + return -EFAULT;
> > + }
> > +
> > + ret = op = parse_ksym_trace_str(input_string, &ksymname, &ksym_addr);
> > + if (ret < 0) {
> > + kfree(input_string);
> > + return ret;
> > + }
> > +
> > + mutex_lock(&ksym_tracer_mutex);
> > +
> > + ret = -EINVAL;
> > + hlist_for_each_entry(entry, node, &ksym_filter_head, ksym_hlist) {
>
>
>
> Same here, ok...
>
>
> > +static int ksym_trace_init(struct trace_array *tr)
> > +{
> > + int cpu, ret = 0;
> > +
> > + for_each_online_cpu(cpu)
> > + tracing_reset(tr, cpu);
> > +
> > + ksym_tracing_enabled = 1;
> > + ksym_trace_array = tr;
> > +
> > +#ifdef CONFIG_FTRACE_SELFTEST
> > + /* Check if we are re-entering self-test code during initialisation */
> > + if (ksym_selftest_dummy)
> > + goto ret_path;
> > +
> > + ksym_selftest_dummy = 0;
> > +
> > + /* Register the read-write tracing request */
> > + ret = process_new_ksym_entry(KSYM_SELFTEST_ENTRY, HW_BREAKPOINT_RW,
> > + (unsigned long)(&ksym_selftest_dummy));
> > +
> > + if (ret < 0) {
> > + printk(KERN_CONT "ksym_trace read-write startup test failed\n");
> > + goto ret_path;
> > + }
> > + /* Perform a read and a write operation over the dummy variable to
> > + * trigger the tracer
> > + */
> > + if (ksym_selftest_dummy == 0)
> > + ksym_selftest_dummy++;
> > +ret_path:
> > +#endif /* CONFIG_FTRACE_SELFTEST */
>
>
> It means that each time your tracer is selected, it will perform a selftest.
> I think we only need this selftest once during the boot.
> I would rather see that in the real selftest callback (trace_selftest_startup_kysm).
>

> > + if (ksym_selftest_dummy)
> > + goto ret_path;

The above check will help prevent a re-run of the test every time init is
executed.

A part of the selftest was kept in trace_ksym.c (and hence in
ksym_trace_init()) in order to use functions local to this file, such as
process_new_ksym_entry().

> > +__init static int init_ksym_trace(void)
> > +{
> > + struct dentry *d_tracer;
> > + struct dentry *entry;
> > +
> > + d_tracer = tracing_init_dentry();
> > + ksym_filter_entry_count = 0;
> > +
> > + entry = debugfs_create_file("ksym_trace_filter", 0666, d_tracer,
>
>
>
> Still writeable for everyone?
>
> Thanks,
> Frederic.
>

Looks like I missed the change! The next patchset will contain
a '644' permission mode.

Thanks for reviewing the code.

-- K.Prasad

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/