[ 096/103] tracing: Failed to create system directory

From: Greg Kroah-Hartman
Date: Tue Jul 23 2013 - 18:31:42 EST


3.10-stable review patch. If anyone has any objections, please let me know.

------------------

From: Steven Rostedt <rostedt@xxxxxxxxxxx>

commit 6e94a780374ed31b280f939d4757e8d7858dff16 upstream.

Running the following:

# cd /sys/kernel/debug/tracing
# echo p:i do_sys_open > kprobe_events
# echo p:j schedule >> kprobe_events
# cat kprobe_events
p:kprobes/i do_sys_open
p:kprobes/j schedule
# echo p:i do_sys_open >> kprobe_events
# cat kprobe_events
p:kprobes/j schedule
p:kprobes/i do_sys_open
# ls /sys/kernel/debug/tracing/events/kprobes/
enable filter j

Notice that the 'i' is missing from the kprobes directory.

The console produces:

"Failed to create system directory kprobes"

This is because kprobes passes in a allocated name for the system
and the ftrace event subsystem saves off that name instead of creating
a duplicate for it. But the kprobes may free the system name making
the pointer to it invalid.

This bug was introduced by 92edca073c37 "tracing: Use direct field, type
and system names" which switched from using kstrdup() on the system name
in favor of just keeping apointer to it, as the internal ftrace event
system names are static and exist for the life of the computer being booted.

Instead of reverting back to duplicating system names again, we can use
core_kernel_data() to determine if the passed in name was allocated or
static. Then use the MSB of the ref_count to be a flag to keep track if
the name was allocated or not. Then we can still save from having to duplicate
strings that will always exist, but still copy the ones that may be freed.

Reported-by: "zhangwei(Jovi)" <jovi.zhangwei@xxxxxxxxxx>
Reported-by: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
Tested-by: Masami Hiramatsu <masami.hiramatsu.pt@xxxxxxxxxxx>
Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
Signed-off-by: Greg Kroah-Hartman <gregkh@xxxxxxxxxxxxxxxxxxx>

---
kernel/trace/trace_events.c | 41 +++++++++++++++++++++++++++++++++++------
1 file changed, 35 insertions(+), 6 deletions(-)

--- a/kernel/trace/trace_events.c
+++ b/kernel/trace/trace_events.c
@@ -41,6 +41,23 @@ static LIST_HEAD(ftrace_common_fields);
static struct kmem_cache *field_cachep;
static struct kmem_cache *file_cachep;

+#define SYSTEM_FL_FREE_NAME (1 << 31)
+
+static inline int system_refcount(struct event_subsystem *system)
+{
+ return system->ref_count & ~SYSTEM_FL_FREE_NAME;
+}
+
+static int system_refcount_inc(struct event_subsystem *system)
+{
+ return (system->ref_count++) & ~SYSTEM_FL_FREE_NAME;
+}
+
+static int system_refcount_dec(struct event_subsystem *system)
+{
+ return (--system->ref_count) & ~SYSTEM_FL_FREE_NAME;
+}
+
/* Double loops, do not use break, only goto's work */
#define do_for_each_event_file(tr, file) \
list_for_each_entry(tr, &ftrace_trace_arrays, list) { \
@@ -349,8 +366,8 @@ static void __put_system(struct event_su
{
struct event_filter *filter = system->filter;

- WARN_ON_ONCE(system->ref_count == 0);
- if (--system->ref_count)
+ WARN_ON_ONCE(system_refcount(system) == 0);
+ if (system_refcount_dec(system))
return;

list_del(&system->list);
@@ -359,13 +376,15 @@ static void __put_system(struct event_su
kfree(filter->filter_string);
kfree(filter);
}
+ if (system->ref_count & SYSTEM_FL_FREE_NAME)
+ kfree(system->name);
kfree(system);
}

static void __get_system(struct event_subsystem *system)
{
- WARN_ON_ONCE(system->ref_count == 0);
- system->ref_count++;
+ WARN_ON_ONCE(system_refcount(system) == 0);
+ system_refcount_inc(system);
}

static void __get_system_dir(struct ftrace_subsystem_dir *dir)
@@ -379,7 +398,7 @@ static void __put_system_dir(struct ftra
{
WARN_ON_ONCE(dir->ref_count == 0);
/* If the subsystem is about to be freed, the dir must be too */
- WARN_ON_ONCE(dir->subsystem->ref_count == 1 && dir->ref_count != 1);
+ WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);

__put_system(dir->subsystem);
if (!--dir->ref_count)
@@ -1279,7 +1298,15 @@ create_new_subsystem(const char *name)
return NULL;

system->ref_count = 1;
- system->name = name;
+
+ /* Only allocate if dynamic (kprobes and modules) */
+ if (!core_kernel_data((unsigned long)name)) {
+ system->ref_count |= SYSTEM_FL_FREE_NAME;
+ system->name = kstrdup(name, GFP_KERNEL);
+ if (!system->name)
+ goto out_free;
+ } else
+ system->name = name;

system->filter = NULL;

@@ -1292,6 +1319,8 @@ create_new_subsystem(const char *name)
return system;

out_free:
+ if (system->ref_count & SYSTEM_FL_FREE_NAME)
+ kfree(system->name);
kfree(system);
return NULL;
}


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/