[PATCH 13/14] tracing/filter: Swap entire filter of events

From: Steven Rostedt
Date: Mon Feb 07 2011 - 21:00:24 EST


From: Steven Rostedt <srostedt@xxxxxxxxxx>

When creating a new filter, instead of allocating the filter to the
event call first and then processing the filter, it is easier to
process a temporary filter and then just swap it with the call filter.
By doing this, it simplifies the code.

A filter is allocated and processed, when it is done, it is
swapped with the call filter, synchronize_sched() is called to make
sure all callers are done with the old filter (filters are called
with premption disabled), and then the old filter is freed.

Cc: Tom Zanussi <tzanussi@xxxxxxxxx>
Signed-off-by: Steven Rostedt <rostedt@xxxxxxxxxxx>
---
kernel/trace/trace_events_filter.c | 251 +++++++++++++++++++++---------------
1 files changed, 146 insertions(+), 105 deletions(-)

diff --git a/kernel/trace/trace_events_filter.c b/kernel/trace/trace_events_filter.c
index 2403ce5..f5d335d 100644
--- a/kernel/trace/trace_events_filter.c
+++ b/kernel/trace/trace_events_filter.c
@@ -425,10 +425,15 @@ int filter_match_preds(struct event_filter *filter, void *rec)
struct filter_pred *preds;
struct filter_pred *pred;
struct filter_pred *root;
- int n_preds = ACCESS_ONCE(filter->n_preds);
+ int n_preds;
int done = 0;

/* no filter is considered a match */
+ if (!filter)
+ return 1;
+
+ n_preds = filter->n_preds;
+
if (!n_preds)
return 1;

@@ -509,6 +514,9 @@ static void parse_error(struct filter_parse_state *ps, int err, int pos)

static void remove_filter_string(struct event_filter *filter)
{
+ if (!filter)
+ return;
+
kfree(filter->filter_string);
filter->filter_string = NULL;
}
@@ -568,9 +576,10 @@ static void append_filter_err(struct filter_parse_state *ps,

void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
{
- struct event_filter *filter = call->filter;
+ struct event_filter *filter;

mutex_lock(&event_mutex);
+ filter = call->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
@@ -581,9 +590,10 @@ void print_event_filter(struct ftrace_event_call *call, struct trace_seq *s)
void print_subsystem_event_filter(struct event_subsystem *system,
struct trace_seq *s)
{
- struct event_filter *filter = system->filter;
+ struct event_filter *filter;

mutex_lock(&event_mutex);
+ filter = system->filter;
if (filter && filter->filter_string)
trace_seq_printf(s, "%s\n", filter->filter_string);
else
@@ -745,26 +755,9 @@ static void __free_preds(struct event_filter *filter)
filter->n_preds = 0;
}

-static void reset_preds(struct event_filter *filter)
-{
- int n_preds = filter->n_preds;
- int i;
-
- filter->n_preds = 0;
- filter->root = NULL;
- if (!filter->preds)
- return;
-
- for (i = 0; i < n_preds; i++)
- filter->preds[i].fn = filter_pred_none;
-}
-
-static void filter_disable_preds(struct ftrace_event_call *call)
+static void filter_disable(struct ftrace_event_call *call)
{
- struct event_filter *filter = call->filter;
-
call->flags &= ~TRACE_EVENT_FL_FILTERED;
- reset_preds(filter);
}

static void __free_filter(struct event_filter *filter)
@@ -777,11 +770,16 @@ static void __free_filter(struct event_filter *filter)
kfree(filter);
}

+/*
+ * Called when destroying the ftrace_event_call.
+ * The call is being freed, so we do not need to worry about
+ * the call being currently used. This is for module code removing
+ * the tracepoints from within it.
+ */
void destroy_preds(struct ftrace_event_call *call)
{
__free_filter(call->filter);
call->filter = NULL;
- call->flags &= ~TRACE_EVENT_FL_FILTERED;
}

static struct event_filter *__alloc_filter(void)
@@ -789,11 +787,6 @@ static struct event_filter *__alloc_filter(void)
struct event_filter *filter;

filter = kzalloc(sizeof(*filter), GFP_KERNEL);
- if (!filter)
- return ERR_PTR(-ENOMEM);
-
- filter->n_preds = 0;
-
return filter;
}

@@ -838,46 +831,28 @@ static int __alloc_preds(struct event_filter *filter, int n_preds)
return 0;
}

-static int init_filter(struct ftrace_event_call *call)
-{
- if (call->filter)
- return 0;
-
- call->flags &= ~TRACE_EVENT_FL_FILTERED;
- call->filter = __alloc_filter();
- if (IS_ERR(call->filter))
- return PTR_ERR(call->filter);
-
- return 0;
-}
-
-static int init_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_preds(struct event_subsystem *system)
{
struct ftrace_event_call *call;
- int err;

list_for_each_entry(call, &ftrace_events, list) {
if (strcmp(call->class->system, system->name) != 0)
continue;

- err = init_filter(call);
- if (err)
- return err;
+ filter_disable(call);
+ remove_filter_string(call->filter);
}
-
- return 0;
}

-static void filter_free_subsystem_preds(struct event_subsystem *system)
+static void filter_free_subsystem_filters(struct event_subsystem *system)
{
struct ftrace_event_call *call;

list_for_each_entry(call, &ftrace_events, list) {
if (strcmp(call->class->system, system->name) != 0)
continue;
-
- filter_disable_preds(call);
- remove_filter_string(call->filter);
+ __free_filter(call->filter);
+ call->filter = NULL;
}
}

@@ -1743,88 +1718,129 @@ fail:
return err;
}

+struct filter_list {
+ struct list_head list;
+ struct event_filter *filter;
+};
+
static int replace_system_preds(struct event_subsystem *system,
struct filter_parse_state *ps,
char *filter_string)
{
struct ftrace_event_call *call;
+ struct filter_list *filter_item;
+ struct filter_list *tmp;
+ LIST_HEAD(filter_list);
bool fail = true;
int err;

list_for_each_entry(call, &ftrace_events, list) {
- struct event_filter *filter = call->filter;

if (strcmp(call->class->system, system->name) != 0)
continue;

- /* try to see if the filter can be applied */
- err = replace_preds(call, filter, ps, filter_string, true);
+ /*
+ * Try to see if the filter can be applied
+ * (filter arg is ignored on dry_run)
+ */
+ err = replace_preds(call, NULL, ps, filter_string, true);
if (err)
goto fail;
}

- /* set all filter pred counts to zero */
list_for_each_entry(call, &ftrace_events, list) {
- struct event_filter *filter = call->filter;
+ struct event_filter *filter;

if (strcmp(call->class->system, system->name) != 0)
continue;

- reset_preds(filter);
- }
+ filter_item = kzalloc(sizeof(*filter_item), GFP_KERNEL);
+ if (!filter_item)
+ goto fail_mem;

- /*
- * Since some of the preds may be used under preemption
- * we need to wait for them to finish before we may
- * reallocate them.
- */
- synchronize_sched();
+ list_add_tail(&filter_item->list, &filter_list);

- list_for_each_entry(call, &ftrace_events, list) {
- struct event_filter *filter = call->filter;
+ filter_item->filter = __alloc_filter();
+ if (!filter_item->filter)
+ goto fail_mem;
+ filter = filter_item->filter;

- if (strcmp(call->class->system, system->name) != 0)
- continue;
+ /* Can only fail on no memory */
+ err = replace_filter_string(filter, filter_string);
+ if (err)
+ goto fail_mem;

- /* really apply the filter */
- filter_disable_preds(call);
err = replace_preds(call, filter, ps, filter_string, false);
- if (err)
- filter_disable_preds(call);
- else {
+ if (err) {
+ filter_disable(call);
+ parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
+ append_filter_err(ps, filter);
+ } else
call->flags |= TRACE_EVENT_FL_FILTERED;
- replace_filter_string(filter, filter_string);
- }
+ /*
+ * Regardless of if this returned an error, we still
+ * replace the filter for the call.
+ */
+ filter = call->filter;
+ call->filter = filter_item->filter;
+ filter_item->filter = filter;
+
fail = false;
}

if (fail)
goto fail;

+ /*
+ * The calls can still be using the old filters.
+ * Do a synchronize_sched() to ensure all calls are
+ * done with them before we free them.
+ */
+ synchronize_sched();
+ list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+ __free_filter(filter_item->filter);
+ list_del(&filter_item->list);
+ kfree(filter_item);
+ }
return 0;
fail:
+ /* No call succeeded */
+ list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+ list_del(&filter_item->list);
+ kfree(filter_item);
+ }
parse_error(ps, FILT_ERR_BAD_SUBSYS_FILTER, 0);
return -EINVAL;
+ fail_mem:
+ /* If any call succeeded, we still need to sync */
+ if (!fail)
+ synchronize_sched();
+ list_for_each_entry_safe(filter_item, tmp, &filter_list, list) {
+ __free_filter(filter_item->filter);
+ list_del(&filter_item->list);
+ kfree(filter_item);
+ }
+ return -ENOMEM;
}

int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
{
- int err;
struct filter_parse_state *ps;
+ struct event_filter *filter;
+ struct event_filter *tmp;
+ int err = 0;

mutex_lock(&event_mutex);

- err = init_filter(call);
- if (err)
- goto out_unlock;
-
if (!strcmp(strstrip(filter_string), "0")) {
- filter_disable_preds(call);
- reset_preds(call->filter);
+ filter_disable(call);
+ filter = call->filter;
+ if (!filter)
+ goto out_unlock;
+ call->filter = NULL;
/* Make sure the filter is not being used */
synchronize_sched();
- __free_preds(call->filter);
- remove_filter_string(call->filter);
+ __free_filter(filter);
goto out_unlock;
}

@@ -1833,29 +1849,41 @@ int apply_event_filter(struct ftrace_event_call *call, char *filter_string)
if (!ps)
goto out_unlock;

- filter_disable_preds(call);
- replace_filter_string(call->filter, filter_string);
+ filter = __alloc_filter();
+ if (!filter) {
+ kfree(ps);
+ goto out_unlock;
+ }
+
+ replace_filter_string(filter, filter_string);

parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
if (err) {
- append_filter_err(ps, call->filter);
+ append_filter_err(ps, filter);
goto out;
}

- /*
- * Make sure all the pred counts are zero so that
- * no task is using it when we reallocate the preds array.
- */
- reset_preds(call->filter);
- synchronize_sched();
-
- err = replace_preds(call, call->filter, ps, filter_string, false);
- if (err)
- append_filter_err(ps, call->filter);
- else
+ err = replace_preds(call, filter, ps, filter_string, false);
+ if (err) {
+ filter_disable(call);
+ append_filter_err(ps, filter);
+ } else
call->flags |= TRACE_EVENT_FL_FILTERED;
out:
+ /*
+ * Always swap the call filter with the new filter
+ * even if there was an error. If there was an error
+ * in the filter, we disable the filter and show the error
+ * string
+ */
+ tmp = call->filter;
+ call->filter = filter;
+ if (tmp) {
+ /* Make sure the call is done with the filter */
+ synchronize_sched();
+ __free_filter(tmp);
+ }
filter_opstack_clear(ps);
postfix_clear(ps);
kfree(ps);
@@ -1868,18 +1896,21 @@ out_unlock:
int apply_subsystem_event_filter(struct event_subsystem *system,
char *filter_string)
{
- int err;
struct filter_parse_state *ps;
+ struct event_filter *filter;
+ int err = 0;

mutex_lock(&event_mutex);

- err = init_subsystem_preds(system);
- if (err)
- goto out_unlock;
-
if (!strcmp(strstrip(filter_string), "0")) {
filter_free_subsystem_preds(system);
remove_filter_string(system->filter);
+ filter = system->filter;
+ system->filter = NULL;
+ /* Ensure all filters are no longer used */
+ synchronize_sched();
+ filter_free_subsystem_filters(system);
+ __free_filter(filter);
goto out_unlock;
}

@@ -1888,7 +1919,17 @@ int apply_subsystem_event_filter(struct event_subsystem *system,
if (!ps)
goto out_unlock;

- replace_filter_string(system->filter, filter_string);
+ filter = __alloc_filter();
+ if (!filter)
+ goto out;
+
+ replace_filter_string(filter, filter_string);
+ /*
+ * No event actually uses the system filter
+ * we can free it without synchronize_sched().
+ */
+ __free_filter(system->filter);
+ system->filter = filter;

parse_init(ps, filter_ops, filter_string);
err = filter_parse(ps);
@@ -1945,7 +1986,7 @@ int ftrace_profile_set_filter(struct perf_event *event, int event_id,
goto out_unlock;

filter = __alloc_filter();
- if (IS_ERR(filter)) {
+ if (!filter) {
err = PTR_ERR(filter);
goto out_unlock;
}
--
1.7.2.3


--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/