Re: perf overlapping maps...

From: David Miller
Date: Mon Oct 22 2018 - 13:58:47 EST


From: Jiri Olsa <jolsa@xxxxxxxxxx>
Date: Mon, 22 Oct 2018 18:16:13 +0200

> I think the fix might actually speed things up,
> but yes, there could be other report regressions

I was about to say the same thing, it could actually speed things up.
In the best case, less work is done (clone avoided, and overlapping
maps don't have to be handled). In the worst case, nothing changes.

Here is what I've been using, to give you an idea. There may be some
file offset fuzz in these patches.

diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 0cd42150f712..e5a442313f9d 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -308,6 +308,7 @@ static int perf_event__synthesize_fork(struct perf_tool *tool,
event->fork.pid = tgid;
event->fork.tid = pid;
event->fork.header.type = PERF_RECORD_FORK;
+ event->fork.header.misc = PERF_RECORD_MISC_COMM_EXEC;

event->fork.header.size = (sizeof(event->fork) + machine->id_hdr_size);

diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c
index 111ae858cbcb..dc06f1fc2ed5 100644
--- a/tools/perf/util/machine.c
+++ b/tools/perf/util/machine.c
@@ -1708,5 +1720,6 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
struct thread *parent = machine__findnew_thread(machine,
event->fork.ppid,
event->fork.ptid);
+ int do_maps_clone = 1;
int err = 0;

@@ -1737,8 +1754,11 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event
thread = machine__findnew_thread(machine, event->fork.pid,
event->fork.tid);

+ if (event->fork.header.misc & PERF_RECORD_MISC_COMM_EXEC)
+ do_maps_clone = 0;
+
if (thread == NULL || parent == NULL ||
- thread__fork(thread, parent, sample->time) < 0) {
+ thread__fork(thread, parent, sample->time, do_maps_clone) < 0) {
dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n");
err = -1;
}
diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c
index 2048d393ece6..7f2858edf221 100644
--- a/tools/perf/util/thread.c
+++ b/tools/perf/util/thread.c
@@ -330,7 +330,8 @@ static int thread__prepare_access(struct thread *thread)
}

static int thread__clone_map_groups(struct thread *thread,
- struct thread *parent)
+ struct thread *parent,
+ int do_maps_clone)
{
/* This is new thread, we share map groups for process. */
if (thread->pid_ == parent->pid_)
@@ -341,15 +342,14 @@ static int thread__clone_map_groups(struct thread *thread,
thread->pid_, thread->tid, parent->pid_, parent->tid);
return 0;
}
-
/* But this one is new process, copy maps. */
- if (map_groups__clone(thread, parent->mg) < 0)
+ if (do_maps_clone &&
+ map_groups__clone(thread, parent->mg) < 0)
return -ENOMEM;
-
return 0;
}

-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, int do_maps_clone)
{
if (parent->comm_set) {
const char *comm = thread__comm_str(parent);
@@ -362,7 +362,7 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp)
}

thread->ppid = parent->tid;
- return thread__clone_map_groups(thread, parent);
+ return thread__clone_map_groups(thread, parent, do_maps_clone);
}

void thread__find_cpumode_addr_location(struct thread *thread, u64 addr,
diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h
index 07606aa6998d..8e4ca1ede01f 100644
--- a/tools/perf/util/thread.h
+++ b/tools/perf/util/thread.h
@@ -87,7 +87,7 @@ struct comm *thread__comm(const struct thread *thread);
struct comm *thread__exec_comm(const struct thread *thread);
const char *thread__comm_str(const struct thread *thread);
int thread__insert_map(struct thread *thread, struct map *map);
-int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp);
+int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp, int do_maps_clone);
size_t thread__fprintf(struct thread *thread, FILE *fp);

struct thread *thread__main_thread(struct machine *machine, struct thread *thread);