[PATCH 2/2] perf tools: Cope with sparsely-numbered CPUs

From: Paul Mackerras
Date: Tue Dec 15 2009 - 04:35:51 EST


For system-wide monitoring, the perf tools currently ask how many CPUs
are online, and then instantiate perf_events for CPUs 0 ... N-1. This
doesn't work correctly when CPUs are numbered sparsely. For example,
a four-core POWER6 in single-thread mode will have CPUs 0, 2, 4 and 6.
The perf tools will try to open counters on CPUs 0, 1, 2 and 3, and
either fail with an error message or silently ignore CPUs 4 and 6.

This fixes the problem by making perf stat, perf record and perf top
create counters for increasing CPU numbers until they have a counter
for each online CPU. If the attempt to create a counter on a given
CPU fails, we get an ENODEV error and we just move on to the next CPU.
To avoid an infinite loop in case the number of online CPUs gets
reduced while we are creating counters, we re-read the number of
online CPUs when we fail to create a counter on some CPU.

Reported-by: Michael Neuling <mikey@xxxxxxxxxxx>
Tested-by: Michael Neuling <mikey@xxxxxxxxxxx>
Cc: stable@xxxxxxxxxx
Signed-off-by: Paul Mackerras <paulus@xxxxxxxxx>
---
tools/perf/builtin-record.c | 36 ++++++++++++++++++++++++++++--------
tools/perf/builtin-stat.c | 15 +++++++++++++--
tools/perf/builtin-top.c | 27 +++++++++++++++++++--------
3 files changed, 60 insertions(+), 18 deletions(-)

diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c
index 0e519c6..9a47b86 100644
--- a/tools/perf/builtin-record.c
+++ b/tools/perf/builtin-record.c
@@ -230,7 +230,7 @@ static struct perf_header_attr *get_header_attr(struct perf_event_attr *a, int n
return h_attr;
}

-static void create_counter(int counter, int cpu, pid_t pid)
+static int create_counter(int counter, int cpu, pid_t pid)
{
char *filter = filters[counter];
struct perf_event_attr *attr = attrs + counter;
@@ -287,8 +287,11 @@ try_again:

if (err == EPERM || err == EACCES)
die("Permission error - are you root?\n");
- else if (err == ENODEV && profile_cpu != -1)
- die("No such device - did you specify an out-of-range profile CPU?\n");
+ else if (err == ENODEV && system_wide) {
+ if (profile_cpu != -1)
+ die("No such device - did you specify an out-of-range profile CPU?\n");
+ return -1;
+ }

/*
* If it's cycles then fall back to hrtimer
@@ -380,17 +383,28 @@ try_again:
}

ioctl(fd[nr_cpu][counter], PERF_EVENT_IOC_ENABLE);
+ return 0;
}

-static void open_counters(int cpu, pid_t pid)
+static int open_counters(int cpu, pid_t pid)
{
int counter;

group_fd = -1;
- for (counter = 0; counter < nr_counters; counter++)
- create_counter(counter, cpu, pid);
+ for (counter = 0; counter < nr_counters; counter++) {
+ if (create_counter(counter, cpu, pid)) {
+ /*
+ * If cpu #cpu doesn't exist but we have already
+ * created some events for it, close them.
+ */
+ while (--counter >= 0)
+ close(fd[nr_cpu][counter]);
+ return -1;
+ }
+ }

nr_cpu++;
+ return 0;
}

static void atexit_header(void)
@@ -475,8 +489,14 @@ static int __cmd_record(int argc, const char **argv)
if (profile_cpu != -1) {
open_counters(profile_cpu, target_pid);
} else {
- for (i = 0; i < nr_cpus; i++)
- open_counters(i, target_pid);
+ for (i = 0; nr_cpu < nr_cpus; i++) {
+ /*
+ * If cpu i doesn't exist, re-check
+ * # online cpus in case it has changed.
+ */
+ if (open_counters(i, target_pid))
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ }
}
}

diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c
index c70d720..5a1a7c0 100644
--- a/tools/perf/builtin-stat.c
+++ b/tools/perf/builtin-stat.c
@@ -145,13 +145,24 @@ static void create_perf_stat_counter(int counter, int pid)
PERF_FORMAT_TOTAL_TIME_RUNNING;

if (system_wide) {
- unsigned int cpu;
+ unsigned int cpu, cpu_id;

- for (cpu = 0; cpu < nr_cpus; cpu++) {
+ cpu_id = 0;
+ for (cpu = 0; cpu < nr_cpus; cpu_id++) {
fd[cpu][counter] = sys_perf_event_open(attr, -1, cpu, -1, 0);
+ if (fd[cpu][counter] < 0 && errno == ENODEV) {
+ /*
+ * This CPU number doesn't exist, re-read
+ * nr_cpus (in case it's changed)
+ * and try the next number.
+ */
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ continue;
+ }
if (fd[cpu][counter] < 0 && verbose)
fprintf(stderr, ERR_PERF_OPEN, counter,
fd[cpu][counter], strerror(errno));
+ ++cpu;
}
} else {
attr->inherit = inherit;
diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c
index e0a374d..30fa53d 100644
--- a/tools/perf/builtin-top.c
+++ b/tools/perf/builtin-top.c
@@ -1077,14 +1077,9 @@ static void mmap_read(void)
int nr_poll;
int group_fd;

-static void start_counter(int i, int counter)
+static int start_counter(int i, int cpu, int counter)
{
struct perf_event_attr *attr;
- int cpu;
-
- cpu = profile_cpu;
- if (target_pid == -1 && profile_cpu == -1)
- cpu = i;

attr = attrs + counter;

@@ -1107,6 +1102,8 @@ try_again:

if (err == EPERM || err == EACCES)
die("No permission - are you root?\n");
+ if (err == ENODEV && cpu != profile_cpu)
+ return -1;
/*
* If it's cycles then fall back to hrtimer
* based cpu-clock-tick sw counter, which
@@ -1148,12 +1145,14 @@ try_again:
PROT_READ, MAP_SHARED, fd[i][counter], 0);
if (mmap_array[i][counter].base == MAP_FAILED)
die("failed to mmap with %d (%s)\n", errno, strerror(errno));
+ return 0;
}

static int __cmd_top(void)
{
pthread_t thread;
int i, counter;
+ int cpu;
int ret;

if (target_pid != -1)
@@ -1161,10 +1160,22 @@ static int __cmd_top(void)
else
event__synthesize_threads(event__process);

- for (i = 0; i < nr_cpus; i++) {
+ if (target_pid == -1 && profile_cpu == -1) {
+ cpu = 0;
+ for (i = 0; i < nr_cpus; ++i, ++cpu) {
+ group_fd = -1;
+ for (counter = 0; counter < nr_counters; counter++)
+ if (start_counter(i, i, counter)) {
+ --i;
+ nr_cpus = sysconf(_SC_NPROCESSORS_ONLN);
+ break;
+ }
+ }
+
+ } else {
group_fd = -1;
for (counter = 0; counter < nr_counters; counter++)
- start_counter(i, counter);
+ start_counter(0, profile_cpu, counter);
}

/* Wait for a minimal set of events before starting the snapshot */
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/