[PATCH] perf: align raw sample data on 64-bit boundaries

From: Robert Richter
Date: Fri May 07 2010 - 09:32:45 EST


In the current implementation 64 bit raw sample data values are not
aligned due to the 32 bit size header. The size header is located
before the data buffer on a 64 bit boundary. This leads to unaligned
memory access to the data buffer for arrays or structures containing
64 bit values. To avoid this, the patch adds a 32 bit reserved value
to the raw sample data header. The data buffer starts then at a 64 bit
boundary.

This changes the ABI and requires changes in the userland tools. For
tools/perf this is at a single location in event.c only. This could
also introduce some overhead on smaller architectures, but currently
this is only used on x86 or for transferring raw tracepoint
data. Though an ABI change should be avoided, it is worth to align raw
sample data on 64-bit boundaries as the change fixes unaligned memory
access, eases the implementation for raw sample data and reduces the
risk of data corruption due to different pad structures inserted by
the compiler.

Signed-off-by: Robert Richter <robert.richter@xxxxxxx>
---
include/linux/perf_event.h | 1 +
kernel/perf_event.c | 21 ++++++++++-----------
kernel/trace/trace_kprobe.c | 6 ++----
kernel/trace/trace_syscalls.c | 6 ++----
tools/perf/util/event.c | 4 ++--
5 files changed, 17 insertions(+), 21 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 3fd5c82..016969c 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -431,6 +431,7 @@ enum perf_event_type {
* #
*
* { u32 size;
+ * u32 reserved;
* char data[size];}&& PERF_SAMPLE_RAW
* };
*/
diff --git a/kernel/perf_event.c b/kernel/perf_event.c
index a4fa381..12bb53d 100644
--- a/kernel/perf_event.c
+++ b/kernel/perf_event.c
@@ -3252,18 +3252,19 @@ void perf_output_sample(struct perf_output_handle *handle,
}

if (sample_type & PERF_SAMPLE_RAW) {
+ struct {
+ u32 size;
+ u32 reserved;
+ } raw = {
+ .size = 0,
+ .reserved = 0,
+ };
if (data->raw) {
- perf_output_put(handle, data->raw->size);
+ raw.size = data->raw->size;
+ perf_output_put(handle, raw);
perf_output_copy(handle, data->raw->data,
data->raw->size);
} else {
- struct {
- u32 size;
- u32 data;
- } raw = {
- .size = sizeof(u32),
- .data = 0,
- };
perf_output_put(handle, raw);
}
}
@@ -3344,12 +3345,10 @@ void perf_prepare_sample(struct perf_event_header *header,
}

if (sample_type & PERF_SAMPLE_RAW) {
- int size = sizeof(u32);
+ int size = sizeof(u64);

if (data->raw)
size += data->raw->size;
- else
- size += sizeof(u32);

WARN_ON_ONCE(size & (sizeof(u64)-1));
header->size += size;
diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c
index a751432..00172b0 100644
--- a/kernel/trace/trace_kprobe.c
+++ b/kernel/trace/trace_kprobe.c
@@ -1347,8 +1347,7 @@ static __kprobes void kprobe_perf_func(struct kprobe *kp,
int rctx;

__size = sizeof(*entry) + tp->size;
- size = ALIGN(__size + sizeof(u32), sizeof(u64));
- size -= sizeof(u32);
+ size = ALIGN(__size, sizeof(u64));
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"profile buffer not large enough"))
return;
@@ -1378,8 +1377,7 @@ static __kprobes void kretprobe_perf_func(struct kretprobe_instance *ri,
int rctx;

__size = sizeof(*entry) + tp->size;
- size = ALIGN(__size + sizeof(u32), sizeof(u64));
- size -= sizeof(u32);
+ size = ALIGN(__size, sizeof(u64));
if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"profile buffer not large enough"))
return;
diff --git a/kernel/trace/trace_syscalls.c b/kernel/trace/trace_syscalls.c
index 4d6d711..ad7e713 100644
--- a/kernel/trace/trace_syscalls.c
+++ b/kernel/trace/trace_syscalls.c
@@ -453,8 +453,7 @@ static void perf_syscall_enter(struct pt_regs *regs, long id)

/* get the size after alignment with the u32 buffer size field */
size = sizeof(unsigned long) * sys_data->nb_args + sizeof(*rec);
- size = ALIGN(size + sizeof(u32), sizeof(u64));
- size -= sizeof(u32);
+ size = ALIGN(size, sizeof(u64));

if (WARN_ONCE(size > PERF_MAX_TRACE_SIZE,
"perf buffer not large enough"))
@@ -524,8 +523,7 @@ static void perf_syscall_exit(struct pt_regs *regs, long ret)
return;

/* We can probably do that at build time */
- size = ALIGN(sizeof(*rec) + sizeof(u32), sizeof(u64));
- size -= sizeof(u32);
+ size = ALIGN(sizeof(*rec), sizeof(u64));

/*
* Impossible, but be paranoid with the future
diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c
index 50771b5..6902b85 100644
--- a/tools/perf/util/event.c
+++ b/tools/perf/util/event.c
@@ -790,8 +790,8 @@ int event__parse_sample(event_t *event, u64 type, struct sample_data *data)
if (type & PERF_SAMPLE_RAW) {
u32 *p = (u32 *)array;
data->raw_size = *p;
- p++;
- data->raw_data = p;
+ array++;
+ data->raw_data = array;
}

return 0;
--
1.7.1

--
Advanced Micro Devices, Inc.
Operating System Research Center
email: robert.richter@xxxxxxx

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/