[PATCH 1/1] pahole/Rust: Check that we're adding DW_TAG_member sorted by byte offset

From: Arnaldo Carvalho de Melo
Date: Fri Feb 10 2023 - 15:48:46 EST


Hi Miguel, after a long winter, I'm trying to get Rust properly
supported on pahole, please check that this specific use case is working
for you as well.

I'll go through the others to see if they are as easy (or at least as
restricted to Rust CUs) as this one.

Thanks,

- Arnaldo

---

Rust may reorder struct fields, while pahole assumes that members appear in
offset order, as is the case for languages like C and C++. So, after the
class member bit and byte offsets have been worked out, sort the members of
each type in Rust CUs by byte offset.

Using: https://github.com/Rust-for-Linux/pahole-rust-cases/blob/main/inverted.o

Before:

$ pahole --show_private_classes ../pahole-rust-cases/inverted.o
struct S {

/* XXX 4 bytes hole, try to pack */

bool a __attribute__((__aligned__(1))); /* 4 1 */

/* XXX 65531 bytes hole, try to pack */
/* Bitfield combined with previous fields */

u32 b __attribute__((__aligned__(4))); /* 0 4 */

/* size: 8, cachelines: 1, members: 2 */
/* sum members: 5, holes: 2, sum holes: 65535 */
/* padding: 4 */
/* forced alignments: 2, forced holes: 2, sum forced holes: 65535 */
/* last cacheline: 8 bytes */

/* BRAIN FART ALERT! 8 bytes != 5 (member bytes) + 0 (member bits) + 65535 (byte holes) + 0 (bit holes), diff = -524288 bits */
} __attribute__((__aligned__(4)));
$

After:

$ readelf -wi ../pahole-rust-cases/inverted.o | grep DW_TAG_compile_unit -A9
<0><b>: Abbrev Number: 1 (DW_TAG_compile_unit)
<c> DW_AT_producer : (indirect string, offset: 0x0): clang LLVM (rustc version 1.60.0 (7737e0b5c 2022-04-04))
<10> DW_AT_language : 28 (Rust)
<12> DW_AT_name : (indirect string, offset: 0x39): inverted.rs/@/inverted.c4dda47b-cgu.0
<16> DW_AT_stmt_list : 0x0
<1a> DW_AT_comp_dir : (indirect string, offset: 0x5f): /root/pahole-rust
<1e> DW_AT_GNU_pubnames: 1
<1e> DW_AT_low_pc : 0x0
<26> DW_AT_high_pc : 0x62
<1><2a>: Abbrev Number: 2 (DW_TAG_namespace)
$ pahole --show_private_classes ../pahole-rust-cases/inverted.o
struct S {
u32 b __attribute__((__aligned__(4))); /* 0 4 */
bool a __attribute__((__aligned__(1))); /* 4 1 */

/* size: 8, cachelines: 1, members: 2 */
/* padding: 3 */
/* forced alignments: 2 */
/* last cacheline: 8 bytes */
} __attribute__((__aligned__(4)));
$

$ cp ../pahole-rust-cases/inverted.o .
$ pahole --btf_encode inverted.o
$ readelf -SW inverted.o | grep -i BTF
[26] .BTF PROGBITS 0000000000000000 000922 00006c 00 0 0 1
$
$ pahole -F btf inverted.o
struct S {
u32 b; /* 0 4 */
bool a; /* 4 1 */

/* size: 8, cachelines: 1, members: 2 */
/* padding: 3 */
/* last cacheline: 8 bytes */
};
$

Reported-by: Miguel Ojeda <miguel.ojeda.sandonis@xxxxxxxxx>
Cc: Alexei Starovoitov <ast@xxxxxxxxxx>
Cc: Andrii Nakryiko <andrii@xxxxxxxxxx>
Cc: Eric Curtin <ecurtin@xxxxxxxxxx>
Cc: Jiri Olsa <jolsa@xxxxxxxxxx>
Cc: Martin Rodriguez Reboredo <yakoyoku@xxxxxxxxx>
Cc: Neal Gompa <neal@xxxxxxxxx>
Cc: Yonghong Song <yhs@xxxxxx>
Cc: Daniel Borkmann <daniel@xxxxxxxxxxxxx>
Signed-off-by: Arnaldo Carvalho de Melo <acme@xxxxxxxxxx>
---
dwarf_loader.c | 42 ++++++++++++++++++++++++++++++++++++++++++
1 file changed, 42 insertions(+)

diff --git a/dwarf_loader.c b/dwarf_loader.c
index 253c5efaf3b55a93..a77598dc3affca88 100644
--- a/dwarf_loader.c
+++ b/dwarf_loader.c
@@ -2835,9 +2835,51 @@ static int class_member__cache_byte_size(struct tag *tag, struct cu *cu,
return 0;
}

+/*
+ * Tell whether the CU's source language may emit struct members in an
+ * order that does not follow their byte offsets. Only Rust CUs are
+ * treated that way here.
+ */
+static bool cu__language_reorders_offsets(const struct cu *cu)
+{
+	switch (cu->language) {
+	case DW_LANG_Rust:
+		return true;
+	default:
+		return false;
+	}
+}
+
+/*
+ * Reorder the data members of a type so that they appear in ascending
+ * byte offset order. Tags that are not types are left untouched.
+ *
+ * Meant to be used as a cu__for_all_tags() callback; neither 'cu' nor
+ * 'cookie' is used. Always returns 0.
+ */
+static int type__sort_by_offset(struct tag *tag, struct cu *cu, void *cookie __maybe_unused)
+{
+	if (!tag__is_type(tag))
+		return 0;
+
+	struct type *type = tag__type(tag);
+	struct class_member *current_member;
+
+	// There may be more than DW_TAG_member entries in the type tags, so do a simple
+	// bubble sort for now, so that the other, non-member tags stay where they are.
+	// NOTE(review): the node right after a member is treated as a class_member without
+	// checking its tag -- confirm a member is never directly followed by another kind of tag.
+restart:
+	type__for_each_data_member(type, current_member) {
+		if (list_is_last(&current_member->tag.node, &type->namespace.tags))
+			break;
+
+		struct class_member *next_member = list_entry(current_member->tag.node.next, typeof(*current_member), tag.node);
+
+		// Must be <= and not <: two neighbours with the same offset are already in
+		// order. Swapping them would just flip them back on the next pass and the
+		// 'goto restart' below would never terminate.
+		if (current_member->byte_offset <= next_member->byte_offset)
+			continue;
+
+		// Out of order: unlink the current member, reinsert it right after its
+		// successor, then rescan from the beginning of the list.
+		list_del(&current_member->tag.node);
+		list_add(&current_member->tag.node, &next_member->tag.node);
+		goto restart;
+	}
+
+	return 0;
+}
+
+/*
+ * Hand type__sort_by_offset() to cu__for_all_tags() as the per-tag
+ * callback, passing 'conf' through as its cookie.
+ */
+static void cu__sort_types_by_offset(struct cu *cu, struct conf_load *conf)
+{
+	void *cookie = conf;
+
+	cu__for_all_tags(cu, type__sort_by_offset, cookie);
+}
+
static int cu__finalize(struct cu *cu, struct conf_load *conf, void *thr_data)
{
cu__for_all_tags(cu, class_member__cache_byte_size, conf);
+
+ if (cu__language_reorders_offsets(cu))
+ cu__sort_types_by_offset(cu, conf);
+
if (conf && conf->steal) {
return conf->steal(cu, conf, thr_data);
}
--
2.39.1