Re: [PATCH v2 01/13] perf dso: Reorder variables to save space in struct dso

From: Arnaldo Carvalho de Melo
Date: Thu Mar 21 2024 - 16:28:23 EST


On Thu, Mar 21, 2024 at 09:02:48AM -0700, Ian Rogers wrote:
> Save 40 bytes and move from 8 to 7 cache lines. Make variable dwfl
> dependent on being a powerpc build. Squeeze bits of int/enum types
> when appropriate. Remove holes/padding by reordering variables.

Thanks, applied.

- Arnaldo

> Before:
> ```
> struct dso {
> struct mutex lock; /* 0 40 */
> struct list_head node; /* 40 16 */
> struct rb_node rb_node __attribute__((__aligned__(8))); /* 56 24 */
> /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
> struct rb_root * root; /* 80 8 */
> struct rb_root_cached symbols; /* 88 16 */
> struct symbol * * symbol_names; /* 104 8 */
> size_t symbol_names_len; /* 112 8 */
> struct rb_root_cached inlined_nodes; /* 120 16 */
> /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
> struct rb_root_cached srclines; /* 136 16 */
> struct {
> u64 addr; /* 152 8 */
> struct symbol * symbol; /* 160 8 */
> } last_find_result; /* 152 16 */
> void * a2l; /* 168 8 */
> char * symsrc_filename; /* 176 8 */
> unsigned int a2l_fails; /* 184 4 */
> enum dso_space_type kernel; /* 188 4 */
> /* --- cacheline 3 boundary (192 bytes) --- */
> _Bool is_kmod; /* 192 1 */
>
> /* XXX 3 bytes hole, try to pack */
>
> enum dso_swap_type needs_swap; /* 196 4 */
> enum dso_binary_type symtab_type; /* 200 4 */
> enum dso_binary_type binary_type; /* 204 4 */
> enum dso_load_errno load_errno; /* 208 4 */
> u8 adjust_symbols:1; /* 212: 0 1 */
> u8 has_build_id:1; /* 212: 1 1 */
> u8 header_build_id:1; /* 212: 2 1 */
> u8 has_srcline:1; /* 212: 3 1 */
> u8 hit:1; /* 212: 4 1 */
> u8 annotate_warned:1; /* 212: 5 1 */
> u8 auxtrace_warned:1; /* 212: 6 1 */
> u8 short_name_allocated:1; /* 212: 7 1 */
> u8 long_name_allocated:1; /* 213: 0 1 */
> u8 is_64_bit:1; /* 213: 1 1 */
>
> /* XXX 6 bits hole, try to pack */
>
> _Bool sorted_by_name; /* 214 1 */
> _Bool loaded; /* 215 1 */
> u8 rel; /* 216 1 */
>
> /* XXX 7 bytes hole, try to pack */
>
> struct build_id bid; /* 224 32 */
> /* --- cacheline 4 boundary (256 bytes) --- */
> u64 text_offset; /* 256 8 */
> u64 text_end; /* 264 8 */
> const char * short_name; /* 272 8 */
> const char * long_name; /* 280 8 */
> u16 long_name_len; /* 288 2 */
> u16 short_name_len; /* 290 2 */
>
> /* XXX 4 bytes hole, try to pack */
>
> void * dwfl; /* 296 8 */
> struct auxtrace_cache * auxtrace_cache; /* 304 8 */
> int comp; /* 312 4 */
>
> /* XXX 4 bytes hole, try to pack */
>
> /* --- cacheline 5 boundary (320 bytes) --- */
> struct {
> struct rb_root cache; /* 320 8 */
> int fd; /* 328 4 */
> int status; /* 332 4 */
> u32 status_seen; /* 336 4 */
>
> /* XXX 4 bytes hole, try to pack */
>
> u64 file_size; /* 344 8 */
> struct list_head open_entry; /* 352 16 */
> u64 elf_base_addr; /* 368 8 */
> u64 debug_frame_offset; /* 376 8 */
> /* --- cacheline 6 boundary (384 bytes) --- */
> u64 eh_frame_hdr_addr; /* 384 8 */
> u64 eh_frame_hdr_offset; /* 392 8 */
> } data; /* 320 80 */
> struct {
> u32 id; /* 400 4 */
> u32 sub_id; /* 404 4 */
> struct perf_env * env; /* 408 8 */
> } bpf_prog; /* 400 16 */
> union {
> void * priv; /* 416 8 */
> u64 db_id; /* 416 8 */
> }; /* 416 8 */
> struct nsinfo * nsinfo; /* 424 8 */
> struct dso_id id; /* 432 24 */
> /* --- cacheline 7 boundary (448 bytes) was 8 bytes ago --- */
> refcount_t refcnt; /* 456 4 */
> char name[]; /* 460 0 */
>
> /* size: 464, cachelines: 8, members: 49 */
> /* sum members: 440, holes: 4, sum holes: 18 */
> /* sum bitfield members: 10 bits, bit holes: 1, sum bit holes: 6 bits */
> /* padding: 4 */
> /* forced alignments: 1 */
> /* last cacheline: 16 bytes */
> } __attribute__((__aligned__(8)));
> ```
>
> After:
> ```
> struct dso {
> struct mutex lock; /* 0 40 */
> struct list_head node; /* 40 16 */
> struct rb_node rb_node __attribute__((__aligned__(8))); /* 56 24 */
> /* --- cacheline 1 boundary (64 bytes) was 16 bytes ago --- */
> struct rb_root * root; /* 80 8 */
> struct rb_root_cached symbols; /* 88 16 */
> struct symbol * * symbol_names; /* 104 8 */
> size_t symbol_names_len; /* 112 8 */
> struct rb_root_cached inlined_nodes; /* 120 16 */
> /* --- cacheline 2 boundary (128 bytes) was 8 bytes ago --- */
> struct rb_root_cached srclines; /* 136 16 */
> struct {
> u64 addr; /* 152 8 */
> struct symbol * symbol; /* 160 8 */
> } last_find_result; /* 152 16 */
> struct build_id bid; /* 168 32 */
> /* --- cacheline 3 boundary (192 bytes) was 8 bytes ago --- */
> u64 text_offset; /* 200 8 */
> u64 text_end; /* 208 8 */
> const char * short_name; /* 216 8 */
> const char * long_name; /* 224 8 */
> void * a2l; /* 232 8 */
> char * symsrc_filename; /* 240 8 */
> struct nsinfo * nsinfo; /* 248 8 */
> /* --- cacheline 4 boundary (256 bytes) --- */
> struct auxtrace_cache * auxtrace_cache; /* 256 8 */
> union {
> void * priv; /* 264 8 */
> u64 db_id; /* 264 8 */
> }; /* 264 8 */
> struct {
> struct perf_env * env; /* 272 8 */
> u32 id; /* 280 4 */
> u32 sub_id; /* 284 4 */
> } bpf_prog; /* 272 16 */
> struct {
> struct rb_root cache; /* 288 8 */
> struct list_head open_entry; /* 296 16 */
> u64 file_size; /* 312 8 */
> /* --- cacheline 5 boundary (320 bytes) --- */
> u64 elf_base_addr; /* 320 8 */
> u64 debug_frame_offset; /* 328 8 */
> u64 eh_frame_hdr_addr; /* 336 8 */
> u64 eh_frame_hdr_offset; /* 344 8 */
> int fd; /* 352 4 */
> int status; /* 356 4 */
> u32 status_seen; /* 360 4 */
> } data; /* 288 80 */
>
> /* XXX last struct has 4 bytes of padding */
>
> struct dso_id id; /* 368 24 */
> /* --- cacheline 6 boundary (384 bytes) was 8 bytes ago --- */
> unsigned int a2l_fails; /* 392 4 */
> int comp; /* 396 4 */
> refcount_t refcnt; /* 400 4 */
> enum dso_load_errno load_errno; /* 404 4 */
> u16 long_name_len; /* 408 2 */
> u16 short_name_len; /* 410 2 */
> enum dso_binary_type symtab_type:8; /* 412: 0 4 */
> enum dso_binary_type binary_type:8; /* 412: 8 4 */
> enum dso_space_type kernel:2; /* 412:16 4 */
> enum dso_swap_type needs_swap:2; /* 412:18 4 */
>
> /* Bitfield combined with next fields */
>
> _Bool is_kmod:1; /* 414: 4 1 */
> u8 adjust_symbols:1; /* 414: 5 1 */
> u8 has_build_id:1; /* 414: 6 1 */
> u8 header_build_id:1; /* 414: 7 1 */
> u8 has_srcline:1; /* 415: 0 1 */
> u8 hit:1; /* 415: 1 1 */
> u8 annotate_warned:1; /* 415: 2 1 */
> u8 auxtrace_warned:1; /* 415: 3 1 */
> u8 short_name_allocated:1; /* 415: 4 1 */
> u8 long_name_allocated:1; /* 415: 5 1 */
> u8 is_64_bit:1; /* 415: 6 1 */
>
> /* XXX 1 bit hole, try to pack */
>
> _Bool sorted_by_name; /* 416 1 */
> _Bool loaded; /* 417 1 */
> u8 rel; /* 418 1 */
> char name[]; /* 419 0 */
>
> /* size: 424, cachelines: 7, members: 48 */
> /* sum members: 415 */
> /* sum bitfield members: 31 bits, bit holes: 1, sum bit holes: 1 bits */
> /* padding: 5 */
> /* paddings: 1, sum paddings: 4 */
> /* forced alignments: 1 */
> /* last cacheline: 40 bytes */
> } __attribute__((__aligned__(8)));
> ```
>
> Signed-off-by: Ian Rogers <irogers@xxxxxxxxxx>
> ---
> tools/perf/util/dso.h | 84 +++++++++++++++++++++----------------------
> 1 file changed, 42 insertions(+), 42 deletions(-)
>
> diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h
> index 2cdcd1e2ef8b..17dab230a2ca 100644
> --- a/tools/perf/util/dso.h
> +++ b/tools/perf/util/dso.h
> @@ -161,66 +161,66 @@ struct dso {
> u64 addr;
> struct symbol *symbol;
> } last_find_result;
> - void *a2l;
> - char *symsrc_filename;
> - unsigned int a2l_fails;
> - enum dso_space_type kernel;
> - bool is_kmod;
> - enum dso_swap_type needs_swap;
> - enum dso_binary_type symtab_type;
> - enum dso_binary_type binary_type;
> - enum dso_load_errno load_errno;
> - u8 adjust_symbols:1;
> - u8 has_build_id:1;
> - u8 header_build_id:1;
> - u8 has_srcline:1;
> - u8 hit:1;
> - u8 annotate_warned:1;
> - u8 auxtrace_warned:1;
> - u8 short_name_allocated:1;
> - u8 long_name_allocated:1;
> - u8 is_64_bit:1;
> - bool sorted_by_name;
> - bool loaded;
> - u8 rel;
> struct build_id bid;
> u64 text_offset;
> u64 text_end;
> const char *short_name;
> const char *long_name;
> - u16 long_name_len;
> - u16 short_name_len;
> + void *a2l;
> + char *symsrc_filename;
> +#if defined(__powerpc__)
> void *dwfl; /* DWARF debug info */
> +#endif
> + struct nsinfo *nsinfo;
> struct auxtrace_cache *auxtrace_cache;
> - int comp;
> -
> + union { /* Tool specific area */
> + void *priv;
> + u64 db_id;
> + };
> + /* bpf prog information */
> + struct {
> + struct perf_env *env;
> + u32 id;
> + u32 sub_id;
> + } bpf_prog;
> /* dso data file */
> struct {
> struct rb_root cache;
> - int fd;
> - int status;
> - u32 status_seen;
> - u64 file_size;
> struct list_head open_entry;
> + u64 file_size;
> u64 elf_base_addr;
> u64 debug_frame_offset;
> u64 eh_frame_hdr_addr;
> u64 eh_frame_hdr_offset;
> + int fd;
> + int status;
> + u32 status_seen;
> } data;
> - /* bpf prog information */
> - struct {
> - u32 id;
> - u32 sub_id;
> - struct perf_env *env;
> - } bpf_prog;
> -
> - union { /* Tool specific area */
> - void *priv;
> - u64 db_id;
> - };
> - struct nsinfo *nsinfo;
> struct dso_id id;
> + unsigned int a2l_fails;
> + int comp;
> refcount_t refcnt;
> + enum dso_load_errno load_errno;
> + u16 long_name_len;
> + u16 short_name_len;
> + enum dso_binary_type symtab_type:8;
> + enum dso_binary_type binary_type:8;
> + enum dso_space_type kernel:2;
> + enum dso_swap_type needs_swap:2;
> + bool is_kmod:1;
> + u8 adjust_symbols:1;
> + u8 has_build_id:1;
> + u8 header_build_id:1;
> + u8 has_srcline:1;
> + u8 hit:1;
> + u8 annotate_warned:1;
> + u8 auxtrace_warned:1;
> + u8 short_name_allocated:1;
> + u8 long_name_allocated:1;
> + u8 is_64_bit:1;
> + bool sorted_by_name;
> + bool loaded;
> + u8 rel;
> char name[];
> };
>
> --
> 2.44.0.396.g6e790dbe36-goog
>