Some functions are not inlined by gcc 3.2, resulting code is ugly

From: Denis Vlasenko (vda@port.imtp.ilyichevsk.odessa.ua)
Date: Sun Nov 03 2002 - 11:17:02 EST


It seems gcc started to de-inline large functions.
We got bitten:

# sort -t ' ' +2 <System.map
...
c01876f0 t __cleanup_transaction
c0150f90 t __clear_page_buffers
c01d9fa0 T __clear_user
c011b020 T __cond_resched
c01d9ea0 T __const_udelay
c0107150 t __constant_c_and_count_memset
c0107fd0 t __constant_c_and_count_memset

[~seven screenfuls (!) snipped]

c010c7f0 t __constant_c_and_count_memset
c010e210 t __constant_c_and_count_memset
c03609e0 t __constant_c_and_count_memset
c03634f0 t __constant_c_and_count_memset
c0107ec0 t __constant_memcpy
c010fc10 t __constant_memcpy

[more snipped]

c0111410 t __constant_memcpy
c0111e10 t __constant_memcpy
c0114b50 t __constant_memcpy
c0353e50 t __constant_memcpy
c03573a0 t __constant_memcpy
c035bfc0 t __constant_memcpy
c03633e0 t __constant_memcpy
c01da040 T __copy_from_user
c011dd30 t __copy_fs_struct
c01da000 T __copy_to_user

This happens in 2.4.19 too.

Let's take exit.c compilation as an example:

gcc -Wp,-MD,kernel/.exit.o.d -D__KERNEL__ -Iinclude -Wall
-Wstrict-prototypes -Wno-trigraphs -O2 -fomit-frame-pointer
-fno-strict-aliasing -fno-common -pipe -mpreferred-stack-boundary=2
-march=i486 -Iarch/i386/mach-generic -nostdinc -iwithprefix include
-DKBUILD_BASENAME=exit -c -o kernel/exit.o kernel/exit.c

[2.5] objdump -D exit.o, abridged
=================================
exit.o: file format elf32-i386

Disassembly of section .text:

...........
000004c0 <reparent_to_init>:
...........
     5ab: 68 84 03 00 00 push $0x384
     5b0: 50 push %eax
     5b1: e8 0a 12 00 00 call 17c0 <__constant_memcpy>

Eek.... __constant_memcpy??! It carries this comment in string.h:
/*
 * This looks horribly ugly, but the compiler can optimize it totally,
 * as the count is constant.
 */
static inline void * __constant_memcpy(void * to, const void * from, size_t n)

and when it _does_ get compiled it _does_ look horribly ugly:

000017c0 <__constant_memcpy>:
    17c0: 57 push %edi
    17c1: 56 push %esi
    17c2: 51 push %ecx
    17c3: 8b 4c 24 18 mov 0x18(%esp,1),%ecx
    17c7: 83 f9 14 cmp $0x14,%ecx
    17ca: 8b 54 24 10 mov 0x10(%esp,1),%edx
    17ce: 8b 74 24 14 mov 0x14(%esp,1),%esi
    17d2: 77 7c ja 1850 <__constant_memcpy+0x90>
    17d4: ff 24 8d 28 01 00 00 jmp *0x128(,%ecx,4)
    17db: 8a 06 mov (%esi),%al
    17dd: 88 02 mov %al,(%edx)
    17df: 90 nop
    17e0: 89 d0 mov %edx,%eax
    17e2: 5a pop %edx
    17e3: 5e pop %esi
    17e4: 5f pop %edi
    17e5: c3 ret
    17e6: 66 8b 06 mov (%esi),%ax
    17e9: 66 89 02 mov %ax,(%edx)
    17ec: eb f2 jmp 17e0 <__constant_memcpy+0x20>
    17ee: 66 8b 06 mov (%esi),%ax
    17f1: 66 89 02 mov %ax,(%edx)
    17f4: 8a 46 02 mov 0x2(%esi),%al
    17f7: 88 42 02 mov %al,0x2(%edx)
    17fa: eb e4 jmp 17e0 <__constant_memcpy+0x20>
    17fc: 8b 06 mov (%esi),%eax
    17fe: 89 02 mov %eax,(%edx)
    1800: eb de jmp 17e0 <__constant_memcpy+0x20>
    1802: 8b 06 mov (%esi),%eax
    1804: 89 02 mov %eax,(%edx)
    1806: 66 8b 46 04 mov 0x4(%esi),%ax
    180a: 66 89 42 04 mov %ax,0x4(%edx)
    180e: eb d0 jmp 17e0 <__constant_memcpy+0x20>
    1810: 8b 06 mov (%esi),%eax
    1812: 89 02 mov %eax,(%edx)
    1814: 8b 46 04 mov 0x4(%esi),%eax
    1817: 89 42 04 mov %eax,0x4(%edx)
    181a: eb c4 jmp 17e0 <__constant_memcpy+0x20>
    181c: 8b 06 mov (%esi),%eax
    181e: 89 02 mov %eax,(%edx)
    1820: 8b 46 04 mov 0x4(%esi),%eax
    1823: 89 42 04 mov %eax,0x4(%edx)
    1826: 8b 46 08 mov 0x8(%esi),%eax
    1829: 89 42 08 mov %eax,0x8(%edx)
    182c: eb b2 jmp 17e0 <__constant_memcpy+0x20>
    182e: 8b 06 mov (%esi),%eax
    1830: 89 02 mov %eax,(%edx)
    1832: 8b 46 04 mov 0x4(%esi),%eax
    1835: 89 42 04 mov %eax,0x4(%edx)
    1838: 8b 46 08 mov 0x8(%esi),%eax
    183b: 89 42 08 mov %eax,0x8(%edx)
    183e: 8b 46 0c mov 0xc(%esi),%eax
    1841: 89 42 0c mov %eax,0xc(%edx)
    1844: eb 9a jmp 17e0 <__constant_memcpy+0x20>
    1846: 8d 76 00 lea 0x0(%esi),%esi
    1849: 8d bc 27 00 00 00 00 lea 0x0(%edi,1),%edi
    1850: 89 c8 mov %ecx,%eax
    1852: 83 e0 03 and $0x3,%eax
    1855: 83 f8 01 cmp $0x1,%eax
    1858: 74 46 je 18a0 <__constant_memcpy+0xe0>
    185a: 83 f8 01 cmp $0x1,%eax
    185d: 72 31 jb 1890 <__constant_memcpy+0xd0>
    185f: 83 f8 02 cmp $0x2,%eax
    1862: 74 0f je 1873 <__constant_memcpy+0xb3>
    1864: c1 e9 02 shr $0x2,%ecx
    1867: 89 d7 mov %edx,%edi
    1869: f3 a5 repz movsl %ds:(%esi),%es:(%edi)
    186b: 66 a5 movsw %ds:(%esi),%es:(%edi)
    186d: a4 movsb %ds:(%esi),%es:(%edi)
    186e: e9 6d ff ff ff jmp 17e0 <__constant_memcpy+0x20>
    1873: c1 e9 02 shr $0x2,%ecx
    1876: 89 d7 mov %edx,%edi
    1878: f3 a5 repz movsl %ds:(%esi),%es:(%edi)
    187a: 66 a5 movsw %ds:(%esi),%es:(%edi)
    187c: e9 5f ff ff ff jmp 17e0 <__constant_memcpy+0x20>
    1881: eb 0d jmp 1890 <__constant_memcpy+0xd0>
    1883: 90 nop
    1884: 90 nop
    1885: 90 nop
    1886: 90 nop
    1887: 90 nop
    1888: 90 nop
    
BTW, this string of nops is deeply unfunny...
It looks similar to inter-function 16-byte alignment,
but it's _inside_ of a function (it pads up to 0x1890, a jump target).
Did GCC align a branch target?? :(((

    1889: 90 nop
    188a: 90 nop
    188b: 90 nop
    188c: 90 nop
    188d: 90 nop
    188e: 90 nop
    188f: 90 nop
    1890: c1 e9 02 shr $0x2,%ecx
    1893: 89 d7 mov %edx,%edi
    1895: f3 a5 repz movsl %ds:(%esi),%es:(%edi)
    1897: e9 44 ff ff ff jmp 17e0 <__constant_memcpy+0x20>
    189c: 8d 74 26 00 lea 0x0(%esi,1),%esi
    18a0: c1 e9 02 shr $0x2,%ecx
    18a3: 89 d7 mov %edx,%edi
    18a5: f3 a5 repz movsl %ds:(%esi),%es:(%edi)
    18a7: a4 movsb %ds:(%esi),%es:(%edi)
    18a8: e9 33 ff ff ff jmp 17e0 <__constant_memcpy+0x20>
    18ad: 8b 06 mov (%esi),%eax
    18af: 89 02 mov %eax,(%edx)
    18b1: 8b 46 04 mov 0x4(%esi),%eax
    18b4: 89 42 04 mov %eax,0x4(%edx)
    18b7: 8b 46 08 mov 0x8(%esi),%eax
    18ba: 89 42 08 mov %eax,0x8(%edx)
    18bd: 8b 46 0c mov 0xc(%esi),%eax
    18c0: 89 42 0c mov %eax,0xc(%edx)
    18c3: 8b 46 10 mov 0x10(%esi),%eax
    18c6: 89 42 10 mov %eax,0x10(%edx)
    18c9: e9 12 ff ff ff jmp 17e0 <__constant_memcpy+0x20>
    18ce: 89 f6 mov %esi,%esi

--
vda
-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/



This archive was generated by hypermail 2b29 : Thu Nov 07 2002 - 22:00:28 EST