Re: [PATCH 0/1] vsprintf: optimize decimal conversion (again)

From: roma1390
Date: Wed Mar 28 2012 - 06:39:41 EST


On 2012.03.28 13:33, Denys Vlasenko wrote:
On Wednesday 28 March 2012 12:24, roma1390 wrote:
On 2012.03.28 13:13, Denys Vlasenko wrote:
Second: run
arm-linux-gnueabi-gcc -O2 -Wall test_{org,new}.c -S
and email me resulting test_{org,new}.s files.

test_{org,new}.s attached.


Bingo.

bl __aeabi_uidivmod

Not good. Your gcc did not optimize division by constant.

Can you add "noinline_for_stack":

static noinline_for_stack<=== HERE
char *put_dec(char *buf, unsigned long long n)

and regenerate and resend the test_new.s?


Hello,

The asm files you requested are attached.

roma1390 --- test_new.org.c 2012-03-28 13:25:14.000000000 +0300
+++ test_new-org-with-noinline_for_stack.c 2012-03-28 13:37:26.000000000 +0300
@@ -110,7 +110,7 @@

/* First algorithm: generic */

-static
+static noinline_for_stack
char *put_dec(char *buf, unsigned long long n)
{
if (n >= 100*1000*1000) {
@@ -145,7 +145,7 @@
* (with permission from the author).
* Performs no 64-bit division and hence should be fast on 32-bit machines.
*/
-static
+static noinline_for_stack
char *put_dec(char *buf, unsigned long long n)
{
uint32_t d3, d2, d1, q, h;
.cpu arm9tdmi
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 18, 4
.file "test_new-org-with-noinline_for_stack.c"
.text
@ put_dec_trunc8: store the decimal digits of r1 at r0, least-significant
@ digit first, stopping as soon as the remaining quotient is zero.
@ In:  r0 = output pointer, r1 = value
@ Out: r0 = pointer just past the last byte stored (1 to 8 bytes are written)
@ r4, r5 and fp are pushed on entry and restored at .L2.
@ NOTE(review): the name suggests inputs are below 10^8; nothing here checks it.
.align 2
.type put_dec_trunc8, %function
put_dec_trunc8:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
@ r3 = 429496730 (.L5). The high word of x * r3 is used below as the
@ quotient x / 10; each digit is then x + 48 - 10 * quotient.
ldr r3, .L5
stmfd sp!, {r4, r5, fp}
umull fp, ip, r1, r3
mov r2, ip
add r1, r1, #48
mov ip, r0
add r0, r2, r2, asl #2
sub r1, r1, r0, asl #1
cmp r2, #0
mov r0, ip
@ First digit is always stored; r0 is post-incremented.
strb r1, [r0], #1
beq .L2
umull r4, r5, r2, r3
add r2, r2, #48
add fp, r5, r5, asl #2
sub r2, r2, fp, asl #1
cmp r5, #0
mov r1, r5
strb r2, [ip, #1]
add r0, r0, #1
beq .L2
umull r4, r5, r1, r3
add r1, r1, #48
add ip, r5, r5, asl #2
sub r1, r1, ip, asl #1
cmp r5, #0
mov r2, r5
strb r1, [r0], #1
beq .L2
umull r4, r5, r2, r3
add r2, r2, #48
add r1, r5, r5, asl #2
sub r2, r2, r1, asl #1
cmp r5, #0
strb r2, [r0], #1
beq .L2
@ From here on the quotient is built from shifts and adds instead of umull:
@ r2 = ((r5 * 3277) << 1) >> 16
add r2, r5, r5, asl #1
add r2, r5, r2, asl #2
rsb r2, r2, r2, asl #6
add r2, r5, r2, asl #2
mov r2, r2, asl #1
mov r2, r2, lsr #16
add r3, r5, #48
add r1, r2, r2, asl #2
sub r3, r3, r1, asl #1
cmp r2, #0
strb r3, [r0], #1
beq .L2
@ r3 = (r2 * 205) >> 11  (r1 holds r2 * 5 on entry to this sequence)
add r1, r2, r1, asl #3
add r1, r1, r1, asl #2
mov r3, r1, lsr #11
add r2, r2, #48
add r1, r3, r3, asl #2
sub r2, r2, r1, asl #1
cmp r3, #0
strb r2, [r0], #1
beq .L2
@ r2 = (r3 * 205) >> 11  (r1 holds r3 * 5 on entry to this sequence)
add r1, r3, r1, asl #3
add r1, r1, r1, asl #2
mov r2, r1, lsr #11
add r1, r2, r2, asl #2
add r3, r3, #48
cmp r2, #0
sub r3, r3, r1, asl #1
strb r3, [r0], #1
@ The eighth digit is stored only when the final quotient is non-zero.
addne r2, r2, #48
strneb r2, [r0], #1
.L2:
ldmfd sp!, {r4, r5, fp}
bx lr
.L6:
.align 2
.L5:
.word 429496730
.size put_dec_trunc8, .-put_dec_trunc8
@ put_dec_full4: store exactly four decimal digit bytes for r1 at r0,
@ least-significant digit first, with no early exit.
@ In:  r0 = output pointer, r1 = value
@ Out: r0 = input pointer + 4
@ r4 and r5 are pushed and restored around the body.
@ NOTE(review): presumably only meaningful for inputs below 10000 - confirm
@ against the C source.
.align 2
.type put_dec_full4, %function
put_dec_full4:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
@ r3 = (r1 * 52429) >> 19, built from shifts and adds
add r3, r1, r1, asl #1
add r3, r3, r3, asl #4
add r3, r3, r3, asl #8
add r3, r1, r3, asl #2
mov r3, r3, lsr #19
@ r2 = ((r3 * 3277) << 1) >> 16
add r2, r3, r3, asl #1
add r2, r3, r2, asl #2
rsb r2, r2, r2, asl #6
add r2, r3, r2, asl #2
mov r2, r2, asl #1
mov r2, r2, lsr #16
add ip, r2, r2, asl #2
stmfd sp!, {r4, r5}
@ r4 = (r2 * 205) >> 11, interleaved with the digit computations
add r4, r2, ip, asl #3
add r5, r3, r3, asl #2
add r1, r1, #48
add r4, r4, r4, asl #2
sub r1, r1, r5, asl #1
mov r4, r4, lsr #11
mov r5, r0
strb r1, [r5], #1
add r3, r3, #48
add r1, r4, r4, asl #2
add r2, r2, #48
sub r2, r2, r1, asl #1
sub ip, r3, ip, asl #1
add r1, r5, #1
add r4, r4, #48
@ The remaining three stores land at offsets 1, 2 and 3 from the input r0.
strb ip, [r0, #1]
strb r2, [r5, #1]
add r0, r1, #2
strb r4, [r1, #1]
ldmfd sp!, {r4, r5}
bx lr
.size put_dec_full4, .-put_dec_full4
@ put_dec: write the decimal digits of a 64-bit value, least-significant
@ digit first.
@ In:  r0 = output pointer, r2 = low word, r3 = high word of the value
@ Out: r0 = pointer just past the last digit kept
@ If the high word is zero and the low word is <= 99999999 (.L17), this
@ tail-calls put_dec_trunc8. Otherwise (.L10) the value is split into four
@ 16-bit pieces, 4-digit groups are emitted through put_dec_full4, and the
@ loop at .L13 backs up over trailing '0' (48) bytes before returning.
.align 2
.type put_dec, %function
put_dec:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
cmp r3, #0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr}
mov ip, r0
beq .L17
.L10:
@ r6 = low word >> 16, r2 = low word & 0xffff,
@ r5 = high word & 0xffff, r4 = high word >> 16.
@ The shift/add sequence accumulates r2 = r2 + 5536*r6 + 656*r4 + 7296*r5.
mov r6, r2, lsr #16
add r1, r6, r6, asl #2
add r1, r6, r1, asl #2
mov r5, r3, asl #16
mov r4, r3, lsr #16
mov r5, r5, lsr #16
add r3, r6, r1, asl #1
mov r2, r2, asl #16
add r3, r6, r3, asl #2
mov sl, r5, asl #3
add r1, r4, r4, asl #2
mov r2, r2, lsr #16
add r1, r4, r1, asl #3
add r2, r2, r3, asl #5
rsb r3, r5, sl
add r3, r5, r3, asl #3
@ r7 = first word at .L19. (high word of r7 * x) >> 13 is used as x / 10000;
@ the remainder x - 10000 * quotient (625 << 4) is passed to put_dec_full4.
ldr r7, .L19
add r2, r2, r1, asl #4
add r2, r2, r3, asl #7
umull r3, r8, r7, r2
mov r8, r8, lsr #13
add r3, r8, r8, asl #2
add r3, r3, r3, asl #2
add r3, r3, r3, asl #2
add r3, r3, r3, asl #2
sub r1, r2, r3, asl #4
mov r0, ip
bl put_dec_full4
@ Second group: r8 = 7671*r4 + 9496*r5 + 6*r6 + previous quotient (r8).
mov r3, r4, asl #10
sub r3, r3, r4, asl #6
add sl, sl, r5
rsb r3, r4, r3
add sl, sl, sl, asl #5
rsb r3, r4, r3, asl #3
rsb sl, r5, sl, asl #2
add sl, r3, sl, asl #3
add r6, r6, r6, asl #1
add r6, sl, r6, asl #1
add r8, r6, r8
umull r3, r6, r7, r8
mov r6, r6, lsr #13
add r3, r6, r6, asl #2
add r3, r3, r3, asl #2
add r3, r3, r3, asl #2
add r3, r3, r3, asl #2
sub r1, r8, r3, asl #4
bl put_dec_full4
@ Third group: r5 = 4749*r4 + 42*r5 + previous quotient (r6).
add r3, r4, r4, asl #3
add r3, r3, r3, asl #5
add r2, r5, r5, asl #2
rsb r3, r4, r3, asl #2
add r3, r4, r3, asl #2
add r5, r5, r2, asl #2
add r5, r3, r5, asl #1
add r5, r5, r6
umull r3, r6, r7, r5
mov r6, r6, lsr #13
add r3, r6, r6, asl #2
add r3, r3, r3, asl #2
add r3, r3, r3, asl #2
add r3, r3, r3, asl #2
sub r1, r5, r3, asl #4
bl put_dec_full4
@ Fourth group: r4 = 281*r4 + previous quotient (r6); more digits are
@ emitted at .L18 only when this sum is non-zero.
add r2, r4, r4, asl #4
add r2, r4, r2, asl #1
add r4, r4, r2, asl #3
adds r4, r6, r4
mov r3, r0
bne .L18
.L13:
@ Step r3 back one byte at a time while the byte before it is 48 ('0');
@ r0 is left pointing just past the first non-'0' byte found.
mov r0, r3
ldrb r2, [r3, #-1]! @ zero_extendqisi2
cmp r2, #48
beq .L13
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr}
bx lr
.L17:
@ High word is zero: values up to 99999999 go straight to put_dec_trunc8
@ (tail call after restoring the saved registers).
ldr r1, .L19+4
cmp r2, r1
bhi .L10
mov r1, r2
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr}
b put_dec_trunc8
.L18:
@ Emit r4 % 10000, then the quotient r5 as one more group if it is non-zero.
umull r3, r5, r7, r4
mov r5, r5, lsr #13
add r3, r5, r5, asl #2
add r3, r3, r3, asl #2
add r3, r3, r3, asl #2
add r3, r3, r3, asl #2
sub r1, r4, r3, asl #4
bl put_dec_full4
cmp r5, #0
mov r3, r0
beq .L13
mov r1, r5
bl put_dec_full4
mov r3, r0
b .L13
.L20:
.align 2
.L19:
.word -776530087
.word 99999999
.size put_dec, .-put_dec
@ number: format a 64-bit integer into a bounded output buffer.
@ In:  r0 = output cursor, r1 = end pointer (a byte is stored only while the
@      cursor is below r1; the cursor advances regardless),
@      r2 = low word, r3 = high word of the value,
@      8 bytes of stack arguments (found at sp+160 after the prologue).
@ Out: r0 = final cursor
@ Fields read from the stack argument: byte 1 = flag bits, byte 2 = base,
@ halfword at offset 4 = field width, halfword at offset 6 = minimum digit
@ count. NOTE(review): presumably the kernel's struct printf_spec - confirm
@ against the C source.
@ Frame slots used: [sp,#12] cursor, [sp,#16] flags, [sp,#20] base,
@ [sp,#24] remaining width, [sp,#28] prefix wanted, [sp,#32] sign character,
@ [sp,#36] minimum digit count, [sp,#40] flags & 16, [sp,#8] flags & 32,
@ sp+52 temporary digit buffer.
.align 2
.type number, %function
number:
@ Function supports interworking.
@ args = 8, pretend = 0, frame = 120
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
sub sp, sp, #124
ldrb ip, [sp, #161] @ zero_extendqisi2
ldrb r4, [sp, #162] @ zero_extendqisi2
ldrh r5, [sp, #166]
str r0, [sp, #12]
ands r0, ip, #64
str r4, [sp, #20]
str r5, [sp, #36]
ldrh r4, [sp, #164]
str ip, [sp, #16]
beq .L74
@ Flag bit 64 is set: the prefix is wanted only when base != 10.
ldr r0, [sp, #20]
subs r0, r0, #10
movne r0, #1
.L74:
@ If flag bit 16 is set, flag bit 1 is cleared from the stored flags.
ands r5, ip, #16
str r0, [sp, #28]
and r0, ip, #32
andne ip, ip, #254
strne ip, [sp, #16]
str r5, [sp, #40]
ldr r5, [sp, #16]
str r0, [sp, #8]
@ Flag bit 2 enables sign handling: negative -> .L65, flag bit 4 -> .L76,
@ flag bit 8 -> a space (32) as the sign character.
tst r5, #2
beq .L26
cmp r3, #0
blt .L65
tst r5, #4
bne .L76
ldr r5, [sp, #16]
tst r5, #8
beq .L26
sub r0, r4, #1
mov r0, r0, asl #16
mov r0, r0, lsr #16
mov ip, #32
str r0, [sp, #24]
str ip, [sp, #32]
b .L29
.L26:
@ No sign character: width unchanged, sign slot = 0.
mov r0, #0
str r4, [sp, #24]
str r0, [sp, #32]
.L29:
@ When a prefix is wanted the width drops by 1, and by 1 more for base 16.
ldr r4, [sp, #28]
cmp r4, #0
beq .L31
ldr r5, [sp, #24]
ldr ip, [sp, #20]
sub r0, r5, #1
mov r0, r0, asl #16
mov r0, r0, lsr #16
cmp ip, #16
str r0, [sp, #24]
subeq r0, r0, #1
moveq r0, r0, asl #16
moveq r0, r0, lsr #16
streq r0, [sp, #24]
.L31:
@ Values 0..7 with a zero high word take the single-digit path at .L77.
cmp r3, #0
bne .L32
cmp r2, #7
bls .L77
.L32:
@ Base 10 goes through put_dec (.L35). Otherwise digits are produced by
@ shifting: 4 bits per digit for base 16, 3 bits for any other base.
ldr r0, [sp, #20]
cmp r0, #10
beq .L35
cmp r0, #16
movne r5, #3
moveq r5, #4
sub r8, r0, #1
ldr sl, .L79
rsb r0, r5, #32
mov r4, #0
add r9, sp, #52
sub r6, r5, #32
str r1, [sp, #44]
mov fp, r0
.L38:
@ Per digit: index the table at .LANCHOR0 with (low word & (base - 1)),
@ OR in flags & 32, store into the temporary buffer, then shift the 64-bit
@ value right by r5 bits. Loops until the value is zero; r4 counts digits.
mov ip, r2, lsr r5
cmp r6, #0
orr ip, ip, r3, asl fp
movge ip, r3, lsr r6
mov r7, r3, lsr r5
and r2, r2, #255
and r2, r2, r8
mov r0, ip
ldr r1, [sp, #8]
ldrb ip, [sl, r2] @ zero_extendqisi2
mov r2, r0
orr ip, r1, ip
orrs r0, r2, r7
strb ip, [r9, r4]
mov r3, r7
add r4, r4, #1
bne .L38
ldr r1, [sp, #44]
sub r5, r4, #1
.L34:
@ Here r4 = number of digits in the temporary buffer, r5 = r4 - 1.
@ ip = the larger of the minimum digit count (as a signed halfword) and r4;
@ r7 = width - ip, truncated to 16 bits.
ldr r2, [sp, #36]
ldr r3, [sp, #24]
mov ip, r2, asl #16
cmp r4, ip, asr #16
movgt ip, r4, asl #16
mov ip, ip, lsr #16
ldr r0, [sp, #16]
rsb r7, ip, r3
mov r7, r7, asl #16
mov r7, r7, lsr #16
@ Leading space padding happens only when neither flag bit 1 nor 16 is set.
tst r0, #17
mov r0, r7
bne .L41
sub r0, r7, #1
mov r0, r0, asl #16
cmp r0, #0
mov r0, r0, lsr #16
blt .L41
ldr r3, [sp, #12]
mov r8, r0
add r2, r3, #1
add r2, r2, r0
mov r6, #32
.L43:
@ Store a space while the cursor is below the end pointer.
cmp r1, r3
strhib r6, [r3, #0]
add r3, r3, #1
cmp r3, r2
bne .L43
rsb r7, r7, #1
ldr r2, [sp, #12]
add r0, r0, r7
mov r0, r0, asl #16
add r3, r8, #1
sub r0, r0, #65536
add r2, r2, r3
str r2, [sp, #12]
mov r0, r0, lsr #16
.L41:
@ Emit the sign character, if one was selected.
ldr r3, [sp, #32]
cmp r3, #0
beq .L44
ldr r2, [sp, #12]
cmp r2, r1
strccb r3, [r2, #0]
ldr r3, [sp, #12]
add r3, r3, #1
str r3, [sp, #12]
.L44:
@ Emit the prefix: '0' (48), followed at .L78 by a second character when
@ the base is 16.
ldr r2, [sp, #28]
cmp r2, #0
beq .L46
ldr r3, [sp, #12]
cmp r3, r1
ldrcc r2, [sp, #12]
movcc r3, #48
strccb r3, [r2, #0]
ldr r2, [sp, #12]
ldr r3, [sp, #20]
add r2, r2, #1
cmp r3, #16
str r2, [sp, #12]
beq .L78
.L46:
@ When flag bit 16 is clear, pad the remaining width with '0' (48) if flag
@ bit 1 is set, otherwise with a space (32).
ldr r2, [sp, #40]
cmp r2, #0
movne r6, r0
movne r7, r6, asl #16
bne .L50
sub r6, r0, #1
ldr r3, [sp, #16]
mov r6, r6, asl #16
tst r3, #1
mov r6, r6, lsr #16
movne r8, #48
moveq r8, #32
movs r7, r6, asl #16
bmi .L50
sub r2, r0, #1
ldr r3, [sp, #12]
mov r2, r2, asl #16
add r2, r3, r2, lsr #16
add r2, r2, #1
.L54:
cmp r1, r3
strhib r8, [r3, #0]
add r3, r3, #1
cmp r3, r2
bne .L54
rsb r6, r0, r6
mov r6, r6, asl #16
mov r6, r6, lsr #16
str r3, [sp, #12]
mov r7, r6, asl #16
.L50:
@ Emit '0' (48) bytes while the digit count r4 is below ip.
sub r3, ip, #1
mov r3, r3, asl #16
cmp r4, r3, asr #16
bgt .L55
sub r0, ip, #2
ldr r3, [sp, #12]
mov r0, r0, asl #16
add r0, r3, r0, asr #16
add r0, r0, #1
mov ip, #48
.L57:
cmp r1, r3
strhib ip, [r3, #0]
add r3, r3, #1
rsb r2, r3, r0
cmp r4, r2
ble .L57
str r3, [sp, #12]
.L55:
@ Copy the r5 + 1 bytes of the temporary buffer to the output in reverse
@ order (last byte produced is written first).
cmp r5, #0
blt .L58
add r2, sp, #52
ldr r3, [sp, #12]
sub r0, r2, #1
add r2, r2, r5
.L60:
cmp r1, r3
ldrhib ip, [r2, #0] @ zero_extendqisi2
sub r2, r2, #1
strhib ip, [r3, #0]
cmp r2, r0
add r3, r3, #1
bne .L60
ldr r4, [sp, #12]
add r5, r5, #1
add r4, r4, r5
str r4, [sp, #12]
.L58:
@ Trailing space padding while the remaining width (r7) is positive.
cmp r7, #0
ble .L61
ldr r5, [sp, #12]
sub r2, r6, #1
mov r2, r2, asl #16
add r2, r5, r2, lsr #16
add r2, r2, #1
mov r3, r5
mov r0, #32
.L63:
cmp r1, r3
strhib r0, [r3, #0]
add r3, r3, #1
cmp r3, r2
bne .L63
str r3, [sp, #12]
.L61:
@ Return the final cursor.
ldr r0, [sp, #12]
add sp, sp, #124
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
bx lr
.L76:
@ Flag bit 4: sign character '+' (43), width reduced by 1.
sub r0, r4, #1
mov r0, r0, asl #16
mov r0, r0, lsr #16
str r0, [sp, #24]
mov r0, #43
str r0, [sp, #32]
b .L29
.L65:
@ Negative value: sign character '-' (45), width reduced by 1, and the
@ 64-bit value in r2:r3 is negated.
sub r0, r4, #1
mov r0, r0, asl #16
mov r0, r0, lsr #16
mov ip, #45
rsbs r2, r2, #0
rsc r3, r3, #0
str r0, [sp, #24]
str ip, [sp, #32]
b .L29
.L77:
@ Single digit: store r2 + 48 in the temporary buffer; r4 = 1, r5 = 0.
add r0, r2, #48
strb r0, [sp, #52]
mov r5, r3
mov r4, #1
b .L34
.L35:
@ Base 10: put_dec fills the temporary buffer; r4 = bytes written.
@ r1 (end pointer) is kept in [sp,#4] across the call.
add r4, sp, #52
mov r0, r4
str r1, [sp, #4]
bl put_dec
rsb r4, r4, r0
sub r5, r4, #1
ldr r1, [sp, #4]
b .L34
.L78:
@ Base 16 prefix character: 88 ('X') ORed with flags & 32.
cmp r1, r2
ldrhi r2, [sp, #8]
orrhi r3, r2, #88
ldrhi r2, [sp, #12]
strhib r3, [r2, #0]
ldr r3, [sp, #12]
add r3, r3, #1
str r3, [sp, #12]
b .L46
.L80:
.align 2
.L79:
.word .LANCHOR0
.size number, .-number
@ measure_number: count how many number() calls complete while time(0)
@ keeps returning the value stored at *r0.
@ In:  r0 = pointer to a stored time value, r2 = low word, r3 = high word of
@      the value to format
@ Out: r0 = number of conversions performed (a multiple of 4000);
@      *pointer is overwritten with the last time(0) result.
.align 2
.type measure_number, %function
measure_number:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 72
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
mov fp, r3
sub sp, sp, #84
ldr r3, [r0, #0]
str r0, [sp, #12]
mov r0, #0
str r3, [sp, #8]
mov sl, r2
bl time
ldr r3, [sp, #8]
@ r9 = conversion counter, r6 = output buffer (sp+16), r8 = r6 + 63 (end),
@ r5 = address of the 8-byte spec at .LANCHOR0+16, r7 = 0 (terminator byte).
mov r9, #0
add r6, sp, #16
cmp r3, r0
mov r7, r9
ldr r5, .L88
add r8, r6, #63
bne .L87
.L84:
@ r4 counts down from 3999; the inner loop runs until r4 reaches -1.
ldr r4, .L88+4
.L83:
@ Copy the 8-byte spec to the outgoing stack area and call
@ number(r6, r8, value, spec); a zero byte is stored at the returned pointer.
mov ip, sp
ldmia r5, {r0, r1}
mov r2, sl
stmia ip, {r0, r1}
mov r3, fp
mov r0, r6
mov r1, r8
bl number
sub r4, r4, #1
cmn r4, #1
strb r7, [r0, #0]
bne .L83
mov r0, #0
bl time
ldr r3, [sp, #8]
add r9, r9, #4000
cmp r3, r0
beq .L84
.L87:
@ time(0) changed: publish the new time and return the count.
ldr ip, [sp, #12]
str r0, [ip, #0]
mov r0, r9
add sp, sp, #84
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
bx lr
.L89:
.align 2
.L88:
.word .LANCHOR0+16
.word 3999
.size measure_number, .-measure_number
@ measure: wait for time(0) to change, then call measure_number for seven
@ test values and print the seven results with the format string at .LC0.
@ Takes no arguments; the value left in r0 is whatever printf returned.
.align 2
.type measure, %function
measure:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 8
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr}
mov r0, #0
sub sp, sp, #24
bl time
str r0, [sp, #20]
.L91:
@ Spin until time(0) differs from the value saved at [sp,#20].
mov r0, #0
bl time
ldr r3, [sp, #20]
cmp r0, r3
beq .L91
@ r8 = sp + 20; the new time is stored there and r8 is passed to every
@ measure_number call as the time pointer.
add r8, sp, #24
str r0, [r8, #-4]!
@ Test values, low word in r2 and high word in r3: 8, 123, 123456,
@ 12345678, 123456789, 0xffffffff, and all 64 bits set.
mov r2, #8
mov r3, #0
mov r0, r8
bl measure_number
mov r2, #123
mov sl, r0
mov r3, #0
mov r0, r8
bl measure_number
ldr r2, .L94
mov r7, r0
mov r3, #0
mov r0, r8
bl measure_number
ldr r2, .L94+4
mov r6, r0
mov r3, #0
mov r0, r8
bl measure_number
ldr r2, .L94+8
mov r5, r0
mov r3, #0
mov r0, r8
bl measure_number
mvn r2, #0
mov r4, r0
mov r3, #0
mov r0, r8
bl measure_number
mvn r2, #0
mov r9, r0
mvn r3, #0
mov r0, r8
bl measure_number
@ printf(.LC0, sl, r7, r6, r5, r4, r9, r0): the first three results go in
@ r1-r3, the remaining four on the stack at sp+0 .. sp+12.
mov r1, sl
str r0, [sp, #12]
mov r2, r7
mov r3, r6
ldr r0, .L94+12
str r5, [sp, #0]
stmib sp, {r4, r9} @ phole stm
bl printf
add sp, sp, #24
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr}
bx lr
.L95:
.align 2
.L94:
.word 123456
.word 12345678
.word 123456789
.word .LC0
.size measure, .-measure
@ check: format a 64-bit value with number() and with sprintf(.LC1) and
@ compare the two strings.
@ In:  r0 = low word, r1 = high word of the value
@ On a mismatch (.L99) it prints a message with the .LC2 format and calls
@ exit(1); otherwise it returns normally.
.align 2
.type check, %function
check:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 128
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, lr}
ldr r3, .L100
sub sp, sp, #140
mov r5, r0
mov r6, r1
@ r4 = buffer for number() at sp+72, with end pointer r4 + 63; the 8-byte
@ spec at .LANCHOR0+16 is copied to the outgoing stack area first.
add r4, sp, #72
ldmia r3, {r0, r1}
mov r3, sp
stmia r3, {r0, r1}
mov r2, r5
mov r3, r6
add r1, r4, #63
mov r0, r4
bl number
@ r7 = second buffer at sp+8 for sprintf; a zero byte terminates the
@ number() output at the returned pointer.
add r7, sp, #8
mov r3, #0
strb r3, [r0, #0]
mov r2, r5
mov r3, r6
ldr r1, .L100+4
mov r0, r7
bl sprintf
mov r0, r4
mov r1, r7
bl strcmp
cmp r0, #0
bne .L99
add sp, sp, #140
ldmfd sp!, {r4, r5, r6, r7, lr}
bx lr
.L99:
@ Mismatch: printf(.LC2, value, number() buffer), then exit(1).
mov r2, r5
mov r3, r6
ldr r0, .L100+8
str r4, [sp, #0]
bl printf
mov r0, #1
bl exit
.L101:
.align 2
.L100:
.word .LANCHOR0+16
.word .LC1
.word .LC2
.size check, .-check
@ main: call measure four times, then loop forever running check() on a
@ 64-bit counter (r4 = low word, r5 = high word, starting at 0) and on its
@ negation. Whenever the low 18 bits of the counter are zero a progress line
@ is printed with the .LC3 format. There is no exit path from the loop in
@ this function.
.align 2
.global main
.type main, %function
main:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r3, r4, r5, r6, r7, r8, r9, sl, fp, lr}
bl measure
mov r4, #0
bl measure
mov r5, #0
bl measure
@ r6:r7 = mask 262143 (low word) / 0 (high word); sl:fp = increment of 1.
ldr r6, .L106
bl measure
mov r7, #0
mov sl, #1
mov fp, #0
b .L104
.L103:
@ 64-bit increment of the counter.
adds r4, r4, sl
adc r5, r5, fp
.L104:
mov r0, r4
mov r1, r5
bl check
@ Second check() call gets the negated counter; r8:r9 = counter & mask.
and r8, r4, r6
rsbs r0, r4, #0
rsc r1, r5, #0
and r9, r5, r7
bl check
orrs r8, r8, r9
bne .L103
@ Masked bits are all zero: print progress, then call fflush with r8,
@ which is 0 on this path.
mov r2, r4
mov r3, r5
ldr r0, .L106+4
bl printf
mov r0, r8
bl fflush
b .L103
.L107:
.align 2
.L106:
.word 262143
.word .LC3
.size main, .-main
@ Read-only data. .LANCHOR0 marks the start of this data: the code refers to
@ the 16-byte digit table as .LANCHOR0 and to dummy_spec as .LANCHOR0+16.
.section .rodata
.align 2
.LANCHOR0 = . + 0
.type digits.3938, %object
.size digits.3938, 16
digits.3938:
.ascii "0123456789ABCDEF"
@ dummy_spec: the 8-byte argument block copied onto the stack before each
@ number() call. number() reads byte 1 as flag bits, byte 2 (10 here) as the
@ base, and the two halfwords as width and minimum digit count.
.type dummy_spec, %object
.size dummy_spec, 8
dummy_spec:
.byte 8
.byte 0
.byte 10
.byte 0
.short 0
.short 0
@ Format strings: .LC0 is used by measure, .LC1 and .LC2 by check,
@ .LC3 by main.
.section .rodata.str1.4,"aMS",%progbits,1
.align 2
.LC0:
.ascii "Conversions per second: 8:%d 123:%d 123456:%d 12345"
.ascii "678:%d 123456789:%d 2^32:%d 2^64:%d\012\000"
.LC1:
.ascii "%llu\000"
.space 3
.LC2:
.ascii "Error in formatting %llu:'%s'\012\000"
.space 1
.LC3:
.ascii "\015Tested %llu \000"
.ident "GCC: (Debian 4.4.5-8) 4.4.5"
.section .note.GNU-stack,"",%progbits
--- test_new.org.c 2012-03-28 13:25:14.000000000 +0300
+++ test_new-org-with-noinline_for_stack_if-LONG_MAX_header_arm.c 2012-03-28 13:37:00.000000000 +0300
@@ -1,4 +1,4 @@
-#include "test_header.c"
+#include "test_header_arm.c"

/* Decimal conversion is by far the most typical, and is used
* for /proc and /sys data. This directly impacts e.g. top performance
@@ -7,7 +7,7 @@
* (with permission from the author, Douglas W. Jones).
*/

-#if LONG_MAX > ((1UL<<31)-1) || LLONG_MAX > ((1ULL<<63)-1)
+#if 1 ////LONG_MAX > ((1UL<<31)-1) || LLONG_MAX > ((1ULL<<63)-1)
/* Formats correctly any integer in [0, 999999999] */
static noinline_for_stack
char *put_dec_full9(char *buf, unsigned q)
@@ -106,11 +106,11 @@
* Else (if long is 32 bits and long long is 64 bits) we use second one.
*/

-#if LONG_MAX > ((1UL<<31)-1) || LLONG_MAX > ((1ULL<<63)-1)
+#if 1 ///LONG_MAX > ((1UL<<31)-1) || LLONG_MAX > ((1ULL<<63)-1)

/* First algorithm: generic */

-static
+static noinline_for_stack
char *put_dec(char *buf, unsigned long long n)
{
if (n >= 100*1000*1000) {
@@ -145,7 +145,7 @@
* (with permission from the author).
* Performs no 64-bit division and hence should be fast on 32-bit machines.
*/
-static
+static noinline_for_stack
char *put_dec(char *buf, unsigned long long n)
{
uint32_t d3, d2, d1, q, h;
.cpu arm9tdmi
.fpu softvfp
.eabi_attribute 20, 1
.eabi_attribute 21, 1
.eabi_attribute 23, 3
.eabi_attribute 24, 1
.eabi_attribute 25, 1
.eabi_attribute 26, 2
.eabi_attribute 30, 2
.eabi_attribute 18, 4
.file "test_new-org-with-noinline_for_stack_if-LONG_MAX_header_arm.c"
.text
@ put_dec_full9: store exactly nine decimal digit bytes for r1 at r0,
@ least-significant digit first, with no early exit.
@ In:  r0 = output pointer, r1 = value
@ Out: r0 = input pointer + 9
@ r4-r9, sl and fp are pushed on entry and restored before returning; a
@ 40-byte frame holds spilled intermediates.
@ NOTE(review): presumably only meaningful for inputs up to 999999999, as
@ the comment in the accompanying diff says - nothing here checks it.
.align 2
.type put_dec_full9, %function
put_dec_full9:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 40
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
@ r3 = 429496730 (.L3). The high word of x * r3 is used as the quotient
@ x / 10; five chained umull steps give the successive quotients
@ r6, r4, ip, r2 and r3 (the last via r8).
ldr r3, .L3
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
umull r4, r5, r1, r3
mov r6, r5
umull r4, r5, r6, r3
umull r7, r8, r5, r3
mov ip, r8
umull r7, r8, ip, r3
mov r2, r8
umull r7, r8, r2, r3
mov r4, r5
@ Further quotients are built from shifts and adds:
@ r5 = ((r8 * 3277) << 1) >> 16
add r5, r8, r8, asl #1
add r5, r8, r5, asl #2
rsb r5, r5, r5, asl #6
add r5, r8, r5, asl #2
mov r5, r5, asl #1
mov r3, r8
mov r5, r5, lsr #16
add r1, r1, #48
add r8, r6, r6, asl #2
sub sp, sp, #40
add r7, r5, r5, asl #2
sub r8, r1, r8, asl #1
add r6, r6, #48
mov r1, r0
str r7, [sp, #12]
@ First digit stored at the input pointer; r1 becomes input pointer + 1.
strb r8, [r1], #1
add r7, r5, r7, asl #3
str r6, [sp, #0]
add r5, r5, #48
str r5, [sp, #32]
ldr r5, [sp, #0]
add r6, r4, #48
add r4, r4, r4, asl #2
str r6, [sp, #16]
sub r4, r5, r4, asl #1
str r4, [sp, #0]
add r6, ip, #48
ldr r4, [sp, #16]
@ r7 = (r5 * 205) >> 11 (completed by the lsr #11 below)
add r7, r7, r7, asl #2
str r6, [sp, #20]
mov r7, r7, lsr #11
add r6, r2, #48
add ip, ip, ip, asl #2
str r6, [sp, #24]
add r8, r1, #1
add r9, r7, r7, asl #2
sub ip, r4, ip, asl #1
str r8, [sp, #8]
str ip, [sp, #16]
ldr r5, [sp, #20]
ldr ip, [sp, #24]
add r8, r7, r9, asl #3
add r6, r3, #48
add r7, r7, #48
str r6, [sp, #28]
str r7, [sp, #36]
@ r8 = (r7 * 205) >> 11 (completed by the lsr #11 below)
add r8, r8, r8, asl #2
add r2, r2, r2, asl #2
add r3, r3, r3, asl #2
ldr r4, [sp, #36]
sub r2, r5, r2, asl #1
sub r3, ip, r3, asl #1
ldr r5, [sp, #28]
ldr ip, [sp, #12]
mov r8, r8, lsr #11
add r7, r8, r8, asl #2
sub r7, r4, r7, asl #1
sub r4, r5, ip, asl #1
ldr r5, [sp, #32]
ldr sl, [sp, #8]
sub r9, r5, r9, asl #1
ldr r5, [sp, #0]
add sl, sl, #1
str sl, [sp, #4]
@ The remaining eight stores land at offsets 1 through 8 from the input
@ pointer; r0 is set to input pointer + 9 along the way.
strb r5, [r0, #1]
ldr r0, [sp, #16]
add sl, sl, #1
strb r0, [r1, #1]
ldr r1, [sp, #8]
add fp, sl, #1
strb r2, [r1, #1]
ldr r2, [sp, #4]
add r6, fp, #1
add ip, r6, #1
add r8, r8, #48
strb r3, [r2, #1]
add r0, ip, #2
strb r4, [sl, #1]
strb r9, [fp, #1]
strb r7, [r6, #1]
strb r8, [ip, #1]
add sp, sp, #40
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp}
bx lr
.L4:
.align 2
.L3:
.word 429496730
.size put_dec_full9, .-put_dec_full9
@ put_dec_trunc8: store the decimal digits of r1 at r0, least-significant
@ digit first, stopping as soon as the remaining quotient is zero.
@ In:  r0 = output pointer, r1 = value
@ Out: r0 = pointer just past the last byte stored (1 to 8 bytes are written)
@ r4, r5 and fp are pushed on entry and restored at .L6.
@ NOTE(review): the name suggests inputs are below 10^8; nothing here checks it.
.align 2
.type put_dec_trunc8, %function
put_dec_trunc8:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
@ link register save eliminated.
@ r3 = 429496730 (.L9). The high word of x * r3 is used below as the
@ quotient x / 10; each digit is then x + 48 - 10 * quotient.
ldr r3, .L9
stmfd sp!, {r4, r5, fp}
umull fp, ip, r1, r3
mov r2, ip
add r1, r1, #48
mov ip, r0
add r0, r2, r2, asl #2
sub r1, r1, r0, asl #1
cmp r2, #0
mov r0, ip
@ First digit is always stored; r0 is post-incremented.
strb r1, [r0], #1
beq .L6
umull r4, r5, r2, r3
add r2, r2, #48
add fp, r5, r5, asl #2
sub r2, r2, fp, asl #1
cmp r5, #0
mov r1, r5
strb r2, [ip, #1]
add r0, r0, #1
beq .L6
umull r4, r5, r1, r3
add r1, r1, #48
add ip, r5, r5, asl #2
sub r1, r1, ip, asl #1
cmp r5, #0
mov r2, r5
strb r1, [r0], #1
beq .L6
umull r4, r5, r2, r3
add r2, r2, #48
add r1, r5, r5, asl #2
sub r2, r2, r1, asl #1
cmp r5, #0
strb r2, [r0], #1
beq .L6
@ From here on the quotient is built from shifts and adds instead of umull:
@ r2 = ((r5 * 3277) << 1) >> 16
add r2, r5, r5, asl #1
add r2, r5, r2, asl #2
rsb r2, r2, r2, asl #6
add r2, r5, r2, asl #2
mov r2, r2, asl #1
mov r2, r2, lsr #16
add r3, r5, #48
add r1, r2, r2, asl #2
sub r3, r3, r1, asl #1
cmp r2, #0
strb r3, [r0], #1
beq .L6
@ r3 = (r2 * 205) >> 11  (r1 holds r2 * 5 on entry to this sequence)
add r1, r2, r1, asl #3
add r1, r1, r1, asl #2
mov r3, r1, lsr #11
add r2, r2, #48
add r1, r3, r3, asl #2
sub r2, r2, r1, asl #1
cmp r3, #0
strb r2, [r0], #1
beq .L6
@ r2 = (r3 * 205) >> 11  (r1 holds r3 * 5 on entry to this sequence)
add r1, r3, r1, asl #3
add r1, r1, r1, asl #2
mov r2, r1, lsr #11
add r1, r2, r2, asl #2
add r3, r3, #48
cmp r2, #0
sub r3, r3, r1, asl #1
strb r3, [r0], #1
@ The eighth digit is stored only when the final quotient is non-zero.
addne r2, r2, #48
strneb r2, [r0], #1
.L6:
ldmfd sp!, {r4, r5, fp}
bx lr
.L10:
.align 2
.L9:
.word 429496730
.size put_dec_trunc8, .-put_dec_trunc8
@ put_dec: write the decimal digits of a 64-bit value, least-significant
@ digit first.
@ In:  r0 = output pointer, r2 = low word, r3 = high word of the value
@ Out: r0 = pointer returned by the final put_dec_trunc8 / put_dec_full9
@ With a zero high word (.L28): low word <= 99999999 tail-calls
@ put_dec_trunc8, low word <= 999999999 tail-calls put_dec_full9.
@ Anything larger enters the loop at .L27, which emits nine digits per
@ iteration through put_dec_full9 and carries the quotient forward.
.align 2
.type put_dec, %function
put_dec:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
cmp r3, #0
stmfd sp!, {r4, r5, r6, r7, r8, sl, fp, lr}
beq .L28
.L25:
@ sl = 999999999 (loop bound); r5 and r6 = the two multiplier words at
@ .L29+4 and .L29+8 used by the inline asm below; r8 = 0.
ldr sl, .L29
ldr r5, .L29+4
ldr r6, .L29+8
mov r8, #0
.L27:
#APP
@ 118 "test_new-org-with-noinline_for_stack_if-LONG_MAX_header_arm.c" 1
umull fp, ip, r5, r2
cmn fp, r5
adcs ip, ip, r6
adc fp, r8, #0
@ 0 "" 2
mov r1, r8
#APP
@ 118 "test_new-org-with-noinline_for_stack_if-LONG_MAX_header_arm.c" 1
umlal ip, fp, r6, r2
umlal ip, r1, r5, r3
mov ip, #0
adds fp, r1, fp
adc ip, ip, #0
umlal fp, ip, r6, r3
@ 0 "" 2
@ Quotient = the 64-bit product in fp (low) / ip (high) shifted right by 29:
@ r4 = low word, r7 = high word.
@ NOTE(review): presumably this is the value divided by 10^9 - confirm
@ against line 118 of the C source.
mov r4, fp, lsr #29
orr r4, r4, ip, asl #3
@ r1 = r2 - r4 * 1000000000 (r4 * 1953125, shifted left by 9), i.e. the
@ low nine digits handed to put_dec_full9.
rsb r1, r4, r4, asl #5
rsb r1, r1, r1, asl #6
add r1, r4, r1, asl #3
add r1, r1, r1, asl #2
add r1, r1, r1, asl #2
add r1, r1, r1, asl #2
mov r7, ip, lsr #29
sub r1, r2, r1, asl #9
bl put_dec_full9
@ Continue with the quotient while its high word is non-zero or its low
@ word exceeds 999999999.
cmp r7, #0
mov r2, r4
mov r3, r7
bne .L27
cmp r4, sl
bhi .L27
ldr r3, .L29+12
cmp r4, r3
bhi .L16
.L18:
@ Remaining value <= 99999999: tail call put_dec_trunc8.
mov r1, r4
ldmfd sp!, {r4, r5, r6, r7, r8, sl, fp, lr}
b put_dec_trunc8
.L28:
@ High word is zero: pick the handler from the size of the low word.
ldr r1, .L29+12
cmp r2, r1
movls r4, r2
bls .L18
ldr r1, .L29
cmp r2, r1
movls r4, r2
bhi .L25
.L16:
@ Remaining value in 100000000..999999999: tail call put_dec_full9.
mov r1, r4
ldmfd sp!, {r4, r5, r6, r7, r8, sl, fp, lr}
b put_dec_full9
.L30:
.align 2
.L29:
.word 999999999
.word 917808535
.word -1989124287
.word 99999999
.size put_dec, .-put_dec
@ number: format a 64-bit integer into a bounded output buffer.
@ In:  r0 = output cursor, r1 = end pointer (a byte is stored only while the
@      cursor is below r1; the cursor advances regardless),
@      r2 = low word, r3 = high word of the value,
@      8 bytes of stack arguments (found at sp+160 after the prologue).
@ Out: r0 = final cursor
@ Fields read from the stack argument: byte 1 = flag bits, byte 2 = base,
@ halfword at offset 4 = field width, halfword at offset 6 = minimum digit
@ count. NOTE(review): presumably the kernel's struct printf_spec - confirm
@ against the C source.
@ Frame slots used: [sp,#12] cursor, [sp,#16] flags, [sp,#20] base,
@ [sp,#24] remaining width, [sp,#28] prefix wanted, [sp,#32] sign character,
@ [sp,#36] minimum digit count, [sp,#40] flags & 16, [sp,#8] flags & 32,
@ sp+52 temporary digit buffer.
.align 2
.type number, %function
number:
@ Function supports interworking.
@ args = 8, pretend = 0, frame = 120
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
sub sp, sp, #124
ldrb ip, [sp, #161] @ zero_extendqisi2
ldrb r4, [sp, #162] @ zero_extendqisi2
ldrh r5, [sp, #166]
str r0, [sp, #12]
ands r0, ip, #64
str r4, [sp, #20]
str r5, [sp, #36]
ldrh r4, [sp, #164]
str ip, [sp, #16]
beq .L84
@ Flag bit 64 is set: the prefix is wanted only when base != 10.
ldr r0, [sp, #20]
subs r0, r0, #10
movne r0, #1
.L84:
@ If flag bit 16 is set, flag bit 1 is cleared from the stored flags.
ands r5, ip, #16
str r0, [sp, #28]
and r0, ip, #32
andne ip, ip, #254
strne ip, [sp, #16]
str r5, [sp, #40]
ldr r5, [sp, #16]
str r0, [sp, #8]
@ Flag bit 2 enables sign handling: negative -> .L75, flag bit 4 -> .L86,
@ flag bit 8 -> a space (32) as the sign character.
tst r5, #2
beq .L36
cmp r3, #0
blt .L75
tst r5, #4
bne .L86
ldr r5, [sp, #16]
tst r5, #8
beq .L36
sub r0, r4, #1
mov r0, r0, asl #16
mov r0, r0, lsr #16
mov ip, #32
str r0, [sp, #24]
str ip, [sp, #32]
b .L39
.L36:
@ No sign character: width unchanged, sign slot = 0.
mov r0, #0
str r4, [sp, #24]
str r0, [sp, #32]
.L39:
@ When a prefix is wanted the width drops by 1, and by 1 more for base 16.
ldr r4, [sp, #28]
cmp r4, #0
beq .L41
ldr r5, [sp, #24]
ldr ip, [sp, #20]
sub r0, r5, #1
mov r0, r0, asl #16
mov r0, r0, lsr #16
cmp ip, #16
str r0, [sp, #24]
subeq r0, r0, #1
moveq r0, r0, asl #16
moveq r0, r0, lsr #16
streq r0, [sp, #24]
.L41:
@ Values 0..7 with a zero high word take the single-digit path at .L87.
cmp r3, #0
bne .L42
cmp r2, #7
bls .L87
.L42:
@ Base 10 goes through put_dec (.L45). Otherwise digits are produced by
@ shifting: 4 bits per digit for base 16, 3 bits for any other base.
ldr r0, [sp, #20]
cmp r0, #10
beq .L45
cmp r0, #16
movne r5, #3
moveq r5, #4
sub r8, r0, #1
ldr sl, .L89
rsb r0, r5, #32
mov r4, #0
add r9, sp, #52
sub r6, r5, #32
str r1, [sp, #44]
mov fp, r0
.L48:
@ Per digit: index the table at .LANCHOR0 with (low word & (base - 1)),
@ OR in flags & 32, store into the temporary buffer, then shift the 64-bit
@ value right by r5 bits. Loops until the value is zero; r4 counts digits.
mov ip, r2, lsr r5
cmp r6, #0
orr ip, ip, r3, asl fp
movge ip, r3, lsr r6
mov r7, r3, lsr r5
and r2, r2, #255
and r2, r2, r8
mov r0, ip
ldr r1, [sp, #8]
ldrb ip, [sl, r2] @ zero_extendqisi2
mov r2, r0
orr ip, r1, ip
orrs r0, r2, r7
strb ip, [r9, r4]
mov r3, r7
add r4, r4, #1
bne .L48
ldr r1, [sp, #44]
sub r5, r4, #1
.L44:
@ Here r4 = number of digits in the temporary buffer, r5 = r4 - 1.
@ ip = the larger of the minimum digit count (as a signed halfword) and r4;
@ r7 = width - ip, truncated to 16 bits.
ldr r2, [sp, #36]
ldr r3, [sp, #24]
mov ip, r2, asl #16
cmp r4, ip, asr #16
movgt ip, r4, asl #16
mov ip, ip, lsr #16
ldr r0, [sp, #16]
rsb r7, ip, r3
mov r7, r7, asl #16
mov r7, r7, lsr #16
@ Leading space padding happens only when neither flag bit 1 nor 16 is set.
tst r0, #17
mov r0, r7
bne .L51
sub r0, r7, #1
mov r0, r0, asl #16
cmp r0, #0
mov r0, r0, lsr #16
blt .L51
ldr r3, [sp, #12]
mov r8, r0
add r2, r3, #1
add r2, r2, r0
mov r6, #32
.L53:
@ Store a space while the cursor is below the end pointer.
cmp r1, r3
strhib r6, [r3, #0]
add r3, r3, #1
cmp r3, r2
bne .L53
rsb r7, r7, #1
ldr r2, [sp, #12]
add r0, r0, r7
mov r0, r0, asl #16
add r3, r8, #1
sub r0, r0, #65536
add r2, r2, r3
str r2, [sp, #12]
mov r0, r0, lsr #16
.L51:
@ Emit the sign character, if one was selected.
ldr r3, [sp, #32]
cmp r3, #0
beq .L54
ldr r2, [sp, #12]
cmp r2, r1
strccb r3, [r2, #0]
ldr r3, [sp, #12]
add r3, r3, #1
str r3, [sp, #12]
.L54:
@ Emit the prefix: '0' (48), followed at .L88 by a second character when
@ the base is 16.
ldr r2, [sp, #28]
cmp r2, #0
beq .L56
ldr r3, [sp, #12]
cmp r3, r1
ldrcc r2, [sp, #12]
movcc r3, #48
strccb r3, [r2, #0]
ldr r2, [sp, #12]
ldr r3, [sp, #20]
add r2, r2, #1
cmp r3, #16
str r2, [sp, #12]
beq .L88
.L56:
@ When flag bit 16 is clear, pad the remaining width with '0' (48) if flag
@ bit 1 is set, otherwise with a space (32).
ldr r2, [sp, #40]
cmp r2, #0
movne r6, r0
movne r7, r6, asl #16
bne .L60
sub r6, r0, #1
ldr r3, [sp, #16]
mov r6, r6, asl #16
tst r3, #1
mov r6, r6, lsr #16
movne r8, #48
moveq r8, #32
movs r7, r6, asl #16
bmi .L60
sub r2, r0, #1
ldr r3, [sp, #12]
mov r2, r2, asl #16
add r2, r3, r2, lsr #16
add r2, r2, #1
.L64:
cmp r1, r3
strhib r8, [r3, #0]
add r3, r3, #1
cmp r3, r2
bne .L64
rsb r6, r0, r6
mov r6, r6, asl #16
mov r6, r6, lsr #16
str r3, [sp, #12]
mov r7, r6, asl #16
.L60:
@ Emit '0' (48) bytes while the digit count r4 is below ip.
sub r3, ip, #1
mov r3, r3, asl #16
cmp r4, r3, asr #16
bgt .L65
sub r0, ip, #2
ldr r3, [sp, #12]
mov r0, r0, asl #16
add r0, r3, r0, asr #16
add r0, r0, #1
mov ip, #48
.L67:
cmp r1, r3
strhib ip, [r3, #0]
add r3, r3, #1
rsb r2, r3, r0
cmp r4, r2
ble .L67
str r3, [sp, #12]
.L65:
@ Copy the r5 + 1 bytes of the temporary buffer to the output in reverse
@ order (last byte produced is written first).
cmp r5, #0
blt .L68
add r2, sp, #52
ldr r3, [sp, #12]
sub r0, r2, #1
add r2, r2, r5
.L70:
cmp r1, r3
ldrhib ip, [r2, #0] @ zero_extendqisi2
sub r2, r2, #1
strhib ip, [r3, #0]
cmp r2, r0
add r3, r3, #1
bne .L70
ldr r4, [sp, #12]
add r5, r5, #1
add r4, r4, r5
str r4, [sp, #12]
.L68:
@ Trailing space padding while the remaining width (r7) is positive.
cmp r7, #0
ble .L71
ldr r5, [sp, #12]
sub r2, r6, #1
mov r2, r2, asl #16
add r2, r5, r2, lsr #16
add r2, r2, #1
mov r3, r5
mov r0, #32
.L73:
cmp r1, r3
strhib r0, [r3, #0]
add r3, r3, #1
cmp r3, r2
bne .L73
str r3, [sp, #12]
.L71:
@ Return the final cursor.
ldr r0, [sp, #12]
add sp, sp, #124
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
bx lr
.L86:
@ Flag bit 4: sign character '+' (43), width reduced by 1.
sub r0, r4, #1
mov r0, r0, asl #16
mov r0, r0, lsr #16
str r0, [sp, #24]
mov r0, #43
str r0, [sp, #32]
b .L39
.L75:
@ Negative value: sign character '-' (45), width reduced by 1, and the
@ 64-bit value in r2:r3 is negated.
sub r0, r4, #1
mov r0, r0, asl #16
mov r0, r0, lsr #16
mov ip, #45
rsbs r2, r2, #0
rsc r3, r3, #0
str r0, [sp, #24]
str ip, [sp, #32]
b .L39
.L87:
@ Single digit: store r2 + 48 in the temporary buffer; r4 = 1, r5 = 0.
add r0, r2, #48
strb r0, [sp, #52]
mov r5, r3
mov r4, #1
b .L44
.L45:
@ Base 10: put_dec fills the temporary buffer; r4 = bytes written.
@ r1 (end pointer) is kept in [sp,#4] across the call.
add r4, sp, #52
mov r0, r4
str r1, [sp, #4]
bl put_dec
rsb r4, r4, r0
sub r5, r4, #1
ldr r1, [sp, #4]
b .L44
.L88:
@ Base 16 prefix character: 88 ('X') ORed with flags & 32.
cmp r1, r2
ldrhi r2, [sp, #8]
orrhi r3, r2, #88
ldrhi r2, [sp, #12]
strhib r3, [r2, #0]
ldr r3, [sp, #12]
add r3, r3, #1
str r3, [sp, #12]
b .L56
.L90:
.align 2
.L89:
.word .LANCHOR0
.size number, .-number
@ measure_number: count how many number() calls complete while time(0)
@ keeps returning the value stored at *r0.
@ In:  r0 = pointer to a stored time value, r2 = low word, r3 = high word of
@      the value to format
@ Out: r0 = number of conversions performed (a multiple of 4000);
@      *pointer is overwritten with the last time(0) result.
.align 2
.type measure_number, %function
measure_number:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 72
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
mov fp, r3
sub sp, sp, #84
ldr r3, [r0, #0]
str r0, [sp, #12]
mov r0, #0
str r3, [sp, #8]
mov sl, r2
bl time
ldr r3, [sp, #8]
@ r9 = conversion counter, r6 = output buffer (sp+16), r8 = r6 + 63 (end),
@ r5 = address of the 8-byte spec at .LANCHOR0+16, r7 = 0 (terminator byte).
mov r9, #0
add r6, sp, #16
cmp r3, r0
mov r7, r9
ldr r5, .L98
add r8, r6, #63
bne .L97
.L94:
@ r4 counts down from 3999; the inner loop runs until r4 reaches -1.
ldr r4, .L98+4
.L93:
@ Copy the 8-byte spec to the outgoing stack area and call
@ number(r6, r8, value, spec); a zero byte is stored at the returned pointer.
mov ip, sp
ldmia r5, {r0, r1}
mov r2, sl
stmia ip, {r0, r1}
mov r3, fp
mov r0, r6
mov r1, r8
bl number
sub r4, r4, #1
cmn r4, #1
strb r7, [r0, #0]
bne .L93
mov r0, #0
bl time
ldr r3, [sp, #8]
add r9, r9, #4000
cmp r3, r0
beq .L94
.L97:
@ time(0) changed: publish the new time and return the count.
ldr ip, [sp, #12]
str r0, [ip, #0]
mov r0, r9
add sp, sp, #84
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, fp, lr}
bx lr
.L99:
.align 2
.L98:
.word .LANCHOR0+16
.word 3999
.size measure_number, .-measure_number
@ measure: wait for time(0) to change, then call measure_number for seven
@ test values and print the seven results with the format string at .LC0.
@ Takes no arguments; the value left in r0 is whatever printf returned.
.align 2
.type measure, %function
measure:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 8
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr}
mov r0, #0
sub sp, sp, #24
bl time
str r0, [sp, #20]
.L101:
@ Spin until time(0) differs from the value saved at [sp,#20].
mov r0, #0
bl time
ldr r3, [sp, #20]
cmp r0, r3
beq .L101
@ r8 = sp + 20; the new time is stored there and r8 is passed to every
@ measure_number call as the time pointer.
add r8, sp, #24
str r0, [r8, #-4]!
@ Test values, low word in r2 and high word in r3: 8, 123, 123456,
@ 12345678, 123456789, 0xffffffff, and all 64 bits set.
mov r2, #8
mov r3, #0
mov r0, r8
bl measure_number
mov r2, #123
mov sl, r0
mov r3, #0
mov r0, r8
bl measure_number
ldr r2, .L104
mov r7, r0
mov r3, #0
mov r0, r8
bl measure_number
ldr r2, .L104+4
mov r6, r0
mov r3, #0
mov r0, r8
bl measure_number
ldr r2, .L104+8
mov r5, r0
mov r3, #0
mov r0, r8
bl measure_number
mvn r2, #0
mov r4, r0
mov r3, #0
mov r0, r8
bl measure_number
mvn r2, #0
mov r9, r0
mvn r3, #0
mov r0, r8
bl measure_number
@ printf(.LC0, sl, r7, r6, r5, r4, r9, r0): the first three results go in
@ r1-r3, the remaining four on the stack at sp+0 .. sp+12.
mov r1, sl
str r0, [sp, #12]
mov r2, r7
mov r3, r6
ldr r0, .L104+12
str r5, [sp, #0]
stmib sp, {r4, r9} @ phole stm
bl printf
add sp, sp, #24
ldmfd sp!, {r4, r5, r6, r7, r8, r9, sl, lr}
bx lr
.L105:
.align 2
.L104:
.word 123456
.word 12345678
.word 123456789
.word .LC0
.size measure, .-measure
@ check: format a 64-bit value with number() and with sprintf(.LC1) and
@ compare the two strings.
@ In:  r0 = low word, r1 = high word of the value
@ On a mismatch (.L109) it prints a message with the .LC2 format and calls
@ exit(1); otherwise it returns normally.
.align 2
.type check, %function
check:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 128
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r4, r5, r6, r7, lr}
ldr r3, .L110
sub sp, sp, #140
mov r5, r0
mov r6, r1
@ r4 = buffer for number() at sp+72, with end pointer r4 + 63; the 8-byte
@ spec at .LANCHOR0+16 is copied to the outgoing stack area first.
add r4, sp, #72
ldmia r3, {r0, r1}
mov r3, sp
stmia r3, {r0, r1}
mov r2, r5
mov r3, r6
add r1, r4, #63
mov r0, r4
bl number
@ r7 = second buffer at sp+8 for sprintf; a zero byte terminates the
@ number() output at the returned pointer.
add r7, sp, #8
mov r3, #0
strb r3, [r0, #0]
mov r2, r5
mov r3, r6
ldr r1, .L110+4
mov r0, r7
bl sprintf
mov r0, r4
mov r1, r7
bl strcmp
cmp r0, #0
bne .L109
add sp, sp, #140
ldmfd sp!, {r4, r5, r6, r7, lr}
bx lr
.L109:
@ Mismatch: printf(.LC2, value, number() buffer), then exit(1).
mov r2, r5
mov r3, r6
ldr r0, .L110+8
str r4, [sp, #0]
bl printf
mov r0, #1
bl exit
.L111:
.align 2
.L110:
.word .LANCHOR0+16
.word .LC1
.word .LC2
.size check, .-check
@ main: call measure four times, then loop forever running check() on a
@ 64-bit counter (r4 = low word, r5 = high word, starting at 0) and on its
@ negation. Whenever the low 18 bits of the counter are zero a progress line
@ is printed with the .LC3 format. There is no exit path from the loop in
@ this function.
.align 2
.global main
.type main, %function
main:
@ Function supports interworking.
@ args = 0, pretend = 0, frame = 0
@ frame_needed = 0, uses_anonymous_args = 0
stmfd sp!, {r3, r4, r5, r6, r7, r8, r9, sl, fp, lr}
bl measure
mov r4, #0
bl measure
mov r5, #0
bl measure
@ r6:r7 = mask 262143 (low word) / 0 (high word); sl:fp = increment of 1.
ldr r6, .L116
bl measure
mov r7, #0
mov sl, #1
mov fp, #0
b .L114
.L113:
@ 64-bit increment of the counter.
adds r4, r4, sl
adc r5, r5, fp
.L114:
mov r0, r4
mov r1, r5
bl check
@ Second check() call gets the negated counter; r8:r9 = counter & mask.
and r8, r4, r6
rsbs r0, r4, #0
rsc r1, r5, #0
and r9, r5, r7
bl check
orrs r8, r8, r9
bne .L113
@ Masked bits are all zero: print progress, then call fflush with r8,
@ which is 0 on this path.
mov r2, r4
mov r3, r5
ldr r0, .L116+4
bl printf
mov r0, r8
bl fflush
b .L113
.L117:
.align 2
.L116:
.word 262143
.word .LC3
.size main, .-main
@ Read-only data. .LANCHOR0 marks the start of this data: the code refers to
@ the 16-byte digit table as .LANCHOR0 and to dummy_spec as .LANCHOR0+16.
.section .rodata
.align 2
.LANCHOR0 = . + 0
.type digits.4059, %object
.size digits.4059, 16
digits.4059:
.ascii "0123456789ABCDEF"
@ dummy_spec: the 8-byte argument block copied onto the stack before each
@ number() call. number() reads byte 1 as flag bits, byte 2 (10 here) as the
@ base, and the two halfwords as width and minimum digit count.
.type dummy_spec, %object
.size dummy_spec, 8
dummy_spec:
.byte 8
.byte 0
.byte 10
.byte 0
.short 0
.short 0
@ Format strings: .LC0 is used by measure, .LC1 and .LC2 by check,
@ .LC3 by main.
.section .rodata.str1.4,"aMS",%progbits,1
.align 2
.LC0:
.ascii "Conversions per second: 8:%d 123:%d 123456:%d 12345"
.ascii "678:%d 123456789:%d 2^32:%d 2^64:%d\012\000"
.LC1:
.ascii "%llu\000"
.space 3
.LC2:
.ascii "Error in formatting %llu:'%s'\012\000"
.space 1
.LC3:
.ascii "\015Tested %llu \000"
.ident "GCC: (Debian 4.4.5-8) 4.4.5"
.section .note.GNU-stack,"",%progbits