Re: [PATCH 15/17] x86, mpx: do 32-bit-only cmpxchg for 32-bit apps

From: Borislav Petkov
Date: Sat Mar 28 2015 - 04:41:29 EST


On Fri, Mar 27, 2015 at 11:16:41AM -0700, Dave Hansen wrote:
> That would have saved creating 'u32 __user *bd_entry_32' so that we
> could implicitly do sizeof(*bd_entry_32). But, what else does it buy us?

Well, you could misappropriate futex_atomic_cmpxchg_inatomic() which
takes u32s already - you probably might want to rename it to something
more generic first, though.

Diff ontop:

---
Index: b/arch/x86/mm/mpx.c
===================================================================
--- a/arch/x86/mm/mpx.c 2015-03-28 09:21:40.199966745 +0100
+++ b/arch/x86/mm/mpx.c 2015-03-28 09:19:40.491968402 +0100
@@ -18,6 +18,7 @@
#include <asm/processor.h>
#include <asm/trace/mpx.h>
#include <asm/fpu-internal.h>
+#include <asm/futex.h>

#define CREATE_TRACE_POINTS
#include <asm/trace/mpx.h>
@@ -425,7 +426,6 @@ static int mpx_cmpxchg_bd_entry(struct m
unsigned long *actual_old_val_ptr, long __user *bd_entry_addr,
unsigned long expected_old_val, unsigned long new_bd_entry)
{
- int ret;
/*
* user_atomic_cmpxchg_inatomic() actually uses sizeof()
* the pointer thatt we pass to it to figure out how much
@@ -433,21 +433,16 @@ static int mpx_cmpxchg_bd_entry(struct m
* pass a pointer to a 64-bit data type when we only want
* a 32-bit copy.
*/
- if (is_64bit_mm(mm)) {
- ret = user_atomic_cmpxchg_inatomic(actual_old_val_ptr,
- bd_entry_addr, expected_old_val, new_bd_entry);
- } else {
- u32 uninitialized_var(actual_old_val_32);
- u32 expected_old_val_32 = expected_old_val;
- u32 new_bd_entry_32 = new_bd_entry;
- u32 __user *bd_entry_32 = (u32 __user *)bd_entry_addr;
- ret = user_atomic_cmpxchg_inatomic(&actual_old_val_32,
- bd_entry_32, expected_old_val_32,
- new_bd_entry_32);
- if (!ret)
- *actual_old_val_ptr = actual_old_val_32;
- }
- return ret;
+ if (is_64bit_mm(mm))
+ return user_atomic_cmpxchg_inatomic(actual_old_val_ptr,
+ bd_entry_addr,
+ expected_old_val,
+ new_bd_entry);
+ else
+ return futex_atomic_cmpxchg_inatomic((u32 *)actual_old_val_ptr,
+ (u32 __user *)bd_entry_addr,
+ expected_old_val,
+ new_bd_entry);
}

/*
---

The asm looks the same except the retval. Yours does

mov %rax, (%rsi)

for actual_old_val_ptr which, AFAICT, is not needed in the 32-bit
case because there we're returning a 32-bit value anyway:

*actual_old_val_ptr = actual_old_val_32;

but gcc writes out the whole 64-bit register %rax to the pointer in %rsi
because it is an unsigned long it gets passed in.

Not that it matters, it is being sign-extended before that with

movl %eax, %eax # actual_old_val_32, tmp137


yours:
------
.loc 1 445 0
cmpq %rax, %rdx # D.38827, bd_entry_addr
ja .L151 #,
.LBB993:
.loc 1 445 0 is_stmt 0 discriminator 1
movl %ecx, %eax # expected_old_val, actual_old_val_32
.LVL179:
xorl %edi, %edi # ret
.LVL180:
#APP
# 445 "arch/x86/mm/mpx.c" 1

1: .pushsection .smp_locks,"a"
.balign 4
.long 671f - .
.popsection
671:
lock; cmpxchgl %r8d, (%rdx) # new_bd_entry, MEM[(u32 *)bd_entry_addr_12(D)]
2:
.section .fixup, "ax"
3: mov $-14, %edi #, ret
jmp 2b
.previous
.pushsection "__ex_table","a"
.balign 8
.long (1b) - .
.long (3b) - .
.popsection

# 0 "" 2
#NO_APP
.LBE993:
.loc 1 448 0 is_stmt 1 discriminator 1
testl %edi, %edi # ret
jne .L151 #,
.loc 1 449 0
movl %eax, %eax # actual_old_val_32, tmp137
.LVL181:
movq %rax, (%rsi) # tmp137, *actual_old_val_ptr_17(D)
---



futex_atomic_cmpxchg_inatomic:
------------------------------
.file 9 "./arch/x86/include/asm/futex.h"
.loc 9 113 0
cmpq %rax, %rdx # D.38827, bd_entry_addr
ja .L153 #,
.LBB1003:
movl %ecx, %eax # expected_old_val, __old
.LVL185:
xorl %edi, %edi # ret
.LVL186:
#APP
# 113 "./arch/x86/include/asm/futex.h" 1

1: .pushsection .smp_locks,"a"
.balign 4
.long 671f - .
.popsection
671:
lock; cmpxchgl %r8d, (%rdx) # new_bd_entry, MEM[(u32 *)bd_entry_addr_12(D)]
2:
.section .fixup, "ax"
3: mov $-14, %edi #, ret
jmp 2b
.previous
.pushsection "__ex_table","a"
.balign 8
.long (1b) - .
.long (3b) - .
.popsection

# 0 "" 2
#NO_APP
movl %eax, (%rsi) # __old, MEM[(u32 *)actual_old_val_ptr_17(D)]
.LBE1003:
.LBE995:
.LBE994:
.LBE989:
.loc 1 458 0
movl %edi, %eax # ret,
---

Here the objdump output which shows the difference better:

yours:
------
b02: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
b08: 48 83 e8 04 sub $0x4,%rax
b0c: bf f2 ff ff ff mov $0xfffffff2,%edi
b11: 48 39 c2 cmp %rax,%rdx
b14: 77 e8 ja afe <mpx_cmpxchg_bd_entry+0x3e>
b16: 89 c8 mov %ecx,%eax
b18: 31 ff xor %edi,%edi
b1a: f0 44 0f b1 02 lock cmpxchg %r8d,(%rdx)
b1f: 85 ff test %edi,%edi
b21: 75 db jne afe <mpx_cmpxchg_bd_entry+0x3e>
b23: 89 c0 mov %eax,%eax
b25: 48 89 06 mov %rax,(%rsi)
b28: 89 f8 mov %edi,%eax
b2a: 5d pop %rbp
b2b: c3 retq
b2c: 0f 1f 40 00 nopl 0x0(%rax)



futex_atomic_cmpxchg_inatomic:
------------------------------
b72: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
b78: 48 83 ef 04 sub $0x4,%rdi
b7c: b8 f2 ff ff ff mov $0xfffffff2,%eax
b81: 48 39 fa cmp %rdi,%rdx
b84: 77 ea ja b70 <mpx_cmpxchg_bd_entry+0x40>
b86: 89 c8 mov %ecx,%eax
b88: 31 ff xor %edi,%edi
b8a: f0 44 0f b1 02 lock cmpxchg %r8d,(%rdx)
b8f: 89 06 mov %eax,(%rsi)
b91: 89 f8 mov %edi,%eax
b93: 5d pop %rbp
b94: c3 retq
b95: 66 66 2e 0f 1f 84 00 data16 nopw %cs:0x0(%rax,%rax,1)
b9c: 00 00 00 00

AFAICT, in this case, we return only a 32-bit value and don't touch
the upper 32 bits of actual_old_val which might be a problem if the
assumptions of the callers is that the whole unsigned long is being
changed.

If that's not the case, then you get much nicer code :-)

--
Regards/Gruss,
Boris.

ECO tip #101: Trim your mails when you reply.
--
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo@xxxxxxxxxxxxxxx
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/