Re: [PATCH] Fix copy_user on x86_64

From: Anton Arapov
Date: Thu Jun 26 2008 - 05:13:15 EST


Just in order to bring your attention!

This is the patch for the copy_user routine that you discussed recently.

I have attached an updated patch so that it passes the 'checkpatch.pl' tool smoothly.
A couple of stray whitespace characters have been removed, and Signed-off-by and Acked-by lines have been added.

Patch cleanly applies to current git tree.

-Anton

Vitaly Mayatskikh wrote:
A bug in copy_user can be exploited from userspace on RHEL-4 and other
distributions with a similar kernel base (CVE-2008-0598). A patch with a
fixed copy_user is attached; it falls into a byte-copy loop on faults and
returns the correct count of uncopied bytes. The patch also fixes the
incorrect passing of the zero flag in copy_to_user (%eax was used instead
of %ecx).

Also included is a SystemTap script that tests corner cases in both
copy_user implementations (unrolled and string).



------------------------------------------------------------------------



A bug in copy_user can be exploited from userspace on RHEL-4 and other
distributions with a similar kernel base (CVE-2008-0598). A patch with a
fixed copy_user is attached; it falls into a byte-copy loop on faults and
returns the correct count of uncopied bytes. The patch also fixes the
incorrect passing of the zero flag in copy_to_user (%eax was used instead
of %ecx).

Also included is a SystemTap script that tests corner cases in both
copy_user implementations (unrolled and string).


Signed-off-by: Vitaly Mayatskikh <v.mayatskih@xxxxxxxxx>
Acked-by: Anton Arapov <aarapov@xxxxxxxxxx>

diff --git a/arch/x86/lib/copy_user_64.S b/arch/x86/lib/copy_user_64.S
index ee1c3f6..6402891 100644
--- a/arch/x86/lib/copy_user_64.S
+++ b/arch/x86/lib/copy_user_64.S
@@ -42,7 +42,7 @@ ENTRY(copy_to_user)
jc bad_to_user
cmpq threadinfo_addr_limit(%rax),%rcx
jae bad_to_user
- xorl %eax,%eax /* clear zero flag */
+ xorl %ecx,%ecx /* clear zero flag */
ALTERNATIVE_JUMP X86_FEATURE_REP_GOOD,copy_user_generic_unrolled,copy_user_generic_string
CFI_ENDPROC

@@ -170,8 +170,8 @@ ENTRY(copy_user_generic_unrolled)
jnz .Lloop_8

.Lhandle_7:
+ andl $7,%edx
movl %edx,%ecx
- andl $7,%ecx
jz .Lende
.p2align 4
.Lloop_1:
@@ -218,41 +218,74 @@ ENTRY(copy_user_generic_unrolled)
.section __ex_table,"a"
.align 8
.quad .Ls1,.Ls1e /* Ls1-Ls4 have copied zero bytes */
- .quad .Ls2,.Ls1e
- .quad .Ls3,.Ls1e
- .quad .Ls4,.Ls1e
- .quad .Ld1,.Ls1e /* Ld1-Ld4 have copied 0-24 bytes */
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
+ .quad .Ls2,.Ls2e
+ .quad .Ls3,.Ls3e
+ .quad .Ls4,.Ls4e
+ .quad .Ld1,.Ld1e /* Ld1-Ld4 have copied 0-24 bytes */
+ .quad .Ld2,.Ld2e
+ .quad .Ld3,.Ld3e
+ .quad .Ld4,.Ld4e
.quad .Ls5,.Ls5e /* Ls5-Ls8 have copied 32 bytes */
- .quad .Ls6,.Ls5e
- .quad .Ls7,.Ls5e
- .quad .Ls8,.Ls5e
- .quad .Ld5,.Ls5e /* Ld5-Ld8 have copied 32-56 bytes */
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
+ .quad .Ls6,.Ls6e
+ .quad .Ls7,.Ls7e
+ .quad .Ls8,.Ls8e
+ .quad .Ld5,.Ld5e /* Ld5-Ld8 have copied 32-56 bytes */
+ .quad .Ld6,.Ld6e
+ .quad .Ld7,.Ld7e
+ .quad .Ld8,.Ld8e
+ .quad .Ls9,.Ls9e
+ .quad .Ld9,.Ld9e
.quad .Ls10,.Le_byte
.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
.quad .Ls11,.Lzero_rest
.quad .Ld11,.Lzero_rest
#endif
+ .quad .Lt1,.Lt1e
.quad .Le5,.Le_zero
.previous

+ /* Exception on read in unrolled loop
+ Don't forget to store registers, which were loaded before fault.
+ Otherwise we will have up to 24 bytes of garbage and possible
+ security leak */
+.Ls8e: addl $8,%eax
+ movq %r9,6*8(%rdi)
+.Ls7e: addl $8,%eax
+ movq %r8,5*8(%rdi)
+.Ls6e: addl $8,%eax
+ movq %r11,4*8(%rdi)
+.Ls5e: addl $32,%eax
+ jmp .Ls1e
+
+.Ls4e: addl $8,%eax
+ movq %r9,2*8(%rdi)
+.Ls3e: addl $8,%eax
+ movq %r8,1*8(%rdi)
+.Ls2e: addl $8,%eax
+ movq %r11,(%rdi)
+.Ls1e: addq %rax,%rdi
+ addq %rax,%rsi
+ shlq $6,%rdx
+ addq %rbx,%rdx
+ subq %rax,%rdx
+ andl $63,%ecx
+ addq %rdx,%rcx
+.Lt1: rep /* copy last bytes */
+ movsb
+.Lt1e: movq %rcx,%rdx
+ jmp .Lzero_rest
+
+ /* Exception on write in unrolled loop */
/* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
+.Ld1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
+.Ld2e: addl $8,%eax
+.Ld3e: addl $8,%eax
+.Ld4e: addl $8,%eax
+.Ld5e: addl $8,%eax
+.Ld6e: addl $8,%eax
+.Ld7e: addl $8,%eax
+.Ld8e: addl $8,%eax
addq %rbx,%rdi /* +64 */
subq %rax,%rdi /* correct destination with computed offset */

@@ -260,19 +293,27 @@ ENTRY(copy_user_generic_unrolled)
addq %rax,%rdx /* add offset to loopcnt */
andl $63,%ecx /* remaining bytes */
addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
+ jmp .Le_zero /* fault in dst, just return */

- /* exception on quad word loop in tail handling */
+ /* Exception on read in quad word loop in tail handling */
/* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
+.Ls9e: shll $3,%ecx /* fault in src of quad loop */
+ andl $7,%edx
+ addl %edx,%ecx
+ jmp .Lt1 /* try to copy last bytes */
+
+ /* Exception on write in quad word loop in tail handling */
+.Ld9e: shll $3,%ecx /* fault in dst of quad loop */
andl $7,%edx
addl %ecx,%edx
+ jmp .Le_zero /* fault in dst, just return */
+
/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
cmpl $0,(%rsp)
jz .Le_zero
movq %rdx,%rcx
+ /* Exception on read or write in byte loop in tail handling */
.Le_byte:
xorl %eax,%eax
.Le5: rep
@@ -308,44 +349,39 @@ ENDPROC(copy_user_generic)
ENTRY(copy_user_generic_string)
CFI_STARTPROC
movl %ecx,%r8d /* save zero flag */
+ xorq %rax,%rax
movl %edx,%ecx
shrl $3,%ecx
- andl $7,%edx
- jz 10f
-1: rep
- movsq
+ andl $7,%edx
+ andl %r8d,%r8d /* store zero flag in eflags */
+.Lc1: rep
+ movsq
movl %edx,%ecx
-2: rep
+.Lc2: rep
movsb
-9: movl %ecx,%eax
ret

- /* multiple of 8 byte */
-10: rep
- movsq
- xor %eax,%eax
+.Lc1e: leaq (%rdx,%rcx,8),%r8
+ movl %r8d,%ecx
+.Lc3: rep /* try to use byte copy */
+ movsb
+.Lc3e: movl %ecx,%r8d
+ jz .Lc4e /* rep movs* does not affect eflags */
+.Lc4: rep
+ stosb
+.Lc4e: movl %r8d,%eax
ret

- /* exception handling */
-3: lea (%rdx,%rcx,8),%rax /* exception on quad loop */
- jmp 6f
-5: movl %ecx,%eax /* exception on byte loop */
- /* eax: left over bytes */
-6: testl %r8d,%r8d /* zero flag set? */
- jz 7f
- movl %eax,%ecx /* initialize x86 loop counter */
- push %rax
- xorl %eax,%eax
-8: rep
- stosb /* zero the rest */
-11: pop %rax
-7: ret
+.Lc2e: movl %ecx,%r8d
+ jz .Lc4e
+ jmp .Lc4
CFI_ENDPROC
-END(copy_user_generic_c)
+ENDPROC(copy_user_generic_string)

.section __ex_table,"a"
- .quad 1b,3b
- .quad 2b,5b
- .quad 8b,11b
- .quad 10b,3b
+ .align 8
+ .quad .Lc1,.Lc1e
+ .quad .Lc2,.Lc2e
+ .quad .Lc3,.Lc3e
+ .quad .Lc4,.Lc4e
.previous
diff --git a/arch/x86/lib/copy_user_nocache_64.S b/arch/x86/lib/copy_user_nocache_64.S
index 9d3d1ab..b34b6bd 100644
--- a/arch/x86/lib/copy_user_nocache_64.S
+++ b/arch/x86/lib/copy_user_nocache_64.S
@@ -146,41 +146,74 @@ ENTRY(__copy_user_nocache)
.section __ex_table,"a"
.align 8
.quad .Ls1,.Ls1e /* .Ls[1-4] - 0 bytes copied */
- .quad .Ls2,.Ls1e
- .quad .Ls3,.Ls1e
- .quad .Ls4,.Ls1e
- .quad .Ld1,.Ls1e /* .Ld[1-4] - 0..24 bytes coped */
- .quad .Ld2,.Ls2e
- .quad .Ld3,.Ls3e
- .quad .Ld4,.Ls4e
+ .quad .Ls2,.Ls2e
+ .quad .Ls3,.Ls3e
+ .quad .Ls4,.Ls4e
+ .quad .Ld1,.Ld1e /* .Ld[1-4] - 0..24 bytes coped */
+ .quad .Ld2,.Ld2e
+ .quad .Ld3,.Ld3e
+ .quad .Ld4,.Ld4e
.quad .Ls5,.Ls5e /* .Ls[5-8] - 32 bytes copied */
- .quad .Ls6,.Ls5e
- .quad .Ls7,.Ls5e
- .quad .Ls8,.Ls5e
- .quad .Ld5,.Ls5e /* .Ld[5-8] - 32..56 bytes copied */
- .quad .Ld6,.Ls6e
- .quad .Ld7,.Ls7e
- .quad .Ld8,.Ls8e
- .quad .Ls9,.Le_quad
- .quad .Ld9,.Le_quad
+ .quad .Ls6,.Ls6e
+ .quad .Ls7,.Ls7e
+ .quad .Ls8,.Ls8e
+ .quad .Ld5,.Ld5e /* .Ld[5-8] - 32..56 bytes copied */
+ .quad .Ld6,.Ld6e
+ .quad .Ld7,.Ld7e
+ .quad .Ld8,.Ld8e
+ .quad .Ls9,.Ls9e
+ .quad .Ld9,.Ld9e
.quad .Ls10,.Le_byte
.quad .Ld10,.Le_byte
#ifdef FIX_ALIGNMENT
.quad .Ls11,.Lzero_rest
.quad .Ld11,.Lzero_rest
#endif
+ .quad .Lt1,.Lt1e
.quad .Le5,.Le_zero
.previous

+ /* Exception on read in unrolled loop
+ Don't forget to store registers, which were loaded before fault.
+ Otherwise we will have up to 24 bytes of garbage and possible
+ security leak */
+.Ls8e: addl $8,%eax
+ movq %r9,6*8(%rdi)
+.Ls7e: addl $8,%eax
+ movq %r8,5*8(%rdi)
+.Ls6e: addl $8,%eax
+ movq %r11,4*8(%rdi)
+.Ls5e: addl $32,%eax
+ jmp .Ls1e
+
+.Ls4e: addl $8,%eax
+ movq %r9,2*8(%rdi)
+.Ls3e: addl $8,%eax
+ movq %r8,1*8(%rdi)
+.Ls2e: addl $8,%eax
+ movq %r11,(%rdi)
+.Ls1e: addq %rax,%rdi
+ addq %rax,%rsi
+ shlq $6,%rdx
+ addq %rbx,%rdx
+ subq %rax,%rdx
+ andl $63,%ecx
+ addq %rdx,%rcx
+.Lt1: rep /* copy last bytes */
+ movsb
+.Lt1e: movq %rcx,%rdx
+ jmp .Lzero_rest
+
+ /* Exception on write in unrolled loop */
/* eax: zero, ebx: 64 */
-.Ls1e: addl $8,%eax /* eax: bytes left uncopied: Ls1e: 64 .. Ls8e: 8 */
-.Ls2e: addl $8,%eax
-.Ls3e: addl $8,%eax
-.Ls4e: addl $8,%eax
-.Ls5e: addl $8,%eax
-.Ls6e: addl $8,%eax
-.Ls7e: addl $8,%eax
-.Ls8e: addl $8,%eax
+.Ld1e: addl $8,%eax /* eax is bytes left uncopied within the loop (Ls1e: 64 .. Ls8e: 8) */
+.Ld2e: addl $8,%eax
+.Ld3e: addl $8,%eax
+.Ld4e: addl $8,%eax
+.Ld5e: addl $8,%eax
+.Ld6e: addl $8,%eax
+.Ld7e: addl $8,%eax
+.Ld8e: addl $8,%eax
addq %rbx,%rdi /* +64 */
subq %rax,%rdi /* correct destination with computed offset */

@@ -188,19 +221,27 @@ ENTRY(__copy_user_nocache)
addq %rax,%rdx /* add offset to loopcnt */
andl $63,%ecx /* remaining bytes */
addq %rcx,%rdx /* add them */
- jmp .Lzero_rest
+ jmp .Le_zero /* fault in dst, just return */

- /* exception on quad word loop in tail handling */
+ /* Exception on read in quad word loop in tail handling */
/* ecx: loopcnt/8, %edx: length, rdi: correct */
-.Le_quad:
- shll $3,%ecx
+.Ls9e: shll $3,%ecx /* fault in src of quad loop */
+ andl $7,%edx
+ addl %edx,%ecx
+ jmp .Lt1 /* try to copy last bytes */
+
+ /* Exception on write in quad word loop in tail handling */
+.Ld9e: shll $3,%ecx /* fault in dst of quad loop */
andl $7,%edx
addl %ecx,%edx
+ jmp .Le_zero /* fault in dst, just return */
+
/* edx: bytes to zero, rdi: dest, eax:zero */
.Lzero_rest:
cmpl $0,(%rsp) /* zero flag set? */
jz .Le_zero
movq %rdx,%rcx
+ /* Exception on read or write in byte loop in tail handling */
.Le_byte:
xorl %eax,%eax
.Le5: rep