[PATCH 2/7] crypto: aesni: make non-AVX AES-GCM work with all valid auth_tag_len

From: Sabrina Dubroca
Date: Fri Apr 28 2017 - 12:16:04 EST


Signed-off-by: Sabrina Dubroca <sd@xxxxxxxxxxxxxxx>
---
arch/x86/crypto/aesni-intel_asm.S | 62 ++++++++++++++++++++++++++++++---------
1 file changed, 48 insertions(+), 14 deletions(-)

diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S
index 605726aaf0a2..16627fec80b2 100644
--- a/arch/x86/crypto/aesni-intel_asm.S
+++ b/arch/x86/crypto/aesni-intel_asm.S
@@ -1549,18 +1549,35 @@ ENTRY(aesni_gcm_dec)
mov arg10, %r11 # %r11 = auth_tag_len
cmp $16, %r11
je _T_16_decrypt
- cmp $12, %r11
- je _T_12_decrypt
+ cmp $8, %r11
+ jl _T_4_decrypt
_T_8_decrypt:
MOVQ_R64_XMM %xmm0, %rax
mov %rax, (%r10)
- jmp _return_T_done_decrypt
-_T_12_decrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
+ add $8, %r10
+ sub $8, %r11
psrldq $8, %xmm0
+ cmp $0, %r11
+ je _return_T_done_decrypt
+_T_4_decrypt:
+ movd %xmm0, %eax
+ mov %eax, (%r10)
+ add $4, %r10
+ sub $4, %r11
+ psrldq $4, %xmm0
+ cmp $0, %r11
+ je _return_T_done_decrypt
+_T_123_decrypt:
movd %xmm0, %eax
- mov %eax, 8(%r10)
+ cmp $2, %r11
+ jl _T_1_decrypt
+ mov %ax, (%r10)
+ cmp $2, %r11
+ je _return_T_done_decrypt
+ add $2, %r10
+ sar $16, %eax
+_T_1_decrypt:
+ mov %al, (%r10)
jmp _return_T_done_decrypt
_T_16_decrypt:
movdqu %xmm0, (%r10)
@@ -1813,18 +1830,35 @@ ENTRY(aesni_gcm_enc)
mov arg10, %r11 # %r11 = auth_tag_len
cmp $16, %r11
je _T_16_encrypt
- cmp $12, %r11
- je _T_12_encrypt
+ cmp $8, %r11
+ jl _T_4_encrypt
_T_8_encrypt:
MOVQ_R64_XMM %xmm0, %rax
mov %rax, (%r10)
- jmp _return_T_done_encrypt
-_T_12_encrypt:
- MOVQ_R64_XMM %xmm0, %rax
- mov %rax, (%r10)
+ add $8, %r10
+ sub $8, %r11
psrldq $8, %xmm0
+ cmp $0, %r11
+ je _return_T_done_encrypt
+_T_4_encrypt:
+ movd %xmm0, %eax
+ mov %eax, (%r10)
+ add $4, %r10
+ sub $4, %r11
+ psrldq $4, %xmm0
+ cmp $0, %r11
+ je _return_T_done_encrypt
+_T_123_encrypt:
movd %xmm0, %eax
- mov %eax, 8(%r10)
+ cmp $2, %r11
+ jl _T_1_encrypt
+ mov %ax, (%r10)
+ cmp $2, %r11
+ je _return_T_done_encrypt
+ add $2, %r10
+ sar $16, %eax
+_T_1_encrypt:
+ mov %al, (%r10)
jmp _return_T_done_encrypt
_T_16_encrypt:
movdqu %xmm0, (%r10)
--
2.12.2