[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Sun Feb 26 20:28:19 UTC 2017
The branch master has been updated
via e052083cc7620379b7119cdbe4def5ea5de65c18 (commit)
via 5e32cfb2b6aec4d8d80083dabbd25bf89a482f21 (commit)
via fa62bc4661960a593a77d2c3f260173c3aa7333d (commit)
via 49508b23ce929ad5c8381bdc4b397eb41fd06137 (commit)
via 1c47e8836f4213251957254764886e82ac2563bc (commit)
via f17652e5f9198941ce761da2ccc6ce584fd90e81 (commit)
from 26a556e778f167070037fee243d7e6b9800fdb7f (commit)
- Log -----------------------------------------------------------------
commit e052083cc7620379b7119cdbe4def5ea5de65c18
Author: Andy Polyakov <appro at openssl.org>
Date: Sat Feb 25 18:37:24 2017 +0100
poly1305/asm/poly1305-x86_64.pl: minor AVX512 optimization.
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit 5e32cfb2b6aec4d8d80083dabbd25bf89a482f21
Author: Andy Polyakov <appro at openssl.org>
Date: Sat Feb 25 22:17:21 2017 +0100
crypto/x86_64cpuid.pl: add CFI annotations.
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit fa62bc4661960a593a77d2c3f260173c3aa7333d
Author: Andy Polyakov <appro at openssl.org>
Date: Sat Feb 25 22:16:57 2017 +0100
whrlpool/asm/wp-x86_64.pl: add CFI annotations.
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit 49508b23ce929ad5c8381bdc4b397eb41fd06137
Author: Andy Polyakov <appro at openssl.org>
Date: Sat Feb 25 22:16:38 2017 +0100
camellia/asm/cmll-x86_64.pl: add CFI annotations.
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit 1c47e8836f4213251957254764886e82ac2563bc
Author: Andy Polyakov <appro at openssl.org>
Date: Sat Feb 25 19:37:02 2017 +0100
poly1305/asm/poly1305-x86_64.pl: add CFI annotations.
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit f17652e5f9198941ce761da2ccc6ce584fd90e81
Author: Andy Polyakov <appro at openssl.org>
Date: Sat Feb 25 19:36:43 2017 +0100
chacha/asm/chacha-x86_64.pl: add CFI annotations.
Reviewed-by: Rich Salz <rsalz at openssl.org>
-----------------------------------------------------------------------
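All six commits serve one theme: adding DWARF call-frame information (CFI)
to hand-written x86_64 assembly, so that unwinders, debuggers and profilers
can walk the stack through these functions even though they manage their
frames manually. The annotations track two things: where the canonical frame
address (CFA) currently sits relative to a register, and where each
callee-saved register is currently stored. Of the directives in the hunks
below, .cfi_startproc/.cfi_endproc, .cfi_adjust_cfa_offset, .cfi_def_cfa,
.cfi_def_cfa_register, .cfi_register and .cfi_restore are standard gas
directives; .cfi_push and .cfi_cfa_expression appear to be perlasm-level
shorthands expanded by the x86_64 translator (x86_64-xlate.pl). As a rough
stand-alone illustration (the "demo" function is hypothetical, not from
these commits), a ".cfi_push %rbx" amounts, in effect, to the following pair
of raw directives:

	.globl	demo
	.type	demo,@function
	.align	16
	demo:
	.cfi_startproc			# on entry: CFA = %rsp+8 (return address)
		push	%rbx
	.cfi_adjust_cfa_offset	8	# the push moved %rsp: CFA = %rsp+16
	.cfi_offset	%rbx,-16	# %rbx is saved at CFA-16
		xor	%eax,%eax	# ... function body ...
		mov	0(%rsp),%rbx	# restore in the style used below
		lea	8(%rsp),%rsp
	.cfi_adjust_cfa_offset	-8	# frame released: CFA = %rsp+8 again
	.cfi_restore	%rbx		# %rbx is back in its register
		ret
	.cfi_endproc
	.size	demo,.-demo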
Summary of changes:
crypto/camellia/asm/cmll-x86_64.pl | 57 ++++++++++++
crypto/chacha/asm/chacha-x86_64.pl | 41 +++++++++
crypto/poly1305/asm/poly1305-x86_64.pl | 161 ++++++++++++++++++++++++---------
crypto/whrlpool/asm/wp-x86_64.pl | 18 ++++
crypto/x86_64cpuid.pl | 4 +
5 files changed, 239 insertions(+), 42 deletions(-)
diff --git a/crypto/camellia/asm/cmll-x86_64.pl b/crypto/camellia/asm/cmll-x86_64.pl
index da5ad7b..02c52c3 100644
--- a/crypto/camellia/asm/cmll-x86_64.pl
+++ b/crypto/camellia/asm/cmll-x86_64.pl
@@ -137,11 +137,17 @@ Camellia_EncryptBlock:
.align 16
.Lenc_rounds:
Camellia_EncryptBlock_Rounds:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lenc_prologue:
#mov %rsi,$inp # put away arguments
@@ -173,13 +179,20 @@ Camellia_EncryptBlock_Rounds:
mov @S[3],12($out)
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%rbp
+.cfi_restore %rbp
mov 32(%rsp),%rbx
+.cfi_restore %rbx
lea 40(%rsp),%rsp
+.cfi_adjust_cfa_offset -40
.Lenc_epilogue:
ret
+.cfi_endproc
.size Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds
.type _x86_64_Camellia_encrypt,\@abi-omnipotent
@@ -247,11 +260,17 @@ Camellia_DecryptBlock:
.align 16
.Ldec_rounds:
Camellia_DecryptBlock_Rounds:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Ldec_prologue:
#mov %rsi,$inp # put away arguments
@@ -283,13 +302,20 @@ Camellia_DecryptBlock_Rounds:
mov @S[3],12($out)
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%rbp
+.cfi_restore %rbp
mov 32(%rsp),%rbx
+.cfi_restore %rbx
lea 40(%rsp),%rsp
+.cfi_adjust_cfa_offset -40
.Ldec_epilogue:
ret
+.cfi_endproc
.size Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds
.type _x86_64_Camellia_decrypt,\@abi-omnipotent
@@ -409,11 +435,17 @@ $code.=<<___;
.type Camellia_Ekeygen,\@function,3
.align 16
Camellia_Ekeygen:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lkey_prologue:
mov %edi,${keyend}d # put away arguments, keyBitLength
@@ -573,13 +605,20 @@ $code.=<<___;
mov \$4,%eax
.Ldone:
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%rbp
+.cfi_restore %rbp
mov 32(%rsp),%rbx
+.cfi_restore %rbx
lea 40(%rsp),%rsp
+.cfi_adjust_cfa_offset -40
.Lkey_epilogue:
ret
+.cfi_endproc
.size Camellia_Ekeygen,.-Camellia_Ekeygen
___
}
@@ -637,17 +676,25 @@ $code.=<<___;
.type Camellia_cbc_encrypt,\@function,6
.align 16
Camellia_cbc_encrypt:
+.cfi_startproc
cmp \$0,%rdx
je .Lcbc_abort
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lcbc_prologue:
mov %rsp,%rbp
+.cfi_def_cfa_register %rbp
sub \$64,%rsp
and \$-64,%rsp
@@ -668,6 +715,7 @@ Camellia_cbc_encrypt:
mov %r8,$_ivp
mov %rbp,$_rsp
+.cfi_cfa_expression $_rsp,deref,+56
.Lcbc_body:
lea .LCamellia_SBOX(%rip),$Tbl
@@ -856,15 +904,24 @@ Camellia_cbc_encrypt:
.align 16
.Lcbc_done:
mov $_rsp,%rcx
+.cfi_def_cfa %rcx,56
mov 0(%rcx),%r15
+.cfi_restore %r15
mov 8(%rcx),%r14
+.cfi_restore %r14
mov 16(%rcx),%r13
+.cfi_restore %r13
mov 24(%rcx),%r12
+.cfi_restore %r12
mov 32(%rcx),%rbp
+.cfi_restore %rbp
mov 40(%rcx),%rbx
+.cfi_restore %rbx
lea 48(%rcx),%rsp
+.cfi_def_cfa %rsp,8
.Lcbc_abort:
ret
+.cfi_endproc
.size Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
.asciz "Camellia for x86_64 by <appro\@openssl.org>"
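Camellia_cbc_encrypt is the interesting case in this file: it realigns the
stack (and \$-64,%rsp) and keeps the original stack pointer first in %rbp
and then in a stack slot ($_rsp), so for most of the body the CFA cannot be
described as register plus offset and needs a DWARF expression. Reading the
annotations above together, as a summary of the diff rather than new code:

	# entry:                      CFA = %rsp +  8
	# after six pushes:           CFA = %rsp + 56   (7*8: return address
	#                                                plus 6 saved GPRs)
	# mov  %rsp,%rbp              CFA = %rbp + 56   (.cfi_def_cfa_register)
	# sub/and realign %rsp        CFA unchanged, %rbp still valid
	# mov  %rbp,$_rsp             CFA = *($_rsp) + 56  (.cfi_cfa_expression)
	# epilogue: mov $_rsp,%rcx    CFA = %rcx + 56   (.cfi_def_cfa %rcx,56)
	# lea  48(%rcx),%rsp          CFA = %rsp +  8   (.cfi_def_cfa %rsp,8)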
diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl
index 7fc1749..b59d96f 100755
--- a/crypto/chacha/asm/chacha-x86_64.pl
+++ b/crypto/chacha/asm/chacha-x86_64.pl
@@ -242,6 +242,7 @@ $code.=<<___;
.type ChaCha20_ctr32,\@function,5
.align 64
ChaCha20_ctr32:
+.cfi_startproc
cmp \$0,$len
je .Lno_data
mov OPENSSL_ia32cap_P+4(%rip),%r10
@@ -255,12 +256,19 @@ $code.=<<___;
jnz .LChaCha20_ssse3
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$64+24,%rsp
+.cfi_adjust_cfa_offset 64+24
.Lctr32_body:
#movdqa .Lsigma(%rip),%xmm0
@@ -401,15 +409,24 @@ $code.=<<___;
.Ldone:
lea 64+24+48(%rsp),%rsi
+.cfi_def_cfa %rsi,8
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lno_data:
ret
+.cfi_endproc
.size ChaCha20_ctr32,.-ChaCha20_ctr32
___
@@ -448,8 +465,10 @@ $code.=<<___;
.type ChaCha20_ssse3,\@function,5
.align 32
ChaCha20_ssse3:
+.cfi_startproc
.LChaCha20_ssse3:
mov %rsp,%r9 # frame pointer
+.cfi_def_cfa_register %r9
___
$code.=<<___ if ($avx);
test \$`1<<(43-32)`,%r10d
@@ -565,8 +584,10 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r9),%rsp
+.cfi_def_cfa_register %rsp
.Lssse3_epilogue:
ret
+.cfi_endproc
.size ChaCha20_ssse3,.-ChaCha20_ssse3
___
}
@@ -708,8 +729,10 @@ $code.=<<___;
.type ChaCha20_4x,\@function,5
.align 32
ChaCha20_4x:
+.cfi_startproc
.LChaCha20_4x:
mov %rsp,%r9 # frame pointer
+.cfi_def_cfa_register %r9
mov %r10,%r11
___
$code.=<<___ if ($avx>1);
@@ -1149,8 +1172,10 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r9),%rsp
+.cfi_def_cfa_register %rsp
.L4x_epilogue:
ret
+.cfi_endproc
.size ChaCha20_4x,.-ChaCha20_4x
___
}
@@ -1237,8 +1262,10 @@ $code.=<<___;
.type ChaCha20_4xop,\@function,5
.align 32
ChaCha20_4xop:
+.cfi_startproc
.LChaCha20_4xop:
mov %rsp,%r9 # frame pointer
+.cfi_def_cfa_register %r9
sub \$0x140+$xframe,%rsp
___
################ stack layout
@@ -1601,8 +1628,10 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r9),%rsp
+.cfi_def_cfa_register %rsp
.L4xop_epilogue:
ret
+.cfi_endproc
.size ChaCha20_4xop,.-ChaCha20_4xop
___
}
@@ -1735,8 +1764,10 @@ $code.=<<___;
.type ChaCha20_8x,\@function,5
.align 32
ChaCha20_8x:
+.cfi_startproc
.LChaCha20_8x:
mov %rsp,%r9 # frame register
+.cfi_def_cfa_register %r9
sub \$0x280+$xframe,%rsp
and \$-32,%rsp
___
@@ -2242,8 +2273,10 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r9),%rsp
+.cfi_def_cfa_register %rsp
.L8x_epilogue:
ret
+.cfi_endproc
.size ChaCha20_8x,.-ChaCha20_8x
___
}
@@ -2280,8 +2313,10 @@ $code.=<<___;
.type ChaCha20_avx512,\@function,5
.align 32
ChaCha20_avx512:
+.cfi_startproc
.LChaCha20_avx512:
mov %rsp,%r9 # frame pointer
+.cfi_def_cfa_register %r9
cmp \$512,$len
ja .LChaCha20_16x
@@ -2461,8 +2496,10 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r9),%rsp
+.cfi_def_cfa_register %rsp
.Lavx512_epilogue:
ret
+.cfi_endproc
.size ChaCha20_avx512,.-ChaCha20_avx512
___
}
@@ -2544,8 +2581,10 @@ $code.=<<___;
.type ChaCha20_16x,\@function,5
.align 32
ChaCha20_16x:
+.cfi_startproc
.LChaCha20_16x:
mov %rsp,%r9 # frame register
+.cfi_def_cfa_register %r9
sub \$64+$xframe,%rsp
and \$-64,%rsp
___
@@ -2963,8 +3002,10 @@ $code.=<<___ if ($win64);
___
$code.=<<___;
lea (%r9),%rsp
+.cfi_def_cfa_register %rsp
.L16x_epilogue:
ret
+.cfi_endproc
.size ChaCha20_16x,.-ChaCha20_16x
___
}
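The SIMD ChaCha20 paths all share one frame discipline: the incoming %rsp is
parked in %r9 (the "frame pointer" of the comments), .cfi_def_cfa_register
%r9 redirects the CFA there while %rsp is realigned, and a single
"lea (%r9),%rsp" tears the frame down. A minimal stand-alone sketch of that
pattern, with a hypothetical function name:

	.globl	simd_frame_demo
	.type	simd_frame_demo,@function
	.align	16
	simd_frame_demo:
	.cfi_startproc
		mov	%rsp,%r9	# keep the caller's stack pointer
	.cfi_def_cfa_register	%r9	# CFA = %r9+8 from here on
		sub	$0x280,%rsp	# allocate scratch space
		and	$-64,%rsp	# align for vector stores; %rsp is now
					# unpredictable to an unwinder, %r9 is not
		# ... vector code using 0(%rsp) ... 0x27f(%rsp) ...
		lea	(%r9),%rsp	# one instruction releases the frame
	.cfi_def_cfa_register	%rsp	# CFA = %rsp+8 again
		ret
	.cfi_endproc
	.size	simd_frame_demo,.-simd_frame_demo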
diff --git a/crypto/poly1305/asm/poly1305-x86_64.pl b/crypto/poly1305/asm/poly1305-x86_64.pl
index ff4efb3..a397019 100755
--- a/crypto/poly1305/asm/poly1305-x86_64.pl
+++ b/crypto/poly1305/asm/poly1305-x86_64.pl
@@ -210,16 +210,23 @@ $code.=<<___;
.type poly1305_blocks,\@function,4
.align 32
poly1305_blocks:
+.cfi_startproc
.Lblocks:
shr \$4,$len
jz .Lno_data # too short
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lblocks_body:
mov $len,%r15 # reassign $len
@@ -255,15 +262,23 @@ $code.=<<___;
mov $h2,16($ctx)
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%r12
+.cfi_restore %r12
mov 32(%rsp),%rbp
+.cfi_restore %rbp
mov 40(%rsp),%rbx
+.cfi_restore %rbx
lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lno_data:
.Lblocks_epilogue:
ret
+.cfi_endproc
.size poly1305_blocks,.-poly1305_blocks
.type poly1305_emit,\@function,3
@@ -484,6 +499,7 @@ __poly1305_init_avx:
.type poly1305_blocks_avx,\@function,4
.align 32
poly1305_blocks_avx:
+.cfi_startproc
mov 20($ctx),%r8d # is_base2_26
cmp \$128,$len
jae .Lblocks_avx
@@ -503,11 +519,17 @@ poly1305_blocks_avx:
jz .Leven_avx
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lblocks_avx_body:
mov $len,%r15 # reassign $len
@@ -610,24 +632,39 @@ poly1305_blocks_avx:
.align 16
.Ldone_avx:
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%r12
+.cfi_restore %r12
mov 32(%rsp),%rbp
+.cfi_restore %rbp
mov 40(%rsp),%rbx
+.cfi_restore %rbx
lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lno_data_avx:
.Lblocks_avx_epilogue:
ret
+.cfi_endproc
.align 32
.Lbase2_64_avx:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lbase2_64_avx_body:
mov $len,%r15 # reassign $len
@@ -687,18 +724,27 @@ poly1305_blocks_avx:
mov %r15,$len
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%r12
+.cfi_restore %r12
mov 32(%rsp),%rbp
+.cfi_restore %rbp
mov 40(%rsp),%rbx
+.cfi_restore %rbx
lea 48(%rsp),%rax
lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lbase2_64_avx_epilogue:
jmp .Ldo_avx
+.cfi_endproc
.align 32
.Leven_avx:
+.cfi_startproc
vmovd 4*0($ctx),$H0 # load hash value
vmovd 4*1($ctx),$H1
vmovd 4*2($ctx),$H2
@@ -709,6 +755,7 @@ poly1305_blocks_avx:
___
$code.=<<___ if (!$win64);
lea -0x58(%rsp),%r11
+.cfi_def_cfa %r11,0x60
sub \$0x178,%rsp
___
$code.=<<___ if ($win64);
@@ -1301,10 +1348,12 @@ $code.=<<___ if ($win64);
___
$code.=<<___ if (!$win64);
lea 0x58(%r11),%rsp
+.cfi_def_cfa %rsp,8
___
$code.=<<___;
vzeroupper
ret
+.cfi_endproc
.size poly1305_blocks_avx,.-poly1305_blocks_avx
.type poly1305_emit_avx,\@function,3
@@ -1372,6 +1421,7 @@ $code.=<<___;
.type poly1305_blocks_avx2,\@function,4
.align 32
poly1305_blocks_avx2:
+.cfi_startproc
mov 20($ctx),%r8d # is_base2_26
cmp \$128,$len
jae .Lblocks_avx2
@@ -1391,11 +1441,17 @@ poly1305_blocks_avx2:
jz .Leven_avx2
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lblocks_avx2_body:
mov $len,%r15 # reassign $len
@@ -1504,24 +1560,39 @@ poly1305_blocks_avx2:
.align 16
.Ldone_avx2:
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%r12
+.cfi_restore %r12
mov 32(%rsp),%rbp
+.cfi_restore %rbp
mov 40(%rsp),%rbx
+.cfi_restore %rbx
lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lno_data_avx2:
.Lblocks_avx2_epilogue:
ret
+.cfi_endproc
.align 32
.Lbase2_64_avx2:
+.cfi_startproc
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
.Lbase2_64_avx2_body:
mov $len,%r15 # reassign $len
@@ -1588,18 +1659,27 @@ poly1305_blocks_avx2:
mov \$`(1<<31|1<<30|1<<16)`,%r11d
mov 0(%rsp),%r15
+.cfi_restore %r15
mov 8(%rsp),%r14
+.cfi_restore %r14
mov 16(%rsp),%r13
+.cfi_restore %r13
mov 24(%rsp),%r12
+.cfi_restore %r12
mov 32(%rsp),%rbp
+.cfi_restore %rbp
mov 40(%rsp),%rbx
+.cfi_restore %rbx
lea 48(%rsp),%rax
lea 48(%rsp),%rsp
+.cfi_adjust_cfa_offset -48
.Lbase2_64_avx2_epilogue:
jmp .Ldo_avx2
+.cfi_endproc
.align 32
.Leven_avx2:
+.cfi_startproc
mov OPENSSL_ia32cap_P+8(%rip),%r10d
mov \$`(1<<31|1<<30|1<<16)`,%r11d
vmovd 4*0($ctx),%x#$H0 # load hash value base 2^26
@@ -1620,6 +1700,7 @@ $code.=<<___ if ($avx>2);
___
$code.=<<___ if (!$win64);
lea -8(%rsp),%r11
+.cfi_def_cfa %r11,16
sub \$0x128,%rsp
___
$code.=<<___ if ($win64);
@@ -2008,10 +2089,12 @@ $code.=<<___ if ($win64);
___
$code.=<<___ if (!$win64);
lea 8(%r11),%rsp
+.cfi_def_cfa %rsp,8
___
$code.=<<___;
vzeroupper
ret
+.cfi_endproc
.size poly1305_blocks_avx2,.-poly1305_blocks_avx2
___
#######################################################################
@@ -2031,11 +2114,13 @@ $code.=<<___;
.type poly1305_blocks_avx512,\@function,4
.align 32
poly1305_blocks_avx512:
+.cfi_startproc
.Lblocks_avx512:
vzeroupper
___
$code.=<<___ if (!$win64);
lea -8(%rsp),%r11
+.cfi_def_cfa %r11,16
sub \$0x128,%rsp
___
$code.=<<___ if ($win64);
@@ -2044,13 +2129,13 @@ $code.=<<___ if ($win64);
vmovdqa %xmm6,0x50(%r11)
vmovdqa %xmm7,0x60(%r11)
vmovdqa %xmm8,0x70(%r11)
- vmovdqa %xmm9,0x80(%r11)
- vmovdqa %xmm10,0x90(%r11)
- vmovdqa %xmm11,0xa0(%r11)
- vmovdqa %xmm12,0xb0(%r11)
- vmovdqa %xmm13,0xc0(%r11)
- vmovdqa %xmm14,0xd0(%r11)
- vmovdqa %xmm15,0xe0(%r11)
+ vmovdqa32 %xmm9,0x80(%r11)
+ vmovdqa32 %xmm10,0x90(%r11)
+ vmovdqa32 %xmm11,0xa0(%r11)
+ vmovdqa32 %xmm12,0xb0(%r11)
+ vmovdqa32 %xmm13,0xc0(%r11)
+ vmovdqa32 %xmm14,0xd0(%r11)
+ vmovdqa32 %xmm15,0xe0(%r11)
.Ldo_avx512_body:
___
$code.=<<___;
@@ -2213,36 +2298,21 @@ $code.=<<___;
# we could just flow along, hence the goal for $R0-$S4 is
# 1858286838784888 ...
- mov \$0b0110011001100110,%eax
- mov \$0b1100110011001100,%r8d
- mov \$0b0101010101010101,%r9d
+ vmovdqa32 128(%rcx),$M0 # .Lpermd_avx512:
+ mov \$0x7777,%eax
kmovw %eax,%k1
- kmovw %r8d,%k2
- kmovw %r9d,%k3
-
- vpbroadcastq %x#$D0,$M0 # 0808080808080808
- vpbroadcastq %x#$D1,$M1
- vpbroadcastq %x#$D2,$M2
- vpbroadcastq %x#$D3,$M3
- vpbroadcastq %x#$D4,$M4
-
- vpexpandd $D0,${D0}{%k1} # 05060708 -> -05--06--07--08-
- vpexpandd $D1,${D1}{%k1}
- vpexpandd $D2,${D2}{%k1}
- vpexpandd $D3,${D3}{%k1}
- vpexpandd $D4,${D4}{%k1}
-
- vpexpandd $R0,${D0}{%k2} # -05--06--07--08- -> 145-246-347-448-
- vpexpandd $R1,${D1}{%k2}
- vpexpandd $R2,${D2}{%k2}
- vpexpandd $R3,${D3}{%k2}
- vpexpandd $R4,${D4}{%k2}
-
- vpblendmd $M0,$D0,${R0}{%k3} # 1858286838784888
- vpblendmd $M1,$D1,${R1}{%k3}
- vpblendmd $M2,$D2,${R2}{%k3}
- vpblendmd $M3,$D3,${R3}{%k3}
- vpblendmd $M4,$D4,${R4}{%k3}
+
+ vpermd $R0,$M0,$R0 # 14243444 -> 1---2---3---4---
+ vpermd $R1,$M0,$R1
+ vpermd $R2,$M0,$R2
+ vpermd $R3,$M0,$R3
+ vpermd $R4,$M0,$R4
+
+ vpermd $D0,$M0,${R0}{%k1} # 05060708 -> 1858286838784888
+ vpermd $D1,$M0,${R1}{%k1}
+ vpermd $D2,$M0,${R2}{%k1}
+ vpermd $D3,$M0,${R3}{%k1}
+ vpermd $D4,$M0,${R4}{%k1}
vpslld \$2,$R1,$S1 # *5
vpslld \$2,$R2,$S2
@@ -2264,15 +2334,14 @@ $code.=<<___;
vpsrlq \$40,$T4,$T4 # 4
vpandq $MASK,$T2,$T2 # 2
vpandq $MASK,$T0,$T0 # 0
- vpandq $MASK,$T1,$T1 # 1
- vpandq $MASK,$T3,$T3 # 3
+ #vpandq $MASK,$T1,$T1 # 1
+ #vpandq $MASK,$T3,$T3 # 3
#vporq $PADBIT,$T4,$T4 # padbit, yes, always
vpaddq $H2,$T2,$H2 # accumulate input
- mov \$0x0f,%eax
sub \$192,$len
jbe .Ltail_avx512
- jmp .Loop_avx512
+ #jmp .Loop_avx512
.align 32
.Loop_avx512:
@@ -2307,7 +2376,9 @@ $code.=<<___;
vpmuludq $H2,$R1,$D3 # d3 = h2*r1
vpaddq $H0,$T0,$H0
vpmuludq $H2,$R2,$D4 # d4 = h2*r2
+ vpandq $MASK,$T1,$T1 # 1
vpmuludq $H2,$S3,$D0 # d0 = h2*s3
+ vpandq $MASK,$T3,$T3 # 3
vpmuludq $H2,$S4,$D1 # d1 = h2*s4
vporq $PADBIT,$T4,$T4 # padbit, yes, always
vpmuludq $H2,$R0,$D2 # d2 = h2*r0
@@ -2415,8 +2486,8 @@ $code.=<<___;
vpaddq $D3,$H4,$H4 # h3 -> h4
vpandq $MASK,$T0,$T0 # 0
- vpandq $MASK,$T1,$T1 # 1
- vpandq $MASK,$T3,$T3 # 3
+ #vpandq $MASK,$T1,$T1 # 1
+ #vpandq $MASK,$T3,$T3 # 3
#vporq $PADBIT,$T4,$T4 # padbit, yes, always
sub \$128,$len
@@ -2448,7 +2519,9 @@ $code.=<<___;
vpmuludq $H2,$R1,$D3 # d3 = h2*r1
vpmuludq $H2,$R2,$D4 # d4 = h2*r2
vpmuludq $H2,$S3,$D0 # d0 = h2*s3
+ vpandq $MASK,$T1,$T1 # 1
vpmuludq $H2,$S4,$D1 # d1 = h2*s4
+ vpandq $MASK,$T3,$T3 # 3
vpmuludq $H2,$R0,$D2 # d2 = h2*r0
vporq $PADBIT,$T4,$T4 # padbit, yes, always
vpaddq $H1,$T1,$H1 # accumulate input
@@ -2622,9 +2695,11 @@ $code.=<<___ if ($win64);
___
$code.=<<___ if (!$win64);
lea 8(%r11),%rsp
+.cfi_def_cfa %rsp,8
___
$code.=<<___;
ret
+.cfi_endproc
.size poly1305_blocks_avx512,.-poly1305_blocks_avx512
___
if ($avx>3) {
@@ -2832,6 +2907,8 @@ $code.=<<___;
.long 0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
.Lpermd_avx2:
.long 2,2,2,3,2,0,2,1
+.Lpermd_avx512:
+.long 0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
.L2_44_inp_permd:
.long 0,1,1,2,2,3,7,7
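Besides the CFI work, this file carries the "minor AVX512 optimization"
commit. The interleave of the key powers r^1..r^8 used to take three k-mask
constants, five vpbroadcastq, ten vpexpandd and five vpblendmd; it now loads
one permutation constant (the new .Lpermd_avx512 table) and one mask, and
does the whole job with ten vpermd, five unmasked and five merge-masked
under %k1 = 0x7777. The lane masking of $T1/$T3 and the padbit vporq are
also pushed down into the vpmuludq chains, presumably to overlap the cheap
logic ops with the multiplications (my reading, not stated in the commit).
The lane-level semantics the new code relies on, as a reading aid:

	# vpermd  src,$idx,dst        dst[i] = src[idx[i]],  i = 0..15 dwords
	# vpermd  src,$idx,dst{%k1}   dst[i] = src[idx[i]]   if bit i of k1 set
	#                             dst[i] left unchanged  otherwise

The first, unmasked vpermd plants r^1..r^4 in the lanes that mask 0x7777
leaves untouched (every fourth lane); the masked one then merges the
r^5..r^8 values into the rest, yielding the 1858286838784888 layout the
original comment asks for.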
diff --git a/crypto/whrlpool/asm/wp-x86_64.pl b/crypto/whrlpool/asm/wp-x86_64.pl
index d0b7ecc..4a1261d 100644
--- a/crypto/whrlpool/asm/wp-x86_64.pl
+++ b/crypto/whrlpool/asm/wp-x86_64.pl
@@ -66,13 +66,21 @@ $code=<<___;
.type $func,\@function,3
.align 16
$func:
+.cfi_startproc
mov %rsp,%rax
+.cfi_def_cfa_register %rax
push %rbx
+.cfi_push %rbx
push %rbp
+.cfi_push %rbp
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
sub \$128+40,%rsp
and \$-64,%rsp
@@ -82,6 +90,7 @@ $func:
mov %rsi,8(%r10)
mov %rdx,16(%r10)
mov %rax,32(%r10) # saved stack pointer
+.cfi_cfa_expression %rsp+`128+32`,deref,+8
.Lprologue:
mov %r10,%rbx
@@ -205,15 +214,24 @@ $code.=<<___;
jmp .Louterloop
.Lalldone:
mov 32(%rbx),%rsi # restore saved pointer
+.cfi_def_cfa %rsi,8
mov -48(%rsi),%r15
+.cfi_restore %r15
mov -40(%rsi),%r14
+.cfi_restore %r14
mov -32(%rsi),%r13
+.cfi_restore %r13
mov -24(%rsi),%r12
+.cfi_restore %r12
mov -16(%rsi),%rbp
+.cfi_restore %rbp
mov -8(%rsi),%rbx
+.cfi_restore %rbx
lea (%rsi),%rsp
+.cfi_def_cfa_register %rsp
.Lepilogue:
ret
+.cfi_endproc
.size $func,.-$func
.align 64
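Same idea as in the Camellia CBC code, with one variation worth noting: here
the original %rsp is captured in %rax before any registers are pushed, so
the saved value sits just below the return address and the expression ends
in +8 rather than +56; and the slot holding it is addressed off the
realigned %rsp itself (%rsp+160, i.e. 128+32), which stays fixed for the
life of the frame. In outline, from the diff above:

	# mov  %rsp,%rax          captured pre-push, so CFA = %rax + 8
	# ...six pushes, sub $128+40, and $-64 realign...
	# mov  %rax,32(%r10)      saved pointer lands at %rsp+160
	# .cfi_cfa_expression %rsp+160,deref,+8
	#                         CFA = *(%rsp + 160) + 8 until the epilogue
	# mov  32(%rbx),%rsi      epilogue reloads it: CFA = %rsi + 8
	# lea  (%rsi),%rsp        CFA = %rsp + 8  (.cfi_def_cfa_register %rsp)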
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 3082253..e08e1c4 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -63,7 +63,9 @@ OPENSSL_rdtsc:
.type OPENSSL_ia32_cpuid,\@function,1
.align 16
OPENSSL_ia32_cpuid:
+.cfi_startproc
mov %rbx,%r8 # save %rbx
+.cfi_register %rbx,%r8
xor %eax,%eax
mov %eax,8(%rdi) # clear 3rd word
@@ -194,8 +196,10 @@ OPENSSL_ia32_cpuid:
shl \$32,%r9
mov %r10d,%eax
mov %r8,%rbx # restore %rbx
+.cfi_restore %rbx
or %r9,%rax
ret
+.cfi_endproc
.size OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
.globl OPENSSL_cleanse
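The cpuid change is the smallest of the six but shows a third flavor of
annotation: .cfi_register records that a callee-saved register is parked in
another register rather than in memory. A hypothetical minimal example of
the same trick (not OpenSSL code):

	.globl	cpuid_demo
	.type	cpuid_demo,@function
	.align	16
	cpuid_demo:
	.cfi_startproc
		mov	%rbx,%r8	# save %rbx in a call-clobbered register
	.cfi_register	%rbx,%r8	# unwinder: %rbx currently lives in %r8
		mov	$1,%eax
		cpuid			# cpuid clobbers %rbx among others
		mov	%r8,%rbx	# put the caller's %rbx back
	.cfi_restore	%rbx		# unwinder: %rbx is %rbx again
		mov	%ecx,%eax	# return the ECX feature word
		ret
	.cfi_endproc
	.size	cpuid_demo,.-cpuid_demo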