[openssl-commits] [openssl] master update

Andy Polyakov appro at openssl.org
Sun Feb 26 20:28:19 UTC 2017


The branch master has been updated
       via  e052083cc7620379b7119cdbe4def5ea5de65c18 (commit)
       via  5e32cfb2b6aec4d8d80083dabbd25bf89a482f21 (commit)
       via  fa62bc4661960a593a77d2c3f260173c3aa7333d (commit)
       via  49508b23ce929ad5c8381bdc4b397eb41fd06137 (commit)
       via  1c47e8836f4213251957254764886e82ac2563bc (commit)
       via  f17652e5f9198941ce761da2ccc6ce584fd90e81 (commit)
      from  26a556e778f167070037fee243d7e6b9800fdb7f (commit)


- Log -----------------------------------------------------------------
commit e052083cc7620379b7119cdbe4def5ea5de65c18
Author: Andy Polyakov <appro at openssl.org>
Date:   Sat Feb 25 18:37:24 2017 +0100

    poly1305/asm/poly1305-x86_64.pl: minor AVX512 optimization.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

commit 5e32cfb2b6aec4d8d80083dabbd25bf89a482f21
Author: Andy Polyakov <appro at openssl.org>
Date:   Sat Feb 25 22:17:21 2017 +0100

    crypto/x86_64cpuid.pl: add CFI annotations.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

commit fa62bc4661960a593a77d2c3f260173c3aa7333d
Author: Andy Polyakov <appro at openssl.org>
Date:   Sat Feb 25 22:16:57 2017 +0100

    whrlpool/asm/wp-x86_64.pl: add CFI annotations.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

commit 49508b23ce929ad5c8381bdc4b397eb41fd06137
Author: Andy Polyakov <appro at openssl.org>
Date:   Sat Feb 25 22:16:38 2017 +0100

    camellia/asm/cmll-x86_64.pl: add CFI annotations.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

commit 1c47e8836f4213251957254764886e82ac2563bc
Author: Andy Polyakov <appro at openssl.org>
Date:   Sat Feb 25 19:37:02 2017 +0100

    poly1305/asm/poly1305-x86_64.pl: add CFI annotations.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

commit f17652e5f9198941ce761da2ccc6ce584fd90e81
Author: Andy Polyakov <appro at openssl.org>
Date:   Sat Feb 25 19:36:43 2017 +0100

    chacha/asm/chacha-x86_64.pl: add CFI annotations.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

-----------------------------------------------------------------------

Summary of changes:
 crypto/camellia/asm/cmll-x86_64.pl     |  57 ++++++++++++
 crypto/chacha/asm/chacha-x86_64.pl     |  41 +++++++++
 crypto/poly1305/asm/poly1305-x86_64.pl | 161 ++++++++++++++++++++++++---------
 crypto/whrlpool/asm/wp-x86_64.pl       |  18 ++++
 crypto/x86_64cpuid.pl                  |   4 +
 5 files changed, 239 insertions(+), 42 deletions(-)
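
A note on the directives that dominate the diffs below: the .cfi_* lines are DWARF call-frame information (CFI), which lets debuggers, profilers and unwinders walk the stack through hand-written assembly. Forms such as .cfi_push and .cfi_restore are perlasm shorthand handled by the crypto/perlasm/x86_64-xlate.pl translator; in plain GNU as they correspond, roughly, to the standard directives sketched below (illustrative function, not from the tree):

.text
.globl	demo
.type	demo,@function
demo:
.cfi_startproc				# open an unwind entry; CFA = %rsp+8
	push	%rbx
.cfi_adjust_cfa_offset	8		# push moved %rsp down by 8
.cfi_offset	%rbx,-16		# %rbx is saved at CFA-16
	xor	%eax,%eax		# real work would go here
	mov	(%rsp),%rbx
.cfi_restore	%rbx			# %rbx holds its entry value again
	lea	8(%rsp),%rsp
.cfi_adjust_cfa_offset	-8		# CFA is back to %rsp+8
	ret
.cfi_endproc
.size	demo,.-demo

The mov/lea restore sequence mirrors the epilogue style used throughout these files.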

diff --git a/crypto/camellia/asm/cmll-x86_64.pl b/crypto/camellia/asm/cmll-x86_64.pl
index da5ad7b..02c52c3 100644
--- a/crypto/camellia/asm/cmll-x86_64.pl
+++ b/crypto/camellia/asm/cmll-x86_64.pl
@@ -137,11 +137,17 @@ Camellia_EncryptBlock:
 .align	16
 .Lenc_rounds:
 Camellia_EncryptBlock_Rounds:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lenc_prologue:
 
 	#mov	%rsi,$inp		# put away arguments
@@ -173,13 +179,20 @@ Camellia_EncryptBlock_Rounds:
 	mov	@S[3],12($out)
 
 	mov	0(%rsp),%r15
+.cfi_restore	%r15
 	mov	8(%rsp),%r14
+.cfi_restore	%r14
 	mov	16(%rsp),%r13
+.cfi_restore	%r13
 	mov	24(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	32(%rsp),%rbx
+.cfi_restore	%rbx
 	lea	40(%rsp),%rsp
+.cfi_adjust_cfa_offset	-40
 .Lenc_epilogue:
 	ret
+.cfi_endproc
 .size	Camellia_EncryptBlock_Rounds,.-Camellia_EncryptBlock_Rounds
 
 .type	_x86_64_Camellia_encrypt,\@abi-omnipotent
@@ -247,11 +260,17 @@ Camellia_DecryptBlock:
 .align	16
 .Ldec_rounds:
 Camellia_DecryptBlock_Rounds:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Ldec_prologue:
 
 	#mov	%rsi,$inp		# put away arguments
@@ -283,13 +302,20 @@ Camellia_DecryptBlock_Rounds:
 	mov	@S[3],12($out)
 
 	mov	0(%rsp),%r15
+.cfi_restore	%r15
 	mov	8(%rsp),%r14
+.cfi_restore	%r14
 	mov	16(%rsp),%r13
+.cfi_restore	%r13
 	mov	24(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	32(%rsp),%rbx
+.cfi_restore	%rbx
 	lea	40(%rsp),%rsp
+.cfi_adjust_cfa_offset	-40
 .Ldec_epilogue:
 	ret
+.cfi_endproc
 .size	Camellia_DecryptBlock_Rounds,.-Camellia_DecryptBlock_Rounds
 
 .type	_x86_64_Camellia_decrypt,\@abi-omnipotent
@@ -409,11 +435,17 @@ $code.=<<___;
 .type	Camellia_Ekeygen,\@function,3
 .align	16
 Camellia_Ekeygen:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lkey_prologue:
 
 	mov	%edi,${keyend}d		# put away arguments, keyBitLength
@@ -573,13 +605,20 @@ $code.=<<___;
 	mov	\$4,%eax
 .Ldone:
 	mov	0(%rsp),%r15
+.cfi_restore	%r15
 	mov	8(%rsp),%r14
+.cfi_restore	%r14
 	mov	16(%rsp),%r13
+.cfi_restore	%r13
 	mov	24(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	32(%rsp),%rbx
+.cfi_restore	%rbx
 	lea	40(%rsp),%rsp
+.cfi_adjust_cfa_offset	-40
 .Lkey_epilogue:
 	ret
+.cfi_endproc
 .size	Camellia_Ekeygen,.-Camellia_Ekeygen
 ___
 }
@@ -637,17 +676,25 @@ $code.=<<___;
 .type	Camellia_cbc_encrypt,\@function,6
 .align	16
 Camellia_cbc_encrypt:
+.cfi_startproc
 	cmp	\$0,%rdx
 	je	.Lcbc_abort
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lcbc_prologue:
 
 	mov	%rsp,%rbp
+.cfi_def_cfa_register	%rbp
 	sub	\$64,%rsp
 	and	\$-64,%rsp
 
@@ -668,6 +715,7 @@ Camellia_cbc_encrypt:
 
 	mov	%r8,$_ivp
 	mov	%rbp,$_rsp
+.cfi_cfa_expression	$_rsp,deref,+56
 
 .Lcbc_body:
 	lea	.LCamellia_SBOX(%rip),$Tbl
@@ -856,15 +904,24 @@ Camellia_cbc_encrypt:
 .align	16
 .Lcbc_done:
 	mov	$_rsp,%rcx
+.cfi_def_cfa	%rcx,56
 	mov	0(%rcx),%r15
+.cfi_restore	%r15
 	mov	8(%rcx),%r14
+.cfi_restore	%r14
 	mov	16(%rcx),%r13
+.cfi_restore	%r13
 	mov	24(%rcx),%r12
+.cfi_restore	%r12
 	mov	32(%rcx),%rbp
+.cfi_restore	%rbp
 	mov	40(%rcx),%rbx
+.cfi_restore	%rbx
 	lea	48(%rcx),%rsp
+.cfi_def_cfa	%rsp,8
 .Lcbc_abort:
 	ret
+.cfi_endproc
 .size	Camellia_cbc_encrypt,.-Camellia_cbc_encrypt
 
 .asciz	"Camellia for x86_64 by <appro\@openssl.org>"
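
Camellia_cbc_encrypt above illustrates the hard case for CFI: once the code executes "and \$-64,%rsp", the CFA is no longer a fixed offset from any register, so the entry stack pointer is spilled into the frame ($_rsp) and .cfi_cfa_expression, a perlasm shorthand lowered to a DW_CFA_def_cfa_expression byte sequence (GNU as exposes this via .cfi_escape), tells the unwinder to fetch the CFA from memory. A hand-written equivalent might look roughly like this; the function name, slot offset and exact encoding are illustrative:

.text
.globl	aligned_frame_demo
.type	aligned_frame_demo,@function
aligned_frame_demo:
.cfi_startproc
	mov	%rsp,%rax		# keep the entry %rsp (CFA-8)
	sub	$64,%rsp
	and	$-64,%rsp		# %rsp now at no fixed offset from the CFA
	mov	%rax,40(%rsp)		# spill the saved pointer into the frame
	# CFA = *(%rsp+40) + 8, as DWARF bytecode:
	#   DW_CFA_def_cfa_expression, length 5,
	#   DW_OP_breg7 +40, DW_OP_deref, DW_OP_plus_uconst 8
.cfi_escape	0x0f,0x05,0x77,0x28,0x06,0x23,0x08
	mov	40(%rsp),%rsp		# epilogue: reload the entry %rsp
.cfi_def_cfa	%rsp,8
	ret
.cfi_endproc
.size	aligned_frame_demo,.-aligned_frame_demo
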
diff --git a/crypto/chacha/asm/chacha-x86_64.pl b/crypto/chacha/asm/chacha-x86_64.pl
index 7fc1749..b59d96f 100755
--- a/crypto/chacha/asm/chacha-x86_64.pl
+++ b/crypto/chacha/asm/chacha-x86_64.pl
@@ -242,6 +242,7 @@ $code.=<<___;
 .type	ChaCha20_ctr32,\@function,5
 .align	64
 ChaCha20_ctr32:
+.cfi_startproc
 	cmp	\$0,$len
 	je	.Lno_data
 	mov	OPENSSL_ia32cap_P+4(%rip),%r10
@@ -255,12 +256,19 @@ $code.=<<___;
 	jnz	.LChaCha20_ssse3
 
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 	sub	\$64+24,%rsp
+.cfi_adjust_cfa_offset	64+24
 .Lctr32_body:
 
 	#movdqa	.Lsigma(%rip),%xmm0
@@ -401,15 +409,24 @@ $code.=<<___;
 
 .Ldone:
 	lea	64+24+48(%rsp),%rsi
+.cfi_def_cfa	%rsi,8
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lno_data:
 	ret
+.cfi_endproc
 .size	ChaCha20_ctr32,.-ChaCha20_ctr32
 ___
 
@@ -448,8 +465,10 @@ $code.=<<___;
 .type	ChaCha20_ssse3,\@function,5
 .align	32
 ChaCha20_ssse3:
+.cfi_startproc
 .LChaCha20_ssse3:
 	mov	%rsp,%r9		# frame pointer
+.cfi_def_cfa_register	%r9
 ___
 $code.=<<___	if ($avx);
 	test	\$`1<<(43-32)`,%r10d
@@ -565,8 +584,10 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	lea	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
 .Lssse3_epilogue:
 	ret
+.cfi_endproc
 .size	ChaCha20_ssse3,.-ChaCha20_ssse3
 ___
 }
@@ -708,8 +729,10 @@ $code.=<<___;
 .type	ChaCha20_4x,\@function,5
 .align	32
 ChaCha20_4x:
+.cfi_startproc
 .LChaCha20_4x:
 	mov		%rsp,%r9		# frame pointer
+.cfi_def_cfa_register	%r9
 	mov		%r10,%r11
 ___
 $code.=<<___	if ($avx>1);
@@ -1149,8 +1172,10 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	lea		(%r9),%rsp
+.cfi_def_cfa_register	%rsp
 .L4x_epilogue:
 	ret
+.cfi_endproc
 .size	ChaCha20_4x,.-ChaCha20_4x
 ___
 }
@@ -1237,8 +1262,10 @@ $code.=<<___;
 .type	ChaCha20_4xop,\@function,5
 .align	32
 ChaCha20_4xop:
+.cfi_startproc
 .LChaCha20_4xop:
 	mov		%rsp,%r9		# frame pointer
+.cfi_def_cfa_register	%r9
 	sub		\$0x140+$xframe,%rsp
 ___
 	################ stack layout
@@ -1601,8 +1628,10 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	lea		(%r9),%rsp
+.cfi_def_cfa_register	%rsp
 .L4xop_epilogue:
 	ret
+.cfi_endproc
 .size	ChaCha20_4xop,.-ChaCha20_4xop
 ___
 }
@@ -1735,8 +1764,10 @@ $code.=<<___;
 .type	ChaCha20_8x,\@function,5
 .align	32
 ChaCha20_8x:
+.cfi_startproc
 .LChaCha20_8x:
 	mov		%rsp,%r9		# frame register
+.cfi_def_cfa_register	%r9
 	sub		\$0x280+$xframe,%rsp
 	and		\$-32,%rsp
 ___
@@ -2242,8 +2273,10 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	lea		(%r9),%rsp
+.cfi_def_cfa_register	%rsp
 .L8x_epilogue:
 	ret
+.cfi_endproc
 .size	ChaCha20_8x,.-ChaCha20_8x
 ___
 }
@@ -2280,8 +2313,10 @@ $code.=<<___;
 .type	ChaCha20_avx512,\@function,5
 .align	32
 ChaCha20_avx512:
+.cfi_startproc
 .LChaCha20_avx512:
 	mov	%rsp,%r9		# frame pointer
+.cfi_def_cfa_register	%r9
 	cmp	\$512,$len
 	ja	.LChaCha20_16x
 
@@ -2461,8 +2496,10 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	lea	(%r9),%rsp
+.cfi_def_cfa_register	%rsp
 .Lavx512_epilogue:
 	ret
+.cfi_endproc
 .size	ChaCha20_avx512,.-ChaCha20_avx512
 ___
 }
@@ -2544,8 +2581,10 @@ $code.=<<___;
 .type	ChaCha20_16x,\@function,5
 .align	32
 ChaCha20_16x:
+.cfi_startproc
 .LChaCha20_16x:
 	mov		%rsp,%r9		# frame register
+.cfi_def_cfa_register	%r9
 	sub		\$64+$xframe,%rsp
 	and		\$-64,%rsp
 ___
@@ -2963,8 +3002,10 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___;
 	lea		(%r9),%rsp
+.cfi_def_cfa_register	%rsp
 .L16x_epilogue:
 	ret
+.cfi_endproc
 .size	ChaCha20_16x,.-ChaCha20_16x
 ___
 }
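
The ChaCha20 SIMD paths sidestep expressions entirely: each keeps the entry stack pointer live in %r9 ("mov %rsp,%r9" at the top), so a single .cfi_def_cfa_register %r9 covers any amount of later sub/and realignment, and "lea (%r9),%rsp" plus .cfi_def_cfa_register %rsp undoes it. %r9 is call-clobbered and otherwise unused there, so nothing needs saving. A minimal standalone sketch of the pattern (illustrative name and frame size):

.text
.globl	simd_frame_demo
.type	simd_frame_demo,@function
simd_frame_demo:
.cfi_startproc
	mov	%rsp,%r9		# entry %rsp parked in a scratch register
.cfi_def_cfa_register	%r9		# CFA = %r9+8 from here on
	sub	$0x140,%rsp		# frame size may vary freely ...
	and	$-64,%rsp		# ... and so may the alignment
	# body: the unwinder still finds the CFA through %r9
	lea	(%r9),%rsp		# one instruction undoes the whole frame
.cfi_def_cfa_register	%rsp		# CFA = %rsp+8 again
	ret
.cfi_endproc
.size	simd_frame_demo,.-simd_frame_demo
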
diff --git a/crypto/poly1305/asm/poly1305-x86_64.pl b/crypto/poly1305/asm/poly1305-x86_64.pl
index ff4efb3..a397019 100755
--- a/crypto/poly1305/asm/poly1305-x86_64.pl
+++ b/crypto/poly1305/asm/poly1305-x86_64.pl
@@ -210,16 +210,23 @@ $code.=<<___;
 .type	poly1305_blocks,\@function,4
 .align	32
 poly1305_blocks:
+.cfi_startproc
 .Lblocks:
 	shr	\$4,$len
 	jz	.Lno_data		# too short
 
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lblocks_body:
 
 	mov	$len,%r15		# reassign $len
@@ -255,15 +262,23 @@ $code.=<<___;
 	mov	$h2,16($ctx)
 
 	mov	0(%rsp),%r15
+.cfi_restore	%r15
 	mov	8(%rsp),%r14
+.cfi_restore	%r14
 	mov	16(%rsp),%r13
+.cfi_restore	%r13
 	mov	24(%rsp),%r12
+.cfi_restore	%r12
 	mov	32(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	40(%rsp),%rbx
+.cfi_restore	%rbx
 	lea	48(%rsp),%rsp
+.cfi_adjust_cfa_offset	-48
 .Lno_data:
 .Lblocks_epilogue:
 	ret
+.cfi_endproc
 .size	poly1305_blocks,.-poly1305_blocks
 
 .type	poly1305_emit,\@function,3
@@ -484,6 +499,7 @@ __poly1305_init_avx:
 .type	poly1305_blocks_avx,\@function,4
 .align	32
 poly1305_blocks_avx:
+.cfi_startproc
 	mov	20($ctx),%r8d		# is_base2_26
 	cmp	\$128,$len
 	jae	.Lblocks_avx
@@ -503,11 +519,17 @@ poly1305_blocks_avx:
 	jz	.Leven_avx
 
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lblocks_avx_body:
 
 	mov	$len,%r15		# reassign $len
@@ -610,24 +632,39 @@ poly1305_blocks_avx:
 .align	16
 .Ldone_avx:
 	mov	0(%rsp),%r15
+.cfi_restore	%r15
 	mov	8(%rsp),%r14
+.cfi_restore	%r14
 	mov	16(%rsp),%r13
+.cfi_restore	%r13
 	mov	24(%rsp),%r12
+.cfi_restore	%r12
 	mov	32(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	40(%rsp),%rbx
+.cfi_restore	%rbx
 	lea	48(%rsp),%rsp
+.cfi_adjust_cfa_offset	-48
 .Lno_data_avx:
 .Lblocks_avx_epilogue:
 	ret
+.cfi_endproc
 
 .align	32
 .Lbase2_64_avx:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lbase2_64_avx_body:
 
 	mov	$len,%r15		# reassign $len
@@ -687,18 +724,27 @@ poly1305_blocks_avx:
 	mov	%r15,$len
 
 	mov	0(%rsp),%r15
+.cfi_restore	%r15
 	mov	8(%rsp),%r14
+.cfi_restore	%r14
 	mov	16(%rsp),%r13
+.cfi_restore	%r13
 	mov	24(%rsp),%r12
+.cfi_restore	%r12
 	mov	32(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	40(%rsp),%rbx
+.cfi_restore	%rbx
 	lea	48(%rsp),%rax
 	lea	48(%rsp),%rsp
+.cfi_adjust_cfa_offset	-48
 .Lbase2_64_avx_epilogue:
 	jmp	.Ldo_avx
+.cfi_endproc
 
 .align	32
 .Leven_avx:
+.cfi_startproc
 	vmovd		4*0($ctx),$H0		# load hash value
 	vmovd		4*1($ctx),$H1
 	vmovd		4*2($ctx),$H2
@@ -709,6 +755,7 @@ poly1305_blocks_avx:
 ___
 $code.=<<___	if (!$win64);
 	lea		-0x58(%rsp),%r11
+.cfi_def_cfa		%r11,0x60
 	sub		\$0x178,%rsp
 ___
 $code.=<<___	if ($win64);
@@ -1301,10 +1348,12 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___	if (!$win64);
 	lea		0x58(%r11),%rsp
+.cfi_def_cfa		%rsp,8
 ___
 $code.=<<___;
 	vzeroupper
 	ret
+.cfi_endproc
 .size	poly1305_blocks_avx,.-poly1305_blocks_avx
 
 .type	poly1305_emit_avx,\@function,3
@@ -1372,6 +1421,7 @@ $code.=<<___;
 .type	poly1305_blocks_avx2,\@function,4
 .align	32
 poly1305_blocks_avx2:
+.cfi_startproc
 	mov	20($ctx),%r8d		# is_base2_26
 	cmp	\$128,$len
 	jae	.Lblocks_avx2
@@ -1391,11 +1441,17 @@ poly1305_blocks_avx2:
 	jz	.Leven_avx2
 
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lblocks_avx2_body:
 
 	mov	$len,%r15		# reassign $len
@@ -1504,24 +1560,39 @@ poly1305_blocks_avx2:
 .align	16
 .Ldone_avx2:
 	mov	0(%rsp),%r15
+.cfi_restore	%r15
 	mov	8(%rsp),%r14
+.cfi_restore	%r14
 	mov	16(%rsp),%r13
+.cfi_restore	%r13
 	mov	24(%rsp),%r12
+.cfi_restore	%r12
 	mov	32(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	40(%rsp),%rbx
+.cfi_restore	%rbx
 	lea	48(%rsp),%rsp
+.cfi_adjust_cfa_offset	-48
 .Lno_data_avx2:
 .Lblocks_avx2_epilogue:
 	ret
+.cfi_endproc
 
 .align	32
 .Lbase2_64_avx2:
+.cfi_startproc
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 .Lbase2_64_avx2_body:
 
 	mov	$len,%r15		# reassign $len
@@ -1588,18 +1659,27 @@ poly1305_blocks_avx2:
 	mov	\$`(1<<31|1<<30|1<<16)`,%r11d
 
 	mov	0(%rsp),%r15
+.cfi_restore	%r15
 	mov	8(%rsp),%r14
+.cfi_restore	%r14
 	mov	16(%rsp),%r13
+.cfi_restore	%r13
 	mov	24(%rsp),%r12
+.cfi_restore	%r12
 	mov	32(%rsp),%rbp
+.cfi_restore	%rbp
 	mov	40(%rsp),%rbx
+.cfi_restore	%rbx
 	lea	48(%rsp),%rax
 	lea	48(%rsp),%rsp
+.cfi_adjust_cfa_offset	-48
 .Lbase2_64_avx2_epilogue:
 	jmp	.Ldo_avx2
+.cfi_endproc
 
 .align	32
 .Leven_avx2:
+.cfi_startproc
 	mov		OPENSSL_ia32cap_P+8(%rip),%r10d
 	mov		\$`(1<<31|1<<30|1<<16)`,%r11d
 	vmovd		4*0($ctx),%x#$H0	# load hash value base 2^26
@@ -1620,6 +1700,7 @@ $code.=<<___		if ($avx>2);
 ___
 $code.=<<___	if (!$win64);
 	lea		-8(%rsp),%r11
+.cfi_def_cfa		%r11,16
 	sub		\$0x128,%rsp
 ___
 $code.=<<___	if ($win64);
@@ -2008,10 +2089,12 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___	if (!$win64);
 	lea		8(%r11),%rsp
+.cfi_def_cfa		%rsp,8
 ___
 $code.=<<___;
 	vzeroupper
 	ret
+.cfi_endproc
 .size	poly1305_blocks_avx2,.-poly1305_blocks_avx2
 ___
 #######################################################################
@@ -2031,11 +2114,13 @@ $code.=<<___;
 .type	poly1305_blocks_avx512,\@function,4
 .align	32
 poly1305_blocks_avx512:
+.cfi_startproc
 .Lblocks_avx512:
 	vzeroupper
 ___
 $code.=<<___	if (!$win64);
 	lea		-8(%rsp),%r11
+.cfi_def_cfa		%r11,16
 	sub		\$0x128,%rsp
 ___
 $code.=<<___	if ($win64);
@@ -2044,13 +2129,13 @@ $code.=<<___	if ($win64);
 	vmovdqa		%xmm6,0x50(%r11)
 	vmovdqa		%xmm7,0x60(%r11)
 	vmovdqa		%xmm8,0x70(%r11)
-	vmovdqa		%xmm9,0x80(%r11)
-	vmovdqa		%xmm10,0x90(%r11)
-	vmovdqa		%xmm11,0xa0(%r11)
-	vmovdqa		%xmm12,0xb0(%r11)
-	vmovdqa		%xmm13,0xc0(%r11)
-	vmovdqa		%xmm14,0xd0(%r11)
-	vmovdqa		%xmm15,0xe0(%r11)
+	vmovdqa32	%xmm9,0x80(%r11)
+	vmovdqa32	%xmm10,0x90(%r11)
+	vmovdqa32	%xmm11,0xa0(%r11)
+	vmovdqa32	%xmm12,0xb0(%r11)
+	vmovdqa32	%xmm13,0xc0(%r11)
+	vmovdqa32	%xmm14,0xd0(%r11)
+	vmovdqa32	%xmm15,0xe0(%r11)
 .Ldo_avx512_body:
 ___
 $code.=<<___;
@@ -2213,36 +2298,21 @@ $code.=<<___;
 	# we could just flow along, hence the goal for $R0-$S4 is
 	# 1858286838784888 ...
 
-	mov		\$0b0110011001100110,%eax
-	mov		\$0b1100110011001100,%r8d
-	mov		\$0b0101010101010101,%r9d
+	vmovdqa32	128(%rcx),$M0		# .Lpermd_avx512:
+	mov		\$0x7777,%eax
 	kmovw		%eax,%k1
-	kmovw		%r8d,%k2
-	kmovw		%r9d,%k3
-
-	vpbroadcastq	%x#$D0,$M0	# 0808080808080808
-	vpbroadcastq	%x#$D1,$M1
-	vpbroadcastq	%x#$D2,$M2
-	vpbroadcastq	%x#$D3,$M3
-	vpbroadcastq	%x#$D4,$M4
-
-	vpexpandd	$D0,${D0}{%k1}	# 05060708 -> -05--06--07--08-
-	vpexpandd	$D1,${D1}{%k1}
-	vpexpandd	$D2,${D2}{%k1}
-	vpexpandd	$D3,${D3}{%k1}
-	vpexpandd	$D4,${D4}{%k1}
-
-	vpexpandd	$R0,${D0}{%k2}	# -05--06--07--08- -> 145-246-347-448-
-	vpexpandd	$R1,${D1}{%k2}
-	vpexpandd	$R2,${D2}{%k2}
-	vpexpandd	$R3,${D3}{%k2}
-	vpexpandd	$R4,${D4}{%k2}
-
-	vpblendmd	$M0,$D0,${R0}{%k3}	# 1858286838784888
-	vpblendmd	$M1,$D1,${R1}{%k3}
-	vpblendmd	$M2,$D2,${R2}{%k3}
-	vpblendmd	$M3,$D3,${R3}{%k3}
-	vpblendmd	$M4,$D4,${R4}{%k3}
+
+	vpermd		$R0,$M0,$R0		# 14243444 -> 1---2---3---4---
+	vpermd		$R1,$M0,$R1
+	vpermd		$R2,$M0,$R2
+	vpermd		$R3,$M0,$R3
+	vpermd		$R4,$M0,$R4
+
+	vpermd		$D0,$M0,${R0}{%k1}	# 05060708 -> 1858286838784888
+	vpermd		$D1,$M0,${R1}{%k1}
+	vpermd		$D2,$M0,${R2}{%k1}
+	vpermd		$D3,$M0,${R3}{%k1}
+	vpermd		$D4,$M0,${R4}{%k1}
 
 	vpslld		\$2,$R1,$S1		# *5
 	vpslld		\$2,$R2,$S2
@@ -2264,15 +2334,14 @@ $code.=<<___;
 	vpsrlq		\$40,$T4,$T4		# 4
 	vpandq		$MASK,$T2,$T2		# 2
 	vpandq		$MASK,$T0,$T0		# 0
-	vpandq		$MASK,$T1,$T1		# 1
-	vpandq		$MASK,$T3,$T3		# 3
+	#vpandq		$MASK,$T1,$T1		# 1
+	#vpandq		$MASK,$T3,$T3		# 3
 	#vporq		$PADBIT,$T4,$T4		# padbit, yes, always
 
 	vpaddq		$H2,$T2,$H2		# accumulate input
-	mov		\$0x0f,%eax
 	sub		\$192,$len
 	jbe		.Ltail_avx512
-	jmp		.Loop_avx512
+	#jmp		.Loop_avx512
 
 .align	32
 .Loop_avx512:
@@ -2307,7 +2376,9 @@ $code.=<<___;
 	vpmuludq	$H2,$R1,$D3		# d3 = h2*r1
 	 vpaddq		$H0,$T0,$H0
 	vpmuludq	$H2,$R2,$D4		# d4 = h2*r2
+	 vpandq		$MASK,$T1,$T1		# 1
 	vpmuludq	$H2,$S3,$D0		# d0 = h2*s3
+	 vpandq		$MASK,$T3,$T3		# 3
 	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
 	 vporq		$PADBIT,$T4,$T4		# padbit, yes, always
 	vpmuludq	$H2,$R0,$D2		# d2 = h2*r0
@@ -2415,8 +2486,8 @@ $code.=<<___;
 	vpaddq		$D3,$H4,$H4		# h3 -> h4
 
 	 vpandq		$MASK,$T0,$T0		# 0
-	 vpandq		$MASK,$T1,$T1		# 1
-	 vpandq		$MASK,$T3,$T3		# 3
+	 #vpandq	$MASK,$T1,$T1		# 1
+	 #vpandq	$MASK,$T3,$T3		# 3
 	 #vporq		$PADBIT,$T4,$T4		# padbit, yes, always
 
 	sub		\$128,$len
@@ -2448,7 +2519,9 @@ $code.=<<___;
 	vpmuludq	$H2,$R1,$D3		# d3 = h2*r1
 	vpmuludq	$H2,$R2,$D4		# d4 = h2*r2
 	vpmuludq	$H2,$S3,$D0		# d0 = h2*s3
+	 vpandq		$MASK,$T1,$T1		# 1
 	vpmuludq	$H2,$S4,$D1		# d1 = h2*s4
+	 vpandq		$MASK,$T3,$T3		# 3
 	vpmuludq	$H2,$R0,$D2		# d2 = h2*r0
 	 vporq		$PADBIT,$T4,$T4		# padbit, yes, always
 	 vpaddq		$H1,$T1,$H1		# accumulate input
@@ -2622,9 +2695,11 @@ $code.=<<___	if ($win64);
 ___
 $code.=<<___	if (!$win64);
 	lea		8(%r11),%rsp
+.cfi_def_cfa		%rsp,8
 ___
 $code.=<<___;
 	ret
+.cfi_endproc
 .size	poly1305_blocks_avx512,.-poly1305_blocks_avx512
 ___
 if ($avx>3) {
@@ -2832,6 +2907,8 @@ $code.=<<___;
 .long	0x3ffffff,0,0x3ffffff,0,0x3ffffff,0,0x3ffffff,0
 .Lpermd_avx2:
 .long	2,2,2,3,2,0,2,1
+.Lpermd_avx512:
+.long	0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
 
 .L2_44_inp_permd:
 .long	0,1,1,2,2,3,7,7
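
Besides the CFI work, the poly1305 hunks carry the "minor AVX512 optimization" from the first commit: the three mask registers and the vpbroadcastq/vpexpandd/vpblendmd chain that interleaved the key powers are replaced by two vpermd through one precomputed index vector (.Lpermd_avx512) under a single merge mask, and two vpandq lane extractions move down into the multiplication schedule. Reduced to a standalone sketch of just the merge-masking idea (register choices and input data are illustrative):

.text
.globl	merge_demo
.type	merge_demo,@function
merge_demo:
	vmovdqa32	idx(%rip),%zmm0		# same values as .Lpermd_avx512
	mov		$0x7777,%eax		# 0111 per nibble: 3 of every 4 lanes
	kmovw		%eax,%k1
	vpermd		%zmm1,%zmm0,%zmm2	# all 16 lanes from the first source
	vpermd		%zmm3,%zmm0,%zmm2{%k1}	# masked-off lanes keep the first
						# result, so the two sources end
						# up interleaved in one register
	ret
.size	merge_demo,.-merge_demo
.section	.rodata
.align	64
idx:	.long	0,0,0,1, 0,2,0,3, 0,4,0,5, 0,6,0,7
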
diff --git a/crypto/whrlpool/asm/wp-x86_64.pl b/crypto/whrlpool/asm/wp-x86_64.pl
index d0b7ecc..4a1261d 100644
--- a/crypto/whrlpool/asm/wp-x86_64.pl
+++ b/crypto/whrlpool/asm/wp-x86_64.pl
@@ -66,13 +66,21 @@ $code=<<___;
 .type	$func,\@function,3
 .align	16
 $func:
+.cfi_startproc
 	mov	%rsp,%rax
+.cfi_def_cfa_register	%rax
 	push	%rbx
+.cfi_push	%rbx
 	push	%rbp
+.cfi_push	%rbp
 	push	%r12
+.cfi_push	%r12
 	push	%r13
+.cfi_push	%r13
 	push	%r14
+.cfi_push	%r14
 	push	%r15
+.cfi_push	%r15
 
 	sub	\$128+40,%rsp
 	and	\$-64,%rsp
@@ -82,6 +90,7 @@ $func:
 	mov	%rsi,8(%r10)
 	mov	%rdx,16(%r10)
 	mov	%rax,32(%r10)		# saved stack pointer
+.cfi_cfa_expression	%rsp+`128+32`,deref,+8
 .Lprologue:
 
 	mov	%r10,%rbx
@@ -205,15 +214,24 @@ $code.=<<___;
 	jmp	.Louterloop
 .Lalldone:
 	mov	32(%rbx),%rsi		# restore saved pointer
+.cfi_def_cfa	%rsi,8
 	mov	-48(%rsi),%r15
+.cfi_restore	%r15
 	mov	-40(%rsi),%r14
+.cfi_restore	%r14
 	mov	-32(%rsi),%r13
+.cfi_restore	%r13
 	mov	-24(%rsi),%r12
+.cfi_restore	%r12
 	mov	-16(%rsi),%rbp
+.cfi_restore	%rbp
 	mov	-8(%rsi),%rbx
+.cfi_restore	%rbx
 	lea	(%rsi),%rsp
+.cfi_def_cfa_register	%rsp
 .Lepilogue:
 	ret
+.cfi_endproc
 .size	$func,.-$func
 
 .align	64
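
The Whirlpool epilogue shows one more recurring idiom (also visible in Camellia_cbc_encrypt and ChaCha20_ctr32 above): while the saved registers are reloaded through a temporary pointer, the CFA is retargeted to that pointer with .cfi_def_cfa, then moved back to %rsp once the stack is unwound. A minimal sketch of the same shape (illustrative name; only one saved register):

.text
.globl	retarget_demo
.type	retarget_demo,@function
retarget_demo:
.cfi_startproc
	push	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	%rbx,-16
	lea	8(%rsp),%rsi		# %rsi = entry %rsp
.cfi_def_cfa	%rsi,8			# CFA now described through %rsi
	mov	-8(%rsi),%rbx		# reload the saved register through it
.cfi_restore	%rbx
	lea	(%rsi),%rsp		# unwind the stack in one step
.cfi_def_cfa_register	%rsp		# CFA = %rsp+8 again
	ret
.cfi_endproc
.size	retarget_demo,.-retarget_demo
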
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 3082253..e08e1c4 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -63,7 +63,9 @@ OPENSSL_rdtsc:
 .type	OPENSSL_ia32_cpuid,\@function,1
 .align	16
 OPENSSL_ia32_cpuid:
+.cfi_startproc
 	mov	%rbx,%r8		# save %rbx
+.cfi_register	%rbx,%r8
 
 	xor	%eax,%eax
 	mov	%eax,8(%rdi)		# clear 3rd word
@@ -194,8 +196,10 @@ OPENSSL_ia32_cpuid:
 	shl	\$32,%r9
 	mov	%r10d,%eax
 	mov	%r8,%rbx		# restore %rbx
+.cfi_restore	%rbx
 	or	%r9,%rax
 	ret
+.cfi_endproc
 .size	OPENSSL_ia32_cpuid,.-OPENSSL_ia32_cpuid
 
 .globl  OPENSSL_cleanse
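
OPENSSL_ia32_cpuid gets the cheapest annotation of all: %rbx is preserved in %r8 rather than on the stack, and .cfi_register records that, with .cfi_restore marking the point where %rbx is its own home again. As a standalone sketch (illustrative function name):

.text
.globl	reg_save_demo
.type	reg_save_demo,@function
reg_save_demo:
.cfi_startproc
	mov	%rbx,%r8		# save the callee-saved register ...
.cfi_register	%rbx,%r8		# ... and tell the unwinder where it went
	xor	%ebx,%ebx		# now free to clobber %rbx
	mov	%r8,%rbx		# put it back
.cfi_restore	%rbx			# %rbx holds its entry value again
	ret
.cfi_endproc
.size	reg_save_demo,.-reg_save_demo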

