[openssl-commits] [openssl] master update

Andy Polyakov appro at openssl.org
Fri Jul 21 12:12:45 UTC 2017


The branch master has been updated
       via  0d7903f83f84bba1d29225efd999c633a0c5ba01 (commit)
      from  64d92d74985ebb3d0be58a9718f9e080a14a8e7f (commit)


- Log -----------------------------------------------------------------
commit 0d7903f83f84bba1d29225efd999c633a0c5ba01
Author: Andy Polyakov <appro at openssl.org>
Date:   Thu Jul 20 13:56:17 2017 +0200

    sha/asm/keccak1600-avx512.pl: absorb bug-fix and minor optimization.
    
    Hardware used for benchmarking courtesy of Atos, experiments run by
    Romain Dolbeau <romain.dolbeau at atos.net>. Kudos!
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

-----------------------------------------------------------------------

Summary of changes:
 crypto/sha/asm/keccak1600-avx512.pl | 36 +++++++++++++++++-------------------
 1 file changed, 17 insertions(+), 19 deletions(-)

diff --git a/crypto/sha/asm/keccak1600-avx512.pl b/crypto/sha/asm/keccak1600-avx512.pl
index 9536351..70dec4e 100755
--- a/crypto/sha/asm/keccak1600-avx512.pl
+++ b/crypto/sha/asm/keccak1600-avx512.pl
@@ -30,8 +30,8 @@
 #
 #			r=1088(*)
 #
-# Knights Landing	-
-# Skylake Xeon		-
+# Knights Landing	8.9
+# Skylake-X		6.7
 #
 # (*)	Corresponds to SHA3-256.
 
@@ -119,22 +119,22 @@ __KeccakF1600:
 	vpermq		$A03,@Theta[3],$A03
 	vpermq		$A04,@Theta[4],$A04
 
-	vpxorq		$A01,$A00,$C00
-	vpxorq		$A02,$C00,$C00
-	vpternlogq	\$0x96,$A04,$A03,$C00
+	vmovdqa64	$A00,@T[0]		# put aside original A00
+	vpternlogq	\$0x96,$A02,$A01,$A00	# and use it as "C00"
+	vpternlogq	\$0x96,$A04,$A03,$A00
 
-	vprolq		\$1,$C00,$D00
-	vpermq		$C00,@Theta[1],$C00
+	vprolq		\$1,$A00,$D00
+	vpermq		$A00,@Theta[1],$A00
 	vpermq		$D00,@Theta[4],$D00
 
-	vpternlogq	\$0x96,$C00,$D00,$A00
-	vpternlogq	\$0x96,$C00,$D00,$A01
-	vpternlogq	\$0x96,$C00,$D00,$A02
-	vpternlogq	\$0x96,$C00,$D00,$A03
-	vpternlogq	\$0x96,$C00,$D00,$A04
+	vpternlogq	\$0x96,$A00,$D00,@T[0]	# T[0] is original A00
+	vpternlogq	\$0x96,$A00,$D00,$A01
+	vpternlogq	\$0x96,$A00,$D00,$A02
+	vpternlogq	\$0x96,$A00,$D00,$A03
+	vpternlogq	\$0x96,$A00,$D00,$A04
 
 	######################################### Rho
-	vprolvq		@Rhotate[0],$A00,$A00
+	vprolvq		@Rhotate[0],@T[0],$A00	# T[0] is original A00
 	vprolvq		@Rhotate[1],$A01,$A01
 	vprolvq		@Rhotate[2],$A02,$A02
 	vprolvq		@Rhotate[3],$A03,$A03
@@ -259,22 +259,20 @@ SHA3_absorb:
 	jc		.Ldone_absorb_avx512
 
 	shr		\$3,%eax
-	vmovdqu64	-96($inp),@{T[0]}{$k11111}
-	sub		\$4,%eax
 ___
-for(my $i=5; $i<25; $i++) {
+for(my $i=0; $i<25; $i++) {
 $code.=<<___
-	dec	%eax
-	jz	.Labsorved_avx512
 	mov	8*$i-96($inp),%r8
 	mov	%r8,$A_jagged_in[$i]-128(%r9)
+	dec	%eax
+	jz	.Labsorved_avx512
 ___
 }
 $code.=<<___;
 .Labsorved_avx512:
 	lea	($inp,$bsz),$inp
 
-	vpxorq	@T[0],$A00,$A00
+	vpxorq	64*0-128(%r9),$A00,$A00
 	vpxorq	64*1-128(%r9),$A01,$A01
 	vpxorq	64*2-128(%r9),$A02,$A02
 	vpxorq	64*3-128(%r9),$A03,$A03


More information about the openssl-commits mailing list