[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Fri Jul 21 12:12:45 UTC 2017
The branch master has been updated
via 0d7903f83f84bba1d29225efd999c633a0c5ba01 (commit)
from 64d92d74985ebb3d0be58a9718f9e080a14a8e7f (commit)
- Log -----------------------------------------------------------------
commit 0d7903f83f84bba1d29225efd999c633a0c5ba01
Author: Andy Polyakov <appro at openssl.org>
Date: Thu Jul 20 13:56:17 2017 +0200
sha/asm/keccak1600-avx512.pl: absorb bug-fix and minor optimization.
Hardware used for benchmarking courtesy of Atos, experiments run by
Romain Dolbeau <romain.dolbeau at atos.net>. Kudos!
Reviewed-by: Rich Salz <rsalz at openssl.org>
-----------------------------------------------------------------------
Summary of changes:
crypto/sha/asm/keccak1600-avx512.pl | 36 +++++++++++++++++-------------------
1 file changed, 17 insertions(+), 19 deletions(-)
diff --git a/crypto/sha/asm/keccak1600-avx512.pl b/crypto/sha/asm/keccak1600-avx512.pl
index 9536351..70dec4e 100755
--- a/crypto/sha/asm/keccak1600-avx512.pl
+++ b/crypto/sha/asm/keccak1600-avx512.pl
@@ -30,8 +30,8 @@
#
# r=1088(*)
#
-# Knights Landing -
-# Skylake Xeon -
+# Knights Landing 8.9
+# Skylake-X 6.7
#
# (*) Corresponds to SHA3-256.
@@ -119,22 +119,22 @@ __KeccakF1600:
vpermq $A03, at Theta[3],$A03
vpermq $A04, at Theta[4],$A04
- vpxorq $A01,$A00,$C00
- vpxorq $A02,$C00,$C00
- vpternlogq \$0x96,$A04,$A03,$C00
+ vmovdqa64 $A00, at T[0] # put aside original A00
+ vpternlogq \$0x96,$A02,$A01,$A00 # and use it as "C00"
+ vpternlogq \$0x96,$A04,$A03,$A00
- vprolq \$1,$C00,$D00
- vpermq $C00, at Theta[1],$C00
+ vprolq \$1,$A00,$D00
+ vpermq $A00, at Theta[1],$A00
vpermq $D00, at Theta[4],$D00
- vpternlogq \$0x96,$C00,$D00,$A00
- vpternlogq \$0x96,$C00,$D00,$A01
- vpternlogq \$0x96,$C00,$D00,$A02
- vpternlogq \$0x96,$C00,$D00,$A03
- vpternlogq \$0x96,$C00,$D00,$A04
+ vpternlogq \$0x96,$A00,$D00, at T[0] # T[0] is original A00
+ vpternlogq \$0x96,$A00,$D00,$A01
+ vpternlogq \$0x96,$A00,$D00,$A02
+ vpternlogq \$0x96,$A00,$D00,$A03
+ vpternlogq \$0x96,$A00,$D00,$A04
######################################### Rho
- vprolvq @Rhotate[0],$A00,$A00
+ vprolvq @Rhotate[0], at T[0],$A00 # T[0] is original A00
vprolvq @Rhotate[1],$A01,$A01
vprolvq @Rhotate[2],$A02,$A02
vprolvq @Rhotate[3],$A03,$A03
@@ -259,22 +259,20 @@ SHA3_absorb:
jc .Ldone_absorb_avx512
shr \$3,%eax
- vmovdqu64 -96($inp),@{T[0]}{$k11111}
- sub \$4,%eax
___
-for(my $i=5; $i<25; $i++) {
+for(my $i=0; $i<25; $i++) {
$code.=<<___
- dec %eax
- jz .Labsorved_avx512
mov 8*$i-96($inp),%r8
mov %r8,$A_jagged_in[$i]-128(%r9)
+ dec %eax
+ jz .Labsorved_avx512
___
}
$code.=<<___;
.Labsorved_avx512:
lea ($inp,$bsz),$inp
- vpxorq @T[0],$A00,$A00
+ vpxorq 64*0-128(%r9),$A00,$A00
vpxorq 64*1-128(%r9),$A01,$A01
vpxorq 64*2-128(%r9),$A02,$A02
vpxorq 64*3-128(%r9),$A03,$A03
More information about the openssl-commits
mailing list