[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Mon Feb 12 13:05:16 UTC 2018
The branch master has been updated
via af0fcf7b4668218b24d9250b95e0b96939ccb4d1 (commit)
via 77f3612e2bd930b27dcab63fdcca1d7091cb948d (commit)
from 1d0c08b4963f5f7e1d1855e360417a11973d8455 (commit)
- Log -----------------------------------------------------------------
commit af0fcf7b4668218b24d9250b95e0b96939ccb4d1
Author: Andy Polyakov <appro at openssl.org>
Date: Sun Feb 11 12:29:47 2018 +0100
sha/asm/sha512-armv8.pl: add hardware-assisted SHA512 subroutine.
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit 77f3612e2bd930b27dcab63fdcca1d7091cb948d
Author: Andy Polyakov <appro at openssl.org>
Date: Sun Feb 11 12:29:06 2018 +0100
crypto/armcap.c: detect hardware-assisted SHA512 support.
Reviewed-by: Rich Salz <rsalz at openssl.org>
-----------------------------------------------------------------------
Summary of changes:
crypto/arm64cpuid.pl | 8 +++
crypto/arm_arch.h | 1 +
crypto/armcap.c | 15 +++++
crypto/sha/asm/sha512-armv8.pl | 130 ++++++++++++++++++++++++++++++++++++++++-
4 files changed, 151 insertions(+), 3 deletions(-)
diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
index caa3387..68734f5 100755
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -63,6 +63,7 @@ _armv8_sha256_probe:
sha256su0 v0.4s, v0.4s
ret
.size _armv8_sha256_probe,.-_armv8_sha256_probe
+
.globl _armv8_pmull_probe
.type _armv8_pmull_probe,%function
_armv8_pmull_probe:
@@ -70,6 +71,13 @@ _armv8_pmull_probe:
ret
.size _armv8_pmull_probe,.-_armv8_pmull_probe
+.globl _armv8_sha512_probe
+.type _armv8_sha512_probe,%function
+_armv8_sha512_probe:
+ .long 0xcec08000 // sha512su0 v0.2d,v0.2d
+ ret
+.size _armv8_sha512_probe,.-_armv8_sha512_probe
+
.globl OPENSSL_cleanse
.type OPENSSL_cleanse,%function
.align 5
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index 8b41408..395ada6 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -79,5 +79,6 @@ extern unsigned int OPENSSL_armcap_P;
# define ARMV8_SHA1 (1<<3)
# define ARMV8_SHA256 (1<<4)
# define ARMV8_PMULL (1<<5)
+# define ARMV8_SHA512 (1<<6)
#endif
diff --git a/crypto/armcap.c b/crypto/armcap.c
index a1f77fa..deffd93 100644
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -46,6 +46,9 @@ void _armv8_aes_probe(void);
void _armv8_sha1_probe(void);
void _armv8_sha256_probe(void);
void _armv8_pmull_probe(void);
+# ifdef __aarch64__
+void _armv8_sha512_probe(void);
+# endif
uint32_t _armv7_tick(void);
uint32_t OPENSSL_rdtsc(void)
@@ -94,6 +97,7 @@ static unsigned long (*getauxval) (unsigned long) = NULL;
# define HWCAP_CE_PMULL (1 << 4)
# define HWCAP_CE_SHA1 (1 << 5)
# define HWCAP_CE_SHA256 (1 << 6)
+# define HWCAP_CE_SHA512 (1 << 21)
# endif
void OPENSSL_cpuid_setup(void)
@@ -163,6 +167,11 @@ void OPENSSL_cpuid_setup(void)
if (hwcap & HWCAP_CE_SHA256)
OPENSSL_armcap_P |= ARMV8_SHA256;
+
+# ifdef __aarch64__
+ if (hwcap & HWCAP_CE_SHA512)
+ OPENSSL_armcap_P |= ARMV8_SHA512;
+# endif
}
} else if (sigsetjmp(ill_jmp, 1) == 0) {
_armv7_neon_probe();
@@ -182,6 +191,12 @@ void OPENSSL_cpuid_setup(void)
_armv8_sha256_probe();
OPENSSL_armcap_P |= ARMV8_SHA256;
}
+# ifdef __aarch64__
+ if (sigsetjmp(ill_jmp, 1) == 0) {
+ _armv8_sha512_probe();
+ OPENSSL_armcap_P |= ARMV8_SHA512;
+ }
+# endif
}
if (sigsetjmp(ill_jmp, 1) == 0) {
_armv7_tick();
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
index a4755e6..2f8644c 100644
--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -197,8 +197,6 @@ $code.=<<___;
.type $func,%function
.align 6
$func:
-___
-$code.=<<___ if ($SZ==4);
#ifndef __KERNEL__
# ifdef __ILP32__
ldrsw x16,.LOPENSSL_armcap_P
@@ -208,13 +206,19 @@ $code.=<<___ if ($SZ==4);
adr x17,.LOPENSSL_armcap_P
add x16,x16,x17
ldr w16,[x16]
+___
+$code.=<<___ if ($SZ==4);
tst w16,#ARMV8_SHA256
b.ne .Lv8_entry
tst w16,#ARMV7_NEON
b.ne .Lneon_entry
-#endif
+___
+$code.=<<___ if ($SZ==8);
+ tst w16,#ARMV8_SHA512
+ b.ne .Lv8_entry
___
$code.=<<___;
+#endif
stp x29,x30,[sp,#-128]!
add x29,sp,#0
@@ -732,6 +736,108 @@ $code.=<<___;
___
}
+if ($SZ==8) {
+my $Ktbl="x3";
+
+my @H = map("v$_.16b",(0..4));
+my ($fg,$de,$m9_10)=map("v$_.16b",(5..7));
+my @MSG=map("v$_.16b",(16..23));
+my ($W0,$W1)=("v24.2d","v25.2d");
+my ($AB,$CD,$EF,$GH)=map("v$_.16b",(26..29));
+
+$code.=<<___;
+#ifndef __KERNEL__
+.type sha512_block_armv8,%function
+.align 6
+sha512_block_armv8:
+.Lv8_entry:
+ stp x29,x30,[sp,#-16]!
+ add x29,sp,#0
+
+ ld1 {@MSG[0]-@MSG[3]},[$inp],#64 // load input
+ ld1 {@MSG[4]-@MSG[7]},[$inp],#64
+
+ ld1.64 {@H[0]-@H[3]},[$ctx] // load context
+ adr $Ktbl,.LK512
+
+ rev64 @MSG[0],@MSG[0]
+ rev64 @MSG[1],@MSG[1]
+ rev64 @MSG[2],@MSG[2]
+ rev64 @MSG[3],@MSG[3]
+ rev64 @MSG[4],@MSG[4]
+ rev64 @MSG[5],@MSG[5]
+ rev64 @MSG[6],@MSG[6]
+ rev64 @MSG[7],@MSG[7]
+ b .Loop_hw
+
+.align 4
+.Loop_hw:
+ ld1.64 {$W0},[$Ktbl],#16
+ subs $num,$num,#1
+ sub x4,$inp,#128
+ orr $AB,@H[0],@H[0] // offload
+ orr $CD,@H[1],@H[1]
+ orr $EF,@H[2],@H[2]
+ orr $GH,@H[3],@H[3]
+ csel $inp,$inp,x4,ne // conditional rewind
+___
+for($i=0;$i<32;$i++) {
+$code.=<<___;
+ add.i64 $W0,$W0,@MSG[0]
+ ld1.64 {$W1},[$Ktbl],#16
+ ext $W0,$W0,$W0,#8
+ ext $fg,@H[2],@H[3],#8
+ ext $de,@H[1],@H[2],#8
+ add.i64 @H[3],@H[3],$W0 // "T1 + H + K512[i]"
+ sha512su0 @MSG[0],@MSG[1]
+ ext $m9_10,@MSG[4],@MSG[5],#8
+ sha512h @H[3],$fg,$de
+ sha512su1 @MSG[0],@MSG[7],$m9_10
+ add.i64 @H[4],@H[1],@H[3] // "D + T1"
+ sha512h2 @H[3],$H[1],@H[0]
+___
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
+ @H = (@H[3],@H[0],@H[4],@H[2],@H[1]);
+}
+for(;$i<40;$i++) {
+$code.=<<___ if ($i<39);
+ ld1.64 {$W1},[$Ktbl],#16
+___
+$code.=<<___ if ($i==39);
+ sub $Ktbl,$Ktbl,#$rounds*$SZ // rewind
+___
+$code.=<<___;
+ add.i64 $W0,$W0,@MSG[0]
+ ld1 {@MSG[0]},[$inp],#16 // load next input
+ ext $W0,$W0,$W0,#8
+ ext $fg,@H[2],@H[3],#8
+ ext $de,@H[1],@H[2],#8
+ add.i64 @H[3],@H[3],$W0 // "T1 + H + K512[i]"
+ sha512h @H[3],$fg,$de
+ rev64 @MSG[0],@MSG[0]
+ add.i64 @H[4],@H[1],@H[3] // "D + T1"
+ sha512h2 @H[3],$H[1],@H[0]
+___
+ ($W0,$W1)=($W1,$W0); push(@MSG,shift(@MSG));
+ @H = (@H[3],@H[0],@H[4],@H[2],@H[1]);
+}
+$code.=<<___;
+ add.i64 @H[0],@H[0],$AB // accumulate
+ add.i64 @H[1],@H[1],$CD
+ add.i64 @H[2],@H[2],$EF
+ add.i64 @H[3],@H[3],$GH
+
+ cbnz $num,.Loop_hw
+
+ st1.64 {@H[0]-@H[3]},[$ctx] // store context
+
+ ldr x29,[sp],#16
+ ret
+.size sha512_block_armv8,.-sha512_block_armv8
+#endif
+___
+}
+
$code.=<<___;
#ifndef __KERNEL__
.comm OPENSSL_armcap_P,4,4
@@ -753,6 +859,21 @@ ___
}
}
+{ my %opcode = (
+ "sha512h" => 0xce608000, "sha512h2" => 0xce608400,
+ "sha512su0" => 0xcec08000, "sha512su1" => 0xce608800 );
+
+ sub unsha512 {
+ my ($mnemonic,$arg)=@_;
+
+ $arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+ &&
+ sprintf ".inst\t0x%08x\t//%s %s",
+ $opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+ $mnemonic,$arg;
+ }
+}
+
open SELF,$0;
while(<SELF>) {
next if (/^#!/);
@@ -765,12 +886,15 @@ foreach(split("\n",$code)) {
s/\`([^\`]*)\`/eval($1)/ge;
+ s/\b(sha512\w+)\s+([qv].*)/unsha512($1,$2)/ge or
s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/ge;
s/\bq([0-9]+)\b/v$1.16b/g; # old->new registers
s/\.[ui]?8(\s)/$1/;
+ s/\.\w?64\b// and s/\.16b/\.2d/g or
s/\.\w?32\b// and s/\.16b/\.4s/g;
+ m/\bext\b/ and s/\.2d/\.16b/g or
m/(ld|st)1[^\[]+\[0\]/ and s/\.4s/\.s/g;
print $_,"\n";
More information about the openssl-commits
mailing list