[openssl-commits] [openssl] master update

Andy Polyakov <appro@openssl.org>
Mon Feb 12 13:05:16 UTC 2018


The branch master has been updated
       via  af0fcf7b4668218b24d9250b95e0b96939ccb4d1 (commit)
       via  77f3612e2bd930b27dcab63fdcca1d7091cb948d (commit)
      from  1d0c08b4963f5f7e1d1855e360417a11973d8455 (commit)


- Log -----------------------------------------------------------------
commit af0fcf7b4668218b24d9250b95e0b96939ccb4d1
Author: Andy Polyakov <appro@openssl.org>
Date:   Sun Feb 11 12:29:47 2018 +0100

    sha/asm/sha512-armv8.pl: add hardware-assisted SHA512 subroutine.
    
    Reviewed-by: Rich Salz <rsalz@openssl.org>

commit 77f3612e2bd930b27dcab63fdcca1d7091cb948d
Author: Andy Polyakov <appro@openssl.org>
Date:   Sun Feb 11 12:29:06 2018 +0100

    crypto/armcap.c: detect hardware-assisted SHA512 support.
    
    Reviewed-by: Rich Salz <rsalz@openssl.org>

-----------------------------------------------------------------------
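
These two commits plumb a new ARMV8_SHA512 capability bit through
OPENSSL_armcap_P: armcap.c sets it at startup, and sha512-armv8.pl emits a
hardware path that is entered when the bit is present. A minimal sketch of how
code could gate on the bit once OPENSSL_cpuid_setup() has run (the helper name
sha512_hw_capable is illustrative, not part of the patch):

    #include "arm_arch.h"   /* declares OPENSSL_armcap_P and ARMV8_SHA512 (1<<6) */

    /* Illustrative helper: non-zero once the CPU has advertised the SHA512
     * extension, via either HWCAP or the SIGILL probe. */
    static int sha512_hw_capable(void)
    {
        return (OPENSSL_armcap_P & ARMV8_SHA512) != 0;
    }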

Summary of changes:
 crypto/arm64cpuid.pl           |   8 +++
 crypto/arm_arch.h              |   1 +
 crypto/armcap.c                |  15 +++++
 crypto/sha/asm/sha512-armv8.pl | 130 ++++++++++++++++++++++++++++++++++++++++-
 4 files changed, 151 insertions(+), 3 deletions(-)

diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
index caa3387..68734f5 100755
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -63,6 +63,7 @@ _armv8_sha256_probe:
 	sha256su0	v0.4s, v0.4s
 	ret
 .size	_armv8_sha256_probe,.-_armv8_sha256_probe
+
 .globl	_armv8_pmull_probe
 .type	_armv8_pmull_probe,%function
 _armv8_pmull_probe:
@@ -70,6 +71,13 @@ _armv8_pmull_probe:
 	ret
 .size	_armv8_pmull_probe,.-_armv8_pmull_probe
 
+.globl	_armv8_sha512_probe
+.type	_armv8_sha512_probe,%function
+_armv8_sha512_probe:
+	.long	0xcec08000	// sha512su0	v0.2d,v0.2d
+	ret
+.size	_armv8_sha512_probe,.-_armv8_sha512_probe
+
 .globl	OPENSSL_cleanse
 .type	OPENSSL_cleanse,%function
 .align	5
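
The new probe executes a single sha512su0 instruction, emitted as a raw .long
presumably so that assemblers without SHA512 support can still build the file.
armcap.c only ever calls it under a SIGILL guard, roughly the pattern sketched
below (ill_jmp, ill_handler and _armv8_sha512_probe mirror the names used in
crypto/armcap.c and crypto/arm64cpuid.pl; the wrapper probe_sha512 is made up
for illustration):

    #include <setjmp.h>
    #include <signal.h>
    #include <string.h>

    static sigjmp_buf ill_jmp;

    static void ill_handler(int sig)
    {
        siglongjmp(ill_jmp, sig);
    }

    void _armv8_sha512_probe(void);   /* the assembly probe added above */

    /* Illustrative wrapper: returns non-zero when the instruction executes
     * without raising SIGILL, i.e. the CPU implements the SHA512 extension. */
    static int probe_sha512(void)
    {
        struct sigaction ill_act, ill_oact;
        int ok = 0;

        memset(&ill_act, 0, sizeof(ill_act));
        ill_act.sa_handler = ill_handler;
        sigaction(SIGILL, &ill_act, &ill_oact);

        if (sigsetjmp(ill_jmp, 1) == 0) {
            _armv8_sha512_probe();
            ok = 1;
        }

        sigaction(SIGILL, &ill_oact, NULL);
        return ok;
    }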
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index 8b41408..395ada6 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -79,5 +79,6 @@ extern unsigned int OPENSSL_armcap_P;
 # define ARMV8_SHA1      (1<<3)
 # define ARMV8_SHA256    (1<<4)
 # define ARMV8_PMULL     (1<<5)
+# define ARMV8_SHA512    (1<<6)
 
 #endif
diff --git a/crypto/armcap.c b/crypto/armcap.c
index a1f77fa..deffd93 100644
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -46,6 +46,9 @@ void _armv8_aes_probe(void);
 void _armv8_sha1_probe(void);
 void _armv8_sha256_probe(void);
 void _armv8_pmull_probe(void);
+# ifdef __aarch64__
+void _armv8_sha512_probe(void);
+# endif
 uint32_t _armv7_tick(void);
 
 uint32_t OPENSSL_rdtsc(void)
@@ -94,6 +97,7 @@ static unsigned long (*getauxval) (unsigned long) = NULL;
 #  define HWCAP_CE_PMULL         (1 << 4)
 #  define HWCAP_CE_SHA1          (1 << 5)
 #  define HWCAP_CE_SHA256        (1 << 6)
+#  define HWCAP_CE_SHA512        (1 << 21)
 # endif
 
 void OPENSSL_cpuid_setup(void)
@@ -163,6 +167,11 @@ void OPENSSL_cpuid_setup(void)
 
             if (hwcap & HWCAP_CE_SHA256)
                 OPENSSL_armcap_P |= ARMV8_SHA256;
+
+# ifdef __aarch64__
+            if (hwcap & HWCAP_CE_SHA512)
+                OPENSSL_armcap_P |= ARMV8_SHA512;
+# endif
         }
     } else if (sigsetjmp(ill_jmp, 1) == 0) {
         _armv7_neon_probe();
@@ -182,6 +191,12 @@ void OPENSSL_cpuid_setup(void)
             _armv8_sha256_probe();
             OPENSSL_armcap_P |= ARMV8_SHA256;
         }
+# ifdef __aarch64__
+        if (sigsetjmp(ill_jmp, 1) == 0) {
+            _armv8_sha512_probe();
+            OPENSSL_armcap_P |= ARMV8_SHA512;
+        }
+# endif
     }
     if (sigsetjmp(ill_jmp, 1) == 0) {
         _armv7_tick();
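
On Linux the getauxval() path is taken in preference to the SIGILL probe, and
the new HWCAP_CE_SHA512 constant is simply bit 21 of AT_HWCAP, the aarch64
kernel's SHA512 flag. A stand-alone sketch of that check (assumes glibc's
<sys/auxv.h>; not part of the patch):

    #include <stdio.h>
    #include <sys/auxv.h>                    /* getauxval(), AT_HWCAP */

    #define HWCAP_CE_SHA512 (1 << 21)        /* same bit the patch tests */

    int main(void)
    {
        unsigned long hwcap = getauxval(AT_HWCAP);

        printf("SHA512 instructions: %s\n",
               (hwcap & HWCAP_CE_SHA512) ? "reported" : "not reported");
        return 0;
    }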
diff --git a/crypto/sha/asm/sha512-armv8.pl b/crypto/sha/asm/sha512-armv8.pl
index a4755e6..2f8644c 100644
--- a/crypto/sha/asm/sha512-armv8.pl
+++ b/crypto/sha/asm/sha512-armv8.pl
@@ -197,8 +197,6 @@ $code.=<<___;
 .type	$func,%function
 .align	6
 $func:
-___
-$code.=<<___	if ($SZ==4);
 #ifndef	__KERNEL__
 # ifdef	__ILP32__
 	ldrsw	x16,.LOPENSSL_armcap_P
@@ -208,13 +206,19 @@ $code.=<<___	if ($SZ==4);
 	adr	x17,.LOPENSSL_armcap_P
 	add	x16,x16,x17
 	ldr	w16,[x16]
+___
+$code.=<<___	if ($SZ==4);
 	tst	w16,#ARMV8_SHA256
 	b.ne	.Lv8_entry
 	tst	w16,#ARMV7_NEON
 	b.ne	.Lneon_entry
-#endif
+___
+$code.=<<___	if ($SZ==8);
+	tst	w16,#ARMV8_SHA512
+	b.ne	.Lv8_entry
 ___
 $code.=<<___;
+#endif
 	stp	x29,x30,[sp,#-128]!
 	add	x29,sp,#0
 
@@ -732,6 +736,108 @@ $code.=<<___;
 ___
 }
 
+if ($SZ==8) {
+my $Ktbl="x3";
+
+my @H = map("v$_.16b",(0..4));
+my ($fg,$de,$m9_10)=map("v$_.16b",(5..7));
+my @MSG=map("v$_.16b",(16..23));
+my ($W0,$W1)=("v24.2d","v25.2d");
+my ($AB,$CD,$EF,$GH)=map("v$_.16b",(26..29));
+
+$code.=<<___;
+#ifndef	__KERNEL__
+.type	sha512_block_armv8,%function
+.align	6
+sha512_block_armv8:
+.Lv8_entry:
+	stp		x29,x30,[sp,#-16]!
+	add		x29,sp,#0
+
+	ld1		{@MSG[0]-@MSG[3]},[$inp],#64	// load input
+	ld1		{@MSG[4]-@MSG[7]},[$inp],#64
+
+	ld1.64		{@H[0]-@H[3]},[$ctx]		// load context
+	adr		$Ktbl,.LK512
+
+	rev64		@MSG[0],@MSG[0]
+	rev64		@MSG[1],@MSG[1]
+	rev64		@MSG[2],@MSG[2]
+	rev64		@MSG[3],@MSG[3]
+	rev64		@MSG[4],@MSG[4]
+	rev64		@MSG[5],@MSG[5]
+	rev64		@MSG[6],@MSG[6]
+	rev64		@MSG[7],@MSG[7]
+	b		.Loop_hw
+
+.align	4
+.Loop_hw:
+	ld1.64		{$W0},[$Ktbl],#16
+	subs		$num,$num,#1
+	sub		x4,$inp,#128
+	orr		$AB,@H[0],@H[0]			// offload
+	orr		$CD,@H[1],@H[1]
+	orr		$EF,@H[2],@H[2]
+	orr		$GH,@H[3],@H[3]
+	csel		$inp,$inp,x4,ne			// conditional rewind
+___
+for($i=0;$i<32;$i++) {
+$code.=<<___;
+	add.i64		$W0,$W0,@MSG[0]
+	ld1.64		{$W1},[$Ktbl],#16
+	ext		$W0,$W0,$W0,#8
+	ext		$fg,@H[2],@H[3],#8
+	ext		$de,@H[1],@H[2],#8
+	add.i64		@H[3],@H[3],$W0			// "T1 + H + K512[i]"
+	 sha512su0	@MSG[0],@MSG[1]
+	 ext		$m9_10,@MSG[4],@MSG[5],#8
+	sha512h		@H[3],$fg,$de
+	 sha512su1	@MSG[0],@MSG[7],$m9_10
+	add.i64		@H[4],@H[1],@H[3]		// "D + T1"
+	sha512h2	@H[3],$H[1],@H[0]
+___
+	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
+	@H = (@H[3],@H[0],@H[4],@H[2],@H[1]);
+}
+for(;$i<40;$i++) {
+$code.=<<___	if ($i<39);
+	ld1.64		{$W1},[$Ktbl],#16
+___
+$code.=<<___	if ($i==39);
+	sub		$Ktbl,$Ktbl,#$rounds*$SZ	// rewind
+___
+$code.=<<___;
+	add.i64		$W0,$W0,@MSG[0]
+	 ld1		{@MSG[0]},[$inp],#16		// load next input
+	ext		$W0,$W0,$W0,#8
+	ext		$fg,@H[2],@H[3],#8
+	ext		$de,@H[1],@H[2],#8
+	add.i64		@H[3],@H[3],$W0			// "T1 + H + K512[i]"
+	sha512h		@H[3],$fg,$de
+	 rev64		@MSG[0],@MSG[0]
+	add.i64		@H[4],@H[1],@H[3]		// "D + T1"
+	sha512h2	@H[3],$H[1],@H[0]
+___
+	($W0,$W1)=($W1,$W0);	push(@MSG,shift(@MSG));
+	@H = (@H[3],@H[0],@H[4],@H[2],@H[1]);
+}
+$code.=<<___;
+	add.i64		@H[0],@H[0],$AB			// accumulate
+	add.i64		@H[1],@H[1],$CD
+	add.i64		@H[2],@H[2],$EF
+	add.i64		@H[3],@H[3],$GH
+
+	cbnz		$num,.Loop_hw
+
+	st1.64		{@H[0]-@H[3]},[$ctx]		// store context
+
+	ldr		x29,[sp],#16
+	ret
+.size	sha512_block_armv8,.-sha512_block_armv8
+#endif
+___
+}
+
 $code.=<<___;
 #ifndef	__KERNEL__
 .comm	OPENSSL_armcap_P,4,4
@@ -753,6 +859,21 @@ ___
     }
 }
 
+{   my  %opcode = (
+	"sha512h"	=> 0xce608000,	"sha512h2"	=> 0xce608400,
+	"sha512su0"	=> 0xcec08000,	"sha512su1"	=> 0xce608800	);
+
+    sub unsha512 {
+	my ($mnemonic,$arg)=@_;
+
+	$arg =~ m/[qv]([0-9]+)[^,]*,\s*[qv]([0-9]+)[^,]*(?:,\s*[qv]([0-9]+))?/o
+	&&
+	sprintf ".inst\t0x%08x\t//%s %s",
+			$opcode{$mnemonic}|$1|($2<<5)|($3<<16),
+			$mnemonic,$arg;
+    }
+}
+
 open SELF,$0;
 while(<SELF>) {
         next if (/^#!/);
@@ -765,12 +886,15 @@ foreach(split("\n",$code)) {
 
 	s/\`([^\`]*)\`/eval($1)/ge;
 
+	s/\b(sha512\w+)\s+([qv].*)/unsha512($1,$2)/ge	or
 	s/\b(sha256\w+)\s+([qv].*)/unsha256($1,$2)/ge;
 
 	s/\bq([0-9]+)\b/v$1.16b/g;		# old->new registers
 
 	s/\.[ui]?8(\s)/$1/;
+	s/\.\w?64\b//		and s/\.16b/\.2d/g	or
 	s/\.\w?32\b//		and s/\.16b/\.4s/g;
+	m/\bext\b/		and s/\.2d/\.16b/g	or
 	m/(ld|st)1[^\[]+\[0\]/	and s/\.4s/\.s/g;
 
 	print $_,"\n";
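
The unsha512() helper keeps the module buildable with assemblers that do not
recognize the new mnemonics: each sha512* instruction is rewritten into a raw
.inst word by OR-ing the operand register numbers into a fixed opcode. A small
C illustration of that arithmetic (sha512_inst and the example operands are
made up; 0xce608000 is the sha512h opcode from the %opcode table above):

    #include <stdint.h>
    #include <stdio.h>

    /* Mirrors the Perl formula: opcode | Rd | (Rn << 5) | (Rm << 16). */
    static uint32_t sha512_inst(uint32_t opcode, unsigned rd, unsigned rn,
                                unsigned rm)
    {
        return opcode | rd | (rn << 5) | (rm << 16);
    }

    int main(void)
    {
        /* sha512h with Rd=3, Rn=5, Rm=6 encodes as 0xce6680a3 under this scheme. */
        printf(".inst\t0x%08x\t// sha512h\n", sha512_inst(0xce608000, 3, 5, 6));
        return 0;
    }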

