[openssl] master update

Richard Levitte levitte at openssl.org
Sat Feb 16 16:00:03 UTC 2019


The branch master has been updated
       via  3405db97e5448c784729b56837f3f8c776a01067 (commit)
      from  b2b580fe445e064da50c13d3e00f71022da16ece (commit)


- Log -----------------------------------------------------------------
commit 3405db97e5448c784729b56837f3f8c776a01067
Author: Andy Polyakov <appro at openssl.org>
Date:   Fri Feb 15 09:44:39 2019 +0100

    ARM assembly pack: make it Windows-friendly.
    
    "Windows friendliness" means a) flipping .thumb and .text directives,
    b) always generate Thumb-2 code when asked(*); c) Windows-specific
    references to external OPENSSL_armcap_P.
    
    (*) so far *some* modules were compiled as .code 32 even if Thumb-2
    was targeted. It works at hardware level because processor can alternate
    between the modes with no overhead. But clang --target=arm-windows's
    builtin assembler just refuses to compile .code 32...
    
    Reviewed-by: Paul Dale <paul.dale at oracle.com>
    Reviewed-by: Richard Levitte <levitte at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/8252)

-----------------------------------------------------------------------
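
The recurring shape of changes a) and c) can be sketched in one place
before diving into the per-file hunks. The following is an
illustrative composite, not a verbatim excerpt from any one module:
the .text directive now follows the .syntax/.thumb (or .code 32)
selection, the OPENSSL_armcap_P literal becomes an absolute word under
_WIN32, and the extra indirection previously reserved for __APPLE__
now applies to _WIN32 as well.

#!/usr/bin/env perl
# Illustrative composite of the per-module pattern; the label usage
# mirrors armv4-gf2m.pl, everything else is schematic.
my $code=<<___;
#include "arm_arch.h"

#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

.text				@ now emitted after the mode directives

.LOPENSSL_armcap:
#ifdef	_WIN32
.word	OPENSSL_armcap_P	@ absolute reference, fixed up by the linker
#else
.word	OPENSSL_armcap_P-.	@ PC-relative offset
#endif

	ldr	r12,.LOPENSSL_armcap	@ offset (or, on win32, the address)
#if !defined(_WIN32)
	adr	r10,.LOPENSSL_armcap	@ anchor to resolve the offset form
	ldr	r12,[r12,r10]
#endif
#if defined(__APPLE__) || defined(_WIN32)
	ldr	r12,[r12]		@ one more load on these targets
#endif
___
print $code;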

Summary of changes:
 crypto/aes/asm/aes-armv4.pl           |  3 ++-
 crypto/aes/asm/aesv8-armx.pl          | 24 +++++++++++++-----
 crypto/aes/asm/bsaes-armv7.pl         |  7 ++---
 crypto/armv4cpuid.pl                  |  3 ++-
 crypto/bn/asm/armv4-gf2m.pl           | 13 +++++++---
 crypto/bn/asm/armv4-mont.pl           | 17 +++++++++----
 crypto/chacha/asm/chacha-armv4.pl     | 11 ++++++--
 crypto/ec/asm/ecp_nistz256-armv4.pl   |  4 ++-
 crypto/modes/asm/ghash-armv4.pl       |  3 ++-
 crypto/modes/asm/ghashv8-armx.pl      | 26 ++++++++++++++-----
 crypto/perlasm/arm-xlate.pl           |  7 +++++
 crypto/poly1305/asm/poly1305-armv4.pl | 48 +++++++++++++++++------------------
 crypto/sha/asm/keccak1600-armv4.pl    | 34 ++++++++++++++++++++++---
 crypto/sha/asm/sha1-armv4-large.pl    | 20 ++++++++++-----
 crypto/sha/asm/sha256-armv4.pl        | 18 +++++++++----
 crypto/sha/asm/sha512-armv4.pl        | 20 +++++++++++----
 16 files changed, 185 insertions(+), 73 deletions(-)

diff --git a/crypto/aes/asm/aes-armv4.pl b/crypto/aes/asm/aes-armv4.pl
index abb2cc7..456a469 100644
--- a/crypto/aes/asm/aes-armv4.pl
+++ b/crypto/aes/asm/aes-armv4.pl
@@ -76,7 +76,6 @@ $code=<<___;
 # define __ARM_ARCH__ __LINUX_ARM_ARCH__
 #endif
 
-.text
 #if defined(__thumb2__) && !defined(__APPLE__)
 .syntax	unified
 .thumb
@@ -85,6 +84,8 @@ $code=<<___;
 #undef __thumb2__
 #endif
 
+.text
+
 .type	AES_Te,%object
 .align	5
 AES_Te:
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index 9ab2158..81bc1cb 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -53,18 +53,27 @@ open OUT,"| \"$^X\" $xlate $flavour $output";
 
 $prefix="aes_v8";
 
+$_byte = ($flavour =~ /win/ ? "DCB" : ".byte");
+
 $code=<<___;
 #include "arm_arch.h"
 
 #if __ARM_MAX_ARCH__>=7
-.text
 ___
-$code.=".arch	armv8-a+crypto\n"			if ($flavour =~ /64/);
+$code.=".arch	armv8-a+crypto\n.text\n"		if ($flavour =~ /64/);
 $code.=<<___						if ($flavour !~ /64/);
 .arch	armv7-a	// don't confuse not-so-latest binutils with argv8 :-)
 .fpu	neon
+#ifdef	__thumb2__
+.syntax	unified
+.thumb
+# define INST(a,b,c,d)	$_byte	c,d|0xc,a,b
+#else
 .code	32
-#undef	__thumb2__
+# define INST(a,b,c,d)	$_byte	a,b,c,d
+#endif
+
+.text
 ___
 
 # Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
@@ -955,7 +964,7 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	    # since ARMv7 instructions are always encoded little-endian.
 	    # correct solution is to use .inst directive, but older
 	    # assemblers don't implement it:-(
-	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+	    sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
 			$word&0xff,($word>>8)&0xff,
 			($word>>16)&0xff,($word>>24)&0xff,
 			$mnemonic,$arg;
@@ -996,14 +1005,17 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	s/\],#[0-9]+/]!/o;
 
 	s/[v]?(aes\w+)\s+([qv].*)/unaes($1,$2)/geo	or
-	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o	or
+	s/cclr\s+([^,]+),\s*([a-z]+)/mov.$2	$1,#0/o	or
 	s/vtbl\.8\s+(.*)/unvtbl($1)/geo			or
 	s/vdup\.32\s+(.*)/unvdup32($1)/geo		or
 	s/vmov\.32\s+(.*)/unvmov32($1)/geo		or
 	s/^(\s+)b\./$1b/o				or
-	s/^(\s+)mov\./$1mov/o				or
 	s/^(\s+)ret/$1bx\tlr/o;
 
+	if (s/^(\s+)mov\.([a-z]+)/$1mov$2/) {
+	    print "	it	$2\n";
+	}
+
 	print $_,"\n";
     }
 }
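
The INST() macro introduced above is how change b) plays out in this
module: as the retained comment explains, ARMv7 instructions are
always encoded little-endian and older assemblers lack the .inst
directive, so unrecognized v8 crypto mnemonics are emitted as raw
bytes. In ARM mode the four bytes of the 32-bit word go out in order;
Thumb-2 stores an instruction as two little-endian 16-bit halfwords,
so the Thumb branch swaps the byte pairs (c,d,a,b) and ORs the top
byte with 0xc, turning the leading 0xf3 of the ARM encoding into the
0xff of the matching Thumb-2 encoding (0xf3|0x0c == 0xff). $_byte
picks DCB over .byte on win flavours so the trick assembles there too,
and the reworked cclr translation now prints the explicit "it" that
Thumb-2 requires before a conditional mov. A minimal sketch of the
byte splitting, using a made-up instruction word:

#!/usr/bin/env perl
# Minimal sketch of the byte splitting that feeds INST(); $word is a
# made-up encoding, purely for illustration.
my $word = 0xf3b00340;
printf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\n",
       $word&0xff,($word>>8)&0xff,($word>>16)&0xff,($word>>24)&0xff;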
diff --git a/crypto/aes/asm/bsaes-armv7.pl b/crypto/aes/asm/bsaes-armv7.pl
index 5df195b..7f5219b 100644
--- a/crypto/aes/asm/bsaes-armv7.pl
+++ b/crypto/aes/asm/bsaes-armv7.pl
@@ -728,7 +728,6 @@ $code.=<<___;
 .arch	armv7-a
 .fpu	neon
 
-.text
 .syntax	unified 	@ ARMv7-capable assembler is expected to handle this
 #if defined(__thumb2__) && !defined(__APPLE__)
 .thumb
@@ -737,6 +736,8 @@ $code.=<<___;
 # undef __thumb2__
 #endif
 
+.text
+
 .type	_bsaes_decrypt8,%function
 .align	4
 _bsaes_decrypt8:
@@ -1125,9 +1126,9 @@ bsaes_cbc_encrypt:
 #ifndef	__thumb__
 	blo	AES_cbc_encrypt
 #else
-	bhs	1f
+	bhs	.Lcbc_do_bsaes
 	b	AES_cbc_encrypt
-1:
+.Lcbc_do_bsaes:
 #endif
 #endif
 
diff --git a/crypto/armv4cpuid.pl b/crypto/armv4cpuid.pl
index 3a7be6e..f8aeec6 100644
--- a/crypto/armv4cpuid.pl
+++ b/crypto/armv4cpuid.pl
@@ -21,7 +21,6 @@ open OUT,"| \"$^X\" $xlate $flavour $output";
 $code.=<<___;
 #include "arm_arch.h"
 
-.text
 #if defined(__thumb2__) && !defined(__APPLE__)
 .syntax	unified
 .thumb
@@ -30,6 +29,8 @@ $code.=<<___;
 #undef	__thumb2__
 #endif
 
+.text
+
 .align	5
 .global	OPENSSL_atomic_add
 .type	OPENSSL_atomic_add,%function
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl
index 442ae46..0bf6f63 100644
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -57,13 +57,14 @@ if ($flavour && $flavour ne "void") {
 $code=<<___;
 #include "arm_arch.h"
 
-.text
 #if defined(__thumb2__)
 .syntax	unified
 .thumb
 #else
 .code	32
 #endif
+
+.text
 ___
 ################
 # private interface to mul_1x1_ialu
@@ -176,11 +177,13 @@ bn_GF2m_mul_2x2:
 #if __ARM_MAX_ARCH__>=7
 	stmdb	sp!,{r10,lr}
 	ldr	r12,.LOPENSSL_armcap
+# if !defined(_WIN32)
 	adr	r10,.LOPENSSL_armcap
 	ldr	r12,[r12,r10]
-#ifdef	__APPLE__
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
 	ldr	r12,[r12]
-#endif
+# endif
 	tst	r12,#ARMV7_NEON
 	itt	ne
 	ldrne	r10,[sp],#8
@@ -310,7 +313,11 @@ $code.=<<___;
 #if __ARM_MAX_ARCH__>=7
 .align	5
 .LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
 .word	OPENSSL_armcap_P-.
+# endif
 #endif
 .asciz	"GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
 .align	5
diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl
index b4f6c06..7e0a4d8 100644
--- a/crypto/bn/asm/armv4-mont.pl
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -97,7 +97,6 @@ $_num="$num,#15*4";	$_bpend=$_num;
 $code=<<___;
 #include "arm_arch.h"
 
-.text
 #if defined(__thumb2__)
 .syntax	unified
 .thumb
@@ -105,10 +104,16 @@ $code=<<___;
 .code	32
 #endif
 
+.text
+
 #if __ARM_MAX_ARCH__>=7
 .align	5
 .LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
 .word	OPENSSL_armcap_P-.Lbn_mul_mont
+# endif
 #endif
 
 .global	bn_mul_mont
@@ -122,12 +127,14 @@ bn_mul_mont:
 #if __ARM_MAX_ARCH__>=7
 	tst	ip,#7
 	bne	.Lialu
-	adr	r0,.Lbn_mul_mont
-	ldr	r2,.LOPENSSL_armcap
+	ldr	r0,.LOPENSSL_armcap
+#if !defined(_WIN32)
+	adr	r2,.Lbn_mul_mont
 	ldr	r0,[r0,r2]
-#ifdef	__APPLE__
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
 	ldr	r0,[r0]
-#endif
+# endif
 	tst	r0,#ARMV7_NEON		@ NEON available?
 	ldmia	sp, {r0,r2}
 	beq	.Lialu
diff --git a/crypto/chacha/asm/chacha-armv4.pl b/crypto/chacha/asm/chacha-armv4.pl
index 9bbfc6b..c440296 100755
--- a/crypto/chacha/asm/chacha-armv4.pl
+++ b/crypto/chacha/asm/chacha-armv4.pl
@@ -171,7 +171,6 @@ my @ret;
 $code.=<<___;
 #include "arm_arch.h"
 
-.text
 #if defined(__thumb2__) || defined(__clang__)
 .syntax	unified
 #endif
@@ -185,6 +184,8 @@ $code.=<<___;
 #define ldrhsb	ldrbhs
 #endif
 
+.text
+
 .align	5
 .Lsigma:
 .long	0x61707865,0x3320646e,0x79622d32,0x6b206574	@ endian-neutral
@@ -192,7 +193,11 @@ $code.=<<___;
 .long	1,0,0,0
 #if __ARM_MAX_ARCH__>=7
 .LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
 .word   OPENSSL_armcap_P-.LChaCha20_ctr32
+# endif
 #else
 .word	-1
 #endif
@@ -219,8 +224,10 @@ ChaCha20_ctr32:
 	cmp	r2,#192			@ test len
 	bls	.Lshort
 	ldr	r4,[r14,#-32]
+# if !defined(_WIN32)
 	ldr	r4,[r14,r4]
-# ifdef	__APPLE__
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
 	ldr	r4,[r4]
 # endif
 	tst	r4,#ARMV7_NEON
diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl
index 4005a6f..43a675b 100755
--- a/crypto/ec/asm/ecp_nistz256-armv4.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv4.pl
@@ -51,7 +51,6 @@ if ($flavour && $flavour ne "void") {
 $code.=<<___;
 #include "arm_arch.h"
 
-.text
 #if defined(__thumb2__)
 .syntax	unified
 .thumb
@@ -80,6 +79,7 @@ close TABLE;
 die "insane number of elements" if ($#arr != 64*16*37-1);
 
 $code.=<<___;
+.rodata
 .globl	ecp_nistz256_precomputed
 .type	ecp_nistz256_precomputed,%object
 .align	12
@@ -104,6 +104,8 @@ for(1..37) {
 }
 $code.=<<___;
 .size	ecp_nistz256_precomputed,.-ecp_nistz256_precomputed
+
+.text
 .align	5
 .LRR:	@ 2^512 mod P precomputed for NIST P256 polynomial
 .long	0x00000003, 0x00000000, 0xffffffff, 0xfffffffb
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
index 759d29c..1391b1b 100644
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -142,7 +142,6 @@ ___
 $code=<<___;
 #include "arm_arch.h"
 
-.text
 #if defined(__thumb2__) || defined(__clang__)
 .syntax	unified
 #define ldrplb  ldrbpl
@@ -154,6 +153,8 @@ $code=<<___;
 .code	32
 #endif
 
+.text
+
 .type	rem_4bit,%object
 .align	5
 rem_4bit:
diff --git a/crypto/modes/asm/ghashv8-armx.pl b/crypto/modes/asm/ghashv8-armx.pl
index ea05950..e891583 100644
--- a/crypto/modes/asm/ghashv8-armx.pl
+++ b/crypto/modes/asm/ghashv8-armx.pl
@@ -66,18 +66,26 @@ $inc="x12";
 {
 my ($Xl,$Xm,$Xh,$IN)=map("q$_",(0..3));
 my ($t0,$t1,$t2,$xC2,$H,$Hhl,$H2)=map("q$_",(8..14));
+my $_byte = ($flavour =~ /win/ ? "DCB" : ".byte");
 
 $code=<<___;
 #include "arm_arch.h"
 
 #if __ARM_MAX_ARCH__>=7
-.text
 ___
-$code.=".arch	armv8-a+crypto\n"	if ($flavour =~ /64/);
-$code.=<<___				if ($flavour !~ /64/);
+$code.=".arch	armv8-a+crypto\n.text\n"	if ($flavour =~ /64/);
+$code.=<<___					if ($flavour !~ /64/);
 .fpu	neon
-.code	32
-#undef	__thumb2__
+#ifdef __thumb2__
+.syntax        unified
+.thumb
+# define INST(a,b,c,d) $_byte  c,0xef,a,b
+#else
+.code  32
+# define INST(a,b,c,d) $_byte  a,b,c,0xf2
+#endif
+
+.text
 ___
 
 ################################################################################
@@ -752,7 +760,7 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	    # since ARMv7 instructions are always encoded little-endian.
 	    # correct solution is to use .inst directive, but older
 	    # assemblers don't implement it:-(
-	    sprintf ".byte\t0x%02x,0x%02x,0x%02x,0x%02x\t@ %s %s",
+	    sprintf "INST(0x%02x,0x%02x,0x%02x,0x%02x)\t@ %s %s",
 			$word&0xff,($word>>8)&0xff,
 			($word>>16)&0xff,($word>>24)&0xff,
 			$mnemonic,$arg;
@@ -767,13 +775,17 @@ if ($flavour =~ /64/) {			######## 64-bit code
 	# fix up remaining new-style suffixes
 	s/\],#[0-9]+/]!/o;
 
-	s/cclr\s+([^,]+),\s*([a-z]+)/mov$2	$1,#0/o			or
+	s/cclr\s+([^,]+),\s*([a-z]+)/mov.$2	$1,#0/o			or
 	s/vdup\.32\s+(.*)/unvdup32($1)/geo				or
 	s/v?(pmull2?)\.p64\s+(.*)/unvpmullp64($1,$2)/geo		or
 	s/\bq([0-9]+)#(lo|hi)/sprintf "d%d",2*$1+($2 eq "hi")/geo	or
 	s/^(\s+)b\./$1b/o						or
 	s/^(\s+)ret/$1bx\tlr/o;
 
+	if (s/^(\s+)mov\.([a-z]+)/$1mov$2/) {
+	    print "     it      $2\n";
+	}
+
 	print $_,"\n";
     }
 }
diff --git a/crypto/perlasm/arm-xlate.pl b/crypto/perlasm/arm-xlate.pl
index d78a8ba..b953f1f 100755
--- a/crypto/perlasm/arm-xlate.pl
+++ b/crypto/perlasm/arm-xlate.pl
@@ -28,6 +28,13 @@ my $fpu = sub {
     if ($flavour =~ /linux/)	{ ".fpu\t".join(',',@_); }
     else			{ ""; }
 };
+my $rodata = sub {
+    SWITCH: for ($flavour) {
+	/linux/		&& return ".section\t.rodata";
+	/ios/		&& return ".section\t__TEXT,__const";
+	last;
+    }
+};
 my $hidden = sub {
     if ($flavour =~ /ios/)	{ ".private_extern\t".join(',',@_); }
     else			{ ".hidden\t".join(',',@_); }
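
The new $rodata handler above is what lets ecp_nistz256-armv4.pl tag
its precomputed table with a bare .rodata and move it out of .text:
linux flavours translate the marker to .section .rodata, ios flavours
to .section __TEXT,__const. Other flavours fall through the SWITCH
with no expansion; that fall-through reading is an assumption taken
from the hunk, not something stated in this mail. A self-contained
sketch of the dispatch:

#!/usr/bin/env perl
# Hedged sketch of the flavour dispatch in the new $rodata handler;
# the empty fall-through for other flavours is an assumption.
for my $flavour (qw(linux32 ios64 win32)) {
    my $out;
    SWITCH: for ($flavour) {
	/linux/	&& do { $out = ".section\t.rodata";        last; };
	/ios/	&& do { $out = ".section\t__TEXT,__const"; last; };
	last;
    }
    printf "%-8s => %s\n", $flavour, $out // "(no expansion)";
}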
diff --git a/crypto/poly1305/asm/poly1305-armv4.pl b/crypto/poly1305/asm/poly1305-armv4.pl
index 7003cc7..38622af 100755
--- a/crypto/poly1305/asm/poly1305-armv4.pl
+++ b/crypto/poly1305/asm/poly1305-armv4.pl
@@ -48,7 +48,6 @@ if ($flavour && $flavour ne "void") {
 $code.=<<___;
 #include "arm_arch.h"
 
-.text
 #if defined(__thumb2__)
 .syntax	unified
 .thumb
@@ -56,6 +55,8 @@ $code.=<<___;
 .code	32
 #endif
 
+.text
+
 .globl	poly1305_emit
 .globl	poly1305_blocks
 .globl	poly1305_init
@@ -100,8 +101,10 @@ poly1305_init:
 	and	r4,r4,r10
 
 #if	__ARM_MAX_ARCH__>=7
+# if !defined(_WIN32)
 	ldr	r12,[r11,r12]		@ OPENSSL_armcap_P
-# ifdef	__APPLE__
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
 	ldr	r12,[r12]
 # endif
 #endif
@@ -116,31 +119,21 @@ poly1305_init:
 
 #if	__ARM_MAX_ARCH__>=7
 	tst	r12,#ARMV7_NEON		@ check for NEON
-# ifdef	__APPLE__
-	adr	r9,poly1305_blocks_neon
-	adr	r11,poly1305_blocks
-#  ifdef __thumb2__
-	it	ne
-#  endif
+# ifdef	__thumb2__
+	adr	r9,.Lpoly1305_blocks_neon
+	adr	r11,.Lpoly1305_blocks
+	adr	r12,.Lpoly1305_emit
+	adr	r10,.Lpoly1305_emit_neon
+	itt	ne
 	movne	r11,r9
-	adr	r12,poly1305_emit
-	adr	r10,poly1305_emit_neon
-#  ifdef __thumb2__
-	it	ne
-#  endif
 	movne	r12,r10
+	orr	r11,r11,#1	@ thumb-ify address
+	orr	r12,r12,#1
 # else
-#  ifdef __thumb2__
-	itete	eq
-#  endif
-	addeq	r12,r11,#(poly1305_emit-.Lpoly1305_init)
-	addne	r12,r11,#(poly1305_emit_neon-.Lpoly1305_init)
-	addeq	r11,r11,#(poly1305_blocks-.Lpoly1305_init)
-	addne	r11,r11,#(poly1305_blocks_neon-.Lpoly1305_init)
-# endif
-# ifdef	__thumb2__
-	orr	r12,r12,#1	@ thumb-ify address
-	orr	r11,r11,#1
+	addeq	r12,r11,#(.Lpoly1305_emit-.Lpoly1305_init)
+	addne	r12,r11,#(.Lpoly1305_emit_neon-.Lpoly1305_init)
+	addeq	r11,r11,#(.Lpoly1305_blocks-.Lpoly1305_init)
+	addne	r11,r11,#(.Lpoly1305_blocks_neon-.Lpoly1305_init)
 # endif
 #endif
 	ldrb	r9,[$inp,#11]
@@ -352,6 +345,7 @@ $code.=<<___;
 .type	poly1305_emit,%function
 .align	5
 poly1305_emit:
+.Lpoly1305_emit:
 	stmdb	sp!,{r4-r11}
 .Lpoly1305_emit_enter:
 
@@ -671,6 +665,7 @@ poly1305_init_neon:
 .type	poly1305_blocks_neon,%function
 .align	5
 poly1305_blocks_neon:
+.Lpoly1305_blocks_neon:
 	ldr	ip,[$ctx,#36]		@ is_base2_26
 	ands	$len,$len,#-16
 	beq	.Lno_data_neon
@@ -1157,6 +1152,7 @@ poly1305_blocks_neon:
 .type	poly1305_emit_neon,%function
 .align	5
 poly1305_emit_neon:
+.Lpoly1305_emit_neon:
 	ldr	ip,[$ctx,#36]		@ is_base2_26
 
 	stmdb	sp!,{r4-r11}
@@ -1229,7 +1225,11 @@ poly1305_emit_neon:
 .Lzeros:
 .long	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0
 .LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
 .word	OPENSSL_armcap_P-.Lpoly1305_init
+# endif
 #endif
 ___
 }	}
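
The poly1305_init rewrite above also folds in a Thumb interworking
detail: adr materializes an address with bit 0 clear, but the function
pointers stored here are eventually called through bx/blx, which stays
in Thumb state only when bit 0 of the target is set. Hence the single
pair of orr "thumb-ify" instructions after both conditional moves,
replacing the scattered __APPLE__/__thumb2__ special cases. The idiom
in isolation, as a hedged sketch rather than a verbatim excerpt:

#!/usr/bin/env perl
# Hedged sketch of the "thumb-ify" idiom; not a verbatim excerpt.
my $code=<<___;
	adr	r11,.Lpoly1305_blocks	@ bit 0 clear: would be entered as ARM
	orr	r11,r11,#1		@ set the Thumb bit for a later bx/blx
___
print $code;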
diff --git a/crypto/sha/asm/keccak1600-armv4.pl b/crypto/sha/asm/keccak1600-armv4.pl
index 44504fa..be411ed 100755
--- a/crypto/sha/asm/keccak1600-armv4.pl
+++ b/crypto/sha/asm/keccak1600-armv4.pl
@@ -113,8 +113,6 @@ my @T = map([ 8*$_, 8*($_+1), 8*($_+2), 8*($_+3), 8*($_+4) ], (30,35,40,45,50));
 $code.=<<___;
 #include "arm_arch.h"
 
-.text
-
 #if defined(__thumb2__)
 .syntax	unified
 .thumb
@@ -122,6 +120,8 @@ $code.=<<___;
 .code	32
 #endif
 
+.text
+
 .type	iotas32, %object
 .align	5
 iotas32:
@@ -691,7 +691,14 @@ ___
 $code.=<<___;
 	blo	.Lround2x
 
+#if __ARM_ARCH__>=5
 	ldr	pc,[sp,#440]
+#else
+	ldr	lr,[sp,#440]
+	tst	lr,#1
+	moveq	pc,lr		@ be binary compatible with V4, yet
+	bx	lr		@ interoperable with Thumb ISA:-)
+#endif
 .size	KeccakF1600_int,.-KeccakF1600_int
 
 .type	KeccakF1600, %function
@@ -730,7 +737,14 @@ KeccakF1600:
 	stmia	@E[1], {@C[0]-@C[9]}
 
 	add	sp,sp,#440+20
+#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r11,pc}
+#else
+	ldmia	sp!,{r4-r11,lr}
+	tst	lr,#1
+	moveq	pc,lr		@ be binary compatible with V4, yet
+	bx	lr		@ interoperable with Thumb ISA:-)
+#endif
 .size	KeccakF1600,.-KeccakF1600
 ___
 { my ($A_flat,$inp,$len,$bsz) = map("r$_",(10..12,14));
@@ -905,7 +919,14 @@ SHA3_absorb:
 .Labsorb_abort:
 	add	sp,sp,#456+32
 	mov	r0,$len			@ return value
+#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r12,pc}
+#else
+	ldmia	sp!,{r4-r12,lr}
+	tst	lr,#1
+	moveq	pc,lr		@ be binary compatible with V4, yet
+	bx	lr		@ interoperable with Thumb ISA:-)
+#endif
 .size	SHA3_absorb,.-SHA3_absorb
 ___
 }
@@ -1055,7 +1076,14 @@ SHA3_squeeze:
 .align	4
 .Lsqueeze_done:
 	add	sp,sp,#24
+#if __ARM_ARCH__>=5
 	ldmia	sp!,{r4-r10,pc}
+#else
+	ldmia	sp!,{r4-r10,lr}
+	tst	lr,#1
+	moveq	pc,lr		@ be binary compatible with V4, yet
+	bx	lr		@ interoperable with Thumb ISA:-)
+#endif
 .size	SHA3_squeeze,.-SHA3_squeeze
 ___
 }
@@ -1265,7 +1293,7 @@ KeccakF1600_neon:
 	subs	r3, r3, #1
 	bne	.Loop_neon
 
-	bx	lr
+	ret
 .size	KeccakF1600_neon,.-KeccakF1600_neon
 
 .global	SHA3_absorb_neon
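
The four new epilogues in this file all encode the same pre-ARMv5
constraint their comments allude to: on ARMv4, writing to pc (via ldr,
ldmia or mov) does not switch instruction sets, whereas from ARMv5 on
a load into pc honours the Thumb bit. The fallback path therefore pops
the return address into lr and dispatches on bit 0: an ARM caller gets
mov pc,lr, which plain ARMv4 accepts, and a Thumb caller gets bx lr,
available from ARMv4T on. Restated with fuller comments (editorial
annotation of the same instructions):

#!/usr/bin/env perl
# Annotated restatement of the ARMv4-compatible epilogue above; the
# register list matches KeccakF1600, the comments are editorial.
my $code=<<___;
	ldmia	sp!,{r4-r11,lr}	@ pop callee-saved regs, return addr into lr
	tst	lr,#1		@ bit 0 of the return address is the Thumb bit
	moveq	pc,lr		@ ARM caller: mov to pc is fine on plain v4
	bx	lr		@ Thumb caller: bx (ARMv4T and later) switches
___
print $code;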
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl
index cdacbd4..cd0b95a 100644
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -187,7 +187,6 @@ ___
 $code=<<___;
 #include "arm_arch.h"
 
-.text
 #if defined(__thumb2__)
 .syntax	unified
 .thumb
@@ -195,6 +194,8 @@ $code=<<___;
 .code	32
 #endif
 
+.text
+
 .global	sha1_block_data_order
 .type	sha1_block_data_order,%function
 
@@ -202,12 +203,14 @@ $code=<<___;
 sha1_block_data_order:
 #if __ARM_MAX_ARCH__>=7
 .Lsha1_block:
-	adr	r3,.Lsha1_block
 	ldr	r12,.LOPENSSL_armcap
+# if !defined(_WIN32)
+	adr	r3,.Lsha1_block
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
-#ifdef	__APPLE__
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
 	ldr	r12,[r12]
-#endif
+# endif
 	tst	r12,#ARMV8_SHA1
 	bne	.LARMv8
 	tst	r12,#ARMV7_NEON
@@ -311,7 +314,11 @@ $code.=<<___;
 .LK_60_79:	.word	0xca62c1d6
 #if __ARM_MAX_ARCH__>=7
 .LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
 .word	OPENSSL_armcap_P-.Lsha1_block
+# endif
 #endif
 .asciz	"SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
 .align	5
@@ -613,14 +620,15 @@ my ($ABCD,$E,$E0,$E1)=map("q$_",(0..3));
 my @MSG=map("q$_",(4..7));
 my @Kxx=map("q$_",(8..11));
 my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
+my $_byte = ($flavour =~ /win/ ? "DCB" : ".byte");
 
 $code.=<<___;
 #if __ARM_MAX_ARCH__>=7
 
 # if defined(__thumb2__)
-#  define INST(a,b,c,d)	.byte	c,d|0xf,a,b
+#  define INST(a,b,c,d)	$_byte	c,d|0xf,a,b
 # else
-#  define INST(a,b,c,d)	.byte	a,b,c,d|0x10
+#  define INST(a,b,c,d)	$_byte	a,b,c,d|0x10
 # endif
 
 .type	sha1_block_data_order_armv8,%function
diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl
index e0512b4..cd01fcb 100644
--- a/crypto/sha/asm/sha256-armv4.pl
+++ b/crypto/sha/asm/sha256-armv4.pl
@@ -181,7 +181,6 @@ $code=<<___;
 # define __ARM_MAX_ARCH__ 7
 #endif
 
-.text
 #if defined(__thumb2__)
 .syntax unified
 .thumb
@@ -189,6 +188,8 @@ $code=<<___;
 .code   32
 #endif
 
+.text
+
 .type	K256,%object
 .align	5
 K256:
@@ -212,7 +213,11 @@ K256:
 .word	0				@ terminator
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
 .word	OPENSSL_armcap_P-.Lsha256_block_data_order
+# endif
 #endif
 .align	5
 
@@ -227,10 +232,12 @@ sha256_block_data_order:
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
+# if !defined(_WIN32)
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
-#ifdef	__APPLE__
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
 	ldr	r12,[r12]
-#endif
+# endif
 	tst	r12,#ARMV8_SHA256
 	bne	.LARMv8
 	tst	r12,#ARMV7_NEON
@@ -598,14 +605,15 @@ my ($ABCD,$EFGH,$abcd)=map("q$_",(0..2));
 my @MSG=map("q$_",(8..11));
 my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
 my $Ktbl="r3";
+my $_byte = ($flavour =~ /win/ ? "DCB" : ".byte");
 
 $code.=<<___;
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 
 # if defined(__thumb2__)
-#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
+#  define INST(a,b,c,d)	$_byte	c,d|0xc,a,b
 # else
-#  define INST(a,b,c,d)	.byte	a,b,c,d
+#  define INST(a,b,c,d)	$_byte	a,b,c,d
 # endif
 
 .type	sha256_block_data_order_armv8,%function
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl
index 795c4cb..39c943b 100644
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -196,6 +196,9 @@ $code.=<<___;
 	add	$Ktbl,$Ktbl,#8
 ___
 }
+
+my $_word = ($flavour =~ /win/ ? "DCDU" : ".word");
+
 $code=<<___;
 #ifndef __KERNEL__
 # include "arm_arch.h"
@@ -211,14 +214,13 @@ $code=<<___;
 #ifdef __ARMEL__
 # define LO 0
 # define HI 4
-# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
+# define WORD64(hi0,lo0,hi1,lo1)	$_word	lo0,hi0, lo1,hi1
 #else
 # define HI 0
 # define LO 4
-# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
+# define WORD64(hi0,lo0,hi1,lo1)	$_word	hi0,lo0, hi1,lo1
 #endif
 
-.text
 #if defined(__thumb2__)
 .syntax unified
 .thumb
@@ -227,6 +229,8 @@ $code=<<___;
 .code	32
 #endif
 
+.text
+
 .type	K512,%object
 .align	5
 K512:
@@ -273,7 +277,11 @@ WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
 .size	K512,.-K512
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 .LOPENSSL_armcap:
+# ifdef	_WIN32
+.word	OPENSSL_armcap_P
+# else
 .word	OPENSSL_armcap_P-.Lsha512_block_data_order
+# endif
 .skip	32-4
 #else
 .skip	32
@@ -290,10 +298,12 @@ sha512_block_data_order:
 #endif
 #if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
 	ldr	r12,.LOPENSSL_armcap
+# if !defined(_WIN32)
 	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
-#ifdef	__APPLE__
+# endif
+# if defined(__APPLE__) || defined(_WIN32)
 	ldr	r12,[r12]
-#endif
+# endif
 	tst	r12,#ARMV7_NEON
 	bne	.LNEON
 #endif
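
sha512-armv4.pl extends the same flavour-dependent directive selection
to 64-bit data: WORD64 now expands through $_word, which is .word for
GNU-style output and DCDU for win flavours, mirroring the $_byte
.byte/DCB choice in the AES, GHASH and SHA modules above. (DCDU rather
than DCD presumably preserves .word's lack of implied alignment; that
reading is an assumption.) A sketch of the selection:

#!/usr/bin/env perl
# Hedged sketch of the flavour-dependent data directives used above.
my $flavour = $ARGV[0] // "linux32";
my $_byte = ($flavour =~ /win/ ? "DCB"  : ".byte");
my $_word = ($flavour =~ /win/ ? "DCDU" : ".word");
print "$_byte\t0x0f\n";
print "$_word\t0xdeadbeef\n";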

