[openssl-commits] [openssl] OpenSSL source code branch master updated. c1669e1c205dc8e695fb0c10a655f434e758b9f7
Andy Polyakov
appro at openssl.org
Sun Jan 4 22:54:13 UTC 2015
This is an automated email from the git hooks/post-receive script. It was
generated because a ref change was pushed to the repository containing
the project "OpenSSL source code".
The branch, master has been updated
via c1669e1c205dc8e695fb0c10a655f434e758b9f7 (commit)
from 9e557ab2624d5c5e8d799c123f5e8211664d8845 (commit)
Those revisions listed above that are new to this repository have
not appeared on any other notification email; so we list those
revisions in full, below.
- Log -----------------------------------------------------------------
commit c1669e1c205dc8e695fb0c10a655f434e758b9f7
Author: Andy Polyakov <appro at openssl.org>
Date: Fri Nov 7 22:48:22 2014 +0100
Remove inconsistency in ARM support.
This facilitates "universal" builds, ones that target multiple
architectures, e.g. ARMv5 through ARMv7. See commentary in
Configure for details.
Reviewed-by: Ard Biesheuvel <ard.biesheuvel at linaro.org>
Reviewed-by: Matt Caswell <matt at openssl.org>
-----------------------------------------------------------------------
Summary of changes:
Configure | 30 +++++++-
crypto/aes/asm/aesv8-armx.pl | 8 ++-
crypto/aes/asm/bsaes-armv7.pl | 8 ++-
crypto/arm_arch.h | 12 ++++
crypto/armcap.c | 7 +-
crypto/armv4cpuid.S | 140 +++++++++++++++++-------------------
crypto/bn/asm/armv4-gf2m.pl | 128 +++++++++++++++++----------------
crypto/bn/asm/armv4-mont.pl | 9 +--
crypto/evp/e_aes.c | 2 +-
crypto/modes/asm/ghash-armv4.pl | 3 +-
crypto/modes/gcm128.c | 2 +-
crypto/sha/asm/sha1-armv4-large.pl | 11 ++-
crypto/sha/asm/sha256-armv4.pl | 11 ++-
crypto/sha/asm/sha512-armv4.pl | 11 ++-
14 files changed, 223 insertions(+), 159 deletions(-)
diff --git a/Configure b/Configure
index 5c4a460..6246822 100755
--- a/Configure
+++ b/Configure
@@ -350,8 +350,34 @@ my %table=(
# throw in -D[BL]_ENDIAN, whichever appropriate...
"linux-generic32","gcc:-DTERMIO -O3 -fomit-frame-pointer -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${no_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-ppc", "gcc:-DB_ENDIAN -DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_RISC1 DES_UNROLL:${ppc32_asm}:linux32:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
-# It's believed that majority of ARM toolchains predefine appropriate -march.
-# If you compiler does not, do complement config command line with one!
+
+#######################################################################
+# Note that -march is deliberately not among the compiler options in
+# the linux-armv4 target line below. This leaves you the choice to:
+#
+# a) rely on your compiler's default by not specifying one;
+# b) specify your target platform explicitly for optimal performance,
+#    e.g. -march=armv6 or -march=armv7-a;
+# c) build a "universal" binary that targets a *range* of platforms by
+#    specifying the minimum and maximum supported architectures;
+#
+# As for option c): it makes no sense to specify a maximum lower than
+# ARMv7, because ARMv7 is the minimum requirement for the run-time
+# switch between platform-specific code paths; without that switch,
+# performance would be equivalent to a build for the minimum
+# architecture alone. Secondly, there are some natural limitations
+# that you have to accept. Most notably, you can *not* build a
+# "universal" binary for a big-endian platform, because an ARMv7
+# processor always fetches instructions in little-endian order.
+# Another limitation is that -mthumb can't "cross" the -march=armv6t2
+# boundary, where the ISA became Thumb-2. This limitation is somewhat
+# artificial, as it's not strictly impossible to support, just deemed
+# too tricky. And of course you have to be sure that your binutils can
+# actually handle the maximum target platform. With all this in mind,
+# here is an example of how to configure a "universal" build:
+#
+# ./Configure linux-armv4 -march=armv6 -D__ARM_MAX_ARCH__=8
+#
"linux-armv4", "gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:BN_LLONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${armv4_asm}:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
"linux-aarch64","gcc:-DTERMIO -O3 -Wall::-D_REENTRANT::-ldl:SIXTY_FOUR_BIT_LONG RC4_CHAR RC4_CHUNK DES_INT DES_UNROLL BF_PTR:${aarch64_asm}:linux64:dlfcn:linux-shared:-fPIC::.so.\$(SHLIB_MAJOR).\$(SHLIB_MINOR)",
# Configure script adds minimally required -march for assembly support,
diff --git a/crypto/aes/asm/aesv8-armx.pl b/crypto/aes/asm/aesv8-armx.pl
index 923c7f6..1e93f86 100755
--- a/crypto/aes/asm/aesv8-armx.pl
+++ b/crypto/aes/asm/aesv8-armx.pl
@@ -35,11 +35,13 @@ $prefix="aes_v8";
$code=<<___;
#include "arm_arch.h"
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.text
___
-$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
-$code.=".fpu neon\n.code 32\n" if ($flavour !~ /64/);
+$code.=".arch armv8-a+crypto\n" if ($flavour =~ /64/);
+$code.=".arch armv7-a\n.fpu neon\n.code 32\n" if ($flavour !~ /64/);
+ #^^^^^^ this is done to simplify adoption by not depending
+ # on latest binutils.
# Assembler mnemonics are an eclectic mix of 32- and 64-bit syntax,
# NEON is mostly 32-bit mnemonics, integer - mostly 64. Goal is to
diff --git a/crypto/aes/asm/bsaes-armv7.pl b/crypto/aes/asm/bsaes-armv7.pl
index f3d96d9..fcc81d1 100644
--- a/crypto/aes/asm/bsaes-armv7.pl
+++ b/crypto/aes/asm/bsaes-armv7.pl
@@ -702,13 +702,17 @@ $code.=<<___;
# define BSAES_ASM_EXTENDED_KEY
# define XTS_CHAIN_TWEAK
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
+# define __ARM_MAX_ARCH__ __LINUX_ARM_ARCH__
#endif
#ifdef __thumb__
# define adrl adr
#endif
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
.text
.syntax unified @ ARMv7-capable assembler is expected to handle this
#ifdef __thumb2__
@@ -717,8 +721,6 @@ $code.=<<___;
.code 32
#endif
-.fpu neon
-
.type _bsaes_decrypt8,%function
.align 4
_bsaes_decrypt8:
diff --git a/crypto/arm_arch.h b/crypto/arm_arch.h
index d406c8c..9a125d8 100644
--- a/crypto/arm_arch.h
+++ b/crypto/arm_arch.h
@@ -48,6 +48,18 @@
# endif
#endif
+#if !defined(__ARM_MAX_ARCH__)
+# define __ARM_MAX_ARCH__ __ARM_ARCH__
+#endif
+
+#if __ARM_MAX_ARCH__<__ARM_ARCH__
+# error "__ARM_MAX_ARCH__ can't be less than __ARM_ARCH__"
+#elif __ARM_MAX_ARCH__!=__ARM_ARCH__
+# if __ARM_ARCH__<7 && __ARM_MAX_ARCH__>=7 && defined(__ARMEB__)
+# error "can't build universal big-endian binary"
+# endif
+#endif
+
#if !__ASSEMBLER__
extern unsigned int OPENSSL_armcap_P;
#endif
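To illustrate the split these guards set up (compile-time gating on __ARM_MAX_ARCH__, run-time gating on OPENSSL_armcap_P), here is a minimal C sketch; transform() and its two helpers are hypothetical names, not code from this commit:

    #include "arm_arch.h"

    void transform_generic(void *ctx);  /* baseline path, safe on any CPU     */
    #if __ARM_MAX_ARCH__ >= 7
    void transform_neon(void *ctx);     /* compiled in, but gated at run time */
    #endif

    void transform(void *ctx)
    {
    #if __ARM_MAX_ARCH__ >= 7
        /* run-time switch: taken only on NEON-capable processors */
        if (OPENSSL_armcap_P & ARMV7_NEON) {
            transform_neon(ctx);
            return;
        }
    #endif
        transform_generic(ctx);
    }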
diff --git a/crypto/armcap.c b/crypto/armcap.c
index 7e46d07..24f7a08 100644
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -7,8 +7,12 @@
#include "arm_arch.h"
-unsigned int OPENSSL_armcap_P;
+unsigned int OPENSSL_armcap_P=0;
+#if __ARM_MAX_ARCH__<7
+void OPENSSL_cpuid_setup(void) {}
+unsigned long OPENSSL_rdtsc(void) { return 0; }
+#else
static sigset_t all_masked;
static sigjmp_buf ill_jmp;
@@ -155,3 +159,4 @@ void OPENSSL_cpuid_setup(void)
sigaction (SIGILL,&ill_oact,NULL);
sigprocmask(SIG_SETMASK,&oset,NULL);
}
+#endif
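For reference, the probing pattern behind the retained #else branch of armcap.c looks roughly like this (a condensed sketch with signal masking and error handling omitted, not the verbatim source):

    #include <setjmp.h>
    #include <signal.h>
    #include <string.h>

    extern void _armv7_neon_probe(void);   /* provided by armv4cpuid.S */

    static sigjmp_buf ill_jmp;
    static void ill_handler(int sig) { siglongjmp(ill_jmp, sig); }

    /* execute a candidate instruction; treat SIGILL as "feature absent" */
    static int have_neon(void)
    {
        struct sigaction ill_act, ill_oact;
        int ret = 0;

        memset(&ill_act, 0, sizeof(ill_act));
        ill_act.sa_handler = ill_handler;
        sigaction(SIGILL, &ill_act, &ill_oact);

        if (sigsetjmp(ill_jmp, 1) == 0) {
            _armv7_neon_probe();    /* raises SIGILL on pre-NEON CPUs     */
            ret = 1;                /* reached only if the probe executed */
        }
        sigaction(SIGILL, &ill_oact, NULL);
        return ret;
    }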
diff --git a/crypto/armv4cpuid.S b/crypto/armv4cpuid.S
index 0059311..65010ae 100644
--- a/crypto/armv4cpuid.S
+++ b/crypto/armv4cpuid.S
@@ -3,69 +3,6 @@
.text
.code 32
-@ Special note about using .byte directives to encode instructions.
-@ Initial reason for hand-coding instructions was to allow module to
-@ be compilable by legacy tool-chains. At later point it was pointed
-@ out that since ARMv7, instructions are always encoded in little-endian
-@ order, therefore one has to opt for endian-neutral presentation.
-@ Contemporary tool-chains offer .inst directive for this purpose,
-@ but not legacy ones. Therefore .byte. But there is an exception,
-@ namely ARMv7-R profile still allows for big-endian encoding even for
-@ instructions. This raises the question what if probe instructions
-@ appear executable to such processor operating in big-endian order?
-@ They have to be chosen in a way that avoids this problem. As failed
-@ NEON probe disables a number of other probes we have to ensure that
-@ only NEON probe instruction doesn't appear executable in big-endian
-@ order, therefore 'vorr q8,q8,q8', and not some other register. The
-@ only probe that is not bypassed on failed NEON probe is _armv7_tick,
-@ where you'll spot 'mov r0,r6' that serves this purpose. Basic idea is
-@ that if fetched in alternative byte order instruction should crash to
-@ denote lack of probed capability...
-
-.align 5
-.global _armv7_neon_probe
-.type _armv7_neon_probe,%function
-_armv7_neon_probe:
- .byte 0xf0,0x01,0x60,0xf2 @ vorr q8,q8,q8
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv7_neon_probe,.-_armv7_neon_probe
-
-.global _armv7_tick
-.type _armv7_tick,%function
-_armv7_tick:
- .byte 0x06,0x00,0xa0,0xe1 @ mov r0,r6
- .byte 0x1e,0x0f,0x51,0xec @ mrrc p15,1,r0,r1,c14 @ CNTVCT
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
- nop
-.size _armv7_tick,.-_armv7_tick
-
-.global _armv8_aes_probe
-.type _armv8_aes_probe,%function
-_armv8_aes_probe:
- .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv8_aes_probe,.-_armv8_aes_probe
-
-.global _armv8_sha1_probe
-.type _armv8_sha1_probe,%function
-_armv8_sha1_probe:
- .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv8_sha1_probe,.-_armv8_sha1_probe
-
-.global _armv8_sha256_probe
-.type _armv8_sha256_probe,%function
-_armv8_sha256_probe:
- .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv8_sha256_probe,.-_armv8_sha256_probe
-.global _armv8_pmull_probe
-.type _armv8_pmull_probe,%function
-_armv8_pmull_probe:
- .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
- .byte 0x1e,0xff,0x2f,0xe1 @ bx lr
-.size _armv8_pmull_probe,.-_armv8_pmull_probe
-
.align 5
.global OPENSSL_atomic_add
.type OPENSSL_atomic_add,%function
@@ -139,30 +76,81 @@ OPENSSL_cleanse:
#endif
.size OPENSSL_cleanse,.-OPENSSL_cleanse
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.align 5
+.global _armv7_neon_probe
+.type _armv7_neon_probe,%function
+_armv7_neon_probe:
+ vorr q0,q0,q0
+ bx lr
+.size _armv7_neon_probe,.-_armv7_neon_probe
+
+.global _armv7_tick
+.type _armv7_tick,%function
+_armv7_tick:
+ mrrc p15,1,r0,r1,c14 @ CNTVCT
+ bx lr
+.size _armv7_tick,.-_armv7_tick
+
+.global _armv8_aes_probe
+.type _armv8_aes_probe,%function
+_armv8_aes_probe:
+ .byte 0x00,0x03,0xb0,0xf3 @ aese.8 q0,q0
+ bx lr
+.size _armv8_aes_probe,.-_armv8_aes_probe
+
+.global _armv8_sha1_probe
+.type _armv8_sha1_probe,%function
+_armv8_sha1_probe:
+ .byte 0x40,0x0c,0x00,0xf2 @ sha1c.32 q0,q0,q0
+ bx lr
+.size _armv8_sha1_probe,.-_armv8_sha1_probe
+
+.global _armv8_sha256_probe
+.type _armv8_sha256_probe,%function
+_armv8_sha256_probe:
+ .byte 0x40,0x0c,0x00,0xf3 @ sha256h.32 q0,q0,q0
+ bx lr
+.size _armv8_sha256_probe,.-_armv8_sha256_probe
+.global _armv8_pmull_probe
+.type _armv8_pmull_probe,%function
+_armv8_pmull_probe:
+ .byte 0x00,0x0e,0xa0,0xf2 @ vmull.p64 q0,d0,d0
+ bx lr
+.size _armv8_pmull_probe,.-_armv8_pmull_probe
+#endif
+
.global OPENSSL_wipe_cpu
.type OPENSSL_wipe_cpu,%function
OPENSSL_wipe_cpu:
+#if __ARM_MAX_ARCH__>=7
ldr r0,.LOPENSSL_armcap
adr r1,.LOPENSSL_armcap
ldr r0,[r1,r0]
+#endif
eor r2,r2,r2
eor r3,r3,r3
eor ip,ip,ip
+#if __ARM_MAX_ARCH__>=7
tst r0,#1
beq .Lwipe_done
- .byte 0x50,0x01,0x00,0xf3 @ veor q0, q0, q0
- .byte 0x52,0x21,0x02,0xf3 @ veor q1, q1, q1
- .byte 0x54,0x41,0x04,0xf3 @ veor q2, q2, q2
- .byte 0x56,0x61,0x06,0xf3 @ veor q3, q3, q3
- .byte 0xf0,0x01,0x40,0xf3 @ veor q8, q8, q8
- .byte 0xf2,0x21,0x42,0xf3 @ veor q9, q9, q9
- .byte 0xf4,0x41,0x44,0xf3 @ veor q10, q10, q10
- .byte 0xf6,0x61,0x46,0xf3 @ veor q11, q11, q11
- .byte 0xf8,0x81,0x48,0xf3 @ veor q12, q12, q12
- .byte 0xfa,0xa1,0x4a,0xf3 @ veor q13, q13, q13
- .byte 0xfc,0xc1,0x4c,0xf3 @ veor q14, q14, q14
- .byte 0xfe,0xe1,0x4e,0xf3 @ veor q15, q15, q15
+ veor q0, q0, q0
+ veor q1, q1, q1
+ veor q2, q2, q2
+ veor q3, q3, q3
+ veor q8, q8, q8
+ veor q9, q9, q9
+ veor q10, q10, q10
+ veor q11, q11, q11
+ veor q12, q12, q12
+ veor q13, q13, q13
+ veor q14, q14, q14
+ veor q15, q15, q15
.Lwipe_done:
+#endif
mov r0,sp
#if __ARM_ARCH__>=5
bx lr
@@ -200,8 +188,10 @@ OPENSSL_instrument_bus2:
.size OPENSSL_instrument_bus2,.-OPENSSL_instrument_bus2
.align 5
+#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-.LOPENSSL_armcap
+#endif
#if __ARM_ARCH__>=6
.align 5
#else
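The endianness argument from the deleted comment block can be checked numerically: ".byte 0xf0,0x01,0x60,0xf2" is the little-endian byte image of the A32 word 0xf26001f0, i.e. vorr q8,q8,q8. A standalone C sketch:

    #include <stdio.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t vorr_q8 = 0xf26001f0;  /* A32 encoding of vorr q8,q8,q8 */
        int i;

        /* ARMv7 fetches instructions little-endian, so the endian-neutral
         * spelling emits the least significant byte first */
        for (i = 0; i < 4; i++)
            printf("0x%02x%s", (unsigned)((vorr_q8 >> (8 * i)) & 0xff),
                   i < 3 ? "," : "\n");
        return 0;   /* prints: 0xf0,0x01,0x60,0xf2 */
    }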
diff --git a/crypto/bn/asm/armv4-gf2m.pl b/crypto/bn/asm/armv4-gf2m.pl
index b781afb..8f529c9 100644
--- a/crypto/bn/asm/armv4-gf2m.pl
+++ b/crypto/bn/asm/armv4-gf2m.pl
@@ -40,10 +40,6 @@ $code=<<___;
.text
.code 32
-
-#if __ARM_ARCH__>=7
-.fpu neon
-#endif
___
################
# private interface to mul_1x1_ialu
@@ -142,20 +138,80 @@ ___
# BN_ULONG a1,BN_ULONG a0,
# BN_ULONG b1,BN_ULONG b0); # r[3..0]=a1a0·b1b0
{
-my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
-my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
-
$code.=<<___;
.global bn_GF2m_mul_2x2
.type bn_GF2m_mul_2x2,%function
.align 5
bn_GF2m_mul_2x2:
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
.Lpic: ldr r12,[pc,r12]
tst r12,#1
- beq .Lialu
+ bne .LNEON
+#endif
+___
+$ret="r10"; # reassigned 1st argument
+$code.=<<___;
+ stmdb sp!,{r4-r10,lr}
+ mov $ret,r0 @ reassign 1st argument
+ mov $b,r3 @ $b=b1
+ ldr r3,[sp,#32] @ load b0
+ mov $mask,#7<<2
+ sub sp,sp,#32 @ allocate tab[8]
+
+ bl mul_1x1_ialu @ a1·b1
+ str $lo,[$ret,#8]
+ str $hi,[$ret,#12]
+
+ eor $b,$b,r3 @ flip b0 and b1
+ eor $a,$a,r2 @ flip a0 and a1
+ eor r3,r3,$b
+ eor r2,r2,$a
+ eor $b,$b,r3
+ eor $a,$a,r2
+ bl mul_1x1_ialu @ a0·b0
+ str $lo,[$ret]
+ str $hi,[$ret,#4]
+ eor $a,$a,r2
+ eor $b,$b,r3
+ bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
+___
+@r=map("r$_",(6..9));
+$code.=<<___;
+ ldmia $ret,{@r[0]-@r[3]}
+ eor $lo,$lo,$hi
+ eor $hi,$hi,@r[1]
+ eor $lo,$lo,@r[0]
+ eor $hi,$hi,@r[2]
+ eor $lo,$lo,@r[3]
+ eor $hi,$hi,@r[3]
+ str $hi,[$ret,#8]
+ eor $lo,$lo,$hi
+ add sp,sp,#32 @ destroy tab[8]
+ str $lo,[$ret,#4]
+
+#if __ARM_ARCH__>=5
+ ldmia sp!,{r4-r10,pc}
+#else
+ ldmia sp!,{r4-r10,lr}
+ tst lr,#1
+ moveq pc,lr @ be binary compatible with V4, yet
+ bx lr @ interoperable with Thumb ISA:-)
+#endif
+___
+}
+{
+my ($r,$t0,$t1,$t2,$t3)=map("q$_",(0..3,8..12));
+my ($a,$b,$k48,$k32,$k16)=map("d$_",(26..31));
+
+$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
+.fpu neon
+
+.align 5
+.LNEON:
ldr r12, [sp] @ 5th argument
vmov.32 $a, r2, r1
vmov.32 $b, r12, r3
@@ -203,62 +259,12 @@ bn_GF2m_mul_2x2:
vst1.32 {$r}, [r0]
ret @ bx lr
-.align 4
-.Lialu:
#endif
___
}
-$ret="r10"; # reassigned 1st argument
$code.=<<___;
- stmdb sp!,{r4-r10,lr}
- mov $ret,r0 @ reassign 1st argument
- mov $b,r3 @ $b=b1
- ldr r3,[sp,#32] @ load b0
- mov $mask,#7<<2
- sub sp,sp,#32 @ allocate tab[8]
-
- bl mul_1x1_ialu @ a1·b1
- str $lo,[$ret,#8]
- str $hi,[$ret,#12]
-
- eor $b,$b,r3 @ flip b0 and b1
- eor $a,$a,r2 @ flip a0 and a1
- eor r3,r3,$b
- eor r2,r2,$a
- eor $b,$b,r3
- eor $a,$a,r2
- bl mul_1x1_ialu @ a0·b0
- str $lo,[$ret]
- str $hi,[$ret,#4]
-
- eor $a,$a,r2
- eor $b,$b,r3
- bl mul_1x1_ialu @ (a1+a0)·(b1+b0)
-___
-@r=map("r$_",(6..9));
-$code.=<<___;
- ldmia $ret,{@r[0]-@r[3]}
- eor $lo,$lo,$hi
- eor $hi,$hi,@r[1]
- eor $lo,$lo,@r[0]
- eor $hi,$hi,@r[2]
- eor $lo,$lo,@r[3]
- eor $hi,$hi,@r[3]
- str $hi,[$ret,#8]
- eor $lo,$lo,$hi
- add sp,sp,#32 @ destroy tab[8]
- str $lo,[$ret,#4]
-
-#if __ARM_ARCH__>=5
- ldmia sp!,{r4-r10,pc}
-#else
- ldmia sp!,{r4-r10,lr}
- tst lr,#1
- moveq pc,lr @ be binary compatible with V4, yet
- bx lr @ interoperable with Thumb ISA:-)
-#endif
.size bn_GF2m_mul_2x2,.-bn_GF2m_mul_2x2
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-(.Lpic+8)
@@ -266,7 +272,9 @@ $code.=<<___;
.asciz "GF(2^m) Multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
+#endif
___
foreach (split("\n",$code)) {
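In C terms, the integer path above is plain 2x2 Karatsuba over GF(2)[x]: three 1x1 carry-less products combined with XOR folds. A sketch, with a naive bit-loop clmul32() standing in for the table-driven mul_1x1_ialu (helper name and layout are illustrative, not the original):

    #include <stdint.h>

    /* hypothetical 32x32->64-bit carry-less multiply */
    static uint64_t clmul32(uint32_t a, uint32_t b)
    {
        uint64_t r = 0;
        int i;
        for (i = 0; i < 32; i++)
            if (b & (1u << i))
                r ^= (uint64_t)a << i;
        return r;
    }

    /* r[3..0] = a1a0 * b1b0 over GF(2)[x] */
    static void gf2m_mul_2x2(uint32_t r[4], uint32_t a1, uint32_t a0,
                             uint32_t b1, uint32_t b0)
    {
        uint64_t hh = clmul32(a1, b1);           /* a1*b1           */
        uint64_t ll = clmul32(a0, b0);           /* a0*b0           */
        uint64_t mm = clmul32(a1 ^ a0, b1 ^ b0); /* (a1+a0)*(b1+b0) */
        uint64_t lo, hi;

        mm ^= hh ^ ll;             /* middle term; "+" is XOR in GF(2) */

        lo = ll ^ (mm << 32);
        hi = hh ^ (mm >> 32);

        r[0] = (uint32_t)lo;
        r[1] = (uint32_t)(lo >> 32);
        r[2] = (uint32_t)hi;
        r[3] = (uint32_t)(hi >> 32);
    }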
diff --git a/crypto/bn/asm/armv4-mont.pl b/crypto/bn/asm/armv4-mont.pl
index 72bad8e..1d330e9 100644
--- a/crypto/bn/asm/armv4-mont.pl
+++ b/crypto/bn/asm/armv4-mont.pl
@@ -72,7 +72,7 @@ $code=<<___;
.text
.code 32
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.align 5
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-bn_mul_mont
@@ -85,7 +85,7 @@ $code=<<___;
bn_mul_mont:
ldr ip,[sp,#4] @ load num
stmdb sp!,{r0,r2} @ sp points at argument block
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
tst ip,#7
bne .Lialu
adr r0,bn_mul_mont
@@ -256,7 +256,8 @@ my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("r$_",(0..5));
my ($tinptr,$toutptr,$inner,$outer)=map("r$_",(6..9));
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.type bn_mul8x_mont_neon,%function
@@ -663,7 +664,7 @@ ___
$code.=<<___;
.asciz "Montgomery multiplication for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
#endif
___
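The entry-point dispatch of bn_mul_mont in C-like terms: the NEON path is taken only when the limb count is a multiple of 8 and the NEON bit of OPENSSL_armcap_P is set. A hypothetical rendering (bn_mul_mont_ialu stands in for the .Lialu label, and any minimum-size checks are ignored):

    #include "arm_arch.h"

    typedef unsigned int BN_ULONG;   /* 32-bit limbs on ARMv4 */

    int bn_mul8x_mont_neon(BN_ULONG *rp, const BN_ULONG *ap,
                           const BN_ULONG *bp, const BN_ULONG *np,
                           const BN_ULONG *n0, int num);
    int bn_mul_mont_ialu(BN_ULONG *rp, const BN_ULONG *ap,
                         const BN_ULONG *bp, const BN_ULONG *np,
                         const BN_ULONG *n0, int num);

    int bn_mul_mont(BN_ULONG *rp, const BN_ULONG *ap, const BN_ULONG *bp,
                    const BN_ULONG *np, const BN_ULONG *n0, int num)
    {
    #if __ARM_MAX_ARCH__ >= 7
        if ((num & 7) == 0 && (OPENSSL_armcap_P & ARMV7_NEON))
            return bn_mul8x_mont_neon(rp, ap, bp, np, n0, num);
    #endif
        return bn_mul_mont_ialu(rp, ap, bp, np, n0, num);
    }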
diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
index ba3d43b..ddd856e 100644
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -1036,7 +1036,7 @@ const EVP_CIPHER *EVP_aes_##keylen##_##mode(void) \
#if defined(OPENSSL_CPUID_OBJ) && (defined(__arm__) || defined(__arm) || defined(__aarch64__))
#include "arm_arch.h"
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
# if defined(BSAES_ASM)
# define BSAES_CAPABLE (OPENSSL_armcap_P & ARMV7_NEON)
# endif
diff --git a/crypto/modes/asm/ghash-armv4.pl b/crypto/modes/asm/ghash-armv4.pl
index 0023bf9..77fbf34 100644
--- a/crypto/modes/asm/ghash-armv4.pl
+++ b/crypto/modes/asm/ghash-armv4.pl
@@ -365,7 +365,8 @@ ___
}
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.global gcm_init_neon
diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
index 261dc59..4038d9c 100644
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -675,7 +675,7 @@ void gcm_ghash_4bit_x86(u64 Xi[2],const u128 Htable[16],const u8 *inp,size_t len
# endif
# elif defined(__arm__) || defined(__arm) || defined(__aarch64__)
# include "arm_arch.h"
-# if __ARM_ARCH__>=7
+# if __ARM_MAX_ARCH__>=7
# define GHASH_ASM_ARM
# define GCM_FUNCREF_4BIT
# define PMULL_CAPABLE (OPENSSL_armcap_P & ARMV8_PMULL)
diff --git a/crypto/sha/asm/sha1-armv4-large.pl b/crypto/sha/asm/sha1-armv4-large.pl
index 50bd07b..b2c3032 100644
--- a/crypto/sha/asm/sha1-armv4-large.pl
+++ b/crypto/sha/asm/sha1-armv4-large.pl
@@ -174,7 +174,7 @@ $code=<<___;
.align 5
sha1_block_data_order:
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
sub r3,pc,#8 @ sha1_block_data_order
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
@@ -264,8 +264,10 @@ $code.=<<___;
.LK_20_39: .word 0x6ed9eba1
.LK_40_59: .word 0x8f1bbcdc
.LK_60_79: .word 0xca62c1d6
+#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha1_block_data_order
+#endif
.asciz "SHA1 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 5
___
@@ -476,7 +478,8 @@ sub Xloop()
}
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.type sha1_block_data_order_neon,%function
@@ -563,7 +566,7 @@ my @Kxx=map("q$_",(8..11));
my ($W0,$W1,$ABCD_SAVE)=map("q$_",(12..14));
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.type sha1_block_data_order_armv8,%function
.align 5
sha1_block_data_order_armv8:
@@ -637,7 +640,9 @@ $code.=<<___;
___
}}}
$code.=<<___;
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
+#endif
___
{ my %opcode = (
diff --git a/crypto/sha/asm/sha256-armv4.pl b/crypto/sha/asm/sha256-armv4.pl
index 505ca8f..b0ae936 100644
--- a/crypto/sha/asm/sha256-armv4.pl
+++ b/crypto/sha/asm/sha256-armv4.pl
@@ -177,8 +177,10 @@ K256:
.word 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size K256,.-K256
.word 0 @ terminator
+#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha256_block_data_order
+#endif
.align 5
.global sha256_block_data_order
@@ -186,7 +188,7 @@ K256:
sha256_block_data_order:
sub r3,pc,#8 @ sha256_block_data_order
add $len,$inp,$len,lsl#6 @ len to point at the end of inp
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#ARMV8_SHA256
@@ -423,7 +425,8 @@ sub body_00_15 () {
}
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.type sha256_block_data_order_neon,%function
@@ -545,7 +548,7 @@ my ($W0,$W1,$ABCD_SAVE,$EFGH_SAVE)=map("q$_",(12..15));
my $Ktbl="r3";
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
.type sha256_block_data_order_armv8,%function
.align 5
sha256_block_data_order_armv8:
@@ -616,7 +619,9 @@ ___
$code.=<<___;
.asciz "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
+#endif
___
{ my %opcode = (
diff --git a/crypto/sha/asm/sha512-armv4.pl b/crypto/sha/asm/sha512-armv4.pl
index 1d5275b..fb7dc50 100644
--- a/crypto/sha/asm/sha512-armv4.pl
+++ b/crypto/sha/asm/sha512-armv4.pl
@@ -237,16 +237,20 @@ WORD64(0x3c9ebe0a,0x15c9bebc, 0x431d67c4,0x9c100d4c)
WORD64(0x4cc5d4be,0xcb3e42b6, 0x597f299c,0xfc657e2a)
WORD64(0x5fcb6fab,0x3ad6faec, 0x6c44198c,0x4a475817)
.size K512,.-K512
+#if __ARM_MAX_ARCH__>=7
.LOPENSSL_armcap:
.word OPENSSL_armcap_P-sha512_block_data_order
.skip 32-4
+#else
+.skip 32
+#endif
.global sha512_block_data_order
.type sha512_block_data_order,%function
sha512_block_data_order:
sub r3,pc,#8 @ sha512_block_data_order
add $len,$inp,$len,lsl#7 @ len to point at the end of inp
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
ldr r12,.LOPENSSL_armcap
ldr r12,[r3,r12] @ OPENSSL_armcap_P
tst r12,#1
@@ -551,7 +555,8 @@ ___
}
$code.=<<___;
-#if __ARM_ARCH__>=7
+#if __ARM_MAX_ARCH__>=7
+.arch armv7-a
.fpu neon
.align 4
@@ -592,7 +597,9 @@ $code.=<<___;
.size sha512_block_data_order,.-sha512_block_data_order
.asciz "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro\@openssl.org>"
.align 2
+#if __ARM_MAX_ARCH__>=7
.comm OPENSSL_armcap_P,4,4
+#endif
___
$code =~ s/\`([^\`]*)\`/eval $1/gem;
hooks/post-receive
--
OpenSSL source code