[openssl] master update
Dr. Paul Dale
pauli at openssl.org
Sun May 9 13:16:21 UTC 2021
The branch master has been updated
via 10646160125ac1328d892f1dd27f2847892d33c5 (commit)
from f0f4a46c4f5c82d4d9d0fb8a51d546c3135668a2 (commit)
- Log -----------------------------------------------------------------
commit 10646160125ac1328d892f1dd27f2847892d33c5
Author: fangming.fang <fangming.fang at arm.com>
Date: Fri Mar 19 06:45:57 2021 +0000
Optimize RSA on armv8
Add a NEON code path for RSA on armv8. This optimisation targets
the Cortex-A72 and Neoverse N1, two cores widely deployed in
infrastructure hardware. Other platforms are not affected.
A72                         old          new   improved
rsa   512 sign           9828.6       9738.7        -1%
rsa   512 verify       121497.2     122367.7         1%
rsa  1024 sign           1818         1816.9         0%
rsa  1024 verify        37175.6      37161.3         0%
rsa  2048 sign            267.3        267.4         0%
rsa  2048 verify        10127.6      10119.6         0%
rsa  3072 sign             86.8         87            0%
rsa  3072 verify         4604.2       4956.2         8%
rsa  4096 sign             38.3         38.5          1%
rsa  4096 verify         2619.8       2972.1        13%
rsa  7680 sign              5            7           40%
rsa  7680 verify          756          929.4         23%
rsa 15360 sign              0.8          1           25%
rsa 15360 verify          190.4        246           29%

N1                          old          new   improved
rsa   512 sign          12599.2      12596.7         0%
rsa   512 verify       148636.1     148656.2         0%
rsa  1024 sign           2150.6       2148.9         0%
rsa  1024 verify        42353.5      42265.2         0%
rsa  2048 sign            305.5        305.3         0%
rsa  2048 verify        11209.7      11205.2         0%
rsa  3072 sign             97.8         98.2         0%
rsa  3072 verify         5061.3       5990.7        18%
rsa  4096 sign             42.8         43            0%
rsa  4096 verify         2867.6       3509.8        22%
rsa  7680 sign              5.5          8.4         53%
rsa  7680 verify          823.5       1058.3         29%
rsa 15360 sign              0.9          1.1         22%
rsa 15360 verify          207          273.9         32%
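[Editorial aside: the figures above read like `openssl speed rsa` throughput,
i.e. operations per second, so higher is better. As a rough stand-in without
the speed tool, the Montgomery path this commit accelerates can be exercised
through the public BIGNUM API: BN_mod_exp() with an odd modulus goes through
the Montgomery code and hence bn_mul_mont. A minimal sketch, not part of the
commit; the 3072-bit size and iteration count are arbitrary choices.]

/* bench_modexp.c - illustrative stand-in for `openssl speed rsa`.
 * Build: cc bench_modexp.c -lcrypto
 * An odd modulus sends BN_mod_exp() through the Montgomery path
 * (bn_mul_mont), which is what this commit accelerates on A72/N1. */
#include <stdio.h>
#include <time.h>
#include <openssl/bn.h>

int main(void)
{
    BN_CTX *ctx = BN_CTX_new();
    BIGNUM *a = BN_new(), *p = BN_new(), *m = BN_new(), *r = BN_new();
    int i, iters = 100;
    clock_t t0;

    BN_rand(a, 3072, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY);
    BN_rand(p, 3072, BN_RAND_TOP_ANY, BN_RAND_BOTTOM_ANY);
    BN_rand(m, 3072, BN_RAND_TOP_ONE, BN_RAND_BOTTOM_ODD); /* odd modulus */

    t0 = clock();
    for (i = 0; i < iters; i++)
        BN_mod_exp(r, a, p, m, ctx);                /* Montgomery path */
    printf("3072-bit mod-exp: %.1f ops/s\n",
           iters / ((double)(clock() - t0) / CLOCKS_PER_SEC));

    BN_free(a); BN_free(p); BN_free(m); BN_free(r);
    BN_CTX_free(ctx);
    return 0;
}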
CustomizedGitHooks: yes
Change-Id: I01c732cc429d793c4eb5ffd27ccd30ff9cebf8af
Jira: SECLIB-540
Reviewed-by: Tomas Mraz <tomas at openssl.org>
Reviewed-by: Paul Dale <pauli at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/14761)
-----------------------------------------------------------------------
Summary of changes:
crypto/armcap.c | 7 +
crypto/bn/asm/armv8-mont.pl | 381 ++++++++++++++++++++++++++++++++++++++++++++
crypto/bn/build.info | 1 +
3 files changed, 389 insertions(+)
diff --git a/crypto/armcap.c b/crypto/armcap.c
index 0e7c0842ad..dc2326f8f6 100644
--- a/crypto/armcap.c
+++ b/crypto/armcap.c
@@ -19,6 +19,7 @@
unsigned int OPENSSL_armcap_P = 0;
unsigned int OPENSSL_arm_midr = 0;
+unsigned int OPENSSL_armv8_rsa_neonized = 0;
#if __ARM_MAX_ARCH__<7
void OPENSSL_cpuid_setup(void)
@@ -237,6 +238,12 @@ void OPENSSL_cpuid_setup(void)
# ifdef __aarch64__
if (OPENSSL_armcap_P & ARMV8_CPUID)
OPENSSL_arm_midr = _armv8_cpuid_probe();
+
+ if ((MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_CORTEX_A72) ||
+ MIDR_IS_CPU_MODEL(OPENSSL_arm_midr, ARM_CPU_IMP_ARM, ARM_CPU_PART_N1)) &&
+ (OPENSSL_armcap_P & ARMV7_NEON)) {
+ OPENSSL_armv8_rsa_neonized = 1;
+ }
# endif
}
#endif
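[Editorial aside: the hunk above keys off MIDR_EL1, which _armv8_cpuid_probe()
reads. As a reading aid only: MIDR packs the implementer into bits [31:24] and
the part number into bits [15:4], and MIDR_IS_CPU_MODEL() compares both. The
sketch below restates the check in plain C; the field offsets are
architectural, and the constants mirror what crypto/arm_arch.h defines
(0x41 = Arm Ltd, 0xd08 = Cortex-A72, 0xd0c = Neoverse N1).]

/* Reading aid (not part of the patch): what the MIDR_IS_CPU_MODEL()
 * test above decides. */
#include <stdint.h>
#include <stdio.h>

#define MIDR_IMPLEMENTER(m) (((m) >> 24) & 0xff)  /* bits [31:24] */
#define MIDR_PARTNUM(m)     (((m) >>  4) & 0xfff) /* bits [15:4]  */

#define IMP_ARM          0x41   /* 'A' for Arm Ltd */
#define PART_CORTEX_A72  0xd08
#define PART_NEOVERSE_N1 0xd0c

static int rsa_neon_wanted(uint32_t midr, int have_neon)
{
    uint32_t part = MIDR_PARTNUM(midr);

    return have_neon && MIDR_IMPLEMENTER(midr) == IMP_ARM
        && (part == PART_CORTEX_A72 || part == PART_NEOVERSE_N1);
}

int main(void)
{
    /* Synthetic Neoverse N1 MIDR: implementer 0x41, part 0xd0c */
    uint32_t midr = (0x41u << 24) | (0xd0cu << 4);

    printf("neonized: %d\n", rsa_neon_wanted(midr, 1)); /* prints 1 */
    return 0;
}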
diff --git a/crypto/bn/asm/armv8-mont.pl b/crypto/bn/asm/armv8-mont.pl
index e8bdfa3bb8..0867ccabee 100755
--- a/crypto/bn/asm/armv8-mont.pl
+++ b/crypto/bn/asm/armv8-mont.pl
@@ -67,16 +67,34 @@ $n0="x4"; # const BN_ULONG *n0,
$num="x5"; # int num);
$code.=<<___;
+#ifndef __KERNEL__
+# include "arm_arch.h"
+.extern OPENSSL_armv8_rsa_neonized
+.hidden OPENSSL_armv8_rsa_neonized
+#endif
.text
.globl bn_mul_mont
.type bn_mul_mont,%function
.align 5
bn_mul_mont:
+.Lbn_mul_mont:
+ tst $num,#3
+ b.ne .Lmul_mont
+ cmp $num,#32
+ b.le .Lscalar_impl
+#ifndef __KERNEL__
+ adrp x17,OPENSSL_armv8_rsa_neonized
+ ldr w17,[x17,#:lo12:OPENSSL_armv8_rsa_neonized]
+ cbnz w17, bn_mul8x_mont_neon
+#endif
+
+.Lscalar_impl:
tst $num,#7
b.eq __bn_sqr8x_mont
tst $num,#3
b.eq __bn_mul4x_mont
+
.Lmul_mont:
stp x29,x30,[sp,#-64]!
add x29,sp,#0
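[Editorial aside: the hunk above turns the bn_mul_mont prologue into a small
dispatcher. Rendered as C for readability, an illustrative sketch only, with
stub names standing in for the local assembly labels. num is the modulus
length in 64-bit limbs, so the num > 32 test means the NEON path is only
tried for moduli above 2048 bits, matching where the benchmark gains appear.]

/* Illustrative C rendering of the new bn_mul_mont prologue above.
 * Only the branch structure is meaningful here. */
static unsigned int OPENSSL_armv8_rsa_neonized = 1; /* really set by
                                                     * OPENSSL_cpuid_setup() */

static void mul_mont_generic(int num) { (void)num; } /* .Lmul_mont         */
static void mul8x_mont_neon(int num)  { (void)num; } /* bn_mul8x_mont_neon */
static void sqr8x_mont(int num)       { (void)num; } /* __bn_sqr8x_mont    */
static void mul4x_mont(int num)       { (void)num; } /* __bn_mul4x_mont    */

void bn_mul_mont_dispatch(int num)
{
    if (num % 4 != 0) {                 /* tst $num,#3 ; b.ne .Lmul_mont */
        mul_mont_generic(num);          /* generic word-by-word loop */
        return;
    }
    if (num > 32 && OPENSSL_armv8_rsa_neonized) {   /* cmp/b.le + cbnz */
        mul8x_mont_neon(num);           /* new NEON path, >2048-bit moduli */
        return;
    }
    /* .Lscalar_impl: the pre-existing scalar specialisations */
    if (num % 8 == 0)
        sqr8x_mont(num);
    else
        mul4x_mont(num);
}

int main(void) { bn_mul_mont_dispatch(48); return 0; } /* 3072-bit: 48 limbs */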
@@ -274,6 +292,369 @@ bn_mul_mont:
.size bn_mul_mont,.-bn_mul_mont
___
{
+my ($A0,$A1,$N0,$N1)=map("v$_",(0..3));
+my ($Z,$Temp)=("v4.16b","v5");
+my @ACC=map("v$_",(6..13));
+my ($Bi,$Ni,$M0)=map("v$_",(28..30));
+my $sBi="s28";
+my $sM0="s30";
+my $zero="v14";
+my $temp="v15";
+my $ACCTemp="v16";
+
+my ($rptr,$aptr,$bptr,$nptr,$n0,$num)=map("x$_",(0..5));
+my ($tinptr,$toutptr,$inner,$outer,$bnptr)=map("x$_",(6..11));
+
+$code.=<<___;
+.type bn_mul8x_mont_neon,%function
+.align 5
+bn_mul8x_mont_neon:
+ stp x29,x30,[sp,#-80]!
+ mov x16,sp
+ stp d8,d9,[sp,#16]
+ stp d10,d11,[sp,#32]
+ stp d12,d13,[sp,#48]
+ stp d14,d15,[sp,#64]
+ lsl $num,$num,#1
+ eor $zero.16b,$zero.16b,$zero.16b
+
+.align 4
+.LNEON_8n:
+ eor @ACC[0].16b,@ACC[0].16b,@ACC[0].16b
+ sub $toutptr,sp,#128
+ eor @ACC[1].16b,@ACC[1].16b,@ACC[1].16b
+ sub $toutptr,$toutptr,$num,lsl#4
+ eor @ACC[2].16b,@ACC[2].16b,@ACC[2].16b
+ and $toutptr,$toutptr,#-64
+ eor @ACC[3].16b,@ACC[3].16b,@ACC[3].16b
+ mov sp,$toutptr // alloca
+ eor @ACC[4].16b,@ACC[4].16b,@ACC[4].16b
+ add $toutptr,$toutptr,#256
+ eor @ACC[5].16b,@ACC[5].16b,@ACC[5].16b
+ sub $inner,$num,#8
+ eor @ACC[6].16b,@ACC[6].16b,@ACC[6].16b
+ eor @ACC[7].16b,@ACC[7].16b,@ACC[7].16b
+
+.LNEON_8n_init:
+ st1 {@ACC[0].2d,@ACC[1].2d},[$toutptr],#32
+ subs $inner,$inner,#8
+ st1 {@ACC[2].2d,@ACC[3].2d},[$toutptr],#32
+ st1 {@ACC[4].2d,@ACC[5].2d},[$toutptr],#32
+ st1 {@ACC[6].2d,@ACC[7].2d},[$toutptr],#32
+ bne .LNEON_8n_init
+
+ add $tinptr,sp,#256
+ ld1 {$A0.4s,$A1.4s},[$aptr],#32
+ add $bnptr,sp,#8
+ ldr $sM0,[$n0],#4
+ mov $outer,$num
+ b .LNEON_8n_outer
+
+.align 4
+.LNEON_8n_outer:
+ ldr $sBi,[$bptr],#4 // *b++
+ uxtl $Bi.4s,$Bi.4h
+ add $toutptr,sp,#128
+ ld1 {$N0.4s,$N1.4s},[$nptr],#32
+
+ umlal @ACC[0].2d,$Bi.2s,$A0.s[0]
+ umlal @ACC[1].2d,$Bi.2s,$A0.s[1]
+ umlal @ACC[2].2d,$Bi.2s,$A0.s[2]
+ shl $Ni.2d,@ACC[0].2d,#16
+ ext $Ni.16b,$Ni.16b,$Ni.16b,#8
+ umlal @ACC[3].2d,$Bi.2s,$A0.s[3]
+ add $Ni.2d,$Ni.2d,@ACC[0].2d
+ umlal @ACC[4].2d,$Bi.2s,$A1.s[0]
+ mul $Ni.2s,$Ni.2s,$M0.2s
+ umlal @ACC[5].2d,$Bi.2s,$A1.s[1]
+ st1 {$Bi.2s},[sp] // put aside smashed b[8*i+0]
+ umlal @ACC[6].2d,$Bi.2s,$A1.s[2]
+ uxtl $Ni.4s,$Ni.4h
+ umlal @ACC[7].2d,$Bi.2s,$A1.s[3]
+___
+for ($i=0; $i<7;) {
+$code.=<<___;
+ ldr $sBi,[$bptr],#4 // *b++
+ umlal @ACC[0].2d,$Ni.2s,$N0.s[0]
+ umlal @ACC[1].2d,$Ni.2s,$N0.s[1]
+ uxtl $Bi.4s,$Bi.4h
+ umlal @ACC[2].2d,$Ni.2s,$N0.s[2]
+ ushr $temp.2d,@ACC[0].2d,#16
+ umlal @ACC[3].2d,$Ni.2s,$N0.s[3]
+ umlal @ACC[4].2d,$Ni.2s,$N1.s[0]
+ ext @ACC[0].16b,@ACC[0].16b,@ACC[0].16b,#8
+ add @ACC[0].2d,@ACC[0].2d,$temp.2d
+ umlal @ACC[5].2d,$Ni.2s,$N1.s[1]
+ ushr @ACC[0].2d,@ACC[0].2d,#16
+ umlal @ACC[6].2d,$Ni.2s,$N1.s[2]
+ umlal @ACC[7].2d,$Ni.2s,$N1.s[3]
+ add $ACCTemp.2d,@ACC[1].2d,@ACC[0].2d
+ ins @ACC[1].d[0],$ACCTemp.d[0]
+ st1 {$Ni.2s},[$bnptr],#8 // put aside smashed m[8*i+$i]
+___
+ push(@ACC,shift(@ACC)); $i++;
+$code.=<<___;
+ umlal @ACC[0].2d,$Bi.2s,$A0.s[0]
+ ld1 {@ACC[7].2d},[$tinptr],#16
+ umlal @ACC[1].2d,$Bi.2s,$A0.s[1]
+ umlal @ACC[2].2d,$Bi.2s,$A0.s[2]
+ shl $Ni.2d,@ACC[0].2d,#16
+ ext $Ni.16b,$Ni.16b,$Ni.16b,#8
+ umlal @ACC[3].2d,$Bi.2s,$A0.s[3]
+ add $Ni.2d,$Ni.2d,@ACC[0].2d
+ umlal @ACC[4].2d,$Bi.2s,$A1.s[0]
+ mul $Ni.2s,$Ni.2s,$M0.2s
+ umlal @ACC[5].2d,$Bi.2s,$A1.s[1]
+ st1 {$Bi.2s},[$bnptr],#8 // put aside smashed b[8*i+$i]
+ umlal @ACC[6].2d,$Bi.2s,$A1.s[2]
+ uxtl $Ni.4s,$Ni.4h
+ umlal @ACC[7].2d,$Bi.2s,$A1.s[3]
+___
+}
+$code.=<<___;
+ ld1 {$Bi.2s},[sp] // pull smashed b[8*i+0]
+ umlal @ACC[0].2d,$Ni.2s,$N0.s[0]
+ ld1 {$A0.4s,$A1.4s},[$aptr],#32
+ umlal @ACC[1].2d,$Ni.2s,$N0.s[1]
+ umlal @ACC[2].2d,$Ni.2s,$N0.s[2]
+ mov $Temp.16b,@ACC[0].16b
+ ushr $Temp.2d,$Temp.2d,#16
+ ext @ACC[0].16b,@ACC[0].16b,@ACC[0].16b,#8
+ umlal @ACC[3].2d,$Ni.2s,$N0.s[3]
+ umlal @ACC[4].2d,$Ni.2s,$N1.s[0]
+ add @ACC[0].2d,@ACC[0].2d,$Temp.2d
+ umlal @ACC[5].2d,$Ni.2s,$N1.s[1]
+ ushr @ACC[0].2d,@ACC[0].2d,#16
+ eor $temp.16b,$temp.16b,$temp.16b
+ ins @ACC[0].d[1],$temp.d[0]
+ umlal @ACC[6].2d,$Ni.2s,$N1.s[2]
+ umlal @ACC[7].2d,$Ni.2s,$N1.s[3]
+ add @ACC[1].2d,@ACC[1].2d,@ACC[0].2d
+ st1 {$Ni.2s},[$bnptr],#8 // put aside smashed m[8*i+$i]
+ add $bnptr,sp,#8 // rewind
+___
+ push(@ACC,shift(@ACC));
+$code.=<<___;
+ sub $inner,$num,#8
+ b .LNEON_8n_inner
+
+.align 4
+.LNEON_8n_inner:
+ subs $inner,$inner,#8
+ umlal @ACC[0].2d,$Bi.2s,$A0.s[0]
+ ld1 {@ACC[7].2d},[$tinptr]
+ umlal @ACC[1].2d,$Bi.2s,$A0.s[1]
+ ld1 {$Ni.2s},[$bnptr],#8 // pull smashed m[8*i+0]
+ umlal @ACC[2].2d,$Bi.2s,$A0.s[2]
+ ld1 {$N0.4s,$N1.4s},[$nptr],#32
+ umlal @ACC[3].2d,$Bi.2s,$A0.s[3]
+ b.eq .LInner_jump
+ add $tinptr,$tinptr,#16 // don't advance in last iteration
+.LInner_jump:
+ umlal @ACC[4].2d,$Bi.2s,$A1.s[0]
+ umlal @ACC[5].2d,$Bi.2s,$A1.s[1]
+ umlal @ACC[6].2d,$Bi.2s,$A1.s[2]
+ umlal @ACC[7].2d,$Bi.2s,$A1.s[3]
+___
+for ($i=1; $i<8; $i++) {
+$code.=<<___;
+ ld1 {$Bi.2s},[$bnptr],#8 // pull smashed b[8*i+$i]
+ umlal @ACC[0].2d,$Ni.2s,$N0.s[0]
+ umlal @ACC[1].2d,$Ni.2s,$N0.s[1]
+ umlal @ACC[2].2d,$Ni.2s,$N0.s[2]
+ umlal @ACC[3].2d,$Ni.2s,$N0.s[3]
+ umlal @ACC[4].2d,$Ni.2s,$N1.s[0]
+ umlal @ACC[5].2d,$Ni.2s,$N1.s[1]
+ umlal @ACC[6].2d,$Ni.2s,$N1.s[2]
+ umlal @ACC[7].2d,$Ni.2s,$N1.s[3]
+ st1 {@ACC[0].2d},[$toutptr],#16
+___
+ push(@ACC,shift(@ACC));
+$code.=<<___;
+ umlal @ACC[0].2d,$Bi.2s,$A0.s[0]
+ ld1 {@ACC[7].2d},[$tinptr]
+ umlal @ACC[1].2d,$Bi.2s,$A0.s[1]
+ ld1 {$Ni.2s},[$bnptr],#8 // pull smashed m[8*i+$i]
+ umlal @ACC[2].2d,$Bi.2s,$A0.s[2]
+ b.eq .LInner_jump$i
+ add $tinptr,$tinptr,#16 // don't advance in last iteration
+.LInner_jump$i:
+ umlal @ACC[3].2d,$Bi.2s,$A0.s[3]
+ umlal @ACC[4].2d,$Bi.2s,$A1.s[0]
+ umlal @ACC[5].2d,$Bi.2s,$A1.s[1]
+ umlal @ACC[6].2d,$Bi.2s,$A1.s[2]
+ umlal @ACC[7].2d,$Bi.2s,$A1.s[3]
+___
+}
+$code.=<<___;
+ b.ne .LInner_after_rewind$i
+ sub $aptr,$aptr,$num,lsl#2 // rewind
+.LInner_after_rewind$i:
+ umlal @ACC[0].2d,$Ni.2s,$N0.s[0]
+ ld1 {$Bi.2s},[sp] // pull smashed b[8*i+0]
+ umlal @ACC[1].2d,$Ni.2s,$N0.s[1]
+ ld1 {$A0.4s,$A1.4s},[$aptr],#32
+ umlal @ACC[2].2d,$Ni.2s,$N0.s[2]
+ add $bnptr,sp,#8 // rewind
+ umlal @ACC[3].2d,$Ni.2s,$N0.s[3]
+ umlal @ACC[4].2d,$Ni.2s,$N1.s[0]
+ umlal @ACC[5].2d,$Ni.2s,$N1.s[1]
+ umlal @ACC[6].2d,$Ni.2s,$N1.s[2]
+ st1 {@ACC[0].2d},[$toutptr],#16
+ umlal @ACC[7].2d,$Ni.2s,$N1.s[3]
+
+ bne .LNEON_8n_inner
+___
+ push(@ACC,shift(@ACC));
+$code.=<<___;
+ add $tinptr,sp,#128
+ st1 {@ACC[0].2d,@ACC[1].2d},[$toutptr],#32
+ eor $N0.16b,$N0.16b,$N0.16b // $N0
+ st1 {@ACC[2].2d,@ACC[3].2d},[$toutptr],#32
+ eor $N1.16b,$N1.16b,$N1.16b // $N1
+ st1 {@ACC[4].2d,@ACC[5].2d},[$toutptr],#32
+ st1 {@ACC[6].2d},[$toutptr]
+
+ subs $outer,$outer,#8
+ ld1 {@ACC[0].2d,@ACC[1].2d},[$tinptr],#32
+ ld1 {@ACC[2].2d,@ACC[3].2d},[$tinptr],#32
+ ld1 {@ACC[4].2d,@ACC[5].2d},[$tinptr],#32
+ ld1 {@ACC[6].2d,@ACC[7].2d},[$tinptr],#32
+
+ b.eq .LInner_8n_jump_2steps
+ sub $nptr,$nptr,$num,lsl#2 // rewind
+ b .LNEON_8n_outer
+
+.LInner_8n_jump_2steps:
+ add $toutptr,sp,#128
+ st1 {$N0.2d,$N1.2d}, [sp],#32 // start wiping stack frame
+ mov $Temp.16b,@ACC[0].16b
+ ushr $temp.2d,@ACC[0].2d,#16
+ ext @ACC[0].16b,@ACC[0].16b,@ACC[0].16b,#8
+ st1 {$N0.2d,$N1.2d}, [sp],#32
+ add @ACC[0].2d,@ACC[0].2d,$temp.2d
+ st1 {$N0.2d,$N1.2d}, [sp],#32
+ ushr $temp.2d,@ACC[0].2d,#16
+ st1 {$N0.2d,$N1.2d}, [sp],#32
+ zip1 @ACC[0].4h,$Temp.4h,@ACC[0].4h
+ ins $temp.d[1],$zero.d[0]
+
+ mov $inner,$num
+ b .LNEON_tail_entry
+
+.align 4
+.LNEON_tail:
+ add @ACC[0].2d,@ACC[0].2d,$temp.2d
+ mov $Temp.16b,@ACC[0].16b
+ ushr $temp.2d,@ACC[0].2d,#16
+ ext @ACC[0].16b,@ACC[0].16b,@ACC[0].16b,#8
+ ld1 {@ACC[2].2d,@ACC[3].2d}, [$tinptr],#32
+ add @ACC[0].2d,@ACC[0].2d,$temp.2d
+ ld1 {@ACC[4].2d,@ACC[5].2d}, [$tinptr],#32
+ ushr $temp.2d,@ACC[0].2d,#16
+ ld1 {@ACC[6].2d,@ACC[7].2d}, [$tinptr],#32
+ zip1 @ACC[0].4h,$Temp.4h,@ACC[0].4h
+ ins $temp.d[1],$zero.d[0]
+
+.LNEON_tail_entry:
+___
+for ($i=1; $i<8; $i++) {
+$code.=<<___;
+ add @ACC[1].2d,@ACC[1].2d,$temp.2d
+ st1 {@ACC[0].s}[0], [$toutptr],#4
+ ushr $temp.2d,@ACC[1].2d,#16
+ mov $Temp.16b,@ACC[1].16b
+ ext @ACC[1].16b,@ACC[1].16b,@ACC[1].16b,#8
+ add @ACC[1].2d,@ACC[1].2d,$temp.2d
+ ushr $temp.2d,@ACC[1].2d,#16
+ zip1 @ACC[1].4h,$Temp.4h,@ACC[1].4h
+ ins $temp.d[1],$zero.d[0]
+___
+ push(@ACC,shift(@ACC));
+}
+ push(@ACC,shift(@ACC));
+$code.=<<___;
+ ld1 {@ACC[0].2d,@ACC[1].2d}, [$tinptr],#32
+ subs $inner,$inner,#8
+ st1 {@ACC[7].s}[0], [$toutptr],#4
+ bne .LNEON_tail
+
+ st1 {$temp.s}[0], [$toutptr],#4 // top-most bit
+ sub $nptr,$nptr,$num,lsl#2 // rewind $nptr
+ subs $aptr,sp,#0 // clear carry flag
+ add $bptr,sp,$num,lsl#2
+
+.LNEON_sub:
+ ldp w4,w5,[$aptr],#8
+ ldp w6,w7,[$aptr],#8
+ ldp w8,w9,[$nptr],#8
+ ldp w10,w11,[$nptr],#8
+ sbcs w8,w4,w8
+ sbcs w9,w5,w9
+ sbcs w10,w6,w10
+ sbcs w11,w7,w11
+ sub x17,$bptr,$aptr
+ stp w8,w9,[$rptr],#8
+ stp w10,w11,[$rptr],#8
+ cbnz x17,.LNEON_sub
+
+ ldr w10, [$aptr] // load top-most bit
+ mov x11,sp
+ eor v0.16b,v0.16b,v0.16b
+ sub x11,$bptr,x11 // this is num*4
+ eor v1.16b,v1.16b,v1.16b
+ mov $aptr,sp
+ sub $rptr,$rptr,x11 // rewind $rptr
+ mov $nptr,$bptr // second 3/4th of frame
+ sbcs w10,w10,wzr // result is carry flag
+
+.LNEON_copy_n_zap:
+ ldp w4,w5,[$aptr],#8
+ ldp w6,w7,[$aptr],#8
+ ldp w8,w9,[$rptr],#8
+ ldp w10,w11,[$rptr]
+ sub $rptr,$rptr,#8
+ b.cs .LCopy_1
+ mov w8,w4
+ mov w9,w5
+ mov w10,w6
+ mov w11,w7
+.LCopy_1:
+ st1 {v0.2d,v1.2d}, [$nptr],#32 // wipe
+ st1 {v0.2d,v1.2d}, [$nptr],#32 // wipe
+ ldp w4,w5,[$aptr],#8
+ ldp w6,w7,[$aptr],#8
+ stp w8,w9,[$rptr],#8
+ stp w10,w11,[$rptr],#8
+ sub $aptr,$aptr,#32
+ ldp w8,w9,[$rptr],#8
+ ldp w10,w11,[$rptr]
+ sub $rptr,$rptr,#8
+ b.cs .LCopy_2
+ mov w8, w4
+ mov w9, w5
+ mov w10, w6
+ mov w11, w7
+.LCopy_2:
+ st1 {v0.2d,v1.2d}, [$aptr],#32 // wipe
+ st1 {v0.2d,v1.2d}, [$nptr],#32 // wipe
+ sub x17,$bptr,$aptr // preserves carry
+ stp w8,w9,[$rptr],#8
+ stp w10,w11,[$rptr],#8
+ cbnz x17,.LNEON_copy_n_zap
+
+ mov sp,x16
+ ldp d14,d15,[sp,#64]
+ ldp d12,d13,[sp,#48]
+ ldp d10,d11,[sp,#32]
+ ldp d8,d9,[sp,#16]
+ ldr x29,[sp],#80
+ ret // bx lr
+
+.size bn_mul8x_mont_neon,.-bn_mul8x_mont_neon
+___
+}
+{
########################################################################
# Following is ARMv8 adaptation of sqrx8x_mont from x86_64-mont5 module.
diff --git a/crypto/bn/build.info b/crypto/bn/build.info
index 3c32e83067..d0c1034bde 100644
--- a/crypto/bn/build.info
+++ b/crypto/bn/build.info
@@ -177,3 +177,4 @@ INCLUDE[armv4-mont.o]=..
GENERATE[armv4-gf2m.S]=asm/armv4-gf2m.pl
INCLUDE[armv4-gf2m.o]=..
GENERATE[armv8-mont.S]=asm/armv8-mont.pl
+INCLUDE[armv8-mont.o]=..