[openssl-commits] [openssl] master update
Matt Caswell
matt at openssl.org
Wed Aug 24 09:57:06 UTC 2016
The branch master has been updated
via c74aea8d6ccdf07ce826a9451887739b8aa64096 (commit)
via e3057a57caf4274ea1fb074518e4714059dfcabf (commit)
from dfde4219fdebbb5a8a17602fea036f7690e517ea (commit)
- Log -----------------------------------------------------------------
commit c74aea8d6ccdf07ce826a9451887739b8aa64096
Author: Andy Polyakov <appro at openssl.org>
Date: Fri Aug 19 23:18:35 2016 +0200
ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.
RT#4625
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit e3057a57caf4274ea1fb074518e4714059dfcabf
Author: Andy Polyakov <appro at openssl.org>
Date: Fri Aug 19 23:16:04 2016 +0200
ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.
RT#4625
Reviewed-by: Rich Salz <rsalz at openssl.org>
-----------------------------------------------------------------------
Summary of changes:
crypto/ec/asm/ecp_nistz256-armv4.pl | 58 ++++---------
crypto/ec/asm/ecp_nistz256-armv8.pl | 76 ++++++-----------
crypto/ec/asm/ecp_nistz256-sparcv9.pl | 150 ++++++++++------------------------
crypto/ec/asm/ecp_nistz256-x86.pl | 30 +++----
crypto/ec/asm/ecp_nistz256-x86_64.pl | 24 +++---
crypto/ec/ecp_nistz256.c | 57 +++++++++----
6 files changed, 152 insertions(+), 243 deletions(-)
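The substance of both commits: the point-at-infinity test moves from the (X,Y) words to the Z words, matching ec_GFp_simple_is_at_infinity, which treats any Jacobian point with Z == 0 as infinity. A minimal C sketch of the before/after predicate, assuming the 4x64-bit limb layout of ecp_nistz256.c (function names here are illustrative, not the library's API):

    #include <stdint.h>

    typedef uint64_t BN_ULONG;   /* 4x64-bit limbs per 256-bit value assumed */

    /* Before: infinity encoded as affine (0,0) -- OR all X and Y limbs. */
    static int is_at_infinity_old(const BN_ULONG x[4], const BN_ULONG y[4])
    {
        BN_ULONG acc = x[0] | x[1] | x[2] | x[3] |
                       y[0] | y[1] | y[2] | y[3];
        return acc == 0;
    }

    /* After: infinity encoded as (,,0) -- OR only the Z limbs, as
     * ec_GFp_simple_is_at_infinity does for Jacobian coordinates. */
    static int is_at_infinity_new(const BN_ULONG z[4])
    {
        BN_ULONG acc = z[0] | z[1] | z[2] | z[3];
        return acc == 0;
    }

(The real code keeps the test constant-time with an is_zero() helper rather than a comparison; see the ecp_nistz256.c hunks below.)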
diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl
index de3cd5c..2314b75 100755
--- a/crypto/ec/asm/ecp_nistz256-armv4.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv4.pl
@@ -1405,27 +1405,19 @@ ecp_nistz256_point_add:
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
sub sp,sp,#32*18+16
- ldmia $b_ptr!,{r4-r11} @ copy in2
+ ldmia $b_ptr!,{r4-r11} @ copy in2_x
add r3,sp,#$in2_x
- orr r12,r4,r5
- orr r12,r12,r6
- orr r12,r12,r7
- orr r12,r12,r8
- orr r12,r12,r9
- orr r12,r12,r10
- orr r12,r12,r11
stmia r3!,{r4-r11}
- ldmia $b_ptr!,{r4-r11}
- orr r12,r12,r4
- orr r12,r12,r5
+ ldmia $b_ptr!,{r4-r11} @ copy in2_y
+ stmia r3!,{r4-r11}
+ ldmia $b_ptr,{r4-r11} @ copy in2_z
+ orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
- stmia r3!,{r4-r11}
- ldmia $b_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
@@ -1434,27 +1426,19 @@ ecp_nistz256_point_add:
stmia r3,{r4-r11}
str r12,[sp,#32*18+8] @ !in2infty
- ldmia $a_ptr!,{r4-r11} @ copy in1
+ ldmia $a_ptr!,{r4-r11} @ copy in1_x
add r3,sp,#$in1_x
- orr r12,r4,r5
- orr r12,r12,r6
- orr r12,r12,r7
- orr r12,r12,r8
- orr r12,r12,r9
- orr r12,r12,r10
- orr r12,r12,r11
stmia r3!,{r4-r11}
- ldmia $a_ptr!,{r4-r11}
- orr r12,r12,r4
- orr r12,r12,r5
+ ldmia $a_ptr!,{r4-r11} @ copy in1_y
+ stmia r3!,{r4-r11}
+ ldmia $a_ptr,{r4-r11} @ copy in1_z
+ orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
- stmia r3!,{r4-r11}
- ldmia $a_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
@@ -1684,27 +1668,19 @@ ecp_nistz256_point_add_affine:
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
sub sp,sp,#32*15
- ldmia $a_ptr!,{r4-r11} @ copy in1
+ ldmia $a_ptr!,{r4-r11} @ copy in1_x
add r3,sp,#$in1_x
- orr r12,r4,r5
- orr r12,r12,r6
- orr r12,r12,r7
- orr r12,r12,r8
- orr r12,r12,r9
- orr r12,r12,r10
- orr r12,r12,r11
stmia r3!,{r4-r11}
- ldmia $a_ptr!,{r4-r11}
- orr r12,r12,r4
- orr r12,r12,r5
+ ldmia $a_ptr!,{r4-r11} @ copy in1_y
+ stmia r3!,{r4-r11}
+ ldmia $a_ptr,{r4-r11} @ copy in1_z
+ orr r12,r4,r5
orr r12,r12,r6
orr r12,r12,r7
orr r12,r12,r8
orr r12,r12,r9
orr r12,r12,r10
orr r12,r12,r11
- stmia r3!,{r4-r11}
- ldmia $a_ptr,{r4-r11}
cmp r12,#0
#ifdef __thumb2__
it ne
@@ -1713,7 +1689,7 @@ ecp_nistz256_point_add_affine:
stmia r3,{r4-r11}
str r12,[sp,#32*15+4] @ !in1infty
- ldmia $b_ptr!,{r4-r11} @ copy in2
+ ldmia $b_ptr!,{r4-r11} @ copy in2_x
add r3,sp,#$in2_x
orr r12,r4,r5
orr r12,r12,r6
@@ -1723,7 +1699,7 @@ ecp_nistz256_point_add_affine:
orr r12,r12,r10
orr r12,r12,r11
stmia r3!,{r4-r11}
- ldmia $b_ptr!,{r4-r11}
+ ldmia $b_ptr!,{r4-r11} @ copy in2_y
orr r12,r12,r4
orr r12,r12,r5
orr r12,r12,r6
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl
index 1362586..cdc9161 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -862,46 +862,28 @@ ecp_nistz256_point_add:
stp x25,x26,[sp,#64]
sub sp,sp,#32*12
- ldp $a0,$a1,[$bp]
- ldp $a2,$a3,[$bp,#16]
- ldp $t0,$t1,[$bp,#32]
- ldp $t2,$t3,[$bp,#48]
+ ldp $a0,$a1,[$bp,#64] // in2_z
+ ldp $a2,$a3,[$bp,#64+16]
mov $rp_real,$rp
mov $ap_real,$ap
mov $bp_real,$bp
- orr $a0,$a0,$a1
- orr $a2,$a2,$a3
- ldp $acc0,$acc1,[$ap]
- orr $t0,$t0,$t1
- orr $t2,$t2,$t3
- ldp $acc2,$acc3,[$ap,#16]
- orr $a0,$a0,$a2
- orr $t2,$t0,$t2
- ldp $t0,$t1,[$ap,#32]
- orr $in2infty,$a0,$t2
- cmp $in2infty,#0
- ldp $t2,$t3,[$ap,#48]
- csetm $in2infty,ne // !in2infty
-
- ldp $a0,$a1,[$bp_real,#64] // forward load for p256_sqr_mont
- orr $acc0,$acc0,$acc1
- orr $acc2,$acc2,$acc3
- ldp $a2,$a3,[$bp_real,#64+16]
- orr $t0,$t0,$t1
- orr $t2,$t2,$t3
- orr $acc0,$acc0,$acc2
- orr $t0,$t0,$t2
- orr $in1infty,$acc0,$t0
- cmp $in1infty,#0
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
- csetm $in1infty,ne // !in1infty
-
+ orr $t0,$a0,$a1
+ orr $t2,$a2,$a3
+ orr $in2infty,$t0,$t2
+ cmp $in2infty,#0
+ csetm $in2infty,ne // !in2infty
add $rp,sp,#$Z2sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z2sqr, in2_z);
- ldp $a0,$a1,[$ap_real,#64]
+ ldp $a0,$a1,[$ap_real,#64] // in1_z
ldp $a2,$a3,[$ap_real,#64+16]
+ orr $t0,$a0,$a1
+ orr $t2,$a2,$a3
+ orr $in1infty,$t0,$t2
+ cmp $in1infty,#0
+ csetm $in1infty,ne // !in1infty
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
@@ -1150,36 +1132,28 @@ ecp_nistz256_point_add_affine:
ldr $poly1,.Lpoly+8
ldr $poly3,.Lpoly+24
- ldp $a0,$a1,[$ap]
- ldp $a2,$a3,[$ap,#16]
- ldp $t0,$t1,[$ap,#32]
- ldp $t2,$t3,[$ap,#48]
- orr $a0,$a0,$a1
- orr $a2,$a2,$a3
- orr $t0,$t0,$t1
- orr $t2,$t2,$t3
- orr $a0,$a0,$a2
- orr $t0,$t0,$t2
- orr $in1infty,$a0,$t0
+ ldp $a0,$a1,[$ap,#64] // in1_z
+ ldp $a2,$a3,[$ap,#64+16]
+ orr $t0,$a0,$a1
+ orr $t2,$a2,$a3
+ orr $in1infty,$t0,$t2
cmp $in1infty,#0
csetm $in1infty,ne // !in1infty
- ldp $a0,$a1,[$bp]
- ldp $a2,$a3,[$bp,#16]
- ldp $t0,$t1,[$bp,#32]
+ ldp $acc0,$acc1,[$bp] // in2_x
+ ldp $acc2,$acc3,[$bp,#16]
+ ldp $t0,$t1,[$bp,#32] // in2_y
ldp $t2,$t3,[$bp,#48]
- orr $a0,$a0,$a1
- orr $a2,$a2,$a3
+ orr $acc0,$acc0,$acc1
+ orr $acc2,$acc2,$acc3
orr $t0,$t0,$t1
orr $t2,$t2,$t3
- orr $a0,$a0,$a2
+ orr $acc0,$acc0,$acc2
orr $t0,$t0,$t2
- orr $in2infty,$a0,$t0
+ orr $in2infty,$acc0,$t0
cmp $in2infty,#0
csetm $in2infty,ne // !in2infty
- ldp $a0,$a1,[$ap_real,#64]
- ldp $a2,$a3,[$ap_real,#64+16]
add $rp,sp,#$Z1sqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Z1sqr, in1_z);
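The AArch64 code materializes !in1infty/!in2infty as full-width masks with cmp/csetm, now computed from the Z limbs alone immediately before each p256_sqr_mont call. A hedged C equivalent of that branch-free mask construction (helper name is mine):

    #include <stdint.h>

    /* ~0 if any Z limb is non-zero (point is finite), 0 if Z == 0,
     * mirroring the orr/orr/orr + cmp + csetm ne sequence above. */
    static uint64_t not_infty_mask(const uint64_t z[4])
    {
        uint64_t acc = (z[0] | z[1]) | (z[2] | z[3]);
        return (uint64_t)0 - (uint64_t)(acc != 0); /* spread 0/1 to all bits */
    }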
diff --git a/crypto/ec/asm/ecp_nistz256-sparcv9.pl b/crypto/ec/asm/ecp_nistz256-sparcv9.pl
index 3c7ff50..97201cb 100755
--- a/crypto/ec/asm/ecp_nistz256-sparcv9.pl
+++ b/crypto/ec/asm/ecp_nistz256-sparcv9.pl
@@ -899,71 +899,39 @@ ecp_nistz256_point_add:
mov $ap,$ap_real
mov $bp,$bp_real
- ld [$bp],@acc[0] ! in2_x
- ld [$bp+4],@acc[1]
- ld [$bp+8],@acc[2]
- ld [$bp+12],@acc[3]
- ld [$bp+16],@acc[4]
- ld [$bp+20],@acc[5]
- ld [$bp+24],@acc[6]
- ld [$bp+28],@acc[7]
- ld [$bp+32],$t0 ! in2_y
- ld [$bp+32+4],$t1
- ld [$bp+32+8],$t2
- ld [$bp+32+12],$t3
- ld [$bp+32+16],$t4
- ld [$bp+32+20],$t5
- ld [$bp+32+24],$t6
- ld [$bp+32+28],$t7
- or @acc[1],@acc[0],@acc[0]
- or @acc[3],@acc[2],@acc[2]
- or @acc[5],@acc[4],@acc[4]
- or @acc[7],@acc[6],@acc[6]
- or @acc[2],@acc[0],@acc[0]
- or @acc[6],@acc[4],@acc[4]
- or @acc[4],@acc[0],@acc[0]
+ ld [$bp+64],$t0 ! in2_z
+ ld [$bp+64+4],$t1
+ ld [$bp+64+8],$t2
+ ld [$bp+64+12],$t3
+ ld [$bp+64+16],$t4
+ ld [$bp+64+20],$t5
+ ld [$bp+64+24],$t6
+ ld [$bp+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
- or $t4,$t0,$t0
- or @acc[0],$t0,$t0 ! !in2infty
+ or $t4,$t0,$t0 ! !in2infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-12]
- ld [$ap],@acc[0] ! in1_x
- ld [$ap+4],@acc[1]
- ld [$ap+8],@acc[2]
- ld [$ap+12],@acc[3]
- ld [$ap+16],@acc[4]
- ld [$ap+20],@acc[5]
- ld [$ap+24],@acc[6]
- ld [$ap+28],@acc[7]
- ld [$ap+32],$t0 ! in1_y
- ld [$ap+32+4],$t1
- ld [$ap+32+8],$t2
- ld [$ap+32+12],$t3
- ld [$ap+32+16],$t4
- ld [$ap+32+20],$t5
- ld [$ap+32+24],$t6
- ld [$ap+32+28],$t7
- or @acc[1],@acc[0],@acc[0]
- or @acc[3],@acc[2],@acc[2]
- or @acc[5],@acc[4],@acc[4]
- or @acc[7],@acc[6],@acc[6]
- or @acc[2],@acc[0],@acc[0]
- or @acc[6],@acc[4],@acc[4]
- or @acc[4],@acc[0],@acc[0]
+ ld [$ap+64],$t0 ! in1_z
+ ld [$ap+64+4],$t1
+ ld [$ap+64+8],$t2
+ ld [$ap+64+12],$t3
+ ld [$ap+64+16],$t4
+ ld [$ap+64+20],$t5
+ ld [$ap+64+24],$t6
+ ld [$ap+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
- or $t4,$t0,$t0
- or @acc[0],$t0,$t0 ! !in1infty
+ or $t4,$t0,$t0 ! !in1infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-16]
@@ -1201,37 +1169,21 @@ ecp_nistz256_point_add_affine:
mov $ap,$ap_real
mov $bp,$bp_real
- ld [$ap],@acc[0] ! in1_x
- ld [$ap+4],@acc[1]
- ld [$ap+8],@acc[2]
- ld [$ap+12],@acc[3]
- ld [$ap+16],@acc[4]
- ld [$ap+20],@acc[5]
- ld [$ap+24],@acc[6]
- ld [$ap+28],@acc[7]
- ld [$ap+32],$t0 ! in1_y
- ld [$ap+32+4],$t1
- ld [$ap+32+8],$t2
- ld [$ap+32+12],$t3
- ld [$ap+32+16],$t4
- ld [$ap+32+20],$t5
- ld [$ap+32+24],$t6
- ld [$ap+32+28],$t7
- or @acc[1],@acc[0],@acc[0]
- or @acc[3],@acc[2],@acc[2]
- or @acc[5],@acc[4],@acc[4]
- or @acc[7],@acc[6],@acc[6]
- or @acc[2],@acc[0],@acc[0]
- or @acc[6],@acc[4],@acc[4]
- or @acc[4],@acc[0],@acc[0]
+ ld [$ap+64],$t0 ! in1_z
+ ld [$ap+64+4],$t1
+ ld [$ap+64+8],$t2
+ ld [$ap+64+12],$t3
+ ld [$ap+64+16],$t4
+ ld [$ap+64+20],$t5
+ ld [$ap+64+24],$t6
+ ld [$ap+64+28],$t7
or $t1,$t0,$t0
or $t3,$t2,$t2
or $t5,$t4,$t4
or $t7,$t6,$t6
or $t2,$t0,$t0
or $t6,$t4,$t4
- or $t4,$t0,$t0
- or @acc[0],$t0,$t0 ! !in1infty
+ or $t4,$t0,$t0 ! !in1infty
movrnz $t0,-1,$t0
st $t0,[%fp+STACK_BIAS-16]
@@ -2402,16 +2354,6 @@ ecp_nistz256_point_add_vis3:
stx $acc2,[%sp+LOCALS64+$in2_y+16]
stx $acc3,[%sp+LOCALS64+$in2_y+24]
- or $a1,$a0,$a0
- or $a3,$a2,$a2
- or $acc1,$acc0,$acc0
- or $acc3,$acc2,$acc2
- or $a2,$a0,$a0
- or $acc2,$acc0,$acc0
- or $acc0,$a0,$a0
- movrnz $a0,-1,$a0 ! !in2infty
- stx $a0,[%fp+STACK_BIAS-8]
-
ld [$bp+64],$acc0 ! in2_z
ld [$bp+64+4],$t0
ld [$bp+64+8],$acc1
@@ -2445,6 +2387,12 @@ ecp_nistz256_point_add_vis3:
stx $acc2,[%sp+LOCALS64+$in2_z+16]
stx $acc3,[%sp+LOCALS64+$in2_z+24]
+ or $acc1,$acc0,$acc0
+ or $acc3,$acc2,$acc2
+ or $acc2,$acc0,$acc0
+ movrnz $acc0,-1,$acc0 ! !in2infty
+ stx $acc0,[%fp+STACK_BIAS-8]
+
or $a0,$t0,$a0
ld [$ap+32],$acc0 ! in1_y
or $a1,$t1,$a1
@@ -2474,16 +2422,6 @@ ecp_nistz256_point_add_vis3:
stx $acc2,[%sp+LOCALS64+$in1_y+16]
stx $acc3,[%sp+LOCALS64+$in1_y+24]
- or $a1,$a0,$a0
- or $a3,$a2,$a2
- or $acc1,$acc0,$acc0
- or $acc3,$acc2,$acc2
- or $a2,$a0,$a0
- or $acc2,$acc0,$acc0
- or $acc0,$a0,$a0
- movrnz $a0,-1,$a0 ! !in1infty
- stx $a0,[%fp+STACK_BIAS-16]
-
ldx [%sp+LOCALS64+$in2_z],$a0 ! forward load
ldx [%sp+LOCALS64+$in2_z+8],$a1
ldx [%sp+LOCALS64+$in2_z+16],$a2
@@ -2510,6 +2448,12 @@ ecp_nistz256_point_add_vis3:
stx $acc2,[%sp+LOCALS64+$in1_z+16]
stx $acc3,[%sp+LOCALS64+$in1_z+24]
+ or $acc1,$acc0,$acc0
+ or $acc3,$acc2,$acc2
+ or $acc2,$acc0,$acc0
+ movrnz $acc0,-1,$acc0 ! !in1infty
+ stx $acc0,[%fp+STACK_BIAS-16]
+
call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z2sqr, in2_z);
add %sp,LOCALS64+$Z2sqr,$rp
@@ -2871,16 +2815,6 @@ ecp_nistz256_point_add_affine_vis3:
stx $acc2,[%sp+LOCALS64+$in1_y+16]
stx $acc3,[%sp+LOCALS64+$in1_y+24]
- or $a1,$a0,$a0
- or $a3,$a2,$a2
- or $acc1,$acc0,$acc0
- or $acc3,$acc2,$acc2
- or $a2,$a0,$a0
- or $acc2,$acc0,$acc0
- or $acc0,$a0,$a0
- movrnz $a0,-1,$a0 ! !in1infty
- stx $a0,[%fp+STACK_BIAS-16]
-
ld [$ap+64],$a0 ! in1_z
ld [$ap+64+4],$t0
ld [$ap+64+8],$a1
@@ -2902,6 +2836,12 @@ ecp_nistz256_point_add_affine_vis3:
stx $a2,[%sp+LOCALS64+$in1_z+16]
stx $a3,[%sp+LOCALS64+$in1_z+24]
+ or $a1,$a0,$t0
+ or $a3,$a2,$t2
+ or $t2,$t0,$t0
+ movrnz $t0,-1,$t0 ! !in1infty
+ stx $t0,[%fp+STACK_BIAS-16]
+
call __ecp_nistz256_sqr_mont_vis3 ! p256_sqr_mont(Z1sqr, in1_z);
add %sp,LOCALS64+$Z1sqr,$rp
diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl b/crypto/ec/asm/ecp_nistz256-x86.pl
index b96b1aa..1d9e006 100755
--- a/crypto/ec/asm/ecp_nistz256-x86.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86.pl
@@ -1405,14 +1405,14 @@ for ($i=0;$i<7;$i++) {
&mov ("edx",&DWP($i+12,"esi"));
&mov (&DWP($i+0,"edi"),"eax");
&mov (&DWP(32*18+12,"esp"),"ebp") if ($i==0);
- &mov ("ebp","eax") if ($i==0);
- &or ("ebp","eax") if ($i!=0 && $i<64);
+ &mov ("ebp","eax") if ($i==64);
+ &or ("ebp","eax") if ($i>64);
&mov (&DWP($i+4,"edi"),"ebx");
- &or ("ebp","ebx") if ($i<64);
+ &or ("ebp","ebx") if ($i>=64);
&mov (&DWP($i+8,"edi"),"ecx");
- &or ("ebp","ecx") if ($i<64);
+ &or ("ebp","ecx") if ($i>=64);
&mov (&DWP($i+12,"edi"),"edx");
- &or ("ebp","edx") if ($i<64);
+ &or ("ebp","edx") if ($i>=64);
}
&xor ("eax","eax");
&mov ("esi",&wparam(1));
@@ -1428,14 +1428,14 @@ for ($i=0;$i<7;$i++) {
&mov ("ecx",&DWP($i+8,"esi"));
&mov ("edx",&DWP($i+12,"esi"));
&mov (&DWP($i+0,"edi"),"eax");
- &mov ("ebp","eax") if ($i==0);
- &or ("ebp","eax") if ($i!=0 && $i<64);
+ &mov ("ebp","eax") if ($i==64);
+ &or ("ebp","eax") if ($i>64);
&mov (&DWP($i+4,"edi"),"ebx");
- &or ("ebp","ebx") if ($i<64);
+ &or ("ebp","ebx") if ($i>=64);
&mov (&DWP($i+8,"edi"),"ecx");
- &or ("ebp","ecx") if ($i<64);
+ &or ("ebp","ecx") if ($i>=64);
&mov (&DWP($i+12,"edi"),"edx");
- &or ("ebp","edx") if ($i<64);
+ &or ("ebp","edx") if ($i>=64);
}
&xor ("eax","eax");
&sub ("eax","ebp");
@@ -1684,14 +1684,14 @@ for ($i=0;$i<7;$i++) {
&mov ("edx",&DWP($i+12,"esi"));
&mov (&DWP($i+0,"edi"),"eax");
&mov (&DWP(32*15+8,"esp"),"ebp") if ($i==0);
- &mov ("ebp","eax") if ($i==0);
- &or ("ebp","eax") if ($i!=0 && $i<64);
+ &mov ("ebp","eax") if ($i==64);
+ &or ("ebp","eax") if ($i>64);
&mov (&DWP($i+4,"edi"),"ebx");
- &or ("ebp","ebx") if ($i<64);
+ &or ("ebp","ebx") if ($i>=64);
&mov (&DWP($i+8,"edi"),"ecx");
- &or ("ebp","ecx") if ($i<64);
+ &or ("ebp","ecx") if ($i>=64);
&mov (&DWP($i+12,"edi"),"edx");
- &or ("ebp","edx") if ($i<64);
+ &or ("ebp","edx") if ($i>=64);
}
&xor ("eax","eax");
&mov ("esi",&wparam(2));
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index cc7b976..ddbbedf 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -2294,16 +2294,14 @@ $code.=<<___;
mov $b_org, $a_ptr # reassign
movdqa %xmm0, $in1_x(%rsp)
movdqa %xmm1, $in1_x+0x10(%rsp)
- por %xmm0, %xmm1
movdqa %xmm2, $in1_y(%rsp)
movdqa %xmm3, $in1_y+0x10(%rsp)
- por %xmm2, %xmm3
movdqa %xmm4, $in1_z(%rsp)
movdqa %xmm5, $in1_z+0x10(%rsp)
- por %xmm1, %xmm3
+ por %xmm4, %xmm5
movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$b_ptr
- pshufd \$0xb1, %xmm3, %xmm5
+ pshufd \$0xb1, %xmm5, %xmm3
movdqu 0x10($a_ptr), %xmm1
movdqu 0x20($a_ptr), %xmm2
por %xmm3, %xmm5
@@ -2315,14 +2313,14 @@ $code.=<<___;
movdqa %xmm0, $in2_x(%rsp)
pshufd \$0x1e, %xmm5, %xmm4
movdqa %xmm1, $in2_x+0x10(%rsp)
- por %xmm0, %xmm1
- movq $r_ptr, %xmm0 # save $r_ptr
+ movdqu 0x40($a_ptr),%xmm0 # in2_z again
+ movdqu 0x50($a_ptr),%xmm1
movdqa %xmm2, $in2_y(%rsp)
movdqa %xmm3, $in2_y+0x10(%rsp)
- por %xmm2, %xmm3
por %xmm4, %xmm5
pxor %xmm4, %xmm4
- por %xmm1, %xmm3
+ por %xmm0, %xmm1
+ movq $r_ptr, %xmm0 # save $r_ptr
lea 0x40-$bias($a_ptr), $a_ptr # $a_ptr is still valid
mov $src0, $in2_z+8*0(%rsp) # make in2_z copy
@@ -2333,8 +2331,8 @@ $code.=<<___;
call __ecp_nistz256_sqr_mont$x # p256_sqr_mont(Z2sqr, in2_z);
pcmpeqd %xmm4, %xmm5
- pshufd \$0xb1, %xmm3, %xmm4
- por %xmm3, %xmm4
+ pshufd \$0xb1, %xmm1, %xmm4
+ por %xmm1, %xmm4
pshufd \$0, %xmm5, %xmm5 # in1infty
pshufd \$0x1e, %xmm4, %xmm3
por %xmm3, %xmm4
@@ -2666,16 +2664,14 @@ $code.=<<___;
mov 0x40+8*3($a_ptr), $acc0
movdqa %xmm0, $in1_x(%rsp)
movdqa %xmm1, $in1_x+0x10(%rsp)
- por %xmm0, %xmm1
movdqa %xmm2, $in1_y(%rsp)
movdqa %xmm3, $in1_y+0x10(%rsp)
- por %xmm2, %xmm3
movdqa %xmm4, $in1_z(%rsp)
movdqa %xmm5, $in1_z+0x10(%rsp)
- por %xmm1, %xmm3
+ por %xmm4, %xmm5
movdqu 0x00($b_ptr), %xmm0 # copy *(P256_POINT_AFFINE *)$b_ptr
- pshufd \$0xb1, %xmm3, %xmm5
+ pshufd \$0xb1, %xmm5, %xmm3
movdqu 0x10($b_ptr), %xmm1
movdqu 0x20($b_ptr), %xmm2
por %xmm3, %xmm5
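The x86_64 path folds the two 16-byte halves of Z together with por/pshufd and compares against zero with pcmpeqd to broadcast an in*infty mask. A hedged intrinsics sketch of that folding, assuming SSE2 (the real code interleaves these steps with surrounding work; names are mine):

    #include <emmintrin.h>

    /* All-ones in every lane when the 32-byte Z is zero (infinity),
     * all-zero otherwise: por, two pshufd folds, then pcmpeqd. */
    static __m128i infty_mask(const unsigned char z[32])
    {
        __m128i lo = _mm_loadu_si128((const __m128i *)z);
        __m128i hi = _mm_loadu_si128((const __m128i *)(z + 16));
        __m128i t  = _mm_or_si128(lo, hi);
        t = _mm_or_si128(t, _mm_shuffle_epi32(t, 0xb1)); /* swap 32-bit pairs  */
        t = _mm_or_si128(t, _mm_shuffle_epi32(t, 0x1e)); /* fold across halves */
        return _mm_cmpeq_epi32(t, _mm_setzero_si128());
    }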
diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c
index 564a889..dca3a2d 100644
--- a/crypto/ec/ecp_nistz256.c
+++ b/crypto/ec/ecp_nistz256.c
@@ -335,19 +335,16 @@ static void ecp_nistz256_point_add(P256_POINT *r,
const BN_ULONG *in2_y = b->Y;
const BN_ULONG *in2_z = b->Z;
- /* We encode infinity as (0,0), which is not on the curve,
- * so it is OK. */
- in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
- in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+ /*
+ * Infinity is encoded as (,,0)
+ */
+ in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
if (P256_LIMBS == 8)
- in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
- in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+ in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
- in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
- in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
+ in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]);
if (P256_LIMBS == 8)
- in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
- in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
+ in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]);
in1infty = is_zero(in1infty);
in2infty = is_zero(in2infty);
@@ -436,15 +433,16 @@ static void ecp_nistz256_point_add_affine(P256_POINT *r,
const BN_ULONG *in2_y = b->Y;
/*
- * In affine representation we encode infty as (0,0), which is not on the
- * curve, so it is OK
+ * Infinity is encoded as (,,0)
*/
- in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
- in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+ in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
if (P256_LIMBS == 8)
- in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
- in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+ in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
+ /*
+ * In affine representation we encode infinity as (0,0), which is
+ * not on the curve, so it is OK
+ */
in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
if (P256_LIMBS == 8)
@@ -1273,6 +1271,8 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP *group,
} else
#endif
{
+ BN_ULONG infty;
+
/* First window */
wvalue = (p_str[0] << 1) & mask;
idx += window_size;
@@ -1285,7 +1285,30 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP *group,
ecp_nistz256_neg(p.p.Z, p.p.Y);
copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
- memcpy(p.p.Z, ONE, sizeof(ONE));
+ /*
+ * Since affine infinity is encoded as (0,0) and
+ * Jacobian is (,,0), we need to harmonize them
+ * by assigning "one" or zero to Z.
+ */
+ infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] |
+ p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]);
+ if (P256_LIMBS == 8)
+ infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] |
+ p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]);
+
+ infty = 0 - is_zero(infty);
+ infty = ~infty;
+
+ p.p.Z[0] = ONE[0] & infty;
+ p.p.Z[1] = ONE[1] & infty;
+ p.p.Z[2] = ONE[2] & infty;
+ p.p.Z[3] = ONE[3] & infty;
+ if (P256_LIMBS == 8) {
+ p.p.Z[4] = ONE[4] & infty;
+ p.p.Z[5] = ONE[5] & infty;
+ p.p.Z[6] = ONE[6] & infty;
+ p.p.Z[7] = ONE[7] & infty;
+ }
for (i = 1; i < 37; i++) {
unsigned int off = (idx - 1) / 8;
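The new points_mul logic widens is_zero(...) into a full-width mask and selects between ONE and zero for Z without branching. A C sketch of that selection; the is_zero below is a typical constant-time formulation assumed for illustration, not necessarily the exact helper in ecp_nistz256.c:

    #include <stdint.h>

    typedef uint64_t BN_ULONG;

    /* Constant-time: returns 1 if in == 0, else 0. */
    static BN_ULONG is_zero(BN_ULONG in)
    {
        in |= (0 - in);   /* top bit set iff in != 0 */
        in = ~in;
        return in >> 63;
    }

    /* Z = (X,Y) == (0,0) ? 0 : ONE, matching the masked stores above. */
    static void set_z(BN_ULONG z[4], const BN_ULONG one[4], BN_ULONG xy_acc)
    {
        BN_ULONG mask = ~((BN_ULONG)0 - is_zero(xy_acc));
        for (int i = 0; i < 4; i++)
            z[i] = one[i] & mask;
    }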