[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Tue Feb 23 20:23:33 UTC 2016
The branch master has been updated
via 50e34aaba390926e9d3bc9ffa4b7e3bab394db21 (commit)
via 143ee099e9cdffb256adc27cba583ec52454a29f (commit)
via d93753412b455907b4dc09427ccd2382209d9af2 (commit)
from e9fd82f624b158eb782da6a49725bb219247cb8c (commit)
- Log -----------------------------------------------------------------
commit 50e34aaba390926e9d3bc9ffa4b7e3bab394db21
Author: Andy Polyakov <appro at openssl.org>
Date: Sun Feb 21 21:05:50 2016 +0100
test/ectest.c: add regression test for RT#4284.
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit 143ee099e9cdffb256adc27cba583ec52454a29f
Author: Andy Polyakov <appro at openssl.org>
Date: Sun Feb 21 21:04:26 2016 +0100
ec/asm/ecp_nistz256-*.pl: get corner case logic right.
RT#4284
Reviewed-by: Rich Salz <rsalz at openssl.org>
commit d93753412b455907b4dc09427ccd2382209d9af2
Author: Andy Polyakov <appro at openssl.org>
Date: Wed Feb 3 11:26:11 2016 +0100
ec/asm/ecp_nistz256-x86_64.pl: get corner case logic right.
RT#4284
Reviewed-by: Rich Salz <rsalz at openssl.org>
-----------------------------------------------------------------------
Summary of changes:
crypto/ec/asm/ecp_nistz256-armv4.pl | 17 ++++++++++++-----
crypto/ec/asm/ecp_nistz256-armv8.pl | 16 +++++++++++++---
crypto/ec/asm/ecp_nistz256-x86.pl | 10 +++++++++-
crypto/ec/asm/ecp_nistz256-x86_64.pl | 11 ++++++++++-
test/ectest.c | 15 +++++++++++++++
5 files changed, 59 insertions(+), 10 deletions(-)
diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl
index 3a636ea..ab11a87 100755
--- a/crypto/ec/asm/ecp_nistz256-armv4.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv4.pl
@@ -1252,6 +1252,7 @@ ecp_nistz256_point_double:
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
sub sp,sp,#32*5
+.Lpoint_double_shortcut:
add r3,sp,#$in_x
ldmia $a_ptr!,{r4-r11} @ copy in_x
stmia r3,{r4-r11}
@@ -1371,7 +1372,7 @@ $code.=<<___;
.align 5
ecp_nistz256_point_add:
stmdb sp!,{r0-r12,lr} @ push from r0, unusual, but intentional
- sub sp,sp,#32*18
+ sub sp,sp,#32*18+16
ldmia $b_ptr!,{r4-r11} @ copy in2
add r3,sp,#$in2_x
@@ -1504,9 +1505,9 @@ ecp_nistz256_point_add:
tst $t0,$t1
beq .Ladd_proceed @ (in1infty || in2infty)?
tst $t2,$t2
- beq .Ladd_proceed @ is_equal(S1,S2)?
+ beq .Ladd_double @ is_equal(S1,S2)?
- ldr $r_ptr,[sp,#32*18]
+ ldr $r_ptr,[sp,#32*18+16]
eor r4,r4,r4
eor r5,r5,r5
eor r6,r6,r6
@@ -1521,6 +1522,12 @@ ecp_nistz256_point_add:
b .Ladd_done
.align 4
+.Ladd_double:
+ ldr $a_ptr,[sp,#32*18+20]
+ add sp,sp,#32*(18-5)+16 @ difference in frame sizes
+ b .Lpoint_double_shortcut
+
+.align 4
.Ladd_proceed:
add $a_ptr,sp,#$R
add $b_ptr,sp,#$R
@@ -1588,7 +1595,7 @@ ecp_nistz256_point_add:
add r3,sp,#$in1_x
and r11,r11,r12
mvn r12,r12
- ldr $r_ptr,[sp,#32*18]
+ ldr $r_ptr,[sp,#32*18+16]
___
for($i=0;$i<96;$i+=8) { # conditional moves
$code.=<<___;
@@ -1610,7 +1617,7 @@ ___
}
$code.=<<___;
.Ladd_done:
- add sp,sp,#32*18+16 @ +16 means "skip even over saved r0-r3"
+ add sp,sp,#32*18+16+16 @ +16 means "skip even over saved r0-r3"
#if __ARM_ARCH__>=5 || defined(__thumb__)
ldmia sp!,{r4-r12,pc}
#else
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl
index ce6b69e..4b2e925 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -691,12 +691,13 @@ $code.=<<___;
.type ecp_nistz256_point_double,%function
.align 5
ecp_nistz256_point_double:
- stp x29,x30,[sp,#-48]!
+ stp x29,x30,[sp,#-80]!
add x29,sp,#0
stp x19,x20,[sp,#16]
stp x21,x22,[sp,#32]
sub sp,sp,#32*4
+.Ldouble_shortcut:
ldp $acc0,$acc1,[$ap,#32]
mov $rp_real,$rp
ldp $acc2,$acc3,[$ap,#48]
@@ -823,7 +824,7 @@ ecp_nistz256_point_double:
add sp,x29,#0 // destroy frame
ldp x19,x20,[x29,#16]
ldp x21,x22,[x29,#32]
- ldp x29,x30,[sp],#48
+ ldp x29,x30,[sp],#80
ret
.size ecp_nistz256_point_double,.-ecp_nistz256_point_double
___
@@ -963,7 +964,7 @@ ecp_nistz256_point_add:
b.eq .Ladd_proceed // (in1infty || in2infty)?
tst $temp,$temp
- b.eq .Ladd_proceed // is_equal(S1,S2)?
+ b.eq .Ladd_double // is_equal(S1,S2)?
eor $a0,$a0,$a0
eor $a1,$a1,$a1
@@ -976,6 +977,15 @@ ecp_nistz256_point_add:
b .Ladd_done
.align 4
+.Ladd_double:
+ mov $ap,$ap_real
+ mov $rp,$rp_real
+ ldp x23,x24,[x29,#48]
+ ldp x25,x26,[x29,#64]
+ add sp,sp,#32*(12-4) // difference in stack frames
+ b .Ldouble_shortcut
+
+.align 4
.Ladd_proceed:
add $rp,sp,#$Rsqr
bl __ecp_nistz256_sqr_mont // p256_sqr_mont(Rsqr, R);
diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl b/crypto/ec/asm/ecp_nistz256-x86.pl
index 421ac0b..4d55f82 100755
--- a/crypto/ec/asm/ecp_nistz256-x86.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86.pl
@@ -1197,6 +1197,7 @@ for ($i=0;$i<7;$i++) {
########################################################################
# void ecp_nistz256_point_double(P256_POINT *out,const P256_POINT *inp);
#
+&static_label("point_double_shortcut");
&function_begin("ecp_nistz256_point_double");
{ my ($S,$M,$Zsqr,$in_x,$tmp0)=map(32*$_,(0..4));
@@ -1212,6 +1213,7 @@ for ($i=0;$i<7;$i++) {
&picmeup("edx","OPENSSL_ia32cap_P","eax",&label("pic"));
&mov ("ebp",&DWP(0,"edx")); }
+&set_label("point_double_shortcut");
&mov ("eax",&DWP(0,"esi")); # copy in_x
&mov ("ebx",&DWP(4,"esi"));
&mov ("ecx",&DWP(8,"esi"));
@@ -1491,7 +1493,7 @@ for ($i=0;$i<7;$i++) {
&mov ("ebx",&DWP(32*18+8,"esp"));
&jz (&label("add_proceed")); # (in1infty || in2infty)?
&test ("ebx","ebx");
- &jz (&label("add_proceed")); # is_equal(S1,S2)?
+ &jz (&label("add_double")); # is_equal(S1,S2)?
&mov ("edi",&wparam(0));
&xor ("eax","eax");
@@ -1499,6 +1501,12 @@ for ($i=0;$i<7;$i++) {
&data_byte(0xfc,0xf3,0xab); # cld; stosd
&jmp (&label("add_done"));
+&set_label("add_double",16);
+ &mov ("esi",&wparam(1));
+ &mov ("ebp",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy
+ &add ("esp",4*((8*18+5)-(8*5+1))); # difference in frame sizes
+ &jmp (&label("point_double_shortcut"));
+
&set_label("add_proceed",16);
&mov ("eax",&DWP(32*18+12,"esp")); # OPENSSL_ia32cap_P copy
&lea ("esi",&DWP($R,"esp"));
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index c2621c2..824d7b5 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -2044,6 +2044,7 @@ $code.=<<___;
push %r15
sub \$32*5+8, %rsp
+.Lpoint_double_shortcut$x:
movdqu 0x00($a_ptr), %xmm0 # copy *(P256_POINT *)$a_ptr.x
mov $a_ptr, $b_ptr # backup copy
movdqu 0x10($a_ptr), %xmm1
@@ -2334,6 +2335,7 @@ $code.=<<___;
mov 0x40+8*1($b_ptr), $acc6
mov 0x40+8*2($b_ptr), $acc7
mov 0x40+8*3($b_ptr), $acc0
+ movq $b_ptr, %xmm1
lea 0x40-$bias($b_ptr), $a_ptr
lea $Z1sqr(%rsp), $r_ptr # Z1^2
@@ -2389,7 +2391,7 @@ $code.=<<___;
test $acc0, $acc0
jnz .Ladd_proceed$x # (in1infty || in2infty)?
test $acc1, $acc1
- jz .Ladd_proceed$x # is_equal(S1,S2)?
+ jz .Ladd_double$x # is_equal(S1,S2)?
movq %xmm0, $r_ptr # restore $r_ptr
pxor %xmm0, %xmm0
@@ -2402,6 +2404,13 @@ $code.=<<___;
jmp .Ladd_done$x
.align 32
+.Ladd_double$x:
+ movq %xmm1, $a_ptr # restore $a_ptr
+ movq %xmm0, $r_ptr # restore $r_ptr
+ add \$`32*(18-5)`, %rsp # difference in frame sizes
+ jmp .Lpoint_double_shortcut$x
+
+.align 32
.Ladd_proceed$x:
`&load_for_sqr("$R(%rsp)", "$src0")`
lea $Rsqr(%rsp), $r_ptr # R^2
diff --git a/test/ectest.c b/test/ectest.c
index 0abb545..b0fbcdc 100644
--- a/test/ectest.c
+++ b/test/ectest.c
@@ -119,6 +119,8 @@ static void group_order_tests(EC_GROUP *group)
BIGNUM *n1, *n2, *order;
EC_POINT *P = EC_POINT_new(group);
EC_POINT *Q = EC_POINT_new(group);
+ EC_POINT *R = EC_POINT_new(group);
+ EC_POINT *S = EC_POINT_new(group);
BN_CTX *ctx = BN_CTX_new();
int i;
@@ -198,6 +200,17 @@ static void group_order_tests(EC_GROUP *group)
/* Exercise EC_POINTs_mul, including corner cases. */
if (EC_POINT_is_at_infinity(group, P))
ABORT;
+
+ scalars[0] = scalars[1] = BN_value_one();
+ points[0] = points[1] = P;
+
+ if (!EC_POINTs_mul(group, R, NULL, 2, points, scalars, ctx))
+ ABORT;
+ if (!EC_POINT_dbl(group, S, points[0], ctx))
+ ABORT;
+ if (0 != EC_POINT_cmp(group, R, S, ctx))
+ ABORT;
+
scalars[0] = n1;
points[0] = Q; /* => infinity */
scalars[1] = n2;
@@ -219,6 +232,8 @@ static void group_order_tests(EC_GROUP *group)
EC_POINT_free(P);
EC_POINT_free(Q);
+ EC_POINT_free(R);
+ EC_POINT_free(S);
BN_free(n1);
BN_free(n2);
BN_free(order);
More information about the openssl-commits mailing list