[openssl-commits] [openssl] master update

Matt Caswell matt at openssl.org
Wed Aug 24 09:57:06 UTC 2016


The branch master has been updated
       via  c74aea8d6ccdf07ce826a9451887739b8aa64096 (commit)
       via  e3057a57caf4274ea1fb074518e4714059dfcabf (commit)
      from  dfde4219fdebbb5a8a17602fea036f7690e517ea (commit)


- Log -----------------------------------------------------------------
commit c74aea8d6ccdf07ce826a9451887739b8aa64096
Author: Andy Polyakov <appro at openssl.org>
Date:   Fri Aug 19 23:18:35 2016 +0200

    ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.
    
    RT#4625
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

commit e3057a57caf4274ea1fb074518e4714059dfcabf
Author: Andy Polyakov <appro at openssl.org>
Date:   Fri Aug 19 23:16:04 2016 +0200

    ec/ecp_nistz256: harmonize is_infinity with ec_GFp_simple_is_at_infinity.
    
    RT#4625
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>

-----------------------------------------------------------------------

Summary of changes:
 crypto/ec/asm/ecp_nistz256-armv4.pl   |  58 ++++---------
 crypto/ec/asm/ecp_nistz256-armv8.pl   |  76 ++++++-----------
 crypto/ec/asm/ecp_nistz256-sparcv9.pl | 150 ++++++++++------------------------
 crypto/ec/asm/ecp_nistz256-x86.pl     |  30 +++----
 crypto/ec/asm/ecp_nistz256-x86_64.pl  |  24 +++---
 crypto/ec/ecp_nistz256.c              |  57 +++++++++----
 6 files changed, 152 insertions(+), 243 deletions(-)

diff --git a/crypto/ec/asm/ecp_nistz256-armv4.pl b/crypto/ec/asm/ecp_nistz256-armv4.pl
index de3cd5c..2314b75 100755
--- a/crypto/ec/asm/ecp_nistz256-armv4.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv4.pl
@@ -1405,27 +1405,19 @@ ecp_nistz256_point_add:
 	stmdb	sp!,{r0-r12,lr}		@ push from r0, unusual, but intentional
 	sub	sp,sp,#32*18+16
 
-	ldmia	$b_ptr!,{r4-r11}	@ copy in2
+	ldmia	$b_ptr!,{r4-r11}	@ copy in2_x
 	add	r3,sp,#$in2_x
-	orr	r12,r4,r5
-	orr	r12,r12,r6
-	orr	r12,r12,r7
-	orr	r12,r12,r8
-	orr	r12,r12,r9
-	orr	r12,r12,r10
-	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
-	ldmia	$b_ptr!,{r4-r11}
-	orr	r12,r12,r4
-	orr	r12,r12,r5
+	ldmia	$b_ptr!,{r4-r11}	@ copy in2_y
+	stmia	r3!,{r4-r11}
+	ldmia	$b_ptr,{r4-r11}		@ copy in2_z
+	orr	r12,r4,r5
 	orr	r12,r12,r6
 	orr	r12,r12,r7
 	orr	r12,r12,r8
 	orr	r12,r12,r9
 	orr	r12,r12,r10
 	orr	r12,r12,r11
-	stmia	r3!,{r4-r11}
-	ldmia	$b_ptr,{r4-r11}
 	cmp	r12,#0
 #ifdef	__thumb2__
 	it	ne
@@ -1434,27 +1426,19 @@ ecp_nistz256_point_add:
 	stmia	r3,{r4-r11}
 	str	r12,[sp,#32*18+8]	@ !in2infty
 
-	ldmia	$a_ptr!,{r4-r11}	@ copy in1
+	ldmia	$a_ptr!,{r4-r11}	@ copy in1_x
 	add	r3,sp,#$in1_x
-	orr	r12,r4,r5
-	orr	r12,r12,r6
-	orr	r12,r12,r7
-	orr	r12,r12,r8
-	orr	r12,r12,r9
-	orr	r12,r12,r10
-	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
-	ldmia	$a_ptr!,{r4-r11}
-	orr	r12,r12,r4
-	orr	r12,r12,r5
+	ldmia	$a_ptr!,{r4-r11}	@ copy in1_y
+	stmia	r3!,{r4-r11}
+	ldmia	$a_ptr,{r4-r11}		@ copy in1_z
+	orr	r12,r4,r5
 	orr	r12,r12,r6
 	orr	r12,r12,r7
 	orr	r12,r12,r8
 	orr	r12,r12,r9
 	orr	r12,r12,r10
 	orr	r12,r12,r11
-	stmia	r3!,{r4-r11}
-	ldmia	$a_ptr,{r4-r11}
 	cmp	r12,#0
 #ifdef	__thumb2__
 	it	ne
@@ -1684,27 +1668,19 @@ ecp_nistz256_point_add_affine:
 	stmdb	sp!,{r0-r12,lr}		@ push from r0, unusual, but intentional
 	sub	sp,sp,#32*15
 
-	ldmia	$a_ptr!,{r4-r11}	@ copy in1
+	ldmia	$a_ptr!,{r4-r11}	@ copy in1_x
 	add	r3,sp,#$in1_x
-	orr	r12,r4,r5
-	orr	r12,r12,r6
-	orr	r12,r12,r7
-	orr	r12,r12,r8
-	orr	r12,r12,r9
-	orr	r12,r12,r10
-	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
-	ldmia	$a_ptr!,{r4-r11}
-	orr	r12,r12,r4
-	orr	r12,r12,r5
+	ldmia	$a_ptr!,{r4-r11}	@ copy in1_y
+	stmia	r3!,{r4-r11}
+	ldmia	$a_ptr,{r4-r11}		@ copy in1_z
+	orr	r12,r4,r5
 	orr	r12,r12,r6
 	orr	r12,r12,r7
 	orr	r12,r12,r8
 	orr	r12,r12,r9
 	orr	r12,r12,r10
 	orr	r12,r12,r11
-	stmia	r3!,{r4-r11}
-	ldmia	$a_ptr,{r4-r11}
 	cmp	r12,#0
 #ifdef	__thumb2__
 	it	ne
@@ -1713,7 +1689,7 @@ ecp_nistz256_point_add_affine:
 	stmia	r3,{r4-r11}
 	str	r12,[sp,#32*15+4]	@ !in1infty
 
-	ldmia	$b_ptr!,{r4-r11}	@ copy in2
+	ldmia	$b_ptr!,{r4-r11}	@ copy in2_x
 	add	r3,sp,#$in2_x
 	orr	r12,r4,r5
 	orr	r12,r12,r6
@@ -1723,7 +1699,7 @@ ecp_nistz256_point_add_affine:
 	orr	r12,r12,r10
 	orr	r12,r12,r11
 	stmia	r3!,{r4-r11}
-	ldmia	$b_ptr!,{r4-r11}
+	ldmia	$b_ptr!,{r4-r11}	@ copy in2_y
 	orr	r12,r12,r4
 	orr	r12,r12,r5
 	orr	r12,r12,r6
diff --git a/crypto/ec/asm/ecp_nistz256-armv8.pl b/crypto/ec/asm/ecp_nistz256-armv8.pl
index 1362586..cdc9161 100644
--- a/crypto/ec/asm/ecp_nistz256-armv8.pl
+++ b/crypto/ec/asm/ecp_nistz256-armv8.pl
@@ -862,46 +862,28 @@ ecp_nistz256_point_add:
 	stp	x25,x26,[sp,#64]
 	sub	sp,sp,#32*12
 
-	ldp	$a0,$a1,[$bp]
-	ldp	$a2,$a3,[$bp,#16]
-	ldp	$t0,$t1,[$bp,#32]
-	ldp	$t2,$t3,[$bp,#48]
+	ldp	$a0,$a1,[$bp,#64]	// in2_z
+	ldp	$a2,$a3,[$bp,#64+16]
 	 mov	$rp_real,$rp
 	 mov	$ap_real,$ap
 	 mov	$bp_real,$bp
-	orr	$a0,$a0,$a1
-	orr	$a2,$a2,$a3
-	 ldp	$acc0,$acc1,[$ap]
-	orr	$t0,$t0,$t1
-	orr	$t2,$t2,$t3
-	 ldp	$acc2,$acc3,[$ap,#16]
-	orr	$a0,$a0,$a2
-	orr	$t2,$t0,$t2
-	 ldp	$t0,$t1,[$ap,#32]
-	orr	$in2infty,$a0,$t2
-	cmp	$in2infty,#0
-	 ldp	$t2,$t3,[$ap,#48]
-	csetm	$in2infty,ne		// !in2infty
-
-	 ldp	$a0,$a1,[$bp_real,#64]	// forward load for p256_sqr_mont
-	orr	$acc0,$acc0,$acc1
-	orr	$acc2,$acc2,$acc3
-	 ldp	$a2,$a3,[$bp_real,#64+16]
-	orr	$t0,$t0,$t1
-	orr	$t2,$t2,$t3
-	orr	$acc0,$acc0,$acc2
-	orr	$t0,$t0,$t2
-	orr	$in1infty,$acc0,$t0
-	cmp	$in1infty,#0
 	 ldr	$poly1,.Lpoly+8
 	 ldr	$poly3,.Lpoly+24
-	csetm	$in1infty,ne		// !in1infty
-
+	orr	$t0,$a0,$a1
+	orr	$t2,$a2,$a3
+	orr	$in2infty,$t0,$t2
+	cmp	$in2infty,#0
+	csetm	$in2infty,ne		// !in2infty
 	add	$rp,sp,#$Z2sqr
 	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z2sqr, in2_z);
 
-	ldp	$a0,$a1,[$ap_real,#64]
+	ldp	$a0,$a1,[$ap_real,#64]	// in1_z
 	ldp	$a2,$a3,[$ap_real,#64+16]
+	orr	$t0,$a0,$a1
+	orr	$t2,$a2,$a3
+	orr	$in1infty,$t0,$t2
+	cmp	$in1infty,#0
+	csetm	$in1infty,ne		// !in1infty
 	add	$rp,sp,#$Z1sqr
 	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);
 
@@ -1150,36 +1132,28 @@ ecp_nistz256_point_add_affine:
 	ldr	$poly1,.Lpoly+8
 	ldr	$poly3,.Lpoly+24
 
-	ldp	$a0,$a1,[$ap]
-	ldp	$a2,$a3,[$ap,#16]
-	ldp	$t0,$t1,[$ap,#32]
-	ldp	$t2,$t3,[$ap,#48]
-	orr	$a0,$a0,$a1
-	orr	$a2,$a2,$a3
-	orr	$t0,$t0,$t1
-	orr	$t2,$t2,$t3
-	orr	$a0,$a0,$a2
-	orr	$t0,$t0,$t2
-	orr	$in1infty,$a0,$t0
+	ldp	$a0,$a1,[$ap,#64]	// in1_z
+	ldp	$a2,$a3,[$ap,#64+16]
+	orr	$t0,$a0,$a1
+	orr	$t2,$a2,$a3
+	orr	$in1infty,$t0,$t2
 	cmp	$in1infty,#0
 	csetm	$in1infty,ne		// !in1infty
 
-	ldp	$a0,$a1,[$bp]
-	ldp	$a2,$a3,[$bp,#16]
-	ldp	$t0,$t1,[$bp,#32]
+	ldp	$acc0,$acc1,[$bp]	// in2_x
+	ldp	$acc2,$acc3,[$bp,#16]
+	ldp	$t0,$t1,[$bp,#32]	// in2_y
 	ldp	$t2,$t3,[$bp,#48]
-	orr	$a0,$a0,$a1
-	orr	$a2,$a2,$a3
+	orr	$acc0,$acc0,$acc1
+	orr	$acc2,$acc2,$acc3
 	orr	$t0,$t0,$t1
 	orr	$t2,$t2,$t3
-	orr	$a0,$a0,$a2
+	orr	$acc0,$acc0,$acc2
 	orr	$t0,$t0,$t2
-	orr	$in2infty,$a0,$t0
+	orr	$in2infty,$acc0,$t0
 	cmp	$in2infty,#0
 	csetm	$in2infty,ne		// !in2infty
 
-	ldp	$a0,$a1,[$ap_real,#64]
-	ldp	$a2,$a3,[$ap_real,#64+16]
 	add	$rp,sp,#$Z1sqr
 	bl	__ecp_nistz256_sqr_mont	// p256_sqr_mont(Z1sqr, in1_z);
 
diff --git a/crypto/ec/asm/ecp_nistz256-sparcv9.pl b/crypto/ec/asm/ecp_nistz256-sparcv9.pl
index 3c7ff50..97201cb 100755
--- a/crypto/ec/asm/ecp_nistz256-sparcv9.pl
+++ b/crypto/ec/asm/ecp_nistz256-sparcv9.pl
@@ -899,71 +899,39 @@ ecp_nistz256_point_add:
 	mov	$ap,$ap_real
 	mov	$bp,$bp_real
 
-	ld	[$bp],@acc[0]		! in2_x
-	ld	[$bp+4],@acc[1]
-	ld	[$bp+8],@acc[2]
-	ld	[$bp+12],@acc[3]
-	ld	[$bp+16],@acc[4]
-	ld	[$bp+20],@acc[5]
-	ld	[$bp+24],@acc[6]
-	ld	[$bp+28],@acc[7]
-	ld	[$bp+32],$t0		! in2_y
-	ld	[$bp+32+4],$t1
-	ld	[$bp+32+8],$t2
-	ld	[$bp+32+12],$t3
-	ld	[$bp+32+16],$t4
-	ld	[$bp+32+20],$t5
-	ld	[$bp+32+24],$t6
-	ld	[$bp+32+28],$t7
-	or	@acc[1],@acc[0],@acc[0]
-	or	@acc[3],@acc[2],@acc[2]
-	or	@acc[5],@acc[4],@acc[4]
-	or	@acc[7],@acc[6],@acc[6]
-	or	@acc[2],@acc[0],@acc[0]
-	or	@acc[6],@acc[4],@acc[4]
-	or	@acc[4],@acc[0],@acc[0]
+	ld	[$bp+64],$t0		! in2_z
+	ld	[$bp+64+4],$t1
+	ld	[$bp+64+8],$t2
+	ld	[$bp+64+12],$t3
+	ld	[$bp+64+16],$t4
+	ld	[$bp+64+20],$t5
+	ld	[$bp+64+24],$t6
+	ld	[$bp+64+28],$t7
 	or	$t1,$t0,$t0
 	or	$t3,$t2,$t2
 	or	$t5,$t4,$t4
 	or	$t7,$t6,$t6
 	or	$t2,$t0,$t0
 	or	$t6,$t4,$t4
-	or	$t4,$t0,$t0
-	or	@acc[0],$t0,$t0		! !in2infty
+	or	$t4,$t0,$t0		! !in2infty
 	movrnz	$t0,-1,$t0
 	st	$t0,[%fp+STACK_BIAS-12]
 
-	ld	[$ap],@acc[0]		! in1_x
-	ld	[$ap+4],@acc[1]
-	ld	[$ap+8],@acc[2]
-	ld	[$ap+12],@acc[3]
-	ld	[$ap+16],@acc[4]
-	ld	[$ap+20],@acc[5]
-	ld	[$ap+24],@acc[6]
-	ld	[$ap+28],@acc[7]
-	ld	[$ap+32],$t0		! in1_y
-	ld	[$ap+32+4],$t1
-	ld	[$ap+32+8],$t2
-	ld	[$ap+32+12],$t3
-	ld	[$ap+32+16],$t4
-	ld	[$ap+32+20],$t5
-	ld	[$ap+32+24],$t6
-	ld	[$ap+32+28],$t7
-	or	@acc[1],@acc[0],@acc[0]
-	or	@acc[3],@acc[2],@acc[2]
-	or	@acc[5],@acc[4],@acc[4]
-	or	@acc[7],@acc[6],@acc[6]
-	or	@acc[2],@acc[0],@acc[0]
-	or	@acc[6],@acc[4],@acc[4]
-	or	@acc[4],@acc[0],@acc[0]
+	ld	[$ap+64],$t0		! in1_z
+	ld	[$ap+64+4],$t1
+	ld	[$ap+64+8],$t2
+	ld	[$ap+64+12],$t3
+	ld	[$ap+64+16],$t4
+	ld	[$ap+64+20],$t5
+	ld	[$ap+64+24],$t6
+	ld	[$ap+64+28],$t7
 	or	$t1,$t0,$t0
 	or	$t3,$t2,$t2
 	or	$t5,$t4,$t4
 	or	$t7,$t6,$t6
 	or	$t2,$t0,$t0
 	or	$t6,$t4,$t4
-	or	$t4,$t0,$t0
-	or	@acc[0],$t0,$t0		! !in1infty
+	or	$t4,$t0,$t0		! !in1infty
 	movrnz	$t0,-1,$t0
 	st	$t0,[%fp+STACK_BIAS-16]
 
@@ -1201,37 +1169,21 @@ ecp_nistz256_point_add_affine:
 	mov	$ap,$ap_real
 	mov	$bp,$bp_real
 
-	ld	[$ap],@acc[0]		! in1_x
-	ld	[$ap+4],@acc[1]
-	ld	[$ap+8],@acc[2]
-	ld	[$ap+12],@acc[3]
-	ld	[$ap+16],@acc[4]
-	ld	[$ap+20],@acc[5]
-	ld	[$ap+24],@acc[6]
-	ld	[$ap+28],@acc[7]
-	ld	[$ap+32],$t0		! in1_y
-	ld	[$ap+32+4],$t1
-	ld	[$ap+32+8],$t2
-	ld	[$ap+32+12],$t3
-	ld	[$ap+32+16],$t4
-	ld	[$ap+32+20],$t5
-	ld	[$ap+32+24],$t6
-	ld	[$ap+32+28],$t7
-	or	@acc[1],@acc[0],@acc[0]
-	or	@acc[3],@acc[2],@acc[2]
-	or	@acc[5],@acc[4],@acc[4]
-	or	@acc[7],@acc[6],@acc[6]
-	or	@acc[2],@acc[0],@acc[0]
-	or	@acc[6],@acc[4],@acc[4]
-	or	@acc[4],@acc[0],@acc[0]
+	ld	[$ap+64],$t0		! in1_z
+	ld	[$ap+64+4],$t1
+	ld	[$ap+64+8],$t2
+	ld	[$ap+64+12],$t3
+	ld	[$ap+64+16],$t4
+	ld	[$ap+64+20],$t5
+	ld	[$ap+64+24],$t6
+	ld	[$ap+64+28],$t7
 	or	$t1,$t0,$t0
 	or	$t3,$t2,$t2
 	or	$t5,$t4,$t4
 	or	$t7,$t6,$t6
 	or	$t2,$t0,$t0
 	or	$t6,$t4,$t4
-	or	$t4,$t0,$t0
-	or	@acc[0],$t0,$t0		! !in1infty
+	or	$t4,$t0,$t0		! !in1infty
 	movrnz	$t0,-1,$t0
 	st	$t0,[%fp+STACK_BIAS-16]
 
@@ -2402,16 +2354,6 @@ ecp_nistz256_point_add_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in2_y+16]
 	stx	$acc3,[%sp+LOCALS64+$in2_y+24]
 
-	or	$a1,$a0,$a0
-	or	$a3,$a2,$a2
-	or	$acc1,$acc0,$acc0
-	or	$acc3,$acc2,$acc2
-	or	$a2,$a0,$a0
-	or	$acc2,$acc0,$acc0
-	or	$acc0,$a0,$a0
-	movrnz	$a0,-1,$a0			! !in2infty
-	stx	$a0,[%fp+STACK_BIAS-8]
-
 	ld	[$bp+64],$acc0			! in2_z
 	ld	[$bp+64+4],$t0
 	ld	[$bp+64+8],$acc1
@@ -2445,6 +2387,12 @@ ecp_nistz256_point_add_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in2_z+16]
 	stx	$acc3,[%sp+LOCALS64+$in2_z+24]
 
+	or	$acc1,$acc0,$acc0
+	or	$acc3,$acc2,$acc2
+	or	$acc2,$acc0,$acc0
+	movrnz	$acc0,-1,$acc0			! !in2infty
+	stx	$acc0,[%fp+STACK_BIAS-8]
+
 	or	$a0,$t0,$a0
 	ld	[$ap+32],$acc0			! in1_y
 	or	$a1,$t1,$a1
@@ -2474,16 +2422,6 @@ ecp_nistz256_point_add_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in1_y+16]
 	stx	$acc3,[%sp+LOCALS64+$in1_y+24]
 
-	or	$a1,$a0,$a0
-	or	$a3,$a2,$a2
-	or	$acc1,$acc0,$acc0
-	or	$acc3,$acc2,$acc2
-	or	$a2,$a0,$a0
-	or	$acc2,$acc0,$acc0
-	or	$acc0,$a0,$a0
-	movrnz	$a0,-1,$a0			! !in1infty
-	stx	$a0,[%fp+STACK_BIAS-16]
-
 	ldx	[%sp+LOCALS64+$in2_z],$a0	! forward load
 	ldx	[%sp+LOCALS64+$in2_z+8],$a1
 	ldx	[%sp+LOCALS64+$in2_z+16],$a2
@@ -2510,6 +2448,12 @@ ecp_nistz256_point_add_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in1_z+16]
 	stx	$acc3,[%sp+LOCALS64+$in1_z+24]
 
+	or	$acc1,$acc0,$acc0
+	or	$acc3,$acc2,$acc2
+	or	$acc2,$acc0,$acc0
+	movrnz	$acc0,-1,$acc0			! !in1infty
+	stx	$acc0,[%fp+STACK_BIAS-16]
+
 	call	__ecp_nistz256_sqr_mont_vis3	! p256_sqr_mont(Z2sqr, in2_z);
 	add	%sp,LOCALS64+$Z2sqr,$rp
 
@@ -2871,16 +2815,6 @@ ecp_nistz256_point_add_affine_vis3:
 	stx	$acc2,[%sp+LOCALS64+$in1_y+16]
 	stx	$acc3,[%sp+LOCALS64+$in1_y+24]
 
-	or	$a1,$a0,$a0
-	or	$a3,$a2,$a2
-	or	$acc1,$acc0,$acc0
-	or	$acc3,$acc2,$acc2
-	or	$a2,$a0,$a0
-	or	$acc2,$acc0,$acc0
-	or	$acc0,$a0,$a0
-	movrnz	$a0,-1,$a0			! !in1infty
-	stx	$a0,[%fp+STACK_BIAS-16]
-
 	ld	[$ap+64],$a0			! in1_z
 	ld	[$ap+64+4],$t0
 	ld	[$ap+64+8],$a1
@@ -2902,6 +2836,12 @@ ecp_nistz256_point_add_affine_vis3:
 	stx	$a2,[%sp+LOCALS64+$in1_z+16]
 	stx	$a3,[%sp+LOCALS64+$in1_z+24]
 
+	or	$a1,$a0,$t0
+	or	$a3,$a2,$t2
+	or	$t2,$t0,$t0
+	movrnz	$t0,-1,$t0			! !in1infty
+	stx	$t0,[%fp+STACK_BIAS-16]
+
 	call	__ecp_nistz256_sqr_mont_vis3	! p256_sqr_mont(Z1sqr, in1_z);
 	add	%sp,LOCALS64+$Z1sqr,$rp
 
diff --git a/crypto/ec/asm/ecp_nistz256-x86.pl b/crypto/ec/asm/ecp_nistz256-x86.pl
index b96b1aa..1d9e006 100755
--- a/crypto/ec/asm/ecp_nistz256-x86.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86.pl
@@ -1405,14 +1405,14 @@ for ($i=0;$i<7;$i++) {
 	&mov	("edx",&DWP($i+12,"esi"));
 	&mov	(&DWP($i+0,"edi"),"eax");
 	&mov	(&DWP(32*18+12,"esp"),"ebp")	if ($i==0);
-	&mov	("ebp","eax")			if ($i==0);
-	&or	("ebp","eax")			if ($i!=0 && $i<64);
+	&mov	("ebp","eax")			if ($i==64);
+	&or	("ebp","eax")			if ($i>64);
 	&mov	(&DWP($i+4,"edi"),"ebx");
-	&or	("ebp","ebx")			if ($i<64);
+	&or	("ebp","ebx")			if ($i>=64);
 	&mov	(&DWP($i+8,"edi"),"ecx");
-	&or	("ebp","ecx")			if ($i<64);
+	&or	("ebp","ecx")			if ($i>=64);
 	&mov	(&DWP($i+12,"edi"),"edx");
-	&or	("ebp","edx")			if ($i<64);
+	&or	("ebp","edx")			if ($i>=64);
     }
 	&xor	("eax","eax");
 	&mov	("esi",&wparam(1));
@@ -1428,14 +1428,14 @@ for ($i=0;$i<7;$i++) {
 	&mov	("ecx",&DWP($i+8,"esi"));
 	&mov	("edx",&DWP($i+12,"esi"));
 	&mov	(&DWP($i+0,"edi"),"eax");
-	&mov	("ebp","eax")			if ($i==0);
-	&or	("ebp","eax")			if ($i!=0 && $i<64);
+	&mov	("ebp","eax")			if ($i==64);
+	&or	("ebp","eax")			if ($i>64);
 	&mov	(&DWP($i+4,"edi"),"ebx");
-	&or	("ebp","ebx")			if ($i<64);
+	&or	("ebp","ebx")			if ($i>=64);
 	&mov	(&DWP($i+8,"edi"),"ecx");
-	&or	("ebp","ecx")			if ($i<64);
+	&or	("ebp","ecx")			if ($i>=64);
 	&mov	(&DWP($i+12,"edi"),"edx");
-	&or	("ebp","edx")			if ($i<64);
+	&or	("ebp","edx")			if ($i>=64);
     }
 	&xor	("eax","eax");
 	&sub	("eax","ebp");
@@ -1684,14 +1684,14 @@ for ($i=0;$i<7;$i++) {
 	&mov	("edx",&DWP($i+12,"esi"));
 	&mov	(&DWP($i+0,"edi"),"eax");
 	&mov	(&DWP(32*15+8,"esp"),"ebp")	if ($i==0);
-	&mov	("ebp","eax")			if ($i==0);
-	&or	("ebp","eax")			if ($i!=0 && $i<64);
+	&mov	("ebp","eax")			if ($i==64);
+	&or	("ebp","eax")			if ($i>64);
 	&mov	(&DWP($i+4,"edi"),"ebx");
-	&or	("ebp","ebx")			if ($i<64);
+	&or	("ebp","ebx")			if ($i>=64);
 	&mov	(&DWP($i+8,"edi"),"ecx");
-	&or	("ebp","ecx")			if ($i<64);
+	&or	("ebp","ecx")			if ($i>=64);
 	&mov	(&DWP($i+12,"edi"),"edx");
-	&or	("ebp","edx")			if ($i<64);
+	&or	("ebp","edx")			if ($i>=64);
     }
 	&xor	("eax","eax");
 	&mov	("esi",&wparam(2));
diff --git a/crypto/ec/asm/ecp_nistz256-x86_64.pl b/crypto/ec/asm/ecp_nistz256-x86_64.pl
index cc7b976..ddbbedf 100755
--- a/crypto/ec/asm/ecp_nistz256-x86_64.pl
+++ b/crypto/ec/asm/ecp_nistz256-x86_64.pl
@@ -2294,16 +2294,14 @@ $code.=<<___;
 	mov	$b_org, $a_ptr			# reassign
 	movdqa	%xmm0, $in1_x(%rsp)
 	movdqa	%xmm1, $in1_x+0x10(%rsp)
-	por	%xmm0, %xmm1
 	movdqa	%xmm2, $in1_y(%rsp)
 	movdqa	%xmm3, $in1_y+0x10(%rsp)
-	por	%xmm2, %xmm3
 	movdqa	%xmm4, $in1_z(%rsp)
 	movdqa	%xmm5, $in1_z+0x10(%rsp)
-	por	%xmm1, %xmm3
+	por	%xmm4, %xmm5
 
 	movdqu	0x00($a_ptr), %xmm0		# copy	*(P256_POINT *)$b_ptr
-	 pshufd	\$0xb1, %xmm3, %xmm5
+	 pshufd	\$0xb1, %xmm5, %xmm3
 	movdqu	0x10($a_ptr), %xmm1
 	movdqu	0x20($a_ptr), %xmm2
 	 por	%xmm3, %xmm5
@@ -2315,14 +2313,14 @@ $code.=<<___;
 	movdqa	%xmm0, $in2_x(%rsp)
 	 pshufd	\$0x1e, %xmm5, %xmm4
 	movdqa	%xmm1, $in2_x+0x10(%rsp)
-	por	%xmm0, %xmm1
-	 movq	$r_ptr, %xmm0			# save $r_ptr
+	movdqu	0x40($a_ptr),%xmm0		# in2_z again
+	movdqu	0x50($a_ptr),%xmm1
 	movdqa	%xmm2, $in2_y(%rsp)
 	movdqa	%xmm3, $in2_y+0x10(%rsp)
-	por	%xmm2, %xmm3
 	 por	%xmm4, %xmm5
 	 pxor	%xmm4, %xmm4
-	por	%xmm1, %xmm3
+	por	%xmm0, %xmm1
+	 movq	$r_ptr, %xmm0			# save $r_ptr
 
 	lea	0x40-$bias($a_ptr), $a_ptr	# $a_ptr is still valid
 	 mov	$src0, $in2_z+8*0(%rsp)		# make in2_z copy
@@ -2333,8 +2331,8 @@ $code.=<<___;
 	call	__ecp_nistz256_sqr_mont$x	# p256_sqr_mont(Z2sqr, in2_z);
 
 	pcmpeqd	%xmm4, %xmm5
-	pshufd	\$0xb1, %xmm3, %xmm4
-	por	%xmm3, %xmm4
+	pshufd	\$0xb1, %xmm1, %xmm4
+	por	%xmm1, %xmm4
 	pshufd	\$0, %xmm5, %xmm5		# in1infty
 	pshufd	\$0x1e, %xmm4, %xmm3
 	por	%xmm3, %xmm4
@@ -2666,16 +2664,14 @@ $code.=<<___;
 	 mov	0x40+8*3($a_ptr), $acc0
 	movdqa	%xmm0, $in1_x(%rsp)
 	movdqa	%xmm1, $in1_x+0x10(%rsp)
-	por	%xmm0, %xmm1
 	movdqa	%xmm2, $in1_y(%rsp)
 	movdqa	%xmm3, $in1_y+0x10(%rsp)
-	por	%xmm2, %xmm3
 	movdqa	%xmm4, $in1_z(%rsp)
 	movdqa	%xmm5, $in1_z+0x10(%rsp)
-	por	%xmm1, %xmm3
+	por	%xmm4, %xmm5
 
 	movdqu	0x00($b_ptr), %xmm0	# copy	*(P256_POINT_AFFINE *)$b_ptr
-	 pshufd	\$0xb1, %xmm3, %xmm5
+	 pshufd	\$0xb1, %xmm5, %xmm3
 	movdqu	0x10($b_ptr), %xmm1
 	movdqu	0x20($b_ptr), %xmm2
 	 por	%xmm3, %xmm5
diff --git a/crypto/ec/ecp_nistz256.c b/crypto/ec/ecp_nistz256.c
index 564a889..dca3a2d 100644
--- a/crypto/ec/ecp_nistz256.c
+++ b/crypto/ec/ecp_nistz256.c
@@ -335,19 +335,16 @@ static void ecp_nistz256_point_add(P256_POINT *r,
     const BN_ULONG *in2_y = b->Y;
     const BN_ULONG *in2_z = b->Z;
 
-    /* We encode infinity as (0,0), which is not on the curve,
-     * so it is OK. */
-    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
-                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+    /*
+     * Infinity is encoded as (,,0)
+     */
+    in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
     if (P256_LIMBS == 8)
-        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
-                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+        in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
 
-    in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
-                in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
+    in2infty = (in2_z[0] | in2_z[1] | in2_z[2] | in2_z[3]);
     if (P256_LIMBS == 8)
-        in2infty |= (in2_x[4] | in2_x[5] | in2_x[6] | in2_x[7] |
-                     in2_y[4] | in2_y[5] | in2_y[6] | in2_y[7]);
+        in2infty |= (in2_z[4] | in2_z[5] | in2_z[6] | in2_z[7]);
 
     in1infty = is_zero(in1infty);
     in2infty = is_zero(in2infty);
@@ -436,15 +433,16 @@ static void ecp_nistz256_point_add_affine(P256_POINT *r,
     const BN_ULONG *in2_y = b->Y;
 
     /*
-     * In affine representation we encode infty as (0,0), which is not on the
-     * curve, so it is OK
+     * Infinity is encoded as (,,0)
      */
-    in1infty = (in1_x[0] | in1_x[1] | in1_x[2] | in1_x[3] |
-                in1_y[0] | in1_y[1] | in1_y[2] | in1_y[3]);
+    in1infty = (in1_z[0] | in1_z[1] | in1_z[2] | in1_z[3]);
     if (P256_LIMBS == 8)
-        in1infty |= (in1_x[4] | in1_x[5] | in1_x[6] | in1_x[7] |
-                     in1_y[4] | in1_y[5] | in1_y[6] | in1_y[7]);
+        in1infty |= (in1_z[4] | in1_z[5] | in1_z[6] | in1_z[7]);
 
+    /*
+     * In affine representation we encode infinity as (0,0), which is
+     * not on the curve, so it is OK
+     */
     in2infty = (in2_x[0] | in2_x[1] | in2_x[2] | in2_x[3] |
                 in2_y[0] | in2_y[1] | in2_y[2] | in2_y[3]);
     if (P256_LIMBS == 8)
@@ -1273,6 +1271,8 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP *group,
             } else
 #endif
             {
+                BN_ULONG infty;
+
                 /* First window */
                 wvalue = (p_str[0] << 1) & mask;
                 idx += window_size;
@@ -1285,7 +1285,30 @@ __owur static int ecp_nistz256_points_mul(const EC_GROUP *group,
                 ecp_nistz256_neg(p.p.Z, p.p.Y);
                 copy_conditional(p.p.Y, p.p.Z, wvalue & 1);
 
-                memcpy(p.p.Z, ONE, sizeof(ONE));
+                /*
+                 * Since affine infinity is encoded as (0,0) and
+                 * Jacobian is (,,0), we need to harmonize them
+                 * by assigning "one" or zero to Z.
+                 */
+                infty = (p.p.X[0] | p.p.X[1] | p.p.X[2] | p.p.X[3] |
+                         p.p.Y[0] | p.p.Y[1] | p.p.Y[2] | p.p.Y[3]);
+                if (P256_LIMBS == 8)
+                    infty |= (p.p.X[4] | p.p.X[5] | p.p.X[6] | p.p.X[7] |
+                              p.p.Y[4] | p.p.Y[5] | p.p.Y[6] | p.p.Y[7]);
+
+                infty = 0 - is_zero(infty);
+                infty = ~infty;
+
+                p.p.Z[0] = ONE[0] & infty;
+                p.p.Z[1] = ONE[1] & infty;
+                p.p.Z[2] = ONE[2] & infty;
+                p.p.Z[3] = ONE[3] & infty;
+                if (P256_LIMBS == 8) {
+                    p.p.Z[4] = ONE[4] & infty;
+                    p.p.Z[5] = ONE[5] & infty;
+                    p.p.Z[6] = ONE[6] & infty;
+                    p.p.Z[7] = ONE[7] & infty;
+                }
 
                 for (i = 1; i < 37; i++) {
                     unsigned int off = (idx - 1) / 8;


More information about the openssl-commits mailing list