[openssl] master update

bernd.edlinger at hotmail.de
Fri Dec 6 12:35:02 UTC 2019


The branch master has been updated
       via  4c3f748d7cfffb3309451c6bfdd686f89ec290b2 (commit)
       via  18d42d8d56352b81510d87dd12d1ac93d1d408d3 (commit)
       via  8736f9538121443cdb2e21951a85e465b8f7f790 (commit)
       via  8c6f86c7c5350fadf22d32d6cd4712e2ad4447ba (commit)
      from  350c92351705aa5916ffdf07fd7b81c1cbcb178b (commit)


- Log -----------------------------------------------------------------
commit 4c3f748d7cfffb3309451c6bfdd686f89ec290b2
Author: Bernd Edlinger <bernd.edlinger at hotmail.de>
Date:   Thu Dec 5 01:20:14 2019 +0100

    Add a CHANGES entry for CVE-2019-1551
    
    Reviewed-by: Paul Dale <paul.dale at oracle.com>
    (Merged from https://github.com/openssl/openssl/pull/10574)

commit 18d42d8d56352b81510d87dd12d1ac93d1d408d3
Author: Bernd Edlinger <bernd.edlinger at hotmail.de>
Date:   Wed Dec 4 22:38:19 2019 +0100

    Add a test case for rsaz_512_sqr overflow handling
    
    [extended tests]
    
    Reviewed-by: Paul Dale <paul.dale at oracle.com>
    (Merged from https://github.com/openssl/openssl/pull/10574)

commit 8736f9538121443cdb2e21951a85e465b8f7f790
Author: Bernd Edlinger <bernd.edlinger at hotmail.de>
Date:   Wed Dec 4 12:57:41 2019 +0100

    Improve the overflow handling in rsaz_512_sqr
    
    We always have a carry in %rcx or %rbx, in the range 0..2,
    from the previous stage; it is added to the result of the
    64-bit square, but the low nibble of any square can only
    be 0, 1, 4 or 9.

    Therefore one "adcq $0, %rdx" can be removed. Likewise, in
    the ADX code we can remove one "adcx %rbp, $out", since
    %rbp is always 0 and the carry is also zero, so that
    instruction is a no-op.
    
    Reviewed-by: Paul Dale <paul.dale at oracle.com>
    (Merged from https://github.com/openssl/openssl/pull/10574)
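
A minimal standalone check of the low-nibble claim in the commit above
(plain C, no OpenSSL dependencies, purely illustrative): the low nibble
of x*x depends only on x mod 16, so enumerating the 16 residues shows it
is always 0, 1, 4 or 9; hence the low 64-bit word of a square is at most
0xFF..F9, and adding a carry of at most 2 to it cannot carry out.

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
        uint64_t x;

        /* (x + 16k)^2 = x^2 + 32kx + 256k^2, and both 32kx and 256k^2
         * are 0 mod 16, so x^2 mod 16 depends only on x mod 16. */
        for (x = 0; x < 16; x++)
            printf("x mod 16 = %2llu  ->  x*x mod 16 = %llu\n",
                   (unsigned long long)x,
                   (unsigned long long)((x * x) & 0xf));
        return 0;
    }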

commit 8c6f86c7c5350fadf22d32d6cd4712e2ad4447ba
Author: Andy Polyakov <appro at openssl.org>
Date:   Wed Dec 4 12:48:21 2019 +0100

    Fix an overflow bug in rsaz_512_sqr
    
    There is an overflow bug in the x86_64 Montgomery squaring procedure used in
    exponentiation with 512-bit moduli. No EC algorithms are affected. Analysis
    suggests that attacks against 2-prime RSA1024, 3-prime RSA1536, and DSA1024 as
    a result of this defect would be very difficult to perform and are not believed
    likely. Attacks against DH512 are considered just feasible. However, for an
    attack the target would have to re-use the DH512 private key, which is not
    recommended anyway. Also, applications directly using the low-level API
    BN_mod_exp may be affected if they use BN_FLG_CONSTTIME.
    
    CVE-2019-1551
    
    Reviewed-by: Paul Dale <paul.dale at oracle.com>
    Reviewed-by: Bernd Edlinger <bernd.edlinger at hotmail.de>
    (Merged from https://github.com/openssl/openssl/pull/10574)
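
The affected usage pattern matches the new constant-time test added to
test/bntest.c in this push: a direct BN_mod_exp() call whose operands
carry BN_FLG_CONSTTIME. A minimal sketch of that pattern (error checks
omitted for brevity; the helper name is only for illustration):

    #include <openssl/bn.h>

    /* Compute r = base^exp mod m with BN_FLG_CONSTTIME set on the
     * inputs, i.e. the code path exercised by test_mod_exp_consttime(). */
    static int mod_exp_consttime(BIGNUM *r, const char *base_dec,
                                 const char *exp_dec, const char *mod_dec)
    {
        BN_CTX *ctx = BN_CTX_new();
        BIGNUM *base = NULL, *exp = NULL, *mod = NULL;
        int ok;

        BN_dec2bn(&base, base_dec);
        BN_dec2bn(&exp, exp_dec);
        BN_dec2bn(&mod, mod_dec);

        BN_set_flags(base, BN_FLG_CONSTTIME);
        BN_set_flags(exp, BN_FLG_CONSTTIME);
        BN_set_flags(mod, BN_FLG_CONSTTIME);

        ok = BN_mod_exp(r, base, exp, mod, ctx);

        BN_free(base);
        BN_free(exp);
        BN_free(mod);
        BN_CTX_free(ctx);
        return ok;
    }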

-----------------------------------------------------------------------

Summary of changes:
 CHANGES                      |  12 ++
 crypto/bn/asm/rsaz-x86_64.pl | 383 ++++++++++++++++++++++---------------------
 test/bntest.c                | 284 ++++++++++++++++++++++++++++++++
 3 files changed, 494 insertions(+), 185 deletions(-)

diff --git a/CHANGES b/CHANGES
index 208780e9c9..e0b15b35f6 100644
--- a/CHANGES
+++ b/CHANGES
@@ -9,6 +9,18 @@
 
  Changes between 1.1.1 and 3.0.0 [xx XXX xxxx]
 
+  *) Fixed an overflow bug in the x86_64 Montgomery squaring procedure
+     used in exponentiation with 512-bit moduli. No EC algorithms are
+     affected. Analysis suggests that attacks against 2-prime RSA1024,
+     3-prime RSA1536, and DSA1024 as a result of this defect would be very
+     difficult to perform and are not believed likely. Attacks against DH512
+     are considered just feasible. However, for an attack the target would
+     have to re-use the DH512 private key, which is not recommended anyway.
+     Also applications directly using the low level API BN_mod_exp may be
+     affected if they use BN_FLG_CONSTTIME.
+     (CVE-2019-1551)
+     [Andy Polyakov]
+
   *) Introduced a new method type and API, OSSL_SERIALIZER, to
      represent generic serializers.  An implementation is expected to
      be able to serialize an object associated with a given name (such
diff --git a/crypto/bn/asm/rsaz-x86_64.pl b/crypto/bn/asm/rsaz-x86_64.pl
index c41d8e521e..d1aa84b86e 100755
--- a/crypto/bn/asm/rsaz-x86_64.pl
+++ b/crypto/bn/asm/rsaz-x86_64.pl
@@ -118,7 +118,7 @@ rsaz_512_sqr:				# 25-29% faster than rsaz_512_mul
 	subq	\$128+24, %rsp
 .cfi_adjust_cfa_offset	128+24
 .Lsqr_body:
-	movq	$mod, %rbp		# common argument
+	movq	$mod, %xmm1		# common off-load
 	movq	($inp), %rdx
 	movq	8($inp), %rax
 	movq	$n0, 128(%rsp)
@@ -136,7 +136,8 @@ $code.=<<___;
 .Loop_sqr:
 	movl	$times,128+8(%rsp)
 #first iteration
-	movq	%rdx, %rbx
+	movq	%rdx, %rbx		# 0($inp)
+	mov	%rax, %rbp		# 8($inp)
 	mulq	%rdx
 	movq	%rax, %r8
 	movq	16($inp), %rax
@@ -175,31 +176,29 @@ $code.=<<___;
 	mulq	%rbx
 	addq	%rax, %r14
 	movq	%rbx, %rax
-	movq	%rdx, %r15
-	adcq	\$0, %r15
+	adcq	\$0, %rdx
 
-	addq	%r8, %r8		#shlq	\$1, %r8
-	movq	%r9, %rcx
-	adcq	%r9, %r9		#shld	\$1, %r8, %r9
+	xorq	%rcx,%rcx		# rcx:r8 = r8 << 1
+	addq	%r8, %r8
+	 movq	%rdx, %r15
+	adcq	\$0, %rcx
 
 	mulq	%rax
-	movq	%rax, (%rsp)
-	addq	%rdx, %r8
-	adcq	\$0, %r9
+	addq	%r8, %rdx
+	adcq	\$0, %rcx
 
-	movq	%r8, 8(%rsp)
-	shrq	\$63, %rcx
+	movq	%rax, (%rsp)
+	movq	%rdx, 8(%rsp)
 
 #second iteration
-	movq	8($inp), %r8
 	movq	16($inp), %rax
-	mulq	%r8
+	mulq	%rbp
 	addq	%rax, %r10
 	movq	24($inp), %rax
 	movq	%rdx, %rbx
 	adcq	\$0, %rbx
 
-	mulq	%r8
+	mulq	%rbp
 	addq	%rax, %r11
 	movq	32($inp), %rax
 	adcq	\$0, %rdx
@@ -207,7 +206,7 @@ $code.=<<___;
 	movq	%rdx, %rbx
 	adcq	\$0, %rbx
 
-	mulq	%r8
+	mulq	%rbp
 	addq	%rax, %r12
 	movq	40($inp), %rax
 	adcq	\$0, %rdx
@@ -215,7 +214,7 @@ $code.=<<___;
 	movq	%rdx, %rbx
 	adcq	\$0, %rbx
 
-	mulq	%r8
+	mulq	%rbp
 	addq	%rax, %r13
 	movq	48($inp), %rax
 	adcq	\$0, %rdx
@@ -223,7 +222,7 @@ $code.=<<___;
 	movq	%rdx, %rbx
 	adcq	\$0, %rbx
 
-	mulq	%r8
+	mulq	%rbp
 	addq	%rax, %r14
 	movq	56($inp), %rax
 	adcq	\$0, %rdx
@@ -231,39 +230,39 @@ $code.=<<___;
 	movq	%rdx, %rbx
 	adcq	\$0, %rbx
 
-	mulq	%r8
+	mulq	%rbp
 	addq	%rax, %r15
-	movq	%r8, %rax
+	movq	%rbp, %rax
 	adcq	\$0, %rdx
 	addq	%rbx, %r15
-	movq	%rdx, %r8
-	movq	%r10, %rdx
-	adcq	\$0, %r8
+	adcq	\$0, %rdx
 
-	add	%rdx, %rdx
-	lea	(%rcx,%r10,2), %r10	#shld	\$1, %rcx, %r10
-	movq	%r11, %rbx
-	adcq	%r11, %r11		#shld	\$1, %r10, %r11
+	xorq	%rbx, %rbx		# rbx:r10:r9 = r10:r9 << 1
+	addq	%r9, %r9
+	 movq	%rdx, %r8
+	adcq	%r10, %r10
+	adcq	\$0, %rbx
 
 	mulq	%rax
+	# rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	addq	%rcx, %rax
+	 movq	16($inp), %rbp
 	addq	%rax, %r9
+	 movq	24($inp), %rax
 	adcq	%rdx, %r10
-	adcq	\$0, %r11
+	adcq	\$0, %rbx
 
 	movq	%r9, 16(%rsp)
 	movq	%r10, 24(%rsp)
-	shrq	\$63, %rbx
 
 #third iteration
-	movq	16($inp), %r9
-	movq	24($inp), %rax
-	mulq	%r9
+	mulq	%rbp
 	addq	%rax, %r12
 	movq	32($inp), %rax
 	movq	%rdx, %rcx
 	adcq	\$0, %rcx
 
-	mulq	%r9
+	mulq	%rbp
 	addq	%rax, %r13
 	movq	40($inp), %rax
 	adcq	\$0, %rdx
@@ -271,7 +270,7 @@ $code.=<<___;
 	movq	%rdx, %rcx
 	adcq	\$0, %rcx
 
-	mulq	%r9
+	mulq	%rbp
 	addq	%rax, %r14
 	movq	48($inp), %rax
 	adcq	\$0, %rdx
@@ -279,9 +278,7 @@ $code.=<<___;
 	movq	%rdx, %rcx
 	adcq	\$0, %rcx
 
-	mulq	%r9
-	 movq	%r12, %r10
-	 lea	(%rbx,%r12,2), %r12	#shld	\$1, %rbx, %r12
+	mulq	%rbp
 	addq	%rax, %r15
 	movq	56($inp), %rax
 	adcq	\$0, %rdx
@@ -289,36 +286,40 @@ $code.=<<___;
 	movq	%rdx, %rcx
 	adcq	\$0, %rcx
 
-	mulq	%r9
-	 shrq	\$63, %r10
+	mulq	%rbp
 	addq	%rax, %r8
-	movq	%r9, %rax
+	movq	%rbp, %rax
 	adcq	\$0, %rdx
 	addq	%rcx, %r8
-	movq	%rdx, %r9
-	adcq	\$0, %r9
+	adcq	\$0, %rdx
 
-	movq	%r13, %rcx
-	leaq	(%r10,%r13,2), %r13	#shld	\$1, %r12, %r13
+	xorq	%rcx, %rcx		# rcx:r12:r11 = r12:r11 << 1
+	addq	%r11, %r11
+	 movq	%rdx, %r9
+	adcq	%r12, %r12
+	adcq	\$0, %rcx
 
 	mulq	%rax
+	# rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	addq	%rbx, %rax
+	 movq	24($inp), %r10
 	addq	%rax, %r11
+	 movq	32($inp), %rax
 	adcq	%rdx, %r12
-	adcq	\$0, %r13
+	adcq	\$0, %rcx
 
 	movq	%r11, 32(%rsp)
 	movq	%r12, 40(%rsp)
-	shrq	\$63, %rcx
 
 #fourth iteration
-	movq	24($inp), %r10
-	movq	32($inp), %rax
+	mov	%rax, %r11		# 32($inp)
 	mulq	%r10
 	addq	%rax, %r14
 	movq	40($inp), %rax
 	movq	%rdx, %rbx
 	adcq	\$0, %rbx
 
+	mov	%rax, %r12		# 40($inp)
 	mulq	%r10
 	addq	%rax, %r15
 	movq	48($inp), %rax
@@ -327,9 +328,8 @@ $code.=<<___;
 	movq	%rdx, %rbx
 	adcq	\$0, %rbx
 
+	mov	%rax, %rbp		# 48($inp)
 	mulq	%r10
-	 movq	%r14, %r12
-	 leaq	(%rcx,%r14,2), %r14	#shld	\$1, %rcx, %r14
 	addq	%rax, %r8
 	movq	56($inp), %rax
 	adcq	\$0, %rdx
@@ -338,32 +338,33 @@ $code.=<<___;
 	adcq	\$0, %rbx
 
 	mulq	%r10
-	 shrq	\$63, %r12
 	addq	%rax, %r9
 	movq	%r10, %rax
 	adcq	\$0, %rdx
 	addq	%rbx, %r9
-	movq	%rdx, %r10
-	adcq	\$0, %r10
+	adcq	\$0, %rdx
 
-	movq	%r15, %rbx
-	leaq	(%r12,%r15,2),%r15	#shld	\$1, %r14, %r15
+	xorq	%rbx, %rbx		# rbx:r13:r14 = r13:r14 << 1
+	addq	%r13, %r13
+	 movq	%rdx, %r10
+	adcq	%r14, %r14
+	adcq	\$0, %rbx
 
 	mulq	%rax
+	# rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	addq	%rcx, %rax
 	addq	%rax, %r13
+	 movq	%r12, %rax		# 40($inp)
 	adcq	%rdx, %r14
-	adcq	\$0, %r15
+	adcq	\$0, %rbx
 
 	movq	%r13, 48(%rsp)
 	movq	%r14, 56(%rsp)
-	shrq	\$63, %rbx
 
 #fifth iteration
-	movq	32($inp), %r11
-	movq	40($inp), %rax
 	mulq	%r11
 	addq	%rax, %r8
-	movq	48($inp), %rax
+	movq	%rbp, %rax		# 48($inp)
 	movq	%rdx, %rcx
 	adcq	\$0, %rcx
 
@@ -371,97 +372,99 @@ $code.=<<___;
 	addq	%rax, %r9
 	movq	56($inp), %rax
 	adcq	\$0, %rdx
-	 movq	%r8, %r12
-	 leaq	(%rbx,%r8,2), %r8	#shld	\$1, %rbx, %r8
 	addq	%rcx, %r9
 	movq	%rdx, %rcx
 	adcq	\$0, %rcx
 
+	mov	%rax, %r14		# 56($inp)
 	mulq	%r11
-	 shrq	\$63, %r12
 	addq	%rax, %r10
 	movq	%r11, %rax
 	adcq	\$0, %rdx
 	addq	%rcx, %r10
-	movq	%rdx, %r11
-	adcq	\$0, %r11
+	adcq	\$0, %rdx
 
-	movq	%r9, %rcx
-	leaq	(%r12,%r9,2), %r9	#shld	\$1, %r8, %r9
+	xorq	%rcx, %rcx		# rcx:r8:r15 = r8:r15 << 1
+	addq	%r15, %r15
+	 movq	%rdx, %r11
+	adcq	%r8, %r8
+	adcq	\$0, %rcx
 
 	mulq	%rax
+	# rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	addq	%rbx, %rax
 	addq	%rax, %r15
+	 movq	%rbp, %rax		# 48($inp)
 	adcq	%rdx, %r8
-	adcq	\$0, %r9
+	adcq	\$0, %rcx
 
 	movq	%r15, 64(%rsp)
 	movq	%r8, 72(%rsp)
-	shrq	\$63, %rcx
 
 #sixth iteration
-	movq	40($inp), %r12
-	movq	48($inp), %rax
 	mulq	%r12
 	addq	%rax, %r10
-	movq	56($inp), %rax
+	movq	%r14, %rax		# 56($inp)
 	movq	%rdx, %rbx
 	adcq	\$0, %rbx
 
 	mulq	%r12
 	addq	%rax, %r11
 	movq	%r12, %rax
-	 movq	%r10, %r15
-	 leaq	(%rcx,%r10,2), %r10	#shld	\$1, %rcx, %r10
 	adcq	\$0, %rdx
-	 shrq	\$63, %r15
 	addq	%rbx, %r11
-	movq	%rdx, %r12
-	adcq	\$0, %r12
+	adcq	\$0, %rdx
 
-	movq	%r11, %rbx
-	leaq	(%r15,%r11,2), %r11	#shld	\$1, %r10, %r11
+	xorq	%rbx, %rbx		# rbx:r10:r9 = r10:r9 << 1
+	addq	%r9, %r9
+	 movq	%rdx, %r12
+	adcq	%r10, %r10
+	adcq	\$0, %rbx
 
 	mulq	%rax
+	# rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	addq	%rcx, %rax
 	addq	%rax, %r9
+	 movq	%r14, %rax		# 56($inp)
 	adcq	%rdx, %r10
-	adcq	\$0, %r11
+	adcq	\$0, %rbx
 
 	movq	%r9, 80(%rsp)
 	movq	%r10, 88(%rsp)
 
 #seventh iteration
-	movq	48($inp), %r13
-	movq	56($inp), %rax
-	mulq	%r13
+	mulq	%rbp
 	addq	%rax, %r12
-	movq	%r13, %rax
-	movq	%rdx, %r13
-	adcq	\$0, %r13
+	movq	%rbp, %rax
+	adcq	\$0, %rdx
 
-	xorq	%r14, %r14
-	shlq	\$1, %rbx
-	adcq	%r12, %r12		#shld	\$1, %rbx, %r12
-	adcq	%r13, %r13		#shld	\$1, %r12, %r13
-	adcq	%r14, %r14		#shld	\$1, %r13, %r14
+	xorq	%rcx, %rcx		# rcx:r12:r11 = r12:r11 << 1
+	addq	%r11, %r11
+	 movq	%rdx, %r13
+	adcq	%r12, %r12
+	adcq	\$0, %rcx
 
 	mulq	%rax
+	# rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	addq	%rbx, %rax
 	addq	%rax, %r11
+	 movq	%r14, %rax		# 56($inp)
 	adcq	%rdx, %r12
-	adcq	\$0, %r13
+	adcq	\$0, %rcx
 
 	movq	%r11, 96(%rsp)
 	movq	%r12, 104(%rsp)
 
 #eighth iteration
-	movq	56($inp), %rax
-	mulq	%rax
-	addq	%rax, %r13
-	adcq	\$0, %rdx
-
-	addq	%rdx, %r14
+	xorq	%rbx, %rbx		# rbx:r13 = r13 << 1
+	addq	%r13, %r13
+	adcq	\$0, %rbx
 
-	movq	%r13, 112(%rsp)
-	movq	%r14, 120(%rsp)
+	mulq	%rax
+	# rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	addq	%rcx, %rax
+	addq	%r13, %rax
+	adcq	%rbx, %rdx
 
 	movq	(%rsp), %r8
 	movq	8(%rsp), %r9
@@ -471,6 +474,10 @@ $code.=<<___;
 	movq	40(%rsp), %r13
 	movq	48(%rsp), %r14
 	movq	56(%rsp), %r15
+	movq	%xmm1, %rbp
+
+	movq	%rax, 112(%rsp)
+	movq	%rdx, 120(%rsp)
 
 	call	__rsaz_512_reduce
 
@@ -502,9 +509,9 @@ $code.=<<___;
 .Loop_sqrx:
 	movl	$times,128+8(%rsp)
 	movq	$out, %xmm0		# off-load
-	movq	%rbp, %xmm1		# off-load
 #first iteration
 	mulx	%rax, %r8, %r9
+	mov	%rax, %rbx
 
 	mulx	16($inp), %rcx, %r10
 	xor	%rbp, %rbp		# cf=0, of=0
@@ -512,40 +519,39 @@ $code.=<<___;
 	mulx	24($inp), %rax, %r11
 	adcx	%rcx, %r9
 
-	mulx	32($inp), %rcx, %r12
+	.byte	0xc4,0x62,0xf3,0xf6,0xa6,0x20,0x00,0x00,0x00	# mulx	32($inp), %rcx, %r12
 	adcx	%rax, %r10
 
-	mulx	40($inp), %rax, %r13
+	.byte	0xc4,0x62,0xfb,0xf6,0xae,0x28,0x00,0x00,0x00	# mulx	40($inp), %rax, %r13
 	adcx	%rcx, %r11
 
-	.byte	0xc4,0x62,0xf3,0xf6,0xb6,0x30,0x00,0x00,0x00	# mulx	48($inp), %rcx, %r14
+	mulx	48($inp), %rcx, %r14
 	adcx	%rax, %r12
 	adcx	%rcx, %r13
 
-	.byte	0xc4,0x62,0xfb,0xf6,0xbe,0x38,0x00,0x00,0x00	# mulx	56($inp), %rax, %r15
+	mulx	56($inp), %rax, %r15
 	adcx	%rax, %r14
 	adcx	%rbp, %r15		# %rbp is 0
 
-	mov	%r9, %rcx
-	shld	\$1, %r8, %r9
-	shl	\$1, %r8
-
-	xor	%ebp, %ebp
-	mulx	%rdx, %rax, %rdx
-	adcx	%rdx, %r8
-	 mov	8($inp), %rdx
-	adcx	%rbp, %r9
+	mulx	%rdx, %rax, $out
+	 mov	%rbx, %rdx		# 8($inp)
+	xor	%rcx, %rcx
+	adox	%r8, %r8
+	adcx	$out, %r8
+	adox	%rbp, %rcx
+	adcx	%rbp, %rcx
 
 	mov	%rax, (%rsp)
 	mov	%r8, 8(%rsp)
 
 #second iteration
-	mulx	16($inp), %rax, %rbx
+	.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x10,0x00,0x00,0x00	# mulx	16($inp), %rax, %rbx
 	adox	%rax, %r10
 	adcx	%rbx, %r11
 
-	.byte	0xc4,0x62,0xc3,0xf6,0x86,0x18,0x00,0x00,0x00	# mulx	24($inp), $out, %r8
+	mulx	24($inp), $out, %r8
 	adox	$out, %r11
+	.byte	0x66
 	adcx	%r8, %r12
 
 	mulx	32($inp), %rax, %rbx
@@ -563,24 +569,25 @@ $code.=<<___;
 	.byte	0xc4,0x62,0xc3,0xf6,0x86,0x38,0x00,0x00,0x00	# mulx	56($inp), $out, %r8
 	adox	$out, %r15
 	adcx	%rbp, %r8
+	 mulx	%rdx, %rax, $out
 	adox	%rbp, %r8
+	 .byte	0x48,0x8b,0x96,0x10,0x00,0x00,0x00		# mov	16($inp), %rdx
 
-	mov	%r11, %rbx
-	shld	\$1, %r10, %r11
-	shld	\$1, %rcx, %r10
-
-	xor	%ebp,%ebp
-	mulx	%rdx, %rax, %rcx
-	 mov	16($inp), %rdx
+	xor	%rbx, %rbx
+	 adox	%r9, %r9
+	# rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	adcx	%rcx, %rax
+	adox	%r10, %r10
 	adcx	%rax, %r9
-	adcx	%rcx, %r10
-	adcx	%rbp, %r11
+	adox	%rbp, %rbx
+	adcx	$out, %r10
+	adcx	%rbp, %rbx
 
 	mov	%r9, 16(%rsp)
 	.byte	0x4c,0x89,0x94,0x24,0x18,0x00,0x00,0x00		# mov	%r10, 24(%rsp)
 
 #third iteration
-	.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x18,0x00,0x00,0x00	# mulx	24($inp), $out, %r9
+	mulx	24($inp), $out, %r9
 	adox	$out, %r12
 	adcx	%r9, %r13
 
@@ -588,7 +595,7 @@ $code.=<<___;
 	adox	%rax, %r13
 	adcx	%rcx, %r14
 
-	mulx	40($inp), $out, %r9
+	.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x28,0x00,0x00,0x00	# mulx	40($inp), $out, %r9
 	adox	$out, %r14
 	adcx	%r9, %r15
 
@@ -596,27 +603,28 @@ $code.=<<___;
 	adox	%rax, %r15
 	adcx	%rcx, %r8
 
-	.byte	0xc4,0x62,0xc3,0xf6,0x8e,0x38,0x00,0x00,0x00	# mulx	56($inp), $out, %r9
+	mulx	56($inp), $out, %r9
 	adox	$out, %r8
 	adcx	%rbp, %r9
+	 mulx	%rdx, %rax, $out
 	adox	%rbp, %r9
+	 mov	24($inp), %rdx
 
-	mov	%r13, %rcx
-	shld	\$1, %r12, %r13
-	shld	\$1, %rbx, %r12
-
-	xor	%ebp, %ebp
-	mulx	%rdx, %rax, %rdx
+	xor	%rcx, %rcx
+	 adox	%r11, %r11
+	# rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	adcx	%rbx, %rax
+	adox	%r12, %r12
 	adcx	%rax, %r11
-	adcx	%rdx, %r12
-	 mov	24($inp), %rdx
-	adcx	%rbp, %r13
+	adox	%rbp, %rcx
+	adcx	$out, %r12
+	adcx	%rbp, %rcx
 
 	mov	%r11, 32(%rsp)
-	.byte	0x4c,0x89,0xa4,0x24,0x28,0x00,0x00,0x00		# mov	%r12, 40(%rsp)
+	mov	%r12, 40(%rsp)
 
 #fourth iteration
-	.byte	0xc4,0xe2,0xfb,0xf6,0x9e,0x20,0x00,0x00,0x00	# mulx	32($inp), %rax, %rbx
+	mulx	32($inp), %rax, %rbx
 	adox	%rax, %r14
 	adcx	%rbx, %r15
 
@@ -631,25 +639,25 @@ $code.=<<___;
 	mulx	56($inp), $out, %r10
 	adox	$out, %r9
 	adcx	%rbp, %r10
+	 mulx	%rdx, %rax, $out
 	adox	%rbp, %r10
+	 mov	32($inp), %rdx
 
-	.byte	0x66
-	mov	%r15, %rbx
-	shld	\$1, %r14, %r15
-	shld	\$1, %rcx, %r14
-
-	xor	%ebp, %ebp
-	mulx	%rdx, %rax, %rdx
+	xor	%rbx, %rbx
+	 adox	%r13, %r13
+	# rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	adcx	%rcx, %rax
+	adox	%r14, %r14
 	adcx	%rax, %r13
-	adcx	%rdx, %r14
-	 mov	32($inp), %rdx
-	adcx	%rbp, %r15
+	adox	%rbp, %rbx
+	adcx	$out, %r14
+	adcx	%rbp, %rbx
 
 	mov	%r13, 48(%rsp)
 	mov	%r14, 56(%rsp)
 
 #fifth iteration
-	.byte	0xc4,0x62,0xc3,0xf6,0x9e,0x28,0x00,0x00,0x00	# mulx	40($inp), $out, %r11
+	mulx	40($inp), $out, %r11
 	adox	$out, %r8
 	adcx	%r11, %r9
 
@@ -660,18 +668,19 @@ $code.=<<___;
 	mulx	56($inp), $out, %r11
 	adox	$out, %r10
 	adcx	%rbp, %r11
+	 mulx	%rdx, %rax, $out
+	 mov	40($inp), %rdx
 	adox	%rbp, %r11
 
-	mov	%r9, %rcx
-	shld	\$1, %r8, %r9
-	shld	\$1, %rbx, %r8
-
-	xor	%ebp, %ebp
-	mulx	%rdx, %rax, %rdx
+	xor	%rcx, %rcx
+	 adox	%r15, %r15
+	# rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	adcx	%rbx, %rax
+	adox	%r8, %r8
 	adcx	%rax, %r15
-	adcx	%rdx, %r8
-	 mov	40($inp), %rdx
-	adcx	%rbp, %r9
+	adox	%rbp, %rcx
+	adcx	$out, %r8
+	adcx	%rbp, %rcx
 
 	mov	%r15, 64(%rsp)
 	mov	%r8, 72(%rsp)
@@ -684,18 +693,19 @@ $code.=<<___;
 	.byte	0xc4,0x62,0xc3,0xf6,0xa6,0x38,0x00,0x00,0x00	# mulx	56($inp), $out, %r12
 	adox	$out, %r11
 	adcx	%rbp, %r12
+	 mulx	%rdx, %rax, $out
 	adox	%rbp, %r12
+	 mov	48($inp), %rdx
 
-	mov	%r11, %rbx
-	shld	\$1, %r10, %r11
-	shld	\$1, %rcx, %r10
-
-	xor	%ebp, %ebp
-	mulx	%rdx, %rax, %rdx
+	xor	%rbx, %rbx
+	 adox	%r9, %r9
+	# rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	adcx	%rcx, %rax
+	adox	%r10, %r10
 	adcx	%rax, %r9
-	adcx	%rdx, %r10
-	 mov	48($inp), %rdx
-	adcx	%rbp, %r11
+	adcx	$out, %r10
+	adox	%rbp, %rbx
+	adcx	%rbp, %rbx
 
 	mov	%r9, 80(%rsp)
 	mov	%r10, 88(%rsp)
@@ -705,31 +715,31 @@ $code.=<<___;
 	adox	%rax, %r12
 	adox	%rbp, %r13
 
-	xor	%r14, %r14
-	shld	\$1, %r13, %r14
-	shld	\$1, %r12, %r13
-	shld	\$1, %rbx, %r12
-
-	xor	%ebp, %ebp
-	mulx	%rdx, %rax, %rdx
-	adcx	%rax, %r11
-	adcx	%rdx, %r12
+	mulx	%rdx, %rax, $out
+	xor	%rcx, %rcx
 	 mov	56($inp), %rdx
-	adcx	%rbp, %r13
+	 adox	%r11, %r11
+	# rbx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	adcx	%rbx, %rax
+	adox	%r12, %r12
+	adcx	%rax, %r11
+	adox	%rbp, %rcx
+	adcx	$out, %r12
+	adcx	%rbp, %rcx
 
 	.byte	0x4c,0x89,0x9c,0x24,0x60,0x00,0x00,0x00		# mov	%r11, 96(%rsp)
 	.byte	0x4c,0x89,0xa4,0x24,0x68,0x00,0x00,0x00		# mov	%r12, 104(%rsp)
 
 #eighth iteration
 	mulx	%rdx, %rax, %rdx
-	adox	%rax, %r13
-	adox	%rbp, %rdx
+	xor	%rbx, %rbx
+	 adox	%r13, %r13
+	# rcx <= 2 and rax <= 0xFFFF..F9, so carry must be zero here
+	adcx	%rcx, %rax
+	adox	%rbp, %rbx
+	adcx	%r13, %rax
+	adcx	%rdx, %rbx
 
-	.byte	0x66
-	add	%rdx, %r14
-
-	movq	%r13, 112(%rsp)
-	movq	%r14, 120(%rsp)
 	movq	%xmm0, $out
 	movq	%xmm1, %rbp
 
@@ -743,6 +753,9 @@ $code.=<<___;
 	movq	48(%rsp), %r14
 	movq	56(%rsp), %r15
 
+	movq	%rax, 112(%rsp)
+	movq	%rbx, 120(%rsp)
+
 	call	__rsaz_512_reducex
 
 	addq	64(%rsp), %r8
diff --git a/test/bntest.c b/test/bntest.c
index f0b98a29ba..6cd924543d 100644
--- a/test/bntest.c
+++ b/test/bntest.c
@@ -2484,6 +2484,288 @@ static int test_gcd_prime(void)
     return st;
 }
 
+typedef struct mod_exp_test_st
+{
+  const char *base;
+  const char *exp;
+  const char *mod;
+  const char *res;
+} MOD_EXP_TEST;
+
+static const MOD_EXP_TEST ModExpTests[] = {
+   /* original test vectors for rsaz_512_sqr bug, by OSS-Fuzz */
+   {
+       "1166180238001879113042182292626169621106255558914000595999312084"
+       "4627946820899490684928760491249738643524880720584249698100907201"
+       "002086675047927600340800371",
+       "8000000000000000000000000000000000000000000000000000000000000000"
+       "0000000000000000000000000000000000000000000000000000000000000000"
+       "00000000",
+       "1340780792684523720980737645613191762604395855615117867483316354"
+       "3294276330515137663421134775482798690129946803802212663956180562"
+       "088664022929883876655300863",
+       "8243904058268085430037326628480645845409758077568738532059032482"
+       "8294114415890603594730158120426756266457928475330450251339773498"
+       "26758407619521544102068438"
+   },
+   {
+       "4974270041410803822078866696159586946995877618987010219312844726"
+       "0284386121835740784990869050050504348861513337232530490826340663"
+       "197278031692737429054",
+       "4974270041410803822078866696159586946995877428188754995041148539"
+       "1663243362592271353668158565195557417149981094324650322556843202"
+       "946445882670777892608",
+       "1340780716511420227215592830971452482815377482627251725537099028"
+       "4429769497230131760206012644403029349547320953206103351725462999"
+       "947509743623340557059752191",
+       "5296244594780707015616522701706118082963369547253192207884519362"
+       "1767869984947542695665420219028522815539559194793619684334900442"
+       "49304558011362360473525933"
+   },
+   /* test vectors for rsaz_512_sqr bug, with rcx/rbx=1 */
+   {   /* between first and second iteration */
+       "5148719036160389201525610950887605325980251964889646556085286545"
+       "3931548809178823413169359635978762036512397113080988070677858033"
+       "36463909753993540214027190",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between second and third iteration */
+       "8908340854353752577419678771330460827942371434853054158622636544"
+       "8151360109722890949471912566649465436296659601091730745087014189"
+       "2672764191218875181826063",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between third and fourth iteration */
+       "3427446396505596330634350984901719674479522569002785244080234738"
+       "4288743635435746136297299366444548736533053717416735379073185344"
+       "26985272974404612945608761",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between fourth and fifth iteration */
+       "3472743044917564564078857826111874560045331237315597383869652985"
+       "6919870028890895988478351133601517365908445058405433832718206902"
+       "4088133164805266956353542",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between fifth and sixth iteration */
+       "3608632990153469264412378349742339216742409743898601587274768025"
+       "0110772032985643555192767717344946174122842255204082586753499651"
+       "14483434992887431333675068",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between sixth and seventh iteration */
+       "8455374370234070242910508226941981520235709767260723212165264877"
+       "8689064388017521524568434328264431772644802567028663962962025746"
+       "9283458217850119569539086",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between seventh and eighth iteration */
+       "5155371529688532178421209781159131443543419764974688878527112131"
+       "7446518205609427412336183157918981038066636807317733319323257603"
+       "04416292040754017461076359",
+       "1005585594745694782468051874865438459560952436544429503329267108"
+       "2791323022555160232601405723625177570767523893639864538140315412"
+       "108959927459825236754563832",
+       "1005585594745694782468051874865438459560952436544429503329267108"
+       "2791323022555160232601405723625177570767523893639864538140315412"
+       "108959927459825236754563833",
+       "1"
+   },
+   /* test vectors for rsaz_512_sqr bug, with rcx/rbx=2 */
+   {   /* between first and second iteration */
+       "3155666506033786929967309937640790361084670559125912405342594979"
+       "4345142818528956285490897841406338022378565972533508820577760065"
+       "58494345853302083699912572",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between second and third iteration */
+       "3789819583801342198190405714582958759005991915505282362397087750"
+       "4213544724644823098843135685133927198668818185338794377239590049"
+       "41019388529192775771488319",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between third and fourth iteration */
+       "4695752552040706867080542538786056470322165281761525158189220280"
+       "4025547447667484759200742764246905647644662050122968912279199065"
+       "48065034299166336940507214",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between fourth and fifth iteration */
+       "2159140240970485794188159431017382878636879856244045329971239574"
+       "8919691133560661162828034323196457386059819832804593989740268964"
+       "74502911811812651475927076",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between fifth and sixth iteration */
+       "5239312332984325668414624633307915097111691815000872662334695514"
+       "5436533521392362443557163429336808208137221322444780490437871903"
+       "99972784701334569424519255",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between sixth and seventh iteration */
+       "1977953647322612860406858017869125467496941904523063466791308891"
+       "1172796739058531929470539758361774569875505293428856181093904091"
+       "33788264851714311303725089",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042158",
+       "6703903964971298549787012499102923063739682910296196688861780721"
+       "8608820150367734884009371490834517138450159290932430254268769414"
+       "05973284973216824503042159",
+       "1"
+   },
+   {   /* between seventh and eighth iteration */
+       "6456987954117763835533395796948878140715006860263624787492985786"
+       "8514630216966738305923915688821526449499763719943997120302368211"
+       "04813318117996225041943964",
+       "1340780792994259709957402499820584612747936582059239337772356144"
+       "3721764030073546976801874298166903427690031858186486050853753882"
+       "811946551499689575296532556",
+       "1340780792994259709957402499820584612747936582059239337772356144"
+       "3721764030073546976801874298166903427690031858186486050853753882"
+       "811946551499689575296532557",
+       "1"
+   }
+};
+
+static int test_mod_exp(int i)
+{
+    const MOD_EXP_TEST *test = &ModExpTests[i];
+    int res = 0;
+    BIGNUM *result = NULL;
+    BIGNUM *base = NULL, *exponent = NULL, *modulo = NULL;
+    char *s = NULL;
+
+    if (!TEST_ptr(result = BN_new())
+            || !TEST_true(BN_dec2bn(&base, test->base))
+            || !TEST_true(BN_dec2bn(&exponent, test->exp))
+            || !TEST_true(BN_dec2bn(&modulo, test->mod)))
+        goto err;
+
+    if (!TEST_int_eq(BN_mod_exp(result, base, exponent, modulo, ctx), 1))
+        goto err;
+
+    if (!TEST_ptr(s = BN_bn2dec(result)))
+        goto err;
+
+    if (!TEST_mem_eq(s, strlen(s), test->res, strlen(test->res)))
+        goto err;
+
+    res = 1;
+
+ err:
+    OPENSSL_free(s);
+    BN_free(result);
+    BN_free(base);
+    BN_free(exponent);
+    BN_free(modulo);
+    return res;
+}
+
+static int test_mod_exp_consttime(int i)
+{
+    const MOD_EXP_TEST *test = &ModExpTests[i];
+    int res = 0;
+    BIGNUM *result = NULL;
+    BIGNUM *base = NULL, *exponent = NULL, *modulo = NULL;
+    char *s = NULL;
+
+    if (!TEST_ptr(result = BN_new())
+            || !TEST_true(BN_dec2bn(&base, test->base))
+            || !TEST_true(BN_dec2bn(&exponent, test->exp))
+            || !TEST_true(BN_dec2bn(&modulo, test->mod)))
+        goto err;
+
+    BN_set_flags(base, BN_FLG_CONSTTIME);
+    BN_set_flags(exponent, BN_FLG_CONSTTIME);
+    BN_set_flags(modulo, BN_FLG_CONSTTIME);
+
+    if (!TEST_int_eq(BN_mod_exp(result, base, exponent, modulo, ctx), 1))
+        goto err;
+
+    if (!TEST_ptr(s = BN_bn2dec(result)))
+        goto err;
+
+    if (!TEST_mem_eq(s, strlen(s), test->res, strlen(test->res)))
+        goto err;
+
+    res = 1;
+
+ err:
+    OPENSSL_free(s);
+    BN_free(result);
+    BN_free(base);
+    BN_free(exponent);
+    BN_free(modulo);
+    return res;
+}
+
 static int file_test_run(STANZA *s)
 {
     static const FILETEST filetests[] = {
@@ -2621,6 +2903,8 @@ int setup_tests(void)
         ADD_ALL_TESTS(test_is_prime, (int)OSSL_NELEM(primes));
         ADD_ALL_TESTS(test_not_prime, (int)OSSL_NELEM(not_primes));
         ADD_TEST(test_gcd_prime);
+        ADD_ALL_TESTS(test_mod_exp, (int)OSSL_NELEM(ModExpTests));
+        ADD_ALL_TESTS(test_mod_exp_consttime, (int)OSSL_NELEM(ModExpTests));
         if (stochastic)
             ADD_TEST(test_rand_range);
     } else {

