[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Sun Jul 15 17:13:45 UTC 2018
The branch master has been updated
via 3c849bc901fa191fc517bc20d905783e6e428de5 (commit)
via d3e3263072c91999afc256fa4666c40912dde410 (commit)
via dfd5fb09500d5800b37b3aec05884fc7409032d7 (commit)
via 2de607d8c952fef0cadf158b0a020037837911ac (commit)
from 5d1c09de1f2736e1d4b1877206d08455ec75f558 (commit)
- Log -----------------------------------------------------------------
commit 3c849bc901fa191fc517bc20d905783e6e428de5
Author: Andy Polyakov <appro at openssl.org>
Date: Thu Jul 12 11:53:16 2018 +0200
ec/curve25519.c: reorganize for better accessibility.
Move base 2^64 code to own #if section. It was nested in base 2^51 section,
which arguably might have been tricky to follow.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6699)
commit d3e3263072c91999afc256fa4666c40912dde410
Author: Andy Polyakov <appro at openssl.org>
Date: Wed Jul 11 22:36:49 2018 +0200
ec/asm/x25519-x86_64.pl: add CFI directives and Windows SE handler.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6699)
commit dfd5fb09500d5800b37b3aec05884fc7409032d7
Author: Andy Polyakov <appro at openssl.org>
Date: Wed Jul 11 22:22:52 2018 +0200
test/.../evppkey.txt: X25519 regression test vectors.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6699)
commit 2de607d8c952fef0cadf158b0a020037837911ac
Author: Andy Polyakov <appro at openssl.org>
Date: Wed Jul 11 22:08:02 2018 +0200
ec/asm/x25519-x86_64.pl: fix base 2^64 add/sub and final reduction.
Base 2^64 addition/subtraction and final reduction failed to treat
partially reduced values correctly.
Thanks to Wycheproof Project for vectors and Paul Kehrer for report.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6699)
-----------------------------------------------------------------------
Summary of changes:
crypto/ec/asm/x25519-x86_64.pl | 318 +++++++++++++++++++++++++++++-
crypto/ec/curve25519.c | 293 +++++++++++++--------------
test/recipes/30-test_evp_data/evppkey.txt | 38 ++++
3 files changed, 501 insertions(+), 148 deletions(-)
diff --git a/crypto/ec/asm/x25519-x86_64.pl b/crypto/ec/asm/x25519-x86_64.pl
index 930d7bd..da81e06 100755
--- a/crypto/ec/asm/x25519-x86_64.pl
+++ b/crypto/ec/asm/x25519-x86_64.pl
@@ -102,13 +102,22 @@ $code.=<<___;
.type x25519_fe51_mul,\@function,3
.align 32
x25519_fe51_mul:
+.cfi_startproc
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
lea -8*5(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_mul_body:
mov 8*0(%rsi),%rax # f[0]
mov 8*0(%rdx),%r11 # load g[0-4]
@@ -236,19 +245,30 @@ x25519_fe51_mul:
mov 8*4(%rsp),%rdi # restore 1st argument
jmp .Lreduce51
+.Lfe51_mul_epilogue:
+.cfi_endproc
.size x25519_fe51_mul,.-x25519_fe51_mul
.globl x25519_fe51_sqr
.type x25519_fe51_sqr,\@function,2
.align 32
x25519_fe51_sqr:
+.cfi_startproc
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
lea -8*5(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_sqr_body:
mov 8*0(%rsi),%rax # g[0]
mov 8*2(%rsi),%r15 # g[2]
@@ -391,27 +411,45 @@ x25519_fe51_sqr:
mov %r10,8*4(%rdi)
mov 8*5(%rsp),%r15
+.cfi_restore %r15 # each callee-saved register is reloaded just above its .cfi_restore
mov 8*6(%rsp),%r14
+.cfi_restore %r14
mov 8*7(%rsp),%r13
+.cfi_restore %r13
mov 8*8(%rsp),%r12
+.cfi_restore %r12
mov 8*9(%rsp),%rbx
+.cfi_restore %rbx
mov 8*10(%rsp),%rbp
+.cfi_restore %rbp
lea 8*11(%rsp),%rsp
+.cfi_adjust_cfa_offset -88 # lea above released 6 pushes + 40 bytes of scratch, so the CFA offset shrinks by 88
+.Lfe51_sqr_epilogue: # HandlerData[1] in .xdata: full_handler skips frame recovery from here on
ret
+.cfi_endproc
.size x25519_fe51_sqr,.-x25519_fe51_sqr
.globl x25519_fe51_mul121666
.type x25519_fe51_mul121666,\@function,2
.align 32
x25519_fe51_mul121666:
+.cfi_startproc
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
- mov \$121666,%eax
+.cfi_push %r15
lea -8*5(%rsp),%rsp
+.cfi_adjust_cfa_offset 40
+.Lfe51_mul121666_body:
+ mov \$121666,%eax
mulq 8*0(%rsi)
mov %rax,%rbx # %rbx:%rcx = h0
@@ -434,6 +472,8 @@ x25519_fe51_mul121666:
mov %rdx,%r15
jmp .Lreduce51
+.Lfe51_mul121666_epilogue:
+.cfi_endproc
.size x25519_fe51_mul121666,.-x25519_fe51_mul121666
___
########################################################################
@@ -460,14 +500,24 @@ x25519_fe64_eligible:
.type x25519_fe64_mul,\@function,3
.align 32
x25519_fe64_mul:
+.cfi_startproc
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
push %rdi # offload dst
+.cfi_push %rdi
lea -8*2(%rsp),%rsp
+.cfi_adjust_cfa_offset 16
+.Lfe64_mul_body:
mov %rdx,%rax
mov 8*0(%rdx),%rbp # b[0]
@@ -534,20 +584,32 @@ x25519_fe64_mul:
adox %rdi,$acc7 # of=0
jmp .Lreduce64
+.Lfe64_mul_epilogue:
+.cfi_endproc
.size x25519_fe64_mul,.-x25519_fe64_mul
.globl x25519_fe64_sqr
.type x25519_fe64_sqr,\@function,2
.align 32
x25519_fe64_sqr:
+.cfi_startproc
push %rbp
+.cfi_push %rbp
push %rbx
+.cfi_push %rbx
push %r12
+.cfi_push %r12
push %r13
+.cfi_push %r13
push %r14
+.cfi_push %r14
push %r15
+.cfi_push %r15
push %rdi # offload dst
+.cfi_push %rdi
lea -8*2(%rsp),%rsp
+.cfi_adjust_cfa_offset 16
+.Lfe64_sqr_body:
mov 8*0(%rsi),%rdx # a[0]
mov 8*1(%rsi),%rcx # a[1]
@@ -637,19 +699,29 @@ x25519_fe64_sqr:
mov $acc0,8*0(%rdi)
mov 8*3(%rsp),%r15
+.cfi_restore %r15 # each callee-saved register is reloaded just above its .cfi_restore
mov 8*4(%rsp),%r14
+.cfi_restore %r14
mov 8*5(%rsp),%r13
+.cfi_restore %r13
mov 8*6(%rsp),%r12
+.cfi_restore %r12
mov 8*7(%rsp),%rbx
+.cfi_restore %rbx
mov 8*8(%rsp),%rbp
+.cfi_restore %rbp
lea 8*9(%rsp),%rsp
+.cfi_adjust_cfa_offset -72 # lea above released 7 pushes + 16 bytes of scratch (72, as in .xdata), so the CFA offset shrinks
+.Lfe64_sqr_epilogue: # HandlerData[1] in .xdata: full_handler skips frame recovery from here on
ret
+.cfi_endproc
.size x25519_fe64_sqr,.-x25519_fe64_sqr
.globl x25519_fe64_mul121666
.type x25519_fe64_mul121666,\@function,2
.align 32
x25519_fe64_mul121666:
+.Lfe64_mul121666_body:
mov \$121666,%edx
mulx 8*0(%rsi),$acc0,%rcx
mulx 8*1(%rsi),$acc1,%rax
@@ -676,6 +748,7 @@ x25519_fe64_mul121666:
mov $acc3,8*3(%rdi)
mov $acc0,8*0(%rdi)
+.Lfe64_mul121666_epilogue:
ret
.size x25519_fe64_mul121666,.-x25519_fe64_mul121666
@@ -683,6 +756,7 @@ x25519_fe64_mul121666:
.type x25519_fe64_add,\@function,3
.align 32
x25519_fe64_add:
+.Lfe64_add_body:
mov 8*0(%rsi),$acc0
mov 8*1(%rsi),$acc1
mov 8*2(%rsi),$acc2
@@ -698,13 +772,18 @@ x25519_fe64_add:
add %rax,$acc0
adc \$0,$acc1
- mov $acc0,8*0(%rdi)
adc \$0,$acc2
mov $acc1,8*1(%rdi)
adc \$0,$acc3
mov $acc2,8*2(%rdi)
+ sbb %rax,%rax # cf -> mask
mov $acc3,8*3(%rdi)
+ and \$38,%rax
+ add %rax,$acc0
+ mov $acc0,8*0(%rdi)
+
+.Lfe64_add_epilogue:
ret
.size x25519_fe64_add,.-x25519_fe64_add
@@ -712,6 +791,7 @@ x25519_fe64_add:
.type x25519_fe64_sub,\@function,3
.align 32
x25519_fe64_sub:
+.Lfe64_sub_body:
mov 8*0(%rsi),$acc0
mov 8*1(%rsi),$acc1
mov 8*2(%rsi),$acc2
@@ -727,13 +807,18 @@ x25519_fe64_sub:
sub %rax,$acc0
sbb \$0,$acc1
- mov $acc0,8*0(%rdi)
sbb \$0,$acc2
mov $acc1,8*1(%rdi)
sbb \$0,$acc3
mov $acc2,8*2(%rdi)
+ sbb %rax,%rax # cf -> mask
mov $acc3,8*3(%rdi)
+ and \$38,%rax
+
+ sub %rax,$acc0
+ mov $acc0,8*0(%rdi)
+.Lfe64_sub_epilogue:
ret
.size x25519_fe64_sub,.-x25519_fe64_sub
@@ -741,6 +826,7 @@ x25519_fe64_sub:
.type x25519_fe64_tobytes,\@function,2
.align 32
x25519_fe64_tobytes:
+.Lfe64_to_body:
mov 8*0(%rsi),$acc0
mov 8*1(%rsi),$acc1
mov 8*2(%rsi),$acc2
@@ -751,6 +837,7 @@ x25519_fe64_tobytes:
sar \$63,$acc3 # most significant bit -> mask
shr \$1,%rax # most significant bit cleared
and \$19,$acc3
+ add \$19,$acc3 # compare to modulus in the same go
add $acc3,$acc0
adc \$0,$acc1
@@ -760,15 +847,20 @@ x25519_fe64_tobytes:
lea (%rax,%rax),$acc3
sar \$63,%rax # most significant bit -> mask
shr \$1,$acc3 # most significant bit cleared
+ not %rax
and \$19,%rax
- add %rax,$acc0
+ sub %rax,$acc0
+ sbb \$0,$acc1
+ sbb \$0,$acc2
+ sbb \$0,$acc3
+ mov $acc0,8*0(%rdi)
mov $acc1,8*1(%rdi)
mov $acc2,8*2(%rdi)
mov $acc3,8*3(%rdi)
- mov $acc0,8*0(%rdi)
+.Lfe64_to_epilogue:
ret
.size x25519_fe64_tobytes,.-x25519_fe64_tobytes
___
@@ -804,6 +896,222 @@ $code.=<<___;
.asciz "X25519 primitives for x86_64, CRYPTOGAMS by <appro\@openssl.org>"
___
+# EXCEPTION_DISPOSITION handler (EXCEPTION_RECORD *rec,ULONG64 frame,
+# CONTEXT *context,DISPATCHER_CONTEXT *disp)
+if ($win64) {
+$rec="%rcx";
+$frame="%rdx";
+$context="%r8";
+$disp="%r9";
+
+$code.=<<___;
+.extern __imp_RtlVirtualUnwind
+
+.type short_handler,\@abi-omnipotent # Win64 SEH handler that recovers only Rsp, Rsi and Rdi; .xdata assigns it to the fe64 routines that apparently keep no frame of their own
+.align 16
+short_handler:
+ push %rsi # preserve Win64 non-volatile registers before using them as scratch
+ push %rdi
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ pushfq # flags are saved too; the shared tail executes cld
+ sub \$64,%rsp # 32-byte shadow space plus four stack argument slots used by the shared tail
+
+ mov 120($context),%rax # pull context->Rax, the default frame base handed to the tail
+ mov 248($context),%rbx # pull context->Rip, the faulting address
+
+ mov 8($disp),%rsi # disp->ImageBase
+ mov 56($disp),%r11 # disp->HandlerData, the words following the handler RVA in .xdata
+
+ mov 0(%r11),%r10d # HandlerData[0], RVA of the body label
+ lea (%rsi,%r10),%r10 # end of prologue label as an absolute address
+ cmp %r10,%rbx # context->Rip<end of prologue label
+ jb .Lcommon_seh_tail # fault before the body label: keep context->Rax as frame base (presumably set up by the Win64 entry stub, not visible here)
+
+ mov 152($context),%rax # pull context->Rsp, the frame base once past the prologue
+ jmp .Lcommon_seh_tail # tail lives in full_handler and is shared by both handlers
+.size short_handler,.-short_handler
+
+.type full_handler,\@abi-omnipotent # Win64 SEH handler for the routines whose prologue pushes rbp, rbx and r12-r15; also hosts the tail shared with short_handler
+.align 16
+full_handler:
+ push %rsi # preserve Win64 non-volatile registers before using them as scratch
+ push %rdi
+ push %rbx
+ push %rbp
+ push %r12
+ push %r13
+ push %r14
+ push %r15
+ pushfq # flags are saved too; the tail executes cld
+ sub \$64,%rsp # 32-byte shadow space plus four stack argument slots (arg5..arg8 below)
+
+ mov 120($context),%rax # pull context->Rax, the default frame base handed to the tail
+ mov 248($context),%rbx # pull context->Rip, the faulting address
+
+ mov 8($disp),%rsi # disp->ImageBase
+ mov 56($disp),%r11 # disp->HandlerData, the words following the handler RVA in .xdata
+
+ mov 0(%r11),%r10d # HandlerData[0], RVA of the body label
+ lea (%rsi,%r10),%r10 # end of prologue label as an absolute address
+ cmp %r10,%rbx # context->Rip<end of prologue label
+ jb .Lcommon_seh_tail # fault inside the prologue: keep context->Rax as frame base (presumably set up by the Win64 entry stub, not visible here)
+
+ mov 152($context),%rax # pull context->Rsp
+
+ mov 4(%r11),%r10d # HandlerData[1], RVA of the epilogue label
+ lea (%rsi,%r10),%r10 # epilogue label as an absolute address
+ cmp %r10,%rbx # context->Rip>=epilogue label
+ jae .Lcommon_seh_tail # frame already released: context->Rsp is the frame base as is
+
+ mov 8(%r11),%r10d # HandlerData[2], frame size in bytes (88 or 72 in .xdata)
+ lea (%rax,%r10),%rax # rax = stack pointer value above the whole frame
+
+ mov -8(%rax),%rbp # saved registers sit just below, in prologue push order
+ mov -16(%rax),%rbx
+ mov -24(%rax),%r12
+ mov -32(%rax),%r13
+ mov -40(%rax),%r14
+ mov -48(%rax),%r15
+ mov %rbx,144($context) # restore context->Rbx
+ mov %rbp,160($context) # restore context->Rbp
+ mov %r12,216($context) # restore context->R12
+ mov %r13,224($context) # restore context->R13
+ mov %r14,232($context) # restore context->R14
+ mov %r15,240($context) # restore context->R15
+
+.Lcommon_seh_tail: # entered with rax = frame base of the interrupted routine
+ mov 8(%rax),%rdi # rdi and rsi are read from 8 and 16 above the frame base; presumably stored there by the Win64 entry stub, not visible here
+ mov 16(%rax),%rsi
+ mov %rax,152($context) # restore context->Rsp
+ mov %rsi,168($context) # restore context->Rsi
+ mov %rdi,176($context) # restore context->Rdi
+
+ mov 40($disp),%rdi # disp->ContextRecord, destination of the copy
+ mov $context,%rsi # context, source of the copy
+ mov \$154,%ecx # sizeof(CONTEXT) counted in quadwords, as rep movsq expects
+ .long 0xa548f3fc # cld; rep movsq, copies the patched context into disp->ContextRecord
+
+ mov $disp,%rsi # keep disp in rsi while the argument registers are loaded
+ xor %rcx,%rcx # arg1, UNW_FLAG_NHANDLER
+ mov 8(%rsi),%rdx # arg2, disp->ImageBase
+ mov 0(%rsi),%r8 # arg3, disp->ControlPc
+ mov 16(%rsi),%r9 # arg4, disp->FunctionEntry
+ mov 40(%rsi),%r10 # disp->ContextRecord
+ lea 56(%rsi),%r11 # &disp->HandlerData
+ lea 24(%rsi),%r12 # &disp->EstablisherFrame
+ mov %r10,32(%rsp) # arg5, first stack slot above the shadow space
+ mov %r11,40(%rsp) # arg6
+ mov %r12,48(%rsp) # arg7
+ mov %rcx,56(%rsp) # arg8, (NULL)
+ call *__imp_RtlVirtualUnwind(%rip) # indirect call through the import pointer declared with .extern
+
+ mov \$1,%eax # ExceptionContinueSearch
+ add \$64,%rsp # drop shadow space and argument slots
+ popfq # restore in reverse order of the entry pushes
+ pop %r15
+ pop %r14
+ pop %r13
+ pop %r12
+ pop %rbp
+ pop %rbx
+ pop %rdi
+ pop %rsi
+ ret
+.size full_handler,.-full_handler
+
+.section .pdata
+.align 4
+ .rva .LSEH_begin_x25519_fe51_mul
+ .rva .LSEH_end_x25519_fe51_mul
+ .rva .LSEH_info_x25519_fe51_mul
+
+ .rva .LSEH_begin_x25519_fe51_sqr
+ .rva .LSEH_end_x25519_fe51_sqr
+ .rva .LSEH_info_x25519_fe51_sqr
+
+ .rva .LSEH_begin_x25519_fe51_mul121666
+ .rva .LSEH_end_x25519_fe51_mul121666
+ .rva .LSEH_info_x25519_fe51_mul121666
+___
+$code.=<<___ if ($addx);
+ .rva .LSEH_begin_x25519_fe64_mul
+ .rva .LSEH_end_x25519_fe64_mul
+ .rva .LSEH_info_x25519_fe64_mul
+
+ .rva .LSEH_begin_x25519_fe64_sqr
+ .rva .LSEH_end_x25519_fe64_sqr
+ .rva .LSEH_info_x25519_fe64_sqr
+
+ .rva .LSEH_begin_x25519_fe64_mul121666
+ .rva .LSEH_end_x25519_fe64_mul121666
+ .rva .LSEH_info_x25519_fe64_mul121666
+
+ .rva .LSEH_begin_x25519_fe64_add
+ .rva .LSEH_end_x25519_fe64_add
+ .rva .LSEH_info_x25519_fe64_add
+
+ .rva .LSEH_begin_x25519_fe64_sub
+ .rva .LSEH_end_x25519_fe64_sub
+ .rva .LSEH_info_x25519_fe64_sub
+
+ .rva .LSEH_begin_x25519_fe64_tobytes
+ .rva .LSEH_end_x25519_fe64_tobytes
+ .rva .LSEH_info_x25519_fe64_tobytes
+___
+$code.=<<___;
+.section .xdata
+.align 8
+.LSEH_info_x25519_fe51_mul:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lfe51_mul_body,.Lfe51_mul_epilogue # HandlerData[]
+ .long 88,0
+.LSEH_info_x25519_fe51_sqr:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lfe51_sqr_body,.Lfe51_sqr_epilogue # HandlerData[]
+ .long 88,0
+.LSEH_info_x25519_fe51_mul121666:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lfe51_mul121666_body,.Lfe51_mul121666_epilogue # HandlerData[]
+ .long 88,0
+___
+$code.=<<___ if ($addx);
+.LSEH_info_x25519_fe64_mul:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lfe64_mul_body,.Lfe64_mul_epilogue # HandlerData[]
+ .long 72,0
+.LSEH_info_x25519_fe64_sqr:
+ .byte 9,0,0,0
+ .rva full_handler
+ .rva .Lfe64_sqr_body,.Lfe64_sqr_epilogue # HandlerData[]
+ .long 72,0
+.LSEH_info_x25519_fe64_mul121666:
+ .byte 9,0,0,0
+ .rva short_handler
+ .rva .Lfe64_mul121666_body,.Lfe64_mul121666_epilogue # HandlerData[]
+.LSEH_info_x25519_fe64_add:
+ .byte 9,0,0,0
+ .rva short_handler
+ .rva .Lfe64_add_body,.Lfe64_add_epilogue # HandlerData[]
+.LSEH_info_x25519_fe64_sub:
+ .byte 9,0,0,0
+ .rva short_handler
+ .rva .Lfe64_sub_body,.Lfe64_sub_epilogue # HandlerData[]
+.LSEH_info_x25519_fe64_tobytes:
+ .byte 9,0,0,0
+ .rva short_handler
+ .rva .Lfe64_to_body,.Lfe64_to_epilogue # HandlerData[]
+___
+}
+
$code =~ s/\`([^\`]*)\`/eval $1/gem;
print $code;
close STDOUT;
diff --git a/crypto/ec/curve25519.c b/crypto/ec/curve25519.c
index 9666de1..abe9b9c 100644
--- a/crypto/ec/curve25519.c
+++ b/crypto/ec/curve25519.c
@@ -11,149 +11,23 @@
#include "ec_lcl.h"
#include <openssl/sha.h>
-#if defined(X25519_ASM) \
- || ( (defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16) \
- && !defined(__sparc__) \
- && !(defined(__ANDROID__) && !defined(__clang__)) )
-/*
- * Base 2^51 implementation.
- */
-# define BASE_2_51_IMPLEMENTED
-
-typedef uint64_t fe51[5];
-# if !defined(X25519_ASM)
-typedef __uint128_t u128;
-# endif
-
-static const uint64_t MASK51 = 0x7ffffffffffff;
-
-static uint64_t load_7(const uint8_t *in)
-{
- uint64_t result;
-
- result = in[0];
- result |= ((uint64_t)in[1]) << 8;
- result |= ((uint64_t)in[2]) << 16;
- result |= ((uint64_t)in[3]) << 24;
- result |= ((uint64_t)in[4]) << 32;
- result |= ((uint64_t)in[5]) << 40;
- result |= ((uint64_t)in[6]) << 48;
-
- return result;
-}
-
-static uint64_t load_6(const uint8_t *in)
-{
- uint64_t result;
-
- result = in[0];
- result |= ((uint64_t)in[1]) << 8;
- result |= ((uint64_t)in[2]) << 16;
- result |= ((uint64_t)in[3]) << 24;
- result |= ((uint64_t)in[4]) << 32;
- result |= ((uint64_t)in[5]) << 40;
-
- return result;
-}
-
-static void fe51_frombytes(fe51 h, const uint8_t *s)
-{
- uint64_t h0 = load_7(s); /* 56 bits */
- uint64_t h1 = load_6(s + 7) << 5; /* 53 bits */
- uint64_t h2 = load_7(s + 13) << 2; /* 58 bits */
- uint64_t h3 = load_6(s + 20) << 7; /* 55 bits */
- uint64_t h4 = (load_6(s + 26) & 0x7fffffffffff) << 4; /* 51 bits */
-
- h1 |= h0 >> 51; h0 &= MASK51;
- h2 |= h1 >> 51; h1 &= MASK51;
- h3 |= h2 >> 51; h2 &= MASK51;
- h4 |= h3 >> 51; h3 &= MASK51;
-
- h[0] = h0;
- h[1] = h1;
- h[2] = h2;
- h[3] = h3;
- h[4] = h4;
-}
-
-static void fe51_tobytes(uint8_t *s, const fe51 h)
-{
- uint64_t h0 = h[0];
- uint64_t h1 = h[1];
- uint64_t h2 = h[2];
- uint64_t h3 = h[3];
- uint64_t h4 = h[4];
- uint64_t q;
+#if defined(X25519_ASM) && (defined(__x86_64) || defined(__x86_64__) || \
+ defined(_M_AMD64) || defined(_M_X64))
- /* compare to modulus */
- q = (h0 + 19) >> 51;
- q = (h1 + q) >> 51;
- q = (h2 + q) >> 51;
- q = (h3 + q) >> 51;
- q = (h4 + q) >> 51;
-
- /* full reduce */
- h0 += 19 * q;
- h1 += h0 >> 51; h0 &= MASK51;
- h2 += h1 >> 51; h1 &= MASK51;
- h3 += h2 >> 51; h2 &= MASK51;
- h4 += h3 >> 51; h3 &= MASK51;
- h4 &= MASK51;
-
- /* smash */
- s[0] = (uint8_t)(h0 >> 0);
- s[1] = (uint8_t)(h0 >> 8);
- s[2] = (uint8_t)(h0 >> 16);
- s[3] = (uint8_t)(h0 >> 24);
- s[4] = (uint8_t)(h0 >> 32);
- s[5] = (uint8_t)(h0 >> 40);
- s[6] = (uint8_t)((h0 >> 48) | ((uint32_t)h1 << 3));
- s[7] = (uint8_t)(h1 >> 5);
- s[8] = (uint8_t)(h1 >> 13);
- s[9] = (uint8_t)(h1 >> 21);
- s[10] = (uint8_t)(h1 >> 29);
- s[11] = (uint8_t)(h1 >> 37);
- s[12] = (uint8_t)((h1 >> 45) | ((uint32_t)h2 << 6));
- s[13] = (uint8_t)(h2 >> 2);
- s[14] = (uint8_t)(h2 >> 10);
- s[15] = (uint8_t)(h2 >> 18);
- s[16] = (uint8_t)(h2 >> 26);
- s[17] = (uint8_t)(h2 >> 34);
- s[18] = (uint8_t)(h2 >> 42);
- s[19] = (uint8_t)((h2 >> 50) | ((uint32_t)h3 << 1));
- s[20] = (uint8_t)(h3 >> 7);
- s[21] = (uint8_t)(h3 >> 15);
- s[22] = (uint8_t)(h3 >> 23);
- s[23] = (uint8_t)(h3 >> 31);
- s[24] = (uint8_t)(h3 >> 39);
- s[25] = (uint8_t)((h3 >> 47) | ((uint32_t)h4 << 4));
- s[26] = (uint8_t)(h4 >> 4);
- s[27] = (uint8_t)(h4 >> 12);
- s[28] = (uint8_t)(h4 >> 20);
- s[29] = (uint8_t)(h4 >> 28);
- s[30] = (uint8_t)(h4 >> 36);
- s[31] = (uint8_t)(h4 >> 44);
-}
-
-# ifdef X25519_ASM
-void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
-void x25519_fe51_sqr(fe51 h, const fe51 f);
-void x25519_fe51_mul121666(fe51 h, fe51 f);
-# define fe51_mul x25519_fe51_mul
-# define fe51_sq x25519_fe51_sqr
-# define fe51_mul121666 x25519_fe51_mul121666
-
-# if defined(__x86_64) || defined(__x86_64__) || \
- defined(_M_AMD64) || defined(_M_X64)
-
-# define BASE_2_64_IMPLEMENTED
+# define BASE_2_64_IMPLEMENTED
typedef uint64_t fe64[4];
int x25519_fe64_eligible(void);
/*
- * There are no reference C implementations for this radix.
+ * Following subroutines perform corresponding operations modulo
+ * 2^256-38, i.e. double the curve modulus. However, inputs and
+ * outputs are permitted to be partially reduced, i.e. to remain
+ * in [0..2^256) range. It's all tied up in final fe64_tobytes
+ * that performs full reduction modulo 2^255-19.
+ *
+ * There are no reference C implementations for these.
*/
void x25519_fe64_mul(fe64 h, const fe64 f, const fe64 g);
void x25519_fe64_sqr(fe64 h, const fe64 f);
@@ -161,12 +35,12 @@ void x25519_fe64_mul121666(fe64 h, fe64 f);
void x25519_fe64_add(fe64 h, const fe64 f, const fe64 g);
void x25519_fe64_sub(fe64 h, const fe64 f, const fe64 g);
void x25519_fe64_tobytes(uint8_t *s, const fe64 f);
-# define fe64_mul x25519_fe64_mul
-# define fe64_sqr x25519_fe64_sqr
-# define fe64_mul121666 x25519_fe64_mul121666
-# define fe64_add x25519_fe64_add
-# define fe64_sub x25519_fe64_sub
-# define fe64_tobytes x25519_fe64_tobytes
+# define fe64_mul x25519_fe64_mul
+# define fe64_sqr x25519_fe64_sqr
+# define fe64_mul121666 x25519_fe64_mul121666
+# define fe64_add x25519_fe64_add
+# define fe64_sub x25519_fe64_sub
+# define fe64_tobytes x25519_fe64_tobytes
static uint64_t load_8(const uint8_t *in)
{
@@ -375,10 +249,143 @@ static void x25519_scalar_mulx(uint8_t out[32], const uint8_t scalar[32],
OPENSSL_cleanse(e, sizeof(e));
}
-# endif
+#endif
+
+#if defined(X25519_ASM) \
+ || ( (defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16) \
+ && !defined(__sparc__) \
+ && !(defined(__ANDROID__) && !defined(__clang__)) )
+/*
+ * Base 2^51 implementation. It's virtually no different from the reference
+ * base 2^25.5 implementation with respect to lax boundary conditions for
+ * intermediate values and even individual limbs, so whatever you know
+ * about the reference applies here as well...
+ */
+# define BASE_2_51_IMPLEMENTED
+
+typedef uint64_t fe51[5];
+
+static const uint64_t MASK51 = 0x7ffffffffffff;
+
+static uint64_t load_7(const uint8_t *in) /* read in[0..6] as one little-endian integer */
+{
+ uint64_t result;
+
+ result = in[0]; /* least significant byte comes first */
+ result |= ((uint64_t)in[1]) << 8; /* widen before shifting so no bits are lost */
+ result |= ((uint64_t)in[2]) << 16;
+ result |= ((uint64_t)in[3]) << 24;
+ result |= ((uint64_t)in[4]) << 32;
+ result |= ((uint64_t)in[5]) << 40;
+ result |= ((uint64_t)in[6]) << 48;
+
+ return result; /* value occupies the low 56 bits */
+}
+
+static uint64_t load_6(const uint8_t *in) /* read in[0..5] as one little-endian integer */
+{
+ uint64_t result;
+
+ result = in[0]; /* least significant byte comes first */
+ result |= ((uint64_t)in[1]) << 8; /* widen before shifting so no bits are lost */
+ result |= ((uint64_t)in[2]) << 16;
+ result |= ((uint64_t)in[3]) << 24;
+ result |= ((uint64_t)in[4]) << 32;
+ result |= ((uint64_t)in[5]) << 40;
+
+ return result; /* value occupies the low 48 bits */
+}
+
+static void fe51_frombytes(fe51 h, const uint8_t *s) /* unpack 32 little-endian bytes at s into five 51-bit limbs in h */
+{
+ uint64_t h0 = load_7(s); /* 56 bits: input bits 0..55 */
+ uint64_t h1 = load_6(s + 7) << 5; /* 53 bits: input bits 56..103, low 5 bits left free for the top of h0 */
+ uint64_t h2 = load_7(s + 13) << 2; /* 58 bits: input bits 104..159, low 2 bits left free */
+ uint64_t h3 = load_6(s + 20) << 7; /* 55 bits: input bits 160..207, low 7 bits left free */
+ uint64_t h4 = (load_6(s + 26) & 0x7fffffffffff) << 4; /* 51 bits: input bits 208..254, bit 255 is masked off */
+
+ h1 |= h0 >> 51; h0 &= MASK51; /* hand the bits above 51 to the next limb, then trim */
+ h2 |= h1 >> 51; h1 &= MASK51;
+ h3 |= h2 >> 51; h2 &= MASK51;
+ h4 |= h3 >> 51; h3 &= MASK51;
+
+ h[0] = h0; /* least significant limb */
+ h[1] = h1;
+ h[2] = h2;
+ h[3] = h3;
+ h[4] = h4; /* most significant limb */
+}
+
+static void fe51_tobytes(uint8_t *s, const fe51 h) /* reduce h modulo 2^255-19 and write it to s as 32 little-endian bytes */
+{
+ uint64_t h0 = h[0]; /* work on local copies, h itself is not modified */
+ uint64_t h1 = h[1];
+ uint64_t h2 = h[2];
+ uint64_t h3 = h[3];
+ uint64_t h4 = h[4];
+ uint64_t q;
+ /* compare to modulus: q is the carry out of h + 19 across all five limbs */
+ q = (h0 + 19) >> 51;
+ q = (h1 + q) >> 51;
+ q = (h2 + q) >> 51;
+ q = (h3 + q) >> 51;
+ q = (h4 + q) >> 51; /* presumably 0 or 1 for the inputs callers pass - TODO confirm limb bounds */
+
+ /* full reduce: add 19*q, propagate carries, then drop everything at 2^255 and above */
+ h0 += 19 * q;
+ h1 += h0 >> 51; h0 &= MASK51;
+ h2 += h1 >> 51; h1 &= MASK51;
+ h3 += h2 >> 51; h2 &= MASK51;
+ h4 += h3 >> 51; h3 &= MASK51;
+ h4 &= MASK51; /* discarding the carry out of the top limb subtracts q*2^255 */
+
+ /* smash: serialize the five 51-bit limbs into 32 bytes, least significant first */
+ s[0] = (uint8_t)(h0 >> 0);
+ s[1] = (uint8_t)(h0 >> 8);
+ s[2] = (uint8_t)(h0 >> 16);
+ s[3] = (uint8_t)(h0 >> 24);
+ s[4] = (uint8_t)(h0 >> 32);
+ s[5] = (uint8_t)(h0 >> 40);
+ s[6] = (uint8_t)((h0 >> 48) | ((uint32_t)h1 << 3)); /* top 3 bits of h0 joined with low 5 bits of h1 */
+ s[7] = (uint8_t)(h1 >> 5);
+ s[8] = (uint8_t)(h1 >> 13);
+ s[9] = (uint8_t)(h1 >> 21);
+ s[10] = (uint8_t)(h1 >> 29);
+ s[11] = (uint8_t)(h1 >> 37);
+ s[12] = (uint8_t)((h1 >> 45) | ((uint32_t)h2 << 6)); /* top 6 bits of h1 joined with low 2 bits of h2 */
+ s[13] = (uint8_t)(h2 >> 2);
+ s[14] = (uint8_t)(h2 >> 10);
+ s[15] = (uint8_t)(h2 >> 18);
+ s[16] = (uint8_t)(h2 >> 26);
+ s[17] = (uint8_t)(h2 >> 34);
+ s[18] = (uint8_t)(h2 >> 42);
+ s[19] = (uint8_t)((h2 >> 50) | ((uint32_t)h3 << 1)); /* top bit of h2 joined with low 7 bits of h3 */
+ s[20] = (uint8_t)(h3 >> 7);
+ s[21] = (uint8_t)(h3 >> 15);
+ s[22] = (uint8_t)(h3 >> 23);
+ s[23] = (uint8_t)(h3 >> 31);
+ s[24] = (uint8_t)(h3 >> 39);
+ s[25] = (uint8_t)((h3 >> 47) | ((uint32_t)h4 << 4)); /* top 4 bits of h3 joined with low 4 bits of h4 */
+ s[26] = (uint8_t)(h4 >> 4);
+ s[27] = (uint8_t)(h4 >> 12);
+ s[28] = (uint8_t)(h4 >> 20);
+ s[29] = (uint8_t)(h4 >> 28);
+ s[30] = (uint8_t)(h4 >> 36);
+ s[31] = (uint8_t)(h4 >> 44); /* h4 is 51 bits after masking, so bit 7 of this byte is 0 */
+}
+
+# if defined(X25519_ASM)
+void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g);
+void x25519_fe51_sqr(fe51 h, const fe51 f);
+void x25519_fe51_mul121666(fe51 h, fe51 f);
+# define fe51_mul x25519_fe51_mul
+# define fe51_sq x25519_fe51_sqr
+# define fe51_mul121666 x25519_fe51_mul121666
# else
+typedef __uint128_t u128;
+
static void fe51_mul(fe51 h, const fe51 f, const fe51 g)
{
u128 h0, h1, h2, h3, h4;
diff --git a/test/recipes/30-test_evp_data/evppkey.txt b/test/recipes/30-test_evp_data/evppkey.txt
index 7435125..d482c14 100644
--- a/test/recipes/30-test_evp_data/evppkey.txt
+++ b/test/recipes/30-test_evp_data/evppkey.txt
@@ -18436,3 +18436,41 @@ Ctrl = digest:SM3
Input = D7AD397F6FFA5D4F7F11E7217F241607DC30618C236D2C09C1B9EA8FDADEE2E8
Output = 3045022100f11bf36e75bb304f094fb42a4ca22377d0cc768637c5011cd59fb9ed4b130c98022035545ffe2c2efb3abee4fee661468946d886004fae8ea5311593e48f7fe21b91
Result = KEYOP_MISMATCH
+
+Title = Chosen Wycheproof vectors
+
+PrivateKeyRaw = WychePRIVATE0:X25519:288796bc5aff4b81a37501757bc0753a3c21964790d38699308debc17a6eaf8d
+
+PublicKeyRaw = WychePUBLIC0:X25519:f0ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff7f
+
+Derive=WychePRIVATE0
+PeerKey=WychePUBLIC0
+SharedSecret=b4e0dd76da7b071728b61f856771aa356e57eda78a5b1655cc3820fb5f854c5c
+
+PrivateKeyRaw = WychePRIVATE1:X25519:60887b3dc72443026ebedbbbb70665f42b87add1440e7768fbd7e8e2ce5f639d
+
+PublicKeyRaw = WychePUBLIC1:X25519:f0ffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffff
+
+Derive=WychePRIVATE1
+PeerKey=WychePUBLIC1
+SharedSecret=38d6304c4a7e6d9f7959334fb5245bd2c754525d4c91db950206926234c1f633
+
+PrivateKeyRaw = WychePRIVATE2:X25519:a0a4f130b98a5be4b1cedb7cb85584a3520e142d474dc9ccb909a073a976bf63
+
+PublicKeyRaw = WychePUBLIC2:X25519:0ab4e76380d84dde4f6833c58f2a9fb8f83bb0169b172be4b6e0592887741a36
+
+Derive=WychePRIVATE2
+PeerKey=WychePUBLIC2
+SharedSecret=0200000000000000000000000000000000000000000000000000000000000000
+
+PublicKeyRaw = WychePUBLIC3:X25519:89e10d5701b4337d2d032181538b1064bd4084401ceca1fd12663a1959388000
+
+Derive=WychePRIVATE2
+PeerKey=WychePUBLIC3
+SharedSecret=0900000000000000000000000000000000000000000000000000000000000000
+
+PublicKeyRaw = WychePUBLIC4:X25519:2b55d3aa4a8f80c8c0b2ae5f933e85af49beac36c2fa7394bab76c8933f8f81d
+
+Derive=WychePRIVATE2
+PeerKey=WychePUBLIC4
+SharedSecret=1000000000000000000000000000000000000000000000000000000000000000
More information about the openssl-commits
mailing list