[openssl] master update

Thu Apr 8 11:18:30 UTC 2021

The branch master has been updated
       via  27947123c9f17deac005b2afd265e38903349918 (commit)
       via  a72da9ecebcd8e9b9caf6a3d05251a7008e9c614 (commit)
       via  f684a2d783bf2c4728e65f83e0f89ec654f40e79 (commit)
       via  ceaa6b319e01bd1ac74e3e7c5662745d3d3b3b9d (commit)
       via  eb7bcff67c6bbd9cb463c44035afd00c37ca327f (commit)
       via  eacc18069b4da348247ab37a0665350258619311 (commit)
       via  70fd5110261e9c663b2f6a6009514f72c303d85d (commit)
       via  136f96a5d7abb0879ecbeaf3883e0d20a249a2f6 (commit)
       via  927e704e8c2ec47ca58a86f57dc1900d48ebb985 (commit)
       via  0f4286c78a657958bcd5cee38a8d131b4bb2582a (commit)
       via  5de32f22e731ea151e1c5aac7703cde2573cb4a4 (commit)
       via  bbed0d1cbd436af6797d7837e270bff4ca4d5a10 (commit)
      from  b7dedba8b1434e6a2f2a3848e3375d07ee8946ab (commit)


- Log -----------------------------------------------------------------
commit 27947123c9f17deac005b2afd265e38903349918
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Tue Jan 19 14:14:25 2021 +1100

    curve448: Integrate 64-bit reference implementation
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit a72da9ecebcd8e9b9caf6a3d05251a7008e9c614
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Tue Apr 6 12:52:44 2021 +1000

    Configure: Check if 128-bit integers are supported by compiler
    
    Add a config variable "use_int128" that records, at configure time,
    whether the compiler supports 128-bit integers.  This makes it easier to
    automatically select the 64-bit or 32-bit implementation for curve448.
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit f684a2d783bf2c4728e65f83e0f89ec654f40e79
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Mon Feb 1 16:22:16 2021 +1100

    curve448: Use constant time zero check function
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit ceaa6b319e01bd1ac74e3e7c5662745d3d3b3b9d
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Mon Feb 1 16:20:12 2021 +1100

    Add a constant time zero check function for 64-bit integers
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit eb7bcff67c6bbd9cb463c44035afd00c37ca327f
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Tue Jan 19 14:05:24 2021 +1100

    curve448: Remove the unrolled loop version
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit eacc18069b4da348247ab37a0665350258619311
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Mon Mar 29 17:26:41 2021 +1100

    curve448: Use NLIMBS where appropriate to simplify the code
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit 70fd5110261e9c663b2f6a6009514f72c303d85d
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Tue Jan 19 14:04:48 2021 +1100

    curve448: Modernise reference 64-bit code
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit 136f96a5d7abb0879ecbeaf3883e0d20a249a2f6
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Tue Jan 19 13:52:52 2021 +1100

    curve448: Rename arch_ref64 to arch_64
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit 927e704e8c2ec47ca58a86f57dc1900d48ebb985
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Tue Jan 19 13:50:18 2021 +1100

    Partially Revert "Remove curve448 architecture specific files"
    
    This reverts commit 7e492f3372ed83af074a63d5920f13de7e3455b6.
    
    This brings back the 64-bit reference implementation for curve448.
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit 0f4286c78a657958bcd5cee38a8d131b4bb2582a
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Mon Mar 29 19:08:58 2021 +1100

    curve448: Use relative includes to avoid explicit dependencies
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit 5de32f22e731ea151e1c5aac7703cde2573cb4a4
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Mon Mar 29 18:20:53 2021 +1100

    Use the numbers.h definitions of int128_t and uint128_t
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

commit bbed0d1cbd436af6797d7837e270bff4ca4d5a10
Author: Amitay Isaacs <amitay at ozlabs.org>
Date:   Mon Mar 29 18:06:13 2021 +1100

    numbers: Define 128-bit integers if the compiler supports them
    
    Signed-off-by: Amitay Isaacs <amitay at ozlabs.org>
    
    Reviewed-by: Tomas Mraz <tomas at openssl.org>
    Reviewed-by: Matt Caswell <matt at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/14784)

-----------------------------------------------------------------------

Summary of changes:
 Configure                                          |  14 ++
 crypto/bn/bn_div.c                                 |   2 +-
 crypto/bn/bn_local.h                               |   5 +-
 crypto/ec/build.info                               |  15 +-
 crypto/ec/curve25519.c                             |   6 +-
 crypto/ec/curve448/arch_32/f_impl.c                |   2 +-
 .../{arch_32 => arch_64}/arch_intrinsics.h         |  18 +-
 crypto/ec/curve448/arch_64/f_impl.c                | 200 +++++++++++++++++++++
 crypto/ec/curve448/arch_64/f_impl.h                |  58 ++++++
 crypto/ec/curve448/curve448utils.h                 |   6 +-
 crypto/ec/curve448/field.h                         |   9 +-
 crypto/ec/curve448/word.h                          |  11 +-
 crypto/ec/ecp_nistp224.c                           |   8 +-
 crypto/ec/ecp_nistp256.c                           |   9 +-
 crypto/ec/ecp_nistp521.c                           |   8 +-
 crypto/poly1305/poly1305.c                         |   5 +-
 crypto/poly1305/poly1305_base2_44.c                |   2 +-
 include/internal/constant_time.h                   |   5 +
 include/internal/numbers.h                         |  10 ++
 19 files changed, 343 insertions(+), 50 deletions(-)
 copy crypto/ec/curve448/{arch_32 => arch_64}/arch_intrinsics.h (50%)
 create mode 100644 crypto/ec/curve448/arch_64/f_impl.c
 create mode 100644 crypto/ec/curve448/arch_64/f_impl.h

diff --git a/Configure b/Configure
index 64d809258d..e36c5d93aa 100755
--- a/Configure
+++ b/Configure
@@ -1573,6 +1573,20 @@ if (!$disabled{asm} && !$predefined_C{__MACH__} && $^O ne 'VMS') {
     }
 }
 
+# Check if __SIZEOF_INT128__ is defined by compiler
+$config{use_int128} = 0;
+{
+    my $cc = $config{CROSS_COMPILE}.$config{CC};
+    open(PIPE, "$cc -E -dM - </dev/null 2>&1 |");
+    while(<PIPE>) {
+        if (m/__SIZEOF_INT128__/) {
+            $config{use_int128} = 1;
+            last;
+        }
+    }
+    close(PIPE);
+}
+
 # Deal with bn_ops ###################################################
 
 $config{bn_ll}                  =0;
diff --git a/crypto/bn/bn_div.c b/crypto/bn/bn_div.c
index cf4309cdf4..b486e3411a 100644
--- a/crypto/bn/bn_div.c
+++ b/crypto/bn/bn_div.c
@@ -97,7 +97,7 @@ BN_ULONG bn_div_3_words(const BN_ULONG *m, BN_ULONG d1, BN_ULONG d0);
  */
 #  if BN_BITS2 == 64 && defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
 #   undef BN_ULLONG
-#   define BN_ULLONG __uint128_t
+#   define BN_ULLONG uint128_t
 #   define BN_LLONG
 #  endif
 
diff --git a/crypto/bn/bn_local.h b/crypto/bn/bn_local.h
index e93b7e9a0b..30863713d6 100644
--- a/crypto/bn/bn_local.h
+++ b/crypto/bn/bn_local.h
@@ -23,6 +23,7 @@
 
 # include "crypto/bn.h"
 # include "internal/cryptlib.h"
+# include "internal/numbers.h"
 
 /*
  * These preprocessor symbols control various aspects of the bignum headers
@@ -375,9 +376,9 @@ struct bn_gencb_st {
  */
 #  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16 && \
       (defined(SIXTY_FOUR_BIT) || defined(SIXTY_FOUR_BIT_LONG))
-#   define BN_UMULT_HIGH(a,b)          (((__uint128_t)(a)*(b))>>64)
+#   define BN_UMULT_HIGH(a,b)          (((uint128_t)(a)*(b))>>64)
 #   define BN_UMULT_LOHI(low,high,a,b) ({       \
-        __uint128_t ret=(__uint128_t)(a)*(b);   \
+        uint128_t ret=(uint128_t)(a)*(b);   \
         (high)=ret>>64; (low)=ret;      })
 #  elif defined(__alpha) && (defined(SIXTY_FOUR_BIT_LONG) || defined(SIXTY_FOUR_BIT))
 #   if defined(__DECC)
diff --git a/crypto/ec/build.info b/crypto/ec/build.info
index fff3ab1e1c..e4c8cf6d82 100644
--- a/crypto/ec/build.info
+++ b/crypto/ec/build.info
@@ -48,10 +48,16 @@ $COMMON=ec_lib.c ecp_smpl.c ecp_mont.c ecp_nist.c ec_cvt.c ec_mult.c \
         ec2_smpl.c ec_deprecated.c \
         ecp_oct.c ec2_oct.c ec_oct.c ec_kmeth.c ecdh_ossl.c \
         ecdsa_ossl.c ecdsa_sign.c ecdsa_vrf.c curve25519.c \
-        curve448/arch_32/f_impl.c curve448/f_generic.c curve448/scalar.c \
+        curve448/f_generic.c curve448/scalar.c \
         curve448/curve448_tables.c curve448/eddsa.c curve448/curve448.c \
         $ECASM ec_backend.c ecx_backend.c ecdh_kdf.c
 
+IF[{- $config{'use_int128'} eq "1" -}]
+  $COMMON=$COMMON curve448/arch_64/f_impl.c
+ELSE
+  $COMMON=$COMMON curve448/arch_32/f_impl.c
+ENDIF
+
 IF[{- !$disabled{'ec_nistp_64_gcc_128'} -}]
   $COMMON=$COMMON ecp_nistp224.c ecp_nistp256.c ecp_nistp521.c ecp_nistputil.c
 ENDIF
@@ -87,10 +93,3 @@ GENERATE[ecp_nistz256-ppc64.s]=asm/ecp_nistz256-ppc64.pl
 
 GENERATE[x25519-x86_64.s]=asm/x25519-x86_64.pl
 GENERATE[x25519-ppc64.s]=asm/x25519-ppc64.pl
-
-INCLUDE[curve448/arch_32/f_impl.o]=curve448/arch_32 curve448
-INCLUDE[curve448/f_generic.o]=curve448/arch_32 curve448
-INCLUDE[curve448/scalar.o]=curve448/arch_32 curve448
-INCLUDE[curve448/curve448_tables.o]=curve448/arch_32 curve448
-INCLUDE[curve448/eddsa.o]=curve448/arch_32 curve448
-INCLUDE[curve448/curve448.o]=curve448/arch_32 curve448
diff --git a/crypto/ec/curve25519.c b/crypto/ec/curve25519.c
index 3f24215047..a291e6f472 100644
--- a/crypto/ec/curve25519.c
+++ b/crypto/ec/curve25519.c
@@ -19,6 +19,8 @@
 #include <openssl/evp.h>
 #include <openssl/sha.h>
 
+#include "internal/numbers.h"
+
 #if defined(X25519_ASM) && (defined(__x86_64) || defined(__x86_64__) || \
                             defined(_M_AMD64) || defined(_M_X64))
 
@@ -260,7 +262,7 @@ static void x25519_scalar_mulx(uint8_t out[32], const uint8_t scalar[32],
 #endif
 
 #if defined(X25519_ASM) \
-    || ( (defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16) \
+    || ( defined(INT128_MAX) \
          && !defined(__sparc__) \
          && (!defined(__SIZEOF_LONG__) || (__SIZEOF_LONG__ == 8)) \
          && !(defined(__ANDROID__) && !defined(__clang__)) )
@@ -393,7 +395,7 @@ void x25519_fe51_mul121666(fe51 h, fe51 f);
 #  define fe51_mul121666 x25519_fe51_mul121666
 # else
 
-typedef __uint128_t u128;
+typedef uint128_t u128;
 
 static void fe51_mul(fe51 h, const fe51 f, const fe51 g)
 {
diff --git a/crypto/ec/curve448/arch_32/f_impl.c b/crypto/ec/curve448/arch_32/f_impl.c
index 11aa768f2a..7263ce233d 100644
--- a/crypto/ec/curve448/arch_32/f_impl.c
+++ b/crypto/ec/curve448/arch_32/f_impl.c
@@ -10,7 +10,7 @@
  * Originally written by Mike Hamburg
  */
 
-#include "field.h"
+#include "../field.h"
 
 void gf_mul(gf_s * RESTRICT cs, const gf as, const gf bs)
 {
diff --git a/crypto/ec/curve448/arch_32/arch_intrinsics.h b/crypto/ec/curve448/arch_64/arch_intrinsics.h
similarity index 50%
copy from crypto/ec/curve448/arch_32/arch_intrinsics.h
copy to crypto/ec/curve448/arch_64/arch_intrinsics.h
index 7a54903ac9..30be38e99b 100644
--- a/crypto/ec/curve448/arch_32/arch_intrinsics.h
+++ b/crypto/ec/curve448/arch_64/arch_intrinsics.h
@@ -2,7 +2,7 @@
  * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
  * Copyright 2016 Cryptography Research, Inc.
  *
- * Licensed under the Apache License 2.0 (the "License").  You may not use
+ * Licensed under the OpenSSL license (the "License").  You may not use
  * this file except in compliance with the License.  You can obtain a copy
  * in the file LICENSE in the source distribution or at
  * https://www.openssl.org/source/license.html
@@ -10,18 +10,18 @@
  * Originally written by Mike Hamburg
  */
 
-#ifndef OSSL_CRYPTO_EC_CURVE448_ARCH_32_INTRINSICS_H
-# define OSSL_CRYPTO_EC_CURVE448_ARCH_32_INTRINSICS_H
+#ifndef OSSL_CRYPTO_EC_CURVE448_ARCH_64_INTRINSICS_H
+# define OSSL_CRYPTO_EC_CURVE448_ARCH_64_INTRINSICS_H
 
-#include "internal/constant_time.h"
+# include "internal/constant_time.h"
 
-# define ARCH_WORD_BITS 32
+# define ARCH_WORD_BITS 64
 
-#define word_is_zero(a)     constant_time_is_zero_32(a)
+# define word_is_zero(a)     constant_time_is_zero_64(a)
 
-static ossl_inline uint64_t widemul(uint32_t a, uint32_t b)
+static ossl_inline uint128_t widemul(uint64_t a, uint64_t b)
 {
-    return ((uint64_t)a) * b;
+    return ((uint128_t) a) * b;
 }
 
-#endif                          /* OSSL_CRYPTO_EC_CURVE448_ARCH_32_INTRINSICS_H */
+#endif                          /* OSSL_CRYPTO_EC_CURVE448_ARCH_64_INTRINSICS_H */
diff --git a/crypto/ec/curve448/arch_64/f_impl.c b/crypto/ec/curve448/arch_64/f_impl.c
new file mode 100644
index 0000000000..2b428cd686
--- /dev/null
+++ b/crypto/ec/curve448/arch_64/f_impl.c
@@ -0,0 +1,200 @@
+/*
+ * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2014 Cryptography Research, Inc.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ *
+ * Originally written by Mike Hamburg
+ */
+
+#include "../field.h"
+
+void gf_mul(gf_s * RESTRICT cs, const gf as, const gf bs)
+{
+    const uint64_t *a = as->limb, *b = bs->limb;
+    uint64_t *c = cs->limb;
+    uint128_t accum0 = 0, accum1 = 0, accum2;
+    uint64_t mask = (1ULL << 56) - 1;
+    uint64_t aa[4], bb[4], bbb[4];
+    unsigned int i, j;
+
+    for (i = 0; i < 4; i++) {
+        aa[i] = a[i] + a[i + 4];
+        bb[i] = b[i] + b[i + 4];
+        bbb[i] = bb[i] + b[i + 4];
+    }
+
+    for (i = 0; i < 4; i++) {
+        accum2 = 0;
+
+        for (j = 0; j <= i; j++) {
+            accum2 += widemul(a[j], b[i - j]);
+            accum1 += widemul(aa[j], bb[i - j]);
+            accum0 += widemul(a[j + 4], b[i - j + 4]);
+        }
+        for (; j < 4; j++) {
+            accum2 += widemul(a[j], b[i - j + 8]);
+            accum1 += widemul(aa[j], bbb[i - j + 4]);
+            accum0 += widemul(a[j + 4], bb[i - j + 4]);
+        }
+
+        accum1 -= accum2;
+        accum0 += accum2;
+
+        c[i] = ((uint64_t)(accum0)) & mask;
+        c[i + 4] = ((uint64_t)(accum1)) & mask;
+
+        accum0 >>= 56;
+        accum1 >>= 56;
+    }
+
+    accum0 += accum1;
+    accum0 += c[4];
+    accum1 += c[0];
+    c[4] = ((uint64_t)(accum0)) & mask;
+    c[0] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    c[5] += ((uint64_t)(accum0));
+    c[1] += ((uint64_t)(accum1));
+}
+
+void gf_mulw_unsigned(gf_s * RESTRICT cs, const gf as, uint32_t b)
+{
+    const uint64_t *a = as->limb;
+    uint64_t *c = cs->limb;
+    uint128_t accum0 = 0, accum4 = 0;
+    uint64_t mask = (1ULL << 56) - 1;
+    int i;
+
+    for (i = 0; i < 4; i++) {
+        accum0 += widemul(b, a[i]);
+        accum4 += widemul(b, a[i + 4]);
+        c[i] = accum0 & mask;
+        accum0 >>= 56;
+        c[i + 4] = accum4 & mask;
+        accum4 >>= 56;
+    }
+
+    accum0 += accum4 + c[4];
+    c[4] = accum0 & mask;
+    c[5] += accum0 >> 56;
+
+    accum4 += c[0];
+    c[0] = accum4 & mask;
+    c[1] += accum4 >> 56;
+}
+
+void gf_sqr(gf_s * RESTRICT cs, const gf as)
+{
+    const uint64_t *a = as->limb;
+    uint64_t *c = cs->limb;
+    uint128_t accum0 = 0, accum1 = 0, accum2;
+    uint64_t mask = (1ULL << 56) - 1;
+    uint64_t aa[4];
+    unsigned int i;
+
+    /* For some reason clang doesn't vectorize this without prompting? */
+    for (i = 0; i < 4; i++)
+        aa[i] = a[i] + a[i + 4];
+
+    accum2 = widemul(a[0], a[3]);
+    accum0 = widemul(aa[0], aa[3]);
+    accum1 = widemul(a[4], a[7]);
+
+    accum2 += widemul(a[1], a[2]);
+    accum0 += widemul(aa[1], aa[2]);
+    accum1 += widemul(a[5], a[6]);
+
+    accum0 -= accum2;
+    accum1 += accum2;
+
+    c[3] = ((uint64_t)(accum1)) << 1 & mask;
+    c[7] = ((uint64_t)(accum0)) << 1 & mask;
+
+    accum0 >>= 55;
+    accum1 >>= 55;
+
+    accum0 += widemul(2 * aa[1], aa[3]);
+    accum1 += widemul(2 * a[5], a[7]);
+    accum0 += widemul(aa[2], aa[2]);
+    accum1 += accum0;
+
+    accum0 -= widemul(2 * a[1], a[3]);
+    accum1 += widemul(a[6], a[6]);
+
+    accum2 = widemul(a[0], a[0]);
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    accum0 -= widemul(a[2], a[2]);
+    accum1 += widemul(aa[0], aa[0]);
+    accum0 += widemul(a[4], a[4]);
+
+    c[0] = ((uint64_t)(accum0)) & mask;
+    c[4] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum2 = widemul(2 * aa[2], aa[3]);
+    accum0 -= widemul(2 * a[2], a[3]);
+    accum1 += widemul(2 * a[6], a[7]);
+
+    accum1 += accum2;
+    accum0 += accum2;
+
+    accum2 = widemul(2 * a[0], a[1]);
+    accum1 += widemul(2 * aa[0], aa[1]);
+    accum0 += widemul(2 * a[4], a[5]);
+
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    c[1] = ((uint64_t)(accum0)) & mask;
+    c[5] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum2 = widemul(aa[3], aa[3]);
+    accum0 -= widemul(a[3], a[3]);
+    accum1 += widemul(a[7], a[7]);
+
+    accum1 += accum2;
+    accum0 += accum2;
+
+    accum2 = widemul(2 * a[0], a[2]);
+    accum1 += widemul(2 * aa[0], aa[2]);
+    accum0 += widemul(2 * a[4], a[6]);
+
+    accum2 += widemul(a[1], a[1]);
+    accum1 += widemul(aa[1], aa[1]);
+    accum0 += widemul(a[5], a[5]);
+
+    accum1 -= accum2;
+    accum0 += accum2;
+
+    c[2] = ((uint64_t)(accum0)) & mask;
+    c[6] = ((uint64_t)(accum1)) & mask;
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+
+    accum0 += c[3];
+    accum1 += c[7];
+    c[3] = ((uint64_t)(accum0)) & mask;
+    c[7] = ((uint64_t)(accum1)) & mask;
+
+    /* we could almost stop here, but it wouldn't be stable, so... */
+
+    accum0 >>= 56;
+    accum1 >>= 56;
+    c[4] += ((uint64_t)(accum0)) + ((uint64_t)(accum1));
+    c[0] += ((uint64_t)(accum1));
+}
diff --git a/crypto/ec/curve448/arch_64/f_impl.h b/crypto/ec/curve448/arch_64/f_impl.h
new file mode 100644
index 0000000000..648082c2f3
--- /dev/null
+++ b/crypto/ec/curve448/arch_64/f_impl.h
@@ -0,0 +1,58 @@
+/*
+ * Copyright 2017-2018 The OpenSSL Project Authors. All Rights Reserved.
+ * Copyright 2014-2016 Cryptography Research, Inc.
+ *
+ * Licensed under the OpenSSL license (the "License").  You may not use
+ * this file except in compliance with the License.  You can obtain a copy
+ * in the file LICENSE in the source distribution or at
+ * https://www.openssl.org/source/license.html
+ *
+ * Originally written by Mike Hamburg
+ */
+
+#ifndef OSSL_CRYPTO_EC_CURVE448_ARCH_64_F_IMPL_H
+# define OSSL_CRYPTO_EC_CURVE448_ARCH_64_F_IMPL_H
+
+# define GF_HEADROOM 9999        /* Everything is reduced anyway */
+# define FIELD_LITERAL(a,b,c,d,e,f,g,h) {{a,b,c,d,e,f,g,h}}
+
+# define LIMB_PLACE_VALUE(i) 56
+
+void gf_add_RAW(gf out, const gf a, const gf b)
+{
+    unsigned int i;
+
+    for (i = 0; i < NLIMBS; i++)
+        out->limb[i] = a->limb[i] + b->limb[i];
+
+    gf_weak_reduce(out);
+}
+
+void gf_sub_RAW(gf out, const gf a, const gf b)
+{
+    uint64_t co1 = ((1ULL << 56) - 1) * 2, co2 = co1 - 2;
+    unsigned int i;
+
+    for (i = 0; i < NLIMBS; i++)
+        out->limb[i] = a->limb[i] - b->limb[i] + ((i == NLIMBS / 2) ? co2 : co1);
+
+    gf_weak_reduce(out);
+}
+
+void gf_bias(gf a, int amt)
+{
+}
+
+void gf_weak_reduce(gf a)
+{
+    uint64_t mask = (1ULL << 56) - 1;
+    uint64_t tmp = a->limb[NLIMBS - 1] >> 56;
+    unsigned int i;
+
+    a->limb[NLIMBS / 2] += tmp;
+    for (i = NLIMBS - 1; i > 0; i--)
+        a->limb[i] = (a->limb[i] & mask) + (a->limb[i - 1] >> 56);
+    a->limb[0] = (a->limb[0] & mask) + tmp;
+}
+
+#endif                  /* OSSL_CRYPTO_EC_CURVE448_ARCH_64_F_IMPL_H */
diff --git a/crypto/ec/curve448/curve448utils.h b/crypto/ec/curve448/curve448utils.h
index fa06cb02ec..fd8ae4de70 100644
--- a/crypto/ec/curve448/curve448utils.h
+++ b/crypto/ec/curve448/curve448utils.h
@@ -15,6 +15,8 @@
 
 # include <openssl/e_os2.h>
 
+# include "internal/numbers.h"
+
 /*
  * Internal word types. Somewhat tricky.  This could be decided separately per
  * platform.  However, the structs do need to be all the same size and
@@ -41,9 +43,9 @@ typedef int64_t c448_sword_t;
 /* "Boolean" type, will be set to all-zero or all-one (i.e. -1u) */
 typedef uint64_t c448_bool_t;
 /* Double-word size for internal computations */
-typedef __uint128_t c448_dword_t;
+typedef uint128_t c448_dword_t;
 /* Signed double-word size for internal computations */
-typedef __int128_t c448_dsword_t;
+typedef int128_t c448_dsword_t;
 # elif C448_WORD_BITS == 32
 /* Word size for internal computations */
 typedef uint32_t c448_word_t;
diff --git a/crypto/ec/curve448/field.h b/crypto/ec/curve448/field.h
index 9d6ee1cacc..4f69c0bdd8 100644
--- a/crypto/ec/curve448/field.h
+++ b/crypto/ec/curve448/field.h
@@ -66,10 +66,15 @@ void gf_serialize(uint8_t *serial, const gf x, int with_highbit);
 mask_t gf_deserialize(gf x, const uint8_t serial[SER_BYTES], int with_hibit,
                       uint8_t hi_nmask);
 
-# include "f_impl.h"            /* Bring in the inline implementations */
 
 # define LIMBPERM(i) (i)
-# define LIMB_MASK(i) (((1)<<LIMB_PLACE_VALUE(i))-1)
+# if (ARCH_WORD_BITS == 32)
+#  include "arch_32/f_impl.h"    /* Bring in the inline implementations */
+#  define LIMB_MASK(i) (((1)<<LIMB_PLACE_VALUE(i))-1)
+# elif (ARCH_WORD_BITS == 64)
+#  include "arch_64/f_impl.h"    /* Bring in the inline implementations */
+#  define LIMB_MASK(i) (((1ULL)<<LIMB_PLACE_VALUE(i))-1)
+# endif
 
 static const gf ZERO = {{{0}}}, ONE = {{{1}}};
 
diff --git a/crypto/ec/curve448/word.h b/crypto/ec/curve448/word.h
index d3e6ff863b..f94f086fa9 100644
--- a/crypto/ec/curve448/word.h
+++ b/crypto/ec/curve448/word.h
@@ -17,15 +17,20 @@
 # include <assert.h>
 # include <stdlib.h>
 # include <openssl/e_os2.h>
-# include "arch_intrinsics.h"
 # include "curve448utils.h"
 
+# ifdef INT128_MAX
+#  include "arch_64/arch_intrinsics.h"
+# else
+#  include "arch_32/arch_intrinsics.h"
+# endif
+
 # if (ARCH_WORD_BITS == 64)
 typedef uint64_t word_t, mask_t;
-typedef __uint128_t dword_t;
+typedef uint128_t dword_t;
 typedef int32_t hsword_t;
 typedef int64_t sword_t;
-typedef __int128_t dsword_t;
+typedef int128_t dsword_t;
 # elif (ARCH_WORD_BITS == 32)
 typedef uint32_t word_t, mask_t;
 typedef uint64_t dword_t;
diff --git a/crypto/ec/ecp_nistp224.c b/crypto/ec/ecp_nistp224.c
index c3dc0d9b7d..47f33825ad 100644
--- a/crypto/ec/ecp_nistp224.c
+++ b/crypto/ec/ecp_nistp224.c
@@ -43,11 +43,9 @@
 #include <openssl/err.h>
 #include "ec_local.h"
 
-#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
-  /* even with gcc, the typedef won't work for 32-bit platforms */
-typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
-                                 * platforms */
-#else
+#include "internal/numbers.h"
+
+#ifndef INT128_MAX
 # error "Your compiler doesn't appear to support 128-bit integer types"
 #endif
 
diff --git a/crypto/ec/ecp_nistp256.c b/crypto/ec/ecp_nistp256.c
index c865cd7766..67d2dce9b7 100644
--- a/crypto/ec/ecp_nistp256.c
+++ b/crypto/ec/ecp_nistp256.c
@@ -44,12 +44,9 @@
 #include <openssl/err.h>
 #include "ec_local.h"
 
-#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
-  /* even with gcc, the typedef won't work for 32-bit platforms */
-typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
-                                 * platforms */
-typedef __int128_t int128_t;
-#else
+#include "internal/numbers.h"
+
+#ifndef INT128_MAX
 # error "Your compiler doesn't appear to support 128-bit integer types"
 #endif
 
diff --git a/crypto/ec/ecp_nistp521.c b/crypto/ec/ecp_nistp521.c
index 72468a1d42..694031b45d 100644
--- a/crypto/ec/ecp_nistp521.c
+++ b/crypto/ec/ecp_nistp521.c
@@ -43,11 +43,9 @@
 #include <openssl/err.h>
 #include "ec_local.h"
 
-#if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16
-  /* even with gcc, the typedef won't work for 32-bit platforms */
-typedef __uint128_t uint128_t;  /* nonstandard; implemented by gcc on 64-bit
-                                 * platforms */
-#else
+#include "internal/numbers.h"
+
+#ifndef INT128_MAX
 # error "Your compiler doesn't appear to support 128-bit integer types"
 #endif
 
diff --git a/crypto/poly1305/poly1305.c b/crypto/poly1305/poly1305.c
index 127ce7da2f..be41a6d354 100644
--- a/crypto/poly1305/poly1305.c
+++ b/crypto/poly1305/poly1305.c
@@ -94,11 +94,10 @@ poly1305_blocks(void *ctx, const unsigned char *inp, size_t len, u32 padbit);
          (a ^ ((a ^ b) | ((a - b) ^ b))) >> (sizeof(a) * 8 - 1) \
          )
 
-# if (defined(__SIZEOF_INT128__) && __SIZEOF_INT128__==16) && \
-     (defined(__SIZEOF_LONG__) && __SIZEOF_LONG__==8)
+# if defined(INT64_MAX) && defined(INT128_MAX)
 
 typedef unsigned long u64;
-typedef __uint128_t u128;
+typedef uint128_t u128;
 
 typedef struct {
     u64 h[3];
diff --git a/crypto/poly1305/poly1305_base2_44.c b/crypto/poly1305/poly1305_base2_44.c
index 92f73a43c8..3ed111d04a 100644
--- a/crypto/poly1305/poly1305_base2_44.c
+++ b/crypto/poly1305/poly1305_base2_44.c
@@ -18,7 +18,7 @@
 typedef unsigned char u8;
 typedef unsigned int u32;
 typedef unsigned long u64;
-typedef unsigned __int128 u128;
+typedef uint128_t u128;
 
 typedef struct {
     u64 h[3];
diff --git a/include/internal/constant_time.h b/include/internal/constant_time.h
index cb4ce80830..0ed6f823c1 100644
--- a/include/internal/constant_time.h
+++ b/include/internal/constant_time.h
@@ -182,6 +182,11 @@ static ossl_inline uint32_t constant_time_is_zero_32(uint32_t a)
     return constant_time_msb_32(~a & (a - 1));
 }
 
+static ossl_inline uint64_t constant_time_is_zero_64(uint64_t a)
+{
+    return constant_time_msb_64(~a & (a - 1));
+}
+
 static ossl_inline unsigned int constant_time_eq(unsigned int a,
                                                  unsigned int b)
 {
diff --git a/include/internal/numbers.h b/include/internal/numbers.h
index fc93e59c4b..ac801364d9 100644
--- a/include/internal/numbers.h
+++ b/include/internal/numbers.h
@@ -61,6 +61,16 @@
 #  define UINT64_MAX __MAXUINT__(uint64_t)
 # endif
 
+# ifndef INT128_MAX
+#  if defined(__SIZEOF_INT128__) && __SIZEOF_INT128__ == 16
+typedef __int128_t int128_t;
+typedef __uint128_t uint128_t;
+#   define INT128_MIN __MININT__(int128_t)
+#   define INT128_MAX __MAXINT__(int128_t)
+#   define UINT128_MAX __MAXUINT__(uint128_t)
+#  endif
+# endif
+
 # ifndef SIZE_MAX
 #  define SIZE_MAX __MAXUINT__(size_t)
 # endif