[openssl-commits] [openssl] master update

Andy Polyakov appro at openssl.org
Sun Jun 3 19:17:48 UTC 2018


The branch master has been updated
       via  9a708bf982da1d2c9739339d16d7b021da955e00 (commit)
       via  c1b2569d234c1247d2a7a3338ca4568bc0a489a5 (commit)
       via  c5307d9cc079f379ee0e2bc960abda73046cd4ec (commit)
      from  1e653d0ff7fc2e06a1cb1e05c01feecde84e67d3 (commit)


- Log -----------------------------------------------------------------
commit 9a708bf982da1d2c9739339d16d7b021da955e00
Author: Andy Polyakov <appro at openssl.org>
Date:   Sun May 20 12:13:16 2018 +0200

    {arm64|x86_64}cpuid.pl: add special 16-byte case to OPENSSL_memcmp.
    
    OPENSSL_memcmp is a must in GCM decrypt and general-purpose loop takes
    quite a portion of execution time for short inputs, more than GHASH for
    few-byte inputs according to profiler. Special 16-byte case takes it off
    top five list in profiler output.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/6312)

commit c1b2569d234c1247d2a7a3338ca4568bc0a489a5
Author: Andy Polyakov <appro at openssl.org>
Date:   Sun May 20 12:02:39 2018 +0200

    modes/gcm128.c: coalesce calls to GHASH.
    
    On contemporary platforms assembly GHASH processes multiple blocks
    faster than one by one. For TLS payloads shorter than 16 bytes, e.g.
    alerts, it's possible to reduce hashing operation to single call.
    And for block lengths not divisible by 16 - fold two final calls to
    one. Improvement is most noticeable with "reptoline", because call to
    assembly GHASH is indirect.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/6312)

commit c5307d9cc079f379ee0e2bc960abda73046cd4ec
Author: Andy Polyakov <appro at openssl.org>
Date:   Sun May 20 11:54:24 2018 +0200

    evp/e_aes.c: replace calls to one-liners with references in GCM.
    
    Even though calls can be viewed as styling improvement, they do come
    with cost. It's not big cost and shows only on short inputs, but it is
    measurable, 2-3% on some platforms.
    
    Reviewed-by: Rich Salz <rsalz at openssl.org>
    (Merged from https://github.com/openssl/openssl/pull/6312)

-----------------------------------------------------------------------

Summary of changes:
 crypto/arm64cpuid.pl     |  13 +++
 crypto/evp/e_aes.c       |  73 +++++-------
 crypto/modes/gcm128.c    | 280 +++++++++++++++++++++++++++++++++++++++++------
 crypto/modes/modes_lcl.h |   3 +
 crypto/x86_64cpuid.pl    |  12 ++
 5 files changed, 304 insertions(+), 77 deletions(-)

diff --git a/crypto/arm64cpuid.pl b/crypto/arm64cpuid.pl
index daa2b17..06c8add 100755
--- a/crypto/arm64cpuid.pl
+++ b/crypto/arm64cpuid.pl
@@ -115,6 +115,19 @@ OPENSSL_cleanse:
 CRYPTO_memcmp:
 	eor	w3,w3,w3
 	cbz	x2,.Lno_data	// len==0?
+	cmp	x2,#16
+	b.ne	.Loop_cmp
+	ldp	x8,x9,[x0]
+	ldp	x10,x11,[x1]
+	eor	x8,x8,x10
+	eor	x9,x9,x11
+	orr	x8,x8,x9
+	mov	x0,#1
+	cmp	x8,#0
+	csel	x0,xzr,x0,eq
+	ret
+
+.align	4
 .Loop_cmp:
 	ldrb	w4,[x0],#1
 	ldrb	w5,[x1],#1
diff --git a/crypto/evp/e_aes.c b/crypto/evp/e_aes.c
index 951fc8f..eb37b4b 100644
--- a/crypto/evp/e_aes.c
+++ b/crypto/evp/e_aes.c
@@ -2814,8 +2814,8 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
     case EVP_CTRL_INIT:
         gctx->key_set = 0;
         gctx->iv_set = 0;
-        gctx->ivlen = EVP_CIPHER_CTX_iv_length(c);
-        gctx->iv = EVP_CIPHER_CTX_iv_noconst(c);
+        gctx->ivlen = c->cipher->iv_len;
+        gctx->iv = c->iv;
         gctx->taglen = -1;
         gctx->iv_gen = 0;
         gctx->tls_aad_len = -1;
@@ -2826,7 +2826,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
             return 0;
         /* Allocate memory for IV if needed */
         if ((arg > EVP_MAX_IV_LENGTH) && (arg > gctx->ivlen)) {
-            if (gctx->iv != EVP_CIPHER_CTX_iv_noconst(c))
+            if (gctx->iv != c->iv)
                 OPENSSL_free(gctx->iv);
             if ((gctx->iv = OPENSSL_malloc(arg)) == NULL) {
                 EVPerr(EVP_F_AES_GCM_CTRL, ERR_R_MALLOC_FAILURE);
@@ -2837,17 +2837,17 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
         return 1;
 
     case EVP_CTRL_AEAD_SET_TAG:
-        if (arg <= 0 || arg > 16 || EVP_CIPHER_CTX_encrypting(c))
+        if (arg <= 0 || arg > 16 || c->encrypt)
             return 0;
-        memcpy(EVP_CIPHER_CTX_buf_noconst(c), ptr, arg);
+        memcpy(c->buf, ptr, arg);
         gctx->taglen = arg;
         return 1;
 
     case EVP_CTRL_AEAD_GET_TAG:
-        if (arg <= 0 || arg > 16 || !EVP_CIPHER_CTX_encrypting(c)
+        if (arg <= 0 || arg > 16 || !c->encrypt
             || gctx->taglen < 0)
             return 0;
-        memcpy(ptr, EVP_CIPHER_CTX_buf_noconst(c), arg);
+        memcpy(ptr, c->buf, arg);
         return 1;
 
     case EVP_CTRL_GCM_SET_IV_FIXED:
@@ -2865,8 +2865,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
             return 0;
         if (arg)
             memcpy(gctx->iv, ptr, arg);
-        if (EVP_CIPHER_CTX_encrypting(c)
-            && RAND_bytes(gctx->iv + arg, gctx->ivlen - arg) <= 0)
+        if (c->encrypt && RAND_bytes(gctx->iv + arg, gctx->ivlen - arg) <= 0)
             return 0;
         gctx->iv_gen = 1;
         return 1;
@@ -2887,8 +2886,7 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
         return 1;
 
     case EVP_CTRL_GCM_SET_IV_INV:
-        if (gctx->iv_gen == 0 || gctx->key_set == 0
-            || EVP_CIPHER_CTX_encrypting(c))
+        if (gctx->iv_gen == 0 || gctx->key_set == 0 || c->encrypt)
             return 0;
         memcpy(gctx->iv + gctx->ivlen - arg, ptr, arg);
         CRYPTO_gcm128_setiv(&gctx->gcm, gctx->iv, gctx->ivlen);
@@ -2899,24 +2897,22 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
         /* Save the AAD for later use */
         if (arg != EVP_AEAD_TLS1_AAD_LEN)
             return 0;
-        memcpy(EVP_CIPHER_CTX_buf_noconst(c), ptr, arg);
+        memcpy(c->buf, ptr, arg);
         gctx->tls_aad_len = arg;
         {
-            unsigned int len =
-                EVP_CIPHER_CTX_buf_noconst(c)[arg - 2] << 8
-                | EVP_CIPHER_CTX_buf_noconst(c)[arg - 1];
+            unsigned int len = c->buf[arg - 2] << 8 | c->buf[arg - 1];
             /* Correct length for explicit IV */
             if (len < EVP_GCM_TLS_EXPLICIT_IV_LEN)
                 return 0;
             len -= EVP_GCM_TLS_EXPLICIT_IV_LEN;
             /* If decrypting correct for tag too */
-            if (!EVP_CIPHER_CTX_encrypting(c)) {
+            if (!c->encrypt) {
                 if (len < EVP_GCM_TLS_TAG_LEN)
                     return 0;
                 len -= EVP_GCM_TLS_TAG_LEN;
             }
-            EVP_CIPHER_CTX_buf_noconst(c)[arg - 2] = len >> 8;
-            EVP_CIPHER_CTX_buf_noconst(c)[arg - 1] = len & 0xff;
+            c->buf[arg - 2] = len >> 8;
+            c->buf[arg - 1] = len & 0xff;
         }
         /* Extra padding: tag appended to record */
         return EVP_GCM_TLS_TAG_LEN;
@@ -2930,8 +2926,8 @@ static int aes_gcm_ctrl(EVP_CIPHER_CTX *c, int type, int arg, void *ptr)
                     return 0;
                 gctx_out->gcm.key = &gctx_out->ks;
             }
-            if (gctx->iv == EVP_CIPHER_CTX_iv_noconst(c))
-                gctx_out->iv = EVP_CIPHER_CTX_iv_noconst(out);
+            if (gctx->iv == c->iv)
+                gctx_out->iv = out->iv;
             else {
                 if ((gctx_out->iv = OPENSSL_malloc(gctx->ivlen)) == NULL) {
                     EVPerr(EVP_F_AES_GCM_CTRL, ERR_R_MALLOC_FAILURE);
@@ -2958,8 +2954,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
         do {
 #ifdef HWAES_CAPABLE
             if (HWAES_CAPABLE) {
-                HWAES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
-                                      &gctx->ks.ks);
+                HWAES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
                 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                    (block128_f) HWAES_encrypt);
 # ifdef HWAES_ctr32_encrypt_blocks
@@ -2972,8 +2967,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 #endif
 #ifdef BSAES_CAPABLE
             if (BSAES_CAPABLE) {
-                AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
-                                    &gctx->ks.ks);
+                AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
                 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                    (block128_f) AES_encrypt);
                 gctx->ctr = (ctr128_f) bsaes_ctr32_encrypt_blocks;
@@ -2982,8 +2976,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 #endif
 #ifdef VPAES_CAPABLE
             if (VPAES_CAPABLE) {
-                vpaes_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
-                                      &gctx->ks.ks);
+                vpaes_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
                 CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                    (block128_f) vpaes_encrypt);
                 gctx->ctr = NULL;
@@ -2992,8 +2985,7 @@ static int aes_gcm_init_key(EVP_CIPHER_CTX *ctx, const unsigned char *key,
 #endif
                 (void)0;        /* terminate potentially open 'else' */
 
-            AES_set_encrypt_key(key, EVP_CIPHER_CTX_key_length(ctx) * 8,
-                                &gctx->ks.ks);
+            AES_set_encrypt_key(key, ctx->key_len * 8, &gctx->ks.ks);
             CRYPTO_gcm128_init(&gctx->gcm, &gctx->ks,
                                (block128_f) AES_encrypt);
 #ifdef AES_CTR_ASM
@@ -3045,19 +3037,18 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
      * Set IV from start of buffer or generate IV and write to start of
      * buffer.
      */
-    if (EVP_CIPHER_CTX_ctrl(ctx, EVP_CIPHER_CTX_encrypting(ctx) ?
-                            EVP_CTRL_GCM_IV_GEN : EVP_CTRL_GCM_SET_IV_INV,
+    if (EVP_CIPHER_CTX_ctrl(ctx, ctx->encrypt ? EVP_CTRL_GCM_IV_GEN
+                                              : EVP_CTRL_GCM_SET_IV_INV,
                             EVP_GCM_TLS_EXPLICIT_IV_LEN, out) <= 0)
         goto err;
     /* Use saved AAD */
-    if (CRYPTO_gcm128_aad(&gctx->gcm, EVP_CIPHER_CTX_buf_noconst(ctx),
-                          gctx->tls_aad_len))
+    if (CRYPTO_gcm128_aad(&gctx->gcm, ctx->buf, gctx->tls_aad_len))
         goto err;
     /* Fix buffer and length to point to payload */
     in += EVP_GCM_TLS_EXPLICIT_IV_LEN;
     out += EVP_GCM_TLS_EXPLICIT_IV_LEN;
     len -= EVP_GCM_TLS_EXPLICIT_IV_LEN + EVP_GCM_TLS_TAG_LEN;
-    if (EVP_CIPHER_CTX_encrypting(ctx)) {
+    if (ctx->encrypt) {
         /* Encrypt payload */
         if (gctx->ctr) {
             size_t bulk = 0;
@@ -3136,11 +3127,9 @@ static int aes_gcm_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
                 goto err;
         }
         /* Retrieve tag */
-        CRYPTO_gcm128_tag(&gctx->gcm, EVP_CIPHER_CTX_buf_noconst(ctx),
-                          EVP_GCM_TLS_TAG_LEN);
+        CRYPTO_gcm128_tag(&gctx->gcm, ctx->buf, EVP_GCM_TLS_TAG_LEN);
         /* If tag mismatch wipe buffer */
-        if (CRYPTO_memcmp(EVP_CIPHER_CTX_buf_noconst(ctx), in + len,
-                          EVP_GCM_TLS_TAG_LEN)) {
+        if (CRYPTO_memcmp(ctx->buf, in + len, EVP_GCM_TLS_TAG_LEN)) {
             OPENSSL_cleanse(out, len);
             goto err;
         }
@@ -3170,7 +3159,7 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         if (out == NULL) {
             if (CRYPTO_gcm128_aad(&gctx->gcm, in, len))
                 return -1;
-        } else if (EVP_CIPHER_CTX_encrypting(ctx)) {
+        } else if (ctx->encrypt) {
             if (gctx->ctr) {
                 size_t bulk = 0;
 #if defined(AES_GCM_ASM)
@@ -3261,17 +3250,15 @@ static int aes_gcm_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
         }
         return len;
     } else {
-        if (!EVP_CIPHER_CTX_encrypting(ctx)) {
+        if (!ctx->encrypt) {
             if (gctx->taglen < 0)
                 return -1;
-            if (CRYPTO_gcm128_finish(&gctx->gcm,
-                                     EVP_CIPHER_CTX_buf_noconst(ctx),
-                                     gctx->taglen) != 0)
+            if (CRYPTO_gcm128_finish(&gctx->gcm, ctx->buf, gctx->taglen) != 0)
                 return -1;
             gctx->iv_set = 0;
             return 0;
         }
-        CRYPTO_gcm128_tag(&gctx->gcm, EVP_CIPHER_CTX_buf_noconst(ctx), 16);
+        CRYPTO_gcm128_tag(&gctx->gcm, ctx->buf, 16);
         gctx->taglen = 16;
         /* Don't reuse the IV */
         gctx->iv_set = 0;
diff --git a/crypto/modes/gcm128.c b/crypto/modes/gcm128.c
index 4c8ae91..7a10eb2 100644
--- a/crypto/modes/gcm128.c
+++ b/crypto/modes/gcm128.c
@@ -986,7 +986,7 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     block128_f block = ctx->block;
@@ -1004,9 +1004,23 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to encrypt finalizes GHASH(AAD) */
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
         GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1019,28 +1033,48 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
 #if !defined(OPENSSL_SMALL_FOOTPRINT)
     if (16 % sizeof(size_t) == 0) { /* always true actually */
         do {
             if (n) {
+# if defined(GHASH)
+                while (n && len) {
+                    ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+                    --len;
+                    n = (n + 1) % 16;
+                }
+                if (n == 0) {
+                    GHASH(ctx, ctx->Xn, mres);
+                    mres = 0;
+                } else {
+                    ctx->mres = mres;
+                    return 0;
+                }
+# else
                 while (n && len) {
                     ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
                     --len;
                     n = (n + 1) % 16;
                 }
-                if (n == 0)
+                if (n == 0) {
                     GCM_MUL(ctx);
-                else {
+                    mres = 0;
+                } else {
                     ctx->mres = n;
                     return 0;
                 }
+# endif
             }
 # if defined(STRICT_ALIGNMENT)
             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                 break;
 # endif
 # if defined(GHASH)
+            if (len >= 16 && mres) {
+                GHASH(ctx, ctx->Xn, mres);
+                mres = 0;
+            }
 #  if defined(GHASH_CHUNK)
             while (len >= GHASH_CHUNK) {
                 size_t j = GHASH_CHUNK;
@@ -1128,13 +1162,21 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
 # endif
                 else
                     ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+                while (len--) {
+                    ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+                    ++n;
+                }
+# else
                 while (len--) {
                     ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
                     ++n;
                 }
+                mres = n;
+# endif
             }
 
-            ctx->mres = n;
+            ctx->mres = mres;
             return 0;
         } while (0);
     }
@@ -1152,13 +1194,22 @@ int CRYPTO_gcm128_encrypt(GCM128_CONTEXT *ctx,
             else
                 ctx->Yi.d[3] = ctr;
         }
-        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        ctx->Xn[mres++] = out[i] = in[i] ^ ctx->EKi.c[n];
         n = (n + 1) % 16;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+            mres = 0;
+        }
+#else
+        ctx->Xi.c[n] ^= out[i] = in[i] ^ ctx->EKi.c[n];
+        mres = n = (n + 1) % 16;
         if (n == 0)
             GCM_MUL(ctx);
+#endif
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 }
 
@@ -1170,7 +1221,7 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     block128_f block = ctx->block;
@@ -1188,9 +1239,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to decrypt finalizes GHASH(AAD) */
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
         GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1203,11 +1268,25 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
 #if !defined(OPENSSL_SMALL_FOOTPRINT)
     if (16 % sizeof(size_t) == 0) { /* always true actually */
         do {
             if (n) {
+# if defined(GHASH)
+                while (n && len) {
+                    *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+                    --len;
+                    n = (n + 1) % 16;
+                }
+                if (n == 0) {
+                    GHASH(ctx, ctx->Xn, mres);
+                    mres = 0;
+                } else {
+                    ctx->mres = mres;
+                    return 0;
+                }
+# else
                 while (n && len) {
                     u8 c = *(in++);
                     *(out++) = c ^ ctx->EKi.c[n];
@@ -1215,18 +1294,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
                     --len;
                     n = (n + 1) % 16;
                 }
-                if (n == 0)
+                if (n == 0) {
                     GCM_MUL(ctx);
-                else {
+                    mres = 0;
+                } else {
                     ctx->mres = n;
                     return 0;
                 }
+# endif
             }
 # if defined(STRICT_ALIGNMENT)
             if (((size_t)in | (size_t)out) % sizeof(size_t) != 0)
                 break;
 # endif
 # if defined(GHASH)
+            if (len >= 16 && mres) {
+                GHASH(ctx, ctx->Xn, mres);
+                mres = 0;
+            }
 #  if defined(GHASH_CHUNK)
             while (len >= GHASH_CHUNK) {
                 size_t j = GHASH_CHUNK;
@@ -1315,15 +1400,23 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
 # endif
                 else
                     ctx->Yi.d[3] = ctr;
+# if defined(GHASH)
+                while (len--) {
+                    out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+                    ++n;
+                }
+# else
                 while (len--) {
                     u8 c = in[n];
                     ctx->Xi.c[n] ^= c;
                     out[n] = c ^ ctx->EKi.c[n];
                     ++n;
                 }
+                mres = n;
+# endif
             }
 
-            ctx->mres = n;
+            ctx->mres = mres;
             return 0;
         } while (0);
     }
@@ -1342,15 +1435,24 @@ int CRYPTO_gcm128_decrypt(GCM128_CONTEXT *ctx,
             else
                 ctx->Yi.d[3] = ctr;
         }
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+        out[i] = (ctx->Xn[mres++] = c = in[i]) ^ ctx->EKi.c[n];
+        n = (n + 1) % 16;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx,ctx->Xn,sizeof(ctx->Xn));
+            mres = 0;
+        }
+#else
         c = in[i];
         out[i] = c ^ ctx->EKi.c[n];
         ctx->Xi.c[n] ^= c;
-        n = (n + 1) % 16;
+        mres = n = (n + 1) % 16;
         if (n == 0)
             GCM_MUL(ctx);
+#endif
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 }
 
@@ -1365,7 +1467,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     void *key = ctx->key;
@@ -1382,9 +1484,23 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to encrypt finalizes GHASH(AAD) */
+#if defined(GHASH)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+#else
         GCM_MUL(ctx);
+#endif
         ctx->ares = 0;
     }
 
@@ -1397,30 +1513,51 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
     if (n) {
+# if defined(GHASH)
+        while (n && len) {
+            ctx->Xn[mres++] = *(out++) = *(in++) ^ ctx->EKi.c[n];
+            --len;
+            n = (n + 1) % 16;
+        }
+        if (n == 0) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        } else {
+            ctx->mres = mres;
+            return 0;
+        }
+# else
         while (n && len) {
             ctx->Xi.c[n] ^= *(out++) = *(in++) ^ ctx->EKi.c[n];
             --len;
             n = (n + 1) % 16;
         }
-        if (n == 0)
+        if (n == 0) {
             GCM_MUL(ctx);
-        else {
+            mres = 0;
+        } else {
             ctx->mres = n;
             return 0;
         }
+# endif
     }
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+        if (len >= 16 && mres) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        }
+#  if defined(GHASH_CHUNK)
     while (len >= GHASH_CHUNK) {
         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
         ctr += GHASH_CHUNK / 16;
         if (is_endian.little)
-#  ifdef BSWAP4
+#   ifdef BSWAP4
             ctx->Yi.d[3] = BSWAP4(ctr);
-#  else
+#   else
             PUTU32(ctx->Yi.c + 12, ctr);
-#  endif
+#   endif
         else
             ctx->Yi.d[3] = ctr;
         GHASH(ctx, out, GHASH_CHUNK);
@@ -1428,6 +1565,7 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         in += GHASH_CHUNK;
         len -= GHASH_CHUNK;
     }
+#  endif
 # endif
     if ((i = (len & (size_t)-16))) {
         size_t j = i / 16;
@@ -1468,12 +1606,16 @@ int CRYPTO_gcm128_encrypt_ctr32(GCM128_CONTEXT *ctx,
         else
             ctx->Yi.d[3] = ctr;
         while (len--) {
-            ctx->Xi.c[n] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# if defined(GHASH)
+            ctx->Xn[mres++] = out[n] = in[n] ^ ctx->EKi.c[n];
+# else
+            ctx->Xi.c[mres++] ^= out[n] = in[n] ^ ctx->EKi.c[n];
+# endif
             ++n;
         }
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 #endif
 }
@@ -1489,7 +1631,7 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         long one;
         char little;
     } is_endian = { 1 };
-    unsigned int n, ctr;
+    unsigned int n, ctr, mres;
     size_t i;
     u64 mlen = ctx->len.u[1];
     void *key = ctx->key;
@@ -1506,9 +1648,23 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         return -1;
     ctx->len.u[1] = mlen;
 
+    mres = ctx->mres;
+
     if (ctx->ares) {
         /* First call to decrypt finalizes GHASH(AAD) */
+# if defined(GHASH)
+        if (len == 0) {
+            GCM_MUL(ctx);
+            ctx->ares = 0;
+            return 0;
+        }
+        memcpy(ctx->Xn, ctx->Xi.c, sizeof(ctx->Xi));
+        ctx->Xi.u[0] = 0;
+        ctx->Xi.u[1] = 0;
+        mres = sizeof(ctx->Xi);
+# else
         GCM_MUL(ctx);
+# endif
         ctx->ares = 0;
     }
 
@@ -1521,8 +1677,22 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
     else
         ctr = ctx->Yi.d[3];
 
-    n = ctx->mres;
+    n = mres % 16;
     if (n) {
+# if defined(GHASH)
+        while (n && len) {
+            *(out++) = (ctx->Xn[mres++] = *(in++)) ^ ctx->EKi.c[n];
+            --len;
+            n = (n + 1) % 16;
+        }
+        if (n == 0) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        } else {
+            ctx->mres = mres;
+            return 0;
+        }
+# else
         while (n && len) {
             u8 c = *(in++);
             *(out++) = c ^ ctx->EKi.c[n];
@@ -1530,30 +1700,38 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
             --len;
             n = (n + 1) % 16;
         }
-        if (n == 0)
+        if (n == 0) {
             GCM_MUL(ctx);
-        else {
+            mres = 0;
+        } else {
             ctx->mres = n;
             return 0;
         }
+# endif
     }
-# if defined(GHASH) && defined(GHASH_CHUNK)
+# if defined(GHASH)
+    if (len >= 16 && mres) {
+        GHASH(ctx, ctx->Xn, mres);
+        mres = 0;
+    }
+#  if defined(GHASH_CHUNK)
     while (len >= GHASH_CHUNK) {
         GHASH(ctx, in, GHASH_CHUNK);
         (*stream) (in, out, GHASH_CHUNK / 16, key, ctx->Yi.c);
         ctr += GHASH_CHUNK / 16;
         if (is_endian.little)
-#  ifdef BSWAP4
+#   ifdef BSWAP4
             ctx->Yi.d[3] = BSWAP4(ctr);
-#  else
+#   else
             PUTU32(ctx->Yi.c + 12, ctr);
-#  endif
+#   endif
         else
             ctx->Yi.d[3] = ctr;
         out += GHASH_CHUNK;
         in += GHASH_CHUNK;
         len -= GHASH_CHUNK;
     }
+#  endif
 # endif
     if ((i = (len & (size_t)-16))) {
         size_t j = i / 16;
@@ -1597,14 +1775,18 @@ int CRYPTO_gcm128_decrypt_ctr32(GCM128_CONTEXT *ctx,
         else
             ctx->Yi.d[3] = ctr;
         while (len--) {
+# if defined(GHASH)
+            out[n] = (ctx->Xn[mres++] = in[n]) ^ ctx->EKi.c[n];
+# else
             u8 c = in[n];
-            ctx->Xi.c[n] ^= c;
+            ctx->Xi.c[mres++] ^= c;
             out[n] = c ^ ctx->EKi.c[n];
+# endif
             ++n;
         }
     }
 
-    ctx->mres = n;
+    ctx->mres = mres;
     return 0;
 #endif
 }
@@ -1620,10 +1802,32 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
     u64 clen = ctx->len.u[1] << 3;
 #ifdef GCM_FUNCREF_4BIT
     void (*gcm_gmult_p) (u64 Xi[2], const u128 Htable[16]) = ctx->gmult;
+# if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    void (*gcm_ghash_p) (u64 Xi[2], const u128 Htable[16],
+                         const u8 *inp, size_t len) = ctx->ghash;
+# endif
 #endif
 
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    u128 bitlen;
+    unsigned int mres = ctx->mres;
+
+    if (mres) {
+        unsigned blocks = (mres + 15) & -16;
+
+        memset(ctx->Xn + mres, 0, blocks - mres);
+        mres = blocks;
+        if (mres == sizeof(ctx->Xn)) {
+            GHASH(ctx, ctx->Xn, mres);
+            mres = 0;
+        }
+    } else if (ctx->ares) {
+        GCM_MUL(ctx);
+    }
+#else
     if (ctx->mres || ctx->ares)
         GCM_MUL(ctx);
+#endif
 
     if (is_endian.little) {
 #ifdef BSWAP8
@@ -1640,9 +1844,17 @@ int CRYPTO_gcm128_finish(GCM128_CONTEXT *ctx, const unsigned char *tag,
 #endif
     }
 
+#if defined(GHASH) && !defined(OPENSSL_SMALL_FOOTPRINT)
+    bitlen.hi = alen;
+    bitlen.lo = clen;
+    memcpy(ctx->Xn + mres, &bitlen, sizeof(bitlen));
+    mres += sizeof(bitlen);
+    GHASH(ctx, ctx->Xn, mres);
+#else
     ctx->Xi.u[0] ^= alen;
     ctx->Xi.u[1] ^= clen;
     GCM_MUL(ctx);
+#endif
 
     ctx->Xi.u[0] ^= ctx->EK0.u[0];
     ctx->Xi.u[1] ^= ctx->EK0.u[1];
diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h
index 16dcf4e..fa85460 100644
--- a/crypto/modes/modes_lcl.h
+++ b/crypto/modes/modes_lcl.h
@@ -128,6 +128,9 @@ struct gcm128_context {
     unsigned int mres, ares;
     block128_f block;
     void *key;
+#if !defined(OPENSSL_SMALL_FOOTPRINT)
+    unsigned char Xn[48];
+#endif
 };
 
 struct xts128_context {
diff --git a/crypto/x86_64cpuid.pl b/crypto/x86_64cpuid.pl
index 513d005..6423e80 100644
--- a/crypto/x86_64cpuid.pl
+++ b/crypto/x86_64cpuid.pl
@@ -271,6 +271,18 @@ CRYPTO_memcmp:
 	xor	%r10,%r10
 	cmp	\$0,$arg3
 	je	.Lno_data
+	cmp	\$16,$arg3
+	jne	.Loop_cmp
+	mov	($arg1),%r10
+	mov	8($arg1),%r11
+	mov	\$1,$arg3
+	xor	($arg2),%r10
+	xor	8($arg2),%r11
+	or	%r11,%r10
+	cmovnz	$arg3,%rax
+	ret
+
+.align	16
 .Loop_cmp:
 	mov	($arg1),%r10b
 	lea	1($arg1),$arg1


More information about the openssl-commits mailing list