[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Fri Jul 6 14:33:59 UTC 2018
The branch master has been updated
via 0edb109f97c1bbbd5961326f93b2ccf385b26674 (commit)
from 2ce71b60272325c4453914b501a2c2ff1b75c80d (commit)
- Log -----------------------------------------------------------------
commit 0edb109f97c1bbbd5961326f93b2ccf385b26674
Author: Andy Polyakov <appro at openssl.org>
Date: Tue Jul 3 21:34:08 2018 +0200
evp/e_chacha20_poly1305.c: further improve small-fragment TLS performance.
Improvement coefficients vary with TLS fragment length and platform, on
most Intel processors maximum improvement is ~50%, while on Ryzen - 80%.
The "secret" is new dedicated ChaCha20_128 code path and vectorized xor
helpers.
Reviewed-by: Rich Salz <rsalz at openssl.org>
(Merged from https://github.com/openssl/openssl/pull/6638)
-----------------------------------------------------------------------
Summary of changes:
crypto/evp/e_chacha20_poly1305.c | 54 ++++++++++++++---
crypto/poly1305/asm/poly1305-x86_64.pl | 104 +++++++++++++++++++++++++++++++++
2 files changed, 150 insertions(+), 8 deletions(-)
diff --git a/crypto/evp/e_chacha20_poly1305.c b/crypto/evp/e_chacha20_poly1305.c
index 47d5e50..6a9bccf 100644
--- a/crypto/evp/e_chacha20_poly1305.c
+++ b/crypto/evp/e_chacha20_poly1305.c
@@ -196,14 +196,23 @@ static int chacha20_poly1305_init_key(EVP_CIPHER_CTX *ctx,
}
# if !defined(OPENSSL_SMALL_FOOTPRINT)
+
+# if defined(POLY1305_ASM) && (defined(__x86_64) || defined(__x86_64__) || \
+ defined(_M_AMD64) || defined(_M_X64))
+# define XOR128_HELPERS
+void *xor128_encrypt_n_pad(void *out, const void *inp, void *otp, size_t len);
+void *xor128_decrypt_n_pad(void *out, const void *inp, void *otp, size_t len);
+static const unsigned char zero[4 * CHACHA_BLK_SIZE] = { 0 };
+# else
static const unsigned char zero[2 * CHACHA_BLK_SIZE] = { 0 };
+# endif
static int chacha20_poly1305_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
const unsigned char *in, size_t len)
{
EVP_CHACHA_AEAD_CTX *actx = aead_data(ctx);
- size_t i, tail, tohash_len, plen = actx->tls_payload_length;
- unsigned char *buf, *tohash, *ctr, storage[2 * CHACHA_BLK_SIZE + 32];
+ size_t tail, tohash_len, buf_len, plen = actx->tls_payload_length;
+ unsigned char *buf, *tohash, *ctr, storage[sizeof(zero) + 32];
if (len != plen + POLY1305_BLOCK_SIZE)
return -1;
@@ -212,9 +221,11 @@ static int chacha20_poly1305_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
ctr = buf + CHACHA_BLK_SIZE;
tohash = buf + CHACHA_BLK_SIZE - POLY1305_BLOCK_SIZE;
- if (plen <= CHACHA_BLK_SIZE) {
+# ifdef XOR128_HELPERS
+ if (plen <= 3 * CHACHA_BLK_SIZE) {
actx->key.counter[0] = 0;
- ChaCha20_ctr32(buf, zero, 2 * CHACHA_BLK_SIZE, actx->key.key.d,
+ buf_len = (plen + 2 * CHACHA_BLK_SIZE - 1) & (0 - CHACHA_BLK_SIZE);
+ ChaCha20_ctr32(buf, zero, buf_len, actx->key.key.d,
actx->key.counter);
Poly1305_Init(POLY1305_ctx(actx), buf);
actx->key.partial_len = 0;
@@ -223,6 +234,31 @@ static int chacha20_poly1305_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
actx->len.aad = EVP_AEAD_TLS1_AAD_LEN;
actx->len.text = plen;
+ if (plen) {
+ if (ctx->encrypt)
+ ctr = xor128_encrypt_n_pad(out, in, ctr, plen);
+ else
+ ctr = xor128_decrypt_n_pad(out, in, ctr, plen);
+
+ in += plen;
+ out += plen;
+ tohash_len = (size_t)(ctr - tohash);
+ }
+ }
+# else
+ if (plen <= CHACHA_BLK_SIZE) {
+ size_t i;
+
+ actx->key.counter[0] = 0;
+ ChaCha20_ctr32(buf, zero, (buf_len = 2 * CHACHA_BLK_SIZE),
+ actx->key.key.d, actx->key.counter);
+ Poly1305_Init(POLY1305_ctx(actx), buf);
+ actx->key.partial_len = 0;
+ memcpy(tohash, actx->tls_aad, POLY1305_BLOCK_SIZE);
+ tohash_len = POLY1305_BLOCK_SIZE;
+ actx->len.aad = EVP_AEAD_TLS1_AAD_LEN;
+ actx->len.text = plen;
+
if (ctx->encrypt) {
for (i = 0; i < plen; i++) {
out[i] = ctr[i] ^= in[i];
@@ -242,10 +278,12 @@ static int chacha20_poly1305_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
memset(ctr + i, 0, tail);
ctr += i + tail;
tohash_len += i + tail;
- } else {
+ }
+# endif
+ else {
actx->key.counter[0] = 0;
- ChaCha20_ctr32(buf, zero, CHACHA_BLK_SIZE, actx->key.key.d,
- actx->key.counter);
+ ChaCha20_ctr32(buf, zero, (buf_len = CHACHA_BLK_SIZE),
+ actx->key.key.d, actx->key.counter);
Poly1305_Init(POLY1305_ctx(actx), buf);
actx->key.counter[0] = 1;
actx->key.partial_len = 0;
@@ -300,7 +338,7 @@ static int chacha20_poly1305_tls_cipher(EVP_CIPHER_CTX *ctx, unsigned char *out,
}
Poly1305_Update(POLY1305_ctx(actx), tohash, tohash_len);
- OPENSSL_cleanse(buf, 2 * CHACHA_BLK_SIZE);
+ OPENSSL_cleanse(buf, buf_len);
Poly1305_Final(POLY1305_ctx(actx), ctx->encrypt ? actx->tag
: tohash);
diff --git a/crypto/poly1305/asm/poly1305-x86_64.pl b/crypto/poly1305/asm/poly1305-x86_64.pl
index 0d1c0de..0b4c56e 100755
--- a/crypto/poly1305/asm/poly1305-x86_64.pl
+++ b/crypto/poly1305/asm/poly1305-x86_64.pl
@@ -3753,6 +3753,110 @@ poly1305_emit_base2_44:
.size poly1305_emit_base2_44,.-poly1305_emit_base2_44
___
} } }
+
+{ # chacha20-poly1305 helpers
+my ($out,$inp,$otp,$len)=$win64 ? ("%rcx","%rdx","%r8", "%r9") : # Win64 order
+ ("%rdi","%rsi","%rdx","%rcx"); # Unix order
+$code.=<<___;
+.globl xor128_encrypt_n_pad
+.type xor128_encrypt_n_pad,\@abi-omnipotent
+.align 16
+xor128_encrypt_n_pad:
+ sub $otp,$inp
+ sub $otp,$out
+ mov $len,%r10 # put len aside
+ shr \$4,$len # len / 16
+ jz .Ltail_enc
+ nop
+.Loop_enc_xmm:
+ movdqu ($inp,$otp),%xmm0
+ pxor ($otp),%xmm0
+ movdqu %xmm0,($out,$otp)
+ movdqa %xmm0,($otp)
+ lea 16($otp),$otp
+ dec $len
+ jnz .Loop_enc_xmm
+
+ and \$15,%r10 # len % 16
+ jz .Ldone_enc
+
+.Ltail_enc:
+ mov \$16,$len
+ sub %r10,$len
+ xor %eax,%eax
+.Loop_enc_byte:
+ mov ($inp,$otp),%al
+ xor ($otp),%al
+ mov %al,($out,$otp)
+ mov %al,($otp)
+ lea 1($otp),$otp
+ dec %r10
+ jnz .Loop_enc_byte
+
+ xor %eax,%eax
+.Loop_enc_pad:
+ mov %al,($otp)
+ lea 1($otp),$otp
+ dec $len
+ jnz .Loop_enc_pad
+
+.Ldone_enc:
+ mov $otp,%rax
+ ret
+.size xor128_encrypt_n_pad,.-xor128_encrypt_n_pad
+
+.globl xor128_decrypt_n_pad
+.type xor128_decrypt_n_pad,\@abi-omnipotent
+.align 16
+xor128_decrypt_n_pad:
+ sub $otp,$inp
+ sub $otp,$out
+ mov $len,%r10 # put len aside
+ shr \$4,$len # len / 16
+ jz .Ltail_dec
+ nop
+.Loop_dec_xmm:
+ movdqu ($inp,$otp),%xmm0
+ movdqa ($otp),%xmm1
+ pxor %xmm0,%xmm1
+ movdqu %xmm1,($out,$otp)
+ movdqa %xmm0,($otp)
+ lea 16($otp),$otp
+ dec $len
+ jnz .Loop_dec_xmm
+
+ pxor %xmm1,%xmm1
+ and \$15,%r10 # len % 16
+ jz .Ldone_dec
+
+.Ltail_dec:
+ mov \$16,$len
+ sub %r10,$len
+ xor %eax,%eax
+ xor %r11,%r11
+.Loop_dec_byte:
+ mov ($inp,$otp),%r11b
+ mov ($otp),%al
+ xor %r11b,%al
+ mov %al,($out,$otp)
+ mov %r11b,($otp)
+ lea 1($otp),$otp
+ dec %r10
+ jnz .Loop_dec_byte
+
+ xor %eax,%eax
+.Loop_dec_pad:
+ mov %al,($otp)
+ lea 1($otp),$otp
+ dec $len
+ jnz .Loop_dec_pad
+
+.Ldone_dec:
+ mov $otp,%rax
+ ret
+.size xor128_decrypt_n_pad,.-xor128_decrypt_n_pad
+___
+}
$code.=<<___;
.align 64
.Lconst:
More information about the openssl-commits
mailing list