[openssl-commits] [openssl] master update
Andy Polyakov
appro at openssl.org
Wed Dec 2 09:50:33 UTC 2015
The branch master has been updated
via 81f3d6323dcda6a18b06c718600d6a4739e83263 (commit)
via b9e3d7e0f6678a991621cfbc4b11ace7860031a0 (commit)
from 338f5727c88b1ce44a802c5115707309d6316fc4 (commit)
- Log -----------------------------------------------------------------
commit 81f3d6323dcda6a18b06c718600d6a4739e83263
Author: Andy Polyakov <appro at openssl.org>
Date: Mon Nov 30 23:07:38 2015 +0100
modes/ocb128.c: split fixed-size block XORs into aligned and misaligned variants.
The main goal was to improve performance on RISC platforms; e.g. a 10%
improvement was measured on MIPS, POWER8...
Reviewed-by: Matt Caswell <matt at openssl.org>
commit b9e3d7e0f6678a991621cfbc4b11ace7860031a0
Author: Andy Polyakov <appro at openssl.org>
Date: Mon Nov 30 13:26:21 2015 +0100
modes/ocb128.c: make ocb_lookup_l allow non-contiguous lookups
and make CRYPTO_ocb128_encrypt handle in==out (in-place operation).
Reviewed-by: Matt Caswell <matt at openssl.org>
-----------------------------------------------------------------------
Summary of changes:
crypto/modes/modes_lcl.h | 25 +++++++------
crypto/modes/ocb128.c | 93 ++++++++++++++++++++++--------------------------
2 files changed, 54 insertions(+), 64 deletions(-)
diff --git a/crypto/modes/modes_lcl.h b/crypto/modes/modes_lcl.h
index 0fd11ce..2f61afe 100644
--- a/crypto/modes/modes_lcl.h
+++ b/crypto/modes/modes_lcl.h
@@ -144,20 +144,19 @@ struct ccm128_context {
#ifndef OPENSSL_NO_OCB
-# ifdef STRICT_ALIGNMENT
-typedef struct {
- unsigned char a[16];
+typedef union {
+ u64 a[2];
+ unsigned char c[16];
} OCB_BLOCK;
-# define ocb_block16_xor(in1,in2,out) \
- ocb_block_xor((in1)->a,(in2)->a,16,(out)->a)
-# else /* STRICT_ALIGNMENT */
-typedef struct {
- u64 a;
- u64 b;
-} OCB_BLOCK;
-# define ocb_block16_xor(in1,in2,out) \
- (out)->a=(in1)->a^(in2)->a; (out)->b=(in1)->b^(in2)->b;
-# endif /* STRICT_ALIGNMENT */
+# define ocb_block16_xor(in1,in2,out) \
+ ( (out)->a[0]=(in1)->a[0]^(in2)->a[0], \
+ (out)->a[1]=(in1)->a[1]^(in2)->a[1] )
+# if STRICT_ALIGNMENT
+# define ocb_block16_xor_misaligned(in1,in2,out) \
+ ocb_block_xor((in1)->c,(in2)->c,16,(out)->c)
+# else
+# define ocb_block16_xor_misaligned ocb_block16_xor
+# endif
struct ocb128_context {
/* Need both encrypt and decrypt key schedules for decryption */
diff --git a/crypto/modes/ocb128.c b/crypto/modes/ocb128.c
index 2685652..d49aa6e 100644
--- a/crypto/modes/ocb128.c
+++ b/crypto/modes/ocb128.c
@@ -53,11 +53,6 @@
#ifndef OPENSSL_NO_OCB
-union ublock {
- unsigned char *chrblk;
- OCB_BLOCK *ocbblk;
-};
-
/*
* Calculate the number of trailing zero bits in any given number
*/
@@ -88,23 +83,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out)
unsigned char shift_mask;
int i;
unsigned char mask[15];
- union ublock locin;
- union ublock locout;
-
- locin.ocbblk = in;
- locout.ocbblk = out;
shift_mask = 0xff;
shift_mask <<= (8 - shift);
for (i = 15; i >= 0; i--) {
if (i > 0) {
- mask[i - 1] = locin.chrblk[i] & shift_mask;
+ mask[i - 1] = in->c[i] & shift_mask;
mask[i - 1] >>= 8 - shift;
}
- locout.chrblk[i] = locin.chrblk[i] << shift;
+ out->c[i] = in->c[i] << shift;
if (i != 15) {
- locout.chrblk[i] ^= mask[i];
+ out->c[i] ^= mask[i];
}
}
}
@@ -115,23 +105,18 @@ static void ocb_block_lshift(OCB_BLOCK *in, size_t shift, OCB_BLOCK *out)
static void ocb_double(OCB_BLOCK *in, OCB_BLOCK *out)
{
unsigned char mask;
- union ublock locin;
- union ublock locout;
-
- locin.ocbblk = in;
- locout.ocbblk = out;
/*
* Calculate the mask based on the most significant bit. There are more
* efficient ways to do this - but this way is constant time
*/
- mask = locin.chrblk[0] & 0x80;
+ mask = in->c[0] & 0x80;
mask >>= 7;
mask *= 135;
ocb_block_lshift(in, 1, out);
- locout.chrblk[15] ^= mask;
+ out->c[15] ^= mask;
}
/*
@@ -153,20 +138,34 @@ static void ocb_block_xor(const unsigned char *in1,
*/
static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx)
{
- if (idx <= ctx->l_index) {
+ size_t l_index = ctx->l_index;
+
+ if (idx <= l_index) {
return ctx->l + idx;
}
/* We don't have it - so calculate it */
- ctx->l_index++;
- if (ctx->l_index == ctx->max_l_index) {
- ctx->max_l_index *= 2;
+ if (idx >= ctx->max_l_index) {
+ /*
+ * Each additional entry allows to process almost double as
+ * much data, so that in linear world the table will need to
+ * be expanded with smaller and smaller increments. Originally
+ * it was doubling in size, which was a waste. Growing it
+ * linearly is not formally optimal, but is simpler to implement.
+ * We grow table by minimally required 4*n that would accommodate
+ * the index.
+ */
+ ctx->max_l_index += (idx - ctx->max_l_index + 4) & ~3;
ctx->l =
OPENSSL_realloc(ctx->l, ctx->max_l_index * sizeof(OCB_BLOCK));
if (!ctx->l)
return NULL;
}
- ocb_double(ctx->l + (idx - 1), ctx->l + idx);
+ while (l_index <= idx) {
+ ocb_double(ctx->l + l_index, ctx->l + l_index + 1);
+ l_index++;
+ }
+ ctx->l_index = l_index;
return ctx->l + idx;
}
@@ -177,13 +176,7 @@ static OCB_BLOCK *ocb_lookup_l(OCB128_CONTEXT *ctx, size_t idx)
static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
void *keyenc)
{
- union ublock locin;
- union ublock locout;
-
- locin.ocbblk = in;
- locout.ocbblk = out;
-
- ctx->encrypt(locin.chrblk, locout.chrblk, keyenc);
+ ctx->encrypt(in->c, out->c, keyenc);
}
/*
@@ -192,13 +185,7 @@ static void ocb_encrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
static void ocb_decrypt(OCB128_CONTEXT *ctx, OCB_BLOCK *in, OCB_BLOCK *out,
void *keydec)
{
- union ublock locin;
- union ublock locout;
-
- locin.ocbblk = in;
- locout.ocbblk = out;
-
- ctx->decrypt(locin.chrblk, locout.chrblk, keydec);
+ ctx->decrypt(in->c, out->c, keydec);
}
/*
@@ -228,7 +215,7 @@ int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec,
{
memset(ctx, 0, sizeof(*ctx));
ctx->l_index = 0;
- ctx->max_l_index = 1;
+ ctx->max_l_index = 5;
ctx->l = OPENSSL_malloc(ctx->max_l_index * 16);
if (ctx->l == NULL)
return 0;
@@ -252,6 +239,13 @@ int CRYPTO_ocb128_init(OCB128_CONTEXT *ctx, void *keyenc, void *keydec,
/* L_0 = double(L_$) */
ocb_double(&ctx->l_dollar, ctx->l);
+ /* L_{i} = double(L_{i-1}) */
+ ocb_double(ctx->l, ctx->l+1);
+ ocb_double(ctx->l+1, ctx->l+2);
+ ocb_double(ctx->l+2, ctx->l+3);
+ ocb_double(ctx->l+3, ctx->l+4);
+ ctx->l_index = 4; /* enough to process up to 496 bytes */
+
return 1;
}
@@ -284,9 +278,6 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
unsigned char ktop[16], tmp[16], mask;
unsigned char stretch[24], nonce[16];
size_t bottom, shift;
- union ublock offset;
-
- offset.ocbblk = &ctx->offset;
/*
* Spec says IV is 120 bits or fewer - it allows non byte aligned lengths.
@@ -320,7 +311,7 @@ int CRYPTO_ocb128_setiv(OCB128_CONTEXT *ctx, const unsigned char *iv,
&ctx->offset);
mask = 0xff;
mask <<= 8 - shift;
- offset.chrblk[15] |=
+ ctx->offset.c[15] |=
(*(stretch + (bottom / 8) + 16) & mask) >> (8 - shift);
return 1;
@@ -423,14 +414,14 @@ int CRYPTO_ocb128_encrypt(OCB128_CONTEXT *ctx,
/* C_i = Offset_i xor ENCIPHER(K, P_i xor Offset_i) */
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
- ocb_block16_xor(&ctx->offset, inblock, &tmp1);
+ ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
+ /* Checksum_i = Checksum_{i-1} xor P_i */
+ ocb_block16_xor_misaligned(&ctx->checksum, inblock, &ctx->checksum);
ocb_encrypt(ctx, &tmp1, &tmp2, ctx->keyenc);
outblock =
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
- ocb_block16_xor(&ctx->offset, &tmp2, outblock);
+ ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
- /* Checksum_i = Checksum_{i-1} xor P_i */
- ocb_block16_xor(&ctx->checksum, inblock, &ctx->checksum);
}
/*
@@ -496,14 +487,14 @@ int CRYPTO_ocb128_decrypt(OCB128_CONTEXT *ctx,
/* P_i = Offset_i xor DECIPHER(K, C_i xor Offset_i) */
inblock = (OCB_BLOCK *)(in + ((i - ctx->blocks_processed - 1) * 16));
- ocb_block16_xor(&ctx->offset, inblock, &tmp1);
+ ocb_block16_xor_misaligned(&ctx->offset, inblock, &tmp1);
ocb_decrypt(ctx, &tmp1, &tmp2, ctx->keydec);
outblock =
(OCB_BLOCK *)(out + ((i - ctx->blocks_processed - 1) * 16));
- ocb_block16_xor(&ctx->offset, &tmp2, outblock);
+ ocb_block16_xor_misaligned(&ctx->offset, &tmp2, outblock);
/* Checksum_i = Checksum_{i-1} xor P_i */
- ocb_block16_xor(&ctx->checksum, outblock, &ctx->checksum);
+ ocb_block16_xor_misaligned(&ctx->checksum, outblock, &ctx->checksum);
}
/*
More information about the openssl-commits
mailing list