[openssl-dev] [openssl.org #3741] [PATCH] Better side-channel security for ECC table lookups
Billy Brumley via RT
rt at openssl.org
Wed Mar 11 07:17:37 UTC 2015
This patch relates to RT 3667 and the paper here (to appear at COSADE 2015):
https://eprint.iacr.org/2015/036
Instead of a direct table lookup for precomputed points in the generic
ECC multi-scalar multiplication routine, it computes the point by
traversing the entire table. Motivation is better side-channel
security.
Benchmarking below (evidently code path is not executed for nistp256
anymore). Costs are 6-10% for ECDSA sign, 2-6% for ECDSA verify
(defense unnecessary, but AFAICT there is no way to authoritatively
distinguish at this depth whether to protect scalars or not), and 1-5%
for ECDH.
BBB
openssl speed ecdsa before:
sign verify sign/s verify/s
160 bit ecdsa (secp160r1) 0.0000s 0.0002s 20818.1 5670.3
192 bit ecdsa (nistp192) 0.0001s 0.0002s 17844.1 4570.7
224 bit ecdsa (nistp224) 0.0001s 0.0003s 13452.7 3413.4
256 bit ecdsa (nistp256) 0.0000s 0.0001s 26485.2 10577.4
384 bit ecdsa (nistp384) 0.0002s 0.0007s 5961.0 1438.0
521 bit ecdsa (nistp521) 0.0004s 0.0015s 2837.1 684.5
openssl speed ecdsa after:
sign verify sign/s verify/s
160 bit ecdsa (secp160r1) 0.0001s 0.0002s 19056.1 5369.1
192 bit ecdsa (nistp192) 0.0001s 0.0002s 16270.0 4490.4
224 bit ecdsa (nistp224) 0.0001s 0.0003s 12150.9 3354.7
256 bit ecdsa (nistp256) 0.0000s 0.0001s 26489.9 10554.9
384 bit ecdsa (nistp384) 0.0002s 0.0007s 5470.9 1355.5
521 bit ecdsa (nistp521) 0.0004s 0.0015s 2673.3 667.4
openssl speed ecdh before:
op op/s
160 bit ecdh (secp160r1) 0.0001s 7025.0
192 bit ecdh (nistp192) 0.0002s 5786.0
224 bit ecdh (nistp224) 0.0002s 4158.1
256 bit ecdh (nistp256) 0.0001s 14899.7
384 bit ecdh (nistp384) 0.0006s 1732.1
521 bit ecdh (nistp521) 0.0012s 820.4
openssl speed ecdh after:
op op/s
160 bit ecdh (secp160r1) 0.0001s 6750.8
192 bit ecdh (nistp192) 0.0002s 5477.6
224 bit ecdh (nistp224) 0.0002s 4060.2
256 bit ecdh (nistp256) 0.0001s 14886.2
384 bit ecdh (nistp384) 0.0006s 1709.7
521 bit ecdh (nistp521) 0.0012s 802.1
(I do not have root on the benchmarking machine to peg the clock. Sorry.)
$ cat /proc/cpuinfo
processor : 0
vendor_id : GenuineIntel
cpu family : 6
model : 60
model name : Intel(R) Core(TM) i5-4570 CPU @ 3.20GHz
stepping : 3
microcode : 26
cpu MHz : 800.000
cache size : 6144 KB
physical id : 0
siblings : 4
core id : 0
cpu cores : 4
apicid : 0
initial apicid : 0
fpu : yes
fpu_exception : yes
cpuid level : 13
wp : yes
flags : fpu vme de pse tsc msr pae mce cx8 apic sep mtrr pge
mca cmov pat pse36 clflush dts acpi mmx fxsr sse sse2 ss ht tm pbe
syscall nx pdpe1gb rdtscp lm constant_tsc arch_perfmon pebs bts
rep_good xtopology nonstop_tsc aperfmperf pni pclmulqdq dtes64 monitor
ds_cpl vmx smx est tm2 ssse3 fma cx16 xtpr pdcm pcid sse4_1 sse4_2
x2apic movbe popcnt tsc_deadline_timer aes xsave avx f16c rdrand
lahf_lm abm ida arat epb xsaveopt pln pts dts tpr_shadow vnmi
flexpriority ept vpid fsgsbase bmi1 hle avx2 smep bmi2 erms invpcid
rtm
bogomips : 6385.38
clflush size : 64
cache_alignment : 64
address sizes : 39 bits physical, 48 bits virtual
power management: SNIP
-------------- next part --------------
diff -rupN openssl-1.0.2_orig/crypto/ec/ec_mult.c openssl-1.0.2/crypto/ec/ec_mult.c
--- openssl-1.0.2_orig/crypto/ec/ec_mult.c 2015-01-22 16:58:32.000000000 +0200
+++ openssl-1.0.2/crypto/ec/ec_mult.c 2015-03-10 13:06:50.661644241 +0200
@@ -306,6 +306,66 @@ static signed char *compute_wNAF(const B
return r;
}
+/**
+ * Traverses t[i] for 0 <= i < 2**(w-1) and places t[digit >> 1] in r
+ * with bitwise operations as a side-channel defense.
+ * Assumes r has enough BIGNUM space so tops of BIGNUMs in t do not overflow.
+ * See https://eprint.iacr.org/2015/036
+ */
+void EC_POINT_select(EC_POINT *r, EC_POINT **t, int digit, int w, int top) {
+
+ w = (1 << w);
+
+ int d, j;
+ BN_ULONG mask;
+
+ for(j=0; j<top; j++) {
+ r->X.d[j] = 0;
+ r->Y.d[j] = 0;
+ r->Z.d[j] = 0;
+ }
+ r->X.top = 0;
+ r->X.dmax = 0;
+ r->X.neg = 0;
+ r->X.flags = 0;
+ r->Y.top = 0;
+ r->Y.dmax = 0;
+ r->Y.neg = 0;
+ r->Y.flags = 0;
+ r->Z.top = 0;
+ r->Z.dmax = 0;
+ r->Z.neg = 0;
+ r->Z.flags = 0;
+ r->Z_is_one = 0;
+
+ for(d=1; d<w; d+=2) {
+ mask = (BN_ULONG)(((digit ^ d) - 1) >> 15);
+ for(j=0; j<t[d >> 1]->X.top; j++) {
+ r->X.d[j] |= mask & t[d >> 1]->X.d[j];
+ }
+ for(j=0; j<t[d >> 1]->Y.top; j++) {
+ r->Y.d[j] |= mask & t[d >> 1]->Y.d[j];
+ }
+ for(j=0; j<t[d >> 1]->Z.top; j++) {
+ r->Z.d[j] |= mask & t[d >> 1]->Z.d[j];
+ }
+ r->X.top |= mask & (t[d >> 1]->X.top );
+ r->X.dmax |= mask & (t[d >> 1]->X.dmax );
+ r->X.neg |= mask & (t[d >> 1]->X.neg );
+ r->X.flags |= mask & (t[d >> 1]->X.flags );
+ r->Y.top |= mask & (t[d >> 1]->Y.top );
+ r->Y.dmax |= mask & (t[d >> 1]->Y.dmax );
+ r->Y.neg |= mask & (t[d >> 1]->Y.neg );
+ r->Y.flags |= mask & (t[d >> 1]->Y.flags );
+ r->Z.top |= mask & (t[d >> 1]->Z.top );
+ r->Z.dmax |= mask & (t[d >> 1]->Z.dmax );
+ r->Z.neg |= mask & (t[d >> 1]->Z.neg );
+ r->Z.flags |= mask & (t[d >> 1]->Z.flags );
+ r->Z_is_one |= mask & (t[d >> 1]->Z_is_one);
+ }
+
+}
+
/*
* TODO: table should be optimised for the wNAF-based implementation,
* sometimes smaller windows will give better performance (thus the
@@ -532,6 +592,7 @@ int ec_wNAF_mul(const EC_GROUP *group, E
tmp_points = pre_comp->points;
for (i = num; i < totalnum; i++) {
+ wsize[i] = wsize[num];
if (i < totalnum - 1) {
wNAF_len[i] = blocksize;
if (tmp_len < blocksize) {
@@ -635,6 +696,11 @@ int ec_wNAF_mul(const EC_GROUP *group, E
goto err;
#endif
+ /* use tmp for selecting point addition arg */
+ bn_wexpand(&tmp->X, group->field.top);
+ bn_wexpand(&tmp->Y, group->field.top);
+ bn_wexpand(&tmp->Z, group->field.top);
+
r_is_at_infinity = 1;
for (k = max_len - 1; k >= 0; k--) {
@@ -665,12 +731,16 @@ int ec_wNAF_mul(const EC_GROUP *group, E
/* digit > 0 */
if (r_is_at_infinity) {
- if (!EC_POINT_copy(r, val_sub[i][digit >> 1]))
- goto err;
+ bn_wexpand(&r->X, group->field.top);
+ bn_wexpand(&r->Y, group->field.top);
+ bn_wexpand(&r->Z, group->field.top);
+ EC_POINT_select(r, val_sub[i], digit, wsize[i], group->field.top);
r_is_at_infinity = 0;
} else {
+ /* prepare point addition argument */
+ EC_POINT_select(tmp, val_sub[i], digit, wsize[i], group->field.top);
if (!EC_POINT_add
- (group, r, r, val_sub[i][digit >> 1], ctx))
+ (group, r, r, tmp, ctx))
goto err;
}
}
More information about the openssl-dev
mailing list