### File openssl-One_and_Done.patch of Package openssl-1_0_0.9548

commit 848113a30b431c2fe21ae8de2a366b9b6146fb92 Author: User <milosprv@gmail.com> Date: Wed May 16 13:59:36 2018 -0400 bn/bn_exp.c: mitigation of the One-and-Done side-channel attack. The One&Done attack, which is described in a paper to appear in the USENIX Security'18 conference, uses EM emanations to recover the values of the bits that are obtained using BN_is_bit_set while constructing the value of the window in BN_mod_exp_consttime. The EM signal changes slightly depending on the value of the bit, and since the lookup of a bit is surrounded by highly regular execution (constant-time Montgomery multiplications) the attack is able to isolate the (very brief) part of the signal that changes depending on the bit. Although the change is slight, the attack recovers it successfully >90% of the time on several phones and IoT devices (all with ARM processors with clock rates around 1GHz), so after only one RSA decryption more than 90% of the bits in d_p and d_q are recovered correctly, which enables rapid recovery of the full RSA key using an algorithm (also described in the paper) that modifies the branch-and-prune approach for a situation in which the exponents' bits are recovered with errors, i.e. where we do not know a priori which bits are correctly recovered. The mitigation for the attack is relatively simple - all the bits of the window are obtained at once, along with other bits so that an entire integer's worth of bits are obtained together using masking and shifts, without unnecessarily considering each bit in isolation. This improves performance somewhat (one call to bn_get_bits is faster than several calls to BN_is_bit_set), so the attacker now gets one signal snippet per window (rather than one per bit) in which the signal is affected by all bits in the integer (rather than just the one bit). Reviewed-by: Andy Polyakov <appro@openssl.org> Reviewed-by: Rich Salz <rsalz@openssl.org> (Merged from https://github.com/openssl/openssl/pull/6276) From 3f0c3d2263cd98dd3bcd366f199f0df7c9887d81 Mon Sep 17 00:00:00 2001 From: Andy Polyakov <appro@openssl.org> Date: Wed, 13 Jun 2018 14:00:04 +0200 Subject: [PATCH] bn/bn_exp.c: harmonize all code paths with last commit. 848113a30b431c2fe21ae8de2a366b9b6146fb92 added mitigation for a side-channel attack. This commit extends approach to all code paths for consistency. [It also removes redundant white spaces introduced in last commit.] Reviewed-by: Rich Salz <rsalz@openssl.org> (Merged from https://github.com/openssl/openssl/pull/6480) diff --git a/crypto/bn/bn_exp.c b/crypto/bn/bn_exp.c index 36b7ba6..f96aea2 100644 --- a/crypto/bn/bn_exp.c +++ b/crypto/bn/bn_exp.c @@ -586,7 +586,6 @@ int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, return (ret); } -#if defined(SPARC_T4_MONT) static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) { BN_ULONG ret = 0; @@ -605,7 +604,6 @@ static BN_ULONG bn_get_bits(const BIGNUM *a, int bitpos) return ret & BN_MASK2; } -#endif /* * BN_mod_exp_mont_consttime() stores the precomputed powers in a specific @@ -704,7 +702,7 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { - int i, bits, ret = 0, window, wvalue; + int i, bits, ret = 0, window, wvalue, wmask, window0; int top; BN_MONT_CTX *mont = NULL; @@ -956,20 +954,27 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, top /= 2; bn_flip_t4(np, mont->N.d, top); - bits--; - for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % 5 + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; bn_gather5_t4(tmp.d, top, powerbuf, wvalue); /* * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { + while (bits > 0) { if (bits < stride) - stride = bits + 1; + stride = bits; bits -= stride; - wvalue = bn_get_bits(p, bits + 1); + wvalue = bn_get_bits(p, bits); if ((*pwr5_worker) (tmp.d, np, n0, powerbuf, wvalue, stride)) continue; @@ -1077,32 +1082,36 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, bn_scatter5(tmp.d, top, powerbuf, i); } # endif - bits--; - for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % 5 + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; bn_gather5(tmp.d, top, powerbuf, wvalue); /* * Scan the exponent one window at a time starting from the most * significant bits. */ - if (top & 7) - while (bits >= 0) { - for (wvalue = 0, i = 0; i < 5; i++, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - + if (top & 7) { + while (bits > 0) { bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, - wvalue); + bn_get_bits5(p->d, bits -= 5)); + } } else { - while (bits >= 0) { - wvalue = bn_get_bits5(p->d, bits - 4); - bits -= 5; - bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); + while (bits > 0) { + bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, + bn_get_bits5(p->d, bits -= 5)); } } @@ -1144,27 +1153,44 @@ int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, } } - bits--; - for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); + /* + * The exponent may not have a whole number of fixed-size windows. + * To simplify the main loop, the initial window has between 1 and + * full-window-size bits such that what remains is always a whole + * number of windows + */ + window0 = (bits - 1) % window + 1; + wmask = (1 << window0) - 1; + bits -= window0; + wvalue = bn_get_bits(p, bits) & wmask; if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp, top, powerbuf, wvalue, window)) goto err; + wmask = (1 << window) - 1; /* * Scan the exponent one window at a time starting from the most * significant bits. */ - while (bits >= 0) { - wvalue = 0; /* The 'value' of the window */ + while (bits > 0) { - /* Scan the window, squaring the result as we go */ - for (i = 0; i < window; i++, bits--) { + /* Square the result window-size times */ + for (i = 0; i < window; i++) if (!bn_mul_mont_fixed_top(&tmp, &tmp, &tmp, mont, ctx)) goto err; - wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); - } + /* + * Get a window's worth of bits from the exponent + * This avoids calling BN_is_bit_set for each bit, which + * is not only slower but also makes each bit vulnerable to + * EM (and likely other) side-channel attacks like One&Done + * (for details see "One&Done: A Single-Decryption EM-Based + * Attack on OpenSSLâ€™s Constant-Time Blinded RSA" by M. Alam, + * H. Khan, M. Dey, N. Sinha, R. Callan, A. Zajic, and + * M. Prvulovic, in USENIX Security'18) + */ + bits -= window; + wvalue = bn_get_bits(p, bits) & wmask; /* * Fetch the appropriate pre-computed value from the pre-buf */