diff --git a/crypto/fipsmodule/ec/ec_nistp.c b/crypto/fipsmodule/ec/ec_nistp.c index 13120c6ee8..6105353e14 100644 --- a/crypto/fipsmodule/ec/ec_nistp.c +++ b/crypto/fipsmodule/ec/ec_nistp.c @@ -18,7 +18,7 @@ // | 1. | x | x | x* | // | 2. | x | x | x* | // | 3. | | | | -// | 4. | | | | +// | 4. | x | x | x* | // | 5. | | | | // * For P-256, only the Fiat-crypto implementation in p256.c is replaced. @@ -30,11 +30,11 @@ // for the moment, this will be fixed when we migrate the whole P-521 // implementation to ec_nistp.c. #if defined(EC_NISTP_USE_64BIT_LIMB) -#define NISTP_FELEM_MAX_NUM_OF_LIMBS (9) +#define FELEM_MAX_NUM_OF_LIMBS (9) #else -#define NISTP_FELEM_MAX_NUM_OF_LIMBS (19) +#define FELEM_MAX_NUM_OF_LIMBS (19) #endif -typedef ec_nistp_felem_limb ec_nistp_felem[NISTP_FELEM_MAX_NUM_OF_LIMBS]; +typedef ec_nistp_felem_limb ec_nistp_felem[FELEM_MAX_NUM_OF_LIMBS]; // Conditional copy in constant-time (out = t == 0 ? z : nz). static void cmovznz(ec_nistp_felem_limb *out, @@ -280,8 +280,8 @@ static int16_t get_bit(const EC_SCALAR *in, size_t i) { // It forces an odd scalar and outputs digits in // {\pm 1, \pm 3, \pm 5, \pm 7, \pm 9, ...} // i.e. signed odd digits with _no zeroes_ -- that makes it "regular". -void scalar_rwnaf(int16_t *out, size_t window_size, - const EC_SCALAR *scalar, size_t scalar_bit_size) { +static void scalar_rwnaf(int16_t *out, size_t window_size, + const EC_SCALAR *scalar, size_t scalar_bit_size) { assert(window_size < 14); // The assert above ensures this works correctly. @@ -304,13 +304,30 @@ void scalar_rwnaf(int16_t *out, size_t window_size, out[num_windows - 1] = window; } +// The window size for scalar multiplication is hard coded for now. +#define SCALAR_MUL_WINDOW_SIZE (5) +#define SCALAR_MUL_TABLE_NUM_POINTS (1 << (SCALAR_MUL_WINDOW_SIZE - 1)) + +// To avoid dynamic allocation and freeing of memory in functions below +// we define maximum values of certain variables. +// +// The maximum number of limbs the table in |ec_nistp_scalar_mul| can have. +// Each point in the table has 3 coordinates that are field elements, +// and each field element has a defined maximum number of limbs. +#define SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS \ + (SCALAR_MUL_TABLE_NUM_POINTS * 3 * FELEM_MAX_NUM_OF_LIMBS) + +// Maximum number of windows (digits) for a scalar encoding which is +// determined by the maximum scalar bit size -- 521 bits in our case. +#define SCALAR_MUL_MAX_NUM_WINDOWS DIV_AND_CEIL(521, SCALAR_MUL_WINDOW_SIZE) + // Generate table of multiples of the input point P = (x_in, y_in, z_in): // table <-- [2i + 1]P for i in [0, SCALAR_MUL_TABLE_NUM_POINTS - 1]. -void generate_table(const ec_nistp_meth *ctx, - ec_nistp_felem_limb *table, - ec_nistp_felem_limb *x_in, - ec_nistp_felem_limb *y_in, - ec_nistp_felem_limb *z_in) +static void generate_table(const ec_nistp_meth *ctx, + ec_nistp_felem_limb *table, + const ec_nistp_felem_limb *x_in, + const ec_nistp_felem_limb *y_in, + const ec_nistp_felem_limb *z_in) { const size_t felem_num_limbs = ctx->felem_num_limbs; const size_t felem_num_bytes = felem_num_limbs * sizeof(ec_nistp_felem_limb); @@ -343,3 +360,130 @@ void generate_table(const ec_nistp_meth *ctx, } } +// Writes to xyz_out the idx-th point from table in constant-time. +static void select_point_from_table(const ec_nistp_meth *ctx, + ec_nistp_felem_limb *xyz_out, + const ec_nistp_felem_limb *table, + const size_t idx) { + size_t entry_size = 3 * ctx->felem_num_limbs * sizeof(ec_nistp_felem_limb); + + constant_time_select_entry_from_table_8( + (uint8_t*)xyz_out, (uint8_t*)table, + idx, SCALAR_MUL_TABLE_NUM_POINTS, entry_size); +} + +// Multiplication of an arbitrary point by a scalar, r = [scalar]P. +// The product is computed with the use of a small table generated on-the-fly +// and the scalar recoded in the regular-wNAF representation. +// +// The precomputed (on-the-fly) table |table| holds odd multiples of P: +// [2i + 1]P for i in [0, SCALAR_MUL_TABLE_NUM_POINTS - 1]. +// Computing the negation of a point P = (x, y, z) is relatively easy: +// -P = (x, -y, z), +// so we may assume that for each point we have its negative as well. +// +// The scalar is recoded (regular-wNAF encoding) into signed digits as explained +// in |scalar_rwnaf| function. Namely, for a window size |w| we have: +// scalar' = s_0 + s_1*2^w + s_2*2^(2*w) + ... + s_{m-1}*2^((m-1)*w), +// where digits s_i are in [\pm 1, \pm 3, ..., \pm (2^w-1)] and +// m = ceil(scalar_bit_size / w). Note that for an odd scalar we have that +// scalar = scalar', while in the case of an even scalar we have that +// scalar = scalar' - 1. +// +// The required product, [scalar]P, is computed by the following algorithm. +// 1. Initialize the accumulator with the point from |table| +// corresponding to the most significant digit s_{m-1} of the scalar. +// 2. For digits s_i starting from s_{m-2} down to s_0: +// 3. Double the accumulator w times. (note that doubling a point [a]P +// w times results in [2^w*a]P). +// 4. Read from |table| the point corresponding to abs(s_i), +// negate it if s_i is negative, and add it to the accumulator. +// 5. Subtract P from the result if the scalar is even. +// +// Note: this function is constant-time. +void ec_nistp_scalar_mul(const ec_nistp_meth *ctx, + ec_nistp_felem_limb *x_out, + ec_nistp_felem_limb *y_out, + ec_nistp_felem_limb *z_out, + const ec_nistp_felem_limb *x_in, + const ec_nistp_felem_limb *y_in, + const ec_nistp_felem_limb *z_in, + const EC_SCALAR *scalar) { + // Make sure that the max table size is large enough. + assert(SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS >= + SCALAR_MUL_TABLE_NUM_POINTS * ctx->felem_num_limbs * 3); + + // Table of multiples of P = (x_in, y_in, z_in). + ec_nistp_felem_limb table[SCALAR_MUL_TABLE_MAX_NUM_FELEM_LIMBS]; + generate_table(ctx, table, x_in, y_in, z_in); + + // Regular-wNAF encoding of the scalar. + int16_t rwnaf[SCALAR_MUL_MAX_NUM_WINDOWS]; + scalar_rwnaf(rwnaf, SCALAR_MUL_WINDOW_SIZE, scalar, ctx->felem_num_bits); + + // We need two point accumulators, so we define them of maximum size + // to avoid allocation, and just take pointers to individual coordinates. + // (This cruft will dissapear when we refactor point_add/dbl to work with + // whole points instead of individual coordinates). + ec_nistp_felem_limb res[3 * FELEM_MAX_NUM_OF_LIMBS]; + ec_nistp_felem_limb tmp[3 * FELEM_MAX_NUM_OF_LIMBS]; + ec_nistp_felem_limb *x_res = &res[0]; + ec_nistp_felem_limb *y_res = &res[ctx->felem_num_limbs]; + ec_nistp_felem_limb *z_res = &res[ctx->felem_num_limbs * 2]; + ec_nistp_felem_limb *x_tmp = &tmp[0]; + ec_nistp_felem_limb *y_tmp = &tmp[ctx->felem_num_limbs]; + ec_nistp_felem_limb *z_tmp = &tmp[ctx->felem_num_limbs * 2]; + + // The actual number of windows (digits) of the scalar (denoted by m in the + // description above the function). + const size_t num_windows = DIV_AND_CEIL(ctx->felem_num_bits, SCALAR_MUL_WINDOW_SIZE); + + // Step 1. Initialize the accmulator (res) with the input point multiplied by + // the most significant digit of the scalar s_{m-1} (note that this digit + // can't be negative). + int16_t idx = rwnaf[num_windows - 1]; + idx >>= 1; + select_point_from_table(ctx, res, table, idx); + + // Step 2. Process the remaining digits of the scalar (s_{m-2} to s_0). + for (int i = num_windows - 2; i >= 0; i--) { + // Step 3. Double the accumulator w times. + for (size_t j = 0; j < SCALAR_MUL_WINDOW_SIZE; j++) { + ctx->point_dbl(x_res, y_res, z_res, x_res, y_res, z_res); + } + + // Step 4a. Compute abs(s_i). + int16_t d = rwnaf[i]; + int16_t is_neg = (d >> 15) & 1; // is_neg = (d < 0) ? 1 : 0 + d = (d ^ -is_neg) + is_neg; // d = abs(d) + + // Step 4b. Select from table the point corresponding to abs(s_i). + idx = d >> 1; + select_point_from_table(ctx, tmp, table, idx); + + // Step 4c. Negate the point if s_i < 0. + ec_nistp_felem ftmp; + ctx->felem_neg(ftmp, y_tmp); + + cmovznz(y_tmp, ctx->felem_num_limbs, is_neg, y_tmp, ftmp); + + // Step 4d. Add the point to the accumulator. + ctx->point_add(x_res, y_res, z_res, x_res, y_res, z_res, 0, x_tmp, y_tmp, z_tmp); + } + + // Step 5a. Negate the input point P (we negate it in-place since we already + // have it stored as the first entry in the table). + ec_nistp_felem_limb *x_mp = &table[0]; + ec_nistp_felem_limb *y_mp = &table[ctx->felem_num_limbs]; + ec_nistp_felem_limb *z_mp = &table[ctx->felem_num_limbs * 2]; + ctx->felem_neg(y_mp, y_mp); + + // Step 5b. Subtract P from the accumulator. + ctx->point_add(x_tmp, y_tmp, z_tmp, x_res, y_res, z_res, 0, x_mp, y_mp, z_mp); + + // Step 5c. Select |res| or |res - P| based on parity of the scalar. + ec_nistp_felem_limb t = scalar->words[0] & 1; + cmovznz(x_out, ctx->felem_num_limbs, t, x_tmp, x_res); + cmovznz(y_out, ctx->felem_num_limbs, t, y_tmp, y_res); + cmovznz(z_out, ctx->felem_num_limbs, t, z_tmp, z_res); +} diff --git a/crypto/fipsmodule/ec/ec_nistp.h b/crypto/fipsmodule/ec/ec_nistp.h index 18730ab056..d567dc18ee 100644 --- a/crypto/fipsmodule/ec/ec_nistp.h +++ b/crypto/fipsmodule/ec/ec_nistp.h @@ -47,10 +47,12 @@ typedef uint32_t ec_nistp_felem_limb; // providing an appropriate methods object. typedef struct { size_t felem_num_limbs; + size_t felem_num_bits; void (*felem_add)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b); void (*felem_sub)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b); void (*felem_mul)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a, const ec_nistp_felem_limb *b); void (*felem_sqr)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a); + void (*felem_neg)(ec_nistp_felem_limb *c, const ec_nistp_felem_limb *a); ec_nistp_felem_limb (*felem_nz)(const ec_nistp_felem_limb *a); void (*point_dbl)(ec_nistp_felem_limb *x_out, @@ -96,20 +98,13 @@ void ec_nistp_point_add(const ec_nistp_meth *ctx, const ec_nistp_felem_limb *y2, const ec_nistp_felem_limb *z2); -// These two functions and two macros are temporarily defined here. -// They will be moved to ec_nistp.c as static function -// once all the scalar multiplications are implemented. -void scalar_rwnaf(int16_t *out, size_t window_size, - const EC_SCALAR *scalar, size_t scalar_bit_size); -void generate_table(const ec_nistp_meth *ctx, - ec_nistp_felem_limb *table, - ec_nistp_felem_limb *x_in, - ec_nistp_felem_limb *y_in, - ec_nistp_felem_limb *z_in); - -// The window size for scalar multiplication is hard coded for now. -#define SCALAR_MUL_WINDOW_SIZE (5) -#define SCALAR_MUL_TABLE_NUM_POINTS (1 << (SCALAR_MUL_WINDOW_SIZE - 1)) - +void ec_nistp_scalar_mul(const ec_nistp_meth *ctx, + ec_nistp_felem_limb *x_out, + ec_nistp_felem_limb *y_out, + ec_nistp_felem_limb *z_out, + const ec_nistp_felem_limb *x_in, + const ec_nistp_felem_limb *y_in, + const ec_nistp_felem_limb *z_in, + const EC_SCALAR *scalar); #endif // EC_NISTP_H diff --git a/crypto/fipsmodule/ec/p256.c b/crypto/fipsmodule/ec/p256.c index b0d94e69eb..f8789ced96 100644 --- a/crypto/fipsmodule/ec/p256.c +++ b/crypto/fipsmodule/ec/p256.c @@ -188,10 +188,12 @@ static void fiat_p256_point_add(fiat_p256_felem x3, fiat_p256_felem y3, DEFINE_METHOD_FUNCTION(ec_nistp_meth, p256_methods) { out->felem_num_limbs = FIAT_P256_NLIMBS; + out->felem_num_bits = 256; out->felem_add = fiat_p256_add; out->felem_sub = fiat_p256_sub; out->felem_mul = fiat_p256_mul; out->felem_sqr = fiat_p256_square; + out->felem_neg = fiat_p256_opp; out->felem_nz = fiat_p256_nz; out->point_dbl = fiat_p256_point_double; out->point_add = fiat_p256_point_add; @@ -214,20 +216,6 @@ static void fiat_p256_select_point_affine( fiat_p256_cmovznz(out[2], idx, out[2], fiat_p256_one); } -// fiat_p256_select_point selects the |idx|th point from a precomputation table -// and copies it to out. -static void fiat_p256_select_point(const fiat_p256_limb_t idx, size_t size, - const fiat_p256_felem pre_comp[/*size*/][3], - fiat_p256_felem out[3]) { - OPENSSL_memset(out, 0, sizeof(fiat_p256_felem) * 3); - for (size_t i = 0; i < size; i++) { - fiat_p256_limb_t mismatch = i ^ idx; - fiat_p256_cmovznz(out[0], mismatch, pre_comp[i][0], out[0]); - fiat_p256_cmovznz(out[1], mismatch, pre_comp[i][1], out[1]); - fiat_p256_cmovznz(out[2], mismatch, pre_comp[i][2], out[2]); - } -} - // fiat_p256_get_bit returns the |i|th bit in |in|. static crypto_word_t fiat_p256_get_bit(const EC_SCALAR *in, int i) { if (i < 0 || i >= 256) { @@ -309,68 +297,16 @@ static void ec_GFp_nistp256_dbl(const EC_GROUP *group, EC_JACOBIAN *r, static void ec_GFp_nistp256_point_mul(const EC_GROUP *group, EC_JACOBIAN *r, const EC_JACOBIAN *p, const EC_SCALAR *scalar) { - fiat_p256_felem p_pre_comp[17][3]; - OPENSSL_memset(&p_pre_comp, 0, sizeof(p_pre_comp)); - // Precompute multiples. - fiat_p256_from_generic(p_pre_comp[1][0], &p->X); - fiat_p256_from_generic(p_pre_comp[1][1], &p->Y); - fiat_p256_from_generic(p_pre_comp[1][2], &p->Z); - for (size_t j = 2; j <= 16; ++j) { - if (j & 1) { - fiat_p256_point_add(p_pre_comp[j][0], p_pre_comp[j][1], p_pre_comp[j][2], - p_pre_comp[1][0], p_pre_comp[1][1], p_pre_comp[1][2], - 0, p_pre_comp[j - 1][0], p_pre_comp[j - 1][1], - p_pre_comp[j - 1][2]); - } else { - fiat_p256_point_double(p_pre_comp[j][0], p_pre_comp[j][1], - p_pre_comp[j][2], p_pre_comp[j / 2][0], - p_pre_comp[j / 2][1], p_pre_comp[j / 2][2]); - } - } + fiat_p256_felem res[3], tmp[3]; + fiat_p256_from_generic(tmp[0], &p->X); + fiat_p256_from_generic(tmp[1], &p->Y); + fiat_p256_from_generic(tmp[2], &p->Z); - // Set nq to the point at infinity. - fiat_p256_felem nq[3] = {{0}, {0}, {0}}, ftmp, tmp[3]; + ec_nistp_scalar_mul(p256_methods(), res[0], res[1], res[2], tmp[0], tmp[1], tmp[2], scalar); - // Loop over |scalar| msb-to-lsb, incorporating |p_pre_comp| every 5th round. - int skip = 1; // Save two point operations in the first round. - for (size_t i = 255; i < 256; i--) { - // double - if (!skip) { - fiat_p256_point_double(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2]); - } - - // do other additions every 5 doublings - if (i % 5 == 0) { - crypto_word_t bits = fiat_p256_get_bit(scalar, i + 4) << 5; - bits |= fiat_p256_get_bit(scalar, i + 3) << 4; - bits |= fiat_p256_get_bit(scalar, i + 2) << 3; - bits |= fiat_p256_get_bit(scalar, i + 1) << 2; - bits |= fiat_p256_get_bit(scalar, i) << 1; - bits |= fiat_p256_get_bit(scalar, i - 1); - crypto_word_t sign, digit; - ec_GFp_nistp_recode_scalar_bits(&sign, &digit, bits); - - // select the point to add or subtract, in constant time. - fiat_p256_select_point((fiat_p256_limb_t)digit, 17, - (const fiat_p256_felem(*)[3])p_pre_comp, tmp); - fiat_p256_opp(ftmp, tmp[1]); // (X, -Y, Z) is the negative point. - fiat_p256_cmovznz(tmp[1], (fiat_p256_limb_t)sign, tmp[1], ftmp); - - if (!skip) { - fiat_p256_point_add(nq[0], nq[1], nq[2], nq[0], nq[1], nq[2], - 0 /* mixed */, tmp[0], tmp[1], tmp[2]); - } else { - fiat_p256_copy(nq[0], tmp[0]); - fiat_p256_copy(nq[1], tmp[1]); - fiat_p256_copy(nq[2], tmp[2]); - skip = 0; - } - } - } - - fiat_p256_to_generic(&r->X, nq[0]); - fiat_p256_to_generic(&r->Y, nq[1]); - fiat_p256_to_generic(&r->Z, nq[2]); + fiat_p256_to_generic(&r->X, res[0]); + fiat_p256_to_generic(&r->Y, res[1]); + fiat_p256_to_generic(&r->Z, res[2]); } static void ec_GFp_nistp256_point_mul_base(const EC_GROUP *group, diff --git a/crypto/fipsmodule/ec/p384.c b/crypto/fipsmodule/ec/p384.c index bcd3c5ba4e..f11bee777b 100644 --- a/crypto/fipsmodule/ec/p384.c +++ b/crypto/fipsmodule/ec/p384.c @@ -273,10 +273,12 @@ static void p384_point_add(p384_felem x3, p384_felem y3, p384_felem z3, #if defined(EC_NISTP_USE_S2N_BIGNUM) DEFINE_METHOD_FUNCTION(ec_nistp_meth, p384_methods) { out->felem_num_limbs = P384_NLIMBS; + out->felem_num_bits = 384; out->felem_add = bignum_add_p384; out->felem_sub = bignum_sub_p384; out->felem_mul = bignum_montmul_p384_selector; out->felem_sqr = bignum_montsqr_p384_selector; + out->felem_neg = bignum_neg_p384; out->felem_nz = p384_felem_nz; out->point_dbl = p384_point_double; out->point_add = p384_point_add; @@ -284,10 +286,12 @@ DEFINE_METHOD_FUNCTION(ec_nistp_meth, p384_methods) { #else DEFINE_METHOD_FUNCTION(ec_nistp_meth, p384_methods) { out->felem_num_limbs = P384_NLIMBS; + out->felem_num_bits = 384; out->felem_add = fiat_p384_add; out->felem_sub = fiat_p384_sub; out->felem_mul = fiat_p384_mul; out->felem_sqr = fiat_p384_square; + out->felem_neg = fiat_p384_opp; out->felem_nz = p384_felem_nz; out->point_dbl = p384_point_double; out->point_add = p384_point_add; @@ -490,24 +494,6 @@ OPENSSL_STATIC_ASSERT(P384_MUL_WSIZE == 5, #define P384_MUL_TABLE_SIZE (P384_MUL_TWO_TO_WSIZE >> 1) #define P384_MUL_PUB_TABLE_SIZE (1 << (P384_MUL_PUB_WSIZE - 1)) -OPENSSL_STATIC_ASSERT(P384_MUL_TABLE_SIZE <= SCALAR_MUL_TABLE_NUM_POINTS, - p384_table_size_larger_than_ec_nistp_supports) - -// p384_select_point selects the |idx|-th projective point from the given -// precomputed table and copies it to |out| in constant time. -static void p384_select_point(p384_felem out[3], - size_t idx, - p384_felem table[][3], - size_t table_size) { - OPENSSL_memset(out, 0, sizeof(p384_felem) * 3); - for (size_t i = 0; i < table_size; i++) { - p384_limb_t mismatch = i ^ idx; - p384_felem_cmovznz(out[0], mismatch, table[i][0], out[0]); - p384_felem_cmovznz(out[1], mismatch, table[i][1], out[1]); - p384_felem_cmovznz(out[2], mismatch, table[i][2], out[2]); - } -} - // p384_select_point_affine selects the |idx|-th affine point from // the given precomputed table and copies it to |out| in constant-time. static void p384_select_point_affine(p384_felem out[2], @@ -522,105 +508,19 @@ static void p384_select_point_affine(p384_felem out[2], } } -// Multiplication of a point by a scalar, r = [scalar]P. -// The product is computed with the use of a small table generated on-the-fly -// and the scalar recoded in the regular-wNAF representation. -// -// The precomputed (on-the-fly) table |p_pre_comp| holds 16 odd multiples of P: -// [2i + 1]P for i in [0, 15]. -// Computing the negation of a point P = (x, y) is relatively easy: -// -P = (x, -y). -// So we may assume that instead of the above-mentioned 64, we have 128 points: -// [\pm 1]P, [\pm 3]P, [\pm 5]P, ..., [\pm 31]P. -// -// The 384-bit scalar is recoded (regular-wNAF encoding) into 77 signed digits -// each of length 5 bits, as explained in the |p384_felem_mul_scalar_rwnaf| -// function. Namely, -// scalar' = s_0 + s_1*2^5 + s_2*2^10 + ... + s_76*2^380, -// where digits s_i are in [\pm 1, \pm 3, ..., \pm 31]. Note that for an odd -// scalar we have that scalar = scalar', while in the case of an even -// scalar we have that scalar = scalar' - 1. -// -// The required product, [scalar]P, is computed by the following algorithm. -// 1. Initialize the accumulator with the point from |p_pre_comp| -// corresponding to the most significant digit s_76 of the scalar. -// 2. For digits s_i starting from s_75 down to s_0: -// 3. Double the accumulator 5 times. (note that doubling a point [a]P -// seven times results in [2^5*a]P). -// 4. Read from |p_pre_comp| the point corresponding to abs(s_i), -// negate it if s_i is negative, and add it to the accumulator. -// -// Note: this function is constant-time. +// Multiplication of an arbitrary point by a scalar, r = [scalar]P. static void ec_GFp_nistp384_point_mul(const EC_GROUP *group, EC_JACOBIAN *r, const EC_JACOBIAN *p, const EC_SCALAR *scalar) { - p384_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}}, ftmp; + p384_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}}; - // Table of multiples of P: [2i + 1]P for i in [0, 15]. - p384_felem p_pre_comp[P384_MUL_TABLE_SIZE][3]; - - // Set the first point in the table to P. p384_from_generic(tmp[0], &p->X); p384_from_generic(tmp[1], &p->Y); p384_from_generic(tmp[2], &p->Z); - assert(sizeof(p_pre_comp) == (P384_MUL_TABLE_SIZE * 3 * sizeof(p384_felem))); - generate_table(p384_methods(), (ec_nistp_felem_limb*)p_pre_comp, tmp[0], tmp[1], tmp[2]); + ec_nistp_scalar_mul(p384_methods(), res[0], res[1], res[2], tmp[0], tmp[1], tmp[2], scalar); - // Recode the scalar. - int16_t rnaf[P384_MUL_NWINDOWS] = {0}; - scalar_rwnaf(rnaf, P384_MUL_WSIZE, scalar, 384); - - // Initialize the accumulator |res| with the table entry corresponding to - // the most significant digit of the recoded scalar (note that this digit - // can't be negative). - int16_t idx = rnaf[P384_MUL_NWINDOWS - 1] >> 1; - p384_select_point(res, idx, p_pre_comp, P384_MUL_TABLE_SIZE); - - // Process the remaining digits of the scalar. - for (int i = P384_MUL_NWINDOWS - 2; i >= 0; i--) { - // Double |res| 5 times in each iteration. - for (size_t j = 0; j < P384_MUL_WSIZE; j++) { - p384_point_double(res[0], res[1], res[2], res[0], res[1], res[2]); - } - - int16_t d = rnaf[i]; - // is_neg = (d < 0) ? 1 : 0 - int16_t is_neg = (d >> 15) & 1; - // d = abs(d) - d = (d ^ -is_neg) + is_neg; - - idx = d >> 1; - - // Select the point to add, in constant time. - p384_select_point(tmp, idx, p_pre_comp, P384_MUL_TABLE_SIZE); - - // Negate y coordinate of the point tmp = (x, y); ftmp = -y. - p384_felem_opp(ftmp, tmp[1]); - // Conditionally select y or -y depending on the sign of the digit |d|. - p384_felem_cmovznz(tmp[1], is_neg, tmp[1], ftmp); - - // Add the point to the accumulator |res|. - p384_point_add(res[0], res[1], res[2], res[0], res[1], res[2], - 0 /* both Jacobian */, tmp[0], tmp[1], tmp[2]); - - } - - // Conditionally subtract P if the scalar is even, in constant-time. - // First, compute |tmp| = |res| + (-P). - p384_felem_copy(tmp[0], p_pre_comp[0][0]); - p384_felem_opp(tmp[1], p_pre_comp[0][1]); - p384_felem_copy(tmp[2], p_pre_comp[0][2]); - p384_point_add(tmp[0], tmp[1], tmp[2], res[0], res[1], res[2], - 0 /* both Jacobian */, tmp[0], tmp[1], tmp[2]); - - // Select |res| or |tmp| based on the |scalar| parity, in constant-time. - p384_felem_cmovznz(res[0], scalar->words[0] & 1, tmp[0], res[0]); - p384_felem_cmovznz(res[1], scalar->words[0] & 1, tmp[1], res[1]); - p384_felem_cmovznz(res[2], scalar->words[0] & 1, tmp[2], res[2]); - - // Copy the result to the output. p384_to_generic(&r->X, res[0]); p384_to_generic(&r->Y, res[1]); p384_to_generic(&r->Z, res[2]); diff --git a/crypto/fipsmodule/ec/p521.c b/crypto/fipsmodule/ec/p521.c index 3833e04954..b1ed65dc7b 100644 --- a/crypto/fipsmodule/ec/p521.c +++ b/crypto/fipsmodule/ec/p521.c @@ -291,10 +291,12 @@ static void p521_point_add(p521_felem x3, p521_felem y3, p521_felem z3, #if defined(EC_NISTP_USE_S2N_BIGNUM) DEFINE_METHOD_FUNCTION(ec_nistp_meth, p521_methods) { out->felem_num_limbs = P521_NLIMBS; + out->felem_num_bits = 521; out->felem_add = bignum_add_p521; out->felem_sub = bignum_sub_p521; out->felem_mul = bignum_mul_p521_selector; out->felem_sqr = bignum_sqr_p521_selector; + out->felem_neg = bignum_neg_p521; out->felem_nz = p521_felem_nz; out->point_dbl = p521_point_double; out->point_add = p521_point_add; @@ -302,10 +304,12 @@ DEFINE_METHOD_FUNCTION(ec_nistp_meth, p521_methods) { #else DEFINE_METHOD_FUNCTION(ec_nistp_meth, p521_methods) { out->felem_num_limbs = P521_NLIMBS; + out->felem_num_bits = 521; out->felem_add = fiat_secp521r1_carry_add; out->felem_sub = fiat_secp521r1_carry_sub; out->felem_mul = fiat_secp521r1_carry_mul; out->felem_sqr = fiat_secp521r1_carry_square; + out->felem_neg = fiat_secp521r1_carry_opp; out->felem_nz = p521_felem_nz; out->point_dbl = p521_point_double; out->point_add = p521_point_add; @@ -427,24 +431,6 @@ OPENSSL_STATIC_ASSERT(P521_MUL_WSIZE == 5, #define P521_MUL_TABLE_SIZE (P521_MUL_TWO_TO_WSIZE >> 1) #define P521_MUL_PUB_TABLE_SIZE (1 << (P521_MUL_PUB_WSIZE - 1)) -OPENSSL_STATIC_ASSERT(P521_MUL_TABLE_SIZE <= SCALAR_MUL_TABLE_NUM_POINTS, - p521_table_size_larger_than_ec_nistp_supports) - -// p521_select_point selects the |idx|-th projective point from the given -// precomputed table and copies it to |out| in constant time. -static void p521_select_point(p521_felem out[3], - size_t idx, - p521_felem table[][3], - size_t table_size) { - OPENSSL_memset(out, 0, sizeof(p521_felem) * 3); - for (size_t i = 0; i < table_size; i++) { - p521_limb_t mismatch = i ^ idx; - p521_felem_cmovznz(out[0], mismatch, table[i][0], out[0]); - p521_felem_cmovznz(out[1], mismatch, table[i][1], out[1]); - p521_felem_cmovznz(out[2], mismatch, table[i][2], out[2]); - } -} - // p521_select_point_affine selects the |idx|-th affine point from // the given precomputed table and copies it to |out| in constant-time. static void p521_select_point_affine(p521_felem out[2], @@ -459,105 +445,19 @@ static void p521_select_point_affine(p521_felem out[2], } } -// Multiplication of a point by a scalar, r = [scalar]P. -// The product is computed with the use of a small table generated on-the-fly -// and the scalar recoded in the regular-wNAF representation. -// -// The precomputed (on-the-fly) table |p_pre_comp| holds 16 odd multiples of P: -// [2i + 1]P for i in [0, 15]. -// Computing the negation of a point P = (x, y) is relatively easy: -// -P = (x, -y). -// So we may assume that instead of the above-mentioned 16, we have 32 points: -// [\pm 1]P, [\pm 3]P, [\pm 5]P, ..., [\pm 31]P. -// -// The 521-bit scalar is recoded (regular-wNAF encoding) into 105 signed digits -// each of length 5 bits, as explained in the |p521_felem_mul_scalar_rwnaf| -// function. Namely, -// scalar' = s_0 + s_1*2^5 + s_2*2^10 + ... + s_104*2^520, -// where digits s_i are in [\pm 1, \pm 3, ..., \pm 31]. Note that for an odd -// scalar we have that scalar = scalar', while in the case of an even -// scalar we have that scalar = scalar' - 1. -// -// The required product, [scalar]P, is computed by the following algorithm. -// 1. Initialize the accumulator with the point from |p_pre_comp| -// corresponding to the most significant digit s_104 of the scalar. -// 2. For digits s_i starting from s_104 down to s_0: -// 3. Double the accumulator 5 times. (note that doubling a point [a]P -// seven times results in [2^5*a]P). -// 4. Read from |p_pre_comp| the point corresponding to abs(s_i), -// negate it if s_i is negative, and add it to the accumulator. -// -// Note: this function is constant-time. +// Multiplication of an arbitrary point by a scalar, r = [scalar]P. static void ec_GFp_nistp521_point_mul(const EC_GROUP *group, EC_JACOBIAN *r, const EC_JACOBIAN *p, const EC_SCALAR *scalar) { - p521_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}}, ftmp; + p521_felem res[3] = {{0}, {0}, {0}}, tmp[3] = {{0}, {0}, {0}}; - // Table of multiples of P: [2i + 1]P for i in [0, 15]. - p521_felem p_pre_comp[P521_MUL_TABLE_SIZE][3]; - - // Set the first point in the table to P. p521_from_generic(tmp[0], &p->X); p521_from_generic(tmp[1], &p->Y); p521_from_generic(tmp[2], &p->Z); - assert(sizeof(p_pre_comp) == (P521_MUL_TABLE_SIZE * 3 * sizeof(p521_felem))); - generate_table(p521_methods(), (ec_nistp_felem_limb*)p_pre_comp, tmp[0], tmp[1], tmp[2]); + ec_nistp_scalar_mul(p521_methods(), res[0], res[1], res[2], tmp[0], tmp[1], tmp[2], scalar); - // Recode the scalar. - int16_t rnaf[P521_MUL_NWINDOWS] = {0}; - scalar_rwnaf(rnaf, P521_MUL_WSIZE, scalar, 521); - - // Initialize the accumulator |res| with the table entry corresponding to - // the most significant digit of the recoded scalar (note that this digit - // can't be negative). - int16_t idx = rnaf[P521_MUL_NWINDOWS - 1] >> 1; - p521_select_point(res, idx, p_pre_comp, P521_MUL_TABLE_SIZE); - - // Process the remaining digits of the scalar. - for (int i = P521_MUL_NWINDOWS - 2; i >= 0; i--) { - // Double |res| 7 times in each iteration. - for (size_t j = 0; j < P521_MUL_WSIZE; j++) { - p521_point_double(res[0], res[1], res[2], res[0], res[1], res[2]); - } - - int16_t d = rnaf[i]; - // is_neg = (d < 0) ? 1 : 0 - int16_t is_neg = (d >> 15) & 1; - // d = abs(d) - d = (d ^ -is_neg) + is_neg; - - idx = d >> 1; - - // Select the point to add, in constant time. - p521_select_point(tmp, idx, p_pre_comp, P521_MUL_TABLE_SIZE); - - // Negate y coordinate of the point tmp = (x, y); ftmp = -y. - p521_felem_opp(ftmp, tmp[1]); - // Conditionally select y or -y depending on the sign of the digit |d|. - p521_felem_cmovznz(tmp[1], is_neg, tmp[1], ftmp); - - // Add the point to the accumulator |res|. - p521_point_add(res[0], res[1], res[2], res[0], res[1], res[2], - 0 /* both Jacobian */, tmp[0], tmp[1], tmp[2]); - - } - - // Conditionally subtract P if the scalar is even, in constant-time. - // First, compute |tmp| = |res| + (-P). - p521_felem_copy(tmp[0], p_pre_comp[0][0]); - p521_felem_opp(tmp[1], p_pre_comp[0][1]); - p521_felem_copy(tmp[2], p_pre_comp[0][2]); - p521_point_add(tmp[0], tmp[1], tmp[2], res[0], res[1], res[2], - 0 /* both Jacobian */, tmp[0], tmp[1], tmp[2]); - - // Select |res| or |tmp| based on the |scalar| parity, in constant-time. - p521_felem_cmovznz(res[0], scalar->words[0] & 1, tmp[0], res[0]); - p521_felem_cmovznz(res[1], scalar->words[0] & 1, tmp[1], res[1]); - p521_felem_cmovznz(res[2], scalar->words[0] & 1, tmp[2], res[2]); - - // Copy the result to the output. p521_to_generic(&r->X, res[0]); p521_to_generic(&r->Y, res[1]); p521_to_generic(&r->Z, res[2]); diff --git a/crypto/internal.h b/crypto/internal.h index ba08e232f5..1546549714 100644 --- a/crypto/internal.h +++ b/crypto/internal.h @@ -466,17 +466,32 @@ static inline void constant_time_select_array_w( } } +static inline void constant_time_select_array_8( + uint8_t *c, uint8_t *a, uint8_t *b, uint8_t mask, size_t len) { + for (size_t i = 0; i < len; i++) { + c[i] = constant_time_select_8(mask, a[i], b[i]); + } +} + // constant_time_select_entry_from_table_w selects the idx-th entry from table. static inline void constant_time_select_entry_from_table_w( crypto_word_t *out, crypto_word_t *table, - size_t idx, size_t num_entries, size_t entry_size) -{ + size_t idx, size_t num_entries, size_t entry_size) { for (size_t i = 0; i < num_entries; i++) { crypto_word_t mask = constant_time_eq_w(i, idx); constant_time_select_array_w(out, &table[i * entry_size], out, mask, entry_size); } } +static inline void constant_time_select_entry_from_table_8( + uint8_t *out, uint8_t *table, size_t idx, + size_t num_entries, size_t entry_size) { + for (size_t i = 0; i < num_entries; i++) { + uint8_t mask = (uint8_t)(constant_time_eq_w(i, idx)); + constant_time_select_array_8(out, &table[i * entry_size], out, mask, entry_size); + } +} + #if defined(BORINGSSL_CONSTANT_TIME_VALIDATION) // CONSTTIME_SECRET takes a pointer and a number of bytes and marks that region