From 92e53fc4c8080546b4a85aef8315d1a545fbb3ab Mon Sep 17 00:00:00 2001
From: Andrew Poelstra
Date: Fri, 22 May 2015 12:09:36 -0500
Subject: [PATCH] Implement endomorphism optimization for secp256k1_ecmult_const

---
 src/bench_internal.c    |   2 +-
 src/ecmult_const_impl.h | 137 ++++++++++++++++++++++++++++++++++++----
 src/tests.c             |  19 +++++-
 3 files changed, 144 insertions(+), 14 deletions(-)

diff --git a/src/bench_internal.c b/src/bench_internal.c
index 827a3899387..1a06e9441a7 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -241,7 +241,7 @@ void bench_wnaf_const(void* arg) {
     bench_inv_t *data = (bench_inv_t*)arg;
 
     for (i = 0; i < 20000; i++) {
-        secp256k1_wnaf_const(data->wnaf, &data->scalar_x, WINDOW_A);
+        secp256k1_wnaf_const(data->wnaf, data->scalar_x, WINDOW_A);
         secp256k1_scalar_add(&data->scalar_x, &data->scalar_x, &data->scalar_y);
     }
 }
diff --git a/src/ecmult_const_impl.h b/src/ecmult_const_impl.h
index 956d5e4c0fe..3632d2ecb58 100644
--- a/src/ecmult_const_impl.h
+++ b/src/ecmult_const_impl.h
@@ -12,7 +12,11 @@
 #include "ecmult_const.h"
 #include "ecmult_impl.h"
 
-#define WNAF_BITS 256
+#ifdef USE_ENDOMORPHISM
+    #define WNAF_BITS 128
+#else
+    #define WNAF_BITS 256
+#endif
 #define WNAF_SIZE(w) ((WNAF_BITS + (w) - 1) / (w))
 
 /* This is like `ECMULT_TABLE_GET_GE` but is constant time */
@@ -49,17 +53,47 @@
  *
  * Numbers reference steps of `Algorithm SPA-resistant Width-w NAF with Odd Scalar` on pp. 335
  */
-static void secp256k1_wnaf_const(int *wnaf, const secp256k1_scalar_t *a, int w) {
-    secp256k1_scalar_t s = *a;
-    /* Negate to force oddness */
-    int is_even = secp256k1_scalar_is_even(&s);
-    int global_sign = secp256k1_scalar_cond_negate(&s, is_even);
-
+static int secp256k1_wnaf_const(int *wnaf, secp256k1_scalar_t s, int w) {
+    int global_sign = 1;
+    int skew = 0;
     int word = 0;
     /* 1 2 3 */
-    int u_last = secp256k1_scalar_shr_int(&s, w);
+    int u_last;
     int u;
+
+#ifdef USE_ENDOMORPHISM
+    /* If we are using the endomorphism, we cannot handle even numbers by negating
+     * them, since we are working with 128-bit numbers whose negations would be 256
+     * bits, eliminating the performance advantage. Instead we use a technique from
+     * Section 4.2 of the Okeya/Takagi paper, which is to add either 1 (for even)
+     * or 2 (for odd) to the number we are encoding, and then to compensate after
+     * the multiplication. */
+    /* Negative 128-bit numbers will be negated, since otherwise they are 256 bits */
+    int flip = secp256k1_scalar_is_high(&s);
+    /* We add 1 to even numbers, 2 to odd ones, noting that negation flips parity */
+    int bit = flip ^ (s.d[0] & 1);
+    /* We check for negative one, since adding 2 to it will cause an overflow */
+    secp256k1_scalar_t neg_s;
+    int not_neg_one;
+    secp256k1_scalar_negate(&neg_s, &s);
+    not_neg_one = !secp256k1_scalar_is_one(&neg_s);
+    secp256k1_scalar_cadd_bit(&s, bit, not_neg_one);
+    /* If we had negative one, flip == 1, s.d[0] == 0, bit == 1, so the caller expects
+     * that we added two to it and flipped it. In fact for -1 these operations are
+     * identical. We only flipped, but since skewing is required (in the sense that
+     * the skew must be 1 or 2, never zero) and flipping is not, we need to change
+     * our flags to claim that we only skewed. */
+    global_sign = secp256k1_scalar_cond_negate(&s, flip);
+    global_sign *= not_neg_one * 2 - 1;
+    skew = 1 << bit;
+#else
+    /* Otherwise, we just negate to force oddness */
+    int is_even = secp256k1_scalar_is_even(&s);
+    global_sign = secp256k1_scalar_cond_negate(&s, is_even);
+#endif
+
+    /* 4 */
+    u_last = secp256k1_scalar_shr_int(&s, w);
     while (word * w < WNAF_BITS) {
         int sign;
         int even;
@@ -81,6 +115,7 @@ static void secp256k1_wnaf_const(int *wnaf, const secp256k1_scalar_t *a, int w)
 
     VERIFY_CHECK(secp256k1_scalar_is_zero(&s));
     VERIFY_CHECK(word == WNAF_SIZE(w));
+    return skew;
 }
 
 
@@ -89,17 +124,37 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
     secp256k1_ge_t tmpa;
     secp256k1_fe_t Z;
 
+#ifdef USE_ENDOMORPHISM
+    secp256k1_ge_t pre_a_lam[ECMULT_TABLE_SIZE(WINDOW_A)];
+    int wnaf_1[1 + WNAF_SIZE(WINDOW_A - 1)];
+    int wnaf_lam[1 + WNAF_SIZE(WINDOW_A - 1)];
+    int skew_1;
+    int skew_lam;
+    secp256k1_scalar_t q_1, q_lam;
+#else
     int wnaf[1 + WNAF_SIZE(WINDOW_A - 1)];
+#endif
 
     int i;
-    int is_zero = secp256k1_scalar_is_zero(scalar);
     secp256k1_scalar_t sc = *scalar;
+
+    /* build wnaf representation for q. */
+#ifdef USE_ENDOMORPHISM
+    /* split q into q_1 and q_lam (where q = q_1 + q_lam*lambda, and q_1 and q_lam are ~128 bit) */
+    secp256k1_scalar_split_lambda(&q_1, &q_lam, &sc);
+    /* no need for zero correction when using the endomorphism, since zero is
+     * even and even numbers have one added to them anyway */
+    skew_1 = secp256k1_wnaf_const(wnaf_1, q_1, WINDOW_A - 1);
+    skew_lam = secp256k1_wnaf_const(wnaf_lam, q_lam, WINDOW_A - 1);
+#else
+    int is_zero = secp256k1_scalar_is_zero(scalar);
     /* the wNAF ladder cannot handle zero, so bump this to one .. we will
      * correct the result after the fact */
     sc.d[0] += is_zero;
+
     VERIFY_CHECK(!secp256k1_scalar_is_zero(&sc));
 
-    /* build wnaf representation for q. */
-    secp256k1_wnaf_const(wnaf, &sc, WINDOW_A - 1);
+    secp256k1_wnaf_const(wnaf, sc, WINDOW_A - 1);
+#endif
 
     /* Calculate odd multiples of a.
     * All multiples are brought to the same Z 'denominator', which is stored
@@ -109,14 +164,31 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
      */
     secp256k1_gej_set_ge(r, a);
     secp256k1_ecmult_odd_multiples_table_globalz_windowa(pre_a, &Z, r);
+#ifdef USE_ENDOMORPHISM
+    for (i = 0; i < ECMULT_TABLE_SIZE(WINDOW_A); i++) {
+        secp256k1_ge_mul_lambda(&pre_a_lam[i], &pre_a[i]);
+    }
+#endif
 
     /* first loop iteration (separated out so we can directly set r, rather
      * than having it start at infinity, get doubled several times, then have
      * its new value added to it) */
+#ifdef USE_ENDOMORPHISM
+    i = wnaf_1[WNAF_SIZE(WINDOW_A - 1)];
+    VERIFY_CHECK(i != 0);
+    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
+    secp256k1_gej_set_ge(r, &tmpa);
+
+    i = wnaf_lam[WNAF_SIZE(WINDOW_A - 1)];
+    VERIFY_CHECK(i != 0);
+    ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, i, WINDOW_A);
+    secp256k1_gej_add_ge(r, r, &tmpa);
+#else
     i = wnaf[WNAF_SIZE(WINDOW_A - 1)];
     VERIFY_CHECK(i != 0);
     ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, i, WINDOW_A);
     secp256k1_gej_set_ge(r, &tmpa);
+#endif
 
     /* remaining loop iterations */
     for (i = WNAF_SIZE(WINDOW_A - 1) - 1; i >= 0; i--) {
         int n;
@@ -124,16 +196,59 @@ static void secp256k1_ecmult_const(secp256k1_gej_t *r, const secp256k1_ge_t *a,
         int j;
         for (j = 0; j < WINDOW_A - 1; ++j) {
             secp256k1_gej_double_nonzero(r, r, NULL);
         }
+#ifdef USE_ENDOMORPHISM
+        n = wnaf_1[i];
+        VERIFY_CHECK(n != 0);
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+
+        n = wnaf_lam[i];
+        VERIFY_CHECK(n != 0);
+        ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
+        secp256k1_gej_add_ge(r, r, &tmpa);
+#else
         n = wnaf[i];
         VERIFY_CHECK(n != 0);
         ECMULT_CONST_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
         secp256k1_gej_add_ge(r, r, &tmpa);
+#endif
     }
 
     secp256k1_fe_mul(&r->z, &r->z, &Z);
 
+#ifdef USE_ENDOMORPHISM
+    {
+        /* Correct for wNAF skew */
+        secp256k1_ge_t correction = *a;
+        secp256k1_ge_storage_t correction_1_stor;
+        secp256k1_ge_storage_t correction_lam_stor;
+        secp256k1_ge_storage_t a2_stor;
+        secp256k1_gej_t tmpj;
+        secp256k1_gej_set_ge(&tmpj, &correction);
+        secp256k1_gej_double_var(&tmpj, &tmpj, NULL);
+        secp256k1_ge_set_gej(&correction, &tmpj);
+        secp256k1_ge_to_storage(&correction_1_stor, a);
+        secp256k1_ge_to_storage(&correction_lam_stor, a);
+        secp256k1_ge_to_storage(&a2_stor, &correction);
+
+        /* For odd numbers this is 2a (so replace it), for even ones a (so no-op) */
+        secp256k1_ge_storage_cmov(&correction_1_stor, &a2_stor, skew_1 == 2);
+        secp256k1_ge_storage_cmov(&correction_lam_stor, &a2_stor, skew_lam == 2);
+
+        /* Apply the correction */
+        secp256k1_ge_from_storage(&correction, &correction_1_stor);
+        secp256k1_ge_neg(&correction, &correction);
+        secp256k1_gej_add_ge(r, r, &correction);
+
+        secp256k1_ge_from_storage(&correction, &correction_lam_stor);
+        secp256k1_ge_neg(&correction, &correction);
+        secp256k1_ge_mul_lambda(&correction, &correction);
+        secp256k1_gej_add_ge(r, r, &correction);
+    }
+#else
     /* correct for zero */
     r->infinity |= is_zero;
+#endif
 }
 
 #endif
diff --git a/src/tests.c b/src/tests.c
index bfcc8bce116..feb9576330f 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -1550,10 +1550,21 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) {
     secp256k1_scalar_t x, shift;
     int wnaf[256] = {0};
     int i;
+#ifdef USE_ENDOMORPHISM
+    int skew;
+#endif
+    secp256k1_scalar_t num = *number;
 
     secp256k1_scalar_set_int(&x, 0);
     secp256k1_scalar_set_int(&shift, 1 << w);
-    secp256k1_wnaf_const(wnaf, number, w);
+    /* With USE_ENDOMORPHISM on we only consider 128-bit numbers */
+#ifdef USE_ENDOMORPHISM
+    for (i = 0; i < 16; ++i) {
+        secp256k1_scalar_shr_int(&num, 8);
+    }
+    skew = secp256k1_wnaf_const(wnaf, num, w);
+#else
+    secp256k1_wnaf_const(wnaf, num, w);
+#endif
 
     for (i = WNAF_SIZE(w); i >= 0; --i) {
         secp256k1_scalar_t t;
@@ -1572,7 +1583,11 @@ void test_constant_wnaf(const secp256k1_scalar_t *number, int w) {
         }
         secp256k1_scalar_add(&x, &x, &t);
     }
-    CHECK(secp256k1_scalar_eq(&x, number));
+#ifdef USE_ENDOMORPHISM
+    /* Skew num because when encoding 128-bit numbers as odd we use an offset */
+    secp256k1_scalar_cadd_bit(&num, skew == 2, 1);
+#endif
+    CHECK(secp256k1_scalar_eq(&x, &num));
 }
 
 void run_wnaf(void) {
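
For reviewers who want to see the Okeya/Takagi skew trick from the patch in isolation, below is a small standalone sketch. It is not part of the patch: plain 32-bit values stand in for the ~128-bit half-scalars, Horner evaluation of the digits stands in for the double-and-add ladder, and all toy_* names are invented for illustration. It checks that skewing by 1 (even inputs) or 2 (odd inputs) always hands the encoder an odd value, that every digit of the fixed-length encoding comes out odd and nonzero, and that subtracting the skew afterwards recovers the original value, zero included:

#include <assert.h>
#include <stdint.h>
#include <stdio.h>

#define TOY_BITS 32
#define TOY_W 5
#define TOY_WNAF_SIZE ((TOY_BITS + TOY_W - 1) / TOY_W)

/* Encode an odd s < 2^TOY_BITS as TOY_WNAF_SIZE + 1 signed digits in base
 * 2^w, every digit odd and nonzero.  Whenever the next window comes out
 * even, make it odd by borrowing +/-1 from it and pushing the borrow into
 * the previous digit; this leaves the represented value unchanged, and
 * nonzero digits are what let a constant-time ladder skip a "zero" table
 * entry. */
static void toy_wnaf_const(int *wnaf, uint64_t s, int w) {
    int word = 0;
    int u_last;
    int u = 0;
    assert(s & 1);
    u_last = (int)(s & ((1u << w) - 1)); /* odd, because s is odd */
    s >>= w;
    while (word * w < TOY_BITS) {
        u = (int)(s & ((1u << w) - 1));
        s >>= w;
        if ((u & 1) == 0) {
            int sign = 2 * (u_last > 0) - 1;
            u += sign;
            u_last -= sign * (1 << w);
        }
        wnaf[word++] = u_last;
        u_last = u;
    }
    wnaf[word] = u;
    assert(s == 0); /* the whole value was consumed */
}

int main(void) {
    uint64_t q;
    for (q = 0; q < 100000; q++) {
        int wnaf[TOY_WNAF_SIZE + 1];
        int64_t acc = 0;
        int i;
        /* skew of 1 makes an even q odd; skew of 2 keeps an odd q odd */
        uint64_t skew = (q & 1) ? 2 : 1;
        toy_wnaf_const(wnaf, q + skew, TOY_W);
        /* Horner evaluation of the digits stands in for double-and-add */
        for (i = TOY_WNAF_SIZE; i >= 0; i--) {
            acc = acc * (1 << TOY_W) + wnaf[i];
            assert(wnaf[i] & 1); /* every digit odd, hence nonzero */
        }
        /* the ladder computed (q + skew)*P; subtracting skew*P recovers q*P */
        assert(acc - (int64_t)skew == (int64_t)q);
    }
    printf("skew encoding verified for all q in [0, 100000)\n");
    return 0;
}

The one case this toy sidesteps is the wrap-around when adding 2 to the scalar -1 modulo the group order; the patch handles that separately via the not_neg_one flag in secp256k1_wnaf_const, claiming a skew of 2 instead of a negation, since (as the patch's comment notes) for -1 the two operations produce identical results.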