From 2d5a186cee71266eef986ffc3ed99d88ab1e79f4 Mon Sep 17 00:00:00 2001
From: Peter Dettman <peter.dettman@gmail.com>
Date: Mon, 9 Feb 2015 16:34:24 +0700
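Subject: [PATCH] Apply effective-affine trick to precomp

secp256k1_ecmult_odd_multiples_table now drops the Z coordinate of
d = 2*a and rescales the X/Y coordinates of the starting point instead,
so every further odd multiple is computed with secp256k1_gej_add_ge_var
rather than secp256k1_gej_add_var. The Z coordinates stored in the table
are then too small by a factor of d.z; only the last entry's Z is
corrected, since it is the only one that is used.

secp256k1_gej_add_ge_var gains an optional rzr output that receives the
ratio r->z / a->z. Existing callers pass NULL.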

---
 src/bench_internal.c  |  2 +-
 src/ecmult_gen_impl.h |  2 +-
 src/ecmult_impl.h     | 36 +++++++++++++++++++++++++++++-------
 src/group.h           |  4 ++--
 src/group_impl.h      | 21 ++++++++++++++-------
 src/tests.c           |  8 ++++++--
 6 files changed, 53 insertions(+), 20 deletions(-)

diff --git a/src/bench_internal.c b/src/bench_internal.c
index 562f5b0e587..bbbc7c3fc79 100644
--- a/src/bench_internal.c
+++ b/src/bench_internal.c
@@ -220,7 +220,7 @@ void bench_group_add_affine_var(void* arg) {
     bench_inv_t *data = (bench_inv_t*)arg;
 
     for (i = 0; i < 200000; i++) {
-        secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y);
+        secp256k1_gej_add_ge_var(&data->gej_x, &data->gej_x, &data->ge_y, NULL);
     }
 }
 
diff --git a/src/ecmult_gen_impl.h b/src/ecmult_gen_impl.h
index 00521707c0d..a5447de88a8 100644
--- a/src/ecmult_gen_impl.h
+++ b/src/ecmult_gen_impl.h
@@ -40,7 +40,7 @@ static void secp256k1_ecmult_gen_context_build(secp256k1_ecmult_gen_context_t *c
         VERIFY_CHECK(secp256k1_ge_set_xo_var(&nums_ge, &nums_x, 0));
         secp256k1_gej_set_ge(&nums_gej, &nums_ge);
         /* Add G to make the bits in x uniformly distributed. */
-        secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, &secp256k1_ge_const_g);
+        secp256k1_gej_add_ge_var(&nums_gej, &nums_gej, &secp256k1_ge_const_g, NULL);
     }
 
     /* compute prec. */
diff --git a/src/ecmult_impl.h b/src/ecmult_impl.h
index a8cf958810b..d6aa2ea7db0 100644
--- a/src/ecmult_impl.h
+++ b/src/ecmult_impl.h
@@ -30,19 +30,41 @@
 /** Fill a table 'prej' with precomputed odd multiples of a. Prej will contain
  *  the values [1*a,3*a,...,(2*n-1)*a], so it space for n values. zr[0] will
  *  contain prej[0].z / a.z. The other zr[i] values = prej[i].z / prej[i-1].z.
+ *  Prej's Z values are undefined, except for the last value.
  */
 static void secp256k1_ecmult_odd_multiples_table(int n, secp256k1_gej_t *prej, secp256k1_fe_t *zr, const secp256k1_gej_t *a) {
     secp256k1_gej_t d;
+    secp256k1_ge_t a_ge, d_ge;
     int i;
 
     VERIFY_CHECK(!a->infinity);
 
-    prej[0] = *a;
-    secp256k1_gej_double_var(&d, &prej[0], NULL);
-    secp256k1_fe_set_int(zr, 1);
+    secp256k1_gej_double_var(&d, a, NULL);
+
+    /*
+     * Perform the additions on an isomorphism where 'd' is affine: drop the z coordinate
+     * of 'd', and scale the 1P starting value's x/y coordinates without changing its z.
+     */
+    d_ge.x = d.x;
+    d_ge.y = d.y;
+    d_ge.infinity = 0;
+
+    secp256k1_ge_set_gej_zinv(&a_ge, a, &d.z);
+    prej[0].x = a_ge.x;
+    prej[0].y = a_ge.y;
+    prej[0].z = a->z;
+    prej[0].infinity = 0;
+
+    zr[0] = d.z;
     for (i = 1; i < n; i++) {
-        secp256k1_gej_add_var(&prej[i], &prej[i-1], &d, &zr[i]);
+        secp256k1_gej_add_ge_var(&prej[i], &prej[i-1], &d_ge, &zr[i]);
     }
+
+    /*
+     * Each point in 'prej' has a z coordinate too small by a factor of 'd.z'. Only
+     * the final point's z coordinate is actually used though, so just update that.
+     */
+    secp256k1_fe_mul(&prej[n-1].z, &prej[n-1].z, &d.z);
 }
 
 /** Fill a table 'pre' with precomputed odd multiples of a.
@@ -319,11 +341,11 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge
 #ifdef USE_ENDOMORPHISM
         if (i < bits_na_1 && (n = wnaf_na_1[i])) {
             ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
-            secp256k1_gej_add_ge_var(r, r, &tmpa);
+            secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
         }
         if (i < bits_na_lam && (n = wnaf_na_lam[i])) {
             ECMULT_TABLE_GET_GE(&tmpa, pre_a_lam, n, WINDOW_A);
-            secp256k1_gej_add_ge_var(r, r, &tmpa);
+            secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
         }
         if (i < bits_ng_1 && (n = wnaf_ng_1[i])) {
             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
@@ -336,7 +358,7 @@ static void secp256k1_ecmult(const secp256k1_ecmult_context_t *ctx, secp256k1_ge
 #else
         if (i < bits_na && (n = wnaf_na[i])) {
             ECMULT_TABLE_GET_GE(&tmpa, pre_a, n, WINDOW_A);
-            secp256k1_gej_add_ge_var(r, r, &tmpa);
+            secp256k1_gej_add_ge_var(r, r, &tmpa, NULL);
         }
         if (i < bits_ng && (n = wnaf_ng[i])) {
             ECMULT_TABLE_GET_GE_STORAGE(&tmpa, *ctx->pre_g, n, WINDOW_G);
diff --git a/src/group.h b/src/group.h
index b4dc2fa1a21..1d9ef9d2aa7 100644
--- a/src/group.h
+++ b/src/group.h
@@ -103,8 +103,8 @@ static void secp256k1_gej_add_ge(secp256k1_gej_t *r, const secp256k1_gej_t *a, c
 
 /** Set r equal to the sum of a and b (with b given in affine coordinates). This is more efficient
     than secp256k1_gej_add_var. It is identical to secp256k1_gej_add_ge but without constant-time
-    guarantee, and b is allowed to be infinity. */
-static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b);
+    guarantee, and b is allowed to be infinity. If rzr is non-NULL, a must not be infinity, and *rzr is set such that r->z = a->z * *rzr whenever r is not infinity (*rzr is set to 0 if r is infinity). */
+static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, secp256k1_fe_t *rzr);
 
 /** Set r equal to the sum of a and b (with the inverse of b's Z coordinate passed as bzinv). */
 static void secp256k1_gej_add_zinv_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, const secp256k1_fe_t *bzinv);
diff --git a/src/group_impl.h b/src/group_impl.h
index 165e6938f3d..45bafa6623d 100644
--- a/src/group_impl.h
+++ b/src/group_impl.h
@@ -354,17 +354,18 @@ static void secp256k1_gej_add_var(secp256k1_gej_t *r, const secp256k1_gej_t *a,
     secp256k1_fe_add(&r->y, &h3);
 }
 
-static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b) {
+static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *a, const secp256k1_ge_t *b, secp256k1_fe_t *rzr) {
     /* 8 mul, 3 sqr, 4 normalize, 12 mul_int/add/negate */
     secp256k1_fe_t z12, u1, u2, s1, s2, h, i, i2, h2, h3, t;
     if (a->infinity) {
-        r->infinity = b->infinity;
-        r->x = b->x;
-        r->y = b->y;
-        secp256k1_fe_set_int(&r->z, 1);
+        VERIFY_CHECK(rzr == NULL);
+        secp256k1_gej_set_ge(r, b);
         return;
     }
     if (b->infinity) {
+        if (rzr) {
+            secp256k1_fe_set_int(rzr, 1);
+        }
         *r = *a;
         return;
     }
@@ -379,8 +380,11 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *
     secp256k1_fe_negate(&i, &s1, 1); secp256k1_fe_add(&i, &s2);
     if (secp256k1_fe_normalizes_to_zero_var(&h)) {
         if (secp256k1_fe_normalizes_to_zero_var(&i)) {
-            secp256k1_gej_double_var(r, a, NULL);
+            secp256k1_gej_double_var(r, a, rzr);
         } else {
+            if (rzr) {
+                secp256k1_fe_set_int(rzr, 0);
+            }
             r->infinity = 1;
         }
         return;
@@ -388,7 +392,10 @@ static void secp256k1_gej_add_ge_var(secp256k1_gej_t *r, const secp256k1_gej_t *
     secp256k1_fe_sqr(&i2, &i);
     secp256k1_fe_sqr(&h2, &h);
     secp256k1_fe_mul(&h3, &h, &h2);
-    r->z = a->z; secp256k1_fe_mul(&r->z, &r->z, &h);
+    if (rzr) {
+        *rzr = h;
+    }
+    secp256k1_fe_mul(&r->z, &a->z, &h);
     secp256k1_fe_mul(&t, &u1, &h2);
     r->x = t; secp256k1_fe_mul_int(&r->x, 2); secp256k1_fe_add(&r->x, &h3); secp256k1_fe_negate(&r->x, &r->x, 3); secp256k1_fe_add(&r->x, &i2);
     secp256k1_fe_negate(&r->y, &r->x, 5); secp256k1_fe_add(&r->y, &t); secp256k1_fe_mul(&r->y, &r->y, &i);
diff --git a/src/tests.c b/src/tests.c
index 70765c90d07..fb8ce76924a 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -1035,9 +1035,13 @@ void test_ge(void) {
             }
             secp256k1_ge_set_gej_var(&ref, &refj);
 
-            /* Test gej + ge (var). */
-            secp256k1_gej_add_ge_var(&resj, &gej[i1], &ge[i2]);
+            /* Test gej + ge with Z ratio result (var). */
+            secp256k1_gej_add_ge_var(&resj, &gej[i1], &ge[i2], secp256k1_gej_is_infinity(&gej[i1]) ? NULL : &zr);
             ge_equals_gej(&ref, &resj);
+            if (!secp256k1_gej_is_infinity(&gej[i1]) && !secp256k1_gej_is_infinity(&resj)) {
+                secp256k1_fe_t zrz; secp256k1_fe_mul(&zrz, &zr, &gej[i1].z);
+                CHECK(secp256k1_fe_equal_var(&zrz, &resj.z));
+            }
 
             /* Test gej + ge (var, with additional Z factor). */
             {