From d1c1ba90caa99ac2118a4dc9ae584db543ef7f72 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Sat, 16 Nov 2013 15:50:12 +0100
Subject: [PATCH 01/21] First version of ecp_mul_comb()

---
 include/polarssl/ecp.h |  12 +-
 library/ecp.c          | 312 ++++++++++++++++++++++++++++++++++++++---
 2 files changed, 303 insertions(+), 21 deletions(-)

diff --git a/include/polarssl/ecp.h b/include/polarssl/ecp.h
index 02f6f9349..9a52ac692 100644
--- a/include/polarssl/ecp.h
+++ b/include/polarssl/ecp.h
@@ -476,10 +476,14 @@ int ecp_sub( const ecp_group *grp, ecp_point *R,
  *                  has very low overhead, it is recommended to always provide
  *                  a non-NULL f_rng parameter when using secret inputs.
  */
-int ecp_mul( ecp_group *grp, ecp_point *R,
-             const mpi *m, const ecp_point *P,
-             int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
-
+// Temporary, WIP
+int ecp_mul_wnaf( ecp_group *grp, ecp_point *R,
+                  const mpi *m, const ecp_point *P,
+                  int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
+int ecp_mul_comb( ecp_group *grp, ecp_point *R,
+                  const mpi *m, const ecp_point *P,
+                  int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
+#define ecp_mul ecp_mul_comb
 
 /**
  * \brief           Check that a point is a valid public key on this curve
diff --git a/library/ecp.c b/library/ecp.c
index 3a075c403..7aa98c071 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -41,6 +41,11 @@
  *     for elliptic curve cryptosystems. In : Cryptographic Hardware and
  *     Embedded Systems. Springer Berlin Heidelberg, 1999. p. 292-302.
  *     <http://link.springer.com/chapter/10.1007/3-540-48059-5_25>
+ *
+ * [3] HEDABOU, Mustapha, PINEL, Pierre, et BÉNÉTEAU, Lucien. A comb method to
+ *     render ECC resistant against Side Channel Attacks. IACR Cryptology
+ *     ePrint Archive, 2004, vol. 2004, p. 342.
+ *     <http://eprint.iacr.org/2004/342.pdf>
  */
 
 #include "polarssl/config.h"
@@ -902,7 +907,7 @@ cleanup:
 }
 
 /*
- * Normalize jacobian coordinates of an array of points,
+ * Normalize jacobian coordinates of an array of (pointers to) points,
  * using Montgomery's trick to perform only one inversion mod P.
  * (See for example Cohen's "A Course in Computational Algebraic Number
  * Theory", Algorithm 10.3.4.)
@@ -911,14 +916,14 @@ cleanup:
  * This should never happen, see choice of w in ecp_mul().
  */
 static int ecp_normalize_many( const ecp_group *grp,
-                               ecp_point T[], size_t t_len )
+                               ecp_point *T[], size_t t_len )
 {
     int ret;
     size_t i;
     mpi *c, u, Zi, ZZi;
 
     if( t_len < 2 )
-        return( ecp_normalize( grp, T ) );
+        return( ecp_normalize( grp, *T ) );
 
     if( ( c = (mpi *) polarssl_malloc( t_len * sizeof( mpi ) ) ) == NULL )
         return( POLARSSL_ERR_ECP_MALLOC_FAILED );
@@ -930,10 +935,10 @@ static int ecp_normalize_many( const ecp_group *grp,
     /*
      * c[i] = Z_0 * ... * Z_i
      */
-    MPI_CHK( mpi_copy( &c[0], &T[0].Z ) );
+    MPI_CHK( mpi_copy( &c[0], &T[0]->Z ) );
     for( i = 1; i < t_len; i++ )
     {
-        MPI_CHK( mpi_mul_mpi( &c[i], &c[i-1], &T[i].Z ) );
+        MPI_CHK( mpi_mul_mpi( &c[i], &c[i-1], &T[i]->Z ) );
         MOD_MUL( c[i] );
     }
 
@@ -953,18 +958,18 @@ static int ecp_normalize_many( const ecp_group *grp,
         }
         else
         {
-            MPI_CHK( mpi_mul_mpi( &Zi, &u, &c[i-1] ) ); MOD_MUL( Zi );
-            MPI_CHK( mpi_mul_mpi( &u,  &u, &T[i].Z ) ); MOD_MUL( u );
+            MPI_CHK( mpi_mul_mpi( &Zi, &u, &c[i-1]  ) ); MOD_MUL( Zi );
+            MPI_CHK( mpi_mul_mpi( &u,  &u, &T[i]->Z ) ); MOD_MUL( u );
         }
 
         /*
          * proceed as in normalize()
          */
-        MPI_CHK( mpi_mul_mpi( &ZZi,    &Zi,     &Zi     ) ); MOD_MUL( ZZi );
-        MPI_CHK( mpi_mul_mpi( &T[i].X, &T[i].X, &ZZi    ) ); MOD_MUL( T[i].X );
-        MPI_CHK( mpi_mul_mpi( &T[i].Y, &T[i].Y, &ZZi    ) ); MOD_MUL( T[i].Y );
-        MPI_CHK( mpi_mul_mpi( &T[i].Y, &T[i].Y, &Zi     ) ); MOD_MUL( T[i].Y );
-        MPI_CHK( mpi_lset( &T[i].Z, 1 ) );
+        MPI_CHK( mpi_mul_mpi( &ZZi,     &Zi,      &Zi  ) ); MOD_MUL( ZZi );
+        MPI_CHK( mpi_mul_mpi( &T[i]->X, &T[i]->X, &ZZi ) ); MOD_MUL( T[i]->X );
+        MPI_CHK( mpi_mul_mpi( &T[i]->Y, &T[i]->Y, &ZZi ) ); MOD_MUL( T[i]->Y );
+        MPI_CHK( mpi_mul_mpi( &T[i]->Y, &T[i]->Y, &Zi  ) ); MOD_MUL( T[i]->Y );
+        MPI_CHK( mpi_lset( &T[i]->Z, 1 ) );
 
         if( i == 0 )
             break;
@@ -1250,6 +1255,7 @@ static int ecp_precompute( const ecp_group *grp,
     int ret;
     size_t i;
     ecp_point PP;
+    ecp_point *TT[ 1 << ( POLARSSL_ECP_WINDOW_SIZE - 1 ) ];
 
     ecp_point_init( &PP );
 
@@ -1261,9 +1267,11 @@ static int ecp_precompute( const ecp_group *grp,
         MPI_CHK( ecp_add_mixed( grp, &T[i], &T[i-1], &PP, +1 ) );
 
     /*
-     * T[0] = P already has normalized coordinates
+     * T[0] = P already has normalized coordinates, normalize others
      */
-    MPI_CHK( ecp_normalize_many( grp, T + 1, t_len - 1 ) );
+    for( i = 1; i < t_len; i++ )
+        TT[i-1] = &T[i];
+    MPI_CHK( ecp_normalize_many( grp, TT, t_len - 1 ) );
 
 cleanup:
 
@@ -1342,9 +1350,9 @@ cleanup:
  * countermeasure against DPA in 5.3 of [2] (with the obvious adaptation that
  * we use jacobian coordinates, not standard projective coordinates).
  */
-int ecp_mul( ecp_group *grp, ecp_point *R,
-             const mpi *m, const ecp_point *P,
-             int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
+int ecp_mul_wnaf( ecp_group *grp, ecp_point *R,
+                  const mpi *m, const ecp_point *P,
+                  int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
 {
     int ret;
     unsigned char w, m_is_odd, p_eq_g;
@@ -1503,6 +1511,276 @@ cleanup:
     return( ret );
 }
 
+/*
+ * Compute the representation of m that will be used with the comb method.
+ *
+ * The basic comb method is described in GECC 3.44 for example. We use a
+ * modified version [3] that provides resistance to SPA by avoiding zero
+ * digits in the representation. We represent (K_i, s_i) from the paper as a
+ * single signed char.
+ *
+ * Calling conventions:
+ * - x is an array of size d
+ * - w is the size, ie number of teeth, of the comb
+ * - m is the MPI, expected to be odd and such that, if l = bitlength(m):
+ *   ceil( l / w ) <= d (these two assumptions are not checked, an incorrect
+ *   result my be returned if they are not satisfied)
+ */
+static void ecp_comb_fixed( signed char x[], size_t d,
+                            unsigned char w, const mpi *m )
+{
+    size_t i, j;
+
+    memset( x, 0, d );
+
+    /* For x[0] use the classical comb value without adjustement */
+    for( j = 0; j < w; j++ )
+        x[0] |= mpi_get_bit( m, d * j ) << j;
+
+    for( i = 1; i < d; i++ )
+    {
+        /* Get the classical comb value */
+        for( j = 0; j < w; j++ )
+            x[i] |= mpi_get_bit( m, i + d * j ) << j;
+
+        /* Adjust if it's zero */
+        if( x[i] == 0 )
+        {
+            x[i] = x[i-1];
+            x[i-1] *= -1;
+        }
+    }
+}
+
+/*
+ * Precompute points for the comb method
+ *
+ * If i = i_{w-1} ... i_0 is the binary representation of i, then
+ * T[i-1] = i_{w-1} 2^{(w-1)d} P + ... + i_1 2^d P + i_0 P
+ *
+ * T must be able to hold at least 2^w - 1 elements
+ */
+static int ecp_precompute_comb( const ecp_group *grp,
+                                ecp_point T[], const ecp_point *P,
+                                unsigned char w, size_t d )
+{
+    int ret;
+    unsigned char i, mask;
+    size_t j, t_len = ( 1U << w ) - 1;
+    ecp_point *cur, *TT[t_len - 1];
+
+    /*
+     * Compute the 2^{di}
+     */
+    MPI_CHK( ecp_copy( &T[0], P ) );
+
+    for( i = 1; i < w; i++ )
+    {
+        cur = T + ( 1 << i ) - 1;
+        ecp_copy( cur, T + ( 1 << (i-1) ) - 1 );
+        for( j = 0; j < d; j++ )
+            MPI_CHK( ecp_double_jac( grp, cur, cur ) );
+        TT[i-1] = cur;
+    }
+
+    /* P already normalized, so w - 1 points to do */
+    ecp_normalize_many( grp, TT, w - 1);
+
+    /*
+     * Compute the remaining ones using the minimal number of additions
+     */
+    j = 0;
+    for( i = 3; i < (1U << w); i++ )
+    {
+        if( T[i - 1].X.p != NULL )
+            continue;
+
+        /* Find the least significant non-zero bit of the index */
+        for( mask = 1; mask != 0; mask <<=1 )
+            if( ( i & mask ) != 0 )
+                break;
+
+        /* Use the previously computed values */
+        ecp_add_mixed( grp, &T[i - 1], &T[i - mask - 1], &T[mask - 1], +1 );
+
+        /* Register for normalisation */
+        TT[j++] = &T[i - 1];
+    }
+
+    ecp_normalize_many( grp, TT, j );
+
+cleanup:
+    return( ret );
+}
+
+/*
+ * Select precomputed point: R = sign(i) * T[ abs(i) ]
+ */
+static int ecp_select_comb( const ecp_group *grp, ecp_point *R,
+                            const ecp_point T[], signed char i )
+{
+    int ret;
+
+    if( i > 0 )
+        return( ecp_copy( R, &T[i - 1] ) );
+
+    MPI_CHK( ecp_copy( R, &T[-i - 1] ) );
+
+    /*
+     * -R = (R.X, -R.Y, R.Z), and
+     * -R.Y mod P = P - R.Y unless R.Y == 0
+     */
+    if( mpi_cmp_int( &R->Y, 0 ) != 0 )
+        MPI_CHK( mpi_sub_mpi( &R->Y, &grp->P, &R->Y ) );
+
+cleanup:
+    return( ret );
+}
+
+/*
+ * Core multiplication algorithm for the (modified) comb method.
+ * This part is actually common with the basic comb method (GECC 3.44)
+ */
+static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
+                              const ecp_point T[], const signed char x[],
+                              size_t d )
+{
+    int ret;
+    ecp_point Txi;
+    size_t i;
+
+    ecp_point_init( &Txi );
+
+    /* Avoid useless doubling/addition of 0 by better initialisation */
+    i = d - 1;
+    MPI_CHK( ecp_select_comb( grp, R, T, x[i] ) );
+
+    while( i-- != 0 )
+    {
+        MPI_CHK( ecp_double_jac( grp, R, R ) );
+        MPI_CHK( ecp_select_comb( grp, &Txi, T, x[i] ) );
+        MPI_CHK( ecp_add_mixed( grp, R, R, &Txi, +1 ) );
+    }
+
+cleanup:
+    ecp_point_free( &Txi );
+
+    return( ret );
+}
+
+/*
+ * Multiplication using the comb method, WIP
+ */
+int ecp_mul_comb( ecp_group *grp, ecp_point *R,
+                  const mpi *m, const ecp_point *P,
+                  int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
+{
+    int ret;
+    unsigned char w, m_is_odd, p_eq_g;
+    size_t pre_len, d, i;
+    signed char k[100]; // TODO
+    ecp_point Q, *T = NULL, S[2];
+    mpi M;
+
+    (void) f_rng;
+    (void) p_rng; // TODO
+
+    if( mpi_cmp_int( m, 0 ) < 0 || mpi_msb( m ) > grp->nbits )
+        return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
+
+    mpi_init( &M );
+    ecp_point_init( &Q );
+    ecp_point_init( &S[0] );
+    ecp_point_init( &S[1] );
+
+    /*
+     * Check if P == G
+     */
+    p_eq_g = ( mpi_cmp_int( &P->Z, 1 ) == 0 &&
+               mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
+               mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
+
+    /* TODO: adjust exact value */
+    w = grp->nbits >= 192 ? 5 : 2;
+
+    pre_len = 1U << w;
+    d = ( grp->nbits + w - 1 ) / w;
+
+    /*
+     * Prepare precomputed points: if P == G we want to
+     * use grp->T if already initialized, or initiliaze it.
+     */
+    if( ! p_eq_g || grp->T == NULL )
+    {
+        T = (ecp_point *) polarssl_malloc( pre_len * sizeof( ecp_point ) );
+        if( T == NULL )
+        {
+            ret = POLARSSL_ERR_ECP_MALLOC_FAILED;
+            goto cleanup;
+        }
+
+        for( i = 0; i < pre_len; i++ )
+            ecp_point_init( &T[i] );
+
+        MPI_CHK( ecp_precompute_comb( grp, T, P, w, d ) );
+
+        if( p_eq_g )
+        {
+            grp->T = T;
+            grp->T_size = pre_len;
+        }
+    }
+    else
+    {
+        T = grp->T;
+
+        /* Should never happen, but we want to be extra sure */
+        if( pre_len != grp->T_size )
+        {
+            ret = POLARSSL_ERR_ECP_BAD_INPUT_DATA;
+            goto cleanup;
+        }
+    }
+
+    /*
+     * Make sure M is odd (M = m + 1 or M = m + 2)
+     * later we'll get m * P by subtracting P or 2 * P to M * P.
+     */
+    m_is_odd = ( mpi_get_bit( m, 0 ) == 1 );
+
+    MPI_CHK( mpi_copy( &M, m ) );
+    MPI_CHK( mpi_add_int( &M, &M, 1 + m_is_odd ) );
+
+    /*
+     * Go for comb multiplication, Q = M * P
+     */
+    ecp_comb_fixed( k, d, w, &M );
+    ecp_mul_comb_core( grp, &Q, T, k, d );
+
+    /*
+     * Now get m * P from M * P
+     */
+    MPI_CHK( ecp_copy( &S[0], P ) );
+    MPI_CHK( ecp_add( grp, &S[1], P, P ) );
+    MPI_CHK( ecp_sub( grp, R, &Q, &S[m_is_odd] ) );
+
+cleanup:
+
+    if( T != NULL && ! p_eq_g )
+    {
+        for( i = 0; i < pre_len; i++ )
+            ecp_point_free( &T[i] );
+        polarssl_free( T );
+    }
+
+    ecp_point_free( &S[1] );
+    ecp_point_free( &S[0] );
+    ecp_point_free( &Q );
+    mpi_free( &M );
+
+    return( ret );
+}
+
 /*
  * Check that a point is valid as a public key (SEC1 3.2.3.1)
  */

From 101a39f55f0a7154ea94bd5b9c89b0d9631510f8 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Wed, 20 Nov 2013 14:47:19 +0100
Subject: [PATCH 02/21] Improve comb method (less precomputed points)

---
 library/ecp.c | 121 +++++++++++++++++++++++++++-----------------------
 1 file changed, 66 insertions(+), 55 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 7aa98c071..b606a02fe 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1515,27 +1515,33 @@ cleanup:
  * Compute the representation of m that will be used with the comb method.
  *
  * The basic comb method is described in GECC 3.44 for example. We use a
- * modified version [3] that provides resistance to SPA by avoiding zero
- * digits in the representation. We represent (K_i, s_i) from the paper as a
- * single signed char.
+ * modified version that provides resistance to SPA by avoiding zero
+ * digits in the representation as in [3]. We modify the method further by
+ * requiring that all K_i be odd, which has the small cost that our
+ * representation uses on more K, due to carries.
+ *
+ * Also, for the sake of compactness, only the seven low-order bits of x[i]
+ * are used to represent K_i, and the msb of x[i] encodes the the sign (s_i in
+ * the paper): it is set if and only if if s_i == -1;
  *
  * Calling conventions:
- * - x is an array of size d
+ * - x is an array of size d + 1
  * - w is the size, ie number of teeth, of the comb
- * - m is the MPI, expected to be odd and such that, if l = bitlength(m):
- *   ceil( l / w ) <= d (these two assumptions are not checked, an incorrect
- *   result my be returned if they are not satisfied)
+ * - m is the MPI, expected to be odd and such that bitlength(m) <= w * d
+ *   (the result will be incorrect if these assumptions are not satisfied)
  */
-static void ecp_comb_fixed( signed char x[], size_t d,
+static void ecp_comb_fixed( unsigned char x[], size_t d,
                             unsigned char w, const mpi *m )
 {
     size_t i, j;
+    unsigned char c, cc, adjust;
 
-    memset( x, 0, d );
+    memset( x, 0, d+1 );
 
     /* For x[0] use the classical comb value without adjustement */
     for( j = 0; j < w; j++ )
         x[0] |= mpi_get_bit( m, d * j ) << j;
+    c = 0;
 
     for( i = 1; i < d; i++ )
     {
@@ -1543,95 +1549,100 @@ static void ecp_comb_fixed( signed char x[], size_t d,
         for( j = 0; j < w; j++ )
             x[i] |= mpi_get_bit( m, i + d * j ) << j;
 
-        /* Adjust if it's zero */
-        if( x[i] == 0 )
-        {
-            x[i] = x[i-1];
-            x[i-1] *= -1;
-        }
+        /* Add carry and update it */
+        cc   = x[i] & c;
+        x[i] = x[i] ^ c;
+        c = cc;
+
+        /* Make sure x[i] is odd, avoiding if-branches */
+        adjust = 1 - ( x[i] & 0x01 );
+        c   |= x[i] & ( x[i-1] * adjust );
+        x[i] = x[i] ^ ( x[i-1] * adjust );
+        x[i-1] |= adjust << 7;
     }
+
+    /* Finish with the carry */
+    x[i] = c;
+    adjust = 1 - ( x[i] & 0x01 );
+    c   |= x[i] & ( x[i-1] * adjust );
+    x[i] = x[i] ^ ( x[i-1] * adjust );
+    x[i-1] |= adjust << 7;
 }
 
 /*
  * Precompute points for the comb method
  *
- * If i = i_{w-1} ... i_0 is the binary representation of i, then
- * T[i-1] = i_{w-1} 2^{(w-1)d} P + ... + i_1 2^d P + i_0 P
+ * If i = i_{w-1} ... i_1 is the binary representation of i, then
+ * T[i] = i_{w-1} 2^{(w-1)d} P + ... + i_1 2^d P + P
  *
- * T must be able to hold at least 2^w - 1 elements
+ * T must be able to hold at least 2^{w - 1} elements
  */
 static int ecp_precompute_comb( const ecp_group *grp,
                                 ecp_point T[], const ecp_point *P,
                                 unsigned char w, size_t d )
 {
     int ret;
-    unsigned char i, mask;
-    size_t j, t_len = ( 1U << w ) - 1;
-    ecp_point *cur, *TT[t_len - 1];
+    unsigned char i, j, k;
+    ecp_point *cur, *TT[200]; // TODO
 
     /*
-     * Compute the 2^{di}
+     * Set T[0] = P and
+     * T[2^{l-1}] = 2^{dl} P for l = 1 .. w-1 (this is not the final value)
      */
     MPI_CHK( ecp_copy( &T[0], P ) );
 
-    for( i = 1; i < w; i++ )
+    k = 0;
+    for( i = 1; i < ( 1U << (w-1) ); i <<= 1 )
     {
-        cur = T + ( 1 << i ) - 1;
-        ecp_copy( cur, T + ( 1 << (i-1) ) - 1 );
+        cur = T + i;
+        MPI_CHK( ecp_copy( cur, T + ( i >> 1 ) ) );
         for( j = 0; j < d; j++ )
             MPI_CHK( ecp_double_jac( grp, cur, cur ) );
-        TT[i-1] = cur;
+
+        TT[k++] = cur;
     }
 
-    /* P already normalized, so w - 1 points to do */
-    ecp_normalize_many( grp, TT, w - 1);
+    ecp_normalize_many( grp, TT, k );
 
     /*
      * Compute the remaining ones using the minimal number of additions
+     * Be careful to update T[2^l] only after using it!
      */
-    j = 0;
-    for( i = 3; i < (1U << w); i++ )
+    k = 0;
+    for( i = 1; i < ( 1U << (w-1) ); i <<= 1 )
     {
-        if( T[i - 1].X.p != NULL )
-            continue;
-
-        /* Find the least significant non-zero bit of the index */
-        for( mask = 1; mask != 0; mask <<=1 )
-            if( ( i & mask ) != 0 )
-                break;
-
-        /* Use the previously computed values */
-        ecp_add_mixed( grp, &T[i - 1], &T[i - mask - 1], &T[mask - 1], +1 );
-
-        /* Register for normalisation */
-        TT[j++] = &T[i - 1];
+        j = i;
+        while( j-- )
+        {
+            ecp_add_mixed( grp, &T[i + j], &T[j], &T[i], +1 );
+            TT[k++] = &T[i + j];
+        }
     }
 
-    ecp_normalize_many( grp, TT, j );
+    ecp_normalize_many( grp, TT, k );
 
 cleanup:
     return( ret );
 }
 
 /*
- * Select precomputed point: R = sign(i) * T[ abs(i) ]
+ * Select precomputed point: R = sign(i) * T[ abs(i) / 2 ]
  */
 static int ecp_select_comb( const ecp_group *grp, ecp_point *R,
-                            const ecp_point T[], signed char i )
+                            const ecp_point T[], unsigned char i )
 {
     int ret;
 
-    if( i > 0 )
-        return( ecp_copy( R, &T[i - 1] ) );
-
-    MPI_CHK( ecp_copy( R, &T[-i - 1] ) );
+    /* Ignore the "sign" bit */
+    MPI_CHK( ecp_copy( R, &T[ ( i & 0x7Fu ) >> 1 ] ) );
 
     /*
      * -R = (R.X, -R.Y, R.Z), and
      * -R.Y mod P = P - R.Y unless R.Y == 0
      */
-    if( mpi_cmp_int( &R->Y, 0 ) != 0 )
-        MPI_CHK( mpi_sub_mpi( &R->Y, &grp->P, &R->Y ) );
+    if( ( i & 0x80 ) != 0 )
+        if( mpi_cmp_int( &R->Y, 0 ) != 0 )
+            MPI_CHK( mpi_sub_mpi( &R->Y, &grp->P, &R->Y ) );
 
 cleanup:
     return( ret );
@@ -1642,7 +1653,7 @@ cleanup:
  * This part is actually common with the basic comb method (GECC 3.44)
  */
 static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
-                              const ecp_point T[], const signed char x[],
+                              const ecp_point T[], const unsigned char x[],
                               size_t d )
 {
     int ret;
@@ -1652,7 +1663,7 @@ static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
     ecp_point_init( &Txi );
 
     /* Avoid useless doubling/addition of 0 by better initialisation */
-    i = d - 1;
+    i = d;
     MPI_CHK( ecp_select_comb( grp, R, T, x[i] ) );
 
     while( i-- != 0 )
@@ -1678,7 +1689,7 @@ int ecp_mul_comb( ecp_group *grp, ecp_point *R,
     int ret;
     unsigned char w, m_is_odd, p_eq_g;
     size_t pre_len, d, i;
-    signed char k[100]; // TODO
+    unsigned char k[200]; // TODO
     ecp_point Q, *T = NULL, S[2];
     mpi M;
 

From c30200e4ce9b223d01c8c738d1d0c2c10afb7706 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Wed, 20 Nov 2013 18:39:55 +0100
Subject: [PATCH 03/21] Fix bound issues

---
 include/polarssl/ecp.h |  8 ++++----
 library/ecp.c          | 29 ++++++++++++++++++++++-------
 2 files changed, 26 insertions(+), 11 deletions(-)

diff --git a/include/polarssl/ecp.h b/include/polarssl/ecp.h
index 9a52ac692..e46dd6365 100644
--- a/include/polarssl/ecp.h
+++ b/include/polarssl/ecp.h
@@ -157,16 +157,16 @@ ecp_keypair;
 #define POLARSSL_ECP_MAX_PT_LEN   ( 2 * POLARSSL_ECP_MAX_BYTES + 1 )
 
 /*
- * Maximum window size (actually, NAF width) used for point multipliation.
- * Default: 8.
- * Minimum value: 2. Maximum value: 8.
+ * Maximum "window" size used for point multiplication.
+ * Default: 6.
+ * Minimum value: 2. Maximum value: 7.
  *
  * Result is an array of at most ( 1 << ( POLARSSL_ECP_WINDOW_SIZE - 1 ) )
  * points used for point multiplication.
  *
  * Reduction in size may reduce speed for big curves.
  */
-#define POLARSSL_ECP_WINDOW_SIZE    8   /**< Maximum NAF width used. */
+#define POLARSSL_ECP_WINDOW_SIZE    6   /**< Maximum window size used. */
 
 /*
  * Point formats, from RFC 4492's enum ECPointFormat
diff --git a/library/ecp.c b/library/ecp.c
index b606a02fe..ac6a06dff 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1512,13 +1512,26 @@ cleanup:
 }
 
 /*
- * Compute the representation of m that will be used with the comb method.
+ * Check and define parameters used by the comb method (see below for details)
+ */
+#if POLARSSL_ECP_WINDOW_SIZE < 2 || POLARSSL_ECP_WINDOW_SIZE > 7
+#error "POLARSSL_ECP_WINDOW_SIZE out of bounds"
+#endif
+
+/* d = ceil( n / w ) */
+#define COMB_MAX_D      ( POLARSSL_ECP_MAX_BITS + 1 ) / 2
+
+/* number of precomputed points */
+#define COMB_MAX_PRE    ( 1 << ( POLARSSL_ECP_WINDOW_SIZE - 1 ) )
+
+/*
+ * Compute the representation of m that will be used with our comb method.
  *
  * The basic comb method is described in GECC 3.44 for example. We use a
  * modified version that provides resistance to SPA by avoiding zero
  * digits in the representation as in [3]. We modify the method further by
  * requiring that all K_i be odd, which has the small cost that our
- * representation uses on more K, due to carries.
+ * representation uses one more K_i, due to carries.
  *
  * Also, for the sake of compactness, only the seven low-order bits of x[i]
  * are used to represent K_i, and the msb of x[i] encodes the the sign (s_i in
@@ -1526,7 +1539,8 @@ cleanup:
  *
  * Calling conventions:
  * - x is an array of size d + 1
- * - w is the size, ie number of teeth, of the comb
+ * - w is the size, ie number of teeth, of the comb, and must be between
+ *   2 and 7 (in practice, between 2 and POLARSSL_ECP_WINDOW_SIZE)
  * - m is the MPI, expected to be odd and such that bitlength(m) <= w * d
  *   (the result will be incorrect if these assumptions are not satisfied)
  */
@@ -1582,8 +1596,9 @@ static int ecp_precompute_comb( const ecp_group *grp,
                                 unsigned char w, size_t d )
 {
     int ret;
-    unsigned char i, j, k;
-    ecp_point *cur, *TT[200]; // TODO
+    unsigned char i, k;
+    size_t j;
+    ecp_point *cur, *TT[COMB_MAX_PRE - 1];
 
     /*
      * Set T[0] = P and
@@ -1689,7 +1704,7 @@ int ecp_mul_comb( ecp_group *grp, ecp_point *R,
     int ret;
     unsigned char w, m_is_odd, p_eq_g;
     size_t pre_len, d, i;
-    unsigned char k[200]; // TODO
+    unsigned char k[COMB_MAX_D + 1];
     ecp_point Q, *T = NULL, S[2];
     mpi M;
 
@@ -1714,7 +1729,7 @@ int ecp_mul_comb( ecp_group *grp, ecp_point *R,
     /* TODO: adjust exact value */
     w = grp->nbits >= 192 ? 5 : 2;
 
-    pre_len = 1U << w;
+    pre_len = 1U << ( w - 1 );
     d = ( grp->nbits + w - 1 ) / w;
 
     /*

From 70c14372c65fe85b2651d60adb01c25f772438eb Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Wed, 20 Nov 2013 20:07:26 +0100
Subject: [PATCH 04/21] Add coordinate randomization back

---
 library/ecp.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index ac6a06dff..bba69fe9b 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1668,8 +1668,10 @@ cleanup:
  * This part is actually common with the basic comb method (GECC 3.44)
  */
 static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
-                              const ecp_point T[], const unsigned char x[],
-                              size_t d )
+                              const ecp_point T[],
+                              const unsigned char x[], size_t d,
+                              int (*f_rng)(void *, unsigned char *, size_t),
+                              void *p_rng )
 {
     int ret;
     ecp_point Txi;
@@ -1677,9 +1679,11 @@ static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
 
     ecp_point_init( &Txi );
 
-    /* Avoid useless doubling/addition of 0 by better initialisation */
+    /* Start with a non-zero point and randomize its coordinates */
     i = d;
     MPI_CHK( ecp_select_comb( grp, R, T, x[i] ) );
+    if( f_rng != 0 )
+        MPI_CHK( ecp_randomize_coordinates( grp, R, f_rng, p_rng ) );
 
     while( i-- != 0 )
     {
@@ -1708,9 +1712,6 @@ int ecp_mul_comb( ecp_group *grp, ecp_point *R,
     ecp_point Q, *T = NULL, S[2];
     mpi M;
 
-    (void) f_rng;
-    (void) p_rng; // TODO
-
     if( mpi_cmp_int( m, 0 ) < 0 || mpi_msb( m ) > grp->nbits )
         return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
 
@@ -1781,7 +1782,7 @@ int ecp_mul_comb( ecp_group *grp, ecp_point *R,
      * Go for comb multiplication, Q = M * P
      */
     ecp_comb_fixed( k, d, w, &M );
-    ecp_mul_comb_core( grp, &Q, T, k, d );
+    ecp_mul_comb_core( grp, &Q, T, k, d, f_rng, p_rng );
 
     /*
      * Now get m * P from M * P

From 04a0225388eb9a3dbd56c1f454b19d4f8d8028ef Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Wed, 20 Nov 2013 22:57:38 +0100
Subject: [PATCH 05/21] Optimize w in the comb method

---
 library/ecp.c | 42 ++++++++++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 4 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index bba69fe9b..0cefd0a2c 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -870,6 +870,7 @@ cleanup:
 
 /*
  * Normalize jacobian coordinates so that Z == 0 || Z == 1  (GECC 3.2.1)
+ * Cost: 1N := 1I + 3M + 1S
  */
 static int ecp_normalize( const ecp_group *grp, ecp_point *pt )
 {
@@ -914,6 +915,8 @@ cleanup:
  *
  * Warning: fails (returning an error) if one of the points is zero!
  * This should never happen, see choice of w in ecp_mul().
+ *
+ * Cost: 1N(t) := 1I + (6t - 3)M + 1S
  */
 static int ecp_normalize_many( const ecp_group *grp,
                                ecp_point *T[], size_t t_len )
@@ -992,6 +995,8 @@ cleanup:
  * with heavy variable renaming, some reordering and one minor modification
  * (a = 2 * b, c = d - 2a replaced with c = d, c = c - b, c = c - b)
  * in order to use a lot less intermediate variables (6 vs 25).
+ *
+ * Cost: 1D := 2M + 8S
  */
 static int ecp_double_jac( const ecp_group *grp, ecp_point *R,
                            const ecp_point *P )
@@ -1052,6 +1057,8 @@ cleanup:
  * If sign >= 0, perform addition, otherwise perform subtraction,
  * taking advantage of the fact that, for Q != 0, we have
  * -Q = (Q.X, -Q.Y, Q.Z)
+ *
+ * Cost: 1A := 8M + 3S
  */
 static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
                           const ecp_point *P, const ecp_point *Q,
@@ -1153,6 +1160,7 @@ cleanup:
 
 /*
  * Addition: R = P + Q, result's coordinates normalized
+ * Cost: 1A + 1N = 1I + 11M + 4S
  */
 int ecp_add( const ecp_group *grp, ecp_point *R,
              const ecp_point *P, const ecp_point *Q )
@@ -1168,6 +1176,7 @@ cleanup:
 
 /*
  * Subtraction: R = P - Q, result's coordinates normalized
+ * Cost: 1A + 1N = 1I + 11M + 4S
  */
 int ecp_sub( const ecp_group *grp, ecp_point *R,
              const ecp_point *P, const ecp_point *Q )
@@ -1589,7 +1598,9 @@ static void ecp_comb_fixed( unsigned char x[], size_t d,
  * If i = i_{w-1} ... i_1 is the binary representation of i, then
  * T[i] = i_{w-1} 2^{(w-1)d} P + ... + i_1 2^d P + P
  *
- * T must be able to hold at least 2^{w - 1} elements
+ * T must be able to hold 2^{w - 1} elements
+ *
+ * Cost: d(w-1) D + (2^{w-1} - 1) A + 1 N(w-1) + 1 N(2^{w-1} - 1)
  */
 static int ecp_precompute_comb( const ecp_group *grp,
                                 ecp_point T[], const ecp_point *P,
@@ -1666,6 +1677,8 @@ cleanup:
 /*
  * Core multiplication algorithm for the (modified) comb method.
  * This part is actually common with the basic comb method (GECC 3.44)
+ *
+ * Cost: d A + d D + 1 R
  */
 static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
                               const ecp_point T[],
@@ -1699,7 +1712,7 @@ cleanup:
 }
 
 /*
- * Multiplication using the comb method, WIP
+ * Multiplication using the comb method
  */
 int ecp_mul_comb( ecp_group *grp, ecp_point *R,
                   const mpi *m, const ecp_point *P,
@@ -1727,9 +1740,30 @@ int ecp_mul_comb( ecp_group *grp, ecp_point *R,
                mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
                mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
 
-    /* TODO: adjust exact value */
-    w = grp->nbits >= 192 ? 5 : 2;
+    /*
+     * Minimize the number of multiplications, that is minimize
+     * 10 * d * w + 18 * 2^(w-1) + 11 * d + 7 * w
+     * (see costs of the various parts, with 1S = 1M)
+     */
+    w = grp->nbits >= 384 ? 5 : 4;
 
+    /*
+     * If P == G, pre-compute a bit more, since this may be re-used later.
+     * Just adding one ups the cost of the first mul by at most 3%.
+     */
+    if( p_eq_g )
+        w++;
+
+    /*
+     * Make sure w is within limits.
+     * (The last test is useful only for very small curves in the test suite.)
+     */
+    if( w > POLARSSL_ECP_WINDOW_SIZE )
+        w = POLARSSL_ECP_WINDOW_SIZE;
+    if( w < 2 || w >= grp->nbits )
+        w = 2;
+
+    /* Other sizes that depend on w */
     pre_len = 1U << ( w - 1 );
     d = ( grp->nbits + w - 1 ) / w;
 

From 09ceaf49d0789e0d10114eb61745fd4bc4576b62 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Wed, 20 Nov 2013 23:06:14 +0100
Subject: [PATCH 06/21] Rm multiplication using NAF

Comb method is at most 1% slower for random points,
and is way faster for fixed point (repeated).
---
 include/polarssl/ecp.h |  11 +-
 library/ecp.c          | 291 +----------------------------------------
 2 files changed, 6 insertions(+), 296 deletions(-)

diff --git a/include/polarssl/ecp.h b/include/polarssl/ecp.h
index e46dd6365..3dfb311b1 100644
--- a/include/polarssl/ecp.h
+++ b/include/polarssl/ecp.h
@@ -476,14 +476,9 @@ int ecp_sub( const ecp_group *grp, ecp_point *R,
  *                  has very low overhead, it is recommended to always provide
  *                  a non-NULL f_rng parameter when using secret inputs.
  */
-// Temporary, WIP
-int ecp_mul_wnaf( ecp_group *grp, ecp_point *R,
-                  const mpi *m, const ecp_point *P,
-                  int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
-int ecp_mul_comb( ecp_group *grp, ecp_point *R,
-                  const mpi *m, const ecp_point *P,
-                  int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
-#define ecp_mul ecp_mul_comb
+int ecp_mul( ecp_group *grp, ecp_point *R,
+             const mpi *m, const ecp_point *P,
+             int (*f_rng)(void *, unsigned char *, size_t), void *p_rng );
 
 /**
  * \brief           Check that a point is a valid public key on this curve
diff --git a/library/ecp.c b/library/ecp.c
index 0cefd0a2c..2dd95bbff 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1190,105 +1190,6 @@ cleanup:
     return( ret );
 }
 
-/*
- * Compute a modified width-w non-adjacent form (NAF) of a number,
- * with a fixed pattern for resistance to simple timing attacks (even SPA),
- * see [1]. (The resulting multiplication algorithm can also been seen as a
- * modification of 2^w-ary multiplication, with signed coefficients, all of
- * them odd.)
- *
- * Input:
- * m must be an odd positive mpi less than w * k bits long
- * x must be an array of k elements
- * w must be less than a certain maximum (currently 8)
- *
- * The result is a sequence x[0], ..., x[k-1] with x[i] in the range
- * - 2^(width - 1) .. 2^(width - 1) - 1 such that
- * m = (2 * x[0] + 1) + 2^width * (2 * x[1] + 1) + ...
- *     + 2^((k-1) * width) * (2 * x[k-1] + 1)
- *
- * Compared to "Algorithm SPA-resistant Width-w NAF with Odd Scalar"
- * p. 335 of the cited reference, here we return only u, not d_w since
- * it is known that the other d_w[j] will be 0.  Moreover, the returned
- * string doesn't actually store u_i but x_i = u_i / 2 since it is known
- * that u_i is odd. Also, since we always select a positive value for d
- * mod 2^w, we don't need to check the sign of u[i-1] when the reference
- * does. Finally, there is an off-by-one error in the reference: the
- * last index should be k-1, not k.
- */
-static int ecp_w_naf_fixed( signed char x[], size_t k,
-                            unsigned char w, const mpi *m )
-{
-    int ret;
-    unsigned int i, u, mask, carry;
-    mpi M;
-
-    mpi_init( &M );
-
-    MPI_CHK( mpi_copy( &M, m ) );
-    mask = ( 1 << w ) - 1;
-    carry = 1 << ( w - 1 );
-
-    for( i = 0; i < k; i++ )
-    {
-        u = M.p[0] & mask;
-
-        if( ( u & 1 ) == 0 && i > 0 )
-            x[i - 1] -= carry;
-
-        x[i] = u >> 1;
-        mpi_shift_r( &M, w );
-    }
-
-    /*
-     * We should have consumed all bits, unless the input value was too big
-     */
-    if( mpi_cmp_int( &M, 0 ) != 0 )
-        ret = POLARSSL_ERR_ECP_BAD_INPUT_DATA;
-
-cleanup:
-
-    mpi_free( &M );
-
-    return( ret );
-}
-
-/*
- * Precompute odd multiples of P up to (2 * t_len - 1) P.
- * The table is filled with T[i] = (2 * i + 1) P.
- */
-static int ecp_precompute( const ecp_group *grp,
-                           ecp_point T[], size_t t_len,
-                           const ecp_point *P )
-{
-    int ret;
-    size_t i;
-    ecp_point PP;
-    ecp_point *TT[ 1 << ( POLARSSL_ECP_WINDOW_SIZE - 1 ) ];
-
-    ecp_point_init( &PP );
-
-    MPI_CHK( ecp_add( grp, &PP, P, P ) );
-
-    MPI_CHK( ecp_copy( &T[0], P ) );
-
-    for( i = 1; i < t_len; i++ )
-        MPI_CHK( ecp_add_mixed( grp, &T[i], &T[i-1], &PP, +1 ) );
-
-    /*
-     * T[0] = P already has normalized coordinates, normalize others
-     */
-    for( i = 1; i < t_len; i++ )
-        TT[i-1] = &T[i];
-    MPI_CHK( ecp_normalize_many( grp, TT, t_len - 1 ) );
-
-cleanup:
-
-    ecp_point_free( &PP );
-
-    return( ret );
-}
-
 /*
  * Randomize jacobian coordinates:
  * (X, Y, Z) -> (l^2 X, l^3 Y, l Z) for random l
@@ -1334,192 +1235,6 @@ cleanup:
     return( ret );
 }
 
-/*
- * Maximum length of the precomputed table
- */
-#define MAX_PRE_LEN     ( 1 << (POLARSSL_ECP_WINDOW_SIZE - 1) )
-
-/*
- * Maximum length of the NAF: ceil( grp->nbits + 1 ) / w
- * (that is: grp->nbits / w + 1)
- * Allow p_bits + 1 bits in case M = grp->N + 1 is one bit longer than N.
- */
-#define MAX_NAF_LEN     ( POLARSSL_ECP_MAX_BITS / 2 + 1 )
-
-/*
- * Integer multiplication: R = m * P
- *
- * Based on fixed-pattern width-w NAF, see comments of ecp_w_naf_fixed().
- *
- * This function executes a fixed number of operations for
- * random m in the range 0 .. 2^nbits - 1.
- *
- * As an additional countermeasure against potential timing attacks,
- * we randomize coordinates before each addition. This was suggested as a
- * countermeasure against DPA in 5.3 of [2] (with the obvious adaptation that
- * we use jacobian coordinates, not standard projective coordinates).
- */
-int ecp_mul_wnaf( ecp_group *grp, ecp_point *R,
-                  const mpi *m, const ecp_point *P,
-                  int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
-{
-    int ret;
-    unsigned char w, m_is_odd, p_eq_g;
-    size_t pre_len = 1, naf_len, i, j;
-    signed char naf[ MAX_NAF_LEN ];
-    ecp_point Q, *T = NULL, S[2];
-    mpi M;
-
-    if( mpi_cmp_int( m, 0 ) < 0 || mpi_msb( m ) > grp->nbits )
-        return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
-
-    mpi_init( &M );
-    ecp_point_init( &Q );
-    ecp_point_init( &S[0] );
-    ecp_point_init( &S[1] );
-
-    /*
-     * Check if P == G
-     */
-    p_eq_g = ( mpi_cmp_int( &P->Z, 1 ) == 0 &&
-               mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
-               mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
-
-    /*
-     * If P == G, pre-compute a lot of points: this will be re-used later,
-     * otherwise, choose window size depending on curve size
-     */
-    if( p_eq_g )
-        w = POLARSSL_ECP_WINDOW_SIZE;
-    else
-        w = grp->nbits >= 512 ? 6 :
-            grp->nbits >= 224 ? 5 :
-                                4;
-
-    /*
-     * Make sure w is within the limits.
-     * The last test ensures that none of the precomputed points is zero,
-     * which wouldn't be handled correctly by ecp_normalize_many().
-     * It is only useful for very small curves as used in the test suite.
-     */
-    if( w > POLARSSL_ECP_WINDOW_SIZE )
-        w = POLARSSL_ECP_WINDOW_SIZE;
-    if( w < 2 || w >= grp->nbits )
-        w = 2;
-
-    pre_len <<= ( w - 1 );
-    naf_len = grp->nbits / w + 1;
-
-    /*
-     * Prepare precomputed points: if P == G we want to
-     * use grp->T if already initialized, or initiliaze it.
-     */
-    if( ! p_eq_g || grp->T == NULL )
-    {
-        T = (ecp_point *) polarssl_malloc( pre_len * sizeof( ecp_point ) );
-        if( T == NULL )
-        {
-            ret = POLARSSL_ERR_ECP_MALLOC_FAILED;
-            goto cleanup;
-        }
-
-        for( i = 0; i < pre_len; i++ )
-            ecp_point_init( &T[i] );
-
-        MPI_CHK( ecp_precompute( grp, T, pre_len, P ) );
-
-        if( p_eq_g )
-        {
-            grp->T = T;
-            grp->T_size = pre_len;
-        }
-    }
-    else
-    {
-        T = grp->T;
-
-        /* Should never happen, but we want to be extra sure */
-        if( pre_len != grp->T_size )
-        {
-            ret = POLARSSL_ERR_ECP_BAD_INPUT_DATA;
-            goto cleanup;
-        }
-    }
-
-    /*
-     * Make sure M is odd (M = m + 1 or M = m + 2)
-     * later we'll get m * P by subtracting P or 2 * P to M * P.
-     */
-    m_is_odd = ( mpi_get_bit( m, 0 ) == 1 );
-
-    MPI_CHK( mpi_copy( &M, m ) );
-    MPI_CHK( mpi_add_int( &M, &M, 1 + m_is_odd ) );
-
-    /*
-     * Compute the fixed-pattern NAF of M
-     */
-    MPI_CHK( ecp_w_naf_fixed( naf, naf_len, w, &M ) );
-
-    /*
-     * Compute M * P, using a variant of left-to-right 2^w-ary multiplication:
-     * at each step we add (2 * naf[i] + 1) P, then multiply by 2^w.
-     *
-     * If naf[i] >= 0, we have (2 * naf[i] + 1) P == T[ naf[i] ]
-     * Otherwise, (2 * naf[i] + 1) P == - ( 2 * ( - naf[i] - 1 ) + 1) P
-     *                               == T[ - naf[i] - 1 ]
-     */
-    MPI_CHK( ecp_set_zero( &Q ) );
-    i = naf_len - 1;
-    while( 1 )
-    {
-        /* Countermeasure (see comments above) */
-        if( f_rng != NULL )
-            ecp_randomize_coordinates( grp, &Q, f_rng, p_rng );
-
-        if( naf[i] < 0 )
-        {
-            MPI_CHK( ecp_add_mixed( grp, &Q, &Q, &T[ - naf[i] - 1 ], -1 ) );
-        }
-        else
-        {
-            MPI_CHK( ecp_add_mixed( grp, &Q, &Q, &T[ naf[i] ], +1 ) );
-        }
-
-        if( i == 0 )
-            break;
-        i--;
-
-        for( j = 0; j < w; j++ )
-        {
-            MPI_CHK( ecp_double_jac( grp, &Q, &Q ) );
-        }
-    }
-
-    /*
-     * Now get m * P from M * P
-     */
-    MPI_CHK( ecp_copy( &S[0], P ) );
-    MPI_CHK( ecp_add( grp, &S[1], P, P ) );
-    MPI_CHK( ecp_sub( grp, R, &Q, &S[m_is_odd] ) );
-
-
-cleanup:
-
-    if( T != NULL && ! p_eq_g )
-    {
-        for( i = 0; i < pre_len; i++ )
-            ecp_point_free( &T[i] );
-        polarssl_free( T );
-    }
-
-    ecp_point_free( &S[1] );
-    ecp_point_free( &S[0] );
-    ecp_point_free( &Q );
-    mpi_free( &M );
-
-    return( ret );
-}
-
 /*
  * Check and define parameters used by the comb method (see below for details)
  */
@@ -1714,9 +1429,9 @@ cleanup:
 /*
  * Multiplication using the comb method
  */
-int ecp_mul_comb( ecp_group *grp, ecp_point *R,
-                  const mpi *m, const ecp_point *P,
-                  int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
+int ecp_mul( ecp_group *grp, ecp_point *R,
+             const mpi *m, const ecp_point *P,
+             int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
 {
     int ret;
     unsigned char w, m_is_odd, p_eq_g;

From ff27b7c9680b9513d2ba2df6c0bc16d2f6f121aa Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 09:28:03 +0100
Subject: [PATCH 07/21] Tighten ecp_mul() validity checks

---
 include/polarssl/ecp.h               | 15 ++++++--------
 library/ecp.c                        | 30 +++++++++++++++-------------
 tests/suites/test_suite_ecp.data     | 14 ++++---------
 tests/suites/test_suite_ecp.function | 26 ++++++++++++++----------
 4 files changed, 42 insertions(+), 43 deletions(-)

diff --git a/include/polarssl/ecp.h b/include/polarssl/ecp.h
index 3dfb311b1..81b789edb 100644
--- a/include/polarssl/ecp.h
+++ b/include/polarssl/ecp.h
@@ -459,22 +459,19 @@ int ecp_sub( const ecp_group *grp, ecp_point *R,
  * \param p_rng     RNG parameter
  *
  * \return          0 if successful,
+ *                  POLARSSL_ERR_ECP_INVALID_KEY if m is not a valid privkey
+ *                  or P is not a valid pubkey,
  *                  POLARSSL_ERR_MPI_MALLOC_FAILED if memory allocation failed
- *                  POLARSSL_ERR_ECP_BAD_INPUT_DATA if m < 0 of m has greater
- *                  bit length than N, the number of points in the group.
  *
  * \note            In order to prevent simple timing attacks, this function
  *                  executes a constant number of operations (that is, point
  *                  doubling and addition of distinct points) for random m in
  *                  the allowed range.
  *
- * \note            If f_rng is not NULL, it is used to randomize projective
- *                  coordinates of indermediate results, in order to prevent
- *                  more elaborate timing attacks relying on intermediate
- *                  operations. (This is a prophylactic measure since no such
- *                  attack has been published yet.) Since this contermeasure
- *                  has very low overhead, it is recommended to always provide
- *                  a non-NULL f_rng parameter when using secret inputs.
+ * \note            If f_rng is not NULL, it is used to randomize intermediate
+ *                  results in order to prevent potential attacks targetting
+ *                  these results. It is recommended to always provide a
+ *                  non-NULL f_rng (the overhead is negligible).
  */
 int ecp_mul( ecp_group *grp, ecp_point *R,
              const mpi *m, const ecp_point *P,
diff --git a/library/ecp.c b/library/ecp.c
index 2dd95bbff..91f08207d 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1440,21 +1440,24 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
     ecp_point Q, *T = NULL, S[2];
     mpi M;
 
-    if( mpi_cmp_int( m, 0 ) < 0 || mpi_msb( m ) > grp->nbits )
-        return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
+    /*
+     * Sanity checks (before we even initialize anything)
+     */
+    if( ( ret = ecp_check_privkey( grp, m ) ) != 0 )
+        return( ret );
+
+    /* We'll need this later, but do it now to possibly avoid cheking P */
+    p_eq_g = ( mpi_cmp_int( &P->Z, 1 ) == 0 &&
+               mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
+               mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
+    if( ! p_eq_g && ( ret = ecp_check_pubkey( grp, P ) ) != 0 )
+        return( ret );
 
     mpi_init( &M );
     ecp_point_init( &Q );
     ecp_point_init( &S[0] );
     ecp_point_init( &S[1] );
 
-    /*
-     * Check if P == G
-     */
-    p_eq_g = ( mpi_cmp_int( &P->Z, 1 ) == 0 &&
-               mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
-               mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
-
     /*
      * Minimize the number of multiplications, that is minimize
      * 10 * d * w + 18 * 2^(w-1) + 11 * d + 7 * w
@@ -2061,13 +2064,12 @@ int ecp_self_test( int verbose )
     /* exponents especially adapted for secp192r1 */
     const char *exponents[] =
     {
-        "000000000000000000000000000000000000000000000000", /* zero */
         "000000000000000000000000000000000000000000000001", /* one */
-        "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22831", /* N */
+        "FFFFFFFFFFFFFFFFFFFFFFFF99DEF836146BC9B1B4D22830", /* N - 1 */
         "5EA6F389A38B8BC81E767753B15AA5569E1782E30ABE7D25", /* random */
-        "400000000000000000000000000000000000000000000000",
-        "7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
-        "555555555555555555555555555555555555555555555555",
+        "400000000000000000000000000000000000000000000000", /* one and zeros */
+        "7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF", /* all ones */
+        "555555555555555555555555555555555555555555555555", /* 101010... */
     };
 
     ecp_group_init( &grp );
diff --git a/tests/suites/test_suite_ecp.data b/tests/suites/test_suite_ecp.data
index 2f5f4efc7..8dafc39bd 100644
--- a/tests/suites/test_suite_ecp.data
+++ b/tests/suites/test_suite_ecp.data
@@ -50,10 +50,10 @@ ECP small subtraction #9
 ecp_small_sub:0:"14":"11":0:"14":"36":0:27:30
 
 ECP small multiplication negative
-ecp_small_mul:-1:0:0:0:POLARSSL_ERR_ECP_BAD_INPUT_DATA
+ecp_small_mul:-1:0:0:0:POLARSSL_ERR_ECP_INVALID_KEY
 
 ECP small multiplication #0
-ecp_small_mul:0:1:0:0:0
+ecp_small_mul:0:1:0:0:POLARSSL_ERR_ECP_INVALID_KEY
 
 ECP small multiplication #1
 ecp_small_mul:1:0:17:42:0
@@ -92,16 +92,10 @@ ECP small multiplication #12
 ecp_small_mul:12:0:17:05:0
 
 ECP small multiplication #13
-ecp_small_mul:13:1:0:0:0
+ecp_small_mul:13:1:0:0:POLARSSL_ERR_ECP_INVALID_KEY
 
 ECP small multiplication #14
-ecp_small_mul:1:0:17:42:0
-
-ECP small multiplication #15
-ecp_small_mul:2:0:20:01:0
-
-ECP small multiplication too big
-ecp_small_mul:-1:0:0:0:POLARSSL_ERR_ECP_BAD_INPUT_DATA
+ecp_small_mul:14:0:17:42:POLARSSL_ERR_ECP_INVALID_KEY
 
 ECP small check pubkey #1
 ecp_small_check_pub:1:1:0:POLARSSL_ERR_ECP_INVALID_KEY
diff --git a/tests/suites/test_suite_ecp.function b/tests/suites/test_suite_ecp.function
index 4eb52596c..8cc5abac3 100644
--- a/tests/suites/test_suite_ecp.function
+++ b/tests/suites/test_suite_ecp.function
@@ -115,12 +115,15 @@ void ecp_small_mul( int m_str, int r_zero, int x_r, int y_r, int ret )
 
     TEST_ASSERT( ecp_mul( &grp, &R, &m, &grp.G, NULL, NULL ) == ret );
 
-    if( r_zero )
-        TEST_ASSERT( mpi_cmp_int( &R.Z, 0 ) == 0 );
-    else
+    if( ret == 0 )
     {
-        TEST_ASSERT( mpi_cmp_int( &R.X, x_r ) == 0 );
-        TEST_ASSERT( mpi_cmp_int( &R.Y, y_r ) == 0 );
+        if( r_zero )
+            TEST_ASSERT( mpi_cmp_int( &R.Z, 0 ) == 0 );
+        else
+        {
+            TEST_ASSERT( mpi_cmp_int( &R.X, x_r ) == 0 );
+            TEST_ASSERT( mpi_cmp_int( &R.Y, y_r ) == 0 );
+        }
     }
 
     /* try again with randomization */
@@ -129,12 +132,15 @@ void ecp_small_mul( int m_str, int r_zero, int x_r, int y_r, int ret )
     TEST_ASSERT( ecp_mul( &grp, &R, &m, &grp.G,
                           &rnd_pseudo_rand, &rnd_info ) == ret );
 
-    if( r_zero )
-        TEST_ASSERT( mpi_cmp_int( &R.Z, 0 ) == 0 );
-    else
+    if( ret == 0 )
     {
-        TEST_ASSERT( mpi_cmp_int( &R.X, x_r ) == 0 );
-        TEST_ASSERT( mpi_cmp_int( &R.Y, y_r ) == 0 );
+        if( r_zero )
+            TEST_ASSERT( mpi_cmp_int( &R.Z, 0 ) == 0 );
+        else
+        {
+            TEST_ASSERT( mpi_cmp_int( &R.X, x_r ) == 0 );
+            TEST_ASSERT( mpi_cmp_int( &R.Y, y_r ) == 0 );
+        }
     }
 
     ecp_group_free( &grp );

From edc1a1f482895ab61032c7c9fbcd718a184acc40 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 09:50:00 +0100
Subject: [PATCH 08/21] Small code cleanups

---
 library/ecp.c | 41 ++++++++++++-----------------------------
 1 file changed, 12 insertions(+), 29 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 91f08207d..2ff882093 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1276,35 +1276,26 @@ static void ecp_comb_fixed( unsigned char x[], size_t d,
 
     memset( x, 0, d+1 );
 
-    /* For x[0] use the classical comb value without adjustement */
-    for( j = 0; j < w; j++ )
-        x[0] |= mpi_get_bit( m, d * j ) << j;
-    c = 0;
-
-    for( i = 1; i < d; i++ )
-    {
-        /* Get the classical comb value */
+    /* First get the classical comb values (except for x_d = 0) */
+    for( i = 0; i < d; i++ )
         for( j = 0; j < w; j++ )
             x[i] |= mpi_get_bit( m, i + d * j ) << j;
 
+    /* Now make sure x_1 .. x_d are odd */
+    c = 0;
+    for( i = 1; i <= d; i++ )
+    {
         /* Add carry and update it */
         cc   = x[i] & c;
         x[i] = x[i] ^ c;
         c = cc;
 
-        /* Make sure x[i] is odd, avoiding if-branches */
+        /* Adjust if needed, avoiding branches */
         adjust = 1 - ( x[i] & 0x01 );
         c   |= x[i] & ( x[i-1] * adjust );
         x[i] = x[i] ^ ( x[i-1] * adjust );
         x[i-1] |= adjust << 7;
     }
-
-    /* Finish with the carry */
-    x[i] = c;
-    adjust = 1 - ( x[i] & 0x01 );
-    c   |= x[i] & ( x[i-1] * adjust );
-    x[i] = x[i] ^ ( x[i-1] * adjust );
-    x[i-1] |= adjust << 7;
 }
 
 /*
@@ -1487,9 +1478,12 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
 
     /*
      * Prepare precomputed points: if P == G we want to
-     * use grp->T if already initialized, or initiliaze it.
+     * use grp->T if already initialized, or initialize it.
      */
-    if( ! p_eq_g || grp->T == NULL )
+    if( p_eq_g )
+        T = grp->T;
+
+    if( T == NULL )
     {
         T = (ecp_point *) polarssl_malloc( pre_len * sizeof( ecp_point ) );
         if( T == NULL )
@@ -1509,17 +1503,6 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
             grp->T_size = pre_len;
         }
     }
-    else
-    {
-        T = grp->T;
-
-        /* Should never happen, but we want to be extra sure */
-        if( pre_len != grp->T_size )
-        {
-            ret = POLARSSL_ERR_ECP_BAD_INPUT_DATA;
-            goto cleanup;
-        }
-    }
 
     /*
      * Make sure M is odd (M = m + 1 or M = m + 2)

From e282012219d7b348e5ce5e06ed322203d074d223 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 10:08:50 +0100
Subject: [PATCH 09/21] Spare some memory

---
 library/ecp.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/library/ecp.c b/library/ecp.c
index 2ff882093..d7a4567ea 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1353,6 +1353,14 @@ static int ecp_precompute_comb( const ecp_group *grp,
 
     ecp_normalize_many( grp, TT, k );
 
+    /*
+     * Post-precessing: reclaim some memory by not storing Z (always 1)
+     */
+    for( i = 0; i < ( 1U << (w-1) ); i++ )
+    {
+        mpi_free( &T[i].Z );
+    }
+
 cleanup:
     return( ret );
 }
@@ -1368,6 +1376,9 @@ static int ecp_select_comb( const ecp_group *grp, ecp_point *R,
     /* Ignore the "sign" bit */
     MPI_CHK( ecp_copy( R, &T[ ( i & 0x7Fu ) >> 1 ] ) );
 
+    /* Restore the Z coordinate */
+    MPI_CHK( mpi_lset( &R->Z, 1 ) );
+
     /*
      * -R = (R.X, -R.Y, R.Z), and
      * -R.Y mod P = P - R.Y unless R.Y == 0

From 5868163e0724bebbdbfd61a723009e6a30dc2d33 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 10:39:37 +0100
Subject: [PATCH 10/21] Add mpi_shrink()

---
 include/polarssl/bignum.h            | 11 ++++++++
 library/bignum.c                     | 39 ++++++++++++++++++++++++++++
 tests/suites/test_suite_mpi.data     | 24 +++++++++++++++++
 tests/suites/test_suite_mpi.function | 16 ++++++++++++
 4 files changed, 90 insertions(+)

diff --git a/include/polarssl/bignum.h b/include/polarssl/bignum.h
index 9bed027d9..1052c5aa0 100644
--- a/include/polarssl/bignum.h
+++ b/include/polarssl/bignum.h
@@ -201,6 +201,17 @@ void mpi_free( mpi *X );
  */
 int mpi_grow( mpi *X, size_t nblimbs );
 
+/**
+ * \brief          Resize down, keeping at least the specified number of limbs
+ *
+ * \param X        MPI to shrink
+ * \param nblimbs  The minimum number of limbs to keep
+ *
+ * \return         0 if successful,
+ *                 POLARSSL_ERR_MPI_MALLOC_FAILED if memory allocation failed
+ */
+int mpi_shrink( mpi *X, size_t nblimbs );
+
 /**
  * \brief          Copy the contents of Y into X
  *
diff --git a/library/bignum.c b/library/bignum.c
index 2a97a5902..55f7463a9 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -119,6 +119,45 @@ int mpi_grow( mpi *X, size_t nblimbs )
     return( 0 );
 }
 
+/*
+ * Resize down as much as possible,
+ * while keeping at least the specified number of limbs
+ */
+int mpi_shrink( mpi *X, size_t nblimbs )
+{
+    t_uint *p;
+    size_t i;
+
+    /* Actually resize up in this case */
+    if( X->n <= nblimbs )
+        return( mpi_grow( X, nblimbs ) );
+
+    for( i = X->n - 1; i > 0; i-- )
+        if( X->p[i] != 0 )
+            break;
+    i++;
+
+    if( i < nblimbs )
+        i = nblimbs;
+
+    if( ( p = (t_uint *) polarssl_malloc( i * ciL ) ) == NULL )
+        return( POLARSSL_ERR_MPI_MALLOC_FAILED );
+
+    memset( p, 0, i * ciL );
+
+    if( X->p != NULL )
+    {
+        memcpy( p, X->p, i * ciL );
+        memset( X->p, 0, X->n * ciL );
+        polarssl_free( X->p );
+    }
+
+    X->n = i;
+    X->p = p;
+
+    return( 0 );
+}
+
 /*
  * Copy the contents of Y into X
  */
diff --git a/tests/suites/test_suite_mpi.data b/tests/suites/test_suite_mpi.data
index 924f364d9..e763374ae 100644
--- a/tests/suites/test_suite_mpi.data
+++ b/tests/suites/test_suite_mpi.data
@@ -181,6 +181,30 @@ mpi_copy_self:14
 Base test mpi_swap #1
 mpi_swap:0:1500
 
+Test mpi_shrink #1
+mpi_shrink:2:2:4:4
+
+Test mpi_shrink #2
+mpi_shrink:4:2:4:4
+
+Test mpi_shrink #3
+mpi_shrink:8:2:4:4
+
+Test mpi_shrink #4
+mpi_shrink:8:4:4:4
+
+Test mpi_shrink #5
+mpi_shrink:8:6:4:6
+
+Test mpi_shrink #6
+mpi_shrink:4:2:0:2
+
+Test mpi_shrink #7
+mpi_shrink:4:1:0:1
+
+Test mpi_shrink #8
+mpi_shrink:4:0:0:1
+
 Base test mpi_add_abs #1
 mpi_add_abs:10:"12345678":10:"642531":10:"12988209"
 
diff --git a/tests/suites/test_suite_mpi.function b/tests/suites/test_suite_mpi.function
index e08b48d09..d3a0d4898 100644
--- a/tests/suites/test_suite_mpi.function
+++ b/tests/suites/test_suite_mpi.function
@@ -292,6 +292,22 @@ void mpi_copy_self( int input_X )
 }
 /* END_CASE */
 
+/* BEGIN_CASE */
+void mpi_shrink( int before, int used, int min, int after )
+{
+    mpi X;
+    mpi_init( &X );
+
+    TEST_ASSERT( mpi_grow( &X, before ) == 0 );
+    TEST_ASSERT( used <= before );
+    memset( X.p, 0x2a, used * sizeof( t_uint ) );
+    TEST_ASSERT( mpi_shrink( &X, min ) == 0 );
+    TEST_ASSERT( X.n == (size_t) after );
+
+    mpi_free( &X );
+}
+/* END_CASE */
+
 /* BEGIN_CASE */
 void mpi_swap( int input_X,  int input_Y )
 {

From 7f762319ad5a604e817c8ff8956211ac664f9e2b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 10:47:41 +0100
Subject: [PATCH 11/21] Use mpi_shrink() in ecp_precompute()

---
 library/ecp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/library/ecp.c b/library/ecp.c
index d7a4567ea..1ab1c52a1 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1354,11 +1354,17 @@ static int ecp_precompute_comb( const ecp_group *grp,
     ecp_normalize_many( grp, TT, k );
 
     /*
-     * Post-precessing: reclaim some memory by not storing Z (always 1)
+     * Post-precessing: reclaim some memory by
+     * - not storing Z (always 1)
+     * - shrinking other coordinates
+     * However keep the same number of limbs as P, which will be useful in
+     * ecp_select_comb()
      */
     for( i = 0; i < ( 1U << (w-1) ); i++ )
     {
         mpi_free( &T[i].Z );
+        mpi_shrink( &T[i].X, grp->P.n );
+        mpi_shrink( &T[i].Y, grp->P.n );
     }
 
 cleanup:

From 44aab79022cfb2ea4c8473100e33f3da651cc03d Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 10:53:59 +0100
Subject: [PATCH 12/21] Update bibliographic references

---
 library/ecp.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 1ab1c52a1..f82624262 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -31,12 +31,6 @@
  * FIPS 186-3 http://csrc.nist.gov/publications/fips/fips186-3/fips_186-3.pdf
  * RFC 4492 for the related TLS structures and constants
  *
- * [1] OKEYA, Katsuyuki and TAKAGI, Tsuyoshi. The width-w NAF method provides
- *     small memory and fast elliptic scalar multiplications secure against
- *     side channel attacks. In : Topics in Cryptology—CT-RSA 2003. Springer
- *     Berlin Heidelberg, 2003. p. 328-343.
- *     <http://rd.springer.com/chapter/10.1007/3-540-36563-X_23>.
- *
  * [2] CORON, Jean-Sébastien. Resistance against differential power analysis
  *     for elliptic curve cryptosystems. In : Cryptographic Hardware and
  *     Embedded Systems. Springer Berlin Heidelberg, 1999. p. 292-302.
@@ -1194,6 +1188,8 @@ cleanup:
  * Randomize jacobian coordinates:
  * (X, Y, Z) -> (l^2 X, l^3 Y, l Z) for random l
  * This is sort of the reverse operation of ecp_normalize().
+ *
+ * This countermeasure was first suggested in [2].
  */
 static int ecp_randomize_coordinates( const ecp_group *grp, ecp_point *pt,
                 int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )

From 71c2c2160182495d019369432d5daa94a801c7c6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 16:56:39 +0100
Subject: [PATCH 13/21] Add mpi_safe_cond_assign()

---
 include/polarssl/bignum.h            | 19 +++++++++++++++++++
 library/bignum.c                     | 25 +++++++++++++++++++++++++
 tests/suites/test_suite_mpi.data     |  9 +++++++++
 tests/suites/test_suite_mpi.function | 20 ++++++++++++++++++++
 4 files changed, 73 insertions(+)

diff --git a/include/polarssl/bignum.h b/include/polarssl/bignum.h
index 1052c5aa0..6e2afac72 100644
--- a/include/polarssl/bignum.h
+++ b/include/polarssl/bignum.h
@@ -231,6 +231,25 @@ int mpi_copy( mpi *X, const mpi *Y );
  */
 void mpi_swap( mpi *X, mpi *Y );
 
+/**
+ * \brief          Safe conditional assignement X = Y if assign is 1
+ *
+ * \param X        MPI to conditionally assign to
+ * \param Y        Value to be assigned
+ * \param assign   1: perform the assignment, 0: leave X untouched
+ *
+ * \return         0 if successful,
+ *                 POLARSSL_ERR_MPI_MALLOC_FAILED if memory allocation failed,
+ *                 POLARSSL_ERR_MPI_BAD_INPUT_DATA if assing is not 0 or 1
+ *
+ * \note           This function is equivalent to
+ *                      if( assign ) mpi_copy( X, Y );
+ *                 except that it avoids leaking any information about whether
+ *                 the assignment was done or not (the above code may leak
+ *                 information through branch prediction analysis).
+ */
+int mpi_safe_cond_assign( mpi *X, mpi *Y, unsigned char assign );
+
 /**
  * \brief          Set value from integer
  *
diff --git a/library/bignum.c b/library/bignum.c
index 55f7463a9..663d9240e 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -204,6 +204,31 @@ void mpi_swap( mpi *X, mpi *Y )
     memcpy(  Y, &T, sizeof( mpi ) );
 }
 
+/*
+ * Conditionally assign X = Y, without leaking information
+ */
+int mpi_safe_cond_assign( mpi *X, mpi *Y, unsigned char assign )
+{
+    int ret = 0;
+    size_t i;
+
+    if( assign * ( 1 - assign ) != 0 )
+        return( POLARSSL_ERR_MPI_BAD_INPUT_DATA );
+
+    /* Make sure both MPIs have the same size */
+    if( X->n > Y->n )
+        MPI_CHK( mpi_grow( Y, X->n ) );
+    if( Y->n > X->n )
+        MPI_CHK( mpi_grow( X, Y->n ) );
+
+    /* Do the conditional assign safely */
+    for( i = 0; i < X->n; i++ )
+        X->p[i] = X->p[i] * (1 - assign) + Y->p[i] * assign;
+
+cleanup:
+    return( ret );
+}
+
 /*
  * Set value from integer
  */
diff --git a/tests/suites/test_suite_mpi.data b/tests/suites/test_suite_mpi.data
index e763374ae..8755fa29a 100644
--- a/tests/suites/test_suite_mpi.data
+++ b/tests/suites/test_suite_mpi.data
@@ -205,6 +205,15 @@ mpi_shrink:4:1:0:1
 Test mpi_shrink #8
 mpi_shrink:4:0:0:1
 
+Test mpi_safe_cond_assign #1
+mpi_safe_cond_assign:"01":"02"
+
+Test mpi_safe_cond_assign #2
+mpi_safe_cond_assign:"FF000000000000000001":"02"
+
+Test mpi_safe_cond_assign #3
+mpi_safe_cond_assign:"01":"FF000000000000000002"
+
 Base test mpi_add_abs #1
 mpi_add_abs:10:"12345678":10:"642531":10:"12988209"
 
diff --git a/tests/suites/test_suite_mpi.function b/tests/suites/test_suite_mpi.function
index d3a0d4898..ee9b94a5b 100644
--- a/tests/suites/test_suite_mpi.function
+++ b/tests/suites/test_suite_mpi.function
@@ -308,6 +308,26 @@ void mpi_shrink( int before, int used, int min, int after )
 }
 /* END_CASE */
 
+/* BEGIN_CASE */
+void mpi_safe_cond_assign( char *x_str, char *y_str )
+{
+    mpi X, Y, XX;
+    mpi_init( &X ); mpi_init( &Y ); mpi_init( &XX );
+
+    TEST_ASSERT( mpi_read_string( &X, 16, x_str ) == 0 );
+    TEST_ASSERT( mpi_read_string( &Y, 16, y_str ) == 0 );
+    TEST_ASSERT( mpi_copy( &XX, &X ) == 0 );
+
+    TEST_ASSERT( mpi_safe_cond_assign( &X, &Y, 0 ) == 0 );
+    TEST_ASSERT( mpi_cmp_mpi( &X, &XX ) == 0 );
+
+    TEST_ASSERT( mpi_safe_cond_assign( &X, &Y, 1 ) == 0 );
+    TEST_ASSERT( mpi_cmp_mpi( &X, &Y ) == 0 );
+
+    mpi_free( &X ); mpi_free( &Y ); mpi_free( &XX );
+}
+/* END_CASE */
+
 /* BEGIN_CASE */
 void mpi_swap( int input_X,  int input_Y )
 {

From 01fca5e882b6e1945539b90b0dd8aacdc279bd2a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 17:47:12 +0100
Subject: [PATCH 14/21] Do point inversion without leaking information

---
 library/ecp.c | 40 +++++++++++++++++++++++++++++++---------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index f82624262..f0dd7d285 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -982,6 +982,31 @@ cleanup:
     return( ret );
 }
 
+/*
+ * Conditional point inversion: Q -> -Q = (Q.X, -Q.Y, Q.Z) without leak.
+ * "inv" must be 0 (don't invert) or 1 (invert) or the result will be invalid
+ */
+static int ecp_safe_invert( const ecp_group *grp,
+                            ecp_point *Q,
+                            unsigned char inv )
+{
+    int ret;
+    unsigned char nonzero;
+    mpi mQY;
+
+    mpi_init( &mQY );
+
+    /* Use the fact that -Q.Y mod P = P - Q.Y unless Q.Y == 0 */
+    MPI_CHK( mpi_sub_mpi( &mQY, &grp->P, &Q->Y ) );
+    nonzero = mpi_cmp_int( &Q->Y, 0 ) != 0;
+    MPI_CHK( mpi_safe_cond_assign( &Q->Y, &mQY, inv & nonzero ) );
+
+cleanup:
+    mpi_free( &mQY );
+
+    return( ret );
+}
+
 /*
  * Point doubling R = 2 P, Jacobian coordinates
  *
@@ -1068,6 +1093,7 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
     /*
      * Trivial cases: P == 0 or Q == 0
      * (Check Q first, so that we know Q != 0 when we compute -Q.)
+     * This will never happen during ecp_mul() so we don't mind the branches.
      */
     if( mpi_cmp_int( &Q->Z, 0 ) == 0 )
         return( ecp_copy( R, P ) );
@@ -1103,6 +1129,7 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
     /*
      * For subtraction, -Q.Y should have been used instead of Q.Y,
      * so we replace T2 by -T2, which is P - T2 mod P
+     * (Again, not used by ecp_mul(), so not worry about the branch.)
      */
     if( sign < 0 )
     {
@@ -1113,6 +1140,7 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
     MPI_CHK( mpi_sub_mpi( &T1,  &T1,    &P->X ) );  MOD_SUB( T1 );
     MPI_CHK( mpi_sub_mpi( &T2,  &T2,    &P->Y ) );  MOD_SUB( T2 );
 
+    /* TODO: make sure it never happens during ecp_mul() */
     if( mpi_cmp_int( &T1, 0 ) == 0 )
     {
         if( mpi_cmp_int( &T2, 0 ) == 0 )
@@ -1353,8 +1381,7 @@ static int ecp_precompute_comb( const ecp_group *grp,
      * Post-precessing: reclaim some memory by
      * - not storing Z (always 1)
      * - shrinking other coordinates
-     * However keep the same number of limbs as P, which will be useful in
-     * ecp_select_comb()
+     * Keep the same number of limbs as P to avoid re-growing on next use.
      */
     for( i = 0; i < ( 1U << (w-1) ); i++ )
     {
@@ -1381,13 +1408,8 @@ static int ecp_select_comb( const ecp_group *grp, ecp_point *R,
     /* Restore the Z coordinate */
     MPI_CHK( mpi_lset( &R->Z, 1 ) );
 
-    /*
-     * -R = (R.X, -R.Y, R.Z), and
-     * -R.Y mod P = P - R.Y unless R.Y == 0
-     */
-    if( ( i & 0x80 ) != 0 )
-        if( mpi_cmp_int( &R->Y, 0 ) != 0 )
-            MPI_CHK( mpi_sub_mpi( &R->Y, &grp->P, &R->Y ) );
+    /* Safely invert result if i is "negative" */
+    MPI_CHK( ecp_safe_invert( grp, R, i >> 7 ) );
 
 cleanup:
     return( ret );

From 469a20933499499c1265a462ba3d529871ccbde3 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 18:20:43 +0100
Subject: [PATCH 15/21] Rm subtraction from ecp_add_mixed()

---
 library/ecp.c | 58 +++++++++++++++++----------------------------------
 1 file changed, 19 insertions(+), 39 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index f0dd7d285..39d0e8dbd 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1067,21 +1067,15 @@ cleanup:
 }
 
 /*
- * Addition or subtraction: R = P + Q or R = P - Q,
- * mixed affine-Jacobian coordinates (GECC 3.22)
+ * Addition: R = P + Q, mixed affine-Jacobian coordinates (GECC 3.22)
  *
  * The coordinates of Q must be normalized (= affine),
  * but those of P don't need to. R is not normalized.
  *
- * If sign >= 0, perform addition, otherwise perform subtraction,
- * taking advantage of the fact that, for Q != 0, we have
- * -Q = (Q.X, -Q.Y, Q.Z)
- *
  * Cost: 1A := 8M + 3S
  */
 static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
-                          const ecp_point *P, const ecp_point *Q,
-                          signed char sign )
+                          const ecp_point *P, const ecp_point *Q )
 {
     int ret;
     mpi T1, T2, T3, T4, X, Y, Z;
@@ -1092,26 +1086,14 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
 
     /*
      * Trivial cases: P == 0 or Q == 0
-     * (Check Q first, so that we know Q != 0 when we compute -Q.)
      * This will never happen during ecp_mul() so we don't mind the branches.
      */
+    if( mpi_cmp_int( &P->Z, 0 ) == 0 )
+        return( ecp_copy( R, Q ) );
+
     if( mpi_cmp_int( &Q->Z, 0 ) == 0 )
         return( ecp_copy( R, P ) );
 
-    if( mpi_cmp_int( &P->Z, 0 ) == 0 )
-    {
-        ret = ecp_copy( R, Q );
-
-        /*
-         * -R.Y mod P = P - R.Y unless R.Y == 0
-         */
-        if( ret == 0 && sign < 0)
-            if( mpi_cmp_int( &R->Y, 0 ) != 0 )
-                ret = mpi_sub_mpi( &R->Y, &grp->P, &R->Y );
-
-        return( ret );
-    }
-
     /*
      * Make sure Q coordinates are normalized
      */
@@ -1125,18 +1107,6 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
     MPI_CHK( mpi_mul_mpi( &T2,  &T1,    &P->Z ) );  MOD_MUL( T2 );
     MPI_CHK( mpi_mul_mpi( &T1,  &T1,    &Q->X ) );  MOD_MUL( T1 );
     MPI_CHK( mpi_mul_mpi( &T2,  &T2,    &Q->Y ) );  MOD_MUL( T2 );
-
-    /*
-     * For subtraction, -Q.Y should have been used instead of Q.Y,
-     * so we replace T2 by -T2, which is P - T2 mod P
-     * (Again, not used by ecp_mul(), so not worry about the branch.)
-     */
-    if( sign < 0 )
-    {
-        MPI_CHK( mpi_sub_mpi( &T2, &grp->P, &T2 ) );
-        MOD_SUB( T2 );
-    }
-
     MPI_CHK( mpi_sub_mpi( &T1,  &T1,    &P->X ) );  MOD_SUB( T1 );
     MPI_CHK( mpi_sub_mpi( &T2,  &T2,    &P->Y ) );  MOD_SUB( T2 );
 
@@ -1189,7 +1159,7 @@ int ecp_add( const ecp_group *grp, ecp_point *R,
 {
     int ret;
 
-    MPI_CHK( ecp_add_mixed( grp, R, P, Q , 1 ) );
+    MPI_CHK( ecp_add_mixed( grp, R, P, Q ) );
     MPI_CHK( ecp_normalize( grp, R ) );
 
 cleanup:
@@ -1204,11 +1174,21 @@ int ecp_sub( const ecp_group *grp, ecp_point *R,
              const ecp_point *P, const ecp_point *Q )
 {
     int ret;
+    ecp_point mQ;
 
-    MPI_CHK( ecp_add_mixed( grp, R, P, Q, -1 ) );
+    ecp_point_init( &mQ );
+
+    /* mQ = - Q */
+    ecp_copy( &mQ, Q );
+    if( mpi_cmp_int( &mQ.Y, 0 ) != 0 )
+        MPI_CHK( mpi_sub_mpi( &mQ.Y, &grp->P, &mQ.Y ) );
+
+    MPI_CHK( ecp_add_mixed( grp, R, P, &mQ ) );
     MPI_CHK( ecp_normalize( grp, R ) );
 
 cleanup:
+    ecp_point_free( &mQ );
+
     return( ret );
 }
 
@@ -1370,7 +1350,7 @@ static int ecp_precompute_comb( const ecp_group *grp,
         j = i;
         while( j-- )
         {
-            ecp_add_mixed( grp, &T[i + j], &T[j], &T[i], +1 );
+            ecp_add_mixed( grp, &T[i + j], &T[j], &T[i] );
             TT[k++] = &T[i + j];
         }
     }
@@ -1443,7 +1423,7 @@ static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
     {
         MPI_CHK( ecp_double_jac( grp, R, R ) );
         MPI_CHK( ecp_select_comb( grp, &Txi, T, x[i] ) );
-        MPI_CHK( ecp_add_mixed( grp, R, R, &Txi, +1 ) );
+        MPI_CHK( ecp_add_mixed( grp, R, R, &Txi ) );
     }
 
 cleanup:

From 36daa13d76a4367a96fc1d6672e046e48257148c Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 18:33:36 +0100
Subject: [PATCH 16/21] Misc details

---
 library/ecp.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 39d0e8dbd..3d2c6e2cf 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1449,13 +1449,16 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
     /*
      * Sanity checks (before we even initialize anything)
      */
+    if( mpi_cmp_int( &P->Z, 1 ) != 0 )
+        return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
+
     if( ( ret = ecp_check_privkey( grp, m ) ) != 0 )
         return( ret );
 
-    /* We'll need this later, but do it now to possibly avoid cheking P */
-    p_eq_g = ( mpi_cmp_int( &P->Z, 1 ) == 0 &&
-               mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
+    /* We'll need this later, but do it now to possibly avoid checking P */
+    p_eq_g = ( mpi_cmp_mpi( &P->Y, &grp->G.Y ) == 0 &&
                mpi_cmp_mpi( &P->X, &grp->G.X ) == 0 );
+
     if( ! p_eq_g && ( ret = ecp_check_pubkey( grp, P ) ) != 0 )
         return( ret );
 
@@ -1466,7 +1469,7 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
 
     /*
      * Minimize the number of multiplications, that is minimize
-     * 10 * d * w + 18 * 2^(w-1) + 11 * d + 7 * w
+     * 10 * d * w + 18 * 2^(w-1) + 11 * d + 7 * w, with d = ceil( nbits / w )
      * (see costs of the various parts, with 1S = 1M)
      */
     w = grp->nbits >= 384 ? 5 : 4;
@@ -1479,12 +1482,12 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
         w++;
 
     /*
-     * Make sure w is within limits.
+     * Make sure w is within bounds.
      * (The last test is useful only for very small curves in the test suite.)
      */
     if( w > POLARSSL_ECP_WINDOW_SIZE )
         w = POLARSSL_ECP_WINDOW_SIZE;
-    if( w < 2 || w >= grp->nbits )
+    if( w >= grp->nbits )
         w = 2;
 
     /* Other sizes that depend on w */

From aade42fd88fd659745315a2c0d6c8fe2b3a04ece Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 19:19:54 +0100
Subject: [PATCH 17/21] Change method for making M odd in ecp_mul()

- faster
- avoids M >= N (if m = N-1 or N-2)
---
 library/ecp.c | 55 +++++++++++++++++++++++++++++----------------------
 1 file changed, 31 insertions(+), 24 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 3d2c6e2cf..9f1d3cef1 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1072,6 +1072,14 @@ cleanup:
  * The coordinates of Q must be normalized (= affine),
  * but those of P don't need to. R is not normalized.
  *
+ * Special cases: (1) P or Q is zero, (2) R is zero, (3) P == Q.
+ * None of these cases can happen as intermediate step in ecp_mul():
+ * - at each step, P, Q and R are multiples of the base point, the factor
+ *   being less than its order, so none of them is zero;
+ * - Q is an odd multiple of the base point, P an even multiple,
+ *   due to the choice of precomputed points in the modified comb method.
+ * So branches for these cases do not leak secret information.
+ *
  * Cost: 1A := 8M + 3S
  */
 static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
@@ -1085,8 +1093,7 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
 #endif
 
     /*
-     * Trivial cases: P == 0 or Q == 0
-     * This will never happen during ecp_mul() so we don't mind the branches.
+     * Trivial cases: P == 0 or Q == 0 (case 1)
      */
     if( mpi_cmp_int( &P->Z, 0 ) == 0 )
         return( ecp_copy( R, Q ) );
@@ -1110,7 +1117,7 @@ static int ecp_add_mixed( const ecp_group *grp, ecp_point *R,
     MPI_CHK( mpi_sub_mpi( &T1,  &T1,    &P->X ) );  MOD_SUB( T1 );
     MPI_CHK( mpi_sub_mpi( &T2,  &T2,    &P->Y ) );  MOD_SUB( T2 );
 
-    /* TODO: make sure it never happens during ecp_mul() */
+    /* Special cases (2) and (3) */
     if( mpi_cmp_int( &T1, 0 ) == 0 )
     {
         if( mpi_cmp_int( &T2, 0 ) == 0 )
@@ -1443,14 +1450,17 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
     unsigned char w, m_is_odd, p_eq_g;
     size_t pre_len, d, i;
     unsigned char k[COMB_MAX_D + 1];
-    ecp_point Q, *T = NULL, S[2];
-    mpi M;
+    ecp_point *T;
+    mpi M, mm;
 
     /*
      * Sanity checks (before we even initialize anything)
      */
-    if( mpi_cmp_int( &P->Z, 1 ) != 0 )
+    if( mpi_cmp_int( &P->Z, 1 ) != 0 ||
+        mpi_get_bit( &grp->N, 0 ) != 1 )
+    {
         return( POLARSSL_ERR_ECP_BAD_INPUT_DATA );
+    }
 
     if( ( ret = ecp_check_privkey( grp, m ) ) != 0 )
         return( ret );
@@ -1463,9 +1473,7 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
         return( ret );
 
     mpi_init( &M );
-    ecp_point_init( &Q );
-    ecp_point_init( &S[0] );
-    ecp_point_init( &S[1] );
+    mpi_init( &mm );
 
     /*
      * Minimize the number of multiplications, that is minimize
@@ -1498,8 +1506,7 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
      * Prepare precomputed points: if P == G we want to
      * use grp->T if already initialized, or initialize it.
      */
-    if( p_eq_g )
-        T = grp->T;
+    T = p_eq_g ? grp->T : NULL;
 
     if( T == NULL )
     {
@@ -1523,26 +1530,25 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
     }
 
     /*
-     * Make sure M is odd (M = m + 1 or M = m + 2)
-     * later we'll get m * P by subtracting P or 2 * P to M * P.
+     * Make sure M is odd (M = m or M = N - m, since N is odd)
+     * using the fact that m * P = - (N - m) * P
      */
     m_is_odd = ( mpi_get_bit( m, 0 ) == 1 );
-
     MPI_CHK( mpi_copy( &M, m ) );
-    MPI_CHK( mpi_add_int( &M, &M, 1 + m_is_odd ) );
+    MPI_CHK( mpi_sub_mpi( &mm, &grp->N, m ) );
+    MPI_CHK( mpi_safe_cond_assign( &M, &mm, ! m_is_odd ) );
 
     /*
-     * Go for comb multiplication, Q = M * P
+     * Go for comb multiplication, R = M * P
      */
     ecp_comb_fixed( k, d, w, &M );
-    ecp_mul_comb_core( grp, &Q, T, k, d, f_rng, p_rng );
+    ecp_mul_comb_core( grp, R, T, k, d, f_rng, p_rng );
 
     /*
-     * Now get m * P from M * P
+     * Now get m * P from M * P and normalize it
      */
-    MPI_CHK( ecp_copy( &S[0], P ) );
-    MPI_CHK( ecp_add( grp, &S[1], P, P ) );
-    MPI_CHK( ecp_sub( grp, R, &Q, &S[m_is_odd] ) );
+    MPI_CHK( ecp_safe_invert( grp, R, ! m_is_odd ) );
+    MPI_CHK( ecp_normalize( grp, R ) );
 
 cleanup:
 
@@ -1553,10 +1559,11 @@ cleanup:
         polarssl_free( T );
     }
 
-    ecp_point_free( &S[1] );
-    ecp_point_free( &S[0] );
-    ecp_point_free( &Q );
     mpi_free( &M );
+    mpi_free( &mm );
+
+    if( ret != 0 )
+        ecp_point_free( R );
 
     return( ret );
 }

From d728350cee8ca87ecbff910c15164f4134331e78 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 20:00:38 +0100
Subject: [PATCH 18/21] Make memory access pattern constant

---
 include/polarssl/bignum.h |  3 ++-
 include/polarssl/ecp.h    | 14 +++++++-------
 library/ecp.c             | 29 +++++++++++++++++++----------
 3 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/include/polarssl/bignum.h b/include/polarssl/bignum.h
index 6e2afac72..f225fc394 100644
--- a/include/polarssl/bignum.h
+++ b/include/polarssl/bignum.h
@@ -246,7 +246,8 @@ void mpi_swap( mpi *X, mpi *Y );
  *                      if( assign ) mpi_copy( X, Y );
  *                 except that it avoids leaking any information about whether
  *                 the assignment was done or not (the above code may leak
- *                 information through branch prediction analysis).
+ *                 information through branch prediction and/or memory access
+ *                 patterns analysis).
  */
 int mpi_safe_cond_assign( mpi *X, mpi *Y, unsigned char assign );
 
diff --git a/include/polarssl/ecp.h b/include/polarssl/ecp.h
index 81b789edb..33c09fcf2 100644
--- a/include/polarssl/ecp.h
+++ b/include/polarssl/ecp.h
@@ -463,15 +463,15 @@ int ecp_sub( const ecp_group *grp, ecp_point *R,
  *                  or P is not a valid pubkey,
  *                  POLARSSL_ERR_MPI_MALLOC_FAILED if memory allocation failed
  *
- * \note            In order to prevent simple timing attacks, this function
- *                  executes a constant number of operations (that is, point
- *                  doubling and addition of distinct points) for random m in
- *                  the allowed range.
+ * \note            In order to prevent timing attacks, this function
+ *                  executes the exact same sequence of (base field)
+ *                  operations for any valid m. It avoids any if-branch or
+ *                  array index depending on the value of m.
  *
  * \note            If f_rng is not NULL, it is used to randomize intermediate
- *                  results in order to prevent potential attacks targetting
- *                  these results. It is recommended to always provide a
- *                  non-NULL f_rng (the overhead is negligible).
+ *                  results in order to prevent potential timing attacks
+ *                  targetting these results. It is recommended to always
+ *                  provide a non-NULL f_rng (the overhead is negligible).
  */
 int ecp_mul( ecp_group *grp, ecp_point *R,
              const mpi *m, const ecp_point *P,
diff --git a/library/ecp.c b/library/ecp.c
index 9f1d3cef1..756beba15 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1385,14 +1385,23 @@ cleanup:
  * Select precomputed point: R = sign(i) * T[ abs(i) / 2 ]
  */
 static int ecp_select_comb( const ecp_group *grp, ecp_point *R,
-                            const ecp_point T[], unsigned char i )
+                            ecp_point T[], unsigned char t_len,
+                            unsigned char i )
 {
     int ret;
+    unsigned char ii, j;
 
-    /* Ignore the "sign" bit */
-    MPI_CHK( ecp_copy( R, &T[ ( i & 0x7Fu ) >> 1 ] ) );
+    /* Ignore the "sign" bit and scale down */
+    ii =  ( i & 0x7Fu ) >> 1;
 
-    /* Restore the Z coordinate */
+    /* Read the whole table to thwart cache-based timing attacks */
+    for( j = 0; j < t_len; j++ )
+    {
+        MPI_CHK( mpi_safe_cond_assign( &R->X, &T[j].X, j == ii ) );
+        MPI_CHK( mpi_safe_cond_assign( &R->Y, &T[j].Y, j == ii ) );
+    }
+
+    /* The Z coordinate is always 1 */
     MPI_CHK( mpi_lset( &R->Z, 1 ) );
 
     /* Safely invert result if i is "negative" */
@@ -1409,7 +1418,7 @@ cleanup:
  * Cost: d A + d D + 1 R
  */
 static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
-                              const ecp_point T[],
+                              ecp_point T[], unsigned char t_len,
                               const unsigned char x[], size_t d,
                               int (*f_rng)(void *, unsigned char *, size_t),
                               void *p_rng )
@@ -1422,14 +1431,14 @@ static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
 
     /* Start with a non-zero point and randomize its coordinates */
     i = d;
-    MPI_CHK( ecp_select_comb( grp, R, T, x[i] ) );
+    MPI_CHK( ecp_select_comb( grp, R, T, t_len, x[i] ) );
     if( f_rng != 0 )
         MPI_CHK( ecp_randomize_coordinates( grp, R, f_rng, p_rng ) );
 
     while( i-- != 0 )
     {
         MPI_CHK( ecp_double_jac( grp, R, R ) );
-        MPI_CHK( ecp_select_comb( grp, &Txi, T, x[i] ) );
+        MPI_CHK( ecp_select_comb( grp, &Txi, T, t_len, x[i] ) );
         MPI_CHK( ecp_add_mixed( grp, R, R, &Txi ) );
     }
 
@@ -1447,8 +1456,8 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
              int (*f_rng)(void *, unsigned char *, size_t), void *p_rng )
 {
     int ret;
-    unsigned char w, m_is_odd, p_eq_g;
-    size_t pre_len, d, i;
+    unsigned char w, m_is_odd, p_eq_g, pre_len, i;
+    size_t d;
     unsigned char k[COMB_MAX_D + 1];
     ecp_point *T;
     mpi M, mm;
@@ -1542,7 +1551,7 @@ int ecp_mul( ecp_group *grp, ecp_point *R,
      * Go for comb multiplication, R = M * P
      */
     ecp_comb_fixed( k, d, w, &M );
-    ecp_mul_comb_core( grp, R, T, k, d, f_rng, p_rng );
+    MPI_CHK( ecp_mul_comb_core( grp, R, T, pre_len, k, d, f_rng, p_rng ) );
 
     /*
      * Now get m * P from M * P and normalize it

From 918148193d731d03f3c4a85edf99f133c2e89edf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 20:23:55 +0100
Subject: [PATCH 19/21] Enhance ecp_selftest

---
 library/ecp.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/library/ecp.c b/library/ecp.c
index 756beba15..01b62f8f1 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -68,10 +68,10 @@
 
 #if defined(POLARSSL_SELF_TEST)
 /*
- * Counts of point addition and doubling operations.
+ * Counts of point addition and doubling, and field multiplications.
  * Used to test resistance of point multiplication to simple timing attacks.
  */
-unsigned long add_count, dbl_count;
+unsigned long add_count, dbl_count, mul_count;
 #endif
 
 /*
@@ -843,7 +843,14 @@ cleanup:
 /*
  * Reduce a mpi mod p in-place, general case, to use after mpi_mul_mpi
  */
-#define MOD_MUL( N )    MPI_CHK( ecp_modp( &N, grp ) )
+#if defined(POLARSSL_SELF_TEST)
+#define INC_MUL_COUNT   mul_count++;
+#else
+#define INC_MUL_COUNT
+#endif
+
+#define MOD_MUL( N )    do { MPI_CHK( ecp_modp( &N, grp ) ); INC_MUL_COUNT } \
+                        while( 0 )
 
 /*
  * Reduce a mpi mod p in-place, to use after mpi_sub_mpi
@@ -2077,7 +2084,7 @@ int ecp_self_test( int verbose )
     ecp_group grp;
     ecp_point R, P;
     mpi m;
-    unsigned long add_c_prev, dbl_c_prev;
+    unsigned long add_c_prev, dbl_c_prev, mul_c_prev;
     /* exponents especially adapted for secp192r1 */
     const char *exponents[] =
     {
@@ -2110,6 +2117,7 @@ int ecp_self_test( int verbose )
 
     add_count = 0;
     dbl_count = 0;
+    mul_count = 0;
     MPI_CHK( mpi_read_string( &m, 16, exponents[0] ) );
     MPI_CHK( ecp_mul( &grp, &R, &m, &grp.G, NULL, NULL ) );
 
@@ -2117,13 +2125,17 @@ int ecp_self_test( int verbose )
     {
         add_c_prev = add_count;
         dbl_c_prev = dbl_count;
+        mul_c_prev = mul_count;
         add_count = 0;
         dbl_count = 0;
+        mul_count = 0;
 
         MPI_CHK( mpi_read_string( &m, 16, exponents[i] ) );
         MPI_CHK( ecp_mul( &grp, &R, &m, &grp.G, NULL, NULL ) );
 
-        if( add_count != add_c_prev || dbl_count != dbl_c_prev )
+        if( add_count != add_c_prev ||
+            dbl_count != dbl_c_prev ||
+            mul_count != mul_c_prev )
         {
             if( verbose != 0 )
                 printf( "failed (%zu)\n", i );
@@ -2142,6 +2154,7 @@ int ecp_self_test( int verbose )
 
     add_count = 0;
     dbl_count = 0;
+    mul_count = 0;
     MPI_CHK( mpi_read_string( &m, 16, exponents[0] ) );
     MPI_CHK( ecp_mul( &grp, &R, &m, &P, NULL, NULL ) );
 
@@ -2149,13 +2162,17 @@ int ecp_self_test( int verbose )
     {
         add_c_prev = add_count;
         dbl_c_prev = dbl_count;
+        mul_c_prev = mul_count;
         add_count = 0;
         dbl_count = 0;
+        mul_count = 0;
 
         MPI_CHK( mpi_read_string( &m, 16, exponents[i] ) );
         MPI_CHK( ecp_mul( &grp, &R, &m, &P, NULL, NULL ) );
 
-        if( add_count != add_c_prev || dbl_count != dbl_c_prev )
+        if( add_count != add_c_prev ||
+            dbl_count != dbl_c_prev ||
+            mul_count != mul_c_prev )
         {
             if( verbose != 0 )
                 printf( "failed (%zu)\n", i );

From 3e3d2b818ca0ffdca6f39105f3804a6e4d3f40e7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Thu, 21 Nov 2013 21:12:26 +0100
Subject: [PATCH 20/21] Fix bug in mpi_safe_cond_assign()

---
 library/bignum.c                     |  1 +
 tests/suites/test_suite_mpi.data     | 15 ++++++++++++---
 tests/suites/test_suite_mpi.function |  5 ++++-
 3 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/library/bignum.c b/library/bignum.c
index 663d9240e..49321bb65 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -222,6 +222,7 @@ int mpi_safe_cond_assign( mpi *X, mpi *Y, unsigned char assign )
         MPI_CHK( mpi_grow( X, Y->n ) );
 
     /* Do the conditional assign safely */
+    X->s = X->s * (1 - assign) + Y->s * assign;
     for( i = 0; i < X->n; i++ )
         X->p[i] = X->p[i] * (1 - assign) + Y->p[i] * assign;
 
diff --git a/tests/suites/test_suite_mpi.data b/tests/suites/test_suite_mpi.data
index 8755fa29a..287cc2d05 100644
--- a/tests/suites/test_suite_mpi.data
+++ b/tests/suites/test_suite_mpi.data
@@ -206,13 +206,22 @@ Test mpi_shrink #8
 mpi_shrink:4:0:0:1
 
 Test mpi_safe_cond_assign #1
-mpi_safe_cond_assign:"01":"02"
+mpi_safe_cond_assign:+1:"01":+1:"02"
 
 Test mpi_safe_cond_assign #2
-mpi_safe_cond_assign:"FF000000000000000001":"02"
+mpi_safe_cond_assign:+1:"FF000000000000000001":+1:"02"
 
 Test mpi_safe_cond_assign #3
-mpi_safe_cond_assign:"01":"FF000000000000000002"
+mpi_safe_cond_assign:+1:"01":+1:"FF000000000000000002"
+
+Test mpi_safe_cond_assign #4
+mpi_safe_cond_assign:+1:"01":-1:"02"
+
+Test mpi_safe_cond_assign #5
+mpi_safe_cond_assign:-1:"01":+1:"02"
+
+Test mpi_safe_cond_assign #6
+mpi_safe_cond_assign:-1:"01":-1:"02"
 
 Base test mpi_add_abs #1
 mpi_add_abs:10:"12345678":10:"642531":10:"12988209"
diff --git a/tests/suites/test_suite_mpi.function b/tests/suites/test_suite_mpi.function
index ee9b94a5b..394cd339b 100644
--- a/tests/suites/test_suite_mpi.function
+++ b/tests/suites/test_suite_mpi.function
@@ -309,13 +309,16 @@ void mpi_shrink( int before, int used, int min, int after )
 /* END_CASE */
 
 /* BEGIN_CASE */
-void mpi_safe_cond_assign( char *x_str, char *y_str )
+void mpi_safe_cond_assign( int x_sign, char *x_str,
+                           int y_sign, char *y_str )
 {
     mpi X, Y, XX;
     mpi_init( &X ); mpi_init( &Y ); mpi_init( &XX );
 
     TEST_ASSERT( mpi_read_string( &X, 16, x_str ) == 0 );
+    X.s = x_sign;
     TEST_ASSERT( mpi_read_string( &Y, 16, y_str ) == 0 );
+    Y.s = y_sign;
     TEST_ASSERT( mpi_copy( &XX, &X ) == 0 );
 
     TEST_ASSERT( mpi_safe_cond_assign( &X, &Y, 0 ) == 0 );

From 96c7a92b08ae3e286dc7c11ed349a1371e49b5dd Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= <mpg@elzevir.fr>
Date: Mon, 25 Nov 2013 18:28:53 +0100
Subject: [PATCH 21/21] Change mpi_safe_cond_assign() for more const-ness

---
 include/polarssl/bignum.h |  2 +-
 library/bignum.c          | 11 ++++++-----
 library/ecp.c             |  4 ++--
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/polarssl/bignum.h b/include/polarssl/bignum.h
index f225fc394..8541403b7 100644
--- a/include/polarssl/bignum.h
+++ b/include/polarssl/bignum.h
@@ -249,7 +249,7 @@ void mpi_swap( mpi *X, mpi *Y );
  *                 information through branch prediction and/or memory access
  *                 patterns analysis).
  */
-int mpi_safe_cond_assign( mpi *X, mpi *Y, unsigned char assign );
+int mpi_safe_cond_assign( mpi *X, const mpi *Y, unsigned char assign );
 
 /**
  * \brief          Set value from integer
diff --git a/library/bignum.c b/library/bignum.c
index 49321bb65..9eceeba55 100644
--- a/library/bignum.c
+++ b/library/bignum.c
@@ -206,8 +206,10 @@ void mpi_swap( mpi *X, mpi *Y )
 
 /*
  * Conditionally assign X = Y, without leaking information
+ * about whether the assignment was made or not.
+ * (Leaking information about the respective sizes of X and Y is ok however.)
  */
-int mpi_safe_cond_assign( mpi *X, mpi *Y, unsigned char assign )
+int mpi_safe_cond_assign( mpi *X, const mpi *Y, unsigned char assign )
 {
     int ret = 0;
     size_t i;
@@ -215,16 +217,15 @@ int mpi_safe_cond_assign( mpi *X, mpi *Y, unsigned char assign )
     if( assign * ( 1 - assign ) != 0 )
         return( POLARSSL_ERR_MPI_BAD_INPUT_DATA );
 
-    /* Make sure both MPIs have the same size */
-    if( X->n > Y->n )
-        MPI_CHK( mpi_grow( Y, X->n ) );
     if( Y->n > X->n )
         MPI_CHK( mpi_grow( X, Y->n ) );
 
     /* Do the conditional assign safely */
     X->s = X->s * (1 - assign) + Y->s * assign;
-    for( i = 0; i < X->n; i++ )
+    for( i = 0; i < Y->n; i++ )
         X->p[i] = X->p[i] * (1 - assign) + Y->p[i] * assign;
+    for( ; i < X->n; i++ )
+        X->p[i] *= (1 - assign);
 
 cleanup:
     return( ret );
diff --git a/library/ecp.c b/library/ecp.c
index 01b62f8f1..7b7d079a8 100644
--- a/library/ecp.c
+++ b/library/ecp.c
@@ -1392,7 +1392,7 @@ cleanup:
  * Select precomputed point: R = sign(i) * T[ abs(i) / 2 ]
  */
 static int ecp_select_comb( const ecp_group *grp, ecp_point *R,
-                            ecp_point T[], unsigned char t_len,
+                            const ecp_point T[], unsigned char t_len,
                             unsigned char i )
 {
     int ret;
@@ -1425,7 +1425,7 @@ cleanup:
  * Cost: d A + d D + 1 R
  */
 static int ecp_mul_comb_core( const ecp_group *grp, ecp_point *R,
-                              ecp_point T[], unsigned char t_len,
+                              const ecp_point T[], unsigned char t_len,
                               const unsigned char x[], size_t d,
                               int (*f_rng)(void *, unsigned char *, size_t),
                               void *p_rng )