diff --git a/library/ecp.c b/library/ecp.c index be6b0d5d1..adef09ea8 100644 --- a/library/ecp.c +++ b/library/ecp.c @@ -911,70 +911,86 @@ cleanup: /* * Point doubling R = 2 P, Jacobian coordinates * - * http://www.hyperelliptic.org/EFD/g1p/auto-code/shortw/jacobian/doubling/dbl-2007-bl.op3 - * with heavy variable renaming, some reordering and one minor modification - * (a = 2 * b, c = d - 2a replaced with c = d, c = c - b, c = c - b) - * in order to use a lot less intermediate variables (6 vs 25). + * Based on http://www.hyperelliptic.org/EFD/g1p/auto-shortw-jacobian.html#doubling-dbl-1998-cmo-2 . * - * Cost: 1D := 2M + 8S + * We follow the variable naming fairly closely. The formula variations that trade a MUL for a SQR + * (plus a few ADDs) aren't useful as our bignum implementation doesn't distinguish squaring. + * + * Standard optimizations are applied when curve parameter A is one of { 0, -3 }. + * + * Cost: 1D := 3M + 4S (A == 0) + * 4M + 4S (A == -3) + * 3M + 6S + 1a otherwise */ static int ecp_double_jac( const ecp_group *grp, ecp_point *R, const ecp_point *P ) { int ret; - mpi T1, T2, T3, X3, Y3, Z3; + mpi M, S, T, U; #if defined(POLARSSL_SELF_TEST) dbl_count++; #endif - mpi_init( &T1 ); mpi_init( &T2 ); mpi_init( &T3 ); - mpi_init( &X3 ); mpi_init( &Y3 ); mpi_init( &Z3 ); - - MPI_CHK( mpi_mul_mpi( &T3, &P->X, &P->X ) ); MOD_MUL( T3 ); - MPI_CHK( mpi_mul_mpi( &T2, &P->Y, &P->Y ) ); MOD_MUL( T2 ); - MPI_CHK( mpi_mul_mpi( &Y3, &T2, &T2 ) ); MOD_MUL( Y3 ); - MPI_CHK( mpi_add_mpi( &X3, &P->X, &T2 ) ); MOD_ADD( X3 ); - MPI_CHK( mpi_mul_mpi( &X3, &X3, &X3 ) ); MOD_MUL( X3 ); - MPI_CHK( mpi_sub_mpi( &X3, &X3, &Y3 ) ); MOD_SUB( X3 ); - MPI_CHK( mpi_sub_mpi( &X3, &X3, &T3 ) ); MOD_SUB( X3 ); - MPI_CHK( mpi_mul_int( &T1, &X3, 2 ) ); MOD_ADD( T1 ); - MPI_CHK( mpi_mul_mpi( &Z3, &P->Z, &P->Z ) ); MOD_MUL( Z3 ); - MPI_CHK( mpi_mul_mpi( &X3, &Z3, &Z3 ) ); MOD_MUL( X3 ); - MPI_CHK( mpi_mul_int( &T3, &T3, 3 ) ); MOD_ADD( T3 ); + mpi_init( &M ); mpi_init( &S ); mpi_init( &T ); mpi_init( &U ); /* Special case for A = -3 */ if( grp->A.p == NULL ) { - MPI_CHK( mpi_mul_int( &X3, &X3, 3 ) ); - X3.s = -1; /* mpi_mul_int doesn't handle negative numbers */ - MOD_SUB( X3 ); + /* M = 3(X + Z^2)(X - Z^2) */ + MPI_CHK( mpi_mul_mpi( &S, &P->Z, &P->Z ) ); MOD_MUL( S ); + MPI_CHK( mpi_add_mpi( &T, &P->X, &S ) ); MOD_ADD( T ); + MPI_CHK( mpi_sub_mpi( &U, &P->X, &S ) ); MOD_SUB( U ); + MPI_CHK( mpi_mul_mpi( &S, &T, &U ) ); MOD_MUL( S ); + MPI_CHK( mpi_mul_int( &M, &S, 3 ) ); MOD_ADD( M ); } else { - MPI_CHK( mpi_mul_mpi( &X3, &X3, &grp->A ) ); MOD_MUL( X3 ); + /* M = 3.X^2 */ + MPI_CHK( mpi_mul_mpi( &S, &P->X, &P->X ) ); MOD_MUL( S ); + MPI_CHK( mpi_mul_int( &M, &S, 3 ) ); MOD_ADD( M ); + + /* Optimize away for "koblitz" curves with A = 0 */ + if( mpi_cmp_int( &grp->A, 0 ) != 0 ) + { + /* M += A.Z^4 */ + MPI_CHK( mpi_mul_mpi( &S, &P->Z, &P->Z ) ); MOD_MUL( S ); + MPI_CHK( mpi_mul_mpi( &T, &S, &S ) ); MOD_MUL( T ); + MPI_CHK( mpi_mul_mpi( &S, &T, &grp->A ) ); MOD_MUL( S ); + MPI_CHK( mpi_add_mpi( &M, &M, &S ) ); MOD_ADD( M ); + } } - MPI_CHK( mpi_add_mpi( &T3, &T3, &X3 ) ); MOD_ADD( T3 ); - MPI_CHK( mpi_mul_mpi( &X3, &T3, &T3 ) ); MOD_MUL( X3 ); - MPI_CHK( mpi_sub_mpi( &X3, &X3, &T1 ) ); MOD_SUB( X3 ); - MPI_CHK( mpi_sub_mpi( &X3, &X3, &T1 ) ); MOD_SUB( X3 ); - MPI_CHK( mpi_sub_mpi( &T1, &T1, &X3 ) ); MOD_SUB( T1 ); - MPI_CHK( mpi_mul_mpi( &T1, &T3, &T1 ) ); MOD_MUL( T1 ); - MPI_CHK( mpi_mul_int( &T3, &Y3, 8 ) ); MOD_ADD( T3 ); - MPI_CHK( mpi_sub_mpi( &Y3, &T1, &T3 ) ); MOD_SUB( Y3 ); - MPI_CHK( mpi_add_mpi( &T1, &P->Y, &P->Z ) ); MOD_ADD( T1 ); - MPI_CHK( mpi_mul_mpi( &T1, &T1, &T1 ) ); MOD_MUL( T1 ); - MPI_CHK( mpi_sub_mpi( &T1, &T1, &T2 ) ); MOD_SUB( T1 ); - MPI_CHK( mpi_sub_mpi( &Z3, &T1, &Z3 ) ); MOD_SUB( Z3 ); + /* S = 4.X.Y^2 */ + MPI_CHK( mpi_mul_mpi( &T, &P->Y, &P->Y ) ); MOD_MUL( T ); + MPI_CHK( mpi_shift_l( &T, 1 ) ); MOD_ADD( T ); + MPI_CHK( mpi_mul_mpi( &S, &P->X, &T ) ); MOD_MUL( S ); + MPI_CHK( mpi_shift_l( &S, 1 ) ); MOD_ADD( S ); - MPI_CHK( mpi_copy( &R->X, &X3 ) ); - MPI_CHK( mpi_copy( &R->Y, &Y3 ) ); - MPI_CHK( mpi_copy( &R->Z, &Z3 ) ); + /* U = 8.Y^4 */ + MPI_CHK( mpi_mul_mpi( &U, &T, &T ) ); MOD_MUL( U ); + MPI_CHK( mpi_shift_l( &U, 1 ) ); MOD_ADD( U ); + + /* T = M^2 - 2.S */ + MPI_CHK( mpi_mul_mpi( &T, &M, &M ) ); MOD_MUL( T ); + MPI_CHK( mpi_sub_mpi( &T, &T, &S ) ); MOD_SUB( T ); + MPI_CHK( mpi_sub_mpi( &T, &T, &S ) ); MOD_SUB( T ); + + /* S = M(S - T) - U */ + MPI_CHK( mpi_sub_mpi( &S, &S, &T ) ); MOD_SUB( S ); + MPI_CHK( mpi_mul_mpi( &S, &S, &M ) ); MOD_MUL( S ); + MPI_CHK( mpi_sub_mpi( &S, &S, &U ) ); MOD_SUB( S ); + + /* U = 2.Y.Z */ + MPI_CHK( mpi_mul_mpi( &U, &P->Y, &P->Z ) ); MOD_MUL( U ); + MPI_CHK( mpi_shift_l( &U, 1 ) ); MOD_ADD( U ); + + MPI_CHK( mpi_copy( &R->X, &T ) ); + MPI_CHK( mpi_copy( &R->Y, &S ) ); + MPI_CHK( mpi_copy( &R->Z, &U ) ); cleanup: - mpi_free( &T1 ); mpi_free( &T2 ); mpi_free( &T3 ); - mpi_free( &X3 ); mpi_free( &Y3 ); mpi_free( &Z3 ); + mpi_free( &M ); mpi_free( &S ); mpi_free( &T ); mpi_free( &U ); return( ret ); }