From d1e7a45fdd59f2d1db081347c4d1ced835be5952 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Manuel=20P=C3=A9gouri=C3=A9-Gonnard?= Date: Tue, 22 Oct 2013 21:03:16 +0200 Subject: [PATCH] Rework ecp_mod_p192() On x86_64, this makes it 5x faster, and ecp_mul() 17% faster for this curve. The code is shorter too. --- library/ecp.c | 101 +++++++++++++------------------ tests/suites/test_suite_ecp.data | 14 ++++- 2 files changed, 56 insertions(+), 59 deletions(-) diff --git a/library/ecp.c b/library/ecp.c index d3880be55..d53d306a5 100644 --- a/library/ecp.c +++ b/library/ecp.c @@ -475,25 +475,36 @@ cleanup: } #if defined(POLARSSL_ECP_DP_SECP192R1_ENABLED) -/* - * 192 bits in terms of t_uint - */ -#define P192_SIZE_INT ( 192 / CHAR_BIT / sizeof( t_uint ) ) -/* - * Table to get S1, S2, S3 of FIPS 186-3 D.2.1: - * -1 means let this chunk be 0 - * a positive value i means A_i. - */ -#define P192_CHUNKS 3 -#define P192_CHUNK_CHAR ( 64 / CHAR_BIT ) -#define P192_CHUNK_INT ( P192_CHUNK_CHAR / sizeof( t_uint ) ) +/* Add one 64-bit chunk to another, limb by limb (dst += src), and add the carry out of the chunk to *carry */ +static inline void add_64( t_uint *dst, t_uint *src, t_uint *carry ) +{ + unsigned char i; + t_uint c = 0; + for( i = 0; i < 8 / sizeof( t_uint ); i++, dst++, src++ ) + { + *dst += c; c = ( *dst < c ); + *dst += *src; c += ( *dst < *src ); + } + *carry += c; +} -const signed char p192_tbl[][P192_CHUNKS] = { - { -1, 3, 3 }, /* S1 */ - { 4, 4, -1 }, /* S2 */ - { 5, 5, 5 }, /* S3 */ -}; +/* Add *carry to a 64-bit chunk, limb by limb, and leave the carry out of the chunk in *carry */ +static inline void carry64( t_uint *dst, t_uint *carry ) +{ + unsigned char i; + for( i = 0; i < 8 / sizeof( t_uint ); i++, dst++ ) + { + *dst += *carry; + *carry = ( *dst < *carry ); + } +} + +#define OFFSET ( 8 / sizeof( t_uint ) ) +#define A( i ) ( N->p + ( i ) * OFFSET ) +#define ADD( i ) add_64( p, A( i ), &c ) +#define NEXT p += OFFSET; carry64( p, &c ) +#define LAST p += OFFSET; *p = c; while( ++p < end ) *p = 0 /* * Fast quasi-reduction modulo p192 (FIPS 186-3 D.2.1) @@ -501,53 
+512,27 @@ const signed char p192_tbl[][P192_CHUNKS] = { static int ecp_mod_p192( mpi *N ) { int ret; - unsigned char i, j, offset; - signed char chunk; - mpi tmp, acc; - t_uint tmp_p[P192_SIZE_INT], acc_p[P192_SIZE_INT + 1]; + t_uint c = 0; + t_uint *p, *end; - tmp.s = 1; - tmp.n = sizeof( tmp_p ) / sizeof( tmp_p[0] ); - tmp.p = tmp_p; + /* Make sure N has 6 * OFFSET limbs, so that every 64-bit chunk up to A( 5 ) can be accessed */ + MPI_CHK( mpi_grow( N, 6 * OFFSET ) ); + p = N->p; + end = p + N->n; - acc.s = 1; - acc.n = sizeof( acc_p ) / sizeof( acc_p[0] ); - acc.p = acc_p; - - MPI_CHK( mpi_grow( N, P192_SIZE_INT * 2 ) ); - - /* - * acc = T - */ - memset( acc_p, 0, sizeof( acc_p ) ); - memcpy( acc_p, N->p, P192_CHUNK_CHAR * P192_CHUNKS ); - - for( i = 0; i < sizeof( p192_tbl ) / sizeof( p192_tbl[0] ); i++) - { - /* - * tmp = S_i - */ - memset( tmp_p, 0, sizeof( tmp_p ) ); - for( j = 0, offset = P192_CHUNKS - 1; j < P192_CHUNKS; j++, offset-- ) - { - chunk = p192_tbl[i][j]; - if( chunk >= 0 ) - memcpy( tmp_p + offset * P192_CHUNK_INT, - N->p + chunk * P192_CHUNK_INT, - P192_CHUNK_CHAR ); - } - - /* - * acc += tmp - */ - MPI_CHK( mpi_add_abs( &acc, &acc, &tmp ) ); - } - - MPI_CHK( mpi_copy( N, &acc ) ); + ADD( 3 ); ADD( 5 ); NEXT; // A0 += A3 + A5 + ADD( 3 ); ADD( 4 ); ADD( 5 ); NEXT; // A1 += A3 + A4 + A5 + ADD( 4 ); ADD( 5 ); LAST; // A2 += A4 + A5 cleanup: return( ret ); } + +#undef OFFSET +#undef A +#undef ADD +#undef NEXT +#undef LAST #endif /* POLARSSL_ECP_DP_SECP192R1_ENABLED */ #if defined(POLARSSL_ECP_DP_SECP521R1_ENABLED) diff --git a/tests/suites/test_suite_ecp.data b/tests/suites/test_suite_ecp.data index 9eb302b5b..4748ff98b 100644 --- a/tests/suites/test_suite_ecp.data +++ b/tests/suites/test_suite_ecp.data @@ -253,14 +253,26 @@ ECP gen keypair depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED ecp_gen_keypair:POLARSSL_ECP_DP_SECP192R1 +ECP mod p192 small (more than 192 bits, less limbs than 2 * 192 bits) +depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED 
+ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"0100000000000103010000000000010201000000000001010100000000000100" + ECP mod p192 readable depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED -ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"000000000000010500000000000001040000000000000103000000000000010200000000000001010000000000000100" +ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"010000000000010501000000000001040100000000000103010000000000010201000000000001010100000000000100" + +ECP mod p192 readable with carry +depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED +ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"FF00000000010500FF00000000010400FF00000000010300FF00000000010200FF00000000010100FF00000000010000" ECP mod p192 random depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"36CF96B45D706A0954D89E52CE5F38517A2270E0175849B6F3740151D238CCABEF921437E475881D83BB69E4AA258EBD" +ECP mod p192 (from a past failure case) +depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED +ecp_fast_mod:POLARSSL_ECP_DP_SECP192R1:"1AC2D6F96A2A425E9DD1776DD8368D4BBC86BF4964E79FEA713583BF948BBEFF0939F96FB19EC48C585BDA6A2D35C750" + ECP test vectors secp192r1 rfc 5114 depends_on:POLARSSL_ECP_DP_SECP192R1_ENABLED ecp_test_vect:POLARSSL_ECP_DP_SECP192R1:"323FA3169D8E9C6593F59476BC142000AB5BE0E249C43426":"CD46489ECFD6C105E7B3D32566E2B122E249ABAADD870612":"68887B4877DF51DD4DC3D6FD11F0A26F8FD3844317916E9A":"631F95BB4A67632C9C476EEE9AB695AB240A0499307FCF62":"519A121680E0045466BA21DF2EEE47F5973B500577EF13D5":"FF613AB4D64CEE3A20875BDB10F953F6B30CA072C60AA57F":"AD420182633F8526BFE954ACDA376F05E5FF4F837F54FEBE":"4371545ED772A59741D0EDA32C671112B7FDDD51461FCF32"