From af3fc992b98891446c79c59779fd76b3414df67c Mon Sep 17 00:00:00 2001 From: NIIBE Yutaka Date: Mon, 16 Dec 2013 14:38:13 +0900 Subject: [PATCH] minor improvement of bignum --- ChangeLog | 6 + polarssl/include/polarssl/bn_mul.h | 234 ++++++++++++++--------------- polarssl/library/bignum.c | 4 +- 3 files changed, 125 insertions(+), 119 deletions(-) diff --git a/ChangeLog b/ChangeLog index e1894ce..b3ecd63 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,9 +1,15 @@ 2013-12-16 Niibe Yutaka + * polarssl/include/polarssl/bn_mul.h (MULADDC_1024_CORE) + (MULADDC_1024_LOOP, MULADDC_HUIT, MULADDC_INIT, MULADDC_CORE) + (MULADDC_STOP) [__arm__]: The value of input B won't change. + More accurate specification for asm statement. + * polarssl/library/bignum.c (mpi_cmp_abs_limbs): New. (mpi_montmul): Change the signature and use the upper half of T. (mpi_montred): Likewise. (mpi_exp_mod): Use improved mpi_montmul and mpi_montred. + (mpi_sub_hlp, mpi_mul_hlp): Add const qualifier for S. 2013-12-13 Niibe Yutaka diff --git a/polarssl/include/polarssl/bn_mul.h b/polarssl/include/polarssl/bn_mul.h index e0fcbf4..444c503 100644 --- a/polarssl/include/polarssl/bn_mul.h +++ b/polarssl/include/polarssl/bn_mul.h @@ -495,130 +495,130 @@ #endif /* TriCore */ #if defined(__arm__) -#define MULADDC_1024_CORE \ - "ldmia %0!, { r5, r6, r7 } \n" \ - "ldmia %1, { r8, r9, r10 } \n" \ - "umull r11, r12, %2, r5 \n" \ - "adcs r11, r11, %3 \n" \ - "adc %3, r12, #0 \n" \ - "adds r8, r8, r11 \n" \ - "umull r11, r12, %2, r6 \n" \ - "adcs r11, r11, %3 \n" \ - "adc %3, r12, #0 \n" \ - "adds r9, r9, r11 \n" \ - "umull r11, r12, %2, r7 \n" \ - "adcs r11, r11, %3 \n" \ - "adc %3, r12, #0 \n" \ - "adds r10, r10, r11 \n" \ - "stmia %1!, { r8, r9, r10 } \n" +#define MULADDC_1024_CORE \ + "ldmia %0!, { r5, r6, r7 } \n\t" \ + "ldmia %1, { r8, r9, r10 } \n\t" \ + "umull r11, r12, %4, r5 \n\t" \ + "adcs r11, r11, %2 \n\t" \ + "adc %2, r12, #0 \n\t" \ + "adds r8, r8, r11 \n\t" \ + "umull r11, r12, %4, r6 \n\t" \ + "adcs
r11, r11, %2 \n\t" \ + "adc %2, r12, #0 \n\t" \ + "adds r9, r9, r11 \n\t" \ + "umull r11, r12, %4, r7 \n\t" \ + "adcs r11, r11, %2 \n\t" \ + "adc %2, r12, #0 \n\t" \ + "adds r10, r10, r11 \n\t" \ + "stmia %1!, { r8, r9, r10 } \n\t" -#define MULADDC_1024_LOOP \ - asm( "tst %4, #0xfe0 \n" \ - "beq 0f \n" \ -"1: sub %4, %4, #32 \n" \ - "ldmia %0!, { r5, r6, r7 } \n" \ - "ldmia %1, { r8, r9, r10 } \n" \ - "umull r11, r12, %2, r5 \n" \ - "adds r11, r11, %3 \n" \ - "adc %3, r12, #0 \n" \ - "adds r8, r8, r11 \n" \ - "umull r11, r12, %2, r6 \n" \ - "adcs r11, r11, %3 \n" \ - "adc %3, r12, #0 \n" \ - "adds r9, r9, r11 \n" \ - "umull r11, r12, %2, r7 \n" \ - "adcs r11, r11, %3 \n" \ - "adc %3, r12, #0 \n" \ - "adds r10, r10, r11 \n" \ - "stmia %1!, { r8, r9, r10 } \n" \ - MULADDC_1024_CORE MULADDC_1024_CORE \ - MULADDC_1024_CORE MULADDC_1024_CORE \ - MULADDC_1024_CORE MULADDC_1024_CORE \ - MULADDC_1024_CORE MULADDC_1024_CORE \ - MULADDC_1024_CORE \ - "ldmia %0!, { r5, r6 } \n" \ - "ldmia %1, { r8, r9 } \n" \ - "umull r11, r12, %2, r5 \n" \ - "adcs r11, r11, %3 \n" \ - "adc %3, r12, #0 \n" \ - "adds r8, r8, r11 \n" \ - "umull r11, r12, %2, r6 \n" \ - "adcs r11, r11, %3 \n" \ - "adc %3, r12, #0 \n" \ - "adds r9, r9, r11 \n" \ - "stmia %1!, { r8, r9 } \n" \ - "adc %3, %3, #0 \n" \ - "tst %4, #0xfe0 \n" \ - "bne 1b \n" \ -"0:" \ - : "=r" (s), "=r" (d), "=r" (b), "=r" (c), "=r" (i) \ - : "0" (s), "1" (d), "2" (b), "3" (c), "4" (i) \ - : "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "memory", "cc" ); +#define MULADDC_1024_LOOP \ + asm( "tst %3, #0xfe0 \n\t" \ + "beq 0f \n" \ +"1: sub %3, %3, #32 \n\t" \ + "ldmia %0!, { r5, r6, r7 } \n\t" \ + "ldmia %1, { r8, r9, r10 } \n\t" \ + "umull r11, r12, %4, r5 \n\t" \ + "adds r11, r11, %2 \n\t" \ + "adc %2, r12, #0 \n\t" \ + "adds r8, r8, r11 \n\t" \ + "umull r11, r12, %4, r6 \n\t" \ + "adcs r11, r11, %2 \n\t" \ + "adc %2, r12, #0 \n\t" \ + "adds r9, r9, r11 \n\t" \ + "umull r11, r12, %4, r7 \n\t" \ + "adcs r11, r11, %2 \n\t" \ + "adc 
%2, r12, #0 \n\t" \ + "adds r10, r10, r11 \n\t" \ + "stmia %1!, { r8, r9, r10 } \n\t" \ + MULADDC_1024_CORE MULADDC_1024_CORE \ + MULADDC_1024_CORE MULADDC_1024_CORE \ + MULADDC_1024_CORE MULADDC_1024_CORE \ + MULADDC_1024_CORE MULADDC_1024_CORE \ + MULADDC_1024_CORE \ + "ldmia %0!, { r5, r6 } \n\t" \ + "ldmia %1, { r8, r9 } \n\t" \ + "umull r11, r12, %4, r5 \n\t" \ + "adcs r11, r11, %2 \n\t" \ + "adc %2, r12, #0 \n\t" \ + "adds r8, r8, r11 \n\t" \ + "umull r11, r12, %4, r6 \n\t" \ + "adcs r11, r11, %2 \n\t" \ + "adc %2, r12, #0 \n\t" \ + "adds r9, r9, r11 \n\t" \ + "stmia %1!, { r8, r9 } \n\t" \ + "adc %2, %2, #0 \n\t" \ + "tst %3, #0xfe0 \n\t" \ + "bne 1b \n" \ +"0:" \ + : "=r" (s), "=r" (d), "=r" (c), "=r" (i) \ + : "r" (b), "0" (s), "1" (d), "2" (c), "3" (i) \ + : "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "memory", "cc" ); /* Just for reference (dead code) */ -#define MULADDC_HUIT \ - "ldmia %0!, { r4, r5 } \n" \ - "ldmia %1, { r8, r9 } \n" \ - "umull r6, r7, %2, r4 \n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r8, r8, r6 \n" \ - "umull r6, r7, %2, r5 \n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r9, r9, r6 \n" \ - "stmia %1!, { r8, r9 } \n" \ - "ldmia %0!, { r4, r5 } \n" \ - "ldmia %1, { r8, r9 } \n" \ - "umull r6, r7, %2, r4 \n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r8, r8, r6 \n" \ - "umull r6, r7, %2, r5 \n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r9, r9, r6 \n" \ - "stmia %1!, { r8, r9 } \n" \ - "ldmia %0!, { r4, r5 } \n" \ - "ldmia %1, { r8, r9 } \n" \ - "umull r6, r7, %2, r4 \n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r8, r8, r6 \n" \ - "umull r6, r7, %2, r5 \n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r9, r9, r6 \n" \ - "stmia %1!, { r8, r9 } \n" \ - "ldmia %0!, { r4, r5 } \n" \ - "ldmia %1, { r8, r9 } \n" \ - "umull r6, r7, %2, r4 \n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r8, r8, r6 \n" \ - "umull r6, r7, %2, r5 
\n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r9, r9, r6 \n" \ - "stmia %1!, { r8, r9 } \n" +#define MULADDC_HUIT \ + "ldmia %0!, { r4, r5 } \n\t" \ + "ldmia %1, { r8, r9 } \n\t" \ + "umull r6, r7, %3, r4 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r8, r8, r6 \n\t" \ + "umull r6, r7, %3, r5 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r9, r9, r6 \n\t" \ + "stmia %1!, { r8, r9 } \n\t" \ + "ldmia %0!, { r4, r5 } \n\t" \ + "ldmia %1, { r8, r9 } \n\t" \ + "umull r6, r7, %3, r4 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r8, r8, r6 \n\t" \ + "umull r6, r7, %3, r5 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r9, r9, r6 \n\t" \ + "stmia %1!, { r8, r9 } \n\t" \ + "ldmia %0!, { r4, r5 } \n\t" \ + "ldmia %1, { r8, r9 } \n\t" \ + "umull r6, r7, %3, r4 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r8, r8, r6 \n\t" \ + "umull r6, r7, %3, r5 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r9, r9, r6 \n\t" \ + "stmia %1!, { r8, r9 } \n\t" \ + "ldmia %0!, { r4, r5 } \n\t" \ + "ldmia %1, { r8, r9 } \n\t" \ + "umull r6, r7, %3, r4 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r8, r8, r6 \n\t" \ + "umull r6, r7, %3, r5 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r9, r9, r6 \n\t" \ + "stmia %1!, { r8, r9 } \n\t" -#define MULADDC_INIT \ - asm( "adds %0, #0 \n" +#define MULADDC_INIT \ + asm( "adds %0, #0 \n\t" -#define MULADDC_CORE \ - "ldr r5, [%1] \n" \ - "ldr r4, [%0], #4 \n" \ - "umull r6, r7, %2, r4 \n" \ - "adcs r6, r6, %3 \n" \ - "adc %3, r7, #0 \n" \ - "adds r5, r5, r6 \n" \ - "str r5, [%1], #4 \n" +#define MULADDC_CORE \ + "ldr r5, [%1] \n\t" \ + "ldr r4, [%0], #4 \n\t" \ + "umull r6, r7, %3, r4 \n\t" \ + "adcs r6, r6, %2 \n\t" \ + "adc %2, r7, #0 \n\t" \ + "adds r5, r5, r6 \n\t" \ + "str r5, [%1], #4 \n\t" #define MULADDC_STOP \ - "adc %3, %3, #0 " \ - : "=r" (s), "=r" (d), 
"=r" (b), "=r" (c) \ - : "0" (s), "1" (d), "2" (b), "3" (c) \ - : "r4", "r5", "r6", "r7", "r8", "r9", "memory", "cc" ); + "adc %2, %2, #0 " \ + : "=r" (s), "=r" (d), "=r" (c) \ + : "r" (b), "0" (s), "1" (d), "2" (c) \ + : "r4", "r5", "r6", "r7", "memory", "cc" ); #endif /* ARMv3 */ diff --git a/polarssl/library/bignum.c b/polarssl/library/bignum.c index 8c2cf9f..d68fa62 100644 --- a/polarssl/library/bignum.c +++ b/polarssl/library/bignum.c @@ -810,7 +810,7 @@ cleanup: /* * Helper for mpi substraction */ -static t_uint mpi_sub_hlp( size_t n, t_uint *s, t_uint *d ) +static t_uint mpi_sub_hlp( size_t n, const t_uint *s, t_uint *d ) { size_t i; t_uint c, z; @@ -981,7 +981,7 @@ static */ __attribute__ ((noinline)) #endif -t_uint mpi_mul_hlp( size_t i, t_uint *s, t_uint *d, t_uint b ) +t_uint mpi_mul_hlp( size_t i, const t_uint *s, t_uint *d, t_uint b ) { t_uint c = 0, t = 0;