From e0b70894db1cef912c67c6d0014eda965f25dd04 Mon Sep 17 00:00:00 2001
From: NIIBE Yutaka
Date: Wed, 8 Jun 2011 15:18:27 +0900
Subject: [PATCH] more rsa tweak

---
 polarssl-0.14.0/include/polarssl/bn_mul.h | 92 +++++++++++++++++++----
 polarssl-0.14.0/library/bignum.c | 11 ++-
 2 files changed, 86 insertions(+), 17 deletions(-)

diff --git a/polarssl-0.14.0/include/polarssl/bn_mul.h b/polarssl-0.14.0/include/polarssl/bn_mul.h
index a8a1e67..e0fcbf4 100644
--- a/polarssl-0.14.0/include/polarssl/bn_mul.h
+++ b/polarssl-0.14.0/include/polarssl/bn_mul.h
@@ -495,50 +495,110 @@
 #endif /* TriCore */
 
 #if defined(__arm__)
+#define MULADDC_1024_CORE /* 3-word multiply-accumulate step; operands: %0=s %1=d %2=b %3=c */ \
+    "ldmia %0!, { r5, r6, r7 } \n" /* r5..r7 = next three words at s; s is advanced */ \
+    "ldmia %1, { r8, r9, r10 } \n" /* r8..r10 = three words at d; d is not advanced yet */ \
+    "umull r11, r12, %2, r5 \n" /* r12:r11 = b * r5 */ \
+    "adcs r11, r11, %3 \n" /* low += c + carry flag; the flag must come from the step placed before this macro */ \
+    "adc %3, r12, #0 \n" /* c = high + carry */ \
+    "adds r8, r8, r11 \n" /* accumulate into the d word; carry is consumed by the next adcs */ \
+    "umull r11, r12, %2, r6 \n" /* r12:r11 = b * r6 */ \
+    "adcs r11, r11, %3 \n" \
+    "adc %3, r12, #0 \n" \
+    "adds r9, r9, r11 \n" \
+    "umull r11, r12, %2, r7 \n" /* r12:r11 = b * r7 */ \
+    "adcs r11, r11, %3 \n" \
+    "adc %3, r12, #0 \n" \
+    "adds r10, r10, r11 \n" /* carry from this adds is left pending for whatever follows */ \
+    "stmia %1!, { r8, r9, r10 } \n" /* store the three updated words; d is advanced */
+#define MULADDC_1024_LOOP /* complete asm statement; each pass handles 3 + 9*3 + 2 = 32 words */ \
+    asm( "tst %4, #0xfe0 \n" /* %4 = i: is any of bits 5..11 set? */ \
+    "beq 0f \n" /* none set: skip the unrolled loop entirely */ \
+"1: sub %4, %4, #32 \n" /* i -= 32 */ \
+    "ldmia %0!, { r5, r6, r7 } \n" /* head of the pass: same shape as MULADDC_1024_CORE ... */ \
+    "ldmia %1, { r8, r9, r10 } \n" \
+    "umull r11, r12, %2, r5 \n" \
+    "adds r11, r11, %3 \n" /* ... except adds instead of adcs: no carry-in at the start of a pass */ \
+    "adc %3, r12, #0 \n" \
+    "adds r8, r8, r11 \n" \
+    "umull r11, r12, %2, r6 \n" \
+    "adcs r11, r11, %3 \n" \
+    "adc %3, r12, #0 \n" \
+    "adds r9, r9, r11 \n" \
+    "umull r11, r12, %2, r7 \n" \
+    "adcs r11, r11, %3 \n" \
+    "adc %3, r12, #0 \n" \
+    "adds r10, r10, r11 \n" \
+    "stmia %1!, { r8, r9, r10 } \n" \
+    MULADDC_1024_CORE MULADDC_1024_CORE /* nine CORE expansions in total = 27 words */ \
+    MULADDC_1024_CORE MULADDC_1024_CORE \
+    MULADDC_1024_CORE MULADDC_1024_CORE \
+    MULADDC_1024_CORE MULADDC_1024_CORE \
+    MULADDC_1024_CORE \
+    "ldmia %0!, { r5, r6 } \n" /* tail of the pass: the last two words */ \
+    "ldmia %1, { r8, r9 } \n" \
+    "umull r11, r12, %2, r5 \n" \
+    "adcs r11, r11, %3 \n" \
+    "adc %3, r12, #0 \n" \
+    "adds r8, r8, r11 \n" \
+    "umull r11, r12, %2, r6 \n" \
+    "adcs r11, r11, %3 \n" \
+    "adc %3, r12, #0 \n" \
+    "adds r9, r9, r11 \n" \
+    "stmia %1!, { r8, r9 } \n" \
+    "adc %3, %3, #0 \n" /* fold the carry of the final adds into c before leaving the pass */ \
+    "tst %4, #0xfe0 \n" /* repeat while any of bits 5..11 of i is still set */ \
+    "bne 1b \n" \
+"0:" \
+    : "=r" (s), "=r" (d), "=r" (b), "=r" (c), "=r" (i) /* outputs %0..%4 */ \
+    : "0" (s), "1" (d), "2" (b), "3" (c), "4" (i) /* each input is tied to the same-numbered output */ \
+    : "r5", "r6", "r7", "r8", "r9", "r10", "r11", "r12", "memory", "cc" ); /* scratch registers used above, plus memory and flags */
+
+/* Just for reference (dead code): bignum.c tests MULADDC_1024_LOOP before MULADDC_HUIT */
 
 #define MULADDC_HUIT \
     "ldmia %0!, { r4, r5 } \n" \
     "ldmia %1, { r8, r9 } \n" \
     "umull r6, r7, %2, r4 \n" \
     "adcs r6, r6, %3 \n" \
-    "adc r7, r7, #0 \n" \
+    "adc %3, r7, #0 \n" /* high word + carry now goes straight into %3 (c); same rewrite in the three groups below */ \
     "adds r8, r8, r6 \n" \
-    "umull r6, %3, %2, r5 \n" \
-    "adcs r6, r6, r7 \n" \
-    "adc %3, %3, #0 \n" \
+    "umull r6, r7, %2, r5 \n" /* product goes to r7:r6 for both words, no longer alternating with %3 */ \
+    "adcs r6, r6, %3 \n" \
+    "adc %3, r7, #0 \n" \
     "adds r9, r9, r6 \n" \
     "stmia %1!, { r8, r9 } \n" \
     "ldmia %0!, { r4, r5 } \n" \
     "ldmia %1, { r8, r9 } \n" \
     "umull r6, r7, %2, r4 \n" \
     "adcs r6, r6, %3 \n" \
-    "adc r7, r7, #0 \n" \
+    "adc %3, r7, #0 \n" \
     "adds r8, r8, r6 \n" \
-    "umull r6, %3, %2, r5 \n" \
-    "adcs r6, r6, r7 \n" \
-    "adc %3, %3, #0 \n" \
+    "umull r6, r7, %2, r5 \n" \
+    "adcs r6, r6, %3 \n" \
+    "adc %3, r7, #0 \n" \
     "adds r9, r9, r6 \n" \
     "stmia %1!, { r8, r9 } \n" \
     "ldmia %0!, { r4, r5 } \n" \
     "ldmia %1, { r8, r9 } \n" \
     "umull r6, r7, %2, r4 \n" \
     "adcs r6, r6, %3 \n" \
-    "adc r7, r7, #0 \n" \
+    "adc %3, r7, #0 \n" \
     "adds r8, r8, r6 \n" \
-    "umull r6, %3, %2, r5 \n" \
-    "adcs r6, r6, r7 \n" \
-    "adc %3, %3, #0 \n" \
+    "umull r6, r7, %2, r5 \n" \
+    "adcs r6, r6, %3 \n" \
+    "adc %3, r7, #0 \n" \
     "adds r9, r9, r6 \n" \
     "stmia %1!, { r8, r9 } \n" \
     "ldmia %0!, { r4, r5 } \n" \
     "ldmia %1, { r8, r9 } \n" \
     "umull r6, r7, %2, r4 \n" \
     "adcs r6, r6, %3 \n" \
-    "adc r7, r7, #0 \n" \
+    "adc %3, r7, #0 \n" \
     "adds r8, r8, r6 \n" \
-    "umull r6, %3, %2, r5 \n" \
-    "adcs r6, r6, r7 \n" \
-    "adc %3, %3, #0 \n" \
+    "umull r6, r7, %2, r5 \n" \
+    "adcs r6, r6, %3 \n" \
+    "adc %3, r7, #0 \n" \
     "adds r9, r9, r6 \n" \
     "stmia %1!, { r8, r9 } \n"
 
diff --git a/polarssl-0.14.0/library/bignum.c b/polarssl-0.14.0/library/bignum.c
index fadc760..cb1b7eb 100644
--- a/polarssl-0.14.0/library/bignum.c
+++ b/polarssl-0.14.0/library/bignum.c
@@ -894,7 +894,16 @@ static void mpi_mul_hlp( int i, t_int *s, t_int *d, t_int b )
 {
     t_int c = 0, t = 0;
 
-#if defined(MULADDC_HUIT)
+#if defined(MULADDC_1024_LOOP) /* checked first, so it wins over MULADDC_HUIT when both are defined */
+    MULADDC_1024_LOOP /* expands to a full asm statement (ends in ';'); updates s, d, c and i */
+
+    for( ; i > 0; i-- ) /* whatever count the asm loop left in i, one MULADDC_CORE per iteration */
+    {
+        MULADDC_INIT
+        MULADDC_CORE
+        MULADDC_STOP
+    }
+#elif defined(MULADDC_HUIT)
     for( ; i >= 8; i -= 8 )
     {
         MULADDC_INIT