more minor change for bignum
This commit is contained in:
@@ -496,64 +496,64 @@
|
|||||||
|
|
||||||
#if defined(__arm__)
|
#if defined(__arm__)
|
||||||
#define MULADDC_1024_CORE \
|
#define MULADDC_1024_CORE \
|
||||||
"ldmia %0!, { r8, r9, r12 } \n\t" \
|
"ldmia %[s]!, { r8, r9, r10 } \n\t" \
|
||||||
"ldmia %1, { r5, r6, r7 } \n\t" \
|
"ldmia %[d], { r5, r6, r7 } \n\t" \
|
||||||
"adcs r5, r5, %2 \n\t" \
|
"adcs r5, r5, %[c] \n\t" \
|
||||||
"umull r4, r8, r8, %4 \n\t" \
|
"umull r4, r8, r8, %[b] \n\t" \
|
||||||
"adc %2, r8, #0 \n\t" \
|
"adc %[c], r8, #0 \n\t" \
|
||||||
"adds r5, r5, r4 \n\t" \
|
"adds r5, r5, r4 \n\t" \
|
||||||
"adcs r6, r6, %2 \n\t" \
|
"adcs r6, r6, %[c] \n\t" \
|
||||||
"umull r4, r8, r9, %4 \n\t" \
|
"umull r4, r8, r9, %[b] \n\t" \
|
||||||
"adc %2, r8, #0 \n\t" \
|
"adc %[c], r8, #0 \n\t" \
|
||||||
"adds r6, r6, r4 \n\t" \
|
"adds r6, r6, r4 \n\t" \
|
||||||
"adcs r7, r7, %2 \n\t" \
|
"adcs r7, r7, %[c] \n\t" \
|
||||||
"umull r4, r8, r12, %4 \n\t" \
|
"umull r4, r8, r10, %[b] \n\t" \
|
||||||
"adc %2, r8, #0 \n\t" \
|
"adc %[c], r8, #0 \n\t" \
|
||||||
"adds r7, r7, r4 \n\t" \
|
"adds r7, r7, r4 \n\t" \
|
||||||
"stmia %1!, { r5, r6, r7 } \n\t"
|
"stmia %[d]!, { r5, r6, r7 } \n\t"
|
||||||
|
|
||||||
#define MULADDC_1024_LOOP \
|
#define MULADDC_1024_LOOP \
|
||||||
asm( "tst %3, #0xfe0 \n\t" \
|
asm( "tst %[i], #0xfe0 \n\t" \
|
||||||
"beq 0f \n" \
|
"beq 0f \n" \
|
||||||
"1: sub %3, %3, #32 \n\t" \
|
"1: ldmia %[s]!, { r8, r9, r10 } \n\t" \
|
||||||
"ldmia %0!, { r8, r9, r12 } \n\t" \
|
"ldmia %[d], { r5, r6, r7 } \n\t" \
|
||||||
"ldmia %1, { r5, r6, r7 } \n\t" \
|
"sub %[i], %[i], #32 \n\t" \
|
||||||
"adds r5, r5, %2 \n\t" \
|
"adds r5, r5, %[c] \n\t" \
|
||||||
"umull r4, r8, %4, r8 \n\t" \
|
"umull r4, r8, %[b], r8 \n\t" \
|
||||||
"adc %2, r8, #0 \n\t" \
|
"adc %[c], r8, #0 \n\t" \
|
||||||
"adds r5, r5, r4 \n\t" \
|
"adds r5, r5, r4 \n\t" \
|
||||||
"adcs r6, r6, %2 \n\t" \
|
"adcs r6, r6, %[c] \n\t" \
|
||||||
"umull r4, r8, %4, r9 \n\t" \
|
"umull r4, r8, %[b], r9 \n\t" \
|
||||||
"adc %2, r8, #0 \n\t" \
|
"adc %[c], r8, #0 \n\t" \
|
||||||
"adds r6, r6, r4 \n\t" \
|
"adds r6, r6, r4 \n\t" \
|
||||||
"adcs r7, r7, %2 \n\t" \
|
"adcs r7, r7, %[c] \n\t" \
|
||||||
"umull r4, r8, %4, r12 \n\t" \
|
"umull r4, r8, %[b], r10 \n\t" \
|
||||||
"adc %2, r8, #0 \n\t" \
|
"adc %[c], r8, #0 \n\t" \
|
||||||
"adds r7, r7, r4 \n\t" \
|
"adds r7, r7, r4 \n\t" \
|
||||||
"stmia %1!, { r5, r6, r7 } \n\t" \
|
"stmia %[d]!, { r5, r6, r7 } \n\t" \
|
||||||
MULADDC_1024_CORE MULADDC_1024_CORE \
|
MULADDC_1024_CORE MULADDC_1024_CORE \
|
||||||
MULADDC_1024_CORE MULADDC_1024_CORE \
|
MULADDC_1024_CORE MULADDC_1024_CORE \
|
||||||
MULADDC_1024_CORE MULADDC_1024_CORE \
|
MULADDC_1024_CORE MULADDC_1024_CORE \
|
||||||
MULADDC_1024_CORE MULADDC_1024_CORE \
|
MULADDC_1024_CORE MULADDC_1024_CORE \
|
||||||
MULADDC_1024_CORE \
|
MULADDC_1024_CORE \
|
||||||
"ldmia %0!, { r8, r9 } \n\t" \
|
"ldmia %[s]!, { r8, r9 } \n\t" \
|
||||||
"ldmia %1, { r5, r6 } \n\t" \
|
"ldmia %[d], { r5, r6 } \n\t" \
|
||||||
"adcs r5, r5, %2 \n\t" \
|
"adcs r5, r5, %[c] \n\t" \
|
||||||
"umull r4, r8, %4, r8 \n\t" \
|
"umull r4, r8, %[b], r8 \n\t" \
|
||||||
"adc %2, r8, #0 \n\t" \
|
"adc %[c], r8, #0 \n\t" \
|
||||||
"adds r5, r5, r4 \n\t" \
|
"adds r5, r5, r4 \n\t" \
|
||||||
"adcs r6, r6, %2 \n\t" \
|
"adcs r6, r6, %[c] \n\t" \
|
||||||
"umull r4, r8, %4, r9 \n\t" \
|
"umull r4, r8, %[b], r9 \n\t" \
|
||||||
"adc %2, r8, #0 \n\t" \
|
"adc %[c], r8, #0 \n\t" \
|
||||||
"adds r6, r6, r4 \n\t" \
|
"adds r6, r6, r4 \n\t" \
|
||||||
"stmia %1!, { r5, r6 } \n\t" \
|
"adc %[c], %[c], #0 \n\t" \
|
||||||
"adc %2, %2, #0 \n\t" \
|
"stmia %[d]!, { r5, r6 } \n\t" \
|
||||||
"tst %3, #0xfe0 \n\t" \
|
"tst %[i], #0xfe0 \n\t" \
|
||||||
"bne 1b \n" \
|
"bne 1b \n" \
|
||||||
"0:" \
|
"0:" \
|
||||||
: "=r" (s), "=r" (d), "=r" (c), "=r" (i) \
|
: [s] "=r" (s), [d] "=r" (d), [c] "=r" (c), [i] "=r" (i) \
|
||||||
: "r" (b), "0" (s), "1" (d), "2" (c), "3" (i) \
|
: [b] "r" (b), "[s]" (s), "[d]" (d), "[c]" (c), "[i]" (i) \
|
||||||
: "r4", "r5", "r6", "r7", "r8", "r9", "r12", "memory", "cc" );
|
: "r4", "r5", "r6", "r7", "r8", "r9", "r10", "memory", "cc" );
|
||||||
|
|
||||||
/* Just for reference (dead code) */
|
/* Just for reference (dead code) */
|
||||||
#define MULADDC_HUIT \
|
#define MULADDC_HUIT \
|
||||||
|
|||||||
@@ -1465,39 +1465,38 @@ static void mpi_montsqr( size_t n, const t_uint *np, t_uint mm, t_uint *d )
|
|||||||
x_i = *xj;
|
x_i = *xj;
|
||||||
*xj++ = c;
|
*xj++ = c;
|
||||||
asm (/* (C,R4,R5) := w_i_i + x_i*x_i; w_i_i := R5; */
|
asm (/* (C,R4,R5) := w_i_i + x_i*x_i; w_i_i := R5; */
|
||||||
"ldr r5, [%[wij]]\n\t" /* R5 := w_i_i; */
|
|
||||||
"mov %[c], #0\n\t"
|
"mov %[c], #0\n\t"
|
||||||
"umull r7, r12, %[x_i], %[x_i]\n\t"
|
"ldr r5, [%[wij]]\n\t" /* R5 := w_i_i; */
|
||||||
"adds r5, r5, r7\n\t"
|
"mov r4, %[c]\n\t"
|
||||||
"adc r4, r12, #0\n\t"
|
"umlal r5, r4, %[x_i], %[x_i]\n\t"
|
||||||
"str r5, [%[wij]], #4\n\t"
|
"str r5, [%[wij]], #4\n\t"
|
||||||
"cmp %[xj], %[x_max1]\n\t"
|
"cmp %[xj], %[x_max1]\n\t"
|
||||||
"bhi 0f\n\t"
|
"bhi 0f\n\t"
|
||||||
"mov r8, %[c]\n\t" /* R8 := 0, the constant ZERO from here. */
|
"mov r9, %[c]\n\t" /* R9 := 0, the constant ZERO from here. */
|
||||||
"beq 1f\n"
|
"beq 1f\n"
|
||||||
"2:\n\t"
|
"2:\n\t"
|
||||||
"ldmia %[xj]!, { r7, r9 }\n\t"
|
"ldmia %[xj]!, { r7, r8 }\n\t"
|
||||||
"ldmia %[wij], { r5, r6 }\n\t"
|
"ldmia %[wij], { r5, r6 }\n\t"
|
||||||
/* (C,R4,R5) := (C,R4) + w_i_j + 2*x_i*x_j; */
|
/* (C,R4,R5) := (C,R4) + w_i_j + 2*x_i*x_j; */
|
||||||
"umull r7, r12, %[x_i], r7\n\t"
|
"umull r7, r12, %[x_i], r7\n\t"
|
||||||
"adds r5, r5, r4\n\t"
|
"adds r5, r5, r4\n\t"
|
||||||
"adc r4, %[c], r8\n\t"
|
"adc r4, %[c], r9\n\t"
|
||||||
"adds r5, r5, r7\n\t"
|
"adds r5, r5, r7\n\t"
|
||||||
"adcs r4, r4, r12\n\t"
|
"adcs r4, r4, r12\n\t"
|
||||||
"adc %[c], r8, r8\n\t"
|
"adc %[c], r9, r9\n\t"
|
||||||
"adds r5, r5, r7\n\t"
|
"adds r5, r5, r7\n\t"
|
||||||
"adcs r4, r4, r12\n\t"
|
"adcs r4, r4, r12\n\t"
|
||||||
"adc %[c], %[c], r8\n\t"
|
"adc %[c], %[c], r9\n\t"
|
||||||
/* (C,R4,R6) := (C,R4) + w_i_j + 2*x_i*x_j; */
|
/* (C,R4,R6) := (C,R4) + w_i_j + 2*x_i*x_j; */
|
||||||
"adds r6, r6, r4\n\t"
|
"adds r6, r6, r4\n\t"
|
||||||
"adc r4, %[c], r8\n\t"
|
"adc r4, %[c], r9\n\t"
|
||||||
"umull r7, r12, %[x_i], r9\n\t"
|
"umull r7, r12, %[x_i], r8\n\t"
|
||||||
"adds r6, r6, r7\n\t"
|
"adds r6, r6, r7\n\t"
|
||||||
"adcs r4, r4, r12\n\t"
|
"adcs r4, r4, r12\n\t"
|
||||||
"adc %[c], r8, r8\n\t"
|
"adc %[c], r9, r9\n\t"
|
||||||
"adds r6, r6, r7\n\t"
|
"adds r6, r6, r7\n\t"
|
||||||
"adcs r4, r4, r12\n\t"
|
"adcs r4, r4, r12\n\t"
|
||||||
"adc %[c], %[c], r8\n\t"
|
"adc %[c], %[c], r9\n\t"
|
||||||
/**/
|
/**/
|
||||||
"stmia %[wij]!, { r5, r6 }\n\t"
|
"stmia %[wij]!, { r5, r6 }\n\t"
|
||||||
"cmp %[xj], %[x_max1]\n\t"
|
"cmp %[xj], %[x_max1]\n\t"
|
||||||
@@ -1508,14 +1507,14 @@ static void mpi_montsqr( size_t n, const t_uint *np, t_uint mm, t_uint *d )
|
|||||||
"ldr r5, [%[wij]]\n\t"
|
"ldr r5, [%[wij]]\n\t"
|
||||||
"ldr r6, [%[xj]], #4\n\t"
|
"ldr r6, [%[xj]], #4\n\t"
|
||||||
"adds r5, r5, r4\n\t"
|
"adds r5, r5, r4\n\t"
|
||||||
"adc r4, %[c], r8\n\t"
|
"adc r4, %[c], r9\n\t"
|
||||||
"umull r7, r12, %[x_i], r6\n\t"
|
"umull r7, r12, %[x_i], r6\n\t"
|
||||||
"adds r5, r5, r7\n\t"
|
"adds r5, r5, r7\n\t"
|
||||||
"adcs r4, r4, r12\n\t"
|
"adcs r4, r4, r12\n\t"
|
||||||
"adc %[c], r8, r8\n\t"
|
"adc %[c], r9, r9\n\t"
|
||||||
"adds r5, r5, r7\n\t"
|
"adds r5, r5, r7\n\t"
|
||||||
"adcs r4, r4, r12\n\t"
|
"adcs r4, r4, r12\n\t"
|
||||||
"adc %[c], %[c], r8\n\t"
|
"adc %[c], %[c], r9\n\t"
|
||||||
"str r5, [%[wij]], #4\n"
|
"str r5, [%[wij]], #4\n"
|
||||||
"0:\n\t"
|
"0:\n\t"
|
||||||
"ldr r5, [%[wij]]\n\t"
|
"ldr r5, [%[wij]]\n\t"
|
||||||
|
|||||||
Reference in New Issue
Block a user