This commit is contained in:
NIIBE Yutaka
2014-04-17 13:41:55 +09:00
parent 62a59b56fe
commit e25f50eabc
7 changed files with 78 additions and 109 deletions

View File

@@ -1,10 +1,15 @@
2014-04-17 Niibe Yutaka <gniibe@fsij.org> 2014-04-17 Niibe Yutaka <gniibe@fsij.org>
* src/mod25638.c (mod25638_add, mod25638_sub): Simplify. * src/muladd_256.h (MULADD_256_ASM): New.
(n25638): Remove. (MULADD_256): Use MULADD_256_ASM.
(mod25638_neg): New. * src/ecc-mont.c (mod25638_mul_121665): Add asm implementation.
* src/ecc-edwards.c (point_double): Use mod25638_neg. * src/bn.c (bn256_add_uint, bn256_sub_uint): Simplify.
* src/mod25638.c (mod25638_add, mod25638_sub): Simplify.
(n25638, add19): Remove.
(ADDWORD_256): Add s_ as source pointer.
(mod25519_reduce): Remove a call to memcpy. Use bn256_add_uint.
* src/ecc-edwards.c (point_double): Simplify.
2014-04-16 Niibe Yutaka <gniibe@fsij.org> 2014-04-16 Niibe Yutaka <gniibe@fsij.org>

View File

@@ -89,7 +89,7 @@ uint32_t
bn256_add_uint (bn256 *X, const bn256 *A, uint32_t w) bn256_add_uint (bn256 *X, const bn256 *A, uint32_t w)
{ {
int i; int i;
uint32_t carry = 0; uint32_t carry = w;
uint32_t *px; uint32_t *px;
const uint32_t *pa; const uint32_t *pa;
@@ -100,12 +100,6 @@ bn256_add_uint (bn256 *X, const bn256 *A, uint32_t w)
{ {
*px = *pa + carry; *px = *pa + carry;
carry = (*px < carry); carry = (*px < carry);
if (i == 0)
{
*px += w;
carry += (*px < w);
}
px++; px++;
pa++; pa++;
} }
@@ -117,7 +111,7 @@ uint32_t
bn256_sub_uint (bn256 *X, const bn256 *A, uint32_t w) bn256_sub_uint (bn256 *X, const bn256 *A, uint32_t w)
{ {
int i; int i;
uint32_t borrow = 0; uint32_t borrow = w;
uint32_t *px; uint32_t *px;
const uint32_t *pa; const uint32_t *pa;
@@ -129,14 +123,7 @@ bn256_sub_uint (bn256 *X, const bn256 *A, uint32_t w)
uint32_t borrow0 = (*pa < borrow); uint32_t borrow0 = (*pa < borrow);
*px = *pa - borrow; *px = *pa - borrow;
if (i == 0) borrow = borrow0;
{
borrow = (*px < w) + borrow0;
*px -= w;
}
else
borrow = borrow0;
px++; px++;
pa++; pa++;
} }

View File

@@ -140,28 +140,27 @@ point_double (ptc *X, const ptc *A)
mod25638_sqr (d, A->y); mod25638_sqr (d, A->y);
/* E = aC; where a = -1 */ /* E = aC; where a = -1 */
/* Compute: E - D = -(C+D) : Y3_tmp */ /* Compute: D - E = D + C : Y3_tmp */
mod25638_add (X->y, e, d); mod25638_add (X->y, e, d);
mod25638_neg (X->y, X->y);
/* Compute: F = E + D = D - C; where a = -1 : E */ /* Compute: -F = -(E + D) = C - D; where a = -1 : E */
mod25638_sub (e, d, e); mod25638_sub (e, e, d);
/* Compute: H = Z1^2 : D */ /* Compute: H = Z1^2 : D */
mod25638_sqr (d, A->z); mod25638_sqr (d, A->z);
/* Compute: J = F - 2*H : D */ /* Compute: -J = 2*H - F : D */
mod25638_add (d, d, d); mod25638_add (d, d, d);
mod25638_sub (d, e, d); mod25638_add (d, d, e);
/* Compute: X3 = (B-C-D)*J = (B+Y3_tmp)*J */ /* Compute: X3 = (B-C-D)*J = -J*(C+D-B) = -J*(Y3_tmp-B) */
mod25638_add (X->x, b, X->y); mod25638_sub (X->x, X->y, b);
mod25638_mul (X->x, X->x, d); mod25638_mul (X->x, X->x, d);
/* Compute: Y3 = F*(E-D) = F*Y3_tmp */ /* Compute: Y3 = -F*(D-E) = -F*Y3_tmp */
mod25638_mul (X->y, X->y, e); mod25638_mul (X->y, X->y, e);
/* Z3 = F*J */ /* Z3 = -F*-J */
mod25638_mul (X->z, e, d); mod25638_mul (X->z, e, d);
} }

View File

@@ -58,6 +58,9 @@
* *
*/ */
#ifndef BN256_C_IMPLEMENTATION
#define ASM_IMPLEMENTATION 1
#endif
/* /*
* *
* 121665 = 0x1db41 * 121665 = 0x1db41
@@ -66,22 +69,31 @@
static void static void
mod25638_mul_121665 (bn256 *x, const bn256 *a) mod25638_mul_121665 (bn256 *x, const bn256 *a)
{ {
#if ASM_IMPLEMENTATION
#include "muladd_256.h"
const uint32_t *s;
uint32_t *d;
uint32_t w;
uint32_t c; uint32_t c;
s = a->word;
d = x->word;
w = 121665;
MULADD_256_ASM (s, d, w, c);
#else
uint32_t c, c1;
bn256 m[1]; bn256 m[1];
c = 0; c = c1 = bn256_shift (m, a, 6); c += bn256_add (x, a, m);
memcpy (x, a, sizeof (bn256)); /* X = A */ c1 <<= 2; c1 |= bn256_shift (m, m, 2); c = c + c1 + bn256_add (x, x, m);
c += bn256_shift (m, a, 6); c += bn256_add (x, x, m); /* X += A << 6 */ c1 <<= 1; c1 |= bn256_shift (m, m, 1); c = c + c1 + bn256_add (x, x, m);
c += bn256_shift (m, a, 8); c += bn256_add (x, x, m); /* X += A << 8 */ c1 <<= 2; c1 |= bn256_shift (m, m, 2); c = c + c1 + bn256_add (x, x, m);
c += bn256_shift (m, a, 9); c += bn256_add (x, x, m); /* X += A << 9 */ c1 <<= 1; c1 |= bn256_shift (m, m, 1); c = c + c1 + bn256_add (x, x, m);
c += bn256_shift (m, a, 11); c += bn256_add (x, x, m); /* X += A << 11 */ c1 <<= 2; c1 |= bn256_shift (m, m, 2); c = c + c1 + bn256_add (x, x, m);
c += bn256_shift (m, a, 12); c += bn256_add (x, x, m); /* X += A << 12 */ c1 <<= 1; c1 |= bn256_shift (m, m, 1); c = c + c1 + bn256_add (x, x, m);
c += bn256_shift (m, a, 14); c += bn256_add (x, x, m); /* X += A << 14 */ c1 <<= 1; c1 |= bn256_shift (m, m, 1); c = c + c1 + bn256_add (x, x, m);
c += bn256_shift (m, a, 15); c += bn256_add (x, x, m); /* X += A << 15 */ #endif
c += bn256_shift (m, a, 16); c += bn256_add (x, x, m); /* X += A << 16 */ c = bn256_add_uint (x, x, c*38);
c *= 38;
c = bn256_add_uint (x, x, c);
x->word[0] += c * 38; x->word[0] += c * 38;
} }

View File

@@ -44,23 +44,23 @@
#if ASM_IMPLEMENTATION #if ASM_IMPLEMENTATION
#include "muladd_256.h" #include "muladd_256.h"
#define ADDWORD_256(d_,w_,c_) \ #define ADDWORD_256(d_,s_,w_,c_) \
asm ( "ldmia %[d], { r4, r5, r6, r7 } \n\t" \ asm ( "ldmia %[s]!, { r4, r5, r6, r7 } \n\t" \
"adds r4, r4, %[w] \n\t" \ "adds r4, r4, %[w] \n\t" \
"adcs r5, r5, #0 \n\t" \ "adcs r5, r5, #0 \n\t" \
"adcs r6, r6, #0 \n\t" \ "adcs r6, r6, #0 \n\t" \
"adcs r7, r7, #0 \n\t" \ "adcs r7, r7, #0 \n\t" \
"stmia %[d]!, { r4, r5, r6, r7 }\n\t" \ "stmia %[d]!, { r4, r5, r6, r7 }\n\t" \
"ldmia %[d], { r4, r5, r6, r7 } \n\t" \ "ldmia %[s]!, { r4, r5, r6, r7 } \n\t" \
"adcs r4, r4, #0 \n\t" \ "adcs r4, r4, #0 \n\t" \
"adcs r5, r5, #0 \n\t" \ "adcs r5, r5, #0 \n\t" \
"adcs r6, r6, #0 \n\t" \ "adcs r6, r6, #0 \n\t" \
"adcs r7, r7, #0 \n\t" \ "adcs r7, r7, #0 \n\t" \
"stmia %[d]!, { r4, r5, r6, r7 }\n\t" \ "stmia %[d]!, { r4, r5, r6, r7 }\n\t" \
"mov %[c], #0 \n\t" \ "mov %[c], #0 \n\t" \
"adc %[c], %[c], #0" \ "adc %[c], %[c], #0" \
: [d] "=&r" (d_), [c] "=&r" (c_) \ : [s] "=&r" (s_), [d] "=&r" (d_), [c] "=&r" (c_) \
: "[d]" (d_), [w] "r" (w_) \ : "[s]" (s_), "[d]" (d_), [w] "r" (w_) \
: "r4", "r5", "r6", "r7", "memory", "cc" ) : "r4", "r5", "r6", "r7", "memory", "cc" )
#endif #endif
@@ -89,26 +89,6 @@ const bn256 p25519[1] = {
* 256-bit. * 256-bit.
*/ */
/**
* @brief X = -A mod 2^256-38
*/
void
mod25638_neg (bn256 *X, const bn256 *A)
{
int i;
uint32_t borrow;
uint32_t *px;
const uint32_t *pa;
px = X->word;
pa = A->word;
for (i = 0; i < BN256_WORDS; i++)
*px++ = ~*pa++;
borrow = bn256_sub_uint (X, X, 37);
X->word[0] -= borrow * 38;
}
/** /**
* @brief X = (A + B) mod 2^256-38 * @brief X = (A + B) mod 2^256-38
@@ -158,10 +138,10 @@ mod25638_reduce (bn256 *X, bn512 *A)
s = &A->word[8]; d = &A->word[0]; w = 38; MULADD_256 (s, d, w, c); s = &A->word[8]; d = &A->word[0]; w = 38; MULADD_256 (s, d, w, c);
c0 = A->word[8] * 38; c0 = A->word[8] * 38;
d = &X->word[0];
s = &A->word[0]; s = &A->word[0];
ADDWORD_256 (s, c0, c); ADDWORD_256 (d, s, c0, c);
A->word[0] += c * 38; X->word[0] += c * 38;
memcpy (X, A, sizeof (bn256));
#else #else
s = &A->word[8]; d = &A->word[0]; w = 38; s = &A->word[8]; d = &A->word[0]; w = 38;
{ {
@@ -184,13 +164,10 @@ mod25638_reduce (bn256 *X, bn512 *A)
d[i] = (uint32_t)r; d[i] = (uint32_t)r;
r = ((r >> 32) | ((uint64_t)carry << 32)); r = ((r >> 32) | ((uint64_t)carry << 32));
} }
d[i] = (uint32_t)r;
carry = bn256_add_uint ((bn256 *)A, (bn256 *)A, A->word[8] * 38); carry = bn256_add_uint (X, (bn256 *)A, r * 38);
A->word[0] += carry * 38; X->word[0] += carry * 38;
} }
memcpy (X, A, sizeof (bn256));
#endif #endif
} }
@@ -244,19 +221,6 @@ mod25638_shift (bn256 *X, const bn256 *A, int shift)
mod25638_add (X, X, tmp); mod25638_add (X, X, tmp);
} }
static void
add19 (bn256 *r, bn256 *x)
{
uint32_t v;
int i;
v = 19;
for (i = 0; i < BN256_WORDS; i++)
{
r->word[i] = x->word[i] + v;
v = (r->word[i] < v);
}
}
/* /*
* @brief X = A mod 2^255-19 * @brief X = A mod 2^255-19
@@ -275,12 +239,12 @@ mod25519_reduce (bn256 *X)
r0->word[7] &= 0x7fffffff; r0->word[7] &= 0x7fffffff;
if (q) if (q)
{ {
add19 (r0, r0); bn256_add_uint (r0, r0, 19);
q = (r0->word[7] >> 31); q = (r0->word[7] >> 31);
r0->word[7] &= 0x7fffffff; r0->word[7] &= 0x7fffffff;
if (q) if (q)
{ {
add19 (r1, r0); bn256_add_uint (r1, r0, 19);
q = (r1->word[7] >> 31); q = (r1->word[7] >> 31);
r1->word[7] &= 0x7fffffff; r1->word[7] &= 0x7fffffff;
flag = 0; flag = 0;
@@ -290,7 +254,7 @@ mod25519_reduce (bn256 *X)
} }
else else
{ {
add19 (r1, r0); /* dummy */ bn256_add_uint (r1, r0, 19);
q = (r1->word[7] >> 31); /* dummy */ q = (r1->word[7] >> 31); /* dummy */
r1->word[7] &= 0x7fffffff; /* dummy */ r1->word[7] &= 0x7fffffff; /* dummy */
if (q) if (q)
@@ -301,7 +265,7 @@ mod25519_reduce (bn256 *X)
if (flag) if (flag)
{ {
add19 (r1, r0); bn256_add_uint (r1, r0, 19);
q = (r1->word[7] >> 31); q = (r1->word[7] >> 31);
r1->word[7] &= 0x7fffffff; r1->word[7] &= 0x7fffffff;
if (q) if (q)

View File

@@ -1,6 +1,5 @@
extern const bn256 p25519[1]; extern const bn256 p25519[1];
void mod25638_neg (bn256 *X, const bn256 *A);
void mod25638_add (bn256 *X, const bn256 *A, const bn256 *B); void mod25638_add (bn256 *X, const bn256 *A, const bn256 *B);
void mod25638_sub (bn256 *X, const bn256 *A, const bn256 *B); void mod25638_sub (bn256 *X, const bn256 *A, const bn256 *B);
void mod25638_mul (bn256 *X, const bn256 *A, const bn256 *B); void mod25638_mul (bn256 *X, const bn256 *A, const bn256 *B);

View File

@@ -1,4 +1,4 @@
#define MULADD_256(s_,d_,w_,c_) do { \ #define MULADD_256_ASM(s_,d_,w_,c_) \
asm ( "ldmia %[s]!, { r8, r9, r10 } \n\t" \ asm ( "ldmia %[s]!, { r8, r9, r10 } \n\t" \
"ldmia %[d], { r5, r6, r7 } \n\t" \ "ldmia %[d], { r5, r6, r7 } \n\t" \
"umull r4, r8, %[w], r8 \n\t" \ "umull r4, r8, %[w], r8 \n\t" \
@@ -42,6 +42,9 @@
: [s] "=&r" (s_), [d] "=&r" (d_), [c] "=&r" (c_) \ : [s] "=&r" (s_), [d] "=&r" (d_), [c] "=&r" (c_) \
: "[s]" (s_), "[d]" (d_), [w] "r" (w_) \ : "[s]" (s_), "[d]" (d_), [w] "r" (w_) \
: "r4", "r5", "r6", "r7", "r8", "r9", "r10", \ : "r4", "r5", "r6", "r7", "r8", "r9", "r10", \
"memory", "cc" ); \ "memory", "cc" )
*d_ = c_; \
#define MULADD_256(s__,d__,w__,c__) do { \
MULADD_256_ASM(s__,d__,w__,c__); \
*d__ = c__; \
} while (0) } while (0)