Reorganize curve25519.c slightly.

Adding 51-bit limbs will require two implementations of most of the field
operations. Group them together to make this more manageable. Also move the
representation-independent functions to the end.

Change-Id: I264e8ac64318a1d5fa72e6ad6f7ccf2f0a2c2be9
Reviewed-on: https://boringssl-review.googlesource.com/24804
Commit-Queue: David Benjamin <davidben@google.com>
CQ-Verified: CQ bot account: commit-bot@chromium.org <commit-bot@chromium.org>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/third_party/fiat/curve25519.c b/third_party/fiat/curve25519.c index be887bd..26b56d0 100644 --- a/third_party/fiat/curve25519.c +++ b/third_party/fiat/curve25519.c
@@ -41,6 +41,12 @@ #include "../../crypto/internal.h" +// Various pre-computed constants. +#include "./curve25519_tables.h" + + +// Low-level intrinsic operations (hand-written). + static uint64_t load_3(const uint8_t *in) { uint64_t result; result = (uint64_t)in[0]; @@ -58,6 +64,50 @@ return result; } +static uint8_t /*bool*/ addcarryx_u25(uint8_t /*bool*/ c, uint32_t a, + uint32_t b, uint32_t *low) { + // This function extracts 25 bits of result and 1 bit of carry (26 total), so + // a 32-bit intermediate is sufficient. + uint32_t x = a + b + c; + *low = x & ((1 << 25) - 1); + return (x >> 25) & 1; +} + +static uint8_t /*bool*/ addcarryx_u26(uint8_t /*bool*/ c, uint32_t a, + uint32_t b, uint32_t *low) { + // This function extracts 26 bits of result and 1 bit of carry (27 total), so + // a 32-bit intermediate is sufficient. + uint32_t x = a + b + c; + *low = x & ((1 << 26) - 1); + return (x >> 26) & 1; +} + +static uint8_t /*bool*/ subborrow_u25(uint8_t /*bool*/ c, uint32_t a, + uint32_t b, uint32_t *low) { + // This function extracts 25 bits of result and 1 bit of borrow (26 total), so + // a 32-bit intermediate is sufficient. + uint32_t x = a - b - c; + *low = x & ((1 << 25) - 1); + return x >> 31; +} + +static uint8_t /*bool*/ subborrow_u26(uint8_t /*bool*/ c, uint32_t a, + uint32_t b, uint32_t *low) { + // This function extracts 26 bits of result and 1 bit of borrow (27 total), so + // a 32-bit intermediate is sufficient. + uint32_t x = a - b - c; + *low = x & ((1 << 26) - 1); + return x >> 31; +} + +static uint32_t cmovznz32(uint32_t t, uint32_t z, uint32_t nz) { + t = -!!t; // all set if nonzero, 0 if 0 + return (t&nz) | ((~t)&z); +} + + +// Field operations. 
+ #define assert_fe(f) do { \ for (unsigned _assert_fe_i = 0; _assert_fe_i< 10; _assert_fe_i++) { \ assert(f[_assert_fe_i] < 1.125*(1<<(26-(_assert_fe_i&1)))); \ @@ -103,47 +153,6 @@ fe_frombytes_impl(h->v, s); } -static uint8_t /*bool*/ addcarryx_u25(uint8_t /*bool*/ c, uint32_t a, - uint32_t b, uint32_t *low) { - // This function extracts 25 bits of result and 1 bit of carry (26 total), so - // a 32-bit intermediate is sufficient. - uint32_t x = a + b + c; - *low = x & ((1 << 25) - 1); - return (x >> 25) & 1; -} - -static uint8_t /*bool*/ addcarryx_u26(uint8_t /*bool*/ c, uint32_t a, - uint32_t b, uint32_t *low) { - // This function extracts 26 bits of result and 1 bit of carry (27 total), so - // a 32-bit intermediate is sufficient. - uint32_t x = a + b + c; - *low = x & ((1 << 26) - 1); - return (x >> 26) & 1; -} - -static uint8_t /*bool*/ subborrow_u25(uint8_t /*bool*/ c, uint32_t a, - uint32_t b, uint32_t *low) { - // This function extracts 25 bits of result and 1 bit of borrow (26 total), so - // a 32-bit intermediate is sufficient. - uint32_t x = a - b - c; - *low = x & ((1 << 25) - 1); - return x >> 31; -} - -static uint8_t /*bool*/ subborrow_u26(uint8_t /*bool*/ c, uint32_t a, - uint32_t b, uint32_t *low) { - // This function extracts 26 bits of result and 1 bit of borrow (27 total), so - // a 32-bit intermediate is sufficient. - uint32_t x = a - b - c; - *low = x & ((1 << 26) - 1); - return x >> 31; -} - -static uint32_t cmovznz32(uint32_t t, uint32_t z, uint32_t nz) { - t = -!!t; // all set if nonzero, 0 if 0 - return (t&nz) | ((~t)&z); -} - static void fe_freeze(uint32_t out[10], const uint32_t in1[10]) { { const uint32_t x17 = in1[9]; { const uint32_t x18 = in1[8]; @@ -681,6 +690,204 @@ fe_sqr_impl(h->v, f->v); } +#if !defined(BORINGSSL_X25519_X86_64) + +// Replace (f,g) with (g,f) if b == 1; +// replace (f,g) with (f,g) if b == 0. +// +// Preconditions: b in {0,1}. 
+static void fe_cswap(fe *f, fe *g, unsigned int b) { + b = 0-b; + unsigned i; + for (i = 0; i < 10; i++) { + uint32_t x = f->v[i] ^ g->v[i]; + x &= b; + f->v[i] ^= x; + g->v[i] ^= x; + } +} + +// NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.. +static void fe_mul_121666_impl(uint32_t out[10], const uint32_t in1[10]) { + assert_fe_loose(in1); + { const uint32_t x20 = in1[9]; + { const uint32_t x21 = in1[8]; + { const uint32_t x19 = in1[7]; + { const uint32_t x17 = in1[6]; + { const uint32_t x15 = in1[5]; + { const uint32_t x13 = in1[4]; + { const uint32_t x11 = in1[3]; + { const uint32_t x9 = in1[2]; + { const uint32_t x7 = in1[1]; + { const uint32_t x5 = in1[0]; + { const uint32_t x38 = 0; + { const uint32_t x39 = 0; + { const uint32_t x37 = 0; + { const uint32_t x35 = 0; + { const uint32_t x33 = 0; + { const uint32_t x31 = 0; + { const uint32_t x29 = 0; + { const uint32_t x27 = 0; + { const uint32_t x25 = 0; + { const uint32_t x23 = 121666; + { uint64_t x40 = ((uint64_t)x23 * x5); + { uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5)); + { uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5)); + { uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5)); + { uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5)); + { uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5)); + { uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5)); + { uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + 
((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5)); + { uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5)); + { uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5)); + { uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9)); + { uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9)); + { uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13)); + { uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13)); + { uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17)); + { uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17)); + { uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19)))); + { uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21)); + { uint64_t x58 = ((uint64_t)(0x2 * x38) * x20); + { uint64_t x59 = (x48 + (x58 << 0x4)); + { uint64_t x60 = (x59 + (x58 
<< 0x1)); + { uint64_t x61 = (x60 + x58); + { uint64_t x62 = (x47 + (x57 << 0x4)); + { uint64_t x63 = (x62 + (x57 << 0x1)); + { uint64_t x64 = (x63 + x57); + { uint64_t x65 = (x46 + (x56 << 0x4)); + { uint64_t x66 = (x65 + (x56 << 0x1)); + { uint64_t x67 = (x66 + x56); + { uint64_t x68 = (x45 + (x55 << 0x4)); + { uint64_t x69 = (x68 + (x55 << 0x1)); + { uint64_t x70 = (x69 + x55); + { uint64_t x71 = (x44 + (x54 << 0x4)); + { uint64_t x72 = (x71 + (x54 << 0x1)); + { uint64_t x73 = (x72 + x54); + { uint64_t x74 = (x43 + (x53 << 0x4)); + { uint64_t x75 = (x74 + (x53 << 0x1)); + { uint64_t x76 = (x75 + x53); + { uint64_t x77 = (x42 + (x52 << 0x4)); + { uint64_t x78 = (x77 + (x52 << 0x1)); + { uint64_t x79 = (x78 + x52); + { uint64_t x80 = (x41 + (x51 << 0x4)); + { uint64_t x81 = (x80 + (x51 << 0x1)); + { uint64_t x82 = (x81 + x51); + { uint64_t x83 = (x40 + (x50 << 0x4)); + { uint64_t x84 = (x83 + (x50 << 0x1)); + { uint64_t x85 = (x84 + x50); + { uint64_t x86 = (x85 >> 0x1a); + { uint32_t x87 = ((uint32_t)x85 & 0x3ffffff); + { uint64_t x88 = (x86 + x82); + { uint64_t x89 = (x88 >> 0x19); + { uint32_t x90 = ((uint32_t)x88 & 0x1ffffff); + { uint64_t x91 = (x89 + x79); + { uint64_t x92 = (x91 >> 0x1a); + { uint32_t x93 = ((uint32_t)x91 & 0x3ffffff); + { uint64_t x94 = (x92 + x76); + { uint64_t x95 = (x94 >> 0x19); + { uint32_t x96 = ((uint32_t)x94 & 0x1ffffff); + { uint64_t x97 = (x95 + x73); + { uint64_t x98 = (x97 >> 0x1a); + { uint32_t x99 = ((uint32_t)x97 & 0x3ffffff); + { uint64_t x100 = (x98 + x70); + { uint64_t x101 = (x100 >> 0x19); + { uint32_t x102 = ((uint32_t)x100 & 0x1ffffff); + { uint64_t x103 = (x101 + x67); + { uint64_t x104 = (x103 >> 0x1a); + { uint32_t x105 = ((uint32_t)x103 & 0x3ffffff); + { uint64_t x106 = (x104 + x64); + { uint64_t x107 = (x106 >> 0x19); + { uint32_t x108 = ((uint32_t)x106 & 0x1ffffff); + { uint64_t x109 = (x107 + x61); + { uint64_t x110 = (x109 >> 0x1a); + { uint32_t x111 = ((uint32_t)x109 & 0x3ffffff); + { uint64_t x112 = (x110 + 
x49); + { uint64_t x113 = (x112 >> 0x19); + { uint32_t x114 = ((uint32_t)x112 & 0x1ffffff); + { uint64_t x115 = (x87 + (0x13 * x113)); + { uint32_t x116 = (uint32_t) (x115 >> 0x1a); + { uint32_t x117 = ((uint32_t)x115 & 0x3ffffff); + { uint32_t x118 = (x116 + x90); + { uint32_t x119 = (x118 >> 0x19); + { uint32_t x120 = (x118 & 0x1ffffff); + out[0] = x117; + out[1] = x120; + out[2] = (x119 + x93); + out[3] = x96; + out[4] = x99; + out[5] = x102; + out[6] = x105; + out[7] = x108; + out[8] = x111; + out[9] = x114; + }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} + assert_fe(out); +} + +static void fe_mul121666(fe *h, const fe_loose *f) { + assert_fe_loose(f->v); + fe_mul_121666_impl(h->v, f->v); + assert_fe(h->v); +} + +#endif // !BORINGSSL_X25519_X86_64 + +static void fe_neg_impl(uint32_t out[10], const uint32_t in2[10]) { + { const uint32_t x20 = 0; + { const uint32_t x21 = 0; + { const uint32_t x19 = 0; + { const uint32_t x17 = 0; + { const uint32_t x15 = 0; + { const uint32_t x13 = 0; + { const uint32_t x11 = 0; + { const uint32_t x9 = 0; + { const uint32_t x7 = 0; + { const uint32_t x5 = 0; + { const uint32_t x38 = in2[9]; + { const uint32_t x39 = in2[8]; + { const uint32_t x37 = in2[7]; + { const uint32_t x35 = in2[6]; + { const uint32_t x33 = in2[5]; + { const uint32_t x31 = in2[4]; + { const uint32_t x29 = in2[3]; + { const uint32_t x27 = in2[2]; + { const uint32_t x25 = in2[1]; + { const uint32_t x23 = in2[0]; + out[0] = ((0x7ffffda + x5) - x23); + out[1] = ((0x3fffffe + x7) - x25); + out[2] = ((0x7fffffe + x9) - x27); + out[3] = ((0x3fffffe + x11) - x29); + out[4] = ((0x7fffffe + x13) - x31); + out[5] = ((0x3fffffe + x15) - x33); + out[6] = ((0x7fffffe + x17) - x35); + out[7] = ((0x3fffffe + x19) - x37); + out[8] = ((0x7fffffe + x21) - x39); + out[9] = ((0x3fffffe + x20) - x38); + }}}}}}}}}}}}}}}}}}}} +} + +// h = -f +static void fe_neg(fe_loose *h, const fe *f) { + assert_fe(f->v); + fe_neg_impl(h->v, 
f->v); + assert_fe_loose(h->v); +} + +// Replace (f,g) with (g,g) if b == 1; +// replace (f,g) with (f,g) if b == 0. +// +// Preconditions: b in {0,1}. +static void fe_cmov(fe_loose *f, const fe_loose *g, unsigned b) { + b = 0-b; + unsigned i; + for (i = 0; i < 10; i++) { + uint32_t x = f->v[i] ^ g->v[i]; + x &= b; + f->v[i] ^= x; + } +} + static void fe_loose_invert(fe *out, const fe_loose *z) { fe t0; fe t1; @@ -745,61 +952,6 @@ fe_loose_invert(out, &l); } -static void fe_neg_impl(uint32_t out[10], const uint32_t in2[10]) { - { const uint32_t x20 = 0; - { const uint32_t x21 = 0; - { const uint32_t x19 = 0; - { const uint32_t x17 = 0; - { const uint32_t x15 = 0; - { const uint32_t x13 = 0; - { const uint32_t x11 = 0; - { const uint32_t x9 = 0; - { const uint32_t x7 = 0; - { const uint32_t x5 = 0; - { const uint32_t x38 = in2[9]; - { const uint32_t x39 = in2[8]; - { const uint32_t x37 = in2[7]; - { const uint32_t x35 = in2[6]; - { const uint32_t x33 = in2[5]; - { const uint32_t x31 = in2[4]; - { const uint32_t x29 = in2[3]; - { const uint32_t x27 = in2[2]; - { const uint32_t x25 = in2[1]; - { const uint32_t x23 = in2[0]; - out[0] = ((0x7ffffda + x5) - x23); - out[1] = ((0x3fffffe + x7) - x25); - out[2] = ((0x7fffffe + x9) - x27); - out[3] = ((0x3fffffe + x11) - x29); - out[4] = ((0x7fffffe + x13) - x31); - out[5] = ((0x3fffffe + x15) - x33); - out[6] = ((0x7fffffe + x17) - x35); - out[7] = ((0x3fffffe + x19) - x37); - out[8] = ((0x7fffffe + x21) - x39); - out[9] = ((0x3fffffe + x20) - x38); - }}}}}}}}}}}}}}}}}}}} -} - -// h = -f -static void fe_neg(fe_loose *h, const fe *f) { - assert_fe(f->v); - fe_neg_impl(h->v, f->v); - assert_fe_loose(h->v); -} - -// Replace (f,g) with (g,g) if b == 1; -// replace (f,g) with (f,g) if b == 0. -// -// Preconditions: b in {0,1}. 
-static void fe_cmov(fe_loose *f, const fe_loose *g, unsigned b) { - b = 0-b; - unsigned i; - for (i = 0; i < 10; i++) { - uint32_t x = f->v[i] ^ g->v[i]; - x &= b; - f->v[i] ^= x; - } -} - // return 0 if f == 0 // return 1 if f != 0 static int fe_isnonzero(const fe_loose *f) { @@ -887,6 +1039,9 @@ fe_mul_ttt(out, &t0, z); } + +// Group operations. + void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) { fe recip; fe x; @@ -911,8 +1066,6 @@ s[31] ^= fe_isnegative(&x) << 7; } -#include "./curve25519_tables.h" - int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) { fe u; fe_loose v; @@ -2389,145 +2542,6 @@ #else -// Replace (f,g) with (g,f) if b == 1; -// replace (f,g) with (f,g) if b == 0. -// -// Preconditions: b in {0,1}. -static void fe_cswap(fe *f, fe *g, unsigned int b) { - b = 0-b; - unsigned i; - for (i = 0; i < 10; i++) { - uint32_t x = f->v[i] ^ g->v[i]; - x &= b; - f->v[i] ^= x; - g->v[i] ^= x; - } -} - -// NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.. 
-static void fe_mul_121666_impl(uint32_t out[10], const uint32_t in1[10]) { - assert_fe_loose(in1); - { const uint32_t x20 = in1[9]; - { const uint32_t x21 = in1[8]; - { const uint32_t x19 = in1[7]; - { const uint32_t x17 = in1[6]; - { const uint32_t x15 = in1[5]; - { const uint32_t x13 = in1[4]; - { const uint32_t x11 = in1[3]; - { const uint32_t x9 = in1[2]; - { const uint32_t x7 = in1[1]; - { const uint32_t x5 = in1[0]; - { const uint32_t x38 = 0; - { const uint32_t x39 = 0; - { const uint32_t x37 = 0; - { const uint32_t x35 = 0; - { const uint32_t x33 = 0; - { const uint32_t x31 = 0; - { const uint32_t x29 = 0; - { const uint32_t x27 = 0; - { const uint32_t x25 = 0; - { const uint32_t x23 = 121666; - { uint64_t x40 = ((uint64_t)x23 * x5); - { uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5)); - { uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5)); - { uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5)); - { uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5)); - { uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5)); - { uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5)); - { uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5)); - { uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + 
((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5)); - { uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5)); - { uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9)); - { uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9)); - { uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13)); - { uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13)); - { uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17)); - { uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17)); - { uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19)))); - { uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21)); - { uint64_t x58 = ((uint64_t)(0x2 * x38) * x20); - { uint64_t x59 = (x48 + (x58 << 0x4)); - { uint64_t x60 = (x59 + (x58 << 0x1)); - { uint64_t x61 = (x60 + x58); - { uint64_t x62 = (x47 + (x57 << 0x4)); - { uint64_t x63 = (x62 + (x57 << 0x1)); - { uint64_t x64 = (x63 + x57); - { uint64_t x65 = (x46 + (x56 << 0x4)); - { uint64_t x66 = (x65 + (x56 << 0x1)); - { uint64_t x67 
= (x66 + x56); - { uint64_t x68 = (x45 + (x55 << 0x4)); - { uint64_t x69 = (x68 + (x55 << 0x1)); - { uint64_t x70 = (x69 + x55); - { uint64_t x71 = (x44 + (x54 << 0x4)); - { uint64_t x72 = (x71 + (x54 << 0x1)); - { uint64_t x73 = (x72 + x54); - { uint64_t x74 = (x43 + (x53 << 0x4)); - { uint64_t x75 = (x74 + (x53 << 0x1)); - { uint64_t x76 = (x75 + x53); - { uint64_t x77 = (x42 + (x52 << 0x4)); - { uint64_t x78 = (x77 + (x52 << 0x1)); - { uint64_t x79 = (x78 + x52); - { uint64_t x80 = (x41 + (x51 << 0x4)); - { uint64_t x81 = (x80 + (x51 << 0x1)); - { uint64_t x82 = (x81 + x51); - { uint64_t x83 = (x40 + (x50 << 0x4)); - { uint64_t x84 = (x83 + (x50 << 0x1)); - { uint64_t x85 = (x84 + x50); - { uint64_t x86 = (x85 >> 0x1a); - { uint32_t x87 = ((uint32_t)x85 & 0x3ffffff); - { uint64_t x88 = (x86 + x82); - { uint64_t x89 = (x88 >> 0x19); - { uint32_t x90 = ((uint32_t)x88 & 0x1ffffff); - { uint64_t x91 = (x89 + x79); - { uint64_t x92 = (x91 >> 0x1a); - { uint32_t x93 = ((uint32_t)x91 & 0x3ffffff); - { uint64_t x94 = (x92 + x76); - { uint64_t x95 = (x94 >> 0x19); - { uint32_t x96 = ((uint32_t)x94 & 0x1ffffff); - { uint64_t x97 = (x95 + x73); - { uint64_t x98 = (x97 >> 0x1a); - { uint32_t x99 = ((uint32_t)x97 & 0x3ffffff); - { uint64_t x100 = (x98 + x70); - { uint64_t x101 = (x100 >> 0x19); - { uint32_t x102 = ((uint32_t)x100 & 0x1ffffff); - { uint64_t x103 = (x101 + x67); - { uint64_t x104 = (x103 >> 0x1a); - { uint32_t x105 = ((uint32_t)x103 & 0x3ffffff); - { uint64_t x106 = (x104 + x64); - { uint64_t x107 = (x106 >> 0x19); - { uint32_t x108 = ((uint32_t)x106 & 0x1ffffff); - { uint64_t x109 = (x107 + x61); - { uint64_t x110 = (x109 >> 0x1a); - { uint32_t x111 = ((uint32_t)x109 & 0x3ffffff); - { uint64_t x112 = (x110 + x49); - { uint64_t x113 = (x112 >> 0x19); - { uint32_t x114 = ((uint32_t)x112 & 0x1ffffff); - { uint64_t x115 = (x87 + (0x13 * x113)); - { uint32_t x116 = (uint32_t) (x115 >> 0x1a); - { uint32_t x117 = ((uint32_t)x115 & 0x3ffffff); - { uint32_t x118 = 
(x116 + x90); - { uint32_t x119 = (x118 >> 0x19); - { uint32_t x120 = (x118 & 0x1ffffff); - out[0] = x117; - out[1] = x120; - out[2] = (x119 + x93); - out[3] = x96; - out[4] = x99; - out[5] = x102; - out[6] = x105; - out[7] = x108; - out[8] = x111; - out[9] = x114; - }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} - assert_fe(out); -} - -static void fe_mul121666(fe *h, const fe_loose *f) { - assert_fe_loose(f->v); - fe_mul_121666_impl(h->v, f->v); - assert_fe(h->v); -} - static void x25519_scalar_mult_generic(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32]) {