David Benjamin | 1e3376a | 2016-06-08 14:17:18 -0400 | [diff] [blame] | 1 | /* Copyright (c) 2015, Google Inc. |
| 2 | * |
| 3 | * Permission to use, copy, modify, and/or distribute this software for any |
| 4 | * purpose with or without fee is hereby granted, provided that the above |
| 5 | * copyright notice and this permission notice appear in all copies. |
| 6 | * |
| 7 | * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| 8 | * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| 9 | * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
| 10 | * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| 11 | * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| 12 | * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| 13 | * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ |
| 14 | |
| 15 | /* This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP |
| 16 | * 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as |
| 17 | * public domain but this file has the ISC license just to keep licencing |
| 18 | * simple. |
| 19 | * |
| 20 | * The field functions are shared by Ed25519 and X25519 where possible. */ |
| 21 | |
Adam Langley | 77a173e | 2015-11-20 14:41:50 -0800 | [diff] [blame] | 22 | #include <openssl/curve25519.h> |
| 23 | |
| 24 | #include <string.h> |
| 25 | |
David Benjamin | 17cf2cb | 2016-12-13 01:07:13 -0500 | [diff] [blame] | 26 | #include "../internal.h" |
Adam Langley | 77a173e | 2015-11-20 14:41:50 -0800 | [diff] [blame] | 27 | #include "internal.h" |
| 28 | |
| 29 | |
| 30 | #if defined(BORINGSSL_X25519_X86_64) |
| 31 | |
| 32 | typedef struct { uint64_t v[5]; } fe25519; |
| 33 | |
| 34 | /* These functions are defined in asm/x25519-x86_64.S */ |
| 35 | void x25519_x86_64_work_cswap(fe25519 *, uint64_t); |
| 36 | void x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b); |
| 37 | void x25519_x86_64_square(fe25519 *out, const fe25519 *a); |
| 38 | void x25519_x86_64_freeze(fe25519 *); |
| 39 | void x25519_x86_64_ladderstep(fe25519 *work); |
| 40 | |
| 41 | static void fe25519_setint(fe25519 *r, unsigned v) { |
| 42 | r->v[0] = v; |
| 43 | r->v[1] = 0; |
| 44 | r->v[2] = 0; |
| 45 | r->v[3] = 0; |
| 46 | r->v[4] = 0; |
| 47 | } |
| 48 | |
| 49 | /* Assumes input x being reduced below 2^255 */ |
| 50 | static void fe25519_pack(unsigned char r[32], const fe25519 *x) { |
| 51 | fe25519 t; |
| 52 | t = *x; |
| 53 | x25519_x86_64_freeze(&t); |
| 54 | |
| 55 | r[0] = (uint8_t)(t.v[0] & 0xff); |
| 56 | r[1] = (uint8_t)((t.v[0] >> 8) & 0xff); |
| 57 | r[2] = (uint8_t)((t.v[0] >> 16) & 0xff); |
| 58 | r[3] = (uint8_t)((t.v[0] >> 24) & 0xff); |
| 59 | r[4] = (uint8_t)((t.v[0] >> 32) & 0xff); |
| 60 | r[5] = (uint8_t)((t.v[0] >> 40) & 0xff); |
| 61 | r[6] = (uint8_t)((t.v[0] >> 48)); |
| 62 | |
| 63 | r[6] ^= (uint8_t)((t.v[1] << 3) & 0xf8); |
| 64 | r[7] = (uint8_t)((t.v[1] >> 5) & 0xff); |
| 65 | r[8] = (uint8_t)((t.v[1] >> 13) & 0xff); |
| 66 | r[9] = (uint8_t)((t.v[1] >> 21) & 0xff); |
| 67 | r[10] = (uint8_t)((t.v[1] >> 29) & 0xff); |
| 68 | r[11] = (uint8_t)((t.v[1] >> 37) & 0xff); |
| 69 | r[12] = (uint8_t)((t.v[1] >> 45)); |
| 70 | |
| 71 | r[12] ^= (uint8_t)((t.v[2] << 6) & 0xc0); |
| 72 | r[13] = (uint8_t)((t.v[2] >> 2) & 0xff); |
| 73 | r[14] = (uint8_t)((t.v[2] >> 10) & 0xff); |
| 74 | r[15] = (uint8_t)((t.v[2] >> 18) & 0xff); |
| 75 | r[16] = (uint8_t)((t.v[2] >> 26) & 0xff); |
| 76 | r[17] = (uint8_t)((t.v[2] >> 34) & 0xff); |
| 77 | r[18] = (uint8_t)((t.v[2] >> 42) & 0xff); |
| 78 | r[19] = (uint8_t)((t.v[2] >> 50)); |
| 79 | |
| 80 | r[19] ^= (uint8_t)((t.v[3] << 1) & 0xfe); |
| 81 | r[20] = (uint8_t)((t.v[3] >> 7) & 0xff); |
| 82 | r[21] = (uint8_t)((t.v[3] >> 15) & 0xff); |
| 83 | r[22] = (uint8_t)((t.v[3] >> 23) & 0xff); |
| 84 | r[23] = (uint8_t)((t.v[3] >> 31) & 0xff); |
| 85 | r[24] = (uint8_t)((t.v[3] >> 39) & 0xff); |
| 86 | r[25] = (uint8_t)((t.v[3] >> 47)); |
| 87 | |
| 88 | r[25] ^= (uint8_t)((t.v[4] << 4) & 0xf0); |
| 89 | r[26] = (uint8_t)((t.v[4] >> 4) & 0xff); |
| 90 | r[27] = (uint8_t)((t.v[4] >> 12) & 0xff); |
| 91 | r[28] = (uint8_t)((t.v[4] >> 20) & 0xff); |
| 92 | r[29] = (uint8_t)((t.v[4] >> 28) & 0xff); |
| 93 | r[30] = (uint8_t)((t.v[4] >> 36) & 0xff); |
| 94 | r[31] = (uint8_t)((t.v[4] >> 44)); |
| 95 | } |
| 96 | |
| 97 | static void fe25519_unpack(fe25519 *r, const uint8_t x[32]) { |
| 98 | r->v[0] = x[0]; |
| 99 | r->v[0] += (uint64_t)x[1] << 8; |
| 100 | r->v[0] += (uint64_t)x[2] << 16; |
| 101 | r->v[0] += (uint64_t)x[3] << 24; |
| 102 | r->v[0] += (uint64_t)x[4] << 32; |
| 103 | r->v[0] += (uint64_t)x[5] << 40; |
| 104 | r->v[0] += ((uint64_t)x[6] & 7) << 48; |
| 105 | |
| 106 | r->v[1] = x[6] >> 3; |
| 107 | r->v[1] += (uint64_t)x[7] << 5; |
| 108 | r->v[1] += (uint64_t)x[8] << 13; |
| 109 | r->v[1] += (uint64_t)x[9] << 21; |
| 110 | r->v[1] += (uint64_t)x[10] << 29; |
| 111 | r->v[1] += (uint64_t)x[11] << 37; |
| 112 | r->v[1] += ((uint64_t)x[12] & 63) << 45; |
| 113 | |
| 114 | r->v[2] = x[12] >> 6; |
| 115 | r->v[2] += (uint64_t)x[13] << 2; |
| 116 | r->v[2] += (uint64_t)x[14] << 10; |
| 117 | r->v[2] += (uint64_t)x[15] << 18; |
| 118 | r->v[2] += (uint64_t)x[16] << 26; |
| 119 | r->v[2] += (uint64_t)x[17] << 34; |
| 120 | r->v[2] += (uint64_t)x[18] << 42; |
| 121 | r->v[2] += ((uint64_t)x[19] & 1) << 50; |
| 122 | |
| 123 | r->v[3] = x[19] >> 1; |
| 124 | r->v[3] += (uint64_t)x[20] << 7; |
| 125 | r->v[3] += (uint64_t)x[21] << 15; |
| 126 | r->v[3] += (uint64_t)x[22] << 23; |
| 127 | r->v[3] += (uint64_t)x[23] << 31; |
| 128 | r->v[3] += (uint64_t)x[24] << 39; |
| 129 | r->v[3] += ((uint64_t)x[25] & 15) << 47; |
| 130 | |
| 131 | r->v[4] = x[25] >> 4; |
| 132 | r->v[4] += (uint64_t)x[26] << 4; |
| 133 | r->v[4] += (uint64_t)x[27] << 12; |
| 134 | r->v[4] += (uint64_t)x[28] << 20; |
| 135 | r->v[4] += (uint64_t)x[29] << 28; |
| 136 | r->v[4] += (uint64_t)x[30] << 36; |
| 137 | r->v[4] += ((uint64_t)x[31] & 127) << 44; |
| 138 | } |
| 139 | |
| 140 | static void fe25519_invert(fe25519 *r, const fe25519 *x) { |
| 141 | fe25519 z2; |
| 142 | fe25519 z9; |
| 143 | fe25519 z11; |
| 144 | fe25519 z2_5_0; |
| 145 | fe25519 z2_10_0; |
| 146 | fe25519 z2_20_0; |
| 147 | fe25519 z2_50_0; |
| 148 | fe25519 z2_100_0; |
| 149 | fe25519 t; |
| 150 | int i; |
| 151 | |
| 152 | /* 2 */ x25519_x86_64_square(&z2, x); |
| 153 | /* 4 */ x25519_x86_64_square(&t, &z2); |
| 154 | /* 8 */ x25519_x86_64_square(&t, &t); |
| 155 | /* 9 */ x25519_x86_64_mul(&z9, &t, x); |
| 156 | /* 11 */ x25519_x86_64_mul(&z11, &z9, &z2); |
| 157 | /* 22 */ x25519_x86_64_square(&t, &z11); |
| 158 | /* 2^5 - 2^0 = 31 */ x25519_x86_64_mul(&z2_5_0, &t, &z9); |
| 159 | |
| 160 | /* 2^6 - 2^1 */ x25519_x86_64_square(&t, &z2_5_0); |
| 161 | /* 2^20 - 2^10 */ for (i = 1; i < 5; i++) { x25519_x86_64_square(&t, &t); } |
| 162 | /* 2^10 - 2^0 */ x25519_x86_64_mul(&z2_10_0, &t, &z2_5_0); |
| 163 | |
| 164 | /* 2^11 - 2^1 */ x25519_x86_64_square(&t, &z2_10_0); |
| 165 | /* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } |
| 166 | /* 2^20 - 2^0 */ x25519_x86_64_mul(&z2_20_0, &t, &z2_10_0); |
| 167 | |
| 168 | /* 2^21 - 2^1 */ x25519_x86_64_square(&t, &z2_20_0); |
| 169 | /* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { x25519_x86_64_square(&t, &t); } |
| 170 | /* 2^40 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_20_0); |
| 171 | |
| 172 | /* 2^41 - 2^1 */ x25519_x86_64_square(&t, &t); |
| 173 | /* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } |
| 174 | /* 2^50 - 2^0 */ x25519_x86_64_mul(&z2_50_0, &t, &z2_10_0); |
| 175 | |
| 176 | /* 2^51 - 2^1 */ x25519_x86_64_square(&t, &z2_50_0); |
| 177 | /* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } |
| 178 | /* 2^100 - 2^0 */ x25519_x86_64_mul(&z2_100_0, &t, &z2_50_0); |
| 179 | |
| 180 | /* 2^101 - 2^1 */ x25519_x86_64_square(&t, &z2_100_0); |
| 181 | /* 2^200 - 2^100 */ for (i = 1; i < 100; i++) { |
| 182 | x25519_x86_64_square(&t, &t); |
| 183 | } |
| 184 | /* 2^200 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_100_0); |
| 185 | |
| 186 | /* 2^201 - 2^1 */ x25519_x86_64_square(&t, &t); |
| 187 | /* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } |
| 188 | /* 2^250 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_50_0); |
| 189 | |
| 190 | /* 2^251 - 2^1 */ x25519_x86_64_square(&t, &t); |
| 191 | /* 2^252 - 2^2 */ x25519_x86_64_square(&t, &t); |
| 192 | /* 2^253 - 2^3 */ x25519_x86_64_square(&t, &t); |
| 193 | |
| 194 | /* 2^254 - 2^4 */ x25519_x86_64_square(&t, &t); |
| 195 | |
| 196 | /* 2^255 - 2^5 */ x25519_x86_64_square(&t, &t); |
| 197 | /* 2^255 - 21 */ x25519_x86_64_mul(r, &t, &z11); |
| 198 | } |
| 199 | |
| 200 | static void mladder(fe25519 *xr, fe25519 *zr, const uint8_t s[32]) { |
| 201 | fe25519 work[5]; |
| 202 | |
| 203 | work[0] = *xr; |
| 204 | fe25519_setint(work + 1, 1); |
| 205 | fe25519_setint(work + 2, 0); |
| 206 | work[3] = *xr; |
| 207 | fe25519_setint(work + 4, 1); |
| 208 | |
| 209 | int i, j; |
| 210 | uint8_t prevbit = 0; |
| 211 | |
| 212 | j = 6; |
| 213 | for (i = 31; i >= 0; i--) { |
| 214 | while (j >= 0) { |
| 215 | const uint8_t bit = 1 & (s[i] >> j); |
| 216 | const uint64_t swap = bit ^ prevbit; |
| 217 | prevbit = bit; |
| 218 | x25519_x86_64_work_cswap(work + 1, swap); |
| 219 | x25519_x86_64_ladderstep(work); |
| 220 | j -= 1; |
| 221 | } |
| 222 | j = 7; |
| 223 | } |
| 224 | |
| 225 | *xr = work[1]; |
| 226 | *zr = work[2]; |
| 227 | } |
| 228 | |
| 229 | void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32], |
| 230 | const uint8_t point[32]) { |
| 231 | uint8_t e[32]; |
David Benjamin | 17cf2cb | 2016-12-13 01:07:13 -0500 | [diff] [blame] | 232 | OPENSSL_memcpy(e, scalar, sizeof(e)); |
Adam Langley | 77a173e | 2015-11-20 14:41:50 -0800 | [diff] [blame] | 233 | |
| 234 | e[0] &= 248; |
| 235 | e[31] &= 127; |
| 236 | e[31] |= 64; |
| 237 | |
| 238 | fe25519 t; |
| 239 | fe25519 z; |
| 240 | fe25519_unpack(&t, point); |
| 241 | mladder(&t, &z, e); |
| 242 | fe25519_invert(&z, &z); |
| 243 | x25519_x86_64_mul(&t, &t, &z); |
| 244 | fe25519_pack(out, &t); |
| 245 | } |
| 246 | |
| 247 | #endif /* BORINGSSL_X25519_X86_64 */ |