Remove x86_64 x25519 assembly.

Now that we have 64-bit C code, courtesy of fiat-crypto, the tradeoff for
carrying the assembly changes:

Assembly:
Did 16000 Curve25519 base-point multiplication operations in 1059932us (15095.3 ops/sec)
Did 16000 Curve25519 arbitrary point multiplication operations in 1060023us (15094.0 ops/sec)

fiat64:
Did 39000 Curve25519 base-point multiplication operations in 1004712us (38817.1 ops/sec)
Did 14000 Curve25519 arbitrary point multiplication operations in 1006827us (13905.1 ops/sec)

The assembly is still about 9% faster than fiat64, but fiat64 gets to use the
Ed25519 tables for the base point multiplication, so overall it is actually
faster to disable the assembly:

>>> 1/(1/15094.0 + 1/15095.3)
7547.324986004976
>>> 1/(1/38817.1 + 1/13905.1)
10237.73016319501

(At the cost of touching a 30kB table.)

The assembly implementation is no longer pulling its weight. Remove it and use
the fiat code in all build configurations.

Change-Id: Id736873177d5568bb16ea06994b9fcb1af104e33
Reviewed-on: https://boringssl-review.googlesource.com/25524
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/curve25519/CMakeLists.txt b/crypto/curve25519/CMakeLists.txt index 6f51d54..4894fa8 100644 --- a/crypto/curve25519/CMakeLists.txt +++ b/crypto/curve25519/CMakeLists.txt
@@ -8,21 +8,12 @@ ) endif() -if (${ARCH} STREQUAL "x86_64") - set( - CURVE25519_ARCH_SOURCES - - asm/x25519-asm-x86_64.S - ) -endif() - add_library( curve25519 OBJECT spake25519.c - x25519-x86_64.c ${CURVE25519_ARCH_SOURCES} )
diff --git a/crypto/curve25519/asm/x25519-asm-x86_64.S b/crypto/curve25519/asm/x25519-asm-x86_64.S deleted file mode 100644 index 6cff53e..0000000 --- a/crypto/curve25519/asm/x25519-asm-x86_64.S +++ /dev/null
@@ -1,1894 +0,0 @@ -/* Copyright (c) 2015, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - -/* This file is adapted from crypto_scalarmult/curve25519/amd64-51/ in - * SUPERCOP 20141124 (http://bench.cr.yp.to/supercop.html). That code is public - * domain licensed but the standard ISC license is included above to keep - * licensing simple. */ - -#if !defined(OPENSSL_NO_ASM) -#if defined(__x86_64__) - -.data -.p2align 4 - -#if defined(__APPLE__) -/* OS X's C ABI prefixes functions with underscore. 
*/ -#define C_ABI(x) _ ## x -#define HIDDEN .private_extern -#else -#define C_ABI(x) x -#define HIDDEN .hidden -#endif - -x25519_x86_64_REDMASK51: .quad 0x0007FFFFFFFFFFFF -x25519_x86_64_121666_213: .quad 996687872 -x25519_x86_64_2P0: .quad 0xFFFFFFFFFFFDA -x25519_x86_64_2P1234: .quad 0xFFFFFFFFFFFFE -x25519_x86_64_4P0: .quad 0x1FFFFFFFFFFFB4 -x25519_x86_64_4P1234: .quad 0x1FFFFFFFFFFFFC -x25519_x86_64_MU0: .quad 0xED9CE5A30A2C131B -x25519_x86_64_MU1: .quad 0x2106215D086329A7 -x25519_x86_64_MU2: .quad 0xFFFFFFFFFFFFFFEB -x25519_x86_64_MU3: .quad 0xFFFFFFFFFFFFFFFF -x25519_x86_64_MU4: .quad 0x000000000000000F -x25519_x86_64_ORDER0: .quad 0x5812631A5CF5D3ED -x25519_x86_64_ORDER1: .quad 0x14DEF9DEA2F79CD6 -x25519_x86_64_ORDER2: .quad 0x0000000000000000 -x25519_x86_64_ORDER3: .quad 0x1000000000000000 -x25519_x86_64_EC2D0: .quad 1859910466990425 -x25519_x86_64_EC2D1: .quad 932731440258426 -x25519_x86_64_EC2D2: .quad 1072319116312658 -x25519_x86_64_EC2D3: .quad 1815898335770999 -x25519_x86_64_EC2D4: .quad 633789495995903 -x25519_x86_64__38: .quad 38 - -.text -.p2align 5 - -.globl C_ABI(x25519_x86_64_freeze) -HIDDEN C_ABI(x25519_x86_64_freeze) -C_ABI(x25519_x86_64_freeze): -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. 
*/ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq 0(%rdi),%rsi -movq 8(%rdi),%rdx -movq 16(%rdi),%rcx -movq 24(%rdi),%r8 -movq 32(%rdi),%r9 -movq x25519_x86_64_REDMASK51(%rip),%rax -mov %rax,%r10 -sub $18,%r10 -mov $3,%r11 -._reduceloop: -mov %rsi,%r12 -shr $51,%r12 -and %rax,%rsi -add %r12,%rdx -mov %rdx,%r12 -shr $51,%r12 -and %rax,%rdx -add %r12,%rcx -mov %rcx,%r12 -shr $51,%r12 -and %rax,%rcx -add %r12,%r8 -mov %r8,%r12 -shr $51,%r12 -and %rax,%r8 -add %r12,%r9 -mov %r9,%r12 -shr $51,%r12 -and %rax,%r9 -imulq $19,%r12,%r12 -add %r12,%rsi -sub $1,%r11 -ja ._reduceloop -mov $1,%r12 -cmp %r10,%rsi -cmovl %r11,%r12 -cmp %rax,%rdx -cmovne %r11,%r12 -cmp %rax,%rcx -cmovne %r11,%r12 -cmp %rax,%r8 -cmovne %r11,%r12 -cmp %rax,%r9 -cmovne %r11,%r12 -neg %r12 -and %r12,%rax -and %r12,%r10 -sub %r10,%rsi -sub %rax,%rdx -sub %rax,%rcx -sub %rax,%r8 -sub %rax,%r9 -movq %rsi,0(%rdi) -movq %rdx,8(%rdi) -movq %rcx,16(%rdi) -movq %r8,24(%rdi) -movq %r9,32(%rdi) -movq -8(%rsp),%r12 -ret -.cfi_endproc - -.p2align 5 -.globl C_ABI(x25519_x86_64_mul) -HIDDEN C_ABI(x25519_x86_64_mul) -C_ABI(x25519_x86_64_mul): -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. 
*/ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq %r13,-16(%rsp) -.cfi_rel_offset r13, -16 -movq %r14,-24(%rsp) -.cfi_rel_offset r14, -24 -movq %r15,-32(%rsp) -.cfi_rel_offset r15, -32 -movq %rbx,-40(%rsp) -.cfi_rel_offset rbx, -40 -movq %rbp,-48(%rsp) -.cfi_rel_offset rbp, -48 -mov %rdx,%rcx -movq 24(%rsi),%rdx -imulq $19,%rdx,%rax -movq %rax,-64(%rsp) -mulq 16(%rcx) -mov %rax,%r8 -mov %rdx,%r9 -movq 32(%rsi),%rdx -imulq $19,%rdx,%rax -movq %rax,-72(%rsp) -mulq 8(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsi),%rax -mulq 0(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsi),%rax -mulq 8(%rcx) -mov %rax,%r10 -mov %rdx,%r11 -movq 0(%rsi),%rax -mulq 16(%rcx) -mov %rax,%r12 -mov %rdx,%r13 -movq 0(%rsi),%rax -mulq 24(%rcx) -mov %rax,%r14 -mov %rdx,%r15 -movq 0(%rsi),%rax -mulq 32(%rcx) -mov %rax,%rbx -mov %rdx,%rbp -movq 8(%rsi),%rax -mulq 0(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsi),%rax -mulq 8(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 8(%rsi),%rax -mulq 16(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsi),%rax -mulq 24(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq 8(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 16(%rsi),%rax -mulq 0(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 16(%rsi),%rax -mulq 8(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 16(%rsi),%rax -mulq 16(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq 16(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rcx) -add %rax,%r8 -adc %rdx,%r9 -movq 16(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq 24(%rsi),%rax -mulq 0(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq 24(%rsi),%rax -mulq 8(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq -64(%rsp),%rax -mulq 24(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq -64(%rsp),%rax -mulq 32(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq 32(%rsi),%rax -mulq 0(%rcx) -add %rax,%rbx -adc %rdx,%rbp -movq -72(%rsp),%rax -mulq 16(%rcx) -add %rax,%r10 -adc %rdx,%r11 -movq -72(%rsp),%rax -mulq 24(%rcx) -add %rax,%r12 -adc %rdx,%r13 -movq -72(%rsp),%rax 
-mulq 32(%rcx) -add %rax,%r14 -adc %rdx,%r15 -movq x25519_x86_64_REDMASK51(%rip),%rsi -shld $13,%r8,%r9 -and %rsi,%r8 -shld $13,%r10,%r11 -and %rsi,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rsi,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rsi,%r14 -add %r13,%r14 -shld $13,%rbx,%rbp -and %rsi,%rbx -add %r15,%rbx -imulq $19,%rbp,%rdx -add %rdx,%r8 -mov %r8,%rdx -shr $51,%rdx -add %r10,%rdx -mov %rdx,%rcx -shr $51,%rdx -and %rsi,%r8 -add %r12,%rdx -mov %rdx,%r9 -shr $51,%rdx -and %rsi,%rcx -add %r14,%rdx -mov %rdx,%rax -shr $51,%rdx -and %rsi,%r9 -add %rbx,%rdx -mov %rdx,%r10 -shr $51,%rdx -and %rsi,%rax -imulq $19,%rdx,%rdx -add %rdx,%r8 -and %rsi,%r10 -movq %r8,0(%rdi) -movq %rcx,8(%rdi) -movq %r9,16(%rdi) -movq %rax,24(%rdi) -movq %r10,32(%rdi) -movq -8(%rsp),%r12 -movq -16(%rsp),%r13 -movq -24(%rsp),%r14 -movq -32(%rsp),%r15 -movq -40(%rsp),%rbx -movq -48(%rsp),%rbp -ret -.cfi_endproc - -.p2align 5 -.globl C_ABI(x25519_x86_64_square) -HIDDEN C_ABI(x25519_x86_64_square) -C_ABI(x25519_x86_64_square): -.cfi_startproc -/* This is a leaf function and uses the redzone for saving registers. 
*/ -movq %r12,-8(%rsp) -.cfi_rel_offset r12, -8 -movq %r13,-16(%rsp) -.cfi_rel_offset r13, -16 -movq %r14,-24(%rsp) -.cfi_rel_offset r14, -24 -movq %r15,-32(%rsp) -.cfi_rel_offset r15, -32 -movq %rbx,-40(%rsp) -.cfi_rel_offset rbx, -40 -movq 0(%rsi),%rax -mulq 0(%rsi) -mov %rax,%rcx -mov %rdx,%r8 -movq 0(%rsi),%rax -shl $1,%rax -mulq 8(%rsi) -mov %rax,%r9 -mov %rdx,%r10 -movq 0(%rsi),%rax -shl $1,%rax -mulq 16(%rsi) -mov %rax,%r11 -mov %rdx,%r12 -movq 0(%rsi),%rax -shl $1,%rax -mulq 24(%rsi) -mov %rax,%r13 -mov %rdx,%r14 -movq 0(%rsi),%rax -shl $1,%rax -mulq 32(%rsi) -mov %rax,%r15 -mov %rdx,%rbx -movq 8(%rsi),%rax -mulq 8(%rsi) -add %rax,%r11 -adc %rdx,%r12 -movq 8(%rsi),%rax -shl $1,%rax -mulq 16(%rsi) -add %rax,%r13 -adc %rdx,%r14 -movq 8(%rsi),%rax -shl $1,%rax -mulq 24(%rsi) -add %rax,%r15 -adc %rdx,%rbx -movq 8(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%rcx -adc %rdx,%r8 -movq 16(%rsi),%rax -mulq 16(%rsi) -add %rax,%r15 -adc %rdx,%rbx -movq 16(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 24(%rsi) -add %rax,%rcx -adc %rdx,%r8 -movq 16(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%r9 -adc %rdx,%r10 -movq 24(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsi) -add %rax,%r9 -adc %rdx,%r10 -movq 24(%rsi),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsi) -add %rax,%r11 -adc %rdx,%r12 -movq 32(%rsi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsi) -add %rax,%r13 -adc %rdx,%r14 -movq x25519_x86_64_REDMASK51(%rip),%rsi -shld $13,%rcx,%r8 -and %rsi,%rcx -shld $13,%r9,%r10 -and %rsi,%r9 -add %r8,%r9 -shld $13,%r11,%r12 -and %rsi,%r11 -add %r10,%r11 -shld $13,%r13,%r14 -and %rsi,%r13 -add %r12,%r13 -shld $13,%r15,%rbx -and %rsi,%r15 -add %r14,%r15 -imulq $19,%rbx,%rdx -add %rdx,%rcx -mov %rcx,%rdx -shr $51,%rdx -add %r9,%rdx -and %rsi,%rcx -mov %rdx,%r8 -shr $51,%rdx -add %r11,%rdx -and %rsi,%r8 -mov %rdx,%r9 -shr $51,%rdx -add %r13,%rdx -and %rsi,%r9 -mov %rdx,%rax -shr $51,%rdx -add %r15,%rdx -and %rsi,%rax -mov %rdx,%r10 -shr $51,%rdx -imulq $19,%rdx,%rdx -add %rdx,%rcx 
-and %rsi,%r10 -movq %rcx,0(%rdi) -movq %r8,8(%rdi) -movq %r9,16(%rdi) -movq %rax,24(%rdi) -movq %r10,32(%rdi) -movq -8(%rsp),%r12 -movq -16(%rsp),%r13 -movq -24(%rsp),%r14 -movq -32(%rsp),%r15 -movq -40(%rsp),%rbx -ret -.cfi_endproc - -.p2align 5 -.globl C_ABI(x25519_x86_64_ladderstep) -HIDDEN C_ABI(x25519_x86_64_ladderstep) -C_ABI(x25519_x86_64_ladderstep): -.cfi_startproc -sub $344,%rsp -.cfi_adjust_cfa_offset 344 -movq %r12,296(%rsp) -.cfi_rel_offset r12, 296 -movq %r13,304(%rsp) -.cfi_rel_offset r13, 304 -movq %r14,312(%rsp) -.cfi_rel_offset r14, 312 -movq %r15,320(%rsp) -.cfi_rel_offset r15, 320 -movq %rbx,328(%rsp) -.cfi_rel_offset rbx, 328 -movq %rbp,336(%rsp) -.cfi_rel_offset rbp, 336 -movq 40(%rdi),%rsi -movq 48(%rdi),%rdx -movq 56(%rdi),%rcx -movq 64(%rdi),%r8 -movq 72(%rdi),%r9 -mov %rsi,%rax -mov %rdx,%r10 -mov %rcx,%r11 -mov %r8,%r12 -mov %r9,%r13 -add x25519_x86_64_2P0(%rip),%rax -add x25519_x86_64_2P1234(%rip),%r10 -add x25519_x86_64_2P1234(%rip),%r11 -add x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 80(%rdi),%rsi -addq 88(%rdi),%rdx -addq 96(%rdi),%rcx -addq 104(%rdi),%r8 -addq 112(%rdi),%r9 -subq 80(%rdi),%rax -subq 88(%rdi),%r10 -subq 96(%rdi),%r11 -subq 104(%rdi),%r12 -subq 112(%rdi),%r13 -movq %rsi,0(%rsp) -movq %rdx,8(%rsp) -movq %rcx,16(%rsp) -movq %r8,24(%rsp) -movq %r9,32(%rsp) -movq %rax,40(%rsp) -movq %r10,48(%rsp) -movq %r11,56(%rsp) -movq %r12,64(%rsp) -movq %r13,72(%rsp) -movq 40(%rsp),%rax -mulq 40(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 40(%rsp),%rax -shl $1,%rax -mulq 48(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 40(%rsp),%rax -shl $1,%rax -mulq 56(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 40(%rsp),%rax -shl $1,%rax -mulq 64(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 40(%rsp),%rax -shl $1,%rax -mulq 72(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 48(%rsp),%rax -mulq 48(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 48(%rsp),%rax -shl $1,%rax -mulq 56(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 48(%rsp),%rax -shl 
$1,%rax -mulq 64(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 48(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 56(%rsp),%rax -mulq 56(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 56(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 64(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 56(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 64(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 64(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 64(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 72(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 72(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,80(%rsp) -movq %r8,88(%rsp) -movq %r9,96(%rsp) -movq %rax,104(%rsp) -movq %r10,112(%rsp) -movq 0(%rsp),%rax -mulq 0(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 0(%rsp),%rax -shl $1,%rax -mulq 8(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 0(%rsp),%rax -shl $1,%rax -mulq 16(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 0(%rsp),%rax -shl $1,%rax -mulq 24(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 0(%rsp),%rax -shl $1,%rax -mulq 32(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 8(%rsp),%rax -mulq 8(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -shl $1,%rax -mulq 16(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 8(%rsp),%rax -shl $1,%rax -mulq 24(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add 
%rax,%rsi -adc %rdx,%rcx -movq 16(%rsp),%rax -mulq 16(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 16(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 24(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 16(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 24(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 24(%rsp),%rdx -imulq $38,%rdx,%rax -mulq 32(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 32(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,120(%rsp) -movq %r8,128(%rsp) -movq %r9,136(%rsp) -movq %rax,144(%rsp) -movq %r10,152(%rsp) -mov %rsi,%rsi -mov %r8,%rdx -mov %r9,%rcx -mov %rax,%r8 -mov %r10,%r9 -add x25519_x86_64_2P0(%rip),%rsi -add x25519_x86_64_2P1234(%rip),%rdx -add x25519_x86_64_2P1234(%rip),%rcx -add x25519_x86_64_2P1234(%rip),%r8 -add x25519_x86_64_2P1234(%rip),%r9 -subq 80(%rsp),%rsi -subq 88(%rsp),%rdx -subq 96(%rsp),%rcx -subq 104(%rsp),%r8 -subq 112(%rsp),%r9 -movq %rsi,160(%rsp) -movq %rdx,168(%rsp) -movq %rcx,176(%rsp) -movq %r8,184(%rsp) -movq %r9,192(%rsp) -movq 120(%rdi),%rsi -movq 128(%rdi),%rdx -movq 136(%rdi),%rcx -movq 144(%rdi),%r8 -movq 152(%rdi),%r9 -mov %rsi,%rax -mov %rdx,%r10 -mov %rcx,%r11 -mov %r8,%r12 -mov %r9,%r13 -add x25519_x86_64_2P0(%rip),%rax -add x25519_x86_64_2P1234(%rip),%r10 -add x25519_x86_64_2P1234(%rip),%r11 -add 
x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 160(%rdi),%rsi -addq 168(%rdi),%rdx -addq 176(%rdi),%rcx -addq 184(%rdi),%r8 -addq 192(%rdi),%r9 -subq 160(%rdi),%rax -subq 168(%rdi),%r10 -subq 176(%rdi),%r11 -subq 184(%rdi),%r12 -subq 192(%rdi),%r13 -movq %rsi,200(%rsp) -movq %rdx,208(%rsp) -movq %rcx,216(%rsp) -movq %r8,224(%rsp) -movq %r9,232(%rsp) -movq %rax,240(%rsp) -movq %r10,248(%rsp) -movq %r11,256(%rsp) -movq %r12,264(%rsp) -movq %r13,272(%rsp) -movq 224(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,280(%rsp) -mulq 56(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 232(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,288(%rsp) -mulq 48(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 200(%rsp),%rax -mulq 40(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 200(%rsp),%rax -mulq 48(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 200(%rsp),%rax -mulq 56(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 200(%rsp),%rax -mulq 64(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 200(%rsp),%rax -mulq 72(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 208(%rsp),%rax -mulq 40(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 208(%rsp),%rax -mulq 48(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 208(%rsp),%rax -mulq 56(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 208(%rsp),%rax -mulq 64(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 208(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 216(%rsp),%rax -mulq 40(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 216(%rsp),%rax -mulq 48(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 216(%rsp),%rax -mulq 56(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 216(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 64(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 216(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 72(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 224(%rsp),%rax -mulq 40(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 224(%rsp),%rax -mulq 48(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 280(%rsp),%rax -mulq 64(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 280(%rsp),%rax -mulq 72(%rsp) -add %rax,%r10 -adc 
%rdx,%r11 -movq 232(%rsp),%rax -mulq 40(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 288(%rsp),%rax -mulq 56(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 288(%rsp),%rax -mulq 64(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 288(%rsp),%rax -mulq 72(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,40(%rsp) -movq %r8,48(%rsp) -movq %r9,56(%rsp) -movq %rax,64(%rsp) -movq %r10,72(%rsp) -movq 264(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,200(%rsp) -mulq 16(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 272(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,208(%rsp) -mulq 8(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 240(%rsp),%rax -mulq 0(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 240(%rsp),%rax -mulq 8(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 240(%rsp),%rax -mulq 16(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 240(%rsp),%rax -mulq 24(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 240(%rsp),%rax -mulq 32(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 248(%rsp),%rax -mulq 0(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 248(%rsp),%rax -mulq 8(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 248(%rsp),%rax -mulq 16(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 248(%rsp),%rax -mulq 24(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 248(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 256(%rsp),%rax -mulq 0(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 256(%rsp),%rax -mulq 8(%rsp) -add %rax,%r12 -adc 
%rdx,%r13 -movq 256(%rsp),%rax -mulq 16(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 256(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 24(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 256(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 32(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 264(%rsp),%rax -mulq 0(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 264(%rsp),%rax -mulq 8(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 200(%rsp),%rax -mulq 24(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 200(%rsp),%rax -mulq 32(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 272(%rsp),%rax -mulq 0(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 208(%rsp),%rax -mulq 16(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 208(%rsp),%rax -mulq 24(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 208(%rsp),%rax -mulq 32(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -mov %rsi,%rdx -mov %r8,%rcx -mov %r9,%r11 -mov %rax,%r12 -mov %r10,%r13 -add x25519_x86_64_2P0(%rip),%rdx -add x25519_x86_64_2P1234(%rip),%rcx -add x25519_x86_64_2P1234(%rip),%r11 -add x25519_x86_64_2P1234(%rip),%r12 -add x25519_x86_64_2P1234(%rip),%r13 -addq 40(%rsp),%rsi -addq 48(%rsp),%r8 -addq 56(%rsp),%r9 -addq 64(%rsp),%rax -addq 72(%rsp),%r10 -subq 40(%rsp),%rdx -subq 48(%rsp),%rcx -subq 56(%rsp),%r11 -subq 64(%rsp),%r12 -subq 72(%rsp),%r13 -movq %rsi,120(%rdi) -movq %r8,128(%rdi) -movq %r9,136(%rdi) -movq %rax,144(%rdi) -movq %r10,152(%rdi) -movq %rdx,160(%rdi) -movq %rcx,168(%rdi) -movq 
%r11,176(%rdi) -movq %r12,184(%rdi) -movq %r13,192(%rdi) -movq 120(%rdi),%rax -mulq 120(%rdi) -mov %rax,%rsi -mov %rdx,%rcx -movq 120(%rdi),%rax -shl $1,%rax -mulq 128(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 120(%rdi),%rax -shl $1,%rax -mulq 136(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 120(%rdi),%rax -shl $1,%rax -mulq 144(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 120(%rdi),%rax -shl $1,%rax -mulq 152(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 128(%rdi),%rax -mulq 128(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 128(%rdi),%rax -shl $1,%rax -mulq 136(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 128(%rdi),%rax -shl $1,%rax -mulq 144(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 128(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rdi),%rax -mulq 136(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 136(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 144(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 144(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 152(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 152(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 152(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,120(%rdi) -movq %r8,128(%rdi) -movq %r9,136(%rdi) -movq %rax,144(%rdi) -movq %r10,152(%rdi) -movq 
160(%rdi),%rax -mulq 160(%rdi) -mov %rax,%rsi -mov %rdx,%rcx -movq 160(%rdi),%rax -shl $1,%rax -mulq 168(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 160(%rdi),%rax -shl $1,%rax -mulq 176(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 160(%rdi),%rax -shl $1,%rax -mulq 184(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 160(%rdi),%rax -shl $1,%rax -mulq 192(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 168(%rdi),%rax -mulq 168(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 168(%rdi),%rax -shl $1,%rax -mulq 176(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 168(%rdi),%rax -shl $1,%rax -mulq 184(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 168(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rax -mulq 176(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 176(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 184(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 184(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rdx -imulq $38,%rdx,%rax -mulq 192(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 192(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -and %rdx,%rsi -mov %rcx,%r8 -shr $51,%rcx -add %r10,%rcx -and %rdx,%r8 -mov %rcx,%r9 -shr $51,%rcx -add %r12,%rcx -and %rdx,%r9 -mov %rcx,%rax -shr $51,%rcx -add %r14,%rcx -and %rdx,%rax -mov %rcx,%r10 -shr $51,%rcx -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,160(%rdi) -movq %r8,168(%rdi) -movq %r9,176(%rdi) -movq %rax,184(%rdi) -movq %r10,192(%rdi) -movq 184(%rdi),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 16(%rdi) -mov 
%rax,%rsi -mov %rdx,%rcx -movq 192(%rdi),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 8(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 160(%rdi),%rax -mulq 0(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 160(%rdi),%rax -mulq 8(%rdi) -mov %rax,%r8 -mov %rdx,%r9 -movq 160(%rdi),%rax -mulq 16(%rdi) -mov %rax,%r10 -mov %rdx,%r11 -movq 160(%rdi),%rax -mulq 24(%rdi) -mov %rax,%r12 -mov %rdx,%r13 -movq 160(%rdi),%rax -mulq 32(%rdi) -mov %rax,%r14 -mov %rdx,%r15 -movq 168(%rdi),%rax -mulq 0(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 168(%rdi),%rax -mulq 8(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 168(%rdi),%rax -mulq 16(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 168(%rdi),%rax -mulq 24(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 168(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rax -mulq 0(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 176(%rdi),%rax -mulq 8(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 176(%rdi),%rax -mulq 16(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 176(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 24(%rdi) -add %rax,%rsi -adc %rdx,%rcx -movq 176(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 32(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 184(%rdi),%rax -mulq 0(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq 184(%rdi),%rax -mulq 8(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 24(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 32(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 192(%rdi),%rax -mulq 0(%rdi) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rax -mulq 16(%rdi) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 24(%rdi) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 32(%rdi) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov 
%rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,160(%rdi) -movq %r8,168(%rdi) -movq %r9,176(%rdi) -movq %rax,184(%rdi) -movq %r10,192(%rdi) -movq 144(%rsp),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 96(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 152(%rsp),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 88(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 120(%rsp),%rax -mulq 80(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 120(%rsp),%rax -mulq 88(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 120(%rsp),%rax -mulq 96(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 120(%rsp),%rax -mulq 104(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 120(%rsp),%rax -mulq 112(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 128(%rsp),%rax -mulq 80(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 128(%rsp),%rax -mulq 88(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 128(%rsp),%rax -mulq 96(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 128(%rsp),%rax -mulq 104(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 128(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 112(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rsp),%rax -mulq 80(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 136(%rsp),%rax -mulq 88(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 136(%rsp),%rax -mulq 96(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 136(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 104(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 136(%rsp),%rdx -imulq $19,%rdx,%rax -mulq 112(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 144(%rsp),%rax -mulq 80(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 144(%rsp),%rax -mulq 88(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 104(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 112(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 152(%rsp),%rax -mulq 80(%rsp) -add %rax,%r14 -adc %rdx,%r15 
-movq 8(%rsp),%rax -mulq 96(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 104(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 112(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq $19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,40(%rdi) -movq %r8,48(%rdi) -movq %r9,56(%rdi) -movq %rax,64(%rdi) -movq %r10,72(%rdi) -movq 160(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -mov %rax,%rsi -mov %rdx,%rcx -movq 168(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%rcx -mov %rdx,%r8 -movq 176(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r8 -mov %rdx,%r9 -movq 184(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r9 -mov %rdx,%r10 -movq 192(%rsp),%rax -mulq x25519_x86_64_121666_213(%rip) -shr $13,%rax -add %rax,%r10 -imulq $19,%rdx,%rdx -add %rdx,%rsi -addq 80(%rsp),%rsi -addq 88(%rsp),%rcx -addq 96(%rsp),%r8 -addq 104(%rsp),%r9 -addq 112(%rsp),%r10 -movq %rsi,80(%rdi) -movq %rcx,88(%rdi) -movq %r8,96(%rdi) -movq %r9,104(%rdi) -movq %r10,112(%rdi) -movq 104(%rdi),%rsi -imulq $19,%rsi,%rax -movq %rax,0(%rsp) -mulq 176(%rsp) -mov %rax,%rsi -mov %rdx,%rcx -movq 112(%rdi),%rdx -imulq $19,%rdx,%rax -movq %rax,8(%rsp) -mulq 168(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 80(%rdi),%rax -mulq 160(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 80(%rdi),%rax -mulq 168(%rsp) -mov %rax,%r8 -mov %rdx,%r9 -movq 80(%rdi),%rax 
-mulq 176(%rsp) -mov %rax,%r10 -mov %rdx,%r11 -movq 80(%rdi),%rax -mulq 184(%rsp) -mov %rax,%r12 -mov %rdx,%r13 -movq 80(%rdi),%rax -mulq 192(%rsp) -mov %rax,%r14 -mov %rdx,%r15 -movq 88(%rdi),%rax -mulq 160(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 88(%rdi),%rax -mulq 168(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 88(%rdi),%rax -mulq 176(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 88(%rdi),%rax -mulq 184(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 88(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 96(%rdi),%rax -mulq 160(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 96(%rdi),%rax -mulq 168(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 96(%rdi),%rax -mulq 176(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 96(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 184(%rsp) -add %rax,%rsi -adc %rdx,%rcx -movq 96(%rdi),%rdx -imulq $19,%rdx,%rax -mulq 192(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 104(%rdi),%rax -mulq 160(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq 104(%rdi),%rax -mulq 168(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 0(%rsp),%rax -mulq 184(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 0(%rsp),%rax -mulq 192(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 112(%rdi),%rax -mulq 160(%rsp) -add %rax,%r14 -adc %rdx,%r15 -movq 8(%rsp),%rax -mulq 176(%rsp) -add %rax,%r8 -adc %rdx,%r9 -movq 8(%rsp),%rax -mulq 184(%rsp) -add %rax,%r10 -adc %rdx,%r11 -movq 8(%rsp),%rax -mulq 192(%rsp) -add %rax,%r12 -adc %rdx,%r13 -movq x25519_x86_64_REDMASK51(%rip),%rdx -shld $13,%rsi,%rcx -and %rdx,%rsi -shld $13,%r8,%r9 -and %rdx,%r8 -add %rcx,%r8 -shld $13,%r10,%r11 -and %rdx,%r10 -add %r9,%r10 -shld $13,%r12,%r13 -and %rdx,%r12 -add %r11,%r12 -shld $13,%r14,%r15 -and %rdx,%r14 -add %r13,%r14 -imulq $19,%r15,%rcx -add %rcx,%rsi -mov %rsi,%rcx -shr $51,%rcx -add %r8,%rcx -mov %rcx,%r8 -shr $51,%rcx -and %rdx,%rsi -add %r10,%rcx -mov %rcx,%r9 -shr $51,%rcx -and %rdx,%r8 -add %r12,%rcx -mov %rcx,%rax -shr $51,%rcx -and %rdx,%r9 -add %r14,%rcx -mov %rcx,%r10 -shr $51,%rcx -and %rdx,%rax -imulq 
$19,%rcx,%rcx -add %rcx,%rsi -and %rdx,%r10 -movq %rsi,80(%rdi) -movq %r8,88(%rdi) -movq %r9,96(%rdi) -movq %rax,104(%rdi) -movq %r10,112(%rdi) -movq 296(%rsp),%r12 -movq 304(%rsp),%r13 -movq 312(%rsp),%r14 -movq 320(%rsp),%r15 -movq 328(%rsp),%rbx -movq 336(%rsp),%rbp -add $344,%rsp -.cfi_adjust_cfa_offset -344 -ret -.cfi_endproc - -.p2align 5 -.globl C_ABI(x25519_x86_64_work_cswap) -HIDDEN C_ABI(x25519_x86_64_work_cswap) -C_ABI(x25519_x86_64_work_cswap): -.cfi_startproc -subq $1,%rsi -notq %rsi -movq %rsi,%xmm15 -pshufd $0x44,%xmm15,%xmm15 -movdqu 0(%rdi),%xmm0 -movdqu 16(%rdi),%xmm2 -movdqu 32(%rdi),%xmm4 -movdqu 48(%rdi),%xmm6 -movdqu 64(%rdi),%xmm8 -movdqu 80(%rdi),%xmm1 -movdqu 96(%rdi),%xmm3 -movdqu 112(%rdi),%xmm5 -movdqu 128(%rdi),%xmm7 -movdqu 144(%rdi),%xmm9 -movdqa %xmm1,%xmm10 -movdqa %xmm3,%xmm11 -movdqa %xmm5,%xmm12 -movdqa %xmm7,%xmm13 -movdqa %xmm9,%xmm14 -pxor %xmm0,%xmm10 -pxor %xmm2,%xmm11 -pxor %xmm4,%xmm12 -pxor %xmm6,%xmm13 -pxor %xmm8,%xmm14 -pand %xmm15,%xmm10 -pand %xmm15,%xmm11 -pand %xmm15,%xmm12 -pand %xmm15,%xmm13 -pand %xmm15,%xmm14 -pxor %xmm10,%xmm0 -pxor %xmm10,%xmm1 -pxor %xmm11,%xmm2 -pxor %xmm11,%xmm3 -pxor %xmm12,%xmm4 -pxor %xmm12,%xmm5 -pxor %xmm13,%xmm6 -pxor %xmm13,%xmm7 -pxor %xmm14,%xmm8 -pxor %xmm14,%xmm9 -movdqu %xmm0,0(%rdi) -movdqu %xmm2,16(%rdi) -movdqu %xmm4,32(%rdi) -movdqu %xmm6,48(%rdi) -movdqu %xmm8,64(%rdi) -movdqu %xmm1,80(%rdi) -movdqu %xmm3,96(%rdi) -movdqu %xmm5,112(%rdi) -movdqu %xmm7,128(%rdi) -movdqu %xmm9,144(%rdi) -ret -.cfi_endproc - -#endif /* __x86_64__ */ -#endif /* !OPENSSL_NO_ASM */
diff --git a/crypto/curve25519/x25519-x86_64.c b/crypto/curve25519/x25519-x86_64.c deleted file mode 100644 index 41db0bd..0000000 --- a/crypto/curve25519/x25519-x86_64.c +++ /dev/null
@@ -1,247 +0,0 @@ -/* Copyright (c) 2015, Google Inc. - * - * Permission to use, copy, modify, and/or distribute this software for any - * purpose with or without fee is hereby granted, provided that the above - * copyright notice and this permission notice appear in all copies. - * - * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES - * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF - * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY - * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES - * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION - * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN - * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ - -// This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP -// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as -// public domain but this file has the ISC license just to keep licencing -// simple. -// -// The field functions are shared by Ed25519 and X25519 where possible. 
- -#include <openssl/curve25519.h> - -#include <string.h> - -#include "../internal.h" -#include "../../third_party/fiat/internal.h" - - -#if defined(BORINGSSL_X25519_X86_64) - -typedef struct { uint64_t v[5]; } fe25519; - -// These functions are defined in asm/x25519-x86_64.S -void x25519_x86_64_work_cswap(fe25519 *, uint64_t); -void x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b); -void x25519_x86_64_square(fe25519 *out, const fe25519 *a); -void x25519_x86_64_freeze(fe25519 *); -void x25519_x86_64_ladderstep(fe25519 *work); - -static void fe25519_setint(fe25519 *r, unsigned v) { - r->v[0] = v; - r->v[1] = 0; - r->v[2] = 0; - r->v[3] = 0; - r->v[4] = 0; -} - -// Assumes input x being reduced below 2^255 -static void fe25519_pack(unsigned char r[32], const fe25519 *x) { - fe25519 t; - t = *x; - x25519_x86_64_freeze(&t); - - r[0] = (uint8_t)(t.v[0] & 0xff); - r[1] = (uint8_t)((t.v[0] >> 8) & 0xff); - r[2] = (uint8_t)((t.v[0] >> 16) & 0xff); - r[3] = (uint8_t)((t.v[0] >> 24) & 0xff); - r[4] = (uint8_t)((t.v[0] >> 32) & 0xff); - r[5] = (uint8_t)((t.v[0] >> 40) & 0xff); - r[6] = (uint8_t)((t.v[0] >> 48)); - - r[6] ^= (uint8_t)((t.v[1] << 3) & 0xf8); - r[7] = (uint8_t)((t.v[1] >> 5) & 0xff); - r[8] = (uint8_t)((t.v[1] >> 13) & 0xff); - r[9] = (uint8_t)((t.v[1] >> 21) & 0xff); - r[10] = (uint8_t)((t.v[1] >> 29) & 0xff); - r[11] = (uint8_t)((t.v[1] >> 37) & 0xff); - r[12] = (uint8_t)((t.v[1] >> 45)); - - r[12] ^= (uint8_t)((t.v[2] << 6) & 0xc0); - r[13] = (uint8_t)((t.v[2] >> 2) & 0xff); - r[14] = (uint8_t)((t.v[2] >> 10) & 0xff); - r[15] = (uint8_t)((t.v[2] >> 18) & 0xff); - r[16] = (uint8_t)((t.v[2] >> 26) & 0xff); - r[17] = (uint8_t)((t.v[2] >> 34) & 0xff); - r[18] = (uint8_t)((t.v[2] >> 42) & 0xff); - r[19] = (uint8_t)((t.v[2] >> 50)); - - r[19] ^= (uint8_t)((t.v[3] << 1) & 0xfe); - r[20] = (uint8_t)((t.v[3] >> 7) & 0xff); - r[21] = (uint8_t)((t.v[3] >> 15) & 0xff); - r[22] = (uint8_t)((t.v[3] >> 23) & 0xff); - r[23] = (uint8_t)((t.v[3] >> 31) & 
0xff); - r[24] = (uint8_t)((t.v[3] >> 39) & 0xff); - r[25] = (uint8_t)((t.v[3] >> 47)); - - r[25] ^= (uint8_t)((t.v[4] << 4) & 0xf0); - r[26] = (uint8_t)((t.v[4] >> 4) & 0xff); - r[27] = (uint8_t)((t.v[4] >> 12) & 0xff); - r[28] = (uint8_t)((t.v[4] >> 20) & 0xff); - r[29] = (uint8_t)((t.v[4] >> 28) & 0xff); - r[30] = (uint8_t)((t.v[4] >> 36) & 0xff); - r[31] = (uint8_t)((t.v[4] >> 44)); -} - -static void fe25519_unpack(fe25519 *r, const uint8_t x[32]) { - r->v[0] = x[0]; - r->v[0] += (uint64_t)x[1] << 8; - r->v[0] += (uint64_t)x[2] << 16; - r->v[0] += (uint64_t)x[3] << 24; - r->v[0] += (uint64_t)x[4] << 32; - r->v[0] += (uint64_t)x[5] << 40; - r->v[0] += ((uint64_t)x[6] & 7) << 48; - - r->v[1] = x[6] >> 3; - r->v[1] += (uint64_t)x[7] << 5; - r->v[1] += (uint64_t)x[8] << 13; - r->v[1] += (uint64_t)x[9] << 21; - r->v[1] += (uint64_t)x[10] << 29; - r->v[1] += (uint64_t)x[11] << 37; - r->v[1] += ((uint64_t)x[12] & 63) << 45; - - r->v[2] = x[12] >> 6; - r->v[2] += (uint64_t)x[13] << 2; - r->v[2] += (uint64_t)x[14] << 10; - r->v[2] += (uint64_t)x[15] << 18; - r->v[2] += (uint64_t)x[16] << 26; - r->v[2] += (uint64_t)x[17] << 34; - r->v[2] += (uint64_t)x[18] << 42; - r->v[2] += ((uint64_t)x[19] & 1) << 50; - - r->v[3] = x[19] >> 1; - r->v[3] += (uint64_t)x[20] << 7; - r->v[3] += (uint64_t)x[21] << 15; - r->v[3] += (uint64_t)x[22] << 23; - r->v[3] += (uint64_t)x[23] << 31; - r->v[3] += (uint64_t)x[24] << 39; - r->v[3] += ((uint64_t)x[25] & 15) << 47; - - r->v[4] = x[25] >> 4; - r->v[4] += (uint64_t)x[26] << 4; - r->v[4] += (uint64_t)x[27] << 12; - r->v[4] += (uint64_t)x[28] << 20; - r->v[4] += (uint64_t)x[29] << 28; - r->v[4] += (uint64_t)x[30] << 36; - r->v[4] += ((uint64_t)x[31] & 127) << 44; -} - -static void fe25519_invert(fe25519 *r, const fe25519 *x) { - fe25519 z2; - fe25519 z9; - fe25519 z11; - fe25519 z2_5_0; - fe25519 z2_10_0; - fe25519 z2_20_0; - fe25519 z2_50_0; - fe25519 z2_100_0; - fe25519 t; - int i; - - /* 2 */ x25519_x86_64_square(&z2, x); - /* 4 */ 
x25519_x86_64_square(&t, &z2); - /* 8 */ x25519_x86_64_square(&t, &t); - /* 9 */ x25519_x86_64_mul(&z9, &t, x); - /* 11 */ x25519_x86_64_mul(&z11, &z9, &z2); - /* 22 */ x25519_x86_64_square(&t, &z11); - /* 2^5 - 2^0 = 31 */ x25519_x86_64_mul(&z2_5_0, &t, &z9); - - /* 2^6 - 2^1 */ x25519_x86_64_square(&t, &z2_5_0); - /* 2^20 - 2^10 */ for (i = 1; i < 5; i++) { x25519_x86_64_square(&t, &t); } - /* 2^10 - 2^0 */ x25519_x86_64_mul(&z2_10_0, &t, &z2_5_0); - - /* 2^11 - 2^1 */ x25519_x86_64_square(&t, &z2_10_0); - /* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } - /* 2^20 - 2^0 */ x25519_x86_64_mul(&z2_20_0, &t, &z2_10_0); - - /* 2^21 - 2^1 */ x25519_x86_64_square(&t, &z2_20_0); - /* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { x25519_x86_64_square(&t, &t); } - /* 2^40 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_20_0); - - /* 2^41 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } - /* 2^50 - 2^0 */ x25519_x86_64_mul(&z2_50_0, &t, &z2_10_0); - - /* 2^51 - 2^1 */ x25519_x86_64_square(&t, &z2_50_0); - /* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } - /* 2^100 - 2^0 */ x25519_x86_64_mul(&z2_100_0, &t, &z2_50_0); - - /* 2^101 - 2^1 */ x25519_x86_64_square(&t, &z2_100_0); - /* 2^200 - 2^100 */ for (i = 1; i < 100; i++) { - x25519_x86_64_square(&t, &t); - } - /* 2^200 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_100_0); - - /* 2^201 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } - /* 2^250 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_50_0); - - /* 2^251 - 2^1 */ x25519_x86_64_square(&t, &t); - /* 2^252 - 2^2 */ x25519_x86_64_square(&t, &t); - /* 2^253 - 2^3 */ x25519_x86_64_square(&t, &t); - - /* 2^254 - 2^4 */ x25519_x86_64_square(&t, &t); - - /* 2^255 - 2^5 */ x25519_x86_64_square(&t, &t); - /* 2^255 - 21 */ x25519_x86_64_mul(r, &t, &z11); -} - -static void mladder(fe25519 *xr, fe25519 *zr, const 
uint8_t s[32]) { - fe25519 work[5]; - - work[0] = *xr; - fe25519_setint(work + 1, 1); - fe25519_setint(work + 2, 0); - work[3] = *xr; - fe25519_setint(work + 4, 1); - - int i, j; - uint8_t prevbit = 0; - - j = 6; - for (i = 31; i >= 0; i--) { - while (j >= 0) { - const uint8_t bit = 1 & (s[i] >> j); - const uint64_t swap = bit ^ prevbit; - prevbit = bit; - x25519_x86_64_work_cswap(work + 1, swap); - x25519_x86_64_ladderstep(work); - j -= 1; - } - j = 7; - } - - *xr = work[1]; - *zr = work[2]; -} - -void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32], - const uint8_t point[32]) { - uint8_t e[32]; - OPENSSL_memcpy(e, scalar, sizeof(e)); - - e[0] &= 248; - e[31] &= 127; - e[31] |= 64; - - fe25519 t; - fe25519 z; - fe25519_unpack(&t, point); - mladder(&t, &z, e); - fe25519_invert(&z, &z); - x25519_x86_64_mul(&t, &t, &z); - fe25519_pack(out, &t); -} - -#endif // BORINGSSL_X25519_X86_64
diff --git a/third_party/fiat/curve25519.c b/third_party/fiat/curve25519.c index dfa4a39..d5928af 100644 --- a/third_party/fiat/curve25519.c +++ b/third_party/fiat/curve25519.c
@@ -512,8 +512,6 @@ fe_sqr_impl(h->v, f->v); } -#if !defined(BORINGSSL_X25519_X86_64) - // Replace (f,g) with (g,f) if b == 1; // replace (f,g) with (f,g) if b == 0. // @@ -589,8 +587,6 @@ assert_fe(h->v); } -#endif // !BORINGSSL_X25519_X86_64 - // Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0. static void fe_neg_impl(uint64_t out[5], const uint64_t in2[5]) { { const uint64_t x10 = 0; @@ -1201,8 +1197,6 @@ fe_sqr_impl(h->v, f->v); } -#if !defined(BORINGSSL_X25519_X86_64) - // Replace (f,g) with (g,f) if b == 1; // replace (f,g) with (f,g) if b == 0. // @@ -1342,8 +1336,6 @@ assert_fe(h->v); } -#endif // !BORINGSSL_X25519_X86_64 - // Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0. static void fe_neg_impl(uint32_t out[10], const uint32_t in2[10]) { { const uint32_t x20 = 0; @@ -3063,15 +3055,6 @@ } -#if defined(BORINGSSL_X25519_X86_64) - -static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32], - const uint8_t point[32]) { - x25519_x86_64(out, scalar, point); -} - -#else - static void x25519_scalar_mult_generic(uint8_t out[32], const uint8_t scalar[32], const uint8_t point[32]) { @@ -3166,9 +3149,6 @@ x25519_scalar_mult_generic(out, scalar, point); } -#endif // BORINGSSL_X25519_X86_64 - - void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) { RAND_bytes(out_private_key, 32); @@ -3200,20 +3180,6 @@ return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0; } -#if defined(BORINGSSL_X25519_X86_64) - -// When |BORINGSSL_X25519_X86_64| is set, base point multiplication is done with -// the Montgomery ladder because it's faster. Otherwise it's done using the -// Ed25519 tables. 
- -void X25519_public_from_private(uint8_t out_public_value[32], - const uint8_t private_key[32]) { - static const uint8_t kMongomeryBasePoint[32] = {9}; - x25519_scalar_mult(out_public_value, private_key, kMongomeryBasePoint); -} - -#else - void X25519_public_from_private(uint8_t out_public_value[32], const uint8_t private_key[32]) { #if defined(BORINGSSL_X25519_NEON) @@ -3243,5 +3209,3 @@ fe_mul_tlt(&zminusy_inv, &zplusy, &zminusy_inv); fe_tobytes(out_public_value, &zminusy_inv); } - -#endif // BORINGSSL_X25519_X86_64
diff --git a/third_party/fiat/internal.h b/third_party/fiat/internal.h index c5dcc04..be3e265 100644 --- a/third_party/fiat/internal.h +++ b/third_party/fiat/internal.h
@@ -32,15 +32,6 @@ #include "../../crypto/internal.h" -#if defined(OPENSSL_X86_64) && !defined(OPENSSL_SMALL) && \ - !defined(OPENSSL_WINDOWS) && !defined(OPENSSL_NO_ASM) -#define BORINGSSL_X25519_X86_64 - -void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32], - const uint8_t point[32]); -#endif - - #if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_APPLE) #define BORINGSSL_X25519_NEON
diff --git a/util/generate_build_files.py b/util/generate_build_files.py index a4af666..f2b10de 100644 --- a/util/generate_build_files.py +++ b/util/generate_build_files.py
@@ -44,12 +44,6 @@ 'src/crypto/curve25519/asm/x25519-asm-arm.S', 'src/crypto/poly1305/poly1305_arm_asm.S', ], - ('linux', 'x86_64'): [ - 'src/crypto/curve25519/asm/x25519-asm-x86_64.S', - ], - ('mac', 'x86_64'): [ - 'src/crypto/curve25519/asm/x25519-asm-x86_64.S', - ], } PREFIX = None