Remove x86_64 x25519 assembly.

Now that we have 64-bit C code, courtesy of fiat-crypto, the tradeoff
for carrying the assembly changes:

Assembly:
Did 16000 Curve25519 base-point multiplication operations in 1059932us (15095.3 ops/sec)
Did 16000 Curve25519 arbitrary point multiplication operations in 1060023us (15094.0 ops/sec)

fiat64:
Did 39000 Curve25519 base-point multiplication operations in 1004712us (38817.1 ops/sec)
Did 14000 Curve25519 arbitrary point multiplication operations in 1006827us (13905.1 ops/sec)

The assembly is still about 9% faster than fiat64 at arbitrary-point
multiplication, but fiat64 gets to use the Ed25519 tables for base-point
multiplication, so overall it is actually faster to disable the assembly:

>>> 1/(1/15094.0 + 1/15095.3)
7547.324986004976
>>> 1/(1/38817.1 + 1/13905.1)
10237.73016319501

(These combined figures estimate one base-point plus one arbitrary-point
multiplication, as in a full key exchange. The fiat64 win comes at the
cost of touching a 30kB table.)
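
The table reuse works because Ed25519 and Curve25519 are birationally
equivalent: the Montgomery u-coordinate is recovered from the Edwards
y-coordinate as u = (1+y)/(1-y) mod p (RFC 7748, section 4.1), which the
fiat64 X25519_public_from_private below computes in projective form as
(Z+Y)/(Z-Y). A quick sanity check of the map (illustration only, not part
of this change): the Ed25519 base point has y = 4/5, which maps to the
X25519 base point u = 9.

>>> p = 2**255 - 19
>>> y = 4 * pow(5, p - 2, p) % p
>>> (1 + y) * pow((1 - y) % p, p - 2, p) % p
9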

The assembly implementation is no longer pulling its weight. Remove it
and use the fiat code in all build configurations.

Change-Id: Id736873177d5568bb16ea06994b9fcb1af104e33
Reviewed-on: https://boringssl-review.googlesource.com/25524
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/curve25519/CMakeLists.txt b/crypto/curve25519/CMakeLists.txt
index 6f51d54..4894fa8 100644
--- a/crypto/curve25519/CMakeLists.txt
+++ b/crypto/curve25519/CMakeLists.txt
@@ -8,21 +8,12 @@
   )
 endif()
 
-if (${ARCH} STREQUAL "x86_64")
-  set(
-    CURVE25519_ARCH_SOURCES
-
-    asm/x25519-asm-x86_64.S
-  )
-endif()
-
 add_library(
   curve25519
 
   OBJECT
 
   spake25519.c
-  x25519-x86_64.c
 
   ${CURVE25519_ARCH_SOURCES}
 )
diff --git a/crypto/curve25519/asm/x25519-asm-x86_64.S b/crypto/curve25519/asm/x25519-asm-x86_64.S
deleted file mode 100644
index 6cff53e..0000000
--- a/crypto/curve25519/asm/x25519-asm-x86_64.S
+++ /dev/null
@@ -1,1894 +0,0 @@
-/* Copyright (c) 2015, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-/* This file is adapted from crypto_scalarmult/curve25519/amd64-51/ in
- * SUPERCOP 20141124 (http://bench.cr.yp.to/supercop.html). That code is public
- * domain licensed but the standard ISC license is included above to keep
- * licensing simple. */
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__x86_64__)
-
-.data
-.p2align 4
-
-#if defined(__APPLE__)
-/* OS X's C ABI prefixes functions with underscore. */
-#define C_ABI(x) _ ## x
-#define HIDDEN .private_extern
-#else
-#define C_ABI(x) x
-#define HIDDEN .hidden
-#endif
-
-x25519_x86_64_REDMASK51:   .quad 0x0007FFFFFFFFFFFF
-x25519_x86_64_121666_213:  .quad 996687872
-x25519_x86_64_2P0:         .quad 0xFFFFFFFFFFFDA
-x25519_x86_64_2P1234:      .quad 0xFFFFFFFFFFFFE
-x25519_x86_64_4P0:         .quad 0x1FFFFFFFFFFFB4
-x25519_x86_64_4P1234:      .quad 0x1FFFFFFFFFFFFC
-x25519_x86_64_MU0:         .quad 0xED9CE5A30A2C131B
-x25519_x86_64_MU1:         .quad 0x2106215D086329A7
-x25519_x86_64_MU2:         .quad 0xFFFFFFFFFFFFFFEB
-x25519_x86_64_MU3:         .quad 0xFFFFFFFFFFFFFFFF
-x25519_x86_64_MU4:         .quad 0x000000000000000F
-x25519_x86_64_ORDER0:      .quad 0x5812631A5CF5D3ED
-x25519_x86_64_ORDER1:      .quad 0x14DEF9DEA2F79CD6
-x25519_x86_64_ORDER2:      .quad 0x0000000000000000
-x25519_x86_64_ORDER3:      .quad 0x1000000000000000
-x25519_x86_64_EC2D0:       .quad 1859910466990425
-x25519_x86_64_EC2D1:       .quad 932731440258426
-x25519_x86_64_EC2D2:       .quad 1072319116312658
-x25519_x86_64_EC2D3:       .quad 1815898335770999
-x25519_x86_64_EC2D4:       .quad 633789495995903
-x25519_x86_64__38:         .quad 38
-
-.text
-.p2align 5
-
-.globl C_ABI(x25519_x86_64_freeze)
-HIDDEN C_ABI(x25519_x86_64_freeze)
-C_ABI(x25519_x86_64_freeze):
-.cfi_startproc
-/* This is a leaf function and uses the redzone for saving registers. */
-movq %r12,-8(%rsp)
-.cfi_rel_offset r12, -8
-movq   0(%rdi),%rsi
-movq   8(%rdi),%rdx
-movq   16(%rdi),%rcx
-movq   24(%rdi),%r8
-movq   32(%rdi),%r9
-movq x25519_x86_64_REDMASK51(%rip),%rax
-mov  %rax,%r10
-sub  $18,%r10
-mov  $3,%r11
-._reduceloop:
-mov  %rsi,%r12
-shr  $51,%r12
-and  %rax,%rsi
-add  %r12,%rdx
-mov  %rdx,%r12
-shr  $51,%r12
-and  %rax,%rdx
-add  %r12,%rcx
-mov  %rcx,%r12
-shr  $51,%r12
-and  %rax,%rcx
-add  %r12,%r8
-mov  %r8,%r12
-shr  $51,%r12
-and  %rax,%r8
-add  %r12,%r9
-mov  %r9,%r12
-shr  $51,%r12
-and  %rax,%r9
-imulq  $19,%r12,%r12
-add  %r12,%rsi
-sub  $1,%r11
-ja ._reduceloop
-mov  $1,%r12
-cmp  %r10,%rsi
-cmovl %r11,%r12
-cmp  %rax,%rdx
-cmovne %r11,%r12
-cmp  %rax,%rcx
-cmovne %r11,%r12
-cmp  %rax,%r8
-cmovne %r11,%r12
-cmp  %rax,%r9
-cmovne %r11,%r12
-neg  %r12
-and  %r12,%rax
-and  %r12,%r10
-sub  %r10,%rsi
-sub  %rax,%rdx
-sub  %rax,%rcx
-sub  %rax,%r8
-sub  %rax,%r9
-movq   %rsi,0(%rdi)
-movq   %rdx,8(%rdi)
-movq   %rcx,16(%rdi)
-movq   %r8,24(%rdi)
-movq   %r9,32(%rdi)
-movq -8(%rsp),%r12
-ret
-.cfi_endproc
-
-.p2align 5
-.globl C_ABI(x25519_x86_64_mul)
-HIDDEN C_ABI(x25519_x86_64_mul)
-C_ABI(x25519_x86_64_mul):
-.cfi_startproc
-/* This is a leaf function and uses the redzone for saving registers. */
-movq %r12,-8(%rsp)
-.cfi_rel_offset r12, -8
-movq %r13,-16(%rsp)
-.cfi_rel_offset r13, -16
-movq %r14,-24(%rsp)
-.cfi_rel_offset r14, -24
-movq %r15,-32(%rsp)
-.cfi_rel_offset r15, -32
-movq %rbx,-40(%rsp)
-.cfi_rel_offset rbx, -40
-movq %rbp,-48(%rsp)
-.cfi_rel_offset rbp, -48
-mov  %rdx,%rcx
-movq   24(%rsi),%rdx
-imulq  $19,%rdx,%rax
-movq %rax,-64(%rsp)
-mulq  16(%rcx)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq   32(%rsi),%rdx
-imulq  $19,%rdx,%rax
-movq %rax,-72(%rsp)
-mulq  8(%rcx)
-add  %rax,%r8
-adc %rdx,%r9
-movq   0(%rsi),%rax
-mulq  0(%rcx)
-add  %rax,%r8
-adc %rdx,%r9
-movq   0(%rsi),%rax
-mulq  8(%rcx)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq   0(%rsi),%rax
-mulq  16(%rcx)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq   0(%rsi),%rax
-mulq  24(%rcx)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq   0(%rsi),%rax
-mulq  32(%rcx)
-mov  %rax,%rbx
-mov  %rdx,%rbp
-movq   8(%rsi),%rax
-mulq  0(%rcx)
-add  %rax,%r10
-adc %rdx,%r11
-movq   8(%rsi),%rax
-mulq  8(%rcx)
-add  %rax,%r12
-adc %rdx,%r13
-movq   8(%rsi),%rax
-mulq  16(%rcx)
-add  %rax,%r14
-adc %rdx,%r15
-movq   8(%rsi),%rax
-mulq  24(%rcx)
-add  %rax,%rbx
-adc %rdx,%rbp
-movq   8(%rsi),%rdx
-imulq  $19,%rdx,%rax
-mulq  32(%rcx)
-add  %rax,%r8
-adc %rdx,%r9
-movq   16(%rsi),%rax
-mulq  0(%rcx)
-add  %rax,%r12
-adc %rdx,%r13
-movq   16(%rsi),%rax
-mulq  8(%rcx)
-add  %rax,%r14
-adc %rdx,%r15
-movq   16(%rsi),%rax
-mulq  16(%rcx)
-add  %rax,%rbx
-adc %rdx,%rbp
-movq   16(%rsi),%rdx
-imulq  $19,%rdx,%rax
-mulq  24(%rcx)
-add  %rax,%r8
-adc %rdx,%r9
-movq   16(%rsi),%rdx
-imulq  $19,%rdx,%rax
-mulq  32(%rcx)
-add  %rax,%r10
-adc %rdx,%r11
-movq   24(%rsi),%rax
-mulq  0(%rcx)
-add  %rax,%r14
-adc %rdx,%r15
-movq   24(%rsi),%rax
-mulq  8(%rcx)
-add  %rax,%rbx
-adc %rdx,%rbp
-movq -64(%rsp),%rax
-mulq  24(%rcx)
-add  %rax,%r10
-adc %rdx,%r11
-movq -64(%rsp),%rax
-mulq  32(%rcx)
-add  %rax,%r12
-adc %rdx,%r13
-movq   32(%rsi),%rax
-mulq  0(%rcx)
-add  %rax,%rbx
-adc %rdx,%rbp
-movq -72(%rsp),%rax
-mulq  16(%rcx)
-add  %rax,%r10
-adc %rdx,%r11
-movq -72(%rsp),%rax
-mulq  24(%rcx)
-add  %rax,%r12
-adc %rdx,%r13
-movq -72(%rsp),%rax
-mulq  32(%rcx)
-add  %rax,%r14
-adc %rdx,%r15
-movq x25519_x86_64_REDMASK51(%rip),%rsi
-shld $13,%r8,%r9
-and  %rsi,%r8
-shld $13,%r10,%r11
-and  %rsi,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rsi,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rsi,%r14
-add  %r13,%r14
-shld $13,%rbx,%rbp
-and  %rsi,%rbx
-add  %r15,%rbx
-imulq  $19,%rbp,%rdx
-add  %rdx,%r8
-mov  %r8,%rdx
-shr  $51,%rdx
-add  %r10,%rdx
-mov  %rdx,%rcx
-shr  $51,%rdx
-and  %rsi,%r8
-add  %r12,%rdx
-mov  %rdx,%r9
-shr  $51,%rdx
-and  %rsi,%rcx
-add  %r14,%rdx
-mov  %rdx,%rax
-shr  $51,%rdx
-and  %rsi,%r9
-add  %rbx,%rdx
-mov  %rdx,%r10
-shr  $51,%rdx
-and  %rsi,%rax
-imulq  $19,%rdx,%rdx
-add  %rdx,%r8
-and  %rsi,%r10
-movq   %r8,0(%rdi)
-movq   %rcx,8(%rdi)
-movq   %r9,16(%rdi)
-movq   %rax,24(%rdi)
-movq   %r10,32(%rdi)
-movq -8(%rsp),%r12
-movq -16(%rsp),%r13
-movq -24(%rsp),%r14
-movq -32(%rsp),%r15
-movq -40(%rsp),%rbx
-movq -48(%rsp),%rbp
-ret
-.cfi_endproc
-
-.p2align 5
-.globl C_ABI(x25519_x86_64_square)
-HIDDEN C_ABI(x25519_x86_64_square)
-C_ABI(x25519_x86_64_square):
-.cfi_startproc
-/* This is a leaf function and uses the redzone for saving registers. */
-movq %r12,-8(%rsp)
-.cfi_rel_offset r12, -8
-movq %r13,-16(%rsp)
-.cfi_rel_offset r13, -16
-movq %r14,-24(%rsp)
-.cfi_rel_offset r14, -24
-movq %r15,-32(%rsp)
-.cfi_rel_offset r15, -32
-movq %rbx,-40(%rsp)
-.cfi_rel_offset rbx, -40
-movq   0(%rsi),%rax
-mulq  0(%rsi)
-mov  %rax,%rcx
-mov  %rdx,%r8
-movq   0(%rsi),%rax
-shl  $1,%rax
-mulq  8(%rsi)
-mov  %rax,%r9
-mov  %rdx,%r10
-movq   0(%rsi),%rax
-shl  $1,%rax
-mulq  16(%rsi)
-mov  %rax,%r11
-mov  %rdx,%r12
-movq   0(%rsi),%rax
-shl  $1,%rax
-mulq  24(%rsi)
-mov  %rax,%r13
-mov  %rdx,%r14
-movq   0(%rsi),%rax
-shl  $1,%rax
-mulq  32(%rsi)
-mov  %rax,%r15
-mov  %rdx,%rbx
-movq   8(%rsi),%rax
-mulq  8(%rsi)
-add  %rax,%r11
-adc %rdx,%r12
-movq   8(%rsi),%rax
-shl  $1,%rax
-mulq  16(%rsi)
-add  %rax,%r13
-adc %rdx,%r14
-movq   8(%rsi),%rax
-shl  $1,%rax
-mulq  24(%rsi)
-add  %rax,%r15
-adc %rdx,%rbx
-movq   8(%rsi),%rdx
-imulq  $38,%rdx,%rax
-mulq  32(%rsi)
-add  %rax,%rcx
-adc %rdx,%r8
-movq   16(%rsi),%rax
-mulq  16(%rsi)
-add  %rax,%r15
-adc %rdx,%rbx
-movq   16(%rsi),%rdx
-imulq  $38,%rdx,%rax
-mulq  24(%rsi)
-add  %rax,%rcx
-adc %rdx,%r8
-movq   16(%rsi),%rdx
-imulq  $38,%rdx,%rax
-mulq  32(%rsi)
-add  %rax,%r9
-adc %rdx,%r10
-movq   24(%rsi),%rdx
-imulq  $19,%rdx,%rax
-mulq  24(%rsi)
-add  %rax,%r9
-adc %rdx,%r10
-movq   24(%rsi),%rdx
-imulq  $38,%rdx,%rax
-mulq  32(%rsi)
-add  %rax,%r11
-adc %rdx,%r12
-movq   32(%rsi),%rdx
-imulq  $19,%rdx,%rax
-mulq  32(%rsi)
-add  %rax,%r13
-adc %rdx,%r14
-movq x25519_x86_64_REDMASK51(%rip),%rsi
-shld $13,%rcx,%r8
-and  %rsi,%rcx
-shld $13,%r9,%r10
-and  %rsi,%r9
-add  %r8,%r9
-shld $13,%r11,%r12
-and  %rsi,%r11
-add  %r10,%r11
-shld $13,%r13,%r14
-and  %rsi,%r13
-add  %r12,%r13
-shld $13,%r15,%rbx
-and  %rsi,%r15
-add  %r14,%r15
-imulq  $19,%rbx,%rdx
-add  %rdx,%rcx
-mov  %rcx,%rdx
-shr  $51,%rdx
-add  %r9,%rdx
-and  %rsi,%rcx
-mov  %rdx,%r8
-shr  $51,%rdx
-add  %r11,%rdx
-and  %rsi,%r8
-mov  %rdx,%r9
-shr  $51,%rdx
-add  %r13,%rdx
-and  %rsi,%r9
-mov  %rdx,%rax
-shr  $51,%rdx
-add  %r15,%rdx
-and  %rsi,%rax
-mov  %rdx,%r10
-shr  $51,%rdx
-imulq  $19,%rdx,%rdx
-add  %rdx,%rcx
-and  %rsi,%r10
-movq   %rcx,0(%rdi)
-movq   %r8,8(%rdi)
-movq   %r9,16(%rdi)
-movq   %rax,24(%rdi)
-movq   %r10,32(%rdi)
-movq -8(%rsp),%r12
-movq -16(%rsp),%r13
-movq -24(%rsp),%r14
-movq -32(%rsp),%r15
-movq -40(%rsp),%rbx
-ret
-.cfi_endproc
-
-.p2align 5
-.globl C_ABI(x25519_x86_64_ladderstep)
-HIDDEN C_ABI(x25519_x86_64_ladderstep)
-C_ABI(x25519_x86_64_ladderstep):
-.cfi_startproc
-sub $344,%rsp
-.cfi_adjust_cfa_offset 344
-movq %r12,296(%rsp)
-.cfi_rel_offset r12, 296
-movq %r13,304(%rsp)
-.cfi_rel_offset r13, 304
-movq %r14,312(%rsp)
-.cfi_rel_offset r14, 312
-movq %r15,320(%rsp)
-.cfi_rel_offset r15, 320
-movq %rbx,328(%rsp)
-.cfi_rel_offset rbx, 328
-movq %rbp,336(%rsp)
-.cfi_rel_offset rbp, 336
-movq   40(%rdi),%rsi
-movq   48(%rdi),%rdx
-movq   56(%rdi),%rcx
-movq   64(%rdi),%r8
-movq   72(%rdi),%r9
-mov  %rsi,%rax
-mov  %rdx,%r10
-mov  %rcx,%r11
-mov  %r8,%r12
-mov  %r9,%r13
-add  x25519_x86_64_2P0(%rip),%rax
-add  x25519_x86_64_2P1234(%rip),%r10
-add  x25519_x86_64_2P1234(%rip),%r11
-add  x25519_x86_64_2P1234(%rip),%r12
-add  x25519_x86_64_2P1234(%rip),%r13
-addq 80(%rdi),%rsi
-addq 88(%rdi),%rdx
-addq 96(%rdi),%rcx
-addq 104(%rdi),%r8
-addq 112(%rdi),%r9
-subq 80(%rdi),%rax
-subq 88(%rdi),%r10
-subq 96(%rdi),%r11
-subq 104(%rdi),%r12
-subq 112(%rdi),%r13
-movq %rsi,0(%rsp)
-movq %rdx,8(%rsp)
-movq %rcx,16(%rsp)
-movq %r8,24(%rsp)
-movq %r9,32(%rsp)
-movq %rax,40(%rsp)
-movq %r10,48(%rsp)
-movq %r11,56(%rsp)
-movq %r12,64(%rsp)
-movq %r13,72(%rsp)
-movq 40(%rsp),%rax
-mulq  40(%rsp)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq 40(%rsp),%rax
-shl  $1,%rax
-mulq  48(%rsp)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq 40(%rsp),%rax
-shl  $1,%rax
-mulq  56(%rsp)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq 40(%rsp),%rax
-shl  $1,%rax
-mulq  64(%rsp)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq 40(%rsp),%rax
-shl  $1,%rax
-mulq  72(%rsp)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq 48(%rsp),%rax
-mulq  48(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 48(%rsp),%rax
-shl  $1,%rax
-mulq  56(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 48(%rsp),%rax
-shl  $1,%rax
-mulq  64(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 48(%rsp),%rdx
-imulq  $38,%rdx,%rax
-mulq  72(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 56(%rsp),%rax
-mulq  56(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 56(%rsp),%rdx
-imulq  $38,%rdx,%rax
-mulq  64(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 56(%rsp),%rdx
-imulq  $38,%rdx,%rax
-mulq  72(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 64(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  64(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 64(%rsp),%rdx
-imulq  $38,%rdx,%rax
-mulq  72(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 72(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  72(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-and  %rdx,%rsi
-mov  %rcx,%r8
-shr  $51,%rcx
-add  %r10,%rcx
-and  %rdx,%r8
-mov  %rcx,%r9
-shr  $51,%rcx
-add  %r12,%rcx
-and  %rdx,%r9
-mov  %rcx,%rax
-shr  $51,%rcx
-add  %r14,%rcx
-and  %rdx,%rax
-mov  %rcx,%r10
-shr  $51,%rcx
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-movq %rsi,80(%rsp)
-movq %r8,88(%rsp)
-movq %r9,96(%rsp)
-movq %rax,104(%rsp)
-movq %r10,112(%rsp)
-movq 0(%rsp),%rax
-mulq  0(%rsp)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq 0(%rsp),%rax
-shl  $1,%rax
-mulq  8(%rsp)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq 0(%rsp),%rax
-shl  $1,%rax
-mulq  16(%rsp)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq 0(%rsp),%rax
-shl  $1,%rax
-mulq  24(%rsp)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq 0(%rsp),%rax
-shl  $1,%rax
-mulq  32(%rsp)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq 8(%rsp),%rax
-mulq  8(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 8(%rsp),%rax
-shl  $1,%rax
-mulq  16(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 8(%rsp),%rax
-shl  $1,%rax
-mulq  24(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 8(%rsp),%rdx
-imulq  $38,%rdx,%rax
-mulq  32(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 16(%rsp),%rax
-mulq  16(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 16(%rsp),%rdx
-imulq  $38,%rdx,%rax
-mulq  24(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 16(%rsp),%rdx
-imulq  $38,%rdx,%rax
-mulq  32(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 24(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  24(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 24(%rsp),%rdx
-imulq  $38,%rdx,%rax
-mulq  32(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 32(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  32(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-and  %rdx,%rsi
-mov  %rcx,%r8
-shr  $51,%rcx
-add  %r10,%rcx
-and  %rdx,%r8
-mov  %rcx,%r9
-shr  $51,%rcx
-add  %r12,%rcx
-and  %rdx,%r9
-mov  %rcx,%rax
-shr  $51,%rcx
-add  %r14,%rcx
-and  %rdx,%rax
-mov  %rcx,%r10
-shr  $51,%rcx
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-movq %rsi,120(%rsp)
-movq %r8,128(%rsp)
-movq %r9,136(%rsp)
-movq %rax,144(%rsp)
-movq %r10,152(%rsp)
-mov  %rsi,%rsi
-mov  %r8,%rdx
-mov  %r9,%rcx
-mov  %rax,%r8
-mov  %r10,%r9
-add  x25519_x86_64_2P0(%rip),%rsi
-add  x25519_x86_64_2P1234(%rip),%rdx
-add  x25519_x86_64_2P1234(%rip),%rcx
-add  x25519_x86_64_2P1234(%rip),%r8
-add  x25519_x86_64_2P1234(%rip),%r9
-subq 80(%rsp),%rsi
-subq 88(%rsp),%rdx
-subq 96(%rsp),%rcx
-subq 104(%rsp),%r8
-subq 112(%rsp),%r9
-movq %rsi,160(%rsp)
-movq %rdx,168(%rsp)
-movq %rcx,176(%rsp)
-movq %r8,184(%rsp)
-movq %r9,192(%rsp)
-movq   120(%rdi),%rsi
-movq   128(%rdi),%rdx
-movq   136(%rdi),%rcx
-movq   144(%rdi),%r8
-movq   152(%rdi),%r9
-mov  %rsi,%rax
-mov  %rdx,%r10
-mov  %rcx,%r11
-mov  %r8,%r12
-mov  %r9,%r13
-add  x25519_x86_64_2P0(%rip),%rax
-add  x25519_x86_64_2P1234(%rip),%r10
-add  x25519_x86_64_2P1234(%rip),%r11
-add  x25519_x86_64_2P1234(%rip),%r12
-add  x25519_x86_64_2P1234(%rip),%r13
-addq 160(%rdi),%rsi
-addq 168(%rdi),%rdx
-addq 176(%rdi),%rcx
-addq 184(%rdi),%r8
-addq 192(%rdi),%r9
-subq 160(%rdi),%rax
-subq 168(%rdi),%r10
-subq 176(%rdi),%r11
-subq 184(%rdi),%r12
-subq 192(%rdi),%r13
-movq %rsi,200(%rsp)
-movq %rdx,208(%rsp)
-movq %rcx,216(%rsp)
-movq %r8,224(%rsp)
-movq %r9,232(%rsp)
-movq %rax,240(%rsp)
-movq %r10,248(%rsp)
-movq %r11,256(%rsp)
-movq %r12,264(%rsp)
-movq %r13,272(%rsp)
-movq 224(%rsp),%rsi
-imulq  $19,%rsi,%rax
-movq %rax,280(%rsp)
-mulq  56(%rsp)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq 232(%rsp),%rdx
-imulq  $19,%rdx,%rax
-movq %rax,288(%rsp)
-mulq  48(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 200(%rsp),%rax
-mulq  40(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 200(%rsp),%rax
-mulq  48(%rsp)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq 200(%rsp),%rax
-mulq  56(%rsp)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq 200(%rsp),%rax
-mulq  64(%rsp)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq 200(%rsp),%rax
-mulq  72(%rsp)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq 208(%rsp),%rax
-mulq  40(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 208(%rsp),%rax
-mulq  48(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 208(%rsp),%rax
-mulq  56(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 208(%rsp),%rax
-mulq  64(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 208(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  72(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 216(%rsp),%rax
-mulq  40(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 216(%rsp),%rax
-mulq  48(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 216(%rsp),%rax
-mulq  56(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 216(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  64(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 216(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  72(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 224(%rsp),%rax
-mulq  40(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 224(%rsp),%rax
-mulq  48(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 280(%rsp),%rax
-mulq  64(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 280(%rsp),%rax
-mulq  72(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 232(%rsp),%rax
-mulq  40(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 288(%rsp),%rax
-mulq  56(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 288(%rsp),%rax
-mulq  64(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 288(%rsp),%rax
-mulq  72(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-mov  %rcx,%r8
-shr  $51,%rcx
-and  %rdx,%rsi
-add  %r10,%rcx
-mov  %rcx,%r9
-shr  $51,%rcx
-and  %rdx,%r8
-add  %r12,%rcx
-mov  %rcx,%rax
-shr  $51,%rcx
-and  %rdx,%r9
-add  %r14,%rcx
-mov  %rcx,%r10
-shr  $51,%rcx
-and  %rdx,%rax
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-movq %rsi,40(%rsp)
-movq %r8,48(%rsp)
-movq %r9,56(%rsp)
-movq %rax,64(%rsp)
-movq %r10,72(%rsp)
-movq 264(%rsp),%rsi
-imulq  $19,%rsi,%rax
-movq %rax,200(%rsp)
-mulq  16(%rsp)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq 272(%rsp),%rdx
-imulq  $19,%rdx,%rax
-movq %rax,208(%rsp)
-mulq  8(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 240(%rsp),%rax
-mulq  0(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 240(%rsp),%rax
-mulq  8(%rsp)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq 240(%rsp),%rax
-mulq  16(%rsp)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq 240(%rsp),%rax
-mulq  24(%rsp)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq 240(%rsp),%rax
-mulq  32(%rsp)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq 248(%rsp),%rax
-mulq  0(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 248(%rsp),%rax
-mulq  8(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 248(%rsp),%rax
-mulq  16(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 248(%rsp),%rax
-mulq  24(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 248(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  32(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 256(%rsp),%rax
-mulq  0(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 256(%rsp),%rax
-mulq  8(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 256(%rsp),%rax
-mulq  16(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 256(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  24(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 256(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  32(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 264(%rsp),%rax
-mulq  0(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 264(%rsp),%rax
-mulq  8(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 200(%rsp),%rax
-mulq  24(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 200(%rsp),%rax
-mulq  32(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 272(%rsp),%rax
-mulq  0(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 208(%rsp),%rax
-mulq  16(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 208(%rsp),%rax
-mulq  24(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 208(%rsp),%rax
-mulq  32(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-mov  %rcx,%r8
-shr  $51,%rcx
-and  %rdx,%rsi
-add  %r10,%rcx
-mov  %rcx,%r9
-shr  $51,%rcx
-and  %rdx,%r8
-add  %r12,%rcx
-mov  %rcx,%rax
-shr  $51,%rcx
-and  %rdx,%r9
-add  %r14,%rcx
-mov  %rcx,%r10
-shr  $51,%rcx
-and  %rdx,%rax
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-mov  %rsi,%rdx
-mov  %r8,%rcx
-mov  %r9,%r11
-mov  %rax,%r12
-mov  %r10,%r13
-add  x25519_x86_64_2P0(%rip),%rdx
-add  x25519_x86_64_2P1234(%rip),%rcx
-add  x25519_x86_64_2P1234(%rip),%r11
-add  x25519_x86_64_2P1234(%rip),%r12
-add  x25519_x86_64_2P1234(%rip),%r13
-addq 40(%rsp),%rsi
-addq 48(%rsp),%r8
-addq 56(%rsp),%r9
-addq 64(%rsp),%rax
-addq 72(%rsp),%r10
-subq 40(%rsp),%rdx
-subq 48(%rsp),%rcx
-subq 56(%rsp),%r11
-subq 64(%rsp),%r12
-subq 72(%rsp),%r13
-movq   %rsi,120(%rdi)
-movq   %r8,128(%rdi)
-movq   %r9,136(%rdi)
-movq   %rax,144(%rdi)
-movq   %r10,152(%rdi)
-movq   %rdx,160(%rdi)
-movq   %rcx,168(%rdi)
-movq   %r11,176(%rdi)
-movq   %r12,184(%rdi)
-movq   %r13,192(%rdi)
-movq   120(%rdi),%rax
-mulq  120(%rdi)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq   120(%rdi),%rax
-shl  $1,%rax
-mulq  128(%rdi)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq   120(%rdi),%rax
-shl  $1,%rax
-mulq  136(%rdi)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq   120(%rdi),%rax
-shl  $1,%rax
-mulq  144(%rdi)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq   120(%rdi),%rax
-shl  $1,%rax
-mulq  152(%rdi)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq   128(%rdi),%rax
-mulq  128(%rdi)
-add  %rax,%r10
-adc %rdx,%r11
-movq   128(%rdi),%rax
-shl  $1,%rax
-mulq  136(%rdi)
-add  %rax,%r12
-adc %rdx,%r13
-movq   128(%rdi),%rax
-shl  $1,%rax
-mulq  144(%rdi)
-add  %rax,%r14
-adc %rdx,%r15
-movq   128(%rdi),%rdx
-imulq  $38,%rdx,%rax
-mulq  152(%rdi)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   136(%rdi),%rax
-mulq  136(%rdi)
-add  %rax,%r14
-adc %rdx,%r15
-movq   136(%rdi),%rdx
-imulq  $38,%rdx,%rax
-mulq  144(%rdi)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   136(%rdi),%rdx
-imulq  $38,%rdx,%rax
-mulq  152(%rdi)
-add  %rax,%r8
-adc %rdx,%r9
-movq   144(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  144(%rdi)
-add  %rax,%r8
-adc %rdx,%r9
-movq   144(%rdi),%rdx
-imulq  $38,%rdx,%rax
-mulq  152(%rdi)
-add  %rax,%r10
-adc %rdx,%r11
-movq   152(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  152(%rdi)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-and  %rdx,%rsi
-mov  %rcx,%r8
-shr  $51,%rcx
-add  %r10,%rcx
-and  %rdx,%r8
-mov  %rcx,%r9
-shr  $51,%rcx
-add  %r12,%rcx
-and  %rdx,%r9
-mov  %rcx,%rax
-shr  $51,%rcx
-add  %r14,%rcx
-and  %rdx,%rax
-mov  %rcx,%r10
-shr  $51,%rcx
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-movq   %rsi,120(%rdi)
-movq   %r8,128(%rdi)
-movq   %r9,136(%rdi)
-movq   %rax,144(%rdi)
-movq   %r10,152(%rdi)
-movq   160(%rdi),%rax
-mulq  160(%rdi)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq   160(%rdi),%rax
-shl  $1,%rax
-mulq  168(%rdi)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq   160(%rdi),%rax
-shl  $1,%rax
-mulq  176(%rdi)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq   160(%rdi),%rax
-shl  $1,%rax
-mulq  184(%rdi)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq   160(%rdi),%rax
-shl  $1,%rax
-mulq  192(%rdi)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq   168(%rdi),%rax
-mulq  168(%rdi)
-add  %rax,%r10
-adc %rdx,%r11
-movq   168(%rdi),%rax
-shl  $1,%rax
-mulq  176(%rdi)
-add  %rax,%r12
-adc %rdx,%r13
-movq   168(%rdi),%rax
-shl  $1,%rax
-mulq  184(%rdi)
-add  %rax,%r14
-adc %rdx,%r15
-movq   168(%rdi),%rdx
-imulq  $38,%rdx,%rax
-mulq  192(%rdi)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   176(%rdi),%rax
-mulq  176(%rdi)
-add  %rax,%r14
-adc %rdx,%r15
-movq   176(%rdi),%rdx
-imulq  $38,%rdx,%rax
-mulq  184(%rdi)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   176(%rdi),%rdx
-imulq  $38,%rdx,%rax
-mulq  192(%rdi)
-add  %rax,%r8
-adc %rdx,%r9
-movq   184(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  184(%rdi)
-add  %rax,%r8
-adc %rdx,%r9
-movq   184(%rdi),%rdx
-imulq  $38,%rdx,%rax
-mulq  192(%rdi)
-add  %rax,%r10
-adc %rdx,%r11
-movq   192(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  192(%rdi)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-and  %rdx,%rsi
-mov  %rcx,%r8
-shr  $51,%rcx
-add  %r10,%rcx
-and  %rdx,%r8
-mov  %rcx,%r9
-shr  $51,%rcx
-add  %r12,%rcx
-and  %rdx,%r9
-mov  %rcx,%rax
-shr  $51,%rcx
-add  %r14,%rcx
-and  %rdx,%rax
-mov  %rcx,%r10
-shr  $51,%rcx
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-movq   %rsi,160(%rdi)
-movq   %r8,168(%rdi)
-movq   %r9,176(%rdi)
-movq   %rax,184(%rdi)
-movq   %r10,192(%rdi)
-movq   184(%rdi),%rsi
-imulq  $19,%rsi,%rax
-movq %rax,0(%rsp)
-mulq  16(%rdi)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq   192(%rdi),%rdx
-imulq  $19,%rdx,%rax
-movq %rax,8(%rsp)
-mulq  8(%rdi)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   160(%rdi),%rax
-mulq  0(%rdi)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   160(%rdi),%rax
-mulq  8(%rdi)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq   160(%rdi),%rax
-mulq  16(%rdi)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq   160(%rdi),%rax
-mulq  24(%rdi)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq   160(%rdi),%rax
-mulq  32(%rdi)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq   168(%rdi),%rax
-mulq  0(%rdi)
-add  %rax,%r8
-adc %rdx,%r9
-movq   168(%rdi),%rax
-mulq  8(%rdi)
-add  %rax,%r10
-adc %rdx,%r11
-movq   168(%rdi),%rax
-mulq  16(%rdi)
-add  %rax,%r12
-adc %rdx,%r13
-movq   168(%rdi),%rax
-mulq  24(%rdi)
-add  %rax,%r14
-adc %rdx,%r15
-movq   168(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  32(%rdi)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   176(%rdi),%rax
-mulq  0(%rdi)
-add  %rax,%r10
-adc %rdx,%r11
-movq   176(%rdi),%rax
-mulq  8(%rdi)
-add  %rax,%r12
-adc %rdx,%r13
-movq   176(%rdi),%rax
-mulq  16(%rdi)
-add  %rax,%r14
-adc %rdx,%r15
-movq   176(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  24(%rdi)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   176(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  32(%rdi)
-add  %rax,%r8
-adc %rdx,%r9
-movq   184(%rdi),%rax
-mulq  0(%rdi)
-add  %rax,%r12
-adc %rdx,%r13
-movq   184(%rdi),%rax
-mulq  8(%rdi)
-add  %rax,%r14
-adc %rdx,%r15
-movq 0(%rsp),%rax
-mulq  24(%rdi)
-add  %rax,%r8
-adc %rdx,%r9
-movq 0(%rsp),%rax
-mulq  32(%rdi)
-add  %rax,%r10
-adc %rdx,%r11
-movq   192(%rdi),%rax
-mulq  0(%rdi)
-add  %rax,%r14
-adc %rdx,%r15
-movq 8(%rsp),%rax
-mulq  16(%rdi)
-add  %rax,%r8
-adc %rdx,%r9
-movq 8(%rsp),%rax
-mulq  24(%rdi)
-add  %rax,%r10
-adc %rdx,%r11
-movq 8(%rsp),%rax
-mulq  32(%rdi)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-mov  %rcx,%r8
-shr  $51,%rcx
-and  %rdx,%rsi
-add  %r10,%rcx
-mov  %rcx,%r9
-shr  $51,%rcx
-and  %rdx,%r8
-add  %r12,%rcx
-mov  %rcx,%rax
-shr  $51,%rcx
-and  %rdx,%r9
-add  %r14,%rcx
-mov  %rcx,%r10
-shr  $51,%rcx
-and  %rdx,%rax
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-movq   %rsi,160(%rdi)
-movq   %r8,168(%rdi)
-movq   %r9,176(%rdi)
-movq   %rax,184(%rdi)
-movq   %r10,192(%rdi)
-movq 144(%rsp),%rsi
-imulq  $19,%rsi,%rax
-movq %rax,0(%rsp)
-mulq  96(%rsp)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq 152(%rsp),%rdx
-imulq  $19,%rdx,%rax
-movq %rax,8(%rsp)
-mulq  88(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 120(%rsp),%rax
-mulq  80(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 120(%rsp),%rax
-mulq  88(%rsp)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq 120(%rsp),%rax
-mulq  96(%rsp)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq 120(%rsp),%rax
-mulq  104(%rsp)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq 120(%rsp),%rax
-mulq  112(%rsp)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq 128(%rsp),%rax
-mulq  80(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 128(%rsp),%rax
-mulq  88(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 128(%rsp),%rax
-mulq  96(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 128(%rsp),%rax
-mulq  104(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 128(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  112(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 136(%rsp),%rax
-mulq  80(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 136(%rsp),%rax
-mulq  88(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 136(%rsp),%rax
-mulq  96(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 136(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  104(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq 136(%rsp),%rdx
-imulq  $19,%rdx,%rax
-mulq  112(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 144(%rsp),%rax
-mulq  80(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq 144(%rsp),%rax
-mulq  88(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 0(%rsp),%rax
-mulq  104(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 0(%rsp),%rax
-mulq  112(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 152(%rsp),%rax
-mulq  80(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 8(%rsp),%rax
-mulq  96(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 8(%rsp),%rax
-mulq  104(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 8(%rsp),%rax
-mulq  112(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-mov  %rcx,%r8
-shr  $51,%rcx
-and  %rdx,%rsi
-add  %r10,%rcx
-mov  %rcx,%r9
-shr  $51,%rcx
-and  %rdx,%r8
-add  %r12,%rcx
-mov  %rcx,%rax
-shr  $51,%rcx
-and  %rdx,%r9
-add  %r14,%rcx
-mov  %rcx,%r10
-shr  $51,%rcx
-and  %rdx,%rax
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-movq   %rsi,40(%rdi)
-movq   %r8,48(%rdi)
-movq   %r9,56(%rdi)
-movq   %rax,64(%rdi)
-movq   %r10,72(%rdi)
-movq 160(%rsp),%rax
-mulq  x25519_x86_64_121666_213(%rip)
-shr  $13,%rax
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq 168(%rsp),%rax
-mulq  x25519_x86_64_121666_213(%rip)
-shr  $13,%rax
-add  %rax,%rcx
-mov  %rdx,%r8
-movq 176(%rsp),%rax
-mulq  x25519_x86_64_121666_213(%rip)
-shr  $13,%rax
-add  %rax,%r8
-mov  %rdx,%r9
-movq 184(%rsp),%rax
-mulq  x25519_x86_64_121666_213(%rip)
-shr  $13,%rax
-add  %rax,%r9
-mov  %rdx,%r10
-movq 192(%rsp),%rax
-mulq  x25519_x86_64_121666_213(%rip)
-shr  $13,%rax
-add  %rax,%r10
-imulq  $19,%rdx,%rdx
-add  %rdx,%rsi
-addq 80(%rsp),%rsi
-addq 88(%rsp),%rcx
-addq 96(%rsp),%r8
-addq 104(%rsp),%r9
-addq 112(%rsp),%r10
-movq   %rsi,80(%rdi)
-movq   %rcx,88(%rdi)
-movq   %r8,96(%rdi)
-movq   %r9,104(%rdi)
-movq   %r10,112(%rdi)
-movq   104(%rdi),%rsi
-imulq  $19,%rsi,%rax
-movq %rax,0(%rsp)
-mulq  176(%rsp)
-mov  %rax,%rsi
-mov  %rdx,%rcx
-movq   112(%rdi),%rdx
-imulq  $19,%rdx,%rax
-movq %rax,8(%rsp)
-mulq  168(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   80(%rdi),%rax
-mulq  160(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   80(%rdi),%rax
-mulq  168(%rsp)
-mov  %rax,%r8
-mov  %rdx,%r9
-movq   80(%rdi),%rax
-mulq  176(%rsp)
-mov  %rax,%r10
-mov  %rdx,%r11
-movq   80(%rdi),%rax
-mulq  184(%rsp)
-mov  %rax,%r12
-mov  %rdx,%r13
-movq   80(%rdi),%rax
-mulq  192(%rsp)
-mov  %rax,%r14
-mov  %rdx,%r15
-movq   88(%rdi),%rax
-mulq  160(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq   88(%rdi),%rax
-mulq  168(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq   88(%rdi),%rax
-mulq  176(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq   88(%rdi),%rax
-mulq  184(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq   88(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  192(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   96(%rdi),%rax
-mulq  160(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq   96(%rdi),%rax
-mulq  168(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq   96(%rdi),%rax
-mulq  176(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq   96(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  184(%rsp)
-add  %rax,%rsi
-adc %rdx,%rcx
-movq   96(%rdi),%rdx
-imulq  $19,%rdx,%rax
-mulq  192(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq   104(%rdi),%rax
-mulq  160(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq   104(%rdi),%rax
-mulq  168(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 0(%rsp),%rax
-mulq  184(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 0(%rsp),%rax
-mulq  192(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq   112(%rdi),%rax
-mulq  160(%rsp)
-add  %rax,%r14
-adc %rdx,%r15
-movq 8(%rsp),%rax
-mulq  176(%rsp)
-add  %rax,%r8
-adc %rdx,%r9
-movq 8(%rsp),%rax
-mulq  184(%rsp)
-add  %rax,%r10
-adc %rdx,%r11
-movq 8(%rsp),%rax
-mulq  192(%rsp)
-add  %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and  %rdx,%rsi
-shld $13,%r8,%r9
-and  %rdx,%r8
-add  %rcx,%r8
-shld $13,%r10,%r11
-and  %rdx,%r10
-add  %r9,%r10
-shld $13,%r12,%r13
-and  %rdx,%r12
-add  %r11,%r12
-shld $13,%r14,%r15
-and  %rdx,%r14
-add  %r13,%r14
-imulq  $19,%r15,%rcx
-add  %rcx,%rsi
-mov  %rsi,%rcx
-shr  $51,%rcx
-add  %r8,%rcx
-mov  %rcx,%r8
-shr  $51,%rcx
-and  %rdx,%rsi
-add  %r10,%rcx
-mov  %rcx,%r9
-shr  $51,%rcx
-and  %rdx,%r8
-add  %r12,%rcx
-mov  %rcx,%rax
-shr  $51,%rcx
-and  %rdx,%r9
-add  %r14,%rcx
-mov  %rcx,%r10
-shr  $51,%rcx
-and  %rdx,%rax
-imulq  $19,%rcx,%rcx
-add  %rcx,%rsi
-and  %rdx,%r10
-movq   %rsi,80(%rdi)
-movq   %r8,88(%rdi)
-movq   %r9,96(%rdi)
-movq   %rax,104(%rdi)
-movq   %r10,112(%rdi)
-movq 296(%rsp),%r12
-movq 304(%rsp),%r13
-movq 312(%rsp),%r14
-movq 320(%rsp),%r15
-movq 328(%rsp),%rbx
-movq 336(%rsp),%rbp
-add $344,%rsp
-.cfi_adjust_cfa_offset -344
-ret
-.cfi_endproc
-
-.p2align 5
-.globl C_ABI(x25519_x86_64_work_cswap)
-HIDDEN C_ABI(x25519_x86_64_work_cswap)
-C_ABI(x25519_x86_64_work_cswap):
-.cfi_startproc
-subq $1,%rsi
-notq %rsi
-movq %rsi,%xmm15
-pshufd $0x44,%xmm15,%xmm15
-movdqu 0(%rdi),%xmm0
-movdqu 16(%rdi),%xmm2
-movdqu 32(%rdi),%xmm4
-movdqu 48(%rdi),%xmm6
-movdqu 64(%rdi),%xmm8
-movdqu 80(%rdi),%xmm1
-movdqu 96(%rdi),%xmm3
-movdqu 112(%rdi),%xmm5
-movdqu 128(%rdi),%xmm7
-movdqu 144(%rdi),%xmm9
-movdqa %xmm1,%xmm10
-movdqa %xmm3,%xmm11
-movdqa %xmm5,%xmm12
-movdqa %xmm7,%xmm13
-movdqa %xmm9,%xmm14
-pxor %xmm0,%xmm10
-pxor %xmm2,%xmm11
-pxor %xmm4,%xmm12
-pxor %xmm6,%xmm13
-pxor %xmm8,%xmm14
-pand %xmm15,%xmm10
-pand %xmm15,%xmm11
-pand %xmm15,%xmm12
-pand %xmm15,%xmm13
-pand %xmm15,%xmm14
-pxor %xmm10,%xmm0
-pxor %xmm10,%xmm1
-pxor %xmm11,%xmm2
-pxor %xmm11,%xmm3
-pxor %xmm12,%xmm4
-pxor %xmm12,%xmm5
-pxor %xmm13,%xmm6
-pxor %xmm13,%xmm7
-pxor %xmm14,%xmm8
-pxor %xmm14,%xmm9
-movdqu %xmm0,0(%rdi)
-movdqu %xmm2,16(%rdi)
-movdqu %xmm4,32(%rdi)
-movdqu %xmm6,48(%rdi)
-movdqu %xmm8,64(%rdi)
-movdqu %xmm1,80(%rdi)
-movdqu %xmm3,96(%rdi)
-movdqu %xmm5,112(%rdi)
-movdqu %xmm7,128(%rdi)
-movdqu %xmm9,144(%rdi)
-ret
-.cfi_endproc
-
-#endif  /* __x86_64__ */
-#endif  /* !OPENSSL_NO_ASM */
diff --git a/crypto/curve25519/x25519-x86_64.c b/crypto/curve25519/x25519-x86_64.c
deleted file mode 100644
index 41db0bd..0000000
--- a/crypto/curve25519/x25519-x86_64.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/* Copyright (c) 2015, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-// This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
-// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
-// public domain but this file has the ISC license just to keep licencing
-// simple.
-//
-// The field functions are shared by Ed25519 and X25519 where possible.
-
-#include <openssl/curve25519.h>
-
-#include <string.h>
-
-#include "../internal.h"
-#include "../../third_party/fiat/internal.h"
-
-
-#if defined(BORINGSSL_X25519_X86_64)
-
-typedef struct { uint64_t v[5]; } fe25519;
-
-// These functions are defined in asm/x25519-x86_64.S
-void x25519_x86_64_work_cswap(fe25519 *, uint64_t);
-void x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b);
-void x25519_x86_64_square(fe25519 *out, const fe25519 *a);
-void x25519_x86_64_freeze(fe25519 *);
-void x25519_x86_64_ladderstep(fe25519 *work);
-
-static void fe25519_setint(fe25519 *r, unsigned v) {
-  r->v[0] = v;
-  r->v[1] = 0;
-  r->v[2] = 0;
-  r->v[3] = 0;
-  r->v[4] = 0;
-}
-
-// Assumes input x being reduced below 2^255
-static void fe25519_pack(unsigned char r[32], const fe25519 *x) {
-  fe25519 t;
-  t = *x;
-  x25519_x86_64_freeze(&t);
-
-  r[0] = (uint8_t)(t.v[0] & 0xff);
-  r[1] = (uint8_t)((t.v[0] >> 8) & 0xff);
-  r[2] = (uint8_t)((t.v[0] >> 16) & 0xff);
-  r[3] = (uint8_t)((t.v[0] >> 24) & 0xff);
-  r[4] = (uint8_t)((t.v[0] >> 32) & 0xff);
-  r[5] = (uint8_t)((t.v[0] >> 40) & 0xff);
-  r[6] = (uint8_t)((t.v[0] >> 48));
-
-  r[6] ^= (uint8_t)((t.v[1] << 3) & 0xf8);
-  r[7] = (uint8_t)((t.v[1] >> 5) & 0xff);
-  r[8] = (uint8_t)((t.v[1] >> 13) & 0xff);
-  r[9] = (uint8_t)((t.v[1] >> 21) & 0xff);
-  r[10] = (uint8_t)((t.v[1] >> 29) & 0xff);
-  r[11] = (uint8_t)((t.v[1] >> 37) & 0xff);
-  r[12] = (uint8_t)((t.v[1] >> 45));
-
-  r[12] ^= (uint8_t)((t.v[2] << 6) & 0xc0);
-  r[13] = (uint8_t)((t.v[2] >> 2) & 0xff);
-  r[14] = (uint8_t)((t.v[2] >> 10) & 0xff);
-  r[15] = (uint8_t)((t.v[2] >> 18) & 0xff);
-  r[16] = (uint8_t)((t.v[2] >> 26) & 0xff);
-  r[17] = (uint8_t)((t.v[2] >> 34) & 0xff);
-  r[18] = (uint8_t)((t.v[2] >> 42) & 0xff);
-  r[19] = (uint8_t)((t.v[2] >> 50));
-
-  r[19] ^= (uint8_t)((t.v[3] << 1) & 0xfe);
-  r[20] = (uint8_t)((t.v[3] >> 7) & 0xff);
-  r[21] = (uint8_t)((t.v[3] >> 15) & 0xff);
-  r[22] = (uint8_t)((t.v[3] >> 23) & 0xff);
-  r[23] = (uint8_t)((t.v[3] >> 31) & 0xff);
-  r[24] = (uint8_t)((t.v[3] >> 39) & 0xff);
-  r[25] = (uint8_t)((t.v[3] >> 47));
-
-  r[25] ^= (uint8_t)((t.v[4] << 4) & 0xf0);
-  r[26] = (uint8_t)((t.v[4] >> 4) & 0xff);
-  r[27] = (uint8_t)((t.v[4] >> 12) & 0xff);
-  r[28] = (uint8_t)((t.v[4] >> 20) & 0xff);
-  r[29] = (uint8_t)((t.v[4] >> 28) & 0xff);
-  r[30] = (uint8_t)((t.v[4] >> 36) & 0xff);
-  r[31] = (uint8_t)((t.v[4] >> 44));
-}
-
-static void fe25519_unpack(fe25519 *r, const uint8_t x[32]) {
-  r->v[0] = x[0];
-  r->v[0] += (uint64_t)x[1] << 8;
-  r->v[0] += (uint64_t)x[2] << 16;
-  r->v[0] += (uint64_t)x[3] << 24;
-  r->v[0] += (uint64_t)x[4] << 32;
-  r->v[0] += (uint64_t)x[5] << 40;
-  r->v[0] += ((uint64_t)x[6] & 7) << 48;
-
-  r->v[1] = x[6] >> 3;
-  r->v[1] += (uint64_t)x[7] << 5;
-  r->v[1] += (uint64_t)x[8] << 13;
-  r->v[1] += (uint64_t)x[9] << 21;
-  r->v[1] += (uint64_t)x[10] << 29;
-  r->v[1] += (uint64_t)x[11] << 37;
-  r->v[1] += ((uint64_t)x[12] & 63) << 45;
-
-  r->v[2] = x[12] >> 6;
-  r->v[2] += (uint64_t)x[13] << 2;
-  r->v[2] += (uint64_t)x[14] << 10;
-  r->v[2] += (uint64_t)x[15] << 18;
-  r->v[2] += (uint64_t)x[16] << 26;
-  r->v[2] += (uint64_t)x[17] << 34;
-  r->v[2] += (uint64_t)x[18] << 42;
-  r->v[2] += ((uint64_t)x[19] & 1) << 50;
-
-  r->v[3] = x[19] >> 1;
-  r->v[3] += (uint64_t)x[20] << 7;
-  r->v[3] += (uint64_t)x[21] << 15;
-  r->v[3] += (uint64_t)x[22] << 23;
-  r->v[3] += (uint64_t)x[23] << 31;
-  r->v[3] += (uint64_t)x[24] << 39;
-  r->v[3] += ((uint64_t)x[25] & 15) << 47;
-
-  r->v[4] = x[25] >> 4;
-  r->v[4] += (uint64_t)x[26] << 4;
-  r->v[4] += (uint64_t)x[27] << 12;
-  r->v[4] += (uint64_t)x[28] << 20;
-  r->v[4] += (uint64_t)x[29] << 28;
-  r->v[4] += (uint64_t)x[30] << 36;
-  r->v[4] += ((uint64_t)x[31] & 127) << 44;
-}
-
-static void fe25519_invert(fe25519 *r, const fe25519 *x) {
-  fe25519 z2;
-  fe25519 z9;
-  fe25519 z11;
-  fe25519 z2_5_0;
-  fe25519 z2_10_0;
-  fe25519 z2_20_0;
-  fe25519 z2_50_0;
-  fe25519 z2_100_0;
-  fe25519 t;
-  int i;
-
-  /* 2 */ x25519_x86_64_square(&z2, x);
-  /* 4 */ x25519_x86_64_square(&t, &z2);
-  /* 8 */ x25519_x86_64_square(&t, &t);
-  /* 9 */ x25519_x86_64_mul(&z9, &t, x);
-  /* 11 */ x25519_x86_64_mul(&z11, &z9, &z2);
-  /* 22 */ x25519_x86_64_square(&t, &z11);
-  /* 2^5 - 2^0 = 31 */ x25519_x86_64_mul(&z2_5_0, &t, &z9);
-
-  /* 2^6 - 2^1 */ x25519_x86_64_square(&t, &z2_5_0);
-  /* 2^20 - 2^10 */ for (i = 1; i < 5; i++) { x25519_x86_64_square(&t, &t); }
-  /* 2^10 - 2^0 */ x25519_x86_64_mul(&z2_10_0, &t, &z2_5_0);
-
-  /* 2^11 - 2^1 */ x25519_x86_64_square(&t, &z2_10_0);
-  /* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); }
-  /* 2^20 - 2^0 */ x25519_x86_64_mul(&z2_20_0, &t, &z2_10_0);
-
-  /* 2^21 - 2^1 */ x25519_x86_64_square(&t, &z2_20_0);
-  /* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { x25519_x86_64_square(&t, &t); }
-  /* 2^40 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_20_0);
-
-  /* 2^41 - 2^1 */ x25519_x86_64_square(&t, &t);
-  /* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); }
-  /* 2^50 - 2^0 */ x25519_x86_64_mul(&z2_50_0, &t, &z2_10_0);
-
-  /* 2^51 - 2^1 */ x25519_x86_64_square(&t, &z2_50_0);
-  /* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); }
-  /* 2^100 - 2^0 */ x25519_x86_64_mul(&z2_100_0, &t, &z2_50_0);
-
-  /* 2^101 - 2^1 */ x25519_x86_64_square(&t, &z2_100_0);
-  /* 2^200 - 2^100 */ for (i = 1; i < 100; i++) {
-    x25519_x86_64_square(&t, &t);
-  }
-  /* 2^200 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_100_0);
-
-  /* 2^201 - 2^1 */ x25519_x86_64_square(&t, &t);
-  /* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); }
-  /* 2^250 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_50_0);
-
-  /* 2^251 - 2^1 */ x25519_x86_64_square(&t, &t);
-  /* 2^252 - 2^2 */ x25519_x86_64_square(&t, &t);
-  /* 2^253 - 2^3 */ x25519_x86_64_square(&t, &t);
-
-  /* 2^254 - 2^4 */ x25519_x86_64_square(&t, &t);
-
-  /* 2^255 - 2^5 */ x25519_x86_64_square(&t, &t);
-  /* 2^255 - 21 */ x25519_x86_64_mul(r, &t, &z11);
-}
-
-static void mladder(fe25519 *xr, fe25519 *zr, const uint8_t s[32]) {
-  fe25519 work[5];
-
-  work[0] = *xr;
-  fe25519_setint(work + 1, 1);
-  fe25519_setint(work + 2, 0);
-  work[3] = *xr;
-  fe25519_setint(work + 4, 1);
-
-  int i, j;
-  uint8_t prevbit = 0;
-
-  j = 6;
-  for (i = 31; i >= 0; i--) {
-    while (j >= 0) {
-      const uint8_t bit = 1 & (s[i] >> j);
-      const uint64_t swap = bit ^ prevbit;
-      prevbit = bit;
-      x25519_x86_64_work_cswap(work + 1, swap);
-      x25519_x86_64_ladderstep(work);
-      j -= 1;
-    }
-    j = 7;
-  }
-
-  *xr = work[1];
-  *zr = work[2];
-}
-
-void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
-                  const uint8_t point[32]) {
-  uint8_t e[32];
-  OPENSSL_memcpy(e, scalar, sizeof(e));
-
-  e[0] &= 248;
-  e[31] &= 127;
-  e[31] |= 64;
-
-  fe25519 t;
-  fe25519 z;
-  fe25519_unpack(&t, point);
-  mladder(&t, &z, e);
-  fe25519_invert(&z, &z);
-  x25519_x86_64_mul(&t, &t, &z);
-  fe25519_pack(out, &t);
-}
-
-#endif  // BORINGSSL_X25519_X86_64
diff --git a/third_party/fiat/curve25519.c b/third_party/fiat/curve25519.c
index dfa4a39..d5928af 100644
--- a/third_party/fiat/curve25519.c
+++ b/third_party/fiat/curve25519.c
@@ -512,8 +512,6 @@
   fe_sqr_impl(h->v, f->v);
 }
 
-#if !defined(BORINGSSL_X25519_X86_64)
-
 // Replace (f,g) with (g,f) if b == 1;
 // replace (f,g) with (f,g) if b == 0.
 //
@@ -589,8 +587,6 @@
   assert_fe(h->v);
 }
 
-#endif  // !BORINGSSL_X25519_X86_64
-
 // Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0.
 static void fe_neg_impl(uint64_t out[5], const uint64_t in2[5]) {
   { const uint64_t x10 = 0;
@@ -1201,8 +1197,6 @@
   fe_sqr_impl(h->v, f->v);
 }
 
-#if !defined(BORINGSSL_X25519_X86_64)
-
 // Replace (f,g) with (g,f) if b == 1;
 // replace (f,g) with (f,g) if b == 0.
 //
@@ -1342,8 +1336,6 @@
   assert_fe(h->v);
 }
 
-#endif  // !BORINGSSL_X25519_X86_64
-
 // Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0.
 static void fe_neg_impl(uint32_t out[10], const uint32_t in2[10]) {
   { const uint32_t x20 = 0;
@@ -3063,15 +3055,6 @@
 }
 
 
-#if defined(BORINGSSL_X25519_X86_64)
-
-static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
-                               const uint8_t point[32]) {
-  x25519_x86_64(out, scalar, point);
-}
-
-#else
-
 static void x25519_scalar_mult_generic(uint8_t out[32],
                                        const uint8_t scalar[32],
                                        const uint8_t point[32]) {
@@ -3166,9 +3149,6 @@
   x25519_scalar_mult_generic(out, scalar, point);
 }
 
-#endif  // BORINGSSL_X25519_X86_64
-
-
 void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) {
   RAND_bytes(out_private_key, 32);
 
@@ -3200,20 +3180,6 @@
   return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0;
 }
 
-#if defined(BORINGSSL_X25519_X86_64)
-
-// When |BORINGSSL_X25519_X86_64| is set, base point multiplication is done with
-// the Montgomery ladder because it's faster. Otherwise it's done using the
-// Ed25519 tables.
-
-void X25519_public_from_private(uint8_t out_public_value[32],
-                                const uint8_t private_key[32]) {
-  static const uint8_t kMongomeryBasePoint[32] = {9};
-  x25519_scalar_mult(out_public_value, private_key, kMongomeryBasePoint);
-}
-
-#else
-
 void X25519_public_from_private(uint8_t out_public_value[32],
                                 const uint8_t private_key[32]) {
 #if defined(BORINGSSL_X25519_NEON)
@@ -3243,5 +3209,3 @@
   fe_mul_tlt(&zminusy_inv, &zplusy, &zminusy_inv);
   fe_tobytes(out_public_value, &zminusy_inv);
 }
-
-#endif  // BORINGSSL_X25519_X86_64
diff --git a/third_party/fiat/internal.h b/third_party/fiat/internal.h
index c5dcc04..be3e265 100644
--- a/third_party/fiat/internal.h
+++ b/third_party/fiat/internal.h
@@ -32,15 +32,6 @@
 #include "../../crypto/internal.h"
 
 
-#if defined(OPENSSL_X86_64) && !defined(OPENSSL_SMALL) && \
-    !defined(OPENSSL_WINDOWS) && !defined(OPENSSL_NO_ASM)
-#define BORINGSSL_X25519_X86_64
-
-void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
-                   const uint8_t point[32]);
-#endif
-
-
 #if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_APPLE)
 #define BORINGSSL_X25519_NEON
 
diff --git a/util/generate_build_files.py b/util/generate_build_files.py
index a4af666..f2b10de 100644
--- a/util/generate_build_files.py
+++ b/util/generate_build_files.py
@@ -44,12 +44,6 @@
         'src/crypto/curve25519/asm/x25519-asm-arm.S',
         'src/crypto/poly1305/poly1305_arm_asm.S',
     ],
-    ('linux', 'x86_64'): [
-        'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
-    ],
-    ('mac', 'x86_64'): [
-        'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
-    ],
 }
 
 PREFIX = None