Remove x86_64 x25519 assembly.
Now that we have 64-bit C code, courtesy of fiat-crypto, the tradeoff
for carrying the assembly changes:
Assembly:
Did 16000 Curve25519 base-point multiplication operations in 1059932us (15095.3 ops/sec)
Did 16000 Curve25519 arbitrary point multiplication operations in 1060023us (15094.0 ops/sec)
fiat64:
Did 39000 Curve25519 base-point multiplication operations in 1004712us (38817.1 ops/sec)
Did 14000 Curve25519 arbitrary point multiplication operations in 1006827us (13905.1 ops/sec)
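The assembly is still about 9% faster than fiat64 at arbitrary point
multiplication, but fiat64 gets to use the Ed25519 tables for the base
point multiplication, so overall it is actually faster to disable the
assembly. Combined rate (ops/sec) of one base-point plus one arbitrary
point multiplication, assembly first and then fiat64: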
>>> 1/(1/15094.0 + 1/15095.3)
7547.324986004976
>>> 1/(1/38817.1 + 1/13905.1)
10237.73016319501
(At the cost of touching a 30kB table.)
The assembly implementation is no longer pulling its weight. Remove it
and use the fiat code in all build configurations.
Change-Id: Id736873177d5568bb16ea06994b9fcb1af104e33
Reviewed-on: https://boringssl-review.googlesource.com/25524
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/curve25519/CMakeLists.txt b/crypto/curve25519/CMakeLists.txt
index 6f51d54..4894fa8 100644
--- a/crypto/curve25519/CMakeLists.txt
+++ b/crypto/curve25519/CMakeLists.txt
@@ -8,21 +8,12 @@
)
endif()
-if (${ARCH} STREQUAL "x86_64")
- set(
- CURVE25519_ARCH_SOURCES
-
- asm/x25519-asm-x86_64.S
- )
-endif()
-
add_library(
curve25519
OBJECT
spake25519.c
- x25519-x86_64.c
${CURVE25519_ARCH_SOURCES}
)
diff --git a/crypto/curve25519/asm/x25519-asm-x86_64.S b/crypto/curve25519/asm/x25519-asm-x86_64.S
deleted file mode 100644
index 6cff53e..0000000
--- a/crypto/curve25519/asm/x25519-asm-x86_64.S
+++ /dev/null
@@ -1,1894 +0,0 @@
-/* Copyright (c) 2015, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-/* This file is adapted from crypto_scalarmult/curve25519/amd64-51/ in
- * SUPERCOP 20141124 (http://bench.cr.yp.to/supercop.html). That code is public
- * domain licensed but the standard ISC license is included above to keep
- * licensing simple. */
-
-#if !defined(OPENSSL_NO_ASM)
-#if defined(__x86_64__)
-
-.data
-.p2align 4
-
-#if defined(__APPLE__)
-/* OS X's C ABI prefixes functions with underscore. */
-#define C_ABI(x) _ ## x
-#define HIDDEN .private_extern
-#else
-#define C_ABI(x) x
-#define HIDDEN .hidden
-#endif
-
-x25519_x86_64_REDMASK51: .quad 0x0007FFFFFFFFFFFF
-x25519_x86_64_121666_213: .quad 996687872
-x25519_x86_64_2P0: .quad 0xFFFFFFFFFFFDA
-x25519_x86_64_2P1234: .quad 0xFFFFFFFFFFFFE
-x25519_x86_64_4P0: .quad 0x1FFFFFFFFFFFB4
-x25519_x86_64_4P1234: .quad 0x1FFFFFFFFFFFFC
-x25519_x86_64_MU0: .quad 0xED9CE5A30A2C131B
-x25519_x86_64_MU1: .quad 0x2106215D086329A7
-x25519_x86_64_MU2: .quad 0xFFFFFFFFFFFFFFEB
-x25519_x86_64_MU3: .quad 0xFFFFFFFFFFFFFFFF
-x25519_x86_64_MU4: .quad 0x000000000000000F
-x25519_x86_64_ORDER0: .quad 0x5812631A5CF5D3ED
-x25519_x86_64_ORDER1: .quad 0x14DEF9DEA2F79CD6
-x25519_x86_64_ORDER2: .quad 0x0000000000000000
-x25519_x86_64_ORDER3: .quad 0x1000000000000000
-x25519_x86_64_EC2D0: .quad 1859910466990425
-x25519_x86_64_EC2D1: .quad 932731440258426
-x25519_x86_64_EC2D2: .quad 1072319116312658
-x25519_x86_64_EC2D3: .quad 1815898335770999
-x25519_x86_64_EC2D4: .quad 633789495995903
-x25519_x86_64__38: .quad 38
-
-.text
-.p2align 5
-
-.globl C_ABI(x25519_x86_64_freeze)
-HIDDEN C_ABI(x25519_x86_64_freeze)
-C_ABI(x25519_x86_64_freeze):
-.cfi_startproc
-/* This is a leaf function and uses the redzone for saving registers. */
-movq %r12,-8(%rsp)
-.cfi_rel_offset r12, -8
-movq 0(%rdi),%rsi
-movq 8(%rdi),%rdx
-movq 16(%rdi),%rcx
-movq 24(%rdi),%r8
-movq 32(%rdi),%r9
-movq x25519_x86_64_REDMASK51(%rip),%rax
-mov %rax,%r10
-sub $18,%r10
-mov $3,%r11
-._reduceloop:
-mov %rsi,%r12
-shr $51,%r12
-and %rax,%rsi
-add %r12,%rdx
-mov %rdx,%r12
-shr $51,%r12
-and %rax,%rdx
-add %r12,%rcx
-mov %rcx,%r12
-shr $51,%r12
-and %rax,%rcx
-add %r12,%r8
-mov %r8,%r12
-shr $51,%r12
-and %rax,%r8
-add %r12,%r9
-mov %r9,%r12
-shr $51,%r12
-and %rax,%r9
-imulq $19,%r12,%r12
-add %r12,%rsi
-sub $1,%r11
-ja ._reduceloop
-mov $1,%r12
-cmp %r10,%rsi
-cmovl %r11,%r12
-cmp %rax,%rdx
-cmovne %r11,%r12
-cmp %rax,%rcx
-cmovne %r11,%r12
-cmp %rax,%r8
-cmovne %r11,%r12
-cmp %rax,%r9
-cmovne %r11,%r12
-neg %r12
-and %r12,%rax
-and %r12,%r10
-sub %r10,%rsi
-sub %rax,%rdx
-sub %rax,%rcx
-sub %rax,%r8
-sub %rax,%r9
-movq %rsi,0(%rdi)
-movq %rdx,8(%rdi)
-movq %rcx,16(%rdi)
-movq %r8,24(%rdi)
-movq %r9,32(%rdi)
-movq -8(%rsp),%r12
-ret
-.cfi_endproc
-
-.p2align 5
-.globl C_ABI(x25519_x86_64_mul)
-HIDDEN C_ABI(x25519_x86_64_mul)
-C_ABI(x25519_x86_64_mul):
-.cfi_startproc
-/* This is a leaf function and uses the redzone for saving registers. */
-movq %r12,-8(%rsp)
-.cfi_rel_offset r12, -8
-movq %r13,-16(%rsp)
-.cfi_rel_offset r13, -16
-movq %r14,-24(%rsp)
-.cfi_rel_offset r14, -24
-movq %r15,-32(%rsp)
-.cfi_rel_offset r15, -32
-movq %rbx,-40(%rsp)
-.cfi_rel_offset rbx, -40
-movq %rbp,-48(%rsp)
-.cfi_rel_offset rbp, -48
-mov %rdx,%rcx
-movq 24(%rsi),%rdx
-imulq $19,%rdx,%rax
-movq %rax,-64(%rsp)
-mulq 16(%rcx)
-mov %rax,%r8
-mov %rdx,%r9
-movq 32(%rsi),%rdx
-imulq $19,%rdx,%rax
-movq %rax,-72(%rsp)
-mulq 8(%rcx)
-add %rax,%r8
-adc %rdx,%r9
-movq 0(%rsi),%rax
-mulq 0(%rcx)
-add %rax,%r8
-adc %rdx,%r9
-movq 0(%rsi),%rax
-mulq 8(%rcx)
-mov %rax,%r10
-mov %rdx,%r11
-movq 0(%rsi),%rax
-mulq 16(%rcx)
-mov %rax,%r12
-mov %rdx,%r13
-movq 0(%rsi),%rax
-mulq 24(%rcx)
-mov %rax,%r14
-mov %rdx,%r15
-movq 0(%rsi),%rax
-mulq 32(%rcx)
-mov %rax,%rbx
-mov %rdx,%rbp
-movq 8(%rsi),%rax
-mulq 0(%rcx)
-add %rax,%r10
-adc %rdx,%r11
-movq 8(%rsi),%rax
-mulq 8(%rcx)
-add %rax,%r12
-adc %rdx,%r13
-movq 8(%rsi),%rax
-mulq 16(%rcx)
-add %rax,%r14
-adc %rdx,%r15
-movq 8(%rsi),%rax
-mulq 24(%rcx)
-add %rax,%rbx
-adc %rdx,%rbp
-movq 8(%rsi),%rdx
-imulq $19,%rdx,%rax
-mulq 32(%rcx)
-add %rax,%r8
-adc %rdx,%r9
-movq 16(%rsi),%rax
-mulq 0(%rcx)
-add %rax,%r12
-adc %rdx,%r13
-movq 16(%rsi),%rax
-mulq 8(%rcx)
-add %rax,%r14
-adc %rdx,%r15
-movq 16(%rsi),%rax
-mulq 16(%rcx)
-add %rax,%rbx
-adc %rdx,%rbp
-movq 16(%rsi),%rdx
-imulq $19,%rdx,%rax
-mulq 24(%rcx)
-add %rax,%r8
-adc %rdx,%r9
-movq 16(%rsi),%rdx
-imulq $19,%rdx,%rax
-mulq 32(%rcx)
-add %rax,%r10
-adc %rdx,%r11
-movq 24(%rsi),%rax
-mulq 0(%rcx)
-add %rax,%r14
-adc %rdx,%r15
-movq 24(%rsi),%rax
-mulq 8(%rcx)
-add %rax,%rbx
-adc %rdx,%rbp
-movq -64(%rsp),%rax
-mulq 24(%rcx)
-add %rax,%r10
-adc %rdx,%r11
-movq -64(%rsp),%rax
-mulq 32(%rcx)
-add %rax,%r12
-adc %rdx,%r13
-movq 32(%rsi),%rax
-mulq 0(%rcx)
-add %rax,%rbx
-adc %rdx,%rbp
-movq -72(%rsp),%rax
-mulq 16(%rcx)
-add %rax,%r10
-adc %rdx,%r11
-movq -72(%rsp),%rax
-mulq 24(%rcx)
-add %rax,%r12
-adc %rdx,%r13
-movq -72(%rsp),%rax
-mulq 32(%rcx)
-add %rax,%r14
-adc %rdx,%r15
-movq x25519_x86_64_REDMASK51(%rip),%rsi
-shld $13,%r8,%r9
-and %rsi,%r8
-shld $13,%r10,%r11
-and %rsi,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rsi,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rsi,%r14
-add %r13,%r14
-shld $13,%rbx,%rbp
-and %rsi,%rbx
-add %r15,%rbx
-imulq $19,%rbp,%rdx
-add %rdx,%r8
-mov %r8,%rdx
-shr $51,%rdx
-add %r10,%rdx
-mov %rdx,%rcx
-shr $51,%rdx
-and %rsi,%r8
-add %r12,%rdx
-mov %rdx,%r9
-shr $51,%rdx
-and %rsi,%rcx
-add %r14,%rdx
-mov %rdx,%rax
-shr $51,%rdx
-and %rsi,%r9
-add %rbx,%rdx
-mov %rdx,%r10
-shr $51,%rdx
-and %rsi,%rax
-imulq $19,%rdx,%rdx
-add %rdx,%r8
-and %rsi,%r10
-movq %r8,0(%rdi)
-movq %rcx,8(%rdi)
-movq %r9,16(%rdi)
-movq %rax,24(%rdi)
-movq %r10,32(%rdi)
-movq -8(%rsp),%r12
-movq -16(%rsp),%r13
-movq -24(%rsp),%r14
-movq -32(%rsp),%r15
-movq -40(%rsp),%rbx
-movq -48(%rsp),%rbp
-ret
-.cfi_endproc
-
-.p2align 5
-.globl C_ABI(x25519_x86_64_square)
-HIDDEN C_ABI(x25519_x86_64_square)
-C_ABI(x25519_x86_64_square):
-.cfi_startproc
-/* This is a leaf function and uses the redzone for saving registers. */
-movq %r12,-8(%rsp)
-.cfi_rel_offset r12, -8
-movq %r13,-16(%rsp)
-.cfi_rel_offset r13, -16
-movq %r14,-24(%rsp)
-.cfi_rel_offset r14, -24
-movq %r15,-32(%rsp)
-.cfi_rel_offset r15, -32
-movq %rbx,-40(%rsp)
-.cfi_rel_offset rbx, -40
-movq 0(%rsi),%rax
-mulq 0(%rsi)
-mov %rax,%rcx
-mov %rdx,%r8
-movq 0(%rsi),%rax
-shl $1,%rax
-mulq 8(%rsi)
-mov %rax,%r9
-mov %rdx,%r10
-movq 0(%rsi),%rax
-shl $1,%rax
-mulq 16(%rsi)
-mov %rax,%r11
-mov %rdx,%r12
-movq 0(%rsi),%rax
-shl $1,%rax
-mulq 24(%rsi)
-mov %rax,%r13
-mov %rdx,%r14
-movq 0(%rsi),%rax
-shl $1,%rax
-mulq 32(%rsi)
-mov %rax,%r15
-mov %rdx,%rbx
-movq 8(%rsi),%rax
-mulq 8(%rsi)
-add %rax,%r11
-adc %rdx,%r12
-movq 8(%rsi),%rax
-shl $1,%rax
-mulq 16(%rsi)
-add %rax,%r13
-adc %rdx,%r14
-movq 8(%rsi),%rax
-shl $1,%rax
-mulq 24(%rsi)
-add %rax,%r15
-adc %rdx,%rbx
-movq 8(%rsi),%rdx
-imulq $38,%rdx,%rax
-mulq 32(%rsi)
-add %rax,%rcx
-adc %rdx,%r8
-movq 16(%rsi),%rax
-mulq 16(%rsi)
-add %rax,%r15
-adc %rdx,%rbx
-movq 16(%rsi),%rdx
-imulq $38,%rdx,%rax
-mulq 24(%rsi)
-add %rax,%rcx
-adc %rdx,%r8
-movq 16(%rsi),%rdx
-imulq $38,%rdx,%rax
-mulq 32(%rsi)
-add %rax,%r9
-adc %rdx,%r10
-movq 24(%rsi),%rdx
-imulq $19,%rdx,%rax
-mulq 24(%rsi)
-add %rax,%r9
-adc %rdx,%r10
-movq 24(%rsi),%rdx
-imulq $38,%rdx,%rax
-mulq 32(%rsi)
-add %rax,%r11
-adc %rdx,%r12
-movq 32(%rsi),%rdx
-imulq $19,%rdx,%rax
-mulq 32(%rsi)
-add %rax,%r13
-adc %rdx,%r14
-movq x25519_x86_64_REDMASK51(%rip),%rsi
-shld $13,%rcx,%r8
-and %rsi,%rcx
-shld $13,%r9,%r10
-and %rsi,%r9
-add %r8,%r9
-shld $13,%r11,%r12
-and %rsi,%r11
-add %r10,%r11
-shld $13,%r13,%r14
-and %rsi,%r13
-add %r12,%r13
-shld $13,%r15,%rbx
-and %rsi,%r15
-add %r14,%r15
-imulq $19,%rbx,%rdx
-add %rdx,%rcx
-mov %rcx,%rdx
-shr $51,%rdx
-add %r9,%rdx
-and %rsi,%rcx
-mov %rdx,%r8
-shr $51,%rdx
-add %r11,%rdx
-and %rsi,%r8
-mov %rdx,%r9
-shr $51,%rdx
-add %r13,%rdx
-and %rsi,%r9
-mov %rdx,%rax
-shr $51,%rdx
-add %r15,%rdx
-and %rsi,%rax
-mov %rdx,%r10
-shr $51,%rdx
-imulq $19,%rdx,%rdx
-add %rdx,%rcx
-and %rsi,%r10
-movq %rcx,0(%rdi)
-movq %r8,8(%rdi)
-movq %r9,16(%rdi)
-movq %rax,24(%rdi)
-movq %r10,32(%rdi)
-movq -8(%rsp),%r12
-movq -16(%rsp),%r13
-movq -24(%rsp),%r14
-movq -32(%rsp),%r15
-movq -40(%rsp),%rbx
-ret
-.cfi_endproc
-
-.p2align 5
-.globl C_ABI(x25519_x86_64_ladderstep)
-HIDDEN C_ABI(x25519_x86_64_ladderstep)
-C_ABI(x25519_x86_64_ladderstep):
-.cfi_startproc
-sub $344,%rsp
-.cfi_adjust_cfa_offset 344
-movq %r12,296(%rsp)
-.cfi_rel_offset r12, 296
-movq %r13,304(%rsp)
-.cfi_rel_offset r13, 304
-movq %r14,312(%rsp)
-.cfi_rel_offset r14, 312
-movq %r15,320(%rsp)
-.cfi_rel_offset r15, 320
-movq %rbx,328(%rsp)
-.cfi_rel_offset rbx, 328
-movq %rbp,336(%rsp)
-.cfi_rel_offset rbp, 336
-movq 40(%rdi),%rsi
-movq 48(%rdi),%rdx
-movq 56(%rdi),%rcx
-movq 64(%rdi),%r8
-movq 72(%rdi),%r9
-mov %rsi,%rax
-mov %rdx,%r10
-mov %rcx,%r11
-mov %r8,%r12
-mov %r9,%r13
-add x25519_x86_64_2P0(%rip),%rax
-add x25519_x86_64_2P1234(%rip),%r10
-add x25519_x86_64_2P1234(%rip),%r11
-add x25519_x86_64_2P1234(%rip),%r12
-add x25519_x86_64_2P1234(%rip),%r13
-addq 80(%rdi),%rsi
-addq 88(%rdi),%rdx
-addq 96(%rdi),%rcx
-addq 104(%rdi),%r8
-addq 112(%rdi),%r9
-subq 80(%rdi),%rax
-subq 88(%rdi),%r10
-subq 96(%rdi),%r11
-subq 104(%rdi),%r12
-subq 112(%rdi),%r13
-movq %rsi,0(%rsp)
-movq %rdx,8(%rsp)
-movq %rcx,16(%rsp)
-movq %r8,24(%rsp)
-movq %r9,32(%rsp)
-movq %rax,40(%rsp)
-movq %r10,48(%rsp)
-movq %r11,56(%rsp)
-movq %r12,64(%rsp)
-movq %r13,72(%rsp)
-movq 40(%rsp),%rax
-mulq 40(%rsp)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 40(%rsp),%rax
-shl $1,%rax
-mulq 48(%rsp)
-mov %rax,%r8
-mov %rdx,%r9
-movq 40(%rsp),%rax
-shl $1,%rax
-mulq 56(%rsp)
-mov %rax,%r10
-mov %rdx,%r11
-movq 40(%rsp),%rax
-shl $1,%rax
-mulq 64(%rsp)
-mov %rax,%r12
-mov %rdx,%r13
-movq 40(%rsp),%rax
-shl $1,%rax
-mulq 72(%rsp)
-mov %rax,%r14
-mov %rdx,%r15
-movq 48(%rsp),%rax
-mulq 48(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 48(%rsp),%rax
-shl $1,%rax
-mulq 56(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 48(%rsp),%rax
-shl $1,%rax
-mulq 64(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 48(%rsp),%rdx
-imulq $38,%rdx,%rax
-mulq 72(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 56(%rsp),%rax
-mulq 56(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 56(%rsp),%rdx
-imulq $38,%rdx,%rax
-mulq 64(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 56(%rsp),%rdx
-imulq $38,%rdx,%rax
-mulq 72(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 64(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 64(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 64(%rsp),%rdx
-imulq $38,%rdx,%rax
-mulq 72(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 72(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 72(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-and %rdx,%rsi
-mov %rcx,%r8
-shr $51,%rcx
-add %r10,%rcx
-and %rdx,%r8
-mov %rcx,%r9
-shr $51,%rcx
-add %r12,%rcx
-and %rdx,%r9
-mov %rcx,%rax
-shr $51,%rcx
-add %r14,%rcx
-and %rdx,%rax
-mov %rcx,%r10
-shr $51,%rcx
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-movq %rsi,80(%rsp)
-movq %r8,88(%rsp)
-movq %r9,96(%rsp)
-movq %rax,104(%rsp)
-movq %r10,112(%rsp)
-movq 0(%rsp),%rax
-mulq 0(%rsp)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 0(%rsp),%rax
-shl $1,%rax
-mulq 8(%rsp)
-mov %rax,%r8
-mov %rdx,%r9
-movq 0(%rsp),%rax
-shl $1,%rax
-mulq 16(%rsp)
-mov %rax,%r10
-mov %rdx,%r11
-movq 0(%rsp),%rax
-shl $1,%rax
-mulq 24(%rsp)
-mov %rax,%r12
-mov %rdx,%r13
-movq 0(%rsp),%rax
-shl $1,%rax
-mulq 32(%rsp)
-mov %rax,%r14
-mov %rdx,%r15
-movq 8(%rsp),%rax
-mulq 8(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 8(%rsp),%rax
-shl $1,%rax
-mulq 16(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 8(%rsp),%rax
-shl $1,%rax
-mulq 24(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 8(%rsp),%rdx
-imulq $38,%rdx,%rax
-mulq 32(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 16(%rsp),%rax
-mulq 16(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 16(%rsp),%rdx
-imulq $38,%rdx,%rax
-mulq 24(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 16(%rsp),%rdx
-imulq $38,%rdx,%rax
-mulq 32(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 24(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 24(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 24(%rsp),%rdx
-imulq $38,%rdx,%rax
-mulq 32(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 32(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 32(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-and %rdx,%rsi
-mov %rcx,%r8
-shr $51,%rcx
-add %r10,%rcx
-and %rdx,%r8
-mov %rcx,%r9
-shr $51,%rcx
-add %r12,%rcx
-and %rdx,%r9
-mov %rcx,%rax
-shr $51,%rcx
-add %r14,%rcx
-and %rdx,%rax
-mov %rcx,%r10
-shr $51,%rcx
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-movq %rsi,120(%rsp)
-movq %r8,128(%rsp)
-movq %r9,136(%rsp)
-movq %rax,144(%rsp)
-movq %r10,152(%rsp)
-mov %rsi,%rsi
-mov %r8,%rdx
-mov %r9,%rcx
-mov %rax,%r8
-mov %r10,%r9
-add x25519_x86_64_2P0(%rip),%rsi
-add x25519_x86_64_2P1234(%rip),%rdx
-add x25519_x86_64_2P1234(%rip),%rcx
-add x25519_x86_64_2P1234(%rip),%r8
-add x25519_x86_64_2P1234(%rip),%r9
-subq 80(%rsp),%rsi
-subq 88(%rsp),%rdx
-subq 96(%rsp),%rcx
-subq 104(%rsp),%r8
-subq 112(%rsp),%r9
-movq %rsi,160(%rsp)
-movq %rdx,168(%rsp)
-movq %rcx,176(%rsp)
-movq %r8,184(%rsp)
-movq %r9,192(%rsp)
-movq 120(%rdi),%rsi
-movq 128(%rdi),%rdx
-movq 136(%rdi),%rcx
-movq 144(%rdi),%r8
-movq 152(%rdi),%r9
-mov %rsi,%rax
-mov %rdx,%r10
-mov %rcx,%r11
-mov %r8,%r12
-mov %r9,%r13
-add x25519_x86_64_2P0(%rip),%rax
-add x25519_x86_64_2P1234(%rip),%r10
-add x25519_x86_64_2P1234(%rip),%r11
-add x25519_x86_64_2P1234(%rip),%r12
-add x25519_x86_64_2P1234(%rip),%r13
-addq 160(%rdi),%rsi
-addq 168(%rdi),%rdx
-addq 176(%rdi),%rcx
-addq 184(%rdi),%r8
-addq 192(%rdi),%r9
-subq 160(%rdi),%rax
-subq 168(%rdi),%r10
-subq 176(%rdi),%r11
-subq 184(%rdi),%r12
-subq 192(%rdi),%r13
-movq %rsi,200(%rsp)
-movq %rdx,208(%rsp)
-movq %rcx,216(%rsp)
-movq %r8,224(%rsp)
-movq %r9,232(%rsp)
-movq %rax,240(%rsp)
-movq %r10,248(%rsp)
-movq %r11,256(%rsp)
-movq %r12,264(%rsp)
-movq %r13,272(%rsp)
-movq 224(%rsp),%rsi
-imulq $19,%rsi,%rax
-movq %rax,280(%rsp)
-mulq 56(%rsp)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 232(%rsp),%rdx
-imulq $19,%rdx,%rax
-movq %rax,288(%rsp)
-mulq 48(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 200(%rsp),%rax
-mulq 40(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 200(%rsp),%rax
-mulq 48(%rsp)
-mov %rax,%r8
-mov %rdx,%r9
-movq 200(%rsp),%rax
-mulq 56(%rsp)
-mov %rax,%r10
-mov %rdx,%r11
-movq 200(%rsp),%rax
-mulq 64(%rsp)
-mov %rax,%r12
-mov %rdx,%r13
-movq 200(%rsp),%rax
-mulq 72(%rsp)
-mov %rax,%r14
-mov %rdx,%r15
-movq 208(%rsp),%rax
-mulq 40(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 208(%rsp),%rax
-mulq 48(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 208(%rsp),%rax
-mulq 56(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 208(%rsp),%rax
-mulq 64(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 208(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 72(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 216(%rsp),%rax
-mulq 40(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 216(%rsp),%rax
-mulq 48(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 216(%rsp),%rax
-mulq 56(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 216(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 64(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 216(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 72(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 224(%rsp),%rax
-mulq 40(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 224(%rsp),%rax
-mulq 48(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 280(%rsp),%rax
-mulq 64(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 280(%rsp),%rax
-mulq 72(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 232(%rsp),%rax
-mulq 40(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 288(%rsp),%rax
-mulq 56(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 288(%rsp),%rax
-mulq 64(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 288(%rsp),%rax
-mulq 72(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-mov %rcx,%r8
-shr $51,%rcx
-and %rdx,%rsi
-add %r10,%rcx
-mov %rcx,%r9
-shr $51,%rcx
-and %rdx,%r8
-add %r12,%rcx
-mov %rcx,%rax
-shr $51,%rcx
-and %rdx,%r9
-add %r14,%rcx
-mov %rcx,%r10
-shr $51,%rcx
-and %rdx,%rax
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-movq %rsi,40(%rsp)
-movq %r8,48(%rsp)
-movq %r9,56(%rsp)
-movq %rax,64(%rsp)
-movq %r10,72(%rsp)
-movq 264(%rsp),%rsi
-imulq $19,%rsi,%rax
-movq %rax,200(%rsp)
-mulq 16(%rsp)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 272(%rsp),%rdx
-imulq $19,%rdx,%rax
-movq %rax,208(%rsp)
-mulq 8(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 240(%rsp),%rax
-mulq 0(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 240(%rsp),%rax
-mulq 8(%rsp)
-mov %rax,%r8
-mov %rdx,%r9
-movq 240(%rsp),%rax
-mulq 16(%rsp)
-mov %rax,%r10
-mov %rdx,%r11
-movq 240(%rsp),%rax
-mulq 24(%rsp)
-mov %rax,%r12
-mov %rdx,%r13
-movq 240(%rsp),%rax
-mulq 32(%rsp)
-mov %rax,%r14
-mov %rdx,%r15
-movq 248(%rsp),%rax
-mulq 0(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 248(%rsp),%rax
-mulq 8(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 248(%rsp),%rax
-mulq 16(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 248(%rsp),%rax
-mulq 24(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 248(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 32(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 256(%rsp),%rax
-mulq 0(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 256(%rsp),%rax
-mulq 8(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 256(%rsp),%rax
-mulq 16(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 256(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 24(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 256(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 32(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 264(%rsp),%rax
-mulq 0(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 264(%rsp),%rax
-mulq 8(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 200(%rsp),%rax
-mulq 24(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 200(%rsp),%rax
-mulq 32(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 272(%rsp),%rax
-mulq 0(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 208(%rsp),%rax
-mulq 16(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 208(%rsp),%rax
-mulq 24(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 208(%rsp),%rax
-mulq 32(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-mov %rcx,%r8
-shr $51,%rcx
-and %rdx,%rsi
-add %r10,%rcx
-mov %rcx,%r9
-shr $51,%rcx
-and %rdx,%r8
-add %r12,%rcx
-mov %rcx,%rax
-shr $51,%rcx
-and %rdx,%r9
-add %r14,%rcx
-mov %rcx,%r10
-shr $51,%rcx
-and %rdx,%rax
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-mov %rsi,%rdx
-mov %r8,%rcx
-mov %r9,%r11
-mov %rax,%r12
-mov %r10,%r13
-add x25519_x86_64_2P0(%rip),%rdx
-add x25519_x86_64_2P1234(%rip),%rcx
-add x25519_x86_64_2P1234(%rip),%r11
-add x25519_x86_64_2P1234(%rip),%r12
-add x25519_x86_64_2P1234(%rip),%r13
-addq 40(%rsp),%rsi
-addq 48(%rsp),%r8
-addq 56(%rsp),%r9
-addq 64(%rsp),%rax
-addq 72(%rsp),%r10
-subq 40(%rsp),%rdx
-subq 48(%rsp),%rcx
-subq 56(%rsp),%r11
-subq 64(%rsp),%r12
-subq 72(%rsp),%r13
-movq %rsi,120(%rdi)
-movq %r8,128(%rdi)
-movq %r9,136(%rdi)
-movq %rax,144(%rdi)
-movq %r10,152(%rdi)
-movq %rdx,160(%rdi)
-movq %rcx,168(%rdi)
-movq %r11,176(%rdi)
-movq %r12,184(%rdi)
-movq %r13,192(%rdi)
-movq 120(%rdi),%rax
-mulq 120(%rdi)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 120(%rdi),%rax
-shl $1,%rax
-mulq 128(%rdi)
-mov %rax,%r8
-mov %rdx,%r9
-movq 120(%rdi),%rax
-shl $1,%rax
-mulq 136(%rdi)
-mov %rax,%r10
-mov %rdx,%r11
-movq 120(%rdi),%rax
-shl $1,%rax
-mulq 144(%rdi)
-mov %rax,%r12
-mov %rdx,%r13
-movq 120(%rdi),%rax
-shl $1,%rax
-mulq 152(%rdi)
-mov %rax,%r14
-mov %rdx,%r15
-movq 128(%rdi),%rax
-mulq 128(%rdi)
-add %rax,%r10
-adc %rdx,%r11
-movq 128(%rdi),%rax
-shl $1,%rax
-mulq 136(%rdi)
-add %rax,%r12
-adc %rdx,%r13
-movq 128(%rdi),%rax
-shl $1,%rax
-mulq 144(%rdi)
-add %rax,%r14
-adc %rdx,%r15
-movq 128(%rdi),%rdx
-imulq $38,%rdx,%rax
-mulq 152(%rdi)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 136(%rdi),%rax
-mulq 136(%rdi)
-add %rax,%r14
-adc %rdx,%r15
-movq 136(%rdi),%rdx
-imulq $38,%rdx,%rax
-mulq 144(%rdi)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 136(%rdi),%rdx
-imulq $38,%rdx,%rax
-mulq 152(%rdi)
-add %rax,%r8
-adc %rdx,%r9
-movq 144(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 144(%rdi)
-add %rax,%r8
-adc %rdx,%r9
-movq 144(%rdi),%rdx
-imulq $38,%rdx,%rax
-mulq 152(%rdi)
-add %rax,%r10
-adc %rdx,%r11
-movq 152(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 152(%rdi)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-and %rdx,%rsi
-mov %rcx,%r8
-shr $51,%rcx
-add %r10,%rcx
-and %rdx,%r8
-mov %rcx,%r9
-shr $51,%rcx
-add %r12,%rcx
-and %rdx,%r9
-mov %rcx,%rax
-shr $51,%rcx
-add %r14,%rcx
-and %rdx,%rax
-mov %rcx,%r10
-shr $51,%rcx
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-movq %rsi,120(%rdi)
-movq %r8,128(%rdi)
-movq %r9,136(%rdi)
-movq %rax,144(%rdi)
-movq %r10,152(%rdi)
-movq 160(%rdi),%rax
-mulq 160(%rdi)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 160(%rdi),%rax
-shl $1,%rax
-mulq 168(%rdi)
-mov %rax,%r8
-mov %rdx,%r9
-movq 160(%rdi),%rax
-shl $1,%rax
-mulq 176(%rdi)
-mov %rax,%r10
-mov %rdx,%r11
-movq 160(%rdi),%rax
-shl $1,%rax
-mulq 184(%rdi)
-mov %rax,%r12
-mov %rdx,%r13
-movq 160(%rdi),%rax
-shl $1,%rax
-mulq 192(%rdi)
-mov %rax,%r14
-mov %rdx,%r15
-movq 168(%rdi),%rax
-mulq 168(%rdi)
-add %rax,%r10
-adc %rdx,%r11
-movq 168(%rdi),%rax
-shl $1,%rax
-mulq 176(%rdi)
-add %rax,%r12
-adc %rdx,%r13
-movq 168(%rdi),%rax
-shl $1,%rax
-mulq 184(%rdi)
-add %rax,%r14
-adc %rdx,%r15
-movq 168(%rdi),%rdx
-imulq $38,%rdx,%rax
-mulq 192(%rdi)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 176(%rdi),%rax
-mulq 176(%rdi)
-add %rax,%r14
-adc %rdx,%r15
-movq 176(%rdi),%rdx
-imulq $38,%rdx,%rax
-mulq 184(%rdi)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 176(%rdi),%rdx
-imulq $38,%rdx,%rax
-mulq 192(%rdi)
-add %rax,%r8
-adc %rdx,%r9
-movq 184(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 184(%rdi)
-add %rax,%r8
-adc %rdx,%r9
-movq 184(%rdi),%rdx
-imulq $38,%rdx,%rax
-mulq 192(%rdi)
-add %rax,%r10
-adc %rdx,%r11
-movq 192(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 192(%rdi)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-and %rdx,%rsi
-mov %rcx,%r8
-shr $51,%rcx
-add %r10,%rcx
-and %rdx,%r8
-mov %rcx,%r9
-shr $51,%rcx
-add %r12,%rcx
-and %rdx,%r9
-mov %rcx,%rax
-shr $51,%rcx
-add %r14,%rcx
-and %rdx,%rax
-mov %rcx,%r10
-shr $51,%rcx
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-movq %rsi,160(%rdi)
-movq %r8,168(%rdi)
-movq %r9,176(%rdi)
-movq %rax,184(%rdi)
-movq %r10,192(%rdi)
-movq 184(%rdi),%rsi
-imulq $19,%rsi,%rax
-movq %rax,0(%rsp)
-mulq 16(%rdi)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 192(%rdi),%rdx
-imulq $19,%rdx,%rax
-movq %rax,8(%rsp)
-mulq 8(%rdi)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 160(%rdi),%rax
-mulq 0(%rdi)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 160(%rdi),%rax
-mulq 8(%rdi)
-mov %rax,%r8
-mov %rdx,%r9
-movq 160(%rdi),%rax
-mulq 16(%rdi)
-mov %rax,%r10
-mov %rdx,%r11
-movq 160(%rdi),%rax
-mulq 24(%rdi)
-mov %rax,%r12
-mov %rdx,%r13
-movq 160(%rdi),%rax
-mulq 32(%rdi)
-mov %rax,%r14
-mov %rdx,%r15
-movq 168(%rdi),%rax
-mulq 0(%rdi)
-add %rax,%r8
-adc %rdx,%r9
-movq 168(%rdi),%rax
-mulq 8(%rdi)
-add %rax,%r10
-adc %rdx,%r11
-movq 168(%rdi),%rax
-mulq 16(%rdi)
-add %rax,%r12
-adc %rdx,%r13
-movq 168(%rdi),%rax
-mulq 24(%rdi)
-add %rax,%r14
-adc %rdx,%r15
-movq 168(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 32(%rdi)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 176(%rdi),%rax
-mulq 0(%rdi)
-add %rax,%r10
-adc %rdx,%r11
-movq 176(%rdi),%rax
-mulq 8(%rdi)
-add %rax,%r12
-adc %rdx,%r13
-movq 176(%rdi),%rax
-mulq 16(%rdi)
-add %rax,%r14
-adc %rdx,%r15
-movq 176(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 24(%rdi)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 176(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 32(%rdi)
-add %rax,%r8
-adc %rdx,%r9
-movq 184(%rdi),%rax
-mulq 0(%rdi)
-add %rax,%r12
-adc %rdx,%r13
-movq 184(%rdi),%rax
-mulq 8(%rdi)
-add %rax,%r14
-adc %rdx,%r15
-movq 0(%rsp),%rax
-mulq 24(%rdi)
-add %rax,%r8
-adc %rdx,%r9
-movq 0(%rsp),%rax
-mulq 32(%rdi)
-add %rax,%r10
-adc %rdx,%r11
-movq 192(%rdi),%rax
-mulq 0(%rdi)
-add %rax,%r14
-adc %rdx,%r15
-movq 8(%rsp),%rax
-mulq 16(%rdi)
-add %rax,%r8
-adc %rdx,%r9
-movq 8(%rsp),%rax
-mulq 24(%rdi)
-add %rax,%r10
-adc %rdx,%r11
-movq 8(%rsp),%rax
-mulq 32(%rdi)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-mov %rcx,%r8
-shr $51,%rcx
-and %rdx,%rsi
-add %r10,%rcx
-mov %rcx,%r9
-shr $51,%rcx
-and %rdx,%r8
-add %r12,%rcx
-mov %rcx,%rax
-shr $51,%rcx
-and %rdx,%r9
-add %r14,%rcx
-mov %rcx,%r10
-shr $51,%rcx
-and %rdx,%rax
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-movq %rsi,160(%rdi)
-movq %r8,168(%rdi)
-movq %r9,176(%rdi)
-movq %rax,184(%rdi)
-movq %r10,192(%rdi)
-movq 144(%rsp),%rsi
-imulq $19,%rsi,%rax
-movq %rax,0(%rsp)
-mulq 96(%rsp)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 152(%rsp),%rdx
-imulq $19,%rdx,%rax
-movq %rax,8(%rsp)
-mulq 88(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 120(%rsp),%rax
-mulq 80(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 120(%rsp),%rax
-mulq 88(%rsp)
-mov %rax,%r8
-mov %rdx,%r9
-movq 120(%rsp),%rax
-mulq 96(%rsp)
-mov %rax,%r10
-mov %rdx,%r11
-movq 120(%rsp),%rax
-mulq 104(%rsp)
-mov %rax,%r12
-mov %rdx,%r13
-movq 120(%rsp),%rax
-mulq 112(%rsp)
-mov %rax,%r14
-mov %rdx,%r15
-movq 128(%rsp),%rax
-mulq 80(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 128(%rsp),%rax
-mulq 88(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 128(%rsp),%rax
-mulq 96(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 128(%rsp),%rax
-mulq 104(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 128(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 112(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 136(%rsp),%rax
-mulq 80(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 136(%rsp),%rax
-mulq 88(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 136(%rsp),%rax
-mulq 96(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 136(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 104(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 136(%rsp),%rdx
-imulq $19,%rdx,%rax
-mulq 112(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 144(%rsp),%rax
-mulq 80(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 144(%rsp),%rax
-mulq 88(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 0(%rsp),%rax
-mulq 104(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 0(%rsp),%rax
-mulq 112(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 152(%rsp),%rax
-mulq 80(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 8(%rsp),%rax
-mulq 96(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 8(%rsp),%rax
-mulq 104(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 8(%rsp),%rax
-mulq 112(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-mov %rcx,%r8
-shr $51,%rcx
-and %rdx,%rsi
-add %r10,%rcx
-mov %rcx,%r9
-shr $51,%rcx
-and %rdx,%r8
-add %r12,%rcx
-mov %rcx,%rax
-shr $51,%rcx
-and %rdx,%r9
-add %r14,%rcx
-mov %rcx,%r10
-shr $51,%rcx
-and %rdx,%rax
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-movq %rsi,40(%rdi)
-movq %r8,48(%rdi)
-movq %r9,56(%rdi)
-movq %rax,64(%rdi)
-movq %r10,72(%rdi)
-movq 160(%rsp),%rax
-mulq x25519_x86_64_121666_213(%rip)
-shr $13,%rax
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 168(%rsp),%rax
-mulq x25519_x86_64_121666_213(%rip)
-shr $13,%rax
-add %rax,%rcx
-mov %rdx,%r8
-movq 176(%rsp),%rax
-mulq x25519_x86_64_121666_213(%rip)
-shr $13,%rax
-add %rax,%r8
-mov %rdx,%r9
-movq 184(%rsp),%rax
-mulq x25519_x86_64_121666_213(%rip)
-shr $13,%rax
-add %rax,%r9
-mov %rdx,%r10
-movq 192(%rsp),%rax
-mulq x25519_x86_64_121666_213(%rip)
-shr $13,%rax
-add %rax,%r10
-imulq $19,%rdx,%rdx
-add %rdx,%rsi
-addq 80(%rsp),%rsi
-addq 88(%rsp),%rcx
-addq 96(%rsp),%r8
-addq 104(%rsp),%r9
-addq 112(%rsp),%r10
-movq %rsi,80(%rdi)
-movq %rcx,88(%rdi)
-movq %r8,96(%rdi)
-movq %r9,104(%rdi)
-movq %r10,112(%rdi)
-movq 104(%rdi),%rsi
-imulq $19,%rsi,%rax
-movq %rax,0(%rsp)
-mulq 176(%rsp)
-mov %rax,%rsi
-mov %rdx,%rcx
-movq 112(%rdi),%rdx
-imulq $19,%rdx,%rax
-movq %rax,8(%rsp)
-mulq 168(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 80(%rdi),%rax
-mulq 160(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 80(%rdi),%rax
-mulq 168(%rsp)
-mov %rax,%r8
-mov %rdx,%r9
-movq 80(%rdi),%rax
-mulq 176(%rsp)
-mov %rax,%r10
-mov %rdx,%r11
-movq 80(%rdi),%rax
-mulq 184(%rsp)
-mov %rax,%r12
-mov %rdx,%r13
-movq 80(%rdi),%rax
-mulq 192(%rsp)
-mov %rax,%r14
-mov %rdx,%r15
-movq 88(%rdi),%rax
-mulq 160(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 88(%rdi),%rax
-mulq 168(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 88(%rdi),%rax
-mulq 176(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 88(%rdi),%rax
-mulq 184(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 88(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 192(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 96(%rdi),%rax
-mulq 160(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 96(%rdi),%rax
-mulq 168(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 96(%rdi),%rax
-mulq 176(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 96(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 184(%rsp)
-add %rax,%rsi
-adc %rdx,%rcx
-movq 96(%rdi),%rdx
-imulq $19,%rdx,%rax
-mulq 192(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 104(%rdi),%rax
-mulq 160(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq 104(%rdi),%rax
-mulq 168(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 0(%rsp),%rax
-mulq 184(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 0(%rsp),%rax
-mulq 192(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 112(%rdi),%rax
-mulq 160(%rsp)
-add %rax,%r14
-adc %rdx,%r15
-movq 8(%rsp),%rax
-mulq 176(%rsp)
-add %rax,%r8
-adc %rdx,%r9
-movq 8(%rsp),%rax
-mulq 184(%rsp)
-add %rax,%r10
-adc %rdx,%r11
-movq 8(%rsp),%rax
-mulq 192(%rsp)
-add %rax,%r12
-adc %rdx,%r13
-movq x25519_x86_64_REDMASK51(%rip),%rdx
-shld $13,%rsi,%rcx
-and %rdx,%rsi
-shld $13,%r8,%r9
-and %rdx,%r8
-add %rcx,%r8
-shld $13,%r10,%r11
-and %rdx,%r10
-add %r9,%r10
-shld $13,%r12,%r13
-and %rdx,%r12
-add %r11,%r12
-shld $13,%r14,%r15
-and %rdx,%r14
-add %r13,%r14
-imulq $19,%r15,%rcx
-add %rcx,%rsi
-mov %rsi,%rcx
-shr $51,%rcx
-add %r8,%rcx
-mov %rcx,%r8
-shr $51,%rcx
-and %rdx,%rsi
-add %r10,%rcx
-mov %rcx,%r9
-shr $51,%rcx
-and %rdx,%r8
-add %r12,%rcx
-mov %rcx,%rax
-shr $51,%rcx
-and %rdx,%r9
-add %r14,%rcx
-mov %rcx,%r10
-shr $51,%rcx
-and %rdx,%rax
-imulq $19,%rcx,%rcx
-add %rcx,%rsi
-and %rdx,%r10
-movq %rsi,80(%rdi)
-movq %r8,88(%rdi)
-movq %r9,96(%rdi)
-movq %rax,104(%rdi)
-movq %r10,112(%rdi)
-movq 296(%rsp),%r12
-movq 304(%rsp),%r13
-movq 312(%rsp),%r14
-movq 320(%rsp),%r15
-movq 328(%rsp),%rbx
-movq 336(%rsp),%rbp
-add $344,%rsp
-.cfi_adjust_cfa_offset -344
-ret
-.cfi_endproc
-
-.p2align 5
-.globl C_ABI(x25519_x86_64_work_cswap)
-HIDDEN C_ABI(x25519_x86_64_work_cswap)
-C_ABI(x25519_x86_64_work_cswap):
-.cfi_startproc
-subq $1,%rsi
-notq %rsi
-movq %rsi,%xmm15
-pshufd $0x44,%xmm15,%xmm15
-movdqu 0(%rdi),%xmm0
-movdqu 16(%rdi),%xmm2
-movdqu 32(%rdi),%xmm4
-movdqu 48(%rdi),%xmm6
-movdqu 64(%rdi),%xmm8
-movdqu 80(%rdi),%xmm1
-movdqu 96(%rdi),%xmm3
-movdqu 112(%rdi),%xmm5
-movdqu 128(%rdi),%xmm7
-movdqu 144(%rdi),%xmm9
-movdqa %xmm1,%xmm10
-movdqa %xmm3,%xmm11
-movdqa %xmm5,%xmm12
-movdqa %xmm7,%xmm13
-movdqa %xmm9,%xmm14
-pxor %xmm0,%xmm10
-pxor %xmm2,%xmm11
-pxor %xmm4,%xmm12
-pxor %xmm6,%xmm13
-pxor %xmm8,%xmm14
-pand %xmm15,%xmm10
-pand %xmm15,%xmm11
-pand %xmm15,%xmm12
-pand %xmm15,%xmm13
-pand %xmm15,%xmm14
-pxor %xmm10,%xmm0
-pxor %xmm10,%xmm1
-pxor %xmm11,%xmm2
-pxor %xmm11,%xmm3
-pxor %xmm12,%xmm4
-pxor %xmm12,%xmm5
-pxor %xmm13,%xmm6
-pxor %xmm13,%xmm7
-pxor %xmm14,%xmm8
-pxor %xmm14,%xmm9
-movdqu %xmm0,0(%rdi)
-movdqu %xmm2,16(%rdi)
-movdqu %xmm4,32(%rdi)
-movdqu %xmm6,48(%rdi)
-movdqu %xmm8,64(%rdi)
-movdqu %xmm1,80(%rdi)
-movdqu %xmm3,96(%rdi)
-movdqu %xmm5,112(%rdi)
-movdqu %xmm7,128(%rdi)
-movdqu %xmm9,144(%rdi)
-ret
-.cfi_endproc
-
-#endif /* __x86_64__ */
-#endif /* !OPENSSL_NO_ASM */
diff --git a/crypto/curve25519/x25519-x86_64.c b/crypto/curve25519/x25519-x86_64.c
deleted file mode 100644
index 41db0bd..0000000
--- a/crypto/curve25519/x25519-x86_64.c
+++ /dev/null
@@ -1,247 +0,0 @@
-/* Copyright (c) 2015, Google Inc.
- *
- * Permission to use, copy, modify, and/or distribute this software for any
- * purpose with or without fee is hereby granted, provided that the above
- * copyright notice and this permission notice appear in all copies.
- *
- * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
- * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
- * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
- * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
- * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
- * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
- * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
-
-// This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP
-// 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as
-// public domain but this file has the ISC license just to keep licencing
-// simple.
-//
-// The field functions are shared by Ed25519 and X25519 where possible.
-
-#include <openssl/curve25519.h>
-
-#include <string.h>
-
-#include "../internal.h"
-#include "../../third_party/fiat/internal.h"
-
-
-#if defined(BORINGSSL_X25519_X86_64)
-
-typedef struct { uint64_t v[5]; } fe25519;
-
-// These functions are defined in asm/x25519-x86_64.S
-void x25519_x86_64_work_cswap(fe25519 *, uint64_t);
-void x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b);
-void x25519_x86_64_square(fe25519 *out, const fe25519 *a);
-void x25519_x86_64_freeze(fe25519 *);
-void x25519_x86_64_ladderstep(fe25519 *work);
-
-static void fe25519_setint(fe25519 *r, unsigned v) {
- r->v[0] = v;
- r->v[1] = 0;
- r->v[2] = 0;
- r->v[3] = 0;
- r->v[4] = 0;
-}
-
-// Assumes input x being reduced below 2^255
-static void fe25519_pack(unsigned char r[32], const fe25519 *x) {
- fe25519 t;
- t = *x;
- x25519_x86_64_freeze(&t);
-
- r[0] = (uint8_t)(t.v[0] & 0xff);
- r[1] = (uint8_t)((t.v[0] >> 8) & 0xff);
- r[2] = (uint8_t)((t.v[0] >> 16) & 0xff);
- r[3] = (uint8_t)((t.v[0] >> 24) & 0xff);
- r[4] = (uint8_t)((t.v[0] >> 32) & 0xff);
- r[5] = (uint8_t)((t.v[0] >> 40) & 0xff);
- r[6] = (uint8_t)((t.v[0] >> 48));
-
- r[6] ^= (uint8_t)((t.v[1] << 3) & 0xf8);
- r[7] = (uint8_t)((t.v[1] >> 5) & 0xff);
- r[8] = (uint8_t)((t.v[1] >> 13) & 0xff);
- r[9] = (uint8_t)((t.v[1] >> 21) & 0xff);
- r[10] = (uint8_t)((t.v[1] >> 29) & 0xff);
- r[11] = (uint8_t)((t.v[1] >> 37) & 0xff);
- r[12] = (uint8_t)((t.v[1] >> 45));
-
- r[12] ^= (uint8_t)((t.v[2] << 6) & 0xc0);
- r[13] = (uint8_t)((t.v[2] >> 2) & 0xff);
- r[14] = (uint8_t)((t.v[2] >> 10) & 0xff);
- r[15] = (uint8_t)((t.v[2] >> 18) & 0xff);
- r[16] = (uint8_t)((t.v[2] >> 26) & 0xff);
- r[17] = (uint8_t)((t.v[2] >> 34) & 0xff);
- r[18] = (uint8_t)((t.v[2] >> 42) & 0xff);
- r[19] = (uint8_t)((t.v[2] >> 50));
-
- r[19] ^= (uint8_t)((t.v[3] << 1) & 0xfe);
- r[20] = (uint8_t)((t.v[3] >> 7) & 0xff);
- r[21] = (uint8_t)((t.v[3] >> 15) & 0xff);
- r[22] = (uint8_t)((t.v[3] >> 23) & 0xff);
- r[23] = (uint8_t)((t.v[3] >> 31) & 0xff);
- r[24] = (uint8_t)((t.v[3] >> 39) & 0xff);
- r[25] = (uint8_t)((t.v[3] >> 47));
-
- r[25] ^= (uint8_t)((t.v[4] << 4) & 0xf0);
- r[26] = (uint8_t)((t.v[4] >> 4) & 0xff);
- r[27] = (uint8_t)((t.v[4] >> 12) & 0xff);
- r[28] = (uint8_t)((t.v[4] >> 20) & 0xff);
- r[29] = (uint8_t)((t.v[4] >> 28) & 0xff);
- r[30] = (uint8_t)((t.v[4] >> 36) & 0xff);
- r[31] = (uint8_t)((t.v[4] >> 44));
-}
-
-static void fe25519_unpack(fe25519 *r, const uint8_t x[32]) {
- r->v[0] = x[0];
- r->v[0] += (uint64_t)x[1] << 8;
- r->v[0] += (uint64_t)x[2] << 16;
- r->v[0] += (uint64_t)x[3] << 24;
- r->v[0] += (uint64_t)x[4] << 32;
- r->v[0] += (uint64_t)x[5] << 40;
- r->v[0] += ((uint64_t)x[6] & 7) << 48;
-
- r->v[1] = x[6] >> 3;
- r->v[1] += (uint64_t)x[7] << 5;
- r->v[1] += (uint64_t)x[8] << 13;
- r->v[1] += (uint64_t)x[9] << 21;
- r->v[1] += (uint64_t)x[10] << 29;
- r->v[1] += (uint64_t)x[11] << 37;
- r->v[1] += ((uint64_t)x[12] & 63) << 45;
-
- r->v[2] = x[12] >> 6;
- r->v[2] += (uint64_t)x[13] << 2;
- r->v[2] += (uint64_t)x[14] << 10;
- r->v[2] += (uint64_t)x[15] << 18;
- r->v[2] += (uint64_t)x[16] << 26;
- r->v[2] += (uint64_t)x[17] << 34;
- r->v[2] += (uint64_t)x[18] << 42;
- r->v[2] += ((uint64_t)x[19] & 1) << 50;
-
- r->v[3] = x[19] >> 1;
- r->v[3] += (uint64_t)x[20] << 7;
- r->v[3] += (uint64_t)x[21] << 15;
- r->v[3] += (uint64_t)x[22] << 23;
- r->v[3] += (uint64_t)x[23] << 31;
- r->v[3] += (uint64_t)x[24] << 39;
- r->v[3] += ((uint64_t)x[25] & 15) << 47;
-
- r->v[4] = x[25] >> 4;
- r->v[4] += (uint64_t)x[26] << 4;
- r->v[4] += (uint64_t)x[27] << 12;
- r->v[4] += (uint64_t)x[28] << 20;
- r->v[4] += (uint64_t)x[29] << 28;
- r->v[4] += (uint64_t)x[30] << 36;
- r->v[4] += ((uint64_t)x[31] & 127) << 44;
-}
-
-static void fe25519_invert(fe25519 *r, const fe25519 *x) {
- fe25519 z2;
- fe25519 z9;
- fe25519 z11;
- fe25519 z2_5_0;
- fe25519 z2_10_0;
- fe25519 z2_20_0;
- fe25519 z2_50_0;
- fe25519 z2_100_0;
- fe25519 t;
- int i;
-
- /* 2 */ x25519_x86_64_square(&z2, x);
- /* 4 */ x25519_x86_64_square(&t, &z2);
- /* 8 */ x25519_x86_64_square(&t, &t);
- /* 9 */ x25519_x86_64_mul(&z9, &t, x);
- /* 11 */ x25519_x86_64_mul(&z11, &z9, &z2);
- /* 22 */ x25519_x86_64_square(&t, &z11);
- /* 2^5 - 2^0 = 31 */ x25519_x86_64_mul(&z2_5_0, &t, &z9);
-
- /* 2^6 - 2^1 */ x25519_x86_64_square(&t, &z2_5_0);
- /* 2^20 - 2^10 */ for (i = 1; i < 5; i++) { x25519_x86_64_square(&t, &t); }
- /* 2^10 - 2^0 */ x25519_x86_64_mul(&z2_10_0, &t, &z2_5_0);
-
- /* 2^11 - 2^1 */ x25519_x86_64_square(&t, &z2_10_0);
- /* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); }
- /* 2^20 - 2^0 */ x25519_x86_64_mul(&z2_20_0, &t, &z2_10_0);
-
- /* 2^21 - 2^1 */ x25519_x86_64_square(&t, &z2_20_0);
- /* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { x25519_x86_64_square(&t, &t); }
- /* 2^40 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_20_0);
-
- /* 2^41 - 2^1 */ x25519_x86_64_square(&t, &t);
- /* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); }
- /* 2^50 - 2^0 */ x25519_x86_64_mul(&z2_50_0, &t, &z2_10_0);
-
- /* 2^51 - 2^1 */ x25519_x86_64_square(&t, &z2_50_0);
- /* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); }
- /* 2^100 - 2^0 */ x25519_x86_64_mul(&z2_100_0, &t, &z2_50_0);
-
- /* 2^101 - 2^1 */ x25519_x86_64_square(&t, &z2_100_0);
- /* 2^200 - 2^100 */ for (i = 1; i < 100; i++) {
- x25519_x86_64_square(&t, &t);
- }
- /* 2^200 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_100_0);
-
- /* 2^201 - 2^1 */ x25519_x86_64_square(&t, &t);
- /* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); }
- /* 2^250 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_50_0);
-
- /* 2^251 - 2^1 */ x25519_x86_64_square(&t, &t);
- /* 2^252 - 2^2 */ x25519_x86_64_square(&t, &t);
- /* 2^253 - 2^3 */ x25519_x86_64_square(&t, &t);
-
- /* 2^254 - 2^4 */ x25519_x86_64_square(&t, &t);
-
- /* 2^255 - 2^5 */ x25519_x86_64_square(&t, &t);
- /* 2^255 - 21 */ x25519_x86_64_mul(r, &t, &z11);
-}
-
-static void mladder(fe25519 *xr, fe25519 *zr, const uint8_t s[32]) {
- fe25519 work[5];
-
- work[0] = *xr;
- fe25519_setint(work + 1, 1);
- fe25519_setint(work + 2, 0);
- work[3] = *xr;
- fe25519_setint(work + 4, 1);
-
- int i, j;
- uint8_t prevbit = 0;
-
- j = 6;
- for (i = 31; i >= 0; i--) {
- while (j >= 0) {
- const uint8_t bit = 1 & (s[i] >> j);
- const uint64_t swap = bit ^ prevbit;
- prevbit = bit;
- x25519_x86_64_work_cswap(work + 1, swap);
- x25519_x86_64_ladderstep(work);
- j -= 1;
- }
- j = 7;
- }
-
- *xr = work[1];
- *zr = work[2];
-}
-
-void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
- const uint8_t point[32]) {
- uint8_t e[32];
- OPENSSL_memcpy(e, scalar, sizeof(e));
-
- e[0] &= 248;
- e[31] &= 127;
- e[31] |= 64;
-
- fe25519 t;
- fe25519 z;
- fe25519_unpack(&t, point);
- mladder(&t, &z, e);
- fe25519_invert(&z, &z);
- x25519_x86_64_mul(&t, &t, &z);
- fe25519_pack(out, &t);
-}
-
-#endif // BORINGSSL_X25519_X86_64
diff --git a/third_party/fiat/curve25519.c b/third_party/fiat/curve25519.c
index dfa4a39..d5928af 100644
--- a/third_party/fiat/curve25519.c
+++ b/third_party/fiat/curve25519.c
@@ -512,8 +512,6 @@
fe_sqr_impl(h->v, f->v);
}
-#if !defined(BORINGSSL_X25519_X86_64)
-
// Replace (f,g) with (g,f) if b == 1;
// replace (f,g) with (f,g) if b == 0.
//
@@ -589,8 +587,6 @@
assert_fe(h->v);
}
-#endif // !BORINGSSL_X25519_X86_64
-
// Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0.
static void fe_neg_impl(uint64_t out[5], const uint64_t in2[5]) {
{ const uint64_t x10 = 0;
@@ -1201,8 +1197,6 @@
fe_sqr_impl(h->v, f->v);
}
-#if !defined(BORINGSSL_X25519_X86_64)
-
// Replace (f,g) with (g,f) if b == 1;
// replace (f,g) with (f,g) if b == 0.
//
@@ -1342,8 +1336,6 @@
assert_fe(h->v);
}
-#endif // !BORINGSSL_X25519_X86_64
-
// Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0.
static void fe_neg_impl(uint32_t out[10], const uint32_t in2[10]) {
{ const uint32_t x20 = 0;
@@ -3063,15 +3055,6 @@
}
-#if defined(BORINGSSL_X25519_X86_64)
-
-static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32],
- const uint8_t point[32]) {
- x25519_x86_64(out, scalar, point);
-}
-
-#else
-
static void x25519_scalar_mult_generic(uint8_t out[32],
const uint8_t scalar[32],
const uint8_t point[32]) {
@@ -3166,9 +3149,6 @@
x25519_scalar_mult_generic(out, scalar, point);
}
-#endif // BORINGSSL_X25519_X86_64
-
-
void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) {
RAND_bytes(out_private_key, 32);
@@ -3200,20 +3180,6 @@
return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0;
}
-#if defined(BORINGSSL_X25519_X86_64)
-
-// When |BORINGSSL_X25519_X86_64| is set, base point multiplication is done with
-// the Montgomery ladder because it's faster. Otherwise it's done using the
-// Ed25519 tables.
-
-void X25519_public_from_private(uint8_t out_public_value[32],
- const uint8_t private_key[32]) {
- static const uint8_t kMongomeryBasePoint[32] = {9};
- x25519_scalar_mult(out_public_value, private_key, kMongomeryBasePoint);
-}
-
-#else
-
void X25519_public_from_private(uint8_t out_public_value[32],
const uint8_t private_key[32]) {
#if defined(BORINGSSL_X25519_NEON)
@@ -3243,5 +3209,3 @@
fe_mul_tlt(&zminusy_inv, &zplusy, &zminusy_inv);
fe_tobytes(out_public_value, &zminusy_inv);
}
-
-#endif // BORINGSSL_X25519_X86_64
diff --git a/third_party/fiat/internal.h b/third_party/fiat/internal.h
index c5dcc04..be3e265 100644
--- a/third_party/fiat/internal.h
+++ b/third_party/fiat/internal.h
@@ -32,15 +32,6 @@
#include "../../crypto/internal.h"
-#if defined(OPENSSL_X86_64) && !defined(OPENSSL_SMALL) && \
- !defined(OPENSSL_WINDOWS) && !defined(OPENSSL_NO_ASM)
-#define BORINGSSL_X25519_X86_64
-
-void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32],
- const uint8_t point[32]);
-#endif
-
-
#if defined(OPENSSL_ARM) && !defined(OPENSSL_NO_ASM) && !defined(OPENSSL_APPLE)
#define BORINGSSL_X25519_NEON
diff --git a/util/generate_build_files.py b/util/generate_build_files.py
index a4af666..f2b10de 100644
--- a/util/generate_build_files.py
+++ b/util/generate_build_files.py
@@ -44,12 +44,6 @@
'src/crypto/curve25519/asm/x25519-asm-arm.S',
'src/crypto/poly1305/poly1305_arm_asm.S',
],
- ('linux', 'x86_64'): [
- 'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
- ],
- ('mac', 'x86_64'): [
- 'src/crypto/curve25519/asm/x25519-asm-x86_64.S',
- ],
}
PREFIX = None