Implement draft-irtf-cfrg-hash-to-curve-06.

This implements hash-to-curve for P-521, which is needed by the
PMBTokens construction in https://eprint.iacr.org/2020/072.pdf. It is
only an internal function for now, operating on EC_RAW_POINT, so that
PMBTokens can avoid allocating EC_POINTs everywhere. If we ever have a
need to expose this outside, we can add an EC_POINT wrapper (hopefully
by then the draft will be stable).

Note this implements two versions of the function due to a spec issue in
P521_XMD:SHA-512_SSWU_RO_. One of them only exists to test against the
original test vectors. See
https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve/issues/237

Bug: chromium:1014199
Change-Id: I7207d1bcb8b20f7111c2ffb40e2794ad2d3d0000
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/40589
Reviewed-by: Adam Langley <agl@google.com>
Commit-Queue: David Benjamin <davidben@google.com>
diff --git a/crypto/CMakeLists.txt b/crypto/CMakeLists.txt
index 2d244d5..f4dd08f 100644
--- a/crypto/CMakeLists.txt
+++ b/crypto/CMakeLists.txt
@@ -275,6 +275,7 @@
   ecdsa_extra/ecdsa_asn1.c
   ec_extra/ec_asn1.c
   ec_extra/ec_derive.c
+  ec_extra/hash_to_curve.c
   err/err.c
   err_data.c
   engine/engine.c
diff --git a/crypto/ec_extra/hash_to_curve.c b/crypto/ec_extra/hash_to_curve.c
new file mode 100644
index 0000000..16a0932
--- /dev/null
+++ b/crypto/ec_extra/hash_to_curve.c
@@ -0,0 +1,356 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#include <openssl/ec.h>
+
+#include <openssl/digest.h>
+#include <openssl/err.h>
+#include <openssl/nid.h>
+#include <openssl/type_check.h>
+
+#include <assert.h>
+
+#include "internal.h"
+#include "../fipsmodule/bn/internal.h"
+#include "../fipsmodule/ec/internal.h"
+#include "../internal.h"
+
+
+// This file implements hash-to-curve, as described in
+// draft-irtf-cfrg-hash-to-curve-06.
+
+// expand_message_xmd implements the operation described in section 5.3.1 of
+// draft-irtf-cfrg-hash-to-curve-06. It returns one on success and zero on
+// allocation failure or if |out_len| was too large.
+static int expand_message_xmd(const EVP_MD *md, uint8_t *out, size_t out_len,
+                              const uint8_t *msg, size_t msg_len,
+                              const uint8_t *dst, size_t dst_len) {
+  int ret = 0;
+  const size_t block_size = EVP_MD_block_size(md);
+  const size_t md_size = EVP_MD_size(md);
+  EVP_MD_CTX ctx;
+  EVP_MD_CTX_init(&ctx);
+
+  // Long DSTs are hashed down to size.
+  OPENSSL_STATIC_ASSERT(EVP_MAX_MD_SIZE < 256, "hashed DST still too large");
+  uint8_t dst_buf[EVP_MAX_MD_SIZE];
+  if (dst_len >= 256) {
+    static const char kPrefix[] = "H2C-OVERSIZE-DST-";
+    if (!EVP_DigestInit_ex(&ctx, md, NULL) ||
+        !EVP_DigestUpdate(&ctx, kPrefix, sizeof(kPrefix) - 1) ||
+        !EVP_DigestUpdate(&ctx, dst, dst_len) ||
+        !EVP_DigestFinal_ex(&ctx, dst_buf, NULL)) {
+      goto err;
+    }
+    dst = dst_buf;
+    dst_len = md_size;
+  }
+  uint8_t dst_len_u8 = (uint8_t)dst_len;
+
+  // Compute b_0.
+  static const uint8_t kZeros[EVP_MAX_MD_BLOCK_SIZE] = {0};
+  // If |out_len| exceeds 16 bits then |i| will wrap below causing an error to
+  // be returned. This depends on the static assert above.
+  uint8_t l_i_b_str_zero[3] = {out_len >> 8, out_len, 0};
+  uint8_t b_0[EVP_MAX_MD_SIZE];
+  if (!EVP_DigestInit_ex(&ctx, md, NULL) ||
+      !EVP_DigestUpdate(&ctx, kZeros, block_size) ||
+      !EVP_DigestUpdate(&ctx, msg, msg_len) ||
+      !EVP_DigestUpdate(&ctx, l_i_b_str_zero, sizeof(l_i_b_str_zero)) ||
+      !EVP_DigestUpdate(&ctx, &dst_len_u8, 1) ||
+      !EVP_DigestUpdate(&ctx, dst, dst_len) ||
+      !EVP_DigestFinal_ex(&ctx, b_0, NULL)) {
+    goto err;
+  }
+
+  uint8_t b_i[EVP_MAX_MD_SIZE];
+  uint8_t i = 1;
+  while (out_len > 0) {
+    if (i == 0) {
+      // Input was too large.
+      OPENSSL_PUT_ERROR(EC, ERR_R_INTERNAL_ERROR);
+      goto err;
+    }
+    if (i > 1) {
+      for (size_t j = 0; j < md_size; j++) {
+        b_i[j] ^= b_0[j];
+      }
+    } else {
+      OPENSSL_memcpy(b_i, b_0, md_size);
+    }
+
+    if (!EVP_DigestInit_ex(&ctx, md, NULL) ||
+        !EVP_DigestUpdate(&ctx, b_i, md_size) ||
+        !EVP_DigestUpdate(&ctx, &i, 1) ||
+        !EVP_DigestUpdate(&ctx, &dst_len_u8, 1) ||
+        !EVP_DigestUpdate(&ctx, dst, dst_len) ||
+        !EVP_DigestFinal_ex(&ctx, b_i, NULL)) {
+      goto err;
+    }
+
+    size_t todo = out_len >= md_size ? md_size : out_len;
+    OPENSSL_memcpy(out, b_i, todo);
+    out += todo;
+    out_len -= todo;
+    i++;
+  }
+
+  ret = 1;
+
+err:
+  EVP_MD_CTX_cleanup(&ctx);
+  return ret;
+}
+
+// reduce_to_felem implements step 7 of hash_to_field, described in section 5.2
+// of draft-irtf-cfrg-hash-to-curve-06.
+static void reduce_to_felem(const EC_GROUP *group, EC_FELEM *out,
+                            const uint8_t *in, size_t len) {
+  union {
+    BN_ULONG words[2 * EC_MAX_WORDS];
+    uint8_t bytes[2 * EC_MAX_BYTES];
+  } buf;
+  OPENSSL_memset(&buf, 0, sizeof(buf));
+
+  // |in| is encoded in big-endian.
+  assert(len <= sizeof(buf.bytes));
+  for (size_t i = 0; i < len; i++) {
+    buf.bytes[len - i - 1] = in[i];
+  }
+
+  size_t num_words = group->field.width;
+  group->meth->felem_reduce(group, out, buf.words, num_words * 2);
+}
+
+// hash_to_field implements the operation described in section 5.2
+// of draft-irtf-cfrg-hash-to-curve-06, with count = 2.
+static int hash_to_field2(const EC_GROUP *group, const EVP_MD *md,
+                          EC_FELEM *out1, EC_FELEM *out2, const uint8_t *dst,
+                          size_t dst_len, unsigned k, const uint8_t *msg,
+                          size_t msg_len) {
+  // Determine L, the number of bytes to derive per output element.
+  size_t p_bits = BN_num_bits(&group->field);
+  size_t L = (p_bits + k + 7) / 8;
+
+  // We require 2^(8*L) < 2^(2*p_bits - 2) <= p^2 so to fit in bounds for
+  // |felem_reduce|. All defined hash-to-curve suites define |k| to be well
+  // under this bound. (|k| is usually around half of |p_bits|.)
+  if (L * 8 >= 2 * p_bits - 2) {
+    OPENSSL_PUT_ERROR(EC, ERR_R_INTERNAL_ERROR);
+    return 0;
+  }
+
+  uint8_t buf[4 * EC_MAX_BYTES];
+  if (!expand_message_xmd(md, buf, 2 * L, msg, msg_len, dst, dst_len)) {
+    return 0;
+  }
+  reduce_to_felem(group, out1, buf, L);
+  reduce_to_felem(group, out2, buf + L, L);
+  return 1;
+}
+
+static inline void mul_A(const EC_GROUP *group, EC_FELEM *out,
+                         const EC_FELEM *in) {
+  assert(group->a_is_minus3);
+  EC_FELEM tmp;
+  ec_felem_add(group, &tmp, in, in);      // tmp = 2*in
+  ec_felem_add(group, &tmp, &tmp, &tmp);  // tmp = 4*in
+  ec_felem_sub(group, out, in, &tmp);     // out = -3*in
+}
+
+static inline void mul_minus_A(const EC_GROUP *group, EC_FELEM *out,
+                               const EC_FELEM *in) {
+  assert(group->a_is_minus3);
+  EC_FELEM tmp;
+  ec_felem_add(group, &tmp, in, in);   // tmp = 2*in
+  ec_felem_add(group, out, &tmp, in);  // out = 3*in
+}
+
+// sgn0_le implements the operation described in section 4.1.2 of
+// draft-irtf-cfrg-hash-to-curve-06.
+static BN_ULONG sgn0_le(const EC_GROUP *group, const EC_FELEM *a) {
+  uint8_t buf[EC_MAX_BYTES];
+  size_t len;
+  ec_felem_to_bytes(group, buf, &len, a);
+  return buf[len - 1] & 1;
+}
+
+// map_to_curve_simple_swu implements the operation described in section 6.6.2
+// of draft-irtf-cfrg-hash-to-curve-06, using the optimization in appendix D.2.
+// It returns one on success and zero on error.
+static int map_to_curve_simple_swu(const EC_GROUP *group, const EC_FELEM *Z,
+                                   const BN_ULONG *c1, size_t num_c1,
+                                   const EC_FELEM *c2, EC_RAW_POINT *out,
+                                   const EC_FELEM *u) {
+  void (*const felem_mul)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a,
+                          const EC_FELEM *b) = group->meth->felem_mul;
+  void (*const felem_sqr)(const EC_GROUP *, EC_FELEM *r, const EC_FELEM *a) =
+      group->meth->felem_sqr;
+
+  // This function requires the prime be 3 mod 4, and that A = -3.
+  if (group->field.width == 0 || (group->field.d[0] & 3) != 3 ||
+      !group->a_is_minus3) {
+    OPENSSL_PUT_ERROR(EC, ERR_R_INTERNAL_ERROR);
+    return 0;
+  }
+
+  EC_FELEM tv1, tv2, tv3, tv4, xd, x1n, x2n, tmp, gxd, gx1, y1, y2;
+  felem_sqr(group, &tv1, u);                         // tv1 = u^2
+  felem_mul(group, &tv3, Z, &tv1);                   // tv3 = Z * tv1
+  felem_sqr(group, &tv2, &tv3);                      // tv2 = tv3^2
+  ec_felem_add(group, &xd, &tv2, &tv3);              // xd = tv2 + tv3
+  ec_felem_add(group, &x1n, &xd, &group->one);       // x1n = xd + 1
+  felem_mul(group, &x1n, &x1n, &group->b);           // x1n = x1n * B
+  mul_minus_A(group, &xd, &xd);                      // xd = -A * xd
+  BN_ULONG e1 = ec_felem_non_zero_mask(group, &xd);  // e1 = xd == 0 [flipped]
+  mul_A(group, &tmp, Z);
+  ec_felem_select(group, &xd, e1, &xd, &tmp);  // xd = CMOV(xd, Z * A, e1)
+  felem_sqr(group, &tv2, &xd);                 // tv2 = xd^2
+  felem_mul(group, &gxd, &tv2, &xd);           // gxd = tv2 * xd = xd^3
+  mul_A(group, &tv2, &tv2);                    // tv2 = A * tv2
+  felem_sqr(group, &gx1, &x1n);                // gx1 = x1n^2
+  ec_felem_add(group, &gx1, &gx1, &tv2);       // gx1 = gx1 + tv2
+  felem_mul(group, &gx1, &gx1, &x1n);          // gx1 = gx1 * x1n
+  felem_mul(group, &tv2, &group->b, &gxd);     // tv2 = B * gxd
+  ec_felem_add(group, &gx1, &gx1, &tv2);       // gx1 = gx1 + tv2
+  felem_sqr(group, &tv4, &gxd);                // tv4 = gxd^2
+  felem_mul(group, &tv2, &gx1, &gxd);          // tv2 = gx1 * gxd
+  felem_mul(group, &tv4, &tv4, &tv2);          // tv4 = tv4 * tv2
+  group->meth->felem_exp(group, &y1, &tv4, c1, num_c1);  // y1 = tv4^c1
+  felem_mul(group, &y1, &y1, &tv2);                      // y1 = y1 * tv2
+  felem_mul(group, &x2n, &tv3, &x1n);                    // x2n = tv3 * x1n
+  felem_mul(group, &y2, &y1, c2);                        // y2 = y1 * c2
+  felem_mul(group, &y2, &y2, &tv1);                      // y2 = y2 * tv1
+  felem_mul(group, &y2, &y2, u);                         // y2 = y2 * u
+  felem_sqr(group, &tv2, &y1);                           // tv2 = y1^2
+  felem_mul(group, &tv2, &tv2, &gxd);                    // tv2 = tv2 * gxd
+  ec_felem_sub(group, &tv3, &tv2, &gx1);
+  BN_ULONG e2 =
+      ec_felem_non_zero_mask(group, &tv3);       // e2 = tv2 == gx1 [flipped]
+  ec_felem_select(group, &x1n, e2, &x2n, &x1n);  // xn = CMOV(x2n, x1n, e2)
+  ec_felem_select(group, &y1, e2, &y2, &y1);     // y = CMOV(y2, y1, e2)
+  BN_ULONG sgn0_u = sgn0_le(group, u);
+  BN_ULONG sgn0_y = sgn0_le(group, &y1);
+  BN_ULONG e3 = sgn0_u ^ sgn0_y;
+  e3 = ((BN_ULONG)0) - e3;  // e3 = sgn0(u) == sgn0(y) [flipped]
+  ec_felem_neg(group, &y2, &y1);
+  ec_felem_select(group, &y1, e3, &y2, &y1);  // y = CMOV(-y, y, e3)
+
+  // Appendix D.1 describes how to convert (x1n, xd, y1, 1) to Jacobian
+  // coordinates. Note yd = 1. Also note that gxd computed above is xd^3.
+  felem_mul(group, &out->X, &x1n, &xd);     // X = xn * xd
+  felem_mul(group, &out->Y, &y1, &gxd);     // Y = yn * gxd = yn * xd^3
+  out->Z = xd;                              // Z = xd
+  return 1;
+}
+
+static int hash_to_curve(const EC_GROUP *group, const EVP_MD *md,
+                         const EC_FELEM *Z, const EC_FELEM *c2, unsigned k,
+                         EC_RAW_POINT *out, const uint8_t *dst, size_t dst_len,
+                         const uint8_t *msg, size_t msg_len) {
+  EC_FELEM u0, u1;
+  if (!hash_to_field2(group, md, &u0, &u1, dst, dst_len, k, msg, msg_len)) {
+    return 0;
+  }
+
+  // Compute |c1| = (p - 3) / 4.
+  BN_ULONG c1[EC_MAX_WORDS];
+  size_t num_c1 = group->field.width;
+  if (!bn_copy_words(c1, num_c1, &group->field)) {
+    return 0;
+  }
+  bn_rshift_words(c1, c1, /*shift=*/2, /*num=*/num_c1);
+
+  EC_RAW_POINT Q0, Q1;
+  if (!map_to_curve_simple_swu(group, Z, c1, num_c1, c2, &Q0, &u0) ||
+      !map_to_curve_simple_swu(group, Z, c1, num_c1, c2, &Q1, &u1)) {
+    return 0;
+  }
+
+  group->meth->add(group, out, &Q0, &Q1);  // R = Q0 + Q1
+  // All our curves have cofactor one, so |clear_cofactor| is a no-op.
+  return 1;
+}
+
+static int felem_from_u8(const EC_GROUP *group, EC_FELEM *out, uint8_t a) {
+  uint8_t bytes[EC_MAX_BYTES] = {0};
+  size_t len = BN_num_bytes(&group->field);
+  bytes[len - 1] = a;
+  return ec_felem_from_bytes(group, out, bytes, len);
+}
+
+static int hash_to_curve_p521_xmd_sswu(const EC_GROUP *group, EC_RAW_POINT *out,
+                                       const uint8_t *dst, size_t dst_len,
+                                       const EVP_MD *md, unsigned k,
+                                       const uint8_t *msg, size_t msg_len) {
+  // This hash-to-curve implementation is written generically with the
+  // expectation that we will eventually wish to support P-256 or P-384. If it
+  // becomes a performance bottleneck, some possible optimizations by
+  // specializing it to the curve:
+  //
+  // - c1 = (p-3)/4 = 2^519-1. |felem_exp| costs 515S + 119M for this exponent.
+  //   A more efficient addition chain for c1 would cost 511S + 3M, but it would
+  //   require specializing the particular exponent.
+  //
+  // - P-521, while large, is a Mersenne prime, so we can likely do better than
+  //   the generic Montgomery implementation if we specialize the field
+  //   operations (below).
+  //
+  // - |felem_mul| and |felem_sqr| are indirect calls to generic Montgomery
+  //   code. Given the few curves, we could specialize
+  //   |map_to_curve_simple_swu|. But doing this reasonably without duplicating
+  //   code in C is difficult. (C++ templates would be useful here.)
+  //
+  // - P-521's Z and c2 have small power-of-two absolute values. We could save
+  //   two multiplications in SSWU. (Other curves have reasonable values of Z
+  //   and inconvenient c2.) This is unlikely to be worthwhile without C++
+  //   templates to make specializing more convenient.
+
+  // See section 8.3 of draft-irtf-cfrg-hash-to-curve-06.
+  if (EC_GROUP_get_curve_name(group) != NID_secp521r1) {
+    OPENSSL_PUT_ERROR(EC, EC_R_GROUP_MISMATCH);
+    return 0;
+  }
+
+  // Z = -4, c2 = 8.
+  EC_FELEM Z, c2;
+  if (!felem_from_u8(group, &Z, 4) ||
+      !felem_from_u8(group, &c2, 8)) {
+    return 0;
+  }
+  ec_felem_neg(group, &Z, &Z);
+
+  return hash_to_curve(group, md, &Z, &c2, k, out, dst, dst_len, msg, msg_len);
+}
+
+int ec_hash_to_curve_p521_xmd_sha512_sswu(const EC_GROUP *group,
+                                          EC_RAW_POINT *out, const uint8_t *dst,
+                                          size_t dst_len, const uint8_t *msg,
+                                          size_t msg_len) {
+  return hash_to_curve_p521_xmd_sswu(group, out, dst, dst_len, EVP_sha512(),
+                                     /*k=*/256, msg, msg_len);
+}
+
+int ec_hash_to_curve_p521_xmd_sha512_sswu_ref_for_testing(
+    const EC_GROUP *group, EC_RAW_POINT *out, const uint8_t *dst,
+    size_t dst_len, const uint8_t *msg, size_t msg_len) {
+  // The specification defines the P-521 suites inconsistently. It specifies
+  // L = 96 for hash_to_field, but computing L from k as specified gives L = 98.
+  // See https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve/issues/237.
+  //
+  // Setting k to 240 gives L = 96. We implement this variation to test with the
+  // original test vectors, which were computed with the smaller L.
+  return hash_to_curve_p521_xmd_sswu(group, out, dst, dst_len, EVP_sha512(),
+                                     /*k=*/240, msg, msg_len);
+}
diff --git a/crypto/ec_extra/internal.h b/crypto/ec_extra/internal.h
new file mode 100644
index 0000000..873726c
--- /dev/null
+++ b/crypto/ec_extra/internal.h
@@ -0,0 +1,56 @@
+/* Copyright (c) 2020, Google Inc.
+ *
+ * Permission to use, copy, modify, and/or distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
+ * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION
+ * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
+ * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
+
+#ifndef OPENSSL_HEADER_EC_EXTRA_INTERNAL_H
+#define OPENSSL_HEADER_EC_EXTRA_INTERNAL_H
+
+#include <openssl/ec.h>
+
+#include "../fipsmodule/ec/internal.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+
+// Hash-to-curve.
+//
+// The following functions implement primitives from
+// draft-irtf-cfrg-hash-to-curve-06. We currently only implement a P-521 suite,
+// but others can be added as needed.
+
+// ec_hash_to_curve_p521_sha512_sswu hashes |msg| to a point on |group| and
+// writes the result to |out|, implementing the P521_XMD:SHA-512_SSWU_RO_ suite.
+// It returns one on success and zero on error. |dst| is the domain separation
+// tag and must be unique for each protocol. See section 3.1 of
+// draft-irtf-cfrg-hash-to-curve-06 for additional guidance on this parameter.
+OPENSSL_EXPORT int ec_hash_to_curve_p521_xmd_sha512_sswu(
+    const EC_GROUP *group, EC_RAW_POINT *out, const uint8_t *dst,
+    size_t dst_len, const uint8_t *msg, size_t msg_len);
+
+// ec_hash_to_curve_p521_xmd_sha512_sswu_ref_for_testing behaves like
+// |ec_hash_to_curve_p521_sha512_sswu| but reproduces a spec issue reflected in
+// the original test vectors.
+//
+// This function is exposed for test purposes and should not be used elsewhere.
+OPENSSL_EXPORT int ec_hash_to_curve_p521_xmd_sha512_sswu_ref_for_testing(
+    const EC_GROUP *group, EC_RAW_POINT *out, const uint8_t *dst,
+    size_t dst_len, const uint8_t *msg, size_t msg_len);
+
+
+#if defined(__cplusplus)
+}  // extern C
+#endif
+
+#endif  // OPENSSL_HEADER_EC_EXTRA_INTERNAL_H
diff --git a/crypto/fipsmodule/ec/ec_montgomery.c b/crypto/fipsmodule/ec/ec_montgomery.c
index 316e9d5..77afcfc 100644
--- a/crypto/fipsmodule/ec/ec_montgomery.c
+++ b/crypto/fipsmodule/ec/ec_montgomery.c
@@ -156,6 +156,24 @@
   return 1;
 }
 
+static void ec_GFp_mont_felem_reduce(const EC_GROUP *group, EC_FELEM *out,
+                                     const BN_ULONG *words, size_t num) {
+  // Convert "from" Montgomery form so the value is reduced mod p.
+  bn_from_montgomery_small(out->words, group->field.width, words, num,
+                           group->mont);
+  // Convert "to" Montgomery form to remove the R^-1 factor added.
+  ec_GFp_mont_felem_to_montgomery(group, out, out);
+  // Convert to Montgomery form to match this implementation's representation.
+  ec_GFp_mont_felem_to_montgomery(group, out, out);
+}
+
+static void ec_GFp_mont_felem_exp(const EC_GROUP *group, EC_FELEM *out,
+                                  const EC_FELEM *a, const BN_ULONG *exp,
+                                  size_t num_exp) {
+  bn_mod_exp_mont_small(out->words, a->words, group->field.width, exp, num_exp,
+                        group->mont);
+}
+
 static int ec_GFp_mont_point_get_affine_coordinates(const EC_GROUP *group,
                                                     const EC_RAW_POINT *point,
                                                     EC_FELEM *x, EC_FELEM *y) {
@@ -453,6 +471,8 @@
   out->felem_sqr = ec_GFp_mont_felem_sqr;
   out->felem_to_bytes = ec_GFp_mont_felem_to_bytes;
   out->felem_from_bytes = ec_GFp_mont_felem_from_bytes;
+  out->felem_reduce = ec_GFp_mont_felem_reduce;
+  out->felem_exp = ec_GFp_mont_felem_exp;
   out->scalar_inv0_montgomery = ec_simple_scalar_inv0_montgomery;
   out->scalar_to_montgomery_inv_vartime =
       ec_simple_scalar_to_montgomery_inv_vartime;
diff --git a/crypto/fipsmodule/ec/ec_test.cc b/crypto/fipsmodule/ec/ec_test.cc
index 6a6a71b..f7827c5 100644
--- a/crypto/fipsmodule/ec/ec_test.cc
+++ b/crypto/fipsmodule/ec/ec_test.cc
@@ -28,7 +28,9 @@
 #include <openssl/mem.h>
 #include <openssl/nid.h>
 #include <openssl/obj.h>
+#include <openssl/span.h>
 
+#include "../../ec_extra/internal.h"
 #include "../../test/file_test.h"
 #include "../../test/test_util.h"
 #include "../bn/internal.h"
@@ -132,6 +134,22 @@
   return true;
 }
 
+static bool EncodeECPoint(std::vector<uint8_t> *out, const EC_GROUP *group,
+                          const EC_POINT *p, point_conversion_form_t form) {
+  size_t len = EC_POINT_point2oct(group, p, form, nullptr, 0, nullptr);
+  if (len == 0) {
+    return false;
+  }
+
+  out->resize(len);
+  len = EC_POINT_point2oct(group, p, form, out->data(), out->size(), nullptr);
+  if (len != out->size()) {
+    return false;
+  }
+
+  return true;
+}
+
 TEST(ECTest, Encoding) {
   bssl::UniquePtr<EC_KEY> key =
       DecodeECPrivateKey(kECKeyWithoutPublic, sizeof(kECKeyWithoutPublic));
@@ -721,18 +739,12 @@
                            nullptr, nullptr));
 
   // Serialize the point.
-  size_t serialized_len = EC_POINT_point2oct(
-      group(), point.get(), POINT_CONVERSION_UNCOMPRESSED, nullptr, 0, nullptr);
-  ASSERT_NE(0u, serialized_len);
-
-  std::vector<uint8_t> serialized(serialized_len);
-  ASSERT_EQ(
-      serialized_len,
-      EC_POINT_point2oct(group(), point.get(), POINT_CONVERSION_UNCOMPRESSED,
-                         serialized.data(), serialized_len, nullptr));
+  std::vector<uint8_t> serialized;
+  ASSERT_TRUE(EncodeECPoint(&serialized, group(), point.get(),
+                            POINT_CONVERSION_UNCOMPRESSED));
 
   // Create a serialized point that is not on the curve.
-  serialized[serialized_len - 1]++;
+  serialized[serialized.size() - 1]++;
 
   ASSERT_FALSE(EC_POINT_oct2point(group(), point.get(), serialized.data(),
                                   serialized.size(), nullptr));
@@ -1035,3 +1047,138 @@
     EXPECT_EQ(Bytes(pub, pub_len), Bytes(test.expected_pub));
   }
 }
+
+TEST(ECTest, HashToCurve) {
+  struct HashToCurveTest {
+    int (*hash_to_curve)(const EC_GROUP *group, EC_RAW_POINT *out,
+                         const uint8_t *dst, size_t dst_len, const uint8_t *msg,
+                         size_t msg_len);
+    int curve_nid;
+    const char *dst;
+    const char *msg;
+    const char *x_hex;
+    const char *y_hex;
+  };
+  static const HashToCurveTest kTests[] = {
+      // See draft-irtf-cfrg-hash-to-curve-06, appendix G.3.1. Note these test
+      // |ec_hash_to_curve_p521_xmd_sha512_sswu_ref_for_testing| due to a spec
+      // issue. See
+      // https://github.com/cfrg/draft-irtf-cfrg-hash-to-curve/issues/237. We
+      // expose and test this function to check our consistency with the rest of
+      // the reference implementation.
+      {&ec_hash_to_curve_p521_xmd_sha512_sswu_ref_for_testing, NID_secp521r1,
+       "P521_XMD:SHA-512_SSWU_RO_TESTGEN", "",
+       "00ad6cb736cb0565a2b6c52dd9e53f76a9a40a44c73bfaacef03c3"
+       "ef62a9a23920b7df4de1b92754de7bb3013d9d36049da001136e7f"
+       "4b1b0ba10beac862a2b3d3c5",
+       "01c2ecab1b6f7bb6797a4b5bd416b385e891926fc17f230f2406f3"
+       "d47076526c5d90bfb4d0170fd8a339de1a66e6304d280d0404fb68"
+       "5b2ca07e2742a770b681bf56"},
+      {&ec_hash_to_curve_p521_xmd_sha512_sswu_ref_for_testing, NID_secp521r1,
+       "P521_XMD:SHA-512_SSWU_RO_TESTGEN", "abcdef0123456789",
+       "019f0195e514da4243a4d2de4b7ed2415d5205c6da11eb7deae70b"
+       "e78a61bb89ebf17f7c9970ee20b4152ae50c95e55f626bc7350d5a"
+       "0f530a91f48047bd90eeeaa7",
+       "016eaa02cd5511a96eed4ffc965bdc3f1fdbb7f4c9895eabdf168b"
+       "44250278ebca55474bc89a2f246b8fb959010502aab8a9385319bc"
+       "f69f74dd8f518bea1c7fafde"},
+      {&ec_hash_to_curve_p521_xmd_sha512_sswu_ref_for_testing, NID_secp521r1,
+       "P521_XMD:SHA-512_SSWU_RO_TESTGEN",
+       "a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+       "00de3dd7780cfcd538f7b3067d8da52c522a031244dc0d327e6e99"
+       "ef331be475354a0b481a22e0d4d35b232da260baac5b693a827a20"
+       "b1f328a416ffc47ad945a4dc",
+       "016bb6c5c965c6a80a4a5e0c2c7bbd841766eac695f88a730076b3"
+       "2d4399da01609a4a17b59a21f4f58a174d6110081b96e5aaedead3"
+       "cfd4252e74de969680ba74ab"},
+
+      // The above test vectors with the expectations updated to compute
+      // hash_to_field correctly.
+      {&ec_hash_to_curve_p521_xmd_sha512_sswu, NID_secp521r1,
+       "P521_XMD:SHA-512_SSWU_RO_TESTGEN", "",
+       "00758617b5e40aa8b30fcfd3c7453ad1abeff158de5697d6f1ccb8"
+       "4690aaa8bb6692986200d16206e85e4f39f1d2829fee1a5904a089"
+       "b4fab3b76873429877c58f99",
+       "016edf324d95fcbe4a30f06751f16cdd5d0b49921dd653cefb3ea2"
+       "dc2b5b903e36d9924a65407283588cc6c224ab6d6324c73cdc166c"
+       "e1530b46984b459e966349b3"},
+      {&ec_hash_to_curve_p521_xmd_sha512_sswu, NID_secp521r1,
+       "P521_XMD:SHA-512_SSWU_RO_TESTGEN", "abcdef0123456789",
+       "01f58bfb34825d028c392976a09cebee829734f7714c84b8a13580"
+       "afcc2eb4726e18e307476c1fccdc857a3d6767fd2882875ab132b7"
+       "fa7f3f6bae8954384001b1a1",
+       "00ee0d2d0bfb0bdc6215814fe7096a3dfbf020dce4f0645e8e21a9"
+       "0d6a6113a5ca61ae7d8f3b485b04f2eb2b85e34fc7f9f1bf367386"
+       "2e03932b0acc3655e84d480f"},
+      {&ec_hash_to_curve_p521_xmd_sha512_sswu, NID_secp521r1,
+       "P521_XMD:SHA-512_SSWU_RO_TESTGEN",
+       "a512_aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+       "016d9a90619bb20c49a2a73cc8c6218cd9b3fb13c720fff2e1f8db"
+       "ac92862c7da4faf404faeff6b64f0d9b1c5824cec99b0d0ed02b3f"
+       "acb6275ce553404ea361503e",
+       "007e301e3357fb1d53961c56e53ce2763e44b297062a3eb14b9f8d"
+       "6aadc92162a74f7e254a606275e76ea0ac343b3bc746f99804bacd"
+       "7351a76fce44347c72a6fe9a"},
+
+      // Custom test vector which triggers long DST path.
+      {&ec_hash_to_curve_p521_xmd_sha512_sswu, NID_secp521r1,
+       "P521_XMD:SHA-512_SSWU_RO_TESTGEN_aaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa"
+       "aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa",
+       "abcdef0123456789",
+       "0036b0c8bbec60335ff8b0397c2cb80283b97051cc949c5c190c28"
+       "92b279fafd6c372dcec3e71eab85c48ed440c14498332548ee46d0"
+       "c85442cbdc5b4032e86c3884",
+       "0081e32ca4378ae89b03142361d9c7fbe66acf0351aca3a71eca50"
+       "7a37fb8673b69cb108d079a248aedd74f06949d6623e7f7605ea10"
+       "f6f751ab574c005db7377d7f"},
+  };
+
+  for (const auto &test : kTests) {
+    SCOPED_TRACE(test.dst);
+    SCOPED_TRACE(test.msg);
+
+    bssl::UniquePtr<EC_GROUP> group(EC_GROUP_new_by_curve_name(test.curve_nid));
+    ASSERT_TRUE(group);
+    bssl::UniquePtr<EC_POINT> p(EC_POINT_new(group.get()));
+    ASSERT_TRUE(p);
+    ASSERT_TRUE(test.hash_to_curve(
+        group.get(), &p->raw, reinterpret_cast<const uint8_t *>(test.dst),
+        strlen(test.dst), reinterpret_cast<const uint8_t *>(test.msg),
+        strlen(test.msg)));
+
+    std::vector<uint8_t> buf;
+    ASSERT_TRUE(EncodeECPoint(&buf, group.get(), p.get(),
+                              POINT_CONVERSION_UNCOMPRESSED));
+    size_t field_len = (buf.size() - 1) / 2;
+    EXPECT_EQ(test.x_hex,
+              EncodeHex(bssl::MakeConstSpan(buf).subspan(1, field_len)));
+    EXPECT_EQ(test.y_hex, EncodeHex(bssl::MakeConstSpan(buf).subspan(
+                              1 + field_len, field_len)));
+  }
+}
diff --git a/crypto/fipsmodule/ec/internal.h b/crypto/fipsmodule/ec/internal.h
index e3f2156..7295335 100644
--- a/crypto/fipsmodule/ec/internal.h
+++ b/crypto/fipsmodule/ec/internal.h
@@ -352,6 +352,23 @@
   int (*felem_from_bytes)(const EC_GROUP *group, EC_FELEM *out,
                           const uint8_t *in, size_t len);
 
+  // felem_reduce sets |out| to |words|, reduced modulo the field size, p.
+  // |words| must be less than p^2. |num| must be at most twice the width of p.
+  // This function treats |words| as secret.
+  //
+  // This function is only used in hash-to-curve and may be omitted in curves
+  // that do not support it.
+  void (*felem_reduce)(const EC_GROUP *group, EC_FELEM *out,
+                       const BN_ULONG *words, size_t num);
+
+  // felem_exp sets |out| to |a|^|exp|. It treats |a| is secret but |exp| as
+  // public.
+  //
+  // This function is used in hash-to-curve and may be NULL in curves not used
+  // with hash-to-curve.
+  void (*felem_exp)(const EC_GROUP *group, EC_FELEM *out, const EC_FELEM *a,
+                    const BN_ULONG *exp, size_t num_exp);
+
   // scalar_inv0_montgomery implements |ec_scalar_inv0_montgomery|.
   void (*scalar_inv0_montgomery)(const EC_GROUP *group, EC_SCALAR *out,
                                  const EC_SCALAR *in);