Remove |EC_POINTs_mul| & simplify p256-x86_64.

Without |EC_POINTs_mul|, there's never more than one variable point
passed to a |EC_METHOD|'s |mul| method. This allows them to be
simplified considerably. In this commit, the p256-x86_64 implementation
has been simplified to eliminate the heap allocation and looping
related that was previously necessary to deal with the possibility of
there being multiple input points. The other implementations were left
mostly as-is; they should be similarly simplified in the future.

Change-Id: I70751d1d5296be2562af0730e7ccefdba7a1acae
Reviewed-on: https://boringssl-review.googlesource.com/6493
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/ec/ec.c b/crypto/ec/ec.c
index 021ab1f..eb8e1c9 100644
--- a/crypto/ec/ec.c
+++ b/crypto/ec/ec.c
@@ -837,39 +837,23 @@
 }
 
 int EC_POINT_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
-                 const EC_POINT *point, const BIGNUM *p_scalar, BN_CTX *ctx) {
-  /* just a convenient interface to EC_POINTs_mul() */
+                 const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx) {
+  /* Previously, this function set |r| to the point at infinity if there was
+   * nothing to multiply. But, nobody should be calling this function with
+   * nothing to multiply in the first place. */
+  if ((g_scalar == NULL && p_scalar == NULL) ||
+      ((p == NULL) != (p_scalar == NULL)))  {
+    OPENSSL_PUT_ERROR(EC, ERR_R_PASSED_NULL_PARAMETER);
+    return 0;
+  }
 
-  const EC_POINT *points[1];
-  const BIGNUM *scalars[1];
-
-  points[0] = point;
-  scalars[0] = p_scalar;
-
-  return EC_POINTs_mul(group, r, g_scalar, (point != NULL && p_scalar != NULL),
-                       points, scalars, ctx);
-}
-
-int EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
-                  size_t num, const EC_POINT *points[], const BIGNUM *scalars[],
-                  BN_CTX *ctx) {
-  if (group->meth != r->meth) {
+  if (group->meth != r->meth ||
+      (p != NULL && group->meth != p->meth)) {
     OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
     return 0;
   }
 
-  size_t i;
-  for (i = 0; i < num; i++) {
-    if (points[i]->meth != r->meth) {
-      break;
-    }
-  }
-  if (i != num) {
-    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
-    return 0;
-  }
-
-  return group->meth->mul(group, r, scalar, num, points, scalars, ctx);
+  return group->meth->mul(group, r, g_scalar, p, p_scalar, ctx);
 }
 
 int ec_point_set_Jprojective_coordinates_GFp(const EC_GROUP *group, EC_POINT *point,
diff --git a/crypto/ec/internal.h b/crypto/ec/internal.h
index 33dc21b..a761784 100644
--- a/crypto/ec/internal.h
+++ b/crypto/ec/internal.h
@@ -95,12 +95,13 @@
   int (*point_get_affine_coordinates)(const EC_GROUP *, const EC_POINT *,
                                       BIGNUM *x, BIGNUM *y, BN_CTX *);
 
-  /* used by EC_POINTs_mul, EC_POINT_mul, EC_POINT_precompute_mult,
-   * EC_POINT_have_precompute_mult
-   * (default implementations are used if the 'mul' pointer is 0): */
-  int (*mul)(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
-             size_t num, const EC_POINT *points[], const BIGNUM *scalars[],
-             BN_CTX *);
+  /* Computes |r = g_scalar*generator + p_scalar*p| if |g_scalar| and |p_scalar|
+   * are both non-null. Computes |r = g_scalar*generator| if |p_scalar| is null.
+   * Computes |r = p_scalar*p| if g_scalar is null. At least one of |g_scalar|
+   * and |p_scalar| must be non-null, and |p| must be non-null if |p_scalar| is
+   * non-null. */
+  int (*mul)(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
+             const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx);
 
   /* internal functions */
 
@@ -164,9 +165,8 @@
  * a built-in group. */
 const BN_MONT_CTX *ec_group_get_mont_data(const EC_GROUP *group);
 
-int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
-                size_t num, const EC_POINT *points[], const BIGNUM *scalars[],
-                BN_CTX *);
+int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
+                const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx);
 
 /* method functions in simple.c */
 int ec_GFp_simple_group_init(EC_GROUP *);
diff --git a/crypto/ec/p224-64.c b/crypto/ec/p224-64.c
index 7ac53c5..40b7cc5 100644
--- a/crypto/ec/p224-64.c
+++ b/crypto/ec/p224-64.c
@@ -1121,12 +1121,16 @@
       (void (*)(void *, const void *))felem_contract);
 }
 
-/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL values
- * Result is stored in r (r can equal one of the inputs). */
 int ec_GFp_nistp224_points_mul(const EC_GROUP *group, EC_POINT *r,
-                               const BIGNUM *scalar, size_t num,
-                               const EC_POINT *points[],
-                               const BIGNUM *scalars[], BN_CTX *ctx) {
+                               const BIGNUM *g_scalar, const EC_POINT *p_,
+                               const BIGNUM *p_scalar_, BN_CTX *ctx) {
+  /* TODO: This function used to take |points| and |scalars| as arrays of
+   * |num| elements. The code below should be simplified to work in terms of
+   * |p_| and |p_scalar_|. */
+  size_t num = p_ != NULL ? 1 : 0;
+  const EC_POINT **points = p_ != NULL ? &p_ : NULL;
+  BIGNUM const *const *scalars = p_ != NULL ? &p_scalar_ : NULL;
+
   int ret = 0;
   int j;
   unsigned i;
@@ -1186,7 +1190,7 @@
       if (i == num) {
         /* the generator */
         p = EC_GROUP_get0_generator(group);
-        p_scalar = scalar;
+        p_scalar = g_scalar;
       } else {
         /* the i^th point */
         p = points[i];
@@ -1194,7 +1198,7 @@
       }
 
       if (p_scalar != NULL && p != NULL) {
-        /* reduce scalar to 0 <= scalar < 2^224 */
+        /* reduce g_scalar to 0 <= g_scalar < 2^224 */
         if (BN_num_bits(p_scalar) > 224 || BN_is_negative(p_scalar)) {
           /* this is an unusual input, and we don't guarantee
            * constant-timeness */
@@ -1239,25 +1243,24 @@
     }
   }
 
-  /* the scalar for the generator */
-  if (scalar != NULL) {
+  if (g_scalar != NULL) {
     memset(g_secret, 0, sizeof(g_secret));
-    /* reduce scalar to 0 <= scalar < 2^224 */
-    if (BN_num_bits(scalar) > 224 || BN_is_negative(scalar)) {
+    /* reduce g_scalar to 0 <= g_scalar < 2^224 */
+    if (BN_num_bits(g_scalar) > 224 || BN_is_negative(g_scalar)) {
       /* this is an unusual input, and we don't guarantee constant-timeness */
-      if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
+      if (!BN_nnmod(tmp_scalar, g_scalar, &group->order, ctx)) {
         OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
         goto err;
       }
       num_bytes = BN_bn2bin(tmp_scalar, tmp);
     } else {
-      num_bytes = BN_bn2bin(scalar, tmp);
+      num_bytes = BN_bn2bin(g_scalar, tmp);
     }
 
     flip_endian(g_secret, tmp, num_bytes);
   }
   batch_mul(x_out, y_out, z_out, (const felem_bytearray(*))secrets,
-            num_points, scalar != NULL ? g_secret : NULL, mixed,
+            num_points, g_scalar != NULL ? g_secret : NULL, mixed,
             (const felem(*)[17][3])pre_comp);
 
   /* reduce the output to its unique minimal representation */
diff --git a/crypto/ec/p256-64.c b/crypto/ec/p256-64.c
index d91d3ae..613c8dd 100644
--- a/crypto/ec/p256-64.c
+++ b/crypto/ec/p256-64.c
@@ -1701,12 +1701,16 @@
       (void (*)(void *, const void *))smallfelem_assign);
 }
 
-/* Computes scalar*generator + \sum scalars[i]*points[i], ignoring NULL
- * values Result is stored in r (r can equal one of the inputs). */
 int ec_GFp_nistp256_points_mul(const EC_GROUP *group, EC_POINT *r,
-                               const BIGNUM *scalar, size_t num,
-                               const EC_POINT *points[],
-                               const BIGNUM *scalars[], BN_CTX *ctx) {
+                               const BIGNUM *g_scalar, const EC_POINT *p_,
+                               const BIGNUM *p_scalar_, BN_CTX *ctx) {
+  /* TODO: This function used to take |points| and |scalars| as arrays of
+   * |num| elements. The code below should be simplified to work in terms of |p|
+   * and |p_scalar|. */
+  size_t num = p_ != NULL ? 1 : 0;
+  const EC_POINT **points = p_ != NULL ? &p_ : NULL;
+  BIGNUM const *const *scalars = p_ != NULL ? &p_scalar_ : NULL;
+
   int ret = 0;
   int j;
   int mixed = 0;
@@ -1765,14 +1769,14 @@
       if (i == num) {
         /* we didn't have a valid precomputation, so we pick the generator. */
         p = EC_GROUP_get0_generator(group);
-        p_scalar = scalar;
+        p_scalar = g_scalar;
       } else {
         /* the i^th point */
         p = points[i];
         p_scalar = scalars[i];
       }
       if (p_scalar != NULL && p != NULL) {
-        /* reduce scalar to 0 <= scalar < 2^256 */
+        /* reduce g_scalar to 0 <= g_scalar < 2^256 */
         if (BN_num_bits(p_scalar) > 256 || BN_is_negative(p_scalar)) {
           /* this is an unusual input, and we don't guarantee
            * constant-timeness. */
@@ -1814,25 +1818,24 @@
     }
   }
 
-  /* the scalar for the generator */
-  if (scalar != NULL) {
+  if (g_scalar != NULL) {
     memset(g_secret, 0, sizeof(g_secret));
-    /* reduce scalar to 0 <= scalar < 2^256 */
-    if (BN_num_bits(scalar) > 256 || BN_is_negative(scalar)) {
+    /* reduce g_scalar to 0 <= g_scalar < 2^256 */
+    if (BN_num_bits(g_scalar) > 256 || BN_is_negative(g_scalar)) {
       /* this is an unusual input, and we don't guarantee
        * constant-timeness. */
-      if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
+      if (!BN_nnmod(tmp_scalar, g_scalar, &group->order, ctx)) {
         OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
         goto err;
       }
       num_bytes = BN_bn2bin(tmp_scalar, tmp);
     } else {
-      num_bytes = BN_bn2bin(scalar, tmp);
+      num_bytes = BN_bn2bin(g_scalar, tmp);
     }
     flip_endian(g_secret, tmp, num_bytes);
   }
   batch_mul(x_out, y_out, z_out, (const felem_bytearray(*))secrets,
-            num_points, scalar != NULL ? g_secret : NULL, mixed,
+            num_points, g_scalar != NULL ? g_secret : NULL, mixed,
             (const smallfelem(*)[17][3])pre_comp);
 
   /* reduce the output to its unique minimal representation */
diff --git a/crypto/ec/p256-x86_64.c b/crypto/ec/p256-x86_64.c
index 52dd634..5244c92 100644
--- a/crypto/ec/p256-x86_64.c
+++ b/crypto/ec/p256-x86_64.c
@@ -22,6 +22,7 @@
 
 #include <openssl/ec.h>
 
+#include <assert.h>
 #include <stdint.h>
 #include <string.h>
 
@@ -44,11 +45,11 @@
 #endif
 
 #if defined(__GNUC__)
-#define ALIGN32 __attribute((aligned(32)))
+#define ALIGN(x) __attribute((aligned(x)))
 #elif defined(_MSC_VER)
-#define ALIGN32 __declspec(align(32))
+#define ALIGN(x) __declspec(align(x))
 #else
-#define ALIGN32
+#define ALIGN(x)
 #endif
 
 #define ALIGNPTR(p, N) ((uint8_t *)p + N - (size_t)p % N)
@@ -254,130 +255,117 @@
   return 1;
 }
 
-/* r = sum(scalar[i]*point[i]) */
+/* r = p * p_scalar */
 static int ecp_nistz256_windowed_mul(const EC_GROUP *group, P256_POINT *r,
-                                     const BIGNUM **scalar,
-                                     const EC_POINT **point, int num,
+                                     const EC_POINT *p, const BIGNUM *p_scalar,
                                      BN_CTX *ctx) {
+  assert(p != NULL);
+  assert(p_scalar != NULL);
+
   static const unsigned kWindowSize = 5;
   static const unsigned kMask = (1 << (5 /* kWindowSize */ + 1)) - 1;
 
-  void *table_storage = OPENSSL_malloc(num * 16 * sizeof(P256_POINT) + 64);
-  uint8_t(*p_str)[33] = OPENSSL_malloc(num * 33 * sizeof(uint8_t));
-  const BIGNUM **scalars = OPENSSL_malloc(num * sizeof(BIGNUM *));
+  /* A |P256_POINT| is (3 * 32) = 96 bytes, and the 64-byte alignment should
+   * add no more than 63 bytes of overhead. Thus, |table| should require
+   * ~1599 ((96 * 16) + 63) bytes of stack space. */
+  ALIGN(64) P256_POINT table[16];
+  uint8_t p_str[33];
+
 
   int ret = 0;
   BN_CTX *new_ctx = NULL;
   int ctx_started = 0;
 
-  if (table_storage == NULL ||
-      p_str == NULL ||
-      scalars == NULL) {
-    OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
-    goto err;
-  }
-
-  P256_POINT(*table)[16] = (void *)ALIGNPTR(table_storage, 64);
-
-  int i;
-  for (i = 0; i < num; i++) {
-    P256_POINT *row = table[i];
-
-    if (BN_num_bits(scalar[i]) > 256 || BN_is_negative(scalar[i])) {
-      if (!ctx_started) {
-        if (ctx == NULL) {
-          new_ctx = BN_CTX_new();
-          if (new_ctx == NULL) {
-            OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
-            goto err;
-          }
-          ctx = new_ctx;
-        }
-        BN_CTX_start(ctx);
-        ctx_started = 1;
-      }
-      BIGNUM *mod = BN_CTX_get(ctx);
-      if (mod == NULL) {
+  if (BN_num_bits(p_scalar) > 256 || BN_is_negative(p_scalar)) {
+    if (ctx == NULL) {
+      new_ctx = BN_CTX_new();
+      if (new_ctx == NULL) {
         OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
         goto err;
       }
-
-      if (!BN_nnmod(mod, scalar[i], &group->order, ctx)) {
-        OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
-        goto err;
-      }
-      scalars[i] = mod;
-    } else {
-      scalars[i] = scalar[i];
+      ctx = new_ctx;
     }
-
-    int j;
-    for (j = 0; j < scalars[i]->top * BN_BYTES; j += BN_BYTES) {
-      BN_ULONG d = scalars[i]->d[j / BN_BYTES];
-
-      p_str[i][j + 0] = d & 0xff;
-      p_str[i][j + 1] = (d >> 8) & 0xff;
-      p_str[i][j + 2] = (d >> 16) & 0xff;
-      p_str[i][j + 3] = (d >>= 24) & 0xff;
-      if (BN_BYTES == 8) {
-        d >>= 8;
-        p_str[i][j + 4] = d & 0xff;
-        p_str[i][j + 5] = (d >> 8) & 0xff;
-        p_str[i][j + 6] = (d >> 16) & 0xff;
-        p_str[i][j + 7] = (d >> 24) & 0xff;
-      }
-    }
-
-    for (; j < 33; j++) {
-      p_str[i][j] = 0;
-    }
-
-    /* table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
-     * not stored. All other values are actually stored with an offset of -1 in
-     * table. */
-
-    if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &point[i]->X) ||
-        !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &point[i]->Y) ||
-        !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &point[i]->Z)) {
-      OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
+    BN_CTX_start(ctx);
+    ctx_started = 1;
+    BIGNUM *mod = BN_CTX_get(ctx);
+    if (mod == NULL) {
+      OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
       goto err;
     }
-
-    ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]);
-    ecp_nistz256_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
-    ecp_nistz256_point_double(&row[4 - 1], &row[2 - 1]);
-    ecp_nistz256_point_double(&row[6 - 1], &row[3 - 1]);
-    ecp_nistz256_point_double(&row[8 - 1], &row[4 - 1]);
-    ecp_nistz256_point_double(&row[12 - 1], &row[6 - 1]);
-    ecp_nistz256_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
-    ecp_nistz256_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
-    ecp_nistz256_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
-    ecp_nistz256_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
-    ecp_nistz256_point_double(&row[14 - 1], &row[7 - 1]);
-    ecp_nistz256_point_double(&row[10 - 1], &row[5 - 1]);
-    ecp_nistz256_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
-    ecp_nistz256_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
-    ecp_nistz256_point_add(&row[16 - 1], &row[15 - 1], &row[1 - 1]);
+    if (!BN_nnmod(mod, p_scalar, &group->order, ctx)) {
+      OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
+      goto err;
+    }
+    p_scalar = mod;
   }
 
+  int j;
+  for (j = 0; j < p_scalar->top * BN_BYTES; j += BN_BYTES) {
+    BN_ULONG d = p_scalar->d[j / BN_BYTES];
+
+    p_str[j + 0] = d & 0xff;
+    p_str[j + 1] = (d >> 8) & 0xff;
+    p_str[j + 2] = (d >> 16) & 0xff;
+    p_str[j + 3] = (d >>= 24) & 0xff;
+    if (BN_BYTES == 8) {
+      d >>= 8;
+      p_str[j + 4] = d & 0xff;
+      p_str[j + 5] = (d >> 8) & 0xff;
+      p_str[j + 6] = (d >> 16) & 0xff;
+      p_str[j + 7] = (d >> 24) & 0xff;
+    }
+  }
+
+  for (; j < 33; j++) {
+    p_str[j] = 0;
+  }
+
+  /* table[0] is implicitly (0,0,0) (the point at infinity), therefore it is
+   * not stored. All other values are actually stored with an offset of -1 in
+   * table. */
+  P256_POINT *row = table;
+
+  if (!ecp_nistz256_bignum_to_field_elem(row[1 - 1].X, &p->X) ||
+      !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Y, &p->Y) ||
+      !ecp_nistz256_bignum_to_field_elem(row[1 - 1].Z, &p->Z)) {
+    OPENSSL_PUT_ERROR(EC, EC_R_COORDINATES_OUT_OF_RANGE);
+    goto err;
+  }
+
+  ecp_nistz256_point_double(&row[2 - 1], &row[1 - 1]);
+  ecp_nistz256_point_add(&row[3 - 1], &row[2 - 1], &row[1 - 1]);
+  ecp_nistz256_point_double(&row[4 - 1], &row[2 - 1]);
+  ecp_nistz256_point_double(&row[6 - 1], &row[3 - 1]);
+  ecp_nistz256_point_double(&row[8 - 1], &row[4 - 1]);
+  ecp_nistz256_point_double(&row[12 - 1], &row[6 - 1]);
+  ecp_nistz256_point_add(&row[5 - 1], &row[4 - 1], &row[1 - 1]);
+  ecp_nistz256_point_add(&row[7 - 1], &row[6 - 1], &row[1 - 1]);
+  ecp_nistz256_point_add(&row[9 - 1], &row[8 - 1], &row[1 - 1]);
+  ecp_nistz256_point_add(&row[13 - 1], &row[12 - 1], &row[1 - 1]);
+  ecp_nistz256_point_double(&row[14 - 1], &row[7 - 1]);
+  ecp_nistz256_point_double(&row[10 - 1], &row[5 - 1]);
+  ecp_nistz256_point_add(&row[15 - 1], &row[14 - 1], &row[1 - 1]);
+  ecp_nistz256_point_add(&row[11 - 1], &row[10 - 1], &row[1 - 1]);
+  ecp_nistz256_point_add(&row[16 - 1], &row[15 - 1], &row[1 - 1]);
+
   BN_ULONG tmp[P256_LIMBS];
-  ALIGN32 P256_POINT h;
+  ALIGN(32) P256_POINT h;
   unsigned index = 255;
-  unsigned wvalue = p_str[0][(index - 1) / 8];
+  unsigned wvalue = p_str[(index - 1) / 8];
   wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
 
-  ecp_nistz256_select_w5(r, table[0], booth_recode_w5(wvalue) >> 1);
+  ecp_nistz256_select_w5(r, table, booth_recode_w5(wvalue) >> 1);
 
   while (index >= 5) {
-    for (i = (index == 255 ? 1 : 0); i < num; i++) {
+    if (index != 255) {
       unsigned off = (index - 1) / 8;
 
-      wvalue = p_str[i][off] | p_str[i][off + 1] << 8;
+      wvalue = p_str[off] | p_str[off + 1] << 8;
       wvalue = (wvalue >> ((index - 1) % 8)) & kMask;
 
       wvalue = booth_recode_w5(wvalue);
 
-      ecp_nistz256_select_w5(&h, table[i], wvalue >> 1);
+      ecp_nistz256_select_w5(&h, table, wvalue >> 1);
 
       ecp_nistz256_neg(tmp, h.Y);
       copy_conditional(h.Y, tmp, (wvalue & 1));
@@ -395,26 +383,21 @@
   }
 
   /* Final window */
-  for (i = 0; i < num; i++) {
-    wvalue = p_str[i][0];
-    wvalue = (wvalue << 1) & kMask;
+  wvalue = p_str[0];
+  wvalue = (wvalue << 1) & kMask;
 
-    wvalue = booth_recode_w5(wvalue);
+  wvalue = booth_recode_w5(wvalue);
 
-    ecp_nistz256_select_w5(&h, table[i], wvalue >> 1);
+  ecp_nistz256_select_w5(&h, table, wvalue >> 1);
 
-    ecp_nistz256_neg(tmp, h.Y);
-    copy_conditional(h.Y, tmp, wvalue & 1);
+  ecp_nistz256_neg(tmp, h.Y);
+  copy_conditional(h.Y, tmp, wvalue & 1);
 
-    ecp_nistz256_point_add(r, r, &h);
-  }
+  ecp_nistz256_point_add(r, r, &h);
 
   ret = 1;
 
 err:
-  OPENSSL_free(table_storage);
-  OPENSSL_free(p_str);
-  OPENSSL_free((BIGNUM**) scalars);
   if (ctx_started) {
     BN_CTX_end(ctx);
   }
@@ -422,14 +405,15 @@
   return ret;
 }
 
-/* r = scalar*G + sum(scalars[i]*points[i]) */
 static int ecp_nistz256_points_mul(
-    const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar, size_t num,
-    const EC_POINT *points[], const BIGNUM *scalars[], BN_CTX *ctx) {
+    const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
+    const EC_POINT *p_, const BIGNUM *p_scalar, BN_CTX *ctx) {
+  assert((p_ != NULL) == (p_scalar != NULL));
+
   static const unsigned kWindowSize = 7;
   static const unsigned kMask = (1 << (7 /* kWindowSize */ + 1)) - 1;
 
-  ALIGN32 union {
+  ALIGN(32) union {
     P256_POINT p;
     P256_POINT_AFFINE a;
   } t, p;
@@ -438,10 +422,6 @@
   BN_CTX *new_ctx = NULL;
   int ctx_started = 0;
 
-  if (scalar == NULL && num == 0) {
-    return EC_POINT_set_to_infinity(group, r);
-  }
-
   /* Need 256 bits for space for all coordinates. */
   if (bn_wexpand(&r->X, P256_LIMBS) == NULL ||
       bn_wexpand(&r->Y, P256_LIMBS) == NULL ||
@@ -453,36 +433,33 @@
   r->Y.top = P256_LIMBS;
   r->Z.top = P256_LIMBS;
 
-  if (scalar) {
-    if (BN_num_bits(scalar) > 256 || BN_is_negative(scalar)) {
+  if (g_scalar != NULL) {
+    if (BN_num_bits(g_scalar) > 256 || BN_is_negative(g_scalar)) {
       if (ctx == NULL) {
         new_ctx = BN_CTX_new();
         if (new_ctx == NULL) {
-          OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
           goto err;
         }
         ctx = new_ctx;
       }
       BN_CTX_start(ctx);
       ctx_started = 1;
-
       BIGNUM *tmp_scalar = BN_CTX_get(ctx);
       if (tmp_scalar == NULL) {
-        OPENSSL_PUT_ERROR(EC, ERR_R_MALLOC_FAILURE);
         goto err;
       }
 
-      if (!BN_nnmod(tmp_scalar, scalar, &group->order, ctx)) {
+      if (!BN_nnmod(tmp_scalar, g_scalar, &group->order, ctx)) {
         OPENSSL_PUT_ERROR(EC, ERR_R_BN_LIB);
         goto err;
       }
-      scalar = tmp_scalar;
+      g_scalar = tmp_scalar;
     }
 
     uint8_t p_str[33] = {0};
     int i;
-    for (i = 0; i < scalar->top * BN_BYTES; i += BN_BYTES) {
-      BN_ULONG d = scalar->d[i / BN_BYTES];
+    for (i = 0; i < g_scalar->top * BN_BYTES; i += BN_BYTES) {
+      BN_ULONG d = g_scalar->d[i / BN_BYTES];
 
       p_str[i + 0] = d & 0xff;
       p_str[i + 1] = (d >> 8) & 0xff;
@@ -533,14 +510,14 @@
     }
   }
 
-  const int p_is_infinity = !scalar;
-  if (num) {
+  const int p_is_infinity = g_scalar == NULL;
+  if (p_scalar != NULL) {
     P256_POINT *out = &t.p;
     if (p_is_infinity) {
       out = &p.p;
     }
 
-    if (!ecp_nistz256_windowed_mul(group, out, scalars, points, num, ctx)) {
+    if (!ecp_nistz256_windowed_mul(group, out, p_, p_scalar, ctx)) {
       goto err;
     }
 
diff --git a/crypto/ec/wnaf.c b/crypto/ec/wnaf.c
index 298f02e..ba2257c 100644
--- a/crypto/ec/wnaf.c
+++ b/crypto/ec/wnaf.c
@@ -224,15 +224,8 @@
                                                                          ? 2   \
                                                                          : 1))
 
-/* Compute
- *      \sum scalars[i]*points[i],
- * also including
- *      scalar*generator
- * in the addition if scalar != NULL
- */
-int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *scalar,
-                size_t num, const EC_POINT *points[], const BIGNUM *scalars[],
-                BN_CTX *ctx) {
+int ec_wNAF_mul(const EC_GROUP *group, EC_POINT *r, const BIGNUM *g_scalar,
+                const EC_POINT *p, const BIGNUM *p_scalar, BN_CTX *ctx) {
   BN_CTX *new_ctx = NULL;
   const EC_POINT *generator = NULL;
   EC_POINT *tmp = NULL;
@@ -251,22 +244,6 @@
   EC_POINT ***val_sub = NULL; /* pointers to sub-arrays of 'val' */
   int ret = 0;
 
-  if (group->meth != r->meth) {
-    OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
-    return 0;
-  }
-
-  if ((scalar == NULL) && (num == 0)) {
-    return EC_POINT_set_to_infinity(group, r);
-  }
-
-  for (i = 0; i < num; i++) {
-    if (group->meth != points[i]->meth) {
-      OPENSSL_PUT_ERROR(EC, EC_R_INCOMPATIBLE_OBJECTS);
-      return 0;
-    }
-  }
-
   if (ctx == NULL) {
     ctx = new_ctx = BN_CTX_new();
     if (ctx == NULL) {
@@ -274,16 +251,23 @@
     }
   }
 
+  /* TODO: This function used to take |points| and |scalars| as arrays of
+   * |num| elements. The code below should be simplified to work in terms of |p|
+   * and |p_scalar|. */
+  size_t num = p != NULL ? 1 : 0;
+  const EC_POINT **points = p != NULL ? &p : NULL;
+  const BIGNUM **scalars = p != NULL ? &p_scalar : NULL;
+
   total_num = num;
 
-  if (scalar != NULL) {
+  if (g_scalar != NULL) {
     generator = EC_GROUP_get0_generator(group);
     if (generator == NULL) {
       OPENSSL_PUT_ERROR(EC, EC_R_UNDEFINED_GENERATOR);
       goto err;
     }
 
-    ++total_num; /* treat 'scalar' like 'num'-th element of 'scalars' */
+    ++total_num; /* treat 'g_scalar' like 'num'-th element of 'scalars' */
   }
 
 
@@ -309,12 +293,12 @@
   for (i = 0; i < total_num; i++) {
     size_t bits;
 
-    bits = i < num ? BN_num_bits(scalars[i]) : BN_num_bits(scalar);
+    bits = i < num ? BN_num_bits(scalars[i]) : BN_num_bits(g_scalar);
     wsize[i] = EC_window_bits_for_scalar_size(bits);
     num_val += (size_t)1 << (wsize[i] - 1);
     wNAF[i + 1] = NULL; /* make sure we always have a pivot */
     wNAF[i] =
-        compute_wNAF((i < num ? scalars[i] : scalar), wsize[i], &wNAF_len[i]);
+        compute_wNAF((i < num ? scalars[i] : g_scalar), wsize[i], &wNAF_len[i]);
     if (wNAF[i] == NULL) {
       goto err;
     }
diff --git a/include/openssl/ec.h b/include/openssl/ec.h
index 95f25f8..b0a84c6 100644
--- a/include/openssl/ec.h
+++ b/include/openssl/ec.h
@@ -269,13 +269,6 @@
                                 const BIGNUM *n, const EC_POINT *q,
                                 const BIGNUM *m, BN_CTX *ctx);
 
-/* EC_POINTs_mul sets r = generator*n + sum(p[i]*m[i]). It returns one on
- * success and zero otherwise. If |ctx| is not NULL, it may be used. */
-OPENSSL_EXPORT int EC_POINTs_mul(const EC_GROUP *group, EC_POINT *r,
-                                 const BIGNUM *n, size_t num,
-                                 const EC_POINT *p[], const BIGNUM *m[],
-                                 BN_CTX *ctx);
-
 
 /* Deprecated functions. */