Introduce EVP_DecodeBase64.

This fixes several problems with the old API (EVP_DecodeBlock):
- Padding was completely ignored.
- ='s in the middle of the input were accepted.
- It tried to be helpful and stripped leading/trailing whitespace.

Change-Id: I99b9d5e6583f7eaf9bf0b6ee9ca39799811b58dc
Reviewed-on: https://boringssl-review.googlesource.com/1602
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/base64/base64.c b/crypto/base64/base64.c
index 2336cf5..5fe23a5 100644
--- a/crypto/base64/base64.c
+++ b/crypto/base64/base64.c
@@ -64,8 +64,6 @@
     "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 
 #define conv_bin2ascii(a) (data_bin2ascii[(a) & 0x3f])
-/* TODO(davidben): This doesn't error on bytes above 127. */
-#define conv_ascii2bin(a) (data_ascii2bin[(a) & 0x7f])
 
 /* 64 char lines
  * pad input with 0
@@ -91,13 +89,13 @@
 #define B64_ERROR 0xFF
 #define B64_NOT_BASE64(a) (((a) | 0x13) == 0xF3)
 
-static const unsigned char data_ascii2bin[128] = {
+static const uint8_t data_ascii2bin[128] = {
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE0, 0xF0, 0xFF,
     0xFF, 0xF1, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xE0, 0xFF, 0xFF, 0xFF,
     0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xF2, 0xFF, 0x3F,
     0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0xFF, 0xFF,
-    0xFF, 0x00, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
+    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
     0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12,
     0x13, 0x14, 0x15, 0x16, 0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
     0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24,
@@ -105,6 +103,13 @@
     0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
 };
 
+static uint8_t conv_ascii2bin(uint8_t a) {
+  if (a >= 128) {
+    return 0xFF;
+  }
+  return data_ascii2bin[a];
+}
+
 void EVP_EncodeInit(EVP_ENCODE_CTX *ctx) {
   ctx->length = 48;
   ctx->num = 0;
@@ -200,6 +205,62 @@
   return ret;
 }
 
+int EVP_DecodedLength(size_t *out_len, size_t len) {
+  if (len % 4 != 0) {
+    return 0;
+  }
+  *out_len = (len / 4) * 3;
+  return 1;
+}
+
+int EVP_DecodeBase64(uint8_t *out, size_t *out_len, size_t max_out,
+                     const uint8_t *in, size_t in_len) {
+  uint8_t a, b, c, d;
+  size_t pad_len = 0, len = 0, max_len, i;
+  uint32_t l;
+
+  if (!EVP_DecodedLength(&max_len, in_len) || max_out < max_len) {
+    return 0;
+  }
+
+  for (i = 0; i < in_len; i += 4) {
+    a = conv_ascii2bin(*(in++));
+    b = conv_ascii2bin(*(in++));
+    if (i + 4 == in_len && in[1] == '=') {
+        if (in[0] == '=') {
+          pad_len = 2;
+        } else {
+          pad_len = 1;
+        }
+    }
+    if (pad_len < 2) {
+      c = conv_ascii2bin(*(in++));
+    } else {
+      c = 0;
+    }
+    if (pad_len < 1) {
+      d = conv_ascii2bin(*(in++));
+    } else {
+      d = 0;
+    }
+    if ((a & 0x80) || (b & 0x80) || (c & 0x80) || (d & 0x80)) {
+      return 0;
+    }
+    l = ((((uint32_t)a) << 18L) | (((uint32_t)b) << 12L) |
+         (((uint32_t)c) << 6L) | (((uint32_t)d)));
+    *(out++) = (uint8_t)(l >> 16L) & 0xff;
+    if (pad_len < 2) {
+      *(out++) = (uint8_t)(l >> 8L) & 0xff;
+    }
+    if (pad_len < 1) {
+      *(out++) = (uint8_t)(l) & 0xff;
+    }
+    len += 3 - pad_len;
+  }
+  *out_len = len;
+  return 1;
+}
+
 void EVP_DecodeInit(EVP_ENCODE_CTX *ctx) {
   ctx->length = 30;
   ctx->num = 0;
@@ -304,6 +365,7 @@
         exp_nl = 1;
       }
       if (n > 0) {
+        /* TODO(davidben): Switch this to EVP_DecodeBase64. */
         v = EVP_DecodeBlock(out, d, n);
         n = 0;
         if (v < 0) {
@@ -347,6 +409,7 @@
 
   *outl = 0;
   if (ctx->num != 0) {
+    /* TODO(davidben): Switch this to EVP_DecodeBase64. */
     i = EVP_DecodeBlock(out, ctx->enc_data, ctx->num);
     if (i < 0) {
       return -1;
@@ -360,9 +423,7 @@
 }
 
 int EVP_DecodeBlock(uint8_t *dst, const uint8_t *src, size_t src_len) {
-  int a, b, c, d;
-  uint32_t l;
-  size_t i, ret = 0;
+  size_t dst_len;
 
   /* trim white space from the start of the line. */
   while (conv_ascii2bin(*src) == B64_WS && src_len > 0) {
@@ -376,31 +437,21 @@
     src_len--;
   }
 
-  if (src_len % 4 != 0) {
+  if (!EVP_DecodedLength(&dst_len, src_len) || dst_len > INT_MAX) {
+    return -1;
+  }
+  if (!EVP_DecodeBase64(dst, &dst_len, dst_len, src, src_len)) {
     return -1;
   }
 
-  for (i = 0; i < src_len; i += 4) {
-    a = conv_ascii2bin(*(src++));
-    b = conv_ascii2bin(*(src++));
-    c = conv_ascii2bin(*(src++));
-    d = conv_ascii2bin(*(src++));
-    if ((a & 0x80) || (b & 0x80) || (c & 0x80) || (d & 0x80)) {
-      return -1;
-    }
-    l = ((((uint32_t)a) << 18L) | (((uint32_t)b) << 12L) |
-         (((uint32_t)c) << 6L) | (((uint32_t)d)));
-    *(dst++) = (uint8_t)(l >> 16L) & 0xff;
-    *(dst++) = (uint8_t)(l >> 8L) & 0xff;
-    *(dst++) = (uint8_t)(l) & 0xff;
-    ret += 3;
+  /* EVP_DecodeBlock does not take padding into account, so put the
+   * NULs back in... so the caller can strip them back out. */
+  while (dst_len % 3 != 0) {
+    dst[dst_len++] = '\0';
   }
+  assert(dst_len <= INT_MAX);
 
-  if (ret > INT_MAX) {
-    return -1;
-  }
-
-  return ret;
+  return dst_len;
 }
 
 int EVP_EncodedLength(size_t *out_len, size_t len) {
diff --git a/crypto/base64/base64_test.c b/crypto/base64/base64_test.c
index e8a8bd0..0bee55f 100644
--- a/crypto/base64/base64_test.c
+++ b/crypto/base64/base64_test.c
@@ -57,17 +57,17 @@
 
 static int test_decode(void) {
   uint8_t out[6];
-  size_t i;
-  ssize_t len;
+  size_t i, len;
+  int ret;
 
   for (i = 0; i < kNumTests; i++) {
+    /* Test the normal API. */
     const TEST_VECTOR *t = &test_vectors[i];
     size_t expected_len = strlen(t->decoded);
-    len = EVP_DecodeBlock(out, (const uint8_t*)t->encoded, strlen(t->encoded));
-    /* TODO(davidben): EVP_DecodeBlock doesn't take padding into account. Is
-     * this behavior we can change? */
-    if (expected_len % 3 != 0) {
-      len -= 3 - (expected_len % 3);
+    if (!EVP_DecodeBase64(out, &len, sizeof(out),
+                          (const uint8_t*)t->encoded, strlen(t->encoded))) {
+      fprintf(stderr, "decode(\"%s\") failed\n", t->encoded);
+      return 0;
     }
     if (len != strlen(t->decoded) ||
         memcmp(out, t->decoded, len) != 0) {
@@ -75,14 +75,40 @@
               t->encoded, (int)len, (const char*)out, t->decoded);
       return 0;
     }
+
+    /* Test that the padding behavior of the deprecated API is
+     * preserved. */
+    ret = EVP_DecodeBlock(out, (const uint8_t*)t->encoded, strlen(t->encoded));
+    if (ret < 0) {
+      fprintf(stderr, "decode(\"%s\") failed\n", t->encoded);
+      return 0;
+    }
+    if (ret % 3 != 0) {
+      fprintf(stderr, "EVP_DecodeBlock did not ignore padding\n");
+      return 0;
+    }
+    if (expected_len % 3 != 0) {
+      ret -= 3 - (expected_len % 3);
+    }
+    if (ret != strlen(t->decoded) ||
+        memcmp(out, t->decoded, ret) != 0) {
+      fprintf(stderr, "decode(\"%s\") = \"%.*s\", want \"%s\"\n",
+              t->encoded, ret, (const char*)out, t->decoded);
+      return 0;
+    }
   }
 
-  if (EVP_DecodeBlock(out, (const uint8_t*)"a!bc", 4) >= 0) {
+  if (EVP_DecodeBase64(out, &len, sizeof(out), (const uint8_t*)"a!bc", 4)) {
     fprintf(stderr, "Failed to reject invalid characters in the middle.\n");
     return 0;
   }
 
-  if (EVP_DecodeBlock(out, (const uint8_t*)"abc", 3) >= 0) {
+  if (EVP_DecodeBase64(out, &len, sizeof(out), (const uint8_t*)"a=bc", 4)) {
+    fprintf(stderr, "Failed to reject invalid characters in the middle.\n");
+    return 0;
+  }
+
+  if (EVP_DecodeBase64(out, &len, sizeof(out), (const uint8_t*)"abc", 4)) {
     fprintf(stderr, "Failed to reject invalid input length.\n");
     return 0;
   }
diff --git a/crypto/x509/x509spki.c b/crypto/x509/x509spki.c
index 0b0b4fa..03823b7 100644
--- a/crypto/x509/x509spki.c
+++ b/crypto/x509/x509spki.c
@@ -77,15 +77,19 @@
 {
 	unsigned char *spki_der;
 	const unsigned char *p;
-	int spki_len;
+	size_t spki_len;
 	NETSCAPE_SPKI *spki;
-	if(len <= 0) len = strlen(str);
-	if (!(spki_der = OPENSSL_malloc(len + 1))) {
+	if (len <= 0)
+		len = strlen(str);
+	if (!EVP_DecodedLength(&spki_len, len)) {
+		OPENSSL_PUT_ERROR(X509, NETSCAPE_SPKI_b64_decode, X509_R_BASE64_DECODE_ERROR);
+		return NULL;
+	}
+	if (!(spki_der = OPENSSL_malloc(spki_len))) {
 		OPENSSL_PUT_ERROR(X509, NETSCAPE_SPKI_b64_decode, ERR_R_MALLOC_FAILURE);
 		return NULL;
 	}
-	spki_len = EVP_DecodeBlock(spki_der, (const unsigned char *)str, len);
-	if(spki_len < 0) {
+	if (!EVP_DecodeBase64(spki_der, &spki_len, spki_len, (const uint8_t *)str, len)) {
 		OPENSSL_PUT_ERROR(X509, NETSCAPE_SPKI_b64_decode, X509_R_BASE64_DECODE_ERROR);
 		OPENSSL_free(spki_der);
 		return NULL;
diff --git a/include/openssl/base64.h b/include/openssl/base64.h
index de94d8e..44f91eb 100644
--- a/include/openssl/base64.h
+++ b/include/openssl/base64.h
@@ -111,6 +111,19 @@
 
 /* Decoding */
 
+/* EVP_DecodedLength sets |*out_len| to the maximum number of bytes needed to
+ * decode |len| base64 bytes with |EVP_DecodeBase64|. It returns one on success
+ * and zero if |len| is not a multiple of four. */
+OPENSSL_EXPORT int EVP_DecodedLength(size_t *out_len, size_t len);
+
+/* EVP_DecodeBase64 decodes |in_len| bytes of base64 from |in| and writes
+ * |*out_len| bytes to |out|. |max_out| is the size of the output buffer; if it
+ * is smaller than the maximum output size for |in_len|, the operation fails.
+ * It returns one on success and zero on error. */
+OPENSSL_EXPORT int EVP_DecodeBase64(uint8_t *out, size_t *out_len,
+                                    size_t max_out, const uint8_t *in,
+                                    size_t in_len);
+
 /* EVP_DecodeInit initialises |*ctx|, which is typically stack allocated, for
  * a decoding operation.
  *
@@ -135,11 +148,13 @@
 OPENSSL_EXPORT int EVP_DecodeFinal(EVP_ENCODE_CTX *ctx, uint8_t *out,
                                    int *out_len);
 
-/* EVP_DecodeBlock encodes |src_len| bytes from |src| and writes the result to
- * |dst|. It returns the number of bytes written or -1 on error.
+/* Deprecated: EVP_DecodeBlock decodes |src_len| bytes from |src| and
+ * writes the result to |dst|. It returns the number of bytes written
+ * or -1 on error.
  *
  * WARNING: EVP_DecodeBlock's return value does not take padding into
- * account. TODO(davidben): Possible or worth it to fix or add new API? */
+ * account. It also strips leading whitespace and trailing
+ * whitespace. */
 OPENSSL_EXPORT int EVP_DecodeBlock(uint8_t *dst, const uint8_t *src,
                                    size_t src_len);