Parse BER for PKCS#12 more accurately.

CBS_asn1_ber_to_der currently uses heuristics because implicitly-tagged
constructed strings in BER are ambiguous with implicitly-tagged sequences. It's
not possible to convert BER to DER without knowing the schema.

Fortunately, implicitly tagged strings don't appear often so instead split the
job up: CBS_asn1_ber_to_der fixes indefinite-length elements and constructed
strings it can see. Implicitly-tagged strings it leaves uncoverted, but they
will only nest one level down (because BER kindly allows one to nest
constructed strings arbitrarily!).

CBS_get_asn1_implicit_string then performs the final concatenation at parse
time. This isn't much more complex and lets us parse BER more accurately and
also reject a number of mis-encoded values (e.g. constructed INTEGERs are not a
thing) we'd previously let through. The downside is the post-conversion parsing
code must be aware of this limitation of CBS_asn1_ber_to_der. Fortunately,
there's only one implicitly-tagged string in our PKCS#12 code.

(In the category of things that really really don't matter, but I had spare
cycles and the old BER converter is weird.)

Change-Id: Iebdd13b08559fa158b308ef83a5bb07bfdf80ae8
Reviewed-on: https://boringssl-review.googlesource.com/7052
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/bytestring/ber.c b/crypto/bytestring/ber.c
index 6f7d107..2a968e1 100644
--- a/crypto/bytestring/ber.c
+++ b/crypto/bytestring/ber.c
@@ -14,6 +14,7 @@
 
 #include <openssl/bytestring.h>
 
+#include <assert.h>
 #include <string.h>
 
 #include "internal.h"
@@ -24,11 +25,36 @@
  * input could otherwise cause the stack to overflow. */
 static const unsigned kMaxDepth = 2048;
 
+/* is_string_type returns one if |tag| is a string type and zero otherwise. It
+ * ignores the constructed bit. */
+static int is_string_type(unsigned tag) {
+  if ((tag & 0xc0) != 0) {
+    return 0;
+  }
+  switch (tag & 0x1f) {
+    case CBS_ASN1_BITSTRING:
+    case CBS_ASN1_OCTETSTRING:
+    case CBS_ASN1_NUMERICSTRING:
+    case CBS_ASN1_PRINTABLESTRING:
+    case CBS_ASN1_T16STRING:
+    case CBS_ASN1_VIDEOTEXSTRING:
+    case CBS_ASN1_IA5STRING:
+    case CBS_ASN1_GRAPHICSTRING:
+    case CBS_ASN1_VISIBLESTRING:
+    case CBS_ASN1_GENERALSTRING:
+    case CBS_ASN1_UNIVERSALSTRING:
+    case CBS_ASN1_BMPSTRING:
+      return 1;
+    default:
+      return 0;
+  }
+}
+
 /* cbs_find_ber walks an ASN.1 structure in |orig_in| and sets |*ber_found|
- * depending on whether an indefinite length element was found. The value of
- * |in| is not changed. It returns one on success (i.e. |*ber_found| was set)
- * and zero on error. */
-static int cbs_find_ber(CBS *orig_in, char *ber_found, unsigned depth) {
+ * depending on whether an indefinite length element or constructed string was
+ * found. The value of |orig_in| is not changed. It returns one on success (i.e.
+ * |*ber_found| was set) and zero on error. */
+static int cbs_find_ber(const CBS *orig_in, char *ber_found, unsigned depth) {
   CBS in;
 
   if (depth > kMaxDepth) {
@@ -49,10 +75,16 @@
     if (CBS_len(&contents) == header_len &&
         header_len > 0 &&
         CBS_data(&contents)[header_len-1] == 0x80) {
+      /* Found an indefinite-length element. */
       *ber_found = 1;
       return 1;
     }
     if (tag & CBS_ASN1_CONSTRUCTED) {
+      if (is_string_type(tag)) {
+        /* Constructed strings are only legal in BER and require conversion. */
+        *ber_found = 1;
+        return 1;
+      }
       if (!CBS_skip(&contents, header_len) ||
           !cbs_find_ber(&contents, ber_found, depth + 1)) {
         return 0;
@@ -63,16 +95,6 @@
   return 1;
 }
 
-/* is_primitive_type returns true if |tag| likely a primitive type. Normally
- * one can just test the "constructed" bit in the tag but, in BER, even
- * primitive tags can have the constructed bit if they have indefinite
- * length. */
-static char is_primitive_type(unsigned tag) {
-  return (tag & 0xc0) == 0 &&
-         (tag & 0x1f) != (CBS_ASN1_SEQUENCE & 0x1f) &&
-         (tag & 0x1f) != (CBS_ASN1_SET & 0x1f);
-}
-
 /* is_eoc returns true if |header_len| and |contents|, as returned by
  * |CBS_get_any_ber_asn1_element|, indicate an "end of contents" (EOC) value. */
 static char is_eoc(size_t header_len, CBS *contents) {
@@ -81,111 +103,86 @@
 }
 
 /* cbs_convert_ber reads BER data from |in| and writes DER data to |out|. If
- * |squash_header| is set then the top-level of elements from |in| will not
- * have their headers written. This is used when concatenating the fragments of
- * an indefinite length, primitive value. If |looking_for_eoc| is set then any
- * EOC elements found will cause the function to return after consuming it.
- * It returns one on success and zero on error. */
-static int cbs_convert_ber(CBS *in, CBB *out, char squash_header,
+ * |string_tag| is non-zero, then all elements must match |string_tag| up to the
+ * constructed bit and primitive element bodies are written to |out| without
+ * element headers. This is used when concatenating the fragments of a
+ * constructed string. If |looking_for_eoc| is set then any EOC elements found
+ * will cause the function to return after consuming it. It returns one on
+ * success and zero on error. */
+static int cbs_convert_ber(CBS *in, CBB *out, unsigned string_tag,
                            char looking_for_eoc, unsigned depth) {
+  assert(!(string_tag & CBS_ASN1_CONSTRUCTED));
+
   if (depth > kMaxDepth) {
     return 0;
   }
 
   while (CBS_len(in) > 0) {
     CBS contents;
-    unsigned tag;
+    unsigned tag, child_string_tag = string_tag;
     size_t header_len;
     CBB *out_contents, out_contents_storage;
 
     if (!CBS_get_any_ber_asn1_element(in, &contents, &tag, &header_len)) {
       return 0;
     }
-    out_contents = out;
 
-    if (CBS_len(&contents) == header_len) {
-      if (is_eoc(header_len, &contents)) {
-        return looking_for_eoc;
-      }
-
-      if (header_len > 0 && CBS_data(&contents)[header_len - 1] == 0x80) {
-        /* This is an indefinite length element. If it's a SEQUENCE or SET then
-         * we just need to write the out the contents as normal, but with a
-         * concrete length prefix.
-         *
-         * If it's a something else then the contents will be a series of BER
-         * elements of the same type which need to be concatenated. */
-        const char context_specific = (tag & 0xc0) == 0x80;
-        char squash_child_headers = is_primitive_type(tag);
-
-        /* This is a hack, but it sufficies to handle NSS's output. If we find
-         * an indefinite length, context-specific tag with a definite, primitive
-         * tag inside it, then we assume that the context-specific tag is
-         * implicit and the tags within are fragments of a primitive type that
-         * need to be concatenated. */
-        if (context_specific && (tag & CBS_ASN1_CONSTRUCTED)) {
-          CBS in_copy, inner_contents;
-          unsigned inner_tag;
-          size_t inner_header_len;
-
-          CBS_init(&in_copy, CBS_data(in), CBS_len(in));
-          if (!CBS_get_any_ber_asn1_element(&in_copy, &inner_contents,
-                                            &inner_tag, &inner_header_len)) {
-            return 0;
-          }
-          if (CBS_len(&inner_contents) > inner_header_len &&
-              is_primitive_type(inner_tag)) {
-            squash_child_headers = 1;
-          }
-        }
-
-        if (!squash_header) {
-          unsigned out_tag = tag;
-          if (squash_child_headers) {
-            out_tag &= ~CBS_ASN1_CONSTRUCTED;
-          }
-          if (!CBB_add_asn1(out, &out_contents_storage, out_tag)) {
-            return 0;
-          }
-          out_contents = &out_contents_storage;
-        }
-
-        if (!cbs_convert_ber(in, out_contents,
-                             squash_child_headers,
-                             1 /* looking for eoc */, depth + 1)) {
-          return 0;
-        }
-        if (out_contents != out && !CBB_flush(out)) {
-          return 0;
-        }
-        continue;
-      }
+    if (is_eoc(header_len, &contents)) {
+      return looking_for_eoc;
     }
 
-    if (!squash_header) {
-      if (!CBB_add_asn1(out, &out_contents_storage, tag)) {
+    if (string_tag != 0) {
+      /* This is part of a constructed string. All elements must match
+       * |string_tag| up to the constructed bit and get appended to |out|
+       * without a child element. */
+      if ((tag & ~CBS_ASN1_CONSTRUCTED) != string_tag) {
+        return 0;
+      }
+      out_contents = out;
+    } else {
+      unsigned out_tag = tag;
+      if ((tag & CBS_ASN1_CONSTRUCTED) && is_string_type(tag)) {
+        /* If a constructed string, clear the constructed bit and inform
+         * children to concatenate bodies. */
+        out_tag &= ~CBS_ASN1_CONSTRUCTED;
+        child_string_tag = out_tag;
+      }
+      if (!CBB_add_asn1(out, &out_contents_storage, out_tag)) {
         return 0;
       }
       out_contents = &out_contents_storage;
     }
 
+    if (CBS_len(&contents) == header_len && header_len > 0 &&
+        CBS_data(&contents)[header_len - 1] == 0x80) {
+      /* This is an indefinite length element. */
+      if (!cbs_convert_ber(in, out_contents, child_string_tag,
+                           1 /* looking for eoc */, depth + 1) ||
+          !CBB_flush(out)) {
+        return 0;
+      }
+      continue;
+    }
+
     if (!CBS_skip(&contents, header_len)) {
       return 0;
     }
 
     if (tag & CBS_ASN1_CONSTRUCTED) {
-      if (!cbs_convert_ber(&contents, out_contents, 0 /* don't squash header */,
+      /* Recurse into children. */
+      if (!cbs_convert_ber(&contents, out_contents, child_string_tag,
                            0 /* not looking for eoc */, depth + 1)) {
         return 0;
       }
     } else {
+      /* Copy primitive contents as-is. */
       if (!CBB_add_bytes(out_contents, CBS_data(&contents),
                          CBS_len(&contents))) {
         return 0;
       }
     }
 
-    if (out_contents != out && !CBB_flush(out)) {
+    if (!CBB_flush(out)) {
       return 0;
     }
   }
@@ -218,3 +215,48 @@
 
   return 1;
 }
+
+int CBS_get_asn1_implicit_string(CBS *in, CBS *out, uint8_t **out_storage,
+                                 unsigned outer_tag, unsigned inner_tag) {
+  assert(!(outer_tag & CBS_ASN1_CONSTRUCTED));
+  assert(!(inner_tag & CBS_ASN1_CONSTRUCTED));
+  assert(is_string_type(inner_tag));
+
+  if (CBS_peek_asn1_tag(in, outer_tag)) {
+    /* Normal implicitly-tagged string. */
+    *out_storage = NULL;
+    return CBS_get_asn1(in, out, outer_tag);
+  }
+
+  /* Otherwise, try to parse an implicitly-tagged constructed string.
+   * |CBS_asn1_ber_to_der| is assumed to have run, so only allow one level deep
+   * of nesting. */
+  CBB result;
+  CBS child;
+  if (!CBB_init(&result, CBS_len(in)) ||
+      !CBS_get_asn1(in, &child, outer_tag | CBS_ASN1_CONSTRUCTED)) {
+    goto err;
+  }
+
+  while (CBS_len(&child) > 0) {
+    CBS chunk;
+    if (!CBS_get_asn1(&child, &chunk, inner_tag) ||
+        !CBB_add_bytes(&result, CBS_data(&chunk), CBS_len(&chunk))) {
+      goto err;
+    }
+  }
+
+  uint8_t *data;
+  size_t len;
+  if (!CBB_finish(&result, &data, &len)) {
+    goto err;
+  }
+
+  CBS_init(out, data, len);
+  *out_storage = data;
+  return 1;
+
+err:
+  CBB_cleanup(&result);
+  return 0;
+}
diff --git a/crypto/bytestring/bytestring_test.cc b/crypto/bytestring/bytestring_test.cc
index 188c63d..84ecffc 100644
--- a/crypto/bytestring/bytestring_test.cc
+++ b/crypto/bytestring/bytestring_test.cc
@@ -579,7 +579,7 @@
   static const uint8_t kIndefBER[] = {0x30, 0x80, 0x01, 0x01, 0x02, 0x00, 0x00};
   static const uint8_t kIndefDER[] = {0x30, 0x03, 0x01, 0x01, 0x02};
 
-  // kOctetStringBER contains an indefinite length OCTETSTRING with two parts.
+  // kOctetStringBER contains an indefinite length OCTET STRING with two parts.
   // These parts need to be concatenated in DER form.
   static const uint8_t kOctetStringBER[] = {0x24, 0x80, 0x04, 0x02, 0,    1,
                                             0x04, 0x02, 2,    3,    0x00, 0x00};
@@ -609,6 +609,16 @@
       0x6e, 0x10, 0x9b, 0xb8, 0x02, 0x02, 0x07, 0xd0,
   };
 
+  // kConstructedStringBER contains a deeply-nested constructed OCTET STRING.
+  // The BER conversion collapses this to one level deep, but not completely.
+  static const uint8_t kConstructedStringBER[] = {
+      0xa0, 0x10, 0x24, 0x06, 0x04, 0x01, 0x00, 0x04, 0x01,
+      0x01, 0x24, 0x06, 0x04, 0x01, 0x02, 0x04, 0x01, 0x03,
+  };
+  static const uint8_t kConstructedStringDER[] = {
+      0xa0, 0x08, 0x04, 0x02, 0x00, 0x01, 0x04, 0x02, 0x02, 0x03,
+  };
+
   return DoBerConvert("kSimpleBER", kSimpleBER, sizeof(kSimpleBER),
                       kSimpleBER, sizeof(kSimpleBER)) &&
          DoBerConvert("kIndefBER", kIndefDER, sizeof(kIndefDER), kIndefBER,
@@ -617,7 +627,59 @@
                       sizeof(kOctetStringDER), kOctetStringBER,
                       sizeof(kOctetStringBER)) &&
          DoBerConvert("kNSSBER", kNSSDER, sizeof(kNSSDER), kNSSBER,
-                      sizeof(kNSSBER));
+                      sizeof(kNSSBER)) &&
+         DoBerConvert("kConstructedStringBER", kConstructedStringDER,
+                      sizeof(kConstructedStringDER), kConstructedStringBER,
+                      sizeof(kConstructedStringBER));
+}
+
+struct ImplicitStringTest {
+  const char *in;
+  size_t in_len;
+  bool ok;
+  const char *out;
+  size_t out_len;
+};
+
+static const ImplicitStringTest kImplicitStringTests[] = {
+    // A properly-encoded string.
+    {"\x80\x03\x61\x61\x61", 5, true, "aaa", 3},
+    // An implicit-tagged string.
+    {"\xa0\x09\x04\x01\x61\x04\x01\x61\x04\x01\x61", 11, true, "aaa", 3},
+    // |CBS_get_asn1_implicit_string| only accepts one level deep of nesting.
+    {"\xa0\x0b\x24\x06\x04\x01\x61\x04\x01\x61\x04\x01\x61", 13, false, nullptr,
+     0},
+    // The outer tag must match.
+    {"\x81\x03\x61\x61\x61", 5, false, nullptr, 0},
+    {"\xa1\x09\x04\x01\x61\x04\x01\x61\x04\x01\x61", 11, false, nullptr, 0},
+    // The inner tag must match.
+    {"\xa1\x09\x0c\x01\x61\x0c\x01\x61\x0c\x01\x61", 11, false, nullptr, 0},
+};
+
+static bool TestImplicitString() {
+  for (const auto &test : kImplicitStringTests) {
+    uint8_t *storage = nullptr;
+    CBS in, out;
+    CBS_init(&in, reinterpret_cast<const uint8_t *>(test.in), test.in_len);
+    int ok = CBS_get_asn1_implicit_string(&in, &out, &storage,
+                                          CBS_ASN1_CONTEXT_SPECIFIC | 0,
+                                          CBS_ASN1_OCTETSTRING);
+    ScopedOpenSSLBytes scoper(storage);
+
+    if (static_cast<bool>(ok) != test.ok) {
+      fprintf(stderr, "CBS_get_asn1_implicit_string unexpectedly %s\n",
+              ok ? "succeeded" : "failed");
+      return false;
+    }
+
+    if (ok && (CBS_len(&out) != test.out_len ||
+               memcmp(CBS_data(&out), test.out, test.out_len) != 0)) {
+      fprintf(stderr, "CBS_get_asn1_implicit_string gave the wrong output\n");
+      return false;
+    }
+  }
+
+  return true;
 }
 
 struct ASN1Uint64Test {
@@ -747,6 +809,7 @@
       !TestCBBDiscardChild() ||
       !TestCBBASN1() ||
       !TestBerConvert() ||
+      !TestImplicitString() ||
       !TestASN1Uint64() ||
       !TestGetOptionalASN1Bool() ||
       !TestZero() ||
diff --git a/crypto/bytestring/internal.h b/crypto/bytestring/internal.h
index b4ea7e5..f4f4b9c 100644
--- a/crypto/bytestring/internal.h
+++ b/crypto/bytestring/internal.h
@@ -22,22 +22,42 @@
 #endif
 
 
-/* CBS_asn1_ber_to_der reads an ASN.1 structure from |in|. If it finds
- * indefinite-length elements then it attempts to convert the BER data to DER
- * and sets |*out| and |*out_length| to describe a malloced buffer containing
- * the DER data. Additionally, |*in| will be advanced over the ASN.1 data.
+/* CBS_asn1_ber_to_der reads a BER element from |in|. If it finds
+ * indefinite-length elements or constructed strings then it converts the BER
+ * data to DER and sets |*out| and |*out_length| to describe a malloced buffer
+ * containing the DER data. Additionally, |*in| will be advanced over the BER
+ * element.
  *
- * If it doesn't find any indefinite-length elements then it sets |*out| to
- * NULL and |*in| is unmodified.
+ * If it doesn't find any indefinite-length elements or constructed strings then
+ * it sets |*out| to NULL and |*in| is unmodified.
  *
- * A sufficiently complex ASN.1 structure will break this function because it's
- * not possible to generically convert BER to DER without knowledge of the
- * structure itself. However, this sufficies to handle the PKCS#7 and #12 output
- * from NSS.
+ * This function should successfully process any valid BER input, however it
+ * will not convert all of BER's deviations from DER. BER is ambiguous between
+ * implicitly-tagged SEQUENCEs of strings and implicitly-tagged constructed
+ * strings. Implicitly-tagged strings must be parsed with
+ * |CBS_get_ber_implicitly_tagged_string| instead of |CBS_get_asn1|. The caller
+ * must also account for BER variations in the contents of a primitive.
  *
  * It returns one on success and zero otherwise. */
 OPENSSL_EXPORT int CBS_asn1_ber_to_der(CBS *in, uint8_t **out, size_t *out_len);
 
+/* CBS_get_asn1_implicit_string parses a BER string of primitive type
+ * |inner_tag| implicitly-tagged with |outer_tag|. It sets |out| to the
+ * contents. If concatenation was needed, it sets |*out_storage| to a buffer
+ * which the caller must release with |OPENSSL_free|. Otherwise, it sets
+ * |*out_storage| to NULL.
+ *
+ * This function does not parse all of BER. It requires the string be
+ * definite-length. Constructed strings are allowed, but all children of the
+ * outermost element must be primitive. The caller should use
+ * |CBS_asn1_ber_to_der| before running this function.
+ *
+ * It returns one on success and zero otherwise. */
+OPENSSL_EXPORT int CBS_get_asn1_implicit_string(CBS *in, CBS *out,
+                                                uint8_t **out_storage,
+                                                unsigned outer_tag,
+                                                unsigned inner_tag);
+
 
 #if defined(__cplusplus)
 }  /* extern C */
diff --git a/crypto/pkcs8/pkcs8.c b/crypto/pkcs8/pkcs8.c
index ac13faf..fdce544 100644
--- a/crypto/pkcs8/pkcs8.c
+++ b/crypto/pkcs8/pkcs8.c
@@ -737,6 +737,7 @@
                                       struct pkcs12_context *ctx) {
   CBS content_type, wrapped_contents, contents, content_infos;
   int nid, ret = 0;
+  uint8_t *storage = NULL;
 
   if (!CBS_get_asn1(content_info, &content_type, CBS_ASN1_OBJECT) ||
       !CBS_get_asn1(content_info, &wrapped_contents,
@@ -767,8 +768,9 @@
         /* AlgorithmIdentifier, see
          * https://tools.ietf.org/html/rfc5280#section-4.1.1.2 */
         !CBS_get_asn1_element(&eci, &ai, CBS_ASN1_SEQUENCE) ||
-        !CBS_get_asn1(&eci, &encrypted_contents,
-                      CBS_ASN1_CONTEXT_SPECIFIC | 0)) {
+        !CBS_get_asn1_implicit_string(
+            &eci, &encrypted_contents, &storage,
+            CBS_ASN1_CONTEXT_SPECIFIC | 0, CBS_ASN1_OCTETSTRING)) {
       OPENSSL_PUT_ERROR(PKCS8, PKCS8_R_BAD_PKCS12_DATA);
       goto err;
     }
@@ -895,6 +897,7 @@
   }
 
 err:
+  OPENSSL_free(storage);
   return ret;
 }
 
diff --git a/include/openssl/bytestring.h b/include/openssl/bytestring.h
index 9193e11..cf424d0 100644
--- a/include/openssl/bytestring.h
+++ b/include/openssl/bytestring.h
@@ -130,7 +130,18 @@
 #define CBS_ASN1_ENUMERATED 0xa
 #define CBS_ASN1_SEQUENCE (0x10 | CBS_ASN1_CONSTRUCTED)
 #define CBS_ASN1_SET (0x11 | CBS_ASN1_CONSTRUCTED)
+#define CBS_ASN1_NUMERICSTRING 0x12
+#define CBS_ASN1_PRINTABLESTRING 0x13
+#define CBS_ASN1_T16STRING 0x14
+#define CBS_ASN1_VIDEOTEXSTRING 0x15
+#define CBS_ASN1_IA5STRING 0x16
+#define CBS_ASN1_UTCTIME 0x17
 #define CBS_ASN1_GENERALIZEDTIME 0x18
+#define CBS_ASN1_GRAPHICSTRING 0x19
+#define CBS_ASN1_VISIBLESTRING 0x1a
+#define CBS_ASN1_GENERALSTRING 0x1b
+#define CBS_ASN1_UNIVERSALSTRING 0x1c
+#define CBS_ASN1_BMPSTRING 0x1e
 
 #define CBS_ASN1_CONSTRUCTED 0x20
 #define CBS_ASN1_CONTEXT_SPECIFIC 0x80