Add a function for encoding SET OF.

The Chromium certificate verifier ends up encoding a SET OF when
canonicalizing X.509 names. Requiring the caller canonicalize a SET OF
is complicated enough that we should probably sort it for folks. (We
really need to get this name canonicalization insanity out of X.509...)

This would remove the extra level of indirection in Chromium
net/cert/internal/verify_name_match.cc CBB usage.

Note this is not quite the same order as SET, but SET is kind of
useless. Since it's encoding heterogeneous values, it is reasonable to
require the caller just encode them in the correct order. In fact, a DER
SET is just SEQUENCE with a post-processing step on the definition to
fix the ordering of the fields. (Unless the SET contains an untagged
CHOICE, in which case the ordering is weird, but SETs are not really
used in the real world, much less SETs with untagged CHOICEs.)

Bug: 11
Change-Id: I51e7938a81529243e7514360f867330359ae4f2c
Reviewed-on: https://boringssl-review.googlesource.com/24444
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/bytestring/bytestring_test.cc b/crypto/bytestring/bytestring_test.cc
index 1969e73..10eac69 100644
--- a/crypto/bytestring/bytestring_test.cc
+++ b/crypto/bytestring/bytestring_test.cc
@@ -981,3 +981,73 @@
     EXPECT_FALSE(text);
   }
 }
+
+TEST(CBBTest, FlushASN1SetOf) {
+  const struct {
+    std::vector<uint8_t> in, out;
+  } kValidInputs[] = {
+    // No elements.
+    {{}, {}},
+    // One element.
+    {{0x30, 0x00}, {0x30, 0x00}},
+    // Two identical elements.
+    {{0x30, 0x00, 0x30, 0x00}, {0x30, 0x00, 0x30, 0x00}},
+    // clang-format off
+    {{0x30, 0x02, 0x00, 0x00,
+      0x30, 0x00,
+      0x01, 0x00,
+      0x30, 0x02, 0x00, 0x00,
+      0x30, 0x03, 0x00, 0x00, 0x00,
+      0x30, 0x00,
+      0x30, 0x03, 0x00, 0x00, 0x01,
+      0x30, 0x01, 0x00,
+      0x01, 0x01, 0x00},
+     {0x01, 0x00,
+      0x01, 0x01, 0x00,
+      0x30, 0x00,
+      0x30, 0x00,
+      0x30, 0x01, 0x00,
+      0x30, 0x02, 0x00, 0x00,
+      0x30, 0x02, 0x00, 0x00,
+      0x30, 0x03, 0x00, 0x00, 0x00,
+      0x30, 0x03, 0x00, 0x00, 0x01}},
+    // clang-format on
+  };
+
+  for (const auto &t : kValidInputs) {
+    SCOPED_TRACE(Bytes(t.in));
+
+    bssl::ScopedCBB cbb;
+    CBB child;
+    ASSERT_TRUE(CBB_init(cbb.get(), 0));
+    ASSERT_TRUE(CBB_add_asn1(cbb.get(), &child, CBS_ASN1_SET));
+    ASSERT_TRUE(CBB_add_bytes(&child, t.in.data(), t.in.size()));
+    ASSERT_TRUE(CBB_flush_asn1_set_of(&child));
+    EXPECT_EQ(Bytes(t.out), Bytes(CBB_data(&child), CBB_len(&child)));
+
+    // Running it again should be idempotent.
+    ASSERT_TRUE(CBB_flush_asn1_set_of(&child));
+    EXPECT_EQ(Bytes(t.out), Bytes(CBB_data(&child), CBB_len(&child)));
+
+    // The ASN.1 header remain intact.
+    ASSERT_TRUE(CBB_flush(cbb.get()));
+    EXPECT_EQ(0x31, CBB_data(cbb.get())[0]);
+  }
+
+  const std::vector<uint8_t> kInvalidInputs[] = {
+    {0x30},
+    {0x30, 0x01},
+    {0x30, 0x00, 0x30, 0x00, 0x30, 0x01},
+  };
+
+  for (const auto &t : kInvalidInputs) {
+    SCOPED_TRACE(Bytes(t));
+
+    bssl::ScopedCBB cbb;
+    CBB child;
+    ASSERT_TRUE(CBB_init(cbb.get(), 0));
+    ASSERT_TRUE(CBB_add_asn1(cbb.get(), &child, CBS_ASN1_SET));
+    ASSERT_TRUE(CBB_add_bytes(&child, t.data(), t.size()));
+    EXPECT_FALSE(CBB_flush_asn1_set_of(&child));
+  }
+}
diff --git a/crypto/bytestring/cbb.c b/crypto/bytestring/cbb.c
index b12a66c..eac6aa4 100644
--- a/crypto/bytestring/cbb.c
+++ b/crypto/bytestring/cbb.c
@@ -18,6 +18,7 @@
 #include <limits.h>
 #include <string.h>
 
+#include <openssl/buf.h>
 #include <openssl/mem.h>
 
 #include "../internal.h"
@@ -569,3 +570,77 @@
 
   return 1;
 }
+
+static int compare_set_of_element(const void *a_ptr, const void *b_ptr) {
+  // See X.690, section 11.6 for the ordering. They are sorted in ascending
+  // order by their DER encoding.
+  const CBS *a = a_ptr, *b = b_ptr;
+  size_t a_len = CBS_len(a), b_len = CBS_len(b);
+  size_t min_len = a_len < b_len ? a_len : b_len;
+  int ret = OPENSSL_memcmp(CBS_data(a), CBS_data(b), min_len);
+  if (ret != 0) {
+    return ret;
+  }
+  if (a_len == b_len) {
+    return 0;
+  }
+  // If one is a prefix of the other, the shorter one sorts first. (This is not
+  // actually reachable. No DER encoding is a prefix of another DER encoding.)
+  return a_len < b_len ? -1 : 1;
+}
+
+int CBB_flush_asn1_set_of(CBB *cbb) {
+  if (!CBB_flush(cbb)) {
+    return 0;
+  }
+
+  CBS cbs;
+  size_t num_children = 0;
+  CBS_init(&cbs, CBB_data(cbb), CBB_len(cbb));
+  while (CBS_len(&cbs) != 0) {
+    if (!CBS_get_any_asn1_element(&cbs, NULL, NULL, NULL)) {
+      return 0;
+    }
+    num_children++;
+  }
+
+  if (num_children < 2) {
+    return 1;  // Nothing to do. This is the common case for X.509.
+  }
+  if (num_children > ((size_t)-1) / sizeof(CBS)) {
+    return 0;  // Overflow.
+  }
+
+  // Parse out the children and sort. We alias them into a copy of so they
+  // remain valid as we rewrite |cbb|.
+  int ret = 0;
+  size_t buf_len = CBB_len(cbb);
+  uint8_t *buf = BUF_memdup(CBB_data(cbb), buf_len);
+  CBS *children = OPENSSL_malloc(num_children * sizeof(CBS));
+  if (buf == NULL || children == NULL) {
+    goto err;
+  }
+  CBS_init(&cbs, buf, buf_len);
+  for (size_t i = 0; i < num_children; i++) {
+    if (!CBS_get_any_asn1_element(&cbs, &children[i], NULL, NULL)) {
+      goto err;
+    }
+  }
+  qsort(children, num_children, sizeof(CBS), compare_set_of_element);
+
+  // Rewind |cbb| and write the contents back in the new order.
+  cbb->base->len = cbb->offset + cbb->pending_len_len;
+  for (size_t i = 0; i < num_children; i++) {
+    if (!CBB_add_bytes(cbb, CBS_data(&children[i]), CBS_len(&children[i]))) {
+      goto err;
+    }
+  }
+  assert(CBB_len(cbb) == buf_len);
+
+  ret = 1;
+
+err:
+  OPENSSL_free(buf);
+  OPENSSL_free(children);
+  return ret;
+}
diff --git a/include/openssl/bytestring.h b/include/openssl/bytestring.h
index 43349e9..2f25f14 100644
--- a/include/openssl/bytestring.h
+++ b/include/openssl/bytestring.h
@@ -461,6 +461,15 @@
 OPENSSL_EXPORT int CBB_add_asn1_oid_from_text(CBB *cbb, const char *text,
                                               size_t len);
 
+// CBB_flush_asn1_set_of calls |CBB_flush| on |cbb| and then reorders the
+// contents for a DER-encoded ASN.1 SET OF type. It returns one on success and
+// zero on failure. DER canonicalizes SET OF contents by sorting
+// lexicographically by encoding. Call this function when encoding a SET OF
+// type in an order that is not already known to be canonical.
+//
+// Note a SET type has a slightly different ordering than a SET OF.
+OPENSSL_EXPORT int CBB_flush_asn1_set_of(CBB *cbb);
+
 
 #if defined(__cplusplus)
 }  // extern C