Add a test for ASN1_mbstring_copy and clean up.
In writing the tests, I noticed that the documentation was wrong. First,
the maximum lengths are measured in codepoints, not bytes.
Second, the TODO was wrong. We actually do handle this correctly,
*almost*. Rather, the bug is that the function assumes |mask| contains
no extraneous bits. If it does, all extraneous bits are interpreted as
B_ASN1_UTF8STRING. This seems like a bug, so I've gone ahead and fixed
that, with a test.
Change-Id: I7ba8fa700a8e21e6d25cb7ce879dace685eecf7e
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/48825
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/asn1/a_mbstr.c b/crypto/asn1/a_mbstr.c
index 953095c..35c130c 100644
--- a/crypto/asn1/a_mbstr.c
+++ b/crypto/asn1/a_mbstr.c
@@ -208,11 +208,14 @@
encode_func = cbb_add_utf32_be;
size_estimate = 4 * nchar;
outform = MBSTRING_UNIV;
- } else {
+ } else if (mask & B_ASN1_UTF8STRING) {
str_type = V_ASN1_UTF8STRING;
outform = MBSTRING_UTF8;
encode_func = cbb_add_utf8;
size_estimate = utf8_len;
+ } else {
+ OPENSSL_PUT_ERROR(ASN1, ASN1_R_ILLEGAL_CHARACTERS);
+ return -1;
}
if (!out)
diff --git a/crypto/asn1/asn1_test.cc b/crypto/asn1/asn1_test.cc
index 76f87bf..6fa3f95 100644
--- a/crypto/asn1/asn1_test.cc
+++ b/crypto/asn1/asn1_test.cc
@@ -817,6 +817,212 @@
}
}
+TEST(ASN1Test, MBString) {
+ const unsigned long kAll = B_ASN1_PRINTABLESTRING | B_ASN1_IA5STRING |
+ B_ASN1_T61STRING | B_ASN1_BMPSTRING |
+ B_ASN1_UNIVERSALSTRING | B_ASN1_UTF8STRING;
+
+ const struct {
+ int format;
+ std::vector<uint8_t> in;
+ unsigned long mask;
+ int expected_type;
+ std::vector<uint8_t> expected_data;
+ int num_codepoints;
+ } kTests[] = {
+ // Given a choice of formats, we pick the smallest that fits.
+ {MBSTRING_UTF8, {}, kAll, V_ASN1_PRINTABLESTRING, {}, 0},
+ {MBSTRING_UTF8, {'a'}, kAll, V_ASN1_PRINTABLESTRING, {'a'}, 1},
+ {MBSTRING_UTF8, {'\n'}, kAll, V_ASN1_IA5STRING, {'\n'}, 1},
+ {MBSTRING_UTF8,
+ {0xc2, 0x80 /* U+0080 */},
+ kAll,
+ V_ASN1_T61STRING,
+ {0x80},
+ 1},
+ {MBSTRING_UTF8,
+ {0xc4, 0x80 /* U+0100 */},
+ kAll,
+ V_ASN1_BMPSTRING,
+ {0x01, 0x00},
+ 1},
+ {MBSTRING_UTF8,
+ {0xf0, 0x90, 0x80, 0x80 /* U+10000 */},
+ kAll,
+ V_ASN1_UNIVERSALSTRING,
+ {0x00, 0x01, 0x00, 0x00},
+ 1},
+ {MBSTRING_UTF8,
+ {0xf0, 0x90, 0x80, 0x80 /* U+10000 */},
+ kAll & ~B_ASN1_UNIVERSALSTRING,
+ V_ASN1_UTF8STRING,
+ {0xf0, 0x90, 0x80, 0x80},
+ 1},
+
+ // When a particular format is specified, we use it.
+ {MBSTRING_UTF8,
+ {'a'},
+ B_ASN1_PRINTABLESTRING,
+ V_ASN1_PRINTABLESTRING,
+ {'a'},
+ 1},
+ {MBSTRING_UTF8, {'a'}, B_ASN1_IA5STRING, V_ASN1_IA5STRING, {'a'}, 1},
+ {MBSTRING_UTF8, {'a'}, B_ASN1_T61STRING, V_ASN1_T61STRING, {'a'}, 1},
+ {MBSTRING_UTF8, {'a'}, B_ASN1_UTF8STRING, V_ASN1_UTF8STRING, {'a'}, 1},
+ {MBSTRING_UTF8,
+ {'a'},
+ B_ASN1_BMPSTRING,
+ V_ASN1_BMPSTRING,
+ {0x00, 'a'},
+ 1},
+ {MBSTRING_UTF8,
+ {'a'},
+ B_ASN1_UNIVERSALSTRING,
+ V_ASN1_UNIVERSALSTRING,
+ {0x00, 0x00, 0x00, 'a'},
+ 1},
+
+ // A long string with characters of many widths, to test sizes are
+ // measured in code points.
+ {MBSTRING_UTF8,
+ {
+ 'a', //
+ 0xc2, 0x80, // U+0080
+ 0xc4, 0x80, // U+0100
+ 0xf0, 0x90, 0x80, 0x80, // U+10000
+ },
+ B_ASN1_UNIVERSALSTRING,
+ V_ASN1_UNIVERSALSTRING,
+ {
+ 0x00, 0x00, 0x00, 'a', //
+ 0x00, 0x00, 0x00, 0x80, //
+ 0x00, 0x00, 0x01, 0x00, //
+ 0x00, 0x01, 0x00, 0x00, //
+ },
+ 4},
+ };
+ for (const auto &t : kTests) {
+ SCOPED_TRACE(t.format);
+ SCOPED_TRACE(Bytes(t.in));
+ SCOPED_TRACE(t.mask);
+
+ // Passing in nullptr should do a dry run.
+ EXPECT_EQ(t.expected_type,
+ ASN1_mbstring_copy(nullptr, t.in.data(), t.in.size(), t.format,
+ t.mask));
+
+ // Test allocating a new object.
+ ASN1_STRING *str = nullptr;
+ EXPECT_EQ(
+ t.expected_type,
+ ASN1_mbstring_copy(&str, t.in.data(), t.in.size(), t.format, t.mask));
+ ASSERT_TRUE(str);
+ EXPECT_EQ(t.expected_type, ASN1_STRING_type(str));
+ EXPECT_EQ(Bytes(t.expected_data),
+ Bytes(ASN1_STRING_get0_data(str), ASN1_STRING_length(str)));
+
+ // Test writing into an existing object.
+ ASN1_STRING_free(str);
+ str = ASN1_STRING_new();
+ ASSERT_TRUE(str);
+ ASN1_STRING *old_str = str;
+ EXPECT_EQ(
+ t.expected_type,
+ ASN1_mbstring_copy(&str, t.in.data(), t.in.size(), t.format, t.mask));
+ ASSERT_EQ(old_str, str);
+ EXPECT_EQ(t.expected_type, ASN1_STRING_type(str));
+ EXPECT_EQ(Bytes(t.expected_data),
+ Bytes(ASN1_STRING_get0_data(str), ASN1_STRING_length(str)));
+ ASN1_STRING_free(str);
+ str = nullptr;
+
+ // minsize and maxsize should be enforced, even in a dry run.
+ EXPECT_EQ(t.expected_type,
+ ASN1_mbstring_ncopy(nullptr, t.in.data(), t.in.size(), t.format,
+ t.mask, /*minsize=*/t.num_codepoints,
+ /*maxsize=*/t.num_codepoints));
+
+ EXPECT_EQ(t.expected_type,
+ ASN1_mbstring_ncopy(&str, t.in.data(), t.in.size(), t.format,
+ t.mask, /*minsize=*/t.num_codepoints,
+ /*maxsize=*/t.num_codepoints));
+ ASSERT_TRUE(str);
+ EXPECT_EQ(t.expected_type, ASN1_STRING_type(str));
+ EXPECT_EQ(Bytes(t.expected_data),
+ Bytes(ASN1_STRING_get0_data(str), ASN1_STRING_length(str)));
+ ASN1_STRING_free(str);
+ str = nullptr;
+
+ EXPECT_EQ(-1, ASN1_mbstring_ncopy(
+ nullptr, t.in.data(), t.in.size(), t.format, t.mask,
+ /*minsize=*/t.num_codepoints + 1, /*maxsize=*/0));
+ ERR_clear_error();
+ EXPECT_EQ(-1, ASN1_mbstring_ncopy(
+ &str, t.in.data(), t.in.size(), t.format, t.mask,
+ /*minsize=*/t.num_codepoints + 1, /*maxsize=*/0));
+ EXPECT_FALSE(str);
+ ERR_clear_error();
+ if (t.num_codepoints > 1) {
+ EXPECT_EQ(-1, ASN1_mbstring_ncopy(
+ nullptr, t.in.data(), t.in.size(), t.format, t.mask,
+ /*minsize=*/0, /*maxsize=*/t.num_codepoints - 1));
+ ERR_clear_error();
+ EXPECT_EQ(-1, ASN1_mbstring_ncopy(
+ &str, t.in.data(), t.in.size(), t.format, t.mask,
+ /*minsize=*/0, /*maxsize=*/t.num_codepoints - 1));
+ EXPECT_FALSE(str);
+ ERR_clear_error();
+ }
+ }
+
+ const struct {
+ int format;
+ std::vector<uint8_t> in;
+ unsigned long mask;
+ } kInvalidTests[] = {
+ // Invalid encodings are rejected.
+ {MBSTRING_UTF8, {0xff}, B_ASN1_UTF8STRING},
+ {MBSTRING_BMP, {0xff}, B_ASN1_UTF8STRING},
+ {MBSTRING_UNIV, {0xff}, B_ASN1_UTF8STRING},
+
+ // Lone surrogates are not code points.
+ {MBSTRING_UTF8, {0xed, 0xa0, 0x80}, B_ASN1_UTF8STRING},
+ {MBSTRING_BMP, {0xd8, 0x00}, B_ASN1_UTF8STRING},
+ {MBSTRING_UNIV, {0x00, 0x00, 0xd8, 0x00}, B_ASN1_UTF8STRING},
+
+ // The input does not fit in the allowed output types.
+ {MBSTRING_UTF8, {'\n'}, B_ASN1_PRINTABLESTRING},
+ {MBSTRING_UTF8,
+ {0xc2, 0x80 /* U+0080 */},
+ B_ASN1_PRINTABLESTRING | B_ASN1_IA5STRING},
+ {MBSTRING_UTF8,
+ {0xc4, 0x80 /* U+0100 */},
+ B_ASN1_PRINTABLESTRING | B_ASN1_IA5STRING | B_ASN1_T61STRING},
+ {MBSTRING_UTF8,
+ {0xf0, 0x90, 0x80, 0x80 /* U+10000 */},
+ B_ASN1_PRINTABLESTRING | B_ASN1_IA5STRING | B_ASN1_T61STRING |
+ B_ASN1_BMPSTRING},
+
+ // Unrecognized bits are ignored.
+ {MBSTRING_UTF8, {'\n'}, B_ASN1_PRINTABLESTRING | B_ASN1_SEQUENCE},
+ };
+ for (const auto &t : kInvalidTests) {
+ SCOPED_TRACE(t.format);
+ SCOPED_TRACE(Bytes(t.in));
+ SCOPED_TRACE(t.mask);
+
+ EXPECT_EQ(-1, ASN1_mbstring_copy(nullptr, t.in.data(), t.in.size(),
+ t.format, t.mask));
+ ERR_clear_error();
+
+ ASN1_STRING *str = nullptr;
+ EXPECT_EQ(-1, ASN1_mbstring_copy(&str, t.in.data(), t.in.size(),
+ t.format, t.mask));
+ ERR_clear_error();
+ EXPECT_EQ(nullptr, str);
+ }
+}
+
// The ASN.1 macros do not work on Windows shared library builds, where usage of
// |OPENSSL_EXPORT| is a bit stricter.
#if !defined(OPENSSL_WINDOWS) || !defined(BORINGSSL_SHARED_LIBRARY)
diff --git a/include/openssl/asn1.h b/include/openssl/asn1.h
index 4cf71ad..1d30300 100644
--- a/include/openssl/asn1.h
+++ b/include/openssl/asn1.h
@@ -337,6 +337,14 @@
#define MBSTRING_BMP (MBSTRING_FLAG | 2)
#define MBSTRING_UNIV (MBSTRING_FLAG | 4)
+// DIRSTRING_TYPE contains the valid string types in an X.509 DirectoryString.
+#define DIRSTRING_TYPE \
+ (B_ASN1_PRINTABLESTRING | B_ASN1_T61STRING | B_ASN1_BMPSTRING | \
+ B_ASN1_UTF8STRING)
+
+// PKCS9STRING_TYPE contains the valid string types in a PKCS9String.
+#define PKCS9STRING_TYPE (DIRSTRING_TYPE | B_ASN1_IA5STRING)
+
// ASN1_mbstring_copy converts |len| bytes from |in| to an ASN.1 string. If
// |len| is -1, |in| must be NUL-terminated and the length is determined by
// |strlen|. |in| is decoded according to |inform|, which must be one of
@@ -347,23 +355,21 @@
// first output type in |mask| which can represent |in|. It interprets T61String
// as Latin-1, rather than T.61.
//
+// If |mask| is zero, |DIRSTRING_TYPE| is used by default.
+//
// On success, this function returns the |V_ASN1_*| constant corresponding to
// the selected output type and, if |out| and |*out| are both non-NULL, updates
// the object at |*out| with the result. If |out| is non-NULL and |*out| is
// NULL, it instead sets |*out| to a newly-allocated |ASN1_STRING| containing
// the result. If |out| is NULL, it returns the selected output type without
// constructing an |ASN1_STRING|. On error, this function returns -1.
-//
-// TODO(davidben): If no format in |mask| can represent |in|, the function
-// currently falls back to |B_ASN1_UTF8STRING|. This is probably a bug and will
-// cause functions like |ASN1_STRING_set_by_NID| to output the wrong type.
-// Return an error instead.
OPENSSL_EXPORT int ASN1_mbstring_copy(ASN1_STRING **out, const uint8_t *in,
int len, int inform, unsigned long mask);
// ASN1_mbstring_ncopy behaves like |ASN1_mbstring_copy| but returns an error if
-// the output would be less than |minsize| or greater than |maxsize|. A
-// |maxsize| value of zero is ignored.
+// the input is less than |minsize| or greater than |maxsize| codepoints long. A
+// |maxsize| value of zero is ignored. Note the sizes are measured in
+// codepoints, not output bytes.
OPENSSL_EXPORT int ASN1_mbstring_ncopy(ASN1_STRING **out, const uint8_t *in,
int len, int inform, unsigned long mask,
long minsize, long maxsize);
@@ -863,10 +869,6 @@
#define STABLE_FLAGS_MALLOC 0x01
#define STABLE_NO_MASK 0x02
-#define DIRSTRING_TYPE \
- (B_ASN1_PRINTABLESTRING | B_ASN1_T61STRING | B_ASN1_BMPSTRING | \
- B_ASN1_UTF8STRING)
-#define PKCS9STRING_TYPE (DIRSTRING_TYPE | B_ASN1_IA5STRING)
typedef struct asn1_string_table_st {
int nid;