Tidy up how ASN1_STRING_print_ex figures out the type.
Between the lookup table, the multiple layers of reuse of the "type"
variable, it is a little hard to follow what's going on with
ASN1_STRING_print_ex. Replace the lookup table with a switch-case
(implicitly handles the bounds check, and we can let the compiler figure
out the best spelling). Then, rather than returning a "character width",
which doen't represent UTF-8, just use the already-defined MBSTRING_*
constants.
(These changes should be covered by the existing ASN1Test.StringPrintEx
test.)
Change-Id: Ie3b2557bfae0f65db969e90cd0c76bc8ade963d4
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/52365
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
diff --git a/crypto/asn1/a_strex.c b/crypto/asn1/a_strex.c
index 3732894..56aa033 100644
--- a/crypto/asn1/a_strex.c
+++ b/crypto/asn1/a_strex.c
@@ -56,6 +56,7 @@
#include <openssl/asn1.h>
+#include <assert.h>
#include <ctype.h>
#include <inttypes.h>
#include <string.h>
@@ -155,100 +156,98 @@
return 1;
}
-#define BUF_TYPE_WIDTH_MASK 0x7
-#define BUF_TYPE_CONVUTF8 0x8
-
/*
* This function sends each character in a buffer to do_esc_char(). It
* interprets the content formats and converts to or from UTF8 as
* appropriate.
*/
-static int do_buf(unsigned char *buf, int buflen,
- int type, unsigned char flags, char *quotes, BIO *out)
+static int do_buf(const unsigned char *buf, int buflen, int encoding,
+ int utf8_convert, unsigned char flags, char *quotes, BIO *out)
{
- int i, outlen, len, charwidth;
- unsigned char orflags, *p, *q;
- uint32_t c;
- p = buf;
- q = buf + buflen;
- outlen = 0;
- charwidth = type & BUF_TYPE_WIDTH_MASK;
-
- switch (charwidth) {
- case 4:
+ /* Reject invalid UCS-4 and UCS-2 lengths without parsing. */
+ switch (encoding) {
+ case MBSTRING_UNIV:
if (buflen & 3) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_UNIVERSALSTRING);
return -1;
}
break;
- case 2:
+ case MBSTRING_BMP:
if (buflen & 1) {
OPENSSL_PUT_ERROR(ASN1, ASN1_R_INVALID_BMPSTRING);
return -1;
}
break;
- default:
- break;
}
+ const unsigned char *p = buf;
+ const unsigned char *q = buf + buflen;
+ int outlen = 0;
while (p != q) {
- if (p == buf && flags & ASN1_STRFLGS_ESC_2253)
+ unsigned char orflags = 0;
+ if (p == buf && flags & ASN1_STRFLGS_ESC_2253) {
orflags = CHARTYPE_FIRST_ESC_2253;
- else
- orflags = 0;
+ }
/* TODO(davidben): Replace this with |cbs_get_ucs2_be|, etc., to check
- * for invalid codepoints. */
- switch (charwidth) {
- case 4:
+ * for invalid codepoints. Before doing that, enforce it in the parser,
+ * https://crbug.com/boringssl/427, so these error cases are not
+ * reachable from parsed objects. */
+ uint32_t c;
+ switch (encoding) {
+ case MBSTRING_UNIV:
c = ((uint32_t)*p++) << 24;
c |= ((uint32_t)*p++) << 16;
c |= ((uint32_t)*p++) << 8;
c |= *p++;
break;
- case 2:
+ case MBSTRING_BMP:
c = ((uint32_t)*p++) << 8;
c |= *p++;
break;
- case 1:
+ case MBSTRING_ASC:
c = *p++;
break;
- case 0:
- i = UTF8_getc(p, buflen, &c);
- if (i < 0)
+ case MBSTRING_UTF8: {
+ int consumed = UTF8_getc(p, buflen, &c);
+ if (consumed < 0)
return -1; /* Invalid UTF8String */
- buflen -= i;
- p += i;
+ buflen -= consumed;
+ p += consumed;
break;
+ }
+
default:
- return -1; /* invalid width */
+ assert(0);
+ return -1;
}
if (p == q && flags & ASN1_STRFLGS_ESC_2253)
orflags = CHARTYPE_LAST_ESC_2253;
- if (type & BUF_TYPE_CONVUTF8) {
+ if (utf8_convert) {
unsigned char utfbuf[6];
int utflen;
utflen = UTF8_putc(utfbuf, sizeof utfbuf, c);
- for (i = 0; i < utflen; i++) {
+ for (int i = 0; i < utflen; i++) {
/*
* We don't need to worry about setting orflags correctly
* because if utflen==1 its value will be correct anyway
* otherwise each character will be > 0x7f and so the
* character will never be escaped on first and last.
*/
- len = do_esc_char(utfbuf[i], (unsigned char)(flags | orflags),
- quotes, out);
- if (len < 0)
+ int len = do_esc_char(utfbuf[i], flags | orflags, quotes, out);
+ if (len < 0) {
return -1;
+ }
outlen += len;
}
} else {
- len = do_esc_char(c, (unsigned char)(flags | orflags), quotes, out);
- if (len < 0)
+ int len = do_esc_char(c, flags | orflags, quotes, out);
+ if (len < 0) {
return -1;
+ }
outlen += len;
}
}
@@ -331,22 +330,31 @@
return outlen + 1;
}
-/*
- * Lookup table to convert tags to character widths, 0 = UTF8 encoded, -1 is
- * used for non string types otherwise it is the number of bytes per
- * character
- */
-
-static const signed char tag2nbyte[] = {
- -1, -1, -1, -1, -1, /* 0-4 */
- -1, -1, -1, -1, -1, /* 5-9 */
- -1, -1, 0, -1, /* 10-13 */
- -1, -1, -1, -1, /* 15-17 */
- 1, 1, 1, /* 18-20 */
- -1, 1, 1, 1, /* 21-24 */
- -1, 1, -1, /* 25-27 */
- 4, -1, 2 /* 28-30 */
-};
+/* string_type_to_encoding returns the |MBSTRING_*| constant for the encoding
+ * used by the |ASN1_STRING| type |type|, or -1 if |tag| is not a string
+ * type. */
+static int string_type_to_encoding(int type) {
+ /* This function is sometimes passed ASN.1 universal types and sometimes
+ * passed |ASN1_STRING| type values */
+ switch (type) {
+ case V_ASN1_UTF8STRING:
+ return MBSTRING_UTF8;
+ case V_ASN1_NUMERICSTRING:
+ case V_ASN1_PRINTABLESTRING:
+ case V_ASN1_T61STRING:
+ case V_ASN1_IA5STRING:
+ case V_ASN1_UTCTIME:
+ case V_ASN1_GENERALIZEDTIME:
+ case V_ASN1_ISO64STRING:
+ /* |MBSTRING_ASC| refers to Latin-1, not ASCII. */
+ return MBSTRING_ASC;
+ case V_ASN1_UNIVERSALSTRING:
+ return MBSTRING_UNIV;
+ case V_ASN1_BMPSTRING:
+ return MBSTRING_BMP;
+ }
+ return -1;
+}
/*
* This is the main function, print out an ASN1_STRING taking note of various
@@ -356,79 +364,77 @@
int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
{
- int outlen, len;
- int type;
- char quotes;
- unsigned char flags;
- quotes = 0;
/* Keep a copy of escape flags */
- flags = (unsigned char)(lflags & ESC_FLAGS);
-
- type = str->type;
-
- outlen = 0;
-
+ unsigned char flags = (unsigned char)(lflags & ESC_FLAGS);
+ int type = str->type;
+ int outlen = 0;
if (lflags & ASN1_STRFLGS_SHOW_TYPE) {
- const char *tagname;
- tagname = ASN1_tag2str(type);
+ const char *tagname = ASN1_tag2str(type);
outlen += strlen(tagname);
if (!maybe_write(out, tagname, outlen) || !maybe_write(out, ":", 1))
return -1;
outlen++;
}
- /* Decide what to do with type, either dump content or display it */
-
- /* Dump everything */
- if (lflags & ASN1_STRFLGS_DUMP_ALL)
- type = -1;
- /* Ignore the string type */
- else if (lflags & ASN1_STRFLGS_IGNORE_TYPE)
- type = 1;
- else {
- /* Else determine width based on type */
- if ((type > 0) && (type < 31))
- type = tag2nbyte[type];
- else
- type = -1;
- if ((type == -1) && !(lflags & ASN1_STRFLGS_DUMP_UNKNOWN))
- type = 1;
+ /* Decide what to do with |str|, either dump the contents or display it. */
+ int encoding;
+ if (lflags & ASN1_STRFLGS_DUMP_ALL) {
+ /* Dump everything. */
+ encoding = -1;
+ } else if (lflags & ASN1_STRFLGS_IGNORE_TYPE) {
+ /* Ignore the string type and interpret the contents as Latin-1. */
+ encoding = MBSTRING_ASC;
+ } else {
+ encoding = string_type_to_encoding(type);
+ if (encoding == -1 && (lflags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
+ encoding = MBSTRING_ASC;
+ }
}
- if (type == -1) {
- len = do_dump(lflags, out, str);
+ if (encoding == -1) {
+ int len = do_dump(lflags, out, str);
if (len < 0)
return -1;
outlen += len;
return outlen;
}
+ int utf8_convert = 0;
if (lflags & ASN1_STRFLGS_UTF8_CONVERT) {
- /*
- * Note: if string is UTF8 and we want to convert to UTF8 then we
- * just interpret it as 1 byte per character to avoid converting
- * twice.
- */
- if (!type)
- type = 1;
- else
- type |= BUF_TYPE_CONVUTF8;
+ /* If the string is UTF-8, skip decoding and just interpret it as 1 byte
+ * per character to avoid converting twice.
+ *
+ * TODO(davidben): This is not quite a valid optimization if the input
+ * was invalid UTF-8. */
+ if (encoding == MBSTRING_UTF8) {
+ encoding = MBSTRING_ASC;
+ } else {
+ utf8_convert = 1;
+ }
}
- len = do_buf(str->data, str->length, type, flags, "es, NULL);
- if (len < 0)
+ /* Measure the length. */
+ char quotes = 0;
+ int len = do_buf(str->data, str->length, encoding, utf8_convert, flags,
+ "es, NULL);
+ if (len < 0) {
return -1;
+ }
outlen += len;
- if (quotes)
+ if (quotes) {
outlen += 2;
- if (!out)
+ }
+ if (!out) {
return outlen;
- if (quotes && !maybe_write(out, "\"", 1))
+ }
+
+ /* Encode the value. */
+ if ((quotes && !maybe_write(out, "\"", 1)) ||
+ do_buf(str->data, str->length, encoding, utf8_convert, flags, NULL,
+ out) < 0 ||
+ (quotes && !maybe_write(out, "\"", 1))) {
return -1;
- if (do_buf(str->data, str->length, type, flags, NULL, out) < 0)
- return -1;
- if (quotes && !maybe_write(out, "\"", 1))
- return -1;
+ }
return outlen;
}
@@ -451,22 +457,19 @@
int ASN1_STRING_to_UTF8(unsigned char **out, const ASN1_STRING *in)
{
- ASN1_STRING stmp, *str = &stmp;
- int mbflag, type, ret;
if (!in)
return -1;
- type = in->type;
- if ((type < 0) || (type > 30))
+ int mbflag = string_type_to_encoding(in->type);
+ if (mbflag == -1) {
+ OPENSSL_PUT_ERROR(ASN1, ASN1_R_UNKNOWN_TAG);
return -1;
- mbflag = tag2nbyte[type];
- if (mbflag == -1)
- return -1;
- mbflag |= MBSTRING_FLAG;
+ }
+ ASN1_STRING stmp, *str = &stmp;
stmp.data = NULL;
stmp.length = 0;
stmp.flags = 0;
- ret = ASN1_mbstring_copy(&str, in->data, in->length, mbflag,
- B_ASN1_UTF8STRING);
+ int ret = ASN1_mbstring_copy(&str, in->data, in->length, mbflag,
+ B_ASN1_UTF8STRING);
if (ret < 0)
return ret;
*out = stmp.data;