Rewrite ASN1_STRING_print_ex escaping.
The original implementation uses a table generated by a Perl script,
and then relies on some subset of ASN1_STRFLGS_* constants overlapping
with CHARTYPE_* constants, while masking off the ones that don't align.
Allocating ASN1_STRFLGS_* constants is already complex with the
XN_FLAG_* interaction. Avoid the additional CHARTYPE_* interaction by
just writing out what it's recognizing in code. If you ignore
CHARTYPE_PRINTABLESTRING (which is unused), that table is just
recognizing 9 characters anyway.
Also this gets charmap.h out of the way so I can clang-format every file
in here without having to constantly exclude it.
Change-Id: I73f31324e4b8a815887afba459e50ed091a9f999
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/52729
Reviewed-by: Bob Beck <bbe@google.com>
Commit-Queue: Bob Beck <bbe@google.com>
diff --git a/crypto/asn1/a_strex.c b/crypto/asn1/a_strex.c
index 56aa033..3ca91b7 100644
--- a/crypto/asn1/a_strex.c
+++ b/crypto/asn1/a_strex.c
@@ -64,25 +64,12 @@
#include <openssl/bio.h>
#include <openssl/mem.h>
-#include "charmap.h"
#include "internal.h"
-// These flags must be distinct from |ESC_FLAGS| and fit in a byte.
-
-// Character is a valid PrintableString character
-#define CHARTYPE_PRINTABLESTRING 0x10
-// Character needs escaping if it is the first character
-#define CHARTYPE_FIRST_ESC_2253 0x20
-// Character needs escaping if it is the last character
-#define CHARTYPE_LAST_ESC_2253 0x40
-
-#define CHARTYPE_BS_ESC (ASN1_STRFLGS_ESC_2253 | CHARTYPE_FIRST_ESC_2253 | CHARTYPE_LAST_ESC_2253)
-
-#define ESC_FLAGS (ASN1_STRFLGS_ESC_2253 | \
- ASN1_STRFLGS_ESC_QUOTE | \
- ASN1_STRFLGS_ESC_CTRL | \
- ASN1_STRFLGS_ESC_MSB)
+#define ESC_FLAGS \
+ (ASN1_STRFLGS_ESC_2253 | ASN1_STRFLGS_ESC_QUOTE | ASN1_STRFLGS_ESC_CTRL | \
+ ASN1_STRFLGS_ESC_MSB)
static int maybe_write(BIO *out, const void *buf, int len)
{
@@ -90,70 +77,54 @@
return out == NULL || BIO_write(out, buf, len) == len;
}
-/*
- * This function handles display of strings, one character at a time. It is
- * passed an unsigned long for each character because it could come from 2 or
- * even 4 byte forms.
- */
-
-#define HEX_SIZE(type) (sizeof(type)*2)
-
-static int do_esc_char(uint32_t c, unsigned char flags, char *do_quotes,
- BIO *out)
+static int is_control_character(unsigned char c)
{
- unsigned char chflgs, chtmp;
- char tmphex[HEX_SIZE(uint32_t) + 3];
+ return c < 32 || c == 127;
+}
+static int do_esc_char(uint32_t c, unsigned long flags, char *do_quotes,
+ BIO *out, int is_first, int is_last)
+{
+ /* |c| is a |uint32_t| because, depending on |ASN1_STRFLGS_UTF8_CONVERT|,
+ * we may be escaping bytes or Unicode codepoints. */
+ char buf[16]; /* Large enough for "\\W01234567". */
+ unsigned char u8 = (unsigned char)c;
if (c > 0xffff) {
- BIO_snprintf(tmphex, sizeof tmphex, "\\W%08" PRIX32, c);
- if (!maybe_write(out, tmphex, 10))
- return -1;
- return 10;
- }
- if (c > 0xff) {
- BIO_snprintf(tmphex, sizeof tmphex, "\\U%04" PRIX32, c);
- if (!maybe_write(out, tmphex, 6))
- return -1;
- return 6;
- }
- chtmp = (unsigned char)c;
- if (chtmp > 0x7f)
- chflgs = flags & ASN1_STRFLGS_ESC_MSB;
- else
- chflgs = char_type[chtmp] & flags;
- if (chflgs & CHARTYPE_BS_ESC) {
- /* If we don't escape with quotes, signal we need quotes */
- if (chflgs & ASN1_STRFLGS_ESC_QUOTE) {
- if (do_quotes)
- *do_quotes = 1;
- if (!maybe_write(out, &chtmp, 1))
- return -1;
- return 1;
+ BIO_snprintf(buf, sizeof(buf), "\\W%08" PRIX32, c);
+ } else if (c > 0xff) {
+ BIO_snprintf(buf, sizeof(buf), "\\U%04" PRIX32, c);
+ } else if ((flags & ASN1_STRFLGS_ESC_MSB) && c > 0x7f) {
+ BIO_snprintf(buf, sizeof(buf), "\\%02X", c);
+ } else if ((flags & ASN1_STRFLGS_ESC_CTRL) && is_control_character(c)) {
+ BIO_snprintf(buf, sizeof(buf), "\\%02X", c);
+ } else if (flags & ASN1_STRFLGS_ESC_2253) {
+ /* See RFC 2253, sections 2.4 and 4. */
+ if (c == '\\' || c == '"') {
+ /* Quotes and backslashes are always escaped, quoted or not. */
+ BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
+ } else if (c == ',' || c == '+' || c == '<' || c == '>' || c == ';' ||
+ (is_first && (c == ' ' || c == '#')) ||
+ (is_last && (c == ' '))) {
+ if (flags & ASN1_STRFLGS_ESC_QUOTE) {
+ /* No need to escape, just tell the caller to quote. */
+ if (do_quotes != NULL) {
+ *do_quotes = 1;
+ }
+ return maybe_write(out, &u8, 1) ? 1 : -1;
+ }
+ BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
+ } else {
+ return maybe_write(out, &u8, 1) ? 1 : -1;
}
- if (!maybe_write(out, "\\", 1))
- return -1;
- if (!maybe_write(out, &chtmp, 1))
- return -1;
- return 2;
+ } else if ((flags & ESC_FLAGS) && c == '\\') {
+ /* If any escape flags are set, also escape backslashes. */
+ BIO_snprintf(buf, sizeof(buf), "\\%c", (int)c);
+ } else {
+ return maybe_write(out, &u8, 1) ? 1 : -1;
}
- if (chflgs & (ASN1_STRFLGS_ESC_CTRL | ASN1_STRFLGS_ESC_MSB)) {
- BIO_snprintf(tmphex, 11, "\\%02X", chtmp);
- if (!maybe_write(out, tmphex, 3))
- return -1;
- return 3;
- }
- /*
- * If we get this far and do any escaping at all must escape the escape
- * character itself: backslash.
- */
- if (chtmp == '\\' && flags & ESC_FLAGS) {
- if (!maybe_write(out, "\\\\", 2))
- return -1;
- return 2;
- }
- if (!maybe_write(out, &chtmp, 1))
- return -1;
- return 1;
+
+ int len = strlen(buf);
+ return maybe_write(out, buf, len) ? len : -1;
}
/*
@@ -163,7 +134,7 @@
*/
static int do_buf(const unsigned char *buf, int buflen, int encoding,
- int utf8_convert, unsigned char flags, char *quotes, BIO *out)
+ int utf8_convert, unsigned long flags, char *quotes, BIO *out)
{
/* Reject invalid UCS-4 and UCS-2 lengths without parsing. */
switch (encoding) {
@@ -185,10 +156,7 @@
const unsigned char *q = buf + buflen;
int outlen = 0;
while (p != q) {
- unsigned char orflags = 0;
- if (p == buf && flags & ASN1_STRFLGS_ESC_2253) {
- orflags = CHARTYPE_FIRST_ESC_2253;
- }
+ const int is_first = p == buf;
/* TODO(davidben): Replace this with |cbs_get_ucs2_be|, etc., to check
* for invalid codepoints. Before doing that, enforce it in the parser,
* https://crbug.com/boringssl/427, so these error cases are not
@@ -224,8 +192,7 @@
assert(0);
return -1;
}
- if (p == q && flags & ASN1_STRFLGS_ESC_2253)
- orflags = CHARTYPE_LAST_ESC_2253;
+ const int is_last = p == q;
if (utf8_convert) {
unsigned char utfbuf[6];
int utflen;
@@ -237,14 +204,15 @@
* otherwise each character will be > 0x7f and so the
* character will never be escaped on first and last.
*/
- int len = do_esc_char(utfbuf[i], flags | orflags, quotes, out);
+ int len = do_esc_char(utfbuf[i], flags, quotes, out, is_first,
+ is_last);
if (len < 0) {
return -1;
}
outlen += len;
}
} else {
- int len = do_esc_char(c, flags | orflags, quotes, out);
+ int len = do_esc_char(c, flags, quotes, out, is_first, is_last);
if (len < 0) {
return -1;
}
@@ -281,14 +249,14 @@
* encoding. This uses the RFC 2253 #01234 format.
*/
-static int do_dump(unsigned long lflags, BIO *out, const ASN1_STRING *str)
+static int do_dump(unsigned long flags, BIO *out, const ASN1_STRING *str)
{
if (!maybe_write(out, "#", 1)) {
return -1;
}
/* If we don't dump DER encoding just dump content octets */
- if (!(lflags & ASN1_STRFLGS_DUMP_DER)) {
+ if (!(flags & ASN1_STRFLGS_DUMP_DER)) {
int outlen = do_hex_dump(out, str->data, str->length);
if (outlen < 0) {
return -1;
@@ -362,13 +330,11 @@
* an error occurred.
*/
-int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long lflags)
+int ASN1_STRING_print_ex(BIO *out, const ASN1_STRING *str, unsigned long flags)
{
- /* Keep a copy of escape flags */
- unsigned char flags = (unsigned char)(lflags & ESC_FLAGS);
int type = str->type;
int outlen = 0;
- if (lflags & ASN1_STRFLGS_SHOW_TYPE) {
+ if (flags & ASN1_STRFLGS_SHOW_TYPE) {
const char *tagname = ASN1_tag2str(type);
outlen += strlen(tagname);
if (!maybe_write(out, tagname, outlen) || !maybe_write(out, ":", 1))
@@ -378,21 +344,21 @@
/* Decide what to do with |str|, either dump the contents or display it. */
int encoding;
- if (lflags & ASN1_STRFLGS_DUMP_ALL) {
+ if (flags & ASN1_STRFLGS_DUMP_ALL) {
/* Dump everything. */
encoding = -1;
- } else if (lflags & ASN1_STRFLGS_IGNORE_TYPE) {
+ } else if (flags & ASN1_STRFLGS_IGNORE_TYPE) {
/* Ignore the string type and interpret the contents as Latin-1. */
encoding = MBSTRING_ASC;
} else {
encoding = string_type_to_encoding(type);
- if (encoding == -1 && (lflags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
+ if (encoding == -1 && (flags & ASN1_STRFLGS_DUMP_UNKNOWN) == 0) {
encoding = MBSTRING_ASC;
}
}
if (encoding == -1) {
- int len = do_dump(lflags, out, str);
+ int len = do_dump(flags, out, str);
if (len < 0)
return -1;
outlen += len;
@@ -400,7 +366,7 @@
}
int utf8_convert = 0;
- if (lflags & ASN1_STRFLGS_UTF8_CONVERT) {
+ if (flags & ASN1_STRFLGS_UTF8_CONVERT) {
/* If the string is UTF-8, skip decoding and just interpret it as 1 byte
* per character to avoid converting twice.
*
diff --git a/crypto/asn1/charmap.h b/crypto/asn1/charmap.h
deleted file mode 100644
index 3305ad1..0000000
--- a/crypto/asn1/charmap.h
+++ /dev/null
@@ -1,15 +0,0 @@
-/*
- * Auto generated with chartype.pl script. Mask of various character
- * properties
- */
-
-static const unsigned char char_type[] = {
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
- 120, 0, 1, 40, 0, 0, 0, 16, 16, 16, 0, 25, 25, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 9, 9, 16, 9, 16,
- 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 1, 0, 0, 0,
- 0, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16,
- 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 16, 0, 0, 0, 0, 2
-};
diff --git a/crypto/asn1/charmap.pl b/crypto/asn1/charmap.pl
deleted file mode 100644
index 117ed32..0000000
--- a/crypto/asn1/charmap.pl
+++ /dev/null
@@ -1,135 +0,0 @@
-#!/usr/local/bin/perl -w
-
-# Written by Dr Stephen N Henson (steve@openssl.org) for the OpenSSL project
-# 2000.
-#
-# ====================================================================
-# Copyright (c) 2000 The OpenSSL Project. All rights reserved.
-#
-# Redistribution and use in source and binary forms, with or without
-# modification, are permitted provided that the following conditions
-# are met:
-#
-# 1. Redistributions of source code must retain the above copyright
-# notice, this list of conditions and the following disclaimer.
-#
-# 2. Redistributions in binary form must reproduce the above copyright
-# notice, this list of conditions and the following disclaimer in
-# the documentation and/or other materials provided with the
-# distribution.
-#
-# 3. All advertising materials mentioning features or use of this
-# software must display the following acknowledgment:
-# "This product includes software developed by the OpenSSL Project
-# for use in the OpenSSL Toolkit. (http://www.OpenSSL.org/)"
-#
-# 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
-# endorse or promote products derived from this software without
-# prior written permission. For written permission, please contact
-# licensing@OpenSSL.org.
-#
-# 5. Products derived from this software may not be called "OpenSSL"
-# nor may "OpenSSL" appear in their names without prior written
-# permission of the OpenSSL Project.
-#
-# 6. Redistributions of any form whatsoever must retain the following
-# acknowledgment:
-# "This product includes software developed by the OpenSSL Project
-# for use in the OpenSSL Toolkit (http://www.OpenSSL.org/)"
-#
-# THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
-# EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
-# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
-# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR
-# ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
-# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
-# NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
-# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
-# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
-# STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
-# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
-# OF THE POSSIBILITY OF SUCH DAMAGE.
-# ====================================================================
-#
-# This product includes cryptographic software written by Eric Young
-# (eay@cryptsoft.com). This product includes software written by Tim
-# Hudson (tjh@cryptsoft.com).
-
-use strict;
-
-my ($i, @arr);
-
-# Set up an array with the type of ASCII characters
-# Each set bit represents a character property.
-
-# RFC 2253 character properties
-my $RFC2253_ESC = 1; # Character escaped with \
-my $ESC_CTRL = 2; # Escaped control character
-# These are used with RFC 1779 quoting using "
-my $NOESC_QUOTE = 8; # Not escaped if quoted
-my $PSTRING_CHAR = 0x10; # Valid PrintableString character
-my $RFC2253_FIRST_ESC = 0x20; # Escaped with \ if first character
-my $RFC2253_LAST_ESC = 0x40; # Escaped with \ if last character
-
-for($i = 0; $i < 128; $i++) {
- # Set the RFC 2253 escape characters (control)
- $arr[$i] = 0;
- if(($i < 32) || ($i > 126)) {
- $arr[$i] |= $ESC_CTRL;
- }
-
- # Some PrintableString characters
- if( ( ( $i >= ord("a")) && ( $i <= ord("z")) )
- || ( ( $i >= ord("A")) && ( $i <= ord("Z")) )
- || ( ( $i >= ord("0")) && ( $i <= ord("9")) ) ) {
- $arr[$i] |= $PSTRING_CHAR;
- }
-}
-
-# Now setup the rest
-
-# Remaining RFC 2253 escaped characters
-
-$arr[ord(" ")] |= $NOESC_QUOTE | $RFC2253_FIRST_ESC | $RFC2253_LAST_ESC;
-$arr[ord("#")] |= $NOESC_QUOTE | $RFC2253_FIRST_ESC;
-
-$arr[ord(",")] |= $NOESC_QUOTE | $RFC2253_ESC;
-$arr[ord("+")] |= $NOESC_QUOTE | $RFC2253_ESC;
-$arr[ord("\"")] |= $RFC2253_ESC;
-$arr[ord("\\")] |= $RFC2253_ESC;
-$arr[ord("<")] |= $NOESC_QUOTE | $RFC2253_ESC;
-$arr[ord(">")] |= $NOESC_QUOTE | $RFC2253_ESC;
-$arr[ord(";")] |= $NOESC_QUOTE | $RFC2253_ESC;
-
-# Remaining PrintableString characters
-
-$arr[ord(" ")] |= $PSTRING_CHAR;
-$arr[ord("'")] |= $PSTRING_CHAR;
-$arr[ord("(")] |= $PSTRING_CHAR;
-$arr[ord(")")] |= $PSTRING_CHAR;
-$arr[ord("+")] |= $PSTRING_CHAR;
-$arr[ord(",")] |= $PSTRING_CHAR;
-$arr[ord("-")] |= $PSTRING_CHAR;
-$arr[ord(".")] |= $PSTRING_CHAR;
-$arr[ord("/")] |= $PSTRING_CHAR;
-$arr[ord(":")] |= $PSTRING_CHAR;
-$arr[ord("=")] |= $PSTRING_CHAR;
-$arr[ord("?")] |= $PSTRING_CHAR;
-
-# Now generate the C code
-
-print <<EOF;
-/* Auto generated with chartype.pl script.
- * Mask of various character properties
- */
-
-static const unsigned char char_type[] = {
-EOF
-
-for($i = 0; $i < 128; $i++) {
- print("\n") if($i && (($i % 16) == 0));
- printf("%2d", $arr[$i]);
- print(",") if ($i != 127);
-}
-print("\n};\n\n");
-