Document ASN1_STRING.

Almost everything in <openssl/asn1.h> uses ASN1_STRING, and there are a
lot of unspoken assumptions in the library about the type field, so it
needs quite a bit of text.

Change-Id: Ied56c9428069477da8ecb17a174da4320e573fa1
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/44184
Commit-Queue: David Benjamin <davidben@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/include/openssl/asn1.h b/include/openssl/asn1.h
index 3c37096..70a23c5 100644
--- a/include/openssl/asn1.h
+++ b/include/openssl/asn1.h
@@ -79,28 +79,52 @@
 // the new |CBS| and |CBB| library in <openssl/bytestring.h> instead.
 
 
+// Tag constants.
+//
+// These constants are used in various APIs to specify ASN.1 types and tag
+// components. See the specific API's documentation for details on which values
+// are used and how.
+
+// The following constants are tag classes.
 #define V_ASN1_UNIVERSAL 0x00
 #define V_ASN1_APPLICATION 0x40
 #define V_ASN1_CONTEXT_SPECIFIC 0x80
 #define V_ASN1_PRIVATE 0xc0
 
+// V_ASN1_CONSTRUCTED indicates an element is constructed, rather than
+// primitive.
 #define V_ASN1_CONSTRUCTED 0x20
+
+// V_ASN1_PRIMITIVE_TAG is the highest tag number which can be encoded in a
+// single byte. Note this is unrelated to whether an element is constructed or
+// primitive.
+//
+// TODO(davidben): Make this private.
 #define V_ASN1_PRIMITIVE_TAG 0x1f
 
-#define V_ASN1_APP_CHOOSE -2  // let the recipient choose
-#define V_ASN1_OTHER -3       // used in ASN1_TYPE
-#define V_ASN1_ANY -4         // used in ASN1 template code
-
-#define V_ASN1_NEG 0x100  // negative flag
-// No supported universal tags may exceed this value, to avoid ambiguity with
-// V_ASN1_NEG.
+// V_ASN1_MAX_UNIVERSAL is the highest supported universal tag number. It is
+// necessary to avoid ambiguity with |V_ASN1_NEG|.
+//
+// TODO(davidben): Make this private.
 #define V_ASN1_MAX_UNIVERSAL 0xff
 
-#define V_ASN1_UNDEF -1
+// V_ASN1_UNDEF is used in some APIs to indicate an ASN.1 element is omitted.
+#define V_ASN1_UNDEF (-1)
+
+// V_ASN1_APP_CHOOSE is used in some APIs to specify a default ASN.1 type based
+// on the context.
+#define V_ASN1_APP_CHOOSE (-2)
+
+// V_ASN1_OTHER is used in |ASN1_TYPE| to indicate a non-universal ASN.1 type.
+#define V_ASN1_OTHER (-3)
+
+// V_ASN1_ANY is used by the ASN.1 templates to indicate an ANY type.
+#define V_ASN1_ANY (-4)
+
+// The following constants are tag numbers for universal types.
 #define V_ASN1_EOC 0
-#define V_ASN1_BOOLEAN 1  //
+#define V_ASN1_BOOLEAN 1
 #define V_ASN1_INTEGER 2
-#define V_ASN1_NEG_INTEGER (2 | V_ASN1_NEG)
 #define V_ASN1_BIT_STRING 3
 #define V_ASN1_OCTET_STRING 4
 #define V_ASN1_NULL 5
@@ -109,25 +133,190 @@
 #define V_ASN1_EXTERNAL 8
 #define V_ASN1_REAL 9
 #define V_ASN1_ENUMERATED 10
-#define V_ASN1_NEG_ENUMERATED (10 | V_ASN1_NEG)
 #define V_ASN1_UTF8STRING 12
 #define V_ASN1_SEQUENCE 16
 #define V_ASN1_SET 17
-#define V_ASN1_NUMERICSTRING 18  //
+#define V_ASN1_NUMERICSTRING 18
 #define V_ASN1_PRINTABLESTRING 19
 #define V_ASN1_T61STRING 20
-#define V_ASN1_TELETEXSTRING 20   // alias
-#define V_ASN1_VIDEOTEXSTRING 21  //
+#define V_ASN1_TELETEXSTRING 20
+#define V_ASN1_VIDEOTEXSTRING 21
 #define V_ASN1_IA5STRING 22
 #define V_ASN1_UTCTIME 23
-#define V_ASN1_GENERALIZEDTIME 24  //
-#define V_ASN1_GRAPHICSTRING 25    //
-#define V_ASN1_ISO64STRING 26      //
-#define V_ASN1_VISIBLESTRING 26    // alias
-#define V_ASN1_GENERALSTRING 27    //
-#define V_ASN1_UNIVERSALSTRING 28  //
+#define V_ASN1_GENERALIZEDTIME 24
+#define V_ASN1_GRAPHICSTRING 25
+#define V_ASN1_ISO64STRING 26
+#define V_ASN1_VISIBLESTRING 26
+#define V_ASN1_GENERALSTRING 27
+#define V_ASN1_UNIVERSALSTRING 28
 #define V_ASN1_BMPSTRING 30
 
+// The following constants are used for |ASN1_STRING| values that represent
+// negative INTEGER and ENUMERATED values. See |ASN1_STRING| for more details.
+#define V_ASN1_NEG 0x100
+#define V_ASN1_NEG_INTEGER (V_ASN1_INTEGER | V_ASN1_NEG)
+#define V_ASN1_NEG_ENUMERATED (V_ASN1_ENUMERATED | V_ASN1_NEG)
+
+
+// Strings.
+//
+// ASN.1 contains a myriad of string types, as well as types that contain data
+// that may be encoded into a string. This library uses a single type,
+// |ASN1_STRING|, to represent most values.
+
+// An asn1_string_st (aka |ASN1_STRING|) represents a value of a string-like
+// ASN.1 type. It contains a type field, and a byte string data field with a
+// type-specific representation.
+//
+// When representing a string value, the type field is one of
+// |V_ASN1_OCTET_STRING|, |V_ASN1_UTF8STRING|, |V_ASN1_NUMERICSTRING|,
+// |V_ASN1_PRINTABLESTRING|, |V_ASN1_T61STRING|, |V_ASN1_VIDEOTEXSTRING|,
+// |V_ASN1_IA5STRING|, |V_ASN1_GRAPHICSTRING|, |V_ASN1_ISO64STRING|,
+// |V_ASN1_VISIBLESTRING|, |V_ASN1_GENERALSTRING|, |V_ASN1_UNIVERSALSTRING|, or
+// |V_ASN1_BMPSTRING|. The data contains the byte representation of of the
+// string.
+//
+// When representing a BIT STRING value, the type field is |V_ASN1_BIT_STRING|.
+// The data contains the encoded form of the BIT STRING, including any padding
+// bits added to round to a whole number of bytes, but excluding the leading
+// byte containing the number of padding bits. The number of padding bits is
+// encoded in the flags field. See |ASN1_STRING_FLAG_BITS_LEFT| for details. For
+// example, DER encodes the BIT STRING {1, 0} as {0x06, 0x80 = 0b10_000000}. The
+// |ASN1_STRING| representation has data of {0x80} and flags of
+// ASN1_STRING_FLAG_BITS_LEFT | 6.
+//
+// When representing an INTEGER or ENUMERATED value, the data contains the
+// big-endian encoding of the absolute value of the integer. The sign bit is
+// encoded in the type: non-negative values have a type of |V_ASN1_INTEGER| or
+// |V_ASN1_ENUMERATED|, while negative values have a type of
+// |V_ASN1_NEG_INTEGER| or |V_ASN1_NEG_ENUMERATED|. Note this differs from DER's
+// two's complement representation.
+//
+// When representing a GeneralizedTime or UTCTime value, the type field is
+// |V_ASN1_GENERALIZEDTIME| or |V_ASN1_UTCTIME|, respectively. The data contains
+// the DER encoding of the value. For example, the UNIX epoch would be
+// "19700101000000Z" for a GeneralizedTime and "700101000000Z" for a UTCTime.
+//
+// TODO(davidben): |ASN1_TYPE| additionally uses |ASN1_STRING| to represent
+// various other odd cases. It also likes to assume unknown universal tags are
+// string types. Make a note here when documenting |ASN1_TYPE|.
+//
+// |ASN1_STRING| additionally has the following typedefs: |ASN1_BIT_STRING|,
+// |ASN1_BMPSTRING|, |ASN1_ENUMERATED|, |ASN1_GENERALIZEDTIME|,
+// |ASN1_GENERALSTRING|, |ASN1_IA5STRING|, |ASN1_INTEGER|, |ASN1_OCTET_STRING|,
+// |ASN1_PRINTABLESTRING|, |ASN1_T61STRING|, |ASN1_TIME|,
+// |ASN1_UNIVERSALSTRING|, |ASN1_UTCTIME|, |ASN1_UTF8STRING|, and
+// |ASN1_VISIBLESTRING|. Other than |ASN1_TIME|, these correspond to universal
+// ASN.1 types. |ASN1_TIME| represents a CHOICE of UTCTime and GeneralizedTime,
+// with a cutoff of 2049, as used in Section 4.1.2.5 of RFC 5280.
+//
+// For clarity, callers are encouraged to use the appropriate typedef when
+// available. They are the same type as |ASN1_STRING|, so a caller may freely
+// pass them into functions expecting |ASN1_STRING|, such as
+// |ASN1_STRING_length|.
+//
+// If a function returns an |ASN1_STRING| where the typedef or ASN.1 structure
+// implies constraints on the type field, callers may assume that the type field
+// is correct. However, if a function takes an |ASN1_STRING| as input, callers
+// must ensure the type field matches. These invariants are not captured by the
+// C type system and may not be checked at runtime. For example, callers may
+// assume the output of |X509_get0_serialNumber| has type |V_ASN1_INTEGER| or
+// |V_ASN1_NEG_INTEGER|. Callers must not pass a string of type
+// |V_ASN1_OCTET_STRING| to |X509_set_serialNumber|. Doing so may break
+// invariants on the |X509| object and break the |X509_get0_serialNumber|
+// invariant.
+//
+// TODO(davidben): This is very unfriendly. Getting the type field wrong should
+// not cause memory errors, but it may do strange things. We should add runtime
+// checks to anything that consumes |ASN1_STRING|s from the caller.
+struct asn1_string_st {
+  int length;
+  int type;
+  unsigned char *data;
+  long flags;
+};
+
+// ASN1_STRING_FLAG_BITS_LEFT indicates, in a BIT STRING |ASN1_STRING|, that
+// flags & 0x7 contains the number of padding bits added to the BIT STRING
+// value. When not set, all trailing zero bits in the last byte are implicitly
+// treated as padding. This behavior is deprecated and should not be used.
+#define ASN1_STRING_FLAG_BITS_LEFT 0x08
+
+// ASN1_STRING_FLAG_MSTRING indicates that the |ASN1_STRING| is an MSTRING type,
+// which is how this library refers to a CHOICE type of several string types.
+// For example, DirectoryString as defined in RFC5280.
+//
+// TODO(davidben): This is only used in one place within the library and is easy
+// to accidentally drop. Can it be removed?
+#define ASN1_STRING_FLAG_MSTRING 0x040
+
+// ASN1_STRING_type_new returns a newly-allocated empty |ASN1_STRING| object of
+// type |type|, or NULL on error.
+OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_type_new(int type);
+
+// ASN1_STRING_new returns a newly-allocated empty |ASN1_STRING| object with an
+// arbitrary type. Prefer one of the type-specific constructors, such as
+// |ASN1_OCTET_STRING_new|, or |ASN1_STRING_type_new|.
+OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_new(void);
+
+// ASN1_STRING_free releases memory associated with |str|.
+OPENSSL_EXPORT void ASN1_STRING_free(ASN1_STRING *str);
+
+// ASN1_STRING_copy sets |dst| to a copy of |str|. It returns one on success and
+// zero on error.
+OPENSSL_EXPORT int ASN1_STRING_copy(ASN1_STRING *dst, const ASN1_STRING *str);
+
+// ASN1_STRING_dup returns a newly-allocated copy of |str|, or NULL on error.
+OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_dup(const ASN1_STRING *str);
+
+// ASN1_STRING_type returns the type of |str|. This value will be one of the
+// |V_ASN1_*| constants.
+OPENSSL_EXPORT int ASN1_STRING_type(const ASN1_STRING *str);
+
+// ASN1_STRING_get0_data returns a pointer to |str|'s contents. Callers should
+// use |ASN1_STRING_length| to determine the length of the string. The string
+// may have embedded NUL bytes and may not be NUL-terminated.
+OPENSSL_EXPORT const unsigned char *ASN1_STRING_get0_data(
+    const ASN1_STRING *str);
+
+// ASN1_STRING_data returns a mutable pointer to |str|'s contents. Callers
+// should use |ASN1_STRING_length| to determine the length of the string. The
+// string may have embedded NUL bytes and may not be NUL-terminated.
+//
+// Prefer |ASN1_STRING_get0_data|.
+OPENSSL_EXPORT unsigned char *ASN1_STRING_data(ASN1_STRING *str);
+
+// ASN1_STRING_length returns the length of |str|, in bytes.
+OPENSSL_EXPORT int ASN1_STRING_length(const ASN1_STRING *str);
+
+// ASN1_STRING_cmp compares |a| and |b|'s type and contents. It returns an
+// integer equal to, less than, or greater than zero if |a| is equal to, less
+// than, or greater than |b|, respectively. The comparison is suitable for
+// sorting, but callers should not rely on the particular comparison.
+//
+// Note if |a| or |b| are BIT STRINGs, this function does not compare the
+// |ASN1_STRING_FLAG_BITS_LEFT| flags.
+//
+// TODO(davidben): The BIT STRING comparison seems like a bug. Fix it?
+OPENSSL_EXPORT int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b);
+
+// ASN1_STRING_set sets the contents of |str| to a copy of |len| bytes from
+// |data|. It returns one on success and zero on error.
+OPENSSL_EXPORT int ASN1_STRING_set(ASN1_STRING *str, const void *data, int len);
+
+// ASN1_STRING_set0 sets the contents of |str| to |len| bytes from |data|. It
+// takes ownership of |data|, which must have been allocated with
+// |OPENSSL_malloc|.
+OPENSSL_EXPORT void ASN1_STRING_set0(ASN1_STRING *str, void *data, int len);
+
+// TODO(davidben): Pull up and document functions specific to individual string
+// types.
+
+
+// Underdocumented functions.
+//
+// The following functions are not yet documented and organized.
+
 // For use with d2i_ASN1_type_bytes()
 #define B_ASN1_NUMERICSTRING 0x0001
 #define B_ASN1_PRINTABLESTRING 0x0002
@@ -177,23 +366,6 @@
 
 DEFINE_STACK_OF(ASN1_OBJECT)
 
-#define ASN1_STRING_FLAG_BITS_LEFT 0x08  // Set if 0x07 has bits left value
-
-// This flag is used by ASN1 code to indicate an ASN1_STRING is an MSTRING
-// type.
-#define ASN1_STRING_FLAG_MSTRING 0x040
-// This is the base type that holds just about everything :-)
-struct asn1_string_st {
-  int length;
-  int type;
-  unsigned char *data;
-  // The value of the following field depends on the type being
-  // held.  It is mostly being used for BIT_STRING so if the
-  // input data has a non-zero 'unused bits' value, it will be
-  // handled correctly
-  long flags;
-};
-
 // ASN1_ENCODING structure: this is used to save the received
 // encoding of an ASN1 type. This is useful to get round
 // problems with invalid encodings which can break signatures.
@@ -543,60 +715,6 @@
 
 DECLARE_ASN1_SET_OF(ASN1_OBJECT)
 
-// ASN1_STRING_new returns a newly-allocated empty |ASN1_STRING| object with an
-// arbitrary type. Prefer one of the type-specific constructors, such as
-// |ASN1_OCTET_STRING_new|.
-OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_new(void);
-
-// ASN1_STRING_free releases memory associated with |str|.
-OPENSSL_EXPORT void ASN1_STRING_free(ASN1_STRING *str);
-
-// ASN1_STRING_copy sets |dst| to a copy of |str|. It returns one on success and
-// zero on error.
-OPENSSL_EXPORT int ASN1_STRING_copy(ASN1_STRING *dst, const ASN1_STRING *str);
-
-// ASN1_STRING_dup returns a newly-allocated copy of |str|, or NULL on error.
-OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_dup(const ASN1_STRING *str);
-
-// ASN1_STRING_type_new returns a newly-allocated empty |ASN1_STRING| object of
-// type |type|, or NULL on error.
-OPENSSL_EXPORT ASN1_STRING *ASN1_STRING_type_new(int type);
-
-// ASN1_STRING_cmp compares |a| and |b|'s type and contents. It returns an
-// integer equal to, less than, or greater than zero if |a| is equal to, less
-// than, or greater than |b|, respectively. The comparison is suitable for
-// sorting, but callers should not rely on the particular comparison.
-//
-// Note if |a| or |b| are BIT STRINGs, this function does not compare the
-// |ASN1_STRING_FLAG_BITS_LEFT| flags.
-//
-// TODO(davidben): The BIT STRING comparison seems like a bug. Fix it?
-OPENSSL_EXPORT int ASN1_STRING_cmp(const ASN1_STRING *a, const ASN1_STRING *b);
-
-// ASN1_STRING_set sets the contents of |str| to a copy of |len| bytes from
-// |data|. It returns one on success and zero on error.
-OPENSSL_EXPORT int ASN1_STRING_set(ASN1_STRING *str, const void *data, int len);
-
-// ASN1_STRING_set0 sets the contents of |str| to |len| bytes from |data|. It
-// takes ownership of |data|, which must have been allocated with
-// |OPENSSL_malloc|.
-OPENSSL_EXPORT void ASN1_STRING_set0(ASN1_STRING *str, void *data, int len);
-
-// ASN1_STRING_length returns the length of |str|, in bytes.
-OPENSSL_EXPORT int ASN1_STRING_length(const ASN1_STRING *str);
-
-// ASN1_STRING_type returns the type of |str|. This value will be one of the
-// |V_ASN1_*| constants.
-OPENSSL_EXPORT int ASN1_STRING_type(const ASN1_STRING *str);
-
-// ASN1_STRING_data returns a mutable pointer to |str|'s contents. Prefer
-// |ASN1_STRING_get0_data|.
-OPENSSL_EXPORT unsigned char *ASN1_STRING_data(ASN1_STRING *str);
-
-// ASN1_STRING_get0_data returns a pointer to |str|'s contents.
-OPENSSL_EXPORT const unsigned char *ASN1_STRING_get0_data(
-    const ASN1_STRING *str);
-
 DECLARE_ASN1_FUNCTIONS(ASN1_BIT_STRING)
 OPENSSL_EXPORT int i2c_ASN1_BIT_STRING(const ASN1_BIT_STRING *a,
                                        unsigned char **pp);