| /* Copyright (c) 2018, Google Inc. | 
 |  * | 
 |  * Permission to use, copy, modify, and/or distribute this software for any | 
 |  * purpose with or without fee is hereby granted, provided that the above | 
 |  * copyright notice and this permission notice appear in all copies. | 
 |  * | 
 |  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES | 
 |  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF | 
 |  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY | 
 |  * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES | 
 |  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION | 
 |  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN | 
 |  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ | 
 |  | 
 | #include <openssl/bytestring.h> | 
 |  | 
 | #include "internal.h" | 
 |  | 
 |  | 
 | static int is_valid_code_point(uint32_t v) { | 
 |   // References in the following are to Unicode 9.0.0. | 
 |   if (// The Unicode space runs from zero to 0x10ffff (3.4 D9). | 
 |       v > 0x10ffff || | 
 |       // Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved | 
 |       // (3.4 D14) | 
 |       (v & 0xfffe) == 0xfffe || | 
 |       (v >= 0xfdd0 && v <= 0xfdef) || | 
 |       // Surrogate code points are invalid (3.2 C1). | 
 |       (v >= 0xd800 && v <= 0xdfff)) { | 
 |     return 0; | 
 |   } | 
 |   return 1; | 
 | } | 
 |  | 
 | // BOTTOM_BITS returns a byte with the bottom |n| bits set. | 
 | #define BOTTOM_BITS(n) (uint8_t)((1u << (n)) - 1) | 
 |  | 
 | // TOP_BITS returns a byte with the top |n| bits set. | 
 | #define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n))) | 
 |  | 
 | int cbs_get_utf8(CBS *cbs, uint32_t *out) { | 
 |   uint8_t c; | 
 |   if (!CBS_get_u8(cbs, &c)) { | 
 |     return 0; | 
 |   } | 
 |   if (c <= 0x7f) { | 
 |     *out = c; | 
 |     return 1; | 
 |   } | 
 |   uint32_t v, lower_bound; | 
 |   size_t len; | 
 |   if ((c & TOP_BITS(3)) == TOP_BITS(2)) { | 
 |     v = c & BOTTOM_BITS(5); | 
 |     len = 1; | 
 |     lower_bound = 0x80; | 
 |   } else if ((c & TOP_BITS(4)) == TOP_BITS(3)) { | 
 |     v = c & BOTTOM_BITS(4); | 
 |     len = 2; | 
 |     lower_bound = 0x800; | 
 |   } else if ((c & TOP_BITS(5)) == TOP_BITS(4)) { | 
 |     v = c & BOTTOM_BITS(3); | 
 |     len = 3; | 
 |     lower_bound = 0x10000; | 
 |   } else { | 
 |     return 0; | 
 |   } | 
 |   for (size_t i = 0; i < len; i++) { | 
 |     if (!CBS_get_u8(cbs, &c) || | 
 |         (c & TOP_BITS(2)) != TOP_BITS(1)) { | 
 |       return 0; | 
 |     } | 
 |     v <<= 6; | 
 |     v |= c & BOTTOM_BITS(6); | 
 |   } | 
 |   if (!is_valid_code_point(v) || | 
 |       v < lower_bound) { | 
 |     return 0; | 
 |   } | 
 |   *out = v; | 
 |   return 1; | 
 | } | 
 |  | 
 | int cbs_get_latin1(CBS *cbs, uint32_t *out) { | 
 |   uint8_t c; | 
 |   if (!CBS_get_u8(cbs, &c)) { | 
 |     return 0; | 
 |   } | 
 |   *out = c; | 
 |   return 1; | 
 | } | 
 |  | 
 | int cbs_get_ucs2_be(CBS *cbs, uint32_t *out) { | 
 |   // Note UCS-2 (used by BMPString) does not support surrogates. | 
 |   uint16_t c; | 
 |   if (!CBS_get_u16(cbs, &c) || | 
 |       !is_valid_code_point(c)) { | 
 |     return 0; | 
 |   } | 
 |   *out = c; | 
 |   return 1; | 
 | } | 
 |  | 
 | int cbs_get_utf32_be(CBS *cbs, uint32_t *out) { | 
 |   return CBS_get_u32(cbs, out) && is_valid_code_point(*out); | 
 | } | 
 |  | 
 | size_t cbb_get_utf8_len(uint32_t u) { | 
 |   if (u <= 0x7f) { | 
 |     return 1; | 
 |   } | 
 |   if (u <= 0x7ff) { | 
 |     return 2; | 
 |   } | 
 |   if (u <= 0xffff) { | 
 |     return 3; | 
 |   } | 
 |   return 4; | 
 | } | 
 |  | 
 | int cbb_add_utf8(CBB *cbb, uint32_t u) { | 
 |   if (!is_valid_code_point(u)) { | 
 |     return 0; | 
 |   } | 
 |   if (u <= 0x7f) { | 
 |     return CBB_add_u8(cbb, (uint8_t)u); | 
 |   } | 
 |   if (u <= 0x7ff) { | 
 |     return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
 |   } | 
 |   if (u <= 0xffff) { | 
 |     return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
 |   } | 
 |   if (u <= 0x10ffff) { | 
 |     return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
 |   } | 
 |   return 0; | 
 | } | 
 |  | 
 | int cbb_add_latin1(CBB *cbb, uint32_t u) { | 
 |   return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u); | 
 | } | 
 |  | 
 | int cbb_add_ucs2_be(CBB *cbb, uint32_t u) { | 
 |   return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u); | 
 | } | 
 |  | 
 | int cbb_add_utf32_be(CBB *cbb, uint32_t u) { | 
 |   return is_valid_code_point(u) && CBB_add_u32(cbb, u); | 
 | } |