| // Copyright 2018 The BoringSSL Authors | 
 | // | 
 | // Licensed under the Apache License, Version 2.0 (the "License"); | 
 | // you may not use this file except in compliance with the License. | 
 | // You may obtain a copy of the License at | 
 | // | 
 | //     https://www.apache.org/licenses/LICENSE-2.0 | 
 | // | 
 | // Unless required by applicable law or agreed to in writing, software | 
 | // distributed under the License is distributed on an "AS IS" BASIS, | 
 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
 | // See the License for the specific language governing permissions and | 
 | // limitations under the License. | 
 |  | 
 | #include <openssl/bytestring.h> | 
 |  | 
 | #include "internal.h" | 
 |  | 
 |  | 
 | static int is_valid_code_point(uint32_t v) { | 
 |   // References in the following are to Unicode 15.0.0. | 
 |   if (// The Unicode space runs from zero to 0x10ffff (3.4 D9). | 
 |       v > 0x10ffff || | 
 |       // Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved | 
 |       // as noncharacters (3.4 D14). See also 23.7. As our APIs are intended for | 
 |       // "open interchange", such as ASN.1, we reject them. | 
 |       (v & 0xfffe) == 0xfffe || | 
 |       (v >= 0xfdd0 && v <= 0xfdef) || | 
 |       // Surrogate code points are invalid (3.2 C1). | 
 |       (v >= 0xd800 && v <= 0xdfff)) { | 
 |     return 0; | 
 |   } | 
 |   return 1; | 
 | } | 
 |  | 
 | // BOTTOM_BITS returns a byte with the bottom |n| bits set. | 
 | #define BOTTOM_BITS(n) (uint8_t)((1u << (n)) - 1) | 
 |  | 
 | // TOP_BITS returns a byte with the top |n| bits set. | 
 | #define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n))) | 
 |  | 
 | int CBS_get_utf8(CBS *cbs, uint32_t *out) { | 
 |   uint8_t c; | 
 |   if (!CBS_get_u8(cbs, &c)) { | 
 |     return 0; | 
 |   } | 
 |   if (c <= 0x7f) { | 
 |     *out = c; | 
 |     return 1; | 
 |   } | 
 |   uint32_t v, lower_bound; | 
 |   size_t len; | 
 |   if ((c & TOP_BITS(3)) == TOP_BITS(2)) { | 
 |     v = c & BOTTOM_BITS(5); | 
 |     len = 1; | 
 |     lower_bound = 0x80; | 
 |   } else if ((c & TOP_BITS(4)) == TOP_BITS(3)) { | 
 |     v = c & BOTTOM_BITS(4); | 
 |     len = 2; | 
 |     lower_bound = 0x800; | 
 |   } else if ((c & TOP_BITS(5)) == TOP_BITS(4)) { | 
 |     v = c & BOTTOM_BITS(3); | 
 |     len = 3; | 
 |     lower_bound = 0x10000; | 
 |   } else { | 
 |     return 0; | 
 |   } | 
 |   for (size_t i = 0; i < len; i++) { | 
 |     if (!CBS_get_u8(cbs, &c) || | 
 |         (c & TOP_BITS(2)) != TOP_BITS(1)) { | 
 |       return 0; | 
 |     } | 
 |     v <<= 6; | 
 |     v |= c & BOTTOM_BITS(6); | 
 |   } | 
 |   if (!is_valid_code_point(v) || | 
 |       v < lower_bound) { | 
 |     return 0; | 
 |   } | 
 |   *out = v; | 
 |   return 1; | 
 | } | 
 |  | 
 | int CBS_get_latin1(CBS *cbs, uint32_t *out) { | 
 |   uint8_t c; | 
 |   if (!CBS_get_u8(cbs, &c)) { | 
 |     return 0; | 
 |   } | 
 |   *out = c; | 
 |   return 1; | 
 | } | 
 |  | 
 | int CBS_get_ucs2_be(CBS *cbs, uint32_t *out) { | 
 |   // Note UCS-2 (used by BMPString) does not support surrogates. | 
 |   uint16_t c; | 
 |   if (!CBS_get_u16(cbs, &c) || | 
 |       !is_valid_code_point(c)) { | 
 |     return 0; | 
 |   } | 
 |   *out = c; | 
 |   return 1; | 
 | } | 
 |  | 
 | int CBS_get_utf32_be(CBS *cbs, uint32_t *out) { | 
 |   return CBS_get_u32(cbs, out) && is_valid_code_point(*out); | 
 | } | 
 |  | 
 | size_t CBB_get_utf8_len(uint32_t u) { | 
 |   if (u <= 0x7f) { | 
 |     return 1; | 
 |   } | 
 |   if (u <= 0x7ff) { | 
 |     return 2; | 
 |   } | 
 |   if (u <= 0xffff) { | 
 |     return 3; | 
 |   } | 
 |   return 4; | 
 | } | 
 |  | 
 | int CBB_add_utf8(CBB *cbb, uint32_t u) { | 
 |   if (!is_valid_code_point(u)) { | 
 |     return 0; | 
 |   } | 
 |   if (u <= 0x7f) { | 
 |     return CBB_add_u8(cbb, (uint8_t)u); | 
 |   } | 
 |   if (u <= 0x7ff) { | 
 |     return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
 |   } | 
 |   if (u <= 0xffff) { | 
 |     return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
 |   } | 
 |   if (u <= 0x10ffff) { | 
 |     return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) && | 
 |            CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
 |   } | 
 |   return 0; | 
 | } | 
 |  | 
 | int CBB_add_latin1(CBB *cbb, uint32_t u) { | 
 |   return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u); | 
 | } | 
 |  | 
 | int CBB_add_ucs2_be(CBB *cbb, uint32_t u) { | 
 |   return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u); | 
 | } | 
 |  | 
 | int CBB_add_utf32_be(CBB *cbb, uint32_t u) { | 
 |   return is_valid_code_point(u) && CBB_add_u32(cbb, u); | 
 | } |