|  | // Copyright 2018 The BoringSSL Authors | 
|  | // | 
|  | // Licensed under the Apache License, Version 2.0 (the "License"); | 
|  | // you may not use this file except in compliance with the License. | 
|  | // You may obtain a copy of the License at | 
|  | // | 
|  | //     https://www.apache.org/licenses/LICENSE-2.0 | 
|  | // | 
|  | // Unless required by applicable law or agreed to in writing, software | 
|  | // distributed under the License is distributed on an "AS IS" BASIS, | 
|  | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. | 
|  | // See the License for the specific language governing permissions and | 
|  | // limitations under the License. | 
|  |  | 
|  | #include <openssl/bytestring.h> | 
|  |  | 
|  | #include "internal.h" | 
|  |  | 
|  |  | 
// is_valid_code_point returns one if |v| is a code point that the functions in
// this file accept, and zero otherwise. Section numbers below refer to
// Unicode 15.0.0.
static int is_valid_code_point(uint32_t v) {
  // The Unicode space runs from zero to 0x10ffff (3.4 D9).
  if (v > 0x10ffff) {
    return 0;
  }

  // Surrogate code points are invalid (3.2 C1).
  if (v >= 0xd800 && v <= 0xdfff) {
    return 0;
  }

  // Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved
  // as noncharacters (3.4 D14). See also 23.7. As our APIs are intended for
  // "open interchange", such as ASN.1, we reject them.
  const int ends_in_fffe_or_ffff = (v & 0xfffe) == 0xfffe;
  const int in_fdd0_fdef = v >= 0xfdd0 && v <= 0xfdef;
  return !(ends_in_fffe_or_ffff || in_fdd0_fdef);
}
|  |  | 
// BOTTOM_BITS returns a byte with the bottom |n| bits set. |n| is expected to
// be between zero and eight, inclusive. The whole expansion is parenthesized so
// that it behaves as a single operand regardless of the surrounding operators.
#define BOTTOM_BITS(n) ((uint8_t)((1u << (n)) - 1))

// TOP_BITS returns a byte with the top |n| bits set. |n| is expected to be
// between zero and eight, inclusive.
#define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n)))
|  |  | 
|  | int CBS_get_utf8(CBS *cbs, uint32_t *out) { | 
|  | uint8_t c; | 
|  | if (!CBS_get_u8(cbs, &c)) { | 
|  | return 0; | 
|  | } | 
|  | if (c <= 0x7f) { | 
|  | *out = c; | 
|  | return 1; | 
|  | } | 
|  | uint32_t v, lower_bound; | 
|  | size_t len; | 
|  | if ((c & TOP_BITS(3)) == TOP_BITS(2)) { | 
|  | v = c & BOTTOM_BITS(5); | 
|  | len = 1; | 
|  | lower_bound = 0x80; | 
|  | } else if ((c & TOP_BITS(4)) == TOP_BITS(3)) { | 
|  | v = c & BOTTOM_BITS(4); | 
|  | len = 2; | 
|  | lower_bound = 0x800; | 
|  | } else if ((c & TOP_BITS(5)) == TOP_BITS(4)) { | 
|  | v = c & BOTTOM_BITS(3); | 
|  | len = 3; | 
|  | lower_bound = 0x10000; | 
|  | } else { | 
|  | return 0; | 
|  | } | 
|  | for (size_t i = 0; i < len; i++) { | 
|  | if (!CBS_get_u8(cbs, &c) || | 
|  | (c & TOP_BITS(2)) != TOP_BITS(1)) { | 
|  | return 0; | 
|  | } | 
|  | v <<= 6; | 
|  | v |= c & BOTTOM_BITS(6); | 
|  | } | 
|  | if (!is_valid_code_point(v) || | 
|  | v < lower_bound) { | 
|  | return 0; | 
|  | } | 
|  | *out = v; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | int CBS_get_latin1(CBS *cbs, uint32_t *out) { | 
|  | uint8_t c; | 
|  | if (!CBS_get_u8(cbs, &c)) { | 
|  | return 0; | 
|  | } | 
|  | *out = c; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | int CBS_get_ucs2_be(CBS *cbs, uint32_t *out) { | 
|  | // Note UCS-2 (used by BMPString) does not support surrogates. | 
|  | uint16_t c; | 
|  | if (!CBS_get_u16(cbs, &c) || | 
|  | !is_valid_code_point(c)) { | 
|  | return 0; | 
|  | } | 
|  | *out = c; | 
|  | return 1; | 
|  | } | 
|  |  | 
|  | int CBS_get_utf32_be(CBS *cbs, uint32_t *out) { | 
|  | return CBS_get_u32(cbs, out) && is_valid_code_point(*out); | 
|  | } | 
|  |  | 
// CBB_get_utf8_len returns the number of bytes, from one to four, that the
// UTF-8 encoding of |u| occupies. It does not check that |u| is a valid code
// point; every value above 0xffff yields four.
size_t CBB_get_utf8_len(uint32_t u) {
  // Start at one byte and add one for each encoding-length threshold exceeded.
  size_t len = 1;
  if (u > 0x7f) {
    len++;
  }
  if (u > 0x7ff) {
    len++;
  }
  if (u > 0xffff) {
    len++;
  }
  return len;
}
|  |  | 
|  | int CBB_add_utf8(CBB *cbb, uint32_t u) { | 
|  | if (!is_valid_code_point(u)) { | 
|  | return 0; | 
|  | } | 
|  | if (u <= 0x7f) { | 
|  | return CBB_add_u8(cbb, (uint8_t)u); | 
|  | } | 
|  | if (u <= 0x7ff) { | 
|  | return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) && | 
|  | CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
|  | } | 
|  | if (u <= 0xffff) { | 
|  | return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) && | 
|  | CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) && | 
|  | CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
|  | } | 
|  | if (u <= 0x10ffff) { | 
|  | return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) && | 
|  | CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) && | 
|  | CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) && | 
|  | CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); | 
|  | } | 
|  | return 0; | 
|  | } | 
|  |  | 
|  | int CBB_add_latin1(CBB *cbb, uint32_t u) { | 
|  | return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u); | 
|  | } | 
|  |  | 
|  | int CBB_add_ucs2_be(CBB *cbb, uint32_t u) { | 
|  | return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u); | 
|  | } | 
|  |  | 
|  | int CBB_add_utf32_be(CBB *cbb, uint32_t u) { | 
|  | return is_valid_code_point(u) && CBB_add_u32(cbb, u); | 
|  | } |