| /* Copyright (c) 2018, Google Inc. |
| * |
| * Permission to use, copy, modify, and/or distribute this software for any |
| * purpose with or without fee is hereby granted, provided that the above |
| * copyright notice and this permission notice appear in all copies. |
| * |
| * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES |
| * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF |
| * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY |
| * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES |
| * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION |
| * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN |
| * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ |
| |
| #include <openssl/bytestring.h> |
| |
| #include "internal.h" |
| |
| |
| static int is_valid_code_point(uint32_t v) { |
| // References in the following are to Unicode 9.0.0. |
| if (// The Unicode space runs from zero to 0x10ffff (3.4 D9). |
| v > 0x10ffff || |
| // Values 0x...fffe, 0x...ffff, and 0xfdd0-0xfdef are permanently reserved |
| // (3.4 D14) |
| (v & 0xfffe) == 0xfffe || |
| (v >= 0xfdd0 && v <= 0xfdef) || |
| // Surrogate code points are invalid (3.2 C1). |
| (v >= 0xd800 && v <= 0xdfff)) { |
| return 0; |
| } |
| return 1; |
| } |
| |
| // BOTTOM_BITS returns a byte with the bottom |n| bits set. |
| #define BOTTOM_BITS(n) (uint8_t)((1u << (n)) - 1) |
| |
| // TOP_BITS returns a byte with the top |n| bits set. |
| #define TOP_BITS(n) ((uint8_t)~BOTTOM_BITS(8 - (n))) |
| |
| int cbs_get_utf8(CBS *cbs, uint32_t *out) { |
| uint8_t c; |
| if (!CBS_get_u8(cbs, &c)) { |
| return 0; |
| } |
| if (c <= 0x7f) { |
| *out = c; |
| return 1; |
| } |
| uint32_t v, lower_bound; |
| size_t len; |
| if ((c & TOP_BITS(3)) == TOP_BITS(2)) { |
| v = c & BOTTOM_BITS(5); |
| len = 1; |
| lower_bound = 0x80; |
| } else if ((c & TOP_BITS(4)) == TOP_BITS(3)) { |
| v = c & BOTTOM_BITS(4); |
| len = 2; |
| lower_bound = 0x800; |
| } else if ((c & TOP_BITS(5)) == TOP_BITS(4)) { |
| v = c & BOTTOM_BITS(3); |
| len = 3; |
| lower_bound = 0x10000; |
| } else { |
| return 0; |
| } |
| for (size_t i = 0; i < len; i++) { |
| if (!CBS_get_u8(cbs, &c) || |
| (c & TOP_BITS(2)) != TOP_BITS(1)) { |
| return 0; |
| } |
| v <<= 6; |
| v |= c & BOTTOM_BITS(6); |
| } |
| if (!is_valid_code_point(v) || |
| v < lower_bound) { |
| return 0; |
| } |
| *out = v; |
| return 1; |
| } |
| |
| int cbs_get_latin1(CBS *cbs, uint32_t *out) { |
| uint8_t c; |
| if (!CBS_get_u8(cbs, &c)) { |
| return 0; |
| } |
| *out = c; |
| return 1; |
| } |
| |
| int cbs_get_ucs2_be(CBS *cbs, uint32_t *out) { |
| // Note UCS-2 (used by BMPString) does not support surrogates. |
| uint16_t c; |
| if (!CBS_get_u16(cbs, &c) || |
| !is_valid_code_point(c)) { |
| return 0; |
| } |
| *out = c; |
| return 1; |
| } |
| |
| int cbs_get_utf32_be(CBS *cbs, uint32_t *out) { |
| return CBS_get_u32(cbs, out) && is_valid_code_point(*out); |
| } |
| |
| size_t cbb_get_utf8_len(uint32_t u) { |
| if (u <= 0x7f) { |
| return 1; |
| } |
| if (u <= 0x7ff) { |
| return 2; |
| } |
| if (u <= 0xffff) { |
| return 3; |
| } |
| return 4; |
| } |
| |
| int cbb_add_utf8(CBB *cbb, uint32_t u) { |
| if (!is_valid_code_point(u)) { |
| return 0; |
| } |
| if (u <= 0x7f) { |
| return CBB_add_u8(cbb, (uint8_t)u); |
| } |
| if (u <= 0x7ff) { |
| return CBB_add_u8(cbb, TOP_BITS(2) | (u >> 6)) && |
| CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); |
| } |
| if (u <= 0xffff) { |
| return CBB_add_u8(cbb, TOP_BITS(3) | (u >> 12)) && |
| CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) && |
| CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); |
| } |
| if (u <= 0x10ffff) { |
| return CBB_add_u8(cbb, TOP_BITS(4) | (u >> 18)) && |
| CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 12) & BOTTOM_BITS(6))) && |
| CBB_add_u8(cbb, TOP_BITS(1) | ((u >> 6) & BOTTOM_BITS(6))) && |
| CBB_add_u8(cbb, TOP_BITS(1) | (u & BOTTOM_BITS(6))); |
| } |
| return 0; |
| } |
| |
| int cbb_add_latin1(CBB *cbb, uint32_t u) { |
| return u <= 0xff && CBB_add_u8(cbb, (uint8_t)u); |
| } |
| |
| int cbb_add_ucs2_be(CBB *cbb, uint32_t u) { |
| return u <= 0xffff && is_valid_code_point(u) && CBB_add_u16(cbb, (uint16_t)u); |
| } |
| |
| int cbb_add_utf32_be(CBB *cbb, uint32_t u) { |
| return is_valid_code_point(u) && CBB_add_u32(cbb, u); |
| } |