Bob Beck | bc97b7a | 2023-04-18 08:35:15 -0600 | [diff] [blame] | 1 | // Copyright 2022 The Chromium Authors |
| 2 | // Use of this source code is governed by a BSD-style license that can be |
| 3 | // found in the LICENSE file. |
| 4 | |
| 5 | #include "string_util.h" |
| 6 | |
| 7 | #include <algorithm> |
| 8 | #include <iomanip> |
| 9 | #include <sstream> |
| 10 | #include <string> |
| 11 | |
Bob Beck | 68d6ce3 | 2023-11-22 14:37:02 -0700 | [diff] [blame] | 12 | #include <openssl/base64.h> |
Bob Beck | bc97b7a | 2023-04-18 08:35:15 -0600 | [diff] [blame] | 13 | #include <openssl/mem.h> |
| 14 | |
David Benjamin | 0fbc17a | 2024-08-21 15:13:10 -0400 | [diff] [blame] | 15 | BSSL_NAMESPACE_BEGIN |
| 16 | namespace string_util { |
Bob Beck | bc97b7a | 2023-04-18 08:35:15 -0600 | [diff] [blame] | 17 | |
// Returns true when every byte of |str| lies in the 7-bit ASCII range
// (0 through 127). An empty |str| yields true.
bool IsAscii(std::string_view str) {
  for (const char ch : str) {
    // A byte with its top bit set is greater than 127, i.e. not ASCII.
    if ((static_cast<unsigned char>(ch) & 0x80) != 0) {
      return false;
    }
  }
  return true;
}
| 26 | |
| 27 | bool IsEqualNoCase(std::string_view str1, std::string_view str2) { |
| 28 | return std::equal(str1.begin(), str1.end(), str2.begin(), str2.end(), |
| 29 | [](const unsigned char a, const unsigned char b) { |
| 30 | return OPENSSL_tolower(a) == OPENSSL_tolower(b); |
| 31 | }); |
| 32 | } |
| 33 | |
| 34 | bool EndsWithNoCase(std::string_view str, std::string_view suffix) { |
| 35 | return suffix.size() <= str.size() && |
| 36 | IsEqualNoCase(suffix, str.substr(str.size() - suffix.size())); |
| 37 | } |
| 38 | |
| 39 | bool StartsWithNoCase(std::string_view str, std::string_view prefix) { |
| 40 | return prefix.size() <= str.size() && |
| 41 | IsEqualNoCase(prefix, str.substr(0, prefix.size())); |
| 42 | } |
| 43 | |
// Returns a copy of |str| in which every non-overlapping occurrence of |find|,
// located by scanning left to right, has been replaced with |replace|. Text
// inserted as a replacement is not scanned again. When |find| is empty, |str|
// is returned unchanged.
std::string FindAndReplace(std::string_view str, std::string_view find,
                           std::string_view replace) {
  if (find.empty()) {
    return std::string(str);
  }

  std::string result;
  // Offset of the first byte of |str| that has not been copied or skipped.
  size_t cursor = 0;
  while (cursor < str.size()) {
    const size_t hit = str.find(find, cursor);
    if (hit == std::string_view::npos) {
      // No more matches: keep the remainder verbatim.
      result.append(str.substr(cursor));
      break;
    }
    result.append(str.substr(cursor, hit - cursor));
    result.append(replace);
    cursor = hit + find.size();
  }
  return result;
}
| 63 | |
// Returns true when |str| ends with exactly the bytes of |suffix|
// (case-sensitive). An empty |suffix| always matches.
// TODO(bbe) get rid of this once we can c++20.
bool EndsWith(std::string_view str, std::string_view suffix) {
  if (str.size() < suffix.size()) {
    return false;
  }
  const std::string_view tail = str.substr(str.size() - suffix.size());
  return tail == suffix;
}
| 69 | |
// Returns true when |str| begins with exactly the bytes of |prefix|
// (case-sensitive). An empty |prefix| always matches.
// TODO(bbe) get rid of this once we can c++20.
bool StartsWith(std::string_view str, std::string_view prefix) {
  if (str.size() < prefix.size()) {
    return false;
  }
  const std::string_view head = str.substr(0, prefix.size());
  return head == prefix;
}
| 74 | |
David Benjamin | 90ceeb0 | 2024-01-23 14:25:39 -0500 | [diff] [blame] | 75 | std::string HexEncode(Span<const uint8_t> data) { |
Bob Beck | bc97b7a | 2023-04-18 08:35:15 -0600 | [diff] [blame] | 76 | std::ostringstream out; |
David Benjamin | 90ceeb0 | 2024-01-23 14:25:39 -0500 | [diff] [blame] | 77 | for (uint8_t b : data) { |
Bob Beck | bc97b7a | 2023-04-18 08:35:15 -0600 | [diff] [blame] | 78 | out << std::hex << std::setfill('0') << std::setw(2) << std::uppercase |
David Benjamin | 90ceeb0 | 2024-01-23 14:25:39 -0500 | [diff] [blame] | 79 | << int{b}; |
Bob Beck | bc97b7a | 2023-04-18 08:35:15 -0600 | [diff] [blame] | 80 | } |
| 81 | return out.str(); |
| 82 | } |
| 83 | |
// Returns the base-10 representation of |i|, with a leading '-' for negative
// values and no padding or digit grouping.
//
// This deliberately avoids std::ostringstream: a stream picks up the global
// std::locale at construction, and a locale whose numpunct facet enables
// grouping would insert thousands separators into the output.
// std::to_string() is not affected by the C++ global locale.
//
// TODO(bbe) get rid of this once extracted to boringssl. Everything else
// in third_party uses std::to_string
std::string NumberToDecimalString(int i) {
  return std::to_string(i);
}
| 91 | |
// Splits |str| at every occurrence of |split_char| and returns the pieces in
// order. The pieces are views into the memory referenced by |str|, so they are
// only valid while that memory is. Empty pieces are preserved (adjacent,
// leading or trailing delimiters each produce one), but an empty |str|
// produces an empty vector rather than a single empty piece.
std::vector<std::string_view> SplitString(std::string_view str,
                                          char split_char) {
  std::vector<std::string_view> tokens;
  if (str.empty()) {
    return tokens;
  }

  // Offset in |str| at which the token currently being scanned begins.
  size_t token_start = 0;
  for (;;) {
    const size_t delim = str.find(split_char, token_start);
    if (delim == std::string_view::npos) {
      // No further delimiter: everything that remains is the final token.
      tokens.push_back(str.substr(token_start));
      return tokens;
    }
    tokens.push_back(str.substr(token_start, delim - token_start));
    token_start = delim + 1;
  }
}
| 117 | |
// Returns true for horizontal tab, line feed, vertical tab, form feed,
// carriage return and space. Despite the name, no other character is
// recognised.
static bool IsUnicodeWhitespace(char c) {
  switch (c) {
    case '\t':
    case '\n':
    case '\v':
    case '\f':
    case '\r':
    case ' ':
      return true;
    default:
      return false;
  }
}
| 121 | |
| 122 | std::string CollapseWhitespaceASCII(std::string_view text, |
| 123 | bool trim_sequences_with_line_breaks) { |
| 124 | std::string result; |
| 125 | result.resize(text.size()); |
| 126 | |
| 127 | // Set flags to pretend we're already in a trimmed whitespace sequence, so we |
| 128 | // will trim any leading whitespace. |
| 129 | bool in_whitespace = true; |
| 130 | bool already_trimmed = true; |
| 131 | |
| 132 | int chars_written = 0; |
| 133 | for (auto i = text.begin(); i != text.end(); ++i) { |
| 134 | if (IsUnicodeWhitespace(*i)) { |
| 135 | if (!in_whitespace) { |
| 136 | // Reduce all whitespace sequences to a single space. |
| 137 | in_whitespace = true; |
| 138 | result[chars_written++] = L' '; |
| 139 | } |
| 140 | if (trim_sequences_with_line_breaks && !already_trimmed && |
| 141 | ((*i == '\n') || (*i == '\r'))) { |
| 142 | // Whitespace sequences containing CR or LF are eliminated entirely. |
| 143 | already_trimmed = true; |
| 144 | --chars_written; |
| 145 | } |
| 146 | } else { |
| 147 | // Non-whitespace chracters are copied straight across. |
| 148 | in_whitespace = false; |
| 149 | already_trimmed = false; |
| 150 | result[chars_written++] = *i; |
| 151 | } |
| 152 | } |
| 153 | |
| 154 | if (in_whitespace && !already_trimmed) { |
| 155 | // Any trailing whitespace is eliminated. |
| 156 | --chars_written; |
| 157 | } |
| 158 | |
| 159 | result.resize(chars_written); |
| 160 | return result; |
| 161 | } |
| 162 | |
Bob Beck | 68d6ce3 | 2023-11-22 14:37:02 -0700 | [diff] [blame] | 163 | bool Base64Encode(const std::string_view &input, std::string *output) { |
| 164 | size_t len; |
| 165 | if (!EVP_EncodedLength(&len, input.size())) { |
| 166 | return false; |
| 167 | } |
| 168 | std::vector<char> encoded(len); |
| 169 | len = EVP_EncodeBlock(reinterpret_cast<uint8_t *>(encoded.data()), |
| 170 | reinterpret_cast<const uint8_t *>(input.data()), |
| 171 | input.size()); |
| 172 | if (!len) { |
| 173 | return false; |
| 174 | } |
| 175 | output->assign(encoded.data(), len); |
| 176 | return true; |
| 177 | } |
| 178 | |
| 179 | bool Base64Decode(const std::string_view &input, std::string *output) { |
| 180 | size_t len; |
| 181 | if (!EVP_DecodedLength(&len, input.size())) { |
| 182 | return false; |
| 183 | } |
| 184 | std::vector<char> decoded(len); |
| 185 | if (!EVP_DecodeBase64(reinterpret_cast<uint8_t *>(decoded.data()), &len, len, |
| 186 | reinterpret_cast<const uint8_t *>(input.data()), |
| 187 | input.size())) { |
| 188 | return false; |
| 189 | } |
| 190 | output->assign(decoded.data(), len); |
| 191 | return true; |
| 192 | } |
| 193 | |
David Benjamin | 0fbc17a | 2024-08-21 15:13:10 -0400 | [diff] [blame] | 194 | } // namespace string_util |
| 195 | BSSL_NAMESPACE_END |