Align libpki's PEM parser with crypto/pem a bit Both our crypto/pem and Go's encoding/pem tolerate whitespace in the middle of the line, and also only treat ' ', '\t', '\r', and '\n' as whitespace. Align the libpki one with crypto/pem. As part of this, remove the overcomplicated CollapseWhitespaceASCII function. That was seemingly adapted from Chromium //base, where it is used for a different purpose. Update-Note: libpki's PEMTokenizer now allows whitespace in the middle of a line, matching other implementations, and disallows '\v' and '\f', also matching other implementations. This is not expected to have any real impact. Change-Id: I349dc86762e5e1c184448b6170e4113579f2c43f Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/94928 Presubmit-BoringSSL-Verified: boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com> Auto-Submit: David Benjamin <davidben@google.com> Reviewed-by: Rudolf Polzer <rpolzer@google.com> Commit-Queue: Rudolf Polzer <rpolzer@google.com>
diff --git a/pki/pem.cc b/pki/pem.cc index d4687a4..54a7595 100644 --- a/pki/pem.cc +++ b/pki/pem.cc
@@ -81,10 +81,18 @@ pos_ = footer_pos + it->footer.size(); block_type_ = it->type; + // Remove whitespace from the base64 data. std::string_view encoded = str_.substr(data_begin, footer_pos - data_begin); - if (!string_util::Base64Decode( - string_util::CollapseWhitespaceASCII(encoded, true), &data_)) { + std::string trimmed; + trimmed.reserve(encoded.size()); + for (char c : encoded) { + if (c != '\n' && c != '\r' && c != ' ' && c != '\t') { + trimmed.push_back(c); + } + } + + if (!string_util::Base64Decode(trimmed, &data_)) { // The most likely cause for a decode failure is a datatype that // includes PEM headers, which are not supported. break;
diff --git a/pki/pem_unittest.cc b/pki/pem_unittest.cc index 70242b0..930fa86 100644 --- a/pki/pem_unittest.cc +++ b/pki/pem_unittest.cc
@@ -59,10 +59,10 @@ EXPECT_FALSE(tokenizer.GetNext()); } -TEST(PEMTokenizerTest, WhitespaceAroundLines) { +TEST(PEMTokenizerTest, IgnoreWhitespace) { const char data[] = "-----BEGIN EXPECTED-BLOCK-----\n" - " \tTWF0Y2 \t\n \t\n \thlc0FjY2VwdGVkQmxvY2tUeXBl \t\n" + " \tTWF0Y2 \t\n \t\n \thlc0FjY2Vw \tdGVkQmxvY2tUeXBl \t\n" "-----END EXPECTED-BLOCK-----\n"; std::string_view string_piece(data); std::vector<std::string> accepted_types; @@ -77,21 +77,6 @@ EXPECT_FALSE(tokenizer.GetNext()); } -TEST(PEMTokenizerTest, WhitespaceWithinLines) { - // TODO(davidben): crypto/pem accepts this. Should we? It accepts it because - // |EVP_ENCODE_CTX| silently skips over whitespace. - const char data[] = - "-----BEGIN EXPECTED-BLOCK-----\n" - "TWF0Y2 hlc0FjY2VwdGVkQmxvY2tUeXBl\n" - "-----END EXPECTED-BLOCK-----\n"; - std::string_view string_piece(data); - std::vector<std::string> accepted_types; - accepted_types.push_back("EXPECTED-BLOCK"); - - PEMTokenizer tokenizer(string_piece, accepted_types); - EXPECT_FALSE(tokenizer.GetNext()); -} - TEST(PEMTokenizerTest, InvalidBase64) { const char data[] = "-----BEGIN EXPECTED-BLOCK-----\n"
diff --git a/pki/string_util.cc b/pki/string_util.cc index da8dea0..8baa100 100644 --- a/pki/string_util.cc +++ b/pki/string_util.cc
@@ -125,62 +125,6 @@ return out; } -static bool IsUnicodeWhitespace(char c) { - return c == 9 || c == 10 || c == 11 || c == 12 || c == 13 || c == ' '; -} - -std::string CollapseWhitespaceASCII(std::string_view text, - bool trim_sequences_with_line_breaks) { - std::string result; - result.resize(text.size()); - - // Set flags to pretend we're already in a trimmed whitespace sequence, so we - // will trim any leading whitespace. - bool in_whitespace = true; - bool already_trimmed = true; - - size_t chars_written = 0; - for (auto i = text.begin(); i != text.end(); ++i) { - if (IsUnicodeWhitespace(*i)) { - if (!in_whitespace) { - // Reduce all whitespace sequences to a single space. - in_whitespace = true; - // Can't overflow because |chars_written| is incremented at most once - // per |text| character. - result[chars_written++] = L' '; - } - if (trim_sequences_with_line_breaks && !already_trimmed && - ((*i == '\n') || (*i == '\r'))) { - // Whitespace sequences containing CR or LF are eliminated entirely. - already_trimmed = true; - // Can't underflow because getting here requires |already_trimmed| to - // be false, which is only ever set when |chars_written| is - // incremented. - --chars_written; - } - } else { - // Non-whitespace characters are copied straight across. - in_whitespace = false; - already_trimmed = false; - // Can't overflow because |chars_written| is incremented at most once per - // |text| character. - result[chars_written++] = *i; - } - } - - if (in_whitespace && !already_trimmed) { - // Any trailing whitespace is eliminated. - // - // Can't underflow because getting here requires |already_trimmed| to - // be false, which is only ever set when |chars_written| is - // incremented. - --chars_written; - } - - result.resize(chars_written); - return result; -} - bool Base64Encode(const std::string_view &input, std::string *output) { size_t len; if (!EVP_EncodedLength(&len, input.size())) {
diff --git a/pki/string_util.h b/pki/string_util.h index ff0569c..85639a7 100644 --- a/pki/string_util.h +++ b/pki/string_util.h
@@ -67,10 +67,6 @@ OPENSSL_EXPORT std::vector<std::string_view> SplitString(std::string_view str, char split_char); -// Collapess whitespace in |text| to a single space and returns the result. -OPENSSL_EXPORT std::string CollapseWhitespaceASCII( - std::string_view text, bool trim_sequences_with_line_breaks); - // Base64 encodes |input| into |output| returning true on success, // false otherwise. OPENSSL_EXPORT bool Base64Encode(const std::string_view &input,