Align libpki's PEM parser with crypto/pem a bit

Both our crypto/pem and Go's encoding/pem tolerate whitespace in the
middle of the line, and also only treat ' ', '\t', '\r', and '\n' as
whitespace. Align the libpki one with crypto/pem.

As part of this, remove the overcomplicated CollapseWhitespaceASCII
function. That was seemingly adapted from Chromium //base, where it is
used for a different purpose.

Update-Note: libpki's PEMTokenizer now allows whitespace in the middle
of a line, matching other implementations, and disallows '\v' and '\f',
also matching other implementations. This is not expected to have any
real impact.

Change-Id: I349dc86762e5e1c184448b6170e4113579f2c43f
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/94928
Presubmit-BoringSSL-Verified: boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com <boringssl-scoped@luci-project-accounts.iam.gserviceaccount.com>
Auto-Submit: David Benjamin <davidben@google.com>
Reviewed-by: Rudolf Polzer <rpolzer@google.com>
Commit-Queue: Rudolf Polzer <rpolzer@google.com>
diff --git a/pki/pem.cc b/pki/pem.cc
index d4687a4..54a7595 100644
--- a/pki/pem.cc
+++ b/pki/pem.cc
@@ -81,10 +81,18 @@
       pos_ = footer_pos + it->footer.size();
       block_type_ = it->type;
 
+      // Remove whitespace from the base64 data.
       std::string_view encoded =
           str_.substr(data_begin, footer_pos - data_begin);
-      if (!string_util::Base64Decode(
-              string_util::CollapseWhitespaceASCII(encoded, true), &data_)) {
+      std::string trimmed;
+      trimmed.reserve(encoded.size());
+      for (char c : encoded) {
+        if (c != '\n' && c != '\r' && c != ' ' && c != '\t') {
+          trimmed.push_back(c);
+        }
+      }
+
+      if (!string_util::Base64Decode(trimmed, &data_)) {
         // The most likely cause for a decode failure is a datatype that
         // includes PEM headers, which are not supported.
         break;
diff --git a/pki/pem_unittest.cc b/pki/pem_unittest.cc
index 70242b0..930fa86 100644
--- a/pki/pem_unittest.cc
+++ b/pki/pem_unittest.cc
@@ -59,10 +59,10 @@
   EXPECT_FALSE(tokenizer.GetNext());
 }
 
-TEST(PEMTokenizerTest, WhitespaceAroundLines) {
+TEST(PEMTokenizerTest, IgnoreWhitespace) {
   const char data[] =
       "-----BEGIN EXPECTED-BLOCK-----\n"
-      " \tTWF0Y2 \t\n \t\n \thlc0FjY2VwdGVkQmxvY2tUeXBl \t\n"
+      " \tTWF0Y2 \t\n \t\n \thlc0FjY2Vw   \tdGVkQmxvY2tUeXBl \t\n"
       "-----END EXPECTED-BLOCK-----\n";
   std::string_view string_piece(data);
   std::vector<std::string> accepted_types;
@@ -77,21 +77,6 @@
   EXPECT_FALSE(tokenizer.GetNext());
 }
 
-TEST(PEMTokenizerTest, WhitespaceWithinLines) {
-  // TODO(davidben): crypto/pem accepts this. Should we? It accepts it because
-  // |EVP_ENCODE_CTX| silently skips over whitespace.
-  const char data[] =
-      "-----BEGIN EXPECTED-BLOCK-----\n"
-      "TWF0Y2    hlc0FjY2VwdGVkQmxvY2tUeXBl\n"
-      "-----END EXPECTED-BLOCK-----\n";
-  std::string_view string_piece(data);
-  std::vector<std::string> accepted_types;
-  accepted_types.push_back("EXPECTED-BLOCK");
-
-  PEMTokenizer tokenizer(string_piece, accepted_types);
-  EXPECT_FALSE(tokenizer.GetNext());
-}
-
 TEST(PEMTokenizerTest, InvalidBase64) {
   const char data[] =
       "-----BEGIN EXPECTED-BLOCK-----\n"
diff --git a/pki/string_util.cc b/pki/string_util.cc
index da8dea0..8baa100 100644
--- a/pki/string_util.cc
+++ b/pki/string_util.cc
@@ -125,62 +125,6 @@
   return out;
 }
 
-static bool IsUnicodeWhitespace(char c) {
-  return c == 9 || c == 10 || c == 11 || c == 12 || c == 13 || c == ' ';
-}
-
-std::string CollapseWhitespaceASCII(std::string_view text,
-                                    bool trim_sequences_with_line_breaks) {
-  std::string result;
-  result.resize(text.size());
-
-  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
-  // will trim any leading whitespace.
-  bool in_whitespace = true;
-  bool already_trimmed = true;
-
-  size_t chars_written = 0;
-  for (auto i = text.begin(); i != text.end(); ++i) {
-    if (IsUnicodeWhitespace(*i)) {
-      if (!in_whitespace) {
-        // Reduce all whitespace sequences to a single space.
-        in_whitespace = true;
-        // Can't overflow because |chars_written| is incremented at most once
-        // per |text| character.
-        result[chars_written++] = L' ';
-      }
-      if (trim_sequences_with_line_breaks && !already_trimmed &&
-          ((*i == '\n') || (*i == '\r'))) {
-        // Whitespace sequences containing CR or LF are eliminated entirely.
-        already_trimmed = true;
-        // Can't underflow because getting here requires |already_trimmed| to
-        // be false, which is only ever set when |chars_written| is
-        // incremented.
-        --chars_written;
-      }
-    } else {
-      // Non-whitespace characters are copied straight across.
-      in_whitespace = false;
-      already_trimmed = false;
-      // Can't overflow because |chars_written| is incremented at most once per
-      // |text| character.
-      result[chars_written++] = *i;
-    }
-  }
-
-  if (in_whitespace && !already_trimmed) {
-    // Any trailing whitespace is eliminated.
-    //
-    // Can't underflow because getting here requires |already_trimmed| to
-    // be false, which is only ever set when |chars_written| is
-    // incremented.
-    --chars_written;
-  }
-
-  result.resize(chars_written);
-  return result;
-}
-
 bool Base64Encode(const std::string_view &input, std::string *output) {
   size_t len;
   if (!EVP_EncodedLength(&len, input.size())) {
diff --git a/pki/string_util.h b/pki/string_util.h
index ff0569c..85639a7 100644
--- a/pki/string_util.h
+++ b/pki/string_util.h
@@ -67,10 +67,6 @@
 OPENSSL_EXPORT std::vector<std::string_view> SplitString(std::string_view str,
                                                          char split_char);
 
-// Collapess whitespace in |text| to a single space and returns the result.
-OPENSSL_EXPORT std::string CollapseWhitespaceASCII(
-    std::string_view text, bool trim_sequences_with_line_breaks);
-
 // Base64 encodes |input| into |output| returning true on success,
 // false otherwise.
 OPENSSL_EXPORT bool Base64Encode(const std::string_view &input,