Test the CONF parser more extensively

Change-Id: I8792f118d281bc7a407dfbabe1c8b8e63f9eed9f
Reviewed-on: https://boringssl-review.googlesource.com/c/boringssl/+/60085
Auto-Submit: David Benjamin <davidben@google.com>
Commit-Queue: Adam Langley <agl@google.com>
Reviewed-by: Adam Langley <agl@google.com>
diff --git a/crypto/conf/conf.c b/crypto/conf/conf.c
index 622c13e..0c05df4 100644
--- a/crypto/conf/conf.c
+++ b/crypto/conf/conf.c
@@ -68,15 +68,8 @@
 #include "conf_def.h"
 #include "internal.h"
 #include "../internal.h"
-#include "../lhash/internal.h"
 
 
-DEFINE_LHASH_OF(CONF_VALUE)
-
-struct conf_st {
-  LHASH_OF(CONF_VALUE) *data;
-};
-
 static const char kDefaultSectionName[] = "default";
 
 // The maximum length we can grow a value to after variable expansion. 64k
diff --git a/crypto/conf/conf_test.cc b/crypto/conf/conf_test.cc
index fe03c5f..f913d1e 100644
--- a/crypto/conf/conf_test.cc
+++ b/crypto/conf/conf_test.cc
@@ -12,8 +12,10 @@
  * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN
  * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */
 
+#include <algorithm>
 #include <string>
 #include <vector>
+#include <map>
 
 #include <openssl/bio.h>
 #include <openssl/conf.h>
@@ -23,29 +25,393 @@
 #include "internal.h"
 
 
-TEST(ConfTest, Parse) {
-  // Check that basic parsing works. (We strongly recommend that people don't
-  // use the [N]CONF functions.)
+// A |CONF| is an unordered list of sections, where each section contains an
+// ordered list of (name, value) pairs.
+using ConfModel =
+    std::map<std::string, std::vector<std::pair<std::string, std::string>>>;
 
-  static const char kConf[] = R"(
-# Comment
+static void ExpectConfEquals(const CONF *conf, const ConfModel &model) {
+  // There is always a default section, even if empty. This is an easy mistake
+  // to make in test data, so test for it.
+  EXPECT_NE(model.find("default"), model.end())
+      << "Model does not have a default section";
+
+  size_t total_values = 0;
+  for (const auto &pair : model) {
+    const std::string &section = pair.first;
+    SCOPED_TRACE(section);
+
+    const STACK_OF(CONF_VALUE) *values =
+        NCONF_get_section(conf, section.c_str());
+    ASSERT_TRUE(values);
+    total_values += pair.second.size();
+
+    EXPECT_EQ(sk_CONF_VALUE_num(values), pair.second.size());
+
+    // If the lengths do not match, still compare up to the smaller of the two,
+    // to aid debugging.
+    size_t min_len = std::min(sk_CONF_VALUE_num(values), pair.second.size());
+    for (size_t i = 0; i < min_len; i++) {
+      SCOPED_TRACE(i);
+      const std::string &name = pair.second[i].first;
+      const std::string &value = pair.second[i].second;
+
+      const CONF_VALUE *v = sk_CONF_VALUE_value(values, i);
+      EXPECT_EQ(v->section, section);
+      EXPECT_EQ(v->name, name);
+      EXPECT_EQ(v->value, value);
+
+      const char *str = NCONF_get_string(conf, section.c_str(), name.c_str());
+      ASSERT_NE(str, nullptr);
+      EXPECT_EQ(str, value);
+
+      if (section == "default") {
+        // nullptr is interpreted as the default section.
+        str = NCONF_get_string(conf, nullptr, name.c_str());
+        ASSERT_NE(str, nullptr);
+        EXPECT_EQ(str, value);
+      }
+    }
+  }
+
+  // Unrecognized sections must return nullptr.
+  EXPECT_EQ(NCONF_get_section(conf, "must_not_appear_in_tests"), nullptr);
+  EXPECT_EQ(NCONF_get_string(conf, "must_not_appear_in_tests",
+                             "must_not_appear_in_tests"),
+            nullptr);
+  if (!model.empty()) {
+    // Valid section, invalid name.
+    EXPECT_EQ(NCONF_get_string(conf, model.begin()->first.c_str(),
+                               "must_not_appear_in_tests"),
+              nullptr);
+    if (!model.begin()->second.empty()) {
+      // Invalid section, valid name.
+      EXPECT_EQ(NCONF_get_string(conf, "must_not_appear_in_tests",
+                                 model.begin()->second.front().first.c_str()),
+                nullptr);
+    }
+  }
+
+  // There should not be any other values in |conf|. |conf| currently stores
+  // both sections and values in the same map.
+  EXPECT_EQ(lh_CONF_VALUE_num_items(conf->data), total_values + model.size());
+}
+
+TEST(ConfTest, Parse) {
+  const struct {
+    std::string in;
+    ConfModel model;
+  } kTests[] = {
+      // Test basic parsing.
+      {
+          R"(# Comment
 
 key=value
 
 [section_name]
 key=value2
-)";
+)",
+          {
+              {"default", {{"key", "value"}}},
+              {"section_name", {{"key", "value2"}}},
+          },
+      },
 
-  bssl::UniquePtr<BIO> bio(BIO_new_mem_buf(kConf, sizeof(kConf) - 1));
-  ASSERT_TRUE(bio);
-  bssl::UniquePtr<CONF> conf(NCONF_new(nullptr));
-  ASSERT_TRUE(conf);
-  ASSERT_TRUE(NCONF_load_bio(conf.get(), bio.get(), nullptr));
-  EXPECT_TRUE(NCONF_get_section(conf.get(), "section_name"));
-  EXPECT_FALSE(NCONF_get_section(conf.get(), "other_section"));
-  EXPECT_STREQ(NCONF_get_string(conf.get(), nullptr, "key"), "value");
-  EXPECT_STREQ(NCONF_get_string(conf.get(), "section_name", "key"), "value2");
-  EXPECT_STREQ(NCONF_get_string(conf.get(), "other_section", "key"), nullptr);
+      // If a section is listed multiple times, keys add to the existing one.
+      {
+          R"(key1 = value1
+
+[section1]
+key2 = value2
+
+[section2]
+key3 = value3
+
+[default]
+key4 = value4
+
+[section1]
+key5 = value5
+)",
+          {
+              {"default", {{"key1", "value1"}, {"key4", "value4"}}},
+              {"section1", {{"key2", "value2"}, {"key5", "value5"}}},
+              {"section2", {{"key3", "value3"}}},
+          },
+      },
+
+      // Although the CONF parser internally uses a buffer size of 512 bytes to
+      // read one line, it detects truncation and is able to parse long lines.
+      {
+          std::string(1000, 'a') + " = " + std::string(1000, 'b') + "\n",
+          {
+              {"default", {{std::string(1000, 'a'), std::string(1000, 'b')}}},
+          },
+      },
+
+      // Trailing backslashes are line continations.
+      {
+          "key=\\\nvalue\nkey2=foo\\\nbar=baz",
+          {
+              {"default", {{"key", "value"}, {"key2", "foobar=baz"}}},
+          },
+      },
+
+      // To be a line continuation, it must be at the end of the line.
+      {
+          "key=\\\nvalue\nkey2=foo\\ \nbar=baz",
+          {
+              {"default", {{"key", "value"}, {"key2", "foo"}, {"bar", "baz"}}},
+          },
+      },
+
+      // A line continuation without any following line is ignored.
+      {
+          "key=value\\",
+          {
+              {"default", {{"key", "value"}}},
+          },
+      },
+
+      // Values may have embedded whitespace, but leading and trailing
+      // whitespace is dropped.
+      {
+          "key =  \t  foo   \t\t\tbar  \t  ",
+          {
+              {"default", {{"key", "foo   \t\t\tbar"}}},
+          },
+      },
+
+      // Empty sections still end up in the file.
+      {
+          "[section1]\n[section2]\n[section3]\n",
+          {
+              {"default", {}},
+              {"section1", {}},
+              {"section2", {}},
+              {"section3", {}},
+          },
+      },
+
+      // Section names can contain spaces and punctuation.
+      {
+          "[This! Is. A? Section;]\nkey = value",
+          {
+              {"default", {}},
+              {"This! Is. A? Section;", {{"key", "value"}}},
+          },
+      },
+
+      // Trailing data after a section line is ignored.
+      {
+          "[section] key = value\nkey2 = value2\n",
+          {
+              {"default", {}},
+              {"section", {{"key2", "value2"}}},
+          },
+      },
+
+      // Comments may appear within a line. Escapes and quotes, however,
+      // suppress the comment character.
+      {
+          R"(
+key1 = # comment
+key2 = "# not a comment"
+key3 = '# not a comment'
+key4 = `# not a comment`
+key5 = \# not a comment
+)",
+          {
+              {"default",
+               {
+                   {"key1", ""},
+                   {"key2", "# not a comment"},
+                   {"key3", "# not a comment"},
+                   {"key4", "# not a comment"},
+                   {"key5", "# not a comment"},
+               }},
+          },
+      },
+
+      // Quotes may appear in the middle of a string. Inside quotes, escape
+      // sequences like \n are not evaluated. \X always evaluates to X.
+      {
+          R"(
+key1 = mix "of" 'different' `quotes`
+key2 = "`'"
+key3 = "\r\n\b\t\""
+key4 = '\r\n\b\t\''
+key5 = `\r\n\b\t\``
+)",
+          {
+              {"default",
+               {
+                   {"key1", "mix of different quotes"},
+                   {"key2", "`'"},
+                   {"key3", "rnbt\""},
+                   {"key4", "rnbt'"},
+                   {"key5", "rnbt`"},
+               }},
+          },
+      },
+
+      // Outside quotes, escape sequences like \n are evaluated. Unknown escapes
+      // turn into the character.
+      {
+          R"(
+key = \r\n\b\t\"\'\`\z
+)",
+          {
+              {"default",
+               {
+                   {"key", "\r\n\b\t\"'`z"},
+               }},
+          },
+      },
+
+      // Escapes (but not quoting) work inside section names.
+      {
+          "[section\\ name]\nkey = value\n",
+          {
+              {"default", {}},
+              {"section name", {{"key", "value"}}},
+          },
+      },
+
+      // Escapes (but not quoting) are skipped over in key names, but they are
+      // left unevaluated. This is probably a bug.
+      {
+          "key\\ name = value\n",
+          {
+              {"default", {{"key\\ name", "value"}}},
+          },
+      },
+
+      // Keys can specify sections explicitly with ::.
+      {
+          R"(
+[section1]
+default::key1 = value1
+section1::key2 = value2
+section2::key3 = value3
+section1::key4 = value4
+section2::key5 = value5
+default::key6 = value6
+key7 = value7  # section1
+)",
+          {
+              {"default", {{"key1", "value1"}, {"key6", "value6"}}},
+              {"section1",
+               {{"key2", "value2"}, {"key4", "value4"}, {"key7", "value7"}}},
+              {"section2", {{"key3", "value3"}, {"key5", "value5"}}},
+          },
+      },
+
+      // Values can refer to other values with $.
+      {
+          R"(
+key1 = value1
+key2 = $key1 ${key1} $(key1)
+
+[section1]
+key3 = value3
+
+[section2]
+key4 = value4
+key5 = $key4 ${key4} $(key4)
+key6 = $default::key1 ${default::key1} $(default::key1)
+key7 = $section1::key3 ${section1::key3} $(section1::key3)
+key8 = $section2::key4 ${section2::key4} $(section2::key4)
+)",
+          {
+              {"default",
+               {
+                   {"key1", "value1"},
+                   {"key2", "value1 value1 value1"},
+               }},
+              {"section1",
+               {
+                   {"key3", "value3"},
+               }},
+              {"section2",
+               {
+                   {"key4", "value4"},
+                   {"key5", "value4 value4 value4"},
+                   {"key6", "value1 value1 value1"},
+                   {"key7", "value3 value3 value3"},
+                   {"key8", "value4 value4 value4"},
+               }},
+          },
+      },
+
+      // Punctuation is allowed in key names.
+      {
+          "key.1 = value\n",
+          {
+              {"default", {{"key.1", "value"}}},
+          },
+      },
+
+      // Punctuation is not allowed in $ references. The punctuation stops the
+      // parse.
+      {
+          R"(
+key = a
+key.1 = b
+ref = $key
+)",
+          {
+              {"default",
+               {
+                   {"key", "a"},
+                   {"key.1", "b"},
+                   {"ref", "a"},
+               }},
+          },
+      },
+  };
+  for (const auto &t : kTests) {
+    SCOPED_TRACE(t.in);
+    bssl::UniquePtr<BIO> bio(BIO_new_mem_buf(t.in.data(), t.in.size()));
+    ASSERT_TRUE(bio);
+    bssl::UniquePtr<CONF> conf(NCONF_new(nullptr));
+    ASSERT_TRUE(conf);
+    ASSERT_TRUE(NCONF_load_bio(conf.get(), bio.get(), nullptr));
+
+    ExpectConfEquals(conf.get(), t.model);
+  }
+
+  const char *kInvalidTests[] = {
+      // Missing equals sign.
+      "key",
+      // Unterminated section heading.
+      "[section",
+      // Section names can only contain alphanumeric characters, punctuation,
+      // and escapes. Quotes are not punctuation.
+      "[\"section\"]",
+      // Keys can only contain alphanumeric characters, punctuaion, and escapes.
+      "key name = value",
+      "\"key\" = value",
+      // Referencing a non-existent variable.
+      "key = $key2",
+      // key1 here is interpreted as a section name, not a key.
+      "key1 = value1\nkey2 = $key1::key",
+      // No self references.
+      "key = $key",
+      // Unterminated braced variable reference.
+      "key1 = value1\nkey2 = ${key1",
+      "key1 = value1\nkey2 = $(key1",
+      // Empty reference.
+      "key1 = $",
+  };
+  for (const auto &t : kInvalidTests) {
+    SCOPED_TRACE(t);
+    bssl::UniquePtr<BIO> bio(BIO_new_mem_buf(t, strlen(t)));
+    ASSERT_TRUE(bio);
+    bssl::UniquePtr<CONF> conf(NCONF_new(nullptr));
+    ASSERT_TRUE(conf);
+    EXPECT_FALSE(NCONF_load_bio(conf.get(), bio.get(), nullptr));
+  }
 }
 
 TEST(ConfTest, ParseList) {
diff --git a/crypto/conf/internal.h b/crypto/conf/internal.h
index 6075548..04359ad 100644
--- a/crypto/conf/internal.h
+++ b/crypto/conf/internal.h
@@ -17,11 +17,19 @@
 
 #include <openssl/base.h>
 
+#include "../lhash/internal.h"
+
 #if defined(__cplusplus)
 extern "C" {
 #endif
 
 
+DEFINE_LHASH_OF(CONF_VALUE)
+
+struct conf_st {
+  LHASH_OF(CONF_VALUE) *data;
+};
+
 // CONF_VALUE_new returns a freshly allocated and zeroed |CONF_VALUE|.
 CONF_VALUE *CONF_VALUE_new(void);
 
diff --git a/include/openssl/conf.h b/include/openssl/conf.h
index 908c16e..7529190 100644
--- a/include/openssl/conf.h
+++ b/include/openssl/conf.h
@@ -77,7 +77,10 @@
 //   [section_name]
 //   key2=value2
 //
-// Config files are represented by a |CONF|.
+// Config files are represented by a |CONF|. Use of this module is strongly
+// discouraged. It is a remnant of the OpenSSL command-line tool. Parsing an
+// untrusted input as a config file risks string injection and denial of service
+// vulnerabilities.
 
 struct conf_value_st {
   char *section;