diff --git a/util/benchmark.hh b/util/benchmark.hh
index c05954c179244ed90dc2bd74238c9935f1530bce..f4ff6f99f888a13e4e20a5c2c91df6609f5cd791 100644
--- a/util/benchmark.hh
+++ b/util/benchmark.hh
@@ -49,9 +49,9 @@
 // library
 namespace eic::util {
 
-struct TestDefinitionError : exception {
+struct TestDefinitionError : Exception {
   TestDefinitionError(std::string_view msg)
-      : exception(msg, "test_definition_error") {}
+      : Exception(msg, "test_definition_error") {}
 };
 
 // Wrapper for our test data json, with three methods to set the status
@@ -68,7 +68,7 @@ struct TestDefinitionError : exception {
 //  - weight: Weight for this test (this is defaulted to 1.0 if not specified)
 //  - result: pass/fail/error
 struct Test {
-  test(nlohmann::json definition) : json{std::move(definition)} {
+  Test(nlohmann::json definition) : json{std::move(definition)} {
     // initialize with error (as we don't have a value yet)
     error();
     // Check that all required fields are present
@@ -101,7 +101,7 @@ private:
 void write_test(const std::vector<test>& data, const std::string& fname) {
   nlohmann::json test;
   for (auto& entry : data) {
-    test.push_back(entry.json);
+    test["test"].push_back(entry.json);
   }
   std::cout << fmt::format("Writing test data to {}\n", fname);
   std::ofstream output_file(fname);
diff --git a/util/collect_tests.py b/util/collect_tests.py
new file mode 100644
index 0000000000000000000000000000000000000000..9e3d6e29dbf4fa08b53d44cb6e1b11e9a7772619
--- /dev/null
+++ b/util/collect_tests.py
@@ -0,0 +1,18 @@
+#!/usr/bin/env python3
+
+"""
+Collect the json files from individual benchmark tests into
+a larger json file that combines all benchmark information,
+and do additional accounting for the benchmark.
+
+Test results are expected to have the following file name and directory
+structure:
+   results/<BENCHMARK_NAME>/<SOME_NAME>.json
+or
+   results/<BENCHMARK_NAME>/subdirectory/<SOME_NAME>.json
+
+Internally, we will look for the "tests" keyword in each of these
+files to identify them as benchmark components.
+"""
+
+