diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 58c30aacc59d206caaa69e94d7b3b1f16a91381a..79ddc8d06606989c4f877139e5b471428e068fd7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -41,7 +41,7 @@ summary:
   stage: finish
   needs: ["dis:results", "dvcs:results", "dvmp:results"]
   script:
-    - echo "All benchmarks passed"
+    - ./util/collect_benchmarks.py
   artifacts:
     reports:
       junit: ["results/dvcs/report2.xml"]
diff --git a/benchmarks.json b/benchmarks.json
new file mode 100644
index 0000000000000000000000000000000000000000..38168c1836f33b27e147f6a165a91c1b430e0b93
--- /dev/null
+++ b/benchmarks.json
@@ -0,0 +1,5 @@
+{
+  "name": "physics",
+  "title": "Physics benchmarks",
+  "description": "Benchmarks to validate the detector configuration versus various key physics observables."
+}
diff --git a/dvmp/config.yml b/dvmp/config.yml
index ac25f6fbcef2573f2e5e83cc8bb57e68e637219e..5a17fddd9ab77ae7225b22f4299ab49cdf404ab5 100644
--- a/dvmp/config.yml
+++ b/dvmp/config.yml
@@ -17,7 +17,7 @@ dvmp:generate:
   script:
     - ./util/run_many.py ./dvmp/gen.sh 
           -c jpsi_barrel 
-          -e 5x41 -e 10x100 -e 18x275
+          -e 10x100 
           --decay muon --decay electron
           --nproc 5
 
@@ -28,7 +28,7 @@ dvmp:process:
   script:
     - ./util/run_many.py ./dvmp/dvmp.sh 
           -c jpsi_barrel 
-          -e 5x41 -e 10x100 -e 18x275
+          -e 10x100 
           --decay muon --decay electron
           --leading jpsi
           --nproc 5
@@ -40,4 +40,8 @@ dvmp:results:
   stage: collect
   needs: ["dvmp:process"]
   script:
-    - echo "All DVMP benchmarks successful"
+    - ./util/collect_tests.py dvmp
+  artifacts:
+    paths:
+      - results/dvmp.json
+      - results/dvmp
diff --git a/util/collect_benchmarks.py b/util/collect_benchmarks.py
new file mode 100755
index 0000000000000000000000000000000000000000..8f9fd675e5d824580788a13690596515d333b988
--- /dev/null
+++ b/util/collect_benchmarks.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+
+"""
+Combine the json files from the individual benchmarks into a single
+master json file summarizing all benchmark results.
+
+Benchmark results are expected to be all json files in the results
+directory.
+"""
+
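+## For reference, a sketch of a per-benchmark result file this script expects
+## to find under results/ (the required keys are those checked in
+## _load_benchmarks below; the example values are purely illustrative):
+##
+##   {
+##     "name": "dvmp", "title": "DVMP benchmarks", "description": "...",
+##     "target": 0.9, "n_tests": 4, "n_pass": 4, "n_fail": 0, "n_error": 0,
+##     "maximum": 4.0, "sum": 4.0, "value": 1.0, "result": "pass"
+##   }
+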
+## Our master definition file, stored in the benchmark project root directory
+MASTER_FILE=r'benchmarks.json'
+
+## Our results directory
+RESULTS_PATH=r'results'
+
+## Output json file with all benchmark results
+OUTPUT_FILE=r'results/summary.json'
+
+import argparse
+import json
+from pathlib import Path
+
+## Exceptions for this module
+class Error(Exception):
+    '''Base class for exceptions in this module.'''
+    pass
+class FileNotFoundError(Error):
+    '''File does not exist.
+
+    Attributes:
+        file: the file name
+        message: error message
+    '''
+    def __init__(self, file):
+        self.file = file
+        self.message = 'No such file or directory: {}'.format(file)
+
+class InvalidDefinitionError(Error):
+    '''Raised for missing keys in the definitions.
+
+    Attributes:
+        key: the missing key
+        file: the definition file
+        message: error message
+    '''
+    def __init__(self, key, file):
+        self.key = key
+        self.file = file
+        self.message = "key '{}' not found in '{}'".format(key, file)
+
+class InvalidResultError(Error):
+    '''Raised for invalid benchmark result value.
+
+    Attributes:
+        key: the missing key
+        value: the invalid value
+        file: the benchmark definition file
+        message: error message
+    '''
+    def __init__(self, key, value, file):
+        self.key = key
+        self.value = value
+        self.file = file
+        self.message = "value '{}' for key '{}' invalid in benchmark file '{}'".format(
+                value, key, file)
+
+def collect_benchmarks():
+    '''Collect all benchmark results and write results to a single file.'''
+    print("Collecting all benchmark results")
+
+    ## load the master definition for the benchmark suite
+    results = _load_master()
+
+    ## collect the individual benchmark results
+    results['benchmarks'] = _load_benchmarks()
+
+    ## calculate aggregate benchmark statistics
+    results = _aggregate_results(results)
+
+    ## save results to output file
+    _save(results)
+
+    ## Summarize results
+    for bm in results['benchmarks']:
+        _print_benchmark(bm)
+    _print_summary(results)
+
+def _load_master():
+    '''Load master definition.'''
+    master_file = Path(MASTER_FILE)
+    if not master_file.exists():
+        raise FileNotFoundError(master_file)
+    print('  --> Loading master definition from:', master_file)
+    results = None
+    with master_file.open() as f:
+        results = json.load(f)
+    ## ensure this is a valid benchmark file
+    for key in ('name', 'title', 'description'):
+        if key not in results:
+            raise InvalidDefinitionError(key, master_file)
+    return results
+
+def _load_benchmarks():
+    '''Load all benchmark results from the results folder.'''
+    print('  --> Collecting all benchmarks')
+    rootdir = Path(RESULTS_PATH)
+    results = []
+    for file in rootdir.glob('*.json'):
+        print('    --> Loading file:', file, '... ', end='')
+        with open(file) as f:
+            bm = json.load(f)
+            ## skip files that don't include test results
+            if not 'tests' in bm:
+                print('skipped (does not contain benchmark results).')
+                continue
+            ## check if these are valid benchmark results,
+            ## raise exception otherwise
+            for key in ('name', 'title', 'description', 'target', 'n_tests',
+                    'n_pass', 'n_fail', 'n_error', 'maximum', 'sum', 'value',
+                    'result'):
+                if not key in bm:
+                    raise InvalidDefinitionError(key, file)
+            if bm['result'] not in ('pass', 'fail', 'error'):
+                raise InvalidResultError('result', bm['result'], file)
+            ## Append to our test results
+            results.append(bm)
+            print('done')
+    return results
+
+def _aggregate_results(results):
+    '''Aggregate benchmark results.'''
+    print('  --> Aggregating benchmark statistics')
+    results['n_benchmarks'] = len(results['benchmarks'])
+    results['n_pass'] = len([1 for t in results['benchmarks'] if t['result'] == 'pass'])
+    results['n_fail'] = len([1 for t in results['benchmarks'] if t['result'] == 'fail'])
+    results['n_error'] = len([1 for t in results['benchmarks'] if t['result'] == 'error'])
+    if results['n_error'] > 0:
+        results['result'] = 'error'
+    elif results['n_fail'] == 0:
+        results['result'] = 'pass'
+    else:
+        results['result'] = 'fail'
+    return results
+
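+## Worked example of the aggregation above (hypothetical numbers): three
+## benchmarks with results 'pass', 'pass' and 'fail' give n_pass = 2,
+## n_fail = 1, n_error = 0 and an overall result of 'fail'; a single 'error'
+## anywhere makes the overall result 'error'.
+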
+def _save(results):
+    '''Save aggregated benchmark results'''
+    ofile = Path(OUTPUT_FILE)
+    print('  --> Saving results to:', ofile)
+    with ofile.open('w') as f:
+        json.dump(results, f, indent=4)
+
+def _print_benchmark(bm):
+    '''Print benchmark summary to the terminal.'''
+    print('====================================================================')
+    print('  Summary for:', bm['title'])
+    print('  Pass: {}, Fail: {}, Error: {} out of {} total tests'.format(
+        bm['n_pass'], bm['n_fail'], bm['n_error'],
+        bm['n_tests']))
+    print('  Weighted sum: {} / {}'.format(bm['sum'], bm['maximum']))
+    print('  Benchmark value: {} (target: {})'.format(
+        bm['value'], bm['target']))
+    print('  ===> status:', bm['result'])
+
+def _print_summary(results):
+    '''Print master benchmark summary to the terminal.'''
+    print('====================================================================')
+    print('MASTER BENCHMARK SUMMARY FOR:', results['title'].upper())
+    print('Pass: {}, Fail: {}, Error: {} out of {} total benchmarks'.format(
+        results['n_pass'], results['n_fail'], results['n_error'],
+        results['n_benchmarks']))
+    print('===> status:', results['result'])
+    print('====================================================================')
+
+
+if __name__ == "__main__":
+    try:
+        collect_benchmarks()
+    except Error as e:
+        print()
+        print('ERROR', e.message)
+        exit(1)
diff --git a/util/collect_tests.py b/util/collect_tests.py
old mode 100644
new mode 100755
index 5f56dd6e4532589a22d652ea52326342bbcf13a0..c56d8c8b2ffd6a1f7ef525c88f66ed6614bb8857
--- a/util/collect_tests.py
+++ b/util/collect_tests.py
@@ -7,16 +7,198 @@ and do additional accounting for the benchmark.
 
 Tests results are expected to have the following file name and directory
 structure:
-   results/<BENCHMARK_NAME>/<SOME_NAME>.json
-or
-   results/<BENCHMARK_NAME>/subdirectory/<SOME_NAME>.json
+   results/<BENCHMARK_NAME>/**/<SOME_NAME>.json
+where ** implies we recursively check all sub-directories of <BENCHMARK_NAME>
 
 Internally, we will look for the "tests" keyword in each of these
 files to identify them as benchmark components.
 """
 
+## Our benchmark definition file, stored in the benchmark root directory
+BENCHMARK_FILE=r'{}/benchmark.json'
+
+## Our benchmark results directory
+RESULTS_PATH=r'results/{}'
+
+## Output json file with benchmark results
+OUTPUT_FILE=r'results/{}.json'
+
 import argparse
 import json
+from pathlib import Path
 
-if __name__ == "__main__":
+## Exceptions for this module
+class Error(Exception):
+    '''Base class for exceptions in this module.'''
     pass
+class FileNotFoundError(Error):
+    '''File does not exist.
+
+    Attributes:
+        file: the file name
+        message: error message
+    '''
+    def __init__(self, file):
+        self.file = file
+        self.message = 'No such file or directory: {}'.format(file)
+
+class InvalidBenchmarkDefinitionError(Error):
+    '''Raised for missing keys in the benchmark definition.
+
+    Attributes:
+        key: the missing key
+        file: the benchmark definition file
+        message: error message
+    '''
+    def __init__(self, key, file):
+        self.key = key
+        self.file = file
+        self.message = "key '{}' not found in benchmark file '{}'".format(key, file)
+
+class InvalidTestDefinitionError(Error):
+    '''Raised for missing keys in the test result.
+
+    Attributes:
+        key: the missing key
+        file: the test result file
+        message: error message
+    '''
+    def __init__(self, key, file):
+        self.key = key
+        self.file = file
+        self.message = "key '{}' not found in test file '{}'".format(key, file)
+class InvalidTestResultError(Error):
+    '''Raised for invalid test result value.
+
+    Attributes:
+        key: the missing key
+        value: the invalid value
+        file: the benchmark definition file
+        message: error message
+    '''
+    def __init__(self, key, value, file):
+        self.key = key
+        self.value = value
+        self.file = file
+        self.message = "value '{}' for key '{}' invalid in test file '{}'".format(
+                value, key, file)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+        'benchmark',
+        nargs='+',
+        help='One or more benchmarks for which to collect test results.')
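+
+## Typical invocation, as used in the dvmp:results CI job in this change:
+##   ./util/collect_tests.py dvmp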
+
+def collect_results(benchmark):
+    '''Collect benchmark tests and write results to file.'''
+    print("Collecting results for benchmark '{}'".format(benchmark))
+
+    ## load the definition for this benchmark
+    results = _load_benchmark(benchmark)
+
+    ## collect the test results
+    results['tests'] = _load_tests(benchmark)
+    
+    ## calculate aggregate test statistics
+    results = _aggregate_results(results)
+
+    ## save results to output file
+    _save(benchmark, results)
+
+    ## Summarize results
+    _print_summary(results)
+
+def _load_benchmark(benchmark):
+    '''Load benchmark definition.'''
+    benchfile = Path(BENCHMARK_FILE.format(benchmark))
+    if not benchfile.exists():
+        raise FileNotFoundError(benchfile)
+    print('  --> Loading benchmark definition from:', benchfile)
+    results = None
+    with benchfile.open() as f:
+        results = json.load(f)
+    ## ensure this is a valid benchmark file
+    for key in ('name', 'title', 'description', 'target'):
+        if key not in results:
+            raise InvalidBenchmarkDefinitionError(key, benchfile)
+    return results
+
+def _load_tests(benchmark):
+    '''Loop over all test results in benchmark folder and return results.'''
+    print('  --> Collecting all test results')
+    rootdir = Path(RESULTS_PATH.format(benchmark))
+    results = []
+    for file in rootdir.glob('**/*.json'):
+        print('    --> Loading file:', file, '... ', end='')
+        with open(file) as f:
+            new_results = json.load(f)
+            ## skip files that don't include test results
+            if not 'tests' in new_results:
+                print('not a test result')
+                continue
+            ## check if these are valid test results,
+            ## raise exception otherwise
+            for test in new_results['tests']:
+                for key in ('name', 'title', 'description', 'quantity', 'target',
+                        'value', 'result'):
+                    if not key in test:
+                        raise InvalidTestDefinitionError(key, file)
+                if test['result'] not in ('pass', 'fail', 'error'):
+                    raise InvalidTestResultError('result', test['result'], file)
+                ## ensure the 'weight' key is present, defaulting to 1 if needed
+                if not 'weight' in test:
+                    test['weight'] = 1.
+                ## Append to our test results
+                results.append(test)
+            print('done')
+    return results
+
+def _aggregate_results(results):
+    '''Aggregate test results for our benchmark.'''
+    print('  --> Aggregating benchmark statistics')
+    results['target'] = float(results['target'])
+    results['n_tests'] = len(results['tests'])
+    results['n_pass'] = len([1 for t in results['tests'] if t['result'] == 'pass'])
+    results['n_fail'] = len([1 for t in results['tests'] if t['result'] == 'fail'])
+    results['n_error'] = len([1 for t in results['tests'] if t['result'] == 'error'])
+    results['maximum'] = sum([t['weight'] for t in results['tests']])
+    results['sum'] = sum([t['weight'] for t in results['tests'] if t['result'] == 'pass'])
+    ## only compute a value when we have tests and a nonzero total weight
+    if results['n_tests'] > 0 and results['maximum'] > 0:
+        results['value'] = results['sum'] / results['maximum']
+        if results['n_error'] > 0:
+            results['result'] = 'error'
+        elif results['value'] >= results['target']:
+            results['result'] = 'pass'
+        else:
+            results['result'] = 'fail'
+    else:
+        results['value'] = -1
+        results['result'] = 'error'
+    return results
+
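+## Worked example of the aggregation above (hypothetical numbers): three tests
+## with weights 1, 1 and 2, of which one weight-1 test and the weight-2 test
+## pass, give maximum = 4.0, sum = 3.0 and value = 0.75; with a benchmark
+## target of 0.75 this is a 'pass', with a target of 0.9 a 'fail'.
+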
+def _save(benchmark, results):
+    '''Save benchmark results'''
+    ofile = Path(OUTPUT_FILE.format(benchmark))
+    print('  --> Saving benchmark results to:', ofile)
+    with ofile.open('w') as f:
+        json.dump(results, f, indent=4)
+
+def _print_summary(results):
+    '''Print benchmark summary to the terminal.'''
+    print('====================================================================')
+    print('Summary for:', results['title'])
+    print('Pass: {}, Fail: {}, Error: {} out of {} total tests'.format(
+        results['n_pass'], results['n_fail'], results['n_error'],
+        results['n_tests']))
+    print('Weighted sum: {} / {}'.format(results['sum'], results['maximum']))
+    print('Benchmark value: {} (target: {})'.format(
+        results['value'], results['target']))
+    print('===> status:', results['result'])
+    print('====================================================================')
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    for benchmark in args.benchmark:
+        collect_results(benchmark)
diff --git a/util/run_many.py b/util/run_many.py
index e101fee8cab74419d28c303029ec7d6dbdc06462..4037823f90b1de47c511281b9344773c80845217 100755
--- a/util/run_many.py
+++ b/util/run_many.py
@@ -116,6 +116,9 @@ if __name__ == '__main__':
         return_values = pool.map(worker, cmds)
         ## check if we all exited nicely, else exit with status 1
         if not all(ret == 0 for ret in return_values):
+            n_fail = sum([1 for ret in return_values if ret != 0])
+            print('ERROR, {} of {} jobs failed'.format(n_fail, len(return_values)))
+            print('Return values:', [ret for ret in return_values if ret != 0])
             exit(1)
 
     ## That's all!