Commit 9c146afd authored by Sylvester Joosten

Aggregate benchmark data (part 2)

parent 82d18cd0
1 merge request: !19 Aggregate benchmark data (part 2)
@@ -41,7 +41,7 @@ summary:
  stage: finish
  needs: ["dis:results", "dvcs:results", "dvmp:results"]
  script:
-    - echo "All benchmarks passed"
+    - ./util/collect_benchmarks.py
  artifacts:
    reports:
      junit: ["results/dvcs/report2.xml"]
{
"name": "physics",
"title": "Physics benchmarks",
"description": "Benchmarks to validate the detector configuration versus various key physics observables."
}
@@ -17,7 +17,7 @@ dvmp:generate:
  script:
    - ./util/run_many.py ./dvmp/gen.sh
          -c jpsi_barrel
-         -e 5x41 -e 10x100 -e 18x275
+         -e 10x100
          --decay muon --decay electron
          --nproc 5
@@ -28,7 +28,7 @@ dvmp:process:
  script:
    - ./util/run_many.py ./dvmp/dvmp.sh
          -c jpsi_barrel
-         -e 5x41 -e 10x100 -e 18x275
+         -e 10x100
          --decay muon --decay electron
          --leading jpsi
          --nproc 5
@@ -40,4 +40,8 @@ dvmp:results:
  stage: collect
  needs: ["dvmp:process"]
  script:
-    - echo "All DVMP benchmarks successful"
+    - ./util/collect_tests.py dvmp
+  artifacts:
+    paths:
+      - results/dvmp.json
+      - results/dvmp
#!/usr/bin/env python3
"""
Combine the json files from the individual benchmark tests into
a single master json file covering all benchmarks.
Benchmark results are expected to be all json files in the results
directory.
"""
import json
from pathlib import Path

## Our master definition file, stored in the benchmark project directory
MASTER_FILE = r'benchmarks.json'
## Our results directory
RESULTS_PATH = r'results'
## Output json file with all benchmark results
OUTPUT_FILE = r'results/summary.json'
## Exceptions for this module
class Error(Exception):
'''Base class for exceptions in this module.'''
pass
class FileNotFoundError(Error):
'''File does not exist.
Attributes:
file: the file name
message: error message
'''
def __init__(self, file):
self.file = file
self.message = 'No such file or directory: {}'.format(file)
class InvalidDefinitionError(Error):
'''Raised for missing keys in the definitions.
Attributes:
key: the missing key
file: the definition file
message: error message
'''
def __init__(self, key, file):
self.key = key
self.file = file
self.message = "key '{}' not found in '{}'".format(key, file)
class InvalidResultError(Error):
'''Raised for invalid benchmark result value.
Attributes:
key: the missing key
value: the invalid value
file: the benchmark definition file
message: error message
'''
def __init__(self, key, value, file):
self.key = key
self.value = value
self.file = file
self.message = "value '{}' for key '{}' invalid in benchmark file '{}'".format(
value, key, file)
def collect_benchmarks():
'''Collect all benchmark results and write results to a single file.'''
print("Collecting all benchmark results")
## load the test definition for this benchmark
results = _load_master()
## collect the test results
results['benchmarks'] = _load_benchmarks()
## calculate aggregate test statistics
results = _aggregate_results(results)
## save results to output file
_save(results)
## Summarize results
for bm in results['benchmarks']:
_print_benchmark(bm)
_print_summary(results)
def _load_master():
    '''Load master definition.'''
    master_file = Path(MASTER_FILE)
    if not master_file.exists():
        raise FileNotFoundError(master_file)
    print(' --> Loading master definition from:', master_file)
    results = None
    with master_file.open() as f:
        results = json.load(f)
    ## ensure this is a valid benchmark file
    for key in ('name', 'title', 'description'):
        if key not in results:
            raise InvalidDefinitionError(key, master_file)
    return results
def _load_benchmarks():
    '''Load all benchmark results from the results folder.'''
    print(' --> Collecting all benchmarks')
    rootdir = Path(RESULTS_PATH)
    results = []
    for file in rootdir.glob('*.json'):
        print(' --> Loading file:', file, '... ', end='')
        with open(file) as f:
            bm = json.load(f)
        ## skip files that don't include test results
        if 'tests' not in bm:
            print('skipped (does not contain benchmark results).')
            continue
        ## check if these are valid benchmark results,
        ## raise exception otherwise
        for key in ('name', 'title', 'description', 'target', 'n_tests',
                    'n_pass', 'n_fail', 'n_error', 'maximum', 'sum', 'value',
                    'result'):
            if key not in bm:
                raise InvalidDefinitionError(key, file)
        if bm['result'] not in ('pass', 'fail', 'error'):
            raise InvalidResultError('result', bm['result'], file)
        ## Append to our test results
        results.append(bm)
        print('done')
    return results
def _aggregate_results(results):
'''Aggregate benchmark results.'''
print(' --> Aggregating benchmark statistics')
results['n_benchmarks'] = len(results['benchmarks'])
results['n_pass'] = len([1 for t in results['benchmarks'] if t['result'] == 'pass'])
results['n_fail'] = len([1 for t in results['benchmarks'] if t['result'] == 'fail'])
results['n_error'] = len([1 for t in results['benchmarks'] if t['result'] == 'error'])
if results['n_error'] > 0:
results['result'] = 'error'
elif results['n_fail'] == 0:
results['result'] = 'pass'
else:
results['result'] = 'fail'
return results
def _save(results):
'''Save aggregated benchmark results'''
ofile = Path(OUTPUT_FILE)
print(' --> Saving results to:', ofile)
with ofile.open('w') as f:
json.dump(results, f, indent=4)
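## For reference, the aggregated file written by _save() mirrors the master
## definition plus the aggregate fields computed above. A sketch of
## results/summary.json, assuming two collected benchmarks (numbers illustrative):
##
##   {
##     "name": "physics", "title": "Physics benchmarks", "description": "...",
##     "benchmarks": [ ...the individual benchmark entries... ],
##     "n_benchmarks": 2, "n_pass": 2, "n_fail": 0, "n_error": 0,
##     "result": "pass"
##   }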
def _print_benchmark(bm):
'''Print benchmark summary to the terminal.'''
print('====================================================================')
print(' Summary for:', bm['title'])
print(' Pass: {}, Fail: {}, Error: {} out of {} total tests'.format(
bm['n_pass'], bm['n_fail'], bm['n_error'],
bm['n_tests']))
print(' Weighted sum: {} / {}'.format(bm['sum'], bm['maximum']))
    print(' Benchmark value: {} (target: {})'.format(
bm['value'], bm['target']))
print(' ===> status:', bm['result'])
def _print_summary(results):
'''Print master benchmark summary to the terminal.'''
print('====================================================================')
print('MASTER BENCHMARK SUMMARY FOR:', results['title'].upper())
print('Pass: {}, Fail: {}, Error: {} out of {} total benchmarks'.format(
results['n_pass'], results['n_fail'], results['n_error'],
results['n_benchmarks']))
print('===> status:', results['result'])
print('====================================================================')
if __name__ == "__main__":
try:
collect_benchmarks()
except Error as e:
print()
print('ERROR', e.message)
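## Usage sketch: the summary CI job above invokes this script from the
## repository root as ./util/collect_benchmarks.py, after the per-benchmark
## collect jobs have written their json files into results/. It reads the
## master definition from benchmarks.json and writes results/summary.json.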
@@ -7,16 +7,198 @@ and do additional accounting for the benchmark.
Test results are expected to have the following file name and directory
structure:
results/<BENCHMARK_NAME>/<SOME_NAME>.json
or
results/<BENCHMARK_NAME>/**/<SOME_NAME>.json
where ** implies that all sub-directories of <BENCHMARK_NAME> are checked recursively
Internally, we will look for the "tests" keyword in each of these
files to identify them as benchmark components.
"""
import argparse
import json
from pathlib import Path

## Our benchmark definition file, stored in the benchmark root directory
BENCHMARK_FILE = r'{}/benchmark.json'
## Our benchmark results directory
RESULTS_PATH = r'results/{}'
## Output json file with benchmark results
OUTPUT_FILE = r'results/{}.json'
## Exceptions for this module
class Error(Exception):
'''Base class for exceptions in this module.'''
pass
class FileNotFoundError(Error):
'''File does not exist.
Attributes:
file: the file name
message: error message
'''
def __init__(self, file):
self.file = file
self.message = 'No such file or directory: {}'.format(file)
class InvalidBenchmarkDefinitionError(Error):
'''Raised for missing keys in the benchmark definition.
Attributes:
key: the missing key
file: the benchmark definition file
message: error message
'''
def __init__(self, key, file):
self.key = key
self.file = file
self.message = "key '{}' not found in benchmark file '{}'".format(key, file)
class InvalidTestDefinitionError(Error):
'''Raised for missing keys in the test result.
Attributes:
key: the missing key
file: the test result file
message: error message
'''
def __init__(self, key, file):
self.key = key
self.file = file
self.message = "key '{}' not found in test file '{}'".format(key, file)
class InvalidTestResultError(Error):
'''Raised for invalid test result value.
Attributes:
key: the missing key
value: the invalid value
file: the benchmark definition file
message: error message
'''
def __init__(self, key, value, file):
self.key = key
self.value = value
self.file = file
self.message = "value '{}' for key '{}' invalid in test file '{}'".format(
value, key, file)
parser = argparse.ArgumentParser()
parser.add_argument(
    'benchmark',
    nargs='+',
    help='One or more benchmarks for which to collect test results.')
def collect_results(benchmark):
'''Collect benchmark tests and write results to file.'''
print("Collecting results for benchmark '{}'".format(benchmark))
## load the test definition for this benchmark
results = _load_benchmark(benchmark)
## collect the test results
results['tests'] = _load_tests(benchmark)
## calculate aggregate test statistics
results = _aggregate_results(results)
## save results to output file
_save(benchmark, results)
## Summarize results
_print_summary(results)
def _load_benchmark(benchmark):
    '''Load benchmark definition.'''
    benchfile = Path(BENCHMARK_FILE.format(benchmark))
    if not benchfile.exists():
        raise FileNotFoundError(benchfile)
    print(' --> Loading benchmark definition from:', benchfile)
    results = None
    with benchfile.open() as f:
        results = json.load(f)
    ## ensure this is a valid benchmark file
    for key in ('name', 'title', 'description', 'target'):
        if key not in results:
            raise InvalidBenchmarkDefinitionError(key, benchfile)
    return results
def _load_tests(benchmark):
    '''Loop over all test results in the benchmark folder and return the results.'''
    print(' --> Collecting all test results')
    rootdir = Path(RESULTS_PATH.format(benchmark))
    results = []
    for file in rootdir.glob('**/*.json'):
        print(' --> Loading file:', file, '... ', end='')
        with open(file) as f:
            new_results = json.load(f)
        ## skip files that don't include test results
        if 'tests' not in new_results:
            print('not a test result')
            continue
        ## check if these are valid test results,
        ## raise exception otherwise
        for test in new_results['tests']:
            for key in ('name', 'title', 'description', 'quantity', 'target',
                        'value', 'result'):
                if key not in test:
                    raise InvalidTestDefinitionError(key, file)
            if test['result'] not in ('pass', 'fail', 'error'):
                raise InvalidTestResultError('result', test['result'], file)
            ## ensure the 'weight' key is present, defaulting to 1 if needed
            if 'weight' not in test:
                test['weight'] = 1.
            ## Append to our test results
            results.append(test)
        print('done')
    return results
def _aggregate_results(results):
'''Aggregate test results for our benchmark.'''
print(' --> Aggregating benchmark statistics')
results['target'] = float(results['target'])
results['n_tests'] = len(results['tests'])
results['n_pass'] = len([1 for t in results['tests'] if t['result'] == 'pass'])
results['n_fail'] = len([1 for t in results['tests'] if t['result'] == 'fail'])
results['n_error'] = len([1 for t in results['tests'] if t['result'] == 'error'])
results['maximum'] = sum([t['weight'] for t in results['tests']])
results['sum'] = sum([t['weight'] for t in results['tests'] if t['result'] == 'pass'])
if (results['n_tests'] > 0):
results['value'] = results['sum'] / results['maximum']
if results['n_error'] > 0:
results['result'] = 'error'
elif results['value'] >= results['target']:
results['result'] = 'pass'
else:
results['result'] = 'fail'
else:
results['value'] = -1
results['result'] = 'error'
return results
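## Worked example of the weighting above (hypothetical numbers): three tests
## with weights 1, 1 and 2, of which only the weight-2 test passes, give
## maximum = 4, sum = 2 and value = 2 / 4 = 0.5; with a benchmark target of
## 0.9 and no errors this yields result = 'fail'.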
def _save(benchmark, results):
'''Save benchmark results'''
ofile = Path(OUTPUT_FILE.format(benchmark))
print(' --> Saving benchmark results to:', ofile)
with ofile.open('w') as f:
json.dump(results, f, indent=4)
def _print_summary(results):
'''Print benchmark summary to the terminal.'''
print('====================================================================')
print('Summary for:', results['title'])
print('Pass: {}, Fail: {}, Error: {} out of {} total tests'.format(
results['n_pass'], results['n_fail'], results['n_error'],
results['n_tests']))
print('Weighted sum: {} / {}'.format(results['sum'], results['maximum']))
print('Benchmark value: {} (target: {})'.format(
results['value'], results['target']))
print('===> status:', results['result'])
print('====================================================================')
if __name__ == "__main__":
args = parser.parse_args()
for benchmark in args.benchmark:
collect_results(benchmark)
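## Usage sketch: the dvmp:results CI job above calls this script as
## ./util/collect_tests.py dvmp, which loads dvmp/benchmark.json, globs
## results/dvmp/**/*.json for test results, and writes the aggregated
## results/dvmp.json that collect_benchmarks.py later picks up.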
@@ -116,6 +116,9 @@ if __name__ == '__main__':
        return_values = pool.map(worker, cmds)
        ## check if all jobs exited nicely, else exit with status 1
        if not all(ret == 0 for ret in return_values):
+            n_fail = sum([1 for ret in return_values if ret != 0])
+            print('ERROR, {} of {} jobs failed'.format(n_fail, len(return_values)))
+            print('Return values:', [ret for ret in return_values if ret != 0])
            exit(1)
        ## That's all!