diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 58c30aacc59d206caaa69e94d7b3b1f16a91381a..79ddc8d06606989c4f877139e5b471428e068fd7 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -41,7 +41,7 @@ summary:
   stage: finish
   needs: ["dis:results", "dvcs:results", "dvmp:results"]
   script:
-    - echo "All benchmarks passed"
+    - ./util/collect_benchmarks.py
   artifacts:
     reports:
       junit: ["results/dvcs/report2.xml"]
diff --git a/benchmarks.json b/benchmarks.json
new file mode 100644
index 0000000000000000000000000000000000000000..38168c1836f33b27e147f6a165a91c1b430e0b93
--- /dev/null
+++ b/benchmarks.json
@@ -0,0 +1,5 @@
+{
+  "name": "physics",
+  "title": "Physics benchmarks",
+  "description": "Benchmarks to validate the detector configuration versus various key physics observables."
+}
diff --git a/dvmp/config.yml b/dvmp/config.yml
index ac25f6fbcef2573f2e5e83cc8bb57e68e637219e..5a17fddd9ab77ae7225b22f4299ab49cdf404ab5 100644
--- a/dvmp/config.yml
+++ b/dvmp/config.yml
@@ -17,7 +17,7 @@ dvmp:generate:
   script:
     - ./util/run_many.py ./dvmp/gen.sh
       -c jpsi_barrel
-      -e 5x41 -e 10x100 -e 18x275
+      -e 10x100
       --decay muon --decay electron
       --nproc 5
@@ -28,7 +28,7 @@ dvmp:process:
   script:
     - ./util/run_many.py ./dvmp/dvmp.sh
      -c jpsi_barrel
-     -e 5x41 -e 10x100 -e 18x275
+     -e 10x100
      --decay muon --decay electron
      --leading jpsi
      --nproc 5
@@ -40,4 +40,8 @@ dvmp:results:
   stage: collect
   needs: ["dvmp:process"]
   script:
-    - echo "All DVMP benchmarks successful"
+    - ./util/collect_tests.py dvmp
+  artifacts:
+    paths:
+      - results/dvmp.json
+      - results/dvmp
diff --git a/util/collect_benchmarks.py b/util/collect_benchmarks.py
new file mode 100755
index 0000000000000000000000000000000000000000..8f9fd675e5d824580788a13690596515d333b988
--- /dev/null
+++ b/util/collect_benchmarks.py
@@ -0,0 +1,181 @@
+#!/usr/bin/env python3
+
+"""
+Combine the json files from the individual benchmarks into
+a single master json file summarizing all benchmarks.
+
+Benchmark results are expected to be all json files in the results
+directory.
+"""
+
+## Our master definition file, stored in the benchmark project root directory
+MASTER_FILE=r'benchmarks.json'
+
+## Our results directory
+RESULTS_PATH=r'results'
+
+## Output json file with all benchmark results
+OUTPUT_FILE=r'results/summary.json'
+
+import argparse
+import json
+from pathlib import Path
+
+## Exceptions for this module
+class Error(Exception):
+    '''Base class for exceptions in this module.'''
+    pass
+class FileNotFoundError(Error):
+    '''File does not exist.
+
+    Attributes:
+        file: the file name
+        message: error message
+    '''
+    def __init__(self, file):
+        self.file = file
+        self.message = 'No such file or directory: {}'.format(file)
+
+class InvalidDefinitionError(Error):
+    '''Raised for missing keys in the definitions.
+
+    Attributes:
+        key: the missing key
+        file: the definition file
+        message: error message
+    '''
+    def __init__(self, key, file):
+        self.key = key
+        self.file = file
+        self.message = "key '{}' not found in '{}'".format(key, file)
+
+class InvalidResultError(Error):
+    '''Raised for invalid benchmark result value.
+
+    Attributes:
+        key: the missing key
+        value: the invalid value
+        file: the benchmark definition file
+        message: error message
+    '''
+    def __init__(self, key, value, file):
+        self.key = key
+        self.value = value
+        self.file = file
+        self.message = "value '{}' for key '{}' invalid in benchmark file '{}'".format(
+                value, key, file)
+
+def collect_benchmarks():
+    '''Collect all benchmark results and write results to a single file.'''
+    print("Collecting all benchmark results")
+
+    ## load the master definition for the benchmark suite
+    results = _load_master()
+
+    ## collect the individual benchmark results
+    results['benchmarks'] = _load_benchmarks()
+
+    ## calculate aggregate benchmark statistics
+    results = _aggregate_results(results)
+
+    ## save results to output file
+    _save(results)
+
+    ## Summarize results
+    for bm in results['benchmarks']:
+        _print_benchmark(bm)
+    _print_summary(results)
+
+def _load_master():
+    '''Load master definition.'''
+    master_file = Path(MASTER_FILE)
+    if not master_file.exists():
+        raise FileNotFoundError(master_file)
+    print(' --> Loading master definition from:', master_file)
+    results = None
+    with master_file.open() as f:
+        results = json.load(f)
+    ## ensure this is a valid benchmark file
+    for key in ('name', 'title', 'description'):
+        if not key in results:
+            raise InvalidDefinitionError(key, master_file)
+    return results
+
+def _load_benchmarks():
+    '''Load all benchmark results from the results folder.'''
+    print(' --> Collecting all benchmarks')
+    rootdir = Path(RESULTS_PATH)
+    results = []
+    for file in rootdir.glob('*.json'):
+        print(' --> Loading file:', file, '... ', end='')
+        with open(file) as f:
+            bm = json.load(f)
+            ## skip files that don't include test results
+            if not 'tests' in bm:
+                print('skipped (does not contain benchmark results).')
+                continue
+            ## check if these are valid benchmark results,
+            ## raise exception otherwise
+            for key in ('name', 'title', 'description', 'target', 'n_tests',
+                        'n_pass', 'n_fail', 'n_error', 'maximum', 'sum', 'value',
+                        'result'):
+                if not key in bm:
+                    raise InvalidDefinitionError(key, file)
+            if bm['result'] not in ('pass', 'fail', 'error'):
+                raise InvalidResultError('result', bm['result'], file)
+            ## Append to our benchmark results
+            results.append(bm)
+            print('done')
+    return results
+
+def _aggregate_results(results):
+    '''Aggregate benchmark results.'''
+    print(' --> Aggregating benchmark statistics')
+    results['n_benchmarks'] = len(results['benchmarks'])
+    results['n_pass'] = len([1 for t in results['benchmarks'] if t['result'] == 'pass'])
+    results['n_fail'] = len([1 for t in results['benchmarks'] if t['result'] == 'fail'])
+    results['n_error'] = len([1 for t in results['benchmarks'] if t['result'] == 'error'])
+    if results['n_error'] > 0:
+        results['result'] = 'error'
+    elif results['n_fail'] == 0:
+        results['result'] = 'pass'
+    else:
+        results['result'] = 'fail'
+    return results
+
+def _save(results):
+    '''Save aggregated benchmark results'''
+    ofile = Path(OUTPUT_FILE)
+    print(' --> Saving results to:', ofile)
+    with ofile.open('w') as f:
+        json.dump(results, f, indent=4)
+
+def _print_benchmark(bm):
+    '''Print benchmark summary to the terminal.'''
+    print('====================================================================')
+    print(' Summary for:', bm['title'])
+    print(' Pass: {}, Fail: {}, Error: {} out of {} total tests'.format(
+        bm['n_pass'], bm['n_fail'], bm['n_error'],
+        bm['n_tests']))
+    print(' Weighted sum: {} / {}'.format(bm['sum'], bm['maximum']))
+    print(' Benchmark value: {} (target: {})'.format(
+        bm['value'], bm['target']))
+    print(' ===> status:', bm['result'])
+
+def _print_summary(results):
+    '''Print master benchmark summary to the terminal.'''
+    print('====================================================================')
+    print('MASTER BENCHMARK SUMMARY FOR:', results['title'].upper())
+    print('Pass: {}, Fail: {}, Error: {} out of {} total benchmarks'.format(
+        results['n_pass'], results['n_fail'], results['n_error'],
+        results['n_benchmarks']))
+    print('===> status:', results['result'])
+    print('====================================================================')
+
+
+if __name__ == "__main__":
+    try:
+        collect_benchmarks()
+    except Error as e:
+        print()
+        print('ERROR', e.message)
diff --git a/util/collect_tests.py b/util/collect_tests.py
old mode 100644
new mode 100755
index 5f56dd6e4532589a22d652ea52326342bbcf13a0..c56d8c8b2ffd6a1f7ef525c88f66ed6614bb8857
--- a/util/collect_tests.py
+++ b/util/collect_tests.py
@@ -7,16 +7,198 @@
 and do additional accounting for the benchmark.
 
 Tests results are expected to have the following file name and directory
 structure:
-   results/<BENCHMARK_NAME>/<SOME_NAME>.json
-or
-   results/<BENCHMARK_NAME>/subdirectory/<SOME_NAME>.json
+   results/<BENCHMARK_NAME>/**/<SOME_NAME>.json
+where ** implies we recursively check all sub-directories of <BENCHMARK_NAME>
 
 Internally, we will look for the "tests" keyword in each of these
 files to identify them as benchmark components.
 """
+## Our benchmark definition file, stored in the benchmark root directory
+BENCHMARK_FILE=r'{}/benchmark.json'
+
+## Our benchmark results directory
+RESULTS_PATH=r'results/{}'
+
+## Output json file with benchmark results
+OUTPUT_FILE=r'results/{}.json'
+
 import argparse
 import json
+from pathlib import Path
 
-if __name__ == "__main__":
+## Exceptions for this module
+class Error(Exception):
+    '''Base class for exceptions in this module.'''
     pass
+class FileNotFoundError(Exception):
+    '''File does not exist.
+
+    Attributes:
+        file: the file name
+        message: error message
+    '''
+    def __init__(self, file):
+        self.file = file
+        self.message = 'No such file or directory: {}'.format(file)
+
+class InvalidBenchmarkDefinitionError(Exception):
+    '''Raised for missing keys in the benchmark definition.
+
+    Attributes:
+        key: the missing key
+        file: the benchmark definition file
+        message: error message
+    '''
+    def __init__(self, key, file):
+        self.key = key
+        self.file = file
+        self.message = "key '{}' not found in benchmark file '{}'".format(key, file)
+
+class InvalidTestDefinitionError(Exception):
+    '''Raised for missing keys in the test result.
+
+    Attributes:
+        key: the missing key
+        file: the test result file
+        message: error message
+    '''
+    def __init__(self, key, file):
+        self.key = key
+        self.file = file
+        self.message = "key '{}' not found in test file '{}'".format(key, file)
+class InvalidTestResultError(Exception):
+    '''Raised for invalid test result value.
+
+    Attributes:
+        key: the missing key
+        value: the invalid value
+        file: the benchmark definition file
+        message: error message
+    '''
+    def __init__(self, key, value, file):
+        self.key = key
+        self.value = value
+        self.file = file
+        self.message = "value '{}' for key '{}' invalid in test file '{}'".format(
+                value, key, file)
+
+
+parser = argparse.ArgumentParser()
+parser.add_argument(
+        'benchmark',
+        nargs='+',
+        help='One or more benchmarks for which to collect test results.')
+
+def collect_results(benchmark):
+    '''Collect benchmark tests and write results to file.'''
+    print("Collecting results for benchmark '{}'".format(benchmark))
+
+    ## load the benchmark definition
+    results = _load_benchmark(benchmark)
+
+    ## collect the test results
+    results['tests'] = _load_tests(benchmark)
+
+    ## calculate aggregate test statistics
+    results = _aggregate_results(results)
+
+    ## save results to output file
+    _save(benchmark, results)
+
+    ## Summarize results
+    _print_summary(results)
+
+def _load_benchmark(benchmark):
+    '''Load benchmark definition.'''
+    benchfile = Path(BENCHMARK_FILE.format(benchmark))
+    if not benchfile.exists():
+        raise FileNotFoundError(benchfile)
+    print(' --> Loading benchmark definition from:', benchfile)
+    results = None
+    with benchfile.open() as f:
+        results = json.load(f)
+    ## ensure this is a valid benchmark file
+    for key in ('name', 'title', 'description', 'target'):
+        if not key in results:
+            raise InvalidBenchmarkDefinitionError(key, benchfile)
+    return results
+
+def _load_tests(benchmark):
+    '''Loop over all test results in benchmark folder and return results.'''
+    print(' --> Collecting all test results')
+    rootdir = Path(RESULTS_PATH.format(benchmark))
+    results = []
+    for file in rootdir.glob('**/*.json'):
+        print(' --> Loading file:', file, '... ', end='')
+        with open(file) as f:
+            new_results = json.load(f)
+            ## skip files that don't include test results
+            if not 'tests' in new_results:
+                print('not a test result')
+                continue
+            ## check if these are valid test results,
+            ## raise exception otherwise
+            for test in new_results['tests']:
+                for key in ('name', 'title', 'description', 'quantity', 'target',
+                            'value', 'result'):
+                    if not key in test:
+                        raise InvalidTestDefinitionError(key, file)
+                if test['result'] not in ('pass', 'fail', 'error'):
+                    raise InvalidTestResultError('result', test['result'], file)
+                ## ensure 'weight' key present, defaulting to 1 if needed
+                if not 'weight' in test:
+                    test['weight'] = 1.
+                ## Append to our test results
+                results.append(test)
+            print('done')
+    return results
+
+def _aggregate_results(results):
+    '''Aggregate test results for our benchmark.'''
+    print(' --> Aggregating benchmark statistics')
+    results['target'] = float(results['target'])
+    results['n_tests'] = len(results['tests'])
+    results['n_pass'] = len([1 for t in results['tests'] if t['result'] == 'pass'])
+    results['n_fail'] = len([1 for t in results['tests'] if t['result'] == 'fail'])
+    results['n_error'] = len([1 for t in results['tests'] if t['result'] == 'error'])
+    results['maximum'] = sum([t['weight'] for t in results['tests']])
+    results['sum'] = sum([t['weight'] for t in results['tests'] if t['result'] == 'pass'])
+    if (results['n_tests'] > 0):
+        results['value'] = results['sum'] / results['maximum']
+        if results['n_error'] > 0:
+            results['result'] = 'error'
+        elif results['value'] >= results['target']:
+            results['result'] = 'pass'
+        else:
+            results['result'] = 'fail'
+    else:
+        results['value'] = -1
+        results['result'] = 'error'
+    return results
+
+def _save(benchmark, results):
+    '''Save benchmark results'''
+    ofile = Path(OUTPUT_FILE.format(benchmark))
+    print(' --> Saving benchmark results to:', ofile)
+    with ofile.open('w') as f:
+        json.dump(results, f, indent=4)
+
+def _print_summary(results):
+    '''Print benchmark summary to the terminal.'''
+    print('====================================================================')
+    print('Summary for:', results['title'])
+    print('Pass: {}, Fail: {}, Error: {} out of {} total tests'.format(
+        results['n_pass'], results['n_fail'], results['n_error'],
+        results['n_tests']))
+    print('Weighted sum: {} / {}'.format(results['sum'], results['maximum']))
+    print('Benchmark value: {} (target: {})'.format(
+        results['value'], results['target']))
+    print('===> status:', results['result'])
+    print('====================================================================')
+
+
+if __name__ == "__main__":
+    args = parser.parse_args()
+    for benchmark in args.benchmark:
+        collect_results(benchmark)
diff --git a/util/run_many.py b/util/run_many.py
index e101fee8cab74419d28c303029ec7d6dbdc06462..4037823f90b1de47c511281b9344773c80845217 100755
--- a/util/run_many.py
+++ b/util/run_many.py
@@ -116,6 +116,9 @@ if __name__ == '__main__':
     return_values = pool.map(worker, cmds)
     ## check if we all exited nicely, else exit with status 1
     if not all(ret == 0 for ret in return_values):
+        n_fail = sum([1 for ret in return_values if ret != 0])
+        print('ERROR, {} of {} jobs failed'.format(n_fail, len(return_values)))
+        print('Return values:', [ret for ret in return_values if ret != 0])
         exit(1)
 
     ## That's all!
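
For reviewers, a minimal sketch of the json files these scripts exchange; every file name and number below is an illustrative assumption, not part of this change. ./util/collect_tests.py dvmp reads the benchmark definition dvmp/benchmark.json plus every results/dvmp/**/*.json file that contains a "tests" array and writes the aggregate to results/dvmp.json; ./util/collect_benchmarks.py then combines all results/*.json files into results/summary.json.

A hypothetical dvmp/benchmark.json (required keys: name, title, description, target):

{
    "name": "dvmp_jpsi_barrel",
    "title": "DVMP J/psi production in the barrel",
    "description": "Validate J/psi reconstruction for the DVMP benchmark",
    "target": 0.9
}

A hypothetical test-result file under results/dvmp/ (each entry in "tests" needs name, title, description, quantity, target, value and result; weight is optional and defaults to 1):

{
    "tests": [
        {
            "name": "jpsi_mass_resolution",
            "title": "J/psi mass resolution",
            "description": "Relative width of the reconstructed J/psi mass peak",
            "quantity": "resolution",
            "target": 0.1,
            "value": 0.08,
            "weight": 1.0,
            "result": "pass"
        }
    ]
}

The benchmark passes when the weighted pass fraction (sum / maximum) reaches its target, and reports an error if any test errors.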