diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 72ff8e3a8..2afa76211 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -12,25 +12,52 @@ variables:
   BUILD_ROOT: ${CI_PROJECT_DIR}
   FULL_BUILD_ROOT: ${CI_BUILDS_DIR}/serac/${CI_JOB_NAME}
   ALLOC_BANK: eng
+  SPOT_DIR: ${BUILD_ROOT}/califiles
 
 # Whether and how to update uberenv
 .run_update_uberenv: &run_update_uberenv |
   [[ -n "${UPDATE_UBERENV}" ]] && ./scripts/gitlab/update-uberenv.sh "${UPDATE_UBERENV}"
 
+stages:
+  - src
+  - full
+  - benchmarks
+  - comparison
+
 # Run src build each push
 .src_workflow:
+  stage: src
   rules:
-    - if: $SERAC_CI_WORKFLOW_TYPE != "full" && $SERAC_CI_WORKFLOW_TYPE != "benchmarks"
+    - if: $SERAC_CI_WORKFLOW_TYPE != "full" &&
+          $SERAC_CI_WORKFLOW_TYPE != "benchmarks" &&
+          $SERAC_CI_WORKFLOW_TYPE != "comparison"
 
 # Run full build as a nightly scheduled pipeline
 .full_workflow:
+  stage: full
   rules:
     - if: $SERAC_CI_WORKFLOW_TYPE == "full"
 
 # Run benchmarks build as a weekly scheduled pipeline
+# or, run benchmarks and save them in tmp location for comparison
 .benchmarks_workflow:
+  stage: benchmarks
   rules:
     - if: $SERAC_CI_WORKFLOW_TYPE == "benchmarks"
+      variables:
+        # Note: make sure this matches what's in common_build_functions.py!
+        SPOT_DIR: /usr/workspace/smithdev/califiles/serac
+    - if: $SERAC_CI_WORKFLOW_TYPE == "comparison"
+
+# Compare caliper files (generated from .benchmarks_workflow) against develop
+.comparison_workflow:
+  stage: comparison
+  rules:
+    - if: $SERAC_CI_WORKFLOW_TYPE == "comparison"
+  dependencies:
+    - blueos-clang_10_0_1-benchmarks
+    - toss4-gcc_10_3_1-benchmarks
+    - toss4-clang_14_0_6-benchmarks
 
 ####
 # Templates
@@ -73,13 +100,13 @@ variables:
     reports:
       junit: ${FULL_BUILD_ROOT}/${SYS_TYPE}/*/_serac_build_and_test_*/build-*/junit.xml
 
-
 .benchmarks_build_script:
   script:
     # Builds src, runs benchmarks, and stores Caliper files in shared location
     - echo -e "section_start:$(date +%s):benchmarks_build\r\e[0K
       Benchmarks Build ${CI_PROJECT_NAME}"
-    - ${ALLOC_COMMAND} python3 scripts/llnl/run_benchmarks.py
+    - mkdir ${SPOT_DIR}
+    - ${ALLOC_COMMAND} python3 scripts/llnl/run_benchmarks.py --spot-dir ${SPOT_DIR}
     - echo -e "section_end:$(date +%s):benchmarks_build\r\e[0K"
   artifacts:
     expire_in: 2 weeks
@@ -87,7 +114,17 @@ variables:
     paths:
       - _serac_build_and_test_*/output.log*.txt
      - _serac_build_and_test_*/build-*/output.log*.txt
-      - _serac_build_and_test_*/build-*/*.cali
+      - ${SPOT_DIR}/*.cali
+
+.comparison_build_script:
+  script:
+    # Takes Caliper files from previous jobs and performs benchmark analysis
+    - echo -e "section_start:$(date +%s):comparison\r\e[0K
+      Comparison Build ${CI_PROJECT_NAME}"
+    - ls ${SPOT_DIR}
+    - ls ${SPOT_DIR} | wc -l
+    - ${ALLOC_COMMAND} python3 scripts/llnl/compare_benchmarks.py --current-cali-dir ${SPOT_DIR} --verbose
+    - echo -e "section_end:$(date +%s):comparison\r\e[0K"
 
 # This is where jobs are included for each system
 include:
diff --git a/.gitlab/build_blueos.yml b/.gitlab/build_blueos.yml
index 3b8e6b529..1734bf5c6 100644
--- a/.gitlab/build_blueos.yml
+++ b/.gitlab/build_blueos.yml
@@ -31,15 +31,12 @@
 # Template
 .src_build_on_blueos:
   extends: [.src_build_script, .on_blueos, .src_workflow]
-  needs: []
 
 .full_build_on_blueos:
   extends: [.full_build_script, .on_blueos, .full_workflow]
-  needs: []
 
 .benchmarks_build_on_blueos:
   extends: [.benchmarks_build_script, .on_blueos, .benchmarks_workflow]
-  needs: []
 
 ####
 # Build jobs
diff --git a/.gitlab/build_toss4.yml b/.gitlab/build_toss4.yml
index d4f92a098..3e6a93f7d 100644
--- a/.gitlab/build_toss4.yml
+++ b/.gitlab/build_toss4.yml
@@ -19,19 +19,18 @@
 # Templates
 .src_build_on_toss4:
   extends: [.src_build_script, .on_toss4, .src_workflow]
-  needs: []
 
 .full_build_on_toss4:
   extends: [.full_build_script, .on_toss4, .full_workflow]
-  needs: []
   before_script:
     # LC version of pip is ancient
     - if [[ $(python3 -c 'import pip; print(pip.__version__ < "19.3")') == "True" ]]; then python3 -m pip install --user --upgrade pip; fi
 
 .benchmarks_build_on_toss4:
   extends: [.benchmarks_build_script, .on_toss4, .benchmarks_workflow]
-  needs: []
+
+.comparison_build_on_toss4:
+  extends: [.comparison_build_script, .on_toss4, .comparison_workflow]
 
 ####
 # Build jobs
@@ -113,3 +112,11 @@ toss4-gcc_10_3_1-benchmarks:
     ALLOC_TIME: "120"
     ALLOC_DEADLINE: "180"
   extends: .benchmarks_build_on_toss4
+
+# This comparison job simply runs Hatchet and compares against benchmarks generated across all configurations
+toss4-comparison:
+  variables:
+    ALLOC_NODES: "1"
+    ALLOC_TIME: "30"
+    ALLOC_DEADLINE: "45"
+  extends: .comparison_build_on_toss4
diff --git a/.gitlab/build_toss4_cray.yml b/.gitlab/build_toss4_cray.yml
index 56787c3b8..41f57b18a 100644
--- a/.gitlab/build_toss4_cray.yml
+++ b/.gitlab/build_toss4_cray.yml
@@ -16,19 +16,15 @@
 # Templates
 .src_build_on_toss4_cray:
   extends: [.src_build_script, .on_toss4_cray, .src_workflow]
-  needs: []
 
 .full_build_on_toss4_cray:
   extends: [.full_build_script, .on_toss4_cray, .full_workflow]
-  needs: []
   before_script:
     # LC version of pip is ancient
     - if [[ $(python3 -c 'import pip; print(pip.__version__ < "19.3")') == "True" ]]; then python3 -m pip install --user --upgrade pip; fi
 
 .benchmarks_build_on_toss4_cray:
   extends: [.benchmarks_build_script, .on_toss4_cray, .benchmarks_workflow]
-  needs: []
-
 ####
 # Build jobs
diff --git a/config-build.py b/config-build.py
index 954a82173..9143a8661 100755
--- a/config-build.py
+++ b/config-build.py
@@ -73,8 +73,7 @@ def parse_arguments():
                         "--buildtype",
                         type=str,
                         choices=["Release", "Debug", "RelWithDebInfo", "MinSizeRel"],
-                        default="Debug",
-                        help="build type.")
+                        help="build type. Defaults to Debug.")
 
     parser.add_argument("-e",
                         "--eclipse",
@@ -105,8 +104,6 @@ def parse_arguments():
                         action='store_true',
                         help="use ninja generator to build serac instead of make")
 
-
-
     args, unknown_args = parser.parse_known_args()
     if unknown_args:
         print("[config-build]: Passing the following arguments directly to cmake... %s" % unknown_args)
@@ -230,6 +227,7 @@ def create_cmake_command_line(args, unknown_args, buildpath, installpath, hostco
 
     # Add build type (opt or debug)
     cmakeline += " -DCMAKE_BUILD_TYPE=" + args.buildtype
+
     # Set install dir
     cmakeline += " -DCMAKE_INSTALL_PREFIX=%s" % installpath
 
@@ -277,7 +275,7 @@ def run_cmake(buildpath, cmakeline):
 # Main
 ############################
 def main():
-    repodir = os.path.abspath(os.path.dirname(__file__)) 
+    repodir = os.path.abspath(os.path.dirname(__file__))
     assert os.path.abspath(os.getcwd())==repodir, "config-build must be run from %s" % repodir
 
     args, unknown_args = parse_arguments()
@@ -295,6 +293,15 @@ def main():
     else:
         return False
 
+    # CMake build type is Debug by default, but if CMAKE_BUILD_TYPE was passed as an unknown argument
+    # (i.e. a CMake argument), then use that option instead.
+    if args.buildtype is None:
+        args.buildtype = "Debug"
+    for unknown_arg in unknown_args:
+        if "-DCMAKE_BUILD_TYPE" in unknown_arg:
+            args.buildtype = unknown_arg.split("=")[1]
+            break
+
     basehostconfigpath = find_host_config(args, repodir)
     platform_info = get_platform_info(basehostconfigpath)
     buildpath = setup_build_dir(args, platform_info)
diff --git a/scripts/llnl/compare_benchmarks.py b/scripts/llnl/compare_benchmarks.py
new file mode 100755
index 000000000..87dea4dac
--- /dev/null
+++ b/scripts/llnl/compare_benchmarks.py
@@ -0,0 +1,211 @@
+#!/bin/sh
+"exec" "python3" "-u" "-B" "$0" "$@"
+
+# Copyright (c) 2019-2024, Lawrence Livermore National Security, LLC and
+# other Serac Project Developers. See the top-level LICENSE file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+#
+# Original Source: https://llnl-hatchet.readthedocs.io/en/latest/llnl.html#id3
+
+import sys
+import os
+import platform
+import re
+import datetime as dt
+from argparse import ArgumentParser
+from common_build_functions import *
+
+
+# Setup SPOT db and hatchet (LC systems only)
+input_deploy_dir_str = "/usr/gapps/spot/live/"
+machine = platform.uname().machine
+sys.path.append(input_deploy_dir_str + "/hatchet-venv/" + machine + "/lib/python3.7/site-packages")
+sys.path.append(input_deploy_dir_str + "/hatchet/" + machine)
+sys.path.append(input_deploy_dir_str + "/spotdb")
+import hatchet
+import spotdb
+
+
+def parse_args():
+    "Parses args from command line"
+    parser = ArgumentParser()
+    parser.add_argument("-c", "--current-cali-dir",
+                        dest="current_cali_dir",
+                        required=True,
+                        help="Directory containing caliper files to compare against a baseline")
+    parser.add_argument("-b", "--baseline-cali-dir",
+                        dest="baseline_cali_dir",
+                        default=get_shared_spot_dir(),
+                        help="Directory containing caliper files that will be considered the baseline (defaults to a shared location)")
+    parser.add_argument("-ma", "--max-allowance",
+                        dest="max_allowance",
+                        default=10,
+                        help="Maximum difference (in seconds) current benchmarks are allowed to be from associated baseline")
+    parser.add_argument("-v", "--verbose",
+                        dest="verbose",
+                        action="store_true",
+                        default=False,
+                        help="Additionally print graph frames")
+    parser.add_argument("-d", "--depth",
+                        dest="depth",
+                        default=10000,
+                        help="Depth of graph frames (if verbose is on). The default shows the full graph.")
+    parser.add_argument("-mc", "--metric-column",
+                        dest="metric_column",
+                        default="Avg time/rank (inc)",
+                        help="Set the metric column to display")
+
+    # Parse args
+    args, _ = parser.parse_known_args()
+    args = vars(args)
+
+    return args
+
+
+def get_gf_configuration(gf):
+    """Get configuration string (compiler and cluster) of a given graph frame"""
+    return f"{gf.metadata.get('serac_compiler')} {gf.metadata.get('cluster')}"
+
+
+def get_benchmark_id(gf):
+    """Get unique benchmark id from a graph frame"""
+    cluster = str(gf.metadata.get("cluster", 1))
+    compiler = str(gf.metadata.get("serac_compiler", 1)).replace(" version ", "_")
+    executable = str(gf.metadata.get("executable", 1))
+    job_size = int(gf.metadata.get("jobsize", 1))
+    return "{0}_{1}_{2}_{3}".format(cluster, compiler, executable, job_size)
+
+
+def remove_color_codes(text):
+    """Remove color codes from a string"""
+    ansi_escape = re.compile(r'\x1b\[[0-9;]*[mG]')
+    return ansi_escape.sub('', text)
+
+
+def get_clean_gf_tree(gf, metric_column, depth, keep_color=False):
+    """Clean up a graph frame tree and return as string."""
+    gf_tree = gf.tree(depth=depth, render_header=False, metric_column=metric_column)
+    if not keep_color:
+        gf_tree = remove_color_codes(gf_tree)
+        gf_tree = gf_tree.split("\nLegend")[0]
+    else:
+        gf_tree = gf_tree.split("\n\x1b[4mLegend\x1b[0m")[0]
+    return gf_tree
+
+
+def get_gf_tree_sum(gf, metric_column, info_type):
+    """Sum the top-level (depth=1) metric values of a graph frame's tree."""
+    shallow_tree_str = get_clean_gf_tree(gf, metric_column, 1).splitlines()
+    sum = 0
+    for line in shallow_tree_str:
+        pos = line.find(" ")
+        if pos != -1:
+            sum += float(line[:pos])
+    return sum
+
+
+def main():
+    # setup
+    args = parse_args()
+
+    # args
+    current_cali_dir = os.path.abspath(args["current_cali_dir"])
+    baseline_cali_dir = os.path.abspath(args["baseline_cali_dir"])
+    max_allowance = float(args["max_allowance"])
+    verbose = args["verbose"]
+    depth = int(args["depth"])
+    metric_column = args["metric_column"]
+
+    # Setup baseline (shared SPOT) graph frames
+    # Only take caliper files from the previous week
+    baseline_calis = os.listdir(baseline_cali_dir)
+    baseline_calis.sort(reverse=True)
+    last_weekly_benchmark_date = baseline_calis[0].split('-')[0]
+    # Default to keeping every file, in case all of them share the most recent date
+    delete_index = len(baseline_calis)
+    for i in range(len(baseline_calis)):
+        if last_weekly_benchmark_date in baseline_calis[i]:
+            baseline_calis[i] = os.path.join(baseline_cali_dir, baseline_calis[i])
+        else:
+            delete_index = i
+            break
+    del baseline_calis[delete_index:]
+    db = spotdb.connect(baseline_cali_dir)
+    gfs_baseline = hatchet.GraphFrame.from_spotdb(db, baseline_calis)
+
+    # Setup current (local build dir) graph frames
+    current_calis = list()
+    for file in os.listdir(current_cali_dir):
+        if file.endswith(".cali"):
+            current_calis.append(os.path.join(current_cali_dir, file))
+    db = spotdb.connect(current_cali_dir)
+    gfs_current = hatchet.GraphFrame.from_spotdb(db, current_calis)
+
+    # Generate dictionary of configurations (cluster and compiler) from current (local) caliper files and
+    # filter baseline based off current's configurations
+    current_configurations = {get_gf_configuration(gf) for gf in gfs_current}
+    gfs_baseline = [gf for gf in gfs_baseline if get_gf_configuration(gf) in current_configurations]
+
+    # Create dictionary of current graph frames for fast look-ups
+    gfs_current_dict = dict()
+    for gf in gfs_current:
+        id = get_benchmark_id(gf)
+        gfs_current_dict[id] = gf
+
+    # Dictionary of total times for each benchmark (diff, current, and baseline)
+    benchmark_times = dict()
+
+    # Generate graph frames from the difference between associated current and baseline benchmarks
+    for gf_baseline in gfs_baseline:
+        id = get_benchmark_id(gf_baseline)
+        gf_current = gfs_current_dict.get(id)
+        if gf_current is None:
+            print(f"Warning: Failed to find associated benchmark {id} in specified location: {current_cali_dir}")
+            continue
+
+        gf_diff = gf_current - gf_baseline
+
+        # Print difference tree. Higher difference means local build is X seconds slower.
+        if verbose:
+            print("=" * 80)
+            print("Hatchet diff tree for {0}:".format(id))
+            print("=" * 80)
+            print(get_clean_gf_tree(gf_diff, metric_column, depth, keep_color=True))
+
+        # Store total time info for each benchmark
+        benchmark_times[id] = {
+            "diff": get_gf_tree_sum(gf_diff, metric_column, "diff"),
+            "current": get_gf_tree_sum(gf_current, metric_column, "current"),
+            "baseline": get_gf_tree_sum(gf_baseline, metric_column, "baseline"),
+        }
+
+    # Print metric column info
+    if verbose:
+        print(f"Using metric columns of '{metric_column}'. Other metric column options are:")
+        print(gfs_baseline[0].show_metric_columns())
+        print()
+
+    # Print time table and if benchmark passed or failed
+    num_failed = 0
+    num_passed = 0
+    num_benchmarks = len(benchmark_times)
+    print(f"{'Status':<10} {'Benchmark ID':<65} {'Current (seconds)':<20} {'Baseline (seconds)':<20} {'Diff (current - baseline)':<20}")
+    for id, benchmark_time in benchmark_times.items():
+        status_str = ""
+        if benchmark_time["diff"] >= max_allowance:
+            num_failed += 1
+            status_str = "❌ Failed"
+        else:
+            num_passed += 1
+            status_str = "✅ Passed"
+
+        print(f"{status_str:<10} {id:<65} {benchmark_time['current']:<20.2f} {benchmark_time['baseline']:<20.2f} {benchmark_time['diff']:<20.2f} ")
+
+    # Print summary
+    print(f"\n{num_passed} out of {num_benchmarks} benchmarks passed given a max allowance of {max_allowance} seconds")
+
+    return num_failed
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/llnl/run_benchmarks.py b/scripts/llnl/run_benchmarks.py
index 0be3325c9..a6850ede5 100755
--- a/scripts/llnl/run_benchmarks.py
+++ b/scripts/llnl/run_benchmarks.py
@@ -61,7 +61,7 @@ def main():
     args = parse_args()
     cmake_options = args["extra_cmake_options"] + " -DENABLE_BENCHMARKS=ON -DENABLE_DOCS=OFF -DCMAKE_BUILD_TYPE=Release"
     host_config = args["host_config"]
-    spot_dir = args["spot_dir"]
+    spot_dir = os.path.abspath(args["spot_dir"])
     timestamp = args["timestamp"]
 
     # Vars
diff --git a/src/docs/sphinx/dev_guide/profiling.rst b/src/docs/sphinx/dev_guide/profiling.rst
index 356760966..6487e6bb3 100644
--- a/src/docs/sphinx/dev_guide/profiling.rst
+++ b/src/docs/sphinx/dev_guide/profiling.rst
@@ -114,10 +114,11 @@ Benchmarking Serac
 
 To run all of Serac's benchmarks in one command, first make sure Serac is configured with benchmarking enabled
 (off by default). Then, run the build target ``run_benchmarks``.
+Make sure benchmarks are enabled and the build type is ``Release``.
 
 .. code-block:: bash
 
-   ./config-build.py -hc <host config file> -DENABLE_BENCHMARKS=ON
+   ./config-build.py -hc <host config file> -bt Release -DENABLE_BENCHMARKS=ON
    cd <build directory>
    make -j
    make run_benchmarks
@@ -139,7 +140,7 @@ files:
 Serac benchmarks are run weekly to track changes over time.
 The following are steps to visualize this data in a meaningful way:
 
-- Go to https://lc.llnl.gov/spot2/?sf=/usr/WS2/smithdev/califiles/serac
+- Go to https://lc.llnl.gov/spot2/?sf=/usr/workspace/smithdev/califiles/serac
 - Click the check mark button on the top right to view additional data categories
 - Ensure ``mpi.world.size``, ``executable``, ``cluster``, and ``compilers`` are enabled
 - Find the pie and bar charts associated with those categories
@@ -153,3 +154,28 @@ you don't view two of one single category.
 
 .. note:: There is a bug in SPOT where if you remove Caliper files from a directory, they still show up on SPOT - if
    you've visualized them previously. The current workaround is by removing the ``llnl.gov`` site cache manually.
+
+Compare a PR's benchmarks vs Develop
+------------------------------------
+
+Utilizing Hatchet, it is possible to view the performance changes of a prospective PR before it merges into
+develop. This process has been conveniently wrapped in a CI pipeline. This Hatchet comparison can only be performed
+on LC, since the baseline benchmarks are generated on LC systems.
+
+1. Go to the following CZ GitLab page to create a new pipeline: https://lc.llnl.gov/gitlab/smith/serac/-/pipelines/new
+2. Choose your branch
+3. Under variables, add "SERAC_CI_WORKFLOW_TYPE" and "comparison" for the key and value, respectively
+
+It's possible to perform this comparison locally. Since baseline benchmarks are generated across different machines and
+compilers, a single build won't compare against all baselines. The benchmarks can be compared using ruby-gcc,
+ruby-clang, and lassen-clang builds.
+
+1. Run benchmarks (see "Benchmarking Serac" above)
+2. ``../scripts/llnl/compare_benchmarks.py --current-cali-dir /path/to/caliper/files``
+
+The script generates Hatchet graph frames by calculating the difference between each associated baseline and local
+benchmark (``gf_diff = gf_current - gf_baseline``). If there is a positive difference, that means your benchmarks ran
+that many seconds slower.
+
+By default, ``compare_benchmarks.py`` will print a table containing the status, id, difference, baseline, and current
+times. Running with the verbose option will additionally print the "difference" Hatchet graph frame for each benchmark.
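For readers who want to explore the Caliper data interactively rather than through ``compare_benchmarks.py``, the
sketch below shows the core Hatchet workflow the script wraps: load ``.cali`` files through SpotDB, subtract two
graph frames, and inspect the difference. It is a minimal sketch and not part of the patch above; the ``sys.path``
additions and SPOT locations mirror the LC deployment used in ``compare_benchmarks.py``, while ``current_dir`` is a
hypothetical local directory of Caliper files.

.. code-block:: python

   import os
   import sys
   import platform

   # Make the LC-deployed Hatchet and SpotDB importable (same paths as compare_benchmarks.py)
   spot_root = "/usr/gapps/spot/live/"
   machine = platform.uname().machine
   sys.path.append(spot_root + "hatchet-venv/" + machine + "/lib/python3.7/site-packages")
   sys.path.append(spot_root + "hatchet/" + machine)
   sys.path.append(spot_root + "spotdb")
   import hatchet
   import spotdb

   # Hypothetical local build output and the shared weekly baseline location
   current_dir = "/path/to/local/califiles"
   baseline_dir = "/usr/workspace/smithdev/califiles/serac"

   def load_graph_frames(cali_dir):
       """Load every .cali file in a directory as a Hatchet GraphFrame."""
       runs = [os.path.join(cali_dir, f) for f in os.listdir(cali_dir) if f.endswith(".cali")]
       db = spotdb.connect(cali_dir)
       return hatchet.GraphFrame.from_spotdb(db, runs)

   gfs_current = load_graph_frames(current_dir)
   gfs_baseline = load_graph_frames(baseline_dir)

   # For illustration only, diff the first run from each set; the real script first matches
   # runs by cluster/compiler/executable/job-size metadata before subtracting.
   gf_diff = gfs_current[0] - gfs_baseline[0]

   # Positive values mean the current run spent more time in that node than the baseline
   print(gf_diff.tree(metric_column="Avg time/rank (inc)"))

Matching runs by metadata (as ``get_benchmark_id`` does in the script) matters because both directories can hold runs
from several clusters, compilers, and job sizes, and only like-for-like pairs produce a meaningful difference.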