diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml
index 72ff8e3a8..2afa76211 100644
--- a/.gitlab-ci.yml
+++ b/.gitlab-ci.yml
@@ -12,25 +12,52 @@ variables:
   BUILD_ROOT: ${CI_PROJECT_DIR}
   FULL_BUILD_ROOT: ${CI_BUILDS_DIR}/serac/${CI_JOB_NAME}
   ALLOC_BANK: eng
+  SPOT_DIR: ${BUILD_ROOT}/califiles
 
 # Whether and how to update uberenv
 .run_update_uberenv: &run_update_uberenv |
   [[ -n "${UPDATE_UBERENV}" ]] && ./scripts/gitlab/update-uberenv.sh "${UPDATE_UBERENV}"
 
+stages:
+  - src
+  - full
+  - benchmarks
+  - comparison
+
 # Run src build each push
 .src_workflow:
+  stage: src
   rules:
-    - if: $SERAC_CI_WORKFLOW_TYPE != "full" && $SERAC_CI_WORKFLOW_TYPE != "benchmarks"
+    - if: $SERAC_CI_WORKFLOW_TYPE != "full" &&
+          $SERAC_CI_WORKFLOW_TYPE != "benchmarks" &&
+          $SERAC_CI_WORKFLOW_TYPE != "comparison"
 
 # Run full build as a nightly scheduled pipeline
 .full_workflow:
+  stage: full
   rules:
     - if: $SERAC_CI_WORKFLOW_TYPE == "full"
 
 # Run benchmarks build as a weekly scheduled pipeline
+# or, run benchmarks and save them in tmp location for comparison
 .benchmarks_workflow:
+  stage: benchmarks
   rules:
     - if: $SERAC_CI_WORKFLOW_TYPE == "benchmarks"
+      variables:
+        # Note: make sure this matches what's in common_build_functions.py!
+        SPOT_DIR: /usr/workspace/smithdev/califiles/serac
+    - if: $SERAC_CI_WORKFLOW_TYPE == "comparison"
+
+# Compare caliper files (generated from .benchmarks_workflow) against develop
+.comparison_workflow:
+  stage: comparison
+  rules:
+    - if: $SERAC_CI_WORKFLOW_TYPE == "comparison"
+  dependencies:
+    - blueos-clang_10_0_1-benchmarks
+    - toss4-gcc_10_3_1-benchmarks
+    - toss4-clang_14_0_6-benchmarks
 
 ####
 # Templates
@@ -73,13 +100,13 @@ variables:
     reports:
       junit: ${FULL_BUILD_ROOT}/${SYS_TYPE}/*/_serac_build_and_test_*/build-*/junit.xml
 
-
 .benchmarks_build_script:
   script:
     # Builds src, runs benchmarks, and stores Caliper files in shared location
     - echo -e "section_start:$(date +%s):benchmarks_build\r\e[0K
       Benchmarks Build ${CI_PROJECT_NAME}"
-    - ${ALLOC_COMMAND} python3 scripts/llnl/run_benchmarks.py
+    - mkdir ${SPOT_DIR}
+    - ${ALLOC_COMMAND} python3 scripts/llnl/run_benchmarks.py --spot-dir ${SPOT_DIR}
     - echo -e "section_end:$(date +%s):benchmarks_build\r\e[0K"
   artifacts:
     expire_in: 2 weeks
@@ -87,7 +114,17 @@ variables:
     paths:
       - _serac_build_and_test_*/output.log*.txt
      - _serac_build_and_test_*/build-*/output.log*.txt
-      - _serac_build_and_test_*/build-*/*.cali
+      - ${SPOT_DIR}/*.cali
+
+.comparison_build_script:
+  script:
+    # Takes Caliper files from previous jobs and performs benchmark analysis
+    - echo -e "section_start:$(date +%s):comparison\r\e[0K
+      Comparison Build ${CI_PROJECT_NAME}"
+    - ls ${SPOT_DIR}
+    - ls ${SPOT_DIR} | wc -l
+    - ${ALLOC_COMMAND} python3 scripts/llnl/compare_benchmarks.py --current-cali-dir ${SPOT_DIR} --verbose
+    - echo -e "section_end:$(date +%s):comparison\r\e[0K"
 
 # This is where jobs are included for each system
 include:
diff --git a/.gitlab/build_blueos.yml b/.gitlab/build_blueos.yml
index 3b8e6b529..1734bf5c6 100644
--- a/.gitlab/build_blueos.yml
+++ b/.gitlab/build_blueos.yml
@@ -31,15 +31,12 @@
 # Template
 .src_build_on_blueos:
   extends: [.src_build_script, .on_blueos, .src_workflow]
-  needs: []
 
 .full_build_on_blueos:
   extends: [.full_build_script, .on_blueos, .full_workflow]
-  needs: []
 
 .benchmarks_build_on_blueos:
   extends: [.benchmarks_build_script, .on_blueos, .benchmarks_workflow]
-  needs: []
 
 ####
 # Build jobs
diff --git a/.gitlab/build_toss4.yml b/.gitlab/build_toss4.yml
index d4f92a098..3e6a93f7d 100644
--- a/.gitlab/build_toss4.yml
+++ b/.gitlab/build_toss4.yml
@@ -19,19 +19,18 @@
 # Templates
 .src_build_on_toss4:
   extends: [.src_build_script, .on_toss4, .src_workflow]
-  needs: []
 
 .full_build_on_toss4:
   extends: [.full_build_script, .on_toss4, .full_workflow]
-  needs: []
   before_script:
     # LC version of pip is ancient
     - if [[ $(python3 -c 'import pip; print(pip.__version__ < "19.3")') == "True" ]]; then python3 -m pip install --user --upgrade pip; fi
 
 .benchmarks_build_on_toss4:
   extends: [.benchmarks_build_script, .on_toss4, .benchmarks_workflow]
-  needs: []
+
+.comparison_build_on_toss4:
+  extends: [.comparison_build_script, .on_toss4, .comparison_workflow]
 
 ####
 # Build jobs
@@ -113,3 +112,11 @@ toss4-gcc_10_3_1-benchmarks:
     ALLOC_TIME: "120"
     ALLOC_DEADLINE: "180"
   extends: .benchmarks_build_on_toss4
+
+# This comparison job simply runs Hatchet and compares against benchmarks generated across all configurations
+toss4-comparison:
+  variables:
+    ALLOC_NODES: "1"
+    ALLOC_TIME: "30"
+    ALLOC_DEADLINE: "45"
+  extends: .comparison_build_on_toss4
diff --git a/.gitlab/build_toss4_cray.yml b/.gitlab/build_toss4_cray.yml
index 56787c3b8..41f57b18a 100644
--- a/.gitlab/build_toss4_cray.yml
+++ b/.gitlab/build_toss4_cray.yml
@@ -16,19 +16,15 @@
 # Templates
 .src_build_on_toss4_cray:
   extends: [.src_build_script, .on_toss4_cray, .src_workflow]
-  needs: []
 
 .full_build_on_toss4_cray:
   extends: [.full_build_script, .on_toss4_cray, .full_workflow]
-  needs: []
   before_script:
     # LC version of pip is ancient
     - if [[ $(python3 -c 'import pip; print(pip.__version__ < "19.3")') == "True" ]]; then python3 -m pip install --user --upgrade pip; fi
 
 .benchmarks_build_on_toss4_cray:
   extends: [.benchmarks_build_script, .on_toss4_cray, .benchmarks_workflow]
-  needs: []
-
 ####
 # Build jobs
diff --git a/config-build.py b/config-build.py
index 954a82173..9143a8661 100755
--- a/config-build.py
+++ b/config-build.py
@@ -73,8 +73,7 @@ def parse_arguments():
                         "--buildtype",
                         type=str,
                         choices=["Release", "Debug", "RelWithDebInfo", "MinSizeRel"],
-                        default="Debug",
-                        help="build type.")
+                        help="build type. Defaults to Debug.")
 
     parser.add_argument("-e",
                         "--eclipse",
@@ -105,8 +104,6 @@ def parse_arguments():
                         action='store_true',
                         help="use ninja generator to build serac instead of make")
 
-
-
     args, unknown_args = parser.parse_known_args()
     if unknown_args:
         print("[config-build]: Passing the following arguments directly to cmake... %s" % unknown_args)
@@ -230,6 +227,7 @@ def create_cmake_command_line(args, unknown_args, buildpath, installpath, hostco
 
     # Add build type (opt or debug)
     cmakeline += " -DCMAKE_BUILD_TYPE=" + args.buildtype
+
     # Set install dir
     cmakeline += " -DCMAKE_INSTALL_PREFIX=%s" % installpath
 
@@ -277,7 +275,7 @@ def run_cmake(buildpath, cmakeline):
 # Main
 ############################
 def main():
-    repodir = os.path.abspath(os.path.dirname(__file__)) 
+    repodir = os.path.abspath(os.path.dirname(__file__))
     assert os.path.abspath(os.getcwd())==repodir, "config-build must be run from %s" % repodir
 
     args, unknown_args = parse_arguments()
@@ -295,6 +293,15 @@ def main():
     else:
         return False
 
+    # CMake build type is Debug by default, but if CMAKE_BUILD_TYPE was passed as an unknown argument
+    # (i.e. a CMake argument), then use that option instead.
+    if args.buildtype is None:
+        args.buildtype = "Debug"
+    for unknown_arg in unknown_args:
+        if "-DCMAKE_BUILD_TYPE" in unknown_arg:
+            args.buildtype = unknown_arg.split("=")[1]
+            break
+
     basehostconfigpath = find_host_config(args, repodir)
     platform_info = get_platform_info(basehostconfigpath)
     buildpath = setup_build_dir(args, platform_info)
diff --git a/scripts/llnl/compare_benchmarks.py b/scripts/llnl/compare_benchmarks.py
new file mode 100755
index 000000000..87dea4dac
--- /dev/null
+++ b/scripts/llnl/compare_benchmarks.py
@@ -0,0 +1,211 @@
+#!/bin/sh
+"exec" "python3" "-u" "-B" "$0" "$@"
+
+# Copyright (c) 2019-2024, Lawrence Livermore National Security, LLC and
+# other Serac Project Developers. See the top-level LICENSE file for details.
+#
+# SPDX-License-Identifier: (BSD-3-Clause)
+#
+# Original Source: https://llnl-hatchet.readthedocs.io/en/latest/llnl.html#id3
+
+import sys
+import os
+import platform
+import re
+import datetime as dt
+from argparse import ArgumentParser
+from common_build_functions import *
+
+
+# Setup SPOT db and hatchet (LC systems only)
+input_deploy_dir_str = "/usr/gapps/spot/live/"
+machine = platform.uname().machine
+sys.path.append(input_deploy_dir_str + "/hatchet-venv/" + machine + "/lib/python3.7/site-packages")
+sys.path.append(input_deploy_dir_str + "/hatchet/" + machine)
+sys.path.append(input_deploy_dir_str + "/spotdb")
+import hatchet
+import spotdb
+
+
+def parse_args():
+    "Parses args from command line"
+    parser = ArgumentParser()
+    parser.add_argument("-c", "--current-cali-dir",
+                        dest="current_cali_dir",
+                        required=True,
+                        help="Directory containing caliper files to compare against a baseline")
+    parser.add_argument("-b", "--baseline-cali-dir",
+                        dest="baseline_cali_dir",
+                        default=get_shared_spot_dir(),
+                        help="Directory containing caliper files that will be considered the baseline (defaults to a shared location)")
+    parser.add_argument("-ma", "--max-allowance",
+                        dest="max_allowance",
+                        default=10,
+                        help="Maximum difference (in seconds) current benchmarks are allowed to be from associated baseline")
+    parser.add_argument("-v", "--verbose",
+                        dest="verbose",
+                        action="store_true",
+                        default=False,
+                        help="Additionally print graph frames")
+    parser.add_argument("-d", "--depth",
+                        dest="depth",
+                        default=10000,
+                        help="Depth of graph frames (if verbose is on). The default shows the full graph.")
+    parser.add_argument("-mc", "--metric-column",
+                        dest="metric_column",
+                        default="Avg time/rank (inc)",
+                        help="Set the metric column to display")
+
+    # Parse args
+    args, _ = parser.parse_known_args()
+    args = vars(args)
+
+    return args
+
+
+def get_gf_configuration(gf):
+    """Get configuration string (compiler and cluster) of a given graph frame"""
+    return f"{gf.metadata.get('serac_compiler')} {gf.metadata.get('cluster')}"
+
+
+def get_benchmark_id(gf):
+    """Get unique benchmark id from a graph frame"""
+    cluster = str(gf.metadata.get("cluster", 1))
+    compiler = str(gf.metadata.get("serac_compiler", 1)).replace(" version ", "_")
+    executable = str(gf.metadata.get("executable", 1))
+    job_size = int(gf.metadata.get("jobsize", 1))
+    return "{0}_{1}_{2}_{3}".format(cluster, compiler, executable, job_size)
+
+
+def remove_color_codes(text):
+    """Remove color codes from a string"""
+    ansi_escape = re.compile(r'\x1b\[[0-9;]*[mG]')
+    return ansi_escape.sub('', text)
+
+
+def get_clean_gf_tree(gf, metric_column, depth, keep_color=False):
+    """Clean up a graph frame tree and return as string."""
+    gf_tree = gf.tree(depth=depth, render_header=False, metric_column=metric_column)
+    if not keep_color:
+        gf_tree = remove_color_codes(gf_tree)
+        gf_tree = gf_tree.split("\nLegend")[0]
+    else:
+        gf_tree = gf_tree.split("\n\x1b[4mLegend\x1b[0m")[0]
+    return gf_tree
+
+
+def get_gf_tree_sum(gf, metric_column, info_type):
+    """Sum the top-level (depth=1) metric values of a graph frame's tree."""
+    shallow_tree_str = get_clean_gf_tree(gf, metric_column, 1).splitlines()
+    sum = 0
+    for line in shallow_tree_str:
+        pos = line.find(" ")
+        if pos != -1:
+            sum += float(line[:pos])
+    return sum
+
+
+def main():
+    # setup
+    args = parse_args()
+
+    # args
+    current_cali_dir = os.path.abspath(args["current_cali_dir"])
+    baseline_cali_dir = os.path.abspath(args["baseline_cali_dir"])
+    max_allowance = float(args["max_allowance"])
+    verbose = args["verbose"]
+    depth = int(args["depth"])
+    metric_column = args["metric_column"]
+
+    # Setup baseline (shared SPOT) graph frames
+    # Only take caliper files from the previous week
+    baseline_calis = os.listdir(baseline_cali_dir)
+    baseline_calis.sort(reverse=True)
+    last_weekly_benchmark_date = baseline_calis[0].split('-')[0]
+    # Default to keeping every file, in case all of them share the most recent date
+    delete_index = len(baseline_calis)
+    for i in range(len(baseline_calis)):
+        if last_weekly_benchmark_date in baseline_calis[i]:
+            baseline_calis[i] = os.path.join(baseline_cali_dir, baseline_calis[i])
+        else:
+            delete_index = i
+            break
+    del baseline_calis[delete_index:]
+    db = spotdb.connect(baseline_cali_dir)
+    gfs_baseline = hatchet.GraphFrame.from_spotdb(db, baseline_calis)
+
+    # Setup current (local build dir) graph frames
+    current_calis = list()
+    for file in os.listdir(current_cali_dir):
+        if file.endswith(".cali"):
+            current_calis.append(os.path.join(current_cali_dir, file))
+    db = spotdb.connect(current_cali_dir)
+    gfs_current = hatchet.GraphFrame.from_spotdb(db, current_calis)
+
+    # Generate dictionary of configurations (cluster and compiler) from current (local) caliper files and
+    # filter baseline based off current's configurations
+    current_configurations = {get_gf_configuration(gf) for gf in gfs_current}
+    gfs_baseline = [gf for gf in gfs_baseline if get_gf_configuration(gf) in current_configurations]
+
+    # Create dictionary of current graph frames for fast look-ups
+    gfs_current_dict = dict()
+    for gf in gfs_current:
+        id = get_benchmark_id(gf)
+        gfs_current_dict[id] = gf
+
+    # Dictionary of total times for each benchmark (diff, current, and baseline)
+    benchmark_times = dict()
+
+    # Generate graph frames from the difference between associated current and baseline benchmarks
+    for gf_baseline in gfs_baseline:
+        id = get_benchmark_id(gf_baseline)
+        gf_current = gfs_current_dict.get(id)
+        if gf_current is None:
+            print(f"Warning: Failed to find associated benchmark {id} in specified location: {current_cali_dir}")
+            continue
+
+        gf_diff = gf_current - gf_baseline
+
+        # Print difference tree. Higher difference means local build is X seconds slower.
+        if verbose:
+            print("=" * 80)
+            print("Hatchet diff tree for {0}:".format(id))
+            print("=" * 80)
+            print(get_clean_gf_tree(gf_diff, metric_column, depth, keep_color=True))
+
+        # Store total time info for each benchmark
+        benchmark_times[id] = {
+            "diff": get_gf_tree_sum(gf_diff, metric_column, "diff"),
+            "current": get_gf_tree_sum(gf_current, metric_column, "current"),
+            "baseline": get_gf_tree_sum(gf_baseline, metric_column, "baseline"),
+        }
+
+    # Print metric column info
+    if verbose:
+        print(f"Using metric columns of '{metric_column}'. Other metric column options are:")
+        print(gfs_baseline[0].show_metric_columns())
+        print()
+
+    # Print time table and if benchmark passed or failed
+    num_failed = 0
+    num_passed = 0
+    num_benchmarks = len(benchmark_times)
+    print(f"{'Status':<10} {'Benchmark ID':<65} {'Current (seconds)':<20} {'Baseline (seconds)':<20} {'Diff (current - baseline)':<20}")
+    for id, benchmark_time in benchmark_times.items():
+        status_str = ""
+        if benchmark_time["diff"] >= max_allowance:
+            num_failed += 1
+            status_str = "❌ Failed"
+        else:
+            num_passed += 1
+            status_str = "✅ Passed"
+
+        print(f"{status_str:<10} {id:<65} {benchmark_time['current']:<20.2f} {benchmark_time['baseline']:<20.2f} {benchmark_time['diff']:<20.2f} ")
+
+    # Print summary
+    print(f"\n{num_passed} out of {num_benchmarks} benchmarks passed given a max allowance of {max_allowance} seconds")
+
+    return num_failed
+
+
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/scripts/llnl/run_benchmarks.py b/scripts/llnl/run_benchmarks.py
index 0be3325c9..a6850ede5 100755
--- a/scripts/llnl/run_benchmarks.py
+++ b/scripts/llnl/run_benchmarks.py
@@ -61,7 +61,7 @@ def main():
     args = parse_args()
     cmake_options = args["extra_cmake_options"] + " -DENABLE_BENCHMARKS=ON -DENABLE_DOCS=OFF -DCMAKE_BUILD_TYPE=Release"
     host_config = args["host_config"]
-    spot_dir = args["spot_dir"]
+    spot_dir = os.path.abspath(args["spot_dir"])
     timestamp = args["timestamp"]
 
     # Vars
diff --git a/src/docs/sphinx/dev_guide/profiling.rst b/src/docs/sphinx/dev_guide/profiling.rst
index 356760966..6487e6bb3 100644
--- a/src/docs/sphinx/dev_guide/profiling.rst
+++ b/src/docs/sphinx/dev_guide/profiling.rst
@@ -114,10 +114,11 @@ Benchmarking Serac
 
 To run all of Serac's benchmarks in one command, first make sure Serac is configured with benchmarking enabled
 (off by default). Then, run the build target ``run_benchmarks``.
+Make sure benchmarks are enabled and the build type is ``Release``.
 
 .. code-block:: bash
 
-   ./config-build.py -hc <host config file> -DENABLE_BENCHMARKS=ON
+   ./config-build.py -hc <host config file> -bt Release -DENABLE_BENCHMARKS=ON
    cd <build directory>
    make -j
    make run_benchmarks
@@ -139,7 +140,7 @@ files:
 Serac benchmarks are run weekly to track changes over time.
 The following are steps to visualize this data in a meaningful way:
 
-- Go to https://lc.llnl.gov/spot2/?sf=/usr/WS2/smithdev/califiles/serac
+- Go to https://lc.llnl.gov/spot2/?sf=/usr/workspace/smithdev/califiles/serac
 - Click the check mark button on the top right to view additional data categories
 - Ensure ``mpi.world.size``, ``executable``, ``cluster``, and ``compilers`` are enabled
 - Find the pie and bar charts associated with those categories
@@ -153,3 +154,28 @@ you don't view two of one single category.
 
 .. note:: There is a bug in SPOT where if you remove Caliper files from a directory, they still show up on SPOT - if
    you've visualized them previously. The current workaround is by removing the ``llnl.gov`` site cache manually.
+
+Compare a PR's benchmarks vs Develop
+------------------------------------
+
+Utilizing Hatchet, it is possible to view the performance changes of a prospective PR before it merges into
+develop. This process has been conveniently wrapped in a CI pipeline. This Hatchet comparison can only be performed
+on LC, since the baseline benchmarks are generated on LC systems.
+
+1. Go to the following CZ GitLab page to create a new pipeline: https://lc.llnl.gov/gitlab/smith/serac/-/pipelines/new
+2. Choose your branch
+3. Under variables, add "SERAC_CI_WORKFLOW_TYPE" and "comparison" for the key and value, respectively
+
+It's possible to perform this comparison locally. Since baseline benchmarks are generated across different machines and
+compilers, a single build won't compare against all baselines. The benchmarks can be compared using ruby-gcc,
+ruby-clang, and lassen-clang builds.
+
+1. Run benchmarks (see "Benchmarking Serac" above)
+2. ``../scripts/llnl/compare_benchmarks.py --current-cali-dir /path/to/caliper/files``
+
+The script generates Hatchet graph frames by calculating the difference between each associated baseline and local
+benchmark (``gf_diff = gf_current - gf_baseline``). If there is a positive difference, that means your benchmarks ran
+that many seconds slower.
+
+By default, ``compare_benchmarks.py`` will print a table containing the status, id, difference, baseline, and current
+times. Running with the verbose option will additionally print the "difference" Hatchet graph frame for each benchmark.
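For readers who want to explore the Caliper data interactively rather than through ``compare_benchmarks.py``, the
sketch below shows the core Hatchet workflow the script wraps: load ``.cali`` files through SpotDB, subtract two
graph frames, and inspect the difference. It is a minimal sketch and not part of the patch above; the ``sys.path``
additions and SPOT locations mirror the LC deployment used in ``compare_benchmarks.py``, while ``current_dir`` is a
hypothetical local directory of Caliper files.

.. code-block:: python

   import os
   import sys
   import platform

   # Make the LC-deployed Hatchet and SpotDB importable (same paths as compare_benchmarks.py)
   spot_root = "/usr/gapps/spot/live/"
   machine = platform.uname().machine
   sys.path.append(spot_root + "hatchet-venv/" + machine + "/lib/python3.7/site-packages")
   sys.path.append(spot_root + "hatchet/" + machine)
   sys.path.append(spot_root + "spotdb")
   import hatchet
   import spotdb

   # Hypothetical local build output and the shared weekly baseline location
   current_dir = "/path/to/local/califiles"
   baseline_dir = "/usr/workspace/smithdev/califiles/serac"

   def load_graph_frames(cali_dir):
       """Load every .cali file in a directory as a Hatchet GraphFrame."""
       runs = [os.path.join(cali_dir, f) for f in os.listdir(cali_dir) if f.endswith(".cali")]
       db = spotdb.connect(cali_dir)
       return hatchet.GraphFrame.from_spotdb(db, runs)

   gfs_current = load_graph_frames(current_dir)
   gfs_baseline = load_graph_frames(baseline_dir)

   # For illustration only, diff the first run from each set; the real script first matches
   # runs by cluster/compiler/executable/job-size metadata before subtracting.
   gf_diff = gfs_current[0] - gfs_baseline[0]

   # Positive values mean the current run spent more time in that node than the baseline
   print(gf_diff.tree(metric_column="Avg time/rank (inc)"))

Matching runs by metadata (as ``get_benchmark_id`` does in the script) matters because both directories can hold runs
from several clusters, compilers, and job sizes, and only like-for-like pairs produce a meaningful difference.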