#!/bin/bash

# Wrapper script to run the timing benchmark for a specific dataset locally
# Usage: ./run_timing_benchmark.sh <path-to-dataset.csv>

set -e

# Determine script directory and repository root
SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
REPO_ROOT="$(cd "$SCRIPT_DIR/.." && pwd)"

if [ $# -eq 0 ]; then
  echo "ERROR: No dataset CSV file provided"
  echo "Usage: $0 <path-to-dataset.csv>"
  echo ""
  echo "Examples:"
  echo "  $0 SIDIS/DIJET/pythia6.428-dijet-v1.0_PGF_noRC_ep_18x275_q2_1to20000_ab.csv"
  echo "  $0 EXCLUSIVE/UPSILON.csv"
  exit 1
fi

DATA="$1"

# Change to repo root directory
cd "$REPO_ROOT"

if [ ! -f "$DATA" ]; then
  echo "ERROR: File '$DATA' not found"
  exit 1
fi

# Set default environment variables (can be overridden)
export DETECTOR_CONFIG="${DETECTOR_CONFIG:-epic_craterlake}"
export DETECTOR_VERSION="${DETECTOR_VERSION:-main}"
export EBEAM="${EBEAM:-18}"
export PBEAM="${PBEAM:-275}"
export NEVENTS_PER_TEST="${NEVENTS_PER_TEST:-100}"
export IMAGE_TAG="${IMAGE_TAG:-nightly}"
export RESULTS_BASE="results/${IMAGE_TAG}/${DETECTOR_CONFIG}/${DETECTOR_VERSION}"
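
# Example: override some of the defaults above for a quick local run
# (the values below are purely illustrative):
#   NEVENTS_PER_TEST=50 IMAGE_TAG=nightly ./run_timing_benchmark.sh EXCLUSIVE/UPSILON.csv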

# Set up parallel alias
shopt -s expand_aliases
alias parallel='parallel -k --lb -j 1 --colsep ","'
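# Flags: -k keeps output in input order, --lb line-buffers each job's output,
# -j 1 runs one job at a time, and --colsep "," treats each input line as
# comma-separated columns.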

echo "========================================"
echo "Running timing benchmark for: $DATA"
echo "========================================"
echo "DETECTOR_CONFIG = ${DETECTOR_CONFIG}"
echo "DETECTOR_VERSION = ${DETECTOR_VERSION}"
echo "EBEAM = ${EBEAM}"
echo "PBEAM = ${PBEAM}"
echo "NEVENTS_PER_TEST = ${NEVENTS_PER_TEST}"
echo "IMAGE_TAG = ${IMAGE_TAG}"
echo "RESULTS_BASE = ${RESULTS_BASE}"
echo "========================================"
echo ""

# Stage 1: Glob - Find matching files
echo "[1/3] Running glob stage to find matching files..."
mkdir -p $(dirname ${RESULTS_BASE}/datasets/glob/$DATA)
grep -v "^\#" $DATA | parallel scripts/glob.sh ${RESULTS_BASE}/datasets/glob/$DATA {}
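# sort -o can safely write back to its own input file (sort reads all input
# before opening the output), so this sorts the glob list in place.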
sort -o ${RESULTS_BASE}/datasets/glob/$DATA ${RESULTS_BASE}/datasets/glob/$DATA
echo "✓ Glob stage complete"
echo ""

# Stage 2: Nevents - Count events
echo "[2/3] Running nevents stage to count events..."
mkdir -p $(dirname ${RESULTS_BASE}/datasets/nevents/$DATA)
grep -v "^\#" ${RESULTS_BASE}/datasets/glob/$DATA | parallel scripts/count_events.sh ${RESULTS_BASE}/datasets/nevents/$DATA {}
sort -o ${RESULTS_BASE}/datasets/nevents/$DATA ${RESULTS_BASE}/datasets/nevents/$DATA
echo "✓ Nevents stage complete"
echo ""

# Stage 3: Timings - Run timing benchmarks
echo "[3/3] Running timings stage (this may take a while)..."
mkdir -p $(dirname ${RESULTS_BASE}/datasets/timings/$DATA)
# First file with timing measurement
grep -v "^\#" ${RESULTS_BASE}/datasets/nevents/$DATA | sed '1!d' | parallel scripts/determine_timing.sh ${RESULTS_BASE}/datasets/timings/$DATA {}
# Remaining files using timing from first file
IFS="," read file ext nevents dt0 dt1 < ${RESULTS_BASE}/datasets/timings/$DATA
export dt0 dt1
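# dt0/dt1 from the first file are exported so the determine_timing.sh calls
# below can reuse them instead of re-measuring every remaining file (the
# actual reuse logic is assumed to live in scripts/determine_timing.sh).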
grep -v "^\#" ${RESULTS_BASE}/datasets/nevents/$DATA | sed '1d' | parallel scripts/determine_timing.sh ${RESULTS_BASE}/datasets/timings/$DATA {}
sort -o ${RESULTS_BASE}/datasets/timings/$DATA ${RESULTS_BASE}/datasets/timings/$DATA
echo "✓ Timings stage complete"
echo ""

# Show results
echo "========================================"
echo "RESULTS"
echo "========================================"
echo ""
echo "Timing results saved to:"
echo "  ${RESULTS_BASE}/datasets/timings/$DATA"
echo ""
echo "Summary:"
cat ${RESULTS_BASE}/datasets/timings/$DATA | awk 'BEGIN {FS=","} {sum+=$3*$5+$4} END {printf(" Total core-hours: %.2f\n",sum/3600)}'
cat ${RESULTS_BASE}/datasets/timings/$DATA | awk 'BEGIN {FS=","} {sum+=$3*$7+$6} END {printf(" Total size (full): %.2f GB\n",sum/1048576)}'
cat ${RESULTS_BASE}/datasets/timings/$DATA | awk 'BEGIN {FS=","} {sum+=$3*$9+$8} END {printf(" Total size (reco): %.2f GB\n",sum/1048576)}'
echo ""
echo "Full timing data:"
cat ${RESULTS_BASE}/datasets/timings/$DATA