Skip to content

Commit

Permalink
Add min rel accuracy stopping criterion
Browse files Browse the repository at this point in the history
Clean up the initial commit

Further cleaning of initial commit. Add test.

Improvements to comments thanks to review

Reformat thanks to clang format.

Static cast to avoid conversion warning
  • Loading branch information
maartenarnst committed Dec 10, 2024
1 parent b2b0aab commit 35285be
Show file tree
Hide file tree
Showing 11 changed files with 222 additions and 20 deletions.
26 changes: 22 additions & 4 deletions include/benchmark/benchmark.h
Original file line number Diff line number Diff line change
Expand Up @@ -126,8 +126,12 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
}
BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue<int>)->Range(1<<0, 1<<10);
Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
benchmark. This option overrides the `benchmark_min_time` flag.
Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how
long to run the benchmark. This option overrides the `benchmark_min_time` flag.
If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)`
to set the required minimum relative accuracy used to determine how long to run
the benchmark. This option overrides the `benchmark_min_rel_accuracy` flag.
void BM_test(benchmark::State& state) {
... body ...
Expand Down Expand Up @@ -1262,11 +1266,21 @@ class BENCHMARK_EXPORT Benchmark {
// multiplier kRangeMultiplier will be used.
Benchmark* RangeMultiplier(int multiplier);

// Set the minimum amount of time to use when running this benchmark. This
// option overrides the `benchmark_min_time` flag.
// Set the minimum amount of time to use to determine the required number
// of iterations when running this benchmark. This option overrides
// the `benchmark_min_time` flag.
// REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
Benchmark* MinTime(double t);

// Set the minimum relative accuracy to use to determine the required number
// of iterations when running this benchmark. This option overrides
// the `benchmark_min_rel_accuracy` flag.
// REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and
// time is measured manually, i.e., `UseManualTime` has been called on this
// benchmark and each benchmark iteration should call
// `SetIterationTime(seconds)` to report the measured time.
Benchmark* MinRelAccuracy(double r);

// Set the minimum amount of time to run the benchmark before taking runtimes
// of this benchmark into account. This
// option overrides the `benchmark_min_warmup_time` flag.
Expand Down Expand Up @@ -1389,6 +1403,7 @@ class BENCHMARK_EXPORT Benchmark {

int range_multiplier_;
double min_time_;
double min_rel_accuracy_;
double min_warmup_time_;
IterationCount iterations_;
int repetitions_;
Expand Down Expand Up @@ -1821,6 +1836,7 @@ struct BENCHMARK_EXPORT BenchmarkName {
std::string function_name;
std::string args;
std::string min_time;
std::string min_rel_accuracy;
std::string min_warmup_time;
std::string iterations;
std::string repetitions;
Expand Down Expand Up @@ -1860,6 +1876,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
threads(1),
time_unit(GetDefaultTimeUnit()),
real_accumulated_time(0),
manual_accumulated_time_pow2(0),
cpu_accumulated_time(0),
max_heapbytes_used(0),
use_real_time_for_initial_big_o(false),
Expand Down Expand Up @@ -1888,6 +1905,7 @@ class BENCHMARK_EXPORT BenchmarkReporter {
int64_t repetitions;
TimeUnit time_unit;
double real_accumulated_time;
double manual_accumulated_time_pow2;
double cpu_accumulated_time;

// Return a value representing the real time per iteration in the unit
Expand Down
30 changes: 23 additions & 7 deletions src/benchmark.cc
Original file line number Diff line number Diff line change
Expand Up @@ -65,12 +65,12 @@ BM_DEFINE_bool(benchmark_list_tests, false);
// linked into the binary are run.
BM_DEFINE_string(benchmark_filter, "");

// Specification of how long to run the benchmark.
// Specification of either an exact number of iterations (specified as
// `<integer>x`) or a minimum number of seconds (specified as `<float>s`) used
// to determine how long to run the benchmark.
//
// It can be either an exact number of iterations (specified as `<integer>x`),
// or a minimum number of seconds (specified as `<float>s`). If the latter
// format (i.e., min seconds) is used, the system may run the benchmark longer
// until the results are considered significant.
// If the latter format (i.e., min seconds) is used, the system may run
// the benchmark longer until the results are considered significant.
//
// For backward compatibility, the `s` suffix may be omitted, in which case,
// the specified number is interpreted as the number of seconds.
Expand All @@ -81,6 +81,19 @@ BM_DEFINE_string(benchmark_filter, "");
// benchmark execution, regardless of number of threads.
BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr);

// Specification of required relative accuracy used to determine how
// long to run the benchmark.
//
// REQUIRES: time is measured manually.
//
// Manual timers provide per-iteration times. The relative accuracy is
// measured as the standard deviation of these per-iteration times divided by
// the product of their mean and the square root of the number of iterations
// (i.e., the relative standard error of the mean). The benchmark is
// run until both of the following conditions are fulfilled:
// 1. the specified minimum time or number of iterations is reached
// 2. the measured relative accuracy meets the specified requirement
BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0);

// Minimum number of seconds a benchmark should be run before results should be
// taken into account. This can be necessary, e.g., for benchmarks of code which
// needs to fill some form of cache before performance is of interest.
Expand All @@ -94,7 +107,7 @@ BM_DEFINE_int32(benchmark_repetitions, 1);

// If enabled, forces each benchmark to execute exactly one iteration and one
// repetition, bypassing any configured
// MinTime()/MinWarmUpTime()/Iterations()/Repetitions()
// MinTime()/MinRelAccuracy()/MinWarmUpTime()/Iterations()/Repetitions()
BM_DEFINE_bool(benchmark_dry_run, false);

// If set, enable random interleaving of repetitions of all benchmarks.
Expand Down Expand Up @@ -722,6 +735,8 @@ void ParseCommandLineFlags(int* argc, char** argv) {
ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) ||
ParseStringFlag(argv[i], "benchmark_min_time",
&FLAGS_benchmark_min_time) ||
ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy",
&FLAGS_benchmark_min_rel_accuracy) ||
ParseDoubleFlag(argv[i], "benchmark_min_warmup_time",
&FLAGS_benchmark_min_warmup_time) ||
ParseInt32Flag(argv[i], "benchmark_repetitions",
Expand Down Expand Up @@ -793,7 +808,8 @@ void PrintDefaultHelp() {
"benchmark"
" [--benchmark_list_tests={true|false}]\n"
" [--benchmark_filter=<regex>]\n"
" [--benchmark_min_time=`<integer>x` OR `<float>s` ]\n"
" [--benchmark_min_time=`<integer>x` OR `<float>s`]\n"
" [--benchmark_min_rel_accuracy=<min_rel_accuracy>]\n"
" [--benchmark_min_warmup_time=<min_warmup_time>]\n"
" [--benchmark_repetitions=<num_repetitions>]\n"
" [--benchmark_dry_run={true|false}]\n"
Expand Down
6 changes: 6 additions & 0 deletions src/benchmark_api_internal.cc
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
statistics_(benchmark_.statistics_),
repetitions_(benchmark_.repetitions_),
min_time_(benchmark_.min_time_),
min_rel_accuracy_(benchmark_.min_rel_accuracy_),
min_warmup_time_(benchmark_.min_warmup_time_),
iterations_(benchmark_.iterations_),
threads_(thread_count) {
Expand All @@ -51,6 +52,11 @@ BenchmarkInstance::BenchmarkInstance(Benchmark* benchmark, int family_idx,
name_.min_time = StrFormat("min_time:%0.3f", benchmark_.min_time_);
}

if (!IsZero(benchmark->min_rel_accuracy_)) {
name_.min_rel_accuracy =
StrFormat("min_rel_accuracy:%0.3f", benchmark_.min_rel_accuracy_);
}

if (!IsZero(benchmark->min_warmup_time_)) {
name_.min_warmup_time =
StrFormat("min_warmup_time:%0.3f", benchmark_.min_warmup_time_);
Expand Down
2 changes: 2 additions & 0 deletions src/benchmark_api_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,7 @@ class BenchmarkInstance {
const std::vector<Statistics>& statistics() const { return statistics_; }
int repetitions() const { return repetitions_; }
double min_time() const { return min_time_; }
double min_rel_accuracy() const { return min_rel_accuracy_; }
double min_warmup_time() const { return min_warmup_time_; }
IterationCount iterations() const { return iterations_; }
int threads() const { return threads_; }
Expand Down Expand Up @@ -64,6 +65,7 @@ class BenchmarkInstance {
const std::vector<Statistics>& statistics_;
int repetitions_;
double min_time_;
double min_rel_accuracy_;
double min_warmup_time_;
IterationCount iterations_;
int threads_; // Number of concurrent threads to use
Expand Down
9 changes: 9 additions & 0 deletions src/benchmark_register.cc
Original file line number Diff line number Diff line change
Expand Up @@ -211,6 +211,7 @@ Benchmark::Benchmark(const std::string& name)
use_default_time_unit_(true),
range_multiplier_(kRangeMultiplier),
min_time_(0),
min_rel_accuracy_(0),
min_warmup_time_(0),
iterations_(0),
repetitions_(0),
Expand Down Expand Up @@ -356,6 +357,14 @@ Benchmark* Benchmark::MinTime(double t) {
return this;
}

// Sets the minimum relative accuracy required of this benchmark and returns
// `this`. Overrides the `benchmark_min_rel_accuracy` flag for this benchmark.
Benchmark* Benchmark::MinRelAccuracy(double r) {
// Zero is the constructor default and is treated as "not set", so only
// strictly positive requirements are accepted.
BM_CHECK(r > 0.0);
// An explicit iteration count must not have been requested via `Iterations`.
BM_CHECK(iterations_ == 0);
// The accuracy criterion relies on manually reported per-iteration times, so
// `UseManualTime` must already have been called on this benchmark.
BM_CHECK(use_manual_time_);
min_rel_accuracy_ = r;
return this;
}

Benchmark* Benchmark::MinWarmUpTime(double t) {
BM_CHECK(t >= 0.0);
BM_CHECK(iterations_ == 0);
Expand Down
49 changes: 41 additions & 8 deletions src/benchmark_runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,7 @@ namespace benchmark {

BM_DECLARE_bool(benchmark_dry_run);
BM_DECLARE_string(benchmark_min_time);
BM_DECLARE_double(benchmark_min_rel_accuracy);
BM_DECLARE_double(benchmark_min_warmup_time);
BM_DECLARE_int32(benchmark_repetitions);
BM_DECLARE_bool(benchmark_report_aggregates_only);
Expand Down Expand Up @@ -103,6 +104,7 @@ BenchmarkReporter::Run CreateRunReport(
if (!report.skipped) {
if (b.use_manual_time()) {
report.real_accumulated_time = results.manual_time_used;
report.manual_accumulated_time_pow2 = results.manual_time_used_pow2;
} else {
report.real_accumulated_time = results.real_time_used;
}
Expand Down Expand Up @@ -151,6 +153,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters,
results.cpu_time_used += timer.cpu_time_used();
results.real_time_used += timer.real_time_used();
results.manual_time_used += timer.manual_time_used();
results.manual_time_used_pow2 += timer.manual_time_used_pow2();
results.complexity_n += st.complexity_length_n();
internal::Increment(&results.counters, st.counters);
}
Expand Down Expand Up @@ -239,6 +242,11 @@ BenchmarkRunner::BenchmarkRunner(
min_time(FLAGS_benchmark_dry_run
? 0
: ComputeMinTime(b_, parsed_benchtime_flag)),
min_rel_accuracy(FLAGS_benchmark_dry_run
? std::numeric_limits<double>::max()
: (!IsZero(b.min_rel_accuracy())
? b.min_rel_accuracy()
: FLAGS_benchmark_min_rel_accuracy)),
min_warmup_time(
FLAGS_benchmark_dry_run
? 0
Expand Down Expand Up @@ -318,8 +326,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() {

// Base decisions off of real time if requested by this benchmark.
i.seconds = i.results.cpu_time_used;
i.seconds_pow2 = 0;
if (b.use_manual_time()) {
i.seconds = i.results.manual_time_used;
i.seconds_pow2 = i.results.manual_time_used_pow2;
} else if (b.use_real_time()) {
i.seconds = i.results.real_time_used;
}
Expand All @@ -340,6 +350,11 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded(
const bool is_significant = (i.seconds / GetMinTimeToApply()) > 0.1;
multiplier = is_significant ? multiplier : 10.0;

if (!IsZero(GetMinRelAccuracy())) {
multiplier =
std::max(multiplier, GetRelAccuracy(i) * 1.4 / GetMinRelAccuracy());
}

// So what seems to be the sufficiently-large iteration count? Round up.
const IterationCount max_next_iters = static_cast<IterationCount>(
std::llround(std::max(multiplier * static_cast<double>(i.iters),
Expand All @@ -357,14 +372,12 @@ bool BenchmarkRunner::ShouldReportIterationResults(
// Either it has run for a sufficient amount of time
// or because an error was reported.
return i.results.skipped_ || FLAGS_benchmark_dry_run ||
i.iters >= kMaxIterations || // Too many iterations already.
i.seconds >=
GetMinTimeToApply() || // The elapsed time is large enough.
// CPU time is specified but the elapsed real time greatly exceeds
// the minimum time.
// Note that user provided timers are exempt from this test.
((i.results.real_time_used >= 5 * GetMinTimeToApply()) &&
!b.use_manual_time());
// Too many iterations already.
i.iters >= kMaxIterations ||
// We have applied for enough time and the relative accuracy is good
// enough. Relative accuracy is checked only for user provided timers.
(HasSufficientTimeToApply(i) &&
(!b.use_manual_time() || HasSufficientRelAccuracy(i)));
}

double BenchmarkRunner::GetMinTimeToApply() const {
Expand All @@ -376,6 +389,26 @@ double BenchmarkRunner::GetMinTimeToApply() const {
return warmup_done ? min_time : min_warmup_time;
}

// Returns the relative accuracy of the measurement described by `i`: the
// standard deviation of the per-iteration times divided by their mean and by
// the square root of the number of iterations. With S1 = i.seconds (sum of the
// times), S2 = i.seconds_pow2 (sum of their squares) and n = i.iters, this
// simplifies to sqrt(S2 - S1^2 / n) / S1.
double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const {
  const double sum = i.seconds;
  const double count = static_cast<double>(i.iters);
  // Mathematically non-negative, but floating-point rounding can push the
  // difference slightly below zero when the per-iteration times are nearly
  // identical. Clamp it so that std::sqrt does not yield NaN, which would
  // never compare as meeting the accuracy requirement.
  const double scatter = std::max(i.seconds_pow2 - sum * sum / count, 0.0);
  return std::sqrt(scatter) / sum;
}

// Reports whether the run described by `i` has lasted long enough when
// measured against the currently applicable minimum time.
bool BenchmarkRunner::HasSufficientTimeToApply(
    const IterationResults& i) const {
  // The time used for the stopping decision has reached the minimum.
  if (i.seconds >= GetMinTimeToApply()) {
    return true;
  }
  // User provided timers are exempt from the real-time test below.
  if (b.use_manual_time()) {
    return false;
  }
  // CPU time is specified but the elapsed real time greatly exceeds the
  // minimum time.
  return i.results.real_time_used >= 5 * GetMinTimeToApply();
}

// Reports whether the relative accuracy measured for `i` satisfies the
// configured minimum relative accuracy.
bool BenchmarkRunner::HasSufficientRelAccuracy(
    const IterationResults& i) const {
  // A zero requirement means that no accuracy criterion is in force.
  if (IsZero(GetMinRelAccuracy())) {
    return true;
  }
  return GetRelAccuracy(i) <= GetMinRelAccuracy();
}

void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
warmup_done = true;
iters = i;
Expand Down
10 changes: 10 additions & 0 deletions src/benchmark_runner.h
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ class BenchmarkRunner {

double GetMinTime() const { return min_time; }

double GetMinRelAccuracy() const { return min_rel_accuracy; }

bool HasExplicitIters() const { return has_explicit_iteration_count; }

IterationCount GetIters() const { return iters; }
Expand All @@ -83,6 +85,7 @@ class BenchmarkRunner {

BenchTimeType parsed_benchtime_flag;
const double min_time;
const double min_rel_accuracy;
const double min_warmup_time;
bool warmup_done;
const int repeats;
Expand All @@ -104,6 +107,7 @@ class BenchmarkRunner {
internal::ThreadManager::Result results;
IterationCount iters;
double seconds;
double seconds_pow2;
};
IterationResults DoNIterations();

Expand All @@ -117,6 +121,12 @@ class BenchmarkRunner {

double GetMinTimeToApply() const;

double GetRelAccuracy(const IterationResults& i) const;

bool HasSufficientTimeToApply(const IterationResults& i) const;

bool HasSufficientRelAccuracy(const IterationResults& i) const;

void FinishWarmUp(const IterationCount& i);

void RunWarmUp();
Expand Down
1 change: 1 addition & 0 deletions src/thread_manager.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,7 @@ class ThreadManager {
double real_time_used = 0;
double cpu_time_used = 0;
double manual_time_used = 0;
double manual_time_used_pow2 = 0;
int64_t complexity_n = 0;
std::string report_label_;
std::string skip_message_;
Expand Down
11 changes: 10 additions & 1 deletion src/thread_timer.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,10 @@ class ThreadTimer {
}

// Called by each thread
void SetIterationTime(double seconds) { manual_time_used_ += seconds; }
// Records the manually measured duration of one iteration: `seconds` is added
// to the running sum of iteration times and its square to the running sum of
// squared iteration times.
void SetIterationTime(double seconds) {
  manual_time_used_ += seconds;
  // A plain multiplication squares the value without a general-purpose
  // std::pow call and without depending on <cmath> being included here.
  manual_time_used_pow2_ += seconds * seconds;
}

bool running() const { return running_; }

Expand All @@ -60,6 +63,11 @@ class ThreadTimer {
return manual_time_used_;
}

// Returns the accumulated sum of the squared iteration times that were
// reported through SetIterationTime.
double manual_time_used_pow2() const {
// Must only be read while the timer is not running.
BM_CHECK(!running_);
return manual_time_used_pow2_;
}

private:
double ReadCpuTimerOfChoice() const {
if (measure_process_cpu_time) return ProcessCPUUsage();
Expand All @@ -78,6 +86,7 @@ class ThreadTimer {
double cpu_time_used_ = 0;
// Manually set iteration time. User sets this with SetIterationTime(seconds).
double manual_time_used_ = 0;
double manual_time_used_pow2_ = 0;
};

} // namespace internal
Expand Down
3 changes: 3 additions & 0 deletions test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -97,6 +97,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_
compile_benchmark_test(benchmark_min_time_flag_iters_test)
benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test)

compile_benchmark_test(benchmark_min_rel_accuracy_flag_test)
benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test)

add_filter_test(filter_simple "Foo" 3)
add_filter_test(filter_simple_negative "-Foo" 2)
add_filter_test(filter_suffix "BM_.*" 4)
Expand Down
Loading

0 comments on commit 35285be

Please sign in to comment.