diff --git a/include/benchmark/benchmark.h b/include/benchmark/benchmark.h
index 84c39fb238..7607e94896 100644
--- a/include/benchmark/benchmark.h
+++ b/include/benchmark/benchmark.h
@@ -126,8 +126,12 @@ template <class Q> int BM_Sequential(benchmark::State& state) {
 }
 BENCHMARK_TEMPLATE(BM_Sequential, WaitQueue)->Range(1<<0, 1<<10);
 
-Use `Benchmark::MinTime(double t)` to set the minimum time used to run the
-benchmark. This option overrides the `benchmark_min_time` flag.
+Use `Benchmark::MinTime(double t)` to set the minimum time used to determine how long
+to run the benchmark. This option overrides the `benchmark_min_time` flag.
+
+If a benchmark measures time manually, use `Benchmark::MinRelAccuracy(double r)` to set
+the required minimum relative accuracy used to determine how long to run the benchmark.
+This option overrides the `benchmark_min_rel_accuracy` flag.
 
 void BM_test(benchmark::State& state) {
  ... body ...
@@ -1190,11 +1194,19 @@ class BENCHMARK_EXPORT Benchmark {
   // multiplier kRangeMultiplier will be used.
   Benchmark* RangeMultiplier(int multiplier);
 
-  // Set the minimum amount of time to use when running this benchmark. This
-  // option overrides the `benchmark_min_time` flag.
+  // Set the minimum amount of time to use to determine the required number
+  // of iterations when running this benchmark. This option overrides
+  // the `benchmark_min_time` flag.
   // REQUIRES: `t > 0` and `Iterations` has not been called on this benchmark.
   Benchmark* MinTime(double t);
 
+  // Set the minimum relative accuracy to use to determine the required number
+  // of iterations when running this benchmark. This option overrides
+  // the `benchmark_min_rel_accuracy` flag.
+  // REQUIRES: `r > 0`, `Iterations` has not been called on this benchmark, and
+  // time is measured manually, i.e., `UseManualTime` has been called on this
+  // benchmark and each benchmark iteration should call SetIterationTime(seconds)
+  // to report the measured time.
Benchmark* MinRelAccuracy(double r); // Set the minimum amount of time to run the benchmark before taking runtimes @@ -1766,6 +1778,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { threads(1), time_unit(GetDefaultTimeUnit()), real_accumulated_time(0), + manual_accumulated_time_pow2(0), cpu_accumulated_time(0), max_heapbytes_used(0), complexity(oNone), @@ -1793,6 +1806,7 @@ class BENCHMARK_EXPORT BenchmarkReporter { int64_t repetitions; TimeUnit time_unit; double real_accumulated_time; + double manual_accumulated_time_pow2; double cpu_accumulated_time; // Return a value representing the real time per iteration in the unit diff --git a/src/benchmark.cc b/src/benchmark.cc index 7dd6eaf519..35e0bbf185 100644 --- a/src/benchmark.cc +++ b/src/benchmark.cc @@ -65,11 +65,11 @@ BM_DEFINE_bool(benchmark_list_tests, false); // linked into the binary are run. BM_DEFINE_string(benchmark_filter, ""); -// Specification of how long to run the benchmark. +// Specification of either an exact number of iterations (specified as `x`) +// or a minimum number of seconds (specified as `s`) used to determine how +// long to run the benchmark. // -// It can be either an exact number of iterations (specified as `x`), -// or a minimum number of seconds (specified as `s`). If the latter -// format (ie., min seconds) is used, the system may run the benchmark longer +// If the latter format (ie., min seconds) is used, the system may run the benchmark longer // until the results are considered significant. // // For backward compatibility, the `s` suffix may be omitted, in which case, @@ -81,6 +81,18 @@ BM_DEFINE_string(benchmark_filter, ""); // benchmark execution, regardless of number of threads. BM_DEFINE_string(benchmark_min_time, kDefaultMinTimeStr); +// Specification of required relative accuracy used to determine how +// long to run the benchmark. +// +// REQUIRES: time is measured manually. +// +// Manual timers provide per-iteration times. 
The relative accuracy is +// measured as the standard deviation of these per-iteration times divided by +// the mean and the square root of the number of iterations. The benchmark is +// run until the specified minimum time or number of iterations is reached +// and the measured relative accuracy meets the specified requirement. +BM_DEFINE_double(benchmark_min_rel_accuracy, 0.0); + // Minimum number of seconds a benchmark should be run before results should be // taken into account. This e.g can be necessary for benchmarks of code which // needs to fill some form of cache before performance is of interest. @@ -694,6 +706,8 @@ void ParseCommandLineFlags(int* argc, char** argv) { ParseStringFlag(argv[i], "benchmark_filter", &FLAGS_benchmark_filter) || ParseStringFlag(argv[i], "benchmark_min_time", &FLAGS_benchmark_min_time) || + ParseDoubleFlag(argv[i], "benchmark_min_rel_accuracy", + &FLAGS_benchmark_min_rel_accuracy) || ParseDoubleFlag(argv[i], "benchmark_min_warmup_time", &FLAGS_benchmark_min_warmup_time) || ParseInt32Flag(argv[i], "benchmark_repetitions", @@ -753,7 +767,8 @@ void PrintDefaultHelp() { "benchmark" " [--benchmark_list_tests={true|false}]\n" " [--benchmark_filter=]\n" - " [--benchmark_min_time=`x` OR `s` ]\n" + " [--benchmark_min_time=`x` OR `s`]\n" + " [--benchmark_min_rel_accuracy=]\n" " [--benchmark_min_warmup_time=]\n" " [--benchmark_repetitions=]\n" " [--benchmark_enable_random_interleaving={true|false}]\n" diff --git a/src/benchmark_register.cc b/src/benchmark_register.cc index 0795a1a933..1324e6c46e 100644 --- a/src/benchmark_register.cc +++ b/src/benchmark_register.cc @@ -360,6 +360,7 @@ Benchmark* Benchmark::MinTime(double t) { Benchmark* Benchmark::MinRelAccuracy(double r) { BM_CHECK(r > 0.0); BM_CHECK(iterations_ == 0); + BM_CHECK(use_manual_time_); min_rel_accuracy_ = r; return this; } diff --git a/src/benchmark_runner.cc b/src/benchmark_runner.cc index 327a289085..f1ead6c4a9 100644 --- a/src/benchmark_runner.cc +++ 
b/src/benchmark_runner.cc @@ -93,6 +93,7 @@ BenchmarkReporter::Run CreateRunReport( if (!report.skipped) { if (b.use_manual_time()) { report.real_accumulated_time = results.manual_time_used; + report.manual_accumulated_time_pow2 = results.manual_time_used_pow2; } else { report.real_accumulated_time = results.real_time_used; } @@ -139,7 +140,7 @@ void RunInThread(const BenchmarkInstance* b, IterationCount iters, results.cpu_time_used += timer.cpu_time_used(); results.real_time_used += timer.real_time_used(); results.manual_time_used += timer.manual_time_used(); - results.manual_time_used2 += timer.manual_time_used2(); + results.manual_time_used_pow2 += timer.manual_time_used_pow2(); results.complexity_n += st.complexity_length_n(); internal::Increment(&results.counters, st.counters); } @@ -225,8 +226,10 @@ BenchmarkRunner::BenchmarkRunner( : b(b_), reports_for_family(reports_for_family_), parsed_benchtime_flag(ParseBenchMinTime(FLAGS_benchmark_min_time)), - min_time(ComputeMinTime(b_, parsed_benchtime_flag)), - min_rel_accuracy(b_min_rel_accuracy()), + min_time(ComputeMinTime(b, parsed_benchtime_flag)), + min_rel_accuracy(!IsZero(b.min_rel_accuracy()) + ? b.min_rel_accuracy() + : FLAGS_benchmark_min_rel_accuracy), min_warmup_time((!IsZero(b.min_time()) && b.min_warmup_time() > 0.0) ? b.min_warmup_time() : FLAGS_benchmark_min_warmup_time), @@ -303,10 +306,10 @@ BenchmarkRunner::IterationResults BenchmarkRunner::DoNIterations() { // Base decisions off of real time if requested by this benchmark. i.seconds = i.results.cpu_time_used; - i.seconds2 = 0; + i.seconds_pow2 = 0; if (b.use_manual_time()) { i.seconds = i.results.manual_time_used; - i.seconds2 = i.results.manual_time_used2; + i.seconds_pow2 = i.results.manual_time_used_pow2; } else if (b.use_real_time()) { i.seconds = i.results.real_time_used; } @@ -333,8 +336,8 @@ IterationCount BenchmarkRunner::PredictNumItersNeeded( // So what seems to be the sufficiently-large iteration count? Round up. 
  const IterationCount max_next_iters = static_cast<IterationCount>(
-      std::lround(std::max(multiplier * static_cast<double>(i.iters),
-                           static_cast<double>(i.iters) + 1.0)));
+      std::llround(std::max(multiplier * static_cast<double>(i.iters),
+                            static_cast<double>(i.iters) + 1.0)));
   // But we do have *some* limits though..
   const IterationCount next_iters = std::min(max_next_iters, kMaxIterations);
@@ -364,8 +367,8 @@ double BenchmarkRunner::GetMinTimeToApply() const {
   return warmup_done ? min_time : min_warmup_time;
 }
 
-double GetRelAccuracy(const IterationResults& i) const {
-  return std::sqrt(i.seconds2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters);
+double BenchmarkRunner::GetRelAccuracy(const IterationResults& i) const {
+  return std::sqrt(i.seconds_pow2 / i.iters - std::pow(i.seconds / i.iters, 2.)) / (i.seconds / i.iters) / sqrt(i.iters);
 }
 
 bool BenchmarkRunner::HasSufficientTimeToApply(const IterationResults& i) const {
@@ -377,7 +380,7 @@ bool BenchmarkRunner::HasSufficientTimeToApply(const IterationResults& i) const
 }
 
 bool BenchmarkRunner::HasSufficientRelAccuracy(const IterationResults& i) const {
-  return (!IsZero(GetMinRelAccuracy()) && (GetRelAccuracy(i) <= GetMinRelAccuracy()));
+  return (IsZero(GetMinRelAccuracy()) || (GetRelAccuracy(i) <= GetMinRelAccuracy()));
 }
 
 void BenchmarkRunner::FinishWarmUp(const IterationCount& i) {
diff --git a/src/benchmark_runner.h b/src/benchmark_runner.h
index d20a84ff82..fe9df08e4e 100644
--- a/src/benchmark_runner.h
+++ b/src/benchmark_runner.h
@@ -26,6 +26,7 @@ namespace benchmark {
 
 BM_DECLARE_string(benchmark_min_time);
+BM_DECLARE_double(benchmark_min_rel_accuracy);
 BM_DECLARE_double(benchmark_min_warmup_time);
 BM_DECLARE_int32(benchmark_repetitions);
 BM_DECLARE_bool(benchmark_report_aggregates_only);
@@ -113,7 +114,7 @@ class BenchmarkRunner {
     internal::ThreadManager::Result results;
     IterationCount iters;
     double seconds;
-    double seconds2;
+    double seconds_pow2;
   };
 
   IterationResults DoNIterations();
diff --git
a/src/thread_manager.h b/src/thread_manager.h
index 28d061e0f9..e3e6c5a2b8 100644
--- a/src/thread_manager.h
+++ b/src/thread_manager.h
@@ -41,7 +41,7 @@ class ThreadManager {
     double real_time_used = 0;
     double cpu_time_used = 0;
     double manual_time_used = 0;
-    double manual_time_used2 = 0;
+    double manual_time_used_pow2 = 0;
     int64_t complexity_n = 0;
     std::string report_label_;
     std::string skip_message_;
diff --git a/src/thread_timer.h b/src/thread_timer.h
index 26a8d47e9e..464974f73f 100644
--- a/src/thread_timer.h
+++ b/src/thread_timer.h
@@ -40,7 +40,7 @@ class ThreadTimer {
   // Called by each thread
   void SetIterationTime(double seconds) {
     manual_time_used_ += seconds;
-    manual_time_used2_ += std::pow(seconds, 2.);
+    manual_time_used_pow2_ += std::pow(seconds, 2.);
   }
 
   bool running() const { return running_; }
@@ -63,9 +63,9 @@ class ThreadTimer {
     return manual_time_used_;
   }
 
-  double manual_time_used2() const {
+  double manual_time_used_pow2() const {
     BM_CHECK(!running_);
-    return manual_time_used2_;
+    return manual_time_used_pow2_;
   }
 
  private:
@@ -83,11 +83,11 @@ class ThreadTimer {
   // Accumulated time so far (does not contain current slice if running_)
   double real_time_used_ = 0;
   double cpu_time_used_ = 0;
 
   // Manually set iteration time. User sets this with SetIterationTime(seconds).
double manual_time_used_ = 0; - double manual_time_used2_ = 0; + double manual_time_used_pow2_ = 0; }; } // namespace internal diff --git a/test/CMakeLists.txt b/test/CMakeLists.txt index eb7137efcc..6da7632d6a 100644 --- a/test/CMakeLists.txt +++ b/test/CMakeLists.txt @@ -97,6 +97,9 @@ benchmark_add_test(NAME min_time_flag_time COMMAND benchmark_min_time_flag_time_ compile_benchmark_test(benchmark_min_time_flag_iters_test) benchmark_add_test(NAME min_time_flag_iters COMMAND benchmark_min_time_flag_iters_test) +compile_benchmark_test(benchmark_min_rel_accuracy_flag_test) +benchmark_add_test(NAME min_rel_accuracy_flag_test COMMAND benchmark_min_rel_accuracy_flag_test) + add_filter_test(filter_simple "Foo" 3) add_filter_test(filter_simple_negative "-Foo" 2) add_filter_test(filter_suffix "BM_.*" 4) diff --git a/test/benchmark_min_rel_accuracy_flag_test.cc b/test/benchmark_min_rel_accuracy_flag_test.cc new file mode 100644 index 0000000000..0221e3ba3d --- /dev/null +++ b/test/benchmark_min_rel_accuracy_flag_test.cc @@ -0,0 +1,88 @@ +#include +#include +#include +#include +#include +#include +#include + +#include "benchmark/benchmark.h" + +// Tests that if a benchmark measures time manually, we can specify the required relative accuracy with +// --benchmark_min_rel_accuracy=. 
+namespace {
+
+class TestReporter : public benchmark::ConsoleReporter {
+ public:
+  virtual bool ReportContext(const Context& context) BENCHMARK_OVERRIDE {
+    return ConsoleReporter::ReportContext(context);
+  };
+
+  virtual void ReportRuns(const std::vector<Run>& report) BENCHMARK_OVERRIDE {
+    assert(report.size() == 1);
+    iters_.push_back(report[0].iterations);
+    real_accumulated_times_.push_back(report[0].real_accumulated_time);
+    manual_accumulated_time_pow2s_.push_back(report[0].manual_accumulated_time_pow2);
+    ConsoleReporter::ReportRuns(report);
+  };
+
+  TestReporter() {}
+
+  virtual ~TestReporter() {}
+
+  const std::vector<benchmark::IterationCount>& GetIters() const {
+    return iters_;
+  }
+
+  const std::vector<double>& GetRealAccumulatedTimes() const {
+    return real_accumulated_times_;
+  }
+
+  const std::vector<double>& GetManualAccumulatedTimePow2s() const {
+    return manual_accumulated_time_pow2s_;
+  }
+
+ private:
+  std::vector<benchmark::IterationCount> iters_;
+  std::vector<double> real_accumulated_times_;
+  std::vector<double> manual_accumulated_time_pow2s_;
+};
+
+}  // end namespace
+
+static void BM_MyBench(benchmark::State& state) {
+  static std::mt19937 rd{std::random_device{}()};
+  static std::uniform_real_distribution<double> mrand(0, 1);
+
+  for (auto s : state) {
+    state.SetIterationTime(mrand(rd));
+  }
+}
+BENCHMARK(BM_MyBench)->UseManualTime();
+
+int main(int argc, char** argv) {
+  // Make a fake argv and append the new --benchmark_min_rel_accuracy=<min_rel_accuracy> to it.
+  int fake_argc = argc + 2;
+  const char** fake_argv = new const char*[static_cast<size_t>(fake_argc)];
+  for (int i = 0; i < argc; ++i) fake_argv[i] = argv[i];
+  fake_argv[argc] = "--benchmark_min_time=10s";
+  fake_argv[argc + 1] = "--benchmark_min_rel_accuracy=0.01";
+
+  benchmark::Initialize(&fake_argc, const_cast<char**>(fake_argv));
+
+  TestReporter test_reporter;
+  const size_t returned_count =
+      benchmark::RunSpecifiedBenchmarks(&test_reporter, "BM_MyBench");
+  assert(returned_count == 1);
+
+  // Check the executed iters.
+ const benchmark::IterationCount iters = test_reporter.GetIters()[0]; + const double real_accumulated_time = test_reporter.GetRealAccumulatedTimes()[0]; + const double manual_accumulated_time_pow2 = test_reporter.GetManualAccumulatedTimePow2s()[0]; + + const double rel_accuracy = std::sqrt(manual_accumulated_time_pow2 / iters - std::pow(real_accumulated_time / iters, 2.)) / (real_accumulated_time / iters) / sqrt(iters); + assert(rel_accuracy <= 0.01); + + delete[] fake_argv; + return 0; +}