From 509728ad632165e5436547046073ede34a304470 Mon Sep 17 00:00:00 2001
From: jonathanmetzman <31354670+jonathanmetzman@users.noreply.github.com>
Date: Tue, 12 Jan 2021 16:07:26 -0800
Subject: [PATCH] Add more validation (#1029)

* Add more validation

Add validation to presubmit.py and run_experiment.py to ensure:
1. That experiments are only run with one type of benchmark.
2. That experiments are only run with valid benchmarks.
3. Valid benchmarks must have a valid type.
4. To pass presubmit, benchmarks must have a valid type.

* fmt everywhere

* fix test
---
 analysis/benchmark_results.py       | 12 ++----------
 analysis/test_experiment_results.py |  4 +++-
 common/benchmark_utils.py           | 32 +++++++++++++++++++++++++++++++-
 common/test_benchmark_utils.py      | 25 +++++++++++++++++++++++++
 experiment/run_experiment.py        | 13 +++++++++++++
 fuzzbench/test_e2e/test_e2e_run.py  |  6 +++---
 service/automatic_run_experiment.py |  5 +++--
 test_libs/utils.py                  |  5 ++---
 8 files changed, 82 insertions(+), 20 deletions(-)

diff --git a/analysis/benchmark_results.py b/analysis/benchmark_results.py
index 08828408a..f74255cf7 100644
--- a/analysis/benchmark_results.py
+++ b/analysis/benchmark_results.py
@@ -19,7 +19,7 @@
 from analysis import data_utils
 from analysis import coverage_data_utils
 from analysis import stat_tests
-from common import benchmark_config
+from common import benchmark_utils
 
 
 # pylint: disable=too-many-public-methods, too-many-arguments
@@ -71,15 +71,7 @@ def type(self):
 
         Raises ValueError in case of invalid benchmark type in the config.
         """
-        try:
-            benchmark_type = benchmark_config.get_config(self.name).get('type')
-        except Exception:  # pylint: disable=broad-except
-            return 'code'
-        if not benchmark_type:
-            return 'code'
-        if benchmark_type not in ['code', 'bug']:
-            raise ValueError('Invalid benchmark type: ' + benchmark_type)
-        return benchmark_type
+        return benchmark_utils.get_type(self.name)
 
     @property
     def _relevant_column(self):
diff --git a/analysis/test_experiment_results.py b/analysis/test_experiment_results.py
index 779c67cb4..5725704e9 100644
--- a/analysis/test_experiment_results.py
+++ b/analysis/test_experiment_results.py
@@ -12,12 +12,14 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Tests for experiment_results.py"""
+from unittest import mock
 
 from analysis import experiment_results
 from analysis import test_data_utils
 
 
-def test_linkify_fuzzer_names_in_ranking():
+@mock.patch('common.benchmark_config.get_config', return_value={})
+def test_linkify_fuzzer_names_in_ranking(_):
     """Tests turning fuzzer names into links."""
     experiment_df = test_data_utils.create_experiment_data()
     results = experiment_results.ExperimentResults(experiment_df,
diff --git a/common/benchmark_utils.py b/common/benchmark_utils.py
index 15d5289a5..7d97f7c33 100644
--- a/common/benchmark_utils.py
+++ b/common/benchmark_utils.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Code for dealing with benchmarks."""
+import enum
 import os
 import re
 
@@ -27,6 +28,17 @@
 BENCHMARKS_DIR = os.path.join(utils.ROOT_DIR, 'benchmarks')
 
 
+class BenchmarkType(str, enum.Enum):
+    """Benchmark type."""
+    CODE = 'code'
+    BUG = 'bug'
+
+
+# pytype: disable=missing-parameter
+BENCHMARK_TYPE_STRS = {benchmark_type.value for benchmark_type in BenchmarkType}
+# pytype: enable=missing-parameter
+
+
 def get_fuzz_target(benchmark):
     """Returns the fuzz target of |benchmark|"""
     return benchmark_config.get_config(benchmark)['fuzz_target']
@@ -37,6 +49,12 @@ def get_project(benchmark):
     return benchmark_config.get_config(benchmark)['project']
 
 
+def get_type(benchmark):
+    """Returns the type of |benchmark|"""
+    return benchmark_config.get_config(benchmark).get('type',
+                                                      BenchmarkType.CODE.value)
+
+
 def get_runner_image_url(experiment, benchmark, fuzzer, docker_registry):
     """Get the URL of the docker runner image for fuzzing the benchmark with
     fuzzer."""
@@ -64,6 +82,16 @@ def validate_name(benchmark):
     return True
 
 
+def validate_type(benchmark):
+    """Returns True if |benchmark| has a valid type."""
+    benchmark_type = get_type(benchmark)
+    if benchmark_type not in BENCHMARK_TYPE_STRS:
+        logs.error('%s has an invalid benchmark type %s, must be one of %s',
+                   benchmark, benchmark_type, BENCHMARK_TYPE_STRS)
+        return False
+    return True
+
+
 def validate(benchmark):
     """Returns True if |benchmark| is a valid fuzzbench benchmark."""
     if not validate_name(benchmark):
@@ -73,6 +101,7 @@ def validate(benchmark):
         logs.error('%s must have a benchmark.yaml.', benchmark)
         return False
 
+    # Validate config file can be parsed.
     try:
         get_fuzz_target(benchmark)
     except yaml.parser.ParserError:
@@ -84,7 +113,8 @@ def validate(benchmark):
                    benchmark)
         return False
 
-    return True
+    # Validate type.
+    return validate_type(benchmark)
 
 
 def get_all_benchmarks():
diff --git a/common/test_benchmark_utils.py b/common/test_benchmark_utils.py
index 7e1c44ce5..411dcca62 100644
--- a/common/test_benchmark_utils.py
+++ b/common/test_benchmark_utils.py
@@ -12,6 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Tests for benchmark_utils.py"""
+from unittest import mock
+
 import pytest
 
 from common import benchmark_utils
@@ -56,3 +58,26 @@ def test_validate_name_invalid(benchmark_name):
 def test_validate_name_valid(benchmark_name):
     """Tests that validate_name returns True for a valid benchmark name."""
     assert benchmark_utils.validate_name(benchmark_name)
+
+
+@mock.patch('common.benchmark_utils.get_type', return_value='other-type')
+def test_validate_type_invalid(_):
+    """Tests that validate_type returns False for an invalid type."""
+    assert not benchmark_utils.validate_type('benchmark')
+
+
+@pytest.mark.parametrize(('benchmark_type',), [
+    ('code',),
+    ('bug',),
+])
+def test_validate_type_valid(benchmark_type):
+    """Tests that validate_type returns True for a valid type."""
+    with mock.patch('common.benchmark_utils.get_type',
+                    return_value=benchmark_type):
+        assert benchmark_utils.validate_type('benchmark')
+
+
+@mock.patch('common.benchmark_config.get_config', return_value={})
+def test_get_default_type(_):
+    """Tests that get_type returns the correct default value."""
+    assert benchmark_utils.get_type('benchmark') == 'code'
diff --git a/experiment/run_experiment.py b/experiment/run_experiment.py
index fa9b55a94..aea0dc3c1 100644
--- a/experiment/run_experiment.py
+++ b/experiment/run_experiment.py
@@ -151,10 +151,23 @@ def get_directories(parent_dir):
 
 def validate_benchmarks(benchmarks: List[str]):
     """Parses and validates list of benchmarks."""
+    benchmark_types = set()
     for benchmark in set(benchmarks):
         if benchmarks.count(benchmark) > 1:
             raise Exception('Benchmark "%s" is included more than once.' %
                             benchmark)
+        # Validate benchmarks here. It's possible someone might run an
+        # experiment without going through presubmit. Better to catch an invalid
+        # benchmark than see it in production.
+        if not benchmark_utils.validate(benchmark):
+            raise Exception('Benchmark "%s" is invalid.' % benchmark)
+
+        benchmark_types.add(benchmark_utils.get_type(benchmark))
+
+    if (benchmark_utils.BenchmarkType.CODE.value in benchmark_types and
+            benchmark_utils.BenchmarkType.BUG.value in benchmark_types):
+        raise Exception(
+            'Cannot mix bug benchmarks with code coverage benchmarks.')
 
 
 def validate_fuzzer(fuzzer: str):
diff --git a/fuzzbench/test_e2e/test_e2e_run.py b/fuzzbench/test_e2e/test_e2e_run.py
index 04bdf7555..e85922fb2 100644
--- a/fuzzbench/test_e2e/test_e2e_run.py
+++ b/fuzzbench/test_e2e/test_e2e_run.py
@@ -59,9 +59,9 @@ def test_jobs_dependency(self, experiment_config, redis_connection):  # pylint:
             assert jobs[dep].ended_at <= jobs[name].started_at
 
     def test_all_jobs_finished_successfully(
-        self,
-        experiment_config,  # pylint: disable=redefined-outer-name
-        redis_connection):  # pylint: disable=redefined-outer-name
+            self,
+            experiment_config,  # pylint: disable=redefined-outer-name
+            redis_connection):  # pylint: disable=redefined-outer-name
         """Tests all jobs finished successfully."""
         all_images = docker_images.get_images_to_build(
             experiment_config['fuzzers'], experiment_config['benchmarks'])
diff --git a/service/automatic_run_experiment.py b/service/automatic_run_experiment.py
index 7afc6b128..041394203 100644
--- a/service/automatic_run_experiment.py
+++ b/service/automatic_run_experiment.py
@@ -23,8 +23,9 @@
 import sys
 from typing import Optional
 
-from common import logs
+from common import benchmark_utils
 from common import fuzzer_utils
+from common import logs
 from common import utils
 from common import yaml_utils
 from database import models
@@ -247,7 +248,7 @@ def run_requested_experiment(dry_run):
     fuzzers = requested_experiment['fuzzers']
 
     benchmark_type = requested_experiment.get('type')
-    if benchmark_type == 'bug':
+    if benchmark_type == benchmark_utils.BenchmarkType.BUG.value:
         benchmarks = BUG_BENCHMARKS
     else:
         benchmarks = CODE_BENCHMARKS
diff --git a/test_libs/utils.py b/test_libs/utils.py
index fa08d7795..e4c668e0c 100644
--- a/test_libs/utils.py
+++ b/test_libs/utils.py
@@ -25,9 +25,8 @@ def mock_popen_ctx_mgr(*args, **kwargs):
         yield mocked_popen
 
 
-def create_mock_popen(output=bytes('', 'utf-8'),
-                      err=bytes('', 'utf-8'),
-                      returncode=0):
+def create_mock_popen(
+        output=bytes('', 'utf-8'), err=bytes('', 'utf-8'), returncode=0):
     """Creates a mock subprocess.Popen."""
 
     class MockPopen:
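
For reviewers, a minimal sketch (not part of the patch) of how the new helpers in common/benchmark_utils.py are expected to behave. It mirrors the unit tests above, mocking the benchmark config lookup so no real benchmark.yaml is needed; the benchmark name is arbitrary:

# Sketch only, assuming the patched fuzzbench tree is importable.
from unittest import mock

from common import benchmark_utils

# A benchmark whose benchmark.yaml has no 'type' field defaults to 'code'.
with mock.patch('common.benchmark_config.get_config', return_value={}):
    assert benchmark_utils.get_type('example-benchmark') == 'code'

# validate_type() rejects anything outside BENCHMARK_TYPE_STRS ({'code', 'bug'}).
with mock.patch('common.benchmark_utils.get_type', return_value='other-type'):
    assert not benchmark_utils.validate_type('example-benchmark')

On top of this, run_experiment.validate_benchmarks() raises if a single experiment request mixes 'bug' and 'code' benchmarks, so an experiment can only run with one type of benchmark.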