Skip to content

Commit 526b834

Browse files
authored
New option for custom seed corpus (#1395)
1 parent b97fb39 commit 526b834

8 files changed

+78
-3
lines changed

common/experiment_utils.py

+6
Original file line numberDiff line numberDiff line change
@@ -72,6 +72,12 @@ def get_oss_fuzz_corpora_filestore_path():
7272
return posixpath.join(get_experiment_filestore_path(), 'oss_fuzz_corpora')
7373

7474

75+
def get_custom_seed_corpora_filestore_path():
    """Returns path containing the user-provided seed corpora.

    The path is the 'custom_seed_corpora' entry directly under the experiment
    filestore path.
    """
    return posixpath.join(get_experiment_filestore_path(),
                          'custom_seed_corpora')
79+
80+
7581
def get_dispatcher_instance_name(experiment: str) -> str:
    """Returns a dispatcher instance name for an experiment.

    The name is the experiment name prefixed with 'd-'.
    """
    return 'd-%s' % experiment

experiment/resources/runner-startup-script-template.sh

+1
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,7 @@ docker run \
4646
-e NO_SEEDS={{no_seeds}} \
4747
-e NO_DICTIONARIES={{no_dictionaries}} \
4848
-e OSS_FUZZ_CORPUS={{oss_fuzz_corpus}} \
49+
-e CUSTOM_SEED_CORPUS_DIR={{custom_seed_corpus_dir}} \
4950
-e DOCKER_REGISTRY={{docker_registry}} {% if not local_experiment %}-e CLOUD_PROJECT={{cloud_project}} -e CLOUD_COMPUTE_ZONE={{cloud_compute_zone}} {% endif %}\
5051
-e EXPERIMENT_FILESTORE={{experiment_filestore}} {% if local_experiment %}-v {{experiment_filestore}}:{{experiment_filestore}} {% endif %}\
5152
-e REPORT_FILESTORE={{report_filestore}} {% if local_experiment %}-v {{report_filestore}}:{{report_filestore}} {% endif %}\

experiment/run_experiment.py

+52-2
Original file line numberDiff line numberDiff line change
@@ -148,6 +148,26 @@ def get_directories(parent_dir):
148148
]
149149

150150

151+
# pylint: disable=too-many-locals
152+
def validate_custom_seed_corpus(custom_seed_corpus_dir, benchmarks):
    """Validates the seed corpus provided by the user.

    Args:
        custom_seed_corpus_dir: Local directory expected to contain one
            subdirectory per benchmark, named after the benchmark.
        benchmarks: Names of the benchmarks whose seed corpus is checked.

    Raises:
        ValidationError: If |custom_seed_corpus_dir| is not a directory, or if
            the seed corpus of any benchmark does not exist, is not a
            directory, or is empty.
    """
    if not os.path.isdir(custom_seed_corpus_dir):
        raise ValidationError('Corpus location "%s" is invalid.' %
                              custom_seed_corpus_dir)

    for benchmark in benchmarks:
        benchmark_corpus_dir = os.path.join(custom_seed_corpus_dir, benchmark)
        # Missing and not-a-directory are reported separately so the user can
        # tell which of the two problems to fix.
        if not os.path.exists(benchmark_corpus_dir):
            raise ValidationError('Custom seed corpus directory for '
                                  'benchmark "%s" does not exist.' % benchmark)
        if not os.path.isdir(benchmark_corpus_dir):
            raise ValidationError('Seed corpus of benchmark "%s" must be '
                                  'a directory.' % benchmark)
        if not os.listdir(benchmark_corpus_dir):
            raise ValidationError('Seed corpus of benchmark "%s" is empty.' %
                                  benchmark)
169+
170+
151171
def validate_benchmarks(benchmarks: List[str]):
152172
"""Parses and validates list of benchmarks."""
153173
benchmark_types = set()
@@ -219,7 +239,8 @@ def start_experiment( # pylint: disable=too-many-arguments
219239
allow_uncommitted_changes=False,
220240
concurrent_builds=None,
221241
measurers_cpus=None,
222-
runners_cpus=None):
242+
runners_cpus=None,
243+
custom_seed_corpus_dir=None):
223244
"""Start a fuzzer benchmarking experiment."""
224245
if not allow_uncommitted_changes:
225246
check_no_uncommitted_changes()
@@ -248,6 +269,12 @@ def start_experiment( # pylint: disable=too-many-arguments
248269
# 12GB is just the amount that KLEE needs, use this default to make KLEE
249270
# experiments easier to run.
250271
config['runner_memory'] = config.get('runner_memory', '12GB')
272+
273+
config['custom_seed_corpus_dir'] = custom_seed_corpus_dir
274+
if config['custom_seed_corpus_dir']:
275+
validate_custom_seed_corpus(config['custom_seed_corpus_dir'],
276+
benchmarks)
277+
251278
return start_experiment_from_full_config(config)
252279

253280

@@ -330,6 +357,16 @@ def filter_file(tar_info):
330357
for benchmark in config['benchmarks']:
331358
add_oss_fuzz_corpus(benchmark, oss_fuzz_corpora_dir)
332359

360+
if config['custom_seed_corpus_dir']:
361+
for benchmark in config['benchmarks']:
362+
benchmark_custom_corpus_dir = os.path.join(
363+
config['custom_seed_corpus_dir'], benchmark)
364+
filestore_utils.cp(
365+
benchmark_custom_corpus_dir,
366+
experiment_utils.get_custom_seed_corpora_filestore_path() + '/',
367+
recursive=True,
368+
parallel=True)
369+
333370

334371
class BaseDispatcher:
335372
"""Class representing the dispatcher."""
@@ -522,6 +559,10 @@ def main():
522559
'--runners-cpus',
523560
help='Cpus available to the runners.',
524561
required=False)
562+
parser.add_argument('-cs',
563+
'--custom-seed-corpus-dir',
564+
help='Path to the custom seed corpus',
565+
required=False)
525566

526567
all_fuzzers = fuzzer_utils.get_fuzzer_names()
527568
parser.add_argument('-f',
@@ -585,6 +626,14 @@ def main():
585626
parser.error('The sum of runners and measurers cpus is greater than the'
586627
' available cpu cores (%d)' % os.cpu_count())
587628

629+
if args.custom_seed_corpus_dir:
630+
if args.no_seeds:
631+
parser.error('Cannot enable options "custom_seed_corpus_dir" and '
632+
'"no_seeds" at the same time')
633+
if args.oss_fuzz_corpus:
634+
parser.error('Cannot enable options "custom_seed_corpus_dir" and '
635+
'"oss_fuzz_corpus" at the same time')
636+
588637
start_experiment(args.experiment_name,
589638
args.experiment_config,
590639
args.benchmarks,
@@ -596,7 +645,8 @@ def main():
596645
allow_uncommitted_changes=args.allow_uncommitted_changes,
597646
concurrent_builds=concurrent_builds,
598647
measurers_cpus=measurers_cpus,
599-
runners_cpus=runners_cpus)
648+
runners_cpus=runners_cpus,
649+
custom_seed_corpus_dir=args.custom_seed_corpus_dir)
600650
return 0
601651

602652

experiment/runner.py

+15-1
Original file line numberDiff line numberDiff line change
@@ -115,6 +115,17 @@ def get_clusterfuzz_seed_corpus_path(fuzz_target_path):
115115
return seed_corpus_path if os.path.exists(seed_corpus_path) else None
116116

117117

118+
def _copy_custom_seed_corpus(corpus_directory):
    """Replaces |corpus_directory| with the custom seed corpus provided by the
    user for the benchmark named by the BENCHMARK environment variable."""
    # Clear the directory first so none of its previous contents survive.
    # NOTE(review): presumably removal (rather than emptying) is needed so the
    # recursive copy below lands at |corpus_directory| itself instead of
    # nesting inside it; confirm against filestore_utils.cp semantics.
    try:
        shutil.rmtree(corpus_directory)
    except FileNotFoundError:
        # Already absent, which is the state rmtree would have left behind.
        pass
    benchmark = environment.get('BENCHMARK')
    benchmark_custom_corpus_dir = posixpath.join(
        experiment_utils.get_custom_seed_corpora_filestore_path(), benchmark)
    filestore_utils.cp(benchmark_custom_corpus_dir,
                       corpus_directory,
                       recursive=True)
127+
128+
118129
def _unpack_clusterfuzz_seed_corpus(fuzz_target_path, corpus_directory):
119130
"""If a clusterfuzz seed corpus archive is available, unpack it into the
120131
corpus directory if it exists. Copied from unpack_seed_corpus in
@@ -172,7 +183,10 @@ def run_fuzzer(max_total_time, log_filename):
172183
logs.error('Fuzz target binary not found.')
173184
return
174185

175-
_unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
186+
if environment.get('CUSTOM_SEED_CORPUS_DIR'):
187+
_copy_custom_seed_corpus(input_corpus)
188+
else:
189+
_unpack_clusterfuzz_seed_corpus(target_binary, input_corpus)
176190
_clean_seed_corpus(input_corpus)
177191

178192
if max_total_time is None:

experiment/scheduler.py

+1
Original file line numberDiff line numberDiff line change
@@ -719,6 +719,7 @@ def render_startup_script_template( # pylint: disable=too-many-arguments
719719
'oss_fuzz_corpus': experiment_config['oss_fuzz_corpus'],
720720
'num_cpu_cores': experiment_config['runner_num_cpu_cores'],
721721
'cpuset': cpuset,
722+
'custom_seed_corpus_dir': experiment_config['custom_seed_corpus_dir'],
722723
}
723724

724725
if not local_experiment:

experiment/test_data/experiment-config.yaml

+1
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ git_hash: "git-hash"
3131
no_seeds: false
3232
no_dictionaries: false
3333
oss_fuzz_corpus: false
34+
custom_seed_corpus_dir: null
3435
description: "Test experiment"
3536
concurrent_builds: null
3637
runners_cpus: null

experiment/test_run_experiment.py

+1
Original file line numberDiff line numberDiff line change
@@ -202,6 +202,7 @@ def test_copy_resources_to_bucket(tmp_path):
202202
'experiment': 'experiment',
203203
'benchmarks': ['libxslt_xpath'],
204204
'oss_fuzz_corpus': True,
205+
'custom_seed_corpus_dir': None,
205206
}
206207
try:
207208
with mock.patch('common.filestore_utils.cp') as mocked_filestore_cp:

experiment/test_scheduler.py

+1
Original file line numberDiff line numberDiff line change
@@ -118,6 +118,7 @@ def test_create_trial_instance(benchmark, expected_image, expected_target,
118118
-e NO_SEEDS=False \\
119119
-e NO_DICTIONARIES=False \\
120120
-e OSS_FUZZ_CORPUS=False \\
121+
-e CUSTOM_SEED_CORPUS_DIR=None \\
121122
-e DOCKER_REGISTRY=gcr.io/fuzzbench -e CLOUD_PROJECT=fuzzbench -e CLOUD_COMPUTE_ZONE=us-central1-a \\
122123
-e EXPERIMENT_FILESTORE=gs://experiment-data \\
123124
-e REPORT_FILESTORE=gs://web-reports \\

0 commit comments

Comments
 (0)