Skip to content

Commit b7d5cb2

Browse files
committed
Add detection for recursive decompression bombs
Signed-off-by: Stefan Rehm <[email protected]>
1 parent 37197c2 commit b7d5cb2

File tree

1 file changed

+18
-1
lines changed

1 file changed

+18
-1
lines changed

src/extractcode/extract.py

+18 -1
Original file line number | Diff line number | Diff line change
@@ -18,6 +18,7 @@
1818

1919
from commoncode import fileutils
2020
from commoncode import ignore
21+
from commoncode import hash
2122

2223
import extractcode # NOQA
2324
import extractcode.archive
@@ -86,6 +87,7 @@ def extract(
8687
recurse=False,
8788
replace_originals=False,
8889
ignore_pattern=(),
90+
known_archive_hashes=set()
8991
):
9092
"""
9193
Walk and extract any archives found at ``location`` (either a file or
@@ -121,6 +123,7 @@ def extract(
121123
kinds=kinds,
122124
recurse=recurse,
123125
ignore_pattern=ignore_pattern,
126+
known_archive_hashes=known_archive_hashes
124127
)
125128

126129
processed_events = []
@@ -151,6 +154,7 @@ def extract_files(
151154
kinds=extractcode.default_kinds,
152155
recurse=False,
153156
ignore_pattern=(),
157+
known_archive_hashes=set()
154158
):
155159
"""
156160
Extract the files found at `location`.
@@ -190,7 +194,7 @@ def extract_files(
190194
if not recurse and extractcode.is_extraction_path(loc):
191195
if TRACE:
192196
logger.debug(
193-
'extract:walk not recurse: skipped file: %(loc)r' % locals())
197+
'extract:walk: not recurse: skipped file: %(loc)r' % locals())
194198
continue
195199

196200
if not extractcode.archive.should_extract(
@@ -203,6 +207,14 @@ def extract_files(
203207
'extract:walk: skipped file: not should_extract: %(loc)r' % locals())
204208
continue
205209

210+
file_hash = hash.sha256(loc)
211+
212+
if known_archive_hashes and file_hash in known_archive_hashes:
213+
if TRACE:
214+
logger.debug(
215+
'extract:walk: skipped file: decompression bomb detected: %(loc)r' % locals())
216+
continue
217+
206218
target = join(abspath(top), extractcode.get_extraction_path(loc))
207219
if TRACE:
208220
logger.debug('extract:target: %(target)r' % locals())
@@ -220,11 +232,16 @@ def extract_files(
220232
if recurse:
221233
if TRACE:
222234
logger.debug('extract:walk: recursing on target: %(target)r' % locals())
235+
236+
kah = set(known_archive_hashes)
237+
kah.add(file_hash)
238+
223239
for xevent in extract(
224240
location=target,
225241
kinds=kinds,
226242
recurse=recurse,
227243
ignore_pattern=ignore_pattern,
244+
known_archive_hashes=kah
228245
):
229246
if TRACE:
230247
logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals())

0 commit comments

Comments
 (0)