18
18
19
19
from commoncode import fileutils
20
20
from commoncode import ignore
21
+ from commoncode import hash
21
22
22
23
import extractcode # NOQA
23
24
import extractcode .archive
@@ -86,6 +87,7 @@ def extract(
86
87
recurse = False ,
87
88
replace_originals = False ,
88
89
ignore_pattern = (),
90
+ known_archive_hashes = set ()
89
91
):
90
92
"""
91
93
Walk and extract any archives found at ``location`` (either a file or
@@ -121,6 +123,7 @@ def extract(
121
123
kinds = kinds ,
122
124
recurse = recurse ,
123
125
ignore_pattern = ignore_pattern ,
126
+ known_archive_hashes = known_archive_hashes
124
127
)
125
128
126
129
processed_events = []
@@ -151,6 +154,7 @@ def extract_files(
151
154
kinds = extractcode .default_kinds ,
152
155
recurse = False ,
153
156
ignore_pattern = (),
157
+ known_archive_hashes = set ()
154
158
):
155
159
"""
156
160
Extract the files found at `location`.
@@ -190,7 +194,7 @@ def extract_files(
190
194
if not recurse and extractcode .is_extraction_path (loc ):
191
195
if TRACE :
192
196
logger .debug (
193
- 'extract:walk not recurse: skipped file: %(loc)r' % locals ())
197
+ 'extract:walk: not recurse: skipped file: %(loc)r' % locals ())
194
198
continue
195
199
196
200
if not extractcode .archive .should_extract (
@@ -203,6 +207,14 @@ def extract_files(
203
207
'extract:walk: skipped file: not should_extract: %(loc)r' % locals ())
204
208
continue
205
209
210
+ file_hash = hash .sha256 (loc )
211
+
212
+ if known_archive_hashes and file_hash in known_archive_hashes :
213
+ if TRACE :
214
+ logger .debug (
215
+ 'extract:walk: skipped file: decompression bomb detected: %(loc)r' % locals ())
216
+ continue
217
+
206
218
target = join (abspath (top ), extractcode .get_extraction_path (loc ))
207
219
if TRACE :
208
220
logger .debug ('extract:target: %(target)r' % locals ())
@@ -220,11 +232,16 @@ def extract_files(
220
232
if recurse :
221
233
if TRACE :
222
234
logger .debug ('extract:walk: recursing on target: %(target)r' % locals ())
235
+
236
+ kah = set (known_archive_hashes )
237
+ kah .add (file_hash )
238
+
223
239
for xevent in extract (
224
240
location = target ,
225
241
kinds = kinds ,
226
242
recurse = recurse ,
227
243
ignore_pattern = ignore_pattern ,
244
+ known_archive_hashes = kah
228
245
):
229
246
if TRACE :
230
247
logger .debug ('extract:walk:recurse:extraction event: %(xevent)r' % locals ())
0 commit comments