@@ -3014,11 +3014,12 @@ class ArchiveMaker:
30143014 with t.open() as tar:
30153015 ... # `tar` is now a TarFile with 'filename' in it!
30163016 """
def __init__(self, **kwargs):
    """Prepare an empty in-memory archive.

    Any keyword arguments are remembered and later handed to
    ``tarfile.TarFile`` when the archive is opened for writing in
    ``__enter__`` (e.g. ``encoding=...``, ``errors=...``).
    """
    self.bio = io.BytesIO()
    # Keep a private copy of the keyword arguments for TarFile.
    self.tar_kwargs = dict(kwargs)
30193020
def __enter__(self):
    """Start writing the archive and return this maker.

    A write-mode ``tarfile.TarFile`` is created on top of the in-memory
    buffer ``self.bio``; the keyword arguments stored in
    ``self.tar_kwargs`` are forwarded to its constructor.
    """
    self.tar_w = tarfile.TarFile(mode='w', fileobj=self.bio,
                                 **self.tar_kwargs)
    return self
30233024
30243025 def __exit__ (self , * exc ):
@@ -3425,7 +3426,10 @@ def test_tar_filter(self):
34253426 # that in the test archive.)
34263427 with tarfile .TarFile .open (tarname ) as tar :
34273428 for tarinfo in tar .getmembers ():
3428- filtered = tarfile .tar_filter (tarinfo , '' )
3429+ try :
3430+ filtered = tarfile .tar_filter (tarinfo , '' )
3431+ except UnicodeEncodeError :
3432+ continue
34293433 self .assertIs (filtered .name , tarinfo .name )
34303434 self .assertIs (filtered .type , tarinfo .type )
34313435
@@ -3436,13 +3440,50 @@ def test_data_filter(self):
34363440 for tarinfo in tar .getmembers ():
34373441 try :
34383442 filtered = tarfile .data_filter (tarinfo , '' )
3439- except tarfile .FilterError :
3443+ except ( tarfile .FilterError , UnicodeEncodeError ) :
34403444 continue
34413445 self .assertIs (filtered .name , tarinfo .name )
34423446 self .assertIs (filtered .type , tarinfo .type )
34433447
3444- def test_default_filter_warns_not (self ):
3445- """Ensure the default filter does not warn (like in 3.12)"""
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_filter_unencodable(self):
    """Check tar_filter and data_filter against unusable member names.

    Both filters must pass a valid name through unchanged, and must
    raise for a name containing a NUL character (ValueError) or a lone
    surrogate (UnicodeEncodeError).
    """
    filters = (tarfile.tar_filter, tarfile.data_filter)

    # Sanity check using a valid path.
    tarinfo = tarfile.TarInfo(os_helper.TESTFN)
    for filter_func in filters:
        with self.subTest(filter=filter_func.__name__):
            filtered = filter_func(tarinfo, '')
            self.assertIs(filtered.name, tarinfo.name)

    # Names the filters are expected to reject, with the error each
    # one must produce.
    rejected = (
        ('test\x00', ValueError),
        ('\ud800', UnicodeEncodeError),
    )
    for name, expected_error in rejected:
        tarinfo = tarfile.TarInfo(name)
        for filter_func in filters:
            with self.subTest(name=name, filter=filter_func.__name__):
                self.assertRaises(expected_error, filter_func, tarinfo, '')
3463+
@unittest.skipIf(sys.platform == 'win32', 'requires native bytes paths')
def test_extract_unencodable(self):
    """Check that extracting a member with an unencodable name fails cleanly.

    With ``errorlevel=1`` the UnicodeEncodeError must propagate; with
    ``errorlevel=0`` and ``debug=1`` it must only be reported on stderr.
    In both cases nothing may be created in the working directory.
    """
    # Create a member with name \xed\xa0\x80 which is UTF-8 encoded
    # lone surrogate \ud800.
    with ArchiveMaker(encoding='ascii', errors='surrogateescape') as arc:
        arc.add('\udced\udca0\udc80')
    with os_helper.temp_cwd():
        # Reading the name back as UTF-8 with surrogatepass yields the
        # lone surrogate itself.
        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=1) as tar:
            self.assertEqual(tar.getnames(), ['\ud800'])
            with self.assertRaises(UnicodeEncodeError):
                tar.extractall()
        self.assertEqual(os.listdir(), [])

        with arc.open(encoding='utf-8', errors='surrogatepass',
                      errorlevel=0, debug=1) as tar:
            with support.captured_stderr() as stderr:
                tar.extractall()
        self.assertEqual(os.listdir(), [])
        self.assertIn('tarfile: UnicodeEncodeError ', stderr.getvalue())
3484+
3485+ def test_default_filter_warns (self ):
3486+ """Ensure the default filter warns"""
34463487 with ArchiveMaker () as arc :
34473488 arc .add ('foo' )
34483489 # Replicate warnings_helper.check_no_warnings
0 commit comments