1
1
from contextlib import contextmanager
2
- from ctypes import c_char_p , create_string_buffer
2
+ from ctypes import create_string_buffer
3
3
from enum import IntEnum
4
4
import math
5
5
@@ -34,15 +34,19 @@ def format_time(seconds, nanos):
34
34
35
35
class ArchiveEntry :
36
36
37
- __slots__ = ('_archive_p' , '_entry_p' )
37
+ __slots__ = ('_archive_p' , '_entry_p' , 'header_codec' )
38
38
39
def __init__(self, archive_p=None, header_codec='utf-8', **attributes):
    """Create an entry backed by a newly allocated `archive_entry` struct.

    Args:
        archive_p: stored unchanged on the instance (defaults to `None`).
        header_codec (str): codec name kept on the instance; the string
            properties of the entry use it to encode and decode values.
        **attributes: when any are given, they are forwarded to the
            `modify` method.
    """
    self._archive_p = archive_p
    self._entry_p = ffi.entry_new()
    self.header_codec = header_codec
    if not attributes:
        return
    self.modify(**attributes)
48
52
@@ -54,7 +58,7 @@ def __str__(self):
54
58
"""Returns the file's path"""
55
59
return self .pathname
56
60
57
- def modify (self , ** attributes ):
61
+ def modify (self , header_codec = None , ** attributes ):
58
62
"""Convenience method to modify the entry's attributes.
59
63
60
64
Args:
@@ -83,6 +87,8 @@ def modify(self, **attributes):
83
87
rdevmajor (int): major part of the device number
84
88
rdevminor (int): minor part of the device number
85
89
"""
90
+ if header_codec :
91
+ self .header_codec = header_codec
86
92
for name , value in attributes .items ():
87
93
setattr (self , name , value )
88
94
@@ -112,23 +118,45 @@ def gid(self, gid):
112
118
113
119
@property
def uname(self):
    """Return the entry's user name.

    The wide-character accessor is tried first and its result is returned
    as-is when it is truthy. Otherwise the narrow accessor is used and its
    result is decoded with `self.header_codec`.

    Returns:
        The user name; `None` when the narrow accessor returns `None`; the
        undecoded value when decoding raises `UnicodeError`.
    """
    wide_name = ffi.entry_uname_w(self._entry_p)
    if wide_name:
        return wide_name
    raw_name = ffi.entry_uname(self._entry_p)
    if raw_name is None:
        return None
    try:
        return raw_name.decode(self.header_codec)
    except UnicodeError:
        # Best effort: hand back the undecoded value rather than failing.
        return raw_name
116
130
117
131
@uname.setter
def uname(self, value):
    """Set the entry's user name.

    Args:
        value (str | bytes): the new user name. A non-`bytes` value is
            encoded with `self.header_codec`; `bytes` are used unchanged.
    """
    # Imported locally because the module's import block is not fully
    # visible from this section of the file.
    import codecs

    if not isinstance(value, bytes):
        value = value.encode(self.header_codec)
    # Compare the canonical codec name so that aliases such as 'utf8',
    # 'UTF-8' or 'utf_8' select the same branch as 'utf-8'.
    try:
        is_utf8 = codecs.lookup(self.header_codec).name == 'utf-8'
    except (LookupError, TypeError):
        # Unknown or non-string codec combined with a `bytes` value: keep
        # the previous behaviour and copy the bytes unchanged.
        is_utf8 = False
    if is_utf8:
        ffi.entry_update_uname_utf8(self._entry_p, value)
    else:
        ffi.entry_copy_uname(self._entry_p, value)
122
139
123
140
@property
def gname(self):
    """Return the entry's group name.

    The wide-character accessor is tried first and its result is returned
    as-is when it is truthy. Otherwise the narrow accessor is used and its
    result is decoded with `self.header_codec`.

    Returns:
        The group name; `None` when the narrow accessor returns `None`; the
        undecoded value when decoding raises `UnicodeError`.
    """
    wide_name = ffi.entry_gname_w(self._entry_p)
    if wide_name:
        return wide_name
    raw_name = ffi.entry_gname(self._entry_p)
    if raw_name is None:
        return None
    try:
        return raw_name.decode(self.header_codec)
    except UnicodeError:
        # Best effort: hand back the undecoded value rather than failing.
        return raw_name
126
151
127
152
@gname.setter
def gname(self, value):
    """Set the entry's group name.

    Args:
        value (str | bytes): the new group name. A non-`bytes` value is
            encoded with `self.header_codec`; `bytes` are used unchanged.
    """
    # Imported locally because the module's import block is not fully
    # visible from this section of the file.
    import codecs

    if not isinstance(value, bytes):
        value = value.encode(self.header_codec)
    # Compare the canonical codec name so that aliases such as 'utf8',
    # 'UTF-8' or 'utf_8' select the same branch as 'utf-8'.
    try:
        is_utf8 = codecs.lookup(self.header_codec).name == 'utf-8'
    except (LookupError, TypeError):
        # Unknown or non-string codec combined with a `bytes` value: keep
        # the previous behaviour and copy the bytes unchanged.
        is_utf8 = False
    if is_utf8:
        ffi.entry_update_gname_utf8(self._entry_p, value)
    else:
        ffi.entry_copy_gname(self._entry_p, value)
132
160
133
161
def get_blocks (self , block_size = ffi .page_size ):
134
162
"""Read the file's content, keeping only one chunk in memory at a time.
@@ -294,28 +322,48 @@ def pathname(self):
294
322
path = ffi .entry_pathname_w (self ._entry_p )
295
323
if not path :
296
324
path = ffi .entry_pathname (self ._entry_p )
297
- try :
298
- path = path .decode ()
299
- except UnicodeError :
300
- pass
325
+ if path is not None :
326
+ try :
327
+ path = path .decode (self .header_codec )
328
+ except UnicodeError :
329
+ pass
301
330
return path
302
331
303
332
@pathname.setter
def pathname(self, value):
    """Set the entry's path.

    Args:
        value (str | bytes): the new path. A non-`bytes` value is encoded
            with `self.header_codec`; `bytes` are used unchanged.
    """
    # Imported locally because the module's import block is not fully
    # visible from this section of the file.
    import codecs

    if not isinstance(value, bytes):
        value = value.encode(self.header_codec)
    # Compare the canonical codec name so that aliases such as 'utf8',
    # 'UTF-8' or 'utf_8' select the same branch as 'utf-8'.
    try:
        is_utf8 = codecs.lookup(self.header_codec).name == 'utf-8'
    except (LookupError, TypeError):
        # Unknown or non-string codec combined with a `bytes` value: keep
        # the previous behaviour and copy the bytes unchanged.
        is_utf8 = False
    if is_utf8:
        ffi.entry_update_pathname_utf8(self._entry_p, value)
    else:
        ffi.entry_copy_pathname(self._entry_p, value)
308
340
309
341
@property
def linkpath(self):
    """Return the entry's link target.

    When `self.issym` is true the symlink accessors are consulted,
    otherwise the hardlink accessors; in both cases the wide-character
    accessor is tried before the narrow one.

    Returns:
        The link target. A `bytes` result is decoded with
        `self.header_codec`, and returned undecoded when that raises
        `UnicodeError`. Any other result is returned unchanged.
    """
    entry_p = self._entry_p
    if self.issym:
        target = ffi.entry_symlink_w(entry_p) or ffi.entry_symlink(entry_p)
    else:
        target = ffi.entry_hardlink_w(entry_p) or ffi.entry_hardlink(entry_p)
    if not isinstance(target, bytes):
        return target
    try:
        return target.decode(self.header_codec)
    except UnicodeError:
        # Best effort: hand back the undecoded bytes rather than failing.
        return target
315
358
316
359
@linkpath.setter
def linkpath(self, value):
    """Set the entry's link target.

    Args:
        value (str | bytes): the new link target. A non-`bytes` value is
            encoded with `self.header_codec`; `bytes` are used unchanged.
    """
    # Imported locally because the module's import block is not fully
    # visible from this section of the file.
    import codecs

    if not isinstance(value, bytes):
        value = value.encode(self.header_codec)
    # Compare the canonical codec name so that aliases such as 'utf8',
    # 'UTF-8' or 'utf_8' select the same branch as 'utf-8'.
    try:
        is_utf8 = codecs.lookup(self.header_codec).name == 'utf-8'
    except (LookupError, TypeError):
        # Unknown or non-string codec combined with a `bytes` value: keep
        # the previous behaviour and copy the bytes unchanged.
        is_utf8 = False
    if is_utf8:
        ffi.entry_update_link_utf8(self._entry_p, value)
    else:
        ffi.entry_copy_link(self._entry_p, value)
319
367
320
368
# aliases for compatibility with the standard `tarfile` module
321
369
path = property (pathname .fget , pathname .fset , doc = "alias of pathname" )
0 commit comments