Skip to content

Commit

Permalink
Merge pull request #32 from pombredanne/issue_31
Browse files Browse the repository at this point in the history
Issue 31
  • Loading branch information
pombredanne committed Jul 14, 2015
2 parents 614c438 + 8d41972 commit 755ead9
Show file tree
Hide file tree
Showing 13 changed files with 296 additions and 70 deletions.
3 changes: 3 additions & 0 deletions src/commoncode/command.py
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,9 @@
"""

logger = logging.getLogger(__name__)
# import sys
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
# logger.setLevel(logging.DEBUG)

# current directory is the root dir of this library
curr_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
Expand Down
13 changes: 10 additions & 3 deletions src/extractcode/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@

from __future__ import print_function, absolute_import

import logging
import os
import posixpath
import re
Expand All @@ -32,6 +33,13 @@
from commoncode import fileutils


logger = logging.getLogger(__name__)
DEBUG = False
# import sys
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
# logger.setLevel(logging.DEBUG)


root_dir = os.path.join(os.path.dirname(__file__), 'bin')

# Suffix added to extracted target_dir paths
Expand Down Expand Up @@ -134,9 +142,8 @@ def extracted_files(location):
Yield the locations of extracted files in a directory location.
"""
assert location
for top, _dirs, files in os.walk(location, topdown=True):
for f in files:
yield os.path.join(top, f)
logger.debug('extracted_files for: %(location)r' % locals())
return fileutils.file_iter(location)


def new_name(location, is_dir=False):
Expand Down
63 changes: 38 additions & 25 deletions src/extractcode/archive.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,13 +27,12 @@
from collections import namedtuple
import functools
import logging
import os

from commoncode import fileutils
from commoncode import filetype
import typecode

import extractcode

from extractcode import all_kinds
from extractcode import regular
from extractcode import package
Expand All @@ -46,10 +45,20 @@
from extractcode import patch
from extractcode import sevenzip
from extractcode import libarchive2
from extractcode import extracted_files
from extractcode.uncompress import uncompress_gzip
from extractcode.uncompress import uncompress_bzip2


logger = logging.getLogger(__name__)
DEBUG = True
DEBUG_DEEP = False
# import sys
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
# logger.setLevel(logging.DEBUG)



"""
Archive formats handling. The purpose of this module is to select an extractor
suitable for the accurate extraction of a given kind of archive. An extractor is
Expand Down Expand Up @@ -98,12 +107,6 @@
extract_ishield = sevenzip.extract
extract_Z = sevenzip.extract

DEBUG = False
logger = logging.getLogger(__name__)
# import sys
# logging.basicConfig(level=logging.DEBUG, stream=sys.stdout)
# logger.setLevel(logging.DEBUG)


Handler = namedtuple('Handler', ['name', 'types', 'mimes', 'exts', 'kind', 'extractors'])

Expand All @@ -112,7 +115,6 @@ def can_extract(location):
"""
Return True if this location can be extracted by some handler.
"""
assert location
handlers = list(get_handlers(location))
if handlers:
return True
Expand All @@ -123,8 +125,7 @@ def should_extract(location, kinds):
Return True if this location should be extracted based on the provided
kinds
"""
assert location
assert kinds
location = os.path.abspath(os.path.expanduser(location))
if get_extractor(location, kinds):
return True

Expand All @@ -135,7 +136,7 @@ def get_extractor(location, kinds=all_kinds):
an None if no extract function is found.
"""
assert location
assert kinds
location = os.path.abspath(os.path.expanduser(location))
extractors = get_extractors(location, kinds)
if not extractors:
return None
Expand All @@ -157,14 +158,15 @@ def get_extractors(location, kinds=all_kinds):
Return a list of extractors that can extract the file at
location or an empty list.
"""
location = os.path.abspath(os.path.expanduser(location))
if filetype.is_file(location):
handlers = get_handlers(location)
handlers = list(get_handlers(location))
if handlers:
candidates = score_handlers(handlers)
if candidates:
best = pick_best_handler(candidates, kinds)
if best:
return best.extractors
if candidates:
best = pick_best_handler(candidates, kinds)
if best:
return best.extractors
return []


Expand All @@ -174,7 +176,6 @@ def get_handlers(location):
extension_matched,) for this `location`.
"""
if filetype.is_file(location):

T = typecode.contenttype.get_type(location)
ftype = T.filetype_file.lower()
mtype = T.mimetype_file
Expand All @@ -199,13 +200,13 @@ def get_handlers(location):
if handler.exts:
extension_matched = location.lower().endswith(handler.exts)

if DEBUG:
if DEBUG_DEEP:
logger.debug('get_handlers: %(location)s: ftype: %(ftype)s, mtype: %(mtype)s ' % locals())
logger.debug('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())


if type_matched or mime_matched or extension_matched:
if DEBUG:
if DEBUG_DEEP:
logger.debug('get_handlers: %(location)s: matched type: %(type_matched)s, mime: %(mime_matched)s, ext: %(extension_matched)s' % locals())
logger.debug('get_handlers: %(location)s: handler: %(handler)r' % locals())
yield handler, type_matched, mime_matched, extension_matched
Expand Down Expand Up @@ -307,21 +308,33 @@ def extract_twice(location, target_dir, extractor1, extractor2):
the `extractor1` function to a temporary directory then the `extractor2`
function on the extracted payload of `extractor1`.
Return a mapping of path->warning_message.
Return a list of warning messages. Raise exceptions on errors.
Typical nested archives include compressed tarballs and RPMs (containing a
compressed cpio).
Note: it would be easy to support deeper extractor chains, but this gets
hard to trace and debug very quickly. A depth of two is simple and sane and
covers most common cases.
"""
abs_location = os.path.abspath(os.path.expanduser(location))
abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
# extract first the intermediate payload to a temp dir
temp_target = fileutils.get_temp_dir('extract')
warnings = extractor1(location, temp_target)
warnings = extractor1(abs_location, temp_target)
if DEBUG:
logger.debug('extract_twice: temp_target: %(temp_target)r' % locals())

# extract this intermediate payload to the final target_dir
try:
for extracted1_loc in extractcode.extracted_files(temp_target):
warnings.extend(extractor2(extracted1_loc, target_dir))
inner_archives = list(extracted_files(temp_target))
if not inner_archives:
warnings.append(location + ': No files found in archive.')
else:
warnings.append(location+ ': No files found in archive.')
for extracted1_loc in inner_archives:
if DEBUG:
logger.debug('extract_twice: extractor2: %(extracted1_loc)r' % locals())
warnings.extend(extractor2(extracted1_loc, target_dir))
finally:
# cleanup the temporary output from extractor1
fileutils.delete(temp_target)
Expand Down
32 changes: 18 additions & 14 deletions src/extractcode/extract.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,8 @@
from collections import namedtuple
from functools import partial
import logging
from os.path import abspath
from os.path import expanduser
from os.path import join

from commoncode import fileutils
Expand Down Expand Up @@ -117,11 +119,11 @@ def extract(location, kinds=extractcode.default_kinds, recurse=False):
"""
ignored = partial(ignore.is_ignored, ignores=ignore.default_ignores, unignores={})
if DEBUG:
logger.debug('extract:start:' + location + ' recurse:' + repr(recurse) + '\n')

for top, dirs, files in fileutils.walk(location, ignored):
logger.debug('extract:start: %(location)r recurse: %(recurse)r\n' % locals())
abs_location = abspath(expanduser(location))
for top, dirs, files in fileutils.walk(abs_location, ignored):
if DEBUG:
logger.debug('extract:walk: top:' + top + ' dirs:' + repr(dirs) + ' files:' + repr(files))
logger.debug('extract:walk: top: %(top)r dirs: %(dirs)r files: r(files)r' % locals())

if not recurse:
if DEBUG:
Expand All @@ -135,26 +137,28 @@ def extract(location, kinds=extractcode.default_kinds, recurse=False):
loc = join(top, f)
if not recurse and extractcode.is_extraction_path(loc):
if DEBUG:
logger.debug('extract:walk not recurse: skipped file:' + loc)
logger.debug('extract:walk not recurse: skipped file: %(loc)r' % locals())
continue

if not archive.should_extract(loc, kinds):
if DEBUG:
logger.debug('extract:walk: skipped file: not should_extract:' + loc)
logger.debug('extract:walk: skipped file: not should_extract: %(loc)r' % locals())
continue

target = join(top, extractcode.get_extraction_path(loc))
target = join(abspath(top), extractcode.get_extraction_path(loc))
if DEBUG:
logger.debug('extract:target: %(target)r' % locals())
for xevent in extract_file(loc, target, kinds):
if DEBUG:
logger.debug('extract:walk:extraction event:' + repr(xevent))
logger.debug('extract:walk:extraction event: %(xevent)r' % locals())
yield xevent

if recurse:
if DEBUG:
logger.debug('extract:walk: recursing on:' + target)
logger.debug('extract:walk: recursing on target: %(target)r' % locals())
for xevent in extract(target, kinds, recurse):
if DEBUG:
logger.debug('extract:walk:recurse:extraction event:' + repr(xevent))
logger.debug('extract:walk:recurse:extraction event: %(xevent)r' % locals())
yield xevent


Expand All @@ -167,8 +171,7 @@ def extract_file(location, target, kinds=extractcode.default_kinds):
errors = []
extractor = archive.get_extractor(location, kinds)
if DEBUG:
logger.debug('extract_file: extractor: for:' + location
+ ' with kinds: ' + repr(kinds) + ': '
logger.debug('extract_file: extractor: for: %(location)r with kinds: r(kinds)r : ' % locals()
+ getattr(extractor, '__module__', '')
+ '.' + getattr(extractor, '__name__', ''))
if extractor:
Expand All @@ -178,12 +181,13 @@ def extract_file(location, target, kinds=extractcode.default_kinds):
# if there is an error, the extracted files will not be moved
# to target
tmp_tgt = fileutils.get_temp_dir('extract')
warnings.extend(extractor(location, tmp_tgt))
abs_location= abspath(expanduser(location))
warnings.extend(extractor(abs_location, tmp_tgt))
fileutils.copytree(tmp_tgt, target)
fileutils.delete(tmp_tgt)
except Exception, e:
if DEBUG:
logger.debug('extract_file: ERROR: %(errors)r, %(e)r.\n' % locals())
logger.debug('extract_file: ERROR: %(location)r: %(errors)r, %(e)r.\n' % locals())
errors = [str(e).strip(' \'"')]
finally:
yield ExtractEvent(location, target, done=True, warnings=warnings, errors=errors)
25 changes: 9 additions & 16 deletions src/extractcode/libarchive2.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,13 +101,14 @@ def extract(location, target_dir):
"""
assert location
assert target_dir

abs_location = os.path.abspath(os.path.expanduser(location))
abs_target_dir = os.path.abspath(os.path.expanduser(target_dir))
warnings = []

for entry in list_entries(location):
for entry in list_entries(abs_location):
if not (entry.isdir or entry.isfile):
continue
_target_path = entry.write(target_dir, transform_path=paths.resolve)
_target_path = entry.write(abs_target_dir, transform_path=paths.resolve)
if entry.warnings:
msgs = [w.strip('"\' ') for w in entry.warnings if w.strip('"\' ')]
msgs = msgs or ['No message provided']
Expand All @@ -121,10 +122,12 @@ def list_entries(location):
"""
Return a list entries of archive file at `location`.
"""
assert os.path.exists(location)
assert os.path.isfile(location)
assert location
abs_location = os.path.abspath(os.path.expanduser(location))
assert os.path.isfile(abs_location)

# TODO: harden error handling
with Archive(location) as archive:
with Archive(abs_location) as archive:
for entry in archive:
yield entry

Expand Down Expand Up @@ -314,16 +317,6 @@ def write(self, target_dir, transform_path=lambda x: x):
self.warnings.append(msg)
return target_path

# except ArchiveError, ae:
# if ae.msg and ae.msg.startswith('Encrypted file is unsupported'):
# raise ArchiveErrorPasswordProtected(root_ex=ae)
# else:
# raise
#
# except Exception, e:
# raise
# raise ArchiveError(root_ex=e)

def __repr__(self):
return ('Entry('
'path=%(path)r,'
Expand Down
Loading

0 comments on commit 755ead9

Please sign in to comment.