Skip to content

Commit edb6154

Browse files
authored
Merge pull request #2 from jlab/fetch_directories
Fetch directories
2 parents bcbe53d + eed36ed commit edb6154

File tree

3 files changed

+70
-26
lines changed

3 files changed

+70
-26
lines changed

.github/workflows/qiita-ci.yml

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -52,8 +52,11 @@ jobs:
5252
5353
# we need to download qiita directly so we have "easy" access to
5454
# all config files
55-
wget https://github.com/biocore/qiita/archive/dev.zip
56-
unzip dev.zip
55+
# wget https://github.com/biocore/qiita/archive/dev.zip
56+
# unzip dev.zip
57+
wget https://github.com/jlab/qiita/archive/refs/heads/tornado_FetchFileFromCentralHandler_alsoDirs_debug.zip
58+
unzip tornado_FetchFileFromCentralHandler_alsoDirs_debug.zip
59+
mv qiita-tornado_FetchFileFromCentralHandler_alsoDirs_debug qiita-dev
5760
5861
# pull out the port so we can modify the configuration file easily
5962
pgport=${{ job.services.postgres.ports[5432] }}

qiita_client/qiita_client.py

Lines changed: 39 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,9 @@
1515
from json import dumps
1616
from random import randint
1717
import fnmatch
18+
from io import BytesIO
19+
from zipfile import ZipFile
20+
1821

1922
try:
2023
from itertools import zip_longest
@@ -269,7 +272,7 @@ def _request_oauth2(self, req, rettype, *args, **kwargs):
269272
The request to execute
270273
rettype : string
271274
The return type of the function, either "json" or
272-
if e.g. files are transferred "content"
275+
"object" for the response object itself
273276
args : tuple
274277
The request args
275278
kwargs : dict
@@ -328,15 +331,15 @@ def _request_retry(self, req, url, rettype='json', **kwargs):
328331
The request to execute
329332
rettype : string
330333
The return type of the function, either "json" (default) or
331-
if e.g. files are transferred "content"
334+
"object" for the response object itself
332335
url : str
333336
The url to access in the server
334337
kwargs : dict
335338
The request kwargs
336339
337340
Returns
338341
-------
339-
dict or None or plain content IF rettype='content'
342+
dict or None or response object IF rettype='object'
340343
The JSON information in the request response, if any
341344
342345
Raises
@@ -391,13 +394,13 @@ def _request_retry(self, req, url, rettype='json', **kwargs):
391394
if rettype is None or rettype == 'json':
392395
return r.json()
393396
else:
394-
if rettype == 'content':
395-
return r.content
397+
if rettype == 'object':
398+
return r
396399
else:
397400
raise ValueError(
398401
("return type rettype='%s' cannot be "
399402
"understand. Choose from 'json' (default) "
400-
"or 'content!") % rettype)
403+
"or 'object!") % rettype)
401404
except ValueError:
402405
return None
403406
stime = randint(MIN_TIME_SLEEP, MAX_TIME_SLEEP)
@@ -418,7 +421,7 @@ def get(self, url, rettype='json', **kwargs):
418421
The url to access in the server
419422
rettype : string
420423
The return type of the function, either "json" (default) or
421-
if e.g. files are transferred "content"
424+
"object" for the response object itself
422425
kwargs : dict
423426
The request kwargs
424427
@@ -746,8 +749,8 @@ def _process_files_per_sample_fastq(self, files, prep_info,
746749
return sample_names, prep_info
747750

748751
def fetch_file_from_central(self, filepath, prefix=None):
749-
"""Moves content of a file from Qiita's central BASE_DATA_DIR to a
750-
local plugin file-system.
752+
"""Moves content of a file or directory from Qiita's central
753+
BASE_DATA_DIR to a local plugin file-system.
751754
752755
By default, this is exactly the same location, i.e. the return
753756
filepath is identical to the requested one and nothing is moved /
@@ -760,22 +763,24 @@ def fetch_file_from_central(self, filepath, prefix=None):
760763
----------
761764
filepath : str
762765
The filepath in Qiita's central BASE_DATA_DIR to the requested
763-
file content
766+
file or directory content
764767
prefix : str
765768
Primarily for testing: prefix the target filepath with this
766769
filepath prefix to
767-
a) in 'filesystem' mode: create an actual file copy (for testing)
770+
a) in 'filesystem' mode: create an actual file/directory copy
771+
(for testing)
768772
If prefix=None, nothing will be copied/moved
769-
b) in 'https' mode: flexibility to locate files differently in
770-
plugin local file system.
773+
b) in 'https' mode: flexibility to locate files/directories
774+
differently in plugin local file system.
771775
772776
Returns
773777
-------
774-
str : the filepath of the requested file within the local file system
778+
str : the filepath of the requested file or directory within the local
779+
file system
775780
"""
776781
target_filepath = filepath
777782
logger.debug(
778-
'Fetching file "%s" via protocol=%s from Qiita main.' % (
783+
'Fetching file/directory "%s" via protocol=%s from Qiita main.' % (
779784
filepath, self._plugincoupling))
780785

781786
if (prefix is not None) and (prefix != ""):
@@ -792,7 +797,10 @@ def fetch_file_from_central(self, filepath, prefix=None):
792797
if not os.path.exists(os.path.dirname(target_filepath)):
793798
os.makedirs(os.path.dirname(target_filepath))
794799

795-
shutil.copyfile(filepath, target_filepath)
800+
if os.path.isdir(filepath):
801+
shutil.copytree(filepath, target_filepath)
802+
else:
803+
shutil.copyfile(filepath, target_filepath)
796804

797805
return target_filepath
798806

@@ -802,17 +810,24 @@ def fetch_file_from_central(self, filepath, prefix=None):
802810
filepath = filepath[len(os.path.abspath(os.sep)):]
803811

804812
# actual call to Qiita central to obtain file content
805-
content = self.get(
813+
response = self.get(
806814
'/cloud/fetch_file_from_central/' + filepath,
807-
rettype='content')
815+
rettype='object')
816+
817+
# check if requested filepath is a single file OR a whole directory
818+
if 'Is-Qiita-Directory' in response.headers.keys():
819+
with ZipFile(BytesIO(response.content)) as zf:
820+
zf.extractall(path=target_filepath)
821+
else:
822+
content = response.content
808823

809-
# create necessary directory locally
810-
if not os.path.exists(os.path.dirname(target_filepath)):
811-
os.makedirs(os.path.dirname(target_filepath))
824+
# create necessary directory locally
825+
if not os.path.exists(os.path.dirname(target_filepath)):
826+
os.makedirs(os.path.dirname(target_filepath))
812827

813-
# write retrieved file content
814-
with open(target_filepath, 'wb') as f:
815-
f.write(content)
828+
# write retrieved file content
829+
with open(target_filepath, 'wb') as f:
830+
f.write(content)
816831

817832
return target_filepath
818833

qiita_client/tests/test_qiita_client.py

Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -526,6 +526,32 @@ def test_delete_file_from_central(self):
526526
# qiita main filepath, returned by delete_file_from_central
527527
self.assertTrue(exists(fp_deleted))
528528

529+
def test_fetch_directory(self):
530+
# a bit hacky, but should work as long as test database does not change
531+
ainfo = self.qclient.get('/qiita_db/artifacts/1/')
532+
base_data_dir = ainfo['files']['raw_forward_seqs'][0]['filepath'][
533+
:(-1 * len('raw_data/1_s_G1_L001_sequences.fastq.gz'))]
534+
535+
# creating a LOCAL test directory within base_data_dir as the DB entry
536+
# but no files exist. "job" is the corresponding mountpoint
537+
fp_test = join(base_data_dir, 'job', '2_test_folder')
538+
self._create_test_dir(prefix=fp_test)
539+
540+
# transmitting test directory to qiita main (remote)
541+
self.tester._plugincoupling = 'https'
542+
self.tester.push_file_to_central(fp_test)
543+
# fp_main = join(base_data_dir, join(*Path(fp_test).parts))
544+
545+
# fetch test directory from qiita main to a different location
546+
# (=prefix) than it was generated
547+
prefix = join(expanduser("~"), 'localFetch')
548+
fp_obs = self.tester.fetch_file_from_central(fp_test, prefix=prefix)
549+
550+
# test that a file of the freshly transferred directory from main has
551+
# expected file content
552+
with open(join(fp_obs, 'testdir', 'fileA.txt'), 'r') as f:
553+
self.assertIn('contentA', '\n'.join(f.readlines()))
554+
529555

530556
if __name__ == '__main__':
531557
main()

0 commit comments

Comments
 (0)