|
11 | 11 | from os.path import basename, exists
|
12 | 12 | from tempfile import mkstemp
|
13 | 13 | from json import dumps
|
| 14 | +import pandas as pd |
14 | 15 |
|
15 | 16 | from qiita_client.qiita_client import (QiitaClient, _format_payload,
|
16 | 17 | ArtifactInfo)
|
@@ -133,11 +134,17 @@ def test_get(self):
|
133 | 134 | # Files contain the full path, which it is hard to test, so get only
|
134 | 135 | # the basename of the files
|
135 | 136 | obs_files = obs.pop('files')
|
136 |
| - for k in obs_files: |
137 |
| - obs_files[k] = [basename(v) for v in obs_files[k]] |
| 137 | + obs_files = { |
| 138 | + k: [{'filepath': basename(vv['filepath']), |
| 139 | + 'size': vv['size']} for vv in v] |
| 140 | + for k, v in obs_files.items()} |
138 | 141 | exp_files = {
|
139 |
| - 'raw_barcodes': ['1_s_G1_L001_sequences_barcodes.fastq.gz'], |
140 |
| - 'raw_forward_seqs': ['1_s_G1_L001_sequences.fastq.gz']} |
| 142 | + 'raw_barcodes': [ |
| 143 | + {'filepath': '1_s_G1_L001_sequences_barcodes.fastq.gz', |
| 144 | + 'size': 58}], |
| 145 | + 'raw_forward_seqs': [ |
| 146 | + {'filepath': '1_s_G1_L001_sequences.fastq.gz', |
| 147 | + 'size': 58}]} |
141 | 148 |
|
142 | 149 | self.assertEqual(obs, exp)
|
143 | 150 | self.assertEqual(obs_files, exp_files)
|
@@ -204,6 +211,7 @@ def test_get_job_info(self):
|
204 | 211 | job_id = "3c9991ab-6c14-4368-a48c-841e8837a79c"
|
205 | 212 | obs = self.tester.get_job_info(job_id)
|
206 | 213 | exp = {'command': 'Pick closed-reference OTUs',
|
| 214 | + 'msg': '', |
207 | 215 | 'status': 'success',
|
208 | 216 | 'parameters': {'input_data': 2,
|
209 | 217 | 'reference': 1,
|
@@ -250,6 +258,86 @@ def test_complete_job(self):
|
250 | 258 | obs = self.tester.complete_job(job_id, True, artifacts_info=ainfo)
|
251 | 259 | self.assertIsNone(obs)
|
252 | 260 |
|
def test_artifact_and_preparation_files(self):
    """Check ``artifact_and_preparation_files`` and the per-sample helper.

    Three things are verified against ``self.tester``:

    * artifact 1 yields the two expected raw file names and a
      preparation table of shape ``(27, 22)``;
    * artifact 8 raises ``RuntimeError`` with the analysis-artifact
      message;
    * ``_process_files_per_sample_fastq`` pairs the files with the
      ``run_prefix`` rows, both for forward+reverse input and for
      forward-only input (where the second tuple slot is ``None``).

    With the third argument set to ``True`` the expectations drop
    ``sample.1``, the only sample whose forward file has size 99.
    NOTE(review): presumably that flag means "ignore small files" --
    confirm against the client implementation.
    """
    def _entry(filepath, size):
        # Build a new dict each time so inputs and expectations never
        # share objects.
        return {'filepath': filepath, 'size': size}

    def _run_prefix_table():
        # Three samples indexed by sample name, one 'run_prefix' column.
        table = pd.DataFrame.from_dict({
            'run_prefix': {"sample.1": 'file_1',
                           "sample.2": 'file_2',
                           "sample.3": 'file_3'}}, dtype=str)
        table.index.name = 'sample_name'
        return table

    def _check_both_passes(files, table, expected):
        # First pass (flag False): every sample is expected back.
        got_files, got_table = \
            self.tester._process_files_per_sample_fastq(
                files, table, False)
        self.assertEqual(got_files, expected)
        self.assertEqual(got_table.shape, (3, 1))

        # Second pass (flag True) on the very same inputs: sample.1 is
        # expected to be gone from both outputs.
        got_files, got_table = \
            self.tester._process_files_per_sample_fastq(
                files, table, True)
        expected.pop('sample.1')
        self.assertEqual(got_files, expected)
        self.assertEqual(got_table.shape, (2, 1))

    # --- successful retrieval ---
    observed_files, observed_prep = \
        self.tester.artifact_and_preparation_files(1)
    # folders are dynamic, so only the file names are compared
    observed_names = {}
    for file_type, paths in observed_files.items():
        observed_names[file_type] = [basename(path) for path in paths]
    expected_names = {
        'raw_forward_seqs': ['1_s_G1_L001_sequences.fastq.gz'],
        'raw_barcodes': ['1_s_G1_L001_sequences_barcodes.fastq.gz']}
    self.assertEqual(observed_names, expected_names)
    self.assertEqual(observed_prep.shape, (27, 22))

    # --- analysis artifacts are rejected ---
    error_pattern = ('Artifact 8 is an analysis '
                     'artifact, this method is meant to work '
                     'with artifacts linked to a preparation.')
    with self.assertRaisesRegex(RuntimeError, error_pattern):
        self.tester.artifact_and_preparation_files(8)

    # --- _process_files_per_sample_fastq: forward and reverse reads ---
    paired_files = {
        'raw_forward_seqs': [
            _entry('/X/file_3_R1.fastq.gz', 101),
            _entry('/X/file_1_R1.fastq.gz', 99),
            _entry('/X/file_2_R1.fastq.gz', 101)],
        'raw_reverse_seqs': [
            _entry('/X/file_2_R2.fastq.gz', 101),
            _entry('/X/file_1_R2.fastq.gz', 101),
            _entry('/X/file_3_R2.fastq.gz', 101)]}
    paired_expected = {
        'sample.1': (_entry('/X/file_1_R1.fastq.gz', 99),
                     _entry('/X/file_1_R2.fastq.gz', 101)),
        'sample.2': (_entry('/X/file_2_R1.fastq.gz', 101),
                     _entry('/X/file_2_R2.fastq.gz', 101)),
        'sample.3': (_entry('/X/file_3_R1.fastq.gz', 101),
                     _entry('/X/file_3_R2.fastq.gz', 101))}
    _check_both_passes(paired_files, _run_prefix_table(), paired_expected)

    # --- _process_files_per_sample_fastq: forward reads only ---
    forward_files = {
        'raw_forward_seqs': [
            _entry('/X/file_3_R1.fastq.gz', 101),
            _entry('/X/file_1_R1.fastq.gz', 99),
            _entry('/X/file_2_R1.fastq.gz', 101)]}
    forward_expected = {
        'sample.1': (_entry('/X/file_1_R1.fastq.gz', 99), None),
        'sample.2': (_entry('/X/file_2_R1.fastq.gz', 101), None),
        'sample.3': (_entry('/X/file_3_R1.fastq.gz', 101), None)}
    _check_both_passes(forward_files, _run_prefix_table(),
                       forward_expected)
| 340 | + |
253 | 341 |
|
254 | 342 | if __name__ == '__main__':
|
255 | 343 | main()
|
0 commit comments