Skip to content

Commit 551bfd1

Browse files
committed
generate_sequence_counts
1 parent 827a372 commit 551bfd1

File tree

2 files changed

+29
-43
lines changed

2 files changed

+29
-43
lines changed

src/qp_klp/Protocol.py

Lines changed: 21 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -432,38 +432,29 @@ def convert_raw_to_fastq(self):
432432
return failed_samples
433433

434434
def generate_sequence_counts(self):
435-
# config = self.pipeline.get_software_configuration('tell-seq')
436-
437-
# files_to_count_path = join(self.pipeline.output_path,
438-
# 'files_to_count.txt')
439-
440-
# with open(files_to_count_path, 'w') as f:
441-
# for root, _, files in walk(self.raw_fastq_files_path):
442-
# for _file in files:
443-
# if determine_orientation(_file) in ['R1', 'R2']:
444-
# print(join(root, _file), file=f)
445-
446-
# job = SeqCountsJob(self.pipeline.run_dir,
447-
# self.pipeline.output_path,
448-
# config['queue'],
449-
# config['nodes'],
450-
# config['wallclock_time_in_minutes'],
451-
# config['normcount_mem_limit'],
452-
# config['modules_to_load'],
453-
# self.master_qiita_job_id,
454-
# config['job_max_array_length'],
455-
# files_to_count_path,
456-
# self.pipeline.get_sample_sheet_path(),
457-
# cores_per_task=config['tellread_cores'])
458-
459-
# if 'SeqCountsJob' not in self.skip_steps:
460-
# job.run(callback=self.job_callback)
461-
462-
# if successful, set self.reports_path
435+
# for other instances of generate_sequence_counts in other objects
436+
# the sequence counting needs to be done; however, for PacBio we
437+
# already have done it and just need to merge the results.
438+
gz_files = glob(f'{self.raw_fastq_files_path}/*/*.fastq.gz')
439+
data, missing_files = [], []
440+
441+
for gzf in gz_files:
442+
cf = gzf.replace('.fastq.gz', '.counts.txt')
443+
sn = basename(cf).replace('.counts.txt', '')
444+
if not exists(cf):
445+
missing_files.append(sn)
446+
continue
447+
with open(cf, 'r') as fh:
448+
counts = fh.read().strip()
449+
data.append({'SampleID': sn, '# Reads': counts})
450+
451+
if missing_files:
452+
raise ValueError(f'Missing count files: {missing_files}')
453+
454+
df = pd.DataFrame(data)
463455
self.reports_path = join(self.pipeline.output_path,
464456
'SeqCounts.csv')
465-
open(self.reports_path, 'w').write(
466-
'SampleID,# Reads\nA1,100')
457+
df.to_csv(self.reports_path, index=False)
467458

468459
def integrate_results(self):
469460
pass

tests/data/configuration_profiles/pacbio_metagenomic.json

Lines changed: 8 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -8,23 +8,20 @@
88
"nprocs": 16,
99
"queue": "qiita",
1010
"wallclock_time_in_minutes": 216,
11-
"modules_to_load": [
12-
"bclconvert_3.7.5"
13-
],
14-
"executable_path": "bcl-convert",
15-
"per_process_memory_limit": "10gb"
11+
"modules_to_load": [],
12+
"executable_path": "",
13+
"per_process_memory_limit": "1gb"
1614
},
1715
"nu-qc": {
1816
"nodes": 1,
1917
"cpus_per_task": 8,
2018
"queue": "qiita",
2119
"wallclock_time_in_minutes": 240,
22-
"minimap2_databases": ["/databases/minimap2/db_1.mmi", "/databases/minimap2/db_2.mmi"],
23-
"modules_to_load": [
24-
"fastp_0.20.1",
25-
"samtools_1.12",
26-
"minimap2_2.18"
20+
"minimap2_databases": [
21+
"/databases/minimap2/db_1.mmi",
22+
"/databases/minimap2/db_2.mmi"
2723
],
24+
"modules_to_load": ["fastp_0.20.1", "samtools_1.12", "minimap2_2.18"],
2825
"fastp_executable_path": "fastp",
2926
"minimap2_executable_path": "minimap2",
3027
"samtools_executable_path": "samtools",
@@ -49,9 +46,7 @@
4946
"queue": "qiita",
5047
"nthreads": 16,
5148
"wallclock_time_in_minutes": 60,
52-
"modules_to_load": [
53-
"fastqc_0.11.5"
54-
],
49+
"modules_to_load": ["fastqc_0.11.5"],
5550
"fastqc_executable_path": "fastqc",
5651
"multiqc_executable_path": "multiqc",
5752
"multiqc_config_file_path": "sequence_processing_pipeline/multiqc-bclconvert-config.yaml",

0 commit comments

Comments
 (0)