@@ -432,38 +432,29 @@ def convert_raw_to_fastq(self):
432
432
return failed_samples
433
433
434
434
def generate_sequence_counts(self):
    """Merge the pre-computed per-sample read counts into SeqCounts.csv.

    Other implementations of generate_sequence_counts have to run the
    sequence counting themselves; for PacBio the counting has already
    been done, so this method only merges the results. Each
    ``<sample>.fastq.gz`` found one directory below
    ``self.raw_fastq_files_path`` is expected to have a sibling
    ``<sample>.counts.txt`` whose (stripped) content is the read count.

    Side effects:
        Sets ``self.reports_path`` to ``SeqCounts.csv`` inside
        ``self.pipeline.output_path`` and writes a CSV there with the
        columns ``SampleID`` and ``# Reads``.

    Raises:
        ValueError: if one or more fastq.gz files have no counts file;
            the message lists the affected sample names.
    """
    fastq_suffix = '.fastq.gz'
    counts_suffix = '.counts.txt'
    columns = ['SampleID', '# Reads']

    # glob() returns paths in arbitrary order; sort them so the report
    # is reproducible from run to run.
    gz_files = sorted(glob(f'{self.raw_fastq_files_path}/*/*.fastq.gz'))
    data, missing_files = [], []

    for gzf in gz_files:
        # Swap only the trailing suffix: str.replace() would also rewrite
        # a '.fastq.gz' that happens to appear in a directory name.
        stem = gzf[:-len(fastq_suffix)]
        cf = stem + counts_suffix
        sn = basename(stem)
        if not exists(cf):
            # collect every missing sample so the error reports them all
            missing_files.append(sn)
            continue
        with open(cf, 'r') as fh:
            # the count is passed through as text, exactly as stored
            counts = fh.read().strip()
        data.append({'SampleID': sn, '# Reads': counts})

    if missing_files:
        raise ValueError(f'Missing count files: {missing_files}')

    # Explicit columns keep the header row in the CSV even when no
    # fastq files were found (an empty DataFrame has no columns).
    df = pd.DataFrame(data, columns=columns)
    self.reports_path = join(self.pipeline.output_path,
                             'SeqCounts.csv')
    df.to_csv(self.reports_path, index=False)
467
458
468
459
def integrate_results(self):
    """Do nothing; this step is a no-op placeholder in this class."""
    return None
0 commit comments