This repository was archived by the owner on Apr 22, 2025. It is now read-only.

fixes after deploy 03.25 #169

Merged on Mar 6, 2025 (17 commits)
sequence_processing_pipeline/FastQCJob.py (31 changes: 12 additions & 19 deletions)
@@ -58,30 +58,23 @@ def _get_commands(self):
         """
         results = []
 
-        if self.is_amplicon:
-            # skip this step for amplicon runs since raw and processed are the
-            # same file.
-            project_names = []
-        else:
-            # gather the parameters for processing all relevant raw fastq
-            # files.
-            params, project_names = self._scan_fastq_files(True)
-
-            for fwd_file_path, rev_file_path, output_path in params:
-                command = ['fastqc', '--noextract', '-t', str(self.nthreads),
-                           fwd_file_path, rev_file_path, '-o', output_path]
-                results.append(' '.join(command))
-
-        # next, do the same for the trimmed/filtered fastq files.
-        params, additional_project_names = self._scan_fastq_files(False)
-
+        # gather the parameters for processing all relevant raw fastq
+        # files.
+        params, project_names = self._scan_fastq_files(True)
         for fwd_file_path, rev_file_path, output_path in params:
             command = ['fastqc', '--noextract', '-t', str(self.nthreads),
                        fwd_file_path, rev_file_path, '-o', output_path]
             results.append(' '.join(command))
 
-        # remove duplicate project names from the list
-        project_names = list(set(project_names + additional_project_names))
+        if not self.is_amplicon:
+            # next, do the same for the trimmed/filtered fastq files.
+            params, additional_project_names = self._scan_fastq_files(False)
+            for fwd_file_path, rev_file_path, output_path in params:
+                command = ['fastqc', '--noextract', '-t', str(self.nthreads),
+                           fwd_file_path, rev_file_path, '-o', output_path]
+                results.append(' '.join(command))
+            # remove duplicate project names from the list
+            project_names = list(set(project_names + additional_project_names))
 
         return results, project_names
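The net effect: FastQC commands for the raw fastq files are now generated for every run type, while the second pass over trimmed/filtered files (and the project-name merge) only happens for non-amplicon runs. A minimal sketch of the resulting behavior, with hypothetical paths and a hypothetical thread count standing in for the real _scan_fastq_files() output:

# Sketch only: raw_pairs/filtered_pairs stand in for _scan_fastq_files().
def build_commands(raw_pairs, filtered_pairs, is_amplicon, nthreads=4):
    results = []
    # raw files are always QC'd, amplicon or not
    for fwd, rev, out in raw_pairs:
        results.append(' '.join(['fastqc', '--noextract', '-t',
                                 str(nthreads), fwd, rev, '-o', out]))
    if not is_amplicon:
        # for amplicon runs, raw and processed are the same file,
        # so this second pass is skipped entirely
        for fwd, rev, out in filtered_pairs:
            results.append(' '.join(['fastqc', '--noextract', '-t',
                                     str(nthreads), fwd, rev, '-o', out]))
    return results

raw = [('raw/s1_R1.fastq.gz', 'raw/s1_R2.fastq.gz', 'out/raw')]
filt = [('filt/s1_R1.fastq.gz', 'filt/s1_R2.fastq.gz', 'out/filt')]
print(len(build_commands(raw, filt, is_amplicon=True)))   # 1 command
print(len(build_commands(raw, filt, is_amplicon=False)))  # 2 commands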

sequence_processing_pipeline/GenPrepFileJob.py (2 changes: 1 addition & 1 deletion)
@@ -52,7 +52,7 @@ def __init__(self, run_dir, convert_job_path, qc_job_path, output_path,
 
         if isdir(self.reports_path):
             copytree(self.reports_path, reports_dir)
-        else:
+        elif not self.is_amplicon:
             # assume self.reports_path is a file.
             makedirs(reports_dir)
             copy(self.reports_path, reports_dir)
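With else: replaced by elif not self.is_amplicon:, the branch now has three outcomes instead of two. A standalone sketch of the patched logic (the helper name is hypothetical; in the source this lives in __init__):

from os import makedirs
from os.path import isdir
from shutil import copy, copytree

def stage_reports(reports_path, reports_dir, is_amplicon):
    # hypothetical helper mirroring the patched branch
    if isdir(reports_path):
        # a directory of reports: copy the whole tree
        copytree(reports_path, reports_dir)
    elif not is_amplicon:
        # assume reports_path is a single file and copy it
        makedirs(reports_dir)
        copy(reports_path, reports_dir)
    # amplicon runs without a reports directory: nothing is copied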
sequence_processing_pipeline/Job.py (6 changes: 6 additions & 0 deletions)
@@ -92,6 +92,8 @@ def __init__(self, root_dir, output_path, job_name, executable_paths,
         self.is_test = True if [
             x for x in stack() if 'unittest' in x.filename] else False
 
+        self.audit_folders = None
+
         # For each executable in the list, get its filename and use _which()
         # to see if it can be found. Directly pass an optional list of modules
         # to load before-hand, so that the binary can be found.
@@ -475,6 +477,10 @@ def audit(self, sample_ids):
         for root, dirs, files in walk(self.output_path):
             if 'zero_files' in root:
                 continue
+            if self.audit_folders is not None:
+                # let's check that any of the audit_folders is in root
+                if not [f for f in self.audit_folders if f in root]:
+                    continue
             files_found += [join(root, x) for x in files if
                             x.endswith(self.suffix)]
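When a subclass populates audit_folders, audit() now only counts files whose path contains one of those folder names; with the default of None the behavior is unchanged. A self-contained sketch of the filter, using hypothetical walk results in place of a real output tree:

from os.path import join

# hypothetical (root, dirs, files) triples, as os.walk() would yield them
walked = [('out/Proj1/filtered_sequences', [], ['s1.trimmed.fastq.gz']),
          ('out/Proj1/only-adapter-filtered', [], ['s1.fastq.gz']),
          ('out/Proj1/zero_files', [], ['s2.fastq.gz'])]

audit_folders = ['filtered_sequences']
suffix = 'fastq.gz'

files_found = []
for root, dirs, files in walked:
    if 'zero_files' in root:
        continue
    if audit_folders is not None:
        # keep this root only if at least one audit folder appears in it
        if not [f for f in audit_folders if f in root]:
            continue
    files_found += [join(root, x) for x in files if x.endswith(suffix)]

print(files_found)  # ['out/Proj1/filtered_sequences/s1.trimmed.fastq.gz']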

sequence_processing_pipeline/MultiQCJob.py (8 changes: 4 additions & 4 deletions)
@@ -116,7 +116,7 @@ def _get_failed_indexes(self, job_id):
         # the command used for this job
         completed_indexes = [int(cf.split('_')[-1]) for cf in completed_files]
 
-        all_indexes = list(range(1, len(self.commands) + 1))
+        all_indexes = list(range(1, len(self.array_cmds) + 1))
         failed_indexes = sorted(set(all_indexes) - set(completed_indexes))
 
         # generate log-file here instead of in run() where it can be
@@ -180,12 +180,12 @@ def _get_commands(self):
     def _generate_job_script(self):
         template = self.jinja_env.get_template("multiqc_job.sh")
 
-        array_cmds = self._get_commands()
+        self.array_cmds = self._get_commands()
 
         job_name = f'{self.qiita_job_id}_{self.job_name}'
         details_file_name = f'{self.job_name}.array-details'
         array_details = join(self.output_path, details_file_name)
-        array_params = "1-%d%%%d" % (len(array_cmds), self.pool_size)
+        array_params = "1-%d%%%d" % (len(self.array_cmds), self.pool_size)
         modules_to_load = ' '.join(self.modules_to_load)
 
         with open(self.job_script_path, mode="w", encoding="utf-8") as f:
@@ -202,7 +202,7 @@ def _generate_job_script(self):
 
         # save the .details file as well
         with open(array_details, 'w') as f:
-            f.write('\n'.join(array_cmds) + '\n')
+            f.write('\n'.join(self.array_cmds) + '\n')
 
         return self.job_script_path
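Promoting the local array_cmds to self.array_cmds is what lets _get_failed_indexes() above size the job array correctly: both methods now read the same list, instead of _get_failed_indexes() depending on a separate self.commands attribute. A trimmed-down sketch of the shared state (class body and values are hypothetical):

class MultiQCJobSketch:
    # hypothetical stand-in trimmed to the two methods that share state
    def _get_commands(self):
        return ['multiqc ... Proj1', 'multiqc ... Proj2']  # placeholders

    def _generate_job_script(self):
        self.array_cmds = self._get_commands()  # persisted on the instance
        return '1-%d%%%d' % (len(self.array_cmds), 8)  # pool_size of 8

    def _get_failed_indexes(self, completed_indexes):
        # every array index should have produced a completed-file marker
        all_indexes = list(range(1, len(self.array_cmds) + 1))
        return sorted(set(all_indexes) - set(completed_indexes))

job = MultiQCJobSketch()
print(job._generate_job_script())     # 1-2%8
print(job._get_failed_indexes([1]))   # [2]: index 2 never finished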

sequence_processing_pipeline/NuQCJob.py (1 change: 1 addition & 0 deletions)
@@ -80,6 +80,7 @@ def __init__(self, fastq_root_dir, output_path, sample_sheet_path,
         self.gres_value = gres_value
         self.pmls_path = pmls_path
         self.additional_fastq_tags = additional_fastq_tags
+        self.audit_folders = ['filtered_sequences']
 
         # for projects that use sequence_processing_pipeline as a dependency,
         # jinja_env must be set to sequence_processing_pipeline's root path,
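Setting audit_folders here ties into the new filter added to Job.audit() above: NuQCJob's audit now only counts outputs under filtered_sequences, so files in other directories of the output tree no longer contribute matches (see the audit sketch in the Job.py section).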