Skip to content

Commit

Permalink
Fixed missing `fastqwiper` namespace in intra-package imports
Browse files: browse the repository at this point in the history
  • Loading branch information
mazzalab committed Dec 9, 2024
1 parent fc07d07 commit c4b17c3
Show file tree
Hide file tree
Showing 5 changed files with 45 additions and 30 deletions.
5 changes: 4 additions & 1 deletion .vscode/settings.json
Original file line number Diff line number Diff line change
Expand Up @@ -5,5 +5,8 @@
"tests"
],
"python.testing.unittestEnabled": false,
"python.testing.pytestEnabled": true
"python.testing.pytestEnabled": true,
"cSpell.words": [
"fastqwiper"
]
}
51 changes: 31 additions & 20 deletions fastqwiper/fastq_gather.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import subprocess
from pathlib import Path
from enum import auto, Enum
from wipertool_abstract import WiperTool
from fastqwiper.wipertool_abstract import WiperTool


class GatherFastq(WiperTool):
Expand All @@ -16,39 +16,44 @@ def set_parser(self, parser: argparse.ArgumentParser):
class OsEnum(Enum):
UNIX = auto()
CROSS_PLATFORM = auto()

class FastqExtEnum(Enum):
FASTQ = auto()
FQ = auto()
FASTQ_GZ = auto()
FQ_GZ = auto()

def files_choices(choices, fname):
# Extract double extensions if present
path = Path(fname)
if len(path.suffixes) == 2: # Handle double extensions like ".fastq.gz"
ext = ''.join(path.suffixes)[1:] # Combine the suffixes and remove the dot
# Combine the suffixes and remove the dot
ext = ''.join(path.suffixes)[1:]
else:
ext = path.suffix[1:] # Single extension

if ext not in choices:
parser.error(f"File '{fname}' doesn't end with one of {choices}")
raise ValueError(f"File '{fname}' doesn't end with one of {choices}")
parser.error(
f"File '{fname}' doesn't end with one of {choices}")
raise ValueError(
f"File '{fname}' doesn't end with one of {choices}")
return fname

parser.add_argument(
"-i",
"--in_fastq",
nargs="+",
type=lambda s:files_choices((e.name.lower().replace("_", ".") for e in FastqExtEnum),s),
nargs="+",
type=lambda s: files_choices(
(e.name.lower().replace("_", ".") for e in FastqExtEnum), s),
help="List of FASTQ files to be joined",
required=True,
)
parser.add_argument(
"-o",
"--out_fastq",
type=lambda s:files_choices((e.name.lower().replace("_", ".") for e in FastqExtEnum),s),
help="Name of the resulting fastq file",
"-o",
"--out_fastq",
type=lambda s: files_choices(
(e.name.lower().replace("_", ".") for e in FastqExtEnum), s),
help="Name of the resulting fastq file",
required=True
)
# Optional arguments
Expand Down Expand Up @@ -100,8 +105,10 @@ def concatenate_fastq(input_files: list[str], output_file: str, prefix: str, ops
return

# Separate gzipped files from regular files
gz_files = [f for f in files if f.endswith("fastq.gz") or f.endswith("fq.gz")]
regular_files = [f for f in files if f.endswith(".fastq") or f.endswith(".fq")]
gz_files = [f for f in files if f.endswith(
"fastq.gz") or f.endswith("fq.gz")]
regular_files = [f for f in files if f.endswith(
".fastq") or f.endswith(".fq")]

try:
if opsys == "cross_platform":
Expand Down Expand Up @@ -149,7 +156,8 @@ def __concat_unix(regular_files: str, gz_files: str, outfile: str):
uncompressed_file = outfile.removesuffix(".gz")

process_compress = subprocess.Popen(
f"mv {outfile} {uncompressed_file} && gzip {uncompressed_file}",
f"mv {outfile} {uncompressed_file} && gzip {
uncompressed_file}",
shell=True,
stdout=subprocess.PIPE,
stderr=subprocess.PIPE,
Expand All @@ -174,8 +182,9 @@ def __concat_cross_platform(regular_files, gz_files, outfile: str):
# Count the replacement characters
replacement_count = data.count("�")
if replacement_count > 0:
print(f"Warning: File '{file_path}' contains {replacement_count} unreadable characters that were replaced.")

print(f"Warning: File '{file_path}' contains {
replacement_count} unreadable characters that were replaced.")

if isinstance(output_file, gzip.GzipFile):
# Write as bytes for gzip
output_file.write(data.encode())
Expand All @@ -187,7 +196,7 @@ def __concat_cross_platform(regular_files, gz_files, outfile: str):
for file_path in gz_files:
with gzip.open(file_path, "rb") as infile:
data = infile.read()

if isinstance(output_file, gzip.GzipFile):
# Write as bytes for gzip
output_file.write(data)
Expand All @@ -197,8 +206,10 @@ def __concat_cross_platform(regular_files, gz_files, outfile: str):
decoded_data = data.decode("utf-8")
except UnicodeDecodeError:
# Gracefully handle decoding errors
print(f"Warning: Decoding error in {file_path}, replacing invalid characters.")
decoded_data = data.decode("utf-8", errors="replace")
print(f"Warning: Decoding error in {
file_path}, replacing invalid characters.")
decoded_data = data.decode(
"utf-8", errors="replace")
output_file.write(decoded_data)


Expand Down
5 changes: 3 additions & 2 deletions fastqwiper/fastq_scatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import subprocess
from pathlib import Path
from enum import auto, Enum
from wipertool_abstract import WiperTool
from fastqwiper.wipertool_abstract import WiperTool


class SplitFastq(WiperTool):
Expand Down Expand Up @@ -35,7 +35,8 @@ def file_choices(choices, fname):
if ext not in choices:
parser.error(
f"File '{fname}' doesn't end with one of {choices}")
raise ValueError(f"File '{fname}' doesn't end with one of {choices}")
raise ValueError(
f"File '{fname}' doesn't end with one of {choices}")
return fname

parser.add_argument("-f", "--fastq", type=lambda s: file_choices((e.name.lower().replace("_", ".")
Expand Down
6 changes: 3 additions & 3 deletions fastqwiper/report_gather.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
import argparse
import re
from wipertool_abstract import WiperTool
from fastq_wiper import (TOTAL_LINES, WELLFORMED, CLEAN, MISPLACED_HEADER,
BAD_SEQ, BAD_PLUS, BAD_QUAL, LENGTH_SEQ_QUAL, BLANKS)
from fastqwiper.wipertool_abstract import WiperTool
from fastqwiper.fastq_wiper import (TOTAL_LINES, WELLFORMED, CLEAN, MISPLACED_HEADER,
BAD_SEQ, BAD_PLUS, BAD_QUAL, LENGTH_SEQ_QUAL, BLANKS)

# region CONST REGEX for output
INT_PERCENT_REGEX: str = r"\s*(?P<var>\d+)\s*\(.+\)\s*"
Expand Down
8 changes: 4 additions & 4 deletions fastqwiper/wipertools.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,10 @@
import argparse
import json
import os.path
from report_gather import GatherReport
from fastq_scatter import SplitFastq
from fastq_wiper import FastqWiper
from fastq_gather import GatherFastq
from fastqwiper.report_gather import GatherReport
from fastqwiper.fastq_scatter import SplitFastq
from fastqwiper.fastq_wiper import FastqWiper
from fastqwiper.fastq_gather import GatherFastq

# sys.path.insert(0, os.path.abspath(
# os.path.join(os.path.dirname(__file__), "..")))
Expand Down

0 comments on commit c4b17c3

Please sign in to comment.