diff --git a/benches/array_bench.rs b/benches/array_bench.rs index c055449..d3317a0 100644 --- a/benches/array_bench.rs +++ b/benches/array_bench.rs @@ -7,13 +7,13 @@ use std::iter::FromIterator; fn run_benchmark(c: &mut Criterion) { let mut group = c.benchmark_group("bench-byte-array"); group.bench_function("from-bytes", |b| { - b.iter(|| SSeq::from_bytes(b"AGTCCTCTGCATTTTG")) + b.iter(|| SSeq::from_bytes(b"AGTCCTCTGCATTTTG")); }); group.bench_function("from-bytes-unchecked", |b| { - b.iter(|| SSeq::from_bytes_unchecked(b"AGTCCTCTGCATTTTG")) + b.iter(|| SSeq::from_bytes_unchecked(b"AGTCCTCTGCATTTTG")); }); group.bench_function("from-iter", |b| { - b.iter(|| SSeq::from_iter(b"AGTCCTCTGCATTTTG")) + b.iter(|| SSeq::from_iter(b"AGTCCTCTGCATTTTG")); }); group.finish(); } diff --git a/benches/benchmarks.rs b/benches/benchmarks.rs index 7ebca91..c884d2f 100644 --- a/benches/benchmarks.rs +++ b/benches/benchmarks.rs @@ -115,12 +115,12 @@ fn run_fastq_lz4_benchmark(c: &mut Criterion) { c.bench_function("bench-lz4-wc", |b| { b.iter( || assert_eq!(lz4_count(INTERLEAVED_LZ4_FASTQ), 16000), // 16000 lines - ) + ); }); c.bench_function("bench-lz4-read-pair-iter-count", |b| { b.iter( || assert_eq!(read_pair_count(INTERLEAVED_LZ4_FASTQ), 2000), // 2000 read pairs - ) + ); }); // let chemistry: ChemistryDef = // serde_json::from_reader(File::open("tests/rna_read/sc_vdj_chemistry.json").unwrap()) @@ -141,12 +141,12 @@ fn run_fastq_gz_benchmark(c: &mut Criterion) { c.bench_function("bench-gz-wc", |b| { b.iter( || assert_eq!(gz_count(INTERLEAVED_GZ_FASTQ), 16000), // 16000 lines - ) + ); }); c.bench_function("bench-gz-read-pair-iter-count", |b| { b.iter( || assert_eq!(read_pair_count(INTERLEAVED_GZ_FASTQ), 2000), // 2000 read pairs - ) + ); }); // let chemistry: ChemistryDef = // serde_json::from_reader(File::open("tests/rna_read/sc_vdj_chemistry.json").unwrap()) @@ -167,12 +167,12 @@ fn run_fastq_benchmark(c: &mut Criterion) { c.bench_function("bench-fastq-wc", |b| { b.iter( || assert_eq!(simple_count(INTERLEAVED_FASTQ), 16000), // 16000 lines - ) + ); }); c.bench_function("bench-fastq-read-pair-iter-count", |b| { b.iter( || assert_eq!(read_pair_count(INTERLEAVED_FASTQ), 2000), // 2000 read pairs - ) + ); }); // let chemistry: ChemistryDef = // serde_json::from_reader(File::open("tests/rna_read/sc_vdj_chemistry.json").unwrap()) @@ -222,7 +222,7 @@ fn sseq_serde_bincode(c: &mut Criterion) { let mut b = Vec::new(); bincode::serialize_into(&mut b, &sseqs).unwrap(); assert!(!b.is_empty()); - }) + }); }); } diff --git a/src/adapter_trimmer.rs b/src/adapter_trimmer.rs index 3013aa4..67ee9a9 100644 --- a/src/adapter_trimmer.rs +++ b/src/adapter_trimmer.rs @@ -1,11 +1,9 @@ //! Trim adapters from reads using a combination of //! k-mer matches, sparse alignment and banded alignment. -//! Inspired by the [cutadapt](https://cutadapt.readthedocs.io/) -//! tool. +//! Inspired by the [cutadapt](https://cutadapt.readthedocs.io/) tool. //! //! # Features/Limitations -//! * Supports regular, anchored and non-internal, 3' and 5' -//! adapter trimming +//! * Supports regular, anchored and non-internal, 3' and 5' adapter trimming //! * Linked adapters are not supported as of now //! * Allowed error rate for the adapter is 10% //! @@ -23,13 +21,10 @@ use bio::alignment::pairwise::{self, MatchParams, Scoring}; use bio::alignment::sparse; use bio::alignment::sparse::HashMapFx; use serde::{Deserialize, Serialize}; -use std::cmp::Ordering; -use std::cmp::{max, min}; -use std::collections::HashMap; -use std::collections::HashSet; +use std::cmp::{max, min, Ordering}; +use std::collections::{HashMap, HashSet}; use std::fmt::Debug; use std::hash::BuildHasher; -use std::i32; use std::ops::Range; type Aligner = pairwise::banded::Aligner; @@ -122,7 +117,7 @@ pub enum AdapterLoc { /// * `end`: whether it's a `FivePrime` adapter or a `ThreePrime` adapter /// * `location`: Specify the location of the adapter (See [`AdapterLoc`](enum.AdapterLoc.html)) /// * `seq`: The sequence of the adapter as a `String`. One could use a `Vec` here, but -/// chose a String for the ease of auto (de)serialization from a json file. +/// choose a String for the ease of auto (de)serialization from a json file. /// /// # Example /// The example below shows how you can create an `Adapter` from a JSON string @@ -270,7 +265,7 @@ impl<'a> AdapterTrimmer<'a> { /// /// Ouput: /// * `Option`: `None` if the adapter is not found, - /// otherwise `Some(TrimResult)` (See [`TrimResult`](struct.TrimResult.html)) + /// otherwise `Some(TrimResult)` (See [`TrimResult`](struct.TrimResult.html)) pub fn find(&mut self, read: &[u8]) -> Option { use bio::alignment::AlignmentOperation::{Del, Ins, Match, Subst}; @@ -642,7 +637,7 @@ fn compute_path( { let mut max_l_score = pairwise::MIN_SCORE; let mut max_r_score = pairwise::MIN_SCORE; - for &this_match in matches.iter() { + for &this_match in matches { max_l_score = max(max_l_score, l_score(this_match)); max_r_score = max(max_r_score, r_score(this_match)); } diff --git a/src/array.rs b/src/array.rs index 42238c0..c9fbdf5 100644 --- a/src/array.rs +++ b/src/array.rs @@ -106,12 +106,11 @@ where *l = *r.borrow(); len += 1; } - if src.next().is_some() { - panic!( - "Error: Input iter exceeds capacity of {} bytes.", - bytes.len() - ); - } + assert!( + src.next().is_none(), + "Error: Input iter exceeds capacity of {} bytes.", + bytes.len() + ); ByteArray { length: len, @@ -200,9 +199,7 @@ where type Output = u8; fn index(&self, index: usize) -> &Self::Output { - if index >= self.length as usize { - panic!("index out of bounds") - } + assert!(index < self.length as usize, "index out of bounds"); &self.bytes[index] } @@ -213,9 +210,7 @@ where T: ArrayContent, { fn index_mut(&mut self, index: usize) -> &mut Self::Output { - if index >= self.length as usize { - panic!("index out of bounds") - } + assert!(index < self.length as usize, "index out of bounds"); &mut self.bytes[index] } } diff --git a/src/background_iterator.rs b/src/background_iterator.rs index 7a1c86a..a2661cd 100644 --- a/src/background_iterator.rs +++ b/src/background_iterator.rs @@ -93,9 +93,7 @@ mod test { let v = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9]; let iter = (0..11usize).map(move |i| { - if v[i] == 5 { - panic!("simulated panic"); - } + assert!(!(v[i] == 5), "simulated panic"); v[i] }); diff --git a/src/filenames/bcl2fastq.rs b/src/filenames/bcl2fastq.rs index 725f425..a4f25be 100644 --- a/src/filenames/bcl2fastq.rs +++ b/src/filenames/bcl2fastq.rs @@ -495,7 +495,7 @@ mod tests_from_tenkit { #[test] fn test_sample_name_verification() -> Result<()> { let path = "tests/filenames/tenkit91"; - for &s in ["test_sample", "test_sample_suffix"].iter() { + for &s in &["test_sample", "test_sample_suffix"] { let query = Bcl2FastqDef { fastq_path: path.to_string(), sample_name_spec: s.into(), diff --git a/src/filenames/bcl_processor.rs b/src/filenames/bcl_processor.rs index d4fff0c..1948d2b 100644 --- a/src/filenames/bcl_processor.rs +++ b/src/filenames/bcl_processor.rs @@ -75,7 +75,7 @@ impl FindFastqs for BclProcessorFastqDef { if self.sample_index_spec.matches(&bcl_proc.si) && self.lane_spec.contains(bcl_proc.lane_mode()) { - res.push(fastqs) + res.push(fastqs); } } diff --git a/src/filenames/fastq_dir.rs b/src/filenames/fastq_dir.rs index 01ff368..8c88aee 100644 --- a/src/filenames/fastq_dir.rs +++ b/src/filenames/fastq_dir.rs @@ -37,7 +37,7 @@ impl Bcl2FastqDir { }); } - let is_lane_split = match fastq_data + let Ok(is_lane_split) = fastq_data .iter() .map(|(g, _)| match g.lane_mode { LaneMode::NoLaneSplitting => false, @@ -45,13 +45,12 @@ impl Bcl2FastqDir { }) .dedup() .exactly_one() - { - Ok(val) => val, - Err(_) => bail!( + else { + bail!( "Some files in the fastq path {} are split by lane, while some are not. \ This is not supported.", fastq_path.display() - ), + ); }; let samples = fastq_data.iter().map(|(g, _)| g.sample.clone()).collect(); @@ -230,7 +229,7 @@ impl FastqChecker { let lane_spec = match lanes { None => LaneSpec::Any, - Some(ref l) => LaneSpec::Lanes(l.iter().cloned().collect()), + Some(l) => LaneSpec::Lanes(l.iter().copied().collect()), }; if bcl_dir @@ -243,7 +242,7 @@ impl FastqChecker { Ok(match sample_name_spec { SampleNameSpec::Names(names) => names, - _ => unreachable!(), + SampleNameSpec::Any => unreachable!(), }) } diff --git a/src/illumina_header_info.rs b/src/illumina_header_info.rs index c12e586..286e81d 100644 --- a/src/illumina_header_info.rs +++ b/src/illumina_header_info.rs @@ -36,7 +36,7 @@ impl InputFastqs { .ok_or_else(|| anyhow!("No Read1 in FASTQ data"))?; let header = std::str::from_utf8(header)?; - let header_prefix = header.split(|x: char| x == ' ' || x == '/').next(); + let header_prefix = header.split([' ', '/']).next(); if header_prefix.is_none() { return Ok(None); } diff --git a/src/lib.rs b/src/lib.rs index a863def..7546402 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -4,9 +4,10 @@ //! Major functionality includes: //! * Find groups FASTQs (R1/R2/I1/I2) following Illumina filename conventions //! * Parsing flowcell information from Illumina FASTQ headers -//! * High-speed FASTQ I/O (via the `fastq` crate), with careful validation of FASTQ correctness and good error message. -//! * Containers for FASTQ read-pairs (along with index reads), providing access to 'technical' read components like cell barcode and -//! UMI sequences. +//! * High-speed FASTQ I/O (via the `fastq` crate), with careful validation of +//! FASTQ correctness and good error message. +//! * Containers for FASTQ read-pairs (along with index reads), providing access +//! to 'technical' read components like cell barcode and UMI sequences. //! * Flexible read trimming inspired by `cutadapt` #![deny(warnings, unused)] @@ -35,8 +36,7 @@ use crate::read_pair_iter::{AnyReadPairIter, InputFastqs, ReadPairIter}; pub use crate::squality::SQuality; pub use crate::sseq::SSeq; use anyhow::Error; -pub use fastq::OwnedRecord; -pub use fastq::Record; +pub use fastq::{OwnedRecord, Record}; pub use read_pair::WhichRead; use read_pair_iter::FastqError; use serde::{Deserialize, Serialize}; diff --git a/src/read_pair.rs b/src/read_pair.rs index fc14693..cf7bc2c 100644 --- a/src/read_pair.rs +++ b/src/read_pair.rs @@ -4,7 +4,7 @@ //! including the primary 'R1' and 'R2' and index 'I1' and 'I2' reads. use crate::WhichEnd; -use anyhow::{bail, Result}; +use anyhow::{bail, ensure, Result}; use bytes::{Bytes, BytesMut}; use fastq::{OwnedRecord, Record}; use serde::{Deserialize, Serialize}; @@ -186,9 +186,9 @@ impl RpRange { /// # Args /// * `read` - Specify `WhichRead` /// * `offset` - Start of the interval. Must be less than 2^15 (=32,768) - /// * `len` - Optional length that determines the end of the interval. A - /// value `None` indicates everything from `offset` until the end of the - /// `read`. Must be less than 2^15 (=32,768) + /// * `len` - Optional length that determines the end of the interval. + /// A value `None` indicates everything from `offset` until the end of the `read`. + /// Must be less than 2^15 (=32,768) /// /// # Panics /// * If `offset` or `len` is >= `2^15` @@ -205,11 +205,11 @@ impl RpRange { /// /// # Tests /// * `test_rprange_invalid_offset()` - Test that this function panics with - /// an offset that is too large + /// an offset that is too large /// * `test_rprange_invalid_len()` - Test that this function panics with - /// a length that is too large + /// a length that is too large /// * `prop_test_rprange_representation()` - Test that arbitrary construction of RpRange - /// stores the values correctly. + /// stores the values correctly. pub fn new(read: WhichRead, offset: usize, len: Option) -> RpRange { assert!(offset < (1 << 15)); let len_bits = match len { @@ -296,7 +296,7 @@ impl RpRange { /// /// # Tests /// * `test_rprange_intersect_panic()` - Make sure that this function panics - /// if the reads do not match + /// if the reads do not match /// * `test_rprange_intersect_both_open()` - Test a case when both lengths are not set /// * `test_rprange_intersect_self_open()` - Test a case when only self length is set /// * `test_rprange_intersect_other_open()` - Test a case when only other length is set @@ -363,7 +363,7 @@ impl RpRange { /// * `test_shrink_invalid_range_3()`: Test for panic if shrink range start > end /// * `test_rprange_trivial_shrink()`: Test shrink to an empty range. /// * `prop_test_rprange_shrink()`: Test shrink for arbitrary values of - /// `RpRange` and valid `shrink_range` + /// `RpRange` and valid `shrink_range` pub fn shrink(&mut self, shrink_range: &ops::Range) { assert!( shrink_range.start <= shrink_range.end, @@ -496,14 +496,12 @@ impl<'a> MutReadPair<'a> { pub fn new(buffer: &'a mut BytesMut, rr: &[Option; 4]) -> MutReadPair<'a> { let mut rp = MutReadPair::empty(buffer); - - for (_rec, which) in rr.iter().zip(WhichRead::read_types().iter()) { - if let Some(ref rec) = *_rec { - rp.push_read(rec, *which) + for (rec, which) in rr.iter().zip(WhichRead::read_types()) { + if let Some(rec) = rec { + rp.push_read(rec, which); } // default ReadOffsets is exists = false } - rp } @@ -600,32 +598,26 @@ impl ReadPair { pub fn check_range(&self, range: &RpRange, region_name: &str) -> Result<()> { let req_len = range.offset() + range.len().unwrap_or(0); - - match self.get(range.read(), ReadPart::Seq) { - Some(read) => { - if read.len() < req_len { - bail!( - "{} is expected in positions {}-{} in Read {}, but read is {} bp long.", - region_name, - range.offset(), - req_len, - range.read(), - read.len() - ); - } else { - Ok(()) - } - } - None => bail!( + let Some(read) = self.get(range.read(), ReadPart::Seq) else { + bail!( "{region_name} is missing from FASTQ. Read {} is not present.", range.read() - ), - } + ); + }; + ensure!( + read.len() >= req_len, + "{region_name} is expected in positions {}-{} in Read {}, but read is {} bp long.", + range.offset(), + req_len, + range.read(), + read.len() + ); + Ok(()) } pub fn to_owned_record(&self) -> HashMap { let mut result = HashMap::new(); - for &which in WhichRead::read_types().iter() { + for &which in &WhichRead::read_types() { if self.offsets[which as usize].exists { let w = self.offsets[which as usize]; let rec = OwnedRecord { diff --git a/src/read_pair_iter.rs b/src/read_pair_iter.rs index e56a868..ba63998 100644 --- a/src/read_pair_iter.rs +++ b/src/read_pair_iter.rs @@ -10,7 +10,6 @@ use rand::SeedableRng; use rand_xorshift::XorShiftRng; use serde::{Deserialize, Serialize}; use std::io::{self, BufRead, BufReader, ErrorKind, Read, Seek, Write}; -use std::ops::DerefMut; use std::path::{Path, PathBuf}; use thiserror::Error; @@ -355,19 +354,19 @@ impl ReadPairIter { paths, iters, records_read: [0; 4], - read_lengths: [std::usize::MAX; 4], + read_lengths: [usize::MAX; 4], }), is_single_ended, }) } pub fn illumina_r1_trim_length(mut self, r1_length: Option) -> Self { - self.iters.read_lengths[WhichRead::R1 as usize] = r1_length.unwrap_or(std::usize::MAX); + self.iters.read_lengths[WhichRead::R1 as usize] = r1_length.unwrap_or(usize::MAX); self } pub fn illumina_r2_trim_length(mut self, r2_length: Option) -> Self { - self.iters.read_lengths[WhichRead::R2 as usize] = r2_length.unwrap_or(std::usize::MAX); + self.iters.read_lengths[WhichRead::R2 as usize] = r2_length.unwrap_or(usize::MAX); self } @@ -393,11 +392,11 @@ impl ReadPairIter { fn get_next(&mut self) -> Result, FastqError> { // Recycle the buffer if it's almost full. if self.buffer.remaining_mut() < 512 { - self.buffer = BytesMut::with_capacity(BUF_SIZE) + self.buffer = BytesMut::with_capacity(BUF_SIZE); } // need these local reference to avoid borrow checker problem - let iters = self.iters.deref_mut(); + let iters = &mut *self.iters; let paths = &iters.paths; let read_lengths = &iters.read_lengths; let rec_num = &mut iters.records_read; @@ -437,7 +436,7 @@ impl ReadPairIter { let which = WhichRead::read_types()[idx]; let read_length = read_lengths[which as usize]; let tr = TrimRecord::new(rec, read_length); - rp.push_read(&tr, which) + rp.push_read(&tr, which); } } else { // track which reader finished @@ -472,7 +471,7 @@ impl ReadPairIter { let which = WhichRead::read_types()[idx + 1]; let read_length = read_lengths[which as usize]; let tr = TrimRecord::new(rec, read_length); - rp.push_read(&tr, which) + rp.push_read(&tr, which); } } else { // We should only hit this if the FASTQ has an odd @@ -496,7 +495,7 @@ impl ReadPairIter { qual: vec![], }; let tr = TrimRecord::new(&fake_r2_read, read_length); - rp.push_read(&tr, WhichRead::R2) + rp.push_read(&tr, WhichRead::R2); } } } @@ -540,15 +539,15 @@ impl ReadPairIter { let any_not_complete = IntoIterator::into_iter(iter_ended) .zip(paths.iter()) .any(|(ended, path)| !ended && path.is_some()); - - if any_not_complete { - let msg = "Input FASTQ file ended prematurely".to_string(); - let path = paths[ended_index].as_ref().unwrap(); - let e = FastqError::format(msg, path, rec_num[ended_index] * 4); - return Err(e); + return if any_not_complete { + Err(FastqError::format( + "Input FASTQ file ended prematurely".to_string(), + paths[ended_index].as_ref().unwrap(), + rec_num[ended_index] * 4, + )) } else { - return Ok(None); - } + Ok(None) + }; } if sample { diff --git a/src/squality.rs b/src/squality.rs index 99fcc3b..f7caf14 100644 --- a/src/squality.rs +++ b/src/squality.rs @@ -15,9 +15,10 @@ impl ArrayContent for SQualityContents { fn validate_bytes(seq: &[u8]) { for (i, &c) in seq.iter().enumerate() { let q = c as i16 - 33; - if !(0..94).contains(&q) { - panic!("Invalid quality value {q} ASCII character {c} at position {i}"); - } + assert!( + (0..94).contains(&q), + "Invalid quality value {q} ASCII character {c} at position {i}" + ); } } fn expected_contents() -> &'static str { diff --git a/src/sseq.rs b/src/sseq.rs index de5a3df..108fcb2 100644 --- a/src/sseq.rs +++ b/src/sseq.rs @@ -2,9 +2,8 @@ //! Sized, stack-allocated container for a short DNA sequence. -use itertools::Itertools; - use crate::array::{ArrayContent, ByteArray}; +use itertools::Itertools; use std::iter::Iterator; use std::str; @@ -21,9 +20,10 @@ impl ArrayContent for SSeqContents { /// that is not an ACGTN. fn validate_bytes(seq: &[u8]) { for (i, &s) in seq.iter().enumerate() { - if !UPPER_ACGTN.iter().any(|&c| c == s) { - panic!("Non ACGTN character {s} at position {i}"); - }; + assert!( + UPPER_ACGTN.iter().any(|&c| c == s), + "Non ACGTN character {s} at position {i}" + ); } } fn expected_contents() -> &'static str { @@ -156,7 +156,7 @@ pub struct SSeqChars(SSeqGen<5>); impl SSeqChars { pub fn new(chars: &[u8]) -> Self { - SSeqChars(SSeqGen::from_iter(chars.iter().unique())) + SSeqChars(chars.iter().unique().collect()) } pub fn actg() -> Self { SSeqChars(SSeqGen::from_bytes_unchecked(b"ACGT")) @@ -378,7 +378,7 @@ mod sseq_test { ref seq_str in "[ACGTN]{0, 23}", ) { let seq = SSeq::from_bytes(seq_str.as_bytes()); - assert_eq!(seq.len(), seq.into_iter().count()) + assert_eq!(seq.len(), seq.into_iter().count()); } }