diff --git a/DESCRIPTION b/DESCRIPTION index 280e30f..628dfe7 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -10,7 +10,7 @@ biocViews: Infrastructure, DataImport, Genetics, Sequencing, RNASeq, SNP, URL: https://bioconductor.org/packages/GenomicAlignments Video: https://www.youtube.com/watch?v=2KqBSbkfhRo , https://www.youtube.com/watch?v=3PK_jx44QTs BugReports: https://github.com/Bioconductor/GenomicAlignments/issues -Version: 1.35.0 +Version: 1.35.1 License: Artistic-2.0 Encoding: UTF-8 Authors@R: c( diff --git a/R/readGAlignments.R b/R/readGAlignments.R index 17001e6..61b8e24 100644 --- a/R/readGAlignments.R +++ b/R/readGAlignments.R @@ -290,11 +290,37 @@ setMethod("readGAlignmentPairs", "character", setGeneric("readGAlignmentsList", signature="file", function(file, index=file, use.names=FALSE, param=ScanBamParam(), - with.which_label=FALSE) + with.which_label=FALSE, strandMode=NA) standardGeneric("readGAlignmentsList") ) -.matesFromBam <- function(file, use.names, param, what0, with.which_label) +.setRealStrand <- function(gal, param, strandMode) { + if (strandMode == 0L) + strand(gal) <- Rle(strand("*"), length(gal)) + else { + gal_mcols <- mcols(gal, use.names=FALSE) + if (is.null(gal_mcols$flag)) + warning("Flag information missing in GAlignmentsList object. Strand information might not be accurate.") + else { + mask_first_mate <- bamFlagTest(gal_mcols$flag, "isFirstMateRead") + if (strandMode == 1L) + strand(gal[!mask_first_mate]) <- + invertStrand(strand(gal[!mask_first_mate])) + else if (strandMode == 2L) + strand(gal[mask_first_mate]) <- + invertStrand(strand(gal[mask_first_mate])) + else + stop("strandMode should be either 0, 1 or 2.") + } + } + ## if the user didn't request the 'flag' info + ## then remove it to reduce memory footprint + if (!"flag" %in% bamWhat(param)) + mcols(gal)$flag <- NULL + gal +} +.matesFromBam <- function(file, use.names, param, what0, with.which_label, + strandMode) { bamcols <- .load_bamcols_from_BamFile(file, param, what0, with.which_label=with.which_label) @@ -302,8 +328,16 @@ setGeneric("readGAlignmentsList", signature="file", gal <- GAlignments(seqnames=bamcols$rname, pos=bamcols$pos, cigar=bamcols$cigar, strand=bamcols$strand, seqlengths=seqlengths) - gal <- .bindExtraData(gal, use.names=FALSE, param, bamcols, - with.which_label=with.which_label) + if (!is.na(strandMode)) { + flag0 <- scanBamFlag() + what0 <- "flag" + param2 <- .normargParam(param, flag0, what0) + gal <- .bindExtraData(gal, use.names=FALSE, param2, bamcols, + with.which_label=with.which_label) + gal <- .setRealStrand(gal, param, strandMode) + } else + gal <- .bindExtraData(gal, use.names=FALSE, param, bamcols, + with.which_label=with.which_label) if (asMates(file)) { f <- factor(bamcols$groupid) gal <- unname(split(gal, f)) @@ -320,7 +354,8 @@ setGeneric("readGAlignmentsList", signature="file", .readGAlignmentsList.BamFile <- function(file, index=file, use.names=FALSE, param=ScanBamParam(), - with.which_label=FALSE) + with.which_label=FALSE, + strandMode=NA) { if (!isTRUEorFALSE(use.names)) stop("'use.names' must be TRUE or FALSE") @@ -328,22 +363,25 @@ setGeneric("readGAlignmentsList", signature="file", bamWhat(param) <- setdiff(bamWhat(param), c("groupid", "mate_status")) what0 <- c("rname", "strand", "pos", "cigar", "groupid", "mate_status") + if (!is.na(strandMode)) + what0 <- c(what0, "flag") if (use.names) what0 <- c(what0, "qname") - .matesFromBam(file, use.names, param, what0, with.which_label) + .matesFromBam(file, use.names, param, what0, with.which_label, strandMode) } setMethod("readGAlignmentsList", "BamFile", .readGAlignmentsList.BamFile) setMethod("readGAlignmentsList", "character", function(file, index=file, use.names=FALSE, param=ScanBamParam(), - with.which_label=FALSE) + with.which_label=FALSE, strandMode=NA) { bam <- .open_BamFile(file, index=index, asMates=TRUE, param=param) on.exit(close(bam)) readGAlignmentsList(bam, character(0), use.names=use.names, param=param, - with.which_label=with.which_label) + with.which_label=with.which_label, + strandMode=strandMode) } ) diff --git a/inst/unitTests/test_readGAlignmentsList.R b/inst/unitTests/test_readGAlignmentsList.R index deb8f77..eb643e4 100644 --- a/inst/unitTests/test_readGAlignmentsList.R +++ b/inst/unitTests/test_readGAlignmentsList.R @@ -182,3 +182,18 @@ test_readGAlignmentsList_which <- function() rng2 <- as.vector(mcols(unlist(target1[2]))$which_label) checkTrue(all(rng2 %in% my_ROI_labels[4])) } + +text_readGAlignmentsList_findOverlaps <- function() +{ + fl <- system.file("extdata", "ex1.bam", package="Rsamtools") + bf <- BamFile(fl, asMates=TRUE) + galist <- readGAlignmentsList(bf, strandMode=1L) + f <- GRanges(seqnames="seq1", IRanges(30, 250), strand="-") + ov <- findOverlaps(galist[1], f, ignore.strand=FALSE) + checkIdentical(0L, length(ov)) + + galist <- readGAlignmentsList(bf, strandMode=2L) + f <- GRanges(seqnames="seq1", IRanges(30, 250), strand="-") + ov <- findOverlaps(galist[1], f, ignore.strand=FALSE) + checkIdentical(1L, length(ov)) +} diff --git a/man/readGAlignments.Rd b/man/readGAlignments.Rd index eb9c21f..c017a78 100644 --- a/man/readGAlignments.Rd +++ b/man/readGAlignments.Rd @@ -34,7 +34,8 @@ readGAlignmentPairs(file, index=file, use.names=FALSE, param=NULL, with.which_label=FALSE, strandMode=1) readGAlignmentsList(file, index=file, use.names=FALSE, - param=ScanBamParam(), with.which_label=FALSE) + param=ScanBamParam(), with.which_label=FALSE, + strandMode=NA) readGappedReads(file, index=file, use.names=FALSE, param=NULL, with.which_label=FALSE) @@ -103,8 +104,12 @@ readGappedReads(file, index=file, use.names=FALSE, param=NULL, represented as a factor-\link[S4Vectors]{Rle}. } \item{strandMode}{ - Strand mode to set on the returned \link{GAlignmentPairs} object. - See \code{?\link{strandMode}} for more information. + Strand mode to set on the returned \link{GAlignmentPairs} or + \link{GAlignmentsList} object. Note that the default value for + this parameter is different for \code{readGAlignmentPairs()} and + \code{readGAlignmentsList()}. + See details below on \code{readGAlignmentsList()} and + \code{?\link{strandMode}} for more information. } } @@ -155,6 +160,12 @@ readGappedReads(file, index=file, use.names=FALSE, param=NULL, See the \sQuote{asMates} section of \code{?\link[Rsamtools]{BamFile}} in the \pkg{Rsamtools} package for details. + Note that, by default, \code{strandMode=NA}, which is different to + the default value in \code{readGAlignmentPairs()} and which implies + that, by default, the strand values in the returned + \code{GAlignmentsList} object correspond to the original strand of + the reads. + \item \code{readGappedReads} reads a file containing aligned reads as a \link{GappedReads} object. See \code{?\link{GappedReads}} for a description of \link{GappedReads} objects. @@ -398,7 +409,19 @@ readGAlignmentsList(bamfile) ## Use a 'param' to fine tune the results. param <- ScanBamParam(flag=scanBamFlag(isProperPair=TRUE)) galist2 <- readGAlignmentsList(bam, param=param) +galist2[1:3] length(galist2) +table(elementNROWS(galist2)) + +## For paired-end data, we can set the 'strandMode' parameter to +## infer the strand of a pair from the strand of the first and +## last alignments in the pair +galist3 <- readGAlignmentsList(bam, param=param, strandMode=0) +galist3[1:3] +galist4 <- readGAlignmentsList(bam, param=param, strandMode=1) +galist4[1:3] +galist5 <- readGAlignmentsList(bam, param=param, strandMode=2) +galist5[1:3] ## --------------------------------------------------------------------- ## D. COMPARING 4 STRATEGIES FOR LOADING THE ALIGNMENTS THAT OVERLAP