Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion DESCRIPTION
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ biocViews: Infrastructure, DataImport, Genetics, Sequencing, RNASeq, SNP,
URL: https://bioconductor.org/packages/GenomicAlignments
Video: https://www.youtube.com/watch?v=2KqBSbkfhRo , https://www.youtube.com/watch?v=3PK_jx44QTs
BugReports: https://github.com/Bioconductor/GenomicAlignments/issues
Version: 1.35.0
Version: 1.35.1
License: Artistic-2.0
Encoding: UTF-8
Authors@R: c(
Expand Down
54 changes: 46 additions & 8 deletions R/readGAlignments.R
Original file line number Diff line number Diff line change
Expand Up @@ -290,20 +290,54 @@ setMethod("readGAlignmentPairs", "character",

setGeneric("readGAlignmentsList", signature="file",
function(file, index=file, use.names=FALSE, param=ScanBamParam(),
with.which_label=FALSE)
with.which_label=FALSE, strandMode=NA)
standardGeneric("readGAlignmentsList")
)

.matesFromBam <- function(file, use.names, param, what0, with.which_label)
.setRealStrand <- function(gal, param, strandMode) {
if (strandMode == 0L)
strand(gal) <- Rle(strand("*"), length(gal))
else {
gal_mcols <- mcols(gal, use.names=FALSE)
if (is.null(gal_mcols$flag))
warning("Flag information missing in GAlignmentsList object. Strand information might not be accurate.")
else {
mask_first_mate <- bamFlagTest(gal_mcols$flag, "isFirstMateRead")
if (strandMode == 1L)
strand(gal[!mask_first_mate]) <-
invertStrand(strand(gal[!mask_first_mate]))
else if (strandMode == 2L)
strand(gal[mask_first_mate]) <-
invertStrand(strand(gal[mask_first_mate]))
else
stop("strandMode should be either 0, 1 or 2.")
}
}
## if the user didn't request the 'flag' info
## then remove it to reduce memory footprint
if (!"flag" %in% bamWhat(param))
mcols(gal)$flag <- NULL
gal
}
.matesFromBam <- function(file, use.names, param, what0, with.which_label,
strandMode)
{
bamcols <- .load_bamcols_from_BamFile(file, param, what0,
with.which_label=with.which_label)
seqlengths <- .load_seqlengths_from_BamFile(file, levels(bamcols$rname))
gal <- GAlignments(seqnames=bamcols$rname, pos=bamcols$pos,
cigar=bamcols$cigar, strand=bamcols$strand,
seqlengths=seqlengths)
gal <- .bindExtraData(gal, use.names=FALSE, param, bamcols,
with.which_label=with.which_label)
if (!is.na(strandMode)) {
flag0 <- scanBamFlag()
what0 <- "flag"
param2 <- .normargParam(param, flag0, what0)
gal <- .bindExtraData(gal, use.names=FALSE, param2, bamcols,
with.which_label=with.which_label)
gal <- .setRealStrand(gal, param, strandMode)
} else
gal <- .bindExtraData(gal, use.names=FALSE, param, bamcols,
with.which_label=with.which_label)
if (asMates(file)) {
f <- factor(bamcols$groupid)
gal <- unname(split(gal, f))
Expand All @@ -320,30 +354,34 @@ setGeneric("readGAlignmentsList", signature="file",

.readGAlignmentsList.BamFile <- function(file, index=file,
use.names=FALSE, param=ScanBamParam(),
with.which_label=FALSE)
with.which_label=FALSE,
strandMode=NA)
{
if (!isTRUEorFALSE(use.names))
stop("'use.names' must be TRUE or FALSE")
if (!asMates(file))
bamWhat(param) <- setdiff(bamWhat(param),
c("groupid", "mate_status"))
what0 <- c("rname", "strand", "pos", "cigar", "groupid", "mate_status")
if (!is.na(strandMode))
what0 <- c(what0, "flag")
if (use.names)
what0 <- c(what0, "qname")
.matesFromBam(file, use.names, param, what0, with.which_label)
.matesFromBam(file, use.names, param, what0, with.which_label, strandMode)
}

setMethod("readGAlignmentsList", "BamFile", .readGAlignmentsList.BamFile)

setMethod("readGAlignmentsList", "character",
function(file, index=file, use.names=FALSE, param=ScanBamParam(),
with.which_label=FALSE)
with.which_label=FALSE, strandMode=NA)
{
bam <- .open_BamFile(file, index=index, asMates=TRUE, param=param)
on.exit(close(bam))
readGAlignmentsList(bam, character(0),
use.names=use.names, param=param,
with.which_label=with.which_label)
with.which_label=with.which_label,
strandMode=strandMode)
}
)

Expand Down
15 changes: 15 additions & 0 deletions inst/unitTests/test_readGAlignmentsList.R
Original file line number Diff line number Diff line change
Expand Up @@ -182,3 +182,18 @@ test_readGAlignmentsList_which <- function()
rng2 <- as.vector(mcols(unlist(target1[2]))$which_label)
checkTrue(all(rng2 %in% my_ROI_labels[4]))
}

text_readGAlignmentsList_findOverlaps <- function()
{
fl <- system.file("extdata", "ex1.bam", package="Rsamtools")
bf <- BamFile(fl, asMates=TRUE)
galist <- readGAlignmentsList(bf, strandMode=1L)
f <- GRanges(seqnames="seq1", IRanges(30, 250), strand="-")
ov <- findOverlaps(galist[1], f, ignore.strand=FALSE)
checkIdentical(0L, length(ov))

galist <- readGAlignmentsList(bf, strandMode=2L)
f <- GRanges(seqnames="seq1", IRanges(30, 250), strand="-")
ov <- findOverlaps(galist[1], f, ignore.strand=FALSE)
checkIdentical(1L, length(ov))
}
29 changes: 26 additions & 3 deletions man/readGAlignments.Rd
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ readGAlignmentPairs(file, index=file, use.names=FALSE, param=NULL,
with.which_label=FALSE, strandMode=1)

readGAlignmentsList(file, index=file, use.names=FALSE,
param=ScanBamParam(), with.which_label=FALSE)
param=ScanBamParam(), with.which_label=FALSE,
strandMode=NA)

readGappedReads(file, index=file, use.names=FALSE, param=NULL,
with.which_label=FALSE)
Expand Down Expand Up @@ -103,8 +104,12 @@ readGappedReads(file, index=file, use.names=FALSE, param=NULL,
represented as a factor-\link[S4Vectors]{Rle}.
}
\item{strandMode}{
Strand mode to set on the returned \link{GAlignmentPairs} object.
See \code{?\link{strandMode}} for more information.
Strand mode to set on the returned \link{GAlignmentPairs} or
\link{GAlignmentsList} object. Note that the default value for
this parameter is different for \code{readGAlignmentPairs()} and
\code{readGAlignmentsList()}.
See details below on \code{readGAlignmentsList()} and
\code{?\link{strandMode}} for more information.
}
}

Expand Down Expand Up @@ -155,6 +160,12 @@ readGappedReads(file, index=file, use.names=FALSE, param=NULL,
See the \sQuote{asMates} section of \code{?\link[Rsamtools]{BamFile}}
in the \pkg{Rsamtools} package for details.

Note that, by default, \code{strandMode=NA}, which is different to
the default value in \code{readGAlignmentPairs()} and which implies
that, by default, the strand values in the returned
\code{GAlignmentsList} object correspond to the original strand of
the reads.

\item \code{readGappedReads} reads a file containing aligned reads as a
\link{GappedReads} object. See \code{?\link{GappedReads}} for a
description of \link{GappedReads} objects.
Expand Down Expand Up @@ -398,7 +409,19 @@ readGAlignmentsList(bamfile)
## Use a 'param' to fine tune the results.
param <- ScanBamParam(flag=scanBamFlag(isProperPair=TRUE))
galist2 <- readGAlignmentsList(bam, param=param)
galist2[1:3]
length(galist2)
table(elementNROWS(galist2))

## For paired-end data, we can set the 'strandMode' parameter to
## infer the strand of a pair from the strand of the first and
## last alignments in the pair
galist3 <- readGAlignmentsList(bam, param=param, strandMode=0)
galist3[1:3]
galist4 <- readGAlignmentsList(bam, param=param, strandMode=1)
galist4[1:3]
galist5 <- readGAlignmentsList(bam, param=param, strandMode=2)
galist5[1:3]

## ---------------------------------------------------------------------
## D. COMPARING 4 STRATEGIES FOR LOADING THE ALIGNMENTS THAT OVERLAP
Expand Down