diff --git a/R/coordinate-mapping-methods.R b/R/coordinate-mapping-methods.R index a2b382b..8072433 100644 --- a/R/coordinate-mapping-methods.R +++ b/R/coordinate-mapping-methods.R @@ -31,7 +31,7 @@ setGeneric("pmapFromAlignments", signature=c("x", "alignments"), ### mapToAlignments() and mapFromAlignments() methods ### -.mapFromAlignments <- function(x, alignments) +.mapFromAlignments <- function(x, alignments, style = "mapped") { if (!length(x) && !length(alignments)) return(GRanges(xHits=integer(), transcriptsHits=integer())) @@ -39,6 +39,12 @@ setGeneric("pmapFromAlignments", signature=c("x", "alignments"), is.null(alignmentsNames <- names(alignments))) stop ("both 'x' and 'alignments' must have names") + style <- switch(style, + mapped = 0, + sequence = 1, + readpos = 2, + stop(paste("specified unsupported style=", style))) + ## name matching determines pairs match0 <- match(alignmentsNames, alignmentsNames) match1 <- match(xNames, alignmentsNames) @@ -53,10 +59,10 @@ setGeneric("pmapFromAlignments", signature=c("x", "alignments"), alignments <- alignments[alignmentsHits] s <- .Call("query_locs_to_ref_locs", start(x), cigar(alignments), - start(alignments), FALSE) + start(alignments), FALSE, style) e <- .Call("query_locs_to_ref_locs", end(x), cigar(alignments), - start(alignments), TRUE) + start(alignments), TRUE, style) e <- pmax(e, s - 1L) ## remove non-hits @@ -67,17 +73,23 @@ setGeneric("pmapFromAlignments", signature=c("x", "alignments"), xHits=xHits[keep], alignmentsHits=alignmentsHits[keep]) } -.mapToAlignments <- function(x, alignments) +.mapToAlignments <- function(x, alignments, style = "sequence") { if (!length(x) && !length(alignments)) return(GRanges(xHits=integer(), transcriptsHits=integer())) if (is.null(names(alignments))) stop ("'alignments' must have names") + style <- switch(style, + mapped = 0, + sequence = 1, + readpos = 2, + stop(paste("specified unsupported style=", style))) + ## map all possible pairs; returns hits only map <- .Call("map_ref_locs_to_query_locs", start(x), end(x), cigar(alignments), - start(alignments)) + start(alignments), style) xHits <- map[[3]] alignmentsHits <- map[[4]] if (length(xHits)) @@ -91,30 +103,46 @@ setGeneric("pmapFromAlignments", signature=c("x", "alignments"), setMethod("mapToAlignments", c("IntegerRanges", "GAlignments"), function(x, alignments, ...) - ranges(.mapToAlignments(x, alignments)) + ranges(.mapToAlignments(x, alignments, ...)) ) setMethod("mapToAlignments", c("GenomicRanges", "GAlignments"), function(x, alignments, ...) - .mapToAlignments(x, alignments) + .mapToAlignments(x, alignments, ...) ) setMethod("mapFromAlignments", c("IntegerRanges", "GAlignments"), function(x, alignments, ...) - ranges(.mapFromAlignments(x, alignments)) + ranges(.mapFromAlignments(x, alignments, ...)) ) setMethod("mapFromAlignments", c("GenomicRanges", "GAlignments"), function(x, alignments, ...) - .mapFromAlignments(x, alignments) + .mapFromAlignments(x, alignments, ...) ) ### - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - ### pmapToAlignments() and pmapFromAlignments() methods ### -.pmapAlignments <- function(x, alignments, reverse) +.pmapAlignments <- function(x, alignments, reverse, style = "default") { + if (reverse) { + style <- switch(style, + default = 0, + mapped = 0, + sequence = 1, + readpos = 2, + stop(paste("specified unsupported style=", style))) + } else { + style <- switch(style, + default = 1, + mapped = 0, + sequence = 1, + readpos = 2, + stop(paste("specified unsupported style=", style))) + } + if (length(x) && length(alignments)) { if (length(x) != length(alignments)) stop("'x' and 'alignments' must have the same length") @@ -128,8 +156,8 @@ setMethod("mapFromAlignments", c("GenomicRanges", "GAlignments"), FUN <- "ref_locs_to_query_locs" seqname <- names(alignments) } - s <- .Call(FUN, start(x), cigar(alignments), start(alignments), FALSE) - e <- .Call(FUN, end(x), cigar(alignments), start(alignments), TRUE) + s <- .Call(FUN, start(x), cigar(alignments), start(alignments), FALSE, style) + e <- .Call(FUN, end(x), cigar(alignments), start(alignments), TRUE, style) e <- pmax(e, s - 1L) ## non-hits @@ -146,20 +174,20 @@ setMethod("mapFromAlignments", c("GenomicRanges", "GAlignments"), setMethod("pmapToAlignments", c("IntegerRanges", "GAlignments"), function(x, alignments, ...) - ranges(.pmapAlignments(x, alignments, FALSE)) + ranges(.pmapAlignments(x, alignments, FALSE, ...)) ) setMethod("pmapToAlignments", c("GenomicRanges", "GAlignments"), function(x, alignments, ...) - .pmapAlignments(ranges(x), alignments, FALSE) + .pmapAlignments(ranges(x), alignments, FALSE, ...) ) setMethod("pmapFromAlignments", c("IntegerRanges", "GAlignments"), function(x, alignments, ...) - ranges(.pmapAlignments(x, alignments, TRUE)) + ranges(.pmapAlignments(x, alignments, TRUE, ...)) ) setMethod("pmapFromAlignments", c("GenomicRanges", "GAlignments"), function(x, alignments, ...) - .pmapAlignments(ranges(x), alignments, TRUE) + .pmapAlignments(ranges(x), alignments, TRUE, ...) ) diff --git a/inst/unitTests/test_coordinate-mapping-methods.R b/inst/unitTests/test_coordinate-mapping-methods.R index 8fe1db7..1bd8b19 100644 --- a/inst/unitTests/test_coordinate-mapping-methods.R +++ b/inst/unitTests/test_coordinate-mapping-methods.R @@ -109,17 +109,17 @@ test_ref_locs_to_query_locs <- function() { ref <- 43425L + pos - 1L query <- 238L ans <- .Call("ref_locs_to_query_locs", ref, cigar, pos, - FALSE, PACKAGE="GenomicAlignments") + FALSE, 1, PACKAGE="GenomicAlignments") checkIdentical(ans, query) ## out of bounds ans_s <- .Call("ref_locs_to_query_locs", start(x1[1]), cigar(align1[1]), - start(align1[1]), FALSE, + start(align1[1]), FALSE, 1, PACKAGE="GenomicAlignments") ans_e <- .Call("ref_locs_to_query_locs", end(x1[1]), cigar(align1[1]), - start(align1[1]), TRUE, + start(align1[1]), TRUE, 1, PACKAGE="GenomicAlignments") checkIdentical(ans_s, NA_integer_) @@ -130,11 +130,11 @@ test_query_locs_to_ref_locs <- function() { ## out of bounds ans_s <- .Call("query_locs_to_ref_locs", start(x1[4]), cigar(align1[1]), - start(align1[1]), FALSE, + start(align1[1]), FALSE, 1, PACKAGE="GenomicAlignments") ans_e <- .Call("query_locs_to_ref_locs", end(x1[4]), cigar(align1[1]), - start(align1[1]), TRUE, + start(align1[1]), TRUE, 1, PACKAGE="GenomicAlignments") checkIdentical(ans_s, NA_integer_) @@ -144,26 +144,26 @@ test_query_locs_to_ref_locs <- function() { test_map_ref_locs_to_query_locs <- function() { ## hit map <- .Call("map_ref_locs_to_query_locs", - 12L, 16L, "11M", 10L) + 12L, 16L, "11M", 10L, 1) checkIdentical(unlist(map), c(3L, 7L, 1L, 1L)) ## first record out of bounds map <- .Call("map_ref_locs_to_query_locs", - c(5L, 12L), c(16L, 16L), "11M", 10L) + c(5L, 12L), c(16L, 16L), "11M", 10L, 1) checkIdentical(unlist(map), c(3L, 7L, 2L, 1L)) ## second record out of bounds map <- .Call("map_ref_locs_to_query_locs", - c(12L, 5L), c(16L, 16L), "11M", 10L) + c(12L, 5L), c(16L, 16L), "11M", 10L, 1) checkIdentical(unlist(map), c(3L, 7L, 1L, 1L)) ## first alignment out of bounds map <- .Call("map_ref_locs_to_query_locs", - 12L, 16L, c("11M", "11M"), c(20L, 10L)) + 12L, 16L, c("11M", "11M"), c(20L, 10L), 1) checkIdentical(unlist(map), c(3L, 7L, 1L, 2L)) ## second alignment out of bounds map <- .Call("map_ref_locs_to_query_locs", - 12L, 16L, c("11M", "11M"), c(10L, 20L)) + 12L, 16L, c("11M", "11M"), c(10L, 20L), 1) checkIdentical(unlist(map), c(3L, 7L, 1L, 1L)) } diff --git a/src/GenomicAlignments.h b/src/GenomicAlignments.h index 3498d9f..5cc8f01 100644 --- a/src/GenomicAlignments.h +++ b/src/GenomicAlignments.h @@ -66,28 +66,32 @@ SEXP map_query_locs_to_ref_locs( SEXP start, SEXP end, SEXP cigar, - SEXP pos + SEXP pos, + SEXP style ); SEXP map_ref_locs_to_query_locs( SEXP start, SEXP end, SEXP cigar, - SEXP pos + SEXP pos, + SEXP style ); SEXP ref_locs_to_query_locs( SEXP ref_locs, SEXP cigar, SEXP pos, - SEXP narrow_left + SEXP narrow_left, + SEXP style ); SEXP query_locs_to_ref_locs( SEXP query_locs, SEXP cigar, SEXP pos, - SEXP narrow_left + SEXP narrow_left, + SEXP style ); diff --git a/src/R_init_GenomicAlignments.c b/src/R_init_GenomicAlignments.c index 6a88f4b..a1c7ff1 100644 --- a/src/R_init_GenomicAlignments.c +++ b/src/R_init_GenomicAlignments.c @@ -19,10 +19,10 @@ static const R_CallMethodDef callMethods[] = { CALLMETHOD_DEF(cigar_qnarrow, 3), /* mapping_methods.c */ - CALLMETHOD_DEF(map_query_locs_to_ref_locs, 4), - CALLMETHOD_DEF(map_ref_locs_to_query_locs, 4), - CALLMETHOD_DEF(ref_locs_to_query_locs, 4), - CALLMETHOD_DEF(query_locs_to_ref_locs, 4), + CALLMETHOD_DEF(map_query_locs_to_ref_locs, 5), + CALLMETHOD_DEF(map_ref_locs_to_query_locs, 5), + CALLMETHOD_DEF(ref_locs_to_query_locs, 5), + CALLMETHOD_DEF(query_locs_to_ref_locs, 5), /* encodeOverlaps_methods.c */ CALLMETHOD_DEF(encode_overlaps1, 10), diff --git a/src/coordinate_mapping_methods.c b/src/coordinate_mapping_methods.c index 2675c2c..040a2e3 100644 --- a/src/coordinate_mapping_methods.c +++ b/src/coordinate_mapping_methods.c @@ -10,13 +10,13 @@ /* Returns integer position of 'ref_loc' mapped to local space. * If 'ref_loc' cannot be mapped NA is returned. */ -int to_query(int ref_loc, const char *cig0, int pos, Rboolean narrow_left) +int to_query(int ref_loc, const char *cig0, int pos, Rboolean narrow_left, int style) { int query_loc = ref_loc - pos + 1; int n, offset = 0, OPL, query_consumed = 0; char OP; - + while (query_consumed < query_loc && (n = _next_cigar_OP(cig0, offset, &OP, &OPL))) { @@ -29,8 +29,10 @@ int to_query(int ref_loc, const char *cig0, int pos, Rboolean narrow_left) case 'I': /* Soft clip on the read */ case 'S': - query_loc += OPL; - query_consumed += OPL; + if (style >= 1) { + query_loc += OPL; + query_consumed += OPL; + } break; /* Deletion from the reference */ case 'D': @@ -51,6 +53,10 @@ int to_query(int ref_loc, const char *cig0, int pos, Rboolean narrow_left) break; /* Hard clip on the read */ case 'H': + if (style >= 2) { + query_loc += OPL; + query_consumed += OPL; + } break; /* Silent deletion from the padded reference */ case 'P': @@ -81,7 +87,7 @@ int to_query(int ref_loc, const char *cig0, int pos, Rboolean narrow_left) * outside of any deletions or insertions. */ SEXP ref_locs_to_query_locs(SEXP ref_locs, SEXP cigar, SEXP pos, - SEXP narrow_left) + SEXP narrow_left, SEXP style) { int nlocs, i; SEXP query_locs; @@ -92,7 +98,7 @@ SEXP ref_locs_to_query_locs(SEXP ref_locs, SEXP cigar, SEXP pos, const char *cig_i = CHAR(STRING_ELT(cigar, i)); INTEGER(query_locs)[i] = to_query(INTEGER(ref_locs)[i], cig_i, INTEGER(pos)[i], - asLogical(narrow_left)); + asLogical(narrow_left), asInteger(style)); } UNPROTECT(1); @@ -117,7 +123,7 @@ SEXP ref_locs_to_query_locs(SEXP ref_locs, SEXP cigar, SEXP pos, * positions actually occur in the read alignment region, outside of * any deletions or insertions. */ -SEXP map_ref_locs_to_query_locs(SEXP start, SEXP end, SEXP cigar, SEXP pos) +SEXP map_ref_locs_to_query_locs(SEXP start, SEXP end, SEXP cigar, SEXP pos, SEXP style) { SEXP ans, ans_start, ans_end, ans_qhits, ans_shits; IntAE *sbuf, *ebuf, *qhbuf, *shbuf; @@ -133,10 +139,10 @@ SEXP map_ref_locs_to_query_locs(SEXP start, SEXP end, SEXP cigar, SEXP pos) for (j = 0; j < ncigar; j++) { const char *cig_j = CHAR(STRING_ELT(cigar, j)); int pos_j = INTEGER(pos)[j]; - s = to_query(INTEGER(start)[i], cig_j, pos_j, FALSE); + s = to_query(INTEGER(start)[i], cig_j, pos_j, FALSE, asInteger(style)); if (s == NA_INTEGER) continue; - e = to_query(INTEGER(end)[i], cig_j, pos_j, TRUE); + e = to_query(INTEGER(end)[i], cig_j, pos_j, TRUE, asInteger(style)); if (e == NA_INTEGER) continue; IntAE_insert_at(sbuf, IntAE_get_nelt(sbuf), s); @@ -166,12 +172,12 @@ SEXP map_ref_locs_to_query_locs(SEXP start, SEXP end, SEXP cigar, SEXP pos) /* Returns integer position of 'query_loc' mapped to genome-based space. * If 'query_loc' cannot be mapped NA is returned. */ -int to_ref(int query_loc, const char *cig0, int pos, Rboolean narrow_left) +int to_ref(int query_loc, const char *cig0, int pos, Rboolean narrow_left, int style) { int ref_loc = query_loc + pos - 1; int n, offset = 0, OPL, query_consumed = 0; - char OP; - + char OP, lastOP = '?'; + while (query_consumed < query_loc && (n = _next_cigar_OP(cig0, offset, &OP, &OPL))) { @@ -197,7 +203,10 @@ int to_ref(int query_loc, const char *cig0, int pos, Rboolean narrow_left) } /* Soft clip on the read */ case 'S': - query_consumed += OPL; + if (style >= 1) { + query_consumed += OPL; + ref_loc -= OPL; + } break; /* Deletion from the reference */ case 'D': @@ -206,6 +215,10 @@ int to_ref(int query_loc, const char *cig0, int pos, Rboolean narrow_left) break; /* Hard clip on the read */ case 'H': + if (style == 2) { + query_consumed += OPL; + ref_loc -= OPL; + } break; /* Silent deletion from the padded reference */ case 'P': @@ -214,9 +227,10 @@ int to_ref(int query_loc, const char *cig0, int pos, Rboolean narrow_left) break; } offset += n; + lastOP = OP; } - if (n == 0) + if (n == 0 || (style > 0 && ref_loc < pos) || lastOP == 'S' || lastOP == 'H') ref_loc = NA_INTEGER; return ref_loc; @@ -235,7 +249,7 @@ int to_ref(int query_loc, const char *cig0, int pos, Rboolean narrow_left) * outside of any deletions or insertions. */ SEXP query_locs_to_ref_locs(SEXP query_locs, SEXP cigar, SEXP pos, - SEXP narrow_left) + SEXP narrow_left, SEXP style) { int nlocs, i; SEXP ref_locs; @@ -246,7 +260,7 @@ SEXP query_locs_to_ref_locs(SEXP query_locs, SEXP cigar, SEXP pos, const char *cig_i = CHAR(STRING_ELT(cigar, i)); INTEGER(ref_locs)[i] = to_ref(INTEGER(query_locs)[i], cig_i, INTEGER(pos)[i], - asLogical(narrow_left)); + asLogical(narrow_left), asInteger(style)); } UNPROTECT(1); @@ -270,7 +284,7 @@ SEXP query_locs_to_ref_locs(SEXP query_locs, SEXP cigar, SEXP pos, * positions actually occur in the read alignment region, outside of * any deletions or insertions. */ -SEXP map_query_locs_to_ref_locs(SEXP start, SEXP end, SEXP cigar, SEXP pos) +SEXP map_query_locs_to_ref_locs(SEXP start, SEXP end, SEXP cigar, SEXP pos, SEXP style) { SEXP ans, ans_start, ans_end, ans_qhits, ans_shits; IntAE *sbuf, *ebuf, *qhbuf, *shbuf; @@ -286,10 +300,10 @@ SEXP map_query_locs_to_ref_locs(SEXP start, SEXP end, SEXP cigar, SEXP pos) for (j = 0; j < ncigar; j++) { const char *cig_j = CHAR(STRING_ELT(cigar, j)); int pos_j = INTEGER(pos)[j]; - s = to_ref(INTEGER(start)[i], cig_j, pos_j, FALSE); + s = to_ref(INTEGER(start)[i], cig_j, pos_j, FALSE, asInteger(style)); if (s == NA_INTEGER) break; - e = to_ref(INTEGER(end)[i], cig_j, pos_j, TRUE); + e = to_ref(INTEGER(end)[i], cig_j, pos_j, TRUE, asInteger(style)); if (e == NA_INTEGER) break; IntAE_insert_at(sbuf, IntAE_get_nelt(sbuf), s);