Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add with_ref and with_query args for get_aligned_pairs. #777

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
61 changes: 53 additions & 8 deletions pysam/libcalignedsegment.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -1864,7 +1864,7 @@ cdef class AlignedSegment:
return self.query_qualities


def get_aligned_pairs(self, matches_only=False, with_seq=False):
def get_aligned_pairs(self, matches_only=False, with_seq=False, with_query=False, with_ref=False):
"""a list of aligned read (query) and reference positions.

For inserts, deletions, skipping either query or reference
Expand All @@ -1878,10 +1878,14 @@ cdef class AlignedSegment:
matches_only : bool
If True, only matched bases are returned - no None on either
side.
with_seq : bool
with_query : bool
If True, return a third element in the tuple containing the
reference sequence. Substitutions are lower-case. This option
requires an MD tag to be present.
query sequence. This option requires an MD tag to be present.
with_ref : bool
If True, return a fourth element in the tuple containing the
reference sequence. This option requires an MD tag to be present.
with_seq : bool
Equal to `with_ref`, for backward compatibility.

Returns
-------
Expand All @@ -1895,16 +1899,25 @@ cdef class AlignedSegment:
cdef bam1_t * src = self._delegate
cdef bint _matches_only = bool(matches_only)
cdef bint _with_seq = bool(with_seq)
cdef bint _with_query = bool(with_query)
cdef bint _with_ref = bool(with_ref)

# TODO: this method performs no checking and assumes that
# read sequence, cigar and MD tag are consistent.

if _with_seq:
_with_ref = _with_seq

if _with_ref:
# force_str required for py2/py3 compatibility
ref_seq = force_str(build_reference_sequence(src))
if ref_seq is None:
raise ValueError("MD tag not present")

if _with_query:
# force_str required for py2/py3 compatibility
query_seq = force_str(self.query_sequence)

r_idx = 0

if pysam_get_n_cigar(src) == 0:
Expand All @@ -1919,7 +1932,17 @@ cdef class AlignedSegment:
l = cigar_p[k] >> BAM_CIGAR_SHIFT

if op == BAM_CMATCH or op == BAM_CEQUAL or op == BAM_CDIFF:
if _with_seq:
if _with_query and _with_ref:
for i from pos <= i < pos + l:
result.append((qpos, i, query_seq[qpos], ref_seq[r_idx]))
r_idx += 1
qpos += 1
elif _with_query:
for i from pos <= i < pos + l:
result.append((qpos, i, query_seq[qpos]))
r_idx += 1
qpos += 1
elif _with_ref:
for i from pos <= i < pos + l:
result.append((qpos, i, ref_seq[r_idx]))
r_idx += 1
Expand All @@ -1932,7 +1955,15 @@ cdef class AlignedSegment:

elif op == BAM_CINS or op == BAM_CSOFT_CLIP:
if not _matches_only:
if _with_seq:
if _with_query and _with_ref:
for i from pos <= i < pos + l:
result.append((qpos, None, query_seq[qpos], None))
qpos += 1
elif _with_query:
for i from pos <= i < pos + l:
result.append((qpos, None, query_seq[qpos]))
qpos += 1
elif _with_ref:
for i from pos <= i < pos + l:
result.append((qpos, None, None))
qpos += 1
Expand All @@ -1945,7 +1976,15 @@ cdef class AlignedSegment:

elif op == BAM_CDEL:
if not _matches_only:
if _with_seq:
if _with_query and _with_ref:
for i from pos <= i < pos + l:
result.append((None, i, None, ref_seq[r_idx]))
r_idx += 1
elif _with_query:
for i from pos <= i < pos + l:
result.append((None, i, None))
r_idx += 1
elif _with_ref:
for i from pos <= i < pos + l:
result.append((None, i, ref_seq[r_idx]))
r_idx += 1
Expand All @@ -1961,7 +2000,13 @@ cdef class AlignedSegment:

elif op == BAM_CREF_SKIP:
if not _matches_only:
if _with_seq:
if _with_query and _with_ref:
for i from pos <= i < pos + l:
result.append((None, i, None, None))
if _with_query:
for i from pos <= i < pos + l:
result.append((None, i, None))
if _with_ref:
for i from pos <= i < pos + l:
result.append((None, i, None))
else:
Expand Down