-
Notifications
You must be signed in to change notification settings - Fork 0
/
exon-order-distance.py
executable file
·147 lines (115 loc) · 3.82 KB
/
exon-order-distance.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
#!/usr/bin/env python
import sys
import argparse
from collections import namedtuple
from difflib import SequenceMatcher
class Blast:
"""A blast alignment record"""
def __init__(self,
tid, tbeg, tend, tlen,
sid, sbeg, send, slen,
pident, length, evalue):
self.tid = tid
self.tbeg = int(tbeg)
self.tend = int(tend)
self.tlen = int(tlen)
self.sid = sid
self.sbeg = int(sbeg)
self.send = int(send)
self.slen = int(slen)
self.pident = float(pident)
self.length = int(length)
self.evalue = float(evalue)
def __repr__(self):
return "Aln of {:s} and {:s}, length {:d}".format(
self.tid, self.sid, self.length)
def sign(x):
if x > 0:
return 1
elif x < 0:
return -1
elif x == 0:
return 0
else:
return x
def uniq(list):
last = None
uniqlist = []
for item in list:
if last is None or item != last:
uniqlist.append(item)
last = item
return uniqlist
def distance(a,b):
return 1 - SequenceMatcher(None, a, b).ratio()
def nonorderliness(seq, key):
"""Measures how much the sequence is out of order according to a key"""
return min(distance(seq, sorted(seq,None,key,False)),
distance(seq, sorted(seq,None,key,True)))
def nonlinearness(seq):
"""Measure how nonlinear the sequence is,
linear means the items appear in any order but are not repeated"""
return distance(sorted(uniq(seq)), uniq(sorted(seq)))
def inclusivetoexclusive(chunks):
"""Converts [beg,end] chunks to [beg,end)"""
return [(beg,end+1) if beg<end else (beg,end-1) for beg,end in chunks]
def nonorientedness(chunks):
"""Calculates nonorientedness of list of chunks where chunks are [beg,end)"""
total = sum([abs(end-beg) for beg,end in chunks])
inorder = float(total-abs(sum([end-beg for beg,end in chunks])))/2
if total > 0:
return inorder/total
else:
return 0.0
Score = namedtuple("Score", ["nonlinearliness", "nonorientedness", "nonorderliness"])
def score_transcript(alns):
"""Calculates badness of transcript alignment."""
nl = nonlinearness([b.sid for b in alns])
# sys.stderr.write("nonlinearness {:s}: {:f}\n".format(
# alns[0].tid, nl))
alns_per_contig = split_list_on_key(alns, lambda a: a.sid)
no = []
nd = []
for contig_alns in alns_per_contig:
no.append(nonorientedness(zip(
[b.sbeg for b in contig_alns],
[b.send for b in contig_alns])))
# sys.stderr.write("nonorientedness {:s}: {:f}\n".format(
# contig_alns[0].sid, no))
nd.append(nonorderliness(contig_alns, lambda a: a.sbeg))
# sys.stderr.write("nonorderliness {:s}: {:f}\n".format(
# contig_alns[0].sid, nd))
return Score(nl, sum(no)/len(no), sum(nd)/len(nd))
def split_list_on_key(l, key):
splits = [[]]
last_key = None
for item in sorted(l, None, key):
if last_key is None or key(item) == last_key:
splits[-1].append(item)
else:
splits.append([item])
last_key = key(item)
return splits
def read_alns(file):
alns = []
for line in file:
alns.append(Blast(*line.split()))
return alns
def alns_from_file(filename):
with open(filename) as f:
alns = read_alns(f)
return alns
def main():
last_tid = None
alns = []
for line in sys.stdin:
aln = Blast(*line.split())
if last_tid is None or aln.tid == last_tid:
alns.append(aln)
else:
sys.stdout.write("{:s}\t{:f}\t{:f}\t{:f}\n".format(
last_tid, *score_transcript(alns)))
alns = [aln]
last_tid = aln.tid
if __name__ == '__main__':
main()