-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathrcsb_get.py
75 lines (68 loc) · 2.35 KB
/
rcsb_get.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/python
#import Threading
import os, sys, subprocess, json
class ProteinSequence:
    """Residue sequence of one chain, read from a ``<pdb_name>.json`` file.

    On construction the JSON file is loaded (invoking ``./get_residue.sh``
    first when the file cannot be opened) and the first chain found under
    ``json_data["byChain"]`` is turned into ``self.sequence``, a list of
    ``(three_letter_code, residue_number)`` tuples.

    Attributes set by the constructor:
        pdb_name       -- name used to build the JSON file name.
        json_data      -- parsed JSON document.
        rawsequence    -- flattened list of the chain's raw sequence entries.
        sequence       -- list of (code, number) tuples for recognised aminos.
        eris_start     -- number of the first residue that carried a number
                          in the input, or -1 if there was none.
        eris_end       -- list index of the first unnumbered residue that
                          follows the first numbered one, or -1.
        istable_start  -- number of the first residue after renumbering.
        istable_end    -- number of the last residue after renumbering.

    NOTE(review): the "eris"/"istable" names presumably refer to external
    tools that consume these ranges -- confirm with the callers.
    """

    def __init__(self, pdb_name="2OCJ"):
        """Load the JSON data for *pdb_name* and build the residue sequence.

        An empty or ``None`` *pdb_name* falls back to ``"2OCJ"``.
        """
        self.valid_chainids = ["A", "B", "C", "D"]
        self.valid_aminos = [
            "GLY", "ALA", "VAL", "LEU", "ILE", "PRO", "PHE", "TYR", "TRP",
            "SER", "THR", "CYS", "MET", "ASN", "GLN", "LYS", "ARG", "HIS",
            "ASP", "GLU",
        ]
        self.pdb_name = pdb_name or "2OCJ"
        self.getJSON(self.pdb_name)
        # -1 means "not determined" for all four range markers.
        self.eris_start = -1
        self.eris_end = -1
        self.istable_start = -1
        self.istable_end = -1
        self.getSequence()

    def getAmino(self, s):
        """Split a label such as ``"GLY12"`` into its amino code and number.

        Returns ``(True, code, number)`` when the first three characters of
        *s* are a known amino-acid code; *number* is -1 when the remainder of
        the label is not an integer. Returns ``(False, None, None)`` for any
        other label.
        """
        code = s[:3]
        if code not in self.valid_aminos:
            return False, None, None
        try:
            num = int(s[3:])
        except ValueError:
            # No usable number on this label; getSequence fills it in later.
            num = -1
        return True, code, num

    def getSequence(self):
        """Build ``self.sequence`` and the start/end markers from json_data.

        Residues whose label carried no number are assigned one by counting
        outwards from the first numbered residue. If no entry is a recognised
        amino acid, ``self.sequence`` is left empty and all markers keep
        their -1 value.
        """
        by_chain = self.json_data["byChain"]
        # Only the first chain of the document is used.
        chainid = list(by_chain)[0]
        if chainid not in self.valid_chainids:
            # Diagnostic only; processing continues with this chain. The
            # double space reproduces the original print statement's output.
            print("chainId is  %s not in %s" % (chainid, self.valid_chainids))

        rows = by_chain[chainid]["img"]["mapData"]["sequence"]
        # The sequence is stored as rows of entries; flatten it.
        self.rawsequence = []
        for row in rows:
            self.rawsequence.extend(row)

        self.sequence = []
        for entry in self.rawsequence:
            is_amino, name, residue = self.getAmino(entry["t"])
            if is_amino:
                self.sequence.append((name, residue))

        if not self.sequence:
            # Nothing to number; leave the markers at their -1 sentinel
            # instead of failing on self.sequence[0].
            return

        # Count the unnumbered residues that precede the first numbered one.
        leading = 0
        for _, residue in self.sequence:
            if residue != -1:
                self.eris_start = residue
                break
            leading += 1

        # Number assigned to list index i is offset + i, so the first
        # numbered residue keeps the number it already had.
        offset = self.eris_start - leading
        for index, (name, residue) in enumerate(self.sequence):
            if residue != -1:
                continue
            if index >= leading and self.eris_end == -1:
                # NOTE(review): this stores a list index while eris_start is
                # a residue number -- kept as in the original; confirm intent.
                self.eris_end = index
            self.sequence[index] = (name, offset + index)

        self.istable_start = self.sequence[0][1]
        self.istable_end = self.sequence[-1][1]

    def getJSON(self, name):
        """Load ``<name>.json`` into ``self.json_data``.

        If the file cannot be opened, ``./get_residue.sh <name>.json`` is run
        once and the load is retried; a second failure propagates to the
        caller.
        """
        file_name = name + ".json"
        try:
            self.json_data = self._read_json(file_name)
        except IOError:
            # The argument list is passed without shell=True: with a shell,
            # POSIX runs only the first item as the command and hands the
            # rest to the shell itself, so the script never saw file_name.
            subprocess.call(["./get_residue.sh", file_name])
            self.json_data = self._read_json(file_name)

    def _read_json(self, file_name):
        """Return the parsed contents of *file_name*, closing the file."""
        with open(file_name, 'r') as handle:
            return json.load(handle)
if __name__ == '__main__':
    # Script entry point: load the default entry and dump what was parsed.
    protein = ProteinSequence("2OCJ")
    print(protein.sequence)
    # Single formatted string so the line is space-separated exactly like the
    # original multi-item print statement.
    bounds = (
        protein.istable_start,
        protein.eris_start,
        protein.eris_end,
        protein.istable_end,
    )
    print("%s %s %s %s" % bounds)