-
Notifications
You must be signed in to change notification settings - Fork 46
/
Copy pathremove_gap.py
81 lines (74 loc) · 3.44 KB
/
remove_gap.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
from argparse import ArgumentParser, Namespace, FileType
from Bio.PDB import PDBParser,PDBIO
import torch,os,copy
import pandas as pd
from utils.visualise import save_protein
# Command-line options for the gap-removal script.
parser = ArgumentParser()
parser.add_argument(
    '--results_path',
    type=str,
    default='results/user_inference',
    help='Directory where the outputs will be written to',
)
# NOTE(review): --num_workers is accepted but not read by the visible part of
# this script.
parser.add_argument(
    '--num_workers',
    type=int,
    default=1,
    help='Number of workers for creating the dataset',
)
# Used below as the number of rank indices (1..N) to look for per directory.
parser.add_argument(
    '--samples_per_complex',
    type=int,
    default=10,
    help='Number of samples to generate',
)
args = parser.parse_args()
def _first_match(file_names, *fragments):
    """Return the first entry of *file_names* that contains every fragment.

    Returns ``None`` when nothing matches, so the caller decides whether a
    missing file is skipped or treated as an error.
    """
    return next(
        (name for name in file_names
         if all(fragment in name for fragment in fragments)),
        None,
    )


def _strip_gap_residues(model, gap_mask):
    """Detach from *model* every residue whose ``gap_mask`` character is '1'.

    The mask is indexed by a running residue position: chains are visited in
    iteration order and each chain continues where the previous one stopped.
    """
    offset = 0
    for chain in model:
        # Snapshot the residues before detaching any of them so that removal
        # does not disturb the iteration or the positional index.
        residues = list(chain.get_residues())
        for position, residue in enumerate(residues):
            if gap_mask[offset + position] == '1':
                chain.detach_child(residue.id)
        # Advance by the chain's original residue count. Using the length
        # (rather than the last loop index) stays correct for an empty chain.
        offset += len(residues)


def _remove_gaps_from_file(src_path, dst_path, gap_mask, all_models=False):
    """Parse *src_path*, strip gap residues and write the result to *dst_path*.

    With ``all_models=False`` only model 0 is processed and saved; with
    ``all_models=True`` every model is processed and the whole structure is
    saved.
    """
    structure = PDBParser(QUIET=True).get_structure(src_path, src_path)
    if all_models:
        for model in structure:
            _strip_gap_residues(model, gap_mask)
        save_protein(structure, dst_path)
    else:
        first_model = structure[0]
        _strip_gap_residues(first_model, gap_mask)
        save_protein(first_model, dst_path)


results_path_containments = os.listdir(args.results_path)
# Read the mask column as text: it is indexed per character and compared with
# '1' below, and numeric type inference would drop leading zeros.
df = pd.read_csv('./data/test_af2.csv', dtype={'gap_mask': str})
for rp in results_path_containments:
    if not rp.startswith('index'):
        continue
    # Directory names look like 'index<N>_...'; <N> selects the row in df.
    idx = int(rp.split('_')[0][5:])
    print(idx)
    gap_mask = df.loc[idx, 'gap_mask']
    write_dir = os.path.join(args.results_path, rp)
    file_paths = sorted(os.listdir(write_dir))

    for rank in range(args.samples_per_complex):
        # Relaxed receptor for this rank; a rank without one is skipped
        # entirely (including its reverse-process trajectory).
        protein_file_name = _first_match(
            file_paths, f'rank{rank+1}_receptor_lddt', 'relaxed.pdb')
        if protein_file_name is None:
            continue
        _remove_gaps_from_file(
            os.path.join(write_dir, protein_file_name),
            os.path.join(
                write_dir,
                protein_file_name.replace('relaxed.pdb', 'relaxed_remove_gap.pdb'),
            ),
            gap_mask,
        )

        # Reverse-process file for this rank: every model in it is processed.
        protein_file_name = _first_match(
            file_paths, f'rank{rank+1}_receptor_reverseprocess_relaxed')
        if protein_file_name is None:
            continue
        _remove_gaps_from_file(
            os.path.join(write_dir, protein_file_name),
            os.path.join(
                write_dir,
                protein_file_name.replace('relaxed.pdb', 'relaxed_remove_gap.pdb'),
            ),
            gap_mask,
            all_models=True,
        )

    # NOTE(review): these two files do not depend on the rank, so they are
    # handled once per directory — confirm against the original layout.
    for fragment in ('_aligned_to_', 'af2_'):
        protein_file_name = _first_match(file_paths, fragment)
        if protein_file_name is None:
            # These inputs are required; fail with a message that names the
            # missing pattern instead of an anonymous IndexError.
            raise FileNotFoundError(
                f'no file containing {fragment!r} found in {write_dir}')
        _remove_gaps_from_file(
            os.path.join(write_dir, protein_file_name),
            os.path.join(
                write_dir,
                protein_file_name.replace('.pdb', '_remove_gap.pdb'),
            ),
            gap_mask,
        )