-
Notifications
You must be signed in to change notification settings - Fork 0
/
napshift.py
executable file
·207 lines (154 loc) · 7.59 KB
/
napshift.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
#!/usr/bin/env -S python3 -O
# NOTE: "env -S" makes env split "python3 -O" into a program name plus an
# option. With a plain "env python3 -O" shebang, Linux hands the whole text
# "python3 -O" to env as ONE argument, so the script cannot be started
# directly (and the -O flag is never applied).
"""NapShift command line script: predicts chemical shifts of PDB files."""

import sys

# Check the current python version before running. This is done before
# any other import, so an old interpreter gets a readable error message.
if sys.version_info < (3, 7, 0):
    sys.exit("Error: NapShift program requires Python 3.7 or greater.")
# _end_if_

import os

# Disable some tensor-flow warning messages. These variables are set
# BEFORE the predictor module is imported further down (presumably
# that is where tensor-flow gets loaded -- confirm in model_machine).
os.environ["TF_CPP_MIN_LOG_LEVEL"] = "2"
os.environ["TF_XLA_FLAGS"] = "--tf_xla_enable_xla_devices"

# Import the IC_Chain to update a threshold globally.
from Bio.PDB.internal_coords import IC_Chain

# This avoids the chain break errors when
# the bond is slightly above 1.4 Angstrom.
# NOTE: this is a class attribute, hence it
# affects every user of IC_Chain in the process.
IC_Chain.MaxPeptideBond = 1.5

from tqdm import tqdm
from pathlib import Path
from src.chemical_shifts.model_machine import ChemShiftPredictor

# INFO:
__version__ = '1.0.2'
__author__ = 'Michail Vrettas, PhD'
__email__ = '[email protected]'
# Main function.
def main(pdb_file=None, pH=None, output_path=None, random_coil_path=None,
         talos_fmt=True, all_models=False, verbose=False):
    """
    Main function that wraps the call of the predict method. First we create
    a ChemShiftPredictor object. We assume that the trained models are located
    in a folder named "models/" that exists in the same directory where the
    current script is stored. The output of the prediction is stored in the
    "output_path/" directory.

    Any error (missing input, missing models directory, missing PDB file, or
    an error raised by the predictor) terminates the program via sys.exit(),
    with a message that describes the problem.

    :param pdb_file: The input (PDB) file(s) that we want to predict the
    chemical shift values. It can be a single path (str / os.PathLike), or
    an iterable of paths.

    :param pH: The pH value used by the random coil object of the predictor.
    If it is None the predictor's own setting is left untouched. The value
    is ignored when a 'random_coil_path' is given.

    :param output_path: The directory (path) where we want to store the output
    file(s). It is passed as is to the ChemShiftPredictor.

    :param random_coil_path: If we have available random coil values, for the
    same input file, we can use it here. It is passed as is to the predictor.

    :param talos_fmt: This is the TALOS format for the output file. If not set
    to True, then we will use a tabular format, where each row corresponds to
    a single residue and each atom has each own column.

    :param all_models: If "True" the method will process all the models in the
    PDB file, otherwise only the first model.

    :param verbose: This is a boolean flag that determines whether we want to
    display additional information on the screen.

    :return: None.
    """
    try:
        # Validate the input first, so a call without any
        # file fails with a clear message before anything
        # else (e.g. the predictor) is created.
        if pdb_file is None:
            raise ValueError("No input PDB file(s) were given.")
        # _end_if_

        # A single path would otherwise be iterated character
        # by character (str), or fail to iterate (Path). Wrap
        # it so that both call styles are supported.
        if isinstance(pdb_file, (str, os.PathLike)):
            pdb_file = [pdb_file]
        # _end_if_

        # Get the parent folder of the module.
        parent_dir = Path(__file__).resolve().parent

        # Directory with the trained models.
        input_dir = parent_dir / "models"

        # Sanity check.
        if not input_dir.is_dir():
            raise FileNotFoundError(f"Input directory {input_dir} doesn't exist.")
        # _end_if_

        # Create a predictor object. NOTE: By default we
        # will overwrite the results file (if it exists).
        nn_predict = ChemShiftPredictor(dir_model=input_dir, dir_output=output_path,
                                        overwrite=True)

        # Check if we need to alter the pH value. This makes
        # sense only if we have not given a random coil file.
        # NOTE: we test against None (not truthiness) so that
        # an explicit pH=0 is not silently ignored.
        if random_coil_path is None and pH is not None:
            # Change the value of the random_coil
            # object (make sure it is a float).
            nn_predict.random_coil.pH = float(pH)
        # _end_if_

        # Count the successful predictions.
        count_success = 0

        # Process all input files.
        for f_in in tqdm(pdb_file,
                         " Predicting chemical shifts ... "):
            # Make sure is a Path.
            f_path = Path(f_in)

            # A missing file stops the whole run.
            if not f_path.is_file():
                raise FileNotFoundError(f"File {f_path} not found.")
            # _end_if_

            # Make the predictions.
            nn_predict(f_path, n_peptides=3, all_models=all_models,
                       random_coil_path=random_coil_path,
                       talos_fmt=talos_fmt, verbose=verbose)

            # Increase counter by one.
            count_success += 1
        # _end_for_

        # Final message.
        if verbose:
            print(f" Successfully saved {count_success} result(s) to: {nn_predict.dir_output}")
        # _end_if_

    except Exception as e1:
        # Top-level boundary of the script: report
        # the problem and exit the program.
        sys.exit(f" Program ended with message: {e1}")
    # _end_try_
# _end_main_
# Run the main script.
if __name__ == "__main__":

    # Without any command line parameter there is nothing
    # to parse: point the user to the help and stop here.
    if len(sys.argv) < 2:
        sys.exit(f"Error: Not enough input parameters. {os.linesep}"
                 f" Run : {sys.argv[0]} -h/--help ")
    # _end_if_

    # Local import (needed only when the script is executed).
    import argparse

    # Command line interface of the program.
    parser = argparse.ArgumentParser(
        description="Python chemical shift predictor (of NMR PDB files), "
                    "with the usage of Artificial Neural Networks (ANN). ")

    # One (or more) input PDB file(s) with the residue coordinates.
    parser.add_argument(
        "-f", "--file", type=str, nargs='+', required=True,
        help="Input PDB file(s) (Path/String).")

    # The pH value for the random coil shift values.
    parser.add_argument(
        "--pH", type=float, default=7.0,
        help="The pH value of reference chemical shifts. "
             "Default value is set to 7.0.")

    # Where to save the predicted chemical shifts.
    parser.add_argument(
        "-o", "--out", type=str, default=None,
        help="Output path to save the predicted values. Note: "
             "The name of the file will be generated automatically, "
             "e.g.: 'prediction_X_model_Y_chain_Z.tab', where 'X' is "
             "the input filename, 'Y' is the model number and 'Z' is "
             "the chain-id.")

    # File with pre-estimated random coil values.
    parser.add_argument(
        "--rc_path", type=str, default=None,
        help="File with the random coil chemical shift values.")

    # Boolean switches: output format (TALOS / tabular) and verbosity.
    # They are registered in this order, so the help text lists them
    # the same way.
    for switch, target, action, text in (
            ("--talos", "talos", "store_true",
             "The default output is the 'TALOS' format."),
            ("--no-talos", "talos", "store_false",
             "Alternatively we set a tabular (output) format."),
            ("--verbose", "verbose", "store_true",
             "Display information while running.")):
        parser.add_argument(switch, dest=target, action=action, help=text)
    # _end_for_

    # Print the version of the program and exit.
    parser.add_argument(
        "--version", action="version",
        version=f" NapShift (c), version: {__version__}",
        help="Print version information and exit.")

    # Predict all the models in the PDB file (not only the first).
    parser.add_argument(
        "--all-models", dest="all_models", action="store_true",
        help="Allows prediction for all the models in the PDB file.")

    # Default values of the boolean switches.
    parser.set_defaults(talos=True, verbose=False, all_models=False)

    # Read the command line.
    args = parser.parse_args()

    # Run the predictions with the parsed values.
    main(pdb_file=args.file,
         pH=args.pH,
         output_path=args.out,
         random_coil_path=args.rc_path,
         talos_fmt=args.talos,
         all_models=args.all_models,
         verbose=args.verbose)
# _end_program_