diff --git a/scripts/run_waterkit.py b/scripts/run_waterkit.py index eced99d..c4b879b 100644 --- a/scripts/run_waterkit.py +++ b/scripts/run_waterkit.py @@ -10,12 +10,11 @@ import argparse import shutil -from waterkit import AutoGrid +from waterkit import calc_spherical_water_map from waterkit import Map from waterkit import Molecule from waterkit import WaterKit from waterkit import utils -from vina import Vina def cmd_lineparser(): @@ -55,7 +54,7 @@ def main(): temperature = args.temperature output_dir = args.output_dir spherical_water_maps = args.spherical_water_maps - autogrid_exec_path = args.autogrid_exec_path + autogrid_exec_path = os.path.abspath(args.autogrid_exec_path) water_model = 'tip3p' # Force to use only one thread per job @@ -66,29 +65,14 @@ def main(): # Read PDBQT/MOL2 file, Waterfield file and AutoDock grid map receptor = Molecule.from_file(receptor_pdbqtfilename) - with utils.temporary_directory(prefix='wk_', dir='.', clean=False) as tmp_dir: - # Generate AutoDock maps using the Amber ff14SB forcefield - receptor.to_pdbqt_file('receptor.pdbqt') - ff14sb_param_file = os.path.join(utils.path_module('waterkit'), 'data/ff14SB_parameters.dat') - ag = AutoGrid(autogrid_exec_path, ff14sb_param_file) - ad_map = ag.run('receptor.pdbqt', ['OW'], box_center, box_size, smooth=0, dielectric=1) - - if spherical_water_maps[0] is None: - # Convert amber atom types to AutoDock atom types - ad_receptor = utils.convert_amber_to_autodock_types(receptor) - ad_receptor.to_pdbqt_file('receptor_ad.pdbqt') - - # Generate Vina maps for the spherical maps - v = Vina(verbosity=0) - v.set_receptor('receptor_ad.pdbqt') - v.compute_vina_maps(box_center, box_size, force_even_voxels=True) - v.write_maps('vina') - sw_map = Map('vina.O_DA.map', 'SW') - else: - # The first spherical map is for the receptor - sw_map = Map(spherical_water_maps[0], 'SW') - - ad_map.add_map('SW', sw_map._maps['SW']) + receptor_spherical_water_map_filename = spherical_water_maps[0] + ad_map = calc_spherical_water_map( + autogrid_exec_path, + receptor, + box_center, + box_size, + receptor_spherical_water_map_filename, + ) # It is more cleaner if we prepare the maps (OW, HW for tip3p, OT, HT, LP for tip5p) before utils.prepare_water_map(ad_map, water_model) diff --git a/scripts/wk_make_trajectory.py b/scripts/wk_make_trajectory.py index 14af99f..ccddebc 100644 --- a/scripts/wk_make_trajectory.py +++ b/scripts/wk_make_trajectory.py @@ -9,179 +9,9 @@ from __future__ import absolute_import import argparse -import copy -import os -import re -import sys -import numpy as np import parmed as pmd -from pdb4amber import pdb4amber -from pdb4amber.utils import easy_call -from parmed.amber import NetCDFTraj - - -def max_water(water_filenames): - """Return the max number of water molecules seen - - Args: - water_filenames (array-like): list of filenames of the water files - - Return: - int: number of max water molecules - int: index of the file in the water filenames list - """ - sizes = [os.path.getsize(f) for f in water_filenames] - idx = np.argmax(sizes) - m = pmd.load_file(water_filenames[idx]) - # We select only the water molecules, because we might have ions, etc... - m = m["@O, H1, H2"] - max_water = len(m.residues) - return max_water, idx - - -def min_water(water_filenames): - """Return the min number of water molecules seen - - Args: - water_filenames (array-like): list of filenames of the water files - - Return: - int: number of min water molecules - int: index of the file in the water filenames list - """ - sizes = [os.path.getsize(f) for f in water_filenames] - idx = np.argmin(sizes) - m = pmd.load_file(water_filenames[idx]) - # We select only the water molecules, because we might have ions, etc... - m = m["@O, H1, H2"] - max_water = len(m.residues) - - return max_water, idx - - -def add_water_to_receptor(receptor, water): - receptor_wet = copy.deepcopy(receptor) - receptor_wet += water["@O, H1, H2"] - # ParmED really want a symmetry attributes to write the PDB file - receptor_wet.symmetry = None - - return receptor_wet - - -def write_pdb_file(output_name, molecule, overwrite=True, **kwargs): - '''Write PDB file - - Args: - output_name (str): pdbqt output filename - molecule (parmed): parmed molecule object - - ''' - try: - molecule.save(output_name, format='pdb', overwrite=overwrite, **kwargs) - except IOError: - raise IOError("Error: file %s already exists." % fname) - - -def write_tleap_input_file(fname, pdb_filename, lib_files=None, frcmod_files=None): - """Create tleap input script - - Args: - fname (str): tleap input filename - pdb_filename (str): pdb filename - lib_files (list): Amber lib parameter files for non-standard residues - frcmod_files (list): Amber frcmod parameter files for non-standard residues - - """ - prefix = pdb_filename.split(".pdb")[0].split("/")[-1] - - output_str = "source leaprc.protein.ff14SB\n" - output_str += "source leaprc.DNA.OL15\n" - output_str += "source leaprc.RNA.OL3\n" - output_str += "source leaprc.water.tip3p\n" - output_str += "source leaprc.gaff2\n" - if frcmod_files is not None: - output_str += ''.join(['loadamberparams %s\n' % fl for fl in frcmod_files]) - if lib_files is not None: - output_str += ''.join(['loadoff %s\n' % ll for ll in lib_files]) - output_str += "\n" - output_str += "x = loadpdb %s\n" % pdb_filename.split("/")[-1] - output_str += "\n" - output_str += "set default nocenter on\n" - output_str += "saveAmberParm x %s.prmtop %s.rst7\n" % (prefix, prefix) - output_str += "quit\n" - - with open(fname, "w") as w: - w.write(output_str) - - -def write_trajectory_file(fname, receptor, water_filenames): - """Create netcdf trajectory from the water pdb file - - Args: - fname (str): output name for the trajectory - receptor_filename (parmed): parmed receptor object - water_filenames (list): list of filenames of the water files - - """ - max_n_waters, idx = max_water(water_filenames) - min_n_waters, _ = min_water(water_filenames) - buffer_n_waters = max_n_waters - min_n_waters - max_n_water_atoms = max_n_waters * 3 - - n_atoms = receptor.coordinates.shape[0] - max_n_atoms = n_atoms + (max_n_waters * 3) - coordinates = np.zeros(shape=(max_n_atoms, 3)) - - # Boz dimension - box_center = np.mean(receptor.coordinates, axis=0) - - x_min, x_max = np.min(receptor.coordinates[:, 0]), np.max(receptor.coordinates[:, 0]) - y_min, y_max = np.min(receptor.coordinates[:, 1]), np.max(receptor.coordinates[:, 1]) - z_min, z_max = np.min(receptor.coordinates[:, 2]), np.max(receptor.coordinates[:, 2]) - box_size = np.max([np.abs(x_max - x_min), np.abs(y_max - y_min), np.abs(z_max - z_min)]) + 40 - - box = [box_size, box_size, box_size, 90, 90, 90] - - # Dummy water coordinates - dum_water_x = np.array([0, 0, 0]) - dum_water_y = np.array([0, 0.756, 0.586]) - dum_water_z = np.array([0, -0.756, 0.586]) - - radius = (box_size / 2.) - 2. - z = np.random.uniform(-radius, radius, buffer_n_waters) - p = np.random.uniform(0, np.pi * 2, buffer_n_waters) - x = np.sqrt(radius**2 - z**2) * np.cos(p) - y = np.sqrt(radius**2 - z**2) * np.sin(p) - oxygen_xyz = np.stack((x, y, z), axis=-1) - oxygen_xyz += box_center - - dummy_water_xyz = np.zeros(shape=(buffer_n_waters * 3, 3)) - dummy_water_xyz[0::3] = oxygen_xyz - dummy_water_xyz[1::3] = oxygen_xyz + dum_water_y - dummy_water_xyz[2::3] = oxygen_xyz + dum_water_z - - # Already add the coordinates from the receptor - coordinates[:n_atoms] = receptor.coordinates - - trj = NetCDFTraj.open_new(fname, natom=max_n_atoms, box=True, crds=True) - - for i, water_filename in enumerate(water_filenames): - m = pmd.load_file(water_filename) - - last_atom_id = len(m.residues) * 3 - water_xyz = m["@O, H1, H2"].coordinates - - # Get all the TIP3P water molecules - coordinates[n_atoms:n_atoms + last_atom_id] = water_xyz - # Add the dummy water molecules - coordinates[n_atoms + last_atom_id:] = dummy_water_xyz[:max_n_water_atoms - water_xyz.shape[0]] - - trj.add_coordinates(coordinates) - trj.add_box(box) - trj.add_time(i + 1) - - trj.close() +from waterkit import make_trajectory def cmd_lineparser(): @@ -208,43 +38,15 @@ def main(): lib_files = args.lib_files frcmod_files = args.frcmod_files - tleap_input = 'leap.template.in' - tleap_output = 'leap.template.out' - tleap_log = 'leap.log' - receptor_dry = pmd.load_file(receptor_filename) - water_filenames = [] - for fname in os.listdir(water_directory): - if re.match(r"water_[0-9]{6}.pdb", fname): - water_filenames.append(os.path.join(water_directory, fname)) - - """ Add water molecules to the dry receptor and write pdb wet receptor - We are taking the water coordinates from the frame that have the - max number of water molecules. Because the number of water molecules - need to be constant during the trajectory. This is just for creating - the amber topology (and coordinate) file(s).""" - water = pmd.load_file(water_filenames[max_water(water_filenames)[1]]) - receptor_wet = add_water_to_receptor(receptor_dry, water) - write_pdb_file("%s_system.pdb" % output_prefix, receptor_wet) - - # Write tleap input script - write_tleap_input_file(tleap_input, "%s_system.pdb" % output_prefix, lib_files, frcmod_files) - - try: - # Generate amber prmtop and rst7 files - easy_call('tleap -s -f %s > %s' % (tleap_input, tleap_output), shell=True) - except RuntimeError: - error_msg = 'Could not generate topology/coordinates files with tleap.' - raise RuntimeError(error_msg) - - # Write trajectory - write_trajectory_file("%s_system.nc" % output_prefix, receptor_dry, water_filenames) - - os.remove(tleap_input) - os.remove(tleap_output) - os.remove(tleap_log) - + make_trajectory( + receptor_dry, + water_directory, + output_prefix, + lib_files, + frcmod_files, + ) if __name__ == '__main__': main() diff --git a/scripts/wk_minimize_trajectory.py b/scripts/wk_minimize_trajectory.py index 4a5f11b..9cae7a7 100644 --- a/scripts/wk_minimize_trajectory.py +++ b/scripts/wk_minimize_trajectory.py @@ -5,15 +5,8 @@ # import argparse -import sys -from packaging.version import Version -import numpy as np -import parmed as pmd -from parmed.amber import NetCDFTraj -from openmm.unit import Quantity, picoseconds, kilocalories_per_mole, angstroms, nanometer, kelvin, kilocalories, mole -from openmm import CustomExternalForce, LangevinIntegrator, Platform -from openmm.app import AmberPrmtopFile, HBonds, CutoffNonPeriodic, Simulation +from waterkit import WaterMinimizer def cmd_lineparser(): @@ -33,102 +26,6 @@ def cmd_lineparser(): return parser.parse_args() -def _box_information(traj_filename): - box = None - - try: - traj = NetCDFTraj.open_old(traj_filename) - - if traj.hasbox: - box = traj.box[0] - - traj.close() - except FileNotFoundError: - print("Cannot find trajectory file: %s" % traj_filename) - sys.exit(0) - - return box - - -class WaterMinimizer: - def __init__(self, n_steps=100, restraint=None, platform="OpenCL", verbose=True): - self._n_steps = n_steps - self._restraint = restraint - self._platform = platform - self._verbose = verbose - - def minimize_trajectory(self, prmtop_filename, traj_filename, output_filename): - nonbondedMethod = CutoffNonPeriodic - nonbondedCutoff = 9 * angstroms - rigidWater = True - constraints = HBonds - dt = 0.002 * picoseconds - temperature = 300 * kelvin - friction = 1.0 / picoseconds - K = self._restraint * kilocalories_per_mole / angstroms**2 - - # If someone still uses an old version of OpenMM - if Version(Platform.getOpenMMVersion()) > Version("7.5.0"): - tolerance = 1.0 * kilocalories / (nanometer * mole) - else: - tolerance = 1.0 * kilocalories_per_mole - - box = _box_information(traj_filename) - - platform = Platform.getPlatformByName(self._platform) - platformProperties = {'Precision': 'single'} - - prmtop = AmberPrmtopFile(prmtop_filename) - parmedtop = pmd.load_file(prmtop_filename) - old_traj = NetCDFTraj.open_old(traj_filename) - - n_atom = old_traj.atom - n_frame = old_traj.frame - - new_trj = NetCDFTraj.open_new(output_filename, natom=n_atom, box=True, crds=True) - - system = prmtop.createSystem(nonbondedMethod=nonbondedMethod, nonbondedCutoff=nonbondedCutoff, constraints=constraints) - - for i, coordinates in enumerate(old_traj.coordinates): - old_positions = Quantity(coordinates.tolist(), unit=angstroms) - - if self._restraint > 0 or self._restraint is not None: - # Add harmonic constraints on protein - force = CustomExternalForce("k * ((x-x0)^2 + (y-y0)^2 + (z-z0)^2)") - force.addGlobalParameter("k", K) - force.addPerParticleParameter("x0") - force.addPerParticleParameter("y0") - force.addPerParticleParameter("z0") - for atom in parmedtop.view['!@H= & !:WAT']: - force.addParticle(atom.idx, old_positions[atom.idx]) - force_idx = system.addForce(force) - - # Create simulation - integrator = LangevinIntegrator(temperature, friction, dt) - simulation = Simulation(prmtop.topology, system, integrator, platform) - simulation.context.setPositions(old_positions) - - # Minimize the water molecules - simulation.minimizeEnergy(maxIterations=self._n_steps, tolerance=tolerance) - # Get new positions and store in the new trajectory - new_positions = simulation.context.getState(getPositions=True).getPositions(asNumpy=True) - - new_trj.add_coordinates(new_positions) - new_trj.add_box(box) - new_trj.add_time(i + 1) - - if self._restraint > 0 or self._restraint is not None: - system.removeForce(force_idx) - - if (i % 100 == 0) and self._verbose: - sys.stdout.write("\rConformations minimized: %5.2f / 100 %%" % (float(i) / n_frame * 100.)) - sys.stdout.flush() - - new_trj.close() - old_traj.close() - - print("") - def main(): args = cmd_lineparser() @@ -144,4 +41,4 @@ def main(): if __name__ == '__main__': - main() \ No newline at end of file + main() diff --git a/scripts/wk_prepare_receptor.py b/scripts/wk_prepare_receptor.py index e69f2d5..c81e970 100644 --- a/scripts/wk_prepare_receptor.py +++ b/scripts/wk_prepare_receptor.py @@ -5,922 +5,7 @@ # import argparse -import copy -import logging -import math -import os -import string -import sys -import shutil - -import parmed as pmd -from pdb4amber import AmberPDBFixer -from pdb4amber.utils import easy_call - -from waterkit import utils - - -# Added CYM residue -HEAVY_ATOM_DICT = { - 'ALA': 5, - 'ARG': 11, - 'ASN': 8, - 'ASP': 8, - 'CYS': 6, - 'GLN': 9, - 'GLU': 9, - 'GLY': 4, - 'HIS': 10, - 'ILE': 8, - 'LEU': 8, - 'LYS': 9, - 'MET': 8, - 'PHE': 11, - 'PRO': 7, - 'SER': 6, - 'THR': 7, - 'TRP': 14, - 'TYR': 12, - 'VAL': 7, - 'HID': 10, - 'HIE': 10, - 'HIN': 10, - 'HIP': 10, - 'CYX': 6, - 'CYM': 6, - 'ASH': 8, - 'GLH': 9, - 'LYH': 9 -} - -# Global constants -# Added CYM residue -RESPROT = ('ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', - 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', - 'TYR', 'VAL', 'HID', 'HIE', 'HIN', 'HIP', 'CYX', 'CYM', 'ASH', 'GLH', - 'LYH', 'ACE', 'NME', 'GL4', 'AS4') - -RESNA = ('C', 'G', 'U', 'A', 'DC', 'DG', 'DT', 'DA', 'OHE', 'C5', 'G5', 'U5', - 'A5', 'C3', 'G3', 'U3', 'A3', 'DC5', 'DG5', 'DT5', 'DA5', 'DC3', - 'DG3', 'DT3', 'DA3' ) - -RESSOLV = ('WAT', 'HOH', 'AG', 'AL', 'Ag', 'BA', 'BR', 'Be', 'CA', 'CD', 'CE', - 'CL', 'CO', 'CR', 'CS', 'CU', 'CU1', 'Ce', 'Cl-', 'Cr', 'Dy', 'EU', - 'EU3', 'Er', 'F', 'FE', 'FE2', 'GD3', 'HE+', 'HG', 'HZ+', 'Hf', - 'IN', 'IOD', 'K', 'K+', 'LA', 'LI', 'LU', 'MG', 'MN', 'NA', 'NH4', - 'NI', 'Na+', 'Nd', 'PB', 'PD', 'PR', 'PT', 'Pu', 'RB', 'Ra', 'SM', - 'SR', 'Sm', 'Sn', 'TB', 'TL', 'Th', 'Tl', 'Tm', 'U4+', 'V2+', 'Y', - 'YB2', 'ZN', 'Zr') - -AMBER_SUPPORTED_RESNAMES = RESPROT + RESNA + RESSOLV - - -def _write_pdb_file(output_name, molecule, overwrite=True, **kwargs): - '''Write PDB file - - Args: - output_name (str): pdbqt output filename - molecule (parmed): parmed molecule object - - ''' - i = 0 - pdb_template = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f} {:>4s}{:>2s}{:2s}\n" - ter_template = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}\n" - output_str = '' - - for residue in molecule.residues: - resname = residue.name - resid = residue.number - chain_id = residue.chain - - for atom in residue.atoms: - if len(atom.name) < 4: - name = ' %s' % atom.name - else: - name = atom.name - - atom_type = atom.name[0] - - if resname in RESSOLV: - atype = 'HETATM' - else: - atype = 'ATOM' - - output_str += pdb_template.format(atype, i + 1, name, " ", resname, chain_id, resid, - " ", atom.xx, atom.xy, atom.xz, 1.0, 1.0, " ", - atom_type, " ") - - i += 1 - - if residue.ter: - output_str += ter_template.format('TER', i + 1, name, " ", resname, chain_id, resid) - i += 1 - - output_str += 'END\n' - - with open(output_name, 'w') as w: - w.write(output_str) - - -def _write_pdbqt_file(output_name, molecule): - '''Write PDBQT file - - Args: - output_name (str): pdbqt output filename - molecule (parmed): parmed molecule object - - ''' - i = 0 - pdb_template = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f}{:4s}{:6.3f} {:2s} \n" - ter_template = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}\n" - output_str = '' - - for residue in molecule.residues: - resname = residue.name - resid = residue.number - chain_id = residue.chain - - for atom in residue.atoms: - if len(atom.name) < 4: - name = ' %s' % atom.name - else: - name = atom.name - - # OpenBabel does not like atom types starting with a number - if atom.type[0].isdigit(): - atom_type = atom.type[::-1] - else: - atom_type = atom.type - - # AutoGrid does not accept atom type name of length > 2 - atom_type = atom_type[:2] - - if resname in RESSOLV: - atype = 'HETATM' - else: - atype = 'ATOM' - - output_str += pdb_template.format(atype, i + 1, name, " ", resname, chain_id, resid, - " ", atom.xx, atom.xy, atom.xz, 1.0, 1.0, " ", - atom.charge, atom_type) - - i += 1 - - if residue.ter: - output_str += ter_template.format('TER', i + 1, name, " ", resname, chain_id, resid) - i += 1 - - output_str += 'END\n' - - with open(output_name, 'w') as w: - w.write(output_str) - - -def _convert_amber_to_autodock_types(molecule): - molecule = copy.deepcopy(molecule) - - amber_autodock_dict = { - 'N3': 'N', - 'H': 'HD', - 'CX': 'C', - 'HP': 'H', - 'CT': 'C', - 'HC': 'H', - 'C': 'C', - 'O': 'OA', - 'N': 'N', - 'H1': 'H', - 'C3': 'C', - '3C': 'C', - 'C2': 'C', - '2C': 'C', - 'CO': 'C', - 'O2': 'OA', - 'OH': 'OA', - 'HO': 'HD', - 'SH': 'SA', - 'HS': 'HD', - 'CA': 'A', - 'HA': 'H', - 'S': 'SA', - 'C8': 'C', - 'N2': 'N', - 'CC': 'A', - 'NB': 'NA', - 'CR': 'A', - 'CV': 'A', - 'H5': 'H', - 'NA': 'N', - 'CW': 'A', - 'H4': 'H', - 'C*': 'A', - 'CN': 'A', - 'CB': 'A', - 'Zn2+': 'Zn', - 'Zn': 'Zn', - 'Mn2+': 'Mn', - 'Mn': 'Mn', - 'XC': 'C', - 'br': 'Br', - 'c' : 'C', - 'c1': 'C', - 'c2': 'C', - 'c3': 'C', - 'ca': 'A', - 'cc': 'A', - 'cd': 'A', - 'ce': 'C', - 'cf': 'C', - 'cl': 'Cl', - 'cp': 'A', - 'cq': 'A', - 'cu': 'C', - 'cv': 'C', - 'cx': 'C', - 'cy': 'C', - 'c5': 'C', - 'c6': 'C', - 'cz': 'C', - 'cs': 'C', - 'cg': 'C', - 'ch': 'C', - 'f' : 'F', - 'h1': 'H', - 'h2': 'H', - 'h3': 'H', - 'h4': 'H', - 'h5': 'H', - 'ha': 'H', - 'hc': 'H', - 'hn': 'HD', - 'ho': 'HD', - 'hp': 'HD', - 'hs': 'HD', - 'hx': 'H', - 'i' : 'I', - 'n' : 'N', - 'n1': 'NA', - 'n2': 'N', - 'n3': 'N', - 'n4': 'N', - 'n5': 'N', - 'n6': 'N', - 'n7': 'N', - 'n8': 'N', - 'n9': 'N', - 'na': 'N', - 'nb': 'N', - 'nc': 'N', - 'nd': 'N', - 'nh': 'N', - 'ne': 'N', - 'nf': 'N', - 'no': 'N', - 'n+': 'N', - 'nx': 'N', - 'ny': 'N', - 'nz': 'N', - 'ns': 'N', - 'nt': 'N', - 'nu': 'N', - 'nv': 'N', - 'ni': 'N', - 'nj': 'N', - 'nk': 'N', - 'nl': 'N', - 'nm': 'N', - 'nn': 'N', - 'np': 'N', - 'nq': 'N', - 'o' : 'OA', - 'oh': 'OA', - 'os': 'OA', - 'op': 'OA', - 'oq': 'OA', - 'p2': 'P', - 'p3': 'P', - 'p4': 'P', - 'p5': 'P', - 'pb': 'P', - 'pc': 'P', - 'pd': 'P', - 'pe': 'P', - 'pf': 'P', - 'px': 'P', - 'py': 'P', - 's' : 'S', - 's2': 'SA', - 's4': 'S', - 's6': 'S', - 'sh': 'SA', - 'ss': 'SA', - 'sx': 'S', - 'sy': 'S', - 'sp': 'S', - 'sq': 'S' - #'Cu': - } - - for atom in molecule.atoms: - if atom.residue.name == 'TYR' and atom.name == 'CZ' and atom.type == 'C': - atom.type = 'A' - elif atom.residue.name == 'ARG' and atom.name == 'CZ' and atom.type == 'CA': - atom.type = 'C' - else: - atom.type = amber_autodock_dict[atom.type] - - return molecule - - -def _make_leap_template(parm, ns_names, gaplist, sslist, input_pdb, - prmtop='prmtop', rst7='rst7', lib_files=None, frcmod_files=None): - # Change ff14SB to ff19SB - default_force_field = ('source leaprc.protein.ff14SB\n' - 'source leaprc.DNA.OL15\n' - 'source leaprc.RNA.OL3\n' - 'source leaprc.water.tip3p\n' - 'source leaprc.gaff2\n') - - if frcmod_files is not None: - default_force_field += ''.join(['loadamberparams %s\n' % fl for fl in frcmod_files]) - - if lib_files is not None: - default_force_field += ''.join(['loadoff %s\n' % ll for ll in lib_files]) - - leap_template = ('{force_fields}\n' - '{more_force_fields}\n' - 'x = loadpdb {input_pdb}\n' - '{box_info}\n' - '{more_leap_cmds}\n' - 'set default nocenter on\n' - 'saveAmberParm x {prmtop} {rst7}\n' - 'quit\n') - - # box - box = parm.box - if box is not None: - box_info = 'set x box { %s %s %s }' % (box[0], box[1], box[2]) - else: - box_info = '' - - # Now we can assume that we are dealing with AmberTools16: - more_force_fields = '' - - for res in ns_names: - more_force_fields += '%s = loadmol2 %s.mol2\n' % (res, res) - more_force_fields += 'loadAmberParams %s.frcmod\n' % res - - # more_leap_cmds - more_leap_cmds = '' - if gaplist: - for d, res1, resid1, res2, resid2 in gaplist: - more_leap_cmds += 'deleteBond x.%d.C x.%d.N\n' % (resid1 + 1, resid2 + 1) - - # process sslist - if sslist: - for resid1, resid2 in sslist: - more_leap_cmds += 'bond x.%d.SG x.%d.SG\n' % (resid1 + 1, resid2 + 1) - - leap_string = leap_template.format( - force_fields=default_force_field, - more_force_fields=more_force_fields, - box_info=box_info, - input_pdb=input_pdb, - prmtop=prmtop, - rst7=rst7, - more_leap_cmds=more_leap_cmds) - - return leap_string - - -def _generate_resids_and_chainids(molecule): - resid = 1 - chainid = 0 - resids = [] - chainids = [] - - # This way, we are good for 52 chains in total - ascii_letters = string.ascii_uppercase + string.ascii_lowercase - - for residue in molecule.residues: - try: - resids.append(resid) - chainids.append(ascii_letters[chainid]) - except: - raise IndexError('This structure contains more than 52 chains.') - - resid += 1 - - if residue.ter: - chainid += 1 - resid = 1 - - return resids, chainids - - -def _generate_resids_chainids_for_tleap(molecule): - resids = list(range(1, len(molecule.residues) + 1)) - chainids = [" "] * len(resids) - return resids, chainids - - -def _ter_flags(molecule): - return [True if residue.ter else False for residue in molecule.residues] - - -def _replace_resids_and_chainids(molecule, new_resids, new_chainids): - n_resids = len(molecule.residues) - n_new_resids = len(new_resids) - - error_msg = 'Cannot replace resids and chainids.' - error_msg += ' Number of residues is different (%d != %d).' % (n_resids, n_new_resids) - assert n_resids == n_new_resids, error_msg - - for i in range(n_resids): - molecule.residues[i].number = new_resids[i] - molecule.residues[i].chain = new_chainids[i] - - -def _add_ter_flags(molecule, ter_flags): - n_resids = len(molecule.residues) - n_ter_flags = len(ter_flags) - - error_msg = 'Cannot add TER flags.' - error_msg += ' Number of TER flags and resids are different (%d != %d).' % (n_resids, n_ter_flags) - assert n_resids == n_ter_flags, error_msg - - for i in range(n_resids): - molecule.residues[i].ter = ter_flags[i] - - -def _remove_alt_residues(molecule): - # remove altlocs label - residue_collection = [] - - for residue in molecule.residues: - for atom in residue.atoms: - atom.altloc = '' - for oatom in atom.other_locations.values(): - oatom.altloc = '' - residue_collection.append(residue) - - residue_collection = list(set(residue_collection)) - - return residue_collection - - -def _find_gaps(molecule, resprot, fill_gaps=False): - gaplist = [] - max_distance = 2.0 - - for i, residue in enumerate(molecule.residues): - if residue.ter: - continue - - try: - # If this fails, it means we reached the last residue - # and the TER keyword is not present in the PDB file - next_residue = molecule.residues[i + 1] - except: - # So we automatically flag it as TER residue - residue.ter = True - - continue - - try: - # If the C-backbone of the current residue or N-backbone of residue + 1 - # is not present, we flag it as a gap in the sequence - c_atom = [atom for atom in residue.atoms if atom.name == 'C'][0] - n_atom = [atom for atom in next_residue.atoms if atom.name == 'N'][0] - except: - gaprecord = (9999.999, c_atom.residue.name, i + 1, n_atom.residue.name, i + 2) - gaplist.append(gaprecord) - - if fill_gaps: - residue.ter = True - - continue - - dx = float(c_atom.xx) - float(n_atom.xx) - dy = float(c_atom.xy) - float(n_atom.xy) - dz = float(c_atom.xz) - float(n_atom.xz) - gap = math.sqrt(dx * dx + dy * dy + dz * dz) - - if gap > max_distance: - gaprecord = (gap, c_atom.residue.name, i + 1, n_atom.residue.name, i + 2) - gaplist.append(gaprecord) - - if fill_gaps: - residue.ter = True - - return gaplist - - -def _fix_isoleucine_cd_atom_name(molecule): - ile_fixed = [] - - for residue in molecule.residues: - if residue.name == 'ILE': - for atom in residue: - if atom.name == 'CD': - atom.name = 'CD1' - ile_fixed.append(('ILE', residue.number)) - - return ile_fixed - - -def _assign_histidine(molecule, default_protonation='HIE', force_default_protonation=False): - assigned_states = [] - undefined_state = set(['HIS']) - - for residue in molecule.residues: - if residue.name in undefined_state: - hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) - - # if the HIS is in an undefined state, we look at - # the presence of hydrogen atoms connected to NE1 or - # ND1 (or both) to assign the protonation states - # Those hydrogen atoms will be present if the user - # used REDUCE to add hydrogen atoms before. - if force_default_protonation: - residue.name = default_protonation - elif set(['HD1', 'HE2']).issubset(hydrogen_name_set): - residue.name = 'HIP' - elif 'HD1' in hydrogen_name_set: - residue.name = 'HID' - elif 'HE2' in hydrogen_name_set: - residue.name = 'HIE' - else: - residue.name = default_protonation - - assigned_states.append((residue.name, residue.number)) - - return assigned_states - - -def _assign_glutamine(molecule, default_protonation='GLU', force_default_protonation=False): - assigned_states = [] - undefined_state = set(['GLU']) - - for residue in molecule.residues: - if residue.name in undefined_state: - hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) - - # if the GLU is in an undefined state, we look at - # the presence of hydrogen atoms connected to OE2 - # to assign the protonation states. This hydrogen - # atom will be present if the user used REDUCE or - # another tool to add hydrogen atoms before. - if force_default_protonation: - residue.name = default_protonation - elif set(['HE2']).issubset(hydrogen_name_set): - residue.name = 'GLH' - assigned_states.append((residue.name, residue.number)) - else: - residue.name = default_protonation - - return assigned_states - - -def _assign_asparagine(molecule, default_protonation='ASP', force_default_protonation=False): - assigned_states = [] - undefined_state = set(['ASP']) - - for residue in molecule.residues: - if residue.name in undefined_state: - hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) - - # if the ASP is in an undefined state, we look at - # the presence of hydrogen atoms connected to OD2 - # to assign the protonation states. This hydrogen - # atom will be present if the user used REDUCE or - # another tool to add hydrogen atoms before. - if force_default_protonation: - residue.name = default_protonation - elif set(['HD2']).issubset(hydrogen_name_set): - residue.name = 'ASH' - assigned_states.append((residue.name, residue.number)) - else: - residue.name = default_protonation - - return assigned_states - - -def _assign_lysine(molecule, default_protonation='LYS', force_default_protonation=False): - assigned_states = [] - undefined_state = set(['LYS']) - - for residue in molecule.residues: - if residue.name in undefined_state: - hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) - - # if the LYS is in an undefined state, we look at - # the absence of an hydrogen atom (HZ1) connected to NZ - # to assign the protonation states. This hydrogen - # atom will be absent if the user used REDUCE or - # another tool to add hydrogen atoms before. - if force_default_protonation: - residue.name = default_protonation - elif not set(['HZ1', 'HZ2', 'HZ3']).issubset(hydrogen_name_set): - residue.name = 'LYN' - assigned_states.append((residue.name, residue.number)) - else: - residue.name = default_protonation - - return assigned_states - - -def _assign_cysteine(molecule, default_protonation='CYS', force_default_protonation=False): - assigned_states = [] - undefined_state = set(['CYS']) - - for residue in molecule.residues: - if residue.name in undefined_state: - hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) - - # if the CYS is in an undefined state, we look at - # the absence of an hydrogen atom (HG) connected to SG - # to assign the protonation states. This hydrogen - # atom will be absent if the user used REDUCE or - # another tool to add hydrogen atoms before. - if force_default_protonation: - residue.name = default_protonation - elif not set(['HG']).issubset(hydrogen_name_set): - residue.name = 'CYM' - assigned_states.append((residue.name, residue.number)) - else: - residue.name = default_protonation - - return assigned_states - - -def _fix_charmm_histidine_to_amber(molecule): - his_fixed = [] - - for residue in molecule.residues: - if residue.name == 'HSE': - residue.name = 'HIE' - his_fixed.append(('HIE', residue.number)) - elif residue.name == 'HSD': - residue.name = 'HID' - his_fixed.append(('HID', residue.number)) - elif residue.name == 'HSP': - residue.name = 'HIP' - his_fixed.append(('HIP', residue.number)) - - return his_fixed - - -def _find_non_standard_resnames(molecule, amber_supported_resname): - ns_names = set() - - for residue in molecule.residues: - if len(residue.name) > 3: - rname = residue.name[:3] - else: - rname = residue.name - if rname.strip() not in amber_supported_resname: - ns_names.add(rname) - - return ns_names - - -def _read_resname_from_lib_file(lib_file): - with open(lib_file) as f: - lines = f.readlines() - return lines[1].strip()[1:-1] - - -class PrepareReceptor: - - def __init__(self, keep_hydrogen=False, keep_water=False, no_disulfide=False, - keep_altloc=False, ignore_gaps=False, renumbering=True, use_model=1, - default_his_protonation='HIE'): - self._keep_hydrogen = keep_hydrogen - self._keep_water = keep_water - self._no_difsulfide = no_disulfide - self._keep_altloc = keep_altloc - self._use_model = use_model - # If we ignore gaps (True), we fill the gaps (True) - # If we don't ignore the gaps (False, we don't fille them (False) - self._fill_gaps = ignore_gaps - self._renumbering = renumbering - self._default_his_protonation = default_his_protonation - self._force_default_protonation = not self._keep_hydrogen - - self._pdb_filename = None - self._molecule = None - - def prepare(self, pdb_filename, lib_files=None, frcmod_files=None, clean=True): - '''Prepare receptor structure - - Args: - pdb_filename (str): input pdb filename - lib_files (list): Amber lib parameter files for non-standard residues - frcmod_files (list): Amber frcmod parameter files for non-standard residues - clean (bool): remove tleap input and output files (default: True) - - ''' - final_ns_names = [] - tleap_input = 'leap.template.in' - tleap_output = 'leap.template.out' - tleap_log = 'leap.log' - pdb_clean_filename = 'tmp.pdb' - prmtop_filename = 'tmp.prmtop' - rst7_filename = 'tmp.rst7' - nonstandard_resnames = tuple() - local_lib_files = None - local_frcmod_files = None - - logger = logging.getLogger('WaterKit receptor preparation') - logging.basicConfig(level=os.environ.get('LOGLEVEL', 'INFO')) - - try: - receptor = pmd.load_file(pdb_filename) - except FileNotFoundError: - error_msg = 'Receptor file (%s) cannot be found.' % pdb_filename - logger.error(error_msg) - raise - - pdbfixer = AmberPDBFixer(receptor) - - # Get resnames from lib files - if lib_files is not None: - nonstandard_resnames = tuple([_read_resname_from_lib_file(lib_file) for lib_file in lib_files]) - logger.info('Amber lib parameter files for residue(s): %s' % ', '.join(nonstandard_resnames)) - - # Remove box and symmetry - pdbfixer.parm.box = None - pdbfixer.parm.symmetry = None - - # Find missing heavy atoms - missing_atoms = pdbfixer.find_missing_heavy_atoms(HEAVY_ATOM_DICT) - if missing_atoms: - logger.warning('Found residue(s) with missing heavy atoms: %s' % ', '.join([str(m) for m in missing_atoms])) - - # Remove all the hydrogens - if not self._keep_hydrogen: - pdbfixer.parm.strip('@/H') - logger.info('Removed all hydrogen atoms') - - # Remove water molecules - if not self._keep_water: - pdbfixer.remove_water() - logger.info('Removed all water molecules') - - # Fix isoleucine CD aton name, supposed to be CD1 for Amber - ile_fixed = _fix_isoleucine_cd_atom_name(pdbfixer.parm) - if ile_fixed: - logger.info('Atom names were fixed for: %s' % ', '.join('%s - %d' % (r[0], r[1]) for r in ile_fixed)) - - his_fixed = _fix_charmm_histidine_to_amber(pdbfixer.parm) - if his_fixed: - warning_msg = 'CHARMM histidine protonation were converted to Amber: %s' - logger.warning(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in his_fixed)) - - # Keep only standard-Amber residues - ns_names = _find_non_standard_resnames(pdbfixer.parm, AMBER_SUPPORTED_RESNAMES + nonstandard_resnames) - if ns_names: - pdbfixer.parm.strip('!:' + ','.join(AMBER_SUPPORTED_RESNAMES + nonstandard_resnames)) - logger.info('Removed all non-standard Amber residues: %s' % ', '.join(ns_names)) - - # Find all the gaps - gaplist = _find_gaps(pdbfixer.parm, RESPROT, self._fill_gaps) - ter_flags = _ter_flags(pdbfixer.parm) - if gaplist: - gap_msg = '' - gap_str = ' - gap of %8.3f A between %s %d and %s %d\n' - for _, (d, resname0, resid0, resname1, resid1) in enumerate(gaplist): - gap_msg += gap_str % (d, resname0, resid0, resname1, resid1) - - if self._fill_gaps: - warning_msg = 'Gaps were ignore between residues (automatically added TER records): \n' - warning_msg += gap_msg - logger.warning(warning_msg) - else: - error_msg = ' Gap(s) were found between some residues. You can fix that issue by adding the missing residues' - error_msg += ' or manually add TER records to indicate that the residues/chains are not physically connected' - error_msg += ' to each other. If you know what you are doing, you can use the --ignore_gaps option to' - error_msg += ' ignore them (automatically add TER records).\n' - error_msg += 'Gap(s) found between the following residues: \n' - error_msg += gap_msg - - logger.error(error_msg) - raise RuntimeError(error_msg) - - assigned_his_states = _assign_histidine(pdbfixer.parm, self._default_his_protonation, self._force_default_protonation) - if assigned_his_states: - warning_msg = 'Histidine protonation states were automatically set to: %s' - logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_his_states)) - - assigned_glu_states = _assign_glutamine(pdbfixer.parm, force_default_protonation=self._force_default_protonation) - if assigned_glu_states: - warning_msg = 'Glutamine protonation states were automatically set to: %s' - logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_glu_states)) - - assigned_asp_states = _assign_asparagine(pdbfixer.parm, force_default_protonation=self._force_default_protonation) - if assigned_asp_states: - warning_msg = 'Asparagine protonation states were automatically set to: %s' - logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_asp_states)) - - assigned_lys_states = _assign_lysine(pdbfixer.parm, force_default_protonation=self._force_default_protonation) - if assigned_lys_states: - warning_msg = 'Lysine protonation states were automatically set to: %s' - logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_lys_states)) - - # Find all the disulfide bonds - if not self._no_difsulfide: - sslist, cys_cys_atomidx_set = pdbfixer.find_disulfide() - if sslist: - pdbfixer.rename_cys_to_cyx(sslist) - resids_str = ', '.join(['%s-%s' % (ss[0] + 1, ss[1] + 1) for ss in sslist]) - logger.info('Found disulfide bridges between residues %s' % resids_str) - else: - sslist = None - - # We assign cysteine only after finding all the disulfide bridges, - # Here we are looking potential CYM residues - assigned_cys_states = _assign_cysteine(pdbfixer.parm, force_default_protonation=self._force_default_protonation) - if assigned_cys_states: - warning_msg = 'Cysteine protonation states were automatically set to: %s' - logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_cys_states)) - - # Remove all the aternate residue sidechains - if not self._keep_altloc: - alt_residues = _remove_alt_residues(pdbfixer.parm) - if alt_residues: - logger.info('Removed all alternatives residue sidechains') - - # Renumbers resids (starting from 1) and renames chainids (starting from A) - if self._renumbering: - new_resids, new_chainids = _generate_resids_and_chainids(pdbfixer.parm) - _replace_resids_and_chainids(pdbfixer.parm, new_resids, new_chainids) - else: - new_resids = [residue.number for residue in pdbfixer.parm.residues] - new_chainids = [residue.chain for residue in pdbfixer.parm.residues] - - # Take the first model only - final_coordinates = pdbfixer.parm.get_coordinates()[self._use_model - 1] - write_kwargs = dict(coordinates=final_coordinates) - write_kwargs['increase_tercount'] = False # so CONECT record can work properly - write_kwargs['altlocs'] = 'occupancy' - - if lib_files is not None: - original_lib_files = [os.path.abspath(lib_file) for lib_file in lib_files] - local_lib_files = [os.path.basename(lib_file) for lib_file in lib_files] - - if frcmod_files is not None: - original_frcmod_files = [os.path.abspath(frcmod_file) for frcmod_file in frcmod_files] - local_frcmod_files = [os.path.basename(frcmod_file) for frcmod_file in frcmod_files] - - # Create Amber topology and coordinates files - with utils.temporary_directory(prefix='wk_preparation_', dir='.', clean=clean) as tmp_dir: - if lib_files is not None: - [shutil.copy2(olib_file, llib_file) for olib_file, llib_file in zip(original_lib_files, local_lib_files)] - - if frcmod_files is not None: - [shutil.copy2(ofrc_file, lfrc_file) for ofrc_file, lfrc_file in zip(original_frcmod_files, local_frcmod_files)] - - tleap_resids, tleap_chainids = _generate_resids_chainids_for_tleap(pdbfixer.parm) - _replace_resids_and_chainids(pdbfixer.parm, tleap_resids, tleap_chainids) - - try: - _write_pdb_file(pdb_clean_filename, pdbfixer.parm, **write_kwargs) - except: - error_msg = 'Could not write pdb file %s' % pdb_clean_filename - logger.error(error_msg) - raise - - # Generate topology/coordinates files - with open(tleap_input, 'w') as w: - content = _make_leap_template(pdbfixer.parm, final_ns_names, gaplist, sslist, - input_pdb=pdb_clean_filename, - prmtop=prmtop_filename, rst7=rst7_filename, - lib_files=local_lib_files, frcmod_files=local_frcmod_files) - w.write(content) - - try: - easy_call('tleap -s -f %s > %s' % (tleap_input, tleap_output), shell=True) - except RuntimeError: - error_msg = 'Could not generate topology/coordinates files with tleap' - logger.error(error_msg) - raise RuntimeError(error_msg) - - self._molecule = pmd.load_file(prmtop_filename, rst7_filename) - - # Add back resids, chainids and TER flags to molecule - _replace_resids_and_chainids(self._molecule, new_resids, new_chainids) - _add_ter_flags(self._molecule, ter_flags) - - def write_pdb_file(self, pdb_filename='protein.pdb'): - _write_pdb_file(pdb_filename, self._molecule) - - def write_pdbqt_file(self, pdbqt_filename='protein.pdbqt', amber_atom_types=False): - if amber_atom_types: - _write_pdbqt_file(pdbqt_filename, self._molecule) - else: - molecule = _convert_amber_to_autodock_types(self._molecule) - _write_pdbqt_file(pdbqt_filename, molecule) +from waterkit import PrepareReceptor def cmd_lineparser(): diff --git a/waterkit/__init__.py b/waterkit/__init__.py index fee7a10..ce2f428 100644 --- a/waterkit/__init__.py +++ b/waterkit/__init__.py @@ -6,14 +6,31 @@ from .autodock_map import Map from .autogrid import AutoGrid +from .autogrid import calc_spherical_water_map from .forcefield import AutoDockForceField from .molecule import Molecule +from .receptor_prep import PrepareReceptor from .spherical_model_map import SphericalWaterMap from .sampling import WaterSampler +from .trajectory_utils import make_trajectory +from .trajectory_utils import WaterMinimizer from .water import Water from .water_box import WaterBox from .waterkit import WaterKit +from .wrap_waterkit_and_gist import run_waterkit_and_gist -__all__ = ["Map", "AutoGrid", "AutoDockForceField", "Molecule", - "SphericalWaterMap", "WaterSampler", "Water", - "WaterBox", "WaterKit"] +__all__ = [ + "Map", + "AutoGrid", + "calc_spherical_water_map", + "AutoDockForceField", + "Molecule", + "PrepareReceptor", + "SphericalWaterMap", + "WaterSampler", + "Water", + "WaterBox", + "WaterMinimizer", + "WaterKit", + "run_waterkit_and_gist", +] diff --git a/waterkit/autogrid.py b/waterkit/autogrid.py index 97b27fd..51c5fb9 100644 --- a/waterkit/autogrid.py +++ b/waterkit/autogrid.py @@ -12,12 +12,42 @@ import numpy as np import openbabel as ob +from vina import Vina from .molecule import Molecule from .autodock_map import Map from . import utils + +def calc_spherical_water_map(autogrid_exec_path, receptor, box_center, box_size, receptor_spherical_water_filename=None): + with utils.temporary_directory(prefix='wk_', dir='.', clean=False) as tmp_dir: + # Generate AutoDock maps using the Amber ff14SB forcefield + receptor.to_pdbqt_file('receptor.pdbqt') + ff14sb_param_file = os.path.join(utils.path_module('waterkit'), 'data/ff14SB_parameters.dat') + ag = AutoGrid(autogrid_exec_path, ff14sb_param_file) + ad_map = ag.run('receptor.pdbqt', ['OW'], box_center, box_size, smooth=0, dielectric=1) + + if receptor_spherical_water_filename is None: + # Convert amber atom types to AutoDock atom types + ad_receptor = utils.convert_amber_to_autodock_types(receptor) + ad_receptor.to_pdbqt_file('receptor_ad.pdbqt') + + # Generate Vina maps for the spherical maps + v = Vina(verbosity=0) + v.set_receptor('receptor_ad.pdbqt') + v.compute_vina_maps(box_center, box_size, force_even_voxels=True) + v.write_maps('vina') + sw_map = Map('vina.O_DA.map', 'SW') + else: + # The first spherical map is for the receptor + sw_map = Map(receptor_spherical_water_filename, 'SW') + + ad_map.add_map('SW', sw_map._maps['SW']) + + return ad_map + + class AutoGrid(): def __init__(self, exec_path="autogrid4", param_file="AD4_parameters.dat", gpf_file=None): diff --git a/waterkit/molecule.py b/waterkit/molecule.py index 27d46ab..cc4e1f3 100644 --- a/waterkit/molecule.py +++ b/waterkit/molecule.py @@ -66,6 +66,29 @@ def __init__(self, OBMol, guess_hydrogen_bonds=True, guess_disordered_hydrogens= hbfield = HydrogenBonds(hb_file) self._guess_hydrogen_bond_anchors(OBMol, hbfield) + + @classmethod + def from_pdbqt_string(cls, string, guess_hydrogen_bonds=True, guess_disordered_hydrogens=True): + + OBMol = ob.OBMol() + obconv = ob.OBConversion() + obconv.SetInFormat("pdb") + errlev = ob.obErrorLog.GetOutputLevel() + ob.obErrorLog.SetOutputLevel(0) + obconv.ReadString(OBMol, string) + ob.obErrorLog.SetOutputLevel(errlev) + m = cls(OBMol, guess_hydrogen_bonds, guess_disordered_hydrogens) + + # OpenBabel do chemical perception to define the type + # So we override the types with AutoDock atom types + # from the PDBQT file + qs, ts = m._qt_from_pdbqt_string(string) + m.atoms['q'] = qs + m.atoms['t'] = ts + + return m + + @classmethod def from_file(cls, fname, guess_hydrogen_bonds=True, guess_disordered_hydrogens=True): """Create Molecule object from a PDB file. @@ -83,16 +106,19 @@ def from_file(cls, fname, guess_hydrogen_bonds=True, guess_disordered_hydrogens= name, file_extension = os.path.splitext(fname) file_extension = file_extension.split(os.extsep)[-1] + + if file_extension == "pdbqt": + with open(fname) as f: + string = f.read() + m = cls.from_pdbqt_string(string, guess_hydrogen_bonds, guess_disordered_hydrogens) + return m + # Read PDB file OBMol = ob.OBMol() obconv = ob.OBConversion() - - if file_extension == "pdbqt": - obconv.SetInFormat("pdb") - else: - obconv.SetInFormat(file_extension) + obconv.SetInFormat(file_extension) - # set error level to avoid warnings about non-standard input + # set error level to avoid warnings about non-standard input errlev = ob.obErrorLog.GetOutputLevel() ob.obErrorLog.SetOutputLevel(0) @@ -102,18 +128,10 @@ def from_file(cls, fname, guess_hydrogen_bonds=True, guess_disordered_hydrogens= m = cls(OBMol, guess_hydrogen_bonds, guess_disordered_hydrogens) - # OpenBabel do chemical perception to define the type - # So we override the types with AutoDock atom types - # from the PDBQT file - if file_extension == "pdbqt": - qs, ts = m._qt_from_pdbqt_file(fname) - m.atoms['q'] = qs - m.atoms['t'] = ts - return m - def _qt_from_pdbqt_file(self, fname): - """Get partial charges and atom types from PDBQT file. + def _qt_from_pdbqt_string(self, pdbqt_string): + """Get partial charges and atom types from PDBQT string. Args: fname (str): molecule filename @@ -126,12 +144,10 @@ def _qt_from_pdbqt_file(self, fname): atom_types = [] partial_charges = [] - with open(fname) as f: - lines = f.readlines() - for line in lines: - if re.search("^ATOM", line) or re.search("^HETATM", line): - atom_types.append(line[77:79].strip()) - partial_charges.append(float(line[70:77].strip())) + for line in pdbqt_string.splitlines(): + if re.search("^ATOM", line) or re.search("^HETATM", line): + atom_types.append(line[77:79].strip()) + partial_charges.append(float(line[70:77].strip())) return partial_charges, atom_types diff --git a/waterkit/receptor_prep.py b/waterkit/receptor_prep.py new file mode 100644 index 0000000..80258b5 --- /dev/null +++ b/waterkit/receptor_prep.py @@ -0,0 +1,929 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# prepare receptor +# + +import copy +import logging +import math +import os +import string +import shutil + +import parmed as pmd +from pdb4amber import AmberPDBFixer +from pdb4amber.utils import easy_call + +from waterkit import utils + + +# Added CYM residue +HEAVY_ATOM_DICT = { + 'ALA': 5, + 'ARG': 11, + 'ASN': 8, + 'ASP': 8, + 'CYS': 6, + 'GLN': 9, + 'GLU': 9, + 'GLY': 4, + 'HIS': 10, + 'ILE': 8, + 'LEU': 8, + 'LYS': 9, + 'MET': 8, + 'PHE': 11, + 'PRO': 7, + 'SER': 6, + 'THR': 7, + 'TRP': 14, + 'TYR': 12, + 'VAL': 7, + 'HID': 10, + 'HIE': 10, + 'HIN': 10, + 'HIP': 10, + 'CYX': 6, + 'CYM': 6, + 'ASH': 8, + 'GLH': 9, + 'LYH': 9 +} + +# Global constants +# Added CYM residue +RESPROT = ('ALA', 'ARG', 'ASN', 'ASP', 'CYS', 'GLN', 'GLU', 'GLY', 'HIS', + 'ILE', 'LEU', 'LYS', 'MET', 'PHE', 'PRO', 'SER', 'THR', 'TRP', + 'TYR', 'VAL', 'HID', 'HIE', 'HIN', 'HIP', 'CYX', 'CYM', 'ASH', 'GLH', + 'LYH', 'ACE', 'NME', 'GL4', 'AS4') + +RESNA = ('C', 'G', 'U', 'A', 'DC', 'DG', 'DT', 'DA', 'OHE', 'C5', 'G5', 'U5', + 'A5', 'C3', 'G3', 'U3', 'A3', 'DC5', 'DG5', 'DT5', 'DA5', 'DC3', + 'DG3', 'DT3', 'DA3' ) + +RESSOLV = ('WAT', 'HOH', 'AG', 'AL', 'Ag', 'BA', 'BR', 'Be', 'CA', 'CD', 'CE', + 'CL', 'CO', 'CR', 'CS', 'CU', 'CU1', 'Ce', 'Cl-', 'Cr', 'Dy', 'EU', + 'EU3', 'Er', 'F', 'FE', 'FE2', 'GD3', 'HE+', 'HG', 'HZ+', 'Hf', + 'IN', 'IOD', 'K', 'K+', 'LA', 'LI', 'LU', 'MG', 'MN', 'NA', 'NH4', + 'NI', 'Na+', 'Nd', 'PB', 'PD', 'PR', 'PT', 'Pu', 'RB', 'Ra', 'SM', + 'SR', 'Sm', 'Sn', 'TB', 'TL', 'Th', 'Tl', 'Tm', 'U4+', 'V2+', 'Y', + 'YB2', 'ZN', 'Zr') + +AMBER_SUPPORTED_RESNAMES = RESPROT + RESNA + RESSOLV + + +def _write_pdb_file(output_name, molecule): + output_str = _write_pdb_string(molecule) + with open(output_name, 'w') as w: + w.write(output_str) + + +def _write_pdb_string(molecule): + '''Write PDB file + + Args: + output_name (str): pdbqt output filename + molecule (parmed): parmed molecule object + + ''' + i = 0 + pdb_template = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f} {:>4s}{:>2s}{:2s}\n" + ter_template = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}\n" + output_str = '' + + for residue in molecule.residues: + resname = residue.name + resid = residue.number + chain_id = residue.chain + + for atom in residue.atoms: + if len(atom.name) < 4: + name = ' %s' % atom.name + else: + name = atom.name + + atom_type = atom.name[0] + + if resname in RESSOLV: + atype = 'HETATM' + else: + atype = 'ATOM' + + output_str += pdb_template.format(atype, i + 1, name, " ", resname, chain_id, resid, + " ", atom.xx, atom.xy, atom.xz, 1.0, 1.0, " ", + atom_type, " ") + + i += 1 + + if residue.ter: + output_str += ter_template.format('TER', i + 1, name, " ", resname, chain_id, resid) + i += 1 + + output_str += 'END\n' + return output_str + + +def _write_pdbqt_string(molecule): + '''Write PDBQT file + + Args: + output_name (str): pdbqt output filename + molecule (parmed): parmed molecule object + + ''' + i = 0 + pdb_template = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}{:1s} {:8.3f}{:8.3f}{:8.3f}{:6.2f}{:6.2f}{:4s}{:6.3f} {:2s} \n" + ter_template = "{:6s}{:5d} {:^4s}{:1s}{:3s} {:1s}{:4d}\n" + output_str = '' + + for residue in molecule.residues: + resname = residue.name + resid = residue.number + chain_id = residue.chain + + for atom in residue.atoms: + if len(atom.name) < 4: + name = ' %s' % atom.name + else: + name = atom.name + + # OpenBabel does not like atom types starting with a number + if atom.type[0].isdigit(): + atom_type = atom.type[::-1] + else: + atom_type = atom.type + + # AutoGrid does not accept atom type name of length > 2 + atom_type = atom_type[:2] + + if resname in RESSOLV: + atype = 'HETATM' + else: + atype = 'ATOM' + + output_str += pdb_template.format(atype, i + 1, name, " ", resname, chain_id, resid, + " ", atom.xx, atom.xy, atom.xz, 1.0, 1.0, " ", + atom.charge, atom_type) + + i += 1 + + if residue.ter: + output_str += ter_template.format('TER', i + 1, name, " ", resname, chain_id, resid) + i += 1 + + output_str += 'END\n' + return output_str + + +def _convert_amber_to_autodock_types(molecule): + molecule = copy.deepcopy(molecule) + + amber_autodock_dict = { + 'N3': 'N', + 'H': 'HD', + 'CX': 'C', + 'HP': 'H', + 'CT': 'C', + 'HC': 'H', + 'C': 'C', + 'O': 'OA', + 'N': 'N', + 'H1': 'H', + 'C3': 'C', + '3C': 'C', + 'C2': 'C', + '2C': 'C', + 'CO': 'C', + 'O2': 'OA', + 'OH': 'OA', + 'HO': 'HD', + 'SH': 'SA', + 'HS': 'HD', + 'CA': 'A', + 'HA': 'H', + 'S': 'SA', + 'C8': 'C', + 'N2': 'N', + 'CC': 'A', + 'NB': 'NA', + 'CR': 'A', + 'CV': 'A', + 'H5': 'H', + 'NA': 'N', + 'CW': 'A', + 'H4': 'H', + 'C*': 'A', + 'CN': 'A', + 'CB': 'A', + 'Zn2+': 'Zn', + 'Zn': 'Zn', + 'Mn2+': 'Mn', + 'Mn': 'Mn', + 'XC': 'C', + 'br': 'Br', + 'c' : 'C', + 'c1': 'C', + 'c2': 'C', + 'c3': 'C', + 'ca': 'A', + 'cc': 'A', + 'cd': 'A', + 'ce': 'C', + 'cf': 'C', + 'cl': 'Cl', + 'cp': 'A', + 'cq': 'A', + 'cu': 'C', + 'cv': 'C', + 'cx': 'C', + 'cy': 'C', + 'c5': 'C', + 'c6': 'C', + 'cz': 'C', + 'cs': 'C', + 'cg': 'C', + 'ch': 'C', + 'f' : 'F', + 'h1': 'H', + 'h2': 'H', + 'h3': 'H', + 'h4': 'H', + 'h5': 'H', + 'ha': 'H', + 'hc': 'H', + 'hn': 'HD', + 'ho': 'HD', + 'hp': 'HD', + 'hs': 'HD', + 'hx': 'H', + 'i' : 'I', + 'n' : 'N', + 'n1': 'NA', + 'n2': 'N', + 'n3': 'N', + 'n4': 'N', + 'n5': 'N', + 'n6': 'N', + 'n7': 'N', + 'n8': 'N', + 'n9': 'N', + 'na': 'N', + 'nb': 'N', + 'nc': 'N', + 'nd': 'N', + 'nh': 'N', + 'ne': 'N', + 'nf': 'N', + 'no': 'N', + 'n+': 'N', + 'nx': 'N', + 'ny': 'N', + 'nz': 'N', + 'ns': 'N', + 'nt': 'N', + 'nu': 'N', + 'nv': 'N', + 'ni': 'N', + 'nj': 'N', + 'nk': 'N', + 'nl': 'N', + 'nm': 'N', + 'nn': 'N', + 'np': 'N', + 'nq': 'N', + 'o' : 'OA', + 'oh': 'OA', + 'os': 'OA', + 'op': 'OA', + 'oq': 'OA', + 'p2': 'P', + 'p3': 'P', + 'p4': 'P', + 'p5': 'P', + 'pb': 'P', + 'pc': 'P', + 'pd': 'P', + 'pe': 'P', + 'pf': 'P', + 'px': 'P', + 'py': 'P', + 's' : 'S', + 's2': 'SA', + 's4': 'S', + 's6': 'S', + 'sh': 'SA', + 'ss': 'SA', + 'sx': 'S', + 'sy': 'S', + 'sp': 'S', + 'sq': 'S' + #'Cu': + } + + for atom in molecule.atoms: + if atom.residue.name == 'TYR' and atom.name == 'CZ' and atom.type == 'C': + atom.type = 'A' + elif atom.residue.name == 'ARG' and atom.name == 'CZ' and atom.type == 'CA': + atom.type = 'C' + else: + atom.type = amber_autodock_dict[atom.type] + + return molecule + + +def _make_leap_template(parm, ns_names, gaplist, sslist, input_pdb, + prmtop='prmtop', rst7='rst7', lib_files=None, frcmod_files=None): + # Change ff14SB to ff19SB + default_force_field = ('source leaprc.protein.ff14SB\n' + 'source leaprc.DNA.OL15\n' + 'source leaprc.RNA.OL3\n' + 'source leaprc.water.tip3p\n' + 'source leaprc.gaff2\n') + + if frcmod_files is not None: + default_force_field += ''.join(['loadamberparams %s\n' % fl for fl in frcmod_files]) + + if lib_files is not None: + default_force_field += ''.join(['loadoff %s\n' % ll for ll in lib_files]) + + leap_template = ('{force_fields}\n' + '{more_force_fields}\n' + 'x = loadpdb {input_pdb}\n' + '{box_info}\n' + '{more_leap_cmds}\n' + 'set default nocenter on\n' + 'saveAmberParm x {prmtop} {rst7}\n' + 'quit\n') + + # box + box = parm.box + if box is not None: + box_info = 'set x box { %s %s %s }' % (box[0], box[1], box[2]) + else: + box_info = '' + + # Now we can assume that we are dealing with AmberTools16: + more_force_fields = '' + + for res in ns_names: + more_force_fields += '%s = loadmol2 %s.mol2\n' % (res, res) + more_force_fields += 'loadAmberParams %s.frcmod\n' % res + + # more_leap_cmds + more_leap_cmds = '' + if gaplist: + for d, res1, resid1, res2, resid2 in gaplist: + more_leap_cmds += 'deleteBond x.%d.C x.%d.N\n' % (resid1 + 1, resid2 + 1) + + # process sslist + if sslist: + for resid1, resid2 in sslist: + more_leap_cmds += 'bond x.%d.SG x.%d.SG\n' % (resid1 + 1, resid2 + 1) + + leap_string = leap_template.format( + force_fields=default_force_field, + more_force_fields=more_force_fields, + box_info=box_info, + input_pdb=input_pdb, + prmtop=prmtop, + rst7=rst7, + more_leap_cmds=more_leap_cmds) + + return leap_string + + +def _generate_resids_and_chainids(molecule): + resid = 1 + chainid = 0 + resids = [] + chainids = [] + + # This way, we are good for 52 chains in total + ascii_letters = string.ascii_uppercase + string.ascii_lowercase + + for residue in molecule.residues: + try: + resids.append(resid) + chainids.append(ascii_letters[chainid]) + except: + raise IndexError('This structure contains more than 52 chains.') + + resid += 1 + + if residue.ter: + chainid += 1 + resid = 1 + + return resids, chainids + + +def _generate_resids_chainids_for_tleap(molecule): + resids = list(range(1, len(molecule.residues) + 1)) + chainids = [" "] * len(resids) + return resids, chainids + + +def _ter_flags(molecule): + return [True if residue.ter else False for residue in molecule.residues] + + +def _replace_resids_and_chainids(molecule, new_resids, new_chainids): + n_resids = len(molecule.residues) + n_new_resids = len(new_resids) + + error_msg = 'Cannot replace resids and chainids.' + error_msg += ' Number of residues is different (%d != %d).' % (n_resids, n_new_resids) + assert n_resids == n_new_resids, error_msg + + for i in range(n_resids): + molecule.residues[i].number = new_resids[i] + molecule.residues[i].chain = new_chainids[i] + + +def _add_ter_flags(molecule, ter_flags): + n_resids = len(molecule.residues) + n_ter_flags = len(ter_flags) + + error_msg = 'Cannot add TER flags.' + error_msg += ' Number of TER flags and resids are different (%d != %d).' % (n_resids, n_ter_flags) + assert n_resids == n_ter_flags, error_msg + + for i in range(n_resids): + molecule.residues[i].ter = ter_flags[i] + + +def _remove_alt_residues(molecule): + # remove altlocs label + residue_collection = [] + + for residue in molecule.residues: + for atom in residue.atoms: + atom.altloc = '' + for oatom in atom.other_locations.values(): + oatom.altloc = '' + residue_collection.append(residue) + + residue_collection = list(set(residue_collection)) + + return residue_collection + + +def _find_gaps(molecule, resprot, fill_gaps=False): + gaplist = [] + max_distance = 2.0 + + for i, residue in enumerate(molecule.residues): + if residue.ter: + continue + + try: + # If this fails, it means we reached the last residue + # and the TER keyword is not present in the PDB file + next_residue = molecule.residues[i + 1] + except: + # So we automatically flag it as TER residue + residue.ter = True + + continue + + try: + # If the C-backbone of the current residue or N-backbone of residue + 1 + # is not present, we flag it as a gap in the sequence + c_atom = [atom for atom in residue.atoms if atom.name == 'C'][0] + n_atom = [atom for atom in next_residue.atoms if atom.name == 'N'][0] + except: + gaprecord = (9999.999, c_atom.residue.name, i + 1, n_atom.residue.name, i + 2) + gaplist.append(gaprecord) + + if fill_gaps: + residue.ter = True + + continue + + dx = float(c_atom.xx) - float(n_atom.xx) + dy = float(c_atom.xy) - float(n_atom.xy) + dz = float(c_atom.xz) - float(n_atom.xz) + gap = math.sqrt(dx * dx + dy * dy + dz * dz) + + if gap > max_distance: + gaprecord = (gap, c_atom.residue.name, i + 1, n_atom.residue.name, i + 2) + gaplist.append(gaprecord) + + if fill_gaps: + residue.ter = True + + return gaplist + + +def _fix_isoleucine_cd_atom_name(molecule): + ile_fixed = [] + + for residue in molecule.residues: + if residue.name == 'ILE': + for atom in residue: + if atom.name == 'CD': + atom.name = 'CD1' + ile_fixed.append(('ILE', residue.number)) + + return ile_fixed + + +def _assign_histidine(molecule, default_protonation='HIE', force_default_protonation=False): + assigned_states = [] + undefined_state = set(['HIS']) + + for residue in molecule.residues: + if residue.name in undefined_state: + hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) + + # if the HIS is in an undefined state, we look at + # the presence of hydrogen atoms connected to NE1 or + # ND1 (or both) to assign the protonation states + # Those hydrogen atoms will be present if the user + # used REDUCE to add hydrogen atoms before. + if force_default_protonation: + residue.name = default_protonation + elif set(['HD1', 'HE2']).issubset(hydrogen_name_set): + residue.name = 'HIP' + elif 'HD1' in hydrogen_name_set: + residue.name = 'HID' + elif 'HE2' in hydrogen_name_set: + residue.name = 'HIE' + else: + residue.name = default_protonation + + assigned_states.append((residue.name, residue.number)) + + return assigned_states + + +def _assign_glutamine(molecule, default_protonation='GLU', force_default_protonation=False): + assigned_states = [] + undefined_state = set(['GLU']) + + for residue in molecule.residues: + if residue.name in undefined_state: + hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) + + # if the GLU is in an undefined state, we look at + # the presence of hydrogen atoms connected to OE2 + # to assign the protonation states. This hydrogen + # atom will be present if the user used REDUCE or + # another tool to add hydrogen atoms before. + if force_default_protonation: + residue.name = default_protonation + elif set(['HE2']).issubset(hydrogen_name_set): + residue.name = 'GLH' + assigned_states.append((residue.name, residue.number)) + else: + residue.name = default_protonation + + return assigned_states + + +def _assign_asparagine(molecule, default_protonation='ASP', force_default_protonation=False): + assigned_states = [] + undefined_state = set(['ASP']) + + for residue in molecule.residues: + if residue.name in undefined_state: + hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) + + # if the ASP is in an undefined state, we look at + # the presence of hydrogen atoms connected to OD2 + # to assign the protonation states. This hydrogen + # atom will be present if the user used REDUCE or + # another tool to add hydrogen atoms before. + if force_default_protonation: + residue.name = default_protonation + elif set(['HD2']).issubset(hydrogen_name_set): + residue.name = 'ASH' + assigned_states.append((residue.name, residue.number)) + else: + residue.name = default_protonation + + return assigned_states + + +def _assign_lysine(molecule, default_protonation='LYS', force_default_protonation=False): + assigned_states = [] + undefined_state = set(['LYS']) + + for residue in molecule.residues: + if residue.name in undefined_state: + hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) + + # if the LYS is in an undefined state, we look at + # the absence of an hydrogen atom (HZ1) connected to NZ + # to assign the protonation states. This hydrogen + # atom will be absent if the user used REDUCE or + # another tool to add hydrogen atoms before. + if force_default_protonation: + residue.name = default_protonation + elif not set(['HZ1', 'HZ2', 'HZ3']).issubset(hydrogen_name_set): + residue.name = 'LYN' + assigned_states.append((residue.name, residue.number)) + else: + residue.name = default_protonation + + return assigned_states + + +def _assign_cysteine(molecule, default_protonation='CYS', force_default_protonation=False): + assigned_states = [] + undefined_state = set(['CYS']) + + for residue in molecule.residues: + if residue.name in undefined_state: + hydrogen_name_set = sorted(set(atom.name for atom in residue.atoms if atom.atomic_number == 1)) + + # if the CYS is in an undefined state, we look at + # the absence of an hydrogen atom (HG) connected to SG + # to assign the protonation states. This hydrogen + # atom will be absent if the user used REDUCE or + # another tool to add hydrogen atoms before. + if force_default_protonation: + residue.name = default_protonation + elif not set(['HG']).issubset(hydrogen_name_set): + residue.name = 'CYM' + assigned_states.append((residue.name, residue.number)) + else: + residue.name = default_protonation + + return assigned_states + + +def _fix_charmm_histidine_to_amber(molecule): + his_fixed = [] + + for residue in molecule.residues: + if residue.name == 'HSE': + residue.name = 'HIE' + his_fixed.append(('HIE', residue.number)) + elif residue.name == 'HSD': + residue.name = 'HID' + his_fixed.append(('HID', residue.number)) + elif residue.name == 'HSP': + residue.name = 'HIP' + his_fixed.append(('HIP', residue.number)) + + return his_fixed + + +def _find_non_standard_resnames(molecule, amber_supported_resname): + ns_names = set() + + for residue in molecule.residues: + if len(residue.name) > 3: + rname = residue.name[:3] + else: + rname = residue.name + if rname.strip() not in amber_supported_resname: + ns_names.add(rname) + + return ns_names + + +def _read_resname_from_lib_file(lib_file): + with open(lib_file) as f: + lines = f.readlines() + return lines[1].strip()[1:-1] + + +class PrepareReceptor: + + def __init__(self, keep_hydrogen=False, keep_water=False, no_disulfide=False, + keep_altloc=False, ignore_gaps=False, renumbering=True, use_model=1, + default_his_protonation='HIE'): + self._keep_hydrogen = keep_hydrogen + self._keep_water = keep_water + self._no_difsulfide = no_disulfide + self._keep_altloc = keep_altloc + self._use_model = use_model + # If we ignore gaps (True), we fill the gaps (True) + # If we don't ignore the gaps (False, we don't fille them (False) + self._fill_gaps = ignore_gaps + self._renumbering = renumbering + self._default_his_protonation = default_his_protonation + self._force_default_protonation = not self._keep_hydrogen + + self._pdb_filename = None + self._molecule = None + + + def prepare(self, pdb_filename, lib_files=None, frcmod_files=None, clean=True): + try: + receptor = pmd.load_file(pdb_filename) + except FileNotFoundError as the_error: + error_msg = 'Receptor file (%s) cannot be found.' % pdb_filename + logger.error(error_msg) + raise the_error + return self.prepare_from_parmed_structure(receptor, lib_files=None, frcmod_files=None, clean=True) + + + def prepare_from_parmed_structure(self, receptor, lib_files=None, frcmod_files=None, clean=True): + '''Prepare receptor structure + + Args: + pdb_filename (str): input pdb filename + lib_files (list): Amber lib parameter files for non-standard residues + frcmod_files (list): Amber frcmod parameter files for non-standard residues + clean (bool): remove tleap input and output files (default: True) + + ''' + final_ns_names = [] + tleap_input = 'leap.template.in' + tleap_output = 'leap.template.out' + tleap_log = 'leap.log' + pdb_clean_filename = 'tmp.pdb' + prmtop_filename = 'tmp.prmtop' + rst7_filename = 'tmp.rst7' + nonstandard_resnames = tuple() + local_lib_files = None + local_frcmod_files = None + + logger = logging.getLogger('WaterKit receptor preparation') + logging.basicConfig(level=os.environ.get('LOGLEVEL', 'INFO')) + + + pdbfixer = AmberPDBFixer(receptor) + + # Get resnames from lib files + if lib_files is not None: + nonstandard_resnames = tuple([_read_resname_from_lib_file(lib_file) for lib_file in lib_files]) + logger.info('Amber lib parameter files for residue(s): %s' % ', '.join(nonstandard_resnames)) + + # Remove box and symmetry + pdbfixer.parm.box = None + pdbfixer.parm.symmetry = None + + # Find missing heavy atoms + missing_atoms = pdbfixer.find_missing_heavy_atoms(HEAVY_ATOM_DICT) + if missing_atoms: + logger.warning('Found residue(s) with missing heavy atoms: %s' % ', '.join([str(m) for m in missing_atoms])) + + # Remove all the hydrogens + if not self._keep_hydrogen: + pdbfixer.parm.strip('@/H') + logger.info('Removed all hydrogen atoms') + + # Remove water molecules + if not self._keep_water: + pdbfixer.remove_water() + logger.info('Removed all water molecules') + + # Fix isoleucine CD aton name, supposed to be CD1 for Amber + ile_fixed = _fix_isoleucine_cd_atom_name(pdbfixer.parm) + if ile_fixed: + logger.info('Atom names were fixed for: %s' % ', '.join('%s - %d' % (r[0], r[1]) for r in ile_fixed)) + + his_fixed = _fix_charmm_histidine_to_amber(pdbfixer.parm) + if his_fixed: + warning_msg = 'CHARMM histidine protonation were converted to Amber: %s' + logger.warning(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in his_fixed)) + + # Keep only standard-Amber residues + ns_names = _find_non_standard_resnames(pdbfixer.parm, AMBER_SUPPORTED_RESNAMES + nonstandard_resnames) + if ns_names: + pdbfixer.parm.strip('!:' + ','.join(AMBER_SUPPORTED_RESNAMES + nonstandard_resnames)) + logger.info('Removed all non-standard Amber residues: %s' % ', '.join(ns_names)) + + # Find all the gaps + gaplist = _find_gaps(pdbfixer.parm, RESPROT, self._fill_gaps) + ter_flags = _ter_flags(pdbfixer.parm) + if gaplist: + gap_msg = '' + gap_str = ' - gap of %8.3f A between %s %d and %s %d\n' + for _, (d, resname0, resid0, resname1, resid1) in enumerate(gaplist): + gap_msg += gap_str % (d, resname0, resid0, resname1, resid1) + + if self._fill_gaps: + warning_msg = 'Gaps were ignore between residues (automatically added TER records): \n' + warning_msg += gap_msg + logger.warning(warning_msg) + else: + error_msg = ' Gap(s) were found between some residues. You can fix that issue by adding the missing residues' + error_msg += ' or manually add TER records to indicate that the residues/chains are not physically connected' + error_msg += ' to each other. If you know what you are doing, you can use the --ignore_gaps option to' + error_msg += ' ignore them (automatically add TER records).\n' + error_msg += 'Gap(s) found between the following residues: \n' + error_msg += gap_msg + + logger.error(error_msg) + raise RuntimeError(error_msg) + + assigned_his_states = _assign_histidine(pdbfixer.parm, self._default_his_protonation, self._force_default_protonation) + if assigned_his_states: + warning_msg = 'Histidine protonation states were automatically set to: %s' + logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_his_states)) + + assigned_glu_states = _assign_glutamine(pdbfixer.parm, force_default_protonation=self._force_default_protonation) + if assigned_glu_states: + warning_msg = 'Glutamine protonation states were automatically set to: %s' + logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_glu_states)) + + assigned_asp_states = _assign_asparagine(pdbfixer.parm, force_default_protonation=self._force_default_protonation) + if assigned_asp_states: + warning_msg = 'Asparagine protonation states were automatically set to: %s' + logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_asp_states)) + + assigned_lys_states = _assign_lysine(pdbfixer.parm, force_default_protonation=self._force_default_protonation) + if assigned_lys_states: + warning_msg = 'Lysine protonation states were automatically set to: %s' + logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_lys_states)) + + # Find all the disulfide bonds + if not self._no_difsulfide: + sslist, cys_cys_atomidx_set = pdbfixer.find_disulfide() + if sslist: + pdbfixer.rename_cys_to_cyx(sslist) + resids_str = ', '.join(['%s-%s' % (ss[0] + 1, ss[1] + 1) for ss in sslist]) + logger.info('Found disulfide bridges between residues %s' % resids_str) + else: + sslist = None + + # We assign cysteine only after finding all the disulfide bridges, + # Here we are looking potential CYM residues + assigned_cys_states = _assign_cysteine(pdbfixer.parm, force_default_protonation=self._force_default_protonation) + if assigned_cys_states: + warning_msg = 'Cysteine protonation states were automatically set to: %s' + logger.info(warning_msg % ', '.join('%s - %d' % (r[0], r[1]) for r in assigned_cys_states)) + + # Remove all the aternate residue sidechains + if not self._keep_altloc: + alt_residues = _remove_alt_residues(pdbfixer.parm) + if alt_residues: + logger.info('Removed all alternatives residue sidechains') + + # Renumbers resids (starting from 1) and renames chainids (starting from A) + if self._renumbering: + new_resids, new_chainids = _generate_resids_and_chainids(pdbfixer.parm) + _replace_resids_and_chainids(pdbfixer.parm, new_resids, new_chainids) + else: + new_resids = [residue.number for residue in pdbfixer.parm.residues] + new_chainids = [residue.chain for residue in pdbfixer.parm.residues] + + if lib_files is not None: + original_lib_files = [os.path.abspath(lib_file) for lib_file in lib_files] + local_lib_files = [os.path.basename(lib_file) for lib_file in lib_files] + + if frcmod_files is not None: + original_frcmod_files = [os.path.abspath(frcmod_file) for frcmod_file in frcmod_files] + local_frcmod_files = [os.path.basename(frcmod_file) for frcmod_file in frcmod_files] + + # Create Amber topology and coordinates files + print(f"{os.getcwd()=}") + with utils.temporary_directory(prefix='wk_preparation_', dir='.', clean=clean) as tmp_dir: + print(f"{os.getcwd()=}") + if lib_files is not None: + [shutil.copy2(olib_file, llib_file) for olib_file, llib_file in zip(original_lib_files, local_lib_files)] + + if frcmod_files is not None: + [shutil.copy2(ofrc_file, lfrc_file) for ofrc_file, lfrc_file in zip(original_frcmod_files, local_frcmod_files)] + + tleap_resids, tleap_chainids = _generate_resids_chainids_for_tleap(pdbfixer.parm) + _replace_resids_and_chainids(pdbfixer.parm, tleap_resids, tleap_chainids) + + try: + _write_pdb_file(pdb_clean_filename, pdbfixer.parm) + except: + error_msg = 'Could not write pdb file %s' % pdb_clean_filename + logger.error(error_msg) + raise + + # Generate topology/coordinates files + with open(tleap_input, 'w') as w: + content = _make_leap_template(pdbfixer.parm, final_ns_names, gaplist, sslist, + input_pdb=pdb_clean_filename, + prmtop=prmtop_filename, rst7=rst7_filename, + lib_files=local_lib_files, frcmod_files=local_frcmod_files) + w.write(content) + + try: + easy_call('tleap -s -f %s > %s' % (tleap_input, tleap_output), shell=True) + except RuntimeError: + error_msg = 'Could not generate topology/coordinates files with tleap' + logger.error(error_msg) + raise RuntimeError(error_msg) + + self._molecule = pmd.load_file(prmtop_filename, rst7_filename) + + # Add back resids, chainids and TER flags to molecule + _replace_resids_and_chainids(self._molecule, new_resids, new_chainids) + _add_ter_flags(self._molecule, ter_flags) + + def write_pdb_file(self, pdb_filename='protein.pdb'): + _write_pdb_file(pdb_filename, self._molecule) + + def write_pdbqt_file(self, pdbqt_filename='protein.pdbqt', amber_atom_types=False): + output_str = self.write_pdbqt_string(amber_atom_types) + with open(pdbqt_filename, 'w') as w: + w.write(output_str) + + def write_pdbqt_string(self, amber_atom_types=False): + if amber_atom_types: + return _write_pdbqt_string(self._molecule) + else: + molecule = _convert_amber_to_autodock_types(self._molecule) + return _write_pdbqt_string(molecule) diff --git a/waterkit/trajectory_utils.py b/waterkit/trajectory_utils.py new file mode 100644 index 0000000..eb63b72 --- /dev/null +++ b/waterkit/trajectory_utils.py @@ -0,0 +1,344 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# make_trajectory +# + +from __future__ import division +from __future__ import print_function +from __future__ import absolute_import + +import copy +import os +import re +import sys +from packaging.version import Version +from shutil import copyfile + +import numpy as np +import parmed as pmd +from openmm.unit import Quantity, picoseconds, kilocalories_per_mole, angstroms, nanometer, kelvin, kilocalories, mole +from openmm import CustomExternalForce, LangevinIntegrator, Platform +from openmm.app import AmberPrmtopFile, HBonds, CutoffNonPeriodic, Simulation +from parmed.amber import NetCDFTraj +from pdb4amber import pdb4amber +from pdb4amber.utils import easy_call + +from waterkit import utils + + +# region Make trajectory + +def max_water(water_filenames): + """Return the max number of water molecules seen + + Args: + water_filenames (array-like): list of filenames of the water files + + Return: + int: number of max water molecules + int: index of the file in the water filenames list + """ + sizes = [os.path.getsize(f) for f in water_filenames] + idx = np.argmax(sizes) + m = pmd.load_file(water_filenames[idx]) + # We select only the water molecules, because we might have ions, etc... + m = m["@O, H1, H2"] + max_water = len(m.residues) + return max_water, idx + + +def min_water(water_filenames): + """Return the min number of water molecules seen + + Args: + water_filenames (array-like): list of filenames of the water files + + Return: + int: number of min water molecules + int: index of the file in the water filenames list + """ + sizes = [os.path.getsize(f) for f in water_filenames] + idx = np.argmin(sizes) + m = pmd.load_file(water_filenames[idx]) + # We select only the water molecules, because we might have ions, etc... + m = m["@O, H1, H2"] + max_water = len(m.residues) + + return max_water, idx + + +def add_water_to_receptor(receptor, water): + receptor_wet = copy.deepcopy(receptor) + receptor_wet += water["@O, H1, H2"] + # ParmED really want a symmetry attributes to write the PDB file + receptor_wet.symmetry = None + + return receptor_wet + + +def write_pdb_file(output_name, molecule, overwrite=True, **kwargs): + '''Write PDB file + + Args: + output_name (str): pdbqt output filename + molecule (parmed): parmed molecule object + + ''' + try: + molecule.save(output_name, format='pdb', overwrite=overwrite, **kwargs) + except IOError: + raise IOError("Error: file %s already exists." % fname) + + +def write_tleap_input_file(fname, pdb_filename, lib_files=None, frcmod_files=None): + """Create tleap input script + + Args: + fname (str): tleap input filename + pdb_filename (str): pdb filename + lib_files (list): Amber lib parameter files for non-standard residues + frcmod_files (list): Amber frcmod parameter files for non-standard residues + + """ + prefix = pdb_filename.split(".pdb")[0].split("/")[-1] + + output_str = "source leaprc.protein.ff14SB\n" + output_str += "source leaprc.DNA.OL15\n" + output_str += "source leaprc.RNA.OL3\n" + output_str += "source leaprc.water.tip3p\n" + output_str += "source leaprc.gaff2\n" + if frcmod_files is not None: + output_str += ''.join(['loadamberparams %s\n' % fl for fl in frcmod_files]) + if lib_files is not None: + output_str += ''.join(['loadoff %s\n' % ll for ll in lib_files]) + output_str += "\n" + output_str += "x = loadpdb %s\n" % pdb_filename.split("/")[-1] + output_str += "\n" + output_str += "set default nocenter on\n" + output_str += "saveAmberParm x %s.prmtop %s.rst7\n" % (prefix, prefix) + output_str += "quit\n" + + with open(fname, "w") as w: + w.write(output_str) + + +def write_trajectory_file(fname, receptor, water_filenames): + """Create netcdf trajectory from the water pdb file + + Args: + fname (str): output name for the trajectory + receptor_filename (parmed): parmed receptor object + water_filenames (list): list of filenames of the water files + + """ + max_n_waters, idx = max_water(water_filenames) + min_n_waters, _ = min_water(water_filenames) + buffer_n_waters = max_n_waters - min_n_waters + max_n_water_atoms = max_n_waters * 3 + + n_atoms = receptor.coordinates.shape[0] + max_n_atoms = n_atoms + (max_n_waters * 3) + coordinates = np.zeros(shape=(max_n_atoms, 3)) + + # Boz dimension + box_center = np.mean(receptor.coordinates, axis=0) + + x_min, x_max = np.min(receptor.coordinates[:, 0]), np.max(receptor.coordinates[:, 0]) + y_min, y_max = np.min(receptor.coordinates[:, 1]), np.max(receptor.coordinates[:, 1]) + z_min, z_max = np.min(receptor.coordinates[:, 2]), np.max(receptor.coordinates[:, 2]) + box_size = np.max([np.abs(x_max - x_min), np.abs(y_max - y_min), np.abs(z_max - z_min)]) + 40 + + box = [box_size, box_size, box_size, 90, 90, 90] + + # Dummy water coordinates + dum_water_x = np.array([0, 0, 0]) + dum_water_y = np.array([0, 0.756, 0.586]) + dum_water_z = np.array([0, -0.756, 0.586]) + + radius = (box_size / 2.) - 2. + z = np.random.uniform(-radius, radius, buffer_n_waters) + p = np.random.uniform(0, np.pi * 2, buffer_n_waters) + x = np.sqrt(radius**2 - z**2) * np.cos(p) + y = np.sqrt(radius**2 - z**2) * np.sin(p) + oxygen_xyz = np.stack((x, y, z), axis=-1) + oxygen_xyz += box_center + + dummy_water_xyz = np.zeros(shape=(buffer_n_waters * 3, 3)) + dummy_water_xyz[0::3] = oxygen_xyz + dummy_water_xyz[1::3] = oxygen_xyz + dum_water_y + dummy_water_xyz[2::3] = oxygen_xyz + dum_water_z + + # Already add the coordinates from the receptor + coordinates[:n_atoms] = receptor.coordinates + + trj = NetCDFTraj.open_new(fname, natom=max_n_atoms, box=True, crds=True) + + for i, water_filename in enumerate(water_filenames): + m = pmd.load_file(water_filename) + + last_atom_id = len(m.residues) * 3 + water_xyz = m["@O, H1, H2"].coordinates + + # Get all the TIP3P water molecules + coordinates[n_atoms:n_atoms + last_atom_id] = water_xyz + # Add the dummy water molecules + coordinates[n_atoms + last_atom_id:] = dummy_water_xyz[:max_n_water_atoms - water_xyz.shape[0]] + + trj.add_coordinates(coordinates) + trj.add_box(box) + trj.add_time(i + 1) + + trj.close() + + +def _make_trajectory(receptor_dry, water_directory, output_prefix, lib_files, frcmod_files): + + tleap_input = 'leap.template.in' + tleap_output = 'leap.template.out' + tleap_log = 'leap.log' + + water_filenames = [] + for fname in os.listdir(water_directory): + if re.match(r"water_[0-9]{6}.pdb", fname): + water_filenames.append(os.path.join(water_directory, fname)) + + """ Add water molecules to the dry receptor and write pdb wet receptor + We are taking the water coordinates from the frame that have the + max number of water molecules. Because the number of water molecules + need to be constant during the trajectory. This is just for creating + the amber topology (and coordinate) file(s).""" + water = pmd.load_file(water_filenames[max_water(water_filenames)[1]]) + receptor_wet = add_water_to_receptor(receptor_dry, water) + pdb_fn = "%ssystem.pdb" % output_prefix + write_pdb_file(pdb_fn, receptor_wet) + + # Write tleap input script + write_tleap_input_file(tleap_input, pdb_fn, lib_files, frcmod_files) + + try: + # Generate amber prmtop and rst7 files + easy_call('tleap -s -f %s > %s' % (tleap_input, tleap_output), shell=True) + except RuntimeError: + error_msg = 'Could not generate topology/coordinates files with tleap.' + raise RuntimeError(error_msg) + + # Write trajectory + write_trajectory_file("%ssystem.nc" % output_prefix, receptor_dry, water_filenames) + + +def make_trajectory(receptor_dry, water_directory, output_prefix, lib_files, frcmod_files): + water_directory = os.path.abspath(water_directory) + if lib_files is not None: + lib_files = [os.path.abspath(fn) for fn in lib_files] + if frcmod_files is not None: + frcmod_files = [os.path.abspath(fn) for fn in frcmod_files] + abs_output_dir = os.path.abspath(os.path.dirname(output_prefix)) + prefix = output_prefix.split("/")[-1] # split internally in write_tleap_input_file() + with utils.temporary_directory(clean=True) as tmp_dir: + _make_trajectory(receptor_dry, water_directory, prefix,lib_files, frcmod_files) + copyfile("%ssystem.rst7" % prefix, os.path.join(abs_output_dir, "%ssystem.rst7" % prefix)) + copyfile("%ssystem.prmtop" % prefix, os.path.join(abs_output_dir, "%ssystem.prmtop" % prefix)) + copyfile("%ssystem.nc" % prefix, os.path.join(abs_output_dir, "%ssystem.nc" % prefix)) + copyfile("%ssystem.pdb" % prefix, os.path.join(abs_output_dir, "%ssystem.pdb" % prefix)) + +# endregion + +# region Minimize trajectory + +def _box_information(traj_filename): + box = None + + try: + traj = NetCDFTraj.open_old(traj_filename) + + if traj.hasbox: + box = traj.box[0] + + traj.close() + except FileNotFoundError as err: + print("Cannot find trajectory file: %s" % traj_filename) + raise err + + return box + + +class WaterMinimizer: + def __init__(self, n_steps=100, restraint=None, platform="OpenCL", verbose=True): + self._n_steps = n_steps + self._restraint = restraint + self._platform = platform + self._verbose = verbose + + def minimize_trajectory(self, prmtop_filename, traj_filename, output_filename): + nonbondedMethod = CutoffNonPeriodic + nonbondedCutoff = 9 * angstroms + rigidWater = True + constraints = HBonds + dt = 0.002 * picoseconds + temperature = 300 * kelvin + friction = 1.0 / picoseconds + K = self._restraint * kilocalories_per_mole / angstroms**2 + + # If someone still uses an old version of OpenMM + if Version(Platform.getOpenMMVersion()) > Version("7.5.0"): + tolerance = 1.0 * kilocalories / (nanometer * mole) + else: + tolerance = 1.0 * kilocalories_per_mole + + box = _box_information(traj_filename) + + platform = Platform.getPlatformByName(self._platform) + platformProperties = {'Precision': 'single'} + + prmtop = AmberPrmtopFile(prmtop_filename) + parmedtop = pmd.load_file(prmtop_filename) + old_traj = NetCDFTraj.open_old(traj_filename) + + n_atom = old_traj.atom + n_frame = old_traj.frame + + new_trj = NetCDFTraj.open_new(output_filename, natom=n_atom, box=True, crds=True) + + system = prmtop.createSystem(nonbondedMethod=nonbondedMethod, nonbondedCutoff=nonbondedCutoff, constraints=constraints) + + for i, coordinates in enumerate(old_traj.coordinates): + old_positions = Quantity(coordinates.tolist(), unit=angstroms) + + if self._restraint > 0 or self._restraint is not None: + # Add harmonic constraints on protein + force = CustomExternalForce("k * ((x-x0)^2 + (y-y0)^2 + (z-z0)^2)") + force.addGlobalParameter("k", K) + force.addPerParticleParameter("x0") + force.addPerParticleParameter("y0") + force.addPerParticleParameter("z0") + for atom in parmedtop.view['!@H= & !:WAT']: + force.addParticle(atom.idx, old_positions[atom.idx]) + force_idx = system.addForce(force) + + # Create simulation + integrator = LangevinIntegrator(temperature, friction, dt) + simulation = Simulation(prmtop.topology, system, integrator, platform) + simulation.context.setPositions(old_positions) + + # Minimize the water molecules + simulation.minimizeEnergy(maxIterations=self._n_steps, tolerance=tolerance) + # Get new positions and store in the new trajectory + new_positions = simulation.context.getState(getPositions=True).getPositions(asNumpy=True) + + new_trj.add_coordinates(new_positions) + new_trj.add_box(box) + new_trj.add_time(i + 1) + + if self._restraint > 0 or self._restraint is not None: + system.removeForce(force_idx) + + if (i % 100 == 0) and self._verbose: + sys.stdout.write("\rConformations minimized: %5.2f / 100 %%" % (float(i) / n_frame * 100.)) + sys.stdout.flush() + + new_trj.close() + old_traj.close() + +# endregion + diff --git a/waterkit/utils.py b/waterkit/utils.py index 2ac6689..e74d1bc 100644 --- a/waterkit/utils.py +++ b/waterkit/utils.py @@ -342,7 +342,7 @@ def is_writable(pathname): testfile = tempfile.NamedTemporaryFile(dir=pathname) testfile.close() except (PermissionError, FileNotFoundError) as e: - raise RuntimeError('Can write in directory %s.' % pathname) from e + raise RuntimeError('Can\'t write in directory %s.' % pathname) from e def execute_command(cmd_line): diff --git a/waterkit/wrap_waterkit_and_gist.py b/waterkit/wrap_waterkit_and_gist.py new file mode 100644 index 0000000..0af42ff --- /dev/null +++ b/waterkit/wrap_waterkit_and_gist.py @@ -0,0 +1,140 @@ +import numpy as np +import parmed + +from .molecule import Molecule +from .receptor_prep import PrepareReceptor +from .trajectory_utils import make_trajectory +from .trajectory_utils import WaterMinimizer +from .autogrid import calc_spherical_water_map +from .waterkit import WaterKit +from .utils import prepare_water_map +from .utils import temporary_directory +import os +import subprocess +import shutil + + +def run_waterkit_and_gist( + receptor_parmed_structure_or_filename, + output_prefix, + box_center, + box_size, + autogrid_exec_path, + ignore_gaps=False, + keep_hydrogen=False, + n_frames=10000, + n_layer=3, + n_jobs=-1, + platform="CUDA", +): + + if len(box_center) != 3 or len(box_size) != 3: + raise ValueError("length of box_size and box_center must be 3, but it's %d and %d" % ( + len(box_size), len(box_center))) + + size_types = set([type(c) for c in box_size]) + if len(size_types) != 1 or size_types.pop() not in (int, np.int64, np.int32): + raise ValueError("Size of box must be specified with integers (Angstroms), got %s" % box_size) + + if type(receptor_parmed_structure_or_filename) is str: + receptor_parmed_structure = parmed.load_file(receptor_parmed_structure_or_filename) + else: + receptor_parmed_structure = receptor_parmed_structure_or_filename + + autogrid_exec_path = os.path.abspath(autogrid_exec_path) + + # The following is a list of reasonable defaults that is probably not worth + # exposing to the outside. There are further options that individual classes + # expose, for example `no_disulfide` in PrepareReceptor. + receptor_spherical_water_map_filename = None + water_spherical_water_map_filename = None + water_model="tip3p" + temperature = 300.0 + renumber_receptor = False + minimization_restraint = 2.5 + nr_minimization_steps = 100 + clean = True + + # will eventually need to expose these for systems with custom parameters + lib_files = None + frcmod_files = None + + original_dir = os.getcwd() + output_dir = os.path.abspath(os.path.dirname(output_prefix)) + if not os.path.exists(output_dir): + os.makedirs(output_dir) + prefix = os.path.basename(output_prefix) + + with temporary_directory(clean=clean) as tmp_dir: + + print(tmp_dir) + + # renumbering=False unlike in wk_prepare_receptor.py + print("Preparing receptor") + pr = PrepareReceptor( + keep_hydrogen=keep_hydrogen, # True causes trouble with meeko's blunt ends + ignore_gaps=ignore_gaps, + renumbering=renumber_receptor, + ) + pr.prepare_from_parmed_structure(receptor_parmed_structure, clean=clean) + pdb_fn = "%sprepared.pdb" % prefix + pr.write_pdb_file(pdb_fn) + shutil.copyfile(pdb_fn, os.path.join(output_dir, pdb_fn)) + amber_pdbqt_str = pr.write_pdbqt_string(amber_atom_types=True) + + print("Generating waterkit water configurations") + receptor = Molecule.from_pdbqt_string(amber_pdbqt_str) + ad_map = calc_spherical_water_map( + autogrid_exec_path, + receptor, + box_center, + box_size, + receptor_spherical_water_map_filename, + ) + prepare_water_map(ad_map, water_model=water_model) + k = WaterKit( + temperature, + water_model, + water_spherical_water_map_filename, + n_layer, + n_frames, + n_jobs, + ) + wk_frames_dir = "traj" + os.mkdir(wk_frames_dir) + k.hydrate(receptor, ad_map, wk_frames_dir) + + print("Creating trajectory") + receptor_dry = parmed.load_file(pdb_fn) + make_trajectory(receptor_dry, wk_frames_dir, prefix, lib_files, frcmod_files) + + print("Minimizing trajectory") + prmtop_fn = "%ssystem.prmtop" % prefix + traj_fn = "%ssystem.nc" % prefix + minimized_traj_fn = "%ssystem_minimized.nc" % prefix + m = WaterMinimizer(nr_minimization_steps, minimization_restraint, platform) + m.minimize_trajectory(prmtop_fn, traj_fn, minimized_traj_fn) + + print("Running GIST") + gist_input = "" + gist_input += "parm %s\n" % prmtop_fn + gist_input += "trajin %s\n" % minimized_traj_fn + gist_input += "gist gridspacn 0.5 gridcntr %.3f %.3f %.3f griddim %d %d %d\n" % ( + box_center[0], box_center[1], box_center[2], + box_size[0]*2, box_size[1]*2, box_size[2]*2, + ) + gist_input += "go\n" + gist_input += "quit\n" + gist_input_fn = "gist_input.txt" + with open(gist_input_fn, "w") as w: + w.write(gist_input) + out = subprocess.run(["cpptraj", "-i", gist_input_fn], capture_output=True) + stdout = out.stdout.decode() + print(stdout) + + for key in ("gO", "Esw-dens", "Eww-dens", "dTStrans-dens", "dTSorient-dens"): + shutil.copyfile( + "gist-%s.dx" % key, + os.path.join(output_dir, "%swk_gist-%s.dx" % (prefix, key)), + ) + return