diff --git a/src/check_pdb.py b/src/check_pdb.py index 45a59fe..f371a73 100644 --- a/src/check_pdb.py +++ b/src/check_pdb.py @@ -33,8 +33,8 @@ def get_pdb_line_components(pdb_line: str) -> Dict[str, Union[int, float, str]]: pdb_line_terms: Dict[str, Union[int, float, str]] = {} if pdb_line: - if (pdb_line.split())[0] == "ATOM" or (pdb_line.split())[0] == "HETATM": - pdb_line_terms["RECORD TYPE"] = pdb_line[0:6].replace(" ", "") + if (pdb_line.split())[0] in ["ATOM", "HETATM"]: + pdb_line_terms["RECORD TYPE"] = pdb_line[:6].replace(" ", "") pdb_line_terms["ATOM SERIAL NUMBER"] = ( int(pdb_line[6:11].replace(" ", "")) if pdb_line[6:11].replace(" ", "") != "" @@ -82,7 +82,7 @@ def get_pdb_line_components(pdb_line: str) -> Dict[str, Union[int, float, str]]: pdb_line_terms["SEGMENT ID"] = pdb_line[72:76].replace(" ", "") pdb_line_terms["ELEMENT SYMBOL"] = pdb_line[76:78].replace(" ", "") elif (pdb_line.split())[0] == "TER": - pdb_line_terms["RECORD TYPE"] = pdb_line[0:6].replace(" ", "") + pdb_line_terms["RECORD TYPE"] = pdb_line[:6].replace(" ", "") try: pdb_line_terms["ATOM SERIAL NUMBER"] = int( pdb_line[6:11].replace(" ", "") @@ -100,7 +100,7 @@ def get_pdb_line_components(pdb_line: str) -> Dict[str, Union[int, float, str]]: except ValueError: pass elif (pdb_line.split())[0] == "CONECT": - pdb_line_terms["RECORD TYPE"] = pdb_line[0:6].replace(" ", "") + pdb_line_terms["RECORD TYPE"] = pdb_line[:6].replace(" ", "") pdb_line_terms["ATOM SERIAL NUMBER"] = pdb_line[6:11].replace(" ", "") pdb_line_terms["BONDED ATOM SERIAL NUMBER"] = pdb_line[12:].strip("\n") @@ -111,10 +111,7 @@ def assemble_pdb_line_components( pdb_line_components: Dict[str, Union[int, float, str]] ) -> Optional[str]: # Length of an ATOM line should have 78 characters - if ( - pdb_line_components["RECORD TYPE"] == "ATOM" - or pdb_line_components["RECORD TYPE"] == "HETATM" - ): + if pdb_line_components["RECORD TYPE"] in ["ATOM", "HETATM"]: for key in [ "X COORDINATE", "Y COORDINATE", @@ -170,7 +167,7 @@ def get_missing_terms(pdb_line_terms: Dict[str, Union[int, float, str]]) -> List return [ key - for key in pdb_line_terms.keys() + for key in pdb_line_terms if pdb_line_terms[key] == "" and key in important_columns ] diff --git a/src/gmx_ops.py b/src/gmx_ops.py index 2aa35b4..e7a3343 100644 --- a/src/gmx_ops.py +++ b/src/gmx_ops.py @@ -71,17 +71,14 @@ def combine_prot_lig_gro( root_dir: str, ligand_id: str, protein_pdb: str = "PROTEIN" ) -> List[str]: - complex_lines: list[str] = [] with open(f"{root_dir}/{protein_pdb}_processed.gro", "r") as protein_f: protein_lines = protein_f.readlines() with open( f"{root_dir}/{ligand_id}_fix.acpype/{ligand_id}_fix_GMX.gro", "r" ) as ligand_f: ligand_lines = ligand_f.readlines() - for line in protein_lines[2:-1]: - complex_lines.append(line) - for line in ligand_lines[2:-1]: - complex_lines.append(line) + complex_lines: list[str] = list(protein_lines[2:-1]) + complex_lines.extend(iter(ligand_lines[2:-1])) complex_lines.insert(0, protein_lines[0]) complex_lines.insert(1, f"{len(complex_lines) - 1}\n") complex_lines.append(protein_lines[-1]) @@ -190,100 +187,100 @@ def write_mdp_file(self) -> Dict[str, Any]: sim_types = ["ions", "em", "nvt", "npt", "md"] - default_params_for_ions_and_em: Dict[str, Any] = { - "integrator": self.integrator, - "nsteps": self.nsteps, - "emtol": self.emtol, - "emstep": self.emstep, - "cutoff_scheme": self.cutoff_scheme, - "nstlist": self.nstlist, - "pbc": self.pbc, - "ns_type": self.ns_type, - "rlist": self.rlist, - "coulombtype": self.coulombtype, - "rcoulomb": self.rcoulomb, - "rvdw": self.rvdw, - } - - default_params_for_nvt_npt_md: Dict[str, Any] = { - "integrator": self.integrator, - "dt": self.dt, - "nsteps": self.nsteps, - "nstlog": self.nstlog, - "nstenergy": self.nstenergy, - "nstxout_compressed": self.nstxout_compressed, - "cutoff_scheme": self.cutoff_scheme, - "nstlist": self.nstlist, - "pbc": self.pbc, - "ns_type": self.ns_type, - "rlist": self.rlist, - "coulombtype": self.coulombtype, - "rcoulomb": self.rcoulomb, - "vdwtype": self.vdwtype, - "vdw_modifier": self.vdw_modifier, - "rvdw_switch": self.rvdw_switch, - "rvdw": self.rvdw, - "DispCorr": self.DispCorr, - "fourierspacing": self.fourierspacing, - "pme_order": self.pme_order, - "tcoupl": self.tcoupl, - "tc_grps": self.tc_grps, - "tau_t": self.tau_t, - "ref_t": self.ref_t, - "pcoupl": self.pcoupl, - "gen_vel": self.gen_vel, - "constraints": self.constraints, - "constraint_algorithm": self.constraint_algorithm, - "continuation": self.continuation, - "lincs_order": self.lincs_order, - "lincs_iter": self.lincs_iter, - } - - if self.sim_type in sim_types and self.sim_type == "ions": - return default_params_for_ions_and_em - - elif self.sim_type in sim_types and self.sim_type == "em": - - additional_params: Dict[str, Any] = { + if self.sim_type in sim_types: + default_params_for_ions_and_em: Dict[str, Any] = { + "integrator": self.integrator, + "nsteps": self.nsteps, + "emtol": self.emtol, + "emstep": self.emstep, + "cutoff_scheme": self.cutoff_scheme, + "nstlist": self.nstlist, + "pbc": self.pbc, + "ns_type": self.ns_type, + "rlist": self.rlist, + "coulombtype": self.coulombtype, + "rcoulomb": self.rcoulomb, + "rvdw": self.rvdw, + } + + default_params_for_nvt_npt_md: Dict[str, Any] = { + "integrator": self.integrator, + "dt": self.dt, + "nsteps": self.nsteps, + "nstlog": self.nstlog, + "nstenergy": self.nstenergy, + "nstxout_compressed": self.nstxout_compressed, + "cutoff_scheme": self.cutoff_scheme, + "nstlist": self.nstlist, + "pbc": self.pbc, + "ns_type": self.ns_type, + "rlist": self.rlist, + "coulombtype": self.coulombtype, + "rcoulomb": self.rcoulomb, "vdwtype": self.vdwtype, "vdw_modifier": self.vdw_modifier, "rvdw_switch": self.rvdw_switch, + "rvdw": self.rvdw, "DispCorr": self.DispCorr, + "fourierspacing": self.fourierspacing, + "pme_order": self.pme_order, + "tcoupl": self.tcoupl, + "tc_grps": self.tc_grps, + "tau_t": self.tau_t, + "ref_t": self.ref_t, + "pcoupl": self.pcoupl, + "gen_vel": self.gen_vel, + "constraints": self.constraints, + "constraint_algorithm": self.constraint_algorithm, + "continuation": self.continuation, + "lincs_order": self.lincs_order, + "lincs_iter": self.lincs_iter, } - return default_params_for_ions_and_em | additional_params # type: ignore - - elif self.sim_type in sim_types and self.sim_type == "nvt": - additional_params: Dict[str, Any] = { - "define": self.define, - "gen_temp": self.gen_temp, - "gen_seed": self.gen_seed, - } - - return default_params_for_nvt_npt_md | additional_params # type: ignore - - elif self.sim_type in sim_types and self.sim_type == "npt": - additional_params: Dict[str, Any] = { - "define": self.define, - "pcoupltype": self.pcoupltype, - "tau_p": self.tau_p, - "compressibility": self.compressibility, - "ref_p": self.ref_p, - "refcoord_scaling": self.refcoord_scaling, - } - - return default_params_for_nvt_npt_md | additional_params # type: ignore - - elif self.sim_type in sim_types and self.sim_type == "md": - additional_params: Dict[str, Any] = { - "pcoupltype": self.pcoupltype, - "tau_p": self.tau_p, - "compressibility": self.compressibility, - "ref_p": self.ref_p, - # "freezegrps": self.freezegrps, - # "freezedim": self.freezedim, - } - - return default_params_for_nvt_npt_md | additional_params # type: ignore + if self.sim_type == "em": + additional_params: Dict[str, Any] = { + "vdwtype": self.vdwtype, + "vdw_modifier": self.vdw_modifier, + "rvdw_switch": self.rvdw_switch, + "DispCorr": self.DispCorr, + } + + return default_params_for_ions_and_em | additional_params # type: ignore + + elif self.sim_type == "ions": + return default_params_for_ions_and_em + + elif self.sim_type == "md": + additional_params: Dict[str, Any] = { + "pcoupltype": self.pcoupltype, + "tau_p": self.tau_p, + "compressibility": self.compressibility, + "ref_p": self.ref_p, + # "freezegrps": self.freezegrps, + # "freezedim": self.freezedim, + } + + return default_params_for_nvt_npt_md | additional_params # type: ignore + + elif self.sim_type == "npt": + additional_params: Dict[str, Any] = { + "define": self.define, + "pcoupltype": self.pcoupltype, + "tau_p": self.tau_p, + "compressibility": self.compressibility, + "ref_p": self.ref_p, + "refcoord_scaling": self.refcoord_scaling, + } + + return default_params_for_nvt_npt_md | additional_params # type: ignore + + elif self.sim_type == "nvt": + additional_params: Dict[str, Any] = { + "define": self.define, + "gen_temp": self.gen_temp, + "gen_seed": self.gen_seed, + } + + return default_params_for_nvt_npt_md | additional_params # type: ignore return {} diff --git a/src/main_BAK.py b/src/main_BAK.py index 7743735..71ef189 100644 --- a/src/main_BAK.py +++ b/src/main_BAK.py @@ -89,7 +89,7 @@ def clean_pdb(pdb_type: str, pdb_contents: List[str]) -> List[str]: def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: if simulation_type == "ions": - mdp = MDP( + return MDP( sim_type=simulation_type, integrator="steep", nsteps=50000, @@ -99,21 +99,15 @@ def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: rcoulomb=1.0, rvdw=1.0, ) - - return mdp - elif simulation_type == "em": - mdp = MDP( + return MDP( sim_type=simulation_type, integrator="steep", nsteps=50000, nstlist=1, ) - - return mdp - elif simulation_type == "nvt" and pdb_type == "COMPLEX": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=500000, # nsteps=50000, @@ -127,11 +121,8 @@ def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: pcoupl="no", gen_vel="yes", ) - - return mdp - elif simulation_type == "nvt" and pdb_type == "PROTEIN": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=500000, # nsteps=50000, @@ -145,11 +136,8 @@ def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: pcoupl="no", gen_vel="yes", ) - - return mdp - elif simulation_type == "nvt" and pdb_type == "LIGAND": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=500000, # nsteps=50000, @@ -163,11 +151,8 @@ def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: pcoupl="no", gen_vel="yes", ) - - return mdp - elif simulation_type == "npt" and pdb_type == "COMPLEX": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=2500000, # nsteps=50000, @@ -177,11 +162,8 @@ def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: tc_grps=f"Protein_{ligid} Water_and_ions", pcoupl="C-rescale", ) - - return mdp - elif simulation_type == "npt" and pdb_type == "PROTEIN": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=2500000, # nsteps=50000, @@ -191,11 +173,8 @@ def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: tc_grps="Protein Water_and_ions", pcoupl="C-rescale", ) - - return mdp - elif simulation_type == "npt" and pdb_type == "LIGAND": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=2500000, # nsteps=50000, @@ -205,21 +184,15 @@ def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: tc_grps="System", pcoupl="C-rescale", ) - - return mdp - elif simulation_type == "md" and pdb_type == "COMPLEX": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=5000000, # nsteps=100000, tc_grps=f"Protein_{ligid} Water_and_ions", ) - - return mdp - elif simulation_type == "md" and pdb_type == "PROTEIN": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=500000000, # nsteps=100000, @@ -227,19 +200,14 @@ def mdp_type(pdb_type: str, simulation_type: str, ligid: str) -> Any: # freezegrps="Protein", # freezedim="Y Y Y", ) - - return mdp - elif simulation_type == "md" and pdb_type == "LIGAND": - mdp = MDP( + return MDP( sim_type=simulation_type, nsteps=5000000, # nsteps=100000, tc_grps="System", ) - return mdp - def calc_freeBE(production_md_output: Path, fit: Any, root_dir: str): # Calculate Binding Free Energy @@ -331,9 +299,7 @@ def main(pdb_file: Path, gpu_id: int, output_dir: Path) -> Any: pdb_type_dict["COMPLEX"] = [] ligand_atom_numbers: list[int] = [] - for line in pdb_contents: - ligand_pdb_contents.append(line) - + ligand_pdb_contents.extend(iter(pdb_contents)) elif pdb_type == "PROTEIN": pdb_type_dict["COMPLEX"] = [] protein_pdb_contents = pdb_contents diff --git a/src/protprep.py b/src/protprep.py index fd12f0c..a4f04fe 100644 --- a/src/protprep.py +++ b/src/protprep.py @@ -105,15 +105,15 @@ def delete_hydrogens(pdb_contents: List[str]) -> List[str]: :return: list of strings representing a PDB file with all hydrogen atoms removed """ - edited_pdb_contents: list[str] = [] - for line in pdb_contents: + edited_pdb_contents: list[str] = [ + line + for line in pdb_contents if ( line.split()[0] in ["ATOM", "HETATM"] and line.split()[-1] != "H" or line.split()[0] == "CONECT" - ): - edited_pdb_contents.append(line) - + ) + ] return edited_pdb_contents @@ -161,17 +161,16 @@ def assign_chain_ids(pdb_contents: List[str]) -> List[str]: pdb_line_components: Optional[ Dict[str, Union[int, float, str]] ] = get_pdb_line_components(line) - if line.startswith("CONECT") is False: - if ( - int(pdb_line_components["RESIDUE SEQ NUMBER"]) - > missing_residue_numbers[i] - and int(pdb_line_components["RESIDUE SEQ NUMBER"]) - < missing_residue_numbers[i + 1] - ): - pdb_line_components["CHAIN ID"] = string.ascii_uppercase[i] - edited_pdb_contents.append( - str(assemble_pdb_line_components(pdb_line_components)) - ) + if line.startswith("CONECT") is False and ( + int(pdb_line_components["RESIDUE SEQ NUMBER"]) + > missing_residue_numbers[i] + and int(pdb_line_components["RESIDUE SEQ NUMBER"]) + < missing_residue_numbers[i + 1] + ): + pdb_line_components["CHAIN ID"] = string.ascii_uppercase[i] + edited_pdb_contents.append( + str(assemble_pdb_line_components(pdb_line_components)) + ) return edited_pdb_contents @@ -229,13 +228,11 @@ def get_bound_ligand(pdb_id: str, pdb_contents: List[str]) -> Tuple[str, List[st "comp_id" ] - bound_ligand_pdb: list[str] = [] - - for line in pdb_contents: - if "HETATM" in line and bound_ligand_pdbid in line: - bound_ligand_pdb.append(line) - else: - pass + bound_ligand_pdb: list[str] = [ + line + for line in pdb_contents + if "HETATM" in line and bound_ligand_pdbid in line + ] return bound_ligand_pdbid, bound_ligand_pdb except TypeError: @@ -246,32 +243,30 @@ def get_bound_ligand(pdb_id: str, pdb_contents: List[str]) -> Tuple[str, List[st hetero_resname: list[str] = [] for line in pdb_contents: pdb_terms_dict = get_pdb_line_components(line) - if pdb_terms_dict["RECORD TYPE"] == "HETATM": + if ( + pdb_terms_dict["RECORD TYPE"] != "HETATM" + and pdb_terms_dict["RECORD TYPE"] == "ATOM" + and pdb_terms_dict["RESIDUE NAME"] + not in amino_acids_3_letter_codes + or pdb_terms_dict["RECORD TYPE"] == "HETATM" + ): hetero_resname.append(str(pdb_terms_dict["RESIDUE NAME"])) - elif pdb_terms_dict["RECORD TYPE"] == "ATOM": - if pdb_terms_dict["RESIDUE NAME"] not in amino_acids_3_letter_codes: - hetero_resname.append(str(pdb_terms_dict["RESIDUE NAME"])) hetero_resname = list(set(hetero_resname)) - choices: Dict[int, str] = {} if len(hetero_resname) > 1: - i = 1 - for resname in hetero_resname: - choices[i] = resname - i += 1 - + choices: Dict[int, str] = dict(enumerate(hetero_resname, start=1)) for key, value in choices.items(): print(f"{key}. {value}") bound_ligand_pdbid = choices[int(input("Choose Bound Ligand: "))] - bound_ligand_pdb: list[str] = [] - - for line in pdb_contents: - if "HETATM" in line or "ATOM" in line and bound_ligand_pdbid in line: - bound_ligand_pdb.append(line) - else: - pass + bound_ligand_pdb: list[str] = [ + line + for line in pdb_contents + if "HETATM" in line + or "ATOM" in line + and bound_ligand_pdbid in line + ] dehydrogenated_ligand: list[str] = bound_ligand_pdb | delete_hydrogens # type: ignore @@ -280,13 +275,13 @@ def get_bound_ligand(pdb_id: str, pdb_contents: List[str]) -> Tuple[str, List[st elif len(hetero_resname) == 1: bound_ligand_pdbid = hetero_resname[0] - bound_ligand_pdb: list[str] = [] - - for line in pdb_contents: - if "HETATM" in line or "ATOM" in line and bound_ligand_pdbid in line: - bound_ligand_pdb.append(line) - else: - pass + bound_ligand_pdb: list[str] = [ + line + for line in pdb_contents + if "HETATM" in line + or "ATOM" in line + and bound_ligand_pdbid in line + ] dehydrogenated_ligand: list[str] = bound_ligand_pdb | delete_hydrogens # type: ignore diff --git a/src/utils.py b/src/utils.py index c38c04b..8b22e2e 100644 --- a/src/utils.py +++ b/src/utils.py @@ -102,21 +102,19 @@ def download_pdb(pdb_id: str = "", pdb_file: Path = Path("")) -> List[str]: ) ).stdout.split("\n") - elif pdb_id == "" and pdb_file != Path(""): + elif not pdb_id and pdb_file != Path(""): with open(pdb_file, "r") as pdb_f: pdb_contents: list[str] = pdb_f.readlines() else: sys.exit("Please specify ONLY the PDB ID or the PDB FILE PATH!") - cleaned_pdb_contents = [ + return [ line for line in pdb_contents if len(line) != 0 and (line.split())[0] in record_types ] - return cleaned_pdb_contents - def checkConsecutive(lst: List[int]) -> List[int]: """ @@ -146,27 +144,24 @@ def checkConsecutive(lst: List[int]) -> List[int]: key_missing_numbers.insert(0, int("0")) key_missing_numbers.append(int("9999")) except IndexError: - key_missing_numbers.append(int("0")) - key_missing_numbers.append(int("9999")) - + key_missing_numbers.extend((int("0"), int("9999"))) return key_missing_numbers def get_lines(lines_lst: List[str], first_line: str = "") -> List[str]: output_lst: list[str] = [] for line_idx in range(len(lines_lst)): - if first_line != "": - if first_line in lines_lst[line_idx]: - output_lst.append(lines_lst[line_idx]) - try: - while len(lines_lst[line_idx]) != 0: - line_idx += 1 - if len(lines_lst[line_idx]) != 0: - output_lst.append(lines_lst[line_idx]) - else: - break - except IndexError: - pass + if first_line != "" and first_line in lines_lst[line_idx]: + output_lst.append(lines_lst[line_idx]) + try: + while len(lines_lst[line_idx]) != 0: + line_idx += 1 + if len(lines_lst[line_idx]) != 0: + output_lst.append(lines_lst[line_idx]) + else: + break + except IndexError: + pass return output_lst @@ -209,7 +204,7 @@ def read_topol(pdb_type: str, topol_file: Path, ligid: str) -> Dict[str, List[st topol_contents["SYSTEM"] = get_lines(lines, "[ system ]") topol_contents["MOLECULES"] = get_lines(lines, "[ molecules ]") - if pdb_type == "COMPLEX" or pdb_type == "PROTEIN": + if pdb_type in {"COMPLEX", "PROTEIN"}: topol_contents["FORCEFIELD PARAMS"] = get_lines( lines, "; Include forcefield parameters" ) @@ -243,7 +238,7 @@ def read_topol(pdb_type: str, topol_file: Path, ligid: str) -> Dict[str, List[st '#include "amber99sb.ff/ions.itp"', ] - if pdb_type == "COMPLEX" or pdb_type == "LIGAND": + if pdb_type in {"COMPLEX", "LIGAND"}: topol_contents["LIGAND TOPOLOGY"] = [ "; Include ligand topology", f'#include "{ligid}_fix.acpype/{ligid}_fix_GMX.itp"', @@ -258,8 +253,7 @@ def read_topol(pdb_type: str, topol_file: Path, ligid: str) -> Dict[str, List[st flat_lst: list[Any] = [] for line in topol_contents["MOLECULES"]: terms = line.split() - for term in terms: - flat_lst.append(term) + flat_lst.extend(iter(terms)) if f"{ligid}_fix" not in flat_lst: topol_contents["MOLECULES"].append(f"{ligid}_fix 1") @@ -278,9 +272,7 @@ def write_prm_file(lig_itp_file: Path) -> List[str]: with open(lig_itp_file, "r") as lig_itp_f: lines = lig_itp_f.readlines() - lig_itp_contents = lines[0:1] + lines[10:] - - return lig_itp_contents + return lines[:1] + lines[10:] def check_pdb_type(pdb_contents: List[str]) -> str: @@ -325,15 +317,8 @@ def check_pdb_type(pdb_contents: List[str]) -> str: if check: return "PROTEIN" - else: - count = 0 - for item in resname_lst: - if item in amino_acids_3_letter_codes: - count += 1 - if count == 0: - return "LIGAND" - else: - return "COMPLEX" + count = sum(item in amino_acids_3_letter_codes for item in resname_lst) + return "LIGAND" if count == 0 else "COMPLEX" def calc_net_charge(pdb_contents: List[str]) -> int: