Skip to content

Selection of Nter, Cter and 5'end states at topology generation #1273

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 12 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
44 changes: 44 additions & 0 deletions integration_tests/golden_data/mini_dna.pdb
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
ATOM 1 P DA B 1 0.315 9.360 -1.520 1.00 0.00 B
ATOM 2 O1P DA B 1 0.321 10.697 -2.151 1.00 0.00 B
ATOM 3 O2P DA B 1 1.317 9.141 -0.451 1.00 0.00 B
ATOM 4 O5' DA B 1 -1.147 9.038 -0.954 1.00 0.00 B
ATOM 5 C5' DA B 1 -2.123 8.457 -1.844 1.00 0.00 B
ATOM 6 C4' DA B 1 -2.867 7.341 -1.136 1.00 0.00 B
ATOM 7 O4' DA B 1 -2.198 6.055 -1.270 1.00 0.00 B
ATOM 8 C3' DA B 1 -3.039 7.508 0.371 1.00 0.00 B
ATOM 9 O3' DA B 1 -4.314 7.002 0.746 1.00 0.00 B
ATOM 10 C2' DA B 1 -1.961 6.613 0.982 1.00 0.00 B
ATOM 11 C1' DA B 1 -2.118 5.453 0.006 1.00 0.00 B
ATOM 12 N9 DA B 1 -0.986 4.531 0.008 1.00 0.00 B
ATOM 13 C8 DA B 1 0.352 4.845 0.001 1.00 0.00 B
ATOM 14 N7 DA B 1 1.139 3.798 0.003 1.00 0.00 B
ATOM 15 C5 DA B 1 0.263 2.721 0.002 1.00 0.00 B
ATOM 16 C6 DA B 1 0.472 1.332 0.003 1.00 0.00 B
ATOM 17 N6 DA B 1 1.681 0.764 -0.003 1.00 0.00 B
ATOM 18 N1 DA B 1 -0.618 0.534 0.002 1.00 0.00 B
ATOM 19 C2 DA B 1 -1.828 1.103 0.008 1.00 0.00 B
ATOM 20 N3 DA B 1 -2.154 2.393 0.007 1.00 0.00 B
ATOM 21 C4 DA B 1 -1.050 3.159 0.008 1.00 0.00 B
ATOM 22 P DG B 2 -5.217 7.755 1.698 1.00 0.00 B
ATOM 23 O1P DG B 2 -5.968 8.851 1.048 1.00 0.00 B
ATOM 24 O2P DG B 2 -4.336 8.150 2.824 1.00 0.00 B
ATOM 25 O5' DG B 2 -6.237 6.624 2.192 1.00 0.00 B
ATOM 26 C5' DG B 2 -6.636 5.596 1.263 1.00 0.00 B
ATOM 27 C4' DG B 2 -6.616 4.244 1.945 1.00 0.00 B
ATOM 28 O4' DG B 2 -5.311 3.601 1.866 1.00 0.00 B
ATOM 29 C3' DG B 2 -6.931 4.251 3.441 1.00 0.00 B
ATOM 30 O3' DG B 2 -7.682 3.085 3.764 1.00 0.00 B
ATOM 31 C2' DG B 2 -5.567 4.151 4.125 1.00 0.00 B
ATOM 32 C1' DG B 2 -4.959 3.140 3.162 1.00 0.00 B
ATOM 33 N9 DG B 2 -3.504 3.061 3.233 1.00 0.00 B
ATOM 34 C8 DG B 2 -2.619 4.111 3.298 1.00 0.00 B
ATOM 35 N7 DG B 2 -1.373 3.727 3.353 1.00 0.00 B
ATOM 36 C5 DG B 2 -1.437 2.340 3.332 1.00 0.00 B
ATOM 37 C6 DG B 2 -0.403 1.370 3.368 1.00 0.00 B
ATOM 38 O6 DG B 2 0.820 1.550 3.431 1.00 0.00 B
ATOM 39 N1 DG B 2 -0.911 0.077 3.320 1.00 0.00 B
ATOM 40 C2 DG B 2 -2.244 -0.243 3.241 1.00 0.00 B
ATOM 41 N2 DG B 2 -2.536 -1.553 3.207 1.00 0.00 B
ATOM 42 N3 DG B 2 -3.218 0.650 3.211 1.00 0.00 B
ATOM 43 C4 DG B 2 -2.747 1.914 3.254 1.00 0.00 B
END
190 changes: 186 additions & 4 deletions integration_tests/test_topoaa.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
import tempfile
import copy
from pathlib import Path

import pytest
Expand All @@ -20,17 +21,52 @@ def topoaa_module():
yield topoaa


def extract_nter_cter(fpath) -> tuple[str, str]:
    """Extract the first and last residues of a PDB file as strings.

    Only coordinate records (``ATOM`` / ``HETATM``) are considered. A
    residue is identified by its chain ID, residue number and insertion
    code (PDB columns 22-27), so that a residue sharing its number with
    the first one (e.g. in another chain) is not mistaken for it.

    Parameters
    ----------
    fpath : path-like
        Path to the PDB file to read.

    Returns
    -------
    nter : str
        Concatenated coordinate lines of the first residue found.
    cter : str
        Concatenated coordinate lines of the last residue found.
        Both strings are empty when the file holds no coordinate record.
    """
    nter_lines: list[str] = []
    cter_lines: list[str] = []
    nter_key = None
    cter_key = None
    # Becomes True as soon as a residue other than the first one is seen
    nter_done = False
    with open(fpath, "r") as fin:
        for line in fin:
            # Make sure we are looking at coordinates
            if not line.startswith(("ATOM", "HETATM")):
                continue
            # Chain ID + residue number + insertion code
            res_key = line[21:27]
            # First residue should be the Nter one
            if nter_key is None:
                nter_key = res_key
            if res_key != nter_key:
                nter_done = True
            elif not nter_done:
                nter_lines.append(line)
            # Restart the Cter accumulation at every residue change, so
            # that only the last residue remains at the end of the file
            if res_key != cter_key:
                cter_lines = []
                cter_key = res_key
            cter_lines.append(line)
    return "".join(nter_lines), "".join(cter_lines)


def test_topoaa_module_protein(topoaa_module):
"""Topoaa module with protein-protein input"""
topoaa_module.params["molecules"] = [
Path(GOLDEN_DATA, "e2aP_1F3G.pdb"),
Path(GOLDEN_DATA, "hpr_ensemble.pdb"),
]
topoaa_module.params["mol1"] = {"prot_segid": "A"}
topoaa_module.params["mol2"] = {"prot_segid": "B"}
topoaa_module.params["mol1"]["prot_segid"] = "A"
# Create mol2 parameters by copying the ones found for mol1
topoaa_module.params["mol2"] = copy.deepcopy(topoaa_module.params["mol1"])
topoaa_module.params["mol2"]["prot_segid"] = "B"
topoaa_module.params["cns_exec"] = CNS_EXEC
topoaa_module.params["debug"] = True

topoaa_module.run()

expected_inp = Path(topoaa_module.path, "e2aP_1F3G.inp")
Expand Down Expand Up @@ -94,7 +130,7 @@ def test_topoaa_cyclic(topoaa_module):
]
topoaa_module.params["cyclicpept_dist"] = 3.5
topoaa_module.params["disulphide_dist"] = 4.0
topoaa_module.params["mol1"] = {"cyclicpept": True}
topoaa_module.params["mol1"]["cyclicpept"] = True
topoaa_module.params["cns_exec"] = CNS_EXEC
topoaa_module.params["debug"] = True

Expand All @@ -115,3 +151,149 @@ def test_topoaa_cyclic(topoaa_module):

assert "detected" in file_content
assert "disulphide" in file_content


def test_topoaa_module_protein_noCter(topoaa_module):
    """Run topoaa with `charged_nter` enabled and `charged_cter` disabled."""
    settings = topoaa_module.params
    settings["molecules"] = [Path(GOLDEN_DATA, "2oob_A.pdb")]
    settings["mol1"]["charged_nter"] = True
    settings["mol1"]["charged_cter"] = False
    settings["cns_exec"] = CNS_EXEC
    settings["debug"] = True
    topoaa_module.run()

    # All files produced by the topology generation must be present
    produced = (
        "2oob_A.inp",
        "2oob_A_haddock.psf",
        "2oob_A_haddock.pdb",
        "2oob_A.out.gz",
    )
    for fname in produced:
        assert Path(topoaa_module.path, fname).exists()

    # Inspect the terminal residues of the generated structure
    nter, cter = extract_nter_cter(Path(topoaa_module.path, "2oob_A_haddock.pdb"))
    assert "OXT" not in cter
    for hydrogen in ("HT1", "HT2", "HT3"):
        assert hydrogen in nter


def test_topoaa_module_protein_noNter(topoaa_module):
    """Run topoaa with `charged_cter` enabled and `charged_nter` disabled."""
    settings = topoaa_module.params
    settings["molecules"] = [Path(GOLDEN_DATA, "2oob_A.pdb")]
    settings["mol1"]["charged_nter"] = False
    settings["mol1"]["charged_cter"] = True
    settings["cns_exec"] = CNS_EXEC
    settings["debug"] = True
    topoaa_module.run()

    # All files produced by the topology generation must be present
    produced = (
        "2oob_A.inp",
        "2oob_A_haddock.psf",
        "2oob_A_haddock.pdb",
        "2oob_A.out.gz",
    )
    for fname in produced:
        assert Path(topoaa_module.path, fname).exists()

    # Inspect the terminal residues of the generated structure
    nter, cter = extract_nter_cter(Path(topoaa_module.path, "2oob_A_haddock.pdb"))
    assert "OXT" in cter
    for hydrogen in ("HT1", "HT2", "HT3"):
        assert hydrogen not in nter

def test_topoaa_module_protein_noter(topoaa_module):
    """Run topoaa with both `charged_nter` and `charged_cter` disabled."""
    settings = topoaa_module.params
    settings["molecules"] = [Path(GOLDEN_DATA, "2oob_A.pdb")]
    settings["mol1"]["charged_nter"] = False
    settings["mol1"]["charged_cter"] = False
    settings["cns_exec"] = CNS_EXEC
    settings["debug"] = True
    topoaa_module.run()

    # All files produced by the topology generation must be present
    produced = (
        "2oob_A.inp",
        "2oob_A_haddock.psf",
        "2oob_A_haddock.pdb",
        "2oob_A.out.gz",
    )
    for fname in produced:
        assert Path(topoaa_module.path, fname).exists()

    # Inspect the terminal residues of the generated structure
    nter, cter = extract_nter_cter(Path(topoaa_module.path, "2oob_A_haddock.pdb"))
    assert "OXT" not in cter
    for hydrogen in ("HT1", "HT2", "HT3"):
        assert hydrogen not in nter


def test_topoaa_module_protein_charged_ters(topoaa_module):
    """Run topoaa with both `charged_nter` and `charged_cter` enabled."""
    settings = topoaa_module.params
    settings["molecules"] = [Path(GOLDEN_DATA, "2oob_A.pdb")]
    settings["mol1"]["charged_nter"] = True
    settings["mol1"]["charged_cter"] = True
    settings["cns_exec"] = CNS_EXEC
    settings["debug"] = True
    topoaa_module.run()

    # All files produced by the topology generation must be present
    produced = (
        "2oob_A.inp",
        "2oob_A_haddock.psf",
        "2oob_A_haddock.pdb",
        "2oob_A.out.gz",
    )
    for fname in produced:
        assert Path(topoaa_module.path, fname).exists()

    # Inspect the terminal residues of the generated structure
    nter, cter = extract_nter_cter(Path(topoaa_module.path, "2oob_A_haddock.pdb"))
    assert "OXT" in cter
    for hydrogen in ("HT1", "HT2", "HT3"):
        assert hydrogen in nter


def test_topoaa_module_dna_5_phosphate(topoaa_module):
    """Topoaa module on DNA with the `5_phosphate` parameter enabled."""
    topoaa_module.params["molecules"] = [
        Path(GOLDEN_DATA, "mini_dna.pdb"),
    ]
    topoaa_module.params["mol1"]["5_phosphate"] = True
    topoaa_module.params["cns_exec"] = CNS_EXEC
    topoaa_module.params["debug"] = True
    topoaa_module.run()

    expected_inp = Path(topoaa_module.path, "mini_dna.inp")
    expected_psf = Path(topoaa_module.path, "mini_dna_haddock.psf")
    expected_pdb = Path(topoaa_module.path, "mini_dna_haddock.pdb")
    expected_gz = Path(topoaa_module.path, "mini_dna.out.gz")
    assert expected_inp.exists()
    assert expected_psf.exists()
    assert expected_pdb.exists()
    assert expected_gz.exists()

    five_prime, _three_prime = extract_nter_cter(expected_pdb)
    # Compare exact atom names (PDB columns 13-16) instead of substrings:
    # a bare `"P" in text` check is already satisfied by "OP1" or "OP2"
    # and would therefore never detect a missing phosphorus atom.
    atom_names = {line[12:16].strip() for line in five_prime.splitlines()}
    expected_atoms = {"H5T", "O5T", "OP2", "OP1", "P"}
    missing = expected_atoms - atom_names
    assert not missing, f"Missing 5' atoms: {sorted(missing)}"


def test_topoaa_module_dna_5_oh(topoaa_module):
    """Topoaa module on DNA with the `5_phosphate` parameter disabled."""
    topoaa_module.params["molecules"] = [
        Path(GOLDEN_DATA, "mini_dna.pdb"),
    ]
    topoaa_module.params["mol1"]["5_phosphate"] = False
    topoaa_module.params["cns_exec"] = CNS_EXEC
    topoaa_module.params["debug"] = True
    topoaa_module.run()

    expected_inp = Path(topoaa_module.path, "mini_dna.inp")
    expected_psf = Path(topoaa_module.path, "mini_dna_haddock.psf")
    expected_pdb = Path(topoaa_module.path, "mini_dna_haddock.pdb")
    expected_gz = Path(topoaa_module.path, "mini_dna.out.gz")
    assert expected_inp.exists()
    assert expected_psf.exists()
    assert expected_pdb.exists()
    assert expected_gz.exists()

    five_prime, _three_prime = extract_nter_cter(expected_pdb)
    # Compare exact atom names (PDB columns 13-16) instead of substrings:
    # a bare `"P" not in text` check would fail on any "P" character
    # found elsewhere in the line (residue name, segid, ...).
    atom_names = {line[12:16].strip() for line in five_prime.splitlines()}
    unexpected = atom_names & {"O5T", "OP2", "OP1", "P"}
    assert not unexpected, f"Unexpected 5' atoms: {sorted(unexpected)}"
    assert "H5T" in atom_names
1 change: 0 additions & 1 deletion src/haddock/cns/toppar/ligand.param

This file was deleted.

Empty file removed src/haddock/cns/toppar/ligand.pep
Empty file.
1 change: 0 additions & 1 deletion src/haddock/cns/toppar/ligand.top

This file was deleted.

6 changes: 0 additions & 6 deletions src/haddock/cns/toppar/shape.link

This file was deleted.

11 changes: 11 additions & 0 deletions src/haddock/core/cns_paths.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,17 @@
SCATTER_LIB = "scatter.lib"
INITIAL_POSITIONS_DIR = "initial_positions"

# Protein link files, keyed by "<N-terminus state>,<C-terminus state>".
# "NH3+" / "COO-" are the keys used for charged termini, "NH" / "CO" the
# ones used for uncharged termini. With both termini uncharged, the
# file named by LINK_FILE is used.
PROTEIN_LINK_FILES = {
    "NH3+,COO-": "protein-allhdg5-4.link",
    "NH,COO-": "protein-allhdg5-4-noNter.link",
    "NH3+,CO": "protein-allhdg5-4-noCter.link",
    "NH,CO": LINK_FILE,
}
# Nucleic acid link files, keyed by the state of the 5' end.
NUCL_LINK_FILES = {
    "5'Phosphate": "dna-rna-pho-1.3.link",
    "5'OH": "dna-rna-1.3.link",
}

# default prepared paths
link_file = Path(toppar_path, LINK_FILE)
scatter_lib = Path(toppar_path, SCATTER_LIB)
Expand Down
60 changes: 57 additions & 3 deletions src/haddock/libs/libcns.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,56 @@ def generate_default_header(
)


def find_desired_linkfiles(
    charged_nter: bool = False,
    charged_cter: bool = False,
    phosphate_5: bool = False,
    path: Optional[FilePath] = None,
) -> dict[str, Path]:
    """Select the link files matching the requested termini states.

    Parameters
    ----------
    charged_nter : bool, optional
        Should the protein N-terminus be charged? By default False.
    charged_cter : bool, optional
        Should the protein C-terminus be charged? By default False.
    phosphate_5 : bool, optional
        Should the nucleic acid 5' end carry a phosphate? By default False.
    path : Optional[FilePath], optional
        Directory holding the CNS topology/parameters files. When None
        (default), the file names are returned as stored in `cns_paths`,
        without being converted to `Path` objects.

    Returns
    -------
    linkfiles : dict[str, Path]
        Maps the `prot_link_infile` and `nucl_link_infile` keys to the
        link file to be used during topology generation.
    """
    # Compose the lookup keys from the state of each terminus
    nter_state = "NH3+" if charged_nter else "NH"
    cter_state = "COO-" if charged_cter else "CO"
    five_prime_state = "5'Phosphate" if phosphate_5 else "5'OH"

    selected = {
        "prot_link_infile": cns_paths.PROTEIN_LINK_FILES[
            f"{nter_state},{cter_state}"
        ],
        "nucl_link_infile": cns_paths.NUCL_LINK_FILES[five_prime_state],
    }
    # Without a base directory, hand back the bare file names
    if path is None:
        return selected
    return {name: Path(path, fname) for name, fname in selected.items()}


def _is_nan(x: Any) -> bool:
"""Inspect if is nan."""
try:
Expand Down Expand Up @@ -116,10 +166,13 @@ def load_workflow_params(

Returns
-------
str
param_header: str
The string with the CNS parameters defined.
"""
non_empty_parameters = ((k, v) for k, v in params.items() if filter_empty_vars(v))
non_empty_parameters = (
(k, v) for k, v in params.items()
if filter_empty_vars(v)
)

# types besides the ones in the if-statements should not enter this loop
for param, v in non_empty_parameters:
Expand Down Expand Up @@ -160,7 +213,8 @@ def write_eval_line(param: Any, value: Any, eval_line: str = "eval (${}={})") ->
def load_link(mol_link: Path) -> str:
"""Add the link header."""
return load_workflow_params(
param_header=f"{linesep}! Link file{linesep}", link_file=mol_link
param_header=f"{linesep}! Link file{linesep}",
prot_link_infile=mol_link,
)


Expand Down
4 changes: 0 additions & 4 deletions src/haddock/modules/refinement/emref/cns/read_param.cns
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,6 @@ eval ($carbo_parameter_infile="TOPPAR:carbohydrate.param")
{* solvent parameter file *}
eval ($solv_parameter_infile="TOPPAR:solvent-allhdg5-4.param")

{* ligand parameter file *}
eval ($cofac_parameter_infile="TOPPAR:ligand.param")

{* fragment probes parameter file *}
eval ($ligands_parameter_infile="TOPPAR:fragment_probes.param")

Expand All @@ -46,7 +43,6 @@ parameter
@@$carbo_parameter_infile
@@$solv_parameter_infile
@@$ligands_parameter_infile
@@$cofac_parameter_infile
@@$heme_parameter_infile
@@$shape_parameter_infile
@@$cofactors_parameter_infile
Expand Down
Loading