Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
56 changes: 39 additions & 17 deletions raichu/cluster/terpene_cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
TailoringRepresentation,
IsomerizationRepresentation,
MethylShiftRepresentation,
WaterQuenchingRepresentation,
)
from raichu.reactions.general_tailoring_reactions import (
dephosphorylation,
Expand All @@ -31,6 +32,7 @@ def __init__(
macrocyclisations: List[MacrocyclizationRepresentation] = None,
double_bond_isomerisations: List[IsomerizationRepresentation] = None,
methyl_shifts: List[MethylShiftRepresentation] = None,
water_quenching: List[WaterQuenchingRepresentation] = None,
tailoring_representations: List[TailoringRepresentation] = None,
) -> None:
super().__init__(tailoring_representations, macrocyclisations)
Expand All @@ -39,6 +41,7 @@ def __init__(
self.precursor = precursor
self.isomerization_representations = double_bond_isomerisations
self.methyl_shift_representations = methyl_shifts
self.water_quenching_representations = water_quenching

self.chain_intermediate = None
self.tailored_product = None
Expand All @@ -49,6 +52,28 @@ def create_precursor(self) -> None:
substrate = TerpeneCyclaseSubstrate(self.precursor)
self.chain_intermediate = read_smiles(substrate.smiles)

def do_water_quenching(self):
    """Add an oxygen to every configured water-quenching site.

    Does nothing when ``self.water_quenching_representations`` is empty or
    ``None``. Otherwise the ``modification_sites`` of every representation
    are resolved through ``self.initialize_modification_sites`` and, for
    each resolved site:

    1. the first entry is looked up in ``self.chain_intermediate`` and used
       as the acceptor atom;
    2. at most one bond on the acceptor whose type is ``"double"`` or
       ``"aromatic"`` is passed to ``double_bond_reduction``;
    3. ``"O"`` is attached to the acceptor via ``addition``;
    4. the structure is refreshed with cycle detection enabled.

    Resolved sites that contain no atoms are skipped instead of raising
    ``IndexError``, in line with how ``do_methyl_shift`` ignores malformed
    entries.
    """
    if not self.water_quenching_representations:
        return

    initialized_water_quenching_atoms = self.initialize_modification_sites(
        [
            representation.modification_sites
            for representation in self.water_quenching_representations
        ]
    )

    for site_atoms in initialized_water_quenching_atoms:
        # A site without any resolved atom cannot receive the hydroxyl group.
        if not site_atoms:
            continue

        acceptor_atom = self.chain_intermediate.get_atom(site_atoms[0])

        # Only the first unsaturated bond on the acceptor is reduced; any
        # further double/aromatic bonds on the same atom are left untouched.
        unsaturated_bond = next(
            (
                bond
                for bond in acceptor_atom.bonds
                if bond.type in ("double", "aromatic")
            ),
            None,
        )
        if unsaturated_bond is not None:
            self.chain_intermediate = double_bond_reduction(
                unsaturated_bond.neighbours[0],
                unsaturated_bond.neighbours[1],
                self.chain_intermediate,
            )

        # NOTE(review): acceptor_atom was fetched before the reduction above;
        # this assumes `addition` accepts an atom from the pre-reduction
        # structure - confirm against the reaction helpers.
        self.chain_intermediate = addition(
            acceptor_atom, "O", self.chain_intermediate
        )
        self.chain_intermediate.refresh_structure(find_cycles=True)

def do_double_bond_isomerization(self):
if not self.isomerization_representations:
return
Expand Down Expand Up @@ -77,7 +102,6 @@ def do_double_bond_isomerization(self):
new_double_bond_atom2,
)


def do_methyl_shift(self):
if not self.methyl_shift_representations:
return
Expand All @@ -88,25 +112,23 @@ def do_methyl_shift(self):
]
)
for atoms in initialized_methyl_shift_atoms:
if len(atoms) != 2:
continue
transferred_c = atoms[0]
# Assert its actually a methyl group
assert [atom.type for atom in transferred_c.neighbours].count("H") == 3
source = None
source = [
if len(atoms) != 2:
continue
transferred_c = atoms[0]
# Assert its actually a methyl group
assert [atom.type for atom in transferred_c.neighbours].count("H") == 3
source = None
source = [
atom for atom in transferred_c.neighbours if atom.type != "H"
][0]
assert source
destination_c = atoms[1]
assert destination_c.has_neighbour("H")
assert transferred_c.type == "C" and destination_c.type == "C"

self.chain_intermediate = reductive_bond_breakage(source, transferred_c, self.chain_intermediate)
self.chain_intermediate = addition(destination_c, "C", self.chain_intermediate)
self.chain_intermediate.refresh_structure(find_cycles=True)

assert source
destination_c = atoms[1]
assert destination_c.has_neighbour("H")
assert transferred_c.type == "C" and destination_c.type == "C"

self.chain_intermediate = reductive_bond_breakage(source, transferred_c, self.chain_intermediate)
self.chain_intermediate = addition(destination_c, "C", self.chain_intermediate)
self.chain_intermediate.refresh_structure(find_cycles=True)

def do_macrocyclization(self, sequential=True):
initialized_macrocyclization_atoms = self.initialize_macrocyclization()
Expand Down
159 changes: 157 additions & 2 deletions raichu/data/trans_at.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,15 +7,20 @@
"ARST": [],
"PYR": [("DUMMY_SC", None)],
"ALPHAME_EDB": [("DUMMY_AMT", None), ("DUMMY_KR", None), ("DUMMY_EDH", None)],
"ALPHAME_ZDB": [("DUMMY_AMT", None), ("DUMMY_KR", None), ("DUMMY_ZDH", None)],
"ALPHAME_SHDB": [("DUMMY_AMT", None), ("DUMMY_KR", None), ("DUMMY_GDH", None)],
"OXI": [],
"AA": [],
"ALPHAME_BETA_L_OH": [("DUMMY_AMT", None), ("DUMMY_KR", "B1")],
"NON_ELONGATING_ALPHAME_SHDB": [("DUMMY_KR", None), ("DUMMY_GDH", None), ("DUMMY_AMT", None)],
"NON_ELONGATING_BETA_L_OME": [("DUMMY_KR", "B1"), ("DUMMY_OMT", None)],
"NON_ELONGATING_BETA_OME": [("DUMMY_KR", None), ("DUMMY_OMT", None)],
"NON_ELONGATING_BETA_L_OH": [("DUMMY_KR", "B1")],
"RED_SHDB": [("DUMMY_KR", None), ("DUMMY_GDH", None)],
"DB": [("DUMMY_KR", None), ("DUMMY_DH", None)],
"KETO": [],
"NON_ELONGATING": [],
"BETA_D_OH": [("DUMMY_KR", None)],
"BETA_D_OH": [("DUMMY_KR", "A1")],
"BETA_L_OH": [("DUMMY_KR", "B1")],
"BR": [
("DUMMY_KR", None),
Expand All @@ -34,7 +39,9 @@
("DUMMY_ER", None),
("DUMMY_AMT", None),
],
"BETA_D_OME": [("DUMMY_KR", None), ("DUMMY_OMT", None)],
"BETA_D_OME": [("DUMMY_KR", "A1"), ("DUMMY_OMT", None)],
"BETA_L_OME": [("DUMMY_KR", "B1"), ("DUMMY_OMT", None)],
"BETA_OME": [("DUMMY_KR", None), ("DUMMY_OMT", None)],
"NON_ELONGATING_PYR": [("DUMMY_SC", None)],
"BETA_ME": [
("DUMMY_KR", None),
Expand Down Expand Up @@ -72,6 +79,76 @@
"ALPHAME_DB": [("DUMMY_KR", None), ("DUMMY_DH", None), ("DUMMY_AMT", None)],
"ALPHAME_KETO": [("DUMMY_AMT", None)],
"NON_ELONGATING_SHDB": [("DUMMY_KR", None), ("DUMMY_GDH", None)],
"NON_ELONGATING_BETA_OH_KETO": [],
"NON_ELONGATING_ALPHA_OH": [("DUMMY_AH", None)],
"NON_ELONGATING_EDB": [("DUMMY_KR", None), ("DUMMY_EDH", None)],
"NON_ELONGATING_ARST": [],
"NON_ELONGATING_OXI": [],
"NON_ELONGATING_AA": [],
"NON_ELONGATING_ALPHAME_BETA_L_OH": [("DUMMY_AMT", None), ("DUMMY_KR", "B1")],
"NON_ELONGATING_RED_SHDB": [("DUMMY_KR", None), ("DUMMY_GDH", None)],
"NON_ELONGATING_KETO": [],
"NON_ELONGATING_BETA_D_OH": [("DUMMY_KR", None)],
"NON_ELONGATING_BR": [
("DUMMY_KR", None),
("DUMMY_DH", None),
("DUMMY_ER", None),
("DUMMY_BR", None),
],
"NON_ELONGATING_ALPHABETA_OH": [("DUMMY_AH", None), ("DUMMY_KR", None)],
"NON_ELONGATING_UNST": [],
"NON_ELONGATING_ST": [],
"NON_ELONGATING_MEOST": [],
"NON_ELONGATING_ACST": [],
"NON_ELONGATING_ALPHAME": [
("DUMMY_KR", None),
("DUMMY_DH", None),
("DUMMY_ER", None),
("DUMMY_AMT", None),
],
"NON_ELONGATING_BETA_D_OME": [("DUMMY_KR", None), ("DUMMY_OMT", None)],
"NON_ELONGATING_BETA_ME": [
("DUMMY_KR", None),
("DUMMY_DH", None),
("DUMMY_ER", None),
("DUMMY_BMT", None),
],
"NON_ELONGATING_BETA_OH_EDB": [("DUMMY_KR", None)],
"NON_ELONGATING_LACST": [],
"NON_ELONGATING_OUT": [],
"NON_ELONGATING_ZDB": [("DUMMY_KR", None), ("DUMMY_ZDH", None)],
"NON_ELONGATING_BETA_MEDB": [
("DUMMY_KR", None),
("DUMMY_EDH", None),
("DUMMY_BMT", None),
],
"NON_ELONGATING_MISCELLANEOUS": [],
"NON_ELONGATING_ALPHAME_BETA_D_OH": [("DUMMY_ALMT", None), ("DUMMY_KR", "A1")],
"NON_ELONGATING_ALPHAME_BETAOH": [("DUMMY_AMT", None), ("DUMMY_KR", None)],
"NON_ELONGATING_RED": [("DUMMY_KR", None), ("DUMMY_DH", None), ("DUMMY_ER", None)],
"NON_ELONGATING_EXOMETHYLENE": [
("DUMMY_KR", None),
("DUMMY_DH", None),
("DUMMY_ER", None),
("DUMMY_BMT", None),
("DUMMY_EMO", None),
],
"NON_ELONGATING_ALPHAME_ZDB": [
("DUMMY_KR", None),
("DUMMY_ZDH", None),
("DUMMY_AMT", None),
],
"NON_ELONGATING_ALPHA_D_ME_SHDB": [
("DUMMY_KR", None),
("DUMMY_GDH", None),
("DUMMY_AMT", None),
],
"NON_ELONGATING_ALPHAME_DB": [
("DUMMY_KR", None),
("DUMMY_DH", None),
("DUMMY_AMT", None),
],
"NON_ELONGATING_ALPHAME_KETO": [("DUMMY_AMT", None)],
}


Expand All @@ -84,9 +161,15 @@
"ARST": "PHENYLACETYL_COA",
"PYR": "ACETYL_COA",
"ALPHAME_EDB": "ACETYL_COA",
"ALPHAME_SHDB": "ACETYL_COA",
"OXI": "ACETYL_COA",
"BETA_L_OME": "ACETYL_COA",
"BETA_OME": "ACETYL_COA",
"AA": "GLYCINE",
"ALPHAME_BETA_L_OH": "ACETYL_COA",
"NON_ELONGATING_ALPHAME_SHDB": "ACETYL_COA",
"NON_ELONGATING_BETA_L_OME": "ACETYL_COA",
"NON_ELONGATING_BETA_OME": "ACETYL_COA",
"NON_ELONGATING_BETA_L_OH": "ACETYL_COA",
"RED_SHDB": "ACETYL_COA",
"DB": "ACETYL_COA",
Expand Down Expand Up @@ -124,6 +207,39 @@
"ALPHAME_DB": "ACETYL_COA",
"ALPHAME_KETO": "ACETYL_COA",
"NON_ELONGATING_SHDB": "ACETYL_COA",
"NON_ELONGATING_BETA_OH_KETO": "ACETYL_COA",
"NON_ELONGATING_ALPHA_OH": "ACETYL_COA",
"NON_ELONGATING_EDB": "ACETYL_COA",
"NON_ELONGATING_ARST": "PHENYLACETYL_COA",
"NON_ELONGATING_OXI": "ACETYL_COA",
"NON_ELONGATING_AA": "GLYCINE",
"NON_ELONGATING_ALPHAME_BETA_L_OH": "ACETYL_COA",
"NON_ELONGATING_RED_SHDB": "ACETYL_COA",
"NON_ELONGATING_KETO": "ACETYL_COA",
"NON_ELONGATING_BETA_D_OH": "ACETYL_COA",
"NON_ELONGATING_BR": "ACETYL_COA",
"NON_ELONGATING_ALPHABETA_OH": "ACETYL_COA",
"NON_ELONGATING_UNST": "PROPIONYL_COA",
"NON_ELONGATING_ST": "ACETYL_COA",
"NON_ELONGATING_MEOST": "METHOXYFORMYL_COA",
"NON_ELONGATING_ACST": "ACETYL_COA",
"NON_ELONGATING_ALPHAME": "ACETYL_COA",
"NON_ELONGATING_BETA_D_OME": "ACETYL_COA",
"NON_ELONGATING_BETA_ME": "ACETYL_COA",
"NON_ELONGATING_BETA_OH_EDB": "ACETYL_COA",
"NON_ELONGATING_LACST": "LACTYL_COA",
"NON_ELONGATING_OUT": "ACETYL_COA",
"NON_ELONGATING_ZDB": "ACETYL_COA",
"NON_ELONGATING_BETA_MEDB": "ACETYL_COA",
"NON_ELONGATING_MISCELLANEOUS": "ACETYL_COA",
"NON_ELONGATING_ALPHAME_BETA_D_OH": "ACETYL_COA",
"NON_ELONGATING_ALPHAME_BETAOH": "ACETYL_COA",
"NON_ELONGATING_RED": "ACETYL_COA",
"NON_ELONGATING_EXOMETHYLENE": "ACETYL_COA",
"NON_ELONGATING_ALPHAME_ZDB": "ACETYL_COA",
"NON_ELONGATING_ALPHA_D_ME_SHDB": "ACETYL_COA",
"NON_ELONGATING_ALPHAME_DB": "ACETYL_COA",
"NON_ELONGATING_ALPHAME_KETO": "ACETYL_COA",
}

TRANSATOR_CLADE_TO_ELONGATING = {
Expand All @@ -135,9 +251,15 @@
"ARST": True,
"PYR": True,
"ALPHAME_EDB": True,
"ALPHAME_SHDB": True,
"OXI": True,
"BETA_L_OME": True,
"BETA_OME": True,
"AA": True,
"ALPHAME_BETA_L_OH": True,
"NON_ELONGATING_ALPHAME_SHDB": False,
"NON_ELONGATING_BETA_L_OME": False,
"NON_ELONGATING_BETA_OME": False,
"NON_ELONGATING_BETA_L_OH": False,
"RED_SHDB": True,
"DB": True,
Expand Down Expand Up @@ -175,4 +297,37 @@
"ALPHAME_DB": True,
"ALPHAME_KETO": True,
"NON_ELONGATING_SHDB": False,
"NON_ELONGATING_BETA_OH_KETO": False,
"NON_ELONGATING_ALPHA_OH": False,
"NON_ELONGATING_EDB": False,
"NON_ELONGATING_ARST": False,
"NON_ELONGATING_OXI": False,
"NON_ELONGATING_AA": False,
"NON_ELONGATING_ALPHAME_BETA_L_OH": False,
"NON_ELONGATING_RED_SHDB": False,
"NON_ELONGATING_KETO": False,
"NON_ELONGATING_BETA_D_OH": False,
"NON_ELONGATING_BR": False,
"NON_ELONGATING_ALPHABETA_OH": False,
"NON_ELONGATING_UNST": False,
"NON_ELONGATING_ST": False,
"NON_ELONGATING_MEOST": False,
"NON_ELONGATING_ACST": False,
"NON_ELONGATING_ALPHAME": False,
"NON_ELONGATING_BETA_D_OME": False,
"NON_ELONGATING_BETA_ME": False,
"NON_ELONGATING_BETA_OH_EDB": False,
"NON_ELONGATING_LACST": False,
"NON_ELONGATING_OUT": False,
"NON_ELONGATING_ZDB": False,
"NON_ELONGATING_BETA_MEDB": False,
"NON_ELONGATING_MISCELLANEOUS": False,
"NON_ELONGATING_ALPHAME_BETA_D_OH": False,
"NON_ELONGATING_ALPHAME_BETAOH": False,
"NON_ELONGATING_RED": False,
"NON_ELONGATING_EXOMETHYLENE": False,
"NON_ELONGATING_ALPHAME_ZDB": False,
"NON_ELONGATING_ALPHA_D_ME_SHDB": False,
"NON_ELONGATING_ALPHAME_DB": False,
"NON_ELONGATING_ALPHAME_KETO": False,
}
39 changes: 39 additions & 0 deletions raichu/domain/domain_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,45 @@ class KSDomainSubtype(Enum):
ALPHAME_DB = 48
ALPHAME_KETO = 49
NON_ELONGATING_SHDB = 50
ALPHAME_SHDB = 51
BETA_L_OME = 52
BETA_OME = 53
NON_ELONGATING_AA = 54
NON_ELONGATING_ACST = 55
NON_ELONGATING_ALPHABETA_OH = 56
NON_ELONGATING_ALPHAME = 57
NON_ELONGATING_ALPHAME_BETAOH = 58
NON_ELONGATING_ALPHAME_BETA_D_OH = 59
NON_ELONGATING_ALPHAME_BETA_L_OH = 60
NON_ELONGATING_ALPHAME_DB = 61
NON_ELONGATING_ALPHAME_KETO = 62
NON_ELONGATING_ALPHAME_SHDB = 63
NON_ELONGATING_ALPHAME_ZDB = 64
NON_ELONGATING_ALPHA_D_ME_SHDB = 65
NON_ELONGATING_ALPHA_OH = 66
NON_ELONGATING_ARST = 67
NON_ELONGATING_BETA_D_OH = 68
NON_ELONGATING_BETA_D_OME = 69
NON_ELONGATING_BETA_L_OME = 70
NON_ELONGATING_BETA_ME = 71
NON_ELONGATING_BETA_MEDB = 72
NON_ELONGATING_BETA_OH_EDB = 73
NON_ELONGATING_BETA_OH_KETO = 74
NON_ELONGATING_BETA_OME = 75
NON_ELONGATING_BR = 76
NON_ELONGATING_EDB = 77
NON_ELONGATING_EXOMETHYLENE = 78
NON_ELONGATING_KETO = 79
NON_ELONGATING_LACST = 80
NON_ELONGATING_MEOST = 81
NON_ELONGATING_MISCELLANEOUS = 82
NON_ELONGATING_OUT = 83
NON_ELONGATING_OXI = 84
NON_ELONGATING_RED = 85
NON_ELONGATING_RED_SHDB = 86
NON_ELONGATING_ST = 87
NON_ELONGATING_UNST = 88
NON_ELONGATING_ZDB = 89

@staticmethod
def from_string(label: str) -> "KSDomainSubtype":
Expand Down
3 changes: 3 additions & 0 deletions raichu/general.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,6 +83,9 @@ def draw_terpene_structure_from_terpene_cluster(terpene_cluster: TerpeneCluster,
terpene_cluster.draw_product(
as_string=False, out_file=os.path.join(out_folder, "precursor_test_terpene.svg"))
terpene_cluster.do_macrocyclization()
terpene_cluster.do_double_bond_isomerization()
terpene_cluster.do_methyl_shift()
terpene_cluster.do_water_quenching()
terpene_cluster.draw_product(
as_string=False, out_file=os.path.join(out_folder, "macroyclisation_test_terpene.svg"))
terpene_cluster.do_tailoring()
Expand Down
6 changes: 6 additions & 0 deletions raichu/representations.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,12 @@ class MethylShiftRepresentation:
modification_sites: List[List[str]]


@dataclass
class WaterQuenchingRepresentation:
    """Describe where a water-quenching step should add a hydroxyl group.

    The class only carries data; it holds no behaviour of its own.
    """

    # One inner list per quenching event. Each inner list should name the
    # atom that is to receive the OH group.
    # NOTE(review): the exact format of the atom strings is not visible
    # here - confirm against the code that resolves modification sites.
    modification_sites: List[List[str]]


@dataclass
class DomainRepresentation:
gene_name: Union[str, None]
Expand Down
2 changes: 1 addition & 1 deletion raichu/test/integration/test_trans_at_pks.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ def test_extensive_KS_subtypes():
# Check if final structure is correct
assert (
structure_to_smiles(cluster.chain_intermediate)
== r"OC(=O)C[C@H](O)CC(=C)C[C@@H](O1)CC(C[*])CC1C(C)C=C[C@H](O2)CC(OC)C2C(=O)C=C[C@H](O)C"
== r"OC(=O)C[C@H](O)CC(=C)C[C@@H](O1)CC(C[*])CC1C(C)C=C[C@H](O2)C[C@@H](OC)C2C(=O)C=C[C@H](O)C"
)


Expand Down