From 5f24d0f10e2294a84be6583423f92e8751042320 Mon Sep 17 00:00:00 2001 From: FriederikeBiermann Date: Wed, 27 Aug 2025 21:58:02 +0200 Subject: [PATCH 1/4] feat(trans_at_subtypes): added more ks subsubtypes for trans at pks, specifically non elongating --- raichu/data/trans_at.py | 159 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 157 insertions(+), 2 deletions(-) diff --git a/raichu/data/trans_at.py b/raichu/data/trans_at.py index 79e21d6..2e69213 100644 --- a/raichu/data/trans_at.py +++ b/raichu/data/trans_at.py @@ -7,15 +7,20 @@ "ARST": [], "PYR": [("DUMMY_SC", None)], "ALPHAME_EDB": [("DUMMY_AMT", None), ("DUMMY_KR", None), ("DUMMY_EDH", None)], + "ALPHAME_ZDB": [("DUMMY_AMT", None), ("DUMMY_KR", None), ("DUMMY_ZDH", None)], + "ALPHAME_SHDB": [("DUMMY_AMT", None), ("DUMMY_KR", None), ("DUMMY_GDH", None)], "OXI": [], "AA": [], "ALPHAME_BETA_L_OH": [("DUMMY_AMT", None), ("DUMMY_KR", "B1")], + "NON_ELONGATING_ALPHAME_SHDB": [("DUMMY_KR", None), ("DUMMY_GDH", None), ("DUMMY_AMT", None)], + "NON_ELONGATING_BETA_L_OME": [("DUMMY_KR", "B1"), ("DUMMY_OMT", None)], + "NON_ELONGATING_BETA_OME": [("DUMMY_KR", None), ("DUMMY_OMT", None)], "NON_ELONGATING_BETA_L_OH": [("DUMMY_KR", "B1")], "RED_SHDB": [("DUMMY_KR", None), ("DUMMY_GDH", None)], "DB": [("DUMMY_KR", None), ("DUMMY_DH", None)], "KETO": [], "NON_ELONGATING": [], - "BETA_D_OH": [("DUMMY_KR", None)], + "BETA_D_OH": [("DUMMY_KR", "A1")], "BETA_L_OH": [("DUMMY_KR", "B1")], "BR": [ ("DUMMY_KR", None), @@ -34,7 +39,9 @@ ("DUMMY_ER", None), ("DUMMY_AMT", None), ], - "BETA_D_OME": [("DUMMY_KR", None), ("DUMMY_OMT", None)], + "BETA_D_OME": [("DUMMY_KR", "A1"), ("DUMMY_OMT", None)], + "BETA_L_OME": [("DUMMY_KR", "B1"), ("DUMMY_OMT", None)], + "BETA_OME": [("DUMMY_KR", None), ("DUMMY_OMT", None)], "NON_ELONGATING_PYR": [("DUMMY_SC", None)], "BETA_ME": [ ("DUMMY_KR", None), @@ -72,6 +79,76 @@ "ALPHAME_DB": [("DUMMY_KR", None), ("DUMMY_DH", None), ("DUMMY_AMT", None)], "ALPHAME_KETO": [("DUMMY_AMT", None)], "NON_ELONGATING_SHDB": [("DUMMY_KR", None), ("DUMMY_GDH", None)], + "NON_ELONGATING_BETA_OH_KETO": [], + "NON_ELONGATING_ALPHA_OH": [("DUMMY_AH", None)], + "NON_ELONGATING_EDB": [("DUMMY_KR", None), ("DUMMY_EDH", None)], + "NON_ELONGATING_ARST": [], + "NON_ELONGATING_OXI": [], + "NON_ELONGATING_AA": [], + "NON_ELONGATING_ALPHAME_BETA_L_OH": [("DUMMY_AMT", None), ("DUMMY_KR", "B1")], + "NON_ELONGATING_RED_SHDB": [("DUMMY_KR", None), ("DUMMY_GDH", None)], + "NON_ELONGATING_KETO": [], + "NON_ELONGATING_BETA_D_OH": [("DUMMY_KR", None)], + "NON_ELONGATING_BR": [ + ("DUMMY_KR", None), + ("DUMMY_DH", None), + ("DUMMY_ER", None), + ("DUMMY_BR", None), + ], + "NON_ELONGATING_ALPHABETA_OH": [("DUMMY_AH", None), ("DUMMY_KR", None)], + "NON_ELONGATING_UNST": [], + "NON_ELONGATING_ST": [], + "NON_ELONGATING_MEOST": [], + "NON_ELONGATING_ACST": [], + "NON_ELONGATING_ALPHAME": [ + ("DUMMY_KR", None), + ("DUMMY_DH", None), + ("DUMMY_ER", None), + ("DUMMY_AMT", None), + ], + "NON_ELONGATING_BETA_D_OME": [("DUMMY_KR", None), ("DUMMY_OMT", None)], + "NON_ELONGATING_BETA_ME": [ + ("DUMMY_KR", None), + ("DUMMY_DH", None), + ("DUMMY_ER", None), + ("DUMMY_BMT", None), + ], + "NON_ELONGATING_BETA_OH_EDB": [("DUMMY_KR", None)], + "NON_ELONGATING_LACST": [], + "NON_ELONGATING_OUT": [], + "NON_ELONGATING_ZDB": [("DUMMY_KR", None), ("DUMMY_ZDH", None)], + "NON_ELONGATING_BETA_MEDB": [ + ("DUMMY_KR", None), + ("DUMMY_EDH", None), + ("DUMMY_BMT", None), + ], + "NON_ELONGATING_MISCELLANEOUS": [], + "NON_ELONGATING_ALPHAME_BETA_D_OH": [("DUMMY_ALMT", None), ("DUMMY_KR", "A1")], + "NON_ELONGATING_ALPHAME_BETAOH": [("DUMMY_AMT", None), ("DUMMY_KR", None)], + "NON_ELONGATING_RED": [("DUMMY_KR", None), ("DUMMY_DH", None), ("DUMMY_ER", None)], + "NON_ELONGATING_EXOMETHYLENE": [ + ("DUMMY_KR", None), + ("DUMMY_DH", None), + ("DUMMY_ER", None), + ("DUMMY_BMT", None), + ("DUMMY_EMO", None), + ], + "NON_ELONGATING_ALPHAME_ZDB": [ + ("DUMMY_KR", None), + ("DUMMY_ZDH", None), + ("DUMMY_AMT", None), + ], + "NON_ELONGATING_ALPHA_D_ME_SHDB": [ + ("DUMMY_KR", None), + ("DUMMY_GDH", None), + ("DUMMY_AMT", None), + ], + "NON_ELONGATING_ALPHAME_DB": [ + ("DUMMY_KR", None), + ("DUMMY_DH", None), + ("DUMMY_AMT", None), + ], + "NON_ELONGATING_ALPHAME_KETO": [("DUMMY_AMT", None)], } @@ -84,9 +161,15 @@ "ARST": "PHENYLACETYL_COA", "PYR": "ACETYL_COA", "ALPHAME_EDB": "ACETYL_COA", + "ALPHAME_SHDB": "ACETYL_COA", "OXI": "ACETYL_COA", + "BETA_L_OME": "ACETYL_COA", + "BETA_OME": "ACETYL_COA", "AA": "GLYCINE", "ALPHAME_BETA_L_OH": "ACETYL_COA", + "NON_ELONGATING_ALPHAME_SHDB": "ACETYL_COA", + "NON_ELONGATING_BETA_L_OME": "ACETYL_COA", + "NON_ELONGATING_BETA_OME": "ACETYL_COA", "NON_ELONGATING_BETA_L_OH": "ACETYL_COA", "RED_SHDB": "ACETYL_COA", "DB": "ACETYL_COA", @@ -124,6 +207,39 @@ "ALPHAME_DB": "ACETYL_COA", "ALPHAME_KETO": "ACETYL_COA", "NON_ELONGATING_SHDB": "ACETYL_COA", + "NON_ELONGATING_BETA_OH_KETO": "ACETYL_COA", + "NON_ELONGATING_ALPHA_OH": "ACETYL_COA", + "NON_ELONGATING_EDB": "ACETYL_COA", + "NON_ELONGATING_ARST": "PHENYLACETYL_COA", + "NON_ELONGATING_OXI": "ACETYL_COA", + "NON_ELONGATING_AA": "GLYCINE", + "NON_ELONGATING_ALPHAME_BETA_L_OH": "ACETYL_COA", + "NON_ELONGATING_RED_SHDB": "ACETYL_COA", + "NON_ELONGATING_KETO": "ACETYL_COA", + "NON_ELONGATING_BETA_D_OH": "ACETYL_COA", + "NON_ELONGATING_BR": "ACETYL_COA", + "NON_ELONGATING_ALPHABETA_OH": "ACETYL_COA", + "NON_ELONGATING_UNST": "PROPIONYL_COA", + "NON_ELONGATING_ST": "ACETYL_COA", + "NON_ELONGATING_MEOST": "METHOXYFORMYL_COA", + "NON_ELONGATING_ACST": "ACETYL_COA", + "NON_ELONGATING_ALPHAME": "ACETYL_COA", + "NON_ELONGATING_BETA_D_OME": "ACETYL_COA", + "NON_ELONGATING_BETA_ME": "ACETYL_COA", + "NON_ELONGATING_BETA_OH_EDB": "ACETYL_COA", + "NON_ELONGATING_LACST": "LACTYL_COA", + "NON_ELONGATING_OUT": "ACETYL_COA", + "NON_ELONGATING_ZDB": "ACETYL_COA", + "NON_ELONGATING_BETA_MEDB": "ACETYL_COA", + "NON_ELONGATING_MISCELLANEOUS": "ACETYL_COA", + "NON_ELONGATING_ALPHAME_BETA_D_OH": "ACETYL_COA", + "NON_ELONGATING_ALPHAME_BETAOH": "ACETYL_COA", + "NON_ELONGATING_RED": "ACETYL_COA", + "NON_ELONGATING_EXOMETHYLENE": "ACETYL_COA", + "NON_ELONGATING_ALPHAME_ZDB": "ACETYL_COA", + "NON_ELONGATING_ALPHA_D_ME_SHDB": "ACETYL_COA", + "NON_ELONGATING_ALPHAME_DB": "ACETYL_COA", + "NON_ELONGATING_ALPHAME_KETO": "ACETYL_COA", } TRANSATOR_CLADE_TO_ELONGATING = { @@ -135,9 +251,15 @@ "ARST": True, "PYR": True, "ALPHAME_EDB": True, + "ALPHAME_SHDB": True, "OXI": True, + "BETA_L_OME": True, + "BETA_OME": True, "AA": True, "ALPHAME_BETA_L_OH": True, + "NON_ELONGATING_ALPHAME_SHDB": False, + "NON_ELONGATING_BETA_L_OME": False, + "NON_ELONGATING_BETA_OME": False, "NON_ELONGATING_BETA_L_OH": False, "RED_SHDB": True, "DB": True, @@ -175,4 +297,37 @@ "ALPHAME_DB": True, "ALPHAME_KETO": True, "NON_ELONGATING_SHDB": False, + "NON_ELONGATING_BETA_OH_KETO": False, + "NON_ELONGATING_ALPHA_OH": False, + "NON_ELONGATING_EDB": False, + "NON_ELONGATING_ARST": False, + "NON_ELONGATING_OXI": False, + "NON_ELONGATING_AA": False, + "NON_ELONGATING_ALPHAME_BETA_L_OH": False, + "NON_ELONGATING_RED_SHDB": False, + "NON_ELONGATING_KETO": False, + "NON_ELONGATING_BETA_D_OH": False, + "NON_ELONGATING_BR": False, + "NON_ELONGATING_ALPHABETA_OH": False, + "NON_ELONGATING_UNST": False, + "NON_ELONGATING_ST": False, + "NON_ELONGATING_MEOST": False, + "NON_ELONGATING_ACST": False, + "NON_ELONGATING_ALPHAME": False, + "NON_ELONGATING_BETA_D_OME": False, + "NON_ELONGATING_BETA_ME": False, + "NON_ELONGATING_BETA_OH_EDB": False, + "NON_ELONGATING_LACST": False, + "NON_ELONGATING_OUT": False, + "NON_ELONGATING_ZDB": False, + "NON_ELONGATING_BETA_MEDB": False, + "NON_ELONGATING_MISCELLANEOUS": False, + "NON_ELONGATING_ALPHAME_BETA_D_OH": False, + "NON_ELONGATING_ALPHAME_BETAOH": False, + "NON_ELONGATING_RED": False, + "NON_ELONGATING_EXOMETHYLENE": False, + "NON_ELONGATING_ALPHAME_ZDB": False, + "NON_ELONGATING_ALPHA_D_ME_SHDB": False, + "NON_ELONGATING_ALPHAME_DB": False, + "NON_ELONGATING_ALPHAME_KETO": False, } From 4c64d7c07250082ea6322d706bfad21e6d0ea216 Mon Sep 17 00:00:00 2001 From: FriederikeBiermann Date: Wed, 27 Aug 2025 22:44:29 +0200 Subject: [PATCH 2/4] feat(terpene_cluster): added water quenching option to terpene cyclase --- raichu/cluster/terpene_cluster.py | 56 +++++++++++++++++++++---------- raichu/general.py | 3 ++ raichu/representations.py | 6 ++++ 3 files changed, 48 insertions(+), 17 deletions(-) diff --git a/raichu/cluster/terpene_cluster.py b/raichu/cluster/terpene_cluster.py index 8281b08..fd02e40 100644 --- a/raichu/cluster/terpene_cluster.py +++ b/raichu/cluster/terpene_cluster.py @@ -8,6 +8,7 @@ TailoringRepresentation, IsomerizationRepresentation, MethylShiftRepresentation, + WaterQuenchingRepresentation, ) from raichu.reactions.general_tailoring_reactions import ( dephosphorylation, @@ -31,6 +32,7 @@ def __init__( macrocyclisations: List[MacrocyclizationRepresentation] = None, double_bond_isomerisations: List[IsomerizationRepresentation] = None, methyl_shifts: List[MethylShiftRepresentation] = None, + water_quenching: List[WaterQuenchingRepresentation] = None, tailoring_representations: List[TailoringRepresentation] = None, ) -> None: super().__init__(tailoring_representations, macrocyclisations) @@ -39,6 +41,7 @@ def __init__( self.precursor = precursor self.isomerization_representations = double_bond_isomerisations self.methyl_shift_representations = methyl_shifts + self.water_quenching_representations = water_quenching self.chain_intermediate = None self.tailored_product = None @@ -49,6 +52,28 @@ def create_precursor(self) -> None: substrate = TerpeneCyclaseSubstrate(self.precursor) self.chain_intermediate = read_smiles(substrate.smiles) + def do_water_quenching(self): + if not self.water_quenching_representations: + return + initialized_water_quenching_atoms = self.initialize_modification_sites( + [ + water_quenching_representation.modification_sites + for water_quenching_representation in self.water_quenching_representations + ] + ) + for atom in initialized_water_quenching_atoms: + + acceptor_atom = self.chain_intermediate.get_atom(atom[0]) + # Check if acceptor atom has double bond + + for bond in acceptor_atom.bonds: + # reduce one double bond + if bond.type == "double" or bond.type == "aromatic": + self.chain_intermediate = double_bond_reduction(bond.neighbours[0], bond.neighbours[1], self.chain_intermediate) + break + self.chain_intermediate = addition(acceptor_atom, "O", self.chain_intermediate) + self.chain_intermediate.refresh_structure(find_cycles=True) + def do_double_bond_isomerization(self): if not self.isomerization_representations: return @@ -77,7 +102,6 @@ def do_double_bond_isomerization(self): new_double_bond_atom2, ) - def do_methyl_shift(self): if not self.methyl_shift_representations: return @@ -88,25 +112,23 @@ def do_methyl_shift(self): ] ) for atoms in initialized_methyl_shift_atoms: - if len(atoms) != 2: - continue - transferred_c = atoms[0] - # Assert its actually a methyl group - assert [atom.type for atom in transferred_c.neighbours].count("H") == 3 - source = None - source = [ + if len(atoms) != 2: + continue + transferred_c = atoms[0] + # Assert its actually a methyl group + assert [atom.type for atom in transferred_c.neighbours].count("H") == 3 + source = None + source = [ atom for atom in transferred_c.neighbours if atom.type != "H" ][0] - assert source - destination_c = atoms[1] - assert destination_c.has_neighbour("H") - assert transferred_c.type == "C" and destination_c.type == "C" - - self.chain_intermediate = reductive_bond_breakage(source, transferred_c, self.chain_intermediate) - self.chain_intermediate = addition(destination_c, "C", self.chain_intermediate) - self.chain_intermediate.refresh_structure(find_cycles=True) - + assert source + destination_c = atoms[1] + assert destination_c.has_neighbour("H") + assert transferred_c.type == "C" and destination_c.type == "C" + self.chain_intermediate = reductive_bond_breakage(source, transferred_c, self.chain_intermediate) + self.chain_intermediate = addition(destination_c, "C", self.chain_intermediate) + self.chain_intermediate.refresh_structure(find_cycles=True) def do_macrocyclization(self, sequential=True): initialized_macrocyclization_atoms = self.initialize_macrocyclization() diff --git a/raichu/general.py b/raichu/general.py index b32feec..b0b7947 100644 --- a/raichu/general.py +++ b/raichu/general.py @@ -83,6 +83,9 @@ def draw_terpene_structure_from_terpene_cluster(terpene_cluster: TerpeneCluster, terpene_cluster.draw_product( as_string=False, out_file=os.path.join(out_folder, "precursor_test_terpene.svg")) terpene_cluster.do_macrocyclization() + terpene_cluster.do_double_bond_isomerization() + terpene_cluster.do_methyl_shift() + terpene_cluster.do_water_quenching() terpene_cluster.draw_product( as_string=False, out_file=os.path.join(out_folder, "macroyclisation_test_terpene.svg")) terpene_cluster.do_tailoring() diff --git a/raichu/representations.py b/raichu/representations.py index 8939179..38f9e14 100644 --- a/raichu/representations.py +++ b/raichu/representations.py @@ -36,6 +36,12 @@ class MethylShiftRepresentation: modification_sites: List[List[str]] +@dataclass +class WaterQuenchingRepresentation: + # Should be the atom to receive OH + modification_sites: List[List[str]] + + @dataclass class DomainRepresentation: gene_name: Union[str, None] From 7fd11468865f270f5d58e396e0d1bbefbf5aa08c Mon Sep 17 00:00:00 2001 From: FriederikeBiermann Date: Fri, 29 Aug 2025 10:38:35 +0200 Subject: [PATCH 3/4] fix(trans_at): added new trans at domains to domain types --- raichu/domain/domain_types.py | 39 +++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) diff --git a/raichu/domain/domain_types.py b/raichu/domain/domain_types.py index c3ca7f9..cf09150 100644 --- a/raichu/domain/domain_types.py +++ b/raichu/domain/domain_types.py @@ -219,6 +219,45 @@ class KSDomainSubtype(Enum): ALPHAME_DB = 48 ALPHAME_KETO = 49 NON_ELONGATING_SHDB = 50 + ALPHAME_SHDB = 51 + BETA_L_OME = 52 + BETA_OME = 53 + NON_ELONGATING_AA = 54 + NON_ELONGATING_ACST = 55 + NON_ELONGATING_ALPHABETA_OH = 56 + NON_ELONGATING_ALPHAME = 57 + NON_ELONGATING_ALPHAME_BETAOH = 58 + NON_ELONGATING_ALPHAME_BETA_D_OH = 59 + NON_ELONGATING_ALPHAME_BETA_L_OH = 60 + NON_ELONGATING_ALPHAME_DB = 61 + NON_ELONGATING_ALPHAME_KETO = 62 + NON_ELONGATING_ALPHAME_SHDB = 63 + NON_ELONGATING_ALPHAME_ZDB = 64 + NON_ELONGATING_ALPHA_D_ME_SHDB = 65 + NON_ELONGATING_ALPHA_OH = 66 + NON_ELONGATING_ARST = 67 + NON_ELONGATING_BETA_D_OH = 68 + NON_ELONGATING_BETA_D_OME = 69 + NON_ELONGATING_BETA_L_OME = 70 + NON_ELONGATING_BETA_ME = 71 + NON_ELONGATING_BETA_MEDB = 72 + NON_ELONGATING_BETA_OH_EDB = 73 + NON_ELONGATING_BETA_OH_KETO = 74 + NON_ELONGATING_BETA_OME = 75 + NON_ELONGATING_BR = 76 + NON_ELONGATING_EDB = 77 + NON_ELONGATING_EXOMETHYLENE = 78 + NON_ELONGATING_KETO = 79 + NON_ELONGATING_LACST = 80 + NON_ELONGATING_MEOST = 81 + NON_ELONGATING_MISCELLANEOUS = 82 + NON_ELONGATING_OUT = 83 + NON_ELONGATING_OXI = 84 + NON_ELONGATING_RED = 85 + NON_ELONGATING_RED_SHDB = 86 + NON_ELONGATING_ST = 87 + NON_ELONGATING_UNST = 88 + NON_ELONGATING_ZDB = 89 @staticmethod def from_string(label: str) -> "KSDomainSubtype": From cf04dc173d06d2f5ed9638eb22d7f2c375c12035 Mon Sep 17 00:00:00 2001 From: FriederikeBiermann Date: Fri, 29 Aug 2025 11:06:26 +0200 Subject: [PATCH 4/4] fix(tests_trans_at_ks): fixed stereospecificity in asserted substrate --- raichu/test/integration/test_trans_at_pks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/raichu/test/integration/test_trans_at_pks.py b/raichu/test/integration/test_trans_at_pks.py index 950c72f..d95c05c 100644 --- a/raichu/test/integration/test_trans_at_pks.py +++ b/raichu/test/integration/test_trans_at_pks.py @@ -21,7 +21,7 @@ def test_extensive_KS_subtypes(): # Check if final structure is correct assert ( structure_to_smiles(cluster.chain_intermediate) - == r"OC(=O)C[C@H](O)CC(=C)C[C@@H](O1)CC(C[*])CC1C(C)C=C[C@H](O2)CC(OC)C2C(=O)C=C[C@H](O)C" + == r"OC(=O)C[C@H](O)CC(=C)C[C@@H](O1)CC(C[*])CC1C(C)C=C[C@H](O2)C[C@@H](OC)C2C(=O)C=C[C@H](O)C" )