diff --git a/scripts/conformer_demo/AZD9291.mol2 b/scripts/conformer_demo/AZD9291.mol2 new file mode 100644 index 0000000..4533c8d --- /dev/null +++ b/scripts/conformer_demo/AZD9291.mol2 @@ -0,0 +1,169 @@ +@MOLECULE +A:6JWL + 72 75 1 0 1 +SMALL +NO_CHARGES +**** +Generated from the CSD + +@ATOM + 1 C13 -47.0936 -3.8091 -13.4034 C.3 1 YY31101 0.0000 + 2 N2 -47.0086 -2.7311 -14.3695 N.3 1 YY31101 0.0000 + 3 C12 -45.9115 -3.0021 -15.2755 C.3 1 YY31101 0.0000 + 4 C11 -46.7746 -1.4560 -13.7005 C.3 1 YY31101 0.0000 + 5 C10 -47.4726 -0.3070 -14.4355 C.3 1 YY31101 0.0000 + 6 N1 -48.9046 -0.5000 -14.3475 N.pl3 1 YY31101 0.0000 + 7 C28 -49.4387 -1.1750 -13.1824 C.3 1 YY31101 0.0000 + 8 C3 -50.4137 1.1840 -15.2065 C.ar 1 YY31101 0.0000 + 9 C4 -51.2787 1.6321 -16.1655 C.ar 1 YY31101 0.0000 + 10 O1 -51.9027 2.8521 -16.0285 O.3 1 YY31101 0.0000 + 11 C14 -51.6957 3.4241 -14.7895 C.3 1 YY31101 0.0000 + 12 C2 -49.8087 -0.0340 -15.3855 C.ar 1 YY31101 0.0000 + 13 C1 -50.0827 -0.7970 -16.5066 C.ar 1 YY31101 0.0000 + 14 N -49.4027 -2.0741 -16.6096 N.am 1 YY31101 0.0000 + 15 C7 -49.6557 -3.0691 -17.6256 C.2 1 YY31101 0.0000 + 16 O -50.4457 -2.8851 -18.4616 O.2 1 YY31101 0.0000 + 17 C8 -48.8456 -4.3581 -17.5956 C.3 1 YY31101 0.0000 + 18 C9 -49.6517 -5.6372 -17.7846 C.3 1 YY31101 0.0000 + 19 C6 -50.9617 -0.3360 -17.4786 C.ar 1 YY31101 0.0000 + 20 C5 -51.5537 0.9000 -17.2826 C.ar 1 YY31101 0.0000 + 21 N3 -52.4998 1.5241 -18.1876 N.pl3 1 YY31101 0.0000 + 22 C15 -52.4578 1.3800 -19.6177 C.ar 1 YY31101 0.0000 + 23 N5 -51.5097 0.6790 -20.1647 N.ar 1 YY31101 0.0000 + 24 N4 -53.3928 1.9621 -20.3097 N.ar 1 YY31101 0.0000 + 25 C16 -53.4318 1.8691 -21.6127 C.ar 1 YY31101 0.0000 + 26 C17 -52.4518 1.1400 -22.2417 C.ar 1 YY31101 0.0000 + 27 C18 -51.4837 0.5540 -21.4637 C.ar 1 YY31101 0.0000 + 28 C19 -50.3887 -0.2730 -22.1187 C.2 1 YY31101 0.0000 + 29 C26 -49.1556 -0.6420 -21.5447 C.ar 1 YY31101 0.0000 + 30 C25 -48.5396 -0.3950 -20.3087 C.ar 1 YY31101 0.0000 + 31 C24 -47.2806 -0.9140 -20.0517 C.ar 1 YY31101 0.0000 + 32 C23 -46.6356 -1.6751 -21.0187 C.ar 1 YY31101 0.0000 + 33 C22 -47.2446 -1.9131 -22.2327 C.ar 1 YY31101 0.0000 + 34 C21 -48.5256 -1.3830 -22.4878 C.ar 1 YY31101 0.0000 + 35 N6 -49.3076 -1.4780 -23.5728 N.pl3 1 YY31101 0.0000 + 36 C27 -48.9656 -2.1931 -24.7758 C.3 1 YY31101 0.0000 + 37 C20 -50.4267 -0.8280 -23.3788 C.2 1 YY31101 0.0000 + 38 H1282 -51.2463 -0.7410 -24.0905 H 1 YY31101 0.0000 + 39 H1286 -48.6942 -2.2848 -15.9139 H 1 YY31101 0.0000 + 40 H1287 -48.3421 -4.4169 -16.6318 H 1 YY31101 0.0000 + 41 H1288 -48.1031 -4.3077 -18.3906 H 1 YY31101 0.0000 + 42 H1291 -52.4429 1.0302 -23.3252 H 1 YY31101 0.0000 + 43 H1294 -45.7035 -1.2610 -13.6727 H 1 YY31101 0.0000 + 44 H1295 -47.1597 -1.5128 -12.6834 H 1 YY31101 0.0000 + 45 H1306 -47.2682 -4.7498 -13.9236 H 1 YY31101 0.0000 + 46 H1307 -47.9158 -3.6164 -12.7159 H 1 YY31101 0.0000 + 47 H1308 -46.1605 -3.8708 -12.8453 H 1 YY31101 0.0000 + 48 H1309 -48.6222 -1.4507 -12.5166 H 1 YY31101 0.0000 + 49 H1310 -49.9709 -2.0722 -13.4949 H 1 YY31101 0.0000 + 50 H1311 -50.1243 -0.5097 -12.6599 H 1 YY31101 0.0000 + 51 H1316 -46.0900 -3.9453 -15.7897 H 1 YY31101 0.0000 + 52 H1317 -44.9823 -3.0664 -14.7114 H 1 YY31101 0.0000 + 53 H1318 -45.8375 -2.1984 -16.0066 H 1 YY31101 0.0000 + 54 H1319 -50.2092 1.7814 -14.3192 H 1 YY31101 0.0000 + 55 H1321 -50.1543 -5.6101 -18.7503 H 1 YY31101 0.0000 + 56 H1322 -48.9834 -6.4962 -17.7471 H 1 YY31101 0.0000 + 57 H1323 -50.3934 -5.7194 -16.9915 H 1 YY31101 0.0000 + 58 H1324 -52.0840 2.7638 -14.0154 H 1 YY31101 0.0000 + 59 H1325 -52.2111 4.3823 -14.7413 H 1 YY31101 0.0000 + 60 H1326 -50.6288 3.5776 -14.6342 H 1 YY31101 0.0000 + 61 H1327 -53.2358 2.0970 -17.7874 H 1 YY31101 0.0000 + 62 H1331 -49.0489 0.2024 -19.5539 H 1 YY31101 0.0000 + 63 H1332 -45.6467 -2.0845 -20.8174 H 1 YY31101 0.0000 + 64 H1335 -46.7380 -2.5080 -22.9913 H 1 YY31101 0.0000 + 65 H1336 -48.8006 -3.2432 -24.5395 H 1 YY31101 0.0000 + 66 H1337 -49.7799 -2.1062 -25.4937 H 1 YY31101 0.0000 + 67 H1338 -48.0579 -1.7703 -25.2037 H 1 YY31101 0.0000 + 68 H1342 -46.7979 -0.7258 -19.0938 H 1 YY31101 0.0000 + 69 H1343 -47.1673 -0.3019 -15.4808 H 1 YY31101 0.0000 + 70 H1344 -47.2018 0.6413 -13.9736 H 1 YY31101 0.0000 + 71 H1345 -51.1782 -0.9281 -18.3665 H 1 YY31101 0.0000 + 72 H1347 -54.2200 2.3564 -22.1846 H 1 YY31101 0.0000 +@BOND + 1 1 2 1 + 2 2 3 1 + 3 2 4 1 + 4 4 5 1 + 5 5 6 1 + 6 6 7 1 + 7 8 9 ar + 8 9 10 1 + 9 10 11 1 + 10 6 12 1 + 11 8 12 ar + 12 12 13 ar + 13 13 14 1 + 14 14 15 am + 15 15 16 2 + 16 15 17 1 + 17 17 18 1 + 18 13 19 ar + 19 9 20 ar + 20 19 20 ar + 21 20 21 1 + 22 21 22 1 + 23 22 23 ar + 24 22 24 ar + 25 24 25 ar + 26 25 26 ar + 27 23 27 ar + 28 26 27 ar + 29 27 28 1 + 30 28 29 1 + 31 29 30 ar + 32 30 31 ar + 33 31 32 ar + 34 32 33 ar + 35 29 34 ar + 36 33 34 ar + 37 34 35 1 + 38 35 36 1 + 39 28 37 2 + 40 35 37 1 + 41 37 38 1 + 42 14 39 1 + 43 17 40 1 + 44 17 41 1 + 45 26 42 1 + 46 4 43 1 + 47 4 44 1 + 48 1 45 1 + 49 1 46 1 + 50 1 47 1 + 51 7 48 1 + 52 7 49 1 + 53 7 50 1 + 54 3 51 1 + 55 3 52 1 + 56 3 53 1 + 57 8 54 1 + 58 18 55 1 + 59 18 56 1 + 60 18 57 1 + 61 11 58 1 + 62 11 59 1 + 63 11 60 1 + 64 21 61 1 + 65 30 62 1 + 66 32 63 1 + 67 33 64 1 + 68 36 65 1 + 69 36 66 1 + 70 36 67 1 + 71 31 68 1 + 72 5 69 1 + 73 5 70 1 + 74 19 71 1 + 75 25 72 1 +@SUBSTRUCTURE + 1 YY31101 1 GROUP 0 A YY3 0 +@SET +CCDC_LIGAND STATIC ATOMS +72 1 2 3 4 5 6 7 8 9 \ +10 11 12 13 14 15 16 17 18 19 \ +20 21 22 23 24 25 26 27 28 29 \ +30 31 32 33 34 35 36 37 38 39 \ +40 41 42 43 44 45 46 47 48 49 \ +50 51 52 53 54 55 56 57 58 59 \ +60 61 62 63 64 65 66 67 68 69 \ +70 71 72 diff --git a/scripts/conformer_demo/conformer_demo.py b/scripts/conformer_demo/conformer_demo.py new file mode 100644 index 0000000..067fde8 --- /dev/null +++ b/scripts/conformer_demo/conformer_demo.py @@ -0,0 +1,135 @@ +#! /usr/bin/env python +######################################################################################################################## +# +# This script can be used for any purpose without limitation subject to the +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx +# +# This permission notice and the following statement of attribution must be +# included in all copies or substantial portions of this script. +# +# 2024-11-22: created by the Cambridge Crystallographic Data Centre +# +######################################################################################################################## + +from ccdc import conformer, descriptors, io, molecule +from ccdc.search import SubstructureSearch, SMARTSSubstructure + + +def read(molecule_file: str) -> molecule: + print(f'Reading file: {molecule_file} ... ', end='') + mol_reader = io.MoleculeReader(molecule_file) + mol = mol_reader[0] + print('done.') + + return mol + + +def generate_conformers(molecule: molecule, max_conformers: int = 50) -> conformer.ConformerHitList: + """ + Generate conformers for a molecule. + + :param molecule: The Molecule (ccdc Molecule object) to generate conformers for. + :param max_conformers: The maximum number of conformers to generate. + + :returns: ccdc.conformer.ConformerHitList + """ + + # Set up the ConformerGenerator + confgen = conformer.ConformerGenerator() + confgen.settings.max_conformers = max_conformers + # confgen.settings.superimpose_conformers_onto_reference = True + + # Generate conformers and assign identifiers to them before returning + conformers = confgen.generate(molecule) + + print(f'Generating conformers, maximum of {max_conformers} ... ', end='') + for i, conf in enumerate(conformers): + conf.molecule.identifier = '{}_{:04}'.format(conf.molecule.identifier, i + 1) + print(f'done, generated {len(conformers)} conformers.') + + return conformers + + +def analyse(conformers: conformer.ConformerHitList) -> molecule: + """ + Perform some basic analysis of the conformers generated. + :param conformers: Conformers generated from ConfGen + :return: The best molecule of all the conformers generated. + """ + print(f'Sampling limit reached? {"Yes." if conformers.sampling_limit_reached else "No."}') + + print(f'How many rotamers had no observations? {conformers.n_rotamers_with_no_observations}.') + + most_probable_conformer = conformers[0] + + print(f'Normalised score of most probable conformer: {round(most_probable_conformer.normalised_score, 5)}.') + print(f'Most probable conformer RMSD wrt input: {round(most_probable_conformer.rmsd(), 3)}; ' + f'wrt minimised: {round(most_probable_conformer.rmsd(wrt="minimised"), 3)}.') + + print('Scores of top 10 conformers: ', end='') + + top_ten = conformers[:10] + for i in range(len(top_ten)): + if i < len(top_ten) - 1: + print(f'{round(top_ten[i].normalised_score, 3):.3f}, ', end='') + else: + print(f'{round(top_ten[i].normalised_score, 3):.3f}.') + + return most_probable_conformer.molecule + + +def overlay(conformers, query: str, output_filename: str) -> None: + """ + Overlay conformers based on a SMARTS substructure pattern + :param conformers: Conformers generated from ConfGen + :param query: SMARTS pattern which the conformers will overlay on top of. + Should be consistent across all conformers, e.g. benzene ring. + """ + print('Overlaying conformers ... ', end='') + conformers_mols = [c.molecule for c in conformers] + ss_search = SubstructureSearch() + substructure = SMARTSSubstructure(query) + ss_search.add_substructure(substructure) + hits = ss_search.search(conformers_mols, max_hits_per_structure=1) + ref_ats = hits[0].match_atoms() + print('done.') + + print('Writing file superimposed ... ', end='') + with io.MoleculeWriter(output_filename) as writer: + for hit in hits: + hit_ats = hit.match_atoms() + atoms = zip(ref_ats, hit_ats) + ov = descriptors.MolecularDescriptors.Overlay(hits[0].molecule, hit.molecule, atoms) + superimposed_hit = ov.molecule + writer.write(superimposed_hit) + print('done.') + + +def write_conformers_to_file(conformers: conformer.ConformerHitList, filename: str) -> None: + """ + Write conformers to a file without any addition overlaying. + :param conformers: Conformer generated from ConfGen. + :param filename: The name of the output file. + """ + + with io.MoleculeWriter(filename) as writer: + for conf in conformers: + writer.write(conf.molecule) + + +if __name__ == '__main__': + + input_filename = 'AZD9291.mol2' + # Read example molecule + mol = read(input_filename) + + # Generate conformers + confs = generate_conformers(mol, 20) + + # Provide summary of analysis + analyse(confs) + + # Overlay structures based on common substructure + query = 'c1cncnc1' + output_filename = f'superimposed_{input_filename}' + overlay(confs, query, output_filename) diff --git a/scripts/conformer_demo/description.md b/scripts/conformer_demo/description.md new file mode 100644 index 0000000..05baf94 --- /dev/null +++ b/scripts/conformer_demo/description.md @@ -0,0 +1,26 @@ +# Conformer Demo + +This is a short script to generate conformers with some rudimentary analysis for a single molecule. +There are also options to overlay the results to view in Hermes. + +### Example output showing what the user can expect to see: + +``` +Reading file: AZD9291.mol2 ... done. +Generating conformers, maximum of 20 ... done, generated 20 conformers. +Sampling limit reached? No. +How many rotamers had no observations? 0. +Normalised score of most probable conformer: 0.0. +Most probable conformer RMSD wrt input: 3.276; wrt minimised: 3.202. +Scores of top 10 conformers: 0.000, 0.000, 0.000, 0.027, 0.027, 0.027, 0.027, 0.027, 0.029, 0.029. +Overlaying conformers ... done. +Writing file superimposed ... done. +``` + +CCDC Python API Licence required, minimum version: 3.0.15 + +There is an accompanying mol2 file with this script, but users may use any small molecule provided in a file format readable by our API (e.g. mol, mol2, sdf, etc) + +Author: Chris Ringrose - 22/11/24 + + For feedback or to report any issues please contact support@ccdc.cam.ac.uk