|
| 1 | +#! /usr/bin/env python |
| 2 | +######################################################################################################################## |
| 3 | +# |
| 4 | +# This script can be used for any purpose without limitation subject to the |
| 5 | +# conditions at http://www.ccdc.cam.ac.uk/Community/Pages/Licences/v2.aspx |
| 6 | +# |
| 7 | +# This permission notice and the following statement of attribution must be |
| 8 | +# included in all copies or substantial portions of this script. |
| 9 | +# |
| 10 | +# 2024-11-22: created by the Cambridge Crystallographic Data Centre |
| 11 | +# |
| 12 | +######################################################################################################################## |
| 13 | + |
| 14 | +from ccdc import conformer, descriptors, io, molecule |
| 15 | +from ccdc.search import SubstructureSearch, SMARTSSubstructure |
| 16 | + |
| 17 | + |
| 18 | +def read(molecule_file: str) -> molecule: |
| 19 | + print(f'Reading file: {molecule_file} ... ', end='') |
| 20 | + mol_reader = io.MoleculeReader(molecule_file) |
| 21 | + mol = mol_reader[0] |
| 22 | + print('done.') |
| 23 | + |
| 24 | + return mol |
| 25 | + |
| 26 | + |
| 27 | +def generate_conformers(molecule: molecule, max_conformers: int = 50) -> conformer.ConformerHitList: |
| 28 | + """ |
| 29 | + Generate conformers for a molecule. |
| 30 | +
|
| 31 | + :param molecule: The Molecule (ccdc Molecule object) to generate conformers for. |
| 32 | + :param max_conformers: The maximum number of conformers to generate. |
| 33 | +
|
| 34 | + :returns: ccdc.conformer.ConformerHitList |
| 35 | + """ |
| 36 | + |
| 37 | + # Set up the ConformerGenerator |
| 38 | + confgen = conformer.ConformerGenerator() |
| 39 | + confgen.settings.max_conformers = max_conformers |
| 40 | + # confgen.settings.superimpose_conformers_onto_reference = True |
| 41 | + |
| 42 | + # Generate conformers and assign identifiers to them before returning |
| 43 | + conformers = confgen.generate(molecule) |
| 44 | + |
| 45 | + print(f'Generating conformers, maximum of {max_conformers} ... ', end='') |
| 46 | + for i, conf in enumerate(conformers): |
| 47 | + conf.molecule.identifier = '{}_{:04}'.format(conf.molecule.identifier, i + 1) |
| 48 | + print(f'done, generated {len(conformers)} conformers.') |
| 49 | + |
| 50 | + return conformers |
| 51 | + |
| 52 | + |
| 53 | +def analyse(conformers: conformer.ConformerHitList) -> molecule: |
| 54 | + """ |
| 55 | + Perform some basic analysis of the conformers generated. |
| 56 | + :param conformers: Conformers generated from ConfGen |
| 57 | + :return: The best molecule of all the conformers generated. |
| 58 | + """ |
| 59 | + print(f'Sampling limit reached? {"Yes." if conformers.sampling_limit_reached else "No."}') |
| 60 | + |
| 61 | + print(f'How many rotamers had no observations? {conformers.n_rotamers_with_no_observations}.') |
| 62 | + |
| 63 | + most_probable_conformer = conformers[0] |
| 64 | + |
| 65 | + print(f'Normalised score of most probable conformer: {round(most_probable_conformer.normalised_score, 5)}.') |
| 66 | + print(f'Most probable conformer RMSD wrt input: {round(most_probable_conformer.rmsd(), 3)}; ' |
| 67 | + f'wrt minimised: {round(most_probable_conformer.rmsd(wrt="minimised"), 3)}.') |
| 68 | + |
| 69 | + print('Scores of top 10 conformers: ', end='') |
| 70 | + |
| 71 | + top_ten = conformers[:10] |
| 72 | + for i in range(len(top_ten)): |
| 73 | + if i < len(top_ten) - 1: |
| 74 | + print(f'{round(top_ten[i].normalised_score, 3):.3f}, ', end='') |
| 75 | + else: |
| 76 | + print(f'{round(top_ten[i].normalised_score, 3):.3f}.') |
| 77 | + |
| 78 | + return most_probable_conformer.molecule |
| 79 | + |
| 80 | + |
| 81 | +def overlay(conformers, query: str, output_filename: str) -> None: |
| 82 | + """ |
| 83 | + Overlay conformers based on a SMARTS substructure pattern |
| 84 | + :param conformers: Conformers generated from ConfGen |
| 85 | + :param query: SMARTS pattern which the conformers will overlay on top of. |
| 86 | + Should be consistent across all conformers, e.g. benzene ring. |
| 87 | + """ |
| 88 | + print('Overlaying conformers ... ', end='') |
| 89 | + conformers_mols = [c.molecule for c in conformers] |
| 90 | + ss_search = SubstructureSearch() |
| 91 | + substructure = SMARTSSubstructure(query) |
| 92 | + ss_search.add_substructure(substructure) |
| 93 | + hits = ss_search.search(conformers_mols, max_hits_per_structure=1) |
| 94 | + ref_ats = hits[0].match_atoms() |
| 95 | + print('done.') |
| 96 | + |
| 97 | + print('Writing file superimposed ... ', end='') |
| 98 | + with io.MoleculeWriter(output_filename) as writer: |
| 99 | + for hit in hits: |
| 100 | + hit_ats = hit.match_atoms() |
| 101 | + atoms = zip(ref_ats, hit_ats) |
| 102 | + ov = descriptors.MolecularDescriptors.Overlay(hits[0].molecule, hit.molecule, atoms) |
| 103 | + superimposed_hit = ov.molecule |
| 104 | + writer.write(superimposed_hit) |
| 105 | + print('done.') |
| 106 | + |
| 107 | + |
| 108 | +def write_conformers_to_file(conformers: conformer.ConformerHitList, filename: str) -> None: |
| 109 | + """ |
| 110 | + Write conformers to a file without any addition overlaying. |
| 111 | + :param conformers: Conformer generated from ConfGen. |
| 112 | + :param filename: The name of the output file. |
| 113 | + """ |
| 114 | + |
| 115 | + with io.MoleculeWriter(filename) as writer: |
| 116 | + for conf in conformers: |
| 117 | + writer.write(conf.molecule) |
| 118 | + |
| 119 | + |
| 120 | +if __name__ == '__main__': |
| 121 | + |
| 122 | + input_filename = 'AZD9291.mol2' |
| 123 | + # Read example molecule |
| 124 | + mol = read(input_filename) |
| 125 | + |
| 126 | + # Generate conformers |
| 127 | + confs = generate_conformers(mol, 20) |
| 128 | + |
| 129 | + # Provide summary of analysis |
| 130 | + analyse(confs) |
| 131 | + |
| 132 | + # Overlay structures based on common substructure |
| 133 | + query = 'c1cncnc1' |
| 134 | + output_filename = f'superimposed_{input_filename}' |
| 135 | + overlay(confs, query, output_filename) |
0 commit comments