Skip to content

Commit

Permalink
Add a workflow to generate the header file from the SMILES templates …
Browse files Browse the repository at this point in the history
…file (#6)
  • Loading branch information
ricrogz authored Aug 16, 2023
1 parent 9e4634b commit 03d3d7d
Show file tree
Hide file tree
Showing 12 changed files with 334 additions and 182 deletions.
76 changes: 76 additions & 0 deletions .github/workflows/update_templates.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
# Workflow that validates the templates, regenerates the C++ header and the
# gallery, and commits the regenerated files back to the repository.
name: Update the header file and image gallery
# Triggers: manual dispatch, invocation from another workflow, or a push to
# main that modifies templates.smi.
on:
workflow_dispatch: ~
workflow_call: ~
push:
branches:
- main
paths:
- templates.smi

# The last step pushes a commit, so the job token needs write access to the
# repository contents.
permissions:
contents: write

# Paths that the commit step diffs and stages.
env:
tpl_hdr: 'template_smiles.h'
gallery_file: 'gallery.md'
img_dir: 'img'

jobs:
Check_new_templates:
runs-on: ubuntu-latest
steps:
- name: Checkout repo
uses: actions/checkout@v3
# Creates the environment described in environment.yml and the
# `micromamba-shell` wrapper that the following steps use as their shell.
- name: Set up Python & RDKit
uses: mamba-org/setup-micromamba@v1
with:
micromamba-version: 'latest'
environment-file: environment.yml
generate-run-shell: true
cache-environment: true
post-cleanup: 'none'
- name: Run basic checks
if: success()
run: pytest -n=auto checks/test_requisites.py
shell: micromamba-shell {0}
- name: Run check for duplicates
if: success()
run: pytest -sv checks/test_duplicates.py
shell: micromamba-shell {0}
# The `id` lets the later steps read this step's `header_changed` output.
- name: Generate the header file
id: header-gen
if: success()
run: python3 src/header_generation.py
shell: micromamba-shell {0}
# Every step from here on runs only when header-gen reported
# header_changed == 'true'.
- name: Test the header file
if: ${{ steps.header-gen.outputs.header_changed == 'true' && success() }}
run: g++ -std=c++17 -I. checks/test_program.cpp -o test && ./test
shell: micromamba-shell {0}
- name: Update the gallery
if: ${{ steps.header-gen.outputs.header_changed == 'true' && success() }}
run: python3 src/update_gallery.py
shell: micromamba-shell {0}

# Commits the header, gallery file and image directory as the
# github-actions bot, but only if the header differs from the checked-out
# version.
# NOTE(review): the co-author is taken from the last commit that touched the
# header file itself; confirm this is intended rather than the author of the
# latest templates.smi change.
- name: Commit and push the new header file
if: ${{ steps.header-gen.outputs.header_changed == 'true' && success() }}
run: |
if ! git diff --quiet --exit-code ${{ env.tpl_hdr }}; then
git config user.name "github-actions[bot]"
git config user.email "github-actions[bot]@noreply.github.com"
git checkout main
git pull
git add ${{ env.tpl_hdr }} ${{ env.gallery_file }} ${{ env.img_dir }}
coauthor=$(git log -1 --pretty=format:'%an <%ae>' -- ${{ env.tpl_hdr }})
> commitmsg
echo -e "[bot] Update molecular templates header\n\n" >> commitmsg
echo -e "Co-authored-by: ${coauthor}\n" >> commitmsg
git commit -F commitmsg
git push
fi
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
**/__pycache__
8 changes: 3 additions & 5 deletions checks/test_duplicates.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,8 @@
import pytest

from common_code import load_templates
from rdkit import Chem
from rdkit.Chem import RegistrationHash

from common_code import load_templates


@pytest.mark.xfail(reason='we know we have some duplicates')
def test_check_duplicates():
Expand All @@ -19,8 +17,8 @@ def test_check_duplicates():
duplicates = 0
for i, smiles, cxsmiles in load_templates():
mol = Chem.MolFromSmiles(cxsmiles)
mol_layers = RegistrationHash.GetMolLayers(mol,
enable_tautomer_hash_v2=True)
mol_layers = RegistrationHash.GetMolLayers(
mol, enable_tautomer_hash_v2=True)
mol_hash = RegistrationHash.GetMolHash(mol_layers)
if (seen := all_templates.get(mol_hash, None)) is not None:
seen_idx, seen_smiles = seen
Expand Down
15 changes: 15 additions & 0 deletions checks/test_program.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
#include <iostream>

#include "template_smiles.h"

// Smoke test for the generated header: it must compile when included, and
// every entry of TEMPLATE_SMILES is printed with a 1-based index.
int main()
{
    size_t template_number = 0;
    for (const auto& template_entry : TEMPLATE_SMILES) {
        // Advance the counter first so that numbering starts at 1.
        template_number += 1;
        std::cout << "Template #" << template_number << ": ";
        std::cout << template_entry << std::endl;
    }

    std::cout << "All templates in header listed, check passed.";
    std::cout << std::endl;

    return 0;
}
4 changes: 1 addition & 3 deletions checks/test_requisites.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,6 @@
import pytest

from rdkit import Chem

from common_code import load_templates
from rdkit import Chem


def generate_params():
Expand Down
1 change: 1 addition & 0 deletions environment.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ channels:
- conda-forge
dependencies:
- gitpython
- gxx
- python=3.11
- pytest
- pytest-xdist
Expand Down
83 changes: 83 additions & 0 deletions src/header_generation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,83 @@
import filecmp
import os
import shutil
import tempfile

from rdkit import Chem

# File name of the generated C++ header. It is used both for the scratch copy
# written to a temporary directory and, relative to the working directory, for
# the header that gets compared against and overwritten.
HEADER_FILE = 'template_smiles.h'
# Input file that generate_header() reads line by line.
TEMPLATE_FILE = 'templates.smi'

# Fixed preamble written at the top of the generated header. It ends by opening
# the TEMPLATE_SMILES initializer list, which generate_header() fills with one
# entry per template and then closes.
HEADER_TEXT = """//
// Copyright (C) 2023 Schrödinger, LLC
//
// @@ All Rights Reserved @@
// This file is part of the RDKit.
// The contents are covered by the terms of the BSD license
// which is included in the file license.txt, found at the root
// of the RDKit source tree.
//
// THIS FILE IS AUTOMATICALLY GENERATED. It contains templates used
// in 2D coordinate generation. If you want to contribute to these
// templates, please refer to instructions in:
// https://github.com/rdkit/molecular_templates/blob/main/README.md
//
#include <string>
#include <vector>
// clang-format off
const std::vector<std::string> TEMPLATE_SMILES = {
"""


def clean_smiles(template_smiles):
    """
    Translate all atoms into dummy atoms so that templates are not atom-specific.

    :param template_smiles: SMILES (or CXSMILES) string describing the template.
    :return: CXSMILES string of the template with the atomic number of every
        atom set to 0.
    :raises ValueError: if RDKit cannot parse ``template_smiles``.
    """
    template = Chem.MolFromSmiles(template_smiles)
    if template is None:
        # MolFromSmiles reports a parse failure by returning None; fail here
        # with the offending input instead of an AttributeError further down.
        raise ValueError(
            f'Could not parse template SMILES: {template_smiles!r}')

    for atom in template.GetAtoms():
        atom.SetAtomicNum(0)

    # TO_DO: replace bonds with query bonds

    return Chem.MolToCXSmiles(template)


def generate_header(generated_header_path):
    """
    Write the C++ header built from the templates file.

    The output consists of HEADER_TEXT, one quoted string-literal entry per
    non-blank line of TEMPLATE_FILE (surrounding whitespace stripped), and the
    closing of the initializer list.

    :param generated_header_path: path of the header file to (over)write.
    :raises OSError: if either file cannot be opened.
    """
    # Explicit UTF-8: HEADER_TEXT contains non-ASCII characters, so relying on
    # the locale's default encoding could fail or produce different bytes
    # depending on where the script runs.
    with open(generated_header_path, 'w', encoding='utf-8') as f_out:
        f_out.write(HEADER_TEXT)
        with open(TEMPLATE_FILE, 'r', encoding='utf-8') as f_in:
            for line in f_in:
                if not (cxsmiles := line.strip()):
                    continue

                # TO_DO: Clean smiles to make them atom-type and bond-type agnostic
                # cxsmiles = clean_smiles(cxsmiles)

                # The text lands inside a C++ string literal: backslashes (used
                # by SMILES for double-bond stereo) and double quotes must be
                # escaped, backslashes first so the added ones are not doubled.
                escaped = cxsmiles.replace('\\', '\\\\').replace('"', '\\"')
                f_out.write(f' "{escaped}",\n')
        f_out.write('};\n// clang-format on\n')
    print(f"Successfully generated {generated_header_path}")


def check_header_changed(header_file_path, existing_header_path=None):
    """
    Report whether the freshly generated header differs from the existing one.

    When the header changed and the GITHUB_OUTPUT environment variable names a
    file, a ``header_changed=true`` record is appended to that file so that
    later workflow steps can read it.

    :param header_file_path: path of the newly generated header.
    :param existing_header_path: path of the header to compare against;
        defaults to HEADER_FILE.
    :return: True if the contents differ or no existing header is present,
        False otherwise.
    """
    if existing_header_path is None:
        existing_header_path = HEADER_FILE

    # A missing header (e.g. the very first run) counts as "changed";
    # filecmp.cmp would raise FileNotFoundError for it. shallow=False forces a
    # byte-wise comparison instead of trusting matching os.stat() signatures.
    header_changed = not (
        os.path.isfile(existing_header_path)
        and filecmp.cmp(header_file_path, existing_header_path, shallow=False)
    )

    if header_changed and (gh_output := os.environ.get('GITHUB_OUTPUT', '')):
        with open(gh_output, 'a') as f:
            # Terminate the record with a newline so that anything appended
            # to the file afterwards starts on its own line.
            f.write(f'header_changed={str(header_changed).lower()}\n')

    print(f'Header file has {"" if header_changed else "not "}changed')
    return header_changed


def main():
    """
    Regenerate the header in a scratch directory and install it if it changed.

    The header is first written to a temporary directory; HEADER_FILE in the
    working directory is only overwritten when check_header_changed() reports
    a difference.
    """
    with tempfile.TemporaryDirectory() as scratch_dir:
        candidate_path = os.path.join(scratch_dir, HEADER_FILE)
        generate_header(candidate_path)

        # Leave the existing header untouched when nothing changed.
        if not check_header_changed(candidate_path):
            return

        shutil.copy(candidate_path, HEADER_FILE)
        print(f'Updated {HEADER_FILE} with the generated header')


if __name__ == '__main__':
    main()
21 changes: 18 additions & 3 deletions src/img_generation.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@

import git
from rdkit import Chem
from rdkit.Chem.Draw import MolToImage
from rdkit.Chem.Draw import MolDraw2DSVG, MolToImage

from imgur_upload import upload_img

Expand Down Expand Up @@ -42,7 +42,7 @@ def get_new_templates():
yield line_nums[cxsmiles], cxsmiles


def draw_mol(cxsmiles, idx, output_dir='.'):
def draw_png(cxsmiles, idx, output_dir='.'):
smiles = cxsmiles.split()[0]
legend = f"#{idx} {smiles}"
mol = Chem.MolFromSmiles(cxsmiles)
Expand All @@ -57,6 +57,21 @@ def draw_mol(cxsmiles, idx, output_dir='.'):
return fpath, legend


def draw_svg(cxsmiles, fname, legend):
    """
    Draw a template as an SVG image and write it to ``fname``.

    :param cxsmiles: CXSMILES string of the template; it must carry
        coordinates, since the molecule is drawn from its conformer.
    :param fname: path of the SVG file to write.
    :param legend: legend text drawn with the molecule.
    :raises ValueError: if the SMILES cannot be parsed or has no coordinates.
    """
    mol = Chem.MolFromSmiles(cxsmiles)
    if mol is None:
        # MolFromSmiles returns None on a parse failure; report the bad input
        # instead of failing with an AttributeError on the next line.
        raise ValueError(f"Could not parse SMILES: {cxsmiles!r}")
    if not mol.GetNumConformers():
        raise ValueError("SMILES must include coordinates")

    print(f'Creating SVG image {legend}: {fname}')

    # img_width / img_height are not defined in this function; presumably
    # module-level settings of this file -- TODO confirm.
    drawer = MolDraw2DSVG(img_width, img_height)
    drawer.DrawMolecule(mol, legend=legend)
    drawer.FinishDrawing()

    with open(fname, 'w') as f:
        f.write(drawer.GetDrawingText())


def export_image_urls(template_imgs):
if gh_output := os.environ.get('GITHUB_OUTPUT', ''):
markdown_imgs = [f'![{title}]({url})' for url, title in template_imgs]
Expand All @@ -68,7 +83,7 @@ def main():
template_imgs = []
with tempfile.TemporaryDirectory() as tmpdir:
for idx, cxsmiles in get_new_templates():
fpath, title = draw_mol(cxsmiles, idx, tmpdir)
fpath, title = draw_png(cxsmiles, idx, tmpdir)
img_url, title = upload_img(fpath, title)
template_imgs.append((img_url, title))

Expand Down
2 changes: 1 addition & 1 deletion src/imgur_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
UPLOAD_RETRIES = 5

IMGUR_CLIENT_ID = os.environ.get('IMGUR_CLIENT_ID', None)
IMGUR_TOKEN = json.loads(os.environ.get('IMGUR_TOKEN', ''))
IMGUR_TOKEN = json.loads(os.environ.get('IMGUR_TOKEN', '[]'))


def upload_img(fpath, title):
Expand Down
95 changes: 0 additions & 95 deletions src/make_templates_header.py

This file was deleted.

Loading

0 comments on commit 03d3d7d

Please sign in to comment.