Skip to content

Commit

Permalink
Merge pull request #460 from xchem/m2ms-1202-achristie
Browse files Browse the repository at this point in the history
Adds support for ref-url and rationale in RHS uploaded ComputedMolecules
  • Loading branch information
alanbchristie authored Dec 8, 2023
2 parents 8c1e967 + a883211 commit 701f30a
Show file tree
Hide file tree
Showing 4 changed files with 118 additions and 63 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,7 @@ for the viewer's model run the following: -

docker-compose -f docker-compose-migrate.yml up -d

# Then enter the backend container with: -
Then enter the backend container with: -

Then from within the backend container make the migrations
(in this case for the `viewer`)...
Expand Down
139 changes: 77 additions & 62 deletions viewer/cset_upload.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,10 +102,9 @@ def save_pdb_zip(
test_pdb_code = f'{code}#{rand_str}'
zfile_hashvals[code] = rand_str

fn = test_pdb_code + '.pdb'

fn = f'{test_pdb_code}.pdb'
pdb_path = default_storage.save(
'tmp/' + fn, ContentFile(zf.read(filename))
f'tmp/{fn}', ContentFile(zf.read(filename))
)
zfile[test_pdb_code] = pdb_path

Expand Down Expand Up @@ -148,7 +147,7 @@ def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservatio
pdb_fp = zfile[pdb_code]
pdb_fn = zfile[pdb_code].split('/')[-1]

new_filename = settings.MEDIA_ROOT + 'pdbs/' + pdb_fn
new_filename = f'{settings.MEDIA_ROOT}pdbs/{pdb_fn}'
old_filename = settings.MEDIA_ROOT + pdb_fp
shutil.copy(old_filename, new_filename)

Expand All @@ -162,13 +161,13 @@ def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservatio
if created:
target_obj = Target.objects.get(title=target)
site_obvs.target_id = target_obj
site_obvs.pdb_info = 'pdbs/' + pdb_fn
site_obvs.pdb_info = f'pdbs/{pdb_fn}'
site_obvs.save()

return site_obvs

# use zfile object for pdb files uploaded in zip
def get_prot(
def get_site_observation(
self, mol, target, compound_set, zfile, zfile_hashvals
) -> Optional[SiteObservation]:
# The returned protein object may be None
Expand All @@ -185,32 +184,45 @@ def get_prot(
zfile_hashvals=zfile_hashvals,
)
else:
name = compound_set.target.title + '-' + pdb_fn

# try to get single exact match
# name.split(':')[0].split('_')[0]
name = f'{compound_set.target.title}-{pdb_fn}'
try:
site_obvs = SiteObservation.objects.get(code__contains=name)
except SiteObservation.DoesNotExist:
# SiteObservation lookup failed.
# Initial SiteObservation lookup failed.
logger.warning(
'Failed to get SiteObservation object (target=%s name=%s)',
compound_set.target.title,
name,
)
# Try an alternative.
# If all else fails then the prot_obj will be 'None'
# Try alternatives.
# If all else fails then the site_obvs will be 'None'
qs = SiteObservation.objects.filter(code__contains=name)
if not qs.exists():
qs = SiteObservation.objects.filter(
code__contains=name.split(':')[0].split('_')[0]
if qs.exists():
logger.info(
'Found SiteObservation containing name=%s qs=%s',
name,
qs,
)
else:
alt_name = name.split(':')[0].split('_')[0]
qs = SiteObservation.objects.filter(code__contains=alt_name)
if qs.exists():
logger.info(
'Found SiteObservation containing alternative name=%s qs=%s',
alt_name,
qs,
)
if qs.count() > 0:
logger.debug(
'Found alternative (target=%s name=%s)',
compound_set.target.title,
name,
)
site_obvs = qs[0]

if not site_obvs:
logger.warning(
'No SiteObservation object (target=%s pdb_fn=%s)',
'No SiteObservation found (target=%s pdb_fn=%s)',
compound_set.target.title,
pdb_fn,
)
Expand All @@ -226,12 +238,7 @@ def create_mol(self, inchi, long_inchi=None, name=None) -> Compound:
cpd = Compound.objects.filter(inchi=inchi)
sanitized_mol = Chem.MolFromInchi(inchi, sanitize=True)

if len(cpd) != 0:
new_mol = cpd[0]
elif len(cpd) == 0:
# add molecule and return the object
new_mol = Compound()

new_mol = cpd[0] if len(cpd) != 0 else Compound()
new_mol.smiles = Chem.MolToSmiles(sanitized_mol)
new_mol.inchi = inchi
if long_inchi:
Expand All @@ -258,7 +265,7 @@ def create_mol(self, inchi, long_inchi=None, name=None) -> Compound:

return new_mol

def set_props(self, cpd, props, compound_set) -> ScoreDescription:
def set_props(self, cpd, props, compound_set) -> List[ScoreDescription]:
if 'ref_mols' and 'ref_pdb' not in list(props.keys()):
raise Exception('ref_mols and ref_pdb not set!')
set_obj = ScoreDescription.objects.filter(computed_set=compound_set)
Expand Down Expand Up @@ -337,88 +344,94 @@ def set_mol(

_ = mol.GetProp('original SMILES')

# Try to get the protein object.
# Try to get the SiteObservation.
# This may fail.
prot = self.get_prot(
prot = self.get_site_observation(
mol, target, compound_set, zfile, zfile_hashvals=zfile_hashvals
)
if not prot:
logger.warning('get_prot() failed to return a Protein object')
logger.warning('get_prot() failed to return a SiteObservation object')

# need to add Compound before saving
# see if anything exists already
existing = ComputedMolecule.objects.filter(
# Need a ComputedMolecule before saving.
# Check if anything exists already...
existing_computed_molecules = ComputedMolecule.objects.filter(
name=name, smiles=smiles, computed_set=compound_set
)

if len(existing) == 1:
computed_molecule: ComputedMolecule = existing[0]
elif len(existing) > 1:
for exist in existing:
computed_molecule: Optional[ComputedMolecule] = None
if len(existing_computed_molecules) == 1:
logger.info(
'Using existing ComputedMolecule %s', existing_computed_molecules[0]
)
computed_molecule = existing_computed_molecules[0]
elif len(existing_computed_molecules) > 1:
logger.warning('Deleting existing ComputedMolecules (more than 1 found')
for exist in existing_computed_molecules:
logger.info('Deleting ComputedMolecule %s', exist)
exist.delete()
computed_molecule = ComputedMolecule()
elif len(existing) == 0:
if not computed_molecule:
logger.info('Creating new ComputedMolecule')
computed_molecule = ComputedMolecule()

assert computed_molecule
computed_molecule.compound = ref_cpd
computed_molecule.computed_set = compound_set
computed_molecule.sdf_info = mol_block
computed_molecule.name = name
computed_molecule.smiles = smiles
# Extract possible reference URL and Rationale
computed_molecule.ref_url = (
mol.GetProp('ref_url') if mol.HasProp('ref_url') else None
)
computed_molecule.rationale = (
mol.GetProp('rationale') if mol.HasProp('rationale') else None
)
# To void the error
# needs to have a value for field "id"
# before this many-to-many relationship can be used.
# We must save this ComputedMolecule before adding inspirations
computed_molecule.save()
for insp_frag in insp_frags:
computed_molecule.computed_inspirations.add(insp_frag)
# Done
computed_molecule.save()

return computed_molecule

def get_submission_info(self, description_mol) -> ComputedSetSubmitter:
y_m_d = description_mol.GetProp('generation_date').split('-')

submitter = ComputedSetSubmitter.objects.get_or_create(
return ComputedSetSubmitter.objects.get_or_create(
name=description_mol.GetProp('submitter_name'),
method=description_mol.GetProp('method'),
email=description_mol.GetProp('submitter_email'),
institution=description_mol.GetProp('submitter_institution'),
generation_date=datetime.date(int(y_m_d[0]), int(y_m_d[1]), int(y_m_d[2])),
)[0]

return submitter

def process_mol(
self, mol, target, compound_set, filename, zfile=None, zfile_hashvals=None
) -> ScoreDescription:
) -> List[ScoreDescription]:
cpd = self.set_mol(mol, target, compound_set, filename, zfile, zfile_hashvals)
other_props = mol.GetPropsAsDict()
score_description = self.set_props(cpd, other_props, compound_set)
return self.set_props(cpd, other_props, compound_set)

return score_description

def set_descriptions(self, filename, computed_set: ComputedSet) -> List[str]:
def set_descriptions(
self, filename, computed_set: ComputedSet
) -> List[Chem.rdchem.Mol]:
suppl = Chem.SDMolSupplier(str(filename))
description_mol = suppl[0]

mols = []

for i in range(1, len(suppl)):
mols.append(suppl[i])

mols = [suppl[i] for i in range(1, len(suppl))]
descriptions_needed = list(
set(
[
item
for sublist in [list(m.GetPropsAsDict().keys()) for m in mols]
for item in sublist
]
)
{
item
for sublist in [list(m.GetPropsAsDict().keys()) for m in mols]
for item in sublist
}
)

submitter = self.get_submission_info(description_mol)

description_dict = description_mol.GetPropsAsDict()
version = description_mol.GetProp('_Name')
computed_set.spec_version = version.split('_')[-1]
Expand Down Expand Up @@ -472,6 +485,7 @@ def task(self) -> ComputedSet:
f' (unique_name="{unique_name}" len_existing={len_existing})'
)
else:
logger.info('Creating new ComputedSet')
computed_set = ComputedSet()

text_scores = TextScoreValues.objects.filter(score__computed_set=computed_set)
Expand All @@ -498,15 +512,17 @@ def task(self) -> ComputedSet:
# Here the ComputedSet owner will take on a default (anonymous) value.
assert settings.AUTHENTICATE_UPLOAD is False
computed_set.save()
logger.info('%s', computed_set)

# set descriptions and get all other mols back
mols_to_process = self.set_descriptions(
filename=sdf_filename, computed_set=computed_set
)

# process every other mol
logger.info('%s mols_to_process=%s', computed_set, len(mols_to_process))
for i in range(len(mols_to_process)):
self.process_mol(
_ = self.process_mol(
mols_to_process[i],
self.target,
computed_set,
Expand All @@ -515,17 +531,15 @@ def task(self) -> ComputedSet:
self.zfile_hashvals,
)

# check that molecules have been added to the compound set
_ = ComputedMolecule.objects.filter(computed_set=computed_set)

# check compound set folder exists.
cmp_set_folder = os.path.join(settings.MEDIA_ROOT, 'compound_sets')
if not os.path.isdir(cmp_set_folder):
logger.info('Making ComputedSet folder (%s)', cmp_set_folder)
os.mkdir(cmp_set_folder)

# move and save the compound set
new_filename = (
settings.MEDIA_ROOT + 'compound_sets/' + sdf_filename.split('/')[-1]
f'{settings.MEDIA_ROOT}compound_sets/' + sdf_filename.split('/')[-1]
)
shutil.copy(sdf_filename, new_filename)
# os.renames(sdf_filename, new_filename)
Expand All @@ -537,6 +551,7 @@ def task(self) -> ComputedSet:
# print(list(set(old_mols)))

for old_mol in old_mols:
logger.info('Deleting old molecule %s', old_mol)
old_mol.delete()

return computed_set
Expand Down
30 changes: 30 additions & 0 deletions viewer/migrations/0022_add_computedMolecule_rationale.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
# Generated by Django 3.2.20 on 2023-12-08 10:45

from django.db import migrations, models


class Migration(migrations.Migration):
dependencies = [
('viewer', '0021_fix_computedset_table'),
]

operations = [
migrations.AddField(
model_name='computedmolecule',
name='rationale',
field=models.TextField(
blank=True,
help_text='An optional rationale for this molecule',
null=True,
),
),
migrations.AddField(
model_name='computedmolecule',
name='ref_url',
field=models.TextField(
blank=True,
help_text='An optional url linking to the reference for this molecule',
null=True,
),
),
]
10 changes: 10 additions & 0 deletions viewer/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -950,6 +950,16 @@ class ComputedMolecule(models.Model):
name = models.CharField(max_length=50)
smiles = models.CharField(max_length=255)
computed_inspirations = models.ManyToManyField(SiteObservation, blank=True)
ref_url = models.TextField(
null=True,
blank=True,
help_text="An optional url linking to the reference for this molecule",
)
rationale = models.TextField(
null=True,
blank=True,
help_text="An optional rationale for this molecule",
)

def __str__(self) -> str:
return f"{self.smiles}"
Expand Down

0 comments on commit 701f30a

Please sign in to comment.