diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 886bcbeb..b2ecae3c 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -42,7 +42,7 @@ repos:
   # the user has done 90% of the lint checks before the code
   # hits the server.
   - repo: https://github.com/pycqa/pylint
-    rev: v2.17.0
+    rev: v3.0.2
     hooks:
     - id: pylint
       additional_dependencies:
diff --git a/README.md b/README.md
index 6f0f3a43..9caa9bb5 100644
--- a/README.md
+++ b/README.md
@@ -201,29 +201,30 @@ at `/code/logs`.
 ## Database migrations
 
 The best approach is to spin-up the development backend (locally) using
-`docker-compose` and then shell into Django. For example,
-to make new migrations called "add_job_request_start_and_finish_times"
+`docker-compose` with the custom *migration* compose file and then shell into Django.
+For example, to make new migrations called "add_job_request_start_and_finish_times"
 for the viewer's model run the following: -
 
 > Before starting postgres, if you need to, remove any pre-existing local database with `rm -rf ./data/postgresql`
 
-    docker-compose up -d
+    docker-compose -f docker-compose-migrate.yml up -d
 
 Then from within the backend container make the migrations
 (in this case for the `viewer`)...
 
-    docker-compose exec backend bash
+    docker-compose -f docker-compose-migrate.yml exec backend bash
 
     python manage.py makemigrations viewer --name "add_job_request_start_and_finish_times"
 
 Exit the container and tear-down the deployment: -
 
-    docker-compose down
+    docker-compose -f docker-compose-migrate.yml down
 
-> The migrations will be written to your clone's filesystem as the clone directory
-  is mapped into the container as a volume. You just need to commit the
-  migrations that have been written to the local directory to Git.
+> The migrations will be written to your clone's filesystem as the project directory
+  is mapped into the container as a volume at `/code`. You just need to commit the
+  migrations that have been written to the corresponding migrations directory.
 
 ## Sentry error logging
 
 [Sentry] can be used to log errors in the backend container image.
diff --git a/docker-compose-migrate.yml b/docker-compose-migrate.yml
new file mode 100644
index 00000000..419a8125
--- /dev/null
+++ b/docker-compose-migrate.yml
@@ -0,0 +1,77 @@
+---
+
+# You typically create a .env file to populate the
+# sensitive variables for the backend deployment.
+# Then bring the containers up with: -
+#   docker-compose -f docker-compose-migrate.yml up -d
+# Then enter the backend container with: -
+#   docker-compose -f docker-compose-migrate.yml exec backend bash
+# Then run the migrations with: -
+#   python manage.py makemigrations viewer --name "add_job_request_start_and_finish_times"
+
+version: '3'
+
+services:
+
+  # The database
+  database:
+    image: postgres:12.16-alpine3.18
+    container_name: database
+    volumes:
+    - ./data/postgresql/data:/var/lib/postgresql/data
+    environment:
+      POSTGRES_PASSWORD: fragalysis
+      POSTGRES_DB: frag
+      PGDATA: /var/lib/postgresql/data/pgdata
+    ports:
+    - "5432:5432"
+    healthcheck:
+      test: pg_isready -U postgres -d frag
+      interval: 10s
+      timeout: 2s
+      retries: 5
+      start_period: 10s
+
+  # The stack backend
+  backend:
+    image: ${BE_NAMESPACE:-xchem}/fragalysis-backend:${BE_IMAGE_TAG:-latest}
+    container_name: backend
+    build:
+      context: .
+      dockerfile: Dockerfile
+    command: /bin/bash /code/launch-stack.sh
+    volumes:
+    - ./data/logs:/code/logs/
+    - ./data/media:/code/media/
+    - .:/code/
+    environment:
+      AUTHENTICATE_UPLOAD: ${AUTHENTICATE_UPLOAD:-True}
+      POSTGRESQL_USER: postgres
+      # Celery tasks need to run synchronously
+      CELERY_TASK_ALWAYS_EAGER: 'True'
+      # Error reporting and default/root log-level
+      FRAGALYSIS_BACKEND_SENTRY_DNS: ${FRAGALYSIS_BACKEND_SENTRY_DNS}
+      LOGGING_FRAMEWORK_ROOT_LEVEL: ${LOGGING_FRAMEWORK_ROOT_LEVEL:-INFO}
+      # Keycloak configuration
+      OIDC_KEYCLOAK_REALM: ${OIDC_KEYCLOAK_REALM}
+      OIDC_RP_CLIENT_ID: ${OIDC_RP_CLIENT_ID:-fragalysis-local}
+      OIDC_RP_CLIENT_SECRET: ${OIDC_RP_CLIENT_SECRET}
+      OIDC_AS_CLIENT_ID: ${OIDC_AS_CLIENT_ID:-account-server-api}
+      OIDC_DM_CLIENT_ID: ${OIDC_DM_CLIENT_ID:-data-manager-api}
+      OIDC_RENEW_ID_TOKEN_EXPIRY_MINUTES: '210'
+      # Squonk configuration
+      SQUONK2_VERIFY_CERTIFICATES: 'No'
+      SQUONK2_UNIT_BILLING_DAY: 3
+      SQUONK2_PRODUCT_FLAVOUR: BRONZE
+      SQUONK2_SLUG: fs-local
+      SQUONK2_ORG_OWNER: ${SQUONK2_ORG_OWNER}
+      SQUONK2_ORG_OWNER_PASSWORD: ${SQUONK2_ORG_OWNER_PASSWORD}
+      SQUONK2_ORG_UUID: ${SQUONK2_ORG_UUID}
+      SQUONK2_UI_URL: ${SQUONK2_UI_URL}
+      SQUONK2_DMAPI_URL: ${SQUONK2_DMAPI_URL}
+      SQUONK2_ASAPI_URL: ${SQUONK2_ASAPI_URL}
+    ports:
+    - "8080:80"
+    depends_on:
+      database:
+        condition: service_healthy
diff --git a/fragalysis/settings.py b/fragalysis/settings.py
index e2ad5a32..e81c87e0 100644
--- a/fragalysis/settings.py
+++ b/fragalysis/settings.py
@@ -354,6 +354,10 @@
 # dedicated Discourse server.
 DISCOURSE_DEV_POST_SUFFIX = os.environ.get("DISCOURSE_DEV_POST_SUFFIX", '')
 
+# Where all the computed set SDF files are hosted (relative to the MEDIA_ROOT).
+# Used primarily by the Computed-Set upload logic.
+COMPUTED_SET_SDF_ROOT = "computed_set_sdfs/"
+
 # An optional URL that identifies the URL to a prior stack.
 # If set, it's typically something like "https://fragalysis.diamond.ac.uk".
 # It can be blank, indicating there is no legacy service.
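Editor's note: `COMPUTED_SET_SDF_ROOT` is deliberately relative because Django's
`FileField(upload_to=...)` resolves paths against `MEDIA_ROOT`. A minimal sketch of
where an uploaded file lands (the file name here is hypothetical): -

    import os
    from django.conf import settings

    sdf_name = "example.sdf"  # hypothetical upload
    # FileField(upload_to=COMPUTED_SET_SDF_ROOT) stores files relative to MEDIA_ROOT,
    # e.g. /code/media/computed_set_sdfs/example.sdf inside the backend container.
    expected_path = os.path.join(
        settings.MEDIA_ROOT, settings.COMPUTED_SET_SDF_ROOT, sdf_name
    )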
diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py
index bd274114..213a15b8 100644
--- a/viewer/cset_upload.py
+++ b/viewer/cset_upload.py
@@ -1,18 +1,17 @@
+import logging
 import os
-os.environ.setdefault("DJANGO_SETTINGS_MODULE", "fragalysis.settings")
-import django
-django.setup()
-from django.conf import settings
-
-import zipfile
 import datetime
 import ast
 import shutil
+import zipfile
+import uuid
+from typing import Any, Dict, List, Optional, Tuple
 
+os.environ.setdefault("DJANGO_SETTINGS_MODULE", "fragalysis.settings")
+import django
+django.setup()
+from django.conf import settings
 
-# import the logging library
-import logging
-# Get an instance of a logger
 logger = logging.getLogger(__name__)
 
 from django.core.files.storage import default_storage
@@ -20,7 +19,6 @@
 from rdkit import Chem
 from rdkit.Chem import Crippen, Descriptors
 
-import uuid
 
 from viewer.models import (
     Compound,
@@ -36,7 +34,7 @@
 )
 
 
-def dataType(a_str):
+def dataType(a_str: str) -> str:
     lean_str = a_str.strip()
     if len(lean_str) == 0:
         return 'BLANK'
     try:
@@ -63,7 +61,7 @@
 
 class PdbOps:
 
-    def save_pdb_zip(self, pdb_file):
+    def save_pdb_zip(self, pdb_file) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, str]]]:
         zfile = None
         zfile_hashvals = None
         if pdb_file:
@@ -71,7 +69,6 @@
             zip_lst = zf.namelist()
             zfile = {}
             zfile_hashvals = {}
-            print(zip_lst)
             for filename in zip_lst:
                 # only handle pdb files
                 if filename.split('.')[-1] == 'pdb':
@@ -79,7 +76,6 @@
                     code = filename.split('/')[-1].replace('.pdb', '')
                     test_pdb_code = filename.split('/')[-1].replace('.pdb', '')
                     test_site_obvs_objs = SiteObservation.objects.filter(code=test_pdb_code)
-                    print([c.code for c in test_site_obvs_objs])
 
                     if len(test_site_obvs_objs) != 0:
                         # make a unique pdb code as not to overwrite existing object
@@ -99,11 +95,8 @@
 
         return zfile, zfile_hashvals
 
-
-    def run(self, params):
-        zfile, zfile_hashval = self.save_pdb_zip(params['pdb_zip'])
-
-        return zfile, zfile_hashval
+    def run(self, params) -> Tuple[Optional[Dict[str, Any]], Optional[Dict[str, str]]]:
+        return self.save_pdb_zip(params['pdb_zip'])
 
 
 class MolOps:
@@ -118,7 +111,7 @@ def __init__(self, user_id, sdf_filename, submitter_name, submitter_method, targ
         self.zfile = zfile
         self.zfile_hashvals = zfile_hashvals
 
-    def process_pdb(self, pdb_code, target, zfile, zfile_hashvals):
+    def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservation:
         for key in zfile_hashvals.keys():
             if key == pdb_code:
@@ -145,7 +138,7 @@
         return site_obvs
 
     # use zfile object for pdb files uploaded in zip
-    def get_prot(self, mol, target, compound_set, zfile, zfile_hashvals):
+    def get_prot(self, mol, target, compound_set, zfile, zfile_hashvals) -> Optional[SiteObservation]:
         # The returned protein object may be None
 
         pdb_fn = mol.GetProp('ref_pdb').split('/')[-1]
@@ -162,8 +155,8 @@
             try:
                 site_obvs = SiteObservation.objects.get(code__contains=name)
             except SiteObservation.DoesNotExist:
-                # Protein lookup failed.
-                logger.warning('Failed to get Protein object (target=%s name=%s)',
+                # SiteObservation lookup failed.
+                logger.warning('Failed to get SiteObservation object (target=%s name=%s)',
                                compound_set.target.title, name)
                 # Try an alternative.
                # If all else fails then the prot_obj will be 'None'
@@ -174,13 +167,14 @@
                     site_obvs = qs[0]
 
         if not site_obvs:
-            logger.warning('No Protein object (target=%s pdb_fn=%s)',
+            logger.warning('No SiteObservation object (target=%s pdb_fn=%s)',
                            compound_set.target.title, pdb_fn)
 
         return site_obvs
 
-    def create_mol(self, inchi, long_inchi=None, name=None):
-        # check for an existing compound
+    def create_mol(self, inchi, long_inchi=None, name=None) -> Compound:
+
+        # check for an existing compound, returning a Compound
         if long_inchi:
             cpd = Compound.objects.filter(long_inchi=long_inchi)
             sanitized_mol = Chem.MolFromInchi(long_inchi, sanitize=True)
@@ -220,11 +214,12 @@
 
         return new_mol
 
-
-    def set_props(self, cpd, props, compound_set):
+    def set_props(self, cpd, props, compound_set) -> ScoreDescription:
         if 'ref_mols' and 'ref_pdb' not in list(props.keys()):
             raise Exception('ref_mols and ref_pdb not set!')
         set_obj = ScoreDescription.objects.filter(computed_set=compound_set)
+        assert set_obj
+
         set_props_list = [s.name for s in set_obj]
         for key in list(props.keys()):
             if key in set_props_list not in ['ref_mols', 'ref_pdb', 'original SMILES']:
@@ -241,10 +236,20 @@
 
         return set_obj
 
-    def set_mol(self, mol, target, compound_set, filename, zfile=None, zfile_hashvals=None):
+    def set_mol(self,
+                mol,
+                target,
+                compound_set,
+                filename,
+                zfile=None,
+                zfile_hashvals=None) -> ComputedMolecule:
         # Don't need...
         del filename
 
+        assert mol
+        assert target
+        assert compound_set
+
         smiles = Chem.MolToSmiles(mol)
         inchi = Chem.inchi.MolToInchi(mol)
         name = mol.GetProp('_Name')
@@ -253,7 +258,7 @@
             long_inchi = inchi
             inchi = inchi[:254]
 
-        ref_cpd = self.create_mol(inchi, name=name, long_inchi=long_inchi)
+        ref_cpd: Compound = self.create_mol(inchi, name=name, long_inchi=long_inchi)
 
         mol_block = Chem.MolToMolBlock(mol)
 
@@ -265,15 +270,14 @@
             # try exact match first
             try:
                 site_obvs = SiteObservation.objects.get(
-                    prot_id__code__contains=str(compound_set.target.title + '-' + i),
-                    prot_id__target_id=compound_set.target,
+                    code__contains=str(compound_set.target.title + '-' + i),
+                    experiment__experiment_upload__target_id=compound_set.target,
                 )
                 ref = site_obvs
             except SiteObservation.DoesNotExist:
                 qs = SiteObservation.objects.filter(
-                    prot_id__code__contains=str(compound_set.target.title + '-' + i.split(':')[0].split('_')[0]),
-                    prot_id__target_id=compound_set.target,
+                    code__contains=str(compound_set.target.title + '-' + i.split(':')[0].split('_')[0]),
+                    experiment__experiment_upload__target_id=compound_set.target,
                 )
             if not qs.exists():
                 raise Exception('No matching molecules found for inspiration frag ' + i)  # pylint: disable=raise-missing-from
@@ -300,31 +304,32 @@
 
         existing = ComputedMolecule.objects.filter(name=name, smiles=smiles, computed_set=compound_set)
         if len(existing) == 1:
-            cpd = existing[0]
-        if len(existing) > 1:
+            computed_molecule: ComputedMolecule = existing[0]
+        elif len(existing) > 1:
             for exist in existing:
                 exist.delete()
-            cpd = ComputedMolecule()
+            computed_molecule = ComputedMolecule()
         elif len(existing) == 0:
-            cpd = ComputedMolecule()
-
-        cpd.compound = ref_cpd
-        cpd.computed_set = compound_set
-        cpd.sdf_info = mol_block
-        cpd.name = name
-        cpd.smiles = smiles
-        cpd.pdb = prot
-        cpd.save()
-
+            computed_molecule = ComputedMolecule()
+
+        computed_molecule.compound = ref_cpd
+        computed_molecule.computed_set = compound_set
+        computed_molecule.sdf_info = mol_block
+        computed_molecule.name = name
+        computed_molecule.smiles = smiles
+        # To avoid the error
+        #   needs to have a value for field "id"
+        #   before this many-to-many relationship can be used.
+        # We must save this ComputedMolecule before adding inspirations
+        computed_molecule.save()
         for insp_frag in insp_frags:
-            cpd.computed_inspirations.add(insp_frag)
-
-        cpd.save()
+            computed_molecule.computed_inspirations.add(insp_frag)
+        computed_molecule.save()
 
-        return cpd
+        return computed_molecule
 
-    def get_submission_info(self, description_mol):
+    def get_submission_info(self, description_mol) -> ComputedSetSubmitter:
         y_m_d = description_mol.GetProp('generation_date').split('-')
 
         submitter = ComputedSetSubmitter.objects.get_or_create(name=description_mol.GetProp('submitter_name'),
@@ -338,14 +343,20 @@
 
         return submitter
 
-    def process_mol(self, mol, target, compound_set, filename, zfile=None, zfile_hashvals=None):
+    def process_mol(self,
+                    mol,
+                    target,
+                    compound_set,
+                    filename,
+                    zfile=None,
+                    zfile_hashvals=None) -> ScoreDescription:
         cpd = self.set_mol(mol, target, compound_set, filename, zfile, zfile_hashvals)
         other_props = mol.GetPropsAsDict()
-        compound_set = self.set_props(cpd, other_props, compound_set)
+        score_description = self.set_props(cpd, other_props, compound_set)
 
-        return compound_set
+        return score_description
 
-    def set_descriptions(self, filename, compound_set):
+    def set_descriptions(self, filename, computed_set: ComputedSet) -> List[str]:
         suppl = Chem.SDMolSupplier(str(filename))
         description_mol = suppl[0]
 
@@ -362,26 +373,25 @@
         description_dict = description_mol.GetPropsAsDict()
         version = description_mol.GetProp('_Name')
-        compound_set.spec_version = version.split('_')[-1]
+        computed_set.spec_version = version.split('_')[-1]
         method = description_mol.GetProp('ref_url')
-        compound_set.method_url = method
-        compound_set.submitter = submitter
-        compound_set.save()
+        computed_set.method_url = method
+        computed_set.submitter = submitter
+        computed_set.save()
 
         for key in list(description_dict.keys()):
             if key in descriptions_needed and key not in ['ref_mols', 'ref_pdb', 'index', 'Name', 'original SMILES']:
-                _ = ScoreDescription.objects.get_or_create(computed_set=compound_set,
+                _ = ScoreDescription.objects.get_or_create(computed_set=computed_set,
                                                            name=key,
                                                            description=description_dict[key],
                                                            )
 
         return mols
 
-    def task(self):
+    def task(self) -> ComputedSet:
         user = User.objects.get(id=self.user_id)
         sdf_filename = str(self.sdf_filename)
 
-        # create a new compound set
         set_name = ''.join(sdf_filename.split('/')[-1].replace('.sdf', '').split('_')[1:])
         unique_name = "".join(self.submitter_name.split()) + '-' + "".join(self.submitter_method.split())
@@ -392,38 +402,36 @@
 
         if len_existing == 1:
             logger.info('Using existing ComputedSet')
-            compound_set = existing[0]
+            computed_set = existing[0]
         elif len_existing > 1:
             raise Exception('Too many ComputedSet instances exist'
                             f' (unique_name="{unique_name}" len_existing={len_existing})')
         else:
-            compound_set = ComputedSet()
+            computed_set = ComputedSet()
 
-        text_scores = TextScoreValues.objects.filter(score__computed_set=compound_set)
-        num_scores = NumericalScoreValues.objects.filter(score__computed_set=compound_set)
+        text_scores = TextScoreValues.objects.filter(score__computed_set=computed_set)
+        num_scores = NumericalScoreValues.objects.filter(score__computed_set=computed_set)
 
         old_mols = [o.compound for o in text_scores]
         old_mols.extend([o.compound for o in num_scores])
-        print(list(set(old_mols)))
 
-        compound_set.name = set_name
+        computed_set.name = set_name
         matching_target = Target.objects.get(title=self.target)
-        compound_set.target = matching_target
-        ver = float(self.version.strip('ver_'))
-        compound_set.spec_version = ver
-        compound_set.unique_name = "".join(self.submitter_name.split()) + '-' + "".join(self.submitter_method.split())
-        compound_set.owner_user = user
-        compound_set.save()
+        computed_set.target = matching_target
+        computed_set.spec_version = float(self.version.strip('ver_'))
+        computed_set.unique_name = "".join(self.submitter_name.split()) + '-' + "".join(self.submitter_method.split())
+        computed_set.owner_user = user
+        computed_set.save()
 
         # set descriptions and get all other mols back
-        mols_to_process = self.set_descriptions(filename=sdf_filename, compound_set=compound_set)
+        mols_to_process = self.set_descriptions(filename=sdf_filename, computed_set=computed_set)
 
         # process every other mol
         for i in range(0, len(mols_to_process)):
-            self.process_mol(mols_to_process[i], self.target, compound_set, sdf_filename, self.zfile, self.zfile_hashvals)
+            self.process_mol(mols_to_process[i], self.target, computed_set, sdf_filename, self.zfile, self.zfile_hashvals)
 
         # check that molecules have been added to the compound set
-        _ = ComputedMolecule.objects.filter(computed_set=compound_set)
+        _ = ComputedMolecule.objects.filter(computed_set=computed_set)
 
         # check compound set folder exists.
         cmp_set_folder = os.path.join(settings.MEDIA_ROOT, 'compound_sets')
@@ -434,8 +442,8 @@
         new_filename = settings.MEDIA_ROOT + 'compound_sets/' + sdf_filename.split('/')[-1]
         shutil.copy(sdf_filename, new_filename)
         # os.renames(sdf_filename, new_filename)
-        compound_set.submitted_sdf = new_filename
-        compound_set.save()
+        computed_set.submitted_sdf = new_filename
+        computed_set.save()
 
         # old_mols = [o.compound for o in old_s2]
         # old_mols.extend([o.compound for o in old_s1])
@@ -444,10 +452,10 @@
         for old_mol in old_mols:
             old_mol.delete()
 
-        return compound_set
+        return computed_set
 
 
-def blank_mol_vals(sdf_file):
+def blank_mol_vals(sdf_file) -> Tuple[str, str, str]:
     """Returns the submitter name, method and version (_Name) if present.
     If not present the corresponding values are empty strings.
     """
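Editor's note: the save-before-add sequence in `set_mol()` above is forced by the
ORM — Django can only create rows in the new `computed_inspirations` many-to-many
table once the `ComputedMolecule` has a primary key. A minimal sketch of the
pattern (model names from `viewer.models`; the other variables stand in for the
values built inside `set_mol()`): -

    from viewer.models import ComputedMolecule

    computed_molecule = ComputedMolecule(
        compound=ref_cpd,           # Compound returned by create_mol()
        computed_set=computed_set,  # the ComputedSet being uploaded
        name=name,
        smiles=smiles,
    )
    computed_molecule.save()  # must happen first: .add() needs a value for "id"
    # Only now can the SiteObservation inspirations be attached.
    computed_molecule.computed_inspirations.add(*insp_frags)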
""" diff --git a/viewer/migrations/0021_fix_computedset_table.py b/viewer/migrations/0021_fix_computedset_table.py new file mode 100644 index 00000000..79bbdb04 --- /dev/null +++ b/viewer/migrations/0021_fix_computedset_table.py @@ -0,0 +1,32 @@ +# Generated by Django 3.2.20 on 2023-11-24 10:38 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('viewer', '0020_remove_jobfiletransfer_transfer_spec'), + ] + + operations = [ + migrations.RemoveField( + model_name='computedmolecule', + name='site_observation', + ), + migrations.AddField( + model_name='computedmolecule', + name='computed_inspirations', + field=models.ManyToManyField(blank=True, to='viewer.SiteObservation'), + ), + migrations.AlterField( + model_name='computedset', + name='spec_version', + field=models.FloatField(help_text='The version of the SDF file format specification'), + ), + migrations.AlterField( + model_name='computedset', + name='submitted_sdf', + field=models.FileField(help_text='The SDF file containing the computed set', max_length=255, upload_to='computed_set_sdfs/'), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index 8e7a1a69..1e8caf08 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -29,6 +29,8 @@ from .managers import CanonSiteDataManager from .managers import CanonSiteConfDataManager +from fragalysis.settings import COMPUTED_SET_SDF_ROOT + logger = logging.getLogger(__name__) @@ -666,7 +668,7 @@ def __str__(self) -> str: return f"{self.user}" def __repr__(self) -> str: - return "" % (self.id, self.user, self.uuid) + return "" % (self.uuid, self.user) # computed sets = sets of poses calculated computationally @@ -691,9 +693,9 @@ class ComputedSet(models.Model): name = models.CharField(max_length=50, unique=True, primary_key=True) target = models.ForeignKey(Target, null=True, on_delete=models.CASCADE) - submitted_sdf = models.FileField(upload_to="compound_sets/", max_length=255, - help_text="The sdf file containing the computed set") - spec_version = models.FloatField(help_text="The version of the sdf file format specification") + submitted_sdf = models.FileField(upload_to=COMPUTED_SET_SDF_ROOT, max_length=255, + help_text="The SDF file containing the computed set") + spec_version = models.FloatField(help_text="The version of the SDF file format specification") method_url = models.TextField(max_length=1000, null=True, help_text="A url linking to a write-up of the methodology used to create the" " computed set") @@ -730,7 +732,7 @@ def __str__(self) -> str: return f"{self.name}" def __repr__(self) -> str: - return "" % (self.id, self.name, self.target) + return "" % (self.name, self.target, self.submitter) class ComputedMolecule(models.Model): @@ -741,8 +743,7 @@ class ComputedMolecule(models.Model): computed_set = models.ForeignKey(ComputedSet, on_delete=models.CASCADE) name = models.CharField(max_length=50) smiles = models.CharField(max_length=255) - site_observation = models.ForeignKey(SiteObservation, on_delete=models.PROTECT, null=True) - + computed_inspirations = models.ManyToManyField(SiteObservation, blank=True) def __str__(self) -> str: return f"{self.smiles}" @@ -751,7 +752,6 @@ def __repr__(self) -> str: return "" % (self.id, self.smiles, self.name, self.compound) - class ScoreDescription(models.Model): """The names and descriptions of scores that the user uploads with each computed set molecule. 
""" @@ -781,7 +781,6 @@ def __repr__(self) -> str: return "" % (self.id, self.score, self.value, self.compound) - class TextScoreValues(models.Model): """The values of text scores that the user uploads with each computed set molecule. """ diff --git a/viewer/tasks.py b/viewer/tasks.py index d6da850a..611bfcb5 100644 --- a/viewer/tasks.py +++ b/viewer/tasks.py @@ -72,27 +72,25 @@ def process_compound_set(validate_output): Returns ------- - If successful, a dictionary that contains the fields: - + If successful, a tuple that contains the fields: - - process_stage: with a value of 'process' - process_type: with a vlue of 'cset' - compound_set_name: the compound set name (its primary key) + A value of 'process' + the compound set name (its primary key) Otherwise (i.e. not validated)... Returns the response from 'validate_compound_set()' """ - process_stage, process_type, validate_dict, validated, params = validate_output + process_stage, validate_dict, validated, params = validate_output logger.info('process_compound_set() ENTER') - logger.info('process_compound_set() process_type=%s', process_type) logger.info('process_compound_set() validated=%s', validated) logger.info('process_compound_set() params=%s', params) if not validated: logger.warning('process_compound_set() EXIT params=%s (not validated)', params) - return process_stage, 'cset', validate_dict, validated + return process_stage, validate_dict, validated submitter_name, submitter_method, blank_version = blank_mol_vals(params['sdf']) zfile, zfile_hashvals = PdbOps().run(params) @@ -116,9 +114,7 @@ def process_compound_set(validate_output): compound_set = save_mols.task() logger.info('process_compound_set() EXIT (CompoundSet.name="%s")', compound_set.name) - return {'process_stage': 'process', - 'process_type': 'cset', - 'compound_set_name': compound_set.name} + return 'process', compound_set.name @shared_task @@ -149,8 +145,7 @@ def validate_compound_set(task_params): ------- tuple containing the following: - processing stage (str): 'validate' - - processing type (str): 'cset' - - validate dict (dict): dict containing any errors found during the calidation step + - validate dict (dict): dict containing any errors found during the validation step - validated (bool): True if the file(s) were validated, False if not - params (dict): - user_id (int): User record ID of user initiating the task @@ -204,12 +199,12 @@ def validate_compound_set(task_params): validated = False logger.info('validate_compound_set() EXIT (no file) validated=%s outbound_params=%s', validated, outbound_params) - return 'validate', 'cset', validate_dict, validated, outbound_params + return 'validate', validate_dict, validated, outbound_params elif not os.path.isfile(sdf_file): validated = False logger.info('validate_compound_set() EXIT (missing file) validated=%s outbound_params=%s', validated, outbound_params) - return 'validate', 'cset', validate_dict, validated, outbound_params + return 'validate', validate_dict, validated, outbound_params suppl = Chem.SDMolSupplier(sdf_file) # print('%d mols detected (including blank mol)' % (len(suppl),)) @@ -312,7 +307,7 @@ def validate_compound_set(task_params): logger.info('validate_compound_set() EXIT validated=%s outbound_params=%s', validated, outbound_params) - return 'validate', 'cset', validate_dict, validated, outbound_params + return 'validate', validate_dict, validated, outbound_params def create_mol(inchi, long_inchi=None, name=None): @@ -623,8 +618,9 @@ def process_compound_set_job_file(task_params): @shared_task 
diff --git a/viewer/tasks.py b/viewer/tasks.py
index d6da850a..611bfcb5 100644
--- a/viewer/tasks.py
+++ b/viewer/tasks.py
@@ -72,27 +72,25 @@ def process_compound_set(validate_output):
 
     Returns
     -------
-    If successful, a dictionary that contains the fields: -
-
-    process_stage: with a value of 'process'
-    process_type: with a vlue of 'cset'
-    compound_set_name: the compound set name (its primary key)
+    If successful, a tuple that contains the fields: -
+
+    A value of 'process'
+    the compound set name (its primary key)
 
     Otherwise (i.e. not validated)...
 
     Returns the response from 'validate_compound_set()'
     """
-    process_stage, process_type, validate_dict, validated, params = validate_output
+    process_stage, validate_dict, validated, params = validate_output
 
     logger.info('process_compound_set() ENTER')
-    logger.info('process_compound_set() process_type=%s', process_type)
     logger.info('process_compound_set() validated=%s', validated)
     logger.info('process_compound_set() params=%s', params)
 
     if not validated:
         logger.warning('process_compound_set() EXIT params=%s (not validated)', params)
-        return process_stage, 'cset', validate_dict, validated
+        return process_stage, validate_dict, validated
 
     submitter_name, submitter_method, blank_version = blank_mol_vals(params['sdf'])
     zfile, zfile_hashvals = PdbOps().run(params)
@@ -116,9 +114,7 @@
     compound_set = save_mols.task()
 
     logger.info('process_compound_set() EXIT (CompoundSet.name="%s")', compound_set.name)
-    return {'process_stage': 'process',
-            'process_type': 'cset',
-            'compound_set_name': compound_set.name}
+    return 'process', compound_set.name
 
 
 @shared_task
@@ -149,8 +145,7 @@
     -------
     tuple containing the following:
     - processing stage (str): 'validate'
-    - processing type (str): 'cset'
-    - validate dict (dict): dict containing any errors found during the calidation step
+    - validate dict (dict): dict containing any errors found during the validation step
     - validated (bool): True if the file(s) were validated, False if not
     - params (dict):
         - user_id (int): User record ID of user initiating the task
@@ -204,12 +199,12 @@
         validated = False
         logger.info('validate_compound_set() EXIT (no file) validated=%s outbound_params=%s',
                     validated, outbound_params)
-        return 'validate', 'cset', validate_dict, validated, outbound_params
+        return 'validate', validate_dict, validated, outbound_params
     elif not os.path.isfile(sdf_file):
         validated = False
         logger.info('validate_compound_set() EXIT (missing file) validated=%s outbound_params=%s',
                     validated, outbound_params)
-        return 'validate', 'cset', validate_dict, validated, outbound_params
+        return 'validate', validate_dict, validated, outbound_params
 
     suppl = Chem.SDMolSupplier(sdf_file)
     # print('%d mols detected (including blank mol)' % (len(suppl),))
@@ -312,7 +307,7 @@
 
     logger.info('validate_compound_set() EXIT validated=%s outbound_params=%s',
                 validated, outbound_params)
-    return 'validate', 'cset', validate_dict, validated, outbound_params
+    return 'validate', validate_dict, validated, outbound_params
 
 
 def create_mol(inchi, long_inchi=None, name=None):
@@ -623,8 +618,9 @@ def process_compound_set_job_file(task_params):
 
 @shared_task
 def erase_compound_set_job_material(task_params, job_request_id=0):
     """Celery task to clean-up files generated by a JobRequest on Squonk2.
-    We receive the output of 'process_compound_set()'. If the first field is not
-    'process' then we can assume the upload failed, maybe during validation?
+    We receive the output of 'process_compound_set()'. It provides us with a tuple.
+    If the first field is not 'process' then we can assume the upload failed,
+    maybe during validation?
 
     Parameters
     ----------
@@ -653,15 +649,15 @@
     #
     # Task linking is a bit of a mess atm,
     # if something went wrong we'll get a tuple, not a dictionary.
-    if isinstance(task_params, dict) \
-       and task_params['process_stage'] == 'process' \
-       and task_params['compound_set_name']:
-        logger.info('Upload successful (%d) CompoundSet.name="%s"',
-                    job_request_id, task_params['compound_set_name'])
+    if isinstance(task_params, list) \
+       and task_params[0] == 'process':
+        cs_name: str = task_params[1]
+        logger.info('Upload successful (%d) ComputedSet.name="%s"',
+                    job_request_id, cs_name)
         job_request.upload_status = 'SUCCESS'
         # We're given a compound set name.
         # Get its record and put that into the JobRequest...
-        cs = ComputedSet.objects.get(name=task_params['compound_set_name'])
+        cs = ComputedSet.objects.get(name=cs_name)
         assert cs
         job_request.computed_set = cs
     else:
diff --git a/viewer/urls.py b/viewer/urls.py
index 3a031a86..7e60f904 100644
--- a/viewer/urls.py
+++ b/viewer/urls.py
@@ -17,7 +17,6 @@
     path("open_targets/", views.get_open_targets, name="get_open_targets"),
     path("compound_set/<name>/", views.cset_download, name="compound_set"),
     path("protein_set/<name>/", views.pset_download, name="protein_set"),
-    path("target/<title>/", views.tset_download, name="target_set"),
     path("upload_designs/", views.DSetUploadView.as_view(), name="upload_designs"),
     path("job_access/", views.JobAccessView.as_view(), name="job_access"),
    path("task_status/<uuid:task_id>/", views.TaskStatus.as_view(), name="task_status"),
diff --git a/viewer/views.py b/viewer/views.py
index e373dcae..2353cc21 100644
--- a/viewer/views.py
+++ b/viewer/views.py
@@ -2,7 +2,6 @@
 import json
 import os
 import zipfile
-from io import StringIO
 import uuid
 import shlex
 import shutil
@@ -429,8 +428,14 @@
         tmp_pdb_file = None
         tmp_sdf_file = None
         if 'pdb_zip' in list(request.FILES.keys()):
-            pdb_file = request.FILES['pdb_zip']
-            tmp_pdb_file = save_tmp_file(pdb_file)
+            # In the first stage (green release) of the XCA-based Fragalysis Stack
+            # we do not support PDB files.
+            request.session[_SESSION_ERROR] = \
+                'This release does not support the inclusion of PDB files.'
+            logger.warning('- UploadCSet POST error_msg="%s"', request.session[_SESSION_ERROR])
+            return redirect('viewer:upload_cset')
+#            pdb_file = request.FILES['pdb_zip']
+#            tmp_pdb_file = save_tmp_file(pdb_file)
         if sdf_file:
             tmp_sdf_file = save_tmp_file(sdf_file)
@@ -506,7 +511,6 @@
 
         return render(request, 'viewer/upload-cset.html', context)
 
-
 def email_task_completion(contact_email, message_type, target_name, target_path=None, task_id=None):
     """Notify user of upload completion
     """
@@ -544,8 +548,7 @@
 
 class ValidateTaskView(View):
     """View to handle dynamic loading of validation results from `viewer.tasks.validate`.
-    The validation of files uploaded to viewer/upload_cset or a target set by a user
-    at viewer/upload_tset
+    The validation of files uploaded to viewer/upload_cset.
     """
     def get(self, request, validate_task_id):
         """Get method for `ValidateTaskView`. Takes a validate task id, checks its
@@ -586,19 +589,12 @@
         if task.status == "SUCCESS":
             logger.info('+ ValidateTaskView.get.SUCCESS')
             results = task.get()
-            # NB get tuple from validate task
-            process_type = results[1]
-            validate_dict = results[2]
-            validated = results[3]
+            # Response from validation is a tuple
+            validate_dict = results[1]
+            validated = results[2]
 
             if validated:
                 response_data['html'] = 'Your data was validated. \n It can now be uploaded using the upload option.'
                 response_data['validated'] = 'Validated'
-
-                if process_type== 'tset':
-                    target_name = results[5]
-                    contact_email = results[8]
-                    email_task_completion(contact_email, 'validate-success', target_name)
-
                 return JsonResponse(response_data)
 
             if not validated:
@@ -611,10 +607,6 @@
                 response_data["html"] = html_table
                 response_data['validated'] = 'Not validated'
 
-                if process_type== 'tset':
-                    target_name = results[5]
-                    contact_email = results[8]
-                    email_task_completion(contact_email, 'validate-failure', target_name, task_id=validate_task_id_str)
 
                 return JsonResponse(response_data)
 
@@ -645,8 +637,7 @@
 
 class UploadTaskView(View):
     """View to handle dynamic loading of upload results from `viewer.tasks.process_compound_set`.
-    The upload of files for a computed set by a user at viewer/upload_cset or a target
-    set by a user at viewer/upload_tset.
+    The upload of files for a computed set by a user at viewer/upload_cset.
     """
     def get(self, request, upload_task_id):
         """Get method for `UploadTaskView`. Takes an upload task id, checks its
@@ -673,11 +664,8 @@
             if results are a validation/upload process:
             - validated (str): 'Validated'
             - results (dict): results
-                For compound sets ('cset')
                 - results['cset_download_url'] (str): download url for computed set sdf file
                 - results['pset_download_url'] (str): download url for computed set pdb files (zip)
-                For target sets ('tset')
-                - results['tset_download_url'] (str): download url for processed zip file
             if results are not string or list:
             - processed (str): 'None'
             - html (str): message to tell the user their data was not processed
@@ -694,12 +682,14 @@
 
             return JsonResponse(response_data)
 
+        logger.debug('+ UploadTaskView.get() task.status=%s', task.status)
         if task.status == 'SUCCESS':
-            logger.debug('+ UploadTaskView.get.success')
             results = task.get()
+            logger.debug('+ UploadTaskView.get() SUCCESS task.get()=%s (%s)', results, type(results))
 
-            # Validation output for a cset or tset is a dictionary.
+            # Was the task about validation or processing (an actual upload)?
+            # We receive a list with the first value being 'validate' or 'process'
             if isinstance(results, list):
                 if results[0] == 'validate':
                     # Get dictionary results
+                    validate_dict = results[1]
 
                     # set pandas options to display all column data
                     pd.set_option('display.max_colwidth', -1)
-                    table = pd.DataFrame.from_dict(results[2])
+                    table = pd.DataFrame.from_dict(validate_dict)
                     html_table = table.to_html()
                     html_table += '''<p> Your data was <b>not</b> validated.
                     The table above shows errors</p>'''
-
                     response_data['validated'] = 'Not validated'
                     response_data['html'] = html_table
 
                     return JsonResponse(response_data)
 
                 else:
                     # Upload/Update output tasks send back a tuple
-                    # First element defines the source of the upload task (cset, tset)
                     response_data['validated'] = 'Validated'
 
-                    if results[1] == 'tset':
-                        target_name = results[2]
-                        contact_email = results[5]
-                        target_path = '/viewer/target/%s' % target_name
-                        response_data['results'] = {}
-                        response_data['results']['tset_download_url'] = target_path
-                        logger.info('+ UploadTaskView.get.success -email: %s', contact_email)
-                        email_task_completion(contact_email, 'upload-success', target_name, target_path=target_path)
-                    else:
-                        cset_name = results[2]
-                        cset = models.ComputedSet.objects.get(name=cset_name)
-                        submitter = cset.submitter
-                        name = cset.unique_name
-                        response_data['results'] = {}
-                        response_data['results']['cset_download_url'] = '/viewer/compound_set/%s' % name
-                        response_data['results']['pset_download_url'] = '/viewer/protein_set/%s' % name
+                    cset_name = results[1]
+                    cset = models.ComputedSet.objects.get(name=cset_name)
+                    name = cset.unique_name
+                    response_data['results'] = {}
+                    response_data['results']['cset_download_url'] = '/viewer/compound_set/%s' % name
+                    response_data['results']['pset_download_url'] = '/viewer/protein_set/%s' % name
 
                     return JsonResponse(response_data)
 
@@ -808,16 +786,15 @@
     return HttpResponse(json.dumps({'target_names': target_names, 'target_ids': target_ids}))
 
 
-# This is used in the URL on the process results page after uploading a compound_set
 def cset_download(request, name):
-    """View to download an SDF file of a computed set by name
+    """View to download an SDF file of a ComputedSet by name
     (viewer/compound_set/(<name>)).
     """
-    compound_set = models.ComputedSet.objects.get(unique_name=name)
-    filepath = compound_set.submitted_sdf
+    computed_set = models.ComputedSet.objects.get(unique_name=name)
+    filepath = computed_set.submitted_sdf
     with open(filepath.path, 'r', encoding='utf-8') as fp:
         data = fp.read()
-    filename = 'compund-set_' + name + '.sdf'
+    filename = 'computed-set_' + name + '.sdf'
     response = HttpResponse(content_type='text/plain')
     response['Content-Disposition'] = 'attachment; filename=%s' % filename  # force browser to download file
     response.write(data)
@@ -832,37 +809,27 @@
 
     filename = 'protein-set_' + name + '.zip'
     response['Content-Disposition'] = 'filename=%s' % filename  # force browser to download file
 
-    compound_set = models.ComputedSet.objects.get(unique_name=name)
-    computed = models.ComputedMolecule.objects.filter(computed_set=compound_set)
-    pdb_filepaths = list(set([c.pdb_info.path for c in computed]))
-
-    buff = StringIO()
-    zip_obj = zipfile.ZipFile(buff, 'w')
-
-    for fp in pdb_filepaths:
-        data = open(fp, 'r', encoding='utf-8').read()
-        zip_obj.writestr(fp.split('/')[-1], data)
-    zip_obj.close()
-
-    buff.flush()
-    ret_zip = buff.getvalue()
-    buff.close()
-    response.write(ret_zip)
-
-    return response
-
-
-# This is used in the URL on the process results page after uploading a target_set
-def tset_download(request, title):
-    """View to download a zip file of a target set by name (viewer/target/(<title>)).
- """ - target_set = models.Target.objects.get(title=title) - media_root = settings.MEDIA_ROOT - filepath = os.path.join(media_root, target_set.zip_archive.name) - target_zip = open(filepath, 'rb') - filename = 'target-set_' + title + '.zip' - response = HttpResponse(target_zip, content_type='application/force-download') - response['Content-Disposition'] = 'attachment; filename="%s"' % filename # force browser to download file + # For the first stage (green release) of the XCA-based Fragalysis Stack + # there are no PDB files. +# compound_set = models.ComputedSet.objects.get(unique_name=name) +# computed_molecules = models.ComputedMolecule.objects.filter(computed_set=compound_set) +# pdb_filepaths = list(set([c.pdb_info.path for c in computed_molecules])) +# buff = StringIO() +# zip_obj = zipfile.ZipFile(buff, 'w') +# zip_obj.writestr('') +# for fp in pdb_filepaths: +# data = open(fp, 'r', encoding='utf-8').read() +# zip_obj.writestr(fp.split('/')[-1], data) +# zip_obj.close() +# buff.flush() +# ret_zip = buff.getvalue() +# buff.close() + + # ...instead we just create an empty file... + with zipfile.ZipFile('dummy.zip', 'w') as pdb_file: + pass + + response.write(pdb_file) return response