From 4a703ed803595ef6f0a2f7b792b771d982866b65 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 12 Feb 2024 13:03:08 +0000 Subject: [PATCH 01/47] Some changes to cset_upload.py to allow site observation short codes (#527) * stashing * fix: cset_upload.py updated to allow new-style site observation codes NB! this probably still won't work! I suspect the file I was given is broken and I cannot test it further --- viewer/cset_upload.py | 25 ++++++++++++++++--------- viewer/sdf_check.py | 8 +++++--- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index f1e84b6f..9cbeed32 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -198,9 +198,12 @@ def get_site_observation( zfile_hashvals=zfile_hashvals, ) else: - name = f'{compound_set.target.title}-{pdb_fn}' + name = pdb_fn try: - site_obvs = SiteObservation.objects.get(code__contains=name) + site_obvs = SiteObservation.objects.get( + code__contains=name, + experiment__experiment_upload__target__title=target, + ) except SiteObservation.DoesNotExist: # Initial SiteObservation lookup failed. logger.warning( @@ -210,7 +213,10 @@ def get_site_observation( ) # Try alternatives. # If all else fails then the site_obvs will be 'None' - qs = SiteObservation.objects.filter(code__contains=name) + qs = SiteObservation.objects.filter( + code__contains=name, + experiment__experiment_upload__target__title=target, + ) if qs.exists(): logger.info( 'Found SiteObservation containing name=%s qs=%s', @@ -219,7 +225,10 @@ def get_site_observation( ) else: alt_name = name.split(':')[0].split('_')[0] - qs = SiteObservation.objects.filter(code__contains=alt_name) + qs = SiteObservation.objects.filter( + code__contains=alt_name, + experiment__experiment_upload__target__title=target, + ) if qs.exists(): logger.info( 'Found SiteObservation containing alternative name=%s qs=%s', @@ -328,15 +337,13 @@ def set_mol( # try exact match first try: site_obvs = SiteObservation.objects.get( - code__contains=str(compound_set.target.title + '-' + i), + code=str(i), experiment__experiment_upload__target_id=compound_set.target, ) ref = site_obvs except SiteObservation.DoesNotExist: qs = SiteObservation.objects.filter( - code__contains=str( - compound_set.target.title + '-' + i.split(':')[0].split('_')[0] - ), + code=str(i.split(':')[0].split('_')[0]), experiment__experiment_upload__target_id=compound_set.target, ) if not qs.exists(): @@ -503,7 +510,7 @@ def set_descriptions( computed_set.save() description_dict = description_mol.GetPropsAsDict() - for key in list(description_dict.keys()): + for key in description_dict.keys(): if key in descriptions_needed and key not in [ 'ref_mols', 'ref_pdb', diff --git a/viewer/sdf_check.py b/viewer/sdf_check.py index 949f1001..411128e4 100755 --- a/viewer/sdf_check.py +++ b/viewer/sdf_check.py @@ -89,10 +89,12 @@ def check_refmol(mol, validate_dict, target=None): for ref_mol in ref_mols: ref_strip = ref_mol.strip() - query_string = f'{target}-' + ref_strip.split(':')[0].split('_')[0] - query = SiteObservation.objects.filter(code__contains=query_string) + query = SiteObservation.objects.filter( + code=ref_strip, + experiment__experiment_upload__target__title=target, + ) if len(query) == 0: - msg = f"No SiteObservation code contains '{query_string}'" + msg = f"No SiteObservation code contains '{ref_strip}'" validate_dict = add_warning( molecule_name=mol.GetProp('_Name'), field='ref_mol', From 433f232d417335f378b5f8e875b74d85e95f6d8c Mon Sep 17 00:00:00 2001 From: Kalev Takkis 
Date: Mon, 12 Feb 2024 14:42:43 +0000 Subject: [PATCH 02/47] stashing --- viewer/target_loader.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index b9665633..0a61455c 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1437,12 +1437,33 @@ def process_bundle(self): self.report.log(logging.ERROR, msg) raise KeyError(msg) from exc - # moved this bit from init + try: + config_inputs = config["inputs"] + except KeyError as exc: + msg = "'inputs' key missing in config file" + self.report.log(logging.ERROR, msg) + raise KeyError(msg) from exc + + try: + code_prefix = config_inputs[0]["code_prefix"] + except KeyError as exc: + msg = "'code_prefix' key missing in config file" + self.report.log(logging.ERROR, msg) + raise KeyError(msg) from exc + try: + code_prefix_tooltip = config_inputs[0]["code_prefix_tooltip"] + except KeyError as exc: + msg = "'code_prefix_tooltip' key missing in config file" + self.report.log(logging.ERROR, msg) + raise KeyError(msg) from exc + self.target, target_created = Target.objects.get_or_create( title=self.target_name, display_name=self.target_name, ) + logger.debug("tooltip: %s", code_prefix_tooltip) + # TODO: original target loader's function get_create_projects # seems to handle more cases. adopt or copy visit = self.proposal_ref.split()[0] @@ -1681,20 +1702,21 @@ def process_bundle(self): # technically it should be validated in previous try-catch block logger.error("Non-standard SiteObservation code 2: %s", last) - logger.debug("iter_pos: %s", iter_pos) - # ... and create new one starting from next item suffix = alphanumerator(start_from=iter_pos) for so in so_group.filter(code__isnull=True): - code = f"{so.experiment.code.split('-')[1]}{next(suffix)}" + code = f"{code_prefix}{so.experiment.code.split('-')[1]}{next(suffix)}" # test uniqueness for target # TODO: this should ideally be solved by db engine, before # rushing to write the trigger, have think about the # loader concurrency situations - prefix = alphanumerator() - while code in current_list: - code = f"{next(prefix)}{code}" + if code in current_list: + msg = ( + f"short code {code} already exists for this target; " + + "specify a code_prefix to resolve this conflict" + ) + self.report.log(logging.ERROR, msg) so.code = code so.save() From 3d255f2240d6a309548fad09a2a54dd3b66c832a Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 13 Feb 2024 14:49:42 +0000 Subject: [PATCH 03/47] stashing --- viewer/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/views.py b/viewer/views.py index 6112f7ba..650945e1 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1492,7 +1492,7 @@ def create(self, request): # prot = models.Protein.objects.filter(code__contains=code_first_part).values() # I don't see why I need to drop out of django objects here prot = models.SiteObservation.objects.filter( - code__contains=code_first_part + experiment__experiment_upload__target=target, code=code_first_part ) if prot.exists(): # even more than just django object, I need an From 7fd97c9569be90769ddce44d1ec5ee011fd14f5e Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 13 Feb 2024 15:25:32 +0000 Subject: [PATCH 04/47] Short code prefix and tooltip to backend Target loader now reads short code prefix and tooltip from meta_aligner.yaml. Tooltip is saved to Experiment model. 
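As an illustration (the prefix and tooltip values here are hypothetical, not
taken from a real meta_aligner.yaml), the loader resolves a tooltip for each
experiment roughly like this:

    # sketch: the 'code_prefix_tooltips' mapping read from meta_aligner.yaml
    prefix_tooltips = {"Z": "Crystallographic fragment screening hit"}
    code_prefix = "Z"  # the per-experiment 'code_prefix' from the same file
    prefix_tooltip = prefix_tooltips.get(code_prefix, "")
    # stored on the Experiment record's new 'prefix_tooltip' field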
TODO: make tooltip available via API --- .../0043_experiment_prefix_tooltip.py | 17 +++++++++ viewer/models.py | 1 + viewer/target_loader.py | 37 +++++++------------ 3 files changed, 32 insertions(+), 23 deletions(-) create mode 100644 viewer/migrations/0043_experiment_prefix_tooltip.py diff --git a/viewer/migrations/0043_experiment_prefix_tooltip.py b/viewer/migrations/0043_experiment_prefix_tooltip.py new file mode 100644 index 00000000..93477ed4 --- /dev/null +++ b/viewer/migrations/0043_experiment_prefix_tooltip.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.23 on 2024-02-13 15:12 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0042_alter_xtalformsite_xtalform_site_num'), + ] + + operations = [ + migrations.AddField( + model_name='experiment', + name='prefix_tooltip', + field=models.TextField(null=True), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index e3839475..c2b8af72 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -196,6 +196,7 @@ class Experiment(models.Model): map_info = ArrayField(models.FileField(max_length=255), null=True) type = models.PositiveSmallIntegerField(null=True) pdb_sha256 = models.TextField(null=True) + prefix_tooltip = models.TextField(null=True) compounds = models.ManyToManyField( "Compound", through="ExperimentCompound", diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 0a61455c..d4845174 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -700,6 +700,7 @@ def _enumerate_objects(self, objects: dict, attr: str) -> None: def process_experiment( self, item_data: tuple[str, dict] | None = None, + prefix_tooltips: dict[str, str] | None = None, validate_files: bool = True, **kwargs, ) -> ProcessedObject | None: @@ -734,6 +735,7 @@ def process_experiment( """ del kwargs assert item_data + assert prefix_tooltips logger.debug("incoming data: %s", item_data) experiment_name, data = item_data @@ -813,6 +815,9 @@ def process_experiment( # version int old versions are kept target loader version = 1 + code_prefix = extract(key="code_prefix") + prefix_tooltip = prefix_tooltips.get(code_prefix, "") + fields = { "code": experiment_name, } @@ -830,6 +835,7 @@ def process_experiment( "mtz_info": str(self._get_final_path(mtz_info)), "cif_info": str(self._get_final_path(cif_info)), "map_info": map_info_paths, + "prefix_tooltip": prefix_tooltip, # this doesn't seem to be present # pdb_sha256: } @@ -839,6 +845,7 @@ def process_experiment( index_fields = { "xtalform": assigned_xtalform, "smiles": smiles, + "code_prefix": code_prefix, } return ProcessedObject( @@ -1437,33 +1444,11 @@ def process_bundle(self): self.report.log(logging.ERROR, msg) raise KeyError(msg) from exc - try: - config_inputs = config["inputs"] - except KeyError as exc: - msg = "'inputs' key missing in config file" - self.report.log(logging.ERROR, msg) - raise KeyError(msg) from exc - - try: - code_prefix = config_inputs[0]["code_prefix"] - except KeyError as exc: - msg = "'code_prefix' key missing in config file" - self.report.log(logging.ERROR, msg) - raise KeyError(msg) from exc - try: - code_prefix_tooltip = config_inputs[0]["code_prefix_tooltip"] - except KeyError as exc: - msg = "'code_prefix_tooltip' key missing in config file" - self.report.log(logging.ERROR, msg) - raise KeyError(msg) from exc - self.target, target_created = Target.objects.get_or_create( title=self.target_name, display_name=self.target_name, ) - logger.debug("tooltip: %s", code_prefix_tooltip) - # TODO: 
original target loader's function get_create_projects # seems to handle more cases. adopt or copy visit = self.proposal_ref.split()[0] @@ -1496,6 +1481,7 @@ def process_bundle(self): self.version_number = meta["version_number"] self.version_dir = meta["version_dir"] self.previous_version_dirs = meta["previous_version_dirs"] + prefix_tooltips = meta["code_prefix_tooltips"] # check transformation matrix files ( # pylint: disable=unbalanced-tuple-unpacking @@ -1554,7 +1540,9 @@ def process_bundle(self): ), ) - experiment_objects = self.process_experiment(yaml_data=crystals) + experiment_objects = self.process_experiment( + yaml_data=crystals, prefix_tooltips=prefix_tooltips + ) compound_objects = self.process_compound( yaml_data=crystals, experiments=experiment_objects ) @@ -1705,6 +1693,9 @@ def process_bundle(self): # ... and create new one starting from next item suffix = alphanumerator(start_from=iter_pos) for so in so_group.filter(code__isnull=True): + code_prefix = experiment_objects[so.experiment.code].index_data[ + "code_prefix" + ] code = f"{code_prefix}{so.experiment.code.split('-')[1]}{next(suffix)}" # test uniqueness for target From f43eabf186a52518693a1f26c8b2338df2913bd5 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 14 Feb 2024 12:18:23 +0000 Subject: [PATCH 05/47] Prefix tooltip now serverd by api/site_observation --- viewer/managers.py | 1 + viewer/serializers.py | 1 + viewer/target_loader.py | 10 +++++----- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/viewer/managers.py b/viewer/managers.py index 836ff422..7a1a4826 100644 --- a/viewer/managers.py +++ b/viewer/managers.py @@ -17,6 +17,7 @@ def filter_qs(self): ).annotate( target=F("experiment__experiment_upload__target"), compound_code=F("cmpd__compound_code"), + prefix_tooltip=F("experiment__prefix_tooltip"), ) return qs diff --git a/viewer/serializers.py b/viewer/serializers.py index c969d3da..15093985 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -951,6 +951,7 @@ class Meta: class SiteObservationReadSerializer(serializers.ModelSerializer): compound_code = serializers.StringRelatedField() + prefix_tooltip = serializers.StringRelatedField() class Meta: model = models.SiteObservation diff --git a/viewer/target_loader.py b/viewer/target_loader.py index d4845174..968a6305 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1652,16 +1652,13 @@ def process_bundle(self): canon_site_confs=canon_site_conf_objects, ) - values = ["xtalform_site__xtalform", "canon_site_conf__canon_site", "cmpd"] + values = ["canon_site_conf__canon_site", "cmpd"] qs = ( SiteObservation.objects.values(*values) .order_by(*values) .annotate(obvs=ArrayAgg("id")) .values_list("obvs", flat=True) ) - current_list = SiteObservation.objects.filter( - experiment__experiment_upload__target=self.target - ).values_list('code', flat=True) for elem in qs: # objects in this group should be named with same scheme so_group = SiteObservation.objects.filter(pk__in=elem) @@ -1702,7 +1699,10 @@ def process_bundle(self): # TODO: this should ideally be solved by db engine, before # rushing to write the trigger, have think about the # loader concurrency situations - if code in current_list: + if SiteObservation.objects.filter( + experiment__experiment_upload__target=self.target, + code=code, + ).exists(): msg = ( f"short code {code} already exists for this target; " + "specify a code_prefix to resolve this conflict" From 118756972dfc6a9c36ae0b654fb3d0e91426a72d Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 
14 Feb 2024 14:10:17 +0000 Subject: [PATCH 06/47] stashing --- viewer/download_structures.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 65f94efc..ee4b5766 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -11,6 +11,7 @@ import shutil import uuid import zipfile +from dataclasses import dataclass from datetime import datetime, timedelta, timezone from io import BytesIO from pathlib import Path @@ -49,6 +50,13 @@ 'readme': (''), } + +@dataclass +class MetadataObject: + path: str + archive_path: str + + # Dictionary containing all references needed to create the zip file # NB you may need to add a version number to this at some point... zip_template = { From 632719ac1365522c27ce7faeebac874740b36b14 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 14 Feb 2024 14:15:01 +0000 Subject: [PATCH 07/47] Site observation groups for shortcodes now by experiment --- viewer/target_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 968a6305..8878bda9 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1652,7 +1652,8 @@ def process_bundle(self): canon_site_confs=canon_site_conf_objects, ) - values = ["canon_site_conf__canon_site", "cmpd"] + # values = ["canon_site_conf__canon_site", "cmpd"] + values = ["experiment"] qs = ( SiteObservation.objects.values(*values) .order_by(*values) From 30c10809989491c79b40ee7855f2c4a890d9ca3e Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 15 Feb 2024 13:22:55 +0000 Subject: [PATCH 08/47] feat: download structure fixed TODO: add all the yamls --- viewer/download_structures.py | 133 ++++++++++++++++++++++++---------- 1 file changed, 94 insertions(+), 39 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index ee4b5766..9bddb0e2 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -51,8 +51,8 @@ } -@dataclass -class MetadataObject: +@dataclass(frozen=True) +class ArchiveFile: path: str archive_path: str @@ -224,7 +224,7 @@ def _read_and_patch_molecule_name(path, molecule_name=None): return content -def _add_file_to_zip_aligned(ziparchive, code, filepath): +def _add_file_to_zip_aligned(ziparchive, code, archive_file): """Add the requested file to the zip archive. If the file is an SDF or MOL we insert the name of the molecule @@ -238,39 +238,32 @@ def _add_file_to_zip_aligned(ziparchive, code, filepath): Returns: [boolean]: [True of record added to archive] """ - logger.debug('+_add_file_to_zip_aligned: %s, %s', code, filepath) - if not filepath: + logger.debug('+_add_file_to_zip_aligned: %s, %s', code, archive_file) + if not archive_file: # Odd - assume success logger.error('No filepath value') return True - # Incoming filepath can be both str and FieldFile - try: - filepath = filepath.path - except AttributeError: - filepath = str(Path(settings.MEDIA_ROOT).joinpath(filepath)) - - # strip off the leading parts of path - archive_path = str(Path(*Path(filepath).parts[7:])) + filepath = str(Path(settings.MEDIA_ROOT).joinpath(archive_file.path)) if Path(filepath).is_file(): if _is_mol_or_sdf(filepath): # It's a MOL or SD file. # Read and (potentially) adjust the file # and add to the archive as a string. 
content = _read_and_patch_molecule_name(filepath, molecule_name=code) - ziparchive.writestr(archive_path, content) + ziparchive.writestr(archive_file.archive_path, content) else: # Copy the file without modification - ziparchive.write(filepath, archive_path) + ziparchive.write(filepath, archive_file.archive_path) return True else: logger.warning('filepath "%s" is not a file', filepath) - _add_empty_file(ziparchive, archive_path) + _add_empty_file(ziparchive, archive_file.archive_path) return False -def _add_file_to_sdf(combined_sdf_file, filepath): +def _add_file_to_sdf(combined_sdf_file, archive_file): """Append the requested sdf file to the single sdf file provided. Args: @@ -282,19 +275,19 @@ def _add_file_to_sdf(combined_sdf_file, filepath): """ media_root = settings.MEDIA_ROOT - if not filepath: + if not archive_file.path: # Odd - assume success logger.error('No filepath value') return True - fullpath = os.path.join(media_root, filepath) + fullpath = os.path.join(media_root, archive_file.path) if os.path.isfile(fullpath): with open(combined_sdf_file, 'a', encoding='utf-8') as f_out: patched_sdf_content = _read_and_patch_molecule_name(fullpath) f_out.write(patched_sdf_content) return True else: - logger.warning('filepath "%s" is not a file', filepath) + logger.warning('filepath "%s" is not a file', archive_file.path) return False @@ -309,11 +302,8 @@ def _protein_files_zip(zip_contents, ziparchive, error_file): continue for prot, prot_file in files.items(): - # if it's a list of files (map_info) instead of single file - if not isinstance(prot_file, list): - prot_file = [prot_file] for f in prot_file: - if not _add_file_to_zip_aligned(ziparchive, prot.split(":")[0], f): + if not _add_file_to_zip_aligned(ziparchive, prot, f): error_file.write(f'{param},{prot},{f}\n') prot_errors += 1 @@ -341,14 +331,14 @@ def _molecule_files_zip(zip_contents, ziparchive, combined_sdf_file, error_file) ] is True and not _add_file_to_zip_aligned( ziparchive, prot.split(":")[0], file ): - error_file.write(f'sdf_info,{prot},{file}\n') + error_file.write(f'sdf_info,{prot},{file.path}\n') mol_errors += 1 # Append sdf file on the Molecule record to the combined_sdf_file. if zip_contents['molecules'][ 'single_sdf_file' ] is True and not _add_file_to_sdf(combined_sdf_file, file): - error_file.write(f'single_sdf_file,{prot},{file}\n') + error_file.write(f'single_sdf_file,{prot},{file.path}\n') mol_errors += 1 return mol_errors @@ -633,21 +623,79 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): for so in site_obvs: for param in protein_params: if protein_params[param] is True: - try: - # getting the param from experiment. 
more data are - # coming from there, that's why this is in try - # block + if param in ['pdb_info', 'mtz_info', 'cif_info', 'map_info']: + # experiment object model_attr = getattr(so.experiment, param) - # getattr retrieves FieldFile object, hence the .name - if isinstance(model_attr, list): - # except map_files, this returns a list of files - zip_contents['proteins'][param][so.code] = model_attr + logger.debug( + 'Adding param to zip: %s, value: %s', param, model_attr + ) + if param != 'map_info': + # treat all params as list + model_attr = ( + [model_attr.name] + # None - some weird glitch in storing the values + if model_attr and not str(model_attr).find('None') > -1 + else [param] + ) + + afile = [] + for f in model_attr: + # here the model_attr is already stringified + if model_attr and model_attr != 'None': + archive_path = str( + Path('crystallographic_files') + .joinpath(so.code) + .joinpath( + Path(f) + .parts[-1] + .replace(so.experiment.code, so.code) + ) + ) + else: + archive_path = param + afile.append(ArchiveFile(path=f, archive_path=archive_path)) + + elif param in [ + 'bound_file', + 'apo_solv_file', + 'apo_desolv_file', + 'apo_file', + 'sigmaa_file', + 'event_file', + 'artefacts_file', + 'pdb_header_file', + 'diff_file', + ]: + # siteobservation object + + model_attr = getattr(so, param) + logger.debug( + 'Adding param to zip: %s, value: %s', param, model_attr + ) + if model_attr and model_attr != 'None': + archive_path = str( + Path('aligned_files') + .joinpath(so.code) + .joinpath( + Path(model_attr.name) + .parts[-1] + .replace(so.longcode, so.code) + ) + ) else: - zip_contents['proteins'][param][so.code] = model_attr.name + archive_path = param - except AttributeError: - # on the off chance that the data are in site_observation model - zip_contents['proteins'][param][so.code] = getattr(so, param).name + afile = [ + ArchiveFile( + path=model_attr.name, + archive_path=archive_path, + ) + ] + else: + logger.warning('Unexpected param: %s', param) + continue + + zip_contents['proteins'][param][so.code] = afile if other_params['single_sdf_file'] is True: zip_contents['molecules']['single_sdf_file'] = True @@ -674,7 +722,14 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): if rel_sd_file: logger.debug('rel_sd_file=%s code=%s', rel_sd_file, so.code) - zip_contents['molecules']['sdf_files'].update({rel_sd_file: so.code}) + zip_contents['molecules']['sdf_files'].update( + { + ArchiveFile( + path=rel_sd_file, + archive_path=rel_sd_file, + ): so.code + } + ) num_molecules_collected += 1 # Report (in the log) anomalies From 1744688c169c7a30d27d0d5ffd3c76d9afdbcc5a Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 15 Feb 2024 14:35:19 +0000 Subject: [PATCH 09/47] All yaml files added to download --- viewer/download_structures.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 9bddb0e2..59e7ceb9 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -446,6 +446,46 @@ def _extra_files_zip(ziparchive, target): logger.info('Processed %s extra files', num_processed) +def _yaml_files_zip(ziparchive, target): + """Add all yaml files (except transforms) from upload to ziparchive""" + + for experiment_upload in target.experimentupload_set.order_by('commit_datetime'): + yaml_paths = ( + Path(settings.MEDIA_ROOT) + .joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY) + .joinpath(experiment_upload.task_id) + ) + + transforms = [ + 
Path(f.name).name + for f in ( + experiment_upload.neighbourhood_transforms, + experiment_upload.neighbourhood_transforms, + experiment_upload.neighbourhood_transforms, + ) + ] + # taking the latest upload for now + # add unpacked zip directory + yaml_paths = [d for d in list(yaml_paths.glob("*")) if d.is_dir()][0] + + # add upload_[d] dir + yaml_paths = next(yaml_paths.glob("upload_*")) + + archive_path = Path('yaml_files').joinpath(yaml_paths.parts[-1]) + + yaml_files = [ + f + for f in list(yaml_paths.glob("*.yaml")) + if f.is_file() and f.name not in transforms + ] + + logger.info('Processing yaml files (%s)...', yaml_files) + + for file in yaml_files: + logger.info('Adding yaml file "%s"...', file) + ziparchive.write(file, str(Path(archive_path).joinpath(file.name))) + + def _document_file_zip(ziparchive, download_path, original_search, host): """Create the document file This consists of a template plus an added contents description. @@ -581,6 +621,8 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host _extra_files_zip(ziparchive, target) + _yaml_files_zip(ziparchive, target) + _document_file_zip(ziparchive, download_path, original_search, host) error_file.close() From 417126f42fb41db4085a7a2bf01f13225864217a Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 15 Feb 2024 15:04:26 +0000 Subject: [PATCH 10/47] New format to download zip (issue 1326) (#530) * stashing * stashing * feat: download structure fixed TODO: add all the yamls * All yaml files added to download --- viewer/download_structures.py | 179 +++++++++++++++++++++++++++------- viewer/views.py | 2 +- 2 files changed, 143 insertions(+), 38 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 65f94efc..59e7ceb9 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -11,6 +11,7 @@ import shutil import uuid import zipfile +from dataclasses import dataclass from datetime import datetime, timedelta, timezone from io import BytesIO from pathlib import Path @@ -49,6 +50,13 @@ 'readme': (''), } + +@dataclass(frozen=True) +class ArchiveFile: + path: str + archive_path: str + + # Dictionary containing all references needed to create the zip file # NB you may need to add a version number to this at some point... zip_template = { @@ -216,7 +224,7 @@ def _read_and_patch_molecule_name(path, molecule_name=None): return content -def _add_file_to_zip_aligned(ziparchive, code, filepath): +def _add_file_to_zip_aligned(ziparchive, code, archive_file): """Add the requested file to the zip archive. If the file is an SDF or MOL we insert the name of the molecule @@ -230,39 +238,32 @@ def _add_file_to_zip_aligned(ziparchive, code, filepath): Returns: [boolean]: [True of record added to archive] """ - logger.debug('+_add_file_to_zip_aligned: %s, %s', code, filepath) - if not filepath: + logger.debug('+_add_file_to_zip_aligned: %s, %s', code, archive_file) + if not archive_file: # Odd - assume success logger.error('No filepath value') return True - # Incoming filepath can be both str and FieldFile - try: - filepath = filepath.path - except AttributeError: - filepath = str(Path(settings.MEDIA_ROOT).joinpath(filepath)) - - # strip off the leading parts of path - archive_path = str(Path(*Path(filepath).parts[7:])) + filepath = str(Path(settings.MEDIA_ROOT).joinpath(archive_file.path)) if Path(filepath).is_file(): if _is_mol_or_sdf(filepath): # It's a MOL or SD file. # Read and (potentially) adjust the file # and add to the archive as a string. 
content = _read_and_patch_molecule_name(filepath, molecule_name=code) - ziparchive.writestr(archive_path, content) + ziparchive.writestr(archive_file.archive_path, content) else: # Copy the file without modification - ziparchive.write(filepath, archive_path) + ziparchive.write(filepath, archive_file.archive_path) return True else: logger.warning('filepath "%s" is not a file', filepath) - _add_empty_file(ziparchive, archive_path) + _add_empty_file(ziparchive, archive_file.archive_path) return False -def _add_file_to_sdf(combined_sdf_file, filepath): +def _add_file_to_sdf(combined_sdf_file, archive_file): """Append the requested sdf file to the single sdf file provided. Args: @@ -274,19 +275,19 @@ def _add_file_to_sdf(combined_sdf_file, filepath): """ media_root = settings.MEDIA_ROOT - if not filepath: + if not archive_file.path: # Odd - assume success logger.error('No filepath value') return True - fullpath = os.path.join(media_root, filepath) + fullpath = os.path.join(media_root, archive_file.path) if os.path.isfile(fullpath): with open(combined_sdf_file, 'a', encoding='utf-8') as f_out: patched_sdf_content = _read_and_patch_molecule_name(fullpath) f_out.write(patched_sdf_content) return True else: - logger.warning('filepath "%s" is not a file', filepath) + logger.warning('filepath "%s" is not a file', archive_file.path) return False @@ -301,11 +302,8 @@ def _protein_files_zip(zip_contents, ziparchive, error_file): continue for prot, prot_file in files.items(): - # if it's a list of files (map_info) instead of single file - if not isinstance(prot_file, list): - prot_file = [prot_file] for f in prot_file: - if not _add_file_to_zip_aligned(ziparchive, prot.split(":")[0], f): + if not _add_file_to_zip_aligned(ziparchive, prot, f): error_file.write(f'{param},{prot},{f}\n') prot_errors += 1 @@ -333,14 +331,14 @@ def _molecule_files_zip(zip_contents, ziparchive, combined_sdf_file, error_file) ] is True and not _add_file_to_zip_aligned( ziparchive, prot.split(":")[0], file ): - error_file.write(f'sdf_info,{prot},{file}\n') + error_file.write(f'sdf_info,{prot},{file.path}\n') mol_errors += 1 # Append sdf file on the Molecule record to the combined_sdf_file. 
if zip_contents['molecules'][ 'single_sdf_file' ] is True and not _add_file_to_sdf(combined_sdf_file, file): - error_file.write(f'single_sdf_file,{prot},{file}\n') + error_file.write(f'single_sdf_file,{prot},{file.path}\n') mol_errors += 1 return mol_errors @@ -448,6 +446,46 @@ def _extra_files_zip(ziparchive, target): logger.info('Processed %s extra files', num_processed) +def _yaml_files_zip(ziparchive, target): + """Add all yaml files (except transforms) from upload to ziparchive""" + + for experiment_upload in target.experimentupload_set.order_by('commit_datetime'): + yaml_paths = ( + Path(settings.MEDIA_ROOT) + .joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY) + .joinpath(experiment_upload.task_id) + ) + + transforms = [ + Path(f.name).name + for f in ( + experiment_upload.neighbourhood_transforms, + experiment_upload.neighbourhood_transforms, + experiment_upload.neighbourhood_transforms, + ) + ] + # taking the latest upload for now + # add unpacked zip directory + yaml_paths = [d for d in list(yaml_paths.glob("*")) if d.is_dir()][0] + + # add upload_[d] dir + yaml_paths = next(yaml_paths.glob("upload_*")) + + archive_path = Path('yaml_files').joinpath(yaml_paths.parts[-1]) + + yaml_files = [ + f + for f in list(yaml_paths.glob("*.yaml")) + if f.is_file() and f.name not in transforms + ] + + logger.info('Processing yaml files (%s)...', yaml_files) + + for file in yaml_files: + logger.info('Adding yaml file "%s"...', file) + ziparchive.write(file, str(Path(archive_path).joinpath(file.name))) + + def _document_file_zip(ziparchive, download_path, original_search, host): """Create the document file This consists of a template plus an added contents description. @@ -583,6 +621,8 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host _extra_files_zip(ziparchive, target) + _yaml_files_zip(ziparchive, target) + _document_file_zip(ziparchive, download_path, original_search, host) error_file.close() @@ -625,21 +665,79 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): for so in site_obvs: for param in protein_params: if protein_params[param] is True: - try: - # getting the param from experiment. 
more data are - # coming from there, that's why this is in try - # block + if param in ['pdb_info', 'mtz_info', 'cif_info', 'map_info']: + # experiment object model_attr = getattr(so.experiment, param) - # getattr retrieves FieldFile object, hence the .name - if isinstance(model_attr, list): - # except map_files, this returns a list of files - zip_contents['proteins'][param][so.code] = model_attr + logger.debug( + 'Adding param to zip: %s, value: %s', param, model_attr + ) + if param != 'map_info': + # treat all params as list + model_attr = ( + [model_attr.name] + # None - some weird glitch in storing the values + if model_attr and not str(model_attr).find('None') > -1 + else [param] + ) + + afile = [] + for f in model_attr: + # here the model_attr is already stringified + if model_attr and model_attr != 'None': + archive_path = str( + Path('crystallographic_files') + .joinpath(so.code) + .joinpath( + Path(f) + .parts[-1] + .replace(so.experiment.code, so.code) + ) + ) + else: + archive_path = param + afile.append(ArchiveFile(path=f, archive_path=archive_path)) + + elif param in [ + 'bound_file', + 'apo_solv_file', + 'apo_desolv_file', + 'apo_file', + 'sigmaa_file', + 'event_file', + 'artefacts_file', + 'pdb_header_file', + 'diff_file', + ]: + # siteobservation object + + model_attr = getattr(so, param) + logger.debug( + 'Adding param to zip: %s, value: %s', param, model_attr + ) + if model_attr and model_attr != 'None': + archive_path = str( + Path('aligned_files') + .joinpath(so.code) + .joinpath( + Path(model_attr.name) + .parts[-1] + .replace(so.longcode, so.code) + ) + ) else: - zip_contents['proteins'][param][so.code] = model_attr.name + archive_path = param + + afile = [ + ArchiveFile( + path=model_attr.name, + archive_path=archive_path, + ) + ] + else: + logger.warning('Unexpected param: %s', param) + continue - except AttributeError: - # on the off chance that the data are in site_observation model - zip_contents['proteins'][param][so.code] = getattr(so, param).name + zip_contents['proteins'][param][so.code] = afile if other_params['single_sdf_file'] is True: zip_contents['molecules']['single_sdf_file'] = True @@ -666,7 +764,14 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): if rel_sd_file: logger.debug('rel_sd_file=%s code=%s', rel_sd_file, so.code) - zip_contents['molecules']['sdf_files'].update({rel_sd_file: so.code}) + zip_contents['molecules']['sdf_files'].update( + { + ArchiveFile( + path=rel_sd_file, + archive_path=rel_sd_file, + ): so.code + } + ) num_molecules_collected += 1 # Report (in the log) anomalies diff --git a/viewer/views.py b/viewer/views.py index 6112f7ba..650945e1 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1492,7 +1492,7 @@ def create(self, request): # prot = models.Protein.objects.filter(code__contains=code_first_part).values() # I don't see why I need to drop out of django objects here prot = models.SiteObservation.objects.filter( - code__contains=code_first_part + experiment__experiment_upload__target=target, code=code_first_part ) if prot.exists(): # even more than just django object, I need an From ef6d56a47455cbde59a5819cf0bf7ccd7547b01e Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 16 Feb 2024 14:19:00 +0000 Subject: [PATCH 11/47] cset_upload.py: lhs_pdb renamed to ref_pdb --- viewer/cset_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 9cbeed32..bb4e704f 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ 
-363,7 +363,7 @@ def set_mol( # Try to get the LHS SiteObservation, # This will be used to set the ComputedMolecule.site_observation_code. # This may fail. - lhs_property = 'lhs_pdb' + lhs_property = 'ref_pdb' lhs_so = self.get_site_observation( lhs_property, mol, From be127621538194a7851399970652c5b3cdc577d4 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 16 Feb 2024 14:43:59 +0000 Subject: [PATCH 12/47] Renamed canon- and conf site tags --- viewer/target_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 8878bda9..edad072e 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1724,7 +1724,7 @@ def process_bundle(self): # tag site observations for val in canon_site_objects.values(): # pylint: disable=no-member - tag = f"{val.instance.canon_site_num} - {val.instance.name}" + tag = f"{val.instance.canon_site_num} - {''.join(val.instance.name.split('+')[1:-1])}" so_list = SiteObservation.objects.filter( canon_site_conf__canon_site=val.instance ) @@ -1739,7 +1739,7 @@ def process_bundle(self): tag = ( f"{val.instance.canon_site.canon_site_num}" + f"{next(numerators[val.instance.canon_site.canon_site_num])}" - + f" - {val.instance.name}" + + f" - {val.instance.name.split('+')[0]}" ) so_list = [ site_observation_objects[strip_version(k)].instance From f3483bb919db9db5836a42f3a30566f04bd2c366 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Mon, 19 Feb 2024 16:29:08 +0100 Subject: [PATCH 13/47] Adds support for key-based SSH connections (#534) * Centralised environment variables (#529) * refactor: Restructured settings.py * docs: Minor tweaks * refactor: Move security and infection config to settings * refactor: b/e & f/e/ tags now in settings (also fixed f/e tag value) * refactor: Move Neo4j config to settings * refactor: More variables into settings * refactor: Moved remaining config * docs: Adds configuration guide as comments * docs: Variable prefix now 'stack_' not 'stack_env_' --------- Co-authored-by: Alan Christie * feat: Adds support for private keys on SSH tunnel * fix: Fixes key-based logic --------- Co-authored-by: Alan Christie --- api/infections.py | 12 +- api/remote_ispyb_connector.py | 54 ++- api/security.py | 39 +-- fragalysis/settings.py | 623 ++++++++++++++++++++-------------- fragalysis/views.py | 39 +-- network/views.py | 13 +- viewer/serializers.py | 4 +- viewer/services.py | 5 +- viewer/squonk2_agent.py | 63 ++-- viewer/views.py | 2 +- 10 files changed, 463 insertions(+), 391 deletions(-) diff --git a/api/infections.py b/api/infections.py index c1eb6cab..4143c585 100644 --- a/api/infections.py +++ b/api/infections.py @@ -4,9 +4,10 @@ # Infections are injected into the application via the environment variable # 'INFECTIONS', a comma-separated list of infection names. -import os from typing import Dict, Set +from django.conf import settings + from api.utils import deployment_mode_is_production # The built-in set of infections. @@ -20,9 +21,6 @@ INFECTION_STRUCTURE_DOWNLOAD: 'An error in the DownloadStructures view' } -# What infection have been set? -_INFECTIONS: str = os.environ.get('INFECTIONS', '').lower() - def have_infection(name: str) -> bool: """Returns True if we've been given the named infection. 
@@ -31,9 +29,11 @@ def have_infection(name: str) -> bool: def _get_infections() -> Set[str]: - if _INFECTIONS == '': + if settings.INFECTIONS == '': return set() infections: set[str] = { - infection for infection in _INFECTIONS.split(',') if infection in _CATALOGUE + infection + for infection in settings.INFECTIONS.split(',') + if infection in _CATALOGUE } return infections diff --git a/api/remote_ispyb_connector.py b/api/remote_ispyb_connector.py index 56fce7dc..398f3473 100644 --- a/api/remote_ispyb_connector.py +++ b/api/remote_ispyb_connector.py @@ -28,6 +28,7 @@ def __init__( remote=False, ssh_user=None, ssh_password=None, + ssh_private_key_filename=None, ssh_host=None, conn_inactivity=360, ): @@ -45,6 +46,7 @@ def __init__( 'ssh_host': ssh_host, 'ssh_user': ssh_user, 'ssh_pass': ssh_password, + 'ssh_pkey': ssh_private_key_filename, 'db_host': host, 'db_port': int(port), 'db_user': user, @@ -53,12 +55,11 @@ def __init__( } self.remote_connect(**creds) logger.debug( - "Started host=%s username=%s local_bind_port=%s", + "Started remote ssh_host=%s ssh_user=%s local_bind_port=%s", ssh_host, ssh_user, self.server.local_bind_port, ) - else: self.connect( user=user, @@ -68,29 +69,60 @@ def __init__( port=port, conn_inactivity=conn_inactivity, ) - logger.debug("Started host=%s user=%s port=%s", host, user, port) + logger.debug("Started direct host=%s user=%s port=%s", host, user, port) def remote_connect( - self, ssh_host, ssh_user, ssh_pass, db_host, db_port, db_user, db_pass, db_name + self, + ssh_host, + ssh_user, + ssh_pass, + ssh_pkey, + db_host, + db_port, + db_user, + db_pass, + db_name, ): sshtunnel.SSH_TIMEOUT = 10.0 sshtunnel.TUNNEL_TIMEOUT = 10.0 sshtunnel.DEFAULT_LOGLEVEL = logging.CRITICAL self.conn_inactivity = int(self.conn_inactivity) - self.server = sshtunnel.SSHTunnelForwarder( - (ssh_host), - ssh_username=ssh_user, - ssh_password=ssh_pass, - remote_bind_address=(db_host, db_port), - ) + if ssh_pkey: + logger.debug( + 'Creating SSHTunnelForwarder (with SSH Key) host=%s user=%s', + ssh_host, + ssh_user, + ) + self.server = sshtunnel.SSHTunnelForwarder( + (ssh_host), + ssh_username=ssh_user, + ssh_pkey=ssh_pkey, + remote_bind_address=(db_host, db_port), + ) + else: + logger.debug( + 'Creating SSHTunnelForwarder (with password) host=%s user=%s', + ssh_host, + ssh_user, + ) + self.server = sshtunnel.SSHTunnelForwarder( + (ssh_host), + ssh_username=ssh_user, + ssh_password=ssh_pass, + remote_bind_address=(db_host, db_port), + ) + logger.debug('Created SSHTunnelForwarder') # stops hanging connections in transport self.server.daemon_forward_servers = True self.server.daemon_transport = True + logger.debug('Starting SSH server...') self.server.start() + logger.debug('Started SSH server') + logger.debug('Connecting to ISPyB (db_user=%s db_name=%s)...', db_user, db_name) self.conn = pymysql.connect( user=db_user, password=db_pass, @@ -100,8 +132,10 @@ def remote_connect( ) if self.conn is not None: + logger.debug('Connected') self.conn.autocommit = True else: + logger.debug('Failed to connect') self.server.stop() raise ISPyBConnectionException self.last_activity_ts = time.time() diff --git a/api/security.py b/api/security.py index eafc31fe..01605352 100644 --- a/api/security.py +++ b/api/security.py @@ -48,40 +48,41 @@ def get_remote_conn() -> Optional[SSHConnector]: - ispyb_credentials: Dict[str, Any] = { - "user": os.environ.get("ISPYB_USER"), - "pw": os.environ.get("ISPYB_PASSWORD"), - "host": os.environ.get("ISPYB_HOST"), - "port": os.environ.get("ISPYB_PORT"), + credentials: 
Dict[str, Any] = { + "user": settings.ISPYB_USER, + "pw": settings.ISPYB_PASSWORD, + "host": settings.ISPYB_HOST, + "port": settings.ISPYB_PORT, "db": "ispyb", "conn_inactivity": 360, } ssh_credentials: Dict[str, Any] = { - 'ssh_host': os.environ.get("SSH_HOST"), - 'ssh_user': os.environ.get("SSH_USER"), - 'ssh_password': os.environ.get("SSH_PASSWORD"), + 'ssh_host': settings.SSH_HOST, + 'ssh_user': settings.SSH_USER, + 'ssh_password': settings.SSH_PASSWORD, + "ssh_private_key_filename": settings.SSH_PRIVATE_KEY_FILENAME, 'remote': True, } - ispyb_credentials.update(**ssh_credentials) + credentials.update(**ssh_credentials) # Caution: Credentials may not be set in the environment. # Assume the credentials are invalid if there is no host. # If a host is not defined other properties are useless. - if not ispyb_credentials["host"]: + if not credentials["host"]: logger.debug("No ISPyB host - cannot return a connector") return None # Try to get an SSH connection (aware that it might fail) conn: Optional[SSHConnector] = None try: - conn = SSHConnector(**ispyb_credentials) + conn = SSHConnector(**credentials) except Exception: # Log the exception if DEBUG level or lower/finer? - # The following wil not log if the level is set to INFO for example. + # The following will not log if the level is set to INFO for example. if logging.DEBUG >= logger.level: - logger.info("ispyb_credentials=%s", ispyb_credentials) + logger.info("credentials=%s", credentials) logger.exception("Got the following exception creating SSHConnector...") return conn @@ -89,10 +90,10 @@ def get_remote_conn() -> Optional[SSHConnector]: def get_conn() -> Optional[Connector]: credentials: Dict[str, Any] = { - "user": os.environ.get("ISPYB_USER"), - "pw": os.environ.get("ISPYB_PASSWORD"), - "host": os.environ.get("ISPYB_HOST"), - "port": os.environ.get("ISPYB_PORT"), + "user": settings.ISPYB_USER, + "pw": settings.ISPYB_PASSWORD, + "host": settings.ISPYB_HOST, + "port": settings.ISPYB_PORT, "db": "ispyb", "conn_inactivity": 360, } @@ -108,7 +109,7 @@ def get_conn() -> Optional[Connector]: conn = Connector(**credentials) except Exception: # Log the exception if DEBUG level or lower/finer? - # The following wil not log if the level is set to INFO for example. + # The following will not log if the level is set to INFO for example. if logging.DEBUG >= logger.level: logger.info("credentials=%s", credentials) logger.exception("Got the following exception creating Connector...") @@ -349,7 +350,7 @@ def get_proposals_for_user(self, user, restrict_to_membership=False): assert user proposals = set() - ispyb_user = os.environ.get("ISPYB_USER") + ispyb_user = settings.ISPYB_USER logger.debug( "ispyb_user=%s restrict_to_membership=%s", ispyb_user, diff --git a/fragalysis/settings.py b/fragalysis/settings.py index 93885b01..3aa3b58a 100644 --- a/fragalysis/settings.py +++ b/fragalysis/settings.py @@ -1,18 +1,67 @@ -""" -Django settings for fragalysis project. +"""Django settings for the fragalysis 'backend'""" -Generated by 'django-admin startproject' using Django 1.11.6. - -For more information on this file, see -https://docs.djangoproject.com/en/1.11/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/1.11/ref/settings/ -""" +# This standard Django module is used to provide the dynamic configuration of +# the backend logic. 
As well as providing vital django-related configuration +# it is also the source of the numerous fragalysis-specific environment variables +# that control the stack's configuration (behaviour). +# +# Not all settings are configured by environment variable. Some are hard-coded +# and you'll need to edit their values here. Those that are configurable at run-time +# should be obvious (i.e. they'll use "os.environ.get()" to obtain their value) +# alternative run-time value. +# +# You will find the django-related configuration at the top of the file +# (under DJANGO SETTINGS) and the fragalysis-specific configuration at the bottom of +# the file (under FRAGALYSIS SETTINGS). +# +# Guidance for variables: - +# +# 1. Everything *MUST* have a default value, this file should not raise an exception +# if a value cannot be found in the environment, that's the role of the +# application code. +# +# 2. The constant used to hold the environment variable *SHOULD* match the +# environment variable's name. i.e. the "DEPLOYMENT_MODE" environment variable's +# value *SHOULD* be found in 'settings.DEPLOYMENT_MODE'. +# +# Providing run-time values for variables: - +# +# The environment variable values are set using either a 'docker-compose' file +# (when used for local development) or, more typically, via an "Ansible variable" +# provided by the "Ansible playbook" that's responsible for deploying the stack. +# +# Many (not all) of the environment variables are made available +# for deployment using an Ansible playbook variable, explained below. +# +# 1. Ansible variables are lower-case and use "snake case". +# +# 2. Ansible variables that map directly to environment variables in this file +# use the same name as the environment variable and are prefixed with +# "stack_". For example the "DEPLOYMENT_MODE" environment variable +# can be set using the "stack_deployment_mode" variable. +# +# 3. Variables are declared using the 'EXTRA VARIABLES' section of the corresponding +# AWX "Job Template". +# +# IMPORTANTLY: For a description of an environment variable (setting) and its value +# you *MUST* consult the comments in this file ("settings.py"), and *NOT* +# the Ansible playbook. This file is the primary authority for the +# configuration of the Fragalysis Stack. +# +# Ansible variables are declared in "roles/fragalysis-stack/defaults/main.yaml" +# or "roles/fragalysis-stack/vars/main.yaml" of the playbook repository +# https://github.com/xchem/fragalysis-stack-kubernetes +# +# For more information on "settings.py", see +# https://docs.djangoproject.com/en/3.2/topics/settings/ +# +# For the full list of Django-related settings and their values, see +# https://docs.djangoproject.com/en/3.2/ref/settings/ import os import sys from datetime import timedelta +from typing import List import sentry_sdk from sentry_sdk.integrations.celery import CeleryIntegration @@ -20,88 +69,52 @@ from sentry_sdk.integrations.excepthook import ExcepthookIntegration from sentry_sdk.integrations.redis import RedisIntegration -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = False -if os.environ.get("DEBUG_FRAGALYSIS") == True: - DEBUG = True +# -------------------------------------------------------------------------------------- +# DJANGO SETTINGS +# -------------------------------------------------------------------------------------- -# These flags are used in the upload_tset form as follows. 
-# Proposal Supported | Proposal Required | Proposal / View fields -# Y | Y | Shown / Required -# Y | N | Shown / Optional -# N | N | Not Shown -PROPOSAL_SUPPORTED = True -PROPOSAL_REQUIRED = True +ALLOWED_HOSTS = ["*"] # AnonymousUser should be the first record inserted into the auth_user table. ANONYMOUS_USER = 1 -# This is set on AWX when the fragalysis-stack is rebuilt. -SENTRY_DNS = os.environ.get("FRAGALYSIS_BACKEND_SENTRY_DNS") -if SENTRY_DNS: - # By default only call sentry in staging/production - sentry_sdk.init( - dsn=SENTRY_DNS, - integrations=[ - DjangoIntegration(), - CeleryIntegration(), - RedisIntegration(), - ExcepthookIntegration(always_run=True), - ], - # If you wish to associate users to errors (assuming you are using - # django.contrib.auth) you may enable sending PII data. - send_default_pii=True, - ) - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = os.environ.get( - "WEB_DJANGO_SECRET_KEY", "8flmz)c9i!o&f1-moi5-p&9ak4r9=ck$3!0y1@%34p^(6i*^_9" +AUTHENTICATION_BACKENDS = ( + "django.contrib.auth.backends.ModelBackend", + "fragalysis.auth.KeycloakOIDCAuthenticationBackend", + "guardian.backends.ObjectPermissionBackend", ) -USE_X_FORWARDED_HOST = True -SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") - -ALLOWED_HOSTS = ["*"] - -DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" +# Password validation +# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators -# DATA_UPLOAD_MAX_MEMORY_SIZE = 26214400 # 25 MB +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator" + }, + {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"}, + {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"}, + {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"}, +] -REST_FRAMEWORK = { - "DEFAULT_FILTER_BACKENDS": ("django_filters.rest_framework.DjangoFilterBackend",), - "DEFAULT_PAGINATION_CLASS": "rest_framework.pagination.LimitOffsetPagination", - "PAGE_SIZE": 5000, - "DEFAULT_VERSIONING_CLASS": "rest_framework.versioning.QueryParameterVersioning", - 'DEFAULT_AUTHENTICATION_CLASSES': [ - 'rest_framework.authentication.SessionAuthentication', - 'mozilla_django_oidc.contrib.drf.OIDCAuthentication', - 'rest_framework.authentication.BasicAuthentication', - ], -} +# Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # CELERY STUFF -CELERY_ACCEPT_CONTENT = ['application/json'] +CELERY_ACCEPT_CONTENT = ["application/json"] CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True -CELERY_BROKER_URL = os.environ.get('CELERY_BROKER_URL', 'redis://redis:6379/') -CELERY_RESULT_BACKEND = os.environ.get('CELERY_RESULT_BACKEND', 'redis://redis:6379/0') +CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL", "redis://redis:6379/") +CELERY_RESULT_BACKEND = os.environ.get("CELERY_RESULT_BACKEND", "redis://redis:6379/0") CELERY_RESULT_BACKEND_ALWAYS_RETRY = True CELERY_RESULT_EXPIRES = timedelta(days=15) CELERY_TASK_ALWAYS_EAGER = os.environ.get( - 'CELERY_TASK_ALWAYS_EAGER', 'False' -).lower() in ['true', 'yes'] + "CELERY_TASK_ALWAYS_EAGER", "False" +).lower() in ["true", "yes"] CELERY_WORKER_HIJACK_ROOT_LOGGER = False -# This can be injected as an ENV var -NEOMODEL_NEO4J_BOLT_URL = os.environ.get( - "NEO4J_BOLT_URL", "bolt://neo4j:test@neo4j:7687" -) +# SECURITY WARNING: don't run with DUBUG turned on in production! +DEBUG = os.environ.get("DEBUG_FRAGALYSIS") == "True" + +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" # Application definition INSTALLED_APPS = [ @@ -136,6 +149,17 @@ "simple_history", ] +LANGUAGE_CODE = "en-us" + +# Swagger logging / logout +LOGIN_URL = "/accounts/login/" +LOGOUT_URL = "/accounts/logout/" +# LOGIN_REDIRECT_URL = "" +LOGIN_REDIRECT_URL = "/viewer/react/landing" +# LOGOUT_REDIRECT_URL = "" +LOGOUT_REDIRECT_URL = "/viewer/react/landing" + MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", @@ -147,25 +171,88 @@ "mozilla_django_oidc.middleware.SessionRefresh", ] -AUTHENTICATION_BACKENDS = ( - "django.contrib.auth.backends.ModelBackend", - "fragalysis.auth.KeycloakOIDCAuthenticationBackend", - "guardian.backends.ObjectPermissionBackend", +PROJECT_ROOT = os.path.abspath(os.path.join(BASE_DIR, "..")) + +REST_FRAMEWORK = { + "DEFAULT_FILTER_BACKENDS": ("django_filters.rest_framework.DjangoFilterBackend",), + "DEFAULT_PAGINATION_CLASS": "rest_framework.pagination.LimitOffsetPagination", + "PAGE_SIZE": 5000, + "DEFAULT_VERSIONING_CLASS": "rest_framework.versioning.QueryParameterVersioning", + "DEFAULT_AUTHENTICATION_CLASSES": [ + "rest_framework.authentication.SessionAuthentication", + "mozilla_django_oidc.contrib.drf.OIDCAuthentication", + "rest_framework.authentication.BasicAuthentication", + ], +} + +ROOT_URLCONF = "fragalysis.urls" + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = os.environ.get( + "WEB_DJANGO_SECRET_KEY", "8flmz)c9i!o&f1-moi5-p&9ak4r9=ck$3!0y1@%34p^(6i*^_9" ) -STATICFILES_DIRS = [os.path.join(BASE_DIR, "fragalysis", "../viewer/static")] +if SENTRY_DNS := os.environ.get("FRAGALYSIS_BACKEND_SENTRY_DNS"): + # By default only call sentry in staging/production + sentry_sdk.init( + dsn=SENTRY_DNS, + integrations=[ + DjangoIntegration(), + CeleryIntegration(), + RedisIntegration(), + ExcepthookIntegration(always_run=True), + ], + # If you wish to associate users to errors (assuming you are using + # django.contrib.auth) you may enable sending PII data. 
+ send_default_pii=True, + ) +STATIC_ROOT = os.path.join(PROJECT_ROOT, "static") +STATICFILES_DIRS = [os.path.join(BASE_DIR, "fragalysis", "../viewer/static")] STATICFILES_FINDERS = ( "django.contrib.staticfiles.finders.FileSystemFinder", "django.contrib.staticfiles.finders.AppDirectoriesFinder", ) -# mozilla_django_oidc - from documentation: https://mozilla-django-oidc.readthedocs.io/en/stable/ -# Before you can configure your application, you need to set up a client with an OpenID Connect provider (OP). -# You’ll need to set up a different client for every environment you have for your site. For example, -# if your site has a -dev, -stage, and -prod environments, each of those has a different hostname and thus you +USE_X_FORWARDED_HOST = True +SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") + +# A list of identifiers of messages generated by the system check framework +# that we wish to permanently acknowledge and ignore. +# Silenced checks will not be output to the console. +# +# fields.W342 Is issued for the xchem-db package. +# The hint is "ForeignKey(unique=True) is usually better served by a OneToOneField." +SILENCED_SYSTEM_CHECKS = [ + "fields.W342", +] + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ] + }, + } +] + +TIME_ZONE = "UTC" + +# mozilla_django_oidc. +# See: https://mozilla-django-oidc.readthedocs.io/en/stable/ +# Before you can configure your application, you need to set up a client with +# an OpenID Connect provider (OP). You’ll need to set up a different client for +# every environment you have for your site. For example, if your site has a -dev, +# -stage, and -prod environments, each of those has a different hostname and thus you # need to set up a separate client for each one. -# you need to provide your OpenID Connect provider (OP) the callback url for your site. +# You need to provide your OpenID Connect provider (OP) the callback url for your site. # The URL path for the callback url is /oidc/callback/. # # Here are examples of callback urls: @@ -179,14 +266,18 @@ # a client id (OIDC_RP_CLIENT_ID) # a client secret (OIDC_RP_CLIENT_SECRET) -# Keycloak mozilla_django_oidc - Settings -# from keyclaok (openid provider = OP) - NB these should be environment variables - not checked in +# Keycloak mozilla_django_oidc settings (openid provider = OP). +# These should be environment variables - not checked in OIDC_RP_CLIENT_ID = os.environ.get("OIDC_RP_CLIENT_ID", "fragalysis-local") -OIDC_RP_CLIENT_SECRET = os.environ.get('OIDC_RP_CLIENT_SECRET') +OIDC_RP_CLIENT_SECRET = os.environ.get("OIDC_RP_CLIENT_SECRET") OIDC_KEYCLOAK_REALM = os.environ.get( "OIDC_KEYCLOAK_REALM", "https://keycloak.xchem-dev.diamond.ac.uk/auth/realms/xchem" ) +# Squonk2 Account Server and Data Manager Client IDs +OIDC_AS_CLIENT_ID: str = os.environ.get("OIDC_AS_CLIENT_ID", "") +OIDC_DM_CLIENT_ID: str = os.environ.get("OIDC_DM_CLIENT_ID", "") + # OIDC_OP_AUTHORIZATION_ENDPOINT = "" OIDC_OP_AUTHORIZATION_ENDPOINT = os.path.join( OIDC_KEYCLOAK_REALM, "protocol/openid-connect/auth" @@ -199,11 +290,13 @@ OIDC_OP_USER_ENDPOINT = os.path.join( OIDC_KEYCLOAK_REALM, "protocol/openid-connect/userinfo" ) -# OIDC_OP_JWKS_ENDPOINT = "" - This is required when using RS256. 
+# OIDC_OP_JWKS_ENDPOINT = ""
+# This is required when using RS256.
OIDC_OP_JWKS_ENDPOINT = os.path.join(
    OIDC_KEYCLOAK_REALM, "protocol/openid-connect/certs"
)
-# OIDC_OP_LOGOUT_ENDPOINT = "" - This is required when using RS256.
+# OIDC_OP_LOGOUT_ENDPOINT = ""
+# This is required when using RS256.
OIDC_OP_LOGOUT_ENDPOINT = os.path.join(
    OIDC_KEYCLOAK_REALM, "protocol/openid-connect/logout"
)
@@ -212,76 +305,23 @@
# If desired, this should be set to "fragalysis.views.keycloak_logout"
OIDC_OP_LOGOUT_URL_METHOD = os.environ.get("OIDC_OP_LOGOUT_URL_METHOD")

-# LOGIN_REDIRECT_URL = ""
-LOGIN_REDIRECT_URL = "/viewer/react/landing"
-# LOGOUT_REDIRECT_URL = ""
-LOGOUT_REDIRECT_URL = "/viewer/react/landing"
-
# After much trial and error
-# Using RS256 + JWKS Endpoint seems to work with no value for OIDC_RP_IDP_SIGN_KEY seems to work for authentication.
-# Trying HS256 produces a "JWS token verification failed" error for some reason.
+# Using RS256 + a JWKS Endpoint with no value for OIDC_RP_IDP_SIGN_KEY
+# seems to work for authentication. Trying HS256 produces a "JWS token verification failed"
+# error for some reason.
OIDC_RP_SIGN_ALGO = "RS256"
OIDC_STORE_ACCESS_TOKEN = True
OIDC_STORE_ID_TOKEN = True

-# Security/access control connector.
-# Currently one of 'ispyb' or 'ssh_ispyb'.
-SECURITY_CONNECTOR = os.environ.get('SECURITY_CONNECTOR', 'ispyb').lower()
-# Number of minutes to cache security information for a user.
-# Set to '0' to disable caching.
-SECURITY_CONNECTOR_CACHE_MINUTES = int(
-    os.environ.get('SECURITY_CONNECTOR_CACHE_MINUTES', '2')
-)
-
# SessionRefresh configuration.
# There's only one item - the token expiry period, with a default of 15 minutes.
# The default is 15 minutes if you don't set this value.
TOKEN_EXPIRY_MINUTES = os.environ.get("OIDC_RENEW_ID_TOKEN_EXPIRY_MINUTES", "15")
OIDC_RENEW_ID_TOKEN_EXPIRY_SECONDS = int(TOKEN_EXPIRY_MINUTES) * 60
-# Keycloak mozilla_django_oidc - Settings - End
-
-# The deployment mode.
-# Controls the behaviour of the application (it's strictness to errors etc).
-# Typically one of "DEVELOPMENT" or "PRODUCTION".
-# see api.utils for the 'deployment_mode_is_production()' function.
-DEPLOYMENT_MODE = os.environ.get("DEPLOYMENT_MODE", "production").upper()
-
-# Authentication check when uploading files.
-# This can be switched off to simplify development testing if required.
-# It's asserted as True for 'production' mode.
-AUTHENTICATE_UPLOAD = True -if os.environ.get("AUTHENTICATE_UPLOAD") == 'False': - assert DEPLOYMENT_MODE != "PRODUCTION" - AUTHENTICATE_UPLOAD = False - -ROOT_URLCONF = "fragalysis.urls" - -STATIC_ROOT = os.path.join(PROJECT_ROOT, "static") - -TEMPLATES = [ - { - "BACKEND": "django.template.backends.django.DjangoTemplates", - "DIRS": [], - "APP_DIRS": True, - "OPTIONS": { - "context_processors": [ - "django.template.context_processors.debug", - "django.template.context_processors.request", - "django.contrib.auth.context_processors.auth", - "django.contrib.messages.context_processors.messages", - ] - }, - } -] WSGI_APPLICATION = "fragalysis.wsgi.application" -# Database -# https://docs.djangoproject.com/en/1.11/ref/settings/#databases - -CHEMCENTRAL_DB_NAME = os.environ.get("CHEMCENT_DB_NAME", "UNKOWN") - -DATABASE_ROUTERS = ['xchem_db.routers.AuthRouter'] +DATABASE_ROUTERS = ["xchem_db.routers.AuthRouter"] DATABASES = { "default": { @@ -294,9 +334,9 @@ } } -if os.environ.get("BUILD_XCDB") == 'yes': +if os.environ.get("BUILD_XCDB") == "yes": DATABASES["xchem_db"] = { - "ENGINE": 'django.db.backends.postgresql', + "ENGINE": "django.db.backends.postgresql", "NAME": os.environ.get("XCHEM_NAME", ""), "USER": os.environ.get("XCHEM_USER", ""), "PASSWORD": os.environ.get("XCHEM_PASSWORD", ""), @@ -304,7 +344,8 @@ "PORT": os.environ.get("XCHEM_PORT", ""), } -if CHEMCENTRAL_DB_NAME != "UNKOWN": +CHEMCENTRAL_DB_NAME = os.environ.get("CHEMCENT_DB_NAME", "UNKNOWN") +if CHEMCENTRAL_DB_NAME != "UNKNOWN": DATABASES["chemcentral"] = { "ENGINE": "django.db.backends.postgresql", "NAME": CHEMCENTRAL_DB_NAME, @@ -314,40 +355,14 @@ "PORT": 5432, } -# Password validation -# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators - -AUTH_PASSWORD_VALIDATORS = [ - { - "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator" - }, - {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"}, - {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"}, - {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"}, -] - -# Internationalization -# https://docs.djangoproject.com/en/1.11/topics/i18n/ - -LANGUAGE_CODE = "en-us" - -TIME_ZONE = "UTC" - USE_I18N = True - USE_L10N = True - USE_TZ = True # Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/1.11/howto/static-files/ - STATIC_URL = "/static/" MEDIA_ROOT = "/code/media/" MEDIA_URL = "/media/" -# Swagger loging / logout -LOGIN_URL = "/accounts/login/" -LOGOUT_URL = "/accounts/logout/" WEBPACK_LOADER = { "DEFAULT": { @@ -361,69 +376,13 @@ GRAPH_MODELS = {"all_applications": True, "group_models": True} # email settings for upload key stuff -EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' -EMAIL_HOST_USER = os.environ.get("EMAIL_USER") -# If there is an email user is defined then check the rest of the configuration is present. -# The defaults are set for the current (gamil) production configuration. 
-if EMAIL_HOST_USER: - EMAIL_HOST = os.environ.get('EMAIL_HOST', 'smtp.gmail.com') - EMAIL_USE_TLS = os.environ.get('EMAIL_USE_TLS', True) - EMAIL_PORT = os.environ.get('EMAIL_PORT', 587) +EMAIL_BACKEND = "django.core.mail.backends.smtp.EmailBackend" +if EMAIL_HOST_USER := os.environ.get("EMAIL_USER"): + EMAIL_HOST = os.environ.get("EMAIL_HOST", "smtp.gmail.com") + EMAIL_USE_TLS = os.environ.get("EMAIL_USE_TLS", True) + EMAIL_PORT = os.environ.get("EMAIL_PORT", 587) EMAIL_HOST_PASSWORD = os.environ.get("EMAIL_PASSWORD") - -# DOCS_ROOT = "/code/docs/_build/html " - -# Discourse settings for API calls to Discourse Platform -DISCOURSE_PARENT_CATEGORY = 'Fragalysis targets' -DISCOURSE_USER = 'fragalysis' -DISCOURSE_HOST = os.environ.get('DISCOURSE_HOST') -# Note that this can be obtained from discourse for the desired environment. -DISCOURSE_API_KEY = os.environ.get("DISCOURSE_API_KEY") - -# This suffix can be set to that the different development environments posting to the same Discourse -# server can "automatically" generate different category/post titles - hopefully reducing confusion. -# It will be appended at category or post-title, e.g. "Mpro-duncan", "Mpro-staging" etc. -# Note that it is for dev systems. It is not required on production because production will have a -# dedicated Discourse server. -DISCOURSE_DEV_POST_SUFFIX = os.environ.get("DISCOURSE_DEV_POST_SUFFIX", '') - -# An optional URL that identifies the URL to a prior stack. -# If set, it's typically something like "https://fragalysis.diamond.ac.uk". -# It can be blank, indicating there is no legacy service. -LEGACY_URL = os.environ.get("LEGACY_URL", "") - -SQUONK2_MEDIA_DIRECTORY = "fragalysis-files" -SQUONK2_INSTANCE_API = "data-manager-ui/results/instance/" - -# The Target Access String (TAS) Python regular expression. -# The Project title (the TAS) must match this expression to be valid. -# See api/utils.py validate_tas() for the current implementation. -# To simplify error messages when the match fails you can also -# add an error message. -TAS_REGEX = os.environ.get("TAS_REGEX", r"^(lb\d{5})(-(\d+)){0,1}$") -TAS_REGEX_ERROR_MSG = os.environ.get( - "TAS_REGEX_ERROR_MSG", - "Must begin 'lb' followed by 5 digits, optionally followed by a hyphen and a number.", -) -# Are any public target access strings defined? -# If so they'll be in the PUBLIC_TAS variable as a comma separated list. -PUBLIC_TAS = os.environ.get("PUBLIC_TAS", "") -PUBLIC_TAS_LIST = PUBLIC_TAS.split(",") if PUBLIC_TAS else [] - -COMPUTED_SET_MEDIA_DIRECTORY = "computed_set_data" -TARGET_LOADER_MEDIA_DIRECTORY = "target_loader_data" - -# A list of identifiers of messages generated by the system check framework -# that we wish to permanently acknowledge and ignore. -# Silenced checks will not be output to the console. -# -# fields.W342 Is issued for the xchem-db package. -# The hint is "ForeignKey(unique=True) is usually better served by a OneToOneField." -SILENCED_SYSTEM_CHECKS = [ - "fields.W342", -] - # Configure django logging. # We provide a standard formatter that emits a timestamp, the module issuing the log # and the level name, a little like this... @@ -433,36 +392,34 @@ # We provide a console and rotating file handler # (50Mi of logging in 10 files of 5M each), # with the rotating file handler typically used for everything. 
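# (Editor's illustration, hedged: given the 'simple' format and datefmt below, a
# typical record renders something like
# "2024-02-19T12:00:00+0000 viewer.views.version():32 INFO # ..." - an ISO-8601
# timestamp, logger name, function and line number, the level, then the message.)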
-DISABLE_LOGGING_FRAMEWORK = (
-    True
-    if os.environ.get("DISABLE_LOGGING_FRAMEWORK", "no").lower() in ["yes"]
-    else False
-)
+DISABLE_LOGGING_FRAMEWORK = os.environ.get(
+    "DISABLE_LOGGING_FRAMEWORK", "no"
+).lower() in ["yes"]
LOGGING_FRAMEWORK_ROOT_LEVEL = os.environ.get("LOGGING_FRAMEWORK_ROOT_LEVEL", "DEBUG")
if not DISABLE_LOGGING_FRAMEWORK:
    LOGGING = {
-        'version': 1,
-        'disable_existing_loggers': False,
-        'formatters': {
-            'simple': {
-                'format': '%(asctime)s %(name)s.%(funcName)s():%(lineno)s %(levelname)s # %(message)s',
-                'datefmt': '%Y-%m-%dT%H:%M:%S%z',
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {
+            "simple": {
+                "format": "%(asctime)s %(name)s.%(funcName)s():%(lineno)s %(levelname)s # %(message)s",
+                "datefmt": "%Y-%m-%dT%H:%M:%S%z",
            }
        },
-        'handlers': {
-            'console': {
-                'level': 'DEBUG',
-                'class': 'logging.StreamHandler',
-                'stream': sys.stdout,
-                'formatter': 'simple',
+        "handlers": {
+            "console": {
+                "level": "DEBUG",
+                "class": "logging.StreamHandler",
+                "stream": sys.stdout,
+                "formatter": "simple",
            },
-            'rotating': {
-                'level': 'DEBUG',
-                'class': 'logging.handlers.RotatingFileHandler',
-                'maxBytes': 5_000_000,
-                'backupCount': 10,
-                'filename': os.path.join(BASE_DIR, 'logs/backend.log'),
-                'formatter': 'simple',
+            "rotating": {
+                "level": "DEBUG",
+                "class": "logging.handlers.RotatingFileHandler",
+                "maxBytes": 5_000_000,
+                "backupCount": 10,
+                "filename": os.path.join(BASE_DIR, "logs/backend.log"),
+                "formatter": "simple",
            },
        },
        'loggers': {
@@ -474,8 +431,146 @@
            'urllib3': {'level': 'WARNING'},
            'paramiko': {'level': 'WARNING'},
        },
-        'root': {
-            'level': LOGGING_FRAMEWORK_ROOT_LEVEL,
-            'handlers': ['console', 'rotating'],
+        "root": {
+            "level": LOGGING_FRAMEWORK_ROOT_LEVEL,
+            "handlers": ["console", "rotating"],
        },
    }
+
+# --------------------------------------------------------------------------------------
+# FRAGALYSIS SETTINGS
+# --------------------------------------------------------------------------------------
+# With comprehensive comments where necessary to explain the setting's values.
+
+# The deployment mode.
+# Controls the behaviour of the application (its strictness to errors etc).
+# Typically one of "DEVELOPMENT" or "PRODUCTION".
+# see api.utils for the 'deployment_mode_is_production()' function.
+DEPLOYMENT_MODE: str = os.environ.get("DEPLOYMENT_MODE", "production").upper()
+
+# Authentication check when uploading files.
+# This can be switched off to simplify development testing if required.
+# It's asserted as True for 'production' mode.
+AUTHENTICATE_UPLOAD: bool = True
+if os.environ.get("AUTHENTICATE_UPLOAD") == "False":
+    assert DEPLOYMENT_MODE != "PRODUCTION"
+    AUTHENTICATE_UPLOAD = False
+
+COMPUTED_SET_MEDIA_DIRECTORY: str = "computed_set_data"
+
+# Discourse settings for API calls to Discourse Platform
+DISCOURSE_PARENT_CATEGORY: str = "Fragalysis targets"
+DISCOURSE_USER: str = "fragalysis"
+DISCOURSE_HOST: str = os.environ.get("DISCOURSE_HOST", "")
+# Note that this can be obtained from discourse for the desired environment.
+DISCOURSE_API_KEY: str = os.environ.get("DISCOURSE_API_KEY", "")
+# This suffix can be set so that the different development environments posting
+# to the same Discourse server can "automatically" generate different category/post
+# titles - hopefully reducing confusion. It will be appended to the category or post-title,
+# e.g. "Mpro-duncan", "Mpro-staging" etc. Note that it is for dev systems.
+# It is not required on production because production will have a
+# dedicated Discourse server.
+DISCOURSE_DEV_POST_SUFFIX: str = os.environ.get("DISCOURSE_DEV_POST_SUFFIX", "")
+
+DUMMY_TARGET_TITLE: str = os.environ.get("DUMMY_TARGET_TITLE", "")
+DUMMY_USER: str = os.environ.get("DUMMY_USER", "")
+DUMMY_TAS: str = os.environ.get("DUMMY_TAS", "")
+
+# Do we enable the collection and presentation
+# of the availability of underlying services?
+# A colon (:) separated list of services to enable.
+# See "viewer/services.py" for the full list of supported services.
+ENABLE_SERVICE_STATUS: str = os.environ.get("ENABLE_SERVICE_STATUS", "")
+
+# What infections have been set?
+# "Infections" are built-in faults that can be induced by providing their names.
+# Typically these are "hard to reproduce" errors that are useful for testing.
+# The names are provided in a comma-separated list in this variable.
+# The full set of supported names can be found in "api/infections.py"
+INFECTIONS: str = os.environ.get("INFECTIONS", "").lower()
+
+# The ISPyB database settings.
+# Can be used in conjunction with SSH settings (later in this file)
+ISPYB_USER: str = os.environ.get("ISPYB_USER", "")
+ISPYB_PASSWORD: str = os.environ.get("ISPYB_PASSWORD", "")
+ISPYB_HOST: str = os.environ.get("ISPYB_HOST", "")
+ISPYB_PORT: str = os.environ.get("ISPYB_PORT", "")
+
+# An optional URL that identifies a prior stack.
+# If set, it's typically something like "https://fragalysis.diamond.ac.uk".
+# It can be blank, indicating there is no legacy service.
+LEGACY_URL: str = os.environ.get("LEGACY_URL", "")
+
+NEOMODEL_NEO4J_BOLT_URL: str = os.environ.get(
+    "NEO4J_BOLT_URL", "bolt://neo4j:test@neo4j:7687"
+)
+
+NEO4J_QUERY: str = os.environ.get("NEO4J_QUERY", "neo4j")
+NEO4J_AUTH: str = os.environ.get("NEO4J_AUTH", "neo4j/neo4j")
+
+# These flags are used in the upload_tset form as follows.
+#   Proposal Supported | Proposal Required | Proposal / View fields
+#   Y                  | Y                 | Shown / Required
+#   Y                  | N                 | Shown / Optional
+#   N                  | N                 | Not Shown
+PROPOSAL_SUPPORTED: bool = True
+PROPOSAL_REQUIRED: bool = True
+
+# Are any public target access strings defined?
+# If so they'll be in the PUBLIC_TAS variable as a comma separated list.
+PUBLIC_TAS: str = os.environ.get("PUBLIC_TAS", "")
+PUBLIC_TAS_LIST: List[str] = PUBLIC_TAS.split(",") if PUBLIC_TAS else []
+
+# Security/access control connector.
+# Currently one of 'ispyb' or 'ssh_ispyb'.
+SECURITY_CONNECTOR: str = os.environ.get("SECURITY_CONNECTOR", "ispyb").lower()
+# Number of minutes to cache security information for a user.
+# Set to '0' to disable caching.
+SECURITY_CONNECTOR_CACHE_MINUTES: int = int(
+    os.environ.get("SECURITY_CONNECTOR_CACHE_MINUTES", "2")
+)
+
+# An SSH host.
+# Used in the security module in conjunction with ISPyB settings.
+# Any SSH_PRIVATE_KEY_FILENAME value will be used in preference to SSH_PASSWORD.
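+# (Editor's note, hedged: these SSH settings are expected to matter only when
+# SECURITY_CONNECTOR above is 'ssh_ispyb', i.e. when ISPyB is reached over an
+# SSH tunnel; with the plain 'ispyb' connector they can be left unset.)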
+SSH_HOST: str = os.environ.get("SSH_HOST", "") +SSH_USER: str = os.environ.get("SSH_USER", "") +SSH_PASSWORD: str = os.environ.get("SSH_PASSWORD", "") +SSH_PRIVATE_KEY_FILENAME: str = os.environ.get("SSH_PRIVATE_KEY_FILENAME", "") + +# A slug used for names this Fragalysis will create +SQUONK2_MAX_SLUG_LENGTH: int = 10 + +SQUONK2_MEDIA_DIRECTORY: str = "fragalysis-files" +SQUONK2_INSTANCE_API: str = "data-manager-ui/results/instance/" + +SQUONK2_ASAPI_URL: str = os.environ.get("SQUONK2_ASAPI_URL", "") +SQUONK2_DMAPI_URL: str = os.environ.get("SQUONK2_DMAPI_URL", "") +SQUONK2_UI_URL: str = os.environ.get("SQUONK2_UI_URL", "") +SQUONK2_ORG_UUID: str = os.environ.get("SQUONK2_ORG_UUID", "") +SQUONK2_UNIT_BILLING_DAY: str = os.environ.get("SQUONK2_UNIT_BILLING_DAY", "") +SQUONK2_PRODUCT_FLAVOUR: str = os.environ.get("SQUONK2_PRODUCT_FLAVOUR", "") +SQUONK2_SLUG: str = os.environ.get("SQUONK2_SLUG", "")[:SQUONK2_MAX_SLUG_LENGTH] +SQUONK2_ORG_OWNER: str = os.environ.get("SQUONK2_ORG_OWNER", "") +SQUONK2_ORG_OWNER_PASSWORD: str = os.environ.get("SQUONK2_ORG_OWNER_PASSWORD", "") +SQUONK2_VERIFY_CERTIFICATES: str = os.environ.get("SQUONK2_VERIFY_CERTIFICATES", "") + +TARGET_LOADER_MEDIA_DIRECTORY: str = "target_loader_data" + +# The Target Access String (TAS) Python regular expression. +# The Project title (the TAS) must match this expression to be valid. +# See api/utils.py validate_tas() for the current implementation. +# To simplify error messages when the match fails you can also +# add an error message. +TAS_REGEX: str = os.environ.get("TAS_REGEX", r"^(lb\d{5})(-(\d+)){0,1}$") +TAS_REGEX_ERROR_MSG: str = os.environ.get( + "TAS_REGEX_ERROR_MSG", + "Must begin 'lb' followed by 5 digits, optionally followed by a hyphen and a number.", +) + +BE_NAMESPACE: str = os.environ.get("BE_NAMESPACE", "undefined") +BE_IMAGE_TAG: str = os.environ.get("BE_IMAGE_TAG", "undefined") +FE_NAMESPACE: str = os.environ.get("FE_NAMESPACE", "undefined") +FE_IMAGE_TAG: str = os.environ.get("FE_IMAGE_TAG", "undefined") +STACK_NAMESPACE: str = os.environ.get("STACK_NAMESPACE", "undefined") +STACK_VERSION: str = os.environ.get("STACK_VERSION", "undefined") diff --git a/fragalysis/views.py b/fragalysis/views.py index c68a4d4e..7b14e912 100644 --- a/fragalysis/views.py +++ b/fragalysis/views.py @@ -1,6 +1,4 @@ # Classes/Methods to override default OIDC Views (Keycloak authentication) -import os - from django.conf import settings from django.http import JsonResponse from mozilla_django_oidc.views import OIDCLogoutView @@ -34,41 +32,12 @@ def version(request): # Unused args del request - undefined_value = "undefined" - # b/e, f/e and stack origin comes form container environment variables. 
- # - # We also need to deal with empty or unset strings - # so the get() default does not help - be_namespace = os.environ.get('BE_NAMESPACE') - if not be_namespace: - be_namespace = undefined_value - - be_image_tag = os.environ.get('BE_IMAGE_TAG') - if not be_image_tag: - be_image_tag = undefined_value - - fe_namespace = os.environ.get('FE_NAMESPACE') - if not fe_namespace: - fe_namespace = undefined_value - - fe_branch = os.environ.get('FE_BRANCH') - if not fe_branch: - fe_branch = undefined_value - - stack_namespace = os.environ.get('STACK_NAMESPACE') - if not stack_namespace: - stack_namespace = undefined_value - - stack_version = os.environ.get('STACK_VERSION') - if not stack_version: - stack_version = undefined_value - version_response = { - 'version': { - 'backend': f'{be_namespace}:{be_image_tag}', - 'frontend': f'{fe_namespace}:{fe_branch}', - 'stack': f'{stack_namespace}:{stack_version}', + "version": { + "backend": f"{settings.BE_NAMESPACE}:{settings.BE_IMAGE_TAG}", + "frontend": f"{settings.FE_NAMESPACE}:{settings.FE_IMAGE_TAG}", + "stack": f"{settings.STACK_NAMESPACE}:{settings.STACK_VERSION}", } } return JsonResponse(version_response) diff --git a/network/views.py b/network/views.py index 8d790247..0be98a9d 100644 --- a/network/views.py +++ b/network/views.py @@ -1,5 +1,4 @@ -import os - +from django.conf import settings from django.http import HttpResponse from frag.network.decorate import get_add_del_link from frag.network.query import get_full_graph @@ -8,13 +7,9 @@ def full_graph(request): - """ - Get the full graph for a molecule from an input smiles - :param request: - :return: - """ - graph_choice = os.environ.get("NEO4J_QUERY", "neo4j") - graph_auth = os.environ.get("NEO4J_AUTH", "neo4j/neo4j") + """Get the full graph for a molecule from an input smiles""" + graph_choice = settings.NEO4J_QUERY + graph_auth = settings.NEO4J_AUTH if "graph_choice" in request.GET: graph_choice = request.GET["graph_choice"] if "smiles" in request.GET: diff --git a/viewer/serializers.py b/viewer/serializers.py index 15093985..8694419f 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -465,8 +465,8 @@ class Meta: class GraphSerializer(serializers.ModelSerializer): graph = serializers.SerializerMethodField() - graph_choice = os.environ.get("NEO4J_QUERY", "neo4j") - graph_auth = os.environ.get("NEO4J_AUTH", "neo4j/neo4j") + graph_choice = settings.NEO4J_QUERY + graph_auth = settings.NEO4J_AUTH def get_graph(self, obj): return get_full_graph( diff --git a/viewer/services.py b/viewer/services.py index a203bfc3..77417143 100644 --- a/viewer/services.py +++ b/viewer/services.py @@ -6,6 +6,7 @@ from enum import Enum import requests +from django.conf import settings from frag.utils.network_utils import get_driver from pydiscourse import DiscourseClient @@ -18,8 +19,8 @@ # Default timeout for any request calls REQUEST_TIMEOUT_S = 5 -_NEO4J_LOCATION: str = os.environ.get("NEO4J_QUERY", "neo4j") -_NEO4J_AUTH: str = os.environ.get("NEO4J_AUTH", "neo4j/neo4j") +_NEO4J_LOCATION: str = settings.NEO4J_QUERY +_NEO4J_AUTH: str = settings.NEO4J_AUTH class State(str, Enum): diff --git a/viewer/squonk2_agent.py b/viewer/squonk2_agent.py index 3d4f7936..aadab6c0 100644 --- a/viewer/squonk2_agent.py +++ b/viewer/squonk2_agent.py @@ -12,6 +12,7 @@ from urllib.parse import ParseResult, urlparse import requests +from django.conf import settings from requests import Response from squonk2.as_api import AsApi, AsApiRv from squonk2.auth import Auth @@ -58,9 +59,7 @@ # How long are Squonk2 'names'? 
_SQ2_MAX_NAME_LENGTH: int = 80 -# A slug used for names this Fragalysis will create -# and a prefix string. So Squonk2 objects will be called "Fragalysis {slug}" -_MAX_SLUG_LENGTH: int = 10 +# An object prefix string. So Squonk2 objects will be called "Fragalysis {slug}" _SQ2_NAME_PREFIX: str = "Fragalysis" # Built-in @@ -94,46 +93,24 @@ def __init__(self): # "Fragalysis {SLUG} ", this leaves (80-22) 58 characters for the # use with the target-access-string and session project strings # to form Squonk2 Unit and Project names. - self.__CFG_SQUONK2_ASAPI_URL: Optional[str] = os.environ.get( - 'SQUONK2_ASAPI_URL' - ) - self.__CFG_SQUONK2_DMAPI_URL: Optional[str] = os.environ.get( - 'SQUONK2_DMAPI_URL' - ) - self.__CFG_SQUONK2_UI_URL: Optional[str] = os.environ.get('SQUONK2_UI_URL') - self.__CFG_SQUONK2_ORG_UUID: Optional[str] = os.environ.get('SQUONK2_ORG_UUID') - self.__CFG_SQUONK2_UNIT_BILLING_DAY: Optional[str] = os.environ.get( - 'SQUONK2_UNIT_BILLING_DAY' - ) - self.__CFG_SQUONK2_PRODUCT_FLAVOUR: Optional[str] = os.environ.get( - 'SQUONK2_PRODUCT_FLAVOUR' - ) - self.__CFG_SQUONK2_SLUG: Optional[str] = os.environ.get('SQUONK2_SLUG', '')[ - :_MAX_SLUG_LENGTH - ] - self.__CFG_SQUONK2_ORG_OWNER: Optional[str] = os.environ.get( - 'SQUONK2_ORG_OWNER' - ) - self.__CFG_SQUONK2_ORG_OWNER_PASSWORD: Optional[str] = os.environ.get( - 'SQUONK2_ORG_OWNER_PASSWORD' - ) - self.__CFG_OIDC_AS_CLIENT_ID: Optional[str] = os.environ.get( - 'OIDC_AS_CLIENT_ID' - ) - self.__CFG_OIDC_DM_CLIENT_ID: Optional[str] = os.environ.get( - 'OIDC_DM_CLIENT_ID' - ) - self.__CFG_OIDC_KEYCLOAK_REALM: Optional[str] = os.environ.get( - 'OIDC_KEYCLOAK_REALM' - ) + self.__CFG_SQUONK2_ASAPI_URL: str = settings.SQUONK2_ASAPI_URL + self.__CFG_SQUONK2_DMAPI_URL: str = settings.SQUONK2_DMAPI_URL + self.__CFG_SQUONK2_UI_URL: str = settings.SQUONK2_UI_URL + self.__CFG_SQUONK2_ORG_UUID: str = settings.SQUONK2_ORG_UUID + self.__CFG_SQUONK2_UNIT_BILLING_DAY: str = settings.SQUONK2_UNIT_BILLING_DAY + self.__CFG_SQUONK2_PRODUCT_FLAVOUR: str = settings.SQUONK2_PRODUCT_FLAVOUR + self.__CFG_SQUONK2_SLUG: str = settings.SQUONK2_SLUG + self.__CFG_SQUONK2_ORG_OWNER: str = settings.SQUONK2_ORG_OWNER + self.__CFG_SQUONK2_ORG_OWNER_PASSWORD: str = settings.SQUONK2_ORG_OWNER_PASSWORD + self.__CFG_OIDC_AS_CLIENT_ID: str = settings.OIDC_AS_CLIENT_ID + self.__CFG_OIDC_DM_CLIENT_ID: str = settings.OIDC_DM_CLIENT_ID + self.__CFG_OIDC_KEYCLOAK_REALM: str = settings.OIDC_KEYCLOAK_REALM # Optional config (no '__CFG_' prefix) - self.__DUMMY_TARGET_TITLE: Optional[str] = os.environ.get('DUMMY_TARGET_TITLE') - self.__DUMMY_USER: Optional[str] = os.environ.get('DUMMY_USER') - self.__DUMMY_TAS: Optional[str] = os.environ.get('DUMMY_TAS') - self.__SQUONK2_VERIFY_CERTIFICATES: Optional[str] = os.environ.get( - 'SQUONK2_VERIFY_CERTIFICATES' - ) + self.__DUMMY_TARGET_TITLE: str = settings.DUMMY_TARGET_TITLE + self.__DUMMY_USER: str = settings.DUMMY_USER + self.__DUMMY_TAS: str = settings.DUMMY_TAS + self.__SQUONK2_VERIFY_CERTIFICATES: str = settings.SQUONK2_VERIFY_CERTIFICATES # The integer billing day, valid if greater than zero self.__unit_billing_day: int = 0 @@ -799,9 +776,9 @@ def configured(self) -> Squonk2AgentRv: # Is the slug too long? 
# Limited to 10 characters assert self.__CFG_SQUONK2_SLUG - if len(self.__CFG_SQUONK2_SLUG) > _MAX_SLUG_LENGTH: + if len(self.__CFG_SQUONK2_SLUG) > settings.SQUONK2_MAX_SLUG_LENGTH: msg = ( - f'Slug is longer than {_MAX_SLUG_LENGTH} characters' + f'Slug is longer than {settings.SQUONK2_MAX_SLUG_LENGTH} characters' f' ({self.__CFG_SQUONK2_SLUG})' ) _LOGGER.error(msg) diff --git a/viewer/views.py b/viewer/views.py index 650945e1..b3e3562a 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -2487,7 +2487,7 @@ def get(self, *args, **kwargs): del args, kwargs logger.debug("+ ServiceServiceState.State.get called") - service_string = os.environ.get("ENABLE_SERVICE_STATUS", "") + service_string = settings.ENABLE_SERVICE_STATUS logger.debug("Service string: %s", service_string) services = [k for k in service_string.split(":") if k != ""] From 62c04bd76f352a04bd3f68c87761d592d8f63622 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Feb 2024 16:29:39 +0100 Subject: [PATCH 14/47] build(deps): bump cryptography from 42.0.0 to 42.0.2 (#533) Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.0 to 42.0.2. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/42.0.0...42.0.2) --- updated-dependencies: - dependency-name: cryptography dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 66 ++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/poetry.lock b/poetry.lock index 64ceed5f..0fcf5b82 100644 --- a/poetry.lock +++ b/poetry.lock @@ -539,43 +539,43 @@ jinja2 = "*" [[package]] name = "cryptography" -version = "42.0.0" +version = "42.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434"}, - {file = "cryptography-42.0.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc"}, - {file = "cryptography-42.0.0-cp37-abi3-win32.whl", hash = "sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4"}, - {file = "cryptography-42.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0"}, - {file = "cryptography-42.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221"}, - {file = "cryptography-42.0.0-cp39-abi3-win32.whl", hash = "sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b"}, - {file = "cryptography-42.0.0-cp39-abi3-win_amd64.whl", hash = 
"sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce"}, - {file = "cryptography-42.0.0.tar.gz", hash = "sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4"}, + {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be"}, + {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2"}, + {file = "cryptography-42.0.2-cp37-abi3-win32.whl", hash = "sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee"}, + {file = "cryptography-42.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee"}, + {file = "cryptography-42.0.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33"}, + {file = "cryptography-42.0.2-cp39-abi3-win32.whl", hash = "sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635"}, + {file = "cryptography-42.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65"}, + {file = "cryptography-42.0.2.tar.gz", hash = "sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888"}, ] [package.dependencies] From 4b509eb70f52826157235f95562cc0a483435564 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Mon, 19 Feb 2024 18:00:58 +0100 Subject: [PATCH 15/47] docs: Updates documentation (#536) Co-authored-by: Alan Christie --- README.md | 14 +++++++++++-- fragalysis/settings.py | 47 +++++++++++++++++++++++++++++++++++------- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0b417e5e..af572d65 100644 --- a/README.md +++ b/README.md @@ -66,13 +66,11 @@ installs/updates new packages to local venv. 
It's equivalent to running `poetry lock && poetry install`, so if you're not
interested in the local environment and just want to update the lockfile, you can
run just `poetry lock`.
-
## Building and running (local)
The backend is a Docker container image and can be built and deployed locally
using `docker-compose`:
-
    docker-compose build
-
To run the application (which will include deployment of the postgres and neo4j
databases) run:
-
@@ -181,6 +179,18 @@
at `/code/logs`.

> For local development using the `docker-compose.yml` file you'll find the logs at `./data/logs/backend.log`.

+## Configuration (environment variables)
+The backend configuration is controlled by a number of environment variables.
+Variables are typically defined in the project's `fragalysis/settings.py`, where you
+will also find **ALL** the dynamically configured variables (those that can be changed
+using *environment variables* in the deployed Pod/Container).
+
+- Not all variables are dynamic. For example `ALLOWED_HOSTS` is a static variable
+  that is set in the `settings.py` file and is not intended to be changed at run-time.
+
+Refer to the documentation in the `settings.py` file to understand the environment
+and the style guide for new variables that you need to add.
+
## Database migrations
The best approach is to spin-up the development backend (locally) using
`docker-compose` with the custom *migration* compose file and then shell into Django.
diff --git a/fragalysis/settings.py b/fragalysis/settings.py
index 3aa3b58a..c7b47cc4 100644
--- a/fragalysis/settings.py
+++ b/fragalysis/settings.py
@@ -6,9 +6,11 @@
# that control the stack's configuration (behaviour).
#
# Not all settings are configured by environment variable. Some are hard-coded
-# and you'll need to edit their values here. Those that are configurable at run-time
-# should be obvious (i.e. they'll use "os.environ.get()" to obtain their value)
-# alternative run-time value.
+# and you'll need to edit their values here. For example `ALLOWED_HOSTS`
+# is a static variable that is not intended to be changed at run-time.
+#
+# Those that are configurable at run-time should be obvious
+# (i.e. they'll use "os.environ.get()" to obtain their run-time value).
#
# You will find the django-related configuration at the top of the file
# (under DJANGO SETTINGS) and the fragalysis-specific configuration at the bottom of
@@ -22,7 +24,11 @@
#
# 2. The constant used to hold the environment variable *SHOULD* match the
#    environment variable's name. i.e. the "DEPLOYMENT_MODE" environment variable's
-#    value *SHOULD* be found in 'settings.DEPLOYMENT_MODE'.
+#    value *SHOULD* be found in the 'settings.DEPLOYMENT_MODE' variable.
+#
+# 3. In the FRAGALYSIS section, document the variable's purpose and the values
+#    it can take in the comments. If there are dependencies or "gotchas"
+#    (i.e. changing its value after deployment) then these should be documented.
#
# Providing run-time values for variables: -
#
@@ -45,7 +51,7 @@
#
# IMPORTANTLY: For a description of an environment variable (setting) and its value
#              you *MUST* consult the comments in this file ("settings.py"), and *NOT*
-#              the Ansible playbook. This file is the primary authority for the
+#              the Ansible playbook. "settings.py" is the primary authority for the
#              configuration of the Fragalysis Stack.
#
# Ansible variables are declared in "roles/fragalysis-stack/defaults/main.yaml"
@@ -472,6 +478,8 @@
# dedicated Discourse server.
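# (Editor's illustration, hedged: with DISCOURSE_DEV_POST_SUFFIX="-duncan", a
# category otherwise titled "Mpro" would be created as "Mpro-duncan", matching
# the examples above.)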
DISCOURSE_DEV_POST_SUFFIX: str = os.environ.get("DISCOURSE_DEV_POST_SUFFIX", "")

+# Some Squonk2 developer/debug variables.
+# Unused in production.
DUMMY_TARGET_TITLE: str = os.environ.get("DUMMY_TARGET_TITLE", "")
DUMMY_USER: str = os.environ.get("DUMMY_USER", "")
DUMMY_TAS: str = os.environ.get("DUMMY_TAS", "")
@@ -505,6 +513,9 @@
    "NEO4J_BOLT_URL", "bolt://neo4j:test@neo4j:7687"
)

+# The graph (neo4j) database settings.
+# The query provides the graph endpoint, typically a service in a kubernetes namespace
+# like 'graph.graph-a.svc' and the 'auth' provides the graph username and password.
NEO4J_QUERY: str = os.environ.get("NEO4J_QUERY", "neo4j")
NEO4J_AUTH: str = os.environ.get("NEO4J_AUTH", "neo4j/neo4j")

@@ -532,27 +543,46 @@

# An SSH host.
# Used in the security module in conjunction with ISPyB settings.
-# Any SSH_PRIVATE_KEY_FILENAME value will be used in preference to SSH_PASSWORD.
+# The SSH_PRIVATE_KEY_FILENAME value will be used if there is no SSH_PASSWORD.
SSH_HOST: str = os.environ.get("SSH_HOST", "")
SSH_USER: str = os.environ.get("SSH_USER", "")
SSH_PASSWORD: str = os.environ.get("SSH_PASSWORD", "")
SSH_PRIVATE_KEY_FILENAME: str = os.environ.get("SSH_PRIVATE_KEY_FILENAME", "")

-# A slug used for names this Fragalysis will create
+# The maximum length of the 'slug' used for names this Fragalysis will create.
+#
+# Squonk2 variables are generally used by the 'squonk2_agent.py' module
+# in the 'viewer' package.
SQUONK2_MAX_SLUG_LENGTH: int = 10

+# Where the Squonk2 logic places its files in Job containers.
SQUONK2_MEDIA_DIRECTORY: str = "fragalysis-files"
+# The Squonk2 DataManager UI endpoint to obtain Job Instance information.
SQUONK2_INSTANCE_API: str = "data-manager-ui/results/instance/"

+# The URL for the Squonk2 Account Server API.
SQUONK2_ASAPI_URL: str = os.environ.get("SQUONK2_ASAPI_URL", "")
+# The URL for the Squonk2 Data Manager API.
SQUONK2_DMAPI_URL: str = os.environ.get("SQUONK2_DMAPI_URL", "")
+# The URL for the Squonk2 User Interface.
SQUONK2_UI_URL: str = os.environ.get("SQUONK2_UI_URL", "")
+# The pre-assigned Squonk2 Account Server Organisation for the stack.
+# This is created by an administrator of the Squonk2 service.
SQUONK2_ORG_UUID: str = os.environ.get("SQUONK2_ORG_UUID", "")
+# The Account Server Unit billing day for all products (projects) that are created.
+# It's a day of the month (1..27).
SQUONK2_UNIT_BILLING_DAY: str = os.environ.get("SQUONK2_UNIT_BILLING_DAY", "")
+# The Squonk2 Account Server product "flavour" created for Jobs (products/projects).
+# It's usually one of "GOLD", "SILVER" or "BRONZE".
SQUONK2_PRODUCT_FLAVOUR: str = os.environ.get("SQUONK2_PRODUCT_FLAVOUR", "")
+# A short slug used when creating Squonk2 objects for this stack.
+# This must be unique across all stacks that share the same Squonk2 service.
SQUONK2_SLUG: str = os.environ.get("SQUONK2_SLUG", "")[:SQUONK2_MAX_SLUG_LENGTH]
+# The pre-assigned Squonk2 Account Server Organisation owner and password.
+# This account is used to create Squonk2 objects for the stack.
SQUONK2_ORG_OWNER: str = os.environ.get("SQUONK2_ORG_OWNER", "")
SQUONK2_ORG_OWNER_PASSWORD: str = os.environ.get("SQUONK2_ORG_OWNER_PASSWORD", "")
+# Do we verify Squonk2 SSL certificates ("yes" or "no").
SQUONK2_VERIFY_CERTIFICATES: str = os.environ.get("SQUONK2_VERIFY_CERTIFICATES", "")

TARGET_LOADER_MEDIA_DIRECTORY: str = "target_loader_data"

@@ -568,6 +598,9 @@
    "Must begin 'lb' followed by 5 digits, optionally followed by a hyphen and a number.",
)

+# Version variables.
+# These are set by the Dockerfile in the fragalysis-stack repository +# and controlled by the CI process, i.e. they're not normally set by a a user. BE_NAMESPACE: str = os.environ.get("BE_NAMESPACE", "undefined") BE_IMAGE_TAG: str = os.environ.get("BE_IMAGE_TAG", "undefined") FE_NAMESPACE: str = os.environ.get("FE_NAMESPACE", "undefined") From 51d9b352fb82abee3724a7807be57767efd9abd8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Feb 2024 18:01:21 +0100 Subject: [PATCH 16/47] build(deps): bump django from 3.2.20 to 3.2.24 (#535) Bumps [django](https://github.com/django/django) from 3.2.20 to 3.2.24. - [Commits](https://github.com/django/django/compare/3.2.20...3.2.24) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- build-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-requirements.txt b/build-requirements.txt index 3a824035..22008213 100644 --- a/build-requirements.txt +++ b/build-requirements.txt @@ -8,7 +8,7 @@ pre-commit == 3.5.0 poetry == 1.7.1 # Matching main requirements... -Django==3.2.20 +Django==3.2.24 # Others httpie == 3.2.1 From 05ba0efeea40c425012254cff73d2657b49f5950 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 20 Feb 2024 09:31:14 +0000 Subject: [PATCH 17/47] fix: reverting wrong changes --- viewer/cset_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index bb4e704f..9cbeed32 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -363,7 +363,7 @@ def set_mol( # Try to get the LHS SiteObservation, # This will be used to set the ComputedMolecule.site_observation_code. # This may fail. - lhs_property = 'ref_pdb' + lhs_property = 'lhs_pdb' lhs_so = self.get_site_observation( lhs_property, mol, From ad39996e8e16ad584b1cce40376362a6a469b010 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 20 Feb 2024 09:43:36 +0000 Subject: [PATCH 18/47] fix: reverting wrong changes (#538) --- viewer/cset_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index bb4e704f..9cbeed32 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -363,7 +363,7 @@ def set_mol( # Try to get the LHS SiteObservation, # This will be used to set the ComputedMolecule.site_observation_code. # This may fail. 
- lhs_property = 'ref_pdb' + lhs_property = 'lhs_pdb' lhs_so = self.get_site_observation( lhs_property, mol, From 7521b7afb62d6eb7bb5f4d70730cd481d4202740 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 22 Feb 2024 12:14:22 +0000 Subject: [PATCH 19/47] stashing --- viewer/cset_upload.py | 205 +++++++++--------- .../migrations/0044_computedmolecule_pdb.py | 23 ++ viewer/migrations/0045_auto_20240221_1203.py | 34 +++ viewer/models.py | 13 ++ viewer/views.py | 22 +- 5 files changed, 187 insertions(+), 110 deletions(-) create mode 100644 viewer/migrations/0044_computedmolecule_pdb.py create mode 100644 viewer/migrations/0045_auto_20240221_1203.py diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 9cbeed32..706b7832 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -2,9 +2,9 @@ import datetime import logging import os -import shutil import uuid import zipfile +from pathlib import Path from typing import Any, Dict, List, Optional, Tuple from openpyxl.utils import get_column_letter @@ -142,37 +142,45 @@ def __init__( self.zfile = zfile self.zfile_hashvals = zfile_hashvals - def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservation: + def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None: for key in zfile_hashvals.keys(): if key == pdb_code: pdb_code = f'{pdb_code}#{zfile_hashvals[pdb_code]}' - pdb_fp = zfile[pdb_code] - pdb_fn = zfile[pdb_code].split('/')[-1] + try: + pdb_fp = zfile[pdb_code] + except KeyError: + return None - new_filename = f'{settings.MEDIA_ROOT}pdbs/{pdb_fn}' - old_filename = settings.MEDIA_ROOT + pdb_fp - shutil.copy(old_filename, new_filename) + # ensure filename uniqueness + pdb_fn = '_'.join([zfile[pdb_code].split('/')[-1], uuid.uuid4().hex]) + pdb_field = Path(settings.COMPUTED_SET_MEDIA_DIRECTORY).joinpath(pdb_fn) - # Create Protein object - target_obj = Target.objects.get(title=target) - # prot.target_id = target_obj - site_obvs, created = SiteObservation.objects.get_or_create( - code=pdb_code, target_id=target_obj - ) - # prot.code = pdb_code - if created: - target_obj = Target.objects.get(title=target) - site_obvs.target_id = target_obj - site_obvs.pdb_info = f'pdbs/{pdb_fn}' - site_obvs.save() + new_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_field) + old_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_fp) + old_filename.rename(new_filename) - return site_obvs + return str(pdb_field) + + # # Create Protein object + # target_obj = Target.objects.get(title=target) + # # prot.target_id = target_obj + # site_obvs, created = SiteObservation.objects.get_or_create( + # code=pdb_code, target_id=target_obj + # ) + # # prot.code = pdb_code + # if created: + # target_obj = Target.objects.get(title=target) + # site_obvs.target_id = target_obj + # site_obvs.pdb_info = f'pdbs/{pdb_fn}' + # site_obvs.save() + + # return site_obvs # use zfile object for pdb files uploaded in zip def get_site_observation( self, property_name, mol, target, compound_set, zfile, zfile_hashvals - ) -> Optional[SiteObservation]: + ) -> SiteObservation | str | None: # Get a SiteObservation from the molecule using # a named property (i.e. lhs_pdb or ref_pdb for example) @@ -187,61 +195,69 @@ def get_site_observation( return None pdb_fn = mol.GetProp(property_name).split('/')[-1] - site_obvs = None if zfile: + # pdb archive uploaded. 
referenced pdb file may or may not be included pdb_code = pdb_fn.replace('.pdb', '') - site_obvs = self.process_pdb( + pdb_file = self.process_pdb( pdb_code=pdb_code, - target=target, zfile=zfile, zfile_hashvals=zfile_hashvals, ) - else: - name = pdb_fn - try: - site_obvs = SiteObservation.objects.get( - code__contains=name, - experiment__experiment_upload__target__title=target, + if pdb_file: + return pdb_file + else: + logger.info( + 'No protein pdb (%s) found in zipfile', + pdb_fn, ) - except SiteObservation.DoesNotExist: - # Initial SiteObservation lookup failed. - logger.warning( - 'Failed to get SiteObservation object (target=%s name=%s)', - compound_set.target.title, + + # pdb was not included, try to find the matching site observation + name = pdb_fn + site_obvs = None + try: + site_obvs = SiteObservation.objects.get( + code__contains=name, + experiment__experiment_upload__target__title=target, + ) + except SiteObservation.DoesNotExist: + # Initial SiteObservation lookup failed. + logger.warning( + 'Failed to get SiteObservation object (target=%s name=%s)', + compound_set.target.title, + name, + ) + # Try alternatives. + # If all else fails then the site_obvs will be 'None' + qs = SiteObservation.objects.filter( + code__contains=name, + experiment__experiment_upload__target__title=target, + ) + if qs.exists(): + logger.info( + 'Found SiteObservation containing name=%s qs=%s', name, + qs, ) - # Try alternatives. - # If all else fails then the site_obvs will be 'None' + else: + alt_name = name.split(':')[0].split('_')[0] qs = SiteObservation.objects.filter( - code__contains=name, + code__contains=alt_name, experiment__experiment_upload__target__title=target, ) if qs.exists(): logger.info( - 'Found SiteObservation containing name=%s qs=%s', - name, + 'Found SiteObservation containing alternative name=%s qs=%s', + alt_name, qs, ) - else: - alt_name = name.split(':')[0].split('_')[0] - qs = SiteObservation.objects.filter( - code__contains=alt_name, - experiment__experiment_upload__target__title=target, - ) - if qs.exists(): - logger.info( - 'Found SiteObservation containing alternative name=%s qs=%s', - alt_name, - qs, - ) - if qs.count() > 0: - logger.debug( - 'Found alternative (target=%s name=%s)', - compound_set.target.title, - name, - ) - site_obvs = qs[0] + if qs.count() > 0: + logger.debug( + 'Found alternative (target=%s name=%s)', + compound_set.target.title, + name, + ) + site_obvs = qs[0] if not site_obvs: logger.warning( @@ -360,31 +376,10 @@ def set_mol( insp_frags.append(ref) - # Try to get the LHS SiteObservation, - # This will be used to set the ComputedMolecule.site_observation_code. - # This may fail. - lhs_property = 'lhs_pdb' - lhs_so = self.get_site_observation( - lhs_property, - mol, - target, - compound_set, - zfile, - zfile_hashvals=zfile_hashvals, - ) - if not lhs_so: - logger.warning( - 'Failed to get a LHS SiteObservation (%s) for %s, %s, %s', - lhs_property, - mol, - target, - compound_set, - ) - - # Try to get the reference SiteObservation, - # This will be used to set the ComputedMolecule.reference_code. - # This may fail. ref_property = 'ref_pdb' + # data in ref ref_pdb field may be one of 2 things: + # - siteobservation's short code (code field) + # - pdb file in uploaded zipfile ref_so = self.get_site_observation( ref_property, mol, @@ -404,12 +399,12 @@ def set_mol( # A LHS or Reference protein must be provided. 
# (Part of "Fix behaviour of RHS [P] button - also RHS upload change", issue #1249) - if not lhs_so and not ref_so: - logger.error( - 'ComputedMolecule has no LHS (%s) or Reference (%s) property', - lhs_property, - ref_property, - ) + # if not lhs_so and not ref_so: + # logger.error( + # 'ComputedMolecule has no LHS (%s) or Reference (%s) property', + # lhs_property, + # ref_property, + # ) # Need a ComputedMolecule before saving. # Check if anything exists already... @@ -433,15 +428,27 @@ def set_mol( logger.info('Creating new ComputedMolecule') computed_molecule = ComputedMolecule() + if isinstance(ref_so, SiteObservation): + code = ref_so.code + pdb_info = ref_so.experiment.pdb_info + lhs_so = ref_so + else: + code = None + pdb_info = ref_so + lhs_so = None + assert computed_molecule computed_molecule.compound = compound computed_molecule.computed_set = compound_set computed_molecule.sdf_info = Chem.MolToMolBlock(mol) - computed_molecule.site_observation_code = lhs_so.code if lhs_so else None - computed_molecule.reference_code = ref_so.code if ref_so else None + computed_molecule.site_observation_code = code + computed_molecule.reference_code = code computed_molecule.molecule_name = molecule_name computed_molecule.name = f"{target}-{computed_molecule.identifier}" computed_molecule.smiles = smiles + computed_molecule.pdb = lhs_so + # TODO: this is wrong + computed_molecule.pdb_info = pdb_info # Extract possible reference URL and Rationale # URLs have to be valid URLs and rationals must contain more than one word ref_url: Optional[str] = ( @@ -591,6 +598,14 @@ def task(self) -> ComputedSet: assert settings.AUTHENTICATE_UPLOAD is False computed_set.save() + # check compound set folder exists. + cmp_set_folder = os.path.join( + settings.MEDIA_ROOT, settings.COMPUTED_SET_MEDIA_DIRECTORY + ) + if not os.path.isdir(cmp_set_folder): + logger.info('Making ComputedSet folder (%s)', cmp_set_folder) + os.mkdir(cmp_set_folder) + # Set descriptions in return for the Molecules. # This also sets the submitter and method URL properties of the computed set # while also saving it. @@ -611,14 +626,6 @@ def task(self) -> ComputedSet: self.zfile_hashvals, ) - # check compound set folder exists. 
- cmp_set_folder = os.path.join( - settings.MEDIA_ROOT, settings.COMPUTED_SET_MEDIA_DIRECTORY - ) - if not os.path.isdir(cmp_set_folder): - logger.info('Making ComputedSet folder (%s)', cmp_set_folder) - os.mkdir(cmp_set_folder) - # move and save the compound set new_filename = f'{settings.MEDIA_ROOT}{settings.COMPUTED_SET_MEDIA_DIRECTORY}/{computed_set.name}.sdf' os.rename(sdf_filename, new_filename) diff --git a/viewer/migrations/0044_computedmolecule_pdb.py b/viewer/migrations/0044_computedmolecule_pdb.py new file mode 100644 index 00000000..e5b20906 --- /dev/null +++ b/viewer/migrations/0044_computedmolecule_pdb.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.23 on 2024-02-20 15:09 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0043_experiment_prefix_tooltip'), + ] + + operations = [ + migrations.AddField( + model_name='computedmolecule', + name='pdb', + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name='pdb', + to='viewer.siteobservation', + ), + ), + ] diff --git a/viewer/migrations/0045_auto_20240221_1203.py b/viewer/migrations/0045_auto_20240221_1203.py new file mode 100644 index 00000000..53ba541d --- /dev/null +++ b/viewer/migrations/0045_auto_20240221_1203.py @@ -0,0 +1,34 @@ +# Generated by Django 3.2.23 on 2024-02-21 12:03 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0044_computedmolecule_pdb'), + ] + + operations = [ + migrations.AddField( + model_name='computedmolecule', + name='pdb_info', + field=models.FileField( + help_text='Link to pdb file; user-uploaded pdb or pdb.experiment.pdb_info', + max_length=255, + null=True, + upload_to='computed_set_data/', + ), + ), + migrations.AlterField( + model_name='computedmolecule', + name='pdb', + field=models.ForeignKey( + help_text='SiteObservation object user referenced in upload (if given)', + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name='pdb', + to='viewer.siteobservation', + ), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index c2b8af72..e10c58c6 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -943,6 +943,19 @@ class ComputedMolecule(models.Model): blank=True, help_text="An optional rationale for this molecule", ) + pdb = models.ForeignKey( + SiteObservation, + related_name="pdb", + on_delete=models.PROTECT, + null=True, + help_text="SiteObservation object user referenced in upload (if given)", + ) + pdb_info = models.FileField( + upload_to="computed_set_data/", + null=True, + max_length=255, + help_text="Link to pdb file; user-uploaded pdb or pdb.experiment.pdb_info", + ) def __str__(self) -> str: return f"{self.smiles}" diff --git a/viewer/views.py b/viewer/views.py index b3e3562a..8da3c3d0 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -490,17 +490,17 @@ def post(self, request): tmp_pdb_file = None tmp_sdf_file = None if 'pdb_zip' in list(request.FILES.keys()): - # In the first stage (green release) of the XCA-based Fragalysis Stack - # we do not support PDB files. - request.session[ - _SESSION_ERROR - ] = 'This release does not support the inclusion of PDB file.' 
- logger.warning( - '- UploadCSet POST error_msg="%s"', request.session[_SESSION_ERROR] - ) - return redirect('viewer:upload_cset') - # pdb_file = request.FILES['pdb_zip'] - # tmp_pdb_file = save_tmp_file(pdb_file) + # # In the first stage (green release) of the XCA-based Fragalysis Stack + # # we do not support PDB files. + # request.session[ + # _SESSION_ERROR + # ] = 'This release does not support the inclusion of PDB file.' + # logger.warning( + # '- UploadCSet POST error_msg="%s"', request.session[_SESSION_ERROR] + # ) + # return redirect('viewer:upload_cset') + pdb_file = request.FILES['pdb_zip'] + tmp_pdb_file = save_tmp_file(pdb_file) if sdf_file: tmp_sdf_file = save_tmp_file(sdf_file) From d8d4e06ccc0b34bc15b22a2698537168577a87c8 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 22 Feb 2024 14:34:36 +0000 Subject: [PATCH 20/47] add site observation's ligand sdf to aligned_files --- viewer/download_structures.py | 97 +++++++++-------------------------- 1 file changed, 25 insertions(+), 72 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 59e7ceb9..1fd31bb0 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -20,7 +20,7 @@ import pandoc from django.conf import settings -from viewer.models import DownloadLinks +from viewer.models import DownloadLinks, SiteObservation from viewer.utils import clean_filename logger = logging.getLogger(__name__) @@ -55,6 +55,7 @@ class ArchiveFile: path: str archive_path: str + site_observation: SiteObservation | None = None # Dictionary containing all references needed to create the zip file @@ -81,62 +82,9 @@ class ArchiveFile: } -# A directory, relative to the media directory, -# where missing SD files are written. -# The SD files are constructed from the molecule 'sdf_info' field -# (essentially MOL-file text) when the 'sdf_file' field is blank. -_MISSING_SDF_DIRECTORY = 'missing_sdfs' -_MISSING_SDF_PATH = os.path.join(settings.MEDIA_ROOT, _MISSING_SDF_DIRECTORY) - _ERROR_FILE = 'errors.csv' -def _replace_missing_sdf(molecule, code): - """Creates a file in the 'missing SDFs' directory, using the protein code - provided. The file is constructed using the molecule's sdf_info field, skipping the - action if the file exists. The media-relative path of the written file is returned - (if it was written). - - Files, once written, are left and are not removed (or replaced). - The directory serves an archive of missing SD files. - - This was added for FE/915 to generate SD files for those that are missing - from the upload directory. - """ - if not os.path.isdir(_MISSING_SDF_PATH): - os.mkdir(_MISSING_SDF_PATH) - - # We shouldn't be called if molecule['sdf_info'] is blank. - # but check anyway. - sdf_info = molecule.ligand_mol_file - if not sdf_info: - return None - sdf_lines = sdf_info.splitlines(True)[1:] - if not sdf_lines: - return None - # Make sure last line ends with a new-line - if not sdf_lines[-1].endswith('\n'): - sdf_lines[-1] += '\n' - - # media-relative path to missing file... - missing_file = os.path.join(_MISSING_SDF_DIRECTORY, f'{code}.sdf') - # absolute path to missing file... - missing_path = os.path.join(settings.MEDIA_ROOT, missing_file) - # create the file if it doesn't exist... - if not os.path.isfile(missing_path): - # No file - create one. - with open(missing_path, 'w', encoding='utf-8') as sd_file: - # First line is the protein code, i.e. 
"PGN_RS02895PGA-x0346_0B" - sd_file.write(f'{code}\n') - # Now write the lines from the molecule sdf_info record - sd_file.writelines(sdf_lines) - # And append file terminator... - sd_file.write('$$$$\n') - - # Returns the media-relative path to the file in the missing file directory - return missing_file - - def _add_file_to_zip(ziparchive, param, filepath): """Add the requested file to the zip archive. @@ -256,6 +204,11 @@ def _add_file_to_zip_aligned(ziparchive, code, archive_file): # Copy the file without modification ziparchive.write(filepath, archive_file.archive_path) return True + elif archive_file.site_observation: + # NB! this bypasses _read_and_patch_molecule_name. problem? + ziparchive.writestr( + archive_file.archive_path, archive_file.site_observation.ligand_mol_file + ) else: logger.warning('filepath "%s" is not a file', filepath) _add_empty_file(ziparchive, archive_file.archive_path) @@ -319,9 +272,9 @@ def _molecule_files_zip(zip_contents, ziparchive, combined_sdf_file, error_file) logger.info( 'len(molecules.sd_files)=%s', len(zip_contents['molecules']['sdf_files']) ) - for file, prot in zip_contents['molecules']['sdf_files'].items(): + for archive_file, prot in zip_contents['molecules']['sdf_files'].items(): # Do not try and process any missing SD files. - if not file: + if not archive_file: error_file.write(f'sdf_files,{prot},missing\n') mol_errors += 1 continue @@ -329,16 +282,16 @@ def _molecule_files_zip(zip_contents, ziparchive, combined_sdf_file, error_file) if zip_contents['molecules'][ 'sdf_info' ] is True and not _add_file_to_zip_aligned( - ziparchive, prot.split(":")[0], file + ziparchive, prot.split(":")[0], archive_file ): - error_file.write(f'sdf_info,{prot},{file.path}\n') + error_file.write(f'sdf_info,{prot},{archive_file.path}\n') mol_errors += 1 # Append sdf file on the Molecule record to the combined_sdf_file. if zip_contents['molecules'][ 'single_sdf_file' - ] is True and not _add_file_to_sdf(combined_sdf_file, file): - error_file.write(f'single_sdf_file,{prot},{file.path}\n') + ] is True and not _add_file_to_sdf(combined_sdf_file, archive_file): + error_file.write(f'single_sdf_file,{prot},{archive_file.path}\n') mol_errors += 1 return mol_errors @@ -750,29 +703,29 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): num_molecules_collected = 0 num_missing_sd_files = 0 for so in site_obvs: - rel_sd_file = None if so.ligand_mol_file: # There is an SD file (normal) # sdf info is now kept as text in db field - rel_sd_file = _replace_missing_sdf(so, so.code) - else: - # No file value (odd). - logger.warning( - "SiteObservation record's 'ligand_mol_file' isn't set (%s)", so + archive_path = str( + Path('aligned_files').joinpath(so.code).joinpath(f'{so.code}.sdf') ) - num_missing_sd_files += 1 - - if rel_sd_file: - logger.debug('rel_sd_file=%s code=%s', rel_sd_file, so.code) + # path is ignored when writing sdfs but mandatory field zip_contents['molecules']['sdf_files'].update( { ArchiveFile( - path=rel_sd_file, - archive_path=rel_sd_file, + path=archive_path, + archive_path=archive_path, + site_observation=so, ): so.code } ) num_molecules_collected += 1 + else: + # No file value (odd). 
+ logger.warning( + "SiteObservation record's 'ligand_mol_file' isn't set (%s)", so + ) + num_missing_sd_files += 1 # Report (in the log) anomalies if num_molecules_collected == 0: From ad8cc40767e25b679181a40309cdaa3fc45a55f7 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 23 Feb 2024 13:50:06 +0000 Subject: [PATCH 21/47] fix: custom pdb now downloadable --- media_serve/urls.py | 3 +++ media_serve/views.py | 26 ++++++++++++++++++++++---- viewer/cset_upload.py | 25 +------------------------ 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/media_serve/urls.py b/media_serve/urls.py index 604b51ee..181b701a 100644 --- a/media_serve/urls.py +++ b/media_serve/urls.py @@ -11,5 +11,8 @@ re_path( r"^target_loader_data/(?P.+)", views.tld_download, name="get_tld" ), + re_path( + r"^computed_set_data/(?P.+)", views.cspdb_download, name="get_cspdb" + ), re_path(r"^pdbs/(?P.+)", views.file_download, name="get_file"), ] diff --git a/media_serve/views.py b/media_serve/views.py index 75923ce4..c44411fa 100644 --- a/media_serve/views.py +++ b/media_serve/views.py @@ -73,17 +73,35 @@ def tld_download(request, file_path): ispy_b_static.permission_string = ( "experiment__experiment_upload__target__project_id" ) - # ispy_b_static.field_name = "pdb_info" ispy_b_static.field_name = "apo_file" ispy_b_static.content_type = "application/x-pilot" - # ispy_b_static.prefix = "target_loader_data/48225dbf-204a-48e1-8ae7-f1632f4dba89/Mpro-v2/Mpro/upload_2/aligned_files/Mpro_Nterm-x0029/" - # ispy_b_static.prefix = "target_loader_data" - # ispy_b_static.prefix = "/target_loader_data/" ispy_b_static.prefix = "/target_loader_data/" ispy_b_static.input_string = file_path return ispy_b_static.get_response() +def cspdb_download(request, file_path): + """ + Download a protein by nginx redirect + :param request: the initial request + :param file_path: the file path we're getting from the static + :return: the response (a redirect to nginx internal) + """ + logger.info("+ Received cspdb_download file path: %s", file_path) + ispy_b_static = ISpyBSafeStaticFiles2() + ispy_b_static.model = SiteObservation + ispy_b_static.request = request + # the following 2 aren't used atm + ispy_b_static.permission_string = ( + "experiment__experiment_upload__target__project_id" + ) + ispy_b_static.field_name = "apo_file" + ispy_b_static.content_type = "application/x-pilot" + ispy_b_static.prefix = "/computed_set_data/" + ispy_b_static.input_string = file_path + return ispy_b_static.get_response() + + def bound_download(request, file_path): """ Download a protein by nginx redirect diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 706b7832..0ccf7555 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -159,24 +159,10 @@ def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None: new_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_field) old_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_fp) old_filename.rename(new_filename) + os.chmod(new_filename, 0o755) return str(pdb_field) - # # Create Protein object - # target_obj = Target.objects.get(title=target) - # # prot.target_id = target_obj - # site_obvs, created = SiteObservation.objects.get_or_create( - # code=pdb_code, target_id=target_obj - # ) - # # prot.code = pdb_code - # if created: - # target_obj = Target.objects.get(title=target) - # site_obvs.target_id = target_obj - # site_obvs.pdb_info = f'pdbs/{pdb_fn}' - # site_obvs.save() - - # return site_obvs - # use zfile object for pdb files uploaded in zip def get_site_observation( 
self, property_name, mol, target, compound_set, zfile, zfile_hashvals @@ -397,15 +383,6 @@ def set_mol( compound_set, ) - # A LHS or Reference protein must be provided. - # (Part of "Fix behaviour of RHS [P] button - also RHS upload change", issue #1249) - # if not lhs_so and not ref_so: - # logger.error( - # 'ComputedMolecule has no LHS (%s) or Reference (%s) property', - # lhs_property, - # ref_property, - # ) - # Need a ComputedMolecule before saving. # Check if anything exists already... existing_computed_molecules = ComputedMolecule.objects.filter( From 27d6ff5e9a19cb2b4c8d7f4989b8a22704b01562 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 27 Feb 2024 15:31:34 +0000 Subject: [PATCH 22/47] fix: increased loglevel to error on unexpected exceptions block --- viewer/target_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index edad072e..b91e64e9 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1957,7 +1957,7 @@ def load_target( # These are errors processing the data, which we handle gracefully. # The task should _always_ end successfully. # Any problem with the underlying data is transmitted in the report. - logger.debug(exc, exc_info=True) + logger.error(exc, exc_info=True) target_loader.report.final( f"Failed to process '{target_loader.data_bundle}'", success=False ) From c130a12e89f003b9894febb3f1e0de6213271f84 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Thu, 29 Feb 2024 11:31:47 +0100 Subject: [PATCH 23/47] fix: Discourse service check now checks API key before creating a service (#544) Co-authored-by: Alan Christie --- viewer/services.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/viewer/services.py b/viewer/services.py index 77417143..4e208f57 100644 --- a/viewer/services.py +++ b/viewer/services.py @@ -166,6 +166,9 @@ def discourse(func_id, name, key=None, url=None, user=None) -> bool: del func_id, name logger.debug("+ discourse") + # Discourse is "unconfigured" if there is no API key + if not settings.DISCOURSE_API_KEY: + return False client = DiscourseClient( os.environ.get(url, None), api_username=os.environ.get(user, None), @@ -206,6 +209,7 @@ def keycloak(func_id, name, url=None, secret=None) -> bool: del func_id, name, secret logger.debug("+ keycloak") + # Keycloak is "unconfigured" if there is no realm URL keycloak_realm = os.environ.get(url, None) if not keycloak_realm: return False From 4e8db0baa96ba62d3ce96eb247013483e3e14241 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 29 Feb 2024 11:40:30 +0100 Subject: [PATCH 24/47] build(deps): bump cryptography from 42.0.2 to 42.0.4 (#539) Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.2 to 42.0.4. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/42.0.2...42.0.4) --- updated-dependencies: - dependency-name: cryptography dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 66 ++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0fcf5b82..306d4ce0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -539,43 +539,43 @@ jinja2 = "*" [[package]] name = "cryptography" -version = "42.0.2" +version = "42.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be"}, - {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d"}, - {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4"}, - {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2"}, - {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529"}, - {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1"}, - {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1"}, - {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929"}, - {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9"}, - {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2"}, - {file = "cryptography-42.0.2-cp37-abi3-win32.whl", hash = "sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee"}, - {file = "cryptography-42.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee"}, - {file = "cryptography-42.0.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242"}, - {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a"}, - {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446"}, - {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90"}, - {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3"}, - {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589"}, - {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = 
"sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a"}, - {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea"}, - {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33"}, - {file = "cryptography-42.0.2-cp39-abi3-win32.whl", hash = "sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635"}, - {file = "cryptography-42.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6"}, - {file = "cryptography-42.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380"}, - {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6"}, - {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2"}, - {file = "cryptography-42.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f"}, - {file = "cryptography-42.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008"}, - {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12"}, - {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a"}, - {file = "cryptography-42.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65"}, - {file = "cryptography-42.0.2.tar.gz", hash = "sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888"}, + {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449"}, + {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18"}, + {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2"}, + {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1"}, + {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b"}, + {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1"}, + {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992"}, + {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885"}, + {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824"}, + {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b"}, + {file = "cryptography-42.0.4-cp37-abi3-win32.whl", hash = "sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925"}, + {file = "cryptography-42.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923"}, + {file = "cryptography-42.0.4-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7"}, + {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52"}, + {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a"}, + {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9"}, + {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764"}, + {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff"}, + {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257"}, + {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929"}, + {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0"}, + {file = "cryptography-42.0.4-cp39-abi3-win32.whl", hash = "sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129"}, + {file = "cryptography-42.0.4-cp39-abi3-win_amd64.whl", hash = "sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854"}, + {file = "cryptography-42.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298"}, + {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88"}, + {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20"}, + {file = "cryptography-42.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce"}, + {file = "cryptography-42.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74"}, + {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd"}, + {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b"}, + {file = "cryptography-42.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660"}, + {file = "cryptography-42.0.4.tar.gz", hash = "sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb"}, ] [package.dependencies] From 09c3c085df39a57f2981d6bade0e65d5a8a10b4c Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 29 Feb 
2024 15:41:25 +0000 Subject: [PATCH 25/47] metadata.csv populated Started working on issue 1355 as well, it's too tightly coupled. Some work remaining re that: - when tag created in UI, make sure upload_name attribute is populated --- viewer/download_structures.py | 140 ++++++++++++++++--- viewer/migrations/0046_auto_20240228_1651.py | 38 +++++ viewer/models.py | 8 +- viewer/serializers.py | 4 + viewer/target_loader.py | 3 +- viewer/target_set_upload.py | 3 +- 6 files changed, 172 insertions(+), 24 deletions(-) create mode 100644 viewer/migrations/0046_auto_20240228_1651.py diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 1fd31bb0..2ada27fe 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -8,19 +8,27 @@ import json import logging import os +import re import shutil import uuid import zipfile from dataclasses import dataclass from datetime import datetime, timedelta, timezone -from io import BytesIO +from io import BytesIO, StringIO from pathlib import Path from typing import Any, Dict import pandoc from django.conf import settings - -from viewer.models import DownloadLinks, SiteObservation +from django.db.models import Exists, OuterRef, Subquery + +from viewer.models import ( + DownloadLinks, + SiteObservation, + SiteObservationTag, + SiteObvsSiteObservationTag, + TagCategory, +) from viewer.utils import clean_filename logger = logging.getLogger(__name__) @@ -50,6 +58,43 @@ 'readme': (''), } +TAG_CATEGORIES = ( + 'ConformerSites', + 'CanonSites', + 'CrystalformSites', + 'Quatassemblies', + 'Crystalforms', +) +CURATED_TAG_CATEGORIES = ('Series', 'Forum', 'Other') + + +class TagSubquery(Subquery): + """Annotate SiteObservation with tag of given category""" + + def __init__(self, category): + query = SiteObservationTag.objects.filter( + pk=Subquery( + SiteObvsSiteObservationTag.objects.filter( + site_observation=OuterRef('pk'), + site_obvs_tag__category=TagCategory.objects.get( + category=category, + ), + ).values('site_obvs_tag')[:1] + ) + ).values('tag')[0:1] + super().__init__(query) + + +class CuratedTagSubquery(Exists): + """Annotate SiteObservation with tag of given category""" + + def __init__(self, tag): + query = SiteObvsSiteObservationTag.objects.filter( + site_observation=OuterRef('pk'), + site_obvs_tag=tag, + ) + super().__init__(query) + @dataclass(frozen=True) class ArchiveFile: @@ -84,6 +129,9 @@ class ArchiveFile: _ERROR_FILE = 'errors.csv' +# unlike v1, metadata doesn't exist anymore, needs compiling +_METADATA_FILE = 'metadata.csv' + def _add_file_to_zip(ziparchive, param, filepath): """Add the requested file to the zip archive. 
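
# [editor's note] The TagSubquery/CuratedTagSubquery helpers added above use
# Django's correlated-subquery pattern: OuterRef('pk') refers to the row
# currently being annotated, the [:1] slice reduces the subquery to a single
# scalar value, and Exists(...) collapses it to a boolean. A minimal, hedged
# sketch of the same pattern; the Item/Label models are illustrative
# stand-ins, not part of this patch:

from django.db.models import Exists, OuterRef, Subquery

def with_label_columns(item_qs, label_model):
    # For each Item row, attach the text of its first Label (or None) and
    # a boolean flag saying whether any Label points at it at all.
    first_label = Subquery(
        label_model.objects.filter(item=OuterRef('pk')).values('text')[:1]
    )
    has_label = Exists(label_model.objects.filter(item=OuterRef('pk')))
    return item_qs.annotate(first_label=first_label, has_label=has_label)
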
@@ -350,6 +398,58 @@ def _trans_matrix_files_zip(ziparchive, target):
             _add_empty_file(ziparchive, archive_path)
 
 
+def _metadata_file_zip(ziparchive, target):
+    """Compile and add metadata file to archive."""
+    logger.info('+ Processing metadata')
+
+    annotations = {}
+    values = ['code', 'longcode', 'cmpd__compound_code', 'smiles']
+    header = ['Code', 'Long code', 'Compound code', 'Smiles']
+
+    for category in TagCategory.objects.filter(category__in=TAG_CATEGORIES):
+        tag = f'tag_{category.category.lower()}'
+        values.append(tag)
+        header.append(category.category)
+        annotations[tag] = TagSubquery(category.category)
+
+    pattern = re.compile(r'\W+')
+    for tag in SiteObservationTag.objects.filter(
+        category__in=TagCategory.objects.filter(category__in=CURATED_TAG_CATEGORIES),
+        target=target,
+    ):
+        # for reasons unknown, mypy thinks tag is a string
+        tagname = f'tag_{pattern.sub("_", tag.tag).strip().lower()}'  # type: ignore[attr-defined]
+        values.append(tagname)
+        header.append(f'[{tag.category}] {tag.tag}')  # type: ignore[attr-defined]
+        annotations[tagname] = CuratedTagSubquery(tag)
+
+    # fmt: off
+    qs = SiteObservation.filter_manager.by_target(
+        target=target,
+    ).prefetch_related(
+        'cmpd',
+        'siteobservationtags',
+    ).annotate(**annotations).values_list(*values)
+    # fmt: on
+
+    buff = StringIO()
+    buff.write(','.join(header))
+    buff.write('\n')
+    for so_values in qs:
+        buff.write(
+            ','.join(
+                [
+                    str(k) if k else 'False' if isinstance(k, bool) else ''
+                    for k in so_values
+                ]
+            )
+        )
+        buff.write('\n')
+
+    ziparchive.writestr(_METADATA_FILE, buff.getvalue())
+    logger.info('+ Processed metadata')
+
+
 def _extra_files_zip(ziparchive, target):
     """If an extra info folder exists at the target root level, then copy the contents to the output file as is.
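
# [editor's note] _metadata_file_zip above builds each CSV row by joining
# values with bare commas, so a curated tag name or compound code that
# itself contains a comma would shift the columns. A hedged sketch of the
# same write loop using the standard csv module, assuming the header, qs
# and ziparchive names from the function above:

import csv
from io import StringIO

def _write_metadata_csv(ziparchive, header, qs):
    buff = StringIO()
    writer = csv.writer(buff)  # quotes and escapes awkward fields for us
    writer.writerow(header)
    for so_values in qs:
        writer.writerow(
            [str(k) if k else 'False' if isinstance(k, bool) else '' for k in so_values]
        )
    ziparchive.writestr(_METADATA_FILE, buff.getvalue())
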
@@ -514,7 +614,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host error_filename = os.path.join(download_path, _ERROR_FILE) error_file = open(error_filename, "w", encoding="utf-8") - error_file.write("Param,Code,Invalid file reference\n") + error_file.write("Param,Code,File not found when assembling download\n") errors = 0 # If a single sdf file is also wanted then create file to @@ -560,14 +660,17 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host _smiles_files_zip(zip_contents, ziparchive, download_path) # Add the metadata file from the target - if zip_contents['metadata_info'] and not _add_file_to_zip( - ziparchive, 'metadata_info', zip_contents['metadata_info'] - ): - error_file.write( - f"metadata_info,{target},{zip_contents['metadata_info']}\n" - ) - errors += 1 - logger.warning('After _add_file_to_zip() errors=%s', errors) + # if zip_contents['metadata_info'] and not _add_file_to_zip( + # ziparchive, 'metadata_info', zip_contents['metadata_info'] + # ): + # error_file.write( + # f"metadata_info,{target},{zip_contents['metadata_info']}\n" + # ) + # errors += 1 + # logger.warning('After _add_file_to_zip() errors=%s', errors) + + if zip_contents['metadata_info']: + _metadate_file_zip(ziparchive, target) if zip_contents['trans_matrix_info']: _trans_matrix_files_zip(ziparchive, target) @@ -599,7 +702,7 @@ def _protein_garbage_filter(proteins): return proteins.exclude(code__startswith=r'references_') -def _create_structures_dict(target, site_obvs, protein_params, other_params): +def _create_structures_dict(site_obvs, protein_params, other_params): """Write a ZIP file containing data from an input dictionary Args: @@ -746,13 +849,10 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): for molecule in site_obvs: zip_contents['molecules']['smiles_info'].update({molecule.smiles: None}) - # Add the metadata file from the target - if other_params['metadata_info'] is True: - zip_contents['metadata_info'] = target.metadata.name + zip_contents['metadata_info'] = other_params['metadata_info'] - # Add the metadata file from the target - if other_params['trans_matrix_info'] is True: - zip_contents['trans_matrix_info'] = True + # Add the trans matrix files + zip_contents['trans_matrix_info'] = other_params['trans_matrix_info'] return zip_contents @@ -900,7 +1000,7 @@ def create_or_return_download_link(request, target, site_observations): logger.info('Creating new download (file_url=%s)...', file_url) zip_contents = _create_structures_dict( - target, site_observations, protein_params, other_params + site_observations, protein_params, other_params ) _create_structures_zip(target, zip_contents, file_url, original_search, host) diff --git a/viewer/migrations/0046_auto_20240228_1651.py b/viewer/migrations/0046_auto_20240228_1651.py new file mode 100644 index 00000000..213834cf --- /dev/null +++ b/viewer/migrations/0046_auto_20240228_1651.py @@ -0,0 +1,38 @@ +# Generated by Django 3.2.23 on 2024-02-28 16:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0045_auto_20240221_1203'), + ] + + operations = [ + migrations.AddField( + model_name='sessionprojecttag', + name='upload_name', + field=models.CharField( + default='', help_text='The generated name of the tag', max_length=200 + ), + preserve_default=False, + ), + migrations.AddField( + model_name='siteobservationtag', + name='upload_name', + field=models.CharField( + default='default', + 
help_text='The generated name of the tag', + max_length=200, + ), + preserve_default=False, + ), + migrations.AlterUniqueTogether( + name='sessionprojecttag', + unique_together={('upload_name', 'target')}, + ), + migrations.AlterUniqueTogether( + name='siteobservationtag', + unique_together={('upload_name', 'target')}, + ), + ] diff --git a/viewer/models.py b/viewer/models.py index e10c58c6..fc430d5f 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -1177,6 +1177,9 @@ class Meta: class Tag(models.Model): tag = models.CharField(max_length=200, help_text="The (unique) name of the tag") + upload_name = models.CharField( + max_length=200, help_text="The generated name of the tag" + ) category = models.ForeignKey(TagCategory, on_delete=models.CASCADE) target = models.ForeignKey(Target, on_delete=models.CASCADE) user = models.ForeignKey(User, null=True, on_delete=models.CASCADE) @@ -1196,9 +1199,10 @@ def __str__(self) -> str: return f"{self.tag}" def __repr__(self) -> str: - return "" % ( + return "" % ( self.id, self.tag, + self.upload_name, self.category, self.target, self.user, @@ -1207,7 +1211,7 @@ def __repr__(self) -> str: class Meta: abstract = True unique_together = ( - 'tag', + 'upload_name', 'target', ) diff --git a/viewer/serializers.py b/viewer/serializers.py index 8694419f..327631c1 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -713,6 +713,10 @@ class SiteObservationTagSerializer(serializers.ModelSerializer): class Meta: model = models.SiteObservationTag fields = '__all__' + extra_kwargs = { + "id": {"read_only": True}, + "upload_name": {"read_only": True}, + } class SessionProjectTagSerializer(serializers.ModelSerializer): diff --git a/viewer/target_loader.py b/viewer/target_loader.py index b91e64e9..11df2b51 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1859,7 +1859,7 @@ def _tag_observations(self, tag, category, so_list): so_group.save() try: - so_tag = SiteObservationTag.objects.get(tag=tag, target=self.target) + so_tag = SiteObservationTag.objects.get(upload_name=tag, target=self.target) # Tag already exists # Apart from the new mol_group and molecules, we shouldn't be # changing anything. 
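
# [editor's note] The new upload_name column gives tags a stable identity:
# the visible 'tag' text can be renamed by users, while loaders keep
# matching on the name generated at upload time (hence unique_together
# moving from 'tag' to 'upload_name'). A hedged sketch of the resulting
# lookup pattern, with names borrowed from the hunks above:

def _get_or_create_tag(tag_name, target, category):
    try:
        so_tag = SiteObservationTag.objects.get(upload_name=tag_name, target=target)
    except SiteObservationTag.DoesNotExist:
        so_tag = SiteObservationTag()
        so_tag.tag = tag_name          # display name; may be edited later
        so_tag.upload_name = tag_name  # written once at creation, never updated
        so_tag.target = target
        so_tag.category = category
        so_tag.save()
    return so_tag
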
@@ -1867,6 +1867,7 @@ def _tag_observations(self, tag, category, so_list):
         except SiteObservationTag.DoesNotExist:
             so_tag = SiteObservationTag()
             so_tag.tag = tag
+            so_tag.upload_name = tag
             so_tag.category = TagCategory.objects.get(category=category)
             so_tag.target = self.target
             so_tag.mol_group = so_group
diff --git a/viewer/target_set_upload.py b/viewer/target_set_upload.py
index c46a7531..bc4d9cc1 100644
--- a/viewer/target_set_upload.py
+++ b/viewer/target_set_upload.py
@@ -637,7 +637,7 @@ def specifc_site(rd_mols, site_observations, target, site_description=None):
 
     try:
         site_obvs_tag = SiteObservationTag.objects.get(
-            tag=site_description, target_id=target.id
+            upload_name=site_description, target_id=target.id
         )
     except SiteObservationTag.DoesNotExist:
         site_obvs_tag = None
@@ -646,6 +646,7 @@
         # New site/tag or the tag has been deleted
         site_obvs_tag = SiteObservationTag()
         site_obvs_tag.tag = site_description
+        site_obvs_tag.upload_name = site_description
         site_obvs_tag.category = TagCategory.objects.get(category='Sites')
         site_obvs_tag.target = target
         site_obvs_tag.mol_group = site_obvs_group

From 0aa1f7da2f7d2998c09e4569f57a2ae223971bb0 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Fri, 1 Mar 2024 09:24:51 +0000
Subject: [PATCH 26/47] upload_name automatically populated when creating tags in UI

Only populated on creation, updates won't touch it
---
 viewer/serializers.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/viewer/serializers.py b/viewer/serializers.py
index 327631c1..d33b1b4d 100644
--- a/viewer/serializers.py
+++ b/viewer/serializers.py
@@ -710,6 +710,11 @@ class SiteObservationTagSerializer(serializers.ModelSerializer):
         many=True, queryset=models.SiteObservation.objects.all()
     )
 
+    def create(self, validated_data):
+        # populate 'upload_name' field at object creation
+        validated_data['upload_name'] = validated_data['tag']
+        return super().create(validated_data)
+
     class Meta:
         model = models.SiteObservationTag
         fields = '__all__'

From bc7fcd196e554eddace688994249768b62027be8 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Fri, 1 Mar 2024 13:03:26 +0000
Subject: [PATCH 27/47] changes to api/download_structures

- apo_file, bound_file, sdf_info and smiles_info merged into
  all_aligned_structures
- added pdb_info field

NB! download_structures was required to provide ligand_pdb as well. This
wasn't tracked previously, so I added field to SiteObservation model.
Meaning there's a migration and on stack deployment data needs to be
wiped and reuploaded
---
 viewer/download_structures.py                | 124 +++++++++----------
 viewer/migrations/0047_auto_20240301_1243.py |  24 ++++
 viewer/models.py                             |   3 +
 viewer/serializers.py                        |  12 +-
 viewer/target_loader.py                      |   3 +
 viewer/views.py                              |  12 +-
 6 files changed, 97 insertions(+), 81 deletions(-)
 create mode 100644 viewer/migrations/0047_auto_20240301_1243.py

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 2ada27fe..9d351b68 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -31,6 +31,8 @@
 )
 from viewer.utils import clean_filename
 
+from .serializers import DownloadStructuresSerializer
+
 logger = logging.getLogger(__name__)
 
 # Length of time to keep records of dynamic links.
@@ -41,18 +43,23 @@
 # the protein code subdirectory of the aligned directory
 # (as for the target upload).
_ZIP_FILEPATHS = { - 'apo_file': ('aligned'), - 'bound_file': ('aligned'), - 'cif_info': ('aligned'), - 'mtz_info': ('aligned'), - 'map_info': ('aligned'), - 'sigmaa_file': ('aligned'), - 'diff_file': ('aligned'), - 'event_file': ('aligned'), - 'sdf_info': ('aligned'), + 'apo_file': ('aligned'), # SiteObservation: apo_file + 'apo_solv_file': ('aligned'), # SiteObservation: apo_solv_file + 'apo_desolv_file': ('aligned'), # SiteObservation: apo_desolv_file + 'bound_file': ('aligned'), # SiteObservation: bound_file + 'sdf_info': ('aligned'), # SiteObservation: ligand_mol_file (indirectly) + 'ligand_pdb': ('aligned'), # SiteObservation: ligand_pdb + 'smiles_info': (''), # SiteObservation: smiles_info (indirectly) + # those above are all controlled by serializer's all_aligned_structures flag + 'sigmaa_file': ('aligned'), # SiteObservation: sigmaa_file + 'diff_file': ('aligned'), # SiteObservation: diff_file + 'event_file': ('aligned'), # SiteObservation: ligand_pdb + 'pdb_info': ('aligned'), # Experiment: cif_info + 'cif_info': ('aligned'), # Experiment: cif_info + 'mtz_info': ('aligned'), # Experiment: mtz_info + 'map_info': ('aligned'), # Experiment: map_info (multiple files) 'single_sdf_file': (''), 'metadata_info': (''), - 'smiles_info': (''), 'trans_matrix_info': (''), 'extra_files': ('extra_files'), 'readme': (''), @@ -107,14 +114,18 @@ class ArchiveFile: # NB you may need to add a version number to this at some point... zip_template = { 'proteins': { - 'apo_file': {}, # from experiment - 'bound_file': {}, # x - 'cif_info': {}, # from experiment - 'mtz_info': {}, # from experiment - 'map_info': {}, # from experiment - 'event_file': {}, # x + 'apo_file': {}, + 'apo_solv_file': {}, + 'apo_desolv_file': {}, + 'bound_file': {}, + 'pdb_info': {}, + 'cif_info': {}, + 'mtz_info': {}, + 'map_info': {}, + 'event_file': {}, 'diff_file': {}, 'sigmaa_file': {}, + 'ligand_pdb': {}, }, 'molecules': { 'sdf_files': {}, @@ -755,13 +766,14 @@ def _create_structures_dict(site_obvs, protein_params, other_params): elif param in [ 'bound_file', + 'apo_file', 'apo_solv_file', 'apo_desolv_file', - 'apo_file', 'sigmaa_file', 'event_file', 'artefacts_file', 'pdb_header_file', + 'ligand_pdb', 'diff_file', ]: # siteobservation object @@ -866,55 +878,35 @@ def get_download_params(request): Returns: protein_params, other_params """ - protein_param_flags = [ - 'apo_file', - 'bound_file', - 'cif_info', - 'mtz_info', - 'map_info', - 'event_file', - 'sigmaa_file', - 'diff_file', - ] - - other_param_flags = [ - 'sdf_info', - 'single_sdf_file', - 'metadata_info', - 'smiles_info', - 'trans_matrix_info', - ] - - # protein_params = {'pdb_info': request.data['pdb_info'], - # 'bound_info': request.data['bound_info'], - # 'cif_info': request.data['cif_info'], - # 'mtz_info': request.data['mtz_info'], - # 'diff_info': request.data['diff_info'], - # 'event_info': request.data['event_info'], - # 'sigmaa_info': request.data['sigmaa_info'], - # 'trans_matrix_info': - # request.data['trans_matrix_info']} - protein_params = {} - for param in protein_param_flags: - protein_params[param] = False - if param in request.data and request.data[param] in [True, 'true']: - protein_params[param] = True - - # other_params = {'sdf_info': request.data['sdf_info'], - # 'single_sdf_file': request.data['single_sdf_file'], - # 'metadata_info': request.data['metadata_info'], - # 'smiles_info': request.data['smiles_info']} - other_params = {} - for param in other_param_flags: - other_params[param] = False - if param in request.data and request.data[param] 
in [True, 'true']: - other_params[param] = True - - static_link = False - if 'static_link' in request.data and ( - request.data['static_link'] is True or request.data['static_link'] == 'true' - ): - static_link = True + + serializer = DownloadStructuresSerializer(data=request.data) + serializer.is_valid() + logger.debug('serializer data: %s', serializer.validated_data) + + protein_params = { + 'pdb_info': serializer.validated_data['pdb_info'], + 'apo_file': serializer.validated_data['all_aligned_structures'], + 'bound_file': serializer.validated_data['all_aligned_structures'], + 'apo_solv_file': serializer.validated_data['all_aligned_structures'], + 'apo_desolv_file': serializer.validated_data['all_aligned_structures'], + 'ligand_pdb': serializer.validated_data['all_aligned_structures'], + 'cif_info': serializer.validated_data['cif_info'], + 'mtz_info': serializer.validated_data['mtz_info'], + 'map_info': serializer.validated_data['map_info'], + 'event_file': serializer.validated_data['event_file'], + 'sigmaa_file': serializer.validated_data['sigmaa_file'], + 'diff_file': serializer.validated_data['diff_file'], + } + + other_params = { + 'sdf_info': serializer.validated_data['all_aligned_structures'], + 'single_sdf_file': serializer.validated_data['single_sdf_file'], + 'metadata_info': serializer.validated_data['metadata_info'], + 'smiles_info': serializer.validated_data['all_aligned_structures'], + 'trans_matrix_info': serializer.validated_data['trans_matrix_info'], + } + + static_link = serializer.validated_data['static_link'] return protein_params, other_params, static_link diff --git a/viewer/migrations/0047_auto_20240301_1243.py b/viewer/migrations/0047_auto_20240301_1243.py new file mode 100644 index 00000000..e153c066 --- /dev/null +++ b/viewer/migrations/0047_auto_20240301_1243.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.23 on 2024-03-01 12:43 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0046_auto_20240228_1651'), + ] + + operations = [ + migrations.AddField( + model_name='historicalsiteobservation', + name='ligand_pdb', + field=models.TextField(max_length=255, null=True), + ), + migrations.AddField( + model_name='siteobservation', + name='ligand_pdb', + field=models.FileField( + max_length=255, null=True, upload_to='target_loader_data/' + ), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index fc430d5f..2fe21e90 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -465,6 +465,9 @@ class SiteObservation(models.Model): seq_id = models.IntegerField() chain_id = models.CharField(max_length=1) ligand_mol_file = models.TextField(null=True) + ligand_pdb = models.FileField( + upload_to="target_loader_data/", null=True, max_length=255 + ) objects = models.Manager() history = HistoricalRecords() diff --git a/viewer/serializers.py b/viewer/serializers.py index d33b1b4d..b601ae04 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -832,22 +832,20 @@ class Meta: class DownloadStructuresSerializer(serializers.Serializer): - target_name = serializers.CharField(max_length=200) - proteins = serializers.CharField(max_length=5000) - apo_file = serializers.BooleanField(default=False) - bound_file = serializers.BooleanField(default=False) + target_name = serializers.CharField(max_length=200, default=None) + proteins = serializers.CharField(max_length=5000, default=None) + all_aligned_structures = serializers.BooleanField(default=False) + pdb_info = serializers.BooleanField(default=False) cif_info 
= serializers.BooleanField(default=False) mtz_info = serializers.BooleanField(default=False) diff_file = serializers.BooleanField(default=False) event_file = serializers.BooleanField(default=False) sigmaa_file = serializers.BooleanField(default=False) map_info = serializers.BooleanField(default=False) - sdf_info = serializers.BooleanField(default=False) single_sdf_file = serializers.BooleanField(default=False) metadata_info = serializers.BooleanField(default=False) - smiles_info = serializers.BooleanField(default=False) static_link = serializers.BooleanField(default=False) - file_url = serializers.CharField(max_length=200) + file_url = serializers.CharField(max_length=200, default=None) trans_matrix_info = serializers.BooleanField(default=False) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 11df2b51..269f1b45 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1324,6 +1324,7 @@ def process_site_observation( sigmaa_file, diff_file, event_file, + ligand_pdb, ) = self.validate_files( obj_identifier=experiment_id, file_struct=data, @@ -1339,6 +1340,7 @@ def process_site_observation( "sigmaa_map", # NB! keys in meta_aligner not yet updated "diff_map", # NB! keys in meta_aligner not yet updated "event_map", + "ligand_pdb", ), validate_files=validate_files, ) @@ -1375,6 +1377,7 @@ def process_site_observation( "diff_file": str(self._get_final_path(diff_file)), "event_file": str(self._get_final_path(event_file)), "artefacts_file": str(self._get_final_path(artefacts_file)), + "ligand_pdb": str(self._get_final_path(ligand_pdb)), "pdb_header_file": "currently missing", "ligand_mol_file": mol_data, } diff --git a/viewer/views.py b/viewer/views.py index 8da3c3d0..56f4c43e 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1462,14 +1462,10 @@ def create(self, request): target = None logger.info('Given target_name "%s"', target_name) - # Check target_name is valid - # (it should natch the title of an existing target) - for targ in self.queryset: - if targ.title == target_name: - target = targ - break - - if not target: + # Check target_name is valid: + try: + target = self.queryset.get(title=target_name) + except models.Target.DoesNotExist: msg = f'Either the Target "{target_name}" is not present or you are not permitted access it' logger.warning(msg) content = {'message': msg} From 5f2320a42c3c3ed1ef8ef0fd4618f5b88ff30c83 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 1 Mar 2024 14:43:20 +0000 Subject: [PATCH 28/47] don't download neighbourhoods.yaml unless trans_matrix_info is checked --- viewer/download_structures.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 9d351b68..13f25466 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -510,7 +510,7 @@ def _extra_files_zip(ziparchive, target): logger.info('Processed %s extra files', num_processed) -def _yaml_files_zip(ziparchive, target): +def _yaml_files_zip(ziparchive, target, transforms_requested: bool = False) -> None: """Add all yaml files (except transforms) from upload to ziparchive""" for experiment_upload in target.experimentupload_set.order_by('commit_datetime'): @@ -547,6 +547,9 @@ def _yaml_files_zip(ziparchive, target): for file in yaml_files: logger.info('Adding yaml file "%s"...', file) + if not transforms_requested and file.name == 'neighbourhoods.yaml': + # don't add this file if transforms are not requested + continue ziparchive.write(file, 
str(Path(archive_path).joinpath(file.name))) @@ -688,7 +691,9 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host _extra_files_zip(ziparchive, target) - _yaml_files_zip(ziparchive, target) + _yaml_files_zip( + ziparchive, target, transforms_requested=zip_contents['trans_matrix_info'] + ) _document_file_zip(ziparchive, download_path, original_search, host) From 5f268d752cf3633539ccdb6006eba7b127032164 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 1 Mar 2024 16:36:48 +0000 Subject: [PATCH 29/47] fixed error handling (errors.csv) and not returning combined sdf --- viewer/download_structures.py | 98 +++++++++++++---------------------- 1 file changed, 37 insertions(+), 61 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 13f25466..8286ee83 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -144,36 +144,6 @@ class ArchiveFile: _METADATA_FILE = 'metadata.csv' -def _add_file_to_zip(ziparchive, param, filepath): - """Add the requested file to the zip archive. - - Args: - ziparchive: Handle of zip archive - param: parameter of filelist - filepath: filepath from record - - Returns: - [boolean]: [True of record added] - """ - logger.debug('+_add_file_to_zip: %s, %s', param, filepath) - if not filepath: - # Odd - assume success - logger.error('No filepath value') - return True - - fullpath = os.path.join(settings.MEDIA_ROOT, filepath) - cleaned_filename = clean_filename(filepath) - archive_path = os.path.join(_ZIP_FILEPATHS[param], cleaned_filename) - if os.path.isfile(fullpath): - ziparchive.write(fullpath, archive_path) - return True - else: - logger.warning('filepath "%s" is not a file', filepath) - _add_empty_file(ziparchive, archive_path) - - return False - - def _is_mol_or_sdf(path): """Returns True if the file and path look like a MOL or SDF file. It does this by simply checking the file's extension. @@ -231,6 +201,27 @@ def _read_and_patch_molecule_name(path, molecule_name=None): return content +def _patch_molecule_name(site_observation): + """Patch the MOL or SDF file with molecule name. + + Processes the content of ligand_mol attribute of the + site_observation object. Returns the content as string. + + Alternative to _read_and_patch_molecule_name function above + which operates on files. As ligand_mol is now stored as text, + slightly different approach was necessary. + + """ + logger.debug('Patching MOL/SDF of "%s"', site_observation) + + # Now read the file, checking the first line + # and setting it to the molecule name if it's blank. + lines = site_observation.ligand_mol_file.split('\n') + if not lines[0].strip(): + lines[0] = site_observation.long_code + return '\n'.join(lines) + + def _add_file_to_zip_aligned(ziparchive, code, archive_file): """Add the requested file to the zip archive. @@ -264,10 +255,11 @@ def _add_file_to_zip_aligned(ziparchive, code, archive_file): ziparchive.write(filepath, archive_file.archive_path) return True elif archive_file.site_observation: - # NB! this bypasses _read_and_patch_molecule_name. problem? 
ziparchive.writestr(
+                archive_file.archive_path,
+                _patch_molecule_name(archive_file.site_observation),
             )
+            return True
         else:
             logger.warning('filepath "%s" is not a file', filepath)
             _add_empty_file(ziparchive, archive_file.archive_path)
@@ -285,17 +277,14 @@ def _add_file_to_sdf(combined_sdf_file, archive_file):
     Returns:
         [boolean]: [True of record added]
     """
-    media_root = settings.MEDIA_ROOT
-
     if not archive_file.path:
         # Odd - assume success
        logger.error('No filepath value')
        return True
 
-    fullpath = os.path.join(media_root, archive_file.path)
-    if os.path.isfile(fullpath):
+    if archive_file.path and archive_file.path != 'None':
         with open(combined_sdf_file, 'a', encoding='utf-8') as f_out:
-            patched_sdf_content = _read_and_patch_molecule_name(fullpath)
+            patched_sdf_content = _patch_molecule_name(archive_file.site_observation)
             f_out.write(patched_sdf_content)
         return True
     else:
@@ -315,8 +304,9 @@ def _protein_files_zip(zip_contents, ziparchive, error_file):
 
     for prot, prot_file in files.items():
         for f in prot_file:
+            # memo to self: f is ArchiveFile object
             if not _add_file_to_zip_aligned(ziparchive, prot, f):
-                error_file.write(f'{param},{prot},{f}\n')
+                error_file.write(f'{param},{prot},{f.archive_path}\n')
                 prot_errors += 1
     return prot_errors
 
@@ -673,16 +663,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host
         if zip_contents['molecules']['smiles_info']:
             _smiles_files_zip(zip_contents, ziparchive, download_path)
 
-        # Add the metadata file from the target
-        # if zip_contents['metadata_info'] and not _add_file_to_zip(
-        #     ziparchive, 'metadata_info', zip_contents['metadata_info']
-        # ):
-        #     error_file.write(
-        #         f"metadata_info,{target},{zip_contents['metadata_info']}\n"
-        #     )
-        #     errors += 1
-        #     logger.warning('After _add_file_to_zip() errors=%s', errors)
-
+        # compile and add metadata.csv
         if zip_contents['metadata_info']:
             _metadata_file_zip(ziparchive, target)
@@ -755,18 +736,17 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
             afile = []
             for f in model_attr:
                 # here the model_attr is already stringified
+                apath = Path('crystallographic_files').joinpath(so.code)
                 if model_attr and model_attr != 'None':
                     archive_path = str(
-                        Path('crystallographic_files')
-                        .joinpath(so.code)
-                        .joinpath(
+                        apath.joinpath(
                             Path(f)
                             .parts[-1]
                             .replace(so.experiment.code, so.code)
                         )
                     )
                 else:
-                    archive_path = param
+                    archive_path = str(apath.joinpath(param))
                 afile.append(ArchiveFile(path=f, archive_path=archive_path))
 
         elif param in [
@@ -787,18 +767,17 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
                 logger.debug(
                     'Adding param to zip: %s, value: %s', param, model_attr
                 )
+                apath = Path('aligned_files').joinpath(so.code)
                 if model_attr and model_attr != 'None':
                     archive_path = str(
-                        Path('aligned_files')
-                        .joinpath(so.code)
-                        .joinpath(
+                        apath.joinpath(
                             Path(model_attr.name)
                             .parts[-1]
                             .replace(so.longcode, so.code)
                         )
                     )
                 else:
-                    archive_path = param
+                    archive_path = str(apath.joinpath(param))
 
                 afile = [
                     ArchiveFile(
@@ -812,11 +791,8 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
 
             zip_contents['proteins'][param][so.code] = afile
 
-        if other_params['single_sdf_file'] is True:
-            zip_contents['molecules']['single_sdf_file'] = True
-
-        if other_params['sdf_info'] is True:
-            zip_contents['molecules']['sdf_info'] = True
+    zip_contents['molecules']['single_sdf_file'] = other_params['single_sdf_file']
+    zip_contents['molecules']['sdf_info'] =
other_params['sdf_info']
 
     # sdf information is held as a file on the Molecule record.
     if other_params['sdf_info'] or other_params['single_sdf_file']:

From 22f9641d4bca0afdac15d900797686ff6a885375 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Mon, 4 Mar 2024 12:16:03 +0000
Subject: [PATCH 30/47] fix: Added parsing directives to DownloadStructuresSerializer
---
 viewer/download_structures.py | 9 ++++++---
 viewer/serializers.py         | 6 +++---
 viewer/views.py               | 1 +
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 8286ee83..5036e018 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -242,7 +242,8 @@ def _add_file_to_zip_aligned(ziparchive, code, archive_file):
         logger.error('No filepath value')
         return True
 
-    filepath = str(Path(settings.MEDIA_ROOT).joinpath(archive_file.path))
+    # calling str on archive_file.path because it could be None
+    filepath = str(Path(settings.MEDIA_ROOT).joinpath(str(archive_file.path)))
     if Path(filepath).is_file():
         if _is_mol_or_sdf(filepath):
             # It's a MOL or SD file.
@@ -861,8 +862,10 @@ def get_download_params(request):
     """
 
     serializer = DownloadStructuresSerializer(data=request.data)
-    serializer.is_valid()
-    logger.debug('serializer data: %s', serializer.validated_data)
+    valid = serializer.is_valid()
+    logger.debug('serializer validated data: %s, %s', valid, serializer.validated_data)
+    if not valid:
+        logger.error('serializer errors: %s', serializer.errors)
 
     protein_params = {
         'pdb_info': serializer.validated_data['pdb_info'],
diff --git a/viewer/serializers.py b/viewer/serializers.py
index b601ae04..400c55dd 100644
--- a/viewer/serializers.py
+++ b/viewer/serializers.py
@@ -832,8 +832,8 @@ class Meta:
 
 class DownloadStructuresSerializer(serializers.Serializer):
-    target_name = serializers.CharField(max_length=200, default=None)
-    proteins = serializers.CharField(max_length=5000, default=None)
+    target_name = serializers.CharField(max_length=200, default=None, allow_blank=True)
+    proteins = serializers.CharField(max_length=5000, default='', allow_blank=True)
     all_aligned_structures = serializers.BooleanField(default=False)
     pdb_info = serializers.BooleanField(default=False)
     cif_info = serializers.BooleanField(default=False)
@@ -845,7 +845,7 @@ class DownloadStructuresSerializer(serializers.Serializer):
     single_sdf_file = serializers.BooleanField(default=False)
     metadata_info = serializers.BooleanField(default=False)
     static_link = serializers.BooleanField(default=False)
-    file_url = serializers.CharField(max_length=200, default=None)
+    file_url = serializers.CharField(max_length=200, default='', allow_blank=True)
     trans_matrix_info = serializers.BooleanField(default=False)
 
diff --git a/viewer/views.py b/viewer/views.py
index 56f4c43e..8c33b318 100644
--- a/viewer/views.py
+++ b/viewer/views.py
@@ -1425,6 +1425,7 @@ def create(self, request):
         this method.
""" logger.info('+ DownloadStructures.post') + logger.debug('DownloadStructures.post.data: %s', request.data) erase_out_of_date_download_records() From bc17249fa14886890449b4aba498d4316d1f27d4 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 4 Mar 2024 15:09:42 +0000 Subject: [PATCH 31/47] Consecutive numbering of observations under canon site --- viewer/target_loader.py | 105 ++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 269f1b45..6b957663 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -5,7 +5,6 @@ import logging import math import os -import re import string import tarfile import uuid @@ -23,7 +22,7 @@ from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import MultipleObjectsReturned from django.db import IntegrityError, transaction -from django.db.models import Model +from django.db.models import Count, Model from django.db.models.base import ModelBase from django.utils import timezone @@ -1655,7 +1654,6 @@ def process_bundle(self): canon_site_confs=canon_site_conf_objects, ) - # values = ["canon_site_conf__canon_site", "cmpd"] values = ["experiment"] qs = ( SiteObservation.objects.values(*values) @@ -1663,58 +1661,61 @@ def process_bundle(self): .annotate(obvs=ArrayAgg("id")) .values_list("obvs", flat=True) ) - for elem in qs: - # objects in this group should be named with same scheme - so_group = SiteObservation.objects.filter(pk__in=elem) - # first process existing codes and find maximum value - codelist = so_group.filter(code__isnull=False).values_list( - "code", flat=True - ) - stripped = [] - for k in codelist: - try: - stripped.append(re.search(r"x\d*\D*", k).group(0)) - except AttributeError: - # code exists but seems to be non-standard. don't - # know if this has implications to upload - # processing - logger.error("Non-standard SiteObservation code: %s", k) - - # get the latest iterator position - iter_pos = "" - if stripped: - last = sorted(stripped)[-1] - try: - iter_pos = re.search(r"[^\d]+(?=\d*$)", last).group(0) - except AttributeError: - # technically it should be validated in previous try-catch block - logger.error("Non-standard SiteObservation code 2: %s", last) - - # ... 
and create new one starting from next item
-            suffix = alphanumerator(start_from=iter_pos)
-            for so in so_group.filter(code__isnull=True):
-                code_prefix = experiment_objects[so.experiment.code].index_data[
-                    "code_prefix"
-                ]
-                code = f"{code_prefix}{so.experiment.code.split('-')[1]}{next(suffix)}"
-
-                # test uniqueness for target
-                # TODO: this should ideally be solved by db engine, before
-                # rushing to write the trigger, have think about the
-                # loader concurrency situations
-                if SiteObservation.objects.filter(
-                    experiment__experiment_upload__target=self.target,
-                    code=code,
-                ).exists():
-                    msg = (
-                        f"short code {code} already exists for this target; "
-                        + "specify a code_prefix to resolve this conflict"
+        for elem in qs:
+            # fmt: off
+            subgroups = SiteObservation.objects.filter(
+                pk__in=elem,
+            ).order_by(
+                "canon_site_conf__canon_site",
+            ).annotate(
+                sites=Count("canon_site_conf__canon_site"),
+                obvs=ArrayAgg('id'),
+            ).order_by(
+                "-sites",
+            ).values_list("obvs", flat=True)
+            # fmt: on
+
+            suffix = alphanumerator()
+            for sub in subgroups:
+                # objects in this group should be named with same scheme
+                so_group = SiteObservation.objects.filter(pk__in=sub)
+
+                # memo to self: there used to be some code here to test
+                # the position of the iterator in existing entries. This
+                # was because it was assumed that adding v2 uploads could
+                # bring along new observations under an existing
+                # experiment. Following discussions with Conor, it seems
+                # that this will not be the case. But should it ever be,
+                # that code was deleted on 2024-03-04, if you need to
+                # check the history
+
+                for so in so_group.filter(code__isnull=True):
+                    code_prefix = experiment_objects[so.experiment.code].index_data[
+                        "code_prefix"
+                    ]
+                    # iter_pos = next(suffix)
+                    # code = f"{code_prefix}{so.experiment.code.split('-')[1]}{iter_pos}"
+                    code = (
+                        f"{code_prefix}{so.experiment.code.split('-')[1]}{next(suffix)}"
                     )
-                    self.report.log(logging.ERROR, msg)

-                so.code = code
-                so.save()
+                    # test uniqueness for target
+                    # TODO: this should ideally be solved by db engine, before
+                    # rushing to write the trigger, have a think about the
+                    # loader concurrency situations
+                    if SiteObservation.objects.filter(
+                        experiment__experiment_upload__target=self.target,
+                        code=code,
+                    ).exists():
+                        msg = (
+                            f"short code {code} already exists for this target; "
+                            + "specify a code_prefix to resolve this conflict"
+                        )
+                        self.report.log(logging.ERROR, msg)
+
+                    so.code = code
+                    so.save()

         # final remaining fk, attach reference site observation to canon_site_conf
         for val in canon_site_conf_objects.values():  # pylint: disable=no-member

From 3d8202a83baa63a49bf5690806f4cb6656ac8fd3 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Tue, 5 Mar 2024 11:07:58 +0000
Subject: [PATCH 32/47] SiteObservation.tag split to tag and tag_prefix (1361)

---
 viewer/migrations/0048_auto_20240305_1038.py | 26 ++++++++++++++++
 viewer/models.py                             |  3 ++
 viewer/serializers.py                        |  1 +
 viewer/target_loader.py                      | 31 +++++++++++---------
 4 files changed, 47 insertions(+), 14 deletions(-)
 create mode 100644 viewer/migrations/0048_auto_20240305_1038.py

diff --git a/viewer/migrations/0048_auto_20240305_1038.py b/viewer/migrations/0048_auto_20240305_1038.py
new file mode 100644
index 00000000..950f0605
--- /dev/null
+++ b/viewer/migrations/0048_auto_20240305_1038.py
@@ -0,0 +1,26 @@
+# Generated by Django 3.2.23 on 2024-03-05 10:38
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ('viewer', '0047_auto_20240301_1243'),
+    ]
+
+    
operations = [ + migrations.AddField( + model_name='sessionprojecttag', + name='tag_prefix', + field=models.TextField( + help_text='Tag prefix for auto-generated tags', null=True + ), + ), + migrations.AddField( + model_name='siteobservationtag', + name='tag_prefix', + field=models.TextField( + help_text='Tag prefix for auto-generated tags', null=True + ), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index 2fe21e90..ac9742b8 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -1180,6 +1180,9 @@ class Meta: class Tag(models.Model): tag = models.CharField(max_length=200, help_text="The (unique) name of the tag") + tag_prefix = models.TextField( + null=True, help_text="Tag prefix for auto-generated tags" + ) upload_name = models.CharField( max_length=200, help_text="The generated name of the tag" ) diff --git a/viewer/serializers.py b/viewer/serializers.py index 400c55dd..a029267b 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -721,6 +721,7 @@ class Meta: extra_kwargs = { "id": {"read_only": True}, "upload_name": {"read_only": True}, + "tag_prefix": {"read_only": True}, } diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 6b957663..5613973f 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1728,11 +1728,12 @@ def process_bundle(self): # tag site observations for val in canon_site_objects.values(): # pylint: disable=no-member - tag = f"{val.instance.canon_site_num} - {''.join(val.instance.name.split('+')[1:-1])}" + prefix = val.instance.canon_site_num + tag = ''.join(val.instance.name.split('+')[1:-1]) so_list = SiteObservation.objects.filter( canon_site_conf__canon_site=val.instance ) - self._tag_observations(tag, "CanonSites", so_list) + self._tag_observations(tag, prefix, "CanonSites", so_list) logger.debug("canon_site objects tagged") @@ -1740,51 +1741,52 @@ def process_bundle(self): for val in canon_site_conf_objects.values(): # pylint: disable=no-member if val.instance.canon_site.canon_site_num not in numerators.keys(): numerators[val.instance.canon_site.canon_site_num] = alphanumerator() - tag = ( + prefix = ( f"{val.instance.canon_site.canon_site_num}" + f"{next(numerators[val.instance.canon_site.canon_site_num])}" - + f" - {val.instance.name.split('+')[0]}" ) + tag = val.instance.name.split('+')[0] so_list = [ site_observation_objects[strip_version(k)].instance for k in val.index_data["members"] ] - self._tag_observations(tag, "ConformerSites", so_list) + self._tag_observations(tag, prefix, "ConformerSites", so_list) logger.debug("conf_site objects tagged") for val in quat_assembly_objects.values(): # pylint: disable=no-member - tag = f"A{val.instance.assembly_num} - {val.instance.name}" + prefix = f"A{val.instance.assembly_num}" + tag = val.instance.name so_list = SiteObservation.objects.filter( xtalform_site__xtalform__in=XtalformQuatAssembly.objects.filter( quat_assembly=val.instance ).values("xtalform") ) - self._tag_observations(tag, "Quatassemblies", so_list) + self._tag_observations(tag, prefix, "Quatassemblies", so_list) logger.debug("quat_assembly objects tagged") for val in xtalform_objects.values(): # pylint: disable=no-member - tag = f"F{val.instance.xtalform_num} - {val.instance.name}" + prefix = f"F{val.instance.xtalform_num}" + tag = val.instance.name so_list = SiteObservation.objects.filter( xtalform_site__xtalform=val.instance ) - self._tag_observations(tag, "Crystalforms", so_list) + self._tag_observations(tag, prefix, "Crystalforms", so_list) logger.debug("xtalform objects tagged") for val in 
xtalform_sites_objects.values():  # pylint: disable=no-member
-            tag = (
+            prefix = (
                 f"F{val.instance.xtalform.xtalform_num}"
                 + f"{val.instance.xtalform_site_num}"
-                + f" - {val.instance.xtalform.name}"
-                + f" - {val.instance.xtalform_site_id}"
             )
+            tag = f"{val.instance.xtalform.name} - {val.instance.xtalform_site_id}"
             so_list = [
                 site_observation_objects[strip_version(k)].instance
                 for k in val.index_data["residues"]
             ]
-            self._tag_observations(tag, "CrystalformSites", so_list)
+            self._tag_observations(tag, prefix, "CrystalformSites", so_list)

             logger.debug("xtalform_sites objects tagged")

@@ -1836,7 +1838,7 @@ def _extract(

         return result

-    def _tag_observations(self, tag, category, so_list):
+    def _tag_observations(self, tag, prefix, category, so_list):
         try:
             # memo to self: description is set to tag, but there's
             # no fk to tag, instead, tag has a fk to
@@ -1871,6 +1873,7 @@
         except SiteObservationTag.DoesNotExist:
             so_tag = SiteObservationTag()
             so_tag.tag = tag
+            so_tag.tag_prefix = prefix
             so_tag.upload_name = tag
             so_tag.category = TagCategory.objects.get(category=category)
             so_tag.target = self.target

From afe10e2f2843ae562ce37eb599ee69aed7b02c90 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Tue, 5 Mar 2024 12:15:51 +0000
Subject: [PATCH 33/47] fix: crystallographic_files folders in download now
 sans suffix (#550)

---
 viewer/download_structures.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 5036e018..9f154a6a 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -737,7 +737,13 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
                     afile = []
                     for f in model_attr:
                         # here the model_attr is already stringified
-                        apath = Path('crystallographic_files').joinpath(so.code)
+                        try:
+                            exp_path = re.search(r"x\d*", so.code).group(0)  # type: ignore[union-attr]
+                        except AttributeError:
+                            logger.error('Unexpected shortcode format: %s', so.code)
+                            exp_path = so.code
+
+                        apath = Path('crystallographic_files').joinpath(exp_path)
                         if model_attr and model_attr != 'None':
                             archive_path = str(
                                 apath.joinpath(

From d7cc29a31fd6f737e7f8625c50cb13ea5be5202c Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Tue, 5 Mar 2024 16:48:48 +0000
Subject: [PATCH 34/47] fix: tag names now include prefix in download's
 metadata.csv

---
 viewer/download_structures.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 9f154a6a..cf5862d0 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -20,7 +20,8 @@

 import pandoc
 from django.conf import settings
-from django.db.models import Exists, OuterRef, Subquery
+from django.db.models import CharField, Exists, F, OuterRef, Subquery, Value
+from django.db.models.functions import Concat

 from viewer.models import (
     DownloadLinks,
@@ -79,6 +80,7 @@ class TagSubquery(Subquery):
     """Annotate SiteObservation with tag of given category"""

     def __init__(self, category):
+        # fmt: off
         query = SiteObservationTag.objects.filter(
             pk=Subquery(
                 SiteObvsSiteObservationTag.objects.filter(
@@ -88,8 +90,16 @@ def __init__(self, category):
                 ),
             ).values('site_obvs_tag')[:1]
         )
-        ).values('tag')[0:1]
+        ).annotate(
+            combitag=Concat(
+                F('tag_prefix'),
+                Value(' - '),
+                F('tag'),
+                output_field=CharField(),
+            ),
+        ).values('combitag')[0:1]
         super().__init__(query)
+        # fmt: on


 class 
CuratedTagSubquery(Exists): From e037ac0cee75e6b1461397c61e2f65e9596b808b Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 7 Mar 2024 09:43:22 +0000 Subject: [PATCH 35/47] fix: return all proteins listed in api/download_structures --- viewer/views.py | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/viewer/views.py b/viewer/views.py index 8c33b318..29ebf55d 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1473,40 +1473,28 @@ def create(self, request): return Response(content, status=status.HTTP_404_NOT_FOUND) logger.info('Found Target record %r', target) - site_obvs = models.SiteObservation.objects.none() - proteins_list = [] - if request.data['proteins']: - logger.info('Given Proteins in request') - # Get first part of protein code - proteins_list = [ - p.strip().split(":")[0] for p in request.data['proteins'].split(',') - ] + proteins_list = [p.strip() for p in request.data.get('proteins', []).split(',')] + if proteins_list: logger.info('Given %s Proteins %s', len(proteins_list), proteins_list) - logger.info('Looking for SiteObservation records for given Proteins...') - # Filter by protein codes - for code_first_part in proteins_list: - # prot = models.Protein.objects.filter(code__contains=code_first_part).values() - # I don't see why I need to drop out of django objects here - prot = models.SiteObservation.objects.filter( - experiment__experiment_upload__target=target, code=code_first_part + + site_obvs = models.SiteObservation.objects.filter( + experiment__experiment_upload__target=target, + code__in=proteins_list, + ) + + missing_obvs = set(proteins_list).difference( + set(site_obvs.values_list('code', flat=True)) + ) + if missing_obvs: + logger.warning( + 'Could not find SiteObservation record for "%s"', + missing_obvs, ) - if prot.exists(): - # even more than just django object, I need an - # unevaluated queryset down the line - site_obvs = models.SiteObservation.objects.filter( - pk=prot.first().pk, - ) - else: - logger.warning( - 'Could not find SiteObservation record for "%s"', - code_first_part, - ) else: logger.info('Request had no Proteins') logger.info('Looking for Protein records for %r...', target) - # proteins = models.Protein.objects.filter(target_id=target.id).values() site_obvs = models.SiteObservation.objects.filter( experiment__experiment_upload__target=target ) From d4783bb7c2e5e84f981c91813d4517aefd5621b0 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 7 Mar 2024 11:02:18 +0000 Subject: [PATCH 36/47] fix: fixed 'All structures' option not working in download dialog --- viewer/views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/viewer/views.py b/viewer/views.py index 29ebf55d..1d1600e9 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1473,7 +1473,9 @@ def create(self, request): return Response(content, status=status.HTTP_404_NOT_FOUND) logger.info('Found Target record %r', target) - proteins_list = [p.strip() for p in request.data.get('proteins', []).split(',')] + proteins_list = [ + p.strip() for p in request.data.get('proteins', '').split(',') if p + ] if proteins_list: logger.info('Given %s Proteins %s', len(proteins_list), proteins_list) logger.info('Looking for SiteObservation records for given Proteins...') From c08f18bfda076ae3493b391469ffc04927293006 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 7 Mar 2024 13:45:53 +0000 Subject: [PATCH 37/47] Migrations for new file fields --- viewer/migrations/0049_auto_20240307_1344.py | 36 
++++++++++++++++++++ viewer/models.py | 6 ++++ viewer/target_loader.py | 14 +++++--- 3 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 viewer/migrations/0049_auto_20240307_1344.py diff --git a/viewer/migrations/0049_auto_20240307_1344.py b/viewer/migrations/0049_auto_20240307_1344.py new file mode 100644 index 00000000..862b6045 --- /dev/null +++ b/viewer/migrations/0049_auto_20240307_1344.py @@ -0,0 +1,36 @@ +# Generated by Django 3.2.23 on 2024-03-07 13:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0048_auto_20240305_1038'), + ] + + operations = [ + migrations.AddField( + model_name='historicalsiteobservation', + name='ligand_mol', + field=models.TextField(max_length=255, null=True), + ), + migrations.AddField( + model_name='historicalsiteobservation', + name='ligand_smiles', + field=models.TextField(max_length=255, null=True), + ), + migrations.AddField( + model_name='siteobservation', + name='ligand_mol', + field=models.FileField( + max_length=255, null=True, upload_to='target_loader_data/' + ), + ), + migrations.AddField( + model_name='siteobservation', + name='ligand_smiles', + field=models.FileField( + max_length=255, null=True, upload_to='target_loader_data/' + ), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index ac9742b8..31336fa4 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -465,6 +465,12 @@ class SiteObservation(models.Model): seq_id = models.IntegerField() chain_id = models.CharField(max_length=1) ligand_mol_file = models.TextField(null=True) + ligand_mol = models.FileField( + upload_to="target_loader_data/", null=True, max_length=255 + ) + ligand_smiles = models.FileField( + upload_to="target_loader_data/", null=True, max_length=255 + ) ligand_pdb = models.FileField( upload_to="target_loader_data/", null=True, max_length=255 ) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 5613973f..aff26aaa 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1319,11 +1319,13 @@ def process_site_observation( apo_desolv_file, apo_file, artefacts_file, - ligand_mol, + ligand_mol_file, sigmaa_file, diff_file, event_file, ligand_pdb, + ligand_mol, + ligand_smiles, ) = self.validate_files( obj_identifier=experiment_id, file_struct=data, @@ -1340,16 +1342,18 @@ def process_site_observation( "diff_map", # NB! keys in meta_aligner not yet updated "event_map", "ligand_pdb", + "ligand_mol", + "ligand_smiles", ), validate_files=validate_files, ) - logger.debug('looking for ligand_mol: %s', ligand_mol) + logger.debug('looking for ligand_mol: %s', ligand_mol_file) mol_data = None - if ligand_mol: + if ligand_mol_file: with contextlib.suppress(TypeError, FileNotFoundError): with open( - self.raw_data.joinpath(ligand_mol), + self.raw_data.joinpath(ligand_mol_file), "r", encoding="utf-8", ) as f: @@ -1377,6 +1381,8 @@ def process_site_observation( "event_file": str(self._get_final_path(event_file)), "artefacts_file": str(self._get_final_path(artefacts_file)), "ligand_pdb": str(self._get_final_path(ligand_pdb)), + "ligand_mol": str(self._get_final_path(ligand_mol)), + "ligand_smiles": str(self._get_final_path(ligand_smiles)), "pdb_header_file": "currently missing", "ligand_mol_file": mol_data, } From 8b737490580e8d697cbe2e5afd13123be93114bc Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 7 Mar 2024 16:40:11 +0000 Subject: [PATCH 38/47] Issue 1326 - mol and smiles added to download bundle NB! 
not production/staging ready, still contains a hack for testing
because XCA doesn't provide all the attributes.

---
 viewer/download_structures.py | 12 ++++++++++++
 viewer/target_loader.py       | 14 +++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index cf5862d0..ce1091ff 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -49,6 +49,8 @@
     'apo_desolv_file': ('aligned'),  # SiteObservation: apo_desolv_file
     'bound_file': ('aligned'),  # SiteObservation: bound_file
     'sdf_info': ('aligned'),  # SiteObservation: ligand_mol_file (indirectly)
+    'ligand_mol': ('aligned'),  # SiteObservation: ligand_mol
+    'ligand_smiles': ('aligned'),  # SiteObservation: ligand_smiles
     'ligand_pdb': ('aligned'),  # SiteObservation: ligand_pdb
     'smiles_info': (''),  # SiteObservation: smiles_info (indirectly)
     # those above are all controlled by serializer's all_aligned_structures flag
@@ -136,6 +138,8 @@ class ArchiveFile:
         'diff_file': {},
         'sigmaa_file': {},
         'ligand_pdb': {},
+        'ligand_mol': {},
+        'ligand_smiles': {},
     },
     'molecules': {
         'sdf_files': {},
@@ -229,6 +233,10 @@ def _patch_molecule_name(site_observation):
     lines = site_observation.ligand_mol_file.split('\n')
     if not lines[0].strip():
         lines[0] = site_observation.long_code
+
+    # the db content is a mol file but what's requested here is
+    # sdf, so add the sdf separator
+    lines.append('$$$$\n')
     return '\n'.join(lines)

@@ -776,6 +784,8 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
             'artefacts_file',
             'pdb_header_file',
             'ligand_pdb',
+            'ligand_mol',
+            'ligand_smiles',
             'diff_file',
         ]:
             # siteobservation object
@@ -890,6 +900,8 @@ def get_download_params(request):
         'apo_solv_file': serializer.validated_data['all_aligned_structures'],
         'apo_desolv_file': serializer.validated_data['all_aligned_structures'],
         'ligand_pdb': serializer.validated_data['all_aligned_structures'],
+        'ligand_mol': serializer.validated_data['all_aligned_structures'],
+        'ligand_smiles': serializer.validated_data['all_aligned_structures'],
         'cif_info': serializer.validated_data['cif_info'],
         'mtz_info': serializer.validated_data['mtz_info'],
         'map_info': serializer.validated_data['map_info'],
diff --git a/viewer/target_loader.py b/viewer/target_loader.py
index aff26aaa..efbdcfc8 100644
--- a/viewer/target_loader.py
+++ b/viewer/target_loader.py
@@ -635,7 +635,7 @@ def logfunc(key, message):

         # memo to self: added type ignore directives to return line
         # below and append line above because after small refactoring,
-        # mypy all of the sudden started throwing errors on bothe or
+        # mypy all of a sudden started throwing errors on both of
        # these. the core of it's grievance is that it expects the
        # return type to be list[str]. no idea why, function signature
        # clearly defines it as list[str | None]
@@ -1348,7 +1348,19 @@ def process_site_observation(
             validate_files=validate_files,
         )

+        # TODO: ligand file simulation for testing, remove once the
+        # key is addded to XCA output
+        if ligand_mol:
+            ligand_smiles_path = f"{ligand_mol.removesuffix('.mol')}.smi"
+            if self.raw_data.joinpath(ligand_smiles_path).is_file():
+                ligand_smiles = ligand_smiles_path
+            else:
+                ligand_smiles = None
+        else:
+            ligand_smiles = None
+
         logger.debug('looking for ligand_mol: %s', ligand_mol_file)
+
         mol_data = None
         if ligand_mol_file:
             with contextlib.suppress(TypeError, FileNotFoundError):

From 1f1de43f64f79487be5541b3153682e63ef582b3 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Fri, 8 Mar 2024 09:27:39 +0000
Subject: [PATCH 39/47] Target loader should handle empty code_prefix and
 tooltip

'Should' because this hasn't been tested yet with real data

---
 viewer/target_loader.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/viewer/target_loader.py b/viewer/target_loader.py
index efbdcfc8..e792169a 100644
--- a/viewer/target_loader.py
+++ b/viewer/target_loader.py
@@ -734,7 +734,6 @@ def process_experiment(
         """
         del kwargs
         assert item_data
-        assert prefix_tooltips
         logger.debug("incoming data: %s", item_data)

         experiment_name, data = item_data
@@ -814,8 +813,12 @@
         # version int old versions are kept target loader version
         version = 1

-        code_prefix = extract(key="code_prefix")
-        prefix_tooltip = prefix_tooltips.get(code_prefix, "")
+        # if empty or key missing entirely, ensure code_prefix returns empty
+        code_prefix = extract(key="code_prefix", level=logging.INFO)
+        # ignoring type because tooltip dict can legitimately be empty
+        # and in such a case the assert statement fails. 
need to remove it + # and use the ignore + prefix_tooltip = prefix_tooltips.get(code_prefix, "") # type: ignore[union-attr] fields = { "code": experiment_name, @@ -1501,7 +1504,7 @@ def process_bundle(self): self.version_number = meta["version_number"] self.version_dir = meta["version_dir"] self.previous_version_dirs = meta["previous_version_dirs"] - prefix_tooltips = meta["code_prefix_tooltips"] + prefix_tooltips = meta.get("code_prefix_tooltips", {}) # check transformation matrix files ( # pylint: disable=unbalanced-tuple-unpacking From 35ba43998c8e8e833e7b508bdb53b2c812ec5e43 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 8 Mar 2024 10:51:04 +0000 Subject: [PATCH 40/47] Column 'Downloaded' to metadata.csv in downloads --- viewer/download_structures.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index ce1091ff..fc7e68d0 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -418,13 +418,13 @@ def _trans_matrix_files_zip(ziparchive, target): _add_empty_file(ziparchive, archive_path) -def _metadate_file_zip(ziparchive, target): +def _metadata_file_zip(ziparchive, target, site_observations): """Compile and add metadata file to archive.""" logger.info('+ Processing metadata') annotations = {} - values = ['code', 'longcode', 'cmpd__compound_code', 'smiles'] - header = ['Code', 'Long code', 'Compound code', 'Smiles'] + values = ['code', 'longcode', 'cmpd__compound_code', 'smiles', 'downloaded'] + header = ['Code', 'Long code', 'Compound code', 'Smiles', 'Downloaded'] for category in TagCategory.objects.filter(category__in=TAG_CATEGORIES): tag = f'tag_{category.category.lower()}' @@ -432,7 +432,7 @@ def _metadate_file_zip(ziparchive, target): header.append(category.category) annotations[tag] = TagSubquery(category.category) - pattern = re.compile(r'\W+') + pattern = re.compile(r'\W+') # non-alphanumeric characters for tag in SiteObservationTag.objects.filter( category__in=TagCategory.objects.filter(category__in=CURATED_TAG_CATEGORIES), target=target, @@ -449,6 +449,12 @@ def _metadate_file_zip(ziparchive, target): ).prefetch_related( 'cmpd', 'siteobservationtags', + ).annotate( + downloaded=Exists( + site_observations.filter( + pk=OuterRef('pk'), + ), + ) ).annotate(**annotations).values_list(*values) # fmt: on @@ -621,7 +627,9 @@ def _build_readme(readme, original_search, template_file, ziparchive): readme.write(f'- {filename}' + '\n') -def _create_structures_zip(target, zip_contents, file_url, original_search, host): +def _create_structures_zip( + target, zip_contents, file_url, original_search, host, site_observations +): """Write a ZIP file containing data from an input dictionary.""" logger.info('+ _create_structures_zip(%s)', target.title) @@ -684,7 +692,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host # compile and add metadata.csv if zip_contents['metadata_info']: - _metadate_file_zip(ziparchive, target) + _metadata_file_zip(ziparchive, target, site_observations) if zip_contents['trans_matrix_info']: _trans_matrix_files_zip(ziparchive, target) @@ -1006,7 +1014,14 @@ def create_or_return_download_link(request, target, site_observations): zip_contents = _create_structures_dict( site_observations, protein_params, other_params ) - _create_structures_zip(target, zip_contents, file_url, original_search, host) + _create_structures_zip( + target, + zip_contents, + file_url, + original_search, + host, + 
site_observations, + ) download_link = DownloadLinks() # Note: 'zip_file' and 'zip_contents' record properties are no longer used. From 46278e3b152902acb4a2cdbe5a6fe2db60617e76 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 8 Mar 2024 14:52:55 +0000 Subject: [PATCH 41/47] fix: restore 'upload_name' in site obvs tags to prefix-tag format --- viewer/target_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 5613973f..b5196c73 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1874,7 +1874,7 @@ def _tag_observations(self, tag, prefix, category, so_list): so_tag = SiteObservationTag() so_tag.tag = tag so_tag.tag_prefix = prefix - so_tag.upload_name = tag + so_tag.upload_name = f"{prefix} - {tag}" so_tag.category = TagCategory.objects.get(category=category) so_tag.target = self.target so_tag.mol_group = so_group From b9522423113796bb13b99c20836d85391038fabb Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 11 Mar 2024 10:38:28 +0000 Subject: [PATCH 42/47] Removed ligand_smiles workaround All necessary files are now tracked by the database and returned in download. --- viewer/download_structures.py | 2 +- viewer/target_loader.py | 13 +------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index fc7e68d0..1ec33714 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -512,7 +512,7 @@ def _extra_files_zip(ziparchive, target): ziparchive.write( filepath, os.path.join( - _ZIP_FILEPATHS[f'extra_files_{num_extra_dir}'], file + f'{_ZIP_FILEPATHS["extra_files"]}_{num_extra_dir}', file ), ) num_processed += 1 diff --git a/viewer/target_loader.py b/viewer/target_loader.py index efbdcfc8..2ea15435 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1279,7 +1279,7 @@ def process_site_observation( longcode = f"{experiment.code}_{chain}_{str(ligand)}_{str(idx)}" key = f"{experiment.code}/{chain}/{str(ligand)}" - smiles = extract(key="ligand_smiles") + smiles = extract(key="ligand_smiles_string") try: compound = compounds[experiment_id].instance @@ -1348,17 +1348,6 @@ def process_site_observation( validate_files=validate_files, ) - # TODO: ligand file simulation for testing, remove once the - # key is addded to XCA output - if ligand_mol: - ligand_smiles_path = f"{ligand_mol.removesuffix('.mol')}.smi" - if self.raw_data.joinpath(ligand_smiles_path).is_file(): - ligand_smiles = ligand_smiles_path - else: - ligand_smiles = None - else: - ligand_smiles = None - logger.debug('looking for ligand_mol: %s', ligand_mol_file) mol_data = None From 6d2511eb3bbb98ce707f4374fbb3dff1e7a3adbc Mon Sep 17 00:00:00 2001 From: "Alan B. 
Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Mon, 11 Mar 2024 17:54:07 +0100 Subject: [PATCH 43/47] fix: Add force_error_display to connection functions (default False) (#559) Co-authored-by: Alan Christie --- api/security.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/api/security.py b/api/security.py index 01605352..12d4a987 100644 --- a/api/security.py +++ b/api/security.py @@ -47,7 +47,7 @@ # response = view(request) -def get_remote_conn() -> Optional[SSHConnector]: +def get_remote_conn(force_error_display=False) -> Optional[SSHConnector]: credentials: Dict[str, Any] = { "user": settings.ISPYB_USER, "pw": settings.ISPYB_PASSWORD, @@ -71,7 +71,8 @@ def get_remote_conn() -> Optional[SSHConnector]: # Assume the credentials are invalid if there is no host. # If a host is not defined other properties are useless. if not credentials["host"]: - logger.debug("No ISPyB host - cannot return a connector") + if logging.DEBUG >= logger.level or force_error_display: + logger.info("No ISPyB host - cannot return a connector") return None # Try to get an SSH connection (aware that it might fail) @@ -81,14 +82,14 @@ def get_remote_conn() -> Optional[SSHConnector]: except Exception: # Log the exception if DEBUG level or lower/finer? # The following will not log if the level is set to INFO for example. - if logging.DEBUG >= logger.level: + if logging.DEBUG >= logger.level or force_error_display: logger.info("credentials=%s", credentials) logger.exception("Got the following exception creating SSHConnector...") return conn -def get_conn() -> Optional[Connector]: +def get_conn(force_error_display=False) -> Optional[Connector]: credentials: Dict[str, Any] = { "user": settings.ISPYB_USER, "pw": settings.ISPYB_PASSWORD, @@ -101,7 +102,8 @@ def get_conn() -> Optional[Connector]: # Assume the credentials are invalid if there is no host. # If a host is not defined other properties are useless. if not credentials["host"]: - logger.debug("No ISPyB host - cannot return a connector") + if logging.DEBUG >= logger.level or force_error_display: + logger.info("No ISPyB host - cannot return a connector") return None conn: Optional[Connector] = None @@ -110,7 +112,7 @@ def get_conn() -> Optional[Connector]: except Exception: # Log the exception if DEBUG level or lower/finer? # The following will not log if the level is set to INFO for example. - if logging.DEBUG >= logger.level: + if logging.DEBUG >= logger.level or force_error_display: logger.info("credentials=%s", credentials) logger.exception("Got the following exception creating Connector...") From 14cd643eb6ecb3e1f7ae40e529f358071fa47db9 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:38:43 +0100 Subject: [PATCH 44/47] Align production with staging (#555) (#560) * Some changes to cset_upload.py to allow site observation short codes (#527) * stashing * fix: cset_upload.py updated to allow new-style site observation codes NB! this probably still won't work! I suspect the file I was given is broken and I cannot test it further * stashing * stashing * Short code prefix and tooltip to backend Target loader now reads short code prefix and tooltip from meta_aligner.yaml. Tooltip is saved to Experiment model. 
TODO: make tooltip available via API

* Prefix tooltip now served by api/site_observation

* stashing

* Site observation groups for shortcodes now by experiment

* feat: download structure fixed

TODO: add all the yamls

* All yaml files added to download

* New format to download zip (issue 1326) (#530)

* stashing

* stashing

* feat: download structure fixed

TODO: add all the yamls

* All yaml files added to download

* cset_upload.py: lhs_pdb renamed to ref_pdb

* Renamed canon- and conf site tags

* Adds support for key-based SSH connections (#534)

* Centralised environment variables (#529)

* refactor: Restructured settings.py

* docs: Minor tweaks

* refactor: Move security and infection config to settings

* refactor: b/e & f/e/ tags now in settings (also fixed f/e tag value)

* refactor: Move Neo4j config to settings

* refactor: More variables into settings

* refactor: Moved remaining config

* docs: Adds configuration guide as comments

* docs: Variable prefix now 'stack_' not 'stack_env_'

---------

* feat: Adds support for private keys on SSH tunnel

* fix: Fixes key-based logic

---------

* build(deps): bump cryptography from 42.0.0 to 42.0.2 (#533)

Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.0 to 42.0.2.
- [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pyca/cryptography/compare/42.0.0...42.0.2)

---
updated-dependencies:
- dependency-name: cryptography
  dependency-type: indirect
...

* docs: Updates documentation (#536)

* build(deps): bump django from 3.2.20 to 3.2.24 (#535)

Bumps [django](https://github.com/django/django) from 3.2.20 to 3.2.24.
- [Commits](https://github.com/django/django/compare/3.2.20...3.2.24)

---
updated-dependencies:
- dependency-name: django
  dependency-type: direct:production
...

* fix: reverting wrong changes

* fix: reverting wrong changes (#538)

* stashing

* add site observation's ligand sdf to aligned_files

* fix: custom pdb now downloadable

* fix: increased loglevel to error on unexpected exceptions block

* fix: Discourse service check now checks API key before creating a service (#544)

* build(deps): bump cryptography from 42.0.2 to 42.0.4 (#539)

Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.2 to 42.0.4.
- [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pyca/cryptography/compare/42.0.2...42.0.4)

---
updated-dependencies:
- dependency-name: cryptography
  dependency-type: indirect
...

* metadata.csv populated

Started working on issue 1355 as well, it's too tightly coupled. Some
work remaining re that:
- when tag created in UI, make sure upload_name attribute is populated

* upload_name automatically populated when creating tags in UI

Only populated on creation, updates won't touch it

* changes to api/download_structures

- apo_file, bound_file, sdf_info and smiles_info merged into
  all_aligned_structures
- added pdb_info field

NB! download_structures was required to provide ligand_pdb as well. This
wasn't tracked previously, so I added field to SiteObservation model.
Meaning there's a migration and on stack deployment data needs to be
wiped and reuploaded

* don't download neighbourhoods.yaml unless trans_matrix_info is checked

* fixed error handling (errors.csv) and not returning combined sdf

* fix: Added parsing directives to DownloadStructuresSerializer

* Consecutive numbering of observations under canon site

* SiteObservation.tag split to tag and tag_prefix (1361)

* fix: crystallographic_files folders in download now sans suffix (#550)

* fix: tag names now include prefix in download's metadata.csv

* fix: return all proteins listed in api/download_structures

* fix: fixed 'All structures' option not working in download dialog

* Migrations for new file fields

* Issue 1326 - mol and smiles added to download bundle

NB! not production/staging ready, still contains a hack for testing
because XCA doesn't provide all the attributes.

* Target loader should handle empty code_prefix and tooltip

'Should' because this hasn't been tested yet with real data

* Column 'Downloaded' to metadata.csv in downloads

* fix: restore 'upload_name' in site obvs tags to prefix-tag format

* Removed ligand_smiles workaround

All necessary files are now tracked by the database and returned in
download.

* fix: Add force_error_display to connection functions (default False) (#559)

---------

Signed-off-by: dependabot[bot]
Co-authored-by: Kalev Takkis
Co-authored-by: Warren Thompson
Co-authored-by: Alan Christie
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Warren Thompson

From 7805e71b7c86454816091563c5e8ed575a4771a2 Mon Sep 17 00:00:00 2001
From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com>
Date: Wed, 13 Mar 2024 10:39:50 +0100
Subject: [PATCH 45/47] Add DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP (#561)

* feat: Add DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP

* style: Minor log tweak

---------

Co-authored-by: Alan Christie
---
 api/security.py        |  8 ++++++--
 fragalysis/settings.py | 11 +++++++++++
 viewer/views.py        |  2 +-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/api/security.py b/api/security.py
index 12d4a987..4c2afc19 100644
--- a/api/security.py
+++ b/api/security.py
@@ -354,9 +354,10 @@ def get_proposals_for_user(self, user, restrict_to_membership=False):
         proposals = set()
         ispyb_user = settings.ISPYB_USER
         logger.debug(
-            "ispyb_user=%s restrict_to_membership=%s",
+            "ispyb_user=%s restrict_to_membership=%s (DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP=%s)",
             ispyb_user,
             restrict_to_membership,
+            settings.DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP,
         )
         if ispyb_user:
             if user.is_authenticated:
@@ -368,7 +369,10 @@

             # We have all the proposals where the user has authority.
             # Add open/public proposals?
-            if not restrict_to_membership:
+            if (
+                not restrict_to_membership
+                or settings.DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP
+            ):
                 proposals.update(self._get_open_proposals())

         # Return the set() as a list()
diff --git a/fragalysis/settings.py b/fragalysis/settings.py
index 633487f6..cfef3208 100644
--- a/fragalysis/settings.py
+++ b/fragalysis/settings.py
@@ -464,6 +464,17 @@

 COMPUTED_SET_MEDIA_DIRECTORY: str = "computed_set_data"

+# The following (part of m2ms-1385) is used to prevent the
+# 'restrict-to-membership' check in security.py - something that is designed to prevent
+# uploading to public proposals unless the user is explicitly part of the proposal
+# (according to ISPyB). 
This variable is used to defeat this test for situations +# when ISPyB is unavailable. It is not permitted when the DEPLOYMENT_MODE +# is 'PRODUCTION +DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP: bool = False +if os.environ.get("DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP") == "True": + assert DEPLOYMENT_MODE != "PRODUCTION" + DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP = True + # Discourse settings for API calls to Discourse Platform. DISCOURSE_PARENT_CATEGORY: str = "Fragalysis targets" DISCOURSE_USER: str = "fragalysis" diff --git a/viewer/views.py b/viewer/views.py index 1d1600e9..91f91fbb 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1579,7 +1579,7 @@ def create(self, request, *args, **kwargs): contact_email=contact_email, user_id=request.user.pk, ) - logger.info("+ UploadTargetExperiments.create got Celery id %s", task.task_id) + logger.info("+ UploadTargetExperiments.create got Celery id %s", task.task_id) url = reverse('viewer:task_status', kwargs={'task_id': task.task_id}) # as it launches task, I think 202 is more appropriate From 849c24cd5e3087227505ad9a7cc3aae3ecabd5fb Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 14 Mar 2024 12:01:27 +0000 Subject: [PATCH 46/47] fix: metadata.csv in download now showing correct tags --- viewer/download_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 1ec33714..0a25850c 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -86,7 +86,7 @@ def __init__(self, category): query = SiteObservationTag.objects.filter( pk=Subquery( SiteObvsSiteObservationTag.objects.filter( - site_observation=OuterRef('pk'), + site_observation=OuterRef(OuterRef('pk')), site_obvs_tag__category=TagCategory.objects.get( category=category, ), From 173b303e9990a27a2d0716fae033c6c0843c625b Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 20 Mar 2024 10:10:45 +0000 Subject: [PATCH 47/47] fix: fixed tag creation process for upload 2 --- viewer/target_loader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 2590f163..4e72cc8a 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1875,7 +1875,9 @@ def _tag_observations(self, tag, prefix, category, so_list): so_group.save() try: - so_tag = SiteObservationTag.objects.get(upload_name=tag, target=self.target) + so_tag = SiteObservationTag.objects.get( + upload_name=f"{prefix} - {tag}", target=self.target + ) # Tag already exists # Apart from the new mol_group and molecules, we shouldn't be # changing anything.
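
The nested-OuterRef construction that patch 46 fixes is easiest to see in isolation. The sketch below restates TagSubquery (patch 34) as a standalone helper. It is a minimal illustration, assuming the viewer models as they stand at the end of this series (SiteObservation, SiteObservationTag with its tag_prefix field, the SiteObvsSiteObservationTag through table, and TagCategory) and a configured Django environment; the names tag_subquery and tag_canonsites are illustrative only, not identifiers from the patches.

# Minimal sketch; run inside a configured Django shell for this project.
from django.db.models import CharField, F, OuterRef, Subquery, Value
from django.db.models.functions import Concat

from viewer.models import (
    SiteObservation,
    SiteObservationTag,
    SiteObvsSiteObservationTag,
    TagCategory,
)


def tag_subquery(category_name):
    # Innermost queryset: through-table rows linking tags to "this"
    # SiteObservation. It sits two subqueries deep, so the reference to
    # the outer SiteObservation must be OuterRef(OuterRef('pk')); a
    # single OuterRef would resolve against SiteObservationTag instead,
    # which is the bug patch 46 fixes.
    tag_pk = SiteObvsSiteObservationTag.objects.filter(
        site_observation=OuterRef(OuterRef('pk')),
        site_obvs_tag__category=TagCategory.objects.get(category=category_name),
    ).values('site_obvs_tag')[:1]

    # Outer subquery: fetch that tag and render it as '<prefix> - <tag>',
    # the combitag form patch 34 introduces for metadata.csv.
    return Subquery(
        SiteObservationTag.objects.filter(pk=Subquery(tag_pk))
        .annotate(
            combitag=Concat(
                F('tag_prefix'),
                Value(' - '),
                F('tag'),
                output_field=CharField(),
            )
        )
        .values('combitag')[:1]
    )


# Usage: one extra column per tag category, resolved in a single SQL query.
qs = SiteObservation.objects.annotate(tag_canonsites=tag_subquery('CanonSites'))

The through-table queryset is embedded two subqueries deep, which is why the outer SiteObservation primary key is wrapped twice: each OuterRef escapes exactly one level of subquery.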