diff --git a/api/security.py b/api/security.py
index 01605352..12d4a987 100644
--- a/api/security.py
+++ b/api/security.py
@@ -47,7 +47,7 @@
 #         response = view(request)
 
 
-def get_remote_conn() -> Optional[SSHConnector]:
+def get_remote_conn(force_error_display=False) -> Optional[SSHConnector]:
     credentials: Dict[str, Any] = {
         "user": settings.ISPYB_USER,
         "pw": settings.ISPYB_PASSWORD,
@@ -71,7 +71,8 @@ def get_remote_conn() -> Optional[SSHConnector]:
     # Assume the credentials are invalid if there is no host.
     # If a host is not defined other properties are useless.
     if not credentials["host"]:
-        logger.debug("No ISPyB host - cannot return a connector")
+        if logging.DEBUG >= logger.level or force_error_display:
+            logger.info("No ISPyB host - cannot return a connector")
         return None
 
     # Try to get an SSH connection (aware that it might fail)
@@ -81,14 +82,14 @@ def get_remote_conn() -> Optional[SSHConnector]:
     except Exception:
         # Log the exception if DEBUG level or lower/finer?
         # The following will not log if the level is set to INFO for example.
-        if logging.DEBUG >= logger.level:
+        if logging.DEBUG >= logger.level or force_error_display:
             logger.info("credentials=%s", credentials)
             logger.exception("Got the following exception creating SSHConnector...")
 
     return conn
 
 
-def get_conn() -> Optional[Connector]:
+def get_conn(force_error_display=False) -> Optional[Connector]:
     credentials: Dict[str, Any] = {
         "user": settings.ISPYB_USER,
         "pw": settings.ISPYB_PASSWORD,
@@ -101,7 +102,8 @@ def get_conn() -> Optional[Connector]:
     # Assume the credentials are invalid if there is no host.
    # If a host is not defined other properties are useless.
     if not credentials["host"]:
-        logger.debug("No ISPyB host - cannot return a connector")
+        if logging.DEBUG >= logger.level or force_error_display:
+            logger.info("No ISPyB host - cannot return a connector")
         return None
 
     conn: Optional[Connector] = None
@@ -110,7 +112,7 @@ def get_conn() -> Optional[Connector]:
     except Exception:
         # Log the exception if DEBUG level or lower/finer?
         # The following will not log if the level is set to INFO for example.
-        if logging.DEBUG >= logger.level:
+        if logging.DEBUG >= logger.level or force_error_display:
             logger.info("credentials=%s", credentials)
             logger.exception("Got the following exception creating Connector...")
 
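Both connector factories now take a `force_error_display` flag so a caller can surface connection diagnostics even when the logger is configured above DEBUG. A minimal sketch of a call site, assuming `get_conn` as patched above (the health-check context is hypothetical):

```python
from api.security import get_conn

# Hypothetical health check: production loggers usually sit at INFO,
# so without the flag a failed ISPyB connection would stay silent.
conn = get_conn(force_error_display=True)
if conn is None:
    # The credentials and the exception are now in the log
    # regardless of the configured logger level.
    raise RuntimeError("No ISPyB connection - see log for details")
```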
diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 9f154a6a..1ec33714 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -20,7 +20,8 @@
 import pandoc
 from django.conf import settings
-from django.db.models import Exists, OuterRef, Subquery
+from django.db.models import CharField, Exists, F, OuterRef, Subquery, Value
+from django.db.models.functions import Concat
 
 from viewer.models import (
     DownloadLinks,
@@ -48,6 +49,8 @@
     'apo_desolv_file': ('aligned'),  # SiteObservation: apo_desolv_file
     'bound_file': ('aligned'),  # SiteObservation: bound_file
     'sdf_info': ('aligned'),  # SiteObservation: ligand_mol_file (indirectly)
+    'ligand_mol': ('aligned'),  # SiteObservation: ligand_mol
+    'ligand_smiles': ('aligned'),  # SiteObservation: ligand_smiles
     'ligand_pdb': ('aligned'),  # SiteObservation: ligand_pdb
     'smiles_info': (''),  # SiteObservation: smiles_info (indirectly)
     # those above are all controlled by serializer's all_aligned_structures flag
@@ -79,6 +82,7 @@ class TagSubquery(Subquery):
     """Annotate SiteObservation with tag of given category"""
 
     def __init__(self, category):
+        # fmt: off
         query = SiteObservationTag.objects.filter(
             pk=Subquery(
                 SiteObvsSiteObservationTag.objects.filter(
@@ -88,8 +92,16 @@ def __init__(self, category):
                     ),
                 ).values('site_obvs_tag')[:1]
             )
-        ).values('tag')[0:1]
+        ).annotate(
+            combitag=Concat(
+                F('tag_prefix'),
+                Value(' - '),
+                F('tag'),
+                output_field=CharField(),
+            ),
+        ).values('combitag')[0:1]
         super().__init__(query)
+        # fmt: on
 
 
 class CuratedTagSubquery(Exists):
@@ -126,6 +138,8 @@ class ArchiveFile:
         'diff_file': {},
         'sigmaa_file': {},
         'ligand_pdb': {},
+        'ligand_mol': {},
+        'ligand_smiles': {},
     },
     'molecules': {
         'sdf_files': {},
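`TagSubquery` now yields a combined "prefix - tag" label instead of the bare tag, so the metadata columns match the renamed tags (see the `upload_name` change in `target_loader.py` below). The core of the change is SQL-side string concatenation; a self-contained sketch of the same pattern, assuming the `SiteObservationTag` model from this diff:

```python
from django.db.models import CharField, F, Value
from django.db.models.functions import Concat

from viewer.models import SiteObservationTag

# Build "<tag_prefix> - <tag>" in the database rather than in Python,
# so the combined value can be consumed inside a Subquery annotation.
labels = SiteObservationTag.objects.annotate(
    combitag=Concat(
        F('tag_prefix'),
        Value(' - '),
        F('tag'),
        output_field=CharField(),
    ),
).values_list('combitag', flat=True)
```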
@@ -219,6 +233,10 @@ def _patch_molecule_name(site_observation):
     lines = site_observation.ligand_mol_file.split('\n')
     if not lines[0].strip():
         lines[0] = site_observation.long_code
+
+    # the db content is a mol file but what's requested here is
+    # an sdf, so add the sdf record separator
+    lines.append('$$$$\n')
     return '\n'.join(lines)
 
 
@@ -400,13 +418,13 @@ def _trans_matrix_files_zip(ziparchive, target):
         _add_empty_file(ziparchive, archive_path)
 
 
-def _metadate_file_zip(ziparchive, target):
+def _metadata_file_zip(ziparchive, target, site_observations):
     """Compile and add metadata file to archive."""
     logger.info('+ Processing metadata')
 
     annotations = {}
-    values = ['code', 'longcode', 'cmpd__compound_code', 'smiles']
-    header = ['Code', 'Long code', 'Compound code', 'Smiles']
+    values = ['code', 'longcode', 'cmpd__compound_code', 'smiles', 'downloaded']
+    header = ['Code', 'Long code', 'Compound code', 'Smiles', 'Downloaded']
 
     for category in TagCategory.objects.filter(category__in=TAG_CATEGORIES):
         tag = f'tag_{category.category.lower()}'
@@ -414,7 +432,7 @@ def _metadate_file_zip(ziparchive, target):
         header.append(category.category)
         annotations[tag] = TagSubquery(category.category)
 
-    pattern = re.compile(r'\W+')
+    pattern = re.compile(r'\W+')  # non-alphanumeric characters
     for tag in SiteObservationTag.objects.filter(
         category__in=TagCategory.objects.filter(category__in=CURATED_TAG_CATEGORIES),
         target=target,
@@ -431,6 +449,12 @@
     ).prefetch_related(
         'cmpd',
         'siteobservationtags',
+    ).annotate(
+        downloaded=Exists(
+            site_observations.filter(
+                pk=OuterRef('pk'),
+            ),
+        )
     ).annotate(**annotations).values_list(*values)
     # fmt: on
 
@@ -488,7 +512,7 @@ def _extra_files_zip(ziparchive, target):
                 ziparchive.write(
                     filepath,
                     os.path.join(
-                        _ZIP_FILEPATHS[f'extra_files_{num_extra_dir}'], file
+                        f'{_ZIP_FILEPATHS["extra_files"]}_{num_extra_dir}', file
                     ),
                 )
                 num_processed += 1
@@ -603,7 +627,9 @@ def _build_readme(readme, original_search, template_file, ziparchive):
             readme.write(f'- {filename}' + '\n')
 
 
-def _create_structures_zip(target, zip_contents, file_url, original_search, host):
+def _create_structures_zip(
+    target, zip_contents, file_url, original_search, host, site_observations
+):
     """Write a ZIP file containing data from an input dictionary."""
     logger.info('+ _create_structures_zip(%s)', target.title)
 
@@ -666,7 +692,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host
 
     # compile and add metadata.csv
     if zip_contents['metadata_info']:
-        _metadate_file_zip(ziparchive, target)
+        _metadata_file_zip(ziparchive, target, site_observations)
 
     if zip_contents['trans_matrix_info']:
         _trans_matrix_files_zip(ziparchive, target)
@@ -766,6 +792,8 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
                 'artefacts_file',
                 'pdb_header_file',
                 'ligand_pdb',
+                'ligand_mol',
+                'ligand_smiles',
                 'diff_file',
             ]:
                 # siteobservation object
@@ -880,6 +908,8 @@ def get_download_params(request):
         'apo_solv_file': serializer.validated_data['all_aligned_structures'],
         'apo_desolv_file': serializer.validated_data['all_aligned_structures'],
         'ligand_pdb': serializer.validated_data['all_aligned_structures'],
+        'ligand_mol': serializer.validated_data['all_aligned_structures'],
+        'ligand_smiles': serializer.validated_data['all_aligned_structures'],
         'cif_info': serializer.validated_data['cif_info'],
         'mtz_info': serializer.validated_data['mtz_info'],
         'map_info': serializer.validated_data['map_info'],
@@ -984,7 +1014,14 @@ def create_or_return_download_link(request, target, site_observations):
     zip_contents = _create_structures_dict(
         site_observations, protein_params, other_params
     )
-    _create_structures_zip(target, zip_contents, file_url, original_search, host)
+    _create_structures_zip(
+        target,
+        zip_contents,
+        file_url,
+        original_search,
+        host,
+        site_observations,
+    )
 
     download_link = DownloadLinks()
     # Note: 'zip_file' and 'zip_contents' record properties are no longer used.
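`_metadata_file_zip` now receives the download's queryset so that each row of `metadata.csv` gains a `Downloaded` column marking whether that observation was part of the download. The correlation uses `Exists`/`OuterRef`; a hedged sketch of the pattern in isolation, assuming the `Target` and `SiteObservation` models from this repo (the helper name is hypothetical):

```python
from django.db.models import Exists, OuterRef, QuerySet

from viewer.models import SiteObservation, Target

def annotate_downloaded(target: Target, site_observations: QuerySet) -> QuerySet:
    # Flag every SiteObservation of the target with a boolean that is
    # True when its pk appears in the (unevaluated) queryset selected
    # for download.
    return SiteObservation.objects.filter(
        experiment__experiment_upload__target=target,
    ).annotate(
        downloaded=Exists(site_observations.filter(pk=OuterRef('pk'))),
    )
```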
diff --git a/viewer/migrations/0049_auto_20240307_1344.py b/viewer/migrations/0049_auto_20240307_1344.py
new file mode 100644
index 00000000..862b6045
--- /dev/null
+++ b/viewer/migrations/0049_auto_20240307_1344.py
@@ -0,0 +1,36 @@
+# Generated by Django 3.2.23 on 2024-03-07 13:44
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ('viewer', '0048_auto_20240305_1038'),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='historicalsiteobservation',
+            name='ligand_mol',
+            field=models.TextField(max_length=255, null=True),
+        ),
+        migrations.AddField(
+            model_name='historicalsiteobservation',
+            name='ligand_smiles',
+            field=models.TextField(max_length=255, null=True),
+        ),
+        migrations.AddField(
+            model_name='siteobservation',
+            name='ligand_mol',
+            field=models.FileField(
+                max_length=255, null=True, upload_to='target_loader_data/'
+            ),
+        ),
+        migrations.AddField(
+            model_name='siteobservation',
+            name='ligand_smiles',
+            field=models.FileField(
+                max_length=255, null=True, upload_to='target_loader_data/'
+            ),
+        ),
+    ]
diff --git a/viewer/models.py b/viewer/models.py
index ac9742b8..31336fa4 100644
--- a/viewer/models.py
+++ b/viewer/models.py
@@ -465,6 +465,12 @@ class SiteObservation(models.Model):
     seq_id = models.IntegerField()
     chain_id = models.CharField(max_length=1)
     ligand_mol_file = models.TextField(null=True)
+    ligand_mol = models.FileField(
+        upload_to="target_loader_data/", null=True, max_length=255
+    )
+    ligand_smiles = models.FileField(
+        upload_to="target_loader_data/", null=True, max_length=255
+    )
     ligand_pdb = models.FileField(
         upload_to="target_loader_data/", null=True, max_length=255
     )
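The new `ligand_mol` and `ligand_smiles` columns are `FileField`s (mirrored as `TextField`s on the historical model), so they store paths under `target_loader_data/` rather than inline content. A minimal sketch of reading the SMILES back out, assuming the loader stored a plain-text file; the helper name is hypothetical:

```python
from typing import Optional

from viewer.models import SiteObservation

def read_ligand_smiles(so: SiteObservation) -> Optional[str]:
    # A FileField is falsy when no file has been attached.
    if not so.ligand_smiles:
        return None
    with so.ligand_smiles.open('rb') as f:
        return f.read().decode('utf-8').strip()
```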
diff --git a/viewer/target_loader.py b/viewer/target_loader.py
index 5613973f..2590f163 100644
--- a/viewer/target_loader.py
+++ b/viewer/target_loader.py
@@ -635,7 +635,7 @@ def logfunc(key, message):
 
     # memo to self: added type ignore directives to return line
     # below and append line above because after small refactoring,
-    # mypy all of the sudden started throwing errors on bothe or
+    # mypy all of the sudden started throwing errors on both of
     # these. the core of it's grievance is that it expects the
     # return type to be list[str]. no idea why, function signature
     # clearly defines it as list[str | None]
@@ -734,7 +734,6 @@ def process_experiment(
         """
         del kwargs
         assert item_data
-        assert prefix_tooltips
         logger.debug("incoming data: %s", item_data)
 
         experiment_name, data = item_data
@@ -814,8 +813,12 @@ def process_experiment(
         # version int old versions are kept target loader
         version = 1
 
-        code_prefix = extract(key="code_prefix")
-        prefix_tooltip = prefix_tooltips.get(code_prefix, "")
+        # if empty or key missing entirely, ensure code_prefix returns empty
+        code_prefix = extract(key="code_prefix", level=logging.INFO)
+        # ignoring type because tooltip dict can legitimately be empty
+        # and in such case, assert statement fails. need to remove it
+        # and use the ignore
+        prefix_tooltip = prefix_tooltips.get(code_prefix, "")  # type: ignore[union-attr]
 
         fields = {
             "code": experiment_name,
@@ -1279,7 +1282,7 @@ def process_site_observation(
            longcode = f"{experiment.code}_{chain}_{str(ligand)}_{str(idx)}"
            key = f"{experiment.code}/{chain}/{str(ligand)}"
 
-            smiles = extract(key="ligand_smiles")
+            smiles = extract(key="ligand_smiles_string")
 
            try:
                compound = compounds[experiment_id].instance
@@ -1319,11 +1322,13 @@ def process_site_observation(
                apo_desolv_file,
                apo_file,
                artefacts_file,
-                ligand_mol,
+                ligand_mol_file,
                sigmaa_file,
                diff_file,
                event_file,
                ligand_pdb,
+                ligand_mol,
+                ligand_smiles,
            ) = self.validate_files(
                obj_identifier=experiment_id,
                file_struct=data,
@@ -1340,16 +1345,19 @@ def process_site_observation(
                    "diff_map",  # NB! keys in meta_aligner not yet updated
                    "event_map",
                    "ligand_pdb",
+                    "ligand_mol",
+                    "ligand_smiles",
                ),
                validate_files=validate_files,
            )
 
-            logger.debug('looking for ligand_mol: %s', ligand_mol)
+            logger.debug('looking for ligand_mol: %s', ligand_mol_file)
+
            mol_data = None
-            if ligand_mol:
+            if ligand_mol_file:
                with contextlib.suppress(TypeError, FileNotFoundError):
                    with open(
-                        self.raw_data.joinpath(ligand_mol),
+                        self.raw_data.joinpath(ligand_mol_file),
                        "r",
                        encoding="utf-8",
                    ) as f:
@@ -1377,6 +1385,8 @@ def process_site_observation(
                "event_file": str(self._get_final_path(event_file)),
                "artefacts_file": str(self._get_final_path(artefacts_file)),
                "ligand_pdb": str(self._get_final_path(ligand_pdb)),
+                "ligand_mol": str(self._get_final_path(ligand_mol)),
+                "ligand_smiles": str(self._get_final_path(ligand_smiles)),
                "pdb_header_file": "currently missing",
                "ligand_mol_file": mol_data,
            }
@@ -1483,7 +1493,7 @@ def process_bundle(self):
        self.version_number = meta["version_number"]
        self.version_dir = meta["version_dir"]
        self.previous_version_dirs = meta["previous_version_dirs"]
-        prefix_tooltips = meta["code_prefix_tooltips"]
+        prefix_tooltips = meta.get("code_prefix_tooltips", {})
 
        # check transformation matrix files
        (  # pylint: disable=unbalanced-tuple-unpacking
@@ -1874,7 +1884,7 @@ def _tag_observations(self, tag, prefix, category, so_list):
        so_tag = SiteObservationTag()
        so_tag.tag = tag
        so_tag.tag_prefix = prefix
-        so_tag.upload_name = tag
+        so_tag.upload_name = f"{prefix} - {tag}"
        so_tag.category = TagCategory.objects.get(category=category)
        so_tag.target = self.target
        so_tag.mol_group = so_group
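The removal of `assert prefix_tooltips` (and the switch to `meta.get(..., {})`) comes down to Python truthiness: a present-but-empty tooltip mapping is perfectly legitimate, yet it fails a bare assert. A short illustration (the key is illustrative only):

```python
prefix_tooltips = {}                        # valid: upload simply has no tooltips
assert not prefix_tooltips                  # an empty dict evaluates as False
tooltip = prefix_tooltips.get('Mpro', '')   # safe lookup with '' fallback
```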
diff --git a/viewer/views.py b/viewer/views.py
index 8c33b318..1d1600e9 100644
--- a/viewer/views.py
+++ b/viewer/views.py
@@ -1473,40 +1473,30 @@ def create(self, request):
             return Response(content, status=status.HTTP_404_NOT_FOUND)
         logger.info('Found Target record %r', target)
 
-        site_obvs = models.SiteObservation.objects.none()
-        proteins_list = []
-        if request.data['proteins']:
-            logger.info('Given Proteins in request')
-            # Get first part of protein code
-            proteins_list = [
-                p.strip().split(":")[0] for p in request.data['proteins'].split(',')
-            ]
+        proteins_list = [
+            p.strip() for p in request.data.get('proteins', '').split(',') if p
+        ]
+        if proteins_list:
             logger.info('Given %s Proteins %s', len(proteins_list), proteins_list)
-            logger.info('Looking for SiteObservation records for given Proteins...')
 
-            # Filter by protein codes
-            for code_first_part in proteins_list:
-                # prot = models.Protein.objects.filter(code__contains=code_first_part).values()
-                # I don't see why I need to drop out of django objects here
-                prot = models.SiteObservation.objects.filter(
-                    experiment__experiment_upload__target=target, code=code_first_part
+
+            site_obvs = models.SiteObservation.objects.filter(
+                experiment__experiment_upload__target=target,
+                code__in=proteins_list,
+            )
+
+            missing_obvs = set(proteins_list).difference(
+                set(site_obvs.values_list('code', flat=True))
+            )
+            if missing_obvs:
+                logger.warning(
+                    'Could not find SiteObservation record for "%s"',
+                    missing_obvs,
                 )
-                if prot.exists():
-                    # even more than just django object, I need an
-                    # unevaluated queryset down the line
-                    site_obvs = models.SiteObservation.objects.filter(
-                        pk=prot.first().pk,
-                    )
-                else:
-                    logger.warning(
-                        'Could not find SiteObservation record for "%s"',
-                        code_first_part,
-                    )
         else:
             logger.info('Request had no Proteins')
             logger.info('Looking for Protein records for %r...', target)
-            # proteins = models.Protein.objects.filter(target_id=target.id).values()
             site_obvs = models.SiteObservation.objects.filter(
                 experiment__experiment_upload__target=target
             )
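The rewritten `create` view parses the `proteins` parameter in one pass and issues a single `code__in` filter instead of looping per code, keeping `site_obvs` an unevaluated queryset and reporting all missing codes at once. A quick check of the new parsing behaviour, assuming a comma-separated string as sent by the front end (the codes are made up); note that codes are no longer truncated at `:`:

```python
raw = 'Mpro-x0072/A/201,Mpro-x0104/A/201,,'
proteins_list = [p.strip() for p in raw.split(',') if p]
assert proteins_list == ['Mpro-x0072/A/201', 'Mpro-x0104/A/201']
```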