From 4a703ed803595ef6f0a2f7b792b771d982866b65 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 12 Feb 2024 13:03:08 +0000 Subject: [PATCH 01/47] Some changes to cset_upload.py to allow site observation short codes (#527) * stashing * fix: cset_upload.py updated to allow new-style site observation codes NB! this probably still won't work! I suspect the file I was given is broken and I cannot test it further --- viewer/cset_upload.py | 25 ++++++++++++++++--------- viewer/sdf_check.py | 8 +++++--- 2 files changed, 21 insertions(+), 12 deletions(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index f1e84b6f..9cbeed32 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -198,9 +198,12 @@ def get_site_observation( zfile_hashvals=zfile_hashvals, ) else: - name = f'{compound_set.target.title}-{pdb_fn}' + name = pdb_fn try: - site_obvs = SiteObservation.objects.get(code__contains=name) + site_obvs = SiteObservation.objects.get( + code__contains=name, + experiment__experiment_upload__target__title=target, + ) except SiteObservation.DoesNotExist: # Initial SiteObservation lookup failed. logger.warning( @@ -210,7 +213,10 @@ def get_site_observation( ) # Try alternatives. # If all else fails then the site_obvs will be 'None' - qs = SiteObservation.objects.filter(code__contains=name) + qs = SiteObservation.objects.filter( + code__contains=name, + experiment__experiment_upload__target__title=target, + ) if qs.exists(): logger.info( 'Found SiteObservation containing name=%s qs=%s', @@ -219,7 +225,10 @@ def get_site_observation( ) else: alt_name = name.split(':')[0].split('_')[0] - qs = SiteObservation.objects.filter(code__contains=alt_name) + qs = SiteObservation.objects.filter( + code__contains=alt_name, + experiment__experiment_upload__target__title=target, + ) if qs.exists(): logger.info( 'Found SiteObservation containing alternative name=%s qs=%s', @@ -328,15 +337,13 @@ def set_mol( # try exact match first try: site_obvs = SiteObservation.objects.get( - code__contains=str(compound_set.target.title + '-' + i), + code=str(i), experiment__experiment_upload__target_id=compound_set.target, ) ref = site_obvs except SiteObservation.DoesNotExist: qs = SiteObservation.objects.filter( - code__contains=str( - compound_set.target.title + '-' + i.split(':')[0].split('_')[0] - ), + code=str(i.split(':')[0].split('_')[0]), experiment__experiment_upload__target_id=compound_set.target, ) if not qs.exists(): @@ -503,7 +510,7 @@ def set_descriptions( computed_set.save() description_dict = description_mol.GetPropsAsDict() - for key in list(description_dict.keys()): + for key in description_dict.keys(): if key in descriptions_needed and key not in [ 'ref_mols', 'ref_pdb', diff --git a/viewer/sdf_check.py b/viewer/sdf_check.py index 949f1001..411128e4 100755 --- a/viewer/sdf_check.py +++ b/viewer/sdf_check.py @@ -89,10 +89,12 @@ def check_refmol(mol, validate_dict, target=None): for ref_mol in ref_mols: ref_strip = ref_mol.strip() - query_string = f'{target}-' + ref_strip.split(':')[0].split('_')[0] - query = SiteObservation.objects.filter(code__contains=query_string) + query = SiteObservation.objects.filter( + code=ref_strip, + experiment__experiment_upload__target__title=target, + ) if len(query) == 0: - msg = f"No SiteObservation code contains '{query_string}'" + msg = f"No SiteObservation code contains '{ref_strip}'" validate_dict = add_warning( molecule_name=mol.GetProp('_Name'), field='ref_mol', From 433f232d417335f378b5f8e875b74d85e95f6d8c Mon Sep 17 00:00:00 2001 From: Kalev Takkis 
Date: Mon, 12 Feb 2024 14:42:43 +0000 Subject: [PATCH 02/47] stashing --- viewer/target_loader.py | 36 +++++++++++++++++++++++++++++------- 1 file changed, 29 insertions(+), 7 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index b9665633..0a61455c 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1437,12 +1437,33 @@ def process_bundle(self): self.report.log(logging.ERROR, msg) raise KeyError(msg) from exc - # moved this bit from init + try: + config_inputs = config["inputs"] + except KeyError as exc: + msg = "'inputs' key missing in config file" + self.report.log(logging.ERROR, msg) + raise KeyError(msg) from exc + + try: + code_prefix = config_inputs[0]["code_prefix"] + except KeyError as exc: + msg = "'code_prefix' key missing in config file" + self.report.log(logging.ERROR, msg) + raise KeyError(msg) from exc + try: + code_prefix_tooltip = config_inputs[0]["code_prefix_tooltip"] + except KeyError as exc: + msg = "'code_prefix_tooltip' key missing in config file" + self.report.log(logging.ERROR, msg) + raise KeyError(msg) from exc + self.target, target_created = Target.objects.get_or_create( title=self.target_name, display_name=self.target_name, ) + logger.debug("tooltip: %s", code_prefix_tooltip) + # TODO: original target loader's function get_create_projects # seems to handle more cases. adopt or copy visit = self.proposal_ref.split()[0] @@ -1681,20 +1702,21 @@ def process_bundle(self): # technically it should be validated in previous try-catch block logger.error("Non-standard SiteObservation code 2: %s", last) - logger.debug("iter_pos: %s", iter_pos) - # ... and create new one starting from next item suffix = alphanumerator(start_from=iter_pos) for so in so_group.filter(code__isnull=True): - code = f"{so.experiment.code.split('-')[1]}{next(suffix)}" + code = f"{code_prefix}{so.experiment.code.split('-')[1]}{next(suffix)}" # test uniqueness for target # TODO: this should ideally be solved by db engine, before # rushing to write the trigger, have think about the # loader concurrency situations - prefix = alphanumerator() - while code in current_list: - code = f"{next(prefix)}{code}" + if code in current_list: + msg = ( + f"short code {code} already exists for this target; " + + "specify a code_prefix to resolve this conflict" + ) + self.report.log(logging.ERROR, msg) so.code = code so.save() From 3d255f2240d6a309548fad09a2a54dd3b66c832a Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 13 Feb 2024 14:49:42 +0000 Subject: [PATCH 03/47] stashing --- viewer/views.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/views.py b/viewer/views.py index 6112f7ba..650945e1 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1492,7 +1492,7 @@ def create(self, request): # prot = models.Protein.objects.filter(code__contains=code_first_part).values() # I don't see why I need to drop out of django objects here prot = models.SiteObservation.objects.filter( - code__contains=code_first_part + experiment__experiment_upload__target=target, code=code_first_part ) if prot.exists(): # even more than just django object, I need an From 7fd97c9569be90769ddce44d1ec5ee011fd14f5e Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 13 Feb 2024 15:25:32 +0000 Subject: [PATCH 04/47] Short code prefix and tooltip to backend Target loader now reads short code prefix and tooltip from meta_aligner.yaml. Tooltip is saved to Experiment model. 
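As an illustration (the prefix and tooltip values here are hypothetical, not
taken from a real meta_aligner.yaml), the loader resolves a tooltip for each
experiment roughly like this:

    # sketch: the 'code_prefix_tooltips' mapping read from meta_aligner.yaml
    prefix_tooltips = {"Z": "Crystallographic fragment screening hit"}
    code_prefix = "Z"  # the per-experiment 'code_prefix' from the same file
    prefix_tooltip = prefix_tooltips.get(code_prefix, "")
    # stored on the Experiment record's new 'prefix_tooltip' field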
TODO: make tooltip available via API --- .../0043_experiment_prefix_tooltip.py | 17 +++++++++ viewer/models.py | 1 + viewer/target_loader.py | 37 +++++++------------ 3 files changed, 32 insertions(+), 23 deletions(-) create mode 100644 viewer/migrations/0043_experiment_prefix_tooltip.py diff --git a/viewer/migrations/0043_experiment_prefix_tooltip.py b/viewer/migrations/0043_experiment_prefix_tooltip.py new file mode 100644 index 00000000..93477ed4 --- /dev/null +++ b/viewer/migrations/0043_experiment_prefix_tooltip.py @@ -0,0 +1,17 @@ +# Generated by Django 3.2.23 on 2024-02-13 15:12 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0042_alter_xtalformsite_xtalform_site_num'), + ] + + operations = [ + migrations.AddField( + model_name='experiment', + name='prefix_tooltip', + field=models.TextField(null=True), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index e3839475..c2b8af72 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -196,6 +196,7 @@ class Experiment(models.Model): map_info = ArrayField(models.FileField(max_length=255), null=True) type = models.PositiveSmallIntegerField(null=True) pdb_sha256 = models.TextField(null=True) + prefix_tooltip = models.TextField(null=True) compounds = models.ManyToManyField( "Compound", through="ExperimentCompound", diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 0a61455c..d4845174 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -700,6 +700,7 @@ def _enumerate_objects(self, objects: dict, attr: str) -> None: def process_experiment( self, item_data: tuple[str, dict] | None = None, + prefix_tooltips: dict[str, str] | None = None, validate_files: bool = True, **kwargs, ) -> ProcessedObject | None: @@ -734,6 +735,7 @@ def process_experiment( """ del kwargs assert item_data + assert prefix_tooltips logger.debug("incoming data: %s", item_data) experiment_name, data = item_data @@ -813,6 +815,9 @@ def process_experiment( # version int old versions are kept target loader version = 1 + code_prefix = extract(key="code_prefix") + prefix_tooltip = prefix_tooltips.get(code_prefix, "") + fields = { "code": experiment_name, } @@ -830,6 +835,7 @@ def process_experiment( "mtz_info": str(self._get_final_path(mtz_info)), "cif_info": str(self._get_final_path(cif_info)), "map_info": map_info_paths, + "prefix_tooltip": prefix_tooltip, # this doesn't seem to be present # pdb_sha256: } @@ -839,6 +845,7 @@ def process_experiment( index_fields = { "xtalform": assigned_xtalform, "smiles": smiles, + "code_prefix": code_prefix, } return ProcessedObject( @@ -1437,33 +1444,11 @@ def process_bundle(self): self.report.log(logging.ERROR, msg) raise KeyError(msg) from exc - try: - config_inputs = config["inputs"] - except KeyError as exc: - msg = "'inputs' key missing in config file" - self.report.log(logging.ERROR, msg) - raise KeyError(msg) from exc - - try: - code_prefix = config_inputs[0]["code_prefix"] - except KeyError as exc: - msg = "'code_prefix' key missing in config file" - self.report.log(logging.ERROR, msg) - raise KeyError(msg) from exc - try: - code_prefix_tooltip = config_inputs[0]["code_prefix_tooltip"] - except KeyError as exc: - msg = "'code_prefix_tooltip' key missing in config file" - self.report.log(logging.ERROR, msg) - raise KeyError(msg) from exc - self.target, target_created = Target.objects.get_or_create( title=self.target_name, display_name=self.target_name, ) - logger.debug("tooltip: %s", code_prefix_tooltip) - # TODO: 
original target loader's function get_create_projects # seems to handle more cases. adopt or copy visit = self.proposal_ref.split()[0] @@ -1496,6 +1481,7 @@ def process_bundle(self): self.version_number = meta["version_number"] self.version_dir = meta["version_dir"] self.previous_version_dirs = meta["previous_version_dirs"] + prefix_tooltips = meta["code_prefix_tooltips"] # check transformation matrix files ( # pylint: disable=unbalanced-tuple-unpacking @@ -1554,7 +1540,9 @@ def process_bundle(self): ), ) - experiment_objects = self.process_experiment(yaml_data=crystals) + experiment_objects = self.process_experiment( + yaml_data=crystals, prefix_tooltips=prefix_tooltips + ) compound_objects = self.process_compound( yaml_data=crystals, experiments=experiment_objects ) @@ -1705,6 +1693,9 @@ def process_bundle(self): # ... and create new one starting from next item suffix = alphanumerator(start_from=iter_pos) for so in so_group.filter(code__isnull=True): + code_prefix = experiment_objects[so.experiment.code].index_data[ + "code_prefix" + ] code = f"{code_prefix}{so.experiment.code.split('-')[1]}{next(suffix)}" # test uniqueness for target From f43eabf186a52518693a1f26c8b2338df2913bd5 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 14 Feb 2024 12:18:23 +0000 Subject: [PATCH 05/47] Prefix tooltip now serverd by api/site_observation --- viewer/managers.py | 1 + viewer/serializers.py | 1 + viewer/target_loader.py | 10 +++++----- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/viewer/managers.py b/viewer/managers.py index 836ff422..7a1a4826 100644 --- a/viewer/managers.py +++ b/viewer/managers.py @@ -17,6 +17,7 @@ def filter_qs(self): ).annotate( target=F("experiment__experiment_upload__target"), compound_code=F("cmpd__compound_code"), + prefix_tooltip=F("experiment__prefix_tooltip"), ) return qs diff --git a/viewer/serializers.py b/viewer/serializers.py index c969d3da..15093985 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -951,6 +951,7 @@ class Meta: class SiteObservationReadSerializer(serializers.ModelSerializer): compound_code = serializers.StringRelatedField() + prefix_tooltip = serializers.StringRelatedField() class Meta: model = models.SiteObservation diff --git a/viewer/target_loader.py b/viewer/target_loader.py index d4845174..968a6305 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1652,16 +1652,13 @@ def process_bundle(self): canon_site_confs=canon_site_conf_objects, ) - values = ["xtalform_site__xtalform", "canon_site_conf__canon_site", "cmpd"] + values = ["canon_site_conf__canon_site", "cmpd"] qs = ( SiteObservation.objects.values(*values) .order_by(*values) .annotate(obvs=ArrayAgg("id")) .values_list("obvs", flat=True) ) - current_list = SiteObservation.objects.filter( - experiment__experiment_upload__target=self.target - ).values_list('code', flat=True) for elem in qs: # objects in this group should be named with same scheme so_group = SiteObservation.objects.filter(pk__in=elem) @@ -1702,7 +1699,10 @@ def process_bundle(self): # TODO: this should ideally be solved by db engine, before # rushing to write the trigger, have think about the # loader concurrency situations - if code in current_list: + if SiteObservation.objects.filter( + experiment__experiment_upload__target=self.target, + code=code, + ).exists(): msg = ( f"short code {code} already exists for this target; " + "specify a code_prefix to resolve this conflict" From 118756972dfc6a9c36ae0b654fb3d0e91426a72d Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 
14 Feb 2024 14:10:17 +0000 Subject: [PATCH 06/47] stashing --- viewer/download_structures.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 65f94efc..ee4b5766 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -11,6 +11,7 @@ import shutil import uuid import zipfile +from dataclasses import dataclass from datetime import datetime, timedelta, timezone from io import BytesIO from pathlib import Path @@ -49,6 +50,13 @@ 'readme': (''), } + +@dataclass +class MetadataObject: + path: str + archive_path: str + + # Dictionary containing all references needed to create the zip file # NB you may need to add a version number to this at some point... zip_template = { From 632719ac1365522c27ce7faeebac874740b36b14 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 14 Feb 2024 14:15:01 +0000 Subject: [PATCH 07/47] Site observation groups for shortcodes now by experiment --- viewer/target_loader.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 968a6305..8878bda9 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1652,7 +1652,8 @@ def process_bundle(self): canon_site_confs=canon_site_conf_objects, ) - values = ["canon_site_conf__canon_site", "cmpd"] + # values = ["canon_site_conf__canon_site", "cmpd"] + values = ["experiment"] qs = ( SiteObservation.objects.values(*values) .order_by(*values) From 30c10809989491c79b40ee7855f2c4a890d9ca3e Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 15 Feb 2024 13:22:55 +0000 Subject: [PATCH 08/47] feat: download structure fixed TODO: add all the yamls --- viewer/download_structures.py | 133 ++++++++++++++++++++++++---------- 1 file changed, 94 insertions(+), 39 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index ee4b5766..9bddb0e2 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -51,8 +51,8 @@ } -@dataclass -class MetadataObject: +@dataclass(frozen=True) +class ArchiveFile: path: str archive_path: str @@ -224,7 +224,7 @@ def _read_and_patch_molecule_name(path, molecule_name=None): return content -def _add_file_to_zip_aligned(ziparchive, code, filepath): +def _add_file_to_zip_aligned(ziparchive, code, archive_file): """Add the requested file to the zip archive. If the file is an SDF or MOL we insert the name of the molecule @@ -238,39 +238,32 @@ def _add_file_to_zip_aligned(ziparchive, code, filepath): Returns: [boolean]: [True of record added to archive] """ - logger.debug('+_add_file_to_zip_aligned: %s, %s', code, filepath) - if not filepath: + logger.debug('+_add_file_to_zip_aligned: %s, %s', code, archive_file) + if not archive_file: # Odd - assume success logger.error('No filepath value') return True - # Incoming filepath can be both str and FieldFile - try: - filepath = filepath.path - except AttributeError: - filepath = str(Path(settings.MEDIA_ROOT).joinpath(filepath)) - - # strip off the leading parts of path - archive_path = str(Path(*Path(filepath).parts[7:])) + filepath = str(Path(settings.MEDIA_ROOT).joinpath(archive_file.path)) if Path(filepath).is_file(): if _is_mol_or_sdf(filepath): # It's a MOL or SD file. # Read and (potentially) adjust the file # and add to the archive as a string. 
content = _read_and_patch_molecule_name(filepath, molecule_name=code) - ziparchive.writestr(archive_path, content) + ziparchive.writestr(archive_file.archive_path, content) else: # Copy the file without modification - ziparchive.write(filepath, archive_path) + ziparchive.write(filepath, archive_file.archive_path) return True else: logger.warning('filepath "%s" is not a file', filepath) - _add_empty_file(ziparchive, archive_path) + _add_empty_file(ziparchive, archive_file.archive_path) return False -def _add_file_to_sdf(combined_sdf_file, filepath): +def _add_file_to_sdf(combined_sdf_file, archive_file): """Append the requested sdf file to the single sdf file provided. Args: @@ -282,19 +275,19 @@ def _add_file_to_sdf(combined_sdf_file, filepath): """ media_root = settings.MEDIA_ROOT - if not filepath: + if not archive_file.path: # Odd - assume success logger.error('No filepath value') return True - fullpath = os.path.join(media_root, filepath) + fullpath = os.path.join(media_root, archive_file.path) if os.path.isfile(fullpath): with open(combined_sdf_file, 'a', encoding='utf-8') as f_out: patched_sdf_content = _read_and_patch_molecule_name(fullpath) f_out.write(patched_sdf_content) return True else: - logger.warning('filepath "%s" is not a file', filepath) + logger.warning('filepath "%s" is not a file', archive_file.path) return False @@ -309,11 +302,8 @@ def _protein_files_zip(zip_contents, ziparchive, error_file): continue for prot, prot_file in files.items(): - # if it's a list of files (map_info) instead of single file - if not isinstance(prot_file, list): - prot_file = [prot_file] for f in prot_file: - if not _add_file_to_zip_aligned(ziparchive, prot.split(":")[0], f): + if not _add_file_to_zip_aligned(ziparchive, prot, f): error_file.write(f'{param},{prot},{f}\n') prot_errors += 1 @@ -341,14 +331,14 @@ def _molecule_files_zip(zip_contents, ziparchive, combined_sdf_file, error_file) ] is True and not _add_file_to_zip_aligned( ziparchive, prot.split(":")[0], file ): - error_file.write(f'sdf_info,{prot},{file}\n') + error_file.write(f'sdf_info,{prot},{file.path}\n') mol_errors += 1 # Append sdf file on the Molecule record to the combined_sdf_file. if zip_contents['molecules'][ 'single_sdf_file' ] is True and not _add_file_to_sdf(combined_sdf_file, file): - error_file.write(f'single_sdf_file,{prot},{file}\n') + error_file.write(f'single_sdf_file,{prot},{file.path}\n') mol_errors += 1 return mol_errors @@ -633,21 +623,79 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): for so in site_obvs: for param in protein_params: if protein_params[param] is True: - try: - # getting the param from experiment. 
more data are - # coming from there, that's why this is in try - # block + if param in ['pdb_info', 'mtz_info', 'cif_info', 'map_info']: + # experiment object model_attr = getattr(so.experiment, param) - # getattr retrieves FieldFile object, hence the .name - if isinstance(model_attr, list): - # except map_files, this returns a list of files - zip_contents['proteins'][param][so.code] = model_attr + logger.debug( + 'Adding param to zip: %s, value: %s', param, model_attr + ) + if param != 'map_info': + # treat all params as list + model_attr = ( + [model_attr.name] + # None - some weird glitch in storing the values + if model_attr and not str(model_attr).find('None') > -1 + else [param] + ) + + afile = [] + for f in model_attr: + # here the model_attr is already stringified + if model_attr and model_attr != 'None': + archive_path = str( + Path('crystallographic_files') + .joinpath(so.code) + .joinpath( + Path(f) + .parts[-1] + .replace(so.experiment.code, so.code) + ) + ) + else: + archive_path = param + afile.append(ArchiveFile(path=f, archive_path=archive_path)) + + elif param in [ + 'bound_file', + 'apo_solv_file', + 'apo_desolv_file', + 'apo_file', + 'sigmaa_file', + 'event_file', + 'artefacts_file', + 'pdb_header_file', + 'diff_file', + ]: + # siteobservation object + + model_attr = getattr(so, param) + logger.debug( + 'Adding param to zip: %s, value: %s', param, model_attr + ) + if model_attr and model_attr != 'None': + archive_path = str( + Path('aligned_files') + .joinpath(so.code) + .joinpath( + Path(model_attr.name) + .parts[-1] + .replace(so.longcode, so.code) + ) + ) else: - zip_contents['proteins'][param][so.code] = model_attr.name + archive_path = param - except AttributeError: - # on the off chance that the data are in site_observation model - zip_contents['proteins'][param][so.code] = getattr(so, param).name + afile = [ + ArchiveFile( + path=model_attr.name, + archive_path=archive_path, + ) + ] + else: + logger.warning('Unexpected param: %s', param) + continue + + zip_contents['proteins'][param][so.code] = afile if other_params['single_sdf_file'] is True: zip_contents['molecules']['single_sdf_file'] = True @@ -674,7 +722,14 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): if rel_sd_file: logger.debug('rel_sd_file=%s code=%s', rel_sd_file, so.code) - zip_contents['molecules']['sdf_files'].update({rel_sd_file: so.code}) + zip_contents['molecules']['sdf_files'].update( + { + ArchiveFile( + path=rel_sd_file, + archive_path=rel_sd_file, + ): so.code + } + ) num_molecules_collected += 1 # Report (in the log) anomalies From 1744688c169c7a30d27d0d5ffd3c76d9afdbcc5a Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 15 Feb 2024 14:35:19 +0000 Subject: [PATCH 09/47] All yaml files added to download --- viewer/download_structures.py | 42 +++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 9bddb0e2..59e7ceb9 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -446,6 +446,46 @@ def _extra_files_zip(ziparchive, target): logger.info('Processed %s extra files', num_processed) +def _yaml_files_zip(ziparchive, target): + """Add all yaml files (except transforms) from upload to ziparchive""" + + for experiment_upload in target.experimentupload_set.order_by('commit_datetime'): + yaml_paths = ( + Path(settings.MEDIA_ROOT) + .joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY) + .joinpath(experiment_upload.task_id) + ) + + transforms = [ + 
Path(f.name).name + for f in ( + experiment_upload.neighbourhood_transforms, + experiment_upload.neighbourhood_transforms, + experiment_upload.neighbourhood_transforms, + ) + ] + # taking the latest upload for now + # add unpacked zip directory + yaml_paths = [d for d in list(yaml_paths.glob("*")) if d.is_dir()][0] + + # add upload_[d] dir + yaml_paths = next(yaml_paths.glob("upload_*")) + + archive_path = Path('yaml_files').joinpath(yaml_paths.parts[-1]) + + yaml_files = [ + f + for f in list(yaml_paths.glob("*.yaml")) + if f.is_file() and f.name not in transforms + ] + + logger.info('Processing yaml files (%s)...', yaml_files) + + for file in yaml_files: + logger.info('Adding yaml file "%s"...', file) + ziparchive.write(file, str(Path(archive_path).joinpath(file.name))) + + def _document_file_zip(ziparchive, download_path, original_search, host): """Create the document file This consists of a template plus an added contents description. @@ -581,6 +621,8 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host _extra_files_zip(ziparchive, target) + _yaml_files_zip(ziparchive, target) + _document_file_zip(ziparchive, download_path, original_search, host) error_file.close() From 417126f42fb41db4085a7a2bf01f13225864217a Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 15 Feb 2024 15:04:26 +0000 Subject: [PATCH 10/47] New format to download zip (issue 1326) (#530) * stashing * stashing * feat: download structure fixed TODO: add all the yamls * All yaml files added to download --- viewer/download_structures.py | 179 +++++++++++++++++++++++++++------- viewer/views.py | 2 +- 2 files changed, 143 insertions(+), 38 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 65f94efc..59e7ceb9 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -11,6 +11,7 @@ import shutil import uuid import zipfile +from dataclasses import dataclass from datetime import datetime, timedelta, timezone from io import BytesIO from pathlib import Path @@ -49,6 +50,13 @@ 'readme': (''), } + +@dataclass(frozen=True) +class ArchiveFile: + path: str + archive_path: str + + # Dictionary containing all references needed to create the zip file # NB you may need to add a version number to this at some point... zip_template = { @@ -216,7 +224,7 @@ def _read_and_patch_molecule_name(path, molecule_name=None): return content -def _add_file_to_zip_aligned(ziparchive, code, filepath): +def _add_file_to_zip_aligned(ziparchive, code, archive_file): """Add the requested file to the zip archive. If the file is an SDF or MOL we insert the name of the molecule @@ -230,39 +238,32 @@ def _add_file_to_zip_aligned(ziparchive, code, filepath): Returns: [boolean]: [True of record added to archive] """ - logger.debug('+_add_file_to_zip_aligned: %s, %s', code, filepath) - if not filepath: + logger.debug('+_add_file_to_zip_aligned: %s, %s', code, archive_file) + if not archive_file: # Odd - assume success logger.error('No filepath value') return True - # Incoming filepath can be both str and FieldFile - try: - filepath = filepath.path - except AttributeError: - filepath = str(Path(settings.MEDIA_ROOT).joinpath(filepath)) - - # strip off the leading parts of path - archive_path = str(Path(*Path(filepath).parts[7:])) + filepath = str(Path(settings.MEDIA_ROOT).joinpath(archive_file.path)) if Path(filepath).is_file(): if _is_mol_or_sdf(filepath): # It's a MOL or SD file. # Read and (potentially) adjust the file # and add to the archive as a string. 
content = _read_and_patch_molecule_name(filepath, molecule_name=code) - ziparchive.writestr(archive_path, content) + ziparchive.writestr(archive_file.archive_path, content) else: # Copy the file without modification - ziparchive.write(filepath, archive_path) + ziparchive.write(filepath, archive_file.archive_path) return True else: logger.warning('filepath "%s" is not a file', filepath) - _add_empty_file(ziparchive, archive_path) + _add_empty_file(ziparchive, archive_file.archive_path) return False -def _add_file_to_sdf(combined_sdf_file, filepath): +def _add_file_to_sdf(combined_sdf_file, archive_file): """Append the requested sdf file to the single sdf file provided. Args: @@ -274,19 +275,19 @@ def _add_file_to_sdf(combined_sdf_file, filepath): """ media_root = settings.MEDIA_ROOT - if not filepath: + if not archive_file.path: # Odd - assume success logger.error('No filepath value') return True - fullpath = os.path.join(media_root, filepath) + fullpath = os.path.join(media_root, archive_file.path) if os.path.isfile(fullpath): with open(combined_sdf_file, 'a', encoding='utf-8') as f_out: patched_sdf_content = _read_and_patch_molecule_name(fullpath) f_out.write(patched_sdf_content) return True else: - logger.warning('filepath "%s" is not a file', filepath) + logger.warning('filepath "%s" is not a file', archive_file.path) return False @@ -301,11 +302,8 @@ def _protein_files_zip(zip_contents, ziparchive, error_file): continue for prot, prot_file in files.items(): - # if it's a list of files (map_info) instead of single file - if not isinstance(prot_file, list): - prot_file = [prot_file] for f in prot_file: - if not _add_file_to_zip_aligned(ziparchive, prot.split(":")[0], f): + if not _add_file_to_zip_aligned(ziparchive, prot, f): error_file.write(f'{param},{prot},{f}\n') prot_errors += 1 @@ -333,14 +331,14 @@ def _molecule_files_zip(zip_contents, ziparchive, combined_sdf_file, error_file) ] is True and not _add_file_to_zip_aligned( ziparchive, prot.split(":")[0], file ): - error_file.write(f'sdf_info,{prot},{file}\n') + error_file.write(f'sdf_info,{prot},{file.path}\n') mol_errors += 1 # Append sdf file on the Molecule record to the combined_sdf_file. 
if zip_contents['molecules'][ 'single_sdf_file' ] is True and not _add_file_to_sdf(combined_sdf_file, file): - error_file.write(f'single_sdf_file,{prot},{file}\n') + error_file.write(f'single_sdf_file,{prot},{file.path}\n') mol_errors += 1 return mol_errors @@ -448,6 +446,46 @@ def _extra_files_zip(ziparchive, target): logger.info('Processed %s extra files', num_processed) +def _yaml_files_zip(ziparchive, target): + """Add all yaml files (except transforms) from upload to ziparchive""" + + for experiment_upload in target.experimentupload_set.order_by('commit_datetime'): + yaml_paths = ( + Path(settings.MEDIA_ROOT) + .joinpath(settings.TARGET_LOADER_MEDIA_DIRECTORY) + .joinpath(experiment_upload.task_id) + ) + + transforms = [ + Path(f.name).name + for f in ( + experiment_upload.neighbourhood_transforms, + experiment_upload.neighbourhood_transforms, + experiment_upload.neighbourhood_transforms, + ) + ] + # taking the latest upload for now + # add unpacked zip directory + yaml_paths = [d for d in list(yaml_paths.glob("*")) if d.is_dir()][0] + + # add upload_[d] dir + yaml_paths = next(yaml_paths.glob("upload_*")) + + archive_path = Path('yaml_files').joinpath(yaml_paths.parts[-1]) + + yaml_files = [ + f + for f in list(yaml_paths.glob("*.yaml")) + if f.is_file() and f.name not in transforms + ] + + logger.info('Processing yaml files (%s)...', yaml_files) + + for file in yaml_files: + logger.info('Adding yaml file "%s"...', file) + ziparchive.write(file, str(Path(archive_path).joinpath(file.name))) + + def _document_file_zip(ziparchive, download_path, original_search, host): """Create the document file This consists of a template plus an added contents description. @@ -583,6 +621,8 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host _extra_files_zip(ziparchive, target) + _yaml_files_zip(ziparchive, target) + _document_file_zip(ziparchive, download_path, original_search, host) error_file.close() @@ -625,21 +665,79 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): for so in site_obvs: for param in protein_params: if protein_params[param] is True: - try: - # getting the param from experiment. 
more data are - # coming from there, that's why this is in try - # block + if param in ['pdb_info', 'mtz_info', 'cif_info', 'map_info']: + # experiment object model_attr = getattr(so.experiment, param) - # getattr retrieves FieldFile object, hence the .name - if isinstance(model_attr, list): - # except map_files, this returns a list of files - zip_contents['proteins'][param][so.code] = model_attr + logger.debug( + 'Adding param to zip: %s, value: %s', param, model_attr + ) + if param != 'map_info': + # treat all params as list + model_attr = ( + [model_attr.name] + # None - some weird glitch in storing the values + if model_attr and not str(model_attr).find('None') > -1 + else [param] + ) + + afile = [] + for f in model_attr: + # here the model_attr is already stringified + if model_attr and model_attr != 'None': + archive_path = str( + Path('crystallographic_files') + .joinpath(so.code) + .joinpath( + Path(f) + .parts[-1] + .replace(so.experiment.code, so.code) + ) + ) + else: + archive_path = param + afile.append(ArchiveFile(path=f, archive_path=archive_path)) + + elif param in [ + 'bound_file', + 'apo_solv_file', + 'apo_desolv_file', + 'apo_file', + 'sigmaa_file', + 'event_file', + 'artefacts_file', + 'pdb_header_file', + 'diff_file', + ]: + # siteobservation object + + model_attr = getattr(so, param) + logger.debug( + 'Adding param to zip: %s, value: %s', param, model_attr + ) + if model_attr and model_attr != 'None': + archive_path = str( + Path('aligned_files') + .joinpath(so.code) + .joinpath( + Path(model_attr.name) + .parts[-1] + .replace(so.longcode, so.code) + ) + ) else: - zip_contents['proteins'][param][so.code] = model_attr.name + archive_path = param + + afile = [ + ArchiveFile( + path=model_attr.name, + archive_path=archive_path, + ) + ] + else: + logger.warning('Unexpected param: %s', param) + continue - except AttributeError: - # on the off chance that the data are in site_observation model - zip_contents['proteins'][param][so.code] = getattr(so, param).name + zip_contents['proteins'][param][so.code] = afile if other_params['single_sdf_file'] is True: zip_contents['molecules']['single_sdf_file'] = True @@ -666,7 +764,14 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): if rel_sd_file: logger.debug('rel_sd_file=%s code=%s', rel_sd_file, so.code) - zip_contents['molecules']['sdf_files'].update({rel_sd_file: so.code}) + zip_contents['molecules']['sdf_files'].update( + { + ArchiveFile( + path=rel_sd_file, + archive_path=rel_sd_file, + ): so.code + } + ) num_molecules_collected += 1 # Report (in the log) anomalies diff --git a/viewer/views.py b/viewer/views.py index 6112f7ba..650945e1 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1492,7 +1492,7 @@ def create(self, request): # prot = models.Protein.objects.filter(code__contains=code_first_part).values() # I don't see why I need to drop out of django objects here prot = models.SiteObservation.objects.filter( - code__contains=code_first_part + experiment__experiment_upload__target=target, code=code_first_part ) if prot.exists(): # even more than just django object, I need an From ef6d56a47455cbde59a5819cf0bf7ccd7547b01e Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 16 Feb 2024 14:19:00 +0000 Subject: [PATCH 11/47] cset_upload.py: lhs_pdb renamed to ref_pdb --- viewer/cset_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 9cbeed32..bb4e704f 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ 
-363,7 +363,7 @@ def set_mol( # Try to get the LHS SiteObservation, # This will be used to set the ComputedMolecule.site_observation_code. # This may fail. - lhs_property = 'lhs_pdb' + lhs_property = 'ref_pdb' lhs_so = self.get_site_observation( lhs_property, mol, From be127621538194a7851399970652c5b3cdc577d4 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 16 Feb 2024 14:43:59 +0000 Subject: [PATCH 12/47] Renamed canon- and conf site tags --- viewer/target_loader.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 8878bda9..edad072e 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1724,7 +1724,7 @@ def process_bundle(self): # tag site observations for val in canon_site_objects.values(): # pylint: disable=no-member - tag = f"{val.instance.canon_site_num} - {val.instance.name}" + tag = f"{val.instance.canon_site_num} - {''.join(val.instance.name.split('+')[1:-1])}" so_list = SiteObservation.objects.filter( canon_site_conf__canon_site=val.instance ) @@ -1739,7 +1739,7 @@ def process_bundle(self): tag = ( f"{val.instance.canon_site.canon_site_num}" + f"{next(numerators[val.instance.canon_site.canon_site_num])}" - + f" - {val.instance.name}" + + f" - {val.instance.name.split('+')[0]}" ) so_list = [ site_observation_objects[strip_version(k)].instance From f3483bb919db9db5836a42f3a30566f04bd2c366 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Mon, 19 Feb 2024 16:29:08 +0100 Subject: [PATCH 13/47] Adds support for key-based SSH connections (#534) * Centralised environment variables (#529) * refactor: Restructured settings.py * docs: Minor tweaks * refactor: Move security and infection config to settings * refactor: b/e & f/e/ tags now in settings (also fixed f/e tag value) * refactor: Move Neo4j config to settings * refactor: More variables into settings * refactor: Moved remaining config * docs: Adds configuration guide as comments * docs: Variable prefix now 'stack_' not 'stack_env_' --------- Co-authored-by: Alan Christie * feat: Adds support for private keys on SSH tunnel * fix: Fixes key-based logic --------- Co-authored-by: Alan Christie --- api/infections.py | 12 +- api/remote_ispyb_connector.py | 54 ++- api/security.py | 39 +-- fragalysis/settings.py | 623 ++++++++++++++++++++-------------- fragalysis/views.py | 39 +-- network/views.py | 13 +- viewer/serializers.py | 4 +- viewer/services.py | 5 +- viewer/squonk2_agent.py | 63 ++-- viewer/views.py | 2 +- 10 files changed, 463 insertions(+), 391 deletions(-) diff --git a/api/infections.py b/api/infections.py index c1eb6cab..4143c585 100644 --- a/api/infections.py +++ b/api/infections.py @@ -4,9 +4,10 @@ # Infections are injected into the application via the environment variable # 'INFECTIONS', a comma-separated list of infection names. -import os from typing import Dict, Set +from django.conf import settings + from api.utils import deployment_mode_is_production # The built-in set of infections. @@ -20,9 +21,6 @@ INFECTION_STRUCTURE_DOWNLOAD: 'An error in the DownloadStructures view' } -# What infection have been set? -_INFECTIONS: str = os.environ.get('INFECTIONS', '').lower() - def have_infection(name: str) -> bool: """Returns True if we've been given the named infection. 
@@ -31,9 +29,11 @@ def have_infection(name: str) -> bool: def _get_infections() -> Set[str]: - if _INFECTIONS == '': + if settings.INFECTIONS == '': return set() infections: set[str] = { - infection for infection in _INFECTIONS.split(',') if infection in _CATALOGUE + infection + for infection in settings.INFECTIONS.split(',') + if infection in _CATALOGUE } return infections diff --git a/api/remote_ispyb_connector.py b/api/remote_ispyb_connector.py index 56fce7dc..398f3473 100644 --- a/api/remote_ispyb_connector.py +++ b/api/remote_ispyb_connector.py @@ -28,6 +28,7 @@ def __init__( remote=False, ssh_user=None, ssh_password=None, + ssh_private_key_filename=None, ssh_host=None, conn_inactivity=360, ): @@ -45,6 +46,7 @@ def __init__( 'ssh_host': ssh_host, 'ssh_user': ssh_user, 'ssh_pass': ssh_password, + 'ssh_pkey': ssh_private_key_filename, 'db_host': host, 'db_port': int(port), 'db_user': user, @@ -53,12 +55,11 @@ def __init__( } self.remote_connect(**creds) logger.debug( - "Started host=%s username=%s local_bind_port=%s", + "Started remote ssh_host=%s ssh_user=%s local_bind_port=%s", ssh_host, ssh_user, self.server.local_bind_port, ) - else: self.connect( user=user, @@ -68,29 +69,60 @@ def __init__( port=port, conn_inactivity=conn_inactivity, ) - logger.debug("Started host=%s user=%s port=%s", host, user, port) + logger.debug("Started direct host=%s user=%s port=%s", host, user, port) def remote_connect( - self, ssh_host, ssh_user, ssh_pass, db_host, db_port, db_user, db_pass, db_name + self, + ssh_host, + ssh_user, + ssh_pass, + ssh_pkey, + db_host, + db_port, + db_user, + db_pass, + db_name, ): sshtunnel.SSH_TIMEOUT = 10.0 sshtunnel.TUNNEL_TIMEOUT = 10.0 sshtunnel.DEFAULT_LOGLEVEL = logging.CRITICAL self.conn_inactivity = int(self.conn_inactivity) - self.server = sshtunnel.SSHTunnelForwarder( - (ssh_host), - ssh_username=ssh_user, - ssh_password=ssh_pass, - remote_bind_address=(db_host, db_port), - ) + if ssh_pkey: + logger.debug( + 'Creating SSHTunnelForwarder (with SSH Key) host=%s user=%s', + ssh_host, + ssh_user, + ) + self.server = sshtunnel.SSHTunnelForwarder( + (ssh_host), + ssh_username=ssh_user, + ssh_pkey=ssh_pkey, + remote_bind_address=(db_host, db_port), + ) + else: + logger.debug( + 'Creating SSHTunnelForwarder (with password) host=%s user=%s', + ssh_host, + ssh_user, + ) + self.server = sshtunnel.SSHTunnelForwarder( + (ssh_host), + ssh_username=ssh_user, + ssh_password=ssh_pass, + remote_bind_address=(db_host, db_port), + ) + logger.debug('Created SSHTunnelForwarder') # stops hanging connections in transport self.server.daemon_forward_servers = True self.server.daemon_transport = True + logger.debug('Starting SSH server...') self.server.start() + logger.debug('Started SSH server') + logger.debug('Connecting to ISPyB (db_user=%s db_name=%s)...', db_user, db_name) self.conn = pymysql.connect( user=db_user, password=db_pass, @@ -100,8 +132,10 @@ def remote_connect( ) if self.conn is not None: + logger.debug('Connected') self.conn.autocommit = True else: + logger.debug('Failed to connect') self.server.stop() raise ISPyBConnectionException self.last_activity_ts = time.time() diff --git a/api/security.py b/api/security.py index eafc31fe..01605352 100644 --- a/api/security.py +++ b/api/security.py @@ -48,40 +48,41 @@ def get_remote_conn() -> Optional[SSHConnector]: - ispyb_credentials: Dict[str, Any] = { - "user": os.environ.get("ISPYB_USER"), - "pw": os.environ.get("ISPYB_PASSWORD"), - "host": os.environ.get("ISPYB_HOST"), - "port": os.environ.get("ISPYB_PORT"), + credentials: 
Dict[str, Any] = { + "user": settings.ISPYB_USER, + "pw": settings.ISPYB_PASSWORD, + "host": settings.ISPYB_HOST, + "port": settings.ISPYB_PORT, "db": "ispyb", "conn_inactivity": 360, } ssh_credentials: Dict[str, Any] = { - 'ssh_host': os.environ.get("SSH_HOST"), - 'ssh_user': os.environ.get("SSH_USER"), - 'ssh_password': os.environ.get("SSH_PASSWORD"), + 'ssh_host': settings.SSH_HOST, + 'ssh_user': settings.SSH_USER, + 'ssh_password': settings.SSH_PASSWORD, + "ssh_private_key_filename": settings.SSH_PRIVATE_KEY_FILENAME, 'remote': True, } - ispyb_credentials.update(**ssh_credentials) + credentials.update(**ssh_credentials) # Caution: Credentials may not be set in the environment. # Assume the credentials are invalid if there is no host. # If a host is not defined other properties are useless. - if not ispyb_credentials["host"]: + if not credentials["host"]: logger.debug("No ISPyB host - cannot return a connector") return None # Try to get an SSH connection (aware that it might fail) conn: Optional[SSHConnector] = None try: - conn = SSHConnector(**ispyb_credentials) + conn = SSHConnector(**credentials) except Exception: # Log the exception if DEBUG level or lower/finer? - # The following wil not log if the level is set to INFO for example. + # The following will not log if the level is set to INFO for example. if logging.DEBUG >= logger.level: - logger.info("ispyb_credentials=%s", ispyb_credentials) + logger.info("credentials=%s", credentials) logger.exception("Got the following exception creating SSHConnector...") return conn @@ -89,10 +90,10 @@ def get_remote_conn() -> Optional[SSHConnector]: def get_conn() -> Optional[Connector]: credentials: Dict[str, Any] = { - "user": os.environ.get("ISPYB_USER"), - "pw": os.environ.get("ISPYB_PASSWORD"), - "host": os.environ.get("ISPYB_HOST"), - "port": os.environ.get("ISPYB_PORT"), + "user": settings.ISPYB_USER, + "pw": settings.ISPYB_PASSWORD, + "host": settings.ISPYB_HOST, + "port": settings.ISPYB_PORT, "db": "ispyb", "conn_inactivity": 360, } @@ -108,7 +109,7 @@ def get_conn() -> Optional[Connector]: conn = Connector(**credentials) except Exception: # Log the exception if DEBUG level or lower/finer? - # The following wil not log if the level is set to INFO for example. + # The following will not log if the level is set to INFO for example. if logging.DEBUG >= logger.level: logger.info("credentials=%s", credentials) logger.exception("Got the following exception creating Connector...") @@ -349,7 +350,7 @@ def get_proposals_for_user(self, user, restrict_to_membership=False): assert user proposals = set() - ispyb_user = os.environ.get("ISPYB_USER") + ispyb_user = settings.ISPYB_USER logger.debug( "ispyb_user=%s restrict_to_membership=%s", ispyb_user, diff --git a/fragalysis/settings.py b/fragalysis/settings.py index 93885b01..3aa3b58a 100644 --- a/fragalysis/settings.py +++ b/fragalysis/settings.py @@ -1,18 +1,67 @@ -""" -Django settings for fragalysis project. +"""Django settings for the fragalysis 'backend'""" -Generated by 'django-admin startproject' using Django 1.11.6. - -For more information on this file, see -https://docs.djangoproject.com/en/1.11/topics/settings/ - -For the full list of settings and their values, see -https://docs.djangoproject.com/en/1.11/ref/settings/ -""" +# This standard Django module is used to provide the dynamic configuration of +# the backend logic. 
As well as providing vital django-related configuration +# it is also the source of the numerous fragalysis-specific environment variables +# that control the stack's configuration (behaviour). +# +# Not all settings are configured by environment variable. Some are hard-coded +# and you'll need to edit their values here. Those that are configurable at run-time +# should be obvious (i.e. they'll use "os.environ.get()" to obtain their value) +# alternative run-time value. +# +# You will find the django-related configuration at the top of the file +# (under DJANGO SETTINGS) and the fragalysis-specific configuration at the bottom of +# the file (under FRAGALYSIS SETTINGS). +# +# Guidance for variables: - +# +# 1. Everything *MUST* have a default value, this file should not raise an exception +# if a value cannot be found in the environment, that's the role of the +# application code. +# +# 2. The constant used to hold the environment variable *SHOULD* match the +# environment variable's name. i.e. the "DEPLOYMENT_MODE" environment variable's +# value *SHOULD* be found in 'settings.DEPLOYMENT_MODE'. +# +# Providing run-time values for variables: - +# +# The environment variable values are set using either a 'docker-compose' file +# (when used for local development) or, more typically, via an "Ansible variable" +# provided by the "Ansible playbook" that's responsible for deploying the stack. +# +# Many (not all) of the environment variables are made available +# for deployment using an Ansible playbook variable, explained below. +# +# 1. Ansible variables are lower-case and use "snake case". +# +# 2. Ansible variables that map directly to environment variables in this file +# use the same name as the environment variable and are prefixed with +# "stack_". For example the "DEPLOYMENT_MODE" environment variable +# can be set using the "stack_deployment_mode" variable. +# +# 3. Variables are declared using the 'EXTRA VARIABLES' section of the corresponding +# AWX "Job Template". +# +# IMPORTANTLY: For a description of an environment variable (setting) and its value +# you *MUST* consult the comments in this file ("settings.py"), and *NOT* +# the Ansible playbook. This file is the primary authority for the +# configuration of the Fragalysis Stack. +# +# Ansible variables are declared in "roles/fragalysis-stack/defaults/main.yaml" +# or "roles/fragalysis-stack/vars/main.yaml" of the playbook repository +# https://github.com/xchem/fragalysis-stack-kubernetes +# +# For more information on "settings.py", see +# https://docs.djangoproject.com/en/3.2/topics/settings/ +# +# For the full list of Django-related settings and their values, see +# https://docs.djangoproject.com/en/3.2/ref/settings/ import os import sys from datetime import timedelta +from typing import List import sentry_sdk from sentry_sdk.integrations.celery import CeleryIntegration @@ -20,88 +69,52 @@ from sentry_sdk.integrations.excepthook import ExcepthookIntegration from sentry_sdk.integrations.redis import RedisIntegration -# SECURITY WARNING: don't run with debug turned on in production! -DEBUG = False -if os.environ.get("DEBUG_FRAGALYSIS") == True: - DEBUG = True +# -------------------------------------------------------------------------------------- +# DJANGO SETTINGS +# -------------------------------------------------------------------------------------- -# These flags are used in the upload_tset form as follows. 
-# Proposal Supported | Proposal Required | Proposal / View fields -# Y | Y | Shown / Required -# Y | N | Shown / Optional -# N | N | Not Shown -PROPOSAL_SUPPORTED = True -PROPOSAL_REQUIRED = True +ALLOWED_HOSTS = ["*"] # AnonymousUser should be the first record inserted into the auth_user table. ANONYMOUS_USER = 1 -# This is set on AWX when the fragalysis-stack is rebuilt. -SENTRY_DNS = os.environ.get("FRAGALYSIS_BACKEND_SENTRY_DNS") -if SENTRY_DNS: - # By default only call sentry in staging/production - sentry_sdk.init( - dsn=SENTRY_DNS, - integrations=[ - DjangoIntegration(), - CeleryIntegration(), - RedisIntegration(), - ExcepthookIntegration(always_run=True), - ], - # If you wish to associate users to errors (assuming you are using - # django.contrib.auth) you may enable sending PII data. - send_default_pii=True, - ) - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -PROJECT_ROOT = os.path.abspath(os.path.join(os.path.dirname(__file__), "..")) -# Quick-start development settings - unsuitable for production -# See https://docs.djangoproject.com/en/1.11/howto/deployment/checklist/ - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = os.environ.get( - "WEB_DJANGO_SECRET_KEY", "8flmz)c9i!o&f1-moi5-p&9ak4r9=ck$3!0y1@%34p^(6i*^_9" +AUTHENTICATION_BACKENDS = ( + "django.contrib.auth.backends.ModelBackend", + "fragalysis.auth.KeycloakOIDCAuthenticationBackend", + "guardian.backends.ObjectPermissionBackend", ) -USE_X_FORWARDED_HOST = True -SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") - -ALLOWED_HOSTS = ["*"] - -DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" +# Password validation +# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators -# DATA_UPLOAD_MAX_MEMORY_SIZE = 26214400 # 25 MB +AUTH_PASSWORD_VALIDATORS = [ + { + "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator" + }, + {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"}, + {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"}, + {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"}, +] -REST_FRAMEWORK = { - "DEFAULT_FILTER_BACKENDS": ("django_filters.rest_framework.DjangoFilterBackend",), - "DEFAULT_PAGINATION_CLASS": "rest_framework.pagination.LimitOffsetPagination", - "PAGE_SIZE": 5000, - "DEFAULT_VERSIONING_CLASS": "rest_framework.versioning.QueryParameterVersioning", - 'DEFAULT_AUTHENTICATION_CLASSES': [ - 'rest_framework.authentication.SessionAuthentication', - 'mozilla_django_oidc.contrib.drf.OIDCAuthentication', - 'rest_framework.authentication.BasicAuthentication', - ], -} +# Build paths inside the project like this: os.path.join(BASE_DIR, ...) 
+BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) # CELERY STUFF -CELERY_ACCEPT_CONTENT = ['application/json'] +CELERY_ACCEPT_CONTENT = ["application/json"] CELERY_BROKER_CONNECTION_RETRY_ON_STARTUP = True -CELERY_BROKER_URL = os.environ.get('CELERY_BROKER_URL', 'redis://redis:6379/') -CELERY_RESULT_BACKEND = os.environ.get('CELERY_RESULT_BACKEND', 'redis://redis:6379/0') +CELERY_BROKER_URL = os.environ.get("CELERY_BROKER_URL", "redis://redis:6379/") +CELERY_RESULT_BACKEND = os.environ.get("CELERY_RESULT_BACKEND", "redis://redis:6379/0") CELERY_RESULT_BACKEND_ALWAYS_RETRY = True CELERY_RESULT_EXPIRES = timedelta(days=15) CELERY_TASK_ALWAYS_EAGER = os.environ.get( - 'CELERY_TASK_ALWAYS_EAGER', 'False' -).lower() in ['true', 'yes'] + "CELERY_TASK_ALWAYS_EAGER", "False" +).lower() in ["true", "yes"] CELERY_WORKER_HIJACK_ROOT_LOGGER = False -# This can be injected as an ENV var -NEOMODEL_NEO4J_BOLT_URL = os.environ.get( - "NEO4J_BOLT_URL", "bolt://neo4j:test@neo4j:7687" -) +# SECURITY WARNING: don't run with DUBUG turned on in production! +DEBUG = os.environ.get("DEBUG_FRAGALYSIS") == "True" + +DEFAULT_AUTO_FIELD = "django.db.models.BigAutoField" # Application definition INSTALLED_APPS = [ @@ -136,6 +149,17 @@ "simple_history", ] +LANGUAGE_CODE = "en-us" + +# Swagger logging / logout +LOGIN_URL = "/accounts/login/" +LOGOUT_URL = "/accounts/logout/" +# LOGIN_REDIRECT_URL = "" +LOGIN_REDIRECT_URL = "/viewer/react/landing" +# LOGOUT_REDIRECT_URL = "" +LOGOUT_REDIRECT_URL = "/viewer/react/landing" + MIDDLEWARE = [ "django.middleware.security.SecurityMiddleware", "django.contrib.sessions.middleware.SessionMiddleware", @@ -147,25 +171,88 @@ "mozilla_django_oidc.middleware.SessionRefresh", ] -AUTHENTICATION_BACKENDS = ( - "django.contrib.auth.backends.ModelBackend", - "fragalysis.auth.KeycloakOIDCAuthenticationBackend", - "guardian.backends.ObjectPermissionBackend", +PROJECT_ROOT = os.path.abspath(os.path.join(BASE_DIR, "..")) + +REST_FRAMEWORK = { + "DEFAULT_FILTER_BACKENDS": ("django_filters.rest_framework.DjangoFilterBackend",), + "DEFAULT_PAGINATION_CLASS": "rest_framework.pagination.LimitOffsetPagination", + "PAGE_SIZE": 5000, + "DEFAULT_VERSIONING_CLASS": "rest_framework.versioning.QueryParameterVersioning", + "DEFAULT_AUTHENTICATION_CLASSES": [ + "rest_framework.authentication.SessionAuthentication", + "mozilla_django_oidc.contrib.drf.OIDCAuthentication", + "rest_framework.authentication.BasicAuthentication", + ], +} + +ROOT_URLCONF = "fragalysis.urls" + +# SECURITY WARNING: keep the secret key used in production secret! +SECRET_KEY = os.environ.get( + "WEB_DJANGO_SECRET_KEY", "8flmz)c9i!o&f1-moi5-p&9ak4r9=ck$3!0y1@%34p^(6i*^_9" ) -STATICFILES_DIRS = [os.path.join(BASE_DIR, "fragalysis", "../viewer/static")] +if SENTRY_DNS := os.environ.get("FRAGALYSIS_BACKEND_SENTRY_DNS"): + # By default only call sentry in staging/production + sentry_sdk.init( + dsn=SENTRY_DNS, + integrations=[ + DjangoIntegration(), + CeleryIntegration(), + RedisIntegration(), + ExcepthookIntegration(always_run=True), + ], + # If you wish to associate users to errors (assuming you are using + # django.contrib.auth) you may enable sending PII data. 
+ send_default_pii=True, + ) +STATIC_ROOT = os.path.join(PROJECT_ROOT, "static") +STATICFILES_DIRS = [os.path.join(BASE_DIR, "fragalysis", "../viewer/static")] STATICFILES_FINDERS = ( "django.contrib.staticfiles.finders.FileSystemFinder", "django.contrib.staticfiles.finders.AppDirectoriesFinder", ) -# mozilla_django_oidc - from documentation: https://mozilla-django-oidc.readthedocs.io/en/stable/ -# Before you can configure your application, you need to set up a client with an OpenID Connect provider (OP). -# You’ll need to set up a different client for every environment you have for your site. For example, -# if your site has a -dev, -stage, and -prod environments, each of those has a different hostname and thus you +USE_X_FORWARDED_HOST = True +SECURE_PROXY_SSL_HEADER = ("HTTP_X_FORWARDED_PROTO", "https") + +# A list of identifiers of messages generated by the system check framework +# that we wish to permanently acknowledge and ignore. +# Silenced checks will not be output to the console. +# +# fields.W342 Is issued for the xchem-db package. +# The hint is "ForeignKey(unique=True) is usually better served by a OneToOneField." +SILENCED_SYSTEM_CHECKS = [ + "fields.W342", +] + +TEMPLATES = [ + { + "BACKEND": "django.template.backends.django.DjangoTemplates", + "DIRS": [], + "APP_DIRS": True, + "OPTIONS": { + "context_processors": [ + "django.template.context_processors.debug", + "django.template.context_processors.request", + "django.contrib.auth.context_processors.auth", + "django.contrib.messages.context_processors.messages", + ] + }, + } +] + +TIME_ZONE = "UTC" + +# mozilla_django_oidc. +# See: https://mozilla-django-oidc.readthedocs.io/en/stable/ +# Before you can configure your application, you need to set up a client with +# an OpenID Connect provider (OP). You’ll need to set up a different client for +# every environment you have for your site. For example, if your site has a -dev, +# -stage, and -prod environments, each of those has a different hostname and thus you # need to set up a separate client for each one. -# you need to provide your OpenID Connect provider (OP) the callback url for your site. +# You need to provide your OpenID Connect provider (OP) the callback url for your site. # The URL path for the callback url is /oidc/callback/. # # Here are examples of callback urls: @@ -179,14 +266,18 @@ # a client id (OIDC_RP_CLIENT_ID) # a client secret (OIDC_RP_CLIENT_SECRET) -# Keycloak mozilla_django_oidc - Settings -# from keyclaok (openid provider = OP) - NB these should be environment variables - not checked in +# Keycloak mozilla_django_oidc settings (openid provider = OP). +# These should be environment variables - not checked in OIDC_RP_CLIENT_ID = os.environ.get("OIDC_RP_CLIENT_ID", "fragalysis-local") -OIDC_RP_CLIENT_SECRET = os.environ.get('OIDC_RP_CLIENT_SECRET') +OIDC_RP_CLIENT_SECRET = os.environ.get("OIDC_RP_CLIENT_SECRET") OIDC_KEYCLOAK_REALM = os.environ.get( "OIDC_KEYCLOAK_REALM", "https://keycloak.xchem-dev.diamond.ac.uk/auth/realms/xchem" ) +# Squonk2 Account Server and Data Manager Client IDs +OIDC_AS_CLIENT_ID: str = os.environ.get("OIDC_AS_CLIENT_ID", "") +OIDC_DM_CLIENT_ID: str = os.environ.get("OIDC_DM_CLIENT_ID", "") + # OIDC_OP_AUTHORIZATION_ENDPOINT = "" OIDC_OP_AUTHORIZATION_ENDPOINT = os.path.join( OIDC_KEYCLOAK_REALM, "protocol/openid-connect/auth" @@ -199,11 +290,13 @@ OIDC_OP_USER_ENDPOINT = os.path.join( OIDC_KEYCLOAK_REALM, "protocol/openid-connect/userinfo" ) -# OIDC_OP_JWKS_ENDPOINT = "" - This is required when using RS256. 
+# OIDC_OP_JWKS_ENDPOINT = ""
+# This is required when using RS256.
OIDC_OP_JWKS_ENDPOINT = os.path.join(
    OIDC_KEYCLOAK_REALM, "protocol/openid-connect/certs"
)
-# OIDC_OP_LOGOUT_ENDPOINT = "" - This is required when using RS256.
+# OIDC_OP_LOGOUT_ENDPOINT = ""
+# This is required when using RS256.
OIDC_OP_LOGOUT_ENDPOINT = os.path.join(
    OIDC_KEYCLOAK_REALM, "protocol/openid-connect/logout"
)
@@ -212,76 +305,23 @@
# If desired, this should be set to "fragalysis.views.keycloak_logout"
OIDC_OP_LOGOUT_URL_METHOD = os.environ.get("OIDC_OP_LOGOUT_URL_METHOD")

-# LOGIN_REDIRECT_URL = ""
-LOGIN_REDIRECT_URL = "/viewer/react/landing"
-# LOGOUT_REDIRECT_URL = ""
-LOGOUT_REDIRECT_URL = "/viewer/react/landing"
-
# After much trial and error
-# Using RS256 + JWKS Endpoint seems to work with no value for OIDC_RP_IDP_SIGN_KEY seems to work for authentication.
-# Trying HS256 produces a "JWS token verification failed" error for some reason.
+# Using RS256 + a JWKS Endpoint with no value for OIDC_RP_IDP_SIGN_KEY
+# seems to work for authentication. Trying HS256 produces a "JWS token verification failed"
+# error for some reason.
OIDC_RP_SIGN_ALGO = "RS256"
OIDC_STORE_ACCESS_TOKEN = True
OIDC_STORE_ID_TOKEN = True

-# Security/access control connector.
-# Currently one of 'ispyb' or 'ssh_ispyb'.
-SECURITY_CONNECTOR = os.environ.get('SECURITY_CONNECTOR', 'ispyb').lower()
-# Number of minutes to cache security information for a user.
-# Set to '0' to disable caching.
-SECURITY_CONNECTOR_CACHE_MINUTES = int(
-    os.environ.get('SECURITY_CONNECTOR_CACHE_MINUTES', '2')
-)
-
# SessionRefresh configuration.
# There's only one item - the token expiry period, with a default of 15 minutes.
# The default is 15 minutes if you don't set this value.
TOKEN_EXPIRY_MINUTES = os.environ.get("OIDC_RENEW_ID_TOKEN_EXPIRY_MINUTES", "15")
OIDC_RENEW_ID_TOKEN_EXPIRY_SECONDS = int(TOKEN_EXPIRY_MINUTES) * 60
-# Keycloak mozilla_django_oidc - Settings - End
-
-# The deployment mode.
-# Controls the behaviour of the application (it's strictness to errors etc).
-# Typically one of "DEVELOPMENT" or "PRODUCTION".
-# see api.utils for the 'deployment_mode_is_production()' function.
-DEPLOYMENT_MODE = os.environ.get("DEPLOYMENT_MODE", "production").upper()
-
-# Authentication check when uploading files.
-# This can be switched off to simplify development testing if required.
-# It's asserted as True for 'production' mode.
-AUTHENTICATE_UPLOAD = True -if os.environ.get("AUTHENTICATE_UPLOAD") == 'False': - assert DEPLOYMENT_MODE != "PRODUCTION" - AUTHENTICATE_UPLOAD = False - -ROOT_URLCONF = "fragalysis.urls" - -STATIC_ROOT = os.path.join(PROJECT_ROOT, "static") - -TEMPLATES = [ - { - "BACKEND": "django.template.backends.django.DjangoTemplates", - "DIRS": [], - "APP_DIRS": True, - "OPTIONS": { - "context_processors": [ - "django.template.context_processors.debug", - "django.template.context_processors.request", - "django.contrib.auth.context_processors.auth", - "django.contrib.messages.context_processors.messages", - ] - }, - } -] WSGI_APPLICATION = "fragalysis.wsgi.application" -# Database -# https://docs.djangoproject.com/en/1.11/ref/settings/#databases - -CHEMCENTRAL_DB_NAME = os.environ.get("CHEMCENT_DB_NAME", "UNKOWN") - -DATABASE_ROUTERS = ['xchem_db.routers.AuthRouter'] +DATABASE_ROUTERS = ["xchem_db.routers.AuthRouter"] DATABASES = { "default": { @@ -294,9 +334,9 @@ } } -if os.environ.get("BUILD_XCDB") == 'yes': +if os.environ.get("BUILD_XCDB") == "yes": DATABASES["xchem_db"] = { - "ENGINE": 'django.db.backends.postgresql', + "ENGINE": "django.db.backends.postgresql", "NAME": os.environ.get("XCHEM_NAME", ""), "USER": os.environ.get("XCHEM_USER", ""), "PASSWORD": os.environ.get("XCHEM_PASSWORD", ""), @@ -304,7 +344,8 @@ "PORT": os.environ.get("XCHEM_PORT", ""), } -if CHEMCENTRAL_DB_NAME != "UNKOWN": +CHEMCENTRAL_DB_NAME = os.environ.get("CHEMCENT_DB_NAME", "UNKNOWN") +if CHEMCENTRAL_DB_NAME != "UNKNOWN": DATABASES["chemcentral"] = { "ENGINE": "django.db.backends.postgresql", "NAME": CHEMCENTRAL_DB_NAME, @@ -314,40 +355,14 @@ "PORT": 5432, } -# Password validation -# https://docs.djangoproject.com/en/1.11/ref/settings/#auth-password-validators - -AUTH_PASSWORD_VALIDATORS = [ - { - "NAME": "django.contrib.auth.password_validation.UserAttributeSimilarityValidator" - }, - {"NAME": "django.contrib.auth.password_validation.MinimumLengthValidator"}, - {"NAME": "django.contrib.auth.password_validation.CommonPasswordValidator"}, - {"NAME": "django.contrib.auth.password_validation.NumericPasswordValidator"}, -] - -# Internationalization -# https://docs.djangoproject.com/en/1.11/topics/i18n/ - -LANGUAGE_CODE = "en-us" - -TIME_ZONE = "UTC" - USE_I18N = True - USE_L10N = True - USE_TZ = True # Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/1.11/howto/static-files/ - STATIC_URL = "/static/" MEDIA_ROOT = "/code/media/" MEDIA_URL = "/media/" -# Swagger loging / logout -LOGIN_URL = "/accounts/login/" -LOGOUT_URL = "/accounts/logout/" WEBPACK_LOADER = { "DEFAULT": { @@ -361,69 +376,13 @@ GRAPH_MODELS = {"all_applications": True, "group_models": True} # email settings for upload key stuff -EMAIL_BACKEND = 'django.core.mail.backends.smtp.EmailBackend' -EMAIL_HOST_USER = os.environ.get("EMAIL_USER") -# If there is an email user is defined then check the rest of the configuration is present. -# The defaults are set for the current (gamil) production configuration. 
-if EMAIL_HOST_USER: - EMAIL_HOST = os.environ.get('EMAIL_HOST', 'smtp.gmail.com') - EMAIL_USE_TLS = os.environ.get('EMAIL_USE_TLS', True) - EMAIL_PORT = os.environ.get('EMAIL_PORT', 587) +EMAIL_BACKEND = "django.core.mail.backends.smtp.EmailBackend" +if EMAIL_HOST_USER := os.environ.get("EMAIL_USER"): + EMAIL_HOST = os.environ.get("EMAIL_HOST", "smtp.gmail.com") + EMAIL_USE_TLS = os.environ.get("EMAIL_USE_TLS", True) + EMAIL_PORT = os.environ.get("EMAIL_PORT", 587) EMAIL_HOST_PASSWORD = os.environ.get("EMAIL_PASSWORD") - -# DOCS_ROOT = "/code/docs/_build/html " - -# Discourse settings for API calls to Discourse Platform -DISCOURSE_PARENT_CATEGORY = 'Fragalysis targets' -DISCOURSE_USER = 'fragalysis' -DISCOURSE_HOST = os.environ.get('DISCOURSE_HOST') -# Note that this can be obtained from discourse for the desired environment. -DISCOURSE_API_KEY = os.environ.get("DISCOURSE_API_KEY") - -# This suffix can be set to that the different development environments posting to the same Discourse -# server can "automatically" generate different category/post titles - hopefully reducing confusion. -# It will be appended at category or post-title, e.g. "Mpro-duncan", "Mpro-staging" etc. -# Note that it is for dev systems. It is not required on production because production will have a -# dedicated Discourse server. -DISCOURSE_DEV_POST_SUFFIX = os.environ.get("DISCOURSE_DEV_POST_SUFFIX", '') - -# An optional URL that identifies the URL to a prior stack. -# If set, it's typically something like "https://fragalysis.diamond.ac.uk". -# It can be blank, indicating there is no legacy service. -LEGACY_URL = os.environ.get("LEGACY_URL", "") - -SQUONK2_MEDIA_DIRECTORY = "fragalysis-files" -SQUONK2_INSTANCE_API = "data-manager-ui/results/instance/" - -# The Target Access String (TAS) Python regular expression. -# The Project title (the TAS) must match this expression to be valid. -# See api/utils.py validate_tas() for the current implementation. -# To simplify error messages when the match fails you can also -# add an error message. -TAS_REGEX = os.environ.get("TAS_REGEX", r"^(lb\d{5})(-(\d+)){0,1}$") -TAS_REGEX_ERROR_MSG = os.environ.get( - "TAS_REGEX_ERROR_MSG", - "Must begin 'lb' followed by 5 digits, optionally followed by a hyphen and a number.", -) -# Are any public target access strings defined? -# If so they'll be in the PUBLIC_TAS variable as a comma separated list. -PUBLIC_TAS = os.environ.get("PUBLIC_TAS", "") -PUBLIC_TAS_LIST = PUBLIC_TAS.split(",") if PUBLIC_TAS else [] - -COMPUTED_SET_MEDIA_DIRECTORY = "computed_set_data" -TARGET_LOADER_MEDIA_DIRECTORY = "target_loader_data" - -# A list of identifiers of messages generated by the system check framework -# that we wish to permanently acknowledge and ignore. -# Silenced checks will not be output to the console. -# -# fields.W342 Is issued for the xchem-db package. -# The hint is "ForeignKey(unique=True) is usually better served by a OneToOneField." -SILENCED_SYSTEM_CHECKS = [ - "fields.W342", -] - # Configure django logging. # We provide a standard formatter that emits a timestamp, the module issuing the log # and the level name, a little like this... @@ -433,36 +392,34 @@ # We provide a console and rotating file handler # (50Mi of logging in 10 files of 5M each), # with the rotating file handler typically used for everything. 
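# (Editor's illustration, hedged: given the 'simple' format and datefmt below, a
# typical record renders something like
# "2024-02-19T12:00:00+0000 viewer.views.version():32 INFO # ..." - an ISO-8601
# timestamp, logger name, function and line number, the level, then the message.)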
-DISABLE_LOGGING_FRAMEWORK = (
-    True
-    if os.environ.get("DISABLE_LOGGING_FRAMEWORK", "no").lower() in ["yes"]
-    else False
-)
+DISABLE_LOGGING_FRAMEWORK = os.environ.get(
+    "DISABLE_LOGGING_FRAMEWORK", "no"
+).lower() in ["yes"]
LOGGING_FRAMEWORK_ROOT_LEVEL = os.environ.get("LOGGING_FRAMEWORK_ROOT_LEVEL", "DEBUG")
if not DISABLE_LOGGING_FRAMEWORK:
    LOGGING = {
-        'version': 1,
-        'disable_existing_loggers': False,
-        'formatters': {
-            'simple': {
-                'format': '%(asctime)s %(name)s.%(funcName)s():%(lineno)s %(levelname)s # %(message)s',
-                'datefmt': '%Y-%m-%dT%H:%M:%S%z',
+        "version": 1,
+        "disable_existing_loggers": False,
+        "formatters": {
+            "simple": {
+                "format": "%(asctime)s %(name)s.%(funcName)s():%(lineno)s %(levelname)s # %(message)s",
+                "datefmt": "%Y-%m-%dT%H:%M:%S%z",
            }
        },
-        'handlers': {
-            'console': {
-                'level': 'DEBUG',
-                'class': 'logging.StreamHandler',
-                'stream': sys.stdout,
-                'formatter': 'simple',
+        "handlers": {
+            "console": {
+                "level": "DEBUG",
+                "class": "logging.StreamHandler",
+                "stream": sys.stdout,
+                "formatter": "simple",
            },
-            'rotating': {
-                'level': 'DEBUG',
-                'class': 'logging.handlers.RotatingFileHandler',
-                'maxBytes': 5_000_000,
-                'backupCount': 10,
-                'filename': os.path.join(BASE_DIR, 'logs/backend.log'),
-                'formatter': 'simple',
+            "rotating": {
+                "level": "DEBUG",
+                "class": "logging.handlers.RotatingFileHandler",
+                "maxBytes": 5_000_000,
+                "backupCount": 10,
+                "filename": os.path.join(BASE_DIR, "logs/backend.log"),
+                "formatter": "simple",
            },
        },
        'loggers': {
@@ -474,8 +431,146 @@
            'urllib3': {'level': 'WARNING'},
            'paramiko': {'level': 'WARNING'},
        },
-        'root': {
-            'level': LOGGING_FRAMEWORK_ROOT_LEVEL,
-            'handlers': ['console', 'rotating'],
+        "root": {
+            "level": LOGGING_FRAMEWORK_ROOT_LEVEL,
+            "handlers": ["console", "rotating"],
        },
    }
+
+# --------------------------------------------------------------------------------------
+# FRAGALYSIS SETTINGS
+# --------------------------------------------------------------------------------------
+# With comprehensive comments where necessary to explain the setting's values.
+
+# The deployment mode.
+# Controls the behaviour of the application (its strictness to errors etc).
+# Typically one of "DEVELOPMENT" or "PRODUCTION".
+# see api.utils for the 'deployment_mode_is_production()' function.
+DEPLOYMENT_MODE: str = os.environ.get("DEPLOYMENT_MODE", "production").upper()
+
+# Authentication check when uploading files.
+# This can be switched off to simplify development testing if required.
+# It's asserted as True for 'production' mode.
+AUTHENTICATE_UPLOAD: bool = True
+if os.environ.get("AUTHENTICATE_UPLOAD") == "False":
+    assert DEPLOYMENT_MODE != "PRODUCTION"
+    AUTHENTICATE_UPLOAD = False
+
+COMPUTED_SET_MEDIA_DIRECTORY: str = "computed_set_data"
+
+# Discourse settings for API calls to Discourse Platform
+DISCOURSE_PARENT_CATEGORY: str = "Fragalysis targets"
+DISCOURSE_USER: str = "fragalysis"
+DISCOURSE_HOST: str = os.environ.get("DISCOURSE_HOST", "")
+# Note that this can be obtained from discourse for the desired environment.
+DISCOURSE_API_KEY: str = os.environ.get("DISCOURSE_API_KEY", "")
+# This suffix can be set so that the different development environments posting
+# to the same Discourse server can "automatically" generate different category/post
+# titles - hopefully reducing confusion. It will be appended to the category or post-title,
+# e.g. "Mpro-duncan", "Mpro-staging" etc. Note that it is for dev systems.
+# It is not required on production because production will have a
+# dedicated Discourse server.
+DISCOURSE_DEV_POST_SUFFIX: str = os.environ.get("DISCOURSE_DEV_POST_SUFFIX", "")
+
+DUMMY_TARGET_TITLE: str = os.environ.get("DUMMY_TARGET_TITLE", "")
+DUMMY_USER: str = os.environ.get("DUMMY_USER", "")
+DUMMY_TAS: str = os.environ.get("DUMMY_TAS", "")
+
+# Do we enable the collection and presentation
+# of the availability of underlying services?
+# A colon (:) separated list of services to enable.
+# See "viewer/services.py" for the full list of supported services.
+ENABLE_SERVICE_STATUS: str = os.environ.get("ENABLE_SERVICE_STATUS", "")
+
+# What infections have been set?
+# "Infections" are built-in faults that can be induced by providing their names.
+# Typically these are "hard to reproduce" errors that are useful for testing.
+# The names are provided in a comma-separated list in this variable.
+# The full set of supported names can be found in "api/infections.py"
+INFECTIONS: str = os.environ.get("INFECTIONS", "").lower()
+
+# The ISPyB database settings.
+# Can be used in conjunction with SSH settings (later in this file)
+ISPYB_USER: str = os.environ.get("ISPYB_USER", "")
+ISPYB_PASSWORD: str = os.environ.get("ISPYB_PASSWORD", "")
+ISPYB_HOST: str = os.environ.get("ISPYB_HOST", "")
+ISPYB_PORT: str = os.environ.get("ISPYB_PORT", "")
+
+# An optional URL that identifies a prior stack.
+# If set, it's typically something like "https://fragalysis.diamond.ac.uk".
+# It can be blank, indicating there is no legacy service.
+LEGACY_URL: str = os.environ.get("LEGACY_URL", "")
+
+NEOMODEL_NEO4J_BOLT_URL: str = os.environ.get(
+    "NEO4J_BOLT_URL", "bolt://neo4j:test@neo4j:7687"
+)
+
+NEO4J_QUERY: str = os.environ.get("NEO4J_QUERY", "neo4j")
+NEO4J_AUTH: str = os.environ.get("NEO4J_AUTH", "neo4j/neo4j")
+
+# These flags are used in the upload_tset form as follows.
+#   Proposal Supported | Proposal Required | Proposal / View fields
+#   Y                  | Y                 | Shown / Required
+#   Y                  | N                 | Shown / Optional
+#   N                  | N                 | Not Shown
+PROPOSAL_SUPPORTED: bool = True
+PROPOSAL_REQUIRED: bool = True
+
+# Are any public target access strings defined?
+# If so they'll be in the PUBLIC_TAS variable as a comma separated list.
+PUBLIC_TAS: str = os.environ.get("PUBLIC_TAS", "")
+PUBLIC_TAS_LIST: List[str] = PUBLIC_TAS.split(",") if PUBLIC_TAS else []
+
+# Security/access control connector.
+# Currently one of 'ispyb' or 'ssh_ispyb'.
+SECURITY_CONNECTOR: str = os.environ.get("SECURITY_CONNECTOR", "ispyb").lower()
+# Number of minutes to cache security information for a user.
+# Set to '0' to disable caching.
+SECURITY_CONNECTOR_CACHE_MINUTES: int = int(
+    os.environ.get("SECURITY_CONNECTOR_CACHE_MINUTES", "2")
+)
+
+# An SSH host.
+# Used in the security module in conjunction with ISPyB settings.
+# Any SSH_PRIVATE_KEY_FILENAME value will be used in preference to SSH_PASSWORD.
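+# (Editor's note, hedged: these SSH settings are expected to matter only when
+# SECURITY_CONNECTOR above is 'ssh_ispyb', i.e. when ISPyB is reached over an
+# SSH tunnel; with the plain 'ispyb' connector they can be left unset.)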
+SSH_HOST: str = os.environ.get("SSH_HOST", "") +SSH_USER: str = os.environ.get("SSH_USER", "") +SSH_PASSWORD: str = os.environ.get("SSH_PASSWORD", "") +SSH_PRIVATE_KEY_FILENAME: str = os.environ.get("SSH_PRIVATE_KEY_FILENAME", "") + +# A slug used for names this Fragalysis will create +SQUONK2_MAX_SLUG_LENGTH: int = 10 + +SQUONK2_MEDIA_DIRECTORY: str = "fragalysis-files" +SQUONK2_INSTANCE_API: str = "data-manager-ui/results/instance/" + +SQUONK2_ASAPI_URL: str = os.environ.get("SQUONK2_ASAPI_URL", "") +SQUONK2_DMAPI_URL: str = os.environ.get("SQUONK2_DMAPI_URL", "") +SQUONK2_UI_URL: str = os.environ.get("SQUONK2_UI_URL", "") +SQUONK2_ORG_UUID: str = os.environ.get("SQUONK2_ORG_UUID", "") +SQUONK2_UNIT_BILLING_DAY: str = os.environ.get("SQUONK2_UNIT_BILLING_DAY", "") +SQUONK2_PRODUCT_FLAVOUR: str = os.environ.get("SQUONK2_PRODUCT_FLAVOUR", "") +SQUONK2_SLUG: str = os.environ.get("SQUONK2_SLUG", "")[:SQUONK2_MAX_SLUG_LENGTH] +SQUONK2_ORG_OWNER: str = os.environ.get("SQUONK2_ORG_OWNER", "") +SQUONK2_ORG_OWNER_PASSWORD: str = os.environ.get("SQUONK2_ORG_OWNER_PASSWORD", "") +SQUONK2_VERIFY_CERTIFICATES: str = os.environ.get("SQUONK2_VERIFY_CERTIFICATES", "") + +TARGET_LOADER_MEDIA_DIRECTORY: str = "target_loader_data" + +# The Target Access String (TAS) Python regular expression. +# The Project title (the TAS) must match this expression to be valid. +# See api/utils.py validate_tas() for the current implementation. +# To simplify error messages when the match fails you can also +# add an error message. +TAS_REGEX: str = os.environ.get("TAS_REGEX", r"^(lb\d{5})(-(\d+)){0,1}$") +TAS_REGEX_ERROR_MSG: str = os.environ.get( + "TAS_REGEX_ERROR_MSG", + "Must begin 'lb' followed by 5 digits, optionally followed by a hyphen and a number.", +) + +BE_NAMESPACE: str = os.environ.get("BE_NAMESPACE", "undefined") +BE_IMAGE_TAG: str = os.environ.get("BE_IMAGE_TAG", "undefined") +FE_NAMESPACE: str = os.environ.get("FE_NAMESPACE", "undefined") +FE_IMAGE_TAG: str = os.environ.get("FE_IMAGE_TAG", "undefined") +STACK_NAMESPACE: str = os.environ.get("STACK_NAMESPACE", "undefined") +STACK_VERSION: str = os.environ.get("STACK_VERSION", "undefined") diff --git a/fragalysis/views.py b/fragalysis/views.py index c68a4d4e..7b14e912 100644 --- a/fragalysis/views.py +++ b/fragalysis/views.py @@ -1,6 +1,4 @@ # Classes/Methods to override default OIDC Views (Keycloak authentication) -import os - from django.conf import settings from django.http import JsonResponse from mozilla_django_oidc.views import OIDCLogoutView @@ -34,41 +32,12 @@ def version(request): # Unused args del request - undefined_value = "undefined" - # b/e, f/e and stack origin comes form container environment variables. 
- # - # We also need to deal with empty or unset strings - # so the get() default does not help - be_namespace = os.environ.get('BE_NAMESPACE') - if not be_namespace: - be_namespace = undefined_value - - be_image_tag = os.environ.get('BE_IMAGE_TAG') - if not be_image_tag: - be_image_tag = undefined_value - - fe_namespace = os.environ.get('FE_NAMESPACE') - if not fe_namespace: - fe_namespace = undefined_value - - fe_branch = os.environ.get('FE_BRANCH') - if not fe_branch: - fe_branch = undefined_value - - stack_namespace = os.environ.get('STACK_NAMESPACE') - if not stack_namespace: - stack_namespace = undefined_value - - stack_version = os.environ.get('STACK_VERSION') - if not stack_version: - stack_version = undefined_value - version_response = { - 'version': { - 'backend': f'{be_namespace}:{be_image_tag}', - 'frontend': f'{fe_namespace}:{fe_branch}', - 'stack': f'{stack_namespace}:{stack_version}', + "version": { + "backend": f"{settings.BE_NAMESPACE}:{settings.BE_IMAGE_TAG}", + "frontend": f"{settings.FE_NAMESPACE}:{settings.FE_IMAGE_TAG}", + "stack": f"{settings.STACK_NAMESPACE}:{settings.STACK_VERSION}", } } return JsonResponse(version_response) diff --git a/network/views.py b/network/views.py index 8d790247..0be98a9d 100644 --- a/network/views.py +++ b/network/views.py @@ -1,5 +1,4 @@ -import os - +from django.conf import settings from django.http import HttpResponse from frag.network.decorate import get_add_del_link from frag.network.query import get_full_graph @@ -8,13 +7,9 @@ def full_graph(request): - """ - Get the full graph for a molecule from an input smiles - :param request: - :return: - """ - graph_choice = os.environ.get("NEO4J_QUERY", "neo4j") - graph_auth = os.environ.get("NEO4J_AUTH", "neo4j/neo4j") + """Get the full graph for a molecule from an input smiles""" + graph_choice = settings.NEO4J_QUERY + graph_auth = settings.NEO4J_AUTH if "graph_choice" in request.GET: graph_choice = request.GET["graph_choice"] if "smiles" in request.GET: diff --git a/viewer/serializers.py b/viewer/serializers.py index 15093985..8694419f 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -465,8 +465,8 @@ class Meta: class GraphSerializer(serializers.ModelSerializer): graph = serializers.SerializerMethodField() - graph_choice = os.environ.get("NEO4J_QUERY", "neo4j") - graph_auth = os.environ.get("NEO4J_AUTH", "neo4j/neo4j") + graph_choice = settings.NEO4J_QUERY + graph_auth = settings.NEO4J_AUTH def get_graph(self, obj): return get_full_graph( diff --git a/viewer/services.py b/viewer/services.py index a203bfc3..77417143 100644 --- a/viewer/services.py +++ b/viewer/services.py @@ -6,6 +6,7 @@ from enum import Enum import requests +from django.conf import settings from frag.utils.network_utils import get_driver from pydiscourse import DiscourseClient @@ -18,8 +19,8 @@ # Default timeout for any request calls REQUEST_TIMEOUT_S = 5 -_NEO4J_LOCATION: str = os.environ.get("NEO4J_QUERY", "neo4j") -_NEO4J_AUTH: str = os.environ.get("NEO4J_AUTH", "neo4j/neo4j") +_NEO4J_LOCATION: str = settings.NEO4J_QUERY +_NEO4J_AUTH: str = settings.NEO4J_AUTH class State(str, Enum): diff --git a/viewer/squonk2_agent.py b/viewer/squonk2_agent.py index 3d4f7936..aadab6c0 100644 --- a/viewer/squonk2_agent.py +++ b/viewer/squonk2_agent.py @@ -12,6 +12,7 @@ from urllib.parse import ParseResult, urlparse import requests +from django.conf import settings from requests import Response from squonk2.as_api import AsApi, AsApiRv from squonk2.auth import Auth @@ -58,9 +59,7 @@ # How long are Squonk2 'names'? 
_SQ2_MAX_NAME_LENGTH: int = 80 -# A slug used for names this Fragalysis will create -# and a prefix string. So Squonk2 objects will be called "Fragalysis {slug}" -_MAX_SLUG_LENGTH: int = 10 +# An object prefix string. So Squonk2 objects will be called "Fragalysis {slug}" _SQ2_NAME_PREFIX: str = "Fragalysis" # Built-in @@ -94,46 +93,24 @@ def __init__(self): # "Fragalysis {SLUG} ", this leaves (80-22) 58 characters for the # use with the target-access-string and session project strings # to form Squonk2 Unit and Project names. - self.__CFG_SQUONK2_ASAPI_URL: Optional[str] = os.environ.get( - 'SQUONK2_ASAPI_URL' - ) - self.__CFG_SQUONK2_DMAPI_URL: Optional[str] = os.environ.get( - 'SQUONK2_DMAPI_URL' - ) - self.__CFG_SQUONK2_UI_URL: Optional[str] = os.environ.get('SQUONK2_UI_URL') - self.__CFG_SQUONK2_ORG_UUID: Optional[str] = os.environ.get('SQUONK2_ORG_UUID') - self.__CFG_SQUONK2_UNIT_BILLING_DAY: Optional[str] = os.environ.get( - 'SQUONK2_UNIT_BILLING_DAY' - ) - self.__CFG_SQUONK2_PRODUCT_FLAVOUR: Optional[str] = os.environ.get( - 'SQUONK2_PRODUCT_FLAVOUR' - ) - self.__CFG_SQUONK2_SLUG: Optional[str] = os.environ.get('SQUONK2_SLUG', '')[ - :_MAX_SLUG_LENGTH - ] - self.__CFG_SQUONK2_ORG_OWNER: Optional[str] = os.environ.get( - 'SQUONK2_ORG_OWNER' - ) - self.__CFG_SQUONK2_ORG_OWNER_PASSWORD: Optional[str] = os.environ.get( - 'SQUONK2_ORG_OWNER_PASSWORD' - ) - self.__CFG_OIDC_AS_CLIENT_ID: Optional[str] = os.environ.get( - 'OIDC_AS_CLIENT_ID' - ) - self.__CFG_OIDC_DM_CLIENT_ID: Optional[str] = os.environ.get( - 'OIDC_DM_CLIENT_ID' - ) - self.__CFG_OIDC_KEYCLOAK_REALM: Optional[str] = os.environ.get( - 'OIDC_KEYCLOAK_REALM' - ) + self.__CFG_SQUONK2_ASAPI_URL: str = settings.SQUONK2_ASAPI_URL + self.__CFG_SQUONK2_DMAPI_URL: str = settings.SQUONK2_DMAPI_URL + self.__CFG_SQUONK2_UI_URL: str = settings.SQUONK2_UI_URL + self.__CFG_SQUONK2_ORG_UUID: str = settings.SQUONK2_ORG_UUID + self.__CFG_SQUONK2_UNIT_BILLING_DAY: str = settings.SQUONK2_UNIT_BILLING_DAY + self.__CFG_SQUONK2_PRODUCT_FLAVOUR: str = settings.SQUONK2_PRODUCT_FLAVOUR + self.__CFG_SQUONK2_SLUG: str = settings.SQUONK2_SLUG + self.__CFG_SQUONK2_ORG_OWNER: str = settings.SQUONK2_ORG_OWNER + self.__CFG_SQUONK2_ORG_OWNER_PASSWORD: str = settings.SQUONK2_ORG_OWNER_PASSWORD + self.__CFG_OIDC_AS_CLIENT_ID: str = settings.OIDC_AS_CLIENT_ID + self.__CFG_OIDC_DM_CLIENT_ID: str = settings.OIDC_DM_CLIENT_ID + self.__CFG_OIDC_KEYCLOAK_REALM: str = settings.OIDC_KEYCLOAK_REALM # Optional config (no '__CFG_' prefix) - self.__DUMMY_TARGET_TITLE: Optional[str] = os.environ.get('DUMMY_TARGET_TITLE') - self.__DUMMY_USER: Optional[str] = os.environ.get('DUMMY_USER') - self.__DUMMY_TAS: Optional[str] = os.environ.get('DUMMY_TAS') - self.__SQUONK2_VERIFY_CERTIFICATES: Optional[str] = os.environ.get( - 'SQUONK2_VERIFY_CERTIFICATES' - ) + self.__DUMMY_TARGET_TITLE: str = settings.DUMMY_TARGET_TITLE + self.__DUMMY_USER: str = settings.DUMMY_USER + self.__DUMMY_TAS: str = settings.DUMMY_TAS + self.__SQUONK2_VERIFY_CERTIFICATES: str = settings.SQUONK2_VERIFY_CERTIFICATES # The integer billing day, valid if greater than zero self.__unit_billing_day: int = 0 @@ -799,9 +776,9 @@ def configured(self) -> Squonk2AgentRv: # Is the slug too long? 
# Limited to 10 characters assert self.__CFG_SQUONK2_SLUG - if len(self.__CFG_SQUONK2_SLUG) > _MAX_SLUG_LENGTH: + if len(self.__CFG_SQUONK2_SLUG) > settings.SQUONK2_MAX_SLUG_LENGTH: msg = ( - f'Slug is longer than {_MAX_SLUG_LENGTH} characters' + f'Slug is longer than {settings.SQUONK2_MAX_SLUG_LENGTH} characters' f' ({self.__CFG_SQUONK2_SLUG})' ) _LOGGER.error(msg) diff --git a/viewer/views.py b/viewer/views.py index 650945e1..b3e3562a 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -2487,7 +2487,7 @@ def get(self, *args, **kwargs): del args, kwargs logger.debug("+ ServiceServiceState.State.get called") - service_string = os.environ.get("ENABLE_SERVICE_STATUS", "") + service_string = settings.ENABLE_SERVICE_STATUS logger.debug("Service string: %s", service_string) services = [k for k in service_string.split(":") if k != ""] From 62c04bd76f352a04bd3f68c87761d592d8f63622 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Feb 2024 16:29:39 +0100 Subject: [PATCH 14/47] build(deps): bump cryptography from 42.0.0 to 42.0.2 (#533) Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.0 to 42.0.2. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/42.0.0...42.0.2) --- updated-dependencies: - dependency-name: cryptography dependency-type: indirect ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 66 ++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/poetry.lock b/poetry.lock index 64ceed5f..0fcf5b82 100644 --- a/poetry.lock +++ b/poetry.lock @@ -539,43 +539,43 @@ jinja2 = "*" [[package]] name = "cryptography" -version = "42.0.0" +version = "42.0.2" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." 
optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.0-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:c640b0ef54138fde761ec99a6c7dc4ce05e80420262c20fa239e694ca371d434"}, - {file = "cryptography-42.0.0-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:678cfa0d1e72ef41d48993a7be75a76b0725d29b820ff3cfd606a5b2b33fda01"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:146e971e92a6dd042214b537a726c9750496128453146ab0ee8971a0299dc9bd"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:87086eae86a700307b544625e3ba11cc600c3c0ef8ab97b0fda0705d6db3d4e3"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:0a68bfcf57a6887818307600c3c0ebc3f62fbb6ccad2240aa21887cda1f8df1b"}, - {file = "cryptography-42.0.0-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:5a217bca51f3b91971400890905a9323ad805838ca3fa1e202a01844f485ee87"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:ca20550bb590db16223eb9ccc5852335b48b8f597e2f6f0878bbfd9e7314eb17"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:33588310b5c886dfb87dba5f013b8d27df7ffd31dc753775342a1e5ab139e59d"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:9515ea7f596c8092fdc9902627e51b23a75daa2c7815ed5aa8cf4f07469212ec"}, - {file = "cryptography-42.0.0-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:35cf6ed4c38f054478a9df14f03c1169bb14bd98f0b1705751079b25e1cb58bc"}, - {file = "cryptography-42.0.0-cp37-abi3-win32.whl", hash = "sha256:8814722cffcfd1fbd91edd9f3451b88a8f26a5fd41b28c1c9193949d1c689dc4"}, - {file = "cryptography-42.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:a2a8d873667e4fd2f34aedab02ba500b824692c6542e017075a2efc38f60a4c0"}, - {file = "cryptography-42.0.0-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:8fedec73d590fd30c4e3f0d0f4bc961aeca8390c72f3eaa1a0874d180e868ddf"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:be41b0c7366e5549265adf2145135dca107718fa44b6e418dc7499cfff6b4689"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3ca482ea80626048975360c8e62be3ceb0f11803180b73163acd24bf014133a0"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:c58115384bdcfe9c7f644c72f10f6f42bed7cf59f7b52fe1bf7ae0a622b3a139"}, - {file = "cryptography-42.0.0-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:56ce0c106d5c3fec1038c3cca3d55ac320a5be1b44bf15116732d0bc716979a2"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:324721d93b998cb7367f1e6897370644751e5580ff9b370c0a50dc60a2003513"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:d97aae66b7de41cdf5b12087b5509e4e9805ed6f562406dfcf60e8481a9a28f8"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:85f759ed59ffd1d0baad296e72780aa62ff8a71f94dc1ab340386a1207d0ea81"}, - {file = "cryptography-42.0.0-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:206aaf42e031b93f86ad60f9f5d9da1b09164f25488238ac1dc488334eb5e221"}, - {file = "cryptography-42.0.0-cp39-abi3-win32.whl", hash = "sha256:74f18a4c8ca04134d2052a140322002fef535c99cdbc2a6afc18a8024d5c9d5b"}, - {file = "cryptography-42.0.0-cp39-abi3-win_amd64.whl", hash = 
"sha256:14e4b909373bc5bf1095311fa0f7fcabf2d1a160ca13f1e9e467be1ac4cbdf94"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:3005166a39b70c8b94455fdbe78d87a444da31ff70de3331cdec2c568cf25b7e"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:be14b31eb3a293fc6e6aa2807c8a3224c71426f7c4e3639ccf1a2f3ffd6df8c3"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:bd7cf7a8d9f34cc67220f1195884151426ce616fdc8285df9054bfa10135925f"}, - {file = "cryptography-42.0.0-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:c310767268d88803b653fffe6d6f2f17bb9d49ffceb8d70aed50ad45ea49ab08"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:bdce70e562c69bb089523e75ef1d9625b7417c6297a76ac27b1b8b1eb51b7d0f"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:e9326ca78111e4c645f7e49cbce4ed2f3f85e17b61a563328c85a5208cf34440"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:69fd009a325cad6fbfd5b04c711a4da563c6c4854fc4c9544bff3088387c77c0"}, - {file = "cryptography-42.0.0-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:988b738f56c665366b1e4bfd9045c3efae89ee366ca3839cd5af53eaa1401bce"}, - {file = "cryptography-42.0.0.tar.gz", hash = "sha256:6cf9b76d6e93c62114bd19485e5cb003115c134cf9ce91f8ac924c44f8c8c3f4"}, + {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be"}, + {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529"}, + {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9"}, + {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2"}, + {file = "cryptography-42.0.2-cp37-abi3-win32.whl", hash = "sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee"}, + {file = "cryptography-42.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee"}, + {file = "cryptography-42.0.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90"}, + {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea"}, + {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33"}, + {file = "cryptography-42.0.2-cp39-abi3-win32.whl", hash = "sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635"}, + {file = "cryptography-42.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2"}, + {file = "cryptography-42.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a"}, + {file = "cryptography-42.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65"}, + {file = "cryptography-42.0.2.tar.gz", hash = "sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888"}, ] [package.dependencies] From 4b509eb70f52826157235f95562cc0a483435564 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Mon, 19 Feb 2024 18:00:58 +0100 Subject: [PATCH 15/47] docs: Updates documentation (#536) Co-authored-by: Alan Christie --- README.md | 14 +++++++++++-- fragalysis/settings.py | 47 +++++++++++++++++++++++++++++++++++------- 2 files changed, 52 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 0b417e5e..af572d65 100644 --- a/README.md +++ b/README.md @@ -66,13 +66,11 @@ installs/updates new packages to local venv. 
It's equivalent to running `poetry lock && poetry install`, so if you're not
interested in the local environment and just want to update the lockfile, you can
run just `poetry lock`.
-
## Building and running (local)
The backend is a Docker container image and can be built and deployed locally
using `docker-compose`:
-
    docker-compose build
-
To run the application (which will include deployment of the postgres and neo4j
databases) run:
-
@@ -181,6 +179,18 @@
at `/code/logs`.

> For local development using the `docker-compose.yml` file you'll find the logs at `./data/logs/backend.log`.

+## Configuration (environment variables)
+The backend configuration is controlled by a number of environment variables.
+Variables are typically defined in the project's `fragalysis/settings.py`, where you
+will also find **ALL** the dynamically configured variables (those that can be changed
+using *environment variables* in the deployed Pod/Container).
+
+- Not all variables are dynamic. For example `ALLOWED_HOSTS` is a static variable
+  that is set in the `settings.py` file and is not intended to be changed at run-time.
+
+Refer to the documentation in the `settings.py` file to understand the environment
+and the style guide for new variables that you need to add.
+
## Database migrations
The best approach is to spin-up the development backend (locally) using
`docker-compose` with the custom *migration* compose file and then shell into Django.
diff --git a/fragalysis/settings.py b/fragalysis/settings.py
index 3aa3b58a..c7b47cc4 100644
--- a/fragalysis/settings.py
+++ b/fragalysis/settings.py
@@ -6,9 +6,11 @@
# that control the stack's configuration (behaviour).
#
# Not all settings are configured by environment variable. Some are hard-coded
-# and you'll need to edit their values here. Those that are configurable at run-time
-# should be obvious (i.e. they'll use "os.environ.get()" to obtain their value)
-# alternative run-time value.
+# and you'll need to edit their values here. For example `ALLOWED_HOSTS`
+# is a static variable that is not intended to be changed at run-time.
+#
+# Those that are configurable at run-time should be obvious
+# (i.e. they'll use "os.environ.get()" to obtain their run-time value).
#
# You will find the django-related configuration at the top of the file
# (under DJANGO SETTINGS) and the fragalysis-specific configuration at the bottom of
@@ -22,7 +24,11 @@
#
# 2. The constant used to hold the environment variable *SHOULD* match the
#    environment variable's name. i.e. the "DEPLOYMENT_MODE" environment variable's
-#    value *SHOULD* be found in 'settings.DEPLOYMENT_MODE'.
+#    value *SHOULD* be found in the 'settings.DEPLOYMENT_MODE' variable.
+#
+# 3. In the FRAGALYSIS section, document the variable's purpose and the values
+#    it can take in the comments. If there are dependencies or "gotchas"
+#    (i.e. changing its value after deployment) then these should be documented.
#
# Providing run-time values for variables: -
#
@@ -45,7 +51,7 @@
#
# IMPORTANTLY: For a description of an environment variable (setting) and its value
#              you *MUST* consult the comments in this file ("settings.py"), and *NOT*
-#              the Ansible playbook. This file is the primary authority for the
+#              the Ansible playbook. "settings.py" is the primary authority for the
#              configuration of the Fragalysis Stack.
#
# Ansible variables are declared in "roles/fragalysis-stack/defaults/main.yaml"
@@ -472,6 +478,8 @@
# dedicated Discourse server.
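# (Editor's illustration, hedged: with DISCOURSE_DEV_POST_SUFFIX="-duncan", a
# category otherwise titled "Mpro" would be created as "Mpro-duncan", matching
# the examples above.)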
DISCOURSE_DEV_POST_SUFFIX: str = os.environ.get("DISCOURSE_DEV_POST_SUFFIX", "")

+# Some Squonk2 developer/debug variables.
+# Unused in production.
DUMMY_TARGET_TITLE: str = os.environ.get("DUMMY_TARGET_TITLE", "")
DUMMY_USER: str = os.environ.get("DUMMY_USER", "")
DUMMY_TAS: str = os.environ.get("DUMMY_TAS", "")
@@ -505,6 +513,9 @@
    "NEO4J_BOLT_URL", "bolt://neo4j:test@neo4j:7687"
)

+# The graph (neo4j) database settings.
+# The query provides the graph endpoint, typically a service in a kubernetes namespace
+# like 'graph.graph-a.svc' and the 'auth' provides the graph username and password.
NEO4J_QUERY: str = os.environ.get("NEO4J_QUERY", "neo4j")
NEO4J_AUTH: str = os.environ.get("NEO4J_AUTH", "neo4j/neo4j")

@@ -532,27 +543,46 @@

# An SSH host.
# Used in the security module in conjunction with ISPyB settings.
-# Any SSH_PRIVATE_KEY_FILENAME value will be used in preference to SSH_PASSWORD.
+# The SSH_PRIVATE_KEY_FILENAME value will be used if there is no SSH_PASSWORD.
SSH_HOST: str = os.environ.get("SSH_HOST", "")
SSH_USER: str = os.environ.get("SSH_USER", "")
SSH_PASSWORD: str = os.environ.get("SSH_PASSWORD", "")
SSH_PRIVATE_KEY_FILENAME: str = os.environ.get("SSH_PRIVATE_KEY_FILENAME", "")

-# A slug used for names this Fragalysis will create
+# The maximum length of the 'slug' used for names this Fragalysis will create.
+#
+# Squonk2 variables are generally used by the 'squonk2_agent.py' module
+# in the 'viewer' package.
SQUONK2_MAX_SLUG_LENGTH: int = 10

+# Where the Squonk2 logic places its files in Job containers.
SQUONK2_MEDIA_DIRECTORY: str = "fragalysis-files"
+# The Squonk2 DataManager UI endpoint to obtain Job Instance information.
SQUONK2_INSTANCE_API: str = "data-manager-ui/results/instance/"

+# The URL for the Squonk2 Account Server API.
SQUONK2_ASAPI_URL: str = os.environ.get("SQUONK2_ASAPI_URL", "")
+# The URL for the Squonk2 Data Manager API.
SQUONK2_DMAPI_URL: str = os.environ.get("SQUONK2_DMAPI_URL", "")
+# The URL for the Squonk2 User Interface.
SQUONK2_UI_URL: str = os.environ.get("SQUONK2_UI_URL", "")
+# The pre-assigned Squonk2 Account Server Organisation for the stack.
+# This is created by an administrator of the Squonk2 service.
SQUONK2_ORG_UUID: str = os.environ.get("SQUONK2_ORG_UUID", "")
+# The Account Server Unit billing day for all products (projects) that are created.
+# It's a day of the month (1..27).
SQUONK2_UNIT_BILLING_DAY: str = os.environ.get("SQUONK2_UNIT_BILLING_DAY", "")
+# The Squonk2 Account Server product "flavour" created for Jobs (products/projects).
+# It's usually one of "GOLD", "SILVER" or "BRONZE".
SQUONK2_PRODUCT_FLAVOUR: str = os.environ.get("SQUONK2_PRODUCT_FLAVOUR", "")
+# A short slug used when creating Squonk2 objects for this stack.
+# This must be unique across all stacks that share the same Squonk2 service.
SQUONK2_SLUG: str = os.environ.get("SQUONK2_SLUG", "")[:SQUONK2_MAX_SLUG_LENGTH]
+# The pre-assigned Squonk2 Account Server Organisation owner and password.
+# This account is used to create Squonk2 objects for the stack.
SQUONK2_ORG_OWNER: str = os.environ.get("SQUONK2_ORG_OWNER", "")
SQUONK2_ORG_OWNER_PASSWORD: str = os.environ.get("SQUONK2_ORG_OWNER_PASSWORD", "")
+# Do we verify Squonk2 SSL certificates ("yes" or "no").
SQUONK2_VERIFY_CERTIFICATES: str = os.environ.get("SQUONK2_VERIFY_CERTIFICATES", "")

TARGET_LOADER_MEDIA_DIRECTORY: str = "target_loader_data"

@@ -568,6 +598,9 @@
    "Must begin 'lb' followed by 5 digits, optionally followed by a hyphen and a number.",
)

+# Version variables.
+# These are set by the Dockerfile in the fragalysis-stack repository +# and controlled by the CI process, i.e. they're not normally set by a a user. BE_NAMESPACE: str = os.environ.get("BE_NAMESPACE", "undefined") BE_IMAGE_TAG: str = os.environ.get("BE_IMAGE_TAG", "undefined") FE_NAMESPACE: str = os.environ.get("FE_NAMESPACE", "undefined") From 51d9b352fb82abee3724a7807be57767efd9abd8 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 19 Feb 2024 18:01:21 +0100 Subject: [PATCH 16/47] build(deps): bump django from 3.2.20 to 3.2.24 (#535) Bumps [django](https://github.com/django/django) from 3.2.20 to 3.2.24. - [Commits](https://github.com/django/django/compare/3.2.20...3.2.24) --- updated-dependencies: - dependency-name: django dependency-type: direct:production ... Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- build-requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/build-requirements.txt b/build-requirements.txt index 3a824035..22008213 100644 --- a/build-requirements.txt +++ b/build-requirements.txt @@ -8,7 +8,7 @@ pre-commit == 3.5.0 poetry == 1.7.1 # Matching main requirements... -Django==3.2.20 +Django==3.2.24 # Others httpie == 3.2.1 From 05ba0efeea40c425012254cff73d2657b49f5950 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 20 Feb 2024 09:31:14 +0000 Subject: [PATCH 17/47] fix: reverting wrong changes --- viewer/cset_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index bb4e704f..9cbeed32 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -363,7 +363,7 @@ def set_mol( # Try to get the LHS SiteObservation, # This will be used to set the ComputedMolecule.site_observation_code. # This may fail. - lhs_property = 'ref_pdb' + lhs_property = 'lhs_pdb' lhs_so = self.get_site_observation( lhs_property, mol, From ad39996e8e16ad584b1cce40376362a6a469b010 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 20 Feb 2024 09:43:36 +0000 Subject: [PATCH 18/47] fix: reverting wrong changes (#538) --- viewer/cset_upload.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index bb4e704f..9cbeed32 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -363,7 +363,7 @@ def set_mol( # Try to get the LHS SiteObservation, # This will be used to set the ComputedMolecule.site_observation_code. # This may fail. 
- lhs_property = 'ref_pdb' + lhs_property = 'lhs_pdb' lhs_so = self.get_site_observation( lhs_property, mol, From 7521b7afb62d6eb7bb5f4d70730cd481d4202740 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 22 Feb 2024 12:14:22 +0000 Subject: [PATCH 19/47] stashing --- viewer/cset_upload.py | 205 +++++++++--------- .../migrations/0044_computedmolecule_pdb.py | 23 ++ viewer/migrations/0045_auto_20240221_1203.py | 34 +++ viewer/models.py | 13 ++ viewer/views.py | 22 +- 5 files changed, 187 insertions(+), 110 deletions(-) create mode 100644 viewer/migrations/0044_computedmolecule_pdb.py create mode 100644 viewer/migrations/0045_auto_20240221_1203.py diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 9cbeed32..706b7832 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -2,9 +2,9 @@ import datetime import logging import os -import shutil import uuid import zipfile +from pathlib import Path from typing import Any, Dict, List, Optional, Tuple from openpyxl.utils import get_column_letter @@ -142,37 +142,45 @@ def __init__( self.zfile = zfile self.zfile_hashvals = zfile_hashvals - def process_pdb(self, pdb_code, target, zfile, zfile_hashvals) -> SiteObservation: + def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None: for key in zfile_hashvals.keys(): if key == pdb_code: pdb_code = f'{pdb_code}#{zfile_hashvals[pdb_code]}' - pdb_fp = zfile[pdb_code] - pdb_fn = zfile[pdb_code].split('/')[-1] + try: + pdb_fp = zfile[pdb_code] + except KeyError: + return None - new_filename = f'{settings.MEDIA_ROOT}pdbs/{pdb_fn}' - old_filename = settings.MEDIA_ROOT + pdb_fp - shutil.copy(old_filename, new_filename) + # ensure filename uniqueness + pdb_fn = '_'.join([zfile[pdb_code].split('/')[-1], uuid.uuid4().hex]) + pdb_field = Path(settings.COMPUTED_SET_MEDIA_DIRECTORY).joinpath(pdb_fn) - # Create Protein object - target_obj = Target.objects.get(title=target) - # prot.target_id = target_obj - site_obvs, created = SiteObservation.objects.get_or_create( - code=pdb_code, target_id=target_obj - ) - # prot.code = pdb_code - if created: - target_obj = Target.objects.get(title=target) - site_obvs.target_id = target_obj - site_obvs.pdb_info = f'pdbs/{pdb_fn}' - site_obvs.save() + new_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_field) + old_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_fp) + old_filename.rename(new_filename) - return site_obvs + return str(pdb_field) + + # # Create Protein object + # target_obj = Target.objects.get(title=target) + # # prot.target_id = target_obj + # site_obvs, created = SiteObservation.objects.get_or_create( + # code=pdb_code, target_id=target_obj + # ) + # # prot.code = pdb_code + # if created: + # target_obj = Target.objects.get(title=target) + # site_obvs.target_id = target_obj + # site_obvs.pdb_info = f'pdbs/{pdb_fn}' + # site_obvs.save() + + # return site_obvs # use zfile object for pdb files uploaded in zip def get_site_observation( self, property_name, mol, target, compound_set, zfile, zfile_hashvals - ) -> Optional[SiteObservation]: + ) -> SiteObservation | str | None: # Get a SiteObservation from the molecule using # a named property (i.e. lhs_pdb or ref_pdb for example) @@ -187,61 +195,69 @@ def get_site_observation( return None pdb_fn = mol.GetProp(property_name).split('/')[-1] - site_obvs = None if zfile: + # pdb archive uploaded. 
referenced pdb file may or may not be included pdb_code = pdb_fn.replace('.pdb', '') - site_obvs = self.process_pdb( + pdb_file = self.process_pdb( pdb_code=pdb_code, - target=target, zfile=zfile, zfile_hashvals=zfile_hashvals, ) - else: - name = pdb_fn - try: - site_obvs = SiteObservation.objects.get( - code__contains=name, - experiment__experiment_upload__target__title=target, + if pdb_file: + return pdb_file + else: + logger.info( + 'No protein pdb (%s) found in zipfile', + pdb_fn, ) - except SiteObservation.DoesNotExist: - # Initial SiteObservation lookup failed. - logger.warning( - 'Failed to get SiteObservation object (target=%s name=%s)', - compound_set.target.title, + + # pdb was not included, try to find the matching site observation + name = pdb_fn + site_obvs = None + try: + site_obvs = SiteObservation.objects.get( + code__contains=name, + experiment__experiment_upload__target__title=target, + ) + except SiteObservation.DoesNotExist: + # Initial SiteObservation lookup failed. + logger.warning( + 'Failed to get SiteObservation object (target=%s name=%s)', + compound_set.target.title, + name, + ) + # Try alternatives. + # If all else fails then the site_obvs will be 'None' + qs = SiteObservation.objects.filter( + code__contains=name, + experiment__experiment_upload__target__title=target, + ) + if qs.exists(): + logger.info( + 'Found SiteObservation containing name=%s qs=%s', name, + qs, ) - # Try alternatives. - # If all else fails then the site_obvs will be 'None' + else: + alt_name = name.split(':')[0].split('_')[0] qs = SiteObservation.objects.filter( - code__contains=name, + code__contains=alt_name, experiment__experiment_upload__target__title=target, ) if qs.exists(): logger.info( - 'Found SiteObservation containing name=%s qs=%s', - name, + 'Found SiteObservation containing alternative name=%s qs=%s', + alt_name, qs, ) - else: - alt_name = name.split(':')[0].split('_')[0] - qs = SiteObservation.objects.filter( - code__contains=alt_name, - experiment__experiment_upload__target__title=target, - ) - if qs.exists(): - logger.info( - 'Found SiteObservation containing alternative name=%s qs=%s', - alt_name, - qs, - ) - if qs.count() > 0: - logger.debug( - 'Found alternative (target=%s name=%s)', - compound_set.target.title, - name, - ) - site_obvs = qs[0] + if qs.count() > 0: + logger.debug( + 'Found alternative (target=%s name=%s)', + compound_set.target.title, + name, + ) + site_obvs = qs[0] if not site_obvs: logger.warning( @@ -360,31 +376,10 @@ def set_mol( insp_frags.append(ref) - # Try to get the LHS SiteObservation, - # This will be used to set the ComputedMolecule.site_observation_code. - # This may fail. - lhs_property = 'lhs_pdb' - lhs_so = self.get_site_observation( - lhs_property, - mol, - target, - compound_set, - zfile, - zfile_hashvals=zfile_hashvals, - ) - if not lhs_so: - logger.warning( - 'Failed to get a LHS SiteObservation (%s) for %s, %s, %s', - lhs_property, - mol, - target, - compound_set, - ) - - # Try to get the reference SiteObservation, - # This will be used to set the ComputedMolecule.reference_code. - # This may fail. ref_property = 'ref_pdb' + # data in ref ref_pdb field may be one of 2 things: + # - siteobservation's short code (code field) + # - pdb file in uploaded zipfile ref_so = self.get_site_observation( ref_property, mol, @@ -404,12 +399,12 @@ def set_mol( # A LHS or Reference protein must be provided. 
# (Part of "Fix behaviour of RHS [P] button - also RHS upload change", issue #1249) - if not lhs_so and not ref_so: - logger.error( - 'ComputedMolecule has no LHS (%s) or Reference (%s) property', - lhs_property, - ref_property, - ) + # if not lhs_so and not ref_so: + # logger.error( + # 'ComputedMolecule has no LHS (%s) or Reference (%s) property', + # lhs_property, + # ref_property, + # ) # Need a ComputedMolecule before saving. # Check if anything exists already... @@ -433,15 +428,27 @@ def set_mol( logger.info('Creating new ComputedMolecule') computed_molecule = ComputedMolecule() + if isinstance(ref_so, SiteObservation): + code = ref_so.code + pdb_info = ref_so.experiment.pdb_info + lhs_so = ref_so + else: + code = None + pdb_info = ref_so + lhs_so = None + assert computed_molecule computed_molecule.compound = compound computed_molecule.computed_set = compound_set computed_molecule.sdf_info = Chem.MolToMolBlock(mol) - computed_molecule.site_observation_code = lhs_so.code if lhs_so else None - computed_molecule.reference_code = ref_so.code if ref_so else None + computed_molecule.site_observation_code = code + computed_molecule.reference_code = code computed_molecule.molecule_name = molecule_name computed_molecule.name = f"{target}-{computed_molecule.identifier}" computed_molecule.smiles = smiles + computed_molecule.pdb = lhs_so + # TODO: this is wrong + computed_molecule.pdb_info = pdb_info # Extract possible reference URL and Rationale # URLs have to be valid URLs and rationals must contain more than one word ref_url: Optional[str] = ( @@ -591,6 +598,14 @@ def task(self) -> ComputedSet: assert settings.AUTHENTICATE_UPLOAD is False computed_set.save() + # check compound set folder exists. + cmp_set_folder = os.path.join( + settings.MEDIA_ROOT, settings.COMPUTED_SET_MEDIA_DIRECTORY + ) + if not os.path.isdir(cmp_set_folder): + logger.info('Making ComputedSet folder (%s)', cmp_set_folder) + os.mkdir(cmp_set_folder) + # Set descriptions in return for the Molecules. # This also sets the submitter and method URL properties of the computed set # while also saving it. @@ -611,14 +626,6 @@ def task(self) -> ComputedSet: self.zfile_hashvals, ) - # check compound set folder exists. 
- cmp_set_folder = os.path.join( - settings.MEDIA_ROOT, settings.COMPUTED_SET_MEDIA_DIRECTORY - ) - if not os.path.isdir(cmp_set_folder): - logger.info('Making ComputedSet folder (%s)', cmp_set_folder) - os.mkdir(cmp_set_folder) - # move and save the compound set new_filename = f'{settings.MEDIA_ROOT}{settings.COMPUTED_SET_MEDIA_DIRECTORY}/{computed_set.name}.sdf' os.rename(sdf_filename, new_filename) diff --git a/viewer/migrations/0044_computedmolecule_pdb.py b/viewer/migrations/0044_computedmolecule_pdb.py new file mode 100644 index 00000000..e5b20906 --- /dev/null +++ b/viewer/migrations/0044_computedmolecule_pdb.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.23 on 2024-02-20 15:09 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0043_experiment_prefix_tooltip'), + ] + + operations = [ + migrations.AddField( + model_name='computedmolecule', + name='pdb', + field=models.ForeignKey( + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name='pdb', + to='viewer.siteobservation', + ), + ), + ] diff --git a/viewer/migrations/0045_auto_20240221_1203.py b/viewer/migrations/0045_auto_20240221_1203.py new file mode 100644 index 00000000..53ba541d --- /dev/null +++ b/viewer/migrations/0045_auto_20240221_1203.py @@ -0,0 +1,34 @@ +# Generated by Django 3.2.23 on 2024-02-21 12:03 + +import django.db.models.deletion +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0044_computedmolecule_pdb'), + ] + + operations = [ + migrations.AddField( + model_name='computedmolecule', + name='pdb_info', + field=models.FileField( + help_text='Link to pdb file; user-uploaded pdb or pdb.experiment.pdb_info', + max_length=255, + null=True, + upload_to='computed_set_data/', + ), + ), + migrations.AlterField( + model_name='computedmolecule', + name='pdb', + field=models.ForeignKey( + help_text='SiteObservation object user referenced in upload (if given)', + null=True, + on_delete=django.db.models.deletion.PROTECT, + related_name='pdb', + to='viewer.siteobservation', + ), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index c2b8af72..e10c58c6 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -943,6 +943,19 @@ class ComputedMolecule(models.Model): blank=True, help_text="An optional rationale for this molecule", ) + pdb = models.ForeignKey( + SiteObservation, + related_name="pdb", + on_delete=models.PROTECT, + null=True, + help_text="SiteObservation object user referenced in upload (if given)", + ) + pdb_info = models.FileField( + upload_to="computed_set_data/", + null=True, + max_length=255, + help_text="Link to pdb file; user-uploaded pdb or pdb.experiment.pdb_info", + ) def __str__(self) -> str: return f"{self.smiles}" diff --git a/viewer/views.py b/viewer/views.py index b3e3562a..8da3c3d0 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -490,17 +490,17 @@ def post(self, request): tmp_pdb_file = None tmp_sdf_file = None if 'pdb_zip' in list(request.FILES.keys()): - # In the first stage (green release) of the XCA-based Fragalysis Stack - # we do not support PDB files. - request.session[ - _SESSION_ERROR - ] = 'This release does not support the inclusion of PDB file.' 
- logger.warning( - '- UploadCSet POST error_msg="%s"', request.session[_SESSION_ERROR] - ) - return redirect('viewer:upload_cset') - # pdb_file = request.FILES['pdb_zip'] - # tmp_pdb_file = save_tmp_file(pdb_file) + # # In the first stage (green release) of the XCA-based Fragalysis Stack + # # we do not support PDB files. + # request.session[ + # _SESSION_ERROR + # ] = 'This release does not support the inclusion of PDB file.' + # logger.warning( + # '- UploadCSet POST error_msg="%s"', request.session[_SESSION_ERROR] + # ) + # return redirect('viewer:upload_cset') + pdb_file = request.FILES['pdb_zip'] + tmp_pdb_file = save_tmp_file(pdb_file) if sdf_file: tmp_sdf_file = save_tmp_file(sdf_file) From d8d4e06ccc0b34bc15b22a2698537168577a87c8 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 22 Feb 2024 14:34:36 +0000 Subject: [PATCH 20/47] add site observation's ligand sdf to aligned_files --- viewer/download_structures.py | 97 +++++++++-------------------------- 1 file changed, 25 insertions(+), 72 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 59e7ceb9..1fd31bb0 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -20,7 +20,7 @@ import pandoc from django.conf import settings -from viewer.models import DownloadLinks +from viewer.models import DownloadLinks, SiteObservation from viewer.utils import clean_filename logger = logging.getLogger(__name__) @@ -55,6 +55,7 @@ class ArchiveFile: path: str archive_path: str + site_observation: SiteObservation | None = None # Dictionary containing all references needed to create the zip file @@ -81,62 +82,9 @@ class ArchiveFile: } -# A directory, relative to the media directory, -# where missing SD files are written. -# The SD files are constructed from the molecule 'sdf_info' field -# (essentially MOL-file text) when the 'sdf_file' field is blank. -_MISSING_SDF_DIRECTORY = 'missing_sdfs' -_MISSING_SDF_PATH = os.path.join(settings.MEDIA_ROOT, _MISSING_SDF_DIRECTORY) - _ERROR_FILE = 'errors.csv' -def _replace_missing_sdf(molecule, code): - """Creates a file in the 'missing SDFs' directory, using the protein code - provided. The file is constructed using the molecule's sdf_info field, skipping the - action if the file exists. The media-relative path of the written file is returned - (if it was written). - - Files, once written, are left and are not removed (or replaced). - The directory serves an archive of missing SD files. - - This was added for FE/915 to generate SD files for those that are missing - from the upload directory. - """ - if not os.path.isdir(_MISSING_SDF_PATH): - os.mkdir(_MISSING_SDF_PATH) - - # We shouldn't be called if molecule['sdf_info'] is blank. - # but check anyway. - sdf_info = molecule.ligand_mol_file - if not sdf_info: - return None - sdf_lines = sdf_info.splitlines(True)[1:] - if not sdf_lines: - return None - # Make sure last line ends with a new-line - if not sdf_lines[-1].endswith('\n'): - sdf_lines[-1] += '\n' - - # media-relative path to missing file... - missing_file = os.path.join(_MISSING_SDF_DIRECTORY, f'{code}.sdf') - # absolute path to missing file... - missing_path = os.path.join(settings.MEDIA_ROOT, missing_file) - # create the file if it doesn't exist... - if not os.path.isfile(missing_path): - # No file - create one. - with open(missing_path, 'w', encoding='utf-8') as sd_file: - # First line is the protein code, i.e. 
"PGN_RS02895PGA-x0346_0B" - sd_file.write(f'{code}\n') - # Now write the lines from the molecule sdf_info record - sd_file.writelines(sdf_lines) - # And append file terminator... - sd_file.write('$$$$\n') - - # Returns the media-relative path to the file in the missing file directory - return missing_file - - def _add_file_to_zip(ziparchive, param, filepath): """Add the requested file to the zip archive. @@ -256,6 +204,11 @@ def _add_file_to_zip_aligned(ziparchive, code, archive_file): # Copy the file without modification ziparchive.write(filepath, archive_file.archive_path) return True + elif archive_file.site_observation: + # NB! this bypasses _read_and_patch_molecule_name. problem? + ziparchive.writestr( + archive_file.archive_path, archive_file.site_observation.ligand_mol_file + ) else: logger.warning('filepath "%s" is not a file', filepath) _add_empty_file(ziparchive, archive_file.archive_path) @@ -319,9 +272,9 @@ def _molecule_files_zip(zip_contents, ziparchive, combined_sdf_file, error_file) logger.info( 'len(molecules.sd_files)=%s', len(zip_contents['molecules']['sdf_files']) ) - for file, prot in zip_contents['molecules']['sdf_files'].items(): + for archive_file, prot in zip_contents['molecules']['sdf_files'].items(): # Do not try and process any missing SD files. - if not file: + if not archive_file: error_file.write(f'sdf_files,{prot},missing\n') mol_errors += 1 continue @@ -329,16 +282,16 @@ def _molecule_files_zip(zip_contents, ziparchive, combined_sdf_file, error_file) if zip_contents['molecules'][ 'sdf_info' ] is True and not _add_file_to_zip_aligned( - ziparchive, prot.split(":")[0], file + ziparchive, prot.split(":")[0], archive_file ): - error_file.write(f'sdf_info,{prot},{file.path}\n') + error_file.write(f'sdf_info,{prot},{archive_file.path}\n') mol_errors += 1 # Append sdf file on the Molecule record to the combined_sdf_file. if zip_contents['molecules'][ 'single_sdf_file' - ] is True and not _add_file_to_sdf(combined_sdf_file, file): - error_file.write(f'single_sdf_file,{prot},{file.path}\n') + ] is True and not _add_file_to_sdf(combined_sdf_file, archive_file): + error_file.write(f'single_sdf_file,{prot},{archive_file.path}\n') mol_errors += 1 return mol_errors @@ -750,29 +703,29 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): num_molecules_collected = 0 num_missing_sd_files = 0 for so in site_obvs: - rel_sd_file = None if so.ligand_mol_file: # There is an SD file (normal) # sdf info is now kept as text in db field - rel_sd_file = _replace_missing_sdf(so, so.code) - else: - # No file value (odd). - logger.warning( - "SiteObservation record's 'ligand_mol_file' isn't set (%s)", so + archive_path = str( + Path('aligned_files').joinpath(so.code).joinpath(f'{so.code}.sdf') ) - num_missing_sd_files += 1 - - if rel_sd_file: - logger.debug('rel_sd_file=%s code=%s', rel_sd_file, so.code) + # path is ignored when writing sdfs but mandatory field zip_contents['molecules']['sdf_files'].update( { ArchiveFile( - path=rel_sd_file, - archive_path=rel_sd_file, + path=archive_path, + archive_path=archive_path, + site_observation=so, ): so.code } ) num_molecules_collected += 1 + else: + # No file value (odd). 
+ logger.warning( + "SiteObservation record's 'ligand_mol_file' isn't set (%s)", so + ) + num_missing_sd_files += 1 # Report (in the log) anomalies if num_molecules_collected == 0: From ad8cc40767e25b679181a40309cdaa3fc45a55f7 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 23 Feb 2024 13:50:06 +0000 Subject: [PATCH 21/47] fix: custom pdb now downloadable --- media_serve/urls.py | 3 +++ media_serve/views.py | 26 ++++++++++++++++++++++---- viewer/cset_upload.py | 25 +------------------------ 3 files changed, 26 insertions(+), 28 deletions(-) diff --git a/media_serve/urls.py b/media_serve/urls.py index 604b51ee..181b701a 100644 --- a/media_serve/urls.py +++ b/media_serve/urls.py @@ -11,5 +11,8 @@ re_path( r"^target_loader_data/(?P.+)", views.tld_download, name="get_tld" ), + re_path( + r"^computed_set_data/(?P.+)", views.cspdb_download, name="get_cspdb" + ), re_path(r"^pdbs/(?P.+)", views.file_download, name="get_file"), ] diff --git a/media_serve/views.py b/media_serve/views.py index 75923ce4..c44411fa 100644 --- a/media_serve/views.py +++ b/media_serve/views.py @@ -73,17 +73,35 @@ def tld_download(request, file_path): ispy_b_static.permission_string = ( "experiment__experiment_upload__target__project_id" ) - # ispy_b_static.field_name = "pdb_info" ispy_b_static.field_name = "apo_file" ispy_b_static.content_type = "application/x-pilot" - # ispy_b_static.prefix = "target_loader_data/48225dbf-204a-48e1-8ae7-f1632f4dba89/Mpro-v2/Mpro/upload_2/aligned_files/Mpro_Nterm-x0029/" - # ispy_b_static.prefix = "target_loader_data" - # ispy_b_static.prefix = "/target_loader_data/" ispy_b_static.prefix = "/target_loader_data/" ispy_b_static.input_string = file_path return ispy_b_static.get_response() +def cspdb_download(request, file_path): + """ + Download a protein by nginx redirect + :param request: the initial request + :param file_path: the file path we're getting from the static + :return: the response (a redirect to nginx internal) + """ + logger.info("+ Received cspdb_download file path: %s", file_path) + ispy_b_static = ISpyBSafeStaticFiles2() + ispy_b_static.model = SiteObservation + ispy_b_static.request = request + # the following 2 aren't used atm + ispy_b_static.permission_string = ( + "experiment__experiment_upload__target__project_id" + ) + ispy_b_static.field_name = "apo_file" + ispy_b_static.content_type = "application/x-pilot" + ispy_b_static.prefix = "/computed_set_data/" + ispy_b_static.input_string = file_path + return ispy_b_static.get_response() + + def bound_download(request, file_path): """ Download a protein by nginx redirect diff --git a/viewer/cset_upload.py b/viewer/cset_upload.py index 706b7832..0ccf7555 100644 --- a/viewer/cset_upload.py +++ b/viewer/cset_upload.py @@ -159,24 +159,10 @@ def process_pdb(self, pdb_code, zfile, zfile_hashvals) -> str | None: new_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_field) old_filename = Path(settings.MEDIA_ROOT).joinpath(pdb_fp) old_filename.rename(new_filename) + os.chmod(new_filename, 0o755) return str(pdb_field) - # # Create Protein object - # target_obj = Target.objects.get(title=target) - # # prot.target_id = target_obj - # site_obvs, created = SiteObservation.objects.get_or_create( - # code=pdb_code, target_id=target_obj - # ) - # # prot.code = pdb_code - # if created: - # target_obj = Target.objects.get(title=target) - # site_obvs.target_id = target_obj - # site_obvs.pdb_info = f'pdbs/{pdb_fn}' - # site_obvs.save() - - # return site_obvs - # use zfile object for pdb files uploaded in zip def get_site_observation( 
self, property_name, mol, target, compound_set, zfile, zfile_hashvals @@ -397,15 +383,6 @@ def set_mol( compound_set, ) - # A LHS or Reference protein must be provided. - # (Part of "Fix behaviour of RHS [P] button - also RHS upload change", issue #1249) - # if not lhs_so and not ref_so: - # logger.error( - # 'ComputedMolecule has no LHS (%s) or Reference (%s) property', - # lhs_property, - # ref_property, - # ) - # Need a ComputedMolecule before saving. # Check if anything exists already... existing_computed_molecules = ComputedMolecule.objects.filter( From 27d6ff5e9a19cb2b4c8d7f4989b8a22704b01562 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Tue, 27 Feb 2024 15:31:34 +0000 Subject: [PATCH 22/47] fix: increased loglevel to error on unexpected exceptions block --- viewer/target_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index edad072e..b91e64e9 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1957,7 +1957,7 @@ def load_target( # These are errors processing the data, which we handle gracefully. # The task should _always_ end successfully. # Any problem with the underlying data is transmitted in the report. - logger.debug(exc, exc_info=True) + logger.error(exc, exc_info=True) target_loader.report.final( f"Failed to process '{target_loader.data_bundle}'", success=False ) From c130a12e89f003b9894febb3f1e0de6213271f84 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Thu, 29 Feb 2024 11:31:47 +0100 Subject: [PATCH 23/47] fix: Discourse service check now checks API key before creating a service (#544) Co-authored-by: Alan Christie --- viewer/services.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/viewer/services.py b/viewer/services.py index 77417143..4e208f57 100644 --- a/viewer/services.py +++ b/viewer/services.py @@ -166,6 +166,9 @@ def discourse(func_id, name, key=None, url=None, user=None) -> bool: del func_id, name logger.debug("+ discourse") + # Discourse is "unconfigured" if there is no API key + if not settings.DISCOURSE_API_KEY: + return False client = DiscourseClient( os.environ.get(url, None), api_username=os.environ.get(user, None), @@ -206,6 +209,7 @@ def keycloak(func_id, name, url=None, secret=None) -> bool: del func_id, name, secret logger.debug("+ keycloak") + # Keycloak is "unconfigured" if there is no realm URL keycloak_realm = os.environ.get(url, None) if not keycloak_realm: return False From 4e8db0baa96ba62d3ce96eb247013483e3e14241 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Thu, 29 Feb 2024 11:40:30 +0100 Subject: [PATCH 24/47] build(deps): bump cryptography from 42.0.2 to 42.0.4 (#539) Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.2 to 42.0.4. - [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst) - [Commits](https://github.com/pyca/cryptography/compare/42.0.2...42.0.4) --- updated-dependencies: - dependency-name: cryptography dependency-type: indirect ... 
Signed-off-by: dependabot[bot] Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com> --- poetry.lock | 66 ++++++++++++++++++++++++++--------------------------- 1 file changed, 33 insertions(+), 33 deletions(-) diff --git a/poetry.lock b/poetry.lock index 0fcf5b82..306d4ce0 100644 --- a/poetry.lock +++ b/poetry.lock @@ -539,43 +539,43 @@ jinja2 = "*" [[package]] name = "cryptography" -version = "42.0.2" +version = "42.0.4" description = "cryptography is a package which provides cryptographic recipes and primitives to Python developers." optional = false python-versions = ">=3.7" files = [ - {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:701171f825dcab90969596ce2af253143b93b08f1a716d4b2a9d2db5084ef7be"}, - {file = "cryptography-42.0.2-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:61321672b3ac7aade25c40449ccedbc6db72c7f5f0fdf34def5e2f8b51ca530d"}, - {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:ea2c3ffb662fec8bbbfce5602e2c159ff097a4631d96235fcf0fb00e59e3ece4"}, - {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:3b15c678f27d66d247132cbf13df2f75255627bcc9b6a570f7d2fd08e8c081d2"}, - {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:8e88bb9eafbf6a4014d55fb222e7360eef53e613215085e65a13290577394529"}, - {file = "cryptography-42.0.2-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:a047682d324ba56e61b7ea7c7299d51e61fd3bca7dad2ccc39b72bd0118d60a1"}, - {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:36d4b7c4be6411f58f60d9ce555a73df8406d484ba12a63549c88bd64f7967f1"}, - {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:a00aee5d1b6c20620161984f8ab2ab69134466c51f58c052c11b076715e72929"}, - {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:b97fe7d7991c25e6a31e5d5e795986b18fbbb3107b873d5f3ae6dc9a103278e9"}, - {file = "cryptography-42.0.2-cp37-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:5fa82a26f92871eca593b53359c12ad7949772462f887c35edaf36f87953c0e2"}, - {file = "cryptography-42.0.2-cp37-abi3-win32.whl", hash = "sha256:4b063d3413f853e056161eb0c7724822a9740ad3caa24b8424d776cebf98e7ee"}, - {file = "cryptography-42.0.2-cp37-abi3-win_amd64.whl", hash = "sha256:841ec8af7a8491ac76ec5a9522226e287187a3107e12b7d686ad354bb78facee"}, - {file = "cryptography-42.0.2-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:55d1580e2d7e17f45d19d3b12098e352f3a37fe86d380bf45846ef257054b242"}, - {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:28cb2c41f131a5758d6ba6a0504150d644054fd9f3203a1e8e8d7ac3aea7f73a"}, - {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:b9097a208875fc7bbeb1286d0125d90bdfed961f61f214d3f5be62cd4ed8a446"}, - {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:44c95c0e96b3cb628e8452ec060413a49002a247b2b9938989e23a2c8291fc90"}, - {file = "cryptography-42.0.2-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:2f9f14185962e6a04ab32d1abe34eae8a9001569ee4edb64d2304bf0d65c53f3"}, - {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:09a77e5b2e8ca732a19a90c5bca2d124621a1edb5438c5daa2d2738bfeb02589"}, - {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_1_x86_64.whl", hash = 
"sha256:ad28cff53f60d99a928dfcf1e861e0b2ceb2bc1f08a074fdd601b314e1cc9e0a"}, - {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:130c0f77022b2b9c99d8cebcdd834d81705f61c68e91ddd614ce74c657f8b3ea"}, - {file = "cryptography-42.0.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:fa3dec4ba8fb6e662770b74f62f1a0c7d4e37e25b58b2bf2c1be4c95372b4a33"}, - {file = "cryptography-42.0.2-cp39-abi3-win32.whl", hash = "sha256:3dbd37e14ce795b4af61b89b037d4bc157f2cb23e676fa16932185a04dfbf635"}, - {file = "cryptography-42.0.2-cp39-abi3-win_amd64.whl", hash = "sha256:8a06641fb07d4e8f6c7dda4fc3f8871d327803ab6542e33831c7ccfdcb4d0ad6"}, - {file = "cryptography-42.0.2-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:087887e55e0b9c8724cf05361357875adb5c20dec27e5816b653492980d20380"}, - {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:a7ef8dd0bf2e1d0a27042b231a3baac6883cdd5557036f5e8df7139255feaac6"}, - {file = "cryptography-42.0.2-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4383b47f45b14459cab66048d384614019965ba6c1a1a141f11b5a551cace1b2"}, - {file = "cryptography-42.0.2-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:fbeb725c9dc799a574518109336acccaf1303c30d45c075c665c0793c2f79a7f"}, - {file = "cryptography-42.0.2-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:320948ab49883557a256eab46149df79435a22d2fefd6a66fe6946f1b9d9d008"}, - {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:5ef9bc3d046ce83c4bbf4c25e1e0547b9c441c01d30922d812e887dc5f125c12"}, - {file = "cryptography-42.0.2-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:52ed9ebf8ac602385126c9a2fe951db36f2cb0c2538d22971487f89d0de4065a"}, - {file = "cryptography-42.0.2-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:141e2aa5ba100d3788c0ad7919b288f89d1fe015878b9659b307c9ef867d3a65"}, - {file = "cryptography-42.0.2.tar.gz", hash = "sha256:e0ec52ba3c7f1b7d813cd52649a5b3ef1fc0d433219dc8c93827c57eab6cf888"}, + {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_universal2.whl", hash = "sha256:ffc73996c4fca3d2b6c1c8c12bfd3ad00def8621da24f547626bf06441400449"}, + {file = "cryptography-42.0.4-cp37-abi3-macosx_10_12_x86_64.whl", hash = "sha256:db4b65b02f59035037fde0998974d84244a64c3265bdef32a827ab9b63d61b18"}, + {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:dad9c385ba8ee025bb0d856714f71d7840020fe176ae0229de618f14dae7a6e2"}, + {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:69b22ab6506a3fe483d67d1ed878e1602bdd5912a134e6202c1ec672233241c1"}, + {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:e09469a2cec88fb7b078e16d4adec594414397e8879a4341c6ace96013463d5b"}, + {file = "cryptography-42.0.4-cp37-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:3e970a2119507d0b104f0a8e281521ad28fc26f2820687b3436b8c9a5fcf20d1"}, + {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:e53dc41cda40b248ebc40b83b31516487f7db95ab8ceac1f042626bc43a2f992"}, + {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:c3a5cbc620e1e17009f30dd34cb0d85c987afd21c41a74352d1719be33380885"}, + {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:6bfadd884e7280df24d26f2186e4e07556a05d37393b0f220a840b083dc6a824"}, + {file = "cryptography-42.0.4-cp37-abi3-musllinux_1_2_x86_64.whl", hash = 
"sha256:01911714117642a3f1792c7f376db572aadadbafcd8d75bb527166009c9f1d1b"}, + {file = "cryptography-42.0.4-cp37-abi3-win32.whl", hash = "sha256:fb0cef872d8193e487fc6bdb08559c3aa41b659a7d9be48b2e10747f47863925"}, + {file = "cryptography-42.0.4-cp37-abi3-win_amd64.whl", hash = "sha256:c1f25b252d2c87088abc8bbc4f1ecbf7c919e05508a7e8628e6875c40bc70923"}, + {file = "cryptography-42.0.4-cp39-abi3-macosx_10_12_universal2.whl", hash = "sha256:15a1fb843c48b4a604663fa30af60818cd28f895572386e5f9b8a665874c26e7"}, + {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a1327f280c824ff7885bdeef8578f74690e9079267c1c8bd7dc5cc5aa065ae52"}, + {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:6ffb03d419edcab93b4b19c22ee80c007fb2d708429cecebf1dd3258956a563a"}, + {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_aarch64.whl", hash = "sha256:1df6fcbf60560d2113b5ed90f072dc0b108d64750d4cbd46a21ec882c7aefce9"}, + {file = "cryptography-42.0.4-cp39-abi3-manylinux_2_28_x86_64.whl", hash = "sha256:44a64043f743485925d3bcac548d05df0f9bb445c5fcca6681889c7c3ab12764"}, + {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_aarch64.whl", hash = "sha256:3c6048f217533d89f2f8f4f0fe3044bf0b2090453b7b73d0b77db47b80af8dff"}, + {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_1_x86_64.whl", hash = "sha256:6d0fbe73728c44ca3a241eff9aefe6496ab2656d6e7a4ea2459865f2e8613257"}, + {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_aarch64.whl", hash = "sha256:887623fe0d70f48ab3f5e4dbf234986b1329a64c066d719432d0698522749929"}, + {file = "cryptography-42.0.4-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:ce8613beaffc7c14f091497346ef117c1798c202b01153a8cc7b8e2ebaaf41c0"}, + {file = "cryptography-42.0.4-cp39-abi3-win32.whl", hash = "sha256:810bcf151caefc03e51a3d61e53335cd5c7316c0a105cc695f0959f2c638b129"}, + {file = "cryptography-42.0.4-cp39-abi3-win_amd64.whl", hash = "sha256:a0298bdc6e98ca21382afe914c642620370ce0470a01e1bef6dd9b5354c36854"}, + {file = "cryptography-42.0.4-pp310-pypy310_pp73-macosx_10_12_x86_64.whl", hash = "sha256:5f8907fcf57392cd917892ae83708761c6ff3c37a8e835d7246ff0ad251d9298"}, + {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:12d341bd42cdb7d4937b0cabbdf2a94f949413ac4504904d0cdbdce4a22cbf88"}, + {file = "cryptography-42.0.4-pp310-pypy310_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:1cdcdbd117681c88d717437ada72bdd5be9de117f96e3f4d50dab3f59fd9ab20"}, + {file = "cryptography-42.0.4-pp310-pypy310_pp73-win_amd64.whl", hash = "sha256:0e89f7b84f421c56e7ff69f11c441ebda73b8a8e6488d322ef71746224c20fce"}, + {file = "cryptography-42.0.4-pp39-pypy39_pp73-macosx_10_12_x86_64.whl", hash = "sha256:f1e85a178384bf19e36779d91ff35c7617c885da487d689b05c1366f9933ad74"}, + {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_aarch64.whl", hash = "sha256:d2a27aca5597c8a71abbe10209184e1a8e91c1fd470b5070a2ea60cafec35bcd"}, + {file = "cryptography-42.0.4-pp39-pypy39_pp73-manylinux_2_28_x86_64.whl", hash = "sha256:4e36685cb634af55e0677d435d425043967ac2f3790ec652b2b88ad03b85c27b"}, + {file = "cryptography-42.0.4-pp39-pypy39_pp73-win_amd64.whl", hash = "sha256:f47be41843200f7faec0683ad751e5ef11b9a56a220d57f300376cd8aba81660"}, + {file = "cryptography-42.0.4.tar.gz", hash = "sha256:831a4b37accef30cccd34fcb916a5d7b5be3cbbe27268a02832c3e450aea39cb"}, ] [package.dependencies] From 09c3c085df39a57f2981d6bade0e65d5a8a10b4c Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 29 Feb 
2024 15:41:25 +0000 Subject: [PATCH 25/47] metadata.csv populated Started working on issue 1355 as well, it's too tightly coupled. Some work remaining re that: - when tag created in UI, make sure upload_name attribute is populated --- viewer/download_structures.py | 140 ++++++++++++++++--- viewer/migrations/0046_auto_20240228_1651.py | 38 +++++ viewer/models.py | 8 +- viewer/serializers.py | 4 + viewer/target_loader.py | 3 +- viewer/target_set_upload.py | 3 +- 6 files changed, 172 insertions(+), 24 deletions(-) create mode 100644 viewer/migrations/0046_auto_20240228_1651.py diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 1fd31bb0..2ada27fe 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -8,19 +8,27 @@ import json import logging import os +import re import shutil import uuid import zipfile from dataclasses import dataclass from datetime import datetime, timedelta, timezone -from io import BytesIO +from io import BytesIO, StringIO from pathlib import Path from typing import Any, Dict import pandoc from django.conf import settings - -from viewer.models import DownloadLinks, SiteObservation +from django.db.models import Exists, OuterRef, Subquery + +from viewer.models import ( + DownloadLinks, + SiteObservation, + SiteObservationTag, + SiteObvsSiteObservationTag, + TagCategory, +) from viewer.utils import clean_filename logger = logging.getLogger(__name__) @@ -50,6 +58,43 @@ 'readme': (''), } +TAG_CATEGORIES = ( + 'ConformerSites', + 'CanonSites', + 'CrystalformSites', + 'Quatassemblies', + 'Crystalforms', +) +CURATED_TAG_CATEGORIES = ('Series', 'Forum', 'Other') + + +class TagSubquery(Subquery): + """Annotate SiteObservation with tag of given category""" + + def __init__(self, category): + query = SiteObservationTag.objects.filter( + pk=Subquery( + SiteObvsSiteObservationTag.objects.filter( + site_observation=OuterRef('pk'), + site_obvs_tag__category=TagCategory.objects.get( + category=category, + ), + ).values('site_obvs_tag')[:1] + ) + ).values('tag')[0:1] + super().__init__(query) + + +class CuratedTagSubquery(Exists): + """Annotate SiteObservation with tag of given category""" + + def __init__(self, tag): + query = SiteObvsSiteObservationTag.objects.filter( + site_observation=OuterRef('pk'), + site_obvs_tag=tag, + ) + super().__init__(query) + @dataclass(frozen=True) class ArchiveFile: @@ -84,6 +129,9 @@ class ArchiveFile: _ERROR_FILE = 'errors.csv' +# unlike v1, metadata doesn't exist anymore, needs compiling +_METADATA_FILE = 'metadata.csv' + def _add_file_to_zip(ziparchive, param, filepath): """Add the requested file to the zip archive. 
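
# [editor's note] The TagSubquery/CuratedTagSubquery helpers added above use
# Django's correlated-subquery pattern: OuterRef('pk') refers to the row
# currently being annotated, the [:1] slice reduces the subquery to a single
# scalar value, and Exists(...) collapses it to a boolean. A minimal, hedged
# sketch of the same pattern; the Item/Label models are illustrative
# stand-ins, not part of this patch:

from django.db.models import Exists, OuterRef, Subquery

def with_label_columns(item_qs, label_model):
    # For each Item row, attach the text of its first Label (or None) and
    # a boolean flag saying whether any Label points at it at all.
    first_label = Subquery(
        label_model.objects.filter(item=OuterRef('pk')).values('text')[:1]
    )
    has_label = Exists(label_model.objects.filter(item=OuterRef('pk')))
    return item_qs.annotate(first_label=first_label, has_label=has_label)
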
@@ -350,6 +398,58 @@ def _trans_matrix_files_zip(ziparchive, target):
             _add_empty_file(ziparchive, archive_path)
 
 
+def _metadata_file_zip(ziparchive, target):
+    """Compile and add metadata file to archive."""
+    logger.info('+ Processing metadata')
+
+    annotations = {}
+    values = ['code', 'longcode', 'cmpd__compound_code', 'smiles']
+    header = ['Code', 'Long code', 'Compound code', 'Smiles']
+
+    for category in TagCategory.objects.filter(category__in=TAG_CATEGORIES):
+        tag = f'tag_{category.category.lower()}'
+        values.append(tag)
+        header.append(category.category)
+        annotations[tag] = TagSubquery(category.category)
+
+    pattern = re.compile(r'\W+')
+    for tag in SiteObservationTag.objects.filter(
+        category__in=TagCategory.objects.filter(category__in=CURATED_TAG_CATEGORIES),
+        target=target,
+    ):
+        # for reasons unknown, mypy thinks tag is a string
+        tagname = f'tag_{pattern.sub("_", tag.tag).strip().lower()}'  # type: ignore[attr-defined]
+        values.append(tagname)
+        header.append(f'[{tag.category}] {tag.tag}')  # type: ignore[attr-defined]
+        annotations[tagname] = CuratedTagSubquery(tag)
+
+    # fmt: off
+    qs = SiteObservation.filter_manager.by_target(
+        target=target,
+    ).prefetch_related(
+        'cmpd',
+        'siteobservationtags',
+    ).annotate(**annotations).values_list(*values)
+    # fmt: on
+
+    buff = StringIO()
+    buff.write(','.join(header))
+    buff.write('\n')
+    for so_values in qs:
+        buff.write(
+            ','.join(
+                [
+                    str(k) if k else 'False' if isinstance(k, bool) else ''
+                    for k in so_values
+                ]
+            )
+        )
+        buff.write('\n')
+
+    ziparchive.writestr(_METADATA_FILE, buff.getvalue())
+    logger.info('+ Processed metadata')
+
+
 def _extra_files_zip(ziparchive, target):
     """If an extra info folder exists at the target root level, then copy the contents to the output file as is.
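
# [editor's note] _metadata_file_zip above builds each CSV row by joining
# values with bare commas, so a curated tag name or compound code that
# itself contains a comma would shift the columns. A hedged sketch of the
# same write loop using the standard csv module, assuming the header, qs
# and ziparchive names from the function above:

import csv
from io import StringIO

def _write_metadata_csv(ziparchive, header, qs):
    buff = StringIO()
    writer = csv.writer(buff)  # quotes and escapes awkward fields for us
    writer.writerow(header)
    for so_values in qs:
        writer.writerow(
            [str(k) if k else 'False' if isinstance(k, bool) else '' for k in so_values]
        )
    ziparchive.writestr(_METADATA_FILE, buff.getvalue())
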
@@ -514,7 +614,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host error_filename = os.path.join(download_path, _ERROR_FILE) error_file = open(error_filename, "w", encoding="utf-8") - error_file.write("Param,Code,Invalid file reference\n") + error_file.write("Param,Code,File not found when assembling download\n") errors = 0 # If a single sdf file is also wanted then create file to @@ -560,14 +660,17 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host _smiles_files_zip(zip_contents, ziparchive, download_path) # Add the metadata file from the target - if zip_contents['metadata_info'] and not _add_file_to_zip( - ziparchive, 'metadata_info', zip_contents['metadata_info'] - ): - error_file.write( - f"metadata_info,{target},{zip_contents['metadata_info']}\n" - ) - errors += 1 - logger.warning('After _add_file_to_zip() errors=%s', errors) + # if zip_contents['metadata_info'] and not _add_file_to_zip( + # ziparchive, 'metadata_info', zip_contents['metadata_info'] + # ): + # error_file.write( + # f"metadata_info,{target},{zip_contents['metadata_info']}\n" + # ) + # errors += 1 + # logger.warning('After _add_file_to_zip() errors=%s', errors) + + if zip_contents['metadata_info']: + _metadate_file_zip(ziparchive, target) if zip_contents['trans_matrix_info']: _trans_matrix_files_zip(ziparchive, target) @@ -599,7 +702,7 @@ def _protein_garbage_filter(proteins): return proteins.exclude(code__startswith=r'references_') -def _create_structures_dict(target, site_obvs, protein_params, other_params): +def _create_structures_dict(site_obvs, protein_params, other_params): """Write a ZIP file containing data from an input dictionary Args: @@ -746,13 +849,10 @@ def _create_structures_dict(target, site_obvs, protein_params, other_params): for molecule in site_obvs: zip_contents['molecules']['smiles_info'].update({molecule.smiles: None}) - # Add the metadata file from the target - if other_params['metadata_info'] is True: - zip_contents['metadata_info'] = target.metadata.name + zip_contents['metadata_info'] = other_params['metadata_info'] - # Add the metadata file from the target - if other_params['trans_matrix_info'] is True: - zip_contents['trans_matrix_info'] = True + # Add the trans matrix files + zip_contents['trans_matrix_info'] = other_params['trans_matrix_info'] return zip_contents @@ -900,7 +1000,7 @@ def create_or_return_download_link(request, target, site_observations): logger.info('Creating new download (file_url=%s)...', file_url) zip_contents = _create_structures_dict( - target, site_observations, protein_params, other_params + site_observations, protein_params, other_params ) _create_structures_zip(target, zip_contents, file_url, original_search, host) diff --git a/viewer/migrations/0046_auto_20240228_1651.py b/viewer/migrations/0046_auto_20240228_1651.py new file mode 100644 index 00000000..213834cf --- /dev/null +++ b/viewer/migrations/0046_auto_20240228_1651.py @@ -0,0 +1,38 @@ +# Generated by Django 3.2.23 on 2024-02-28 16:51 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0045_auto_20240221_1203'), + ] + + operations = [ + migrations.AddField( + model_name='sessionprojecttag', + name='upload_name', + field=models.CharField( + default='', help_text='The generated name of the tag', max_length=200 + ), + preserve_default=False, + ), + migrations.AddField( + model_name='siteobservationtag', + name='upload_name', + field=models.CharField( + default='default', + 
help_text='The generated name of the tag', + max_length=200, + ), + preserve_default=False, + ), + migrations.AlterUniqueTogether( + name='sessionprojecttag', + unique_together={('upload_name', 'target')}, + ), + migrations.AlterUniqueTogether( + name='siteobservationtag', + unique_together={('upload_name', 'target')}, + ), + ] diff --git a/viewer/models.py b/viewer/models.py index e10c58c6..fc430d5f 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -1177,6 +1177,9 @@ class Meta: class Tag(models.Model): tag = models.CharField(max_length=200, help_text="The (unique) name of the tag") + upload_name = models.CharField( + max_length=200, help_text="The generated name of the tag" + ) category = models.ForeignKey(TagCategory, on_delete=models.CASCADE) target = models.ForeignKey(Target, on_delete=models.CASCADE) user = models.ForeignKey(User, null=True, on_delete=models.CASCADE) @@ -1196,9 +1199,10 @@ def __str__(self) -> str: return f"{self.tag}" def __repr__(self) -> str: - return "" % ( + return "" % ( self.id, self.tag, + self.upload_name, self.category, self.target, self.user, @@ -1207,7 +1211,7 @@ def __repr__(self) -> str: class Meta: abstract = True unique_together = ( - 'tag', + 'upload_name', 'target', ) diff --git a/viewer/serializers.py b/viewer/serializers.py index 8694419f..327631c1 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -713,6 +713,10 @@ class SiteObservationTagSerializer(serializers.ModelSerializer): class Meta: model = models.SiteObservationTag fields = '__all__' + extra_kwargs = { + "id": {"read_only": True}, + "upload_name": {"read_only": True}, + } class SessionProjectTagSerializer(serializers.ModelSerializer): diff --git a/viewer/target_loader.py b/viewer/target_loader.py index b91e64e9..11df2b51 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1859,7 +1859,7 @@ def _tag_observations(self, tag, category, so_list): so_group.save() try: - so_tag = SiteObservationTag.objects.get(tag=tag, target=self.target) + so_tag = SiteObservationTag.objects.get(upload_name=tag, target=self.target) # Tag already exists # Apart from the new mol_group and molecules, we shouldn't be # changing anything. 
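
# [editor's note] The new upload_name column gives tags a stable identity:
# the visible 'tag' text can be renamed by users, while loaders keep
# matching on the name generated at upload time (hence unique_together
# moving from 'tag' to 'upload_name'). A hedged sketch of the resulting
# lookup pattern, with names borrowed from the hunks above:

def _get_or_create_tag(tag_name, target, category):
    try:
        so_tag = SiteObservationTag.objects.get(upload_name=tag_name, target=target)
    except SiteObservationTag.DoesNotExist:
        so_tag = SiteObservationTag()
        so_tag.tag = tag_name          # display name; may be edited later
        so_tag.upload_name = tag_name  # written once at creation, never updated
        so_tag.target = target
        so_tag.category = category
        so_tag.save()
    return so_tag
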
@@ -1867,6 +1867,7 @@ def _tag_observations(self, tag, category, so_list):
         except SiteObservationTag.DoesNotExist:
             so_tag = SiteObservationTag()
             so_tag.tag = tag
+            so_tag.upload_name = tag
             so_tag.category = TagCategory.objects.get(category=category)
             so_tag.target = self.target
             so_tag.mol_group = so_group
diff --git a/viewer/target_set_upload.py b/viewer/target_set_upload.py
index c46a7531..bc4d9cc1 100644
--- a/viewer/target_set_upload.py
+++ b/viewer/target_set_upload.py
@@ -637,7 +637,7 @@ def specifc_site(rd_mols, site_observations, target, site_description=None):
 
     try:
         site_obvs_tag = SiteObservationTag.objects.get(
-            tag=site_description, target_id=target.id
+            upload_name=site_description, target_id=target.id
         )
     except SiteObservationTag.DoesNotExist:
         site_obvs_tag = None
@@ -646,6 +646,7 @@
         # New site/tag or the tag has been deleted
         site_obvs_tag = SiteObservationTag()
         site_obvs_tag.tag = site_description
+        site_obvs_tag.upload_name = site_description
         site_obvs_tag.category = TagCategory.objects.get(category='Sites')
         site_obvs_tag.target = target
         site_obvs_tag.mol_group = site_obvs_group

From 0aa1f7da2f7d2998c09e4569f57a2ae223971bb0 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Fri, 1 Mar 2024 09:24:51 +0000
Subject: [PATCH 26/47] upload_name automatically populated when creating tags in UI

Only populated on creation, updates won't touch it
---
 viewer/serializers.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/viewer/serializers.py b/viewer/serializers.py
index 327631c1..d33b1b4d 100644
--- a/viewer/serializers.py
+++ b/viewer/serializers.py
@@ -710,6 +710,11 @@ class SiteObservationTagSerializer(serializers.ModelSerializer):
         many=True, queryset=models.SiteObservation.objects.all()
     )
 
+    def create(self, validated_data):
+        # populate 'upload_name' field at object creation
+        validated_data['upload_name'] = validated_data['tag']
+        return super().create(validated_data)
+
     class Meta:
         model = models.SiteObservationTag
         fields = '__all__'

From bc7fcd196e554eddace688994249768b62027be8 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Fri, 1 Mar 2024 13:03:26 +0000
Subject: [PATCH 27/47] changes to api/download_structures

- apo_file, bound_file, sdf_info and smiles_info merged into
  all_aligned_structures
- added pdb_info field

NB! download_structures was required to provide ligand_pdb as well. This
wasn't tracked previously, so I added field to SiteObservation model.
Meaning there's a migration and on stack deployment data needs to be
wiped and reuploaded
---
 viewer/download_structures.py                | 124 +++++++++----------
 viewer/migrations/0047_auto_20240301_1243.py |  24 ++++
 viewer/models.py                             |   3 +
 viewer/serializers.py                        |  12 +-
 viewer/target_loader.py                      |   3 +
 viewer/views.py                              |  12 +-
 6 files changed, 97 insertions(+), 81 deletions(-)
 create mode 100644 viewer/migrations/0047_auto_20240301_1243.py

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 2ada27fe..9d351b68 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -31,6 +31,8 @@
 )
 from viewer.utils import clean_filename
 
+from .serializers import DownloadStructuresSerializer
+
 logger = logging.getLogger(__name__)
 
 # Length of time to keep records of dynamic links.
@@ -41,18 +43,23 @@
 # the protein code subdirectory of the aligned directory
 # (as for the target upload).
_ZIP_FILEPATHS = { - 'apo_file': ('aligned'), - 'bound_file': ('aligned'), - 'cif_info': ('aligned'), - 'mtz_info': ('aligned'), - 'map_info': ('aligned'), - 'sigmaa_file': ('aligned'), - 'diff_file': ('aligned'), - 'event_file': ('aligned'), - 'sdf_info': ('aligned'), + 'apo_file': ('aligned'), # SiteObservation: apo_file + 'apo_solv_file': ('aligned'), # SiteObservation: apo_solv_file + 'apo_desolv_file': ('aligned'), # SiteObservation: apo_desolv_file + 'bound_file': ('aligned'), # SiteObservation: bound_file + 'sdf_info': ('aligned'), # SiteObservation: ligand_mol_file (indirectly) + 'ligand_pdb': ('aligned'), # SiteObservation: ligand_pdb + 'smiles_info': (''), # SiteObservation: smiles_info (indirectly) + # those above are all controlled by serializer's all_aligned_structures flag + 'sigmaa_file': ('aligned'), # SiteObservation: sigmaa_file + 'diff_file': ('aligned'), # SiteObservation: diff_file + 'event_file': ('aligned'), # SiteObservation: ligand_pdb + 'pdb_info': ('aligned'), # Experiment: cif_info + 'cif_info': ('aligned'), # Experiment: cif_info + 'mtz_info': ('aligned'), # Experiment: mtz_info + 'map_info': ('aligned'), # Experiment: map_info (multiple files) 'single_sdf_file': (''), 'metadata_info': (''), - 'smiles_info': (''), 'trans_matrix_info': (''), 'extra_files': ('extra_files'), 'readme': (''), @@ -107,14 +114,18 @@ class ArchiveFile: # NB you may need to add a version number to this at some point... zip_template = { 'proteins': { - 'apo_file': {}, # from experiment - 'bound_file': {}, # x - 'cif_info': {}, # from experiment - 'mtz_info': {}, # from experiment - 'map_info': {}, # from experiment - 'event_file': {}, # x + 'apo_file': {}, + 'apo_solv_file': {}, + 'apo_desolv_file': {}, + 'bound_file': {}, + 'pdb_info': {}, + 'cif_info': {}, + 'mtz_info': {}, + 'map_info': {}, + 'event_file': {}, 'diff_file': {}, 'sigmaa_file': {}, + 'ligand_pdb': {}, }, 'molecules': { 'sdf_files': {}, @@ -755,13 +766,14 @@ def _create_structures_dict(site_obvs, protein_params, other_params): elif param in [ 'bound_file', + 'apo_file', 'apo_solv_file', 'apo_desolv_file', - 'apo_file', 'sigmaa_file', 'event_file', 'artefacts_file', 'pdb_header_file', + 'ligand_pdb', 'diff_file', ]: # siteobservation object @@ -866,55 +878,35 @@ def get_download_params(request): Returns: protein_params, other_params """ - protein_param_flags = [ - 'apo_file', - 'bound_file', - 'cif_info', - 'mtz_info', - 'map_info', - 'event_file', - 'sigmaa_file', - 'diff_file', - ] - - other_param_flags = [ - 'sdf_info', - 'single_sdf_file', - 'metadata_info', - 'smiles_info', - 'trans_matrix_info', - ] - - # protein_params = {'pdb_info': request.data['pdb_info'], - # 'bound_info': request.data['bound_info'], - # 'cif_info': request.data['cif_info'], - # 'mtz_info': request.data['mtz_info'], - # 'diff_info': request.data['diff_info'], - # 'event_info': request.data['event_info'], - # 'sigmaa_info': request.data['sigmaa_info'], - # 'trans_matrix_info': - # request.data['trans_matrix_info']} - protein_params = {} - for param in protein_param_flags: - protein_params[param] = False - if param in request.data and request.data[param] in [True, 'true']: - protein_params[param] = True - - # other_params = {'sdf_info': request.data['sdf_info'], - # 'single_sdf_file': request.data['single_sdf_file'], - # 'metadata_info': request.data['metadata_info'], - # 'smiles_info': request.data['smiles_info']} - other_params = {} - for param in other_param_flags: - other_params[param] = False - if param in request.data and request.data[param] 
in [True, 'true']: - other_params[param] = True - - static_link = False - if 'static_link' in request.data and ( - request.data['static_link'] is True or request.data['static_link'] == 'true' - ): - static_link = True + + serializer = DownloadStructuresSerializer(data=request.data) + serializer.is_valid() + logger.debug('serializer data: %s', serializer.validated_data) + + protein_params = { + 'pdb_info': serializer.validated_data['pdb_info'], + 'apo_file': serializer.validated_data['all_aligned_structures'], + 'bound_file': serializer.validated_data['all_aligned_structures'], + 'apo_solv_file': serializer.validated_data['all_aligned_structures'], + 'apo_desolv_file': serializer.validated_data['all_aligned_structures'], + 'ligand_pdb': serializer.validated_data['all_aligned_structures'], + 'cif_info': serializer.validated_data['cif_info'], + 'mtz_info': serializer.validated_data['mtz_info'], + 'map_info': serializer.validated_data['map_info'], + 'event_file': serializer.validated_data['event_file'], + 'sigmaa_file': serializer.validated_data['sigmaa_file'], + 'diff_file': serializer.validated_data['diff_file'], + } + + other_params = { + 'sdf_info': serializer.validated_data['all_aligned_structures'], + 'single_sdf_file': serializer.validated_data['single_sdf_file'], + 'metadata_info': serializer.validated_data['metadata_info'], + 'smiles_info': serializer.validated_data['all_aligned_structures'], + 'trans_matrix_info': serializer.validated_data['trans_matrix_info'], + } + + static_link = serializer.validated_data['static_link'] return protein_params, other_params, static_link diff --git a/viewer/migrations/0047_auto_20240301_1243.py b/viewer/migrations/0047_auto_20240301_1243.py new file mode 100644 index 00000000..e153c066 --- /dev/null +++ b/viewer/migrations/0047_auto_20240301_1243.py @@ -0,0 +1,24 @@ +# Generated by Django 3.2.23 on 2024-03-01 12:43 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0046_auto_20240228_1651'), + ] + + operations = [ + migrations.AddField( + model_name='historicalsiteobservation', + name='ligand_pdb', + field=models.TextField(max_length=255, null=True), + ), + migrations.AddField( + model_name='siteobservation', + name='ligand_pdb', + field=models.FileField( + max_length=255, null=True, upload_to='target_loader_data/' + ), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index fc430d5f..2fe21e90 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -465,6 +465,9 @@ class SiteObservation(models.Model): seq_id = models.IntegerField() chain_id = models.CharField(max_length=1) ligand_mol_file = models.TextField(null=True) + ligand_pdb = models.FileField( + upload_to="target_loader_data/", null=True, max_length=255 + ) objects = models.Manager() history = HistoricalRecords() diff --git a/viewer/serializers.py b/viewer/serializers.py index d33b1b4d..b601ae04 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -832,22 +832,20 @@ class Meta: class DownloadStructuresSerializer(serializers.Serializer): - target_name = serializers.CharField(max_length=200) - proteins = serializers.CharField(max_length=5000) - apo_file = serializers.BooleanField(default=False) - bound_file = serializers.BooleanField(default=False) + target_name = serializers.CharField(max_length=200, default=None) + proteins = serializers.CharField(max_length=5000, default=None) + all_aligned_structures = serializers.BooleanField(default=False) + pdb_info = serializers.BooleanField(default=False) cif_info 
= serializers.BooleanField(default=False) mtz_info = serializers.BooleanField(default=False) diff_file = serializers.BooleanField(default=False) event_file = serializers.BooleanField(default=False) sigmaa_file = serializers.BooleanField(default=False) map_info = serializers.BooleanField(default=False) - sdf_info = serializers.BooleanField(default=False) single_sdf_file = serializers.BooleanField(default=False) metadata_info = serializers.BooleanField(default=False) - smiles_info = serializers.BooleanField(default=False) static_link = serializers.BooleanField(default=False) - file_url = serializers.CharField(max_length=200) + file_url = serializers.CharField(max_length=200, default=None) trans_matrix_info = serializers.BooleanField(default=False) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 11df2b51..269f1b45 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1324,6 +1324,7 @@ def process_site_observation( sigmaa_file, diff_file, event_file, + ligand_pdb, ) = self.validate_files( obj_identifier=experiment_id, file_struct=data, @@ -1339,6 +1340,7 @@ def process_site_observation( "sigmaa_map", # NB! keys in meta_aligner not yet updated "diff_map", # NB! keys in meta_aligner not yet updated "event_map", + "ligand_pdb", ), validate_files=validate_files, ) @@ -1375,6 +1377,7 @@ def process_site_observation( "diff_file": str(self._get_final_path(diff_file)), "event_file": str(self._get_final_path(event_file)), "artefacts_file": str(self._get_final_path(artefacts_file)), + "ligand_pdb": str(self._get_final_path(ligand_pdb)), "pdb_header_file": "currently missing", "ligand_mol_file": mol_data, } diff --git a/viewer/views.py b/viewer/views.py index 8da3c3d0..56f4c43e 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1462,14 +1462,10 @@ def create(self, request): target = None logger.info('Given target_name "%s"', target_name) - # Check target_name is valid - # (it should natch the title of an existing target) - for targ in self.queryset: - if targ.title == target_name: - target = targ - break - - if not target: + # Check target_name is valid: + try: + target = self.queryset.get(title=target_name) + except models.Target.DoesNotExist: msg = f'Either the Target "{target_name}" is not present or you are not permitted access it' logger.warning(msg) content = {'message': msg} From 5f2320a42c3c3ed1ef8ef0fd4618f5b88ff30c83 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 1 Mar 2024 14:43:20 +0000 Subject: [PATCH 28/47] don't download neighbourhoods.yaml unless trans_matrix_info is checked --- viewer/download_structures.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 9d351b68..13f25466 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -510,7 +510,7 @@ def _extra_files_zip(ziparchive, target): logger.info('Processed %s extra files', num_processed) -def _yaml_files_zip(ziparchive, target): +def _yaml_files_zip(ziparchive, target, transforms_requested: bool = False) -> None: """Add all yaml files (except transforms) from upload to ziparchive""" for experiment_upload in target.experimentupload_set.order_by('commit_datetime'): @@ -547,6 +547,9 @@ def _yaml_files_zip(ziparchive, target): for file in yaml_files: logger.info('Adding yaml file "%s"...', file) + if not transforms_requested and file.name == 'neighbourhoods.yaml': + # don't add this file if transforms are not requested + continue ziparchive.write(file, 
str(Path(archive_path).joinpath(file.name))) @@ -688,7 +691,9 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host _extra_files_zip(ziparchive, target) - _yaml_files_zip(ziparchive, target) + _yaml_files_zip( + ziparchive, target, transforms_requested=zip_contents['trans_matrix_info'] + ) _document_file_zip(ziparchive, download_path, original_search, host) From 5f268d752cf3633539ccdb6006eba7b127032164 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 1 Mar 2024 16:36:48 +0000 Subject: [PATCH 29/47] fixed error handling (errors.csv) and not returning combined sdf --- viewer/download_structures.py | 98 +++++++++++++---------------------- 1 file changed, 37 insertions(+), 61 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 13f25466..8286ee83 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -144,36 +144,6 @@ class ArchiveFile: _METADATA_FILE = 'metadata.csv' -def _add_file_to_zip(ziparchive, param, filepath): - """Add the requested file to the zip archive. - - Args: - ziparchive: Handle of zip archive - param: parameter of filelist - filepath: filepath from record - - Returns: - [boolean]: [True of record added] - """ - logger.debug('+_add_file_to_zip: %s, %s', param, filepath) - if not filepath: - # Odd - assume success - logger.error('No filepath value') - return True - - fullpath = os.path.join(settings.MEDIA_ROOT, filepath) - cleaned_filename = clean_filename(filepath) - archive_path = os.path.join(_ZIP_FILEPATHS[param], cleaned_filename) - if os.path.isfile(fullpath): - ziparchive.write(fullpath, archive_path) - return True - else: - logger.warning('filepath "%s" is not a file', filepath) - _add_empty_file(ziparchive, archive_path) - - return False - - def _is_mol_or_sdf(path): """Returns True if the file and path look like a MOL or SDF file. It does this by simply checking the file's extension. @@ -231,6 +201,27 @@ def _read_and_patch_molecule_name(path, molecule_name=None): return content +def _patch_molecule_name(site_observation): + """Patch the MOL or SDF file with molecule name. + + Processes the content of ligand_mol attribute of the + site_observation object. Returns the content as string. + + Alternative to _read_and_patch_molecule_name function above + which operates on files. As ligand_mol is now stored as text, + slightly different approach was necessary. + + """ + logger.debug('Patching MOL/SDF of "%s"', site_observation) + + # Now read the file, checking the first line + # and setting it to the molecule name if it's blank. + lines = site_observation.ligand_mol_file.split('\n') + if not lines[0].strip(): + lines[0] = site_observation.long_code + return '\n'.join(lines) + + def _add_file_to_zip_aligned(ziparchive, code, archive_file): """Add the requested file to the zip archive. @@ -264,10 +255,11 @@ def _add_file_to_zip_aligned(ziparchive, code, archive_file): ziparchive.write(filepath, archive_file.archive_path) return True elif archive_file.site_observation: - # NB! this bypasses _read_and_patch_molecule_name. problem? 
ziparchive.writestr(
+                archive_file.archive_path,
+                _patch_molecule_name(archive_file.site_observation),
             )
+            return True
         else:
             logger.warning('filepath "%s" is not a file', filepath)
             _add_empty_file(ziparchive, archive_file.archive_path)
@@ -285,17 +277,14 @@ def _add_file_to_sdf(combined_sdf_file, archive_file):
     Returns:
         [boolean]: [True of record added]
     """
-    media_root = settings.MEDIA_ROOT
-
     if not archive_file.path:
         # Odd - assume success
        logger.error('No filepath value')
        return True
 
-    fullpath = os.path.join(media_root, archive_file.path)
-    if os.path.isfile(fullpath):
+    if archive_file.path and archive_file.path != 'None':
         with open(combined_sdf_file, 'a', encoding='utf-8') as f_out:
-            patched_sdf_content = _read_and_patch_molecule_name(fullpath)
+            patched_sdf_content = _patch_molecule_name(archive_file.site_observation)
             f_out.write(patched_sdf_content)
         return True
     else:
@@ -315,8 +304,9 @@ def _protein_files_zip(zip_contents, ziparchive, error_file):
 
     for prot, prot_file in files.items():
         for f in prot_file:
+            # memo to self: f is ArchiveFile object
             if not _add_file_to_zip_aligned(ziparchive, prot, f):
-                error_file.write(f'{param},{prot},{f}\n')
+                error_file.write(f'{param},{prot},{f.archive_path}\n')
                 prot_errors += 1
     return prot_errors
 
@@ -673,16 +663,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host
         if zip_contents['molecules']['smiles_info']:
             _smiles_files_zip(zip_contents, ziparchive, download_path)
 
-        # Add the metadata file from the target
-        # if zip_contents['metadata_info'] and not _add_file_to_zip(
-        #     ziparchive, 'metadata_info', zip_contents['metadata_info']
-        # ):
-        #     error_file.write(
-        #         f"metadata_info,{target},{zip_contents['metadata_info']}\n"
-        #     )
-        #     errors += 1
-        #     logger.warning('After _add_file_to_zip() errors=%s', errors)
-
+        # compile and add metadata.csv
         if zip_contents['metadata_info']:
             _metadata_file_zip(ziparchive, target)
@@ -755,18 +736,17 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
             afile = []
             for f in model_attr:
                 # here the model_attr is already stringified
+                apath = Path('crystallographic_files').joinpath(so.code)
                 if model_attr and model_attr != 'None':
                     archive_path = str(
-                        Path('crystallographic_files')
-                        .joinpath(so.code)
-                        .joinpath(
+                        apath.joinpath(
                             Path(f)
                             .parts[-1]
                             .replace(so.experiment.code, so.code)
                         )
                     )
                 else:
-                    archive_path = param
+                    archive_path = str(apath.joinpath(param))
                 afile.append(ArchiveFile(path=f, archive_path=archive_path))
 
         elif param in [
@@ -787,18 +767,17 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
                 logger.debug(
                     'Adding param to zip: %s, value: %s', param, model_attr
                 )
+                apath = Path('aligned_files').joinpath(so.code)
                 if model_attr and model_attr != 'None':
                     archive_path = str(
-                        Path('aligned_files')
-                        .joinpath(so.code)
-                        .joinpath(
+                        apath.joinpath(
                             Path(model_attr.name)
                             .parts[-1]
                             .replace(so.longcode, so.code)
                         )
                     )
                 else:
-                    archive_path = param
+                    archive_path = str(apath.joinpath(param))
 
                 afile = [
                     ArchiveFile(
@@ -812,11 +791,8 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
 
             zip_contents['proteins'][param][so.code] = afile
 
-        if other_params['single_sdf_file'] is True:
-            zip_contents['molecules']['single_sdf_file'] = True
-
-        if other_params['sdf_info'] is True:
-            zip_contents['molecules']['sdf_info'] = True
+    zip_contents['molecules']['single_sdf_file'] = other_params['single_sdf_file']
+    zip_contents['molecules']['sdf_info'] =
other_params['sdf_info']
 
     # sdf information is held as a file on the Molecule record.
     if other_params['sdf_info'] or other_params['single_sdf_file']:

From 22f9641d4bca0afdac15d900797686ff6a885375 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Mon, 4 Mar 2024 12:16:03 +0000
Subject: [PATCH 30/47] fix: Added parsing directives to DownloadStructuresSerializer
---
 viewer/download_structures.py | 9 ++++++---
 viewer/serializers.py         | 6 +++---
 viewer/views.py               | 1 +
 3 files changed, 10 insertions(+), 6 deletions(-)

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 8286ee83..5036e018 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -242,7 +242,8 @@ def _add_file_to_zip_aligned(ziparchive, code, archive_file):
         logger.error('No filepath value')
         return True
 
-    filepath = str(Path(settings.MEDIA_ROOT).joinpath(archive_file.path))
+    # calling str on archive_file.path because it could be None
+    filepath = str(Path(settings.MEDIA_ROOT).joinpath(str(archive_file.path)))
     if Path(filepath).is_file():
         if _is_mol_or_sdf(filepath):
             # It's a MOL or SD file.
@@ -861,8 +862,10 @@ def get_download_params(request):
     """
 
     serializer = DownloadStructuresSerializer(data=request.data)
-    serializer.is_valid()
-    logger.debug('serializer data: %s', serializer.validated_data)
+    valid = serializer.is_valid()
+    logger.debug('serializer validated data: %s, %s', valid, serializer.validated_data)
+    if not valid:
+        logger.error('serializer errors: %s', serializer.errors)
 
     protein_params = {
         'pdb_info': serializer.validated_data['pdb_info'],
diff --git a/viewer/serializers.py b/viewer/serializers.py
index b601ae04..400c55dd 100644
--- a/viewer/serializers.py
+++ b/viewer/serializers.py
@@ -832,8 +832,8 @@ class Meta:
 
 class DownloadStructuresSerializer(serializers.Serializer):
-    target_name = serializers.CharField(max_length=200, default=None)
-    proteins = serializers.CharField(max_length=5000, default=None)
+    target_name = serializers.CharField(max_length=200, default=None, allow_blank=True)
+    proteins = serializers.CharField(max_length=5000, default='', allow_blank=True)
     all_aligned_structures = serializers.BooleanField(default=False)
     pdb_info = serializers.BooleanField(default=False)
     cif_info = serializers.BooleanField(default=False)
@@ -845,7 +845,7 @@ class DownloadStructuresSerializer(serializers.Serializer):
     single_sdf_file = serializers.BooleanField(default=False)
     metadata_info = serializers.BooleanField(default=False)
     static_link = serializers.BooleanField(default=False)
-    file_url = serializers.CharField(max_length=200, default=None)
+    file_url = serializers.CharField(max_length=200, default='', allow_blank=True)
     trans_matrix_info = serializers.BooleanField(default=False)
 
diff --git a/viewer/views.py b/viewer/views.py
index 56f4c43e..8c33b318 100644
--- a/viewer/views.py
+++ b/viewer/views.py
@@ -1425,6 +1425,7 @@ def create(self, request):
         this method.
""" logger.info('+ DownloadStructures.post') + logger.debug('DownloadStructures.post.data: %s', request.data) erase_out_of_date_download_records() From bc17249fa14886890449b4aba498d4316d1f27d4 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 4 Mar 2024 15:09:42 +0000 Subject: [PATCH 31/47] Consecutive numbering of observations under canon site --- viewer/target_loader.py | 105 ++++++++++++++++++++-------------------- 1 file changed, 53 insertions(+), 52 deletions(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 269f1b45..6b957663 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -5,7 +5,6 @@ import logging import math import os -import re import string import tarfile import uuid @@ -23,7 +22,7 @@ from django.contrib.postgres.aggregates import ArrayAgg from django.core.exceptions import MultipleObjectsReturned from django.db import IntegrityError, transaction -from django.db.models import Model +from django.db.models import Count, Model from django.db.models.base import ModelBase from django.utils import timezone @@ -1655,7 +1654,6 @@ def process_bundle(self): canon_site_confs=canon_site_conf_objects, ) - # values = ["canon_site_conf__canon_site", "cmpd"] values = ["experiment"] qs = ( SiteObservation.objects.values(*values) @@ -1663,58 +1661,61 @@ def process_bundle(self): .annotate(obvs=ArrayAgg("id")) .values_list("obvs", flat=True) ) - for elem in qs: - # objects in this group should be named with same scheme - so_group = SiteObservation.objects.filter(pk__in=elem) - # first process existing codes and find maximum value - codelist = so_group.filter(code__isnull=False).values_list( - "code", flat=True - ) - stripped = [] - for k in codelist: - try: - stripped.append(re.search(r"x\d*\D*", k).group(0)) - except AttributeError: - # code exists but seems to be non-standard. don't - # know if this has implications to upload - # processing - logger.error("Non-standard SiteObservation code: %s", k) - - # get the latest iterator position - iter_pos = "" - if stripped: - last = sorted(stripped)[-1] - try: - iter_pos = re.search(r"[^\d]+(?=\d*$)", last).group(0) - except AttributeError: - # technically it should be validated in previous try-catch block - logger.error("Non-standard SiteObservation code 2: %s", last) - - # ... 
and create new one starting from next item
-            suffix = alphanumerator(start_from=iter_pos)
-            for so in so_group.filter(code__isnull=True):
-                code_prefix = experiment_objects[so.experiment.code].index_data[
-                    "code_prefix"
-                ]
-                code = f"{code_prefix}{so.experiment.code.split('-')[1]}{next(suffix)}"
-
-                # test uniqueness for target
-                # TODO: this should ideally be solved by db engine, before
-                # rushing to write the trigger, have think about the
-                # loader concurrency situations
-                if SiteObservation.objects.filter(
-                    experiment__experiment_upload__target=self.target,
-                    code=code,
-                ).exists():
-                    msg = (
-                        f"short code {code} already exists for this target; "
-                        + "specify a code_prefix to resolve this conflict"
+        for elem in qs:
+            # fmt: off
+            subgroups = SiteObservation.objects.filter(
+                pk__in=elem,
+            ).order_by(
+                "canon_site_conf__canon_site",
+            ).annotate(
+                sites=Count("canon_site_conf__canon_site"),
+                obvs=ArrayAgg('id'),
+            ).order_by(
+                "-sites",
+            ).values_list("obvs", flat=True)
+            # fmt: on
+
+            suffix = alphanumerator()
+            for sub in subgroups:
+                # objects in this group should be named with same scheme
+                so_group = SiteObservation.objects.filter(pk__in=sub)
+
+                # memo to self: there used to be some code here to test
+                # the position of the iterator in existing entries. This
+                # was because it was assumed that adding v2 uploads could
+                # bring along new observations under an existing
+                # experiment. Following discussions with Conor, it seems
+                # that this will not be the case. But should it ever be,
+                # that code was deleted on 2024-03-04, if you need to
+                # check the history
+
+                for so in so_group.filter(code__isnull=True):
+                    code_prefix = experiment_objects[so.experiment.code].index_data[
+                        "code_prefix"
+                    ]
+                    # iter_pos = next(suffix)
+                    # code = f"{code_prefix}{so.experiment.code.split('-')[1]}{iter_pos}"
+                    code = (
+                        f"{code_prefix}{so.experiment.code.split('-')[1]}{next(suffix)}"
                     )
-                    self.report.log(logging.ERROR, msg)

-                so.code = code
-                so.save()
+                    # test uniqueness for target
+                    # TODO: this should ideally be solved by db engine, before
+                    # rushing to write the trigger, have a think about the
+                    # loader concurrency situations
+                    if SiteObservation.objects.filter(
+                        experiment__experiment_upload__target=self.target,
+                        code=code,
+                    ).exists():
+                        msg = (
+                            f"short code {code} already exists for this target; "
+                            + "specify a code_prefix to resolve this conflict"
+                        )
+                        self.report.log(logging.ERROR, msg)
+
+                    so.code = code
+                    so.save()

         # final remaining fk, attach reference site observation to canon_site_conf
         for val in canon_site_conf_objects.values():  # pylint: disable=no-member

From 3d8202a83baa63a49bf5690806f4cb6656ac8fd3 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Tue, 5 Mar 2024 11:07:58 +0000
Subject: [PATCH 32/47] SiteObservation.tag split to tag and tag_prefix (1361)

---
 viewer/migrations/0048_auto_20240305_1038.py | 26 ++++++++++++++++
 viewer/models.py                             |  3 ++
 viewer/serializers.py                        |  1 +
 viewer/target_loader.py                      | 31 +++++++++++---------
 4 files changed, 47 insertions(+), 14 deletions(-)
 create mode 100644 viewer/migrations/0048_auto_20240305_1038.py

diff --git a/viewer/migrations/0048_auto_20240305_1038.py b/viewer/migrations/0048_auto_20240305_1038.py
new file mode 100644
index 00000000..950f0605
--- /dev/null
+++ b/viewer/migrations/0048_auto_20240305_1038.py
@@ -0,0 +1,26 @@
+# Generated by Django 3.2.23 on 2024-03-05 10:38
+
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+    dependencies = [
+        ('viewer', '0047_auto_20240301_1243'),
+    ]
+
+    
operations = [ + migrations.AddField( + model_name='sessionprojecttag', + name='tag_prefix', + field=models.TextField( + help_text='Tag prefix for auto-generated tags', null=True + ), + ), + migrations.AddField( + model_name='siteobservationtag', + name='tag_prefix', + field=models.TextField( + help_text='Tag prefix for auto-generated tags', null=True + ), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index 2fe21e90..ac9742b8 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -1180,6 +1180,9 @@ class Meta: class Tag(models.Model): tag = models.CharField(max_length=200, help_text="The (unique) name of the tag") + tag_prefix = models.TextField( + null=True, help_text="Tag prefix for auto-generated tags" + ) upload_name = models.CharField( max_length=200, help_text="The generated name of the tag" ) diff --git a/viewer/serializers.py b/viewer/serializers.py index 400c55dd..a029267b 100644 --- a/viewer/serializers.py +++ b/viewer/serializers.py @@ -721,6 +721,7 @@ class Meta: extra_kwargs = { "id": {"read_only": True}, "upload_name": {"read_only": True}, + "tag_prefix": {"read_only": True}, } diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 6b957663..5613973f 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1728,11 +1728,12 @@ def process_bundle(self): # tag site observations for val in canon_site_objects.values(): # pylint: disable=no-member - tag = f"{val.instance.canon_site_num} - {''.join(val.instance.name.split('+')[1:-1])}" + prefix = val.instance.canon_site_num + tag = ''.join(val.instance.name.split('+')[1:-1]) so_list = SiteObservation.objects.filter( canon_site_conf__canon_site=val.instance ) - self._tag_observations(tag, "CanonSites", so_list) + self._tag_observations(tag, prefix, "CanonSites", so_list) logger.debug("canon_site objects tagged") @@ -1740,51 +1741,52 @@ def process_bundle(self): for val in canon_site_conf_objects.values(): # pylint: disable=no-member if val.instance.canon_site.canon_site_num not in numerators.keys(): numerators[val.instance.canon_site.canon_site_num] = alphanumerator() - tag = ( + prefix = ( f"{val.instance.canon_site.canon_site_num}" + f"{next(numerators[val.instance.canon_site.canon_site_num])}" - + f" - {val.instance.name.split('+')[0]}" ) + tag = val.instance.name.split('+')[0] so_list = [ site_observation_objects[strip_version(k)].instance for k in val.index_data["members"] ] - self._tag_observations(tag, "ConformerSites", so_list) + self._tag_observations(tag, prefix, "ConformerSites", so_list) logger.debug("conf_site objects tagged") for val in quat_assembly_objects.values(): # pylint: disable=no-member - tag = f"A{val.instance.assembly_num} - {val.instance.name}" + prefix = f"A{val.instance.assembly_num}" + tag = val.instance.name so_list = SiteObservation.objects.filter( xtalform_site__xtalform__in=XtalformQuatAssembly.objects.filter( quat_assembly=val.instance ).values("xtalform") ) - self._tag_observations(tag, "Quatassemblies", so_list) + self._tag_observations(tag, prefix, "Quatassemblies", so_list) logger.debug("quat_assembly objects tagged") for val in xtalform_objects.values(): # pylint: disable=no-member - tag = f"F{val.instance.xtalform_num} - {val.instance.name}" + prefix = f"F{val.instance.xtalform_num}" + tag = val.instance.name so_list = SiteObservation.objects.filter( xtalform_site__xtalform=val.instance ) - self._tag_observations(tag, "Crystalforms", so_list) + self._tag_observations(tag, prefix, "Crystalforms", so_list) logger.debug("xtalform objects tagged") for val in 
xtalform_sites_objects.values():  # pylint: disable=no-member
-            tag = (
+            prefix = (
                 f"F{val.instance.xtalform.xtalform_num}"
                 + f"{val.instance.xtalform_site_num}"
-                + f" - {val.instance.xtalform.name}"
-                + f" - {val.instance.xtalform_site_id}"
             )
+            tag = f"{val.instance.xtalform.name} - {val.instance.xtalform_site_id}"
             so_list = [
                 site_observation_objects[strip_version(k)].instance
                 for k in val.index_data["residues"]
             ]
-            self._tag_observations(tag, "CrystalformSites", so_list)
+            self._tag_observations(tag, prefix, "CrystalformSites", so_list)

             logger.debug("xtalform_sites objects tagged")

@@ -1836,7 +1838,7 @@ def _extract(

         return result

-    def _tag_observations(self, tag, category, so_list):
+    def _tag_observations(self, tag, prefix, category, so_list):
         try:
             # memo to self: description is set to tag, but there's
             # no fk to tag, instead, tag has a fk to
@@ -1871,6 +1873,7 @@
         except SiteObservationTag.DoesNotExist:
             so_tag = SiteObservationTag()
             so_tag.tag = tag
+            so_tag.tag_prefix = prefix
             so_tag.upload_name = tag
             so_tag.category = TagCategory.objects.get(category=category)
             so_tag.target = self.target

From afe10e2f2843ae562ce37eb599ee69aed7b02c90 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Tue, 5 Mar 2024 12:15:51 +0000
Subject: [PATCH 33/47] fix: crystallographic_files folders in download now
 sans suffix (#550)

---
 viewer/download_structures.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 5036e018..9f154a6a 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -737,7 +737,13 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
                     afile = []
                     for f in model_attr:
                         # here the model_attr is already stringified
-                        apath = Path('crystallographic_files').joinpath(so.code)
+                        try:
+                            exp_path = re.search(r"x\d*", so.code).group(0)  # type: ignore[union-attr]
+                        except AttributeError:
+                            logger.error('Unexpected shortcode format: %s', so.code)
+                            exp_path = so.code
+
+                        apath = Path('crystallographic_files').joinpath(exp_path)
                         if model_attr and model_attr != 'None':
                             archive_path = str(
                                 apath.joinpath(

From d7cc29a31fd6f737e7f8625c50cb13ea5be5202c Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Tue, 5 Mar 2024 16:48:48 +0000
Subject: [PATCH 34/47] fix: tag names now include prefix in download's
 metadata.csv

---
 viewer/download_structures.py | 14 ++++++++++++--
 1 file changed, 12 insertions(+), 2 deletions(-)

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index 9f154a6a..cf5862d0 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -20,7 +20,8 @@

 import pandoc
 from django.conf import settings
-from django.db.models import Exists, OuterRef, Subquery
+from django.db.models import CharField, Exists, F, OuterRef, Subquery, Value
+from django.db.models.functions import Concat

 from viewer.models import (
     DownloadLinks,
@@ -79,6 +80,7 @@ class TagSubquery(Subquery):
     """Annotate SiteObservation with tag of given category"""

     def __init__(self, category):
+        # fmt: off
         query = SiteObservationTag.objects.filter(
             pk=Subquery(
                 SiteObvsSiteObservationTag.objects.filter(
@@ -88,8 +90,16 @@ def __init__(self, category):
                 ),
             ).values('site_obvs_tag')[:1]
         )
-        ).values('tag')[0:1]
+        ).annotate(
+            combitag=Concat(
+                F('tag_prefix'),
+                Value(' - '),
+                F('tag'),
+                output_field=CharField(),
+            ),
+        ).values('combitag')[0:1]
         super().__init__(query)
+        # fmt: on


 class 
CuratedTagSubquery(Exists): From e037ac0cee75e6b1461397c61e2f65e9596b808b Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 7 Mar 2024 09:43:22 +0000 Subject: [PATCH 35/47] fix: return all proteins listed in api/download_structures --- viewer/views.py | 42 +++++++++++++++--------------------------- 1 file changed, 15 insertions(+), 27 deletions(-) diff --git a/viewer/views.py b/viewer/views.py index 8c33b318..29ebf55d 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1473,40 +1473,28 @@ def create(self, request): return Response(content, status=status.HTTP_404_NOT_FOUND) logger.info('Found Target record %r', target) - site_obvs = models.SiteObservation.objects.none() - proteins_list = [] - if request.data['proteins']: - logger.info('Given Proteins in request') - # Get first part of protein code - proteins_list = [ - p.strip().split(":")[0] for p in request.data['proteins'].split(',') - ] + proteins_list = [p.strip() for p in request.data.get('proteins', []).split(',')] + if proteins_list: logger.info('Given %s Proteins %s', len(proteins_list), proteins_list) - logger.info('Looking for SiteObservation records for given Proteins...') - # Filter by protein codes - for code_first_part in proteins_list: - # prot = models.Protein.objects.filter(code__contains=code_first_part).values() - # I don't see why I need to drop out of django objects here - prot = models.SiteObservation.objects.filter( - experiment__experiment_upload__target=target, code=code_first_part + + site_obvs = models.SiteObservation.objects.filter( + experiment__experiment_upload__target=target, + code__in=proteins_list, + ) + + missing_obvs = set(proteins_list).difference( + set(site_obvs.values_list('code', flat=True)) + ) + if missing_obvs: + logger.warning( + 'Could not find SiteObservation record for "%s"', + missing_obvs, ) - if prot.exists(): - # even more than just django object, I need an - # unevaluated queryset down the line - site_obvs = models.SiteObservation.objects.filter( - pk=prot.first().pk, - ) - else: - logger.warning( - 'Could not find SiteObservation record for "%s"', - code_first_part, - ) else: logger.info('Request had no Proteins') logger.info('Looking for Protein records for %r...', target) - # proteins = models.Protein.objects.filter(target_id=target.id).values() site_obvs = models.SiteObservation.objects.filter( experiment__experiment_upload__target=target ) From d4783bb7c2e5e84f981c91813d4517aefd5621b0 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 7 Mar 2024 11:02:18 +0000 Subject: [PATCH 36/47] fix: fixed 'All structures' option not working in download dialog --- viewer/views.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/viewer/views.py b/viewer/views.py index 29ebf55d..1d1600e9 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1473,7 +1473,9 @@ def create(self, request): return Response(content, status=status.HTTP_404_NOT_FOUND) logger.info('Found Target record %r', target) - proteins_list = [p.strip() for p in request.data.get('proteins', []).split(',')] + proteins_list = [ + p.strip() for p in request.data.get('proteins', '').split(',') if p + ] if proteins_list: logger.info('Given %s Proteins %s', len(proteins_list), proteins_list) logger.info('Looking for SiteObservation records for given Proteins...') From c08f18bfda076ae3493b391469ffc04927293006 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 7 Mar 2024 13:45:53 +0000 Subject: [PATCH 37/47] Migrations for new file fields --- viewer/migrations/0049_auto_20240307_1344.py | 36 
++++++++++++++++++++ viewer/models.py | 6 ++++ viewer/target_loader.py | 14 +++++--- 3 files changed, 52 insertions(+), 4 deletions(-) create mode 100644 viewer/migrations/0049_auto_20240307_1344.py diff --git a/viewer/migrations/0049_auto_20240307_1344.py b/viewer/migrations/0049_auto_20240307_1344.py new file mode 100644 index 00000000..862b6045 --- /dev/null +++ b/viewer/migrations/0049_auto_20240307_1344.py @@ -0,0 +1,36 @@ +# Generated by Django 3.2.23 on 2024-03-07 13:44 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ('viewer', '0048_auto_20240305_1038'), + ] + + operations = [ + migrations.AddField( + model_name='historicalsiteobservation', + name='ligand_mol', + field=models.TextField(max_length=255, null=True), + ), + migrations.AddField( + model_name='historicalsiteobservation', + name='ligand_smiles', + field=models.TextField(max_length=255, null=True), + ), + migrations.AddField( + model_name='siteobservation', + name='ligand_mol', + field=models.FileField( + max_length=255, null=True, upload_to='target_loader_data/' + ), + ), + migrations.AddField( + model_name='siteobservation', + name='ligand_smiles', + field=models.FileField( + max_length=255, null=True, upload_to='target_loader_data/' + ), + ), + ] diff --git a/viewer/models.py b/viewer/models.py index ac9742b8..31336fa4 100644 --- a/viewer/models.py +++ b/viewer/models.py @@ -465,6 +465,12 @@ class SiteObservation(models.Model): seq_id = models.IntegerField() chain_id = models.CharField(max_length=1) ligand_mol_file = models.TextField(null=True) + ligand_mol = models.FileField( + upload_to="target_loader_data/", null=True, max_length=255 + ) + ligand_smiles = models.FileField( + upload_to="target_loader_data/", null=True, max_length=255 + ) ligand_pdb = models.FileField( upload_to="target_loader_data/", null=True, max_length=255 ) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 5613973f..aff26aaa 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1319,11 +1319,13 @@ def process_site_observation( apo_desolv_file, apo_file, artefacts_file, - ligand_mol, + ligand_mol_file, sigmaa_file, diff_file, event_file, ligand_pdb, + ligand_mol, + ligand_smiles, ) = self.validate_files( obj_identifier=experiment_id, file_struct=data, @@ -1340,16 +1342,18 @@ def process_site_observation( "diff_map", # NB! keys in meta_aligner not yet updated "event_map", "ligand_pdb", + "ligand_mol", + "ligand_smiles", ), validate_files=validate_files, ) - logger.debug('looking for ligand_mol: %s', ligand_mol) + logger.debug('looking for ligand_mol: %s', ligand_mol_file) mol_data = None - if ligand_mol: + if ligand_mol_file: with contextlib.suppress(TypeError, FileNotFoundError): with open( - self.raw_data.joinpath(ligand_mol), + self.raw_data.joinpath(ligand_mol_file), "r", encoding="utf-8", ) as f: @@ -1377,6 +1381,8 @@ def process_site_observation( "event_file": str(self._get_final_path(event_file)), "artefacts_file": str(self._get_final_path(artefacts_file)), "ligand_pdb": str(self._get_final_path(ligand_pdb)), + "ligand_mol": str(self._get_final_path(ligand_mol)), + "ligand_smiles": str(self._get_final_path(ligand_smiles)), "pdb_header_file": "currently missing", "ligand_mol_file": mol_data, } From 8b737490580e8d697cbe2e5afd13123be93114bc Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 7 Mar 2024 16:40:11 +0000 Subject: [PATCH 38/47] Issue 1326 - mol and smiles added to download bundle NB! 
not production/staging ready, still contains a hack for testing
because XCA doesn't provide all the attributes.

---
 viewer/download_structures.py | 12 ++++++++++++
 viewer/target_loader.py       | 14 +++++++++++++-
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/viewer/download_structures.py b/viewer/download_structures.py
index cf5862d0..ce1091ff 100644
--- a/viewer/download_structures.py
+++ b/viewer/download_structures.py
@@ -49,6 +49,8 @@
     'apo_desolv_file': ('aligned'),  # SiteObservation: apo_desolv_file
     'bound_file': ('aligned'),  # SiteObservation: bound_file
     'sdf_info': ('aligned'),  # SiteObservation: ligand_mol_file (indirectly)
+    'ligand_mol': ('aligned'),  # SiteObservation: ligand_mol
+    'ligand_smiles': ('aligned'),  # SiteObservation: ligand_smiles
     'ligand_pdb': ('aligned'),  # SiteObservation: ligand_pdb
     'smiles_info': (''),  # SiteObservation: smiles_info (indirectly)
     # those above are all controlled by serializer's all_aligned_structures flag
@@ -136,6 +138,8 @@ class ArchiveFile:
         'diff_file': {},
         'sigmaa_file': {},
         'ligand_pdb': {},
+        'ligand_mol': {},
+        'ligand_smiles': {},
     },
     'molecules': {
         'sdf_files': {},
@@ -229,6 +233,10 @@ def _patch_molecule_name(site_observation):
     lines = site_observation.ligand_mol_file.split('\n')
     if not lines[0].strip():
         lines[0] = site_observation.long_code
+
+    # the db content is a mol file but what's requested here is
+    # sdf, so add the sdf separator
+    lines.append('$$$$\n')
     return '\n'.join(lines)

@@ -776,6 +784,8 @@ def _create_structures_dict(site_obvs, protein_params, other_params):
             'artefacts_file',
             'pdb_header_file',
             'ligand_pdb',
+            'ligand_mol',
+            'ligand_smiles',
             'diff_file',
         ]:
             # siteobservation object
@@ -890,6 +900,8 @@ def get_download_params(request):
         'apo_solv_file': serializer.validated_data['all_aligned_structures'],
         'apo_desolv_file': serializer.validated_data['all_aligned_structures'],
         'ligand_pdb': serializer.validated_data['all_aligned_structures'],
+        'ligand_mol': serializer.validated_data['all_aligned_structures'],
+        'ligand_smiles': serializer.validated_data['all_aligned_structures'],
         'cif_info': serializer.validated_data['cif_info'],
         'mtz_info': serializer.validated_data['mtz_info'],
         'map_info': serializer.validated_data['map_info'],
diff --git a/viewer/target_loader.py b/viewer/target_loader.py
index aff26aaa..efbdcfc8 100644
--- a/viewer/target_loader.py
+++ b/viewer/target_loader.py
@@ -635,7 +635,7 @@ def logfunc(key, message):

         # memo to self: added type ignore directives to return line
         # below and append line above because after small refactoring,
-        # mypy all of the sudden started throwing errors on bothe or
+        # mypy all of a sudden started throwing errors on both of
        # these. the core of it's grievance is that it expects the
        # return type to be list[str]. no idea why, function signature
        # clearly defines it as list[str | None]
@@ -1348,7 +1348,19 @@ def process_site_observation(
             validate_files=validate_files,
         )

+        # TODO: ligand file simulation for testing, remove once the
+        # key is addded to XCA output
+        if ligand_mol:
+            ligand_smiles_path = f"{ligand_mol.removesuffix('.mol')}.smi"
+            if self.raw_data.joinpath(ligand_smiles_path).is_file():
+                ligand_smiles = ligand_smiles_path
+            else:
+                ligand_smiles = None
+        else:
+            ligand_smiles = None
+
         logger.debug('looking for ligand_mol: %s', ligand_mol_file)
+
         mol_data = None
         if ligand_mol_file:
             with contextlib.suppress(TypeError, FileNotFoundError):

From 1f1de43f64f79487be5541b3153682e63ef582b3 Mon Sep 17 00:00:00 2001
From: Kalev Takkis
Date: Fri, 8 Mar 2024 09:27:39 +0000
Subject: [PATCH 39/47] Target loader should handle empty code_prefix and
 tooltip

'Should' because this hasn't been tested yet with real data

---
 viewer/target_loader.py | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/viewer/target_loader.py b/viewer/target_loader.py
index efbdcfc8..e792169a 100644
--- a/viewer/target_loader.py
+++ b/viewer/target_loader.py
@@ -734,7 +734,6 @@ def process_experiment(
         """
         del kwargs
         assert item_data
-        assert prefix_tooltips
         logger.debug("incoming data: %s", item_data)

         experiment_name, data = item_data
@@ -814,8 +813,12 @@
         # version int old versions are kept target loader version
         version = 1

-        code_prefix = extract(key="code_prefix")
-        prefix_tooltip = prefix_tooltips.get(code_prefix, "")
+        # if empty or key missing entirely, ensure code_prefix returns empty
+        code_prefix = extract(key="code_prefix", level=logging.INFO)
+        # ignoring type because tooltip dict can legitimately be empty
+        # and in such a case the assert statement fails. 
need to remove it + # and use the ignore + prefix_tooltip = prefix_tooltips.get(code_prefix, "") # type: ignore[union-attr] fields = { "code": experiment_name, @@ -1501,7 +1504,7 @@ def process_bundle(self): self.version_number = meta["version_number"] self.version_dir = meta["version_dir"] self.previous_version_dirs = meta["previous_version_dirs"] - prefix_tooltips = meta["code_prefix_tooltips"] + prefix_tooltips = meta.get("code_prefix_tooltips", {}) # check transformation matrix files ( # pylint: disable=unbalanced-tuple-unpacking From 35ba43998c8e8e833e7b508bdb53b2c812ec5e43 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 8 Mar 2024 10:51:04 +0000 Subject: [PATCH 40/47] Column 'Downloaded' to metadata.csv in downloads --- viewer/download_structures.py | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index ce1091ff..fc7e68d0 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -418,13 +418,13 @@ def _trans_matrix_files_zip(ziparchive, target): _add_empty_file(ziparchive, archive_path) -def _metadate_file_zip(ziparchive, target): +def _metadata_file_zip(ziparchive, target, site_observations): """Compile and add metadata file to archive.""" logger.info('+ Processing metadata') annotations = {} - values = ['code', 'longcode', 'cmpd__compound_code', 'smiles'] - header = ['Code', 'Long code', 'Compound code', 'Smiles'] + values = ['code', 'longcode', 'cmpd__compound_code', 'smiles', 'downloaded'] + header = ['Code', 'Long code', 'Compound code', 'Smiles', 'Downloaded'] for category in TagCategory.objects.filter(category__in=TAG_CATEGORIES): tag = f'tag_{category.category.lower()}' @@ -432,7 +432,7 @@ def _metadate_file_zip(ziparchive, target): header.append(category.category) annotations[tag] = TagSubquery(category.category) - pattern = re.compile(r'\W+') + pattern = re.compile(r'\W+') # non-alphanumeric characters for tag in SiteObservationTag.objects.filter( category__in=TagCategory.objects.filter(category__in=CURATED_TAG_CATEGORIES), target=target, @@ -449,6 +449,12 @@ def _metadate_file_zip(ziparchive, target): ).prefetch_related( 'cmpd', 'siteobservationtags', + ).annotate( + downloaded=Exists( + site_observations.filter( + pk=OuterRef('pk'), + ), + ) ).annotate(**annotations).values_list(*values) # fmt: on @@ -621,7 +627,9 @@ def _build_readme(readme, original_search, template_file, ziparchive): readme.write(f'- {filename}' + '\n') -def _create_structures_zip(target, zip_contents, file_url, original_search, host): +def _create_structures_zip( + target, zip_contents, file_url, original_search, host, site_observations +): """Write a ZIP file containing data from an input dictionary.""" logger.info('+ _create_structures_zip(%s)', target.title) @@ -684,7 +692,7 @@ def _create_structures_zip(target, zip_contents, file_url, original_search, host # compile and add metadata.csv if zip_contents['metadata_info']: - _metadate_file_zip(ziparchive, target) + _metadata_file_zip(ziparchive, target, site_observations) if zip_contents['trans_matrix_info']: _trans_matrix_files_zip(ziparchive, target) @@ -1006,7 +1014,14 @@ def create_or_return_download_link(request, target, site_observations): zip_contents = _create_structures_dict( site_observations, protein_params, other_params ) - _create_structures_zip(target, zip_contents, file_url, original_search, host) + _create_structures_zip( + target, + zip_contents, + file_url, + original_search, + host, + 
site_observations, + ) download_link = DownloadLinks() # Note: 'zip_file' and 'zip_contents' record properties are no longer used. From 46278e3b152902acb4a2cdbe5a6fe2db60617e76 Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Fri, 8 Mar 2024 14:52:55 +0000 Subject: [PATCH 41/47] fix: restore 'upload_name' in site obvs tags to prefix-tag format --- viewer/target_loader.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 5613973f..b5196c73 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1874,7 +1874,7 @@ def _tag_observations(self, tag, prefix, category, so_list): so_tag = SiteObservationTag() so_tag.tag = tag so_tag.tag_prefix = prefix - so_tag.upload_name = tag + so_tag.upload_name = f"{prefix} - {tag}" so_tag.category = TagCategory.objects.get(category=category) so_tag.target = self.target so_tag.mol_group = so_group From b9522423113796bb13b99c20836d85391038fabb Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Mon, 11 Mar 2024 10:38:28 +0000 Subject: [PATCH 42/47] Removed ligand_smiles workaround All necessary files are now tracked by the database and returned in download. --- viewer/download_structures.py | 2 +- viewer/target_loader.py | 13 +------------ 2 files changed, 2 insertions(+), 13 deletions(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index fc7e68d0..1ec33714 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -512,7 +512,7 @@ def _extra_files_zip(ziparchive, target): ziparchive.write( filepath, os.path.join( - _ZIP_FILEPATHS[f'extra_files_{num_extra_dir}'], file + f'{_ZIP_FILEPATHS["extra_files"]}_{num_extra_dir}', file ), ) num_processed += 1 diff --git a/viewer/target_loader.py b/viewer/target_loader.py index efbdcfc8..2ea15435 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1279,7 +1279,7 @@ def process_site_observation( longcode = f"{experiment.code}_{chain}_{str(ligand)}_{str(idx)}" key = f"{experiment.code}/{chain}/{str(ligand)}" - smiles = extract(key="ligand_smiles") + smiles = extract(key="ligand_smiles_string") try: compound = compounds[experiment_id].instance @@ -1348,17 +1348,6 @@ def process_site_observation( validate_files=validate_files, ) - # TODO: ligand file simulation for testing, remove once the - # key is addded to XCA output - if ligand_mol: - ligand_smiles_path = f"{ligand_mol.removesuffix('.mol')}.smi" - if self.raw_data.joinpath(ligand_smiles_path).is_file(): - ligand_smiles = ligand_smiles_path - else: - ligand_smiles = None - else: - ligand_smiles = None - logger.debug('looking for ligand_mol: %s', ligand_mol_file) mol_data = None From 6d2511eb3bbb98ce707f4374fbb3dff1e7a3adbc Mon Sep 17 00:00:00 2001 From: "Alan B. 
Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Mon, 11 Mar 2024 17:54:07 +0100 Subject: [PATCH 43/47] fix: Add force_error_display to connection functions (default False) (#559) Co-authored-by: Alan Christie --- api/security.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/api/security.py b/api/security.py index 01605352..12d4a987 100644 --- a/api/security.py +++ b/api/security.py @@ -47,7 +47,7 @@ # response = view(request) -def get_remote_conn() -> Optional[SSHConnector]: +def get_remote_conn(force_error_display=False) -> Optional[SSHConnector]: credentials: Dict[str, Any] = { "user": settings.ISPYB_USER, "pw": settings.ISPYB_PASSWORD, @@ -71,7 +71,8 @@ def get_remote_conn() -> Optional[SSHConnector]: # Assume the credentials are invalid if there is no host. # If a host is not defined other properties are useless. if not credentials["host"]: - logger.debug("No ISPyB host - cannot return a connector") + if logging.DEBUG >= logger.level or force_error_display: + logger.info("No ISPyB host - cannot return a connector") return None # Try to get an SSH connection (aware that it might fail) @@ -81,14 +82,14 @@ def get_remote_conn() -> Optional[SSHConnector]: except Exception: # Log the exception if DEBUG level or lower/finer? # The following will not log if the level is set to INFO for example. - if logging.DEBUG >= logger.level: + if logging.DEBUG >= logger.level or force_error_display: logger.info("credentials=%s", credentials) logger.exception("Got the following exception creating SSHConnector...") return conn -def get_conn() -> Optional[Connector]: +def get_conn(force_error_display=False) -> Optional[Connector]: credentials: Dict[str, Any] = { "user": settings.ISPYB_USER, "pw": settings.ISPYB_PASSWORD, @@ -101,7 +102,8 @@ def get_conn() -> Optional[Connector]: # Assume the credentials are invalid if there is no host. # If a host is not defined other properties are useless. if not credentials["host"]: - logger.debug("No ISPyB host - cannot return a connector") + if logging.DEBUG >= logger.level or force_error_display: + logger.info("No ISPyB host - cannot return a connector") return None conn: Optional[Connector] = None @@ -110,7 +112,7 @@ def get_conn() -> Optional[Connector]: except Exception: # Log the exception if DEBUG level or lower/finer? # The following will not log if the level is set to INFO for example. - if logging.DEBUG >= logger.level: + if logging.DEBUG >= logger.level or force_error_display: logger.info("credentials=%s", credentials) logger.exception("Got the following exception creating Connector...") From 14cd643eb6ecb3e1f7ae40e529f358071fa47db9 Mon Sep 17 00:00:00 2001 From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com> Date: Tue, 12 Mar 2024 16:38:43 +0100 Subject: [PATCH 44/47] Align production with staging (#555) (#560) * Some changes to cset_upload.py to allow site observation short codes (#527) * stashing * fix: cset_upload.py updated to allow new-style site observation codes NB! this probably still won't work! I suspect the file I was given is broken and I cannot test it further * stashing * stashing * Short code prefix and tooltip to backend Target loader now reads short code prefix and tooltip from meta_aligner.yaml. Tooltip is saved to Experiment model. 
TODO: make tooltip available via API

* Prefix tooltip now served by api/site_observation

* stashing

* Site observation groups for shortcodes now by experiment

* feat: download structure fixed

TODO: add all the yamls

* All yaml files added to download

* New format to download zip (issue 1326) (#530)

* stashing

* stashing

* feat: download structure fixed

TODO: add all the yamls

* All yaml files added to download

* cset_upload.py: lhs_pdb renamed to ref_pdb

* Renamed canon- and conf site tags

* Adds support for key-based SSH connections (#534)

* Centralised environment variables (#529)

* refactor: Restructured settings.py

* docs: Minor tweaks

* refactor: Move security and infection config to settings

* refactor: b/e & f/e/ tags now in settings (also fixed f/e tag value)

* refactor: Move Neo4j config to settings

* refactor: More variables into settings

* refactor: Moved remaining config

* docs: Adds configuration guide as comments

* docs: Variable prefix now 'stack_' not 'stack_env_'

---------

* feat: Adds support for private keys on SSH tunnel

* fix: Fixes key-based logic

---------

* build(deps): bump cryptography from 42.0.0 to 42.0.2 (#533)

Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.0 to 42.0.2.
- [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pyca/cryptography/compare/42.0.0...42.0.2)

---
updated-dependencies:
- dependency-name: cryptography
  dependency-type: indirect
...

* docs: Updates documentation (#536)

* build(deps): bump django from 3.2.20 to 3.2.24 (#535)

Bumps [django](https://github.com/django/django) from 3.2.20 to 3.2.24.
- [Commits](https://github.com/django/django/compare/3.2.20...3.2.24)

---
updated-dependencies:
- dependency-name: django
  dependency-type: direct:production
...

* fix: reverting wrong changes

* fix: reverting wrong changes (#538)

* stashing

* add site observation's ligand sdf to aligned_files

* fix: custom pdb now downloadable

* fix: increased loglevel to error on unexpected exceptions block

* fix: Discourse service check now checks API key before creating a service (#544)

* build(deps): bump cryptography from 42.0.2 to 42.0.4 (#539)

Bumps [cryptography](https://github.com/pyca/cryptography) from 42.0.2 to 42.0.4.
- [Changelog](https://github.com/pyca/cryptography/blob/main/CHANGELOG.rst)
- [Commits](https://github.com/pyca/cryptography/compare/42.0.2...42.0.4)

---
updated-dependencies:
- dependency-name: cryptography
  dependency-type: indirect
...

* metadata.csv populated

Started working on issue 1355 as well, it's too tightly coupled. Some
work remaining re that:
- when tag created in UI, make sure upload_name attribute is populated

* upload_name automatically populated when creating tags in UI

Only populated on creation, updates won't touch it

* changes to api/download_structures

- apo_file, bound_file, sdf_info and smiles_info merged into
  all_aligned_structures
- added pdb_info field

NB! download_structures was required to provide ligand_pdb as well. This
wasn't tracked previously, so I added field to SiteObservation model.
Meaning there's a migration and on stack deployment data needs to be
wiped and reuploaded

* don't download neighbourhoods.yaml unless trans_matrix_info is checked

* fixed error handling (errors.csv) and not returning combined sdf

* fix: Added parsing directives to DownloadStructuresSerializer

* Consecutive numbering of observations under canon site

* SiteObservation.tag split to tag and tag_prefix (1361)

* fix: crystallographic_files folders in download now sans suffix (#550)

* fix: tag names now include prefix in download's metadata.csv

* fix: return all proteins listed in api/download_structures

* fix: fixed 'All structures' option not working in download dialog

* Migrations for new file fields

* Issue 1326 - mol and smiles added to download bundle

NB! not production/staging ready, still contains a hack for testing
because XCA doesn't provide all the attributes.

* Target loader should handle empty code_prefix and tooltip

'Should' because this hasn't been tested yet with real data

* Column 'Downloaded' to metadata.csv in downloads

* fix: restore 'upload_name' in site obvs tags to prefix-tag format

* Removed ligand_smiles workaround

All necessary files are now tracked by the database and returned in
download.

* fix: Add force_error_display to connection functions (default False) (#559)

---------

Signed-off-by: dependabot[bot]
Co-authored-by: Kalev Takkis
Co-authored-by: Warren Thompson
Co-authored-by: Alan Christie
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
Co-authored-by: Warren Thompson

From 7805e71b7c86454816091563c5e8ed575a4771a2 Mon Sep 17 00:00:00 2001
From: "Alan B. Christie" <29806285+alanbchristie@users.noreply.github.com>
Date: Wed, 13 Mar 2024 10:39:50 +0100
Subject: [PATCH 45/47] Add DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP (#561)

* feat: Add DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP

* style: Minor log tweak

---------

Co-authored-by: Alan Christie
---
 api/security.py        |  8 ++++++--
 fragalysis/settings.py | 11 +++++++++++
 viewer/views.py        |  2 +-
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/api/security.py b/api/security.py
index 12d4a987..4c2afc19 100644
--- a/api/security.py
+++ b/api/security.py
@@ -354,9 +354,10 @@ def get_proposals_for_user(self, user, restrict_to_membership=False):
         proposals = set()
         ispyb_user = settings.ISPYB_USER
         logger.debug(
-            "ispyb_user=%s restrict_to_membership=%s",
+            "ispyb_user=%s restrict_to_membership=%s (DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP=%s)",
             ispyb_user,
             restrict_to_membership,
+            settings.DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP,
         )
         if ispyb_user:
             if user.is_authenticated:
@@ -368,7 +369,10 @@

             # We have all the proposals where the user has authority.
             # Add open/public proposals?
-            if not restrict_to_membership:
+            if (
+                not restrict_to_membership
+                or settings.DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP
+            ):
                 proposals.update(self._get_open_proposals())

         # Return the set() as a list()
diff --git a/fragalysis/settings.py b/fragalysis/settings.py
index 633487f6..cfef3208 100644
--- a/fragalysis/settings.py
+++ b/fragalysis/settings.py
@@ -464,6 +464,17 @@

 COMPUTED_SET_MEDIA_DIRECTORY: str = "computed_set_data"

+# The following (part of m2ms-1385) is used to prevent the
+# 'restrict-to-membership' check in security.py - something that is designed to prevent
+# uploading to public proposals unless the user is explicitly part of the proposal
+# (according to ISPyB). 
This variable is used to defeat this test for situations +# when ISPyB is unavailable. It is not permitted when the DEPLOYMENT_MODE +# is 'PRODUCTION +DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP: bool = False +if os.environ.get("DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP") == "True": + assert DEPLOYMENT_MODE != "PRODUCTION" + DISABLE_RESTRICT_PROPOSALS_TO_MEMBERSHIP = True + # Discourse settings for API calls to Discourse Platform. DISCOURSE_PARENT_CATEGORY: str = "Fragalysis targets" DISCOURSE_USER: str = "fragalysis" diff --git a/viewer/views.py b/viewer/views.py index 1d1600e9..91f91fbb 100644 --- a/viewer/views.py +++ b/viewer/views.py @@ -1579,7 +1579,7 @@ def create(self, request, *args, **kwargs): contact_email=contact_email, user_id=request.user.pk, ) - logger.info("+ UploadTargetExperiments.create got Celery id %s", task.task_id) + logger.info("+ UploadTargetExperiments.create got Celery id %s", task.task_id) url = reverse('viewer:task_status', kwargs={'task_id': task.task_id}) # as it launches task, I think 202 is more appropriate From 849c24cd5e3087227505ad9a7cc3aae3ecabd5fb Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Thu, 14 Mar 2024 12:01:27 +0000 Subject: [PATCH 46/47] fix: metadata.csv in download now showing correct tags --- viewer/download_structures.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/viewer/download_structures.py b/viewer/download_structures.py index 1ec33714..0a25850c 100644 --- a/viewer/download_structures.py +++ b/viewer/download_structures.py @@ -86,7 +86,7 @@ def __init__(self, category): query = SiteObservationTag.objects.filter( pk=Subquery( SiteObvsSiteObservationTag.objects.filter( - site_observation=OuterRef('pk'), + site_observation=OuterRef(OuterRef('pk')), site_obvs_tag__category=TagCategory.objects.get( category=category, ), From 173b303e9990a27a2d0716fae033c6c0843c625b Mon Sep 17 00:00:00 2001 From: Kalev Takkis Date: Wed, 20 Mar 2024 10:10:45 +0000 Subject: [PATCH 47/47] fix: fixed tag creation process for upload 2 --- viewer/target_loader.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/viewer/target_loader.py b/viewer/target_loader.py index 2590f163..4e72cc8a 100644 --- a/viewer/target_loader.py +++ b/viewer/target_loader.py @@ -1875,7 +1875,9 @@ def _tag_observations(self, tag, prefix, category, so_list): so_group.save() try: - so_tag = SiteObservationTag.objects.get(upload_name=tag, target=self.target) + so_tag = SiteObservationTag.objects.get( + upload_name=f"{prefix} - {tag}", target=self.target + ) # Tag already exists # Apart from the new mol_group and molecules, we shouldn't be # changing anything.
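
The nested-OuterRef construction that patch 46 fixes is easiest to see in isolation. The sketch below restates TagSubquery (patch 34) as a standalone helper. It is a minimal illustration, assuming the viewer models as they stand at the end of this series (SiteObservation, SiteObservationTag with its tag_prefix field, the SiteObvsSiteObservationTag through table, and TagCategory) and a configured Django environment; the names tag_subquery and tag_canonsites are illustrative only, not identifiers from the patches.

# Minimal sketch; run inside a configured Django shell for this project.
from django.db.models import CharField, F, OuterRef, Subquery, Value
from django.db.models.functions import Concat

from viewer.models import (
    SiteObservation,
    SiteObservationTag,
    SiteObvsSiteObservationTag,
    TagCategory,
)


def tag_subquery(category_name):
    # Innermost queryset: through-table rows linking tags to "this"
    # SiteObservation. It sits two subqueries deep, so the reference to
    # the outer SiteObservation must be OuterRef(OuterRef('pk')); a
    # single OuterRef would resolve against SiteObservationTag instead,
    # which is the bug patch 46 fixes.
    tag_pk = SiteObvsSiteObservationTag.objects.filter(
        site_observation=OuterRef(OuterRef('pk')),
        site_obvs_tag__category=TagCategory.objects.get(category=category_name),
    ).values('site_obvs_tag')[:1]

    # Outer subquery: fetch that tag and render it as '<prefix> - <tag>',
    # the combitag form patch 34 introduces for metadata.csv.
    return Subquery(
        SiteObservationTag.objects.filter(pk=Subquery(tag_pk))
        .annotate(
            combitag=Concat(
                F('tag_prefix'),
                Value(' - '),
                F('tag'),
                output_field=CharField(),
            )
        )
        .values('combitag')[:1]
    )


# Usage: one extra column per tag category, resolved in a single SQL query.
qs = SiteObservation.objects.annotate(tag_canonsites=tag_subquery('CanonSites'))

The through-table queryset is embedded two subqueries deep, which is why the outer SiteObservation primary key is wrapped twice: each OuterRef escapes exactly one level of subquery.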