Merge pull request #520 from xchem/m2ms-1280-target-loader-data-version
Xtalforms renamed to crystalforms (issue 1324)
kaliif authored Feb 8, 2024
2 parents 5c9b415 + 6d9344a commit d1194d7
Showing 2 changed files with 103 additions and 32 deletions.
4 changes: 2 additions & 2 deletions viewer/fixtures/tagcategories.json
@@ -21,7 +21,7 @@
"model": "viewer.tagcategory",
"pk": 3,
"fields": {
"category": "XtalformSites",
"category": "CrystalformSites",
"colour": "0099ff",
"description": null
}
@@ -39,7 +39,7 @@
"model": "viewer.tagcategory",
"pk": 5,
"fields": {
"category": "Xtalforms",
"category": "Crystalforms",
"colour": "ffcc00",
"description": null
}
131 changes: 101 additions & 30 deletions viewer/target_loader.py
@@ -95,6 +95,7 @@ class MetadataObject:

instance: Model
index_data: dict = field(default_factory=dict)
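# set by the create_objects wrapper: True when the instance was created during this upload, False when it already existed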
new: bool = False


# type hint for wrapped yaml block processors
@@ -165,7 +166,6 @@ def _update_task(self, message: str | list) -> None:
if not self.task:
return
with contextlib.suppress(AttributeError):
logger.debug("taskstuff %s", dir(self.task))
self.task.update_state(
state=self.upload_state,
meta={
@@ -295,6 +295,12 @@ def alphanumerator(start_from: str = "") -> Generator[str, None, None]:
return generator


def strip_version(s: str) -> str:
# format something like XX01ZVNS2B-x0673/B/501/1
# remove trailing '/1'
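# e.g. 'XX01ZVNS2B-x0673/B/501/1' -> 'XX01ZVNS2B-x0673/B/501'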
return s[0 : s.rfind('/')]


def create_objects(func=None, *, depth=math.inf):
"""Wrapper function for saving database objects.
@@ -313,29 +319,49 @@ def create_objects(func=None, *, depth=math.inf):
def wrapper_create_objects(
self, *args, yaml_data: dict, **kwargs
) -> dict[int | str, MetadataObject]:
logger.debug("+ wrapper_service_query")
# logger.debug("+ wrapper_service_query")
# logger.debug("args passed: %s", args)
logger.debug("kwargs passed: %s", kwargs)
# logger.debug("kwargs passed: %s", kwargs)

flattened_data = flatten_dict(yaml_data, depth=depth)
result = {}
created, existing, failed = 0, 0, 0
for item in flattened_data:
logger.debug("flattened data item: %s", item)
instance_data = func(self, *args, item_data=item, **kwargs)
instance_data = func(
self, *args, item_data=item, validate_files=False, **kwargs
)
logger.debug("Instance data returned: %s", instance_data)
obj = None
new = False
if not instance_data:
continue

obj = None
try:
if instance_data.fields:
obj, new = instance_data.model_class.filter_manager.by_target(
self.target
).get_or_create(
**instance_data.fields,
defaults=instance_data.defaults,
)
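# replaces get_or_create: look the object up without re-validating file hashes,
# and only on a miss re-run the extractor with validation before creating it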
try:
obj = instance_data.model_class.filter_manager.by_target(
self.target
).get(**instance_data.fields)
logger.debug("Object exists: %s", instance_data.fields)
new = False
except instance_data.model_class.DoesNotExist:
# revalidate files
logger.debug("Object doesn't exist: %s", instance_data)
instance_data = func(self, *args, item_data=item, **kwargs)
obj = instance_data.model_class(
**instance_data.fields,
**instance_data.defaults,
)
obj.save()
new = True

# obj, new = instance_data.model_class.filter_manager.by_target(
# self.target
# ).get_or_create(
# **instance_data.fields,
# defaults=instance_data.defaults,
# )
else:
# no unique field requirements, just create new object
obj = instance_data.model_class(
@@ -380,7 +406,9 @@ def wrapper_create_objects(
obj,
)

m = MetadataObject(instance=obj, index_data=instance_data.index_data)
m = MetadataObject(
instance=obj, index_data=instance_data.index_data, new=new
)
# index data here probs
result[instance_data.key] = m

@@ -487,6 +515,7 @@ def validate_map_files(
key: str,
obj_identifier: str,
file_struct: list,
validate_files: bool = True,
) -> list[str]:
"""Validate list of panddas event files.
@@ -503,7 +532,8 @@ def logfunc(_, message):
if not fname:
continue

self._check_file_hash(obj_identifier, key, fname, file_hash, logfunc)
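# hash check is now optional (validate_files=False); file names are still collected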
if validate_files:
self._check_file_hash(obj_identifier, key, fname, file_hash, logfunc)
result.append(fname)

return result
@@ -514,6 +544,7 @@ def validate_files(
file_struct: dict,
required: Iterable[str] = (),
recommended: Iterable[str] = (),
validate_files: bool = True,
) -> list[str | None]:
"""Check if file exists and if sha256 hash matches (if given).
@@ -566,11 +597,17 @@ def logfunc(key, message):
if not filename:
continue

self._check_file_hash(obj_identifier, key, filename, file_hash, logfunc)
if validate_files:
self._check_file_hash(
obj_identifier, key, filename, file_hash, logfunc
)

elif isinstance(value, str):
filename = value
self._check_file_hash(obj_identifier, key, filename, file_hash, logfunc)
if validate_files:
self._check_file_hash(
obj_identifier, key, filename, file_hash, logfunc
)

else:
# probably panddas files here
@@ -661,7 +698,10 @@ def _enumerate_objects(self, objects: dict, attr: str) -> None:

@create_objects(depth=1)
def process_experiment(
self, item_data: tuple[str, dict] | None = None, **kwargs
self,
item_data: tuple[str, dict] | None = None,
validate_files: bool = True,
**kwargs,
) -> ProcessedObject | None:
"""Extract data from yaml block for creating Experiment instance.
@@ -719,6 +759,7 @@ def process_experiment(
"xtal_mtz",
"ligand_cif",
),
validate_files=validate_files,
)

try:
@@ -730,6 +771,7 @@ def process_experiment(
key="panddas_event_files",
obj_identifier=experiment_name,
file_struct=panddas_files,
validate_files=validate_files,
)

dtype = extract(key="type")
@@ -767,7 +809,6 @@ def process_experiment(
version = 1

fields = {
"experiment_upload": self.experiment_upload,
"code": experiment_name,
}

@@ -776,6 +817,7 @@ def process_experiment(
map_info_paths = [str(self._get_final_path(k)) for k in map_info_files]

defaults = {
"experiment_upload": self.experiment_upload,
"status": status,
"version": version,
"type": exp_type,
@@ -803,7 +845,10 @@ def process_experiment(

@create_objects(depth=1)
def process_compound(
self, item_data: tuple[str, dict] | None = None, **kwargs
self,
experiments: dict[int | str, MetadataObject],
item_data: tuple[str, dict] | None = None,
**kwargs,
) -> ProcessedObject | None:
"""Extract data from yaml block for creating Compound instance.
@@ -825,6 +870,7 @@ def process_compound(
protein_name, data = item_data
if (
"aligned_files" not in data.keys()
or not experiments[protein_name].new # remove already saved objects
or "crystallographic_files" not in data.keys()
):
return None
@@ -858,7 +904,9 @@ def process_compound(

@create_objects(depth=1)
def process_xtalform(
self, item_data: tuple[str, dict] | None = None, **kwargs
self,
item_data: tuple[str, dict] | None = None,
**kwargs,
) -> ProcessedObject | None:
"""Create Xtalform model instance from data.
@@ -910,7 +958,9 @@ def process_xtalform(

@create_objects(depth=1)
def process_quat_assembly(
self, item_data: tuple[str, dict] | None = None, **kwargs
self,
item_data: tuple[str, dict] | None = None,
**kwargs,
) -> ProcessedObject | None:
"""Create QuatAssemblylform model instance from data.
@@ -997,7 +1047,9 @@ def process_xtalform_quatassembly(

@create_objects(depth=1)
def process_canon_site(
self, item_data: tuple[str, dict] | None = None, **kwargs
self,
item_data: tuple[str, dict] | None = None,
**kwargs,
) -> ProcessedObject | None:
"""Create CanonSite model instance from data.
@@ -1175,6 +1227,7 @@ def process_site_observation(
# ligand: str,
# idx: int | str,
# data: dict,
validate_files: bool = True,
**kwargs,
) -> ProcessedObject | None:
"""Create SiteObservation model instance from data.
@@ -1201,6 +1254,17 @@ def process_site_observation(
# wrong data item
return None

# this is wrong, shouldn't have to use it
# logger.debug(
# 'checking already exists: %s, %s',
# experiment_id,
# experiments[experiment_id].new,
# )
# # remove already saved objects
# if experiments[experiment_id].new == False:
# logger.debug('quitting sobvs')
# return None

extract = functools.partial(
self._extract,
data=data,
@@ -1249,6 +1313,7 @@ def process_site_observation(
"diff_map", # NB! keys in meta_aligner not yet updated
"event_map",
),
validate_files=validate_files,
)

logger.debug('looking for ligand_mol: %s', ligand_mol)
@@ -1428,7 +1493,8 @@ def process_bundle(self):
xtalform_assemblies,
) = self._get_yaml_blocks(
yaml_data=xtalforms_yaml,
blocks=("assemblies", "xtalforms"),
# blocks=("assemblies", "xtalforms"),
blocks=("assemblies", "crystalforms"),
)

( # pylint: disable=unbalanced-tuple-unpacking
@@ -1441,15 +1507,17 @@ def process_bundle(self):
yaml_data=meta,
blocks=(
"crystals",
"xtalforms",
"crystalforms",
"canon_sites",
"conformer_sites",
"xtalform_sites",
),
)

experiment_objects = self.process_experiment(yaml_data=crystals)
compound_objects = self.process_compound(yaml_data=crystals)
compound_objects = self.process_compound(
yaml_data=crystals, experiments=experiment_objects
)

# save components manytomany to experiment
# TODO: is it 1:1 relationship? looking at the meta_align it
@@ -1545,7 +1613,8 @@ def process_bundle(self):
xtalform_site_by_tag = {}
for val in xtalform_sites_objects.values(): # pylint: disable=no-member
for k in val.index_data["residues"]:
xtalform_site_by_tag[k] = val.instance
# strip the version number from tag
xtalform_site_by_tag[strip_version(k)] = val.instance

site_observation_objects = self.process_site_observation(
yaml_data=crystals,
@@ -1614,7 +1683,7 @@ def process_bundle(self):
# final remaining fk, attach reference site observation to canon_site_conf
for val in canon_site_conf_objects.values(): # pylint: disable=no-member
val.instance.ref_site_observation = site_observation_objects[
val.index_data["reference_ligands"]
strip_version(val.index_data["reference_ligands"])
].instance
val.instance.save()

@@ -1640,7 +1709,8 @@ def process_bundle(self):
+ f" - {val.instance.name}"
)
so_list = [
site_observation_objects[k].instance for k in val.index_data["members"]
site_observation_objects[strip_version(k)].instance
for k in val.index_data["members"]
]
self._tag_observations(tag, "ConformerSites", so_list)

@@ -1662,7 +1732,7 @@ def process_bundle(self):
so_list = SiteObservation.objects.filter(
xtalform_site__xtalform=val.instance
)
self._tag_observations(tag, "Xtalforms", so_list)
self._tag_observations(tag, "Crystalforms", so_list)

logger.debug("xtalform objects tagged")

@@ -1674,9 +1744,10 @@ def process_bundle(self):
+ f" - {val.instance.xtalform_site_id}"
)
so_list = [
site_observation_objects[k].instance for k in val.index_data["residues"]
site_observation_objects[strip_version(k)].instance
for k in val.index_data["residues"]
]
self._tag_observations(tag, "XtalformSites", so_list)
self._tag_observations(tag, "CrystalformSites", so_list)

logger.debug("xtalform_sites objects tagged")
