Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Mpes elab metadata #551

Open
wants to merge 26 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
26 commits
Select commit Hold shift + click to select a range
e41a2b1
split off metadata class, and first version if elabFTW metadata retri…
rettigl Jan 14, 2025
3a32112
sort metadata
rettigl Jan 14, 2025
862bc78
add user information
rettigl Jan 14, 2025
d8cce46
fixup metadata
rettigl Jan 14, 2025
94166fc
fix tests
rettigl Jan 14, 2025
8186e9a
remove pump beam section if pump closed
rettigl Jan 16, 2025
dffa6d1
use default config dir also for user config
rettigl Jan 16, 2025
ac18011
change to config_v1.yaml
rettigl Jan 16, 2025
5f00201
add mock tests for metadata retrieval
rettigl Jan 20, 2025
3cab84e
Merge remote-tracking branch 'origin/v1_feature_branch' into mpes_ela…
rettigl Jan 20, 2025
9089964
add additional tests
rettigl Jan 26, 2025
d7e182f
always store config
rettigl Jan 26, 2025
b014c18
fix docs
rettigl Jan 26, 2025
bcefdfb
fix warning messages
rettigl Jan 27, 2025
74128c2
fix tests to not overwrite .env file
rettigl Jan 28, 2025
cf2abc6
allow coverage report for config module
rettigl Jan 28, 2025
79547d3
Merge remote-tracking branch 'origin/main' into mpes_elab_metadata
rettigl Feb 6, 2025
935dfeb
fix beam status
rettigl Feb 10, 2025
9bc2f8b
allow disabling copy tool
rettigl Feb 10, 2025
2289c9c
add pump2, and fix polarization and pump status conversion
rettigl Feb 11, 2025
4b51a5d
also remove source_pump if not applied
rettigl Feb 27, 2025
037dfb3
don't add field if not found in epics archiver
rettigl Mar 3, 2025
e0ec72b
Merge remote-tracking branch 'origin/main' into mpes_elab_metadata
rettigl Mar 24, 2025
4bec385
Merge branch 'mpes_elab_metadata' into fix_mpes_metadata_and_copy_tool
rettigl Mar 25, 2025
027c684
Merge pull request #569 from OpenCOMPES/fix_mpes_metadata_and_copy_tool
rettigl Mar 26, 2025
bb5f7f9
add compatibility to old pre-2020 datasets
rettigl Apr 3, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions .cspell/custom-dictionary.txt
Original file line number Diff line number Diff line change
Expand Up @@ -99,13 +99,17 @@ dtype
dtypes
easimon
ecalibdict
elab
elabapi
elabid
electronanalyser
Elektronen
endstation
energycal
energycalfolder
ENERGYDISPERSION
ENOSPC
entityid
equiscale
Eref
errorbar
Expand Down Expand Up @@ -266,6 +270,7 @@ ontop
OPCPA
openmp
OPTICALDELAY
orcid
otherax
packetcoders
Pandoc
Expand Down Expand Up @@ -340,6 +345,7 @@ sdir
segs
setp
sfile
sharelink
shutil
Sixten
sohail
Expand Down Expand Up @@ -385,6 +391,7 @@ ufunc
unbinned
uncategorised
undoc
userid
utime
varnames
venv
Expand Down
4 changes: 4 additions & 0 deletions docs/sed/loader.rst
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ MpesLoader
:members:
:undoc-members:

.. automodule:: sed.loader.mpes.metadata
:members:
:undoc-members:

FlashLoader
###################################################
.. automodule:: sed.loader.flash.loader
Expand Down
5 changes: 3 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ classifiers = [
dependencies = [
"bokeh>=2.4.2",
"dask>=2021.12.0,<2024.8",
"elabapi-python>=5.0",
"fastdtw>=0.3.4",
"h5py>=3.6.0",
"ipympl>=0.9.1",
Expand Down Expand Up @@ -95,8 +96,8 @@ all = [

[tool.coverage.report]
omit = [
"config.py",
"config-3.py",
"./config.py",
"./config-3.py",
]

[tool.ruff]
Expand Down
2 changes: 2 additions & 0 deletions src/sed/core/config_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ class CopyToolModel(BaseModel):

source: DirectoryPath
dest: DirectoryPath
use: Optional[bool] = None
safety_margin: Optional[float] = None
gid: Optional[int] = None
scheduler: Optional[str] = None
Expand Down Expand Up @@ -321,6 +322,7 @@ class OffsetColumn(BaseModel):
class MetadataModel(BaseModel):
model_config = ConfigDict(extra="forbid")

elab_url: Optional[HttpUrl] = None
archiver_url: Optional[HttpUrl] = None
epics_pvs: Optional[Sequence[str]] = None
fa_in_channel: Optional[str] = None
Expand Down
6 changes: 4 additions & 2 deletions src/sed/core/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,8 +38,8 @@
from sed.io import to_tiff
from sed.loader import CopyTool
from sed.loader import get_loader
from sed.loader.mpes.loader import get_archiver_data
from sed.loader.mpes.loader import MpesLoader
from sed.loader.mpes.metadata import get_archiver_data

N_CPU = psutil.cpu_count()

Expand Down Expand Up @@ -162,7 +162,9 @@ def __init__(
verbose=self._verbose,
)

self.use_copy_tool = "copy_tool" in self._config["core"]
self.use_copy_tool = "copy_tool" in self._config["core"] and self._config["core"][
"copy_tool"
].pop("use", True)
if self.use_copy_tool:
try:
self.ct = CopyTool(
Expand Down
186 changes: 24 additions & 162 deletions src/sed/loader/mpes/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,13 +8,9 @@
import datetime
import glob
import io
import json
import os
from collections.abc import Sequence
from typing import Any
from urllib.error import HTTPError
from urllib.error import URLError
from urllib.request import urlopen

import dask
import dask.array as da
Expand All @@ -27,6 +23,7 @@
from sed.core.logging import set_verbosity
from sed.core.logging import setup_logging
from sed.loader.base.loader import BaseLoader
from sed.loader.mpes.metadata import MetadataRetriever


# Configure logging
Expand Down Expand Up @@ -580,34 +577,6 @@ def get_elapsed_time(
return secs


def get_archiver_data(
    archiver_url: str,
    archiver_channel: str,
    ts_from: float,
    ts_to: float,
) -> tuple[np.ndarray, np.ndarray]:
    """Extract time stamps and corresponding data from an EPICS archiver instance.

    The request string is built as
    ``<archiver_url><archiver_channel>&from=<iso_from>Z&to=<iso_to>Z`` and the
    JSON response is read from the first entry of the returned list.

    Args:
        archiver_url (str): URL of the archiver data extraction interface
        archiver_channel (str): EPICS channel to extract data for
        ts_from (float): starting time stamp of the range of interest
        ts_to (float): ending time stamp of the range of interest

    Raises:
        IndexError: If the decoded response is an empty list (no entry for the channel).
        urllib.error.HTTPError: Propagated from ``urlopen`` on an HTTP error response.
        urllib.error.URLError: Propagated from ``urlopen`` if the URL cannot be reached.

    Returns:
        tuple[np.ndarray, np.ndarray]: The extracted time stamps and corresponding data
    """
    # ``datetime.utcfromtimestamp`` is deprecated since Python 3.12. Build an aware UTC
    # datetime instead and drop the tzinfo again, so that ``isoformat()`` does not append
    # a "+00:00" offset; the "Z" suffix is added explicitly to the request below.
    utc = datetime.timezone.utc
    iso_from = datetime.datetime.fromtimestamp(ts_from, tz=utc).replace(tzinfo=None).isoformat()
    iso_to = datetime.datetime.fromtimestamp(ts_to, tz=utc).replace(tzinfo=None).isoformat()

    req_str = f"{archiver_url}{archiver_channel}&from={iso_from}Z&to={iso_to}Z"
    with urlopen(req_str) as req:
        data = json.load(req)

    # Each sample carries integer seconds plus a nanosecond remainder; merge them into
    # a single floating point time stamp.
    samples = data[0]["data"]
    secs = [sample["secs"] + sample["nanos"] * 1e-9 for sample in samples]
    vals = [sample["val"] for sample in samples]

    return (np.asarray(secs), np.asarray(vals))


class MpesLoader(BaseLoader):
"""Mpes implementation of the Loader. Reads from h5 files or folders of the
SPECS Metis 1000 (FHI Berlin)
Expand Down Expand Up @@ -729,6 +698,7 @@ def read_dataframe(
metadata=metadata,
)

token = kwds.pop("token", None)
channels = kwds.pop(
"channels",
self._config.get("dataframe", {}).get("channels", None),
Expand Down Expand Up @@ -777,6 +747,7 @@ def read_dataframe(
metadata = self.gather_metadata(
files=self.files,
metadata=self.metadata,
token=token,
)
else:
metadata = self.metadata
Expand Down Expand Up @@ -821,6 +792,14 @@ def get_files_from_run_id(
recursive=True,
),
)
# Compatibility for old scan format
if not run_files:
run_files = natsorted(
glob.glob(
folder + "/**/Scan" + str(run_id).zfill(3) + "_*." + extension,
recursive=True,
),
)
files.extend(run_files)

# Check if any files are found
Expand Down Expand Up @@ -877,13 +856,15 @@ def gather_metadata(
self,
files: Sequence[str],
metadata: dict = None,
token: str = None,
) -> dict:
"""Collect meta data from files

Args:
files (Sequence[str]): List of files loaded
metadata (dict, optional): Manual meta data dictionary. Auto-generated
meta data are added to it. Defaults to None.
token (str, optional): The elabFTW API token to use for fetching metadata. Defaults to None.

Returns:
dict: The completed metadata dictionary.
Expand Down Expand Up @@ -921,140 +902,21 @@ def gather_metadata(
os.path.realpath(files[0]),
)

logger.info("Collecting data from the EPICS archive...")
# Get metadata from Epics archive if not present already
epics_channels = self._config["metadata"]["epics_pvs"]

start = datetime.datetime.utcfromtimestamp(ts_from)
metadata_retriever = MetadataRetriever(self._config["metadata"], token)

channels_missing = set(epics_channels) - set(
metadata["file"].keys(),
metadata = metadata_retriever.fetch_epics_metadata(
ts_from=ts_from,
ts_to=ts_to,
metadata=metadata,
)
for channel in channels_missing:
try:
_, vals = get_archiver_data(
archiver_url=str(self._config["metadata"].get("archiver_url")),
archiver_channel=channel,
ts_from=ts_from,
ts_to=ts_to,
)
metadata["file"][f"{channel}"] = np.mean(vals)

except IndexError:
metadata["file"][f"{channel}"] = np.nan
logger.info(
f"Data for channel {channel} doesn't exist for time {start}",
)
except HTTPError as exc:
logger.warning(
f"Incorrect URL for the archive channel {channel}. "
"Make sure that the channel name and file start and end times are "
"correct.",
)
logger.warning(f"Error code: {exc}")
except URLError as exc:
logger.warning(
f"Cannot access the archive URL for channel {channel}. "
f"Make sure that you are within the FHI network."
f"Skipping over channels {channels_missing}.",
)
logger.warning(f"Error code: {exc}")
break

# Determine the correct aperture_config
stamps = sorted(
list(self._config["metadata"]["aperture_config"].keys()) + [start],
)
current_index = stamps.index(start)
timestamp = stamps[current_index - 1] # pick last configuration before file date

# Aperture metadata
if "instrument" not in metadata.keys():
metadata["instrument"] = {"analyzer": {}}
metadata["instrument"]["analyzer"]["fa_shape"] = "circle"
metadata["instrument"]["analyzer"]["ca_shape"] = "circle"
metadata["instrument"]["analyzer"]["fa_size"] = np.nan
metadata["instrument"]["analyzer"]["ca_size"] = np.nan
# get field aperture shape and size
if {
self._config["metadata"]["fa_in_channel"],
self._config["metadata"]["fa_hor_channel"],
}.issubset(set(metadata["file"].keys())):
fa_in = metadata["file"][self._config["metadata"]["fa_in_channel"]]
fa_hor = metadata["file"][self._config["metadata"]["fa_hor_channel"]]
for key, value in self._config["metadata"]["aperture_config"][timestamp][
"fa_size"
].items():
if value[0][0] < fa_in < value[0][1] and value[1][0] < fa_hor < value[1][1]:
try:
k_float = float(key)
metadata["instrument"]["analyzer"]["fa_size"] = k_float
except ValueError: # store string if numeric interpretation fails
metadata["instrument"]["analyzer"]["fa_shape"] = key
break
else:
logger.warning("Field aperture size not found.")

# get contrast aperture shape and size
if self._config["metadata"]["ca_in_channel"] in metadata["file"]:
ca_in = metadata["file"][self._config["metadata"]["ca_in_channel"]]
for key, value in self._config["metadata"]["aperture_config"][timestamp][
"ca_size"
].items():
if value[0] < ca_in < value[1]:
try:
k_float = float(key)
metadata["instrument"]["analyzer"]["ca_size"] = k_float
except ValueError: # store string if numeric interpretation fails
metadata["instrument"]["analyzer"]["ca_shape"] = key
break
else:
logger.warning("Contrast aperture size not found.")

# Storing the lens modes corresponding to lens voltages.
# Use lens voltages present in first lens_mode entry.
lens_list = self._config["metadata"]["lens_mode_config"][
next(iter(self._config["metadata"]["lens_mode_config"]))
].keys()

lens_volts = np.array(
[metadata["file"].get(f"KTOF:Lens:{lens}:V", np.nan) for lens in lens_list],
)
for mode, value in self._config["metadata"]["lens_mode_config"].items():
lens_volts_config = np.array([value[k] for k in lens_list])
if np.allclose(
lens_volts,
lens_volts_config,
rtol=0.005,
): # Equal upto 0.5% tolerance
metadata["instrument"]["analyzer"]["lens_mode"] = mode
break
else:
logger.warning(
"Lens mode for given lens voltages not found. "
"Storing lens mode from the user, if provided.",
)

# Determining projection from the lens mode
try:
lens_mode = metadata["instrument"]["analyzer"]["lens_mode"]
if "spatial" in lens_mode.split("_")[1]:
metadata["instrument"]["analyzer"]["projection"] = "real"
metadata["instrument"]["analyzer"]["scheme"] = "momentum dispersive"
else:
metadata["instrument"]["analyzer"]["projection"] = "reciprocal"
metadata["instrument"]["analyzer"]["scheme"] = "spatial dispersive"
except IndexError:
logger.warning(
"Lens mode must have the form, '6kV_kmodem4.0_20VTOF_v3.sav'. "
"Can't determine projection. "
"Storing projection from the user, if provided.",
)
except KeyError:
logger.warning(
"Lens mode not found. Can't determine projection. "
"Storing projection from the user, if provided.",
if self.runs:
metadata = metadata_retriever.fetch_elab_metadata(
runs=self.runs,
metadata=metadata,
)
else:
logger.warning('Fetching elabFTW metadata only supported for loading from "runs"')

return metadata

Expand Down
Loading
Loading