From e41a2b1482267f185cb870f63f09277649f305f3 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 14 Jan 2025 12:05:22 +0100
Subject: [PATCH 01/26] split off metadata class, and first version of elabFTW
 metadata retriever

---
 .cspell/custom-dictionary.txt   |   3 +
 docs/sed/loader.rst             |   4 +
 pyproject.toml                  |   1 +
 src/sed/core/config_model.py    |   1 +
 src/sed/core/processor.py       |   2 +-
 src/sed/loader/mpes/loader.py   | 175 ++------------------
 src/sed/loader/mpes/metadata.py | 285 ++++++++++++++++++++++++++++++++
 7 files changed, 308 insertions(+), 163 deletions(-)
 create mode 100644 src/sed/loader/mpes/metadata.py

diff --git a/.cspell/custom-dictionary.txt b/.cspell/custom-dictionary.txt
index 264069be..60496f9b 100644
--- a/.cspell/custom-dictionary.txt
+++ b/.cspell/custom-dictionary.txt
@@ -97,6 +97,8 @@ dtype
 dtypes
 easimon
 ecalibdict
+elab
+elabapi
 electronanalyser
 Elektronen
 endstation
@@ -104,6 +106,7 @@ energycal
 energycalfolder
 ENERGYDISPERSION
 ENOSPC
+entityid
 equiscale
 Eref
 errorbar
diff --git a/docs/sed/loader.rst b/docs/sed/loader.rst
index 468f34e4..bf945f66 100644
--- a/docs/sed/loader.rst
+++ b/docs/sed/loader.rst
@@ -29,6 +29,10 @@ MpesLoader
    :members:
    :undoc-members:
 
+.. automodule:: sed.loader.mpes.metadata
+   :members:
+   :undoc-members:
+
 FlashLoader
 ###################################################
 .. automodule:: sed.loader.flash.loader
diff --git a/pyproject.toml b/pyproject.toml
index 6a852d9d..2c0a4a1b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -23,6 +23,7 @@ requires-python = ">=3.9,<3.13"
 dependencies = [
     "bokeh>=2.4.2",
     "dask>=2021.12.0,<2024.8",
+    "elabapi-python>=5.0",
     "fastdtw>=0.3.4",
     "h5py>=3.6.0",
     "ipympl>=0.9.1",
diff --git a/src/sed/core/config_model.py b/src/sed/core/config_model.py
index 6379b639..41ab9caa 100644
--- a/src/sed/core/config_model.py
+++ b/src/sed/core/config_model.py
@@ -321,6 +321,7 @@ class OffsetColumn(BaseModel):
 class MetadataModel(BaseModel):
     model_config = ConfigDict(extra="forbid")
 
+    elab_url: Optional[HttpUrl] = None
     archiver_url: Optional[HttpUrl] = None
     epics_pvs: Optional[Sequence[str]] = None
     fa_in_channel: Optional[str] = None
diff --git a/src/sed/core/processor.py b/src/sed/core/processor.py
index 2345f206..2c87eaaf 100644
--- a/src/sed/core/processor.py
+++ b/src/sed/core/processor.py
@@ -38,8 +38,8 @@
 from sed.io import to_tiff
 from sed.loader import CopyTool
 from sed.loader import get_loader
-from sed.loader.mpes.loader import get_archiver_data
 from sed.loader.mpes.loader import MpesLoader
+from sed.loader.mpes.metadata import get_archiver_data
 
 N_CPU = psutil.cpu_count()
 
diff --git a/src/sed/loader/mpes/loader.py b/src/sed/loader/mpes/loader.py
index e3e75cda..794badc1 100644
--- a/src/sed/loader/mpes/loader.py
+++ b/src/sed/loader/mpes/loader.py
@@ -8,13 +8,9 @@
 import datetime
 import glob
 import io
-import json
 import os
 from collections.abc import Sequence
 from typing import Any
-from urllib.error import HTTPError
-from urllib.error import URLError
-from urllib.request import urlopen
 
 import dask
 import dask.array as da
@@ -27,6 +23,7 @@
 from sed.core.logging import set_verbosity
 from sed.core.logging import setup_logging
 from sed.loader.base.loader import BaseLoader
+from sed.loader.mpes.metadata import MetadataRetriever
 
 
 # Configure logging
@@ -580,34 +577,6 @@ def get_elapsed_time(
     return secs
 
 
-def get_archiver_data(
-    archiver_url: str,
-    archiver_channel: str,
-    ts_from: float,
-    ts_to: float,
-) -> tuple[np.ndarray, np.ndarray]:
-    """Extract time stamps and corresponding data from and EPICS archiver instance
-
-    Args:
-        archiver_url (str): URL of the archiver data extraction interface
-        archiver_channel (str): EPICS channel to extract data for
-        ts_from (float): starting time stamp of the range of interest
-        ts_to (float): ending time stamp of the range of interest
-
-    Returns:
-        tuple[np.ndarray, np.ndarray]: The extracted time stamps and corresponding data
-    """
-    iso_from = datetime.datetime.utcfromtimestamp(ts_from).isoformat()
-    iso_to = datetime.datetime.utcfromtimestamp(ts_to).isoformat()
-    req_str = archiver_url + archiver_channel + "&from=" + iso_from + "Z&to=" + iso_to + "Z"
-    with urlopen(req_str) as req:
-        data = json.load(req)
-        secs = [x["secs"] + x["nanos"] * 1e-9 for x in data[0]["data"]]
-        vals = [x["val"] for x in data[0]["data"]]
-
-    return (np.asarray(secs), np.asarray(vals))
-
-
 class MpesLoader(BaseLoader):
     """Mpes implementation of the Loader. Reads from h5 files or folders of the
     SPECS Metis 1000 (FHI Berlin)
@@ -729,6 +698,7 @@ def read_dataframe(
                 metadata=metadata,
             )
 
+        token = kwds.pop("token", None)
         channels = kwds.pop(
             "channels",
             self._config.get("dataframe", {}).get("channels", None),
@@ -777,6 +747,7 @@ def read_dataframe(
             metadata = self.gather_metadata(
                 files=self.files,
                 metadata=self.metadata,
+                token=token,
             )
         else:
             metadata = self.metadata
@@ -877,6 +848,7 @@ def gather_metadata(
         self,
         files: Sequence[str],
         metadata: dict = None,
+        token: str = None,
     ) -> dict:
         """Collect meta data from files
 
@@ -884,6 +856,7 @@ def gather_metadata(
             files (Sequence[str]): List of files loaded
             metadata (dict, optional): Manual meta data dictionary. Auto-generated
                 meta data are added to it. Defaults to None.
+            token (str, optional): The elabFTW API token to use for fetching metadata
 
         Returns:
             dict: The completed metadata dictionary.
@@ -921,140 +894,18 @@ def gather_metadata(
             os.path.realpath(files[0]),
         )
 
-        logger.info("Collecting data from the EPICS archive...")
-        # Get metadata from Epics archive if not present already
-        epics_channels = self._config["metadata"]["epics_pvs"]
+        metadata_retriever = MetadataRetriever(self._config["metadata"], token)
 
-        start = datetime.datetime.utcfromtimestamp(ts_from)
-
-        channels_missing = set(epics_channels) - set(
-            metadata["file"].keys(),
+        metadata = metadata_retriever.fetch_epics_metadata(
+            ts_from=ts_from,
+            ts_to=ts_to,
+            metadata=metadata,
         )
-        for channel in channels_missing:
-            try:
-                _, vals = get_archiver_data(
-                    archiver_url=str(self._config["metadata"].get("archiver_url")),
-                    archiver_channel=channel,
-                    ts_from=ts_from,
-                    ts_to=ts_to,
-                )
-                metadata["file"][f"{channel}"] = np.mean(vals)
 
-            except IndexError:
-                metadata["file"][f"{channel}"] = np.nan
-                logger.info(
-                    f"Data for channel {channel} doesn't exist for time {start}",
-                )
-            except HTTPError as exc:
-                logger.warning(
-                    f"Incorrect URL for the archive channel {channel}. "
-                    "Make sure that the channel name and file start and end times are "
-                    "correct.",
-                )
-                logger.warning(f"Error code: {exc}")
-            except URLError as exc:
-                logger.warning(
-                    f"Cannot access the archive URL for channel {channel}. "
-                    f"Make sure that you are within the FHI network."
-                    f"Skipping over channels {channels_missing}.",
-                )
-                logger.warning(f"Error code: {exc}")
-                break
-
-        # Determine the correct aperture_config
-        stamps = sorted(
-            list(self._config["metadata"]["aperture_config"].keys()) + [start],
+        metadata = metadata_retriever.fetch_elab_metadata(
+            runs=self.runs,
+            metadata=metadata,
         )
-        current_index = stamps.index(start)
-        timestamp = stamps[current_index - 1]  # pick last configuration before file date
-
-        # Aperture metadata
-        if "instrument" not in metadata.keys():
-            metadata["instrument"] = {"analyzer": {}}
-        metadata["instrument"]["analyzer"]["fa_shape"] = "circle"
-        metadata["instrument"]["analyzer"]["ca_shape"] = "circle"
-        metadata["instrument"]["analyzer"]["fa_size"] = np.nan
-        metadata["instrument"]["analyzer"]["ca_size"] = np.nan
-        # get field aperture shape and size
-        if {
-            self._config["metadata"]["fa_in_channel"],
-            self._config["metadata"]["fa_hor_channel"],
-        }.issubset(set(metadata["file"].keys())):
-            fa_in = metadata["file"][self._config["metadata"]["fa_in_channel"]]
-            fa_hor = metadata["file"][self._config["metadata"]["fa_hor_channel"]]
-            for key, value in self._config["metadata"]["aperture_config"][timestamp][
-                "fa_size"
-            ].items():
-                if value[0][0] < fa_in < value[0][1] and value[1][0] < fa_hor < value[1][1]:
-                    try:
-                        k_float = float(key)
-                        metadata["instrument"]["analyzer"]["fa_size"] = k_float
-                    except ValueError:  # store string if numeric interpretation fails
-                        metadata["instrument"]["analyzer"]["fa_shape"] = key
-                    break
-            else:
-                logger.warning("Field aperture size not found.")
-
-        # get contrast aperture shape and size
-        if self._config["metadata"]["ca_in_channel"] in metadata["file"]:
-            ca_in = metadata["file"][self._config["metadata"]["ca_in_channel"]]
-            for key, value in self._config["metadata"]["aperture_config"][timestamp][
-                "ca_size"
-            ].items():
-                if value[0] < ca_in < value[1]:
-                    try:
-                        k_float = float(key)
-                        metadata["instrument"]["analyzer"]["ca_size"] = k_float
-                    except ValueError:  # store string if numeric interpretation fails
-                        metadata["instrument"]["analyzer"]["ca_shape"] = key
-                    break
-            else:
-                logger.warning("Contrast aperture size not found.")
-
-        # Storing the lens modes corresponding to lens voltages.
-        # Use lens voltages present in first lens_mode entry.
-        lens_list = self._config["metadata"]["lens_mode_config"][
-            next(iter(self._config["metadata"]["lens_mode_config"]))
-        ].keys()
-
-        lens_volts = np.array(
-            [metadata["file"].get(f"KTOF:Lens:{lens}:V", np.nan) for lens in lens_list],
-        )
-        for mode, value in self._config["metadata"]["lens_mode_config"].items():
-            lens_volts_config = np.array([value[k] for k in lens_list])
-            if np.allclose(
-                lens_volts,
-                lens_volts_config,
-                rtol=0.005,
-            ):  # Equal upto 0.5% tolerance
-                metadata["instrument"]["analyzer"]["lens_mode"] = mode
-                break
-        else:
-            logger.warning(
-                "Lens mode for given lens voltages not found. "
-                "Storing lens mode from the user, if provided.",
-            )
-
-        # Determining projection from the lens mode
-        try:
-            lens_mode = metadata["instrument"]["analyzer"]["lens_mode"]
-            if "spatial" in lens_mode.split("_")[1]:
-                metadata["instrument"]["analyzer"]["projection"] = "real"
-                metadata["instrument"]["analyzer"]["scheme"] = "momentum dispersive"
-            else:
-                metadata["instrument"]["analyzer"]["projection"] = "reciprocal"
-                metadata["instrument"]["analyzer"]["scheme"] = "spatial dispersive"
-        except IndexError:
-            logger.warning(
-                "Lens mode must have the form, '6kV_kmodem4.0_20VTOF_v3.sav'. "
-                "Can't determine projection. "
-                "Storing projection from the user, if provided.",
-            )
-        except KeyError:
-            logger.warning(
-                "Lens mode not found. Can't determine projection. "
-                "Storing projection from the user, if provided.",
-            )
 
         return metadata
 
diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
new file mode 100644
index 00000000..c678ce1f
--- /dev/null
+++ b/src/sed/loader/mpes/metadata.py
@@ -0,0 +1,285 @@
+"""
+The module provides a MetadataRetriever class for retrieving metadata
+from an EPICS archiver and an elabFTW instance.
+"""
+from __future__ import annotations
+
+import datetime
+import json
+from urllib.error import HTTPError
+from urllib.error import URLError
+from urllib.request import urlopen
+
+import elabapi_python
+import numpy as np
+
+from sed.core.config import read_env_var
+from sed.core.config import save_env_var
+from sed.core.logging import setup_logging
+
+logger = setup_logging("mpes_metadata_retriever")
+
+
+class MetadataRetriever:
+    """
+    A class for retrieving metadata from an EPICS archiver and an elabFTW instance.
+    """
+
+    def __init__(self, metadata_config: dict, token: str = None) -> None:
+        """
+        Initializes the MetadataRetriever class and sets up the elabFTW API clients.
+
+        Args:
+            metadata_config (dict): Metadata configuration. "elab_url" is read here; the other
+                keys are used by the fetch methods. Raises ValueError without token or URL.
+            token (str, optional): The token to use for fetching metadata. If provided,
+                will be saved to .env file for future use.
+        """
+        # Token handling
+        if token:
+            self.token = token
+            save_env_var("ELAB_TOKEN", self.token)
+        else:
+            # Try to load token from config or .env file
+            self.token = read_env_var("ELAB_TOKEN")
+
+        if not self.token:
+            raise ValueError(
+                "Token is required for metadata collection. Either provide a token "
+                "parameter or set the ELAB_TOKEN environment variable.",
+            )
+
+        self._config = metadata_config
+        # str(None) is the truthy string "None", so map a missing URL to "" before checking
+        self.url = str(metadata_config.get("elab_url") or "")
+        if not self.url:
+            raise ValueError("No URL provided for fetching metadata from elabFTW.")
+
+        # Config
+        self.configuration = elabapi_python.Configuration()
+        self.configuration.api_key["api_key"] = self.token
+        self.configuration.api_key_prefix["api_key"] = "Authorization"
+        self.configuration.host = self.url
+        self.configuration.debug = False
+        self.configuration.verify_ssl = False  # NOTE(review): TLS cert checks are off
+
+        # create an instance of the API class
+        self.api_client = elabapi_python.ApiClient(self.configuration)
+        # fix issue with Authorization header not being properly set by the generated lib
+        self.api_client.set_default_header(header_name="Authorization", header_value=self.token)
+
+        # create an instance of Items
+        self.itemsApi = elabapi_python.ItemsApi(self.api_client)
+        self.experimentsApi = elabapi_python.ExperimentsApi(self.api_client)
+        self.linksApi = elabapi_python.LinksToItemsApi(self.api_client)
+        self.experimentsLinksApi = elabapi_python.LinksToExperimentsApi(self.api_client)
+        self.usersApi = elabapi_python.UsersApi(self.api_client)
+
+    def fetch_epics_metadata(self, ts_from: float, ts_to: float, metadata: dict) -> dict:
+        """Fetch metadata from an EPICS archiver instance for times between ts_from and ts_to.
+        Channels are defined in the config.
+
+        Args:
+            ts_from (float): Start timestamp of the range to collect data from.
+            ts_to (float): End timestamp of the range to collect data from.
+            metadata (dict): Input metadata dictionary. Will be updated in place.
+
+        Returns:
+            dict: Updated metadata dictionary.
+        """
+        logger.info("Collecting data from the EPICS archive...")
+
+        start = datetime.datetime.utcfromtimestamp(ts_from)
+
+        # Get metadata from Epics archive if not present already
+        epics_channels = self._config["epics_pvs"]
+
+        channels_missing = set(epics_channels) - set(
+            metadata["file"].keys(),
+        )
+        for channel in channels_missing:
+            try:
+                _, vals = get_archiver_data(
+                    archiver_url=str(self._config.get("archiver_url")),
+                    archiver_channel=channel,
+                    ts_from=ts_from,
+                    ts_to=ts_to,
+                )
+                metadata["file"][f"{channel}"] = np.mean(vals)
+
+            except IndexError:
+                metadata["file"][f"{channel}"] = np.nan
+                logger.info(
+                    f"Data for channel {channel} doesn't exist for time {start}",
+                )
+            except HTTPError as exc:
+                logger.warning(
+                    f"Incorrect URL for the archive channel {channel}. "
+                    "Make sure that the channel name and file start and end times are "
+                    "correct.",
+                )
+                logger.warning(f"Error code: {exc}")
+            except URLError as exc:
+                logger.warning(
+                    f"Cannot access the archive URL for channel {channel}. "
+                    f"Make sure that you are within the FHI network. "
+                    f"Skipping over channels {channels_missing}.",
+                )
+                logger.warning(f"Error code: {exc}")
+                break
+
+        # Determine the correct aperture_config
+        stamps = sorted(
+            list(self._config["aperture_config"].keys()) + [start],
+        )
+        current_index = stamps.index(start)
+        timestamp = stamps[current_index - 1]  # pick last configuration before file date
+
+        # Aperture metadata
+        if "instrument" not in metadata.keys():
+            metadata["instrument"] = {"analyzer": {}}
+        metadata["instrument"]["analyzer"]["fa_shape"] = "circle"
+        metadata["instrument"]["analyzer"]["ca_shape"] = "circle"
+        metadata["instrument"]["analyzer"]["fa_size"] = np.nan
+        metadata["instrument"]["analyzer"]["ca_size"] = np.nan
+        # get field aperture shape and size
+        if {
+            self._config["fa_in_channel"],
+            self._config["fa_hor_channel"],
+        }.issubset(set(metadata["file"].keys())):
+            fa_in = metadata["file"][self._config["fa_in_channel"]]
+            fa_hor = metadata["file"][self._config["fa_hor_channel"]]
+            for key, value in self._config["aperture_config"][timestamp]["fa_size"].items():
+                if value[0][0] < fa_in < value[0][1] and value[1][0] < fa_hor < value[1][1]:
+                    try:
+                        metadata["instrument"]["analyzer"]["fa_size"] = float(key)
+                    except ValueError:  # store string if numeric interpretation fails
+                        metadata["instrument"]["analyzer"]["fa_shape"] = key
+                    break
+            else:
+                logger.warning("Field aperture size not found.")
+
+        # get contrast aperture shape and size
+        if self._config["ca_in_channel"] in metadata["file"]:
+            ca_in = metadata["file"][self._config["ca_in_channel"]]
+            for key, value in self._config["aperture_config"][timestamp]["ca_size"].items():
+                if value[0] < ca_in < value[1]:
+                    try:
+                        metadata["instrument"]["analyzer"]["ca_size"] = float(key)
+                    except ValueError:  # store string if numeric interpretation fails
+                        metadata["instrument"]["analyzer"]["ca_shape"] = key
+                    break
+            else:
+                logger.warning("Contrast aperture size not found.")
+
+        # Storing the lens modes corresponding to lens voltages.
+        # Use lens voltages present in first lens_mode entry.
+        lens_list = self._config["lens_mode_config"][
+            next(iter(self._config["lens_mode_config"]))
+        ].keys()
+
+        lens_volts = np.array(
+            [metadata["file"].get(f"KTOF:Lens:{lens}:V", np.nan) for lens in lens_list],
+        )
+        for mode, value in self._config["lens_mode_config"].items():
+            lens_volts_config = np.array([value[k] for k in lens_list])
+            if np.allclose(
+                lens_volts,
+                lens_volts_config,
+                rtol=0.005,
+            ):  # Equal upto 0.5% tolerance
+                metadata["instrument"]["analyzer"]["lens_mode"] = mode
+                break
+        else:
+            logger.warning(
+                "Lens mode for given lens voltages not found. "
+                "Storing lens mode from the user, if provided.",
+            )
+
+        # Determining projection from the lens mode
+        try:
+            lens_mode = metadata["instrument"]["analyzer"]["lens_mode"]
+            if "spatial" in lens_mode.split("_")[1]:
+                metadata["instrument"]["analyzer"]["projection"] = "real"
+                metadata["instrument"]["analyzer"]["scheme"] = "spatial dispersive"
+            else:
+                metadata["instrument"]["analyzer"]["projection"] = "reciprocal"
+                metadata["instrument"]["analyzer"]["scheme"] = "momentum dispersive"
+        except IndexError:
+            logger.warning(
+                "Lens mode must have the form, '6kV_kmodem4.0_20VTOF_v3.sav'. "
+                "Can't determine projection. "
+                "Storing projection from the user, if provided.",
+            )
+        except KeyError:
+            logger.warning(
+                "Lens mode not found. Can't determine projection. "
+                "Storing projection from the user, if provided.",
+            )
+
+        return metadata
+
+    def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
+        """Fetch metadata from an elabFTW instance
+
+        Args:
+            runs (list[str]): List of runs; only the first entry is used for the lookup
+            metadata (dict): Input metadata dictionary. Will be updated in place
+
+        Returns:
+            dict: Updated metadata dictionary (returned unchanged if no entry is found)
+        """
+        logger.info("Collecting data from the elabFTW instance...")
+        # Get the first experiment returned for the "Metis scan <run>" query
+        try:
+            experiment = self.experimentsApi.read_experiments(q=f"'Metis scan {runs[0]}'")[0]
+        except IndexError:
+            logger.warning(f"No elabFTW entry found for run {runs[0]}")
+            return metadata
+
+        # Scan metadata
+        metadata_json = json.loads(experiment.metadata)
+        for key, val in metadata_json["extra_fields"].items():
+            metadata[key] = val["value"]
+
+        exp_id = experiment.id
+        # Get the links to items
+        links = self.linksApi.read_entity_items_links(entity_type="experiments", id=exp_id)
+        # Get the items
+        items = [self.itemsApi.get_item(link.entityid) for link in links]
+        # Get the metadata
+        for item in items:
+            if item.metadata is not None:
+                metadata_json = json.loads(item.metadata)
+                for key, val in metadata_json["extra_fields"].items():
+                    metadata[key] = val["value"]
+
+        return metadata
+
+
+def get_archiver_data(
+    archiver_url: str,
+    archiver_channel: str,
+    ts_from: float,
+    ts_to: float,
+) -> tuple[np.ndarray, np.ndarray]:
+    """Extract time stamps and corresponding data from an EPICS archiver instance
+
+    Args:
+        archiver_url (str): URL of the archiver data extraction interface
+        archiver_channel (str): EPICS channel to extract data for
+        ts_from (float): starting time stamp of the range of interest
+        ts_to (float): ending time stamp of the range of interest
+
+    Returns:
+        tuple[np.ndarray, np.ndarray]: The extracted time stamps and corresponding data
+    """
+    iso_from = datetime.datetime.utcfromtimestamp(ts_from).isoformat()
+    iso_to = datetime.datetime.utcfromtimestamp(ts_to).isoformat()
+    req_str = archiver_url + archiver_channel + "&from=" + iso_from + "Z&to=" + iso_to + "Z"
+    with urlopen(req_str) as req:
+        data = json.load(req)
+        secs = [x["secs"] + x["nanos"] * 1e-9 for x in data[0]["data"]]
+        vals = [x["val"] for x in data[0]["data"]]
+
+    return (np.asarray(secs), np.asarray(vals))

From 3a32112fa317e3eae322ce5de37a7c7a5b69da40 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 14 Jan 2025 15:20:37 +0100
Subject: [PATCH 02/26] sort metadata

---
 .cspell/custom-dictionary.txt   |  2 ++
 src/sed/loader/mpes/metadata.py | 43 +++++++++++++++++++++++++++------
 2 files changed, 38 insertions(+), 7 deletions(-)

diff --git a/.cspell/custom-dictionary.txt b/.cspell/custom-dictionary.txt
index 60496f9b..3bff045c 100644
--- a/.cspell/custom-dictionary.txt
+++ b/.cspell/custom-dictionary.txt
@@ -99,6 +99,7 @@ easimon
 ecalibdict
 elab
 elabapi
+elabid
 electronanalyser
 Elektronen
 endstation
@@ -341,6 +342,7 @@ sdir
 segs
 setp
 sfile
+sharelink
 shutil
 Sixten
 sohail
diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index c678ce1f..199ee7ca 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -237,22 +237,51 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
             logger.warning(f"No elabFTW entry found for run {runs[0]}")
             return metadata
 
-        # Scan metadata
-        metadata_json = json.loads(experiment.metadata)
-        for key, val in metadata_json["extra_fields"].items():
-            metadata[key] = val["value"]
+        if "elabFTW" not in metadata:
+            metadata["elabFTW"] = {}
 
         exp_id = experiment.id
         # Get the links to items
         links = self.linksApi.read_entity_items_links(entity_type="experiments", id=exp_id)
         # Get the items
         items = [self.itemsApi.get_item(link.entityid) for link in links]
-        # Get the metadata
-        for item in items:
+        items_dict = {item.category_title: item for item in items}
+        items_dict["scan"] = experiment
+
+        # Sort the metadata
+        for category, item in items_dict.items():
+            if category not in metadata["elabFTW"]:
+                metadata["elabFTW"][category] = {}
+            metadata["elabFTW"][category]["title"] = item.title
+            metadata["elabFTW"][category]["summary"] = item.body
+            metadata["elabFTW"][category]["id"] = item.id
+            metadata["elabFTW"][category]["elabid"] = item.elabid
+            metadata["elabFTW"][category]["link"] = item.sharelink
             if item.metadata is not None:
                 metadata_json = json.loads(item.metadata)
                 for key, val in metadata_json["extra_fields"].items():
-                    metadata[key] = val["value"]
+                    if val is not None and val != "None":
+                        metadata["elabFTW"][category][key] = val["value"]
+
+        # group beam profiles:
+        if (
+            "Laser Status" in metadata["elabFTW"]
+            and "pump_profile_x" in metadata["elabFTW"]["Laser Status"]
+            and "pump_profile_y" in metadata["elabFTW"]["Laser Status"]
+        ):
+            metadata["elabFTW"]["Laser Status"]["pump_profile"] = [
+                float(metadata["elabFTW"]["Laser Status"]["pump_profile_x"]),
+                float(metadata["elabFTW"]["Laser Status"]["pump_profile_y"]),
+            ]
+        if (
+            "Laser Status" in metadata["elabFTW"]
+            and "probe_profile_x" in metadata["elabFTW"]["Laser Status"]
+            and "probe_profile_y" in metadata["elabFTW"]["Laser Status"]
+        ):
+            metadata["elabFTW"]["Laser Status"]["probe_profile"] = [
+                float(metadata["elabFTW"]["Laser Status"]["probe_profile_x"]),
+                float(metadata["elabFTW"]["Laser Status"]["probe_profile_y"]),
+            ]
 
         return metadata
 

From 862bc78b8d9e2a7b0495a4e19ea8b826c24fe480 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 14 Jan 2025 15:23:38 +0100
Subject: [PATCH 03/26] add user information

---
 .cspell/custom-dictionary.txt   |  2 ++
 src/sed/loader/mpes/metadata.py | 13 +++++++++++--
 2 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/.cspell/custom-dictionary.txt b/.cspell/custom-dictionary.txt
index 3bff045c..fb0b30c6 100644
--- a/.cspell/custom-dictionary.txt
+++ b/.cspell/custom-dictionary.txt
@@ -268,6 +268,7 @@ ontop
 OPCPA
 openmp
 OPTICALDELAY
+orcid
 otherax
 packetcoders
 Pandoc
@@ -388,6 +389,7 @@ ufunc
 unbinned
 uncategorised
 undoc
+userid
 utime
 varnames
 venv
diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index 199ee7ca..55abb361 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -241,6 +241,14 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
             metadata["elabFTW"] = {}
 
         exp_id = experiment.id
+        # Get user information
+        user = self.usersApi.read_user(experiment.userid)
+        metadata["elabFTW"]["user"] = {}
+        metadata["elabFTW"]["user"]["name"] = user.fullname
+        metadata["elabFTW"]["user"]["email"] = user.email
+        metadata["elabFTW"]["user"]["id"] = user.userid
+        if user.orcid:
+            metadata["elabFTW"]["user"]["orcid"] = user.orcid
         # Get the links to items
         links = self.linksApi.read_entity_items_links(entity_type="experiments", id=exp_id)
         # Get the items
@@ -256,11 +264,12 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
             metadata["elabFTW"][category]["summary"] = item.body
             metadata["elabFTW"][category]["id"] = item.id
             metadata["elabFTW"][category]["elabid"] = item.elabid
-            metadata["elabFTW"][category]["link"] = item.sharelink
+            if item.sharelink:
+                metadata["elabFTW"][category]["link"] = item.sharelink
             if item.metadata is not None:
                 metadata_json = json.loads(item.metadata)
                 for key, val in metadata_json["extra_fields"].items():
-                    if val is not None and val != "None":
+                    if val["value"] and val["value"] != ["None"]:
                         metadata["elabFTW"][category][key] = val["value"]
 
         # group beam profiles:

From d8cce46235631fb25ef47c4f6964424d69966ee5 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 14 Jan 2025 17:43:16 +0100
Subject: [PATCH 04/26] fixup metadata

---
 src/sed/loader/mpes/metadata.py | 62 ++++++++++++++++++++++++++-------
 1 file changed, 49 insertions(+), 13 deletions(-)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index 55abb361..371f579c 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -258,6 +258,7 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
 
         # Sort the metadata
         for category, item in items_dict.items():
+            category = category.replace(":", "").replace(" ", "_").lower()
             if category not in metadata["elabFTW"]:
                 metadata["elabFTW"][category] = {}
             metadata["elabFTW"][category]["title"] = item.title
@@ -270,28 +271,63 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
                 metadata_json = json.loads(item.metadata)
                 for key, val in metadata_json["extra_fields"].items():
                     if val["value"] and val["value"] != ["None"]:
-                        metadata["elabFTW"][category][key] = val["value"]
+                        try:
+                            metadata["elabFTW"][category][key] = float(val["value"])
+                        except ValueError:
+                            metadata["elabFTW"][category][key] = val["value"]
 
         # group beam profiles:
         if (
-            "Laser Status" in metadata["elabFTW"]
-            and "pump_profile_x" in metadata["elabFTW"]["Laser Status"]
-            and "pump_profile_y" in metadata["elabFTW"]["Laser Status"]
+            "laser_status" in metadata["elabFTW"]
+            and "pump_profile_x" in metadata["elabFTW"]["laser_status"]
+            and "pump_profile_y" in metadata["elabFTW"]["laser_status"]
         ):
-            metadata["elabFTW"]["Laser Status"]["pump_profile"] = [
-                float(metadata["elabFTW"]["Laser Status"]["pump_profile_x"]),
-                float(metadata["elabFTW"]["Laser Status"]["pump_profile_y"]),
+            metadata["elabFTW"]["laser_status"]["pump_profile"] = [
+                float(metadata["elabFTW"]["laser_status"]["pump_profile_x"]),
+                float(metadata["elabFTW"]["laser_status"]["pump_profile_y"]),
             ]
         if (
-            "Laser Status" in metadata["elabFTW"]
-            and "probe_profile_x" in metadata["elabFTW"]["Laser Status"]
-            and "probe_profile_y" in metadata["elabFTW"]["Laser Status"]
+            "laser_status" in metadata["elabFTW"]
+            and "probe_profile_x" in metadata["elabFTW"]["laser_status"]
+            and "probe_profile_y" in metadata["elabFTW"]["laser_status"]
         ):
-            metadata["elabFTW"]["Laser Status"]["probe_profile"] = [
-                float(metadata["elabFTW"]["Laser Status"]["probe_profile_x"]),
-                float(metadata["elabFTW"]["Laser Status"]["probe_profile_y"]),
+            metadata["elabFTW"]["laser_status"]["probe_profile"] = [
+                float(metadata["elabFTW"]["laser_status"]["probe_profile_x"]),
+                float(metadata["elabFTW"]["laser_status"]["probe_profile_y"]),
             ]
 
+        # fix preparation date
+        if "sample" in metadata["elabFTW"] and "preparation_date" in metadata["elabFTW"]["sample"]:
+            metadata["elabFTW"]["sample"]["preparation_date"] = (
+                datetime.datetime.strptime(
+                    metadata["elabFTW"]["sample"]["preparation_date"],
+                    "%Y-%m-%d",
+                )
+                .replace(tzinfo=datetime.timezone.utc)
+                .isoformat()
+            )
+
+        # fix polarizations
+        if (
+            "scan" in metadata["elabFTW"]
+            and "pump_polarization" in metadata["elabFTW"]["scan"]
+            and isinstance(metadata["elabFTW"]["scan"]["pump_polarization"], str)
+        ):
+            if metadata["elabFTW"]["scan"]["pump_polarization"] == "s":
+                metadata["elabFTW"]["scan"]["pump_polarization"] = 90
+            elif metadata["elabFTW"]["scan"]["pump_polarization"] == "p":
+                metadata["elabFTW"]["scan"]["pump_polarization"] = 0
+
+        if (
+            "scan" in metadata["elabFTW"]
+            and "probe_polarization" in metadata["elabFTW"]["scan"]
+            and isinstance(metadata["elabFTW"]["scan"]["probe_polarization"], str)
+        ):
+            if metadata["elabFTW"]["scan"]["probe_polarization"] == "s":
+                metadata["elabFTW"]["scan"]["probe_polarization"] = 90
+            elif metadata["elabFTW"]["scan"]["probe_polarization"] == "p":
+                metadata["elabFTW"]["scan"]["probe_polarization"] = 0
+
         return metadata
 
 

From 94166fca5f3362b0482ec03e67594641da0fb5b6 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Wed, 15 Jan 2025 00:45:01 +0100
Subject: [PATCH 05/26] fix tests

---
 src/sed/loader/mpes/metadata.py | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index 371f579c..e4869fd6 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -17,7 +17,7 @@
 from sed.core.config import save_env_var
 from sed.core.logging import setup_logging
 
-logger = setup_logging("flash_metadata_retriever")
+logger = setup_logging("mpes_metadata_retriever")
 
 
 class MetadataRetriever:
@@ -43,12 +43,6 @@ def __init__(self, metadata_config: dict, token: str = None) -> None:
             # Try to load token from config or .env file
             self.token = read_env_var("ELAB_TOKEN")
 
-        if not self.token:
-            raise ValueError(
-                "Token is required for metadata collection. Either provide a token "
-                "parameter or set the ELAB_TOKEN environment variable.",
-            )
-
         self._config = metadata_config
 
         self.url = str(metadata_config.get("elab_url"))
@@ -229,6 +223,12 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
         Returns:
             dict: Updated metadata dictionary
         """
+        if not self.token:
+            logger.warning(
+                "No valid token found. Token is required for metadata collection. Either provide "
+                "a token parameter or set the ELAB_TOKEN environment variable.",
+            )
+            return metadata
         logger.info("Collecting data from the elabFTW instance...")
         # Get the experiment
         try:
@@ -270,7 +270,7 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
             if item.metadata is not None:
                 metadata_json = json.loads(item.metadata)
                 for key, val in metadata_json["extra_fields"].items():
-                    if val["value"] and val["value"] != ["None"]:
+                    if val["value"] is not None and val["value"] != "" and val["value"] != ["None"]:
                         try:
                             metadata["elabFTW"][category][key] = float(val["value"])
                         except ValueError:

From 8186e9a41ff0b546c622500ceeb6c74ae940aaf5 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Thu, 16 Jan 2025 15:34:24 +0100
Subject: [PATCH 06/26] remove pump beam section if pump closed

---
 src/sed/loader/mpes/metadata.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index e4869fd6..f1562e3f 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -328,6 +328,11 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
             elif metadata["elabFTW"]["scan"]["probe_polarization"] == "p":
                 metadata["elabFTW"]["scan"]["probe_polarization"] = 0
 
+        # remove pump information if pump not applied:
+        if not metadata["elabFTW"]["scan"].get("pump_status", 0):
+            if "pump_photon_energy" in metadata["elabFTW"].get("laser_status", {}):
+                del metadata["elabFTW"]["laser_status"]["pump_photon_energy"]
+
         return metadata
 
 

From dffa6d11743090c305f30b2c77ee09f387653458 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Thu, 16 Jan 2025 16:22:14 +0100
Subject: [PATCH 07/26] use default config dir also for user config; find .env
 files also in system config

---
 docs/user_guide/config.md |  2 +-
 src/sed/core/config.py    | 33 ++++++++++++++++++---------------
 2 files changed, 19 insertions(+), 16 deletions(-)

diff --git a/docs/user_guide/config.md b/docs/user_guide/config.md
index bfc6e4da..d7adbbb6 100644
--- a/docs/user_guide/config.md
+++ b/docs/user_guide/config.md
@@ -4,7 +4,7 @@ The config module contains a mechanism to collect configuration parameters from
 It will load an (optional) provided config file, or alternatively use a passed python dictionary as initial config dictionary, and subsequently look for the following additional config files to load:
 
 * ``folder_config``: A config file of name :file:`sed_config.yaml` in the current working directory. This is mostly intended to pass calibration parameters of the workflow between different notebook instances.
-* ``user_config``: A config file provided by the user, stored as :file:`.sed/config.yaml` in the current user's home directly. This is intended to give a user the option for individual configuration modifications of system settings.
+* ``user_config``: A config file provided by the user, stored as :file:`.config/sed/config.yaml` in the current user's home directly. This is intended to give a user the option for individual configuration modifications of system settings.
 * ``system_config``: A config file provided by the system administrator, stored as :file:`/etc/sed/config.yaml` on Linux-based systems, and :file:`%ALLUSERSPROFILE%/sed/config.yaml` on Windows. This should provide all necessary default parameters for using the sed processor with a given setup. For an example for an mpes setup, see :ref:`example_config`
 * ``default_config``: The default configuration shipped with the package. Typically, all parameters here should be overwritten by any of the other configuration files.
 
diff --git a/src/sed/core/config.py b/src/sed/core/config.py
index 686ee5b3..1133236f 100644
--- a/src/sed/core/config.py
+++ b/src/sed/core/config.py
@@ -19,6 +19,11 @@
 package_dir = os.path.dirname(find_spec("sed").origin)
 
 USER_CONFIG_PATH = user_config_path(appname="sed", appauthor="OpenCOMPES", ensure_exists=True)
+SYSTEM_CONFIG_PATH = (
+    Path(os.environ["ALLUSERSPROFILE"]).joinpath("sed")
+    if platform.system() == "Windows"
+    else Path("/etc/").joinpath("sed")
+)
 
 # Configure logging
 logger = setup_logging("config")
@@ -49,7 +54,7 @@ def parse_config(
         user_config (dict | str, optional): user-based config dictionary
             or file path. The loaded dictionary is completed with the user-based values,
             taking preference over system and default values.
-            Defaults to the file ".sed/config.yaml" in the current user's home directory.
+            Defaults to the file ".config/sed/config.yaml" in the current user's home directory.
         system_config (dict | str, optional): system-wide config dictionary
             or file path. The loaded dictionary is completed with the system-wide values,
             taking preference over default values. Defaults to the file "/etc/sed/config.yaml"
@@ -93,9 +98,7 @@ def parse_config(
         user_dict = copy.deepcopy(user_config)
     else:
         if user_config is None:
-            user_config = str(
-                Path.home().joinpath(".sed").joinpath("config.yaml"),
-            )
+            user_config = str(USER_CONFIG_PATH.joinpath("config.yaml"))
         if Path(user_config).exists():
             user_dict = load_config(user_config)
             if verbose:
@@ -106,14 +109,7 @@ def parse_config(
         system_dict = copy.deepcopy(system_config)
     else:
         if system_config is None:
-            if platform.system() in ["Linux", "Darwin"]:
-                system_config = str(
-                    Path("/etc/").joinpath("sed").joinpath("config.yaml"),
-                )
-            elif platform.system() == "Windows":
-                system_config = str(
-                    Path(os.environ["ALLUSERSPROFILE"]).joinpath("sed").joinpath("config.yaml"),
-                )
+            system_config = str(SYSTEM_CONFIG_PATH.joinpath("config.yaml"))
         if Path(system_config).exists():
             system_dict = load_config(system_config)
             if verbose:
@@ -281,6 +277,7 @@ def read_env_var(var_name: str) -> str | None:
     1. OS environment variables
     2. .env file in current directory
     3. .env file in user config directory
+    4. .env file in system config directory
 
     Args:
         var_name (str): Name of the environment variable to read
@@ -288,24 +285,30 @@ def read_env_var(var_name: str) -> str | None:
     Returns:
         str | None: Value of the environment variable or None if not found
     """
-    # First check OS environment variables
+    # 1. check OS environment variables
     value = os.getenv(var_name)
     if value is not None:
         logger.debug(f"Found {var_name} in OS environment variables")
         return value
 
-    # Then check .env in current directory
+    # 2. check .env in current directory
     local_vars = _parse_env_file(Path(".env"))
     if var_name in local_vars:
         logger.debug(f"Found {var_name} in ./.env file")
         return local_vars[var_name]
 
-    # Finally check .env in user config directory
+    # 3. check .env in user config directory
     user_vars = _parse_env_file(USER_CONFIG_PATH / ".env")
     if var_name in user_vars:
         logger.debug(f"Found {var_name} in user config .env file")
         return user_vars[var_name]
 
+    # 4. check .env in system config directory
+    system_vars = _parse_env_file(SYSTEM_CONFIG_PATH / ".env")
+    if var_name in system_vars:
+        logger.debug(f"Found {var_name} in system config .env file")
+        return system_vars[var_name]
+
     logger.debug(f"Environment variable {var_name} not found in any location")
     return None
 

From ac18011da6ebfd44a75e576438605bf149e36915 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Thu, 16 Jan 2025 16:42:33 +0100
Subject: [PATCH 08/26] change to config_v1.yaml

---
 docs/user_guide/config.md     |  4 ++--
 src/sed/core/config.py        | 10 +++++-----
 src/sed/loader/mpes/loader.py | 11 +++++++----
 3 files changed, 14 insertions(+), 11 deletions(-)

diff --git a/docs/user_guide/config.md b/docs/user_guide/config.md
index d7adbbb6..27c53d67 100644
--- a/docs/user_guide/config.md
+++ b/docs/user_guide/config.md
@@ -4,8 +4,8 @@ The config module contains a mechanism to collect configuration parameters from
 It will load an (optional) provided config file, or alternatively use a passed python dictionary as initial config dictionary, and subsequently look for the following additional config files to load:
 
 * ``folder_config``: A config file of name :file:`sed_config.yaml` in the current working directory. This is mostly intended to pass calibration parameters of the workflow between different notebook instances.
-* ``user_config``: A config file provided by the user, stored as :file:`.config/sed/config.yaml` in the current user's home directly. This is intended to give a user the option for individual configuration modifications of system settings.
-* ``system_config``: A config file provided by the system administrator, stored as :file:`/etc/sed/config.yaml` on Linux-based systems, and :file:`%ALLUSERSPROFILE%/sed/config.yaml` on Windows. This should provide all necessary default parameters for using the sed processor with a given setup. For an example for an mpes setup, see :ref:`example_config`
+* ``user_config``: A config file provided by the user, stored as :file:`.config/sed/config_v1.yaml` in the current user's home directory. This is intended to give a user the option for individual configuration modifications of system settings.
+* ``system_config``: A config file provided by the system administrator, stored as :file:`/etc/sed/config_v1.yaml` on Linux-based systems, and :file:`%ALLUSERSPROFILE%/sed/config_v1.yaml` on Windows. This should provide all necessary default parameters for using the sed processor with a given setup. For an example for an mpes setup, see :ref:`example_config`
 * ``default_config``: The default configuration shipped with the package. Typically, all parameters here should be overwritten by any of the other configuration files.
 
 The config mechanism returns the combined dictionary, and reports the loaded configuration files. In order to disable or overwrite any of the configuration files, they can be also given as optional parameters (path to a file, or python dictionary).
diff --git a/src/sed/core/config.py b/src/sed/core/config.py
index 1133236f..42f21153 100644
--- a/src/sed/core/config.py
+++ b/src/sed/core/config.py
@@ -54,11 +54,11 @@ def parse_config(
         user_config (dict | str, optional): user-based config dictionary
             or file path. The loaded dictionary is completed with the user-based values,
             taking preference over system and default values.
-            Defaults to the file ".config/sed/config.yaml" in the current user's home directory.
+            Defaults to the file ".config/sed/config_v1.yaml" in the current user's home directory.
         system_config (dict | str, optional): system-wide config dictionary
             or file path. The loaded dictionary is completed with the system-wide values,
-            taking preference over default values. Defaults to the file "/etc/sed/config.yaml"
-            on linux, and "%ALLUSERSPROFILE%/sed/config.yaml" on windows.
+            taking preference over default values. Defaults to the file "/etc/sed/config_v1.yaml"
+            on linux, and "%ALLUSERSPROFILE%/sed/config_v1.yaml" on windows.
         default_config (dict | str, optional): default config dictionary
             or file path. The loaded dictionary is completed with the default values.
             Defaults to *package_dir*/config/default.yaml".
@@ -98,7 +98,7 @@ def parse_config(
         user_dict = copy.deepcopy(user_config)
     else:
         if user_config is None:
-            user_config = str(USER_CONFIG_PATH.joinpath("config.yaml"))
+            user_config = str(USER_CONFIG_PATH.joinpath("config_v1.yaml"))
         if Path(user_config).exists():
             user_dict = load_config(user_config)
             if verbose:
@@ -109,7 +109,7 @@ def parse_config(
         system_dict = copy.deepcopy(system_config)
     else:
         if system_config is None:
-            system_config = str(SYSTEM_CONFIG_PATH.joinpath("config.yaml"))
+            system_config = str(SYSTEM_CONFIG_PATH.joinpath("config_v1.yaml"))
         if Path(system_config).exists():
             system_dict = load_config(system_config)
             if verbose:
diff --git a/src/sed/loader/mpes/loader.py b/src/sed/loader/mpes/loader.py
index 794badc1..9a7cb402 100644
--- a/src/sed/loader/mpes/loader.py
+++ b/src/sed/loader/mpes/loader.py
@@ -902,10 +902,13 @@ def gather_metadata(
             metadata=metadata,
         )
 
-        metadata = metadata_retriever.fetch_elab_metadata(
-            runs=self.runs,
-            metadata=metadata,
-        )
+        if self.runs:
+            metadata = metadata_retriever.fetch_elab_metadata(
+                runs=self.runs,
+                metadata=metadata,
+            )
+        else:
+            logger.warning('Fetching elabFTW metadata only supported for loading from "runs"')
 
         return metadata
 

From 5f002018bb8007bf7477e401e3d05583af8ed900 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 20 Jan 2025 23:58:55 +0100
Subject: [PATCH 09/26] add mock tests for metadata retrieval

---
 tests/loader/mpes/test_mpes_metadata.py | 112 ++++++++++++++++++++++++
 1 file changed, 112 insertions(+)
 create mode 100644 tests/loader/mpes/test_mpes_metadata.py

diff --git a/tests/loader/mpes/test_mpes_metadata.py b/tests/loader/mpes/test_mpes_metadata.py
new file mode 100644
index 00000000..b93f2a82
--- /dev/null
+++ b/tests/loader/mpes/test_mpes_metadata.py
@@ -0,0 +1,112 @@
+"""Tests specific for Mpes loader metadata retrieval"""
+from __future__ import annotations
+
+import datetime
+import json
+from unittest.mock import MagicMock
+from unittest.mock import patch
+
+import numpy as np
+import pytest
+
+from sed.loader.mpes.metadata import get_archiver_data
+from sed.loader.mpes.metadata import MetadataRetriever
+
+
+@pytest.fixture
+def metadata_config():
+    return {
+        "elab_url": "http://example.com",
+        "epics_pvs": ["channel1"],
+        "archiver_url": "http://archiver.example.com",
+        "aperture_config": {
+            datetime.datetime.fromisoformat("2023-01-01T00:00:00"): {
+                "fa_size": {"1.0": [(0, 1), (0, 1)]},
+                "ca_size": {"1.0": (0, 1)},
+            },
+        },
+        "lens_mode_config": {"mode1": {"lens1": 1.0, "lens2": 2.0}},
+        "fa_in_channel": "fa_in",
+        "fa_hor_channel": "fa_hor",
+        "ca_in_channel": "ca_in",
+    }
+
+
+@pytest.fixture
+def metadata_retriever(metadata_config):
+    return MetadataRetriever(metadata_config, "dummy_token")
+
+
+def test_metadata_retriever_init(metadata_retriever):
+    assert metadata_retriever.token == "dummy_token"
+    assert metadata_retriever.url == "http://example.com"
+
+
+@patch("sed.loader.mpes.metadata.urlopen")
+def test_get_archiver_data(mock_urlopen):
+    """Test get_archiver_data using a mock of urlopen."""
+    mock_response = MagicMock()
+    mock_response.read.return_value = json.dumps(
+        [{"data": [{"secs": 1, "nanos": 500000000, "val": 10}]}],
+    )
+    mock_urlopen.return_value.__enter__.return_value = mock_response
+
+    ts_from = datetime.datetime(2023, 1, 1).timestamp()
+    ts_to = datetime.datetime(2023, 1, 2).timestamp()
+    archiver_url = "http://archiver.example.com"
+    archiver_channel = "channel1"
+
+    secs, vals = get_archiver_data(archiver_url, archiver_channel, ts_from, ts_to)
+
+    assert np.array_equal(secs, np.array([1.5]))
+    assert np.array_equal(vals, np.array([10]))
+
+
+@patch("sed.loader.mpes.metadata.get_archiver_data")
+def test_fetch_epics_metadata(mock_get_archiver_data, metadata_retriever):
+    """Test fetch_epics_metadata using a mock of get_archiver_data."""
+    mock_get_archiver_data.return_value = (np.array([1.5]), np.array([10]))
+    metadata = {"file": {}}
+    ts_from = datetime.datetime(2023, 1, 1).timestamp()
+    ts_to = datetime.datetime(2023, 1, 2).timestamp()
+
+    updated_metadata = metadata_retriever.fetch_epics_metadata(ts_from, ts_to, metadata)
+
+    assert updated_metadata["file"]["channel1"] == 10
+
+
+@patch("sed.loader.mpes.metadata.elabapi_python")
+def test_fetch_elab_metadata(mock_elabapi_python, metadata_config):
+    """Test fetch_elab_metadata using a mock of elabapi_python."""
+    mock_experiment = MagicMock()
+    mock_experiment.id = 1
+    mock_experiment.userid = 1
+    mock_experiment.title = "Test Experiment"
+    mock_experiment.body = "Test Body"
+    mock_experiment.metadata = json.dumps({"extra_fields": {"key": {"value": "value"}}})
+    mock_elabapi_python.ExperimentsApi.return_value.read_experiments.return_value = [
+        mock_experiment,
+    ]
+    mock_user = MagicMock()
+    mock_user.fullname = "Test User"
+    mock_user.email = "test@example.com"
+    mock_user.userid = 1
+    mock_user.orcid = "0000-0000-0000-0000"
+    mock_elabapi_python.UsersApi.return_value.read_user.return_value = mock_user
+    mock_elabapi_python.LinksToItemsApi.return_value.read_entity_items_links.return_value = []
+
+    metadata_retriever = MetadataRetriever(metadata_config, "dummy_token")
+
+    metadata = {}
+    runs = ["run1"]
+
+    updated_metadata = metadata_retriever.fetch_elab_metadata(runs, metadata)
+
+    assert updated_metadata["elabFTW"]["user"]["name"] == "Test User"
+    assert updated_metadata["elabFTW"]["user"]["email"] == "test@example.com"
+    assert updated_metadata["elabFTW"]["user"]["id"] == 1
+    assert updated_metadata["elabFTW"]["user"]["orcid"] == "0000-0000-0000-0000"
+    assert updated_metadata["elabFTW"]["scan"]["title"] == "Test Experiment"
+    assert updated_metadata["elabFTW"]["scan"]["summary"] == "Test Body"
+    assert updated_metadata["elabFTW"]["scan"]["id"] == 1
+    assert updated_metadata["elabFTW"]["scan"]["key"] == "value"

From 9089964ddaea552b3a482d6a4b9ec931059796d0 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Sun, 26 Jan 2025 20:37:43 +0100
Subject: [PATCH 10/26] add additional tests

---
 src/sed/loader/mpes/metadata.py         | 31 ++++++++++--
 tests/loader/mpes/test_mpes_metadata.py | 67 +++++++++++++++++++++++++
 2 files changed, 95 insertions(+), 3 deletions(-)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index f1562e3f..64ca63bf 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -43,17 +43,27 @@ def __init__(self, metadata_config: dict, token: str = None) -> None:
             # Try to load token from config or .env file
             self.token = read_env_var("ELAB_TOKEN")
 
+        if not self.token:
+            logger.warning(
+                "No valid token provided for elabFTW." "Fetching elabFTW metadata will be skipped.",
+            )
+            return
+
         self._config = metadata_config
 
-        self.url = str(metadata_config.get("elab_url"))
+        self.url = metadata_config.get("elab_url")
         if not self.url:
-            raise ValueError("No URL provided for fetching metadata from elabFTW.")
+            logger.warning(
+                "No URL provided for fetching metadata from elabFTW."
+                "Fetching elabFTW metadata will be skipped.",
+            )
+            return
 
         # Config
         self.configuration = elabapi_python.Configuration()
         self.configuration.api_key["api_key"] = self.token
         self.configuration.api_key_prefix["api_key"] = "Authorization"
-        self.configuration.host = self.url
+        self.configuration.host = str(self.url)
         self.configuration.debug = False
         self.configuration.verify_ssl = False
 
@@ -81,6 +91,13 @@ def fetch_epics_metadata(self, ts_from: float, ts_to: float, metadata: dict) ->
         Returns:
             dict: Updated metadata dictionary.
         """
+        if not self._config.get("archiver_url"):
+            logger.warning(
+                "No URL provided for fetching metadata from the EPICS archiver. "
+                "Fetching EPICS metadata will be skipped.",
+            )
+            return metadata
+
         logger.info("Collecting data from the EPICS archive...")
 
         start = datetime.datetime.utcfromtimestamp(ts_from)
@@ -229,6 +246,14 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
                 "a token parameter or set the ELAB_TOKEN environment variable.",
             )
             return metadata
+
+        if not self.url:
+            logger.warning(
+                "No URL provided for fetching metadata from elabFTW. "
+                "Fetching elabFTW metadata will be skipped.",
+            )
+            return metadata
+
         logger.info("Collecting data from the elabFTW instance...")
         # Get the experiment
         try:
diff --git a/tests/loader/mpes/test_mpes_metadata.py b/tests/loader/mpes/test_mpes_metadata.py
index b93f2a82..f605959c 100644
--- a/tests/loader/mpes/test_mpes_metadata.py
+++ b/tests/loader/mpes/test_mpes_metadata.py
@@ -42,6 +42,30 @@ def test_metadata_retriever_init(metadata_retriever):
     assert metadata_retriever.url == "http://example.com"
 
 
+def test_metadata_retriever_no_token(metadata_config, tmp_path, monkeypatch):
+    monkeypatch.setattr("sed.core.config.ENV_DIR", tmp_path / ".env")
+    monkeypatch.setattr("sed.core.config.SYSTEM_CONFIG_PATH", tmp_path)
+    monkeypatch.setattr("sed.core.config.USER_CONFIG_PATH", tmp_path)
+    retriever = MetadataRetriever(metadata_config)
+    assert retriever.token is None
+
+    metadata = {}
+    runs = ["run1"]
+    updated_metadata = retriever.fetch_elab_metadata(runs, metadata)
+    assert updated_metadata == metadata
+
+
+def test_metadata_retriever_no_url(metadata_config):
+    metadata_config.pop("elab_url")
+    retriever = MetadataRetriever(metadata_config, "dummy_token")
+    assert retriever.url is None
+
+    metadata = {}
+    runs = ["run1"]
+    updated_metadata = retriever.fetch_elab_metadata(runs, metadata)
+    assert updated_metadata == metadata
+
+
 @patch("sed.loader.mpes.metadata.urlopen")
 def test_get_archiver_data(mock_urlopen):
     """Test get_archiver_data using a mock of urlopen."""
@@ -75,6 +99,49 @@ def test_fetch_epics_metadata(mock_get_archiver_data, metadata_retriever):
     assert updated_metadata["file"]["channel1"] == 10
 
 
+@patch("sed.loader.mpes.metadata.get_archiver_data")
+def test_fetch_epics_metadata_missing_channels(mock_get_archiver_data, metadata_retriever):
+    """Test fetch_epics_metadata when the EPICS channel is already present in the metadata."""
+    mock_get_archiver_data.return_value = (np.array([1.5]), np.array([10]))
+    metadata = {"file": {"channel1": 10}}
+    ts_from = datetime.datetime(2023, 1, 1).timestamp()
+    ts_to = datetime.datetime(2023, 1, 2).timestamp()
+
+    updated_metadata = metadata_retriever.fetch_epics_metadata(ts_from, ts_to, metadata)
+
+    assert "channel1" in updated_metadata["file"]
+
+
+@patch("sed.loader.mpes.metadata.get_archiver_data")
+def test_fetch_epics_metadata_missing_aperture_config(mock_get_archiver_data, metadata_retriever):
+    """Test fetch_epics_metadata with missing aperture configuration."""
+    mock_get_archiver_data.return_value = (np.array([1.5]), np.array([10]))
+    metadata = {"file": {}}
+    ts_from = datetime.datetime(2023, 1, 1).timestamp()
+    ts_to = datetime.datetime(2023, 1, 2).timestamp()
+    metadata_retriever._config["aperture_config"] = {}
+
+    updated_metadata = metadata_retriever.fetch_epics_metadata(ts_from, ts_to, metadata)
+
+    assert "instrument" in updated_metadata
+
+
+@patch("sed.loader.mpes.metadata.get_archiver_data")
+def test_fetch_epics_metadata_missing_field_aperture(mock_get_archiver_data, metadata_retriever):
+    """Test fetch_epics_metadata with missing field aperture shape and size."""
+    mock_get_archiver_data.return_value = (np.array([1.5]), np.array([10]))
+    metadata = {"file": {}}
+    ts_from = datetime.datetime(2023, 1, 1).timestamp()
+    ts_to = datetime.datetime(2023, 1, 2).timestamp()
+
+    updated_metadata = metadata_retriever.fetch_epics_metadata(ts_from, ts_to, metadata)
+
+    assert updated_metadata["instrument"]["analyzer"]["fa_shape"] == "circle"
+    assert updated_metadata["instrument"]["analyzer"]["ca_shape"] == "circle"
+    assert np.isnan(updated_metadata["instrument"]["analyzer"]["fa_size"])
+    assert np.isnan(updated_metadata["instrument"]["analyzer"]["ca_size"])
+
+
 @patch("sed.loader.mpes.metadata.elabapi_python")
 def test_fetch_elab_metadata(mock_elabapi_python, metadata_config):
     """Test fetch_elab_metadata using a mock of elabapi_python."""

From d7e182fb937724bd3b056c4821e538df84de20f1 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Sun, 26 Jan 2025 21:16:01 +0100
Subject: [PATCH 11/26] always store config

---
 src/sed/loader/mpes/metadata.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index 64ca63bf..fe1112cf 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -6,6 +6,7 @@
 
 import datetime
 import json
+from copy import deepcopy
 from urllib.error import HTTPError
 from urllib.error import URLError
 from urllib.request import urlopen
@@ -35,6 +36,7 @@ def __init__(self, metadata_config: dict, token: str = None) -> None:
             token (str, optional): The token to use for fetching metadata. If provided,
                 will be saved to .env file for future use.
         """
+        self._config = deepcopy(metadata_config)
         # Token handling
         if token:
             self.token = token
@@ -49,9 +51,7 @@ def __init__(self, metadata_config: dict, token: str = None) -> None:
             )
             return
 
-        self._config = metadata_config
-
-        self.url = metadata_config.get("elab_url")
+        self.url = self._config.get("elab_url")
         if not self.url:
             logger.warning(
                 "No URL provided for fetching metadata from elabFTW."

From b014c18ae9fbc32a69a6ba89d2181ba43f9e3cd3 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Sun, 26 Jan 2025 21:44:45 +0100
Subject: [PATCH 12/26] fix docs

---
 docs/sed/loader.rst | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/docs/sed/loader.rst b/docs/sed/loader.rst
index bf945f66..b3d1f48a 100644
--- a/docs/sed/loader.rst
+++ b/docs/sed/loader.rst
@@ -29,7 +29,7 @@ MpesLoader
    :members:
    :undoc-members:
 
-.. automodule:: sed.loader.flash.metadata
+.. automodule:: sed.loader.mpes.metadata
    :members:
    :undoc-members:
 

From bcefdfb202f1c4c969351cc03ee56a3f08168b02 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 27 Jan 2025 15:24:16 +0100
Subject: [PATCH 13/26] fix warning messages

---
 src/sed/loader/mpes/metadata.py | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index fe1112cf..a7b3463b 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -47,15 +47,14 @@ def __init__(self, metadata_config: dict, token: str = None) -> None:
 
         if not self.token:
             logger.warning(
-                "No valid token provided for elabFTW." "Fetching elabFTW metadata will be skipped.",
+                "No valid token provided for elabFTW. Fetching elabFTW metadata will be skipped.",
             )
             return
 
         self.url = self._config.get("elab_url")
         if not self.url:
             logger.warning(
-                "No URL provided for fetching metadata from elabFTW."
-                "Fetching elabFTW metadata will be skipped.",
+                "No URL provided for elabFTW. Fetching elabFTW metadata will be skipped.",
             )
             return
 

From 74128c2c657541a3b5e59a699eca40cf13ebf2c6 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 28 Jan 2025 15:45:46 +0100
Subject: [PATCH 14/26] fix tests to not overwrite .env file

---
 tests/loader/mpes/test_mpes_metadata.py | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/tests/loader/mpes/test_mpes_metadata.py b/tests/loader/mpes/test_mpes_metadata.py
index f605959c..83003e40 100644
--- a/tests/loader/mpes/test_mpes_metadata.py
+++ b/tests/loader/mpes/test_mpes_metadata.py
@@ -11,6 +11,7 @@
 
 from sed.loader.mpes.metadata import get_archiver_data
 from sed.loader.mpes.metadata import MetadataRetriever
+from tests.test_config import mock_env_file  # noqa: F401
 
 
 @pytest.fixture
@@ -33,7 +34,7 @@ def metadata_config():
 
 
 @pytest.fixture
-def metadata_retriever(metadata_config):
+def metadata_retriever(metadata_config, mock_env_file):  # noqa: ARG001
     return MetadataRetriever(metadata_config, "dummy_token")
 
 
@@ -43,9 +44,9 @@ def test_metadata_retriever_init(metadata_retriever):
 
 
 def test_metadata_retriever_no_token(metadata_config, tmp_path, monkeypatch):
-    monkeypatch.setattr("sed.core.config.ENV_DIR", tmp_path / ".env")
-    monkeypatch.setattr("sed.core.config.SYSTEM_CONFIG_PATH", tmp_path)
-    monkeypatch.setattr("sed.core.config.USER_CONFIG_PATH", tmp_path)
+    monkeypatch.setattr("sed.core.config.ENV_DIR", tmp_path / ".dummy_env")
+    monkeypatch.setattr("sed.core.config.SYSTEM_CONFIG_PATH", tmp_path / "system")
+    monkeypatch.setattr("sed.core.config.USER_CONFIG_PATH", tmp_path / "user")
     retriever = MetadataRetriever(metadata_config)
     assert retriever.token is None
 
@@ -55,7 +56,7 @@ def test_metadata_retriever_no_token(metadata_config, tmp_path, monkeypatch):
     assert updated_metadata == metadata
 
 
-def test_metadata_retriever_no_url(metadata_config):
+def test_metadata_retriever_no_url(metadata_config, mock_env_file):  # noqa: ARG001
     metadata_config.pop("elab_url")
     retriever = MetadataRetriever(metadata_config, "dummy_token")
     assert retriever.url is None
@@ -143,7 +144,7 @@ def test_fetch_epics_metadata_missing_field_aperture(mock_get_archiver_data, met
 
 
 @patch("sed.loader.mpes.metadata.elabapi_python")
-def test_fetch_elab_metadata(mock_elabapi_python, metadata_config):
+def test_fetch_elab_metadata(mock_elabapi_python, metadata_config, mock_env_file):  # noqa: ARG001
     """Test fetch_elab_metadata using a mock of elabapi_python."""
     mock_experiment = MagicMock()
     mock_experiment.id = 1

From cf2abc65e8ec75183ff4edd0245f22085961ba6a Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 28 Jan 2025 16:43:42 +0100
Subject: [PATCH 15/26] allow coverage report for config module

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 827672e7..6d8cb31e 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -93,8 +93,8 @@ notebook = [
 
 [tool.coverage.report]
 omit = [
-    "config.py",
-    "config-3.py",
+    "./config.py",
+    "./config-3.py",
 ]
 
 [tool.ruff]

From 935dfebe1ce1620727929b55838ef4f3bf677b7a Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 10 Feb 2025 23:40:34 +0100
Subject: [PATCH 16/26] fix beam status

---
 src/sed/loader/mpes/metadata.py | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index a7b3463b..fb5e023b 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -352,6 +352,22 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
             elif metadata["elabFTW"]["scan"]["probe_polarization"] == "p":
                 metadata["elabFTW"]["scan"]["probe_polarization"] = 0
 
+        # fix pump status
+        if "scan" in metadata["elabFTW"] and "pump_status" in metadata["elabFTW"]["scan"]:
+            try:
+                metadata["elabFTW"]["scan"]["pump_status"] = (
+                    "opened" if int(metadata["elabFTW"]["scan"]["pump_status"]) else "closed"
+                )
+            except ValueError:
+                pass
+        if "scan" in metadata["elabFTW"] and "pump2_status" in metadata["elabFTW"]["scan"]:
+            try:
+                metadata["elabFTW"]["scan"]["pump2_status"] = (
+                    "opened" if int(metadata["elabFTW"]["scan"]["pump2_status"]) else "closed"
+                )
+            except ValueError:
+                pass
+
         # remove pump information if pump not applied:
         if not metadata["elabFTW"]["scan"].get("pump_status", 0):
             if "pump_photon_energy" in metadata["elabFTW"].get("laser_status", {}):

From 9bc2f8ba580ab32c83d3155a756adc6cd85f10d9 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 10 Feb 2025 23:41:19 +0100
Subject: [PATCH 17/26] allow disabling copy tool

---
 src/sed/core/config_model.py | 1 +
 src/sed/core/processor.py    | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/src/sed/core/config_model.py b/src/sed/core/config_model.py
index 41ab9caa..bca9f959 100644
--- a/src/sed/core/config_model.py
+++ b/src/sed/core/config_model.py
@@ -33,6 +33,7 @@ class CopyToolModel(BaseModel):
 
     source: DirectoryPath
     dest: DirectoryPath
+    use: Optional[bool] = None
     safety_margin: Optional[float] = None
     gid: Optional[int] = None
     scheduler: Optional[str] = None
diff --git a/src/sed/core/processor.py b/src/sed/core/processor.py
index 2c87eaaf..1610cb72 100644
--- a/src/sed/core/processor.py
+++ b/src/sed/core/processor.py
@@ -162,7 +162,9 @@ def __init__(
             verbose=self._verbose,
         )
 
-        self.use_copy_tool = "copy_tool" in self._config["core"]
+        self.use_copy_tool = "copy_tool" in self._config["core"] and self._config["core"][
+            "copy_tool"
+        ].pop("use", True)
         if self.use_copy_tool:
             try:
                 self.ct = CopyTool(

From 2289c9c5370776abd4552776cb58c3685c3a57ad Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 11 Feb 2025 12:10:02 +0100
Subject: [PATCH 18/26] add pump2, and fix polarization and pump status
 conversion

---
 src/sed/loader/mpes/metadata.py | 41 ++++++++++++++++++++++++++++++---
 1 file changed, 38 insertions(+), 3 deletions(-)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index fb5e023b..3983b35b 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -341,6 +341,13 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
                 metadata["elabFTW"]["scan"]["pump_polarization"] = 90
             elif metadata["elabFTW"]["scan"]["pump_polarization"] == "p":
                 metadata["elabFTW"]["scan"]["pump_polarization"] = 0
+            else:
+                try:
+                    metadata["elabFTW"]["scan"]["pump_polarization"] = float(
+                        metadata["elabFTW"]["scan"]["pump_polarization"],
+                    )
+                except ValueError:
+                    pass
 
         if (
             "scan" in metadata["elabFTW"]
@@ -351,28 +358,56 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
                 metadata["elabFTW"]["scan"]["probe_polarization"] = 90
             elif metadata["elabFTW"]["scan"]["probe_polarization"] == "p":
                 metadata["elabFTW"]["scan"]["probe_polarization"] = 0
+            else:
+                try:
+                    metadata["elabFTW"]["scan"]["probe_polarization"] = float(
+                        metadata["elabFTW"]["scan"]["probe_polarization"],
+                    )
+                except ValueError:
+                    pass
+
+        if (
+            "scan" in metadata["elabFTW"]
+            and "pump2_polarization" in metadata["elabFTW"]["scan"]
+            and isinstance(metadata["elabFTW"]["scan"]["pump2_polarization"], str)
+        ):
+            if metadata["elabFTW"]["scan"]["pump2_polarization"] == "s":
+                metadata["elabFTW"]["scan"]["pump2_polarization"] = 90
+            elif metadata["elabFTW"]["scan"]["pump2_polarization"] == "p":
+                metadata["elabFTW"]["scan"]["pump2_polarization"] = 0
+            else:
+                try:
+                    metadata["elabFTW"]["scan"]["pump2_polarization"] = float(
+                        metadata["elabFTW"]["scan"]["pump2_polarization"],
+                    )
+                except ValueError:
+                    pass
 
         # fix pump status
         if "scan" in metadata["elabFTW"] and "pump_status" in metadata["elabFTW"]["scan"]:
             try:
                 metadata["elabFTW"]["scan"]["pump_status"] = (
-                    "opened" if int(metadata["elabFTW"]["scan"]["pump_status"]) else "closed"
+                    "open" if int(metadata["elabFTW"]["scan"]["pump_status"]) else "closed"
                 )
             except ValueError:
                 pass
         if "scan" in metadata["elabFTW"] and "pump2_status" in metadata["elabFTW"]["scan"]:
             try:
                 metadata["elabFTW"]["scan"]["pump2_status"] = (
-                    "opened" if int(metadata["elabFTW"]["scan"]["pump2_status"]) else "closed"
+                    "open" if int(metadata["elabFTW"]["scan"]["pump2_status"]) else "closed"
                 )
             except ValueError:
                 pass
 
         # remove pump information if pump not applied:
-        if not metadata["elabFTW"]["scan"].get("pump_status", 0):
+        if metadata["elabFTW"]["scan"].get("pump_status", "closed") == "closed":
             if "pump_photon_energy" in metadata["elabFTW"].get("laser_status", {}):
                 del metadata["elabFTW"]["laser_status"]["pump_photon_energy"]
 
+        if metadata["elabFTW"]["scan"].get("pump2_status", "closed") == "closed":
+            if "pump2_photon_energy" in metadata["elabFTW"].get("laser_status", {}):
+                del metadata["elabFTW"]["laser_status"]["pump2_photon_energy"]
+
         return metadata
 
 

From 4b51a5d89a2fad66d1abd8ff29094e9c53927859 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Thu, 27 Feb 2025 23:06:01 +0100
Subject: [PATCH 19/26] also remove source_pump if not applied

---
 src/sed/loader/mpes/metadata.py | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index a7b3463b..949007e4 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -356,6 +356,8 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
         if not metadata["elabFTW"]["scan"].get("pump_status", 0):
             if "pump_photon_energy" in metadata["elabFTW"].get("laser_status", {}):
                 del metadata["elabFTW"]["laser_status"]["pump_photon_energy"]
+            if "pump_repetition_rate" in metadata["elabFTW"].get("laser_status", {}):
+                del metadata["elabFTW"]["laser_status"]["pump_repetition_rate"]
 
         return metadata
 

From 037dfb3555d221e39cd61aa22aab5269284aa49c Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 3 Mar 2025 22:42:35 +0100
Subject: [PATCH 20/26] don't add field if not found in epics archiver

---
 src/sed/loader/mpes/metadata.py | 1 -
 1 file changed, 1 deletion(-)

diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index 949007e4..712fdcbd 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -118,7 +118,6 @@ def fetch_epics_metadata(self, ts_from: float, ts_to: float, metadata: dict) ->
                 metadata["file"][f"{channel}"] = np.mean(vals)
 
             except IndexError:
-                metadata["file"][f"{channel}"] = np.nan
                 logger.info(
                     f"Data for channel {channel} doesn't exist for time {start}",
                 )

From eb67d39b8f42bbc93c8f9084081fe7d9eaa7c8f1 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 25 Mar 2025 01:31:01 +0100
Subject: [PATCH 21/26] replace for loop with numpy indexing

---
 src/sed/loader/mpes/loader.py | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/src/sed/loader/mpes/loader.py b/src/sed/loader/mpes/loader.py
index e3e75cda..900b299e 100644
--- a/src/sed/loader/mpes/loader.py
+++ b/src/sed/loader/mpes/loader.py
@@ -224,7 +224,6 @@ def hdf5_to_timed_dataframe(
 
     electron_channels = []
     column_names = []
-
     for name, channel in channels.items():
         if channel["format"] == "per_electron":
             if channel["dataset_key"] in test_proc:
@@ -468,16 +467,13 @@ def hdf5_to_timed_array(
     # Delayed array for loading an HDF5 file of reasonable size (e.g. < 1GB)
 
     h5file = load_h5_in_memory(h5filename)
-
     # Read out groups:
     data_list = []
     ms_marker = np.asarray(h5file[ms_markers_key])
     for channel in channels:
-        timed_dataset = np.zeros_like(ms_marker)
         if channel["format"] == "per_electron":
             g_dataset = np.asarray(h5file[channel["dataset_key"]])
-            for i, point in enumerate(ms_marker):
-                timed_dataset[i] = g_dataset[int(point) - 1]
+            timed_dataset = g_dataset[np.maximum(ms_marker - 1, 0)]
         else:
             raise ValueError(
                 f"Invalid 'format':{channel['format']} for channel {channel['dataset_key']}.",

From 85cdddea709543a60390732816664e93b4698fd6 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 24 Mar 2025 11:45:44 +0100
Subject: [PATCH 22/26] update pynxtools and definitions

---
 .cspell/custom-dictionary.txt     |   3 +-
 pyproject.toml                    |   2 +-
 src/sed/calibrator/momentum.py    |   4 +-
 src/sed/config/NXmpes_config.json | 134 +++++++++++++++---------------
 src/sed/loader/mpes/metadata.py   |   9 ++
 tests/test_processor.py           |  10 ++-
 6 files changed, 92 insertions(+), 70 deletions(-)
 mode change 100755 => 100644 src/sed/config/NXmpes_config.json

diff --git a/.cspell/custom-dictionary.txt b/.cspell/custom-dictionary.txt
index 92f668fd..7e3d25e3 100644
--- a/.cspell/custom-dictionary.txt
+++ b/.cspell/custom-dictionary.txt
@@ -102,7 +102,7 @@ ecalibdict
 elab
 elabapi
 elabid
-electronanalyser
+electronanalyzer
 Elektronen
 endstation
 energycal
@@ -379,6 +379,7 @@ toctree
 tofseg
 tqdm
 traceseg
+trarpes
 trseg
 Tsec
 txtsize
diff --git a/pyproject.toml b/pyproject.toml
index 8be214d5..ef82aef5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -45,7 +45,7 @@ dependencies = [
     "photutils<2.0",
     "psutil>=5.9.0",
     "pynxtools-mpes>=0.2.0",
-    "pynxtools>=0.9.0",
+    "pynxtools>=0.10.0",
     "pyyaml>=6.0.0",
     "scipy>=1.8.0",
     "symmetrize>=0.5.5",
diff --git a/src/sed/calibrator/momentum.py b/src/sed/calibrator/momentum.py
index 391270b0..fdb572b3 100644
--- a/src/sed/calibrator/momentum.py
+++ b/src/sed/calibrator/momentum.py
@@ -2004,9 +2004,9 @@ def gather_calibration_metadata(self, calibration: dict = None) -> dict:
         metadata["calibration"] = calibration
         # create empty calibrated axis entries, if they are not present.
         if "kx_axis" not in metadata["calibration"]:
-            metadata["calibration"]["kx_axis"] = 0
+            metadata["calibration"]["kx_axis"] = 0.0
         if "ky_axis" not in metadata["calibration"]:
-            metadata["calibration"]["ky_axis"] = 0
+            metadata["calibration"]["ky_axis"] = 0.0
 
         return metadata
 
diff --git a/src/sed/config/NXmpes_config.json b/src/sed/config/NXmpes_config.json
old mode 100755
new mode 100644
index 62110337..996be008
--- a/src/sed/config/NXmpes_config.json
+++ b/src/sed/config/NXmpes_config.json
@@ -1,43 +1,41 @@
 {
   "/@default": "entry",
   "/ENTRY/@default": "data",
-  "/ENTRY/title": "['@eln:/ENTRY/title', '@attrs:metadata/entry_title']",
+  "/ENTRY/title": "['@eln:/ENTRY/title', '@attrs:metadata/entry_title', '@attrs:metadata/elabFTW/scan/title']",
   "/ENTRY/start_time": "@attrs:metadata/timing/acquisition_start",
   "/ENTRY/experiment_institution": "Fritz Haber Institute - Max Planck Society",
   "/ENTRY/experiment_facility": "Time Resolved ARPES",
   "/ENTRY/experiment_laboratory": "Clean Room 4",
-  "/ENTRY/entry_identifier": {
-    "identifier":"@attrs:metadata/entry_identifier"
-  },
+  "/ENTRY/identifierNAME[entry_identifier]": "@attrs:metadata/entry_identifier",
   "/ENTRY/end_time": "@attrs:metadata/timing/acquisition_stop",
   "/ENTRY/duration": "@attrs:metadata/timing/acquisition_duration",
   "/ENTRY/duration/@units": "s",
   "/ENTRY/collection_time": "@attrs:metadata/timing/collection_time",
   "/ENTRY/collection_time/@units": "s",
   "/ENTRY/USER[user]": {
-    "name": "!['@eln:/ENTRY/User/name', '@attrs:metadata/user0/name']",
-    "role": "['@eln:/ENTRY/User/role', '@attrs:metadata/user0/role']",
-    "affiliation": "!['@eln:/ENTRY/User/affiliation', '@attrs:metadata/user0/affiliation']",
-    "address": "['@eln:/ENTRY/User/address', '@attrs:metadata/user0/address']",
-    "email": "['@eln:/ENTRY/User/email', '@attrs:metadata/user0/email']"
+    "name": "!['@eln:/ENTRY/User/name', '@attrs:metadata/user0/name', '@attrs:metadata/elabFTW/user/name']",
+    "role": "['@eln:/ENTRY/User/role', '@attrs:metadata/user0/role', 'Principal Investigator']",
+    "affiliation": "['@eln:/ENTRY/User/affiliation', '@attrs:metadata/user0/affiliation', 'Fritz Haber Institute of the Max Planck Society']",
+    "address": "['@eln:/ENTRY/User/address', '@attrs:metadata/user0/address', 'Faradayweg 4-6, 14195 Berlin, Germany']",
+    "email": "['@eln:/ENTRY/User/email', '@attrs:metadata/user0/email', '@attrs:metadata/elabFTW/user/email']"
   },
   "/ENTRY/INSTRUMENT[instrument]": {
     "name": "Time-of-flight momentum microscope equipped delay line detector, at the endstation of the high rep-rate HHG source at FHI",
     "name/@short_name": "TR-ARPES @ FHI",
     "energy_resolution": {
-      "resolution": "!['@eln:/ENTRY/Instrument/Analyzer/energy_resolution', '@attrs:metadata/instrument/energy_resolution']",
+      "resolution": "!['@eln:/ENTRY/Instrument/Analyzer/energy_resolution', '@attrs:metadata/instrument/energy_resolution', '@attrs:metadata/elabFTW/trarpes_metis/energy_resolution']",
       "resolution/@units": "meV",
       "physical_quantity": "energy",
       "type": "estimated"
     },
     "RESOLUTION[temporal_resolution]": {
-      "resolution": 35.0,
+      "resolution": "!['@attrs:metadata/elabFTW/laser_status/temporal_resolution', '35.0']",
       "resolution/@units": "fs",
       "physical_quantity": "time",
       "type": "estimated"
     },
     "RESOLUTION[momentum_resolution]": {
-      "resolution": "@link:/entry/instrument/electronanalyser/momentum_resolution",
+      "resolution": "@link:/entry/instrument/electronanalyzer/momentum_resolution",
       "resolution/@units": "1/angstrom",
       "physical_quantity": "momentum",
       "type": "estimated"
@@ -48,33 +46,33 @@
       "value": "!['@eln:/ENTRY/Sample/gas_pressure', '@attrs:metadata/file/trARPES:XGS600:PressureAC:P_RD']",
       "value/@units": "mbar"
     },
-    "ELECTRONANALYSER[electronanalyser]": {
+    "ELECTRONANALYZER[electronanalyzer]": {
       "description": "SPECS Metis 1000 Momentum Microscope",
       "device_information": {
         "vendor": "SPECS GmbH",
         "model": "Metis 1000 Momentum Microscope"
       },
       "fast_axes": ["kx", "ky", "E"],
-      "slow_axes": "@attrs:metadata/instrument/analyzer/slow_axes",
+      "slow_axes": "['@attrs:metadata/instrument/analyzer/slow_axes', '@attrs:metadata/elabFTW/scan/scan_type']",
       "energy_resolution": {
-        "resolution": "!['@eln:/ENTRY/Instrument/Analyzer/energy_resolution', '@attrs:metadata/instrument/analyzer/energy_resolution']",
+        "resolution": "!['@eln:/ENTRY/Instrument/Analyzer/energy_resolution', '@attrs:metadata/instrument/analyzer/energy_resolution', '@attrs:metadata/elabFTW/trarpes_metis/energy_resolution']",
         "resolution/@units": "meV",
         "physical_quantity": "energy",
         "type": "estimated"
       },
       "momentum_resolution": {
-        "resolution": "!['@eln:/ENTRY/Instrument/Analyzer/momentum_resolution', '@attrs:metadata/instrument/analyzer/momentum_resolution']",
+        "resolution": "!['@eln:/ENTRY/Instrument/Analyzer/momentum_resolution', '@attrs:metadata/instrument/analyzer/momentum_resolution', '@attrs:metadata/elabFTW/trarpes_metis/momentum_resolution']",
         "resolution/@units": "1/angstrom",
         "physical_quantity": "momentum",
         "type": "estimated"
       },
       "spatial_resolution": {
-        "resolution": "!['@eln:/ENTRY/Instrument/Analyzer/spatial_resolution', '@attrs:metadata/instrument/analyzer/spatial_resolution']",
+        "resolution": "!['@eln:/ENTRY/Instrument/Analyzer/spatial_resolution', '@attrs:metadata/instrument/analyzer/spatial_resolution', '@attrs:metadata/elabFTW/trarpes_metis/spatial_resolution']",
         "resolution/@units": "µm",
         "physical_quantity": "length",
         "type": "estimated"
       },
-      "depends_on": "/entry/instrument/electronanalyser/transformations/trans_z",
+      "depends_on": "/entry/instrument/electronanalyzer/transformations/trans_z",
       "TRANSFORMATIONS[transformations]": {
         "AXISNAME[trans_z]": 4.0,
         "AXISNAME[trans_z]/@depends_on": "rot_y",
@@ -89,7 +87,7 @@
       },
       "COLLECTIONCOLUMN[collectioncolumn]": {
         "projection": "@attrs:metadata/instrument/analyzer/projection",
-        "scheme": "@attrs:metadata/instrument/analyzer/scheme",
+        "scheme": "['@attrs:metadata/instrument/analyzer/scheme', 'momentum dispersive']",
         "lens_mode": "@attrs:metadata/instrument/analyzer/lens_mode",
         "extractor_voltage": "@attrs:metadata/file/KTOF:Lens:Extr:V",
         "extractor_voltage/@units": "V",
@@ -132,11 +130,9 @@
         "tof_distance": 0.9,
         "tof_distance/@units": "m"
       },
-      "DETECTOR[detector]": {
+      "ELECTRON_DETECTOR[detector]": {
         "amplifier_type": "MCP",
         "detector_type": "DLD",
-        "sensor_pixels": [1800, 1800],
-        "sensor_pixels/@units": "",
         "amplifier_bias": "@attrs:metadata/file/KTOF:Lens:MCPfront:V",
         "amplifier_bias/@units": "V",
         "amplifier_voltage": "@attrs:metadata/file/KTOF:Lens:MCPback:V",
@@ -145,59 +141,59 @@
         "detector_voltage/@units": "V"
       }
     },
-    "sourceTYPE[source_probe]": {
+    "source_probe": {
       "name": "HHG @ TR-ARPES @ FHI",
       "probe": "photon",
       "type": "HHG laser",
       "mode": "Single Bunch",
-      "frequency": "['@eln:/ENTRY/Instrument/Source/Probe/frequency', '@attrs:metadata/instrument/beam/probe/frequency']",
+      "frequency": "!['@eln:/ENTRY/Instrument/Source/Probe/frequency', '@attrs:metadata/instrument/beam/probe/frequency', '@attrs:metadata/elabFTW/laser_status/probe_repetition_rate']",
       "frequency/@units": "kHz",
       "associated_beam": "/entry/instrument/beam_probe"
     },
-    "beamTYPE[beam_probe]": {
+    "beam_probe": {
       "distance": 0.0,
       "distance/@units": "mm",
-      "incident_energy": "!['@eln:/ENTRY/Instrument/Beam/Probe/incident_energy', '@attrs:metadata/instrument/beam/probe/incident_energy']",
+      "incident_energy": "!['@eln:/ENTRY/Instrument/Beam/Probe/incident_energy', '@attrs:metadata/instrument/beam/probe/incident_energy', '@attrs:metadata/elabFTW/laser_status/probe_photon_energy']",
       "incident_energy/@units": "eV",
-      "incident_energy_spread": "['@eln:/ENTRY/Instrument/Beam/Probe/incident_energy_spread', '@attrs:metadata/instrument/beam/probe/incident_energy_spread']",
+      "incident_energy_spread": "['@eln:/ENTRY/Instrument/Beam/Probe/incident_energy_spread', '@attrs:metadata/instrument/beam/probe/incident_energy_spread', '@attrs:metadata/elabFTW/laser_status/probe_photon_energy_spread']",
       "incident_energy_spread/@units": "eV",
-      "pulse_duration": "['@eln:/ENTRY/Instrument/Beam/Probe/pulse_duration', '@attrs:metadata/instrument/beam/probe/pulse_duration']",
+      "pulse_duration": "['@eln:/ENTRY/Instrument/Beam/Probe/pulse_duration', '@attrs:metadata/instrument/beam/probe/pulse_duration', '@attrs:metadata/elabFTW/laser_status/probe_pulse_duration']",
       "pulse_duration/@units": "fs",
-      "incident_polarization": "['@eln:/ENTRY/Instrument/Beam/Probe/incident_polarization', '@attrs:metadata/instrument/beam/probe/incident_polarization']",
+      "incident_polarization": "['@eln:/ENTRY/Instrument/Beam/Probe/incident_polarization', '@attrs:metadata/instrument/beam/probe/incident_polarization', '@attrs:metadata/elabFTW/scan/probe_polarization']",
       "incident_polarization/@units": "V^2/mm^2",
-      "extent": "['@eln:/ENTRY/Instrument/Beam/Probe/extent', '@attrs:metadata/instrument/beam/probe/extent']",
+      "extent": "['@eln:/ENTRY/Instrument/Beam/Probe/extent', '@attrs:metadata/instrument/beam/probe/extent', '@attrs:metadata/elabFTW/laser_status/probe_profile']",
       "extent/@units": "µm",
       "associated_source": "/entry/instrument/source_probe"
     },
-    "sourceTYPE[source_pump]": {
+    "source_pump": {
       "name": "OPCPA @ TR-ARPES @ FHI",
       "probe": "visible light",
       "type": "Optical Laser",
       "mode": "Single Bunch",
-      "frequency": "['@eln:/ENTRY/Instrument/Source/Pump/frequency', '@attrs:metadata/instrument/beam/pump/frequency']",
+      "frequency": "!['@eln:/ENTRY/Instrument/Source/Pump/frequency', '@attrs:metadata/instrument/beam/pump/frequency', '@attrs:metadata/elabFTW/laser_status/pump_repetition_rate']",
       "frequency/@units": "kHz",
       "associated_beam": "/entry/instrument/beam_pump"
     },
-    "beamTYPE[beam_pump]": {
+    "beam_pump": {
       "distance": 0.0,
       "distance/@units": "mm",
-      "incident_energy": "!['@eln:/ENTRY/Instrument/Beam/Pump/incident_energy', '@attrs:metadata/instrument/beam/pump/incident_energy']",
+      "incident_energy": "!['@eln:/ENTRY/Instrument/Beam/Pump/incident_energy', '@attrs:metadata/instrument/beam/pump/incident_energy', '@attrs:metadata/elabFTW/laser_status/pump_photon_energy']",
       "incident_energy/@units": "eV",
-      "incident_energy_spread": "['@eln:/ENTRY/Instrument/Beam/Pump/incident_energy_spread', '@attrs:metadata/instrument/beam/pump/incident_energy_spread']",
+      "incident_energy_spread": "['@eln:/ENTRY/Instrument/Beam/Pump/incident_energy_spread', '@attrs:metadata/instrument/beam/pump/incident_energy_spread', '@attrs:metadata/elabFTW/laser_status/pump_photon_energy_spread']",
       "incident_energy_spread/@units": "eV",
-      "incident_wavelength": "['@eln:/ENTRY/Instrument/Beam/Pump/incident_wavelength', '@attrs:metadata/instrument/beam/pump/incident_wavelength']",
+      "incident_wavelength": "['@eln:/ENTRY/Instrument/Beam/Pump/incident_wavelength', '@attrs:metadata/instrument/beam/pump/incident_wavelength', '@attrs:metadata/file/trARPES:Orpheus:Wavelength']",
       "incident_wavelength/@units": "nm",
-      "pulse_duration": "['@eln:/ENTRY/Instrument/Beam/Pump/pulse_duration', '@attrs:metadata/instrument/beam/pump/pulse_duration']",
+      "pulse_duration": "['@eln:/ENTRY/Instrument/Beam/Pump/pulse_duration', '@attrs:metadata/instrument/beam/pump/pulse_duration', '@attrs:metadata/elabFTW/laser_status/pump_pulse_duration']",
       "pulse_duration/@units": "fs",
-      "incident_polarization": "['@eln:/ENTRY/Instrument/Beam/Pump/incident_polarization', '@attrs:metadata/instrument/beam/pump/incident_polarization']",
+      "incident_polarization": "['@eln:/ENTRY/Instrument/Beam/Pump/incident_polarization', '@attrs:metadata/instrument/beam/pump/incident_polarization', '@attrs:metadata/elabFTW/scan/pump_polarization']",
       "incident_polarization/@units": "V^2/mm^2",
-      "pulse_energy": "['@eln:/ENTRY/Instrument/Beam/Pump/pulse_energy', '@attrs:metadata/instrument/beam/pump/pulse_energy']",
+      "pulse_energy": "['@eln:/ENTRY/Instrument/Beam/Pump/pulse_energy', '@attrs:metadata/instrument/beam/pump/pulse_energy', '@attrs:metadata/elabFTW/scan/pump_pulse_energy']",
       "pulse_energy/@units": "µJ",
-      "average_power": "['@eln:/ENTRY/Instrument/Beam/Pump/average_power', '@attrs:metadata/instrument/beam/pump/average_power']",
+      "average_power": "['@eln:/ENTRY/Instrument/Beam/Pump/average_power', '@attrs:metadata/instrument/beam/pump/average_power', '@attrs:metadata/file/trARPES:Pump:Power.RBV']",
       "average_power/@units": "mW",
-      "extent": "['@eln:/ENTRY/Instrument/Beam/Pump/extent', '@attrs:metadata/instrument/beam/pump/extent']",
+      "extent": "['@eln:/ENTRY/Instrument/Beam/Pump/extent', '@attrs:metadata/instrument/beam/pump/extent', '@attrs:metadata/elabFTW/laser_status/pump_profile']",
       "extent/@units": "µm",
-      "fluence": "['@eln:/ENTRY/Instrument/Beam/Pump/fluence', '@attrs:metadata/instrument/beam/pump/fluence']",
+      "fluence": "['@eln:/ENTRY/Instrument/Beam/Pump/fluence', '@attrs:metadata/instrument/beam/pump/fluence', '@attrs:metadata/elabFTW/scan/pump_fluence']",
       "fluence/@units": "mJ/cm^2",
       "associated_source": "/entry/instrument/source_pump"
     },
@@ -235,13 +231,13 @@
     }
   },
   "/ENTRY/SAMPLE[sample]": {
-    "preparation_date": "['@eln:/ENTRY/Sample/preparation_date', '@attrs:metadata/sample/preparation_date']",
-    "history/notes/description": "['@eln:/ENTRY/Sample/sample_history', '@attrs:metadata/sample/sample_history']",
-    "history/notes/type": "text/plain",
-    "description": "['@eln:/ENTRY/Sample/description', '@attrs:metadata/sample/chemical_formula']",
-    "name": "['@eln:/ENTRY/Sample/name', '@attrs:metadata/sample/name']",
+    "preparation_date": "['@eln:/ENTRY/Sample/preparation_date', '@attrs:metadata/sample/preparation_date', '@attrs:metadata/elabFTW/sample/preparation_date']",
+    "history/sample_preparation/start_time": "['@eln:/ENTRY/Sample/preparation_date', '@attrs:metadata/sample/preparation_date', '@attrs:metadata/elabFTW/sample/preparation_date']",
+    "history/sample_preparation/description": "['@eln:/ENTRY/Sample/sample_history', '@attrs:metadata/sample/sample_history', '@attrs:metadata/elabFTW/sample/sample_history']",
+    "description": "['@eln:/ENTRY/Sample/description', '@attrs:metadata/sample/chemical_formula', '@attrs:metadata/elabFTW/sample/summary']",
+    "name": "['@eln:/ENTRY/Sample/name', '@attrs:metadata/sample/name', '@attrs:metadata/elabFTW/sample/title']",
     "situation": "vacuum",
-    "SUBSTANCE[substance]/molecular_formula_hill": "['@eln:/ENTRY/Sample/chemical_formula', '@attrs:metadata/sample/chemical_formula']",
+    "chemical_formula": "['@eln:/ENTRY/Sample/chemical_formula', '@attrs:metadata/sample/chemical_formula', '@attrs:metadata/elabFTW/sample/sample_formula']",
     "temperature_env": {
       "temperature_sensor": "@link:/entry/instrument/manipulator/temperature_sensor"
     },
@@ -290,7 +286,8 @@
       "AXISNAME[trans_x]/@vector": [1, 0, 0]
     }
   },
-  "/ENTRY/PROCESS_MPES[process]/DISTORTION[distortion]": {
+  "/ENTRY/DISTORTION[distortion]": {
+    "applied": "!@attrs:metadata/momentum_correction/correction/applied",
     "symmetry": "!@attrs:metadata/momentum_correction/correction/rotation_symmetry",
     "symmetry/@units": "",
     "original_centre": "@attrs:metadata/momentum_correction/correction/center_point",
@@ -302,7 +299,8 @@
     "rdeform_field": "@attrs:metadata/momentum_correction/correction/rdeform_field",
     "rdeform_field/@units": ""
   },
-  "/ENTRY/PROCESS_MPES[process]/REGISTRATION[registration]": {
+  "/ENTRY/REGISTRATION[registration]": {
+    "applied": "!@attrs:metadata/momentum_correction/registration/applied",
     "depends_on": "/entry/process/registration/transformations/rot_z",
     "TRANSFORMATIONS[transformations]": {
       "AXISNAME[trans_x]": "@attrs:metadata/momentum_correction/registration/trans_x/value",
@@ -323,39 +321,45 @@
       "AXISNAME[rot_z]/@depends_on": "@attrs:metadata/momentum_correction/registration/rot_z/depends_on"
     }
   },
-  "/ENTRY/PROCESS_MPES[process]/energy_calibration":{
-    "coefficients": "@attrs:metadata/energy_calibration/calibration/coefficients",
-    "coefficients/@units": "",
-    "fit_function": "@attrs:metadata/energy_calibration/calibration/fit_function",
+  "/ENTRY/CALIBRATION[energy_calibration]":{
+    "applied": "!@attrs:metadata/energy_calibration/applied",
+    "fit_formula_inputs/TERM[coefficients]": "@attrs:metadata/energy_calibration/calibration/coefficients",
+    "fit_formula_inputs/TERM[coefficients]/@units": "",
+    "fit_formula_description": "@attrs:metadata/energy_calibration/calibration/fit_function",
     "original_axis": "@attrs:metadata/energy_calibration/tof",
-    "original_axis/@units": "ns",
+    "original_axis/@units": "",
     "calibrated_axis": "@attrs:metadata/energy_calibration/calibration/axis",
     "calibrated_axis/@units": "eV",
     "physical_quantity": "energy"
   },
-  "/ENTRY/PROCESS_MPES[process]/kx_calibration": {
-    "scaling": "@attrs:metadata/momentum_calibration/calibration/kx_scale",
-    "scaling/@units": "",
+  "/ENTRY/CALIBRATION[kx_calibration]": {
+    "applied": "!@attrs:metadata/momentum_calibration/applied",
+    "scaling_factor": "@attrs:metadata/momentum_calibration/calibration/kx_scale",
+    "scaling_factor/@units": "",
     "offset": "@attrs:metadata/momentum_calibration/calibration/x_center",
     "offset/@units": "",
     "calibrated_axis": "@attrs:metadata/momentum_calibration/calibration/kx_axis",
     "calibrated_axis/@units": "1/angstrom",
     "physical_quantity": "momentum"
   },
-  "/ENTRY/PROCESS_MPES[process]/ky_calibration": {
-    "scaling": "@attrs:metadata/momentum_calibration/calibration/ky_scale",
+  "/ENTRY/CALIBRATION[ky_calibration]": {
+    "applied": "!@attrs:metadata/momentum_calibration/applied",
+    "scaling_factor": "@attrs:metadata/momentum_calibration/calibration/ky_scale",
+    "scaling_factor/@units": "",
     "offset": "@attrs:metadata/momentum_calibration/calibration/y_center",
+    "offset/@units": "",
     "calibrated_axis": "@attrs:metadata/momentum_calibration/calibration/ky_axis",
-    "calibrated_axis/@units": "Angstrom^-1"
+    "calibrated_axis/@units": "1/angstrom",
+    "physical_quantity": "momentum"
   },
   "/ENTRY/data": {
     "@axes": "@data:dims",
-    "@*_indices": "@data:*.index",
+    "AXISNAME_indices[@*_indices]": "@data:*.index",
     "@signal": "data",
     "data": "@data:data",
     "data/@units": "counts",
-    "*": "@data:*.data",
-    "*/@units": "@data:*.unit",
-    "energy/@type": "@attrs:metadata/energy_calibration/calibration/energy_scale"
+    "AXISNAME[*]": "@data:*.data",
+    "AXISNAME[*]/@units": "@data:*.unit",
+    "AXISNAME[energy]/@type": "['@attrs:metadata/energy_calibration/calibration/energy_scale', 'kinetic']"
   }
 }
diff --git a/src/sed/loader/mpes/metadata.py b/src/sed/loader/mpes/metadata.py
index 712fdcbd..3a015ee0 100644
--- a/src/sed/loader/mpes/metadata.py
+++ b/src/sed/loader/mpes/metadata.py
@@ -357,6 +357,15 @@ def fetch_elab_metadata(self, runs: list[str], metadata: dict) -> dict:
                 del metadata["elabFTW"]["laser_status"]["pump_photon_energy"]
             if "pump_repetition_rate" in metadata["elabFTW"].get("laser_status", {}):
                 del metadata["elabFTW"]["laser_status"]["pump_repetition_rate"]
+        else:
+            # add pulse energy if applicable
+            try:
+                metadata["elabFTW"]["scan"]["pump_pulse_energy"] = (
+                    metadata["file"]["trARPES:Pump:Power.RBV"]
+                    / metadata["elabFTW"]["laser_status"]["pump_repetition_rate"]
+                )
+            except KeyError:
+                pass
 
         return metadata
 
diff --git a/tests/test_processor.py b/tests/test_processor.py
index 853cd1c3..7f303a35 100644
--- a/tests/test_processor.py
+++ b/tests/test_processor.py
@@ -1081,11 +1081,19 @@ def test_get_normalization_histogram() -> None:
 metadata["instrument"]["beam"] = {}
 metadata["instrument"]["beam"]["probe"] = {}
 metadata["instrument"]["beam"]["probe"]["incident_energy"] = 21.7
+metadata["instrument"]["beam"]["probe"]["frequency"] = 500.0
+metadata["instrument"]["beam"]["probe"]["incident_energy_spread"] = 0.11
+metadata["instrument"]["beam"]["probe"]["pulse_duration"] = 20.0
+metadata["instrument"]["beam"]["probe"]["incident_polarization"] = [1, 1, 0, 0]
+metadata["instrument"]["beam"]["probe"]["extent"] = [80.0, 80.0]
 # sample
 metadata["sample"] = {}
 metadata["sample"]["preparation_date"] = "2019-01-13T10:00:00+00:00"
 metadata["sample"]["name"] = "Sample Name"
 
+metadata["file"] = {}
+metadata["file"]["KTOF:Lens:Extr:I"] = -0.12877
+
 
 def test_save(caplog) -> None:
     """Test the save functionality"""
@@ -1118,7 +1126,7 @@ def test_save(caplog) -> None:
     with pytest.raises(NameError):
         processor.save("output.tiff")
     axes = ["kx", "ky", "energy", "delay"]
-    bins = [100, 100, 200, 50]
+    bins = [10, 10, 20, 5]
     ranges = [(-2, 2), (-2, 2), (-4, 2), (-600, 1600)]
     processor.compute(bins=bins, axes=axes, ranges=ranges)
     with pytest.raises(NotImplementedError):

From a60da543c2c7949217be8f3f08d8348ce60ae7e6 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Mon, 24 Mar 2025 12:20:11 +0100
Subject: [PATCH 23/26] limit bokeh and first fixes to FLASH config

---
 pyproject.toml                           | 2 +-
 src/sed/config/NXmpes_config-HEXTOF.json | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index ef82aef5..f25d087f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -29,7 +29,7 @@ classifiers = [
     "Operating System :: OS Independent",
 ]
 dependencies = [
-    "bokeh>=2.4.2",
+    "bokeh>=2.4.2,<3.7.0",
     "dask>=2021.12.0,<2024.8",
     "elabapi-python>=5.0",
     "fastdtw>=0.3.4",
diff --git a/src/sed/config/NXmpes_config-HEXTOF.json b/src/sed/config/NXmpes_config-HEXTOF.json
index 3bd38a41..b3b807b9 100755
--- a/src/sed/config/NXmpes_config-HEXTOF.json
+++ b/src/sed/config/NXmpes_config-HEXTOF.json
@@ -48,7 +48,7 @@
       "value/@units": "@attrs:metadata/scientificMetadata/sample/sample_pressure/unit"
     }
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYSER[electronanalyser]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYZER[electronanalyzer]": {
     "description": "HEXTOF Momentum Microscope",
     "device_information": {
       "vendor": "None",
@@ -78,7 +78,7 @@
       "type": "estimated"
     }
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYSER[electronanalyser]/COLLECTIONCOLUMN[collectioncolumn]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYZER[electronanalyzer]/COLLECTIONCOLUMN[collectioncolumn]": {
     "projection": "@attrs:metadata/scientificMetadata/Collection/projection",
     "scheme": "momentum dispersive",
     "lens_mode": "@attrs:metadata/scientificMetadata/Collection/lens_mode",
@@ -107,14 +107,14 @@
       }
     }
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYSER[electronanalyser]/ENERGYDISPERSION[energydispersion]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYZER[electronanalyzer]/ENERGYDISPERSION[energydispersion]": {
     "pass_energy": "@attrs:metadata/scientificMetadata/Collection/tof_voltage",
     "pass_energy/@units": "V",
     "scheme": "tof",
     "tof_distance": 0.8,
     "tof_distance/@units": "m"
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYSER[electronanalyser]/DETECTOR[detector]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYZER[electronanalyzer]/DETECTOR[detector]": {
     "amplifier_type": "MCP",
     "detector_type": "DLD",
     "sensor_pixels": [2024, 2048]

From cb9c06c92610937ba347fa6e3e16eb8dc90782cc Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Tue, 25 Mar 2025 09:24:29 +0100
Subject: [PATCH 24/26] fix Flash Nexus config

---
 src/sed/config/NXmpes_config-HEXTOF.json | 23 ++++++++++-------------
 tutorial/4_hextof_workflow.ipynb         | 10 +++++++++-
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/src/sed/config/NXmpes_config-HEXTOF.json b/src/sed/config/NXmpes_config-HEXTOF.json
index b3b807b9..8eca3e78 100755
--- a/src/sed/config/NXmpes_config-HEXTOF.json
+++ b/src/sed/config/NXmpes_config-HEXTOF.json
@@ -6,9 +6,7 @@
   "/ENTRY[entry]/experiment_institution": "Deutsches Elektronen-Synchrotron DESY",
   "/ENTRY[entry]/experiment_facility": "Free-Electron Laser FLASH",
   "/ENTRY[entry]/experiment_laboratory": "@attrs:metadata/creationLocation",
-  "/ENTRY/entry_identifier": {
-    "identifier":"@attrs:metadata/pid"
-  },
+  "/ENTRY/identifierNAME[entry_identifier]": "@attrs:metadata/pid",
   "/ENTRY[entry]/USER[user0]": {
     "name": "!@attrs:metadata/principalInvestigator",
     "role": "Principal investigator",
@@ -114,12 +112,11 @@
     "tof_distance": 0.8,
     "tof_distance/@units": "m"
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYZER[electronanalyzer]/DETECTOR[detector]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/ELECTRONANALYZER[electronanalyzer]/ELECTRON_DETECTOR[detector]": {
     "amplifier_type": "MCP",
-    "detector_type": "DLD",
-    "sensor_pixels": [2024, 2048]
+    "detector_type": "DLD"
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/sourceTYPE[source_probe]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/source_probe": {
     "name": "@attrs:metadata/scientificMetadata/Source/name",
     "probe": "@attrs:metadata/scientificMetadata/Source/probe",
     "type": "@attrs:metadata/scientificMetadata/Source/type",
@@ -128,7 +125,7 @@
     "frequency/@units": "@attrs:metadata/scientificMetadata/Source/repetition_rate/unit",
     "associated_beam": "/entry/instrument/beam_probe"
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/beamTYPE[beam_probe]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/beam_probe": {
     "distance": 0.0,
     "distance/@units": "mm",
     "incident_energy": "@attrs:metadata/scientificMetadata/Source/photon_energy/value",
@@ -146,16 +143,16 @@
     "energy_dispersion": "@attrs:metadata/scientificMetadata/Source/dispersion/value",
     "energy_dispersion/@units": "@attrs:metadata/scientificMetadata/Source/dispersion/unit"
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/sourceTYPE[source_pump]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/source_pump": {
     "name": "PIGLET @ FLASH @ DESY",
     "probe": "visible light",
     "type": "Optical Laser",
     "mode": "Single Bunch",
-    "frequency": 1000,
+    "frequency": 1000.0,
     "frequency/@units": "kHz",
     "associated_beam": "/entry/instrument/beam_pump"
   },
-  "/ENTRY[entry]/INSTRUMENT[instrument]/beamTYPE[beam_pump]": {
+  "/ENTRY[entry]/INSTRUMENT[instrument]/beam_pump": {
     "distance": 0.0,
     "distance/@units": "mm",
     "incident_wavelength": "@attrs:metadata/scientificMetadata/Laser/wavelength/value",
@@ -195,7 +192,7 @@
     "gas_pressure_env": {
       "pressure_gauge": "@link:/entry/instrument/pressure_gauge"
     },
-    "bias": {
+    "bias_env": {
       "voltmeter": "@link:/entry/instrument/manipulator/sample_bias_voltmeter"
     }
   },
@@ -207,6 +204,6 @@
     "data/@units": "counts",
     "AXISNAME[*]": "@data:*.data",
     "AXISNAME[*]/@units": "@data:*.unit",
-    "energy/@type": "@attrs:metadata/energy_calibration/calibration/energy_scale"
+    "AXISNAME[energy]/@type": "@attrs:metadata/energy_calibration/calibration/energy_scale"
   }
 }
diff --git a/tutorial/4_hextof_workflow.ipynb b/tutorial/4_hextof_workflow.ipynb
index 0d0970a5..bbde1963 100644
--- a/tutorial/4_hextof_workflow.ipynb
+++ b/tutorial/4_hextof_workflow.ipynb
@@ -894,13 +894,21 @@
     "metadata = load_config(meta_path + \"/44824_20230324T060430.json\")\n",
     "\n",
     "# Fix metadata\n",
+    "metadata[\"scientificMetadata\"][\"Source\"][\"photon_energy\"][\"value\"] = float(metadata[\"scientificMetadata\"][\"Source\"][\"photon_energy\"][\"value\"])\n",
+    "metadata[\"scientificMetadata\"][\"Source\"][\"repetition_rate\"][\"value\"] = float(metadata[\"scientificMetadata\"][\"Source\"][\"repetition_rate\"][\"value\"])\n",
     "metadata[\"scientificMetadata\"][\"Laser\"][\"wavelength\"][\"value\"] = float(metadata[\"scientificMetadata\"][\"Laser\"][\"wavelength\"][\"value\"][:-2])\n",
+    "metadata[\"scientificMetadata\"][\"Laser\"][\"pulse duration\"][\"value\"] = float(metadata[\"scientificMetadata\"][\"Laser\"][\"pulse duration\"][\"value\"])\n",
+    "metadata[\"scientificMetadata\"][\"Laser\"][\"pulse_energy\"][\"value\"] = float(metadata[\"scientificMetadata\"][\"Laser\"][\"pulse_energy\"][\"value\"])\n",
     "metadata[\"scientificMetadata\"][\"Laser\"][\"energy\"] = {\"value\": 1239.84/metadata[\"scientificMetadata\"][\"Laser\"][\"wavelength\"][\"value\"], \"unit\": \"eV\"}\n",
     "metadata[\"scientificMetadata\"][\"Laser\"][\"polarization\"] = [1, 1, 0, 0]\n",
+    "metadata[\"scientificMetadata\"][\"Manipulator\"][\"sample_bias\"] = float(metadata[\"scientificMetadata\"][\"Manipulator\"][\"sample_bias\"])\n",
+    "metadata[\"scientificMetadata\"][\"Collection\"][\"tof_voltage\"] = float(metadata[\"scientificMetadata\"][\"Collection\"][\"tof_voltage\"])\n",
+    "metadata[\"scientificMetadata\"][\"Collection\"][\"extractor_voltage\"] = float(metadata[\"scientificMetadata\"][\"Collection\"][\"extractor_voltage\"])\n",
+    "metadata[\"scientificMetadata\"][\"Collection\"][\"field_aperture\"] = float(metadata[\"scientificMetadata\"][\"Collection\"][\"field_aperture\"])\n",
     "metadata[\"scientificMetadata\"][\"Collection\"][\"field_aperture_x\"] = float(metadata[\"scientificMetadata\"][\"Collection\"][\"field_aperture_x\"])\n",
     "metadata[\"scientificMetadata\"][\"Collection\"][\"field_aperture_y\"] = float(metadata[\"scientificMetadata\"][\"Collection\"][\"field_aperture_y\"])\n",
     "metadata[\"pi\"] = {\"institute\": \"JGU Mainz\"}\n",
-    "metadata[\"proposer\"] = {\"institute\": \"TU Dortmund\"}\n"
+    "metadata[\"proposer\"] = {\"institute\": \"TU Dortmund\"}"
    ]
   },
   {

From 38c4f31233cf78db03712c1b836ffb8aa1b22094 Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Wed, 26 Mar 2025 12:48:51 +0100
Subject: [PATCH 25/26] update pynxtools

---
 pyproject.toml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index f25d087f..a6c549a4 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -44,8 +44,8 @@ dependencies = [
     "pandas>=1.4.1",
     "photutils<2.0",
     "psutil>=5.9.0",
-    "pynxtools-mpes>=0.2.0",
-    "pynxtools>=0.10.0",
+    "pynxtools-mpes>=0.2.2",
+    "pynxtools>=0.10.1",
     "pyyaml>=6.0.0",
     "scipy>=1.8.0",
     "symmetrize>=0.5.5",

From bb5f7f9e4dd394ac6978d7b6ecb4ba36097221ab Mon Sep 17 00:00:00 2001
From: rettigl <l.rettig@web.de>
Date: Thu, 3 Apr 2025 17:31:09 +0200
Subject: [PATCH 26/26] add compatibility with old pre-2020 datasets

---
 src/sed/loader/mpes/loader.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/src/sed/loader/mpes/loader.py b/src/sed/loader/mpes/loader.py
index 9a7cb402..b3c60638 100644
--- a/src/sed/loader/mpes/loader.py
+++ b/src/sed/loader/mpes/loader.py
@@ -792,6 +792,14 @@ def get_files_from_run_id(
                     recursive=True,
                 ),
             )
+            # Compatibility for old scan format
+            if not run_files:
+                run_files = natsorted(
+                    glob.glob(
+                        folder + "/**/Scan" + str(run_id).zfill(3) + "_*." + extension,
+                        recursive=True,
+                    ),
+                )
             files.extend(run_files)
 
         # Check if any files are found