From 0d933b2cb4b783c3d621afbad65927c5a711c65f Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Fri, 3 May 2024 15:53:05 +0200 Subject: [PATCH 1/9] basics for repr for metadata and processor class --- sed/core/metadata.py | 5 +++-- sed/core/processor.py | 40 ++++++++++++++++++++++++++++++++++++---- 2 files changed, 39 insertions(+), 6 deletions(-) diff --git a/sed/core/metadata.py b/sed/core/metadata.py index 803f5c3d..a0a244a0 100644 --- a/sed/core/metadata.py +++ b/sed/core/metadata.py @@ -5,6 +5,8 @@ from typing import Any from typing import Dict +import yaml + from sed.core.config import complete_dictionary @@ -18,8 +20,7 @@ def __getitem__(self, val: Any) -> None: return self._m[val] def __repr__(self) -> str: - # TODO: #35 add pretty print, possibly to HTML - return str(self._m) + return yaml.dump(self._m, allow_unicode=True, default_flow_style=False) @property def metadata(self) -> dict: diff --git a/sed/core/processor.py b/sed/core/processor.py index 089e0f39..11733d32 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -178,10 +178,42 @@ def __repr__(self): df_str = "Data Frame: No Data loaded" else: df_str = self._dataframe.__repr__() - attributes_str = f"Metadata: {self._attributes.metadata}" - pretty_str = df_str + "\n" + attributes_str + pretty_str = df_str + "\n" + "Metadata: " + "\n" + self._attributes.__repr__() return pretty_str + def _repr_html_(self): + html = "
" + + html += ( + f"
Dataframe{self.dataframe.head()._repr_html_()}
" + ) + + # Add expandable section for dataframe + html += f"
Dask{self.dataframe._repr_html_()}
" + + # Add expandable section for attributes + html += "
Attributes" + html += "
    " + html += f"
  • {self.attributes}
  • " + html += "
" + + # Add expandable section for plots + html += "
Plots" + # Add your plot generating code here + plt.figure() + # plot random data + plt.plot(np.random.rand(10)) + plt.xlabel("X-axis label") + plt.ylabel("Y-axis label") + plt.title("Plot Title") + html += "Plot" + plt.close() + html += "
" + + html += "
" + + return html + @property def dataframe(self) -> Union[pd.DataFrame, ddf.DataFrame]: """Accessor to the underlying dataframe. @@ -238,13 +270,13 @@ def timed_dataframe(self, timed_dataframe: Union[pd.DataFrame, ddf.DataFrame]): self._timed_dataframe = timed_dataframe @property - def attributes(self) -> dict: + def attributes(self) -> MetaHandler: """Accessor to the metadata dict. Returns: dict: The metadata dict. """ - return self._attributes.metadata + return self._attributes def add_attribute(self, attributes: dict, name: str, **kwds): """Function to add element to the attributes dict. From 337cbd332944c222e211058c6fc5420ebfe26de2 Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Fri, 3 May 2024 15:53:59 +0200 Subject: [PATCH 2/9] basics for repr for metadata and processor class --- sed/core/processor.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sed/core/processor.py b/sed/core/processor.py index 11733d32..e82259e9 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -274,7 +274,7 @@ def attributes(self) -> MetaHandler: """Accessor to the metadata dict. Returns: - dict: The metadata dict. + MetaHandler: The metadata object """ return self._attributes From 3ee0679eed6341c958826ad0a1e95e0598c2825f Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Fri, 3 May 2024 19:11:12 +0200 Subject: [PATCH 3/9] metadata pretty html representation --- sed/core/metadata.py | 24 ++++++++++++++++++++++++ sed/core/processor.py | 7 +++---- 2 files changed, 27 insertions(+), 4 deletions(-) diff --git a/sed/core/metadata.py b/sed/core/metadata.py index a0a244a0..b37ee2c1 100644 --- a/sed/core/metadata.py +++ b/sed/core/metadata.py @@ -22,6 +22,30 @@ def __getitem__(self, val: Any) -> None: def __repr__(self) -> str: return yaml.dump(self._m, allow_unicode=True, default_flow_style=False) + def _format_attributes(self, attributes, indent=0): + html = "" + for key, value in attributes.items(): + # Format key + formatted_key = key.replace("_", " ").title() + formatted_key = f"{formatted_key}" + + if isinstance(value, dict): + html += f"
" + html += f"{formatted_key}" + html += self._format_attributes(value, indent + 1) + html += "
" + else: + html += ( + f"
{formatted_key}: {value}
" + ) + return html + + def _repr_html_(self) -> str: + html = "
" + html += self._format_attributes(self._m) + html += "
" + return html + @property def metadata(self) -> dict: """Property returning the metadata dict. diff --git a/sed/core/processor.py b/sed/core/processor.py index e82259e9..d4a2ff05 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -192,10 +192,9 @@ def _repr_html_(self): html += f"
Dask{self.dataframe._repr_html_()}
" # Add expandable section for attributes - html += "
Attributes" - html += "
    " - html += f"
  • {self.attributes}
  • " - html += "
" + html += "
Metadata" + html += self.attributes._repr_html_() + html += "
" # Add expandable section for plots html += "
Plots" From 3f5056a4bb263d3da10eb495c5b5acaa2d7c9303 Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Sat, 4 May 2024 15:02:45 +0200 Subject: [PATCH 4/9] fix linting/test errors --- tests/test_processor.py | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) diff --git a/tests/test_processor.py b/tests/test_processor.py index 3d39f6e5..5471410a 100644 --- a/tests/test_processor.py +++ b/tests/test_processor.py @@ -189,11 +189,11 @@ def test_attributes_setters() -> None: processor.dataframe["X"].compute(), processor.dataframe["Y"].compute(), ) - processor_metadata = processor.attributes + processor_metadata = processor.attributes.metadata assert isinstance(processor_metadata, dict) assert "test" in processor_metadata.keys() processor.add_attribute({"key2": 5}, name="test2") - assert processor.attributes["test2"]["key2"] == 5 + assert processor_metadata["test2"]["key2"] == 5 assert processor.config["core"]["loader"] == "mpes" assert len(processor.files) == 2 @@ -398,7 +398,7 @@ def test_pose_adjustment_save_load() -> None: processor.apply_momentum_correction() assert "Xm" in processor.dataframe.columns assert "Ym" in processor.dataframe.columns - assert "momentum_correction" in processor.attributes + assert "momentum_correction" in processor.attributes.metadata os.remove("sed_config_pose_adjustments.yaml") @@ -609,7 +609,10 @@ def test_energy_calibration_workflow(energy_scale: str, calibration_method: str) processor.add_energy_offset(constant=1) processor.append_energy_axis(preview=False) assert "energy" in processor.dataframe.columns - assert processor.attributes["energy_calibration"]["calibration"]["energy_scale"] == energy_scale + assert ( + processor.attributes.metadata["energy_calibration"]["calibration"]["energy_scale"] + == energy_scale + ) os.remove(f"sed_config_energy_calibration_{energy_scale}-{calibration_method}.yaml") energy1 = processor.dataframe["energy"].compute().values @@ -743,11 +746,14 @@ def test_delay_calibration_workflow() -> None: processor.calibrate_delay_axis() assert "delay" in processor.dataframe.columns assert ( - processor.attributes["delay_calibration"]["calibration"]["creation_date"] + processor.attributes.metadata["delay_calibration"]["calibration"]["creation_date"] == creation_date_calibration ) processor.add_delay_offset(preview=True) - assert processor.attributes["delay_offset"]["offsets"]["creation_date"] == creation_date_offsets + assert ( + processor.attributes.metadata["delay_offset"]["offsets"]["creation_date"] + == creation_date_offsets + ) np.testing.assert_allclose(expected, processor.dataframe["delay"].compute()) os.remove("sed_config_delay_calibration.yaml") @@ -819,9 +825,12 @@ def test_add_time_stamped_data() -> None: res = processor.dataframe["time_stamped_data"].compute().values assert res[0] == 0 assert res[-1] == 1 - assert processor.attributes["time_stamped_data"][0] == "time_stamped_data" - np.testing.assert_array_equal(processor.attributes["time_stamped_data"][1], time_stamps) - np.testing.assert_array_equal(processor.attributes["time_stamped_data"][2], data) + assert processor.attributes.metadata["time_stamped_data"][0] == "time_stamped_data" + np.testing.assert_array_equal( + processor.attributes.metadata["time_stamped_data"][1], + time_stamps, + ) + np.testing.assert_array_equal(processor.attributes.metadata["time_stamped_data"][2], data) def test_event_histogram() -> None: From c65194a307779ad5b3719b29689561a2dde75b98 Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Sun, 5 May 2024 12:11:07 +0200 Subject: [PATCH 5/9] idea for plots --- sed/core/processor.py | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) diff --git a/sed/core/processor.py b/sed/core/processor.py index d4a2ff05..4677cc53 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -198,15 +198,9 @@ def _repr_html_(self): # Add expandable section for plots html += "
Plots" - # Add your plot generating code here - plt.figure() - # plot random data - plt.plot(np.random.rand(10)) - plt.xlabel("X-axis label") - plt.ylabel("Y-axis label") - plt.title("Plot Title") - html += "Plot" - plt.close() + # Something like the event histogram can be added here, + # but the method needs to output image/html + # self.view_event_histogram(dfpid=2, backend="matplotlib") html += "
" html += "" From ec547ca4b3426980a157496607c1d86bd97d5d6a Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Tue, 7 May 2024 14:06:28 +0200 Subject: [PATCH 6/9] apply some fixes --- sed/core/metadata.py | 17 ++++++++--------- sed/core/processor.py | 30 +++++++++++++++--------------- 2 files changed, 23 insertions(+), 24 deletions(-) diff --git a/sed/core/metadata.py b/sed/core/metadata.py index b37ee2c1..721439e7 100644 --- a/sed/core/metadata.py +++ b/sed/core/metadata.py @@ -29,21 +29,20 @@ def _format_attributes(self, attributes, indent=0): formatted_key = key.replace("_", " ").title() formatted_key = f"{formatted_key}" + html += f"
" if isinstance(value, dict): - html += f"
" - html += f"{formatted_key}" + html += f"
{formatted_key} [{key}]" html += self._format_attributes(value, indent + 1) - html += "
" + html += "
" + elif hasattr(value, "shape"): + html += f"{formatted_key} [{key}]: {value.shape}" else: - html += ( - f"
{formatted_key}: {value}
" - ) + html += f"{formatted_key} [{key}]: {value}" + html += "" return html def _repr_html_(self) -> str: - html = "
" - html += self._format_attributes(self._m) - html += "
" + html = self._format_attributes(self._m) return html @property diff --git a/sed/core/processor.py b/sed/core/processor.py index 4677cc53..ee085739 100644 --- a/sed/core/processor.py +++ b/sed/core/processor.py @@ -175,7 +175,7 @@ def __init__( def __repr__(self): if self._dataframe is None: - df_str = "Data Frame: No Data loaded" + df_str = "Dataframe: No Data loaded" else: df_str = self._dataframe.__repr__() pretty_str = df_str + "\n" + "Metadata: " + "\n" + self._attributes.__repr__() @@ -184,29 +184,29 @@ def __repr__(self): def _repr_html_(self): html = "
" - html += ( - f"
Dataframe{self.dataframe.head()._repr_html_()}
" - ) + if self._dataframe is None: + df_html = "Dataframe: No Data loaded" + else: + df_html = self._dataframe._repr_html_() - # Add expandable section for dataframe - html += f"
Dask{self.dataframe._repr_html_()}
" + html += f"
Dataframe{df_html}
" # Add expandable section for attributes html += "
Metadata" - html += self.attributes._repr_html_() - html += "
" - - # Add expandable section for plots - html += "
Plots" - # Something like the event histogram can be added here, - # but the method needs to output image/html - # self.view_event_histogram(dfpid=2, backend="matplotlib") - html += "
" + html += "
" + html += self._attributes._repr_html_() + html += "
" html += "
" return html + ## Suggestion: + # @property + # def overview_panel(self): + # """Provides an overview panel with plots of different data attributes.""" + # self.view_event_histogram(dfpid=2, backend="matplotlib") + @property def dataframe(self) -> Union[pd.DataFrame, ddf.DataFrame]: """Accessor to the underlying dataframe. From 3e84f82290055a545c7ffec7bc17299718d9beb1 Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Tue, 7 May 2024 14:19:57 +0200 Subject: [PATCH 7/9] remove yaml dump --- sed/core/metadata.py | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/sed/core/metadata.py b/sed/core/metadata.py index 721439e7..9752544e 100644 --- a/sed/core/metadata.py +++ b/sed/core/metadata.py @@ -1,12 +1,11 @@ """This is a metadata handler class from the sed package """ +import json from copy import deepcopy from typing import Any from typing import Dict -import yaml - from sed.core.config import complete_dictionary @@ -20,7 +19,7 @@ def __getitem__(self, val: Any) -> None: return self._m[val] def __repr__(self) -> str: - return yaml.dump(self._m, allow_unicode=True, default_flow_style=False) + return json.dumps(self._m, default=str, indent=4) def _format_attributes(self, attributes, indent=0): html = "" @@ -45,15 +44,6 @@ def _repr_html_(self) -> str: html = self._format_attributes(self._m) return html - @property - def metadata(self) -> dict: - """Property returning the metadata dict. - - Returns: - dict: Dictionary of metadata. - """ - return self._m - def add( self, entry: Any, From 9f45f812401ea015cfd5050456baea86982694aa Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Tue, 7 May 2024 14:24:41 +0200 Subject: [PATCH 8/9] put back metadata property --- sed/core/metadata.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/sed/core/metadata.py b/sed/core/metadata.py index 9752544e..2d50a4fc 100644 --- a/sed/core/metadata.py +++ b/sed/core/metadata.py @@ -44,6 +44,14 @@ def _repr_html_(self) -> str: html = self._format_attributes(self._m) return html + @property + def metadata(self) -> Dict: + """Property returning the metadata dict. + Returns: + dict: Dictionary of metadata. + """ + return self._m + def add( self, entry: Any, From 8c2246b9f52c5381b2f6c2f18a4e459acafde2ba Mon Sep 17 00:00:00 2001 From: Zain Sohail Date: Tue, 7 May 2024 16:52:40 +0200 Subject: [PATCH 9/9] add tests for metadata --- sed/core/metadata.py | 93 +++++++----------------------------------- tests/test_metadata.py | 75 ++++++++++++++++++++++++++++++++++ 2 files changed, 89 insertions(+), 79 deletions(-) create mode 100644 tests/test_metadata.py diff --git a/sed/core/metadata.py b/sed/core/metadata.py index 2d50a4fc..155bdfce 100644 --- a/sed/core/metadata.py +++ b/sed/core/metadata.py @@ -1,5 +1,4 @@ """This is a metadata handler class from the sed package - """ import json from copy import deepcopy @@ -10,7 +9,8 @@ class MetaHandler: - """[summary]""" + """This class provides methods to manipulate metadata dictionaries, + and give a nice representation of them.""" def __init__(self, meta: Dict = None) -> None: self._m = deepcopy(meta) if meta is not None else {} @@ -22,13 +22,14 @@ def __repr__(self) -> str: return json.dumps(self._m, default=str, indent=4) def _format_attributes(self, attributes, indent=0): + INDENT_FACTOR = 20 html = "" for key, value in attributes.items(): # Format key formatted_key = key.replace("_", " ").title() formatted_key = f"{formatted_key}" - html += f"
" + html += f"
" if isinstance(value, dict): html += f"
{formatted_key} [{key}]" html += self._format_attributes(value, indent + 1) @@ -107,83 +108,17 @@ def add( f"Please choose between overwrite,append or raise.", ) - def add_processing(self, method: str, **kwds: Any) -> None: - """docstring - - Args: - - Returns: - - """ - # TODO: #36 Add processing metadata validation tests - self._m["processing"][method] = kwds - - def from_nexus(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def to_nexus(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def from_json(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def to_json(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def from_dict(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - - def to_dict(self, val: Any) -> None: - """docstring - - Args: - - Returns: - - """ - raise NotImplementedError() - class DuplicateEntryError(Exception): - """[summary]""" + """Exception raised when attempting to add a duplicate entry to the metadata container. + + Attributes: + message -- explanation of the error + """ + def __init__(self, message: str = "An entry already exists in metadata"): + self.message = message + super().__init__(self.message) -if __name__ == "__main__": - m = MetaHandler() - m.add({"start": 0, "stop": 1}, name="test") - print(m) + def __str__(self): + return f"{self.__class__.__name__}: {self.message}" diff --git a/tests/test_metadata.py b/tests/test_metadata.py new file mode 100644 index 00000000..fbe979a4 --- /dev/null +++ b/tests/test_metadata.py @@ -0,0 +1,75 @@ +import json +from typing import Any +from typing import Dict + +import numpy as np +import pytest + +from sed.core.metadata import DuplicateEntryError +from sed.core.metadata import MetaHandler + +metadata: Dict[Any, Any] = {} +metadata["entry_title"] = "Title" +# sample +metadata["sample"] = {} +metadata["sample"]["size"] = np.array([1, 2, 3]) +metadata["sample"]["name"] = "Sample Name" + + +@pytest.fixture +def meta_handler(): + # Create a MetaHandler instance + return MetaHandler(meta=metadata) + + +def test_add_entry_overwrite(meta_handler): + # Add a new entry to metadata with 'overwrite' policy + new_entry = {"sample": "Sample Name"} + meta_handler.add(new_entry, "sample", duplicate_policy="overwrite") + assert "sample" in meta_handler.metadata + assert meta_handler.metadata["sample"] == new_entry + + +def test_add_entry_raise(meta_handler): + # Attempt to add a duplicate entry with 'raise' policy + with pytest.raises(DuplicateEntryError): + meta_handler.add({}, "entry_title", duplicate_policy="raise") + + +def test_add_entry_append(meta_handler): + # Add a new entry to metadata with 'append' policy + new_entry = {"sample": "Sample Name"} + meta_handler.add(new_entry, "sample", duplicate_policy="append") + assert "sample" in meta_handler.metadata + assert "sample_1" in meta_handler.metadata + assert meta_handler.metadata["sample_1"] == new_entry + + +def test_add_entry_merge(meta_handler): + # Add a new entry to metadata with 'merge' policy + entry_to_merge = {"name": "Name", "type": "type"} + meta_handler.add(entry_to_merge, "sample", duplicate_policy="merge") + print(meta_handler.metadata) + assert "sample" in meta_handler.metadata + assert "name" in meta_handler.metadata["sample"] + assert "type" in meta_handler.metadata["sample"] + + +def test_repr(meta_handler): + # Test the __repr__ method + assert repr(meta_handler) == json.dumps(metadata, default=str, indent=4) + + +def test_repr_html(meta_handler): + # Test the _repr_html_ method + html = meta_handler._format_attributes(metadata) + assert meta_handler._repr_html_() == html + + html_test = "
Entry Title [entry_title]: Title
" + html_test += ( + "
Sample [sample]" + ) + html_test += "
Size [size]: (3,)
" + html_test += "
Name [name]: Sample Name" + html_test += "
" + assert html == html_test