diff --git a/.github/workflows/ruff-lint-check.yml b/.github/workflows/ruff-lint-check.yml new file mode 100644 index 0000000..0fcd1e6 --- /dev/null +++ b/.github/workflows/ruff-lint-check.yml @@ -0,0 +1,32 @@ +name: Ruff Lint Check + +on: + push: + branches: + - main + - dev + pull_request: + branches: + - main + - dev + +jobs: + lint-and-test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + + - name: Setup Python + uses: actions/setup-python@v5 + with: + python-version: "3.10" + + - name: Install Ruff + run: | + pip install ruff + + - name: Check Linting (Ruff) + run: ruff check . + + - name: Check Formatting (Ruff Format) + run: ruff format --check . diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..b68b09c --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,16 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v6.0.0 + hooks: + - id: trailing-whitespace + - id: end-of-file-fixer + - id: check-yaml + - id: check-toml + - id: check-added-large-files + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.14.14 + hooks: + - id: ruff-check + args: [--fix] + - id: ruff-format diff --git a/README.md b/README.md index 0a83047..8704058 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,135 @@ # AngioEye + AngioEye is the cohort-analysis engine for retinal Doppler holography. It browses EyeFlow .h5 outputs, reads per-segment metrics, applies QC, compares models, and aggregates results at eye/cohort level (including artery–vein summaries) to help design biomarkers. It exports clean CSV reports for stats, figures, and clinical models. + +--- + +## Setup + +### Prerequisites + +- Python 3.10 or higher. +- It is highly recommended to use a virtual environment. + +This project uses a `pyproject.toml` to describe all requirements needed. To start using it, **it is better to use a Python virtual environment (venv)**. 
+ +```sh +# Creates the venv +python -m venv .venv + +# To enter the venv +# If you are using Windows PowerShell, you might need to activate the "Execution" policy +./.venv/Scripts/activate +``` + +You can easily exit it with the command + +```sh +deactivate +``` + +### 1. Basic Installation (User) + +```sh +pip install -e . + +# Installs pipeline-specific dependencies (optional) +pip install -e ".[pipelines]" +``` + +### 2. Development Setup (Contributor) + +```sh +# Install all dependencies including dev tools (ruff, pre-commit, pyinstaller) +pip install -e ".[dev,pipelines]" + +# Initialize pre-commit hooks (optional) +pre-commit install +``` + +> [!NOTE] +> The pre-commit is really useful to run automatic checks before pushing code, reducing chances of ugly code being pushed. +> +> If a pre-commit hook fails, it will try to fix all needed files, **so you will need to add them again before recreating the commit**. + +> [!TIP] +> You can run the linter easily, once the `dev` dependencies are installed, with the command: +> +> ```sh +> # To only run the checks +> lint-tool +> +> # To let the linter try to fix as much as possible +> lint-tool --fix +> ``` + +--- + +## Usage + +Launch the main application to process files interactively: + +### GUI + +The GUI handles batch processing for folders, single .h5/.hdf5 files, or .zip archives and lets you run multiple pipelines at once. Batch outputs are written directly into the chosen output directory (one combined `.h5` per input file). + +```sh +# Via the entry point +angioeye + +# Or via the script +python src/angio_eye.py +``` + +### CLI + +The CLI is designed for batch processing in headless environments or clusters. + +```sh +# Via the entry point +angioeye-cli + +# Or via the script +python src/cli.py +``` + +--- + +## Pipeline System + +Pipelines are the heart of AngioEye. To add a new analysis, create a file in `src/pipelines/` with a class inheriting from `ProcessPipeline`. 
+ +To register it to the app, add the decorator `@register_pipeline`. You can define any needed imports inside, as well as some more info. + +To see more complete examples, check out `src/pipelines/basic_stats.py` and `src/pipelines/dummy_heavy.py`. + +### Simple Pipeline Structure + +```python +from pipelines import ProcessPipeline, ProcessResult, registerPipeline + +@registerPipeline( + name="My Analysis", + description="Calculates a custom clinical metric.", + required_deps=["torch>=2.2"], +) +class MyAnalysis(ProcessPipeline): + def run(self, h5file): + import torch + # 1. Read data using h5py + # 2. Perform calculations + # 3. Return metrics + + metrics={"peak_flow": 12.5} + + # Optional attributes applied to the pipeline group. + attrs = { + "pipeline_version": "1.0", + "author": "StaticExample" + } + + return ProcessResult( + metrics=metrics, + attrs=attrs + ) +``` diff --git a/Viewer/viewer.py b/Viewer/viewer.py index e1c6016..b83b76c 100644 --- a/Viewer/viewer.py +++ b/Viewer/viewer.py @@ -1,7 +1,6 @@ import os import tkinter as tk from tkinter import filedialog, messagebox, ttk -from typing import Dict, List, Optional, Tuple, Union import h5py import numpy as np @@ -14,11 +13,11 @@ def __init__(self) -> None: super().__init__() self.title("HDF5 Viewer") self.geometry("1200x800") - self.h5_file: Optional[h5py.File] = None - self.current_dataset: Optional[h5py.Dataset] = None - self.current_dataset_path: Optional[str] = None - self.axis_label_to_index: Dict[str, int] = {} - self.slider_vars: Dict[int, Tuple[tk.IntVar, ttk.Label]] = {} + self.h5_file: h5py.File | None = None + self.current_dataset: h5py.Dataset | None = None + self.current_dataset_path: str | None = None + self.axis_label_to_index: dict[str, int] = {} + self.slider_vars: dict[int, tuple[tk.IntVar, ttk.Label]] = {} self.colorbar = None self._build_ui() @@ -47,8 +46,12 @@ def _build_ui(self) -> None: tree_frame.columnconfigure(0, weight=1) tree_frame.rowconfigure(0, weight=1) - self.tree = 
ttk.Treeview(tree_frame, columns=("path",), show="tree", selectmode="browse") - tree_scroll = ttk.Scrollbar(tree_frame, orient="vertical", command=self.tree.yview) + self.tree = ttk.Treeview( + tree_frame, columns=("path",), show="tree", selectmode="browse" + ) + tree_scroll = ttk.Scrollbar( + tree_frame, orient="vertical", command=self.tree.yview + ) self.tree.configure(yscrollcommand=tree_scroll.set) self.tree.grid(row=0, column=0, sticky="nsew") tree_scroll.grid(row=0, column=1, sticky="ns") @@ -65,17 +68,23 @@ def _build_ui(self) -> None: self.y_axis_var = tk.StringVar() ttk.Label(axis_frame, text="X axis").grid(row=0, column=0, sticky="w") - self.x_combo = ttk.Combobox(axis_frame, textvariable=self.x_axis_var, state="readonly", width=24) + self.x_combo = ttk.Combobox( + axis_frame, textvariable=self.x_axis_var, state="readonly", width=24 + ) self.x_combo.grid(row=0, column=1, sticky="ew", padx=4, pady=2) ttk.Label(axis_frame, text="Y axis").grid(row=1, column=0, sticky="w") - self.y_combo = ttk.Combobox(axis_frame, textvariable=self.y_axis_var, state="readonly", width=24) + self.y_combo = ttk.Combobox( + axis_frame, textvariable=self.y_axis_var, state="readonly", width=24 + ) self.y_combo.grid(row=1, column=1, sticky="ew", padx=4, pady=2) self.x_combo.bind("<>", self.on_axis_change) self.y_combo.bind("<>", self.on_axis_change) - self.slider_frame = ttk.LabelFrame(sidebar, text="Other axes sliders", padding=8) + self.slider_frame = ttk.LabelFrame( + sidebar, text="Other axes sliders", padding=8 + ) self.slider_frame.grid(row=4, column=0, sticky="nsew", pady=(8, 8)) self.slider_frame.columnconfigure(1, weight=1) @@ -93,10 +102,14 @@ def _build_ui(self) -> None: self.canvas.get_tk_widget().grid(row=1, column=0, sticky="nsew") def _axis_label(self, axis: int) -> str: - size = self.current_dataset.shape[axis] if self.current_dataset is not None else "?" + size = ( + self.current_dataset.shape[axis] + if self.current_dataset is not None + else "?" 
+ ) return f"Dim {axis} (size {size})" - def _selected_axis(self, label: str) -> Optional[int]: + def _selected_axis(self, label: str) -> int | None: if label == "(none)": return None return self.axis_label_to_index.get(label) @@ -138,17 +151,32 @@ def _populate_tree(self) -> None: self.tree.delete(*self.tree.get_children()) if self.h5_file is None: return - root_id = self.tree.insert("", "end", text=os.path.basename(self.h5_file.filename), open=True, values=("/")) + root_id = self.tree.insert( + "", + "end", + text=os.path.basename(self.h5_file.filename), + open=True, + values=("/"), + ) self._add_tree_items(root_id, self.h5_file) def _add_tree_items(self, parent: str, obj: h5py.Group) -> None: for key, item in obj.items(): if isinstance(item, h5py.Group): - node_id = self.tree.insert(parent, "end", text=key, open=False, values=(item.name,), tags=("group",)) + node_id = self.tree.insert( + parent, + "end", + text=key, + open=False, + values=(item.name,), + tags=("group",), + ) self._add_tree_items(node_id, item) else: label = f"{key} {item.shape}" - self.tree.insert(parent, "end", text=label, values=(item.name,), tags=("dataset",)) + self.tree.insert( + parent, "end", text=label, values=(item.name,), tags=("dataset",) + ) def on_tree_select(self, _event: tk.Event) -> None: item_id = self.tree.focus() @@ -166,7 +194,9 @@ def load_dataset(self, path: str) -> None: dataset = self.h5_file[path] self.current_dataset = dataset self.current_dataset_path = path - self.info_label.config(text=f"{path}\nshape={dataset.shape} dtype={dataset.dtype}") + self.info_label.config( + text=f"{path}\nshape={dataset.shape} dtype={dataset.dtype}" + ) dims = dataset.ndim labels = [self._axis_label(i) for i in range(dims)] @@ -215,7 +245,9 @@ def refresh_sliders(self) -> None: if axis == x_axis or axis == y_axis: continue row = len(self.slider_vars) - ttk.Label(self.slider_frame, text=self._axis_label(axis)).grid(row=row, column=0, sticky="w") + ttk.Label(self.slider_frame, 
text=self._axis_label(axis)).grid( + row=row, column=0, sticky="w" + ) var = tk.IntVar(value=0) scale = tk.Scale( self.slider_frame, @@ -231,7 +263,7 @@ def refresh_sliders(self) -> None: value_label.grid(row=row, column=2, sticky="e") self.slider_vars[axis] = (var, value_label) - def on_axis_change(self, _event: Optional[tk.Event] = None) -> None: + def on_axis_change(self, _event: tk.Event | None = None) -> None: if self.current_dataset is None: return self.refresh_sliders() @@ -262,7 +294,9 @@ def update_plot(self) -> None: dims = ds.ndim x_axis = self._selected_axis(self.x_axis_var.get()) y_axis = self._selected_axis(self.y_axis_var.get()) - slider_indices = {axis: int(var.get()) for axis, (var, _) in self.slider_vars.items()} + slider_indices = { + axis: int(var.get()) for axis, (var, _) in self.slider_vars.items() + } if dims == 0: value = ds[()] @@ -277,7 +311,7 @@ def update_plot(self) -> None: if x_axis == y_axis: self._show_placeholder("Choose two different axes") return - slices: List[Union[int, slice]] = [] + slices: list[int | slice] = [] for axis in range(dims): if axis == x_axis or axis == y_axis: slices.append(slice(None)) @@ -289,7 +323,9 @@ def update_plot(self) -> None: self.ax.set_xlabel(self._axis_label(x_axis)) self.ax.set_ylabel(ds.name) else: - kept_axes = [idx for idx, slc in enumerate(slices) if isinstance(slc, slice)] + kept_axes = [ + idx for idx, slc in enumerate(slices) if isinstance(slc, slice) + ] order = [kept_axes.index(y_axis), kept_axes.index(x_axis)] if order != [0, 1]: data = np.transpose(data, order) diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 0000000..73f3c01 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,86 @@ +[build-system] +requires = ["setuptools >= 77.0.3"] +build-backend = "setuptools.build_meta" + +[project] +name = "AngioEye" +version = "0.1.0" +description = "Cohort-analysis engine for retinal Doppler holography" +readme = "README.md" +requires-python = ">=3.10" +license = { text = 
"GPL-3.0-only" } + +# =============== [ DEPENDENCIES ] =============== + +dependencies = ["numpy>=1.24", "h5py>=3.9", "sv-ttk>=2.6"] + +[project.optional-dependencies] +# For specific pipelines +pipelines = ["torch>=2.2", "pandas>=2.1"] + +# For developers +dev = ["ruff", "pre-commit", "pyinstaller"] + +# =============== [ SCRIPTS ] =============== + +[project.scripts] +# This allows users to simply type 'angioeye' in their terminal +angioeye = "angio_eye:main" +angioeye-cli = "cli:main" + +lint-tool = "scripts.ruff_linter:main" + +[tool.setuptools] +package-dir = { "" = "src" } +py-modules = ["angio_eye", "cli"] + +[tool.setuptools.packages.find] +where = ["src"] + +# =============== [ TOOLS OPTIONS ] =============== + +[tool.ruff] +exclude = [ + ".bzr", + ".direnv", + ".eggs", + ".git", + ".git-rewrite", + ".hg", + ".ipynb_checkpoints", + ".mypy_cache", + ".nox", + ".pants.d", + ".pyenv", + ".pytest_cache", + ".pytype", + ".ruff_cache", + ".svn", + ".tox", + ".venv", + ".vscode", + "__pypackages__", + "_build", + "buck-out", + "build", + "dist", + "node_modules", + "site-packages", + "venv", +] +line-length = 88 +indent-width = 4 +target-version = "py310" + +[tool.ruff.lint] +# Enable Pyflakes (`F`) and a subset of the pycodestyle (`E`) codes by default. +# pyupgrade (`UP`), isort (`I`), and flake8-bugbear (`B`). 
+select = ["E4", "E7", "E9", "F", "I", "UP", "B"] +ignore = [] +fixable = ["ALL"] +unfixable = [] + +[tool.ruff.format] +quote-style = "double" +indent-style = "space" +line-ending = "auto" diff --git a/requirements.txt b/requirements.txt deleted file mode 100644 index 1baa844..0000000 --- a/requirements.txt +++ /dev/null @@ -1,3 +0,0 @@ -numpy>=1.24 -h5py>=3.9 -sv-ttk>=2.6 diff --git a/src/angio_eye.py b/src/angio_eye.py index 311cf69..82cabed 100644 --- a/src/angio_eye.py +++ b/src/angio_eye.py @@ -1,11 +1,10 @@ import os import tempfile +import tkinter as tk import zipfile -from datetime import datetime +from collections.abc import Sequence from pathlib import Path -import tkinter as tk from tkinter import filedialog, messagebox, ttk -from typing import Dict, List, Optional, Sequence import h5py @@ -14,12 +13,9 @@ except ImportError: # optional dependency sv_ttk = None -from pipelines import ( - ProcessPipeline, - ProcessResult, - load_pipeline_catalog, -) -from pipelines.core.utils import write_combined_results_h5, write_result_h5 +from pipelines import PipelineDescriptor, ProcessResult, load_pipeline_catalog +from pipelines.core.errors import format_pipeline_exception +from pipelines.core.utils import write_combined_results_h5 class _Tooltip: @@ -32,7 +28,7 @@ def __init__( self.text = text self.bg = bg self.fg = fg - self.tipwindow: Optional[tk.Toplevel] = None + self.tipwindow: tk.Toplevel | None = None widget.bind("", self._show) widget.bind("", self._hide) @@ -69,20 +65,16 @@ def __init__(self) -> None: super().__init__() self.title("HDF5 Process") self.geometry("800x600") - self.h5_file: Optional[h5py.File] = None - self.pipeline_registry: Dict[str, ProcessPipeline] = {} - self.pipeline_check_vars: Dict[str, tk.BooleanVar] = {} - self.last_process_result: Optional[ProcessResult] = None - self.last_process_pipeline: Optional[ProcessPipeline] = None - self.output_dir_var = tk.StringVar(value=str(Path.cwd())) - self.last_output_dir: Optional[Path] = None + 
self.pipeline_registry: dict[str, PipelineDescriptor] = {} + self.pipeline_check_vars: dict[str, tk.BooleanVar] = {} self.batch_input_var = tk.StringVar() self.batch_output_var = tk.StringVar(value=str(Path.cwd())) + self.batch_zip_var = tk.BooleanVar(value=False) + self.batch_zip_name_var = tk.StringVar(value="outputs.zip") self._apply_theme() self._build_ui() self._register_pipelines() - self._show_placeholder() self._reset_batch_output() def _apply_theme(self) -> None: @@ -130,85 +122,15 @@ def _apply_theme(self) -> None: self._accent_color = accent def _build_ui(self) -> None: - notebook = ttk.Notebook(self) - notebook.pack(fill="both", expand=True) - - single_tab = ttk.Frame(notebook, padding=10) - batch_tab = ttk.Frame(notebook, padding=10) - notebook.add(single_tab, text="Single file") - notebook.add(batch_tab, text="Batch") - - self._build_single_tab(single_tab) - self._build_batch_tab(batch_tab) - - def _build_single_tab(self, parent: ttk.Frame) -> None: - parent.columnconfigure(1, weight=1) - parent.rowconfigure(2, weight=1) - - top_bar = ttk.Frame(parent) - top_bar.grid(row=0, column=0, columnspan=3, sticky="ew", pady=(0, 8)) - open_btn = ttk.Button(top_bar, text="Open .h5 file", command=self.open_file) - open_btn.pack(side="left") - self.file_label = ttk.Label(top_bar, text="No file loaded", wraplength=500) - self.file_label.pack(side="left", padx=8) - - ttk.Label(parent, text="Pipeline").grid(row=1, column=0, sticky="w") - self.pipeline_var = tk.StringVar() - self.pipeline_combo = ttk.Combobox( - parent, textvariable=self.pipeline_var, state="readonly", width=40 - ) - self.pipeline_combo.grid(row=1, column=1, sticky="w") - run_btn = ttk.Button( - parent, text="Run pipeline", command=self.run_selected_pipeline - ) - run_btn.grid(row=1, column=2, sticky="w", padx=6) - - ttk.Label(parent, text="Result").grid(row=2, column=0, sticky="nw", pady=(8, 2)) - output_frame = ttk.Frame(parent) - output_frame.grid(row=2, column=1, columnspan=2, sticky="nsew") - 
output_frame.columnconfigure(0, weight=1) - output_frame.rowconfigure(0, weight=1) - self.process_output = tk.Text( - output_frame, - height=18, - state="disabled", - bg=self._text_bg, - fg=self._text_fg, - insertbackground=self._text_fg, - ) - output_scroll = ttk.Scrollbar( - output_frame, orient="vertical", command=self.process_output.yview - ) - self.process_output.configure(yscrollcommand=output_scroll.set) - self.process_output.grid(row=0, column=0, sticky="nsew") - output_scroll.grid(row=0, column=1, sticky="ns") - - export_frame = ttk.Frame(parent, padding=(0, 8, 0, 0)) - export_frame.grid(row=3, column=0, columnspan=3, sticky="ew") - export_frame.columnconfigure(1, weight=1) - ttk.Label(export_frame, text="Output folder").grid(row=0, column=0, sticky="w") - output_dir_entry = ttk.Entry(export_frame, textvariable=self.output_dir_var) - output_dir_entry.grid(row=0, column=1, sticky="ew", padx=4) - ttk.Button(export_frame, text="Browse", command=self.choose_output_dir).grid( - row=0, column=2, sticky="w" - ) - - ttk.Label(export_frame, text="Export CSV").grid( - row=1, column=0, sticky="w", pady=(6, 0) - ) - self.export_path_var = tk.StringVar(value="process_result.csv") - export_entry = ttk.Entry(export_frame, textvariable=self.export_path_var) - export_entry.grid(row=1, column=1, sticky="ew", padx=4, pady=(6, 0)) - ttk.Button(export_frame, text="Browse", command=self.choose_export_path).grid( - row=1, column=2, sticky="w", pady=(6, 0) - ) - ttk.Button( - export_frame, text="Export", command=self.export_process_result - ).grid(row=1, column=3, sticky="w", padx=6, pady=(6, 0)) + container = ttk.Frame(self, padding=10) + container.pack(fill="both", expand=True) + self._build_batch_tab(container) def _build_batch_tab(self, parent: ttk.Frame) -> None: parent.columnconfigure(1, weight=1) - parent.rowconfigure(4, weight=1) + parent.columnconfigure(2, weight=0) + parent.columnconfigure(3, weight=0) + parent.rowconfigure(5, weight=1) ttk.Label(parent, text="Input 
(folder / .h5 / .hdf5 / .zip)").grid( row=0, column=0, sticky="w" @@ -287,12 +209,29 @@ def _build_batch_tab(self, parent: ttk.Frame) -> None: run_btn = ttk.Button(parent, text="Run batch", command=self.run_batch) run_btn.grid(row=3, column=0, sticky="w", pady=(10, 4)) + ttk.Checkbutton( + parent, + text="Zip outputs after run", + variable=self.batch_zip_var, + command=self._toggle_zip_name_visibility, + ).grid(row=3, column=1, sticky="w", pady=(10, 4)) + + # Archive name placed on its own row to avoid resizing the log/list area. + self.batch_zip_label = ttk.Label(parent, text="Archive name") + self.batch_zip_label.grid(row=4, column=0, sticky="w", pady=(2, 8), padx=(0, 4)) + self.batch_zip_entry = ttk.Entry( + parent, textvariable=self.batch_zip_name_var, width=28 + ) + self.batch_zip_entry.grid( + row=4, column=1, columnspan=3, sticky="w", pady=(2, 8) + ) + self._toggle_zip_name_visibility() ttk.Label(parent, text="Batch log").grid( - row=4, column=0, sticky="nw", pady=(8, 2) + row=5, column=0, sticky="nw", pady=(8, 2) ) batch_output_frame = ttk.Frame(parent) - batch_output_frame.grid(row=4, column=1, columnspan=2, sticky="nsew") + batch_output_frame.grid(row=5, column=1, columnspan=3, sticky="nsew") batch_output_frame.columnconfigure(0, weight=1) batch_output_frame.rowconfigure(0, weight=1) self.batch_output = tk.Text( @@ -313,20 +252,15 @@ def _build_batch_tab(self, parent: ttk.Frame) -> None: def _register_pipelines(self) -> None: available, missing = load_pipeline_catalog() self.pipeline_registry = {p.name: p for p in available} - self.missing_pipelines = {p.name: p for p in missing} - self.pipeline_combo["values"] = list(self.pipeline_registry.keys()) - if available: - self.pipeline_combo.current(0) - self.pipeline_var.set(available[0].name) self._populate_pipeline_checks(available, missing) def _populate_pipeline_checks( - self, available: List[ProcessPipeline], missing: List[ProcessPipeline] + self, available: list[PipelineDescriptor], missing: 
list[PipelineDescriptor] ) -> None: for child in self.pipeline_checks_inner.winfo_children(): child.destroy() self.pipeline_check_vars = {} - rows: List[ProcessPipeline] = [*available, *missing] + rows: list[PipelineDescriptor] = [*available, *missing] for idx, pipeline in enumerate(rows): is_available = getattr(pipeline, "available", True) var = tk.BooleanVar(value=is_available) @@ -338,7 +272,9 @@ def _populate_pipeline_checks( ) check.grid(row=idx, column=0, sticky="w", padx=(0, 8), pady=(0, 6)) tip_text = pipeline.description or "" - missing_deps = getattr(pipeline, "missing_deps", []) or getattr(pipeline, "requires", []) + missing_deps = getattr(pipeline, "missing_deps", []) or getattr( + pipeline, "requires", [] + ) if missing_deps: tip_suffix = f"\nInstall: {', '.join(missing_deps)}" tip_text = (tip_text + tip_suffix) if tip_text else tip_suffix @@ -351,14 +287,6 @@ def _populate_pipeline_checks( ) self.pipeline_check_vars[pipeline.name] = var - def _show_placeholder( - self, message: str = "Load a .h5 file then run a pipeline" - ) -> None: - self.process_output.configure(state="normal") - self.process_output.delete("1.0", "end") - self.process_output.insert("end", message) - self.process_output.configure(state="disabled") - def _reset_batch_output( self, message: str = "Select an input path, choose pipelines, then run batch." 
) -> None: @@ -375,6 +303,39 @@ def _log_batch(self, text: str) -> None: self.batch_output.update_idletasks() self.update_idletasks() + def _export_batch_log(self, initial_dir: Path | None = None) -> Path | None: + if initial_dir is None: + initial_dir = Path(self.batch_output_var.get() or Path.cwd()) + if not initial_dir.exists(): + initial_dir = Path.cwd() + path = filedialog.asksaveasfilename( + defaultextension=".txt", + filetypes=[("Text file", "*.txt"), ("All files", "*.*")], + initialdir=str(initial_dir), + initialfile="batch_log.txt", + title="Export batch log", + ) + if not path: + return None + try: + log_text = self.batch_output.get("1.0", "end").rstrip() + Path(path).write_text(log_text, encoding="utf-8") + self._log_batch(f"[LOG] Exported batch log -> {path}") + return Path(path) + except Exception as exc: # noqa: BLE001 + messagebox.showerror("Export failed", f"Could not save log: {exc}") + return None + + def _show_batch_error_dialog(self, message: str, initial_dir: Path) -> None: + self.bell() + export = messagebox.askyesno( + "Batch completed with errors", + f"{message}\n\nExport log to .txt?", + icon="warning", + ) + if export: + self._export_batch_log(initial_dir) + def select_all_pipelines(self) -> None: for var in self.pipeline_check_vars.values(): if getattr(var, "_enabled", True): @@ -385,82 +346,6 @@ def clear_all_pipelines(self) -> None: if getattr(var, "_enabled", True): var.set(False) - def open_file(self) -> None: - path = filedialog.askopenfilename( - filetypes=[("HDF5", "*.h5 *.hdf5"), ("All files", "*.*")], - initialdir=os.path.abspath("h5_example"), - ) - if not path: - return - try: - if self.h5_file is not None: - self.h5_file.close() - self.h5_file = h5py.File(path, "r") - except Exception as exc: # noqa: BLE001 - messagebox.showerror("Error", f"Cannot open {path}: {exc}") - return - self.file_label.config(text=path) - self.last_process_result = None - self.last_process_pipeline = None - self.last_output_dir = None - 
self._show_placeholder("File loaded. Pick a pipeline and run.") - - def run_selected_pipeline(self) -> None: - name = self.pipeline_var.get() - if not name: - messagebox.showwarning( - "Missing pipeline", "Select a pipeline before running." - ) - return - pipeline = self.pipeline_registry.get(name) - if pipeline is None: - messagebox.showerror( - "Pipeline missing", f"Pipeline '{name}' is not registered." - ) - return - if self.h5_file is None: - messagebox.showwarning("Missing file", "Load a .h5 file first.") - return - try: - result = pipeline.run(self.h5_file) - except Exception as exc: # noqa: BLE001 - messagebox.showerror("Pipeline error", f"Pipeline failed: {exc}") - return - try: - output_dir = self._prepare_output_dir() - output_path = self._default_output_path(name, output_dir) - self._write_result_h5(result, output_path, pipeline_name=name) - result.output_h5_path = output_path - self.last_output_dir = output_dir - self._update_export_default(output_dir) - except Exception as exc: # noqa: BLE001 - messagebox.showerror("Output error", f"Cannot write outputs: {exc}") - return - self.last_process_result = result - self.last_process_pipeline = pipeline - file_label = self.h5_file.filename or "(in-memory file)" - self._render_process_result(result, pipeline_name=name, file_path=file_label) - - def _render_process_result( - self, result: ProcessResult, pipeline_name: str, file_path: str - ) -> None: - self.process_output.configure(state="normal") - self.process_output.delete("1.0", "end") - self.process_output.insert("end", f"Pipeline: {pipeline_name}\n") - self.process_output.insert("end", f"File: {file_path}\n\n") - self.process_output.insert("end", "Metrics:\n") - for key, value in result.metrics.items(): - self.process_output.insert("end", f" - {key}: {value}\n") - if result.artifacts: - self.process_output.insert("end", "\nArtifacts:\n") - for key, value in result.artifacts.items(): - self.process_output.insert("end", f" - {key}: {value}\n") - if 
result.output_h5_path: - self.process_output.insert( - "end", f"\nResult HDF5: {result.output_h5_path}\n" - ) - self.process_output.configure(state="disabled") - def choose_batch_folder(self) -> None: path = filedialog.askdirectory( initialdir=self.batch_input_var.get() or None, @@ -486,52 +371,6 @@ def choose_batch_output(self) -> None: if path: self.batch_output_var.set(path) - def choose_export_path(self) -> None: - initial_dir = self.last_output_dir or Path( - self.output_dir_var.get() or Path.cwd() - ) - path = filedialog.asksaveasfilename( - defaultextension=".csv", - filetypes=[("CSV", "*.csv"), ("All files", "*.*")], - initialdir=str(initial_dir), - initialfile=Path(self.export_path_var.get()).name, - ) - if path: - self.export_path_var.set(Path(path).name) - - def choose_output_dir(self) -> None: - path = filedialog.askdirectory( - initialdir=self.output_dir_var.get() or None, - title="Select base folder for outputs", - ) - if path: - self.output_dir_var.set(path) - - def export_process_result(self) -> None: - if self.last_process_result is None or self.last_process_pipeline is None: - messagebox.showwarning("No result", "Run a pipeline before exporting.") - return - if self.last_output_dir is None: - try: - self.last_output_dir = self._prepare_output_dir() - except Exception as exc: # noqa: BLE001 - messagebox.showerror( - "Export failed", f"Cannot prepare output folder: {exc}" - ) - return - export_name = Path(self.export_path_var.get() or "process_result.csv").name - final_path = self.last_output_dir / export_name - self.last_output_dir.mkdir(parents=True, exist_ok=True) - try: - final_path_str = self.last_process_pipeline.export( - self.last_process_result, str(final_path) - ) - except Exception as exc: # noqa: BLE001 - messagebox.showerror("Export failed", f"Cannot export: {exc}") - return - self.export_path_var.set(final_path_str) - messagebox.showinfo("Export done", f"Result exported to: {final_path_str}") - def run_batch(self) -> None: data_value = 
(self.batch_input_var.get() or "").strip() if not data_value: @@ -551,8 +390,8 @@ def run_batch(self) -> None: ) return - pipelines: List[ProcessPipeline] = [] - missing: List[str] = [] + pipelines: list[PipelineDescriptor] = [] + missing: list[str] = [] for name in selected_names: pipeline = self.pipeline_registry.get(name) if pipeline is None: @@ -565,17 +404,19 @@ def run_batch(self) -> None: ) return - output_dir_value = (self.batch_output_var.get() or "").strip() - output_dir = ( - Path(output_dir_value).expanduser() if output_dir_value else Path.cwd() + base_output_value = (self.batch_output_var.get() or "").strip() + base_output_dir = ( + Path(base_output_value).expanduser() if base_output_value else Path.cwd() ) - if not output_dir.is_absolute(): - output_dir = Path.cwd() / output_dir - output_dir.mkdir(parents=True, exist_ok=True) + if not base_output_dir.is_absolute(): + base_output_dir = Path.cwd() / base_output_dir + base_output_dir.mkdir(parents=True, exist_ok=True) self._reset_batch_output("Starting batch run...\n") - tempdir: Optional[tempfile.TemporaryDirectory] = None + tempdir: tempfile.TemporaryDirectory | None = None + temp_output_dir: tempfile.TemporaryDirectory | None = None + clean_temp_output = False try: data_root, tempdir = self._prepare_data_root(data_path) inputs = self._find_h5_inputs(data_root) @@ -586,7 +427,12 @@ def run_batch(self) -> None: tempdir.cleanup() return - failures: List[str] = [] + output_dir = base_output_dir + if self.batch_zip_var.get(): + temp_output_dir = tempfile.TemporaryDirectory(dir=base_output_dir) + output_dir = Path(temp_output_dir.name) + + failures: list[str] = [] for h5_path in inputs: try: self._run_pipelines_on_file(h5_path, pipelines, output_dir) @@ -594,23 +440,54 @@ def run_batch(self) -> None: failures.append(f"{h5_path}: {exc}") self._log_batch(f"[FAIL] {h5_path.name}: {exc}") - self._log_batch(f"Completed. 
Outputs stored under: {output_dir}") + summary_msg: str + if self.batch_zip_var.get(): + try: + zip_name = self.batch_zip_name_var.get().strip() or "outputs.zip" + if not zip_name.lower().endswith(".zip"): + zip_name += ".zip" + zip_path = self._zip_output_dir( + output_dir, target_path=base_output_dir / zip_name + ) + self._log_batch(f"[ZIP] Archive created: {zip_path}") + summary_msg = f"ZIP archive: {zip_path}" + # Mark for cleanup so only the archive remains + clean_temp_output = True + except Exception as exc: # noqa: BLE001 + self._log_batch(f"[ZIP FAIL] {exc}") + messagebox.showerror( + "Zip failed", f"Could not create ZIP archive: {exc}" + ) + summary_msg = f"Outputs stored under: {output_dir}" + else: + summary_msg = f"Outputs stored under: {output_dir}" + + self._log_batch(f"Completed. {summary_msg}") + if failures: - messagebox.showwarning( - "Batch completed with errors", - f"{len(failures)} failure(s). See log for details.", + self._show_batch_error_dialog( + f"{len(failures)} failure(s). 
See log for details.\n\n{summary_msg}", + initial_dir=base_output_dir, ) else: - messagebox.showinfo( - "Batch completed", f"Outputs stored under: {output_dir}" - ) + messagebox.showinfo("Batch completed", summary_msg) + if clean_temp_output and temp_output_dir is not None: + temp_output_dir.cleanup() if tempdir is not None: tempdir.cleanup() + def _toggle_zip_name_visibility(self) -> None: + if self.batch_zip_var.get(): + self.batch_zip_label.grid() + self.batch_zip_entry.grid() + else: + self.batch_zip_label.grid_remove() + self.batch_zip_entry.grid_remove() + def _prepare_data_root( self, data_path: Path - ) -> tuple[Path, Optional[tempfile.TemporaryDirectory]]: + ) -> tuple[Path, tempfile.TemporaryDirectory | None]: if data_path.is_file() and data_path.suffix.lower() == ".zip": tempdir = tempfile.TemporaryDirectory() with zipfile.ZipFile(data_path, "r") as zf: @@ -618,7 +495,7 @@ def _prepare_data_root( return Path(tempdir.name), tempdir return data_path, None - def _find_h5_inputs(self, path: Path) -> List[Path]: + def _find_h5_inputs(self, path: Path) -> list[Path]: if path.is_file(): if path.suffix.lower() in {".h5", ".hdf5"}: return [path] @@ -628,79 +505,62 @@ def _find_h5_inputs(self, path: Path) -> List[Path]: return files raise FileNotFoundError(f"Input path does not exist: {path}") - def _safe_pipeline_suffix(self, name: str) -> str: - cleaned = "".join(ch if ch.isalnum() else "_" for ch in name.lower()) - while "__" in cleaned: - cleaned = cleaned.replace("__", "_") - return cleaned.strip("_") or "pipeline" - def _run_pipelines_on_file( self, h5_path: Path, - pipelines: Sequence[ProcessPipeline], + pipelines: Sequence[PipelineDescriptor], output_root: Path, ) -> None: - data_dir = output_root / h5_path.stem - data_dir.mkdir(parents=True, exist_ok=True) - combined_h5_out = data_dir / f"{h5_path.stem}_pipelines_result.h5" - pipeline_results: List[tuple[str, ProcessResult]] = [] + # Place combined output directly in the output root (no per-file 
subfolder). + combined_h5_out = output_root / f"{h5_path.stem}_pipelines_result.h5" + suffix = 1 + while combined_h5_out.exists(): + combined_h5_out = ( + output_root / f"{h5_path.stem}_{suffix}_pipelines_result.h5" + ) + suffix += 1 + + pipeline_results: list[tuple[str, ProcessResult]] = [] with h5py.File(h5_path, "r") as h5file: - for pipeline in pipelines: - result = pipeline.run(h5file) + for pipeline_desc in pipelines: + pipeline = pipeline_desc.instantiate() + try: + result = pipeline.run(h5file) + except Exception as exc: # noqa: BLE001 + raise RuntimeError( + format_pipeline_exception(exc, pipeline) + ) from exc pipeline_results.append((pipeline.name, result)) - suffix = self._safe_pipeline_suffix(pipeline.name) - csv_out = data_dir / f"{h5_path.stem}_{suffix}_metrics.csv" - pipeline.export(result, str(csv_out)) self._log_batch(f"[OK] {h5_path.name} -> {pipeline.name}") write_combined_results_h5( pipeline_results, combined_h5_out, source_file=str(h5_path) ) for _, result in pipeline_results: result.output_h5_path = str(combined_h5_out) - self._log_batch( - f"[OK] {h5_path.name}: combined results -> {combined_h5_out.name}" - ) - - def _prepare_output_dir(self) -> Path: - base_dir_value = (self.output_dir_var.get() or "").strip() - base_dir = Path(base_dir_value).expanduser() if base_dir_value else Path.cwd() - if not base_dir.is_absolute(): - base_dir = Path.cwd() / base_dir - base_dir.mkdir(parents=True, exist_ok=True) - timestamp = datetime.now().strftime("%Y%m%d_%H%M%S") - base_name = ( - Path(self.h5_file.filename).stem - if self.h5_file and self.h5_file.filename - else "output" - ) - output_dir = base_dir / f"{base_name}_{timestamp}" - output_dir.mkdir(parents=True, exist_ok=True) - return output_dir - - def _default_output_path(self, pipeline_name: str, output_dir: Path) -> str: - safe_name = self._safe_pipeline_suffix(pipeline_name) - base = ( - Path(self.h5_file.filename).stem - if self.h5_file and self.h5_file.filename - else "output" - ) - return 
str(output_dir / f"{base}_{safe_name}_result.h5") + self._log_batch(f"[OK] {h5_path.name}: combined results -> {combined_h5_out}") + + def _zip_output_dir(self, folder: Path, target_path: Path | None = None) -> Path: + folder = folder.expanduser().resolve() + if not folder.exists() or not folder.is_dir(): + raise FileNotFoundError(f"Output folder does not exist: {folder}") + if target_path is None: + zip_name = f"{folder.name}_outputs.zip" if folder.name else "outputs.zip" + zip_path = folder.parent / zip_name + else: + zip_path = target_path.expanduser().resolve() + if zip_path.exists(): + zip_path.unlink() + with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for file_path in folder.rglob("*"): + if file_path.is_file(): + zf.write(file_path, file_path.relative_to(folder)) + return zip_path - def _update_export_default(self, output_dir: Path) -> None: - export_name = Path(self.export_path_var.get() or "process_result.csv").name - self.export_path_var.set(str(output_dir / export_name)) - def _write_result_h5( - self, result: ProcessResult, path: str, pipeline_name: str - ) -> None: - source_file = ( - self.h5_file.filename if self.h5_file and self.h5_file.filename else None - ) - write_result_h5( - result, path, pipeline_name=pipeline_name, source_file=source_file - ) +def main(): + app = ProcessApp() + app.mainloop() if __name__ == "__main__": - app = ProcessApp() - app.mainloop() + main() diff --git a/src/cli.py b/src/cli.py index 030ee51..885fbf3 100644 --- a/src/cli.py +++ b/src/cli.py @@ -2,37 +2,49 @@ Command-line interface to run AngioEye pipelines over a collection of HDF5 files. Usage example: - python cli.py --data data/ --pipelines pipelines.txt --output ./results + python cli.py --data data/ --pipelines pipelines.txt --output ./results --zip --zip-name my_run.zip Inputs: --data / -d Path to a directory (recursively scanned), a single .h5/.hdf5 file, or a .zip archive of .h5 files. 
--pipelines / -p Text file listing pipeline names (one per line, '#' and blank lines ignored). --output / -o Base directory where results will be written. A subfolder is created per input file. + --zip / -z When set, compress the outputs into a .zip archive after completion. + --zip-name Optional filename for the archive (default: outputs.zip). """ + from __future__ import annotations import argparse +import shutil import sys -from pathlib import Path import tempfile import zipfile -from typing import Dict, Iterable, List, Optional, Sequence, Tuple +from collections.abc import Sequence +from pathlib import Path import h5py -from pipelines import ProcessPipeline, ProcessResult, load_all_pipelines +from pipelines import ( + PipelineDescriptor, + ProcessResult, + load_pipeline_catalog, +) +from pipelines.core.errors import format_pipeline_exception from pipelines.core.utils import write_combined_results_h5 -def _build_pipeline_registry() -> Dict[str, ProcessPipeline]: - pipelines = load_all_pipelines() - return {p.name: p for p in pipelines} +def _build_pipeline_registry() -> dict[str, PipelineDescriptor]: + available, _ = load_pipeline_catalog() + # pipelines = load_all_pipelines() + return {p.name: p for p in available} -def _load_pipeline_list(path: Path, registry: Dict[str, ProcessPipeline]) -> List[ProcessPipeline]: +def _load_pipeline_list( + path: Path, registry: dict[str, PipelineDescriptor] +) -> list[PipelineDescriptor]: raw_lines = path.read_text(encoding="utf-8").splitlines() - selected: List[ProcessPipeline] = [] - missing: List[str] = [] + selected: list[PipelineDescriptor] = [] + missing: list[str] = [] for line in raw_lines: name = line.strip() if not name or name.startswith("#"): @@ -44,13 +56,17 @@ def _load_pipeline_list(path: Path, registry: Dict[str, ProcessPipeline]) -> Lis selected.append(pipeline) if missing: available = ", ".join(registry.keys()) - raise ValueError(f"Unknown pipeline(s): {', '.join(missing)}. 
Available: {available}") + raise ValueError( + f"Unknown pipeline(s): {', '.join(missing)}. Available: {available}" + ) if not selected: - raise ValueError("No pipelines selected (file is empty or only contains comments).") + raise ValueError( + "No pipelines selected (file is empty or only contains comments)." + ) return selected -def _find_h5_inputs(path: Path) -> List[Path]: +def _find_h5_inputs(path: Path) -> list[Path]: if path.is_file(): if path.suffix.lower() in {".h5", ".hdf5"}: return [path] @@ -68,7 +84,9 @@ def _safe_pipeline_suffix(name: str) -> str: return cleaned.strip("_") or "pipeline" -def _prepare_data_root(data_path: Path) -> Tuple[Path, Optional[tempfile.TemporaryDirectory]]: +def _prepare_data_root( + data_path: Path, +) -> tuple[Path, tempfile.TemporaryDirectory | None]: """Return a directory containing HDF5 files; extract zip archives when needed.""" if data_path.is_file() and data_path.suffix.lower() == ".zip": tempdir = tempfile.TemporaryDirectory() @@ -80,24 +98,26 @@ def _prepare_data_root(data_path: Path) -> Tuple[Path, Optional[tempfile.Tempora def _run_pipelines_on_file( h5_path: Path, - pipelines: Sequence[ProcessPipeline], + pipelines: Sequence[PipelineDescriptor], output_root: Path, -) -> List[Path]: - outputs: List[Path] = [] +) -> list[Path]: + outputs: list[Path] = [] data_dir = output_root / h5_path.stem data_dir.mkdir(parents=True, exist_ok=True) combined_h5_out = data_dir / f"{h5_path.stem}_pipelines_result.h5" - pipeline_results: List[Tuple[str, ProcessResult]] = [] + pipeline_results: list[tuple[str, ProcessResult]] = [] with h5py.File(h5_path, "r") as h5file: - for pipeline in pipelines: - result = pipeline.run(h5file) + for pipeline_desc in pipelines: + pipeline = pipeline_desc.instantiate() + try: + result = pipeline.run(h5file) + except Exception as exc: # noqa: BLE001 + raise RuntimeError(format_pipeline_exception(exc, pipeline)) from exc pipeline_results.append((pipeline.name, result)) - suffix = 
_safe_pipeline_suffix(pipeline.name) - csv_out = data_dir / f"{h5_path.stem}_{suffix}_metrics.csv" - pipeline.export(result, str(csv_out)) print(f"[OK] {h5_path.name} -> {pipeline.name}") - outputs.append(csv_out) - write_combined_results_h5(pipeline_results, combined_h5_out, source_file=str(h5_path)) + write_combined_results_h5( + pipeline_results, combined_h5_out, source_file=str(h5_path) + ) for _, result in pipeline_results: result.output_h5_path = str(combined_h5_out) outputs.append(combined_h5_out) @@ -105,10 +125,36 @@ def _run_pipelines_on_file( return outputs -def run_cli(data_path: Path, pipelines_file: Path, output_dir: Path) -> int: +def _zip_output_dir(folder: Path, target_path: Path | None = None) -> Path: + folder = folder.expanduser().resolve() + if not folder.exists() or not folder.is_dir(): + raise FileNotFoundError(f"Output folder does not exist: {folder}") + if target_path is None: + zip_name = f"{folder.name}_outputs.zip" if folder.name else "outputs.zip" + zip_path = folder.parent / zip_name + else: + zip_path = target_path.expanduser().resolve() + if zip_path.exists(): + zip_path.unlink() + with zipfile.ZipFile(zip_path, "w", compression=zipfile.ZIP_DEFLATED) as zf: + for file_path in folder.rglob("*"): + if file_path.is_file(): + zf.write(file_path, file_path.relative_to(folder)) + return zip_path + + +def run_cli( + data_path: Path, + pipelines_file: Path, + output_dir: Path, + zip_outputs: bool = False, + zip_name: str | None = None, +) -> int: registry = _build_pipeline_registry() pipelines = _load_pipeline_list(pipelines_file, registry) data_root, tempdir = _prepare_data_root(data_path) + work_tempdir_path: Path | None = None + clean_work_output = False try: inputs = _find_h5_inputs(data_root) if not inputs: @@ -117,15 +163,40 @@ def run_cli(data_path: Path, pipelines_file: Path, output_dir: Path) -> int: output_root = output_dir.expanduser().resolve() output_root.mkdir(parents=True, exist_ok=True) - failures: List[str] = [] + work_root 
= output_root + if zip_outputs: + work_tempdir_path = Path(tempfile.mkdtemp(dir=output_root)) + work_root = work_tempdir_path + + failures: list[str] = [] for h5_path in inputs: try: - _run_pipelines_on_file(h5_path, pipelines, output_root) + _run_pipelines_on_file(h5_path, pipelines, work_root) except Exception as exc: # noqa: BLE001 failures.append(f"{h5_path}: {exc}") print(f"[FAIL] {h5_path.name}: {exc}", file=sys.stderr) - print(f"Completed. Outputs stored under: {output_root}") + if zip_outputs: + try: + final_name = (zip_name or "outputs.zip").strip() or "outputs.zip" + if not final_name.lower().endswith(".zip"): + final_name += ".zip" + zip_path = _zip_output_dir( + work_root, target_path=output_root / final_name + ) + print(f"[ZIP] Archive created: {zip_path}") + summary_msg = f"ZIP archive: {zip_path}" + clean_work_output = True + except Exception as exc: # noqa: BLE001 + print( + f"[ZIP FAIL] Could not create ZIP archive: {exc}", file=sys.stderr + ) + summary_msg = f"Outputs stored under: {work_root}" + else: + summary_msg = f"Outputs stored under: {work_root}" + + print(f"Completed. {summary_msg}") + if failures: print(f"{len(failures)} failure(s):", file=sys.stderr) for msg in failures: @@ -135,10 +206,14 @@ def run_cli(data_path: Path, pipelines_file: Path, output_dir: Path) -> int: finally: if tempdir is not None: tempdir.cleanup() + if clean_work_output and work_tempdir_path is not None: + shutil.rmtree(work_tempdir_path, ignore_errors=True) -def main(argv: Optional[Iterable[str]] = None) -> int: - parser = argparse.ArgumentParser(description="Run AngioEye pipelines over a folder of HDF5 files.") +def main(argv: Sequence[str] | None = None) -> int: + parser = argparse.ArgumentParser( + description="Run AngioEye pipelines over a folder of HDF5 files." + ) parser.add_argument( "-d", "--data", @@ -160,10 +235,28 @@ def main(argv: Optional[Iterable[str]] = None) -> int: type=Path, help="Base output directory. 
A subfolder is created per input HDF5 file.", ) + parser.add_argument( + "-z", + "--zip", + action="store_true", + help="Zip the outputs after processing (only the archive is kept).", + ) + parser.add_argument( + "--zip-name", + type=str, + default="outputs.zip", + help="Archive filename to place inside the output directory (default: outputs.zip).", + ) args = parser.parse_args(argv) try: - return run_cli(args.data, args.pipelines, args.output) + return run_cli( + args.data, + args.pipelines, + args.output, + zip_outputs=args.zip, + zip_name=args.zip_name, + ) except Exception as exc: # noqa: BLE001 print(f"Error: {exc}", file=sys.stderr) return 1 diff --git a/src/pipelines/Segments.py b/src/pipelines/Segments.py new file mode 100644 index 0000000..99eb876 --- /dev/null +++ b/src/pipelines/Segments.py @@ -0,0 +1,140 @@ +import numpy as np + +from .core.base import ProcessPipeline, ProcessResult, registerPipeline, with_attrs + + +@registerPipeline(name="segmentformshape") +class ArterialSegExample(ProcessPipeline): + """ + Tutorial pipeline showing the full surface area of a pipeline: + + - Subclass ProcessPipeline and implement `run(self, h5file) -> ProcessResult`. + - Return metrics (scalars, vectors, matrices, cubes) and optional artifacts. + - Attach HDF5 attributes to any metric via `with_attrs(data, attrs_dict)`. + - Add attributes to the pipeline group (`attrs`) or root file (`file_attrs`). + - No input data is required; this pipeline is purely illustrative. + """ + + description = "Tutorial: metrics + artifacts + dataset attrs + file/pipeline attrs." 
+ v_raw = "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegment/value" + v = "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegmentBandLimited/value" + T = "/Artery/VelocityPerBeat/beatPeriodSeconds/value" + + def run(self, h5file) -> ProcessResult: + vraw_ds = np.asarray(h5file[self.v_raw]) + v_ds = np.asarray(h5file[self.v]) + t_ds = np.asarray(h5file[self.T]) + + moment0_seg = 0 + moment1_seg = 0 + + TMI_seg = [] + RI_seg = [] + RVTI_seg = [] + for beat in range(len(v_ds[0, :, 0, 0])): + TMI_branch = [] + RI_branch = [] + RVTI_branch = [] + for branch in range(len(v_ds[0, beat, :, 0])): + speed = np.nanmean(v_ds[:, beat, branch, :], axis=1) + moment0_seg += np.sum(speed) + for i in range(len(speed)): + moment1_seg += speed[i] * i * t_ds[0][beat] / 64 + centroid_seg_branch = (moment1_seg) / (moment0_seg * t_ds[0][0]) + TMI_branch.append(centroid_seg_branch) + + speed_max = np.max(speed) + speed_min = np.min(speed) + RI_k = 1 - (speed_min / speed_max) + RI_branch.append(RI_k) + + epsilon = 10 ** (-12) + moitie = len(speed) // 2 + d1 = np.sum(speed[:moitie]) + d2 = np.sum(speed[moitie:]) + RVTI_k = d1 / (d2 + epsilon) + RVTI_branch.append(RVTI_k) + + RI_seg.append(RI_branch) + TMI_seg.append(TMI_branch) + RVTI_seg.append(RVTI_branch) + + moment0raw_seg = 0 + moment1raw_seg = 0 + + TMIraw_seg = [] + RIraw_seg = [] + RVTIraw_seg = [] + for beat in range(len(vraw_ds[0, :, 0, 0])): + TMIraw_branch = [] + RIraw_branch = [] + RVTIraw_branch = [] + for branch in range(len(vraw_ds[0, beat, :, 0])): + speed_raw = np.nanmean(vraw_ds[:, beat, branch, :], axis=1) + moment0raw_seg += np.sum(speed_raw) + for i in range(len(speed_raw)): + moment1raw_seg += speed_raw[i] * i * t_ds[0][beat] / 64 + centroidraw_seg_branch = (moment1raw_seg) / ( + moment0raw_seg * t_ds[0][0] + ) + TMIraw_branch.append(centroidraw_seg_branch) + + speedraw_max = np.max(speed_raw) + speedraw_min = np.min(speed_raw) + RIraw_k = 1 - (speedraw_min / speedraw_max) + 
RIraw_branch.append(RIraw_k) + + epsilon = 10 ** (-12) + moitie = len(speed_raw) // 2 + d1raw = np.sum(speed_raw[:moitie]) + d2raw = np.sum(speed_raw[moitie:]) + RVTIraw_k = d1raw / (d2raw + epsilon) + RVTIraw_branch.append(RVTIraw_k) + + RIraw_seg.append(RIraw_branch) + TMIraw_seg.append(TMIraw_branch) + RVTIraw_seg.append(RVTIraw_branch) + + # Metrics are the main numerical outputs; each key becomes a dataset under /pipelines//metrics. + metrics = { + "centroid": with_attrs( + np.asarray(TMI_seg), + { + "unit": [""], + }, + ), + "RI": with_attrs( + np.asarray(RI_seg), + { + "unit": [""], + }, + ), + "RTVI": with_attrs( + np.asarray(RVTI_seg), + { + "unit": [""], + }, + ), + "centroid raw": with_attrs( + np.asarray(TMIraw_seg), + { + "unit": [""], + }, + ), + "RI raw": with_attrs( + np.asarray(RIraw_seg), + { + "unit": [""], + }, + ), + "RTVI raw": with_attrs( + np.asarray(RVTIraw_seg), + { + "unit": [""], + }, + ), + } + + # Artifacts can store non-metric outputs (strings, paths, etc.). 
+ + return ProcessResult(metrics=metrics) diff --git a/src/pipelines/__init__.py b/src/pipelines/__init__.py index 74e0c22..f081254 100644 --- a/src/pipelines/__init__.py +++ b/src/pipelines/__init__.py @@ -1,160 +1,62 @@ -import ast import importlib -import importlib.util -import inspect import pkgutil -from typing import List, Tuple -from .core.base import ProcessPipeline, ProcessResult +# import inspect +from .core.base import ( + PIPELINE_REGISTRY, + MissingPipeline, + PipelineDescriptor, + ProcessPipeline, + ProcessResult, +) from .core.utils import write_combined_results_h5, write_result_h5 -class MissingPipeline(ProcessPipeline): - """Placeholder for pipelines whose dependencies are missing.""" - - available = False - missing_deps: List[str] - requires: List[str] - - def __init__(self, name: str, description: str, missing_deps: List[str], requires: List[str]) -> None: - super().__init__() - self.name = name - self.description = description or "Pipeline unavailable (missing dependencies)." - self.missing_deps = missing_deps - self.requires = requires - - def run(self, _h5file): - missing = ", ".join(self.missing_deps or self.requires or ["unknown dependency"]) - raise ImportError(f"Pipeline '{self.name}' unavailable. Missing dependencies: {missing}") - - -def _module_docstring(module_name: str) -> str: - spec = importlib.util.find_spec(module_name) - if not spec or not spec.origin: - return "" - origin = spec.origin - # When frozen with PyInstaller, source files may not be present on disk (only .pyc). 
- if not origin.endswith((".py", ".pyw")): - return "" - try: - with open(origin, "r", encoding="utf-8") as f: - source = f.read() - except OSError: - return "" - tree = ast.parse(source) - return ast.get_docstring(tree) or "" - - -def _parse_requires_from_source(module_name: str) -> List[str]: - spec = importlib.util.find_spec(module_name) - if not spec or not spec.origin: - return [] - origin = spec.origin - if not origin.endswith((".py", ".pyw")): - return [] - try: - with open(origin, "r", encoding="utf-8") as f: - tree = ast.parse(f.read(), filename=origin) - except OSError: - return [] - for node in tree.body: - if isinstance(node, ast.Assign): - for target in node.targets: - if isinstance(target, ast.Name) and target.id == "REQUIRES": - if isinstance(node.value, (ast.List, ast.Tuple)): - vals = [] - for elt in node.value.elts: - if isinstance(elt, ast.Constant) and isinstance(elt.value, str): - vals.append(elt.value) - return vals - return [] - - -def _normalize_req_name(req: str) -> str: - """Extract importable package name from a requirement string.""" - for sep in ("[", "==", ">=", "<=", "~=", "!=", ">", "<"): - if sep in req: - return req.split(sep, 1)[0] - return req - - -def _missing_requirements(requires: List[str]) -> List[str]: - missing: List[str] = [] - for req in requires: - pkg = _normalize_req_name(req).strip() - if not pkg: - continue - if importlib.util.find_spec(pkg) is None: - missing.append(pkg) - return missing - - -def _discover_pipelines() -> Tuple[List[ProcessPipeline], List[MissingPipeline]]: - available: List[ProcessPipeline] = [] - missing: List[MissingPipeline] = [] - seen_classes = set() +def _discover_pipelines() -> tuple[list[PipelineDescriptor], list[PipelineDescriptor]]: + available: list[PipelineDescriptor] = [] + missing: list[PipelineDescriptor] = [] for module_info in pkgutil.iter_modules(__path__): if module_info.name in {"core"} or module_info.name.startswith("_"): continue - module_name = 
f"{__name__}.{module_info.name}" - requires = _parse_requires_from_source(module_name) - doc = _module_docstring(module_name) - # First, check for missing requirements before importing heavy modules. - pre_missing = _missing_requirements(requires) - if pre_missing: - missing.append(MissingPipeline(module_info.name, doc, pre_missing, requires)) - continue + module_name = f"{__name__}.{module_info.name}" try: - module = importlib.import_module(module_name) - except ImportError as exc: - # Capture missing dependency if ModuleNotFoundError has a name. - missing_deps = [] - if isinstance(exc, ModuleNotFoundError) and exc.name and exc.name not in {module_name, module_info.name}: - missing_deps = [exc.name] - if not missing_deps: - missing_deps = requires - missing.append(MissingPipeline(module_info.name, doc, missing_deps, requires)) - continue - - module_requires = getattr(module, "REQUIRES", requires) - post_missing = _missing_requirements(module_requires) - if post_missing: - missing.append(MissingPipeline(module_info.name, doc, post_missing, module_requires)) - continue - for _, cls in inspect.getmembers(module, inspect.isclass): - if not issubclass(cls, ProcessPipeline) or cls is ProcessPipeline: - continue - if cls.__module__ != module.__name__: - continue - if cls in seen_classes: - continue - seen_classes.add(cls) - try: - inst = cls() - inst.available = True # type: ignore[attr-defined] - inst.requires = module_requires # type: ignore[attr-defined] - available.append(inst) - except TypeError: - # Skip classes requiring constructor args. - continue + importlib.import_module(module_name) + except Exception as e: + # Fallback for unknown failures (SyntaxError, etc.) 
+ missing.append( + PipelineDescriptor( + name=module_info.name, + description=f"Import Error: {e}", + available=False, + error_msg=str(e), + ) + ) + + for _name, cls in PIPELINE_REGISTRY.items(): + desc = PipelineDescriptor( + name=cls.name, + description=cls.description, + available=cls.available, + requires=cls.requires, + missing_deps=cls.missing_deps, + pipeline_cls=cls, + ) + if getattr(cls, "is_available", True): + available.append(desc) + else: + missing.append(desc) available.sort(key=lambda p: p.name.lower()) missing.sort(key=lambda p: p.name.lower()) return available, missing -def load_all_pipelines(include_missing: bool = False) -> List[ProcessPipeline]: - """ - Discover and instantiate pipelines. Optionally include placeholders for missing deps. - """ - available, missing = _discover_pipelines() - return available + missing if include_missing else available - - -def load_pipeline_catalog() -> Tuple[List[ProcessPipeline], List[MissingPipeline]]: +def load_pipeline_catalog() -> tuple[ + list[PipelineDescriptor], list[PipelineDescriptor] +]: """Return (available, missing) pipelines for UI/CLI surfaces.""" return _discover_pipelines() @@ -170,7 +72,6 @@ def load_pipeline_catalog() -> Tuple[List[ProcessPipeline], List[MissingPipeline "ProcessResult", "write_result_h5", "write_combined_results_h5", - "load_all_pipelines", "load_pipeline_catalog", "MissingPipeline", *[_cls.__name__ for _cls in (p.__class__ for p in _AVAILABLE)], diff --git a/src/pipelines/arterial_velocity_segment.py b/src/pipelines/arterial_velocity_segment.py new file mode 100644 index 0000000..660d705 --- /dev/null +++ b/src/pipelines/arterial_velocity_segment.py @@ -0,0 +1,96 @@ +import numpy as np + +from .core.base import ProcessPipeline, ProcessResult, registerPipeline, with_attrs + + +@registerPipeline(name="arterialformshapesegment") +class ArterialExampleSegment(ProcessPipeline): + """ + Tutorial pipeline showing the full surface area of a pipeline: + + - Subclass ProcessPipeline 
and implement `run(self, h5file) -> ProcessResult`. + - Return metrics (scalars, vectors, matrices, cubes) and optional artifacts. + - Attach HDF5 attributes to any metric via `with_attrs(data, attrs_dict)`. + - Add attributes to the pipeline group (`attrs`) or root file (`file_attrs`). + - No input data is required; this pipeline is purely illustrative. + """ + + description = "Tutorial: metrics + artifacts + dataset attrs + file/pipeline attrs." + v_raw = "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegment/value" + v = "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegmentBandLimited/value" + T = "/Artery/VelocityPerBeat/beatPeriodSeconds/value" + + def run(self, h5file) -> ProcessResult: + vraw_ds_temp = np.asarray(h5file[self.v_raw]) + vraw_ds = np.maximum(vraw_ds_temp, 0) + v_ds_temp = np.asarray(h5file[self.v]) + v_ds = np.maximum(v_ds_temp, 0) + t_ds = np.asarray(h5file[self.T]) + + TMI_seg = [] + TMI_seg_band = [] + RTVI_seg = [] + RTVI_seg_band = [] + RI_seg = [] + RI_seg_band = [] + M0_seg = 0 + M1_seg = 0 + M0_seg_band = 0 + M1_seg_band = 0 + for k in range(len(vraw_ds[0, :, 0, 0])): + TMI_branch = [] + TMI_branch_band = [] + RTVI_band_branch = [] + RTVI_branch = [] + RI_branch = [] + RI_branch_band = [] + for i in range(len(vraw_ds[0, k, :, 0])): + avg_speed_band = np.nanmean(v_ds[:, k, i, :], axis=1) + avg_speed = np.nanmean(vraw_ds[:, k, i, :], axis=1) + vmin = np.min(avg_speed) + vmax = np.max(avg_speed) + vmin_band = np.min(avg_speed_band) + vmax_band = np.max(avg_speed_band) + + RI_branch.append(1 - (vmin / (vmax + 10 ** (-14)))) + RI_branch_band.append(1 - (vmin_band / (vmax_band + 10 ** (-14)))) + D1_raw = np.sum(avg_speed[:31]) + D2_raw = np.sum(avg_speed[32:]) + D1 = np.sum(avg_speed_band[:31]) + D2 = np.sum(avg_speed_band[32:]) + RTVI_band_branch.append(D1 / (D2 + 10 ** (-12))) + RTVI_branch.append(D1_raw / (D2_raw + 10 ** (-12))) + M0_seg += np.sum(avg_speed) + M0_seg_band += np.sum(avg_speed_band) + for j in 
range(len(avg_speed)): + M1_seg += avg_speed[j] * j * t_ds[0][k] / 64 + M1_seg_band += avg_speed_band[j] * j * t_ds[0][k] / 64 + if M0_seg != 0: + TMI_branch.append(M1_seg / (t_ds[0][k] * M0_seg)) + else: + TMI_branch.append(0) + if M0_seg_band != 0: + TMI_branch_band.append(M1_seg_band / (t_ds[0][k] * M0_seg_band)) + else: + TMI_branch_band.append(0) + + TMI_seg.append(TMI_branch) + TMI_seg_band.append(TMI_branch_band) + RI_seg.append(RI_branch) + RI_seg_band.append(RI_branch_band) + RTVI_seg.append(RTVI_branch) + RTVI_seg_band.append(RTVI_band_branch) + + # Metrics are the main numerical outputs; each key becomes a dataset under /pipelines//metrics. + metrics = { + "TMI_raw_seg": with_attrs(np.asarray(TMI_seg), {"unit": [""]}), + "TMI_seg": np.asarray(TMI_seg_band), + "RI": np.asarray(RI_seg_band), + "RI_raw": np.asarray(RI_seg), + "RTVI_band": np.asarray(RTVI_seg_band), + "RTVI": np.asarray(RTVI_seg), + } + + # Artifacts can store non-metric outputs (strings, paths, etc.). + + return ProcessResult(metrics=metrics) diff --git a/src/pipelines/arterial_waveform_shape_metrics.py b/src/pipelines/arterial_waveform_shape_metrics.py new file mode 100644 index 0000000..33f94f4 --- /dev/null +++ b/src/pipelines/arterial_waveform_shape_metrics.py @@ -0,0 +1,568 @@ +import numpy as np + +from .core.base import ProcessPipeline, ProcessResult, registerPipeline, with_attrs + + +@registerPipeline(name="arterial_waveform_shape_metrics") +class ArterialSegExample(ProcessPipeline): + """ + Waveform-shape metrics on per-beat, per-branch, per-radius velocity waveforms. + + Expected segment layout: + v_seg[t, beat, branch, radius] + i.e. 
v_seg shape: (n_t, n_beats, n_branches, n_radii) + + Outputs + ------- + A) Per-segment (flattened branch x radius): + by_segment/*_segment : shape (n_beats, n_segments) + n_segments = n_branches * n_radii + seg_idx = branch_idx * n_radii + radius_idx (branch-major) + + B) Aggregated: + by_segment/*_branch : shape (n_beats, n_branches) (median over radii) + by_segment/*_global : shape (n_beats,) (mean over all branches & radii) + + C) Independent global metrics (from global waveform path): + global/* : shape (n_beats,) + + Definitions (gain-invariant / shape metrics) + -------------------------------------------- + Rectification: + v <- max(v, 0) with NaNs preserved + + Basic: + tau_M1 = M1 / M0 + tau_M1_over_T = (M1/M0) / T + + RI = 1 - vmin/vmax (robust) + PI = (vmax - vmin) / mean(v) (robust) + + RVTI (paper) = D1 / (D2 + eps), split at 1/2 T (ratio_rvti = 0.5) + + New: + SF_VTI (systolic fraction) = D1_1/3 / (D1_1/3 + D2_2/3 + eps) + where D1_1/3 is integral over first 1/3 of samples, D2_2/3 over remaining 2/3 + + Normalized central moments (shape, not scale): + mu2_norm = mu2 / (M0 * T^2 + eps) (variance-like) + + + with central moments around t_bar = tau_M1: + mu2 = sum(v * (t - t_bar)^2) + + + Quantile timing (on cumulative integral): + C(t) = cumsum(v) / sum(v) + t10_over_T,t25_over_T, t50_over_T,t75_over_T, t90_over_T + + Spectral shape ratios (per beat): + Compute FFT power P(f) of v(t). Define harmonic index h = f * T (cycles/beat). + E_total = sum_{h>=0} P + E_low = sum_{h in [1..H_LOW]} P + E_high = sum_{h in [H_HIGH1..H_HIGH2]} P + Return E_low_over_E_total and E_high_over_E_total + + Default bands: + low: 1..3 harmonics + high: 4..8 harmonics + """ + + description = "Waveform shape metrics (segment + aggregates + global), gain-invariant and robust." 
+ + # Segment inputs + v_raw_segment_input = ( + "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegment/value" + ) + v_band_segment_input = "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegmentBandLimited/value" + + # Global inputs + v_raw_global_input = "/Artery/VelocityPerBeat/VelocitySignalPerBeat/value" + v_band_global_input = ( + "/Artery/VelocityPerBeat/VelocitySignalPerBeatBandLimited/value" + ) + + # Beat period + T_input = "/Artery/VelocityPerBeat/beatPeriodSeconds/value" + + # Parameters + eps = 1e-12 + ratio_rvti = 0.5 # split for RVTI + ratio_sf_vti = 1.0 / 3.0 # split for SF_VTI + + # Spectral bands (harmonic indices, inclusive) + H_LOW_MAX = 3 + H_HIGH_MIN = 4 + H_HIGH_MAX = 8 + + # ------------------------- + # Helpers + # ------------------------- + @staticmethod + def _rectify_keep_nan(x: np.ndarray) -> np.ndarray: + x = np.asarray(x, dtype=float) + return np.where(np.isfinite(x), np.maximum(x, 0.0), np.nan) + + @staticmethod + def _safe_nanmean(x: np.ndarray) -> float: + if x.size == 0 or not np.any(np.isfinite(x)): + return np.nan + return float(np.nanmean(x)) + + @staticmethod + def _safe_nanmedian(x: np.ndarray) -> float: + if x.size == 0 or not np.any(np.isfinite(x)): + return np.nan + return float(np.nanmedian(x)) + + @staticmethod + def _ensure_time_by_beat(v2: np.ndarray, n_beats: int) -> np.ndarray: + """ + Ensure v2 is shaped (n_t, n_beats). If it is (n_beats, n_t), transpose. + """ + v2 = np.asarray(v2, dtype=float) + if v2.ndim != 2: + raise ValueError(f"Expected 2D global waveform, got shape {v2.shape}") + + if v2.shape[1] == n_beats: + return v2 + if v2.shape[0] == n_beats and v2.shape[1] != n_beats: + return v2.T + + # Fallback: if ambiguous, assume (n_t, n_beats) + return v2 + + def _quantile_time_over_T(self, v: np.ndarray, Tbeat: float, q: float) -> float: + """ + v: rectified 1D waveform (NaNs allowed) + Returns t_q / Tbeat where C(t_q) >= q, with C = cumsum(v)/sum(v). 
+ """ + if (not np.isfinite(Tbeat)) or Tbeat <= 0: + return np.nan + + if v.size == 0 or not np.any(np.isfinite(v)): + return np.nan + + vv = np.where(np.isfinite(v), v, 0.0) + m0 = float(np.sum(vv)) + if m0 <= 0: + return np.nan + + c = np.cumsum(vv) / m0 + idx = int(np.searchsorted(c, q, side="left")) + idx = max(0, min(v.size - 1, idx)) + + dt = Tbeat / v.size + t_q = idx * dt + return float(t_q / Tbeat) + + def _spectral_ratios(self, v: np.ndarray, Tbeat: float) -> tuple[float, float]: + """ + Return (E_low/E_total, E_high/E_total) using harmonic-index bands. + """ + if (not np.isfinite(Tbeat)) or Tbeat <= 0: + return np.nan, np.nan + + if v.size == 0 or not np.any(np.isfinite(v)): + return np.nan, np.nan + + vv = np.where(np.isfinite(v), v, 0.0) + + n = vv.size + if n < 2: + return np.nan, np.nan + + # Remove DC? For "shape" we typically keep DC in total energy but exclude it from low/high + # Here: total includes all bins (including DC). Low/high exclude DC by construction (harmonics >= 1). 
+ fs = n / Tbeat # Hz + X = np.fft.rfft(vv) + P = np.abs(X) ** 2 + f = np.fft.rfftfreq(n, d=1.0 / fs) # Hz + h = f * Tbeat # cycles per beat (harmonic index, continuous) + # vv_no_mean = vv - np.mean(vv) + # idx_fund = np.argmax(A[1:]) + 1 + # f1 = f[idx_fund] + # V1 = A[idx_fund] + # if V1 <= 0: + # return np.nan, np.nan, np.nan + # HRI_2_10 = float(np.nan) + """for k in range(2, 11): + target_freq = k * f1 + + # trouver le bin le plus proche + idx = np.argmin(np.abs(f - target_freq)) + + # éviter de sortir du spectre + if idx < len(A): + HRI_2_10 += A[idx] / V1""" + E_total = float(np.sum(P)) + if not np.isfinite(E_total) or E_total <= 0: + return np.nan, np.nan, np.nan + + low_mask = (h >= 1.0) & (h <= float(self.H_LOW_MAX)) + high_mask = (h >= float(self.H_HIGH_MIN)) & (h <= float(self.H_HIGH_MAX)) + + E_low = float(np.sum(P[low_mask])) + E_high = float(np.sum(P[high_mask])) + + return float(E_low / E_total), float(E_high / E_total) + + def _compute_metrics_1d(self, v: np.ndarray, Tbeat: float) -> dict: + """ + Canonical metric kernel: compute all waveform-shape metrics from a single 1D waveform v(t). + Returns a dict of scalar metrics (floats). 
+ """ + v = self._rectify_keep_nan(v) + n = int(v.size) + if n <= 0: + return {k: np.nan for k in self._metric_keys()} + + # If Tbeat invalid, many metrics become NaN + if (not np.isfinite(Tbeat)) or Tbeat <= 0: + return {k: np.nan for k in self._metric_keys()} + + vv = np.where( + np.isfinite(v), v, np.nan + ) # vv = np.where(np.isfinite(v), v, 0.0) + m0 = float(np.nansum(vv)) + if m0 <= 0: + return {k: np.nan for k in self._metric_keys()} + + dt = Tbeat / n + t = np.arange(n, dtype=float) * dt + + # First moment + m1 = float(np.nansum(vv * t)) + tau_M1 = m1 / m0 + tau_M1_over_T = tau_M1 / Tbeat + + # RI / PI robust + vmax = float(np.nanmax(vv)) + vmin = float(np.nanmin(v)) # vmin = float(np.min(vv)) + meanv = float(self._safe_nanmean(v)) # meanv = float(np.mean(vv)) + + if vmax <= 0: + RI = np.nan + PI = np.nan + else: + RI = 1.0 - (vmin / vmax) + RI = float(np.clip(RI, 0.0, 1.0)) if np.isfinite(RI) else np.nan + + if (not np.isfinite(meanv)) or meanv <= 0: + PI = np.nan + else: + PI = (vmax - vmin) / meanv + PI = float(PI) if np.isfinite(PI) else np.nan + + # RVTI (paper, split 1/2) + k_rvti = int(np.ceil(n * self.ratio_rvti)) + k_rvti = max(0, min(n, k_rvti)) + D1_rvti = float(np.sum(vv[:k_rvti])) if k_rvti > 0 else np.nan + D2_rvti = float(np.sum(vv[k_rvti:])) if k_rvti < n else np.nan + RVTI = D1_rvti / (D2_rvti + self.eps) + + # SF_VTI (split 1/3 vs 2/3) + k_sf = int(np.ceil(n * self.ratio_sf_vti)) + k_sf = max(0, min(n, k_sf)) + D1_sf = float(np.nansum(vv[:k_sf])) if k_sf > 0 else np.nan + D2_sf = float(np.nansum(vv[k_sf:])) if k_sf < n else np.nan + SF_VTI = D1_sf / (D1_sf + D2_sf + self.eps) + + # Central moments around tau_M1 (t_bar) + # mu2 = sum(v*(t-tau)^2) + dtau = t - tau_M1 + mu2 = float(np.nansum(vv * (dtau**2))) + tau_M2 = np.sqrt(mu2 / m0 + self.eps) + tau_M2_over_T = tau_M2 / Tbeat + + # Quantile timing features (on cumulative integral) + t10_over_T = self._quantile_time_over_T(vv, Tbeat, 0.10) + t25_over_T = self._quantile_time_over_T(vv, 
Tbeat, 0.25) + t50_over_T = self._quantile_time_over_T(vv, Tbeat, 0.50) + t75_over_T = self._quantile_time_over_T(vv, Tbeat, 0.75) + t90_over_T = self._quantile_time_over_T(vv, Tbeat, 0.90) + + # Spectral ratios + E_low_over_E_total, E_high_over_E_total = self._spectral_ratios(vv, Tbeat) + + return { + "tau_M1": float(tau_M1), + "tau_M1_over_T": float(tau_M1_over_T), + "RI": float(RI) if np.isfinite(RI) else np.nan, + "PI": float(PI) if np.isfinite(PI) else np.nan, + "R_VTI": float(RVTI), + "SF_VTI": float(SF_VTI), + "tau_M2_over_T": float(tau_M2_over_T), + "tau_M2": float(tau_M2), + "t10_over_T": float(t10_over_T), + "t25_over_T": float(t25_over_T), + "t50_over_T": float(t50_over_T), + "t75_over_T": float(t75_over_T), + "t90_over_T": float(t90_over_T), + "E_low_over_E_total": float(E_low_over_E_total), + "E_high_over_E_total": float(E_high_over_E_total), + # "HRI_2_10_total": float(HRI_2_10_total), + } + + @staticmethod + def _metric_keys() -> list[str]: + return [ + "tau_M1", + "tau_M1_over_T", + "RI", + "PI", + "R_VTI", + "SF_VTI", + "tau_M2_over_T", + "tau_M2", + "t10_over_T", + "t25_over_T", + "t50_over_T", + "t75_over_T", + "t90_over_T", + "E_low_over_E_total", + "E_high_over_E_total", + # "HRI_2_10_total", + ] + + def _compute_block_segment(self, v_block: np.ndarray, T: np.ndarray): + """ + v_block: (n_t, n_beats, n_branches, n_radii) + Returns: + per-segment arrays: (n_beats, n_segments) + per-branch arrays: (n_beats, n_branches) (median over radii) + global arrays: (n_beats,) (mean over all branches & radii) + """ + if v_block.ndim != 4: + raise ValueError( + f"Expected (n_t,n_beats,n_branches,n_radii), got {v_block.shape}" + ) + + n_t, n_beats, n_branches, n_radii = v_block.shape + n_segments = n_branches * n_radii + + # Allocate per metric + seg = { + k: np.full((n_beats, n_segments), np.nan, dtype=float) + for k in self._metric_keys() + } + br = { + k: np.full((n_beats, n_branches), np.nan, dtype=float) + for k in self._metric_keys() + } + gl = {k: 
np.full((n_beats,), np.nan, dtype=float) for k in self._metric_keys()} + + for beat_idx in range(n_beats): + Tbeat = float(T[0][beat_idx]) + + # For global aggregate at this beat + gl_vals = {k: [] for k in self._metric_keys()} + + for branch_idx in range(n_branches): + # For branch aggregate over radii + br_vals = {k: [] for k in self._metric_keys()} + + for radius_idx in range(n_radii): + v = v_block[:, beat_idx, branch_idx, radius_idx] + m = self._compute_metrics_1d(v, Tbeat) + + seg_idx = branch_idx * n_radii + radius_idx + for k in self._metric_keys(): + seg[k][beat_idx, seg_idx] = m[k] + br_vals[k].append(m[k]) + gl_vals[k].append(m[k]) + + # Branch aggregates: median over radii (nanmedian) + for k in self._metric_keys(): + br[k][beat_idx, branch_idx] = self._safe_nanmedian( + np.asarray(br_vals[k], dtype=float) + ) + + # Global aggregates: mean over all branches & radii (nanmean) + for k in self._metric_keys(): + gl[k][beat_idx] = self._safe_nanmean( + np.asarray(gl_vals[k], dtype=float) + ) + + seg_order_note = ( + "seg_idx = branch_idx * n_radii + radius_idx (branch-major flattening)" + ) + return seg, br, gl, n_branches, n_radii, seg_order_note + + def _compute_block_global(self, v_global: np.ndarray, T: np.ndarray): + """ + v_global: (n_t, n_beats) after _ensure_time_by_beat + Returns dict of arrays each shaped (n_beats,) + """ + n_beats = int(T.shape[1]) + v_global = self._ensure_time_by_beat(v_global, n_beats) + v_global = self._rectify_keep_nan(v_global) + + out = {k: np.full((n_beats,), np.nan, dtype=float) for k in self._metric_keys()} + + for beat_idx in range(n_beats): + Tbeat = float(T[0][beat_idx]) + v = v_global[:, beat_idx] + m = self._compute_metrics_1d(v, Tbeat) + for k in self._metric_keys(): + out[k][beat_idx] = m[k] + + return out + + # ------------------------- + # Pipeline entrypoint + # ------------------------- + def run(self, h5file) -> ProcessResult: + T = np.asarray(h5file[self.T_input]) + metrics = {} + + # 
------------------------- + # Segment metrics (raw + bandlimited) + # ------------------------- + have_seg = (self.v_raw_segment_input in h5file) and ( + self.v_band_segment_input in h5file + ) + if have_seg: + v_raw_seg = np.asarray(h5file[self.v_raw_segment_input]) + v_band_seg = np.asarray(h5file[self.v_band_segment_input]) + + seg_b, br_b, gl_b, nb_b, nr_b, seg_note_b = self._compute_block_segment( + v_band_seg, T + ) + seg_r, br_r, gl_r, nb_r, nr_r, seg_note_r = self._compute_block_segment( + v_raw_seg, T + ) + + seg_note = seg_note_b + if (nb_b != nb_r) or (nr_b != nr_r): + seg_note = ( + seg_note_b + " | WARNING: raw/band branch/radius dims differ." + ) + + # Helper to pack dict-of-arrays into HDF5 metric keys + def pack(prefix: str, d: dict, attrs_common: dict): + for k, arr in d.items(): + metrics[f"{prefix}/{k}"] = with_attrs(arr, attrs_common) + + # Per-segment outputs (compat dataset names) + pack( + "by_segment/bandlimited_segment", + { + "tau_M1": seg_b["tau_M1"], + "tau_M1_over_T": seg_b["tau_M1_over_T"], + "RI": seg_b["RI"], + "PI": seg_b["PI"], + "R_VTI": seg_b["R_VTI"], + "SF_VTI": seg_b["SF_VTI"], + "tau_M2_over_T": seg_b["tau_M2_over_T"], + "tau_M2": seg_b["tau_M2"], + "t10_over_T": seg_b["t10_over_T"], + "t25_over_T": seg_b["t25_over_T"], + "t50_over_T": seg_b["t50_over_T"], + "t75_over_T": seg_b["t75_over_T"], + "t90_over_T": seg_b["t90_over_T"], + "E_low_over_E_total": seg_b["E_low_over_E_total"], + "E_high_over_E_total": seg_b["E_high_over_E_total"], + # "HRI_2_10_total": seg_b["HRI_2_10_total"], + }, + { + "segment_indexing": [seg_note], + }, + ) + pack( + "by_segment/raw_segment", + { + "tau_M1": seg_r["tau_M1"], + "tau_M1_over_T": seg_r["tau_M1_over_T"], + "RI": seg_r["RI"], + "PI": seg_r["PI"], + "R_VTI": seg_r["R_VTI"], + "SF_VTI": seg_r["SF_VTI"], + "tau_M2_over_T": seg_r["tau_M2_over_T"], + "tau_M2": seg_r["tau_M2"], + "t10_over_T": seg_r["t10_over_T"], + "t25_over_T": seg_r["t25_over_T"], + "t50_over_T": seg_r["t50_over_T"], + 
"t75_over_T": seg_r["t75_over_T"], + "t90_over_T": seg_r["t90_over_T"], + "E_low_over_E_total": seg_r["E_low_over_E_total"], + "E_high_over_E_total": seg_r["E_high_over_E_total"], + # "HRI_2_10_total": seg_r["HRI_2_10_total"], + }, + { + "segment_indexing": [seg_note], + }, + ) + + # Branch aggregates (median over radii) + pack( + "by_segment/bandlimited_branch", + br_b, + {"definition": ["median over radii per branch"]}, + ) + pack( + "by_segment/raw_branch", + br_r, + {"definition": ["median over radii per branch"]}, + ) + + # Global aggregates (mean over all branches & radii) + pack( + "by_segment/bandlimited_global", + gl_b, + {"definition": ["mean over branches and radii"]}, + ) + pack( + "by_segment/raw_global", + gl_r, + {"definition": ["mean over branches and radii"]}, + ) + + # Store parameters used (for provenance) + metrics["by_segment/params/ratio_rvti"] = np.asarray( + self.ratio_rvti, dtype=float + ) + metrics["by_segment/params/ratio_sf_vti"] = np.asarray( + self.ratio_sf_vti, dtype=float + ) + metrics["by_segment/params/eps"] = np.asarray(self.eps, dtype=float) + metrics["by_segment/params/H_LOW_MAX"] = np.asarray( + self.H_LOW_MAX, dtype=int + ) + metrics["by_segment/params/H_HIGH_MIN"] = np.asarray( + self.H_HIGH_MIN, dtype=int + ) + metrics["by_segment/params/H_HIGH_MAX"] = np.asarray( + self.H_HIGH_MAX, dtype=int + ) + + # ------------------------- + # Independent global metrics (raw + bandlimited) + # ------------------------- + have_glob = (self.v_raw_global_input in h5file) and ( + self.v_band_global_input in h5file + ) + if have_glob: + v_raw_gl = np.asarray(h5file[self.v_raw_global_input]) + v_band_gl = np.asarray(h5file[self.v_band_global_input]) + + out_raw = self._compute_block_global(v_raw_gl, T) + out_band = self._compute_block_global(v_band_gl, T) + + for k in self._metric_keys(): + metrics[f"global/raw/{k}"] = out_raw[k] + metrics[f"global/bandlimited/{k}"] = out_band[k] + + # provenance + metrics["global/params/ratio_rvti"] = 
np.asarray( + self.ratio_rvti, dtype=float + ) + metrics["global/params/ratio_sf_vti"] = np.asarray( + self.ratio_sf_vti, dtype=float + ) + metrics["global/params/eps"] = np.asarray(self.eps, dtype=float) + metrics["global/params/H_LOW_MAX"] = np.asarray(self.H_LOW_MAX, dtype=int) + metrics["global/params/H_HIGH_MIN"] = np.asarray(self.H_HIGH_MIN, dtype=int) + metrics["global/params/H_HIGH_MAX"] = np.asarray(self.H_HIGH_MAX, dtype=int) + + return ProcessResult(metrics=metrics) diff --git a/src/pipelines/basic_stats.py b/src/pipelines/basic_stats.py index 934e69a..9c14b24 100644 --- a/src/pipelines/basic_stats.py +++ b/src/pipelines/basic_stats.py @@ -1,16 +1,15 @@ -from typing import Optional - import h5py import numpy as np -from .core.base import ProcessPipeline, ProcessResult +from .core.base import ProcessPipeline, ProcessResult, registerPipeline +@registerPipeline(name="Basic Stats") class BasicStats(ProcessPipeline): description = "Min / Max / Mean / Std over the first dataset found in the file." 
- def _first_dataset(self, h5file: h5py.File) -> Optional[h5py.Dataset]: - found: Optional[h5py.Dataset] = None + def _first_dataset(self, h5file: h5py.File) -> h5py.Dataset | None: + found: h5py.Dataset | None = None def visitor(_name: str, obj: h5py.Dataset) -> None: nonlocal found @@ -28,7 +27,13 @@ def run(self, h5file: h5py.File) -> ProcessResult: finite = data[np.isfinite(data)] arr = finite if finite.size > 0 else data if arr.size == 0: - metrics = {"count": 0, "min": float("nan"), "max": float("nan"), "mean": float("nan"), "std": float("nan")} + metrics = { + "count": 0, + "min": float("nan"), + "max": float("nan"), + "mean": float("nan"), + "std": float("nan"), + } else: metrics = { "count": float(arr.size), @@ -37,4 +42,4 @@ def run(self, h5file: h5py.File) -> ProcessResult: "mean": float(np.mean(arr)), "std": float(np.std(arr)), } - return ProcessResult(metrics=metrics, artifacts={"dataset": dataset.name}) + return ProcessResult(metrics=metrics, attrs={"dataset": dataset.name}) diff --git a/src/pipelines/core/__init__.py b/src/pipelines/core/__init__.py index 466a80c..3826dcc 100644 --- a/src/pipelines/core/__init__.py +++ b/src/pipelines/core/__init__.py @@ -1,4 +1,4 @@ -from .base import ProcessPipeline, ProcessResult +from .base import MissingPipeline, ProcessPipeline, ProcessResult from .utils import ( safe_h5_key, write_combined_results_h5, @@ -7,6 +7,7 @@ __all__ = [ "ProcessPipeline", + "MissingPipeline", "ProcessResult", "safe_h5_key", "write_result_h5", diff --git a/src/pipelines/core/base.py b/src/pipelines/core/base.py index 22201fa..7a26039 100644 --- a/src/pipelines/core/base.py +++ b/src/pipelines/core/base.py @@ -1,17 +1,49 @@ import csv -from dataclasses import dataclass -from typing import Any, Dict, Optional +import importlib.util +from dataclasses import dataclass, field +from typing import Any import h5py +# Global Registry of all imports needed by the pipelines +PIPELINE_REGISTRY: dict[str, type["ProcessPipeline"]] = {} + + +# 
Decorator to register all neede pipelines +def registerPipeline( + name: str, description: str = "", required_deps: list[str] | None = None +): + def decorator(cls): + # metadata for the class + cls.name = name + cls.description = description or getattr(cls, "description", "") + cls.requires = required_deps or [] + + # Check if requirements are missing in the current environment + missing = [] + for req in cls.requires: + # TODO: We should maybe include the version check + # RM the version "torch>=2.0" -> "torch" + pkg = req.split(">")[0].split("=")[0].split("<")[0].strip() + + if importlib.util.find_spec(pkg) is None: + missing.append(pkg) + + cls.missing_deps = missing + cls.available = len(missing) == 0 + + # Add to registry + PIPELINE_REGISTRY[name] = cls + return cls + + return decorator + @dataclass class ProcessResult: - metrics: Dict[str, Any] - artifacts: Optional[Dict[str, Any]] = None - attrs: Optional[Dict[str, Any]] = None # attributes stored on the pipeline group - file_attrs: Optional[Dict[str, Any]] = None # attributes stored on the root H5 file - output_h5_path: Optional[str] = None + metrics: dict[str, Any] + attrs: dict[str, Any] | None = None # attributes stored on the pipeline group + output_h5_path: str | None = None @dataclass @@ -19,21 +51,54 @@ class DatasetValue: """Represents a dataset payload plus optional attributes for that dataset.""" data: Any - attrs: Optional[Dict[str, Any]] = None + attrs: dict[str, Any] | None = None -def with_attrs(data: Any, attrs: Dict[str, Any]) -> DatasetValue: +def with_attrs(data: Any, attrs: dict[str, Any]) -> DatasetValue: """Convenience helper to attach attributes to a dataset value.""" return DatasetValue(data=data, attrs=attrs) +# +==========================================================================+ # +# | PIPELINES CLASSES | # +# +==========================================================================+ # + + +@dataclass +class PipelineDescriptor: + name: str + description: str + available: 
bool + # To avoid Python Mutable Default Arguments + requires: list[str] = field(default_factory=list) + missing_deps: list[str] = field(default_factory=list) + pipeline_cls: type["ProcessPipeline"] | None = None + error_msg: str = "" + + def instantiate(self) -> "ProcessPipeline": + """Factory method to create the actual pipeline instance.""" + if not self.available or self.pipeline_cls is None: + return MissingPipeline( + self.name, + self.error_msg or self.description, + self.missing_deps, + self.requires, + ) + return self.pipeline_cls() + + class ProcessPipeline: - description: str = "" + name: str + description: str + available: bool + missing_deps: list[str] + requires: list[str] def __init__(self) -> None: # Derive the pipeline name from the module filename (e.g., basic_stats.py -> basic_stats). - module_name = (self.__class__.__module__ or "").rsplit(".", 1)[-1] - self.name: str = module_name or self.__class__.__name__ + if not getattr(self, "name", None): + module_name = (self.__class__.__module__ or "").rsplit(".", 1)[-1] + self.name: str = module_name or self.__class__.__name__ def run(self, h5file: h5py.File) -> ProcessResult: raise NotImplementedError @@ -46,3 +111,26 @@ def export(self, result: ProcessResult, output_path: str) -> str: for key, value in result.metrics.items(): writer.writerow([key, value]) return output_path + + +class MissingPipeline(ProcessPipeline): + """Placeholder for pipelines whose dependencies are missing.""" + + available = False + + def __init__( + self, name: str, description: str, missing_deps: list[str], requires: list[str] + ) -> None: + # super().__init__() + self.name = name + self.description = description or "Pipeline unavailable (missing dependencies)." + self.missing_deps = missing_deps + self.requires = requires + + def run(self, h5file): + missing = ", ".join( + self.missing_deps or self.requires or ["unknown dependency"] + ) + raise ImportError( + f"Pipeline '{self.name}' unavailable. 
Missing dependencies: {missing}" + ) diff --git a/src/pipelines/core/errors.py b/src/pipelines/core/errors.py new file mode 100644 index 0000000..70bea7d --- /dev/null +++ b/src/pipelines/core/errors.py @@ -0,0 +1,65 @@ +import inspect +import linecache +import traceback +from pathlib import Path + +from .base import ProcessPipeline + + +def _resolve_path(path: str | None) -> Path | None: + if not path: + return None + try: + return Path(path).resolve() + except (OSError, RuntimeError, ValueError): + return None + + +def _shorten_path(path: str) -> str: + try: + resolved = Path(path).resolve() + return str(resolved.relative_to(Path.cwd())) + except (OSError, RuntimeError, ValueError): + return path + + +def _pick_relevant_frame( + frames: list[traceback.FrameSummary], + pipeline_path: Path | None, +) -> traceback.FrameSummary: + if pipeline_path is not None: + for frame in reversed(frames): + frame_path = _resolve_path(frame.filename) + if frame_path and frame_path == pipeline_path: + return frame + for frame in reversed(frames): + if "pipelines" in Path(frame.filename).parts: + return frame + return frames[-1] + + +def format_pipeline_exception( + exc: BaseException, pipeline: ProcessPipeline | None = None +) -> str: + """ + Format an exception raised while running a pipeline, highlighting the + most relevant line in the pipeline code when possible. 
+ """ + summary = f"{type(exc).__name__}: {exc}" + label = f"Pipeline '{pipeline.name}'" if pipeline is not None else "Pipeline" + + frames = traceback.extract_tb(exc.__traceback__) + if not frames: + return f"{label} failed: {summary}" + + pipeline_path = None + if pipeline is not None: + pipeline_path = _resolve_path(inspect.getsourcefile(pipeline.__class__)) + + target = _pick_relevant_frame(frames, pipeline_path) + location = f"{_shorten_path(target.filename)}:{target.lineno} in {target.name}()" + line = (target.line or linecache.getline(target.filename, target.lineno)).strip() + + if line: + return f"{label} failed: {summary}\n at {location}\n {line}\n ^" + return f"{label} failed: {summary}\n at {location}" diff --git a/src/pipelines/core/utils.py b/src/pipelines/core/utils.py index 0eb852e..1ce8f4c 100644 --- a/src/pipelines/core/utils.py +++ b/src/pipelines/core/utils.py @@ -1,5 +1,5 @@ +from collections.abc import Sequence from pathlib import Path -from typing import Optional, Sequence, Tuple, Union import h5py import numpy as np @@ -16,7 +16,7 @@ def safe_h5_key(name: str) -> str: return cleaned or "pipeline" -def _copy_input_contents(source_file: Optional[Union[str, Path]], dest: h5py.File) -> None: +def _copy_input_contents(source_file: str | Path | None, dest: h5py.File) -> None: """Copy all attributes and top-level objects from the input H5 into dest.""" if not source_file: return @@ -32,7 +32,11 @@ def _copy_input_contents(source_file: Optional[Union[str, Path]], dest: h5py.Fil def _ensure_pipelines_group(h5file: h5py.File) -> h5py.Group: """Return a pipelines group, creating it when missing.""" - return h5file["pipelines"] if "pipelines" in h5file else h5file.create_group("pipelines") + return ( + h5file["Pipelines"] + if "Pipelines" in h5file + else h5file.create_group("Pipelines") + ) def _create_unique_group(parent: h5py.Group, base_name: str) -> h5py.Group: @@ -45,6 +49,35 @@ def _create_unique_group(parent: h5py.Group, base_name: str) -> 
h5py.Group: return parent.create_group(candidate) +def _resolve_dataset_target(root_group: h5py.Group, key: str) -> tuple[h5py.Group, str]: + """ + Resolve a metric key to (parent_group, dataset_name). + + Supports nested paths like "vesselA/tauH_10" under the provided root group. + Intermediate groups are created on demand. + """ + normalized_key = str(key).replace("\\", "/").strip("/") + parts = [part for part in normalized_key.split("/") if part] + if not parts: + raise ValueError("Dataset key cannot be empty.") + + parent = root_group + for part in parts[:-1]: + existing = parent.get(part) + if existing is None: + parent = parent.create_group(part) + continue + if isinstance(existing, h5py.Group): + parent = existing + continue + raise ValueError( + f"Cannot create subgroup '{part}' for key '{key}': a dataset already exists at that path." + ) + + dataset_name = parts[-1] + return parent, dataset_name + + def _write_value_dataset(group: h5py.Group, key: str, value) -> None: """ Create a dataset under group for the given value. 
@@ -63,17 +96,21 @@ def _write_value_dataset(group: h5py.Group, key: str, value) -> None: elif isinstance(value, tuple) and len(value) == 2 and isinstance(value[1], dict): data, ds_attrs = value + target_group, dataset_key = _resolve_dataset_target(group, str(key)) + if isinstance(data, str): - dataset = group.create_dataset(key, data=data, dtype=h5py.string_dtype(encoding="utf-8")) + dataset = target_group.create_dataset( + dataset_key, data=data, dtype=h5py.string_dtype(encoding="utf-8") + ) else: payload = data if isinstance(data, (list, tuple)): payload = np.asarray(data) try: - dataset = group.create_dataset(key, data=payload) + dataset = target_group.create_dataset(dataset_key, data=payload) except (TypeError, ValueError): - dataset = group.create_dataset( - key, data=str(data), dtype=h5py.string_dtype(encoding="utf-8") + dataset = target_group.create_dataset( + dataset_key, data=str(data), dtype=h5py.string_dtype(encoding="utf-8") ) if ds_attrs: @@ -81,7 +118,7 @@ def _write_value_dataset(group: h5py.Group, key: str, value) -> None: _set_attr_safe(dataset, attr_key, attr_val) -def _set_attr_safe(h5obj: Union[h5py.File, h5py.Group], key: str, value) -> None: +def _set_attr_safe(h5obj: h5py.File | h5py.Group, key: str, value) -> None: """ Set an attribute on a file or group, falling back to string when the type is unsupported. """ @@ -102,9 +139,9 @@ def _set_attr_safe(h5obj: Union[h5py.File, h5py.Group], key: str, value) -> None def write_result_h5( result: ProcessResult, - path: Union[Path, str], + path: Path | str, pipeline_name: str, - source_file: Optional[str] = None, + source_file: str | None = None, ) -> str: """ Write pipeline results to an HDF5 file. @@ -112,8 +149,8 @@ def write_result_h5( Attributes: pipeline: pipeline display name. source_file: optional path to the originating HDF5 input. - metrics: stored under /metrics/. - artifacts: stored under /artifacts/ when present. + metrics: stored under /Pipelines//, + supporting nested paths in keys. 
""" out_path = Path(path) out_path.parent.mkdir(parents=True, exist_ok=True) @@ -123,11 +160,6 @@ def write_result_h5( f.attrs["pipeline"] = pipeline_name if source_file: f.attrs["source_file"] = source_file - if result.file_attrs: - for key, value in result.file_attrs.items(): - if key in {"pipeline", "source_file"}: - continue - _set_attr_safe(f, key, value) pipelines_grp = _ensure_pipelines_group(f) pipeline_grp = _create_unique_group(pipelines_grp, safe_h5_key(pipeline_name)) pipeline_grp.attrs["pipeline"] = pipeline_name @@ -136,25 +168,20 @@ def write_result_h5( if key == "pipeline": continue _set_attr_safe(pipeline_grp, key, value) - metrics_grp = pipeline_grp.create_group("metrics") for key, value in result.metrics.items(): - _write_value_dataset(metrics_grp, key, value) - if result.artifacts: - artifacts_grp = pipeline_grp.create_group("artifacts") - for key, value in result.artifacts.items(): - _write_value_dataset(artifacts_grp, key, value) + _write_value_dataset(pipeline_grp, key, value) return str(out_path) def write_combined_results_h5( - results: Sequence[Tuple[str, ProcessResult]], - path: Union[Path, str], - source_file: Optional[str] = None, + results: Sequence[tuple[str, ProcessResult]], + path: Path | str, + source_file: str | None = None, ) -> str: """ Write multiple pipeline results into a single HDF5 file. - The file groups results under /pipelines//{metrics,artifacts}. + The file groups results under /Pipelines//. 
""" out_path = Path(path) out_path.parent.mkdir(parents=True, exist_ok=True) @@ -164,18 +191,15 @@ def write_combined_results_h5( f.attrs["source_file"] = source_file pipelines_grp = _ensure_pipelines_group(f) for pipeline_name, result in results: - pipeline_grp = _create_unique_group(pipelines_grp, safe_h5_key(pipeline_name)) + pipeline_grp = _create_unique_group( + pipelines_grp, safe_h5_key(pipeline_name) + ) pipeline_grp.attrs["pipeline"] = pipeline_name if result.attrs: for key, value in result.attrs.items(): if key == "pipeline": continue _set_attr_safe(pipeline_grp, key, value) - metrics_grp = pipeline_grp.create_group("metrics") for key, value in result.metrics.items(): - _write_value_dataset(metrics_grp, key, value) - if result.artifacts: - artifacts_grp = pipeline_grp.create_group("artifacts") - for key, value in result.artifacts.items(): - _write_value_dataset(artifacts_grp, key, value) + _write_value_dataset(pipeline_grp, key, value) return str(out_path) diff --git a/src/pipelines/dummy_heavy.py b/src/pipelines/dummy_heavy.py index 3b0111d..cdf442d 100644 --- a/src/pipelines/dummy_heavy.py +++ b/src/pipelines/dummy_heavy.py @@ -6,16 +6,19 @@ showing the missing deps. """ -REQUIRES = ["torch>=2.2", "pandas>=2.1"] +# REQUIRES = ["torch>=2.2", "pandas>=2.1"] -from .core.base import ProcessPipeline, ProcessResult +from .core.base import ProcessPipeline, ProcessResult, registerPipeline +@registerPipeline( + name="Dummy Heavy", + description="Demo pipeline that requires torch+pandas; computes a trivial metric.", + required_deps=["torch>=2.2", "pandas>=2.1"], +) class DummyHeavy(ProcessPipeline): - description = "Demo pipeline that requires torch+pandas; computes a trivial metric." 
- - def run(self, _h5file) -> ProcessResult: - import torch # noqa: F401 - import pandas as pd # noqa: F401 + def run(self, h5file) -> ProcessResult: + import pandas as pd # type: ignore # noqa: F401 + import torch # type: ignore # noqa: F401 return ProcessResult(metrics={"dummy": 1}) diff --git a/src/pipelines/modal_analysis.py b/src/pipelines/modal_analysis.py new file mode 100644 index 0000000..5ff20a9 --- /dev/null +++ b/src/pipelines/modal_analysis.py @@ -0,0 +1,112 @@ +import numpy as np + +from .core.base import ProcessPipeline, ProcessResult, registerPipeline, with_attrs + + +@registerPipeline(name="modal_analysis") +class ArterialExample(ProcessPipeline): + """ + Tutorial pipeline showing the full surface area of a pipeline: + + - Subclass ProcessPipeline and implement `run(self, h5file) -> ProcessResult`. + - Return metrics (scalars, vectors, matrices, cubes) and optional artifacts. + - Attach HDF5 attributes to any metric via `with_attrs(data, attrs_dict)`. + - Add attributes to the pipeline group (`attrs`) or root file (`file_attrs`). + - No input data is required; this pipeline is purely illustrative. + """ + + description = "Tutorial: metrics + artifacts + dataset attrs + file/pipeline attrs." 
+ M0_input = "/moment0" + M1_input = "/moment1" + M2_input = "/moment2" + registration_input = "/registration" + + def run(self, h5file) -> ProcessResult: + from scipy.sparse.linalg import svds + + moment_0 = np.asarray(h5file[self.M0_input]) + moment_1 = np.asarray(h5file[self.M1_input]) + moment_2 = np.asarray(h5file[self.M2_input]) + + M0_matrix = [] + M1_matrix = [] + M2_matrix = [] + M2_over_M0_squared = [] + x_size = len(moment_0[0, 0, :, 0]) + y_size = len(moment_0[0, 0, 0, :]) + for time_idx in range(len(moment_0[:, 0, 0, 0])): + M0_matrix_time = [] + M1_matrix_time = [] + M2_matrix_time = [] + M2_over_M0_squared_time = [] + for x_idx in range(x_size): + for y_idx in range(y_size): + M0 = moment_0[time_idx, 0, x_idx, y_idx] + + M2 = moment_2[time_idx, 0, x_idx, y_idx] + M0_matrix_time.append(M0) + M2_over_M0_squared_time.append(np.sqrt(M2 / M0)) + + M1_matrix_time.append(moment_1[time_idx, 0, x_idx, y_idx]) + + M2_matrix_time.append(moment_2[time_idx, 0, x_idx, y_idx]) + + M0_matrix.append(M0_matrix_time) + M1_matrix.append(M1_matrix_time) + M2_matrix.append(M2_matrix_time) + M2_over_M0_squared.append(M2_over_M0_squared_time) + M0_matrix = np.transpose(np.asarray(M0_matrix)) + M2_over_M0_squared = np.transpose(np.asarray(M2_over_M0_squared)) + n_modes = 20 + U_0, S_0, Vt_0 = svds(M0_matrix, k=n_modes) + + idx = np.argsort(S_0)[::-1] + S_0 = S_0[idx] + U_0 = U_0[:, idx] + Vt_0 = Vt_0[idx, :] + + spatial_modes = [] + for mode_idx in range(len(U_0[0])): + spatial_modes.append(U_0[:, mode_idx].reshape(x_size, y_size)) + + # M2 over M0 + + U_M2_over_M0_squared, S_M2_over_M0_squared, Vt_M2_over_M0_squared = svds( + M2_over_M0_squared, k=n_modes + ) + idx = np.argsort(S_M2_over_M0_squared)[::-1] + S_M2_over_M0_squared = S_M2_over_M0_squared[idx] + U_M2_over_M0_squared = U_M2_over_M0_squared[:, idx] + Vt_M2_over_M0_squared = Vt_M2_over_M0_squared[idx, :] + spatial_modes_M2_over_M0_squared = [] + + for mode_idx in range(len(U_M2_over_M0_squared[0])): + 
spatial_modes_M2_over_M0_squared.append( + U_M2_over_M0_squared[:, mode_idx].reshape(x_size, y_size) + ) + + # Metrics are the main numerical outputs; each key becomes a dataset under /pipelines//metrics. + metrics = { + "Vt_moment0": with_attrs(np.asarray(Vt_0), {"unit": [""]}), + "spatial_modes_moment0": with_attrs( + np.asarray(spatial_modes), {"unit": [""]} + ), + "S_moment0": with_attrs(np.asarray(S_0), {"unit": [""]}), + "U_moment0": with_attrs(np.asarray(U_0), {"unit": [""]}), + "Vt_M2_over_M0_squared": with_attrs( + np.asarray(Vt_M2_over_M0_squared), {"unit": [""]} + ), + "spatial_modes_M2_over_M0_squared": with_attrs( + np.asarray(spatial_modes_M2_over_M0_squared), {"unit": [""]} + ), + "S_M2_over_M0_squared": with_attrs( + np.asarray(S_M2_over_M0_squared), {"unit": [""]} + ), + "U_M2_over_M0_squared": with_attrs( + np.asarray(U_M2_over_M0_squared), {"unit": [""]} + ), + } + + # Artifacts can store non-metric outputs (strings, paths, etc.). + + return ProcessResult(metrics=metrics) diff --git a/src/pipelines/old_seg_metrics.py b/src/pipelines/old_seg_metrics.py new file mode 100644 index 0000000..cee5908 --- /dev/null +++ b/src/pipelines/old_seg_metrics.py @@ -0,0 +1,634 @@ +import numpy as np + +from .core.base import ProcessPipeline, ProcessResult, registerPipeline, with_attrs + + +@registerPipeline(name="old_waveform_shape_metrics") +class ArterialSegExample(ProcessPipeline): + """ + Waveform-shape metrics on per-beat, per-branch, per-radius velocity waveforms. + + Expected v_block layout: + v_block[:, beat_idx, branch_idx, radius_idx] + i.e. 
@registerPipeline(name="old_waveform_shape_metrics")
class ArterialSegExample(ProcessPipeline):
    """Waveform-shape metrics on per-beat, per-branch, per-radius waveforms.

    Expected per-segment input layout:
        v_block[:, beat_idx, branch_idx, radius_idx]
        i.e. v_block shape: (n_t, n_beats, n_branches, n_radii)

    Outputs
    -------
    A) Per-segment (flattened branch x radius):
        *_segment : shape (n_beats, n_segments)
        n_segments = n_branches * n_radii,
        seg_idx = branch_idx * n_radii + radius_idx (branch-major)
    B) Aggregated:
        *_branch : shape (n_beats, n_branches)  (median over radii)
        *_global : shape (n_beats,)             (mean over branches & radii)

    Metric definitions
    ------------------
    - Rectification: v <- max(v, 0) (NaNs preserved)
    - tau_M1 = M1/M0 on the rectified waveform, with
      M0 = sum(v), M1 = sum(v * t_k), t_k = k * (Tbeat/n_t)
    - tau_M1_over_T = tau_M1 / Tbeat
    - RI (robust) = 1 - vmin/vmax, guarded for vmax <= 0 / all-NaN
    - PI (robust) = (vmax - vmin)/mean, same guards as RI
    - R_VTI_*: dataset name kept for compatibility, PAPER convention:
      RVTI = D1/(D2 + eps), D1 = sum(v[:k]), D2 = sum(v[k:]),
      k = ceil(n_t * ratio), ratio = 0.5

    If the per-segment datasets are absent, a fallback computes the same
    family of metrics on the global per-beat velocity signals only.
    """

    description = (
        "Segment waveform shape metrics (tau, RI, RVTI) + branch/global aggregates."
    )

    v_raw_global_input = "/Artery/VelocityPerBeat/VelocitySignalPerBeat/value"
    v_bandlimited_global_input = (
        "/Artery/VelocityPerBeat/VelocitySignalPerBeatBandLimited/value"
    )
    v_bandlimited_global_max_input = (
        "/Artery/VelocityPerBeat/VmaxPerBeatBandLimited/value"
    )
    v_bandlimited_global_min_input = (
        "/Artery/VelocityPerBeat/VminPerBeatBandLimited/value"
    )
    T_input = "/Artery/VelocityPerBeat/beatPeriodSeconds/value"

    @staticmethod
    def _rectify_keep_nan(x: np.ndarray) -> np.ndarray:
        """Clamp negative samples to 0 while leaving NaNs in place."""
        x = np.asarray(x, dtype=float)
        return np.where(np.isfinite(x), np.maximum(x, 0.0), np.nan)

    @staticmethod
    def _safe_nanmean(x: np.ndarray) -> float:
        """nanmean that returns NaN (warning-free) for empty/all-NaN input."""
        if x.size == 0 or not np.any(np.isfinite(x)):
            return np.nan
        return float(np.nanmean(x))

    @staticmethod
    def _safe_nanmedian(x: np.ndarray) -> float:
        """nanmedian that returns NaN (warning-free) for empty/all-NaN input."""
        if x.size == 0 or not np.any(np.isfinite(x)):
            return np.nan
        return float(np.nanmedian(x))

    @staticmethod
    def _metrics_from_waveform(
        v: np.ndarray,
        Tbeat: float,
        ratio: float = 0.5,
        eps: float = 1e-12,
    ):
        """Return (tau_M1, tau_M1_over_T, RI, RVTI, PI) for one waveform.

        The waveform is rectified first (NaNs preserved); see the class
        docstring for each metric's definition and guards.
        """
        v = ArterialSegExample._rectify_keep_nan(v)

        n = int(v.size)
        if n <= 0:
            # BUGFIX: the original returned only 4 values here while every
            # caller unpacks 5, raising ValueError on empty waveforms.
            return 0.0, 0.0, 0.0, 0.0, 0.0

        # tau_M1 and tau_M1/T (per waveform only)
        if (not np.isfinite(Tbeat)) or Tbeat <= 0:
            tau_M1 = np.nan
            tau_M1_over_T = np.nan
        else:
            m0 = np.nansum(v)
            if (not np.isfinite(m0)) or m0 <= 0:
                tau_M1 = np.nan
                tau_M1_over_T = np.nan
            else:
                dt = Tbeat / n
                t = np.arange(n, dtype=float) * dt
                m1 = np.nansum(v * t)
                tau_M1 = (m1 / m0) if np.isfinite(m1) else np.nan
                tau_M1_over_T = tau_M1 / Tbeat

        # RI / PI, robust to all-NaN and degenerate (vmax <= 0) waveforms
        if not np.any(np.isfinite(v)):
            RI = np.nan
            PI = np.nan
        else:
            vmax = np.nanmax(v)
            mean = np.nanmean(v)
            if (not np.isfinite(vmax)) or vmax <= 0:
                RI = np.nan
                PI = np.nan
            else:
                vmin = np.nanmin(v)
                RI = 1.0 - (vmin / vmax)
                PI = (vmax - vmin) / mean
                if not np.isfinite(RI):
                    RI = np.nan
                    PI = np.nan
                else:
                    RI = float(np.clip(RI, 0.0, 1.0))
                    PI = float(PI)

        # RVTI (paper): D1/(D2 + eps), split at k = ceil(n * ratio)
        k = int(np.ceil(n * ratio))
        k = max(0, min(n, k))
        D1 = np.nansum(v[:k]) if k > 0 else np.nan
        D2 = np.nansum(v[k:]) if k < n else np.nan
        if not np.isfinite(D1) or D1 == 0.0:
            D1 = np.nan
        if not np.isfinite(D2) or D2 == 0.0:
            D2 = np.nan
        RVTI = float(D1 / (D2 + eps))

        return float(tau_M1), float(tau_M1_over_T), float(RI), RVTI, float(PI)

    def _compute_block(self, v_block: np.ndarray, T: np.ndarray, ratio: float):
        """Compute the five metrics at segment, branch and global level.

        Returns a 17-tuple, in order:
        (tau_seg, tauT_seg, RI_seg, PI_seg, RVTI_seg,
         tau_branch, tauT_branch, RI_branch, PI_branch, RVTI_branch,
         tau_global, tauT_global, RI_global, PI_global, RVTI_global,
         n_branches, n_radii)
        """
        if v_block.ndim != 4:
            raise ValueError(
                f"Expected (n_t,n_beats,n_branches,n_radii), got {v_block.shape}"
            )

        n_t, n_beats, n_branches, n_radii = v_block.shape
        n_segments = n_branches * n_radii

        # Per-waveform values, last axis order: tau, tau/T, RI, PI, RVTI.
        vals = np.zeros((n_beats, n_branches, n_radii, 5), dtype=float)
        for beat_idx in range(n_beats):
            Tbeat = float(T[0][beat_idx])
            for branch_idx in range(n_branches):
                for radius_idx in range(n_radii):
                    tM1, tM1T, ri, rvti, pi = self._metrics_from_waveform(
                        v=v_block[:, beat_idx, branch_idx, radius_idx],
                        Tbeat=Tbeat,
                        ratio=ratio,
                        eps=1e-12,
                    )
                    vals[beat_idx, branch_idx, radius_idx] = (tM1, tM1T, ri, pi, rvti)

        # Branch-major flattening: seg_idx = branch_idx * n_radii + radius_idx.
        seg = vals.reshape(n_beats, n_segments, 5)

        # Branch aggregates: MEDIAN over radii; global: MEAN over everything.
        branch = np.zeros((n_beats, n_branches, 5), dtype=float)
        glob = np.zeros((n_beats, 5), dtype=float)
        for beat_idx in range(n_beats):
            for branch_idx in range(n_branches):
                for m in range(5):
                    branch[beat_idx, branch_idx, m] = self._safe_nanmedian(
                        vals[beat_idx, branch_idx, :, m]
                    )
            for m in range(5):
                glob[beat_idx, m] = self._safe_nanmean(seg[beat_idx, :, m])

        def _unpack(a):
            return tuple(a[..., m] for m in range(5))

        return (*_unpack(seg), *_unpack(branch), *_unpack(glob), n_branches, n_radii)

    def _global_fallback_metrics(self, h5file, T, ratio):
        """Fallback when per-segment datasets are absent: global metrics only.

        Reproduces the original global-signal computations (simple clamp to
        non-negative, no NaN-aware guards).
        """
        v_raw = np.maximum(np.asarray(h5file[self.v_raw_global_input]), 0)
        v_band = np.maximum(
            np.asarray(h5file[self.v_bandlimited_global_input]), 0
        )
        v_band_max = np.maximum(
            np.asarray(h5file[self.v_bandlimited_global_max_input]), 0
        )
        # Read for parity with the original (missing dataset must raise);
        # the min dataset feeds no output.
        np.maximum(np.asarray(h5file[self.v_bandlimited_global_min_input]), 0)

        eps = 1e-12
        # Split index is based on the first beat's length, as in the original.
        k_raw = int(np.ceil(len(v_raw.T[0]) * ratio))
        k_band = int(np.ceil(len(v_band.T[0]) * ratio))

        tau_raw, tauT_raw, tau_band, tauT_band = [], [], [], []
        rvti_raw, rvti_band = [], []
        for beat_idx in range(len(T[0])):
            dt = T[0][beat_idx] / len(v_raw.T[beat_idx])

            rvti_raw.append(
                np.sum(v_raw.T[beat_idx][:k_raw])
                / (np.sum(v_raw.T[beat_idx][k_raw:]) + eps)
            )
            rvti_band.append(
                np.sum(v_band.T[beat_idx][:k_band])
                / (np.sum(v_band.T[beat_idx][k_band:]) + eps)
            )

            # tau_M1 = sum(v * t) / sum(v) with t_k = k * dt.
            idx = np.arange(len(v_raw.T[beat_idx]), dtype=float)
            tm1 = np.sum(v_raw.T[beat_idx] * idx * dt) / np.sum(v_raw.T[beat_idx])
            tau_raw.append(tm1)
            tauT_raw.append(tm1 / T[0][beat_idx])

            tm1_b = np.sum(v_band.T[beat_idx] * idx * dt) / np.sum(
                v_band.T[beat_idx]
            )
            tau_band.append(tm1_b)
            tauT_band.append(tm1_b / T[0][beat_idx])

        ri_raw, pi_raw, ri_band, pi_band = [], [], [], []
        # Loop bound kept on the Vmax dataset, as in the original.
        for beat_idx in range(len(v_band_max[0])):
            vb = v_band.T[beat_idx]
            ri_band.append(1 - (np.min(vb) / np.max(vb)))
            pi_band.append((np.max(vb) - np.min(vb)) / np.mean(vb))
        for beat_idx in range(len(v_band_max[0])):
            vr = v_raw.T[beat_idx]
            ri_raw.append(1 - (np.min(vr) / np.max(vr)))
            pi_raw.append((np.max(vr) - np.min(vr)) / np.mean(vr))

        return {
            "global/tau_M1_raw": with_attrs(np.asarray(tau_raw), {"unit": [""]}),
            "global/tau_M1_bandlimited": np.asarray(tau_band),
            "global/tau_M1_over_T_raw": with_attrs(
                np.asarray(tauT_raw), {"unit": [""]}
            ),
            "global/tau_M1_over_T_bandlimited": np.asarray(tauT_band),
            "global/RI_bandlimited": np.asarray(ri_band),
            "global/RI_raw": np.asarray(ri_raw),
            "global/PI_raw": np.asarray(pi_raw),
            "global/PI_bandlimited": np.asarray(pi_band),
            "global/R_VTI_bandlimited": np.asarray(rvti_band),
            "global/R_VTI_raw": np.asarray(rvti_raw),
            "global/ratio_systole_diastole_R_VTI": np.asarray(ratio),
        }

    def run(self, h5file) -> ProcessResult:
        T = np.asarray(h5file[self.T_input])
        ratio_systole_diastole_R_VTI = 0.5

        try:
            v_raw_input = (
                "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegment/value"
            )
            v_bandlimited_input = "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegmentBandLimited/value"

            v_raw = self._rectify_keep_nan(np.asarray(h5file[v_raw_input]))
            v_band = self._rectify_keep_nan(np.asarray(h5file[v_bandlimited_input]))

            res_band = self._compute_block(v_band, T, ratio_systole_diastole_R_VTI)
            res_raw = self._compute_block(v_raw, T, ratio_systole_diastole_R_VTI)
            n_branches_b, n_radii_b = res_band[-2], res_band[-1]
            n_branches_r, n_radii_r = res_raw[-2], res_raw[-1]

            seg_order_note = (
                "seg_idx = branch_idx * n_radii + radius_idx (branch-major flattening)"
            )
            if n_radii_b != n_radii_r or n_branches_b != n_branches_r:
                seg_order_note += (
                    " | WARNING: raw/bandlimited branch/radius dims differ."
                )

            metrics = {
                "by_segment/ratio_systole_diastole_R_VTI": np.asarray(
                    ratio_systole_diastole_R_VTI, dtype=float
                ),
            }

            # One row per metric (name, unit, segment/branch/global
            # definitions), in the order _compute_block returns the arrays.
            metric_rows = [
                (
                    "tau_M1",
                    "s",
                    "tau_M1 = M1/M0 on rectified waveform",
                    "median over radii: tau_M1 per branch",
                    "mean over branches & radii: tau_M1 global",
                ),
                (
                    "tau_M1_over_T",
                    "",
                    "tau_M1_over_T = (M1/M0)/T",
                    "median over radii: tau_M1/T per branch",
                    "mean over branches & radii: tau_M1/T global",
                ),
                (
                    "RI",
                    "",
                    "RI = 1 - vmin/vmax (robust, rectified)",
                    "median over radii: RI per branch",
                    "mean over branches & radii: RI global",
                ),
                (
                    "PI",
                    "",
                    # BUGFIX: the original copy-pasted the RI definition into
                    # every PI dataset's metadata.
                    "PI = (vmax - vmin)/mean (robust, rectified)",
                    "median over radii: PI per branch",
                    "mean over branches & radii: PI global",
                ),
                (
                    "R_VTI",
                    "",
                    "paper RVTI = D1/(D2+eps)",
                    "median over radii: paper RVTI per branch",
                    "mean over branches & radii: paper RVTI global",
                ),
            ]

            for variant, res in (("bandlimited", res_band), ("raw", res_raw)):
                seg_arrays = res[0:5]
                branch_arrays = res[5:10]
                global_arrays = res[10:15]
                for m, (name, unit, seg_def, br_def, gl_def) in enumerate(
                    metric_rows
                ):
                    metrics[f"by_segment/{name}_{variant}_segment"] = with_attrs(
                        seg_arrays[m],
                        {
                            "unit": [unit],
                            "definition": [seg_def],
                            "segment_indexing": [seg_order_note],
                        },
                    )
                    metrics[f"by_segment/{name}_{variant}_branch"] = with_attrs(
                        branch_arrays[m],
                        {"unit": [unit], "definition": [br_def]},
                    )
                    metrics[f"by_segment/{name}_{variant}_global"] = with_attrs(
                        global_arrays[m],
                        {"unit": [unit], "definition": [gl_def]},
                    )

        except Exception:  # noqa: BLE001
            # Per-segment datasets missing (or unreadable): fall back to the
            # global per-beat signals.
            metrics = self._global_fallback_metrics(
                h5file, T, ratio_systole_diastole_R_VTI
            )

        return ProcessResult(metrics=metrics)
@registerPipeline(name="Harmonic metrics")
class ArterialHarmonicAnalysis(ProcessPipeline):
    """Per-beat harmonic analysis of the band-limited arterial velocity.

    Each beat is decomposed into DC + ``nb_harmonic`` Fourier coefficients;
    from these the pipeline derives normalized harmonic ratios (Xn), a
    band-limited reconstruction (v_hat), crest factor, an effective duty
    cycle, VTI-based shape indices, and beat-to-beat stability measures for
    harmonic amplitudes and phase coupling.
    """

    description = "Per-beat harmonic metrics (Xn, CF, duty cycle, VTI indices)."
    v_raw = "/Artery/VelocityPerBeat/VelocitySignalPerBeat/value"
    v = "/Artery/VelocityPerBeat/VelocitySignalPerBeatBandLimited/value"
    T = "/Artery/VelocityPerBeat/beatPeriodSeconds/value"

    def run(self, h5file) -> ProcessResult:
        v_ds = np.asarray(h5file[self.v])
        t_ds = np.asarray(h5file[self.T])

        N = len(v_ds[:, 0])  # samples per beat
        nb_harmonic = 10
        limit = nb_harmonic + 1  # slice bound: DC + 10 harmonics

        Xn = []  # per-beat coefficients normalized by the fundamental
        Vn_tot = []  # per-beat raw Fourier coefficients
        v_hat = []  # per-beat band-limited reconstruction
        CF_tot = []  # crest factor of v_hat
        D_alpha = []  # effective duty cycle
        CV_1, CV_2 = [], []  # amplitude stability (cumulative over beats)
        sigma_phase_tot = []  # phase-coupling stability (cumulative)
        FVTI, IVTI, AVTI_tot = [], [], []

        alpha = 0.5  # duty-cycle threshold factor; needs to be specified

        for k in range(len(v_ds[0])):
            Tbeat = t_ds[0][k]
            fft_vals = np.fft.fft(v_ds[:, k]) / N
            Vn = fft_vals[:limit]
            Xn.append(Vn / Vn[1])
            Vn_tot.append(Vn)

            omega0 = (2 * np.pi) / Tbeat
            t = np.linspace(0, Tbeat, N)

            # Band-limited reconstruction from DC + first 10 harmonics.
            v_hat_k = np.real(Vn[0]) * np.ones_like(t)
            for i in range(1, limit):
                v_hat_k += 2 * (Vn[i] * np.exp(1j * i * omega0 * t)).real
            v_hat.append(v_hat_k)

            # Crest factor: peak over RMS of the reconstruction.
            CF_tot.append(np.max(v_hat_k) / np.sqrt(np.mean(v_hat_k**2)))

            # Beat-to-beat amplitude stability, computed cumulatively over
            # beats 0..k (each output entry summarizes all beats so far).
            # NOTE(review): a single post-loop CV may be the intent; kept
            # per-beat to preserve the output shape.
            amp1 = np.abs(np.asarray(Vn_tot)[:, 1])
            amp2 = np.abs(np.asarray(Vn_tot)[:, 2])
            CV_1.append(np.std(amp1) / np.mean(amp1))
            CV_2.append(np.std(amp2) / np.mean(amp2))

            # BUGFIX: the original took phases from Xn = Vn/V1, where
            # angle(Xn[:, 1]) is identically 0, so the "-2*phi1" term was
            # dead and the quantity was phi2-phi1. Use the raw coefficient
            # phases to get the shift-invariant phi2 - 2*phi1 (consistent
            # with Delta_Phi_2 in the waveform pipeline).
            phi1 = np.angle(np.asarray(Vn_tot)[:, 1])
            phi2 = np.angle(np.asarray(Vn_tot)[:, 2])
            diff_phase = phi2 - 2 * phi1
            diff_phase_wrap = (diff_phase + np.pi) % (2 * np.pi) - np.pi
            sigma_phase_tot.append(np.std(diff_phase_wrap))

            # Effective duty cycle: fraction of the beat above threshold.
            # BUGFIX: the original compared a real array against the complex
            # DC coefficient (TypeError with modern NumPy) and thresholded
            # the accumulated list of all beats; use the real DC level and
            # the current beat's reconstruction only.
            seuil = np.real(Vn[0]) + alpha * np.std(v_hat_k)
            D_alpha.append(np.mean(v_hat_k > seuil))

            # VTI partition of the baseline-removed lobe into beat halves.
            v_base = np.min(v_hat_k)
            lobe = np.maximum(v_hat_k - v_base, 0)  # renamed: was shadowing max()

            dt = t[1] - t[0]
            half = len(t) // 2
            d1 = np.sum(lobe[:half]) * dt
            d2 = np.sum(lobe[half:]) * dt
            AVTI = np.sum(lobe) * dt
            AVTI_tot.append(AVTI)
            FVTI.append(d1 / AVTI)
            IVTI.append((d1 - d2) / (d1 + d2))

        # Metrics are the main numerical outputs; each key becomes a dataset
        # under /pipelines/<name>/metrics. Keys kept byte-identical.
        metrics = {
            name: with_attrs(np.asarray(data), {"unit": [""]})
            for name, data in [
                ("Xn", Xn),
                ("AVTI", AVTI_tot),
                ("CV1 : Beat to beat amplitude stability (n=1)", CV_1),
                ("CV2 : Beat to beat amplitude stability (n=2)", CV_2),
                (
                    "sigma : Beat to beat phase coupling stability (n=2)",
                    sigma_phase_tot,
                ),
                ("v_hat : Band limited waveform (definition)", v_hat),
                ("CF : Band limited crest factor ", CF_tot),
                (" D_alpha : Effective Duty cycle ", D_alpha),
                ("FVTI : Normalised first half fraction ", FVTI),
                ("IVTI : VTI asymmetry index ", IVTI),
            ]
        }

        return ProcessResult(metrics=metrics)
+ v_raw = "/Artery/VelocityPerBeat/VelocitySignalPerBeat/value" + v = "/Artery/VelocityPerBeat/VelocitySignalPerBeatBandLimited/value" + T_val = "/Artery/VelocityPerBeat/beatPeriodSeconds/value" + vmax = "/Artery/VelocityPerBeat/VmaxPerBeatBandLimited/value" + vmin = "/Artery/VelocityPerBeat/VminPerBeatBandLimited/value" + + def run(self, h5file) -> ProcessResult: + vraw_ds_temp = np.asarray(h5file[self.v_raw]) + vraw_ds = np.maximum(vraw_ds_temp, 0) + t_ds = np.asarray(h5file[self.T_val]) + N = len(vraw_ds[:, 0]) + # période normalisée (1 beat) + # pulsation fondamentale + + V_coeff = [] + Xn = [] + H2 = [] + Delta_Phi_2 = [] + Delta_Phi_3 = [] + R10 = [] + HRI_2_10 = [] + HRI_2_10_noisecorrec = [] + S1_10 = [] + nc = [] + Sigma_n = [] + Hspec = [] + Fspec = [] + tau_phi = [] + tau_phi_n = [] + tau_G = [] + BV = [] + tau_M1 = [] + sigma_M = [] + TAU_H = [] + TAU_P_N = [] + TAU_P = [] + AVTI = [] + VTI_0_T2 = [] + VTI_T2_T = [] + FVTI = [] + IVTI = [] + RVTI = [] + for i in range(len(vraw_ds[0])): + T = t_ds[0, i] + omega0 = 2 * np.pi / T + t = np.linspace(0, T, N, endpoint=False) + Vfft = np.fft.fft(vraw_ds[:, i]) / N + Vn = Vfft[:11] # harmonics + V_coeff.append(Vn) + Xn.append(Vn[1:] / Vn[1]) + H2.append(np.abs(Xn[i][1])) + + absV = np.abs(Vn) + Delta_Phi_2.append(np.angle(Vn[2] * np.conj(Vn[1]) ** 2)) + Delta_Phi_3.append(np.angle(Vn[3] * np.conj(Vn[1]) ** 3)) + R10.append(np.abs(Vn[1]) / np.real(Vn[0])) + HRI_2_10.append(np.sum(np.abs(Xn[i][2:]))) + magnitudes = absV[1:11] + phi2 = np.angle(Xn[i][1:]) + absX2 = np.abs(Xn[i][1:]) + p = magnitudes / np.sum(magnitudes) + n = np.arange(1, 11) + n2 = np.arange(2, len(Xn[i]) + 1) + valid = magnitudes > 0 + + if np.sum(valid) < 3: + slope = np.nan + + else: + log_n = np.log(n[valid]) + log_mag = np.log(magnitudes[valid]) + + slope, _ = np.polyfit(log_n, log_mag, 1) + S1_10.append(slope) + HRI_2_10_noisecorrec.append([]) + nc_i = np.sum(n * p) + nc.append(nc_i) + Sigma_n.append(np.sqrt(np.sum((n - nc_i) ** 2 * p))) + 
Hspec.append(-np.sum(p * np.log(p + 1e-12))) + Fspec.append(np.exp(np.mean(np.log(p + 1e-12))) / np.mean(p)) + omega_n = n2 * omega0 + + tau_phi_n_i = -phi2 / omega_n + tau_phi_n.append(tau_phi_n_i) + if np.sum(np.isfinite(tau_phi_n_i)) >= 2: + tau_phi.append(np.median(tau_phi_n_i)) + else: + tau_phi.append(np.nan) + phi_unwrap = np.unwrap(phi2) + + A = np.column_stack([-omega_n, np.ones_like(omega_n)]) + params, _, _, _ = np.linalg.lstsq(A, phi_unwrap, rcond=None) + + tau_G.append(params[0]) + bv = np.real(Vn[0]) * np.ones_like(t) + + for k in range(1, 11): + bv += ( + Vn[k] * np.exp(1j * k * omega0 * t) + + np.conj(Vn[k]) * np.exp(-1j * k * omega0 * t) + ).real + BV.append(bv) + vbase = np.min(bv) + w = np.maximum(bv - vbase, 0) + if np.sum(w) > 0: + tau_M1.append(np.sum(w * t) / np.sum(w)) + else: + tau_M1.append(np.nan) + if np.sum(w) > 0: + t2 = np.sum(w * t**2) / np.sum(w) + sigma_M.append(np.sqrt(t2 - tau_M1[-1] ** 2)) + else: + sigma_M.append(np.nan) + + tau_H_n = np.full_like(absX2, np.nan, dtype=float) + + valid2 = absX2 <= 1 + idx2 = np.where(valid2)[0] + tau_H_n[idx2] = (1 / (omega_n[idx2])) * np.sqrt( + (1 / (absX2[valid2] ** 2)) - 1 + ) + TAU_H.append(tau_H_n) + tau_P_n = [] + + for k in range(len(Xn[i]) - 1): + taus = np.linspace(0, 10, 2000) + H = 1 / (1 + 1j * omega_n[2] * taus) + err = np.abs(Xn[i][k] - H) ** 2 + tau_P_n.append(taus[np.argmin(err)]) + + TAU_P_N.append(tau_P_n) + if np.sum(np.isfinite(tau_P_n)) >= 2: + TAU_P.append(np.nanmean(tau_P_n)) + else: + TAU_P.append(np.nan) + dt = t[1] - t[0] + AVTI_i = np.sum(w) * dt + AVTI.append(AVTI_i) + mid = len(t) // 2 + + VTI_early = np.sum(w[:mid]) * dt + VTI_late = np.sum(w[mid:]) * dt + + VTI_0_T2.append(VTI_early) + VTI_T2_T.append(VTI_late) + if AVTI_i > 0: + FVTI.append(VTI_early / AVTI_i) + else: + FVTI.append(np.nan) + den = VTI_early + VTI_late + if den > 0: + IVTI.append((VTI_early - VTI_late) / den) + else: + IVTI.append(np.nan) + eps = 1e-12 + RVTI.append(VTI_early / (VTI_late + eps)) + 
+ # Metrics are the main numerical outputs; each key becomes a dataset under /pipelines//metrics. + + metrics = { + "Xn": with_attrs( + np.asarray(Xn), + { + "unit": [""], + "description": [""], + }, + ), + "H2": with_attrs( + np.asarray(H2), + { + "unit": [""], + "description": [ + "Strength of the second harmonic relative to the fundamental" + ], + }, + ), + "Delta_Phi_2": with_attrs( + np.asarray(Delta_Phi_2), + { + "unit": ["rad"], + "interval": ["-pi , pi"], + "description": ["Global-shift-invariant “shape phase” at n = 2"], + }, + ), + "Delta_Phi_3": with_attrs( + np.asarray(Delta_Phi_3), + { + "unit": [""], + "description": ["Global-shift-invariant “shape phase” at n = 3"], + }, + ), + "R10": with_attrs( + np.asarray(R10), + { + "unit": [""], + "description": ["Pulsatility relative to mean level"], + }, + ), + "HRI_2_10": with_attrs( + np.asarray(HRI_2_10), + { + "unit": [""], + "description": [ + "Relative high-frequency content beyond the fundamental" + ], + }, + ), + "HRI_2_10_noisecorrec": with_attrs( + np.asarray(HRI_2_10_noisecorrec), + { + "unit": [""], + "nb": ["not computed yet"], + "description": [ + "Richness corrected for an estimated noise floor to reduce bias when high harmonics approach noise" + ], + }, + ), + "S1_10": with_attrs( + np.asarray(S1_10), + { + "unit": [""], + "description": [ + "Compact damping descriptor: slope of a linear fit of log " + ], + }, + ), + "nc": with_attrs( + np.asarray(nc), + { + "unit": [""], + "description": ["nergy location across harmonics using pn"], + }, + ), + "Sigma_n": with_attrs( + np.asarray(Sigma_n), + { + "unit": [""], + "description": ["Spread of harmonic content around nc"], + }, + ), + "Hspec": with_attrs( + np.asarray(Hspec), + { + "unit": [""], + "description": [ + "Complexity of harmonic distribution: higher values indicate more evenly distributed energy" + ], + }, + ), + "Fspec": with_attrs( + np.asarray(Fspec), + { + "unit": [""], + "description": [ + "Peakedness vs flatness of the harmonic 
distribution; low values indicate dominance of few harmonics, high values indicate flatter spectra " + ], + }, + ), + "tau_phi": with_attrs( + np.asarray(tau_phi), + { + "unit": [""], + "description": [ + "Amplitude-insensitive timing descriptor from harmonic phases: robust aggregate of per-harmonic delays " + ], + }, + ), + "tau_phi_n": with_attrs( + np.asarray(tau_phi_n), + { + "unit": [""], + "description": [""], + }, + ), + "tau_G": with_attrs( + np.asarray(tau_G), + { + "unit": [""], + "description": [ + "Robust global delay estimated as the slope of (unwrapped) arg (Xn) vs ωn " + ], + }, + ), + "BV": with_attrs( + np.asarray(BV), + { + "unit": [""], + "description": [""], + }, + ), + "tau_M1": with_attrs( + np.asarray(tau_M1), + { + "unit": [""], + "description": [ + "Point-free timing: centroid (center-of-mass) of the systolic lobe from a band-limited waveform using a positive weight w(t) within a fixed systolic window" + ], + }, + ), + "sigma_M": with_attrs( + np.asarray(sigma_M), + { + "unit": [""], + "description": ["dispersion proxy "], + }, + ), + "TAU_H_N": with_attrs( + np.asarray(TAU_H), + { + "unit": [""], + "description": [ + "Scalar proxy of low-pass damping from harmonic magnitudes only. 
Larger τH indicates stronger attenuation of higher harmonics" + ], + }, + ), + "TAU_P_N": with_attrs( + np.asarray(TAU_P_N), + { + "unit": [""], + "description": [""], + }, + ), + "TAU_P": with_attrs( + np.asarray(TAU_P), + { + "unit": [""], + "description": [ + "Phase-aware scalar summary of vascular damping and phase lag, estimated from complex harmonics under explicit validity gates" + ], + }, + ), + "AVTI": with_attrs( + np.asarray(AVTI), + { + "unit": [""], + "description": [ + "Primary arterial stroke-distance proxy (non-negative)" + ], + }, + ), + "VTI_0_T2": with_attrs( + np.asarray(VTI_0_T2), + { + "unit": [""], + "description": ["Early-cycle stroke distance"], + }, + ), + "VTI_T2_T": with_attrs( + np.asarray(VTI_T2_T), + { + "unit": [""], + "description": ["Late-cycle stroke distance"], + }, + ), + "FVTI": with_attrs( + np.asarray(FVTI), + { + "unit": [""], + "description": ["Scale-free partition"], + }, + ), + "IVTI": with_attrs( + np.asarray(IVTI), + { + "unit": [""], + "description": [ + "Signed asymmetry in [−1, 1] (renamed to avoid symbol collision)" + ], + }, + ), + "RVTI": with_attrs( + np.asarray(RVTI), + { + "unit": [""], + "description": ["Partition imbalance"], + }, + ), + } + + # Artifacts can store non-metric outputs (strings, paths, etc.). + + return ProcessResult(metrics=metrics) diff --git a/src/pipelines/recreatesig.py b/src/pipelines/recreatesig.py new file mode 100644 index 0000000..db13b30 --- /dev/null +++ b/src/pipelines/recreatesig.py @@ -0,0 +1,188 @@ +import numpy as np + +from .core.base import ProcessPipeline, ProcessResult, registerPipeline + + +@registerPipeline(name="reconstruct") +class Reconstruct(ProcessPipeline): + """ + Tutorial pipeline showing the full surface area of a pipeline: + + - Subclass ProcessPipeline and implement `run(self, h5file) -> ProcessResult`. + - Return metrics (scalars, vectors, matrices, cubes) and optional artifacts. + - Attach HDF5 attributes to any metric via `with_attrs(data, attrs_dict)`. 
+ - Add attributes to the pipeline group (`attrs`) or root file (`file_attrs`). + - No input data is required; this pipeline is purely illustrative. + """ + + description = "Tutorial: metrics + artifacts + dataset attrs + file/pipeline attrs." + v_profile = "/Artery/CrossSections/VelocityProfileSeg/value" + vsystol = "/Artery/Velocity/SystolicAccelerationPeakIndexes" + T_val = "/Artery/VelocityPerBeat/beatPeriodSeconds/value" + + def gaussian(x, A, mu, sigma, c): + return A * np.exp(-((x - mu) ** 2) / (2 * sigma**2)) + c + + def run(self, h5file) -> ProcessResult: + v_seg = np.asarray(h5file[self.v_profile]) + # t_ds = np.asarray(h5file[self.T_val]) + + V = [] + threshold = 3 + + V_corrected = [] + V_ceil = [] + + for k in range(len(v_seg[0, :, 0, 0])): + Vit_br = [] + for br in range(len(v_seg[0, k, :, 0])): + v_branch = np.nanmean(v_seg[:, k, br, :], axis=1) + Vit_br.append(v_branch) + + V.append(np.mean(Vit_br)) + for k in range(len(v_seg[0, :, 0, 0])): + Vit = [] + for br in range(len(v_seg[0, k, :, 0])): + Vit_br = [] + for seg in range(len(v_seg[0, k, br, :])): + values = list(v_seg[:, k, br, seg]) + + try: + temp = values[ + : np.minimum( + values.index(next(filter(lambda x: x != 0, values))) + + threshold, + 17, + ) + ] + other = values[ + np.minimum( + values.index(next(filter(lambda x: x != 0, values))) + + threshold, + 17, + ) : + ] + test = other[ + np.maximum( + other.index(next(filter(lambda x: x == 0, other))) + - threshold, + 0, + ) : + ] + Vit_br.append(np.nanmean(temp + test)) + except Exception: # noqa: BLE001 + Vit_br.append(np.nan) + return None + + Vit.append(np.nanmean(Vit_br)) + + V_corrected.append(np.nanmean(Vit)) + + """vraw_ds = np.asarray(v_threshold_beat_segment) + vraw_ds_temp = vraw_ds.transpose(1, 0, 2, 3) + vraw_ds = np.maximum(vraw_ds_temp, 0) + v_ds = vraw_ds + t_ds = np.asarray(h5file[self.T_input]) + + TMI_seg = [] + TMI_seg_band = [] + RTVI_seg = [] + RTVI_seg_band = [] + RI_seg = [] + RI_seg_band = [] + M0_seg = 0 + M1_seg = 
0 + M0_seg_band = 0 + M1_seg_band = 0 + for k in range(len(vraw_ds[0, :, 0, 0])): + TMI_branch = [] + TMI_branch_band = [] + RTVI_band_branch = [] + RTVI_branch = [] + RI_branch = [] + RI_branch_band = [] + for i in range(len(vraw_ds[0, k, :, 0])): + avg_speed_band = np.nanmean(v_ds[:, k, i, :], axis=1) + avg_speed = np.nanmean(vraw_ds[:, k, i, :], axis=1) + vmin = np.min(avg_speed) + vmax = np.max(avg_speed) + vmin_band = np.min(avg_speed_band) + vmax_band = np.max(avg_speed_band) + + RI_branch.append(1 - (vmin / (vmax + 10 ** (-14)))) + RI_branch_band.append(1 - (vmin_band / (vmax_band + 10 ** (-14)))) + D1_raw = np.sum(avg_speed[:31]) + D2_raw = np.sum(avg_speed[32:]) + D1 = np.sum(avg_speed_band[:31]) + D2 = np.sum(avg_speed_band[32:]) + RTVI_band_branch.append(D1 / (D2 + 10 ** (-12))) + RTVI_branch.append(D1_raw / (D2_raw + 10 ** (-12))) + M0_seg += np.sum(avg_speed) + M0_seg_band += np.sum(avg_speed_band) + for j in range(len(avg_speed)): + M1_seg += avg_speed[j] * j * t_ds[0][k] / 64 + M1_seg_band += avg_speed_band[j] * j * t_ds[0][k] / 64 + if M0_seg != 0: + TMI_branch.append(M1_seg / (t_ds[0][k] * M0_seg)) + else: + TMI_branch.append(0) + if M0_seg_band != 0: + TMI_branch_band.append(M1_seg_band / (t_ds[0][k] * M0_seg_band)) + else: + TMI_branch_band.append(0) + + TMI_seg.append(TMI_branch) + TMI_seg_band.append(TMI_branch_band) + RI_seg.append(RI_branch) + RI_seg_band.append(RI_branch_band) + RTVI_seg.append(RTVI_branch) + RTVI_seg_band.append(RTVI_band_branch)""" + for k in range(len(v_seg[0, :, 0, 0])): + Vit = [] + for br in range(len(v_seg[0, k, :, 0])): + Vit_br = [] + for seg in range(len(v_seg[0, k, br, :])): + values = list(v_seg[:, k, br, seg]) + + try: + first = values.index(next(filter(lambda x: x != 0, values))) + other = values[ + np.minimum( + values.index(next(filter(lambda x: x != 0, values))) + + threshold, + 17, + ) : + ] + last = first + other.index( + next(filter(lambda x: x == 0, other)) + ) + + Comp = [ + values[first + threshold] + 
for v in values[first + threshold : last - threshold] + ] + Vit_br.append( + np.nanmean( + values[: first + threshold] + + Comp + + values[last - threshold :] + ) + ) + except Exception: + Vit_br.append(np.nan) + return None + + Vit.append(np.nanmean(Vit_br)) + + V_ceil.append(np.nanmean(Vit)) + # Metrics are the main numerical outputs; each key becomes a dataset under /pipelines//metrics. + + metrics = { + "Xn": np.asarray(V), + "Xn_correc": np.asarray(V_corrected), + "Xn_ceil": np.asarray(V_ceil), + } + + # Artifacts can store non-metric outputs (strings, paths, etc.). + + return ProcessResult(metrics=metrics) diff --git a/src/pipelines/segment_velocity_waveform_shape_metrics.py b/src/pipelines/segment_velocity_waveform_shape_metrics.py new file mode 100644 index 0000000..71878fb --- /dev/null +++ b/src/pipelines/segment_velocity_waveform_shape_metrics.py @@ -0,0 +1,238 @@ +import numpy as np + +from .core.base import ProcessPipeline, ProcessResult, registerPipeline, with_attrs + + +@registerPipeline(name="segment_waveform_shape_metrics") +class ArterialSegExample(ProcessPipeline): + """ + Tutorial pipeline showing the full surface area of a pipeline: + + - Subclass ProcessPipeline and implement `run(self, h5file) -> ProcessResult`. + - Return metrics (scalars, vectors, matrices, cubes) and optional artifacts. + - Attach HDF5 attributes to any metric via `with_attrs(data, attrs_dict)`. + - Add attributes to the pipeline group (`attrs`) or root file (`file_attrs`). + - No input data is required; this pipeline is purely illustrative. + """ + + description = "Tutorial: metrics + artifacts + dataset attrs + file/pipeline attrs." 
+ v_raw_input = ( + "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegment/value" + ) + v_bandlimited_input = "/Artery/VelocityPerBeat/Segments/VelocitySignalPerBeatPerSegmentBandLimited/value" + T_input = "/Artery/VelocityPerBeat/beatPeriodSeconds/value" + + def run(self, h5file) -> ProcessResult: + + v_raw = np.asarray(h5file[self.v_raw_input]) + v_raw = np.maximum(v_raw, 0) + + v_bandlimited = np.asarray(h5file[self.v_bandlimited_input]) + v_bandlimited = np.maximum(v_bandlimited, 0) + + T = np.asarray(h5file[self.T_input]) + + moment_0_segment = 0 + moment_1_segment = 0 + + Tau_M1_bandlimited_segment = [] + Tau_M1_over_T_bandlimited_segment = [] + RI_bandlimited_segment = [] + R_VTI_bandlimited_segment = [] + + Tau_M1_raw_segment = [] + Tau_M1_over_T_raw_segment = [] + RI_raw_segment = [] + R_VTI_raw_segment = [] + + ratio_systole_diastole_R_VTI = 0.5 + + for beat_idx in range(len(v_bandlimited[0, :, 0, 0])): + Tau_M1_bandlimited_branch = [] + Tau_M1_over_T_bandlimited_branch = [] + RI_bandlimited_branch = [] + R_VTI_bandlimited_branch = [] + + t = T[0][beat_idx] / len(v_bandlimited) + + for branch_idx in range(len(v_bandlimited[0, beat_idx, :, 0])): + Tau_M1_bandlimited_radius = [] + Tau_M1_over_T_bandlimited_radius = [] + RI_bandlimited_radius = [] + R_VTI_bandlimited_radius = [] + + for radius_idx in range(len(v_bandlimited[0, beat_idx, branch_idx, :])): + v_bandlimited_idx = v_bandlimited[ + :, beat_idx, branch_idx, radius_idx + ] + + moment_0_segment = np.sum(v_bandlimited_idx) + moment_1_segment = 0 + + for time_idx in range(len(v_bandlimited_idx)): + moment_1_segment += v_bandlimited_idx[time_idx] * time_idx * t + + TM1 = moment_1_segment / moment_0_segment + Tau_M1_bandlimited_radius.append(TM1) + Tau_M1_over_T_bandlimited_radius.append(TM1 / T[0][beat_idx]) + + v_bandlimited_max = np.max(v_bandlimited_idx) + v_bandlimited_min = np.min(v_bandlimited_idx) + + RI_bandlimited_radius_idx = 1 - ( + v_bandlimited_min / v_bandlimited_max + ) + 
RI_bandlimited_radius.append(RI_bandlimited_radius_idx) + + epsilon = 10 ** (-12) + D1_bandlimited = np.sum( + v_bandlimited_idx[ + : int( + np.ceil( + len(v_bandlimited_idx) + * ratio_systole_diastole_R_VTI + ) + ) + ] + ) + D2_bandlimited = np.sum( + v_bandlimited_idx[ + int( + np.ceil( + len(v_bandlimited_idx) + * ratio_systole_diastole_R_VTI + ) + ) : + ] + ) + R_VTI_bandlimited_radius.append( + D1_bandlimited / (D2_bandlimited + epsilon) + ) + + Tau_M1_bandlimited_branch.append(Tau_M1_bandlimited_radius) + Tau_M1_over_T_bandlimited_branch.append( + Tau_M1_over_T_bandlimited_radius + ) + RI_bandlimited_branch.append(RI_bandlimited_radius) + R_VTI_bandlimited_branch.append(R_VTI_bandlimited_radius) + + Tau_M1_bandlimited_segment.append(Tau_M1_bandlimited_branch) + Tau_M1_over_T_bandlimited_segment.append(Tau_M1_over_T_bandlimited_branch) + RI_bandlimited_segment.append(RI_bandlimited_branch) + R_VTI_bandlimited_segment.append(R_VTI_bandlimited_branch) + + for beat_idx in range(len(v_raw[0, :, 0, 0])): + Tau_M1_raw_branch = [] + Tau_M1_over_T_raw_branch = [] + RI_raw_branch = [] + R_VTI_raw_branch = [] + + t = T[0][beat_idx] / len(v_raw) + + for branch_idx in range(len(v_raw[0, beat_idx, :, 0])): + Tau_M1_raw_radius = [] + Tau_M1_over_T_raw_radius = [] + RI_raw_radius = [] + R_VTI_raw_radius = [] + + for radius_idx in range(len(v_raw[0, beat_idx, branch_idx, :])): + v_raw_idx = v_raw[:, beat_idx, branch_idx, radius_idx] + + moment_0_segment = np.sum(v_raw_idx) + moment_1_segment = 0 + for time_idx in range(len(v_raw_idx)): + moment_1_segment += v_raw_idx[time_idx] * time_idx * t + + TM1 = moment_1_segment / moment_0_segment + Tau_M1_raw_radius.append(TM1) + Tau_M1_over_T_raw_radius.append(TM1 / T[0][beat_idx]) + + v_raw_max = np.max(v_raw_idx) + v_raw_min = np.min(v_raw_idx) + + RI_raw_radius_idx = 1 - (v_raw_min / v_raw_max) + RI_raw_radius.append(RI_raw_radius_idx) + + epsilon = 10 ** (-12) + D1_raw = np.sum( + v_raw_idx[ + : int( + np.ceil(len(v_raw_idx) * 
ratio_systole_diastole_R_VTI) + ) + ] + ) + D2_raw = np.sum( + v_raw_idx[ + int( + np.ceil(len(v_raw_idx) * ratio_systole_diastole_R_VTI) + ) : + ] + ) + R_VTI_raw_radius.append(D1_raw / (D2_raw + epsilon)) + + Tau_M1_raw_branch.append(Tau_M1_raw_radius) + Tau_M1_over_T_raw_branch.append(Tau_M1_over_T_raw_radius) + RI_raw_branch.append(RI_raw_radius) + R_VTI_raw_branch.append(R_VTI_raw_radius) + + Tau_M1_raw_segment.append(Tau_M1_raw_branch) + Tau_M1_over_T_raw_segment.append(Tau_M1_over_T_raw_branch) + RI_raw_segment.append(RI_raw_branch) + R_VTI_raw_segment.append(R_VTI_raw_branch) + + # Metrics are the main numerical outputs; each key becomes a dataset under /pipelines//metrics. + metrics = { + "tau_M1_bandlimited_segment": with_attrs( + np.asarray(Tau_M1_bandlimited_segment), + { + "unit": [""], + }, + ), + "R_VTI_bandlimited_segment": with_attrs( + np.asarray(R_VTI_bandlimited_segment), + { + "unit": [""], + }, + ), + "RI_bandlimited_segment": with_attrs( + np.asarray(RI_bandlimited_segment), + { + "unit": [""], + }, + ), + "tau_M1_over_T_bandlimited_segment": with_attrs( + np.asarray(Tau_M1_over_T_bandlimited_segment), + { + "unit": [""], + }, + ), + "tau_M1_raw_segment": with_attrs( + np.asarray(Tau_M1_raw_segment), + { + "unit": [""], + }, + ), + "R_VTI_raw_segment": with_attrs( + np.asarray(R_VTI_raw_segment), + { + "unit": [""], + }, + ), + "RI_raw_segment": with_attrs( + np.asarray(RI_raw_segment), + { + "unit": [""], + }, + ), + "tau_M1_over_T_raw_segment": with_attrs( + np.asarray(Tau_M1_over_T_raw_segment), + { + "unit": [""], + }, + ), + "ratio_systole_diastole_R_VTI": np.asarray(ratio_systole_diastole_R_VTI), + } + + # Artifacts can store non-metric outputs (strings, paths, etc.). 
+ + return ProcessResult(metrics=metrics) diff --git a/src/pipelines/signal_reconstruction_factor_reduced.py b/src/pipelines/signal_reconstruction_factor_reduced.py new file mode 100644 index 0000000..0a19b84 --- /dev/null +++ b/src/pipelines/signal_reconstruction_factor_reduced.py @@ -0,0 +1,563 @@ +import numpy as np + +from .core.base import ProcessPipeline, ProcessResult, registerPipeline, with_attrs + + +@registerPipeline(name="signal_reconstruction") +class Reconstruct(ProcessPipeline): + """ + Tutorial pipeline showing the full surface area of a pipeline: + + - Subclass ProcessPipeline and implement `run(self, h5file) -> ProcessResult`. + - Return metrics (scalars, vectors, matrices, cubes) and optional artifacts. + - Attach HDF5 attributes to any metric via `with_attrs(data, attrs_dict)`. + - Add attributes to the pipeline group (`attrs`) or root file (`file_attrs`). + - No input data is required; this pipeline is purely illustrative. + """ + + description = "Tutorial: metrics + artifacts + dataset attrs + file/pipeline attrs." 
+ v_profile = "/Artery/CrossSections/VelocityProfileSeg/value" + v_profile_interp_onebeat = ( + "/Artery/CrossSections/VelocityProfilesSegInterpOneBeat/value" + ) + vsystol = "/Artery/Velocity/SystolicAccelerationPeakIndexes" + T_input = "/Artery/VelocityPerBeat/beatPeriodSeconds/value" + systole_idx_input = "/Artery/WaveformAnalysis/SystoleIndices/value" + + def gaussian(x, A, mu, sigma, c): + return A * np.exp(-((x - mu) ** 2) / (2 * sigma**2)) + c + + def run(self, h5file) -> ProcessResult: + v_seg = np.maximum(np.asarray(h5file[self.v_profile]), 0) + v_interp_onebeat = np.maximum( + np.asarray(h5file[self.v_profile_interp_onebeat]), 0 + ) + systole_idx = np.asarray(h5file[self.systole_idx_input]) + T = np.asarray(h5file[self.T_input]) + + V = [] + v_profile_beat_threshold = [] + v_profile_beat_ceiled_threshold = [] + v_profile_beat_cropped_threshold = [] + + for beat in range(len(T[0])): + vit_beat = [] + for time_idx in range( + int(systole_idx[0][beat]), int(systole_idx[0][beat + 1]) + ): + Vit_br = [] + for br in range(len(v_seg[0, time_idx, :, 0])): + vit_seg = [] + for segment in range(len(v_seg[0, time_idx, br, :])): + vit_seg.append(v_seg[:, time_idx, br, segment]) + Vit_br.append(vit_seg) + + vit_beat.append(Vit_br) + V.append(vit_beat) + threshold = 6 + + # Metrics are the main numerical outputs; each key becomes a dataset under /pipelines//metrics. 
+ """for threshold_idx in range(threshold + 1): + v_profile_beat = [] + v_profile_beat_ceiled = [] + v_profile_beat_cropped = [] + for beat in range(len(T[0])): + vit_beat = [] + vit_beat_ceiled = [] + vit_beat_cropped = [] + v_raw_temp = np.asarray(V[beat]) + for time_idx in range(len(v_raw_temp[:, 0, 0, 0])): + vit_br = [] + vit_br_ceiled = [] + vit_br_cropped = [] + for br in range(len(v_raw_temp[time_idx, :, 0, 0])): + vit_seg = [] + vit_seg_ceiled = [] + vit_seg_cropped = [] + for segment in range(len(v_raw_temp[time_idx, br, :, 0])): + values_temp = v_raw_temp[time_idx, br, segment, :] + values = list(values_temp) + try: + first = values.index( + next(filter(lambda x: str(x) != "nan", values)) + ) + + other = values[ + np.minimum( + values.index( + next( + filter( + lambda x: str(x) != "nan", values + ) + ) + ), + 17, + ) : + ] + last = first + other.index( + next(filter(lambda x: str(x) == "nan", other)) + ) + + ceil_completion = [ + values[first + threshold_idx - 1] + for v in values[ + first + threshold_idx : last - threshold_idx + ] + ] + v_seg_band_ceiled = ( + values[first : first + threshold_idx] + + ceil_completion + + values[last - threshold_idx : last] + ) + vit_seg_cropped.append( + np.nanmean( + values[first : first + threshold_idx] + + values[last - threshold_idx : last] + ) + ) + vit_seg_ceiled.append(np.nanmean(v_seg_band_ceiled)) + vit_seg.append(np.nanmean(values_temp)) + except Exception: # noqa: BLE001 + vit_seg_cropped.append(np.nan) + vit_seg_ceiled.append(np.nan) + vit_seg.append(np.nanmean(values_temp)) + continue + + vit_br.append(vit_seg) + vit_br_ceiled.append(vit_seg_ceiled) + vit_br_cropped.append(vit_seg_cropped) + + vit_beat.append(vit_br) + vit_beat_ceiled.append(vit_br_ceiled) + vit_beat_cropped.append(vit_br_cropped) + + v_profile_beat.append(vit_beat) + v_profile_beat_ceiled.append(vit_beat_ceiled) + v_profile_beat_cropped.append(vit_beat_cropped) + v_profile_beat_threshold.append(v_profile_beat) + 
v_profile_beat_ceiled_threshold.append(v_profile_beat_ceiled) + v_profile_beat_cropped_threshold.append(v_profile_beat_cropped) + target_len = 128 + n_beats = len(v_profile_beat) + n_branches = len(v_profile_beat[0][0]) + n_segments = len(v_profile_beat[0][0][0]) + v_threshold_beat_segment = np.zeros( + (threshold + 1, n_beats, target_len, n_branches, n_segments) + ) + v_threshold_beat_segment_cropped = np.zeros( + (threshold + 1, n_beats, target_len, n_branches, n_segments) + ) + v_threshold_beat_segment_ceiled = np.zeros( + (threshold + 1, n_beats, target_len, n_branches, n_segments) + ) + for threshold_idx in range(threshold + 1): + for beat in range(n_beats): + beat_data = np.asarray(v_profile_beat_threshold[threshold_idx][beat]) + beat_data_ceiled = np.asarray( + v_profile_beat_ceiled_threshold[threshold_idx][beat] + ) + beat_data_cropped = np.asarray( + v_profile_beat_cropped_threshold[threshold_idx][beat] + ) + # shape: (time_len, branches, segments) + + time_len = beat_data.shape[0] + + old_indices = np.linspace(0, 1, time_len) + new_indices = np.linspace(0, 1, target_len) + + for br in range(n_branches): + for seg in range(n_segments): + signal = beat_data[:, br, seg] + signal_ceiled = beat_data_ceiled[:, br, seg] + signal_cropped = beat_data_cropped[:, br, seg] + + new_values = np.interp(new_indices, old_indices, signal) + new_values_ceiled = np.interp( + new_indices, old_indices, signal_ceiled + ) + new_values_cropped = np.interp( + new_indices, old_indices, signal_cropped + ) + + v_threshold_beat_segment[threshold_idx, beat, :, br, seg] = ( + new_values + ) + v_threshold_beat_segment_cropped[ + threshold_idx, beat, :, br, seg + ] = new_values_ceiled + v_threshold_beat_segment_ceiled[ + threshold_idx, beat, :, br, seg + ] = new_values_cropped""" + v_raw_temp = np.asarray(v_interp_onebeat) + for threshold_idx in range(threshold + 1): + v_profile = [] + v_profile_ceiled = [] + v_profile_cropped = [] + + for time_idx in range(len(v_raw_temp[:, 0, 0, 0])): + 
vit_br = [] + vit_br_ceiled = [] + vit_br_cropped = [] + for br in range(len(v_raw_temp[time_idx, 0, :, 0])): + vit_seg = [] + vit_seg_ceiled = [] + vit_seg_cropped = [] + for segment in range(len(v_raw_temp[time_idx, 0, 0, :])): + values_temp = v_raw_temp[time_idx, :, br, segment] + values = list(values_temp) + try: + first = values.index( + next(filter(lambda x: str(x) != "nan", values)) + ) + + other = values[ + np.minimum( + values.index( + next(filter(lambda x: str(x) != "nan", values)) + ), + 17, + ) : + ] + last = first + other.index( + next(filter(lambda x: str(x) == "nan", other)) + ) + len_signal = last - first + ceil_completion = [ + values[first + threshold_idx - 1] + for v in values[ + int( + np.minimum( + first + threshold_idx, + first + np.floor(len_signal / 3), + ) + ) : int( + np.maximum( + last - threshold_idx, + last - np.ceil(len_signal * 2 / 3), + ) + ) + ] + ] + v_seg_band_ceiled = ( + values[ + first : int( + np.minimum( + first + threshold_idx, + first + np.floor(len_signal / 3), + ) + ) + ] + + ceil_completion + + values[ + int( + np.maximum( + last - threshold_idx, + last - np.ceil(len_signal * 2 / 3), + ) + ) : last + ] + ) + vit_seg_cropped.append( + np.nanmean( + values[ + : int( + np.minimum( + first + threshold_idx, + first + np.floor(len_signal / 3), + ) + ) + ] + + values[ + int( + np.maximum( + last - threshold_idx, + last - np.ceil(len_signal * 2 / 3), + ) + ) : + ] + ) + ) + vit_seg_ceiled.append(np.nanmean(v_seg_band_ceiled)) + vit_seg.append(np.nanmean(values_temp)) + except Exception: # noqa: BLE001 + vit_seg_cropped.append(np.nan) + vit_seg_ceiled.append(np.nan) + vit_seg.append(np.nanmean(values_temp)) + continue + + vit_br.append(vit_seg) + vit_br_ceiled.append(vit_seg_ceiled) + vit_br_cropped.append(vit_seg_cropped) + + v_profile.append(vit_br) + v_profile_ceiled.append(vit_br_ceiled) + v_profile_cropped.append(vit_br_cropped) + v_profile_beat_threshold.append(v_profile) + 
v_profile_beat_ceiled_threshold.append(v_profile_ceiled) + v_profile_beat_cropped_threshold.append(v_profile_cropped) + v_raw = np.asarray(v_profile_beat_threshold) + v_raw = np.maximum(v_raw, 0) + v_raw_ceiled = np.asarray(v_profile_beat_ceiled_threshold) + v_raw_ceiled = np.maximum(v_raw_ceiled, 0) + v_raw_cropped = np.asarray(v_profile_beat_cropped_threshold) + v_raw_cropped = np.maximum(v_raw_cropped, 0) + + moment_1_segment = 0 + + moment_1_segment_cropped = 0 + + moment_1_segment_ceiled = 0 + + Tau_M1_raw_segment = [] + Tau_M1_over_T_raw_segment = [] + RI_raw_segment = [] + R_VTI_raw_segment = [] + + Tau_M1_raw_segment_cropped = [] + Tau_M1_over_T_raw_segment_cropped = [] + RI_raw_segment_cropped = [] + R_VTI_raw_segment_cropped = [] + + Tau_M1_raw_segment_ceiled = [] + Tau_M1_over_T_raw_segment_ceiled = [] + RI_raw_segment_ceiled = [] + R_VTI_raw_segment_ceiled = [] + + ratio_systole_diastole_R_VTI = 0.5 + + for threshold_idx in range(len(v_raw[:, 0, 0, 0])): + Tau_M1_raw_global = [] + Tau_M1_over_T_raw_global = [] + RI_raw_global = [] + R_VTI_raw_global = [] + + Tau_M1_raw_global_ceiled = [] + Tau_M1_over_T_raw_global_ceiled = [] + RI_raw_global_ceiled = [] + R_VTI_raw_global_ceiled = [] + + Tau_M1_raw_global_cropped = [] + Tau_M1_over_T_raw_global_cropped = [] + RI_raw_global_cropped = [] + R_VTI_raw_global_cropped = [] + + for branch_idx in range(len(v_raw[threshold_idx, 0, :, 0])): + Tau_M1_raw_branch = [] + Tau_M1_over_T_raw_branch = [] + RI_raw_branch = [] + R_VTI_raw_branch = [] + + Tau_M1_raw_branch_ceiled = [] + Tau_M1_over_T_raw_branch_ceiled = [] + RI_raw_branch_ceiled = [] + R_VTI_raw_branch_ceiled = [] + + Tau_M1_raw_branch_cropped = [] + Tau_M1_over_T_raw_branch_cropped = [] + RI_raw_branch_cropped = [] + R_VTI_raw_branch_cropped = [] + for _radius_idx in range(len(v_raw[threshold_idx, 0, 0, :])): + v_raw_average = np.nanmean( + v_raw[threshold_idx, :, branch_idx, :], axis=1 + ) + v_raw_ceiled_average = np.nanmean( + v_raw_ceiled[threshold_idx, 
:, branch_idx, :], axis=1 + ) + v_raw_cropped_average = np.nanmean( + v_raw_cropped[threshold_idx, :, branch_idx, :], axis=1 + ) + t = T[0][0] / len(v_raw_average) + + moment_0_segment = np.nansum(v_raw_average) + moment_0_segment_cropped = np.nansum(v_raw_cropped_average) + moment_0_segment_ceiled = np.nansum(v_raw_ceiled_average) + moment_1_segment = 0 + + moment_1_segment_cropped = 0 + + moment_1_segment_ceiled = 0 + for time_idx in range(len(v_raw_average)): + moment_1_segment += v_raw_average[time_idx] * time_idx * t + + moment_1_segment_cropped += ( + v_raw_cropped_average[time_idx] * time_idx * t + ) + + moment_1_segment_ceiled += ( + v_raw_ceiled_average[time_idx] * time_idx * t + ) + + if moment_0_segment != 0: + TM1 = moment_1_segment / moment_0_segment + Tau_M1_raw_branch.append(TM1) + Tau_M1_over_T_raw_branch.append(TM1 / T[0][0]) + else: + Tau_M1_raw_branch.append(0) + Tau_M1_over_T_raw_branch.append(0) + if moment_0_segment_cropped != 0: + TM1_cropped = ( + moment_1_segment_cropped / moment_0_segment_cropped + ) + Tau_M1_raw_branch_cropped.append(TM1_cropped) + Tau_M1_over_T_raw_branch_cropped.append(TM1_cropped / T[0][0]) + else: + Tau_M1_raw_branch_cropped.append(0) + Tau_M1_over_T_raw_branch_cropped.append(0) + if moment_0_segment_ceiled != 0: + TM1_ceiled = moment_1_segment_ceiled / moment_0_segment_ceiled + Tau_M1_raw_branch_ceiled.append(TM1_ceiled) + Tau_M1_over_T_raw_branch_ceiled.append( + TM1_ceiled / np.nanmean(T[0][:]) + ) + else: + Tau_M1_raw_branch_ceiled.append(0) + Tau_M1_over_T_raw_branch_ceiled.append(0) + + v_raw_max = np.max(v_raw_average) + v_raw_min = np.min(v_raw_average) + v_raw_max_cropped = np.max(v_raw_cropped_average) + v_raw_min_cropped = np.min(v_raw_cropped_average) + v_raw_max_ceiled = np.max(v_raw_ceiled_average) + v_raw_min_ceiled = np.min(v_raw_ceiled_average) + + RI_raw_branch_idx = 1 - (v_raw_min / v_raw_max) + RI_raw_branch.append(RI_raw_branch_idx) + RI_raw_branch_idx_cropped = 1 - ( + v_raw_min_cropped / 
v_raw_max_cropped + ) + RI_raw_branch_cropped.append(RI_raw_branch_idx_cropped) + RI_raw_branch_idx_ceiled = 1 - (v_raw_min_ceiled / v_raw_max_ceiled) + RI_raw_branch_ceiled.append(RI_raw_branch_idx_ceiled) + + epsilon = 10 ** (-12) + D1_raw = np.sum( + v_raw_average[ + : int( + np.ceil( + len(v_raw_average) * ratio_systole_diastole_R_VTI + ) + ) + ] + ) + D1_raw_cropped = np.sum( + v_raw_cropped_average[ + : int( + np.ceil( + len(v_raw_cropped_average) + * ratio_systole_diastole_R_VTI + ) + ) + ] + ) + D1_raw_ceiled = np.sum( + v_raw_ceiled_average[ + : int( + np.ceil( + len(v_raw_ceiled_average) + * ratio_systole_diastole_R_VTI + ) + ) + ] + ) + D2_raw = np.sum( + v_raw_average[ + int( + np.ceil( + len(v_raw_average) * ratio_systole_diastole_R_VTI + ) + ) : + ] + ) + D2_raw_cropped = np.sum( + v_raw_cropped_average[ + int( + np.ceil( + len(v_raw_cropped_average) + * ratio_systole_diastole_R_VTI + ) + ) : + ] + ) + D2_raw_ceiled = np.sum( + v_raw_ceiled_average[ + int( + np.ceil( + len(v_raw_ceiled_average) + * ratio_systole_diastole_R_VTI + ) + ) : + ] + ) + R_VTI_raw_branch.append(D1_raw / (D2_raw + epsilon)) + R_VTI_raw_branch_cropped.append( + D1_raw_cropped / (D2_raw_cropped + epsilon) + ) + R_VTI_raw_branch_ceiled.append( + D1_raw_ceiled / (D2_raw_ceiled + epsilon) + ) + + Tau_M1_raw_global.append(Tau_M1_raw_branch) + Tau_M1_over_T_raw_global.append(Tau_M1_over_T_raw_branch) + RI_raw_global.append(RI_raw_branch) + R_VTI_raw_global.append(R_VTI_raw_branch) + + Tau_M1_raw_global_cropped.append(Tau_M1_raw_branch_cropped) + Tau_M1_over_T_raw_global_cropped.append( + Tau_M1_over_T_raw_branch_cropped + ) + RI_raw_global_cropped.append(RI_raw_branch_cropped) + R_VTI_raw_global_cropped.append(R_VTI_raw_branch_cropped) + + Tau_M1_raw_global_ceiled.append(Tau_M1_raw_branch_ceiled) + Tau_M1_over_T_raw_global_ceiled.append(Tau_M1_over_T_raw_branch_ceiled) + RI_raw_global_ceiled.append(RI_raw_branch_ceiled) + R_VTI_raw_global_ceiled.append(R_VTI_raw_branch_ceiled) + 
Tau_M1_raw_segment.append(Tau_M1_raw_global) + Tau_M1_over_T_raw_segment.append(Tau_M1_over_T_raw_global) + RI_raw_segment.append(RI_raw_global) + R_VTI_raw_segment.append(R_VTI_raw_global) + + Tau_M1_raw_segment_cropped.append(Tau_M1_raw_global_cropped) + Tau_M1_over_T_raw_segment_cropped.append(Tau_M1_over_T_raw_global_cropped) + RI_raw_segment_cropped.append(RI_raw_global_cropped) + R_VTI_raw_segment_cropped.append(R_VTI_raw_global_cropped) + + Tau_M1_raw_segment_ceiled.append(Tau_M1_raw_global_ceiled) + Tau_M1_over_T_raw_segment_ceiled.append(Tau_M1_over_T_raw_global_ceiled) + RI_raw_segment_ceiled.append(RI_raw_global_ceiled) + R_VTI_raw_segment_ceiled.append(R_VTI_raw_global_ceiled) + + metrics = { + "signals/v_profile": np.asarray(v_profile_beat_threshold), + "signals/v_profile_cropped": np.asarray(v_profile_beat_ceiled_threshold), + "signals/v_profile_ceiled": np.asarray(v_profile_beat_cropped_threshold), + "tau_M1/tau_M1_raw": with_attrs( + np.asarray(Tau_M1_raw_segment), {"unit": [""]} + ), + "tau_M1_over_T/tau_M1_over_T_raw": with_attrs( + np.asarray(Tau_M1_over_T_raw_segment), {"unit": [""]} + ), + "RI/RI_raw": np.asarray(RI_raw_segment), + "R_VTI/R_VTI_raw": np.asarray(R_VTI_raw_segment), + "tau_M1/tau_M1_raw_cropped": with_attrs( + np.asarray(Tau_M1_raw_segment_cropped), {"unit": [""]} + ), + "tau_M1_over_T/tau_M1_over_T_raw_cropped": with_attrs( + np.asarray(Tau_M1_over_T_raw_segment_cropped), {"unit": [""]} + ), + "RI/RI_raw_cropped": np.asarray(RI_raw_segment_cropped), + "R_VTI/R_VTI_raw_cropped": np.asarray(R_VTI_raw_segment_cropped), + "tau_M1/tau_M1_raw_ceiled": with_attrs( + np.asarray(Tau_M1_raw_segment_ceiled), {"unit": [""]} + ), + "tau_M1_over_T/tau_M1_over_T_raw_ceiled": with_attrs( + np.asarray(Tau_M1_over_T_raw_segment_ceiled), {"unit": [""]} + ), + "RI/RI_raw_ceiled": np.asarray(RI_raw_segment_ceiled), + "R_VTI/R_VTI_raw_ceiled": np.asarray(R_VTI_raw_segment_ceiled), + } + + # Artifacts can store non-metric outputs (strings, paths, 
etc.). + + return ProcessResult(metrics=metrics) diff --git a/src/pipelines/static_example.py b/src/pipelines/static_example.py index 72d2806..aa60526 100644 --- a/src/pipelines/static_example.py +++ b/src/pipelines/static_example.py @@ -1,30 +1,40 @@ import numpy as np -from .core.base import ProcessPipeline, ProcessResult, with_attrs +from .core.base import ProcessPipeline, ProcessResult, registerPipeline, with_attrs +@registerPipeline(name="Static Example") class StaticExample(ProcessPipeline): """ Tutorial pipeline showing the full surface area of a pipeline: - Subclass ProcessPipeline and implement `run(self, h5file) -> ProcessResult`. - - Return metrics (scalars, vectors, matrices, cubes) and optional artifacts. + - Return metrics (scalars, vectors, matrices, cubes). + - Use "/" inside metric keys to create nested groups in output HDF5. - Attach HDF5 attributes to any metric via `with_attrs(data, attrs_dict)`. - - Add attributes to the pipeline group (`attrs`) or root file (`file_attrs`). + - Add attributes to the pipeline group (`attrs`). - No input data is required; this pipeline is purely illustrative. """ - description = "Tutorial: metrics + artifacts + dataset attrs + file/pipeline attrs." + description = "Tutorial: metrics + nested output groups + attrs." - def run(self, _h5file) -> ProcessResult: - # Metrics are the main numerical outputs; each key becomes a dataset under /pipelines//metrics. + def run(self, h5file) -> ProcessResult: + # Each key becomes a dataset under /Pipelines//. + # Keys with "/" automatically create sub-groups. metrics = { "scalar_example": 42.0, "vector_example": [1.0, 2.0, 3.0], + "subfolder/metrics_1": 2.5, + "subfolder/metrics_2": 3.1, # Attach dataset-level attributes (min/max/name/unit) using with_attrs. 
- "matrix_example": with_attrs( + "aggregates/matrix_example": with_attrs( [[1, 2], [3, 4]], - {"minimum": [1], "maximum": [4], "nameID": ["matrix_example"], "unit": ["a.u."]}, + { + "minimum": [1], + "maximum": [4], + "nameID": ["matrix_example"], + "unit": ["a.u."], + }, ), "cube_example": with_attrs( np.arange(8).reshape(2, 2, 2), @@ -32,11 +42,11 @@ def run(self, _h5file) -> ProcessResult: ), } - # Artifacts can store non-metric outputs (strings, paths, etc.). - artifacts = {"note": "Static data for demonstration"} - - # Optional attributes applied to the pipeline group and the root file. - attrs = {"pipeline_version": "1.0", "author": "StaticExample"} - file_attrs = {"example_generated": True} + # Optional attributes applied to the pipeline group. + attrs = { + "pipeline_version": "1.0", + "author": "StaticExample", + "example_generated": True, + } - return ProcessResult(metrics=metrics, artifacts=artifacts, attrs=attrs, file_attrs=file_attrs) + return ProcessResult(metrics=metrics, attrs=attrs) diff --git a/src/pipelines/tauh_n10.py b/src/pipelines/tauh_n10.py index 4484207..dd59e92 100644 --- a/src/pipelines/tauh_n10.py +++ b/src/pipelines/tauh_n10.py @@ -1,11 +1,10 @@ import math from dataclasses import dataclass -from typing import Dict import h5py import numpy as np -from .core.base import ProcessPipeline, ProcessResult +from .core.base import ProcessPipeline, ProcessResult, registerPipeline @dataclass @@ -28,6 +27,7 @@ def _freq_unit(h5file: h5py.File, path: str) -> str: return "hz" +@registerPipeline(name="TauhN10") class TauhN10(ProcessPipeline): """ Acquisition-level τ|H|,10 using synthetic spectral amplitudes. 
@@ -39,16 +39,15 @@ class TauhN10(ProcessPipeline): synthesis_points = 2048 # samples over one cardiac period for V_max estimation def run(self, h5file: h5py.File) -> ProcessResult: - metrics: Dict[str, float] = {} - artifacts: Dict[str, float] = {} + metrics: dict[str, float] = {} for vessel in ("Artery", "Vein"): vessel_result = self._compute_for_vessel(h5file, vessel) prefix = vessel.lower() metrics[f"{prefix}_tauH_{self.harmonic_index}"] = vessel_result.tau metrics[f"{prefix}_X_abs_{self.harmonic_index}"] = vessel_result.x_abs - artifacts[f"{prefix}_vmax"] = vessel_result.vmax - artifacts[f"{prefix}_freq_hz_{self.harmonic_index}"] = vessel_result.freq_hz - return ProcessResult(metrics=metrics, artifacts=artifacts) + metrics[f"{prefix}_vmax"] = vessel_result.vmax + metrics[f"{prefix}_freq_hz_{self.harmonic_index}"] = vessel_result.freq_hz + return ProcessResult(metrics=metrics) def _compute_for_vessel(self, h5file: h5py.File, vessel: str) -> TauHResult: spectral_prefix = "Arterial" if vessel.lower().startswith("arter") else "Venous" @@ -78,12 +77,16 @@ def _compute_for_vessel(self, h5file: h5py.File, vessel: str) -> TauHResult: freq_n_hz = freq_n_raw if is_hz else freq_n_raw / (2 * math.pi) if fundamental_hz <= 0: - raise ValueError(f"Invalid fundamental frequency for {vessel}: {fundamental_hz}") + raise ValueError( + f"Invalid fundamental frequency for {vessel}: {fundamental_hz}" + ) # Reconstruct the band-limited waveform (0..n) to get Vmax. 
vmax = self._estimate_vmax(amplitudes, phases, freqs, is_hz, n) if not np.isfinite(vmax) or vmax <= 0: - return TauHResult(tau=math.nan, x_abs=math.nan, vmax=float(vmax), freq_hz=freq_n_hz) + return TauHResult( + tau=math.nan, x_abs=math.nan, vmax=float(vmax), freq_hz=freq_n_hz + ) v_n = amplitudes[n] x_abs = float(abs(v_n) / vmax) @@ -97,7 +100,9 @@ def _compute_for_vessel(self, h5file: h5py.File, vessel: str) -> TauHResult: tau = math.nan else: tau = float(math.sqrt(denom) / omega_n) - return TauHResult(tau=tau, x_abs=x_abs, vmax=float(vmax), freq_hz=float(freq_n_hz)) + return TauHResult( + tau=tau, x_abs=x_abs, vmax=float(vmax), freq_hz=float(freq_n_hz) + ) def _estimate_vmax( self, @@ -112,7 +117,13 @@ def _estimate_vmax( if fundamental_hz <= 0: return math.nan omega_factor = 2 * math.pi if is_hz else 1.0 - t = np.linspace(0.0, 1.0 / fundamental_hz, num=self.synthesis_points, endpoint=False, dtype=np.float64) + t = np.linspace( + 0.0, + 1.0 / fundamental_hz, + num=self.synthesis_points, + endpoint=False, + dtype=np.float64, + ) waveform = np.full_like(t, fill_value=amplitudes[0], dtype=np.float64) for k in range(1, n_max + 1): # cosine synthesis over one cardiac period diff --git a/src/pipelines/tauh_n10_per_beat.py b/src/pipelines/tauh_n10_per_beat.py index 8b7cbeb..42c9e61 100644 --- a/src/pipelines/tauh_n10_per_beat.py +++ b/src/pipelines/tauh_n10_per_beat.py @@ -1,13 +1,13 @@ import math -from typing import Dict, List, Tuple import h5py import numpy as np -from .core.base import ProcessPipeline, ProcessResult +from .core.base import ProcessPipeline, ProcessResult, registerPipeline from .tauh_n10 import _freq_unit +@registerPipeline(name="TauhN10PerBeat") class TauhN10PerBeat(ProcessPipeline): """ Per-beat τ|H|,10 using per-beat FFT amplitudes and VmaxPerBeatBandLimited. 
@@ -18,21 +18,19 @@ class TauhN10PerBeat(ProcessPipeline): harmonic_index = 10 def run(self, h5file: h5py.File) -> ProcessResult: - metrics: Dict[str, float] = {} - artifacts: Dict[str, float] = {} + metrics: dict[str, float] = {} for vessel in ("Artery", "Vein"): - vessel_metrics, vessel_artifacts = self._compute_per_beat(h5file, vessel) + vessel_metrics = self._compute_per_beat(h5file, vessel) metrics.update(vessel_metrics) - artifacts.update(vessel_artifacts) - return ProcessResult(metrics=metrics, artifacts=artifacts) + return ProcessResult(metrics=metrics) - def _compute_per_beat(self, h5file: h5py.File, vessel: str) -> Tuple[Dict[str, float], Dict[str, float]]: + def _compute_per_beat(self, h5file: h5py.File, vessel: str) -> dict[str, float]: n = self.harmonic_index prefix = vessel.lower() # Per-beat FFT amplitudes/phases and per-beat Vmax for the band-limited signal. - amp_path = f"{vessel}/PerBeat/VelocitySignalPerBeatFFT_abs/value" - phase_path = f"{vessel}/PerBeat/VelocitySignalPerBeatFFT_arg/value" - vmax_path = f"{vessel}/PerBeat/VmaxPerBeatBandLimited/value" + amp_path = f"{vessel}/VelocityPerBeat/VelocitySignalPerBeatFFT_abs/value" + phase_path = f"{vessel}/VelocityPerBeat/VelocitySignalPerBeatFFT_arg/value" + vmax_path = f"{vessel}/VelocityPerBeat/VmaxPerBeatBandLimited/value" freq_path = ( f"{vessel}/Velocity/WaveformAnalysis/syntheticSpectralAnalysis/" f"{'Arterial' if vessel.lower().startswith('arter') else 'Venous'}PeakFrequencies/value" @@ -44,14 +42,22 @@ def _compute_per_beat(self, h5file: h5py.File, vessel: str) -> Tuple[Dict[str, f vmax = np.asarray(h5file[vmax_path]).astype(np.float64) freqs = np.asarray(h5file[freq_path]).astype(np.float64).ravel() except KeyError as exc: # noqa: BLE001 - raise ValueError(f"Missing per-beat spectral data for {vessel}: {exc}") from exc + raise ValueError( + f"Missing per-beat spectral data for {vessel}: {exc}" + ) from exc if amps.shape[0] <= n or phases.shape[0] <= n: - raise ValueError(f"Not enough 
harmonics in per-beat FFT for {vessel}: need index {n}") + raise ValueError( + f"Not enough harmonics in per-beat FFT for {vessel}: need index {n}" + ) if freqs.shape[0] <= n: - raise ValueError(f"Not enough frequency samples for {vessel}: need index {n}") + raise ValueError( + f"Not enough frequency samples for {vessel}: need index {n}" + ) if vmax.ndim != 2 or vmax.shape[1] != amps.shape[1]: - raise ValueError(f"Mismatch in beat count for {vessel}: vmax {vmax.shape}, amps {amps.shape}") + raise ValueError( + f"Mismatch in beat count for {vessel}: vmax {vmax.shape}, amps {amps.shape}" + ) # Frequency handling mirrors the acquisition-level pipeline. freq_unit = _freq_unit(h5file, freq_path) @@ -60,9 +66,9 @@ def _compute_per_beat(self, h5file: h5py.File, vessel: str) -> Tuple[Dict[str, f freq_n_hz = freq_n_raw if is_hz else freq_n_raw / (2 * math.pi) omega_n = (2 * math.pi * freq_n_raw) if is_hz else freq_n_raw - tau_values: List[float] = [] - x_values: List[float] = [] - vmax_values: List[float] = [] + tau_values: list[float] = [] + x_values: list[float] = [] + vmax_values: list[float] = [] beat_count = amps.shape[1] for beat_idx in range(beat_count): v_max = float(vmax[0, beat_idx]) @@ -70,18 +76,29 @@ def _compute_per_beat(self, h5file: h5py.File, vessel: str) -> Tuple[Dict[str, f v_n = float(amps[n, beat_idx]) x_abs = math.nan if v_max <= 0 else abs(v_n) / v_max x_values.append(x_abs) - if v_max <= 0 or not np.isfinite(x_abs) or x_abs <= 0 or x_abs > 1 or omega_n <= 0: + if ( + v_max <= 0 + or not np.isfinite(x_abs) + or x_abs <= 0 + or x_abs > 1 + or omega_n <= 0 + ): tau_values.append(math.nan) continue denom = (1.0 / (x_abs * x_abs)) - 1.0 - tau_values.append(float(math.sqrt(denom) / omega_n) if denom > 0 else math.nan) + tau_values.append( + float(math.sqrt(denom) / omega_n) if denom > 0 else math.nan + ) - metrics: Dict[str, float] = {} - artifacts: Dict[str, float] = {f"{prefix}_freq_hz_{n}": freq_n_hz} + metrics: dict[str, float] = 
{f"{prefix}_freq_hz_{n}": freq_n_hz} for i, tau in enumerate(tau_values): metrics[f"{prefix}_tauH_{n}_beat{i}"] = tau - artifacts[f"{prefix}_vmax_beat{i}"] = vmax_values[i] - artifacts[f"{prefix}_X_abs_{n}_beat{i}"] = x_values[i] - metrics[f"{prefix}_tauH_{n}_median"] = float(np.nanmedian(tau_values)) if tau_values else math.nan - metrics[f"{prefix}_tauH_{n}_mean"] = float(np.nanmean(tau_values)) if tau_values else math.nan - return metrics, artifacts + metrics[f"{prefix}_vmax_beat{i}"] = vmax_values[i] + metrics[f"{prefix}_X_abs_{n}_beat{i}"] = x_values[i] + metrics[f"{prefix}_tauH_{n}_median"] = ( + float(np.nanmedian(tau_values)) if tau_values else math.nan + ) + metrics[f"{prefix}_tauH_{n}_mean"] = ( + float(np.nanmean(tau_values)) if tau_values else math.nan + ) + return metrics diff --git a/src/pipelines/velocity_comparison.py b/src/pipelines/velocity_comparison.py index 9c0094f..c16cfb2 100644 --- a/src/pipelines/velocity_comparison.py +++ b/src/pipelines/velocity_comparison.py @@ -1,9 +1,10 @@ import h5py import numpy as np -from .core.base import ProcessPipeline, ProcessResult +from .core.base import ProcessPipeline, ProcessResult, registerPipeline +@registerPipeline(name="VelocityComparison") class VelocityComparison(ProcessPipeline): description = ( "Mean of /Artery/CrossSections/velocity_whole_seg_mean and " diff --git a/scripts/gen_optional_reqs.py b/src/scripts/gen_optional_reqs.py similarity index 84% rename from scripts/gen_optional_reqs.py rename to src/scripts/gen_optional_reqs.py index 9686529..cdbebc5 100644 --- a/scripts/gen_optional_reqs.py +++ b/src/scripts/gen_optional_reqs.py @@ -5,18 +5,18 @@ Output: AngioEye/pipelines/requirements-optional.txt Usage: python scripts/gen_optional_reqs.py """ + from __future__ import annotations import ast from pathlib import Path -from typing import List, Set PROJECT_ROOT = Path(__file__).resolve().parents[1] PIPELINES_DIR = PROJECT_ROOT / "AngioEye" / "pipelines" OUTPUT_PATH = PIPELINES_DIR / 
"requirements-optional.txt" -def parse_requires(path: Path) -> List[str]: +def parse_requires(path: Path) -> list[str]: try: tree = ast.parse(path.read_text(encoding="utf-8"), filename=str(path)) except OSError: @@ -28,14 +28,16 @@ def parse_requires(path: Path) -> List[str]: if isinstance(node.value, (ast.List, ast.Tuple)): vals = [] for elt in node.value.elts: - if isinstance(elt, ast.Constant) and isinstance(elt.value, str): + if isinstance(elt, ast.Constant) and isinstance( + elt.value, str + ): vals.append(elt.value) return vals return [] def main() -> None: - requirements: Set[str] = set() + requirements: set[str] = set() for path in PIPELINES_DIR.glob("*.py"): if path.name.startswith("_") or path.stem == "core": continue @@ -44,7 +46,9 @@ def main() -> None: OUTPUT_PATH.parent.mkdir(parents=True, exist_ok=True) sorted_reqs = sorted(requirements) - OUTPUT_PATH.write_text("\n".join(sorted_reqs) + ("\n" if sorted_reqs else ""), encoding="utf-8") + OUTPUT_PATH.write_text( + "\n".join(sorted_reqs) + ("\n" if sorted_reqs else ""), encoding="utf-8" + ) print(f"Wrote {len(sorted_reqs)} optional requirement(s) to {OUTPUT_PATH}") diff --git a/src/scripts/ruff_linter.py b/src/scripts/ruff_linter.py new file mode 100644 index 0000000..55bb1a2 --- /dev/null +++ b/src/scripts/ruff_linter.py @@ -0,0 +1,43 @@ +import argparse +import subprocess +import sys + + +def run_ruff(fix=False): + """Runs Ruff check and format.""" + + check_cmd = ["ruff", "check", "."] + format_cmd = ["ruff", "format", "."] + + if fix: + print("Applying auto-fixes...") + check_cmd.append("--fix") + else: + print("Checking code...") + format_cmd.insert(2, "--check") # adds --check to 'ruff format .' + + try: + res_check = subprocess.run(check_cmd) + res_format = subprocess.run(format_cmd) + + if res_check.returncode != 0 or res_format.returncode != 0: + print("\nErrors found. 
Run the script with --fix to resolve style issues.") + sys.exit(1) + + print("\n\033[92mCode looks great!\033[0m") + sys.exit(0) + + except FileNotFoundError: + print("Error: 'ruff' not found. Please run: pip install ruff") + sys.exit(1) + + +def main(): + parser = argparse.ArgumentParser(description="AngioEye Linting Tool") + parser.add_argument("--fix", action="store_true", help="Automatically fix issues") + args = parser.parse_args() + run_ruff(fix=args.fix) + + +if __name__ == "__main__": + main()