From 4c4a80b203050f31d8dd284463203227c31807ac Mon Sep 17 00:00:00 2001 From: Thang Pham Date: Tue, 17 Mar 2026 16:32:04 -0500 Subject: [PATCH] Add plot module with isotherm, mixture, and selectivity plotting Add a new matkit.plot module for visualizing simulation results: - parsers.py: auto-detect JSON format (single-component pressure isotherms vs mixture RH isotherms), auto-discover adsorbate names from data keys, normalize string/float values, handle NaN/Inf - isotherm.py: plot_single_isotherm, plot_mixture_isotherm, plot_selectivity, and unified plot_isotherm with auto-detection - Support multi-file overlay for comparing MOFs or temperatures, with auto-generated temperature labels (e.g. '273 K') - --data-dir option to load all JSON files from a directory - Pretty LaTeX labels for common adsorbates (CO2, N2, H2O, etc.) - CLI: matkit plot isotherm / matkit plot selectivity with options for DPI, figsize, log scale, error bars, fonts, custom labels - matplotlib is an optional dependency: pip install matkit[plot] - 72 new tests covering parsers, plotting, and CLI commands --- pyproject.toml | 3 +- src/matkit/cli.py | 281 +++++++++++ src/matkit/plot/__init__.py | 27 ++ src/matkit/plot/isotherm.py | 458 ++++++++++++++++++ src/matkit/plot/parsers.py | 311 ++++++++++++ tests/data/mixture_isotherm.json | 32 ++ tests/data/single_isotherm.json | 52 ++ tests/test_plot.py | 795 +++++++++++++++++++++++++++++++ 8 files changed, 1958 insertions(+), 1 deletion(-) create mode 100644 src/matkit/plot/__init__.py create mode 100644 src/matkit/plot/isotherm.py create mode 100644 src/matkit/plot/parsers.py create mode 100644 tests/data/mixture_isotherm.json create mode 100644 tests/data/single_isotherm.json create mode 100644 tests/test_plot.py diff --git a/pyproject.toml b/pyproject.toml index 9bb3f24..7dadbe0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,7 +18,8 @@ dependencies = [ [project.optional-dependencies] rdkit = ["rdkit"] mlip = ["mace-torch"] -all = ["rdkit", 
"mace-torch"] +plot = ["matplotlib>=3.5"] +all = ["rdkit", "mace-torch", "matplotlib>=3.5"] dev = ["pytest>=7.0", "ruff>=0.4"] [project.scripts] diff --git a/src/matkit/cli.py b/src/matkit/cli.py index 9c63f49..7976e72 100644 --- a/src/matkit/cli.py +++ b/src/matkit/cli.py @@ -259,5 +259,286 @@ def tobacco_create(smiles, site, out): click.echo(f"Error creating linker: {e}", err=True) +# ========================================== +# PLOT COMMANDS +# ========================================== +@main.group("plot") +def plot_cli(): + """Commands for plotting simulation data.""" + pass + + +@plot_cli.command("isotherm") +@click.option( + "--data", + multiple=True, + type=click.Path(exists=True), + help="Path to isotherm JSON file. Can be specified multiple " + "times to overlay plots.", +) +@click.option( + "--data-dir", + default=None, + type=click.Path(exists=True, file_okay=False), + help="Directory containing isotherm JSON files. " + "All *.json files are loaded and overlaid.", +) +@click.option( + "--output", + default=None, + type=click.Path(), + help="Output image file path (default: isotherm_plot.png " + "or mixture_isotherm_plot.png).", +) +@click.option("--dpi", default=600, help="Image resolution in DPI.") +@click.option( + "--figsize", + nargs=2, + type=float, + default=(8, 6), + help="Figure size as WIDTH HEIGHT in inches.", +) +@click.option( + "--adsorbate", + multiple=True, + help="Adsorbate(s) to include in mixture plots. " + "Can be specified multiple times. " + "Omit to plot all discovered adsorbates.", +) +@click.option( + "--label", + multiple=True, + help="Legend label for each --data file. 
Can be specified multiple times.", +) +@click.option("--xlabel", default=None, help="Custom x-axis label.") +@click.option("--ylabel", default=None, help="Custom y-axis label.") +@click.option("--title", default=None, help="Plot title.") +@click.option("--log-x", is_flag=True, help="Use logarithmic x-axis.") +@click.option("--log-y", is_flag=True, help="Use logarithmic y-axis.") +@click.option( + "--no-errorbars", + is_flag=True, + help="Omit error bars from the plot.", +) +@click.option( + "--fontsize-label", + default=24, + help="Font size for axis labels.", +) +@click.option( + "--fontsize-tick", + default=16, + help="Font size for tick labels.", +) +@click.option( + "--fontsize-legend", + default=16, + help="Font size for legend text.", +) +def plot_isotherm_cmd( + data, + data_dir, + output, + dpi, + figsize, + adsorbate, + label, + xlabel, + ylabel, + title, + log_x, + log_y, + no_errorbars, + fontsize_label, + fontsize_tick, + fontsize_legend, +): + """Plot isotherms from simulation JSON data. + + Auto-detects the data format (single-component pressure + isotherm or mixture RH isotherm) and generates the + appropriate plot. Multiple --data files or a --data-dir + can be specified to overlay isotherms for comparison. 
+ + \b + Examples: + matkit plot isotherm --data CO2_isotherm_298K.json + matkit plot isotherm --data-dir results/ + matkit plot isotherm --data r1.json --data r2.json + matkit plot isotherm --data mixture.json --adsorbate co2 + matkit plot isotherm --data CO2.json --log-x + """ + try: + from matkit.plot.parsers import ( + collect_data_files, + load_isotherm, + ) + + files = collect_data_files(data=data, data_dir=data_dir) + + # Detect format from first file to route correctly + parsed = load_isotherm(files[0]) + fmt = parsed["format"] + + common_kwargs = dict( + output=output, + dpi=dpi, + figsize=figsize, + xlabel=xlabel, + ylabel=ylabel, + title=title, + labels=list(label) if label else None, + log_x=log_x, + log_y=log_y, + no_errorbars=no_errorbars, + fontsize_label=fontsize_label, + fontsize_tick=fontsize_tick, + fontsize_legend=fontsize_legend, + ) + + if fmt == "mixture_rh": + from matkit.plot.isotherm import ( + plot_mixture_isotherm, + ) + + common_kwargs["adsorbates"] = list(adsorbate) if adsorbate else None + if output is None: + common_kwargs["output"] = "mixture_isotherm_plot.png" + out = plot_mixture_isotherm(files, **common_kwargs) + else: + from matkit.plot.isotherm import ( + plot_single_isotherm, + ) + + if output is None: + common_kwargs["output"] = "isotherm_plot.png" + out = plot_single_isotherm(files, **common_kwargs) + + click.echo(f"Plot saved to {out}") + + except ImportError: + click.echo( + "Error: matplotlib is required for plotting. " + "Install with: pip install matkit[plot]", + err=True, + ) + except Exception as e: + click.echo(f"Error: {e}", err=True) + + +@plot_cli.command("selectivity") +@click.option( + "--data", + required=True, + multiple=True, + type=click.Path(exists=True), + help="Path to mixture isotherm JSON file. 
Can be specified multiple times.", +) +@click.option( + "--output", + default="selectivity_plot.png", + type=click.Path(), + help="Output image file path.", +) +@click.option("--dpi", default=600, help="Image resolution in DPI.") +@click.option( + "--figsize", + nargs=2, + type=float, + default=(8, 6), + help="Figure size as WIDTH HEIGHT in inches.", +) +@click.option( + "--selectivity-key", + multiple=True, + help="Selectivity field(s) to plot (e.g. co2_n2_selectivity). " + "Omit to plot all discovered selectivity fields.", +) +@click.option( + "--label", + multiple=True, + help="Legend label for each --data file.", +) +@click.option("--xlabel", default=None, help="Custom x-axis label.") +@click.option("--ylabel", default=None, help="Custom y-axis label.") +@click.option("--title", default=None, help="Plot title.") +@click.option("--log-x", is_flag=True, help="Use logarithmic x-axis.") +@click.option("--log-y", is_flag=True, help="Use logarithmic y-axis.") +@click.option( + "--fontsize-label", + default=24, + help="Font size for axis labels.", +) +@click.option( + "--fontsize-tick", + default=16, + help="Font size for tick labels.", +) +@click.option( + "--fontsize-legend", + default=16, + help="Font size for legend text.", +) +def plot_selectivity_cmd( + data, + output, + dpi, + figsize, + selectivity_key, + label, + xlabel, + ylabel, + title, + log_x, + log_y, + fontsize_label, + fontsize_tick, + fontsize_legend, +): + """Plot selectivity vs relative humidity from mixture data. + + Selectivity keys are auto-discovered from the JSON data + (e.g. co2_n2_selectivity). Multiple --data files can be + overlaid for comparison. 
+ + \b + Examples: + matkit plot selectivity --data mixture.json + matkit plot selectivity --data m1.json --data m2.json + matkit plot selectivity --data mixture.json --log-y + """ + try: + from matkit.plot.isotherm import plot_selectivity + + out = plot_selectivity( + data_files=list(data), + output=output, + dpi=dpi, + figsize=figsize, + selectivity_keys=( + list(selectivity_key) if selectivity_key else None + ), + xlabel=xlabel, + ylabel=ylabel, + title=title, + labels=list(label) if label else None, + log_x=log_x, + log_y=log_y, + fontsize_label=fontsize_label, + fontsize_tick=fontsize_tick, + fontsize_legend=fontsize_legend, + ) + click.echo(f"Plot saved to {out}") + + except ImportError: + click.echo( + "Error: matplotlib is required for plotting. " + "Install with: pip install matkit[plot]", + err=True, + ) + except Exception as e: + click.echo(f"Error: {e}", err=True) + + if __name__ == "__main__": main() diff --git a/src/matkit/plot/__init__.py b/src/matkit/plot/__init__.py new file mode 100644 index 0000000..468a4b3 --- /dev/null +++ b/src/matkit/plot/__init__.py @@ -0,0 +1,27 @@ +"""Plotting utilities for isotherm and simulation data.""" + +from matkit.plot.isotherm import ( + plot_isotherm, + plot_mixture_isotherm, + plot_selectivity, + plot_single_isotherm, +) +from matkit.plot.parsers import ( + collect_data_files, + detect_format, + load_isotherm, + parse_mixture_isotherm, + parse_single_isotherm, +) + +__all__ = [ + "collect_data_files", + "detect_format", + "load_isotherm", + "parse_mixture_isotherm", + "parse_single_isotherm", + "plot_isotherm", + "plot_mixture_isotherm", + "plot_selectivity", + "plot_single_isotherm", +] diff --git a/src/matkit/plot/isotherm.py b/src/matkit/plot/isotherm.py new file mode 100644 index 0000000..8f97b08 --- /dev/null +++ b/src/matkit/plot/isotherm.py @@ -0,0 +1,458 @@ +"""Isotherm plotting functions. 
+ +Provides functions for plotting single-component isotherms +(uptake vs pressure), mixture isotherms (uptake vs relative +humidity), and selectivity plots from gRASPA / RASPA simulation +output JSON files. + +All functions accept one or more data files for overlay +comparison and offer extensive customisation of labels, scales, +figure size, and output format. +""" + +from pathlib import Path + +from matkit.plot.parsers import load_isotherm + +# Mapping of lowercase adsorbate names to pretty LaTeX-style +# labels used by matplotlib's mathtext renderer. +_PRETTY_LABELS = { + "co2": r"CO$_2$", + "n2": r"N$_2$", + "h2o": r"H$_2$O", + "h2": r"H$_2$", + "ch4": r"CH$_4$", + "so2": r"SO$_2$", + "o2": r"O$_2$", + "ar": "Ar", + "he": "He", + "kr": "Kr", + "xe": "Xe", + "nh3": r"NH$_3$", + "h2s": r"H$_2$S", + "no2": r"NO$_2$", + "co": "CO", +} + +# Default marker cycle for multi-series plots +_MARKERS = ["o", "s", "^", "D", "v", "<", ">", "p", "h", "*"] + + +def _pretty(name: str) -> str: + """Return a publication-quality label for an adsorbate name.""" + return _PRETTY_LABELS.get(name.lower(), name) + + +def _get_matplotlib(): + """Import matplotlib, raising a helpful error if missing.""" + try: + import matplotlib + + matplotlib.use("Agg") # non-interactive backend + import matplotlib.pyplot as plt + + return plt + except ImportError: + raise ImportError( + "matplotlib is required for plotting. " + "Install it with: pip install matkit[plot]" + ) + + +def plot_single_isotherm( + data_files: list[str], + output: str = "isotherm_plot.png", + dpi: int = 600, + figsize: tuple[float, float] = (8, 6), + xlabel: str | None = None, + ylabel: str | None = None, + title: str | None = None, + labels: list[str] | None = None, + log_x: bool = False, + log_y: bool = False, + no_errorbars: bool = False, + fontsize_label: int = 24, + fontsize_tick: int = 16, + fontsize_legend: int = 16, +) -> str: + """Plot single-component isotherms (uptake vs pressure). 
+ + Supports overlaying multiple data files on the same axes for + comparison (e.g. different MOFs at the same conditions). + + Args: + data_files: List of paths to isotherm JSON files. + output: Output image file path. + dpi: Image resolution in dots per inch. + figsize: Figure dimensions ``(width, height)`` in inches. + xlabel: Custom x-axis label. Defaults to + ``"Pressure (<unit>)"``. + ylabel: Custom y-axis label. Defaults to + ``"Uptake (<unit>)"``. + title: Plot title. ``None`` for no title. + labels: Legend labels, one per data file. Defaults to + filenames. + log_x: Use logarithmic x-axis. + log_y: Use logarithmic y-axis. + no_errorbars: If ``True``, omit error bars. + fontsize_label: Font size for axis labels. + fontsize_tick: Font size for tick labels. + fontsize_legend: Font size for legend entries. + + Returns: + The absolute path to the saved plot image. + + Raises: + ImportError: If matplotlib is not installed. + FileNotFoundError: If any data file is missing. + ValueError: If a data file is not single-component format. + """ + plt = _get_matplotlib() + + fig, ax = plt.subplots(figsize=figsize) + + # Pre-parse all files so we can auto-generate labels + parsed_list: list[dict] = [] + for fpath in data_files: + parsed = load_isotherm(fpath) + if parsed["format"] != "single": + raise ValueError( + f"Expected single-component format, got " + f"'{parsed['format']}' in {fpath}" + ) + parsed_list.append(parsed) + + # Auto-generate labels from temperature when the user + # did not supply custom labels and multiple files are + # loaded (typical use-case: same adsorbate at different T). 
+ auto_labels = labels + if not labels and len(data_files) > 1: + temps = [p.get("temperature") for p in parsed_list] + if all(t is not None for t in temps): + auto_labels = [ + f"{int(t)} K" if t == int(t) else f"{t} K" for t in temps + ] + + for i, (fpath, parsed) in enumerate(zip(data_files, parsed_list)): + label = ( + auto_labels[i] + if auto_labels and i < len(auto_labels) + else Path(fpath).stem + ) + marker = _MARKERS[i % len(_MARKERS)] + + if no_errorbars: + ax.plot( + parsed["pressures"], + parsed["uptakes"], + f"{marker}-", + label=label, + ) + else: + ax.errorbar( + parsed["pressures"], + parsed["uptakes"], + yerr=parsed["errors"], + fmt=f"{marker}-", + label=label, + capsize=5, + ) + + # Axis labels (use last parsed for metadata) + pressure_unit = parsed_list[-1]["pressure_unit"] + uptake_unit = parsed_list[-1].get("unit", "mol/kg") + ax.set_xlabel( + xlabel or f"Pressure ({pressure_unit})", + fontsize=fontsize_label, + ) + ax.set_ylabel( + ylabel or f"Uptake ({uptake_unit})", + fontsize=fontsize_label, + ) + + if log_x: + ax.set_xscale("log") + if log_y: + ax.set_yscale("log") + + if title: + ax.set_title(title, fontsize=fontsize_label) + + ax.tick_params(labelsize=fontsize_tick) + ax.legend(fontsize=fontsize_legend) + ax.grid(True, linestyle="--", alpha=0.6) + + fig.tight_layout() + fig.savefig(output, dpi=dpi) + plt.close(fig) + + return str(Path(output).resolve()) + + +def plot_mixture_isotherm( + data_files: list[str], + output: str = "mixture_isotherm_plot.png", + dpi: int = 600, + figsize: tuple[float, float] = (8, 6), + adsorbates: list[str] | None = None, + xlabel: str | None = None, + ylabel: str | None = None, + title: str | None = None, + labels: list[str] | None = None, + log_x: bool = False, + log_y: bool = False, + no_errorbars: bool = False, + fontsize_label: int = 24, + fontsize_tick: int = 16, + fontsize_legend: int = 16, +) -> str: + """Plot mixture isotherms (uptake vs relative humidity). 
+ + Adsorbate names are auto-discovered from the JSON keys. + Multiple files can be overlaid; when overlaying, legend + entries are prefixed with the file label. + + Args: + data_files: List of paths to mixture isotherm JSON files. + output: Output image file path. + dpi: Image resolution in dots per inch. + figsize: Figure dimensions ``(width, height)`` in inches. + adsorbates: Subset of adsorbate names to plot. ``None`` + plots all discovered adsorbates. + xlabel: Custom x-axis label. Defaults to + ``"Relative Humidity (%)"``. + ylabel: Custom y-axis label. Defaults to + ``"Uptake (mol/kg)"``. + title: Plot title. + labels: Legend labels, one per data file. + log_x: Use logarithmic x-axis. + log_y: Use logarithmic y-axis. + no_errorbars: If ``True``, omit error bars. + fontsize_label: Font size for axis labels. + fontsize_tick: Font size for tick labels. + fontsize_legend: Font size for legend entries. + + Returns: + The absolute path to the saved plot image. + """ + plt = _get_matplotlib() + + fig, ax = plt.subplots(figsize=figsize) + multi_file = len(data_files) > 1 + + marker_idx = 0 + for i, fpath in enumerate(data_files): + parsed = load_isotherm(fpath) + if parsed["format"] != "mixture_rh": + raise ValueError( + f"Expected mixture_rh format, got " + f"'{parsed['format']}' in {fpath}" + ) + + file_label = ( + labels[i] if labels and i < len(labels) else Path(fpath).stem + ) + + ads_to_plot = adsorbates or parsed["adsorbates"] + for ads in ads_to_plot: + if ads not in parsed["uptakes"]: + continue + + pretty = _pretty(ads) + legend = f"{file_label} - {pretty}" if multi_file else pretty + marker = _MARKERS[marker_idx % len(_MARKERS)] + marker_idx += 1 + + if no_errorbars: + ax.plot( + parsed["rh_values"], + parsed["uptakes"][ads], + f"{marker}-", + label=legend, + ) + else: + ax.errorbar( + parsed["rh_values"], + parsed["uptakes"][ads], + yerr=parsed["errors"].get(ads), + fmt=f"{marker}-", + label=legend, + capsize=5, + ) + + ax.set_xlabel( + xlabel or 
"Relative Humidity (%)", + fontsize=fontsize_label, + ) + ax.set_ylabel( + ylabel or "Uptake (mol/kg)", + fontsize=fontsize_label, + ) + + if log_x: + ax.set_xscale("log") + if log_y: + ax.set_yscale("log") + + if title: + ax.set_title(title, fontsize=fontsize_label) + + ax.tick_params(labelsize=fontsize_tick) + ax.legend(fontsize=fontsize_legend) + ax.grid(True, linestyle="--", alpha=0.6) + + fig.tight_layout() + fig.savefig(output, dpi=dpi) + plt.close(fig) + + return str(Path(output).resolve()) + + +def plot_selectivity( + data_files: list[str], + output: str = "selectivity_plot.png", + dpi: int = 600, + figsize: tuple[float, float] = (8, 6), + selectivity_keys: list[str] | None = None, + xlabel: str | None = None, + ylabel: str | None = None, + title: str | None = None, + labels: list[str] | None = None, + log_x: bool = False, + log_y: bool = False, + fontsize_label: int = 24, + fontsize_tick: int = 16, + fontsize_legend: int = 16, +) -> str: + """Plot selectivity vs relative humidity from mixture data. + + Selectivity keys are auto-discovered from the JSON data + (e.g. ``co2_n2_selectivity``). + + Args: + data_files: List of paths to mixture isotherm JSON files. + output: Output image file path. + dpi: Image resolution in dots per inch. + figsize: Figure dimensions ``(width, height)`` in inches. + selectivity_keys: Specific selectivity fields to plot. + ``None`` plots all discovered selectivity fields. + xlabel: Custom x-axis label. + ylabel: Custom y-axis label. + title: Plot title. + labels: Legend labels, one per data file. + log_x: Use logarithmic x-axis. + log_y: Use logarithmic y-axis. + fontsize_label: Font size for axis labels. + fontsize_tick: Font size for tick labels. + fontsize_legend: Font size for legend entries. + + Returns: + The absolute path to the saved plot image. 
+ """ + plt = _get_matplotlib() + + fig, ax = plt.subplots(figsize=figsize) + multi_file = len(data_files) > 1 + + marker_idx = 0 + for i, fpath in enumerate(data_files): + parsed = load_isotherm(fpath) + if parsed["format"] != "mixture_rh": + raise ValueError( + f"Expected mixture_rh format, got " + f"'{parsed['format']}' in {fpath}" + ) + + if not parsed["selectivity"]: + raise ValueError(f"No selectivity data found in {fpath}") + + file_label = ( + labels[i] if labels and i < len(labels) else Path(fpath).stem + ) + + keys_to_plot = selectivity_keys or list(parsed["selectivity"].keys()) + for sel_key in keys_to_plot: + if sel_key not in parsed["selectivity"]: + continue + + # Make a readable legend label from the key + # e.g. "co2_n2_selectivity" -> "CO2/N2" + parts = sel_key.replace("_selectivity", "").split("_") + pretty_sel = "/".join(_pretty(p) for p in parts) + legend = ( + f"{file_label} - {pretty_sel}" if multi_file else pretty_sel + ) + marker = _MARKERS[marker_idx % len(_MARKERS)] + marker_idx += 1 + + ax.plot( + parsed["rh_values"], + parsed["selectivity"][sel_key], + f"{marker}-", + label=legend, + ) + + ax.set_xlabel( + xlabel or "Relative Humidity (%)", + fontsize=fontsize_label, + ) + ax.set_ylabel( + ylabel or "Selectivity", + fontsize=fontsize_label, + ) + + if log_x: + ax.set_xscale("log") + if log_y: + ax.set_yscale("log") + + if title: + ax.set_title(title, fontsize=fontsize_label) + + ax.tick_params(labelsize=fontsize_tick) + ax.legend(fontsize=fontsize_legend) + ax.grid(True, linestyle="--", alpha=0.6) + + fig.tight_layout() + fig.savefig(output, dpi=dpi) + plt.close(fig) + + return str(Path(output).resolve()) + + +def plot_isotherm( + data_files: list[str], + output: str | None = None, + **kwargs, +) -> str: + """Auto-detect format and plot isotherms. + + Inspects the first data file to determine the format, then + dispatches to :func:`plot_single_isotherm` or + :func:`plot_mixture_isotherm`. 
+ + Args: + data_files: List of paths to isotherm JSON files. + output: Output image path. Defaults depend on format. + **kwargs: Forwarded to the format-specific plot function. + + Returns: + The absolute path to the saved plot image. + """ + parsed = load_isotherm(data_files[0]) + fmt = parsed["format"] + + if fmt == "single": + return plot_single_isotherm( + data_files, + output=output or "isotherm_plot.png", + **kwargs, + ) + elif fmt == "mixture_rh": + return plot_mixture_isotherm( + data_files, + output=output or "mixture_isotherm_plot.png", + **kwargs, + ) + else: + raise ValueError(f"Unsupported format: {fmt}") diff --git a/src/matkit/plot/parsers.py b/src/matkit/plot/parsers.py new file mode 100644 index 0000000..8653a7b --- /dev/null +++ b/src/matkit/plot/parsers.py @@ -0,0 +1,311 @@ +"""Parsers for isotherm JSON data files. + +Supports two formats produced by gRASPA / RASPA simulations: + +1. **Single-component isotherms** -- keys like ``"0.1bar_298K"`` + or ``"314.2Pa_298K"``, each containing uptake/error for one + adsorbate at one pressure point. + +2. **Mixture isotherms (RH-based)** -- keys like ``"50_RH"``, + each containing ``<adsorbate>_uptake`` / ``<adsorbate>_error`` pairs + for every adsorbate in the mixture. +""" + +import json +import math +import re +from pathlib import Path + +# Regex patterns for key formats +_PRESSURE_BAR_RE = re.compile(r"^(?P<pressure>[\d.]+)bar_(?P<temp>[\d.]+)K$") +_PRESSURE_PA_RE = re.compile(r"^(?P<pressure>[\d.]+)Pa_(?P<temp>[\d.]+)K$") +_RH_RE = re.compile(r"^(?P<rh>\d+)_RH$") + + +def _safe_float(value): + """Convert a value to float, returning NaN for invalid values. + + Handles string representations of numbers, ``'-nan'``, + ``'-inf'``, ``'inf'``, and ``None``. 
+ """ + if value is None: + return float("nan") + try: + result = float(value) + if math.isinf(result) or math.isnan(result): + return float("nan") + return result + except (ValueError, TypeError): + return float("nan") + + +def detect_format(data: dict) -> str: + """Detect the isotherm JSON format from its keys. + + Args: + data: Parsed JSON dictionary. + + Returns: + ``"single"`` for single-component pressure isotherms, + ``"mixture_rh"`` for relative-humidity mixture isotherms. + + Raises: + ValueError: If the format cannot be determined. + """ + if not data: + raise ValueError("Empty data dictionary") + + keys = list(data.keys()) + sample = keys[0] + + if _RH_RE.match(sample): + return "mixture_rh" + + if _PRESSURE_BAR_RE.match(sample) or _PRESSURE_PA_RE.match(sample): + return "single" + + raise ValueError( + f"Cannot detect isotherm format from key: '{sample}'. " + "Expected patterns like '0.1bar_298K', '314.2Pa_298K', " + "or '50_RH'." + ) + + +def parse_single_isotherm(data: dict) -> dict: + """Parse a single-component isotherm JSON dict. + + Args: + data: Dict with keys like ``"0.1bar_298K"`` mapping to + dicts with ``uptake``, ``error``, and optional ``qst`` + fields. + + Returns: + Dict with keys: + - ``format``: ``"single"`` + - ``pressure_unit``: ``"bar"`` or ``"Pa"`` + - ``pressures``: sorted list of float pressures + - ``uptakes``: list of float uptakes (sorted by pressure) + - ``errors``: list of float errors (sorted by pressure) + - ``unit``: uptake unit string (e.g. 
``"mol/kg"``) + - ``temperature``: float temperature in K (from first key) + - ``qst``: list of float QST values (NaN if absent) + - ``qst_errors``: list of float QST errors (NaN if absent) + - ``qst_unit``: QST unit string or ``None`` + """ + entries = [] + pressure_unit = None + + for key, values in data.items(): + m_bar = _PRESSURE_BAR_RE.match(key) + m_pa = _PRESSURE_PA_RE.match(key) + + if m_bar: + pressure = float(m_bar.group("pressure")) + temperature = float(m_bar.group("temp")) + pressure_unit = pressure_unit or "bar" + elif m_pa: + pressure = float(m_pa.group("pressure")) + temperature = float(m_pa.group("temp")) + pressure_unit = pressure_unit or "Pa" + else: + continue # skip unrecognised keys + + entries.append( + { + "pressure": pressure, + "temperature": temperature, + "uptake": _safe_float(values.get("uptake")), + "error": _safe_float(values.get("error")), + "unit": values.get("unit", "mol/kg"), + "qst": _safe_float(values.get("qst")), + "qst_error": _safe_float(values.get("error_qst")), + "qst_unit": values.get("qst_unit"), + } + ) + + entries.sort(key=lambda e: e["pressure"]) + + return { + "format": "single", + "pressure_unit": pressure_unit or "bar", + "pressures": [e["pressure"] for e in entries], + "uptakes": [e["uptake"] for e in entries], + "errors": [e["error"] for e in entries], + "unit": entries[0]["unit"] if entries else "mol/kg", + "temperature": entries[0]["temperature"] if entries else None, + "qst": [e["qst"] for e in entries], + "qst_errors": [e["qst_error"] for e in entries], + "qst_unit": (entries[0]["qst_unit"] if entries else None), + } + + +def _discover_adsorbates(data: dict) -> list[str]: + """Discover adsorbate names from mixture isotherm keys. + + Looks for keys matching the pattern ``<adsorbate>_uptake`` in the + first data point and returns a sorted list of names. 
+ """ + first_key = next(iter(data)) + point = data[first_key] + + adsorbates = [] + for k in point: + if k.endswith("_uptake"): + name = k[: -len("_uptake")] + adsorbates.append(name) + + return sorted(adsorbates) + + +def _discover_selectivity_keys(data: dict) -> list[str]: + """Discover selectivity field names from mixture isotherm data. + + Looks for keys matching ``*_selectivity`` in the first point. + """ + first_key = next(iter(data)) + point = data[first_key] + return sorted(k for k in point if k.endswith("_selectivity")) + + +def parse_mixture_isotherm(data: dict) -> dict: + """Parse a mixture (RH-based) isotherm JSON dict. + + Adsorbate names are auto-discovered from the data keys + (e.g. ``co2_uptake`` → adsorbate ``"co2"``). + + Args: + data: Dict with keys like ``"50_RH"`` mapping to dicts + containing ``<adsorbate>_uptake`` and ``<adsorbate>_error`` + fields. + + Returns: + Dict with keys: + - ``format``: ``"mixture_rh"`` + - ``rh_values``: sorted list of int RH percentages + - ``adsorbates``: sorted list of adsorbate name strings + - ``uptakes``: dict mapping adsorbate name → list of floats + - ``errors``: dict mapping adsorbate name → list of floats + - ``selectivity``: dict mapping selectivity key → list of + floats (e.g. 
``"co2_n2_selectivity"`` → ``[...]``) + """ + adsorbates = _discover_adsorbates(data) + selectivity_keys = _discover_selectivity_keys(data) + + entries = [] + for key, values in data.items(): + m = _RH_RE.match(key) + if not m: + continue + rh = int(m.group("rh")) + entries.append({"rh": rh, "values": values}) + + entries.sort(key=lambda e: e["rh"]) + + result = { + "format": "mixture_rh", + "rh_values": [e["rh"] for e in entries], + "adsorbates": adsorbates, + "uptakes": {}, + "errors": {}, + "selectivity": {}, + } + + for ads in adsorbates: + result["uptakes"][ads] = [ + _safe_float(e["values"].get(f"{ads}_uptake")) for e in entries + ] + result["errors"][ads] = [ + _safe_float(e["values"].get(f"{ads}_error")) for e in entries + ] + + for sel_key in selectivity_keys: + result["selectivity"][sel_key] = [ + _safe_float(e["values"].get(sel_key)) for e in entries + ] + + return result + + +def collect_data_files( + data: tuple[str, ...] | list[str] = (), + data_dir: str | None = None, + pattern: str = "*.json", +) -> list[str]: + """Build a sorted list of data file paths. + + Combines explicitly listed files with all files matching + *pattern* inside an optional directory. Files from the + directory are sorted alphabetically so the order is + deterministic. + + Args: + data: Explicit file paths. + data_dir: Optional directory to scan for JSON files. + pattern: Glob pattern for directory scan (default + ``"*.json"``). + + Returns: + Deduplicated list of resolved file path strings. + + Raises: + FileNotFoundError: If *data_dir* does not exist. + ValueError: If no files are found. 
+ """ + paths: list[Path] = [] + + for p in data: + paths.append(Path(p).resolve()) + + if data_dir is not None: + d = Path(data_dir) + if not d.is_dir(): + raise FileNotFoundError(f"Directory not found: {data_dir}") + paths.extend(sorted(d.glob(pattern))) + + # Deduplicate while preserving order + seen: set[Path] = set() + unique: list[str] = [] + for p in paths: + if p not in seen: + seen.add(p) + unique.append(str(p)) + + if not unique: + raise ValueError( + "No data files found. Provide --data and/or --data-dir." + ) + + return unique + + +def load_isotherm(filepath: str) -> dict: + """Load and parse an isotherm JSON file. + + Auto-detects the format (single-component or mixture) and + returns a normalised data structure. + + Args: + filepath: Path to the JSON file. + + Returns: + Parsed isotherm dict (see :func:`parse_single_isotherm` + or :func:`parse_mixture_isotherm` for structure). + + Raises: + FileNotFoundError: If the file does not exist. + ValueError: If the format cannot be detected or parsed. 
+ """ + path = Path(filepath) + if not path.exists(): + raise FileNotFoundError(f"File not found: {filepath}") + + with open(path, "r") as f: + data = json.load(f) + + fmt = detect_format(data) + if fmt == "single": + return parse_single_isotherm(data) + elif fmt == "mixture_rh": + return parse_mixture_isotherm(data) + else: + raise ValueError(f"Unsupported format: {fmt}") diff --git a/tests/data/mixture_isotherm.json b/tests/data/mixture_isotherm.json new file mode 100644 index 0000000..3e08d06 --- /dev/null +++ b/tests/data/mixture_isotherm.json @@ -0,0 +1,32 @@ +{ + "0_RH": { + "co2_uptake": 3.99067, + "n2_uptake": 0.0222, + "h2o_uptake": 0.0, + "co2_error": 0.08111, + "n2_error": 0.00105, + "h2o_error": 0.0, + "co2_n2_selectivity": 1018.64, + "calc_time_in_s": 3332.97 + }, + "50_RH": { + "co2_uptake": 0.16258, + "n2_uptake": 0.00139, + "h2o_uptake": 11.3265, + "co2_error": 0.05529, + "n2_error": 0.00077, + "h2o_error": 0.1287, + "co2_n2_selectivity": 662.80, + "calc_time_in_s": 10863.46 + }, + "100_RH": { + "co2_uptake": 0.03966, + "n2_uptake": 0.0002, + "h2o_uptake": 12.94059, + "co2_error": 0.01295, + "n2_error": 0.00015, + "h2o_error": 0.17625, + "co2_n2_selectivity": 1123.70, + "calc_time_in_s": 10481.06 + } +} diff --git a/tests/data/single_isotherm.json b/tests/data/single_isotherm.json new file mode 100644 index 0000000..89e5071 --- /dev/null +++ b/tests/data/single_isotherm.json @@ -0,0 +1,52 @@ +{ + "0.001bar_298K": { + "success": true, + "uptake": "0.12345", + "error": "0.00567", + "unit": "mol/kg", + "qst": "32.5", + "error_qst": "1.2", + "qst_unit": "kJ/mol", + "calc_time_in_s": 100.5 + }, + "0.01bar_298K": { + "success": true, + "uptake": "0.98765", + "error": "0.01234", + "unit": "mol/kg", + "qst": "31.8", + "error_qst": "0.9", + "qst_unit": "kJ/mol", + "calc_time_in_s": 120.3 + }, + "0.1bar_298K": { + "success": true, + "uptake": "2.34567", + "error": "0.04321", + "unit": "mol/kg", + "qst": "30.1", + "error_qst": "0.7", + "qst_unit": "kJ/mol", + 
"""Tests for the matkit.plot module."""

import json
import math

import pytest

from matkit.plot.parsers import (
    _safe_float,
    _discover_adsorbates,
    _discover_selectivity_keys,
    collect_data_files,
    detect_format,
    load_isotherm,
    parse_mixture_isotherm,
    parse_single_isotherm,
)


# ==========================================
# Fixtures
# ==========================================
def _read_json_fixture(path):
    """Return the decoded JSON document stored at *path*.

    The file is opened with an explicit UTF-8 encoding so the result does
    not depend on the platform's locale default.
    """
    with open(path, encoding="utf-8") as handle:
        return json.load(handle)


# NOTE(review): ``test_data_dir`` is a fixture that is not defined in this
# module -- presumably it comes from a conftest; it is combined with ``/``
# below, so it is assumed to be path-like.
@pytest.fixture
def single_isotherm_path(test_data_dir):
    """Path (as ``str``) to the single-component isotherm fixture file."""
    return str(test_data_dir / "single_isotherm.json")


@pytest.fixture
def mixture_isotherm_path(test_data_dir):
    """Path (as ``str``) to the mixture isotherm fixture file."""
    return str(test_data_dir / "mixture_isotherm.json")


@pytest.fixture
def single_isotherm_data(test_data_dir):
    """Decoded JSON content of the single-component isotherm fixture."""
    return _read_json_fixture(test_data_dir / "single_isotherm.json")


@pytest.fixture
def mixture_isotherm_data(test_data_dir):
    """Decoded JSON content of the mixture isotherm fixture."""
    return _read_json_fixture(test_data_dir / "mixture_isotherm.json")
# ==========================================
# Tests for _safe_float
# ==========================================
class TestSafeFloat:
    """Checks ``_safe_float`` on numeric, string and unusable inputs."""

    def test_string_number(self):
        parsed = _safe_float("3.14")
        assert parsed == 3.14

    def test_float_number(self):
        parsed = _safe_float(2.718)
        assert parsed == 2.718

    def test_int_number(self):
        parsed = _safe_float(42)
        assert parsed == 42.0

    def test_none_returns_nan(self):
        parsed = _safe_float(None)
        assert math.isnan(parsed)

    def test_nan_string_returns_nan(self):
        parsed = _safe_float("-nan")
        assert math.isnan(parsed)

    def test_inf_string_returns_nan(self):
        parsed = _safe_float("-inf")
        assert math.isnan(parsed)

    def test_inf_returns_nan(self):
        infinite = float("inf")
        assert math.isnan(_safe_float(infinite))

    def test_invalid_string_returns_nan(self):
        parsed = _safe_float("not_a_number")
        assert math.isnan(parsed)


# ==========================================
# Tests for detect_format
# ==========================================
class TestDetectFormat:
    """Checks which format name ``detect_format`` reports for a payload."""

    def test_detects_single_bar(self, single_isotherm_data):
        detected = detect_format(single_isotherm_data)
        assert detected == "single"

    def test_detects_single_pa(self):
        payload = {"314.2Pa_298K": {"uptake": "1.0"}}
        detected = detect_format(payload)
        assert detected == "single"

    def test_detects_mixture_rh(self, mixture_isotherm_data):
        detected = detect_format(mixture_isotherm_data)
        assert detected == "mixture_rh"

    def test_empty_raises(self):
        empty_payload = {}
        with pytest.raises(ValueError, match="Empty"):
            detect_format(empty_payload)

    def test_unknown_format_raises(self):
        payload = {"unknown_key": {"uptake": 1.0}}
        with pytest.raises(ValueError, match="Cannot detect"):
            detect_format(payload)


# ==========================================
# Tests for parse_single_isotherm
# ==========================================
class TestParseSingleIsotherm:
    """Checks the dictionary returned by ``parse_single_isotherm``."""

    def test_sorts_by_pressure(self, single_isotherm_data):
        parsed = parse_single_isotherm(single_isotherm_data)
        expected = [0.001, 0.01, 0.1, 0.5, 1.0]
        assert parsed["pressures"] == expected

    def test_format_is_single(self, single_isotherm_data):
        parsed = parse_single_isotherm(single_isotherm_data)
        assert parsed["format"] == "single"

    def test_pressure_unit(self, single_isotherm_data):
        parsed = parse_single_isotherm(single_isotherm_data)
        assert parsed["pressure_unit"] == "bar"

    def test_uptakes_are_floats(self, single_isotherm_data):
        parsed = parse_single_isotherm(single_isotherm_data)
        assert all(isinstance(item, float) for item in parsed["uptakes"])

    def test_uptake_values(self, single_isotherm_data):
        uptakes = parse_single_isotherm(single_isotherm_data)["uptakes"]
        assert uptakes[0] == pytest.approx(0.12345)
        assert uptakes[-1] == pytest.approx(4.12345)

    def test_errors_are_floats(self, single_isotherm_data):
        parsed = parse_single_isotherm(single_isotherm_data)
        assert all(isinstance(item, float) for item in parsed["errors"])

    def test_temperature(self, single_isotherm_data):
        parsed = parse_single_isotherm(single_isotherm_data)
        assert parsed["temperature"] == 298.0

    def test_unit(self, single_isotherm_data):
        parsed = parse_single_isotherm(single_isotherm_data)
        assert parsed["unit"] == "mol/kg"

    def test_qst_values(self, single_isotherm_data):
        parsed = parse_single_isotherm(single_isotherm_data)
        assert parsed["qst"][0] == pytest.approx(32.5)
        assert parsed["qst_unit"] == "kJ/mol"

    def test_handles_pa_keys(self):
        # Keys carry a "Pa" pressure suffix instead of "bar".
        raw = {
            "314.2Pa_298K": {
                "success": True,
                "uptake": "1.5",
                "error": "0.1",
                "unit": "mol/kg",
            },
            "628.4Pa_298K": {
                "success": True,
                "uptake": "2.5",
                "error": "0.2",
                "unit": "mol/kg",
            },
        }
        parsed = parse_single_isotherm(raw)
        assert parsed["pressure_unit"] == "Pa"
        assert parsed["pressures"] == [314.2, 628.4]

    def test_handles_missing_qst(self):
        # The entry has no "qst" / "qst_unit" fields at all.
        raw = {
            "0.1bar_273K": {
                "success": True,
                "uptake": "0.18",
                "error": "0.01",
                "unit": "mol/kg",
                "calc_time_in_s": 113.4,
            },
        }
        parsed = parse_single_isotherm(raw)
        assert math.isnan(parsed["qst"][0])
        assert parsed["qst_unit"] is None

    def test_handles_nan_values(self):
        # "-nan" / "-inf" strings in the qst fields must not disturb uptake.
        raw = {
            "1.0bar_77K": {
                "success": True,
                "uptake": "10.5",
                "error": "0.5",
                "unit": "mol/kg",
                "qst": "-nan",
                "error_qst": "-inf",
                "qst_unit": "kJ/mol",
            },
        }
        parsed = parse_single_isotherm(raw)
        assert math.isnan(parsed["qst"][0])
        assert math.isnan(parsed["qst_errors"][0])
        assert parsed["uptakes"][0] == pytest.approx(10.5)
# ==========================================
# Tests for _discover_adsorbates
# ==========================================
class TestDiscoverAdsorbates:
    """Checks the adsorbate names returned by ``_discover_adsorbates``."""

    def test_discovers_three_adsorbates(self, mixture_isotherm_data):
        found = _discover_adsorbates(mixture_isotherm_data)
        assert found == ["co2", "h2o", "n2"]

    def test_discovers_arbitrary_names(self):
        raw = {
            "0_RH": {
                "methane_uptake": 1.0,
                "methane_error": 0.1,
                "ethane_uptake": 2.0,
                "ethane_error": 0.2,
            }
        }
        found = _discover_adsorbates(raw)
        assert found == ["ethane", "methane"]


# ==========================================
# Tests for _discover_selectivity_keys
# ==========================================
class TestDiscoverSelectivityKeys:
    """Checks the field names returned by ``_discover_selectivity_keys``."""

    def test_discovers_selectivity(self, mixture_isotherm_data):
        found = _discover_selectivity_keys(mixture_isotherm_data)
        assert found == ["co2_n2_selectivity"]

    def test_no_selectivity(self):
        raw = {
            "0_RH": {
                "co2_uptake": 1.0,
                "co2_error": 0.1,
            }
        }
        found = _discover_selectivity_keys(raw)
        assert found == []


# ==========================================
# Tests for parse_mixture_isotherm
# ==========================================
class TestParseMixtureIsotherm:
    """Checks the dictionary returned by ``parse_mixture_isotherm``."""

    def test_format_is_mixture_rh(self, mixture_isotherm_data):
        parsed = parse_mixture_isotherm(mixture_isotherm_data)
        assert parsed["format"] == "mixture_rh"

    def test_sorts_by_rh(self, mixture_isotherm_data):
        parsed = parse_mixture_isotherm(mixture_isotherm_data)
        assert parsed["rh_values"] == [0, 50, 100]

    def test_discovers_adsorbates(self, mixture_isotherm_data):
        parsed = parse_mixture_isotherm(mixture_isotherm_data)
        assert parsed["adsorbates"] == ["co2", "h2o", "n2"]

    def test_uptake_values(self, mixture_isotherm_data):
        uptakes = parse_mixture_isotherm(mixture_isotherm_data)["uptakes"]
        assert uptakes["co2"][0] == pytest.approx(3.99067)
        assert uptakes["h2o"][0] == pytest.approx(0.0)

    def test_error_values(self, mixture_isotherm_data):
        errors = parse_mixture_isotherm(mixture_isotherm_data)["errors"]
        assert errors["co2"][0] == pytest.approx(0.08111)

    def test_selectivity_values(self, mixture_isotherm_data):
        selectivity = parse_mixture_isotherm(mixture_isotherm_data)[
            "selectivity"
        ]
        assert "co2_n2_selectivity" in selectivity
        first_point = selectivity["co2_n2_selectivity"][0]
        assert first_point == pytest.approx(1018.64)

    def test_uptake_lengths_match_rh(self, mixture_isotherm_data):
        parsed = parse_mixture_isotherm(mixture_isotherm_data)
        expected_len = len(parsed["rh_values"])
        # Every adsorbate must have one uptake and one error per RH point.
        for name in parsed["adsorbates"]:
            assert len(parsed["uptakes"][name]) == expected_len
            assert len(parsed["errors"][name]) == expected_len


# ==========================================
# Tests for load_isotherm
# ==========================================
class TestLoadIsotherm:
    """Checks ``load_isotherm`` on both fixture files and a missing path."""

    def test_loads_single(self, single_isotherm_path):
        loaded = load_isotherm(single_isotherm_path)
        assert loaded["format"] == "single"
        assert len(loaded["pressures"]) == 5

    def test_loads_mixture(self, mixture_isotherm_path):
        loaded = load_isotherm(mixture_isotherm_path)
        assert loaded["format"] == "mixture_rh"
        assert len(loaded["rh_values"]) == 3

    def test_file_not_found(self):
        missing = "/nonexistent/path.json"
        with pytest.raises(FileNotFoundError):
            load_isotherm(missing)
# ==========================================
# Tests for plot functions (file output)
# ==========================================
class TestPlotSingleIsotherm:
    """Checks that ``plot_single_isotherm`` writes an image file.

    NOTE(review): ``tmp_output`` is a fixture defined outside this module;
    it is joined with ``/`` so it is assumed to be a path-like directory.
    """

    def test_creates_output_file(self, single_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_single_isotherm

        target = str(tmp_output / "test_single.png")
        saved = Path(
            plot_single_isotherm([single_isotherm_path], output=target, dpi=72)
        )

        assert saved.exists()
        assert saved.stat().st_size > 0

    def test_with_log_x(self, single_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_single_isotherm

        target = str(tmp_output / "test_log.png")
        saved = plot_single_isotherm(
            [single_isotherm_path], output=target, dpi=72, log_x=True
        )

        assert Path(saved).exists()

    def test_with_no_errorbars(self, single_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_single_isotherm

        target = str(tmp_output / "test_noerr.png")
        saved = plot_single_isotherm(
            [single_isotherm_path], output=target, dpi=72, no_errorbars=True
        )

        assert Path(saved).exists()

    def test_with_custom_labels(self, single_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_single_isotherm

        target = str(tmp_output / "test_labels.png")
        saved = plot_single_isotherm(
            [single_isotherm_path],
            output=target,
            dpi=72,
            labels=["My MOF"],
            xlabel="P (bar)",
            ylabel="Loading (mol/kg)",
            title="Test Plot",
        )

        assert Path(saved).exists()

    def test_rejects_mixture_format(self, mixture_isotherm_path, tmp_output):
        from matkit.plot.isotherm import plot_single_isotherm

        target = str(tmp_output / "test_reject.png")
        wrong_inputs = [mixture_isotherm_path]
        with pytest.raises(ValueError, match="single-component"):
            plot_single_isotherm(wrong_inputs, output=target)
class TestPlotMixtureIsotherm:
    """Checks that ``plot_mixture_isotherm`` writes an image file."""

    def test_creates_output_file(self, mixture_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_mixture_isotherm

        target = str(tmp_output / "test_mixture.png")
        saved = Path(
            plot_mixture_isotherm(
                [mixture_isotherm_path], output=target, dpi=72
            )
        )

        assert saved.exists()
        assert saved.stat().st_size > 0

    def test_filter_adsorbates(self, mixture_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_mixture_isotherm

        target = str(tmp_output / "test_filtered.png")
        saved = plot_mixture_isotherm(
            [mixture_isotherm_path],
            output=target,
            dpi=72,
            adsorbates=["co2", "h2o"],
        )

        assert Path(saved).exists()

    def test_rejects_single_format(self, single_isotherm_path, tmp_output):
        from matkit.plot.isotherm import plot_mixture_isotherm

        target = str(tmp_output / "test_reject.png")
        wrong_inputs = [single_isotherm_path]
        with pytest.raises(ValueError, match="mixture_rh"):
            plot_mixture_isotherm(wrong_inputs, output=target)


class TestPlotSelectivity:
    """Checks that ``plot_selectivity`` writes an image file."""

    def test_creates_output_file(self, mixture_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_selectivity

        target = str(tmp_output / "test_sel.png")
        saved = Path(
            plot_selectivity([mixture_isotherm_path], output=target, dpi=72)
        )

        assert saved.exists()
        assert saved.stat().st_size > 0

    def test_rejects_single_format(self, single_isotherm_path, tmp_output):
        from matkit.plot.isotherm import plot_selectivity

        target = str(tmp_output / "test_reject.png")
        wrong_inputs = [single_isotherm_path]
        with pytest.raises(ValueError, match="mixture_rh"):
            plot_selectivity(wrong_inputs, output=target)


class TestPlotIsotherm:
    """Checks ``plot_isotherm`` on each of the two fixture formats."""

    def test_auto_detects_single(self, single_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_isotherm

        target = str(tmp_output / "test_auto_single.png")
        saved = plot_isotherm([single_isotherm_path], output=target, dpi=72)

        assert Path(saved).exists()

    def test_auto_detects_mixture(self, mixture_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_isotherm

        target = str(tmp_output / "test_auto_mixture.png")
        saved = plot_isotherm([mixture_isotherm_path], output=target, dpi=72)

        assert Path(saved).exists()
class TestMultiFileOverlay:
    """Checks that several input files can be drawn into one figure."""

    def test_overlay_single_isotherms(self, single_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_single_isotherm

        target = str(tmp_output / "test_overlay.png")
        inputs = [single_isotherm_path, single_isotherm_path]
        saved = plot_single_isotherm(
            inputs, output=target, dpi=72, labels=["MOF-A", "MOF-B"]
        )

        assert Path(saved).exists()

    def test_overlay_mixture_isotherms(self, mixture_isotherm_path, tmp_output):
        from pathlib import Path

        from matkit.plot.isotherm import plot_mixture_isotherm

        target = str(tmp_output / "test_overlay_mix.png")
        inputs = [mixture_isotherm_path, mixture_isotherm_path]
        saved = plot_mixture_isotherm(
            inputs, output=target, dpi=72, labels=["MOF-A", "MOF-B"]
        )

        assert Path(saved).exists()


# ==========================================
# Tests for CLI commands
# ==========================================
class TestPlotCLI:
    """Drives the ``plot`` command group through click's ``CliRunner``."""

    def test_plot_group_exists(self):
        from click.testing import CliRunner
        from matkit.cli import main

        outcome = CliRunner().invoke(main, ["plot", "--help"])
        assert outcome.exit_code == 0
        for command in ("isotherm", "selectivity"):
            assert command in outcome.output

    def test_isotherm_help(self):
        from click.testing import CliRunner
        from matkit.cli import main

        outcome = CliRunner().invoke(main, ["plot", "isotherm", "--help"])
        assert outcome.exit_code == 0
        expected_flags = (
            "--data",
            "--output",
            "--dpi",
            "--log-x",
            "--adsorbate",
        )
        for flag in expected_flags:
            assert flag in outcome.output

    def test_selectivity_help(self):
        from click.testing import CliRunner
        from matkit.cli import main

        outcome = CliRunner().invoke(main, ["plot", "selectivity", "--help"])
        assert outcome.exit_code == 0
        for flag in ("--data", "--selectivity-key"):
            assert flag in outcome.output

    def test_isotherm_command_single(self, single_isotherm_path, tmp_output):
        from click.testing import CliRunner
        from matkit.cli import main

        cli = CliRunner()
        target = str(tmp_output / "cli_single.png")
        argv = ["plot", "isotherm"]
        argv += ["--data", single_isotherm_path]
        argv += ["--output", target]
        argv += ["--dpi", "72"]
        outcome = cli.invoke(main, argv)
        assert outcome.exit_code == 0
        assert "Plot saved to" in outcome.output

    def test_isotherm_command_mixture(self, mixture_isotherm_path, tmp_output):
        from click.testing import CliRunner
        from matkit.cli import main

        cli = CliRunner()
        target = str(tmp_output / "cli_mixture.png")
        argv = ["plot", "isotherm"]
        argv += ["--data", mixture_isotherm_path]
        argv += ["--output", target]
        argv += ["--dpi", "72"]
        outcome = cli.invoke(main, argv)
        assert outcome.exit_code == 0
        assert "Plot saved to" in outcome.output

    def test_selectivity_command(self, mixture_isotherm_path, tmp_output):
        from click.testing import CliRunner
        from matkit.cli import main

        cli = CliRunner()
        target = str(tmp_output / "cli_sel.png")
        argv = ["plot", "selectivity"]
        argv += ["--data", mixture_isotherm_path]
        argv += ["--output", target]
        argv += ["--dpi", "72"]
        outcome = cli.invoke(main, argv)
        assert outcome.exit_code == 0
        assert "Plot saved to" in outcome.output

    def test_isotherm_help_has_data_dir(self):
        from click.testing import CliRunner
        from matkit.cli import main

        outcome = CliRunner().invoke(main, ["plot", "isotherm", "--help"])
        assert outcome.exit_code == 0
        assert "--data-dir" in outcome.output

    def test_isotherm_data_dir(self, test_data_dir, tmp_output):
        """Test --data-dir loads all JSON files from a dir."""
        from click.testing import CliRunner
        from matkit.cli import main

        # Create a temp dir with two single-component files
        import shutil

        data_dir = tmp_output / "iso_dir"
        data_dir.mkdir()
        source = test_data_dir / "single_isotherm.json"
        for copy_name in ("iso_298K.json", "iso_300K.json"):
            shutil.copy(source, data_dir / copy_name)

        cli = CliRunner()
        target = str(tmp_output / "cli_dir.png")
        argv = ["plot", "isotherm"]
        argv += ["--data-dir", str(data_dir)]
        argv += ["--output", target]
        argv += ["--dpi", "72"]
        outcome = cli.invoke(main, argv)
        assert outcome.exit_code == 0
        assert "Plot saved to" in outcome.output
# ==========================================
# Tests for collect_data_files
# ==========================================
class TestCollectDataFiles:
    """Checks the file list returned by ``collect_data_files``."""

    def test_explicit_files(self, single_isotherm_path):
        collected = collect_data_files(data=[single_isotherm_path])
        assert len(collected) == 1

    def test_data_dir(self, test_data_dir, tmp_output):
        import shutil

        folder = tmp_output / "coll_dir"
        folder.mkdir()
        shutil.copy(test_data_dir / "single_isotherm.json", folder / "a.json")
        shutil.copy(test_data_dir / "mixture_isotherm.json", folder / "b.json")

        collected = collect_data_files(data_dir=str(folder))
        assert len(collected) == 2

    def test_data_dir_sorted(self, test_data_dir, tmp_output):
        import shutil
        from pathlib import Path

        folder = tmp_output / "sort_dir"
        folder.mkdir()
        source = test_data_dir / "single_isotherm.json"
        # Copy "z" before "a" so creation order differs from name order.
        for copy_name in ("z.json", "a.json"):
            shutil.copy(source, folder / copy_name)

        collected = collect_data_files(data_dir=str(folder))
        assert Path(collected[0]).name == "a.json"
        assert Path(collected[1]).name == "z.json"

    def test_deduplicates(self, single_isotherm_path):
        repeated = [single_isotherm_path, single_isotherm_path]
        collected = collect_data_files(data=repeated)
        assert len(collected) == 1

    def test_combined(self, test_data_dir, tmp_output):
        import shutil

        folder = tmp_output / "combo_dir"
        folder.mkdir()
        shutil.copy(test_data_dir / "single_isotherm.json", folder / "a.json")

        explicit = [str(test_data_dir / "mixture_isotherm.json")]
        collected = collect_data_files(data=explicit, data_dir=str(folder))
        assert len(collected) == 2

    def test_missing_dir_raises(self):
        missing = "/no/such/dir"
        with pytest.raises(FileNotFoundError):
            collect_data_files(data_dir=missing)

    def test_empty_raises(self):
        with pytest.raises(ValueError, match="No data files"):
            collect_data_files()
# ==========================================
# Tests for auto temperature labels
# ==========================================
class TestAutoTemperatureLabels:
    """Smoke tests for legend-label selection in ``plot_single_isotherm``.

    NOTE(review): every test below only asserts that the output file was
    written. The legend text is never inspected, so the label behaviour
    named in each docstring is the intent, not something these tests
    verify.
    """

    def test_auto_labels_multi_file(self, single_isotherm_path, tmp_output):
        """Two files with same T (298K) get '298 K' labels."""
        from pathlib import Path

        from matkit.plot.isotherm import plot_single_isotherm

        outfile = str(tmp_output / "test_auto_temp.png")
        # Both files have temperature 298 -- labels should be
        # auto-generated as "298 K" instead of filenames.
        result = plot_single_isotherm(
            [single_isotherm_path, single_isotherm_path],
            output=outfile,
            dpi=72,
        )

        assert Path(result).exists()

    def test_custom_labels_override(self, single_isotherm_path, tmp_output):
        """Custom labels should take precedence over auto."""
        from pathlib import Path

        from matkit.plot.isotherm import plot_single_isotherm

        outfile = str(tmp_output / "test_override.png")
        result = plot_single_isotherm(
            [single_isotherm_path, single_isotherm_path],
            output=outfile,
            dpi=72,
            labels=["Run A", "Run B"],
        )

        assert Path(result).exists()

    def test_single_file_uses_filename(self, single_isotherm_path, tmp_output):
        """Single file should use filename, not temperature."""
        from pathlib import Path

        from matkit.plot.isotherm import plot_single_isotherm

        outfile = str(tmp_output / "test_single_label.png")
        result = plot_single_isotherm(
            [single_isotherm_path],
            output=outfile,
            dpi=72,
        )

        assert Path(result).exists()

    def test_multi_temp_real_data(self, tmp_output):
        """Overlay real multi-temperature results when a data dir is set.

        The directory is taken from the ``MATKIT_REAL_DATA_DIR`` environment
        variable rather than a path on one developer's machine. The test is
        skipped when the variable is unset, the directory does not exist, or
        it holds fewer than two ``*.json`` files.
        """
        import os
        from pathlib import Path

        configured = os.environ.get("MATKIT_REAL_DATA_DIR")
        if not configured:
            pytest.skip("MATKIT_REAL_DATA_DIR is not set")

        data_dir = Path(configured)
        if not data_dir.is_dir():
            pytest.skip("Real data directory not found")

        # Imported only after the skip checks, as in the original ordering.
        from matkit.plot.isotherm import plot_single_isotherm

        files = sorted(str(f) for f in data_dir.glob("*.json"))
        if len(files) < 2:
            pytest.skip("Need at least 2 JSON files")

        outfile = str(tmp_output / "test_real_multi.png")
        result = plot_single_isotherm(files, output=outfile, dpi=72)
        assert Path(result).exists()