ComPlat
diff --git a/‎.gitignore‎
Lines changed: 2 additions & 1 deletion b/‎.gitignore‎
Lines changed: 2 additions & 1 deletion
diff --git a/‎BinaryParser.egg-info/PKG-INFO‎
Lines changed: 3 additions & 0 deletions b/‎BinaryParser.egg-info/PKG-INFO‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎BinaryParser.egg-info/SOURCES.txt‎
Lines changed: 2 additions & 0 deletions b/‎BinaryParser.egg-info/SOURCES.txt‎
Lines changed: 2 additions & 0 deletions
diff --git a/‎BinaryParser.egg-info/requires.txt‎
Lines changed: 3 additions & 0 deletions b/‎BinaryParser.egg-info/requires.txt‎
Lines changed: 3 additions & 0 deletions
diff --git a/‎BinaryParser.egg-info/top_level.txt‎
Lines changed: 1 addition & 0 deletions b/‎BinaryParser.egg-info/top_level.txt‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎build/lib.linux-x86_64-cpython-312/BinaryParser/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎build/lib.linux-x86_64-cpython-312/BinaryParser/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/__init__.py‎
Lines changed: 1 addition & 0 deletions b/‎build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/__init__.py‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/openlab.py‎
Lines changed: 130 additions & 0 deletions b/‎build/lib.linux-x86_64-cpython-312/BinaryParser/openlab/openlab.py‎
Lines changed: 130 additions & 0 deletions
diff --git a/‎build/lib.linux-x86_64-cpython-312/BinaryParser/setup.py‎
Lines changed: 16 additions & 3 deletions b/‎build/lib.linux-x86_64-cpython-312/BinaryParser/setup.py‎
Lines changed: 16 additions & 3 deletions
@@ -1,2 +1,3 @@
 bin/TestDaten
-
+.development
+tests/__pycache__
@@ -11,6 +11,9 @@ Requires-Dist: pandas
 Requires-Dist: numpy
 Requires-Dist: typeguard
 Requires-Dist: plotly
+Requires-Dist: matplotlib
+Requires-Dist: seaborn
+Requires-Dist: netCDF4
 Provides-Extra: test
 Requires-Dist: pytest; extra == "test"
 Dynamic: author
 
@@ -14,6 +14,8 @@ chemstation/__init__.py
 chemstation/read_ms_file.py
 hplc/__init__.py
 hplc/read_files.py
+openlab/__init__.py
+openlab/openlab.py
 src/parser_hplc.cpp
 src/parser_ms.cpp
 src/parser_xray.cpp
 
@@ -3,6 +3,9 @@ pandas
 numpy
 typeguard
 plotly
+matplotlib
+seaborn
+netCDF4
 
 [test]
 pytest
@@ -1,6 +1,7 @@
 BinaryParser
 chemstation
 hplc
+openlab
 parser_hplc
 parser_ms
 parser_xray
 
@@ -1,4 +1,12 @@
 from hplc import read_chromatograms, plot_chromatograms
 from chemstation import read_chemstation_file
+from openlab import read_attr, read_lc, read_ms
 
-__all__ = ["read_chromatograms", "plot_chromatograms", "read_chemstation_file"]
+# Every name imported above for re-export is listed, including the openlab readers.
+__all__ = [
+    "read_chromatograms",
+    "plot_chromatograms",
+    "read_chemstation_file",
+    "read_attr",
+    "read_lc",
+    "read_ms",
+]
@@ -1,4 +1,12 @@
 from hplc import read_chromatograms, plot_chromatograms
 from chemstation import read_chemstation_file
+from openlab import read_attr, read_lc, read_ms
 
-__all__ = ["read_chromatograms", "plot_chromatograms", "read_chemstation_file"]
+# Every name imported above for re-export is listed, including the openlab readers.
+__all__ = [
+    "read_chromatograms",
+    "plot_chromatograms",
+    "read_chemstation_file",
+    "read_attr",
+    "read_lc",
+    "read_ms",
+]
@@ -0,0 +1,4 @@
+# Re-export the public readers; listing them in __all__ alone does not import them.
+from .openlab import read_attr, read_lc, read_ms
+
+__all__ = ["read_attr", "read_lc", "read_ms"]
@@ -0,0 +1,130 @@
+import os
+import netCDF4 as nc
+import pandas as pd
+import numpy as np
+import re
+from typeguard import typechecked
+from typing import List
+
+
+@typechecked
+def get_files(path: str) -> List[str]:
+    """Return the paths of all ``.cdf`` files directly inside ``path``.
+
+    The result is sorted so that callers which select files by position see
+    the same order on every run; ``os.listdir`` returns entries in arbitrary
+    order.
+
+    Raises:
+        AssertionError: if ``path`` contains no ``.cdf`` file.
+    """
+    fs = sorted(
+        os.path.join(path, f) for f in os.listdir(path) if f.endswith(".cdf")
+    )
+    # Raised explicitly rather than via ``assert`` so the check is not
+    # stripped under ``python -O``; the exception type stays the same.
+    if not fs:
+        raise AssertionError("No files found")
+    return fs
+
+
+# Attributes
+@typechecked
+def get_attr(path: str):
+    """Read every global attribute of the netCDF file at ``path`` into a dict."""
+    attributes = {}
+    with nc.Dataset(path, "r") as handle:
+        for name in handle.ncattrs():
+            attributes[name] = handle.getncattr(name)
+    return attributes
+
+
+@typechecked
+def read_attr(path: str) -> pd.DataFrame:
+    """Collect the global attributes of every ``.cdf`` file in ``path``.
+
+    Each file contributes one single-row frame built from its attribute
+    dict; the rows are concatenated with a fresh index.
+    """
+    rows = [pd.DataFrame([get_attr(cdf_file)]) for cdf_file in get_files(path)]
+    return pd.concat(rows, ignore_index=True)
+
+
+# LC Data
+@typechecked
+def get_lc_data(path: str) -> pd.DataFrame:
+    """Load one chromatogram from the netCDF file at ``path``.
+
+    Returns a frame with ``RetentionTime`` (evenly spaced from 0 to the
+    file's ``actual_run_time_length``, one point per signal value) and
+    ``DetectorSignal`` (the ``ordinate_values`` variable). The
+    ``detector_name`` global attribute, or ``""`` when it is absent, is
+    stored in ``attrs["detector"]`` of the returned frame.
+    """
+    with nc.Dataset(path, "r") as handle:
+        signal = handle.variables["ordinate_values"][:]
+        file_attrs = {name: handle.getncattr(name) for name in handle.ncattrs()}
+        run_length = handle.variables["actual_run_time_length"][...]
+
+    time_axis = np.linspace(0, run_length, num=len(signal))
+    frame = pd.DataFrame({"RetentionTime": time_axis, "DetectorSignal": signal})
+    frame.attrs["detector"] = file_attrs.get("detector_name", "")
+    return frame
+
+
+@typechecked
+def process_detector_info(df_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
+    """Add a ``wavelength`` column to every frame, in place.
+
+    The value is the first run of digits in the second comma-separated field
+    of ``attrs["detector"]``, as a float. It is ``None`` when the detector
+    name has no comma or that field contains no digits. The same list
+    object is returned.
+    """
+    for frame in df_list:
+        name = frame.attrs.get("detector", "")
+        wavelength = None
+        if "," in name:
+            found = re.search(r"\d+", name.split(",")[1])
+            if found:
+                wavelength = float(found.group())
+        frame["wavelength"] = wavelength
+    return df_list
+
+
+@typechecked
+def read_lc(path: str) -> pd.DataFrame:
+    """Read all DAD chromatograms found in ``path`` into a single frame.
+
+    Only ``.cdf`` files whose base name contains ``"DAD"`` are loaded. Each
+    trace gets its ``wavelength`` column before the traces are concatenated
+    with a fresh index.
+    """
+    dad_files = [f for f in get_files(path) if "DAD" in os.path.basename(f)]
+    traces = process_detector_info([get_lc_data(f) for f in dad_files])
+    return pd.concat(traces, ignore_index=True)
+
+
+# MS Data
+@typechecked
+def get_point_counts(path: str) -> List[int]:
+    """Return the ``point_count`` variable of the file at ``path``.
+
+    The values are returned as a plain list of Python ints, one per entry
+    of the variable.
+    """
+    with nc.Dataset(path, "r") as dataset:
+        counts = dataset.variables["point_count"][:]
+    # netCDF4 returns a (masked) NumPy array here. Convert it so the value
+    # really is the declared ``List[int]`` and passes the ``@typechecked``
+    # return check instead of raising a type-check error.
+    return [int(count) for count in np.asarray(counts).ravel()]
+
+
+@typechecked
+def get_ms_data(path: str) -> pd.DataFrame:
+    """Load ``mass_values`` and ``intensity_values`` from the file at ``path``.
+
+    Returns a frame with the columns ``mz`` and ``intensities``.
+    """
+    column_sources = (("mz", "mass_values"), ("intensities", "intensity_values"))
+    columns = {}
+    with nc.Dataset(path, "r") as handle:
+        for column, variable in column_sources:
+            columns[column] = handle.variables[variable][:]
+    return pd.DataFrame(columns)
+
+
+@typechecked
+def get_scan_time(path: str) -> List[float]:
+    """Return ``scan_acquisition_time`` of the file at ``path`` divided by 60.
+
+    NOTE(review): the division presumably converts seconds to minutes;
+    confirm against the unit stored in the files.
+    """
+    with nc.Dataset(path, "r") as dataset:
+        time = dataset.variables["scan_acquisition_time"][:]
+    # netCDF4 returns a (masked) NumPy array. Convert to a list of Python
+    # floats so the result matches the declared ``List[float]`` and passes
+    # the ``@typechecked`` return check.
+    return (np.asarray(time, dtype=float) / 60).tolist()
+
+
+@typechecked
+def split_data(data: pd.DataFrame, point_counts: List[int]) -> List[pd.DataFrame]:
+    """Cut ``data`` into consecutive row slices of the given lengths.
+
+    The i-th returned frame holds the ``point_counts[i]`` rows that follow
+    the rows consumed by the earlier counts.
+    """
+    pieces = []
+    offset = 0
+    for count in point_counts:
+        pieces.append(data.iloc[offset : offset + count])
+        offset += count
+    return pieces
+
+
+@typechecked
+def normalise(data_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
+    """Rescale ``intensities`` of every frame so its maximum becomes 100.
+
+    New frames are returned; the inputs are not modified.
+    """
+    scaled = []
+    for spectrum in data_list:
+        factor = 100 / spectrum["intensities"].max()
+        scaled.append(spectrum.assign(intensities=spectrum["intensities"] * factor))
+    return scaled
+
+
+@typechecked
+def _read_ms_file(path: str) -> pd.DataFrame:
+    """Load one spectra file as a single frame of normalised, time-tagged scans."""
+    scans = normalise(split_data(get_ms_data(path), get_point_counts(path)))
+    times = get_scan_time(path)
+    return pd.concat([scan.assign(time=t) for scan, t in zip(scans, times)])
+
+
+@typechecked
+def read_ms(path: str) -> List[pd.DataFrame]:
+    """Read the two MS spectra files found in ``path``.
+
+    Only ``.cdf`` files whose base name contains ``"spectra"`` are used.
+    They are sorted by path so the choice of file is deterministic, and the
+    first two are loaded.
+
+    Returns:
+        ``[first, second]``: one frame per file with the columns ``mz``,
+        ``intensities`` (scaled per scan to a maximum of 100) and ``time``.
+        NOTE(review): the first file is treated as the negative ("minus")
+        and the second as the positive ("plus") polarity; this assumes the
+        negative-mode file sorts first. Confirm against the export naming.
+
+    Raises:
+        IndexError: if fewer than two spectra files are present.
+    """
+    fs_ms = sorted(f for f in get_files(path) if "spectra" in os.path.basename(f))
+    if len(fs_ms) < 2:
+        # Same exception type as the bare fs_ms[1] lookup, with a clear message.
+        raise IndexError(
+            f"Expected at least two 'spectra' .cdf files in {path!r}, "
+            f"found {len(fs_ms)}"
+        )
+    return [_read_ms_file(fs_ms[0]), _read_ms_file(fs_ms[1])]
@@ -37,8 +37,21 @@
     zip_safe=False,
     python_requires=">=3.7",
     # packages=find_packages(),
-    packages=(["BinaryParser"] + find_packages()),
-    package_dir={"BinaryParser": "."},
+    packages=(["BinaryParser"] + ["openlab"] + find_packages()),
+    # package_dir={"BinaryParser": "."},
+    package_dir={
+        "BinaryParser": ".",
+        "BinaryParser.openlab": "./openlab",
+    },
     setup_requires=["pybind11"],
-    install_requires=["pybind11", "pandas", "numpy", "typeguard", "plotly"],
+    install_requires=[
+        "pybind11",
+        "pandas",
+        "numpy",
+        "typeguard",
+        "plotly",
+        "matplotlib",
+        "seaborn",
+        "netCDF4",
+    ],
 )
Original file line number	Diff line number	Diff line change
`@@ -0,0 +1 @@`
	`1`	`+__all__ = ["read_attr", "read_lc", "read_ms"]`