Skip to content

Commit 7f325ee

Browse files
committed
added parser for openlab LCMS
1 parent e981d89 commit 7f325ee

35 files changed

Lines changed: 510 additions & 7 deletions

.gitignore

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,3 @@
11
bin/TestDaten
2-
2+
.development
3+
tests/__pycache__

BinaryParser.egg-info/PKG-INFO

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,9 @@ Requires-Dist: pandas
1111
Requires-Dist: numpy
1212
Requires-Dist: typeguard
1313
Requires-Dist: plotly
14+
Requires-Dist: matplotlib
15+
Requires-Dist: seaborn
16+
Requires-Dist: netCDF4
1417
Provides-Extra: test
1518
Requires-Dist: pytest; extra == "test"
1619
Dynamic: author

BinaryParser.egg-info/SOURCES.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -14,6 +14,8 @@ chemstation/__init__.py
1414
chemstation/read_ms_file.py
1515
hplc/__init__.py
1616
hplc/read_files.py
17+
openlab/__init__.py
18+
openlab/openlab.py
1719
src/parser_hplc.cpp
1820
src/parser_ms.cpp
1921
src/parser_xray.cpp

BinaryParser.egg-info/requires.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,9 @@ pandas
33
numpy
44
typeguard
55
plotly
6+
matplotlib
7+
seaborn
8+
netCDF4
69

710
[test]
811
pytest

BinaryParser.egg-info/top_level.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
BinaryParser
22
chemstation
33
hplc
4+
openlab
45
parser_hplc
56
parser_ms
67
parser_xray

__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from hplc import read_chromatograms, plot_chromatograms
22
from chemstation import read_chemstation_file
3+
from openlab import read_attr, read_lc, read_ms
34

45
# Public API of the package: every name imported above is re-exported,
# including the OpenLab readers (read_attr, read_lc, read_ms).
__all__ = [
    "read_chromatograms",
    "plot_chromatograms",
    "read_chemstation_file",
    "read_attr",
    "read_lc",
    "read_ms",
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
from hplc import read_chromatograms, plot_chromatograms
22
from chemstation import read_chemstation_file
3+
from openlab import read_attr, read_lc, read_ms
34

45
# Public API of the package: every name imported above is re-exported,
# including the OpenLab readers (read_attr, read_lc, read_ms).
__all__ = [
    "read_chromatograms",
    "plot_chromatograms",
    "read_chemstation_file",
    "read_attr",
    "read_lc",
    "read_ms",
]
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
# Re-export the readers from the implementation module so that
# ``from openlab import read_attr, read_lc, read_ms`` resolves; declaring
# the names in ``__all__`` alone does not bind them in this package.
from .openlab import read_attr, read_lc, read_ms

__all__ = ["read_attr", "read_lc", "read_ms"]
Lines changed: 130 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,130 @@
1+
import os
2+
import netCDF4 as nc
3+
import pandas as pd
4+
import numpy as np
5+
import re
6+
from typeguard import typechecked
7+
from typing import List
8+
9+
10+
@typechecked
def get_files(path: str) -> List[str]:
    """Return the paths of all ``.cdf`` files located directly in *path*.

    The paths are returned in sorted order. ``os.listdir`` yields entries
    in arbitrary order, and callers that pick files by position need a
    deterministic sequence.

    Args:
        path: Directory to scan (not searched recursively).

    Returns:
        Sorted list of ``os.path.join(path, name)`` for every entry whose
        name ends with ``.cdf``.

    Raises:
        AssertionError: If the directory contains no ``.cdf`` file.
    """
    fs = sorted(
        os.path.join(path, f) for f in os.listdir(path) if f.endswith(".cdf")
    )
    if not fs:
        # Raised explicitly rather than via ``assert`` so the check is not
        # stripped under ``python -O``; the exception type is unchanged.
        raise AssertionError("No files found")
    return fs
15+
16+
17+
# Attributes
18+
@typechecked
def get_attr(path: str):
    """Collect the global attributes of the netCDF file at *path*.

    Returns:
        A dict mapping each name reported by ``ncattrs()`` to the value
        returned by ``getncattr()`` for that name.
    """
    with nc.Dataset(path, "r") as dataset:
        attr = {}
        for key in dataset.ncattrs():
            attr[key] = dataset.getncattr(key)
    return attr
23+
24+
25+
@typechecked
def read_attr(path: str) -> pd.DataFrame:
    """Build a table of global attributes for every ``.cdf`` file in *path*.

    Each file found by ``get_files`` contributes one row; the columns are
    the attribute names returned by ``get_attr``.
    """
    per_file_rows = [pd.DataFrame([get_attr(f)]) for f in get_files(path)]
    return pd.concat(per_file_rows, ignore_index=True)
31+
32+
33+
# LC Data
34+
@typechecked
def get_lc_data(path: str) -> pd.DataFrame:
    """Read one chromatogram from the netCDF file at *path*.

    Returns:
        A DataFrame with two columns:

        * ``RetentionTime`` - evenly spaced values from 0 to the file's
          ``actual_run_time_length``, one per signal sample.
        * ``DetectorSignal`` - the ``ordinate_values`` variable.

        The global attribute ``detector_name`` (or ``""`` when the file
        has none) is stored in ``DataFrame.attrs["detector"]``.
    """
    with nc.Dataset(path, "r") as dataset:
        detector_signals = dataset.variables["ordinate_values"][:]
        if "detector_name" in dataset.ncattrs():
            detector = dataset.getncattr("detector_name")
        else:
            detector = ""
        run_time_length = dataset.variables["actual_run_time_length"][...]

    # The time axis is reconstructed from the run length and the number of
    # samples rather than read from the file.
    retention_time = np.linspace(0, run_time_length, num=len(detector_signals))
    data = pd.DataFrame(
        {"RetentionTime": retention_time, "DetectorSignal": detector_signals}
    )
    data.attrs["detector"] = detector
    return data
50+
51+
52+
@typechecked
def process_detector_info(df_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
    """Add a ``wavelength`` column to each frame, derived from its detector name.

    The detector name is read from ``df.attrs["detector"]``. The first run
    of digits in its second comma-separated field is converted to ``float``
    (presumably the detection wavelength - confirm against the instrument's
    naming scheme). When the name has no comma or that field has no digits,
    the column is set to ``None``.

    The frames are modified in place and the same list is returned.
    """
    for frame in df_list:
        name = frame.attrs.get("detector", "")
        digits = None
        if "," in name:
            second_field = name.split(",")[1]
            digits = re.search(r"\d+", second_field)
        if digits:
            wavelength = float(digits.group())
        else:
            wavelength = None
        frame["wavelength"] = wavelength
    return df_list
64+
65+
66+
@typechecked
def read_lc(path: str) -> pd.DataFrame:
    """Read all DAD chromatograms in *path* into one long DataFrame.

    Only ``.cdf`` files whose base name contains ``"DAD"`` are used. Each
    file is loaded with ``get_lc_data``, tagged with a ``wavelength``
    column by ``process_detector_info``, and the results are concatenated
    with a fresh index.

    Raises:
        AssertionError: If *path* contains no ``.cdf`` file at all
            (raised by ``get_files``).
        ValueError: If none of the ``.cdf`` files has ``"DAD"`` in its
            name.
    """
    dad_files = [f for f in get_files(path) if "DAD" in os.path.basename(f)]
    if not dad_files:
        # pd.concat([]) would raise ValueError anyway; fail earlier with a
        # message that names the actual cause.
        raise ValueError(
            f"No objects to concatenate: no .cdf file containing 'DAD' "
            f"in its name found in {path!r}"
        )
    frames = process_detector_info([get_lc_data(f) for f in dad_files])
    return pd.concat(frames, ignore_index=True)
75+
76+
77+
# MS Data
78+
@typechecked
def get_point_counts(path: str) -> List[int]:
    """Return the ``point_count`` variable of the netCDF file at *path*.

    Returns:
        The values as a plain list of Python ints (presumably the number
        of data points in each scan - confirm against the file format).
    """
    with nc.Dataset(path, "r") as dataset:
        counts = dataset.variables["point_count"][:]
    # Slicing a netCDF variable yields a NumPy (masked) array, which does
    # not satisfy the declared ``List[int]`` return type enforced by
    # ``@typechecked``; convert to a real list of ints. ``np.asarray``
    # drops a mask, if any, and keeps the underlying data.
    return np.asarray(counts, dtype=int).tolist()
82+
83+
84+
@typechecked
def get_ms_data(path: str) -> pd.DataFrame:
    """Read the mass-spectrum points of the netCDF file at *path*.

    Returns:
        A DataFrame with the columns ``mz`` (variable ``mass_values``) and
        ``intensities`` (variable ``intensity_values``).
    """
    with nc.Dataset(path, "r") as dataset:
        variables = dataset.variables
        columns = {
            "mz": variables["mass_values"][:],
            "intensities": variables["intensity_values"][:],
        }
        return pd.DataFrame(columns)
90+
91+
92+
@typechecked
def get_scan_time(path: str) -> List[float]:
    """Return ``scan_acquisition_time`` of the file at *path*, divided by 60.

    Returns:
        One float per stored value (presumably seconds converted to
        minutes - confirm the unit against the file format). Masked
        entries, if any, become ``nan``.
    """
    with nc.Dataset(path, "r") as dataset:
        time = dataset.variables["scan_acquisition_time"][:]
    # Slicing a netCDF variable yields a NumPy (masked) array, which does
    # not satisfy the declared ``List[float]`` return type enforced by
    # ``@typechecked``; convert to a real list of floats.
    values = np.ma.filled(time.astype(float), np.nan)
    return (values / 60).tolist()
97+
98+
99+
@typechecked
def split_data(data: pd.DataFrame, point_counts: List[int]) -> List[pd.DataFrame]:
    """Cut *data* into consecutive row slices of the given lengths.

    The i-th returned frame holds ``point_counts[i]`` rows, starting where
    the previous slice ended. Slices are taken positionally with ``iloc``.
    """
    pieces = []
    start = 0
    # Each cumulative sum is the exclusive end position of one slice and
    # the start position of the next.
    for end in np.cumsum(point_counts):
        pieces.append(data.iloc[start:end])
        start = end
    return pieces
104+
105+
106+
@typechecked
def normalise(data_list: List[pd.DataFrame]) -> List[pd.DataFrame]:
    """Scale the ``intensities`` column of each frame so its maximum is 100.

    Args:
        data_list: Frames that each contain an ``intensities`` column.

    Returns:
        New frames (the inputs are not modified) in the same order. A
        frame whose maximum intensity is exactly 0 is returned unscaled,
        because the scale factor ``100 / max`` is undefined there and
        would turn the column into inf/NaN.
    """
    scaled = []
    for df in data_list:
        intensities = df["intensities"]
        peak = intensities.max()
        if peak == 0:
            # Keep the float dtype the scaled frames have, but leave the
            # values untouched.
            scaled.append(df.assign(intensities=intensities.astype(float)))
        else:
            scaled.append(df.assign(intensities=intensities * (100 / peak)))
    return scaled
112+
113+
114+
def _read_ms_file(path: str) -> pd.DataFrame:
    """Load one spectra file: split into scans, normalise, tag with scan time."""
    scans = normalise(split_data(get_ms_data(path), get_point_counts(path)))
    times = get_scan_time(path)
    return pd.concat([df.assign(time=t) for df, t in zip(scans, times)])


@typechecked
def read_ms(path: str) -> List[pd.DataFrame]:
    """Read the two MS spectra files found in *path*.

    Only ``.cdf`` files whose base name contains ``"spectra"`` are used.
    They are processed in sorted-name order so the result does not depend
    on the directory listing order. Each file is split into scans, every
    scan is normalised to a maximum intensity of 100, and a ``time``
    column with the scan time is added.

    Returns:
        ``[first, second]`` - one DataFrame per spectra file, in sorted
        file-name order. The original code called these "minus" and
        "plus"; TODO confirm that the negative-mode file sorts first for
        the instrument's naming scheme. Files beyond the first two are
        ignored.

    Raises:
        AssertionError: If *path* contains no ``.cdf`` file at all
            (raised by ``get_files``).
        IndexError: If fewer than two spectra files are present.
    """
    fs_ms = sorted(f for f in get_files(path) if "spectra" in os.path.basename(f))
    if len(fs_ms) < 2:
        raise IndexError(
            f"Expected two 'spectra' .cdf files in {path!r}, found {len(fs_ms)}"
        )
    return [_read_ms_file(fs_ms[0]), _read_ms_file(fs_ms[1])]

build/lib.linux-x86_64-cpython-312/BinaryParser/setup.py

Lines changed: 16 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -37,8 +37,21 @@
3737
zip_safe=False,
3838
python_requires=">=3.7",
3939
# packages=find_packages(),
40-
packages=(["BinaryParser"] + find_packages()),
41-
package_dir={"BinaryParser": "."},
40+
packages=(["BinaryParser"] + ["openlab"] + find_packages()),
41+
# package_dir={"BinaryParser": "."},
42+
package_dir={
43+
"BinaryParser": ".",
44+
"BinaryParser.openlab": "./openlab",
45+
},
4246
setup_requires=["pybind11"],
43-
install_requires=["pybind11", "pandas", "numpy", "typeguard", "plotly"],
47+
install_requires=[
48+
"pybind11",
49+
"pandas",
50+
"numpy",
51+
"typeguard",
52+
"plotly",
53+
"matplotlib",
54+
"seaborn",
55+
"netCDF4",
56+
],
4457
)

0 commit comments

Comments
 (0)