
Commit 02bd061

Merge pull request #564 from OpenCOMPES/count_rate_flash
Add len and count rate methods to flash loader
2 parents 0279692 + c867653 commit 02bd061

2 files changed: +70 -8 lines


.cspell/custom-dictionary.txt

Lines changed: 1 addition & 0 deletions
@@ -64,6 +64,7 @@ cryo
 cstart
 cstep
 csvfile
+cumsum
 custom-dictionary
 cval
 cvdist

src/sed/loader/flash/loader.py

Lines changed: 69 additions & 8 deletions
@@ -15,6 +15,7 @@
 from pathlib import Path
 
 import dask.dataframe as dd
+import numpy as np
 from natsort import natsorted
 
 from sed.core.logging import set_verbosity
@@ -79,6 +80,21 @@ def verbose(self, verbose: bool):
         self._verbose = verbose
         set_verbosity(logger, self._verbose)
 
+    def __len__(self) -> int:
+        """
+        Returns the total number of rows in the electron resolved dataframe.
+
+        Returns:
+            int: Total number of rows.
+        """
+        try:
+            file_statistics = self.metadata["file_statistics"]["electron"]
+        except KeyError as exc:
+            raise KeyError("File statistics missing. Use 'read_dataframe' first.") from exc
+
+        total_rows = sum(stats["num_rows"] for stats in file_statistics.values())
+        return total_rows
+
     def _initialize_dirs(self) -> None:
         """
         Initializes the directories on Maxwell based on configuration. If paths is provided in
@@ -223,12 +239,57 @@ def parse_metadata(self, token: str = None) -> dict:
 
         return metadata
 
-    def get_count_rate(
-        self,
-        fids: Sequence[int] = None,  # noqa: ARG002
-        **kwds,  # noqa: ARG002
-    ):
-        return None, None
+    def get_count_rate(self, fids=None, **kwds) -> tuple[np.ndarray, np.ndarray]:
+        """
+        Calculates the count rate using the number of rows and elapsed time for each file.
+        Hence the resolution is not very high, but this method is very fast.
+
+        Args:
+            fids (Sequence[int]): A sequence of file IDs. Defaults to all files.
+
+        Keyword Args:
+            runs: A sequence of run IDs.
+
+        Returns:
+            tuple[np.ndarray, np.ndarray]: The count rate and elapsed time in seconds.
+
+        Raises:
+            KeyError: If the file statistics are missing.
+        """
+
+        def counts_per_file(fid):
+            try:
+                file_statistics = self.metadata["file_statistics"]["electron"]
+            except KeyError as exc:
+                raise KeyError("File statistics missing. Use 'read_dataframe' first.") from exc
+
+            counts = file_statistics[str(fid)]["num_rows"]
+            return counts
+
+        runs = kwds.pop("runs", None)
+        if len(kwds) > 0:
+            raise TypeError(f"get_count_rate() got unexpected keyword arguments {kwds.keys()}.")
+        all_counts = []
+        elapsed_times = []
+        if runs is not None:
+            fids = []
+            for run_id in runs:
+                if self.raw_dir is None:
+                    self._initialize_dirs()
+                files = self.get_files_from_run_id(run_id=run_id, folders=self.raw_dir)
+                for file in files:
+                    fids.append(self.files.index(file))
+        else:
+            if fids is None:
+                fids = range(len(self.files))
+
+        for fid in fids:
+            all_counts.append(counts_per_file(fid))
+            elapsed_times.append(self.get_elapsed_time(fids=[fid]))
+
+        count_rate = np.array(all_counts) / np.array(elapsed_times)
+        seconds = np.cumsum(elapsed_times)
+        return count_rate, seconds
 
     def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float | list[float]:  # type: ignore[override]
         """
@@ -254,7 +315,7 @@ def get_elapsed_time(self, fids: Sequence[int] = None, **kwds) -> float | list[f
             raise KeyError(
                 "File statistics missing. Use 'read_dataframe' first.",
             ) from exc
-        time_stamp_alias = self._config["dataframe"].get("time_stamp_alias", "timeStamp")
+        time_stamp_alias = self._config["dataframe"]["columns"].get("timestamp", "timeStamp")
 
         def get_elapsed_time_from_fid(fid):
            try:
@@ -407,7 +468,7 @@ def read_dataframe(
         self.metadata.update(self.parse_metadata(token) if collect_metadata else {})
         self.metadata.update(bh.metadata)
 
-        print(f"loading complete in {time.time() - t0: .2f} s")
+        logger.info(f"Loading complete in {time.time() - t0: .2f} s")
 
         return df, df_timed, self.metadata

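For context, a minimal usage sketch of the two new loader methods (not part of this commit). The config file name and run number are hypothetical placeholders, and it assumes the config is loaded with sed.core.config.parse_config:

# Hypothetical usage sketch of the new __len__ and get_count_rate methods.
from sed.core.config import parse_config
from sed.loader.flash.loader import FlashLoader

config = parse_config("config_flash.yaml")  # placeholder config file
loader = FlashLoader(config=config)

# read_dataframe populates metadata["file_statistics"], which both new methods rely on
df, df_timed, metadata = loader.read_dataframe(runs=[44762], collect_metadata=False)

# Total number of rows in the electron resolved dataframe
print(len(loader))

# Per-file count rate and cumulative elapsed time in seconds
count_rate, seconds = loader.get_count_rate()

# Or restricted to specific runs via the new keyword argument
count_rate, seconds = loader.get_count_rate(runs=[44762])

Because the rate is derived from each file's row count divided by its elapsed time, the returned arrays contain one point per file: coarse time resolution, but no event data needs to be loaded.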