Skip to content

Commit 0046f74

Browse files
committed
fix linting issues and clean up
Note: pydocstyle complaints about missing docstrings in overloaded magic methods are ignored, as they should be fixed in the foreseeable future (see PyCQA/pydocstyle#525)
1 parent 011e866 commit 0046f74

File tree

1 file changed

+99
-81
lines changed

1 file changed

+99
-81
lines changed

src/atmcirclib/simulations.py

+99-81
Original file line numberDiff line numberDiff line change
@@ -25,31 +25,32 @@
2525

2626
# Third-party
2727
import numpy as np
28+
import numpy.typing as npt
2829
import pandas as pd
2930

30-
# Generic type aliases
31+
# First-party
32+
from atmcirclib.typing import PathLike_T
3133

32-
# Misc.
33-
RawPathT = Union[Path, str]
34+
# Generic type aliases
3435

3536
# Time step notations
3637
DDHH = Tuple[int, int]
3738
DDHHMM = Tuple[int, int, int]
3839
YYYYMMDDHH = Tuple[int, int, int, int]
3940

4041
# Raw time interval(s)
41-
RawIntervalT = Union[pd.Interval, Tuple[Union[DDHH, DDHHMM], Union[DDHH, DDHHMM]]]
42-
RawIntervalsT = Sequence[RawIntervalT]
43-
RawIntervalST = Union[RawIntervalT, RawIntervalsT]
42+
IntervalLike_T = Union[pd.Interval, Tuple[Union[DDHH, DDHHMM], Union[DDHH, DDHHMM]]]
43+
IntervalsLike_T = Sequence[IntervalLike_T]
44+
IntervalSLike_T = Union[IntervalLike_T, IntervalsLike_T]
4445

4546
# Raw output stream type(s)
46-
RawOutputStreamTypeT = Union["OutputStreamType", str]
47-
RawOutputStreamTypesT = Sequence[RawOutputStreamTypeT]
48-
RawOutputStreamTypeST = Union[RawOutputStreamTypeT, RawOutputStreamTypesT]
47+
OutputStreamTypeLike_T = Union["OutputStreamType", str]
48+
OutputStreamTypesLike_T = Sequence[OutputStreamTypeLike_T]
49+
OutputStreamTypeSLike_T = Union[OutputStreamTypeLike_T, OutputStreamTypesLike_T]
4950

5051
# Raw output streams dict
51-
RawOutputStreamsDictT = Dict[RawOutputStreamTypeST, RawIntervalST]
52-
RawOutputStreamsT = Union["OutputStreams", RawOutputStreamsDictT]
52+
OutputStreamsDictLike_T = Dict[OutputStreamTypeSLike_T, IntervalSLike_T]
53+
OutputStreamsLike_T = Union["OutputStreams", OutputStreamsDictLike_T]
5354

5455

5556
class NamedObj(Protocol):
@@ -104,6 +105,7 @@ def __getitem__(
104105
return obj
105106

106107
def __repr__(self) -> str:
108+
"""Return string representation."""
107109
return f"{type(self).__name__}([\n " + "\n ".join(map(str, self)) + "\n])"
108110

109111

@@ -185,6 +187,7 @@ class OutputStream:
185187
intervals: list[pd.Interval]
186188

187189
def __post_init__(self) -> None:
190+
"""Finalize initialization."""
188191
self._run: Optional[SimulationRun] = None
189192
self.check_intervals()
190193

@@ -304,8 +307,8 @@ def check_intervals(self, steps: Optional[pd.DatetimeIndex] = None) -> None:
304307
@classmethod
305308
def create(
306309
cls,
307-
stream_type: RawOutputStreamTypeT,
308-
intervals: RawIntervalST,
310+
stream_type: OutputStreamTypeLike_T,
311+
intervals: IntervalSLike_T,
309312
*,
310313
output_stream_types: Optional[OutputStreamTypes] = None,
311314
start: Optional[pd.Timestamp] = None,
@@ -371,6 +374,17 @@ def get_all_steps(self) -> pd.DatetimeIndex:
371374
stream.check_intervals(all_steps)
372375
return all_steps
373376

377+
def count_per_step(self) -> tuple[npt.NDArray[np.int_], pd.DatetimeIndex]:
378+
"""Count the number of streams overing each step."""
379+
steps = self.get_all_steps()
380+
counts = np.zeros(steps.size, np.int32)
381+
for stream in self:
382+
for interval in stream.intervals:
383+
i0 = steps.get_loc(interval.left)
384+
i1 = steps.get_loc(interval.right)
385+
counts[i0 : i1 + 1] += 1
386+
return counts, steps
387+
374388
def set_run(self, run: SimulationRun) -> None:
375389
"""Associate all output streams with a simulation run."""
376390
for stream in self:
@@ -379,7 +393,7 @@ def set_run(self, run: SimulationRun) -> None:
379393
@classmethod
380394
def create(
381395
cls,
382-
output: RawOutputStreamsT,
396+
output: OutputStreamsLike_T,
383397
*,
384398
output_stream_types: Optional[OutputStreamTypes] = None,
385399
start: Optional[pd.Timestamp] = None,
@@ -388,9 +402,9 @@ def create(
388402
if isinstance(output, OutputStreams):
389403
return copy(output)
390404
streams = cls()
391-
stream_type_s: RawOutputStreamTypeST
392-
stream_type: RawOutputStreamTypeT
393-
interval_s: RawIntervalST
405+
stream_type_s: OutputStreamTypeSLike_T
406+
stream_type: OutputStreamTypeLike_T
407+
interval_s: IntervalSLike_T
394408
for stream_type_s, interval_s in output.items():
395409
if isinstance(stream_type_s, str) or not isinstance(
396410
stream_type_s, Collection
@@ -444,61 +458,14 @@ def __init__(
444458
self,
445459
*,
446460
start: Union[pd.Timestamp, YYYYMMDDHH],
447-
path: Optional[RawPathT] = None,
448-
rel_path: Optional[RawPathT] = None,
449-
output: Optional[RawOutputStreamsT] = None,
461+
path: Optional[PathLike_T] = None,
462+
rel_path: Optional[PathLike_T] = None,
463+
output: Optional[OutputStreamsLike_T] = None,
450464
end_rel: Optional[Union[pd.Timedelta, DDHH, DDHHMM]] = None,
451465
end_type: Optional[Union[SimulationRunEndType, str]] = None,
452466
simulation_run_end_types: Optional[SimulationRunEndTypes] = None,
453467
) -> None:
454468
"""Create an instance of ``SimulationRun``."""
455-
456-
def init_end_rel(
457-
raw_end_rel: Optional[Union[pd.Timedelta, DDHH, DDHHMM]],
458-
output: OutputStreams,
459-
start: pd.Timestamp,
460-
) -> pd.Timedelta:
461-
"""Initialize ``end_rel``."""
462-
if raw_end_rel is None:
463-
if not output:
464-
raw_end_rel = pd.Timedelta(0)
465-
else:
466-
last_output = max(
467-
interval.right
468-
for stream in output
469-
for interval in stream.intervals
470-
)
471-
raw_end_rel = last_output - start
472-
return init_timedelta(raw_end_rel)
473-
474-
def init_end_type(
475-
raw_end_type: Optional[Union[SimulationRunEndType, str]]
476-
) -> SimulationRunEndType:
477-
"""Initialize ``end_type``."""
478-
if isinstance(raw_end_type, SimulationRunEndType):
479-
return raw_end_type
480-
if simulation_run_end_types is None:
481-
raise ValueError(
482-
"must pass simulation_run_end_types if end_type is not of type"
483-
" SimulationRunEndType"
484-
)
485-
return simulation_run_end_types[raw_end_type or "success"]
486-
487-
def init_label(paths: Collection[Optional[Path]]) -> str:
488-
label = ""
489-
for path in paths:
490-
if path is not None:
491-
if not label:
492-
label = path.name
493-
elif label != path.name:
494-
raise ValueError(
495-
f"inconsistent labels: {label} != {path.name}; paths: "
496-
+ ", ".join(map(str, paths))
497-
)
498-
if not label:
499-
raise ValueError(f"could not derive label from paths: {paths}")
500-
return label
501-
502469
self.abs_path: Optional[Path] = None if path is None else Path(path)
503470
self.rel_path: Optional[Path] = None if rel_path is None else Path(rel_path)
504471
self.start: pd.Timestamp = init_timestamp(start)
@@ -508,11 +475,15 @@ def init_label(paths: Collection[Optional[Path]]) -> str:
508475
path = self.abs_path or self.rel_path
509476
raise Exception(f"error creating output streams for run at {path}") from e
510477
self.output: OutputStreams = streams
511-
self.end_rel: pd.Timedelta = init_end_rel(end_rel, self.output, self.start)
512-
self.end_type: SimulationRunEndType = init_end_type(end_type)
478+
self.end_rel: pd.Timedelta = self._init_end_rel(
479+
end_rel, self.output, self.start
480+
)
481+
self.end_type: SimulationRunEndType = self._init_end_type(
482+
end_type, simulation_run_end_types
483+
)
513484

514485
self.output.set_run(self)
515-
self.label: str = init_label([self.abs_path, self.rel_path])
486+
self.label: str = self._init_label([self.abs_path, self.rel_path])
516487
self.end: pd.Timestamp = self.start + self.end_rel
517488
self.write_start: pd.Timestamp = min(
518489
[interval.left for stream in self.output for interval in stream.intervals]
@@ -554,7 +525,9 @@ def get_full_path(self) -> Path:
554525
)
555526
return path
556527

557-
def init_path(path: Optional[RawPathT], rel_path: Optional[RawPathT]) -> Path:
528+
def init_path(
529+
path: Optional[PathLike_T], rel_path: Optional[PathLike_T]
530+
) -> Path:
558531
if path is None and rel_path is None:
559532
raise ValueError("path and rel_path are both None")
560533
elif rel_path is not None:
@@ -569,16 +542,65 @@ def exists(self) -> bool:
569542
return self.get_full_path().exists()
570543

571544
def __repr__(self) -> str:
545+
"""Return a string representation."""
572546
path = self.get_full_path()
573547
return f"{type(self).__name__}('{path}', {self.start})"
574548

549+
@staticmethod
550+
def _init_end_rel(
551+
raw_end_rel: Optional[Union[pd.Timedelta, DDHH, DDHHMM]],
552+
output: OutputStreams,
553+
start: pd.Timestamp,
554+
) -> pd.Timedelta:
555+
"""Initialize ``end_rel``."""
556+
if raw_end_rel is None:
557+
if not output:
558+
raw_end_rel = pd.Timedelta(0)
559+
else:
560+
last_output = max(
561+
interval.right for stream in output for interval in stream.intervals
562+
)
563+
raw_end_rel = last_output - start
564+
return init_timedelta(raw_end_rel)
565+
566+
@staticmethod
567+
def _init_end_type(
568+
raw_end_type: Optional[Union[SimulationRunEndType, str]],
569+
simulation_run_end_types: Optional[SimulationRunEndTypes],
570+
) -> SimulationRunEndType:
571+
"""Initialize ``end_type``."""
572+
if isinstance(raw_end_type, SimulationRunEndType):
573+
return raw_end_type
574+
if simulation_run_end_types is None:
575+
raise ValueError(
576+
"must pass simulation_run_end_types if end_type is not of type"
577+
" SimulationRunEndType"
578+
)
579+
return simulation_run_end_types[raw_end_type or "success"]
580+
581+
@staticmethod
582+
def _init_label(paths: Collection[Optional[Path]]) -> str:
583+
label = ""
584+
for path in paths:
585+
if path is not None:
586+
if not label:
587+
label = path.name
588+
elif label != path.name:
589+
raise ValueError(
590+
f"inconsistent labels: {label} != {path.name}; paths: "
591+
+ ", ".join(map(str, paths))
592+
)
593+
if not label:
594+
raise ValueError(f"could not derive label from paths: {paths}")
595+
return label
596+
575597

576598
class Simulation:
577599
"""A simulation comprised of one or more simulation runs."""
578600

579601
def __init__(
580602
self,
581-
path: RawPathT,
603+
path: PathLike_T,
582604
runs: Optional[Sequence[SimulationRun]] = None,
583605
) -> None:
584606
"""Create an instance of ``Simulation``."""
@@ -632,16 +654,10 @@ def find_redundant_output(
632654
if stream_type.removed_files:
633655
continue
634656
try:
635-
steps = streams.get_all_steps()
657+
counts, steps = streams.count_per_step()
636658
except EmptyOutputStreamError:
637659
continue
638-
coverage = np.zeros(steps.size)
639-
for stream in streams:
640-
for interval in stream.intervals:
641-
i0 = steps.get_loc(interval.left)
642-
i1 = steps.get_loc(interval.right)
643-
coverage[i0 : i1 + 1] += 1
644-
idcs = np.where(steps[coverage > 1])[0]
660+
idcs = np.where(steps[counts > 1])[0]
645661
if idcs.size == 0:
646662
continue
647663
multi_steps[stream_type] = {}
@@ -657,6 +673,7 @@ def find_redundant_output(
657673
return multi_steps
658674

659675
def __repr__(self) -> str:
676+
"""Return a string representation."""
660677
return f"{type(self).__name__}([" + ", ".join(map(str, self.get_runs())) + "])"
661678

662679

@@ -734,6 +751,7 @@ def collect_redundant_output_files(self) -> list[list[Path]]:
734751
return paths_sims
735752

736753
def __repr__(self) -> str:
754+
"""Return a string representation."""
737755
return f"{type(self).__name__}([\n " + "\n ".join(map(str, self)) + "\n])"
738756

739757
@classmethod
@@ -790,7 +808,7 @@ def init_timedelta(val: Union[pd.Timedelta, DDHH, DDHHMM]) -> pd.Timedelta:
790808

791809

792810
def init_interval(
793-
val: RawIntervalT,
811+
val: IntervalLike_T,
794812
start: Optional[pd.Timestamp] = None,
795813
) -> pd.Interval:
796814
"""Initialize a time interval object."""

0 commit comments

Comments
 (0)