25
25
26
26
# Third-party
27
27
import numpy as np
28
+ import numpy .typing as npt
28
29
import pandas as pd
29
30
30
- # Generic type aliases
31
+ # First-party
32
+ from atmcirclib .typing import PathLike_T
31
33
32
- # Misc.
33
- RawPathT = Union [Path , str ]
34
+ # Generic type aliases
34
35
35
36
# Time step notations
36
37
DDHH = Tuple [int , int ]
37
38
DDHHMM = Tuple [int , int , int ]
38
39
YYYYMMDDHH = Tuple [int , int , int , int ]
39
40
40
41
# Raw time interval(s)
41
- RawIntervalT = Union [pd .Interval , Tuple [Union [DDHH , DDHHMM ], Union [DDHH , DDHHMM ]]]
42
- RawIntervalsT = Sequence [RawIntervalT ]
43
- RawIntervalST = Union [RawIntervalT , RawIntervalsT ]
42
+ IntervalLike_T = Union [pd .Interval , Tuple [Union [DDHH , DDHHMM ], Union [DDHH , DDHHMM ]]]
43
+ IntervalsLike_T = Sequence [IntervalLike_T ]
44
+ IntervalSLike_T = Union [IntervalLike_T , IntervalsLike_T ]
44
45
45
46
# Raw output stream type(s)
46
- RawOutputStreamTypeT = Union ["OutputStreamType" , str ]
47
- RawOutputStreamTypesT = Sequence [RawOutputStreamTypeT ]
48
- RawOutputStreamTypeST = Union [RawOutputStreamTypeT , RawOutputStreamTypesT ]
47
+ OutputStreamTypeLike_T = Union ["OutputStreamType" , str ]
48
+ OutputStreamTypesLike_T = Sequence [OutputStreamTypeLike_T ]
49
+ OutputStreamTypeSLike_T = Union [OutputStreamTypeLike_T , OutputStreamTypesLike_T ]
49
50
50
51
# Raw output streams dict
51
- RawOutputStreamsDictT = Dict [RawOutputStreamTypeST , RawIntervalST ]
52
- RawOutputStreamsT = Union ["OutputStreams" , RawOutputStreamsDictT ]
52
+ OutputStreamsDictLike_T = Dict [OutputStreamTypeSLike_T , IntervalSLike_T ]
53
+ OutputStreamsLike_T = Union ["OutputStreams" , OutputStreamsDictLike_T ]
53
54
54
55
55
56
class NamedObj (Protocol ):
@@ -104,6 +105,7 @@ def __getitem__(
104
105
return obj
105
106
106
107
def __repr__ (self ) -> str :
108
+ """Return string representation."""
107
109
return f"{ type (self ).__name__ } ([\n " + "\n " .join (map (str , self )) + "\n ])"
108
110
109
111
@@ -185,6 +187,7 @@ class OutputStream:
185
187
intervals : list [pd .Interval ]
186
188
187
189
def __post_init__ (self ) -> None :
190
+ """Finalize initialization."""
188
191
self ._run : Optional [SimulationRun ] = None
189
192
self .check_intervals ()
190
193
@@ -304,8 +307,8 @@ def check_intervals(self, steps: Optional[pd.DatetimeIndex] = None) -> None:
304
307
@classmethod
305
308
def create (
306
309
cls ,
307
- stream_type : RawOutputStreamTypeT ,
308
- intervals : RawIntervalST ,
310
+ stream_type : OutputStreamTypeLike_T ,
311
+ intervals : IntervalSLike_T ,
309
312
* ,
310
313
output_stream_types : Optional [OutputStreamTypes ] = None ,
311
314
start : Optional [pd .Timestamp ] = None ,
@@ -371,6 +374,17 @@ def get_all_steps(self) -> pd.DatetimeIndex:
371
374
stream .check_intervals (all_steps )
372
375
return all_steps
373
376
377
+ def count_per_step (self ) -> tuple [npt .NDArray [np .int_ ], pd .DatetimeIndex ]:
378
+ """Count the number of streams overing each step."""
379
+ steps = self .get_all_steps ()
380
+ counts = np .zeros (steps .size , np .int32 )
381
+ for stream in self :
382
+ for interval in stream .intervals :
383
+ i0 = steps .get_loc (interval .left )
384
+ i1 = steps .get_loc (interval .right )
385
+ counts [i0 : i1 + 1 ] += 1
386
+ return counts , steps
387
+
374
388
def set_run (self , run : SimulationRun ) -> None :
375
389
"""Associate all output streams with a simulation run."""
376
390
for stream in self :
@@ -379,7 +393,7 @@ def set_run(self, run: SimulationRun) -> None:
379
393
@classmethod
380
394
def create (
381
395
cls ,
382
- output : RawOutputStreamsT ,
396
+ output : OutputStreamsLike_T ,
383
397
* ,
384
398
output_stream_types : Optional [OutputStreamTypes ] = None ,
385
399
start : Optional [pd .Timestamp ] = None ,
@@ -388,9 +402,9 @@ def create(
388
402
if isinstance (output , OutputStreams ):
389
403
return copy (output )
390
404
streams = cls ()
391
- stream_type_s : RawOutputStreamTypeST
392
- stream_type : RawOutputStreamTypeT
393
- interval_s : RawIntervalST
405
+ stream_type_s : OutputStreamTypeSLike_T
406
+ stream_type : OutputStreamTypeLike_T
407
+ interval_s : IntervalSLike_T
394
408
for stream_type_s , interval_s in output .items ():
395
409
if isinstance (stream_type_s , str ) or not isinstance (
396
410
stream_type_s , Collection
@@ -444,61 +458,14 @@ def __init__(
444
458
self ,
445
459
* ,
446
460
start : Union [pd .Timestamp , YYYYMMDDHH ],
447
- path : Optional [RawPathT ] = None ,
448
- rel_path : Optional [RawPathT ] = None ,
449
- output : Optional [RawOutputStreamsT ] = None ,
461
+ path : Optional [PathLike_T ] = None ,
462
+ rel_path : Optional [PathLike_T ] = None ,
463
+ output : Optional [OutputStreamsLike_T ] = None ,
450
464
end_rel : Optional [Union [pd .Timedelta , DDHH , DDHHMM ]] = None ,
451
465
end_type : Optional [Union [SimulationRunEndType , str ]] = None ,
452
466
simulation_run_end_types : Optional [SimulationRunEndTypes ] = None ,
453
467
) -> None :
454
468
"""Create an instance of ``SimulationRun``."""
455
-
456
- def init_end_rel (
457
- raw_end_rel : Optional [Union [pd .Timedelta , DDHH , DDHHMM ]],
458
- output : OutputStreams ,
459
- start : pd .Timestamp ,
460
- ) -> pd .Timedelta :
461
- """Initialize ``end_rel``."""
462
- if raw_end_rel is None :
463
- if not output :
464
- raw_end_rel = pd .Timedelta (0 )
465
- else :
466
- last_output = max (
467
- interval .right
468
- for stream in output
469
- for interval in stream .intervals
470
- )
471
- raw_end_rel = last_output - start
472
- return init_timedelta (raw_end_rel )
473
-
474
- def init_end_type (
475
- raw_end_type : Optional [Union [SimulationRunEndType , str ]]
476
- ) -> SimulationRunEndType :
477
- """Initialize ``end_type``."""
478
- if isinstance (raw_end_type , SimulationRunEndType ):
479
- return raw_end_type
480
- if simulation_run_end_types is None :
481
- raise ValueError (
482
- "must pass simulation_run_end_types if end_type is not of type"
483
- " SimulationRunEndType"
484
- )
485
- return simulation_run_end_types [raw_end_type or "success" ]
486
-
487
- def init_label (paths : Collection [Optional [Path ]]) -> str :
488
- label = ""
489
- for path in paths :
490
- if path is not None :
491
- if not label :
492
- label = path .name
493
- elif label != path .name :
494
- raise ValueError (
495
- f"inconsistent labels: { label } != { path .name } ; paths: "
496
- + ", " .join (map (str , paths ))
497
- )
498
- if not label :
499
- raise ValueError (f"could not derive label from paths: { paths } " )
500
- return label
501
-
502
469
self .abs_path : Optional [Path ] = None if path is None else Path (path )
503
470
self .rel_path : Optional [Path ] = None if rel_path is None else Path (rel_path )
504
471
self .start : pd .Timestamp = init_timestamp (start )
@@ -508,11 +475,15 @@ def init_label(paths: Collection[Optional[Path]]) -> str:
508
475
path = self .abs_path or self .rel_path
509
476
raise Exception (f"error creating output streams for run at { path } " ) from e
510
477
self .output : OutputStreams = streams
511
- self .end_rel : pd .Timedelta = init_end_rel (end_rel , self .output , self .start )
512
- self .end_type : SimulationRunEndType = init_end_type (end_type )
478
+ self .end_rel : pd .Timedelta = self ._init_end_rel (
479
+ end_rel , self .output , self .start
480
+ )
481
+ self .end_type : SimulationRunEndType = self ._init_end_type (
482
+ end_type , simulation_run_end_types
483
+ )
513
484
514
485
self .output .set_run (self )
515
- self .label : str = init_label ([self .abs_path , self .rel_path ])
486
+ self .label : str = self . _init_label ([self .abs_path , self .rel_path ])
516
487
self .end : pd .Timestamp = self .start + self .end_rel
517
488
self .write_start : pd .Timestamp = min (
518
489
[interval .left for stream in self .output for interval in stream .intervals ]
@@ -554,7 +525,9 @@ def get_full_path(self) -> Path:
554
525
)
555
526
return path
556
527
557
- def init_path (path : Optional [RawPathT ], rel_path : Optional [RawPathT ]) -> Path :
528
+ def init_path (
529
+ path : Optional [PathLike_T ], rel_path : Optional [PathLike_T ]
530
+ ) -> Path :
558
531
if path is None and rel_path is None :
559
532
raise ValueError ("path and rel_path are both None" )
560
533
elif rel_path is not None :
@@ -569,16 +542,65 @@ def exists(self) -> bool:
569
542
return self .get_full_path ().exists ()
570
543
571
544
def __repr__ (self ) -> str :
545
+ """Return a string representation."""
572
546
path = self .get_full_path ()
573
547
return f"{ type (self ).__name__ } ('{ path } ', { self .start } )"
574
548
549
+ @staticmethod
550
+ def _init_end_rel (
551
+ raw_end_rel : Optional [Union [pd .Timedelta , DDHH , DDHHMM ]],
552
+ output : OutputStreams ,
553
+ start : pd .Timestamp ,
554
+ ) -> pd .Timedelta :
555
+ """Initialize ``end_rel``."""
556
+ if raw_end_rel is None :
557
+ if not output :
558
+ raw_end_rel = pd .Timedelta (0 )
559
+ else :
560
+ last_output = max (
561
+ interval .right for stream in output for interval in stream .intervals
562
+ )
563
+ raw_end_rel = last_output - start
564
+ return init_timedelta (raw_end_rel )
565
+
566
+ @staticmethod
567
+ def _init_end_type (
568
+ raw_end_type : Optional [Union [SimulationRunEndType , str ]],
569
+ simulation_run_end_types : Optional [SimulationRunEndTypes ],
570
+ ) -> SimulationRunEndType :
571
+ """Initialize ``end_type``."""
572
+ if isinstance (raw_end_type , SimulationRunEndType ):
573
+ return raw_end_type
574
+ if simulation_run_end_types is None :
575
+ raise ValueError (
576
+ "must pass simulation_run_end_types if end_type is not of type"
577
+ " SimulationRunEndType"
578
+ )
579
+ return simulation_run_end_types [raw_end_type or "success" ]
580
+
581
+ @staticmethod
582
+ def _init_label (paths : Collection [Optional [Path ]]) -> str :
583
+ label = ""
584
+ for path in paths :
585
+ if path is not None :
586
+ if not label :
587
+ label = path .name
588
+ elif label != path .name :
589
+ raise ValueError (
590
+ f"inconsistent labels: { label } != { path .name } ; paths: "
591
+ + ", " .join (map (str , paths ))
592
+ )
593
+ if not label :
594
+ raise ValueError (f"could not derive label from paths: { paths } " )
595
+ return label
596
+
575
597
576
598
class Simulation :
577
599
"""A simulation comprised of one or more simulation runs."""
578
600
579
601
def __init__ (
580
602
self ,
581
- path : RawPathT ,
603
+ path : PathLike_T ,
582
604
runs : Optional [Sequence [SimulationRun ]] = None ,
583
605
) -> None :
584
606
"""Create an instance of ``Simulation``."""
@@ -632,16 +654,10 @@ def find_redundant_output(
632
654
if stream_type .removed_files :
633
655
continue
634
656
try :
635
- steps = streams .get_all_steps ()
657
+ counts , steps = streams .count_per_step ()
636
658
except EmptyOutputStreamError :
637
659
continue
638
- coverage = np .zeros (steps .size )
639
- for stream in streams :
640
- for interval in stream .intervals :
641
- i0 = steps .get_loc (interval .left )
642
- i1 = steps .get_loc (interval .right )
643
- coverage [i0 : i1 + 1 ] += 1
644
- idcs = np .where (steps [coverage > 1 ])[0 ]
660
+ idcs = np .where (steps [counts > 1 ])[0 ]
645
661
if idcs .size == 0 :
646
662
continue
647
663
multi_steps [stream_type ] = {}
@@ -657,6 +673,7 @@ def find_redundant_output(
657
673
return multi_steps
658
674
659
675
def __repr__ (self ) -> str :
676
+ """Return a string representation."""
660
677
return f"{ type (self ).__name__ } ([" + ", " .join (map (str , self .get_runs ())) + "])"
661
678
662
679
@@ -734,6 +751,7 @@ def collect_redundant_output_files(self) -> list[list[Path]]:
734
751
return paths_sims
735
752
736
753
def __repr__ (self ) -> str :
754
+ """Return a string representation."""
737
755
return f"{ type (self ).__name__ } ([\n " + "\n " .join (map (str , self )) + "\n ])"
738
756
739
757
@classmethod
@@ -790,7 +808,7 @@ def init_timedelta(val: Union[pd.Timedelta, DDHH, DDHHMM]) -> pd.Timedelta:
790
808
791
809
792
810
def init_interval (
793
- val : RawIntervalT ,
811
+ val : IntervalLike_T ,
794
812
start : Optional [pd .Timestamp ] = None ,
795
813
) -> pd .Interval :
796
814
"""Initialize a time interval object."""
0 commit comments