16
16
17
17
from ads .common .object_storage_details import ObjectStorageDetails
18
18
from ads .opctl import logger
19
- from ads .opctl .operator .lowcode .anomaly .const import OutputColumns , SupportedMetrics
19
+ from ads .opctl .operator .lowcode .anomaly .const import OutputColumns , SupportedMetrics , SUBSAMPLE_THRESHOLD
20
20
from ads .opctl .operator .lowcode .anomaly .utils import _build_metrics_df , default_signer
21
21
from ads .opctl .operator .lowcode .common .utils import (
22
22
disable_print ,
@@ -79,7 +79,7 @@ def generate_report(self):
79
79
anomaly_output , test_data , elapsed_time
80
80
)
81
81
table_blocks = [
82
- rc .DataTable (df , label = col , index = True )
82
+ rc .DataTable (df . head ( SUBSAMPLE_THRESHOLD ) if self . spec . subsample_report_data and len ( df ) > SUBSAMPLE_THRESHOLD else df , label = col , index = True )
83
83
for col , df in self .datasets .full_data_dict .items ()
84
84
]
85
85
data_table = rc .Select (blocks = table_blocks )
@@ -94,20 +94,36 @@ def generate_report(self):
94
94
anomaly_col = anomaly_output .get_anomalies_by_cat (category = target )[
95
95
OutputColumns .ANOMALY_COL
96
96
]
97
+ anomaly_indices = [i for i , index in enumerate (anomaly_col ) if index == 1 ]
98
+ downsampled_time_col = time_col
99
+ selected_indices = list (range (len (time_col )))
100
+ if self .spec .subsample_report_data :
101
+ non_anomaly_indices = [i for i in range (len (time_col )) if i not in anomaly_indices ]
102
+ # Downsample the non-anomalous points when their count exceeds SUBSAMPLE_THRESHOLD (1000); anomalous points are always kept
103
+ if len (non_anomaly_indices ) > SUBSAMPLE_THRESHOLD :
104
+ downsampled_non_anomaly_indices = non_anomaly_indices [::len (non_anomaly_indices )// SUBSAMPLE_THRESHOLD ]
105
+ selected_indices = anomaly_indices + downsampled_non_anomaly_indices
106
+ selected_indices .sort ()
107
+ downsampled_time_col = time_col [selected_indices ]
108
+
97
109
columns = set (df .columns ).difference ({date_column })
98
110
for col in columns :
99
111
y = df [col ].reset_index (drop = True )
112
+
113
+ downsampled_y = y [selected_indices ]
114
+
100
115
fig , ax = plt .subplots (figsize = (8 , 3 ), layout = "constrained" )
101
116
ax .grid ()
102
- ax .plot (time_col , y , color = "black" )
103
- for i , index in enumerate ( anomaly_col ):
104
- if index == 1 :
105
- ax .scatter (time_col [i ], y [i ], color = "red" , marker = "o" )
117
+ ax .plot (downsampled_time_col , downsampled_y , color = "black" )
118
+ # Plot anomalies
119
+ for i in anomaly_indices :
120
+ ax .scatter (time_col [i ], y [i ], color = "red" , marker = "o" )
106
121
plt .xlabel (date_column )
107
122
plt .ylabel (col )
108
123
plt .title (f"`{ col } ` with reference to anomalies" )
109
124
figure_blocks .append (rc .Widget (ax ))
110
- blocks .append (rc .Group (* figure_blocks , label = target ))
125
+
126
+ blocks .append (rc .Group (* figure_blocks , label = target ))
111
127
plots = rc .Select (blocks )
112
128
113
129
report_sections = []
0 commit comments