@@ -27,15 +27,229 @@ def cleanup_glob():
2727
2828
def test_empty_data(cleanup_glob):
    """Smoke-test the rollup computation for 2025-06-13 with ``./out``.

    There are no assertions: the test passes as long as the call does not
    raise.

    NOTE(review): the test name implies that no raw data is present when the
    call is made -- presumably guaranteed by the ``cleanup_glob`` fixture;
    confirm against the fixture definition.
    """
    compute_anonymized_rollup_from_raw_data('salt', 2025, 6, 13, './out')
3131
3232
def test_from_gather_to_json(cleanup_glob):
    """End-to-end check of the anonymized rollup produced for 2025-06-13.

    Calls ``task_anonymized_rollups`` with ``save_rollups=False``, writes the
    returned data to ``./out/rollups/2025/06/13/anonymized.json`` and then
    asserts on it in three stages:

    1. structure and types of every top-level section,
    2. the exact values this test expects,
    3. consistency between sections.

    Finally it checks that raw-data tarballs exist under
    ``./out/data/2025/06/13``.

    The exact-value assertions depend on the order of the returned lists:
    index 0 must be the plain-text ``a10.acos_axapi`` entry and index 1 the
    hashed one.
    """
    # run gather
    json_data = task_anonymized_rollups('salt', 2025, 6, 13, './out', save_rollups=False)

    print(json_data)

    # save as json inside rollups/2025/06/13/anonymized.json
    json_path = f'./out/rollups/{2025}/06/13/anonymized.json'

    # create the dir
    os.makedirs(os.path.dirname(json_path), exist_ok=True)

    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(json_data, json_file, indent=4)

    # ========== Validate the json_data that are containing what they should ==========

    # Validate top-level structure
    for section in ('events_modules', 'execution_environments', 'jobs', 'job_host_summary'):
        assert section in json_data, f"Missing '{section}' in json_data"

    # Validate events_modules structure and data types
    events_modules = json_data['events_modules']
    assert isinstance(events_modules, dict), 'events_modules should be a dictionary'
    events_modules_types = {
        'list_of_modules_used_to_automate': list,
        'modules_used_to_automate_total': int,
        'avg_number_of_modules_used_in_a_playbooks': (int, float),
        'modules_used_per_playbook_total': dict,
        'module_stats': list,
        'collection_name_stats': list,
        'total_hosts_automated': int,
    }
    # All keys must be present before any type is inspected.
    for key in events_modules_types:
        assert key in events_modules
    for key, expected_type in events_modules_types.items():
        assert isinstance(events_modules[key], expected_type)

    # Validate modules have required fields
    for module in events_modules['list_of_modules_used_to_automate']:
        for field in ('module_name', 'collection_source', 'collection_name'):
            assert field in module

    # Validate module_stats have required fields
    for module_stat in events_modules['module_stats']:
        for field in ('module_name', 'collection_source', 'collection_name', 'jobs_total', 'hosts_total'):
            assert field in module_stat

    # Validate execution_environments structure
    execution_envs = json_data['execution_environments']
    assert isinstance(execution_envs, dict), 'execution_environments should be a dictionary'
    for key in ('total_EE', 'default_EE', 'custom_EE'):
        assert key in execution_envs
    for key in ('total_EE', 'default_EE', 'custom_EE'):
        assert isinstance(execution_envs[key], int)

    # Validate jobs structure
    jobs = json_data['jobs']
    assert isinstance(jobs, list), 'jobs should be a list'
    job_fields = (
        'job_template_name',
        'number_of_jobs_executed',
        'number_of_jobs_failed',
        'job_duration_average_in_seconds',
        'job_waiting_time_average_in_seconds',
    )
    for job_entry in jobs:
        for field in job_fields:
            assert field in job_entry

    # Validate job_host_summary structure
    job_host_summary = json_data['job_host_summary']
    assert isinstance(job_host_summary, list), 'job_host_summary should be a list'
    for summary_entry in job_host_summary:
        for field in ('job_template_name', 'jobs_total', 'hosts_total', 'ok_total'):
            assert field in summary_entry

    # Validate anonymization occurred (check for hashed values)
    # Job template names should be hashed (128 character lowercase hex strings)
    for job_entry in jobs:
        job_template_name = job_entry['job_template_name']
        assert len(job_template_name) == 128, f'Job template name should be hashed (128 chars): {job_template_name} '
        assert all(c in '0123456789abcdef' for c in job_template_name), 'Job template name should be hex string'

    # ========== Validate actual data values and relationships ==========

    # Validate events_modules actual values
    print('\n --- Validating events_modules data values ---')
    assert events_modules['modules_used_to_automate_total'] == 2, 'Should have 2 modules'
    assert events_modules['total_hosts_automated'] == 2, 'Should have 2 hosts automated'
    assert len(events_modules['list_of_modules_used_to_automate']) == 2, 'Should have 2 modules in list'
    assert len(events_modules['module_stats']) == 2, 'Should have 2 module stats'
    assert len(events_modules['collection_name_stats']) == 2, 'Should have 2 collection stats'

    # Validate first module is the unencrypted community module
    first_module = events_modules['list_of_modules_used_to_automate'][0]
    assert first_module['module_name'] == 'a10.acos_axapi.a10_slb_virtual_server', 'First module should be a10_slb_virtual_server'
    assert first_module['collection_source'] == 'community', 'First module should be from community'
    assert first_module['collection_name'] == 'a10.acos_axapi', 'First module should be from a10.acos_axapi collection'

    # Validate second module is hashed (encrypted)
    second_module = events_modules['list_of_modules_used_to_automate'][1]
    assert len(second_module['module_name']) == 128, 'Second module name should be hashed (128 chars)'
    assert second_module['collection_source'] == 'Unknown', 'Second module should have Unknown source'
    assert len(second_module['collection_name']) == 128, 'Second module collection should be hashed (128 chars)'

    # Validate module_stats actual values
    print('--- Validating module_stats data values ---')
    first_module_stats = events_modules['module_stats'][0]
    assert first_module_stats['module_name'] == 'a10.acos_axapi.a10_slb_virtual_server', 'Module stats should match module'
    assert first_module_stats['jobs_total'] == 3, 'Should have 3 jobs using this module'
    assert first_module_stats['hosts_total'] == 2, 'Should have 2 hosts for this module'
    assert first_module_stats['task_clean_success_total'] == 6, 'Should have 6 successful tasks (3 jobs × 2 hosts)'
    assert first_module_stats['task_success_with_reruns_total'] == 0, 'Should have 0 reruns'
    assert first_module_stats['task_failed_total'] == 0, 'Should have 0 failures'
    assert first_module_stats['avg_hosts_per_job'] == pytest.approx(2.0, rel=1e-6), 'Should average 2 hosts per job'

    # Validate second module stats
    second_module_stats = events_modules['module_stats'][1]
    assert second_module_stats['jobs_total'] == 3, 'Second module should also have 3 jobs'
    assert second_module_stats['hosts_total'] == 2, 'Second module should have 2 hosts'
    assert second_module_stats['task_clean_success_total'] == 0, 'Second module should have 0 clean successes'

    # Validate collection_name_stats
    print('--- Validating collection_name_stats data values ---')
    first_collection_stats = events_modules['collection_name_stats'][0]
    assert first_collection_stats['collection_name'] == 'a10.acos_axapi', 'Collection name should match'
    assert first_collection_stats['collection_source'] == 'community', 'Collection should be from community'
    assert first_collection_stats['jobs_total'] == 3, 'Collection should have 3 jobs'
    assert first_collection_stats['hosts_total'] == 2, 'Collection should have 2 hosts'
    assert first_collection_stats['task_clean_success_total'] == 6, 'Collection should have 6 successful tasks'

    # Validate modules_used_per_playbook_total structure and values
    print('--- Validating modules_used_per_playbook_total ---')
    modules_per_playbook = events_modules['modules_used_per_playbook_total']
    assert len(modules_per_playbook) == 1, 'Should have 1 playbook'
    playbook_module_count = next(iter(modules_per_playbook.values()))
    assert playbook_module_count == 2, 'Playbook should use 2 modules'

    # Validate avg_number_of_modules_used_in_a_playbooks calculation.
    # The division is safe: exactly one playbook was asserted just above.
    expected_avg = sum(modules_per_playbook.values()) / len(modules_per_playbook)
    assert events_modules['avg_number_of_modules_used_in_a_playbooks'] == pytest.approx(expected_avg, rel=1e-6), (
        f'Average should be {expected_avg} , got {events_modules["avg_number_of_modules_used_in_a_playbooks"]} '
    )

    # Validate execution_environments actual values
    print('--- Validating execution_environments data values ---')
    assert execution_envs['total_EE'] == 2, 'Should have 2 total execution environments'
    assert execution_envs['default_EE'] == 1, 'Should have 1 default execution environment'
    assert execution_envs['custom_EE'] == 1, 'Should have 1 custom execution environment'
    # Validate that total = default + custom
    assert execution_envs['total_EE'] == execution_envs['default_EE'] + execution_envs['custom_EE'], 'Total EE should equal default + custom'

    # Validate jobs actual values
    print('--- Validating jobs data values ---')
    assert len(jobs) == 1, 'Should have 1 job template'
    only_job = jobs[0]
    assert only_job['number_of_jobs_executed'] == 3, 'Job template should have 3 executions'
    assert only_job['number_of_jobs_failed'] == 0, 'Should have 0 failed jobs'
    assert only_job['number_of_jobs_succeeded'] == 3, 'Should have 3 succeeded jobs'
    assert only_job['number_of_jobs_succeeded'] + only_job['number_of_jobs_failed'] == only_job['number_of_jobs_executed'], (
        'Succeeded + failed should equal total executed'
    )

    # Validate job duration fields are non-negative
    assert only_job['job_duration_average_in_seconds'] >= 0, 'Job duration average should be non-negative'
    assert only_job['job_duration_total_in_seconds'] >= 0, 'Job duration total should be non-negative'
    assert only_job['job_duration_maximum_in_seconds'] >= only_job['job_duration_minimum_in_seconds'], 'Max duration should be >= min duration'

    # Validate job waiting time fields are non-negative
    assert only_job['job_waiting_time_average_in_seconds'] >= 0, 'Job waiting time average should be non-negative'
    assert only_job['job_waiting_time_total_in_seconds'] >= 0, 'Job waiting time total should be non-negative'

    # Validate job_host_summary actual values
    print('--- Validating job_host_summary data values ---')
    assert len(job_host_summary) == 1, 'Should have 1 job template in summary'
    only_summary = job_host_summary[0]
    assert only_summary['jobs_total'] == 3, 'Should have 3 jobs in summary'
    assert only_summary['hosts_total'] == 2, 'Should have 2 hosts in summary'
    assert only_summary['ok_total'] == 6, 'Should have 6 ok tasks (3 jobs × 2 hosts)'
    assert only_summary['dark_total'] == 0, 'Should have 0 dark (unreachable) hosts'
    assert only_summary['failures_total'] == 0, 'Should have 0 failures'
    assert only_summary['skipped_total'] == 0, 'Should have 0 skipped tasks'
    assert only_summary['ignored_total'] == 0, 'Should have 0 ignored failures'
    assert only_summary['rescued_total'] == 0, 'Should have 0 rescued tasks'

    # Validate cross-section data consistency
    print('--- Validating cross-section data consistency ---')
    assert events_modules['total_hosts_automated'] == only_summary['hosts_total'], 'Total hosts automated should match hosts in job_host_summary'

    # Validate that no module reports more hosts than the total automated hosts
    for module_stat in events_modules['module_stats']:
        assert module_stat['hosts_total'] <= events_modules['total_hosts_automated'], (
            f'Module {module_stat["module_name"][:50]} hosts should not exceed total automated hosts'
        )

    print('✅ All data value assertions passed!')

    # Verify data directory exists and contains raw data tarballs
    data_path = './out/data/2025/06/13'
    assert os.path.exists(data_path), f'Data directory should exist at {data_path} '

    # Check that raw data tarballs were created
    data_tarballs = [entry for entry in os.listdir(data_path) if entry.endswith('.tar.gz')]
    assert len(data_tarballs) > 0, 'Should have raw data tarballs in data directory'
    print(f'Found {len(data_tarballs)} raw data tarballs')

    print('\n ✅ All assertions passed!')
0 commit comments