@@ -60,7 +60,7 @@ def score_rocrate(crate_data: Union[Dict[str, Any], ROCrateV1_2]) -> AIReadyScor
6060 _score_pre_model (score .pre_model_explainability , root_data , metadata_graph )
6161 _score_ethics (score .ethics , root_data )
6262 _score_sustainability (score .sustainability , root_data )
63- _score_computability (score .computability , metadata_graph )
63+ _score_computability (score .computability , root_data , metadata_graph )
6464
6565 return score
6666
@@ -126,22 +126,34 @@ def _score_provenance(provenance: ProvenanceScore, root_data: Dict[str, Any], me
126126 has_content = True ,
127127 details = ", " .join (actors )
128128 )
129-
130- datasets_count = 0
131- transformations_count = 0
132- software_count = 0
133-
134- for entity in metadata_graph :
135- entity_type = _get_type (entity )
136129
137- if "Dataset" in entity_type :
138- datasets_count += 1
139-
140- if "Computation" in entity_type or "Experiment" in entity_type :
141- transformations_count += 1
142-
143- if "Software" in entity_type :
144- software_count += 1
130+ # Check for aggregated metrics first (from release-level RO-Crate)
131+ dataset_count = root_data .get ("evi:datasetCount" )
132+ computation_count = root_data .get ("evi:computationCount" )
133+ software_count = root_data .get ("evi:softwareCount" )
134+
135+ if dataset_count is not None :
136+ # Use pre-aggregated values from release
137+ datasets_count = dataset_count
138+ transformations_count = computation_count
139+ software_count = software_count
140+ else :
141+ # Fall back to counting in metadata_graph (for backwards compatibility)
142+ datasets_count = 0
143+ transformations_count = 0
144+ software_count = 0
145+
146+ for entity in metadata_graph :
147+ entity_type = _get_type (entity )
148+
149+ if "Dataset" in entity_type :
150+ datasets_count += 1
151+
152+ if "Computation" in entity_type or "Experiment" in entity_type :
153+ transformations_count += 1
154+
155+ if "Software" in entity_type :
156+ software_count += 1
145157
146158 if datasets_count > 0 :
147159 provenance .transparent = SubCriterionScore (
@@ -169,29 +181,39 @@ def _score_characterization(characterization: CharacterizationScore, root_data:
169181 has_content = True ,
170182 details = str (bias )[:200 ] + ("..." if len (str (bias )) > 200 else "" )
171183 )
172-
173- total_size = 0
174- stats_count = 0
175-
176- for entity in metadata_graph :
177- entity_type = _get_type (entity )
178184
179- if "Dataset" in entity_type or "ROCrate" in entity_type :
180- size = entity .get ("contentSize" , "" )
181- if size :
182- try :
183- if isinstance (size , str ):
184- if "TB" in size :
185- total_size += float (size .replace ("TB" , "" ).strip ()) * 1e12
186- elif "GB" in size :
187- total_size += float (size .replace ("GB" , "" ).strip ()) * 1e9
188- elif "MB" in size :
189- total_size += float (size .replace ("MB" , "" ).strip ()) * 1e6
190- except :
191- pass
192-
193- if entity .get ("hasSummaryStatistics" ):
194- stats_count += 1
185+ # Check for aggregated metrics first
186+ total_size_bytes = root_data .get ("evi:totalContentSizeBytes" )
187+ stats_count_agg = root_data .get ("evi:entitiesWithSummaryStats" )
188+
189+ if total_size_bytes is not None :
190+ # Use pre-aggregated statistics
191+ total_size = total_size_bytes
192+ stats_count = stats_count_agg
193+ else :
194+ # Fall back to iterating metadata_graph
195+ total_size = 0
196+ stats_count = 0
197+
198+ for entity in metadata_graph :
199+ entity_type = _get_type (entity )
200+
201+ if "Dataset" in entity_type or "ROCrate" in entity_type :
202+ size = entity .get ("contentSize" , "" )
203+ if size :
204+ try :
205+ if isinstance (size , str ):
206+ if "TB" in size :
207+ total_size += float (size .replace ("TB" , "" ).strip ()) * 1e12
208+ elif "GB" in size :
209+ total_size += float (size .replace ("GB" , "" ).strip ()) * 1e9
210+ elif "MB" in size :
211+ total_size += float (size .replace ("MB" , "" ).strip ()) * 1e6
212+ except :
213+ pass
214+
215+ if entity .get ("hasSummaryStatistics" ):
216+ stats_count += 1
195217
196218 details = []
197219 if total_size > 0 :
@@ -227,17 +249,27 @@ def _score_pre_model(pre_model: PreModelExplainabilityScore, root_data: Dict[str
227249 has_content = True ,
228250 details = ", " .join (details )
229251 )
230-
231- total = 0
232- with_checksum = 0
233-
234- for entity in metadata_graph :
235- entity_type = _get_type (entity )
236-
237- if "Dataset" in entity_type or "Software" in entity_type or "ROCrate" in entity_type :
238- total += 1
239- if entity .get ("md5" ) or entity .get ("MD5" ):
240- with_checksum += 1
252+
253+ # Check for aggregated metrics first
254+ total_entities = root_data .get ("evi:totalEntities" )
255+ entities_with_checksums = root_data .get ("evi:entitiesWithChecksums" )
256+
257+ if total_entities is not None :
258+ # Use pre-aggregated checksum data
259+ total = total_entities
260+ with_checksum = entities_with_checksums
261+ else :
262+ # Fall back to counting in metadata_graph
263+ total = 0
264+ with_checksum = 0
265+
266+ for entity in metadata_graph :
267+ entity_type = _get_type (entity )
268+
269+ if "Dataset" in entity_type or "Software" in entity_type or "ROCrate" in entity_type :
270+ total += 1
271+ if entity .get ("md5" ) or entity .get ("MD5" ):
272+ with_checksum += 1
241273
242274 if total > 0 and with_checksum > 0 :
243275 percentage = (with_checksum / total ) * 100
@@ -350,17 +382,25 @@ def _score_sustainability(sustainability: SustainabilityScore, root_data: Dict[s
350382 )
351383 break
352384
353- def _score_computability (computability : ComputabilityScore , metadata_graph : List [Dict ]):
385+ def _score_computability (computability : ComputabilityScore , root_data : Dict [ str , Any ], metadata_graph : List [Dict ]):
354386 """Score Computability criteria."""
355- formats = set ()
356-
357- for entity in metadata_graph :
358- entity_type = _get_type (entity )
359-
360- if "Dataset" in entity_type or "Software" in entity_type :
361- fmt = _get_format (entity )
362- if fmt :
363- formats .add (str (fmt ))
387+ # Check for aggregated metrics first
388+ formats_agg = root_data .get ("evi:formats" )
389+
390+ if formats_agg is not None :
391+ # Use pre-aggregated formats
392+ formats = set (formats_agg )
393+ else :
394+ # Fall back to collecting from metadata_graph
395+ formats = set ()
396+
397+ for entity in metadata_graph :
398+ entity_type = _get_type (entity )
399+
400+ if "Dataset" in entity_type or "Software" in entity_type :
401+ fmt = _get_format (entity )
402+ if fmt :
403+ formats .add (str (fmt ))
364404
365405 if formats :
366406 fmt_list = sorted (list (formats ))[:5 ]
@@ -415,7 +455,7 @@ def _build_ai_ready_score(value: Any, *, converter_instance) -> AIReadyScore:
415455 _score_pre_model (score .pre_model_explainability , root_data , metadata_graph )
416456 _score_ethics (score .ethics , root_data )
417457 _score_sustainability (score .sustainability , root_data )
418- _score_computability (score .computability , metadata_graph )
458+ _score_computability (score .computability , root_data , metadata_graph )
419459
420460 return score
421461
0 commit comments