Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
30 commits
Select commit Hold shift + click to select a range
1e0e74c
add basis ROHub test cases, just for information exchange
joergfunger Oct 6, 2025
68e1722
Delete RoHub test codes
M-Jafarkhani Dec 11, 2025
7e70674
Update RoHub Integration
M-Jafarkhani Dec 11, 2025
54f140c
Update yml files
M-Jafarkhani Dec 11, 2025
a2d5dca
Fix CI
M-Jafarkhani Dec 11, 2025
bfa0b09
Add mpich to fix CI error
M-Jafarkhani Dec 11, 2025
ceb4eee
Add missing py file
M-Jafarkhani Dec 12, 2025
39c5c8a
Change Re-zip structure
M-Jafarkhani Dec 12, 2025
7fa1fbe
Validation as a Separate Step
M-Jafarkhani Dec 12, 2025
8a8381a
Add missing argument
M-Jafarkhani Dec 12, 2025
80daf8f
Add expected.csv to test
M-Jafarkhani Dec 12, 2025
5e2df14
Fix Type
M-Jafarkhani Jan 6, 2026
08efdb3
Remove fenics temporary, move provenance files to common
M-Jafarkhani Jan 6, 2026
bab910a
Fix plot provenance location
M-Jafarkhani Jan 6, 2026
5397539
Add documentation
M-Jafarkhani Jan 6, 2026
c57c575
Fix CI
M-Jafarkhani Jan 6, 2026
e3bbac4
Disable RoHub Upload
M-Jafarkhani Jan 6, 2026
4a4b326
Merge Conflicts
M-Jafarkhani Jan 6, 2026
faba603
Roll back
M-Jafarkhani Jan 6, 2026
6eb8d30
Using v1.2.2
M-Jafarkhani Jan 6, 2026
2d83ac9
using v1.2.3
M-Jafarkhani Jan 6, 2026
1f80af1
Fix parameter names
M-Jafarkhani Jan 6, 2026
ea604a2
Un-comment uploading into RoHub, bring back fenics
M-Jafarkhani Jan 7, 2026
622d1ab
Remove libadios2 from fenics environment file to resolve CI error
div-tyg Jan 9, 2026
7d8403a
Remove libadios2=2.10.1 from fenics
M-Jafarkhani Jan 9, 2026
21fcb51
Merge branch '32-integrate-rohub' of https://github.com/BAMresearch/N…
M-Jafarkhani Jan 9, 2026
16139b8
Fix CSV numbers.
M-Jafarkhani Jan 9, 2026
4f0c20f
Fix CSV Number
M-Jafarkhani Jan 9, 2026
89a7912
Tolerance to 1+6
M-Jafarkhani Jan 9, 2026
1753029
Check tolerance only on stress columns
M-Jafarkhani Jan 9, 2026
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
51 changes: 37 additions & 14 deletions .github/workflows/run-benchmark.yml
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,12 @@ on:

env:
CACHE_NUMBER: 1 # increase to reset cache manually
SNAKEMAKE_RESULT_FILE: metadata4ing_provenance
PROVENANACE_FILE_NAME: element_size_vs_max_mises_stress.pdf

jobs:
tests:
run-simulation:
runs-on: ubuntu-latest



steps:
- name: checkout repo content
uses: actions/checkout@v2
Expand Down Expand Up @@ -52,8 +51,9 @@ jobs:
snakemake --use-conda --force --cores all \
--reporter metadata4ing \
--report-metadata4ing-paramscript parameter_extractor.py \
--report-metadata4ing-filename metadata4ing_provenance

--report-metadata4ing-config metadata4ing.config \
--report-metadata4ing-filename $SNAKEMAKE_RESULT_FILE

- name: run_linear-elastic-plate-with-hole-benchmarks_nextflow
shell: bash -l {0}
run: |
Expand All @@ -65,7 +65,7 @@ jobs:
with:
name: snakemake_results_linear-elastic-plate-with-hole
path: |
benchmarks/linear-elastic-plate-with-hole/metadata4ing_provenance.zip
benchmarks/linear-elastic-plate-with-hole/${{ env.SNAKEMAKE_RESULT_FILE }}.zip

- name: Archive Linear Elastic plate with a hole benchmark data for nextflow
uses: actions/upload-artifact@v4
Expand All @@ -76,7 +76,7 @@ jobs:

process-artifacts:
runs-on: ubuntu-latest
needs: tests
needs: run-simulation
steps:
- name: Checkout repo content
uses: actions/checkout@v2
Expand All @@ -87,10 +87,10 @@ jobs:
name: snakemake_results_linear-elastic-plate-with-hole
path: ./artifact_files

- name: Unzip metadata4ing_provenance.zip
- name: Unzip Snakemake Result File
run: |
mkdir -p ./metadata4ing_provenance
unzip -o ./artifact_files/metadata4ing_provenance.zip -d ./metadata4ing_provenance
mkdir -p ./$SNAKEMAKE_RESULT_FILE
unzip -o ./artifact_files/$SNAKEMAKE_RESULT_FILE.zip -d ./$SNAKEMAKE_RESULT_FILE

- name: Setup Mambaforge with postprocessing env
uses: conda-incubator/setup-miniconda@v3
Expand All @@ -99,14 +99,37 @@ jobs:
activate-environment: postprocessing
use-mamba: true
environment-file: benchmarks/linear-elastic-plate-with-hole/environment_postprocessing.yml

- name: Validate Snakemake Result File
shell: bash -l {0}
run: |
python benchmarks/common/validate_provenance.py \
--provenance_folderpath "./$SNAKEMAKE_RESULT_FILE"

- name: Run plotting script
shell: bash -l {0}
run: |
python benchmarks/linear-elastic-plate-with-hole/plot_provenance.py ./metadata4ing_provenance
python benchmarks/linear-elastic-plate-with-hole/plot_provenance.py \
--provenance_folderpath "./$SNAKEMAKE_RESULT_FILE" \
--output_file $PROVENANACE_FILE_NAME

- name: Upload PDF plot as artifact
- name: Upload snakemake results file as artifact
uses: actions/upload-artifact@v4
with:
name: element-size-vs-stress-plot
path: element_size_vs_stress.pdf
path: ${{ env.PROVENANACE_FILE_NAME }}

- name: Re-zip snakemake result folder
run: |
cd "./${SNAKEMAKE_RESULT_FILE}"
zip -r "../${SNAKEMAKE_RESULT_FILE}.zip" .


- name: Upload RoCrate Zip file onto RoHub
shell: bash -l {0}
run: |
python benchmarks/common/upload_provenance.py \
--provenance_folderpath "./${SNAKEMAKE_RESULT_FILE}.zip" \
--benchmark_name "linear-elastic-plate-with-hole" \
--username "${{ secrets.ROHUB_USERNAME }}" \
--password "${{ secrets.ROHUB_PASSWORD }}"
276 changes: 276 additions & 0 deletions benchmarks/common/provenance.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,276 @@
import os
import re
from collections import defaultdict
from typing import List, Optional, Tuple, Union

import matplotlib.pyplot as plt
from rdflib import Graph
from rocrate_validator import services, models


class ProvenanceAnalyzer:
    """
    Analyze, validate, and visualize provenance data from RO-Crate metadata files.

    This class loads RO-Crate JSON-LD files, builds dynamic SPARQL queries to
    extract workflow metadata about methods, parameters, and metrics, and
    provides visualization capabilities. It also validates RO-Crate files
    against the RO-Crate 1.1 profile.

    Attributes:
        provenance_folderpath (str): The directory path containing the RO-Crate folder.
        provenance_filename (str): The name of the provenance file (default: 'ro-crate-metadata.json').
    """

    def __init__(
        self,
        provenance_folderpath: Optional[str] = None,
        provenance_filename: str = "ro-crate-metadata.json",
    ):
        """
        Initialize the ProvenanceAnalyzer.

        Args:
            provenance_folderpath (str, optional): Path to the folder containing the RO-Crate.
                Defaults to None.
            provenance_filename (str, optional): Name of the RO-Crate metadata file.
                Defaults to "ro-crate-metadata.json".
        """
        self.provenance_folderpath = provenance_folderpath
        self.provenance_filename = provenance_filename

    def load_graph_from_file(self) -> "Graph":
        """
        Load the RO-Crate metadata file into an rdflib Graph object.

        Returns:
            rdflib.Graph: The loaded RDF graph containing the provenance data.

        Raises:
            Exception: Re-raised if the file cannot be read or parsed as JSON-LD.
        """
        try:
            g = Graph()
            # Graph.parse handles both file loading and JSON-LD parsing.
            g.parse(
                os.path.join(self.provenance_folderpath, self.provenance_filename),
                format="json-ld",
            )
            return g
        except Exception as e:
            print(f"Failed to parse {self.provenance_filename}: {e}")
            raise  # Re-raise so the caller sees and handles the failure.

    def sanitize_variable_name(self, name: str) -> str:
        """
        Convert a string into a valid SPARQL variable name.

        Replaces invalid characters with underscores and ensures the variable
        name doesn't start with a digit.

        Args:
            name (str): The original string to convert.

        Returns:
            str: A sanitized variable name safe for use in SPARQL queries.
        """
        # Replace invalid chars with underscore
        var = re.sub(r"[^a-zA-Z0-9_]", "_", name)
        # SPARQL variable names must not start with a digit; prefix if needed.
        if re.match(r"^\d", var):
            var = "_" + var
        return var

    def build_dynamic_query(
        self,
        parameters: List[str],
        metrics: List[str],
        tools: Optional[List[str]] = None,
        named_graph: Optional[str] = None,
    ) -> str:
        """
        Generate a dynamic SPARQL query to extract m4i:Method instances with specified
        parameters and metrics.

        The query extracts methods along with their associated parameters (via
        m4i:hasParameter), metrics (via m4i:investigates), and the tools that
        implement them (via m4i:implementedByTool).

        Args:
            parameters (list): List of parameter names to query (matched via rdfs:label).
            metrics (list): List of metric names to query (matched via rdfs:label).
            tools (list, optional): List of tool name substrings to filter results.
                Case-insensitive matching. Defaults to None.
            named_graph (str, optional): URI of a named graph to query within.
                If None, queries the default graph. Defaults to None.

        Returns:
            str: A complete SPARQL query string ready to execute.
        """

        all_names = parameters + metrics
        # Map original names to safe SPARQL variable names
        var_map = {name: self.sanitize_variable_name(name) for name in all_names}

        # Build SELECT variables
        select_vars = " ".join(f"?{var_map[name]}" for name in all_names)

        # Build method→parameter and method→metric links
        method_links = (
            "\n    ".join(
                f"?method m4i:hasParameter ?param_{var_map[p]} ." for p in parameters
            )
            + "\n"
            + "\n    ".join(
                f"?method m4i:investigates ?param_{var_map[m]} ." for m in metrics
            )
        )

        # Build parameter and metric blocks: each named value is a
        # schema:PropertyValue matched by its rdfs:label.
        value_blocks = "\n".join(
            f'?param_{var_map[name]} a schema:PropertyValue ;\n    rdfs:label "{name}" ;\n    schema:value ?{var_map[name]} .\n'
            for name in all_names
        )

        # Tool block with optional case-insensitive substring filter.
        tool_block = "?method m4i:implementedByTool ?tool .\n?tool a schema:SoftwareApplication ;\n    rdfs:label ?tool_name .\n"
        if tools:
            filter_cond = " || ".join(
                f'CONTAINS(LCASE(?tool_name), "{t.lower()}")' for t in tools
            )
            tool_block += f"\nFILTER({filter_cond}) .\n"

        # Build the inner query
        inner_query = f"""
?method a m4i:Method .
{method_links}
{value_blocks}
{tool_block}
""".strip()

        # Wrap in GRAPH if named_graph is provided
        where_block = (
            f"GRAPH <{named_graph}> {{\n{inner_query}\n}}"
            if named_graph
            else inner_query
        )

        # Final query
        query = f"""
PREFIX schema: <http://schema.org/>
PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
PREFIX m4i: <http://w3id.org/nfdi4ing/metadata4ing#>
PREFIX ssn: <http://www.w3.org/ns/ssn/>

SELECT {select_vars} ?tool_name
WHERE {{
{where_block}
}}
""".strip()

        return query

    def run_query_on_graph(self, graph: "Graph", query: str):
        """
        Execute a SPARQL query on the provided RDF graph.

        Args:
            graph (rdflib.Graph): The RDF graph to query.
            query (str): The SPARQL query string to execute.

        Returns:
            rdflib.query.Result: The query results object from rdflib.
        """
        return graph.query(query)

    def plot_provenance_graph(
        self,
        data: List[List],
        x_axis_label: str,
        y_axis_label: str,
        x_axis_index: Union[int, str],
        y_axis_index: Union[int, str],
        group_by_index: Union[int, str],
        title: str,
        output_file: Optional[str] = None,
        figsize: Tuple[int, int] = (12, 5),
    ):
        """
        Generate a scatter/line plot from the extracted provenance data.

        The plot displays data points grouped by a specified column, with each group
        shown as a separate line series. The x-axis uses a logarithmic scale.

        Args:
            data (List[List]): The table data to plot, where each row is a list of values.
            x_axis_label (str): Label for the x-axis.
            y_axis_label (str): Label for the y-axis.
            x_axis_index (int or str): Index or key for the x-axis values in each row.
            y_axis_index (int or str): Index or key for the y-axis values in each row.
            group_by_index (int or str): Index or key for the grouping variable (used for legend).
            title (str): Title of the plot.
            output_file (str, optional): Path where the plot will be saved as an image.
                If None, displays the plot. Defaults to None.
            figsize (Tuple[int, int], optional): Figure dimensions (width, height).
                Defaults to (12, 5).
        """

        grouped_data = defaultdict(list)
        x_tick_set = set()

        for row in data:
            x = float(row[x_axis_index])
            y = float(row[y_axis_index])
            grouped_data[row[group_by_index]].append((x, y))
            x_tick_set.add(x)

        # Sort x-tick labels
        x_ticks = sorted(x_tick_set)

        plt.figure(figsize=figsize)
        for grouped_title, values in grouped_data.items():
            # Sort values by x-axis (element size) to ensure correct line plotting
            values.sort()
            x_vals, y_vals = zip(*values)
            plt.plot(x_vals, y_vals, marker="o", linestyle="-", label=grouped_title)

        plt.xlabel(x_axis_label)
        plt.ylabel(y_axis_label)
        plt.title(title)
        plt.grid(True)
        plt.legend()
        plt.xscale("log")

        # Set x-ticks to show original values
        plt.xticks(ticks=x_ticks, labels=[str(x) for x in x_ticks], rotation=45)
        plt.tight_layout()

        if output_file:
            plt.savefig(output_file)
            print(f"Plot saved to: {output_file}")
        else:
            plt.show()

    def validate_provenance(self):
        """
        Validate the RO-Crate against the RO-Crate 1.1 profile.

        Uses the rocrate-validator library to check if the RO-Crate metadata
        conforms to the RO-Crate 1.1 specification with required severity level.

        Raises:
            AssertionError: If the RO-Crate has validation issues, with details
                about each issue's severity and message.

        Prints:
            Success message if the RO-Crate is valid.
        """
        settings = services.ValidationSettings(
            rocrate_uri=self.provenance_folderpath,
            profile_identifier="ro-crate-1.1",
            requirement_severity=models.Severity.REQUIRED,
        )

        result = services.validate(settings)

        # Explicit raise instead of `assert`: an assert would be silently
        # stripped under `python -O`, disabling validation entirely. The
        # exception type stays AssertionError so existing callers still work.
        if result.has_issues():
            raise AssertionError(
                "RO-Crate is invalid!\n"
                + "\n".join(
                    f"Detected issue of severity {issue.severity.name} with check "
                    f'"{issue.check.identifier}": {issue.message}'
                    for issue in result.get_issues()
                )
            )

        print("RO-Crate is valid!")
Loading