diff --git a/.github/workflows/run-benchmark.yml b/.github/workflows/run-benchmark.yml
index b401593..49661f1 100644
--- a/.github/workflows/run-benchmark.yml
+++ b/.github/workflows/run-benchmark.yml
@@ -14,13 +14,12 @@ on:
env:
CACHE_NUMBER: 1 # increase to reset cache manually
+ SNAKEMAKE_RESULT_FILE: metadata4ing_provenance
+ PROVENANACE_FILE_NAME: element_size_vs_max_mises_stress.pdf
jobs:
- tests:
+ run-simulation:
runs-on: ubuntu-latest
-
-
-
steps:
- name: checkout repo content
uses: actions/checkout@v2
@@ -52,8 +51,9 @@ jobs:
snakemake --use-conda --force --cores all \
--reporter metadata4ing \
--report-metadata4ing-paramscript parameter_extractor.py \
- --report-metadata4ing-filename metadata4ing_provenance
-
+ --report-metadata4ing-config metadata4ing.config \
+ --report-metadata4ing-filename $SNAKEMAKE_RESULT_FILE
+
- name: run_linear-elastic-plate-with-hole-benchmarks_nextflow
shell: bash -l {0}
run: |
@@ -65,7 +65,7 @@ jobs:
with:
name: snakemake_results_linear-elastic-plate-with-hole
path: |
- benchmarks/linear-elastic-plate-with-hole/metadata4ing_provenance.zip
+ benchmarks/linear-elastic-plate-with-hole/${{ env.SNAKEMAKE_RESULT_FILE }}.zip
- name: Archive Linear Elastic plate with a hole benchmark data for nextflow
uses: actions/upload-artifact@v4
@@ -76,7 +76,7 @@ jobs:
process-artifacts:
runs-on: ubuntu-latest
- needs: tests
+ needs: run-simulation
steps:
- name: Checkout repo content
uses: actions/checkout@v2
@@ -87,10 +87,10 @@ jobs:
name: snakemake_results_linear-elastic-plate-with-hole
path: ./artifact_files
- - name: Unzip metadata4ing_provenance.zip
+ - name: Unzip Snakemake Result File
run: |
- mkdir -p ./metadata4ing_provenance
- unzip -o ./artifact_files/metadata4ing_provenance.zip -d ./metadata4ing_provenance
+ mkdir -p ./$SNAKEMAKE_RESULT_FILE
+ unzip -o ./artifact_files/$SNAKEMAKE_RESULT_FILE.zip -d ./$SNAKEMAKE_RESULT_FILE
- name: Setup Mambaforge with postprocessing env
uses: conda-incubator/setup-miniconda@v3
@@ -99,14 +99,37 @@ jobs:
activate-environment: postprocessing
use-mamba: true
environment-file: benchmarks/linear-elastic-plate-with-hole/environment_postprocessing.yml
+
+ - name: Validate Snakemake Result File
+ shell: bash -l {0}
+ run: |
+ python benchmarks/common/validate_provenance.py \
+ --provenance_folderpath "./$SNAKEMAKE_RESULT_FILE"
- name: Run plotting script
shell: bash -l {0}
run: |
- python benchmarks/linear-elastic-plate-with-hole/plot_provenance.py ./metadata4ing_provenance
+ python benchmarks/linear-elastic-plate-with-hole/plot_provenance.py \
+ --provenance_folderpath "./$SNAKEMAKE_RESULT_FILE" \
+ --output_file $PROVENANACE_FILE_NAME
- - name: Upload PDF plot as artifact
+      - name: Upload element-size vs. stress plot (PDF) as artifact
uses: actions/upload-artifact@v4
with:
name: element-size-vs-stress-plot
- path: element_size_vs_stress.pdf
\ No newline at end of file
+ path: ${{ env.PROVENANACE_FILE_NAME }}
+
+ - name: Re-zip snakemake result folder
+ run: |
+ cd "./${SNAKEMAKE_RESULT_FILE}"
+ zip -r "../${SNAKEMAKE_RESULT_FILE}.zip" .
+
+
+      - name: Upload RoCrate Zip file onto RoHub
+        shell: bash -l {0}
+        env:  # secrets go through env so quotes, `$` or backticks in them cannot break or alter the shell command
+          ROHUB_USERNAME: ${{ secrets.ROHUB_USERNAME }}
+          ROHUB_PASSWORD: ${{ secrets.ROHUB_PASSWORD }}
+        run: |
+          python benchmarks/common/upload_provenance.py --provenance_folderpath "./${SNAKEMAKE_RESULT_FILE}.zip" \
+            --benchmark_name "linear-elastic-plate-with-hole" --username "$ROHUB_USERNAME" --password "$ROHUB_PASSWORD"
\ No newline at end of file
diff --git a/benchmarks/common/provenance.py b/benchmarks/common/provenance.py
new file mode 100644
index 0000000..05a8a76
--- /dev/null
+++ b/benchmarks/common/provenance.py
@@ -0,0 +1,276 @@
+import os
+from rdflib import Graph
+import matplotlib.pyplot as plt
+from collections import defaultdict
+from typing import List, Tuple
+import re
+from rocrate_validator import services, models
+
+
+class ProvenanceAnalyzer:
+ """
+ A class to analyze, validate, and visualize provenance data from RO-Crate metadata files.
+
+ This class loads RO-Crate JSON-LD files, builds dynamic SPARQL queries to extract
+ workflow metadata about methods, parameters, and metrics, and provides visualization
+ capabilities. It also validates RO-Crate files against the RO-Crate 1.1 profile.
+
+ Attributes:
+ provenance_folderpath (str): The directory path containing the RO-Crate folder.
+ provenance_filename (str): The name of the provenance file (default: 'ro-crate-metadata.json').
+ """
+
+ def __init__(
+ self,
+ provenance_folderpath: str = None,
+ provenance_filename: str = "ro-crate-metadata.json",
+ ):
+ """
+ Initialize the ProvenanceAnalyzer.
+
+ Args:
+ provenance_folderpath (str, optional): Path to the folder containing the RO-Crate.
+ Defaults to None.
+ provenance_filename (str, optional): Name of the RO-Crate metadata file.
+ Defaults to "ro-crate-metadata.json".
+ """
+ self.provenance_folderpath = provenance_folderpath
+ self.provenance_filename = provenance_filename
+
+ def load_graph_from_file(self) -> Graph:
+ """
+ Loads the RO-Crate metadata file into an rdflib Graph object.
+
+ Returns:
+ rdflib.Graph: The loaded RDF graph containing the provenance data.
+
+ Raises:
+ Exception: If the file cannot be parsed as JSON-LD.
+ """
+ try:
+ g = Graph()
+ # The parse method handles file loading and format parsing
+ g.parse(
+ os.path.join(self.provenance_folderpath, self.provenance_filename),
+ format="json-ld",
+ )
+ return g
+ except Exception as e:
+ print(f"Failed to parse {self.provenance_filename}: {e}")
+ raise # Re-raise to ensure error is handled
+
+    def sanitize_variable_name(self, name: str) -> str:
+        """
+        Turn an arbitrary label into an identifier usable as a SPARQL variable.
+
+        Every character outside ``[a-zA-Z0-9_]`` becomes an underscore, and an
+        underscore is prepended when the result would begin with a digit.
+
+        Args:
+            name (str): The label to convert.
+
+        Returns:
+            str: The sanitized name (without a leading ``?``).
+        """
+        cleaned = re.sub(r"[^a-zA-Z0-9_]", "_", name)
+        # A digit in first position gets an underscore prefix
+        starts_with_digit = re.match(r"^\d", cleaned) is not None
+        if not starts_with_digit:
+            return cleaned
+        return "_" + cleaned
+
+    def build_dynamic_query(self, parameters, metrics, tools=None, named_graph=None):
+        """
+        Generate a dynamic SPARQL query to extract m4i:Method instances with specified
+        parameters and metrics.
+
+        The query extracts methods along with their associated parameters (via m4i:hasParameter),
+        metrics (via m4i:investigates), and the tools that implement them (via m4i:implementedByTool).
+
+        Args:
+            parameters (list): List of parameter names to query (matched via rdfs:label).
+            metrics (list): List of metric names to query (matched via rdfs:label).
+            tools (list, optional): List of tool name substrings to filter results.
+                Case-insensitive matching. Defaults to None.
+            named_graph (str, optional): URI of a named graph to query within.
+                If None, queries the default graph. Defaults to None.
+
+        Returns:
+            str: A complete SPARQL query string ready to execute.
+        """
+
+        all_names = list(parameters) + list(metrics)
+        # Map original names to safe SPARQL variable names
+        var_map = {name: self.sanitize_variable_name(name) for name in all_names}
+
+        # Build SELECT variables
+        select_vars = " ".join(f"?{var_map[name]}" for name in all_names)
+
+        # Build method→parameter and method→metric links
+        method_links = (
+            "\n ".join(
+                f"?method m4i:hasParameter ?param_{var_map[p]} ." for p in parameters
+            )
+            + "\n"
+            + "\n ".join(
+                f"?method m4i:investigates ?param_{var_map[m]} ." for m in metrics
+            )
+        )
+
+        # Build parameter and metric blocks
+        value_blocks = "\n".join(
+            f'?param_{var_map[name]} a schema:PropertyValue ;\n rdfs:label "{name}" ;\n schema:value ?{var_map[name]} .\n'
+            for name in all_names
+        )
+
+        # Tool block with optional filter
+        tool_block = "?method m4i:implementedByTool ?tool .\n?tool a schema:SoftwareApplication ;\n rdfs:label ?tool_name .\n"
+        if tools:
+            filter_cond = " || ".join(
+                f'CONTAINS(LCASE(?tool_name), "{t.lower()}")' for t in tools
+            )
+            tool_block += f"\nFILTER({filter_cond}) .\n"
+
+        # Build the inner query
+        inner_query = f"""
+        ?method a m4i:Method .
+        {method_links}
+        {value_blocks}
+        {tool_block}
+        """.strip()
+
+        # Wrap in GRAPH if named_graph is provided
+        where_block = (
+            f"GRAPH <{named_graph}> {{\n{inner_query}\n}}"
+            if named_graph
+            else inner_query
+        )
+
+        # Final query; each PREFIX must carry its namespace IRI or the query will not parse
+        query = f"""
+        PREFIX schema: <http://schema.org/>
+        PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+        PREFIX m4i: <http://w3id.org/nfdi4ing/metadata4ing#>
+        PREFIX ssn: <http://www.w3.org/ns/ssn/>
+
+        SELECT {select_vars} ?tool_name
+        WHERE {{
+        {where_block}
+        }}
+        """.strip()
+
+        return query
+
+ def run_query_on_graph(
+ self, graph: Graph, query: str
+ ) -> Tuple[List[str], List[List]]:
+ """
+ Executes a SPARQL query on the provided RDF graph.
+
+ Args:
+ graph (rdflib.Graph): The RDF graph to query.
+ query (str): The SPARQL query string to execute.
+
+ Returns:
+ rdflib.plugins.sparql.processor.SPARQLResult: The query results object
+ from rdflib.
+ """
+ return graph.query(query)
+
+ def plot_provenance_graph(
+ self,
+ data: List[List],
+ x_axis_label: str,
+ y_axis_label: str,
+ x_axis_index: str,
+ y_axis_index: str,
+ group_by_index: str,
+ title: str,
+ output_file: str = None,
+ figsize: Tuple[int, int] = (12, 5),
+ ):
+ """
+ Generates a scatter/line plot from the extracted provenance data.
+
+ The plot displays data points grouped by a specified column, with each group
+ shown as a separate line series. The x-axis uses a logarithmic scale.
+
+ Args:
+ data (List[List]): The table data to plot, where each row is a list of values.
+ x_axis_label (str): Label for the x-axis.
+ y_axis_label (str): Label for the y-axis.
+ x_axis_index (int or str): Index or key for the x-axis values in each row.
+ y_axis_index (int or str): Index or key for the y-axis values in each row.
+ group_by_index (int or str): Index or key for the grouping variable (used for legend).
+ title (str): Title of the plot.
+ output_file (str, optional): Path where the plot will be saved as an image.
+ If None, displays the plot. Defaults to None.
+ figsize (Tuple[int, int], optional): Figure dimensions (width, height).
+ Defaults to (12, 5).
+ """
+
+ grouped_data = defaultdict(list)
+ x_tick_set = set()
+
+ for row in data:
+ x = float(row[x_axis_index])
+ y = float(row[y_axis_index])
+ grouped_data[row[group_by_index]].append((x, y))
+ x_tick_set.add(x)
+
+ # Sort x-tick labels
+ x_ticks = sorted(x_tick_set)
+
+ plt.figure(figsize=figsize)
+ for grouped_title, values in grouped_data.items():
+ # Sort values by x-axis (element size) to ensure correct line plotting
+ values.sort()
+ x_vals, y_vals = zip(*values)
+ plt.plot(x_vals, y_vals, marker="o", linestyle="-", label=grouped_title)
+
+ plt.xlabel(x_axis_label)
+ plt.ylabel(y_axis_label)
+ plt.title(title)
+ plt.grid(True)
+ plt.legend()
+ plt.xscale("log")
+
+ # Set x-ticks to show original values
+ plt.xticks(ticks=x_ticks, labels=[str(x) for x in x_ticks], rotation=45)
+ plt.tight_layout()
+
+ if output_file:
+ plt.savefig(output_file)
+ print(f"Plot saved to: {output_file}")
+ else:
+ plt.show()
+
+    def validate_provenance(self):
+        """
+        Validates the RO-Crate against the RO-Crate 1.1 profile.
+
+        Uses the rocrate-validator library to check if the RO-Crate metadata
+        conforms to the RO-Crate 1.1 specification with required severity level.
+        Prints a success message if the RO-Crate is valid.
+
+        Raises:
+            AssertionError: If the RO-Crate has validation issues, with details
+                about each issue's severity and message.
+        """
+        settings = services.ValidationSettings(
+            rocrate_uri=self.provenance_folderpath,
+            profile_identifier="ro-crate-1.1",
+            requirement_severity=models.Severity.REQUIRED,
+        )
+        result = services.validate(settings)
+
+        # Raised explicitly: an `assert` statement is stripped under `python -O`
+        if result.has_issues():
+            details = "\n".join(
+                f"Detected issue of severity {issue.severity.name} with check "
+                f'"{issue.check.identifier}": {issue.message}'
+                for issue in result.get_issues()
+            )
+            raise AssertionError("RO-Crate is invalid!\n" + details)
+
+        print("RO-Crate is valid!")
diff --git a/benchmarks/common/upload_provenance.py b/benchmarks/common/upload_provenance.py
new file mode 100644
index 0000000..4ef6d82
--- /dev/null
+++ b/benchmarks/common/upload_provenance.py
@@ -0,0 +1,203 @@
+"""
+RoHub Provenance Upload Module
+
+This module handles the upload of research object provenance data to RoHub,
+a platform for research object management and sharing. It processes RO-Crate
+metadata artifacts and manages the complete upload workflow including:
+- Authentication with RoHub
+- Deletion of existing research objects
+- Upload of new research objects from zip files
+- Polling for upload completion
+- Adding semantic annotations to uploaded objects
+
+The module supports both production and development environments of RoHub.
+"""
+
+import argparse
+import rohub
+import time
+
+
+def parse_args():
+    """
+    Parse command-line arguments for RoHub provenance upload.
+
+    Returns:
+        argparse.Namespace: Parsed command-line arguments containing:
+            - provenance_folderpath (str): Path to the RO-Crate zip file
+            - benchmark_name (str): Name of the benchmark being uploaded
+            - username (str): RoHub authentication username
+            - password (str): RoHub authentication password
+    """
+    parser = argparse.ArgumentParser(
+        description="Upload a zipped RO-Crate with provenance data to RoHub."
+    )
+    parser.add_argument(
+        "--provenance_folderpath",
+        type=str,
+        required=True,
+        help="Path to the zip file containing the provenance data (RO-Crate)",
+    )
+    parser.add_argument(
+        "--benchmark_name",
+        type=str,
+        required=True,
+        help="Name of the benchmark to be uploaded",
+    )
+    parser.add_argument(
+        "--username",
+        type=str,
+        required=True,
+        help="Username for RoHub",
+    )
+    parser.add_argument(
+        "--password",
+        type=str,
+        required=True,
+        help="Password for RoHub",
+    )
+    return parser.parse_args()
+
+
+def run(args):
+    """
+    Execute the complete RoHub upload workflow.
+
+    This function performs the following operations:
+    1. Configures RoHub settings (API endpoints, authentication)
+    2. Authenticates with RoHub using provided credentials
+    3. Deletes all existing research objects owned by the user (best effort)
+    4. Uploads the new research object from the specified zip file
+    5. Polls the upload job status until completion or timeout
+    6. Adds semantic annotations to the successfully uploaded object
+
+    Args:
+        args (argparse.Namespace): Parsed command-line arguments containing:
+            - provenance_folderpath: Path to the provenance zip file
+            - benchmark_name: Benchmark name used to build the annotation URL
+            - username: RoHub username
+            - password: RoHub password
+
+    Raises:
+        Exception: If login or the upload request fails (propagated from the rohub client)
+        TimeoutError: If the upload job does not report SUCCESS within the timeout
+
+    Configuration:
+        USE_DEVELOPMENT_VERSION (bool): When True, uses RoHub development server.
+            Set to False for production environment.
+
+    Timeout Settings:
+        - Upload timeout: 5 minutes (300 seconds)
+        - Poll interval: 10 seconds between status checks
+        - Sleep time: 10 seconds between API calls
+
+    Annotations:
+        The function adds a predefined annotation linking the research object
+        to the NFDI4Ing Model Validation Platform benchmark.
+    """
+    # Configure API sleep time to avoid rate limiting
+    rohub.settings.SLEEP_TIME = 10
+
+    # Toggle between development and production environments
+    USE_DEVELOPMENT_VERSION = True
+
+    if USE_DEVELOPMENT_VERSION:
+        # Development server configuration
+        rohub.settings.API_URL = "https://rohub2020-rohub.apps.paas-dev.psnc.pl/api/"
+        rohub.settings.KEYCLOAK_CLIENT_ID = "rohub2020-cli"
+        # NOTE(review): secret is committed in plain text -- confirm it is a public dev credential
+        rohub.settings.KEYCLOAK_CLIENT_SECRET = "714617a7-87bc-4a88-8682-5f9c2f60337d"
+        rohub.settings.KEYCLOAK_URL = "https://keycloak-dev.apps.paas-dev.psnc.pl/auth/realms/rohub/protocol/openid-connect/token"
+        rohub.settings.SPARQL_ENDPOINT = (
+            "https://rohub2020-api-virtuoso-route-rohub.apps.paas-dev.psnc.pl/sparql/"
+        )
+
+    # Authenticate with RoHub
+    rohub.login(args.username, args.password)
+
+    # Retrieve list of user's existing research objects
+    my_ros = rohub.list_my_ros()
+
+    # Delete all existing research objects (best effort: a failure is only reported)
+    try:
+        for _, row in my_ros.iterrows():
+            rohub.ros_delete(row["identifier"])
+    except Exception as error:
+        print(f"Error on Deleting RoHub: {error}")
+
+    # Upload the research object zip file. A failed upload leaves no job to
+    # poll and nothing to annotate, so it is reported and then re-raised.
+    try:
+        upload_result = rohub.ros_upload(path_to_zip=args.provenance_folderpath)
+        identifier = upload_result["identifier"]
+        uuid = upload_result["results"].rstrip("/").split("/")[-1]
+    except Exception as error:
+        print(f"Error on Upload RoHub: {error}")
+        raise
+
+    # Configure polling parameters
+    timeout_seconds = 5 * 60  # 5 minutes maximum wait time
+    poll_interval = 10  # Check status every 10 seconds
+    start_time = time.time()
+
+    # Poll upload job status until completion or timeout
+    while True:
+        success_result = rohub.is_job_success(job_id=identifier)
+        status = success_result.get("status", "UNKNOWN")
+
+        if status == "SUCCESS":
+            print(f"Upload successful: {success_result}")
+            break
+        if time.time() - start_time > timeout_seconds:
+            # Fail the run so a stalled upload is not reported as success
+            raise TimeoutError(
+                f"Upload did not succeed within 5 minutes. Last status: {status}"
+            )
+        print(f"Current status: {status}, waiting {poll_interval}s...")
+        time.sleep(poll_interval)
+
+    # Define semantic annotation linking to the validation platform benchmark
+    ANNOTATION_PREDICATE = "http://w3id.org/nfdi4ing/metadata4ing#investigates"
+    ANNOTATION_OBJECT = f"https://github.com/BAMresearch/NFDI4IngModelValidationPlatform/tree/main/benchmarks/{args.benchmark_name}"
+
+    # Annotate the uploaded object (skipped if no UUID could be derived from the upload result)
+    if uuid:
+        _RO = rohub.ros_load(uuid)
+        annotation_json = [
+            {"property": ANNOTATION_PREDICATE, "value": ANNOTATION_OBJECT}
+        ]
+        add_annotations_result = _RO.add_annotations(
+            body_specification_json=annotation_json
+        )
+        print(add_annotations_result)
+
+
+def main():
+ """
+ Main entry point for the RoHub provenance upload script.
+
+ Parses command-line arguments and initiates the upload workflow to RoHub.
+ This function is called when the script is executed directly.
+
+ Usage:
+        python upload_provenance.py \
+            --provenance_folderpath /path/to/ro-crate.zip \
+            --benchmark_name linear-elastic-plate-with-hole \
+            --username user@example.com --password your_password
+
+ Note:
+ - Ensure the provenance file is a valid zip containing RO-Crate metadata
+ - Valid RoHub credentials are required for authentication
+ - The script will delete all existing research objects before uploading
+ - Upload process may take up to 5 minutes
+
+ Exits:
+ The script will exit with a non-zero status code if authentication
+ or upload fails, or if required arguments are not provided.
+ """
+ args = parse_args()
+ run(args)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/benchmarks/common/validate_provenance.py b/benchmarks/common/validate_provenance.py
new file mode 100644
index 0000000..57c2b27
--- /dev/null
+++ b/benchmarks/common/validate_provenance.py
@@ -0,0 +1,86 @@
+"""
+Provenance Validation Module
+
+This module provides functionality to validate research object provenance data
+against defined profiles. It processes RO-Crate metadata to ensure compliance
+with provenance standards.
+
+"""
+
+import argparse
+from provenance import ProvenanceAnalyzer
+
+
+def parse_args():
+ """
+ Parse command-line arguments for provenance validation.
+
+ Returns:
+ argparse.Namespace: Parsed command-line arguments containing:
+ - provenance_folderpath (str): Path to the folder containing provenance data
+ - provenance_filename (str): Name of the provenance metadata file
+ (default: 'ro-crate-metadata.json')
+ """
+ parser = argparse.ArgumentParser(
+ description="Process research object zip to validate against profile."
+ )
+ parser.add_argument(
+ "--provenance_folderpath",
+ type=str,
+ required=True,
+ help="Path to the folder containing provenance data",
+ )
+ parser.add_argument(
+ "--provenance_filename",
+ type=str,
+ default="ro-crate-metadata.json",
+ help="File name for the provenance graph",
+ )
+ return parser.parse_args()
+
+
+def run(args):
+ """
+ Execute the provenance validation process.
+
+ Creates a ProvenanceAnalyzer instance with the provided arguments and
+ runs the validation against the configured profile.
+
+ Args:
+ args (argparse.Namespace): Parsed command-line arguments containing
+ provenance folder path and filename
+
+    Raises:
+        AssertionError: If the RO-Crate fails validation (raised by
+            ProvenanceAnalyzer.validate_provenance)
+ """
+ analyzer = ProvenanceAnalyzer(
+ provenance_folderpath=args.provenance_folderpath,
+ provenance_filename=args.provenance_filename,
+ )
+
+ analyzer.validate_provenance()
+
+
+def main():
+ """
+ Main entry point for the provenance validation script.
+
+ Parses command-line arguments and initiates the validation process.
+ This function is called when the script is executed directly.
+
+ Usage:
+ python validate_provenance.py --provenance_folderpath /path/to/folder
+ python validate_provenance.py --provenance_folderpath /path/to/folder \
+ --provenance_filename custom-metadata.json
+
+ Exits:
+ The script will exit with a non-zero status code if validation fails
+ or if required arguments are not provided.
+ """
+ args = parse_args()
+ run(args)
+
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/benchmarks/linear-elastic-plate-with-hole/environment_postprocessing.yml b/benchmarks/linear-elastic-plate-with-hole/environment_postprocessing.yml
index 43a2a8f..0ee4a2d 100644
--- a/benchmarks/linear-elastic-plate-with-hole/environment_postprocessing.yml
+++ b/benchmarks/linear-elastic-plate-with-hole/environment_postprocessing.yml
@@ -11,3 +11,9 @@ dependencies:
- pyvista
- rdflib
- matplotlib
+ - pandas
+ - numpy
+ - pip
+ - pip:
+ - roc-validator
+ - rohub
\ No newline at end of file
diff --git a/benchmarks/linear-elastic-plate-with-hole/fenics/environment_simulation.yml b/benchmarks/linear-elastic-plate-with-hole/fenics/environment_simulation.yml
index b5280ab..940b20e 100644
--- a/benchmarks/linear-elastic-plate-with-hole/fenics/environment_simulation.yml
+++ b/benchmarks/linear-elastic-plate-with-hole/fenics/environment_simulation.yml
@@ -7,7 +7,7 @@ channel_priority: strict
dependencies:
- python=3.12
- fenics-dolfinx=0.9.*
- - libadios2=2.10.1
+ - mpich
- petsc4py
- pint
- python-gmsh
diff --git a/benchmarks/linear-elastic-plate-with-hole/metadata4ing.config b/benchmarks/linear-elastic-plate-with-hole/metadata4ing.config
new file mode 100644
index 0000000..1881df1
--- /dev/null
+++ b/benchmarks/linear-elastic-plate-with-hole/metadata4ing.config
@@ -0,0 +1,10 @@
+{
+ "researchProblem": {
+ "description": "This benchmark describes a linear-elastic plate with a hole test case used in the NFDI-4Ing Model Validation Platform for comparing and validating computational modelling methods."
+ },
+ "rocrate": {
+ "name": "NFDI4Ing Provenance",
+ "description": "Benchmark for linear-elastic plate with a hole",
+ "license": "https://opensource.org/licenses/MIT"
+ }
+}
\ No newline at end of file
diff --git a/benchmarks/linear-elastic-plate-with-hole/plot_provenance.py b/benchmarks/linear-elastic-plate-with-hole/plot_provenance.py
index 8089eb1..40e3afa 100644
--- a/benchmarks/linear-elastic-plate-with-hole/plot_provenance.py
+++ b/benchmarks/linear-elastic-plate-with-hole/plot_provenance.py
@@ -1,153 +1,423 @@
-import os
import argparse
-from rdflib import Graph
-import matplotlib.pyplot as plt
-from collections import defaultdict
+import sys
+from pathlib import Path
from generate_config import workflow_config
+import json
+import os
+import pandas as pd
+import numpy as np
-def load_graphs(base_dir):
+
+def parse_args():
"""
- Walk through the base_dir and load all JSON-LD files into rdflib Graphs.
+ Parse command-line arguments for the provenance processing script.
+
+ Returns:
+ argparse.Namespace: Parsed arguments containing:
+ - provenance_folderpath: Path to the folder with RO-Crate data
+ - provenance_filename: Name of the RO-Crate metadata file
+ - output_file: Path for the final visualization output
"""
- graph_list = []
- for root, _, files in os.walk(base_dir):
- for file in files:
- if file.endswith(".jsonld"):
- file_path = os.path.join(root, file)
- try:
- g = Graph()
- g.parse(file_path, format='json-ld')
- graph_list.append(g)
- print(f"✅ Parsed: {file_path}")
- except Exception as e:
- print(f"❌ Failed to parse {file_path}: {e}")
- print(f"\nTotal graphs loaded: {len(graph_list)}")
- return graph_list
+ parser = argparse.ArgumentParser(
+ description="Process ro-crate-metadata.json artifacts and display simulation results."
+ )
+ parser.add_argument(
+ "--provenance_folderpath",
+ type=str,
+ required=True,
+ help="Path to the folder containing provenance data",
+ )
+ parser.add_argument(
+ "--provenance_filename",
+ type=str,
+ default="ro-crate-metadata.json",
+ help="File name for the provenance graph",
+ )
+ parser.add_argument(
+ "--output_file",
+ type=str,
+ required=True,
+ help="Final visualization file",
+ )
+ return parser.parse_args()
-def query_and_build_table(graph_list):
+def sparql_result_to_dataframe(results):
"""
- Run SPARQL query on graphs and build a table.
- Returns headers and table_data.
+ Convert SPARQL query results into a pandas DataFrame.
+
+ Extracts variable bindings from each result row using asdict() and converts
+ RDF values to Python native types using toPython().
+
+ Args:
+ results (rdflib.plugins.sparql.processor.SPARQLResult): SPARQL query results
+ from rdflib.
+
+ Returns:
+ pd.DataFrame: DataFrame where each row represents a query result and columns
+ correspond to SPARQL variables.
"""
- tools = workflow_config["tools"]
- filter_conditions = " || ".join(
- f'CONTAINS(LCASE(?tool_name), "{tool.lower()}")' for tool in tools
+ rows = []
+
+ for row in results:
+ row_dict = {k: v.toPython() for k, v in row.asdict().items()}
+ rows.append(row_dict)
+
+ return pd.DataFrame(rows)
+
+
+def apply_custom_filters(data: pd.DataFrame) -> pd.DataFrame:
+ """
+ Filter provenance data to include only first-order linear elements.
+
+ Filters rows where element_degree = 1 and element_order = 1, then removes
+ these filtering columns from the result.
+
+ Args:
+ data (pd.DataFrame): Input DataFrame containing element_degree and
+ element_order columns.
+
+ Returns:
+ pd.DataFrame: Filtered DataFrame with element_degree and element_order
+ columns removed and index reset.
+ """
+ filtered_df = data[(data["element_degree"] == 1) & (data["element_order"] == 1)]
+
+ return filtered_df.drop(columns=["element_degree", "element_order"]).reset_index(
+ drop=True
)
- query = f"""
- PREFIX cr:
- PREFIX sio:
-
- SELECT DISTINCT ?value_element_size ?value_max_von_mises_stress_gauss_points ?tool_name
- WHERE {{
- ?processing_step a schema:Action ;
- m4i:hasParameter ?element_size ;
- m4i:hasParameter ?element_order ;
- m4i:hasParameter ?element_degree ;
- m4i:investigates ?max_von_mises_stress_gauss_points ;
- schema:instrument ?tool .
-
- ?max_von_mises_stress_gauss_points a schema:PropertyValue ;
- rdfs:label "max_von_mises_stress_nodes" ;
- schema:value ?value_max_von_mises_stress_gauss_points .
-
- ?element_order a schema:PropertyValue ;
- rdfs:label "element_order" ;
- schema:value 1 .
-
- ?element_degree a schema:PropertyValue ;
- rdfs:label "element_degree" ;
- schema:value 1 .
-
- ?element_size a schema:PropertyValue ;
- rdfs:label "element_size" ;
- schema:value ?value_element_size .
-
- ?tool a schema:SoftwareApplication ;
- rdfs:label ?tool_name .
-
- FILTER ({filter_conditions})
- }}
- """
-
- headers = [
- "element-size",
- "max-mises-stress",
- "Tool Name"
- ]
-
- table_data = []
-
- for g in graph_list:
- results = g.query(query)
- for row in results:
- value_element_size = row.value_element_size
- value_max_von_mises_stress_gauss_points = row.value_max_von_mises_stress_gauss_points
- tool_name = row.tool_name
- table_data.append(
- [
- value_element_size,
- value_max_von_mises_stress_gauss_points,
- tool_name,
- ]
- )
-
- # Sort by element-size
- sort_key = headers.index("element-size")
- table_data.sort(key=lambda x: x[sort_key])
-
- return headers, table_data
-
-
-def plot_element_size_vs_stress(headers, table_data, output_file="element_size_vs_stress.pdf"):
- """Plots element-size vs max-mises-stress grouped by tool and saves as PDF."""
-
- idx_element_size = headers.index("element-size")
- idx_stress = headers.index("max-mises-stress")
- idx_tool = headers.index("Tool Name")
-
- grouped_data = defaultdict(list)
- x_tick_set = set()
-
- for row in table_data:
- tool = row[idx_tool]
- x = float(row[idx_element_size])
- y = float(row[idx_stress])
- grouped_data[tool].append((x, y))
- x_tick_set.add(x)
-
- # Sort x-tick labels
- x_ticks = sorted(x_tick_set)
-
- plt.figure(figsize=(12, 5))
- for tool, values in grouped_data.items():
- values.sort()
- x_vals, y_vals = zip(*values)
- plt.plot(x_vals, y_vals, marker='o', linestyle='-', label=tool)
-
- plt.xlabel("element-size")
- plt.ylabel("max-mises-stress")
- plt.title("element-size vs max-mises-stress by Tool\n(element-order = 1 , element-degree = 1)")
- plt.legend(title="Tool Name")
- plt.grid(True)
-
- # Use logarithmic scale for x-axis
- plt.xscale('log')
-
- # Set x-ticks to show original values
- plt.xticks(ticks=x_ticks, labels=[str(x) for x in x_ticks], rotation=45)
- plt.tight_layout()
-
- # Save to PDF instead of showing
- plt.savefig(output_file)
- print(f"Plot saved as {output_file}")
-if __name__ == "__main__":
- parser = argparse.ArgumentParser(description="Process JSON-LD artifacts and display simulation results.")
- parser.add_argument("artifact_folder", type=str, help="Path to the folder containing unzipped artifacts")
- args = parser.parse_args()
+def summary_file_to_dataframe(analyzer, summary_path, parameters, metrics):
+    """
+    Load benchmark data from a summary.json file into a DataFrame.
+
+    Handles both dictionary-style parameter/metric values (with 'value' key) and
+    direct scalar values. Values are looked up under the names as given; the
+    DataFrame columns are those names passed through analyzer.sanitize_variable_name.
+
+    Args:
+        analyzer: Object providing sanitize_variable_name(name) for column names.
+        summary_path (str): Path to the summary.json file.
+        parameters (list): List of parameter names to extract.
+        metrics (list): List of metric names to extract.
+
+    Returns:
+        pd.DataFrame: DataFrame with columns for each parameter and metric.
+
+    Raises:
+        KeyError: If an entry lacks one of the requested parameters or metrics.
+    """
+
+    def _scalar(raw):
+        # Unwrap {"value": ...} entries; anything else is used as-is
+        return raw.get("value") if isinstance(raw, dict) else raw
+
+    # JSON is UTF-8 by specification; do not depend on the platform default
+    with open(summary_path, "r", encoding="utf-8") as f:
+        data = json.load(f)
+
+    # Parameters first, then metrics, so the column order stays the same
+    sections = (("parameters", parameters), ("metrics", metrics))
+
+    records = []
+    for entry in data:
+        record = {}
+        for section, names in sections:
+            for name in names:
+                raw = entry[section][name]
+                record[analyzer.sanitize_variable_name(name)] = _scalar(raw)
+        records.append(record)
+
+    return pd.DataFrame(records)
+
+
+def compare_dataframes(df1: pd.DataFrame, df2: pd.DataFrame):
+    """
+    Check whether two DataFrames hold the same rows, ignoring row order.
+
+    Each frame is reordered to alphabetically sorted columns, sorted by all
+    columns and re-indexed, then the two are compared with DataFrame.equals.
+    On a mismatch, the rows found on only one side are printed.
+
+    Args:
+        df1 (pd.DataFrame): First DataFrame to compare.
+        df2 (pd.DataFrame): Second DataFrame to compare.
+
+    Returns:
+        bool: True if both frames contain identical data, False otherwise.
+
+    Raises:
+        ValueError: If the DataFrames have different columns.
+    """
+    left_cols = sorted(df1.columns)
+    right_cols = sorted(df2.columns)
+    if left_cols != right_cols:
+        raise ValueError("DataFrames have different columns.")
+
+    def _canonical(frame, columns):
+        # Fixed column order, rows sorted by every column, fresh 0..n-1 index
+        return frame[columns].sort_values(by=columns).reset_index(drop=True)
+
+    def _only_in(primary, other):
+        # `other` is stacked twice so each of its rows counts as a duplicate
+        # and is removed by keep=False; rows repeated within `primary` go too
+        return pd.concat([primary, other, other]).drop_duplicates(keep=False)
+
+    left = _canonical(df1, left_cols)
+    right = _canonical(df2, right_cols)
+
+    if left.equals(right):
+        return True
+
+    # Compute both difference sets before printing anything
+    report = (
+        ("Rows in df1 but not in df2:", _only_in(left, right)),
+        ("\nRows in df2 but not in df1:", _only_in(right, left)),
+    )
+
+    for heading, leftovers in report:
+        print(heading)
+        # An empty difference is reported as the literal text "None"
+        print("None" if leftovers.empty else leftovers)
+
+    return False
+
+
def load_and_query_graph(analyzer, parameters, metrics, tools):
    """
    Load the RO-Crate graph and execute a SPARQL query to extract provenance data.

    The graph is loaded through ``analyzer.load_graph_from_file()``, the query
    is built with ``analyzer.build_dynamic_query(parameters, metrics, tools)``
    and executed with ``analyzer.run_query_on_graph``; the result is converted
    with ``sparql_result_to_dataframe``.

    Args:
        analyzer (ProvenanceAnalyzer): Initialized analyzer instance.
        parameters (list): List of parameter names to query.
        metrics (list): List of metric names to query.
        tools (list): List of tool names to filter by.

    Returns:
        pd.DataFrame: DataFrame containing the query results.

    Raises:
        AssertionError: If the query returns no data.
    """
    graph = analyzer.load_graph_from_file()
    query = analyzer.build_dynamic_query(parameters, metrics, tools)
    results = analyzer.run_query_on_graph(graph, query)

    provenance_df = sparql_result_to_dataframe(results)

    # Raised explicitly rather than via `assert`, so the check is not
    # stripped when Python runs with optimizations enabled (`-O`).
    if len(provenance_df) == 0:
        raise AssertionError("No data found for the provenance query.")

    return provenance_df
+
- graphs = load_graphs(args.artifact_folder)
- headers, table_data = query_and_build_table(graphs)
- plot_element_size_vs_stress(headers, table_data, output_file="element_size_vs_stress.pdf")
\ No newline at end of file
def validate_provenance_data_summary_file(
    analyzer, provenance_df, parameters, metrics, tools, provenance_folderpath
):
    """
    Validate provenance query results against ground truth data from summary.json files.

    For each tool, loads the corresponding summary.json file and compares its data
    against the filtered provenance query results for that tool.

    Args:
        analyzer (ProvenanceAnalyzer): Initialized analyzer instance; forwarded to
            ``summary_file_to_dataframe``.
        provenance_df (pd.DataFrame): DataFrame containing all provenance query results.
            Must contain a ``tool_name`` column.
        parameters (list): List of parameter names used in the comparison.
        metrics (list): List of metric names used in the comparison.
        tools (list): List of tool names to validate.
        provenance_folderpath (str): Base path to the provenance folder containing
            summary.json files under
            ``snakemake_results/linear-elastic-plate-with-hole/<tool>/``.

    Raises:
        AssertionError: If data mismatch is found between summary.json and provenance
            data for any tool.
    """
    for tool in tools:
        summary_path = os.path.join(
            provenance_folderpath,
            "snakemake_results",
            "linear-elastic-plate-with-hole",
            tool,
            "summary.json",
        )
        summary_df = summary_file_to_dataframe(
            analyzer, summary_path, parameters, metrics
        )

        # `regex=False`: the tool name is a literal substring, not a pattern,
        # so names containing regex metacharacters are matched as written.
        tool_mask = provenance_df["tool_name"].str.contains(
            tool, case=False, na=False, regex=False
        )
        filtered_df = provenance_df[tool_mask].drop(columns=["tool_name"])

        # Raised explicitly rather than via `assert`, so the validation is
        # not skipped when Python runs with optimizations enabled (`-O`).
        if not compare_dataframes(summary_df, filtered_df):
            raise AssertionError(
                f"Data mismatch for tool '{tool}'. See above for details."
            )
+
+
def _row_mismatches(row_csv, row_df, columns, numeric_atol):
    """Return ``(column, csv_value, df_value)`` for every column where the two rows disagree."""
    mismatches = []
    for col in columns:
        v_csv = row_csv[col]
        v_df = row_df[col]
        atol = numeric_atol.get(col)

        if atol is None:
            # Non-numeric columns: exact match
            differs = v_csv != v_df
        else:
            try:
                differs = not np.isclose(v_csv, v_df, rtol=0, atol=atol)
            except TypeError:
                # One side is not numeric after all (e.g. text in the CSV):
                # fall back to an exact comparison.
                differs = v_csv != v_df

        if differs:
            mismatches.append((col, v_csv, v_df))
    return mismatches


def validate_provenance_data_csv_file(analyzer, provenance_df, tools, float_precision=6, tol=1e+6):
    """
    Validate that the provided provenance DataFrame contains all rows from reference CSV files for the given tools.

    The CSV file is treated as the ground truth. It may contain extra columns, but only
    the columns that also exist in the input DataFrame are checked.

    Numeric columns are compared with an absolute tolerance instead of exact
    equality, to avoid failures caused by minor numerical differences.

    Args:
        analyzer (ProvenanceAnalyzer): Initialized analyzer instance. Its
            ``sanitize_variable_name`` is applied to the CSV column names.
        provenance_df (pd.DataFrame): The DataFrame containing provenance data to validate.
            Must contain a ``tool_name`` column; rows whose tool name starts with
            the tool (case-insensitive) are compared against that tool's CSV.
        tools (list of str): List of tool names. For each tool, a CSV file `<tool>.csv`
            must exist in the `tests` folder next to this script.
        float_precision (int, optional): Number of decimal places up to which numeric
            values in non-stress columns must agree (absolute tolerance
            ``10 ** -float_precision``). Defaults to 6.
        tol (float, optional): Absolute tolerance applied to the stress columns
            ``max_von_mises_stress_nodes`` and ``max_von_mises_stress_gauss_points``.
            Defaults to 1e+6.

    Raises:
        AssertionError: If any CSV row (considering only overlapping columns) is missing
            in `provenance_df`, including the case where `provenance_df` holds no
            rows at all for a tool whose CSV is not empty.
    """

    stress_cols = {"max_von_mises_stress_nodes", "max_von_mises_stress_gauss_points"}
    tests_dir = os.path.join(os.path.dirname(os.path.abspath(__file__)), "tests")
    default_atol = 10.0 ** (-float_precision)

    for tool in tools:
        tool_mask = provenance_df["tool_name"].str.lower().str.startswith(
            tool.lower(), na=False
        )
        df_subset = provenance_df[tool_mask].copy()

        df_csv = pd.read_csv(os.path.join(tests_dir, f"{tool}.csv"))
        df_csv.columns = [analyzer.sanitize_variable_name(c) for c in df_csv.columns]

        common_cols = df_csv.columns.intersection(df_subset.columns)
        df_subset = df_subset[common_cols].reset_index(drop=True)
        df_csv = df_csv[common_cols].reset_index(drop=True)

        # Without any provenance rows there is nothing to match against; fail
        # with the documented exception type instead of crashing further down.
        if df_subset.empty and not df_csv.empty:
            raise AssertionError(
                f"\n[{tool}] No provenance rows found for this tool; "
                f"{len(df_csv)} reference CSV row(s) cannot be matched."
            )

        # Absolute tolerance per numeric column; columns absent from this
        # mapping are compared for exact equality.
        numeric_atol = {
            col: (tol if col in stress_cols else default_atol)
            for col in common_cols
            if pd.api.types.is_numeric_dtype(df_subset[col])
        }

        for i, row_csv in df_csv.iterrows():
            closest = None  # mismatch list of the best partial match so far

            for _, row_df in df_subset.iterrows():
                mismatches = _row_mismatches(row_csv, row_df, common_cols, numeric_atol)

                if not mismatches:
                    break

                if closest is None or len(mismatches) < len(closest):
                    closest = mismatches
            else:
                # Loop finished without `break`: no provenance row matched.
                # Report the differences of the closest candidate.
                for col, v_csv, v_df in closest:
                    print(f"Column `{col}` → CSV: {v_csv} | DataFrame: {v_df}")

                raise AssertionError(
                    f"\n[{tool}] CSV row {i} not matched in DataFrame within tolerance {tol} "
                    f"on columns {list(common_cols)}:\n{row_csv.to_dict()}"
                )
+
+
def plot_results(analyzer, final_df, output_file):
    """
    Plot element size against maximum von Mises stress, grouped by tool.

    The DataFrame is converted to a list of row lists and handed to
    ``analyzer.plot_provenance_graph``. Columns are addressed by position:
    index 0 is the x axis, index 1 the y axis and index 2 the grouping key.

    Args:
        analyzer (ProvenanceAnalyzer): Initialized analyzer instance.
        final_df (pd.DataFrame): Filtered data to plot. Expected column order:
            element_size, max_von_mises_stress_nodes, tool_name.
        output_file (str): Path where the plot image will be saved.
    """
    rows = final_df.values.tolist()

    plot_options = {
        "data": rows,
        "title": "Element Size vs Max Von Mises Stress",
        "x_axis_label": "Element Size",
        "y_axis_label": "Max Von Mises Stress",
        # Positional column indices into each row of `rows`.
        "x_axis_index": 0,
        "y_axis_index": 1,
        "group_by_index": 2,
        "output_file": output_file,
    }

    analyzer.plot_provenance_graph(**plot_options)
+
+
def run(args, parameters, metrics, tools):
    """
    Execute the complete provenance analysis workflow.

    Steps, in order:
    1. Initialize the ProvenanceAnalyzer
    2. Load and query the provenance graph
    3. Validate query results against the summary.json ground truth data
    4. Validate query results against the reference CSV files
    5. Apply custom filters to the data
    6. Generate the visualization plot

    Args:
        args (argparse.Namespace): Parsed command-line arguments. The attributes
            ``provenance_folderpath``, ``provenance_filename`` and
            ``output_file`` are read.
        parameters (list): List of parameter names to extract.
        metrics (list): List of metric names to extract.
        tools (list): List of tool names to process.
    """
    # The directory three levels above this file is put first on the import
    # path so that the `benchmarks` package can be imported below.
    project_root = Path(__file__).parent.parent.parent
    sys.path.insert(0, str(project_root))
    from benchmarks.common.provenance import ProvenanceAnalyzer

    provenance_folder = args.provenance_folderpath
    analyzer = ProvenanceAnalyzer(
        provenance_folderpath=provenance_folder,
        provenance_filename=args.provenance_filename,
    )

    provenance_df = load_and_query_graph(analyzer, parameters, metrics, tools)

    # Both validations raise on mismatch, so plotting only happens for data
    # that agrees with the ground truth.
    validate_provenance_data_summary_file(
        analyzer, provenance_df, parameters, metrics, tools, provenance_folder
    )
    validate_provenance_data_csv_file(analyzer, provenance_df, tools)

    plot_results(analyzer, apply_custom_filters(provenance_df), args.output_file)
+
+
def main():
    """
    Command-line entry point of the provenance analysis script.

    Parses the command-line arguments, fixes the parameter and metric names
    to extract, reads the tool names from ``workflow_config["tools"]`` and
    passes everything to ``run``.
    """
    cli_args = parse_args()

    parameter_names = ["element-size", "element-order", "element-degree"]
    metric_names = ["max_von_mises_stress_nodes"]
    tool_names = workflow_config["tools"]

    run(cli_args, parameter_names, metric_names, tool_names)


# Execute the workflow only when run as a script, not when imported.
if __name__ == "__main__":
    main()
diff --git a/benchmarks/linear-elastic-plate-with-hole/tests/fenics.csv b/benchmarks/linear-elastic-plate-with-hole/tests/fenics.csv
new file mode 100644
index 0000000..f0db114
--- /dev/null
+++ b/benchmarks/linear-elastic-plate-with-hole/tests/fenics.csv
@@ -0,0 +1,10 @@
+radius,length,load,element-size,element-order,element-degree,quadrature-rule,quadrature-degree,young-modulus,poisson-ratio,max_von_mises_stress_nodes,max_von_mises_stress_gauss_points
+0.33,1.0,100.0,0.025,1,1,gauss,1,210000000000.0,0.3,299791507.5586336,299791507.55863357
+0.33,1.0,100.0,0.1,1,2,gauss,2,210000000000.0,0.3,303258587.51563627,273716447.12326705
+0.33,1.0,100.0,0.0125,1,1,gauss,1,210000000000.0,0.3,300129618.71379626,300129618.7137961
+0.33,1.0,100.0,0.1,2,2,gauss,2,210000000000.0,0.3,303461918.9716136,274324640.6728336
+0.33,1.0,100.0,0.05,1,1,gauss,1,210000000000.0,0.3,296011478.74885035,296011478.7488503
+0.33,1.0,100.0,0.1,1,1,gauss,1,210000000000.0,0.3,273190934.3950996,273190934.39509964
+0.33,1.0,100.0,0.00625,1,1,gauss,1,210000000000.0,0.3,299475432.9322356,299475432.9322354
+0.33,1.0,100.0,0.5,2,2,gauss,2,210000000000.0,0.3,301751305.94987875,243231832.53382885
+0.33,1.0,100.0,0.003125,1,1,gauss,1,210000000000.0,0.3,299783353.33785635,299783353.3378568
diff --git a/benchmarks/linear-elastic-plate-with-hole/tests/kratos.csv b/benchmarks/linear-elastic-plate-with-hole/tests/kratos.csv
new file mode 100644
index 0000000..a8329f0
--- /dev/null
+++ b/benchmarks/linear-elastic-plate-with-hole/tests/kratos.csv
@@ -0,0 +1,10 @@
+radius,length,load,element-size,element-order,element-degree,quadrature-rule,quadrature-degree,young-modulus,poisson-ratio,max_von_mises_stress_nodes
+0.33,1.0,100.0,0.025,1,1,gauss,1,210000000000.0,0.3,283087904.0
+0.33,1.0,100.0,0.1,1,2,gauss,2,210000000000.0,0.3,226270384.0
+0.33,1.0,100.0,0.0125,1,1,gauss,1,210000000000.0,0.3,291662048.0
+0.33,1.0,100.0,0.1,2,2,gauss,2,210000000000.0,0.3,259553392.0
+0.33,1.0,100.0,0.05,1,1,gauss,1,210000000000.0,0.3,263995664.0
+0.33,1.0,100.0,0.1,1,1,gauss,1,210000000000.0,0.3,226270384.0
+0.33,1.0,100.0,0.00625,1,1,gauss,1,210000000000.0,0.3,296148032.0
+0.33,1.0,100.0,0.5,2,2,gauss,2,210000000000.0,0.3,197391808.0
+0.33,1.0,100.0,0.003125,1,1,gauss,1,210000000000.0,0.3,298100064.0
diff --git a/benchmarks/notebooks/RoCrate.ipynb b/benchmarks/notebooks/RoCrate.ipynb
new file mode 100644
index 0000000..10714b8
--- /dev/null
+++ b/benchmarks/notebooks/RoCrate.ipynb
@@ -0,0 +1,503 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 17,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Requirement already satisfied: rohub in /opt/anaconda3/lib/python3.12/site-packages (1.2.1)\n",
+ "Requirement already satisfied: requests in /opt/anaconda3/lib/python3.12/site-packages (from rohub) (2.32.5)\n",
+ "Requirement already satisfied: pandas in /opt/anaconda3/lib/python3.12/site-packages (from rohub) (2.2.2)\n",
+ "Requirement already satisfied: numpy>=1.26.0 in /opt/anaconda3/lib/python3.12/site-packages (from pandas->rohub) (1.26.4)\n",
+ "Requirement already satisfied: python-dateutil>=2.8.2 in /opt/anaconda3/lib/python3.12/site-packages (from pandas->rohub) (2.9.0.post0)\n",
+ "Requirement already satisfied: pytz>=2020.1 in /opt/anaconda3/lib/python3.12/site-packages (from pandas->rohub) (2024.1)\n",
+ "Requirement already satisfied: tzdata>=2022.7 in /opt/anaconda3/lib/python3.12/site-packages (from pandas->rohub) (2023.3)\n",
+ "Requirement already satisfied: charset_normalizer<4,>=2 in /opt/anaconda3/lib/python3.12/site-packages (from requests->rohub) (3.4.4)\n",
+ "Requirement already satisfied: idna<4,>=2.5 in /opt/anaconda3/lib/python3.12/site-packages (from requests->rohub) (3.11)\n",
+ "Requirement already satisfied: urllib3<3,>=1.21.1 in /opt/anaconda3/lib/python3.12/site-packages (from requests->rohub) (2.6.2)\n",
+ "Requirement already satisfied: certifi>=2017.4.17 in /opt/anaconda3/lib/python3.12/site-packages (from requests->rohub) (2026.1.4)\n",
+ "Requirement already satisfied: six>=1.5 in /opt/anaconda3/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->rohub) (1.17.0)\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip install rohub"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 18,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "\n",
+ "import rohub\n",
+ "import sys\n",
+ "import os\n",
+ "import pandas as pd\n",
+ "\n",
+ "module_path = os.path.abspath(os.path.join('..', 'common'))\n",
+ "\n",
+ "if module_path not in sys.path:\n",
+ " sys.path.append(module_path)\n",
+ "\n",
+ "import provenance\n",
+ "from generate_config import workflow_config\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 24,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "USE_DEVELOPMENT_VERSION = False"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 25,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "if USE_DEVELOPMENT_VERSION:\n",
+ " rohub.settings.API_URL = \"https://rohub2020-rohub.apps.paas-dev.psnc.pl/api/\"\n",
+ " rohub.settings.KEYCLOAK_CLIENT_ID = \"rohub2020-cli\"\n",
+ " rohub.settings.KEYCLOAK_CLIENT_SECRET = \"714617a7-87bc-4a88-8682-5f9c2f60337d\"\n",
+ " rohub.settings.KEYCLOAK_URL = \"https://keycloak-dev.apps.paas-dev.psnc.pl/auth/realms/rohub/protocol/openid-connect/token\"\n",
+ " rohub.settings.SPARQL_ENDPOINT = \"https://rohub2020-api-virtuoso-route-rohub.apps.paas-dev.psnc.pl/sparql/\"\n",
+ "else:\n",
+ " rohub.settings.API_URL = \"https://api.rohub.org/api/\"\n",
+ " rohub.settings.KEYCLOAK_CLIENT_ID = \"rohub2020-public-cli\"\n",
+ " rohub.settings.KEYCLOAK_URL = \"https://login.rohub.org/auth/realms/rohub/protocol/openid-connect/token\"\n",
+ " rohub.settings.SPARQL_ENDPOINT = \"https://rohub2020-api-virtuoso-route-rohub2020.apps.paas.psnc.pl/sparql\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Logged successfully as mahdi.jafarkhani@tik.uni-stuttgart.de.\n"
+ ]
+ }
+ ],
+ "source": [
+ "username = \"\"\n",
+ "password = \"\"\n",
+ "\n",
+ "rohub.login(username=username, password=password)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 27,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Trying to confirm status of the job. It can take a while...\n"
+ ]
+ }
+ ],
+ "source": [
+ "zip_path = \"/Users/mahdi/Downloads/metadata4ing_provenance.zip\"\n",
+ "resources_from_zip = rohub.ros_upload(path_to_zip=zip_path)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "ANNOTATION_PREDICATE = \"http://w3id.org/nfdi4ing/metadata4ing#investigates\"\n",
+ "ANNOTATION_OBJECT = \"https://github.com/BAMresearch/NFDI4IngModelValidationPlatform/tree/main/benchmarks/linear-elastic-plate-with-hole\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 30,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Research Object was successfully loaded with id = 5b8eae99-5f8b-4124-9eb6-05a64860819e\n"
+ ]
+ }
+ ],
+ "source": [
+ "RO = rohub.ros_load(\"5b8eae99-5f8b-4124-9eb6-05a64860819e\")\n",
+ "annotation_json = [\n",
+ "\t{\n",
+ "\t\t\"property\": ANNOTATION_PREDICATE,\n",
+ "\t\t\"value\": ANNOTATION_OBJECT\n",
+ "\t}\n",
+ "]\n",
+ "add_annotations_result = RO.add_annotations(body_specification_json=annotation_json)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 32,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "UUIDs: ['5b8eae99-5f8b-4124-9eb6-05a64860819e']\n"
+ ]
+ }
+ ],
+ "source": [
+ "UUID_QUERY = f\"\"\"\n",
+ "SELECT ?subject \n",
+ "WHERE {{\n",
+ " ?subject <{ANNOTATION_PREDICATE}> <{ANNOTATION_OBJECT}> .\n",
+ "}}\n",
+ "\"\"\"\n",
+ "\n",
+ "uuid_result = rohub.query_sparql_endpoint(UUID_QUERY)\n",
+ "uuids = []\n",
+ "\n",
+ "if not uuid_result.empty:\n",
+ " uuids = [iri.split('/')[-1] for iri in uuid_result[\"subject\"]]\n",
+ " print(\"UUIDs:\", uuids)\n",
+ "else:\n",
+ " uuids = []\n",
+ " print(\"No results found\")\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 33,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "[5b8eae99-5f8b-4124-9eb6-05a64860819e] Found Named Graph: https://w3id.org/ro-id/5b8eae99-5f8b-4124-9eb6-05a64860819e/.ro/annotations/eb1512d5-cb3b-47ba-93e6-fcc2863f6e39.ttl\n"
+ ]
+ }
+ ],
+ "source": [
+ "named_graphs = {}\n",
+ "\n",
+ "for UUID in uuids:\n",
+ " NAMED_GRAPH_QUERY = f\"\"\"\n",
+ " PREFIX schema: \n",
+ " SELECT ?graph WHERE {{\n",
+ " GRAPH ?graph {{ a schema:Dataset . }}\n",
+ " }}\n",
+ " \"\"\"\n",
+ "\n",
+ " named_graph_result = rohub.query_sparql_endpoint(NAMED_GRAPH_QUERY)\n",
+ "\n",
+ " if not named_graph_result.empty:\n",
+ " graph_iri = named_graph_result.iloc[0][\"graph\"]\n",
+ " named_graphs[UUID] = graph_iri\n",
+ " print(f\"[{UUID}] Found Named Graph: {graph_iri}\")\n",
+ " else:\n",
+ " named_graphs[UUID] = None\n",
+ " print(f\"[{UUID}] No named graph found\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 34,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "analyzer = provenance.ProvenanceAnalyzer()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 35,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "parameters = [\"element-size\", \"element-order\", \"element-degree\"]\n",
+ "metrics = [\"max_von_mises_stress_nodes\"]\n",
+ "tools = workflow_config[\"tools\"]\n",
+ "\n",
+ "data = []\n",
+ "\n",
+ "for uuid, named_graph in named_graphs.items():\n",
+ " query_string = analyzer.build_dynamic_query(parameters, metrics, tools, named_graph)\n",
+ " result = rohub.query_sparql_endpoint(query_string)\n",
+ " if not result.empty:\n",
+ " result[\"element_order\"] = pd.to_numeric(\n",
+ " result[\"element_order\"], errors=\"coerce\"\n",
+ " )\n",
+ " result[\"element_degree\"] = pd.to_numeric(\n",
+ " result[\"element_degree\"], errors=\"coerce\"\n",
+ " )\n",
+ " filtered_result = result[\n",
+ " (result[\"element_order\"] == 1) & (result[\"element_degree\"] == 1)\n",
+ " ]\n",
+ " rows = [\n",
+ " [\n",
+ " float(row[\"element_size\"]),\n",
+ " float(row[\"max_von_mises_stress_nodes\"]),\n",
+ " row[\"tool_name\"],\n",
+ " ]\n",
+ " for _, row in filtered_result.iterrows()\n",
+ " ]\n",
+ " data.extend(rows)\n",
+ "\n",
+ "data.sort(key=lambda row: row[0], reverse=False)\n",
+ "\n",
+ "analyzer.plot_provenance_graph(\n",
+ " data=data,\n",
+ " x_axis_label=\"Element Size\",\n",
+ " y_axis_label=\"Max Von Mises Stress Nodes\",\n",
+ " x_axis_index=0,\n",
+ " y_axis_index=1,\n",
+ " group_by_index=2,\n",
+ " title=\"Element Size vs Max Von Mises Stress Nodes \\n element-order = 1 , element-degree = 1 \",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 36,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "parameters = [\"element-size\", \"element-order\", \"element-degree\"]\n",
+ "metrics = [\"max_von_mises_stress_gauss_points\"]\n",
+ "tools = workflow_config[\"tools\"]\n",
+ "\n",
+ "data = []\n",
+ "\n",
+ "for uuid, named_graph in named_graphs.items():\n",
+ " query_string = analyzer.build_dynamic_query(parameters, metrics, tools, named_graph)\n",
+ " result = rohub.query_sparql_endpoint(query_string)\n",
+ " if not result.empty:\n",
+ " result[\"element_order\"] = pd.to_numeric(\n",
+ " result[\"element_order\"], errors=\"coerce\"\n",
+ " )\n",
+ " result[\"element_degree\"] = pd.to_numeric(\n",
+ " result[\"element_degree\"], errors=\"coerce\"\n",
+ " )\n",
+ " filtered_result = result[\n",
+ " (result[\"element_order\"] == 1) & (result[\"element_degree\"] == 1)\n",
+ " ]\n",
+ " rows = [\n",
+ " [\n",
+ " float(row[\"element_size\"]),\n",
+ " float(row[\"max_von_mises_stress_gauss_points\"]),\n",
+ " row[\"tool_name\"],\n",
+ " ]\n",
+ " for _, row in filtered_result.iterrows()\n",
+ " ]\n",
+ " data.extend(rows)\n",
+ "\n",
+ "data.sort(key=lambda row: row[0], reverse=False)\n",
+ "\n",
+ "analyzer.plot_provenance_graph(\n",
+ " data=data,\n",
+ " x_axis_label=\"Element Size\",\n",
+ " y_axis_label=\"Max Von Mises Stress Gauss Points\",\n",
+ " x_axis_index=0,\n",
+ " y_axis_index=1,\n",
+ " group_by_index=2,\n",
+ " title=\"Element Size vs Max Von Mises Stress Gauss Points \\n element-order = 1 , element-degree = 1 \",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 37,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "parameters = [\"element-size\", \"element-order\", \"element-degree\"]\n",
+ "metrics = [\"max_von_mises_stress_nodes\"]\n",
+ "tools = workflow_config[\"tools\"]\n",
+ "\n",
+ "data = []\n",
+ "\n",
+ "for uuid, named_graph in named_graphs.items():\n",
+ " query_string = analyzer.build_dynamic_query(parameters, metrics, tools, named_graph)\n",
+ " result = rohub.query_sparql_endpoint(query_string)\n",
+ " if not result.empty:\n",
+ " result[\"element_order\"] = pd.to_numeric(\n",
+ " result[\"element_order\"], errors=\"coerce\"\n",
+ " )\n",
+ " result[\"element_degree\"] = pd.to_numeric(\n",
+ " result[\"element_degree\"], errors=\"coerce\"\n",
+ " )\n",
+ " filtered_result = result[\n",
+ " (result[\"element_order\"] == 2) & (result[\"element_degree\"] == 2)\n",
+ " ]\n",
+ " rows = [\n",
+ " [\n",
+ " float(row[\"element_size\"]),\n",
+ " float(row[\"max_von_mises_stress_nodes\"]),\n",
+ " row[\"tool_name\"],\n",
+ " ]\n",
+ " for _, row in filtered_result.iterrows()\n",
+ " ]\n",
+ " data.extend(rows)\n",
+ "\n",
+ "data.sort(key=lambda row: row[0], reverse=False)\n",
+ "\n",
+ "analyzer.plot_provenance_graph(\n",
+ " data=data,\n",
+ " x_axis_label=\"Element Size\",\n",
+ " y_axis_label=\"Max Von Mises Stress Nodes\",\n",
+ " x_axis_index=0,\n",
+ " y_axis_index=1,\n",
+ " group_by_index=2,\n",
+ " title=\"Element Size vs Max Von Mises Stress Nodes \\n element-order = 2 , element-degree = 2 \",\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 38,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "No artists with labels found to put in legend. Note that artists whose label start with an underscore are ignored when legend() is called with no argument.\n"
+ ]
+ },
+ {
+ "data": {
+ "image/png": "",
+ "text/plain": [
+ ""
+ ]
+ },
+ "metadata": {},
+ "output_type": "display_data"
+ }
+ ],
+ "source": [
+ "parameters = [\"element-size\", \"element-order\", \"element-degree\"]\n",
+ "metrics = [\"max_von_mises_stress_gauss_points\"]\n",
+ "tools = workflow_config[\"tools\"]\n",
+ "\n",
+ "data = []\n",
+ "\n",
+ "for uuid, named_graph in named_graphs.items():\n",
+ " query_string = analyzer.build_dynamic_query(parameters, metrics, tools, named_graph)\n",
+ " result = rohub.query_sparql_endpoint(query_string)\n",
+ " if not result.empty:\n",
+ " result[\"element_order\"] = pd.to_numeric(\n",
+ " result[\"element_order\"], errors=\"coerce\"\n",
+ " )\n",
+ " result[\"element_degree\"] = pd.to_numeric(\n",
+ " result[\"element_degree\"], errors=\"coerce\"\n",
+ " )\n",
+ " filtered_result = result[\n",
+ " (result[\"element_order\"] == 2) & (result[\"element_degree\"] == 2)\n",
+ " ]\n",
+ " rows = [\n",
+ " [\n",
+ " float(row[\"element_size\"]),\n",
+ " float(row[\"max_von_mises_stress_gauss_points\"]),\n",
+ " row[\"tool_name\"],\n",
+ " ]\n",
+ " for _, row in filtered_result.iterrows()\n",
+ " ]\n",
+ " data.extend(rows)\n",
+ "\n",
+ "data.sort(key=lambda row: row[0], reverse=False)\n",
+ "\n",
+ "analyzer.plot_provenance_graph(\n",
+ " data=data,\n",
+ " x_axis_label=\"Element Size\",\n",
+ " y_axis_label=\"Max Von Mises Stress Gauss Points\",\n",
+ " x_axis_index=0,\n",
+ " y_axis_index=1,\n",
+ " group_by_index=2,\n",
+ " title=\"Element Size vs Max Von Mises Stress Gauss Points \n element-order = 2 , element-degree = 2 \",\n",
+ ")"
+ ]
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "base",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.12.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 0
+}
diff --git a/environment_benchmarks.yml b/environment_benchmarks.yml
index 2ee70d3..0e36b3a 100644
--- a/environment_benchmarks.yml
+++ b/environment_benchmarks.yml
@@ -11,4 +11,4 @@ dependencies:
- conda
- pip
- pip:
- - "git+https://github.com/izus-fokus/snakemake-report-plugin-metadata4ing@v1.0.0#egg=snakemake-report-plugin-metadata4ing"
\ No newline at end of file
+ - "git+https://github.com/izus-fokus/snakemake-report-plugin-metadata4ing@v1.2.3#egg=snakemake-report-plugin-metadata4ing"
\ No newline at end of file