Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Binary file modified .DS_Store
Binary file not shown.
91 changes: 35 additions & 56 deletions .github/workflows/plugin-integration.yml
Original file line number Diff line number Diff line change
Expand Up @@ -3,21 +3,20 @@ name: Build and use plugin
on:
push:
workflow_dispatch:

jobs:
build:
runs-on: ${{ matrix.operating-system }}
environment:
name: ${{ github.ref_name }}
strategy:
matrix:
operating-system: [ubuntu-latest]
versions: [ { jdk: 17, mapping-service: v1.0.5 }, { jdk: 21, mapping-service: latest } ]
# Use both LTS releases and latest one for tests
versions: [ { jdk: 17, mapping-service: v2.0.2 }, { jdk: 21, mapping-service: latest } ]

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Extract branch name
shell: bash
run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
Expand All @@ -34,85 +33,65 @@ jobs:
JAR_VERSION=$(./mappingservice-plugin/gradlew printVersion -q -p ./mappingservice-plugin/)
JAR_VERSION=${JAR_VERSION##*$'\n'}
./mappingservice-plugin/gradlew clean jar -p ./mappingservice-plugin/
ls -ll ./mappingservice-plugin/build/libs/
echo "JAR_VERSION=${JAR_VERSION}"
mv -v ./mappingservice-plugin/build/libs/ApeHePlugin-$JAR_VERSION-plain.jar ./mappingservice-plugin/build/libs/ApeHeplugin.jar
mv -v ./mappingservice-plugin/build/libs/ApeHePlugin-$JAR_VERSION.jar ./mappingservice-plugin/build/libs/ApeHeplugin.jar
env:
VERSION_OVERRIDE_BY_BRANCH: ${{ steps.extract_branch.outputs.branch }}

- name: Upload job artifact
uses: actions/upload-artifact@v4
with:
name: jar-jdk${{ matrix.versions.jdk }}
path: ./mappingservice-plugin/build/libs/ApeHeplugin.jar
name: jar-jdk${{ matrix.versions.jdk }}
path: ./mappingservice-plugin/build/libs/SEMplugin.jar

test:
runs-on: ${{ matrix.operating-system }}
environment:
name: ${{ github.ref_name }}
strategy:
fail-fast: false
fail-fast: false #We want to test independent of each other - success on a stable version is more important than on the latest version
matrix:
operating-system: [ubuntu-latest]
versions: [ { jdk: 17, mapping-service: v1.0.5 }, { jdk: 21, mapping-service: latest } ]
# Use both LTS releases and latest one for tests
versions: [ { jdk: 17, mapping-service: v2.0.2 }, { jdk: 21, mapping-service: latest } ]
needs: build

steps:
- name: Checkout repository
uses: actions/checkout@v4

- name: Extract branch name
shell: bash
run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT
id: extract_branch

- name: Download built jar
uses: actions/download-artifact@v4
with:
name: jar-jdk${{ matrix.versions.jdk }}
path: ./plugins

- name: Run Docker Container
run: |
docker run -d -p 8095:8095 -e PIP_BREAK_SYSTEM_PACKAGES=1 -v ./plugins/ApeHeplugin.jar:/spring/mapping-service/plugins/ApeHeplugin.jar --name mapping4docker ghcr.io/kit-data-manager/mapping-service:${{ matrix.versions.mapping-service }}
echo "Waiting for mapping service to be healthy..."
while true; do
if ! docker ps | grep -q mapping4docker; then
echo "Docker container stopped unexpectedly. Aborting."
exit 1
fi
if curl -f http://localhost:8095/actuator/info; then
echo "Service is running."
break
fi
echo "Waiting..."
docker logs --tail 20 mapping4docker
sleep 5
done
name: jar-jdk${{ matrix.versions.jdk }}
path: ./plugins

- name: Install Hurl & Prepare JSON
- name: Run Docker Container # and wait for mapping service to be healthy before proceeding to tests
run: |
curl -LO https://github.com/Orange-OpenSource/hurl/releases/download/6.0.0/hurl_6.0.0_amd64.deb
sudo dpkg -i hurl_6.0.0_amd64.deb
sudo apt install -y dos2unix

# Fetch mappingType dynamically
mappingType=$(curl -s http://localhost:8095/api/v1/mappingAdministration/types | jq -r '.[0].id')

echo "Using mappingType: $mappingType"

echo "{\"mappingId\":\"96\",\"mappingType\":\"$mappingType\",\"title\":\"apeHe from CI test\",\"description\":\"\",\"acl\":[]}" > record.json
echo '{"entry.title.value": "entry.title"}' > document.json

unix2dos -n ./mappingservice-plugin/integrationtests/basic.hurl ./mappingservice-plugin/integrationtests/basic_crlf.hurl

docker run -d -p 8095:8095 -e PIP_BREAK_SYSTEM_PACKAGES=1 -e SPRING_APPLICATION_JSON='{"mapping-service.executionTimeout":300}' -v ./plugins/SEMplugin.jar:/spring/mapping-service/plugins/SEMplugin.jar --name mapping4docker ghcr.io/kit-data-manager/mapping-service:${{ matrix.versions.mapping-service }}
echo "Wait for mapping service to be healthy before proceeding to tests"
while true; do
if ! docker ps | grep -q mapping4docker; then
echo "Docker container stopped unexpectedly. Aborting."
exit 1
fi
if curl -f http://localhost:8095/actuator/info; then
echo "Service is running."
break
fi
echo "Waiting for the service to be ready..."
docker logs --tail 20 mapping4docker
sleep 5
done
- name: Run Tests with Hurl
run: |
hurl --variable host=http://localhost:8095 --test ./mappingservice-plugin/integrationtests/basic_crlf.hurl --verbose --file-root .
curl --location --remote-name https://github.com/Orange-OpenSource/hurl/releases/download/6.0.0/hurl_6.0.0_amd64.deb
sudo dpkg -i hurl_6.0.0_amd64.deb
sudo apt install -y dos2unix
unix2dos -n ./mappingservice-plugin/integrationtests/basic.hurl ./mappingservice-plugin/integrationtests/basic_crlf.hurl
hurl --variable host=http://localhost:8095 --test ./mappingservice-plugin/integrationtests/basic_crlf.hurl --verbose --file-root .
env:
VERSION_OVERRIDE_BY_BRANCH: ${{ steps.extract_branch.outputs.branch }}

- name: Clean up temp files
run: rm -f record.json document.json

- name: Stop Docker Container
run: docker stop mapping4docker
run: docker stop mapping4docker
4 changes: 3 additions & 1 deletion mapping_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,10 @@
import shutil
from pathlib import Path

from src.IO.MappingAbortionError import MappingAbortionError
from mappingservice_plugincore.exceptions.MappingAbortionError import MappingAbortionError
from src.IO.InputReader import InputReader as InputReader_apeHe
from src.IO.OutputWriter import OutputWriter
from src.parser import ParserConfig

# Make log level configurable from ENV, defaults to INFO level
logging.basicConfig(
Expand All @@ -28,6 +29,7 @@ def run_cli():
run_mapper(args)

def run_mapper(args):
ParserConfig.register_parsers()
INPUT_SOURCE = args.input
MAP_SOURCE = args.map
OUTPUT_PATH = args.output
Expand Down
3 changes: 2 additions & 1 deletion requirements.dist.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,5 @@ magika >= 0.5.1
numpy
pandas
h5py
zipfile2
zipfile2
mappingservice-plugincore @ git+https://github.com/GGoetzelmann/mappingservice_plugincore.git@development
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,4 @@ zipfile2
pytest >= 7.4
pytest-cov >= 6.0
pytest-mock >= 3.14.0

mappingservice-plugincore @ git+https://github.com/GGoetzelmann/mappingservice_plugincore.git@development
2 changes: 1 addition & 1 deletion somesy.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[project]
name = "APE-HE mapper"
version = "v1.0.0"
version = "separate_lib"
description = "APE-HE Mapper is a tool designed for mapping APE-HE (Advanced Photoelectric Effect - High Energy) metadata to a uniform, schema-compliant json format."

keywords = ["APE-HE", "neXus", "metadata", "extraction", "schema"]
Expand Down
4 changes: 2 additions & 2 deletions src/IO/InputReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@
import mimetypes
import os

from src.IO.MappingAbortionError import MappingAbortionError
from src.parser.ParserFactory import ParserFactory
from mappingservice_plugincore.exceptions.MappingAbortionError import MappingAbortionError
from mappingservice_plugincore.parser.ParserFactory import ParserFactory
from src.util import load_json, get_filetype_with_magica, robust_textfile_read


Expand Down
59 changes: 53 additions & 6 deletions src/Preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,11 +30,52 @@ def get_expected_type(field_path):

expected_types = {
"entry.entry_identifier": "string_type",
"entry.instrument.monochromator.grating.period.value": "int_type",
"entry.title": "string_type",
"entry.sample.gas_flux[*].value": "float_type"
}

return expected_types.get(field_path, None)
# Check exact match first
exact_match = expected_types.get(field_path)
if exact_match:
return exact_match

# Check if any expected pattern exists within the field_path
for pattern, expected_type in expected_types.items():
if pattern in field_path:
return expected_type

return None

@staticmethod
def is_numeric_string(value):
    """
    Check whether a string represents a valid, finite number.

    :param value: candidate value of any type
    :return: True if ``value`` is a ``str`` that ``float()`` parses to a finite
             number; False for non-strings, unparsable strings and for
             NaN / infinity spellings
    """
    import math  # local import so the block does not rely on file-level imports

    if not isinstance(value, str):
        return False

    try:
        parsed = float(value)
    except ValueError:
        return False

    # float() also accepts "nan", "inf", "infinity" (any case, optional sign)
    # and turns overflowing literals such as "1e999" into inf. Those are not
    # usable numbers, so such strings must be reported as non-numeric.
    return math.isfinite(parsed)

@staticmethod
def convert_numeric_string(value):
    """
    Turn a numeric string into a number: ``int`` when it has no decimal point
    or exponent, otherwise ``float``.

    :param value: value to convert; anything that Preprocessor.is_numeric_string
                  rejects is handed back unchanged
    :return: the converted int/float, or ``value`` itself if no conversion applies
    """
    if Preprocessor.is_numeric_string(value):
        # Whole-number notation: neither a decimal point nor an exponent marker
        looks_integral = '.' not in value and 'e' not in value.lower()
        try:
            return int(value) if looks_integral else float(value)
        except ValueError:
            # Primary conversion failed; retry as float before giving up
            try:
                return float(value)
            except ValueError:
                return value

    return value

@staticmethod
def normalize_unit(input_value) -> str:
Expand Down Expand Up @@ -88,7 +129,8 @@ def normalize_all_datetimes(input_dict):
@staticmethod
def normalize_all_numbers(input_dict):
"""
In-place conversion of numeric strings into integers or floats, but checks if it's an appropriate field.
In-place conversion of numeric strings into integers or floats.
Converts all numeric strings automatically, with special handling for specific field types.
:param input_dict: dictionary to convert numeric values in
:return: None
"""
Expand All @@ -98,17 +140,22 @@ def normalize_all_numbers(input_dict):
original_value = match.value
current_field = str(match.full_path)
expected_type = Preprocessor.get_expected_type(current_field)
#print("<<<<>>>> ",original_value)

# Handle type conversions if needed (e.g.: int_type, float_type)
# Handle type conversions for explicitly defined fields
if isinstance(original_value, str):
try:
if expected_type == "int_type": # Convert only if it's a valid integer-like string
if expected_type == "string_type":
continue # Keep as string, do not convert
elif expected_type == "int_type": # Convert only if it is a valid integer-like string
converted_value = int(original_value)
match.full_path.update(input_dict, converted_value)
elif expected_type == "float_type": # Convert only if it's a valid float-like string
converted_value = float(original_value)
match.full_path.update(input_dict, converted_value)
else: # Auto-convert for other fields
converted_value = Preprocessor.convert_numeric_string(original_value)
if converted_value != original_value:
match.full_path.update(input_dict, converted_value)
except ValueError:
logging.warning(f"Error while trying to convert '{original_value}' into {expected_type} for field {current_field}")
continue
Expand Down
2 changes: 1 addition & 1 deletion src/model/SchemaConcepts/APE_HE_Image.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from pydantic import BaseModel

from src.model.SchemaConcepts.Schema_Concept import Schema_Concept
from mappingservice_plugincore.model.Schema_Concept import Schema_Concept
from src.model.SchemaConcepts.codegen.SchemaClasses_APE_HE import Entry, ApeHe


Expand Down
Binary file not shown.
Binary file not shown.
9 changes: 9 additions & 0 deletions src/model/SetupMD.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
from pydantic import BaseModel

from mappingservice_plugincore.model.SetupMD import SetupMD as GenericSetupMD

class SetupMD(GenericSetupMD):
    """
    contains metadata derived from file(s) describing the experiment setup
    """
    # Thin subclass: adds no fields or methods of its own on top of GenericSetupMD.
    pass
16 changes: 16 additions & 0 deletions src/parser/ParserConfig.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
import enum
from typing import Type

from mappingservice_plugincore.parser.ParserFactory import ParserFactory
from mappingservice_plugincore.parser.RunMD_Parser import RunMD_Parser
from src.parser.impl.NexusParser import NexusParser


available_img_parsers = {
"NexusParser": NexusParser
}

def register_parsers():
    """
    Register every entry of ``available_img_parsers`` with ``ParserFactory``
    as an image parser, keyed by its name in that mapping.

    :return: None
    """
    for parser_name in available_img_parsers:
        parser_class = available_img_parsers[parser_name]
        ParserFactory.register_imgparser(parser_name, parser_class)
19 changes: 0 additions & 19 deletions src/parser/ParserFactory.py

This file was deleted.

20 changes: 18 additions & 2 deletions src/parser/impl/NexusParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,13 @@

from PIL import Image

from mappingservice_plugincore.exceptions.MappingAbortionError import MappingAbortionError
from src.Preprocessor import Preprocessor
from src.model.ImageMD import ImageMD
from src.parser.ImageParser import ImageParser
from mappingservice_plugincore.parser.ImageParser import ImageParser
from src.parser.mapping_util import map_a_dict
from src.resources.maps.mapping import nexusparser_apeHe
from src.util import input_to_dict
import configparser



Expand Down Expand Up @@ -39,11 +39,15 @@ def parse(self, file_path, mapping) -> tuple[ImageMD, str]:
mapping_dict = mapping if mapping else self.internal_mapping
image_md = map_a_dict(input_md, mapping_dict)

# Debug: Check values before preprocessing

#Preprocessor.normalize_all_datetimes(image_md)
Preprocessor.normalize_all_numbers(image_md)
Preprocessor.normalize_all_units(image_md)
Preprocessor.normalize_gas_names(image_md)

# Debug: Check values after preprocessing

image_from_md = ImageMD(image_metadata=image_md, filePath="")

return image_from_md, image_md
Expand All @@ -68,3 +72,15 @@ def _read_input_file(self, file_path) -> Optional[dict]:

output_dict.update(parsed_dict)
return output_dict

def _create_tomo_image(self, image_md, fp):
    """
    Implementation of abstract method from ImageParser.
    For APE-HE data, the metadata and file path are wrapped in an ImageMD.

    :param image_md: image metadata, passed to ImageMD as ``image_metadata``
    :param fp: file path, passed to ImageMD as ``filePath``
    :return: ImageMD built from the two arguments
    """
    # The previously built intermediate dict was never used and has been removed.
    return ImageMD(image_metadata=image_md, filePath=fp)
Loading
Loading