diff --git a/.DS_Store b/.DS_Store index b3390a6..e3fc221 100644 Binary files a/.DS_Store and b/.DS_Store differ diff --git a/.github/workflows/plugin-integration.yml b/.github/workflows/plugin-integration.yml index 245aa3a..de64a17 100644 --- a/.github/workflows/plugin-integration.yml +++ b/.github/workflows/plugin-integration.yml @@ -3,21 +3,20 @@ name: Build and use plugin on: push: workflow_dispatch: - + jobs: build: runs-on: ${{ matrix.operating-system }} - environment: - name: ${{ github.ref_name }} strategy: matrix: operating-system: [ubuntu-latest] - versions: [ { jdk: 17, mapping-service: v1.0.5 }, { jdk: 21, mapping-service: latest } ] + # Use both LTS releases and latest one for tests + versions: [ { jdk: 17, mapping-service: v2.0.2 }, { jdk: 21, mapping-service: latest } ] steps: - name: Checkout repository uses: actions/checkout@v4 - + - name: Extract branch name shell: bash run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT @@ -34,85 +33,65 @@ jobs: JAR_VERSION=$(./mappingservice-plugin/gradlew printVersion -q -p ./mappingservice-plugin/) JAR_VERSION=${JAR_VERSION##*$'\n'} ./mappingservice-plugin/gradlew clean jar -p ./mappingservice-plugin/ + ls -ll ./mappingservice-plugin/build/libs/ echo "JAR_VERSION=${JAR_VERSION}" - mv -v ./mappingservice-plugin/build/libs/ApeHePlugin-$JAR_VERSION-plain.jar ./mappingservice-plugin/build/libs/ApeHeplugin.jar + mv -v ./mappingservice-plugin/build/libs/ApeHePlugin-$JAR_VERSION.jar ./mappingservice-plugin/build/libs/ApeHeplugin.jar env: VERSION_OVERRIDE_BY_BRANCH: ${{ steps.extract_branch.outputs.branch }} - name: Upload job artifact uses: actions/upload-artifact@v4 with: - name: jar-jdk${{ matrix.versions.jdk }} - path: ./mappingservice-plugin/build/libs/ApeHeplugin.jar + name: jar-jdk${{ matrix.versions.jdk }} + path: ./mappingservice-plugin/build/libs/SEMplugin.jar test: runs-on: ${{ matrix.operating-system }} - environment: - name: ${{ github.ref_name }} strategy: - fail-fast: false + fail-fast: false #We want to test independent of each other - success on a stable version is more important than on the latest version matrix: operating-system: [ubuntu-latest] - versions: [ { jdk: 17, mapping-service: v1.0.5 }, { jdk: 21, mapping-service: latest } ] + # Use both LTS releases and latest one for tests + versions: [ { jdk: 17, mapping-service: v2.0.2 }, { jdk: 21, mapping-service: latest } ] needs: build - steps: - name: Checkout repository uses: actions/checkout@v4 - - name: Extract branch name shell: bash run: echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT id: extract_branch - - name: Download built jar uses: actions/download-artifact@v4 with: - name: jar-jdk${{ matrix.versions.jdk }} - path: ./plugins - - - name: Run Docker Container - run: | - docker run -d -p 8095:8095 -e PIP_BREAK_SYSTEM_PACKAGES=1 -v ./plugins/ApeHeplugin.jar:/spring/mapping-service/plugins/ApeHeplugin.jar --name mapping4docker ghcr.io/kit-data-manager/mapping-service:${{ matrix.versions.mapping-service }} - echo "Waiting for mapping service to be healthy..." - while true; do - if ! docker ps | grep -q mapping4docker; then - echo "Docker container stopped unexpectedly. Aborting." - exit 1 - fi - if curl -f http://localhost:8095/actuator/info; then - echo "Service is running." - break - fi - echo "Waiting..." - docker logs --tail 20 mapping4docker - sleep 5 - done + name: jar-jdk${{ matrix.versions.jdk }} + path: ./plugins - - name: Install Hurl & Prepare JSON + - name: Run Docker Container # and wait for mapping service to be healthy before proceeding to tests run: | - curl -LO https://github.com/Orange-OpenSource/hurl/releases/download/6.0.0/hurl_6.0.0_amd64.deb - sudo dpkg -i hurl_6.0.0_amd64.deb - sudo apt install -y dos2unix - - # Fetch mappingType dynamically - mappingType=$(curl -s http://localhost:8095/api/v1/mappingAdministration/types | jq -r '.[0].id') - - echo "Using mappingType: $mappingType" - - echo "{\"mappingId\":\"96\",\"mappingType\":\"$mappingType\",\"title\":\"apeHe from CI test\",\"description\":\"\",\"acl\":[]}" > record.json - echo '{"entry.title.value": "entry.title"}' > document.json - - unix2dos -n ./mappingservice-plugin/integrationtests/basic.hurl ./mappingservice-plugin/integrationtests/basic_crlf.hurl - + docker run -d -p 8095:8095 -e PIP_BREAK_SYSTEM_PACKAGES=1 -e SPRING_APPLICATION_JSON='{"mapping-service.executionTimeout":300}' -v ./plugins/SEMplugin.jar:/spring/mapping-service/plugins/SEMplugin.jar --name mapping4docker ghcr.io/kit-data-manager/mapping-service:${{ matrix.versions.mapping-service }} + echo "Wait for mapping service to be healthy before proceeding to tests" + while true; do + if ! docker ps | grep -q mapping4docker; then + echo "Docker container stopped unexpectedly. Aborting." + exit 1 + fi + if curl -f http://localhost:8095/actuator/info; then + echo "Service is running." + break + fi + echo "Waiting for the service to be ready..." + docker logs --tail 20 mapping4docker + sleep 5 + done - name: Run Tests with Hurl run: | - hurl --variable host=http://localhost:8095 --test ./mappingservice-plugin/integrationtests/basic_crlf.hurl --verbose --file-root . + curl --location --remote-name https://github.com/Orange-OpenSource/hurl/releases/download/6.0.0/hurl_6.0.0_amd64.deb + sudo dpkg -i hurl_6.0.0_amd64.deb + sudo apt install -y dos2unix + unix2dos -n ./mappingservice-plugin/integrationtests/basic.hurl ./mappingservice-plugin/integrationtests/basic_crlf.hurl + hurl --variable host=http://localhost:8095 --test ./mappingservice-plugin/integrationtests/basic_crlf.hurl --verbose --file-root . env: VERSION_OVERRIDE_BY_BRANCH: ${{ steps.extract_branch.outputs.branch }} - - - name: Clean up temp files - run: rm -f record.json document.json - - name: Stop Docker Container - run: docker stop mapping4docker + run: docker stop mapping4docker \ No newline at end of file diff --git a/mapping_cli.py b/mapping_cli.py index e70d77d..126023c 100644 --- a/mapping_cli.py +++ b/mapping_cli.py @@ -7,9 +7,10 @@ import shutil from pathlib import Path -from src.IO.MappingAbortionError import MappingAbortionError +from mappingservice_plugincore.exceptions.MappingAbortionError import MappingAbortionError from src.IO.InputReader import InputReader as InputReader_apeHe from src.IO.OutputWriter import OutputWriter +from src.parser import ParserConfig # Make log level configurable from ENV, defaults to INFO level logging.basicConfig( @@ -28,6 +29,7 @@ def run_cli(): run_mapper(args) def run_mapper(args): + ParserConfig.register_parsers() INPUT_SOURCE = args.input MAP_SOURCE = args.map OUTPUT_PATH = args.output diff --git a/requirements.dist.txt b/requirements.dist.txt index 154da5a..7de64e9 100644 --- a/requirements.dist.txt +++ b/requirements.dist.txt @@ -9,4 +9,5 @@ magika >= 0.5.1 numpy pandas h5py -zipfile2 \ No newline at end of file +zipfile2 +mappingservice-plugincore @ git+https://github.com/GGoetzelmann/mappingservice_plugincore.git@development \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 4525ef2..5960560 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,4 @@ zipfile2 pytest >= 7.4 pytest-cov >= 6.0 pytest-mock >= 3.14.0 - +mappingservice-plugincore @ git+https://github.com/GGoetzelmann/mappingservice_plugincore.git@development diff --git a/somesy.toml b/somesy.toml index f93193e..7dc3887 100644 --- a/somesy.toml +++ b/somesy.toml @@ -1,6 +1,6 @@ [project] name = "APE-HE mapper" -version = "v1.0.0" +version = "separate_lib" description = "APE-HE Mapper is a tool designed for mapping APE-HE (Advanced Photoelectric Effect - High Energy) metadata to a uniform, schema-compliant json format." keywords = ["APE-HE", "neXus", "metadata", "extraction", "schema"] diff --git a/src/IO/InputReader.py b/src/IO/InputReader.py index 012b1c0..8c50f4d 100644 --- a/src/IO/InputReader.py +++ b/src/IO/InputReader.py @@ -2,8 +2,8 @@ import mimetypes import os -from src.IO.MappingAbortionError import MappingAbortionError -from src.parser.ParserFactory import ParserFactory +from mappingservice_plugincore.exceptions.MappingAbortionError import MappingAbortionError +from mappingservice_plugincore.parser.ParserFactory import ParserFactory from src.util import load_json, get_filetype_with_magica, robust_textfile_read diff --git a/src/Preprocessor.py b/src/Preprocessor.py index 9103de1..9a2125d 100644 --- a/src/Preprocessor.py +++ b/src/Preprocessor.py @@ -30,11 +30,52 @@ def get_expected_type(field_path): expected_types = { "entry.entry_identifier": "string_type", - "entry.instrument.monochromator.grating.period.value": "int_type", + "entry.title": "string_type", "entry.sample.gas_flux[*].value": "float_type" } - return expected_types.get(field_path, None) + # Check exact match first + exact_match = expected_types.get(field_path) + if exact_match: + return exact_match + + # Check if any expected pattern exists within the field_path + for pattern, expected_type in expected_types.items(): + if pattern in field_path: + return expected_type + + return None + + @staticmethod + def is_numeric_string(value): + """Check if a string represents a valid number""" + if not isinstance(value, str): + return False + + # Try to convert to float first + try: + float(value) + return True + except ValueError: + return False + + @staticmethod + def convert_numeric_string(value): + """Convert numeric string to int if possible, else float""" + if not Preprocessor.is_numeric_string(value): + return value + + try: + # Try int first (for whole numbers) + if '.' not in value and 'e' not in value.lower(): + return int(value) + else: + return float(value) + except ValueError: + try: + return float(value) + except ValueError: + return value @staticmethod def normalize_unit(input_value) -> str: @@ -88,7 +129,8 @@ def normalize_all_datetimes(input_dict): @staticmethod def normalize_all_numbers(input_dict): """ - In-place conversion of numeric strings into integers or floats, but checks if it's an appropriate field. + In-place conversion of numeric strings into integers or floats. + Converts all numeric strings automatically, with special handling for specific field types. :param input_dict: dictionary to convert numeric values in :return: None """ @@ -98,17 +140,22 @@ def normalize_all_numbers(input_dict): original_value = match.value current_field = str(match.full_path) expected_type = Preprocessor.get_expected_type(current_field) - #print("<<<<>>>> ",original_value) - # Handle type conversions if needed (e.g.: int_type, float_type) + # Handle type conversions for explicitly defined fields if isinstance(original_value, str): try: - if expected_type == "int_type": # Convert only if it's a valid integer-like string + if expected_type == "string_type": + continue # Keep as string, do not convert + elif expected_type == "int_type": # Convert only if it is a valid integer-like string converted_value = int(original_value) match.full_path.update(input_dict, converted_value) elif expected_type == "float_type": # Convert only if it's a valid float-like string converted_value = float(original_value) match.full_path.update(input_dict, converted_value) + else: # Auto-convert for other fields + converted_value = Preprocessor.convert_numeric_string(original_value) + if converted_value != original_value: + match.full_path.update(input_dict, converted_value) except ValueError: logging.warning(f"Error while trying to convert '{original_value}' into {expected_type} for field {current_field}") continue diff --git a/src/model/SchemaConcepts/APE_HE_Image.py b/src/model/SchemaConcepts/APE_HE_Image.py index 8de2385..00e5493 100644 --- a/src/model/SchemaConcepts/APE_HE_Image.py +++ b/src/model/SchemaConcepts/APE_HE_Image.py @@ -1,6 +1,6 @@ from pydantic import BaseModel -from src.model.SchemaConcepts.Schema_Concept import Schema_Concept +from mappingservice_plugincore.model.Schema_Concept import Schema_Concept from src.model.SchemaConcepts.codegen.SchemaClasses_APE_HE import Entry, ApeHe diff --git a/src/model/SchemaConcepts/codegen/__pycache__/SchemaClasses_APE_HE.cpython-312.pyc b/src/model/SchemaConcepts/codegen/__pycache__/SchemaClasses_APE_HE.cpython-312.pyc index 626a3e9..b5a7f1b 100644 Binary files a/src/model/SchemaConcepts/codegen/__pycache__/SchemaClasses_APE_HE.cpython-312.pyc and b/src/model/SchemaConcepts/codegen/__pycache__/SchemaClasses_APE_HE.cpython-312.pyc differ diff --git a/src/model/SchemaConcepts/codegen/__pycache__/__init__.cpython-312.pyc b/src/model/SchemaConcepts/codegen/__pycache__/__init__.cpython-312.pyc index 5fcde19..7c47164 100644 Binary files a/src/model/SchemaConcepts/codegen/__pycache__/__init__.cpython-312.pyc and b/src/model/SchemaConcepts/codegen/__pycache__/__init__.cpython-312.pyc differ diff --git a/src/model/SetupMD.py b/src/model/SetupMD.py new file mode 100644 index 0000000..a420511 --- /dev/null +++ b/src/model/SetupMD.py @@ -0,0 +1,9 @@ +from pydantic import BaseModel + +from mappingservice_plugincore.model.SetupMD import SetupMD as GenericSetupMD + +class SetupMD(GenericSetupMD): + """ + contains metadata derived from file(s) describing the experiment setup + """ + pass \ No newline at end of file diff --git a/src/parser/ParserConfig.py b/src/parser/ParserConfig.py new file mode 100644 index 0000000..90d62ab --- /dev/null +++ b/src/parser/ParserConfig.py @@ -0,0 +1,16 @@ +import enum +from typing import Type + +from mappingservice_plugincore.parser.ParserFactory import ParserFactory +from mappingservice_plugincore.parser.RunMD_Parser import RunMD_Parser +from src.parser.impl.NexusParser import NexusParser + + +available_img_parsers = { + "NexusParser": NexusParser +} + +def register_parsers(): + + for p_name, p_cls in available_img_parsers.items(): + ParserFactory.register_imgparser(p_name, p_cls) \ No newline at end of file diff --git a/src/parser/ParserFactory.py b/src/parser/ParserFactory.py deleted file mode 100644 index 124cf9f..0000000 --- a/src/parser/ParserFactory.py +++ /dev/null @@ -1,19 +0,0 @@ -import logging - -from src.parser.impl.NexusParser import NexusParser - - -class ParserFactory: - - available_img_parsers = { - "NexusParser": NexusParser - } - - @staticmethod - def create_img_parser(parser_name, **kwargs): - parser_class = ParserFactory.available_img_parsers.get(parser_name) - if parser_class: - return parser_class(**kwargs) - else: - logging.error("Parser not available: {}. Available parsers: {}".format(parser_name, list(ParserFactory.available_img_parsers.keys()))) - raise ValueError(f"Parser {parser_name} not found") \ No newline at end of file diff --git a/src/parser/impl/NexusParser.py b/src/parser/impl/NexusParser.py index e362e01..9352d75 100644 --- a/src/parser/impl/NexusParser.py +++ b/src/parser/impl/NexusParser.py @@ -3,13 +3,13 @@ from PIL import Image +from mappingservice_plugincore.exceptions.MappingAbortionError import MappingAbortionError from src.Preprocessor import Preprocessor from src.model.ImageMD import ImageMD -from src.parser.ImageParser import ImageParser +from mappingservice_plugincore.parser.ImageParser import ImageParser from src.parser.mapping_util import map_a_dict from src.resources.maps.mapping import nexusparser_apeHe from src.util import input_to_dict -import configparser @@ -39,11 +39,15 @@ def parse(self, file_path, mapping) -> tuple[ImageMD, str]: mapping_dict = mapping if mapping else self.internal_mapping image_md = map_a_dict(input_md, mapping_dict) + # Debug: Check values before preprocessing + #Preprocessor.normalize_all_datetimes(image_md) Preprocessor.normalize_all_numbers(image_md) Preprocessor.normalize_all_units(image_md) Preprocessor.normalize_gas_names(image_md) + # Debug: Check values after preprocessing + image_from_md = ImageMD(image_metadata=image_md, filePath="") return image_from_md, image_md @@ -68,3 +72,15 @@ def _read_input_file(self, file_path) -> Optional[dict]: output_dict.update(parsed_dict) return output_dict + + def _create_tomo_image(self, image_md, fp): + """ + Implementation of abstract method from ImageParser + For APE-HE data, we create a simple image format + """ + image_md_format = { + "image_metadata": image_md, + "filePath": fp + } + + return ImageMD(image_metadata=image_md, filePath=fp) diff --git a/src/parser/mapping_util.py b/src/parser/mapping_util.py index e5e6a12..ef72abb 100644 --- a/src/parser/mapping_util.py +++ b/src/parser/mapping_util.py @@ -3,8 +3,7 @@ import typing import numpy as np from jsonpath_ng.ext.parser import ExtentedJsonPathParser -from src.IO.MappingAbortionError import MappingAbortionError -import re +from mappingservice_plugincore.exceptions.MappingAbortionError import MappingAbortionError parser = ExtentedJsonPathParser() diff --git a/tests/.DS_Store b/tests/.DS_Store index 68c253f..aa0c715 100644 Binary files a/tests/.DS_Store and b/tests/.DS_Store differ diff --git a/tests/io_tests/test_inputreader_apeHe.py b/tests/io_tests/test_inputreader_apeHe.py index 45e7c25..8632b31 100644 --- a/tests/io_tests/test_inputreader_apeHe.py +++ b/tests/io_tests/test_inputreader_apeHe.py @@ -1,7 +1,9 @@ import os import pytest +from src.IO.MappingAbortionError import MappingAbortionError from src.IO.InputReader import InputReader +from src.parser import ParserConfig from src.parser.impl.NexusParser import NexusParser @@ -14,6 +16,7 @@ def set_up_sample_data(self): def test_get_applicable_nexusparser(self): tp = self.set_up_sample_data() test_file = os.path.join(tp, "IV_CURVE.nxs") + ParserConfig.register_parsers() parsers = InputReader.get_applicable_parsers(test_file) assert len(parsers) >= 1 @@ -21,6 +24,7 @@ def test_get_applicable_nexusparser(self): def test_get_applicable_parsers_with_extension(self, mocker): tp = self.set_up_sample_data() nxs_file = os.path.join(tp, "2D_MAP_CELL.nxs") + ParserConfig.register_parsers() # Patch the expected_input_format method to return the correct list #mocker.patch('src.parser.impl.NexusParser.NexusParser.expected_input_format', self.return_plaintext_format()) @@ -34,6 +38,7 @@ def test_get_applicable_parsers_with_extension(self, mocker): def test_get_applicable_parsers_wo_extension(self, mocker): tp = self.set_up_sample_data() nxs_file = os.path.join(tp, "2D_MAP_CELL") + ParserConfig.register_parsers() mocker.patch.object(NexusParser, 'expected_input_format', return_value=["application/octet-stream", "application/x-hdf5"]) diff --git a/tests/parser_tests/__pycache__/__init__.cpython-312.pyc b/tests/parser_tests/__pycache__/__init__.cpython-312.pyc index 502d781..1d009c1 100644 Binary files a/tests/parser_tests/__pycache__/__init__.cpython-312.pyc and b/tests/parser_tests/__pycache__/__init__.cpython-312.pyc differ diff --git a/tests/parser_tests/__pycache__/test_mappingutil.cpython-312-pytest-7.4.4.pyc b/tests/parser_tests/__pycache__/test_mappingutil.cpython-312-pytest-7.4.4.pyc index dc863c2..e01699f 100644 Binary files a/tests/parser_tests/__pycache__/test_mappingutil.cpython-312-pytest-7.4.4.pyc and b/tests/parser_tests/__pycache__/test_mappingutil.cpython-312-pytest-7.4.4.pyc differ diff --git a/tests/parser_tests/__pycache__/test_preprocessor.cpython-312-pytest-7.4.4.pyc b/tests/parser_tests/__pycache__/test_preprocessor.cpython-312-pytest-7.4.4.pyc index 5e2a9b8..16c8e7a 100644 Binary files a/tests/parser_tests/__pycache__/test_preprocessor.cpython-312-pytest-7.4.4.pyc and b/tests/parser_tests/__pycache__/test_preprocessor.cpython-312-pytest-7.4.4.pyc differ