Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
67a173f
debugging tescan sem
gabinoumbe Feb 27, 2025
823aade
debugging tescan sem 2
gabinoumbe Feb 27, 2025
ee067bc
debugging tescan sem 3
gabinoumbe Feb 27, 2025
e127788
Merge branch 'development' into wip_semmapping
GGoetzelmann Feb 28, 2025
deb418d
Revoking change to tf parser map
GGoetzelmann Feb 28, 2025
7d52638
Moving SEM_tescan map to resources and make available as textparser_s…
GGoetzelmann Feb 28, 2025
60dd491
Adapting HdrParser to map rework
GGoetzelmann Feb 28, 2025
cee1d43
Tescan tomography mapping joinked from csv to json
GGoetzelmann Feb 28, 2025
fa9b8e3
TomographyProjectParser adapted to map rework
GGoetzelmann Feb 28, 2025
48740de
Date preprocessing update
GGoetzelmann Feb 28, 2025
94529e9
extension to tescan tomo mapping
gabinoumbe Mar 4, 2025
5bd63a6
extension to jeol sem mapping
gabinoumbe Mar 5, 2025
084ffc3
extension to jeol sem mapping
gabinoumbe Mar 5, 2025
e33ccd2
add SEM_jeol.json file
gabinoumbe Mar 5, 2025
e584c20
testing and updating date preprocessing
Mar 6, 2025
188ee22
join TxtParser and HdrParser, now all text inputs should be parsed wi…
Mar 6, 2025
472e123
Merge branch 'development' into wip_semmapping
Mar 6, 2025
a9247b4
Update README.md
gabinoumbe Mar 7, 2025
03834c2
development of tescan sem and tomo mapping
gabinoumbe Mar 10, 2025
fc4842f
determination of applicable parser now by default does not use guessi…
GGoetzelmann Mar 14, 2025
2784225
Basic testing for txtparser added.
GGoetzelmann Mar 14, 2025
43d05a1
updating map files
gabinoumbe Mar 14, 2025
33c6410
print acquisition datasets
gabinoumbe Mar 17, 2025
d18befa
update tescan map files
gabinoumbe Mar 18, 2025
0a89d93
resolve merge conflict on readme
gabinoumbe Mar 20, 2025
44f4cba
Merge remote-tracking branch 'origin/wip_semmapping' into wip_semmapp…
gabinoumbe Mar 20, 2025
fd7243d
JEOL Test removed from tescan specific feature branch
GGoetzelmann Mar 21, 2025
42ee240
fix for #37
GGoetzelmann Apr 10, 2025
7f6c803
Merge branch 'development' into wip_semmapping_tescan
gabinoumbe Jul 11, 2025
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -9,4 +9,4 @@ magika >= 0.5.1
pytest >= 7.4
pytest-cov >= 6.0
pytest-mock >= 3.14.0
python-dateutil
python-dateutil
15 changes: 11 additions & 4 deletions src/IO/sem/InputReader.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,18 +32,25 @@ def __init__(self, map_path, input_path):


@staticmethod
def get_applicable_parsers(input_path):
def get_applicable_parsers(input_path, by_extension = False):
"""
Filters the available image parsers to those applicable to the input file format.
It tries to determine by extension, but can fallback to using magica.
:param by_extension: set to True if guessing by extension should be used.
:param input_path: file path to input
:return: list of parser names that can handle the provided input format
"""
mt, _ = mimetypes.guess_type(input_path)
if not mt or mt == "application/unknown": #fallback, especially if file extension is not available
applicable_types = [ip.expected_input_format() for ip in ParserFactory.available_img_parsers.values()]

mt = None
if by_extension:
mt, _ = mimetypes.guess_type(input_path)
logging.debug("Mimetypes file identification result: {}".format(mt))
if not mt or mt == "application/unknown" or mt not in applicable_types: #fallback, especially if file extension is not available
#Text files are tricky with magica, so try to read as such first
mt = get_filetype_with_magica(input_path)
if mt != "text/plain" and "image/" not in mt:
logging.debug("Magika file identification result: {}".format(mt))
if mt not in applicable_types:
try:
robust_textfile_read(input_path)
mt = "text/plain"
Expand Down
3 changes: 3 additions & 0 deletions src/Preprocessor.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,9 @@ def normalize_datetime(input_value) -> str:
if not input_value.get("Date") and input_value.get("Time"):
logging.warning("Encountered complex date field, but cannot interpret it")
return input_value
if input_value.get("Date") and not input_value.get("Time"):
input_value["Time"] = "00:00:00"
logging.info("Input with date information but no time information found. Setting time to 00:00:00")
input_value = input_value.get("Date") + " " + input_value.get("Time")
output_value = parse_datetime(input_value)
if type(output_value) == datetime:
Expand Down
4 changes: 3 additions & 1 deletion src/model/SchemaConcepts/codegen/SchemaClasses_TOMO.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,7 +75,9 @@ class DatasetType(Enum):
FIB_Multi_Detector_Image_Q2 = 'FIB Multi-Detector Image-Q2'
SEM_Multi_Detector_Image_Q1 = 'SEM Multi-Detector Image-Q1'
SEM_Multi_Detector_Image_Q2 = 'SEM Multi-Detector Image-Q2'

LE_BSE = 'LE BSE'
In_Beam_f_BSE = 'In-Beam f-BSE'
SE = 'SE'

class Role(Enum):
Data_Curator = 'Data Curator'
Expand Down
11 changes: 8 additions & 3 deletions src/parser/ParserFactory.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,23 +3,28 @@
from src.parser.impl.Atlas3dParser import Atlas3dParser
from src.parser.impl.EMProjectParser import EMProjectParser
from src.parser.impl.ProjectDataParser import ProjectDataParser
from src.parser.impl.TomographyProjectParser import TomographyProjectParser
from src.parser.impl.TiffParser import TiffParser
from src.parser.impl.TxtParser import TxtParser


class ParserFactory:

available_setupmd_parsers = {
"EMProjectParser": EMProjectParser,
"Atlas3DParser": Atlas3dParser
"Atlas3DParser": Atlas3dParser,
"TomographyProjectParser": TomographyProjectParser
}

available_runmd_parsers = {
"ProjectDataParser": ProjectDataParser,
"Atlas3DParser": Atlas3dParser
"Atlas3DParser": Atlas3dParser,
"TomographyProjectParser": TomographyProjectParser
}

available_img_parsers = {
"TiffParser": TiffParser
"TiffParser": TiffParser,
"TxtParser": TxtParser
}

@staticmethod
Expand Down
3 changes: 3 additions & 0 deletions src/parser/impl/Atlas3dParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,9 @@ def parse_run(self, payload) -> tuple[RunMD, str]:

for imgmd in resultMD["Image"]:
image_fields = list(imgmd.keys())
#print("===image_fields===>",image_fields)
matchingFilenames = [elem for elem in image_fields if re.match(pattern_to_DatasetType, elem)]
#print("==matchingFilenames==>",matchingFilenames)
if len(matchingFilenames) != 0:
for field in matchingFilenames:

Expand All @@ -59,6 +61,7 @@ def parse_setup(self, payload) -> tuple[SetupMD, dict]:

# Ensure datasets is always a list
datasets = self._create_datasets(ac_md)
print(datasets)

if not datasets:
return acquisition, parsed
Expand Down
7 changes: 1 addition & 6 deletions src/parser/impl/TiffParser.py
Original file line number Diff line number Diff line change
Expand Up @@ -40,12 +40,7 @@ def __init__(self, mode, tagID=None):
logging.error("Internal mapping for tag '{}' is not available".format(self.tagID))
raise MappingAbortionError("Setting up image parser failed.")
m = self.available_tomo_mappings[self.tagID]
if mode == ParserMode.SEM:
try:
m = self.available_sem_mappings[self.tagID]
except KeyError:
pass
self.internal_mapping = input_to_dict(m.read_text())
self.internal_mapping = input_to_dict(m.read_text())
super().__init__(mode)

@staticmethod
Expand Down
94 changes: 94 additions & 0 deletions src/parser/impl/TomographyProjectParser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,94 @@
import logging
from typing import List

from src.model.RunMD import RunMD
from src.model.SchemaConcepts.Acquisition_simplified import Acquisition
from src.model.SchemaConcepts.Dataset_simplified import Dataset
from src.model.SchemaConcepts.TOMO_Image import TOMO_Image
from src.model.SchemaConcepts.codegen.SchemaClasses_TOMO import DatasetType
from src.model.SetupMD import SetupMD
from src.parser.SetupMD_Parser import SetupMD_Parser
from src.parser.mapping_util import map_a_dict
from src.resources.maps.mapping import setup_tescan
from src.util import input_to_dict
from src.util import normalize_path


class TomographyProjectParser(SetupMD_Parser):
    """Setup and run metadata parser for tomography project files.

    The payload is read through the inherited ``_read_input`` and mapped to
    the target schema with the packaged ``setup_tescan`` mapping.
    """

    @staticmethod
    def supported_input_sources() -> List[str]:
        """Return the names of the input sources this parser supports."""
        return ['Tescan Solaris']

    def __init__(self):
        # The mapping ships with the package, so it is loaded once per instance.
        self.internal_mapping = input_to_dict(setup_tescan.read_text())

    def parse_run(self, payload) -> tuple[RunMD, dict]:
        """Collect the images referenced by the project file into a ``RunMD``.

        :param payload: raw project input, handed to ``_read_input``
        :return: tuple of the populated ``RunMD`` and the parsed input
        :raises KeyError: if the expected project structure is not present
        """
        parsed = self._read_input(payload)

        acquisition = parsed["TomographyProject"]["AcquisitionsHistory"]["Acquisition"]
        detectors = acquisition["Datasets"]["Dataset"]["Detectors"]

        data_entries = detectors["Data"]
        if isinstance(data_entries, dict):
            # Presumably a single <Data> element is delivered as one mapping
            # instead of a list (verify against _read_input); iterating the
            # mapping directly would walk its keys, so wrap it.
            data_entries = [data_entries]

        runMD = RunMD()

        for imgmd in data_entries:
            detector = imgmd["@detector"]
            try:
                # Lookup by value. `detector in DatasetType` raises TypeError
                # for plain strings on Python < 3.12.
                dataset_type = DatasetType(detector)
            except ValueError:
                logging.debug("Skipping detector '%s': not a known dataset type", detector)
                continue

            # Images are referenced as <detector>/<name>-png.hdr
            file_name = imgmd["@path"].split("/")[-1].replace(".png", "-png.hdr")
            fp = normalize_path(f"{detector}/{file_name}")
            logging.debug("Detector '%s': registering image %s", detector, fp)
            runMD.add_image(TOMO_Image(localPath=fp), dataset_type)

        return runMD, parsed

    def parse_setup(self, payload) -> tuple[SetupMD, dict]:
        """Map the project file to acquisition-level setup metadata.

        :param payload: raw project input, handed to ``_read_input``
        :return: tuple of the setup metadata and the parsed input
        """
        parsed = self._read_input(payload)

        ac_md = map_a_dict(parsed, self.internal_mapping)
        acquisition = self._create_acquisition(ac_md)
        datasets = self._create_datasets(ac_md)
        if not datasets:
            # NOTE(review): this path hands back the bare Acquisition rather
            # than a SetupMD, unlike the annotated return type. Kept as-is
            # because callers may rely on it; confirm and align.
            return acquisition, parsed

        if len(datasets) == 1:
            # A single dataset is stored as the template
            acquisition.dataset_template = datasets[0]
        else:
            acquisition.datasets = datasets
        return SetupMD(acquisition_metadata=acquisition), parsed

    def _create_acquisition(self, ac_md) -> Acquisition:
        """Build the ``Acquisition`` from the mapped ``genericMetadata`` section."""
        return Acquisition(genericMetadata=ac_md["genericMetadata"])

    def _create_datasets(self, ac_md) -> list:
        """Build one ``Dataset`` per mapped dataset entry.

        Returns an empty list when the mapping produced no ``dataset`` entry,
        so that ``parse_setup`` can take its "no datasets" path.
        """
        raw_datasets = ac_md.get("dataset") or []
        if isinstance(raw_datasets, dict):
            # Assumes a lone dataset may arrive as a mapping - TODO confirm
            raw_datasets = [raw_datasets]
        return [self._create_dataset(ds) for ds in raw_datasets]

    def _create_dataset(self, ds_dict) -> Dataset:
        """Instantiate a ``Dataset`` from its mapped field dictionary."""
        return Dataset(**ds_dict)

    @staticmethod
    def retrievable_datasets():
        """Return True: this parser reports that datasets can be retrieved."""
        return True

    @staticmethod
    def expected_input_format():
        """Return the input format identifier this parser expects."""
        return "xml"



89 changes: 89 additions & 0 deletions src/parser/impl/TxtParser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import logging
from typing import Optional

from PIL import Image

from src.Preprocessor import Preprocessor
from src.model.ImageMD import ImageMD
from src.parser.ImageParser import ImageParser, ParserMode
from src.parser.mapping_util import map_a_dict
from src.resources.maps.mapping import textparser_tomo_tescan
from src.util import input_to_dict
import configparser



#TODO: would this have any benefit from replacing with tifffile lib?

class TxtParser(ImageParser):
    """Image metadata parser for plain-text metadata files.

    In TOMO mode a packaged mapping (``textparser_tomo_tescan``) is used as
    fallback when the caller does not supply a mapping of its own.
    """

    # Fallback mapping; stays None for modes without a packaged mapping.
    internal_mapping = None

    def __init__(self, mode):
        if mode == ParserMode.TOMO:
            self.internal_mapping = input_to_dict(textparser_tomo_tescan.read_text())
        super().__init__(mode)

    @staticmethod
    def expected_input_format():
        """Return the mime type this parser handles."""
        return "text/plain"

    def parse(self, file_path, mapping) -> tuple[Optional[ImageMD], Optional[dict]]:
        """Read a text metadata file and map it to an ``ImageMD``.

        :param file_path: path of the text file to parse
        :param mapping: mapping to apply; if falsy the internal mapping is used
        :return: tuple of the image metadata object and the mapped dictionary,
                 or ``(None, None)`` if the file yields no metadata
        :raises SystemExit: if neither ``mapping`` nor an internal mapping exists
        """
        input_md = self._read_input_file(file_path)
        if not input_md:
            logging.warning("No metadata extractable from %s", file_path)
            return None, None

        mapping_dict = mapping if mapping else self.internal_mapping
        if not mapping_dict:
            logging.error("No mapping provided for image parsing. Aborting")
            # Same effect as exit(1), without depending on the helper that
            # the `site` module injects for interactive use.
            raise SystemExit(1)

        image_md = map_a_dict(input_md, mapping_dict)

        # Both normalizers work on image_md in place.
        Preprocessor.normalize_all_units(image_md)
        Preprocessor.normalize_all_datetimes(image_md)

        if self.mode == ParserMode.TOMO:
            image_from_md = self._create_tomo_image(image_md, file_path)
        else:
            # NOTE(review): filePath is left empty here although file_path is
            # known - confirm this is intended.
            image_from_md = ImageMD(image_metadata=image_md, filePath="")

        return image_from_md, image_md

    def _create_tomo_image(self, image_md, fp) -> ImageMD:
        """Split mapped TOMO metadata into acquisition, dataset and image parts.

        Note that ``image_md`` is modified in place: the ``images`` entry is
        removed from the dataset section and, if present, receives the file
        path as ``localPath``.

        :param image_md: mapped metadata with an ``acquisition.dataset.images`` structure
        :param fp: path of the parsed file
        :return: the assembled ``ImageMD``
        :raises KeyError: if one of the expected sections is missing
        """
        acquisition_info = image_md["acquisition"]
        dataset_metadata = acquisition_info["dataset"]
        # Image metadata is passed separately, so detach it from the dataset.
        image_metadata = dataset_metadata.pop("images")
        if image_metadata:
            image_metadata["localPath"] = fp

        return ImageMD(
            acquisition_info=acquisition_info,
            dataset_metadata=dataset_metadata,
            image_metadata=image_metadata,
            filePath=fp,
        )

    def _read_input_file(self, file_path) -> Optional[dict]:
        """Read a UTF-8 text file and convert its content with ``input_to_dict``.

        :param file_path: path of the text file
        :return: the extracted data as a new dict; empty if nothing was extracted
        """
        with open(file_path, "r", encoding="utf-8") as file:
            md = file.read()

        extracted = input_to_dict(md)
        # Return a fresh dict so callers never share state with the helper,
        # and tolerate a helper that yields nothing for unparseable input.
        return dict(extracted) if extracted else {}
29 changes: 29 additions & 0 deletions src/resources/maps/acquisition_map.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
EMProject,Atlas3d,TomographyProject,TOMO_Schema
EMProject.ApplicationName,ATLAS3D-Job.ATLAS3D-Setup.Name,,genericMetadata.program.programName
EMProject.ApplicationVersion,ATLAS3D-Job.@version,,genericMetadata.program.programVersion
EMProject.ApplicationId,ATLAS3D-Job.ATLAS3D-Setup.ID,TomographyProject.Project.@sampleId,genericMetadata.applicationID.identifierValue
EMProject.FileVersion,,,genericMetadata.fileVersion
EMProject.ProjectName,ATLAS3D-Job.ATLAS3D-Setup.JobName,,genericMetadata.projectName
,ATLAS3D-Job.ATLAS3D-Setup.JobDescription,TomographyProject.Project.@description,genericMetadata.projectDescription
,ATLAS3D-Job.ATLAS3D-Setup.Description,,genericMetadata.userDescription
EMProject.ZCutSpacing,ATLAS3D-Job.ATLAS3D-Setup.Settings.Imaging.Interval,TomographyProject.Slicing.@thickness,genericMetadata.zCutSpacing.value
EMProject.Datasets.Dataset[*].NumberOfCuts,,TomographyProject.AcquisitionsHistory.Acquisition.@totalSlices,genericMetadata.numberOfCuts
EMProject.Datasets.Dataset[*].Rows,,,dataset[*].rows
EMProject.Datasets.Dataset[*].Columns,,,dataset[*].columns
EMProject.Datasets.Dataset[*].LiveAcquisition.TileColumn,,,dataset[*].tileColumn
EMProject.Datasets.Dataset[*].LiveAcquisition.TileRow,,,dataset[*].tileRow
EMProject.Datasets.Dataset[*].Name,,,dataset[*].datasetType
,ATLAS3D-Job.ATLAS3D-Setup.SEM_System_State.AccV,,dataset[*].instrument.eBeam.accelerationVoltage.value
,ATLAS3D-Job.ATLAS3D-Setup.SEM_System_State.ProbeI,TomographyProject.Acquisition.Positions.Position.Datasets.Dataset.ImageSettings.@beam_current,dataset[*].instrument.eBeam.beamCurrent.value
,ATLAS3D-Job.ATLAS3D-Setup.Settings.ImagingMode,TomographyProject.Acquisition.Positions.Position.Datasets.Dataset.ImageSettings.@scanMode,dataset[*].instrument.eBeam.imageMode
,ATLAS3D-Job.ATLAS3D-Setup.SEM_System_State.ApName,,dataset[*].instrument.eBeam.apertureSetting.size
,ATLAS3D-Job.ATLAS3D-Setup.Settings.Options.TiltCorrectionEnabled,,dataset[*].instrument.eBeam.tiltCorrectionIsOn
,ATLAS3D-Job.ATLAS3D-Setup.Settings.Options.DynamicFocusEnabled,,dataset[*].instrument.eBeam.dynamicFocusIsOn
,ATLAS3D-Job.ATLAS3D-Setup.Settings.NotchTracking.Ti,,dataset[*].instrument.iBeam.millingCurrent.value
,ATLAS3D-Job.ATLAS3D-Setup.FIB_System_State.AccV,,dataset[*].instrument.iBeam.accelerationVoltage.value
,ATLAS3D-Job.ATLAS3D-Setup.Settings.NotchTracking.Dwell,TomographyProject.Acquisition.Positions.Position.Datasets.Dataset.@dwelltime,dataset[*].instrument.scan.dwellTime.value
,ATLAS3D-Job.ATLAS3D-Setup.Settings.Imaging.FibicsRasterInfo.PixelSizeX,TomographyProject.Acquisition.Positions.Position.Datasets.Dataset.@pxlsize,dataset[*].instrument.scan.pixelWidth.value
,ATLAS3D-Job.ATLAS3D-Setup.Settings.Imaging.FibicsRasterInfo.PixelSizeY,TomographyProject.Acquisition.Positions.Position.Datasets.Dataset.@pxlsize,dataset[*].instrument.scan.pixelHeight.value
,ATLAS3D-Job.ATLAS3D-Setup.Settings.Imaging.FibicsRasterInfo.LineAveraging,,dataset[*].instrument.scan.eScan.lineAveraging
,ATLAS3D-Job.ATLAS3D-Setup.Settings.Imaging.FibicsRasterInfo.Width,,dataset[*].instrument.imaging.numberOfPixels.xPixels
,ATLAS3D-Job.ATLAS3D-Setup.Settings.Imaging.FibicsRasterInfo.Height,,dataset[*].instrument.imaging.numberOfPixels.yPixels
Loading
Loading