diff --git a/zml2lido/data/xsl/zml2lido/classificationWrap.xsl b/zml2lido/data/xsl/zml2lido/classificationWrap.xsl index afc74c8..b4e0cdb 100644 --- a/zml2lido/data/xsl/zml2lido/classificationWrap.xsl +++ b/zml2lido/data/xsl/zml2lido/classificationWrap.xsl @@ -202,21 +202,24 @@ --> - - - + - + + --> + + + + + + + + + + + + + http://terminology.lido-schema.org/lido01146 + Zuordnung zu einem kuratierten Bestand + + + + + + + + + + + + diff --git a/zml2lido/data/xsl/zml2lido/eventWrap.xsl b/zml2lido/data/xsl/zml2lido/eventWrap.xsl index 0dacfaf..fe3ea0e 100644 --- a/zml2lido/data/xsl/zml2lido/eventWrap.xsl +++ b/zml2lido/data/xsl/zml2lido/eventWrap.xsl @@ -22,6 +22,7 @@ + diff --git a/zml2lido/file.py b/zml2lido/file.py index 9c5b71a..35d6dc0 100644 --- a/zml2lido/file.py +++ b/zml2lido/file.py @@ -3,12 +3,13 @@ We're currently only zipping zml files, not lido files automatically -unpacked_path = unzip(Path("group1234-chunk1.zip") +unpacked_path = unzip(Path("group1234-chunk1.zip") for chunk in per_chunk(chunk_path): - do_something_with(chunk) + do_something_with(chunk) """ + from zipfile import ZipFile from pathlib import Path import re diff --git a/zml2lido/lidoTool.py b/zml2lido/lidoTool.py index d0ade65..1ff74ba 100644 --- a/zml2lido/lidoTool.py +++ b/zml2lido/lidoTool.py @@ -1,34 +1,34 @@ """ - Little script that converts native Zetcom xml to lido - - NEW - This is a new version that does not require java subprocess anymore - and uses saxon in c (saxonche) directory (installed with pip). - - This version also does no longer need to be executed from script - directory like C:/m3/zml2lido anymore. 
- - lido Command Line Tool - You need to specify three parameters - -j/--job: which flavor (job) of the transformation you want to use - -i/--src: where the src xml file is - -o/--output: will be used as output directory; in my case - C:/m3/zml2lido/sdata/{output} - - cd C:/m3/zml2lido - lido -j smb -i c:/m3/MpApi/sdata/3Wege/3Wege20210904.xml -o 3Wege - # writes lido to file C:/m3/zml2lido/sdata/3Wege/20210904.lido.xml - - Flavors (aka jobs): - FvH wants links in the Internet instead of image files, but we still give - images to the rst project. So we have different flavors or chains of steps - for these purposes. See jobs.py for details - - smb: for FvH - - dd: for debug - - localLido: for rst - - In an old version it did also image processing, but that function is - currently not used/tested. + Little script that converts native Zetcom xml to lido + +NEW +This is a new version that does not require java subprocess anymore +and uses saxon in c (saxonche) directly (installed with pip). + +This version also no longer needs to be executed from the script +directory like C:/m3/zml2lido anymore. + +lido Command Line Tool +You need to specify three parameters + -j/--job: which flavor (job) of the transformation you want to use + -i/--src: where the src xml file is + -o/--output: will be used as output directory; in my case + C:/m3/zml2lido/sdata/{output} + + cd C:/m3/zml2lido + lido -j smb -i c:/m3/MpApi/sdata/3Wege/3Wege20210904.xml -o 3Wege + # writes lido to file C:/m3/zml2lido/sdata/3Wege/20210904.lido.xml + + Flavors (aka jobs): +FvH wants links on the Internet instead of image files, but we still give +images to the rst project. So we have different flavors or chains of steps +for these purposes. See jobs.py for details +- smb: for FvH +- dd: for debug +- localLido: for rst + +An older version also did image processing, but that function is +currently not used/tested. 
""" from lxml import etree @@ -76,6 +76,7 @@ def __init__( self.src = self._sanitize(src=src) self.outdir = self._prepareOutdir() + print(f" outdir {self.outdir}") self._initLog() # @@ -412,7 +413,6 @@ def _prepareOutdir(self) -> Path: if not outdir.exists(): print(f"Making new dir {outdir}") outdir.mkdir(parents=True, exist_ok=False) - # print(f" outdir {outdir}") return outdir def _sanitize(self, *, src: str | Path) -> Path: diff --git a/zml2lido/linkChecker.py b/zml2lido/linkChecker.py index 0aad69c..9693b11 100644 --- a/zml2lido/linkChecker.py +++ b/zml2lido/linkChecker.py @@ -1,20 +1,20 @@ """ - parse a LIDO file and work on linkResources that don't start with http - for those guess the URL based on heuristics indicated by the examples path below - write result to lido file in same dir as src - src and output are lido - - This step produces lvl2 lido. - - USAGE: - lc = LinkChecker(src="path/to/file.lido.xml") - - lc.fixRelatedWorks() # removes dead links in relatedWorks, also adds ISIL - lc.linkResource_online_http() # for all linkResources print online status - lc.rmInternalLinks() # remove linkResource with internal links, not used atm - lc.rmUnpublishedRecords() # removes objects without objectPublishedID - - lc.save(out_fn="path/to/lido.lvl2.xml") +parse a LIDO file and work on linkResources that don't start with http +for those guess the URL based on heuristics indicated by the examples path below +write result to lido file in same dir as src +src and output are lido + +This step produces lvl2 lido. 
+ +USAGE: +lc = LinkChecker(src="path/to/file.lido.xml") + +lc.fixRelatedWorks() # removes dead links in relatedWorks, also adds ISIL +lc.linkResource_online_http() # for all linkResources print online status +lc.rmInternalLinks() # remove linkResource with internal links, not used atm +lc.rmUnpublishedRecords() # removes objects without objectPublishedID + +lc.save(out_fn="path/to/lido.lvl2.xml") """ diff --git a/zml2lido/qc.py b/zml2lido/qc.py index caa34bc..9d8ca39 100644 --- a/zml2lido/qc.py +++ b/zml2lido/qc.py @@ -1,6 +1,6 @@ """ A command line tool that reads in LIDO files and executes checks. Eventually, we want to -become modular, so that we can de/activate tests. Perhaps using a config file. That +become modular, so that we can de/activate tests. Perhaps using a config file. That would be a toml file. We already have a validation tool, so we don't need to start with validation diff --git a/zml2lido/relWorksCache.py b/zml2lido/relWorksCache.py index 7429eae..219ecbe 100644 --- a/zml2lido/relWorksCache.py +++ b/zml2lido/relWorksCache.py @@ -4,14 +4,14 @@ Most importantly, we want to know whether relWorks are online or not. So we query RIA and save the information (SMB-Freigabe) in a Module object and potentially the disk. - rw = relWorks(maxSize=20_000) # + rw = relWorks(maxSize=20_000) # rw.load_cache_file(path=Path("cache.xml")) # load cache file or nothing - rw.lookup_relWork(mtype, ID)? # lookup a single item in RIA and add it to cache. + rw.lookup_relWork(mtype, ID)? # lookup a single item in RIA and add it to cache. 
rw.lookup_from_lido_file(path=path) # grow cache by new items from a single file rw.lookup_from_lido_chunks(path=path) # grow cache by new items from a single file - rw.item_exists(mtype="Object", ID=1234) # true if item exists in cache + rw.item_exists(mtype="Object", ID=1234) # true if item exists in cache rw.item_is_online(mtype="Object", ID=1234) # true if item in cache indicates it's online rw.save() # save in-memory cache to disk @@ -23,6 +23,7 @@ """ + from lxml import etree from mpapi.constants import get_credentials from mpapi.client import MpApi diff --git a/zml2lido/subjectChecker.py b/zml2lido/subjectChecker.py index 05b53cc..7415b76 100644 --- a/zml2lido/subjectChecker.py +++ b/zml2lido/subjectChecker.py @@ -4,16 +4,16 @@ We walk thru a set of chunk files in generic ZML For each chunk - lookup subjects and see if they are vocmap.xml - if not write that term into a Excel file - making a new file if no excel exists and appending an existing one + lookup subjects and see if they are in vocmap.xml + if not, write that term into an Excel file + making a new file if no Excel file exists and appending to an existing one NOTES QUESTIONS -1. -Looping thru chunk files is something I tend to need from multiple scrits. Where should that +1. +Looping thru chunk files is something I tend to need from multiple scripts. Where should that code live? 2. I have similar functionality in the xslt; now I re-implement that in the Python. diff --git a/zml2lido/vocmap.py b/zml2lido/vocmap.py index dc54184..d6af593 100644 --- a/zml2lido/vocmap.py +++ b/zml2lido/vocmap.py @@ -6,6 +6,7 @@ vocmap -i in.xml -o out.xslx """ + from lxml import etree from openpyxl import Workbook from openpyxl.styles import Font