ruff format and xslt: Zuordnung zu kuratierter Sammlung

mokko · Jun 1, 2024 · 33a48ae · 33a48ae
1 parent c8fbfa0
commit 33a48ae
Show file tree

Hide file tree

Showing 10 changed files with 109 additions and 63 deletions.
diff --git a/zml2lido/data/xsl/zml2lido/classificationWrap.xsl b/zml2lido/data/xsl/zml2lido/classificationWrap.xsl
@@ -202,21 +202,24 @@
 	-->
 	<xsl:template name="bereich3">
 		<xsl:variable name="bereich" select="z:vocabularyReference[@name = 'ObjOrgGroupVoc']/z:vocabularyReferenceItem/z:formattedValue"/>
-		<xsl:variable name="bereichControl" select="func:vocmap-control('Bereich',$bereich)"/>
 		<xsl:variable name="aaturi" select="func:vocmap-replace-laxer('Bereich',$bereich, 'aaturi')"/>
 		<xsl:variable name="aatlabel" select="func:vocmap-replace-laxer('Bereich',$bereich, 'aatlabel')"/>
-		<xsl:if test="$bereichControl ne ''">		
-			<!--xsl:message>
+		<!--
+		1.6.2024 Sammlung soll nicht classification sein.
+		<xsl:variable name="sammlung" select="func:vocmap-control('Bereich',$bereich)"/>
+		<xsl:if test="$sammlung ne ''">		
+			xsl:message>
 				<xsl:text>classification from Bereich </xsl:text>
 				<xsl:value-of select="@id"/>
-			</xsl:message-->
+			</xsl:message
 			<lido:classification>
 				<lido:conceptID lido:encodinganalog="RIA:Bereich" lido:source="ObjOrgGroupVoc" lido:type="local"/>
 				<lido:term xml:lang="de">
-					<xsl:value-of select="$bereichControl"/>
+					<xsl:value-of select="$sammlung"/>
 				</lido:term>
 			</lido:classification>
 		</xsl:if>
+		-->
 		<xsl:if test="$aaturi ne ''">		
 			<lido:classification>
 				<lido:conceptID lido:encodinganalog="RIA:SystematikArt(ObjSystematicClb)" 

diff --git a/zml2lido/data/xsl/zml2lido/event-Zuordnung.xsl b/zml2lido/data/xsl/zml2lido/event-Zuordnung.xsl
@@ -0,0 +1,38 @@
+<xsl:stylesheet version="2.0"
+	xmlns:func="http://func"
+    xmlns:lido="http://www.lido-schema.org"
+    xmlns:xsl="http://www.w3.org/1999/XSL/Transform"
+    xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+    xmlns:z="http://www.zetcom.com/ria/ws/module"
+    exclude-result-prefixes="z func"
+    xsi:schemaLocation="http://www.lido-schema.org http://www.lido-schema.org/schema/v1.0/lido-v1.0.xsd">
+
+    <xsl:output method="xml" version="1.0" encoding="UTF-8" indent="yes" />
+    <xsl:strip-space elements="*" />
+
+    <!--
+		Assignment to a curated holding (Zuordnung zu einem kuratierten Bestand).
+		Nothing is emitted when the Sammlung lookup is empty, so no event with an empty displayObject is written.
+    -->
+    <xsl:template name="Zuordnung">
+		<xsl:variable name="bereich" select="z:vocabularyReference[@name = 'ObjOrgGroupVoc']/z:vocabularyReferenceItem/z:formattedValue"/>
+		<xsl:variable name="sammlung" select="func:vocmap-control('Bereich',$bereich)"/>
+		<xsl:if test="$sammlung ne ''">
+			<lido:eventSet>
+				<lido:event>
+					<lido:eventType>
+						<lido:conceptID lido:source="LIDO-Terminologie"
+							lido:type="http://terminology.lido-schema.org/lido00099">http://terminology.lido-schema.org/lido01146</lido:conceptID>
+						<lido:term lido:label="Assignment to a curated holding"
+							lido:addedSearchTerm="no">Zuordnung zu einem kuratierten Bestand</lido:term>
+					</lido:eventType>
+					<lido:thingPresent>
+						<lido:displayObject><xsl:value-of select="$sammlung"/></lido:displayObject>
+					</lido:thingPresent>
+				</lido:event>
+			</lido:eventSet>
+		</xsl:if>
+    </xsl:template>
+</xsl:stylesheet>
+
+
diff --git a/zml2lido/data/xsl/zml2lido/eventWrap.xsl b/zml2lido/data/xsl/zml2lido/eventWrap.xsl
@@ -22,6 +22,7 @@
     <xsl:import href="event-Sammeln.xsl" />
     <xsl:import href="event-Veröffentlichung.xsl" />
     <xsl:import href="event-unknown.xsl" />
+    <xsl:import href="event-Zuordnung.xsl" />
 
 	<!-- 
 		http://terminology-view.lido-schema.org/vocnet/?startNode=lido00409&lang=en&uriVocItem=http://terminology.lido-schema.org/lido00228
@@ -49,6 +50,7 @@
             <xsl:call-template name="Herstellung"/>			
             <xsl:call-template name="Sammeln"/>
             <xsl:call-template name="Veröffentlichung"/>			
+            <xsl:call-template name="Zuordnung"/>			
             <!--xsl:call-template name="unknown"/ doesn't work yet-->			
         </lido:eventWrap>
     </xsl:template>

diff --git a/zml2lido/file.py b/zml2lido/file.py
@@ -3,12 +3,13 @@
 
 We're currently only zipping zml files, not lido files automatically
 
-unpacked_path = unzip(Path("group1234-chunk1.zip") 
+unpacked_path = unzip(Path("group1234-chunk1.zip"))
 
 for chunk in per_chunk(chunk_path):
-	do_something_with(chunk)
+        do_something_with(chunk)
 
 """
+
 from zipfile import ZipFile
 from pathlib import Path
 import re

diff --git a/zml2lido/lidoTool.py b/zml2lido/lidoTool.py
@@ -1,34 +1,34 @@
 """
-	Little script that converts native Zetcom xml to lido
-	
-    NEW
-    This is a new version that does not require java subprocess anymore
-    and uses saxon in c (saxonche) directory (installed with pip).
-
-    This version also does no longer need to be executed from script 
-    directory like C:/m3/zml2lido anymore.
-    
-    lido Command Line Tool
-    You need to specify three parameters 
-        -j/--job: which flavor (job) of the transformation you want to use 
-        -i/--src: where the src xml file is
-        -o/--output: will be used as output directory; in my case 
-            C:/m3/zml2lido/sdata/{output}
-
-        cd C:/m3/zml2lido 
-        lido -j smb -i c:/m3/MpApi/sdata/3Wege/3Wege20210904.xml -o 3Wege
-        # writes lido to file C:/m3/zml2lido/sdata/3Wege/20210904.lido.xml
-
-	Flavors (aka jobs):
-    FvH wants links in the Internet instead of image files, but we still give 
-    images to the rst project. So we have different flavors or chains of steps 
-    for these purposes. See jobs.py for details
-    - smb: for FvH
-    - dd: for debug
-    - localLido: for rst
-    
-    In an old version it did also image processing, but that function is
-    currently not used/tested. 
+Little script that converts native Zetcom xml to lido
+
+NEW
+This is a new version that no longer requires a Java subprocess
+and uses Saxon/C (saxonche) directly (installed with pip).
+
+This version also no longer needs to be executed from the script
+directory, e.g. C:/m3/zml2lido.
+
+lido Command Line Tool
+You need to specify three parameters
+    -j/--job: which flavor (job) of the transformation you want to use
+    -i/--src: where the src xml file is
+    -o/--output: will be used as output directory; in my case
+        C:/m3/zml2lido/sdata/{output}
+
+    cd C:/m3/zml2lido
+    lido -j smb -i c:/m3/MpApi/sdata/3Wege/3Wege20210904.xml -o 3Wege
+    # writes lido to file C:/m3/zml2lido/sdata/3Wege/20210904.lido.xml
+
+Flavors (aka jobs):
+FvH wants links in the Internet instead of image files, but we still give
+images to the rst project. So we have different flavors or chains of steps
+for these purposes. See jobs.py for details
+- smb: for FvH
+- dd: for debug
+- localLido: for rst
+
+In an old version it did also image processing, but that function is
+currently not used/tested.
 """
 
 from lxml import etree
@@ -76,6 +76,7 @@ def __init__(
 
         self.src = self._sanitize(src=src)
         self.outdir = self._prepareOutdir()
+        print(f" outdir {self.outdir}")
         self._initLog()
 
     #
@@ -412,7 +413,6 @@ def _prepareOutdir(self) -> Path:
         if not outdir.exists():
             print(f"Making new dir {outdir}")
             outdir.mkdir(parents=True, exist_ok=False)
-        # print(f" outdir {outdir}")
         return outdir
 
     def _sanitize(self, *, src: str | Path) -> Path:

diff --git a/zml2lido/linkChecker.py b/zml2lido/linkChecker.py
@@ -1,20 +1,20 @@
 """
-    parse a LIDO file and work on linkResources that don't start with http 
-    for those guess the URL based on heuristics indicated by the examples path below
-    write result to lido file in same dir as src
-    src and output are lido
-    
-    This step produces lvl2 lido.
-
-    USAGE:
-    lc = LinkChecker(src="path/to/file.lido.xml")
-
-    lc.fixRelatedWorks()          # removes dead links in relatedWorks, also adds ISIL
-    lc.linkResource_online_http() # for all linkResources print online status
-    lc.rmInternalLinks()          # remove linkResource with internal links, not used atm
-    lc.rmUnpublishedRecords()     # removes objects without objectPublishedID
-
-    lc.save(out_fn="path/to/lido.lvl2.xml")
+parse a LIDO file and work on linkResources that don't start with http
+for those guess the URL based on heuristics indicated by the examples path below
+write result to lido file in same dir as src
+src and output are lido
+
+This step produces lvl2 lido.
+
+USAGE:
+lc = LinkChecker(src="path/to/file.lido.xml")
+
+lc.fixRelatedWorks()          # removes dead links in relatedWorks, also adds ISIL
+lc.linkResource_online_http() # for all linkResources print online status
+lc.rmInternalLinks()          # remove linkResource with internal links, not used atm
+lc.rmUnpublishedRecords()     # removes objects without objectPublishedID
+
+lc.save(out_fn="path/to/lido.lvl2.xml")
 
 """
 

diff --git a/zml2lido/qc.py b/zml2lido/qc.py
@@ -1,6 +1,6 @@
 """
 A command line tool that reads in LIDO files and executes checks. Eventually, we want to
-become modular, so that we can de/activate tests. Perhaps using a config file. That 
+become modular, so that we can de/activate tests. Perhaps using a config file. That
 would be a toml file.
 
 We already have a validation tool, so we don't need to start with validation

diff --git a/zml2lido/relWorksCache.py b/zml2lido/relWorksCache.py
@@ -4,14 +4,14 @@
     Most importantly, we want to know whether relWorks are online or not. So we query
     RIA and save the information (SMB-Freigabe) in a Module object and potentially the disk.
 
-    rw = relWorks(maxSize=20_000) # 
+    rw = relWorks(maxSize=20_000) #
     rw.load_cache_file(path=Path("cache.xml")) # load cache file or nothing
 
-    rw.lookup_relWork(mtype, ID)? # lookup a single item in RIA and add it to cache.  
+    rw.lookup_relWork(mtype, ID)? # lookup a single item in RIA and add it to cache.
     rw.lookup_from_lido_file(path=path) # grow cache by new items from a single file
     rw.lookup_from_lido_chunks(path=path) # grow cache by new items from a single file
 
-    rw.item_exists(mtype="Object", ID=1234) # true if item exists in cache 
+    rw.item_exists(mtype="Object", ID=1234) # true if item exists in cache
     rw.item_is_online(mtype="Object", ID=1234) # true if item in cache indicates it's online
 
     rw.save() # save in-memory cache to disk
@@ -23,6 +23,7 @@
 
 
 """
+
 from lxml import etree
 from mpapi.constants import get_credentials
 from mpapi.client import MpApi

diff --git a/zml2lido/subjectChecker.py b/zml2lido/subjectChecker.py
@@ -4,16 +4,16 @@
 
 We walk thru a set of chunk files in generic ZML
 For each chunk
-	lookup subjects and see if they are vocmap.xml
-	if not write that term into a Excel file
-	making a new file if no excel exists and appending an existing one
+        look up subjects and see if they are in vocmap.xml
+        if not, write that term into an Excel file,
+        making a new file if no Excel file exists or appending to an existing one
 
 NOTES
 
 
 QUESTIONS
-1. 
-Looping thru chunk files is something I tend to need from multiple scrits. Where should that 
+1.
+Looping thru chunk files is something I tend to need from multiple scripts. Where should that
 code live?
 2.
 I have similar functionality in the xslt; now I re-implement that in the Python.

diff --git a/zml2lido/vocmap.py b/zml2lido/vocmap.py
@@ -6,6 +6,7 @@
 vocmap -i in.xml -o out.xslx
 
 """
+
 from lxml import etree
 from openpyxl import Workbook
 from openpyxl.styles import Font