Skip to content

Commit 9766f93

Browse files
committed
GdsCollector: proof-of-concept to catch validation errors including filename
1 parent 7687cd6 commit 9766f93

File tree

5 files changed: 27 additions and 10 deletions.

ocrd_models/ocrd_models/generatedscollector.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
class OcrdGdsCollector(object):
22

3-
def __init__(self, messages=None):
4-
print('GdsCollector.__init__', self)
3+
def __init__(self, filename=None, messages=None):
4+
self.filename = filename
55
if messages is None:
66
self.messages = []
77
else:

ocrd_models/ocrd_models/ocrd_page_generateds.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
# -*- coding: utf-8 -*-
33

44
#
5-
# Generated Thu Jul 30 13:44:35 2020 by generateDS.py version 2.35.24.
5+
# Generated Mon Aug 24 15:23:10 2020 by generateDS.py version 2.35.26.
66
# Python 3.6.9 (default, Jul 17 2020, 12:50:27) [GCC 8.4.0]
77
#
88
# Command line options:
@@ -14950,7 +14950,9 @@ def main():
1495014950
]
1495114951
def parse(inFileName, silence=False, print_warnings=True):
1495214952
global CapturedNsmap_
14953-
gds_collector = GdsCollector_()
14953+
if not filename:
14954+
filename=inFilename
14955+
gds_collector = GdsCollector_(filename=filenmae)
1495414956
parser = None
1495514957
doc = parsexml_(inFileName, parser)
1495614958
rootNode = doc.getroot()
@@ -14979,7 +14981,7 @@ def parse(inFileName, silence=False, print_warnings=True):
1497914981
sys.stderr.write(separator)
1498014982
return rootObj
1498114983

14982-
def parseString(inString, silence=False, print_warnings=True):
14984+
def parseString(inString, filename=None, silence=False, print_warnings=True):
1498314985
'''Parse a string, create the object tree, and export it.
1498414986

1498514987
Arguments:
@@ -14990,7 +14992,7 @@ def parseString(inString, silence=False, print_warnings=True):
1499014992
'''
1499114993
parser = None
1499214994
rootNode= parsexmlstring_(inString, parser)
14993-
gds_collector = GdsCollector_()
14995+
gds_collector = GdsCollector_(filename=filename)
1499414996
rootTag, rootClass = get_root_tag(rootNode)
1499514997
if rootClass is None:
1499614998
rootTag = 'PcGts'

ocrd_models/ocrd_page_parse.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
def parse(inFileName, silence=False, print_warnings=True):
22
global CapturedNsmap_
3-
gds_collector = GdsCollector_()
3+
if not filename:
4+
filename=inFilename
5+
gds_collector = GdsCollector_(filename=filenmae)
46
parser = None
57
doc = parsexml_(inFileName, parser)
68
rootNode = doc.getroot()
@@ -29,7 +31,7 @@ def parse(inFileName, silence=False, print_warnings=True):
2931
sys.stderr.write(separator)
3032
return rootObj
3133

32-
def parseString(inString, silence=False, print_warnings=True):
34+
def parseString(inString, filename=None, silence=False, print_warnings=True):
3335
'''Parse a string, create the object tree, and export it.
3436
3537
Arguments:
@@ -40,7 +42,7 @@ def parseString(inString, silence=False, print_warnings=True):
4042
'''
4143
parser = None
4244
rootNode= parsexmlstring_(inString, parser)
43-
gds_collector = GdsCollector_()
45+
gds_collector = GdsCollector_(filename=filename)
4446
rootTag, rootClass = get_root_tag(rootNode)
4547
if rootClass is None:
4648
rootTag = 'PcGts'

requirements_test.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,5 @@
11
pytest >= 4.0.0
2-
generateDS == 2.35.24
2+
generateDS == 2.35.26
33
coverage >= 4.5.2
44
sphinx
55
codecov >= 2.0.15

tests/model/test_ocrd_page.py

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -297,5 +297,18 @@ def test_gdscollector_override(self):
297297
gdc = pcgts.gds_collector_
298298
self.assertTrue(isinstance(gdc, OcrdGdsCollector))
299299

300+
def test_gdscollector_info(self):
301+
filename = assets.path_to('gutachten/data/TEMP1/PAGE_TEMP1.xml')
302+
with open(filename, 'r') as f:
303+
s = f.read()
304+
s = s.replace('pc:Page', 'pc:Foo')
305+
s = s.encode('utf-8')
306+
pcgts = parseString(s, silence=True, filename=filename)
307+
gdsc = pcgts.gds_collector_
308+
self.assertEqual(gdsc.messages, [])
309+
self.assertEqual(gdsc.filename, filename)
310+
pcgts.validate_(gdsc, True)
311+
self.assertEqual(gdsc.messages, ['Number of values for Page near line 2 is below the minimum allowed, expected at least 1, found 0'])
312+
300313
if __name__ == '__main__':
301314
main(__file__)

0 commit comments

Comments
 (0)