Skip to content

Commit bbea046

Browse files
committed
Solve issue #142: the test result depended on the installed version of pdfinfo.
Change the test to check that the extracted metadata is a superset of the metadata we expect.
1 parent c8e3ba0 commit bbea046

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

tests/test_worker_extractor.py

+10-1
Original file line numberDiff line numberDiff line change
@@ -60,7 +60,16 @@ def test_extraction_from_pdf_file(self):
6060
'PDF version': '1.4',
6161
}
6262
self.assertEqual(expected, result['text'])
63-
self.assertEqual(metadata, metadata_expected)
63+
# Check that the expected metadata is a subset of what
64+
# our Extractor found (it may have found more details
65+
# depending on the toolset used to extract metadata)
66+
metadata_expected_set = set(metadata_expected.iteritems())
67+
metadata_set = set(metadata.iteritems())
68+
diff_set = metadata_expected_set - metadata_set
69+
self.assertTrue(metadata_expected_set.issubset(metadata_set),
70+
("Extracted metadata is not a subset of the expected metadata. "
71+
"Items missing or with different values: {}").format(
72+
u", ".join(unicode(item) for item in diff_set)))
6473

6574
def test_extraction_from_html(self):
6675
contents = dedent('''

0 commit comments

Comments
 (0)