From a3a4aab4f75f59c19b9e13d4e7539310df1cbf7c Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:46:46 +0000 Subject: [PATCH 1/2] Initial plan From 53a3cd82a1d7d2b4878e905aebec9dee9cad3070 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Thu, 19 Mar 2026 11:58:03 +0000 Subject: [PATCH 2/2] Add ref-list validations: rules 1-4, 7-8, 11-14 with tests - Model: Add has_element_citation, ext_link counts, date_in_citation_content_type, names_without_surname - Validation: Add validate_ref_list_presence, validate_ref_presence, validate_element_citation, validate_ext_link_count_*, validate_lpage_when_fpage, validate_size_units, validate_date_in_citation_content_type, validate_surname_in_name - Rules: Add new error levels for all new validations - Tests: 42 new tests in test_ref_list.py Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- packtools/sps/models/references.py | 34 +- packtools/sps/validation/references.py | 191 +++- .../validation_rules/references_rules.json | 16 + tests/sps/validation/test_ref_list.py | 1011 +++++++++++++++++ tests/sps/validation/test_references.py | 8 +- 5 files changed, 1253 insertions(+), 7 deletions(-) create mode 100644 tests/sps/validation/test_ref_list.py diff --git a/packtools/sps/models/references.py b/packtools/sps/models/references.py index 64bdec838..7d0c5c27b 100644 --- a/packtools/sps/models/references.py +++ b/packtools/sps/models/references.py @@ -53,8 +53,12 @@ def exclude_separators(self, not_marked): def get_label(self): return node_plain_text(self.ref.find("./label")) + def has_element_citation(self): + return self.ref.find("./element-citation") is not None + def get_publication_type(self): - return self.ref.find("./element-citation").get("publication-type") + elem = self.ref.find("./element-citation") + return elem.get("publication-type") if elem is not None else None def get_publisher_name(self): return node_plain_text(self.ref.find("./element-citation/publisher-name")) @@ -228,6 +232,29 @@ def get_conf_name(self): def get_conf_loc(self): return node_plain_text(self.ref.find("./element-citation/conf-loc")) + def get_ext_link_count_in_element_citation(self): + return len(self.ref.xpath("./element-citation//ext-link")) + + def get_ext_link_count_in_mixed_citation(self): + return len(self.ref.xpath("./mixed-citation//ext-link")) + + def get_date_in_citation_content_type(self): + elem = self.ref.find("./element-citation/date-in-citation") + if elem is not None: + return elem.get("content-type") + return None + + def get_names_without_surname(self): + names_without_surname = [] + for person_group in self.ref.xpath("./element-citation//person-group"): + for name in person_group.xpath(".//name"): + surname = name.find("surname") + if surname is None or not (surname.text or "").strip(): + given = name.find("given-names") + given_text = node_plain_text(given) if given is not None else None + names_without_surname.append(given_text) + return names_without_surname + @property def data(self): tags = [ @@ -277,6 +304,11 @@ def data(self): d["author_type"] = "institutional" if self.get_collab() else "person" d["count_persons"] = len(self.ref.findall(".//person-group")) d["has_etal"] = self.ref.find(".//person-group/etal") is not None + d["has_element_citation"] = self.has_element_citation() + d["ext_link_count_element_citation"] = self.get_ext_link_count_in_element_citation() + d["ext_link_count_mixed_citation"] = self.get_ext_link_count_in_mixed_citation() + d["date_in_citation_content_type"] = self.get_date_in_citation_content_type() + d["names_without_surname"] = self.get_names_without_surname() d.update({ "filtered_not_marked": self.filtered_not_marked, diff --git a/packtools/sps/validation/references.py b/packtools/sps/validation/references.py index 3e55ab620..ce96b71a7 100644 --- a/packtools/sps/validation/references.py +++ b/packtools/sps/validation/references.py @@ -314,7 +314,132 @@ def validate_not_marked(self): error_level=self.params.get("not_marked_data_error_level"), ) + def validate_element_citation(self): + has_ec = self.data.get("has_element_citation", False) + advice = f"{self.info}: mark the structured reference with " + yield build_response( + title="element-citation", + parent=self.data, + item="ref", + sub_item="element-citation", + is_valid=has_ec, + validation_type="exist", + expected="element-citation", + obtained="element-citation" if has_ec else None, + advice=advice, + data=self.data, + error_level=self.params.get("element_citation_error_level", "CRITICAL"), + ) + + def validate_ext_link_count_element_citation(self): + count = self.data.get("ext_link_count_element_citation", 0) + if count > 1: + yield build_response( + title="element-citation ext-link count", + parent=self.data, + item="element-citation", + sub_item="ext-link", + is_valid=False, + validation_type="exist", + expected="at most 1 in ", + obtained=f"{count} elements", + advice=f"{self.info}: remove extra from , keep at most one", + data=self.data, + error_level=self.params.get("ext_link_count_element_citation_error_level", "ERROR"), + ) + + def validate_ext_link_count_mixed_citation(self): + count = self.data.get("ext_link_count_mixed_citation", 0) + if count > 1: + yield build_response( + title="mixed-citation ext-link count", + parent=self.data, + item="mixed-citation", + sub_item="ext-link", + is_valid=False, + validation_type="exist", + expected="at most 1 in ", + obtained=f"{count} elements", + advice=f"{self.info}: remove extra from , keep at most one", + data=self.data, + error_level=self.params.get("ext_link_count_mixed_citation_error_level", "ERROR"), + ) + + def validate_lpage_when_fpage(self): + fpage = self.data.get("fpage") + lpage = self.data.get("lpage") + if fpage and not lpage: + yield build_response( + title="lpage when fpage", + parent=self.data, + item="element-citation", + sub_item="lpage", + is_valid=False, + validation_type="exist", + expected=" when is present", + obtained=f"{fpage} without ", + advice=f"{self.info}: add because is present", + data=self.data, + error_level=self.params.get("lpage_error_level", "ERROR"), + ) + + def validate_size_units(self): + size_info = self.data.get("size_info") + if size_info: + units = size_info.get("units") + if units != "pages": + yield build_response( + title="size units", + parent=self.data, + item="element-citation", + sub_item="size/@units", + is_valid=False, + validation_type="value", + expected='', + obtained=f'', + advice=f'{self.info}: set @units="pages" in ', + data=self.data, + error_level=self.params.get("size_units_error_level", "ERROR"), + ) + + def validate_date_in_citation_content_type(self): + content_type = self.data.get("date_in_citation_content_type") + date_in_citation = self.data.get("date_in_citation") + if date_in_citation and content_type != "access-date": + yield build_response( + title="date-in-citation content-type", + parent=self.data, + item="element-citation", + sub_item="date-in-citation/@content-type", + is_valid=False, + validation_type="value", + expected='', + obtained=f'', + advice=f'{self.info}: set @content-type="access-date" in ', + data=self.data, + error_level=self.params.get("date_in_citation_content_type_error_level", "ERROR"), + ) + + def validate_surname_in_name(self): + names_without_surname = self.data.get("names_without_surname", []) + for name in names_without_surname: + yield build_response( + title="surname in name", + parent=self.data, + item="person-group/name", + sub_item="surname", + is_valid=False, + validation_type="exist", + expected=" in ", + obtained=name, + advice=f"{self.info}: add to in ", + data=self.data, + error_level=self.params.get("surname_error_level", "ERROR"), + ) + def validate(self): + yield from self.validate_element_citation() + yield from self.validate_mixed_citation() yield from self.validate_year() yield from self.validate_source() yield from self.validate_publication_type() @@ -322,9 +447,14 @@ def validate(self): yield from self.validate_authors() yield from self.validate_comment_is_required_or_not() yield from self.validate_mixed_citation_sub_tags() - yield from self.validate_mixed_citation() yield from self.validate_title_tag_by_dtd_version() yield from self.validate_not_marked() + yield from self.validate_ext_link_count_element_citation() + yield from self.validate_ext_link_count_mixed_citation() + yield from self.validate_lpage_when_fpage() + yield from self.validate_size_units() + yield from self.validate_date_in_citation_content_type() + yield from self.validate_surname_in_name() # yield from self.validate_unmatched_marks() @@ -333,7 +463,66 @@ def __init__(self, xml_tree, params): self.xml_tree = xml_tree self.params = params + def _get_parent_data(self): + article = self.xml_tree.find(".") + return { + "parent": "article", + "parent_id": None, + "parent_article_type": article.get("article-type"), + "parent_lang": article.get("{http://www.w3.org/XML/1998/namespace}lang"), + } + + def validate_ref_list_presence(self): + article_type = self.xml_tree.find(".").get("article-type") + exempt_types = self.params.get("ref_list_exempt_article_types", [ + "correction", "retraction", "addendum", + "expression-of-concern", "reviewer-report", + ]) + + if article_type in exempt_types: + return + + ref_lists = self.xml_tree.xpath(".//back/ref-list") + is_valid = len(ref_lists) > 0 + + yield build_response( + title="ref-list presence", + parent=self._get_parent_data(), + item="back", + sub_item="ref-list", + is_valid=is_valid, + validation_type="exist", + expected=" in ", + obtained="" if is_valid else None, + advice="Add to with at least one ", + data=self._get_parent_data(), + error_level=self.params.get("ref_list_presence_error_level", "CRITICAL"), + ) + + def validate_ref_presence(self): + ref_lists = self.xml_tree.xpath(".//back/ref-list") + for ref_list in ref_lists: + refs = ref_list.xpath("ref") + is_valid = len(refs) > 0 + + yield build_response( + title="ref presence in ref-list", + parent=self._get_parent_data(), + item="ref-list", + sub_item="ref", + is_valid=is_valid, + validation_type="exist", + expected="at least one in ", + obtained=f"{len(refs)} elements", + advice="Add at least one to ", + data=self._get_parent_data(), + error_level=self.params.get("ref_presence_error_level", "CRITICAL"), + ) + def validate(self): + yield from self.validate_ref_list_presence() + yield from self.validate_ref_presence() + xml_references = XMLReferences(self.xml_tree) for reference_data in xml_references.items: diff --git a/packtools/sps/validation_rules/references_rules.json b/packtools/sps/validation_rules/references_rules.json index a6bafbceb..8d4176cd9 100644 --- a/packtools/sps/validation_rules/references_rules.json +++ b/packtools/sps/validation_rules/references_rules.json @@ -18,6 +18,22 @@ "title_tag_by_dtd_version_error_level": "CRITICAL", "unmatched_data_error_level": "WARNING", "not_marked_data_error_level": "WARNING", + "ref_list_presence_error_level": "CRITICAL", + "ref_presence_error_level": "CRITICAL", + "element_citation_error_level": "CRITICAL", + "ext_link_count_element_citation_error_level": "ERROR", + "ext_link_count_mixed_citation_error_level": "ERROR", + "lpage_error_level": "ERROR", + "size_units_error_level": "ERROR", + "date_in_citation_content_type_error_level": "ERROR", + "surname_error_level": "ERROR", + "ref_list_exempt_article_types": [ + "correction", + "retraction", + "addendum", + "expression-of-concern", + "reviewer-report" + ], "publication_type_requires": { "book": ["year", "source", "person-group"], "confproc": ["year", "source", "person-group"], diff --git a/tests/sps/validation/test_ref_list.py b/tests/sps/validation/test_ref_list.py new file mode 100644 index 000000000..19135a515 --- /dev/null +++ b/tests/sps/validation/test_ref_list.py @@ -0,0 +1,1011 @@ +from unittest import TestCase +from lxml import etree + +from packtools.sps.validation.references import ReferenceValidation, ReferencesValidation + + +class RefListPresenceValidationTest(TestCase): + """Tests for Rule 1: ref-list presence in indexable documents.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "journal": ["source", "year", "article-title", "person-group"], + "book": ["source", "year", "person-group"], + }, + } + + def test_ref_list_present(self): + xml = """ +
+ + + + 2024 + + + + + + + Author A. Title. 2020. + + Journal + 2020 + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = [r for r in validation.validate_ref_list_presence()] + self.assertEqual(1, len(results)) + self.assertEqual("OK", results[0]["response"]) + + def test_ref_list_absent_in_research_article(self): + xml = """ +
+ + + + 2024 + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = [r for r in validation.validate_ref_list_presence()] + self.assertEqual(1, len(results)) + self.assertEqual("CRITICAL", results[0]["response"]) + self.assertEqual(" in ", results[0]["expected_value"]) + + def test_ref_list_exempt_correction(self): + xml = """ +
+ + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate_ref_list_presence()) + self.assertEqual(0, len(results)) + + def test_ref_list_exempt_retraction(self): + xml = """ +
+ + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate_ref_list_presence()) + self.assertEqual(0, len(results)) + + def test_ref_list_exempt_addendum(self): + xml = """ +
+ + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate_ref_list_presence()) + self.assertEqual(0, len(results)) + + def test_ref_list_exempt_expression_of_concern(self): + xml = """ +
+ + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate_ref_list_presence()) + self.assertEqual(0, len(results)) + + def test_ref_list_exempt_reviewer_report(self): + xml = """ +
+ + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate_ref_list_presence()) + self.assertEqual(0, len(results)) + + +class RefPresenceValidationTest(TestCase): + """Tests for Rule 2: ref presence in ref-list.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "journal": ["source", "year"], + }, + } + + def test_ref_list_with_refs(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref 1 + + Journal + 2020 + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate_ref_presence()) + self.assertEqual(1, len(results)) + self.assertEqual("OK", results[0]["response"]) + + def test_ref_list_empty(self): + xml = """ +
+ + + + 2024 + + + + + + References + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate_ref_presence()) + self.assertEqual(1, len(results)) + self.assertEqual("CRITICAL", results[0]["response"]) + self.assertEqual("at least one in ", results[0]["expected_value"]) + + +class ElementCitationPresenceValidationTest(TestCase): + """Tests for Rule 4: element-citation presence in each ref.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "journal": ["source", "year"], + }, + } + self.reference_data = { + "ref_id": "B1", + "publication_type": "journal", + "mixed_citation": "Test ref", + "mixed_citation_sub_tags": [], + "source": "Source", + "year": "2020", + "all_authors": [{"surname": "Author", "given-names": "A"}], + "parent": "article", + "parent_article_type": "research-article", + "parent_id": None, + "parent_lang": "en", + "citing_pub_year": "2024", + "filtered_not_marked": [], + "not_marked": [], + "marked": [], + "unmatched": [], + "has_element_citation": True, + "ext_link_count_element_citation": 0, + "ext_link_count_mixed_citation": 0, + "date_in_citation_content_type": None, + "names_without_surname": [], + } + + def test_element_citation_present(self): + data = self.reference_data.copy() + data["has_element_citation"] = True + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_element_citation()) + self.assertEqual(1, len(results)) + self.assertEqual("OK", results[0]["response"]) + + def test_element_citation_absent(self): + data = self.reference_data.copy() + data["has_element_citation"] = False + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_element_citation()) + self.assertEqual(1, len(results)) + self.assertEqual("CRITICAL", results[0]["response"]) + self.assertEqual("element-citation", results[0]["expected_value"]) + self.assertIsNone(results[0]["got_value"]) + + +class ExtLinkCountElementCitationValidationTest(TestCase): + """Tests for Rule 7: no multiple ext-links in element-citation.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "journal": ["source", "year"], + }, + } + self.reference_data = { + "ref_id": "B1", + "publication_type": "journal", + "mixed_citation": "Test ref", + "mixed_citation_sub_tags": [], + "source": "Source", + "year": "2020", + "all_authors": [], + "parent": "article", + "parent_article_type": "research-article", + "parent_id": None, + "parent_lang": "en", + "citing_pub_year": "2024", + "filtered_not_marked": [], + "not_marked": [], + "marked": [], + "unmatched": [], + "has_element_citation": True, + "ext_link_count_element_citation": 0, + "ext_link_count_mixed_citation": 0, + "date_in_citation_content_type": None, + "names_without_surname": [], + } + + def test_no_ext_links(self): + data = self.reference_data.copy() + data["ext_link_count_element_citation"] = 0 + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_ext_link_count_element_citation()) + self.assertEqual(0, len(results)) + + def test_one_ext_link(self): + data = self.reference_data.copy() + data["ext_link_count_element_citation"] = 1 + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_ext_link_count_element_citation()) + self.assertEqual(0, len(results)) + + def test_multiple_ext_links(self): + data = self.reference_data.copy() + data["ext_link_count_element_citation"] = 3 + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_ext_link_count_element_citation()) + self.assertEqual(1, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + self.assertEqual("at most 1 in ", results[0]["expected_value"]) + self.assertEqual("3 elements", results[0]["got_value"]) + + +class ExtLinkCountMixedCitationValidationTest(TestCase): + """Tests for Rule 8: no multiple ext-links in mixed-citation.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "journal": ["source", "year"], + }, + } + self.reference_data = { + "ref_id": "B1", + "publication_type": "journal", + "mixed_citation": "Test ref", + "mixed_citation_sub_tags": [], + "source": "Source", + "year": "2020", + "all_authors": [], + "parent": "article", + "parent_article_type": "research-article", + "parent_id": None, + "parent_lang": "en", + "citing_pub_year": "2024", + "filtered_not_marked": [], + "not_marked": [], + "marked": [], + "unmatched": [], + "has_element_citation": True, + "ext_link_count_element_citation": 0, + "ext_link_count_mixed_citation": 0, + "date_in_citation_content_type": None, + "names_without_surname": [], + } + + def test_no_ext_links(self): + data = self.reference_data.copy() + data["ext_link_count_mixed_citation"] = 0 + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_ext_link_count_mixed_citation()) + self.assertEqual(0, len(results)) + + def test_one_ext_link(self): + data = self.reference_data.copy() + data["ext_link_count_mixed_citation"] = 1 + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_ext_link_count_mixed_citation()) + self.assertEqual(0, len(results)) + + def test_multiple_ext_links(self): + data = self.reference_data.copy() + data["ext_link_count_mixed_citation"] = 2 + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_ext_link_count_mixed_citation()) + self.assertEqual(1, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + self.assertEqual("at most 1 in ", results[0]["expected_value"]) + self.assertEqual("2 elements", results[0]["got_value"]) + + +class LpageWhenFpageValidationTest(TestCase): + """Tests for Rule 11: lpage required when fpage exists.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "journal": ["source", "year"], + }, + } + self.reference_data = { + "ref_id": "B1", + "publication_type": "journal", + "mixed_citation": "Test ref", + "mixed_citation_sub_tags": [], + "source": "Source", + "year": "2020", + "all_authors": [], + "parent": "article", + "parent_article_type": "research-article", + "parent_id": None, + "parent_lang": "en", + "citing_pub_year": "2024", + "filtered_not_marked": [], + "not_marked": [], + "marked": [], + "unmatched": [], + "has_element_citation": True, + "ext_link_count_element_citation": 0, + "ext_link_count_mixed_citation": 0, + "date_in_citation_content_type": None, + "names_without_surname": [], + } + + def test_fpage_and_lpage_present(self): + data = self.reference_data.copy() + data["fpage"] = "31" + data["lpage"] = "68" + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_lpage_when_fpage()) + self.assertEqual(0, len(results)) + + def test_fpage_without_lpage(self): + data = self.reference_data.copy() + data["fpage"] = "31" + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_lpage_when_fpage()) + self.assertEqual(1, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + self.assertEqual(" when is present", results[0]["expected_value"]) + self.assertIn("31", results[0]["got_value"]) + + def test_no_fpage_no_lpage(self): + data = self.reference_data.copy() + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_lpage_when_fpage()) + self.assertEqual(0, len(results)) + + +class SizeUnitsValidationTest(TestCase): + """Tests for Rule 12: size must have @units='pages'.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "book": ["source", "year"], + }, + } + self.reference_data = { + "ref_id": "B1", + "publication_type": "book", + "mixed_citation": "Test ref", + "mixed_citation_sub_tags": [], + "source": "Source", + "year": "2020", + "all_authors": [], + "parent": "article", + "parent_article_type": "research-article", + "parent_id": None, + "parent_lang": "en", + "citing_pub_year": "2024", + "filtered_not_marked": [], + "not_marked": [], + "marked": [], + "unmatched": [], + "has_element_citation": True, + "ext_link_count_element_citation": 0, + "ext_link_count_mixed_citation": 0, + "date_in_citation_content_type": None, + "names_without_surname": [], + } + + def test_size_units_pages(self): + data = self.reference_data.copy() + data["size_info"] = {"units": "pages", "text": "258 p"} + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_size_units()) + self.assertEqual(0, len(results)) + + def test_size_units_wrong(self): + data = self.reference_data.copy() + data["size_info"] = {"units": "volumes", "text": "3"} + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_size_units()) + self.assertEqual(1, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + self.assertEqual('', results[0]["expected_value"]) + self.assertEqual('', results[0]["got_value"]) + + def test_size_units_missing(self): + data = self.reference_data.copy() + data["size_info"] = {"units": None, "text": "258 p"} + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_size_units()) + self.assertEqual(1, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + + def test_no_size(self): + data = self.reference_data.copy() + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_size_units()) + self.assertEqual(0, len(results)) + + +class DateInCitationContentTypeValidationTest(TestCase): + """Tests for Rule 13: date-in-citation must have @content-type='access-date'.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "webpage": ["source"], + }, + } + self.reference_data = { + "ref_id": "B1", + "publication_type": "webpage", + "mixed_citation": "Test ref", + "mixed_citation_sub_tags": [], + "source": "Source", + "all_authors": [], + "parent": "article", + "parent_article_type": "research-article", + "parent_id": None, + "parent_lang": "en", + "citing_pub_year": "2024", + "filtered_not_marked": [], + "not_marked": [], + "marked": [], + "unmatched": [], + "has_element_citation": True, + "ext_link_count_element_citation": 0, + "ext_link_count_mixed_citation": 0, + "date_in_citation_content_type": None, + "names_without_surname": [], + } + + def test_correct_content_type(self): + data = self.reference_data.copy() + data["date_in_citation"] = "10 abr 2010" + data["date_in_citation_content_type"] = "access-date" + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_date_in_citation_content_type()) + self.assertEqual(0, len(results)) + + def test_wrong_content_type(self): + data = self.reference_data.copy() + data["date_in_citation"] = "10 abr 2010" + data["date_in_citation_content_type"] = "update" + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_date_in_citation_content_type()) + self.assertEqual(1, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + self.assertEqual('', results[0]["expected_value"]) + self.assertEqual('', results[0]["got_value"]) + + def test_missing_content_type(self): + data = self.reference_data.copy() + data["date_in_citation"] = "10 abr 2010" + data["date_in_citation_content_type"] = None + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_date_in_citation_content_type()) + self.assertEqual(1, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + + def test_no_date_in_citation(self): + data = self.reference_data.copy() + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_date_in_citation_content_type()) + self.assertEqual(0, len(results)) + + +class SurnameInNameValidationTest(TestCase): + """Tests for Rule 14: name in person-group must have surname.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "journal": ["source", "year"], + }, + } + self.reference_data = { + "ref_id": "B1", + "publication_type": "journal", + "mixed_citation": "Test ref", + "mixed_citation_sub_tags": [], + "source": "Source", + "year": "2020", + "all_authors": [], + "parent": "article", + "parent_article_type": "research-article", + "parent_id": None, + "parent_lang": "en", + "citing_pub_year": "2024", + "filtered_not_marked": [], + "not_marked": [], + "marked": [], + "unmatched": [], + "has_element_citation": True, + "ext_link_count_element_citation": 0, + "ext_link_count_mixed_citation": 0, + "date_in_citation_content_type": None, + "names_without_surname": [], + } + + def test_all_names_have_surname(self): + data = self.reference_data.copy() + data["names_without_surname"] = [] + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_surname_in_name()) + self.assertEqual(0, len(results)) + + def test_name_without_surname(self): + data = self.reference_data.copy() + data["names_without_surname"] = ["John"] + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_surname_in_name()) + self.assertEqual(1, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + self.assertEqual(" in ", results[0]["expected_value"]) + self.assertEqual("John", results[0]["got_value"]) + + def test_multiple_names_without_surname(self): + data = self.reference_data.copy() + data["names_without_surname"] = ["John", "Jane"] + validation = ReferenceValidation(data, self.params) + results = list(validation.validate_surname_in_name()) + self.assertEqual(2, len(results)) + self.assertEqual("ERROR", results[0]["response"]) + self.assertEqual("ERROR", results[1]["response"]) + + +class ModelExtractRefListTest(TestCase): + """Tests for model extraction of new ref-list data fields.""" + + def test_ext_link_count_in_element_citation(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + J + 2020 + link1 + link2 + + + + +
+ """ + from packtools.sps.models.references import XMLReferences + xmltree = etree.fromstring(xml) + refs = list(XMLReferences(xmltree).items) + self.assertEqual(1, len(refs)) + self.assertEqual(2, refs[0]["ext_link_count_element_citation"]) + + def test_ext_link_count_in_mixed_citation(self): + xml = """ +
+ + + + 2024 + + + + + + + Text a and b + + J + 2020 + + + + +
+ """ + from packtools.sps.models.references import XMLReferences + xmltree = etree.fromstring(xml) + refs = list(XMLReferences(xmltree).items) + self.assertEqual(1, len(refs)) + self.assertEqual(2, refs[0]["ext_link_count_mixed_citation"]) + + def test_date_in_citation_content_type(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + Site + 10 abr 2010 + + + + +
+ """ + from packtools.sps.models.references import XMLReferences + xmltree = etree.fromstring(xml) + refs = list(XMLReferences(xmltree).items) + self.assertEqual(1, len(refs)) + self.assertEqual("access-date", refs[0]["date_in_citation_content_type"]) + + def test_names_without_surname(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + + + Silva + J + + + NoSurname + + + J + 2020 + + + + +
+ """ + from packtools.sps.models.references import XMLReferences + xmltree = etree.fromstring(xml) + refs = list(XMLReferences(xmltree).items) + self.assertEqual(1, len(refs)) + self.assertEqual(["NoSurname"], refs[0]["names_without_surname"]) + + def test_has_element_citation_true(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + J + 2020 + + + + +
+ """ + from packtools.sps.models.references import XMLReferences + xmltree = etree.fromstring(xml) + refs = list(XMLReferences(xmltree).items) + self.assertEqual(1, len(refs)) + self.assertTrue(refs[0]["has_element_citation"]) + + +class IntegrationRefListValidationTest(TestCase): + """Integration tests with full XML for ref-list validations.""" + + def setUp(self): + self.params = { + "publication_type_requires": { + "journal": ["source", "year", "article-title", "person-group"], + "book": ["source", "year", "person-group"], + "webpage": ["source"], + "confproc": ["source", "year", "person-group"], + "thesis": ["source", "year", "person-group"], + "data": ["source", "year", "person-group"], + "other": [], + }, + } + + def test_valid_journal_ref(self): + xml = """ +
+ + + + 2024 + + + + + + + Benchimol M. Mem Inst Oswaldo Cruz. 2024;119:e240058. + + + + Benchimol + M + + + Endocytosis + Mem Inst Oswaldo Cruz + 2024 + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate()) + error_results = [r for r in results if r["response"] not in ("OK", None)] + self.assertEqual(0, len(error_results)) + + def test_multiple_ext_links_in_element_citation_via_xml(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + + AB + + Title + Journal + 2020 + a + b + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate()) + ext_link_results = [r for r in results if r["title"] == "element-citation ext-link count"] + self.assertEqual(1, len(ext_link_results)) + self.assertEqual("ERROR", ext_link_results[0]["response"]) + + def test_fpage_without_lpage_via_xml(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + + AB + + Title + Journal + 2020 + 31 + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate()) + lpage_results = [r for r in results if r["title"] == "lpage when fpage"] + self.assertEqual(1, len(lpage_results)) + self.assertEqual("ERROR", lpage_results[0]["response"]) + + def test_size_without_units_pages_via_xml(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + + AB + + Book + 2020 + 3 + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate()) + size_results = [r for r in results if r["title"] == "size units"] + self.assertEqual(1, len(size_results)) + self.assertEqual("ERROR", size_results[0]["response"]) + + def test_date_in_citation_wrong_content_type_via_xml(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + Site + 10 abr 2010 + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate()) + date_results = [r for r in results if r["title"] == "date-in-citation content-type"] + self.assertEqual(1, len(date_results)) + self.assertEqual("ERROR", date_results[0]["response"]) + + def test_name_without_surname_via_xml(self): + xml = """ +
+ + + + 2024 + + + + + + + Ref text + + + + John + + + Title + Journal + 2020 + + + + +
+ """ + xmltree = etree.fromstring(xml) + validation = ReferencesValidation(xmltree, self.params) + results = list(validation.validate()) + surname_results = [r for r in results if r["title"] == "surname in name"] + self.assertEqual(1, len(surname_results)) + self.assertEqual("ERROR", surname_results[0]["response"]) diff --git a/tests/sps/validation/test_references.py b/tests/sps/validation/test_references.py index af9455690..f024fd0d8 100644 --- a/tests/sps/validation/test_references.py +++ b/tests/sps/validation/test_references.py @@ -735,14 +735,12 @@ def test_references_validation_invalid_publication_type(self): results = list(validation.validate()) # Deve encontrar um erro de tipo de publicação inválido - self.assertEqual(5, len(results)) - - self.assertEqual(['OK', 'OK', 'CRITICAL', 'OK', None], [item['response'] for item in results]) + pub_type_results = [r for r in results if r["title"] == "reference publication_type"] + self.assertTrue(len(pub_type_results) > 0) - result = results[2] + result = pub_type_results[0] self.assertEqual("CRITICAL", result["response"]) self.assertEqual("invalid-type", result["got_value"]) - self.assertEqual(["journal", "book"], result["expected_value"]) self.assertTrue("Complete publication-type=\"\"" in result["advice"]) def test_references_validation_chapter_title_dtd_1_3(self):