Skip to content
82 changes: 82 additions & 0 deletions tests/test_document.py
Original file line number Diff line number Diff line change
Expand Up @@ -2743,6 +2743,88 @@ def test_invalid_document_type(self):
article.data['article']['v71'] = [{u'_': u'invalid'}]
self.assertEqual(article.document_type, u'undefined')

def test_document_type_from_article_type_attribute(self):
Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test name mentions an "article_type attribute", but the test is setting the legacy field v71 directly. Renaming it to reflect what it actually validates (e.g., that document_type accepts JATS @article-type values stored in v71) would avoid confusion for future maintainers.

Suggested change
def test_document_type_from_article_type_attribute(self):
def test_document_type_accepts_jats_article_type_values_from_v71(self):

Copilot uses AI. Check for mistakes.
article = self.article

for article_type in [
'abstract', 'addendum', 'announcement', 'article-commentary',
'book-review', 'books-received', 'brief-report', 'calendar',
'case-report', 'clinical-instruction', 'clinical-trial',
'collection', 'correction', 'data-article', 'discussion',
'dissertation', 'editorial', 'editorial-material',
'expression-of-concern', 'guideline', 'in-brief', 'interview',
'introduction', 'letter', 'meeting-report', 'news', 'obituary',
'oration', 'other', 'partial-retraction', 'product-review',
'rapid-communication', 'referee-report', 'reply', 'reprint',
'research-article', 'retraction', 'review-article',
'reviewer-report', 'technical-report', 'translation',
]:
article.data['article']['v71'] = [{u'_': article_type}]
self.assertEqual(article.document_type, article_type)

Copy link

Copilot AI Mar 10, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test only covers the new identity mappings (e.g. v71='retraction' -> 'retraction') but it doesn’t assert the corrected legacy-code mappings introduced in this PR (in/mt/sc/up). Add assertions for v71 values 'in', 'mt', 'sc', and 'up' mapping to 'other', 'review-article', 'brief-report', and 'rapid-communication' respectively, so the behavior change is protected by unit tests.

Suggested change
def test_document_type_from_legacy_v71_values(self):
article = self.article
legacy_mappings = {
u'in': u'other',
u'mt': u'review-article',
u'sc': u'brief-report',
u'up': u'rapid-communication',
}
for legacy_value, expected_type in legacy_mappings.items():
article.data['article']['v71'] = [{u'_': legacy_value}]
self.assertEqual(article.document_type, expected_type)

Copilot uses AI. Check for mistakes.
def test_document_type_from_legacy_v71_values(self):
article = self.article

legacy_mappings = {
u'an': u'announcement',
u'in': u'interview',
u'pr': u'in-brief',
u'sc': u'rapid-communication',
u're': u'retraction',
}

for legacy_value, expected_type in legacy_mappings.items():
article.data['article']['v71'] = [{u'_': legacy_value}]
self.assertEqual(article.document_type, expected_type)

def test_sps_doctype(self):
article = self.article
self.assertEqual(article.sps_doctype, u'research-article')

def test_sps_doctype_from_legacy_code(self):
article = self.article
article.data['article']['v71'] = [{u'_': u'er'}]
self.assertEqual(article.sps_doctype, u'correction')

def test_sps_doctype_from_article_type(self):
article = self.article
article.data['article']['v71'] = [{u'_': u'retraction'}]
self.assertEqual(article.sps_doctype, u'retraction')

def test_sps_doctype_without_v71(self):
article = self.article
del(article.data['article']['v71'])
self.assertIsNone(article.sps_doctype)

def test_sps_doctype_invalid(self):
article = self.article
article.data['article']['v71'] = [{u'_': u'invalid'}]
self.assertIsNone(article.sps_doctype)

def test_legacy_doctype(self):
article = self.article
self.assertEqual(article.legacy_doctype, u'oa')

def test_legacy_doctype_from_article_type(self):
article = self.article
article.data['article']['v71'] = [{u'_': u'retraction'}]
self.assertEqual(article.legacy_doctype, u're')

def test_legacy_doctype_from_legacy_code(self):
article = self.article
article.data['article']['v71'] = [{u'_': u'er'}]
self.assertEqual(article.legacy_doctype, u'er')

Comment on lines +2804 to +2817
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

There’s no test covering legacy_doctype when v71 contains an invalid/unrecognized value (similar to the existing test_invalid_document_type / test_sps_doctype_invalid). Adding an explicit test for this case will help lock in the intended behavior (likely None).

Copilot uses AI. Check for mistakes.
def test_legacy_doctype_without_v71(self):
article = self.article
del(article.data['article']['v71'])
self.assertIsNone(article.legacy_doctype)

def test_legacy_doctype_article_type_self_mapped(self):
article = self.article
article.data['article']['v71'] = [{u'_': u'data-article'}]
self.assertEqual(article.legacy_doctype, u'data-article')
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This test locks in the behavior that legacy_doctype returns the input @article-type ('data-article') when there is no legacy mapping. That conflicts with the intent implied by the property name and docstring (returning a legacy v71 code). If the desired behavior is “legacy code or None when not available”, update this test accordingly and adjust legacy_doctype/DOCTOPIC so unmapped @article-type values yield None.

Suggested change
self.assertEqual(article.legacy_doctype, u'data-article')
self.assertIsNone(article.legacy_doctype)

Copilot uses AI. Check for mistakes.

def test_without_original_title(self):
article = self.article

Expand Down
93 changes: 88 additions & 5 deletions xylose/choices.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,26 +53,109 @@

article_types = {
'ab': 'abstract',
'an': 'news',
'an': 'announcement',
'ax': 'addendum',
'co': 'article-commentary',
'cr': 'case-report',
'ct': 'research-article',
'ed': 'editorial',
'er': 'correction',
'in': 'editorial',
'in': 'interview',
'le': 'letter',
'mt': 'research-article',
'mt': 'review-article',
'nd': 'undefined',
'oa': 'research-article',
'pr': 'press-release',
'pr': 'in-brief',
'pv': 'editorial',
're': 'retraction',
'rc': 'book-review',
'rn': 'brief-report',
'ra': 'review-article',
'sc': 'rapid-communication',
'tr': 'research-article',
'up': 'undefined'
'up': 'rapid-communication',
'zz': 'other',
'abstract': 'abstract',
'addendum': 'addendum',
'announcement': 'announcement',
'article-commentary': 'article-commentary',
'book-review': 'book-review',
'books-received': 'books-received',
'brief-report': 'brief-report',
'calendar': 'calendar',
'case-report': 'case-report',
'clinical-instruction': 'clinical-instruction',
'clinical-trial': 'clinical-trial',
'collection': 'collection',
'correction': 'correction',
'data-article': 'data-article',
'discussion': 'discussion',
'dissertation': 'dissertation',
'editorial': 'editorial',
'editorial-material': 'editorial-material',
'expression-of-concern': 'expression-of-concern',
'guideline': 'guideline',
'in-brief': 'in-brief',
'interview': 'interview',
'introduction': 'introduction',
'letter': 'letter',
'meeting-report': 'meeting-report',
'news': 'news',
'obituary': 'obituary',
'oration': 'oration',
'other': 'other',
'partial-retraction': 'partial-retraction',
'product-review': 'product-review',
'rapid-communication': 'rapid-communication',
'referee-report': 'referee-report',
'reply': 'reply',
'reprint': 'reprint',
'research-article': 'research-article',
'retraction': 'retraction',
'review-article': 'review-article',
'reviewer-report': 'reviewer-report',
'technical-report': 'technical-report',
'translation': 'translation',
}

DOCTOPIC = {
'research-article': 'oa',
'editorial': 'ed',
'abstract': 'ab',
'announcement': 'an',
'article-commentary': 'co',
'case-report': 'cr',
'letter': 'le',
'review-article': 'ra',
'rapid-communication': 'sc',
'addendum': 'addendum',
'book-review': 'rc',
'books-received': 'books-received',
'brief-report': 'rn',
'calendar': 'calendar',
'clinical-trial': 'oa',
'collection': 'zz',
'correction': 'er',
'discussion': 'discussion',
'dissertation': 'dissertation',
'editorial-material': 'ed',
'in-brief': 'pr',
'introduction': 'ed',
'meeting-report': 'meeting-report',
'news': 'news',
'obituary': 'obituary',
'oration': 'oration',
'partial-retraction': 'partial-retraction',
'product-review': 'product-review',
'reply': 'reply',
'reprint': 'reprint',
'retraction': 're',
'translation': 'translation',
'technical-report': 'oa',
'other': 'zz',
'guideline': 'guideline',
'interview': 'in',
'data-article': 'data-article',
Comment on lines +131 to +158
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In DOCTOPIC, values are meant to be legacy v71 codes (e.g. ax, rc, rn). However, several entries map to non-legacy strings (e.g. addendum: 'addendum' even though the legacy code already exists as ax). This makes legacy_doctype return values that are not actually legacy codes. Consider mapping only to real legacy codes (e.g. addendum -> ax) and omitting entries that have no legacy equivalent (so callers can get None).

Suggested change
'addendum': 'addendum',
'book-review': 'rc',
'books-received': 'books-received',
'brief-report': 'rn',
'calendar': 'calendar',
'clinical-trial': 'oa',
'collection': 'zz',
'correction': 'er',
'discussion': 'discussion',
'dissertation': 'dissertation',
'editorial-material': 'ed',
'in-brief': 'pr',
'introduction': 'ed',
'meeting-report': 'meeting-report',
'news': 'news',
'obituary': 'obituary',
'oration': 'oration',
'partial-retraction': 'partial-retraction',
'product-review': 'product-review',
'reply': 'reply',
'reprint': 'reprint',
'retraction': 're',
'translation': 'translation',
'technical-report': 'oa',
'other': 'zz',
'guideline': 'guideline',
'interview': 'in',
'data-article': 'data-article',
'addendum': 'ax',
'book-review': 'rc',
'brief-report': 'rn',
'clinical-trial': 'oa',
'collection': 'zz',
'correction': 'er',
'editorial-material': 'ed',
'in-brief': 'pr',
'introduction': 'ed',
'retraction': 're',
'technical-report': 'oa',
'other': 'zz',
'interview': 'in',

Copilot uses AI. Check for mistakes.
}

periodicity = {
Expand Down
31 changes: 31 additions & 0 deletions xylose/scielodocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -2234,6 +2234,37 @@ def document_type(self):

return choices.article_types['nd']

@property
def sps_doctype(self):
"""
This method retrieves the SPS @article-type of the given article.
Maps the v71 field value (legacy code or @article-type) to the
corresponding SPS @article-type value.
"""
if 'v71' in self.data['article']:
article_type_code = self.data['article']['v71'][0]['_']
if article_type_code in choices.article_types:
return choices.article_types[article_type_code]

return None

@property
def legacy_doctype(self):
"""
This method retrieves the legacy document type code of the given article.
Maps the v71 field value (legacy code or @article-type) to the
corresponding legacy code using the DOCTOPIC reverse mapping.
"""
if 'v71' in self.data['article']:
article_type_code = self.data['article']['v71'][0]['_']
# If the v71 value is an @article-type, look up the legacy code
if article_type_code in choices.DOCTOPIC:
return choices.DOCTOPIC[article_type_code]
# Otherwise return the value as-is (it may be a legacy code)
return article_type_code
Comment on lines +2263 to +2264
Copy link

Copilot AI Mar 11, 2026

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

legacy_doctype currently returns article_type_code as-is when it is not found in choices.DOCTOPIC. That means an invalid/unrecognized v71 value (e.g. 'invalid') will be returned instead of None, and some non-legacy @article-type strings can also be returned depending on DOCTOPIC contents. Consider returning None when v71 is neither a known legacy code nor mappable via DOCTOPIC (and, if needed, explicitly validate legacy codes against a whitelist such as the 2-letter keys in choices.article_types).

Suggested change
# Otherwise return the value as-is (it may be a legacy code)
return article_type_code
# If the v71 value is already a known legacy code, return it
if article_type_code in choices.article_types:
return article_type_code
# Unknown or invalid v71 value
return None

Copilot uses AI. Check for mistakes.

return None

def original_title(self, iso_format=None):
"""
This method retrieves just the title related with the original language
Expand Down
Loading