From a007b1bdae507ed2fe53f6fa907dc10ed04a5cb9 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 20:31:45 +0000 Subject: [PATCH 1/8] Initial plan From fc2edc724649e58308ef4e6c675847e23ffe4244 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 20:36:57 +0000 Subject: [PATCH 2/8] Update article_types with @article-type values from SciELO Publishing Schema Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- tests/test_document.py | 13 +++++++++++++ xylose/choices.py | 20 +++++++++++++++++++- 2 files changed, 32 insertions(+), 1 deletion(-) diff --git a/tests/test_document.py b/tests/test_document.py index 5c92bfe..4e7e273 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -2743,6 +2743,19 @@ def test_invalid_document_type(self): article.data['article']['v71'] = [{u'_': u'invalid'}] self.assertEqual(article.document_type, u'undefined') + def test_document_type_from_article_type_attribute(self): + article = self.article + + for article_type in [ + 'addendum', 'article-commentary', 'book-review', 'brief-report', + 'case-report', 'correction', 'data-article', 'editorial', + 'in-brief', 'letter', 'other', 'partial-retraction', + 'rapid-communication', 'referee-report', 'reply', + 'research-article', 'retraction', 'review-article', + ]: + article.data['article']['v71'] = [{u'_': article_type}] + self.assertEqual(article.document_type, article_type) + def test_without_original_title(self): article = self.article diff --git a/xylose/choices.py b/xylose/choices.py index a78215b..1cc3f41 100644 --- a/xylose/choices.py +++ b/xylose/choices.py @@ -72,7 +72,25 @@ 'ra': 'review-article', 'sc': 'rapid-communication', 'tr': 'research-article', - 'up': 'undefined' + 'up': 'undefined', + 'addendum': 'addendum', + 'article-commentary': 'article-commentary', + 'book-review': 'book-review', + 'brief-report': 'brief-report', + 'case-report': 'case-report', + 'correction': 'correction', + 'data-article': 'data-article', + 'editorial': 'editorial', + 'in-brief': 'in-brief', + 'letter': 'letter', + 'other': 'other', + 'partial-retraction': 'partial-retraction', + 'rapid-communication': 'rapid-communication', + 'referee-report': 'referee-report', + 'reply': 'reply', + 'research-article': 'research-article', + 'retraction': 'retraction', + 'review-article': 'review-article', } periodicity = { From fb7eda0ccec37861a3f42f45bfa5afa4afaf081b Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 21:31:10 +0000 Subject: [PATCH 3/8] Fix legacy code mappings and add missing @article-type values per reviewer feedback Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- tests/test_document.py | 10 ++++++---- xylose/choices.py | 12 +++++++++--- 2 files changed, 15 insertions(+), 7 deletions(-) diff --git a/tests/test_document.py b/tests/test_document.py index 4e7e273..08d9c49 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -2748,10 +2748,12 @@ def test_document_type_from_article_type_attribute(self): for article_type in [ 'addendum', 'article-commentary', 'book-review', 'brief-report', - 'case-report', 'correction', 'data-article', 'editorial', - 'in-brief', 'letter', 'other', 'partial-retraction', - 'rapid-communication', 'referee-report', 'reply', - 'research-article', 'retraction', 'review-article', + 'case-report', 'clinical-instruction', 'correction', + 'data-article', 'discussion', 'editorial', + 'expression-of-concern', 'in-brief', 'letter', 'obituary', + 'oration', 'other', 'partial-retraction', 'rapid-communication', + 'referee-report', 'reply', 'research-article', 'retraction', + 'review-article', 'reviewer-report', ]: article.data['article']['v71'] = [{u'_': article_type}] self.assertEqual(article.document_type, article_type) diff --git a/xylose/choices.py b/xylose/choices.py index 1cc3f41..0dfe45f 100644 --- a/xylose/choices.py +++ b/xylose/choices.py @@ -60,9 +60,9 @@ 'ct': 'research-article', 'ed': 'editorial', 'er': 'correction', - 'in': 'editorial', + 'in': 'other', 'le': 'letter', - 'mt': 'research-article', + 'mt': 'review-article', 'nd': 'undefined', 'oa': 'research-article', 'pr': 'press-release', @@ -70,7 +70,7 @@ 'rc': 'book-review', 'rn': 'brief-report', 'ra': 'review-article', - 'sc': 'rapid-communication', + 'sc': 'brief-report', 'tr': 'research-article', 'up': 'undefined', 'addendum': 'addendum', @@ -78,11 +78,16 @@ 'book-review': 'book-review', 'brief-report': 'brief-report', 'case-report': 'case-report', + 'clinical-instruction': 'clinical-instruction', 'correction': 'correction', 'data-article': 'data-article', + 'discussion': 'discussion', 'editorial': 'editorial', + 'expression-of-concern': 'expression-of-concern', 'in-brief': 'in-brief', 'letter': 'letter', + 'obituary': 'obituary', + 'oration': 'oration', 'other': 'other', 'partial-retraction': 'partial-retraction', 'rapid-communication': 'rapid-communication', @@ -91,6 +96,7 @@ 'research-article': 'research-article', 'retraction': 'retraction', 'review-article': 'review-article', + 'reviewer-report': 'reviewer-report', } periodicity = { From 776740c9b3659394204ef175969fe20efb6f8c8f Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 21:51:24 +0000 Subject: [PATCH 4/8] =?UTF-8?q?Fix=20legacy=20code=20mappings:=20in=20?= =?UTF-8?q?=E2=86=92=20interview,=20mt=20=E2=86=92=20methodology?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- xylose/choices.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xylose/choices.py b/xylose/choices.py index 0dfe45f..9e3bc4c 100644 --- a/xylose/choices.py +++ b/xylose/choices.py @@ -60,9 +60,9 @@ 'ct': 'research-article', 'ed': 'editorial', 'er': 'correction', - 'in': 'other', + 'in': 'interview', 'le': 'letter', - 'mt': 'review-article', + 'mt': 'methodology', 'nd': 'undefined', 'oa': 'research-article', 'pr': 'press-release', From f556ec4ca059c1c1cc51352d4a316311fc414b0a Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 21:54:09 +0000 Subject: [PATCH 5/8] =?UTF-8?q?Fix=20legacy=20code=20mapping:=20up=20?= =?UTF-8?q?=E2=86=92=20rapid-communication?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- xylose/choices.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/xylose/choices.py b/xylose/choices.py index 9e3bc4c..a440a21 100644 --- a/xylose/choices.py +++ b/xylose/choices.py @@ -72,7 +72,7 @@ 'ra': 'review-article', 'sc': 'brief-report', 'tr': 'research-article', - 'up': 'undefined', + 'up': 'rapid-communication', 'addendum': 'addendum', 'article-commentary': 'article-commentary', 'book-review': 'book-review', From 3135883184c6e0a1da59c8b9092c63e9b30d2f20 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Tue, 10 Mar 2026 21:56:42 +0000 Subject: [PATCH 6/8] =?UTF-8?q?Fix=20legacy=20mappings:=20in=20=E2=86=92?= =?UTF-8?q?=20other,=20mt=20=E2=86=92=20review-article?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- xylose/choices.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/xylose/choices.py b/xylose/choices.py index a440a21..761eb97 100644 --- a/xylose/choices.py +++ b/xylose/choices.py @@ -60,9 +60,9 @@ 'ct': 'research-article', 'ed': 'editorial', 'er': 'correction', - 'in': 'interview', + 'in': 'other', 'le': 'letter', - 'mt': 'methodology', + 'mt': 'review-article', 'nd': 'undefined', 'oa': 'research-article', 'pr': 'press-release', From 0d3fba8c0a10b8d1b6e34ae9b4e4ced2fa221fb5 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 11 Mar 2026 16:23:05 +0000 Subject: [PATCH 7/8] Refactor article_types based on DOCTOPIC, add sps_doctype and legacy_doctype properties Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- tests/test_document.py | 81 ++++++++++++++++++++++++++++++++++++---- xylose/choices.py | 67 +++++++++++++++++++++++++++++++-- xylose/scielodocument.py | 32 ++++++++++++++++ 3 files changed, 169 insertions(+), 11 deletions(-) diff --git a/tests/test_document.py b/tests/test_document.py index 08d9c49..da9f9a0 100644 --- a/tests/test_document.py +++ b/tests/test_document.py @@ -2747,17 +2747,84 @@ def test_document_type_from_article_type_attribute(self): article = self.article for article_type in [ - 'addendum', 'article-commentary', 'book-review', 'brief-report', - 'case-report', 'clinical-instruction', 'correction', - 'data-article', 'discussion', 'editorial', - 'expression-of-concern', 'in-brief', 'letter', 'obituary', - 'oration', 'other', 'partial-retraction', 'rapid-communication', - 'referee-report', 'reply', 'research-article', 'retraction', - 'review-article', 'reviewer-report', + 'abstract', 'addendum', 'announcement', 'article-commentary', + 'book-review', 'books-received', 'brief-report', 'calendar', + 'case-report', 'clinical-instruction', 'clinical-trial', + 'collection', 'correction', 'data-article', 'discussion', + 'dissertation', 'editorial', 'editorial-material', + 'expression-of-concern', 'guideline', 'in-brief', 'interview', + 'introduction', 'letter', 'meeting-report', 'news', 'obituary', + 'oration', 'other', 'partial-retraction', 'product-review', + 'rapid-communication', 'referee-report', 'reply', 'reprint', + 'research-article', 'retraction', 'review-article', + 'reviewer-report', 'technical-report', 'translation', ]: article.data['article']['v71'] = [{u'_': article_type}] self.assertEqual(article.document_type, article_type) + def test_document_type_from_legacy_v71_values(self): + article = self.article + + legacy_mappings = { + u'an': u'announcement', + u'in': u'interview', + u'pr': u'in-brief', + u'sc': u'rapid-communication', + u're': u'retraction', + } + + for legacy_value, expected_type in legacy_mappings.items(): + article.data['article']['v71'] = [{u'_': legacy_value}] + self.assertEqual(article.document_type, expected_type) + + def test_sps_doctype(self): + article = self.article + self.assertEqual(article.sps_doctype, u'research-article') + + def test_sps_doctype_from_legacy_code(self): + article = self.article + article.data['article']['v71'] = [{u'_': u'er'}] + self.assertEqual(article.sps_doctype, u'correction') + + def test_sps_doctype_from_article_type(self): + article = self.article + article.data['article']['v71'] = [{u'_': u'retraction'}] + self.assertEqual(article.sps_doctype, u'retraction') + + def test_sps_doctype_without_v71(self): + article = self.article + del(article.data['article']['v71']) + self.assertIsNone(article.sps_doctype) + + def test_sps_doctype_invalid(self): + article = self.article + article.data['article']['v71'] = [{u'_': u'invalid'}] + self.assertIsNone(article.sps_doctype) + + def test_legacy_doctype(self): + article = self.article + self.assertEqual(article.legacy_doctype, u'oa') + + def test_legacy_doctype_from_article_type(self): + article = self.article + article.data['article']['v71'] = [{u'_': u'retraction'}] + self.assertEqual(article.legacy_doctype, u're') + + def test_legacy_doctype_from_legacy_code(self): + article = self.article + article.data['article']['v71'] = [{u'_': u'er'}] + self.assertEqual(article.legacy_doctype, u'er') + + def test_legacy_doctype_without_v71(self): + article = self.article + del(article.data['article']['v71']) + self.assertIsNone(article.legacy_doctype) + + def test_legacy_doctype_article_type_self_mapped(self): + article = self.article + article.data['article']['v71'] = [{u'_': u'data-article'}] + self.assertEqual(article.legacy_doctype, u'data-article') + def test_without_original_title(self): article = self.article diff --git a/xylose/choices.py b/xylose/choices.py index 761eb97..ad25e2c 100644 --- a/xylose/choices.py +++ b/xylose/choices.py @@ -53,50 +53,109 @@ article_types = { 'ab': 'abstract', - 'an': 'news', + 'an': 'announcement', 'ax': 'addendum', 'co': 'article-commentary', 'cr': 'case-report', 'ct': 'research-article', 'ed': 'editorial', 'er': 'correction', - 'in': 'other', + 'in': 'interview', 'le': 'letter', 'mt': 'review-article', 'nd': 'undefined', 'oa': 'research-article', - 'pr': 'press-release', + 'pr': 'in-brief', 'pv': 'editorial', + 're': 'retraction', 'rc': 'book-review', 'rn': 'brief-report', 'ra': 'review-article', - 'sc': 'brief-report', + 'sc': 'rapid-communication', 'tr': 'research-article', 'up': 'rapid-communication', + 'zz': 'other', + 'abstract': 'abstract', 'addendum': 'addendum', + 'announcement': 'announcement', 'article-commentary': 'article-commentary', 'book-review': 'book-review', + 'books-received': 'books-received', 'brief-report': 'brief-report', + 'calendar': 'calendar', 'case-report': 'case-report', 'clinical-instruction': 'clinical-instruction', + 'clinical-trial': 'clinical-trial', + 'collection': 'collection', 'correction': 'correction', 'data-article': 'data-article', 'discussion': 'discussion', + 'dissertation': 'dissertation', 'editorial': 'editorial', + 'editorial-material': 'editorial-material', 'expression-of-concern': 'expression-of-concern', + 'guideline': 'guideline', 'in-brief': 'in-brief', + 'interview': 'interview', + 'introduction': 'introduction', 'letter': 'letter', + 'meeting-report': 'meeting-report', + 'news': 'news', 'obituary': 'obituary', 'oration': 'oration', 'other': 'other', 'partial-retraction': 'partial-retraction', + 'product-review': 'product-review', 'rapid-communication': 'rapid-communication', 'referee-report': 'referee-report', 'reply': 'reply', + 'reprint': 'reprint', 'research-article': 'research-article', 'retraction': 'retraction', 'review-article': 'review-article', 'reviewer-report': 'reviewer-report', + 'technical-report': 'technical-report', + 'translation': 'translation', +} + +DOCTOPIC = { + 'research-article': 'oa', + 'editorial': 'ed', + 'abstract': 'ab', + 'announcement': 'an', + 'article-commentary': 'co', + 'case-report': 'cr', + 'letter': 'le', + 'review-article': 'ra', + 'rapid-communication': 'sc', + 'addendum': 'addendum', + 'book-review': 'rc', + 'books-received': 'books-received', + 'brief-report': 'rn', + 'calendar': 'calendar', + 'clinical-trial': 'oa', + 'collection': 'zz', + 'correction': 'er', + 'discussion': 'discussion', + 'dissertation': 'dissertation', + 'editorial-material': 'ed', + 'in-brief': 'pr', + 'introduction': 'ed', + 'meeting-report': 'meeting-report', + 'news': 'news', + 'obituary': 'obituary', + 'oration': 'oration', + 'partial-retraction': 'partial-retraction', + 'product-review': 'product-review', + 'reply': 'reply', + 'reprint': 'reprint', + 'retraction': 're', + 'translation': 'translation', + 'technical-report': 'oa', + 'other': 'zz', + 'guideline': 'guideline', + 'interview': 'in', + 'data-article': 'data-article', } periodicity = { diff --git a/xylose/scielodocument.py b/xylose/scielodocument.py index be1fcd2..e105f3e 100644 --- a/xylose/scielodocument.py +++ b/xylose/scielodocument.py @@ -2234,6 +2234,38 @@ def document_type(self): return choices.article_types['nd'] + @property + def sps_doctype(self): + """ + This method retrieves the SPS @article-type of the given article. + Maps the v71 field value (legacy code or @article-type) to the + corresponding SPS @article-type value. + """ + if 'v71' in self.data['article']: + article_type_code = self.data['article']['v71'][0]['_'] + if article_type_code in choices.article_types: + return choices.article_types[article_type_code] + + return None + + @property + def legacy_doctype(self): + """ + This method retrieves the legacy document type code of the given article. + Maps the v71 field value (legacy code or @article-type) to the + corresponding legacy code using the DOCTOPIC reverse mapping. + """ + if 'v71' in self.data['article']: + article_type_code = self.data['article']['v71'][0]['_'] + # If the v71 value is already a legacy code, return it directly + if len(article_type_code) == 2: + return article_type_code + # If the v71 value is an @article-type, look up the legacy code + if article_type_code in choices.DOCTOPIC: + return choices.DOCTOPIC[article_type_code] + + return None + def original_title(self, iso_format=None): """ This method retrieves just the title related with the original language From 6f0909bf877262b9eb0607b6c1379833625d9551 Mon Sep 17 00:00:00 2001 From: "copilot-swe-agent[bot]" <198982749+Copilot@users.noreply.github.com> Date: Wed, 11 Mar 2026 16:24:35 +0000 Subject: [PATCH 8/8] Fix legacy_doctype to use DOCTOPIC lookup instead of len==2 check Co-authored-by: robertatakenaka <505143+robertatakenaka@users.noreply.github.com> --- xylose/scielodocument.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/xylose/scielodocument.py b/xylose/scielodocument.py index e105f3e..9a7fa22 100644 --- a/xylose/scielodocument.py +++ b/xylose/scielodocument.py @@ -2257,12 +2257,11 @@ def legacy_doctype(self): """ if 'v71' in self.data['article']: article_type_code = self.data['article']['v71'][0]['_'] - # If the v71 value is already a legacy code, return it directly - if len(article_type_code) == 2: - return article_type_code # If the v71 value is an @article-type, look up the legacy code if article_type_code in choices.DOCTOPIC: return choices.DOCTOPIC[article_type_code] + # Otherwise return the value as-is (it may be a legacy code) + return article_type_code return None