Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
106 changes: 76 additions & 30 deletions xylose/choices.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,10 @@
# encoding: utf-8

import requests
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry


CREATIVE_COMMONS_TEXTS = {
"BY": "Attribution",
"BY-ND": "Attribution-NoDerivatives",
Expand Down Expand Up @@ -97,36 +102,6 @@
u'Z': u'undefined'
}

collections = {
'scl': ['Brazil', 'www.scielo.br'],
'arg': ['Argentina', 'www.scielo.org.ar'],
'cub': ['Cuba', 'scielo.sld.cu'],
'esp': ['Spain', 'scielo.isciii.es'],
'col': ['Colombia', 'www.scielo.org.co'],
'sss': ['Social Sciences', 'socialsciences.scielo.org'],
'spa': ['Public Health', 'www.scielosp.org'],
'mex': ['Mexico', 'www.scielo.org.mx'],
'prt': ['Portugal', 'www.scielo.mec.pt'],
'cri': ['Costa Rica', 'www.scielo.sa.cr'],
'ven': ['Venezuela', 'www.scielo.org.ve'],
'ury': ['Uruguay', 'www.scielo.edu.uy'],
'per': ['Peru', 'www.scielo.org.pe'],
'chl': ['Chile', 'www.scielo.cl'],
'sza': ['South Africa', 'www.scielo.org.za'],
'bol': ['Bolivia', 'www.scielo.org.bo'],
'pry': ['Paraguay', 'scielo.iics.una.py'],
'psi': ['PEPSIC', 'pepsic.bvsalud.org'],
'ppg': ['PPEGEO', 'ppegeo.igc.usp.br'],
'rve': ['RevOdonto', 'revodonto.bvsalud.org'],
'edc': ['Educa', 'educa.fcc.org.br'],
'inv': [u'Inovação', 'inovacao.scielo.br'],
'cic': [u'Ciência e Cultura', 'cienciaecultura.bvs.br'],
'cci': [u'ComCiência', 'comciencia.scielo.br'],
'wid': ['West Indians', 'caribbean.scielo.org'],
'pro': ['Proceedings', 'www.proceedings.scielo.br'],
'ecu': ['Ecuador', 'scielo.senescyt.gob.ec'],
}

journal_status = {
'c': u'current',
'd': u'deceased',
Expand Down Expand Up @@ -216,3 +191,74 @@
u'diciembre': 12,
u'december': 12
}


def fetch_collection_metadata(collection_code):
"""
Fetch metadata for a given SciELO collection code from the ArticleMeta API.

Args:
collection_code (str): SciELO collection code (e.g., 'ven', 'scl').

Returns:
dict or None: JSON metadata if successful, None otherwise.
"""
if not isinstance(collection_code, str) or not collection_code.strip():
return None

base_url = "https://articlemeta.scielo.org"
endpoint = f"/api/v1/collection/?code={collection_code}"
full_url = f"{base_url}{endpoint}"

session = requests.Session()
retry_policy = Retry(
total=3,
backoff_factor=1,
status_forcelist=[500, 502, 503, 504],
allowed_methods=["GET"]
)
adapter = HTTPAdapter(max_retries=retry_policy)
session.mount(base_url, adapter)

try:
response = session.get(full_url, timeout=10)
Copy link

Copilot AI Nov 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The function creates a new session with retry configuration on every call. Consider implementing a module-level session or caching mechanism to avoid the overhead of creating a new session for each collection lookup, especially if multiple collections are queried in succession.

Copilot uses AI. Check for mistakes.
response.raise_for_status()
return response.json()
except (requests.RequestException, ValueError):
return None
Comment on lines +227 to +228
Copy link

Copilot AI Nov 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Error handling silently suppresses all exceptions without logging. Consider logging failures to help diagnose API connectivity issues or data parsing problems in production environments.

Copilot uses AI. Check for mistakes.


def get_collection_name_and_url(metadata):
"""
Extract the collection's original name and display URL from metadata.

Args:
metadata (dict): Metadata dictionary returned by the API.

Returns:
list[str]: [original_name, formatted_url] or an empty list on failure.
"""
if not isinstance(metadata, dict):
return []

try:
name = metadata["original_name"]
acron2 = metadata["acron2"]
url = f"www.scielo.org.{acron2}"
return [name, url]
except KeyError:
return []


def get_collection_summary(collection_code):
"""
Retrieve a summarized list with the collection's name and display URL.

Args:
collection_code (str): SciELO collection code (e.g., 'ven', 'scl').

Returns:
list[str]: [original_name, formatted_url] or an empty list if retrieval fails.
"""
metadata = fetch_collection_metadata(collection_code)
return get_collection_name_and_url(metadata) if metadata else []
Comment on lines +263 to +264
Copy link

Copilot AI Nov 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Consider adding caching for collection metadata to avoid repeated API calls for the same collection code. The static dictionary was effectively a cache; removing it without replacement could significantly impact performance if the same collection is queried multiple times.

Copilot uses AI. Check for mistakes.
25 changes: 9 additions & 16 deletions xylose/scielodocument.py
Original file line number Diff line number Diff line change
Expand Up @@ -252,10 +252,8 @@ def scielo_domain(self):
"""

if self.collection_acronym:
return choices.collections.get(
self.collection_acronym,
[u'Undefined: %s' % self.collection_acronym, None]
)[1] or None
summary = choices.get_collection_summary(self.collection_acronym)
return summary[1] if len(summary) > 1 and summary[1] else None

if 'v690' in self.data['title']:
return self.data['title']['v690'][0]['_'].replace('http://', '')
Expand Down Expand Up @@ -1072,10 +1070,8 @@ def scielo_domain(self):
"""

if self.collection_acronym:
return choices.collections.get(
self.collection_acronym,
[u'Undefined: %s' % self.collection_acronym, None]
)[1] or None
summary = choices.get_collection_summary(self.collection_acronym)
return summary[1] if len(summary) > 1 and summary[1] else None

if 'v690' in self.data:
return self.data['v690'][0]['_'].replace('http://', '')
Expand Down Expand Up @@ -1803,10 +1799,9 @@ def collection_name(self):
This method retrieves the collection name of the given article,
if it exists.
"""
return choices.collections.get(
self.collection_acronym,
[u'Undefined: %s' % self.collection_acronym, '']
)[0]
if self.collection_acronym:
summary = choices.get_collection_summary(self.collection_acronym)
return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}'
Copy link

Copilot AI Nov 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Missing return statement when self.collection_acronym is None or falsy. The original code always returned a value, but this implementation returns None implicitly when the condition fails. Add an explicit return None or handle the else case.

Suggested change
return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}'
return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}'
return None

Copilot uses AI. Check for mistakes.

@property
def collection_acronym(self):
Expand Down Expand Up @@ -2565,10 +2560,8 @@ def scielo_domain(self):
"""

if self.collection_acronym:
return choices.collections.get(
self.collection_acronym,
[u'Undefined: %s' % self.collection_acronym, None]
)[1] or None
summary = choices.get_collection_summary(self.collection_acronym)
return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}'
Copy link

Copilot AI Nov 1, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This scielo_domain method should return summary[1] (the URL) instead of summary[0] (the name). According to the method's docstring and similar implementations at lines 256 and 1074, it should retrieve the collection domain, not the name.

Suggested change
return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}'
return summary[1] if len(summary) > 1 and summary[1] else f'Undefined: {self.collection_acronym}'

Copilot uses AI. Check for mistakes.

if 'v690' in self.data['title']:
return self.data['title']['v690'][0]['_'].replace('http://', '')
Expand Down