-
Notifications
You must be signed in to change notification settings - Fork 13
Refactor: collection summary api #199
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
477e412
7f16f14
ca11ad8
a58727b
6aa56c4
8f00c29
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,10 @@ | ||
| # encoding: utf-8 | ||
|
|
||
| import requests | ||
| from requests.adapters import HTTPAdapter | ||
| from urllib3.util.retry import Retry | ||
|
|
||
|
|
||
| CREATIVE_COMMONS_TEXTS = { | ||
| "BY": "Attribution", | ||
| "BY-ND": "Attribution-NoDerivatives", | ||
|
|
@@ -97,36 +102,6 @@ | |
| u'Z': u'undefined' | ||
| } | ||
|
|
||
| collections = { | ||
| 'scl': ['Brazil', 'www.scielo.br'], | ||
| 'arg': ['Argentina', 'www.scielo.org.ar'], | ||
| 'cub': ['Cuba', 'scielo.sld.cu'], | ||
| 'esp': ['Spain', 'scielo.isciii.es'], | ||
| 'col': ['Colombia', 'www.scielo.org.co'], | ||
| 'sss': ['Social Sciences', 'socialsciences.scielo.org'], | ||
| 'spa': ['Public Health', 'www.scielosp.org'], | ||
| 'mex': ['Mexico', 'www.scielo.org.mx'], | ||
| 'prt': ['Portugal', 'www.scielo.mec.pt'], | ||
| 'cri': ['Costa Rica', 'www.scielo.sa.cr'], | ||
| 'ven': ['Venezuela', 'www.scielo.org.ve'], | ||
| 'ury': ['Uruguay', 'www.scielo.edu.uy'], | ||
| 'per': ['Peru', 'www.scielo.org.pe'], | ||
| 'chl': ['Chile', 'www.scielo.cl'], | ||
| 'sza': ['South Africa', 'www.scielo.org.za'], | ||
| 'bol': ['Bolivia', 'www.scielo.org.bo'], | ||
| 'pry': ['Paraguay', 'scielo.iics.una.py'], | ||
| 'psi': ['PEPSIC', 'pepsic.bvsalud.org'], | ||
| 'ppg': ['PPEGEO', 'ppegeo.igc.usp.br'], | ||
| 'rve': ['RevOdonto', 'revodonto.bvsalud.org'], | ||
| 'edc': ['Educa', 'educa.fcc.org.br'], | ||
| 'inv': [u'Inovação', 'inovacao.scielo.br'], | ||
| 'cic': [u'Ciência e Cultura', 'cienciaecultura.bvs.br'], | ||
| 'cci': [u'ComCiência', 'comciencia.scielo.br'], | ||
| 'wid': ['West Indians', 'caribbean.scielo.org'], | ||
| 'pro': ['Proceedings', 'www.proceedings.scielo.br'], | ||
| 'ecu': ['Ecuador', 'scielo.senescyt.gob.ec'], | ||
| } | ||
|
|
||
| journal_status = { | ||
| 'c': u'current', | ||
| 'd': u'deceased', | ||
|
|
@@ -216,3 +191,74 @@ | |
| u'diciembre': 12, | ||
| u'december': 12 | ||
| } | ||
|
|
||
|
|
||
| def fetch_collection_metadata(collection_code): | ||
| """ | ||
| Fetch metadata for a given SciELO collection code from the ArticleMeta API. | ||
|
|
||
| Args: | ||
| collection_code (str): SciELO collection code (e.g., 'ven', 'scl'). | ||
|
|
||
| Returns: | ||
| dict or None: JSON metadata if successful, None otherwise. | ||
| """ | ||
| if not isinstance(collection_code, str) or not collection_code.strip(): | ||
| return None | ||
|
|
||
| base_url = "https://articlemeta.scielo.org" | ||
| endpoint = f"/api/v1/collection/?code={collection_code}" | ||
| full_url = f"{base_url}{endpoint}" | ||
|
|
||
| session = requests.Session() | ||
| retry_policy = Retry( | ||
| total=3, | ||
| backoff_factor=1, | ||
| status_forcelist=[500, 502, 503, 504], | ||
| allowed_methods=["GET"] | ||
| ) | ||
| adapter = HTTPAdapter(max_retries=retry_policy) | ||
| session.mount(base_url, adapter) | ||
|
|
||
| try: | ||
| response = session.get(full_url, timeout=10) | ||
| response.raise_for_status() | ||
| return response.json() | ||
| except (requests.RequestException, ValueError): | ||
| return None | ||
|
Comment on lines
+227
to
+228
|
||
|
|
||
|
|
||
| def get_collection_name_and_url(metadata): | ||
| """ | ||
| Extract the collection's original name and display URL from metadata. | ||
|
|
||
| Args: | ||
| metadata (dict): Metadata dictionary returned by the API. | ||
|
|
||
| Returns: | ||
| list[str]: [original_name, formatted_url] or an empty list on failure. | ||
| """ | ||
| if not isinstance(metadata, dict): | ||
| return [] | ||
|
|
||
| try: | ||
| name = metadata["original_name"] | ||
| acron2 = metadata["acron2"] | ||
| url = f"www.scielo.org.{acron2}" | ||
| return [name, url] | ||
| except KeyError: | ||
| return [] | ||
|
|
||
|
|
||
| def get_collection_summary(collection_code): | ||
| """ | ||
| Retrieve a summarized list with the collection's name and display URL. | ||
|
|
||
| Args: | ||
| collection_code (str): SciELO collection code (e.g., 'ven', 'scl'). | ||
|
|
||
| Returns: | ||
| list[str]: [original_name, formatted_url] or an empty list if retrieval fails. | ||
| """ | ||
| metadata = fetch_collection_metadata(collection_code) | ||
| return get_collection_name_and_url(metadata) if metadata else [] | ||
|
Comment on lines
+263
to
+264
|
||
| Original file line number | Diff line number | Diff line change | ||||||
|---|---|---|---|---|---|---|---|---|
|
|
@@ -252,10 +252,8 @@ def scielo_domain(self): | |||||||
| """ | ||||||||
|
|
||||||||
| if self.collection_acronym: | ||||||||
| return choices.collections.get( | ||||||||
| self.collection_acronym, | ||||||||
| [u'Undefined: %s' % self.collection_acronym, None] | ||||||||
| )[1] or None | ||||||||
| summary = choices.get_collection_summary(self.collection_acronym) | ||||||||
| return summary[1] if len(summary) > 1 and summary[1] else None | ||||||||
|
|
||||||||
| if 'v690' in self.data['title']: | ||||||||
| return self.data['title']['v690'][0]['_'].replace('http://', '') | ||||||||
|
|
@@ -1072,10 +1070,8 @@ def scielo_domain(self): | |||||||
| """ | ||||||||
|
|
||||||||
| if self.collection_acronym: | ||||||||
| return choices.collections.get( | ||||||||
| self.collection_acronym, | ||||||||
| [u'Undefined: %s' % self.collection_acronym, None] | ||||||||
| )[1] or None | ||||||||
| summary = choices.get_collection_summary(self.collection_acronym) | ||||||||
| return summary[1] if len(summary) > 1 and summary[1] else None | ||||||||
|
|
||||||||
| if 'v690' in self.data: | ||||||||
| return self.data['v690'][0]['_'].replace('http://', '') | ||||||||
|
|
@@ -1803,10 +1799,9 @@ def collection_name(self): | |||||||
| This method retrieves the collection name of the given article, | ||||||||
| if it exists. | ||||||||
| """ | ||||||||
| return choices.collections.get( | ||||||||
| self.collection_acronym, | ||||||||
| [u'Undefined: %s' % self.collection_acronym, ''] | ||||||||
| )[0] | ||||||||
| if self.collection_acronym: | ||||||||
| summary = choices.get_collection_summary(self.collection_acronym) | ||||||||
| return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}' | ||||||||
|
||||||||
| return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}' | |
| return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}' | |
| return None |
Copilot
AI
Nov 1, 2025
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This scielo_domain method should return summary[1] (the URL) instead of summary[0] (the name). According to the method's docstring and similar implementations at lines 256 and 1074, it should retrieve the collection domain, not the name.
| return summary[0] if len(summary) > 0 and summary[0] else f'Undefined: {self.collection_acronym}' | |
| return summary[1] if len(summary) > 1 and summary[1] else f'Undefined: {self.collection_acronym}' |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
The function creates a new session with retry configuration on every call. Consider implementing a module-level session or caching mechanism to avoid the overhead of creating a new session for each collection lookup, especially if multiple collections are queried in succession.