diff --git a/CHANGES.md b/CHANGES.md
index 8fbe9d9..ef4a9bf 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [5.3.0] - 26 June 2025
+- update format expected for jlist.csv dates
+
+## [5.3a0] - 21 May 2025
+- add API to get name from ORCID
+- add ORCID Person
+
 ## [5.2] - 16 May 2025
 - add search by author_ids
 
diff --git a/pub/tools/__init__.py b/pub/tools/__init__.py
index 766e493..23bfda7 100644
--- a/pub/tools/__init__.py
+++ b/pub/tools/__init__.py
@@ -2,4 +2,4 @@
 
-__all__ = [journals]
+__all__ = ["journals"]
 
-__version__ = "5.2"
+__version__ = "5.3.0"
diff --git a/pub/tools/entrez.py b/pub/tools/entrez.py
index 58d6529..e8ceb6b 100644
--- a/pub/tools/entrez.py
+++ b/pub/tools/entrez.py
@@ -13,6 +13,7 @@
 from .schema import Abstract
 from .schema import BookRecord
 from .schema import ChapterRecord
+from .schema import EntrezRecord
 from .schema import Grant
 from .schema import JournalRecord
 from .schema import Person
@@ -173,6 +174,7 @@ def _parse_author_name(author: dict, investigator: bool = False) -> Person:
     # strip excess spaces like in
     # https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=22606070&retmode=xml
     fname = " ".join([part for part in fname.split(" ") if part])
+    identifiers = {source.attributes.get("Source", ""): str(source) for source in author.get("Identifier", [])}
     return Person(
         last_name=author.get("LastName", ""),
         first_name=fname,
@@ -180,6 +182,7 @@ def _parse_author_name(author: dict, investigator: bool = False) -> Person:
         collective_name=author.get("CollectiveName", ""),
         suffix=author.get("Suffix", ""),
         investigator=investigator,
+        identifiers=identifiers,
         affiliations=author.get("affiliations", []),
     )
 
@@ -583,7 +586,7 @@ def find_publications(
     affl=None,
     doi="",
     inclusive=False,
-):
+) -> list[EntrezRecord]:
     """
     You can use the resulting WebEnv and QueryKey values to call get_searched_publications
     https://www.ncbi.nlm.nih.gov/books/NBK3827/#_pubmedhelp_Search_Field_Descriptions_and_
diff --git a/pub/tools/journals.py b/pub/tools/journals.py
index 81ef1b9..6c52dc8 100644
--- a/pub/tools/journals.py
+++ b/pub/tools/journals.py
@@ -79,8 +79,8 @@ def _parse_journals(text):
             deposit,
             url,
         ) = row
-        latest = latest.split(";")[-1]
-        earliest = earliest.split(";")[-1]
+        latest = latest.split(" ")[-1]
+        earliest = earliest.split(" ")[-1]
         _atoj[abbr.lower()] = title
         _jtoa[title.lower()] = abbr
         dates[abbr.lower()] = (earliest, latest)
diff --git a/pub/tools/orcid.py b/pub/tools/orcid.py
new file mode 100644
index 0000000..6de93cd
--- /dev/null
+++ b/pub/tools/orcid.py
@@ -0,0 +1,33 @@
+import requests
+
+PUBLIC_API = "https://pub.orcid.org/v3.0/"
+
+
+def get_author(orcid: str, full: bool = False) -> dict:
+    """Fetch a person's record from the public ORCID API.
+
+    Args:
+        orcid: ORCID iD to look up, e.g. "0000-0002-1771-9287".
+        full: when true, return the complete decoded JSON record.
+
+    Returns:
+        The whole record if ``full`` is true; otherwise a dict with the keys
+        ``given_name`` and ``family_name``, each ``None`` when not available.
+
+    Raises:
+        requests.exceptions.HTTPError: the API answered with a non-200 status.
+    """
+    response = requests.get(f"{PUBLIC_API}{orcid}", headers={"Accept": "application/json"}, timeout=2.0)
+    if response.status_code != 200:
+        raise requests.exceptions.HTTPError(f"REST API returned: {response.status_code}")
+
+    data = response.json()
+    if full:
+        return data
+    # ORCID may report "name" or an individual name part as JSON null (e.g. when
+    # it is not public), so `.get(key, {})` alone would yield None mid-chain.
+    name = (data.get("person") or {}).get("name") or {}
+    given_name = (name.get("given-names") or {}).get("value")
+    family_name = (name.get("family-name") or {}).get("value")
+
+    return {"given_name": given_name, "family_name": family_name}
diff --git a/pub/tools/schema.py b/pub/tools/schema.py
index 04c6417..0af1061 100644
--- a/pub/tools/schema.py
+++ b/pub/tools/schema.py
@@ -16,6 +16,7 @@ class Person:
     suffix: str = ""
     investigator: bool = False
     affiliations: list[str] = dataclasses.field(default_factory=list)
+    identifiers: dict[str, str] = dataclasses.field(default_factory=dict)
 
     def asdict(self):
         base = dataclasses.asdict(self)
@@ -24,6 +25,7 @@ def asdict(self):
             "fname": self.fname,
             "cname": self.cname,
             "iname": self.iname,
+            "orcid": self.orcid,
         })
         return base
 
@@ -47,6 +49,11 @@ def cname(self):
         """backwards compatibility"""
         return self.collective_name
 
+    @property
+    def orcid(self):
+        """ORCID value taken from identifiers, or "" when none is recorded"""
+        return self.identifiers.get("ORCID", "")
+
 
 @dataclasses.dataclass
 class Abstract:
diff --git a/requirements.txt b/requirements.txt
deleted file mode 100644
index 1f7fe59..0000000
--- a/requirements.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-biopython
-unidecode
-lxml
-requests
\ No newline at end of file
diff --git a/tests/test_entrez.py b/tests/test_entrez.py
index 28097c5..9dddbf7 100644
--- a/tests/test_entrez.py
+++ b/tests/test_entrez.py
@@ -4,6 +4,7 @@
 
 from pub.tools import citations
 from pub.tools import entrez
+from pub.tools import orcid
 from pub.tools.schema import Abstract
 from pub.tools.schema import Grant
 from pub.tools.schema import JournalRecord
@@ -447,6 +448,20 @@ def test_find_and_fetch(self):
         record = entrez.get_searched_publications(record["WebEnv"], record["QueryKey"])
         self.check_pub_data(record[0])
 
-    def test_orcid(self):
+    def test_orcid_search(self):
         record = entrez.find_publications(author_ids=["0000-0002-8953-3940"])
         assert int(record["Count"]) > 0
+
+    def test_pubmed_orcid_author(self):
+        record = entrez.get_publication(pmid="32570285")
+        assert record.authors[0].orcid == "0000-0002-1771-9287"
+        assert record.authors[0].asdict()["orcid"] == "0000-0002-1771-9287"
+
+    def test_full_identifiers(self):
+        record = entrez.get_publication(pmid="32570285")
+        assert record.authors[0].identifiers == {"ORCID": "0000-0002-1771-9287"}
+
+    def test_get_orcid(self):
+        record = orcid.get_author(orcid="0000-0002-1771-9287")
+        assert record["given_name"] == "Rachel"
+        assert record["family_name"] == "Altshuler"