Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
7 changes: 7 additions & 0 deletions CHANGES.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,12 @@
# Changelog

## [5.3.0] - 26 June 2025
- update the expected format of dates in jlist.csv

## [5.3a0] - 21 May 2025
- add API to get name from ORCID
- add ORCID Person

## [5.2] - 16 May 2025
- add search by author_ids

Expand Down
2 changes: 1 addition & 1 deletion pub/tools/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,4 +2,4 @@

# Entries of __all__ must be strings naming public attributes. Listing the
# bare `journals` object instead makes `from pub.tools import *` fail.
__all__ = ["journals"]

__version__ = "5.2"
__version__ = "5.3.0"
5 changes: 4 additions & 1 deletion pub/tools/entrez.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .schema import Abstract
from .schema import BookRecord
from .schema import ChapterRecord
from .schema import EntrezRecord
from .schema import Grant
from .schema import JournalRecord
from .schema import Person
Expand Down Expand Up @@ -173,13 +174,15 @@ def _parse_author_name(author: dict, investigator: bool = False) -> Person:
# strip excess spaces like in
# https://eutils.ncbi.nlm.nih.gov/entrez/eutils/efetch.fcgi?db=pubmed&id=22606070&retmode=xml
fname = " ".join([part for part in fname.split(" ") if part])
identifiers = {source.attributes.get("Source", ""): str(source) for source in author.get("Identifier", [])}
return Person(
last_name=author.get("LastName", ""),
first_name=fname,
initial=author.get("Initials", ""),
collective_name=author.get("CollectiveName", ""),
suffix=author.get("Suffix", ""),
investigator=investigator,
identifiers=identifiers,
affiliations=author.get("affiliations", []),
)

Expand Down Expand Up @@ -583,7 +586,7 @@ def find_publications(
affl=None,
doi="",
inclusive=False,
):
) -> list[EntrezRecord]:
"""
You can use the resulting WebEnv and QueryKey values to call get_searched_publications
https://www.ncbi.nlm.nih.gov/books/NBK3827/#_pubmedhelp_Search_Field_Descriptions_and_
Expand Down
4 changes: 2 additions & 2 deletions pub/tools/journals.py
Original file line number Diff line number Diff line change
Expand Up @@ -79,8 +79,8 @@ def _parse_journals(text):
deposit,
url,
) = row
latest = latest.split(";")[-1]
earliest = earliest.split(";")[-1]
latest = latest.split(" ")[-1]
earliest = earliest.split(" ")[-1]
_atoj[abbr.lower()] = title
_jtoa[title.lower()] = abbr
dates[abbr.lower()] = (earliest, latest)
Expand Down
17 changes: 17 additions & 0 deletions pub/tools/orcid.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
import requests

PUBLIC_API = "https://pub.orcid.org/v3.0/"


def get_author(orcid: str, full: bool = False) -> dict:
    """Fetch a record from the public ORCID API.

    Args:
        orcid: identifier appended to ``PUBLIC_API`` to build the request URL.
        full: when true, return the decoded JSON response unchanged.

    Returns:
        The whole JSON payload when ``full`` is true. Otherwise a dict with
        the keys ``given_name`` and ``family_name``; either value is ``None``
        when the record does not provide it.

    Raises:
        requests.exceptions.HTTPError: the API answered with a status code
            other than 200.
    """
    response = requests.get(f"{PUBLIC_API}{orcid}", headers={"Accept": "application/json"}, timeout=2.0)
    if response.status_code != 200:
        # Attach the response so callers catching the error can inspect it.
        raise requests.exceptions.HTTPError(f"REST API returned: {response.status_code}", response=response)

    data = response.json()
    if full:
        return data

    # A key may be present with a JSON null value (for example a record with
    # no family name). dict.get() only applies its default when the key is
    # missing, so fall back with `or {}` to avoid calling .get() on None.
    name = data["person"].get("name") or {}
    given_name = (name.get("given-names") or {}).get("value")
    family_name = (name.get("family-name") or {}).get("value")

    return {"given_name": given_name, "family_name": family_name}
7 changes: 7 additions & 0 deletions pub/tools/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@ class Person:
suffix: str = ""
investigator: bool = False
affiliations: list[str] = dataclasses.field(default_factory=list)
identifiers: dict[str, str] = dataclasses.field(default_factory=dict)

def asdict(self):
base = dataclasses.asdict(self)
Expand All @@ -24,6 +25,7 @@ def asdict(self):
"fname": self.fname,
"cname": self.cname,
"iname": self.iname,
"orcid": self.orcid,
})
return base

Expand All @@ -47,6 +49,11 @@ def cname(self):
"""backwards compatibility"""
return self.collective_name

@property
def orcid(self):
    """Return the "ORCID" entry of ``identifiers``, or "" when there is none."""
    missing = ""
    known_ids = self.identifiers
    return known_ids.get("ORCID", missing)


@dataclasses.dataclass
class Abstract:
Expand Down
4 changes: 0 additions & 4 deletions requirements.txt

This file was deleted.

17 changes: 16 additions & 1 deletion tests/test_entrez.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from pub.tools import citations
from pub.tools import entrez
from pub.tools import orcid
from pub.tools.schema import Abstract
from pub.tools.schema import Grant
from pub.tools.schema import JournalRecord
Expand Down Expand Up @@ -447,6 +448,20 @@ def test_find_and_fetch(self):
record = entrez.get_searched_publications(record["WebEnv"], record["QueryKey"])
self.check_pub_data(record[0])

def test_orcid(self):
def test_orcid_search(self):
    """find_publications called with an ORCID iD in author_ids reports a positive Count."""
    search_result = entrez.find_publications(author_ids=["0000-0002-8953-3940"])
    hit_count = int(search_result["Count"])
    assert hit_count > 0

def test_pubmed_orcid_author(self):
    """The first author's ORCID is exposed both as a property and in asdict()."""
    publication = entrez.get_publication(pmid="32570285")
    expected_orcid = "0000-0002-1771-9287"
    assert publication.authors[0].orcid == expected_orcid
    assert publication.authors[0].asdict()["orcid"] == expected_orcid

def test_full_identifiers(self):
    """The first author's identifiers mapping holds exactly the ORCID entry."""
    publication = entrez.get_publication(pmid="32570285")
    expected_identifiers = {"ORCID": "0000-0002-1771-9287"}
    assert publication.authors[0].identifiers == expected_identifiers

def test_get_orcid(self):
    """orcid.get_author returns the given and family name for an ORCID iD."""
    author = orcid.get_author(orcid="0000-0002-1771-9287")
    given, family = author["given_name"], author["family_name"]
    assert given == "Rachel"
    assert family == "Altshuler"