Skip to content

Commit

Permalink
gnd: improve validation and normalization
Browse files Browse the repository at this point in the history
* accept both `http://d-nb.info/gnd/<id>` and `https://d-nb.info/gnd/<id>` when pasted as a GND URI, and normalize them

* re-use gnd_resolver_url var in regex

* use the regex in the normalize function

* utils: remove var and improve regex

* validators: don't use var

* utils: adapt to correct url match regex

* validators: remove additional check

* utils: improve regex to also match IDs without the http(s) prefix, so that the additional check in validators can be removed

* validators: adhere to pydocstyle

---------

Co-authored-by: Karl Krägelin <[email protected]>
  • Loading branch information
karkraeg and Karl Krägelin authored Jan 31, 2025
1 parent ffd5a6b commit d1bc7ef
Show file tree
Hide file tree
Showing 3 changed files with 4 additions and 12 deletions.
7 changes: 2 additions & 5 deletions idutils/normalizers.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,11 +54,8 @@ def normalize_orcid(val):

def normalize_gnd(val):
    """Normalize a GND identifier to the canonical ``gnd:<id>`` form.

    The accepted input forms are whatever ``gnd_regexp`` matches: a bare
    identifier, a ``gnd:``/``GND:``-prefixed identifier, or a
    ``d-nb.info/gnd/<id>`` URI with or without ``http(s)://``.

    :param val: The identifier string to normalize.
    :returns: ``"gnd:<id>"`` when ``val`` is recognised as a GND identifier;
        otherwise ``val`` is returned unchanged.
    """
    m = gnd_regexp.match(val)
    if m is None:
        # Not a recognisable GND identifier: hand the value back untouched
        # instead of failing with AttributeError on ``None.group``.
        return val
    # Group 2 of ``gnd_regexp`` is the bare identifier without any prefix.
    return f"gnd:{m.group(2)}"


def normalize_urn(val):
Expand Down
5 changes: 2 additions & 3 deletions idutils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -82,16 +82,15 @@
"""

# Matches a GND identifier, optionally introduced by one of the accepted
# prefixes.  Group 1 is the prefix (``None`` when absent) and group 2 is the
# bare identifier; ``normalize_gnd`` reads group 2, so the optional URL scheme
# is kept in a non-capturing group to leave the numbering untouched.
# NOTE: the pattern is not anchored at the end, so ``gnd_regexp.match`` also
# succeeds when extra characters follow a valid identifier.
gnd_regexp = re.compile(
    # Prefix: "gnd:", "GND:", or the d-nb.info URI with or without http(s)://.
    r"(gnd:|GND:|(?:https?://)?d-nb\.info/gnd/)?("
    r"1[012]?\d{7}[0-9X]|"
    r"[47]\d{6}-\d|"
    r"[1-9]\d{0,7}-[0-9X]|"
    r"3\d{7}[0-9X]"
    r")"
)
"""See https://www.wikidata.org/wiki/Property:P227."""

# Plain-http URL prefix of GND URIs, i.e. ``http://d-nb.info/gnd/<id>``.
gnd_resolver_url = "http://d-nb.info/gnd/"

# NOTE(review): presumably the base URL of the URN resolver service; its use
# is not visible in this part of the file -- confirm against callers.
urn_resolver_url = "https://nbn-resolving.org/"

Expand Down
4 changes: 0 additions & 4 deletions idutils/validators.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,6 @@

"""Utility file containing ID validators."""


import unicodedata
from urllib.parse import urlparse

Expand Down Expand Up @@ -237,9 +236,6 @@ def is_pmcid(val):

def is_gnd(val):
    """Test if argument is a GND Identifier.

    ``gnd_regexp`` itself accepts bare identifiers, ``gnd:``/``GND:``-prefixed
    identifiers and ``d-nb.info/gnd/<id>`` URIs (with or without
    ``http(s)://``), so the value is matched as given.  Stripping a resolver
    URL first would let doubly-prefixed input such as
    ``http://d-nb.info/gnd/gnd:<id>`` pass validation.

    :param val: Candidate identifier string.
    :returns: The match object (truthy) when ``val`` starts with a valid GND
        identifier, ``None`` otherwise.
    """
    return gnd_regexp.match(val)


Expand Down

0 comments on commit d1bc7ef

Please sign in to comment.