diff --git a/pyproject.toml b/pyproject.toml
index 0e6d1f67..84139d7f 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -70,7 +70,7 @@ dependencies = [
"cachier",
"pystow>=0.7.5",
"bioversions>=0.8.101",
- "bioregistry>=0.12.30",
+ "bioregistry>=0.13.10",
"bioontologies>=0.7.2",
"ssslm>=0.0.13",
"zenodo-client>=0.3.6",
@@ -80,6 +80,7 @@ dependencies = [
"curies-processing>=0.1.2",
"python-dateutil",
"networkx>=3.4",
+ "jskos",
# Resource Downloaders
"drugbank_downloader",
"chembl_downloader",
diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py
index ddddb7c5..1cfed568 100644
--- a/src/pyobo/constants.py
+++ b/src/pyobo/constants.py
@@ -6,11 +6,14 @@
import re
from collections.abc import Callable
from pathlib import Path
-from typing import Literal, NamedTuple, TypeAlias
+from typing import TYPE_CHECKING, Literal, NamedTuple, TypeAlias
import pystow
from typing_extensions import NotRequired, TypedDict
+if TYPE_CHECKING:
+ from bioregistry.schema import AnnotatedURL
+
__all__ = [
"DATABASE_DIRECTORY",
"DEFAULT_PREFIX_MAP",
@@ -225,7 +228,7 @@ class IterHelperHelperDict(SlimGetOntologyKwargs):
#: The ontology format
-OntologyFormat: TypeAlias = Literal["obo", "owl", "json", "rdf"]
+OntologyFormat: TypeAlias = Literal["obo", "owl", "json", "rdf", "skos", "jskos"]
#: from table 2 of the Functional OWL syntax definition
#: at https://www.w3.org/TR/owl2-syntax/#IRIs
@@ -244,6 +247,8 @@ class OntologyPathPack(NamedTuple):
format: OntologyFormat
#: The path to the ontology file
path: Path
+ #: The RDF format
+ rdf_format: str | None
def _get_obo_download(prefix: str) -> str | None:
@@ -264,17 +269,31 @@ def _get_json_download(prefix: str) -> str | None:
return bioregistry.get_json_download(prefix)
-def _get_rdf_download(prefix: str) -> str | None:
+def _get_rdf_download(prefix: str) -> str | AnnotatedURL | None:
+ import bioregistry
+
+ return bioregistry.get_rdf_download(prefix, get_format=True)
+
+
+def _get_skos_download(prefix: str) -> str | AnnotatedURL | None:
+    """Get the SKOS download for the prefix from the Bioregistry, asking for its format."""
+    from bioregistry import get_skos_download
+
+    download = get_skos_download(prefix, get_format=True)
+    return download
+
+
+def _get_jskos_download(prefix: str) -> str | None:
import bioregistry
- return bioregistry.get_rdf_download(prefix)
+ return bioregistry.get_jskos_download(prefix)
#: Functions that get ontology files. Order matters in this list,
#: since order implicitly defines priority
-ONTOLOGY_GETTERS: list[tuple[OntologyFormat, Callable[[str], str | None]]] = [
+ONTOLOGY_GETTERS: list[tuple[OntologyFormat, Callable[[str], str | AnnotatedURL | None]]] = [
("obo", _get_obo_download),
("owl", _get_owl_download),
("json", _get_json_download),
("rdf", _get_rdf_download),
+ ("skos", _get_skos_download),
+ ("jskos", _get_jskos_download),
]
diff --git a/src/pyobo/getters.py b/src/pyobo/getters.py
index 22712098..7005e1e1 100644
--- a/src/pyobo/getters.py
+++ b/src/pyobo/getters.py
@@ -22,6 +22,7 @@
import click
import pystow.utils
import requests.exceptions
+from bioregistry.schema import AnnotatedURL, RDFFormat
from tabulate import tabulate
from tqdm.auto import tqdm
from typing_extensions import Unpack
@@ -32,6 +33,7 @@
ONTOLOGY_GETTERS,
GetOntologyKwargs,
IterHelperHelperDict,
+ OntologyFormat,
OntologyPathPack,
SlimGetOntologyKwargs,
)
@@ -177,11 +179,15 @@ def get_ontology(
path_pack = _ensure_ontology_path(prefix, force=force, version=version)
if path_pack is None:
raise NoBuildError(prefix)
- ontology_format, path = path_pack
+ ontology_format, path, rdf_format = path_pack
if ontology_format == "obo":
pass # all gucci
- elif ontology_format in {"owl", "rdf"}:
+ elif ontology_format == "owl":
path = _convert_to_obo(path)
+ elif ontology_format == "rdf":
+ from .struct.generic_rdf import read_generic_rdf
+
+ return read_generic_rdf(path=path, prefix=prefix, rdf_format=rdf_format)
elif ontology_format == "json":
from .struct.obograph import read_obograph
@@ -189,6 +195,20 @@ def get_ontology(
if cache:
obo.write_default(force=force_process)
return obo
+ elif ontology_format == "skos":
+ from .struct.skos import read_skos
+
+ obo = read_skos(prefix=prefix, path=path, rdf_format=rdf_format)
+ if cache:
+ obo.write_default(force=force)
+ return obo
+ elif ontology_format == "jskos":
+ from .struct.jskos_utils import read_jskos
+
+ obo = read_jskos(prefix=prefix, path=path)
+ if cache:
+ obo.write_default(force=force)
+ return obo
else:
raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}")
@@ -206,21 +226,52 @@ def get_ontology(
return obo
+#: Maps an ontology format to a file suffix (``.ttl`` for SKOS, ``.json`` for JSKOS)
+ONTOLOGY_FORMAT_TO_SUFFIX: dict[OntologyFormat, str] = {
+ "skos": ".ttl",
+ "jskos": ".json",
+}
+
+#: Maps an RDF format name to a file suffix; looked up by :func:`_name_from_url`
+#: when the name derived from a download URL has no extension
+XX_TO_SUFFIX: dict[str, str] = {"rdf/xml": ".xml", "xml": ".xml"}
+
+
+def _name_from_url(
+    url: str, ontology_format: OntologyFormat, *, rdf_format: str | None = None
+) -> str:
+    """Get a file name for a download URL, making sure it has a file extension.
+
+    :param url: The URL of the ontology artifact
+    :param ontology_format: The ontology format, used to pick a default suffix
+        (from ``ONTOLOGY_FORMAT_TO_SUFFIX``) when the RDF format does not give one
+    :param rdf_format: The curated RDF format of the artifact, if any
+    :returns: The name derived from the URL, with a suffix appended if it had none
+    :raises ValueError: if the name has no extension and no suffix can be
+        determined from either the RDF format or the ontology format
+    """
+    name = pystow.utils.name_from_url(url)
+    if "." in name:
+        return name
+
+    # TODO add unit test that checks all downloads with no extension have a (RDF) format
+    # the explicitly curated RDF format takes priority over the per-format default
+    suffix = None if rdf_format is None else XX_TO_SUFFIX.get(rdf_format)
+    if suffix is None:
+        suffix = ONTOLOGY_FORMAT_TO_SUFFIX.get(ontology_format)
+    if suffix is None:
+        if rdf_format is None:
+            raise ValueError(f"need to curate a RDF format for {url}")
+        raise ValueError(f"no file suffix is known for RDF format {rdf_format!r} of {url}")
+    return name + suffix
+
+
def _ensure_ontology_path(
prefix: str, *, force: bool, version: str | None
) -> OntologyPathPack | None:
+ rdf_format: RDFFormat | None
for ontology_format, getter in ONTOLOGY_GETTERS:
- url = getter(prefix)
- if url is None:
- continue
+ match getter(prefix):
+ case None:
+ continue
+ case AnnotatedURL() as a:
+ url = a.url
+ rdf_format = a.rdf_format
+ case str() as url:
+ rdf_format = None
+ case _:
+ raise TypeError
+
+ name = _name_from_url(url, ontology_format, rdf_format=rdf_format)
+
try:
- path = ensure_path(prefix, url=url, force=force, version=version)
+ path = ensure_path(prefix, url=url, force=force, version=version, name=name)
except (urllib.error.HTTPError, pystow.utils.DownloadError):
continue
except pystow.utils.UnexpectedDirectoryError:
continue # TODO report more info about the URL and the name it tried to make
else:
- return OntologyPathPack(ontology_format, path)
+ return OntologyPathPack(ontology_format, path, rdf_format)
return None
diff --git a/src/pyobo/oer_demo.py b/src/pyobo/oer_demo.py
new file mode 100644
index 00000000..fcfc68da
--- /dev/null
+++ b/src/pyobo/oer_demo.py
@@ -0,0 +1,70 @@
+"""Get all OER-related prefixes."""
+
+import shutil
+
+import bioregistry
+import click
+import pystow
+from bioontologies.robot import ROBOTError
+from more_click import verbose_option
+from tqdm import tqdm
+from tqdm.contrib.logging import logging_redirect_tqdm
+
+import pyobo
+from pyobo.getters import NoBuildError
+
+#: Prefixes skipped by :func:`main` (NOTE(review): presumably already checked by hand - confirm)
+VALIDATED = {"ccso", "iana.mediatype"}
+#: Prefixes skipped by :func:`main` (NOTE(review): presumably need a custom PyOBO source - confirm)
+NEEDS_PYOBO = {
+ "loc.fdd", # see http://www.loc.gov/preservation/digital/formats/fddXML.zip
+ "oerschema", # see https://github.com/open-curriculum/oerschema/blob/master/src/config/schema.yml
+}
+
+
+# TODO add all vocabularies from https://vocabs.openeduhub.de/
+
+
+@click.command()
+@click.option(
+    "-r",
+    "--refresh",
+    is_flag=True,
+    help="Delete the pyobo/raw/<prefix> PyStow directory of each prefix, then exit.",
+)
+@verbose_option
+def main(refresh: bool = False) -> None:
+    """Get all OER-related prefixes."""
+    collection_id = "0000018"
+    collection = bioregistry.get_collection(collection_id)
+    if collection is None:
+        raise ValueError(f"could not find Bioregistry collection {collection_id}")
+
+    prefixes = [p for p in collection.resources if p not in VALIDATED and p not in NEEDS_PYOBO]
+    if refresh:
+        # only clear what was downloaded before; nothing is re-fetched in this mode
+        for prefix in tqdm(prefixes):
+            directory = pystow.join("pyobo", "raw", prefix)
+            if directory.is_dir():
+                shutil.rmtree(directory)
+        return
+
+    for prefix in tqdm(prefixes, disable=True):
+        tqdm.write(
+            click.style(f"[{prefix}] {bioregistry.get_name(prefix, strict=True)}", fg="green")
+        )
+        with logging_redirect_tqdm():
+            try:
+                ontology = pyobo.get_ontology(prefix, cache=False, force_process=True, force=False)
+            except NotImplementedError as e:
+                tqdm.write(click.style(f"[{prefix}] failed because not implemented: {e}", fg="red"))
+                continue
+            except NoBuildError:
+                tqdm.write(click.style(f"[{prefix}] no build", fg="yellow"))
+                continue
+            except ROBOTError as e:
+                tqdm.write(click.style(f"[{prefix}]\n{e}", fg="yellow"))
+                continue
+            except Exception as e:
+                # report which prefix failed, then stop on unexpected errors
+                tqdm.write(click.style(f"[{prefix}] failed\n\t{e}\n\n", fg="red"))
+                raise
+            terms = list(ontology)
+            if not terms:
+                tqdm.write(click.style(f"[{prefix}] failed, got no terms\n", fg="red"))
+            else:
+                tqdm.write(f"[{prefix}] got {len(terms):,} terms\n")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/src/pyobo/struct/generic_rdf.py b/src/pyobo/struct/generic_rdf.py
new file mode 100644
index 00000000..c9f4cc7e
--- /dev/null
+++ b/src/pyobo/struct/generic_rdf.py
@@ -0,0 +1,181 @@
+"""Read from RDF."""
+
+import logging
+from pathlib import Path
+
+import curies
+import rdflib
+from bioregistry import NormalizedNamedReference
+from curies import ReferenceTuple
+from rdflib import OWL, RDF, RDFS, SKOS, Graph, Node, URIRef
+from tqdm import tqdm
+
+from pyobo.identifier_utils import get_converter
+from pyobo.struct import Obo, Term, TypeDef, build_ontology
+
+__all__ = [
+ "read_generic_rdf",
+]
+
+logger = logging.getLogger(__name__)
+
+
+def read_generic_rdf(
+    path: str | Path,
+    *,
+    prefix: str,
+    converter: curies.Converter | None = None,
+    rdf_format: str | None = None,
+) -> Obo:
+    """Parse an RDF file into a graph and extract an ontology from it.
+
+    :param path: The location of the RDF file, passed to :meth:`rdflib.Graph.parse`
+    :param prefix: The prefix to assign to the resulting ontology
+    :param converter: The converter used to parse URIs, passed on to the extraction
+    :param rdf_format: The RDF serialization, passed as ``format`` to the parser
+    :returns: The extracted ontology
+    """
+    parsed_graph = rdflib.Graph()
+    parsed_graph.parse(path, format=rdf_format)
+    ontology = _get_ontology(parsed_graph, prefix=prefix, converter=converter)
+    return ontology
+
+
+#: The RDF types whose instances are extracted as terms
+TERM_OBJECT_TYPES: list[Node] = [RDFS.Class, SKOS.Concept, OWL.Class, OWL.NamedIndividual]
+#: The RDF types whose instances are extracted as typedefs
+TYPEDEF_OBJECT_TYPES: list[Node] = [RDF.Property]
+
+
+def _get_ontology(
+    graph: rdflib.Graph,
+    *,
+    prefix: str,
+    converter: curies.Converter | None = None,
+) -> Obo:
+    """Extract an ontology from a generic RDF graph.
+
+    :param graph: The RDF graph to extract terms and typedefs from
+    :param prefix: The prefix to assign to the resulting ontology
+    :param converter: The converter used to parse URIs. If not given,
+        the result of :func:`get_converter` is used.
+    :returns: An ontology built from the nodes typed with any of
+        ``TERM_OBJECT_TYPES`` (terms) or ``TYPEDEF_OBJECT_TYPES`` (typedefs).
+        Nodes that aren't URIs, or whose URIs can't be parsed, are skipped.
+    """
+    if converter is None:
+        converter = get_converter()
+    # a node can be typed with several of the target classes (e.g., both
+    # rdfs:Class and owl:Class), which would yield it several times, so
+    # deduplicate while keeping the first-seen order
+    term_nodes = dict.fromkeys(graph.subjects(RDF.type, TERM_OBJECT_TYPES))
+    typedef_nodes = dict.fromkeys(graph.subjects(RDF.type, TYPEDEF_OBJECT_TYPES))
+    terms = [
+        term
+        for concept in tqdm(term_nodes)
+        if isinstance(concept, URIRef)
+        and (term := get_term(graph, concept, converter=converter)) is not None
+    ]
+    typedefs = [
+        typedef
+        for concept in tqdm(typedef_nodes)
+        if isinstance(concept, URIRef)
+        and (typedef := get_typedef(graph, concept, converter=converter)) is not None
+    ]
+    return build_ontology(
+        prefix=prefix,
+        terms=terms,
+        typedefs=typedefs,
+        idspaces={curie_prefix: str(uri_prefix) for curie_prefix, uri_prefix in graph.namespaces()},
+    )
+
+
+def _literal_objects(graph: Graph, subject: Node, predicate: Node) -> list[rdflib.Literal]:
+    """Get the literal objects of the subject/predicate pair whose language is a default one."""
+    # use the public ``language`` property rather than the private ``_language`` attribute
+    return [
+        o
+        for o in graph.objects(subject, predicate)
+        if isinstance(o, rdflib.Literal) and o.language in DEFAULT_LANGUAGES
+    ]
+
+
+# until we have a better way of representing internationalization, this
+# only keeps literals that are language-less or tagged as English.
+# literals in any other language are ignored
+DEFAULT_LANGUAGES = {"en", None}
+
+
+def get_term(graph: rdflib.Graph, node: URIRef, converter: curies.Converter) -> Term | None:
+    """Build a term from a node, or return none if the converter can't parse its URI.
+
+    The name is the first ``rdfs:label`` (falling back to ``skos:prefLabel``), the
+    definition is the first ``skos:definition``, synonyms come from ``skos:altLabel``,
+    and the SKOS exact/broad/narrow/related matches to URIs are added as mappings.
+    """
+    parsed: ReferenceTuple | None = converter.parse_uri(str(node), strict=False)
+    if parsed is None:
+        return None
+
+    names = _literal_objects(graph, node, RDFS.label)
+    if not names:
+        names = _literal_objects(graph, node, SKOS.prefLabel)
+    descriptions = _literal_objects(graph, node, SKOS.definition)  # MULTIPLE
+
+    # TODO decide if class or individual
+    reference = NormalizedNamedReference(
+        prefix=parsed.prefix,
+        identifier=parsed.identifier,
+        name=names[0] if names else None,
+    )
+    term = Term(
+        reference=reference,
+        definition=descriptions[0] if descriptions else None,
+    )
+    for synonym in _literal_objects(graph, node, SKOS.altLabel):
+        term.append_synonym(synonym)
+
+    # each SKOS mapping predicate is paired with the method that records it
+    mapping_appenders = (
+        (SKOS.exactMatch, term.append_exact_match),
+        (SKOS.broadMatch, term.append_broad_match),
+        (SKOS.narrowMatch, term.append_narrow_match),
+        (SKOS.relatedMatch, term.append_related_match),
+    )
+    for mapping_predicate, append_mapping in mapping_appenders:
+        for target in graph.objects(node, mapping_predicate):
+            if isinstance(target, URIRef):
+                append_mapping(converter.parse_uri(str(target), strict=True))
+    return term
+
+
+def get_typedef(graph: rdflib.Graph, node: URIRef, converter: curies.Converter) -> TypeDef | None:
+    """Get a typedef from a property node in the graph.
+
+    :param graph: The RDF graph to read annotations about the property from
+    :param node: The URI of the property
+    :param converter: The converter used to parse URIs into references
+    :returns: A typedef, or none if the node's URI can't be parsed by the converter
+    """
+    reference_tuple: ReferenceTuple | None = converter.parse_uri(str(node), strict=False)
+    if reference_tuple is None:
+        # report through logging instead of writing to the console unconditionally
+        logger.debug("failed to parse property URI: %s", node)
+        return None
+    labels = _literal_objects(graph, node, RDFS.label) or _literal_objects(
+        graph, node, SKOS.prefLabel
+    )
+
+    definitions = _literal_objects(graph, node, SKOS.definition)  # MULTIPLE
+    if len(definitions) > 1:
+        logger.debug("[%s] multiple definitions found, only keeping first", reference_tuple.curie)
+    definition = definitions[0] if definitions else None
+
+    comments = _literal_objects(graph, node, RDFS.comment)
+    if len(comments) > 1:
+        logger.debug("[%s] multiple comments found, only keeping first", reference_tuple.curie)
+    comment = comments[0] if comments else None
+
+    if not definition and comment:
+        logger.debug(
+            "[%s] had no definition but it did have a comment. upgrading", reference_tuple.curie
+        )
+        definition = comment
+        comment = None
+
+    typedef = TypeDef(
+        reference=NormalizedNamedReference(
+            prefix=reference_tuple.prefix,
+            identifier=reference_tuple.identifier,
+            name=labels[0] if labels else None,
+        ),
+        definition=definition,
+        comment=comment,
+    )
+    # the hierarchy of properties is stated with rdfs:subPropertyOf. rdfs:subClassOf
+    # is still checked so parents from graphs that use it for properties are kept
+    for parent_predicate in (RDFS.subPropertyOf, RDFS.subClassOf):
+        for parent_uri in graph.objects(node, parent_predicate):
+            if parent_reference := converter.parse_uri(str(parent_uri), strict=False):
+                typedef.append_parent(parent_reference)
+
+    return typedef
+
+
+def _demo() -> None:
+    """Download the Bioschemas types RDF and write it as OBO to the user's desktop folder."""
+    import pystow
+
+    # another source to try:
+    # https://nfdi4ing.pages.rwth-aachen.de/metadata4ing/metadata4ing/ontology.ttl
+    url = "https://bioschemas.org/types/bioschemas_types.ttl"
+    graph = pystow.ensure_rdf("dalia", url=url)
+    ontology = _get_ontology(graph, prefix="bioschemas")
+    # resolve relative to the current user's home instead of a fixed user's directory
+    ontology.write_obo(Path.home().joinpath("Desktop", "bioschemas.obo"))
+
+
+if __name__ == "__main__":
+ _demo()
diff --git a/src/pyobo/struct/jskos_utils.py b/src/pyobo/struct/jskos_utils.py
new file mode 100644
index 00000000..fa7aebce
--- /dev/null
+++ b/src/pyobo/struct/jskos_utils.py
@@ -0,0 +1,54 @@
+"""Read JSKOS."""
+
+import itertools as itt
+from collections.abc import Iterable
+from pathlib import Path
+
+import curies
+import jskos
+from jskos import ProcessedConcept, ProcessedKOS
+
+from pyobo.struct import Obo, build_ontology
+
+__all__ = [
+ "from_pkos",
+ "read_jskos",
+]
+
+
+def read_jskos(path: str | Path, *, prefix: str, converter: curies.Converter | None = None) -> Obo:
+    """Read a JSKOS file, process it with a converter, and turn it into an ontology.
+
+    :param path: The location of the JSKOS file, passed to :func:`jskos.read`
+    :param prefix: The prefix to assign to the resulting ontology
+    :param converter: The converter used for processing. If not given,
+        the result of :func:`pyobo.identifier_utils.get_converter` is used.
+    :returns: The ontology built from the processed JSKOS
+    """
+    if converter is not None:
+        effective_converter = converter
+    else:
+        from ..identifier_utils import get_converter
+
+        effective_converter = get_converter()
+    raw_kos = jskos.read(path)
+    processed_kos = raw_kos.process(effective_converter)
+    return from_pkos(prefix=prefix, pkos=processed_kos)
+
+
+def from_pkos(prefix: str, pkos: ProcessedKOS) -> Obo:
+    """Build an ontology with the given prefix from a processed knowledge organization system."""
+    flattened_concepts = get_terms(pkos)
+    return build_ontology(prefix=prefix, terms=flattened_concepts)
+
+
+def get_terms(pkos: ProcessedKOS) -> Iterable[ProcessedConcept]:
+    """Flatten the concepts of the KOS, and everything nested under them, into a list."""
+    flattened: list[ProcessedConcept] = []
+    for top_concept in pkos.concepts:
+        flattened.extend(_iterate_concepts_inner(top_concept))
+    return flattened
+
+
+def _iterate_concepts_inner(concept: ProcessedConcept) -> Iterable[ProcessedConcept]:
+    """Yield the concept, then recursively everything in its narrower and broader lists.
+
+    :param concept: The concept to start from
+    :raises NotImplementedError: if the concept has any mappings. This is raised
+        only after the concept and everything nested under it have been yielded.
+    """
+    yield concept
+    for narrower in concept.narrower:
+        yield from _iterate_concepts_inner(narrower)
+    for broader in concept.broader:
+        yield from _iterate_concepts_inner(broader)
+    for _mapping in concept.mappings:
+        raise NotImplementedError("conversion of JSKOS mappings is not implemented yet")
+
+
+# demo: read a remote JSKOS file and print the resulting ontology
+if __name__ == "__main__":
+    url = "https://oer-repo.uibk.ac.at/w3id.org/vocabs/oefos2012/schema.json"
+    o = read_jskos(url, prefix="oefos")
+    print(o)
diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py
index a64d4215..ecf33a4b 100644
--- a/src/pyobo/struct/reference.py
+++ b/src/pyobo/struct/reference.py
@@ -277,7 +277,7 @@ def _parse_reference_or_uri_literal(
case BlocklistError():
return None
case UnparsableIRIError():
- # this means that it's defininitely a URI,
+ # this means that it's definitely a URI,
# but it couldn't be parsed with Bioregistry
return OBOLiteral.uri(str_or_curie_or_uri)
case NotCURIEError() as exc:
diff --git a/src/pyobo/struct/skos.py b/src/pyobo/struct/skos.py
new file mode 100644
index 00000000..858da8ee
--- /dev/null
+++ b/src/pyobo/struct/skos.py
@@ -0,0 +1,137 @@
+"""Read SKOS from RDF."""
+
+from pathlib import Path
+
+import curies
+import rdflib
+from bioregistry import NormalizedNamableReference, NormalizedNamedReference
+from rdflib import DCTERMS, RDF, RDFS, SKOS, VANN, Graph, Node, URIRef
+from tqdm import tqdm
+
+from pyobo.identifier_utils import get_converter
+from pyobo.struct import Obo, Term, build_ontology
+
+__all__ = [
+ "get_skos_ontology",
+ "read_skos",
+]
+
+
+def read_skos(
+    path: str | Path,
+    *,
+    prefix: str | None = None,
+    converter: curies.Converter | None = None,
+    rdf_format: str | None = None,
+) -> Obo:
+    """Parse a SKOS RDF file into a graph and extract an ontology from it.
+
+    :param path: The location of the RDF file, passed to :meth:`rdflib.Graph.parse`
+    :param prefix: The prefix for the ontology, passed on to the extraction
+    :param converter: The converter used to parse URIs, passed on to the extraction
+    :param rdf_format: The RDF serialization. Falls back to ``ttl`` if not given.
+    :returns: The extracted ontology
+    """
+    effective_format = rdf_format or "ttl"
+    skos_graph = rdflib.Graph()
+    skos_graph.parse(path, format=effective_format)
+    return get_skos_ontology(skos_graph, prefix=prefix, converter=converter)
+
+
+def get_skos_ontology(
+    graph: rdflib.Graph,
+    *,
+    prefix: str | None = None,
+    converter: curies.Converter | None = None,
+) -> Obo:
+    """Extract an ontology from a SKOS RDF graph.
+
+    :param graph: The RDF graph, which has to contain exactly one ``skos:ConceptScheme``
+    :param prefix: The prefix for the ontology. If not given, it's looked up
+        from the ``vann:preferredNamespacePrefix`` of the concept scheme.
+    :param converter: The converter used to parse URIs. If not given,
+        the result of :func:`get_converter` is used.
+    :returns: An ontology whose terms are the ``skos:Concept`` instances in the graph
+        and whose root terms are the ``skos:hasTopConcept`` objects of the scheme
+    :raises ValueError: if the graph doesn't have exactly one concept scheme,
+        or if no prefix is given and none can be found in the graph
+    """
+    if converter is None:
+        converter = get_converter()
+    schemes = list(graph.subjects(RDF.type, SKOS.ConceptScheme))
+    if len(schemes) != 1:
+        raise ValueError(
+            f"expected exactly one skos:ConceptScheme in the graph, but found {len(schemes)}"
+        )
+    scheme = schemes[0]
+
+    def _get_scheme_object_literal(p: Node) -> str | None:
+        # take the first object of the predicate on the scheme, if there is any
+        for o in graph.objects(scheme, p):
+            return str(o)
+        return None
+
+    if prefix is None:
+        prefix = _get_scheme_object_literal(VANN.preferredNamespacePrefix)
+
+    if prefix is None:
+        raise ValueError(f"no prefix given nor found using {VANN.preferredNamespacePrefix}")
+
+    root_terms = [
+        NormalizedNamableReference.from_reference(
+            converter.parse_uri(str(subject), strict=True).to_pydantic()
+        )
+        for subject in graph.objects(scheme, SKOS.hasTopConcept)
+    ]
+    terms = [
+        get_term(graph, concept, converter=converter)
+        for concept in tqdm(graph.subjects(RDF.type, SKOS.Concept))
+    ]
+
+    # FIXME need to put in parents
+
+    return build_ontology(
+        prefix=prefix,
+        terms=terms,
+        root_terms=root_terms,
+        idspaces={curie_prefix: str(uri_prefix) for curie_prefix, uri_prefix in graph.namespaces()},
+        name=_get_scheme_object_literal(DCTERMS.title),
+        description=_get_scheme_object_literal(DCTERMS.description)
+        or _get_scheme_object_literal(RDFS.comment),
+    )
+
+
+def _literal_objects(graph: Graph, subject: Node, predicate: Node) -> list[rdflib.Literal]:
+    """Get the literal objects of the subject/predicate pair whose language is a default one."""
+    # use the public ``language`` property rather than the private ``_language`` attribute
+    return [
+        o
+        for o in graph.objects(subject, predicate)
+        if isinstance(o, rdflib.Literal) and o.language in DEFAULT_LANGUAGES
+    ]
+
+
+# until we have a better way of representing internationalization, this
+# only keeps literals that are language-less or tagged as English.
+# literals in any other language are ignored
+DEFAULT_LANGUAGES = {"en", None}
+
+
+def get_term(graph: rdflib.Graph, node: URIRef, converter: curies.Converter) -> Term:
+    """Build a term from a SKOS concept node, parsing its URI strictly.
+
+    The name is the first ``skos:prefLabel``, the definition is the first
+    ``skos:definition``, synonyms come from ``skos:altLabel``, and the SKOS
+    exact/broad/narrow/related matches to URIs are added as mappings.
+    """
+    parsed = converter.parse_uri(str(node), strict=True)
+    names = _literal_objects(graph, node, SKOS.prefLabel)
+    descriptions = _literal_objects(graph, node, SKOS.definition)
+
+    reference = NormalizedNamedReference(
+        prefix=parsed.prefix,
+        identifier=parsed.identifier,
+        name=names[0] if names else None,
+    )
+    term = Term(
+        reference=reference,
+        definition=descriptions[0] if descriptions else None,
+    )
+    for synonym in _literal_objects(graph, node, SKOS.altLabel):
+        term.append_synonym(synonym)
+
+    # each SKOS mapping predicate is paired with the method that records it
+    mapping_appenders = (
+        (SKOS.exactMatch, term.append_exact_match),
+        (SKOS.broadMatch, term.append_broad_match),
+        (SKOS.narrowMatch, term.append_narrow_match),
+        (SKOS.relatedMatch, term.append_related_match),
+    )
+    for mapping_predicate, append_mapping in mapping_appenders:
+        for target in graph.objects(node, mapping_predicate):
+            if isinstance(target, URIRef):
+                append_mapping(converter.parse_uri(str(target), strict=True))
+    return term
+
+
+def _demo() -> None:
+    """Download the HCRT SKOS vocabulary and write it as OBO to the user's desktop folder."""
+    import pystow
+
+    url = "https://raw.githubusercontent.com/dini-ag-kim/hcrt/refs/heads/master/hcrt.ttl"
+    graph = pystow.ensure_rdf("dalia", url=url)
+    ontology = get_skos_ontology(graph)
+    # resolve relative to the current user's home instead of a fixed user's directory
+    ontology.write_obo(Path.home().joinpath("Desktop", "hcrt.obo"))
+
+
+if __name__ == "__main__":
+ _demo()
diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py
index da88e41d..015a3636 100644
--- a/src/pyobo/struct/struct.py
+++ b/src/pyobo/struct/struct.py
@@ -382,6 +382,60 @@ def append_exact_match(
self.annotate_object(v.exact_match, reference, annotations=axioms)
return self
+    def append_broad_match(
+        self,
+        reference: ReferenceHint,
+        *,
+        mapping_justification: Reference | None = None,
+        confidence: float | None = None,
+        contributor: Reference | None = None,
+    ) -> Self:
+        """Annotate this object with a broad match to the reference.
+
+        :param reference: The target of the mapping
+        :param mapping_justification: Passed on when preparing the mapping annotations
+        :param confidence: Passed on when preparing the mapping annotations
+        :param contributor: Passed on when preparing the mapping annotations
+        :returns: This object, so calls can be chained
+        """
+        target = _ensure_ref(reference)
+        mapping_annotations = self._prepare_mapping_annotations(
+            mapping_justification=mapping_justification,
+            confidence=confidence,
+            contributor=contributor,
+        )
+        self.annotate_object(v.broad_match, target, annotations=mapping_annotations)
+        return self
+
+    def append_narrow_match(
+        self,
+        reference: ReferenceHint,
+        *,
+        mapping_justification: Reference | None = None,
+        confidence: float | None = None,
+        contributor: Reference | None = None,
+    ) -> Self:
+        """Annotate this object with a narrow match to the reference.
+
+        :param reference: The target of the mapping
+        :param mapping_justification: Passed on when preparing the mapping annotations
+        :param confidence: Passed on when preparing the mapping annotations
+        :param contributor: Passed on when preparing the mapping annotations
+        :returns: This object, so calls can be chained
+        """
+        target = _ensure_ref(reference)
+        mapping_annotations = self._prepare_mapping_annotations(
+            mapping_justification=mapping_justification,
+            confidence=confidence,
+            contributor=contributor,
+        )
+        self.annotate_object(v.narrow_match, target, annotations=mapping_annotations)
+        return self
+
+    def append_related_match(
+        self,
+        reference: ReferenceHint,
+        *,
+        mapping_justification: Reference | None = None,
+        confidence: float | None = None,
+        contributor: Reference | None = None,
+    ) -> Self:
+        """Annotate this object with a related match to the reference.
+
+        :param reference: The target of the mapping
+        :param mapping_justification: Passed on when preparing the mapping annotations
+        :param confidence: Passed on when preparing the mapping annotations
+        :param contributor: Passed on when preparing the mapping annotations
+        :returns: This object, so calls can be chained
+        """
+        target = _ensure_ref(reference)
+        mapping_annotations = self._prepare_mapping_annotations(
+            mapping_justification=mapping_justification,
+            confidence=confidence,
+            contributor=contributor,
+        )
+        self.annotate_object(v.related_match, target, annotations=mapping_annotations)
+        return self
+
def set_species(self, identifier: str, name: str | None = None) -> Self:
"""Append the from_species relation."""
if name is None:
@@ -1758,7 +1812,7 @@ def _get_typedef(
_warn_string = f"[{term.curie}] undefined typedef: {pp}"
if predicate.name:
_warn_string += f" ({predicate.name})"
- logger.warning(_warn_string)
+ logger.debug(_warn_string)
_warned.add(pp)
return None
diff --git a/src/pyobo/utils/misc.py b/src/pyobo/utils/misc.py
index 8f352c2c..3b879da1 100644
--- a/src/pyobo/utils/misc.py
+++ b/src/pyobo/utils/misc.py
@@ -5,10 +5,12 @@
import logging
from collections.abc import Callable, Iterable
from datetime import datetime
+from typing import TypeAlias
import bioversions.utils
+from bioregistry.schema import AnnotatedURL
-from pyobo.constants import ONTOLOGY_GETTERS, OntologyFormat
+from ..constants import ONTOLOGY_GETTERS, OntologyFormat
__all__ = [
"VERSION_GETTERS",
@@ -48,6 +50,8 @@
"https://w3id.org/lehrplan/ontology/", # like in https://w3id.org/lehrplan/ontology/1.0.0-4
"http://www.ebi.ac.uk/swo/version/", # http://www.ebi.ac.uk/swo/version/6.0
"https://w3id.org/emi/version/",
+ "https://nfdi4culture.de/ontology/", # https://nfdi4culture.de/ontology/3.0.0
+ "http://purls.helmholtz-metadaten.de/mwo/mwo.owl/", # http://purls.helmholtz-metadaten.de/mwo/mwo.owl/3.0.0
]
VERSION_PREFIX_SPLITS = [
"http://www.ebi.ac.uk/efo/releases/v",
@@ -56,6 +60,7 @@
"http://ontology.neuinfo.org/NIF/ttl/nif/version/",
"http://nmrml.org/cv/v", # as in http://nmrml.org/cv/v1.1.0/nmrCV
"http://enanomapper.github.io/ontologies/releases/", # as in http://enanomapper.github.io/ontologies/releases/10.0/enanomapper
+ "https://w3id.org/sulo/sulo-", # as in https://w3id.org/sulo/sulo-0.2.4.ttl
]
BAD = {
"http://purl.obolibrary.org/obo",
@@ -127,11 +132,25 @@ def _get_obograph_json_version(prefix: str, url: str) -> str | None:
return cleanup_version(rv, prefix)
+def _get_skos_version(prefix: str, url: str) -> str | None:
+ """Get the version from a SKOS artifact (placeholder that always returns none)."""
+ # TODO add implementation
+ return None
+
+
+def _get_jskos_version(prefix: str, url: str) -> str | None:
+ """Get the version from a JSKOS artifact (placeholder that always returns none)."""
+ # TODO add implementation
+ return None
+
+
+VersionGetter: TypeAlias = Callable[[str, str], str | None]
+
-#: A mapping from data type to gersion getter function
+#: A mapping from data type to version getter function
-VERSION_GETTERS: dict[OntologyFormat, Callable[[str, str], str | None]] = {
+VERSION_GETTERS: dict[OntologyFormat, VersionGetter] = {
"obo": _get_obo_version,
"owl": _get_owl_version,
"json": _get_obograph_json_version,
+ "skos": _get_skos_version,
+ "jskos": _get_jskos_version,
}
@@ -195,7 +214,7 @@ def _prioritize_version(
return None
-def _get_getter_urls(prefix: str) -> Iterable[tuple[OntologyFormat, str]]:
+def _get_getter_urls(prefix: str) -> Iterable[tuple[OntologyFormat, str | AnnotatedURL]]:
# assume that all possible files that can be downloaded
# are in sync and have the same version
for ontology_format, get_url_func in ONTOLOGY_GETTERS:
@@ -211,7 +230,11 @@ def _get_version_from_artifact(prefix: str) -> str | None:
get_version_func = VERSION_GETTERS.get(ontology_format)
if get_version_func is None:
continue
- version = get_version_func(prefix, url)
+ match url:
+ case str():
+ version = get_version_func(prefix, url)
+ case AnnotatedURL():
+ version = get_version_func(prefix, url.url)
if version:
return cleanup_version(version, prefix=prefix)
return None
diff --git a/tests/constants.py b/tests/constants.py
index 76bea8a1..fc7f990b 100644
--- a/tests/constants.py
+++ b/tests/constants.py
@@ -16,7 +16,7 @@
chebi_patch = mock.patch(
"pyobo.getters._ensure_ontology_path",
- side_effect=lambda *args, **kwargs: OntologyPathPack("obo", TEST_CHEBI_OBO_PATH),
+ side_effect=lambda *args, **kwargs: OntologyPathPack("obo", TEST_CHEBI_OBO_PATH, None),
)
chebi_version_patch = mock.patch(
"pyobo.getters._get_version_from_artifact",
diff --git a/tests/test_struct/test_jskos/__init__.py b/tests/test_struct/test_jskos/__init__.py
new file mode 100644
index 00000000..c89cc0ba
--- /dev/null
+++ b/tests/test_struct/test_jskos/__init__.py
@@ -0,0 +1 @@
+"""Tests for ingestion of JSKOS."""
diff --git a/tests/test_struct/test_jskos/test_jskos.py b/tests/test_struct/test_jskos/test_jskos.py
new file mode 100644
index 00000000..578119d8
--- /dev/null
+++ b/tests/test_struct/test_jskos/test_jskos.py
@@ -0,0 +1,25 @@
+"""Test JSKOS."""
+
+import unittest
+
+import curies
+
+from pyobo.struct.jskos_utils import read_jskos
+
+URL = "https://skohub.io/KDSF-FFK/kdsf-ffk/heads/main/w3id.org/kdsf-ffk/index.json"
+
+
+class TestJSKOS(unittest.TestCase):
+    """Test reading JSKOS into an ontology."""
+
+    def test_jskos(self) -> None:
+        """Test that a concept and its English label are read from a remote JSKOS file."""
+        # NOTE(review): "ksdf.fkk" looks like a transposition of the "kdsf-ffk"
+        # in the URL - confirm whether the prefix spelling is intentional
+        prefix = "ksdf.fkk"
+        prefix_map = {prefix: "https://w3id.org/kdsf-ffk/"}
+        converter = curies.Converter.from_prefix_map(prefix_map)
+        ontology = read_jskos(prefix=prefix, path=URL, converter=converter)
+        id_to_name = ontology.get_id_name_mapping()
+        self.assertIn("ArbeitUndWirtschaft", id_to_name)
+        self.assertIn("Work and Economy", id_to_name["ArbeitUndWirtschaft"])
diff --git a/tests/test_struct/test_skos/__init__.py b/tests/test_struct/test_skos/__init__.py
new file mode 100644
index 00000000..55cb8f26
--- /dev/null
+++ b/tests/test_struct/test_skos/__init__.py
@@ -0,0 +1 @@
+"""Test the SKOS reader."""
diff --git a/tests/test_struct/test_skos/test.ttl b/tests/test_struct/test_skos/test.ttl
new file mode 100644
index 00000000..ef35503e
--- /dev/null
+++ b/tests/test_struct/test_skos/test.ttl
@@ -0,0 +1,28 @@
+@base .
+@prefix dct: .
+@prefix skos: .
+@prefix vann: .
+
+
+ a skos:ConceptScheme;
+ dct:title "Hochschulcampus Ressourcentypen"@de, "Higher Education Resource Types"@en, "Brontypen voor het hoger onderwijs"@nl, "Типи ресурсів вищої освіти"@uk, "Typy zdrojů vyššího vzdělání"@cs ;
+ dct:description "Eine Wertelliste für Typen von Lernressourcen (Learning Resource Type), entstanden im Kontext des Metadatenschemas \"LOM for Higher Education OER Repositories\" (https://w3id.org/dini-ag-kim/hs-oer-lom-profil/latest/)."@de ;
+ dct:publisher ;
+ dct:issued "2020-02-07" ;
+ vann:preferredNamespaceUri "https://w3id.org/kim/hcrt/" ;
+ vann:preferredNamespacePrefix "hcrt" ;
+ dct:license ;
+ skos:hasTopConcept , ,