diff --git a/pyproject.toml b/pyproject.toml index 0e6d1f67..84139d7f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -70,7 +70,7 @@ dependencies = [ "cachier", "pystow>=0.7.5", "bioversions>=0.8.101", - "bioregistry>=0.12.30", + "bioregistry>=0.13.10", "bioontologies>=0.7.2", "ssslm>=0.0.13", "zenodo-client>=0.3.6", @@ -80,6 +80,7 @@ dependencies = [ "curies-processing>=0.1.2", "python-dateutil", "networkx>=3.4", + "jskos", # Resource Downloaders "drugbank_downloader", "chembl_downloader", diff --git a/src/pyobo/constants.py b/src/pyobo/constants.py index ddddb7c5..1cfed568 100644 --- a/src/pyobo/constants.py +++ b/src/pyobo/constants.py @@ -6,11 +6,14 @@ import re from collections.abc import Callable from pathlib import Path -from typing import Literal, NamedTuple, TypeAlias +from typing import TYPE_CHECKING, Literal, NamedTuple, TypeAlias import pystow from typing_extensions import NotRequired, TypedDict +if TYPE_CHECKING: + from bioregistry.schema import AnnotatedURL + __all__ = [ "DATABASE_DIRECTORY", "DEFAULT_PREFIX_MAP", @@ -225,7 +228,7 @@ class IterHelperHelperDict(SlimGetOntologyKwargs): #: The ontology format -OntologyFormat: TypeAlias = Literal["obo", "owl", "json", "rdf"] +OntologyFormat: TypeAlias = Literal["obo", "owl", "json", "rdf", "skos", "jskos"] #: from table 2 of the Functional OWL syntax definition #: at https://www.w3.org/TR/owl2-syntax/#IRIs @@ -244,6 +247,8 @@ class OntologyPathPack(NamedTuple): format: OntologyFormat #: The path to the ontology file path: Path + #: The RDF format + rdf_format: str | None def _get_obo_download(prefix: str) -> str | None: @@ -264,17 +269,31 @@ def _get_json_download(prefix: str) -> str | None: return bioregistry.get_json_download(prefix) -def _get_rdf_download(prefix: str) -> str | None: +def _get_rdf_download(prefix: str) -> str | AnnotatedURL | None: + import bioregistry + + return bioregistry.get_rdf_download(prefix, get_format=True) + + +def _get_skos_download(prefix: str) -> str | AnnotatedURL | None: + import bioregistry + + return bioregistry.get_skos_download(prefix, get_format=True) + + +def _get_jskos_download(prefix: str) -> str | None: import bioregistry - return bioregistry.get_rdf_download(prefix) + return bioregistry.get_jskos_download(prefix) #: Functions that get ontology files. Order matters in this list, #: since order implicitly defines priority -ONTOLOGY_GETTERS: list[tuple[OntologyFormat, Callable[[str], str | None]]] = [ +ONTOLOGY_GETTERS: list[tuple[OntologyFormat, Callable[[str], str | AnnotatedURL | None]]] = [ ("obo", _get_obo_download), ("owl", _get_owl_download), ("json", _get_json_download), ("rdf", _get_rdf_download), + ("skos", _get_skos_download), + ("jskos", _get_jskos_download), ] diff --git a/src/pyobo/getters.py b/src/pyobo/getters.py index 22712098..7005e1e1 100644 --- a/src/pyobo/getters.py +++ b/src/pyobo/getters.py @@ -22,6 +22,7 @@ import click import pystow.utils import requests.exceptions +from bioregistry.schema import AnnotatedURL, RDFFormat from tabulate import tabulate from tqdm.auto import tqdm from typing_extensions import Unpack @@ -32,6 +33,7 @@ ONTOLOGY_GETTERS, GetOntologyKwargs, IterHelperHelperDict, + OntologyFormat, OntologyPathPack, SlimGetOntologyKwargs, ) @@ -177,11 +179,15 @@ def get_ontology( path_pack = _ensure_ontology_path(prefix, force=force, version=version) if path_pack is None: raise NoBuildError(prefix) - ontology_format, path = path_pack + ontology_format, path, rdf_format = path_pack if ontology_format == "obo": pass # all gucci - elif ontology_format in {"owl", "rdf"}: + elif ontology_format == "owl": path = _convert_to_obo(path) + elif ontology_format == "rdf": + from .struct.generic_rdf import read_generic_rdf + + return read_generic_rdf(path=path, prefix=prefix, rdf_format=rdf_format) elif ontology_format == "json": from .struct.obograph import read_obograph @@ -189,6 +195,20 @@ def get_ontology( if cache: obo.write_default(force=force_process) return obo + elif ontology_format == "skos": + from .struct.skos import read_skos + + obo = read_skos(prefix=prefix, path=path, rdf_format=rdf_format) + if cache: + obo.write_default(force=force) + return obo + elif ontology_format == "jskos": + from .struct.jskos_utils import read_jskos + + obo = read_jskos(prefix=prefix, path=path) + if cache: + obo.write_default(force=force) + return obo else: raise UnhandledFormatError(f"[{prefix}] unhandled ontology file format: {path.suffix}") @@ -206,21 +226,52 @@ def get_ontology( return obo +ONTOLOGY_FORMAT_TO_SUFFIX: dict[OntologyFormat, str] = { + "skos": ".ttl", + "jskos": ".json", +} + +XX_TO_SUFFIX: dict[str, str] = {"rdf/xml": ".xml", "xml": ".xml"} + + +def _name_from_url( + url: str, ontology_format: OntologyFormat, *, rdf_format: str | None = None +) -> str: + name = pystow.utils.name_from_url(url) + if "." not in name: + if rdf_format is None: + raise ValueError(f"need to curate a RDF format for {url}") + name = name + XX_TO_SUFFIX[rdf_format] + # TODO add unit test that checks all downloads with no extension have a (RDF) format + return name + + def _ensure_ontology_path( prefix: str, *, force: bool, version: str | None ) -> OntologyPathPack | None: + rdf_format: RDFFormat | None for ontology_format, getter in ONTOLOGY_GETTERS: - url = getter(prefix) - if url is None: - continue + match getter(prefix): + case None: + continue + case AnnotatedURL() as a: + url = a.url + rdf_format = a.rdf_format + case str() as url: + rdf_format = None + case _: + raise TypeError + + name = _name_from_url(url, ontology_format, rdf_format=rdf_format) + try: - path = ensure_path(prefix, url=url, force=force, version=version) + path = ensure_path(prefix, url=url, force=force, version=version, name=name) except (urllib.error.HTTPError, pystow.utils.DownloadError): continue except pystow.utils.UnexpectedDirectoryError: continue # TODO report more info about the URL and the name it tried to make else: - return OntologyPathPack(ontology_format, path) + return OntologyPathPack(ontology_format, path, rdf_format) return None diff --git a/src/pyobo/oer_demo.py b/src/pyobo/oer_demo.py new file mode 100644 index 00000000..fcfc68da --- /dev/null +++ b/src/pyobo/oer_demo.py @@ -0,0 +1,70 @@ +"""Get all OER-related prefixes.""" + +import shutil + +import bioregistry +import click +import pystow +from bioontologies.robot import ROBOTError +from more_click import verbose_option +from tqdm import tqdm +from tqdm.contrib.logging import logging_redirect_tqdm + +import pyobo +from pyobo.getters import NoBuildError + +VALIDATED = {"ccso", "iana.mediatype"} +NEEDS_PYOBO = { + "loc.fdd", # see http://www.loc.gov/preservation/digital/formats/fddXML.zip + "oerschema", # see https://github.com/open-curriculum/oerschema/blob/master/src/config/schema.yml +} + + +# TODO add all vocabularies from https://vocabs.openeduhub.de/ + + +@click.command() +@click.option("-r", "--refresh", is_flag=True) +@verbose_option +def main(refresh: bool = False) -> None: + """Get all OER-related prefixes.""" + collection = bioregistry.get_collection("0000018") + if collection is None: + raise ValueError + + prefixes = [p for p in collection.resources if p not in VALIDATED and p not in NEEDS_PYOBO] + if refresh: + for prefix in tqdm(prefixes): + directory = pystow.join("pyobo", "raw", prefix) + if directory.is_dir(): + shutil.rmtree(directory) + return + + for prefix in tqdm(prefixes, disable=True): + tqdm.write( + click.style(f"[{prefix}] {bioregistry.get_name(prefix, strict=True)}", fg="green") + ) + with logging_redirect_tqdm(): + try: + ontology = pyobo.get_ontology(prefix, cache=False, force_process=True, force=False) + except NotImplementedError as e: + tqdm.write(click.style(f"[{prefix}] failed because not implemented: {e}", fg="red")) + continue + except NoBuildError: + tqdm.write(click.style(f"[{prefix}] no build", fg="yellow")) + continue + except ROBOTError as e: + tqdm.write(click.style(f"[{prefix}]\n{e}", fg="yellow")) + continue + except Exception as e: + tqdm.write(click.style(f"[{prefix}] failed\n\t{e}\n\n", fg="red")) + raise + terms = list(ontology) + if not terms: + tqdm.write(click.style(f"[{prefix}] failed, got no terms\n", fg="red")) + else: + tqdm.write(f"[{prefix}] got {len(terms):,} terms\n") + + +if __name__ == "__main__": + main() diff --git a/src/pyobo/struct/generic_rdf.py b/src/pyobo/struct/generic_rdf.py new file mode 100644 index 00000000..c9f4cc7e --- /dev/null +++ b/src/pyobo/struct/generic_rdf.py @@ -0,0 +1,181 @@ +"""Read from RDF.""" + +import logging +from pathlib import Path + +import curies +import rdflib +from bioregistry import NormalizedNamedReference +from curies import ReferenceTuple +from rdflib import OWL, RDF, RDFS, SKOS, Graph, Node, URIRef +from tqdm import tqdm + +from pyobo.identifier_utils import get_converter +from pyobo.struct import Obo, Term, TypeDef, build_ontology + +__all__ = [ + "read_generic_rdf", +] + +logger = logging.getLogger(__name__) + + +def read_generic_rdf( + path: str | Path, + *, + prefix: str, + converter: curies.Converter | None = None, + rdf_format: str | None = None, +) -> Obo: + """Read an RDF file.""" + graph = rdflib.Graph() + graph.parse(path, format=rdf_format) + return _get_ontology(graph, prefix=prefix, converter=converter) + + +TERM_OBJECT_TYPES: list[Node] = [RDFS.Class, SKOS.Concept, OWL.Class, OWL.NamedIndividual] +TYPEDEF_OBJECT_TYPES: list[Node] = [RDF.Property] + + +def _get_ontology( + graph: rdflib.Graph, + *, + prefix: str, + converter: curies.Converter | None = None, +) -> Obo: + """Extract an ontology from a SKOS RDF graph.""" + if converter is None: + converter = get_converter() + terms = [ + term + for concept in tqdm(graph.subjects(RDF.type, TERM_OBJECT_TYPES)) + if isinstance(concept, URIRef) + and (term := get_term(graph, concept, converter=converter)) is not None + ] + typedefs = [ + typedef + for concept in tqdm(graph.subjects(RDF.type, TYPEDEF_OBJECT_TYPES)) + if isinstance(concept, URIRef) + and (typedef := get_typedef(graph, concept, converter=converter)) is not None + ] + return build_ontology( + prefix=prefix, + terms=terms, + typedefs=typedefs, + idspaces={curie_prefix: str(uri_prefix) for curie_prefix, uri_prefix in graph.namespaces()}, + ) + + +def _literal_objects(graph: Graph, subject: Node, predicate: Node) -> list[rdflib.Literal]: + return [ + o + for o in graph.objects(subject, predicate) + if isinstance(o, rdflib.Literal) and o._language in DEFAULT_LANGUAGES + ] + + +# until we have a better way of representing internationalization, this +# just extracts a language-less or english language literal. otherwise, +# it takes one at random +DEFAULT_LANGUAGES = {"en", None} + + +def get_term(graph: rdflib.Graph, node: URIRef, converter: curies.Converter) -> Term | None: + """Get a term.""" + reference_tuple: ReferenceTuple | None = converter.parse_uri(str(node), strict=False) + if reference_tuple is None: + return None + labels = _literal_objects(graph, node, RDFS.label) or _literal_objects( + graph, node, SKOS.prefLabel + ) + definitions = _literal_objects(graph, node, SKOS.definition) # MULTIPLE + # TODO decide if class or individual + term = Term( + reference=NormalizedNamedReference( + prefix=reference_tuple.prefix, + identifier=reference_tuple.identifier, + name=labels[0] if labels else None, + ), + definition=definitions[0] if definitions else None, + ) + for alt in _literal_objects(graph, node, SKOS.altLabel): + term.append_synonym(alt) + + for exact_match in graph.objects(node, SKOS.exactMatch): + if isinstance(exact_match, URIRef): + term.append_exact_match(converter.parse_uri(str(exact_match), strict=True)) + for broad_match in graph.objects(node, SKOS.broadMatch): + if isinstance(broad_match, URIRef): + term.append_broad_match(converter.parse_uri(str(broad_match), strict=True)) + for narrow_match in graph.objects(node, SKOS.narrowMatch): + if isinstance(narrow_match, URIRef): + term.append_narrow_match(converter.parse_uri(str(narrow_match), strict=True)) + for related_match in graph.objects(node, SKOS.relatedMatch): + if isinstance(related_match, URIRef): + term.append_related_match(converter.parse_uri(str(related_match), strict=True)) + return term + + +def get_typedef(graph: rdflib.Graph, node: URIRef, converter: curies.Converter) -> TypeDef | None: + """Get a typedef.""" + tqdm.write(str(node)) + reference_tuple: ReferenceTuple | None = converter.parse_uri(str(node), strict=False) + if reference_tuple is None: + tqdm.write("failed to parse") + return None + labels = _literal_objects(graph, node, RDFS.label) or _literal_objects( + graph, node, SKOS.prefLabel + ) + definitions = _literal_objects(graph, node, SKOS.definition) # MULTIPLE + if not definitions: + definition = None + elif len(definitions) == 1: + definition = definitions[0] + else: + logger.debug("[%s] multiple definitions found, only keeping first", reference_tuple.curie) + definition = definitions[0] + + comments = _literal_objects(graph, node, RDFS.comment) + if not comments: + comment = None + elif len(comments) == 1: + comment = comments[0] + else: + logger.debug("[%s] multiple comments found, only keeping first", reference_tuple.curie) + comment = comments[0] + + if not definition and comment: + logger.debug( + "[%s] had no definition but it did have a comment. upgrading", reference_tuple.curie + ) + definition = comment + comment = None + + typedef = TypeDef( + reference=NormalizedNamedReference( + prefix=reference_tuple.prefix, + identifier=reference_tuple.identifier, + name=labels[0] if labels else None, + ), + definition=definition, + comment=comment, + ) + for parent_uri in graph.objects(node, RDFS.subClassOf): + if parent_reference := converter.parse_uri(str(parent_uri), strict=False): + typedef.append_parent(parent_reference) + + return typedef + + +def _demo(): + import pystow + + url = "https://nfdi4ing.pages.rwth-aachen.de/metadata4ing/metadata4ing/ontology.ttl" + url = "https://bioschemas.org/types/bioschemas_types.ttl" + graph = pystow.ensure_rdf("dalia", url=url) + ontology = _get_ontology(graph, prefix="bioschemas") + ontology.write_obo("/Users/cthoyt/Desktop/bioschemas.obo") + + +if __name__ == "__main__": + _demo() diff --git a/src/pyobo/struct/jskos_utils.py b/src/pyobo/struct/jskos_utils.py new file mode 100644 index 00000000..fa7aebce --- /dev/null +++ b/src/pyobo/struct/jskos_utils.py @@ -0,0 +1,54 @@ +"""Read JSKOS.""" + +import itertools as itt +from collections.abc import Iterable +from pathlib import Path + +import curies +import jskos +from jskos import ProcessedConcept, ProcessedKOS + +from pyobo.struct import Obo, build_ontology + +__all__ = [ + "from_pkos", + "read_jskos", +] + + +def read_jskos(path: str | Path, *, prefix: str, converter: curies.Converter | None = None) -> Obo: + """Read JSKOS into an ontology.""" + if converter is None: + from ..identifier_utils import get_converter + + converter = get_converter() + pkos = jskos.read(path).process(converter) + return from_pkos(prefix=prefix, pkos=pkos) + + +def from_pkos(prefix: str, pkos: ProcessedKOS) -> Obo: + """Get from a processed knowledge organization system.""" + return build_ontology( + prefix=prefix, + terms=get_terms(pkos), + ) + + +def get_terms(pkos: ProcessedKOS) -> Iterable[ProcessedConcept]: + return list(itt.chain.from_iterable(_iterate_concepts_inner(c) for c in pkos.concepts)) + + +def _iterate_concepts_inner(concept: ProcessedConcept): + yield concept + for narrower in concept.narrower: + yield from _iterate_concepts_inner(narrower) + for broader in concept.broader: + yield from _iterate_concepts_inner(broader) + for _mapping in concept.mappings: + raise NotImplementedError + + +if __name__ == '__main__': + url = "https://oer-repo.uibk.ac.at/w3id.org/vocabs/oefos2012/schema.json" + o = read_jskos(url, prefix="oefos") + print(o) diff --git a/src/pyobo/struct/reference.py b/src/pyobo/struct/reference.py index a64d4215..ecf33a4b 100644 --- a/src/pyobo/struct/reference.py +++ b/src/pyobo/struct/reference.py @@ -277,7 +277,7 @@ def _parse_reference_or_uri_literal( case BlocklistError(): return None case UnparsableIRIError(): - # this means that it's defininitely a URI, + # this means that it's definitely a URI, # but it couldn't be parsed with Bioregistry return OBOLiteral.uri(str_or_curie_or_uri) case NotCURIEError() as exc: diff --git a/src/pyobo/struct/skos.py b/src/pyobo/struct/skos.py new file mode 100644 index 00000000..858da8ee --- /dev/null +++ b/src/pyobo/struct/skos.py @@ -0,0 +1,137 @@ +"""Read SKOS from RDF.""" + +from pathlib import Path + +import curies +import rdflib +from bioregistry import NormalizedNamableReference, NormalizedNamedReference +from rdflib import DCTERMS, RDF, RDFS, SKOS, VANN, Graph, Node, URIRef +from tqdm import tqdm + +from pyobo.identifier_utils import get_converter +from pyobo.struct import Obo, Term, build_ontology + +__all__ = [ + "get_skos_ontology", + "read_skos", +] + + +def read_skos( + path: str | Path, + *, + prefix: str | None = None, + converter: curies.Converter | None = None, + rdf_format: str | None = None, +) -> Obo: + """Read a SKOS RDF file.""" + graph = rdflib.Graph() + graph.parse(path, format=rdf_format or "ttl") + return get_skos_ontology(graph, prefix=prefix, converter=converter) + + +def get_skos_ontology( + graph: rdflib.Graph, + *, + prefix: str | None = None, + converter: curies.Converter | None = None, +) -> Obo: + """Extract an ontology from a SKOS RDF graph.""" + if converter is None: + converter = get_converter() + schemes = list(graph.subjects(RDF.type, SKOS.ConceptScheme)) + if len(schemes) != 1: + raise ValueError + scheme = schemes[0] + + def _get_scheme_object_literal(p: Node) -> str | None: + for o in graph.objects(scheme, p): + return str(o) + return None + + if prefix is None: + prefix = _get_scheme_object_literal(VANN.preferredNamespacePrefix) + + if prefix is None: + raise ValueError(f"no prefix given nor found using {VANN.preferredNamespacePrefix}") + + root_terms = [ + NormalizedNamableReference.from_reference( + converter.parse_uri(str(subject), strict=True).to_pydantic() + ) + for subject in graph.objects(scheme, SKOS.hasTopConcept) + ] + terms = [ + get_term(graph, concept, converter=converter) + for concept in tqdm(graph.subjects(RDF.type, SKOS.Concept)) + ] + + # FIXME need to put in parents + + return build_ontology( + prefix=prefix, + terms=terms, + root_terms=root_terms, + idspaces={curie_prefix: str(uri_prefix) for curie_prefix, uri_prefix in graph.namespaces()}, + name=_get_scheme_object_literal(DCTERMS.title), + description=_get_scheme_object_literal(DCTERMS.description) + or _get_scheme_object_literal(RDFS.comment), + ) + + +def _literal_objects(graph: Graph, subject: Node, predicate: Node) -> list[rdflib.Literal]: + return [ + o + for o in graph.objects(subject, predicate) + if isinstance(o, rdflib.Literal) and o._language in DEFAULT_LANGUAGES + ] + + +# until we have a better way of representing internationalization, this +# just extracts a language-less or english language literal. otherwise, +# it takes one at random +DEFAULT_LANGUAGES = {"en", None} + + +def get_term(graph: rdflib.Graph, node: URIRef, converter: curies.Converter) -> Term: + """Get a term.""" + reference_tuple = converter.parse_uri(str(node), strict=True) + labels = _literal_objects(graph, node, SKOS.prefLabel) + definitions = _literal_objects(graph, node, SKOS.definition) + term = Term( + reference=NormalizedNamedReference( + prefix=reference_tuple.prefix, + identifier=reference_tuple.identifier, + name=labels[0] if labels else None, + ), + definition=definitions[0] if definitions else None, + ) + for alt in _literal_objects(graph, node, SKOS.altLabel): + term.append_synonym(alt) + + for exact_match in graph.objects(node, SKOS.exactMatch): + if isinstance(exact_match, URIRef): + term.append_exact_match(converter.parse_uri(str(exact_match), strict=True)) + for broad_match in graph.objects(node, SKOS.broadMatch): + if isinstance(broad_match, URIRef): + term.append_broad_match(converter.parse_uri(str(broad_match), strict=True)) + for narrow_match in graph.objects(node, SKOS.narrowMatch): + if isinstance(narrow_match, URIRef): + term.append_narrow_match(converter.parse_uri(str(narrow_match), strict=True)) + for related_match in graph.objects(node, SKOS.relatedMatch): + if isinstance(related_match, URIRef): + term.append_related_match(converter.parse_uri(str(related_match), strict=True)) + return term + + +def _demo(): + import pystow + + url = "https://raw.githubusercontent.com/dini-ag-kim/hcrt/refs/heads/master/hcrt.ttl" + graph = pystow.ensure_rdf("dalia", url=url) + ontology = get_skos_ontology(graph) + ontology.write_obo("/Users/cthoyt/Desktop/hcrt.obo") + + +if __name__ == "__main__": + _demo() diff --git a/src/pyobo/struct/struct.py b/src/pyobo/struct/struct.py index da88e41d..015a3636 100644 --- a/src/pyobo/struct/struct.py +++ b/src/pyobo/struct/struct.py @@ -382,6 +382,60 @@ def append_exact_match( self.annotate_object(v.exact_match, reference, annotations=axioms) return self + def append_broad_match( + self, + reference: ReferenceHint, + *, + mapping_justification: Reference | None = None, + confidence: float | None = None, + contributor: Reference | None = None, + ) -> Self: + """Append a broad match, also adding an xref.""" + reference = _ensure_ref(reference) + axioms = self._prepare_mapping_annotations( + mapping_justification=mapping_justification, + confidence=confidence, + contributor=contributor, + ) + self.annotate_object(v.broad_match, reference, annotations=axioms) + return self + + def append_narrow_match( + self, + reference: ReferenceHint, + *, + mapping_justification: Reference | None = None, + confidence: float | None = None, + contributor: Reference | None = None, + ) -> Self: + """Append a narrow match, also adding an xref.""" + reference = _ensure_ref(reference) + axioms = self._prepare_mapping_annotations( + mapping_justification=mapping_justification, + confidence=confidence, + contributor=contributor, + ) + self.annotate_object(v.narrow_match, reference, annotations=axioms) + return self + + def append_related_match( + self, + reference: ReferenceHint, + *, + mapping_justification: Reference | None = None, + confidence: float | None = None, + contributor: Reference | None = None, + ) -> Self: + """Append a related match, also adding an xref.""" + reference = _ensure_ref(reference) + axioms = self._prepare_mapping_annotations( + mapping_justification=mapping_justification, + confidence=confidence, + contributor=contributor, + ) + self.annotate_object(v.related_match, reference, annotations=axioms) + return self + def set_species(self, identifier: str, name: str | None = None) -> Self: """Append the from_species relation.""" if name is None: @@ -1758,7 +1812,7 @@ def _get_typedef( _warn_string = f"[{term.curie}] undefined typedef: {pp}" if predicate.name: _warn_string += f" ({predicate.name})" - logger.warning(_warn_string) + logger.debug(_warn_string) _warned.add(pp) return None diff --git a/src/pyobo/utils/misc.py b/src/pyobo/utils/misc.py index 8f352c2c..3b879da1 100644 --- a/src/pyobo/utils/misc.py +++ b/src/pyobo/utils/misc.py @@ -5,10 +5,12 @@ import logging from collections.abc import Callable, Iterable from datetime import datetime +from typing import TypeAlias import bioversions.utils +from bioregistry.schema import AnnotatedURL -from pyobo.constants import ONTOLOGY_GETTERS, OntologyFormat +from ..constants import ONTOLOGY_GETTERS, OntologyFormat __all__ = [ "VERSION_GETTERS", @@ -48,6 +50,8 @@ "https://w3id.org/lehrplan/ontology/", # like in https://w3id.org/lehrplan/ontology/1.0.0-4 "http://www.ebi.ac.uk/swo/version/", # http://www.ebi.ac.uk/swo/version/6.0 "https://w3id.org/emi/version/", + "https://nfdi4culture.de/ontology/", # https://nfdi4culture.de/ontology/3.0.0 + "http://purls.helmholtz-metadaten.de/mwo/mwo.owl/", # http://purls.helmholtz-metadaten.de/mwo/mwo.owl/3.0.0 ] VERSION_PREFIX_SPLITS = [ "http://www.ebi.ac.uk/efo/releases/v", @@ -56,6 +60,7 @@ "http://ontology.neuinfo.org/NIF/ttl/nif/version/", "http://nmrml.org/cv/v", # as in http://nmrml.org/cv/v1.1.0/nmrCV "http://enanomapper.github.io/ontologies/releases/", # as in http://enanomapper.github.io/ontologies/releases/10.0/enanomapper + "https://w3id.org/sulo/sulo-", # as in https://w3id.org/sulo/sulo-0.2.4.ttl ] BAD = { "http://purl.obolibrary.org/obo", @@ -127,11 +132,25 @@ def _get_obograph_json_version(prefix: str, url: str) -> str | None: return cleanup_version(rv, prefix) +def _get_skos_version(prefix: str, url: str) -> str | None: + # TODO add implementation + return None + + +def _get_jskos_version(prefix: str, url: str) -> str | None: + # TODO add implementation + return None + + +VersionGetter: TypeAlias = Callable[[str, str], str | None] + #: A mapping from data type to gersion getter function -VERSION_GETTERS: dict[OntologyFormat, Callable[[str, str], str | None]] = { +VERSION_GETTERS: dict[OntologyFormat, VersionGetter] = { "obo": _get_obo_version, "owl": _get_owl_version, "json": _get_obograph_json_version, + "skos": _get_skos_version, + "jskos": _get_jskos_version, } @@ -195,7 +214,7 @@ def _prioritize_version( return None -def _get_getter_urls(prefix: str) -> Iterable[tuple[OntologyFormat, str]]: +def _get_getter_urls(prefix: str) -> Iterable[tuple[OntologyFormat, str | AnnotatedURL]]: # assume that all possible files that can be downloaded # are in sync and have the same version for ontology_format, get_url_func in ONTOLOGY_GETTERS: @@ -211,7 +230,11 @@ def _get_version_from_artifact(prefix: str) -> str | None: get_version_func = VERSION_GETTERS.get(ontology_format) if get_version_func is None: continue - version = get_version_func(prefix, url) + match url: + case str(): + version = get_version_func(prefix, url) + case AnnotatedURL(): + version = get_version_func(prefix, url.url) if version: return cleanup_version(version, prefix=prefix) return None diff --git a/tests/constants.py b/tests/constants.py index 76bea8a1..fc7f990b 100644 --- a/tests/constants.py +++ b/tests/constants.py @@ -16,7 +16,7 @@ chebi_patch = mock.patch( "pyobo.getters._ensure_ontology_path", - side_effect=lambda *args, **kwargs: OntologyPathPack("obo", TEST_CHEBI_OBO_PATH), + side_effect=lambda *args, **kwargs: OntologyPathPack("obo", TEST_CHEBI_OBO_PATH, None), ) chebi_version_patch = mock.patch( "pyobo.getters._get_version_from_artifact", diff --git a/tests/test_struct/test_jskos/__init__.py b/tests/test_struct/test_jskos/__init__.py new file mode 100644 index 00000000..c89cc0ba --- /dev/null +++ b/tests/test_struct/test_jskos/__init__.py @@ -0,0 +1 @@ +"""Tests for ingestion of JSKOS.""" diff --git a/tests/test_struct/test_jskos/test_jskos.py b/tests/test_struct/test_jskos/test_jskos.py new file mode 100644 index 00000000..578119d8 --- /dev/null +++ b/tests/test_struct/test_jskos/test_jskos.py @@ -0,0 +1,25 @@ +"""Test JSKOS.""" + +import unittest + +import curies + +from pyobo.struct.jskos_utils import read_jskos + +URL = "https://skohub.io/KDSF-FFK/kdsf-ffk/heads/main/w3id.org/kdsf-ffk/index.json" + + +class TestJSKOS(unittest.TestCase): + """Test JSKOS.""" + + def test_jskos(self) -> None: + """Test JSKOS.""" + converter = curies.Converter.from_prefix_map( + { + "ksdf.fkk": "https://w3id.org/kdsf-ffk/", + } + ) + ontology = read_jskos(prefix="ksdf.fkk", path=URL, converter=converter) + names = ontology.get_id_name_mapping() + self.assertIn("ArbeitUndWirtschaft", names) + self.assertIn("Work and Economy", names["ArbeitUndWirtschaft"]) diff --git a/tests/test_struct/test_skos/__init__.py b/tests/test_struct/test_skos/__init__.py new file mode 100644 index 00000000..55cb8f26 --- /dev/null +++ b/tests/test_struct/test_skos/__init__.py @@ -0,0 +1 @@ +"""Test the SKOS reader.""" diff --git a/tests/test_struct/test_skos/test.ttl b/tests/test_struct/test_skos/test.ttl new file mode 100644 index 00000000..ef35503e --- /dev/null +++ b/tests/test_struct/test_skos/test.ttl @@ -0,0 +1,28 @@ +@base . +@prefix dct: . +@prefix skos: . +@prefix vann: . + + + a skos:ConceptScheme; + dct:title "Hochschulcampus Ressourcentypen"@de, "Higher Education Resource Types"@en, "Brontypen voor het hoger onderwijs"@nl, "Типи ресурсів вищої освіти"@uk, "Typy zdrojů vyššího vzdělání"@cs ; + dct:description "Eine Wertelliste für Typen von Lernressourcen (Learning Resource Type), entstanden im Kontext des Metadatenschemas \"LOM for Higher Education OER Repositories\" (https://w3id.org/dini-ag-kim/hs-oer-lom-profil/latest/)."@de ; + dct:publisher ; + dct:issued "2020-02-07" ; + vann:preferredNamespaceUri "https://w3id.org/kim/hcrt/" ; + vann:preferredNamespacePrefix "hcrt" ; + dct:license ; + skos:hasTopConcept , ,