Skip to content

Commit 6e60a58

Browse files
Merge pull request #472 from linkml/schemaview_namespaces
schemaview.py: Adding tests for `namespaces` and `get_elements_applicable_by_prefix`
2 parents 2467f6d + f68744c commit 6e60a58

File tree

2 files changed: 234 additions and 33 deletions.

linkml_runtime/utils/schemaview.py

Lines changed: 9 additions & 8 deletions
Original file line number | Diff line number | Diff line change
@@ -529,13 +529,15 @@ def all_schema(self, imports: bool = True) -> list[SchemaDefinition]:
529529
def namespaces(self) -> Namespaces:
530530
"""Return the namespaces present in a schema.
531531
532+
Note: the output of this function will differ, depending on whether any functions that process imports have been run.
533+
532534
:return: namespaces
533535
:rtype: Namespaces
534536
"""
535537
namespaces = Namespaces()
538+
for cmap in self.schema.default_curi_maps:
539+
namespaces.add_prefixmap(cmap, include_defaults=False)
536540
for s in self.schema_map.values():
537-
for cmap in self.schema.default_curi_maps:
538-
namespaces.add_prefixmap(cmap, include_defaults=False)
539541
for prefix in s.prefixes.values():
540542
namespaces[prefix.prefix_prefix] = prefix.prefix_reference
541543
return namespaces
@@ -1429,13 +1431,12 @@ def get_elements_applicable_by_prefix(self, prefix: str) -> list[str]:
14291431
:return: Optional[str]
14301432
14311433
"""
1432-
applicable_elements = []
14331434
elements = self.all_elements()
1434-
for category_element in elements.values():
1435-
if hasattr(category_element, "id_prefixes") and prefix in category_element.id_prefixes:
1436-
applicable_elements.append(category_element.name)
1437-
1438-
return applicable_elements
1435+
return [
1436+
element.name
1437+
for element in elements.values()
1438+
if hasattr(element, "id_prefixes") and prefix in element.id_prefixes
1439+
]
14391440

14401441
@lru_cache(None)
14411442
def all_aliases(self) -> list[str]:

tests/test_utils/test_schemaview.py

Lines changed: 225 additions & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -62,16 +62,18 @@
6262
CREATURE_SCHEMA_BASE_PATH = INPUT_DIR_PATH / "mcc"
6363

6464
yaml_loader = YAMLLoader()
65-
IS_CURRENT = "is current"
66-
EMPLOYED_AT = "employed at"
67-
COMPANY = "Company"
68-
PERSON = "Person"
65+
ACTIVITY = "activity"
6966
ADULT = "Adult"
70-
THING = "Thing"
67+
AGE_IN_YEARS = "age in years"
7168
AGENT = "agent"
72-
ACTIVITY = "activity"
69+
COMPANY = "Company"
70+
EMPLOYED_AT = "employed at"
71+
IS_CURRENT = "is current"
72+
ORGANIZATION = "Organization"
73+
PERSON = "Person"
7374
RELATED_TO = "related to"
74-
AGE_IN_YEARS = "age in years"
75+
THING = "Thing"
76+
7577
EMPTY = ""
7678
ID = "identifier"
7779
KEY = "key"
@@ -789,6 +791,150 @@ def test_in_schema(schema_view_with_imports: SchemaView) -> None:
789791
view.in_schema("fake_element")
790792

791793

794+
# Prefixes, curi_maps, and imports to add to a schema
795+
# The lines to add are under the "text" key
796+
# The prefix-URL pairs are under the "exp" key
797+
prefixes_to_add = {
798+
None: {"exp": {}},
799+
"short": {
800+
"text": """
801+
prefixes:
802+
personinfo: https://w3id.org/linkml/examples/personinfo/
803+
""",
804+
"exp": {"personinfo": "https://w3id.org/linkml/examples/personinfo/"},
805+
},
806+
"med": {
807+
"text": """
808+
prefixes:
809+
personinfo: https://w3id.org/linkml/examples/personinfo/
810+
linkml: https://w3id.org/linkml/
811+
schema: http://schema.org/
812+
rdfs: https://www.w3.org/2000/01/rdf-schema#
813+
""",
814+
"exp": {
815+
"personinfo": "https://w3id.org/linkml/examples/personinfo/",
816+
"linkml": "https://w3id.org/linkml/",
817+
"schema": "http://schema.org/",
818+
"rdfs": "https://www.w3.org/2000/01/rdf-schema#",
819+
},
820+
},
821+
"long": {
822+
"text": """
823+
prefixes:
824+
personinfo: https://w3id.org/linkml/examples/personinfo/
825+
linkml: https://w3id.org/linkml/
826+
schema: http://schema.org/
827+
rdfs: https://www.w3.org/2000/01/rdf-schema#
828+
prov: http://www.w3.org/ns/prov#
829+
GSSO: http://purl.obolibrary.org/obo/GSSO_
830+
famrel: https://example.org/FamilialRelations#
831+
bizcodes: https://example.org/bizcodes/
832+
skos: http://www.w3.org/2004/02/skos/core#
833+
P: http://example.org/P/
834+
ROR: http://example.org/ror/
835+
CODE: http://example.org/code/
836+
GEO: http://example.org/geoloc/
837+
""",
838+
"exp": {
839+
"personinfo": "https://w3id.org/linkml/examples/personinfo/",
840+
"linkml": "https://w3id.org/linkml/",
841+
"schema": "http://schema.org/",
842+
"rdfs": "https://www.w3.org/2000/01/rdf-schema#",
843+
"prov": "http://www.w3.org/ns/prov#",
844+
"GSSO": "http://purl.obolibrary.org/obo/GSSO_",
845+
"famrel": "https://example.org/FamilialRelations#",
846+
"bizcodes": "https://example.org/bizcodes/",
847+
"skos": "http://www.w3.org/2004/02/skos/core#",
848+
"P": "http://example.org/P/",
849+
"ROR": "http://example.org/ror/",
850+
"CODE": "http://example.org/code/",
851+
"GEO": "http://example.org/geoloc/",
852+
},
853+
},
854+
}
855+
curi_maps_to_add = {
856+
None: {
857+
"exp": {},
858+
},
859+
"semweb_context": {
860+
"text": "\ndefault_curi_maps:\n - semweb_context\n",
861+
"exp": {
862+
"dc": "http://purl.org/dc/terms/",
863+
"dcat": "http://www.w3.org/ns/dcat#",
864+
"dcterms": "http://purl.org/dc/terms/",
865+
"faldo": "http://biohackathon.org/resource/faldo#",
866+
"foaf": "http://xmlns.com/foaf/0.1/",
867+
"idot": "http://identifiers.org/",
868+
"oa": "http://www.w3.org/ns/oa#",
869+
"owl": "http://www.w3.org/2002/07/owl#",
870+
"prov": "http://www.w3.org/ns/prov#",
871+
"rdf": "http://www.w3.org/1999/02/22-rdf-syntax-ns#",
872+
"rdfs": "http://www.w3.org/2000/01/rdf-schema#",
873+
"void": "http://rdfs.org/ns/void#",
874+
"xsd": "http://www.w3.org/2001/XMLSchema#",
875+
"oboInOwl": "http://www.geneontology.org/formats/oboInOwl#",
876+
},
877+
},
878+
}
879+
# Prefixes from imports only show up if a function that generates the imports closure is run.
880+
imports_to_add = {
881+
None: {"exp": {}, "imported": {}},
882+
"linkml_types": {
883+
"text": "\nimports:\n - linkml:types\n",
884+
"exp": {},
885+
"imported": {
886+
"linkml": "https://w3id.org/linkml/",
887+
"xsd": "http://www.w3.org/2001/XMLSchema#",
888+
"shex": "http://www.w3.org/ns/shex#",
889+
"schema": "http://schema.org/",
890+
},
891+
},
892+
}
893+
894+
895+
@pytest.mark.parametrize("prefix", prefixes_to_add.keys())
896+
@pytest.mark.parametrize("curi_map", curi_maps_to_add.keys())
897+
@pytest.mark.parametrize("imports", imports_to_add.keys())
898+
@pytest.mark.parametrize("run_imports", [True, False])
899+
def test_namespaces(prefix: str | None, curi_map: str | None, imports: str | None, run_imports: bool) -> None:
900+
"""Check that the `namespaces` function correctly loads the prefix <==> URL mapping.
901+
902+
Note: the `namespaces` function does not check whether a prefix already exists in the mapping and whether
903+
the existing value is the same as the incoming value; the incoming value always overwrites any existing value.
904+
To test this, the RDFS URL is "http://" in the "semweb_context" mapping and "https://" in the "prefixes" section.
905+
Whenever the prefixes section is present, it will override the value from semweb_context.
906+
907+
The `run_imports` parameter indicates whether or not schemas referenced under 'imports' should be imported; if not, none of the prefixes from imported schemas will be registered as `namespaces`.
908+
"""
909+
schema = """
910+
id: https://w3id.org/linkml/examples/personinfo
911+
name: personinfo
912+
"""
913+
if curi_map:
914+
schema += curi_maps_to_add[curi_map]["text"]
915+
if prefix:
916+
schema += prefixes_to_add[prefix]["text"]
917+
if imports:
918+
schema += imports_to_add[imports]["text"]
919+
920+
sv = SchemaView(schema)
921+
if run_imports:
922+
# execute a function that will import linked schemas
923+
sv.all_schema()
924+
925+
assert sv.namespaces() == {
926+
**curi_maps_to_add[curi_map]["exp"],
927+
**prefixes_to_add[prefix]["exp"],
928+
**imports_to_add[imports]["imported"],
929+
}
930+
else:
931+
assert sv.namespaces() == {
932+
**curi_maps_to_add[curi_map]["exp"],
933+
**prefixes_to_add[prefix]["exp"],
934+
**imports_to_add[imports]["exp"],
935+
}
936+
937+
792938
CREATURE_EXPECTED = {
793939
"class": {
794940
CREATURE_SCHEMA: {"MythicalCreature", "HasMagic", "MagicalAbility", "Dragon", "Phoenix", "Unicorn"},
@@ -1903,17 +2049,71 @@ def test_dynamic_enum(schema_view_with_imports: SchemaView) -> None:
19032049
assert set(e.include[0].reachable_from.source_nodes) == {"GO:0007049", "GO:0022403"}
19042050

19052051

1906-
def test_get_elements_applicable_by_identifier(schema_view_no_imports: SchemaView) -> None:
2052+
# dictionary mapping class name to id_prefixes
2053+
KS_PREFIXES_BY_CLASS = {PERSON: {"orcid", "doi", "zfin", "wb"}, ORGANIZATION: {"pmid", "zfin", "wb"}}
2054+
2055+
2056+
def test_get_elements_applicable_by_prefix(schema_view_no_imports: SchemaView) -> None:
2057+
"""Test get_elements_applicable_by_prefix method."""
2058+
view = schema_view_no_imports
2059+
# create a dictionary mapping class name to id_prefixes
2060+
prefixes = {el: set(view.get_element(el).id_prefixes) for el in [PERSON, ORGANIZATION]}
2061+
2062+
for el in [PERSON, ORGANIZATION]:
2063+
assert prefixes[el] == {prfx.upper() for prfx in KS_PREFIXES_BY_CLASS[el]}
2064+
2065+
for prefix in ["ORCID", "DOI", "ZFIN", "PMID", "WB"]:
2066+
els_applicable_by_prefix = view.get_elements_applicable_by_prefix(prefix)
2067+
for el in [PERSON, ORGANIZATION]:
2068+
if prefix.lower() in KS_PREFIXES_BY_CLASS[el]:
2069+
assert el in els_applicable_by_prefix
2070+
else:
2071+
assert el not in els_applicable_by_prefix
2072+
2073+
2074+
@pytest.mark.parametrize("prefix", ["ORCID", "DOI", "ZFIN", "PMID", "WB", "Pmid", "TEST", "rdfs", "some_crap"])
2075+
def test_get_elements_applicable_by_identifier(schema_view_no_imports: SchemaView, prefix: str) -> None:
19072076
"""Test get_elements_applicable_by_identifier method."""
19082077
view = schema_view_no_imports
1909-
elements = view.get_elements_applicable_by_identifier("ORCID:1234")
1910-
assert PERSON in elements
1911-
elements = view.get_elements_applicable_by_identifier("PMID:1234")
1912-
assert "Organization" in elements
1913-
elements = view.get_elements_applicable_by_identifier("http://www.ncbi.nlm.nih.gov/pubmed/1234")
1914-
assert "Organization" in elements
1915-
elements = view.get_elements_applicable_by_identifier("TEST:1234")
1916-
assert "anatomical entity" not in elements
2078+
# make sure imports are loaded
2079+
view.all_schema()
2080+
2081+
elements = view.get_elements_applicable_by_identifier(f"{prefix}:1234-5678-90")
2082+
# check in KS_PREFIXES_BY_CLASS to see whether we expect PERSON or ORGANIZATION to have this prefix
2083+
for el in [PERSON, ORGANIZATION]:
2084+
if prefix.lower() in KS_PREFIXES_BY_CLASS[el]:
2085+
assert el in elements
2086+
else:
2087+
assert el not in elements
2088+
2089+
no_els = False
2090+
if prefix.lower() not in {*KS_PREFIXES_BY_CLASS[PERSON], *KS_PREFIXES_BY_CLASS[ORGANIZATION]}:
2091+
assert elements == []
2092+
no_els = True
2093+
2094+
# Prefix => URL mapping; some URLs are valid, others are made up.
2095+
prefix_to_url = {
2096+
"DOI": "http://dx.doi.org/", # valid
2097+
"PMID": "http://www.ncbi.nlm.nih.gov/pubmed/", # valid
2098+
"rdfs": "http://www.w3.org/2000/01/rdf-schema#", # valid
2099+
"ZFIN": "http://zfin.org/", # valid
2100+
# made up URLs
2101+
"WB": "https://www.wormbase.org/get?name=",
2102+
"TEST": "https://www.test.com/id=",
2103+
"Pmid": "http://www.ncbi.nlm.nih.gov/pubmed/",
2104+
"ORCID": "http://orcids-r-us.com/orcid/",
2105+
"some_crap": "https://whatev.er/",
2106+
}
2107+
# These URLs are defined in the schema prefixes section
2108+
valid_urls = {"doi", "pmid", "rdfs", "zfin"}
2109+
2110+
# Get element by URL
2111+
# This will only successfully retrieve the element if the URL is in `valid_urls`
2112+
url_els = view.get_elements_applicable_by_identifier(f"{prefix_to_url[prefix]}1234-5678-90")
2113+
if no_els or prefix.lower() not in valid_urls:
2114+
assert url_els == []
2115+
else:
2116+
assert url_els == elements
19172117

19182118

19192119
# FIXME: improve test to actually test the annotations
@@ -2016,9 +2216,9 @@ def test_ancestors_descendants(schema_view_no_imports: SchemaView) -> None:
20162216
view = schema_view_no_imports
20172217

20182218
assert set(view.class_ancestors(ADULT)) == {ADULT, PERSON, "HasAliases", THING}
2019-
assert set(view.class_ancestors(COMPANY)) == {COMPANY, "Organization", "HasAliases", THING}
2020-
assert set(view.class_ancestors(COMPANY, reflexive=False)) == {"Organization", "HasAliases", THING}
2021-
assert set(view.class_descendants(THING)) == {THING, PERSON, "Organization", COMPANY, ADULT}
2219+
assert set(view.class_ancestors(COMPANY)) == {COMPANY, ORGANIZATION, "HasAliases", THING}
2220+
assert set(view.class_ancestors(COMPANY, reflexive=False)) == {ORGANIZATION, "HasAliases", THING}
2221+
assert set(view.class_descendants(THING)) == {THING, PERSON, ORGANIZATION, COMPANY, ADULT}
20222222

20232223

20242224
def test_get_mappings(schema_view_no_imports: SchemaView) -> None:
@@ -2324,7 +2524,7 @@ def test_induced_slot_again(schema_view_no_imports: SchemaView) -> None:
23242524
assert rng is not None
23252525

23262526
# test induced slots
2327-
for cn in [COMPANY, PERSON, "Organization"]:
2527+
for cn in [COMPANY, PERSON, ORGANIZATION]:
23282528
islot = view.induced_slot("aliases", cn)
23292529
assert islot.multivalued is True
23302530
assert islot.owner == cn
@@ -2334,7 +2534,7 @@ def test_induced_slot_again(schema_view_no_imports: SchemaView) -> None:
23342534
assert view.get_identifier_slot(THING).name == "id"
23352535
assert view.get_identifier_slot("FamilialRelationship") is None
23362536

2337-
for cn in [COMPANY, PERSON, "Organization", THING]:
2537+
for cn in [COMPANY, PERSON, ORGANIZATION, THING]:
23382538
assert view.induced_slot("id", cn).identifier
23392539
assert not view.induced_slot("name", cn).identifier
23402540
assert not view.induced_slot("name", cn).required
@@ -2842,7 +3042,7 @@ def sv_cycles_schema() -> SchemaView:
28423042
}
28433043

28443044

2845-
@pytest.mark.parametrize(("target", "cycle_start_node"), [(k, v) for k, v in CYCLES[TYPES][0].items()])
3045+
@pytest.mark.parametrize(("target", "cycle_start_node"), list(CYCLES[TYPES][0].items()))
28463046
@pytest.mark.parametrize("fn", ["detect_cycles", "graph_closure", "type_ancestors"])
28473047
def test_detect_type_cycles_error(sv_cycles_schema: SchemaView, target: str, cycle_start_node: str, fn: str) -> None:
28483048
"""Test detection of cycles in the types segment of the cycles schema."""
@@ -2857,7 +3057,7 @@ def test_detect_type_cycles_error(sv_cycles_schema: SchemaView, target: str, cyc
28573057
sv_cycles_schema.type_ancestors(type_name=target, detect_cycles=True)
28583058

28593059

2860-
@pytest.mark.parametrize(("target", "expected"), [(k, v) for k, v in CYCLES[TYPES][1].items()])
3060+
@pytest.mark.parametrize(("target", "expected"), list(CYCLES[TYPES][1].items()))
28613061
@pytest.mark.parametrize("fn", ["detect_cycles", "graph_closure", "type_ancestors"])
28623062
def test_detect_type_cycles_no_cycles(sv_cycles_schema: SchemaView, target: str, expected: set[str], fn: str) -> None:
28633063
"""Ensure that types without cycles in their ancestry do not throw an error."""
@@ -2871,7 +3071,7 @@ def test_detect_type_cycles_no_cycles(sv_cycles_schema: SchemaView, target: str,
28713071
assert set(got) == expected
28723072

28733073

2874-
@pytest.mark.parametrize(("target", "cycle_start_node"), [(k, v) for k, v in CYCLES[CLASSES][0].items()])
3074+
@pytest.mark.parametrize(("target", "cycle_start_node"), list(CYCLES[CLASSES][0].items()))
28753075
@pytest.mark.parametrize("fn", ["detect_cycles", "graph_closure", "class_ancestors"])
28763076
def test_detect_class_cycles_error(sv_cycles_schema: SchemaView, target: str, cycle_start_node: str, fn: str) -> None:
28773077
"""Test detection of class cycles in the cycles schema."""
@@ -2887,7 +3087,7 @@ def test_detect_class_cycles_error(sv_cycles_schema: SchemaView, target: str, cy
28873087
sv_cycles_schema.class_ancestors(target, detect_cycles=True)
28883088

28893089

2890-
@pytest.mark.parametrize(("target", "expected"), [(k, v) for k, v in CYCLES[CLASSES][1].items()])
3090+
@pytest.mark.parametrize(("target", "expected"), list(CYCLES[CLASSES][1].items()))
28913091
@pytest.mark.parametrize("fn", ["detect_cycles", "graph_closure", "class_ancestors"])
28923092
def test_detect_class_cycles_no_cycles(sv_cycles_schema: SchemaView, target: str, expected: set[str], fn: str) -> None:
28933093
"""Ensure that classes without cycles in their ancestry do not throw an error."""

0 commit comments

Comments
 (0)