Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

bodc: updated file uri & label #458

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions invenio_vocabularies/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@

import re

import idutils

Check warning on line 15 in invenio_vocabularies/config.py

View workflow job for this annotation

GitHub Actions / Python / Tests (3.9, postgresql14, opensearch2)

Implicit imports (e.g., 'import idutils; idutils.function;') might be removed in the next major version. Please use explicit imports (e.g., 'from idutils import function;') instead.

Check warning on line 15 in invenio_vocabularies/config.py

View workflow job for this annotation

GitHub Actions / Python / Tests (3.12, postgresql14, opensearch2)

Implicit imports (e.g., 'import idutils; idutils.function;') might be removed in the next major version. Please use explicit imports (e.g., 'from idutils import function;') instead.
from invenio_i18n import lazy_gettext as _

from .datastreams.readers import (
Expand Down Expand Up @@ -196,8 +196,8 @@
)
"""Subject GEMET file download link."""

VOCABULARIES_SUBJECTS_BODC_PUV_FILE_URL = "http://vocab.nerc.ac.uk/collection/P01/current/?_profile=nvs&_mediatype=application/rdf+xml"
"""Subject BODC-PUV file download link."""
VOCABULARIES_SUBJECTS_NVS_FILE_URL = "http://vocab.nerc.ac.uk/collection/P02/current/?_profile=nvs&_mediatype=application/rdf+xml"
"""Subject NVS-P02 file download link."""

VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING = {
"Cape Verde": "Cabo Verde",
Expand Down
4 changes: 2 additions & 2 deletions invenio_vocabularies/contrib/subjects/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,8 +36,8 @@
lambda: current_app.config["VOCABULARIES_SUBJECTS_EUROSCIVOC_FILE_URL"]
)

bodc_puv_file_url = LocalProxy(
lambda: current_app.config["VOCABULARIES_SUBJECTS_BODC_PUV_FILE_URL"]
nvs_file_url = LocalProxy(
lambda: current_app.config["VOCABULARIES_SUBJECTS_NVS_FILE_URL"]
)


Expand Down
4 changes: 2 additions & 2 deletions invenio_vocabularies/contrib/subjects/datastreams.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,10 @@
from invenio_i18n import lazy_gettext as _

from ...datastreams.writers import ServiceWriter
from .bodc import datastreams as bodc_datastreams
from .euroscivoc import datastreams as euroscivoc_datastreams
from .gemet import datastreams as gemet_datastreams
from .mesh import datastreams as mesh_datastreams
from .nvs import datastreams as nvs_datastreams


class SubjectsServiceWriter(ServiceWriter):
Expand All @@ -40,7 +40,7 @@ def _entry_id(self, entry):
**mesh_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
**euroscivoc_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
**gemet_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
**bodc_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
**nvs_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
}
"""Subjects Data Streams transformers."""

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -6,4 +6,4 @@
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""BODC Subjects module."""
"""NVS Subjects module."""
Original file line number Diff line number Diff line change
Expand Up @@ -6,13 +6,13 @@
# modify it under the terms of the MIT License; see LICENSE file for more
# details.

"""BODC subjects datastreams, readers, transformers, and writers."""
"""NVS subjects datastreams, readers, transformers, and writers."""

from invenio_vocabularies.datastreams.errors import TransformerError
from invenio_vocabularies.datastreams.readers import RDFReader
from invenio_vocabularies.datastreams.transformers import RDFTransformer

from ..config import bodc_puv_file_url
from ..config import nvs_file_url

# Available with the "rdf" extra
try:
Expand All @@ -21,9 +21,9 @@
rdflib = None


class BODCPUVSubjectsTransformer(RDFTransformer):
class NVSSubjectsTransformer(RDFTransformer):
"""
Transformer class to convert BODC-PUV RDF data to a dictionary format.
Transformer class to convert NVS RDF data to a dictionary format.

Input:
- Relevant fields:
Expand All @@ -36,8 +36,8 @@ class BODCPUVSubjectsTransformer(RDFTransformer):
Output:
- A dictionary with the following structure:
{
"id": "SDN:P01::SAGEMSFM", # BODC-specific parameter ID (skos:notation).
"scheme": "BODC-PUV", # The scheme name indicating this is a BODC Parameter Usage Vocabulary concept.
"id": "SDN:P01::SAGEMSFM", # NVS-specific parameter ID (skos:notation).
"scheme": "NVS-P01", # The scheme name indicating this is a collection P01 from NERC Vocabulary Server (NVS).
"subject": "AMSSedAge", # The alternative label (skos:altLabel), if available, or None.
"title": {
"en": "14C age of Foraminiferida" # English preferred label (skos:prefLabel).
Expand Down Expand Up @@ -78,13 +78,16 @@ def _transform_entry(self, subject, rdf_graph):
else:
raise TransformerError(f"No id found for: {subject}")

alt_labels = [obj for obj in subject_data.get(str(self.skos_core.altLabel), [])]
subject_text = str(alt_labels[0]) if alt_labels else ""
pref_labels = [
obj for obj in subject_data.get(str(self.skos_core.prefLabel), [])
]

subject_text = str(pref_labels[0]) if pref_labels else labels["en"]
definition = str(subject_data.get(str(self.skos_core.definition), [None])[0])

return {
"id": id,
"scheme": "BODC-PUV",
"scheme": "NVS-P02",
"subject": subject_text,
"title": labels,
"props": {"definition": definition} if definition else {},
Expand All @@ -94,18 +97,18 @@ def _transform_entry(self, subject, rdf_graph):

# Configuration for datastream

VOCABULARIES_DATASTREAM_TRANSFORMERS = {"bodc-transformer": BODCPUVSubjectsTransformer}
VOCABULARIES_DATASTREAM_TRANSFORMERS = {"nvs-transformer": NVSSubjectsTransformer}

DATASTREAM_CONFIG = {
"readers": [
{
"type": "http",
"args": {
"origin": bodc_puv_file_url,
"origin": nvs_file_url,
},
},
{"type": "rdf"},
],
"transformers": [{"type": "bodc-transformer"}],
"transformers": [{"type": "nvs-transformer"}],
"writers": [{"args": {"writer": {"type": "subjects-service"}}, "type": "async"}],
}
14 changes: 7 additions & 7 deletions invenio_vocabularies/factories.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,12 @@
)
from .contrib.funders.datastreams import DATASTREAM_CONFIG as funders_ds_config
from .contrib.names.datastreams import DATASTREAM_CONFIG as names_ds_config
from .contrib.subjects.bodc.datastreams import DATASTREAM_CONFIG as bodc_ds_config
from .contrib.subjects.datastreams import DATASTREAM_CONFIG as subjects_ds_config
from .contrib.subjects.euroscivoc.datastreams import (
DATASTREAM_CONFIG as euroscivoc_ds_config,
)
from .contrib.subjects.gemet.datastreams import DATASTREAM_CONFIG as gemet_ds_config
from .contrib.subjects.nvs.datastreams import DATASTREAM_CONFIG as nvs_ds_config


class VocabularyConfig:
Expand Down Expand Up @@ -164,15 +164,15 @@ def get_service(self):
raise NotImplementedError("Service not implemented for GEMET Subjects")


class SubjectsBODCVocabularyConfig(VocabularyConfig):
"""BODC Subjects Vocabulary Config."""
class SubjectsNVSVocabularyConfig(VocabularyConfig):
"""NVS Subjects Vocabulary Config."""

config = bodc_ds_config
vocabulary_name = "subjects:bodc-puv"
config = nvs_ds_config
vocabulary_name = "subjects:nvs"

def get_service(self):
"""Get the service for the vocabulary."""
raise NotImplementedError("Service not implemented for BODC Subjects")
raise NotImplementedError("Service not implemented for NVS Subjects")


def get_vocabulary_config(vocabulary):
Expand All @@ -187,7 +187,7 @@ def get_vocabulary_config(vocabulary):
"affiliations:edmo": AffiliationsEDMOVocabularyConfig,
"subjects": SubjectsVocabularyConfig,
"subjects:gemet": SubjectsGEMETVocabularyConfig,
"subjects:bodc": SubjectsBODCVocabularyConfig,
"subjects:nvs": SubjectsNVSVocabularyConfig,
"subjects:euroscivoc": SubjectsEuroSciVocVocabularyConfig,
}
return vocab_config.get(vocabulary, VocabularyConfig)()
122 changes: 0 additions & 122 deletions tests/contrib/subjects/bodc/test_subjects_bodc_datastream.py

This file was deleted.

Loading
Loading