inveniosoftware · 0einstein0 · Jan 31, 2025 · Jan 31, 2025
diff --git a/invenio_vocabularies/config.py b/invenio_vocabularies/config.py
@@ -12,7 +12,7 @@

 import re

 import idutils
 from invenio_i18n import lazy_gettext as _

 from .datastreams.readers import (
@@ -196,8 +196,8 @@
 )
 """Subject GEMET file download link."""
 
-VOCABULARIES_SUBJECTS_BODC_PUV_FILE_URL = "http://vocab.nerc.ac.uk/collection/P01/current/?_profile=nvs&_mediatype=application/rdf+xml"
-"""Subject BODC-PUV file download link."""
+VOCABULARIES_SUBJECTS_NVS_FILE_URL = "http://vocab.nerc.ac.uk/collection/P02/current/?_profile=nvs&_mediatype=application/rdf+xml"
+"""Subject NVS-P02 file download link."""
 
 VOCABULARIES_AFFILIATIONS_EDMO_COUNTRY_MAPPING = {
     "Cape Verde": "Cabo Verde",

diff --git a/invenio_vocabularies/contrib/subjects/config.py b/invenio_vocabularies/contrib/subjects/config.py
@@ -36,8 +36,8 @@
     lambda: current_app.config["VOCABULARIES_SUBJECTS_EUROSCIVOC_FILE_URL"]
 )
 
-bodc_puv_file_url = LocalProxy(
-    lambda: current_app.config["VOCABULARIES_SUBJECTS_BODC_PUV_FILE_URL"]
+nvs_file_url = LocalProxy(
+    lambda: current_app.config["VOCABULARIES_SUBJECTS_NVS_FILE_URL"]
 )
 
 

diff --git a/invenio_vocabularies/contrib/subjects/datastreams.py b/invenio_vocabularies/contrib/subjects/datastreams.py
@@ -12,10 +12,10 @@
 from invenio_i18n import lazy_gettext as _
 
 from ...datastreams.writers import ServiceWriter
-from .bodc import datastreams as bodc_datastreams
 from .euroscivoc import datastreams as euroscivoc_datastreams
 from .gemet import datastreams as gemet_datastreams
 from .mesh import datastreams as mesh_datastreams
+from .nvs import datastreams as nvs_datastreams
 
 
 class SubjectsServiceWriter(ServiceWriter):
@@ -40,7 +40,7 @@ def _entry_id(self, entry):
     **mesh_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
     **euroscivoc_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
     **gemet_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
-    **bodc_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
+    **nvs_datastreams.VOCABULARIES_DATASTREAM_TRANSFORMERS,
 }
 """Subjects Data Streams transformers."""
 

diff --git a/...ularies/contrib/subjects/bodc/__init__.py → ...bularies/contrib/subjects/nvs/__init__.py b/...ularies/contrib/subjects/bodc/__init__.py → ...bularies/contrib/subjects/nvs/__init__.py
@@ -6,4 +6,4 @@
 # modify it under the terms of the MIT License; see LICENSE file for more
 # details.
 
-"""BODC Subjects module."""
+"""NVS Subjects module."""
diff --git a/...ries/contrib/subjects/bodc/datastreams.py → ...aries/contrib/subjects/nvs/datastreams.py b/...ries/contrib/subjects/bodc/datastreams.py → ...aries/contrib/subjects/nvs/datastreams.py
@@ -6,13 +6,13 @@
 # modify it under the terms of the MIT License; see LICENSE file for more
 # details.
 
-"""BODC subjects datastreams, readers, transformers, and writers."""
+"""NVS subjects datastreams, readers, transformers, and writers."""
 
 from invenio_vocabularies.datastreams.errors import TransformerError
 from invenio_vocabularies.datastreams.readers import RDFReader
 from invenio_vocabularies.datastreams.transformers import RDFTransformer
 
-from ..config import bodc_puv_file_url
+from ..config import nvs_file_url
 
 # Available with the "rdf" extra
 try:
@@ -21,9 +21,9 @@
     rdflib = None
 
 
-class BODCPUVSubjectsTransformer(RDFTransformer):
+class NVSSubjectsTransformer(RDFTransformer):
     """
-    Transformer class to convert BODC-PUV RDF data to a dictionary format.
+    Transformer class to convert NVS RDF data to a dictionary format.
 
     Input:
         - Relevant fields:
@@ -36,8 +36,8 @@ class BODCPUVSubjectsTransformer(RDFTransformer):
     Output:
         - A dictionary with the following structure:
             {
-                "id": "SDN:P01::SAGEMSFM",  # BODC-specific parameter ID (skos:notation).
-                "scheme": "BODC-PUV",  # The scheme name indicating this is a BODC Parameter Usage Vocabulary concept.
+                "id": "SDN:P01::SAGEMSFM",  # NVS concept ID (skos:notation); this sample value is a P01 notation carried over from the earlier BODC-PUV example.
+                "scheme": "NVS-P02",  # The scheme name emitted by the transformer: collection P02 of the NERC Vocabulary Server (NVS).
                 "subject": "AMSSedAge",  # The alternative label (skos:altLabel), if available, or None.
                 "title": {
                     "en": "14C age of Foraminiferida"  # English preferred label (skos:prefLabel).
@@ -78,13 +78,16 @@ def _transform_entry(self, subject, rdf_graph):
         else:
             raise TransformerError(f"No id found for: {subject}")
 
-        alt_labels = [obj for obj in subject_data.get(str(self.skos_core.altLabel), [])]
-        subject_text = str(alt_labels[0]) if alt_labels else ""
+        pref_labels = [
+            obj for obj in subject_data.get(str(self.skos_core.prefLabel), [])
+        ]
+
+        subject_text = str(pref_labels[0]) if pref_labels else labels["en"]
         definition = str(subject_data.get(str(self.skos_core.definition), [None])[0])
 
         return {
             "id": id,
-            "scheme": "BODC-PUV",
+            "scheme": "NVS-P02",
             "subject": subject_text,
             "title": labels,
             "props": {"definition": definition} if definition else {},
@@ -94,18 +97,18 @@ def _transform_entry(self, subject, rdf_graph):
 
 # Configuration for datastream
 
-VOCABULARIES_DATASTREAM_TRANSFORMERS = {"bodc-transformer": BODCPUVSubjectsTransformer}
+VOCABULARIES_DATASTREAM_TRANSFORMERS = {"nvs-transformer": NVSSubjectsTransformer}
 
 DATASTREAM_CONFIG = {
     "readers": [
         {
             "type": "http",
             "args": {
-                "origin": bodc_puv_file_url,
+                "origin": nvs_file_url,
             },
         },
         {"type": "rdf"},
     ],
-    "transformers": [{"type": "bodc-transformer"}],
+    "transformers": [{"type": "nvs-transformer"}],
     "writers": [{"args": {"writer": {"type": "subjects-service"}}, "type": "async"}],
 }
diff --git a/invenio_vocabularies/factories.py b/invenio_vocabularies/factories.py
@@ -28,12 +28,12 @@
 )
 from .contrib.funders.datastreams import DATASTREAM_CONFIG as funders_ds_config
 from .contrib.names.datastreams import DATASTREAM_CONFIG as names_ds_config
-from .contrib.subjects.bodc.datastreams import DATASTREAM_CONFIG as bodc_ds_config
 from .contrib.subjects.datastreams import DATASTREAM_CONFIG as subjects_ds_config
 from .contrib.subjects.euroscivoc.datastreams import (
     DATASTREAM_CONFIG as euroscivoc_ds_config,
 )
 from .contrib.subjects.gemet.datastreams import DATASTREAM_CONFIG as gemet_ds_config
+from .contrib.subjects.nvs.datastreams import DATASTREAM_CONFIG as nvs_ds_config
 
 
 class VocabularyConfig:
@@ -164,15 +164,15 @@ def get_service(self):
         raise NotImplementedError("Service not implemented for GEMET Subjects")
 
 
-class SubjectsBODCVocabularyConfig(VocabularyConfig):
-    """BODC Subjects Vocabulary Config."""
+class SubjectsNVSVocabularyConfig(VocabularyConfig):
+    """NERC Vocabulary Server (NVS) Subjects Vocabulary Config."""
 
-    config = bodc_ds_config
-    vocabulary_name = "subjects:bodc-puv"
+    config = nvs_ds_config
+    vocabulary_name = "subjects:nvs"
 
     def get_service(self):
         """Get the service for the vocabulary."""
-        raise NotImplementedError("Service not implemented for BODC Subjects")
+        raise NotImplementedError("Service not implemented for NVS Subjects")
 
 
 def get_vocabulary_config(vocabulary):
@@ -187,7 +187,7 @@ def get_vocabulary_config(vocabulary):
         "affiliations:edmo": AffiliationsEDMOVocabularyConfig,
         "subjects": SubjectsVocabularyConfig,
         "subjects:gemet": SubjectsGEMETVocabularyConfig,
-        "subjects:bodc": SubjectsBODCVocabularyConfig,
+        "subjects:nvs": SubjectsNVSVocabularyConfig,
         "subjects:euroscivoc": SubjectsEuroSciVocVocabularyConfig,
     }
     return vocab_config.get(vocabulary, VocabularyConfig)()
diff --git a/tests/contrib/subjects/bodc/test_subjects_bodc_datastream.py b/tests/contrib/subjects/bodc/test_subjects_bodc_datastream.py