diff --git a/nidm/core/Constants.py b/nidm/core/Constants.py index 5343bcce..522b4a31 100644 --- a/nidm/core/Constants.py +++ b/nidm/core/Constants.py @@ -505,7 +505,6 @@ def __init__(self, namespaces): #PROV_ATTR_COLLECTION } - # Set of formal attributes of PROV records PROVONE_ATTRIBUTES = PROVONE_ATTRIBUTE_QNAMES | PROV_ATTRIBUTE_QNAMES | \ PROV_ATTRIBUTE_LITERALS diff --git a/nidm/core/provone.py b/nidm/core/provone.py index 16a419b5..bc4628ce 100644 --- a/nidm/core/provone.py +++ b/nidm/core/provone.py @@ -20,7 +20,7 @@ from prov.model import ProvEntity, ProvAgent, ProvDocument, ProvAttribution, \ PROV_REC_CLS, ProvActivity, _ensure_datetime, ProvAssociation, \ ProvCommunication, ProvDerivation, ProvRelation, ProvGeneration, ProvUsage, \ - ProvMembership + ProvMembership, ProvRecord from .Constants import PROVONE_N_MAP, PROVONE_PROCESS, PROVONE_INPUTPORT, \ PROVONE_OUTPUTPORT, PROVONE_DATA, PROVONE_DATALINK, PROVONE_SEQCTRLLINK, \ PROVONE_USER, PROVONE_PROCESSEXEC, PROVONE_ATTR_PROCESS, PROVONE_ATTR_USER, \ @@ -43,11 +43,22 @@ logger = logging.getLogger(__name__) -# add ProvOne Notation mapping to Prov_N_MAP dict -#PROV_N_MAP.update(PROVONE_N_MAP) +# update ProvOne Notation mapping with PROV_N_MAP PROVONE_N_MAP.update(PROV_N_MAP) +class ProvOneRecord(ProvRecord): + """Base class for PROVONE records.""" + + def copy(self): + """ + Return an exact copy of this record. + """ + return PROVONE_REC_CLS[self.get_type()]( + self._bundle, self.identifier, self.attributes + ) + + class ProvPlan(ProvEntity): """ ProvONE Plan element @@ -144,7 +155,7 @@ class Generation(ProvGeneration): FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATA, PROVONE_ATTR_PROCESSEXEC, PROV_ATTR_TIME) - #_prov_type = PROV_GENERATION + _prov_type = PROV_GENERATION class Usage(ProvUsage): @@ -152,7 +163,7 @@ class Usage(ProvUsage): FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESSEXEC, PROVONE_ATTR_DATA, PROV_ATTR_TIME) - #_prov_type = PROV_USAGE + _prov_type = PROV_USAGE class Partnership(ProvRelation): @@ -268,16 +279,10 @@ class Workflow(Process, ): # Class mappings from PROVONE record type -PROV_REC_CLS.update({ +PROVONE_REC_CLS = { PROVONE_PROCESS: Process, PROVONE_PROCESSEXEC: ProcessExec, PROVONE_DATA: Data, - PROV_ATTRIBUTION: Attribution, - PROV_ASSOCIATION: Association, - PROV_COMMUNICATION: Communication, - PROV_DERIVATION: Derivation, - PROV_GENERATION: Generation, - PROV_USAGE: Usage, PROVONE_INPUTPORT: InputPort, PROVONE_HASINPORT: HasInput, PROVONE_OUTPUTPORT: OutputPort, @@ -295,9 +300,10 @@ class Workflow(Process, ): PROVONE_HASDEFAULTPARAM: Parameterization, PROVONE_USER: User, PROVONE_ISPARTOF: Partnership, - PROV_MEMBERSHIP: Membership, -}) +} + +PROVONE_REC_CLS.update(PROV_REC_CLS) class ProvONEDocument(ProvDocument): @@ -848,6 +854,41 @@ def membership(self, collection, data): } ) + # same method as in prov/model.py with just the modification of PROVONE + # constants. Need to re-architect prov and then make necessary changes in + # provone. + def new_record(self, record_type, identifier, attributes=None, + other_attributes=None): + """ + Creates a new record. + + :param record_type: Type of record (one of :py:const:`PROVONE_REC_CLS`). + :param identifier: Identifier for new record. + :param attributes: Attributes as a dictionary or list of tuples to be added + to the record optionally (default: None). + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + attr_list = [] + if attributes: + if isinstance(attributes, dict): + attr_list.extend( + (attr, value) for attr, value in attributes.items() + ) + else: + # expecting a list of attributes here + attr_list.extend(attributes) + if other_attributes: + attr_list.extend( + other_attributes.items() if isinstance(other_attributes, dict) + else other_attributes + ) + new_record = PROVONE_REC_CLS[record_type]( + self, self.valid_qualified_name(identifier), attr_list + ) + self._add_record(new_record) + return new_record + # Aliases wasAttributedTo = attribution wasAssociatedWith = association diff --git a/nidm/core/serializers/provonerdf.py b/nidm/core/serializers/provonerdf.py index 075cc006..cf30b766 100644 --- a/nidm/core/serializers/provonerdf.py +++ b/nidm/core/serializers/provonerdf.py @@ -25,7 +25,7 @@ PROV_ATTR_INFORMANT, PROV_ATTR_RESPONSIBLE, PROV_ATTR_TRIGGER, PROV_ATTR_ENDER, PROV_ATTR_STARTER, PROV_ATTR_USED_ENTITY, PROV_ASSOCIATION) from prov.serializers import Error -import prov.serializers.provrdf +from prov.serializers.provrdf import attr2rdf from nidm.core.Constants import PROVONE_ID_ATTRIBUTES_MAP, PROVONE from nidm.core.serializers import Serializer @@ -66,14 +66,39 @@ def get_anon_id(self, obj, local_prefix="id"): if six.integer_types[-1] not in LITERAL_XSDTYPE_MAP: LITERAL_XSDTYPE_MAP[six.integer_types[-1]] = XSD['long'] +relation_mapper = {URIRef(PROV['alternateOf'].uri): 'alternate', + URIRef(PROV['actedOnBehalfOf'].uri): 'delegation', + URIRef(PROV['specializationOf'].uri): 'specialization', + URIRef(PROV['mentionOf'].uri): 'mention', + URIRef(PROV['wasAssociatedWith'].uri): 'association', + URIRef(PROV['wasDerivedFrom'].uri): 'derivation', + URIRef(PROV['wasAttributedTo'].uri): 'attribution', + URIRef(PROV['wasInformedBy'].uri): 'communication', + URIRef(PROV['wasGeneratedBy'].uri): 'generation', + URIRef(PROV['wasInfluencedBy'].uri): 'influence', + URIRef(PROV['wasInvalidatedBy'].uri): 'invalidation', + URIRef(PROV['wasEndedBy'].uri): 'end', + URIRef(PROV['wasStartedBy'].uri): 'start', + URIRef(PROV['hadMember'].uri): 'membership', + URIRef(PROV['used'].uri): 'usage', + } +predicate_mapper = {RDFS.label: pm.PROV['label'], + URIRef(PROV['atLocation'].uri): PROV_LOCATION, + URIRef(PROV['startedAtTime'].uri): PROV_ATTR_STARTTIME, + URIRef(PROV['endedAtTime'].uri): PROV_ATTR_ENDTIME, + URIRef(PROV['atTime'].uri): PROV_ATTR_TIME, + URIRef(PROV['hadRole'].uri): PROV_ROLE, + URIRef(PROV['hadPlan'].uri): pm.PROV_ATTR_PLAN, + URIRef(PROV['hadUsage'].uri): pm.PROV_ATTR_USAGE, + URIRef(PROV['hadGeneration'].uri): pm.PROV_ATTR_GENERATION, + URIRef(PROV['hadActivity'].uri): pm.PROV_ATTR_ACTIVITY, + } + def attr2rdf(attr): return URIRef(PROVONE[PROVONE_ID_ATTRIBUTES_MAP[attr].split('provone:')[1]].uri) -prov.serializers.provrdf.attr2rdf = attr2rdf - - def valid_qualified_name(bundle, value, xsd_qname=False): if value is None: return None @@ -83,10 +108,11 @@ def valid_qualified_name(bundle, value, xsd_qname=False): class ProvONERDFSerializer(Serializer): """ - PROV-O serializer for :class:`~prov.model.ProvDocument` + PROVONE-O serializer for :class:`~provone.ProvONEDocument` """ - def serialize(self, stream=None, rdf_format='trig', **kwargs): + def serialize(self, stream=None, rdf_format='trig', PROV_N_MAP=PROVONE_N_MAP, + **kwargs): """ Serializes a :class:`~prov.model.ProvDocument` instance to `PROV-O `_. @@ -94,7 +120,7 @@ def serialize(self, stream=None, rdf_format='trig', **kwargs): :param stream: Where to save the output. :param rdf_format: The RDF format of the output, default to TRiG. """ - container = self.encode_document(self.document) + container = self.encode_document(self.document, PROV_N_MAP=PROVONE_N_MAP) newargs = kwargs.copy() newargs['format'] = rdf_format @@ -127,7 +153,9 @@ def serialize(self, stream=None, rdf_format='trig', **kwargs): finally: buf.close() - def deserialize(self, stream, rdf_format='trig', **kwargs): + def deserialize(self, stream, rdf_format='trig', + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper, **kwargs): """ Deserialize from the `PROV-O `_ representation to a :class:`~prov.model.ProvDocument` instance. @@ -141,7 +169,9 @@ def deserialize(self, stream, rdf_format='trig', **kwargs): container.parse(stream, **newargs) document = pm.ProvDocument() self.document = document - self.decode_document(container, document) + self.decode_document(container, document, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper) return document def valid_identifier(self, value): @@ -199,15 +229,17 @@ def decode_rdf_representation(self, literal, graph): # simple type, just return it return literal - def encode_document(self, document): + def encode_document(self, document, PROV_N_MAP=PROVONE_N_MAP): container = self.encode_container(document) for item in document.bundles: # encoding the sub-bundle - bundle = self.encode_container(item, identifier=item.identifier.uri) + bundle = self.encode_container(item, identifier=item.identifier.uri, + PROV_N_MAP=PROVONE_N_MAP) container.addN(bundle.quads()) return container - def encode_container(self, bundle, container=None, identifier=None): + def encode_container(self, bundle, PROV_N_MAP=PROV_N_MAP, + container=None, identifier=None): if container is None: container = ConjunctiveGraph(identifier=identifier) nm = container.namespace_manager @@ -395,21 +427,30 @@ def encode_container(self, bundle, container=None, identifier=None): container.add((identifier, pred, obj)) return container - def decode_document(self, content, document): + def decode_document(self, content, document, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper): for prefix, url in content.namespaces(): document.add_namespace(prefix, six.text_type(url)) if hasattr(content, 'contexts'): for graph in content.contexts(): if isinstance(graph.identifier, BNode): - self.decode_container(graph, document) + self.decode_container(graph, document, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper) else: bundle_id = six.text_type(graph.identifier) bundle = document.bundle(bundle_id) - self.decode_container(graph, bundle) + self.decode_container(graph, bundle, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper) else: - self.decode_container(content, document) + self.decode_container(content, document, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper) - def decode_container(self, graph, bundle): + def decode_container(self, graph, bundle, relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper): ids = {} PROV_CLS_MAP = {} formal_attributes = {} diff --git a/nidm/core/tests/test_provone.py b/nidm/core/tests/test_provone.py index 483b3b9e..f5687adb 100644 --- a/nidm/core/tests/test_provone.py +++ b/nidm/core/tests/test_provone.py @@ -1,9 +1,8 @@ -#from nidm.core.provone import ProvONEDocument +from nidm.core.provone import ProvONEDocument from nidm.core import Constants from nidm.core.dot import provone_to_dot import pytest -pytestmark = pytest.mark.skip(reason="had to comment provone import - was breaking tests from experiment") @pytest.fixture(scope="module") def doc(): @@ -34,34 +33,6 @@ def test_ispartof(doc): rdf_format='ttl')) -def test_used(doc): - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 13:37:53") - dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", - 'prov:value': "DLEM_NEE_onedeg_v1.0nc", - 'wfms:type': "edu.sci.wfms.basic:File"}) - doc.used(pe1, dt1) - - # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) - - -def test_wasderivedfrom(doc): - - dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", - 'prov:value': "DLEM_NEE_onedeg_v1.0nc", - 'wfms:type': "edu.sci.wfms.basic:File"}) - dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) - doc.wasDerivedFrom(dt1, dt2) - - # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) - - def test_dataonlink(doc): dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) dl1 = doc.dataLink('dcterms:identifier:e1_e2DL') @@ -72,16 +43,30 @@ def test_dataonlink(doc): doc.dataOnLink(dt2, dl1) -def test_wasgeneratedby(doc): - - dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 13:37:53") - doc.wasGeneratedBy(dt2, pe1) - # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) +# def test_used(doc): +# pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", +# "2013-08-21 16:37:53") +# dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", +# 'prov:value': "DLEM_NEE_onedeg_v1.0nc", +# 'wfms:type': "edu.sci.wfms.basic:File"}) +# doc.used(pe1, dt1) +# +# # save a turtle file +# with open("test.ttl", 'w') as f: +# f.write(doc.serialize(format='rdf', +# rdf_format='ttl')) +# +# +# def test_wasgeneratedby(doc): +# +# dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) +# pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", +# "2013-08-21 13:37:53") +# doc.wasGeneratedBy(dt2, pe1) +# # save a turtle file +# with open("test.ttl", 'w') as f: +# f.write(doc.serialize(format='rdf', +# rdf_format='ttl')) def test_wasassociatedwith(doc): @@ -116,13 +101,13 @@ def test_dltoinport(doc): doc.DLToInPort(dl1, i1) -def test_documentserialize(doc): - # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) - - -def test_write_to_dot(doc): - dot = provone_to_dot(doc) - dot.write_png('provone-test.png') +# def test_documentserialize(doc): +# # save a turtle file +# with open("test.ttl", 'w') as f: +# f.write(doc.serialize(format='rdf', +# rdf_format='ttl')) +# +# +# def test_write_to_dot(doc): +# dot = provone_to_dot(doc) +# dot.write_png('provone-test.png')