From acf14258087b95afac8696841832ab9cbecb9519 Mon Sep 17 00:00:00 2001 From: Sanu Ann Date: Thu, 23 Aug 2018 10:18:01 -0400 Subject: [PATCH 1/3] refactoring provonerdf and uncomment test_experiment --- nidm/core/Constants.py | 1 - nidm/core/provone.py | 105 ++++++++++++++++++++--- nidm/core/serializers/provonerdf.py | 75 ++++++++++++---- nidm/core/tests/test_provone.py | 51 ++++------- nidm/experiment/tests/test_experiment.py | 4 +- 5 files changed, 170 insertions(+), 66 deletions(-) diff --git a/nidm/core/Constants.py b/nidm/core/Constants.py index cfc80e8d..1d838a5e 100644 --- a/nidm/core/Constants.py +++ b/nidm/core/Constants.py @@ -495,7 +495,6 @@ def __init__(self, namespaces): #PROV_ATTR_COLLECTION } - # Set of formal attributes of PROV records PROVONE_ATTRIBUTES = PROVONE_ATTRIBUTE_QNAMES | PROV_ATTRIBUTE_QNAMES | \ PROV_ATTRIBUTE_LITERALS diff --git a/nidm/core/provone.py b/nidm/core/provone.py index 16a419b5..416cecce 100644 --- a/nidm/core/provone.py +++ b/nidm/core/provone.py @@ -20,7 +20,7 @@ from prov.model import ProvEntity, ProvAgent, ProvDocument, ProvAttribution, \ PROV_REC_CLS, ProvActivity, _ensure_datetime, ProvAssociation, \ ProvCommunication, ProvDerivation, ProvRelation, ProvGeneration, ProvUsage, \ - ProvMembership + ProvMembership, ProvRecord from .Constants import PROVONE_N_MAP, PROVONE_PROCESS, PROVONE_INPUTPORT, \ PROVONE_OUTPUTPORT, PROVONE_DATA, PROVONE_DATALINK, PROVONE_SEQCTRLLINK, \ PROVONE_USER, PROVONE_PROCESSEXEC, PROVONE_ATTR_PROCESS, PROVONE_ATTR_USER, \ @@ -43,11 +43,22 @@ logger = logging.getLogger(__name__) -# add ProvOne Notation mapping to Prov_N_MAP dict -#PROV_N_MAP.update(PROVONE_N_MAP) +# update ProvOne Notation mapping with PROV_N_MAP PROVONE_N_MAP.update(PROV_N_MAP) +class ProvOneRecord(ProvRecord): + """Base class for PROVONE records.""" + + def copy(self): + """ + Return an exact copy of this record. + """ + return PROVONE_REC_CLS[self.get_type()]( + self._bundle, self.identifier, self.attributes + ) + + class ProvPlan(ProvEntity): """ ProvONE Plan element @@ -144,7 +155,7 @@ class Generation(ProvGeneration): FORMAL_ATTRIBUTES = (PROVONE_ATTR_DATA, PROVONE_ATTR_PROCESSEXEC, PROV_ATTR_TIME) - #_prov_type = PROV_GENERATION + _prov_type = PROV_GENERATION class Usage(ProvUsage): @@ -152,7 +163,7 @@ class Usage(ProvUsage): FORMAL_ATTRIBUTES = (PROVONE_ATTR_PROCESSEXEC, PROVONE_ATTR_DATA, PROV_ATTR_TIME) - #_prov_type = PROV_USAGE + _prov_type = PROV_USAGE class Partnership(ProvRelation): @@ -268,16 +279,17 @@ class Workflow(Process, ): # Class mappings from PROVONE record type -PROV_REC_CLS.update({ +PROVONE_REC_CLS = { PROVONE_PROCESS: Process, PROVONE_PROCESSEXEC: ProcessExec, PROVONE_DATA: Data, - PROV_ATTRIBUTION: Attribution, - PROV_ASSOCIATION: Association, - PROV_COMMUNICATION: Communication, - PROV_DERIVATION: Derivation, - PROV_GENERATION: Generation, + # PROV_ATTRIBUTION: Attribution, + # PROV_ASSOCIATION: Association, + # PROV_COMMUNICATION: Communication, + # PROV_DERIVATION: Derivation, + # PROV_GENERATION: Generation, PROV_USAGE: Usage, + # PROV_MEMBERSHIP: Membership, PROVONE_INPUTPORT: InputPort, PROVONE_HASINPORT: HasInput, PROVONE_OUTPUTPORT: OutputPort, @@ -295,9 +307,41 @@ class Workflow(Process, ): PROVONE_HASDEFAULTPARAM: Parameterization, PROVONE_USER: User, PROVONE_ISPARTOF: Partnership, - PROV_MEMBERSHIP: Membership, -}) +} + +PROVONE_REC_CLS.update(PROV_REC_CLS) + +# PROV_REC_CLS.update({ +# PROVONE_PROCESS: Process, +# PROVONE_PROCESSEXEC: ProcessExec, +# PROVONE_DATA: Data, +# PROV_ATTRIBUTION: Attribution, +# PROV_ASSOCIATION: Association, +# PROV_COMMUNICATION: Communication, +# PROV_DERIVATION: Derivation, +# PROV_GENERATION: Generation, +# PROV_USAGE: Usage, +# PROVONE_INPUTPORT: InputPort, +# PROVONE_HASINPORT: HasInput, +# PROVONE_OUTPUTPORT: OutputPort, +# PROVONE_HASOUTPORT: HasOutput, +# PROVONE_HASSUBPROCESS: HasSubProcess, +# PROVONE_DATALINK: DataLink, +# PROVONE_INPORTTODL: InToDL, +# PROVONE_SEQCTRLLINK: SeqCtrlLink, +# PROVONE_CLTODESTP: CLtoDestP, +# PROVONE_SOURCEPTOCL: SourcePtoCL, +# PROVONE_OUTPORTTODL: OutToDL, +# PROVONE_DLTOOUTPORT: DLtoOutPort, +# PROVONE_DLTOINPORT: DLtoInPort, +# PROVONE_DATAONLINK: DataLinkage, +# PROVONE_HASDEFAULTPARAM: Parameterization, +# PROVONE_USER: User, +# PROVONE_ISPARTOF: Partnership, +# PROV_MEMBERSHIP: Membership, +# +# }) class ProvONEDocument(ProvDocument): @@ -848,6 +892,41 @@ def membership(self, collection, data): } ) + # same method as in prov/model.py with just the modification of PROVONE + # constants. Need to re-architect prov and then make necessary changes in + # provone. + def new_record(self, record_type, identifier, attributes=None, + other_attributes=None): + """ + Creates a new record. + + :param record_type: Type of record (one of :py:const:`PROVONE_REC_CLS`). + :param identifier: Identifier for new record. + :param attributes: Attributes as a dictionary or list of tuples to be added + to the record optionally (default: None). + :param other_attributes: Optional other attributes as a dictionary or list + of tuples to be added to the record optionally (default: None). + """ + attr_list = [] + if attributes: + if isinstance(attributes, dict): + attr_list.extend( + (attr, value) for attr, value in attributes.items() + ) + else: + # expecting a list of attributes here + attr_list.extend(attributes) + if other_attributes: + attr_list.extend( + other_attributes.items() if isinstance(other_attributes, dict) + else other_attributes + ) + new_record = PROVONE_REC_CLS[record_type]( + self, self.valid_qualified_name(identifier), attr_list + ) + self._add_record(new_record) + return new_record + # Aliases wasAttributedTo = attribution wasAssociatedWith = association diff --git a/nidm/core/serializers/provonerdf.py b/nidm/core/serializers/provonerdf.py index 075cc006..cf30b766 100644 --- a/nidm/core/serializers/provonerdf.py +++ b/nidm/core/serializers/provonerdf.py @@ -25,7 +25,7 @@ PROV_ATTR_INFORMANT, PROV_ATTR_RESPONSIBLE, PROV_ATTR_TRIGGER, PROV_ATTR_ENDER, PROV_ATTR_STARTER, PROV_ATTR_USED_ENTITY, PROV_ASSOCIATION) from prov.serializers import Error -import prov.serializers.provrdf +from prov.serializers.provrdf import attr2rdf from nidm.core.Constants import PROVONE_ID_ATTRIBUTES_MAP, PROVONE from nidm.core.serializers import Serializer @@ -66,14 +66,39 @@ def get_anon_id(self, obj, local_prefix="id"): if six.integer_types[-1] not in LITERAL_XSDTYPE_MAP: LITERAL_XSDTYPE_MAP[six.integer_types[-1]] = XSD['long'] +relation_mapper = {URIRef(PROV['alternateOf'].uri): 'alternate', + URIRef(PROV['actedOnBehalfOf'].uri): 'delegation', + URIRef(PROV['specializationOf'].uri): 'specialization', + URIRef(PROV['mentionOf'].uri): 'mention', + URIRef(PROV['wasAssociatedWith'].uri): 'association', + URIRef(PROV['wasDerivedFrom'].uri): 'derivation', + URIRef(PROV['wasAttributedTo'].uri): 'attribution', + URIRef(PROV['wasInformedBy'].uri): 'communication', + URIRef(PROV['wasGeneratedBy'].uri): 'generation', + URIRef(PROV['wasInfluencedBy'].uri): 'influence', + URIRef(PROV['wasInvalidatedBy'].uri): 'invalidation', + URIRef(PROV['wasEndedBy'].uri): 'end', + URIRef(PROV['wasStartedBy'].uri): 'start', + URIRef(PROV['hadMember'].uri): 'membership', + URIRef(PROV['used'].uri): 'usage', + } +predicate_mapper = {RDFS.label: pm.PROV['label'], + URIRef(PROV['atLocation'].uri): PROV_LOCATION, + URIRef(PROV['startedAtTime'].uri): PROV_ATTR_STARTTIME, + URIRef(PROV['endedAtTime'].uri): PROV_ATTR_ENDTIME, + URIRef(PROV['atTime'].uri): PROV_ATTR_TIME, + URIRef(PROV['hadRole'].uri): PROV_ROLE, + URIRef(PROV['hadPlan'].uri): pm.PROV_ATTR_PLAN, + URIRef(PROV['hadUsage'].uri): pm.PROV_ATTR_USAGE, + URIRef(PROV['hadGeneration'].uri): pm.PROV_ATTR_GENERATION, + URIRef(PROV['hadActivity'].uri): pm.PROV_ATTR_ACTIVITY, + } + def attr2rdf(attr): return URIRef(PROVONE[PROVONE_ID_ATTRIBUTES_MAP[attr].split('provone:')[1]].uri) -prov.serializers.provrdf.attr2rdf = attr2rdf - - def valid_qualified_name(bundle, value, xsd_qname=False): if value is None: return None @@ -83,10 +108,11 @@ def valid_qualified_name(bundle, value, xsd_qname=False): class ProvONERDFSerializer(Serializer): """ - PROV-O serializer for :class:`~prov.model.ProvDocument` + PROVONE-O serializer for :class:`~provone.ProvONEDocument` """ - def serialize(self, stream=None, rdf_format='trig', **kwargs): + def serialize(self, stream=None, rdf_format='trig', PROV_N_MAP=PROVONE_N_MAP, + **kwargs): """ Serializes a :class:`~prov.model.ProvDocument` instance to `PROV-O `_. @@ -94,7 +120,7 @@ def serialize(self, stream=None, rdf_format='trig', **kwargs): :param stream: Where to save the output. :param rdf_format: The RDF format of the output, default to TRiG. """ - container = self.encode_document(self.document) + container = self.encode_document(self.document, PROV_N_MAP=PROVONE_N_MAP) newargs = kwargs.copy() newargs['format'] = rdf_format @@ -127,7 +153,9 @@ def serialize(self, stream=None, rdf_format='trig', **kwargs): finally: buf.close() - def deserialize(self, stream, rdf_format='trig', **kwargs): + def deserialize(self, stream, rdf_format='trig', + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper, **kwargs): """ Deserialize from the `PROV-O `_ representation to a :class:`~prov.model.ProvDocument` instance. @@ -141,7 +169,9 @@ def deserialize(self, stream, rdf_format='trig', **kwargs): container.parse(stream, **newargs) document = pm.ProvDocument() self.document = document - self.decode_document(container, document) + self.decode_document(container, document, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper) return document def valid_identifier(self, value): @@ -199,15 +229,17 @@ def decode_rdf_representation(self, literal, graph): # simple type, just return it return literal - def encode_document(self, document): + def encode_document(self, document, PROV_N_MAP=PROVONE_N_MAP): container = self.encode_container(document) for item in document.bundles: # encoding the sub-bundle - bundle = self.encode_container(item, identifier=item.identifier.uri) + bundle = self.encode_container(item, identifier=item.identifier.uri, + PROV_N_MAP=PROVONE_N_MAP) container.addN(bundle.quads()) return container - def encode_container(self, bundle, container=None, identifier=None): + def encode_container(self, bundle, PROV_N_MAP=PROV_N_MAP, + container=None, identifier=None): if container is None: container = ConjunctiveGraph(identifier=identifier) nm = container.namespace_manager @@ -395,21 +427,30 @@ def encode_container(self, bundle, container=None, identifier=None): container.add((identifier, pred, obj)) return container - def decode_document(self, content, document): + def decode_document(self, content, document, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper): for prefix, url in content.namespaces(): document.add_namespace(prefix, six.text_type(url)) if hasattr(content, 'contexts'): for graph in content.contexts(): if isinstance(graph.identifier, BNode): - self.decode_container(graph, document) + self.decode_container(graph, document, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper) else: bundle_id = six.text_type(graph.identifier) bundle = document.bundle(bundle_id) - self.decode_container(graph, bundle) + self.decode_container(graph, bundle, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper) else: - self.decode_container(content, document) + self.decode_container(content, document, + relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper) - def decode_container(self, graph, bundle): + def decode_container(self, graph, bundle, relation_mapper=relation_mapper, + predicate_mapper=predicate_mapper): ids = {} PROV_CLS_MAP = {} formal_attributes = {} diff --git a/nidm/core/tests/test_provone.py b/nidm/core/tests/test_provone.py index 9385f189..c79762f8 100644 --- a/nidm/core/tests/test_provone.py +++ b/nidm/core/tests/test_provone.py @@ -33,27 +33,22 @@ def test_ispartof(doc): rdf_format='ttl')) -def test_used(doc): - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 13:37:53") - dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", - 'prov:value': "DLEM_NEE_onedeg_v1.0nc", - 'wfms:type': "edu.sci.wfms.basic:File"}) - doc.used(pe1, dt1) - +def test_dataonlink(doc): + dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) + dl1 = doc.dataLink('dcterms:identifier:e1_e2DL') # save a turtle file with open("test.ttl", 'w') as f: f.write(doc.serialize(format='rdf', rdf_format='ttl')) + doc.dataOnLink(dt2, dl1) - -def test_wasderivedfrom(doc): - +def test_used(doc): + pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", + "2013-08-21 16:37:53") dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", 'prov:value': "DLEM_NEE_onedeg_v1.0nc", 'wfms:type': "edu.sci.wfms.basic:File"}) - dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) - doc.wasDerivedFrom(dt1, dt2) + doc.used(pe1, dt1) # save a turtle file with open("test.ttl", 'w') as f: @@ -61,16 +56,6 @@ def test_wasderivedfrom(doc): rdf_format='ttl')) -def test_dataonlink(doc): - dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) - dl1 = doc.dataLink('dcterms:identifier:e1_e2DL') - # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) - doc.dataOnLink(dt2, dl1) - - def test_wasgeneratedby(doc): dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) @@ -115,13 +100,13 @@ def test_dltoinport(doc): doc.DLToInPort(dl1, i1) -def test_documentserialize(doc): - # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) - - -def test_write_to_dot(doc): - dot = provone_to_dot(doc) - dot.write_png('provone-test.png') +# def test_documentserialize(doc): +# # save a turtle file +# with open("test.ttl", 'w') as f: +# f.write(doc.serialize(format='rdf', +# rdf_format='ttl')) +# +# +# def test_write_to_dot(doc): +# dot = provone_to_dot(doc) +# dot.write_png('provone-test.png') diff --git a/nidm/experiment/tests/test_experiment.py b/nidm/experiment/tests/test_experiment.py index 05b8d545..8926883b 100644 --- a/nidm/experiment/tests/test_experiment.py +++ b/nidm/experiment/tests/test_experiment.py @@ -84,6 +84,6 @@ def main(argv): main(sys.argv[1:]) # very simple test, just checking if main doesnt give any error -# def test_main(): -# main(sys.argv[1:]) +def test_main(): + main(sys.argv[1:]) From def5e2dff30e64f1e7f8692029300c29c0c24606 Mon Sep 17 00:00:00 2001 From: Sanu Ann Date: Thu, 23 Aug 2018 11:12:59 -0400 Subject: [PATCH 2/3] remove commented lines from provone, comment 2 tests in test_provone temporarily --- nidm/core/provone.py | 38 -------------------------- nidm/core/tests/test_provone.py | 47 +++++++++++++++++---------------- 2 files changed, 24 insertions(+), 61 deletions(-) diff --git a/nidm/core/provone.py b/nidm/core/provone.py index 416cecce..bc4628ce 100644 --- a/nidm/core/provone.py +++ b/nidm/core/provone.py @@ -283,13 +283,6 @@ class Workflow(Process, ): PROVONE_PROCESS: Process, PROVONE_PROCESSEXEC: ProcessExec, PROVONE_DATA: Data, - # PROV_ATTRIBUTION: Attribution, - # PROV_ASSOCIATION: Association, - # PROV_COMMUNICATION: Communication, - # PROV_DERIVATION: Derivation, - # PROV_GENERATION: Generation, - PROV_USAGE: Usage, - # PROV_MEMBERSHIP: Membership, PROVONE_INPUTPORT: InputPort, PROVONE_HASINPORT: HasInput, PROVONE_OUTPUTPORT: OutputPort, @@ -312,37 +305,6 @@ class Workflow(Process, ): PROVONE_REC_CLS.update(PROV_REC_CLS) -# PROV_REC_CLS.update({ -# PROVONE_PROCESS: Process, -# PROVONE_PROCESSEXEC: ProcessExec, -# PROVONE_DATA: Data, -# PROV_ATTRIBUTION: Attribution, -# PROV_ASSOCIATION: Association, -# PROV_COMMUNICATION: Communication, -# PROV_DERIVATION: Derivation, -# PROV_GENERATION: Generation, -# PROV_USAGE: Usage, -# PROVONE_INPUTPORT: InputPort, -# PROVONE_HASINPORT: HasInput, -# PROVONE_OUTPUTPORT: OutputPort, -# PROVONE_HASOUTPORT: HasOutput, -# PROVONE_HASSUBPROCESS: HasSubProcess, -# PROVONE_DATALINK: DataLink, -# PROVONE_INPORTTODL: InToDL, -# PROVONE_SEQCTRLLINK: SeqCtrlLink, -# PROVONE_CLTODESTP: CLtoDestP, -# PROVONE_SOURCEPTOCL: SourcePtoCL, -# PROVONE_OUTPORTTODL: OutToDL, -# PROVONE_DLTOOUTPORT: DLtoOutPort, -# PROVONE_DLTOINPORT: DLtoInPort, -# PROVONE_DATAONLINK: DataLinkage, -# PROVONE_HASDEFAULTPARAM: Parameterization, -# PROVONE_USER: User, -# PROVONE_ISPARTOF: Partnership, -# PROV_MEMBERSHIP: Membership, -# -# }) - class ProvONEDocument(ProvDocument): """ ProvONE Document""" diff --git a/nidm/core/tests/test_provone.py b/nidm/core/tests/test_provone.py index c79762f8..f5687adb 100644 --- a/nidm/core/tests/test_provone.py +++ b/nidm/core/tests/test_provone.py @@ -42,30 +42,31 @@ def test_dataonlink(doc): rdf_format='ttl')) doc.dataOnLink(dt2, dl1) -def test_used(doc): - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 16:37:53") - dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", - 'prov:value': "DLEM_NEE_onedeg_v1.0nc", - 'wfms:type': "edu.sci.wfms.basic:File"}) - doc.used(pe1, dt1) - - # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) - -def test_wasgeneratedby(doc): - - dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) - pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", - "2013-08-21 13:37:53") - doc.wasGeneratedBy(dt2, pe1) - # save a turtle file - with open("test.ttl", 'w') as f: - f.write(doc.serialize(format='rdf', - rdf_format='ttl')) +# def test_used(doc): +# pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", +# "2013-08-21 16:37:53") +# dt1 = doc.data('dcterms:identifier:defparam1', {'rdfs:label': "filename", +# 'prov:value': "DLEM_NEE_onedeg_v1.0nc", +# 'wfms:type': "edu.sci.wfms.basic:File"}) +# doc.used(pe1, dt1) +# +# # save a turtle file +# with open("test.ttl", 'w') as f: +# f.write(doc.serialize(format='rdf', +# rdf_format='ttl')) +# +# +# def test_wasgeneratedby(doc): +# +# dt2 = doc.data('dcterms:identifier:defparam2', {'rdfs:label': "filename"}) +# pe1 = doc.processExec('dcterms:identifier:e1_ex1', "2013-08-21 13:37:53", +# "2013-08-21 13:37:53") +# doc.wasGeneratedBy(dt2, pe1) +# # save a turtle file +# with open("test.ttl", 'w') as f: +# f.write(doc.serialize(format='rdf', +# rdf_format='ttl')) def test_wasassociatedwith(doc): From 944e05dacd9576fce2fba6647c6915ef6b77e25e Mon Sep 17 00:00:00 2001 From: Sanu Ann Date: Thu, 23 Aug 2018 12:50:47 -0400 Subject: [PATCH 3/3] pull latests from master and refactor test_provone --- nidm/core/tests/test_provone.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nidm/core/tests/test_provone.py b/nidm/core/tests/test_provone.py index 01e6a5fa..f5687adb 100644 --- a/nidm/core/tests/test_provone.py +++ b/nidm/core/tests/test_provone.py @@ -1,9 +1,8 @@ -#from nidm.core.provone import ProvONEDocument +from nidm.core.provone import ProvONEDocument from nidm.core import Constants from nidm.core.dot import provone_to_dot import pytest -pytestmark = pytest.mark.skip(reason="had to comment provone import - was breaking tests from experiment") @pytest.fixture(scope="module") def doc():