diff --git a/README.md b/README.md
index 35e0e09..a19da64 100644
--- a/README.md
+++ b/README.md
@@ -506,6 +506,16 @@ To list a specific event type, call the `read_event_type` function with the even
 event_type = await client.read_event_type("io.eventsourcingdb.library.book-acquired")
 ```
 
+### Verifying an Event's Hash
+
+To verify the integrity of an event, call the `verify_hash` function on the event instance. This recomputes the event's hash locally and compares it to the hash stored in the event. If the hashes differ, the function raises an error:
+
+```python
+event.verify_hash()
+```
+
+*Note that this only verifies the hash. If you also want to verify the signature, you can skip this step and call `verify_signature` directly, which performs a hash verification internally.*
+
 ### Using Testcontainers
 
 Import the `Container` class, create an instance, call the `start` function to run a test container, get a client, run your test code, and finally call the `stop` function to stop the test container:
diff --git a/eventsourcingdb/client.py b/eventsourcingdb/client.py
index f8fece0..53d6f5a 100644
--- a/eventsourcingdb/client.py
+++ b/eventsourcingdb/client.py
@@ -1,3 +1,4 @@
+from collections import OrderedDict
 from collections.abc import AsyncGenerator
 from types import TracebackType
 
@@ -124,7 +125,7 @@ async def write_events(
 
         response_data = await response.body.read()
         response_data = bytes.decode(response_data, encoding='utf-8')
-        response_data = json.loads(response_data)
+        response_data = json.loads(response_data, object_pairs_hook=OrderedDict)
 
         if not isinstance(response_data, list):
             raise ServerError(
diff --git a/eventsourcingdb/event/event.py b/eventsourcingdb/event/event.py
index 0611c91..c1d7b9a 100644
--- a/eventsourcingdb/event/event.py
+++ b/eventsourcingdb/event/event.py
@@ -1,5 +1,7 @@
-from dataclasses import dataclass
+from dataclasses import dataclass, field
 from datetime import datetime
+import json
+from hashlib import sha256
 from typing import Any, TypeVar
 
 from ..errors.internal_error import InternalError
@@ -17,6 +19,10 @@ class Event:
     spec_version: str
     event_id: str
     time: datetime
+    # Raw timestamp string as received from the server; set by parse() and used
+    # by verify_hash(), because the parsed datetime keeps at most six sub-second
+    # digits. Excluded from __init__, repr and equality.
+    _time_from_server: str = field(init=False, repr=False, compare=False)
     data_content_type: str
     predecessor_hash: str
     hash: str
@@ -45,10 +51,10 @@ def parse(unknown_object: dict) -> "Event":
         if not isinstance(event_id, str):
             raise ValidationError(f"Failed to parse event_id '{event_id}' to string.")
 
-        time_str = unknown_object.get("time")
-        if not isinstance(time_str, str):
-            raise ValidationError(f"Failed to parse time '{time_str}' to string.")
-        time = Event.__parse_time(time_str)
+        time_from_server = unknown_object.get("time")
+        if not isinstance(time_from_server, str):
+            raise ValidationError(f"Failed to parse time '{time_from_server}' to string.")
+        time = Event.__parse_time(time_from_server)
 
         data_content_type = unknown_object.get("datacontenttype")
         if not isinstance(data_content_type, str):
@@ -79,7 +85,7 @@ def parse(unknown_object: dict) -> "Event":
         if not isinstance(data, dict):
             raise ValidationError(f"Failed to parse data '{data}' to object.")
 
-        return Event(
+        event = Event(
             data=data,
             source=source,
             subject=subject,
@@ -93,6 +99,53 @@ def parse(unknown_object: dict) -> "Event":
             trace_parent=trace_parent,
             trace_state=trace_state,
         )
+        event._time_from_server = time_from_server
+
+        return event
+
+    def verify_hash(self) -> None:
+        """Recompute this event's hash and compare it to the stored one.
+
+        The hash is the SHA-256 of the concatenated hex digests of the
+        pipe-joined metadata and of the compact JSON encoding of ``data``.
+        The timestamp used is the raw string captured by ``parse``, not the
+        parsed ``time`` value.
+
+        Raises:
+            ValidationError: If the recomputed hash differs from ``hash``.
+        """
+        metadata = "|".join([
+            self.spec_version,
+            self.event_id,
+            self.predecessor_hash,
+            self._time_from_server,
+            self.source,
+            self.subject,
+            self.type,
+            self.data_content_type,
+        ])
+
+        metadata_bytes = metadata.encode("utf-8")
+        # Keep non-ASCII characters as UTF-8 instead of \uXXXX escapes, so the
+        # hashed bytes do not depend on Python's ASCII-only default.
+        # NOTE(review): assumes the server hashes unescaped UTF-8 JSON - confirm.
+        data_bytes = json.dumps(
+            self.data,
+            separators=(',', ':'),
+            indent=None,
+            ensure_ascii=False,
+        ).encode("utf-8")
+
+        metadata_hash = sha256(metadata_bytes).hexdigest()
+        data_hash = sha256(data_bytes).hexdigest()
+
+        final_hash = sha256()
+        final_hash.update(metadata_hash.encode("utf-8"))
+        final_hash.update(data_hash.encode("utf-8"))
+        final_hash_hex = final_hash.hexdigest()
+
+        if final_hash_hex != self.hash:
+            raise ValidationError("Failed to verify hash.")
 
     def to_json(self) -> dict[str, Any]:
         json = {
@@ -117,17 +170,17 @@ def to_json(self) -> dict[str, Any]:
         return json
 
     @staticmethod
-    def __parse_time(time_str: str) -> datetime:
-        if not isinstance(time_str, str):
-            raise ValidationError(f"Failed to parse time '{time_str}' to datetime.")
+    def __parse_time(time_from_server: str) -> datetime:
+        if not isinstance(time_from_server, str):
+            raise ValidationError(f"Failed to parse time '{time_from_server}' to datetime.")
 
-        rest, sub_seconds = time_str.split(".")
+        rest, sub_seconds = time_from_server.split(".")
         sub_seconds = f"{sub_seconds[:6]:06}"
         try:
             return datetime.fromisoformat(f"{rest}.{sub_seconds}")
         except ValueError as value_error:
             raise ValidationError(
-                f"Failed to parse time '{time_str}' to datetime."
+                f"Failed to parse time '{time_from_server}' to datetime."
             ) from value_error
         except Exception as other_error:
             raise InternalError(str(other_error)) from other_error
diff --git a/tests/event/__init__.py b/tests/event/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/tests/event/test_verify_hash.py b/tests/event/test_verify_hash.py
new file mode 100644
index 0000000..3f1435e
--- /dev/null
+++ b/tests/event/test_verify_hash.py
@@ -0,0 +1,58 @@
+import pytest
+
+from eventsourcingdb import EventCandidate
+from eventsourcingdb.errors.validation_error import ValidationError
+from hashlib import sha256
+
+from ..conftest import TestData
+from ..shared.database import Database
+
+
+class TestVerifyHash:
+    @staticmethod
+    @pytest.mark.asyncio
+    async def test_verifies_the_event_hash(
+        database: Database,
+        test_data: TestData,
+    ) -> None:
+        client = database.get_client()
+
+        written_events = await client.write_events(
+            [
+                EventCandidate(
+                    source=test_data.TEST_SOURCE_STRING, subject="/test", type="io.eventsourcingdb.test", data={"value": 23}
+                )
+            ],
+        )
+
+        assert len(written_events) == 1
+
+        written_event = written_events[0]
+        written_event.verify_hash()
+
+    @staticmethod
+    @pytest.mark.asyncio
+    async def test_fails_if_the_event_hash_is_invalid(
+        database: Database,
+        test_data: TestData,
+    ) -> None:
+        client = database.get_client()
+
+        written_events = await client.write_events(
+            [
+                EventCandidate(
+                    source=test_data.TEST_SOURCE_STRING, subject="/test", type="io.eventsourcingdb.test", data={"value": 23}
+                )
+            ],
+        )
+
+        assert len(written_events) == 1
+
+        written_event = written_events[0]
+
+        invalid_hash_data = "invalid data".encode("utf-8")
+        invalid_hash = sha256(invalid_hash_data).hexdigest()
+        written_event.hash = invalid_hash
+
+        with pytest.raises(ValidationError):
+            written_event.verify_hash()