diff --git a/ddtrace/llmobs/_experiment.py b/ddtrace/llmobs/_experiment.py
index 7bfb80c0ea5..d31a4680626 100644
--- a/ddtrace/llmobs/_experiment.py
+++ b/ddtrace/llmobs/_experiment.py
@@ -107,6 +107,7 @@ class Dataset:
     _id: str
     _records: List[DatasetRecord]
     _version: int
+    _latest_version: int
     _dne_client: "LLMObsExperimentsClient"
     _new_records_by_record_id: Dict[str, DatasetRecordRaw]
     _updated_record_ids_to_new_fields: Dict[str, UpdatableDatasetRecord]
@@ -121,6 +122,7 @@ def __init__(
         dataset_id: str,
         records: List[DatasetRecord],
         description: str,
+        latest_version: int,
         version: int,
         _dne_client: "LLMObsExperimentsClient",
     ) -> None:
@@ -128,6 +130,7 @@ def __init__(
         self.project = project
         self.description = description
         self._id = dataset_id
+        self._latest_version = latest_version
         self._version = version
         self._dne_client = _dne_client
         self._records = records
@@ -168,7 +171,10 @@ def push(self) -> None:
                 record["record_id"] = record_id  # type: ignore
 
             # FIXME: we don't get version numbers in responses to deletion requests
-            self._version = new_version if new_version != -1 else self._version + 1
+            self._latest_version = new_version if new_version != -1 else self._latest_version + 1
+            # no matter what the version was before the push, pushing will result in the dataset being on the current
+            # version tracked by the backend
+            self._version = self._latest_version
         self._new_records_by_record_id = {}
         self._deleted_record_ids = []
         self._updated_record_ids_to_new_fields = {}
@@ -225,6 +231,14 @@ def url(self) -> str:
         # FIXME: will not work for subdomain orgs
         return f"{_get_base_url()}/llm/datasets/{self._id}"
 
+    @property
+    def latest_version(self) -> int:
+        return self._latest_version  # latest dataset version tracked by this client; refreshed on every push
+
+    @property
+    def version(self) -> int:
+        return self._version  # version of the records held by this object; equals latest_version after a push
+
     def _estimate_delta_size(self) -> int:
         """rough estimate (in bytes) of the size of the next batch update call if it happens"""
         size = len(safe_json(self._new_records_by_record_id)) + len(safe_json(self._updated_record_ids_to_new_fields))
@@ -434,6 +448,7 @@ def _run_task(self, jobs: int, raise_errors: bool = False, sample_size: Optional
                 dataset_id=self._dataset._id,
                 records=subset_records,
                 description=self._dataset.description,
+                latest_version=self._dataset._latest_version,
                 version=self._dataset._version,
                 _dne_client=self._dataset._dne_client,
             )
diff --git a/ddtrace/llmobs/_llmobs.py b/ddtrace/llmobs/_llmobs.py
index 457ead64212..2a342e27889 100644
--- a/ddtrace/llmobs/_llmobs.py
+++ b/ddtrace/llmobs/_llmobs.py
@@ -669,8 +669,12 @@ def _on_asyncio_execute_task(self, task_data: Dict[str, Any]) -> None:
             self._llmobs_context_provider.activate(llmobs_ctx)
 
     @classmethod
-    def pull_dataset(cls, dataset_name: str, project_name: Optional[str] = None) -> Dataset:
-        ds = cls._instance._dne_client.dataset_get_with_records(dataset_name, (project_name or cls._project_name))
+    def pull_dataset(
+        cls, dataset_name: str, project_name: Optional[str] = None, version: Optional[int] = None
+    ) -> Dataset:
+        ds = cls._instance._dne_client.dataset_get_with_records(
+            dataset_name, (project_name or cls._project_name), version
+        )
         return ds
 
     @classmethod
diff --git a/ddtrace/llmobs/_writer.py b/ddtrace/llmobs/_writer.py
index e5f4cbcf333..1f70c0c3bae 100644
--- a/ddtrace/llmobs/_writer.py
+++ b/ddtrace/llmobs/_writer.py
@@ -11,6 +11,7 @@
 from typing import TypedDict
 from typing import Union
 from typing import cast
+import urllib
 from urllib.parse import quote
 from urllib.parse import urlparse
 
@@ -400,7 +401,16 @@ def dataset_create(
         if dataset_id is None or dataset_id == "":
             raise ValueError(f"unexpected dataset state, invalid ID (is None: {dataset_id is None})")
         curr_version = response_data["data"]["attributes"]["current_version"]
-        return Dataset(dataset_name, project, dataset_id, [], description, curr_version, _dne_client=self)
+        return Dataset(
+            name=dataset_name,
+            project=project,
+            dataset_id=dataset_id,
+            records=[],
+            description=description,
+            latest_version=curr_version,
+            version=curr_version,
+            _dne_client=self,
+        )
 
     @staticmethod
     def _get_record_json(record: Union[UpdatableDatasetRecord, DatasetRecordRaw], is_update: bool) -> JSONType:
@@ -458,10 +468,14 @@ def dataset_batch_update(
         new_record_ids: List[str] = [r["id"] for r in data] if data else []
         return new_version, new_record_ids
 
-    def dataset_get_with_records(self, dataset_name: str, project_name: Optional[str] = None) -> Dataset:
+    def dataset_get_with_records(
+        self, dataset_name: str, project_name: Optional[str] = None, version: Optional[int] = None
+    ) -> Dataset:
         project = self.project_create_or_get(project_name)
         project_id = project.get("_id")
-        logger.debug("getting records with project ID %s for %s", project_id, project_name)
+        # ``str(None)`` is the truthy string "None", so check for None explicitly to log "latest" when unpinned
+        version_desc = "latest" if version is None else version
+        logger.debug("getting records with project ID %s for %s, version: %s", project_id, project_name, version_desc)
 
         path = f"/api/unstable/llm-obs/v1/{project_id}/datasets?filter[name]={quote(dataset_name)}"
         resp = self.request("GET", path)
@@ -480,11 +494,17 @@ def dataset_get_with_records(self, dataset_name: str, project_name: Optional[str
         dataset_id = data[0]["id"]
 
         list_base_path = f"/api/unstable/llm-obs/v1/datasets/{dataset_id}/records"
+
         has_next_page = True
         class_records: List[DatasetRecord] = []
-        list_path = list_base_path
         page_num = 0
+        url_options = {}
         while has_next_page:
+            if version is not None:  # explicit None check: version 0 is a real version, not "use latest"
+                url_options["filter[version]"] = version
+
+            list_path = f"{list_base_path}?{urllib.parse.urlencode(url_options, safe='[]')}"
+            logger.debug("list records page %d, request path=%s", page_num, list_path)
             resp = self.request("GET", list_path, timeout=self.LIST_RECORDS_TIMEOUT)
             if resp.status != 200:
                 raise ValueError(
@@ -504,14 +524,22 @@ def dataset_get_with_records(self, dataset_name: str, project_name: Optional[str
                     }
                 )
             next_cursor = records_data.get("meta", {}).get("after")
+
+            url_options = {}
             has_next_page = False
             if next_cursor:
                 has_next_page = True
-                list_path = f"{list_base_path}?page[cursor]={next_cursor}"
-                logger.debug("next list records request path %s", list_path)
+                url_options["page[cursor]"] = next_cursor
                 page_num += 1
         return Dataset(
-            dataset_name, project, dataset_id, class_records, dataset_description, curr_version, _dne_client=self
+            name=dataset_name,
+            project=project,
+            dataset_id=dataset_id,
+            records=class_records,
+            description=dataset_description,
+            latest_version=curr_version,
+            version=version if version is not None else curr_version,
+            _dne_client=self,
         )
 
     def dataset_bulk_upload(self, dataset_id: str, records: List[DatasetRecord]):
diff --git a/releasenotes/notes/llmobs-dne-allow-versioned-dataset-pull-c7017f982b2c1f5b.yaml b/releasenotes/notes/llmobs-dne-allow-versioned-dataset-pull-c7017f982b2c1f5b.yaml
new file mode 100644
index 00000000000..f1f32a86735
--- /dev/null
+++ b/releasenotes/notes/llmobs-dne-allow-versioned-dataset-pull-c7017f982b2c1f5b.yaml
@@ -0,0 +1,8 @@
+---
+features:
+  - |
+    LLM Observability: Previous dataset versions can now be pulled by passing the optional ``version``
+    argument to ``LLMObs.pull_dataset``.
+  - |
+    LLM Observability: Datasets have new ``version`` and ``latest_version`` properties, which report the version of
+    the dataset currently being worked with and the latest version of the dataset known to the backend, respectively.
\ No newline at end of file
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_0bb93ae7-43c4-48ff-91e4-d2817fee85fe_batch_update_post_aa45718c.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_0bb93ae7-43c4-48ff-91e4-d2817fee85fe_batch_update_post_aa45718c.yaml
new file mode 100644
index 00000000000..b750ac7724c
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_0bb93ae7-43c4-48ff-91e4-d2817fee85fe_batch_update_post_aa45718c.yaml
@@ -0,0 +1,48 @@
+interactions:
+- request:
+    body: '{"data": {"type": "datasets", "id": "0bb93ae7-43c4-48ff-91e4-d2817fee85fe",
+      "attributes": {"insert_records": [{"input": {"prompt": "What is the capital
+      of France?"}, "expected_output": {"answer": "Paris"}, "metadata": null}], "update_records":
+      [], "delete_records": []}}}'
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '271'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/0bb93ae7-43c4-48ff-91e4-d2817fee85fe/batch_update
+  response:
+    body:
+      string: '{"data":[{"id":"eaadecb4-836e-49b3-8390-212b3fffb60b","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:26:24.929416376Z","dataset_id":"0bb93ae7-43c4-48ff-91e4-d2817fee85fe","expected_output":{"answer":"Paris"},"input":{"prompt":"What
+        is the capital of France?"},"updated_at":"2025-10-21T18:26:24.929416376Z","version":1}}]}'
+    headers:
+      content-length:
+      - '389'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:26:24 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_0bb93ae7-43c4-48ff-91e4-d2817fee85fe_records_filter_version__420_get_0060e684.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_0bb93ae7-43c4-48ff-91e4-d2817fee85fe_records_filter_version__420_get_0060e684.yaml
new file mode 100644
index 00000000000..3de4ae57834
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_0bb93ae7-43c4-48ff-91e4-d2817fee85fe_records_filter_version__420_get_0060e684.yaml
@@ -0,0 +1,46 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Length
+      : - '0'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: GET
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/0bb93ae7-43c4-48ff-91e4-d2817fee85fe/records?filter%5Bversion%5D=420
+  response:
+    body:
+      string: '{"errors":[{"title":"Generic Error","detail":"invalid version: version
+        is greater than the current version or negative"}]}'
+    headers:
+      content-length:
+      - '122'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:26:27 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 400
+      message: Bad Request
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_batch_update_post_0226e07c.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_batch_update_post_0226e07c.yaml
new file mode 100644
index 00000000000..1db2779735a
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_batch_update_post_0226e07c.yaml
@@ -0,0 +1,48 @@
+interactions:
+- request:
+    body: '{"data": {"type": "datasets", "id": "4607e918-094d-4aa9-8b7a-50fa63a95b56",
+      "attributes": {"insert_records": [{"input": {"prompt": "What is the capital
+      of France?"}, "expected_output": {"answer": "Paris"}, "metadata": null}], "update_records":
+      [], "delete_records": []}}}'
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '271'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/4607e918-094d-4aa9-8b7a-50fa63a95b56/batch_update
+  response:
+    body:
+      string: '{"data":[{"id":"93328f7a-bfd2-4672-8b94-76b0698cd754","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:25:23.855004356Z","dataset_id":"4607e918-094d-4aa9-8b7a-50fa63a95b56","expected_output":{"answer":"Paris"},"input":{"prompt":"What
+        is the capital of France?"},"updated_at":"2025-10-21T18:25:23.855004356Z","version":1}}]}'
+    headers:
+      content-length:
+      - '389'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:25:23 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_batch_update_post_b64aaeeb.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_batch_update_post_b64aaeeb.yaml
new file mode 100644
index 00000000000..aa56782bb87
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_batch_update_post_b64aaeeb.yaml
@@ -0,0 +1,48 @@
+interactions:
+- request:
+    body: '{"data": {"type": "datasets", "id": "4607e918-094d-4aa9-8b7a-50fa63a95b56",
+      "attributes": {"insert_records": [{"input": {"prompt": "What is the capital
+      of China?"}, "expected_output": {"answer": "Beijing"}, "metadata": null}], "update_records":
+      [], "delete_records": []}}}'
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '272'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/4607e918-094d-4aa9-8b7a-50fa63a95b56/batch_update
+  response:
+    body:
+      string: '{"data":[{"id":"5bbd89ec-4eba-4f41-bd47-2a23a005a20a","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:25:26.024986597Z","dataset_id":"4607e918-094d-4aa9-8b7a-50fa63a95b56","expected_output":{"answer":"Beijing"},"input":{"prompt":"What
+        is the capital of China?"},"updated_at":"2025-10-21T18:25:26.024986597Z","version":2}}]}'
+    headers:
+      content-length:
+      - '390'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:25:26 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_records_filter_version__1_get_bcd9fab6.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_records_filter_version__1_get_bcd9fab6.yaml
new file mode 100644
index 00000000000..c903d760daf
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_records_filter_version__1_get_bcd9fab6.yaml
@@ -0,0 +1,46 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Length
+      : - '0'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: GET
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/4607e918-094d-4aa9-8b7a-50fa63a95b56/records?filter%5Bversion%5D=1
+  response:
+    body:
+      string: '{"data":[{"id":"93328f7a-bfd2-4672-8b94-76b0698cd754","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:25:23.855004Z","dataset_id":"4607e918-094d-4aa9-8b7a-50fa63a95b56","expected_output":{"answer":"Paris"},"input":{"prompt":"What
+        is the capital of France?"},"updated_at":"2025-10-21T18:25:23.855004Z"}}],"meta":{"after":""}}'
+    headers:
+      content-length:
+      - '391'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:25:34 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_records_get_eea1d61b.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_records_get_eea1d61b.yaml
new file mode 100644
index 00000000000..61ee8d32222
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_4607e918-094d-4aa9-8b7a-50fa63a95b56_records_get_eea1d61b.yaml
@@ -0,0 +1,47 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Length
+      : - '0'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: GET
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/4607e918-094d-4aa9-8b7a-50fa63a95b56/records
+  response:
+    body:
+      string: '{"data":[{"id":"5bbd89ec-4eba-4f41-bd47-2a23a005a20a","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:25:26.024986Z","dataset_id":"4607e918-094d-4aa9-8b7a-50fa63a95b56","expected_output":{"answer":"Beijing"},"input":{"prompt":"What
+        is the capital of China?"},"updated_at":"2025-10-21T18:25:26.024986Z"}},{"id":"93328f7a-bfd2-4672-8b94-76b0698cd754","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:25:23.855004Z","dataset_id":"4607e918-094d-4aa9-8b7a-50fa63a95b56","expected_output":{"answer":"Paris"},"input":{"prompt":"What
+        is the capital of France?"},"updated_at":"2025-10-21T18:25:23.855004Z"}}],"meta":{"after":""}}'
+    headers:
+      content-length:
+      - '753'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:25:32 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_3f1a88cb.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_3f1a88cb.yaml
new file mode 100644
index 00000000000..efd2579c8d3
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_3f1a88cb.yaml
@@ -0,0 +1,46 @@
+interactions:
+- request:
+    body: '{"data": {"type": "datasets", "attributes": {"type": "soft", "dataset_ids":
+      ["4607e918-094d-4aa9-8b7a-50fa63a95b56"]}}}'
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '119'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/delete
+  response:
+    body:
+      string: '{"data":[{"id":"4607e918-094d-4aa9-8b7a-50fa63a95b56","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:25:23.688284Z","current_version":2,"deleted_at":"2025-10-21T18:25:34.800676Z","description":"A
+        test dataset","name":"test-dataset-test_dataset_pull_w_versions[test_dataset_records0]","project_id":"f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9","updated_at":"2025-10-21T18:25:26.033352Z"}}]}'
+    headers:
+      content-length:
+      - '450'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:25:34 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_516a93fa.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_516a93fa.yaml
new file mode 100644
index 00000000000..42896697f82
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_datasets_delete_post_516a93fa.yaml
@@ -0,0 +1,46 @@
+interactions:
+- request:
+    body: '{"data": {"type": "datasets", "attributes": {"type": "soft", "dataset_ids":
+      ["0bb93ae7-43c4-48ff-91e4-d2817fee85fe"]}}}'
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '119'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/datasets/delete
+  response:
+    body:
+      string: '{"data":[{"id":"0bb93ae7-43c4-48ff-91e4-d2817fee85fe","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:26:24.806202Z","current_version":1,"deleted_at":"2025-10-21T18:26:27.521296Z","description":"A
+        test dataset","name":"test-dataset-test_dataset_pull_w_invalid_version[test_dataset_records0]","project_id":"f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9","updated_at":"2025-10-21T18:26:24.946573Z"}}]}'
+    headers:
+      content-length:
+      - '457'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:26:27 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_filter_name__test-dataset-test_dataset_pull_w_invalid_version_test_dataset_records0__get_12b16625.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_filter_name__test-dataset-test_dataset_pull_w_invalid_version_test_dataset_records0__get_12b16625.yaml
new file mode 100644
index 00000000000..bcc667c8752
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_filter_name__test-dataset-test_dataset_pull_w_invalid_version_test_dataset_records0__get_12b16625.yaml
@@ -0,0 +1,46 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Length
+      : - '0'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: GET
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9/datasets?filter%5Bname%5D=test-dataset-test_dataset_pull_w_invalid_version%5Btest_dataset_records0%5D
+  response:
+    body:
+      string: '{"data":[{"id":"0bb93ae7-43c4-48ff-91e4-d2817fee85fe","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:26:24.806202Z","current_version":1,"description":"A
+        test dataset","name":"test-dataset-test_dataset_pull_w_invalid_version[test_dataset_records0]","project_id":"f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9","updated_at":"2025-10-21T18:26:24.946573Z"}}],"meta":{"after":""}}'
+    headers:
+      content-length:
+      - '434'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:26:27 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_filter_name__test-dataset-test_dataset_pull_w_versions_test_dataset_records0__get_5cf2366a.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_filter_name__test-dataset-test_dataset_pull_w_versions_test_dataset_records0__get_5cf2366a.yaml
new file mode 100644
index 00000000000..a8107245785
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_filter_name__test-dataset-test_dataset_pull_w_versions_test_dataset_records0__get_5cf2366a.yaml
@@ -0,0 +1,46 @@
+interactions:
+- request:
+    body: null
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Length
+      : - '0'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: GET
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9/datasets?filter%5Bname%5D=test-dataset-test_dataset_pull_w_versions%5Btest_dataset_records0%5D
+  response:
+    body:
+      string: '{"data":[{"id":"4607e918-094d-4aa9-8b7a-50fa63a95b56","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:25:23.688284Z","current_version":2,"description":"A
+        test dataset","name":"test-dataset-test_dataset_pull_w_versions[test_dataset_records0]","project_id":"f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9","updated_at":"2025-10-21T18:25:26.033352Z"}}],"meta":{"after":""}}'
+    headers:
+      content-length:
+      - '427'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:25:31 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_post_5a9c5f1b.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_post_5a9c5f1b.yaml
new file mode 100644
index 00000000000..3b03000d203
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_post_5a9c5f1b.yaml
@@ -0,0 +1,46 @@
+interactions:
+- request:
+    body: '{"data": {"type": "datasets", "attributes": {"name": "test-dataset-test_dataset_pull_w_versions[test_dataset_records0]",
+      "description": "A test dataset"}}}'
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '155'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9/datasets
+  response:
+    body:
+      string: '{"data":{"id":"4607e918-094d-4aa9-8b7a-50fa63a95b56","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:25:23.688284458Z","current_version":0,"description":"A
+        test dataset","name":"test-dataset-test_dataset_pull_w_versions[test_dataset_records0]","project_id":"f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9","updated_at":"2025-10-21T18:25:23.688284458Z"}}}'
+    headers:
+      content-length:
+      - '411'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:25:23 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_post_f3e0da1c.yaml b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_post_f3e0da1c.yaml
new file mode 100644
index 00000000000..17ca6f8adec
--- /dev/null
+++ b/tests/llmobs/llmobs_cassettes/datadog/datadog_api_unstable_llm-obs_v1_f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9_datasets_post_f3e0da1c.yaml
@@ -0,0 +1,46 @@
+interactions:
+- request:
+    body: '{"data": {"type": "datasets", "attributes": {"name": "test-dataset-test_dataset_pull_w_invalid_version[test_dataset_records0]",
+      "description": "A test dataset"}}}'
+    headers:
+      Accept:
+      - '*/*'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Accept-Encoding
+      : - identity
+      Connection:
+      - keep-alive
+      Content-Length:
+      - '162'
+      ? !!python/object/apply:multidict._multidict.istr
+      - Content-Type
+      : - application/json
+      User-Agent:
+      - python-requests/2.32.3
+    method: POST
+    uri: https://api.datadoghq.com/api/unstable/llm-obs/v1/f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9/datasets
+  response:
+    body:
+      string: '{"data":{"id":"0bb93ae7-43c4-48ff-91e4-d2817fee85fe","type":"datasets","attributes":{"author":{"id":"de473b30-eb9f-11e9-a77a-c7405862b8bd"},"created_at":"2025-10-21T18:26:24.80620202Z","current_version":0,"description":"A
+        test dataset","name":"test-dataset-test_dataset_pull_w_invalid_version[test_dataset_records0]","project_id":"f0a6723e-a7e8-4efd-a94a-b892b7b6fbf9","updated_at":"2025-10-21T18:26:24.80620202Z"}}}'
+    headers:
+      content-length:
+      - '416'
+      content-security-policy:
+      - frame-ancestors 'self'; report-uri https://logs.browser-intake-datadoghq.com/api/v2/logs?dd-api-key=pube4f163c23bbf91c16b8f57f56af9fc58&dd-evp-origin=content-security-policy&ddsource=csp-report&ddtags=site%3Adatadoghq.com
+      content-type:
+      - application/vnd.api+json
+      date:
+      - Tue, 21 Oct 2025 18:26:24 GMT
+      strict-transport-security:
+      - max-age=31536000; includeSubDomains; preload
+      vary:
+      - Accept-Encoding
+      x-content-type-options:
+      - nosniff
+      x-frame-options:
+      - SAMEORIGIN
+    status:
+      code: 200
+      message: OK
+version: 1
diff --git a/tests/llmobs/test_experiments.py b/tests/llmobs/test_experiments.py
index dd36225d589..cf4e44905f4 100644
--- a/tests/llmobs/test_experiments.py
+++ b/tests/llmobs/test_experiments.py
@@ -300,7 +300,8 @@ def test_dataset_csv_no_expected_output(llmobs, tmp_csv_file_for_upload):
             assert len(ds) == len(dataset)
             assert ds.name == dataset.name
             assert ds.description == dataset.description
-            assert ds._version == 1
+            assert ds.latest_version == 1
+            assert ds.latest_version == ds.version
         finally:
             if dataset_id:
                 llmobs._delete_dataset(dataset_id=dataset_id)
@@ -347,7 +348,8 @@ def test_dataset_csv(llmobs, tmp_csv_file_for_upload):
             assert len(ds) == len(dataset)
             assert ds.name == dataset.name
             assert ds.description == dataset.description
-            assert ds._version == 1
+            assert ds.latest_version == 1
+            assert ds.latest_version == ds.version
         finally:
             if dataset_id:
                 llmobs._delete_dataset(dataset_id=dataset_id)
@@ -400,7 +402,8 @@ def test_dataset_csv_pipe_separated(llmobs, tmp_csv_file_for_upload):
             assert len(ds) == len(dataset)
             assert ds.name == dataset.name
             assert ds.description == dataset.description
-            assert ds._version == 1
+            assert ds.latest_version == 1
+            assert ds.latest_version == ds.version
         finally:
             if dataset_id:
                 llmobs._delete_dataset(dataset_id=dataset._id)
@@ -423,7 +426,8 @@ def test_dataset_pull_large_num_records(llmobs, test_dataset_large_num_records):
     assert len(pds) == len(test_dataset_large_num_records)
     assert pds.name == test_dataset_large_num_records.name
     assert pds.description == test_dataset_large_num_records.description
-    assert pds._version == test_dataset_large_num_records._version == 1
+    assert pds.latest_version == test_dataset_large_num_records.latest_version == 1
+    assert pds.version == test_dataset_large_num_records.version == 1
 
     dataset = sorted(pds, key=lambda r: int(r["input_data"].lstrip("input_")))
     for i, d in enumerate(dataset):
@@ -450,7 +454,57 @@ def test_dataset_pull_exists_with_record(llmobs, test_dataset_one_record):
     assert dataset[0]["expected_output"] == {"answer": "Paris"}
     assert dataset.name == test_dataset_one_record.name
     assert dataset.description == test_dataset_one_record.description
-    assert dataset._version == test_dataset_one_record._version == 1
+    assert dataset.latest_version == test_dataset_one_record.latest_version == 1
+    assert dataset.version == test_dataset_one_record.version == 1
+
+
+@pytest.mark.parametrize(
+    "test_dataset_records",
+    [[DatasetRecord(input_data={"prompt": "What is the capital of France?"}, expected_output={"answer": "Paris"})]],
+)
+def test_dataset_pull_w_versions(llmobs, test_dataset, test_dataset_records):  # latest pull vs. pinned version
+    assert len(test_dataset) == 1  # fixture starts with the single parametrized record
+    assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of France?"}
+    assert test_dataset[0]["expected_output"] == {"answer": "Paris"}
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
+
+    test_dataset.append(  # stage a second record locally
+        {"input_data": {"prompt": "What is the capital of China?"}, "expected_output": {"answer": "Beijing"}}
+    )
+    test_dataset.push()  # expected to move the dataset to version 2 (asserted below)
+    wait_for_backend(4)  # presumably lets the backend settle before pulling; helper is not shown here
+
+    dataset_v2 = llmobs.pull_dataset(dataset_name=test_dataset.name)  # no version requested
+    assert len(dataset_v2) == 2  # NOTE(review): index-based checks below depend on the pulled record order
+    assert dataset_v2[1]["input_data"] == {"prompt": "What is the capital of France?"}
+    assert dataset_v2[1]["expected_output"] == {"answer": "Paris"}
+    assert dataset_v2[0]["input_data"] == {"prompt": "What is the capital of China?"}
+    assert dataset_v2[0]["expected_output"] == {"answer": "Beijing"}
+    assert dataset_v2.name == test_dataset.name
+    assert dataset_v2.description == test_dataset.description
+    assert dataset_v2.latest_version == test_dataset.latest_version == 2
+    assert dataset_v2.version == test_dataset.version == 2  # unversioned pull lands on the latest version
+
+    dataset_v1 = llmobs.pull_dataset(dataset_name=test_dataset.name, version=1)  # request the pre-push version
+    assert len(dataset_v1) == 1  # only the original record is expected at version 1
+    assert dataset_v1[0]["input_data"] == {"prompt": "What is the capital of France?"}
+    assert dataset_v1[0]["expected_output"] == {"answer": "Paris"}
+    assert dataset_v1.name == test_dataset.name
+    assert dataset_v1.description == test_dataset.description
+    assert dataset_v1.latest_version == test_dataset.latest_version == 2  # latest is still 2 for an older pull
+    assert dataset_v1.version == 1  # the pulled object reports the requested version, not the latest
+
+
+@pytest.mark.parametrize(
+    "test_dataset_records",
+    [[DatasetRecord(input_data={"prompt": "What is the capital of France?"}, expected_output={"answer": "Paris"})]],
+)
+def test_dataset_pull_w_invalid_version(llmobs, test_dataset, test_dataset_records):  # out-of-range version
+    with pytest.raises(
+        ValueError, match="Failed to pull dataset records for.*version is greater than the current version or negative"
+    ):
+        llmobs.pull_dataset(dataset_name=test_dataset.name, version=420)  # recorded dataset is at version 1
 
 
 def test_dataset_pull_from_project(llmobs, test_dataset_one_record_separate_project):
@@ -464,7 +518,8 @@ def test_dataset_pull_from_project(llmobs, test_dataset_one_record_separate_proj
     assert dataset[0]["expected_output"] == {"answer": "Boston"}
     assert dataset.name == test_dataset_one_record_separate_project.name
     assert dataset.description == test_dataset_one_record_separate_project.description
-    assert dataset._version == test_dataset_one_record_separate_project._version == 1
+    assert dataset.latest_version == test_dataset_one_record_separate_project.latest_version == 1
+    assert dataset.version == test_dataset_one_record_separate_project.version == 1
 
 
 @pytest.mark.parametrize(
@@ -479,7 +534,8 @@ def test_dataset_pull_from_project(llmobs, test_dataset_one_record_separate_proj
     ],
 )
 def test_dataset_modify_records_multiple_times(llmobs, test_dataset, test_dataset_records):
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     test_dataset.update(
         0,
@@ -518,7 +574,8 @@ def test_dataset_modify_records_multiple_times(llmobs, test_dataset, test_datase
 
     test_dataset.push()
     assert len(test_dataset) == 2
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
 
     assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of Germany?"}
     assert test_dataset[0]["expected_output"] == {"answer": "Berlin"}
@@ -548,7 +605,8 @@ def test_dataset_modify_records_multiple_times(llmobs, test_dataset, test_datase
     assert len(ds) == 2
     assert ds.name == test_dataset.name
     assert ds.description == test_dataset.description
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
 
 
 @pytest.mark.parametrize(
@@ -556,7 +614,8 @@ def test_dataset_modify_records_multiple_times(llmobs, test_dataset, test_datase
     [[DatasetRecord(input_data={"prompt": "What is the capital of France?"}, expected_output={"answer": "Paris"})]],
 )
 def test_dataset_modify_single_record(llmobs, test_dataset, test_dataset_records):
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     test_dataset.update(
         0,
@@ -568,7 +627,8 @@ def test_dataset_modify_single_record(llmobs, test_dataset, test_dataset_records
 
     test_dataset.push()
     assert len(test_dataset) == 1
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
 
     assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of Germany?"}
     assert test_dataset[0]["expected_output"] == {"answer": "Berlin"}
@@ -585,7 +645,8 @@ def test_dataset_modify_single_record(llmobs, test_dataset, test_dataset_records
     assert len(ds) == 1
     assert ds.name == test_dataset.name
     assert ds.description == test_dataset.description
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
 
 
 @pytest.mark.parametrize(
@@ -593,7 +654,8 @@ def test_dataset_modify_single_record(llmobs, test_dataset, test_dataset_records
     [[DatasetRecord(input_data={"prompt": "What is the capital of France?"}, expected_output={"answer": "Paris"})]],
 )
 def test_dataset_modify_single_record_empty_record(llmobs, test_dataset, test_dataset_records):
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     with pytest.raises(
         ValueError,
@@ -615,7 +677,8 @@ def test_dataset_estimate_size(llmobs, test_dataset):
     [[DatasetRecord(input_data={"prompt": "What is the capital of France?"}, expected_output={"answer": "Paris"})]],
 )
 def test_dataset_modify_record_on_optional(llmobs, test_dataset, test_dataset_records):
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     test_dataset.update(0, {"expected_output": None})
     assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of France?"}
@@ -624,7 +687,8 @@ def test_dataset_modify_record_on_optional(llmobs, test_dataset, test_dataset_re
 
     test_dataset.push()
     assert len(test_dataset) == 1
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
 
     assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of France?"}
     assert test_dataset[0]["expected_output"] is None
@@ -641,7 +705,8 @@ def test_dataset_modify_record_on_optional(llmobs, test_dataset, test_dataset_re
     assert len(ds) == 1
     assert ds.name == test_dataset.name
     assert ds.description == test_dataset.description
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
 
 
 @pytest.mark.parametrize(
@@ -657,7 +722,8 @@ def test_dataset_modify_record_on_optional(llmobs, test_dataset, test_dataset_re
     ],
 )
 def test_dataset_modify_record_on_input(llmobs, test_dataset, test_dataset_records):
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
     test_dataset.update(0, {"input_data": "A"})
     assert test_dataset[0]["input_data"] == "A"
     assert test_dataset[0]["expected_output"] == {"answer": "Paris"}
@@ -665,7 +731,8 @@ def test_dataset_modify_record_on_input(llmobs, test_dataset, test_dataset_recor
 
     test_dataset.push()
     assert len(test_dataset) == 1
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
 
     assert test_dataset[0]["input_data"] == "A"
     assert test_dataset[0]["expected_output"] == {"answer": "Paris"}
@@ -684,7 +751,8 @@ def test_dataset_modify_record_on_input(llmobs, test_dataset, test_dataset_recor
     assert len(ds) == 1
     assert ds.name == test_dataset.name
     assert ds.description == test_dataset.description
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
 
 
 @pytest.mark.parametrize(
@@ -696,7 +764,8 @@ def test_dataset_append(llmobs, test_dataset):
         DatasetRecord(input_data={"prompt": "What is the capital of Italy?"}, expected_output={"answer": "Rome"})
     )
     assert len(test_dataset) == 2
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     wait_for_backend()
     test_dataset.push()
@@ -707,12 +776,14 @@ def test_dataset_append(llmobs, test_dataset):
     assert test_dataset[1]["expected_output"] == {"answer": "Rome"}
     assert test_dataset.name == test_dataset.name
     assert test_dataset.description == test_dataset.description
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
 
     # check that a pulled dataset matches the pushed dataset
     wait_for_backend()
     ds = llmobs.pull_dataset(dataset_name=test_dataset.name)
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
     assert len(ds) == 2
     # note: it looks like dataset order is not deterministic
     assert ds[1]["input_data"] == {"prompt": "What is the capital of France?"}
@@ -735,7 +806,8 @@ def test_dataset_extend(llmobs, test_dataset):
         ]
     )
     assert len(test_dataset) == 3
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     wait_for_backend()
     test_dataset.push()
@@ -748,12 +820,14 @@ def test_dataset_extend(llmobs, test_dataset):
     assert test_dataset[2]["expected_output"] == {"answer": "Stockholm"}
     assert test_dataset.name == test_dataset.name
     assert test_dataset.description == test_dataset.description
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
 
     # check that a pulled dataset matches the pushed dataset
     wait_for_backend()
     ds = llmobs.pull_dataset(dataset_name=test_dataset.name)
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
     assert len(ds) == 3
     assert ds[2]["input_data"] == {"prompt": "What is the capital of France?"}
     # order is non deterministic
@@ -770,7 +844,8 @@ def test_dataset_extend(llmobs, test_dataset):
 def test_dataset_append_no_expected_output(llmobs, test_dataset):
     test_dataset.append(DatasetRecord(input_data={"prompt": "What is the capital of Sealand?"}))
     assert len(test_dataset) == 2
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     wait_for_backend()
     test_dataset.push()
@@ -781,12 +856,14 @@ def test_dataset_append_no_expected_output(llmobs, test_dataset):
     assert "expected_output" not in test_dataset[1]
     assert test_dataset.name == test_dataset.name
     assert test_dataset.description == test_dataset.description
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
 
     # check that a pulled dataset matches the pushed dataset
     wait_for_backend()
     ds = llmobs.pull_dataset(dataset_name=test_dataset.name)
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
     assert len(ds) == 2
     # note: it looks like dataset order is not deterministic
     assert ds[1]["input_data"] == {"prompt": "What is the capital of France?"}
@@ -809,11 +886,13 @@ def test_dataset_append_no_expected_output(llmobs, test_dataset):
 def test_dataset_delete(llmobs, test_dataset):
     test_dataset.delete(0)
     assert len(test_dataset) == 1
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     wait_for_backend()
     test_dataset.push()
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
     assert len(test_dataset) == 1
     assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of Italy?"}
     assert test_dataset[0]["expected_output"] == {"answer": "Rome"}
@@ -823,7 +902,8 @@ def test_dataset_delete(llmobs, test_dataset):
     # check that a pulled dataset matches the pushed dataset
     wait_for_backend()
     ds = llmobs.pull_dataset(dataset_name=test_dataset.name)
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
     assert len(ds) == 1
     assert ds[0]["input_data"] == {"prompt": "What is the capital of Italy?"}
     assert ds[0]["expected_output"] == {"answer": "Rome"}
@@ -841,11 +921,13 @@ def test_dataset_delete(llmobs, test_dataset):
 def test_dataset_delete_no_expected_output(llmobs, test_dataset):
     test_dataset.delete(1)
     assert len(test_dataset) == 1
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     wait_for_backend()
     test_dataset.push()
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
     assert len(test_dataset) == 1
     assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of Nauru?"}
     assert "expected_output" not in test_dataset[0]
@@ -855,7 +937,8 @@ def test_dataset_delete_no_expected_output(llmobs, test_dataset):
     # check that a pulled dataset matches the pushed dataset
     wait_for_backend()
     ds = llmobs.pull_dataset(dataset_name=test_dataset.name)
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
     assert len(ds) == 1
     assert ds[0]["input_data"] == {"prompt": "What is the capital of Nauru?"}
     assert ds[0]["expected_output"] is None
@@ -879,11 +962,13 @@ def test_dataset_delete_after_update(llmobs, test_dataset):
 
     test_dataset.delete(0)
     assert len(test_dataset) == 1
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     wait_for_backend()
     test_dataset.push()
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
     assert len(test_dataset) == 1
     assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of Italy?"}
     assert test_dataset[0]["expected_output"] == {"answer": "Rome"}
@@ -893,7 +978,8 @@ def test_dataset_delete_after_update(llmobs, test_dataset):
     # check that a pulled dataset matches the pushed dataset
     wait_for_backend()
     ds = llmobs.pull_dataset(dataset_name=test_dataset.name)
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
     assert len(ds) == 1
     assert ds[0]["input_data"] == {"prompt": "What is the capital of Italy?"}
     assert ds[0]["expected_output"] == {"answer": "Rome"}
@@ -918,14 +1004,16 @@ def test_dataset_delete_after_append(llmobs, test_dataset):
     test_dataset.delete(0)
     # all that remains should be Italy and Sweden questions
     assert len(test_dataset) == 2
-    assert test_dataset._version == 1
+    assert test_dataset.latest_version == 1
+    assert test_dataset.version == 1
 
     assert len(test_dataset._new_records_by_record_id) == 1
     assert len(test_dataset._deleted_record_ids) == 1
 
     wait_for_backend()
     test_dataset.push()
-    assert test_dataset._version == 2
+    assert test_dataset.latest_version == 2
+    assert test_dataset.version == 2
     assert len(test_dataset) == 2
     assert test_dataset[0]["input_data"] == {"prompt": "What is the capital of Italy?"}
     assert test_dataset[0]["expected_output"] == {"answer": "Rome"}
@@ -937,7 +1025,8 @@ def test_dataset_delete_after_append(llmobs, test_dataset):
     # check that a pulled dataset matches the pushed dataset
     wait_for_backend()
     ds = llmobs.pull_dataset(dataset_name=test_dataset.name)
-    assert ds._version == 2
+    assert ds.latest_version == 2
+    assert ds.version == 2
     assert len(ds) == 2
     sds = sorted(ds, key=lambda r: r["input_data"]["prompt"])
     assert sds[0]["input_data"] == {"prompt": "What is the capital of Italy?"}
@@ -1106,7 +1195,7 @@ def test_experiment_create(llmobs, test_dataset_one_record):
     project = llmobs._instance._dne_client.project_create_or_get("test-project")
     project_id = project.get("_id")
     exp_id, exp_run_name = llmobs._instance._dne_client.experiment_create(
-        exp.name, exp._dataset._id, project_id, exp._dataset._version, exp._config
+        exp.name, exp._dataset._id, project_id, exp._dataset.latest_version, exp._config
     )
     assert exp_id is not None
     assert exp_run_name.startswith("test_experiment")