IBM · dafnapension · Feb 11, 2025
diff --git a/tests/library/test_loaders.py b/tests/library/test_loaders.py
@@ -4,6 +4,7 @@
 from unittest.mock import patch
 
 import pandas as pd
+from datasets import Dataset, DatasetDict, IterableDataset, IterableDatasetDict
 from unitxt.error_utils import UnitxtError
 from unitxt.loaders import (
     LoadCSV,
@@ -232,6 +233,23 @@ def test_load_from_HF_multiple_innvocation_with_filter(self):
         )  # that HF dataset only has the 'test' split
         self.assertEqual(instance["language"], "eng")
 
+    def test_load_HF_lazily(self):
+        """Check that a LoadHF built with streaming=True returns readable data."""
+        lazy_loader = LoadHF(path="ibm/finqa", streaming=True)
+        # Single split: per the original note, load_dataset may have switched
+        # streaming from True to False, so both dataset flavors are accepted.
+        dataset = lazy_loader.load_dataset(split="test")
+        self.assertIsInstance(dataset, (Dataset, IterableDataset))
+        # Touch the data here; otherwise only the split generator reads it when yielding.
+        # The None default turns an empty split into a failure, not a StopIteration error.
+        self.assertIsNotNone(next(iter(dataset), None), "split 'test' is empty")
+        # The same goes when split=None, where a mapping of split name -> dataset is returned.
+        dataset = lazy_loader.load_dataset(split=None)
+        self.assertIsInstance(dataset, (DatasetDict, IterableDatasetDict))
+        for split_name, split_data in dataset.items():
+            # Every returned split must be able to produce a first example.
+            self.assertIsNotNone(next(iter(split_data), None), f"split {split_name!r} is empty")
+
     def test_load_from_HF_split(self):
         loader = LoadHF(path="sst2", split="train")
         ms = loader()

diff --git a/utils/.secrets.baseline b/utils/.secrets.baseline
@@ -171,15 +171,15 @@
         "filename": "tests/library/test_loaders.py",
         "hashed_secret": "8d814baafe5d8412572dc520dcab83f60ce1375c",
         "is_verified": false,
-        "line_number": 125,
+        "line_number": 126,
         "is_secret": false
       },
       {
         "type": "Secret Keyword",
         "filename": "tests/library/test_loaders.py",
         "hashed_secret": "42a472ac88cd8d43a2c5ae0bd0bdf4626cdaba31",
         "is_verified": false,
-        "line_number": 135,
+        "line_number": 136,
         "is_secret": false
       }
     ]