diff --git a/src/tatm/tokenizer/engine.py b/src/tatm/tokenizer/engine.py index 23d2149..75262af 100644 --- a/src/tatm/tokenizer/engine.py +++ b/src/tatm/tokenizer/engine.py @@ -67,7 +67,7 @@ def get_example(self): return None if not self.initialized: raise RuntimeError("DataServer not initialized. Call 'initialize' first.") - dataset_idx = self.rng.randint(0, len(self.datasets) - 1) + dataset_idx = self.rng.randint(0, len(self.dataset_iters) - 1) try: example = next(self.dataset_iters[dataset_idx]) content_field = self.datasets[dataset_idx].metadata.content_field