fix: import error for TestsetGeneration and small fixes #1516

Merged: 5 commits, Oct 16, 2024
src/ragas/dataset_schema.py: 66 additions & 48 deletions
@@ -2,6 +2,7 @@
 
 import json
 import typing as t
+from abc import ABC, abstractmethod
 from dataclasses import dataclass, field
 
 from datasets import Dataset as HFDataset
@@ -12,6 +13,8 @@
 from ragas.utils import safe_nanmean
 
 if t.TYPE_CHECKING:
+    from pathlib import Path
+
     from datasets import Dataset as HFDataset
     from pandas import DataFrame as PandasDataframe
 
@@ -136,9 +139,20 @@ def pretty_repr(self):
 Sample = t.TypeVar("Sample", bound=BaseSample)
 
 
-class RagasDataset(BaseModel, t.Generic[Sample]):
+class RagasDataset(ABC, BaseModel, t.Generic[Sample]):
     samples: t.List[Sample]
 
+    @abstractmethod
+    def to_list(self) -> t.List[t.Dict]:
+        """Converts the samples to a list of dictionaries."""
+        pass
+
+    @classmethod
+    @abstractmethod
+    def from_list(cls, data: t.List[t.Dict]) -> RagasDataset[Sample]:
+        """Creates an EvaluationDataset from a list of dictionaries."""
+        pass
+
     @field_validator("samples")
     def validate_samples(cls, samples: t.List[BaseSample]) -> t.List[BaseSample]:
         """Validates that all samples are of the same type."""
@@ -155,20 +169,6 @@ def get_sample_type(self) -> t.Type[Sample]:
         """Returns the type of the samples in the dataset."""
         return type(self.samples[0])
 
-    def _to_list(self) -> t.List[t.Dict]:
-        """Converts the samples to a list of dictionaries."""
-        rows = [sample.to_dict() for sample in self.samples]
-
-        if self.get_sample_type() == MultiTurnSample:
-            for sample in rows:
-                for item in sample["user_input"]:
-                    if not isinstance(item["content"], str):
-                        item["content"] = json.dumps(
-                            item["content"], ensure_ascii=False
-                        )
-
-        return rows
-
     def to_hf_dataset(self) -> HFDataset:
         """Converts the dataset to a Hugging Face Dataset."""
         try:
@@ -178,7 +178,7 @@ def to_hf_dataset(self) -> HFDataset:
                 "datasets is not installed. Please install it to use this function."
             )
 
-        return HFDataset.from_list(self._to_list())
+        return HFDataset.from_list(self.to_list())
 
     @classmethod
     def from_hf_dataset(cls, dataset: HFDataset):
@@ -194,26 +194,13 @@ def to_pandas(self) -> PandasDataframe:
                 "pandas is not installed. Please install it to use this function."
             )
 
-        data = self._to_list()
+        data = self.to_list()
         return pd.DataFrame(data)
 
     def features(self):
         """Returns the features of the samples."""
         return self.samples[0].get_features()
 
-    @classmethod
-    def from_list(cls, mapping: t.List[t.Dict]):
-        """Creates an EvaluationDataset from a list of dictionaries."""
-        samples = []
-        if all(
-            "user_input" in item and isinstance(mapping[0]["user_input"], list)
-            for item in mapping
-        ):
-            samples.extend(MultiTurnSample(**sample) for sample in mapping)
-        else:
-            samples.extend(SingleTurnSample(**sample) for sample in mapping)
-        return cls(samples=samples)
-
     @classmethod
     def from_dict(cls, mapping: t.Dict):
         """Creates an EvaluationDataset from a dictionary."""
@@ -227,40 +214,30 @@ def from_dict(cls, mapping: t.Dict):
             samples.extend(SingleTurnSample(**sample) for sample in mapping)
         return cls(samples=samples)
 
-    @classmethod
-    def from_csv(cls, path: str):
-        """Creates an EvaluationDataset from a CSV file."""
-        import csv
-
-        with open(path, "r", newline="") as csvfile:
-            reader = csv.DictReader(csvfile)
-            data = [row for row in reader]
-        return cls.from_list(data)
-
-    def to_csv(self, path: str):
+    def to_csv(self, path: t.Union[str, Path]):
         """Converts the dataset to a CSV file."""
         import csv
 
-        data = self._to_list()
+        data = self.to_list()
         if not data:
             return
 
-        fieldnames = self.features()
+        fieldnames = data[0].keys()
 
         with open(path, "w", newline="") as csvfile:
            writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
            writer.writeheader()
            for row in data:
                writer.writerow(row)
 
-    def to_jsonl(self, path: str):
+    def to_jsonl(self, path: t.Union[str, Path]):
         """Converts the dataset to a JSONL file."""
         with open(path, "w") as jsonlfile:
             for sample in self.samples:
                 jsonlfile.write(json.dumps(sample.to_dict(), ensure_ascii=False) + "\n")
 
     @classmethod
-    def from_jsonl(cls, path: str):
+    def from_jsonl(cls, path: t.Union[str, Path]):
         """Creates an EvaluationDataset from a JSONL file."""
         with open(path, "r") as jsonlfile:
             data = [json.loads(line) for line in jsonlfile]
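Net effect of this hunk: paths may now be str or Path, CSV headers come from the serialized rows rather than features(), and from_csv is gone, so CSV becomes export-only while JSONL still round-trips. A usage sketch (file names are arbitrary):

    from pathlib import Path

    from ragas.dataset_schema import EvaluationDataset, SingleTurnSample

    ds = EvaluationDataset(
        samples=[SingleTurnSample(user_input="What is X", response="Y")]
    )

    ds.to_csv(Path("eval.csv"))   # export only; from_csv no longer exists
    ds.to_jsonl("eval.jsonl")     # plain str paths still accepted
    assert EvaluationDataset.from_jsonl("eval.jsonl") == ds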
@@ -307,8 +284,6 @@ class EvaluationDataset(RagasDataset[SingleTurnSampleOrMultiTurnSample]):
         Creates an EvaluationDataset from a list of dictionaries.
     from_dict(mapping)
         Creates an EvaluationDataset from a dictionary.
-    from_csv(path)
-        Creates an EvaluationDataset from a CSV file.
     to_csv(path)
         Converts the dataset to a CSV file.
     to_jsonl(path)
@@ -333,6 +308,37 @@ def __getitem__(
         else:
             raise TypeError("Index must be int or slice")
 
+    def to_list(self) -> t.List[t.Dict]:
+        rows = [sample.to_dict() for sample in self.samples]
+
+        if self.get_sample_type() == MultiTurnSample:
+            for sample in rows:
+                for item in sample["user_input"]:
+                    if not isinstance(item["content"], str):
+                        item["content"] = json.dumps(
+                            item["content"], ensure_ascii=False
+                        )
+
+        return rows
+
+    @classmethod
+    def from_list(cls, data: t.List[t.Dict]) -> EvaluationDataset:
+        samples = []
+        if all(
+            "user_input" in item and isinstance(data[0]["user_input"], list)
+            for item in data
+        ):
+            samples.extend(MultiTurnSample(**sample) for sample in data)
+        else:
+            samples.extend(SingleTurnSample(**sample) for sample in data)
+        return cls(samples=samples)
+
+
+class EvaluationResultRow(BaseModel):
+    dataset_row: t.Dict
+    scores: t.Dict[str, t.Any]
+    trace: t.Dict[str, t.Any] = field(default_factory=dict)  # none for now
+
 
 @dataclass
 class EvaluationResult:
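The concrete from_list dispatches on the shape of user_input: a list of messages is treated as multi-turn, anything else as single-turn. A quick sketch mirroring the dict shapes used in this PR's tests:

    from ragas.dataset_schema import (
        EvaluationDataset,
        MultiTurnSample,
        SingleTurnSample,
    )

    single = EvaluationDataset.from_list(
        [{"user_input": "What is X", "response": "Y"}]
    )
    assert single.get_sample_type() is SingleTurnSample

    multi = EvaluationDataset.from_list(
        [{"user_input": [{"content": "What is X"}], "reference": "Y"}]
    )
    assert multi.get_sample_type() is MultiTurnSample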
@@ -352,7 +358,7 @@ class EvaluationResult:
     """
 
     scores: t.List[t.Dict[str, t.Any]]
-    dataset: t.Optional[EvaluationDataset] = None
+    dataset: EvaluationDataset
     binary_columns: t.List[str] = field(default_factory=list)
     cost_cb: t.Optional[CostCallbackHandler] = None
 
@@ -407,6 +413,18 @@ def to_pandas(self, batch_size: int | None = None, batched: bool = False):
         dataset_df = self.dataset.to_pandas()
         return pd.concat([dataset_df, scores_df], axis=1)
 
+    def serialized(self) -> t.List[EvaluationResultRow]:
+        """
+        Convert the result to a list of EvaluationResultRow.
+        """
+        return [
+            EvaluationResultRow(
+                dataset_row=self.dataset[i].to_dict(),
+                scores=self.scores[i],
+            )
+            for i in range(len(self.scores))
+        ]
+
     def total_tokens(self) -> t.Union[t.List[TokenUsage], TokenUsage]:
         """
         Compute the total tokens used in the evaluation.
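serialized() zips each dataset row with its score dict into one EvaluationResultRow per sample, which makes the result straightforward to dump as JSON. Continuing the sketch above (same assumptions):

    rows = result.serialized()
    assert len(rows) == len(result.scores)
    # Each row should dump to {"dataset_row": {...}, "scores": {...}, "trace": {}}.
    print(rows[0].model_dump())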
src/ragas/testset/synthesizers/testset_schema.py: 39 additions & 16 deletions
@@ -2,14 +2,16 @@
 
 import typing as t
 
-from ragas.dataset_schema import BaseSample, RagasDataset
+from ragas.dataset_schema import (
+    BaseSample,
+    EvaluationDataset,
+    MultiTurnSample,
+    RagasDataset,
+    SingleTurnSample,
+)
 
 if t.TYPE_CHECKING:
-    from ragas.dataset_schema import (
-        EvaluationDataset,
-        MultiTurnSample,
-        SingleTurnSample,
-    )
+    from ragas.dataset_schema import MultiTurnSample, SingleTurnSample
 
 
 class TestsetSample(BaseSample):
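This import move is the fix named in the PR title: EvaluationDataset, MultiTurnSample, and SingleTurnSample were previously imported only under t.TYPE_CHECKING, yet the module needs them at runtime, so using the testset generation path raised an import error. A reduced illustration of that failure mode (not this module's literal code):

    import typing as t

    if t.TYPE_CHECKING:
        from ragas.dataset_schema import EvaluationDataset  # type-checker only

    def to_evaluation_dataset() -> "EvaluationDataset":
        # NameError at call time: the import above never runs at runtime.
        return EvaluationDataset(samples=[])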
@@ -48,13 +50,34 @@ def to_evaluation_dataset(self) -> EvaluationDataset:
             samples=[sample.eval_sample for sample in self.samples]
         )
 
-    def _to_list(self) -> t.List[t.Dict]:
-        eval_list = self.to_evaluation_dataset()._to_list()
-        testset_list_without_eval_sample = [
-            sample.model_dump(exclude={"eval_sample"}) for sample in self.samples
-        ]
-        testset_list = [
-            {**eval_sample, **sample}
-            for eval_sample, sample in zip(eval_list, testset_list_without_eval_sample)
-        ]
-        return testset_list
+    def to_list(self) -> t.List[t.Dict]:
+        """
+        Converts the Testset to a list of dictionaries.
+        """
+        return [sample.model_dump() for sample in self.samples]
+
+    @classmethod
+    def from_list(cls, data: t.List[t.Dict]) -> Testset:
+        """
+        Converts a list of dictionaries to a Testset.
+        """
+        # first create the samples
+        samples = []
+        for sample in data:
+            eval_sample = sample["eval_sample"]
+
+            # if user_input is a list it is MultiTurnSample
+            if "user_input" in eval_sample and not isinstance(
+                eval_sample.get("user_input"), list
+            ):
+                eval_sample = SingleTurnSample(**sample["eval_sample"])
+            else:
+                eval_sample = MultiTurnSample(**sample["eval_sample"])
+
+            samples.append(
+                TestsetSample(
+                    eval_sample=eval_sample, synthesizer_name=sample["synthesizer_name"]
+                )
+            )
+        # then create the testset
+        return Testset(samples=samples)
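Testset now serializes each sample whole, synthesizer_name included, and from_list rebuilds the right eval-sample type by inspecting user_input, mirroring EvaluationDataset.from_list. A round-trip sketch (the synthesizer name is illustrative):

    from ragas.dataset_schema import SingleTurnSample
    from ragas.testset.synthesizers.testset_schema import Testset, TestsetSample

    ts = Testset(
        samples=[
            TestsetSample(
                eval_sample=SingleTurnSample(user_input="What is X", response="Y"),
                synthesizer_name="single_hop_query",  # hypothetical name
            )
        ]
    )
    assert Testset.from_list(ts.to_list()) == ts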
tests/unit/test_dataset_schema.py: 42 additions & 22 deletions
@@ -3,46 +3,66 @@
 import pytest
 from pydantic import ValidationError
 
-from ragas.dataset_schema import EvaluationDataset, MultiTurnSample, SingleTurnSample
-
-
-def test_evaluation_dataset():
-    single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")
-
-    dataset = EvaluationDataset(samples=[single_turn_sample, single_turn_sample])
+from ragas.dataset_schema import (
+    EvaluationDataset,
+    HumanMessage,
+    MultiTurnSample,
+    SingleTurnSample,
+)
+
+samples = [
+    SingleTurnSample(user_input="What is X", response="Y"),
+    MultiTurnSample(
+        user_input=[HumanMessage(content="What is X")],
+        reference="Y",
+    ),
+]
+
+
+@pytest.mark.parametrize("eval_sample", samples)
+def test_evaluation_dataset(eval_sample):
+    dataset = EvaluationDataset(samples=[eval_sample, eval_sample])
 
     hf_dataset = dataset.to_hf_dataset()
 
-    assert dataset.get_sample_type() == SingleTurnSample
+    assert dataset.get_sample_type() is type(eval_sample)
     assert len(hf_dataset) == 2
-    assert dataset.features() == ["user_input", "response"]
     assert len(dataset) == 2
-    assert dataset[0] == single_turn_sample
+    assert dataset[0] == eval_sample
 
 
-def test_evaluation_dataset_save_load(tmpdir):
-    single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")
+@pytest.mark.parametrize("eval_sample", samples)
+def test_evaluation_dataset_save_load_csv(tmpdir, eval_sample):
+    dataset = EvaluationDataset(samples=[eval_sample, eval_sample])
 
-    dataset = EvaluationDataset(samples=[single_turn_sample, single_turn_sample])
     # save and load to csv
+    csv_path = tmpdir / "csvfile.csv"
+    dataset.to_csv(csv_path)
+
-    hf_dataset = dataset.to_hf_dataset()
 
-    # save and load to csv
-    dataset.to_csv(tmpdir / "csvfile.csv")
-    loaded_dataset = EvaluationDataset.from_csv(tmpdir / "csvfile.csv")
-    assert loaded_dataset == dataset
+@pytest.mark.parametrize("eval_sample", samples)
+def test_evaluation_dataset_save_load_jsonl(tmpdir, eval_sample):
+    dataset = EvaluationDataset(samples=[eval_sample, eval_sample])
 
     # save and load to jsonl
-    dataset.to_jsonl(tmpdir / "jsonlfile.jsonl")
-    loaded_dataset = EvaluationDataset.from_jsonl(tmpdir / "jsonlfile.jsonl")
+    jsonl_path = tmpdir / "jsonlfile.jsonl"
+    dataset.to_jsonl(jsonl_path)
+    loaded_dataset = EvaluationDataset.from_jsonl(jsonl_path)
     assert loaded_dataset == dataset
 
-    # load from hf dataset
+
+@pytest.mark.parametrize("eval_sample", samples)
+def test_evaluation_dataset_load_from_hf(eval_sample):
+    dataset = EvaluationDataset(samples=[eval_sample, eval_sample])
+
+    # convert to and load from hf dataset
     hf_dataset = dataset.to_hf_dataset()
     loaded_dataset = EvaluationDataset.from_hf_dataset(hf_dataset)
     assert loaded_dataset == dataset
 
 
-def test_single_type_evaluation_dataset():
+@pytest.mark.parametrize("eval_sample", samples)
+def test_single_type_evaluation_dataset(eval_sample):
     single_turn_sample = SingleTurnSample(user_input="What is X", response="Y")
     multi_turn_sample = MultiTurnSample(
         user_input=[{"content": "What is X"}],