5 changes: 5 additions & 0 deletions nemo_retriever/pyproject.toml
@@ -83,6 +83,11 @@ dependencies = [
svg = [
"cairosvg>=2.7.0",
]
eval = [
Collaborator

Does this mean that if we don't do pip install nemo-retriever[eval], we won't be able to use the LLM generator or judge operators? I don't think that is what we want.

Member Author

This concern is mainly because we want to use the LLM generator or judge operators outside the pure eval scope for now, right?

I can look into refactoring that later alongside this issue (#1769), but the only need for them now seems to be in the evals (see the optional-dependency sketch after this file's diff).

"litellm>=1.40.0",
"pyyaml>=6.0",
"tenacity>=8.0.0",
]
dev = [
"build>=1.2.2",
"pytest>=8.0.2",
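To make the thread above concrete: a minimal sketch, not part of this PR, of how a module gated behind the [eval] extra could fail with an actionable message when litellm is absent. The placement and error text are assumptions; the PR itself takes the lazy-import route shown in `evaluation/__init__.py` below.

```python
# Hypothetical guard for an optional dependency (illustrative only; this PR
# instead keeps litellm-dependent modules behind lazy imports, see below).
try:
    import litellm  # noqa: F401  # ships only with: pip install 'nemo-retriever[eval]'
except ImportError as exc:
    raise ImportError(
        "The LLM generator and judge operators require the 'eval' extra: "
        "pip install 'nemo-retriever[eval]'"
    ) from exc
```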
2 changes: 2 additions & 0 deletions nemo_retriever/src/nemo_retriever/adapters/cli/main.py
@@ -10,6 +10,7 @@
from nemo_retriever.utils.benchmark import app as benchmark_app
from nemo_retriever.chart import app as chart_app
from nemo_retriever.utils.compare import app as compare_app
from nemo_retriever.evaluation.cli import app as eval_app
from nemo_retriever.harness import app as harness_app
from nemo_retriever.html import __main__ as html_main
from nemo_retriever.utils.image import app as image_app
@@ -28,6 +29,7 @@
app.add_typer(local_app, name="local")
app.add_typer(chart_app, name="chart")
app.add_typer(compare_app, name="compare")
app.add_typer(eval_app, name="eval")
app.add_typer(benchmark_app, name="benchmark")
app.add_typer(harness_app, name="harness")
app.add_typer(vector_store_app, name="vector-store")
77 changes: 77 additions & 0 deletions nemo_retriever/src/nemo_retriever/evaluation/__init__.py
@@ -0,0 +1,77 @@
# SPDX-FileCopyrightText: Copyright (c) 2024-25, NVIDIA CORPORATION & AFFILIATES.
# All rights reserved.
# SPDX-License-Identifier: Apache-2.0

"""QA evaluation framework for nemo_retriever.

Provides pluggable retrieval, generation, judging, and orchestration
components for measuring LLM answer quality given retrieved context.

The ``EvalOperator`` base class bridges ``graph.AbstractOperator`` into the
evaluation domain, enabling ``>>`` chaining, ``Graph.execute()``, and
executor compatibility for all evaluation operators.

Types, scoring, and ``EvalOperator`` are always available.
Modules that depend on ``litellm`` (generators, judges, generation,
judging, orchestrator, config) are lazy-loaded so that lightweight
consumers can use scoring without installing the ``[eval]`` extra.
The full feature set requires::

    pip install nemo-retriever[eval]
"""

from nemo_retriever.evaluation.eval_operator import EvalOperator
from nemo_retriever.evaluation.scoring import score_dataframe
from nemo_retriever.evaluation.types import (
    AnswerJudge,
    GenerationResult,
    JudgeResult,
    LLMClient,
    RetrievalResult,
    RetrieverStrategy,
)

_LAZY_IMPORTS = {
    "QAGenerationOperator": "nemo_retriever.evaluation.generation",
    "JudgingOperator": "nemo_retriever.evaluation.judging",
    "ScoringOperator": "nemo_retriever.evaluation.scoring_operator",
    "RetrievalLoaderOperator": "nemo_retriever.evaluation.retrieval_loader",
    "LiteLLMClient": "nemo_retriever.evaluation.generators",
    "LLMJudge": "nemo_retriever.evaluation.judges",
    "QAEvalPipeline": "nemo_retriever.evaluation.orchestrator",
    "load_eval_config": "nemo_retriever.evaluation.config",
    "build_eval_chain": "nemo_retriever.evaluation.config",
    "build_eval_pipeline": "nemo_retriever.evaluation.config",
    "run_eval_sweep": "nemo_retriever.evaluation.runner",
}


def __getattr__(name: str):
    if name in _LAZY_IMPORTS:
        import importlib

        module = importlib.import_module(_LAZY_IMPORTS[name])
        return getattr(module, name)
    raise AttributeError(f"module {__name__!r} has no attribute {name!r}")


__all__ = [
    "AnswerJudge",
    "EvalOperator",
    "GenerationResult",
    "JudgeResult",
    "JudgingOperator",
    "LLMClient",
    "LLMJudge",
    "LiteLLMClient",
    "QAEvalPipeline",
    "QAGenerationOperator",
    "RetrievalLoaderOperator",
    "RetrievalResult",
    "RetrieverStrategy",
    "ScoringOperator",
    "build_eval_chain",
    "build_eval_pipeline",
    "load_eval_config",
    "run_eval_sweep",
    "score_dataframe",
]
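As a usage note, a small sketch of what the module-level ``__getattr__`` above (PEP 562) means for consumers. That ``generators`` imports ``litellm`` at module scope is an assumption:

```python
# Eagerly exported names work without the [eval] extra installed.
from nemo_retriever.evaluation import EvalOperator, score_dataframe

# A lazy name triggers the package-level __getattr__, which imports
# nemo_retriever.evaluation.generators on first access. If litellm is not
# installed (assumed to be imported there), this raises ModuleNotFoundError.
from nemo_retriever.evaluation import LiteLLMClient
```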