⚡️ Speed up method RetryQueryEngine._get_prompt_modules by 43%
#130
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 43% (0.43x) speedup for
RetryQueryEngine._get_prompt_modulesinllama-index-core/llama_index/core/query_engine/retry_query_engine.py⏱️ Runtime :
72.9 microseconds→50.9 microseconds(best of250runs)📝 Explanation and details
The optimization precomputes the dictionary returned by
_get_prompt_modules()during initialization instead of creating it fresh on every call.Key changes:
self._prompt_modulesattribute in__init__()that stores the dictionary containingquery_engineandevaluatorreferences_get_prompt_modules()to return the precomputed dictionary instead of constructing a new one each timeWhy this leads to speedup:
In Python, dictionary creation (
{"key": value}) involves memory allocation and hashtable construction overhead. The original code created a new dictionary on every call to_get_prompt_modules(), while the optimized version eliminates this repeated work by creating the dictionary once during object initialization. The line profiler shows the per-hit time decreased from 366ns to 255ns (30% improvement per call).Performance characteristics:
_get_prompt_modules()- the second consecutive call in isolation tests shows 65.9% speedupTrade-offs:
This optimization is particularly valuable if
_get_prompt_modules()is called frequently during query processing workflows, as the 43% overall speedup compounds across multiple invocations.✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
from typing import Any, Dict
imports
import pytest
from llama_index.core.query_engine.retry_query_engine import RetryQueryEngine
--- Minimal stub implementations for dependencies ---
class CallbackManager:
"""Stub for CallbackManager."""
def init(self, callbacks):
self.callbacks = callbacks
class ChainableMixin:
pass
class PromptMixin:
pass
class BaseQueryEngine(ChainableMixin, PromptMixin):
"""Base query engine."""
class BaseEvaluator:
"""Stub for BaseEvaluator."""
def init(self, name="DefaultEvaluator"):
self.name = name
from llama_index.core.query_engine.retry_query_engine import RetryQueryEngine
--- Unit Tests ---
1. Basic Test Cases
def test_basic_prompt_modules_return_type():
"""Test that _get_prompt_modules returns a dict with correct keys and values."""
qe = BaseQueryEngine(None)
ev = BaseEvaluator()
retry_qe = RetryQueryEngine(qe, ev)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 452ns -> 294ns (53.7% faster)
def test_prompt_modules_with_custom_objects():
"""Test that custom query_engine and evaluator instances are returned correctly."""
class MyQueryEngine(BaseQueryEngine): pass
class MyEvaluator(BaseEvaluator): pass
qe = MyQueryEngine(None)
ev = MyEvaluator("custom")
retry_qe = RetryQueryEngine(qe, ev)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 430ns -> 291ns (47.8% faster)
def test_prompt_modules_with_callback_manager():
"""Test that callback_manager does not affect prompt modules."""
cb = CallbackManager(["cb1"])
qe = BaseQueryEngine(cb)
ev = BaseEvaluator()
retry_qe = RetryQueryEngine(qe, ev, callback_manager=cb)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 457ns -> 283ns (61.5% faster)
2. Edge Test Cases
def test_prompt_modules_with_none_evaluator():
"""Test behavior when evaluator is None."""
qe = BaseQueryEngine(None)
retry_qe = RetryQueryEngine(qe, None)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 443ns -> 284ns (56.0% faster)
def test_prompt_modules_with_none_query_engine():
"""Test behavior when query_engine is None."""
ev = BaseEvaluator()
retry_qe = RetryQueryEngine(None, ev)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 480ns -> 284ns (69.0% faster)
def test_prompt_modules_with_both_none():
"""Test behavior when both query_engine and evaluator are None."""
retry_qe = RetryQueryEngine(None, None)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 457ns -> 299ns (52.8% faster)
def test_prompt_modules_with_unusual_types():
"""Test behavior when query_engine and evaluator are unusual types."""
qe = "not_a_query_engine"
ev = 12345
retry_qe = RetryQueryEngine(qe, ev)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 447ns -> 292ns (53.1% faster)
def test_prompt_modules_isolation():
"""Test that returned dict is a new object and not a reference to an internal dict."""
qe = BaseQueryEngine(None)
ev = BaseEvaluator()
retry_qe = RetryQueryEngine(qe, ev)
codeflash_output = retry_qe._get_prompt_modules(); result1 = codeflash_output # 454ns -> 300ns (51.3% faster)
codeflash_output = retry_qe._get_prompt_modules(); result2 = codeflash_output # 297ns -> 179ns (65.9% faster)
result1["query_engine"] = "changed"
3. Large Scale Test Cases
def test_prompt_modules_with_large_query_engine():
"""Test with a query_engine containing a large data structure."""
class LargeQueryEngine(BaseQueryEngine):
def init(self, callback_manager=None):
super().init(callback_manager)
self.large_data = [i for i in range(1000)]
qe = LargeQueryEngine()
ev = BaseEvaluator()
retry_qe = RetryQueryEngine(qe, ev)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 482ns -> 308ns (56.5% faster)
def test_prompt_modules_with_large_evaluator():
"""Test with an evaluator containing a large data structure."""
class LargeEvaluator(BaseEvaluator):
def init(self):
self.large_dict = {i: str(i) for i in range(1000)}
qe = BaseQueryEngine(None)
ev = LargeEvaluator()
retry_qe = RetryQueryEngine(qe, ev)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 458ns -> 305ns (50.2% faster)
def test_prompt_modules_with_many_instances():
"""Test creating many RetryQueryEngine instances and calling _get_prompt_modules."""
engines = []
for i in range(100):
qe = BaseQueryEngine(None)
ev = BaseEvaluator(str(i))
retry_qe = RetryQueryEngine(qe, ev, max_retries=i)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 20.5μs -> 15.0μs (36.8% faster)
engines.append(retry_qe)
def test_prompt_modules_with_deeply_nested_objects():
"""Test with deeply nested query_engine and evaluator objects."""
class Nested:
def init(self, value):
self.value = value
qe = Nested(Nested(Nested("deep_qe")))
ev = Nested(Nested(Nested("deep_ev")))
retry_qe = RetryQueryEngine(qe, ev)
codeflash_output = retry_qe._get_prompt_modules(); result = codeflash_output # 431ns -> 274ns (57.3% faster)
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
import pytest
from llama_index.core.query_engine.retry_query_engine import RetryQueryEngine
Minimal stubs for dependencies, as we cannot import llama_index
class CallbackManager:
def init(self, callbacks):
self.callbacks = callbacks
class PromptMixin:
pass
class ChainableMixin:
pass
class BaseQueryEngine(ChainableMixin, PromptMixin):
"""Base query engine stub for testing."""
def init(self, callback_manager=None):
self.callback_manager = callback_manager or CallbackManager([])
class BaseEvaluator:
"""Stub evaluator for testing."""
def init(self, name=None):
self.name = name
Type alias for prompt modules
PromptMixinType = dict
from llama_index.core.query_engine.retry_query_engine import RetryQueryEngine
------------------- Unit Tests -------------------
1. Basic Test Cases
def test_basic_returns_correct_dict():
"""Test that _get_prompt_modules returns dict with correct keys and values."""
qe = BaseQueryEngine()
ev = BaseEvaluator()
rqe = RetryQueryEngine(qe, ev)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 503ns -> 276ns (82.2% faster)
def test_basic_different_instances():
"""Test with different instances for query_engine and evaluator."""
qe1 = BaseQueryEngine()
qe2 = BaseQueryEngine()
ev1 = BaseEvaluator()
ev2 = BaseEvaluator()
rqe = RetryQueryEngine(qe1, ev2)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 470ns -> 283ns (66.1% faster)
def test_basic_with_callback_manager():
"""Test that callback_manager is handled and does not affect _get_prompt_modules."""
cm = CallbackManager([lambda: None])
qe = BaseQueryEngine(callback_manager=cm)
ev = BaseEvaluator()
rqe = RetryQueryEngine(qe, ev, callback_manager=cm)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 424ns -> 281ns (50.9% faster)
2. Edge Test Cases
def test_evaluator_is_none():
"""Test with evaluator as None (should still be returned)."""
qe = BaseQueryEngine()
rqe = RetryQueryEngine(qe, None)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 457ns -> 285ns (60.4% faster)
def test_query_engine_is_none():
"""Test with query_engine as None (should still be returned)."""
ev = BaseEvaluator()
rqe = RetryQueryEngine(None, ev)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 419ns -> 276ns (51.8% faster)
def test_both_none():
"""Test with both query_engine and evaluator as None."""
rqe = RetryQueryEngine(None, None)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 507ns -> 327ns (55.0% faster)
def test_max_retries_edge_values():
"""Test with unusual max_retries values (should not affect output)."""
qe = BaseQueryEngine()
ev = BaseEvaluator()
for val in [0, -1, 999]:
rqe = RetryQueryEngine(qe, ev, max_retries=val)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 952ns -> 635ns (49.9% faster)
def test_custom_attributes_on_engine_and_evaluator():
"""Test that custom attributes on engine/evaluator are preserved."""
qe = BaseQueryEngine()
qe.custom_attr = "engine"
ev = BaseEvaluator()
ev.custom_attr = "evaluator"
rqe = RetryQueryEngine(qe, ev)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 457ns -> 285ns (60.4% faster)
def test_mutation_does_not_affect_returned_dict():
"""Test that mutating returned dict does not affect internal state."""
qe = BaseQueryEngine()
ev = BaseEvaluator()
rqe = RetryQueryEngine(qe, ev)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 438ns -> 252ns (73.8% faster)
modules["query_engine"] = "changed"
# The next call should return the original object, not "changed"
codeflash_output = rqe._get_prompt_modules(); modules2 = codeflash_output # 261ns -> 195ns (33.8% faster)
def test_returned_dict_is_new_each_time():
"""Test that a new dict is returned each call (not the same object)."""
qe = BaseQueryEngine()
ev = BaseEvaluator()
rqe = RetryQueryEngine(qe, ev)
codeflash_output = rqe._get_prompt_modules(); m1 = codeflash_output # 461ns -> 288ns (60.1% faster)
codeflash_output = rqe._get_prompt_modules(); m2 = codeflash_output # 265ns -> 185ns (43.2% faster)
def test_returned_dict_has_only_expected_keys():
"""Test that returned dict has only 'query_engine' and 'evaluator' keys."""
qe = BaseQueryEngine()
ev = BaseEvaluator()
rqe = RetryQueryEngine(qe, ev)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 410ns -> 268ns (53.0% faster)
3. Large Scale Test Cases
def test_large_scale_many_engines_and_evaluators():
"""Test with many unique query_engine and evaluator instances."""
engines = [BaseQueryEngine() for _ in range(100)]
evaluators = [BaseEvaluator(name=f"eval{i}") for i in range(100)]
rqe_list = [RetryQueryEngine(engines[i], evaluators[i]) for i in range(100)]
for i, rqe in enumerate(rqe_list):
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 19.4μs -> 13.6μs (42.5% faster)
def test_large_scale_shared_engine_evaluator():
"""Test with same engine and evaluator shared among many RetryQueryEngine instances."""
qe = BaseQueryEngine()
ev = BaseEvaluator()
rqe_list = [RetryQueryEngine(qe, ev) for _ in range(100)]
for rqe in rqe_list:
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output # 19.4μs -> 13.5μs (43.7% faster)
def test_large_scale_with_varied_types():
"""Test with varied types for query_engine/evaluator (should still work)."""
class DummyEngine: pass
class DummyEvaluator: pass
engines = [BaseQueryEngine(), DummyEngine(), None]
evaluators = [BaseEvaluator(), DummyEvaluator(), None]
for qe in engines:
for ev in evaluators:
rqe = RetryQueryEngine(qe, ev)
codeflash_output = rqe._get_prompt_modules(); modules = codeflash_output
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes
git checkout codeflash/optimize-RetryQueryEngine._get_prompt_modules-mhvbitp2and push.