⚡️ Speed up method ArgPackComponent._run_component by 40%
#141
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 40% (0.40x) speedup for `ArgPackComponent._run_component` in `llama-index-core/llama_index/core/query_pipeline/components/argpacks.py`
⏱️ Runtime: 835 microseconds → 595 microseconds (best of 250 runs)
📝 Explanation and details
The optimization achieves a 40% speedup by making two key changes that reduce Python overhead:
1. Eliminated repeated attribute lookups: The original code accessed `self.convert_fn` inside the loop for every iteration (7,063 times according to profiler). The optimized version caches it once as `convert_fn = self.convert_fn`, avoiding expensive attribute access overhead.
2. Replaced loop+append with list comprehension: The original used a manual loop with `output.append(v)` for each item. The optimized version uses `[convert_fn(v) for v in kwargs.values()]` when conversion is needed, or `list(kwargs.values())` when not. List comprehensions are implemented in C and are significantly faster than Python loops.
Performance impact by test category:
The profiler shows the optimization reduced total execution time from 6.5ms to 1.9ms in the benchmark, with the list comprehension line taking 94% of optimized execution time versus the original's distributed overhead across multiple operations.
This optimization is particularly valuable for scenarios with many arguments or frequent calls, making it ideal for query pipeline components that may process large batches of data.
✅ Correctness verification report:
🌀 Generated Regression Tests and Runtime
from typing import Any, Callable, Optional
imports
import pytest # used for our unit tests
from llama_index.core.query_pipeline.components.argpacks import
ArgPackComponent
# unit tests
# --- BASIC TEST CASES ---
def test_no_kwargs_returns_empty_list():
    """Invoke ``_run_component`` without passing any keyword arguments."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component()  # 914ns -> 1.21μs (24.6% slower)
    result = codeflash_output
def test_single_argument():
    """Invoke ``_run_component`` with exactly one keyword argument."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a=10)  # 1.23μs -> 1.39μs (11.8% slower)
    result = codeflash_output
def test_multiple_arguments():
    """Invoke ``_run_component`` with three integer keyword arguments."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a=1, b=2, c=3)  # 1.44μs -> 1.52μs (5.71% slower)
    result = codeflash_output
def test_argument_types():
    """Invoke ``_run_component`` with an int, a str, a list and ``None``."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(
        a=1, b="foo", c=[1, 2], d=None
    )  # 1.55μs -> 1.57μs (1.59% slower)
    result = codeflash_output
def test_convert_fn_applied():
    """Run the component with a ``convert_fn`` that doubles integer values."""

    def double_ints(value):
        # Non-integers are passed through untouched.
        return value * 2 if isinstance(value, int) else value

    packer = ArgPackComponent(convert_fn=double_ints)
    codeflash_output = packer._run_component(a=1, b=2, c="foo")  # 2.40μs -> 2.57μs (6.88% slower)
    result = codeflash_output
def test_convert_fn_none():
    """Run the component when ``convert_fn`` is passed explicitly as ``None``."""
    packer = ArgPackComponent(convert_fn=None)
    codeflash_output = packer._run_component(a=5)  # 1.14μs -> 1.26μs (9.14% slower)
    result = codeflash_output
def test_convert_fn_returns_none():
    """Run the component with a ``convert_fn`` that maps every input to ``None``."""

    def always_none(value):
        return None

    packer = ArgPackComponent(convert_fn=always_none)
    codeflash_output = packer._run_component(a=1, b=2)  # 1.59μs -> 1.75μs (9.37% slower)
    result = codeflash_output
# --- EDGE TEST CASES ---
def test_empty_string_and_zero():
    """Edge case: pass falsy values (an empty string and zero) as arguments."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a="", b=0)  # 1.21μs -> 1.33μs (9.19% slower)
    result = codeflash_output
def test_large_integer_and_float():
    """Edge case: pass a very large integer and a very large float."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a=10**18, b=1.7e308)  # 1.19μs -> 1.29μs (7.85% slower)
    result = codeflash_output
def test_kwargs_with_mutable_objects():
    """Edge case: pass mutable objects (a dict and a list) as arguments."""
    packer = ArgPackComponent()
    mapping = {'x': 1}
    sequence = [1, 2, 3]
    codeflash_output = packer._run_component(a=mapping, b=sequence)  # 1.21μs -> 1.26μs (3.50% slower)
    result = codeflash_output
def test_convert_fn_raises_exception():
    """Edge case: a ``ValueError`` raised inside ``convert_fn`` reaches the caller."""

    def always_fails(value):
        raise ValueError("fail")

    packer = ArgPackComponent(convert_fn=always_fails)
    with pytest.raises(ValueError):
        packer._run_component(a=1)  # 1.84μs -> 2.18μs (15.7% slower)
def test_convert_fn_changes_type():
    """Edge case: ``convert_fn`` turns each value into its string form."""

    def stringify(value):
        return str(value)

    packer = ArgPackComponent(convert_fn=stringify)
    codeflash_output = packer._run_component(a=1, b=2.5)  # 3.50μs -> 3.67μs (4.65% slower)
    result = codeflash_output
def test_kwargs_with_same_value():
    """Edge case: two different keys carry the same value."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a=7, b=7)  # 1.25μs -> 1.28μs (2.34% slower)
    result = codeflash_output
def test_convert_fn_with_side_effects():
    """Edge case: ``convert_fn`` mutates its input by popping a list's first item."""

    def pop_first(value):
        # Anything that is not a non-empty list is returned unchanged.
        if not (isinstance(value, list) and value):
            return value
        return value.pop(0)

    packer = ArgPackComponent(convert_fn=pop_first)
    items = [1, 2, 3]
    codeflash_output = packer._run_component(a=items)  # 1.60μs -> 2.01μs (20.6% slower)
    result = codeflash_output
# --- LARGE SCALE TEST CASES ---
def test_many_arguments():
    """Large scale: pass 1000 integer keyword arguments in one call."""
    packer = ArgPackComponent()
    many_kwargs = {f"key{i}": i for i in range(1000)}
    codeflash_output = packer._run_component(**many_kwargs)  # 77.1μs -> 46.3μs (66.5% faster)
    result = codeflash_output
def test_large_lists_as_values():
    """Large scale: pass two 1000-element lists as the argument values."""
    packer = ArgPackComponent()
    ascending = list(range(1000))
    descending = ascending[::-1]
    codeflash_output = packer._run_component(a=ascending, b=descending)  # 1.38μs -> 1.43μs (3.64% slower)
    result = codeflash_output
def test_large_scale_convert_fn():
    """Large scale: apply an incrementing ``convert_fn`` to 1000 arguments."""

    def increment_ints(value):
        # Non-integers are passed through untouched.
        return value + 1 if isinstance(value, int) else value

    packer = ArgPackComponent(convert_fn=increment_ints)
    many_kwargs = {f"key{i}": i for i in range(1000)}
    codeflash_output = packer._run_component(**many_kwargs)  # 142μs -> 106μs (33.9% faster)
    result = codeflash_output
def test_large_scale_mixed_types():
    """Large scale: pass 500 integer arguments followed by 500 string arguments."""
    packer = ArgPackComponent()
    int_kwargs = {f"int{i}": i for i in range(500)}
    str_kwargs = {f"str{i}": str(i) for i in range(500)}
    # Merging keeps insertion order: every int key precedes every str key.
    mixed_kwargs = {**int_kwargs, **str_kwargs}
    codeflash_output = packer._run_component(**mixed_kwargs)  # 77.1μs -> 46.8μs (64.6% faster)
    result = codeflash_output
    # The order should be all ints then all strs
    expected = [*range(500), *(str(i) for i in range(500))]
def test_large_scale_with_convert_fn_side_effect():
    """Large scale: run 1000 arguments through a ``convert_fn`` that counts its calls."""
    # A mutable container lets the nested function update the tally in place.
    call_count = {"count": 0}

    def count_fn(value):
        call_count["count"] += 1
        return value

    packer = ArgPackComponent(convert_fn=count_fn)
    many_kwargs = {f"k{i}": i for i in range(1000)}
    codeflash_output = packer._run_component(**many_kwargs)  # 155μs -> 116μs (33.9% faster)
    result = codeflash_output
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from typing import Any, Callable, Optional
imports
import pytest
from llama_index.core.query_pipeline.components.argpacks import
ArgPackComponent
# unit tests
# 1. Basic Test Cases
def test_no_arguments():
    """Call ``_run_component`` with an empty set of keyword arguments."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component()  # 774ns -> 1.16μs (33.3% slower)
    result = codeflash_output
def test_single_argument():
    """Call ``_run_component`` with one keyword argument."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a=1)  # 1.24μs -> 1.32μs (6.14% slower)
    result = codeflash_output
def test_multiple_arguments():
    """Call ``_run_component`` with several keyword arguments."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a=1, b=2, c=3)  # 1.49μs -> 1.45μs (2.83% faster)
    result = codeflash_output
def test_with_convert_fn_identity():
    """Run the component with a ``convert_fn`` that hands each value back unchanged."""

    def identity(value):
        return value

    packer = ArgPackComponent(convert_fn=identity)
    codeflash_output = packer._run_component(a=10, b=20)  # 1.58μs -> 1.81μs (12.6% slower)
    result = codeflash_output
def test_with_convert_fn_increment():
    """Run the component with a ``convert_fn`` that adds one to each value."""

    def add_one(value):
        return value + 1

    packer = ArgPackComponent(convert_fn=add_one)
    codeflash_output = packer._run_component(a=1, b=2, c=3)  # 1.84μs -> 2.05μs (9.87% slower)
    result = codeflash_output
# 2. Edge Test Cases
def test_with_none_values():
    """Pass ``None`` for every keyword argument."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a=None, b=None)  # 1.21μs -> 1.34μs (9.91% slower)
    result = codeflash_output
def test_with_empty_string_and_zero():
    """Pass the falsy values ``""`` and ``0`` as keyword arguments."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a="", b=0)  # 1.25μs -> 1.30μs (3.94% slower)
    result = codeflash_output
def test_with_mixed_types():
    """Pass one value each of int, str, list, dict, float and bool."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(
        a=42,
        b="hello",
        c=[1, 2],
        d={"k": "v"},
        e=3.14,
        f=True,
    )  # 1.99μs -> 1.93μs (3.32% faster)
    result = codeflash_output
def test_with_convert_fn_to_str():
    """Use the built-in ``str`` as ``convert_fn`` on an int, ``None`` and a list."""
    packer = ArgPackComponent(convert_fn=str)
    codeflash_output = packer._run_component(a=1, b=None, c=[1, 2])  # 3.05μs -> 3.15μs (3.14% slower)
    result = codeflash_output
def test_with_convert_fn_raises():
    """A ``convert_fn`` that rejects ``None`` makes the call raise ``ValueError``."""

    def reject_none(value):
        if value is None:
            raise ValueError("None not allowed")
        return value

    packer = ArgPackComponent(convert_fn=reject_none)
    with pytest.raises(ValueError):
        packer._run_component(a=1, b=None)  # 2.35μs -> 2.47μs (4.70% slower)
def test_with_keyword_named_output():
    """Pass a keyword argument whose name is literally ``output``."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(output=123, a=456)  # 1.31μs -> 1.35μs (2.82% slower)
    result = codeflash_output
def test_with_duplicate_values():
    """Pass the same value under three different keys."""
    packer = ArgPackComponent()
    codeflash_output = packer._run_component(a=1, b=1, c=1)  # 1.38μs -> 1.45μs (4.43% slower)
    result = codeflash_output
def test_with_large_integer():
    """Pass a single integer equal to ``10**100``."""
    packer = ArgPackComponent()
    googol = 10**100
    codeflash_output = packer._run_component(a=googol)  # 1.13μs -> 1.22μs (7.43% slower)
    result = codeflash_output
def test_with_mutable_input_and_conversion():
    """Use a ``convert_fn`` that appends to list inputs in place."""

    def append_foo(value):
        # Only lists are mutated; every value is returned as-is afterwards.
        if isinstance(value, list):
            value.append("foo")
        return value

    packer = ArgPackComponent(convert_fn=append_foo)
    source_list = [1, 2, 3]
    codeflash_output = packer._run_component(a=source_list)  # 1.68μs -> 2.03μs (17.2% slower)
    result = codeflash_output
# 3. Large Scale Test Cases
def test_many_arguments():
    """Pass 1000 keyword arguments in a single call."""
    packer = ArgPackComponent()
    many_kwargs = {f"key{i}": i for i in range(1000)}
    codeflash_output = packer._run_component(**many_kwargs)  # 76.9μs -> 45.3μs (69.7% faster)
    result = codeflash_output
def test_many_arguments_with_conversion():
    """Pass 1000 keyword arguments through a doubling ``convert_fn``."""

    def double(value):
        return value * 2

    packer = ArgPackComponent(convert_fn=double)
    many_kwargs = {f"key{i}": i for i in range(1000)}
    codeflash_output = packer._run_component(**many_kwargs)  # 132μs -> 95.6μs (38.6% faster)
    result = codeflash_output
def test_large_nested_structures():
    """Pass a 100x10 nested list and a 100-key dict of lists as values."""
    packer = ArgPackComponent()
    # Each row is a fresh list, so no two rows share the same object.
    rows = [list(range(10)) for _ in range(100)]
    lists_by_key = {f"k{i}": [i] * 10 for i in range(100)}
    codeflash_output = packer._run_component(a=rows, b=lists_by_key)  # 1.40μs -> 1.40μs (0.142% slower)
    result = codeflash_output
def test_large_string_values():
    """Pass the same 10000-character string under two keys."""
    packer = ArgPackComponent()
    long_text = "x" * 10000
    codeflash_output = packer._run_component(a=long_text, b=long_text)  # 1.31μs -> 1.33μs (1.57% slower)
    result = codeflash_output
def test_performance_with_large_inputs():
    """Run an identity ``convert_fn`` over 1000 arguments that each hold a 100-item list."""

    def identity(value):
        return value

    packer = ArgPackComponent(convert_fn=identity)
    # 1000 elements, each a list of 100 integers
    many_kwargs = {f"k{i}": list(range(100)) for i in range(1000)}
    codeflash_output = packer._run_component(**many_kwargs)  # 125μs -> 85.7μs (46.6% faster)
    result = codeflash_output
codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run
`git checkout codeflash/optimize-ArgPackComponent._run_component-mhvino2t` and push.