⚡️ Speed up function calc_percentage by 7%
#140
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
📄 7% (0.07x) speedup for `calc_percentage` in `invokeai/app/util/step_callback.py`
⏱️ Runtime: 73.1 microseconds → 68.4 microseconds (best of 214 runs)
📝 Explanation and details
The optimization replaces Python's `floor()` function with integer division (`//`) for computing denominator and numerator values in the order=2 case. This change provides a 6% speedup by eliminating function call overhead and avoiding floating-point arithmetic.

Key changes:
- `floor(total_steps / 2)` → `total_steps // 2`
- `floor(step / 2)` → `step // 2`
- the `math.floor` import is no longer needed

Why this is faster:
Integer division (`//`) is a native Python operator that directly computes floor division in a single operation, while `floor()` requires a function call, floating-point division, and then floor conversion. This eliminates both the function call overhead and the intermediate float operations.

Performance analysis from tests:
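- […] `total_steps=1`) — the surrounding bullet text was lost in extraction; in the timings below, order=2 calls with `total_steps=1` show the largest gains (up to 49.4% faster).

The mathematical behavior remains identical since `//` and `floor()` produce the same results for positive integers, and the function handles edge cases (zero denominators) the same way. This is a pure performance optimization with no behavioral changes.

✅ Correctness verification report: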
🌀 Generated Regression Tests and Runtime
from math import floor
# imports
import pytest
from invokeai.app.util.step_callback import calc_percentage
# --- Mock definition of PipelineIntermediateState for testing ---
class PipelineIntermediateState:
    """Minimal stand-in for the state object the tests pass to ``calc_percentage``.

    It only stores the three values the tests supply: ``step``,
    ``total_steps`` and ``order``.
    """

    def __init__(self, step, total_steps, order):
        # The constructor has to be ``__init__``: a method named plain ``init``
        # is never invoked by ``PipelineIntermediateState(step=..., ...)``,
        # which would then fail with a TypeError.
        self.step = step
        self.total_steps = total_steps
        self.order = order
from invokeai.app.util.step_callback import calc_percentage
# ------------------- UNIT TESTS -------------------
# 1. BASIC TEST CASES
def test_order1_basic_middle():
    """Order 1 with the step in the middle of the run."""
    pipeline_state = PipelineIntermediateState(step=5, total_steps=10, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 919ns -> 970ns (5.26% slower)
def test_order1_basic_start():
    """Order 1 with the step at the start of the run."""
    pipeline_state = PipelineIntermediateState(step=0, total_steps=10, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 755ns -> 826ns (8.60% slower)
def test_order1_basic_end():
    """Order 1 with the step at the end of the run."""
    pipeline_state = PipelineIntermediateState(step=10, total_steps=10, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 746ns -> 737ns (1.22% faster)
def test_order2_basic_middle():
    """Order 2 with the step in the middle of the run."""
    pipeline_state = PipelineIntermediateState(step=4, total_steps=10, order=2)
    # floor(4/2) = 2, floor(10/2) = 5, so 2/5 = 0.4
    codeflash_output = calc_percentage(pipeline_state)  # 1.01μs -> 817ns (24.0% faster)
def test_order2_basic_start():
    """Order 2 with the step at the start of the run."""
    pipeline_state = PipelineIntermediateState(step=0, total_steps=10, order=2)
    codeflash_output = calc_percentage(pipeline_state)  # 929ns -> 869ns (6.90% faster)
def test_order2_basic_end():
    """Order 2 with the step at the end of the run."""
    pipeline_state = PipelineIntermediateState(step=10, total_steps=10, order=2)
    # floor(10/2) = 5, floor(10/2) = 5, so 5/5 = 1.0
    codeflash_output = calc_percentage(pipeline_state)  # 869ns -> 797ns (9.03% faster)
# 2. EDGE TEST CASES
def test_total_steps_zero_order1():
    """total_steps=0 should always return 0.0 regardless of step or order (order 1 here)."""
    pipeline_state = PipelineIntermediateState(step=5, total_steps=0, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 499ns -> 527ns (5.31% slower)
def test_total_steps_zero_order2():
    """Order 2 with total_steps=0."""
    pipeline_state = PipelineIntermediateState(step=5, total_steps=0, order=2)
    codeflash_output = calc_percentage(pipeline_state)  # 493ns -> 500ns (1.40% slower)
def test_order2_total_steps_1():
    """Order 2 with total_steps=1: floor(1/2) = 0, denominator is zero, should return 0.0."""
    at_start = PipelineIntermediateState(step=0, total_steps=1, order=2)
    codeflash_output = calc_percentage(at_start)  # 883ns -> 695ns (27.1% faster)
    at_end = PipelineIntermediateState(step=1, total_steps=1, order=2)
    codeflash_output = calc_percentage(at_end)  # 469ns -> 379ns (23.7% faster)
def test_order2_total_steps_2():
    """Order 2 with total_steps=2: floor(2/2) = 1, so denominator = 1."""
    at_step_0 = PipelineIntermediateState(step=0, total_steps=2, order=2)
    # floor(0/2)=0, 0/1=0.0
    codeflash_output = calc_percentage(at_step_0)  # 845ns -> 828ns (2.05% faster)
    at_step_1 = PipelineIntermediateState(step=1, total_steps=2, order=2)
    # floor(1/2)=0, 0/1=0.0
    codeflash_output = calc_percentage(at_step_1)  # 504ns -> 481ns (4.78% faster)
    at_step_2 = PipelineIntermediateState(step=2, total_steps=2, order=2)
    # floor(2/2)=1, 1/1=1.0
    codeflash_output = calc_percentage(at_step_2)  # 400ns -> 340ns (17.6% faster)
def test_order1_step_greater_than_total_steps():
    """Order 1 with step > total_steps; should return >1.0."""
    pipeline_state = PipelineIntermediateState(step=15, total_steps=10, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 636ns -> 651ns (2.30% slower)
def test_order2_step_greater_than_total_steps():
    """Order 2 with step > total_steps; should still work."""
    pipeline_state = PipelineIntermediateState(step=12, total_steps=10, order=2)
    # floor(12/2)=6, floor(10/2)=5, 6/5=1.2
    codeflash_output = calc_percentage(pipeline_state)  # 837ns -> 717ns (16.7% faster)
def test_order1_negative_step():
    """Order 1 with a negative step; should return a negative percentage."""
    pipeline_state = PipelineIntermediateState(step=-1, total_steps=10, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 665ns -> 667ns (0.300% slower)
def test_order2_negative_step():
    """Order 2 with a negative step; should return a negative percentage."""
    pipeline_state = PipelineIntermediateState(step=-2, total_steps=10, order=2)
    # floor(-2/2) = -1, floor(10/2)=5, -1/5=-0.2
    codeflash_output = calc_percentage(pipeline_state)  # 854ns -> 734ns (16.3% faster)
def test_order1_negative_total_steps():
    """Order 1 with negative total_steps; should return a negative percentage."""
    pipeline_state = PipelineIntermediateState(step=5, total_steps=-10, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 670ns -> 598ns (12.0% faster)
def test_order2_negative_total_steps():
    """Order 2 with negative total_steps, giving a negative denominator."""
    pipeline_state = PipelineIntermediateState(step=4, total_steps=-10, order=2)
    # floor(4/2)=2, floor(-10/2)=-5, 2/-5=-0.4
    codeflash_output = calc_percentage(pipeline_state)  # 887ns -> 793ns (11.9% faster)
def test_order2_step_odd_values():
    """Order 2 with an odd step, to exercise floor division."""
    pipeline_state = PipelineIntermediateState(step=5, total_steps=9, order=2)
    # floor(5/2)=2, floor(9/2)=4, 2/4=0.5
    codeflash_output = calc_percentage(pipeline_state)  # 876ns -> 739ns (18.5% faster)
def test_order2_total_steps_odd():
    """Order 2 with an odd total_steps."""
    pipeline_state = PipelineIntermediateState(step=6, total_steps=9, order=2)
    # floor(6/2)=3, floor(9/2)=4, 3/4=0.75
    codeflash_output = calc_percentage(pipeline_state)  # 850ns -> 692ns (22.8% faster)
def test_order1_float_result():
    """Order 1 with inputs whose ratio is a non-integer (float division)."""
    pipeline_state = PipelineIntermediateState(step=3, total_steps=4, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 661ns -> 645ns (2.48% faster)
def test_order2_float_result():
    """Order 2 with inputs whose ratio is a non-integer (float division)."""
    pipeline_state = PipelineIntermediateState(step=3, total_steps=4, order=2)
    # floor(3/2)=1, floor(4/2)=2, 1/2=0.5
    codeflash_output = calc_percentage(pipeline_state)  # 845ns -> 753ns (12.2% faster)
def test_order2_step_negative_and_total_steps_odd():
    """Order 2 with a negative step and an odd total_steps."""
    pipeline_state = PipelineIntermediateState(step=-3, total_steps=9, order=2)
    # floor(-3/2) = -2, floor(9/2)=4, -2/4 = -0.5
    codeflash_output = calc_percentage(pipeline_state)  # 828ns -> 765ns (8.24% faster)
def test_order2_step_and_total_steps_zero():
    """Order 2 with both step and total_steps equal to zero."""
    pipeline_state = PipelineIntermediateState(step=0, total_steps=0, order=2)
    codeflash_output = calc_percentage(pipeline_state)  # 501ns -> 537ns (6.70% slower)
def test_order1_step_and_total_steps_zero():
    """Order 1 with both step and total_steps equal to zero."""
    pipeline_state = PipelineIntermediateState(step=0, total_steps=0, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 496ns -> 502ns (1.20% slower)
# 3. LARGE SCALE TEST CASES
def test_order1_large_scale():
    """Order 1 with a large step and total_steps."""
    pipeline_state = PipelineIntermediateState(step=500, total_steps=1000, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 695ns -> 658ns (5.62% faster)
def test_order2_large_scale_even():
    """Order 2 with a large even step and total_steps."""
    pipeline_state = PipelineIntermediateState(step=800, total_steps=1000, order=2)
    # floor(800/2)=400, floor(1000/2)=500, 400/500=0.8
    codeflash_output = calc_percentage(pipeline_state)  # 1.02μs -> 928ns (10.5% faster)
def test_order2_large_scale_odd():
    """Order 2 with a large odd step and total_steps."""
    state = PipelineIntermediateState(step=801, total_steps=999, order=2)
    # floor(801/2)=400, floor(999/2)=499, 400/499≈0.8016
    # The state used to be built and then discarded; call the function so this
    # test exercises calc_percentage like the other tests in this group.
    codeflash_output = calc_percentage(state)
def test_order2_large_scale_step_greater_than_total_steps():
    """Order 2, large values, with step > total_steps."""
    pipeline_state = PipelineIntermediateState(step=1200, total_steps=1000, order=2)
    # floor(1200/2)=600, floor(1000/2)=500, 600/500=1.2
    codeflash_output = calc_percentage(pipeline_state)  # 971ns -> 929ns (4.52% faster)
def test_order1_large_scale_step_greater_than_total_steps():
    """Order 1, large values, with step > total_steps."""
    pipeline_state = PipelineIntermediateState(step=1500, total_steps=1000, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 658ns -> 689ns (4.50% slower)
def test_order2_large_scale_negative_step():
    """Order 2 with a large negative step."""
    pipeline_state = PipelineIntermediateState(step=-200, total_steps=1000, order=2)
    # floor(-200/2) = -100, floor(1000/2)=500, -100/500=-0.2
    codeflash_output = calc_percentage(pipeline_state)  # 1.01μs -> 913ns (10.6% faster)
def test_order1_large_scale_negative_total_steps():
    """Order 1 with a large negative total_steps."""
    pipeline_state = PipelineIntermediateState(step=500, total_steps=-1000, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 643ns -> 663ns (3.02% slower)
def test_order2_large_scale_negative_total_steps():
    """Order 2 with a large negative total_steps."""
    pipeline_state = PipelineIntermediateState(step=400, total_steps=-1000, order=2)
    # floor(400/2)=200, floor(-1000/2)=-500, 200/-500=-0.4
    codeflash_output = calc_percentage(pipeline_state)  # 1.03μs -> 882ns (16.6% faster)
def test_order2_large_scale_step_and_total_steps_zero():
    """Order 2 (large-scale group) with step=0 and total_steps=0."""
    pipeline_state = PipelineIntermediateState(step=0, total_steps=0, order=2)
    codeflash_output = calc_percentage(pipeline_state)  # 490ns -> 490ns (0.000% faster)
def test_order1_large_scale_step_and_total_steps_zero():
    """Order 1 (large-scale group) with step=0 and total_steps=0."""
    pipeline_state = PipelineIntermediateState(step=0, total_steps=0, order=1)
    codeflash_output = calc_percentage(pipeline_state)  # 491ns -> 502ns (2.19% slower)
# 4. PARAMETRIZED TESTS FOR BROAD COVERAGE
@pytest.mark.parametrize(
    "step,total_steps,order,expected",
    [
        # order 1, basic
        (0, 10, 1, 0.0),
        (5, 10, 1, 0.5),
        (10, 10, 1, 1.0),
        # order 2, basic
        (0, 10, 2, 0.0),
        (4, 10, 2, 0.4),
        (10, 10, 2, 1.0),
        # order 2, edge
        (2, 2, 2, 1.0),
        (1, 2, 2, 0.0),
        (0, 2, 2, 0.0),
        # negative step
        (-1, 10, 1, -0.1),
        (-2, 10, 2, -0.2),
        # negative total_steps
        (5, -10, 1, -0.5),
        (4, -10, 2, -0.4),
        # step > total_steps
        (15, 10, 1, 1.5),
        (12, 10, 2, 1.2),
        # large scale
        (500, 1000, 1, 0.5),
        (800, 1000, 2, 0.8),
        # zero total_steps
        (5, 0, 1, 0.0),
        (5, 0, 2, 0.0),
    ],
)
def test_calc_percentage_parametrized(step, total_steps, order, expected):
    """Run ``calc_percentage`` over a table of (step, total_steps, order) cases.

    Each row carries the fraction the call is expected to return.
    """
    state = PipelineIntermediateState(step=step, total_steps=total_steps, order=order)
    codeflash_output = calc_percentage(state)  # 16.0μs -> 14.6μs (9.38% faster)
    result = codeflash_output
    # ``expected`` used to be ignored, so the table was never actually checked.
    # approx() avoids spurious failures from float rounding (e.g. 2/5 vs 0.4).
    assert result == pytest.approx(expected)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
#------------------------------------------------
from math import floor
# imports
import pytest
from invokeai.app.util.step_callback import calc_percentage
# Minimal mock for PipelineIntermediateState
class PipelineIntermediateState:
    """Minimal mock holding the ``step``, ``total_steps`` and ``order`` values used by the tests."""

    def __init__(self, step, total_steps, order):
        # Named ``__init__`` so that keyword construction
        # (``PipelineIntermediateState(step=..., total_steps=..., order=...)``)
        # actually runs it; a method called ``init`` would be ignored.
        self.step = step
        self.total_steps = total_steps
        self.order = order
from invokeai.app.util.step_callback import calc_percentage
# unit tests
# --- Basic Test Cases ---
def test_basic_order_1_middle_step():
    """Normal case: order 1, halfway through."""
    snapshot = PipelineIntermediateState(step=5, total_steps=10, order=1)
    codeflash_output = calc_percentage(snapshot)  # 771ns -> 779ns (1.03% slower)
def test_basic_order_1_first_step():
    """Order 1 at the first step."""
    snapshot = PipelineIntermediateState(step=0, total_steps=10, order=1)
    codeflash_output = calc_percentage(snapshot)  # 716ns -> 732ns (2.19% slower)
def test_basic_order_1_last_step():
    """Order 1 at the last step."""
    snapshot = PipelineIntermediateState(step=10, total_steps=10, order=1)
    codeflash_output = calc_percentage(snapshot)  # 710ns -> 660ns (7.58% faster)
def test_basic_order_2_middle_step():
    """Normal case: order 2, halfway through."""
    snapshot = PipelineIntermediateState(step=5, total_steps=10, order=2)
    # denominator = floor(10/2) = 5, numerator = floor(5/2) = 2
    codeflash_output = calc_percentage(snapshot)  # 1.01μs -> 804ns (25.2% faster)
def test_basic_order_2_first_step():
    """Order 2 at the first step."""
    snapshot = PipelineIntermediateState(step=0, total_steps=10, order=2)
    codeflash_output = calc_percentage(snapshot)  # 954ns -> 814ns (17.2% faster)
def test_basic_order_2_last_step():
    """Order 2 at the last step."""
    snapshot = PipelineIntermediateState(step=10, total_steps=10, order=2)
    # denominator = 5, numerator = floor(10/2) = 5
    codeflash_output = calc_percentage(snapshot)  # 876ns -> 771ns (13.6% faster)
# --- Edge Test Cases ---
def test_total_steps_zero_order_1():
    """Order 1: total_steps = 0 should always return 0.0."""
    snapshot = PipelineIntermediateState(step=5, total_steps=0, order=1)
    codeflash_output = calc_percentage(snapshot)  # 496ns -> 529ns (6.24% slower)
def test_total_steps_zero_order_2():
    """Order 2: total_steps = 0 should always return 0.0."""
    snapshot = PipelineIntermediateState(step=5, total_steps=0, order=2)
    codeflash_output = calc_percentage(snapshot)  # 470ns -> 535ns (12.1% slower)
def test_total_steps_one_order_2():
    """order=2, total_steps=1: denominator=floor(1/2)=0, should return 0.0."""
    snapshot = PipelineIntermediateState(step=1, total_steps=1, order=2)
    codeflash_output = calc_percentage(snapshot)  # 950ns -> 636ns (49.4% faster)
def test_total_steps_two_order_2():
    """order=2, total_steps=2: denominator=floor(2/2)=1."""
    snapshot = PipelineIntermediateState(step=2, total_steps=2, order=2)
    # numerator = floor(2/2) = 1, denominator = 1
    codeflash_output = calc_percentage(snapshot)  # 944ns -> 813ns (16.1% faster)
def test_step_greater_than_total_steps_order_1():
    """Order 1 with step > total_steps."""
    snapshot = PipelineIntermediateState(step=15, total_steps=10, order=1)
    codeflash_output = calc_percentage(snapshot)  # 671ns -> 703ns (4.55% slower)
def test_step_greater_than_total_steps_order_2():
    """Order 2 with step > total_steps."""
    snapshot = PipelineIntermediateState(step=15, total_steps=10, order=2)
    # denominator = 5, numerator = floor(15/2) = 7
    codeflash_output = calc_percentage(snapshot)  # 928ns -> 785ns (18.2% faster)
def test_step_negative_order_1():
    """Order 1 with a negative step."""
    snapshot = PipelineIntermediateState(step=-5, total_steps=10, order=1)
    codeflash_output = calc_percentage(snapshot)  # 670ns -> 665ns (0.752% faster)
def test_step_negative_order_2():
    """Order 2 with a negative step."""
    snapshot = PipelineIntermediateState(step=-5, total_steps=10, order=2)
    # numerator = floor(-5/2) = -3, denominator = 5
    codeflash_output = calc_percentage(snapshot)  # 909ns -> 798ns (13.9% faster)
def test_total_steps_negative_order_1():
    """Order 1 with a negative total_steps."""
    snapshot = PipelineIntermediateState(step=5, total_steps=-10, order=1)
    codeflash_output = calc_percentage(snapshot)  # 667ns -> 643ns (3.73% faster)
def test_total_steps_negative_order_2():
    """Order 2 with a negative total_steps."""
    snapshot = PipelineIntermediateState(step=5, total_steps=-10, order=2)
    # denominator = floor(-10/2) = -5, numerator = floor(5/2) = 2
    codeflash_output = calc_percentage(snapshot)  # 952ns -> 790ns (20.5% faster)
def test_step_and_total_steps_zero_order_1():
    """Order 1 with both step and total_steps equal to zero."""
    snapshot = PipelineIntermediateState(step=0, total_steps=0, order=1)
    codeflash_output = calc_percentage(snapshot)  # 515ns -> 538ns (4.28% slower)
def test_step_and_total_steps_zero_order_2():
    """Order 2 with both step and total_steps equal to zero."""
    snapshot = PipelineIntermediateState(step=0, total_steps=0, order=2)
    codeflash_output = calc_percentage(snapshot)  # 517ns -> 516ns (0.194% faster)
def test_step_and_total_steps_one_order_2():
    """Order 2 with step=1 and total_steps=1."""
    snapshot = PipelineIntermediateState(step=1, total_steps=1, order=2)
    codeflash_output = calc_percentage(snapshot)  # 907ns -> 666ns (36.2% faster)
def test_step_fractional_order_1():
    """Order 1 with a float step."""
    snapshot = PipelineIntermediateState(step=2.5, total_steps=10, order=1)
    codeflash_output = calc_percentage(snapshot)  # 783ns -> 791ns (1.01% slower)
def test_step_fractional_order_2():
    """Order 2 with a float step."""
    snapshot = PipelineIntermediateState(step=2.5, total_steps=10, order=2)
    # numerator = floor(2.5/2) = 1, denominator = 5
    codeflash_output = calc_percentage(snapshot)  # 1.03μs -> 1.32μs (21.7% slower)
def test_total_steps_fractional_order_1():
    """Order 1 with a float total_steps."""
    snapshot = PipelineIntermediateState(step=5, total_steps=10.5, order=1)
    codeflash_output = calc_percentage(snapshot)  # 803ns -> 857ns (6.30% slower)
def test_total_steps_fractional_order_2():
    """Order 2 with a float total_steps."""
    snapshot = PipelineIntermediateState(step=5, total_steps=10.5, order=2)
    # denominator = floor(10.5/2) = floor(5.25) = 5, numerator = floor(5/2) = 2
    codeflash_output = calc_percentage(snapshot)  # 1.09μs -> 1.33μs (18.1% slower)
def test_order_other_than_1_or_2():
    """An order that is neither 1 nor 2 should be treated as order=1."""
    snapshot = PipelineIntermediateState(step=5, total_steps=10, order=3)
    codeflash_output = calc_percentage(snapshot)  # 690ns -> 727ns (5.09% slower)
# --- Large Scale Test Cases ---
def test_large_total_steps_order_1():
    """Order 1 with a large total_steps."""
    snapshot = PipelineIntermediateState(step=500, total_steps=1000, order=1)
    codeflash_output = calc_percentage(snapshot)  # 643ns -> 663ns (3.02% slower)
def test_large_total_steps_order_2():
    """Order 2 with a large total_steps."""
    snapshot = PipelineIntermediateState(step=500, total_steps=1000, order=2)
    # denominator = floor(1000/2) = 500, numerator = floor(500/2) = 250
    codeflash_output = calc_percentage(snapshot)  # 1.07μs -> 875ns (22.2% faster)
def test_large_steps_near_end_order_1():
    """Order 1 with step just below total_steps."""
    snapshot = PipelineIntermediateState(step=999, total_steps=1000, order=1)
    codeflash_output = calc_percentage(snapshot)  # 610ns -> 642ns (4.98% slower)
def test_large_steps_near_end_order_2():
    """Order 2 with step just below total_steps."""
    snapshot = PipelineIntermediateState(step=999, total_steps=1000, order=2)
    # denominator = 500, numerator = floor(999/2) = 499
    codeflash_output = calc_percentage(snapshot)  # 1.02μs -> 920ns (11.4% faster)
def test_large_steps_exceeding_total_order_1():
    """Order 1, large values, with step > total_steps."""
    snapshot = PipelineIntermediateState(step=1200, total_steps=1000, order=1)
    codeflash_output = calc_percentage(snapshot)  # 677ns -> 667ns (1.50% faster)
def test_large_steps_exceeding_total_order_2():
    """Order 2, large values, with step > total_steps."""
    snapshot = PipelineIntermediateState(step=1200, total_steps=1000, order=2)
    # denominator = 500, numerator = floor(1200/2) = 600
    codeflash_output = calc_percentage(snapshot)  # 982ns -> 868ns (13.1% faster)
def test_large_scale_batch_order_1():
    """Order 1: sweep step from 0 up to total_steps (1000) in increments of 100."""
    run_length = 1000
    for current_step in range(0, run_length + 1, 100):
        snapshot = PipelineIntermediateState(step=current_step, total_steps=run_length, order=1)
        codeflash_output = calc_percentage(snapshot)  # 2.98μs -> 3.00μs (0.434% slower)
def test_large_scale_batch_order_2():
    """Order 2: sweep step from 0 up to total_steps (1000) in increments of 100.

    Each result is compared with the floor-based reference value computed here.
    """
    total_steps = 1000
    denominator = floor(total_steps / 2)
    for step in range(0, total_steps + 1, 100):
        state = PipelineIntermediateState(step=step, total_steps=total_steps, order=2)
        numerator = floor(step / 2)
        expected = numerator / denominator if denominator != 0 else 0.0
        codeflash_output = calc_percentage(state)  # 4.22μs -> 3.76μs (12.4% faster)
        # ``expected`` used to be computed and then dropped; compare it so the
        # sweep actually verifies the result.
        assert codeflash_output == pytest.approx(expected)
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.
To edit these changes, run `git checkout codeflash/optimize-calc_percentage-mhvl1qpi` and push.