// REVIEW NOTES (free-text preamble placed before the first `diff --git` header; the patch content below is unchanged).
//
// Purpose of the patch: every IteratorBuilder subclass gains a generator-based item source,
// getBaseItemsLazy(): Generator, and IteratorBuilder::getFilteredItems() is rewritten so that a query
// with a limit and no orderBy/randomize stops pulling items once enough have been collected.
//
// IteratorBuilder::getFilteredItems() now has three paths:
//   1. `! $this->limit || $this->orderBys || $this->randomize` -> getBaseItems() + filterWheres(), i.e. the
//      previous behaviour. Because the check is a truthiness test, a limit of 0 or null also lands here.
//   2. empty($this->wheres) -> getBaseItemsLimited(): reads the generator until (offset ?? 0) + limit items
//      have been pushed, then breaks.
//   3. otherwise -> getFilteredItemsInBatches(): buffers max(50, limit * 2) items, runs filterWheres() on
//      each full batch, concatenates the matches and breaks once (offset ?? 0) + limit matches exist; a
//      trailing partial batch is filtered only if the target has not been reached yet.
//
// Neither helper applies the offset; both return items counted from the start of the source. The tests rely
// on a later step doing the slicing (it_optimizes_limit_with_offset expects 15 loads and a first id of 6).
//
// NOTE(review): on paths 2 and 3 getFilteredItems() no longer returns the complete filtered set. Any caller
// outside this patch that uses it while a limit is set (for example a count-style method) would now see a
// capped result -- confirm against the rest of IteratorBuilder, which is not visible in these hunks.
//
// NOTE(review): paths 2 and 3 return a plain collect() collection, whereas path 1 returns whatever
// getBaseItems()/filterWheres() produce (TestIteratorBuilder::getBaseItems() returns a DataCollection).
// Confirm no caller depends on the collection subclass.
//
// NOTE(review): path 3 filters only when a batch is full, so it can read up to one whole batch more than
// strictly required; the new tests assert upper bounds only (e.g. assertLessThan(150, ...)).
//
// Search\QueryBuilder::getBaseItemsLazy(): getSearchResults($this->query) is still called once for the full
// result set; only the transformResults() step is chunked (chunk size = offset + limit when there are no
// wheres/orderBys/randomize, else 50). The `$i` key in the withoutData foreach is unused, as is the one in
// the FakeQueryBuilder override at the end of the patch.
//
// NOTE(review): this copy of the patch is whitespace-collapsed, and in the new-file hunks the text between
// the opening PHP tag and the first `->` appears to have been dropped (e.g. `@@ -0,0 +1,48 @@ +results = $results;`),
// so the class headers of the fakes and test classes cannot be reviewed from here.
diff --git a/src/Query/ItemQueryBuilder.php b/src/Query/ItemQueryBuilder.php index 3ad11bdc770..14fb2ae2b1f 100644 --- a/src/Query/ItemQueryBuilder.php +++ b/src/Query/ItemQueryBuilder.php @@ -2,6 +2,7 @@ namespace Statamic\Query; +use Generator; use Illuminate\Support\Collection; class ItemQueryBuilder extends IteratorBuilder @@ -20,6 +21,13 @@ protected function getBaseItems() return $this->items; } + protected function getBaseItemsLazy(): Generator + { + foreach ($this->items as $item) { + yield $item; + } + } + public function whereStatus($status) { return $this->where('status', $status); diff --git a/src/Query/IteratorBuilder.php b/src/Query/IteratorBuilder.php index 3025b3a55ed..ed79ed3e8fb 100644 --- a/src/Query/IteratorBuilder.php +++ b/src/Query/IteratorBuilder.php @@ -2,6 +2,7 @@ namespace Statamic\Query; +use Generator; use Statamic\Support\Arr; abstract class IteratorBuilder extends Builder @@ -39,11 +40,66 @@ public function pluck($column, $key = null) protected function getFilteredItems() { - $items = $this->getBaseItems(); + // Can't optimize: no limit, has orderBy, or randomize + // These require all items to be loaded first + if (! $this->limit || $this->orderBys || $this->randomize) { + $items = $this->getBaseItems(); - $items = $this->filterWheres($items); + return $this->filterWheres($items); + } + + // No wheres - just get limited items directly + if (empty($this->wheres)) { + return $this->getBaseItemsLimited(); + } + + // Has limit AND wheres - batch hydrate until we have enough + return $this->getFilteredItemsInBatches(); + } + + protected function getBaseItemsLimited() + { + $needed = ($this->offset ?? 0) + $this->limit; + $collected = collect(); + + foreach ($this->getBaseItemsLazy() as $item) { + $collected->push($item); + if ($collected->count() >= $needed) { + break; + } + } - return $items; + return $collected; + } + + protected function getFilteredItemsInBatches() + { + $needed = ($this->offset ?? 
0) + $this->limit; + $batchSize = max(50, $this->limit * 2); + $collected = collect(); + $batch = collect(); + + foreach ($this->getBaseItemsLazy() as $item) { + $batch->push($item); + + if ($batch->count() >= $batchSize) { + $filtered = $this->filterWheres($batch); + $collected = $collected->concat($filtered); + $batch = collect(); + + if ($collected->count() >= $needed) { + break; + } + } + } + + // Process remaining items in final partial batch + if ($batch->isNotEmpty() && $collected->count() < $needed) { + $filtered = $this->filterWheres($batch); + $collected = $collected->concat($filtered); + } + + return $collected; } protected function getFilteredAndLimitedItems() @@ -362,6 +418,8 @@ protected function operatorToCarbonMethod($operator) abstract protected function getBaseItems(); + abstract protected function getBaseItemsLazy(): Generator; + public function inRandomOrder() { $this->randomize = true; diff --git a/src/Search/QueryBuilder.php b/src/Search/QueryBuilder.php index fc01ad19f68..bd006373421 100644 --- a/src/Search/QueryBuilder.php +++ b/src/Search/QueryBuilder.php @@ -2,6 +2,7 @@ namespace Statamic\Search; +use Generator; use Statamic\Contracts\Search\Result; use Statamic\Data\DataCollection; use Statamic\Query\Concerns\FakesQueries; @@ -55,6 +56,35 @@ public function getBaseItems() return $this->transformResults($results); } + protected function getBaseItemsLazy(): Generator + { + $results = $this->getSearchResults($this->query); + + // If withoutData mode, yield PlainResults directly (cheap, no hydration) + if (! $this->withData) { + foreach ($results as $i => $result) { + $plainResult = new PlainResult($result); + $plainResult->setIndex($this->index)->setScore($result['search_score'] ?? 
null); + yield $plainResult; + } + + return; + } + + // With data mode - batch hydrate to reduce database queries + // Use smaller batches when we know the limit and don't need filtering + $batchSize = $this->limit && empty($this->wheres) && empty($this->orderBys) && ! $this->randomize + ? ($this->offset ?? 0) + $this->limit + : 50; + + foreach ($this->collect($results)->chunk($batchSize) as $batch) { + $hydrated = $this->transformResults($batch); + foreach ($hydrated as $item) { + yield $item; + } + } + } + public function transformResults($results) { if (! $this->withData) { diff --git a/tests/Fakes/Query/HydrationTrackingQueryBuilder.php b/tests/Fakes/Query/HydrationTrackingQueryBuilder.php new file mode 100644 index 00000000000..f98a9dd1d29 --- /dev/null +++ b/tests/Fakes/Query/HydrationTrackingQueryBuilder.php @@ -0,0 +1,48 @@ +results = $results; + $this->hydrationCounter = &$counter; + parent::__construct(Mockery::mock(Index::class)); + } + + public function getSearchResults($query) + { + return $this->results; + } + + public function getBaseItems() + { + return $this->collect($this->results)->map(function ($item) { + $this->hydrationCounter++; + $result = new PlainResult($item); + $result->setScore($item['search_score'] ?? null); + + return $result; + }); + } + + protected function getBaseItemsLazy(): Generator + { + foreach ($this->results as $item) { + $this->hydrationCounter++; + $result = new PlainResult($item); + $result->setScore($item['search_score'] ?? 
null); + yield $result; + } + } +} diff --git a/tests/Fakes/Query/TestIteratorBuilder.php b/tests/Fakes/Query/TestIteratorBuilder.php new file mode 100644 index 00000000000..a2137440261 --- /dev/null +++ b/tests/Fakes/Query/TestIteratorBuilder.php @@ -0,0 +1,36 @@ +items = $items; + $this->loadCounter = &$counter; + } + + protected function getBaseItems() + { + $this->items->each(function () { + $this->loadCounter++; + }); + + return new DataCollection($this->items->all()); + } + + protected function getBaseItemsLazy(): Generator + { + foreach ($this->items as $item) { + $this->loadCounter++; + yield $item; + } + } +} diff --git a/tests/Query/IteratorBuilderTest.php b/tests/Query/IteratorBuilderTest.php new file mode 100644 index 00000000000..32e81616574 --- /dev/null +++ b/tests/Query/IteratorBuilderTest.php @@ -0,0 +1,216 @@ +map(fn ($i) => ['id' => $i, 'value' => "item-$i"]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->limit(10)->get(); + + $this->assertCount(10, $results); + $this->assertEquals(10, $loadCount, 'Should only load 10 items, not all 10000'); + } + + #[Test] + public function it_optimizes_limit_with_offset() + { + $loadCount = 0; + $items = collect(range(1, 10000))->map(fn ($i) => ['id' => $i, 'value' => "item-$i"]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->offset(5)->limit(10)->get(); + + $this->assertCount(10, $results); + $this->assertEquals(15, $loadCount, 'Should load offset + limit items'); + $this->assertEquals(6, $results->first()['id'], 'First result should be offset by 5'); + } + + #[Test] + public function it_batches_with_wheres() + { + $loadCount = 0; + $items = collect(range(1, 10000))->map(fn ($i) => [ + 'id' => $i, + 'value' => "item-$i", + 'even' => $i % 2 === 0, + ]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->where('even', true)->limit(10)->get(); + + $this->assertCount(10, $results); + // With 50% match rate and 
batch size of 50, should need ~1-2 batches + $this->assertLessThan(150, $loadCount, 'Should batch and stop early, not load all 10000'); + $this->assertTrue($results->every(fn ($item) => $item['even'] === true)); + } + + #[Test] + public function it_batches_with_wheres_low_match_rate() + { + $loadCount = 0; + // Only 10% of items match (every 10th item) + $items = collect(range(1, 10000))->map(fn ($i) => [ + 'id' => $i, + 'value' => "item-$i", + 'matches' => $i % 10 === 0, + ]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->where('matches', true)->limit(10)->get(); + + $this->assertCount(10, $results); + // With 10% match rate, need ~100 items to find 10 matches + // Batch size is max(50, 10*2) = 50, so ~2-3 batches + $this->assertLessThan(300, $loadCount, 'Should batch efficiently with low match rate'); + $this->assertTrue($results->every(fn ($item) => $item['matches'] === true)); + } + + #[Test] + public function it_loads_all_when_has_orderby() + { + $loadCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => ['id' => $i, 'value' => "item-$i"]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->orderBy('id', 'desc')->limit(10)->get(); + + $this->assertCount(10, $results); + $this->assertEquals(100, $loadCount, 'Must load all items to sort'); + $this->assertEquals(100, $results->first()['id'], 'First result should be highest id'); + } + + #[Test] + public function it_loads_all_when_randomize() + { + $loadCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => ['id' => $i, 'value' => "item-$i"]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->inRandomOrder()->limit(10)->get(); + + $this->assertCount(10, $results); + $this->assertEquals(100, $loadCount, 'Must load all items to randomize'); + } + + #[Test] + public function it_loads_all_when_no_limit() + { + $loadCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => ['id' => $i, 'value' => 
"item-$i"]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->get(); + + $this->assertCount(100, $results); + $this->assertEquals(100, $loadCount, 'Should load all items when no limit'); + } + + #[Test] + public function it_handles_limit_greater_than_total() + { + $loadCount = 0; + $items = collect(range(1, 50))->map(fn ($i) => ['id' => $i, 'value' => "item-$i"]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->limit(100)->get(); + + $this->assertCount(50, $results); + $this->assertEquals(50, $loadCount, 'Should load all available items'); + } + + #[Test] + public function it_handles_offset_near_end() + { + $loadCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => ['id' => $i, 'value' => "item-$i"]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->offset(95)->limit(10)->get(); + + $this->assertCount(5, $results); + $this->assertEquals(100, $loadCount, 'Should load up to available items'); + } + + #[Test] + public function it_handles_wheres_with_no_matches() + { + $loadCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => [ + 'id' => $i, + 'value' => "item-$i", + 'status' => 'draft', + ]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->where('status', 'published')->limit(10)->get(); + + $this->assertCount(0, $results); + $this->assertEquals(100, $loadCount, 'Should scan all items when no matches found'); + } + + #[Test] + public function it_handles_complex_wheres() + { + $loadCount = 0; + $items = collect(range(1, 10000))->map(fn ($i) => [ + 'id' => $i, + 'value' => "item-$i", + 'status' => $i % 3 === 0 ? 
'published' : 'draft', + 'featured' => $i % 5 === 0, + ]); + + $builder = new TestIteratorBuilder($items, $loadCount); + // Items matching: divisible by 3 AND divisible by 5 = divisible by 15 + $results = $builder + ->where('status', 'published') + ->where('featured', true) + ->limit(10) + ->get(); + + $this->assertCount(10, $results); + // ~6.67% match rate (every 15th item), should need ~150 items + $this->assertLessThan(500, $loadCount, 'Should batch efficiently with complex wheres'); + } + + #[Test] + public function it_preserves_item_order_without_orderby() + { + $loadCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => ['id' => $i, 'value' => "item-$i"]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder->limit(10)->get(); + + $this->assertEquals([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], $results->pluck('id')->all()); + } + + #[Test] + public function it_works_with_whereIn() + { + $loadCount = 0; + $items = collect(range(1, 10000))->map(fn ($i) => [ + 'id' => $i, + 'value' => "item-$i", + 'category' => 'cat-'.($i % 100), + ]); + + $builder = new TestIteratorBuilder($items, $loadCount); + $results = $builder + ->whereIn('category', ['cat-1', 'cat-2', 'cat-3']) + ->limit(10) + ->get(); + + $this->assertCount(10, $results); + $this->assertLessThan(500, $loadCount); + } +} diff --git a/tests/Search/QueryBuilderPerformanceTest.php b/tests/Search/QueryBuilderPerformanceTest.php new file mode 100644 index 00000000000..7e74c0c0a66 --- /dev/null +++ b/tests/Search/QueryBuilderPerformanceTest.php @@ -0,0 +1,247 @@ +map(fn ($i) => ['reference' => "entry::item-$i"]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData()->limit(10)->get(); + + $this->assertCount(10, $results); + $this->assertLessThanOrEqual(10, $hydrationCount, 'Should only hydrate 10 items, not all 10000'); + } + + #[Test] + public function it_batches_hydration_when_has_wheres() + { + $hydrationCount = 0; + // Create 
items where only 10% match the filter + $items = collect(range(1, 10000))->map(fn ($i) => [ + 'reference' => "entry::item-$i", + 'status' => $i % 10 === 0 ? 'published' : 'draft', + ]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData() + ->where('status', 'published') + ->limit(10) + ->get(); + + $this->assertCount(10, $results); + // Should hydrate ~100-200 items (enough batches to find 10 matches), not all 10000 + $this->assertLessThan(500, $hydrationCount, 'Should batch hydrate, not hydrate all 10000'); + } + + #[Test] + public function it_loads_all_when_has_orderby() + { + $hydrationCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => [ + 'reference' => "entry::item-$i", + 'title' => "Title $i", + ]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData() + ->orderBy('title') + ->limit(10) + ->get(); + + $this->assertCount(10, $results); + // Must load all 100 to sort + $this->assertEquals(100, $hydrationCount, 'Must hydrate all items to sort'); + } + + #[Test] + public function it_handles_offset_with_limit_optimization() + { + $hydrationCount = 0; + $items = collect(range(1, 10000))->map(fn ($i) => ['reference' => "entry::item-$i"]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData()->offset(100)->limit(10)->get(); + + $this->assertCount(10, $results); + $this->assertLessThanOrEqual(110, $hydrationCount, 'Should only hydrate offset + limit items'); + } + + #[Test] + public function it_handles_large_limit_efficiently() + { + $hydrationCount = 0; + $items = collect(range(1, 10000))->map(fn ($i) => ['reference' => "entry::item-$i"]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData()->limit(1000)->get(); + + $this->assertCount(1000, $results); + $this->assertEquals(1000, $hydrationCount, 'Should hydrate exactly 1000 
items'); + } + + #[Test] + public function it_handles_wheres_with_very_low_match_rate() + { + $hydrationCount = 0; + // Only 1% of items match (every 100th item) + $items = collect(range(1, 10000))->map(fn ($i) => [ + 'reference' => "entry::item-$i", + 'rare' => $i % 100 === 0, + ]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData() + ->where('rare', true) + ->limit(5) + ->get(); + + $this->assertCount(5, $results); + // With 1% match rate, need ~500 items to find 5 matches + // Should be much less than 10000 + $this->assertLessThan(1500, $hydrationCount, 'Should batch efficiently even with low match rate'); + } + + #[Test] + public function it_handles_no_matching_items() + { + $hydrationCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => [ + 'reference' => "entry::item-$i", + 'status' => 'draft', + ]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData() + ->where('status', 'published') + ->limit(10) + ->get(); + + $this->assertCount(0, $results); + $this->assertEquals(100, $hydrationCount, 'Should scan all items when no matches'); + } + + #[Test] + public function it_respects_search_score_ordering_after_optimization() + { + $hydrationCount = 0; + // Items with search scores in descending order + $items = collect(range(1, 100))->map(fn ($i) => [ + 'reference' => "entry::item-$i", + 'search_score' => 100 - $i + 1, // 100, 99, 98, ... 
+ ]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData()->limit(10)->get(); + + $this->assertCount(10, $results); + // Results should be in original order (by search_score) + $this->assertEquals( + [100, 99, 98, 97, 96, 95, 94, 93, 92, 91], + $results->pluck('search_score')->all() + ); + } + + #[Test] + public function it_loads_all_when_randomized() + { + $hydrationCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => ['reference' => "entry::item-$i"]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData()->inRandomOrder()->limit(10)->get(); + + $this->assertCount(10, $results); + $this->assertEquals(100, $hydrationCount, 'Must hydrate all items to randomize'); + } + + #[Test] + public function it_loads_all_when_no_limit() + { + $hydrationCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => ['reference' => "entry::item-$i"]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData()->get(); + + $this->assertCount(100, $results); + $this->assertEquals(100, $hydrationCount, 'Should hydrate all items when no limit'); + } + + #[Test] + public function it_handles_limit_greater_than_total() + { + $hydrationCount = 0; + $items = collect(range(1, 50))->map(fn ($i) => ['reference' => "entry::item-$i"]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData()->limit(100)->get(); + + $this->assertCount(50, $results); + $this->assertEquals(50, $hydrationCount, 'Should hydrate all available items'); + } + + #[Test] + public function it_handles_multiple_wheres() + { + $hydrationCount = 0; + $items = collect(range(1, 10000))->map(fn ($i) => [ + 'reference' => "entry::item-$i", + 'status' => $i % 3 === 0 ? 
'published' : 'draft', + 'featured' => $i % 5 === 0, + ]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + // Items matching: divisible by 3 AND divisible by 5 = divisible by 15 (~6.67%) + $results = $builder->withoutData() + ->where('status', 'published') + ->where('featured', true) + ->limit(10) + ->get(); + + $this->assertCount(10, $results); + // Should batch efficiently, not load all 10000 + $this->assertLessThan(1000, $hydrationCount, 'Should batch efficiently with multiple wheres'); + } + + #[Test] + public function it_handles_whereIn() + { + $hydrationCount = 0; + $items = collect(range(1, 10000))->map(fn ($i) => [ + 'reference' => "entry::item-$i", + 'category' => 'cat-'.($i % 100), + ]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + // 3% match rate (3 categories out of 100) + $results = $builder->withoutData() + ->whereIn('category', ['cat-1', 'cat-2', 'cat-3']) + ->limit(10) + ->get(); + + $this->assertCount(10, $results); + $this->assertLessThan(1000, $hydrationCount, 'Should batch efficiently with whereIn'); + } + + #[Test] + public function it_handles_offset_near_end() + { + $hydrationCount = 0; + $items = collect(range(1, 100))->map(fn ($i) => ['reference' => "entry::item-$i"]); + + $builder = new HydrationTrackingQueryBuilder($items, $hydrationCount); + $results = $builder->withoutData()->offset(95)->limit(10)->get(); + + $this->assertCount(5, $results); + $this->assertEquals(100, $hydrationCount, 'Should hydrate up to available items'); + } +} diff --git a/tests/Search/QueryBuilderTest.php b/tests/Search/QueryBuilderTest.php index 0578cedb7b1..316a3aa02cf 100644 --- a/tests/Search/QueryBuilderTest.php +++ b/tests/Search/QueryBuilderTest.php @@ -659,6 +659,74 @@ public function values_can_be_plucked() 'Smeagol\'s Precious', ], $query->where('type', 'b')->pluck('title')->all()); } + + #[Test] + public function results_are_limited() + { + $items = collect([ + ['reference' => 'a'], + 
['reference' => 'b'], + ['reference' => 'c'], + ['reference' => 'd'], + ['reference' => 'e'], + ]); + + $results = (new FakeQueryBuilder($items))->withoutData()->limit(3)->get(); + + $this->assertCount(3, $results); + $this->assertEquals(['a', 'b', 'c'], $results->map->reference->all()); + } + + #[Test] + public function results_are_limited_with_offset() + { + $items = collect([ + ['reference' => 'a'], + ['reference' => 'b'], + ['reference' => 'c'], + ['reference' => 'd'], + ['reference' => 'e'], + ]); + + $results = (new FakeQueryBuilder($items))->withoutData()->offset(2)->limit(2)->get(); + + $this->assertCount(2, $results); + $this->assertEquals(['c', 'd'], $results->map->reference->all()); + } + + #[Test] + public function results_are_limited_with_wheres() + { + $items = collect([ + ['reference' => 'a', 'status' => 'published'], + ['reference' => 'b', 'status' => 'draft'], + ['reference' => 'c', 'status' => 'published'], + ['reference' => 'd', 'status' => 'draft'], + ['reference' => 'e', 'status' => 'published'], + ['reference' => 'f', 'status' => 'published'], + ]); + + $results = (new FakeQueryBuilder($items))->withoutData() + ->where('status', 'published') + ->limit(2) + ->get(); + + $this->assertCount(2, $results); + $this->assertEquals(['a', 'c'], $results->map->reference->all()); + } + + #[Test] + public function limit_handles_fewer_results_than_requested() + { + $items = collect([ + ['reference' => 'a'], + ['reference' => 'b'], + ]); + + $results = (new FakeQueryBuilder($items))->withoutData()->limit(10)->get(); + + $this->assertCount(2, $results); + } } class FakeQueryBuilder extends QueryBuilder @@ -675,4 +743,13 @@ public function getSearchResults($query) { return $this->results; } + + protected function getBaseItemsLazy(): \Generator + { + foreach ($this->results as $i => $result) { + $plainResult = new \Statamic\Search\PlainResult($result); + $plainResult->setScore($result['search_score'] ?? null); + yield $plainResult; + } + } }