|
11 | 11 |
|
12 | 12 | use Symfony\AI\Platform\Bridge\OpenAi\PlatformFactory; |
13 | 13 | use Symfony\AI\Store\Document\Filter\TextContainsFilter; |
14 | | -use Symfony\AI\Store\Document\Loader\InMemoryLoader; |
| 14 | +use Symfony\AI\Store\Document\Loader; |
| 15 | +use Symfony\AI\Store\Document\Loader\DocumentCollectionLoader; |
15 | 16 | use Symfony\AI\Store\Document\Metadata; |
| 17 | +use Symfony\AI\Store\Document\Source\DocumentCollection; |
16 | 18 | use Symfony\AI\Store\Document\TextDocument; |
17 | 19 | use Symfony\AI\Store\Document\Transformer\TextTrimTransformer; |
18 | 20 | use Symfony\AI\Store\Document\Vectorizer; |
|
27 | 29 | $vectorizer = new Vectorizer($platform, 'text-embedding-3-small'); |
28 | 30 |
|
29 | 31 | // Sample documents with some unwanted content |
30 | | -$documents = [ |
| 32 | +$documents = new DocumentCollection([ |
31 | 33 | new TextDocument( |
32 | 34 | Uuid::v4(), |
33 | 35 | 'Artificial Intelligence is transforming the way we work and live. Machine learning algorithms can now process vast amounts of data and make predictions with remarkable accuracy.', |
|
48 | 50 | 'Climate change is one of the most pressing challenges of our time. Renewable energy sources like solar and wind power are becoming increasingly important for a sustainable future.', |
49 | 51 | new Metadata(['title' => 'Climate Action', 'category' => 'environment']) |
50 | 52 | ), |
51 | | -]; |
| 53 | +]); |
52 | 54 |
|
53 | 55 | // Create filters to remove unwanted content |
54 | 56 | $filters = [ |
|
57 | 59 | ]; |
58 | 60 |
|
59 | 61 | $indexer = new Indexer( |
60 | | - loader: new InMemoryLoader($documents), |
| 62 | + loader: new Loader([new DocumentCollectionLoader()]), |
61 | 63 | vectorizer: $vectorizer, |
62 | 64 | store: $store, |
63 | | - source: null, |
64 | 65 | filters: $filters, |
65 | 66 | transformers: [ |
66 | 67 | new TextTrimTransformer(), |
67 | 68 | ], |
68 | 69 | ); |
69 | 70 |
|
70 | | -$indexer->index(); |
| 71 | +$indexer->index($documents); |
71 | 72 |
|
72 | 73 | $vector = $vectorizer->vectorize('technology artificial intelligence'); |
73 | 74 | $results = $store->query($vector); |
|
81 | 82 | } |
82 | 83 |
|
83 | 84 | echo "=== Results Summary ===\n"; |
84 | | -echo sprintf("Original documents: %d\n", count($documents)); |
85 | | -echo sprintf("Documents after filtering: %d\n", count($results)); |
86 | | -echo sprintf("Filtered out: %d documents\n", count($documents) - count($results)); |
| 85 | +echo sprintf("Original documents: %d\n", count($documents->getDocuments())); |
| 86 | +echo sprintf("Documents after filtering: %d\n", $i + 1); |
| 87 | +echo sprintf("Filtered out: %d documents\n", count($documents->getDocuments()) - ($i + 1)); |
87 | 88 | echo "\nThe 'Week of Symfony' newsletter and SPAM advertisement were successfully filtered out!\n"; |
0 commit comments