refactor: Update recipe service to support batch indexing for new recipes

aLEGEND21 · aLEGEND21 · commit 41d5108691df · 2025-08-02T19:36:10.000-04:00
diff --git a/api/es_service.py b/api/es_service.py
@@ -108,141 +108,84 @@ def _generate_feature_vector(self, recipe: Dict) -> Optional[np.ndarray]:
             logger.error(f"Error generating feature vector: {e}")
             return None
 
-    def index_recipe(
-        self, recipe_id: str, title: str, recipe_data: Dict = None
-    ) -> bool:
+    def bulk_index_recipes(self, recipes_data: List[Dict]) -> List[str]:
         """
-        Index a recipe in Elasticsearch for search functionality with feature vector
+        Bulk index multiple recipes in Elasticsearch with feature vectors
 
         Args:
-            recipe_id: Unique identifier for the recipe
-            title: Recipe title for search indexing
-            recipe_data: Full recipe data for feature vector generation
+            recipes_data: List of recipe dictionaries to index
 
         Returns:
-            bool: True if indexing was successful, False otherwise
+            List[str]: List of successfully indexed recipe IDs
         """
-        try:
-            # Create document for Elasticsearch
-            doc = {
-                "id": recipe_id,
-                "title": title,
-            }
-
-            # Add feature vector if recipe data is provided and models are loaded
-            if recipe_data and self.tfidf_vectorizer and self.pca:
-                feature_vector = self._generate_feature_vector(recipe_data)
-                if feature_vector is not None:
-                    doc["feature_vector"] = feature_vector.tolist()
-                    logger.info(f"Added feature vector to recipe {recipe_id}")
+        if not recipes_data:
+            return []
 
-            # Index the document
-            self.es.index(index=self.INDEX_NAME, body=doc)
+        try:
+            # Prepare bulk operations
+            bulk_operations = []
+            indexed_recipe_ids = []
 
-            # Refresh the index to make document searchable immediately
-            self.es.indices.refresh(index=self.INDEX_NAME)
+            for recipe_data in recipes_data:
+                recipe_id = recipe_data.get("id")
+                title = recipe_data.get("title", "")
 
-            logger.info(
-                f"Successfully indexed recipe in Elasticsearch with ID: {recipe_id}"
-            )
-            return True
+                if not recipe_id or not title:
+                    logger.warning(
+                        f"Skipping recipe with missing ID or title: {recipe_id}"
+                    )
+                    continue
 
-        except Exception as e:
-            logger.error(f"Failed to index recipe in Elasticsearch: {e}")
-            return False
+                # Create document for Elasticsearch
+                doc = {
+                    "id": recipe_id,
+                    "title": title,
+                }
 
-    def create_index_if_not_exists(self) -> bool:
-        """
-        Create the recipes index if it doesn't exist with feature vector support
+                # Add feature vector if models are loaded
+                if self.tfidf_vectorizer and self.pca:
+                    feature_vector = self._generate_feature_vector(recipe_data)
+                    if feature_vector is not None:
+                        doc["feature_vector"] = feature_vector.tolist()
+                        logger.debug(f"Generated feature vector for recipe {recipe_id}")
 
-        Returns:
-            bool: True if index exists or was created successfully, False otherwise
-        """
-        try:
-            # Check if index exists
-            if not self.es.indices.exists(index=self.INDEX_NAME):
-                # Define the mapping for recipe search with feature vectors
-                mapping = {
-                    "mappings": {
-                        "properties": {
-                            "id": {"type": "keyword"},
-                            "title": {
-                                "type": "text",
-                                "analyzer": "standard",
-                                "fields": {
-                                    "keyword": {"type": "keyword"},
-                                },
-                            },
-                            "description": {
-                                "type": "text",
-                                "analyzer": "standard",
-                            },
-                            "recipe_url": {"type": "keyword"},
-                            "image_url": {"type": "keyword"},
-                            "ingredients": {
-                                "type": "text",
-                                "analyzer": "standard",
-                            },
-                            "instructions": {
-                                "type": "text",
-                                "analyzer": "standard",
-                            },
-                            "category": {
-                                "type": "text",
-                                "analyzer": "standard",
-                                "fields": {"keyword": {"type": "keyword"}},
-                            },
-                            "cuisine": {
-                                "type": "text",
-                                "analyzer": "standard",
-                                "fields": {"keyword": {"type": "keyword"}},
-                            },
-                            "site_name": {
-                                "type": "text",
-                                "analyzer": "standard",
-                                "fields": {"keyword": {"type": "keyword"}},
-                            },
-                            "keywords": {
-                                "type": "text",
-                                "analyzer": "standard",
-                            },
-                            "dietary_restrictions": {
-                                "type": "text",
-                                "analyzer": "standard",
-                                "fields": {"keyword": {"type": "keyword"}},
-                            },
-                            "total_time": {"type": "integer"},
-                            "overall_rating": {"type": "float"},
-                            "feature_vector": {
-                                "type": "dense_vector",
-                                "dims": 4000,  # Default dimension, will be updated if models are loaded
-                                "index": True,
-                                "similarity": "cosine",
-                            },
-                        }
-                    },
-                    "settings": {
-                        "number_of_shards": 1,
-                        "number_of_replicas": 0,
-                    },
-                }
+                # Add bulk operation
+                bulk_operations.extend(
+                    [{"index": {"_index": self.INDEX_NAME, "_id": recipe_id}}, doc]
+                )
 
-                # Update feature vector dimensions if models are loaded
-                if self.pca:
-                    mapping["mappings"]["properties"]["feature_vector"]["dims"] = (
-                        self.pca.n_components_
-                    )
+            if not bulk_operations:
+                logger.warning("No valid recipes to index")
+                return []
 
-                self.es.indices.create(index=self.INDEX_NAME, body=mapping)
-                logger.info(f"Created Elasticsearch index: {self.INDEX_NAME}")
-            else:
-                logger.info(f"Elasticsearch index {self.INDEX_NAME} already exists")
+            # Execute bulk indexing
+            if bulk_operations:
+                response = self.es.bulk(body=bulk_operations, refresh=True)
+
+                # A bulk response can mix successes and failures, so inspect every
+                # item rather than discarding the successes when any item fails
+                if response.get("errors", False):
+                    logger.error("Some errors occurred during bulk indexing:")
+                for item in response.get("items", []):
+                    outcome = item.get("index", {})
+                    if outcome.get("error"):
+                        error_recipe_id = outcome["_id"]
+                        error_msg = outcome["error"]["reason"]
+                        logger.error(
+                            f"Failed to index recipe {error_recipe_id}: {error_msg}"
+                        )
+                    elif outcome.get("result") in ("created", "updated"):
+                        # "updated" means an existing _id was re-indexed: a success
+                        indexed_recipe_ids.append(outcome["_id"])
 
-            return True
+                logger.info(
+                    f"Successfully bulk indexed {len(indexed_recipe_ids)} recipes in Elasticsearch"
+                )
+                return indexed_recipe_ids
 
         except Exception as e:
-            logger.error(f"Error creating Elasticsearch index: {e}")
-            return False
+            logger.error(f"Failed to bulk index recipes in Elasticsearch: {e}")
+            return []
 
     def _get_recipe_feature_vectors(
         self, recipe_ids: List[str]
diff --git a/api/main.py b/api/main.py
@@ -439,6 +439,7 @@ async def delete_user(user_id: str, db: DatabaseManager = Depends(get_db)):
 async def create_recipes(
     recipes_data: List[RecipeCreateRequest],
     recipe_svc: RecipeService = Depends(get_recipe_service),
+    db: DatabaseManager = Depends(get_db),
 ):
     """
     Create multiple recipes with automatic feature vector calculation and Elasticsearch indexing.
@@ -469,7 +470,7 @@ async def create_recipes(
         sample_recipe_id = random.choice(recipe_ids)
 
         # Get the recipe details
-        sample_recipe = recipe_svc.get_recipe(sample_recipe_id)
+        sample_recipe = db.get_recipe(sample_recipe_id)
         if sample_recipe:
             sample_recipe_title = sample_recipe.title
 
diff --git a/api/recipe_service.py b/api/recipe_service.py
@@ -108,7 +108,7 @@ def add_recipe(self, recipes_data: List[Dict]) -> List[str]:
             logger.error(f"Database error while adding recipes: {e}")
             raise RuntimeError(f"Failed to add recipes to database: {e}")
 
-        # Index in Elasticsearch with feature vectors
+        # Bulk index in Elasticsearch with feature vectors
         if self.es_service:
             # Check if feature vector models are available
             if not self.es_service.tfidf_vectorizer or not self.es_service.pca:
@@ -120,9 +120,9 @@ def add_recipe(self, recipes_data: List[Dict]) -> List[str]:
                 )
 
             try:
-                indexed_count = 0
+                # Convert database format back to dict format for feature vector generation
+                es_recipes_data = []
                 for recipe_data in recipes_to_add:
-                    # Convert database format back to dict format for feature vector generation
                     es_recipe_data = {
                         "id": recipe_data["id"],
                         "title": recipe_data["title"],
@@ -141,49 +141,20 @@ def add_recipe(self, recipes_data: List[Dict]) -> List[str]:
                         "total_time": recipe_data["total_time"],
                         "overall_rating": recipe_data["overall_rating"],
                     }
+                    es_recipes_data.append(es_recipe_data)
 
-                    success = self.es_service.index_recipe(
-                        recipe_data["id"], recipe_data["title"], es_recipe_data
-                    )
-                    if success:
-                        indexed_count += 1
-                        logger.debug(
-                            f"Successfully indexed recipe {recipe_data['id']} with feature vector"
-                        )
-                    else:
-                        logger.warning(
-                            f"Failed to index recipe in Elasticsearch: {recipe_data['id']}"
-                        )
+                # Bulk index all recipes
+                indexed_recipe_ids = self.es_service.bulk_index_recipes(es_recipes_data)
 
                 logger.info(
-                    f"Successfully indexed {indexed_count} recipes in Elasticsearch with feature vectors"
+                    f"Successfully bulk indexed {len(indexed_recipe_ids)} recipes in Elasticsearch with feature vectors"
                 )
 
-                # Refresh the index to make new documents searchable immediately
-                try:
-                    self.es_service.es.indices.refresh(index=self.es_service.INDEX_NAME)
-                    logger.info("Elasticsearch index refreshed")
-                except Exception as e:
-                    logger.warning(f"Failed to refresh Elasticsearch index: {e}")
-
             except Exception as e:
-                logger.error(f"Failed to index recipes in Elasticsearch: {e}")
-                # Don't raise here - recipes are already in database, just log the error
+                logger.error(f"Failed to bulk index recipes in Elasticsearch: {e}")
 
         return recipe_ids
 
-    def get_recipe(self, recipe_id: str):
-        """
-        Get a recipe by ID from the database
-
-        Args:
-            recipe_id: ID of the recipe to retrieve
-
-        Returns:
-            Recipe object or None if not found
-        """
-        return self.db_manager.get_recipe(recipe_id)
-
     def get_most_similar_recipe(self, recipe_id: str) -> Optional[Dict]:
         """
         Get the most similar recipe to the given recipe for validation purposes using Elasticsearch.
diff --git a/frontend/next.config.ts b/frontend/next.config.ts
@@ -2,20 +2,7 @@ import type { NextConfig } from "next";
 
 const nextConfig: NextConfig = {
   images: {
-    remotePatterns: [
-      {
-        protocol: "https",
-        hostname: "**",
-        port: "",
-        pathname: "/**",
-      },
-      {
-        protocol: "http",
-        hostname: "**",
-        port: "",
-        pathname: "/**",
-      },
-    ],
+    unoptimized: true, // Bypass the Next.js image optimizer so images from any external host load as-is (no resizing or format conversion)
   },
   async redirects() {
     return [