@@ -108,141 +108,84 @@ def _generate_feature_vector(self, recipe: Dict) -> Optional[np.ndarray]:
108108 logger .error (f"Error generating feature vector: { e } " )
109109 return None
110110
111- def index_recipe (
112- self , recipe_id : str , title : str , recipe_data : Dict = None
113- ) -> bool :
def bulk_index_recipes(self, recipes_data: List[Dict]) -> List[str]:
    """
    Bulk index multiple recipes in Elasticsearch with feature vectors

    Every valid recipe becomes one ``index`` action keyed by the recipe's
    own ID, and all actions are sent in a single bulk request with
    ``refresh=True``. Per-item failures are logged and do not prevent the
    IDs of the other, successfully written recipes from being returned.

    Args:
        recipes_data: List of recipe dictionaries to index. Each entry is
            read for "id" and "title"; entries where either is missing or
            empty are skipped with a warning.

    Returns:
        List[str]: IDs of recipes Elasticsearch reported as "created" or
            "updated". Empty when there is nothing valid to index or when
            the bulk request itself raises.
    """
    if not recipes_data:
        return []

    try:
        # The bulk body alternates action-metadata and document entries.
        bulk_operations = []

        for recipe_data in recipes_data:
            recipe_id = recipe_data.get("id")
            title = recipe_data.get("title", "")

            if not recipe_id or not title:
                logger.warning(
                    f"Skipping recipe with missing ID or title: {recipe_id} "
                )
                continue

            # Create document for Elasticsearch
            doc = {
                "id": recipe_id,
                "title": title,
            }

            # Add feature vector if models are loaded
            if self.tfidf_vectorizer and self.pca:
                feature_vector = self._generate_feature_vector(recipe_data)
                if feature_vector is not None:
                    doc["feature_vector"] = feature_vector.tolist()
                    logger.debug(f"Generated feature vector for recipe {recipe_id} ")

            bulk_operations.append(
                {"index": {"_index": self.INDEX_NAME, "_id": recipe_id}}
            )
            bulk_operations.append(doc)

        if not bulk_operations:
            logger.warning("No valid recipes to index")
            return []

        # Execute bulk indexing
        response = self.es.bulk(body=bulk_operations, refresh=True)

        if response.get("errors", False):
            logger.error("Some errors occurred during bulk indexing:")

        # Walk every item exactly once: a bulk request can partially
        # succeed, so successes must be collected even when the
        # top-level "errors" flag is set.
        indexed_recipe_ids = []
        for item in response.get("items", []):
            action = item.get("index")
            if not action:
                continue

            error = action.get("error")
            if error:
                error_recipe_id = action.get("_id")
                # Tolerate error payloads that are not a dict with "reason"
                # so one odd item cannot discard the whole result.
                if isinstance(error, dict):
                    error_msg = error.get("reason", error)
                else:
                    error_msg = error
                logger.error(
                    f"Failed to index recipe {error_recipe_id} : {error_msg} "
                )
            elif action.get("result") in ("created", "updated"):
                # "updated" is what an index action reports when a document
                # with the same _id already existed; it is still a success.
                indexed_recipe_ids.append(action["_id"])

        logger.info(
            f"Successfully bulk indexed {len(indexed_recipe_ids)} recipes in Elasticsearch"
        )
        return indexed_recipe_ids

    except Exception as e:
        # Best-effort API: callers get an empty list rather than an exception.
        logger.error(f"Failed to bulk index recipes in Elasticsearch : {e} ")
        return []
246189
247190 def _get_recipe_feature_vectors (
248191 self , recipe_ids : List [str ]
0 commit comments