Skip to content

Commit 41d5108

Browse files
committed
refactor: Update recipe service to support batch indexing for new recipes
1 parent ab55afa commit 41d5108

File tree

4 files changed: 72 additions and 170 deletions.

api/es_service.py

Lines changed: 61 additions & 118 deletions
Original file line numberDiff line numberDiff line change
@@ -108,141 +108,84 @@ def _generate_feature_vector(self, recipe: Dict) -> Optional[np.ndarray]:
108108
logger.error(f"Error generating feature vector: {e}")
109109
return None
110110

111-
def index_recipe(
112-
self, recipe_id: str, title: str, recipe_data: Dict = None
113-
) -> bool:
111+
def bulk_index_recipes(self, recipes_data: List[Dict]) -> List[str]:
114112
"""
115-
Index a recipe in Elasticsearch for search functionality with feature vector
113+
Bulk index multiple recipes in Elasticsearch with feature vectors
116114
117115
Args:
118-
recipe_id: Unique identifier for the recipe
119-
title: Recipe title for search indexing
120-
recipe_data: Full recipe data for feature vector generation
116+
recipes_data: List of recipe dictionaries to index
121117
122118
Returns:
123-
bool: True if indexing was successful, False otherwise
119+
List[str]: List of successfully indexed recipe IDs
124120
"""
125-
try:
126-
# Create document for Elasticsearch
127-
doc = {
128-
"id": recipe_id,
129-
"title": title,
130-
}
131-
132-
# Add feature vector if recipe data is provided and models are loaded
133-
if recipe_data and self.tfidf_vectorizer and self.pca:
134-
feature_vector = self._generate_feature_vector(recipe_data)
135-
if feature_vector is not None:
136-
doc["feature_vector"] = feature_vector.tolist()
137-
logger.info(f"Added feature vector to recipe {recipe_id}")
121+
if not recipes_data:
122+
return []
138123

139-
# Index the document
140-
self.es.index(index=self.INDEX_NAME, body=doc)
124+
try:
125+
# Prepare bulk operations
126+
bulk_operations = []
127+
indexed_recipe_ids = []
141128

142-
# Refresh the index to make document searchable immediately
143-
self.es.indices.refresh(index=self.INDEX_NAME)
129+
for recipe_data in recipes_data:
130+
recipe_id = recipe_data.get("id")
131+
title = recipe_data.get("title", "")
144132

145-
logger.info(
146-
f"Successfully indexed recipe in Elasticsearch with ID: {recipe_id}"
147-
)
148-
return True
133+
if not recipe_id or not title:
134+
logger.warning(
135+
f"Skipping recipe with missing ID or title: {recipe_id}"
136+
)
137+
continue
149138

150-
except Exception as e:
151-
logger.error(f"Failed to index recipe in Elasticsearch: {e}")
152-
return False
139+
# Create document for Elasticsearch
140+
doc = {
141+
"id": recipe_id,
142+
"title": title,
143+
}
153144

154-
def create_index_if_not_exists(self) -> bool:
155-
"""
156-
Create the recipes index if it doesn't exist with feature vector support
145+
# Add feature vector if models are loaded
146+
if self.tfidf_vectorizer and self.pca:
147+
feature_vector = self._generate_feature_vector(recipe_data)
148+
if feature_vector is not None:
149+
doc["feature_vector"] = feature_vector.tolist()
150+
logger.debug(f"Generated feature vector for recipe {recipe_id}")
157151

158-
Returns:
159-
bool: True if index exists or was created successfully, False otherwise
160-
"""
161-
try:
162-
# Check if index exists
163-
if not self.es.indices.exists(index=self.INDEX_NAME):
164-
# Define the mapping for recipe search with feature vectors
165-
mapping = {
166-
"mappings": {
167-
"properties": {
168-
"id": {"type": "keyword"},
169-
"title": {
170-
"type": "text",
171-
"analyzer": "standard",
172-
"fields": {
173-
"keyword": {"type": "keyword"},
174-
},
175-
},
176-
"description": {
177-
"type": "text",
178-
"analyzer": "standard",
179-
},
180-
"recipe_url": {"type": "keyword"},
181-
"image_url": {"type": "keyword"},
182-
"ingredients": {
183-
"type": "text",
184-
"analyzer": "standard",
185-
},
186-
"instructions": {
187-
"type": "text",
188-
"analyzer": "standard",
189-
},
190-
"category": {
191-
"type": "text",
192-
"analyzer": "standard",
193-
"fields": {"keyword": {"type": "keyword"}},
194-
},
195-
"cuisine": {
196-
"type": "text",
197-
"analyzer": "standard",
198-
"fields": {"keyword": {"type": "keyword"}},
199-
},
200-
"site_name": {
201-
"type": "text",
202-
"analyzer": "standard",
203-
"fields": {"keyword": {"type": "keyword"}},
204-
},
205-
"keywords": {
206-
"type": "text",
207-
"analyzer": "standard",
208-
},
209-
"dietary_restrictions": {
210-
"type": "text",
211-
"analyzer": "standard",
212-
"fields": {"keyword": {"type": "keyword"}},
213-
},
214-
"total_time": {"type": "integer"},
215-
"overall_rating": {"type": "float"},
216-
"feature_vector": {
217-
"type": "dense_vector",
218-
"dims": 4000, # Default dimension, will be updated if models are loaded
219-
"index": True,
220-
"similarity": "cosine",
221-
},
222-
}
223-
},
224-
"settings": {
225-
"number_of_shards": 1,
226-
"number_of_replicas": 0,
227-
},
228-
}
152+
# Add bulk operation
153+
bulk_operations.extend(
154+
[{"index": {"_index": self.INDEX_NAME, "_id": recipe_id}}, doc]
155+
)
229156

230-
# Update feature vector dimensions if models are loaded
231-
if self.pca:
232-
mapping["mappings"]["properties"]["feature_vector"]["dims"] = (
233-
self.pca.n_components_
234-
)
157+
if not bulk_operations:
158+
logger.warning("No valid recipes to index")
159+
return []
235160

236-
self.es.indices.create(index=self.INDEX_NAME, body=mapping)
237-
logger.info(f"Created Elasticsearch index: {self.INDEX_NAME}")
238-
else:
239-
logger.info(f"Elasticsearch index {self.INDEX_NAME} already exists")
161+
# Execute bulk indexing
162+
if bulk_operations:
163+
response = self.es.bulk(body=bulk_operations, refresh=True)
164+
165+
# Check for errors in bulk response
166+
if response.get("errors", False):
167+
logger.error("Some errors occurred during bulk indexing:")
168+
for item in response.get("items", []):
169+
if "index" in item and item["index"].get("error"):
170+
error_recipe_id = item["index"]["_id"]
171+
error_msg = item["index"]["error"]["reason"]
172+
logger.error(
173+
f"Failed to index recipe {error_recipe_id}: {error_msg}"
174+
)
175+
else:
176+
# Extract successfully indexed recipe IDs
177+
for item in response.get("items", []):
178+
if "index" in item and item["index"].get("result") == "created":
179+
indexed_recipe_ids.append(item["index"]["_id"])
240180

241-
return True
181+
logger.info(
182+
f"Successfully bulk indexed {len(indexed_recipe_ids)} recipes in Elasticsearch"
183+
)
184+
return indexed_recipe_ids
242185

243186
except Exception as e:
244-
logger.error(f"Error creating Elasticsearch index: {e}")
245-
return False
187+
logger.error(f"Failed to bulk index recipes in Elasticsearch: {e}")
188+
return []
246189

247190
def _get_recipe_feature_vectors(
248191
self, recipe_ids: List[str]

api/main.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -439,6 +439,7 @@ async def delete_user(user_id: str, db: DatabaseManager = Depends(get_db)):
439439
async def create_recipes(
440440
recipes_data: List[RecipeCreateRequest],
441441
recipe_svc: RecipeService = Depends(get_recipe_service),
442+
db: DatabaseManager = Depends(get_db),
442443
):
443444
"""
444445
Create multiple recipes with automatic feature vector calculation and Elasticsearch indexing.
@@ -469,7 +470,7 @@ async def create_recipes(
469470
sample_recipe_id = random.choice(recipe_ids)
470471

471472
# Get the recipe details
472-
sample_recipe = recipe_svc.get_recipe(sample_recipe_id)
473+
sample_recipe = db.get_recipe(sample_recipe_id)
473474
if sample_recipe:
474475
sample_recipe_title = sample_recipe.title
475476

api/recipe_service.py

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -108,7 +108,7 @@ def add_recipe(self, recipes_data: List[Dict]) -> List[str]:
108108
logger.error(f"Database error while adding recipes: {e}")
109109
raise RuntimeError(f"Failed to add recipes to database: {e}")
110110

111-
# Index in Elasticsearch with feature vectors
111+
# Bulk index in Elasticsearch with feature vectors
112112
if self.es_service:
113113
# Check if feature vector models are available
114114
if not self.es_service.tfidf_vectorizer or not self.es_service.pca:
@@ -120,9 +120,9 @@ def add_recipe(self, recipes_data: List[Dict]) -> List[str]:
120120
)
121121

122122
try:
123-
indexed_count = 0
123+
# Convert database format back to dict format for feature vector generation
124+
es_recipes_data = []
124125
for recipe_data in recipes_to_add:
125-
# Convert database format back to dict format for feature vector generation
126126
es_recipe_data = {
127127
"id": recipe_data["id"],
128128
"title": recipe_data["title"],
@@ -141,49 +141,20 @@ def add_recipe(self, recipes_data: List[Dict]) -> List[str]:
141141
"total_time": recipe_data["total_time"],
142142
"overall_rating": recipe_data["overall_rating"],
143143
}
144+
es_recipes_data.append(es_recipe_data)
144145

145-
success = self.es_service.index_recipe(
146-
recipe_data["id"], recipe_data["title"], es_recipe_data
147-
)
148-
if success:
149-
indexed_count += 1
150-
logger.debug(
151-
f"Successfully indexed recipe {recipe_data['id']} with feature vector"
152-
)
153-
else:
154-
logger.warning(
155-
f"Failed to index recipe in Elasticsearch: {recipe_data['id']}"
156-
)
146+
# Bulk index all recipes
147+
indexed_recipe_ids = self.es_service.bulk_index_recipes(es_recipes_data)
157148

158149
logger.info(
159-
f"Successfully indexed {indexed_count} recipes in Elasticsearch with feature vectors"
150+
f"Successfully bulk indexed {len(indexed_recipe_ids)} recipes in Elasticsearch with feature vectors"
160151
)
161152

162-
# Refresh the index to make new documents searchable immediately
163-
try:
164-
self.es_service.es.indices.refresh(index=self.es_service.INDEX_NAME)
165-
logger.info("Elasticsearch index refreshed")
166-
except Exception as e:
167-
logger.warning(f"Failed to refresh Elasticsearch index: {e}")
168-
169153
except Exception as e:
170-
logger.error(f"Failed to index recipes in Elasticsearch: {e}")
171-
# Don't raise here - recipes are already in database, just log the error
154+
logger.error(f"Failed to bulk index recipes in Elasticsearch: {e}")
172155

173156
return recipe_ids
174157

175-
def get_recipe(self, recipe_id: str):
176-
"""
177-
Get a recipe by ID from the database
178-
179-
Args:
180-
recipe_id: ID of the recipe to retrieve
181-
182-
Returns:
183-
Recipe object or None if not found
184-
"""
185-
return self.db_manager.get_recipe(recipe_id)
186-
187158
def get_most_similar_recipe(self, recipe_id: str) -> Optional[Dict]:
188159
"""
189160
Get the most similar recipe to the given recipe for validation purposes using Elasticsearch.

frontend/next.config.ts

Lines changed: 1 addition & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -2,20 +2,7 @@ import type { NextConfig } from "next";
22

33
const nextConfig: NextConfig = {
44
images: {
5-
remotePatterns: [
6-
{
7-
protocol: "https",
8-
hostname: "**",
9-
port: "",
10-
pathname: "/**",
11-
},
12-
{
13-
protocol: "http",
14-
hostname: "**",
15-
port: "",
16-
pathname: "/**",
17-
},
18-
],
5+
unoptimized: true, // Allow images to be served from any external domain
196
},
207
async redirects() {
218
return [

0 commit comments

Comments
 (0)