Merged

Commits (51)
81156b3 📝 Update README badges (Phinease, Sep 17, 2025)
cde5304 Create codeql.yml (Phinease, Sep 5, 2025)
e432320 Update README.md (Davina-jcx, Sep 10, 2025)
53b7522 📝 Update README badges (Phinease, Sep 17, 2025)
de9cffe 📝 Update README badges (Phinease, Sep 17, 2025)
683adf2 📝 Update README badges (Phinease, Sep 17, 2025)
ccd2214 1. When creating a new Agent, the large language model defaults to th… (Zhi-a, Oct 21, 2025)
6183692 The large language model automatically summarizes the knowledge base,… (Zhi-a, Oct 22, 2025)
d95f8f5 🐛 modelscope mcp tool test run has no inputs #1398 (Oct 22, 2025)
89787b9 1. When creating a new Agent, the large language model defaults to th… (Zhi-a, Oct 21, 2025)
9d07b67 1. When creating a new Agent, the large language model defaults to th… (Zhi-a, Oct 23, 2025)
437cf62 Fixed the issue where the agent name was repeated and the save button… (Zhi-a, Oct 23, 2025)
01e9595 ✨ Flowcharts Content Rendering Support #620 (WMC001, Oct 22, 2025)
03e0b24 🐛 Bugfix: knowledgebase creation fail when upload a large file (with … (Jasonxia007, Oct 23, 2025)
68e2f1e Fix the issue of github unit test pipeline failing (Zhi-a, Oct 23, 2025)
d7fb6fd 🐛 Bugfix: knowledgebase creation fail when upload a large file (with … (Jasonxia007, Oct 23, 2025)
3f8922e 🐛 Bugfix: knowledgebase creation fail when upload a large file (with … (Jasonxia007, Oct 23, 2025)
2c74b9e Merge remote-tracking branch 'origin/xyc/kb_task_bug' into xyc/kb_tas… (Jasonxia007, Oct 23, 2025)
8c9c4ab Fix unit test cause by version of opentelemetry (WMC001, Oct 23, 2025)
86da03a Fix the issue of github unit test pipeline failing (Zhi-a, Oct 23, 2025)
b363c58 🐛 Bugfix: knowledgebase creation fail when upload a large file (with … (Jasonxia007, Oct 23, 2025)
6ced5cb Supplementary unit tests (Zhi-a, Oct 23, 2025)
e9ea482 Supplementary unit tests (Zhi-a, Oct 23, 2025)
69512d2 🐛 modelscope mcp tool test run has no inputs #1398 (Oct 23, 2025)
d7510da 🐛 knowledgebase creation fail when upload a large file (with over 100… (Phinease, Oct 23, 2025)
d71de13 ✨ Flowcharts Content Rendering Support #620 (Phinease, Oct 23, 2025)
999d9dd 🐛 modelscope mcp tool test run has no inputs #1398 (Phinease, Oct 23, 2025)
ebc63e8 🐛 Automatically defaulting to the previously configured model to summ… (Phinease, Oct 23, 2025)
b4f9a7b 🐛 Fixed where the agent did not select the default large language model (Phinease, Oct 23, 2025)
4df73b0 git commit -m "feat: integrate knowledge base summarization service a… (Mermaid97, Oct 23, 2025)
3fca43d git commit -m "feat: repair core code" (Mermaid97, Oct 23, 2025)
3b36710 repair test case (Mermaid97, Oct 23, 2025)
726400a Specify versions for OpenTelemetry dependencies (Mermaid97, Oct 23, 2025)
880a3f4 test: Add comprehensive coverage tests for document_vector_utils (Mermaid97, Oct 23, 2025)
9bfc2c0 test: Add comprehensive coverage tests for elasticsearch_core (Mermaid97, Oct 23, 2025)
aa3b7d0 test: Add comprehensive coverage tests for document_vector_utils unco… (Mermaid97, Oct 23, 2025)
f1be313 repair test case improve (Mermaid97, Oct 24, 2025)
1d2446c Improve document vector utils test coverage and clean up test files (Mermaid97, Oct 24, 2025)
438df82 git commit -m "feat: integrate knowledge base summarization service a… (Mermaid97, Oct 23, 2025)
33e808d git commit -m "feat: repair core code" (Mermaid97, Oct 23, 2025)
92fd139 git commit -m "feat: repair test requirements" (Mermaid97, Oct 23, 2025)
8f6aec6 repair test case (Mermaid97, Oct 23, 2025)
cadd91c Specify versions for OpenTelemetry dependencies (Mermaid97, Oct 23, 2025)
f5bf531 test: Add comprehensive coverage tests for document_vector_utils (Mermaid97, Oct 23, 2025)
f5ea95e test: Add comprehensive coverage tests for document_vector_utils unco… (Mermaid97, Oct 23, 2025)
422da8f Add new sql for default large language model. (Zhi-a, Oct 24, 2025)
65fc9a5 Revert "Improve document vector utils test coverage and clean up test… (Mermaid97, Oct 24, 2025)
86f4185 🐛[Bug] Add new sql for default large language model (liutao12138, Oct 24, 2025)
8a003af ✨[Request] #694 implement document-level vectorization and K-means c… (liutao12138, Oct 24, 2025)
59a44fd Modify the knowledge base front-end rendering (Mermaid97, Oct 24, 2025)
5ae6e49 [release] 🐛 Modify the knowledge base front-end rendering #1458 #1456 (liutao12138, Oct 25, 2025)
2 changes: 2 additions & 0 deletions backend/consts/model.py
@@ -211,6 +211,8 @@ class AgentInfoRequest(BaseModel):
     constraint_prompt: Optional[str] = None
     few_shots_prompt: Optional[str] = None
     enabled: Optional[bool] = None
+    business_logic_model_name: Optional[str] = None
+    business_logic_model_id: Optional[int] = None


 class AgentIDRequest(BaseModel):
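The two request fields added above are optional with None defaults, so existing payloads keep validating. A minimal, self-contained sketch mirroring the excerpt (the concrete values are hypothetical, not from the PR):

# Hedged sketch of the extended request model; values are illustrative.
from typing import Optional
from pydantic import BaseModel

class AgentInfoRequest(BaseModel):
    constraint_prompt: Optional[str] = None
    few_shots_prompt: Optional[str] = None
    enabled: Optional[bool] = None
    business_logic_model_name: Optional[str] = None
    business_logic_model_id: Optional[int] = None

# Omitting the new fields still validates, so older clients are unaffected:
req = AgentInfoRequest(enabled=True, business_logic_model_id=42)  # 42 is a made-up ID
assert req.business_logic_model_name is None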
49 changes: 35 additions & 14 deletions backend/data_process/tasks.py
@@ -201,8 +201,8 @@ def process(
         f"[{self.request.id}] PROCESS TASK: File size: {file_size_mb:.2f}MB")

     # The unified actor call, mapping 'file' source_type to 'local' destination
-    # Submit Ray work and do not block here
-    logger.debug(
+    # Submit Ray work and WAIT for processing to complete
+    logger.info(
         f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for source='{source}', strategy='{chunking_strategy}', destination='{source_type}'")
     chunks_ref = actor.process_file.remote(
         source,
@@ -211,10 +211,17 @@
         task_id=task_id,
         **params
     )
-    # Persist chunks into Redis via Ray to decouple Celery
+    # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state)
+    logger.info(
+        f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...")
+    chunks = ray.get(chunks_ref)
+    logger.info(
+        f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks")
+
+    # Persist chunks into Redis via Ray (fire-and-forget, don't block)
     redis_key = f"dp:{task_id}:chunks"
-    actor.store_chunks_in_redis.remote(redis_key, chunks_ref)
-    logger.debug(
+    actor.store_chunks_in_redis.remote(redis_key, chunks)
+    logger.info(
         f"[{self.request.id}] PROCESS TASK: Scheduled store_chunks_in_redis for key '{redis_key}'")

     end_time = time.time()
@@ -229,7 +236,7 @@ def process(
         f"[{self.request.id}] PROCESS TASK: Processing from URL: {source}")

     # For URL source, core.py expects a non-local destination to trigger URL fetching
-    logger.debug(
+    logger.info(
         f"[{self.request.id}] PROCESS TASK: Submitting Ray processing for URL='{source}', strategy='{chunking_strategy}', destination='{source_type}'")
     chunks_ref = actor.process_file.remote(
         source,
@@ -238,11 +245,19 @@
         task_id=task_id,
         **params
     )
-    # Persist chunks into Redis via Ray to decouple Celery
+    # Wait for Ray processing to complete (this keeps task in STARTED/"PROCESSING" state)
+    logger.info(
+        f"[{self.request.id}] PROCESS TASK: Waiting for Ray processing to complete...")
+    chunks = ray.get(chunks_ref)
+    logger.info(
+        f"[{self.request.id}] PROCESS TASK: Ray processing completed, got {len(chunks) if chunks else 0} chunks")
+
+    # Persist chunks into Redis via Ray (fire-and-forget, don't block)
     redis_key = f"dp:{task_id}:chunks"
-    actor.store_chunks_in_redis.remote(redis_key, chunks_ref)
-    logger.debug(
+    actor.store_chunks_in_redis.remote(redis_key, chunks)
+    logger.info(
         f"[{self.request.id}] PROCESS TASK: Scheduled store_chunks_in_redis for key '{redis_key}'")

     end_time = time.time()
     elapsed_time = end_time - start_time
     logger.info(
@@ -253,24 +268,25 @@ def process(
         raise NotImplementedError(
             f"Source type '{source_type}' not yet supported")

-    # Update task state to SUCCESS with metadata (without materializing chunks here)
+    # Update task state to SUCCESS after Ray processing completes
+    # This transitions from STARTED (PROCESSING) to SUCCESS (WAIT_FOR_FORWARDING)
     self.update_state(
         state=states.SUCCESS,
         meta={
-            'chunks_count': None,
+            'chunks_count': len(chunks) if chunks else 0,
             'processing_time': elapsed_time,
             'source': source,
             'index_name': index_name,
             'original_filename': original_filename,
             'task_name': 'process',
             'stage': 'text_extracted',
             'file_size_mb': file_size_mb,
-            'processing_speed_mb_s': file_size_mb / elapsed_time if elapsed_time > 0 else 0
+            'processing_speed_mb_s': file_size_mb / elapsed_time if file_size_mb > 0 and elapsed_time > 0 else 0
         }
     )

     logger.info(
-        f"[{self.request.id}] PROCESS TASK: Submitted for Ray processing; result will be fetched by forward")
+        f"[{self.request.id}] PROCESS TASK: Processing complete, waiting for forward task")

     # Prepare data for the next task in the chain; pass redis_key
     returned_data = {
@@ -563,6 +579,9 @@ async def index_documents():
             "source": original_source,
             "original_filename": original_filename
         }, ensure_ascii=False))
+
+        logger.info(
+            f"[{self.request.id}] FORWARD TASK: Starting ES indexing for {len(formatted_chunks)} chunks to index '{original_index_name}'...")
         es_result = run_async(index_documents())
         logger.debug(
             f"[{self.request.id}] FORWARD TASK: API response from main_server for source '{original_source}': {es_result}")
@@ -605,6 +624,8 @@ async def index_documents():
             "original_filename": original_filename
         }, ensure_ascii=False))
         end_time = time.time()
+        logger.info(
+            f"[{self.request.id}] FORWARD TASK: Updating task state to SUCCESS after ES indexing completion")
         self.update_state(
             state=states.SUCCESS,
             meta={
@@ -620,7 +641,7 @@
         )

         logger.info(
-            f"Stored {len(chunks)} chunks to index {original_index_name} in {end_time - start_time:.2f}s")
+            f"[{self.request.id}] FORWARD TASK: Successfully stored {len(chunks)} chunks to index {original_index_name} in {end_time - start_time:.2f}s")
         return {
             'task_id': task_id,
             'source': original_source,
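The change above replaces fire-and-forget submission with a blocking wait: ray.get holds the Celery task in its STARTED ("PROCESSING") state until the actor returns the chunks, and only the Redis write stays fire-and-forget. A minimal, self-contained sketch of that pattern, with simplified stand-ins for the real actor and its process_file/store_chunks_in_redis methods:

import ray

ray.init(ignore_reinit_error=True)

@ray.remote
class ProcessorActor:
    def process_file(self, source: str) -> list:
        # Stand-in for the real chunking logic.
        return [f"chunk-{i}-of-{source}" for i in range(3)]

    def store_chunks_in_redis(self, redis_key: str, chunks: list) -> None:
        # Stand-in for the real Redis write.
        print(f"stored {len(chunks)} chunks under {redis_key}")

actor = ProcessorActor.remote()
chunks_ref = actor.process_file.remote("report.pdf")  # submit work to Ray
chunks = ray.get(chunks_ref)                          # block until processing completes
actor.store_chunks_in_redis.remote("dp:task-1:chunks", chunks)  # fire-and-forget persist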
2 changes: 2 additions & 0 deletions backend/database/db_models.py
@@ -206,6 +206,8 @@ class AgentInfo(TableBase):
         Boolean, doc="Whether to provide the running summary to the manager agent")
     business_description = Column(
         Text, doc="Manually entered by the user to describe the entire business process")
+    business_logic_model_name = Column(String(100), doc="Model name used for business logic prompt generation")
+    business_logic_model_id = Column(Integer, doc="Model ID used for business logic prompt generation, foreign key reference to model_record_t.model_id")


 class ToolInstance(TableBase):
24 changes: 24 additions & 0 deletions backend/prompts/cluster_summary_agent.yaml
@@ -0,0 +1,24 @@
system_prompt: |-
You are a professional knowledge summarization assistant. Your task is to generate a concise summary of a document cluster based on multiple documents.

**Summary Requirements:**
1. The input contains multiple documents (each document has title and content snippets)
2. You need to extract the common themes and key topics from these documents
3. Generate a summary that represents the collective content of the cluster
4. The summary should be accurate, coherent, and written in natural language
5. Keep the summary within the specified word limit

**Guidelines:**
- Focus on identifying shared themes and topics across documents
- Highlight key concepts, domains, or subject matter
- Use clear and concise language
- Avoid listing individual document titles unless necessary
- The summary should help users understand what this group of documents covers

user_prompt: |
Please generate a concise summary of the following document cluster:

{{ cluster_content }}

Summary ({{ max_words }} words):

31 changes: 31 additions & 0 deletions backend/prompts/cluster_summary_reduce.yaml
@@ -0,0 +1,31 @@
system_prompt: |-
You are a professional cluster summarization assistant. Your task is to merge multiple document summaries into a cohesive cluster summary.

**Summary Requirements:**
1. The input contains summaries of multiple documents that belong to the same cluster
2. These documents share similar themes or topics (grouped by clustering)
3. You need to synthesize a unified summary that captures the collective content
4. The summary should highlight common themes and key information across documents
5. Keep the summary within the specified word limit

**Guidelines:**
- Identify shared themes and topics across documents
- Highlight common concepts and subject matter
- Use clear and concise language
- Avoid listing individual document titles unless necessary
- Focus on what this group of documents collectively covers
- The summary should be coherent and represent the cluster's unified content
- **Important: Do not use any separators (like ---, ***, etc.), generate plain text summary only**

user_prompt: |
Please generate a unified summary of the following document cluster based on individual document summaries:

{{ document_summaries }}

**Important Reminders:**
- Do not use any separators (like ---, ***, ===, etc.)
- Do not include document titles or filenames
- Generate plain text summary content only

Cluster Summary ({{ max_words }} words):

32 changes: 32 additions & 0 deletions backend/prompts/cluster_summary_reduce_zh.yaml
@@ -0,0 +1,32 @@
system_prompt: |-
你是一个专业的簇总结助手。你的任务是将多个文档总结合并为一个连贯的簇总结。

**总结要求:**
1. 输入包含属于同一簇的多个文档的总结
2. 这些文档共享相似的主题或话题(通过聚类分组)
3. 你需要综合成一个统一的总结,捕捉集合内容
4. 总结应突出文档间的共同主题和关键信息
5. 保持在指定的字数限制内

**指导原则:**
- 识别文档间的共同主题和话题
- 突出共同概念和主题内容
- 使用清晰简洁的语言
- 除非必要,避免列出单个文档标题
- 专注于这组文档共同涵盖的内容
- 总结应连贯且代表簇的统一内容
- 确保准确、全面,明确关键实体,不要遗漏重要信息
- **重要:不要使用任何分隔符(如---、***等),直接生成纯文本总结**

user_prompt: |
请根据以下文档总结生成统一的簇总结:

{{ document_summaries }}

**重要提醒:**
- 不要使用任何分隔符(如---、***、===等)
- 不要包含文档标题或文件名
- 直接生成纯文本总结内容

簇总结({{ max_words }}字):

28 changes: 28 additions & 0 deletions backend/prompts/document_summary_agent.yaml
@@ -0,0 +1,28 @@
system_prompt: |-
You are a professional document summarization assistant. Your task is to generate a concise summary of a document based on its key content snippets.

**Summary Requirements:**
1. The input contains key snippets from a document (typically from beginning, middle, and end sections)
2. You need to extract the main themes, topics, and key information
3. Generate a summary that represents the document's core content
4. The summary should be accurate, coherent, and concise
5. Keep the summary within the specified word limit

**Guidelines:**
- Focus on identifying main themes and key topics
- Highlight important concepts and information
- Use clear and concise language
- Avoid redundancy and unnecessary details
- The summary should help users understand what the document covers
- **Important: Do not use any separators (like ---, ***, etc.), generate plain text summary only**

user_prompt: |
Please generate a concise summary of the following document:

Document name: {{ filename }}

Content snippets:
{{ content }}

Summary ({{ max_words }} words):

29 changes: 29 additions & 0 deletions backend/prompts/document_summary_agent_zh.yaml
@@ -0,0 +1,29 @@
system_prompt: |-
你是一个专业的文档总结助手。你的任务是根据文档的关键内容片段生成简洁的总结。

**总结要求:**
1. 输入包含文档的关键片段(通常来自开头、中间和结尾部分)
2. 你需要提取主要主题、话题和关键信息
3. 生成能代表文档核心内容的总结
4. 总结应准确、连贯且简洁
5. 保持在指定的字数限制内

**指导原则:**
- 专注于识别主要主题和关键话题
- 突出重要概念和信息
- 使用清晰简洁的语言
- 避免冗余和不必要的细节
- 总结应帮助用户理解文档涵盖的内容
- 确保总结准确、全面,不要遗漏关键实体和信息
- **重要:不要使用任何分隔符(如---、***等),直接生成纯文本总结**

user_prompt: |
请为以下文档生成简洁的总结:

文档名称:{{ filename }}

内容片段:
{{ content }}

总结({{ max_words }}字):

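All five prompt files above share the same shape: a system_prompt string plus a user_prompt containing Jinja-style placeholders ({{ content }}, {{ max_words }}, and so on). A hedged sketch of how such a file could be loaded and rendered — the use of PyYAML plus Jinja2, the path, and the values are assumptions for illustration, not the project's confirmed loading code:

import yaml
from jinja2 import Template

# Path and variable values are illustrative only.
with open("backend/prompts/document_summary_agent.yaml", encoding="utf-8") as f:
    prompt = yaml.safe_load(f)

system_prompt = prompt["system_prompt"]
user_prompt = Template(prompt["user_prompt"]).render(
    filename="quarterly_report.pdf",    # hypothetical document name
    content="...key snippets here...",  # hypothetical content snippets
    max_words=100,
)
# system_prompt and user_prompt can now be passed to the summarization model.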
4 changes: 3 additions & 1 deletion backend/pyproject.toml
@@ -14,7 +14,9 @@ dependencies = [
     "pyyaml>=6.0.2",
     "redis>=5.0.0",
     "fastmcp==2.12.0",
-    "langchain>=0.3.26"
+    "langchain>=0.3.26",
+    "scikit-learn>=1.0.0",
+    "numpy>=1.24.0"
 ]

 [project.optional-dependencies]
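The new scikit-learn and numpy dependencies back the document-level vectorization and K-means clustering feature referenced in the commit list (#694). A minimal sketch of K-means over document embeddings — the dimensionality, cluster count, and data are illustrative, not the project's actual document_vector_utils API:

import numpy as np
from sklearn.cluster import KMeans

rng = np.random.default_rng(0)
doc_vectors = rng.normal(size=(20, 8))  # 20 documents, 8-dim embeddings (illustrative)

kmeans = KMeans(n_clusters=3, n_init=10, random_state=0)
labels = kmeans.fit_predict(doc_vectors)

# Group document indices by cluster so each cluster can be summarized separately.
clusters = {}
for idx, label in enumerate(labels):
    clusters.setdefault(int(label), []).append(idx)
print(clusters)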
7 changes: 7 additions & 0 deletions backend/services/agent_service.py
@@ -236,6 +236,13 @@ async def get_agent_info_impl(agent_id: int, tenant_id: str):
     else:
         agent_info["model_name"] = None

+    # Get business logic model display name from model_id
+    if agent_info.get("business_logic_model_id") is not None:
+        business_logic_model_info = get_model_by_model_id(agent_info["business_logic_model_id"])
+        agent_info["business_logic_model_name"] = business_logic_model_info.get("display_name", None) if business_logic_model_info is not None else None
+    elif "business_logic_model_name" not in agent_info:
+        agent_info["business_logic_model_name"] = None
+
     return agent_info
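The block added above resolves the display name in a fixed order: look up by the stored model ID first, fall back to any stored name, otherwise None. The same logic as a standalone sketch, with get_model_by_model_id stubbed out (the real function queries model_record_t; the ID and name here are made up):

def get_model_by_model_id(model_id):
    # Stub; the real service reads model_record_t.
    return {"display_name": "GPT-4o"} if model_id == 42 else None

def resolve_business_logic_model_name(agent_info: dict) -> dict:
    if agent_info.get("business_logic_model_id") is not None:
        info = get_model_by_model_id(agent_info["business_logic_model_id"])
        agent_info["business_logic_model_name"] = (
            info.get("display_name") if info is not None else None
        )
    elif "business_logic_model_name" not in agent_info:
        agent_info["business_logic_model_name"] = None
    return agent_info

print(resolve_business_logic_model_name({"business_logic_model_id": 42}))
# -> {'business_logic_model_id': 42, 'business_logic_model_name': 'GPT-4o'}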