diff --git a/python/dify_plugin/interfaces/agent/__init__.py b/python/dify_plugin/interfaces/agent/__init__.py index 1b9b0d7..538a55a 100644 --- a/python/dify_plugin/interfaces/agent/__init__.py +++ b/python/dify_plugin/interfaces/agent/__init__.py @@ -9,7 +9,7 @@ from dify_plugin.entities.model import AIModelEntity, ModelPropertyKey from dify_plugin.entities.model.llm import LLMModelConfig, LLMUsage from dify_plugin.entities.model.message import PromptMessage, PromptMessageTool -from dify_plugin.entities.tool import ToolDescription, ToolIdentity, ToolParameter +from dify_plugin.entities.tool import ToolDescription, ToolIdentity, ToolParameter, ToolProviderType from dify_plugin.interfaces.tool import ToolLike, ToolProvider @@ -64,6 +64,8 @@ class ToolEntity(BaseModel): description: Optional[ToolDescription] = None output_schema: Optional[dict] = None has_runtime_parameters: bool = Field(default=False, description="Whether the tool has runtime parameters") + # provider type + provider_type: ToolProviderType = ToolProviderType.BUILT_IN # runtime parameters runtime_parameters: Mapping[str, Any] = {} diff --git a/python/examples/agent/.env.example b/python/examples/agent/.env.example new file mode 100644 index 0000000..2fa2ad9 --- /dev/null +++ b/python/examples/agent/.env.example @@ -0,0 +1,4 @@ +INSTALL_METHOD=remote +REMOTE_INSTALL_HOST=debug-plugin.dify.dev +REMOTE_INSTALL_PORT=5003 +REMOTE_INSTALL_KEY=ae1aa1c9-0af4-43db-b6d4-4fa9e6bfb646 diff --git a/python/examples/agent/README.md b/python/examples/agent/README.md new file mode 100644 index 0000000..4e9b491 --- /dev/null +++ b/python/examples/agent/README.md @@ -0,0 +1,24 @@ +# Overview +The Agent node in Dify Chatflow/Workflow lets LLMs autonomously use tools. This plugin features two official Dify Agent reasoning strategies, enabling LLMs to dynamically select and run tools during runtime for multi-step problem-solving. + +## Strategies + +### 1. Function Calling +Function Calling maps user commands to specific functions or tools. The LLM identifies the user's intent, decides which function to call, and extracts the required parameters. It is a straightforward mechanism for invoking external capabilities. + +![](./_assets/function_calling.png) + +#### Pros: +- **Precise:** Directly calls the right tool for defined tasks, avoiding complex reasoning. +- **Easy External Integration:** Integrates external APIs and tools as callable functions. +- **Structured Output:** Provides structured function call information for easy processing. + +### 2. ReAct (Reason + Act) +ReAct alternates between the LLM reasoning about the situation and taking actions. The LLM analyzes the current state and goal, selects and uses a tool, and then uses the tool's output for the next thought and action. This cycle repeats until the problem is resolved. + +![](./_assets/react.png) + +#### Pros: +- **Leverages External Information:** Effectively uses external tools to gather information for tasks the model cannot handle alone. +- **Explainable Reasoning:** Interwoven reasoning and action steps allow some tracking of the Agent's process. +- **Wide Applicability:** Suitable for tasks requiring external knowledge or specific actions, such as Q&A, information retrieval, and task execution. 
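To make the Thought/Action/Observation cycle above concrete, the sketch below shows a deliberately minimal ReAct-style loop in plain Python. It is an illustration only, not this plugin's implementation (that lives in `strategies/ReAct.py` further down in this diff); `call_llm` and the `tools` mapping are hypothetical placeholders supplied by the caller.

```python
import json


def react_loop(question: str, tools: dict, call_llm, max_iterations: int = 3) -> str:
    """Toy ReAct loop: alternate model reasoning with tool execution."""
    scratchpad = ""
    for _ in range(max_iterations):
        # Ask the model for its next thought and action, given the scratchpad so far.
        reply = call_llm(question=question, scratchpad=scratchpad)
        action = json.loads(reply["action_json"])  # e.g. {"action": "search", "action_input": {...}}
        if action["action"] == "Final Answer":
            return str(action["action_input"])
        # Run the chosen tool and feed its output back as the next Observation.
        observation = tools[action["action"]](action["action_input"])
        scratchpad += (
            f"Thought: {reply['thought']}\n"
            f"Action: {reply['action_json']}\n"
            f"Observation: {observation}\n"
        )
    return "Stopped after reaching the iteration limit."
```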
\ No newline at end of file diff --git a/python/examples/agent/_assets/function_calling.png b/python/examples/agent/_assets/function_calling.png new file mode 100644 index 0000000..f0ce6b0 Binary files /dev/null and b/python/examples/agent/_assets/function_calling.png differ diff --git a/python/examples/agent/_assets/react.png b/python/examples/agent/_assets/react.png new file mode 100644 index 0000000..271d6b8 Binary files /dev/null and b/python/examples/agent/_assets/react.png differ diff --git a/python/examples/agent/manifest.yaml b/python/examples/agent/manifest.yaml index 9fcd6cf..36e5f52 100644 --- a/python/examples/agent/manifest.yaml +++ b/python/examples/agent/manifest.yaml @@ -1,9 +1,10 @@ -version: 0.0.2 +version: 0.0.3 type: plugin author: "langgenius" name: "agent" label: - en_US: "Agent" + en_US: "Dify Agent Strategies" + zh_Hans: "Dify Agent 策略" created_at: "2024-07-12T08:03:44.658609186Z" icon: icon.svg description: diff --git a/python/examples/agent/output_parser/cot_output_parser.py b/python/examples/agent/output_parser/cot_output_parser.py new file mode 100644 index 0000000..82e32d8 --- /dev/null +++ b/python/examples/agent/output_parser/cot_output_parser.py @@ -0,0 +1,212 @@ +import json +import re +from collections.abc import Generator +from typing import Union + +from dify_plugin.entities.model.llm import LLMResultChunk +from dify_plugin.interfaces.agent import AgentScratchpadUnit + + +class CotAgentOutputParser: + @classmethod + def handle_react_stream_output( + cls, llm_response: Generator[LLMResultChunk, None, None], usage_dict: dict + ) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]: + def parse_action(json_str): + try: + action = json.loads(json_str, strict=False) + action_name = None + action_input = None + + # cohere always returns a list + if isinstance(action, list) and len(action) == 1: + action = action[0] + + for key, value in action.items(): + if "input" in key.lower(): + action_input = value + else: + action_name = value + + if action_name is not None and action_input is not None: + return AgentScratchpadUnit.Action( + action_name=action_name, + action_input=action_input, + ) + else: + return json_str or "" + except: + return json_str or "" + + def extra_json_from_code_block( + code_block, + ) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]: + code_blocks = re.findall(r"```(.*?)```", code_block, re.DOTALL) + if not code_blocks: + return + for block in code_blocks: + json_text = re.sub( + r"^[a-zA-Z]+\n", "", block.strip(), flags=re.MULTILINE + ) + yield parse_action(json_text) + + code_block_cache = "" + code_block_delimiter_count = 0 + in_code_block = False + json_cache = "" + json_quote_count = 0 + in_json = False + got_json = False + + action_cache = "" + action_str = "action:" + action_idx = 0 + + thought_cache = "" + thought_str = "thought:" + thought_idx = 0 + + last_character = "" + + for response in llm_response: + if response.delta.usage: + usage_dict["usage"] = response.delta.usage + response_content = response.delta.message.content + if not isinstance(response_content, str): + continue + + # stream + index = 0 + while index < len(response_content): + steps = 1 + delta = response_content[index : index + steps] + yield_delta = False + + if delta == "`": + last_character = delta + code_block_cache += delta + code_block_delimiter_count += 1 + else: + if not in_code_block: + if code_block_delimiter_count > 0: + last_character = delta + yield code_block_cache + code_block_cache = "" + else: + last_character = delta 
+ code_block_cache += delta + code_block_delimiter_count = 0 + + if not in_code_block and not in_json: + if delta.lower() == action_str[action_idx] and action_idx == 0: + if last_character not in {"\n", " ", ""}: + yield_delta = True + else: + last_character = delta + action_cache += delta + action_idx += 1 + if action_idx == len(action_str): + action_cache = "" + action_idx = 0 + index += steps + continue + elif delta.lower() == action_str[action_idx] and action_idx > 0: + last_character = delta + action_cache += delta + action_idx += 1 + if action_idx == len(action_str): + action_cache = "" + action_idx = 0 + index += steps + continue + else: + if action_cache: + last_character = delta + yield action_cache + action_cache = "" + action_idx = 0 + + if delta.lower() == thought_str[thought_idx] and thought_idx == 0: + if last_character not in {"\n", " ", ""}: + yield_delta = True + else: + last_character = delta + thought_cache += delta + thought_idx += 1 + if thought_idx == len(thought_str): + thought_cache = "" + thought_idx = 0 + index += steps + continue + elif delta.lower() == thought_str[thought_idx] and thought_idx > 0: + last_character = delta + thought_cache += delta + thought_idx += 1 + if thought_idx == len(thought_str): + thought_cache = "" + thought_idx = 0 + index += steps + continue + else: + if thought_cache: + last_character = delta + yield thought_cache + thought_cache = "" + thought_idx = 0 + + if yield_delta: + index += steps + last_character = delta + yield delta + continue + + if code_block_delimiter_count == 3: + if in_code_block: + last_character = delta + yield from extra_json_from_code_block(code_block_cache) + code_block_cache = "" + + in_code_block = not in_code_block + code_block_delimiter_count = 0 + + if not in_code_block: + # handle single json + if delta == "{": + json_quote_count += 1 + in_json = True + last_character = delta + json_cache += delta + elif delta == "}": + last_character = delta + json_cache += delta + if json_quote_count > 0: + json_quote_count -= 1 + if json_quote_count == 0: + in_json = False + got_json = True + index += steps + continue + else: + if in_json: + last_character = delta + json_cache += delta + + if got_json: + got_json = False + last_character = delta + yield parse_action(json_cache) + json_cache = "" + json_quote_count = 0 + in_json = False + + if not in_code_block and not in_json: + last_character = delta + yield delta.replace("`", "") + + index += steps + + if code_block_cache: + yield code_block_cache + + if json_cache: + yield parse_action(json_cache) diff --git a/python/examples/agent/prompt/template.py b/python/examples/agent/prompt/template.py new file mode 100644 index 0000000..ef64fd2 --- /dev/null +++ b/python/examples/agent/prompt/template.py @@ -0,0 +1,106 @@ +ENGLISH_REACT_COMPLETION_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible. + +{{instruction}} + +You have access to the following tools: + +{{tools}} + +Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). +Valid "action" values: "Final Answer" or {{tool_names}} + +Provide only ONE action per $JSON_BLOB, as shown: + +``` +{ + "action": $TOOL_NAME, + "action_input": $ACTION_INPUT +} +``` + +Follow this format: + +Question: input question to answer +Thought: consider previous and subsequent steps +Action: +``` +$JSON_BLOB +``` +Observation: action result +... 
(repeat Thought/Action/Observation N times) +Thought: I know what to respond +Action: +``` +{ + "action": "Final Answer", + "action_input": "Final response to human" +} +``` + +Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:. +{{historic_messages}} +Question: {{query}} +{{agent_scratchpad}} +Thought:""" # noqa: E501 + + +ENGLISH_REACT_COMPLETION_AGENT_SCRATCHPAD_TEMPLATES = """Observation: {{observation}} +Thought:""" + +ENGLISH_REACT_CHAT_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible. + +{{instruction}} + +You have access to the following tools: + +{{tools}} + +Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input). +Valid "action" values: "Final Answer" or {{tool_names}} + +Provide only ONE action per $JSON_BLOB, as shown: + +``` +{ + "action": $TOOL_NAME, + "action_input": $ACTION_INPUT +} +``` + +Follow this format: + +Question: input question to answer +Thought: consider previous and subsequent steps +Action: +``` +$JSON_BLOB +``` +Observation: action result +... (repeat Thought/Action/Observation N times) +Thought: I know what to respond +Action: +``` +{ + "action": "Final Answer", + "action_input": "Final response to human" +} +``` + +Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:. +""" # noqa: E501 + + +ENGLISH_REACT_CHAT_AGENT_SCRATCHPAD_TEMPLATES = "" + +REACT_PROMPT_TEMPLATES = { + "english": { + "chat": { + "prompt": ENGLISH_REACT_CHAT_PROMPT_TEMPLATES, + "agent_scratchpad": ENGLISH_REACT_CHAT_AGENT_SCRATCHPAD_TEMPLATES, + }, + "completion": { + "prompt": ENGLISH_REACT_COMPLETION_PROMPT_TEMPLATES, + "agent_scratchpad": ENGLISH_REACT_COMPLETION_AGENT_SCRATCHPAD_TEMPLATES, + }, + } +} diff --git a/python/examples/agent/provider/agent.yaml b/python/examples/agent/provider/agent.yaml index fb560e7..7be6dbb 100644 --- a/python/examples/agent/provider/agent.yaml +++ b/python/examples/agent/provider/agent.yaml @@ -12,6 +12,7 @@ identity: icon: icon.svg strategies: - strategies/function_calling.yaml + - strategies/ReAct.yaml extra: python: source: provider/agent.py diff --git a/python/examples/agent/requirements.txt b/python/examples/agent/requirements.txt index 32e4ff7..d444270 100644 --- a/python/examples/agent/requirements.txt +++ b/python/examples/agent/requirements.txt @@ -1 +1 @@ -dify_plugin~=0.0.1b50 +dify_plugin~=0.0.1b60 diff --git a/python/examples/agent/strategies/ReAct.py b/python/examples/agent/strategies/ReAct.py new file mode 100644 index 0000000..d51baf2 --- /dev/null +++ b/python/examples/agent/strategies/ReAct.py @@ -0,0 +1,537 @@ +import contextlib +import json +import time +from collections.abc import Generator, Mapping +from typing import Any, Optional, cast + +from output_parser.cot_output_parser import CotAgentOutputParser +from prompt.template import REACT_PROMPT_TEMPLATES +from pydantic import BaseModel, Field + +from dify_plugin.entities.agent import AgentInvokeMessage +from dify_plugin.entities.model.llm import LLMModelConfig, LLMUsage +from dify_plugin.entities.model.message import ( + AssistantPromptMessage, + PromptMessage, + SystemPromptMessage, + ToolPromptMessage, + UserPromptMessage, +) +from dify_plugin.entities.tool import ToolInvokeMessage, ToolProviderType +from 
dify_plugin.interfaces.agent import ( + AgentModelConfig, + AgentScratchpadUnit, + AgentStrategy, + ToolEntity, +) + +ignore_observation_providers = ["wenxin"] + + +class ReActParams(BaseModel): + query: str + instruction: str | None + model: AgentModelConfig + tools: list[ToolEntity] | None + inputs: dict[str, Any] = {} + maximum_iterations: int = 3 + + +class AgentPromptEntity(BaseModel): + """ + Agent Prompt Entity. + """ + + first_prompt: str + next_iteration: str + + +class ToolInvokeMeta(BaseModel): + """ + Tool invoke meta + """ + + time_cost: float = Field(..., description="The time cost of the tool invoke") + error: Optional[str] = None + tool_config: Optional[dict] = None + + @classmethod + def empty(cls) -> "ToolInvokeMeta": + """ + Get an empty instance of ToolInvokeMeta + """ + return cls(time_cost=0.0, error=None, tool_config={}) + + @classmethod + def error_instance(cls, error: str) -> "ToolInvokeMeta": + """ + Get an instance of ToolInvokeMeta with error + """ + return cls(time_cost=0.0, error=error, tool_config={}) + + def to_dict(self) -> dict: + return { + "time_cost": self.time_cost, + "error": self.error, + "tool_config": self.tool_config, + } + + +class ReActAgentStrategy(AgentStrategy): + def _invoke(self, parameters: dict[str, Any]) -> Generator[AgentInvokeMessage]: + react_params = ReActParams(**parameters) + query = react_params.query + model = react_params.model + agent_scratchpad = [] + history_prompt_messages: list[PromptMessage] = [] + current_session_messages = [] + self._organize_historic_prompt_messages( + history_prompt_messages, current_session_messages=current_session_messages + ) + tools = react_params.tools + tool_instances = {tool.identity.name: tool for tool in tools} if tools else {} + react_params.model.completion_params = react_params.model.completion_params or {} + # check model mode + stop = react_params.model.completion_params.get("stop", []) if react_params.model.completion_params else [] + + if "Observation" not in stop and model.provider not in ignore_observation_providers: + stop.append("Observation") + # init instruction + inputs = react_params.inputs + instruction = "" + self._instruction = self._fill_in_inputs_from_external_data_tools(instruction, inputs) + + iteration_step = 1 + max_iteration_steps = react_params.maximum_iterations + 1 + + # convert tools into ModelRuntime Tool format + prompt_messages_tools = self._init_prompt_tools(tools) + self._prompt_messages_tools = prompt_messages_tools + + run_agent_state = True + llm_usage: dict[str, Optional[LLMUsage]] = {"usage": None} + final_answer = "" + prompt_messages = [] + while run_agent_state and iteration_step <= max_iteration_steps: + # continue to run until there is not any tool call + run_agent_state = False + round_started_at = time.perf_counter() + round_log = self.create_log_message( + label=f"ROUND {iteration_step}", + data={}, + metadata={ + "started_at": round_started_at, + }, + status=ToolInvokeMessage.LogMessage.LogStatus.START, + ) + yield round_log + if iteration_step == max_iteration_steps: + # the last iteration, remove all tools + self._prompt_messages_tools = [] + + message_file_ids: list[str] = [] + + # recalc llm max tokens + prompt_messages = self._organize_prompt_messages(agent_scratchpad, query) + if model.completion_params: + self.recalc_llm_max_tokens(model.entity, prompt_messages, model.completion_params) + # invoke model + chunks = self.session.model.llm.invoke( + model_config=LLMModelConfig(**model.model_dump(mode="json")), + 
prompt_messages=prompt_messages, + stream=True, + stop=stop, + ) + + usage_dict = {} + react_chunks = CotAgentOutputParser.handle_react_stream_output(chunks, usage_dict) + scratchpad = AgentScratchpadUnit( + agent_response="", + thought="", + action_str="", + observation="", + action=None, + ) + + model_started_at = time.perf_counter() + model_log = self.create_log_message( + label=f"{model.model} Thought", + data={}, + metadata={"start_at": model_started_at, "provider": model.provider}, + parent=round_log, + status=ToolInvokeMessage.LogMessage.LogStatus.START, + ) + yield model_log + + for chunk in react_chunks: + if isinstance(chunk, AgentScratchpadUnit.Action): + action = chunk + # detect action + assert scratchpad.agent_response is not None + scratchpad.agent_response += json.dumps(chunk.model_dump()) + + scratchpad.action_str = json.dumps(chunk.model_dump()) + scratchpad.action = action + else: + scratchpad.agent_response = scratchpad.agent_response or "" + scratchpad.thought = scratchpad.thought or "" + scratchpad.agent_response += chunk + scratchpad.thought += chunk + if scratchpad.is_final(): + yield self.create_text_message(chunk) + + scratchpad.thought = ( + scratchpad.thought.strip() if scratchpad.thought else "I am thinking about how to help you" + ) + agent_scratchpad.append(scratchpad) + + # get llm usage + if "usage" in usage_dict: + if usage_dict["usage"] is not None: + self.increase_usage(llm_usage, usage_dict["usage"]) + else: + usage_dict["usage"] = LLMUsage.empty_usage() + + if not scratchpad.is_final(): + pass + yield self.finish_log_message( + log=model_log, + data={ + "output": scratchpad.agent_response, + }, + metadata={ + "started_at": model_started_at, + "finished_at": time.perf_counter(), + "elapsed_time": time.perf_counter() - model_started_at, + "provider": model.provider, + "total_price": usage_dict["usage"].total_price if usage_dict["usage"] else 0, + "currency": usage_dict["usage"].currency if usage_dict["usage"] else "", + "total_tokens": usage_dict["usage"].total_tokens if usage_dict["usage"] else 0, + }, + ) + if not scratchpad.action: + final_answer = "" + else: + if scratchpad.action.action_name.lower() == "final answer": + # action is final answer, return final answer directly + try: + if isinstance(scratchpad.action.action_input, dict): + final_answer = json.dumps(scratchpad.action.action_input) + elif isinstance(scratchpad.action.action_input, str): + final_answer = scratchpad.action.action_input + else: + final_answer = f"{scratchpad.action.action_input}" + except json.JSONDecodeError: + final_answer = f"{scratchpad.action.action_input}" + else: + run_agent_state = True + # action is tool call, invoke tool + tool_call_started_at = time.perf_counter() + tool_name = scratchpad.action.action_name + tool_call_log = self.create_log_message( + label=f"CALL {tool_name}", + data={}, + metadata={ + "started_at": time.perf_counter(), + "provider": tool_instances[tool_name].identity.provider + if tool_instances.get(tool_name) + else "", + }, + parent=round_log, + status=ToolInvokeMessage.LogMessage.LogStatus.START, + ) + yield tool_call_log + tool_invoke_response, tool_invoke_meta = self._handle_invoke_action( + action=scratchpad.action, + tool_instances=tool_instances, + message_file_ids=message_file_ids, + ) + scratchpad.observation = tool_invoke_response + scratchpad.agent_response = tool_invoke_response + yield self.finish_log_message( + log=tool_call_log, + data={ + "output": tool_invoke_response, + "meta": tool_invoke_meta.to_dict(), + }, + metadata={ + 
"started_at": tool_call_started_at, + "provider": tool_instances[tool_name].identity.provider + if tool_instances.get(tool_name) + else "", + "finished_at": time.perf_counter(), + "elapsed_time": time.perf_counter() - tool_call_started_at, + }, + ) + + # update prompt tool message + for prompt_tool in self._prompt_messages_tools: + self.update_prompt_message_tool(tool_instances[prompt_tool.name], prompt_tool) + yield self.finish_log_message( + log=round_log, + data={ + "output": { + "llm_response": scratchpad.agent_response, + }, + }, + metadata={ + "started_at": round_started_at, + "finished_at": time.perf_counter(), + "elapsed_time": time.perf_counter() - round_started_at, + "total_price": usage_dict["usage"].total_price if usage_dict["usage"] else 0, + "currency": usage_dict["usage"].currency if usage_dict["usage"] else "", + "total_tokens": usage_dict["usage"].total_tokens if usage_dict["usage"] else 0, + }, + ) + iteration_step += 1 + + yield self.create_text_message(final_answer) + yield self.create_json_message( + { + "execution_metadata": { + "total_price": llm_usage["usage"].total_price if llm_usage["usage"] is not None else 0, + "currency": llm_usage["usage"].currency if llm_usage["usage"] is not None else "", + "total_tokens": llm_usage["usage"].total_tokens if llm_usage["usage"] is not None else 0, + } + } + ) + + def _organize_system_prompt(self) -> SystemPromptMessage: + """ + Organize system prompt + """ + + prompt_entity = AgentPromptEntity( + first_prompt=REACT_PROMPT_TEMPLATES["english"]["chat"]["prompt"], + next_iteration=REACT_PROMPT_TEMPLATES["english"]["chat"]["agent_scratchpad"], + ) + if not prompt_entity: + raise ValueError("Agent prompt configuration is not set") + first_prompt = prompt_entity.first_prompt + + system_prompt = ( + first_prompt.replace("{{instruction}}", self._instruction) + .replace( + "{{tools}}", + json.dumps([tool.model_dump(mode="json") for tool in self._prompt_messages_tools]), + ) + .replace( + "{{tool_names}}", + ", ".join([tool.name for tool in self._prompt_messages_tools]), + ) + ) + + return SystemPromptMessage(content=system_prompt) + + def _organize_user_query(self, query, prompt_messages: list[PromptMessage]) -> list[PromptMessage]: + """ + Organize user query + """ + prompt_messages.append(UserPromptMessage(content=query)) + + return prompt_messages + + def _organize_prompt_messages(self, agent_scratchpad: list, query: str) -> list[PromptMessage]: + """ + Organize + """ + # organize system prompt + system_message = self._organize_system_prompt() + + # organize current assistant messages + agent_scratchpad = agent_scratchpad + if not agent_scratchpad: + assistant_messages = [] + else: + assistant_message = AssistantPromptMessage(content="") + assistant_message.content = "" # FIXME: type check tell mypy that assistant_message.content is str + for unit in agent_scratchpad: + if unit.is_final(): + assert isinstance(assistant_message.content, str) + assistant_message.content += f"Final Answer: {unit.agent_response}" + else: + assert isinstance(assistant_message.content, str) + assistant_message.content += f"Thought: {unit.thought}\n\n" + if unit.action_str: + assistant_message.content += f"Action: {unit.action_str}\n\n" + if unit.observation: + assistant_message.content += f"Observation: {unit.observation}\n\n" + + assistant_messages = [assistant_message] + + # query messages + query_messages = self._organize_user_query(query, []) + + if assistant_messages: + # organize historic prompt messages + historic_messages = 
self._organize_historic_prompt_messages( + [ + system_message, + *query_messages, + *assistant_messages, + UserPromptMessage(content="continue"), + ] + ) + messages = [ + system_message, + *historic_messages, + *query_messages, + *assistant_messages, + UserPromptMessage(content="continue"), + ] + else: + # organize historic prompt messages + historic_messages = self._organize_historic_prompt_messages([system_message, *query_messages]) + messages = [system_message, *historic_messages, *query_messages] + + # join all messages + return messages + + def _handle_invoke_action( + self, + action: AgentScratchpadUnit.Action, + tool_instances: Mapping[str, ToolEntity], + message_file_ids: list[str], + ) -> tuple[str, ToolInvokeMeta]: + """ + handle invoke action + :param action: action + :param tool_instances: tool instances + :param message_file_ids: message file ids + :param trace_manager: trace manager + :return: observation, meta + """ + # action is tool call, invoke tool + tool_call_name = action.action_name + tool_call_args = action.action_input + tool_instance = tool_instances.get(tool_call_name) + + if not tool_instance: + answer = f"there is not a tool named {tool_call_name}" + return answer, ToolInvokeMeta.error_instance(answer) + + if isinstance(tool_call_args, str): + with contextlib.suppress(json.JSONDecodeError): + tool_call_args = json.loads(tool_call_args) + tool_call_args = cast(dict, tool_call_args) + tool_invoke_responses = self.session.tool.invoke( + provider_type=ToolProviderType(tool_instance.provider_type), + provider=tool_instance.identity.provider, + tool_name=tool_instance.identity.name, + parameters={**tool_instance.runtime_parameters, **tool_call_args}, + ) + result = "" + for response in tool_invoke_responses: + if response.type == ToolInvokeMessage.MessageType.TEXT: + result += cast(ToolInvokeMessage.TextMessage, response.message).text + elif response.type == ToolInvokeMessage.MessageType.LINK: + result += ( + f"result link: {cast(ToolInvokeMessage.TextMessage, response.message).text}." + + " please tell user to check it." + ) + elif response.type in { + ToolInvokeMessage.MessageType.IMAGE_LINK, + ToolInvokeMessage.MessageType.IMAGE, + }: + result += ( + "image has been created and sent to user already, " + + "you do not need to create it, just tell the user to check it now." + ) + elif response.type == ToolInvokeMessage.MessageType.JSON: + text = json.dumps( + cast(ToolInvokeMessage.JsonMessage, response.message).json_object, + ensure_ascii=False, + ) + result += f"tool response: {text}." + else: + result += f"tool response: {response.message!r}." 
+ tool_invoke_meta = ToolInvokeMeta.error_instance("") + + return result, tool_invoke_meta + + def _convert_dict_to_action(self, action: dict) -> AgentScratchpadUnit.Action: + """ + convert dict to action + """ + return AgentScratchpadUnit.Action(action_name=action["action"], action_input=action["action_input"]) + + def _fill_in_inputs_from_external_data_tools(self, instruction: str, inputs: Mapping[str, Any]) -> str: + """ + fill in inputs from external data tools + """ + for key, value in inputs.items(): + try: + instruction = instruction.replace(f"{{{{{key}}}}}", str(value)) + except Exception: + continue + + return instruction + + def _format_assistant_message(self, agent_scratchpad: list[AgentScratchpadUnit]) -> str: + """ + format assistant message + """ + message = "" + for scratchpad in agent_scratchpad: + if scratchpad.is_final(): + message += f"Final Answer: {scratchpad.agent_response}" + else: + message += f"Thought: {scratchpad.thought}\n\n" + if scratchpad.action_str: + message += f"Action: {scratchpad.action_str}\n\n" + if scratchpad.observation: + message += f"Observation: {scratchpad.observation}\n\n" + + return message + + def _organize_historic_prompt_messages( + self, + history_prompt_messages: list[PromptMessage], + current_session_messages: list[PromptMessage] | None = None, + ) -> list[PromptMessage]: + """ + organize historic prompt messages + """ + result: list[PromptMessage] = [] + scratchpads: list[AgentScratchpadUnit] = [] + current_scratchpad: AgentScratchpadUnit | None = None + + for message in history_prompt_messages: + if isinstance(message, AssistantPromptMessage): + if not current_scratchpad: + assert isinstance(message.content, str) + current_scratchpad = AgentScratchpadUnit( + agent_response=message.content, + thought=message.content or "I am thinking about how to help you", + action_str="", + action=None, + observation=None, + ) + scratchpads.append(current_scratchpad) + if message.tool_calls: + try: + current_scratchpad.action = AgentScratchpadUnit.Action( + action_name=message.tool_calls[0].function.name, + action_input=json.loads(message.tool_calls[0].function.arguments), + ) + current_scratchpad.action_str = json.dumps(current_scratchpad.action.to_dict()) + except Exception: + pass + elif isinstance(message, ToolPromptMessage): + if current_scratchpad: + assert isinstance(message.content, str) + current_scratchpad.observation = message.content + else: + raise NotImplementedError("expected str type") + elif isinstance(message, UserPromptMessage): + if scratchpads: + result.append(AssistantPromptMessage(content=self._format_assistant_message(scratchpads))) + scratchpads = [] + current_scratchpad = None + + result.append(message) + + if scratchpads: + result.append(AssistantPromptMessage(content=self._format_assistant_message(scratchpads))) + + return current_session_messages or [] diff --git a/python/examples/agent/strategies/ReAct.yaml b/python/examples/agent/strategies/ReAct.yaml new file mode 100644 index 0000000..ad6f230 --- /dev/null +++ b/python/examples/agent/strategies/ReAct.yaml @@ -0,0 +1,68 @@ +identity: + name: ReAct + author: Dify + label: + en_US: ReAct + zh_Hans: ReAct + pt_BR: ReAct +description: + en_US: ReAct is a basic strategy for agent, model will use the tools provided to perform the task. + zh_Hans: ReAct 是一个基本的 Agent 策略,模型将使用提供的工具来执行任务。 + pt_BR: ReAct is a basic strategy for agent, model will use the tools provided to perform the task. 
+parameters: + - name: model + type: model-selector + scope: tool-call&llm + required: true + label: + en_US: Model + zh_Hans: 模型 + pt_BR: Model + - name: tools + type: array[tools] + required: true + label: + en_US: Tools list + zh_Hans: 工具列表 + pt_BR: Tools list + - name: instruction + type: string + required: true + label: + en_US: Instruction + zh_Hans: 指令 + pt_BR: Instruction + auto_generate: + type: prompt_instruction + template: + enabled: true + - name: query + type: string + required: true + label: + en_US: Query + zh_Hans: 查询 + pt_BR: Query + - name: maximum_iterations + type: number + required: true + label: + en_US: Maximum Iterations + zh_Hans: 最大迭代次数 + pt_BR: Maximum Iterations + default: 3 + min: 1 + max: 30 + +output_schema: + type: object + properties: + text: + type: string + json: + type: string + file: + type: string +extra: + python: + source: strategies/ReAct.py diff --git a/python/examples/agent/strategies/function_calling.py index f33e954..1db6f87 100644 --- a/python/examples/agent/strategies/function_calling.py +++ b/python/examples/agent/strategies/function_calling.py @@ -72,10 +72,7 @@ def _invoke(self, parameters: dict[str, Any]) -> Generator[AgentInvokeMessage]: query = fc_params.query self.query = query instruction = fc_params.instruction - init_prompt_messages = [ - SystemPromptMessage(content=instruction), - UserPromptMessage(content=query), - ] + init_prompt_messages = [SystemPromptMessage(content=instruction)] tools = fc_params.tools tool_instances = {tool.identity.name: tool for tool in tools} if tools else {} model = fc_params.model @@ -164,8 +161,12 @@ def _invoke(self, parameters: dict[str, Any]) -> Generator[AgentInvokeMessage]: if isinstance(chunk.delta.message.content, list): for content in chunk.delta.message.content: response += content.data + if not function_call_state or iteration_step == max_iteration_steps: + yield self.create_text_message(content.data) else: response += str(chunk.delta.message.content) + if not function_call_state or iteration_step == max_iteration_steps: + yield self.create_text_message(str(chunk.delta.message.content)) if chunk.delta.usage: self.increase_usage(llm_usage, chunk.delta.usage) @@ -261,7 +262,7 @@ def _invoke(self, parameters: dict[str, Any]) -> Generator[AgentInvokeMessage]: else: # invoke tool tool_invoke_responses = self.session.tool.invoke( - provider_type=ToolProviderType.BUILT_IN, + provider_type=ToolProviderType(tool_instance.provider_type), provider=tool_instance.identity.provider, tool_name=tool_instance.identity.name, parameters={**tool_instance.runtime_parameters, **tool_call_args}, @@ -342,7 +343,6 @@ def _invoke(self, parameters: dict[str, Any]) -> Generator[AgentInvokeMessage]: ) iteration_step += 1 - yield self.create_text_message(final_answer) yield self.create_json_message( { "execution_metadata": { diff --git a/python/examples/agent/strategies/function_calling.yaml index bb70f27..3374c09 100644 --- a/python/examples/agent/strategies/function_calling.yaml +++ b/python/examples/agent/strategies/function_calling.yaml @@ -51,6 +51,17 @@ parameters: zh_Hans: 最大迭代次数 pt_BR: Maxium Iterations default: 3 + max: 30 + min: 1 +output_schema: + type: object + properties: + text: + type: string + json: + type: string + llm: + type: string extra: python: source: strategies/function_calling.py
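For reference, here is a small, self-contained sketch of the JSON-blob parsing that the ReAct prompt template and `CotAgentOutputParser` above rely on. It is a simplification for illustration only — the real parser additionally handles streaming chunks, code fences, and partial output — and it assumes the `action` / `action_input` keys shown in the prompt template.

```python
import json
from typing import Optional


def parse_action_blob(json_str: str) -> Optional[dict]:
    """Parse a ReAct action blob such as {"action": "weather", "action_input": {"city": "Berlin"}}."""
    try:
        blob = json.loads(json_str, strict=False)
    except json.JSONDecodeError:
        return None
    # Some providers (e.g. Cohere) wrap the action in a single-element list.
    if isinstance(blob, list) and len(blob) == 1:
        blob = blob[0]
    if not isinstance(blob, dict):
        return None
    action_name, action_input = None, None
    for key, value in blob.items():
        # Keys containing "input" carry the tool arguments; the other key names the tool.
        if "input" in key.lower():
            action_input = value
        else:
            action_name = value
    if action_name is None or action_input is None:
        return None
    return {"action_name": action_name, "action_input": action_input}


if __name__ == "__main__":
    print(parse_action_blob('{"action": "weather", "action_input": {"city": "Berlin"}}'))
```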