Merge pull request #19 from langgenius/feat/supoort-workflow-tools

feat: support workflow tools
langgenius · Jan 22, 2025 · 58f01ea · 58f01ea
2 parents 857dfbc + 9db1efd
commit 58f01ea
Show file tree

Hide file tree

Showing 14 changed files with 976 additions and 10 deletions.
diff --git a/python/dify_plugin/interfaces/agent/__init__.py b/python/dify_plugin/interfaces/agent/__init__.py
@@ -9,7 +9,7 @@
 from dify_plugin.entities.model import AIModelEntity, ModelPropertyKey
 from dify_plugin.entities.model.llm import LLMModelConfig, LLMUsage
 from dify_plugin.entities.model.message import PromptMessage, PromptMessageTool
-from dify_plugin.entities.tool import ToolDescription, ToolIdentity, ToolParameter
+from dify_plugin.entities.tool import ToolDescription, ToolIdentity, ToolParameter, ToolProviderType
 from dify_plugin.interfaces.tool import ToolLike, ToolProvider
 
 
@@ -64,6 +64,8 @@ class ToolEntity(BaseModel):
     description: Optional[ToolDescription] = None
     output_schema: Optional[dict] = None
     has_runtime_parameters: bool = Field(default=False, description="Whether the tool has runtime parameters")
+    # provider type
+    provider_type: ToolProviderType = ToolProviderType.BUILT_IN
 
     # runtime parameters
     runtime_parameters: Mapping[str, Any] = {}

diff --git a/python/examples/agent/.env.example b/python/examples/agent/.env.example
@@ -0,0 +1,4 @@
+INSTALL_METHOD=remote
+REMOTE_INSTALL_HOST=debug-plugin.dify.dev
+REMOTE_INSTALL_PORT=5003
+REMOTE_INSTALL_KEY=ae1aa1c9-0af4-43db-b6d4-4fa9e6bfb646
diff --git a/python/examples/agent/README.md b/python/examples/agent/README.md
@@ -0,0 +1,24 @@
+# Overview
+The Agent node in Dify Chatflow/Workflow lets LLMs autonomously use tools. This plugin features two official Dify Agent reasoning strategies, enabling LLMs to dynamically select and run tools during runtime for multi-step problem-solving.
+
+## Strategies
+
+### 1. Function Calling
+Function Calling maps user commands to specific functions or tools. The LLM identifies the user's intent, decides which function to call, and extracts the required parameters. It is a straightforward mechanism for invoking external capabilities.
+
+![](./_assets/function_calling.png)
+
+#### Pros:
+- **Precise:** Directly calls the right tool for defined tasks, avoiding complex reasoning.
+- **Easy External Integration:** Integrates external APIs and tools as callable functions.
+- **Structured Output:** Provides structured function call information for easy processing.
+
+### 2. ReAct (Reason + Act)
+ReAct alternates between the LLM reasoning about the situation and taking actions. The LLM analyzes the current state and goal, selects and uses a tool, and then uses the tool's output for the next thought and action. This cycle repeats until the problem is resolved.
+
+![](./_assets/react.png)
+
+#### Pros:
+- **Leverages External Information:** Effectively uses external tools to gather information for tasks the model cannot handle alone.
+- **Explainable Reasoning:** Interwoven reasoning and action steps allow some tracking of the Agent's process.
+- **Wide Applicability:** Suitable for tasks requiring external knowledge or specific actions, such as Q&A, information retrieval, and task execution.
diff --git a/python/examples/agent/_assets/function_calling.png b/python/examples/agent/_assets/function_calling.png
diff --git a/python/examples/agent/_assets/react.png b/python/examples/agent/_assets/react.png
diff --git a/python/examples/agent/manifest.yaml b/python/examples/agent/manifest.yaml
@@ -1,9 +1,10 @@
-version: 0.0.2
+version: 0.0.3
 type: plugin
 author: "langgenius"
 name: "agent"
 label:
-  en_US: "Agent"
+  en_US: "Dify Agent Strategies"
+  zh_Hans: "Dify Agent 策略"
 created_at: "2024-07-12T08:03:44.658609186Z"
 icon: icon.svg
 description:

diff --git a/python/examples/agent/output_parser/cot_output_parser.py b/python/examples/agent/output_parser/cot_output_parser.py
@@ -0,0 +1,212 @@
+import json
+import re
+from collections.abc import Generator
+from typing import Union
+
+from dify_plugin.entities.model.llm import LLMResultChunk
+from dify_plugin.interfaces.agent import AgentScratchpadUnit
+
+
+class CotAgentOutputParser:
+    @classmethod
+    def handle_react_stream_output(
+        cls, llm_response: Generator[LLMResultChunk, None, None], usage_dict: dict
+    ) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]:
+        def parse_action(json_str):
+            try:
+                action = json.loads(json_str, strict=False)
+                action_name = None
+                action_input = None
+
+                # cohere always returns a list
+                if isinstance(action, list) and len(action) == 1:
+                    action = action[0]
+
+                for key, value in action.items():
+                    if "input" in key.lower():
+                        action_input = value
+                    else:
+                        action_name = value
+
+                if action_name is not None and action_input is not None:
+                    return AgentScratchpadUnit.Action(
+                        action_name=action_name,
+                        action_input=action_input,
+                    )
+                else:
+                    return json_str or ""
+            except:
+                return json_str or ""
+
+        def extra_json_from_code_block(
+            code_block,
+        ) -> Generator[Union[str, AgentScratchpadUnit.Action], None, None]:
+            code_blocks = re.findall(r"```(.*?)```", code_block, re.DOTALL)
+            if not code_blocks:
+                return
+            for block in code_blocks:
+                json_text = re.sub(
+                    r"^[a-zA-Z]+\n", "", block.strip(), flags=re.MULTILINE
+                )
+                yield parse_action(json_text)
+
+        code_block_cache = ""
+        code_block_delimiter_count = 0
+        in_code_block = False
+        json_cache = ""
+        json_quote_count = 0
+        in_json = False
+        got_json = False
+
+        action_cache = ""
+        action_str = "action:"
+        action_idx = 0
+
+        thought_cache = ""
+        thought_str = "thought:"
+        thought_idx = 0
+
+        last_character = ""
+
+        for response in llm_response:
+            if response.delta.usage:
+                usage_dict["usage"] = response.delta.usage
+            response_content = response.delta.message.content
+            if not isinstance(response_content, str):
+                continue
+
+            # stream
+            index = 0
+            while index < len(response_content):
+                steps = 1
+                delta = response_content[index : index + steps]
+                yield_delta = False
+
+                if delta == "`":
+                    last_character = delta
+                    code_block_cache += delta
+                    code_block_delimiter_count += 1
+                else:
+                    if not in_code_block:
+                        if code_block_delimiter_count > 0:
+                            last_character = delta
+                            yield code_block_cache
+                        code_block_cache = ""
+                    else:
+                        last_character = delta
+                        code_block_cache += delta
+                    code_block_delimiter_count = 0
+
+                if not in_code_block and not in_json:
+                    if delta.lower() == action_str[action_idx] and action_idx == 0:
+                        if last_character not in {"\n", " ", ""}:
+                            yield_delta = True
+                        else:
+                            last_character = delta
+                            action_cache += delta
+                            action_idx += 1
+                            if action_idx == len(action_str):
+                                action_cache = ""
+                                action_idx = 0
+                            index += steps
+                            continue
+                    elif delta.lower() == action_str[action_idx] and action_idx > 0:
+                        last_character = delta
+                        action_cache += delta
+                        action_idx += 1
+                        if action_idx == len(action_str):
+                            action_cache = ""
+                            action_idx = 0
+                        index += steps
+                        continue
+                    else:
+                        if action_cache:
+                            last_character = delta
+                            yield action_cache
+                            action_cache = ""
+                            action_idx = 0
+
+                    if delta.lower() == thought_str[thought_idx] and thought_idx == 0:
+                        if last_character not in {"\n", " ", ""}:
+                            yield_delta = True
+                        else:
+                            last_character = delta
+                            thought_cache += delta
+                            thought_idx += 1
+                            if thought_idx == len(thought_str):
+                                thought_cache = ""
+                                thought_idx = 0
+                            index += steps
+                            continue
+                    elif delta.lower() == thought_str[thought_idx] and thought_idx > 0:
+                        last_character = delta
+                        thought_cache += delta
+                        thought_idx += 1
+                        if thought_idx == len(thought_str):
+                            thought_cache = ""
+                            thought_idx = 0
+                        index += steps
+                        continue
+                    else:
+                        if thought_cache:
+                            last_character = delta
+                            yield thought_cache
+                            thought_cache = ""
+                            thought_idx = 0
+
+                    if yield_delta:
+                        index += steps
+                        last_character = delta
+                        yield delta
+                        continue
+
+                if code_block_delimiter_count == 3:
+                    if in_code_block:
+                        last_character = delta
+                        yield from extra_json_from_code_block(code_block_cache)
+                        code_block_cache = ""
+
+                    in_code_block = not in_code_block
+                    code_block_delimiter_count = 0
+
+                if not in_code_block:
+                    # handle single json
+                    if delta == "{":
+                        json_quote_count += 1
+                        in_json = True
+                        last_character = delta
+                        json_cache += delta
+                    elif delta == "}":
+                        last_character = delta
+                        json_cache += delta
+                        if json_quote_count > 0:
+                            json_quote_count -= 1
+                            if json_quote_count == 0:
+                                in_json = False
+                                got_json = True
+                                index += steps
+                                continue
+                    else:
+                        if in_json:
+                            last_character = delta
+                            json_cache += delta
+
+                    if got_json:
+                        got_json = False
+                        last_character = delta
+                        yield parse_action(json_cache)
+                        json_cache = ""
+                        json_quote_count = 0
+                        in_json = False
+
+                if not in_code_block and not in_json:
+                    last_character = delta
+                    yield delta.replace("`", "")
+
+                index += steps
+
+        if code_block_cache:
+            yield code_block_cache
+
+        if json_cache:
+            yield parse_action(json_cache)
diff --git a/python/examples/agent/prompt/template.py b/python/examples/agent/prompt/template.py
@@ -0,0 +1,106 @@
+ENGLISH_REACT_COMPLETION_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible. 
+
+{{instruction}}
+
+You have access to the following tools:
+
+{{tools}}
+
+Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
+Valid "action" values: "Final Answer" or {{tool_names}}
+
+Provide only ONE action per $JSON_BLOB, as shown:
+
+```
+{
+  "action": $TOOL_NAME,
+  "action_input": $ACTION_INPUT
+}
+```
+
+Follow this format:
+
+Question: input question to answer
+Thought: consider previous and subsequent steps
+Action:
+```
+$JSON_BLOB
+```
+Observation: action result
+... (repeat Thought/Action/Observation N times)
+Thought: I know what to respond
+Action:
+```
+{
+  "action": "Final Answer",
+  "action_input": "Final response to human"
+}
+```
+
+Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.
+{{historic_messages}}
+Question: {{query}}
+{{agent_scratchpad}}
+Thought:"""  # noqa: E501
+
+
+ENGLISH_REACT_COMPLETION_AGENT_SCRATCHPAD_TEMPLATES = """Observation: {{observation}}
+Thought:"""  # single placeholder: {{observation}}; the text ends with an open "Thought:" cue
+
+ENGLISH_REACT_CHAT_PROMPT_TEMPLATES = """Respond to the human as helpfully and accurately as possible. 
+
+{{instruction}}
+
+You have access to the following tools:
+
+{{tools}}
+
+Use a json blob to specify a tool by providing an action key (tool name) and an action_input key (tool input).
+Valid "action" values: "Final Answer" or {{tool_names}}
+
+Provide only ONE action per $JSON_BLOB, as shown:
+
+```
+{
+  "action": $TOOL_NAME,
+  "action_input": $ACTION_INPUT
+}
+```
+
+Follow this format:
+
+Question: input question to answer
+Thought: consider previous and subsequent steps
+Action:
+```
+$JSON_BLOB
+```
+Observation: action result
+... (repeat Thought/Action/Observation N times)
+Thought: I know what to respond
+Action:
+```
+{
+  "action": "Final Answer",
+  "action_input": "Final response to human"
+}
+```
+
+Begin! Reminder to ALWAYS respond with a valid json blob of a single action. Use tools if necessary. Respond directly if appropriate. Format is Action:```$JSON_BLOB```then Observation:.
+"""  # noqa: E501
+
+
+ENGLISH_REACT_CHAT_AGENT_SCRATCHPAD_TEMPLATES = ""  # empty: no scratchpad text is defined for chat mode
+
+REACT_PROMPT_TEMPLATES = {  # lookup table: language -> mode ("chat" / "completion") -> {"prompt", "agent_scratchpad"}
+    "english": {
+        "chat": {
+            "prompt": ENGLISH_REACT_CHAT_PROMPT_TEMPLATES,  # placeholders: instruction, tools, tool_names
+            "agent_scratchpad": ENGLISH_REACT_CHAT_AGENT_SCRATCHPAD_TEMPLATES,
+        },
+        "completion": {
+            "prompt": ENGLISH_REACT_COMPLETION_PROMPT_TEMPLATES,  # also embeds historic_messages, query and agent_scratchpad
+            "agent_scratchpad": ENGLISH_REACT_COMPLETION_AGENT_SCRATCHPAD_TEMPLATES,
+        },
+    }
+}
diff --git a/python/examples/agent/provider/agent.yaml b/python/examples/agent/provider/agent.yaml
@@ -12,6 +12,7 @@ identity:
   icon: icon.svg
 strategies:
   - strategies/function_calling.yaml
+  - strategies/ReAct.yaml
 extra:
   python:
     source: provider/agent.py
diff --git a/python/examples/agent/requirements.txt b/python/examples/agent/requirements.txt
@@ -1 +1 @@
-dify_plugin~=0.0.1b50
+dify_plugin~=0.0.1b60