37 changes: 31 additions & 6 deletions .env.example
@@ -46,9 +46,14 @@ MAX_WORKERS=30
# API Keys and External Services
# =============================================================================

# Serper API for web search and Google Scholar
# Get your key from: https://serper.dev/
SERPER_KEY_ID=your_key
# Exa.ai API for semantic web search
# Get your key from: https://exa.ai/
# Exa provides AI-native neural search with:
# - Semantic understanding (not just keyword matching)
# - Built-in query optimization
# - Direct content retrieval
# - Better results for complex research queries
EXA_API_KEY=your_key

# Jina API for web page reading
# Get your key from: https://jina.ai/
@@ -57,8 +62,8 @@ JINA_API_KEYS=your_key
# Summary model API (OpenAI-compatible) for page summarization
# Get your key from: https://platform.openai.com/
API_KEY=your_key
API_BASE=your_api_base
SUMMARY_MODEL_NAME=your_summary_model_name
API_BASE=https://api.openai.com/v1
SUMMARY_MODEL_NAME=gpt-4o-mini

# Dashscope API for file parsing (PDF, Office, etc.)
# Get your key from: https://dashscope.aliyun.com/
@@ -95,4 +100,24 @@ IDP_KEY_SECRET=your_idp_key_secret

# These are typically set by distributed training frameworks
# WORLD_SIZE=1
# RANK=0
# RANK=0

# =============================================================================
# MLX Configuration (Apple Silicon Only)
# =============================================================================
# For running on Apple Silicon Macs (M1/M2/M3/M4) using MLX framework
# instead of CUDA/vLLM. Uses mlx-lm for efficient local inference.
#
# Requirements:
# pip install mlx-lm
#
# Recommended models:
# - abalogh/Tongyi-DeepResearch-30B-A3B-4bit (17GB, fits 32GB RAM)
# - Original BF16 model requires 62GB+
#
# Usage:
# bash inference/run_mlx_infer.sh
#
# MLX_MODEL=abalogh/Tongyi-DeepResearch-30B-A3B-4bit
# MLX_HOST=127.0.0.1
# MLX_PORT=8080
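
# As a quick sanity check for this configuration, here is a minimal sketch
# (standard library only) that sends one chat request to the local server. It
# assumes the server was started with
# `python -m mlx_lm.server --model "$MLX_MODEL" --host "$MLX_HOST" --port "$MLX_PORT"`
# and that your mlx-lm version exposes its usual OpenAI-compatible
# /v1/chat/completions endpoint; adjust if yours differs.
#
# import json
# import os
# import urllib.request
#
# host = os.environ.get("MLX_HOST", "127.0.0.1")
# port = os.environ.get("MLX_PORT", "8080")
# payload = {
#     "model": os.environ.get("MLX_MODEL", "abalogh/Tongyi-DeepResearch-30B-A3B-4bit"),
#     "messages": [{"role": "user", "content": "Say hello in one word."}],
#     "max_tokens": 16,
# }
# req = urllib.request.Request(
#     f"http://{host}:{port}/v1/chat/completions",
#     data=json.dumps(payload).encode(),
#     headers={"Content-Type": "application/json"},
# )
# with urllib.request.urlopen(req) as resp:
#     print(json.load(resp)["choices"][0]["message"]["content"])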
218 changes: 218 additions & 0 deletions inference/interactive.py
@@ -0,0 +1,218 @@
#!/usr/bin/env python3
"""
Interactive CLI for DeepResearch on Apple Silicon (MLX)

Usage:
python interactive.py [--model MODEL_PATH]

Example:
python interactive.py
python interactive.py --model abalogh/Tongyi-DeepResearch-30B-A3B-4bit
"""

import argparse
import json
import os
import sys
import time

# Load environment variables first
from dotenv import load_dotenv
load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))

# Disable tokenizer parallelism warning
os.environ["TOKENIZERS_PARALLELISM"] = "false"

# Optional: rich for better formatting
try:
    from rich.console import Console
    from rich.markdown import Markdown
    from rich.panel import Panel
    from rich.progress import Progress, SpinnerColumn, TextColumn
    RICH_AVAILABLE = True
    console = Console()
except ImportError:
    RICH_AVAILABLE = False
    console = None


def print_header():
    """Print welcome header."""
    header = """
╔══════════════════════════════════════════════════════════════╗
║            DeepResearch - Interactive Mode (MLX)             ║
║                   Apple Silicon Optimized                    ║
╚══════════════════════════════════════════════════════════════╝
"""
    if RICH_AVAILABLE:
        console.print(header, style="bold blue")
    else:
        print(header)


def print_help():
    """Print help information."""
    help_text = """
Commands:
  /help   - Show this help message
  /quit   - Exit the program (or Ctrl+D)
  /clear  - Clear conversation history (start fresh)
  /status - Show model and memory status

Just type your research question to begin!

Examples:
  > What is the current population of Tokyo?
  > Who won the 2024 Nobel Prize in Physics?
  > Explain the mechanism of CRISPR-Cas9 gene editing
"""
    if RICH_AVAILABLE:
        console.print(Panel(help_text, title="Help", border_style="green"))
    else:
        print(help_text)


def format_answer(answer: str):
    """Format the answer for display."""
    if RICH_AVAILABLE:
        console.print("\n")
        console.print(Panel(Markdown(answer), title="[bold green]Answer[/]", border_style="green"))
    else:
        print("\n" + "=" * 60)
        print("ANSWER:")
        print("=" * 60)
        print(answer)
        print("=" * 60)


def main():
    parser = argparse.ArgumentParser(description="Interactive DeepResearch CLI")
    parser.add_argument("--model", type=str,
                        default="abalogh/Tongyi-DeepResearch-30B-A3B-4bit",
                        help="Model path or HuggingFace ID")
    parser.add_argument("--temperature", type=float, default=0.7,
                        help="Sampling temperature")
    parser.add_argument("--max_tokens", type=int, default=4096,
                        help="Max tokens per generation")
    parser.add_argument("--max_rounds", type=int, default=15,
                        help="Max research rounds per question")
    args = parser.parse_args()

    print_header()

    # Set max rounds via environment
    os.environ['MAX_LLM_CALL_PER_RUN'] = str(args.max_rounds)

    # Import agent after setting environment
    print("Loading model (this may take a minute)...")

    try:
        from run_mlx_react import MLXReactAgent, TOOL_MAP
    except ImportError as e:
        print(f"Error importing agent: {e}")
        print("Make sure you're running from the inference directory.")
        return 1

    if RICH_AVAILABLE:
        with Progress(
            SpinnerColumn(),
            TextColumn("[progress.description]{task.description}"),
            console=console
        ) as progress:
            progress.add_task("Loading MLX model...", total=None)
            agent = MLXReactAgent(
                model_path=args.model,
                temperature=args.temperature,
                max_tokens=args.max_tokens
            )
    else:
        agent = MLXReactAgent(
            model_path=args.model,
            temperature=args.temperature,
            max_tokens=args.max_tokens
        )

    print(f"\nTools available: {list(TOOL_MAP.keys())}")
    print(f"Max rounds per question: {args.max_rounds}")
    print_help()

    while True:
        try:
            # Get user input
            if RICH_AVAILABLE:
                query = console.input("\n[bold cyan]Research Query>[/] ").strip()
            else:
                query = input("\nResearch Query> ").strip()

            # Handle commands
            if not query:
                continue

            if query.lower() in ('/quit', '/exit', '/q'):
                print("Goodbye!")
                break

            if query.lower() == '/help':
                print_help()
                continue

            if query.lower() == '/clear':
                print("Ready for a new question.")
                continue

            if query.lower() == '/status':
                try:
                    import mlx.core as mx
                    # Use new API (mlx >= 0.24) or fall back to deprecated
                    if hasattr(mx, 'get_active_memory'):
                        mem_gb = mx.get_active_memory() / (1024**3)
                    else:
                        mem_gb = mx.metal.get_active_memory() / (1024**3)
                    print(f"Model: {args.model}")
                    print(f"GPU Memory: {mem_gb:.1f} GB")
                except Exception:
                    print(f"Model: {args.model}")
                continue

            if query.startswith('/'):
                print(f"Unknown command: {query}. Type /help for available commands.")
                continue

            # Run research
            print("\nResearching...\n")
            start = time.time()

            data = {'item': {'question': query, 'answer': ''}}
            result = agent.run(data)

            elapsed = time.time() - start

            # Display result
            prediction = result.get('prediction', 'No answer found.')
            termination = result.get('termination', 'unknown')
            num_rounds = len([m for m in result.get('messages', []) if m.get('role') == 'assistant'])

            format_answer(prediction)

            if RICH_AVAILABLE:
                console.print(f"[dim]Completed in {elapsed:.1f}s | {num_rounds} rounds | Termination: {termination}[/]")
            else:
                print(f"\nCompleted in {elapsed:.1f}s | {num_rounds} rounds | Termination: {termination}")

        except KeyboardInterrupt:
            print("\n\nInterrupted. Type /quit to exit or continue with a new question.")
            continue
        except EOFError:
            print("\nGoodbye!")
            break
        except Exception as e:
            print(f"\nError: {e}")
            import traceback
            traceback.print_exc()
            continue

    return 0


if __name__ == "__main__":
    sys.exit(main())
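
# For scripted runs, a batch-mode sketch built on the same agent contract used
# by this CLI. It assumes run_mlx_react is importable (i.e. you run from the
# inference directory, as noted above) and that run() accepts
# {'item': {'question': ..., 'answer': ''}} and returns a dict with
# 'prediction', 'termination', and 'messages', exactly as interactive.py expects.
#
# from run_mlx_react import MLXReactAgent
#
# agent = MLXReactAgent(
#     model_path="abalogh/Tongyi-DeepResearch-30B-A3B-4bit",
#     temperature=0.7,
#     max_tokens=4096,
# )
# questions = [
#     "What is the current population of Tokyo?",
#     "Who won the 2024 Nobel Prize in Physics?",
# ]
# for q in questions:
#     result = agent.run({'item': {'question': q, 'answer': ''}})
#     print(q, "->", result.get('prediction', 'No answer found.'))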
41 changes: 21 additions & 20 deletions inference/prompt.py
@@ -1,30 +1,31 @@
SYSTEM_PROMPT = """You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response. When you have gathered sufficient information and are ready to provide the definitive response, you must enclose the entire final answer within <answer></answer> tags.
SYSTEM_PROMPT = """You are a deep research assistant. Your core function is to conduct thorough, multi-source investigations into any topic. You must handle both broad, open-domain inquiries and queries within specialized academic fields. For every request, synthesize information from credible, diverse sources to deliver a comprehensive, accurate, and objective response.

# CRITICAL: Answer Behavior

**You MUST provide a final answer after gathering sufficient information.** Do not continue researching indefinitely.

Guidelines for when to provide your answer:
1. After 2-3 search queries that return relevant results, you likely have enough information
2. If multiple sources agree on key facts, you have sufficient confirmation
3. If a webpage visit fails, use the search snippets you already have
4. A good answer with available information is better than endless searching
5. When uncertain, provide the best answer you can with appropriate caveats

**When ready to answer, use this format:**
<think>Final reasoning about the gathered information</think>
<answer>Your comprehensive answer here</answer>

# Tools

You may call one or more functions to assist with the user query.

You are provided with function signatures within <tools></tools> XML tags:
<tools>
{"type": "function", "function": {"name": "search", "description": "Perform Google web searches then returns a string of the top search results. Accepts multiple queries.", "parameters": {"type": "object", "properties": {"query": {"type": "array", "items": {"type": "string", "description": "The search query."}, "minItems": 1, "description": "The list of search queries."}}, "required": ["query"]}}}
{"type": "function", "function": {"name": "visit", "description": "Visit webpage(s) and return the summary of the content.", "parameters": {"type": "object", "properties": {"url": {"type": "array", "items": {"type": "string"}, "description": "The URL(s) of the webpage(s) to visit. Can be a single URL or an array of URLs."}, "goal": {"type": "string", "description": "The specific information goal for visiting webpage(s)."}}, "required": ["url", "goal"]}}}
{"type": "function", "function": {"name": "PythonInterpreter", "description": "Executes Python code in a sandboxed environment. To use this tool, you must follow this format:
1. The 'arguments' JSON object must be empty: {}.
2. The Python code to be executed must be placed immediately after the JSON block, enclosed within <code> and </code> tags.

IMPORTANT: Any output you want to see MUST be printed to standard output using the print() function.

Example of a correct call:
<tool_call>
{"name": "PythonInterpreter", "arguments": {}}
<code>
import numpy as np
# Your code here
print(f"The result is: {np.mean([1,2,3])}")
</code>
</tool_call>", "parameters": {"type": "object", "properties": {}, "required": []}}}
{"type": "function", "function": {"name": "google_scholar", "description": "Leverage Google Scholar to retrieve relevant information from academic publications. Accepts multiple queries. This tool will also return results from google search", "parameters": {"type": "object", "properties": {"query": {"type": "array", "items": {"type": "string", "description": "The search query."}, "minItems": 1, "description": "The list of search queries for Google Scholar."}}, "required": ["query"]}}}
{"type": "function", "function": {"name": "parse_file", "description": "This is a tool that can be used to parse multiple user uploaded local files such as PDF, DOCX, PPTX, TXT, CSV, XLSX, DOC, ZIP, MP4, MP3.", "parameters": {"type": "object", "properties": {"files": {"type": "array", "items": {"type": "string"}, "description": "The file name of the user uploaded local files to be parsed."}}, "required": ["files"]}}}
{"type": "function", "function": {"name": "search", "description": "Perform web searches and return top results with snippets. Use this first to find relevant sources.", "parameters": {"type": "object", "properties": {"query": {"type": "array", "items": {"type": "string"}, "minItems": 1, "description": "Search queries (1-3 queries recommended)."}}, "required": ["query"]}}}
{"type": "function", "function": {"name": "visit", "description": "Visit webpage(s) to extract detailed content. Only visit if search snippets are insufficient.", "parameters": {"type": "object", "properties": {"url": {"type": "array", "items": {"type": "string"}, "description": "URL(s) to visit."}, "goal": {"type": "string", "description": "What specific information you need from the page."}}, "required": ["url", "goal"]}}}
{"type": "function", "function": {"name": "google_scholar", "description": "Search academic publications. Use for scientific/research questions.", "parameters": {"type": "object", "properties": {"query": {"type": "array", "items": {"type": "string"}, "minItems": 1, "description": "Academic search queries."}}, "required": ["query"]}}}
{"type": "function", "function": {"name": "PythonInterpreter", "description": "Execute Python code for calculations or data processing.", "parameters": {"type": "object", "properties": {}, "required": []}}}
{"type": "function", "function": {"name": "parse_file", "description": "Parse uploaded files (PDF, DOCX, etc.).", "parameters": {"type": "object", "properties": {"files": {"type": "array", "items": {"type": "string"}, "description": "File names to parse."}}, "required": ["files"]}}}
</tools>

For each function call, return a json object with function name and arguments within <tool_call></tool_call> XML tags:
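
# To make the expected model output concrete, here is a hypothetical parser for
# the <tool_call> format described above. It assumes one tool call per assistant
# turn, with the JSON object on the first line of the block and an optional
# <code>...</code> payload after it (the PythonInterpreter convention shown in
# the old prompt); the actual parsing in run_mlx_react may differ.
#
# import json
# import re
#
# def parse_tool_call(text: str):
#     """Extract (name, arguments, code) from a model response, or None."""
#     block = re.search(r"<tool_call>(.*?)</tool_call>", text, re.DOTALL)
#     if not block:
#         return None
#     body = block.group(1).strip()
#     # The JSON object is the first line of the block.
#     json_part, _, rest = body.partition("\n")
#     call = json.loads(json_part)
#     code = re.search(r"<code>(.*?)</code>", rest, re.DOTALL)
#     return call["name"], call.get("arguments", {}), code.group(1) if code else None
#
# example = '''<tool_call>
# {"name": "search", "arguments": {"query": ["population of Tokyo 2024"]}}
# </tool_call>'''
# print(parse_tool_call(example))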