diff --git a/examples/model_providers/README.md b/examples/model_providers/README.md
index f9330c24..e5fd2dac 100644
--- a/examples/model_providers/README.md
+++ b/examples/model_providers/README.md
@@ -17,3 +17,29 @@ Loops within themselves,
 Function calls its own being,
 Depth without ending.
 ```
+
+
+## LiteLLM Proxy Server integration
+
+The LiteLLM proxy server makes it easy to switch between models quickly, and it plugs into the SDK through a standard `AsyncOpenAI` client:
+
+```bash
+# launch the proxy server with your configuration
+litellm --config examples/model_providers/litellm_config.yaml
+
+# then use the proxy via the SDK
+python3 examples/model_providers/litellm.py
+```
+
+### Testing the proxy server
+Send a quick request to each configured model to verify the proxy is operational:
+```bash
+# qwen2.5:14b
+curl -s http://localhost:4000/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "qwen2.5:14b", "messages": [{"role": "user", "content": "Say hi"}], "max_tokens": 10}' | jq
+
+# claude-3-7
+curl -s http://localhost:4000/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "claude-3-7", "messages": [{"role": "user", "content": "Say hi"}], "max_tokens": 10}' | jq
+
+# gpt-4o
+curl -s http://localhost:4000/v1/chat/completions -H "Content-Type: application/json" -d '{"model": "gpt-4o", "messages": [{"role": "user", "content": "Say hi"}], "max_tokens": 10}' | jq
+```
diff --git a/examples/model_providers/litellm.py b/examples/model_providers/litellm.py
new file mode 100644
index 00000000..1540a36a
--- /dev/null
+++ b/examples/model_providers/litellm.py
@@ -0,0 +1,36 @@
+import os
+from dotenv import load_dotenv
+from openai import AsyncOpenAI
+from agents import OpenAIChatCompletionsModel, Agent, Runner
+from agents.model_settings import ModelSettings
+from agents import set_default_openai_client, set_tracing_disabled
+
+# Load environment variables from .env file
+load_dotenv()
+
+external_client = AsyncOpenAI(
+    base_url=os.getenv('LITELLM_BASE_URL', 'http://localhost:4000'),
+    api_key=os.getenv('LITELLM_API_KEY', 'key'))
+
+set_default_openai_client(external_client)  # route all SDK calls through the LiteLLM proxy
+set_tracing_disabled(True)  # tracing export needs an OpenAI API key, so disable it here
+
+llm_model = os.getenv('LLM_MODEL', 'gpt-4o')
+# llm_model = os.getenv('LLM_MODEL', 'claude-3-7')
+# llm_model = os.getenv('LLM_MODEL', 'qwen2.5:14b')
+
+# Skip the system prompt for Qwen models, which don't accept it in this setup
+instructions = None if "qwen" in llm_model.lower() else "You are a helpful assistant"
+
+agent = Agent(
+    name="Assistant",
+    instructions=instructions,
+    model=OpenAIChatCompletionsModel(
+        model=llm_model,
+        openai_client=external_client,
+    )
+)
+
+
+result = Runner.run_sync(agent, "Write a haiku about recursion in programming.")
+print(result.final_output)
\ No newline at end of file
diff --git a/examples/model_providers/litellm_config.yaml b/examples/model_providers/litellm_config.yaml
new file mode 100644
index 00000000..91caa55d
--- /dev/null
+++ b/examples/model_providers/litellm_config.yaml
@@ -0,0 +1,23 @@
+model_list:
+  - model_name: gpt-4o
+    litellm_params:
+      model: gpt-4o
+      api_key: "os.environ/OPENAI_API_KEY"
+      api_base: https://api.openai.com/v1
+  - model_name: claude-3-7
+    litellm_params:
+      model: claude-3-7-sonnet-20250219
+      api_key: "os.environ/ANTHROPIC_API_KEY" # resolves to os.getenv("ANTHROPIC_API_KEY")
+      api_base: https://api.anthropic.com
+  - model_name: qwen2.5:14b
+    litellm_params:
+      model: ollama/qwen2.5:14b
+      api_base: http://localhost:8000
+      api_key: "os.environ/OLLAMA"
+
+litellm_settings: # module-level litellm settings - https://github.com/BerriAI/litellm/blob/main/litellm/__init__.py
+  drop_params: True
+  # set_verbose: True
+
+general_settings:
+  port: 4000
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index 667ab355..17cf1366 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -7,7 +7,7 @@ requires-python = ">=3.9"
 license = "MIT"
 authors = [{ name = "OpenAI", email = "support@openai.com" }]
 dependencies = [
-    "openai>=1.66.5",
+    "openai>=1.68.2",
     "pydantic>=2.10, <3",
     "griffe>=1.5.6, <2",
     "typing-extensions>=4.12.2, <5",
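For completeness, the same health check shown with `curl` above can be made from Python with the plain `AsyncOpenAI` client that `examples/model_providers/litellm.py` builds on. This is a minimal sketch, assuming the proxy defined in `litellm_config.yaml` is running on `localhost:4000` and `LITELLM_API_KEY` is exported; `gpt-4o` stands in for any alias present in `model_list`:

```python
import asyncio
import os

from openai import AsyncOpenAI

# Plain AsyncOpenAI client pointed at the LiteLLM proxy instead of api.openai.com
client = AsyncOpenAI(
    base_url=os.getenv("LITELLM_BASE_URL", "http://localhost:4000"),
    api_key=os.getenv("LITELLM_API_KEY", "key"),
)


async def main() -> None:
    # "gpt-4o" must match a model_name alias exposed by the proxy's model_list
    response = await client.chat.completions.create(
        model="gpt-4o",
        messages=[{"role": "user", "content": "Say hi"}],
        max_tokens=10,
    )
    print(response.choices[0].message.content)


asyncio.run(main())
```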