From 7cf0f05cb3cd17ef608f4752143e5e5df646569f Mon Sep 17 00:00:00 2001
From: xiongyuwu <5070319@qq.com>
Date: Fri, 11 Apr 2025 11:12:42 +0800
Subject: [PATCH 1/2] update call_llm() to use environment variables for
 choosing the LLM provider (and model)

---
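To try the new code path, export the provider variables before running, e.g.
for a local Ollama server (the model name below is only an illustration; use
whatever model you have pulled):

    export LLM_PROVIDER=OLLAMA
    export OLLAMA_MODEL=llama3.2
    export OLLAMA_BASE_URL=http://localhost:11434
    # OLLAMA_API_KEY can stay unset; Ollama does not require a key
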
 utils/call_llm.py | 202 +++++++++++++++++++++++++++++++---------------
 1 file changed, 138 insertions(+), 64 deletions(-)

diff --git a/utils/call_llm.py b/utils/call_llm.py
index 0d794b4..4397d6e 100644
--- a/utils/call_llm.py
+++ b/utils/call_llm.py
@@ -2,6 +2,7 @@
 import os
 import logging
 import json
+import requests
 from datetime import datetime
 
 # Configure logging
@@ -20,68 +21,141 @@
 # Simple cache configuration
 cache_file = "llm_cache.json"
 
-# By default, we Google Gemini 2.5 pro, as it shows great performance for code understanding
-def call_llm(prompt: str, use_cache: bool = True) -> str:
-    # Log the prompt
-    logger.info(f"PROMPT: {prompt}")
-
-    # Check cache if enabled
-    if use_cache:
-        # Load cache from disk
-        cache = {}
-        if os.path.exists(cache_file):
-            try:
-                with open(cache_file, 'r') as f:
-                    cache = json.load(f)
-            except:
-                logger.warning(f"Failed to load cache, starting with empty cache")
-
-        # Return from cache if exists
-        if prompt in cache:
-            logger.info(f"RESPONSE: {cache[prompt]}")
-            return cache[prompt]
-
-    # Call the LLM if not in cache or cache disabled
-    client = genai.Client(
-        vertexai=True,
-        # TODO: change to your own project id and location
-        project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
-        location=os.getenv("GEMINI_LOCATION", "us-central1")
-    )
-    # You can comment the previous line and use the AI Studio key instead:
-    # client = genai.Client(
-    #     api_key=os.getenv("GEMINI_API_KEY", "your-api_key"),
-    # )
-    model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
-    response = client.models.generate_content(
-        model=model,
-        contents=[prompt]
-    )
-    response_text = response.text
-
-    # Log the response
-    logger.info(f"RESPONSE: {response_text}")
-
-    # Update cache if enabled
-    if use_cache:
-        # Load cache again to avoid overwrites
-        cache = {}
-        if os.path.exists(cache_file):
-            try:
-                with open(cache_file, 'r') as f:
-                    cache = json.load(f)
-            except:
-                pass
-
-        # Add to cache and save
-        cache[prompt] = response_text
+def call_llm(prompt, use_cache: bool = True) -> str:
+    """
+    Call an LLM provider based on environment variables.
+    Environment variables:
+    - LLM_PROVIDER: "OLLAMA" or "XAI"
+    - _MODEL: Model name (e.g., OLLAMA_MODEL, XAI_MODEL)
+    - _BASE_URL: Base URL without endpoint (e.g., OLLAMA_BASE_URL, XAI_BASE_URL)
+    - _API_KEY: API key (e.g., OLLAMA_API_KEY, XAI_API_KEY; optional for providers that don't require it)
+    The endpoint /v1/chat/completions will be appended to the base URL.
+    """
+    logger.info(f"PROMPT: {prompt}")  # log the prompt
+
+    # Read the provider from environment variable
+    provider = os.environ.get("LLM_PROVIDER")
+    if not provider:
+        raise ValueError("LLM_PROVIDER environment variable is required")
+
+    # Construct the names of the other environment variables
+    model_var = f"{provider}_MODEL"
+    base_url_var = f"{provider}_BASE_URL"
+    api_key_var = f"{provider}_API_KEY"
+
+    # Read the provider-specific variables
+    model = os.environ.get(model_var)
+    base_url = os.environ.get(base_url_var)
+    api_key = os.environ.get(api_key_var, "")  # API key is optional, default to empty string
+
+    # Validate required variables
+    if not model:
+        raise ValueError(f"{model_var} environment variable is required")
+    if not base_url:
+        raise ValueError(f"{base_url_var} environment variable is required")
+
+    # Append the endpoint to the base URL
+    url = f"{base_url}/v1/chat/completions"
+
+    # Configure headers and payload based on provider
+    headers = {
+        "Content-Type": "application/json",
+    }
+    if api_key:  # Only add Authorization header if API key is provided
+        headers["Authorization"] = f"Bearer {api_key}"
+
+    payload = {
+        "model": model,
+        "messages": [{"role": "user", "content": prompt}],
+        "temperature": 0.7,
+    }
+
+    try:
+        response = requests.post(url, headers=headers, json=payload)
+        response_json = response.json()  # parse the response body
+        logger.info("RESPONSE:\n%s", json.dumps(response_json, indent=2))
+        #logger.info(f"RESPONSE: {response.json()}")
+        response.raise_for_status()
+        return response.json()["choices"][0]["message"]["content"]
+    except requests.exceptions.HTTPError as e:
+        error_message = f"HTTP error occurred: {e}"
         try:
-            with open(cache_file, 'w') as f:
-                json.dump(cache, f)
-        except Exception as e:
-            logger.error(f"Failed to save cache: {e}")
-    
-    return response_text
+            error_details = response.json().get("error", "No additional details")
+            error_message += f" (Details: {error_details})"
+        except:
+            pass
+        raise Exception(error_message)
+    except requests.exceptions.ConnectionError:
+        raise Exception(f"Failed to connect to {provider} API. Check your network connection.")
+    except requests.exceptions.Timeout:
+        raise Exception(f"Request to {provider} API timed out.")
+    except requests.exceptions.RequestException as e:
+        raise Exception(f"An error occurred while making the request to {provider}: {e}")
+    except ValueError:
+        raise Exception(f"Failed to parse response as JSON from {provider}. The server might have returned an invalid response.")
+
+# By default, we Google Gemini 2.5 pro, as it shows great performance for code understanding
+#def call_llm(prompt: str, use_cache: bool = True) -> str:
+#    # Log the prompt
+#    logger.info(f"PROMPT: {prompt}")
+#
+#    # Check cache if enabled
+#    if use_cache:
+#        # Load cache from disk
+#        cache = {}
+#        if os.path.exists(cache_file):
+#            try:
+#                with open(cache_file, 'r') as f:
+#                    cache = json.load(f)
+#            except:
+#                logger.warning(f"Failed to load cache, starting with empty cache")
+#
+#        # Return from cache if exists
+#        if prompt in cache:
+#            logger.info(f"RESPONSE: {cache[prompt]}")
+#            return cache[prompt]
+#
+#    # Call the LLM if not in cache or cache disabled
+#    client = genai.Client(
+#        vertexai=True,
+#        # TODO: change to your own project id and location
+#        project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
+#        location=os.getenv("GEMINI_LOCATION", "us-central1")
+#    )
+#    # You can comment the previous line and use the AI Studio key instead:
+#    # client = genai.Client(
+#    #     api_key=os.getenv("GEMINI_API_KEY", "your-api_key"),
+#    # )
+#    model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
+#    response = client.models.generate_content(
+#        model=model,
+#        contents=[prompt]
+#    )
+#    response_text = response.text
+#
+#    # Log the response
+#    logger.info(f"RESPONSE: {response_text}")
+#
+#    # Update cache if enabled
+#    if use_cache:
+#        # Load cache again to avoid overwrites
+#        cache = {}
+#        if os.path.exists(cache_file):
+#            try:
+#                with open(cache_file, 'r') as f:
+#                    cache = json.load(f)
+#            except:
+#                pass
+#
+#        # Add to cache and save
+#        cache[prompt] = response_text
+#        try:
+#            with open(cache_file, 'w') as f:
+#                json.dump(cache, f)
+#        except Exception as e:
+#            logger.error(f"Failed to save cache: {e}")
+#
+#    return response_text
 
 # # Use Anthropic Claude 3.7 Sonnet Extended Thinking
 # def call_llm(prompt, use_cache: bool = True):
@@ -101,7 +175,7 @@ def call_llm(prompt: str, use_cache: bool = True) -> str:
 #     return response.content[1].text
 
 # # Use OpenAI o1
-# def call_llm(prompt, use_cache: bool = True):    
+# def call_llm(prompt, use_cache: bool = True):
 #     from openai import OpenAI
 #     client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY", "your-api-key"))
 #     r = client.chat.completions.create(
@@ -117,9 +191,9 @@ def call_llm(prompt: str, use_cache: bool = True) -> str:
 
 if __name__ == "__main__":
     test_prompt = "Hello, how are you?"
-    
+
     # First call - should hit the API
     print("Making call...")
     response1 = call_llm(test_prompt, use_cache=False)
     print(f"Response: {response1}")
-    
+

From d66d97639092051cd7eb0df82a96bec5a5b6bec4 Mon Sep 17 00:00:00 2001
From: woolion
Date: Thu, 24 Apr 2025 14:18:33 +0200
Subject: [PATCH 2/2] switch provider based on env variables

---
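With this change the provider can be chosen entirely from a `.env` file. A
minimal example (the xAI model name and base URL below are placeholders;
substitute your own values):

    # Gemini via AI Studio (the default whenever LLM_PROVIDER is not set)
    GEMINI_API_KEY=your-api-key

    # ...or any OpenAI-compatible provider:
    # LLM_PROVIDER=XAI
    # XAI_MODEL=grok-2-latest
    # XAI_BASE_URL=https://api.x.ai
    # XAI_API_KEY=your-api-key
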
 README.md         |   8 +--
 utils/call_llm.py | 142 ++++++++++++++++++++++++++--------------------
 2 files changed, 80 insertions(+), 70 deletions(-)

diff --git a/README.md b/README.md
index 58ca928..f4daeea 100644
--- a/README.md
+++ b/README.md
@@ -77,13 +77,7 @@ This is a tutorial project of [Pocket Flow](https://github.com/The-Pocket/Pocket
    pip install -r requirements.txt
    ```
 
-3. Set up LLM in [`utils/call_llm.py`](./utils/call_llm.py) by providing credentials. By default, you can use the AI Studio key with this client for Gemini Pro 2.5:
-
-   ```python
-   client = genai.Client(
-     api_key=os.getenv("GEMINI_API_KEY", "your-api_key"),
-   )
-   ```
+3. Set up LLM in [`utils/call_llm.py`](./utils/call_llm.py) by providing credentials. To do so, you can put the values in a `.env` file. By default, Gemini 2.5 Pro is used through the AI Studio client; just set the `GEMINI_API_KEY` environment variable. If you want to use another LLM, set the `LLM_PROVIDER` environment variable (e.g. `XAI`) and then the provider's model, base URL, and API key (e.g. `XAI_MODEL`, `XAI_BASE_URL`, `XAI_API_KEY`). If using Ollama, the base URL is `http://localhost:11434/` and the API key can be omitted. You can use your own models; we highly recommend the latest models with thinking capabilities (Claude 3.7 with thinking, O1).
 
    You can verify that it is correctly set up by running:
    ```bash
diff --git a/utils/call_llm.py b/utils/call_llm.py
index 4397d6e..9f21a44 100644
--- a/utils/call_llm.py
+++ b/utils/call_llm.py
@@ -21,7 +21,33 @@
 # Simple cache configuration
 cache_file = "llm_cache.json"
 
-def call_llm(prompt, use_cache: bool = True) -> str:
+
+def load_cache():
+    try:
+        with open(cache_file, 'r') as f:
+            return json.load(f)
+    except:
+        logger.warning(f"Failed to load cache.")
+        return {}
+
+
+def save_cache(cache):
+    try:
+        with open(cache_file, 'w') as f:
+            json.dump(cache, f)
+    except:
+        logger.warning(f"Failed to save cache")
+
+
+def get_llm_provider():
+    provider = os.getenv("LLM_PROVIDER")
+    if not provider and (os.getenv("GEMINI_PROJECT_ID") or os.getenv("GEMINI_API_KEY")):
+        provider = "GEMINI"
+    # if necessary, add ANTHROPIC/OPENAI
+    return provider
+
+
+def _call_llm_provider(prompt: str) -> str:
     """
     Call an LLM provider based on environment variables.
     Environment variables:
@@ -55,7 +81,7 @@ def call_llm(prompt, use_cache: bool = True) -> str:
         raise ValueError(f"{base_url_var} environment variable is required")
 
     # Append the endpoint to the base URL
-    url = f"{base_url}/v1/chat/completions"
+    url = f"{base_url.rstrip('/')}/v1/chat/completions"
 
     # Configure headers and payload based on provider
     headers = {
@@ -94,68 +120,58 @@ def call_llm(prompt, use_cache: bool = True) -> str:
     except ValueError:
         raise Exception(f"Failed to parse response as JSON from {provider}. The server might have returned an invalid response.")
 
+
 # By default, we Google Gemini 2.5 pro, as it shows great performance for code understanding
-#def call_llm(prompt: str, use_cache: bool = True) -> str:
-#    # Log the prompt
-#    logger.info(f"PROMPT: {prompt}")
-#
-#    # Check cache if enabled
-#    if use_cache:
-#        # Load cache from disk
-#        cache = {}
-#        if os.path.exists(cache_file):
-#            try:
-#                with open(cache_file, 'r') as f:
-#                    cache = json.load(f)
-#            except:
-#                logger.warning(f"Failed to load cache, starting with empty cache")
-#
-#        # Return from cache if exists
-#        if prompt in cache:
-#            logger.info(f"RESPONSE: {cache[prompt]}")
-#            return cache[prompt]
-#
-#    # Call the LLM if not in cache or cache disabled
-#    client = genai.Client(
-#        vertexai=True,
-#        # TODO: change to your own project id and location
-#        project=os.getenv("GEMINI_PROJECT_ID", "your-project-id"),
-#        location=os.getenv("GEMINI_LOCATION", "us-central1")
-#    )
-#    # You can comment the previous line and use the AI Studio key instead:
-#    # client = genai.Client(
-#    #     api_key=os.getenv("GEMINI_API_KEY", "your-api_key"),
-#    # )
-#    model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
-#    response = client.models.generate_content(
-#        model=model,
-#        contents=[prompt]
-#    )
-#    response_text = response.text
-#
-#    # Log the response
-#    logger.info(f"RESPONSE: {response_text}")
-#
-#    # Update cache if enabled
-#    if use_cache:
-#        # Load cache again to avoid overwrites
-#        cache = {}
-#        if os.path.exists(cache_file):
-#            try:
-#                with open(cache_file, 'r') as f:
-#                    cache = json.load(f)
-#            except:
-#                pass
-#
-#        # Add to cache and save
-#        cache[prompt] = response_text
-#        try:
-#            with open(cache_file, 'w') as f:
-#                json.dump(cache, f)
-#        except Exception as e:
-#            logger.error(f"Failed to save cache: {e}")
-#
-#    return response_text
+def call_llm(prompt: str, use_cache: bool = True) -> str:
+    # Log the prompt
+    logger.info(f"PROMPT: {prompt}")
+
+    # Check cache if enabled
+    if use_cache:
+        # Load cache from disk
+        cache = load_cache()
+        # Return from cache if exists
+        if prompt in cache:
+            logger.info(f"RESPONSE: {cache[prompt]}")
+            return cache[prompt]
+
+    provider = get_llm_provider()
+    if provider == "GEMINI":
+        response_text = _call_llm_gemini(prompt)
+    else:  # generic path using an OpenAI-compatible API (Ollama, ...)
+        response_text = _call_llm_provider(prompt)
+
+    # Log the response
+    logger.info(f"RESPONSE: {response_text}")
+
+    # Update cache if enabled
+    if use_cache:
+        # Load cache again to avoid overwrites
+        cache = load_cache()
+        # Add to cache and save
+        cache[prompt] = response_text
+        save_cache(cache)
+
+    return response_text
+
+
+def _call_llm_gemini(prompt: str) -> str:
+    if os.getenv("GEMINI_PROJECT_ID"):
+        client = genai.Client(
+            vertexai=True,
+            project=os.getenv("GEMINI_PROJECT_ID"),
+            location=os.getenv("GEMINI_LOCATION", "us-central1")
+        )
+    elif os.getenv("GEMINI_API_KEY"):
+        client = genai.Client(api_key=os.getenv("GEMINI_API_KEY"))
+    else:
+        raise ValueError("Either GEMINI_PROJECT_ID or GEMINI_API_KEY must be set in the environment")
+    model = os.getenv("GEMINI_MODEL", "gemini-2.5-pro-exp-03-25")
+    response = client.models.generate_content(
+        model=model,
+        contents=[prompt]
+    )
+    return response.text
 
 # # Use Anthropic Claude 3.7 Sonnet Extended Thinking
 # def call_llm(prompt, use_cache: bool = True):