Add TTL cache to prevent redundant API calls

sm-stack · sm-stack · commit f2738e2f6a5a · 2025-06-13T16:33:56.000-04:00
diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -38,6 +38,7 @@ sentence-transformers = "^4.1.0"
 peft = "^0.15.2"
 flagembedding = "^1.3.5"
 logfmt = "^0.4"
+cachetools = "^6.0.0"
 
 [tool.poetry.group.dev.dependencies]
 pytest = "^8.3.4"
diff --git a/src/app/api/helper/get_system_prompt.py b/src/app/api/helper/get_system_prompt.py
@@ -1,29 +1,59 @@
 import httpx
 from fastapi import HTTPException
+from cachetools import TTLCache
 from ...config import get_settings
 from ...logger import log
 
+_system_prompts_cache = TTLCache(maxsize=100, ttl=300)  # 5 minutes TTL
+
 async def get_system_prompt(model: str, usage: str) -> str:
     """
     Get the system prompt for the model and usage.
+
+    Successful lookups are stored in a module-level TTL cache keyed on
+    (model, usage), so repeated requests skip the HTTP round trip.
+    Returns None when the server answers 404 for the pair.
+    Raises HTTPException with status 401 when the API key is rejected and
+    with status 500 for any other failure.
     """
-    base_url = get_settings().PANDA_APP_SERVER_URL
-    api_key = get_settings().PANDA_APP_SERVER_TOKEN
-    client = httpx.AsyncClient()
-    response = await client.get(
-        f"{base_url}/system-prompt?model={model}&usage={usage}",
-        headers={"X-API-Key": f"{api_key}"}
-    )
+    # A tuple key cannot collide the way "prompt-{model}-{usage}" can when
+    # model or usage itself contains a hyphen.
+    cache_key = (model, usage)
+    try:
+        cached_prompt = _system_prompts_cache.get(cache_key)
+        if cached_prompt is not None:
+            log.info("Retrieved system prompts from cache")
+            return cached_prompt
 
-    if response.status_code != 200:
-        if response.status_code == 401:
-            log.error(f"Invalid API key for system prompt")
-            raise HTTPException(status_code=401, detail="Invalid API key")
-        if response.status_code == 404:
-            log.warning(f"No system prompt found for model {model} and usage {usage}, proceeding without it.")
-            return None
-        else:
-            log.error(f"Failed to get system prompt for model {model} and usage {usage}", response.text)
-            raise HTTPException(status_code=500, detail="Failed to get system prompt")
+        base_url = get_settings().PANDA_APP_SERVER_URL
+        api_key = get_settings().PANDA_APP_SERVER_TOKEN
+        # The context manager closes the client's connection pool on every exit
+        # path, so a cache miss no longer leaks an open AsyncClient.
+        async with httpx.AsyncClient() as client:
+            response = await client.get(
+                f"{base_url}/system-prompt",
+                # httpx URL-encodes these values when building the query string.
+                params={"model": model, "usage": usage},
+                headers={"X-API-Key": f"{api_key}"}
+            )
 
-    return response.json()["system_prompt"]
+        if response.status_code != 200:
+            if response.status_code == 401:
+                log.error(f"Invalid API key for system prompt")
+                raise HTTPException(status_code=401, detail="Invalid API key")
+            if response.status_code == 404:
+                log.warning(f"No system prompt found for model {model} and usage {usage}, proceeding without it.")
+                return None
+            else:
+                log.error(f"Failed to get system prompt for model {model} and usage {usage}", response.text)
+                raise HTTPException(status_code=500, detail="Failed to get system prompt")
+        system_prompt = response.json()["system_prompt"]
+        _system_prompts_cache[cache_key] = system_prompt
+        return system_prompt
+    except HTTPException:
+        # Status codes chosen above (e.g. 401) must reach the caller unchanged
+        # instead of being re-wrapped as a generic 500 by the handler below.
+        raise
+    except Exception as e:
+        log.error(f"Error getting system prompt for model {model} and usage {usage}: {str(e)}")
+        raise HTTPException(status_code=500, detail=f"Error getting system prompt for model {model} and usage {usage}: {str(e)}") from e