
NOT-READY - oci tool calling working #150

Draft: wants to merge 5 commits into master
44 changes: 44 additions & 0 deletions README.md
@@ -341,6 +341,31 @@ For OpenAI, only the ``model`` and ``api-key`` should be provided

</details>

<details>
<summary>OCI (Oracle Cloud Infrastructure) in Kubernetes</summary>

Review comment (Contributor Author): update in-cluster docs for all params from the `~/.oci/config` file


To configure OCI in Kubernetes, you need to provide the following environment variables:

additionalEnvVars:
  - name: MODEL
    value: oci/cohere # Your OCI model identifier
  - name: OCI_ENDPOINT
    value: https://inference.generativeai.<region>.oci.oraclecloud.com # OCI endpoint URL
  - name: OCI_COMPARTMENT_ID
    value: ocid1.tenancy.oc1..example # Your OCI Compartment OCID
  - name: OCI_MODEL_ID
    value: ocid1.generativeaimodel.oc1.<region>.example # Your OCI Model OCID

Additionally, ensure your `~/.oci/config` is set up with the correct API key, and provide the API key using Kubernetes secrets:

  - name: OCI_API_KEY
    valueFrom:
      secretKeyRef:
        name: my-holmes-secret
        key: ociApiKey
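
One way to create the referenced secret is from a local key file. This is a minimal sketch: the secret name `my-holmes-secret` and key `ociApiKey` match the values above, while the namespace and file path are placeholders.

```bash
# Create the Kubernetes secret referenced by the OCI_API_KEY env var above
# (the key file path and namespace are illustrative)
kubectl create secret generic my-holmes-secret \
  --from-file=ociApiKey=/path/to/oci_api_key.pem \
  --namespace <holmes-namespace>
```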

</details>

<details>
<summary>Azure OpenAI</summary>

@@ -417,6 +442,25 @@ If you prefer not to pass secrets on the cli, set the OPENAI_API_KEY environment

</details>

<details>
<summary>OCI (Oracle Cloud Infrastructure)</summary>

To work with Oracle Cloud Infrastructure (OCI) Generative AI models, you need to set the following environment variables:

* OCI_ENDPOINT - e.g. https://inference.generativeai.<region>.oci.oraclecloud.com
* OCI_COMPARTMENT_ID - Your OCI Compartment OCID
* OCI_MODEL_ID - Your OCI Generative AI Model OCID

Set those environment variables and run:

```bash
poetry run python3 holmes.py ask "what kubernetes pods are Crashlooping?" --model="oci/cohere.command-r-plus"
```

Ensure that your `~/.oci/config` file is properly configured with your API key. Refer to the OCI CLI Configuration Guide for more details.
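
A typical `~/.oci/config` looks like the following sketch (illustrative values; see the OCI CLI docs for the exact fields your tenancy requires):

```ini
[DEFAULT]
user=ocid1.user.oc1..example
fingerprint=aa:bb:cc:dd:ee:ff:00:11:22:33:44:55:66:77:88:99
key_file=~/.oci/oci_api_key.pem
tenancy=ocid1.tenancy.oc1..example
region=us-ashburn-1
```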

</details>

<details>
<summary>Azure OpenAI</summary>

194 changes: 194 additions & 0 deletions holmes/core/aux_llms/oci_llms.py
@@ -0,0 +1,194 @@
from typing import List, Optional, Dict
import oci
import os
import json
from oci.generative_ai_inference.models import CohereTool, CohereParameterDefinition, CohereToolCall, CohereSystemMessage, CohereUserMessage, CohereToolResult
from pydantic import BaseModel
from openai.types.chat.chat_completion_message_tool_call import ChatCompletionMessageToolCall, Function

class Message(BaseModel):
    content: str
    tool_calls: Optional[List[ChatCompletionMessageToolCall]] = None

class Choice(BaseModel):
    message: Message

class LiteLLMResponse(BaseModel):
    choices: List[Choice]

class ModelSupportException(Exception):
    pass

class OCILLM:
Review comment from @Avi-Robusta (Contributor Author), Oct 7, 2024:
maybe make some sort of interface base class so we have something generic for every time we need to expand litellm?
All the functions we need accessible are:
- the supports_llm function
- the maximum size functions
- the chat/completion function
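
A rough sketch of the kind of base-class interface this comment suggests (hypothetical; the class name and signatures are illustrative and not part of this PR):

```python
# Hypothetical sketch of a generic auxiliary-LLM interface (not part of this PR).
from abc import ABC, abstractmethod
from typing import Dict, List, Optional


class AuxLLM(ABC):
    """Minimal contract an auxiliary LLM backend would implement."""

    @staticmethod
    @abstractmethod
    def supports_llm(model: str) -> bool:
        """Return True if this backend should handle the given model string."""

    @staticmethod
    @abstractmethod
    def get_context_window_size(model: str) -> int:
        """Maximum input tokens for the model."""

    @staticmethod
    @abstractmethod
    def get_maximum_output_token(model: str) -> int:
        """Maximum output tokens for the model."""

    @abstractmethod
    def chat(self, message: str, messages: List[Dict], model: str,
             tools: Optional[List], temperature: float):
        """Run a chat/completion call and return a LiteLLM-style response."""
```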

    supported_models = ['oci/cohere.command-r-plus', 'oci/cohere.command-r-16k']

    def __init__(self):
        # Get values from environment variables, with defaults if not set
        self.config_profile = os.environ.get("OCI_CONFIG_PROFILE", "DEFAULT")
        self.endpoint = os.environ.get("OCI_ENDPOINT", None)
        self.model_id = os.environ.get("OCI_MODEL_ID", None)
        self.compartment_id = os.environ.get("OCI_COMPARTMENT_ID", None)
        self.tenancy_id = os.environ.get("OCI_TENANCY_ID", None)
        self.user_id = os.environ.get("OCI_USER_ID", None)
        self.fingerprint = os.environ.get("OCI_FINGERPRINT", None)
        self.private_key = os.environ.get("OCI_PRIVATE_KEY", None)  # Read private key as string
        self.region = os.environ.get("OCI_REGION", None)
        self.passphrase = os.environ.get("OCI_PRIVATE_KEY_PASSPHRASE", None)  # Optional passphrase

        # Check if we have all environment variables needed; if not, fall back to config file
        if self.tenancy_id and self.user_id and self.fingerprint and self.private_key and self.region:
            self.use_env_config()
        else:
            self.use_file_config()

        # Ensure required variables are set, whether from env or config file
        self.check_llm()

        # Set up the Generative AI client
        self.generative_ai_inference_client = oci.generative_ai_inference.GenerativeAiInferenceClient(
            config=self.config,
            service_endpoint=self.endpoint,
            retry_strategy=oci.retry.NoneRetryStrategy(),
            timeout=(10, 240)
        )

    def use_env_config(self):
        """Use environment variables for OCI config."""
        self.config = {
            "user": self.user_id,
            "fingerprint": self.fingerprint,
            "key_content": self.private_key,  # Pass the private key as a string
            "tenancy": self.tenancy_id,
            "region": self.region,
            "pass_phrase": self.passphrase  # If the private key has a passphrase
        }

    def use_file_config(self):
        """Fallback to OCI config file if environment variables are missing."""
        print("Falling back to OCI config file")
        self.config = oci.config.from_file('~/.oci/config', self.config_profile)

    def _convert_parameters(self, parameters) -> Dict[str, CohereParameterDefinition]:
        parameter_definitions = {}
        for param_name, param_properties in parameters['properties'].items():
            parameter_definitions[param_name] = CohereParameterDefinition(
                description=f"Parameter {param_name} of type {param_properties['type']}",
                type=param_properties['type'],
                is_required=param_name in parameters.get('required', [])
            )
        return parameter_definitions

    def _make_jsonable(self, obj) -> str:
        try:
            return json.dumps(obj)
        except (TypeError, ValueError):
            return str(obj)

    def _process_oci_response(self, full_response: oci.response.Response) -> LiteLLMResponse:
        """
        Converts from oci.response.Response to the response format of LiteLLMResponse
        """
        chat_response = full_response.data.chat_response
        tool_calls = []
        if chat_response.tool_calls:
            for tc in chat_response.tool_calls:
                function = Function(name=tc.name, arguments=self._make_jsonable(tc.parameters))
                tool_call = ChatCompletionMessageToolCall(
                    id=tc.name,
                    function=function,
                    type="function"
                )
                tool_calls.append(tool_call)

        message = Message(content=chat_response.text, tool_calls=tool_calls)
        choice = Choice(message=message)
        return LiteLLMResponse(choices=[choice])

    def _convert_to_chat_history(self, messages: List[Dict]) -> List[oci.generative_ai_inference.models.CohereMessage]:
        chat_history = []
        for message in messages:
            role = message.get("role")
            if role == "system":
                system_message = CohereSystemMessage(role=CohereSystemMessage.ROLE_SYSTEM, message=message.get("content"))
                chat_history.append(system_message)
            elif role == "user":
                user_message = CohereUserMessage(role=CohereUserMessage.ROLE_USER, message=message.get("content"))
                chat_history.append(user_message)
        return chat_history

    def _convert_to_tool_results(self, messages: List[dict]) -> Optional[List[CohereToolResult]]:
        tool_results = []
        for message in messages:
            if message.get("role") == "tool":
                cohere_tool = CohereToolCall(name=message['name'], parameters={})
                tool_result = CohereToolResult(call=cohere_tool, outputs=[{"content": message.get("content")}])
                tool_results.append(tool_result)
        return tool_results if tool_results else None

    def oci_chat(self, message: str, messages: List, model: str, tools: Optional[List], temperature: float) -> LiteLLMResponse:
        chat_detail = oci.generative_ai_inference.models.ChatDetails()
        chat_request = oci.generative_ai_inference.models.CohereChatRequest()
        chat_request.message = message
        chat_request.chat_history = self._convert_to_chat_history(messages)
        chat_request.max_tokens = self.get_maximum_output_token(model)
        chat_request.tool_results = self._convert_to_tool_results(messages)
        chat_request.is_force_single_step = True
        chat_request.temperature = temperature

        cohere_tools = []
        if tools:
            for tool in tools:
                tool_function = tool["function"]
                parameter_definitions = self._convert_parameters(tool_function['parameters'])
                cohere_tool = CohereTool(
                    name=tool_function['name'],
                    description=tool_function['description'],
                    parameter_definitions=parameter_definitions
                )
                cohere_tools.append(cohere_tool)

        chat_request.tools = cohere_tools
        chat_detail.serving_mode = oci.generative_ai_inference.models.OnDemandServingMode(model_id=self.model_id)
        chat_detail.chat_request = chat_request
        chat_detail.compartment_id = self.compartment_id

        chat_response = self.generative_ai_inference_client.chat(chat_detail)
        return self._process_oci_response(chat_response)

    @staticmethod
    def supports_llm(model: str) -> bool:
        # Check if the model is supported
        if 'oci' not in model.lower():
            return False

        if model in OCILLM.supported_models:
            return True
        else:
            # Raise an exception if it is oci but model is not supported like 'oci/llama'
            raise ModelSupportException(f"Unsupported model: {model}. Supported models are: {', '.join(OCILLM.supported_models)}")

    @staticmethod
    def check_llm() -> bool:
        """
        Verifies all the env vars exist to run the LLM
        """
        required_vars = ["OCI_MODEL_ID", "OCI_COMPARTMENT_ID", "OCI_ENDPOINT"]
        missing_vars = [var for var in required_vars if var not in os.environ]
        if missing_vars:
            raise Exception(f"Missing required environment variables: {', '.join(missing_vars)}")

        return True

    @staticmethod
    def get_context_window_size(model) -> int:
        if "cohere.command-r-plus" in model:
            return 128000
        elif 'cohere.command-r-16k' in model:
            return 16000
        raise ModelSupportException(f"Unsupported model: {model}. Supported models are: {', '.join(OCILLM.supported_models)}")

    @staticmethod
    def get_maximum_output_token(model) -> int:
        if "cohere.command-r-plus" in model or 'cohere.command-r-16k' in model:
            return 4000
        raise ModelSupportException(f"Unsupported model: {model}. Supported models are: {', '.join(OCILLM.supported_models)}")
41 changes: 27 additions & 14 deletions holmes/core/tool_calling_llm.py
@@ -4,6 +4,7 @@
 import textwrap
 import os
 from typing import List, Optional
+from holmes.core.aux_llms.oci_llms import OCILLM
 from holmes.plugins.prompts import load_and_render_prompt
 from litellm import get_supported_openai_params
 import litellm
@@ -62,8 +63,10 @@ def __init__(

        if ROBUSTA_AI:
            self.base_url = ROBUSTA_API_ENDPOINT

-        self.check_llm(self.model, self.api_key)
+        if OCILLM.supports_llm(self.model):
+            OCILLM.check_llm()
+        else:
+            self.check_llm(self.model, self.api_key)

    def check_llm(self, model, api_key):
        logging.debug(f"Checking LiteLLM model {model}")
@@ -86,14 +89,20 @@ def check_llm(self, model, api_key):
        #if not litellm.supports_function_calling(model=model):
        #    raise Exception(f"model {model} does not support function calling. You must use HolmesGPT with a model that supports function calling.")
    def get_context_window_size(self) -> int:
-        return litellm.model_cost[self.model]['max_input_tokens']
+        if OCILLM.supports_llm(self.model):
+            return OCILLM.get_context_window_size(self.model)
+        else:
+            return litellm.model_cost[self.model]['max_input_tokens']

    def count_tokens_for_message(self, messages: list[dict]) -> int:
        return litellm.token_counter(model=self.model,
                                     messages=messages)

    def get_maximum_output_token(self) -> int:
-        return litellm.model_cost[self.model]['max_output_tokens']
+        if OCILLM.supports_llm(self.model):
+            return OCILLM.get_maximum_output_token(self.model)
+        else:
+            return litellm.model_cost[self.model]['max_output_tokens']

    def call(self, system_prompt, user_prompt, post_process_prompt: Optional[str] = None, response_format: dict = None) -> LLMResult:
        messages = [
@@ -126,17 +135,21 @@ def call(self, system_prompt, user_prompt, post_process_prompt: Optional[str] =

        logging.debug(f"sending messages {messages}")
        try:
-            full_response = litellm.completion(
-                model=self.model,
-                api_key=self.api_key,
-                messages=messages,
-                tools=tools,
-                tool_choice=tool_choice,
-                base_url=self.base_url,
-                temperature=0.00000001,
-                response_format=response_format
-            )
+            temperature = 0.00000001
+            if OCILLM.supports_llm(self.model):
+                full_response = OCILLM().oci_chat(message=user_prompt, messages=messages, model=self.model, tools=tools, temperature=temperature)
+            else:
+                full_response = litellm.completion(
+                    model=self.model,
+                    api_key=self.api_key,
+                    messages=messages,
+                    tools=tools,
+                    tool_choice=tool_choice,
+                    base_url=self.base_url,
+                    temperature=temperature,
+                    response_format=response_format
+                )
            logging.debug(f"got response {full_response}")
            # catch a known error that occurs with Azure and replace the error message with something more obvious to the user
        except BadRequestError as e: