|
| 1 | +############################################################################### |
| 2 | +################### Ollama Local LLM Interface Setup ########################## |
| 3 | +############################################################################### |
| 4 | +# |
| 5 | +# Install Ollama from https://ollama.com/ |
| 6 | +# ``` |
| 7 | +# curl -fsSL https://ollama.com/install.sh | sh |
| 8 | +# ollama --version |
| 9 | +# ``` |
| 10 | +# |
| 11 | +# Pull and run a local model (e.g., qwen3-coder:latest) |
| 12 | +# ``` |
| 13 | +# ollama pull qwen3-coder:latest |
| 14 | +# OLLAMA_HOST=127.0.0.1:11434 ollama serve |
| 15 | +# ``` |
| 16 | +# |
| 17 | +# Set environment variables in a .env file or your system environment: |
| 18 | +# ``` |
| 19 | +# LOCAL_MODEL_NAME=qwen3-coder:latest |
| 20 | +# LOCAL_MODEL_PORT=11434 |
| 21 | +# ``` |
| 22 | +# |
| 23 | +# To use with Microbot, define your Microbot as follows: |
| 24 | +# ```python |
| 25 | +# bot = Microbot( |
| 26 | +# model="ollama-local/qwen3-coder:latest", |
| 27 | +# folder_to_mount=str(test_repo) |
| 28 | +# ) |
| 29 | +# ``` |
| 30 | +############################################################################### |
| 31 | + |
| 32 | +import json |
| 33 | +import os |
| 34 | +from dataclasses import asdict |
| 35 | + |
| 36 | +from dotenv import load_dotenv |
| 37 | +from microbots.llm.llm import LLMAskResponse, LLMInterface, llm_output_format_str |
| 38 | +import requests |
| 39 | +import logging |
| 40 | + |
# Module-level logger named after this module; used for the debug/warning
# traces emitted by OllamaLocal below.
logger = logging.getLogger(__name__)

# Runs at import time. Presumably loads LOCAL_MODEL_NAME / LOCAL_MODEL_PORT
# from a .env file into os.environ (python-dotenv) so OllamaLocal.__init__
# can read them -- see the setup notes at the top of this file.
load_dotenv()
| 44 | + |
class OllamaLocal(LLMInterface):
    """LLM interface that talks to an Ollama server running on ``localhost``.

    The conversation is kept in ``self.messages`` as a list of
    ``{"role": ..., "content": ...}`` dicts whose first entry is the system
    prompt. Every request sends the whole list, JSON-encoded, as the prompt of
    Ollama's ``/api/generate`` endpoint.
    """

    def __init__(self, system_prompt, model_name=None, model_port=None, max_retries=3):
        """Configure the client and start a fresh conversation.

        Args:
            system_prompt: Text stored as the first (system) message.
            model_name: Ollama model tag. Falls back to the
                ``LOCAL_MODEL_NAME`` environment variable when falsy.
            model_port: Port of the local Ollama server. Falls back to the
                ``LOCAL_MODEL_PORT`` environment variable when falsy.
            max_retries: Maximum number of invalid responses tolerated per
                ``ask`` call.

        Raises:
            ValueError: If the model name or port cannot be resolved from the
                arguments or the environment.
        """
        self.model_name = model_name or os.environ.get("LOCAL_MODEL_NAME")
        self.model_port = model_port or os.environ.get("LOCAL_MODEL_PORT")
        self.system_prompt = system_prompt
        self.messages = [{"role": "system", "content": system_prompt}]

        if not self.model_name or not self.model_port:
            raise ValueError("LOCAL_MODEL_NAME and LOCAL_MODEL_PORT environment variables must be set or passed as arguments to OllamaLocal.")

        # Set these values here. This logic will be handled in the parent class.
        self.max_retries = max_retries
        self.retries = 0

    def ask(self, message) -> LLMAskResponse:
        """Send ``message`` to the model and return its validated response.

        The message is appended to the history as a user turn. The model is
        then queried until a response validates or ``self.retries`` reaches
        ``self.max_retries``.

        Args:
            message: Content of the user turn.

        Returns:
            The structured response produced by ``_validate_llm_response``.

        Raises:
            RuntimeError: If no valid response was obtained within the retry
                budget.
            Exception: Propagated from ``_send_request_to_local_model`` when
                the server answers with a non-200 status.
        """
        self.retries = 0  # reset retries for each ask. Handled in parent class.

        self.messages.append({"role": "user", "content": message})

        # TODO: If the retry count is maintained here, all the wrong responses from the history
        # can be removed. It will be a natural history cleaning process.
        valid = False
        ask_response = None
        while not valid and self.retries < self.max_retries:
            raw_response = self._send_request_to_local_model(self.messages)
            self.messages.append({"role": "assistant", "content": raw_response})
            valid, ask_response = self._validate_llm_response(response=raw_response)

        # The loop only exits without a valid response once the retry budget
        # is exhausted, so checking `valid` alone is sufficient.
        if not valid:
            raise RuntimeError("Max retries reached. Failed to get valid response from local model.")

        # Remove last assistant message and replace with structured response
        self.messages.pop()
        self.messages.append({"role": "assistant", "content": json.dumps(asdict(ask_response))})

        return ask_response

    def clear_history(self) -> bool:
        """Reset the conversation to just the system prompt.

        Returns:
            Always ``True``.
        """
        self.messages = [
            {
                "role": "system",
                "content": self.system_prompt,
            }
        ]
        return True

    def _send_request_to_local_model(self, messages) -> str:
        """POST the conversation to the local Ollama server and return its text.

        Args:
            messages: The message list; it is JSON-encoded and sent as the
                ``prompt`` field of a non-streaming ``/api/generate`` request.

        Returns:
            The ``response`` field of the server's JSON reply, or ``""`` when
            that field is absent.

        Raises:
            Exception: If the server replies with a status code other than 200.
        """
        # Lazy %-style arguments: the (potentially large) history is only
        # formatted when DEBUG logging is actually enabled.
        logger.debug("Sending request to local model %s at port %s", self.model_name, self.model_port)
        logger.debug("Messages: %s", messages)

        server = f"http://localhost:{self.model_port}/api/generate"
        payload = {
            "model": self.model_name,
            "prompt": json.dumps(messages),
            "stream": False,
        }
        headers = {
            "Content-Type": "application/json",
        }
        # Set timeout: 30 seconds connect, 600 seconds read to handle model cold start
        response = requests.post(server, json=payload, headers=headers, timeout=(30, 600))
        logger.debug("\nResponse Code: %s\nResponse Text:\n%s\n---", response.status_code, response.text)

        if response.status_code != 200:
            raise Exception(f"Error from local model server: {response.status_code} - {response.text}")

        response_json = response.json()
        logger.debug("\nResponse JSON: %s", response_json)
        return response_json.get("response", "")

    def _validate_llm_response(self, response):
        """Extract the JSON object from ``response`` and validate it.

        Ollama does not always reply with JSON only; it sometimes adds text
        above or below the JSON. Everything before the first ``{`` and after
        the last ``}`` is therefore discarded before delegating to the parent
        class's validator.

        Args:
            response: Raw text returned by the model.

        Returns:
            ``(False, None)`` when ``response`` contains no ``{`` (or is not a
            string); in that case ``self.retries`` is incremented and a
            corrective user message is appended to the history. Otherwise,
            whatever the parent ``_validate_llm_response`` returns for the
            extracted text.
        """
        try:
            # IndexError: no "{" present. AttributeError/TypeError: the
            # response is not a str (e.g. a null "response" field).
            after_open = response.split("{", 1)[1]
        except (AttributeError, IndexError, TypeError) as e:
            self.retries += 1
            logger.warning("No JSON in LLM response.\nException: %s\nRetrying... (%d/%d)", e, self.retries, self.max_retries)
            self.messages.append({"role": "user", "content": "LLM_RES_ERROR: Please respond in the following JSON format.\n" + llm_output_format_str})
            return False, None

        # Drop anything after the last "}". If there is no "}" at all, the
        # remainder is kept whole and a closing brace is appended.
        response = "{" + after_open.rsplit("}", 1)[0] + "}"

        logger.debug("\nResponse from local model: %s", response)
        return super()._validate_llm_response(response)
0 commit comments