diff --git a/README.md b/README.md
index 8208092..57a1e97 100644
--- a/README.md
+++ b/README.md
@@ -20,6 +20,7 @@ Infinite Radio generates endless music that automatically changes based on your
## Prerequisites
For running the music model locally, you will need:
+
- **Docker** with GPU support
- **NVIDIA GPU** with CUDA support
- **[NVIDIA Container Toolkit](https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/latest/install-guide.html)**
@@ -27,6 +28,7 @@ For running the music model locally, you will need:
## Music Model
1. **Run the Docker Container from [Dockerhub](https://hub.docker.com/repository/docker/lauriewired/musicbeats/general):**
+
```bash
docker run --gpus all --network host lauriewired/musicbeats:latest
```
@@ -34,7 +36,7 @@ For running the music model locally, you will need:
2. **Access the web interface:**
- Open your browser and navigate to `http://127.0.0.1:8080` or the IP where the music container is running
- Click the play button to start streaming
-
+
## Running a DJ
## Option 1: Running the DJ on MacOS
@@ -42,6 +44,7 @@ For running the music model locally, you will need:
The Mac application can start the Process DJ or connect to the LLM DJ. It lives as a tray application to easily configure and examine the music control. **Note:** When using the Mac application, you may need to provide additional permissions to allow the DJ to examine your screen to dynamically select the genre.
1. **Download the latest release:**
+
- Go to the releases page and download the [latest version](https://github.com/LaurieWired/InfiniteRadio/releases/download/v1.0/InfiniteRadio.zip)
- Run the .app file and Infinite Radio will appear in your tray
@@ -49,7 +52,7 @@ The Mac application can start the Process DJ or connect to the LLM DJ. It lives
3. **Select and run your DJ of choice**
- You can run the process DJ immediately or choose the LLM DJ
- - If selecting the LLM DJ, ensure the model server is running already in [LM Studio](https://lmstudio.ai) (See *Option 3* below for an example although you may skip the python step when using the Mac app)
+ - If selecting the LLM DJ, ensure the model server is running already in [LM Studio](https://lmstudio.ai) (See _Option 3_ below for an example although you may skip the python step when using the Mac app)
## Option 2: Running Process DJ with Python
@@ -66,7 +69,7 @@ The LLM DJ analyzes the data on your screen to automatically configure the genre
1. **Run the LLM in LM Studio:**
- Download [InternVL3](https://huggingface.co/OpenGVLab/InternVL3-2B) (or any image to text model)
- Start the server in LM Studio
-
+
2. **Run the Python Connection:**
@@ -74,6 +77,21 @@ The LLM DJ analyzes the data on your screen to automatically configure the genre
python llm_dj.py 127.0.0.1 8080 # Point this to the IP and port of the music model
```
+## Option 4: Running the LLM DJ with Ollama (Alternative)
+
+The Ollama DJ provides the same functionality as LM Studio but with a lighter, easier-to-use local LLM server.
+
+1. **Pull the InternVL3 model:**
+
+ ```bash
+   ollama pull hf.co/mradermacher/InternVL3-2B-GGUF:Q8_0
+ ```
+
+2. **Run the Python Connection:**
+ ```bash
+ python ollama_dj.py 127.0.0.1 8080 # Point this to the IP and port of the music model
+ ```
+
# API Reference
## Change Genre
diff --git a/ollama_dj.py b/ollama_dj.py
new file mode 100644
index 0000000..34493af
--- /dev/null
+++ b/ollama_dj.py
@@ -0,0 +1,266 @@
+#!/usr/bin/env python3
+"""
+Ollama DJ - Uses Ollama to determine music genre based on activity
+Alternative to llm_dj.py that works with Ollama instead of LM Studio
+"""
+
+import time
+import sys
+import requests
+import argparse
+import json
+import base64
+from io import BytesIO
+from PIL import Image
+import mss
+from openai import OpenAI
+
+
+def examine_activity(debug=False, monitor_index=0):
+    """Take a screenshot of the current screen and return it as a base64 encoded string.
+
+    Args:
+        debug: When True, open a local preview of the captured image before
+            returning (may block until the viewer is closed on some platforms).
+        monitor_index: Monitor to capture; 0 captures all monitors combined,
+            1 and above select a specific monitor as reported by mss.
+
+    Returns:
+        Base64-encoded PNG data as a str, or None if the capture failed.
+    """
+    try:
+        with mss.mss() as sct:
+            # Take screenshot to share to the LLM
+            # monitor_index 0 = all monitors combined, 1+ = specific monitor
+            if monitor_index >= len(sct.monitors):
+                print(f" WARNING: Monitor {monitor_index} not found, using all monitors")
+                monitor_index = 0
+
+            monitor = sct.monitors[monitor_index]
+            if monitor_index == 0:
+                print(f" Examining all monitors combined")
+            else:
+                print(f" Examining monitor {monitor_index}")
+            screenshot = sct.grab(monitor)
+
+            # Convert to PIL Image
+            # mss yields raw BGRA pixels; "BGRX" raw mode decodes them to RGB,
+            # discarding the (unused) alpha channel.
+            img = Image.frombytes("RGB", screenshot.size, screenshot.bgra, "raw", "BGRX")
+
+            # Resize image to reduce file size (optional, but recommended for LLM processing)
+            # Keep aspect ratio but limit max dimension to 1024px
+            max_size = 1024
+            if img.width > max_size or img.height > max_size:
+                img.thumbnail((max_size, max_size), Image.Resampling.LANCZOS)
+
+            if debug:
+                print(" DEBUG: Opening screenshot preview...")
+                img.show()
+
+            # Convert to base64
+            buffer = BytesIO()
+            img.save(buffer, format="PNG")
+            img_str = base64.b64encode(buffer.getvalue()).decode()
+
+            return img_str
+    except Exception as e:
+        print(f"ERROR: Failed to take screenshot: {e}")
+        return None
+
+
+def get_genre_from_ollama(client, model_name, screenshot_b64):
+    """Use Ollama to get music genre from screenshot.
+
+    Args:
+        client: OpenAI-compatible client pointed at Ollama's /v1 endpoint.
+        model_name: Name of the Ollama model to query.
+        screenshot_b64: Base64-encoded PNG of the current screen.
+
+    Returns:
+        The suggested genre as a str, or None if the request failed or the
+        model's reply could not be parsed as the expected JSON.
+    """
+    try:
+        print(f"-> Analyzing activity with Ollama model '{model_name}'...")
+
+        # Request JSON output via optimized system prompt
+        response = client.chat.completions.create(
+            model=model_name,
+            messages=[
+                {
+                    "role": "system",
+                    "content": "### SYSTEM\nYou are given one image.\n\n### INSTRUCTION\n1. Silently infer what the user is doing in the screenshot.\n2. Pick one 1-2-word music genre that fits the activity.\n *Think step-by-step internally only.*\n3. Return a JSON object that conforms to the provided schema.\n **Do not output anything else.**\n\n### RESPONSE FORMAT\n{\"music_genre\": \"\"}"
+                },
+                {
+                    "role": "user",
+                    "content": [
+                        {
+                            "type": "image_url",
+                            "image_url": {
+                                "url": f"data:image/png;base64,{screenshot_b64}"
+                            }
+                        }
+                    ]
+                }
+            ],
+            # Small token budget and zero temperature: we only want a short,
+            # deterministic JSON object back.
+            max_tokens=50,
+            temperature=0.0
+        )
+
+        content = response.choices[0].message.content
+
+        try:
+            # First, strip any markdown code blocks that might wrap the JSON
+            # (models often fence their output in ```json ... ``` despite the prompt).
+            # NOTE(review): 'import re' is repeated locally here and in the
+            # fallback below; harmless, but could be hoisted to module level.
+            import re
+            # Remove ```json and ``` markers
+            cleaned_content = re.sub(r'^```(?:json)?\s*\n?', '', content.strip(), flags=re.MULTILINE)
+            cleaned_content = re.sub(r'\n?```\s*$', '', cleaned_content, flags=re.MULTILINE)
+
+            genre_data = json.loads(cleaned_content.strip())
+            if "music_genre" in genre_data and isinstance(genre_data["music_genre"], str):
+                return genre_data["music_genre"]
+            else:
+                print(f" WARNING: 'music_genre' key missing or invalid in Ollama response: {content}")
+                return None
+        except json.JSONDecodeError:
+            print(f" WARNING: Could not parse JSON from Ollama response: {content}")
+            # Try to find JSON-like pattern in the text as fallback
+            # (last resort when the reply embeds the JSON in extra prose).
+            import re
+            match = re.search(r'\{"music_genre":\s*"([^"]+)"\}', content)
+            if match:
+                return match.group(1)
+            return None
+
+    except Exception as e:
+        print(f" ERROR: Failed to get genre from Ollama: {e}")
+        return None
+
+
+def change_server_genre(server_ip, server_port, genre):
+    """Sends a POST request to the music server to change the genre.
+
+    Args:
+        server_ip: IP/host of the music server.
+        server_port: Port of the music server.
+        genre: Genre string sent as the JSON payload's "genre" field.
+
+    Returns:
+        True if the server accepted the change, False on any request error.
+    """
+    url = f"http://{server_ip}:{server_port}/genre"
+    payload = {"genre": genre}
+    print(f"-> Attempting to change genre to '{genre}'...")
+    try:
+        # Short timeout keeps the DJ loop responsive if the server is down.
+        response = requests.post(url, json=payload, timeout=5)
+        response.raise_for_status()
+        print(f" SUCCESS: Genre changed to '{response.json().get('genre', genre)}'.")
+        return True
+    except requests.exceptions.RequestException as e:
+        print(f" ERROR: Could not connect to the music server at {url}. Details: {e}")
+        return False
+
+
+def check_ollama_connection(ollama_url, model_name):
+    """Check if Ollama is running and the model is available.
+
+    Args:
+        ollama_url: Base URL of the Ollama server (e.g. http://localhost:11434).
+        model_name: Exact model name to look for in Ollama's model list.
+
+    Returns:
+        True if Ollama responded and the model is listed, False otherwise.
+    """
+    try:
+        # Check if Ollama is running
+        response = requests.get(f"{ollama_url}/api/tags", timeout=5)
+        response.raise_for_status()
+
+        # Check if the model is available
+        models = response.json().get("models", [])
+        model_names = [model.get("name", "") for model in models]
+
+        # NOTE(review): this is an exact string match against tag names; a model
+        # pulled without an explicit tag is typically listed as 'name:latest' —
+        # confirm the match still succeeds for untagged pulls.
+        if model_name not in model_names:
+            print(f" WARNING: Model '{model_name}' not found in Ollama.")
+            print(f" Available models: {', '.join(model_names)}")
+            print(f" You can pull the model with: ollama pull {model_name}")
+            return False
+
+        print(f" SUCCESS: Ollama is running and model '{model_name}' is available.")
+        return True
+
+    except requests.exceptions.RequestException as e:
+        print(f" ERROR: Could not connect to Ollama at {ollama_url}. Details: {e}")
+        print(f" Make sure Ollama is running with: ollama serve")
+        return False
+
+
+def main(args):
+    """Main loop to take screenshots, get genre suggestions, and update music.
+
+    Args:
+        args: Parsed argparse namespace (music_ip, music_port, model,
+            ollama_host, ollama_port, interval, monitor, debug).
+
+    Exits the process with status 1 if Ollama is unreachable or on an
+    unexpected error; Ctrl+C stops the loop cleanly.
+    """
+    ollama_url = f"http://{args.ollama_host}:{args.ollama_port}"
+
+    print("--- Ollama DJ Starting ---")
+    print(f"Screen Activity Analysis every {args.interval} seconds")
+    print(f"Ollama URL: {ollama_url}")
+    print(f"Ollama Model: {args.model}")
+    print(f"Music Server: http://{args.music_ip}:{args.music_port}/genre")
+
+    # Show monitor info
+    try:
+        with mss.mss() as sct:
+            if args.monitor == 0:
+                print(f"Monitor: All monitors combined")
+            elif args.monitor < len(sct.monitors):
+                monitor = sct.monitors[args.monitor]
+                print(f"Monitor: Monitor {args.monitor} ({monitor['width']}x{monitor['height']})")
+            else:
+                print(f"Monitor: {args.monitor} (will fallback to all monitors)")
+    except Exception as e:
+        print(f"Monitor: Unable to detect monitor info - {e}")
+
+    print("Press Ctrl+C to stop.")
+
+    # Check Ollama connection and model availability
+    if not check_ollama_connection(ollama_url, args.model):
+        print(" Exiting due to Ollama connection issues.")
+        sys.exit(1)
+
+    # Initialize the OpenAI client to point to Ollama
+    # (Ollama exposes an OpenAI-compatible API under /v1).
+    client = OpenAI(
+        base_url=f"{ollama_url}/v1",
+        api_key="ollama"  # Ollama doesn't require a real API key
+    )
+
+    # Track the last applied genre so we only POST when the suggestion changes.
+    last_genre = None
+
+    try:
+        while True:
+            print(f"\n--- Screen Activity Analysis cycle at {time.strftime('%H:%M:%S')} ---")
+
+            # Take screenshot
+            screenshot_b64 = examine_activity(debug=args.debug, monitor_index=args.monitor)
+            if not screenshot_b64:
+                print(" Skipping this cycle due to screenshot failure.")
+                time.sleep(args.interval)
+                continue
+
+            # Pass the client instance to the function
+            suggested_genre = get_genre_from_ollama(client, args.model, screenshot_b64)
+            if not suggested_genre:
+                print(" No genre suggestion received from Ollama.")
+                time.sleep(args.interval)
+                continue
+
+            print(f" Ollama suggested genre: '{suggested_genre}'")
+
+            # Only change if it's different from the last genre
+            # (case-insensitive; str() handles the initial None value).
+            if suggested_genre.lower() != str(last_genre).lower():
+                if change_server_genre(args.music_ip, args.music_port, suggested_genre):
+                    last_genre = suggested_genre
+                else:
+                    print(" Failed to change genre on music server.")
+            else:
+                print(" Genre unchanged, skipping server update.")
+
+            # Wait for next cycle
+            time.sleep(args.interval)
+
+    except KeyboardInterrupt:
+        print("\n--- Ollama DJ Stopping ---")
+    except Exception as e:
+        print(f"\nAn unexpected error occurred: {e}")
+        sys.exit(1)
+
+
+if __name__ == "__main__":
+ parser = argparse.ArgumentParser(
+ description="Uses Ollama to determine music genre from screen activity."
+ )
+ parser.add_argument("music_ip", help="IP address of the music server")
+ parser.add_argument("music_port", type=int, help="Port of the music server")
+ parser.add_argument("--model", default="hf.co/mradermacher/InternVL3-2B-GGUF:Q8_0", help="Ollama model to use (default: 'hf.co/mradermacher/InternVL3-2B-GGUF:Q8_0')")
+ parser.add_argument("--ollama-host", default="localhost", help="Ollama host (default: localhost)")
+ parser.add_argument("--ollama-port", type=int, default=11434, help="Ollama port (default: 11434)")
+ parser.add_argument("--interval", type=int, default=10, help="Interval in seconds between screen analysis (default: 10)")
+ parser.add_argument("--monitor", type=int, default=1, help="Monitor to capture (0=all monitors, 1=first monitor, 2=second monitor, etc.)")
+ parser.add_argument("--list-monitors", action="store_true", help="List available monitors and exit")
+ parser.add_argument("--debug", action="store_true", help="Show screenshot preview before sending to Ollama")
+
+ parsed_args = parser.parse_args()
+
+ # Handle monitor listing
+ if parsed_args.list_monitors:
+ print("Available monitors:")
+ try:
+ with mss.mss() as sct:
+ for i, monitor in enumerate(sct.monitors):
+ if i == 0:
+ print(f" {i}: All monitors combined ({monitor['width']}x{monitor['height']})")
+ else:
+ print(f" {i}: Monitor {i} ({monitor['width']}x{monitor['height']} at {monitor['left']},{monitor['top']})")
+ except Exception as e:
+ print(f"Error listing monitors: {e}")
+ sys.exit(0)
+
+ main(parsed_args)