Changes from all commits (30 commits)
ec43897
feat: Add Kubernetes test environment for LLM analyzer development
Bishibop Aug 9, 2025
f46ff83
feat: Add LLMAnalyze type to analyzer API
Bishibop Aug 9, 2025
3793d44
docs: Update implementation plan after Phase 2 completion
Bishibop Aug 9, 2025
4613f77
refactor: Remove implementation plan (moved to parent repo)
Bishibop Aug 9, 2025
6e08a81
fix: Fix LLM analyzer file collection and make it functional
Bishibop Aug 10, 2025
28a3c4e
chore: Clean up accidentally committed support bundle and update .git…
Bishibop Aug 10, 2025
c9b0885
feat: Update default LLM model to gpt-5
Bishibop Aug 10, 2025
94f04a7
feat: Add CLI integration for problem description (Phase 4)
Bishibop Aug 10, 2025
6c2723a
feat: add comprehensive tests for LLM analyzer
Bishibop Aug 10, 2025
869b050
feat: add bundle re-analysis support (Phase 6 - Stretch goal)
Bishibop Aug 10, 2025
ee04173
feat: add comprehensive documentation and demo scripts for LLM analyzer
Bishibop Aug 10, 2025
469060a
fix: critical bugs and improvements in LLM analyzer
Bishibop Aug 10, 2025
e85688e
feat: optimize default configurations for production use
Bishibop Aug 10, 2025
3c3a138
feat: implement Phase 11 (Structured Output) and Phase 8 (Smart File …
Bishibop Aug 10, 2025
af066ae
docs: update README and examples with new enhancement features
Bishibop Aug 10, 2025
1bf951d
test: add comprehensive tests for enhanced LLM features
Bishibop Aug 10, 2025
0f3f029
refactor: use idiomatic Go for multi-line prompt string
Bishibop Aug 10, 2025
f1d07c3
feat: add OpenAI structured outputs for guaranteed JSON responses
Bishibop Aug 10, 2025
a8dc06c
refactor: fix critical code quality issues in LLM analyzer
Bishibop Aug 10, 2025
60c428f
fix: update CLI to use environment variable instead of global state
Bishibop Aug 10, 2025
4d085b0
docs: add clean demo setup and simple test script
Bishibop Aug 10, 2025
0035fbb
fix: Fix LLM analyzer execution and add demo scripts
Bishibop Aug 10, 2025
143ee98
chore: Clean up test files and add demo walkthrough
Bishibop Aug 10, 2025
34d74d9
feat: Add .env file support and fix LLM analyzer file pattern
Bishibop Aug 10, 2025
76f86d7
docs: Update demo walkthrough to reflect automatic .env loading
Bishibop Aug 10, 2025
0b66a9f
chore: Clean up demo environment and finalize documentation
Bishibop Aug 10, 2025
e3ea4c7
docs: Fix file patterns and simplify re-analyze section in demo
Bishibop Aug 10, 2025
61790f8
docs: Simplify README with concise LLM analyzer documentation
Bishibop Aug 10, 2025
c9eb624
doc updates
Bishibop Aug 10, 2025
580fb59
test: Fix fake tests in LLM analyzer test suite
Bishibop Aug 19, 2025
2 changes: 2 additions & 0 deletions .gitignore
@@ -31,6 +31,7 @@ vendor
*.swo
*~
.envrc
.env


dist
@@ -48,3 +49,4 @@ sbom/
# Ignore generated support bundles
*.tar.gz
!testdata/supportbundle/*.tar.gz
support-bundle-*/
197 changes: 197 additions & 0 deletions DEMO_WALKTHROUGH.md
@@ -0,0 +1,197 @@
# LLM Analyzer Demo Walkthrough

This guide walks through a demonstration of the new AI-powered LLM analyzer in Troubleshoot.sh.

## Prerequisites

Before starting the demo, ensure you have:
- A Kubernetes cluster (Kind, Minikube, or any cluster)
- kubectl configured to access your cluster
- The Troubleshoot project built locally (`make build`)
- An OpenAI API key in a `.env` file

## Introduction (1 minute)

**Key points:**
- No need to anticipate every failure mode
- AI understands context and correlations
- Works with any application, not just pre-configured scenarios
- Uses cost-effective models (gpt-4o-mini by default)

## Part 1: Setup (2 minutes)

### Build the project (if not already done)

```bash
# Build the Troubleshoot binaries
make build

# Verify the binaries exist
ls -la ./bin/support-bundle ./bin/analyze
```

### Set your OpenAI API key (if not already done)

Troubleshoot.sh now automatically loads a `.env` file from the current directory, following modern CLI tool conventions, so an `OPENAI_API_KEY=sk-...` entry there is picked up without exporting anything.

### Create a test cluster (if needed)

```bash
# If you don't have a cluster, create one with Kind
kind create cluster --name demo-cluster
```

## Part 2: Deploy a Failing Application (3 minutes)

### Deploy the demo application

```bash
# Run the deployment script
./demo-app-deploy.sh
```

This script will:
- Create a namespace called `demo-app`
- Deploy a web application that fails to connect to the database
- Deploy a database that gets OOMKilled due to memory limits
- Show the failing pods and recent events

### Verify the problems

```bash
# Check pod status - you'll see CrashLoopBackOff
kubectl get pods -n demo-app

# Optional: Check logs to see the errors
kubectl logs -n demo-app -l app=web --tail=10
kubectl logs -n demo-app -l app=db --tail=10
```

## Part 3: Collect & Analyze with LLM (4 minutes)

### Create the support bundle specification

```bash
cat <<EOF > demo-support-bundle.yaml
apiVersion: troubleshoot.sh/v1beta2
kind: SupportBundle
metadata:
  name: demo-app-troubleshoot
spec:
  collectors:
    - clusterInfo: {}
    - clusterResources: {}
    - logs:
        name: demo-logs
        namespace: demo-app
        limits:
          maxLines: 1000
    - events:
        namespace: demo-app

  analyzers:
    # AI-Powered Analyzer
    - llm:
        checkName: "AI Diagnostic Analysis"
        collectorName: "demo-logs"
        fileName: "**/*.log"
        model: "gpt-4o-mini"
        maxFiles: 10
        priorityPatterns:
          - "error"
          - "fatal"
          - "failed"
          - "OOM"
        outcomes:
          - fail:
              when: "issue_found"
              message: |
                AI Analysis Found Critical Issues:
                {{.Summary}}

                Root Cause: {{.RootCause}}
                Affected Components: {{.AffectedPods}}
          - pass:
              message: "No critical issues detected"

    # Traditional analyzer for comparison
    - deploymentStatus:
        name: web-app
        namespace: demo-app
        outcomes:
          - fail:
              when: "< 1"
              message: "Web app deployment has no ready replicas"
          - pass:
              message: "Web app deployment is running"
EOF
```
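The `maxFiles` and `priorityPatterns` fields cap how many files are sent to the model and which ones are preferred. The selection logic itself is not part of this spec; the following is a minimal Go sketch of one plausible ranking approach — all names here are hypothetical, not the analyzer's actual implementation:

```go
package main

import (
	"fmt"
	"sort"
	"strings"
)

// selectFiles is a hypothetical sketch of "smart file selection": files whose
// contents match any priority pattern rank first, and at most maxFiles are
// kept. The analyzer's real implementation may differ.
func selectFiles(files map[string]string, patterns []string, maxFiles int) []string {
	type scored struct {
		name string
		hits int
	}
	var ranked []scored
	for name, contents := range files {
		s := scored{name: name}
		lower := strings.ToLower(contents)
		for _, p := range patterns {
			s.hits += strings.Count(lower, strings.ToLower(p))
		}
		ranked = append(ranked, s)
	}
	// Most pattern hits first; ties broken by name for determinism.
	sort.Slice(ranked, func(i, j int) bool {
		if ranked[i].hits != ranked[j].hits {
			return ranked[i].hits > ranked[j].hits
		}
		return ranked[i].name < ranked[j].name
	})
	if len(ranked) > maxFiles {
		ranked = ranked[:maxFiles]
	}
	out := make([]string, 0, len(ranked))
	for _, s := range ranked {
		out = append(out, s.name)
	}
	return out
}

func main() {
	files := map[string]string{
		"web.log":     "error: connection refused: dial tcp 10.0.0.5:5432",
		"db.log":      "fatal: out of memory\nOOMKilled",
		"sidecar.log": "started ok",
	}
	fmt.Println(selectFiles(files, []string{"error", "fatal", "OOM"}, 2))
	// → [db.log web.log]
}
```

The point of the cap is cost control: only the highest-signal files count against the model's context window.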

### Run the support bundle collection

```bash
# Run with a problem description
./bin/support-bundle demo-support-bundle.yaml \
--problem-description "Application is not starting and keeps crashing"
```

**Alternative: Interactive mode**
```bash
# Or use interactive mode to be prompted
./bin/support-bundle demo-support-bundle.yaml --interactive
# When prompted, type: "My application won't start and the database keeps restarting"
```

**Note:** End users would run `kubectl support-bundle` after installing the plugin; this demo uses the locally built binary instead.

## Part 4: Re-analyze an Existing Bundle (3 minutes)

### Create an analyzer-only specification

```bash
cat <<EOF > reanalyze.yaml
apiVersion: troubleshoot.sh/v1beta2
kind: Analyzer
metadata:
  name: focused-reanalysis
spec:
  analyzers:
    - llm:
        checkName: "Memory Issue Deep Dive"
        collectorName: "demo-logs"
        fileName: "**/*.log"
        model: "gpt-4o-mini"
        problemDescription: "Focus on memory and resource issues only"
        outcomes:
          - fail:
              when: "issue_found"
              message: |
                Memory Analysis Results:
                {{.Summary}}

                Recommendations: {{.Solution}}
          - pass:
              message: "No memory issues found"
EOF
```

### Re-analyze the existing bundle

```bash
# Use the bundle we just created (adjust filename as needed)
./bin/analyze support-bundle-*.tar.gz \
--analyzers reanalyze.yaml
```

## Cleanup (30 seconds)

```bash
# Remove the demo application
kubectl delete namespace demo-app

# Remove temporary files
rm demo-support-bundle.yaml reanalyze.yaml

# Delete the Kind cluster
kind delete cluster --name demo-cluster
```
85 changes: 85 additions & 0 deletions README.md
@@ -45,6 +45,91 @@ For more details on creating the custom resource files that drive support-bundle

And see our other tool [sbctl](https://github.com/replicatedhq/sbctl) that makes it easier to interact with support bundles using `kubectl` commands you already know

## LLM Analyzer (AI-Powered Analysis)

The LLM analyzer uses OpenAI to automatically analyze Kubernetes logs and identify issues. It understands context, finds root causes, and correlates problems across multiple components.

### What's Different

- **No rules to write** - AI understands logs and errors automatically
- **Finds root causes** - Identifies why problems occur, not just symptoms
- **Correlates issues** - Understands relationships (e.g., DB crash → app failures)
- **Natural language** - Describe problems in plain English

### Setup

1. **Get an OpenAI API key** from [platform.openai.com](https://platform.openai.com)
2. **Create a `.env` file**:
   ```bash
   echo 'OPENAI_API_KEY=sk-...' > .env
   ```
   The tool automatically loads `.env` files.

### How to Use

Add the `llm` analyzer to your spec:

```yaml
apiVersion: troubleshoot.sh/v1beta2
kind: SupportBundle
spec:
  collectors:
    - logs:
        name: app-logs
        namespace: default
  analyzers:
    - llm:
        checkName: "AI Analysis"
        collectorName: "app-logs"
        fileName: "**/*.log"   # Use **/* for nested dirs
        model: "gpt-4o-mini"   # Cost-effective, ~$0.01 per analysis
        outcomes:
          - fail:
              when: "issue_found"
              message: "Found: {{.Summary}}"
          - pass:
              message: "No issues detected"
```

Run with problem description:
```bash
./bin/support-bundle spec.yaml --problem-description "App keeps crashing"
```

Or re-analyze existing bundles:
```bash
./bin/analyze bundle.tar.gz --analyzers spec.yaml
```

### Model Selection Guide

- **gpt-4o-mini**: (Default) Cost-effective with 128K context window, recommended for most use cases ($0.15/1M input tokens)
- **gpt-5**: Most advanced model for complex issues requiring cutting-edge reasoning (pricing TBD)

### Enhanced Output

The LLM analyzer now provides structured, actionable output including:
- **Root Cause Analysis**: Identified root cause of the problem
- **Recommended Commands**: kubectl commands to resolve issues
- **Affected Resources**: List of impacted pods and services
- **Next Steps**: Ordered action items
- **Documentation Links**: Relevant Kubernetes documentation
- **Related Issues**: Other potential problems found

Template variables available in outcome messages:
- `{{.Summary}}`, `{{.Issue}}`, `{{.Solution}}`, `{{.RootCause}}`
- `{{.Commands}}`, `{{.AffectedPods}}`, `{{.NextSteps}}`
- `{{.Severity}}`, `{{.Confidence}}`
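Outcome messages are ordinary Go `text/template` strings, so the variables above expand the way any template field does. A minimal sketch, using a hypothetical `AnalysisResult` struct in place of the analyzer's real result type (which is not shown in this diff):

```go
package main

import (
	"fmt"
	"strings"
	"text/template"
)

// AnalysisResult is a hypothetical stand-in for the fields the LLM analyzer
// exposes to outcome messages ({{.Summary}}, {{.RootCause}}, ...).
type AnalysisResult struct {
	Summary      string
	RootCause    string
	AffectedPods []string
}

// renderOutcome expands an outcome message the way Go's text/template does:
// each {{.Field}} placeholder is replaced with the matching struct field.
func renderOutcome(msg string, r AnalysisResult) string {
	tmpl := template.Must(template.New("outcome").Parse(msg))
	var b strings.Builder
	if err := tmpl.Execute(&b, r); err != nil {
		panic(err)
	}
	return b.String()
}

func main() {
	msg := "Found: {{.Summary}}\nRoot Cause: {{.RootCause}}\nAffected: {{.AffectedPods}}"
	out := renderOutcome(msg, AnalysisResult{
		Summary:      "db pod is OOMKilled; web app cannot connect",
		RootCause:    "db memory limit is below its working set",
		AffectedPods: []string{"db-0", "web-7d9f"},
	})
	fmt.Println(out)
}
```

A variable the result does not populate simply renders as its zero value, so messages should only reference fields the outcome is expected to fill.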

### Examples

See [examples/analyzers/llm-analyzer.yaml](examples/analyzers/llm-analyzer.yaml) for complete examples including:
- Using LLM analyzer alongside traditional analyzers
- Re-analyzing existing bundles
- Different model configurations
- Smart file selection with priority patterns
- Enhanced output templates

# Community

For questions about using Troubleshoot, how to contribute and engaging with the project in any other way, please refer to the following resources and channels.
4 changes: 4 additions & 0 deletions cmd/analyze/main.go
@@ -1,10 +1,14 @@
package main

import (
    "github.com/joho/godotenv"
    "github.com/replicatedhq/troubleshoot/cmd/analyze/cli"
    _ "k8s.io/client-go/plugin/pkg/client/auth"
)

func main() {
    // Load .env file if it exists (ignore error if not found)
    _ = godotenv.Load()

    cli.InitAndExecute()
}
4 changes: 4 additions & 0 deletions cmd/collect/main.go
@@ -1,10 +1,14 @@
package main

import (
    "github.com/joho/godotenv"
    "github.com/replicatedhq/troubleshoot/cmd/collect/cli"
    _ "k8s.io/client-go/plugin/pkg/client/auth"
)

func main() {
    // Load .env file if it exists (ignore error if not found)
    _ = godotenv.Load()

    cli.InitAndExecute()
}
4 changes: 4 additions & 0 deletions cmd/preflight/main.go
@@ -1,10 +1,14 @@
package main

import (
    "github.com/joho/godotenv"
    "github.com/replicatedhq/troubleshoot/cmd/preflight/cli"
    _ "k8s.io/client-go/plugin/pkg/client/auth"
)

func main() {
    // Load .env file if it exists (ignore error if not found)
    _ = godotenv.Load()

    cli.InitAndExecute()
}
23 changes: 23 additions & 0 deletions cmd/troubleshoot/cli/analyze.go
@@ -1,12 +1,15 @@
package cli

import (
    "bufio"
    "encoding/json"
    "fmt"
    "io/ioutil"
    "net/http"
    "os"
    "strings"

    "github.com/mattn/go-isatty"
    "github.com/replicatedhq/troubleshoot/internal/util"
    analyzer "github.com/replicatedhq/troubleshoot/pkg/analyze"
    "github.com/replicatedhq/troubleshoot/pkg/convert"
@@ -33,6 +36,25 @@ func Analyze() *cobra.Command {
                return err
            }

            // Handle problem description for LLM analyzer
            problemDescription := v.GetString("problem-description")
            if problemDescription == "" && isatty.IsTerminal(os.Stdin.Fd()) {
                // Check if spec contains LLM analyzer
                if strings.Contains(analyzerSpec, "llm:") {
                    fmt.Print("Please describe the problem you're experiencing: ")
                    reader := bufio.NewReader(os.Stdin)
                    problemDescription, err = reader.ReadString('\n')
                    if err == nil {
                        problemDescription = strings.TrimSpace(problemDescription)
                    }
                }
            }

            // Set problem description via environment variable for LLM analyzer
            if problemDescription != "" {
                os.Setenv("PROBLEM_DESCRIPTION", problemDescription)
            }

            result, err := analyzer.DownloadAndAnalyze(v.GetString("bundle"), analyzerSpec)
            if err != nil {
                return err
@@ -71,6 +93,7 @@ func Analyze() *cobra.Command {
    cmd.Flags().String("compatibility", "", "output compatibility mode: support-bundle")
    cmd.Flags().MarkHidden("compatibility")
    cmd.Flags().Bool("quiet", false, "enable/disable error messaging and only show parseable output")
    cmd.Flags().String("problem-description", "", "describe the problem for LLM analysis (interactive prompt if not provided)")

    return cmd
}