Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 8 additions & 2 deletions .alert-menta.user.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5,20 +5,26 @@ system:
ai:
provider: "openai" # "openai" or "vertexai"
openai:
model: "gpt-4o-mini-2024-07-18" # Check the list of available models by `curl https://api.openai.com/v1/models -H "Authorization: Bearer $OPENAI_API_KEY"`
model: "gpt-4o-mini" # Check the list of available models by `curl https://api.openai.com/v1/models -H "Authorization: Bearer $OPENAI_API_KEY"`

vertexai:
project: "<YOUR_PROJECT_ID>"
location: "us-central1"
model: "gemini-1.5-flash-001"
model: "gemini-2.0-flash-001"

commands:
- describe:
description: "Generate a detailed description of the Issue."
system_prompt: "The following is the GitHub Issue and comments on it. Please Generate a detailed description.\n"
similar_code: false
similar_issue: true
- suggest:
description: "Provide suggestions for improvement based on the contents of the Issue."
system_prompt: "The following is the GitHub Issue and comments on it. Please identify the issues that need to be resolved based on the contents of the Issue and provide three suggestions for improvement.\n"
similar_code: true
similar_issue: false
- ask:
description: "Answer free-text questions."
system_prompt: "The following is the GitHub Issue and comments on it. Based on the content provide a detailed response to the following question:\n"
similar_code: true
similar_issue: false
4 changes: 2 additions & 2 deletions .github/workflows/alert-menta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@ jobs:
- name: Add Comment
run: |
if [[ "$COMMAND" == "ask" ]]; then
./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -intent "$INTENT"
./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -intent "$INTENT" -use-rag -pinecone-api-key ${{ secrets.PINECONE_API_KEY }}
else
./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE
./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -use-rag -pinecone-api-key ${{ secrets.PINECONE_API_KEY }}
fi
43 changes: 32 additions & 11 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,18 @@ We reduce the burden of system failure response using LLM.
You can receive support for failure handling that is completed within GitHub.
- Execute commands interactively in GitHub Issue comments:
- `describe` command to summarize the Issue
- `analysis` command for root cause analysis of failures (in development)
- `suggest` command for proposing improvement measures for failures
- `ask` command for asking additional questions
- Mechanism to improve response accuracy using [RAG](https://cloud.google.com/use-cases/retrieval-augmented-generation?hl=en) (in development)
- Selectable LLM models (OpenAI, VertexAI)
- Execute any command defined by the user other than the above (e.g. `analysis` command for root cause analysis of failures). Please see [here](#alert-mentauseryaml)
- Mechanism to improve response accuracy using [RAG](https://cloud.google.com/use-cases/retrieval-augmented-generation?hl=en) in [Pinecone](https://www.pinecone.io/)
  - If you use RAG, you must register with Pinecone, the vector database, and register your API key as PINECONE_API_KEY in Actions Secrets. Please see [here](#3-configure-to-use-rag)
- `similar_code` option to search similar code
- `similar_issue` option to search similar issues
- Selectable LLM models
- OpenAI
- VertexAI
- Claude (under development)
- OpenAI-Compatible Server (under development)
- Extensible prompt text
- Multilingual support

Expand All @@ -24,18 +31,26 @@ Prepare a GitHub PAT with the following permissions and register it in Secrets:
- repo
- workflow
### 2. Configure to use LLM
#### Open AI
#### OpenAI
Generate an API key and register it in Secrets.
#### Vertex AI
Enable Vertex AI on Google Cloud.
Alert-menta obtains access to VertexAI using [Workload Identity Federation](https://cloud.google.com/iam/docs/workload-identity-federation). Please see [here](#if-using-vertex-ai) for details.
### 3. Create the alert-menta configuration file
### 3. Configure to use RAG
#### Overview
Alert-Menta offers the ability to suggest similar issues and search the code base.
This functionality uses Pinecone as a vector database.
#### Setup Pinecone
To register a Pinecone account and create a database, follow the getting-started guide [here](https://docs.pinecone.io/guides/get-started/overview).
After creating an account, obtain an API key and save it in Actions Secrets under the name PINECONE_API_KEY.
Alert-Menta will automatically create the Index.
### 4. Create the alert-menta configuration file
Create the alert-menta configuration file in the root of the repository. For details, please see [here](#alert-mentauseryaml).
### 4. Create the Actions configuration file
### 5. Create the Actions configuration file
There is a [template](#template) available, so please use it.
### 5. Monitoring alerts or user reports are received on Issues
### 6. Monitoring alerts or user reports are received on Issues
For the method to bring monitoring alerts to Issues, please see [this repository](https://github.com/kechigon/alert-menta-lab/tree/main).
### 6. Execute alert-menta
### 7. Execute alert-menta
Execute commands on the Issue. Run commands with a slash at the beginning (e.g., `/describe`). For the `ask` command, leave a space and enter the question (e.g., `/ask What about the Next Action?`). Alert-menta includes the text of the Issue in the prompt and sends it to the LLM, then posts the response as a comment on the Issue.

## Configuration
Expand All @@ -52,17 +67,23 @@ ai:
vertexai:
project: "<YOUR_PROJECT_ID>"
location: "us-central1"
model: "gemini-1.5-flash-001"
model: "gemini-2.0-flash-001"
commands:
- describe:
description: "Generate a detailed description of the Issue."
system_prompt: "The following is the GitHub Issue and comments on it. Please Generate a detailed description.\n"
similar_code: false
similar_issue: true
- suggest:
description: "Provide suggestions for improvement based on the contents of the Issue."
system_prompt: "The following is the GitHub Issue and comments on it. Please identify the issues that need to be resolved based on the contents of the Issue and provide three suggestions for improvement.\n"
similar_code: true
similar_issue: false
- ask:
description: "Answer free-text questions."
system_prompt: "The following is the GitHub Issue and comments on it. Based on the content, provide a detailed response to the following question:\n"
similar_code: true
similar_issue: false
```
Specify the LLM to use with `ai.provider`.
You can change the system prompt with `commands.{command}.system_prompt`.
Expand Down Expand Up @@ -122,9 +143,9 @@ jobs:
- name: Add Comment
run: |
if [[ "$COMMAND" == "ask" ]]; then
./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -intent "$INTENT"
./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -intent "$INTENT" -use-rag -pinecone-api-key ${{ secrets.PINECONE_API_KEY }}
else
./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE
./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -use-rag -pinecone-api-key ${{ secrets.PINECONE_API_KEY }}
fi
```
#### If using Vertex AI
Expand Down
206 changes: 206 additions & 0 deletions cmd/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ import (
"os"
"regexp"
"strings"
"text/template"

"github.com/3-shake/alert-menta/internal/ai"
"github.com/3-shake/alert-menta/internal/github"
"github.com/3-shake/alert-menta/internal/rag"
"github.com/3-shake/alert-menta/internal/utils"
)

Expand All @@ -23,10 +25,21 @@ type Config struct {
configFile string
ghToken string
oaiKey string
useRag bool
pineconeKey string
}

type Neo4jConfig struct {
uri string
username string
password string
fulltextIndex string
vectorIndex string
}

func main() {
cfg := &Config{}
// neo4jcfg := &Neo4jConfig{}
flag.StringVar(&cfg.repo, "repo", "", "Repository name")
flag.StringVar(&cfg.owner, "owner", "", "Repository owner")
flag.IntVar(&cfg.issueNumber, "issue", 0, "Issue number")
Expand All @@ -35,13 +48,28 @@ func main() {
flag.StringVar(&cfg.configFile, "config", "", "Configuration file")
flag.StringVar(&cfg.ghToken, "github-token", "", "GitHub token")
flag.StringVar(&cfg.oaiKey, "api-key", "", "OpenAI api key")
flag.BoolVar(&cfg.useRag, "use-rag", false, "Use RAG model for response generation")
flag.StringVar(&cfg.pineconeKey, "pinecone-api-key", "", "Pinecone api key")
flag.Parse()

if cfg.repo == "" || cfg.owner == "" || cfg.issueNumber == 0 || cfg.ghToken == "" || cfg.command == "" || cfg.configFile == "" {
flag.PrintDefaults()
os.Exit(1)
}

var retriever rag.Retriever
if cfg.useRag {
flag.Parse()
if cfg.pineconeKey == "" {
log.Fatalf("Error: If -useRag is set, -pinecone-api-key is required")
}
r, err := getPineconeRetriever(cfg)
retriever = r
if err != nil {
log.Fatalf("Error getting retriever: %v", err)
}
}

logger := log.New(
os.Stdout, "[alert-menta main] ",
log.Ldate|log.Ltime|log.Llongfile|log.Lmsgprefix,
Expand All @@ -52,6 +80,15 @@ func main() {
logger.Fatalf("Error loading config: %v", err)
}

if cfg.command == "upsert_db" {
idxName := rag.GetPineconeIndexName(cfg.owner, cfg.repo)
err := CreateDB(idxName, []string{"main"}, cfg, loadedcfg, logger)
if err != nil {
logger.Fatalf("Error upserting DB: %v", err)
}
return
}

err = validateCommand(cfg.command, loadedcfg)
if err != nil {
logger.Fatalf("Error validating command: %v", err)
Expand All @@ -74,10 +111,60 @@ func main() {
logger.Fatalf("Error geting AI client: %v", err)
}

var docs []rag.Document
relatedIssue := ""
if cfg.useRag {
emb, err := getEmbeddingClient(cfg.oaiKey, loadedcfg, logger)
if err != nil {
logger.Fatalf("Error geting AI client: %v", err)
}

if loadedcfg.Ai.Commands[cfg.command].Similar_code {
ragPrompt, err := constructRAGPrompt(cfg.command, cfg.intent, userPrompt, imgs, *issue, loadedcfg, logger)
if err != nil {
logger.Fatalf("Error constructing RAG prompt: %v", err)
}

ragComment, err := aic.GetResponse(ragPrompt)
if err != nil {
logger.Fatalf("Error getting RAG comment: %v", err)
}
logger.Println("RAG Comment:", ragComment)

ragVector, err := emb.GetEmbedding(ragComment)
if err != nil {
logger.Fatalf("Error getting RAG vector: %v", err)
}

docs, err = retriever.RetrieveByVector(ragVector, rag.Options{})
for _, d := range docs {
prompt.UserPrompt += "\n" + d.String()
}
}
if loadedcfg.Ai.Commands[cfg.command].Similar_issue {
issueVector, err := emb.GetEmbedding(userPrompt)
if err != nil {
logger.Fatalf("Error getting issue vector: %v", err)
}
relatedIssue = retriever.RetrieveIssue(issueVector, uint32(cfg.issueNumber), rag.Options{})
}
}

comment, err := aic.GetResponse(prompt)
if err != nil {
logger.Fatalf("Error getting Response: %v", err)
}
if cfg.useRag {
if len(docs) > 0 {
comment += "\n\n" + "## Sources:\n"
for i, d := range docs {
comment += fmt.Sprintf("%d. [%s](%s)\n", i+1, d.Id, d.URL)
}
}
if relatedIssue != "" {
comment += "\n\n" + relatedIssue
}
}
logger.Println("Response:", comment)

if err := issue.PostComment(comment); err != nil {
Expand Down Expand Up @@ -159,6 +246,45 @@ func constructPrompt(command, intent, userPrompt string, imgs []ai.Image, cfg *u
return &ai.Prompt{UserPrompt: userPrompt, SystemPrompt: systemPrompt, Images: imgs}, nil
}

// constructRAGPrompt builds the preprocessing prompt used for RAG: it asks the
// LLM to identify files and functions related to the Issue, supplying the
// Issue text together with the file listing of the repository's default
// branch. The command, intent, and cfg parameters are currently unused but
// kept for signature parity with constructPrompt.
func constructRAGPrompt(command, intent, userPrompt string, imgs []ai.Image, issue github.GitHubIssue, cfg *utils.Config, logger *log.Logger) (*ai.Prompt, error) {
	systemPrompt := `
I'm looking to identify related files and functions to solve a GitHub Issue. Please provide analysis and advice based on the information I'll share in the following format:

## Analysis Requested:
1. Files likely related to this Issue and why
2. Specific functions or code blocks that should be investigated
3. Possible root causes of the problem
4. Approaches for resolution

Please suggest specific file paths and function names where possible. Maximize the use of information available from the repository structure to understand the code architecture before making suggestions.
`
	userPromptTmpl, err := template.New("userPrompt").Parse(`## GitHub Issue:
{{.UserPrompt}}

## Repository Structure:
{{.RepositoryStructure}}
`)
	if err != nil {
		return nil, fmt.Errorf("Error parsing userPrompt template: %w", err)
	}
	type PromptData struct {
		UserPrompt          string
		RepositoryStructure string
	}
	// Resolve the default branch before listing files; previously this error
	// was silently discarded, which could produce a listing for branch "".
	defaultBranch, err := issue.GetDefaultBranch()
	if err != nil {
		return nil, fmt.Errorf("Error getting default branch: %w", err)
	}
	lf, err := issue.ListFiles(defaultBranch)
	if err != nil {
		return nil, fmt.Errorf("Error listing files: %w", err)
	}
	lfs := strings.Join(lf, "\n")
	userPromptBuf := strings.Builder{}
	// Previously the Execute error was assigned but never checked.
	if err := userPromptTmpl.Execute(&userPromptBuf, PromptData{UserPrompt: userPrompt, RepositoryStructure: lfs}); err != nil {
		return nil, fmt.Errorf("Error executing userPrompt template: %w", err)
	}
	userPrompt = userPromptBuf.String()
	logger.Println("\x1b[34mRAGPrompt: |\n", systemPrompt, userPrompt, "\x1b[0m")
	return &ai.Prompt{UserPrompt: userPrompt, SystemPrompt: systemPrompt, Images: imgs}, nil
}

// Initialize AI client
func getAIClient(oaiKey string, cfg *utils.Config, logger *log.Logger) (ai.Ai, error) {
switch cfg.Ai.Provider {
Expand All @@ -181,3 +307,83 @@ func getAIClient(oaiKey string, cfg *utils.Config, logger *log.Logger) (ai.Ai, e
return nil, fmt.Errorf("Error: Invalid provider")
}
}

// getEmbeddingClient returns the embedding model implementation matching the
// configured AI provider ("openai" or "vertexai"). For OpenAI an API key is
// required; for VertexAI the client is built from the project/region/model in
// the configuration.
func getEmbeddingClient(oaiKey string, cfg *utils.Config, logger *log.Logger) (ai.EmbeddingModel, error) {
	provider := cfg.Ai.Provider
	if provider == "openai" {
		if oaiKey == "" {
			return nil, fmt.Errorf("Error: Please provide your Open AI API key")
		}
		logger.Println("Using OpenAI API")
		logger.Println("OpenAI model:", cfg.Ai.OpenAI.Model)
		return ai.NewOpenAIClient(oaiKey, cfg.Ai.OpenAI.Model), nil
	}
	if provider == "vertexai" {
		logger.Println("Using VertexAI API")
		logger.Println("VertexAI model:", cfg.Ai.VertexAI.Model)
		client, err := ai.NewVertexAIClient(cfg.Ai.VertexAI.Project, cfg.Ai.VertexAI.Region, cfg.Ai.VertexAI.Model)
		if err != nil {
			return nil, fmt.Errorf("Error: new Vertex AI client: %w", err)
		}
		return client, nil
	}
	return nil, fmt.Errorf("Error: Invalid provider")
}

// getNeo4jRetriever constructs a Neo4j-backed retriever from the given
// connection settings and index names.
func getNeo4jRetriever(cfg *Neo4jConfig, logger *log.Logger) (*rag.Neo4jRetriever, error) {
	r, err := rag.NewNeo4jRetriever(cfg.uri, cfg.username, cfg.password, cfg.fulltextIndex, cfg.vectorIndex)
	// Check the error before logging; previously a possibly-nil retriever was
	// logged ahead of the error check.
	if err != nil {
		return nil, fmt.Errorf("Error: new Neo4jRetriever: %w", err)
	}
	logger.Println("Neo4jRetriever:", r)
	return r, nil
}

// getPineconeRetriever constructs a Pinecone-backed retriever for the index
// derived from the repository owner and name.
func getPineconeRetriever(cfg *Config) (*rag.PineconeClient, error) {
	idxName := rag.GetPineconeIndexName(cfg.owner, cfg.repo)
	r, err := rag.NewPineconeClient(idxName, cfg.pineconeKey)
	if err != nil {
		// Fixed copy-pasted message that previously said "Neo4jRetriever".
		return nil, fmt.Errorf("Error: new PineconeClient: %w", err)
	}
	return r, nil
}

// CreateDB clones the repository, converts each branch's files and the
// repository's issues into documents, embeds them with the configured
// embedding model, and upserts them into the given Pinecone index.
func CreateDB(idxName string, targetBranches []string, cfg *Config, loadedcfg *utils.Config, logger *log.Logger) error {
	logger.Println("Creating DB to Index:", idxName)
	repoURL := fmt.Sprintf("https://github.com/%s/%s", cfg.owner, cfg.repo)
	repo, err := utils.CloneRepository(repoURL, &utils.AuthOptions{Username: cfg.owner, Token: cfg.ghToken})
	if err != nil {
		return fmt.Errorf("Error cloning repository: %w", err)
	}
	// Previously this error was silently ignored.
	branches, err := utils.GetBranches(repo, []string{})
	if err != nil {
		return fmt.Errorf("Error getting branches: %w", err)
	}
	// Result is unused; kept for its repository-walk validation — TODO confirm
	// whether this call is still needed. Its error was previously ignored.
	if _, err := utils.ListFiles(repo); err != nil {
		return fmt.Errorf("Error listing files: %w", err)
	}

	var docs []rag.Document
	for _, branch := range branches {
		branchDocs, err := rag.ConvertBranchtoDocuments(cfg.owner, cfg.repo, repo, branch)
		if err != nil {
			return fmt.Errorf("Error converting branch to documents: %w", err)
		}
		docs = append(docs, *branchDocs...)
	}

	emb, err := getEmbeddingClient(cfg.oaiKey, loadedcfg, logger)
	if err != nil {
		return fmt.Errorf("Error getting embedding client: %w", err)
	}

	pc, err := rag.NewPineconeClient(idxName, cfg.pineconeKey)
	if err != nil {
		return fmt.Errorf("Error getting Pinecone client: %w", err)
	}

	pc.CreateCodebaseDB(docs, emb, rag.CodebaseEmbeddingOptions{Branches: targetBranches})

	issues := github.GetAllIssues(cfg.owner, cfg.repo, cfg.ghToken)
	pc.CreateIssueDB(issues, emb)

	return nil
}
Loading