From 2000a2772d82ebdf8f56da55d3df7b1ff806f911 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Fri, 21 Feb 2025 21:14:43 +0900 Subject: [PATCH 01/18] first commit for graphrag --- go.mod | 1 + go.sum | 56 ++++++++++++++++++++++++++++++ internal/ai/embedding.go | 5 +++ internal/ai/openai.go | 4 +++ internal/rag/neo4j.go | 73 ++++++++++++++++++++++++++++++++++++++++ internal/rag/rag.go | 24 +++++++++++++ 6 files changed, 163 insertions(+) create mode 100644 internal/ai/embedding.go create mode 100644 internal/rag/neo4j.go create mode 100644 internal/rag/rag.go diff --git a/go.mod b/go.mod index 0057cf5..be71b95 100644 --- a/go.mod +++ b/go.mod @@ -7,6 +7,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/ai/azopenai v0.7.1 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 github.com/google/go-github v17.0.0+incompatible + github.com/neo4j/neo4j-go-driver/v4 v4.4.7 github.com/spf13/viper v1.19.0 golang.org/x/oauth2 v0.24.0 ) diff --git a/go.sum b/go.sum index aedefb5..50db4f7 100644 --- a/go.sum +++ b/go.sum @@ -41,6 +41,9 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= +github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= +github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= +github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -48,6 +51,7 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= +github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -63,7 +67,10 @@ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrU github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= +github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= +github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= +github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -73,6 +80,7 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= +github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= @@ -90,6 +98,7 @@ github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDP github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -100,6 +109,17 @@ github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0V github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= +github.com/neo4j/neo4j-go-driver/v4 v4.4.7 h1:6D0DPI7VOVF6zB8eubY1lav7RI7dZ2mytnr3fj369Ow= +github.com/neo4j/neo4j-go-driver/v4 v4.4.7/go.mod h1:NexOfrm4c317FVjekrhVV8pHBXgtMG5P6GeweJWCyo4= +github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= +github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= +github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= +github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= +github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= +github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= +github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= +github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= +github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= @@ -129,6 +149,7 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= @@ -137,6 +158,7 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc= @@ -154,6 +176,7 @@ go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= +golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= @@ -163,12 +186,19 @@ golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqR golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= +golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= +golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= +golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= +golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= +golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= +golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= +golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -177,16 +207,29 @@ golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbht golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= +golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= +golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= @@ -196,7 +239,12 @@ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= +golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= +golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= +golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= +golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.203.0 h1:SrEeuwU3S11Wlscsn+LA1kb/Y5xT8uggJSkIhD08NAU= google.golang.org/api v0.203.0/go.mod h1:BuOVyCSYEPwJb3npWvDnNmFI92f3GeRnHNkETneT3SI= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= @@ -226,13 +274,21 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= +google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= +google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= +gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/ai/embedding.go b/internal/ai/embedding.go new file mode 100644 index 0000000..8ff2d25 --- /dev/null +++ b/internal/ai/embedding.go @@ -0,0 +1,5 @@ +package ai + +type EmbeddingModel interface { + GetEmbedding(text string) ([]float32, error) +} diff --git a/internal/ai/openai.go b/internal/ai/openai.go index fa03ab1..1284daa 100644 --- a/internal/ai/openai.go +++ b/internal/ai/openai.go @@ -69,3 +69,7 @@ func NewOpenAIClient(apiKey string, model string) *OpenAI { model: model, } } + +func (ai *OpenAI) GetEmbedding(text string) ([]float32, error) { + return nil, nil +} diff --git a/internal/rag/neo4j.go b/internal/rag/neo4j.go new file mode 100644 index 0000000..2859fb1 --- /dev/null +++ b/internal/rag/neo4j.go @@ -0,0 +1,73 @@ +package rag + +import ( + "context" + + "github.com/3-shake/alert-menta/internal/ai" + "github.com/neo4j/neo4j-go-driver/v4/neo4j" +) + +type Neo4jRetriever struct { + session *neo4j.Session + driver neo4j.Driver + fulltextIndex string + vectorIndex string +} + +func NewNeo4jRetriever(uri, username, password, fulltextIndex, vectorIndex string) (*Neo4jRetriever, error) { + driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) + if err != nil { + return nil, err + } + session, err := driver.Session(neo4j.AccessModeRead) + if err != nil { + return nil, err + } + return &Neo4jRetriever{ + session: session, + driver: driver, + fulltextIndex: fulltextIndex, + vectorIndex: vectorIndex, + }, nil +} + +func (r *Neo4jRetriever) Close() { + r.session.Close() + r.driver.Close() +} + +func (r *Neo4jRetriever) Retrieve(emb ai.EmbeddingModel, query string, options Options) ([]Document, error) { + // var documents []Document + embedding, err := emb.GetEmbedding(query) + if err != nil { + return nil, err + } + results, err := r.retrieveHybrid(embedding, query, options) + + return results, nil +} + +func (r *Neo4jRetriever) retrieveHybrid(embedding []float32, query string, options Options) ([]Document, error) { + return nil, nil +} + +func (r *Neo4jRetriever) retrieveFulltext(query string, options Options) ([]Document, error) { + return nil, nil +} + +func (r *Neo4jRetriever) runCypher(query string, params map[string]interface{}) ([]Document, error) { + var documents []Document + result, err := r.session.Run(query, params) + if err != nil { + return nil, err + } + for result.Next() { + record := result.Record() + documents = append(documents, Document{ + Id: record.GetByIndex(0).(string), + Content: record.GetByIndex(1).(string), + Score: record.GetByIndex(2).(float64), + }) + } + return documents, nil +} diff --git a/internal/rag/rag.go b/internal/rag/rag.go new file mode 100644 index 0000000..65a82cc --- /dev/null +++ b/internal/rag/rag.go @@ -0,0 +1,24 @@ +package rag + +import ( + "context" + + "github.com/3-shake/alert-menta/internal/ai" +) + +type Retriever interface { + // Retrieve(ctx context.Context, query string, options ...Option) ([]Document, error) + Retrieve(embedding ai.EmbeddingModel, options Options) ([]Document, error) +} + +type Options struct { + topK int + withStructuredData bool + enableHybridRetrieval bool +} + +type Document struct { + Id string + Content string + Score float64 +} From 7a7fdfe6b254f21da8f03779054ee145c33d23b2 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Thu, 27 Feb 2025 20:11:12 +0900 Subject: [PATCH 02/18] Update neo4j module from v4 to v5 --- go.mod | 2 +- go.sum | 58 ++------------------------------- internal/rag/neo4j.go | 75 +++++++++++++++++++++++++++++++++++++------ 3 files changed, 69 insertions(+), 66 deletions(-) diff --git a/go.mod b/go.mod index be71b95..89217e5 100644 --- a/go.mod +++ b/go.mod @@ -7,7 +7,7 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/ai/azopenai v0.7.1 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 github.com/google/go-github v17.0.0+incompatible - github.com/neo4j/neo4j-go-driver/v4 v4.4.7 + github.com/neo4j/neo4j-go-driver/v5 v5.27.0 github.com/spf13/viper v1.19.0 golang.org/x/oauth2 v0.24.0 ) diff --git a/go.sum b/go.sum index 50db4f7..e3a3d99 100644 --- a/go.sum +++ b/go.sum @@ -41,9 +41,6 @@ github.com/felixge/httpsnoop v1.0.4 h1:NFTV2Zj1bL4mc9sqWACXbQFVBBg2W3GPvqp8/ESS2 github.com/felixge/httpsnoop v1.0.4/go.mod h1:m8KPJKqk1gH5J9DgRY2ASl2lWCfGKXixSwevea8zH2U= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= -github.com/fsnotify/fsnotify v1.4.7/go.mod h1:jwhsz4b93w/PPRr/qN1Yymfu8t87LnFCMoQvtojpjFo= -github.com/fsnotify/fsnotify v1.4.9/go.mod h1:znqG4EE+3YCdAaPaxE2ZRY/06pZUdp0tY4IgpuI1SZQ= -github.com/fsnotify/fsnotify v1.5.1/go.mod h1:T3375wBYaZdLLcVNkcVbzGHY7f1l/uK5T5Ai1i3InKU= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= @@ -51,7 +48,6 @@ github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag= github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE= -github.com/go-task/slim-sprig v0.0.0-20210107165309-348f09dbbbc0/go.mod h1:fyg7847qk6SyHyPtNmDHnmrv/HOrqktSC+C9fM+CJOE= github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17wHk= github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= @@ -67,10 +63,7 @@ github.com/golang/protobuf v1.4.0-rc.2/go.mod h1:LlEzMj4AhA7rCAGe4KMBDvJI+AwstrU github.com/golang/protobuf v1.4.0-rc.4.0.20200313231945-b860323f09d0/go.mod h1:WU3c8KckQ9AFe+yFwt9sWVRKCVIyN9cPHBJSNnbL67w= github.com/golang/protobuf v1.4.0/go.mod h1:jodUvKwWbYaEsadDk5Fwe5c77LiNKVO9IDvqG2KuDX0= github.com/golang/protobuf v1.4.1/go.mod h1:U8fpvMrcmy5pZrNK1lt4xCsGvpyWQ/VVv6QDs8UjoX8= -github.com/golang/protobuf v1.4.2/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= github.com/golang/protobuf v1.4.3/go.mod h1:oDoupMAO8OvCJWAcko0GGGIgR6R6ocIYbsSw735rRwI= -github.com/golang/protobuf v1.5.0/go.mod h1:FsONVRAS9T7sI+LIUmWTfcYkHO4aIWwzhcaSAoJOfIk= -github.com/golang/protobuf v1.5.2/go.mod h1:XVQd3VNwM+JqD3oG2Ue2ip4fOMUkwXdXDdiuN0vRsmY= github.com/golang/protobuf v1.5.4 h1:i7eJL8qZTpSEXOPTxNKhASYpMn+8e5Q6AdndVa1dWek= github.com/golang/protobuf v1.5.4/go.mod h1:lnTiLA8Wa4RWRcIUkrtSVa5nRhsEGBg48fD6rSs7xps= github.com/google/go-cmp v0.2.0/go.mod h1:oXzfMopK8JAjlY9xF4vHSVASa0yLyX7SntLO5aqRK0M= @@ -80,7 +73,6 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.5.5/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= @@ -98,7 +90,6 @@ github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDP github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= -github.com/hpcloud/tail v1.0.0/go.mod h1:ab1qPbhIpdTxEkNHXyeSf5vhxWSCs/tWer42PpOxQnU= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -109,17 +100,8 @@ github.com/magiconair/properties v1.8.7 h1:IeQXZAiQcpL9mgcAe1Nu6cX9LLw6ExEHKjN0V github.com/magiconair/properties v1.8.7/go.mod h1:Dhd985XPs7jluiymwWYZ0G4Z61jb3vdS329zhj2hYo0= github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyuac5Z2hdY= github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= -github.com/neo4j/neo4j-go-driver/v4 v4.4.7 h1:6D0DPI7VOVF6zB8eubY1lav7RI7dZ2mytnr3fj369Ow= -github.com/neo4j/neo4j-go-driver/v4 v4.4.7/go.mod h1:NexOfrm4c317FVjekrhVV8pHBXgtMG5P6GeweJWCyo4= -github.com/nxadm/tail v1.4.4/go.mod h1:kenIhsEOeOJmVchQTgglprH7qJGnHDVpk1VPCcaMI8A= -github.com/nxadm/tail v1.4.8/go.mod h1:+ncqLTQzXmGhMZNUePPaPqPvBxHAIsmXswZKocGu+AU= -github.com/onsi/ginkgo v1.6.0/go.mod h1:lLunBs/Ym6LB5Z9jYTR76FiuTmxDTDusOGeTQH+WWjE= -github.com/onsi/ginkgo v1.12.1/go.mod h1:zj2OWP4+oCPe1qIXoGWkgMRwljMUYCdkwsT2108oapk= -github.com/onsi/ginkgo v1.16.4/go.mod h1:dX+/inL/fNMqNlz0e9LfyB9TswhZpCVdJM/Z6Vvnwo0= -github.com/onsi/ginkgo v1.16.5/go.mod h1:+E8gABHa3K6zRBolWtd+ROzc/U5bkGt0FwiG042wbpU= -github.com/onsi/gomega v1.7.1/go.mod h1:XdKZgCCFLUoM/7CFJVPcG8C1xQ1AJ0vpAezJrB7JYyY= -github.com/onsi/gomega v1.10.1/go.mod h1:iN09h71vgCQne3DLsj+A5owkum+a2tYe+TOCB1ybHNo= -github.com/onsi/gomega v1.16.0/go.mod h1:HnhC7FXeEQY45zxNK3PPoIUhzk/80Xly9PcubAlGdZY= +github.com/neo4j/neo4j-go-driver/v5 v5.27.0 h1:YdsIxDjAQbjlP/4Ha9B/gF8Y39UdgdTwCyihSxy8qTw= +github.com/neo4j/neo4j-go-driver/v5 v5.27.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= @@ -149,7 +131,6 @@ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSS github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= -github.com/stretchr/testify v1.5.1/go.mod h1:5W2xD1RspED5o8YsWQXVCued0rvSQ+mT+I5cxcmMvtA= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= @@ -158,7 +139,6 @@ github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsT github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= -github.com/yuin/goldmark v1.2.1/go.mod h1:3hX8gzYuyVAZsxl0MRgGTJEmQBFcNTphYh9decYSb74= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc= @@ -176,7 +156,6 @@ go.uber.org/atomic v1.9.0/go.mod h1:fEN4uk6kAWBTFdckzkM89CLk9XfWZrxpCo0nPH17wJc= go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= -golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= @@ -186,19 +165,12 @@ golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqR golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= -golang.org/x/mod v0.3.0/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= golang.org/x/net v0.0.0-20180724234803-3673e40ba225/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20180826012351-8a410e7b638d/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= -golang.org/x/net v0.0.0-20180906233101-161cd47e91fd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4= golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= -golang.org/x/net v0.0.0-20190620200207-3b0461eec859/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s= -golang.org/x/net v0.0.0-20200520004742-59133d7f0dd7/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A= -golang.org/x/net v0.0.0-20201021035429-f5854403a974/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.0.0-20210428140749-89ef3d95e781/go.mod h1:OJAsFXCWl8Ukc7SiCT/9KSuxbyM7479/AVlXFRxuMCk= -golang.org/x/net v0.0.0-20210614182718-04defd469f4e/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= @@ -207,29 +179,16 @@ golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbht golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.0.0-20201020160332-67f06af15bc9/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= -golang.org/x/sys v0.0.0-20180909124046-d0be0721c37e/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20190904154756-749cb33beabd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191005200804-aed5e4c7ecf9/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20191120155948-bd437916bb0e/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210112080510-489259a85091/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.0.0-20210630005230-0f9fa26af87c/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.0.0-20211124211545-fe61309f8881/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= -golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= @@ -239,12 +198,7 @@ golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGm golang.org/x/tools v0.0.0-20190226205152-f727befe758c/go.mod h1:9Yl7xja0Znq3iFh3HoIrodX9oNMXvdceNzlUR8zjMvY= golang.org/x/tools v0.0.0-20190311212946-11955173bddd/go.mod h1:LCzVGOaR6xXOjkQ3onu1FJEFr0SW1gC7cKk1uF8kGRs= golang.org/x/tools v0.0.0-20190524140312-2c0ae7006135/go.mod h1:RgjU9mgBXZiqYHBnxXauZ1Gv1EHHAz9KjViQ78xBX0Q= -golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= -golang.org/x/tools v0.0.0-20201224043029-2b0845dc783e/go.mod h1:emZCQorbCU4vsT4fOWvOPXz4eW1wZW4PmDk9uLelYpA= -golang.org/x/xerrors v0.0.0-20190717185122-a985d3407aa7/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20191011141410-1b5146add898/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= -golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0= google.golang.org/api v0.203.0 h1:SrEeuwU3S11Wlscsn+LA1kb/Y5xT8uggJSkIhD08NAU= google.golang.org/api v0.203.0/go.mod h1:BuOVyCSYEPwJb3npWvDnNmFI92f3GeRnHNkETneT3SI= google.golang.org/appengine v1.1.0/go.mod h1:EbEs0AVv82hx2wNQdGPgUI5lhzA/G0D9YwlJXL52JkM= @@ -274,21 +228,13 @@ google.golang.org/protobuf v1.22.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2 google.golang.org/protobuf v1.23.0/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.23.1-0.20200526195155-81db48ad09cc/go.mod h1:EGpADcykh3NcUnDUJcl1+ZksZNG86OlYog2l/sGQquU= google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlbajtzgsN7c= -google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw= -google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc= google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= -gopkg.in/fsnotify.v1 v1.4.7/go.mod h1:Tz8NjZHkW78fSQdbUxIjBTcgA1z1m8ZHf0WmKUhAMys= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= -gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7/go.mod h1:dt/ZhP58zS4L8KSrWDmTeBkI65Dw0HsyUHuEVlX15mw= -gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.2.4/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.3.0/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= -gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/rag/neo4j.go b/internal/rag/neo4j.go index 2859fb1..6cd2f5d 100644 --- a/internal/rag/neo4j.go +++ b/internal/rag/neo4j.go @@ -1,39 +1,48 @@ package rag import ( + "bytes" "context" + "fmt" + "text/template" "github.com/3-shake/alert-menta/internal/ai" - "github.com/neo4j/neo4j-go-driver/v4/neo4j" + "github.com/neo4j/neo4j-go-driver/v5/neo4j" ) type Neo4jRetriever struct { - session *neo4j.Session - driver neo4j.Driver + session neo4j.SessionWithContext + driver neo4j.DriverWithContext + context context.Context fulltextIndex string vectorIndex string } func NewNeo4jRetriever(uri, username, password, fulltextIndex, vectorIndex string) (*Neo4jRetriever, error) { - driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) + // driver, err := neo4j.NewDriver(uri, neo4j.BasicAuth(username, password, "")) + driver, err := neo4j.NewDriverWithContext(uri, neo4j.BasicAuth(username, password, "")) if err != nil { return nil, err } - session, err := driver.Session(neo4j.AccessModeRead) + // session, err := driver.Session(neo4j.AccessModeRead) + ctx := context.Background() + session := driver.NewSession(ctx, + neo4j.SessionConfig{AccessMode: neo4j.AccessModeRead}) if err != nil { return nil, err } return &Neo4jRetriever{ session: session, driver: driver, + context: ctx, fulltextIndex: fulltextIndex, vectorIndex: vectorIndex, }, nil } func (r *Neo4jRetriever) Close() { - r.session.Close() - r.driver.Close() + r.session.Close(r.context) + r.driver.Close(r.context) } func (r *Neo4jRetriever) Retrieve(emb ai.EmbeddingModel, query string, options Options) ([]Document, error) { @@ -55,13 +64,61 @@ func (r *Neo4jRetriever) retrieveFulltext(query string, options Options) ([]Docu return nil, nil } +func (r *Neo4jRetriever) StructuredRetriever(question string) (string, error) { + query := fmt.Sprintf("MATCH (n:Question) WHERE n.text = '%s' RETURN n", question) + _, err := r.session.Run(r.context, query, nil) + if err != nil { + return "", err + } + // return result.Single().GetByIndex(0).(neo4j.Node).Props()["answer"].(string), nil + return "", nil +} + +func (r *Neo4jRetriever) UnstructuredRetriever(question string) (string, error) { + // query := fmt.Sprintf("MATCH (n:Question) WHERE n.text = '%s' RETURN n", question) + return "", nil +} + +func (r *Neo4jRetriever) HybridSearch(embedding, query string) (string, error) { + cypherTemplate, _ := template.New("cypher").Parse(`CALL { + CALL db.index.vector.queryNodes("vector", {{.K}}, {{.Embedding}}) + YIELD node, score + WITH collect({node:node, score:score}) AS nodes, max(score) AS max + UNWIND nodes AS n + RETURN n.node AS node, (n.score / max) AS score + UNION + CALL db.index.fulltext.queryNodes("keyword", "{{.Query}}", {limit: {{.K}}}) + YIELD node, score + WITH collect({node:node, score:score}) AS nodes, max(score) AS max + UNWIND nodes AS n + RETURN n.node AS node, (n.score / max) AS score + } + WITH node, max(score) AS score ORDER BY score DESC LIMIT {{.K}} + RETURN node.{{.Content}} AS text, score`) + type Cypher struct { + K int + Embedding string + Query string + Content string + } + var buf bytes.Buffer + err := cypherTemplate.Execute(&buf, Cypher{5, embedding, query, "source"}) + if err != nil { + fmt.Println("Error:", err) + return "", err + } + result := buf.String() + fmt.Println(result) + return result, nil +} + func (r *Neo4jRetriever) runCypher(query string, params map[string]interface{}) ([]Document, error) { var documents []Document - result, err := r.session.Run(query, params) + result, err := r.session.Run(r.context, query, params) if err != nil { return nil, err } - for result.Next() { + for result.Next(r.context) { record := result.Record() documents = append(documents, Document{ Id: record.GetByIndex(0).(string), From 5de56dd20d257b39d5597ef9fd3daa4b5fb777e4 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Fri, 28 Feb 2025 20:56:40 +0900 Subject: [PATCH 03/18] Add func GetEmbedding --- internal/ai/openai.go | 17 ++++++++++++++++- internal/ai/vertexai.go | 4 ++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/internal/ai/openai.go b/internal/ai/openai.go index 1284daa..85f2872 100644 --- a/internal/ai/openai.go +++ b/internal/ai/openai.go @@ -71,5 +71,20 @@ func NewOpenAIClient(apiKey string, model string) *OpenAI { } func (ai *OpenAI) GetEmbedding(text string) ([]float32, error) { - return nil, nil + // Create a new OpenAI client + keyCredential := azcore.NewKeyCredential(ai.apiKey) + client, _ := azopenai.NewClientForOpenAI("https://api.openai.com/v1/", keyCredential, nil) + modelDeploymentID := "text-embedding-ada-002" + options := &azopenai.EmbeddingsOptions{ + Input: []string{text}, + DeploymentName: &modelDeploymentID, + } + response, err := client.GetEmbeddings(context.TODO(), *options, nil) + if err != nil { + fmt.Println(err) + return []float32{}, err + } + // fmt.Println(reflect.TypeOf(response)) + // fmt.Println(response.Embeddings.Data[0].Embedding) + return response.Embeddings.Data[0].Embedding, nil } diff --git a/internal/ai/vertexai.go b/internal/ai/vertexai.go index c8f886c..e1a1b37 100644 --- a/internal/ai/vertexai.go +++ b/internal/ai/vertexai.go @@ -62,3 +62,7 @@ func NewVertexAIClient(projectID, localtion, modelName string) (*VertexAI, error model: modelName, }, nil } + +func (ai *VertexAI) GetEmbedding(text string) ([]float32, error) { + return nil, nil +} From 386c1664f65d04e53bdbe8fbd67ae25552ba2bc8 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Fri, 28 Feb 2025 20:57:10 +0900 Subject: [PATCH 04/18] Neo4j GraphRAG prototype --- cmd/main.go | 85 +++++++++++++++++++++++++++++++++++++++++ internal/rag/neo4j.go | 89 ++++++++++++++++++++++++++++++++++++++----- internal/rag/rag.go | 7 +++- 3 files changed, 170 insertions(+), 11 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 010d151..f9e259c 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -10,6 +10,7 @@ import ( "github.com/3-shake/alert-menta/internal/ai" "github.com/3-shake/alert-menta/internal/github" + "github.com/3-shake/alert-menta/internal/rag" "github.com/3-shake/alert-menta/internal/utils" ) @@ -23,10 +24,20 @@ type Config struct { configFile string ghToken string oaiKey string + useRag bool +} + +type Neo4jConfig struct { + uri string + username string + password string + fulltextIndex string + vectorIndex string } func main() { cfg := &Config{} + neo4jcfg := &Neo4jConfig{} flag.StringVar(&cfg.repo, "repo", "", "Repository name") flag.StringVar(&cfg.owner, "owner", "", "Repository owner") flag.IntVar(&cfg.issueNumber, "issue", 0, "Issue number") @@ -35,6 +46,12 @@ func main() { flag.StringVar(&cfg.configFile, "config", "", "Configuration file") flag.StringVar(&cfg.ghToken, "github-token", "", "GitHub token") flag.StringVar(&cfg.oaiKey, "api-key", "", "OpenAI api key") + flag.BoolVar(&cfg.useRag, "use-rag", false, "Use RAG model for response generation") + flag.StringVar(&neo4jcfg.uri, "neo4j-uri", "", "Neo4j URI") + flag.StringVar(&neo4jcfg.username, "neo4j-username", "", "Neo4j username") + flag.StringVar(&neo4jcfg.password, "neo4j-password", "", "Neo4j password") + flag.StringVar(&neo4jcfg.fulltextIndex, "fulltext-index", "keyword", "Neo4j fulltext index(default: keyword)") + flag.StringVar(&neo4jcfg.vectorIndex, "vector-index", "vector", "Neo4j vector index(default: vector)") flag.Parse() if cfg.repo == "" || cfg.owner == "" || cfg.issueNumber == 0 || cfg.ghToken == "" || cfg.command == "" || cfg.configFile == "" { @@ -42,6 +59,25 @@ func main() { os.Exit(1) } + var retriever *rag.Neo4jRetriever + if cfg.useRag { + flag.Parse() + if neo4jcfg.uri == "" || neo4jcfg.username == "" || neo4jcfg.password == "" { + fmt.Println("if -useRag is set, neo4j-uri, neo4j-username, and neo4j-password are required") + fmt.Println("Usage: alert-menta -use-rag -neo4j-uri -neo4j-username -neo4j-password ") + fmt.Println("[-fulltext-index ] [-vector-index ]") + os.Exit(1) + } + r, err := getNeo4jRetriever(neo4jcfg, log.New(os.Stdout, "[alert-menta main] ", log.Ldate|log.Ltime|log.Llongfile|log.Lmsgprefix)) + retriever = r + retriever.TestConnection() + if err != nil { + log.Fatalf("Error getting Neo4j retriever: %v", err) + } + } + fmt.Println("Neo4j Retriever:", retriever) + // os.Exit(0) + logger := log.New( os.Stdout, "[alert-menta main] ", log.Ldate|log.Ltime|log.Llongfile|log.Lmsgprefix, @@ -74,6 +110,23 @@ func main() { logger.Fatalf("Error geting AI client: %v", err) } + emb, err := getEmbeddingClient(cfg.oaiKey, loadedcfg, logger) + if err != nil { + logger.Fatalf("Error geting AI client: %v", err) + } + fmt.Println("Prompt:", prompt.UserPrompt) + doc, err := retriever.Retrieve(emb, prompt.UserPrompt, rag.Options{}) + // doc, err := retriever.Retrieve(emb, "What is Memory Management?", rag.Options{}) + if err != nil { + fmt.Println("Error retrieving document:", err) + } + fmt.Println("Document:", doc) + for _, d := range doc { + prompt.UserPrompt += "\n" + d.String() + } + fmt.Println("Prompt:", prompt.UserPrompt) + // os.Exit(0) + comment, err := aic.GetResponse(prompt) if err != nil { logger.Fatalf("Error getting Response: %v", err) @@ -181,3 +234,35 @@ func getAIClient(oaiKey string, cfg *utils.Config, logger *log.Logger) (ai.Ai, e return nil, fmt.Errorf("Error: Invalid provider") } } + +// Initialize EmbeddingModel +func getEmbeddingClient(oaiKey string, cfg *utils.Config, logger *log.Logger) (ai.EmbeddingModel, error) { + switch cfg.Ai.Provider { + case "openai": + if oaiKey == "" { + return nil, fmt.Errorf("Error: Please provide your Open AI API key") + } + logger.Println("Using OpenAI API") + logger.Println("OpenAI model:", cfg.Ai.OpenAI.Model) + return ai.NewOpenAIClient(oaiKey, cfg.Ai.OpenAI.Model), nil + case "vertexai": + logger.Println("Using VertexAI API") + logger.Println("VertexAI model:", cfg.Ai.VertexAI.Model) + aic, err := ai.NewVertexAIClient(cfg.Ai.VertexAI.Project, cfg.Ai.VertexAI.Region, cfg.Ai.VertexAI.Model) + if err != nil { + return nil, fmt.Errorf("Error: new Vertex AI client: %w", err) + } + return aic, nil + default: + return nil, fmt.Errorf("Error: Invalid provider") + } +} + +// Initialize Neo4jRetriever +func getNeo4jRetriever(cfg *Neo4jConfig, logger *log.Logger) (*rag.Neo4jRetriever, error) { + r, err := rag.NewNeo4jRetriever(cfg.uri, cfg.username, cfg.password, cfg.fulltextIndex, cfg.vectorIndex) + if err != nil { + return nil, fmt.Errorf("Error: new Neo4jRetriever: %w", err) + } + return r, nil +} diff --git a/internal/rag/neo4j.go b/internal/rag/neo4j.go index 6cd2f5d..7f4cf24 100644 --- a/internal/rag/neo4j.go +++ b/internal/rag/neo4j.go @@ -4,6 +4,7 @@ import ( "bytes" "context" "fmt" + "strings" "text/template" "github.com/3-shake/alert-menta/internal/ai" @@ -16,6 +17,7 @@ type Neo4jRetriever struct { context context.Context fulltextIndex string vectorIndex string + contentProp string } func NewNeo4jRetriever(uri, username, password, fulltextIndex, vectorIndex string) (*Neo4jRetriever, error) { @@ -37,6 +39,7 @@ func NewNeo4jRetriever(uri, username, password, fulltextIndex, vectorIndex strin context: ctx, fulltextIndex: fulltextIndex, vectorIndex: vectorIndex, + contentProp: "text", }, nil } @@ -45,6 +48,23 @@ func (r *Neo4jRetriever) Close() { r.driver.Close(r.context) } +func (r *Neo4jRetriever) TestConnection() error { + result, err := r.session.Run(r.context, "MATCH (n) RETURN n LIMIT 1", nil) + if err != nil { + return err + } + for result.Next(r.context) { + record := result.Record() + // レコードの各フィールドを処理します。 + for i, value := range record.Values { + fmt.Printf("%s: %v\n", record.Keys[i], value) + } + fmt.Println("---") + } + fmt.Println(result) + return nil +} + func (r *Neo4jRetriever) Retrieve(emb ai.EmbeddingModel, query string, options Options) ([]Document, error) { // var documents []Document embedding, err := emb.GetEmbedding(query) @@ -52,12 +72,29 @@ func (r *Neo4jRetriever) Retrieve(emb ai.EmbeddingModel, query string, options O return nil, err } results, err := r.retrieveHybrid(embedding, query, options) + if err != nil { + return nil, err + } return results, nil } func (r *Neo4jRetriever) retrieveHybrid(embedding []float32, query string, options Options) ([]Document, error) { - return nil, nil + // []float32 to string + embeddingStr := fmt.Sprintf("%v", embedding) + embeddingStr = strings.ReplaceAll(embeddingStr, " ", ", ") + cypher, err := r.HybridSearch(embeddingStr, query) + if err != nil { + fmt.Errorf("Error: %v", err) + return nil, err + } + + documents, err := r.runCypher(cypher, nil) + if err != nil { + fmt.Errorf("Error: %v", err) + return nil, err + } + return documents, nil } func (r *Neo4jRetriever) retrieveFulltext(query string, options Options) ([]Document, error) { @@ -94,7 +131,7 @@ func (r *Neo4jRetriever) HybridSearch(embedding, query string) (string, error) { RETURN n.node AS node, (n.score / max) AS score } WITH node, max(score) AS score ORDER BY score DESC LIMIT {{.K}} - RETURN node.{{.Content}} AS text, score`) + RETURN node.source AS id, node.{{.Content}} AS text, score`) type Cypher struct { K int Embedding string @@ -102,29 +139,63 @@ func (r *Neo4jRetriever) HybridSearch(embedding, query string) (string, error) { Content string } var buf bytes.Buffer - err := cypherTemplate.Execute(&buf, Cypher{5, embedding, query, "source"}) + err := cypherTemplate.Execute(&buf, Cypher{5, embedding, r.sanitizeQuery(query), r.contentProp}) if err != nil { fmt.Println("Error:", err) return "", err } - result := buf.String() - fmt.Println(result) - return result, nil + cypher := buf.String() + fmt.Println(cypher) + return cypher, nil } func (r *Neo4jRetriever) runCypher(query string, params map[string]interface{}) ([]Document, error) { var documents []Document result, err := r.session.Run(r.context, query, params) if err != nil { + fmt.Println("Error:", err) return nil, err } for result.Next(r.context) { record := result.Record() + // レコードの各フィールドを処理します。 + for i, value := range record.Values { + fmt.Printf("%s: %v\n", record.Keys[i], value) + } + fmt.Println("---") + id, _, _ := neo4j.GetRecordValue[string](record, "id") + content, _, _ := neo4j.GetRecordValue[string](record, "text") + score, _, _ := neo4j.GetRecordValue[float64](record, "score") documents = append(documents, Document{ - Id: record.GetByIndex(0).(string), - Content: record.GetByIndex(1).(string), - Score: record.GetByIndex(2).(float64), + Id: id, + Content: content, + Score: score, }) } + fmt.Println(len(documents)) + if len(documents) == 0 { + fmt.Println("Error:", "No results found") + return nil, fmt.Errorf("No results found") + } return documents, nil } + +func (r *Neo4jRetriever) sanitizeQuery(query string) string { + // Escaping special characters so that they can be interpreted as cypher + newQuery := strings.ReplaceAll(query, "\"", "\\\"") + newQuery = strings.ReplaceAll(newQuery, "'", "\\'") + newQuery = strings.ReplaceAll(newQuery, "\n", "\\n") + newQuery = strings.ReplaceAll(newQuery, "\r", "\\r") + newQuery = strings.ReplaceAll(newQuery, "$", "\\$") + newQuery = strings.ReplaceAll(newQuery, ":", "\\:") + newQuery = strings.ReplaceAll(newQuery, "/", "\\/") + newQuery = strings.ReplaceAll(newQuery, "[", "\\[") + newQuery = strings.ReplaceAll(newQuery, "]", "\\]") + newQuery = strings.ReplaceAll(newQuery, "(", "\\(") + newQuery = strings.ReplaceAll(newQuery, ")", "\\)") + newQuery = strings.ReplaceAll(newQuery, "{", "\\{") + newQuery = strings.ReplaceAll(newQuery, "}", "\\}") + newQuery = strings.ReplaceAll(newQuery, "~", "\\~") + newQuery = strings.ReplaceAll(newQuery, "^", "\\^") + return newQuery +} diff --git a/internal/rag/rag.go b/internal/rag/rag.go index 65a82cc..18061cc 100644 --- a/internal/rag/rag.go +++ b/internal/rag/rag.go @@ -1,8 +1,6 @@ package rag import ( - "context" - "github.com/3-shake/alert-menta/internal/ai" ) @@ -22,3 +20,8 @@ type Document struct { Content string Score float64 } + +func (d Document) String() string { + str := "id: " + d.Id + ", content: " + d.Content + return str +} From 721f0de9f48955b8d44853f5824507d1355ffdc9 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Thu, 6 Mar 2025 21:20:58 +0900 Subject: [PATCH 05/18] similar issues prototype --- go.mod | 9 +- go.sum | 10 ++ internal/rag/similar_issue.go | 270 ++++++++++++++++++++++++++++++++++ 3 files changed, 288 insertions(+), 1 deletion(-) create mode 100644 internal/rag/similar_issue.go diff --git a/go.mod b/go.mod index 89217e5..de27c3e 100644 --- a/go.mod +++ b/go.mod @@ -8,8 +8,10 @@ require ( github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 github.com/google/go-github v17.0.0+incompatible github.com/neo4j/neo4j-go-driver/v5 v5.27.0 + github.com/pinecone-io/go-pinecone/v3 v3.0.0 github.com/spf13/viper v1.19.0 golang.org/x/oauth2 v0.24.0 + google.golang.org/protobuf v1.35.1 ) require ( @@ -21,6 +23,8 @@ require ( cloud.google.com/go/iam v1.2.1 // indirect cloud.google.com/go/longrunning v0.6.1 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect + github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect + github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-logr/logr v1.4.2 // indirect @@ -28,18 +32,22 @@ require ( github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/s2a-go v0.1.8 // indirect + github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect + github.com/oapi-codegen/runtime v1.1.1 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect + github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/stretchr/testify v1.9.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect @@ -61,7 +69,6 @@ require ( google.golang.org/genproto/googleapis/api v0.0.0-20241007155032-5fefd90f89a9 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 // indirect google.golang.org/grpc v1.67.1 // indirect - google.golang.org/protobuf v1.35.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index e3a3d99..35f4d63 100644 --- a/go.sum +++ b/go.sum @@ -26,6 +26,10 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkY github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= +github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= +github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= +github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= @@ -90,6 +94,7 @@ github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDP github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= @@ -102,8 +107,12 @@ github.com/mitchellh/mapstructure v1.5.0 h1:jeMsZIYE/09sWLaz43PL7Gy6RuMjD2eJVyua github.com/mitchellh/mapstructure v1.5.0/go.mod h1:bFUtVrKA4DC2yAKiSyO/QUcy7e+RRV2QTWOzhPopBRo= github.com/neo4j/neo4j-go-driver/v5 v5.27.0 h1:YdsIxDjAQbjlP/4Ha9B/gF8Y39UdgdTwCyihSxy8qTw= github.com/neo4j/neo4j-go-driver/v5 v5.27.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= +github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= +github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= +github.com/pinecone-io/go-pinecone/v3 v3.0.0 h1:RykZu34ejebysPR6i1BH+o8BNPcZYObd9vC4JaSNy0g= +github.com/pinecone-io/go-pinecone/v3 v3.0.0/go.mod h1:i+I3G/SAx07ky/3PwCjqG41EcTPP6lT+8fEDZDnnHMs= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= @@ -126,6 +135,7 @@ github.com/spf13/pflag v1.0.5 h1:iy+VFUOCP1a+8yFto/drg2CJ5u0yRoB7fZw3DKv/JXA= github.com/spf13/pflag v1.0.5/go.mod h1:McXfInJRrz4CZXVZOBLb0bTZqETkiAhM9Iw0y3An2Bg= github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI= github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg= +github.com/spkg/bom v0.0.0-20160624110644-59b7046e48ad/go.mod h1:qLr4V1qq6nMqFKkMo8ZTx3f+BZEkzsRUY10Xsm2mwU0= github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME= github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= diff --git a/internal/rag/similar_issue.go b/internal/rag/similar_issue.go new file mode 100644 index 0000000..f700808 --- /dev/null +++ b/internal/rag/similar_issue.go @@ -0,0 +1,270 @@ +package rag + +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + + // "github.com/joho/godotenv" + "github.com/3-shake/alert-menta/internal/ai" + "github.com/3-shake/alert-menta/internal/github" + // gogithub "github.com/google/go-github/github" + "github.com/pinecone-io/go-pinecone/v3/pinecone" + "google.golang.org/protobuf/types/known/structpb" +) + +type PineconeClient struct { + context context.Context + pc *pinecone.Client + indexName string +} + +type Issue struct { + Id string + Url string + Content string + Title string + State string + // Source string +} + +func prettifyStruct(obj interface{}) string { + bytes, _ := json.MarshalIndent(obj, "", " ") + return string(bytes) +} + +func NewPineconeClient(indexName string) *PineconeClient { + ctx := context.Background() + + pc, err := pinecone.NewClient(pinecone.NewClientParams{ + ApiKey: os.Getenv("PINECONE_API_KEY"), + }) + + if err != nil { + log.Fatalf("Failed to create Client: %v", err) + } + return &PineconeClient{context: ctx, pc: pc, indexName: indexName} +} + +func (pc *PineconeClient) TestUpsert(metadataMap map[string]interface{}, vector []float32) { + indexName := "similar-issues" + // Add to the main function: + + idxModel, err := pc.pc.DescribeIndex(pc.context, indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", indexName, err) + } + + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + } + metadata, err := structpb.NewStruct(metadataMap) + if err != nil { + log.Fatalf("Failed to create metadata map: %v", err) + } + pcVector := []*pinecone.Vector{ + { + Id: "vec2", + Values: &vector, + Metadata: metadata, + }, + } + + count, err := idxConnection.UpsertVectors(pc.context, pcVector) + if err != nil { + log.Fatalf("Failed to upsert vectors: %v", err) + } else { + log.Printf("Successfully upserted %d vector(s)!\n", count) + } +} + +func (pc *PineconeClient) convertStructtoMap(issue Issue) map[string]interface{} { + return map[string]interface{}{ + "id": issue.Id, + "content": issue.Content, + "title": issue.Title, + "url": issue.Url, + "state": issue.State, + } +} + +func (pc *PineconeClient) RetrieveIssue(vector []float32) string { + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) + } + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + } + res, err := idxConnection.QueryByVectorValues(pc.context, &pinecone.QueryByVectorValuesRequest{ + Vector: vector, + TopK: 3, + IncludeValues: false, + IncludeMetadata: true, + }) + if err != nil { + log.Fatalf("Error encountered when querying by vector: %v", err) + } else { + log.Printf(prettifyStruct(res)) + } + text := "## Other issues similar to this one are: \n" + text += fmt.Sprintf("1. [%s #%s (%s)](%s)\n", res.Matches[0].Vector.Metadata.GetFields()["title"].GetStringValue(), res.Matches[0].Vector.Metadata.GetFields()["id"].GetStringValue(), res.Matches[0].Vector.Metadata.GetFields()["state"].GetStringValue(), res.Matches[0].Vector.Metadata.GetFields()["url"].GetStringValue()) + text += fmt.Sprintf("1. [%s #%s (%s)](%s)\n", res.Matches[1].Vector.Metadata.GetFields()["title"].GetStringValue(), res.Matches[1].Vector.Metadata.GetFields()["id"].GetStringValue(), res.Matches[1].Vector.Metadata.GetFields()["state"].GetStringValue(), res.Matches[1].Vector.Metadata.GetFields()["url"].GetStringValue()) + text += fmt.Sprintf("3. [%s #%s (%s)](%s)\n", res.Matches[2].Vector.Metadata.GetFields()["title"].GetStringValue(), res.Matches[2].Vector.Metadata.GetFields()["id"].GetStringValue(), res.Matches[2].Vector.Metadata.GetFields()["state"].GetStringValue(), res.Matches[2].Vector.Metadata.GetFields()["url"].GetStringValue()) + return text +} + +func (pc *PineconeClient) UpsertIssuesWithStruct(issues []Issue, vectors [][]float32) error { + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) + } + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + } + pcVectors := make([]*pinecone.Vector, len(issues)) + for i, issue := range issues { + metadataMap := pc.convertStructtoMap(issue) + metadata, err := structpb.NewStruct(metadataMap) + if err != nil { + log.Fatalf("Failed to create metadata map: %v", err) + } + pcVectors[i] = &pinecone.Vector{ + Id: issue.Id, + Values: &vectors[i], + Metadata: metadata, + } + } + count, err := idxConnection.UpsertVectors(pc.context, pcVectors) + if err != nil { + log.Fatalf("Failed to upsert vectors: %v", err) + return err + } else { + log.Printf("Successfully upserted %d vector(s)!\n", count) + } + return nil +} + +func (pc *PineconeClient) UpsertIssue(id string, metadataMap map[string]interface{}, vector []float32) error { + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) + } + + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + } + metadata, err := structpb.NewStruct(metadataMap) + if err != nil { + log.Fatalf("Failed to create metadata map: %v", err) + } + pcVector := []*pinecone.Vector{ + { + Id: id, + Values: &vector, + Metadata: metadata, + }, + } + + count, err := idxConnection.UpsertVectors(pc.context, pcVector) + if err != nil { + log.Fatalf("Failed to upsert vectors: %v", err) + return err + } else { + log.Printf("Successfully upserted %d vector(s)!\n", count) + } + return nil +} + +func (pc *PineconeClient) UpsertIssueWithStruct(issue Issue, vector []float32) error { + metadataMap := pc.convertStructtoMap(issue) + pc.UpsertIssue(issue.Id, metadataMap, vector) + return nil +} + +func (pc *PineconeClient) DeleteIndex() { + err := pc.pc.DeleteIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to delete index \"%v\": %v", pc.indexName, err) + } +} + +// Query the index +// func (pc *PineconeClient) GetSpecifiedData(id string) Issue { +func (pc *PineconeClient) GetSpecifiedData(id string) { + // Add to the main function: + + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) + } + + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + } + + // metadataFilter, err := structpb.NewStruct(metadataMap) + // if err != nil { + // log.Fatalf("Failed to create metadata map: %v", err) + // } + + res, err := idxConnection.QueryByVectorId(pc.context, &pinecone.QueryByVectorIdRequest{ + VectorId: id, + TopK: 1, + IncludeValues: true, + IncludeMetadata: true, + }) + + if err != nil { + log.Fatalf("Error encountered when querying by vector: %v", err) + } else { + log.Printf(prettifyStruct(res)) + } + log.Println(res.Matches[0].Vector.Metadata.GetFields()["question"].GetStringValue()) + // return Issue{id: res.Matches["vector"][0]["metadata"], content: res.Matches["vector"][0]["content"], source: res.Matches["vector"][0]["source"]} +} + +func (pc *PineconeClient) CreateIssueDB(issues []*github.GitHubIssue, embedding ai.EmbeddingModel) error { + // github.GetAllIssues("pacificbelt30", "actios_tester", os.Getenv("GITHUB_TOKEN")) + structIssues := make([]Issue, len(issues)) + var vectors [][]float32 + for i, issue := range issues { + gissue, _ := issue.GetIssue() + body, _ := issue.GetBody() + if body == nil { + body = new(string) + } + + comments, _ := issue.GetComments() + content := *body + "\n" + "Comments: " + for _, comment := range comments { + content += *comment.User.Login + ":" + *comment.Body + "\n" + } + + structIssues[i] = Issue{ + Id: fmt.Sprintf("%d", *gissue.Number), + Url: *gissue.HTMLURL, + Content: content, + Title: *gissue.Title, + State: *gissue.State, + } + vector, err := embedding.GetEmbedding("Title:" + *gissue.Title + "Body:" + content) + if err != nil { + log.Fatalf("Error getting embedding: %v", err) + } + vectors = append(vectors, vector) + } + err := pc.UpsertIssuesWithStruct(structIssues, vectors) + if err != nil { + log.Fatalf("Error upserting issues: %v", err) + return err + } + return nil +} From b28a3b9a0c3a3bedb5cbecc8f9d389a38d23b731 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Fri, 7 Mar 2025 16:35:50 +0900 Subject: [PATCH 06/18] Add func to get all issues from GitHub repository --- internal/github/github.go | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/internal/github/github.go b/internal/github/github.go index ac67f8c..9bce469 100644 --- a/internal/github/github.go +++ b/internal/github/github.go @@ -87,3 +87,35 @@ func NewIssue(owner string, repo string, issueNumber int, token string) *GitHubI issue := &GitHubIssue{owner: owner, repo: repo, issueNumber: issueNumber, token: token, client: client, ctx: ctx, logger: logger} return issue } + +func GetAllIssues(owner, repo, token string) []*GitHubIssue { + // Create GitHub client with OAuth2 token + ctx := context.Background() + ts := oauth2.StaticTokenSource( + &oauth2.Token{AccessToken: token}, + ) + tc := oauth2.NewClient(ctx, ts) + client := github.NewClient(tc) + + opt := &github.IssueListByRepoOptions{ + State: "all", // "open", "closed", "all" (デフォルトは "open") + ListOptions: github.ListOptions{PerPage: 100}, // 1ページあたりのIssue数(最大100) + } + + var allIssues []*GitHubIssue + for { + issues, resp, err := client.Issues.ListByRepo(ctx, owner, repo, opt) + if err != nil { + log.Fatal(err) + } + for _, issue := range issues { + allIssues = append(allIssues, NewIssue(owner, repo, *issue.Number, token)) + } + // allIssues = append(allIssues, issues...) + if resp.NextPage == 0 { + break // 次のページがなければ終了 + } + opt.ListOptions.Page = resp.NextPage // 次のページの番号をセット + } + return allIssues +} From 38da3409127c0edd45e4efed8aa7eedca4bf1234 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Fri, 7 Mar 2025 19:55:06 +0900 Subject: [PATCH 07/18] Add Naive RAG prototype using Pinecone DB --- go.mod | 36 ++++-- go.sum | 107 +++++++++++++---- internal/rag/neo4j.go | 2 + internal/rag/pinecone.go | 209 ++++++++++++++++++++++++++++++++++ internal/rag/rag.go | 5 +- internal/rag/similar_issue.go | 59 ++-------- internal/utils/git.go | 193 +++++++++++++++++++++++++++++++ 7 files changed, 531 insertions(+), 80 deletions(-) create mode 100644 internal/rag/pinecone.go create mode 100644 internal/utils/git.go diff --git a/go.mod b/go.mod index de27c3e..35f2d87 100644 --- a/go.mod +++ b/go.mod @@ -1,11 +1,14 @@ module github.com/3-shake/alert-menta -go 1.22.2 +go 1.23.0 + +toolchain go1.23.3 require ( cloud.google.com/go/vertexai v0.13.2 github.com/Azure/azure-sdk-for-go/sdk/ai/azopenai v0.7.1 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 + github.com/go-git/go-git/v5 v5.14.0 github.com/google/go-github v17.0.0+incompatible github.com/neo4j/neo4j-go-driver/v5 v5.27.0 github.com/pinecone-io/go-pinecone/v3 v3.0.0 @@ -22,33 +25,47 @@ require ( cloud.google.com/go/compute/metadata v0.5.2 // indirect cloud.google.com/go/iam v1.2.1 // indirect cloud.google.com/go/longrunning v0.6.1 // indirect + dario.cat/mergo v1.0.0 // indirect github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0 // indirect + github.com/Microsoft/go-winio v0.6.2 // indirect + github.com/ProtonMail/go-crypto v1.1.5 // indirect github.com/apapsch/go-jsonmerge/v2 v2.0.0 // indirect + github.com/cloudflare/circl v1.6.0 // indirect + github.com/cyphar/filepath-securejoin v0.4.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/emirpasic/gods v1.18.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect + github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect + github.com/go-git/go-billy/v5 v5.6.2 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect - github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect + github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect github.com/google/go-querystring v1.1.0 // indirect github.com/google/s2a-go v0.1.8 // indirect github.com/google/uuid v1.6.0 // indirect github.com/googleapis/enterprise-certificate-proxy v0.3.4 // indirect github.com/googleapis/gax-go/v2 v2.13.0 // indirect github.com/hashicorp/hcl v1.0.0 // indirect + github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 // indirect + github.com/kevinburke/ssh_config v1.2.0 // indirect github.com/magiconair/properties v1.8.7 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/oapi-codegen/runtime v1.1.1 // indirect github.com/pelletier/go-toml/v2 v2.2.2 // indirect + github.com/pjbgf/sha1cd v0.3.2 // indirect github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect + github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 // indirect + github.com/skeema/knownhosts v1.3.1 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect github.com/spf13/pflag v1.0.5 // indirect - github.com/stretchr/testify v1.9.0 // indirect + github.com/stretchr/testify v1.10.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect + github.com/xanzy/ssh-agent v0.3.3 // indirect go.opencensus.io v0.24.0 // indirect go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 // indirect go.opentelemetry.io/contrib/instrumentation/net/http/otelhttp v0.54.0 // indirect @@ -57,12 +74,12 @@ require ( go.opentelemetry.io/otel/trace v1.29.0 // indirect go.uber.org/atomic v1.9.0 // indirect go.uber.org/multierr v1.9.0 // indirect - golang.org/x/crypto v0.28.0 // indirect - golang.org/x/exp v0.0.0-20230905200255-921286631fa9 // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/text v0.19.0 // indirect + golang.org/x/crypto v0.35.0 // indirect + golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 // indirect + golang.org/x/net v0.35.0 // indirect + golang.org/x/sync v0.11.0 // indirect + golang.org/x/sys v0.30.0 // indirect + golang.org/x/text v0.22.0 // indirect golang.org/x/time v0.7.0 // indirect google.golang.org/api v0.203.0 // indirect google.golang.org/genproto v0.0.0-20241015192408-796eee8c2d53 // indirect @@ -70,5 +87,6 @@ require ( google.golang.org/genproto/googleapis/rpc v0.0.0-20241015192408-796eee8c2d53 // indirect google.golang.org/grpc v1.67.1 // indirect gopkg.in/ini.v1 v1.67.0 // indirect + gopkg.in/warnings.v0 v0.1.2 // indirect gopkg.in/yaml.v3 v3.0.1 // indirect ) diff --git a/go.sum b/go.sum index 35f4d63..7b58861 100644 --- a/go.sum +++ b/go.sum @@ -15,6 +15,8 @@ cloud.google.com/go/longrunning v0.6.1 h1:lOLTFxYpr8hcRtcwWir5ITh1PAKUD/sG2lKrTS cloud.google.com/go/longrunning v0.6.1/go.mod h1:nHISoOZpBcmlwbJmiVk5oDRz0qG/ZxPynEGs1iZ79s0= cloud.google.com/go/vertexai v0.13.2 h1:dOnvkMDZy3GdKAz8Isd2d6KV3jQpk6CKvYao1SIupuk= cloud.google.com/go/vertexai v0.13.2/go.mod h1:+nmz1z8AeYILA5QM2yii3CED1PqGknZH1CUNDVatIg4= +dario.cat/mergo v1.0.0 h1:AGCNq9Evsj31mOgNPcLyXc+4PNABt905YmuqPYYpBWk= +dario.cat/mergo v1.0.0/go.mod h1:uNxQE+84aUszobStD9th8a29P2fMDhsBdgRYvZOxGmk= github.com/Azure/azure-sdk-for-go/sdk/ai/azopenai v0.7.1 h1:6njivKrpo02SQ3CsaGKIFh0c5ZhQyzjVhBmLIl84h4Q= github.com/Azure/azure-sdk-for-go/sdk/ai/azopenai v0.7.1/go.mod h1:W+7E7pJtvdzscy/I4tqL5C0/weLsa32wyTbHbPdkkv0= github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 h1:JZg6HRh6W6U4OLl6lk7BZ7BLisIzM9dG1R50zUk9C/M= @@ -26,17 +28,34 @@ github.com/Azure/azure-sdk-for-go/sdk/internal v1.10.0/go.mod h1:iZDifYGJTIgIIkY github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2 h1:XHOnouVk1mxXfQidrMEnLlPk9UMeRtyBTnEFtxkV0kU= github.com/AzureAD/microsoft-authentication-library-for-go v1.2.2/go.mod h1:wP83P5OoQ5p6ip3ScPr0BAq0BvuPAvacpEuSzyouqAI= github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU= +github.com/Microsoft/go-winio v0.5.2/go.mod h1:WpS1mjBmmwHBEWmogvA2mj8546UReBk4v8QkMxJ6pZY= +github.com/Microsoft/go-winio v0.6.2 h1:F2VQgta7ecxGYO8k3ZZz3RS8fVIXVxONVUPlNERoyfY= +github.com/Microsoft/go-winio v0.6.2/go.mod h1:yd8OoFMLzJbo9gZq8j5qaps8bJ9aShtEA8Ipt1oGCvU= +github.com/ProtonMail/go-crypto v1.1.5 h1:eoAQfK2dwL+tFSFpr7TbOaPNUbPiJj4fLYwwGE1FQO4= +github.com/ProtonMail/go-crypto v1.1.5/go.mod h1:rA3QumHc/FZ8pAHreoekgiAbzpNsfQAosU5td4SnOrE= github.com/RaveNoX/go-jsoncommentstrip v1.0.0/go.mod h1:78ihd09MekBnJnxpICcwzCMzGrKSKYe4AqU6PDYYpjk= +github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be h1:9AeTilPcZAjCFIImctFaOjnTIavg87rW78vTPkQqLI8= +github.com/anmitsu/go-shlex v0.0.0-20200514113438-38f4b401e2be/go.mod h1:ySMOLuWl6zY27l47sB3qLNK6tF2fkHG55UZxx8oIVo4= github.com/apapsch/go-jsonmerge/v2 v2.0.0 h1:axGnT1gRIfimI7gJifB699GoE/oq+F2MU7Dml6nw9rQ= github.com/apapsch/go-jsonmerge/v2 v2.0.0/go.mod h1:lvDnEdqiQrp0O42VQGgmlKpxL1AP2+08jFMw88y4klk= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5 h1:0CwZNZbxp69SHPdPJAN/hZIm0C4OItdklCFmMRWYpio= +github.com/armon/go-socks5 v0.0.0-20160902184237-e75332964ef5/go.mod h1:wHh0iHkYZB8zMSxRWpUBQtwG5a7fFgvEO+odwuTv2gs= github.com/bmatcuk/doublestar v1.1.1/go.mod h1:UD6OnuiIn0yFxxA2le/rnRU1G4RaI4UvFv1sNto9p6w= github.com/census-instrumentation/opencensus-proto v0.2.1/go.mod h1:f6KPmirojxKA12rnyqOA5BBL4O983OfeGPqjHWSTneU= github.com/client9/misspell v0.3.4/go.mod h1:qj6jICC3Q7zFZvVWo7KLAzC3yx5G7kyvSDkc90ppPyw= +github.com/cloudflare/circl v1.6.0 h1:cr5JKic4HI+LkINy2lg3W2jF8sHCVTBncJr5gIIq7qk= +github.com/cloudflare/circl v1.6.0/go.mod h1:uddAzsPgqdMAYatqJ0lsjX1oECcQLIlRpzZh3pJrofs= github.com/cncf/udpa/go v0.0.0-20191209042840-269d4d468f6f/go.mod h1:M8M6+tZqaGXZJjfX53e64911xZQV5JYwmTeXPW+k8Sc= +github.com/cyphar/filepath-securejoin v0.4.1 h1:JyxxyPEaktOD+GAnqIqTf9A8tHyAG22rowi7HkoSU1s= +github.com/cyphar/filepath-securejoin v0.4.1/go.mod h1:Sdj7gXlvMcPZsbhwhQ33GguGLDGQL7h7bg04C/+u9jI= github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= +github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= +github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= +github.com/emirpasic/gods v1.18.1/go.mod h1:8tpGGwCnJ5H4r6BWwaV6OrWmMoPhUl5jm/FMNAnJvWQ= github.com/envoyproxy/go-control-plane v0.9.0/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.1-0.20191026205805-5f8ba28d4473/go.mod h1:YTl/9mNaCwkRvm6d1a2C3ymFceY/DCBVvsKhRF0iEA4= github.com/envoyproxy/go-control-plane v0.9.4/go.mod h1:6rpuAdCZL397s3pYoYcLgu1mIlRU8Am5FuJP05cCM98= @@ -47,6 +66,16 @@ github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHk github.com/frankban/quicktest v1.14.6/go.mod h1:4ptaffx2x8+WTWXmUCuVU6aPUX1/Mz7zb5vbUoiM6w0= github.com/fsnotify/fsnotify v1.7.0 h1:8JEhPFa5W2WU7YfeZzPNqzMP6Lwt7L2715Ggo0nosvA= github.com/fsnotify/fsnotify v1.7.0/go.mod h1:40Bi/Hjc2AVfZrqy+aj+yEI+/bRxZnMJyTJwOpGvigM= +github.com/gliderlabs/ssh v0.3.8 h1:a4YXD1V7xMF9g5nTkdfnja3Sxy1PVDCj1Zg4Wb8vY6c= +github.com/gliderlabs/ssh v0.3.8/go.mod h1:xYoytBv1sV0aL3CavoDuJIQNURXkkfPA/wxQ1pL1fAU= +github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 h1:+zs/tPmkDkHx3U66DAb0lQFJrpS6731Oaa12ikc+DiI= +github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376/go.mod h1:an3vInlBmSxCcxctByoQdvwPiA7DTK7jaaFDBTtu0ic= +github.com/go-git/go-billy/v5 v5.6.2 h1:6Q86EsPXMa7c3YZ3aLAQsMA0VlWmy43r6FHqa/UNbRM= +github.com/go-git/go-billy/v5 v5.6.2/go.mod h1:rcFC2rAsp/erv7CMz9GczHcuD0D32fWzH+MJAU+jaUU= +github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399 h1:eMje31YglSBqCdIqdhKBW8lokaMrL3uTkpGYlE2OOT4= +github.com/go-git/go-git-fixtures/v4 v4.3.2-0.20231010084843-55a94097c399/go.mod h1:1OCfN199q1Jm3HZlxleg+Dw/mwps2Wbk9frAWm+4FII= +github.com/go-git/go-git/v5 v5.14.0 h1:/MD3lCrGjCen5WfEAzKg00MJJffKhC8gzS80ycmCi60= +github.com/go-git/go-git/v5 v5.14.0/go.mod h1:Z5Xhoia5PcWA3NF8vRLURn9E5FRhSl7dGj9ItW3Wk5k= github.com/go-logr/logr v1.2.2/go.mod h1:jdQByPbusPIv2/zmleS9BjJVeZ6kBagPoEUsqbVz/1A= github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY= github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY= @@ -56,8 +85,8 @@ github.com/golang-jwt/jwt/v5 v5.2.1 h1:OuVbFODueb089Lh128TAcimifWaLhJwVflnrgM17w github.com/golang-jwt/jwt/v5 v5.2.1/go.mod h1:pqrtFR0X4osieyHYxtmOUWsAWrfe1Q5UVIyoH402zdk= github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q= github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE= -github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc= +github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 h1:f+oWsMOmNPc8JmEHVZIycC7hBoQxHH9pNKQORJNozsQ= +github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8/go.mod h1:wcDNUvekVysuuOpQKo3191zZyTpiI6se1N1ULghS0sw= github.com/golang/mock v1.1.1/go.mod h1:oTYuIxOrZwtPieC+H1uAHpcLFnEyAGVDL/k47Jfbm0A= github.com/golang/protobuf v1.2.0/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= github.com/golang/protobuf v1.3.2/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U= @@ -77,8 +106,8 @@ github.com/google/go-cmp v0.4.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/ github.com/google/go-cmp v0.5.0/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.2/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= github.com/google/go-cmp v0.5.3/go.mod h1:v8dTdLbMG2kIc/vJvl+f65V22dbkXbowE6jgT/gNBxE= -github.com/google/go-cmp v0.6.0 h1:ofyhxvXcZhMsU5ulbFiLKl/XBFqE1GSq7atu8tAmTRI= -github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeNGIjoY= +github.com/google/go-cmp v0.7.0 h1:wk8382ETsv4JYUZwIsn6YpYiWiBsYLSJiTsyBybVuN8= +github.com/google/go-cmp v0.7.0/go.mod h1:pXiqmnSA92OHEEa9HXL2W4E7lf9JzCmGVUdgjX3N/iU= github.com/google/go-github v17.0.0+incompatible h1:N0LgJ1j65A7kfXrZnUDaYCs/Sf4rEjNlfyDHW9dolSY= github.com/google/go-github v17.0.0+incompatible/go.mod h1:zLgOLi98H3fifZn+44m+umXrS52loVEgC2AApnigrVQ= github.com/google/go-querystring v1.1.0 h1:AnCroh3fv4ZBgVIf1Iwtovgjaw/GiKJo8M8yD/fhyJ8= @@ -94,9 +123,16 @@ github.com/googleapis/gax-go/v2 v2.13.0 h1:yitjD5f7jQHhyDsnhKEBU52NdvvdSeGzlAnDP github.com/googleapis/gax-go/v2 v2.13.0/go.mod h1:Z/fvTZXF8/uw7Xu5GuslPw+bplx6SS338j1Is2S+B7A= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= +github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99 h1:BQSFePA1RWJOlocH6Fxy8MmwDt+yVQYULKfN0RoTN8A= +github.com/jbenet/go-context v0.0.0-20150711004518-d14ea06fba99/go.mod h1:1lJo3i6rXxKeerYnT8Nvf0QmHCRC1n8sfWVwXF2Frvo= github.com/juju/gnuflag v0.0.0-20171113085948-2ce1bb71843d/go.mod h1:2PavIy+JPciBPrBUjwbNvtwB6RQlve+hkpll6QSNmOE= +github.com/kevinburke/ssh_config v1.2.0 h1:x584FjTGwHzMwvHx18PXxbBVzfnxogHaAReU4gf13a4= +github.com/kevinburke/ssh_config v1.2.0/go.mod h1:CT57kijsi8u/K/BOFA39wgDQJ9CxiF4nAY/ojJ6r6mM= +github.com/kr/pretty v0.1.0/go.mod h1:dAy3ld7l9f0ibDNOQOHHMYYIIbhfbHSm3C4ZsoJORNo= github.com/kr/pretty v0.3.1 h1:flRD4NNwYAUpkphVc1HcthR4KEIFJ65n8Mw5qdRn3LE= github.com/kr/pretty v0.3.1/go.mod h1:hoEshYVHaxMs3cyo3Yncou5ZscifuDolrwPKZanG3xk= +github.com/kr/pty v1.1.1/go.mod h1:pFQYn66WHrOpPYNljwOMqo10TkYh1fy3cYio2l3bCsQ= +github.com/kr/text v0.1.0/go.mod h1:4Jbv+DJW3UT/LiOwJeYQe1efqtUx/iVham/4vfdArNI= github.com/kr/text v0.2.0 h1:5Nx0Ya0ZqY2ygV366QzturHI13Jq95ApcVaJBhpS+AY= github.com/kr/text v0.2.0/go.mod h1:eLer722TekiGuMkidMxC/pM04lWEeraHUUmBw8l2grE= github.com/kylelemons/godebug v1.1.0 h1:RPNrshWIDI6G2gRW9EHilWtl7Z6Sb1BR0xunSBf0SNc= @@ -109,22 +145,33 @@ github.com/neo4j/neo4j-go-driver/v5 v5.27.0 h1:YdsIxDjAQbjlP/4Ha9B/gF8Y39UdgdTwC github.com/neo4j/neo4j-go-driver/v5 v5.27.0/go.mod h1:Vff8OwT7QpLm7L2yYr85XNWe9Rbqlbeb9asNXJTHO4k= github.com/oapi-codegen/runtime v1.1.1 h1:EXLHh0DXIJnWhdRPN2w4MXAzFyE4CskzhNLUmtpMYro= github.com/oapi-codegen/runtime v1.1.1/go.mod h1:SK9X900oXmPWilYR5/WKPzt3Kqxn/uS/+lbpREv+eCg= +github.com/onsi/gomega v1.34.1 h1:EUMJIKUjM8sKjYbtxQI9A4z2o+rruxnzNvpknOXie6k= +github.com/onsi/gomega v1.34.1/go.mod h1:kU1QgUvBDLXBJq618Xvm2LUX6rSAfRaFRTcdOeDLwwY= github.com/pelletier/go-toml/v2 v2.2.2 h1:aYUidT7k73Pcl9nb2gScu7NSrKCSHIDE89b3+6Wq+LM= github.com/pelletier/go-toml/v2 v2.2.2/go.mod h1:1t835xjRzz80PqgE6HHgN2JOsmgYu/h4qDAS4n929Rs= github.com/pinecone-io/go-pinecone/v3 v3.0.0 h1:RykZu34ejebysPR6i1BH+o8BNPcZYObd9vC4JaSNy0g= github.com/pinecone-io/go-pinecone/v3 v3.0.0/go.mod h1:i+I3G/SAx07ky/3PwCjqG41EcTPP6lT+8fEDZDnnHMs= +github.com/pjbgf/sha1cd v0.3.2 h1:a9wb0bp1oC2TGwStyn0Umc/IGKQnEgF0vVaZ8QF8eo4= +github.com/pjbgf/sha1cd v0.3.2/go.mod h1:zQWigSxVmsHEZow5qaLtPYxpcKMMQpa09ixqBxuCS6A= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmdv1U2eRNDiU2ErMBj1gwrq8eQ= github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= +github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= +github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/prometheus/client_model v0.0.0-20190812154241-14fe0d1b01d4/go.mod h1:xMI15A0UPsDsEKsMN9yxemIoYk6Tm2C1GtYGdfGttqA= -github.com/rogpeppe/go-internal v1.12.0 h1:exVL4IDcn6na9z1rAb56Vxr+CgyK3nn3O+epU5NdKM8= -github.com/rogpeppe/go-internal v1.12.0/go.mod h1:E+RYuTGaKKdloAfM02xzb0FW3Paa99yedzYV+kq4uf4= +github.com/rogpeppe/go-internal v1.14.1 h1:UQB4HGPB6osV0SQTLymcB4TgvyWu6ZyliaW0tI/otEQ= +github.com/rogpeppe/go-internal v1.14.1/go.mod h1:MaRKkUm5W0goXpeCfT7UZI6fk/L7L7so1lCWt35ZSgc= github.com/sagikazarmark/locafero v0.4.0 h1:HApY1R9zGo4DBgr7dqsTH/JJxLTTsOt7u6keLGt6kNQ= github.com/sagikazarmark/locafero v0.4.0/go.mod h1:Pe1W6UlPYUk/+wc/6KFhbORCfqzgYEpgQ3O5fPuL3H4= github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6gto+ugjYE= github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= +github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3 h1:n661drycOFuPLCN3Uc8sB6B/s6Z4t2xvBgU1htSHuq8= +github.com/sergi/go-diff v1.3.2-0.20230802210424-5b0b94c5c0d3/go.mod h1:A0bzQcvG0E7Rwjx0REVgAGH58e96+X0MeOfepqsbeW4= +github.com/sirupsen/logrus v1.7.0/go.mod h1:yWOB1SBYBC5VeMP7gHvWumXLIWorT60ONWic61uBYv0= +github.com/skeema/knownhosts v1.3.1 h1:X2osQ+RAjK76shCbvhHHHVl3ZlgDm8apHEHFqRjnBY8= +github.com/skeema/knownhosts v1.3.1/go.mod h1:r7KTdC8l4uxWRyK2TpQZ/1o5HaSzh06ePQNxPwTcfiY= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= @@ -140,15 +187,20 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/objx v0.4.0/go.mod h1:YvHI0jy2hoMjB+UWwv71VJQ9isScKT/TqJzVSSt89Yw= github.com/stretchr/objx v0.5.0/go.mod h1:Yh+to48EsGEfYuaHDzXPcE3xhTkx73EhmCGUpEOglKo= github.com/stretchr/objx v0.5.2/go.mod h1:FRsXN1f5AsAjCGJKqEizvkpNtU+EGNCLh3NxZ/8L+MA= +github.com/stretchr/testify v1.2.2/go.mod h1:a8OnRcib4nhh0OaRAV+Yts87kKdq0PP7pXfy6kDkUVs= github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= +github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.1/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= github.com/stretchr/testify v1.8.0/go.mod h1:yNjHg4UonilssWZ8iaSj1OCr/vHnekPRkoO+kdMU+MU= github.com/stretchr/testify v1.8.1/go.mod h1:w2LPCIKwWwSfY2zedu0+kehJoqGctiVI29o6fzry7u4= github.com/stretchr/testify v1.8.4/go.mod h1:sz/lmYIOXD/1dqDmKjjqLyZ2RngseejIcXlSw2iwfAo= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= +github.com/xanzy/ssh-agent v0.3.3 h1:+/15pJfg/RsTxqYcX6fHqOXZwwMP+2VyYWJeWM2qQFM= +github.com/xanzy/ssh-agent v0.3.3/go.mod h1:6dzNDKs0J9rVPHPhaGCukekBHKqfl+L3KghI1Bc68Uw= go.opencensus.io v0.24.0 h1:y73uSU6J157QMP2kn2r30vwW1A2W2WFwSCGnAVxeaD0= go.opencensus.io v0.24.0/go.mod h1:vNK8G9p7aAivkbmorf4v+7Hgx+Zs0yY+0fOtgBfjQKo= go.opentelemetry.io/contrib/instrumentation/google.golang.org/grpc/otelgrpc v0.54.0 h1:r6I7RJCN86bpD/FQwedZ0vSixDpwuWREjW9oRMsmqDc= @@ -167,11 +219,12 @@ go.uber.org/multierr v1.9.0 h1:7fIwc/ZtS0q++VgcfqFDxSBZVv/Xo49/SYnDFupUwlI= go.uber.org/multierr v1.9.0/go.mod h1:X2jQV1h+kxSjClGpnseKVIxpmcjrj7MNnI0bnlfKTVQ= golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto= -golang.org/x/crypto v0.28.0 h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.0.0-20220622213112-05595931fe9d/go.mod h1:IxCIyHEi3zRg3s0A5j5BB6A9Jmi73HwBIUl50j+osU4= +golang.org/x/crypto v0.35.0 h1:b15kiHdrGCHrP6LvwaQ3c03kgNhhiMgvlhxHQhmg2Xs= +golang.org/x/crypto v0.35.0/go.mod h1:dy7dXNW32cAb/6/PRuTNsix8T+vJAqvuIy5Bli/x0YQ= golang.org/x/exp v0.0.0-20190121172915-509febef88a4/go.mod h1:CJ0aWSM057203Lf6IL+f9T1iT9GByDxfZKAQTCR3kQA= -golang.org/x/exp v0.0.0-20230905200255-921286631fa9 h1:GoHiUyI/Tp2nVkLI2mCxVkOjsbSXD66ic0XW0js0R9g= -golang.org/x/exp v0.0.0-20230905200255-921286631fa9/go.mod h1:S2oDrQGGwySpoQPVqRShND87VCbxmc6bL1Yd2oYrm6k= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56 h1:2dVuKD2vS7b0QIHQbpyTISPd0LeHDbnYEryqj5Q1ug8= +golang.org/x/exp v0.0.0-20240719175910-8a7402abbf56/go.mod h1:M4RDyNAINzryxdtnbRXRL/OHtkFuWGRjvuhBJpk2IlY= golang.org/x/lint v0.0.0-20181026193005-c67002cb31c3/go.mod h1:UVdnD1Gm6xHRNCYTkRU2/jEulfH38KcIWyp/GAMgvoE= golang.org/x/lint v0.0.0-20190227174305-5b3e6a55c961/go.mod h1:wehouNa3lNwaWXcvxsM5YxQ5yQlVC4a0KAMCusXpPoU= golang.org/x/lint v0.0.0-20190313153728-d0100b6bd8b3/go.mod h1:6SW0HCj/g11FgYtHlgUYUwCkIfeOF89ocIRzGO/8vkc= @@ -181,26 +234,37 @@ golang.org/x/net v0.0.0-20190213061140-3a22650c66bd/go.mod h1:mL1N/T3taQHkDXs73r golang.org/x/net v0.0.0-20190311183353-d8887717615a/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg= golang.org/x/net v0.0.0-20201110031124-69a78807bb2b/go.mod h1:sp8m0HH+o8qH0wwXwYZr8TS3Oi6o0r6Gce1SSxlDquU= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.0.0-20211112202133-69e39bad7dc2/go.mod h1:9nx3DQGgdP8bBQD5qxJ1jj9UTztislL4KSBs9R2vV5Y= +golang.org/x/net v0.35.0 h1:T5GQRQb2y08kTAByq9L4/bz8cipCdA8FbRTXewonqY8= +golang.org/x/net v0.35.0/go.mod h1:EglIi67kWsHKlRzzVMUD93VMSWGFOMSZgxFjparz1Qk= golang.org/x/oauth2 v0.0.0-20180821212333-d2e6202438be/go.mod h1:N/0e6XlmueqKjAGxoOufVs8QHGRruUQn6yWY3a++T0U= golang.org/x/oauth2 v0.24.0 h1:KTBBxWqUa0ykRPLtV69rRto9TLXcqYkeswu48x/gvNE= golang.org/x/oauth2 v0.24.0/go.mod h1:XYTD2NtWslqkgxebSiOHnXEap4TF09sJSc7H1sXbhtI= golang.org/x/sync v0.0.0-20180314180146-1d60e4601c6f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20181108010431-42b317875d0f/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.11.0 h1:GGz8+XQP4FvTTrjZPzNKTMFtSXH80RAzG+5ghFPgK9w= +golang.org/x/sync v0.11.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys v0.0.0-20180830151530-49385e6e1522/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20191026070338-33540a1f6037/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20200930185726-fdedc70b468f/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210124154548-22da62e12c0c/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210423082822-04245dca01da/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= +golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.0.0-20220715151400-c0bba94af5f8/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.30.0 h1:QjkSwP/36a20jFYWkSue1YwXzLmsV5Gfq7Eiy72C1uc= +golang.org/x/sys v0.30.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= +golang.org/x/term v0.29.0 h1:L6pJp37ocefwRRtYPKSWOWzOtWSxVajvz2ldH/xi3iU= +golang.org/x/term v0.29.0/go.mod h1:6bl4lRlvVuDgSf3179VpIxBF0o10JUpXWOnI7nErv7s= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.3.6/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= +golang.org/x/text v0.22.0 h1:bofq7m3/HAFvbF51jz3Q9wLg3jkvSPuiZu/pD1XwgtM= +golang.org/x/text v0.22.0/go.mod h1:YRoo4H8PVmsu+E3Ou7cqLVH8oXWIHVoX0jqUWALQhfY= golang.org/x/time v0.7.0 h1:ntUhktv3OPE6TgYxXWv9vKvUSJyIFJlyohwbkEwPrKQ= golang.org/x/time v0.7.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= @@ -241,10 +305,15 @@ google.golang.org/protobuf v1.25.0/go.mod h1:9JNX74DMeImyA3h4bdi1ymwjUzf21/xIlba google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA= google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE= gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= +gopkg.in/check.v1 v1.0.0-20190902080502-41f04d3bba15/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk= gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q= gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA= gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k= +gopkg.in/warnings.v0 v0.1.2 h1:wFXVbFY8DY5/xOe1ECiWdKCzZlxgshcYVNkBHstARME= +gopkg.in/warnings.v0 v0.1.2/go.mod h1:jksf8JmL6Qr/oQM2OXTHunEvvTAsrWBLb6OOjuVWRNI= +gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI= +gopkg.in/yaml.v2 v2.4.0/go.mod h1:RDklbk79AGWmwhnvt/jBztapEOGDOx6ZbXqjP6csGnQ= gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA= gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM= diff --git a/internal/rag/neo4j.go b/internal/rag/neo4j.go index 7f4cf24..48e7504 100644 --- a/internal/rag/neo4j.go +++ b/internal/rag/neo4j.go @@ -170,6 +170,8 @@ func (r *Neo4jRetriever) runCypher(query string, params map[string]interface{}) Id: id, Content: content, Score: score, + URL: "", // Temporary placeholder for URL + Branch: "", // Temporary placeholder for Branch }) } fmt.Println(len(documents)) diff --git a/internal/rag/pinecone.go b/internal/rag/pinecone.go new file mode 100644 index 0000000..83af4f8 --- /dev/null +++ b/internal/rag/pinecone.go @@ -0,0 +1,209 @@ +package rag + +import ( + "context" + "encoding/json" + "fmt" + "log" + "os" + "path/filepath" + + "github.com/3-shake/alert-menta/internal/ai" + "github.com/3-shake/alert-menta/internal/utils" + "github.com/pinecone-io/go-pinecone/v3/pinecone" + "google.golang.org/protobuf/types/known/structpb" +) + +func prettifyStruct(obj interface{}) string { + bytes, _ := json.MarshalIndent(obj, "", " ") + return string(bytes) +} + +type PineconeClient struct { + context context.Context + pc *pinecone.Client + indexName string +} + +func NewPineconeClient(indexName string) *PineconeClient { + ctx := context.Background() + + pc, err := pinecone.NewClient(pinecone.NewClientParams{ + ApiKey: os.Getenv("PINECONE_API_KEY"), + }) + + if err != nil { + log.Fatalf("Failed to create Client: %v", err) + } + return &PineconeClient{context: ctx, pc: pc, indexName: indexName} +} + +func ConvertPathtoDocument(owner, repo string, path utils.Path, root string) (*Document, error) { + contentBytes, err := os.ReadFile(filepath.Join(root, path.FilePath)) + if err != nil { + log.Fatalf("Failed to read file \"%v\": %v", path.FilePath, err) + return nil, err + } + content := string(contentBytes) + + if len(content) == 0 { + // log.Fatalf("File \"%v\" is empty", path) + return nil, fmt.Errorf("File \"%v\" is empty", path) + } + + return &Document{ + Id: path.Branch + "@" + path.FilePath, + Content: content, + Branch: path.Branch, + URL: fmt.Sprintf("https://github.com/%v/%v/blob/%v/%v", owner, repo, path.Branch, path.FilePath), + Score: 0, + }, nil +} + +// func (pc *PineconeClient) Retrieve(embedding ai.EmbeddingModel, options Options) ([]Document, error) { +func (pc *PineconeClient) RetrieveByVector(vector []float32, options Options) ([]Document, error) { + var docs []Document + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) + } + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "codebase"}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + } + res, err := idxConnection.QueryByVectorValues(pc.context, &pinecone.QueryByVectorValuesRequest{ + Vector: vector, + TopK: 3, + IncludeValues: false, + IncludeMetadata: true, + }) + if err != nil { + log.Fatalf("Error encountered when querying by vector: %v", err) + } else { + log.Printf(prettifyStruct(res)) + } + for _, match := range res.Matches { + doc := Document{ + Id: match.Vector.Metadata.GetFields()["id"].GetStringValue(), + Content: match.Vector.Metadata.GetFields()["content"].GetStringValue(), + Branch: match.Vector.Metadata.GetFields()["branch"].GetStringValue(), + URL: match.Vector.Metadata.GetFields()["url"].GetStringValue(), + Score: 0, + } + docs = append(docs, doc) + } + return docs, nil +} + +func (pc *PineconeClient) convertIssueStructtoMap(issue Issue) map[string]interface{} { + return map[string]interface{}{ + "id": issue.Id, + "content": issue.Content, + "title": issue.Title, + "url": issue.Url, + "state": issue.State, + } +} + +func (pc *PineconeClient) convertDocumentStructtoMap(doc Document) map[string]interface{} { + return map[string]interface{}{ + "id": doc.Id, + "content": doc.Content, + "branch": doc.Branch, + "url": doc.URL, + "score": doc.Score, + } +} + +func (pc *PineconeClient) DeleteIndex() { + err := pc.pc.DeleteIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to delete index \"%v\": %v", pc.indexName, err) + } +} + +func (pc *PineconeClient) DeleteRecords(ids []string) error { + for _, id := range ids { + err := pc.DeleteRecord(id) + if err != nil { + log.Fatalf("Failed to delete record with id %v: %v", id, err) + return err + } + } + return nil +} + +func (pc *PineconeClient) DeleteRecord(id string) error { + nameSpace := "codebase" + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) + return err + } + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: nameSpace}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + return err + } + err = idxConnection.DeleteVectorsById(pc.context, []string{id}) + if err != nil { + log.Fatalf("Failed to delete vectors: %v", err) + return err + } else { + log.Printf("Successfully deleted vector with id %v!\n", id) + } + return nil +} + +func (pc *PineconeClient) CreateCodebaseDB(docs []Document, embedding ai.EmbeddingModel) error { + var vectors [][]float32 + for _, doc := range docs { + // 1536 is the default embedding size for the Universal Sentence Encoder + vector, err := embedding.GetEmbedding(doc.Content) + // vector := make([]float32, 1536) + // var err error + if err != nil { + log.Fatalf("Error getting embedding: %v", err) + } + vectors = append(vectors, vector) + } + err := pc.UpsertWithStruct(docs, vectors) + if err != nil { + log.Fatalf("Error upserting docs: %v", err) + return err + } + return nil +} + +func (pc *PineconeClient) UpsertWithStruct(docs []Document, vectors [][]float32) error { + nameSpace := "codebase" + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) + } + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: nameSpace}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + } + pcVectors := make([]*pinecone.Vector, len(docs)) + for i, doc := range docs { + metadataMap := pc.convertDocumentStructtoMap(doc) + metadata, err := structpb.NewStruct(metadataMap) + if err != nil { + log.Fatalf("Failed to create metadata map: %v", err) + } + pcVectors[i] = &pinecone.Vector{ + Id: doc.Id, + Values: &vectors[i], + Metadata: metadata, + } + } + count, err := idxConnection.UpsertVectors(pc.context, pcVectors) + if err != nil { + log.Fatalf("Failed to upsert vectors: %v", err) + return err + } else { + log.Printf("Successfully upserted %d vector(s)!\n", count) + } + return nil +} diff --git a/internal/rag/rag.go b/internal/rag/rag.go index 18061cc..3e311a5 100644 --- a/internal/rag/rag.go +++ b/internal/rag/rag.go @@ -6,7 +6,8 @@ import ( type Retriever interface { // Retrieve(ctx context.Context, query string, options ...Option) ([]Document, error) - Retrieve(embedding ai.EmbeddingModel, options Options) ([]Document, error) + Retrieve(query string, embedding ai.EmbeddingModel, options Options) ([]Document, error) + RetrieveByVector(vector []float32, options Options) ([]Document, error) } type Options struct { @@ -18,6 +19,8 @@ type Options struct { type Document struct { Id string Content string + Branch string + URL string Score float64 } diff --git a/internal/rag/similar_issue.go b/internal/rag/similar_issue.go index f700808..647f13d 100644 --- a/internal/rag/similar_issue.go +++ b/internal/rag/similar_issue.go @@ -1,11 +1,8 @@ package rag import ( - "context" - "encoding/json" "fmt" "log" - "os" // "github.com/joho/godotenv" "github.com/3-shake/alert-menta/internal/ai" @@ -15,12 +12,6 @@ import ( "google.golang.org/protobuf/types/known/structpb" ) -type PineconeClient struct { - context context.Context - pc *pinecone.Client - indexName string -} - type Issue struct { Id string Url string @@ -30,24 +21,6 @@ type Issue struct { // Source string } -func prettifyStruct(obj interface{}) string { - bytes, _ := json.MarshalIndent(obj, "", " ") - return string(bytes) -} - -func NewPineconeClient(indexName string) *PineconeClient { - ctx := context.Background() - - pc, err := pinecone.NewClient(pinecone.NewClientParams{ - ApiKey: os.Getenv("PINECONE_API_KEY"), - }) - - if err != nil { - log.Fatalf("Failed to create Client: %v", err) - } - return &PineconeClient{context: ctx, pc: pc, indexName: indexName} -} - func (pc *PineconeClient) TestUpsert(metadataMap map[string]interface{}, vector []float32) { indexName := "similar-issues" // Add to the main function: @@ -57,7 +30,7 @@ func (pc *PineconeClient) TestUpsert(metadataMap map[string]interface{}, vector log.Fatalf("Failed to describe index \"%v\": %v", indexName, err) } - idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "issues"}) if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) } @@ -81,22 +54,12 @@ func (pc *PineconeClient) TestUpsert(metadataMap map[string]interface{}, vector } } -func (pc *PineconeClient) convertStructtoMap(issue Issue) map[string]interface{} { - return map[string]interface{}{ - "id": issue.Id, - "content": issue.Content, - "title": issue.Title, - "url": issue.Url, - "state": issue.State, - } -} - func (pc *PineconeClient) RetrieveIssue(vector []float32) string { idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) if err != nil { log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) } - idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "issues"}) if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) } @@ -119,17 +82,18 @@ func (pc *PineconeClient) RetrieveIssue(vector []float32) string { } func (pc *PineconeClient) UpsertIssuesWithStruct(issues []Issue, vectors [][]float32) error { + nameSpace := "issues" idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) if err != nil { log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) } - idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: nameSpace}) if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) } pcVectors := make([]*pinecone.Vector, len(issues)) for i, issue := range issues { - metadataMap := pc.convertStructtoMap(issue) + metadataMap := pc.convertIssueStructtoMap(issue) metadata, err := structpb.NewStruct(metadataMap) if err != nil { log.Fatalf("Failed to create metadata map: %v", err) @@ -156,7 +120,7 @@ func (pc *PineconeClient) UpsertIssue(id string, metadataMap map[string]interfac log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) } - idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "issues"}) if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) } @@ -183,18 +147,11 @@ func (pc *PineconeClient) UpsertIssue(id string, metadataMap map[string]interfac } func (pc *PineconeClient) UpsertIssueWithStruct(issue Issue, vector []float32) error { - metadataMap := pc.convertStructtoMap(issue) + metadataMap := pc.convertIssueStructtoMap(issue) pc.UpsertIssue(issue.Id, metadataMap, vector) return nil } -func (pc *PineconeClient) DeleteIndex() { - err := pc.pc.DeleteIndex(pc.context, pc.indexName) - if err != nil { - log.Fatalf("Failed to delete index \"%v\": %v", pc.indexName, err) - } -} - // Query the index // func (pc *PineconeClient) GetSpecifiedData(id string) Issue { func (pc *PineconeClient) GetSpecifiedData(id string) { @@ -205,7 +162,7 @@ func (pc *PineconeClient) GetSpecifiedData(id string) { log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) } - idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "bug"}) + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "issues"}) if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) } diff --git a/internal/utils/git.go b/internal/utils/git.go new file mode 100644 index 0000000..8185eb6 --- /dev/null +++ b/internal/utils/git.go @@ -0,0 +1,193 @@ +package utils + +import ( + "fmt" + // "io/ioutil" + "io/fs" + "os" + "path/filepath" + "slices" + "strings" + + "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/plumbing" + "github.com/go-git/go-git/v5/plumbing/object" + // "github.com/go-git/go-git/v5/storage/memory" +) + +// Path 構造体: ファイルパスとブランチ名を保持 +type Path struct { + FilePath string + Branch string +} + +func GetAllFilesFromAllBranches(repoPath string, ignoreList, branchList []string) ([]Path, error) { + + var allPaths []Path + + if ignoreList == nil { + ignoreList = []string{} + } + if branchList == nil { + branchList = []string{} + } + + repo, err := git.PlainOpen(repoPath) + if err != nil { + return nil, fmt.Errorf("failed to open repository: %w", err) + } + + ignoreMap := make(map[string]bool) + for _, item := range ignoreList { + ignoreMap[item] = true + } + + branchRefs, err := repo.Branches() + if err != nil { + return nil, fmt.Errorf("failed to get branches: %w", err) + } + + err = branchRefs.ForEach(func(branchRef *plumbing.Reference) error { + branchName := branchRef.Name().Short() + + if !slices.Contains(branchList, branchName) { + return nil + } + + commit, err := repo.CommitObject(branchRef.Hash()) + if err != nil { + return fmt.Errorf("failed to get commit for branch %s: %w", branchName, err) + } + + tree, err := commit.Tree() + if err != nil { + return fmt.Errorf("failed to get tree for branch %s: %w", branchName, err) + } + + tree.Files().ForEach(func(file *object.File) error { + + baseName := filepath.Base(file.Name) + if _, ok := ignoreMap[baseName]; ok { + return nil + } + + for _, ignoreItem := range ignoreList { + matched, _ := filepath.Match(ignoreItem, file.Name) + if matched { + return nil + } + } + + allPaths = append(allPaths, Path{ + FilePath: file.Name, + Branch: branchName, + }) + return nil + }) + return nil + }) + + if err != nil { + return nil, err + } + + return allPaths, nil +} + +func Tree(path string, level int) error { + // prefix を計算 (インデント用) + prefix := strings.Repeat("│ ", level) + + // ファイル/ディレクトリ情報を取得 + files, err := os.ReadDir(path) // os.ReadDir がより新しいGoのバージョンでは推奨 + // files, err := ioutil.ReadDir(path) // os.ReadDir がより新しいGoのバージョンでは推奨 + if err != nil { + return err + } + + // 各エントリを処理 + for i, file := range files { + isLast := i == len(files)-1 // 最後のエントリかどうかを判定 + + // コネクタの文字列を決定 + connector := "├── " + if isLast { + connector = "└── " + } + + // ファイル/ディレクトリ名を表示 + fmt.Printf("%s%s%s\n", prefix, connector, file.Name()) + + // ディレクトリの場合は再帰的に tree を呼び出す + if file.IsDir() { + newPath := filepath.Join(path, file.Name()) + if err := Tree(newPath, level+1); err != nil { + return err + } + } else { // ファイルの場合は内容を出力 + filePath := filepath.Join(path, file.Name()) + content, err := os.ReadFile(filePath) + if err != nil { + fmt.Printf("%s│ %sError reading file: %v\n", prefix, strings.Repeat(" ", level), err) + continue //エラーでも続行 + } + + // 内容をprefix付きで出力 + lines := strings.Split(string(content), "\n") + for _, line := range lines { + fmt.Printf("%s│ %s\n", prefix, line) + } + } + } + return nil +} + +func CollectFiles(rootPath string, ignoreList []string) ([]string, error) { + var filePaths []string + + if ignoreList == nil { + ignoreList = []string{} + } + + absRootPath, err := filepath.Abs(rootPath) + if err != nil { + return nil, fmt.Errorf("error converting to absolute path: %w", err) + } + + // transform ignoreList to map for faster search + ignoreMap := make(map[string]bool) + for _, item := range ignoreList { + ignoreMap[item] = true + } + + err = filepath.WalkDir(absRootPath, func(path string, d fs.DirEntry, err error) error { + if err != nil { + return err + } + + if _, ok := ignoreMap[d.Name()]; ok { + if d.IsDir() { + return filepath.SkipDir + } + return nil + } + + if d.IsDir() { + return nil + } + + relPath, err := filepath.Rel(absRootPath, path) + if err != nil { + return fmt.Errorf("error getting relative path: %w", err) + } + + filePaths = append(filePaths, relPath) + return nil + }) + + if err != nil { + return nil, fmt.Errorf("error walking directory: %w", err) + } + + return filePaths, nil +} From aefb704baf7c88c916c1d5142c7f2fe6880e0384 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Mon, 10 Mar 2025 18:24:02 +0900 Subject: [PATCH 08/18] Feature Pass GitHub file structure for efficient RAG --- internal/ai/openai.go | 4 +- internal/github/github.go | 39 ++++++++++++++-- internal/rag/pinecone.go | 96 +++++++++++++++++++++++++++++++++++++-- internal/rag/rag.go | 6 +-- 4 files changed, 130 insertions(+), 15 deletions(-) diff --git a/internal/ai/openai.go b/internal/ai/openai.go index 85f2872..55cceaf 100644 --- a/internal/ai/openai.go +++ b/internal/ai/openai.go @@ -74,7 +74,7 @@ func (ai *OpenAI) GetEmbedding(text string) ([]float32, error) { // Create a new OpenAI client keyCredential := azcore.NewKeyCredential(ai.apiKey) client, _ := azopenai.NewClientForOpenAI("https://api.openai.com/v1/", keyCredential, nil) - modelDeploymentID := "text-embedding-ada-002" + modelDeploymentID := "text-embedding-3-small" options := &azopenai.EmbeddingsOptions{ Input: []string{text}, DeploymentName: &modelDeploymentID, @@ -84,7 +84,5 @@ func (ai *OpenAI) GetEmbedding(text string) ([]float32, error) { fmt.Println(err) return []float32{}, err } - // fmt.Println(reflect.TypeOf(response)) - // fmt.Println(response.Embeddings.Data[0].Embedding) return response.Embeddings.Data[0].Embedding, nil } diff --git a/internal/github/github.go b/internal/github/github.go index 9bce469..bce5d47 100644 --- a/internal/github/github.go +++ b/internal/github/github.go @@ -2,8 +2,11 @@ package github import ( "context" + "fmt" "log" + "net/http" "os" + "strings" "github.com/google/go-github/github" "golang.org/x/oauth2" @@ -68,6 +71,34 @@ func (gh *GitHubIssue) PostComment(commentBody string) error { return err } +func (gh *GitHubIssue) GetDefaultBranch() (string, error) { + repoInfo, _, err := gh.client.Repositories.Get(gh.ctx, gh.owner, gh.repo) + return repoInfo.GetDefaultBranch(), err +} + +// ListFiles lists all files in a specific branch of a GitHub repository. +func (gh *GitHubIssue) ListFiles(branch string) ([]string, error) { + tree, response, err := gh.client.Git.GetTree(gh.ctx, gh.owner, gh.repo, branch, true) + if err != nil { + if response != nil && response.StatusCode == http.StatusForbidden { + if strings.Contains(response.Header.Get("Retry-After"), "s") { + return nil, fmt.Errorf("rate limited by GitHub API, recommended to use personal access token: %w", err) + } + + } + return nil, fmt.Errorf("failed to get tree for branch %s: %w", branch, err) + } + + var fileNames []string + for _, entry := range tree.Entries { + if entry.GetType() == "blob" { + fileNames = append(fileNames, entry.GetPath()) + } + } + + return fileNames, nil +} + func NewIssue(owner string, repo string, issueNumber int, token string) *GitHubIssue { // Create GitHub client with OAuth2 token ctx := context.Background() @@ -98,8 +129,8 @@ func GetAllIssues(owner, repo, token string) []*GitHubIssue { client := github.NewClient(tc) opt := &github.IssueListByRepoOptions{ - State: "all", // "open", "closed", "all" (デフォルトは "open") - ListOptions: github.ListOptions{PerPage: 100}, // 1ページあたりのIssue数(最大100) + State: "all", + ListOptions: github.ListOptions{PerPage: 100}, } var allIssues []*GitHubIssue @@ -113,9 +144,9 @@ func GetAllIssues(owner, repo, token string) []*GitHubIssue { } // allIssues = append(allIssues, issues...) if resp.NextPage == 0 { - break // 次のページがなければ終了 + break } - opt.ListOptions.Page = resp.NextPage // 次のページの番号をセット + opt.ListOptions.Page = resp.NextPage } return allIssues } diff --git a/internal/rag/pinecone.go b/internal/rag/pinecone.go index 83af4f8..fdbee50 100644 --- a/internal/rag/pinecone.go +++ b/internal/rag/pinecone.go @@ -7,6 +7,8 @@ import ( "log" "os" "path/filepath" + "strings" + "time" "github.com/3-shake/alert-menta/internal/ai" "github.com/3-shake/alert-menta/internal/utils" @@ -25,6 +27,12 @@ type PineconeClient struct { indexName string } +func GetIndexName(owner, repo string) string { + indexName := owner + "-" + strings.ToLower(repo) + indexName = strings.ReplaceAll(indexName, "_", "-") + return indexName +} + func NewPineconeClient(indexName string) *PineconeClient { ctx := context.Background() @@ -35,7 +43,12 @@ func NewPineconeClient(indexName string) *PineconeClient { if err != nil { log.Fatalf("Failed to create Client: %v", err) } - return &PineconeClient{context: ctx, pc: pc, indexName: indexName} + pcClient := &PineconeClient{context: ctx, pc: pc, indexName: indexName} + err = pcClient.createIndex() + if err != nil { + log.Fatalf("Failed to create index \"%v\": %v", indexName, err) + } + return pcClient } func ConvertPathtoDocument(owner, repo string, path utils.Path, root string) (*Document, error) { @@ -60,20 +73,60 @@ func ConvertPathtoDocument(owner, repo string, path utils.Path, root string) (*D }, nil } -// func (pc *PineconeClient) Retrieve(embedding ai.EmbeddingModel, options Options) ([]Document, error) { +func (pc *PineconeClient) createIndex() error { + _, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err == nil { + return nil + } + metric := pinecone.Cosine + dimension := int32(1536) + + _, err = pc.pc.CreateServerlessIndex(pc.context, &pinecone.CreateServerlessIndexRequest{ + Name: pc.indexName, + Cloud: pinecone.Aws, + Region: "us-east-1", + Metric: &metric, + Dimension: &dimension, + Tags: &pinecone.IndexTags{"environment": "development"}, + }) + if err != nil { + return err + } + return nil +} + +func (pc *PineconeClient) Retrieve(query string, embedding ai.EmbeddingModel, options Options) ([]Document, error) { + emb, err := embedding.GetEmbedding(query) + if err != nil { + return nil, err + } + return pc.RetrieveByVector(emb, options) +} + func (pc *PineconeClient) RetrieveByVector(vector []float32, options Options) ([]Document, error) { var docs []Document idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) if err != nil { log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) } + + if state, err := pc.waitUntilIndexReady(); state { + return nil, err + } + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "codebase"}) if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) } + + topK := options.topK + if topK == 0 { + topK = 3 // Default topK value + } + res, err := idxConnection.QueryByVectorValues(pc.context, &pinecone.QueryByVectorValuesRequest{ Vector: vector, - TopK: 3, + TopK: topK, IncludeValues: false, IncludeMetadata: true, }) @@ -140,6 +193,11 @@ func (pc *PineconeClient) DeleteRecord(id string) error { log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) return err } + + if state, err := pc.waitUntilIndexReady(); state { + return err + } + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: nameSpace}) if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) @@ -160,8 +218,6 @@ func (pc *PineconeClient) CreateCodebaseDB(docs []Document, embedding ai.Embeddi for _, doc := range docs { // 1536 is the default embedding size for the Universal Sentence Encoder vector, err := embedding.GetEmbedding(doc.Content) - // vector := make([]float32, 1536) - // var err error if err != nil { log.Fatalf("Error getting embedding: %v", err) } @@ -181,6 +237,11 @@ func (pc *PineconeClient) UpsertWithStruct(docs []Document, vectors [][]float32) if err != nil { log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) } + + if state, err := pc.waitUntilIndexReady(); state { + return err + } + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: nameSpace}) if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) @@ -207,3 +268,28 @@ func (pc *PineconeClient) UpsertWithStruct(docs []Document, vectors [][]float32) } return nil } + +// Referenced in https://github.com/pinecone-io/go-pinecone/blob/af29d07e7c68/pinecone/test_suite.go#L147 +func (pc *PineconeClient) waitUntilIndexReady() (bool, error) { + start := time.Now() + delay := 5 * time.Second + maxWaitTimeSeconds := 280 * time.Second + + for { + index, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + + if index.Status.Ready && index.Status.State == "Ready" { + log.Printf("Index \"%s\" is ready after %f seconds\n", pc.indexName, time.Since(start).Seconds()) + return true, err + } + + totalSeconds := time.Since(start) + + if totalSeconds >= maxWaitTimeSeconds { + return false, fmt.Errorf("Index \"%s\" not ready after %f seconds", pc.indexName, totalSeconds.Seconds()) + } + + log.Printf("Index \"%s\" not ready yet, retrying... (%f/%f)\n", pc.indexName, totalSeconds.Seconds(), maxWaitTimeSeconds.Seconds()) + time.Sleep(delay) + } +} diff --git a/internal/rag/rag.go b/internal/rag/rag.go index 3e311a5..6248d3c 100644 --- a/internal/rag/rag.go +++ b/internal/rag/rag.go @@ -11,9 +11,9 @@ type Retriever interface { } type Options struct { - topK int - withStructuredData bool - enableHybridRetrieval bool + topK uint32 + withStructuredData bool // Not implemented yet + enableHybridRetrieval bool // Not implemented yet } type Document struct { From a157ba88f93f7141937b70e4c7c01d5c0a2d11a4 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Mon, 10 Mar 2025 19:41:06 +0900 Subject: [PATCH 09/18] Added RAG processing in main function, refactoring of rag part --- cmd/main.go | 120 +++++++++++++++++++++++++++++---------- internal/rag/pinecone.go | 21 ++++--- internal/rag/rag.go | 1 + 3 files changed, 102 insertions(+), 40 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index f9e259c..2ede5f9 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -25,6 +25,7 @@ type Config struct { ghToken string oaiKey string useRag bool + pineconeKey string } type Neo4jConfig struct { @@ -37,7 +38,7 @@ type Neo4jConfig struct { func main() { cfg := &Config{} - neo4jcfg := &Neo4jConfig{} + // neo4jcfg := &Neo4jConfig{} flag.StringVar(&cfg.repo, "repo", "", "Repository name") flag.StringVar(&cfg.owner, "owner", "", "Repository owner") flag.IntVar(&cfg.issueNumber, "issue", 0, "Issue number") @@ -47,11 +48,7 @@ func main() { flag.StringVar(&cfg.ghToken, "github-token", "", "GitHub token") flag.StringVar(&cfg.oaiKey, "api-key", "", "OpenAI api key") flag.BoolVar(&cfg.useRag, "use-rag", false, "Use RAG model for response generation") - flag.StringVar(&neo4jcfg.uri, "neo4j-uri", "", "Neo4j URI") - flag.StringVar(&neo4jcfg.username, "neo4j-username", "", "Neo4j username") - flag.StringVar(&neo4jcfg.password, "neo4j-password", "", "Neo4j password") - flag.StringVar(&neo4jcfg.fulltextIndex, "fulltext-index", "keyword", "Neo4j fulltext index(default: keyword)") - flag.StringVar(&neo4jcfg.vectorIndex, "vector-index", "vector", "Neo4j vector index(default: vector)") + flag.StringVar(&cfg.pineconeKey, "pinecone-api-key", "", "Pinecone api key") flag.Parse() if cfg.repo == "" || cfg.owner == "" || cfg.issueNumber == 0 || cfg.ghToken == "" || cfg.command == "" || cfg.configFile == "" { @@ -59,24 +56,20 @@ func main() { os.Exit(1) } - var retriever *rag.Neo4jRetriever + var retriever rag.Retriever if cfg.useRag { flag.Parse() - if neo4jcfg.uri == "" || neo4jcfg.username == "" || neo4jcfg.password == "" { - fmt.Println("if -useRag is set, neo4j-uri, neo4j-username, and neo4j-password are required") - fmt.Println("Usage: alert-menta -use-rag -neo4j-uri -neo4j-username -neo4j-password ") - fmt.Println("[-fulltext-index ] [-vector-index ]") + if cfg.pineconeKey == "" { + fmt.Println("If -useRag is set, -pinecone-api-key is required") os.Exit(1) } - r, err := getNeo4jRetriever(neo4jcfg, log.New(os.Stdout, "[alert-menta main] ", log.Ldate|log.Ltime|log.Llongfile|log.Lmsgprefix)) + // r, err := getNeo4jRetriever(neo4jcfg, log.New(os.Stdout, "[alert-menta main] ", log.Ldate|log.Ltime|log.Llongfile|log.Lmsgprefix)) + r, err := getPineconeRetriever(cfg) retriever = r - retriever.TestConnection() if err != nil { - log.Fatalf("Error getting Neo4j retriever: %v", err) + log.Fatalf("Error getting retriever: %v", err) } } - fmt.Println("Neo4j Retriever:", retriever) - // os.Exit(0) logger := log.New( os.Stdout, "[alert-menta main] ", @@ -109,28 +102,43 @@ func main() { if err != nil { logger.Fatalf("Error geting AI client: %v", err) } + // idxName := rag.GetPineconeIndexName(cfg.owner, cfg.repo) + // tempCreateDB(idxName, cfg, loadedcfg, logger) + // os.Exit(0) - emb, err := getEmbeddingClient(cfg.oaiKey, loadedcfg, logger) - if err != nil { - logger.Fatalf("Error geting AI client: %v", err) - } - fmt.Println("Prompt:", prompt.UserPrompt) - doc, err := retriever.Retrieve(emb, prompt.UserPrompt, rag.Options{}) - // doc, err := retriever.Retrieve(emb, "What is Memory Management?", rag.Options{}) - if err != nil { - fmt.Println("Error retrieving document:", err) - } - fmt.Println("Document:", doc) - for _, d := range doc { - prompt.UserPrompt += "\n" + d.String() + var docs []rag.Document + var relatedIssue string + if cfg.useRag { + emb, err := getEmbeddingClient(cfg.oaiKey, loadedcfg, logger) + if err != nil { + logger.Fatalf("Error geting AI client: %v", err) + } + + ragPrompt, err := constructRAGPrompt(cfg.command, cfg.intent, userPrompt, imgs, *issue, loadedcfg, logger) + if err != nil { + logger.Fatalf("Error constructing RAG prompt: %v", err) + } + ragComment, err := aic.GetResponse(ragPrompt) + ragVector, err := emb.GetEmbedding(ragComment) + docs, err = retriever.RetrieveByVector(ragVector, rag.Options{}) + for _, d := range docs { + prompt.UserPrompt += "\n" + d.String() + } + issueVector, err := emb.GetEmbedding(userPrompt) + relatedIssue = retriever.RetrieveIssue(issueVector) } - fmt.Println("Prompt:", prompt.UserPrompt) - // os.Exit(0) comment, err := aic.GetResponse(prompt) if err != nil { logger.Fatalf("Error getting Response: %v", err) } + if cfg.useRag { + comment += "\n\n" + "## Sources:\n" + for i, d := range docs { + comment += fmt.Sprintf("%d. [%s](%s)\n", i+1, d.Id, d.URL) + } + comment += "\n\n" + relatedIssue + } logger.Println("Response:", comment) if err := issue.PostComment(comment); err != nil { @@ -212,6 +220,24 @@ func constructPrompt(command, intent, userPrompt string, imgs []ai.Image, cfg *u return &ai.Prompt{UserPrompt: userPrompt, SystemPrompt: systemPrompt, Images: imgs}, nil } +// RAG の前処理を行うプロンプトを作成する関数 +func constructRAGPrompt(command, intent, userPrompt string, imgs []ai.Image, issue github.GitHubIssue, cfg *utils.Config, logger *log.Logger) (*ai.Prompt, error) { + systemPrompt := "A GitHub Issue has been opened with the following content. Extract relevant information in a RAG. List files, functions, structures, etc. that should be checked. Also provide the file structure of your main branch to extract the information you need." + defaultBranch, _ := issue.GetDefaultBranch() + lf, _ := issue.ListFiles(defaultBranch) + lfs := strings.Join(lf, "\n") + if command == "ask" { + if intent == "" { + return nil, fmt.Errorf("Error: intent is required for 'ask' command") + } + systemPrompt = cfg.Ai.Commands[command].System_prompt + intent + "\n" + } else { + systemPrompt = cfg.Ai.Commands[command].System_prompt + } + logger.Println("\x1b[34mRAGPrompt: |\n", systemPrompt, userPrompt, "\x1b[0m") + return &ai.Prompt{UserPrompt: userPrompt + lfs, SystemPrompt: systemPrompt, Images: imgs}, nil +} + // Initialize AI client func getAIClient(oaiKey string, cfg *utils.Config, logger *log.Logger) (ai.Ai, error) { switch cfg.Ai.Provider { @@ -266,3 +292,35 @@ func getNeo4jRetriever(cfg *Neo4jConfig, logger *log.Logger) (*rag.Neo4jRetrieve } return r, nil } + +// Initialize PineconeRetriever +func getPineconeRetriever(cfg *Config) (*rag.PineconeClient, error) { + idxName := rag.GetPineconeIndexName(cfg.owner, cfg.repo) + r, err := rag.NewPineconeClient(idxName, cfg.pineconeKey) + if err != nil { + return nil, fmt.Errorf("Error: new Neo4jRetriever: %w", err) + } + return r, nil +} + +func tempCreateDB(idxName string, cfg *Config, loadedcfg *utils.Config, logger *log.Logger) { + rootPath := "../GameAssistant" + paths, err := utils.GetAllFilesFromAllBranches(rootPath, []string{".git"}, []string{"main"}) + var docs []rag.Document + for _, path := range paths { + doc, err := rag.ConvertPathtoDocument(cfg.owner, cfg.repo, path, rootPath) + if err == nil { + // continue + docs = append(docs, *doc) + } + } + fmt.Println("Number of documents:", len(docs)) + emb, err := getEmbeddingClient(cfg.oaiKey, loadedcfg, logger) + if err != nil { + logger.Fatalf("Error geting AI client: %v", err) + } + pc, _ := rag.NewPineconeClient(idxName, cfg.pineconeKey) + pc.CreateCodebaseDB(docs, emb) + issues := github.GetAllIssues(cfg.owner, cfg.repo, cfg.ghToken) + pc.CreateIssueDB(issues, emb) +} diff --git a/internal/rag/pinecone.go b/internal/rag/pinecone.go index fdbee50..70699c9 100644 --- a/internal/rag/pinecone.go +++ b/internal/rag/pinecone.go @@ -27,28 +27,31 @@ type PineconeClient struct { indexName string } -func GetIndexName(owner, repo string) string { +func GetPineconeIndexName(owner, repo string) string { indexName := owner + "-" + strings.ToLower(repo) indexName = strings.ReplaceAll(indexName, "_", "-") return indexName } -func NewPineconeClient(indexName string) *PineconeClient { +func NewPineconeClient(indexName, apiKey string) (*PineconeClient, error) { ctx := context.Background() pc, err := pinecone.NewClient(pinecone.NewClientParams{ - ApiKey: os.Getenv("PINECONE_API_KEY"), + // ApiKey: os.Getenv("PINECONE_API_KEY"), + ApiKey: apiKey, }) if err != nil { - log.Fatalf("Failed to create Client: %v", err) + // log.Fatalf("Failed to create Client: %v", err) + return nil, fmt.Errorf("Failed to create Client: %v", err) } pcClient := &PineconeClient{context: ctx, pc: pc, indexName: indexName} err = pcClient.createIndex() if err != nil { - log.Fatalf("Failed to create index \"%v\": %v", indexName, err) + // log.Fatalf("Failed to create index \"%v\": %v", indexName, err) + return nil, fmt.Errorf("Failed to create index \"%v\": %v", indexName, err) } - return pcClient + return pcClient, nil } func ConvertPathtoDocument(owner, repo string, path utils.Path, root string) (*Document, error) { @@ -110,7 +113,7 @@ func (pc *PineconeClient) RetrieveByVector(vector []float32, options Options) ([ log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) } - if state, err := pc.waitUntilIndexReady(); state { + if state, err := pc.waitUntilIndexReady(); !state { return nil, err } @@ -194,7 +197,7 @@ func (pc *PineconeClient) DeleteRecord(id string) error { return err } - if state, err := pc.waitUntilIndexReady(); state { + if state, err := pc.waitUntilIndexReady(); !state { return err } @@ -238,7 +241,7 @@ func (pc *PineconeClient) UpsertWithStruct(docs []Document, vectors [][]float32) log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) } - if state, err := pc.waitUntilIndexReady(); state { + if state, err := pc.waitUntilIndexReady(); !state { return err } diff --git a/internal/rag/rag.go b/internal/rag/rag.go index 6248d3c..6f6a647 100644 --- a/internal/rag/rag.go +++ b/internal/rag/rag.go @@ -8,6 +8,7 @@ type Retriever interface { // Retrieve(ctx context.Context, query string, options ...Option) ([]Document, error) Retrieve(query string, embedding ai.EmbeddingModel, options Options) ([]Document, error) RetrieveByVector(vector []float32, options Options) ([]Document, error) + RetrieveIssue(vector []float32) string } type Options struct { From e3f855e86e8319226c93a4468ac93c8af800af91 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Tue, 18 Mar 2025 21:05:46 +0900 Subject: [PATCH 10/18] Add upsert-db command & In-memory git operations feature --- cmd/main.go | 63 ++++++-- go.mod | 4 +- go.sum | 4 + internal/ai/embedding.go | 2 +- internal/ai/openai.go | 11 +- internal/rag/pinecone.go | 85 +++++++++- internal/rag/similar_issue.go | 33 ---- internal/utils/git.go | 284 ++++++++++++++++++++++++++++++++-- internal/utils/git_test.go | 42 +++++ 9 files changed, 459 insertions(+), 69 deletions(-) create mode 100644 internal/utils/git_test.go diff --git a/cmd/main.go b/cmd/main.go index 2ede5f9..1096482 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -81,6 +81,15 @@ func main() { logger.Fatalf("Error loading config: %v", err) } + if cfg.command == "upsert_db" { + idxName := rag.GetPineconeIndexName(cfg.owner, cfg.repo) + err := CreateDB(idxName, cfg, loadedcfg, logger) + if err != nil { + logger.Fatalf("Error upserting DB: %v", err) + } + return + } + err = validateCommand(cfg.command, loadedcfg) if err != nil { logger.Fatalf("Error validating command: %v", err) @@ -102,9 +111,6 @@ func main() { if err != nil { logger.Fatalf("Error geting AI client: %v", err) } - // idxName := rag.GetPineconeIndexName(cfg.owner, cfg.repo) - // tempCreateDB(idxName, cfg, loadedcfg, logger) - // os.Exit(0) var docs []rag.Document var relatedIssue string @@ -303,24 +309,53 @@ func getPineconeRetriever(cfg *Config) (*rag.PineconeClient, error) { return r, nil } -func tempCreateDB(idxName string, cfg *Config, loadedcfg *utils.Config, logger *log.Logger) { - rootPath := "../GameAssistant" - paths, err := utils.GetAllFilesFromAllBranches(rootPath, []string{".git"}, []string{"main"}) +func CreateDB(idxName string, cfg *Config, loadedcfg *utils.Config, logger *log.Logger) error { + logger.Println("Creating DB to Index:", idxName) + repoURL := fmt.Sprintf("https://github.com/%s/%s", cfg.owner, cfg.repo) + repo, err := utils.CloneRepository(repoURL, &utils.AuthOptions{Username: cfg.owner, Token: cfg.ghToken}) + if err != nil { + return fmt.Errorf("Error cloning repository: %w", err) + } + branches, err := utils.GetBranches(repo, []string{}) + _, err = utils.ListFiles(repo) + var docs []rag.Document - for _, path := range paths { - doc, err := rag.ConvertPathtoDocument(cfg.owner, cfg.repo, path, rootPath) - if err == nil { - // continue - docs = append(docs, *doc) + for _, branch := range branches { + branchDocs, err := rag.ConvertBranchtoDocuments(cfg.owner, repo, branch) + if err != nil { + return fmt.Errorf("Error converting branch to documents: %w", err) } + docs = append(docs, *branchDocs...) } - fmt.Println("Number of documents:", len(docs)) + emb, err := getEmbeddingClient(cfg.oaiKey, loadedcfg, logger) if err != nil { - logger.Fatalf("Error geting AI client: %v", err) + return fmt.Errorf("Error getting embedding client: %w", err) } - pc, _ := rag.NewPineconeClient(idxName, cfg.pineconeKey) + + pc, err := rag.NewPineconeClient(idxName, cfg.pineconeKey) + if err != nil { + return fmt.Errorf("Error getting Pinecone client: %w", err) + } + + /* Temporarily commented out because searching one by one in Pinecone takes too long + // get file content from Pinecone, compare with the new docs and add only the new docs + newDocs := []rag.Document{} + for _, doc := range docs { + // get file content from Pinecone + existedDoc, _ := pc.QueryById(doc.Id) + if existedDoc == nil { + newDocs = append(newDocs, doc) + } else if existedDoc.Content != doc.Content { + newDocs = append(newDocs, doc) + } + } + pc.CreateCodebaseDB(newDocs, emb) + */ pc.CreateCodebaseDB(docs, emb) + issues := github.GetAllIssues(cfg.owner, cfg.repo, cfg.ghToken) pc.CreateIssueDB(issues, emb) + + return nil } diff --git a/go.mod b/go.mod index 35f2d87..2ef0d40 100644 --- a/go.mod +++ b/go.mod @@ -8,10 +8,12 @@ require ( cloud.google.com/go/vertexai v0.13.2 github.com/Azure/azure-sdk-for-go/sdk/ai/azopenai v0.7.1 github.com/Azure/azure-sdk-for-go/sdk/azcore v1.16.0 + github.com/go-git/go-billy/v5 v5.6.2 github.com/go-git/go-git/v5 v5.14.0 github.com/google/go-github v17.0.0+incompatible github.com/neo4j/neo4j-go-driver/v5 v5.27.0 github.com/pinecone-io/go-pinecone/v3 v3.0.0 + github.com/pkoukk/tiktoken-go v0.1.7 github.com/spf13/viper v1.19.0 golang.org/x/oauth2 v0.24.0 google.golang.org/protobuf v1.35.1 @@ -33,11 +35,11 @@ require ( github.com/cloudflare/circl v1.6.0 // indirect github.com/cyphar/filepath-securejoin v0.4.1 // indirect github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc // indirect + github.com/dlclark/regexp2 v1.10.0 // indirect github.com/emirpasic/gods v1.18.1 // indirect github.com/felixge/httpsnoop v1.0.4 // indirect github.com/fsnotify/fsnotify v1.7.0 // indirect github.com/go-git/gcfg v1.5.1-0.20230307220236-3a3c6141e376 // indirect - github.com/go-git/go-billy/v5 v5.6.2 // indirect github.com/go-logr/logr v1.4.2 // indirect github.com/go-logr/stdr v1.2.2 // indirect github.com/golang/groupcache v0.0.0-20241129210726-2c02b8208cf8 // indirect diff --git a/go.sum b/go.sum index 7b58861..68789d0 100644 --- a/go.sum +++ b/go.sum @@ -52,6 +52,8 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM= github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38= +github.com/dlclark/regexp2 v1.10.0 h1:+/GIL799phkJqYW+3YbOd8LCcbHzT0Pbo8zl70MHsq0= +github.com/dlclark/regexp2 v1.10.0/go.mod h1:DHkYz0B9wPfa6wondMfaivmHpzrQ3v9q8cnmRbL6yW8= github.com/elazarl/goproxy v1.7.2 h1:Y2o6urb7Eule09PjlhQRGNsqRfPmYI3KKQLFpCAV3+o= github.com/elazarl/goproxy v1.7.2/go.mod h1:82vkLNir0ALaW14Rc399OTTjyNREgmdL2cVoIbS6XaE= github.com/emirpasic/gods v1.18.1 h1:FXtiHYKDGKCW2KzwZKx0iC0PQmdlorYgdFG9jPXJ1Bc= @@ -157,6 +159,8 @@ github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c h1:+mdjkGKdHQG3305AYmd github.com/pkg/browser v0.0.0-20240102092130-5ac0b6a4141c/go.mod h1:7rwL4CYBLnjLxUqIJNnCWiEdr3bn6IUYi15bNlnbCCU= github.com/pkg/errors v0.9.1 h1:FEBLx1zS214owpjy7qsBeixbURkuhQAwrK5UwLGTwt4= github.com/pkg/errors v0.9.1/go.mod h1:bwawxfHBFNV+L2hUp1rHADufV3IMtnDRdf1r5NINEl0= +github.com/pkoukk/tiktoken-go v0.1.7 h1:qOBHXX4PHtvIvmOtyg1EeKlwFRiMKAcoMp4Q+bLQDmw= +github.com/pkoukk/tiktoken-go v0.1.7/go.mod h1:9NiV+i9mJKGj1rYOT+njbv+ZwA/zJxYdewGl6qVatpg= github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 h1:Jamvg5psRIccs7FGNTlIRMkT8wgtp5eCXdBlqhYGL6U= github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4= diff --git a/internal/ai/embedding.go b/internal/ai/embedding.go index 8ff2d25..767ac86 100644 --- a/internal/ai/embedding.go +++ b/internal/ai/embedding.go @@ -1,5 +1,5 @@ package ai type EmbeddingModel interface { - GetEmbedding(text string) ([]float32, error) + GetEmbedding(string) ([]float32, error) } diff --git a/internal/ai/openai.go b/internal/ai/openai.go index 55cceaf..6a75bf5 100644 --- a/internal/ai/openai.go +++ b/internal/ai/openai.go @@ -7,6 +7,7 @@ import ( "github.com/3-shake/alert-menta/internal/utils" "github.com/Azure/azure-sdk-for-go/sdk/ai/azopenai" "github.com/Azure/azure-sdk-for-go/sdk/azcore" + "github.com/pkoukk/tiktoken-go" ) type OpenAI struct { @@ -81,8 +82,16 @@ func (ai *OpenAI) GetEmbedding(text string) ([]float32, error) { } response, err := client.GetEmbeddings(context.TODO(), *options, nil) if err != nil { - fmt.Println(err) return []float32{}, err } return response.Embeddings.Data[0].Embedding, nil } + +func NumberofTokens(text string) (int, error) { + encoding := "cl100k_base" // text-embedding-3-small + tke, err := tiktoken.GetEncoding(encoding) + if err != nil { + return 0, err + } + return len(tke.Encode(text, nil, nil)), nil +} diff --git a/internal/rag/pinecone.go b/internal/rag/pinecone.go index 70699c9..b79ac24 100644 --- a/internal/rag/pinecone.go +++ b/internal/rag/pinecone.go @@ -12,6 +12,7 @@ import ( "github.com/3-shake/alert-menta/internal/ai" "github.com/3-shake/alert-menta/internal/utils" + "github.com/go-git/go-git/v5" "github.com/pinecone-io/go-pinecone/v3/pinecone" "google.golang.org/protobuf/types/known/structpb" ) @@ -54,6 +55,43 @@ func NewPineconeClient(indexName, apiKey string) (*PineconeClient, error) { return pcClient, nil } +func ConvertBranchtoDocuments(owner string, repo *git.Repository, branch utils.Branch) (*[]Document, error) { + var docs []Document + if err := utils.SwitchBranch(repo, branch.Name); err != nil { + fmt.Printf("Failed to switch branch: %v\n", err) + } + for _, file := range branch.Files { + content, err := utils.GetFileContent(repo, file) + + if err != nil && len(content) == 0 { + continue + } + if err != nil { + fmt.Printf("Failed to get file content: %s, %v\n", content, err) + return nil, fmt.Errorf("Failed to get file content: %s@%s", branch.Name, file.Path) + } + + // content のトークン数が 8192 以上の場合は分割する(とりあえずトークン数だけ切り取る) + n, err := ai.NumberofTokens(content) + if err != nil { + return nil, fmt.Errorf("Failed to get number of tokens: %v", err) + } + if n > 8192 { + content = content[:8192] + } + + docs = append(docs, Document{ + Id: branch.Name + "@" + file.Path, + Content: content, + Branch: branch.Name, + URL: fmt.Sprintf("https://github.com/%v/%v/blob/%v/%v", owner, repo, branch.Name, file.Path), + Score: 0, + }) + } + + return &docs, nil +} + func ConvertPathtoDocument(owner, repo string, path utils.Path, root string) (*Document, error) { contentBytes, err := os.ReadFile(filepath.Join(root, path.FilePath)) if err != nil { @@ -151,6 +189,46 @@ func (pc *PineconeClient) RetrieveByVector(vector []float32, options Options) ([ return docs, nil } +func (pc *PineconeClient) QueryById(id string) (*Document, error) { + var doc *Document + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) + if err != nil { + log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) + } + + if state, err := pc.waitUntilIndexReady(); !state { + return nil, err + } + + idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "codebase"}) + if err != nil { + log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) + } + + res, err := idxConnection.QueryByVectorId(pc.context, &pinecone.QueryByVectorIdRequest{ + VectorId: id, + TopK: 1, + IncludeValues: false, + IncludeMetadata: true, + }) + if err != nil { + log.Fatalf("Error encountered when querying by vector: %v", err) + } else { + log.Printf(prettifyStruct(res)) + } + + tempDoc := res.Matches[0].Vector.Metadata.GetFields() + doc = &Document{ + Id: tempDoc["id"].GetStringValue(), + Content: tempDoc["content"].GetStringValue(), + Branch: tempDoc["branch"].GetStringValue(), + URL: tempDoc["url"].GetStringValue(), + Score: 0, + } + + return doc, nil +} + func (pc *PineconeClient) convertIssueStructtoMap(issue Issue) map[string]interface{} { return map[string]interface{}{ "id": issue.Id, @@ -222,14 +300,15 @@ func (pc *PineconeClient) CreateCodebaseDB(docs []Document, embedding ai.Embeddi // 1536 is the default embedding size for the Universal Sentence Encoder vector, err := embedding.GetEmbedding(doc.Content) if err != nil { - log.Fatalf("Error getting embedding: %v", err) + return fmt.Errorf("Error getting embedding: %v", err) // MAX input length is 8192 in OpenAI } vectors = append(vectors, vector) } + fmt.Println("vectors", vectors) + fmt.Println("docs", docs) err := pc.UpsertWithStruct(docs, vectors) if err != nil { - log.Fatalf("Error upserting docs: %v", err) - return err + return fmt.Errorf("Error upserting vectors: %v", err) } return nil } diff --git a/internal/rag/similar_issue.go b/internal/rag/similar_issue.go index 647f13d..d8f6601 100644 --- a/internal/rag/similar_issue.go +++ b/internal/rag/similar_issue.go @@ -21,39 +21,6 @@ type Issue struct { // Source string } -func (pc *PineconeClient) TestUpsert(metadataMap map[string]interface{}, vector []float32) { - indexName := "similar-issues" - // Add to the main function: - - idxModel, err := pc.pc.DescribeIndex(pc.context, indexName) - if err != nil { - log.Fatalf("Failed to describe index \"%v\": %v", indexName, err) - } - - idxConnection, err := pc.pc.Index(pinecone.NewIndexConnParams{Host: idxModel.Host, Namespace: "issues"}) - if err != nil { - log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) - } - metadata, err := structpb.NewStruct(metadataMap) - if err != nil { - log.Fatalf("Failed to create metadata map: %v", err) - } - pcVector := []*pinecone.Vector{ - { - Id: "vec2", - Values: &vector, - Metadata: metadata, - }, - } - - count, err := idxConnection.UpsertVectors(pc.context, pcVector) - if err != nil { - log.Fatalf("Failed to upsert vectors: %v", err) - } else { - log.Printf("Successfully upserted %d vector(s)!\n", count) - } -} - func (pc *PineconeClient) RetrieveIssue(vector []float32) string { idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) if err != nil { diff --git a/internal/utils/git.go b/internal/utils/git.go index 8185eb6..17f4550 100644 --- a/internal/utils/git.go +++ b/internal/utils/git.go @@ -2,27 +2,44 @@ package utils import ( "fmt" - // "io/ioutil" + "log" + // "reflect" "io/fs" "os" "path/filepath" "slices" "strings" + "github.com/go-git/go-billy/v5/memfs" "github.com/go-git/go-git/v5" + "github.com/go-git/go-git/v5/config" "github.com/go-git/go-git/v5/plumbing" "github.com/go-git/go-git/v5/plumbing/object" - // "github.com/go-git/go-git/v5/storage/memory" + "github.com/go-git/go-git/v5/plumbing/transport/http" + "github.com/go-git/go-git/v5/storage/memory" ) -// Path 構造体: ファイルパスとブランチ名を保持 +type MemoryFile struct { + Size int64 + Path string +} + type Path struct { FilePath string Branch string } -func GetAllFilesFromAllBranches(repoPath string, ignoreList, branchList []string) ([]Path, error) { +type Branch struct { + Name string + Files []MemoryFile +} +type AuthOptions struct { + Username string + Token string +} + +func GetAllFilesFromAllBranches(repoPath string, ignoreList, branchList []string) ([]Path, error) { var allPaths []Path if ignoreList == nil { @@ -95,44 +112,36 @@ func GetAllFilesFromAllBranches(repoPath string, ignoreList, branchList []string } func Tree(path string, level int) error { - // prefix を計算 (インデント用) prefix := strings.Repeat("│ ", level) - // ファイル/ディレクトリ情報を取得 - files, err := os.ReadDir(path) // os.ReadDir がより新しいGoのバージョンでは推奨 - // files, err := ioutil.ReadDir(path) // os.ReadDir がより新しいGoのバージョンでは推奨 + files, err := os.ReadDir(path) if err != nil { return err } - // 各エントリを処理 for i, file := range files { - isLast := i == len(files)-1 // 最後のエントリかどうかを判定 + isLast := i == len(files)-1 - // コネクタの文字列を決定 connector := "├── " if isLast { connector = "└── " } - // ファイル/ディレクトリ名を表示 fmt.Printf("%s%s%s\n", prefix, connector, file.Name()) - // ディレクトリの場合は再帰的に tree を呼び出す if file.IsDir() { newPath := filepath.Join(path, file.Name()) if err := Tree(newPath, level+1); err != nil { return err } - } else { // ファイルの場合は内容を出力 + } else { filePath := filepath.Join(path, file.Name()) content, err := os.ReadFile(filePath) if err != nil { fmt.Printf("%s│ %sError reading file: %v\n", prefix, strings.Repeat(" ", level), err) - continue //エラーでも続行 + continue } - // 内容をprefix付きで出力 lines := strings.Split(string(content), "\n") for _, line := range lines { fmt.Printf("%s│ %s\n", prefix, line) @@ -191,3 +200,246 @@ func CollectFiles(rootPath string, ignoreList []string) ([]string, error) { return filePaths, nil } + +func CloneRepository(url string, authOptions *AuthOptions) (*git.Repository, error) { + fs := memfs.New() + + var auth *http.BasicAuth + if authOptions == nil { + authOptions = &AuthOptions{} + } else { + auth = &http.BasicAuth{ + Username: authOptions.Username, + Password: authOptions.Token, + } + } + + repo, err := git.Clone(memory.NewStorage(), fs, &git.CloneOptions{ + URL: url, + Progress: os.Stdout, + Depth: 1, + Auth: auth, + }) + if err != nil { + return nil, fmt.Errorf("failed to clone repository: %w", err) + } + err = PullAllRemoteBranch(repo, "origin", authOptions) + if err != nil { + return nil, err + } + return repo, nil +} + +func SwitchBranch(repo *git.Repository, branchName string) error { + force := true + wt, err := repo.Worktree() + if err != nil { + return fmt.Errorf("failed to get worktree: %w", err) + } + err = wt.Checkout(&git.CheckoutOptions{ + Branch: plumbing.NewBranchReferenceName(branchName), + Force: force, + Keep: !force, + }) + if err != nil { + return fmt.Errorf("failed to switch branch: %w", err) + } + return nil +} + +func PullAllRemoteBranch(repo *git.Repository, remoteName string, authOptions *AuthOptions) error { + var auth *http.BasicAuth + if authOptions == nil { + authOptions = &AuthOptions{} + } else { + auth = &http.BasicAuth{ + Username: authOptions.Username, + Password: authOptions.Token, + } + } + + remote, err := repo.Remote(remoteName) + if err != nil { + log.Fatalf("Failed to get remote 'origin': %v", err) + } + + refs, err := remote.List(&git.ListOptions{Auth: auth}) + if err != nil { + log.Fatalf("Failed to list remote refs: %v", err) + } + + for _, ref := range refs { + if ref.Name().IsBranch() { + localBranchName := ref.Name().Short() + remoteBranchName := ref.Name() + remoteBranchRefSpec := fmt.Sprintf("+%s:%s", remoteBranchName, plumbing.NewBranchReferenceName(localBranchName)) + + _, err := repo.Reference(plumbing.NewBranchReferenceName(localBranchName), true) + if err != nil { + headRef, err := repo.Head() + if err != nil && err != plumbing.ErrReferenceNotFound { + log.Printf("get head err=%v", err) + continue + } + + var commit *object.Commit + if err == plumbing.ErrReferenceNotFound { + commit, err = repo.CommitObject(ref.Hash()) + if err != nil { + log.Printf("branch %s commit not found. err=%v\n", localBranchName, err) + continue + } + } else { + commit, err = repo.CommitObject(headRef.Hash()) + if err != nil { + log.Printf("branch %s commit not found from haed ref. err=%v\n", localBranchName, err) + continue + } + } + + newRef := plumbing.NewHashReference(plumbing.NewBranchReferenceName(localBranchName), commit.Hash) + if err := repo.Storer.SetReference(newRef); err != nil { + log.Printf("failed to create local branch %s: %v", localBranchName, err) + continue + } + _, _ = repo.Reference(plumbing.NewBranchReferenceName(localBranchName), true) + + } + err = repo.CreateBranch(&config.Branch{ + Name: localBranchName, + Remote: remoteName, + Merge: remoteBranchName, + }) + + if err != nil { + log.Printf("Error setting up tracking for %s: %v\n", localBranchName, err) + continue + } + + err = repo.Fetch(&git.FetchOptions{ + RemoteName: remoteName, + RefSpecs: []config.RefSpec{config.RefSpec(remoteBranchRefSpec)}, + Progress: os.Stdout, + Force: true, + Auth: auth, + }) + + if err != nil && err != git.NoErrAlreadyUpToDate { + log.Printf("fetch error (branch %s): %v", localBranchName, err) + continue + } + fmt.Printf("Branch %s set up to track remote branch %s from origin.\n", localBranchName, remoteBranchName) + } + } + fmt.Println("All remote branches have been pulled to local branches.") + return nil +} + +func ListFiles(repo *git.Repository) ([]MemoryFile, error) { + files, err := recursiveListFiles(repo, ".") + return files, err +} + +func recursiveListFiles(repo *git.Repository, path string) ([]MemoryFile, error) { + // var files []string + files := []MemoryFile{} + wt, err := repo.Worktree() + if err != nil { + return files, err + } + + wtfiles, err := wt.Filesystem.ReadDir(path) + if err != nil { + return files, err + } + for _, file := range wtfiles { + if file.IsDir() { + // var tempFiles []string + tempFiles := []MemoryFile{} + if path == "." { + tempFiles, err = recursiveListFiles(repo, file.Name()) + } else { + tempFiles, err = recursiveListFiles(repo, path+"/"+file.Name()) + } + if err != nil { + return files, err + } + files = append(files, tempFiles...) + } else { + if path == "." { + files = append(files, MemoryFile{Path: file.Name(), Size: file.Size()}) + } else { + files = append(files, MemoryFile{Path: path + "/" + file.Name(), Size: file.Size()}) + } + } + } + return files, err +} + +func GetFileContent(repo *git.Repository, file MemoryFile) (string, error) { + byteContent := make([]byte, file.Size) + wt, err := repo.Worktree() + if err != nil { + return "", err + } + + billyFile, err := wt.Filesystem.Open(file.Path) + if err != nil { + return "", err + } + + _, err = billyFile.Read(byteContent) + if err != nil { + return "", err + } + + return string(byteContent), err +} + +func GetAllBranchNames(repo *git.Repository) ([]string, error) { + branchRefs, err := repo.Branches() + if err != nil { + return nil, fmt.Errorf("failed to get branches: %w", err) + } + var branches []string + err = branchRefs.ForEach(func(branchRef *plumbing.Reference) error { + branches = append(branches, branchRef.Name().Short()) + return nil + }) + if err != nil { + return nil, fmt.Errorf("failed to get branches: %w", err) + } + return branches, nil +} + +// if specifiedBranch is empty, get all branches +func GetBranches(repo *git.Repository, specifiedBranch []string) ([]Branch, error) { + var branches []Branch + branchNames, err := GetAllBranchNames(repo) + if err != nil { + return nil, fmt.Errorf("failed to get branch names: %w", err) + } + + for _, branchName := range branchNames { + if len(specifiedBranch) > 0 && !slices.Contains(specifiedBranch, branchName) { + continue + } + + err := SwitchBranch(repo, branchName) + if err != nil { + return nil, fmt.Errorf("failed to switch branch: %w", err) + } + + memFiles, err := ListFiles(repo) + if err != nil { + return nil, fmt.Errorf("failed to list files: %w", err) + } + + branches = append(branches, Branch{ + Name: branchName, + Files: memFiles, + }) + } + + return branches, nil +} diff --git a/internal/utils/git_test.go b/internal/utils/git_test.go new file mode 100644 index 0000000..4a4add7 --- /dev/null +++ b/internal/utils/git_test.go @@ -0,0 +1,42 @@ +package utils + +import ( + "testing" +) + +func TestCloneAndPullAllBranches(t *testing.T) { + // テスト用のリモートリポジトリURL (公開されている小さなリポジトリ) + testRepoURL := "https://github.com/3-shake/alert-menta.git" + + // 関数を呼び出す + repo, err := CloneRepository(testRepoURL, nil) + if err != nil { + t.Fatalf("CloneAndPullAllBranches failed: %v", err) + } + + branches, err := GetBranches(repo, []string{}) + if err != nil { + t.Fatalf("GetAllBranches failed: %v", err) + } + t.Logf("GetAllBranches: %v", branches) + + if err := SwitchBranch(repo, "develop"); err != nil { + t.Fatalf("SwitchBranch failed: %v", err) + } + + files, err := ListFiles(repo) + if err != nil { + t.Fatalf("ListFiles failed: %v", err) + } + t.Logf("ListFiles: %v", files) + + for _, file := range files { + if file.Path == ".gitignore" { + content, err := GetFileContent(repo, file) + if err != nil { + t.Fatalf("GetFileContent failed: %v", err) + } + t.Logf("GetFileContent: %v: %v", file.Path, content) + } + } +} From 63551530efa3ea5c90fb5341c5b0c06bd2fb7a5c Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Mon, 24 Mar 2025 13:30:58 +0900 Subject: [PATCH 11/18] Fix repository name upsert --- cmd/main.go | 2 +- internal/rag/pinecone.go | 6 ++---- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 1096482..cfb2bf3 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -321,7 +321,7 @@ func CreateDB(idxName string, cfg *Config, loadedcfg *utils.Config, logger *log. var docs []rag.Document for _, branch := range branches { - branchDocs, err := rag.ConvertBranchtoDocuments(cfg.owner, repo, branch) + branchDocs, err := rag.ConvertBranchtoDocuments(cfg.owner, cfg.repo, repo, branch) if err != nil { return fmt.Errorf("Error converting branch to documents: %w", err) } diff --git a/internal/rag/pinecone.go b/internal/rag/pinecone.go index b79ac24..29293c4 100644 --- a/internal/rag/pinecone.go +++ b/internal/rag/pinecone.go @@ -55,7 +55,7 @@ func NewPineconeClient(indexName, apiKey string) (*PineconeClient, error) { return pcClient, nil } -func ConvertBranchtoDocuments(owner string, repo *git.Repository, branch utils.Branch) (*[]Document, error) { +func ConvertBranchtoDocuments(owner, repoName string, repo *git.Repository, branch utils.Branch) (*[]Document, error) { var docs []Document if err := utils.SwitchBranch(repo, branch.Name); err != nil { fmt.Printf("Failed to switch branch: %v\n", err) @@ -84,7 +84,7 @@ func ConvertBranchtoDocuments(owner string, repo *git.Repository, branch utils.B Id: branch.Name + "@" + file.Path, Content: content, Branch: branch.Name, - URL: fmt.Sprintf("https://github.com/%v/%v/blob/%v/%v", owner, repo, branch.Name, file.Path), + URL: fmt.Sprintf("https://github.com/%v/%v/blob/%v/%v", owner, repoName, branch.Name, file.Path), Score: 0, }) } @@ -304,8 +304,6 @@ func (pc *PineconeClient) CreateCodebaseDB(docs []Document, embedding ai.Embeddi } vectors = append(vectors, vector) } - fmt.Println("vectors", vectors) - fmt.Println("docs", docs) err := pc.UpsertWithStruct(docs, vectors) if err != nil { return fmt.Errorf("Error upserting vectors: %v", err) From 9d8a3883d2932cc8e3bc217f8c817515c66fb864 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Tue, 25 Mar 2025 03:38:46 +0900 Subject: [PATCH 12/18] Add new options simiar-issue and similar_code --- .alert-menta.user.yaml | 8 +++++++- internal/utils/utils.go | 2 ++ 2 files changed, 9 insertions(+), 1 deletion(-) diff --git a/.alert-menta.user.yaml b/.alert-menta.user.yaml index 74664f3..10fe7cc 100644 --- a/.alert-menta.user.yaml +++ b/.alert-menta.user.yaml @@ -5,7 +5,7 @@ system: ai: provider: "openai" # "openai" or "vertexai" openai: - model: "gpt-4o-mini-2024-07-18" # Check the list of available models by `curl https://api.openai.com/v1/models -H "Authorization: Bearer $OPENAI_API_KEY"` + model: "gpt-4o-mini" # Check the list of available models by `curl https://api.openai.com/v1/models -H "Authorization: Bearer $OPENAI_API_KEY"` vertexai: project: "" @@ -16,9 +16,15 @@ ai: - describe: description: "Generate a detailed description of the Issue." system_prompt: "The following is the GitHub Issue and comments on it. Please Generate a detailed description.\n" + similar_code: false + similar_issue: true - suggest: description: "Provide suggestions for improvement based on the contents of the Issue." system_prompt: "The following is the GitHub Issue and comments on it. Please identify the issues that need to be resolved based on the contents of the Issue and provide three suggestions for improvement.\n" + similar_code: true + similar_issue: false - ask: description: "Answer free-text questions." system_prompt: "The following is the GitHub Issue and comments on it. Based on the content provide a detailed response to the following question:\n" + similar_code: true + similar_issue: false diff --git a/internal/utils/utils.go b/internal/utils/utils.go index e4c5231..8cc3bed 100644 --- a/internal/utils/utils.go +++ b/internal/utils/utils.go @@ -38,6 +38,8 @@ type Ai struct { type Command struct { Description string `yaml:"description"` System_prompt string `yaml:"system_prompt"` + Similar_issue bool `yaml:"similar_issue"` + Similar_code bool `yaml:"similar_code"` } type OpenAI struct { From 427ebcb5c770abfb1e25c7c157c3881c7fce4cc4 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Tue, 25 Mar 2025 03:40:51 +0900 Subject: [PATCH 13/18] Add RAG features & Query filter in Pinecone --- cmd/main.go | 104 ++++++++++++++++++++-------------- internal/rag/pinecone.go | 40 ++++++++++--- internal/rag/rag.go | 13 +++-- internal/rag/similar_issue.go | 28 +++++++-- 4 files changed, 127 insertions(+), 58 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index cfb2bf3..d2db314 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -7,6 +7,7 @@ import ( "os" "regexp" "strings" + "text/template" "github.com/3-shake/alert-menta/internal/ai" "github.com/3-shake/alert-menta/internal/github" @@ -60,10 +61,8 @@ func main() { if cfg.useRag { flag.Parse() if cfg.pineconeKey == "" { - fmt.Println("If -useRag is set, -pinecone-api-key is required") - os.Exit(1) + log.Fatalf("Error: If -useRag is set, -pinecone-api-key is required") } - // r, err := getNeo4jRetriever(neo4jcfg, log.New(os.Stdout, "[alert-menta main] ", log.Ldate|log.Ltime|log.Llongfile|log.Lmsgprefix)) r, err := getPineconeRetriever(cfg) retriever = r if err != nil { @@ -83,7 +82,7 @@ func main() { if cfg.command == "upsert_db" { idxName := rag.GetPineconeIndexName(cfg.owner, cfg.repo) - err := CreateDB(idxName, cfg, loadedcfg, logger) + err := CreateDB(idxName, []string{"main"}, cfg, loadedcfg, logger) if err != nil { logger.Fatalf("Error upserting DB: %v", err) } @@ -113,25 +112,42 @@ func main() { } var docs []rag.Document - var relatedIssue string + relatedIssue := "" if cfg.useRag { emb, err := getEmbeddingClient(cfg.oaiKey, loadedcfg, logger) if err != nil { logger.Fatalf("Error geting AI client: %v", err) } - ragPrompt, err := constructRAGPrompt(cfg.command, cfg.intent, userPrompt, imgs, *issue, loadedcfg, logger) - if err != nil { - logger.Fatalf("Error constructing RAG prompt: %v", err) + if loadedcfg.Ai.Commands[cfg.command].Similar_code { + ragPrompt, err := constructRAGPrompt(cfg.command, cfg.intent, userPrompt, imgs, *issue, loadedcfg, logger) + if err != nil { + logger.Fatalf("Error constructing RAG prompt: %v", err) + } + + ragComment, err := aic.GetResponse(ragPrompt) + if err != nil { + logger.Fatalf("Error getting RAG comment: %v", err) + } + logger.Println("RAG Comment:", ragComment) + + ragVector, err := emb.GetEmbedding(ragComment) + if err != nil { + logger.Fatalf("Error getting RAG vector: %v", err) + } + + docs, err = retriever.RetrieveByVector(ragVector, rag.Options{}) + for _, d := range docs { + prompt.UserPrompt += "\n" + d.String() + } } - ragComment, err := aic.GetResponse(ragPrompt) - ragVector, err := emb.GetEmbedding(ragComment) - docs, err = retriever.RetrieveByVector(ragVector, rag.Options{}) - for _, d := range docs { - prompt.UserPrompt += "\n" + d.String() + if loadedcfg.Ai.Commands[cfg.command].Similar_issue { + issueVector, err := emb.GetEmbedding(userPrompt) + if err != nil { + logger.Fatalf("Error getting issue vector: %v", err) + } + relatedIssue = retriever.RetrieveIssue(issueVector, uint32(cfg.issueNumber), rag.Options{}) } - issueVector, err := emb.GetEmbedding(userPrompt) - relatedIssue = retriever.RetrieveIssue(issueVector) } comment, err := aic.GetResponse(prompt) @@ -228,20 +244,39 @@ func constructPrompt(command, intent, userPrompt string, imgs []ai.Image, cfg *u // RAG の前処理を行うプロンプトを作成する関数 func constructRAGPrompt(command, intent, userPrompt string, imgs []ai.Image, issue github.GitHubIssue, cfg *utils.Config, logger *log.Logger) (*ai.Prompt, error) { - systemPrompt := "A GitHub Issue has been opened with the following content. Extract relevant information in a RAG. List files, functions, structures, etc. that should be checked. Also provide the file structure of your main branch to extract the information you need." + systemPrompt := ` +I'm looking to identify related files and functions to solve a GitHub Issue. Please provide analysis and advice based on the information I'll share in the following format: + +## Analysis Requested: +1. Files likely related to this Issue and why +2. Specific functions or code blocks that should be investigated +3. Possible root causes of the problem +4. Approaches for resolution + +Please suggest specific file paths and function names where possible. Maximize the use of information available from the repository structure to understand the code architecture before making suggestions. + ` + userPromptPlaceholder := `## GitHub Issue: +{{.UserPrompt}} + +## Repository Structure: +{{.RepositoryStructure}} + ` + userPromptTmpl, err := template.New("userPrompt").Parse(userPromptPlaceholder) + if err != nil { + logger.Fatalf("Error parsing userPrompt template: %v", err) + } + type PromptData struct { + UserPrompt string + RepositoryStructure string + } defaultBranch, _ := issue.GetDefaultBranch() lf, _ := issue.ListFiles(defaultBranch) lfs := strings.Join(lf, "\n") - if command == "ask" { - if intent == "" { - return nil, fmt.Errorf("Error: intent is required for 'ask' command") - } - systemPrompt = cfg.Ai.Commands[command].System_prompt + intent + "\n" - } else { - systemPrompt = cfg.Ai.Commands[command].System_prompt - } + userPromptBuf := strings.Builder{} + err = userPromptTmpl.Execute(&userPromptBuf, PromptData{UserPrompt: userPrompt, RepositoryStructure: lfs}) + userPrompt = userPromptBuf.String() logger.Println("\x1b[34mRAGPrompt: |\n", systemPrompt, userPrompt, "\x1b[0m") - return &ai.Prompt{UserPrompt: userPrompt + lfs, SystemPrompt: systemPrompt, Images: imgs}, nil + return &ai.Prompt{UserPrompt: userPrompt, SystemPrompt: systemPrompt, Images: imgs}, nil } // Initialize AI client @@ -293,6 +328,7 @@ func getEmbeddingClient(oaiKey string, cfg *utils.Config, logger *log.Logger) (a // Initialize Neo4jRetriever func getNeo4jRetriever(cfg *Neo4jConfig, logger *log.Logger) (*rag.Neo4jRetriever, error) { r, err := rag.NewNeo4jRetriever(cfg.uri, cfg.username, cfg.password, cfg.fulltextIndex, cfg.vectorIndex) + logger.Println("Neo4jRetriever:", r) if err != nil { return nil, fmt.Errorf("Error: new Neo4jRetriever: %w", err) } @@ -309,7 +345,7 @@ func getPineconeRetriever(cfg *Config) (*rag.PineconeClient, error) { return r, nil } -func CreateDB(idxName string, cfg *Config, loadedcfg *utils.Config, logger *log.Logger) error { +func CreateDB(idxName string, targetBranches []string, cfg *Config, loadedcfg *utils.Config, logger *log.Logger) error { logger.Println("Creating DB to Index:", idxName) repoURL := fmt.Sprintf("https://github.com/%s/%s", cfg.owner, cfg.repo) repo, err := utils.CloneRepository(repoURL, &utils.AuthOptions{Username: cfg.owner, Token: cfg.ghToken}) @@ -338,21 +374,7 @@ func CreateDB(idxName string, cfg *Config, loadedcfg *utils.Config, logger *log. return fmt.Errorf("Error getting Pinecone client: %w", err) } - /* Temporarily commented out because searching one by one in Pinecone takes too long - // get file content from Pinecone, compare with the new docs and add only the new docs - newDocs := []rag.Document{} - for _, doc := range docs { - // get file content from Pinecone - existedDoc, _ := pc.QueryById(doc.Id) - if existedDoc == nil { - newDocs = append(newDocs, doc) - } else if existedDoc.Content != doc.Content { - newDocs = append(newDocs, doc) - } - } - pc.CreateCodebaseDB(newDocs, emb) - */ - pc.CreateCodebaseDB(docs, emb) + pc.CreateCodebaseDB(docs, emb, rag.CodebaseEmbeddingOptions{Branches: targetBranches}) issues := github.GetAllIssues(cfg.owner, cfg.repo, cfg.ghToken) pc.CreateIssueDB(issues, emb) diff --git a/internal/rag/pinecone.go b/internal/rag/pinecone.go index 29293c4..21498d7 100644 --- a/internal/rag/pinecone.go +++ b/internal/rag/pinecone.go @@ -7,6 +7,7 @@ import ( "log" "os" "path/filepath" + "slices" "strings" "time" @@ -160,14 +161,28 @@ func (pc *PineconeClient) RetrieveByVector(vector []float32, options Options) ([ log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) } - topK := options.topK + topK := options.TopK if topK == 0 { - topK = 3 // Default topK value + topK = 5 // Default topK value + } + + var branchFilter map[string]interface{} + if len(options.Branches) != 0 { + branchFilter = map[string]interface{}{ + "branch": map[string]interface{}{ + "$in": []interface{}{options.Branches}, + }, + } + } + filter, err := structpb.NewStruct(branchFilter) + if err != nil { + log.Fatalf("Failed to create filter %v", err) } res, err := idxConnection.QueryByVectorValues(pc.context, &pinecone.QueryByVectorValuesRequest{ Vector: vector, TopK: topK, + MetadataFilter: filter, IncludeValues: false, IncludeMetadata: true, }) @@ -294,17 +309,26 @@ func (pc *PineconeClient) DeleteRecord(id string) error { return nil } -func (pc *PineconeClient) CreateCodebaseDB(docs []Document, embedding ai.EmbeddingModel) error { +func (pc *PineconeClient) CreateCodebaseDB(docs []Document, embedding ai.EmbeddingModel, options CodebaseEmbeddingOptions) error { var vectors [][]float32 + var tempDocs []Document + allBranchFlag := false + if len(options.Branches) == 0 { + allBranchFlag = true + } + for _, doc := range docs { // 1536 is the default embedding size for the Universal Sentence Encoder - vector, err := embedding.GetEmbedding(doc.Content) - if err != nil { - return fmt.Errorf("Error getting embedding: %v", err) // MAX input length is 8192 in OpenAI + if allBranchFlag || slices.Contains(options.Branches, doc.Branch) { + vector, err := embedding.GetEmbedding(doc.Content) + tempDocs = append(tempDocs, doc) + if err != nil { + return fmt.Errorf("Error getting embedding: %v", err) // MAX input length is 8192 in OpenAI + } + vectors = append(vectors, vector) } - vectors = append(vectors, vector) } - err := pc.UpsertWithStruct(docs, vectors) + err := pc.UpsertWithStruct(tempDocs, vectors) if err != nil { return fmt.Errorf("Error upserting vectors: %v", err) } diff --git a/internal/rag/rag.go b/internal/rag/rag.go index 6f6a647..440f1ed 100644 --- a/internal/rag/rag.go +++ b/internal/rag/rag.go @@ -8,13 +8,18 @@ type Retriever interface { // Retrieve(ctx context.Context, query string, options ...Option) ([]Document, error) Retrieve(query string, embedding ai.EmbeddingModel, options Options) ([]Document, error) RetrieveByVector(vector []float32, options Options) ([]Document, error) - RetrieveIssue(vector []float32) string + RetrieveIssue(vector []float32, issueNumber uint32, options Options) string } type Options struct { - topK uint32 - withStructuredData bool // Not implemented yet - enableHybridRetrieval bool // Not implemented yet + TopK uint32 + Branches []string + WithStructuredData bool // Not implemented yet + EnableHybridRetrieval bool // Not implemented yet +} + +type CodebaseEmbeddingOptions struct { + Branches []string } type Document struct { diff --git a/internal/rag/similar_issue.go b/internal/rag/similar_issue.go index d8f6601..b63b69c 100644 --- a/internal/rag/similar_issue.go +++ b/internal/rag/similar_issue.go @@ -3,6 +3,7 @@ package rag import ( "fmt" "log" + "strconv" // "github.com/joho/godotenv" "github.com/3-shake/alert-menta/internal/ai" @@ -21,7 +22,12 @@ type Issue struct { // Source string } -func (pc *PineconeClient) RetrieveIssue(vector []float32) string { +func (pc *PineconeClient) RetrieveIssue(vector []float32, issueNumber uint32, options Options) string { + topK := options.TopK + if topK == 0 { + topK = 3 // Default topK value + } + idxModel, err := pc.pc.DescribeIndex(pc.context, pc.indexName) if err != nil { log.Fatalf("Failed to describe index \"%v\": %v", pc.indexName, err) @@ -30,9 +36,21 @@ func (pc *PineconeClient) RetrieveIssue(vector []float32) string { if err != nil { log.Fatalf("Failed to create IndexConnection1 for Host %v: %v", idxModel.Host, err) } + + excludeIdsFilter := map[string]interface{}{ + "id": map[string]interface{}{ + "$nin": []interface{}{strconv.Itoa(int(issueNumber))}, + }, + } + filter, err := structpb.NewStruct(excludeIdsFilter) + if err != nil { + log.Fatalf("Failed to create filter %v", err) + } + res, err := idxConnection.QueryByVectorValues(pc.context, &pinecone.QueryByVectorValuesRequest{ Vector: vector, - TopK: 3, + TopK: uint32(topK), + MetadataFilter: filter, IncludeValues: false, IncludeMetadata: true, }) @@ -42,9 +60,9 @@ func (pc *PineconeClient) RetrieveIssue(vector []float32) string { log.Printf(prettifyStruct(res)) } text := "## Other issues similar to this one are: \n" - text += fmt.Sprintf("1. [%s #%s (%s)](%s)\n", res.Matches[0].Vector.Metadata.GetFields()["title"].GetStringValue(), res.Matches[0].Vector.Metadata.GetFields()["id"].GetStringValue(), res.Matches[0].Vector.Metadata.GetFields()["state"].GetStringValue(), res.Matches[0].Vector.Metadata.GetFields()["url"].GetStringValue()) - text += fmt.Sprintf("1. [%s #%s (%s)](%s)\n", res.Matches[1].Vector.Metadata.GetFields()["title"].GetStringValue(), res.Matches[1].Vector.Metadata.GetFields()["id"].GetStringValue(), res.Matches[1].Vector.Metadata.GetFields()["state"].GetStringValue(), res.Matches[1].Vector.Metadata.GetFields()["url"].GetStringValue()) - text += fmt.Sprintf("3. [%s #%s (%s)](%s)\n", res.Matches[2].Vector.Metadata.GetFields()["title"].GetStringValue(), res.Matches[2].Vector.Metadata.GetFields()["id"].GetStringValue(), res.Matches[2].Vector.Metadata.GetFields()["state"].GetStringValue(), res.Matches[2].Vector.Metadata.GetFields()["url"].GetStringValue()) + for i, match := range res.Matches { + text += fmt.Sprintf("%d. [%s #%s (%s)](%s)\n", i+1, match.Vector.Metadata.GetFields()["title"].GetStringValue(), match.Vector.Metadata.GetFields()["id"].GetStringValue(), match.Vector.Metadata.GetFields()["state"].GetStringValue(), match.Vector.Metadata.GetFields()["url"].GetStringValue()) + } return text } From 5925d22668d7bdfa6c730560a9bed161504454d8 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Tue, 25 Mar 2025 16:36:14 +0900 Subject: [PATCH 14/18] Update output format similar issue & code --- cmd/main.go | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index d2db314..6153330 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -155,11 +155,15 @@ func main() { logger.Fatalf("Error getting Response: %v", err) } if cfg.useRag { - comment += "\n\n" + "## Sources:\n" - for i, d := range docs { - comment += fmt.Sprintf("%d. [%s](%s)\n", i+1, d.Id, d.URL) + if len(docs) > 0 { + comment += "\n\n" + "## Sources:\n" + for i, d := range docs { + comment += fmt.Sprintf("%d. [%s](%s)\n", i+1, d.Id, d.URL) + } + } + if relatedIssue != "" { + comment += "\n\n" + relatedIssue } - comment += "\n\n" + relatedIssue } logger.Println("Response:", comment) From 0917222a02c0f605048ab318728ff25aea7ba2b4 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Wed, 26 Mar 2025 17:15:41 +0900 Subject: [PATCH 15/18] =?UTF-8?q?ragPrompt=20=E3=81=AE=E3=82=A8=E3=83=A9?= =?UTF-8?q?=E3=83=BC=E3=83=8F=E3=83=B3=E3=83=89=E3=83=AA=E3=83=B3=E3=82=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- cmd/main.go | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 6153330..bb5219a 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -259,22 +259,24 @@ I'm looking to identify related files and functions to solve a GitHub Issue. Ple Please suggest specific file paths and function names where possible. Maximize the use of information available from the repository structure to understand the code architecture before making suggestions. ` - userPromptPlaceholder := `## GitHub Issue: + userPromptTmpl, err := template.New("userPrompt").Parse(`## GitHub Issue: {{.UserPrompt}} ## Repository Structure: {{.RepositoryStructure}} - ` - userPromptTmpl, err := template.New("userPrompt").Parse(userPromptPlaceholder) + `) if err != nil { - logger.Fatalf("Error parsing userPrompt template: %v", err) + return nil, fmt.Errorf("Error parsing userPrompt template: %w", err) } type PromptData struct { UserPrompt string RepositoryStructure string } defaultBranch, _ := issue.GetDefaultBranch() - lf, _ := issue.ListFiles(defaultBranch) + lf, err := issue.ListFiles(defaultBranch) + if err != nil { + return nil, fmt.Errorf("Error listing files: %w", err) + } lfs := strings.Join(lf, "\n") userPromptBuf := strings.Builder{} err = userPromptTmpl.Execute(&userPromptBuf, PromptData{UserPrompt: userPrompt, RepositoryStructure: lfs}) From 282776868556fa7eed7f31f2a90b939f554d4951 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Wed, 26 Mar 2025 17:17:14 +0900 Subject: [PATCH 16/18] =?UTF-8?q?actions=20=E3=83=95=E3=82=A1=E3=82=A4?= =?UTF-8?q?=E3=83=AB=E3=81=A7=E3=81=AE=20-use-rag=20=E3=82=AA=E3=83=97?= =?UTF-8?q?=E3=82=B7=E3=83=A7=E3=83=B3?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .github/workflows/alert-menta.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/alert-menta.yaml b/.github/workflows/alert-menta.yaml index 9002a8b..b2b2250 100644 --- a/.github/workflows/alert-menta.yaml +++ b/.github/workflows/alert-menta.yaml @@ -50,7 +50,7 @@ jobs: - name: Add Comment run: | if [[ "$COMMAND" == "ask" ]]; then - ./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -intent "$INTENT" + ./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -intent "$INTENT" -use-rag -pinecone-api-key ${{ secrets.PINECONE_API_KEY }} else - ./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE + ./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -use-rag -pinecone-api-key ${{ secrets.PINECONE_API_KEY }} fi From b5d6fdf46c3c3dd47baa48d0d7a8354587f8dd2f Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Wed, 26 Mar 2025 17:23:54 +0900 Subject: [PATCH 17/18] Update README.me (New Option similar_code & similar_issue) --- .alert-menta.user.yaml | 2 +- README.md | 12 +++++++++--- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/.alert-menta.user.yaml b/.alert-menta.user.yaml index 10fe7cc..243bc87 100644 --- a/.alert-menta.user.yaml +++ b/.alert-menta.user.yaml @@ -10,7 +10,7 @@ ai: vertexai: project: "" location: "us-central1" - model: "gemini-1.5-flash-001" + model: "gemini-2.0-flash-001" commands: - describe: diff --git a/README.md b/README.md index a77170f..66a8eac 100644 --- a/README.md +++ b/README.md @@ -52,17 +52,23 @@ ai: vertexai: project: "" location: "us-central1" - model: "gemini-1.5-flash-001" + model: "gemini-2.0-flash-001" commands: - describe: description: "Generate a detailed description of the Issue." system_prompt: "The following is the GitHub Issue and comments on it. Please Generate a detailed description.\n" + similar_code: false + similar_issue: true - suggest: description: "Provide suggestions for improvement based on the contents of the Issue." system_prompt: "The following is the GitHub Issue and comments on it. Please identify the issues that need to be resolved based on the contents of the Issue and provide three suggestions for improvement.\n" + similar_code: true + similar_issue: false - ask: description: "Answer free-text questions." system_prompt: "The following is the GitHub Issue and comments on it. Based on the content, provide a detailed response to the following question:\n" + similar_code: true + similar_issue: false ``` Specify the LLM to use with `ai.provider`. You can change the system prompt with `commands.{command}.system_prompt`. @@ -122,9 +128,9 @@ jobs: - name: Add Comment run: | if [[ "$COMMAND" == "ask" ]]; then - ./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -intent "$INTENT" + ./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -intent "$INTENT" -use-rag -pinecone-api-key ${{ secrets.PINECONE_API_KEY }} else - ./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE + ./alert-menta -owner ${{ github.repository_owner }} -issue ${{ github.event.issue.number }} -repo ${{ env.REPOSITORY_NAME }} -github-token ${{ secrets.GH_TOKEN }} -api-key ${{ secrets.OPENAI_API_KEY }} -command $COMMAND -config $CONFIG_FILE -use-rag -pinecone-api-key ${{ secrets.PINECONE_API_KEY }} fi ``` #### If using Vertex AI From e722c8fa760dc6f9d7ca63b68c523a3b388c0322 Mon Sep 17 00:00:00 2001 From: pacificbelt30 <57101176+pacificbelt30@users.noreply.github.com> Date: Wed, 26 Mar 2025 19:13:11 +0900 Subject: [PATCH 18/18] Update README.me (Pinecone setup, alert-menta.yaml, .alert-menta.user.yaml template) --- README.md | 31 +++++++++++++++++++++++-------- 1 file changed, 23 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 66a8eac..eaeeff8 100644 --- a/README.md +++ b/README.md @@ -9,11 +9,18 @@ We reduce the burden of system failure response using LLM. You can receive support for failure handling that is completed within GitHub. - Execute commands interactively in GitHub Issue comments: - `describe` command to summarize the Issue - - `analysis` command for root cause analysis of failures (in development) - `suggest` command for proposing improvement measures for failures - `ask` command for asking additional questions -- Mechanism to improve response accuracy using [RAG](https://cloud.google.com/use-cases/retrieval-augmented-generation?hl=en) (in development) -- Selectable LLM models (OpenAI, VertexAI) +- Execute any command defined by the user other than the above (e.g. `analysis` command for root cause analysis of failures). Please see [here](#.alert-menta.user.yaml) +- Mechanism to improve response accuracy using [RAG](https://cloud.google.com/use-cases/retrieval-augmented-generation?hl=en) in [Pinecone](https://www.pinecone.io/) + - If you use RAG, you must register with Pinecone, the vector database, and register your API key as PINECONE_API_KEY in Actions Secrets. Please see [here](#3.-configure-to-use-rag) + - `similar_code` option to search similar code + - `similar_issue` option to search similar issues +- Selectable LLM models + - OpenAI + - VertexAI + - Claude (under development) + - OpenAI-Compatible Server (under development) - Extensible prompt text - Multilingual support @@ -24,18 +31,26 @@ Prepare a GitHub PAT with the following permissions and register it in Secrets: - repo - workflow ### 2. Configure to use LLM -#### Open AI +#### OpenAI Generate an API key and register it in Secrets. #### Vertex AI Enable Vertex AI on Google Cloud. Alert-menta obtains access to VertexAI using [Workload Identity Federation](https://cloud.google.com/iam/docs/workload-identity-federation). Please see [here](#if-using-vertex-ai) for details. -### 3. Create the alert-menta configuration file +### 3. Configure to use RAG +#### Overview +Alert-Menta offers the ability to suggest similar issues and search the code base. +This functionality uses Pinecone as a vector database. +#### Setup Pinecone +Pinecone can be found [here](https://docs.pinecone.io/guides/get-started/overview) to register an account and create a database. +After creating an account, obtain an API key and save it in Actions Secrets under the name PINECONE_API_KEY. +Alert-Menta will automatically create the Index. +### 4. Create the alert-menta configuration file Create the alert-menta configuration file in the root of the repository. For details, please see [here](#alert-mentauseryaml). -### 4. Create the Actions configuration file +### 5. Create the Actions configuration file There is a [template](#template) available, so please use it. -### 5. Monitoring alerts or user reports are received on Issues +### 6. Monitoring alerts or user reports are received on Issues For the method to bring monitoring alerts to Issues, please see [this repository](https://github.com/kechigon/alert-menta-lab/tree/main). -### 6. Execute alert-menta +### 7. Execute alert-menta Execute commands on the Issue. Run commands with a backslash at the beginning (e.g., `/describe`). For the `ask` command, leave a space and enter the question (e.g., `/ask What about the Next Action?`). Alert-menta includes the text of the Issue in the prompt and sends it to the LLM, then posts the response as a comment on the Issue. ## Configuration