From ef3354f310c3db83820e6b95acce221aaffea9cc Mon Sep 17 00:00:00 2001 From: Ayman Date: Fri, 22 Jul 2022 22:00:47 +0200 Subject: [PATCH 1/2] use s3 caching instead of elasticsearch Signed-off-by: Ayman --- cmd/github/github.go | 27 ++++++++++++++++++++++----- go.mod | 4 ++-- go.sum | 20 ++------------------ 3 files changed, 26 insertions(+), 25 deletions(-) diff --git a/cmd/github/github.go b/cmd/github/github.go index 4b8dbc0..b5020d0 100644 --- a/cmd/github/github.go +++ b/cmd/github/github.go @@ -5,6 +5,7 @@ import ( "flag" "fmt" "github.com/LF-Engineering/insights-datasource-github/build" + "github.com/LF-Engineering/insights-datasource-shared/cache" "github.com/sirupsen/logrus" "io/ioutil" "math/rand" @@ -221,8 +222,9 @@ type DSGitHub struct { // SourceID: the optional external source identifier (such as the repo ID from github/gitlab, or gerrit project slug) // this field is required for github, gitlab and gerrit. For github and gitlab, this is typically a numeric value // converted to a string such as 194341141. For gerrit this is the project (repository) slug. 
- SourceID string - log *logrus.Entry + SourceID string + log *logrus.Entry + cacheProvider cache.Manager } // AddPublisher - sets Kinesis publisher @@ -5649,7 +5651,10 @@ func (j *DSGitHub) OutputDocs(ctx *shared.Ctx, items []interface{}, docs *[]inte *docs = []interface{}{} gMaxUpstreamDtMtx.Lock() defer gMaxUpstreamDtMtx.Unlock() - shared.SetLastUpdate(ctx, j.Endpoint(), gMaxUpstreamDt) + err = j.cacheProvider.SetLastSync(fmt.Sprintf("%s/%s", j.Org, j.Repo), gMaxUpstreamDt) + if err != nil { + j.log.WithFields(logrus.Fields{"operation": "OutputDocs"}).Infof("unable to set last sync date to cache.error: %v", err) + } } } @@ -5712,7 +5717,12 @@ func (j *DSGitHub) Sync(ctx *shared.Ctx, category string) (err error) { j.log.WithFields(logrus.Fields{"operation": "Sync"}).Infof("%s fetching from %v (%d threads)", j.Endpoint(), ctx.DateFrom, j.ThrN) } if ctx.DateFrom == nil { - ctx.DateFrom = shared.GetLastUpdate(ctx, j.Endpoint()) + cachedLastSync, er := j.cacheProvider.GetLastSync(fmt.Sprintf("%s/%s", j.Org, j.Repo)) + if er != nil { + err = er + return + } + ctx.DateFrom = &cachedLastSync if ctx.DateFrom != nil { j.log.WithFields(logrus.Fields{"operation": "Sync"}).Infof("%s resuming from %v (%d threads)", j.Endpoint(), ctx.DateFrom, j.ThrN) } @@ -5725,7 +5735,7 @@ func (j *DSGitHub) Sync(ctx *shared.Ctx, category string) (err error) { // NOTE: Non-generic ends here gMaxUpstreamDtMtx.Lock() defer gMaxUpstreamDtMtx.Unlock() - shared.SetLastUpdate(ctx, j.Endpoint(), gMaxUpstreamDt) + err = j.cacheProvider.SetLastSync(fmt.Sprintf("%s/%s", j.Org, j.Repo), gMaxUpstreamDt) return } @@ -7719,6 +7729,7 @@ func main() { shared.SetSyncMode(true, false) shared.SetLogLoggerError(false) shared.AddLogger(&github.Logger, GitHubDataSource, logger.Internal, []map[string]string{{"GITHUB_ORG": github.Org, "GITHUB_REPO": github.Repo, "REPO_URL": github.URL, "ProjectSlug": ctx.Project}}) + github.AddCacheProvider() for cat := range ctx.Categories { github.WriteLog(&ctx, timestamp, 
logger.InProgress, cat) err = github.Sync(&ctx, cat) @@ -7744,3 +7755,9 @@ func (j *DSGitHub) createStructuredLogger() { }) j.log = log } + +// AddCacheProvider - builds a cache manager via cache.NewManager(GitHubDataSource, value of the STAGE environment variable) and stores a copy of it in j.cacheProvider, which Sync and OutputDocs use to get/set the last sync date. NOTE(review): the pointer returned by cache.NewManager is dereferenced without a nil check - confirm it can never be nil. +func (j *DSGitHub) AddCacheProvider() { + cacheProvider := cache.NewManager(GitHubDataSource, os.Getenv("STAGE")) + j.cacheProvider = *cacheProvider +} diff --git a/go.mod b/go.mod index 3d37121..b7cf19d 100644 --- a/go.mod +++ b/go.mod @@ -4,12 +4,13 @@ go 1.17 require ( github.com/LF-Engineering/dev-analytics-libraries v1.1.28 - github.com/LF-Engineering/insights-datasource-shared v1.4.5-0.20220511063206-6754a12066b9 + github.com/LF-Engineering/insights-datasource-shared v1.5.5 github.com/LF-Engineering/lfx-event-schema v0.1.20 github.com/aws/aws-lambda-go v1.28.0 github.com/aws/aws-sdk-go v1.43.22 github.com/google/go-github/v43 v43.0.0 github.com/json-iterator/go v1.1.12 + github.com/sirupsen/logrus v1.8.1 golang.org/x/oauth2 v0.0.0-20220309155454-6242fa91716a ) @@ -35,7 +36,6 @@ require ( github.com/modern-go/concurrent v0.0.0-20180228061459-e0a39a4cb421 // indirect github.com/modern-go/reflect2 v1.0.2 // indirect github.com/pkg/errors v0.9.1 // indirect - github.com/sirupsen/logrus v1.8.1 // indirect golang.org/x/crypto v0.0.0-20211215153901-e495a2d5b3d3 // indirect golang.org/x/net v0.0.0-20220127200216-cd36cc0744dd // indirect golang.org/x/sys v0.0.0-20211216021012-1d35b9e2eb4e // indirect diff --git a/go.sum b/go.sum index b29e477..3a4dd44 100644 --- a/go.sum +++ b/go.sum @@ -35,25 +35,9 @@ github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03 github.com/BurntSushi/xgb v0.0.0-20160522181843-27f122750802/go.mod h1:IVnqGOEym/WlBOVXweHU+Q+/VP0lqqI8lqeDx9IjBqo= github.com/LF-Engineering/dev-analytics-libraries v1.1.28 h1:sjmYNPSY3hXUl2+ouCqn+Xq7AmHkto9/5PsCV/7eYBw= github.com/LF-Engineering/dev-analytics-libraries v1.1.28/go.mod h1:O+9mOX1nf6qGKrZne33F6speSzrGj6+Y1tPF6jh/mcw= -github.com/LF-Engineering/insights-datasource-shared v1.4.2
h1:bGjwqlLUhG2eSHTXvxhjPZYbrEK3xP2feD+jcfCRyik= -github.com/LF-Engineering/insights-datasource-shared v1.4.2/go.mod h1:cy/YH2aldmSRGUg9XUe6W+rW+m7p2N8RsSSwyATIe9Y= -github.com/LF-Engineering/insights-datasource-shared v1.4.3-0.20220314162813-49e8868ac871 h1:7DT1e8zUIjxAdlzJkB0pWfwMTZyQA5JE8aPqTm3Mph8= -github.com/LF-Engineering/insights-datasource-shared v1.4.3-0.20220314162813-49e8868ac871/go.mod h1:cy/YH2aldmSRGUg9XUe6W+rW+m7p2N8RsSSwyATIe9Y= -github.com/LF-Engineering/insights-datasource-shared v1.4.5-0.20220511063206-6754a12066b9 h1:CcKhPF0SgXbOFasPhoD/1of1ohvgLc4aASJj2KTdsrI= -github.com/LF-Engineering/insights-datasource-shared v1.4.5-0.20220511063206-6754a12066b9/go.mod h1:ZQCRAJNyizhBemQDSjqm14G2MPkOJbRzCPfjwFhYKy4= -github.com/LF-Engineering/lfx-event-schema v0.1.8-0.20220222202022-c7ff0e6233a6/go.mod h1:GvEBmXvYGafFRIpZ6I0G5Ss4jsmkBNptLJIasLClXu8= -github.com/LF-Engineering/lfx-event-schema v0.1.10 h1:pBRwEiOOeY0T6g4f9TL6YYoZsXTD0hFHyXlzezBNPSM= -github.com/LF-Engineering/lfx-event-schema v0.1.10/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs= -github.com/LF-Engineering/lfx-event-schema v0.1.11-0.20220321134948-d47d1cbd922e h1:UTknseUySCZwZ0HCfpkyEgnWVEJsYMutZHeXv9zi180= -github.com/LF-Engineering/lfx-event-schema v0.1.11-0.20220321134948-d47d1cbd922e/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs= -github.com/LF-Engineering/lfx-event-schema v0.1.11 h1:qDk4YE+ZdZakdG2VYfpUy4HNUtSpTCwVfIuzk1Tne6M= -github.com/LF-Engineering/lfx-event-schema v0.1.11/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs= -github.com/LF-Engineering/lfx-event-schema v0.1.14 h1:WhdJbeLloWIMPgbHoUPOUOIGipwTSl8hXxp530ui7XM= +github.com/LF-Engineering/insights-datasource-shared v1.5.5 h1:jlSldY2MTR4gy82uQTed1M738gWrZuJj+EhAcYBrm3M= +github.com/LF-Engineering/insights-datasource-shared v1.5.5/go.mod h1:ZQCRAJNyizhBemQDSjqm14G2MPkOJbRzCPfjwFhYKy4= github.com/LF-Engineering/lfx-event-schema v0.1.14/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs= 
-github.com/LF-Engineering/lfx-event-schema v0.1.20-0.20220510142557-956ba192fade h1:YTBk0uQnUVrfEFcyHaR2hzVXDh6ebmeliV0f/O/jj8c= -github.com/LF-Engineering/lfx-event-schema v0.1.20-0.20220510142557-956ba192fade/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs= -github.com/LF-Engineering/lfx-event-schema v0.1.20-0.20220511083836-c30a2d7cf561 h1:6UCZsa14yxMNPcUN2S8Pgnb7WwOfs+N3UugFuW1tJAg= -github.com/LF-Engineering/lfx-event-schema v0.1.20-0.20220511083836-c30a2d7cf561/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs= github.com/LF-Engineering/lfx-event-schema v0.1.20 h1:MFDSASlnxHjswMS+ek/5EOWZyorefeaRgsKsZQqckrU= github.com/LF-Engineering/lfx-event-schema v0.1.20/go.mod h1:CfFIZ4mwzo88umf5+KxDQEzqlVkPG7Vx8eLK2oDfWIs= github.com/alcortesm/tgz v0.0.0-20161220082320-9c5fe88206d7/go.mod h1:6zEj6s6u/ghQa61ZWa/C2Aw3RkjiTBOix7dkqa1VLIs= From 500fa1e524f4e20edfe3c9763802838af4bc4eaa Mon Sep 17 00:00:00 2001 From: Ayman Date: Fri, 22 Jul 2022 22:03:45 +0200 Subject: [PATCH 2/2] clean up Signed-off-by: Ayman --- cmd/github/github.go | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/cmd/github/github.go b/cmd/github/github.go index b5020d0..bc1c12e 100644 --- a/cmd/github/github.go +++ b/cmd/github/github.go @@ -4,9 +4,6 @@ import ( "context" "flag" "fmt" - "github.com/LF-Engineering/insights-datasource-github/build" - "github.com/LF-Engineering/insights-datasource-shared/cache" - "github.com/sirupsen/logrus" "io/ioutil" "math/rand" "os" @@ -17,11 +14,14 @@ import ( "sync" "time" + "github.com/LF-Engineering/insights-datasource-github/build" + "github.com/LF-Engineering/insights-datasource-shared/cache" "github.com/LF-Engineering/insights-datasource-shared/cryptography" "github.com/LF-Engineering/lfx-event-schema/service" "github.com/LF-Engineering/lfx-event-schema/service/insights" "github.com/LF-Engineering/lfx-event-schema/service/repository" "github.com/LF-Engineering/lfx-event-schema/service/user" + "github.com/sirupsen/logrus" 
"github.com/LF-Engineering/lfx-event-schema/utils/datalake" @@ -36,7 +36,6 @@ import ( "github.com/aws/aws-sdk-go/aws/session" "github.com/aws/aws-sdk-go/service/s3" "github.com/google/go-github/v43/github" - jsoniter "github.com/json-iterator/go" "golang.org/x/oauth2" )