Skip to content

Commit

Permalink
WIP: Telemetry
Browse files Browse the repository at this point in the history
  • Loading branch information
Jeffail committed Oct 10, 2024
1 parent df39acc commit a6e3796
Show file tree
Hide file tree
Showing 9 changed files with 314 additions and 7 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,11 @@ jobs:
- name: Check Out Repo
uses: actions/checkout@v4

- name: Add telemetry variables
env:
CONNECT_TELEMETRY_PRIV_KEY: ${{ secrets.TelemetryPrivateKey }}
CONNECT_TELEMETRY_DELAY: 1m

- name: Configure AWS credentials
uses: aws-actions/configure-aws-credentials@v4
with:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/upload_plugin.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,9 @@ jobs:
with:
aws-region: ${{ vars.RP_AWS_CRED_REGION }}
role-to-assume: arn:aws:iam::${{ secrets.RP_AWS_CRED_ACCOUNT_ID }}:role/${{ vars.RP_AWS_CRED_BASE_ROLE_NAME }}${{ github.event.repository.name }}
- name: Add telemetry variables
env:
CONNECT_TELEMETRY_PRIV_KEY: ${{ secrets.TelemetryPrivateKey }}
- uses: actions/checkout@v4
- uses: actions/setup-go@v5
with:
Expand Down
4 changes: 3 additions & 1 deletion .goreleaser.yml
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,8 @@ builds:
-X main.Version={{.Version}}
-X main.DateBuilt={{.Date}}
-X main.BinaryName=redpanda-connect
-X github.com/redpanda-data/connect/v4/internal/telemetry.PrivateKey={{ if index .Env "CONNECT_TELEMETRY_PRIV_KEY" }}{{ .Env.CONNECT_TELEMETRY_PRIV_KEY }}{{ else }}{{ end }}
-X github.com/redpanda-data/connect/v4/internal/telemetry.ExportDelay={{ if index .Env "CONNECT_TELEMETRY_DELAY" }}{{ .Env.CONNECT_TELEMETRY_DELAY }}{{ else }}{{ end }}
- id: connect-cloud
main: cmd/redpanda-connect-cloud/main.go
Expand Down Expand Up @@ -122,4 +124,4 @@ publishers:
- connect-linux-pkgs
cmd: ./resources/scripts/push_pkg_to_cloudsmith.sh {{ .ArtifactPath }}
env:
- CLOUDSMITH_API_KEY={{ .Env.CLOUDSMITH_API_KEY }}
- CLOUDSMITH_API_KEY={{ .Env.CLOUDSMITH_API_KEY }}
4 changes: 3 additions & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ require (
github.com/generikvault/gvalstrings v0.0.0-20180926130504-471f38f0112a
github.com/getsentry/sentry-go v0.28.1
github.com/go-faker/faker/v4 v4.4.2
github.com/go-jose/go-jose/v3 v3.0.3
github.com/go-resty/resty/v2 v2.15.3
github.com/go-sql-driver/mysql v1.8.1
github.com/gocql/gocql v1.6.0
github.com/gofrs/uuid v4.4.0+incompatible
Expand Down Expand Up @@ -375,7 +377,7 @@ require (
golang.org/x/oauth2 v0.22.0 // indirect
golang.org/x/sys v0.24.0 // indirect
golang.org/x/term v0.23.0 // indirect
golang.org/x/time v0.5.0 // indirect
golang.org/x/time v0.6.0 // indirect
golang.org/x/tools v0.24.0 // indirect
golang.org/x/xerrors v0.0.0-20231012003039-104605ab7028 // indirect
google.golang.org/genproto v0.0.0-20240708141625-4ad9e859172b // indirect
Expand Down
8 changes: 6 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -463,6 +463,8 @@ github.com/go-fonts/stix v0.1.0/go.mod h1:w/c1f0ldAUlJmLBvlbkvVXLAD+tAMqobIIQpmn
github.com/go-gl/glfw v0.0.0-20190409004039-e6da0acd62b1/go.mod h1:vR7hzQXu2zJy9AVAgeJqvqgH9Q5CA+iKCZ2gyEVpxRU=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20191125211704-12ad95a8df72/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-gl/glfw/v3.3/glfw v0.0.0-20200222043503-6f7a984d4dc4/go.mod h1:tQ2UAYgL5IevRw8kRxooKSPJfGvJ9fJQFa0TUsXzTg8=
github.com/go-jose/go-jose/v3 v3.0.3 h1:fFKWeig/irsp7XD2zBxvnmA/XaRWp5V3CBsZXJF7G7k=
github.com/go-jose/go-jose/v3 v3.0.3/go.mod h1:5b+7YgP7ZICgJDBdfjZaIt+H/9L9T/YQrVfLAMboGkQ=
github.com/go-kit/kit v0.8.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/kit v0.9.0/go.mod h1:xBxKIO96dXMWWy0MnWVtmwkA9/13aqxPnvrjFYMA2as=
github.com/go-kit/log v0.1.0/go.mod h1:zbhenjAZHb184qTLMA9ZjW7ThYL0H2mk7Q6pNt4vbaY=
Expand All @@ -484,6 +486,8 @@ github.com/go-pg/zerochecker v0.2.0 h1:pp7f72c3DobMWOb2ErtZsnrPaSvHd2W4o9//8HtF4
github.com/go-pg/zerochecker v0.2.0/go.mod h1:NJZ4wKL0NmTtz0GKCoJ8kym6Xn/EQzXRl2OnAe7MmDo=
github.com/go-quicktest/qt v1.101.0 h1:O1K29Txy5P2OK0dGo59b7b0LR6wKfIhttaAhHUyn7eI=
github.com/go-quicktest/qt v1.101.0/go.mod h1:14Bz/f7NwaXPtdYEgzsx46kqSxVwTbzVZsDC26tQJow=
github.com/go-resty/resty/v2 v2.15.3 h1:bqff+hcqAflpiF591hhJzNdkRsFhlB96CYfBwSFvql8=
github.com/go-resty/resty/v2 v2.15.3/go.mod h1:0fHAoK7JoBy/Ch36N8VFeMsK7xQOHhvWaC3iOktwmIU=
github.com/go-sourcemap/sourcemap v2.1.4+incompatible h1:a+iTbH5auLKxaNwQFg0B+TCYl6lbukKPc7b5x0n1s6Q=
github.com/go-sourcemap/sourcemap v2.1.4+incompatible/go.mod h1:F8jJfvm2KbVjc5NqelyYJmf/v5J0dwNLS2mL4sNA1Jg=
github.com/go-sql-driver/mysql v1.5.0/go.mod h1:DCzpHaOWr8IXmIStZouvnhqoel9Qv2LBy8hT2VhHyBg=
Expand Down Expand Up @@ -1485,8 +1489,8 @@ golang.org/x/time v0.0.0-20181108054448-85acf8d2951c/go.mod h1:tRJNPiyCQ0inRvYxb
golang.org/x/time v0.0.0-20190308202827-9d24e82272b4/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20191024005414-555d28b269f0/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.0.0-20211116232009-f0f3c7e86c11/go.mod h1:tRJNPiyCQ0inRvYxbN9jk5I+vvW/OXSQhTDSoE431IQ=
golang.org/x/time v0.5.0 h1:o7cqy6amK/52YcAKIPlM3a+Fpj35zvRj2TP+e1xFSfk=
golang.org/x/time v0.5.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/time v0.6.0 h1:eTDhh4ZXt5Qf0augr54TN6suAUudPcawVZeIAPU7D4U=
golang.org/x/time v0.6.0/go.mod h1:3BpzKBy/shNhVucY/MWOyx10tF3SFh9QdLuxbVysPQM=
golang.org/x/tools v0.0.0-20180525024113-a5b4c53f6e8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
golang.org/x/tools v0.0.0-20190114222345-bf090417da8b/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
Expand Down
12 changes: 9 additions & 3 deletions internal/cli/enterprise.go
Original file line number Diff line number Diff line change
Expand Up @@ -18,14 +18,17 @@ import (
"github.com/rs/xid"

"github.com/redpanda-data/connect/v4/internal/impl/kafka/enterprise"
"github.com/redpanda-data/connect/v4/internal/telemetry"
)

// InitEnterpriseCLI kicks off the benthos cli with a suite of options that adds
// all of the enterprise functionality of Redpanda Connect. This has been
// abstracted into a separate package so that multiple distributions (classic
// versus cloud) can reference the same code.
func InitEnterpriseCLI(binaryName, version, dateBuilt string, schema *service.ConfigSchema, opts ...service.CLIOptFunc) {
rpLogger := enterprise.NewTopicLogger(xid.New().String())
instanceID := xid.New().String()

rpLogger := enterprise.NewTopicLogger(instanceID)
var fbLogger *service.Logger

opts = append(opts,
Expand Down Expand Up @@ -57,8 +60,11 @@ func InitEnterpriseCLI(binaryName, version, dateBuilt string, schema *service.Co
rpLogger.SetFallbackLogger(l)
}),
service.CLIOptAddTeeLogger(slog.New(rpLogger)),
service.CLIOptOnConfigParse(func(fn *service.ParsedConfig) error {
return rpLogger.InitOutputFromParsed(fn.Namespace("redpanda"))
service.CLIOptOnConfigParse(func(pConf *service.ParsedConfig) error {
// Kick off telemetry exporter.
telemetry.ActivateExporter(instanceID, version, fbLogger, schema, pConf)

return rpLogger.InitOutputFromParsed(pConf.Namespace("redpanda"))
}),
service.CLIOptOnStreamStart(func(s *service.RunningStreamSummary) error {
rpLogger.SetStreamSummary(s)
Expand Down
44 changes: 44 additions & 0 deletions internal/telemetry/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
Telemetry
=========

## What is this for?

Our main goal is to find out the frequency with which each plugin is used in production environments, as this helps us prioritise enhancements and bugs for various plugin families on our roadmap.

Ideally, we'd also like to identify common patterns in plugin usage that may help us plan new work or identify gaps in our functionality. For example, if we were to see that almost all `aws_s3` outputs were paired with a `mutation` processor then we might conclude that embedding a mutation field into the plugin itself could be a useful feature.

## What is being sent?

When a Redpanda Connect instance exports telemetry data to our collection server it sends a JSON payload that contains a high-level and anonymous summary of the contents of the config file being executed. Specific field values are never transmitted, nor are decorations of the config such as label names. For example, with an instance running the following config:

```yaml
input:
label: fooer
generate:
interval: 1s
mapping: 'root.foo = "bar"'

output:
label: bazer
aws_s3:
bucket: baz
path: meow.txt
```
We would extract the following information:
- A unique identifier for the Redpanda Connect instance.
- The duration for which the config has been running thus far.
- That the config contains a `generate` input and an `aws_s3` output.
- The IP address of the running Redpanda Connect instance (as a byproduct of the data delivery mechanism).

The code responsible for extracting this data is simple enough to dig into, and we encourage curious users to do so. A good place to start is the data format, which can be found at [`./payload.go`](./payload.go).

## When is it sent?

Telemetry data is only sent from an instance of Redpanda Connect that has been running for a significant period of time (e.g. 7 days), this is in order to avoid sending data from instances used for testing or experimentation. Once telemetry data starts being emitted it is sent once per hour.

## How do I avoid it?

Any custom build of Redpanda Connect will not send this data, as it is only included in the build artifacts published by us either through Github releases or our official Docker images.

84 changes: 84 additions & 0 deletions internal/telemetry/payload.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
// Copyright 2024 Redpanda Data, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package telemetry

import (
"fmt"
"time"

"github.com/redpanda-data/benthos/v4/public/service"
)

// Information gathered from each component present in the running config.
type componentInfo struct {
// The type (input, output, etc) of the plugin.
Type string `json:"type"`

// The name (aws_s3, generate, etc) of the plugin.
Name string `json:"name"`
}

// Contains all of the information which is delivered during a telemetry
// export, serialisable in JSON format.
type payload struct {
// A unique identifier for the Redpanda Connect instance.
ID string `json:"id"`

// Uptime of the Redpanda Connect instance.
Uptime int64 `json:"uptime"`

// A slice representing each component within a config.
Components []componentInfo `json:"components"`
}

// All information sent during a telemetry export is extracted within this
// function and stored within the payload.
func extractPayload(identifier string, schema *service.ConfigSchema, conf *service.ParsedConfig) (*payload, error) {
p := payload{ID: identifier, Uptime: 0}

rootValue, err := conf.FieldAny()
if err != nil {
return nil, fmt.Errorf("failed to obtain root of config: %w", err)
}

if err := schema.NewStreamConfigWalker().WalkComponentsAny(rootValue, func(w *service.WalkedComponent) error {
p.Components = append(p.Components, componentInfo{
Type: w.ComponentType,
Name: w.Name,
})
return nil
}); err != nil {
return nil, fmt.Errorf("failed to walk config: %w", err)
}

return &p, nil
}

// This function runs asynchronously and is solely where telemetry data is
// exported.
func exporterLoop(p *payload, exportDelay, exportPeriod time.Duration, exporter *telemetryExporter) {
started := time.Now()

// First, wait until after the export delay has passed.
time.Sleep(exportDelay)

for {
p.Uptime = int64(time.Since(started) / time.Second)
exporter.export(p)

// Now wait for the next export.
time.Sleep(exportPeriod)
}
}
Loading

0 comments on commit a6e3796

Please sign in to comment.