Merge pull request #3 from LF-Engineering/implement-kinesis-firehose-wrapper

Implement kinesis firehose wrapper with chunks
Showing 8 changed files with 1,049 additions and 131 deletions.
@@ -0,0 +1,277 @@
package firehose

import (
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"log"
	"os"

	"github.com/aws/aws-sdk-go-v2/aws"
	"github.com/aws/aws-sdk-go-v2/config"
	"github.com/aws/aws-sdk-go-v2/service/firehose"
	"github.com/aws/aws-sdk-go-v2/service/firehose/types"
)

const (
	region        = "AWS_REGION"
	defaultRegion = "us-east-1"
	maxChunkSize  = 1020000 // just under the 1,000 KB per-record Firehose limit
)

// Config holds the aws configuration
type Config struct {
	Endpoint string
	Region   string
}

// PutResponse holds the per-record result
type PutResponse struct {
	RecordID string
	Error    error
}

// ClientProvider for kinesis firehose
type ClientProvider struct {
	firehose *firehose.Client
	region   string
	endPoint string
}

// NewClientProvider initiates a new client provider
func NewClientProvider() (*ClientProvider, error) {
	c := &ClientProvider{}
	c.region = os.Getenv(region)
	if c.region == "" {
		log.Printf("no AWS region found in env var AWS_REGION; falling back to defaultRegion=%s\n", defaultRegion)
		c.region = defaultRegion
	}

	if os.Getenv("LOCALSTACK_HOSTNAME") != "" {
		c.endPoint = os.Getenv("LOCALSTACK_HOSTNAME")
	}

	// When LOCALSTACK_HOSTNAME is set, route requests to the LocalStack edge
	// endpoint on port 4566 instead of the real AWS endpoint.
	customResolver := aws.EndpointResolverFunc(func(service, region string) (aws.Endpoint, error) {
		if c.endPoint != "" {
			return aws.Endpoint{
				URL:           fmt.Sprintf("http://%s:4566", c.endPoint),
				SigningRegion: c.region,
			}, nil
		}

		// Returning EndpointNotFoundError lets the service fall back to its default resolution.
		return aws.Endpoint{}, &aws.EndpointNotFoundError{}
	})

	cfg, err := config.LoadDefaultConfig(context.Background(),
		config.WithRegion(c.region),
		config.WithEndpointResolver(customResolver),
	)
	if err != nil {
		return nil, err
	}
	c.firehose = firehose.NewFromConfig(cfg)

	return c, nil
}

// CreateDeliveryStream creates a firehose delivery stream channel.
// You must provide the channel name as a required parameter.
// It returns nil if the channel was created successfully, or an error otherwise.
func (c *ClientProvider) CreateDeliveryStream(channel string) error {
	params := &firehose.CreateDeliveryStreamInput{
		DeliveryStreamName: aws.String(channel),
		DeliveryStreamType: types.DeliveryStreamTypeDirectPut,
	}
	_, err := c.firehose.CreateDeliveryStream(context.Background(), params)
	return err
}

// PutRecordBatch is an operation for Amazon Kinesis Firehose.
// It writes multiple data records into a delivery stream in a single call, which
// can achieve higher throughput per producer than writing single records.
//
// Each PutRecordBatch request supports up to 500 records. Each record in the
// request can be as large as 1,000 KB (before base64 encoding), up to a limit
// of 4 MB for the entire request.
//
// You must specify the name of the delivery stream and the data record when
// using PutRecord. The data record consists of a data blob that can be up to
// 1,000 KB in size.
//
// The PutRecordBatch response includes per-record results. Even if the
// PutRecordBatch call succeeds, individual records within the batch may have
// failed, so callers should inspect each record's error.
//
// Data records sent to Kinesis Data Firehose are stored for 24 hours from the
// time they are added to a delivery stream as it attempts to send the records
// to the destination. If the destination is unreachable for more than 24 hours,
// the data is no longer available.
//
// Don't concatenate two or more base64 strings to form the data fields of your
// records. Instead, concatenate the raw data, then perform base64 encoding.
func (c *ClientProvider) PutRecordBatch(channel string, records []interface{}) ([]*PutResponse, error) {
	ch := make(chan *chanPutResponse)
	chunk := make([]interface{}, 0)
	requestCounter := 0
	smallerChunks := make([][]byte, 0)
	for _, record := range records {
		r, err := json.Marshal(record)
		if err != nil {
			return []*PutResponse{}, err
		}
		if len(r) > maxChunkSize {
			smallerChunks = c.chunkSlice(r, maxChunkSize)
		}
		chunkBytes, err := json.Marshal(chunk)
		if err != nil {
			return []*PutResponse{}, err
		}
		// 3670016 bytes (3.5 MB) keeps the accumulated request safely below the
		// 4 MB PutRecordBatch limit; 500 is the per-request record limit.
		if (len(chunkBytes)+len(r)) < 3670016 && len(chunk) < 500 {
			if len(smallerChunks) > 0 {
				// An oversized record is split into byte fragments; note this
				// assumes each fragment unmarshals as valid JSON on its own.
				var smallChunk interface{}
				for _, fragment := range smallerChunks {
					err := json.Unmarshal(fragment, &smallChunk)
					if err != nil {
						return []*PutResponse{}, err
					}
					chunk = append(chunk, smallChunk)
				}
				// Reset so the fragments are not re-appended for later records.
				smallerChunks = make([][]byte, 0)
			} else {
				chunk = append(chunk, record)
			}
		} else {
			requestCounter++
			// Pass the chunk as an argument so the goroutine does not race with
			// the reassignment of chunk below.
			go func(chunk []interface{}) {
				result, err := c.send(channel, chunk)
				if err != nil {
					ch <- &chanPutResponse{Error: err}
					return
				}
				ch <- &chanPutResponse{Result: result}
			}(chunk)

			chunk = make([]interface{}, 0)
			chunk = append(chunk, record)
		}
	}

	// Flush whatever is left in the final chunk.
	if len(chunk) > 0 {
		requestCounter++
		go func(chunk []interface{}) {
			result, err := c.send(channel, chunk)
			if err != nil {
				ch <- &chanPutResponse{Error: err}
				return
			}
			ch <- &chanPutResponse{Result: result}
		}(chunk)
	}

	// Collect one response per request that was sent.
	var res []*PutResponse
	for i := 0; i < requestCounter; i++ {
		r := <-ch
		if r.Error != nil {
			return []*PutResponse{}, r.Error
		}
		res = append(res, r.Result...)
	}

	return res, nil
}

// chunkSlice splits a byte slice into chunks of at most chunkSize bytes each.
func (c *ClientProvider) chunkSlice(slice []byte, chunkSize int) [][]byte {
	var chunks [][]byte
	for i := 0; i < len(slice); i += chunkSize {
		end := i + chunkSize

		if end > len(slice) {
			end = len(slice)
		}

		chunks = append(chunks, slice[i:end])
	}

	return chunks
}
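
// For illustration (hypothetical numbers): a 2,500,000-byte payload passed to
// chunkSlice with chunkSize = maxChunkSize (1,020,000) yields three chunks of
// 1,020,000, 1,020,000, and 460,000 bytes.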

// PutRecord is an operation for Amazon Kinesis Firehose.
// It writes a single data record into an Amazon Kinesis Data Firehose delivery
// stream.
//
// By default, each delivery stream can take in up to 2,000 transactions per
// second, 5,000 records per second, or 5 MB per second.
//
// You must specify the name of the delivery stream and the data record when
// using PutRecord. The data record consists of a data blob that can be up to
// 1,000 KB in size, and any kind of data.
//
// Kinesis Data Firehose buffers records before delivering them to the destination.
// To disambiguate the data blobs at the destination, a common solution is to
// use delimiters in the data, such as a newline (\n) or some other character
// unique within the data. This allows the consumer application to parse individual
// data items when reading the data from the destination.
//
// The PutRecord operation returns a RecordId, which is a unique string assigned
// to each record.
func (c *ClientProvider) PutRecord(channel string, record interface{}) (*PutResponse, error) {
	b, err := json.Marshal(record)
	if err != nil {
		return &PutResponse{}, err
	}
	if len(b) > maxChunkSize {
		return &PutResponse{}, errors.New("record exceeded the limit of 1 MB")
	}

	params := &firehose.PutRecordInput{
		DeliveryStreamName: aws.String(channel),
		Record:             &types.Record{Data: b},
	}
	res, err := c.firehose.PutRecord(context.Background(), params)
	if err != nil {
		return &PutResponse{}, err
	}
	return &PutResponse{RecordID: *res.RecordId, Error: nil}, nil
}

func (c *ClientProvider) send(channel string, records []interface{}) ([]*PutResponse, error) {
	inputs := make([]types.Record, 0)
	for _, r := range records {
		b, err := json.Marshal(r)
		if err != nil {
			return []*PutResponse{}, err
		}
		inputs = append(inputs, types.Record{Data: b})
	}

	params := &firehose.PutRecordBatchInput{
		DeliveryStreamName: aws.String(channel),
		Records:            inputs,
	}
	recordBatch, err := c.firehose.PutRecordBatch(context.Background(), params)
	if err != nil {
		return []*PutResponse{}, err
	}

	res := make([]*PutResponse, 0)
	for _, r := range recordBatch.RequestResponses {
		var err error
		if r.ErrorMessage != nil {
			err = errors.New(*r.ErrorMessage)
		}

		if r.RecordId != nil {
			res = append(res, &PutResponse{RecordID: *r.RecordId, Error: err})
		}
	}
	return res, nil
}

type chanPutResponse struct {
	Result []*PutResponse
	Error  error
}
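
For orientation, here is a minimal usage sketch of the wrapper this PR adds. The import path assumes the package lives under the module root as firehose/; the stream name and payloads are hypothetical, and real AWS credentials (or a LocalStack setup) are required:

package main

import (
	"log"

	"github.com/LF-Engineering/insights-datasource-shared/firehose"
)

func main() {
	provider, err := firehose.NewClientProvider()
	if err != nil {
		log.Fatal(err)
	}

	// Hypothetical stream; CreateDeliveryStream errors if it already exists.
	if err := provider.CreateDeliveryStream("example-stream"); err != nil {
		log.Fatal(err)
	}

	// Single record: any JSON-marshalable value up to roughly 1,000 KB.
	res, err := provider.PutRecord("example-stream", map[string]string{"event": "created"})
	if err != nil {
		log.Fatal(err)
	}
	log.Println("record id:", res.RecordID)

	// Batch: the wrapper splits the slice into requests of at most 500 records
	// and roughly 3.5 MB each.
	responses, err := provider.PutRecordBatch("example-stream", []interface{}{
		map[string]string{"event": "a"},
		map[string]string{"event": "b"},
	})
	if err != nil {
		log.Fatal(err)
	}
	log.Println("batch results:", len(responses))
}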
@@ -1,16 +1,12 @@
module github.com/LF-Engineering/insights-datasource-shared

go 1.16
go 1.15

require (
	github.com/LF-Engineering/dev-analytics-libraries v1.1.20
	github.com/avast/retry-go v3.0.0+incompatible
	github.com/dgrijalva/jwt-go v3.2.0+incompatible
	github.com/elastic/go-elasticsearch/v8 v8.0.0-20210817124755-97fca1753fd7
	github.com/google/uuid v1.3.0
	github.com/aws/aws-sdk-go-v2 v1.8.0
	github.com/aws/aws-sdk-go-v2/config v1.6.0
	github.com/aws/aws-sdk-go-v2/service/firehose v1.4.2
	github.com/json-iterator/go v1.1.11
	github.com/pkg/errors v0.9.1
	github.com/stretchr/testify v1.7.0
	github.com/stretchr/testify v1.6.1
	golang.org/x/text v0.3.7
	gopkg.in/resty.v1 v1.12.0
)
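
Because the endpoint resolver above honors LOCALSTACK_HOSTNAME, the wrapper can be smoke-tested without real AWS access. A rough sketch, assuming a default LocalStack setup listening on the standard edge port 4566 and accepting dummy credentials:

package main

import (
	"log"
	"os"

	"github.com/LF-Engineering/insights-datasource-shared/firehose"
)

func main() {
	// Route the client to LocalStack; these values are assumptions for a
	// default docker-based LocalStack setup.
	os.Setenv("AWS_REGION", "us-east-1")
	os.Setenv("LOCALSTACK_HOSTNAME", "localhost")
	// LocalStack accepts arbitrary credentials, but the SDK still needs some.
	os.Setenv("AWS_ACCESS_KEY_ID", "test")
	os.Setenv("AWS_SECRET_ACCESS_KEY", "test")

	provider, err := firehose.NewClientProvider()
	if err != nil {
		log.Fatal(err)
	}
	if err := provider.CreateDeliveryStream("local-test-stream"); err != nil {
		log.Fatal(err)
	}
	if _, err := provider.PutRecord("local-test-stream", map[string]string{"ping": "pong"}); err != nil {
		log.Fatal(err)
	}
	log.Println("LocalStack round trip succeeded")
}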