# example_config.yaml (forked from awslabs/LISA)
env: dev

dev:
  appName: lisa
  profile:
  deploymentName:
  accountNumber: 012345678901
  region: us-east-1
  deploymentStage: dev
  removalPolicy: destroy
  runCdkNag: false
  # lambdaLayerAssets:
  #   authorizerLayerPath: /path/to/authorizer_layer.zip
  #   commonLayerPath: /path/to/common_layer.zip
  #   ragLayerPath: /path/to/rag_layer.zip
  #   sdkLayerPath: /path/to/sdk_layer.zip
  # stackSynthesizer: CliCredentialsStackSynthesizer
  # permissionsBoundaryAspect:
  #   permissionsBoundaryPolicyName: CustomPermissionBoundary
  #   rolePrefix: CustomPrefix
  #   policyPrefix: CustomPrefix
  #   instanceProfilePrefix: CustomPrefix
  # systemBanner:
  #   text: 'LISA System'
  #   backgroundColor: orange
  #   fontColor: black
  s3BucketModels: hf-models-gaiic
  # aws partition mountS3 package location
  mountS3DebUrl: https://s3.amazonaws.com/mountpoint-s3-release/latest/x86_64/mount-s3.deb
  # aws-iso partition mountS3 package location
  # mountS3DebUrl: https://mountpoint-s3-release-us-iso-east-1.s3.us-iso-east-1.c2s.ic.gov/latest/x86_64/mount-s3.deb
  # aws-iso-b partition mountS3 package location
  # mountS3DebUrl: https://mountpoint-s3-release-us-isob-east-1.s3.us-isob-east-1.sc2s.sgov.gov/latest/x86_64/mount-s3.deb
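  # Only one mountS3DebUrl key should be active; uncomment the entry matching your deployment's AWS partition
  # and leave the others commented out.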
  accountNumbersEcr:
    - 012345678901
  deployRag: true
  deployChat: true
  deployUi: true
  lambdaConfig:
    pythonRuntime: PYTHON_3_10
    logLevel: DEBUG
    vpcAutoscalingConfig:
      provisionedConcurrentExecutions: 5
      minCapacity: 1
      maxCapacity: 50
      targetValue: 0.80
      cooldown: 30
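      # Assumption: targetValue is a utilization ratio for target-tracking scaling, so 0.80 aims to keep
      # roughly 80% of provisioned concurrent executions in use before scaling out.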
  authConfig:
    authority:
    clientId:
  logLevel: DEBUG
  # NOTE: The following configuration allows a custom domain to be used for the chat user interface.
  # If this option is populated, the API Gateway invocation URL will NOT work on its own as the
  # application URL; users must access the user interface through the custom domain.
  apiGatewayConfig:
    domainName:
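    # Hypothetical example (chat.example.com is a placeholder, not a value from this deployment):
    # domainName: chat.example.com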
  restApiConfig:
    apiVersion: v2
    instanceType: m5.large
    containerConfig:
      image:
        baseImage: python:3.9
        path: lib/serve/rest-api
        type: asset
      healthCheckConfig:
        command: ["CMD-SHELL", "exit 0"]
        interval: 10
        startPeriod: 30
        timeout: 5
        retries: 3
    autoScalingConfig:
      minCapacity: 1
      maxCapacity: 1
      cooldown: 60
      defaultInstanceWarmup: 60
      metricConfig:
        AlbMetricName: RequestCountPerTarget
        targetValue: 1000
        duration: 60
        estimatedInstanceWarmup: 30
    internetFacing: true
    loadBalancerConfig:
      sslCertIamArn: arn:aws:iam::012345678901:server-certificate/lisa-self-signed-dev
      healthCheckConfig:
        path: /health
        interval: 60
        timeout: 30
        healthyThresholdCount: 2
        unhealthyThresholdCount: 10
      domainName:
  ragRepositories:
    - repositoryId: pgvector-rag
      type: pgvector
      rdsConfig:
        username: postgres
    # - repositoryId: default
    #   type: opensearch
    #   opensearchConfig:
    #     dataNodes: 2
    #     dataNodeInstanceType: r6g.large.search
    #     masterNodes: 0
    #     masterNodeInstanceType: r6g.large.search
    #     volumeSize: 300
    # If adding an existing PGVector database, this configuration assumes:
    # 1. The database has pgvector installed and enabled: https://aws.amazon.com/about-aws/whats-new/2023/05/amazon-rds-postgresql-pgvector-ml-model-integration/
    # 2. The database is accessible by the RAG-related Lambda functions (add inbound PostgreSQL access on the database's security group for all Lambda RAG security groups)
    # 3. A secret exists in SecretsManager holding the database password in a JSON block of '{"password":"your_password_here"}'. This is the same format RDS natively uses when it stores a password in SecretsManager.
    # If passwordSecretId or dbHost is not provided, a sample database will be created for you. Only the username is required.
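    # A minimal sketch of creating such a secret with the AWS CLI (the secret name below is a placeholder,
    # not one the stack expects):
    #   aws secretsmanager create-secret \
    #     --name lisa-pgvector-password \
    #     --secret-string '{"password":"your_password_here"}'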
    # - repositoryId: pgvector-rag
    #   type: pgvector
    #   rdsConfig:
    #     username: postgres
    #     passwordSecretId: # secret ID as stored in SecretsManager. Example: "rds!db-aa88493d-be8d-4a3f-96dc-c668165f7826"
    #     dbHost: # Hostname of the database. Example hostname from RDS: "my-db-name.291b2f03.us-east-1.rds.amazonaws.com"
    #     dbName: postgres
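    # Enabling pgvector on an existing database is a single statement (assumes psql access with sufficient privileges;
    # <dbHost> stands in for the dbHost value above):
    #   psql -h <dbHost> -U postgres -c 'CREATE EXTENSION IF NOT EXISTS vector;'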
  ragFileProcessingConfig:
    chunkSize: 512
    chunkOverlap: 51
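    # chunkOverlap is ~10% of chunkSize here; overlap preserves context that would otherwise be lost at chunk boundaries.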
  ecsModels:
    - modelName: mistralai/Mistral-7B-Instruct-v0.2
      modelId: mistral7b
      deploy: true
      streaming: true
      modelType: textgen
      instanceType: g5.xlarge
      inferenceContainer: tgi
      containerConfig:
        image:
          baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1
          path: lib/serve/ecs-model/textgen/tgi
          type: asset
        sharedMemorySize: 2048
        healthCheckConfig:
          command: ["CMD-SHELL", "exit 0"]
          interval: 10
          startPeriod: 30
          timeout: 5
          retries: 3
        environment:
          MAX_CONCURRENT_REQUESTS: 128
          MAX_INPUT_LENGTH: 1024
          MAX_TOTAL_TOKENS: 2048
      autoScalingConfig:
        minCapacity: 1
        maxCapacity: 1
        defaultInstanceWarmup: 180
        cooldown: 420
        metricConfig:
          AlbMetricName: RequestCountPerTarget
          targetValue: 30
          duration: 60
          estimatedInstanceWarmup: 330
      loadBalancerConfig:
        healthCheckConfig:
          path: /health
          interval: 60
          timeout: 30
          healthyThresholdCount: 2
          unhealthyThresholdCount: 10
    - modelName: intfloat/e5-large-v2
      modelId: e5v2
      deploy: true
      modelType: embedding
      instanceType: g5.xlarge
      inferenceContainer: tei
      containerConfig:
        image:
          baseImage: ghcr.io/huggingface/text-embeddings-inference:1.2.3
          path: lib/serve/ecs-model/embedding/tei
          type: asset
        sharedMemorySize: 2048
        healthCheckConfig:
          command: ["CMD-SHELL", "exit 0"]
          interval: 10
          startPeriod: 30
          timeout: 5
          retries: 3
        environment:
          MAX_CONCURRENT_REQUESTS: 512
          MAX_CLIENT_BATCH_SIZE: 1024
          MAX_BATCH_TOKENS: 16384
      autoScalingConfig:
        minCapacity: 1
        maxCapacity: 1
        cooldown: 420
        defaultInstanceWarmup: 180
        metricConfig:
          AlbMetricName: RequestCountPerTarget
          targetValue: 60
          duration: 60
          estimatedInstanceWarmup: 330
      loadBalancerConfig:
        healthCheckConfig:
          path: /health
          interval: 30
          timeout: 10
          healthyThresholdCount: 2
          unhealthyThresholdCount: 10
    # - modelName: mistralai/Mixtral-8x7B-Instruct-v0.1
    #   deploy: true
    #   streaming: true
    #   modelType: textgen
    #   instanceType: g5.12xlarge
    #   inferenceContainer: tgi
    #   containerConfig:
    #     image:
    #       baseImage: ghcr.io/huggingface/text-generation-inference:2.0.1
    #       path: lib/serve/ecs-model/textgen/tgi
    #       type: asset
    #     sharedMemorySize: 2048
    #     healthCheckConfig:
    #       command: ["CMD-SHELL", "exit 0"]
    #       interval: 10
    #       startPeriod: 30
    #       timeout: 5
    #       retries: 3
    #     environment:
    #       QUANTIZE: bitsandbytes-nf4
    #       MAX_CONCURRENT_REQUESTS: 128
    #       MAX_INPUT_LENGTH: 1024
    #       MAX_TOTAL_TOKENS: 2048
    #   autoScalingConfig:
    #     minCapacity: 1
    #     maxCapacity: 1
    #     defaultInstanceWarmup: 180
    #     cooldown: 420
    #     metricConfig:
    #       AlbMetricName: RequestCountPerTarget
    #       targetValue: 30
    #       duration: 60
    #       estimatedInstanceWarmup: 330
    #   loadBalancerConfig:
    #     healthCheckConfig:
    #       path: /health
    #       interval: 60
    #       timeout: 30
    #       healthyThresholdCount: 2
    #       unhealthyThresholdCount: 10
  # LiteLLM config options are documented here: https://litellm.vercel.app/docs/proxy/configs#all-settings
  # Anything within this config is copied into the configuration used to start LiteLLM in the REST API container.
  # We suggest setting a placeholder API key such as "ignored" so that LiteLLM's OpenAI-style calls to locally
  # hosted models don't fail on a missing key.
  # We added `lisa_params` to carry additional metadata for interaction with the Chat UI. Specify whether the
  # model is a textgen or embedding model and, if it is textgen, whether it supports streaming. If embedding,
  # omit the `streaming` parameter. When defining the model list, `lisa_params` is an object in each model
  # definition that holds the `model_type` and `streaming` fields. A commented example is provided below.
  litellmConfig:
    litellm_settings:
      telemetry: false # Don't try to send telemetry to LiteLLM servers.
    model_list: # Add any of your existing (not LISA-hosted) models here.
      # - model_name: mymodel
      #   litellm_params:
      #     model: openai/myprovider/mymodel
      #     api_key: ignored
      #   lisa_params:
      #     model_type: textgen
      #     streaming: true
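      # A hypothetical embedding entry following the same convention (names are placeholders); per the note
      # above, `streaming` is omitted because this is an embedding model:
      # - model_name: myembedder
      #   litellm_params:
      #     model: openai/myprovider/myembedder
      #     api_key: ignored
      #   lisa_params:
      #     model_type: embedding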