Merge pull request #75 from kerthcet/refactor/readme
Prepare for v0.0.2
InftyAI-Agent authored Aug 8, 2024
2 parents 6407c4c + a2374e1 commit 67f78a7
Showing 11 changed files with 25 additions and 31 deletions.
14 changes: 5 additions & 9 deletions Makefile
@@ -178,25 +178,21 @@ image-build:
--build-arg BASE_IMAGE=$(BASE_IMAGE) \
--build-arg BUILDER_IMAGE=$(BUILDER_IMAGE) \
--build-arg CGO_ENABLED=$(CGO_ENABLED) \
-$(PUSH) \
-$(LOAD) \
$(IMAGE_BUILD_EXTRA_OPTS) ./
-image-push: PUSH=--push
+image-push: IMAGE_BUILD_EXTRA_OPTS=--push
image-push: image-build
-image-load: LOAD=--load
+image-load: IMAGE_BUILD_EXTRA_OPTS=--load
image-load: image-load

.PHONY: loader-image-build
loader-image-build:
$(IMAGE_BUILD_CMD) -t $(LOADER_IMG) \
-f Dockerfile.loader \
-$(PUSH) \
-$(LOAD) \
$(IMAGE_BUILD_EXTRA_OPTS) ./
-loader-image-push: PUSH=--push
+loader-image-push: IMAGE_BUILD_EXTRA_OPTS=--push
loader-image-push: loader-image-build

-loader-image-load: LOAD=--load
+loader-image-load: IMAGE_BUILD_EXTRA_OPTS=--load
loader-image-load: loader-image-build

KIND = $(shell pwd)/bin/kind
@@ -206,7 +202,7 @@ kind:

.PHONY: kind-image-build
kind-image-build: PLATFORMS=linux/amd64
-kind-image-build: LOAD=--load
+kind-image-build: IMAGE_BUILD_EXTRA_OPTS=--load
kind-image-build: kind image-build

##@ Deployment
4 changes: 2 additions & 2 deletions README.md
@@ -16,7 +16,7 @@
## Feature Overview

- **User Friendly**: People can quick deploy a LLM service with minimal configurations.
-- **High performance**: llmaz supports a wide range of advanced inference backends for high performance, like [vLLM](https://github.com/vllm-project/vllm), [SGLang](https://github.com/sgl-project/sglang). Find the full list of supported backends [here](./docs/support-backends.md).
+- **High Performance**: llmaz supports a wide range of advanced inference backends for high performance, like [vLLM](https://github.com/vllm-project/vllm), [SGLang](https://github.com/sgl-project/sglang). Find the full list of supported backends [here](./docs/support-backends.md).
- **Scaling Efficiency (WIP)**: llmaz works smoothly with autoscaling components like [Cluster-Autoscaler](https://github.com/kubernetes/autoscaler/tree/master/cluster-autoscaler) or [Karpenter](https://github.com/kubernetes-sigs/karpenter) to support elastic scenarios.
- **Accelerator Fungibility (WIP)**: llmaz supports serving the same LLM with various accelerators to optimize cost and performance.
- **SOTA Inference (WIP)**: llmaz supports the latest cutting-edge researches like [Speculative Decoding](https://arxiv.org/abs/2211.17192) or [Splitwise](https://arxiv.org/abs/2311.18677) to run on Kubernetes.
@@ -35,7 +35,7 @@ is to apply the `Model` and `Playground` yamls.

Please refer to **[examples](/docs/examples/README.md)** to learn more.

-> Note: if your model needs Huggingface token for weight downloads, please run `kubectl create secret generic model-secret --from-literal=HF_TOKEN=<your token>` ahead.
+> Note: if your model needs Huggingface token for weight downloads, please run `kubectl create secret generic model-secret --from-literal=HF_TOKEN=<your token>` ahead.
#### Model

1 change: 0 additions & 1 deletion docs/examples/vllm-huggingface/model.yaml
@@ -7,7 +7,6 @@ spec:
source:
modelHub:
modelID: facebook/opt-125m
-revision: main
inferenceFlavors:
- name: t4 # GPU type
requests:
4 changes: 2 additions & 2 deletions docs/installation.md
@@ -14,7 +14,7 @@ LWS_VERSION=v0.3.0
kubectl apply --server-side -f https://github.com/kubernetes-sigs/lws/releases/download/$LWS_VERSION/manifests.yaml
# llmaz runs in llmaz-system
-LLMAZ_VERSION=v0.0.1
+LLMAZ_VERSION=v0.0.2
kubectl apply --server-side -f https://github.com/inftyai/llmaz/releases/download/$LLMAZ_VERSION/manifests.yaml
```

@@ -24,7 +24,7 @@ kubectl apply --server-side -f https://github.com/inftyai/llmaz/releases/downloa
LWS_VERSION=v0.3.0
kubectl delete -f https://github.com/kubernetes-sigs/lws/releases/download/$LWS_VERSION/manifests.yaml
-LLMAZ_VERSION=v0.0.1
+LLMAZ_VERSION=v0.0.2
kubectl delete -f https://github.com/inftyai/llmaz/releases/download/$LLMAZ_VERSION/manifests.yaml
```

6 changes: 3 additions & 3 deletions pkg/controller/inference/service_controller.go
@@ -142,15 +142,15 @@ func buildWorkloadApplyConfiguration(service *inferenceapi.Service, model *corea
}

func injectModelProperties(template *applyconfigurationv1.LeaderWorkerTemplateApplyConfiguration, model *coreapi.Model) {
-modelSource := modelSource.NewDataSourceProvider(model)
+source := modelSource.NewModelSourceProvider(model)

template.WorkerTemplate.Labels = util.MergeKVs(template.WorkerTemplate.Labels, modelLabels(model))

-injectModelLoader(template, modelSource)
+injectModelLoader(template, source)
injectModelFlavor(template, model)
}

-func injectModelLoader(template *applyconfigurationv1.LeaderWorkerTemplateApplyConfiguration, source modelSource.DataSourceProvider) {
+func injectModelLoader(template *applyconfigurationv1.LeaderWorkerTemplateApplyConfiguration, source modelSource.ModelSourceProvider) {
source.InjectModelLoader(template.WorkerTemplate)
}

8 changes: 4 additions & 4 deletions pkg/controller_helper/backend/sglang.go
@@ -25,7 +25,7 @@ import (
coreapi "inftyai.com/llmaz/api/core/v1alpha1"
inferenceapi "inftyai.com/llmaz/api/inference/v1alpha1"
"inftyai.com/llmaz/pkg"
source "inftyai.com/llmaz/pkg/controller_helper/model_source"
modelSource "inftyai.com/llmaz/pkg/controller_helper/model_source"
)

var _ Backend = (*SGLANG)(nil)
@@ -66,10 +66,10 @@ func (s *SGLANG) DefaultCommands() []string {
}

func (s *SGLANG) DefaultArgs(model *coreapi.Model) []string {
-modelSource := source.NewDataSourceProvider(model)
+source := modelSource.NewModelSourceProvider(model)
return []string{
"--model-path", modelSource.ModelPath(),
"--served-model-name", modelSource.ModelName(),
"--model-path", source.ModelPath(),
"--served-model-name", source.ModelName(),
"--host", "0.0.0.0",
"--port", strconv.Itoa(pkg.DEFAULT_BACKEND_PORT),
}
8 changes: 4 additions & 4 deletions pkg/controller_helper/backend/vllm.go
@@ -25,7 +25,7 @@ import (
coreapi "inftyai.com/llmaz/api/core/v1alpha1"
inferenceapi "inftyai.com/llmaz/api/inference/v1alpha1"
"inftyai.com/llmaz/pkg"
source "inftyai.com/llmaz/pkg/controller_helper/model_source"
modelSource "inftyai.com/llmaz/pkg/controller_helper/model_source"
)

var _ Backend = (*VLLM)(nil)
@@ -66,10 +66,10 @@ func (v *VLLM) DefaultCommands() []string {
}

func (v *VLLM) DefaultArgs(model *coreapi.Model) []string {
-modelSource := source.NewDataSourceProvider(model)
+source := modelSource.NewModelSourceProvider(model)
return []string{
"--model", modelSource.ModelPath(),
"--served-model-name", modelSource.ModelName(),
"--model", source.ModelPath(),
"--served-model-name", source.ModelName(),
"--port", strconv.Itoa(pkg.DEFAULT_BACKEND_PORT),
}
}
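
Both backend files get the same mechanical rename: each now asks the provider for the model's path and served name when assembling launch flags. A minimal, self-contained sketch of that flag construction follows; `defaultArgs` is a hypothetical helper, not repository code, and the model path layout and port value are assumptions for illustration.

```go
package main

import (
	"fmt"
	"strconv"
)

// defaultArgs is a stand-in for VLLM.DefaultArgs above: it turns the
// provider's lookups into CLI flags for the inference server.
func defaultArgs(path, name string, port int) []string {
	return []string{
		"--model", path, // SGLang passes the same value as --model-path
		"--served-model-name", name,
		"--port", strconv.Itoa(port), // pkg.DEFAULT_BACKEND_PORT in the real code
	}
}

func main() {
	// Path and port are illustrative assumptions only.
	fmt.Println(defaultArgs("/workspace/models/facebook/opt-125m", "facebook/opt-125m", 8080))
}
```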
2 changes: 1 addition & 1 deletion pkg/controller_helper/model_source/modelhub.go
@@ -25,7 +25,7 @@ import (
"k8s.io/utils/ptr"
)

-var _ DataSourceProvider = &ModelHubProvider{}
+var _ ModelSourceProvider = &ModelHubProvider{}

type ModelHubProvider struct {
model *coreapi.Model
4 changes: 2 additions & 2 deletions pkg/controller_helper/model_source/modelsource.go
@@ -22,13 +22,13 @@ import (
coreapi "inftyai.com/llmaz/api/core/v1alpha1"
)

-type DataSourceProvider interface {
+type ModelSourceProvider interface {
ModelName() string
ModelPath() string
InjectModelLoader(*corev1.PodTemplateSpec)
}

-func NewDataSourceProvider(model *coreapi.Model) DataSourceProvider {
+func NewModelSourceProvider(model *coreapi.Model) ModelSourceProvider {
if model.Spec.Source.ModelHub != nil {
return &ModelHubProvider{model: model}
}
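
The interface and constructor rename above is the heart of the refactor. The pattern it names can be sketched in a self-contained form — stand-in types only, with InjectModelLoader omitted for brevity and the hub path layout assumed rather than taken from the repository:

```go
package main

import "fmt"

// ModelSourceProvider mirrors the renamed interface: it abstracts where
// model weights come from, independent of the backend consuming them.
type ModelSourceProvider interface {
	ModelName() string
	ModelPath() string
}

// modelHubProvider is a simplified stand-in for ModelHubProvider.
type modelHubProvider struct{ modelID string }

func (p modelHubProvider) ModelName() string { return p.modelID }

// The path layout here is an assumption for illustration; the real layout
// lives in loader code outside this diff.
func (p modelHubProvider) ModelPath() string { return "/workspace/models/" + p.modelID }

// newModelSourceProvider mirrors NewModelSourceProvider: it dispatches on the
// model's source. Here the spec is collapsed to a bare hub model ID.
func newModelSourceProvider(hubModelID string) ModelSourceProvider {
	return modelHubProvider{modelID: hubModelID}
}

func main() {
	src := newModelSourceProvider("facebook/opt-125m")
	// Backends splice these values into launch args, as sglang.go and vllm.go do.
	fmt.Println(src.ModelName(), src.ModelPath())
}
```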
2 changes: 1 addition & 1 deletion pkg/controller_helper/model_source/modelsource_test.go
@@ -46,7 +46,7 @@ func Test_ModelSourceProvider(t *testing.T) {

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
-provider := NewDataSourceProvider(tc.model)
+provider := NewModelSourceProvider(tc.model)
if tc.wantModelName != provider.ModelName() {
t.Fatalf("unexpected model name, want %s, got %s", tc.wantModelName, provider.ModelName())
}
3 changes: 1 addition & 2 deletions test/e2e/suit_test.go
@@ -34,7 +34,6 @@ import (
"sigs.k8s.io/controller-runtime/pkg/log/zap"
lws "sigs.k8s.io/lws/api/leaderworkerset/v1"

api "inftyai.com/llmaz/api/core/v1alpha1"
coreapi "inftyai.com/llmaz/api/core/v1alpha1"
inferenceapi "inftyai.com/llmaz/api/inference/v1alpha1"
"inftyai.com/llmaz/test/util"
@@ -103,6 +102,6 @@ func readyForTesting(client client.Client) {
// Delete this model before beginning tests.
Expect(client.Delete(ctx, model))
Eventually(func() error {
-return client.Get(ctx, types.NamespacedName{Name: model.Name, Namespace: model.Namespace}, &api.Model{})
+return client.Get(ctx, types.NamespacedName{Name: model.Name, Namespace: model.Namespace}, &coreapi.Model{})
}).ShouldNot(Succeed())
}
