Skip to content

Commit

Permalink
Merge pull request #157 from kerthcet/cleanup/support-release-helm
Browse files Browse the repository at this point in the history
Refactor backendRuntime
  • Loading branch information
InftyAI-Agent authored Sep 12, 2024
2 parents b236943 + e8fea37 commit e2f0786
Show file tree
Hide file tree
Showing 35 changed files with 183 additions and 155 deletions.
12 changes: 0 additions & 12 deletions .github/workflows/publish-helm-chart.yaml

This file was deleted.

3 changes: 1 addition & 2 deletions Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -295,15 +295,14 @@ $(HELMIFY): $(LOCALBIN)

.PHONY: helm
helm: manifests kustomize helmify
$(KUBECTL) create namespace llmaz-system --dry-run=client -o yaml | $(KUBECTL) apply -f -
$(KUSTOMIZE) build config/default | $(HELMIFY) -crd-dir

.PHONY: helm-install
helm-install: helm
helm upgrade --install llmaz ./chart --namespace llmaz-system --create-namespace -f ./chart/values.global.yaml

.PHONY: helm-package
helm-package:
helm-package: helm
# Make sure the appended content always starts on a new line.
printf "\n" >> ./chart/values.yaml
cat ./chart/values.global.yaml >> ./chart/values.yaml
Expand Down
8 changes: 6 additions & 2 deletions api/core/v1alpha1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -131,10 +131,14 @@ const (
DraftRole ModelRole = "draft"
)

type ModelRepresentative struct {
// ModelRefer refers to a created Model with its role.
type ModelRefer struct {
// Name represents the model name.
Name ModelName `json:"name"`
// Role represents the model role once more than one model is required.
// For example, the draft role means running with SpeculativeDecoding,
// in which case the default backend arguments are looked up in the
// backendRuntime under the name speculative-decoding.
// +kubebuilder:validation:Enum={main,draft}
// +kubebuilder:default=main
// +optional
Expand All @@ -148,7 +152,7 @@ type ModelClaims struct {
// speculative decoding, then one model is main(target) model, another one
// is draft model.
// +kubebuilder:validation:MinItems=1
Models []ModelRepresentative `json:"models,omitempty"`
Models []ModelRefer `json:"models,omitempty"`
// InferenceFlavors represents a list of flavors with fungibility supported
// to serve the model.
// - If not set, always apply with the 0-index model by default.
Expand Down
10 changes: 5 additions & 5 deletions api/core/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

24 changes: 10 additions & 14 deletions api/inference/v1alpha1/backendruntime_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,16 +21,15 @@ import (
metav1 "k8s.io/apimachinery/pkg/apis/meta/v1"
)

type InferenceMode string

const (
DefaultInferenceMode InferenceMode = "Default"
SpeculativeDecodingInferenceMode InferenceMode = "SpeculativeDecoding"
)

// BackendRuntimeArg is a set of preset arguments provided for ease of use.
// Do not edit the preset names unless the argument name is set explicitly
// in the Playground backendRuntimeConfig.
type BackendRuntimeArg struct {
Mode InferenceMode `json:"mode"`
Flags []string `json:"flags,omitempty"`
// Name represents the identifier of the backendRuntime argument.
Name string `json:"name"`
// Flags represents all the preset configurations.
// A flag wrapped in {{ .CONFIG }} is a configuration waiting to be rendered.
Flags []string `json:"flags,omitempty"`
}

// BackendRuntimeSpec defines the desired state of BackendRuntime
Expand All @@ -43,11 +42,8 @@ type BackendRuntimeSpec struct {
// Version represents the default version of the backendRuntime.
// It will be appended to the image as a tag.
Version string `json:"version"`
// Args represents the args of the backendRuntime.
// They can be appended or overwritten by the Playground args.
// The key is the inference option, like default one or advanced
// speculativeDecoding, the values are the corresponding args.
// Flag around with {{ .XXX }} is a flag waiting for render.
// Args represents the preset arguments of the backendRuntime.
// They can be appended or overwritten by the Playground backendRuntimeConfig.
Args []BackendRuntimeArg `json:"args,omitempty"`
// Envs represents the environments set to the container.
// +optional
Expand Down
4 changes: 2 additions & 2 deletions chart/Chart.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@ type: application
# This is the chart version. This version number should be incremented each time you make changes
# to the chart and its templates, including the app version.
# Versions are expected to follow Semantic Versioning (https://semver.org/)
version: 0.0.2
version: 0.0.3
# This is the version number of the application being deployed. This version number should be
# incremented each time you make changes to the application. Versions are not expected to
# follow Semantic Versioning. They should reflect the version the application is using.
# It is recommended to use it with quotes.
appVersion: "0.0.6"
appVersion: 0.0.7
20 changes: 13 additions & 7 deletions chart/crds/backendruntime-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -42,21 +42,27 @@ spec:
properties:
args:
description: |-
Args represents the args of the backendRuntime.
They can be appended or overwritten by the Playground args.
The key is the inference option, like default one or advanced
speculativeDecoding, the values are the corresponding args.
Flag around with {{ .XXX }} is a flag waiting for render.
Args represents the preset arguments of the backendRuntime.
They can be appended or overwritten by the Playground backendRuntimeConfig.
items:
description: |-
BackendRuntimeArg is preset arguments for easy to use.
Do not edit the preset names unless set the argument name explicitly
in Playground backendRuntimeConfig.
properties:
flags:
description: |-
Flags represents all the preset configurations.
Flag around with {{ .CONFIG }} is a configuration waiting for render.
items:
type: string
type: array
mode:
name:
description: Name represents the identifier of the backendRuntime
argument.
type: string
required:
- mode
- name
type: object
type: array
commands:
Expand Down
9 changes: 7 additions & 2 deletions chart/crds/playground-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -259,14 +259,19 @@ spec:
speculative decoding, then one model is main(target) model, another one
is draft model.
items:
description: ModelRefer refers to a created Model with it's
role.
properties:
name:
description: Name represents the model name.
type: string
role:
default: main
description: Role represents the model role once more than
one model is required.
description: |-
Role represents the model role once more than one model is required.
Such as a draft role, which means running with SpeculativeDecoding,
and default arguments for backend will be searched in backendRuntime
with the name of speculative-decoding.
enum:
- main
- draft
Expand Down
9 changes: 7 additions & 2 deletions chart/crds/service-crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -84,14 +84,19 @@ spec:
speculative decoding, then one model is main(target) model, another one
is draft model.
items:
description: ModelRefer refers to a created Model with it's
role.
properties:
name:
description: Name represents the model name.
type: string
role:
default: main
description: Role represents the model role once more than
one model is required.
description: |-
Role represents the model role once more than one model is required.
Such as a draft role, which means running with SpeculativeDecoding,
and default arguments for backend will be searched in backendRuntime
with the name of speculative-decoding.
enum:
- main
- draft
Expand Down
6 changes: 4 additions & 2 deletions chart/templates/backends/llamacpp.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,16 +12,18 @@ spec:
- ./llama-server
image: ghcr.io/ggerganov/llama.cpp
version: server
# Do not edit the preset argument name unless you know what you're doing.
# Feel free to add more arguments to suit your requirements.
args:
- mode: Default
- name: default
flags:
- -m
- "{{`{{ .ModelPath }}`}}"
- --host
- "0.0.0.0"
- --port
- "8080"
- mode: SpeculativeDecoding
- name: speculative-decoding
flags:
- -m
- "{{`{{ .ModelPath }}`}}"
Expand Down
4 changes: 3 additions & 1 deletion chart/templates/backends/sglang.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ spec:
- sglang.launch_server
image: lmsysorg/sglang
version: v0.2.10-cu121
# Do not edit the preset argument name unless you know what you're doing.
# Feel free to add more arguments to suit your requirements.
args:
- mode: Default
- name: default
flags:
- --model-path
- "{{`{{ .ModelPath }}`}}"
Expand Down
6 changes: 4 additions & 2 deletions chart/templates/backends/vllm.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -14,8 +14,10 @@ spec:
- vllm.entrypoints.openai.api_server
image: vllm/vllm-openai
version: v0.6.0
# Do not edit the preset argument name unless you know what you're doing.
# Feel free to add more arguments to suit your requirements.
args:
- mode: Default
- name: default
flags:
- --model
- "{{`{{ .ModelPath }}`}}"
Expand All @@ -25,7 +27,7 @@ spec:
- "0.0.0.0"
- --port
- "8080"
- mode: SpeculativeDecoding
- name: speculative-decoding
flags:
- --model
- "{{`{{ .ModelPath }}`}}"
Expand Down
6 changes: 3 additions & 3 deletions client-go/applyconfiguration/core/v1alpha1/modelclaims.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 2 additions & 2 deletions client-go/applyconfiguration/utils.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

20 changes: 13 additions & 7 deletions config/crd/bases/inference.llmaz.io_backendruntimes.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -43,21 +43,27 @@ spec:
properties:
args:
description: |-
Args represents the args of the backendRuntime.
They can be appended or overwritten by the Playground args.
The key is the inference option, like default one or advanced
speculativeDecoding, the values are the corresponding args.
Flag around with {{ .XXX }} is a flag waiting for render.
Args represents the preset arguments of the backendRuntime.
They can be appended or overwritten by the Playground backendRuntimeConfig.
items:
description: |-
BackendRuntimeArg is preset arguments for easy to use.
Do not edit the preset names unless set the argument name explicitly
in Playground backendRuntimeConfig.
properties:
flags:
description: |-
Flags represents all the preset configurations.
Flag around with {{ .CONFIG }} is a configuration waiting for render.
items:
type: string
type: array
mode:
name:
description: Name represents the identifier of the backendRuntime
argument.
type: string
required:
- mode
- name
type: object
type: array
commands:
Expand Down
9 changes: 7 additions & 2 deletions config/crd/bases/inference.llmaz.io_playgrounds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -260,14 +260,19 @@ spec:
speculative decoding, then one model is main(target) model, another one
is draft model.
items:
description: ModelRefer refers to a created Model with it's
role.
properties:
name:
description: Name represents the model name.
type: string
role:
default: main
description: Role represents the model role once more than
one model is required.
description: |-
Role represents the model role once more than one model is required.
Such as a draft role, which means running with SpeculativeDecoding,
and default arguments for backend will be searched in backendRuntime
with the name of speculative-decoding.
enum:
- main
- draft
Expand Down
Loading

0 comments on commit e2f0786

Please sign in to comment.