Skip to content

Commit

Permalink
Merge pull request #125 from kerthcet/cleanup/modelclaims-change
Browse files Browse the repository at this point in the history
Change ModelClaims API
  • Loading branch information
InftyAI-Agent authored Sep 5, 2024
2 parents 24ee839 + a4f6746 commit 71a9652
Show file tree
Hide file tree
Showing 37 changed files with 549 additions and 400 deletions.
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<p align="center">
<picture>
<source media="(prefers-color-scheme: dark)" srcset="./docs/assets/logo.png">
<img alt="llmaz" src="./docs/assets/logo.png" width=55%>
<img alt="llmaz" src="https://github.com/InftyAI/llmaz/blob/main/docs/assets/logo.png" width=55%>
</picture>
</p>

Expand Down
41 changes: 24 additions & 17 deletions api/core/v1alpha1/model_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -120,28 +120,35 @@ type ModelClaim struct {
InferenceFlavors []FlavorName `json:"inferenceFlavors,omitempty"`
}

type InferenceMode string
type ModelRole string

const (
Standard InferenceMode = "Standard"
SpeculativeDecoding InferenceMode = "SpeculativeDecoding"
// MainRole represents the main model. If only one model is required,
// it must be the main model. Only one main model is allowed.
MainRole ModelRole = "main"
// DraftRole represents the draft model in speculative decoding;
// the main model is then the target model.
DraftRole ModelRole = "draft"
)

// MultiModelsClaim represents claiming for multiple models with different claimModes,
// like standard or speculative-decoding to support different inference scenarios.
type MultiModelsClaim struct {
// ModelNames represents a list of models, there maybe multiple models here
// to support state-of-the-art technologies like speculative decoding.
// If the composedMode is SpeculativeDecoding, the first model is the target model,
// and the second model is the draft model.
// +kubebuilder:validation:MinItems=1
ModelNames []ModelName `json:"modelNames,omitempty"`
// Mode represents the paradigm to serve the model, whether via a standard way
// or via an advanced technique like SpeculativeDecoding.
// +kubebuilder:default=Standard
// +kubebuilder:validation:Enum={Standard,SpeculativeDecoding}
type ModelRepresentative struct {
// Name represents the model name.
Name ModelName `json:"name"`
// Role represents the model role when more than one model is required.
// +kubebuilder:validation:Enum={main,draft}
// +kubebuilder:default=main
// +optional
InferenceMode InferenceMode `json:"inferenceMode,omitempty"`
Role *ModelRole `json:"role,omitempty"`
}

// ModelClaims represents multiple claims for different models.
type ModelClaims struct {
// Models represents a list of models with roles specified. There may be
// multiple models here to support state-of-the-art technologies like
// speculative decoding, in which case one model is the main (target) model
// and another one is the draft model.
// +kubebuilder:validation:MinItems=1
Models []ModelRepresentative `json:"models,omitempty"`
// InferenceFlavors represents a list of flavors with fungibility supported
// to serve the model.
// - If not set, always apply with the 0-index model by default.
Expand Down
72 changes: 47 additions & 25 deletions api/core/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

15 changes: 7 additions & 8 deletions api/inference/v1alpha1/playground_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,17 +28,16 @@ type PlaygroundSpec struct {
// +kubebuilder:default=1
// +optional
Replicas *int32 `json:"replicas,omitempty"`
// ModelClaim represents claiming for one model, it's the standard claimMode
// of multiModelsClaim compared to other modes like SpeculativeDecoding.
// Most of the time, modelClaim is enough.
// ModelClaim and multiModelsClaim are exclusive configured.
// ModelClaim represents claiming for one model, it's a simplified use case
// of modelClaims. Most of the time, modelClaim is enough.
// ModelClaim and modelClaims are mutually exclusive.
// +optional
ModelClaim *coreapi.ModelClaim `json:"modelClaim,omitempty"`
// MultiModelsClaim represents claiming for multiple models with different claimModes,
// like standard or speculative-decoding to support different inference scenarios.
// ModelClaim and multiModelsClaim are exclusive configured.
// ModelClaims represents claiming for multiple models for more complicated
// use cases like speculative-decoding.
// ModelClaims and modelClaim are mutually exclusive.
// +optional
MultiModelsClaim *coreapi.MultiModelsClaim `json:"multiModelsClaim,omitempty"`
ModelClaims *coreapi.ModelClaims `json:"modelClaims,omitempty"`
// BackendConfig represents the inference backend configuration
// under the hood, e.g. vLLM, which is the default backend.
// +optional
Expand Down
5 changes: 2 additions & 3 deletions api/inference/v1alpha1/service_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,8 @@ import (
// Service controller will maintain multi-flavor of workloads with
// different accelerators for cost or performance considerations.
type ServiceSpec struct {
// MultiModelsClaim represents claiming for multiple models with different claimModes,
// like standard or speculative-decoding to support different inference scenarios.
MultiModelsClaim coreapi.MultiModelsClaim `json:"multiModelsClaim,omitempty"`
// ModelClaims represents multiple claims for different models.
ModelClaims coreapi.ModelClaims `json:"modelClaims,omitempty"`
// WorkloadTemplate defines the underlying workload layout and configuration.
// Note: the LWS spec might be twisted with various LWS instances to support
// accelerator fungibility or other cutting-edge research.
Expand Down
8 changes: 4 additions & 4 deletions api/inference/v1alpha1/zz_generated.deepcopy.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

58 changes: 58 additions & 0 deletions client-go/applyconfiguration/core/v1alpha1/modelclaims.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

51 changes: 51 additions & 0 deletions client-go/applyconfiguration/core/v1alpha1/modelrepresentative.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

64 changes: 0 additions & 64 deletions client-go/applyconfiguration/core/v1alpha1/multimodelsclaim.go

This file was deleted.

Loading

0 comments on commit 71a9652

Please sign in to comment.