Merge pull request #125 from kerthcet/cleanup/modelclaims-change

Change ModelClaims API
InftyAI · Sep 5, 2024 · 71a9652 · 71a9652
2 parents 24ee839 + a4f6746
commit 71a9652
Show file tree

Hide file tree

Showing 37 changed files with 549 additions and 400 deletions.
diff --git a/README.md b/README.md
@@ -1,7 +1,7 @@
 <p align="center">
   <picture>
     <source media="(prefers-color-scheme: dark)" srcset="./docs/assets/logo.png">
-    <img alt="llmaz" src="./docs/assets/logo.png" width=55%>
+    <img alt="llmaz" src="https://github.com/InftyAI/llmaz/blob/main/docs/assets/logo.png" width=55%>
   </picture>
 </p>
 

diff --git a/api/core/v1alpha1/model_types.go b/api/core/v1alpha1/model_types.go
@@ -120,28 +120,35 @@ type ModelClaim struct {
 	InferenceFlavors []FlavorName `json:"inferenceFlavors,omitempty"`
 }
 
-type InferenceMode string
+type ModelRole string
 
 const (
-	Standard            InferenceMode = "Standard"
-	SpeculativeDecoding InferenceMode = "SpeculativeDecoding"
+	// MainRole represents the main model. If only one model is required,
+	// it must be the main model. Only one main model is allowed.
+	MainRole ModelRole = "main"
+	// DraftRole represents the draft model in speculative decoding;
+	// the main model is then the target model.
+	DraftRole ModelRole = "draft"
 )
 
-// MultiModelsClaim represents claiming for multiple models with different claimModes,
-// like standard or speculative-decoding to support different inference scenarios.
-type MultiModelsClaim struct {
-	// ModelNames represents a list of models, there maybe multiple models here
-	// to support state-of-the-art technologies like speculative decoding.
-	// If the composedMode is SpeculativeDecoding, the first model is the target model,
-	// and the second model is the draft model.
-	// +kubebuilder:validation:MinItems=1
-	ModelNames []ModelName `json:"modelNames,omitempty"`
-	// Mode represents the paradigm to serve the model, whether via a standard way
-	// or via an advanced technique like SpeculativeDecoding.
-	// +kubebuilder:default=Standard
-	// +kubebuilder:validation:Enum={Standard,SpeculativeDecoding}
+type ModelRepresentative struct {
+	// Name represents the model name.
+	Name ModelName `json:"name"`
+	// Role represents the model role once more than one model is required.
+	// +kubebuilder:validation:Enum={main,draft}
+	// +kubebuilder:default=main
 	// +optional
-	InferenceMode InferenceMode `json:"inferenceMode,omitempty"`
+	Role *ModelRole `json:"role,omitempty"`
+}
+
+// ModelClaims represents multiple claims for different models.
+type ModelClaims struct {
+	// Models represents a list of models with roles specified. There may be
+	// multiple models here to support state-of-the-art technologies like
+	// speculative decoding, in which case one model is the main (target)
+	// model and another one is the draft model.
+	// +kubebuilder:validation:MinItems=1
+	Models []ModelRepresentative `json:"models,omitempty"`
 	// InferenceFlavors represents a list of flavors with fungibility supported
 	// to serve the model.
 	// - If not set, always apply with the 0-index model by default.

diff --git a/api/core/v1alpha1/zz_generated.deepcopy.go b/api/core/v1alpha1/zz_generated.deepcopy.go
diff --git a/api/inference/v1alpha1/playground_types.go b/api/inference/v1alpha1/playground_types.go
@@ -28,17 +28,16 @@ type PlaygroundSpec struct {
 	// +kubebuilder:default=1
 	// +optional
 	Replicas *int32 `json:"replicas,omitempty"`
-	// ModelClaim represents claiming for one model, it's the standard claimMode
-	// of multiModelsClaim compared to other modes like SpeculativeDecoding.
-	// Most of the time, modelClaim is enough.
-	// ModelClaim and multiModelsClaim are exclusive configured.
+	// ModelClaim represents claiming for one model, it's a simplified use case
+	// of modelClaims. Most of the time, modelClaim is enough.
+	// ModelClaim and modelClaims are mutually exclusive.
 	// +optional
 	ModelClaim *coreapi.ModelClaim `json:"modelClaim,omitempty"`
-	// MultiModelsClaim represents claiming for multiple models with different claimModes,
-	// like standard or speculative-decoding to support different inference scenarios.
-	// ModelClaim and multiModelsClaim are exclusive configured.
+	// ModelClaims represents claiming for multiple models for more complicated
+	// use cases like speculative-decoding.
+	// ModelClaims and modelClaim are mutually exclusive.
 	// +optional
-	MultiModelsClaim *coreapi.MultiModelsClaim `json:"multiModelsClaim,omitempty"`
+	ModelClaims *coreapi.ModelClaims `json:"modelClaims,omitempty"`
 	// BackendConfig represents the inference backend configuration
 	// under the hood, e.g. vLLM, which is the default backend.
 	// +optional

diff --git a/api/inference/v1alpha1/service_types.go b/api/inference/v1alpha1/service_types.go
@@ -27,9 +27,8 @@ import (
 // Service controller will maintain multi-flavor of workloads with
 // different accelerators for cost or performance considerations.
 type ServiceSpec struct {
-	// MultiModelsClaim represents claiming for multiple models with different claimModes,
-	// like standard or speculative-decoding to support different inference scenarios.
-	MultiModelsClaim coreapi.MultiModelsClaim `json:"multiModelsClaim,omitempty"`
+	// ModelClaims represents multiple claims for different models.
+	ModelClaims coreapi.ModelClaims `json:"modelClaims,omitempty"`
 	// WorkloadTemplate defines the underlying workload layout and configuration.
 	// Note: the LWS spec might be twisted with various LWS instances to support
 	// accelerator fungibility or other cutting-edge researches.

diff --git a/api/inference/v1alpha1/zz_generated.deepcopy.go b/api/inference/v1alpha1/zz_generated.deepcopy.go
diff --git a/client-go/applyconfiguration/core/v1alpha1/modelclaims.go b/client-go/applyconfiguration/core/v1alpha1/modelclaims.go
diff --git a/client-go/applyconfiguration/core/v1alpha1/modelrepresentative.go b/client-go/applyconfiguration/core/v1alpha1/modelrepresentative.go
diff --git a/client-go/applyconfiguration/core/v1alpha1/multimodelsclaim.go b/client-go/applyconfiguration/core/v1alpha1/multimodelsclaim.go