diff --git a/charts/kthena/charts/networking/crds/networking.serving.volcano.sh_modelroutes.yaml b/charts/kthena/charts/networking/crds/networking.serving.volcano.sh_modelroutes.yaml index d5bb2a411..ed84f4132 100644 --- a/charts/kthena/charts/networking/crds/networking.serving.volcano.sh_modelroutes.yaml +++ b/charts/kthena/charts/networking/crds/networking.serving.volcano.sh_modelroutes.yaml @@ -51,7 +51,7 @@ spec: description: |- `model` in the LLM request, it could be a base model name, lora adapter name or even a virtual model name. This field is used to match scenarios other than model adapter name and - this field could be empty, but it and `ModelAdapters` can't both be empty. + this field could be empty, but it and `ModelAdapters` can't both be empty. type: string x-kubernetes-validations: - message: modelName is immutable diff --git a/docs/kthena/docs/reference/crd/networking.serving.volcano.sh.md b/docs/kthena/docs/reference/crd/networking.serving.volcano.sh.md index 79bc40c8e..fa7bc47a6 100644 --- a/docs/kthena/docs/reference/crd/networking.serving.volcano.sh.md +++ b/docs/kthena/docs/reference/crd/networking.serving.volcano.sh.md @@ -170,7 +170,7 @@ _Appears in:_ | Field | Description | Default | Validation | | --- | --- | --- | --- | -| `modelName` _string_ | `model` in the LLM request, it could be a base model name, lora adapter name or even
a virtual model name. This field is used to match scenarios other than model adapter name and
this field could be empty, but it and `ModelAdapters` can't both be empty. | | | +| `modelName` _string_ | `model` in the LLM request, it could be a base model name, lora adapter name or even
a virtual model name. This field is used to match scenarios other than model adapter name and
this field could be empty, but it and `ModelAdapters` can't both be empty. | | | | `loraAdapters` _string array_ | `model` in the LLM request could be lora adapter name,
here is a list of Lora Adapter Names to match. | | MaxItems: 10
| | `parentRefs` _ParentReference array_ | ParentRefs references the Gateways that this ModelRoute should be attached to.
If empty, the ModelRoute will be attached to all Gateways in the same namespace. | | | | `rules` _[Rule](#rule) array_ | An ordered list of route rules for LLM traffic. The first rule
matching an incoming request will be used.
If no rule is matched, an HTTP 404 status code MUST be returned. | | MaxItems: 16
| diff --git a/pkg/apis/networking/v1alpha1/modelroute_types.go b/pkg/apis/networking/v1alpha1/modelroute_types.go index a0f7b9914..cc6062a05 100644 --- a/pkg/apis/networking/v1alpha1/modelroute_types.go +++ b/pkg/apis/networking/v1alpha1/modelroute_types.go @@ -26,7 +26,7 @@ import ( type ModelRouteSpec struct { // `model` in the LLM request, it could be a base model name, lora adapter name or even // a virtual model name. This field is used to match scenarios other than model adapter name and - // this field could be empty, but it and `ModelAdapters` can't both be empty. + // this field could be empty, but it and `ModelAdapters` can't both be empty. // // +kubebuilder:validation:XValidation:rule="self == oldSelf",message="modelName is immutable" ModelName string `json:"modelName,omitempty"` diff --git a/pkg/kthena-router/backend/metrics/metrics.go b/pkg/kthena-router/backend/metrics/metrics.go index 835930b4e..88aa19a59 100644 --- a/pkg/kthena-router/backend/metrics/metrics.go +++ b/pkg/kthena-router/backend/metrics/metrics.go @@ -34,7 +34,7 @@ func HTTPClient() *http.Client { return httpClient } -// This function refer to aibrix(https://github.com/vllm-project/aibrix/blob/main/pkg/metrics/utils.go) +// This function refers to aibrix(https://github.com/vllm-project/aibrix/blob/main/pkg/metrics/utils.go) func ParseMetricsURL(url string) (map[string]*dto.MetricFamily, error) { resp, err := httpClient.Get(url) if err != nil { diff --git a/pkg/kthena-router/scheduler/plugins/least_request.go b/pkg/kthena-router/scheduler/plugins/least_request.go index 5c48100ab..69fcab4ff 100644 --- a/pkg/kthena-router/scheduler/plugins/least_request.go +++ b/pkg/kthena-router/scheduler/plugins/least_request.go @@ -75,7 +75,7 @@ func (l *LeastRequest) Score(ctx *framework.Context, pods []*datastore.PodInfo) baseScores := make(map[*datastore.PodInfo]float64) maxScore := 0.0 for _, info := range pods { - // The weight of waiting requests is 100. It's a magic number just to sinificantly lower the score of the pod when there are waiting reqs. + // The weight of waiting requests is 100. It's a magic number just to significantly lower the score of the pod when there are waiting reqs. base := info.GetRequestRunningNum() + 100*info.GetRequestWaitingNum() baseScores[info] = base if base > maxScore {