From 189801bb5f898d343a9679ffee6cb389e8ded2f9 Mon Sep 17 00:00:00 2001 From: Ewe Zi Yi <36802364+deadlycoconuts@users.noreply.github.com> Date: Thu, 25 Jan 2024 16:11:56 +0800 Subject: [PATCH] feat(api): Split UserContainerLimitRequestFactor into two separate config values (#367) * Split UserContainerLimitRequestFactor into two separate config values * Set UserContainerCPULimitRequestFactor as 0 in unit tests * Fix lint comment to shorten line length * Remove outdated golangci linters * Make setting of resource limits conditional * Increase defaultMemoryLimitRequestFactor to 2 --- .golangci.yml | 3 - api/turing/cluster/knative_service.go | 10 +- api/turing/cluster/knative_service_test.go | 66 ++++--- api/turing/cluster/kubernetes_service.go | 13 +- api/turing/cluster/kubernetes_service_test.go | 2 +- api/turing/cluster/models.go | 16 +- api/turing/cluster/servicebuilder/router.go | 26 +-- .../cluster/servicebuilder/router_test.go | 177 +++++++++--------- .../cluster/servicebuilder/service_builder.go | 57 +++--- .../servicebuilder/service_builder_test.go | 65 ++++--- api/turing/config/config.go | 8 +- api/turing/config/config_test.go | 20 +- api/turing/config/example.yaml | 3 +- api/turing/config/testdata/config-1.yaml | 3 +- .../service/router_deployment_service.go | 18 +- .../service/router_deployment_service_test.go | 49 +++-- 16 files changed, 299 insertions(+), 237 deletions(-) diff --git a/.golangci.yml b/.golangci.yml index 0f4ea2852..151fdea9a 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -7,7 +7,6 @@ run: linters: enable: - bodyclose - - deadcode - errcheck - gocyclo - gofmt @@ -19,9 +18,7 @@ linters: - misspell - revive - staticcheck - - structcheck - unused - - varcheck linters-settings: gocyclo: diff --git a/api/turing/cluster/knative_service.go b/api/turing/cluster/knative_service.go index 8a3e4f9e3..9d9a85fe2 100644 --- a/api/turing/cluster/knative_service.go +++ b/api/turing/cluster/knative_service.go @@ -50,8 +50,9 @@ type KnativeService struct { TopologySpreadConstraints []corev1.TopologySpreadConstraint `json:"topologySpreadConstraints"` // Resource properties - QueueProxyResourcePercentage int `json:"queueProxyResourcePercentage"` - UserContainerLimitRequestFactor float64 `json:"userContainerLimitRequestFactor"` + QueueProxyResourcePercentage int `json:"queueProxyResourcePercentage"` + UserContainerCPULimitRequestFactor float64 `json:"userContainerLimitCPURequestFactor"` + UserContainerMemoryLimitRequestFactor float64 `json:"userContainerLimitMemoryRequestFactor"` } // Creates a new config object compatible with the knative serving API, from @@ -131,7 +132,10 @@ func (cfg *KnativeService) buildSvcSpec( revisionName := getDefaultRevisionName(cfg.Name) // Build resource requirements for the user container - resourceReqs := cfg.buildResourceReqs(cfg.UserContainerLimitRequestFactor) + resourceReqs := cfg.buildResourceReqs( + cfg.UserContainerCPULimitRequestFactor, + cfg.UserContainerMemoryLimitRequestFactor, + ) // Build container spec var portName string diff --git a/api/turing/cluster/knative_service_test.go b/api/turing/cluster/knative_service_test.go index 2aac1c4fb..001a19bb0 100644 --- a/api/turing/cluster/knative_service_test.go +++ b/api/turing/cluster/knative_service_test.go @@ -172,15 +172,16 @@ func TestBuildKnativeServiceConfig(t *testing.T) { }{ "basic": { serviceCfg: KnativeService{ - BaseService: baseSvc, - ContainerPort: 8080, - MinReplicas: 1, - MaxReplicas: 2, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - IsClusterLocal: true, - 
QueueProxyResourcePercentage: 30, - UserContainerLimitRequestFactor: 1.5, + BaseService: baseSvc, + ContainerPort: 8080, + MinReplicas: 1, + MaxReplicas: 2, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + IsClusterLocal: true, + QueueProxyResourcePercentage: 30, + UserContainerCPULimitRequestFactor: 1.5, + UserContainerMemoryLimitRequestFactor: 1.5, }, expectedSpec: knservingv1.Service{ ObjectMeta: metav1.ObjectMeta{ @@ -222,16 +223,17 @@ func TestBuildKnativeServiceConfig(t *testing.T) { // upi has no liveness probe in pod spec and user-container is using h2c "upi": { serviceCfg: KnativeService{ - BaseService: baseSvc, - ContainerPort: 8080, - MinReplicas: 1, - MaxReplicas: 2, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - IsClusterLocal: true, - QueueProxyResourcePercentage: 30, - UserContainerLimitRequestFactor: 1.5, - Protocol: routerConfig.UPI, + BaseService: baseSvc, + ContainerPort: 8080, + MinReplicas: 1, + MaxReplicas: 2, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + IsClusterLocal: true, + QueueProxyResourcePercentage: 30, + UserContainerCPULimitRequestFactor: 1.5, + UserContainerMemoryLimitRequestFactor: 1.5, + Protocol: routerConfig.UPI, }, expectedSpec: knservingv1.Service{ ObjectMeta: metav1.ObjectMeta{ @@ -291,14 +293,15 @@ func TestBuildKnativeServiceConfig(t *testing.T) { }, "annotations": { serviceCfg: KnativeService{ - BaseService: baseSvc, - ContainerPort: 8080, - MinReplicas: 5, - MaxReplicas: 6, - AutoscalingMetric: "memory", - AutoscalingTarget: "70", - IsClusterLocal: false, - UserContainerLimitRequestFactor: 1.5, + BaseService: baseSvc, + ContainerPort: 8080, + MinReplicas: 5, + MaxReplicas: 6, + AutoscalingMetric: "memory", + AutoscalingTarget: "70", + IsClusterLocal: false, + UserContainerCPULimitRequestFactor: 1.5, + UserContainerMemoryLimitRequestFactor: 1.5, }, expectedSpec: knservingv1.Service{ ObjectMeta: metav1.ObjectMeta{ @@ -381,10 +384,11 @@ func TestBuildKnativeServiceConfig(t *testing.T) { }, }, }, - IsClusterLocal: true, - QueueProxyResourcePercentage: 30, - UserContainerLimitRequestFactor: 1.5, - Protocol: routerConfig.UPI, + IsClusterLocal: true, + QueueProxyResourcePercentage: 30, + UserContainerCPULimitRequestFactor: 1.5, + UserContainerMemoryLimitRequestFactor: 1.5, + Protocol: routerConfig.UPI, }, expectedSpec: knservingv1.Service{ ObjectMeta: metav1.ObjectMeta{ diff --git a/api/turing/cluster/kubernetes_service.go b/api/turing/cluster/kubernetes_service.go index 9a65b10fa..3192a244e 100644 --- a/api/turing/cluster/kubernetes_service.go +++ b/api/turing/cluster/kubernetes_service.go @@ -7,9 +7,14 @@ import ( "k8s.io/apimachinery/pkg/util/intstr" ) -// defaultLimitRequestFactor is the default multiplication factor applied to the resource request, -// to be set as resource limit -const defaultLimitRequestFactor = 1.0 +const ( + // defaultCPULimitRequestFactor is the default multiplication factor applied to the CPU request, + // to be set as the limit + defaultCPULimitRequestFactor = 1.0 + // defaultMemoryLimitRequestFactor is the default multiplication factor applied to the memory request, + // to be set as the limit + defaultMemoryLimitRequestFactor = 2.0 +) // KubernetesService defines the properties for Kubernetes services type KubernetesService struct { @@ -68,7 +73,7 @@ func (cfg *KubernetesService) buildDeployment(labels map[string]string) *appsv1. 
Args: cfg.Command, Ports: cfg.buildContainerPorts(), Env: cfg.Envs, - Resources: cfg.buildResourceReqs(defaultLimitRequestFactor), + Resources: cfg.buildResourceReqs(defaultCPULimitRequestFactor, defaultMemoryLimitRequestFactor), VolumeMounts: cfg.VolumeMounts, LivenessProbe: cfg.buildContainerProbe(livenessProbeType, int(cfg.ProbePort)), ReadinessProbe: cfg.buildContainerProbe(readinessProbeType, int(cfg.ProbePort)), diff --git a/api/turing/cluster/kubernetes_service_test.go b/api/turing/cluster/kubernetes_service_test.go index 73947bd35..d2f800d30 100644 --- a/api/turing/cluster/kubernetes_service_test.go +++ b/api/turing/cluster/kubernetes_service_test.go @@ -110,7 +110,7 @@ func TestBuildKubernetesServiceConfig(t *testing.T) { }, Limits: map[corev1.ResourceName]resource.Quantity{ corev1.ResourceCPU: resource.MustParse("1"), - corev1.ResourceMemory: resource.MustParse("1"), + corev1.ResourceMemory: resource.MustParse("2"), }, }, VolumeMounts: svcConf.VolumeMounts, diff --git a/api/turing/cluster/models.go b/api/turing/cluster/models.go index 9603eab17..afc3f056f 100644 --- a/api/turing/cluster/models.go +++ b/api/turing/cluster/models.go @@ -62,16 +62,22 @@ type BaseService struct { InitContainers []Container `json:"init_containers"` } -func (cfg *BaseService) buildResourceReqs(userContainerLimitRequestFactor float64) corev1.ResourceRequirements { +func (cfg *BaseService) buildResourceReqs( + UserContainerCPULimitRequestFactor float64, + UserContainerMemoryLimitRequestFactor float64, +) corev1.ResourceRequirements { reqs := map[corev1.ResourceName]resource.Quantity{ corev1.ResourceCPU: cfg.CPURequests, corev1.ResourceMemory: cfg.MemoryRequests, } - // Set resource limits to request * userContainerLimitRequestFactor - limits := map[corev1.ResourceName]resource.Quantity{ - corev1.ResourceCPU: ComputeResource(cfg.CPURequests, userContainerLimitRequestFactor), - corev1.ResourceMemory: ComputeResource(cfg.MemoryRequests, userContainerLimitRequestFactor), + // Set resource limits to request * userContainerCPULimitRequestFactor or UserContainerMemoryLimitRequestFactor + limits := map[corev1.ResourceName]resource.Quantity{} + if UserContainerCPULimitRequestFactor != 0 { + limits[corev1.ResourceCPU] = ComputeResource(cfg.CPURequests, UserContainerCPULimitRequestFactor) + } + if UserContainerMemoryLimitRequestFactor != 0 { + limits[corev1.ResourceMemory] = ComputeResource(cfg.MemoryRequests, UserContainerMemoryLimitRequestFactor) } return corev1.ResourceRequirements{ diff --git a/api/turing/cluster/servicebuilder/router.go b/api/turing/cluster/servicebuilder/router.go index 41f8f0b5a..a3cb67f07 100644 --- a/api/turing/cluster/servicebuilder/router.go +++ b/api/turing/cluster/servicebuilder/router.go @@ -105,7 +105,8 @@ func (sb *clusterSvcBuilder) NewRouterService( sentryEnabled bool, sentryDSN string, knativeQueueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) { // Create service name @@ -150,17 +151,18 @@ func (sb *clusterSvcBuilder) NewRouterService( VolumeMounts: volumeMounts, InitContainers: initContainers, }, - IsClusterLocal: false, - ContainerPort: routerPort, - Protocol: routerVersion.Protocol, - MinReplicas: routerVersion.ResourceRequest.MinReplica, - MaxReplicas: routerVersion.ResourceRequest.MaxReplica, - InitialScale: initialScale, - AutoscalingMetric: string(routerVersion.AutoscalingPolicy.Metric), - 
AutoscalingTarget: routerVersion.AutoscalingPolicy.Target, - TopologySpreadConstraints: topologySpreadConstraints, - QueueProxyResourcePercentage: knativeQueueProxyResourcePercentage, - UserContainerLimitRequestFactor: userContainerLimitRequestFactor, + IsClusterLocal: false, + ContainerPort: routerPort, + Protocol: routerVersion.Protocol, + MinReplicas: routerVersion.ResourceRequest.MinReplica, + MaxReplicas: routerVersion.ResourceRequest.MaxReplica, + InitialScale: initialScale, + AutoscalingMetric: string(routerVersion.AutoscalingPolicy.Metric), + AutoscalingTarget: routerVersion.AutoscalingPolicy.Target, + TopologySpreadConstraints: topologySpreadConstraints, + QueueProxyResourcePercentage: knativeQueueProxyResourcePercentage, + UserContainerCPULimitRequestFactor: userContainerCPULimitRequestFactor, + UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor, } return sb.validateKnativeService(svc) } diff --git a/api/turing/cluster/servicebuilder/router_test.go b/api/turing/cluster/servicebuilder/router_test.go index ec41a6e8a..5ccd64b10 100644 --- a/api/turing/cluster/servicebuilder/router_test.go +++ b/api/turing/cluster/servicebuilder/router_test.go @@ -162,16 +162,17 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - InitialScale: &testInitialScale, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + InitialScale: &testInitialScale, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success | basic upi": { @@ -263,16 +264,17 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.UPI, - MinReplicas: 2, - MaxReplicas: 4, - InitialScale: &testInitialScale, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.UPI, + MinReplicas: 2, + MaxReplicas: 4, + InitialScale: &testInitialScale, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success | all components": { @@ -371,15 +373,16 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success | standard ensembler with experiment mappings": { @@ 
-470,15 +473,16 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "rps", - AutoscalingTarget: "100", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "rps", + AutoscalingTarget: "100", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success | standard ensembler with route name path": { @@ -569,15 +573,16 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "rps", - AutoscalingTarget: "100", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "rps", + AutoscalingTarget: "100", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success | standard ensembler lazy routing": { @@ -668,15 +673,16 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "rps", - AutoscalingTarget: "100", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "rps", + AutoscalingTarget: "100", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success | traffic-splitting": { @@ -767,15 +773,16 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success | experiment engine": { @@ -895,15 +902,16 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "rps", - AutoscalingTarget: "100", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "rps", + AutoscalingTarget: "100", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + 
UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success | no default route": { @@ -971,15 +979,16 @@ func TestNewRouterService(t *testing.T) { }, }, }, - ContainerPort: 8080, - Protocol: routerConfig.HTTP, - MinReplicas: 2, - MaxReplicas: 4, - AutoscalingMetric: "memory", - AutoscalingTarget: "90", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + ContainerPort: 8080, + Protocol: routerConfig.HTTP, + MinReplicas: 2, + MaxReplicas: 4, + AutoscalingMetric: "memory", + AutoscalingTarget: "90", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "failure missing bigquery": { @@ -1016,7 +1025,7 @@ func TestNewRouterService(t *testing.T) { }, true, "sentry-dsn", - 20, 1.5, data.initialScale, + 20, 0, 1.5, data.initialScale, ) if data.err == "" { diff --git a/api/turing/cluster/servicebuilder/service_builder.go b/api/turing/cluster/servicebuilder/service_builder.go index 4465bbbbf..c0ac45a6d 100644 --- a/api/turing/cluster/servicebuilder/service_builder.go +++ b/api/turing/cluster/servicebuilder/service_builder.go @@ -67,7 +67,8 @@ type ClusterServiceBuilder interface { project *mlp.Project, secretName string, knativeQueueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) NewEnsemblerService( @@ -75,7 +76,8 @@ type ClusterServiceBuilder interface { project *mlp.Project, secretName string, knativeQueueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) NewRouterService( @@ -88,7 +90,8 @@ type ClusterServiceBuilder interface { sentryEnabled bool, sentryDSN string, knativeQueueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) NewFluentdService( @@ -149,7 +152,8 @@ func (sb *clusterSvcBuilder) NewEnricherService( project *mlp.Project, secretName string, knativeQueueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) { // Get the enricher reference @@ -215,16 +219,17 @@ func (sb *clusterSvcBuilder) NewEnricherService( Volumes: volumes, VolumeMounts: volumeMounts, }, - IsClusterLocal: true, - ContainerPort: int32(enricher.Port), - MinReplicas: enricher.ResourceRequest.MinReplica, - MaxReplicas: enricher.ResourceRequest.MaxReplica, - InitialScale: initialScale, - AutoscalingMetric: string(enricher.AutoscalingPolicy.Metric), - AutoscalingTarget: enricher.AutoscalingPolicy.Target, - TopologySpreadConstraints: topologySpreadConstraints, - QueueProxyResourcePercentage: knativeQueueProxyResourcePercentage, - UserContainerLimitRequestFactor: userContainerLimitRequestFactor, + IsClusterLocal: true, + ContainerPort: int32(enricher.Port), + MinReplicas: enricher.ResourceRequest.MinReplica, + MaxReplicas: enricher.ResourceRequest.MaxReplica, + InitialScale: initialScale, + 
AutoscalingMetric: string(enricher.AutoscalingPolicy.Metric), + AutoscalingTarget: enricher.AutoscalingPolicy.Target, + TopologySpreadConstraints: topologySpreadConstraints, + QueueProxyResourcePercentage: knativeQueueProxyResourcePercentage, + UserContainerCPULimitRequestFactor: userContainerCPULimitRequestFactor, + UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor, }) } @@ -235,7 +240,8 @@ func (sb *clusterSvcBuilder) NewEnsemblerService( project *mlp.Project, secretName string, knativeQueueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) { // Get the ensembler reference @@ -302,16 +308,17 @@ func (sb *clusterSvcBuilder) NewEnsemblerService( Volumes: volumes, VolumeMounts: volumeMounts, }, - IsClusterLocal: true, - ContainerPort: int32(docker.Port), - MinReplicas: docker.ResourceRequest.MinReplica, - MaxReplicas: docker.ResourceRequest.MaxReplica, - InitialScale: initialScale, - AutoscalingMetric: string(docker.AutoscalingPolicy.Metric), - AutoscalingTarget: docker.AutoscalingPolicy.Target, - TopologySpreadConstraints: topologySpreadConstraints, - QueueProxyResourcePercentage: knativeQueueProxyResourcePercentage, - UserContainerLimitRequestFactor: userContainerLimitRequestFactor, + IsClusterLocal: true, + ContainerPort: int32(docker.Port), + MinReplicas: docker.ResourceRequest.MinReplica, + MaxReplicas: docker.ResourceRequest.MaxReplica, + InitialScale: initialScale, + AutoscalingMetric: string(docker.AutoscalingPolicy.Metric), + AutoscalingTarget: docker.AutoscalingPolicy.Target, + TopologySpreadConstraints: topologySpreadConstraints, + QueueProxyResourcePercentage: knativeQueueProxyResourcePercentage, + UserContainerCPULimitRequestFactor: userContainerCPULimitRequestFactor, + UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor, }) } diff --git a/api/turing/cluster/servicebuilder/service_builder_test.go b/api/turing/cluster/servicebuilder/service_builder_test.go index 092971f9b..196585005 100644 --- a/api/turing/cluster/servicebuilder/service_builder_test.go +++ b/api/turing/cluster/servicebuilder/service_builder_test.go @@ -79,16 +79,17 @@ func TestNewEnricherService(t *testing.T) { }, VolumeMounts: []corev1.VolumeMount{{Name: secretVolume, MountPath: secretMountPath}}, }, - IsClusterLocal: true, - ContainerPort: 8080, - MinReplicas: 1, - MaxReplicas: 2, - InitialScale: &testInitialScale, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 10, - UserContainerLimitRequestFactor: 1.5, + IsClusterLocal: true, + ContainerPort: 8080, + MinReplicas: 1, + MaxReplicas: 2, + InitialScale: &testInitialScale, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 10, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "failure": { @@ -108,7 +109,7 @@ func TestNewEnricherService(t *testing.T) { Stream: "test-stream", Team: "test-team", } - svc, err := sb.NewEnricherService(routerVersion, project, "secret", 10, 1.5, data.initialScale) + svc, err := sb.NewEnricherService(routerVersion, project, "secret", 10, 0, 1.5, data.initialScale) if data.err == "" { assert.NoError(t, err) assert.Equal(t, data.expected, svc) @@ -157,16 +158,17 @@ func 
TestNewEnsemblerService(t *testing.T) { }, VolumeMounts: []corev1.VolumeMount{{Name: secretVolume, MountPath: secretMountPath}}, }, - IsClusterLocal: true, - ContainerPort: 8080, - MinReplicas: 2, - MaxReplicas: 3, - AutoscalingMetric: "concurrency", - AutoscalingTarget: "1", - InitialScale: &testInitialScale, - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + IsClusterLocal: true, + ContainerPort: 8080, + MinReplicas: 2, + MaxReplicas: 3, + AutoscalingMetric: "concurrency", + AutoscalingTarget: "1", + InitialScale: &testInitialScale, + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "success with ensembler docker type": { @@ -200,15 +202,16 @@ func TestNewEnsemblerService(t *testing.T) { }, VolumeMounts: []corev1.VolumeMount{{Name: secretVolume, MountPath: secretMountPath}}, }, - IsClusterLocal: true, - ContainerPort: 8080, - MinReplicas: 2, - MaxReplicas: 3, - AutoscalingMetric: "cpu", - AutoscalingTarget: "90", - TopologySpreadConstraints: testTopologySpreadConstraints, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.5, + IsClusterLocal: true, + ContainerPort: 8080, + MinReplicas: 2, + MaxReplicas: 3, + AutoscalingMetric: "cpu", + AutoscalingTarget: "90", + TopologySpreadConstraints: testTopologySpreadConstraints, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.5, }, }, "failure": { @@ -228,7 +231,7 @@ func TestNewEnsemblerService(t *testing.T) { Stream: "test-stream", Team: "test-team", } - svc, err := sb.NewEnsemblerService(routerVersion, project, "secret", 20, 1.5, data.initialScale) + svc, err := sb.NewEnsemblerService(routerVersion, project, "secret", 20, 0, 1.5, data.initialScale) if data.err == "" { assert.NoError(t, err) assert.Equal(t, data.expected, svc) diff --git a/api/turing/config/config.go b/api/turing/config/config.go index 1bccf49b1..42d974c79 100644 --- a/api/turing/config/config.go +++ b/api/turing/config/config.go @@ -263,8 +263,9 @@ type KubernetesLabelConfigs struct { // KnativeServiceDefaults captures some of the configurable defaults specific to // Knative services type KnativeServiceDefaults struct { - QueueProxyResourcePercentage int - UserContainerLimitRequestFactor float64 + QueueProxyResourcePercentage int + UserContainerCPULimitRequestFactor float64 `json:"userContainerLimitCPURequestFactor"` + UserContainerMemoryLimitRequestFactor float64 `json:"userContainerLimitMemoryRequestFactor"` } // SinglePageApplicationConfig holds configuration required for serving SPAs @@ -599,7 +600,8 @@ func setDefaultValues(v *viper.Viper) { v.SetDefault("DeployConfig::MaxAllowedReplica", "20") v.SetDefault("KnativeServiceDefaults::QueueProxyResourcePercentage", "30") - v.SetDefault("KnativeServiceDefaults::UserContainerLimitRequestFactor", "1") + v.SetDefault("KnativeServiceDefaults::UserContainerCPULimitRequestFactor", "0") + v.SetDefault("KnativeServiceDefaults::UserContainerMemoryLimitRequestFactor", "1") v.SetDefault("RouterDefaults::Image", "") v.SetDefault("RouterDefaults::FiberDebugLogEnabled", "false") diff --git a/api/turing/config/config_test.go b/api/turing/config/config_test.go index 1ea2bdb5f..b3bf52679 100644 --- a/api/turing/config/config_test.go +++ b/api/turing/config/config_test.go @@ -254,8 +254,9 @@ func TestLoad(t *testing.T) { 
MaxAllowedReplica: 20, }, KnativeServiceDefaults: &config.KnativeServiceDefaults{ - QueueProxyResourcePercentage: 30, - UserContainerLimitRequestFactor: 1, + QueueProxyResourcePercentage: 30, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1, }, RouterDefaults: &config.RouterDefaults{ LogLevel: "INFO", @@ -371,8 +372,9 @@ func TestLoad(t *testing.T) { }, }, KnativeServiceDefaults: &config.KnativeServiceDefaults{ - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.25, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.25, }, RouterDefaults: &config.RouterDefaults{ LogLevel: "INFO", @@ -520,8 +522,9 @@ func TestLoad(t *testing.T) { }, }, KnativeServiceDefaults: &config.KnativeServiceDefaults{ - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.25, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.25, }, RouterDefaults: &config.RouterDefaults{ LogLevel: "INFO", @@ -660,8 +663,9 @@ func TestLoad(t *testing.T) { MaxOpenConns: 4, }, KnativeServiceDefaults: &config.KnativeServiceDefaults{ - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.25, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 0, + UserContainerMemoryLimitRequestFactor: 1.25, }, DeployConfig: &config.DeploymentConfig{ EnvironmentType: "dev", diff --git a/api/turing/config/example.yaml b/api/turing/config/example.yaml index d8b7ae796..7dd467c49 100644 --- a/api/turing/config/example.yaml +++ b/api/turing/config/example.yaml @@ -125,7 +125,8 @@ KubernetesLabelConfigs: KnativeServiceDefaults: QueueProxyResourcePercentage: 30 # The CPU / memory limit will be applied as a factor of the requested value - UserContainerLimitRequestFactor: 1 + UserContainerCPULimitRequestFactor: 0 + UserContainerMemoryLimitRequestFactor: 1 # Spark App config for running on Kubernetes # This is specific to the environment that you Kubernetes cluster runs on. 
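Worked example (illustrative note, not part of the patch): with a user container requesting 500m CPU and 512Mi memory, the defaults above — UserContainerCPULimitRequestFactor: 0 and UserContainerMemoryLimitRequestFactor: 1 — now leave the CPU limit unset and set the memory limit to 512Mi (1 x the request). A non-zero factor simply multiplies the request, so a memory factor of 2 would yield a 1Gi limit, which matches the in-code default (defaultMemoryLimitRequestFactor = 2.0) that kubernetes_service.go applies to the non-Knative services it builds. A Go sketch of the corresponding limit-building logic follows at the end of this patch.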
diff --git a/api/turing/config/testdata/config-1.yaml b/api/turing/config/testdata/config-1.yaml index 6e73ca7d2..ff2a7aeac 100644 --- a/api/turing/config/testdata/config-1.yaml +++ b/api/turing/config/testdata/config-1.yaml @@ -46,7 +46,8 @@ DeployConfig: MinAvailablePercentage: 20 KnativeServiceDefaults: QueueProxyResourcePercentage: 20 - UserContainerLimitRequestFactor: 1.25 + UserContainerCPULimitRequestFactor: 0 + UserContainerMemoryLimitRequestFactor: 1.25 RouterDefaults: FluentdConfig: FlushIntervalSeconds: 60 diff --git a/api/turing/service/router_deployment_service.go b/api/turing/service/router_deployment_service.go index 86edaa5b0..0c7772909 100644 --- a/api/turing/service/router_deployment_service.go +++ b/api/turing/service/router_deployment_service.go @@ -176,7 +176,8 @@ func (ds *deploymentService) DeployRouterVersion( secretName, experimentConfig, ds.routerDefaults, ds.sentryEnabled, ds.sentryDSN, ds.knativeServiceConfig.QueueProxyResourcePercentage, - ds.knativeServiceConfig.UserContainerLimitRequestFactor, + ds.knativeServiceConfig.UserContainerCPULimitRequestFactor, + ds.knativeServiceConfig.UserContainerMemoryLimitRequestFactor, ) if err != nil { return endpoint, err @@ -283,7 +284,8 @@ func (ds *deploymentService) UndeployRouterVersion( routerVersion, nil, project, ds.environmentType, "", nil, ds.routerDefaults, ds.sentryEnabled, ds.sentryDSN, ds.knativeServiceConfig.QueueProxyResourcePercentage, - ds.knativeServiceConfig.UserContainerLimitRequestFactor, + ds.knativeServiceConfig.UserContainerCPULimitRequestFactor, + ds.knativeServiceConfig.UserContainerMemoryLimitRequestFactor, ) if err != nil { return err @@ -372,7 +374,8 @@ func (ds *deploymentService) createServices( sentryEnabled bool, sentryDSN string, knativeQueueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, ) ([]*cluster.KnativeService, error) { services := []*cluster.KnativeService{} namespace := servicebuilder.GetNamespace(project) @@ -391,7 +394,8 @@ func (ds *deploymentService) createServices( } enricherSvc, err := ds.svcBuilder.NewEnricherService( routerVersion, project, secretName, - knativeQueueProxyResourcePercentage, userContainerLimitRequestFactor, currEnricherReplicas, + knativeQueueProxyResourcePercentage, userContainerCPULimitRequestFactor, + userContainerMemoryLimitRequestFactor, currEnricherReplicas, ) if err != nil { return services, err @@ -413,7 +417,8 @@ func (ds *deploymentService) createServices( } ensemblerSvc, err := ds.svcBuilder.NewEnsemblerService( routerVersion, project, secretName, - knativeQueueProxyResourcePercentage, userContainerLimitRequestFactor, currEnsemblerReplicas, + knativeQueueProxyResourcePercentage, userContainerCPULimitRequestFactor, + userContainerMemoryLimitRequestFactor, currEnsemblerReplicas, ) if err != nil { return services, err @@ -433,7 +438,8 @@ func (ds *deploymentService) createServices( routerService, err := ds.svcBuilder.NewRouterService( routerVersion, project, envType, secretName, experimentConfig, routerDefaults, sentryEnabled, sentryDSN, - knativeQueueProxyResourcePercentage, userContainerLimitRequestFactor, currRouterReplicas, + knativeQueueProxyResourcePercentage, userContainerCPULimitRequestFactor, + userContainerMemoryLimitRequestFactor, currRouterReplicas, ) if err != nil { return services, err diff --git a/api/turing/service/router_deployment_service_test.go b/api/turing/service/router_deployment_service_test.go index 
b6e33e89f..af7ecbcee 100644 --- a/api/turing/service/router_deployment_service_test.go +++ b/api/turing/service/router_deployment_service_test.go @@ -76,7 +76,8 @@ func (msb *mockClusterServiceBuilder) NewEnricherService( project *mlp.Project, _ string, queueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) { if rv != msb.rv { @@ -87,8 +88,9 @@ func (msb *mockClusterServiceBuilder) NewEnricherService( Name: fmt.Sprintf("%s-enricher-%d", rv.Router.Name, rv.Version), Namespace: project.Name, }, - QueueProxyResourcePercentage: queueProxyResourcePercentage, - UserContainerLimitRequestFactor: userContainerLimitRequestFactor, + QueueProxyResourcePercentage: queueProxyResourcePercentage, + UserContainerCPULimitRequestFactor: userContainerCPULimitRequestFactor, + UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor, }, nil } @@ -97,7 +99,8 @@ func (msb *mockClusterServiceBuilder) NewEnsemblerService( project *mlp.Project, _ string, queueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) { if rv != msb.rv { @@ -108,8 +111,9 @@ func (msb *mockClusterServiceBuilder) NewEnsemblerService( Name: fmt.Sprintf("%s-ensembler-%d", rv.Router.Name, rv.Version), Namespace: project.Name, }, - QueueProxyResourcePercentage: queueProxyResourcePercentage, - UserContainerLimitRequestFactor: userContainerLimitRequestFactor, + QueueProxyResourcePercentage: queueProxyResourcePercentage, + UserContainerCPULimitRequestFactor: userContainerCPULimitRequestFactor, + UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor, }, nil } @@ -123,7 +127,8 @@ func (msb *mockClusterServiceBuilder) NewRouterService( sentryEnabled bool, sentryDSN string, queueProxyResourcePercentage int, - userContainerLimitRequestFactor float64, + userContainerCPULimitRequestFactor float64, + userContainerMemoryLimitRequestFactor float64, initialScale *int, ) (*cluster.KnativeService, error) { if rv != msb.rv { @@ -145,8 +150,9 @@ func (msb *mockClusterServiceBuilder) NewRouterService( Data: string(expConfig), }, }, - QueueProxyResourcePercentage: queueProxyResourcePercentage, - UserContainerLimitRequestFactor: userContainerLimitRequestFactor, + QueueProxyResourcePercentage: queueProxyResourcePercentage, + UserContainerCPULimitRequestFactor: userContainerCPULimitRequestFactor, + UserContainerMemoryLimitRequestFactor: userContainerMemoryLimitRequestFactor, }, nil } @@ -234,8 +240,9 @@ func TestDeployEndpoint(t *testing.T) { sentryEnabled: true, sentryDSN: "test:dsn", knativeServiceConfig: &config.KnativeServiceDefaults{ - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.75, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 1.75, + UserContainerMemoryLimitRequestFactor: 1.75, }, clusterControllers: map[string]cluster.Controller{ testEnv: controller, @@ -300,16 +307,18 @@ func TestDeployEndpoint(t *testing.T) { Name: fmt.Sprintf("%s-enricher-%d", routerVersion.Router.Name, routerVersion.Version), Namespace: testNamespace, }, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.75, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 1.75, + UserContainerMemoryLimitRequestFactor: 1.75, }) 
controller.AssertCalled(t, "DeployKnativeService", mock.Anything, &cluster.KnativeService{ BaseService: &cluster.BaseService{ Name: fmt.Sprintf("%s-ensembler-%d", routerVersion.Router.Name, routerVersion.Version), Namespace: testNamespace, }, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.75, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 1.75, + UserContainerMemoryLimitRequestFactor: 1.75, }) controller.AssertCalled(t, "ApplyConfigMap", mock.Anything, testNamespace, &cluster.ConfigMap{Name: fmt.Sprintf("%s-fiber-config-%d", routerVersion.Router.Name, routerVersion.Version)}) @@ -331,8 +340,9 @@ func TestDeployEndpoint(t *testing.T) { ), }, }, - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.75, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 1.75, + UserContainerMemoryLimitRequestFactor: 1.75, }) controller.AssertCalled(t, "CreateSecret", mock.Anything, &cluster.Secret{ Name: fmt.Sprintf("%s-svc-acct-secret-%d", routerVersion.Router.Name, routerVersion.Version), @@ -431,8 +441,9 @@ func TestDeleteEndpoint(t *testing.T) { deploymentTimeout: timeout, deploymentDeletionTimeout: timeout, knativeServiceConfig: &config.KnativeServiceDefaults{ - QueueProxyResourcePercentage: 20, - UserContainerLimitRequestFactor: 1.75, + QueueProxyResourcePercentage: 20, + UserContainerCPULimitRequestFactor: 1.75, + UserContainerMemoryLimitRequestFactor: 1.75, }, clusterControllers: map[string]cluster.Controller{ testEnv: controller,