diff --git a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml index 0317f5e882..fdcb7d6288 100644 --- a/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml +++ b/helm-chart/kuberay-operator/crds/ray.io_rayservices.yaml @@ -12144,6 +12144,300 @@ spec: required: - importPath type: object + serveService: + description: ServeService is the Kubernetes service for head node + and worker nodes who have healthy http proxy to + properties: + apiVersion: + description: APIVersion defines the versioned schema of this representation + of an object. + type: string + kind: + description: Kind is a string value representing the REST resource + this object represents. + type: string + metadata: + description: 'Standard object''s metadata. More info: https://git.k8s.' + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string + type: object + spec: + description: Spec defines the behavior of a service. https://git.k8s. + properties: + allocateLoadBalancerNodePorts: + description: allocateLoadBalancerNodePorts defines if NodePorts + will be automatically allocated for services with + type: boolean + clusterIP: + description: clusterIP is the IP address of the service and + is usually assigned randomly. + type: string + clusterIPs: + description: ClusterIPs is a list of IP addresses assigned + to this service, and are usually assigned randomly. + items: + type: string + type: array + x-kubernetes-list-type: atomic + externalIPs: + description: externalIPs is a list of IP addresses for which + nodes in the cluster will also accept traffic for th + items: + type: string + type: array + externalName: + description: externalName is the external reference that discovery + mechanisms will return as an alias for this se + type: string + externalTrafficPolicy: + description: externalTrafficPolicy denotes if this Service + desires to route external traffic to node-local or clu + type: string + healthCheckNodePort: + description: healthCheckNodePort specifies the healthcheck + nodePort for the service. + format: int32 + type: integer + internalTrafficPolicy: + description: InternalTrafficPolicy specifies if the cluster + internal traffic should be routed to all endpoints or + type: string + ipFamilies: + description: IPFamilies is a list of IP families (e.g. IPv4, + IPv6) assigned to this service. + items: + description: IPFamily represents the IP Family (IPv4 or + IPv6). + type: string + type: array + x-kubernetes-list-type: atomic + ipFamilyPolicy: + description: IPFamilyPolicy represents the dual-stack-ness + requested or required by this Service. + type: string + loadBalancerClass: + description: loadBalancerClass is the class of the load balancer + implementation this Service belongs to. + type: string + loadBalancerIP: + description: 'Only applies to Service Type: LoadBalancer LoadBalancer + will get created with the IP specified in th' + type: string + loadBalancerSourceRanges: + description: If specified and supported by the platform, this + will restrict traffic through the cloud-provider lo + items: + type: string + type: array + ports: + description: 'The list of ports that are exposed by this service. + More info: https://kubernetes.' + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: The application protocol for this port. + This field follows standard Kubernetes label syntax. + type: string + name: + description: The name of this port within the service. + This must be a DNS_LABEL. + type: string + nodePort: + description: The port on each node on which this service + is exposed when type is NodePort or LoadBalancer. + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: The IP protocol for this port. Supports + "TCP", "UDP", and "SCTP". Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the pods targeted by the service. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array + x-kubernetes-list-map-keys: + - port + - protocol + x-kubernetes-list-type: map + publishNotReadyAddresses: + description: publishNotReadyAddresses indicates that any agent + which deals with endpoints for this Service should + type: boolean + selector: + additionalProperties: + type: string + description: Route service traffic to pods with label keys + and values matching this selector. + type: object + x-kubernetes-map-type: atomic + sessionAffinity: + description: Supports "ClientIP" and "None". Used to maintain + session affinity. + type: string + sessionAffinityConfig: + description: sessionAffinityConfig contains the configurations + of session affinity. + properties: + clientIP: + description: clientIP contains the configurations of Client + IP based session affinity. + properties: + timeoutSeconds: + description: timeoutSeconds specifies the seconds + of ClientIP type session sticky time. + format: int32 + type: integer + type: object + type: object + type: + description: type determines how the Service is exposed. Defaults + to ClusterIP. + type: string + type: object + status: + description: Most recently observed status of the service. Populated + by the system. Read-only. + properties: + conditions: + description: Current service state + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: lastTransitionTime is the last time the + condition transitioned from one status to another. + format: date-time + type: string + message: + description: message is a human readable message indicating + details about the transition. + maxLength: 32768 + type: string + observedGeneration: + description: observedGeneration represents the .metadata.generation + that the condition was set based upon. + format: int64 + minimum: 0 + type: integer + reason: + description: reason contains a programmatic identifier + indicating the reason for the condition's last transition. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + --- Many .condition. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + loadBalancer: + description: LoadBalancer contains the current status of the + load-balancer, if one is present. + properties: + ingress: + description: Ingress is a list containing ingress points + for the load-balancer. + items: + description: 'LoadBalancerIngress represents the status + of a load-balancer ingress point: traffic intended + for the' + properties: + hostname: + description: Hostname is set for load-balancer ingress + points that are DNS based (typically AWS load-balancers) + type: string + ip: + description: IP is set for load-balancer ingress + points that are IP based (typically GCE or OpenStack + load-balanc + type: string + ports: + description: Ports is a list of records of service + ports If used, every port defined in the service + should have a + items: + properties: + error: + description: Error is to record the problem + with the service port The format of the + error shall comply with the f + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + port: + description: Port is the port number of the + service port of which status is recorded + here + format: int32 + type: integer + protocol: + default: TCP + description: Protocol is the protocol of the + service port of which status is recorded + here The supported values a + type: string + required: + - port + - protocol + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: array + type: object + type: object + type: object serviceUnhealthySecondThreshold: format: int32 type: integer diff --git a/ray-operator/apis/ray/v1alpha1/rayservice_types.go b/ray-operator/apis/ray/v1alpha1/rayservice_types.go index 4c1618703a..824e48a6c1 100644 --- a/ray-operator/apis/ray/v1alpha1/rayservice_types.go +++ b/ray-operator/apis/ray/v1alpha1/rayservice_types.go @@ -1,6 +1,7 @@ package v1alpha1 import ( + v1 "k8s.io/api/core/v1" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) @@ -52,6 +53,8 @@ type RayServiceSpec struct { RayClusterSpec RayClusterSpec `json:"rayClusterConfig,omitempty"` ServiceUnhealthySecondThreshold *int32 `json:"serviceUnhealthySecondThreshold,omitempty"` DeploymentUnhealthySecondThreshold *int32 `json:"deploymentUnhealthySecondThreshold,omitempty"` + // ServeService is the Kubernetes service for head node and worker nodes who have healthy http proxy to serve traffics. + ServeService *v1.Service `json:"serveService,omitempty"` } type ServeDeploymentGraphSpec struct { @@ -142,9 +145,9 @@ type ServeDeploymentStatus struct { HealthLastUpdateTime *metav1.Time `json:"healthLastUpdateTime,omitempty"` } -//+kubebuilder:object:root=true -//+kubebuilder:subresource:status -//+genclient +// +kubebuilder:object:root=true +// +kubebuilder:subresource:status +// +genclient // RayService is the Schema for the rayservices API type RayService struct { metav1.TypeMeta `json:",inline"` diff --git a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go index c4d2b00433..bad7833c4e 100644 --- a/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go +++ b/ray-operator/apis/ray/v1alpha1/zz_generated.deepcopy.go @@ -532,6 +532,11 @@ func (in *RayServiceSpec) DeepCopyInto(out *RayServiceSpec) { *out = new(int32) **out = **in } + if in.ServeService != nil { + in, out := &in.ServeService, &out.ServeService + *out = new(v1.Service) + (*in).DeepCopyInto(*out) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new RayServiceSpec. diff --git a/ray-operator/config/crd/bases/ray.io_rayservices.yaml b/ray-operator/config/crd/bases/ray.io_rayservices.yaml index 0317f5e882..fdcb7d6288 100644 --- a/ray-operator/config/crd/bases/ray.io_rayservices.yaml +++ b/ray-operator/config/crd/bases/ray.io_rayservices.yaml @@ -12144,6 +12144,300 @@ spec: required: - importPath type: object + serveService: + description: ServeService is the Kubernetes service for head node + and worker nodes who have healthy http proxy to + properties: + apiVersion: + description: APIVersion defines the versioned schema of this representation + of an object. + type: string + kind: + description: Kind is a string value representing the REST resource + this object represents. + type: string + metadata: + description: 'Standard object''s metadata. More info: https://git.k8s.' + properties: + annotations: + additionalProperties: + type: string + type: object + finalizers: + items: + type: string + type: array + labels: + additionalProperties: + type: string + type: object + name: + type: string + namespace: + type: string + type: object + spec: + description: Spec defines the behavior of a service. https://git.k8s. + properties: + allocateLoadBalancerNodePorts: + description: allocateLoadBalancerNodePorts defines if NodePorts + will be automatically allocated for services with + type: boolean + clusterIP: + description: clusterIP is the IP address of the service and + is usually assigned randomly. + type: string + clusterIPs: + description: ClusterIPs is a list of IP addresses assigned + to this service, and are usually assigned randomly. + items: + type: string + type: array + x-kubernetes-list-type: atomic + externalIPs: + description: externalIPs is a list of IP addresses for which + nodes in the cluster will also accept traffic for th + items: + type: string + type: array + externalName: + description: externalName is the external reference that discovery + mechanisms will return as an alias for this se + type: string + externalTrafficPolicy: + description: externalTrafficPolicy denotes if this Service + desires to route external traffic to node-local or clu + type: string + healthCheckNodePort: + description: healthCheckNodePort specifies the healthcheck + nodePort for the service. + format: int32 + type: integer + internalTrafficPolicy: + description: InternalTrafficPolicy specifies if the cluster + internal traffic should be routed to all endpoints or + type: string + ipFamilies: + description: IPFamilies is a list of IP families (e.g. IPv4, + IPv6) assigned to this service. + items: + description: IPFamily represents the IP Family (IPv4 or + IPv6). + type: string + type: array + x-kubernetes-list-type: atomic + ipFamilyPolicy: + description: IPFamilyPolicy represents the dual-stack-ness + requested or required by this Service. + type: string + loadBalancerClass: + description: loadBalancerClass is the class of the load balancer + implementation this Service belongs to. + type: string + loadBalancerIP: + description: 'Only applies to Service Type: LoadBalancer LoadBalancer + will get created with the IP specified in th' + type: string + loadBalancerSourceRanges: + description: If specified and supported by the platform, this + will restrict traffic through the cloud-provider lo + items: + type: string + type: array + ports: + description: 'The list of ports that are exposed by this service. + More info: https://kubernetes.' + items: + description: ServicePort contains information on service's + port. + properties: + appProtocol: + description: The application protocol for this port. + This field follows standard Kubernetes label syntax. + type: string + name: + description: The name of this port within the service. + This must be a DNS_LABEL. + type: string + nodePort: + description: The port on each node on which this service + is exposed when type is NodePort or LoadBalancer. + format: int32 + type: integer + port: + description: The port that will be exposed by this service. + format: int32 + type: integer + protocol: + default: TCP + description: The IP protocol for this port. Supports + "TCP", "UDP", and "SCTP". Default is TCP. + type: string + targetPort: + anyOf: + - type: integer + - type: string + description: Number or name of the port to access on + the pods targeted by the service. + x-kubernetes-int-or-string: true + required: + - port + type: object + type: array + x-kubernetes-list-map-keys: + - port + - protocol + x-kubernetes-list-type: map + publishNotReadyAddresses: + description: publishNotReadyAddresses indicates that any agent + which deals with endpoints for this Service should + type: boolean + selector: + additionalProperties: + type: string + description: Route service traffic to pods with label keys + and values matching this selector. + type: object + x-kubernetes-map-type: atomic + sessionAffinity: + description: Supports "ClientIP" and "None". Used to maintain + session affinity. + type: string + sessionAffinityConfig: + description: sessionAffinityConfig contains the configurations + of session affinity. + properties: + clientIP: + description: clientIP contains the configurations of Client + IP based session affinity. + properties: + timeoutSeconds: + description: timeoutSeconds specifies the seconds + of ClientIP type session sticky time. + format: int32 + type: integer + type: object + type: object + type: + description: type determines how the Service is exposed. Defaults + to ClusterIP. + type: string + type: object + status: + description: Most recently observed status of the service. Populated + by the system. Read-only. + properties: + conditions: + description: Current service state + items: + description: Condition contains details for one aspect of + the current state of this API Resource. + properties: + lastTransitionTime: + description: lastTransitionTime is the last time the + condition transitioned from one status to another. + format: date-time + type: string + message: + description: message is a human readable message indicating + details about the transition. + maxLength: 32768 + type: string + observedGeneration: + description: observedGeneration represents the .metadata.generation + that the condition was set based upon. + format: int64 + minimum: 0 + type: integer + reason: + description: reason contains a programmatic identifier + indicating the reason for the condition's last transition. + maxLength: 1024 + minLength: 1 + pattern: ^[A-Za-z]([A-Za-z0-9_,:]*[A-Za-z0-9_])?$ + type: string + status: + description: status of the condition, one of True, False, + Unknown. + enum: + - "True" + - "False" + - Unknown + type: string + type: + description: type of condition in CamelCase or in foo.example.com/CamelCase. + --- Many .condition. + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + required: + - lastTransitionTime + - message + - reason + - status + - type + type: object + type: array + x-kubernetes-list-map-keys: + - type + x-kubernetes-list-type: map + loadBalancer: + description: LoadBalancer contains the current status of the + load-balancer, if one is present. + properties: + ingress: + description: Ingress is a list containing ingress points + for the load-balancer. + items: + description: 'LoadBalancerIngress represents the status + of a load-balancer ingress point: traffic intended + for the' + properties: + hostname: + description: Hostname is set for load-balancer ingress + points that are DNS based (typically AWS load-balancers) + type: string + ip: + description: IP is set for load-balancer ingress + points that are IP based (typically GCE or OpenStack + load-balanc + type: string + ports: + description: Ports is a list of records of service + ports If used, every port defined in the service + should have a + items: + properties: + error: + description: Error is to record the problem + with the service port The format of the + error shall comply with the f + maxLength: 316 + pattern: ^([a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*/)?(([A-Za-z0-9][-A-Za-z0-9_.]*)?[A-Za-z0-9])$ + type: string + port: + description: Port is the port number of the + service port of which status is recorded + here + format: int32 + type: integer + protocol: + default: TCP + description: Protocol is the protocol of the + service port of which status is recorded + here The supported values a + type: string + required: + - port + - protocol + type: object + type: array + x-kubernetes-list-type: atomic + type: object + type: array + type: object + type: object + type: object serviceUnhealthySecondThreshold: format: int32 type: integer diff --git a/ray-operator/config/samples/ray-service.custom-serve-service.yaml b/ray-operator/config/samples/ray-service.custom-serve-service.yaml new file mode 100644 index 0000000000..aa52c04277 --- /dev/null +++ b/ray-operator/config/samples/ray-service.custom-serve-service.yaml @@ -0,0 +1,116 @@ +# Make sure to increase resource requests and limits before using this example in production. +# For examples with more realistic resource configuration, see +# ray-cluster.complete.large.yaml and +# ray-cluster.autoscaler.large.yaml. +apiVersion: ray.io/v1alpha1 +kind: RayService +metadata: + name: rayservice-sample +spec: + serviceUnhealthySecondThreshold: 300 # Config for the health check threshold for service. Default value is 60. + deploymentUnhealthySecondThreshold: 300 # Config for the health check threshold for deployments. Default value is 60. + serveService: + metadata: + name: custom-ray-serve-service-name + labels: + custom-label: custom-ray-serve-service-label + annotations: + custom-annotation: custom-ray-serve-service-annotation + spec: + type: LoadBalancer + ports: + - port: 12345 + targetPort: 12345 + name: custom-ray-serve-service-port + serveConfig: + importPath: fruit.deployment_graph + runtimeEnv: | + working_dir: "https://github.com/ray-project/test_dag/archive/41d09119cbdf8450599f993f51318e9e27c59098.zip" + deployments: + - name: MangoStand + numReplicas: 1 + userConfig: | + price: 3 + rayActorOptions: + numCpus: 0.1 + - name: OrangeStand + numReplicas: 1 + userConfig: | + price: 2 + rayActorOptions: + numCpus: 0.1 + - name: PearStand + numReplicas: 1 + userConfig: | + price: 1 + rayActorOptions: + numCpus: 0.1 + - name: FruitMarket + numReplicas: 1 + rayActorOptions: + numCpus: 0.1 + - name: DAGDriver + numReplicas: 1 + routePrefix: "/" + rayActorOptions: + numCpus: 0.1 + rayClusterConfig: + rayVersion: '2.4.0' # should match the Ray version in the image of the containers + ######################headGroupSpecs################################# + # Ray head pod template. + headGroupSpec: + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. + rayStartParams: + dashboard-host: '0.0.0.0' + #pod template + template: + spec: + containers: + - name: ray-head + image: rayproject/ray:2.4.0 + resources: + limits: + cpu: 2 + memory: 2Gi + requests: + cpu: 2 + memory: 2Gi + ports: + - containerPort: 6379 + name: gcs-server + - containerPort: 8265 # Ray dashboard + name: dashboard + - containerPort: 10001 + name: client + - containerPort: 8000 + name: serve + workerGroupSpecs: + # the pod replicas in this group typed worker + - replicas: 1 + minReplicas: 1 + maxReplicas: 5 + # logical group name, for this called small-group, also can be functional + groupName: small-group + # The `rayStartParams` are used to configure the `ray start` command. + # See https://github.com/ray-project/kuberay/blob/master/docs/guidance/rayStartParams.md for the default settings of `rayStartParams` in KubeRay. + # See https://docs.ray.io/en/latest/cluster/cli.html#ray-start for all available options in `rayStartParams`. + rayStartParams: {} + #pod template + template: + spec: + containers: + - name: ray-worker # must consist of lower case alphanumeric characters or '-', and must start and end with an alphanumeric character (e.g. 'my-name', or '123-abc' + image: rayproject/ray:2.4.0 + lifecycle: + preStop: + exec: + command: ["/bin/sh","-c","ray stop"] + resources: + limits: + cpu: "1" + memory: "2Gi" + requests: + cpu: "500m" + memory: "2Gi" diff --git a/ray-operator/controllers/ray/common/service.go b/ray-operator/controllers/ray/common/service.go index 7b07308452..114871e3ac 100644 --- a/ray-operator/controllers/ray/common/service.go +++ b/ray-operator/controllers/ray/common/service.go @@ -68,15 +68,6 @@ func BuildServiceForHeadPod(cluster rayv1alpha1.RayCluster, labels map[string]st // Deep copy the HeadService to avoid modifying the original object headService := cluster.Spec.HeadGroupSpec.HeadService.DeepCopy() - // For the Labels field, merge labels_for_service with custom HeadService labels. - // If there are overlaps, ignore the custom HeadService labels. - if headService.ObjectMeta.Labels == nil { - headService.ObjectMeta.Labels = make(map[string]string) - } - for k, v := range labels_for_service { - headService.ObjectMeta.Labels[k] = v - } - // For the selector, ignore any custom HeadService selectors or labels. headService.Spec.Selector = selector @@ -99,37 +90,10 @@ func BuildServiceForHeadPod(cluster rayv1alpha1.RayCluster, labels map[string]st // Append default ports. headService.Spec.Ports = append(headService.Spec.Ports, ports...) - // If the user has not specified a name, generate one - if headService.ObjectMeta.Name == "" { - headService.ObjectMeta.Name = default_name - log.Info("Using default name for head service.", "default_name", default_name) - } else { - log.Info("Overriding default name for head service with provided name in HeadGroupSpec.HeadService", - "default_name", default_name, - "provided_name", headService.ObjectMeta.Name) - } - // If the user has specified a namespace, ignore it and raise a warning - if headService.ObjectMeta.Namespace != "" && headService.ObjectMeta.Namespace != default_namespace { - log.Info("Ignoring namespace provided in HeadGroupSpec.HeadService", - "provided_namespace", headService.ObjectMeta.Namespace, - "headService_name", headService.ObjectMeta.Name, - "default_namespace", default_namespace) - } - headService.ObjectMeta.Namespace = default_namespace - - // If the user has not specified a service type, use the cluster's service type - if headService.Spec.Type == "" { - headService.Spec.Type = default_type - log.Info("Using HeadGroupSpec.ServiceType for head service", - "HeadGroupSpec.ServiceType", default_type, - "headService.ObjectMeta.Name", headService.ObjectMeta.Name) - } else { - log.Info("Overriding HeadGroupSpec.ServiceType for head service with provided type in HeadGroupSpec.HeadService.Spec.Type", - "HeadGroupSpec.ServiceType", default_type, - "headService.ObjectMeta.Name", headService.ObjectMeta.Name, - "HeadGroupSpec.ServiceType", default_type, - "HeadGroupSpec.HeadService.Spec.Type", headService.Spec.Type) - } + setLabelsforUserProvidedService(headService, labels_for_service) + setNameforUserProvidedService(headService, default_name) + setNamespaceforUserProvidedService(headService, default_namespace) + setServiceTypeForUserProvidedService(headService, default_type) return headService, nil } @@ -190,29 +154,72 @@ func BuildServeServiceForRayService(rayService rayv1alpha1.RayService, rayCluste RayClusterServingServiceLabelKey: EnableRayClusterServingServiceTrue, } - service := &corev1.Service{ + default_name := utils.GenerateServeServiceName(rayService.Name) + default_namespace := rayService.Namespace + default_type := rayService.Spec.RayClusterSpec.HeadGroupSpec.ServiceType + + // `ports_int` is a map of port names to port numbers, while `ports` is a list of ServicePort objects + ports_int := getServicePorts(rayCluster) + ports := []corev1.ServicePort{} + for name, port := range ports_int { + if name == DefaultServingPortName { + svcPort := corev1.ServicePort{Name: name, Port: port} + ports = append(ports, svcPort) + break + } + } + + if rayService.Spec.ServeService != nil { + // Use the provided "custom" ServeService. + // Deep copy the ServeService to avoid modifying the original object + serveService := rayService.Spec.ServeService.DeepCopy() + + // For the selector, ignore any custom ServeService selectors or labels. + serveService.Spec.Selector = selectorLabels + + if serveService.ObjectMeta.Annotations == nil { + serveService.ObjectMeta.Annotations = make(map[string]string) + } + + // Add port with name "serve" if it is already not added and ignore any custom ports + // Keeping this consistentent with adding only serve port in serve service + if len(ports) != 0 { + log.Info("port with name 'serve' already added. Ignoring user provided ports for serve service") + serveService.Spec.Ports = ports + } else { + ports := []corev1.ServicePort{} + for _, port := range serveService.Spec.Ports { + if port.Name == DefaultServingPortName { + svcPort := corev1.ServicePort{Name: port.Name, Port: port.Port} + ports = append(ports, svcPort) + break + } + } + serveService.Spec.Ports = ports + } + + setLabelsforUserProvidedService(serveService, labels) + setNameforUserProvidedService(serveService, default_name) + setNamespaceforUserProvidedService(serveService, default_namespace) + setServiceTypeForUserProvidedService(serveService, default_type) + + return serveService, nil + } + + serveService := &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ - Name: utils.GenerateServeServiceName(rayService.Name), - Namespace: rayService.Namespace, + Name: default_name, + Namespace: default_namespace, Labels: labels, }, Spec: corev1.ServiceSpec{ Selector: selectorLabels, - Ports: []corev1.ServicePort{}, - Type: rayService.Spec.RayClusterSpec.HeadGroupSpec.ServiceType, + Ports: ports, + Type: default_type, }, } - ports := getServicePorts(rayCluster) - for name, port := range ports { - if name == DefaultServingPortName { - svcPort := corev1.ServicePort{Name: name, Port: port} - service.Spec.Ports = append(service.Spec.Ports, svcPort) - break - } - } - - return service, nil + return serveService, nil } // BuildDashboardService Builds the service for dashboard agent and head node. @@ -249,6 +256,57 @@ func BuildDashboardService(cluster rayv1alpha1.RayCluster) (*corev1.Service, err return service, nil } +func setServiceTypeForUserProvidedService(service *corev1.Service, default_type corev1.ServiceType) { + // If the user has not specified a service type, use the default service type + if service.Spec.Type == "" { + log.Info("Using default serviceType passed for the user provided service", + "default_type passed", default_type, + "service.ObjectMeta.Name", service.ObjectMeta.Name) + service.Spec.Type = default_type + } else { + log.Info("Overriding default serviceType with user provided serviceType", + "default_type passed", default_type, + "service.ObjectMeta.Name", service.ObjectMeta.Name, + "default_type passed", default_type, + "service.Spec.Type", service.Spec.Type) + } +} + +func setNamespaceforUserProvidedService(service *corev1.Service, default_namespace string) { + // If the user has specified a namespace, ignore it and raise a warning + if service.ObjectMeta.Namespace != "" && service.ObjectMeta.Namespace != default_namespace { + log.Info("Ignoring namespace in user provided service", + "provided_namespace", service.ObjectMeta.Namespace, + "service_name", service.ObjectMeta.Name, + "default_namespace", default_namespace) + } + + service.ObjectMeta.Namespace = default_namespace +} + +func setNameforUserProvidedService(service *corev1.Service, default_name string) { + // If the user has not specified a name, use the default name passed + if service.ObjectMeta.Name == "" { + log.Info("Using default name for user provided service.", "default_name", default_name) + service.ObjectMeta.Name = default_name + } else { + log.Info("Overriding default name for user provided service with name in service.ObjectMeta.Name.", + "default_name", default_name, + "provided_name", service.ObjectMeta.Name) + } +} + +func setLabelsforUserProvidedService(service *corev1.Service, labels map[string]string) { + // For the Labels field, merge labels with user provided labels. + // If there are overlaps, ignore the user provided Service labels. + if service.ObjectMeta.Labels == nil { + service.ObjectMeta.Labels = make(map[string]string) + } + for k, v := range labels { + service.ObjectMeta.Labels[k] = v + } +} + // getServicePorts will either user passing ports or default ports to create service. func getServicePorts(cluster rayv1alpha1.RayCluster) map[string]int32 { ports, err := getPortsFromCluster(cluster) diff --git a/ray-operator/controllers/ray/common/service_test.go b/ray-operator/controllers/ray/common/service_test.go index 982c6d00c6..26abdcf2b6 100644 --- a/ray-operator/controllers/ray/common/service_test.go +++ b/ray-operator/controllers/ray/common/service_test.go @@ -21,7 +21,21 @@ var ( headServiceAnnotationValue1 = "HeadServiceAnnotationValue1" headServiceAnnotationKey2 = "HeadServiceAnnotationKey2" headServiceAnnotationValue2 = "HeadServiceAnnotationValue2" - instanceWithWrongSvc = &rayv1alpha1.RayCluster{ + serviceInstance = &rayv1alpha1.RayService{ + ObjectMeta: metav1.ObjectMeta{ + Name: "rayservice-sample", + Namespace: "default", + }, + Spec: rayv1alpha1.RayServiceSpec{ + RayClusterSpec: rayv1alpha1.RayClusterSpec{ + RayVersion: "1.0", + HeadGroupSpec: rayv1alpha1.HeadGroupSpec{ + ServiceType: corev1.ServiceTypeClusterIP, + }, + }, + }, + } + instanceWithWrongSvc = &rayv1alpha1.RayCluster{ ObjectMeta: metav1.ObjectMeta{ Name: "raycluster-sample", Namespace: "default", @@ -61,6 +75,10 @@ var ( { ContainerPort: 8265, }, + { + ContainerPort: 8000, + Name: "serve", + }, }, Command: []string{"python"}, Args: []string{"/opt/code.py"}, @@ -249,13 +267,6 @@ func TestUserSpecifiedHeadService(t *testing.T) { if err != nil { t.Errorf("failed to build head service: %v", err) } - // The user-provided namespace should be ignored, but the name should be respected - if headService.ObjectMeta.Namespace != testRayClusterWithHeadService.ObjectMeta.Namespace { - t.Errorf("User-provided namespace should be ignored: expected namespace=%s, actual namespace=%s", testRayClusterWithHeadService.ObjectMeta.Namespace, headService.ObjectMeta.Namespace) - } - if headService.ObjectMeta.Name != userName { - t.Errorf("User-provided name should be respected: expected name=%s, actual name=%s", userName, headService.ObjectMeta.Name) - } // The selector field should only use the keys from the five default labels. The values should be updated with the values from the template labels. // The user-provided HeadService labels should be ignored for the purposes of the selector field. The user-provided Selector field should be ignored. @@ -318,11 +329,6 @@ func TestUserSpecifiedHeadService(t *testing.T) { t.Errorf("Final labels should contain key=%s", k) } } - for k := range userLabels { - if _, ok := headService.ObjectMeta.Labels[k]; !ok { - t.Errorf("Final labels should contain key=%s", k) - } - } // Test merged annotations. In the case of overlap (HeadServiceAnnotationKey1) the user annotation should be ignored. for k, v := range userAnnotations { @@ -360,18 +366,9 @@ func TestUserSpecifiedHeadService(t *testing.T) { } } - // Test name and namespace are generated if not specified - if headService.ObjectMeta.Name == "" { - t.Errorf("Generated head service name is empty") - } - if headService.ObjectMeta.Namespace == "" { - t.Errorf("Generated head service namespace is empty") - } - - // Test that the user service type takes priority over the default service type (ClusterIP) - if headService.Spec.Type != userType { - t.Errorf("Generated head service type is not %s", userType) - } + validateServiceTypeForUserSpecifiedService(headService, userType, t) + validateLabelsForUserSpecifiedService(headService, userLabels, t) + validateNameAndNamespaceForUserSpecifiedService(headService, testRayClusterWithHeadService.ObjectMeta.Namespace, userName, t) } func TestNilMapDoesntErrorInUserSpecifiedHeadService(t *testing.T) { @@ -405,3 +402,131 @@ func TestBuildServiceForHeadPodPortsOrder(t *testing.T) { assert.Equal(t, ports1[i].Name, ports2[i].Name) } } + +func TestBuildServeServiceForRayService(t *testing.T) { + svc, err := BuildServeServiceForRayService(*serviceInstance, *instanceWithWrongSvc) + assert.Nil(t, err) + + actualResult := svc.Spec.Selector[RayClusterLabelKey] + expectedResult := string(instanceWithWrongSvc.Name) + if !reflect.DeepEqual(expectedResult, actualResult) { + t.Fatalf("Expected `%v` but got `%v`", expectedResult, actualResult) + } + + actualLabel := svc.Labels[RayServiceLabelKey] + expectedLabel := string(serviceInstance.Name) + if !reflect.DeepEqual(expectedLabel, actualLabel) { + t.Fatalf("Expected `%v` but got `%v`", expectedLabel, actualLabel) + } + + actualType := svc.Spec.Type + expectedType := corev1.ServiceTypeClusterIP + if !reflect.DeepEqual(expectedType, actualType) { + t.Fatalf("Expected `%v` but got `%v`", expectedType, actualType) + } + + expectedName := fmt.Sprintf("%s-%s-%s", serviceInstance.Name, "serve", "svc") + validateNameAndNamespaceForUserSpecifiedService(svc, serviceInstance.ObjectMeta.Namespace, expectedName, t) +} + +func TestUserSpecifiedServeService(t *testing.T) { + // Use any RayService instance as a base for the test. + testRayServiceWithServeService := serviceInstance.DeepCopy() + + userName := "user-custom-name" + userNamespace := "user-custom-namespace" + userLabels := map[string]string{"userLabelKey": "userLabelValue", RayClusterLabelKey: "userClusterName"} // Override default cluster name + userAnnotations := map[string]string{"userAnnotationKey": "userAnnotationValue", "userAnnotationKey2": "userAnnotationValue2"} + userPort := corev1.ServicePort{Name: "serve", Port: 12345} + userPortOverride := corev1.ServicePort{Name: DefaultClientPortName, Port: 98765} // Override default client port (10001) + userPorts := []corev1.ServicePort{userPort, userPortOverride} + userSelector := map[string]string{"userSelectorKey": "userSelectorValue", RayClusterLabelKey: "userSelectorClusterName"} + // Specify a "LoadBalancer" type, which differs from the default "ClusterIP" type. + userType := corev1.ServiceTypeLoadBalancer + + testRayServiceWithServeService.Spec.ServeService = &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: userName, + Namespace: userNamespace, + Labels: userLabels, + Annotations: userAnnotations, + }, + Spec: corev1.ServiceSpec{ + Ports: userPorts, + Selector: userSelector, + Type: userType, + }, + } + + svc, err := BuildServeServiceForRayService(*testRayServiceWithServeService, *instanceWithWrongSvc) + if err != nil { + t.Errorf("failed to build serve service: %v", err) + } + + // Check every annotation is in the service annotation + for k := range userAnnotations { + if _, ok := svc.ObjectMeta.Annotations[k]; !ok { + t.Errorf("Final labels should contain key=%s", k) + } + } + + // Check that selectors only have default selectors + if len(svc.Spec.Selector) != 2 { + t.Errorf("Selectors should have just 2 keys %s and %s", RayClusterLabelKey, RayClusterServingServiceLabelKey) + } + if svc.Spec.Selector[RayClusterLabelKey] != instanceWithWrongSvc.Name { + t.Errorf("Serve Service selector key %s value didn't match expected value : expected value=%s, actual value=%s", RayClusterLabelKey, instanceWithWrongSvc.Name, svc.Spec.Selector[RayClusterLabelKey]) + } + if svc.Spec.Selector[RayClusterServingServiceLabelKey] != EnableRayClusterServingServiceTrue { + t.Errorf("Serve Service selector key %s value didn't match expected value : expected value=%s, actual value=%s", RayClusterServingServiceLabelKey, EnableRayClusterServingServiceTrue, svc.Spec.Selector[RayClusterServingServiceLabelKey]) + } + + // ports should only have DefaultServePort + ports := svc.Spec.Ports + expectedPortName := DefaultServingPortName + expectedPortNumber := int32(8000) + for _, port := range ports { + if port.Name != DefaultServingPortName { + t.Fatalf("Expected `%v` but got `%v`", expectedPortName, port.Name) + } + if port.Port != expectedPortNumber { + t.Fatalf("Expected `%v` but got `%v`", expectedPortNumber, port.Port) + } + } + + validateServiceTypeForUserSpecifiedService(svc, userType, t) + validateLabelsForUserSpecifiedService(svc, userLabels, t) + validateNameAndNamespaceForUserSpecifiedService(svc, testRayServiceWithServeService.ObjectMeta.Namespace, userName, t) +} + +func validateServiceTypeForUserSpecifiedService(svc *corev1.Service, userType corev1.ServiceType, t *testing.T) { + // Test that the user service type takes priority over the default service type (example: ClusterIP) + if svc.Spec.Type != userType { + t.Errorf("Generated service type is not %s", userType) + } +} + +func validateNameAndNamespaceForUserSpecifiedService(svc *corev1.Service, default_namespace string, userName string, t *testing.T) { + // Test name and namespace are generated if not specified + if svc.ObjectMeta.Name == "" { + t.Errorf("Generated service name is empty") + } + if svc.ObjectMeta.Namespace == "" { + t.Errorf("Generated service namespace is empty") + } + // The user-provided namespace should be ignored, but the name should be respected + if svc.ObjectMeta.Namespace != default_namespace { + t.Errorf("User-provided namespace should be ignored: expected namespace=%s, actual namespace=%s", default_namespace, svc.ObjectMeta.Namespace) + } + if svc.ObjectMeta.Name != userName { + t.Errorf("User-provided name should be respected: expected name=%s, actual name=%s", userName, svc.ObjectMeta.Name) + } +} + +func validateLabelsForUserSpecifiedService(svc *corev1.Service, userLabels map[string]string, t *testing.T) { + for k := range userLabels { + if _, ok := svc.ObjectMeta.Labels[k]; !ok { + t.Errorf("Final labels should contain key=%s", k) + } + } +}