From a91f660f3b87cbbfc78f43fdb9c9c86c917624f9 Mon Sep 17 00:00:00 2001
From: mcheah
Date: Tue, 6 Dec 2016 14:25:59 -0800
Subject: [PATCH 001/225] [SPARK-18278] Minimal support for submitting to Kubernetes.

---
 .../org/apache/spark/deploy/SparkSubmit.scala |  35 +-
 .../spark/deploy/SparkSubmitArguments.scala   |  30 ++
 kubernetes/core/pom.xml                       | 101 +++++
 ...che.spark.scheduler.ExternalClusterManager |   1 +
 .../spark/deploy/kubernetes/Client.scala      | 355 ++++++++++++++++++
 .../kubernetes/KubernetesClientBuilder.scala  |  53 +++
 .../spark/deploy/kubernetes/Retry.scala       |  42 +++
 .../rest/KubernetesRestProtocolMessages.scala |  58 +++
 .../rest/kubernetes/HttpClientUtil.scala      |  57 +++
 .../kubernetes/KubernetesSparkRestApi.scala   |  39 ++
 .../KubernetesSparkRestServer.scala           | 274 ++++++++++++++
 .../kubernetes/KubernetesClusterManager.scala |  42 +++
 .../KubernetesClusterSchedulerBackend.scala   | 264 +++++++++++++
 kubernetes/docker-minimal-bundle/pom.xml      | 137 +++++++
 .../src/main/assembly/driver-assembly.xml     |  84 +++++
 .../src/main/assembly/executor-assembly.xml   |  84 +++++
 .../src/main/docker/driver/Dockerfile         |  26 ++
 .../src/main/docker/executor/Dockerfile       |  26 ++
 .../integration-tests-spark-jobs/pom.xml      |  45 +++
 .../jobs/SparkPiWithInfiniteWait.scala        |  50 +++
 kubernetes/integration-tests/pom.xml          | 206 ++++++++++
 .../integrationtest/KubernetesSuite.scala     | 157 ++++++++
 .../docker/SparkDockerImageBuilder.scala      |  59 +++
 .../integrationtest/minikube/Minikube.scala   | 173 +++++++++
 .../restapis/SparkRestApiV1.scala             |  50 +++
 .../launcher/SparkSubmitOptionParser.java     |  10 +
 pom.xml                                       |  49 +++
 27 files changed, 2504 insertions(+), 3 deletions(-)
 create mode 100644 kubernetes/core/pom.xml
 create mode 100644 kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala
 create mode 100644 kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala
 create mode 100644 kubernetes/docker-minimal-bundle/pom.xml
 create mode 100644 kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml
 create mode 100644 kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml
 create mode 100644 kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile
 create mode 100644 kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
 create mode 100644 kubernetes/integration-tests-spark-jobs/pom.xml
 create mode 100644 kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala
 create mode 100644 kubernetes/integration-tests/pom.xml
 create mode 100644 kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala
 create mode 100644 kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala
 create mode 100644 kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala
 create mode 100644 kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala

diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
index c60a2a1706d5a..e15d212212507 100644
--- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
+++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala
@@ -72,7 +72,8 @@ object SparkSubmit extends CommandLineUtils {
   private val STANDALONE = 2
   private val MESOS = 4
   private val LOCAL = 8
-  private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | LOCAL
+  private val KUBERNETES = 16
+  private val ALL_CLUSTER_MGRS = YARN | STANDALONE | MESOS | KUBERNETES | LOCAL

   // Deploy modes
   private val CLIENT = 1
@@ -232,6 +233,7 @@ object SparkSubmit extends CommandLineUtils {
         YARN
       case m if m.startsWith("spark") => STANDALONE
       case m if m.startsWith("mesos") => MESOS
+      case m if m.startsWith("kubernetes") => KUBERNETES
       case m if m.startsWith("local") => LOCAL
       case _ =>
         printErrorAndExit("Master must either be yarn or start with spark, mesos, local")
@@ -277,6 +279,7 @@ object SparkSubmit extends CommandLineUtils {
     }
     val isYarnCluster = clusterManager == YARN && deployMode == CLUSTER
     val isMesosCluster = clusterManager == MESOS && deployMode == CLUSTER
+    val isKubernetesCluster = clusterManager == KUBERNETES && deployMode == CLUSTER

     // Resolve maven dependencies if there are any and add classpath to jars.
Add them to py-files // too for packages that include Python code @@ -341,6 +344,10 @@ object SparkSubmit extends CommandLineUtils { // The following modes are not supported or applicable (clusterManager, deployMode) match { + case (KUBERNETES, CLIENT) => + printErrorAndExit("Client mode is currently not supported for Kubernetes.") + case (KUBERNETES, CLUSTER) if args.isPython || args.isR => + printErrorAndExit("Kubernetes does not currently support python or R applications.") case (STANDALONE, CLUSTER) if args.isPython => printErrorAndExit("Cluster deploy mode is currently not supported for python " + "applications on standalone clusters.") @@ -474,7 +481,17 @@ object SparkSubmit extends CommandLineUtils { OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.principal"), OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.keytab"), - // Other options + // Kubernetes only + OptionAssigner(args.kubernetesMaster, KUBERNETES, ALL_DEPLOY_MODES, + sysProp = "spark.kubernetes.master"), + OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES, + sysProp = "spark.kubernetes.namespace"), + OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER, + sysProp = "spark.kubernetes.driver.uploads.jars"), + OptionAssigner(args.kubernetesUploadDriverExtraClasspath, KUBERNETES, CLUSTER, + sysProp = "spark.kubernetes.driver.uploads.driverExtraClasspath"), + + // Other options OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES, sysProp = "spark.executor.cores"), OptionAssigner(args.executorMemory, STANDALONE | MESOS | YARN, ALL_DEPLOY_MODES, @@ -522,8 +539,9 @@ object SparkSubmit extends CommandLineUtils { // Add the application jar automatically so the user doesn't have to call sc.addJar // For YARN cluster mode, the jar is already distributed on each node as "app.jar" + // In Kubernetes cluster mode, the jar will be uploaded by the client separately. // For python and R files, the primary resource is already distributed as a regular file - if (!isYarnCluster && !args.isPython && !args.isR) { + if (!isYarnCluster && !isKubernetesCluster && !args.isPython && !args.isR) { var jars = sysProps.get("spark.jars").map(x => x.split(",").toSeq).getOrElse(Seq.empty) if (isUserJar(args.primaryResource)) { jars = jars ++ Seq(args.primaryResource) @@ -622,6 +640,13 @@ object SparkSubmit extends CommandLineUtils { } } + if (isKubernetesCluster) { + childMainClass = "org.apache.spark.deploy.kubernetes.Client" + childArgs += args.primaryResource + childArgs += args.mainClass + childArgs ++= args.childArgs + } + // Load any properties specified through --conf and the default properties file for ((k, v) <- args.sparkProperties) { sysProps.getOrElseUpdate(k, v) @@ -881,6 +906,7 @@ private[spark] object SparkSubmitUtils { /** * Represents a Maven Coordinate + * * @param groupId the groupId of the coordinate * @param artifactId the artifactId of the coordinate * @param version the version of the coordinate @@ -892,6 +918,7 @@ private[spark] object SparkSubmitUtils { /** * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided * in the format `groupId:artifactId:version` or `groupId/artifactId:version`. + * * @param coordinates Comma-delimited string of maven coordinates * @return Sequence of Maven coordinates */ @@ -969,6 +996,7 @@ private[spark] object SparkSubmitUtils { /** * Output a comma-delimited list of paths for the downloaded jars to be added to the classpath * (will append to jars in SparkSubmit). 
+ * * @param artifacts Sequence of dependencies that were resolved and retrieved * @param cacheDirectory directory where jars are cached * @return a comma-delimited list of paths for the dependencies @@ -1106,6 +1134,7 @@ private[spark] object SparkSubmitUtils { /** * Resolves any dependencies that were supplied through maven coordinates + * * @param coordinates Comma-delimited string of maven coordinates * @param ivySettings An IvySettings containing resolvers to use * @param exclusions Exclusions to apply when resolving transitive dependencies diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 0144fd1056bac..ceb508e124692 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -71,6 +71,12 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var principal: String = null var keytab: String = null + // Kubernetes only + var kubernetesMaster: String = null + var kubernetesNamespace: String = null + var kubernetesUploadJars: String = null + var kubernetesUploadDriverExtraClasspath: String = null + // Standalone cluster mode only var supervise: Boolean = false var driverCores: String = null @@ -193,6 +199,18 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull + kubernetesMaster = Option(kubernetesMaster) + .orElse(sparkProperties.get("spark.kubernetes.master")) + .orNull + kubernetesNamespace = Option(kubernetesNamespace) + .orElse(sparkProperties.get("spark.kubernetes.namespace")) + .orNull + kubernetesUploadJars = Option(kubernetesUploadJars) + .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.jars")) + .orNull + kubernetesUploadDriverExtraClasspath = Option(kubernetesUploadDriverExtraClasspath) + .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.driverExtraClasspath")) + .orNull // Try to set main class from JAR if no --class argument is given if (mainClass == null && !isPython && !isR && primaryResource != null) { @@ -431,6 +449,18 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KEYTAB => keytab = value + case KUBERNETES_MASTER => + kubernetesMaster = value + + case KUBERNETES_NAMESPACE => + kubernetesNamespace = value + + case KUBERNETES_UPLOAD_JARS => + kubernetesUploadJars = value + + case KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH => + kubernetesUploadDriverExtraClasspath = value + case HELP => printUsageAndExit(0) diff --git a/kubernetes/core/pom.xml b/kubernetes/core/pom.xml new file mode 100644 index 0000000000000..9c7eb52b2680a --- /dev/null +++ b/kubernetes/core/pom.xml @@ -0,0 +1,101 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-SNAPSHOT + ../../pom.xml + + + spark-kubernetes_2.11 + jar + Spark Project Kubernetes + + kubernetes + 1.4.17 + + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + test-jar + test + + + + io.fabric8 + kubernetes-client + ${kubernetes.client.version} + + + com.netflix.feign + feign-core + + + com.netflix.feign + feign-okhttp + + + 
com.netflix.feign + feign-jackson + + + com.netflix.feign + feign-jaxrs + + + javax.ws.rs + jsr311-api + + + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + + + javax.ws.rs + javax.ws.rs-api + + + + com.google.guava + guava + + + + + + + + target/scala-${scala.binary.version}/classes + target/scala-${scala.binary.version}/test-classes + + + + diff --git a/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager new file mode 100644 index 0000000000000..55e7e38b28a08 --- /dev/null +++ b/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager @@ -0,0 +1 @@ +org.apache.spark.scheduler.cluster.kubernetes.KubernetesClusterManager diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala new file mode 100644 index 0000000000000..4ee00e8802080 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -0,0 +1,355 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +import java.io.File +import java.security.SecureRandom +import java.util.concurrent.{Executors, TimeUnit} +import javax.net.ssl.X509TrustManager + +import com.google.common.io.Files +import com.google.common.util.concurrent.{SettableFuture, ThreadFactoryBuilder} +import io.fabric8.kubernetes.api.model._ +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import io.fabric8.kubernetes.client.internal.SSLUtils +import org.apache.commons.codec.binary.Base64 +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.ExecutionContext +import scala.concurrent.duration.DurationInt +import scala.util.Success + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} +import org.apache.spark.deploy.rest.kubernetes._ +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +private[spark] class Client( + sparkConf: SparkConf, + mainClass: String, + mainAppResource: String, + appArgs: Array[String]) extends Logging { + import Client._ + + private val namespace = sparkConf.getOption("spark.kubernetes.namespace").getOrElse( + throw new IllegalArgumentException("Namespace must be provided in spark.kubernetes.namespace")) + private val master = sparkConf + .getOption("spark.kubernetes.master") + .getOrElse("Master must be provided in spark.kubernetes.master") + + private val launchTime = System.currentTimeMillis + private val kubernetesAppId = sparkConf.getOption("spark.app.name") + .orElse(sparkConf.getOption("spark.app.id")) + .getOrElse(s"spark-$launchTime") + + private val secretName = s"spark-submission-server-secret-$kubernetesAppId" + private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" + // TODO set precise version by default + private val driverDockerImage = sparkConf.get( + "spark.kubernetes.driver.docker.image", "spark-driver:latest") + private val uploadedDriverExtraClasspath = sparkConf + .getOption("spark.kubernetes.driver.uploads.driverExtraClasspath") + private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") + + private val secretBytes = new Array[Byte](128) + SECURE_RANDOM.nextBytes(secretBytes) + private val secretBase64String = Base64.encodeBase64String(secretBytes) + + private implicit val retryableExecutionContext = ExecutionContext + .fromExecutorService( + Executors.newSingleThreadExecutor(new ThreadFactoryBuilder() + .setNameFormat("kubernetes-client-retryable-futures-%d") + .setDaemon(true) + .build())) + + def run(): Unit = { + var k8ConfBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(master) + .withNamespace(namespace) + sparkConf.getOption("spark.kubernetes.submit.caCertFile").foreach { + f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) + } + sparkConf.getOption("spark.kubernetes.submit.clientKeyFile").foreach { + f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) + } + sparkConf.getOption("spark.kubernetes.submit.clientCertFile").foreach { + f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) + } + + val k8ClientConfig = k8ConfBuilder.build + Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig))(kubernetesClient => { + val secret = kubernetesClient.secrets().createNew() + .withNewMetadata() + .withName(secretName) + 
.endMetadata() + .withData(Map((SUBMISSION_SERVER_SECRET_NAME, secretBase64String)).asJava) + .withType("Opaque") + .done() + try { + val selectors = Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue).asJava + val uiPort = sparkConf + .getOption("spark.ui.port") + .map(_.toInt) + .getOrElse(DEFAULT_UI_PORT) + val (servicePorts, containerPorts) = configurePorts(uiPort) + val service = kubernetesClient.services().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .endMetadata() + .withNewSpec() + .withSelector(selectors) + .withPorts(servicePorts.asJava) + .endSpec() + .done() + sparkConf.set("spark.kubernetes.driver.service.name", service.getMetadata.getName) + sparkConf.setIfMissing("spark.driver.port", DRIVER_PORT.toString) + sparkConf.setIfMissing("spark.blockmanager.port", BLOCKMANAGER_PORT.toString) + val submitRequest = buildSubmissionRequest() + val submitCompletedFuture = SettableFuture.create[Boolean] + val secretDirectory = s"/var/run/secrets/spark-submission/$kubernetesAppId" + + val podWatcher = new Watcher[Pod] { + override def eventReceived(action: Action, t: Pod): Unit = { + if ((action == Action.ADDED || action == Action.MODIFIED) + && t.getStatus.getPhase == "Running" + && !submitCompletedFuture.isDone) { + t.getStatus + .getContainerStatuses + .asScala + .find(status => + status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { + case Some(status) => + try { + val driverLauncher = getDriverLauncherService( + k8ClientConfig, master) + val ping = Retry.retry(5, 5.seconds) { + driverLauncher.ping() + } + ping onFailure { + case t: Throwable => + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(t) + } + } + val submitComplete = ping andThen { + case Success(_) => + driverLauncher.create(submitRequest) + submitCompletedFuture.set(true) + } + submitComplete onFailure { + case t: Throwable => + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(t) + } + } + } catch { + case e: Throwable => + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(e) + throw e + } + } + case None => + } + } + } + + override def onClose(e: KubernetesClientException): Unit = { + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(e) + } + } + } + + def createDriverPod(unused: Watch): Unit = { + kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(selectors) + .endMetadata() + .withNewSpec() + .withRestartPolicy("OnFailure") + .addNewVolume() + .withName(s"spark-submission-secret-volume") + .withNewSecret() + .withSecretName(secret.getMetadata.getName) + .endSecret() + .endVolume + .addNewContainer() + .withName(DRIVER_LAUNCHER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName("spark-submission-secret-volume") + .withMountPath(secretDirectory) + .withReadOnly(true) + .endVolumeMount() + .addNewEnv() + .withName("SPARK_SUBMISSION_SECRET_LOCATION") + .withValue(s"$secretDirectory/$SUBMISSION_SERVER_SECRET_NAME") + .endEnv() + .addNewEnv() + .withName("SPARK_DRIVER_LAUNCHER_SERVER_PORT") + .withValue(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT.toString) + .endEnv() + .withPorts(containerPorts.asJava) + .endContainer() + .endSpec() + .done() + submitCompletedFuture.get(30, TimeUnit.SECONDS) + } + + Utils.tryWithResource(kubernetesClient + .pods() + .withLabels(selectors) + .watch(podWatcher)) { createDriverPod } + } finally { + 
kubernetesClient.secrets().delete(secret) + } + }) + } + + private def configurePorts(uiPort: Int): (Seq[ServicePort], Seq[ContainerPort]) = { + val servicePorts = new ArrayBuffer[ServicePort] + val containerPorts = new ArrayBuffer[ContainerPort] + + def addPortToServiceAndContainer(portName: String, portValue: Int): Unit = { + servicePorts += new ServicePortBuilder() + .withName(portName) + .withPort(portValue) + .withNewTargetPort(portValue) + .build() + containerPorts += new ContainerPortBuilder() + .withContainerPort(portValue) + .build() + } + + addPortToServiceAndContainer( + DRIVER_LAUNCHER_SERVICE_PORT_NAME, + DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) + addPortToServiceAndContainer( + DRIVER_PORT_NAME, + sparkConf + .getOption("spark.driver.port") + .map(_.toInt) + .getOrElse(DRIVER_PORT)) + addPortToServiceAndContainer( + BLOCKMANAGER_PORT_NAME, + sparkConf + .getOption("spark.blockmanager.port") + .map(_.toInt) + .getOrElse(BLOCKMANAGER_PORT)) + + addPortToServiceAndContainer(UI_PORT_NAME, uiPort) + (servicePorts.toSeq, containerPorts.toSeq) + } + + private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { + val appResourceUri = Utils.resolveURI(mainAppResource) + val resolvedAppResource: AppResource = appResourceUri.getScheme match { + case "file" | null => + val appFile = new File(appResourceUri.getPath) + if (!appFile.isFile) { + throw new IllegalStateException("Provided local file path does not exist" + + s" or is not a file: ${appFile.getAbsolutePath}") + } + val fileBytes = Files.toByteArray(appFile) + val fileBase64 = Base64.encodeBase64String(fileBytes) + UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) + case other => RemoteAppResource(other) + } + + val uploadDriverExtraClasspathBase64Contents = getFileContents(uploadedDriverExtraClasspath) + val uploadJarsBase64Contents = getFileContents(uploadedJars) + KubernetesCreateSubmissionRequest( + appResource = resolvedAppResource, + mainClass = mainClass, + appArgs = appArgs, + secret = secretBase64String, + sparkProperties = sparkConf.getAll.toMap, + uploadedDriverExtraClasspathBase64Contents = uploadDriverExtraClasspathBase64Contents, + uploadedJarsBase64Contents = uploadJarsBase64Contents) + } + + def getFileContents(maybeFilePaths: Option[String]): Array[(String, String)] = { + maybeFilePaths + .map(_.split(",").map(filePath => { + val driverExtraClasspathFile = new File(filePath) + if (!driverExtraClasspathFile.isFile) { + throw new IllegalStateException("Provided file to upload for driver extra classpath" + + s" does not exist or is not a file: $filePath") + } else { + val fileBytes = Files.toByteArray(driverExtraClasspathFile) + val fileBase64 = Base64.encodeBase64String(fileBytes) + (driverExtraClasspathFile.getName, fileBase64) + } + })).getOrElse(Array.empty[(String, String)]) + } + + private def getDriverLauncherService( + k8ClientConfig: Config, + kubernetesMaster: String): KubernetesSparkRestApi = { + val url = s"${ + Array[String]( + kubernetesMaster, + "api", "v1", "proxy", + "namespaces", namespace, + "services", kubernetesAppId).mkString("/")}" + + s":$DRIVER_LAUNCHER_SERVICE_PORT_NAME/" + + val sslContext = SSLUtils.sslContext(k8ClientConfig) + val trustManager = SSLUtils.trustManagers( + k8ClientConfig)(0).asInstanceOf[X509TrustManager] + HttpClientUtil.createClient[KubernetesSparkRestApi]( + uri = url, + sslSocketFactory = sslContext.getSocketFactory, + trustContext = trustManager) + } +} + +private object Client { + + private val SUBMISSION_SERVER_SECRET_NAME 
= "spark-submission-server-secret" + private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" + private val DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT = 7077 + private val DRIVER_PORT = 7078 + private val BLOCKMANAGER_PORT = 7079 + private val DEFAULT_UI_PORT = 4040 + private val UI_PORT_NAME = "spark-ui-port" + private val DRIVER_LAUNCHER_SERVICE_PORT_NAME = "driver-launcher-port" + private val DRIVER_PORT_NAME = "driver-port" + private val BLOCKMANAGER_PORT_NAME = "block-manager-port" + private val DRIVER_LAUNCHER_CONTAINER_NAME = "spark-kubernetes-driver-launcher" + private val SECURE_RANDOM = new SecureRandom() + + def main(args: Array[String]): Unit = { + require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + + s" []") + val mainAppResource = args(0) + val mainClass = args(1) + val appArgs = args.drop(2) + val sparkConf = new SparkConf(true) + new Client( + mainAppResource = mainAppResource, + mainClass = mainClass, + sparkConf = sparkConf, + appArgs = appArgs).run() + } +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala new file mode 100644 index 0000000000000..4c715c86cc7f9 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -0,0 +1,53 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} + +private[spark] object KubernetesClientBuilder { + private val API_SERVER_TOKEN = new File("/var/run/secrets/kubernetes.io/serviceaccount/token") + private val CA_CERT_FILE = new File("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + + /** + * Creates a {@link KubernetesClient}, expecting to be from + * within the context of a pod. When doing so, credentials files + * are picked up from canonical locations, as they are injected + * into the pod's disk space. 
+ */ + def buildFromWithinPod( + kubernetesMaster: String, + kubernetesNamespace: String): DefaultKubernetesClient = { + var clientConfigBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(kubernetesMaster) + .withNamespace(kubernetesNamespace) + + if (CA_CERT_FILE.isFile) { + clientConfigBuilder = clientConfigBuilder.withCaCertFile(CA_CERT_FILE.getAbsolutePath) + } + + if (API_SERVER_TOKEN.isFile) { + clientConfigBuilder = clientConfigBuilder.withOauthToken( + Files.toString(API_SERVER_TOKEN, Charsets.UTF_8)) + } + new DefaultKubernetesClient(clientConfigBuilder.build) + } +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala new file mode 100644 index 0000000000000..e5ce0bcd606b2 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration.Duration + +private[spark] object Retry { + + private def retryableFuture[T] + (times: Int, interval: Duration) + (f: => Future[T]) + (implicit executionContext: ExecutionContext): Future[T] = { + f recoverWith { + case _ if times > 0 => { + Thread.sleep(interval.toMillis) + retryableFuture(times - 1, interval)(f) + } + } + } + + def retry[T] + (times: Int, interval: Duration) + (f: => T) + (implicit executionContext: ExecutionContext): Future[T] = { + retryableFuture(times, interval)(Future[T] { f }) + } +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala new file mode 100644 index 0000000000000..4b7bb66083f29 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest + +import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} + +import org.apache.spark.SPARK_VERSION + +// TODO: jars should probably be compressed. Shipping tarballs would be optimal. +case class KubernetesCreateSubmissionRequest( + val appResource: AppResource, + val mainClass: String, + val appArgs: Array[String], + val sparkProperties: Map[String, String], + val secret: String, + val uploadedDriverExtraClasspathBase64Contents: Array[(String, String)] + = Array.empty[(String, String)], + val uploadedJarsBase64Contents: Array[(String, String)] + = Array.empty[(String, String)]) extends SubmitRestProtocolRequest { + message = "create" + clientSparkVersion = SPARK_VERSION +} + +@JsonTypeInfo( + use = JsonTypeInfo.Id.NAME, + include = JsonTypeInfo.As.PROPERTY, + property = "type") +@JsonSubTypes(value = Array( + new JsonSubTypes.Type(value = classOf[UploadedAppResource], name = "UploadedAppResource"), + new JsonSubTypes.Type(value = classOf[RemoteAppResource], name = "RemoteAppResource"))) +abstract class AppResource + +case class UploadedAppResource( + resourceBase64Contents: String, + name: String = "spark-app-resource") extends AppResource + +case class RemoteAppResource(resource: String) extends AppResource + +class PingResponse extends SubmitRestProtocolResponse { + val text = "pong" + message = "pong" + serverSparkVersion = SPARK_VERSION +} + diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala new file mode 100644 index 0000000000000..eb7d411700829 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala @@ -0,0 +1,57 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import javax.net.ssl.{SSLContext, SSLSocketFactory, X509TrustManager} + +import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import feign.Feign +import feign.Request.Options +import feign.jackson.{JacksonDecoder, JacksonEncoder} +import feign.jaxrs.JAXRSContract +import okhttp3.OkHttpClient +import scala.reflect.ClassTag + +import org.apache.spark.status.api.v1.JacksonMessageWriter + +private[spark] object HttpClientUtil { + + def createClient[T: ClassTag]( + uri: String, + sslSocketFactory: SSLSocketFactory = SSLContext.getDefault.getSocketFactory, + trustContext: X509TrustManager = null, + readTimeoutMillis: Int = 20000, + connectTimeoutMillis: Int = 20000): T = { + var httpClientBuilder = new OkHttpClient.Builder() + Option.apply(trustContext).foreach(context => { + httpClientBuilder = httpClientBuilder.sslSocketFactory(sslSocketFactory, context) + }) + val objectMapper = new ObjectMapper() + .registerModule(new DefaultScalaModule) + .setDateFormat(JacksonMessageWriter.makeISODateFormat) + objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) + val clazz = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + Feign.builder() + .client(new feign.okhttp.OkHttpClient(httpClientBuilder.build())) + .contract(new JAXRSContract) + .encoder(new JacksonEncoder(objectMapper)) + .decoder(new JacksonDecoder(objectMapper)) + .options(new Options(connectTimeoutMillis, readTimeoutMillis)) + .target(clazz, uri) + } +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala new file mode 100644 index 0000000000000..3cbcb16293b1d --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import javax.ws.rs.{Consumes, GET, Path, POST, Produces} +import javax.ws.rs.core.MediaType + +import org.apache.spark.deploy.rest.{CreateSubmissionResponse, KubernetesCreateSubmissionRequest, PingResponse} + +@Path("/v1/submissions/") +trait KubernetesSparkRestApi { + + @POST + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @Path("/create") + def create(request: KubernetesCreateSubmissionRequest): CreateSubmissionResponse + + @GET + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @Path("/ping") + def ping(): PingResponse + +} diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala new file mode 100644 index 0000000000000..0a2e8176394ab --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -0,0 +1,274 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.File +import java.net.URI +import java.nio.file.Paths +import java.util.concurrent.CountDownLatch +import javax.servlet.http.{HttpServletRequest, HttpServletResponse} + +import com.google.common.io.Files +import org.apache.commons.codec.binary.Base64 +import scala.collection.mutable +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.{SecurityManager, SPARK_VERSION, SparkConf} +import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.rest._ +import org.apache.spark.util.{ShutdownHookManager, Utils} + +private case class KubernetesSparkRestServerArguments( + val host: Option[String] = None, + val port: Option[Int] = None, + val secretFile: Option[String] = None) { + def validate(): KubernetesSparkRestServerArguments = { + require(host.isDefined, "Hostname not set via --hostname.") + require(port.isDefined, "Port not set via --port") + require(secretFile.isDefined, "Secret file not set via --secret-file") + this + } +} + +private object KubernetesSparkRestServerArguments { + def fromArgsArray(inputArgs: Array[String]): KubernetesSparkRestServerArguments = { + var args = inputArgs.toList + var resolvedArguments = KubernetesSparkRestServerArguments() + while (args.nonEmpty) { + resolvedArguments = args match { + case "--hostname" :: value :: tail => + args = tail + resolvedArguments.copy(host = Some(value)) + case "--port" :: value :: tail => + args = tail + resolvedArguments.copy(port = Some(value.toInt)) + case "--secret-file" :: value :: tail => + args = tail + resolvedArguments.copy(secretFile = Some(value)) + // TODO polish usage message + case Nil => resolvedArguments + case unknown => throw new IllegalStateException(s"Unknown argument(s) found: $unknown") + } + } + resolvedArguments.validate() + } +} + +private[spark] class KubernetesSparkRestServer( + host: String, + port: Int, + conf: SparkConf, + expectedApplicationSecret: Array[Byte]) + extends RestSubmissionServer(host, port, conf) { + + private val javaExecutable = s"${System.getenv("JAVA_HOME")}/bin/java" + private val sparkHome = System.getenv("SPARK_HOME") + private val securityManager = new SecurityManager(conf) + override protected lazy val contextToServlet = Map[String, RestServlet]( + s"$baseContext/create/*" -> submitRequestServlet, + s"$baseContext/ping/*" -> pingServlet) + + private val pingServlet = new PingServlet + override protected val submitRequestServlet: SubmitRequestServlet + = new KubernetesSubmitRequestServlet + // TODO + override protected val statusRequestServlet: StatusRequestServlet = null + override protected val killRequestServlet: KillRequestServlet = null + + private class PingServlet extends RestServlet { + protected override def doGet( + request: HttpServletRequest, + response: HttpServletResponse): Unit = { + sendResponse(new PingResponse, response) + } + } + + private class KubernetesSubmitRequestServlet extends SubmitRequestServlet { + + // TODO validating the secret should be done as part of a header of the request. + // Instead here we have to specify the secret in the body. 
+ override protected def handleSubmit( + requestMessageJson: String, + requestMessage: SubmitRestProtocolMessage, + responseServlet: HttpServletResponse): SubmitRestProtocolResponse = { + requestMessage match { + case KubernetesCreateSubmissionRequest( + appResource, + mainClass, + appArgs, + sparkProperties, + secret, + uploadedDriverExtraClasspath, + uploadedJars) => + val decodedSecret = Base64.decodeBase64(secret) + if (!expectedApplicationSecret.sameElements(decodedSecret)) { + responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) + handleError("Unauthorized to submit application.") + } else { + val tempDir = Utils.createTempDir() + val appResourcePath = resolvedAppResource(appResource, tempDir) + val driverClasspathDirectory = new File(tempDir, "driver-extra-classpath") + if (!driverClasspathDirectory.mkdir) { + throw new IllegalStateException("Failed to create driver extra classpath" + + s" dir at ${driverClasspathDirectory.getAbsolutePath}") + } + val jarsDirectory = new File(tempDir, "jars") + if (!jarsDirectory.mkdir) { + throw new IllegalStateException("Failed to create jars dir at" + + s"${jarsDirectory.getAbsolutePath}") + } + val writtenDriverExtraClasspath = writeBase64ContentsToFiles( + uploadedDriverExtraClasspath, driverClasspathDirectory) + val writtenJars = writeBase64ContentsToFiles(uploadedJars, jarsDirectory) + val originalDriverExtraClasspath = sparkProperties.get("spark.driver.extraClassPath") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val resolvedDriverExtraClasspath = writtenDriverExtraClasspath ++ + originalDriverExtraClasspath + val originalJars = sparkProperties.get("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) + val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) + val driverClasspath = resolvedDriverExtraClasspath ++ + resolvedJars ++ + sparkJars ++ + Array(appResourcePath) + val resolvedSparkProperties = new mutable.HashMap[String, String] + resolvedSparkProperties ++= sparkProperties + resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + + val command = new ArrayBuffer[String] + command += javaExecutable + command += "-cp" + command += s"${driverClasspath.mkString(":")}" + for (prop <- resolvedSparkProperties) { + command += s"-D${prop._1}=${prop._2}" + } + val driverMemory = resolvedSparkProperties.getOrElse("spark.driver.memory", "1g") + command += s"-Xms$driverMemory" + command += s"-Xmx$driverMemory" + command += mainClass + command ++= appArgs + val pb = new ProcessBuilder(command: _*) + Paths.get(sparkHome, "logs").toFile.mkdirs + pb.redirectOutput(Paths.get(sparkHome, "logs", "stdout").toFile) + pb.redirectError(Paths.get(sparkHome, "logs", "stderr").toFile) + val process = pb.start() + ShutdownHookManager.addShutdownHook(() => { + logInfo("Received stop command, shutting down the running Spark application...") + process.destroy() + }) + val response = new CreateSubmissionResponse + response.success = true + response.submissionId = null + response.message = "success" + response.serverSparkVersion = SPARK_VERSION + response + } + case unexpected => + responseServlet.setStatus(HttpServletResponse.SC_BAD_REQUEST) + handleError(s"Received message of unexpected type ${unexpected.messageType}.") + } + } + + def resolvedAppResource(appResource: AppResource, tempDir: File): String = { + val appResourcePath = appResource match { + case UploadedAppResource(resourceContentsBase64, resourceName) => + 
val resourceFile = new File(tempDir, resourceName) + val resourceFilePath = resourceFile.getAbsolutePath + if (resourceFile.createNewFile()) { + val resourceContentsBytes = Base64.decodeBase64(resourceContentsBase64) + Files.write(resourceContentsBytes, resourceFile) + resourceFile.getAbsolutePath + } else { + throw new IllegalStateException(s"Failed to write main app resource file" + + s" to $resourceFilePath") + } + case RemoteAppResource(resource) => + Utils.fetchFile(resource, tempDir, conf, + securityManager, SparkHadoopUtil.get.newConfiguration(conf), + System.currentTimeMillis(), useCache = false) + val fileName = Utils.decodeFileNameInURI(URI.create(resource)) + val downloadedFile = new File(tempDir, fileName) + val downloadedFilePath = downloadedFile.getAbsolutePath + if (!downloadedFile.isFile) { + throw new IllegalStateException(s"Main app resource is not a file or" + + s" does not exist at $downloadedFilePath") + } + downloadedFilePath + } + appResourcePath + } + } + + private def writeBase64ContentsToFiles( + filesBase64Contents: Array[(String, String)], + rootDir: File): Seq[String] = { + val resolvedFileNames = new scala.collection.mutable.HashSet[String] + val resolvedFilePaths = new ArrayBuffer[String] + for (file <- filesBase64Contents) { + var currentFileName = file._1 + var deduplicationCounter = 1 + while (resolvedFileNames.contains(currentFileName)) { + // Prepend the deduplication counter so as to not mess with the extension + currentFileName = s"$deduplicationCounter-$currentFileName" + deduplicationCounter += 1 + } + val resolvedFile = new File(rootDir, currentFileName) + val resolvedFilePath = resolvedFile.getAbsolutePath + if (resolvedFile.createNewFile()) { + val fileContents = Base64.decodeBase64(file._2) + Files.write(fileContents, resolvedFile) + } else { + throw new IllegalStateException(s"Could not write jar file to $resolvedFilePath") + } + resolvedFileNames += currentFileName + resolvedFilePaths += resolvedFilePath + } + resolvedFilePaths.toSeq + } +} + +private[spark] object KubernetesSparkRestServer { + private val barrier = new CountDownLatch(1) + def main(args: Array[String]): Unit = { + val parsedArguments = KubernetesSparkRestServerArguments.fromArgsArray(args) + val secretFile = new File(parsedArguments.secretFile.get) + if (!secretFile.isFile) { + throw new IllegalArgumentException(s"Secret file specified by --secret-file" + + " is not a file, or does not exist.") + } + val secretBytes = Files.toByteArray(secretFile) + val sparkConf = new SparkConf(true) + val server = new KubernetesSparkRestServer( + parsedArguments.host.get, + parsedArguments.port.get, + sparkConf, + secretBytes) + server.start() + ShutdownHookManager.addShutdownHook(() => { + try { + server.stop() + } finally { + barrier.countDown() + } + }) + barrier.await() + } +} + diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala new file mode 100644 index 0000000000000..0d3b97c636ca3 --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster.kubernetes + +import org.apache.spark.SparkContext +import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} + +private[spark] class KubernetesClusterManager extends ExternalClusterManager { + + override def canCreate(masterURL: String): Boolean = masterURL.startsWith("kubernetes") + + override def createTaskScheduler(sc: SparkContext, masterURL: String): TaskScheduler = { + val scheduler = new TaskSchedulerImpl(sc) + sc.taskScheduler = scheduler + scheduler + } + + override def createSchedulerBackend(sc: SparkContext, masterURL: String, scheduler: TaskScheduler) + : SchedulerBackend = { + new KubernetesClusterSchedulerBackend(sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc) + } + + override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { + scheduler.asInstanceOf[TaskSchedulerImpl].initialize(backend) + } + +} + diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala new file mode 100644 index 0000000000000..f37b97e4dd0dc --- /dev/null +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -0,0 +1,264 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.scheduler.cluster.kubernetes + +import java.util.UUID +import java.util.concurrent.Executors +import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} + +import com.google.common.util.concurrent.ThreadFactoryBuilder +import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, EnvVar, EnvVarBuilder, Pod, QuantityBuilder} +import scala.collection.JavaConverters._ +import scala.collection.mutable.ArrayBuffer +import scala.concurrent.{ExecutionContext, Future} + +import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.deploy.kubernetes.KubernetesClientBuilder +import org.apache.spark.rpc.RpcEndpointAddress +import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend +import org.apache.spark.util.Utils + +private[spark] class KubernetesClusterSchedulerBackend( + scheduler: TaskSchedulerImpl, + val sc: SparkContext) + extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { + + import KubernetesClusterSchedulerBackend._ + + private val EXECUTOR_MODIFICATION_LOCK = new Object + private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] + + private val kubernetesMaster = conf + .getOption("spark.kubernetes.master") + .getOrElse( + throw new SparkException("Kubernetes master must be specified in kubernetes mode.")) + + private val executorDockerImage = conf + .get("spark.kubernetes.executor.docker.image", s"spark-executor:${sc.version}") + + private val kubernetesNamespace = conf + .getOption("spark.kubernetes.namespace") + .getOrElse( + throw new SparkException("Kubernetes namespace must be specified in kubernetes mode.")) + + private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) + + private val blockmanagerPort = conf + .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) + + private val kubernetesDriverServiceName = conf + .getOption("spark.kubernetes.driver.service.name") + .getOrElse( + throw new SparkException("Must specify the service name the driver is running with")) + + private val executorMemory = conf.getOption("spark.executor.memory").getOrElse("1g") + private val executorMemoryBytes = Utils.byteStringAsBytes(executorMemory) + + private val memoryOverheadBytes = conf + .getOption("spark.kubernetes.executor.memoryOverhead") + .map(overhead => Utils.byteStringAsBytes(overhead)) + .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * executorMemoryBytes).toInt, + MEMORY_OVERHEAD_MIN)) + private val executorMemoryWithOverhead = executorMemoryBytes + memoryOverheadBytes + + private val executorCores = conf.getOption("spark.executor.cores").getOrElse("1") + + private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( + Executors.newCachedThreadPool( + new ThreadFactoryBuilder() + .setDaemon(true) + .setNameFormat("kubernetes-executor-requests-%d") + .build)) + + private val kubernetesClient = KubernetesClientBuilder + .buildFromWithinPod(kubernetesMaster, kubernetesNamespace) + + override val minRegisteredRatio = + if (conf.getOption("spark.scheduler.minRegisteredResourcesRatio").isEmpty) { + 0.8 + } else { + super.minRegisteredRatio + } + + protected var totalExpectedExecutors = new AtomicInteger(0) + + private val driverUrl = RpcEndpointAddress( + System.getenv(s"${convertToEnvMode(kubernetesDriverServiceName)}_SERVICE_HOST"), + sc.getConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), + CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString + + private def 
convertToEnvMode(value: String): String = + value.toUpperCase.map { c => if (c == '-') '_' else c } + + private val initialExecutors = getInitialTargetExecutorNumber(1) + + private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { + if (Utils.isDynamicAllocationEnabled(conf)) { + val minNumExecutors = conf.getInt("spark.dynamicAllocation.minExecutors", 0) + val initialNumExecutors = Utils.getDynamicAllocationInitialExecutors(conf) + val maxNumExecutors = conf.getInt("spark.dynamicAllocation.maxExecutors", 1) + require(initialNumExecutors >= minNumExecutors && initialNumExecutors <= maxNumExecutors, + s"initial executor number $initialNumExecutors must between min executor number " + + s"$minNumExecutors and max executor number $maxNumExecutors") + + initialNumExecutors + } else { + conf.getInt("spark.executor.instances", defaultNumExecutors) + } + } + + override def sufficientResourcesRegistered(): Boolean = { + totalRegisteredExecutors.get() >= initialExecutors * minRegisteredRatio + } + + override def start(): Unit = { + super.start() + if (!Utils.isDynamicAllocationEnabled(sc.conf)) { + doRequestTotalExecutors(initialExecutors) + } + } + + override def stop(): Unit = { + // TODO investigate why Utils.tryLogNonFatalError() doesn't work in this context. + // When using Utils.tryLogNonFatalError some of the code fails but without any logs or + // indication as to why. + try { + runningExecutorPods.values.foreach(kubernetesClient.pods().delete(_)) + } catch { + case e: Throwable => logError("Uncaught exception while shutting down controllers.", e) + } + try { + kubernetesClient.services().withName(kubernetesDriverServiceName).delete() + } catch { + case e: Throwable => logError("Uncaught exception while shutting down driver service.", e) + } + try { + kubernetesClient.close() + } catch { + case e: Throwable => logError("Uncaught exception closing Kubernetes client.", e) + } + super.stop() + } + + private def allocateNewExecutorPod(): (String, Pod) = { + val executorKubernetesId = UUID.randomUUID().toString.replaceAll("-", "") + val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString + val name = s"exec$executorKubernetesId" + val selectors = Map(SPARK_EXECUTOR_SELECTOR -> executorId, + SPARK_APP_SELECTOR -> applicationId()).asJava + val executorMemoryQuantity = new QuantityBuilder(false) + .withAmount(executorMemoryBytes.toString) + .build() + val executorMemoryLimitQuantity = new QuantityBuilder(false) + .withAmount(executorMemoryWithOverhead.toString) + .build() + val requiredEnv = new ArrayBuffer[EnvVar] + requiredEnv += new EnvVarBuilder() + .withName("SPARK_EXECUTOR_PORT") + .withValue(executorPort.toString) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_DRIVER_URL") + .withValue(driverUrl) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_EXECUTOR_CORES") + .withValue(executorCores) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_EXECUTOR_MEMORY") + .withValue(executorMemory) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_APPLICATION_ID") + .withValue(applicationId()) + .build() + requiredEnv += new EnvVarBuilder() + .withName("SPARK_EXECUTOR_ID") + .withValue(executorId) + .build() + val requiredPorts = new ArrayBuffer[ContainerPort] + requiredPorts += new ContainerPortBuilder() + .withName(EXECUTOR_PORT_NAME) + .withContainerPort(executorPort) + .build() + requiredPorts += new ContainerPortBuilder() + .withName(BLOCK_MANAGER_PORT_NAME) + .withContainerPort(blockmanagerPort) + 
.build() + (executorKubernetesId, kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(name) + .withLabels(selectors) + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName(s"exec-${applicationId()}-container") + .withImage(executorDockerImage) + .withImagePullPolicy("IfNotPresent") + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryLimitQuantity) + .endResources() + .withEnv(requiredEnv.asJava) + .withPorts(requiredPorts.asJava) + .endContainer() + .endSpec() + .done()) + } + + override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { + EXECUTOR_MODIFICATION_LOCK.synchronized { + if (requestedTotal > totalExpectedExecutors.get) { + logInfo(s"Requesting ${requestedTotal - totalExpectedExecutors.get}" + + s" additional executors, expecting total $requestedTotal and currently" + + s" expected ${totalExpectedExecutors.get}") + for (i <- 0 until (requestedTotal - totalExpectedExecutors.get)) { + runningExecutorPods += allocateNewExecutorPod() + } + } + totalExpectedExecutors.set(requestedTotal) + } + true + } + + override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = Future[Boolean] { + EXECUTOR_MODIFICATION_LOCK.synchronized { + for (executor <- executorIds) { + runningExecutorPods.remove(executor) match { + case Some(pod) => kubernetesClient.pods().delete(pod) + case None => logWarning(s"Unable to remove pod for unknown executor $executor") + } + } + } + true + } +} + +private object KubernetesClusterSchedulerBackend { + private val SPARK_EXECUTOR_SELECTOR = "spark-exec" + private val SPARK_APP_SELECTOR = "spark-app" + private val DEFAULT_STATIC_PORT = 10000 + private val DEFAULT_BLOCKMANAGER_PORT = 7079 + private val DEFAULT_DRIVER_PORT = 7078 + private val BLOCK_MANAGER_PORT_NAME = "blockmanager" + private val EXECUTOR_PORT_NAME = "executor" + private val MEMORY_OVERHEAD_FACTOR = 0.10 + private val MEMORY_OVERHEAD_MIN = 384L + private val EXECUTOR_ID_COUNTER = new AtomicLong(0L) +} diff --git a/kubernetes/docker-minimal-bundle/pom.xml b/kubernetes/docker-minimal-bundle/pom.xml new file mode 100644 index 0000000000000..3de939ea3978a --- /dev/null +++ b/kubernetes/docker-minimal-bundle/pom.xml @@ -0,0 +1,137 @@ + + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-SNAPSHOT + ../pom.xml + + + spark-docker-minimal-bundle_2.11 + Spark Project Docker Minimal Bundle + http://spark.apache.org/ + pom + + + docker-minimal-bundle + none + pre-integration-test + + + + + org.apache.spark + spark-assembly_${scala.binary.version} + ${project.version} + pom + + + + com.google.guava + guava + ${hadoop.deps.scope} + + + + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + + + + + + org.apache.maven.plugins + maven-assembly-plugin + + + driver-docker-dist + pre-integration-test + + single + + + + src/main/assembly/driver-assembly.xml + + posix + + + + executor-docker-dist + pre-integration-test + + single + + + + src/main/assembly/executor-assembly.xml + + posix + + + + + + + + + + + hive + + + org.apache.spark + spark-hive_${scala.binary.version} + ${project.version} + + + + + hive-thriftserver + + + org.apache.spark + spark-hive-thriftserver_${scala.binary.version} + ${project.version} + + + + + spark-ganglia-lgpl + + + org.apache.spark + spark-ganglia-lgpl_${scala.binary.version} + ${project.version} + + + + + diff --git a/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml 
b/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml new file mode 100644 index 0000000000000..145244f34d1d9 --- /dev/null +++ b/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml @@ -0,0 +1,84 @@ + + + driver-docker-dist + + tar.gz + dir + + false + + + + ${project.parent.basedir}/core/src/main/resources/org/apache/spark/ui/static/ + + ui-resources/org/apache/spark/ui/static + + **/* + + + + + ${project.parent.basedir}/sbin/ + + sbin + + **/* + + + + + ${project.parent.basedir}/bin/ + + bin + + **/* + + + + + ${project.parent.basedir}/conf/ + + conf + + **/* + + + + + src/main/docker/driver + + + + **/* + + + + + + jars + true + false + runtime + false + + org.apache.spark:spark-assembly_${scala.binary.version}:pom + org.spark-project.spark:unused + + + + diff --git a/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml b/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml new file mode 100644 index 0000000000000..d97ba56562a12 --- /dev/null +++ b/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml @@ -0,0 +1,84 @@ + + + executor-docker-dist + + tar.gz + dir + + false + + + + ${project.parent.basedir}/core/src/main/resources/org/apache/spark/ui/static/ + + ui-resources/org/apache/spark/ui/static + + **/* + + + + + ${project.parent.basedir}/sbin/ + + sbin + + **/* + + + + + ${project.parent.basedir}/bin/ + + bin + + **/* + + + + + ${project.parent.basedir}/conf/ + + conf + + **/* + + + + + src/main/docker/executor + + + + **/* + + + + + + jars + true + false + runtime + false + + org.apache.spark:spark-assembly_${scala.binary.version}:pom + org.spark-project.spark:unused + + + + diff --git a/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile new file mode 100644 index 0000000000000..3bba38d8395ae --- /dev/null +++ b/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -0,0 +1,26 @@ +FROM ubuntu:trusty + +# Upgrade package index +# install a few other useful packages plus Open Jdk 7 +# Remove unneeded /var/lib/apt/lists/* after install to reduce the +# docker image size (by ~30MB) +RUN apt-get update && \ + apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server procps && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /opt/spark +RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark +ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64/jre + +WORKDIR /opt/spark + +# This class will also require setting a secret via the SPARK_APP_SECRET environment variable +CMD exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer --hostname $HOSTNAME --port $SPARK_DRIVER_LAUNCHER_SERVER_PORT --secret-file $SPARK_SUBMISSION_SECRET_LOCATION diff --git a/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile new file mode 100644 index 0000000000000..f68f1a3fb2694 --- /dev/null +++ b/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -0,0 +1,26 @@ +FROM ubuntu:trusty + +# Upgrade package index +# install a few other useful packages plus Open Jdk 7 +# Remove unneeded /var/lib/apt/lists/* after install to reduce the +# docker image size (by ~30MB) +RUN apt-get update && \ + 
apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server procps && \ + rm -rf /var/lib/apt/lists/* + +RUN mkdir -p /opt/spark +RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark +ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64/jre + +WORKDIR /opt/spark + +# TODO support spark.executor.extraClassPath +CMD exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp ${SPARK_HOME}/jars/\* org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $HOSTNAME diff --git a/kubernetes/integration-tests-spark-jobs/pom.xml b/kubernetes/integration-tests-spark-jobs/pom.xml new file mode 100644 index 0000000000000..17f1c4906214f --- /dev/null +++ b/kubernetes/integration-tests-spark-jobs/pom.xml @@ -0,0 +1,45 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-SNAPSHOT + ../../pom.xml + + + spark-kubernetes-integration-tests-spark-jobs_2.11 + jar + Spark Project Kubernetes Integration Tests Spark Jobs + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + provided + + + org.apache.spark + spark-sql_${scala.binary.version} + ${project.version} + provided + + + diff --git a/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala b/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala new file mode 100644 index 0000000000000..6e4660b771305 --- /dev/null +++ b/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.jobs + +import scala.math.random + +import org.apache.spark.sql.SparkSession + +// Equivalent to SparkPi except does not stop the Spark Context +// at the end and spins forever, so other things can inspect the +// Spark UI immediately after the fact. 
+private[spark] object SparkPiWithInfiniteWait { + + def main(args: Array[String]): Unit = { + val spark = SparkSession + .builder + .appName("Spark Pi") + .getOrCreate() + val slices = if (args.length > 0) args(0).toInt else 10 + val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow + val count = spark.sparkContext.parallelize(1 until n, slices).map { i => + val x = random * 2 - 1 + val y = random * 2 - 1 + if (x*x + y*y < 1) 1 else 0 + }.reduce(_ + _) + // scalastyle:off println + println("Pi is roughly " + 4.0 * count / (n - 1)) + // scalastyle:on println + + // Spin forever to keep the Spark UI active, so other things can inspect the job. + while (true) { + Thread.sleep(600000) + } + } + +} diff --git a/kubernetes/integration-tests/pom.xml b/kubernetes/integration-tests/pom.xml new file mode 100644 index 0000000000000..0568cb1e21826 --- /dev/null +++ b/kubernetes/integration-tests/pom.xml @@ -0,0 +1,206 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-SNAPSHOT + ../../pom.xml + + + spark-kubernetes-integration-tests_2.11 + jar + Spark Project Kubernetes Integration Tests + + + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + test + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + test-jar + test + + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs_${scala.binary.version} + ${project.version} + test + + + org.apache.spark + spark-docker-minimal-bundle_${scala.binary.version} + ${project.version} + tar.gz + driver-docker-dist + test + + + * + * + + + + + com.google.guava + guava + test + + 18.0 + + + com.spotify + docker-client + test + + + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + + + com.fasterxml.jackson.core + jackson-databind + + + org.glassfish.jersey.core + jersey-client + + + org.glassfish.jersey.core + jersey-common + + + javax.ws.rs + jsr311-api + + + + + + + + + org.apache.maven.plugins + maven-dependency-plugin + + + copy-test-spark-jobs + pre-integration-test + + copy + + + + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs_${scala.binary.version} + ${project.version} + jar + ${project.build.directory}/integration-tests-spark-jobs + + + + + + unpack-docker-driver-bundle + pre-integration-test + + unpack + + + + + org.apache.spark + spark-docker-minimal-bundle_${scala.binary.version} + ${project.version} + driver-docker-dist + tar.gz + true + ${project.build.directory}/docker/driver + + + + + + unpack-docker-executor-bundle + pre-integration-test + + unpack + + + + + org.apache.spark + spark-docker-minimal-bundle_${scala.binary.version} + ${project.version} + executor-docker-dist + tar.gz + true + ${project.build.directory}/docker/executor + + + + + + + + com.googlecode.maven-download-plugin + download-maven-plugin + 1.3.0 + + + download-minikube-linux + pre-integration-test + + wget + + + https://storage.googleapis.com/minikube/releases/v0.12.2/minikube-linux-amd64 + ${project.build.directory}/minikube-bin/linux-amd64 + minikube + + + + download-minikube-darwin + pre-integration-test + + wget + + + https://storage.googleapis.com/minikube/releases/v0.12.2/minikube-darwin-amd64 + ${project.build.directory}/minikube-bin/darwin-amd64 + minikube + + + + + + + + + diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala new file mode 
100644 index 0000000000000..d79c75e484af5 --- /dev/null +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -0,0 +1,157 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.nio.file.Paths +import java.util.UUID + +import com.google.common.collect.ImmutableList +import io.fabric8.kubernetes.client.{Config, KubernetesClient} +import org.scalatest.BeforeAndAfter +import org.scalatest.concurrent.{Eventually, PatienceConfiguration} +import org.scalatest.time.{Minutes, Seconds, Span} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.SparkSubmit +import org.apache.spark.deploy.kubernetes.Client +import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder +import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 +import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} + +private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { + + private val EXAMPLES_JAR = Paths.get("target", "integration-tests-spark-jobs") + .toFile + .listFiles()(0) + .getAbsolutePath + + private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) + private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) + private val MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.SparkPiWithInfiniteWait" + private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") + private var minikubeKubernetesClient: KubernetesClient = _ + private var clientConfig: Config = _ + + override def beforeAll(): Unit = { + Minikube.startMinikube() + new SparkDockerImageBuilder(Minikube.getDockerEnv).buildSparkDockerImages() + Minikube.getKubernetesClient.namespaces.createNew() + .withNewMetadata() + .withName(NAMESPACE) + .endMetadata() + .done() + minikubeKubernetesClient = Minikube.getKubernetesClient.inNamespace(NAMESPACE) + clientConfig = minikubeKubernetesClient.getConfiguration + } + + before { + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(minikubeKubernetesClient.pods().list().getItems.isEmpty) + assert(minikubeKubernetesClient.services().list().getItems.isEmpty) + } + } + + after { + val pods = minikubeKubernetesClient.pods().list().getItems.asScala + pods.par.foreach(pod => { + minikubeKubernetesClient + .pods() + .withName(pod.getMetadata.getName) + .withGracePeriod(60) + .delete + }) + } + + override def afterAll(): Unit = { + if (!System.getProperty("spark.docker.test.persistMinikube", "false").toBoolean) { + Minikube.deleteMinikube() + } + 
} + + private def expectationsForStaticAllocation(sparkMetricsService: SparkRestApiV1): Unit = { + val apps = Eventually.eventually(TIMEOUT, INTERVAL) { + val result = sparkMetricsService + .getApplications(ImmutableList.of(ApplicationStatus.RUNNING, ApplicationStatus.COMPLETED)) + assert(result.size == 1 + && !result.head.id.equalsIgnoreCase("appid") + && !result.head.id.equalsIgnoreCase("{appId}")) + result + } + Eventually.eventually(TIMEOUT, INTERVAL) { + val result = sparkMetricsService.getExecutors(apps.head.id) + assert(result.size == 2) + assert(result.count(exec => exec.id != "driver") == 1) + result + } + Eventually.eventually(TIMEOUT, INTERVAL) { + val result = sparkMetricsService.getStages( + apps.head.id, Seq(StageStatus.COMPLETE).asJava) + assert(result.size == 1) + result + } + } + + test("Run a simple example") { + val sparkConf = new SparkConf(true) + .setMaster("kubernetes") + .set("spark.kubernetes.master", s"https://${Minikube.getMinikubeIp}:8443") + .set("spark.kubernetes.submit.caCertFile", clientConfig.getCaCertFile) + .set("spark.kubernetes.submit.clientKeyFile", clientConfig.getClientKeyFile) + .set("spark.kubernetes.submit.clientCertFile", clientConfig.getClientCertFile) + .set("spark.kubernetes.namespace", NAMESPACE) + .set("spark.kubernetes.executor.docker.image", "spark-executor:latest") + .set("spark.executor.memory", "500m") + .set("spark.executor.cores", "1") + .set("spark.executors.instances", "1") + .set("spark.app.id", "spark-pi") + val mainAppResource = s"file://$EXAMPLES_JAR" + + new Client( + sparkConf = sparkConf, + mainClass = MAIN_CLASS, + mainAppResource = mainAppResource, + appArgs = Array.empty[String]).run() + val sparkMetricsService = Minikube.getService[SparkRestApiV1]( + "spark-pi", NAMESPACE, "spark-ui-port") + expectationsForStaticAllocation(sparkMetricsService) + } + + test("Run using spark-submit") { + val args = Array( + "--master", "kubernetes", + "--deploy-mode", "cluster", + "--kubernetes-master", s"https://${Minikube.getMinikubeIp}:8443", + "--kubernetes-namespace", NAMESPACE, + "--name", "spark-pi", + "--executor-memory", "512m", + "--executor-cores", "1", + "--num-executors", "1", + "--class", MAIN_CLASS, + "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", + "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", + "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", + "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + EXAMPLES_JAR) + SparkSubmit.main(args) + val sparkMetricsService = Minikube.getService[SparkRestApiV1]( + "spark-pi", NAMESPACE, "spark-ui-port") + expectationsForStaticAllocation(sparkMetricsService) + } +} diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala new file mode 100644 index 0000000000000..22d78142508c1 --- /dev/null +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.docker + +import java.net.URI +import java.nio.file.Paths + +import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider +import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates} +import org.apache.http.client.utils.URIBuilder +import org.scalatest.concurrent.{Eventually, PatienceConfiguration} +import org.scalatest.time.{Minutes, Seconds, Span} + +private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, String]) { + + private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) + private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) + private val dockerHost = dockerEnv.getOrElse("DOCKER_HOST", + throw new IllegalStateException("DOCKER_HOST env not found.")) + + private val originalDockerUri = URI.create(dockerHost) + private val httpsDockerUri = new URIBuilder() + .setHost(originalDockerUri.getHost) + .setPort(originalDockerUri.getPort) + .setScheme("https") + .build() + + private val dockerCerts = dockerEnv.getOrElse("DOCKER_CERT_PATH", + throw new IllegalStateException("DOCKER_CERT_PATH env not found.")) + + private val dockerClient = new DefaultDockerClient.Builder() + .uri(httpsDockerUri) + .dockerCertificates(DockerCertificates + .builder() + .dockerCertPath(Paths.get(dockerCerts)) + .build().get()) + .build() + + def buildSparkDockerImages(): Unit = { + Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } + dockerClient.build(Paths.get("target", "docker", "driver"), "spark-driver") + dockerClient.build(Paths.get("target", "docker", "executor"), "spark-executor") + } + +} \ No newline at end of file diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala new file mode 100644 index 0000000000000..92b809a4c7c59 --- /dev/null +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest.minikube + +import java.io.{BufferedReader, InputStreamReader} +import java.nio.file.Paths +import java.util.concurrent.TimeUnit +import javax.net.ssl.X509TrustManager + +import io.fabric8.kubernetes.client.internal.SSLUtils +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} +import scala.collection.mutable.ArrayBuffer +import scala.reflect.ClassTag + +import org.apache.spark.deploy.rest.kubernetes.HttpClientUtil +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +// TODO support windows +private[spark] object Minikube extends Logging { + private val MINIKUBE_EXECUTABLE_DEST = if (Utils.isMac) { + Paths.get("target", "minikube-bin", "darwin-amd64", "minikube").toFile + } else if (Utils.isWindows) { + throw new IllegalStateException("Executing Minikube based integration tests not yet " + + " available on Windows.") + } else { + Paths.get("target", "minikube-bin", "linux-amd64", "minikube").toFile + } + + private val EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE = "Minikube is not downloaded, expected at " + + s"${MINIKUBE_EXECUTABLE_DEST.getAbsolutePath}" + + private val MINIKUBE_STARTUP_TIMEOUT_SECONDS = 60 + + def startMinikube(): Unit = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + if (getMinikubeStatus != MinikubeStatus.RUNNING) { + executeMinikube("start", "--memory", "6000", "--cpus", "8") + } else { + logInfo("Minikube is already started.") + } + } + + def getMinikubeIp: String = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + val outputs = executeMinikube("ip") + assert(outputs.size == 1, "Unexpected amount of output from minikube ip") + outputs.head + } + + def getMinikubeStatus: MinikubeStatus.Value = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + val statusString = executeMinikube("status").head.replaceFirst("minikubeVM: ", "") + MinikubeStatus.unapply(statusString) + .getOrElse(throw new IllegalStateException(s"Unknown status $statusString")) + } + + def getDockerEnv: Map[String, String] = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + executeMinikube("docker-env") + .filter(_.startsWith("export")) + .map(_.replaceFirst("export ", "").split('=')) + .map(arr => (arr(0), arr(1).replaceAllLiterally("\"", ""))) + .toMap + } + + def deleteMinikube(): Unit = synchronized { + assert(MINIKUBE_EXECUTABLE_DEST.exists, EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) + if (getMinikubeStatus != MinikubeStatus.DOES_NOT_EXIST) { + executeMinikube("delete") + } else { + logInfo("Minikube was already not running.") + } + } + + def getKubernetesClient: DefaultKubernetesClient = synchronized { + val kubernetesMaster = s"https://$getMinikubeIp:8443" + val userHome = System.getProperty("user.home") + val kubernetesConf = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(kubernetesMaster) + .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath) + .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath) + .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath) + .build() + new DefaultKubernetesClient(kubernetesConf) + } + + def getService[T: ClassTag]( + serviceName: String, + namespace: String, + servicePortName: String, + servicePath: String = ""): T = synchronized { + val 
kubernetesMaster = s"https://$getMinikubeIp:8443" + val url = s"${ + Array[String]( + kubernetesMaster, + "api", "v1", "proxy", + "namespaces", namespace, + "services", serviceName).mkString("/")}" + + s":$servicePortName$servicePath" + val userHome = System.getProperty("user.home") + val kubernetesConf = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(kubernetesMaster) + .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath) + .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath) + .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath) + .build() + val sslContext = SSLUtils.sslContext(kubernetesConf) + val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] + HttpClientUtil.createClient[T](url, sslContext.getSocketFactory, trustManager) + } + + def executeMinikubeSsh(command: String): Unit = { + executeMinikube("ssh", command) + } + + private def executeMinikube(action: String, args: String*): Seq[String] = { + if (!MINIKUBE_EXECUTABLE_DEST.canExecute) { + if (!MINIKUBE_EXECUTABLE_DEST.setExecutable(true)) { + throw new IllegalStateException("Failed to make the Minikube binary executable.") + } + } + val fullCommand = Array(MINIKUBE_EXECUTABLE_DEST.getAbsolutePath, action) ++ args + val pb = new ProcessBuilder().command(fullCommand: _*) + pb.redirectErrorStream(true) + val proc = pb.start() + val outputLines = new ArrayBuffer[String] + + Utils.tryWithResource(new InputStreamReader(proc.getInputStream)) { procOutput => + Utils.tryWithResource(new BufferedReader(procOutput)) { (bufferedOutput: BufferedReader) => + var line: String = null + do { + line = bufferedOutput.readLine() + if (line != null) { + logInfo(line) + outputLines += line + } + } while (line != null) + } + } + assert(proc.waitFor(MINIKUBE_STARTUP_TIMEOUT_SECONDS, TimeUnit.SECONDS), + s"Timed out while executing $action on minikube.") + assert(proc.exitValue == 0, s"Failed to execute minikube $action ${args.mkString(" ")}") + outputLines.toSeq + } +} + +private[spark] object MinikubeStatus extends Enumeration { + + val RUNNING = status("Running") + val STOPPED = status("Stopped") + val DOES_NOT_EXIST = status("Does Not Exist") + val SAVED = status("Saved") + + def status(value: String): Value = new Val(nextId, value) + def unapply(s: String): Option[Value] = values.find(s == _.toString) +} diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala new file mode 100644 index 0000000000000..7a3b06b1b5e58 --- /dev/null +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala @@ -0,0 +1,50 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.restapis + +import java.util.{List => JList} +import javax.ws.rs._ +import javax.ws.rs.core.MediaType + +import org.apache.spark.status.api.v1._ + +@Path("/api/v1") +@Consumes(Array(MediaType.APPLICATION_JSON)) +@Produces(Array(MediaType.APPLICATION_JSON)) +trait SparkRestApiV1 { + + @GET + @Path("/applications") + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + def getApplications( + @QueryParam("status") applicationStatuses: JList[ApplicationStatus]): Seq[ApplicationInfo] + + @GET + @Path("applications/{appId}/stages") + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + def getStages( + @PathParam("appId") appId: String, + @QueryParam("status") statuses: JList[StageStatus]): Seq[StageData] + + @GET + @Path("applications/{appId}/executors") + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_JSON)) + def getExecutors(@PathParam("appId") appId: String): Seq[ExecutorSummary] +} diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index 6767cc5079649..94f9bc319b6a2 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -76,6 +76,12 @@ class SparkSubmitOptionParser { protected final String PRINCIPAL = "--principal"; protected final String QUEUE = "--queue"; + // Kubernetes-only options. + protected final String KUBERNETES_MASTER = "--kubernetes-master"; + protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace"; + protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars"; + protected final String KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH = "--upload-driver-extra-classpath"; + /** * This is the canonical list of spark-submit options. 
Each entry in the array contains the * different aliases for the same option; the first element of each entry is the "official" @@ -115,6 +121,10 @@ class SparkSubmitOptionParser { { REPOSITORIES }, { STATUS }, { TOTAL_EXECUTOR_CORES }, + { KUBERNETES_MASTER }, + { KUBERNETES_NAMESPACE }, + { KUBERNETES_UPLOAD_JARS }, + { KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH } }; /** diff --git a/pom.xml b/pom.xml index 7a3be5baea16c..69f7eb97be2d7 100644 --- a/pom.xml +++ b/pom.xml @@ -133,6 +133,7 @@ 1.8.2 1.6.0 9.3.11.v20160721 + 8.18.0 3.1.0 0.8.0 2.4.0 @@ -302,6 +303,33 @@ chill-java ${chill.version} + + + com.netflix.feign + feign-core + ${feign.version} + + + com.netflix.feign + feign-okhttp + ${feign.version} + + + com.netflix.feign + feign-jackson + ${feign.version} + + + com.netflix.feign + feign-jaxrs + ${feign.version} + + + com.squareup.okhttp3 + okhttp + 3.4.1 + + @@ -616,6 +644,11 @@ jackson-module-jaxb-annotations ${fasterxml.jackson.version} + + com.fasterxml.jackson.jaxrs + jackson-jaxrs-json-provider + ${fasterxml.jackson.version} + org.glassfish.jersey.core jersey-server @@ -2539,6 +2572,22 @@ + + kubernetes + + kubernetes/core + + + + + kubernetes-integration-tests + + kubernetes/docker-minimal-bundle + kubernetes/integration-tests + kubernetes/integration-tests-spark-jobs + + + hive-thriftserver From 9d713481af8e159419e589503f6ef4e6dac860a8 Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 6 Dec 2016 14:36:57 -0800 Subject: [PATCH 002/225] Fix style --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 ---- 1 file changed, 4 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index e15d212212507..012841694bc55 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -906,7 +906,6 @@ private[spark] object SparkSubmitUtils { /** * Represents a Maven Coordinate - * * @param groupId the groupId of the coordinate * @param artifactId the artifactId of the coordinate * @param version the version of the coordinate @@ -918,7 +917,6 @@ private[spark] object SparkSubmitUtils { /** * Extracts maven coordinates from a comma-delimited string. Coordinates should be provided * in the format `groupId:artifactId:version` or `groupId/artifactId:version`. - * * @param coordinates Comma-delimited string of maven coordinates * @return Sequence of Maven coordinates */ @@ -996,7 +994,6 @@ private[spark] object SparkSubmitUtils { /** * Output a comma-delimited list of paths for the downloaded jars to be added to the classpath * (will append to jars in SparkSubmit). 
- * * @param artifacts Sequence of dependencies that were resolved and retrieved * @param cacheDirectory directory where jars are cached * @return a comma-delimited list of paths for the dependencies @@ -1134,7 +1131,6 @@ private[spark] object SparkSubmitUtils { /** * Resolves any dependencies that were supplied through maven coordinates - * * @param coordinates Comma-delimited string of maven coordinates * @param ivySettings An IvySettings containing resolvers to use * @param exclusions Exclusions to apply when resolving transitive dependencies From f1baed2bb69626bb9b90181fd234ef843aa7cd91 Mon Sep 17 00:00:00 2001 From: mcheah Date: Tue, 6 Dec 2016 17:23:24 -0800 Subject: [PATCH 003/225] Make naming more consistent --- dev/scalastyle | 2 ++ .../org/apache/spark/deploy/kubernetes/Client.scala | 13 ++++++------- .../integrationtest/KubernetesSuite.scala | 2 ++ 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/dev/scalastyle b/dev/scalastyle index f3dec833636c6..de7423913fad9 100755 --- a/dev/scalastyle +++ b/dev/scalastyle @@ -26,6 +26,8 @@ ERRORS=$(echo -e "q\n" \ -Pyarn \ -Phive \ -Phive-thriftserver \ + -Pkubernetes \ + -Pkubernetes-integration-tests \ scalastyle test:scalastyle \ | awk '{if($1~/error/)print}' \ ) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 4ee00e8802080..f402b6df82fc4 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -34,7 +34,7 @@ import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt import scala.util.Success -import org.apache.spark.SparkConf +import org.apache.spark.{SPARK_VERSION, SparkConf} import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging @@ -60,9 +60,8 @@ private[spark] class Client( private val secretName = s"spark-submission-server-secret-$kubernetesAppId" private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" - // TODO set precise version by default private val driverDockerImage = sparkConf.get( - "spark.kubernetes.driver.docker.image", "spark-driver:latest") + "spark.kubernetes.driver.docker.image", s"spark-driver:$SPARK_VERSION") private val uploadedDriverExtraClasspath = sparkConf .getOption("spark.kubernetes.driver.uploads.driverExtraClasspath") private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") @@ -291,14 +290,14 @@ private[spark] class Client( def getFileContents(maybeFilePaths: Option[String]): Array[(String, String)] = { maybeFilePaths .map(_.split(",").map(filePath => { - val driverExtraClasspathFile = new File(filePath) - if (!driverExtraClasspathFile.isFile) { + val fileToUpload = new File(filePath) + if (!fileToUpload.isFile) { throw new IllegalStateException("Provided file to upload for driver extra classpath" + s" does not exist or is not a file: $filePath") } else { - val fileBytes = Files.toByteArray(driverExtraClasspathFile) + val fileBytes = Files.toByteArray(fileToUpload) val fileBase64 = Base64.encodeBase64String(fileBytes) - (driverExtraClasspathFile.getName, fileBase64) + (fileToUpload.getName, fileBase64) } })).getOrElse(Array.empty[(String, String)]) } diff --git 
a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index d79c75e484af5..3f3d2e609ea4d 100644 --- a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -116,6 +116,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set("spark.kubernetes.submit.clientKeyFile", clientConfig.getClientKeyFile) .set("spark.kubernetes.submit.clientCertFile", clientConfig.getClientCertFile) .set("spark.kubernetes.namespace", NAMESPACE) + .set("spark.kubernetes.driver.docker.image", "spark-driver:latest") .set("spark.kubernetes.executor.docker.image", "spark-executor:latest") .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") @@ -148,6 +149,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", EXAMPLES_JAR) SparkSubmit.main(args) val sparkMetricsService = Minikube.getService[SparkRestApiV1]( From 5a45654dba36aec72a1cdf6c8b3389082c4540f3 Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 9 Dec 2016 14:55:13 -0800 Subject: [PATCH 004/225] Fix building assembly with Kubernetes. --- assembly/pom.xml | 10 ++++++++++ .../kubernetes/integrationtest/minikube/Minikube.scala | 2 +- 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 3a7003f5e94f5..d9e8bd1d8e5b6 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -148,6 +148,16 @@ + + kubernetes + + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + + + hive diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index 92b809a4c7c59..60c6564579a6e 100644 --- a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -21,8 +21,8 @@ import java.nio.file.Paths import java.util.concurrent.TimeUnit import javax.net.ssl.X509TrustManager -import io.fabric8.kubernetes.client.internal.SSLUtils import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.internal.SSLUtils import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag From 5694a8a2623723f8564485cd4c1c5953e7027c9b Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 9 Dec 2016 16:18:11 -0800 Subject: [PATCH 005/225] Service account support, use constants from fabric8 library. 
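For orientation, the change in this commit lets the driver pod be created under a caller-chosen Kubernetes service account instead of the namespace default. A minimal sketch of how a submission might opt in, assuming a pre-created account named spark-driver-sa (that name is hypothetical; only the spark.kubernetes.submit.serviceAccountName key and its "default" fallback come from the diff that follows):

import org.apache.spark.SparkConf

// Sketch only: "spark-driver-sa" is a hypothetical, pre-created service account;
// if the setting is omitted, the Client change below falls back to the namespace's
// "default" account.
val submissionConf = new SparkConf(true)
  .set("spark.kubernetes.submit.serviceAccountName", "spark-driver-sa")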
--- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 7 +++++-- .../spark/deploy/kubernetes/KubernetesClientBuilder.scala | 6 +++--- .../kubernetes/integrationtest/KubernetesSuite.scala | 6 +++--- 3 files changed, 11 insertions(+), 8 deletions(-) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index f402b6df82fc4..cea90a51386b5 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -55,8 +55,8 @@ private[spark] class Client( private val launchTime = System.currentTimeMillis private val kubernetesAppId = sparkConf.getOption("spark.app.name") - .orElse(sparkConf.getOption("spark.app.id")) - .getOrElse(s"spark-$launchTime") + .orElse(sparkConf.getOption("spark.app.id")) + .getOrElse(s"spark-$launchTime") private val secretName = s"spark-submission-server-secret-$kubernetesAppId" private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" @@ -69,6 +69,8 @@ private[spark] class Client( private val secretBytes = new Array[Byte](128) SECURE_RANDOM.nextBytes(secretBytes) private val secretBase64String = Base64.encodeBase64String(secretBytes) + private val serviceAccount = sparkConf.get("spark.kubernetes.submit.serviceAccountName", + "default") private implicit val retryableExecutionContext = ExecutionContext .fromExecutorService( @@ -191,6 +193,7 @@ private[spark] class Client( .withSecretName(secret.getMetadata.getName) .endSecret() .endVolume + .withServiceAccount(serviceAccount) .addNewContainer() .withName(DRIVER_LAUNCHER_CONTAINER_NAME) .withImage(driverDockerImage) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala index 4c715c86cc7f9..61a13dc7274d7 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -20,11 +20,11 @@ import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files -import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} private[spark] object KubernetesClientBuilder { - private val API_SERVER_TOKEN = new File("/var/run/secrets/kubernetes.io/serviceaccount/token") - private val CA_CERT_FILE = new File("/var/run/secrets/kubernetes.io/serviceaccount/ca.crt") + private val API_SERVER_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) + private val CA_CERT_FILE = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) /** * Creates a {@link KubernetesClient}, expecting to be from diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 3f3d2e609ea4d..902631b874539 100644 --- a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -37,9 +37,9 @@ import 
org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private val EXAMPLES_JAR = Paths.get("target", "integration-tests-spark-jobs") - .toFile - .listFiles()(0) - .getAbsolutePath + .toFile + .listFiles()(0) + .getAbsolutePath private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) From dbfb87d9744180aa64c91222ed0484ad9c63940e Mon Sep 17 00:00:00 2001 From: mcheah Date: Fri, 6 Jan 2017 16:15:35 -0800 Subject: [PATCH 006/225] Some small changes - Don't hold the raw secret bytes - Add CPU limits and requests --- .../spark/deploy/kubernetes/Client.scala | 39 +++++++++++-------- .../KubernetesClusterSchedulerBackend.scala | 9 ++++- 2 files changed, 29 insertions(+), 19 deletions(-) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index cea90a51386b5..21c83dbf40e21 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -66,9 +66,12 @@ private[spark] class Client( .getOption("spark.kubernetes.driver.uploads.driverExtraClasspath") private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") - private val secretBytes = new Array[Byte](128) - SECURE_RANDOM.nextBytes(secretBytes) - private val secretBase64String = Base64.encodeBase64String(secretBytes) + private val secretBase64String = { + val secretBytes = new Array[Byte](128) + SECURE_RANDOM.nextBytes(secretBytes) + Base64.encodeBase64String(secretBytes) + } + private val serviceAccount = sparkConf.get("spark.kubernetes.submit.serviceAccountName", "default") @@ -105,11 +108,7 @@ private[spark] class Client( .done() try { val selectors = Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue).asJava - val uiPort = sparkConf - .getOption("spark.ui.port") - .map(_.toInt) - .getOrElse(DEFAULT_UI_PORT) - val (servicePorts, containerPorts) = configurePorts(uiPort) + val (servicePorts, containerPorts) = configurePorts() val service = kubernetesClient.services().createNew() .withNewMetadata() .withName(kubernetesAppId) @@ -120,11 +119,11 @@ private[spark] class Client( .endSpec() .done() sparkConf.set("spark.kubernetes.driver.service.name", service.getMetadata.getName) - sparkConf.setIfMissing("spark.driver.port", DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.blockmanager.port", BLOCKMANAGER_PORT.toString) + sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) + sparkConf.setIfMissing("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT.toString) val submitRequest = buildSubmissionRequest() val submitCompletedFuture = SettableFuture.create[Boolean] - val secretDirectory = s"/var/run/secrets/spark-submission/$kubernetesAppId" + val secretDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId" val podWatcher = new Watcher[Pod] { override def eventReceived(action: Action, t: Pod): Unit = { @@ -228,7 +227,7 @@ private[spark] class Client( }) } - private def configurePorts(uiPort: Int): (Seq[ServicePort], Seq[ContainerPort]) = { + private def configurePorts(): (Seq[ServicePort], Seq[ContainerPort]) = { val servicePorts = new ArrayBuffer[ServicePort] val containerPorts = new ArrayBuffer[ContainerPort] @@ -251,15 +250,20 @@ private[spark] class Client( sparkConf 
.getOption("spark.driver.port") .map(_.toInt) - .getOrElse(DRIVER_PORT)) + .getOrElse(DEFAULT_DRIVER_PORT)) addPortToServiceAndContainer( BLOCKMANAGER_PORT_NAME, sparkConf .getOption("spark.blockmanager.port") .map(_.toInt) - .getOrElse(BLOCKMANAGER_PORT)) + .getOrElse(DEFAULT_BLOCKMANAGER_PORT)) - addPortToServiceAndContainer(UI_PORT_NAME, uiPort) + addPortToServiceAndContainer( + UI_PORT_NAME, + sparkConf + .getOption("spark.ui.port") + .map(_.toInt) + .getOrElse(DEFAULT_UI_PORT)) (servicePorts.toSeq, containerPorts.toSeq) } @@ -331,8 +335,8 @@ private object Client { private val SUBMISSION_SERVER_SECRET_NAME = "spark-submission-server-secret" private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" private val DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT = 7077 - private val DRIVER_PORT = 7078 - private val BLOCKMANAGER_PORT = 7079 + private val DEFAULT_DRIVER_PORT = 7078 + private val DEFAULT_BLOCKMANAGER_PORT = 7079 private val DEFAULT_UI_PORT = 4040 private val UI_PORT_NAME = "spark-ui-port" private val DRIVER_LAUNCHER_SERVICE_PORT_NAME = "driver-launcher-port" @@ -340,6 +344,7 @@ private object Client { private val BLOCKMANAGER_PORT_NAME = "block-manager-port" private val DRIVER_LAUNCHER_CONTAINER_NAME = "spark-kubernetes-driver-launcher" private val SECURE_RANDOM = new SecureRandom() + private val SPARK_SUBMISSION_SECRET_BASE_DIR = "/var/run/secrets/spark-submission" def main(args: Array[String]): Unit = { require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index f37b97e4dd0dc..bbc95d4f4b7e3 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -34,8 +34,8 @@ import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.Utils private[spark] class KubernetesClusterSchedulerBackend( - scheduler: TaskSchedulerImpl, - val sc: SparkContext) + scheduler: TaskSchedulerImpl, + val sc: SparkContext) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { import KubernetesClusterSchedulerBackend._ @@ -167,6 +167,9 @@ private[spark] class KubernetesClusterSchedulerBackend( val executorMemoryLimitQuantity = new QuantityBuilder(false) .withAmount(executorMemoryWithOverhead.toString) .build() + val executorCpuQuantity = new QuantityBuilder(false) + .withAmount(executorCores) + .build() val requiredEnv = new ArrayBuffer[EnvVar] requiredEnv += new EnvVarBuilder() .withName("SPARK_EXECUTOR_PORT") @@ -214,6 +217,8 @@ private[spark] class KubernetesClusterSchedulerBackend( .withNewResources() .addToRequests("memory", executorMemoryQuantity) .addToLimits("memory", executorMemoryLimitQuantity) + .addToRequests("cpu", executorCpuQuantity) + .addToLimits("cpu", executorCpuQuantity) .endResources() .withEnv(requiredEnv.asJava) .withPorts(requiredPorts.asJava) From acb8b149ce60a476b0c6306fed09cdbae3efb811 Mon Sep 17 00:00:00 2001 From: mcheah Date: Mon, 9 Jan 2017 14:25:49 -0800 Subject: [PATCH 007/225] Use k8s:// formatted URL instead of separate setting. 
--- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 5 +---- .../org/apache/spark/deploy/SparkSubmitArguments.scala | 7 ------- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 8 +++++--- .../cluster/kubernetes/KubernetesClusterManager.scala | 2 +- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 5 +---- .../kubernetes/integrationtest/KubernetesSuite.scala | 6 ++---- 6 files changed, 10 insertions(+), 23 deletions(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 012841694bc55..d1bf6bdaf7345 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -233,7 +233,7 @@ object SparkSubmit extends CommandLineUtils { YARN case m if m.startsWith("spark") => STANDALONE case m if m.startsWith("mesos") => MESOS - case m if m.startsWith("kubernetes") => KUBERNETES + case m if m.startsWith("k8s") => KUBERNETES case m if m.startsWith("local") => LOCAL case _ => printErrorAndExit("Master must either be yarn or start with spark, mesos, local") @@ -481,9 +481,6 @@ object SparkSubmit extends CommandLineUtils { OptionAssigner(args.principal, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.principal"), OptionAssigner(args.keytab, YARN, ALL_DEPLOY_MODES, sysProp = "spark.yarn.keytab"), - // Kubernetes only - OptionAssigner(args.kubernetesMaster, KUBERNETES, ALL_DEPLOY_MODES, - sysProp = "spark.kubernetes.master"), OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES, sysProp = "spark.kubernetes.namespace"), OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER, diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index ceb508e124692..1d4ba54a90ca3 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -72,7 +72,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S var keytab: String = null // Kubernetes only - var kubernetesMaster: String = null var kubernetesNamespace: String = null var kubernetesUploadJars: String = null var kubernetesUploadDriverExtraClasspath: String = null @@ -199,9 +198,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S queue = Option(queue).orElse(sparkProperties.get("spark.yarn.queue")).orNull keytab = Option(keytab).orElse(sparkProperties.get("spark.yarn.keytab")).orNull principal = Option(principal).orElse(sparkProperties.get("spark.yarn.principal")).orNull - kubernetesMaster = Option(kubernetesMaster) - .orElse(sparkProperties.get("spark.kubernetes.master")) - .orNull kubernetesNamespace = Option(kubernetesNamespace) .orElse(sparkProperties.get("spark.kubernetes.namespace")) .orNull @@ -449,9 +445,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KEYTAB => keytab = value - case KUBERNETES_MASTER => - kubernetesMaster = value - case KUBERNETES_NAMESPACE => kubernetesNamespace = value diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 21c83dbf40e21..0715c84495a2c 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ 
b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -49,9 +49,11 @@ private[spark] class Client( private val namespace = sparkConf.getOption("spark.kubernetes.namespace").getOrElse( throw new IllegalArgumentException("Namespace must be provided in spark.kubernetes.namespace")) - private val master = sparkConf - .getOption("spark.kubernetes.master") - .getOrElse("Master must be provided in spark.kubernetes.master") + private val rawMaster = sparkConf.get("spark.master") + if (!rawMaster.startsWith("k8s://")) { + throw new IllegalArgumentException("Master should be a URL with scheme k8s://") + } + private val master = rawMaster.replaceFirst("k8s://", "") private val launchTime = System.currentTimeMillis private val kubernetesAppId = sparkConf.getOption("spark.app.name") diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 0d3b97c636ca3..36f7149a832c3 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -21,7 +21,7 @@ import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, Tas private[spark] class KubernetesClusterManager extends ExternalClusterManager { - override def canCreate(masterURL: String): Boolean = masterURL.startsWith("kubernetes") + override def canCreate(masterURL: String): Boolean = masterURL.startsWith("k8s") override def createTaskScheduler(sc: SparkContext, masterURL: String): TaskScheduler = { val scheduler = new TaskSchedulerImpl(sc) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index bbc95d4f4b7e3..4e099cea3198b 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -43,10 +43,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private val kubernetesMaster = conf - .getOption("spark.kubernetes.master") - .getOrElse( - throw new SparkException("Kubernetes master must be specified in kubernetes mode.")) + private val kubernetesMaster = sc.master.replaceFirst("k8s://", "") private val executorDockerImage = conf .get("spark.kubernetes.executor.docker.image", s"spark-executor:${sc.version}") diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 902631b874539..183f666994d38 100644 --- a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -110,8 +110,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { 
test("Run a simple example") { val sparkConf = new SparkConf(true) - .setMaster("kubernetes") - .set("spark.kubernetes.master", s"https://${Minikube.getMinikubeIp}:8443") + .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") .set("spark.kubernetes.submit.caCertFile", clientConfig.getCaCertFile) .set("spark.kubernetes.submit.clientKeyFile", clientConfig.getClientKeyFile) .set("spark.kubernetes.submit.clientCertFile", clientConfig.getClientCertFile) @@ -136,9 +135,8 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { test("Run using spark-submit") { val args = Array( - "--master", "kubernetes", + "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", "--deploy-mode", "cluster", - "--kubernetes-master", s"https://${Minikube.getMinikubeIp}:8443", "--kubernetes-namespace", NAMESPACE, "--name", "spark-pi", "--executor-memory", "512m", From f9ae918809ed97f8be549937c7ede767b1c7e984 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 9 Jan 2017 14:33:41 -0800 Subject: [PATCH 008/225] Reindent comment to conforn to JavaDoc style The build process fails ScalaStyle checks otherwise. --- .../deploy/kubernetes/KubernetesClientBuilder.scala | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala index 61a13dc7274d7..61d3ac17ac34a 100644 --- a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -27,11 +27,11 @@ private[spark] object KubernetesClientBuilder { private val CA_CERT_FILE = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) /** - * Creates a {@link KubernetesClient}, expecting to be from - * within the context of a pod. When doing so, credentials files - * are picked up from canonical locations, as they are injected - * into the pod's disk space. - */ + * Creates a {@link KubernetesClient}, expecting to be from + * within the context of a pod. When doing so, credentials files + * are picked up from canonical locations, as they are injected + * into the pod's disk space. + */ def buildFromWithinPod( kubernetesMaster: String, kubernetesNamespace: String): DefaultKubernetesClient = { From f20397b7e7500ad08f4f1547742cbbc3f4d1677c Mon Sep 17 00:00:00 2001 From: mcheah Date: Mon, 9 Jan 2017 15:30:02 -0800 Subject: [PATCH 009/225] Move kubernetes under resource-managers folder. 
--- .../main/scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- pom.xml | 8 ++++---- {kubernetes => resource-managers/kubernetes}/core/pom.xml | 4 ++-- .../org.apache.spark.scheduler.ExternalClusterManager | 0 .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 0 .../spark/deploy/kubernetes/KubernetesClientBuilder.scala | 0 .../scala/org/apache/spark/deploy/kubernetes/Retry.scala | 0 .../deploy/rest/KubernetesRestProtocolMessages.scala | 0 .../spark/deploy/rest/kubernetes/HttpClientUtil.scala | 0 .../deploy/rest/kubernetes/KubernetesSparkRestApi.scala | 0 .../rest/kubernetes/KubernetesSparkRestServer.scala | 0 .../cluster/kubernetes/KubernetesClusterManager.scala | 0 .../kubernetes/KubernetesClusterSchedulerBackend.scala | 0 .../kubernetes}/docker-minimal-bundle/pom.xml | 4 ++-- .../src/main/assembly/driver-assembly.xml | 0 .../src/main/assembly/executor-assembly.xml | 0 .../src/main/docker/driver/Dockerfile | 0 .../src/main/docker/executor/Dockerfile | 0 .../kubernetes}/integration-tests-spark-jobs/pom.xml | 4 ++-- .../integrationtest/jobs/SparkPiWithInfiniteWait.scala | 0 .../kubernetes}/integration-tests/pom.xml | 4 ++-- .../kubernetes/integrationtest/KubernetesSuite.scala | 0 .../integrationtest/docker/SparkDockerImageBuilder.scala | 0 .../kubernetes/integrationtest/minikube/Minikube.scala | 0 .../integrationtest/restapis/SparkRestApiV1.scala | 0 25 files changed, 13 insertions(+), 13 deletions(-) rename {kubernetes => resource-managers/kubernetes}/core/pom.xml (97%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala (100%) rename {kubernetes => resource-managers/kubernetes}/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala (100%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/pom.xml (98%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/src/main/assembly/driver-assembly.xml (100%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/src/main/assembly/executor-assembly.xml (100%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/src/main/docker/driver/Dockerfile (100%) rename {kubernetes => resource-managers/kubernetes}/docker-minimal-bundle/src/main/docker/executor/Dockerfile (100%) rename {kubernetes => 
resource-managers/kubernetes}/integration-tests-spark-jobs/pom.xml (95%) rename {kubernetes => resource-managers/kubernetes}/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala (100%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/pom.xml (98%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala (100%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala (100%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala (100%) rename {kubernetes => resource-managers/kubernetes}/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala (100%) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index d1bf6bdaf7345..cdd45f4c46bd4 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -236,7 +236,7 @@ object SparkSubmit extends CommandLineUtils { case m if m.startsWith("k8s") => KUBERNETES case m if m.startsWith("local") => LOCAL case _ => - printErrorAndExit("Master must either be yarn or start with spark, mesos, local") + printErrorAndExit("Master must either be yarn or start with spark, mesos, k8s, or local") -1 } diff --git a/pom.xml b/pom.xml index 69f7eb97be2d7..963c9ad4e4a0c 100644 --- a/pom.xml +++ b/pom.xml @@ -2575,16 +2575,16 @@ kubernetes - kubernetes/core + resource-managers/kubernetes/core kubernetes-integration-tests - kubernetes/docker-minimal-bundle - kubernetes/integration-tests - kubernetes/integration-tests-spark-jobs + resource-managers/kubernetes/docker-minimal-bundle + resource-managers/kubernetes/integration-tests + resource-managers/kubernetes/integration-tests-spark-jobs diff --git a/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml similarity index 97% rename from kubernetes/core/pom.xml rename to resource-managers/kubernetes/core/pom.xml index 9c7eb52b2680a..388defd93465d 100644 --- a/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,8 +20,8 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT - ../../pom.xml + 2.2.0-SNAPSHOT + ../../../pom.xml spark-kubernetes_2.11 diff --git a/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager similarity index 100% rename from kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager rename to resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.scheduler.ExternalClusterManager diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala 
diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala diff --git a/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala similarity index 100% rename from kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala diff --git a/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml similarity index 98% rename from kubernetes/docker-minimal-bundle/pom.xml rename to resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 3de939ea3978a..c20e51c93e7c7 100644 --- a/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,8 +21,8 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT - ../pom.xml + 2.2.0-SNAPSHOT + ../../../pom.xml spark-docker-minimal-bundle_2.11 diff --git a/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml similarity index 100% rename from kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml diff --git a/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml similarity index 100% rename from kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml diff --git a/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile similarity index 100% rename from kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile diff --git a/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile similarity index 100% rename from kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile diff --git a/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml similarity index 95% rename from kubernetes/integration-tests-spark-jobs/pom.xml rename to resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 17f1c4906214f..12b0234ae71bd 100644 --- a/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,8 +20,8 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT - ../../pom.xml + 2.2.0-SNAPSHOT + ../../../pom.xml spark-kubernetes-integration-tests-spark-jobs_2.11 diff --git a/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala similarity index 100% rename from 
kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala rename to resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala diff --git a/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml similarity index 98% rename from kubernetes/integration-tests/pom.xml rename to resource-managers/kubernetes/integration-tests/pom.xml index 0568cb1e21826..1e7eb0e12e6df 100644 --- a/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,8 +20,8 @@ org.apache.spark spark-parent_2.11 - 2.1.0-SNAPSHOT - ../../pom.xml + 2.2.0-SNAPSHOT + ../../../pom.xml spark-kubernetes-integration-tests_2.11 diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala similarity index 100% rename from kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala similarity index 100% rename from kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala similarity index 100% rename from kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala diff --git a/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala similarity index 100% rename from kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/restapis/SparkRestApiV1.scala From 728be0ec0f9788568a5768dacc2e668e779f84a3 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 11 Jan 2017 14:36:45 -0800 Subject: [PATCH 010/225] Use tar and gzip to compress+archive shipped jars (#2) * Use tar and gzip to archive shipped jars. 
* Address comments * Move files to resolve merge --- pom.xml | 1 + .../spark/deploy/kubernetes/Client.scala | 21 +-- .../rest/KubernetesRestProtocolMessages.scala | 13 +- .../rest/kubernetes/CompressionUtils.scala | 139 ++++++++++++++++++ .../KubernetesSparkRestServer.scala | 27 +--- .../pom.xml | 33 +++++ .../kubernetes/integrationtest/PiHelper.java | 33 +++++ .../integration-tests-spark-jobs/pom.xml | 6 + .../jobs/SparkPiWithInfiniteWait.scala | 9 +- .../kubernetes/integration-tests/pom.xml | 13 ++ .../integrationtest/KubernetesSuite.scala | 7 + 11 files changed, 254 insertions(+), 48 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala create mode 100644 resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml create mode 100644 resource-managers/kubernetes/integration-tests-spark-jobs-helpers/src/main/java/org/apache/spark/deploy/kubernetes/integrationtest/PiHelper.java diff --git a/pom.xml b/pom.xml index 963c9ad4e4a0c..ee156556373ad 100644 --- a/pom.xml +++ b/pom.xml @@ -2585,6 +2585,7 @@ resource-managers/kubernetes/docker-minimal-bundle resource-managers/kubernetes/integration-tests resource-managers/kubernetes/integration-tests-spark-jobs + resource-managers/kubernetes/integration-tests-spark-jobs-helpers diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 0715c84495a2c..230598d63bed1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -35,7 +35,7 @@ import scala.concurrent.duration.DurationInt import scala.util.Success import org.apache.spark.{SPARK_VERSION, SparkConf} -import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} +import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -284,8 +284,8 @@ private[spark] class Client( case other => RemoteAppResource(other) } - val uploadDriverExtraClasspathBase64Contents = getFileContents(uploadedDriverExtraClasspath) - val uploadJarsBase64Contents = getFileContents(uploadedJars) + val uploadDriverExtraClasspathBase64Contents = compressJars(uploadedDriverExtraClasspath) + val uploadJarsBase64Contents = compressJars(uploadedJars) KubernetesCreateSubmissionRequest( appResource = resolvedAppResource, mainClass = mainClass, @@ -296,19 +296,10 @@ private[spark] class Client( uploadedJarsBase64Contents = uploadJarsBase64Contents) } - def getFileContents(maybeFilePaths: Option[String]): Array[(String, String)] = { + def compressJars(maybeFilePaths: Option[String]): Option[TarGzippedData] = { maybeFilePaths - .map(_.split(",").map(filePath => { - val fileToUpload = new File(filePath) - if (!fileToUpload.isFile) { - throw new IllegalStateException("Provided file to upload for driver extra classpath" + - s" does not exist or is not a file: $filePath") - } else { - val fileBytes = Files.toByteArray(fileToUpload) - val fileBase64 = Base64.encodeBase64String(fileBytes) - (fileToUpload.getName, fileBase64) - } - })).getOrElse(Array.empty[(String, 
String)]) + .map(_.split(",")) + .map(CompressionUtils.createTarGzip(_)) } private def getDriverLauncherService( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 4b7bb66083f29..6da1a848b25e7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -27,14 +27,19 @@ case class KubernetesCreateSubmissionRequest( val appArgs: Array[String], val sparkProperties: Map[String, String], val secret: String, - val uploadedDriverExtraClasspathBase64Contents: Array[(String, String)] - = Array.empty[(String, String)], - val uploadedJarsBase64Contents: Array[(String, String)] - = Array.empty[(String, String)]) extends SubmitRestProtocolRequest { + val uploadedDriverExtraClasspathBase64Contents: Option[TarGzippedData], + val uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } +case class TarGzippedData( + val dataBase64: String, + val blockSize: Int = 10240, + val recordSize: Int = 512, + val encoding: String +) + @JsonTypeInfo( use = JsonTypeInfo.Id.NAME, include = JsonTypeInfo.As.PROPERTY, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala new file mode 100644 index 0000000000000..805a52bada219 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala @@ -0,0 +1,139 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream} +import java.util.zip.{GZIPInputStream, GZIPOutputStream} + +import com.google.common.io.Files +import org.apache.commons.codec.binary.Base64 +import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream, TarArchiveOutputStream} +import org.apache.commons.compress.utils.CharsetNames +import org.apache.commons.io.IOUtils +import scala.collection.mutable + +import org.apache.spark.deploy.rest.TarGzippedData +import org.apache.spark.internal.Logging +import org.apache.spark.util.{ByteBufferOutputStream, Utils} + +private[spark] object CompressionUtils extends Logging { + // Defaults from TarArchiveOutputStream + private val BLOCK_SIZE = 10240 + private val RECORD_SIZE = 512 + private val ENCODING = CharsetNames.UTF_8 + + /** + * Compresses all of the given paths into a gzipped-tar archive, returning the compressed data in + * memory as an instance of {@link TarGzippedData}. The files are taken without consideration to their + * original folder structure, and are added to the tar archive in a flat hierarchy. Directories are + * not allowed, and duplicate file names are de-duplicated by appending a numeric suffix to the file name, + * before the file extension. For example, if paths a/b.txt and b/b.txt were provided, then the files added + * to the tar archive would be b.txt and b-1.txt. + * @param paths A list of file paths to be archived + * @return An in-memory representation of the compressed data. + */ + def createTarGzip(paths: Iterable[String]): TarGzippedData = { + val compressedBytesStream = Utils.tryWithResource(new ByteBufferOutputStream()) { raw => + Utils.tryWithResource(new GZIPOutputStream(raw)) { gzipping => + Utils.tryWithResource(new TarArchiveOutputStream( + gzipping, + BLOCK_SIZE, + RECORD_SIZE, + ENCODING)) { tarStream => + val usedFileNames = mutable.HashSet.empty[String] + for (path <- paths) { + val file = new File(path) + if (!file.isFile) { + throw new IllegalArgumentException(s"Cannot add $path to tarball; either does" + + s" not exist or is a directory.") + } + var resolvedFileName = file.getName + val extension = Files.getFileExtension(file.getName) + val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) + var deduplicationCounter = 1 + while (usedFileNames.contains(resolvedFileName)) { + val oldResolvedFileName = resolvedFileName + resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" + logWarning(s"File with name $oldResolvedFileName already exists. Trying to add with" + + s" file name $resolvedFileName instead.") + deduplicationCounter += 1 + } + usedFileNames += resolvedFileName + val tarEntry = new TarArchiveEntry(file, resolvedFileName) + tarStream.putArchiveEntry(tarEntry) + Utils.tryWithResource(new FileInputStream(file)) { fileInput => + IOUtils.copy(fileInput, tarStream) + } + tarStream.closeArchiveEntry() + } + } + } + raw + } + val compressedAsBase64 = Base64.encodeBase64String(compressedBytesStream.toByteBuffer.array) + TarGzippedData( + dataBase64 = compressedAsBase64, + blockSize = BLOCK_SIZE, + recordSize = RECORD_SIZE, + encoding = ENCODING + ) + } + + /** + * Decompresses the provided tar archive to a directory. + * @param compressedData In-memory representation of the compressed data, ideally created via + * {@link createTarGzip}. + * @param rootOutputDir Directory to write the output files to. 
All files from the tarball + * are written here in a flat hierarchy. + * @return List of file paths for each file that was unpacked from the archive. + */ + def unpackAndWriteCompressedFiles( + compressedData: TarGzippedData, + rootOutputDir: File): Seq[String] = { + val paths = mutable.Buffer.empty[String] + val compressedBytes = Base64.decodeBase64(compressedData.dataBase64) + if (!rootOutputDir.exists) { + if (!rootOutputDir.mkdirs) { + throw new IllegalStateException(s"Failed to create output directory for unpacking" + + s" files at ${rootOutputDir.getAbsolutePath}") + } + } else if (rootOutputDir.isFile) { + throw new IllegalArgumentException(s"Root dir for writing decompressed files: " + + s"${rootOutputDir.getAbsolutePath} exists and is not a directory.") + } + Utils.tryWithResource(new ByteArrayInputStream(compressedBytes)) { compressedBytesStream => + Utils.tryWithResource(new GZIPInputStream(compressedBytesStream)) { gzipped => + Utils.tryWithResource(new TarArchiveInputStream( + gzipped, + compressedData.blockSize, + compressedData.recordSize, + compressedData.encoding)) { tarInputStream => + var nextTarEntry = tarInputStream.getNextTarEntry + while (nextTarEntry != null) { + val outputFile = new File(rootOutputDir, nextTarEntry.getName) + Utils.tryWithResource(new FileOutputStream(outputFile)) { fileOutputStream => + IOUtils.copy(tarInputStream, fileOutputStream) + } + paths += outputFile.getAbsolutePath + nextTarEntry = tarInputStream.getNextTarEntry + } + } + } + } + paths.toSeq + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 0a2e8176394ab..2ca3d4a8c0656 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -217,30 +217,11 @@ private[spark] class KubernetesSparkRestServer( } private def writeBase64ContentsToFiles( - filesBase64Contents: Array[(String, String)], + maybeCompressedFiles: Option[TarGzippedData], rootDir: File): Seq[String] = { - val resolvedFileNames = new scala.collection.mutable.HashSet[String] - val resolvedFilePaths = new ArrayBuffer[String] - for (file <- filesBase64Contents) { - var currentFileName = file._1 - var deduplicationCounter = 1 - while (resolvedFileNames.contains(currentFileName)) { - // Prepend the deduplication counter so as to not mess with the extension - currentFileName = s"$deduplicationCounter-$currentFileName" - deduplicationCounter += 1 - } - val resolvedFile = new File(rootDir, currentFileName) - val resolvedFilePath = resolvedFile.getAbsolutePath - if (resolvedFile.createNewFile()) { - val fileContents = Base64.decodeBase64(file._2) - Files.write(fileContents, resolvedFile) - } else { - throw new IllegalStateException(s"Could not write jar file to $resolvedFilePath") - } - resolvedFileNames += currentFileName - resolvedFilePaths += resolvedFilePath - } - resolvedFilePaths.toSeq + maybeCompressedFiles.map { compressedFiles => + CompressionUtils.unpackAndWriteCompressedFiles(compressedFiles, rootDir) + }.getOrElse(Seq.empty[String]) } } diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml new file mode 
100644 index 0000000000000..f99838636b349 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -0,0 +1,33 @@ + + + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.2.0-SNAPSHOT + ../../../pom.xml + + + spark-kubernetes-integration-tests-spark-jobs-helpers_2.11 + jar + Spark Project Kubernetes Integration Tests Spark Jobs Helpers + + + + diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/src/main/java/org/apache/spark/deploy/kubernetes/integrationtest/PiHelper.java b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/src/main/java/org/apache/spark/deploy/kubernetes/integrationtest/PiHelper.java new file mode 100644 index 0000000000000..99d982397bb6e --- /dev/null +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/src/main/java/org/apache/spark/deploy/kubernetes/integrationtest/PiHelper.java @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest; + +/** + * Primarily extracted so that a separate jar can be added as a dependency for the + * test Spark job. 
+ */ +public class PiHelper { + public static int helpPi() { + double x = Math.random() * 2 - 1; + double y = Math.random() * 2 - 1; + if (x*x + y*y < 1) { + return 1; + } else { + return 0; + } + } +} diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 12b0234ae71bd..59e59aca5109b 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -29,6 +29,12 @@ Spark Project Kubernetes Integration Tests Spark Jobs + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} + ${project.version} + provided + org.apache.spark spark-core_${scala.binary.version} diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala index 6e4660b771305..d3372749f999e 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/SparkPiWithInfiniteWait.scala @@ -16,8 +16,7 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest.jobs -import scala.math.random - +import org.apache.spark.deploy.kubernetes.integrationtest.PiHelper import org.apache.spark.sql.SparkSession // Equivalent to SparkPi except does not stop the Spark Context @@ -32,10 +31,8 @@ private[spark] object SparkPiWithInfiniteWait { .getOrCreate() val slices = if (args.length > 0) args(0).toInt else 10 val n = math.min(100000L * slices, Int.MaxValue).toInt // avoid overflow - val count = spark.sparkContext.parallelize(1 until n, slices).map { i => - val x = random * 2 - 1 - val y = random * 2 - 1 - if (x*x + y*y < 1) 1 else 0 + val count = spark.sparkContext.parallelize(1 until n, slices).map { _ => + PiHelper.helpPi() }.reduce(_ + _) // scalastyle:off println println("Pi is roughly " + 4.0 * count / (n - 1)) diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 1e7eb0e12e6df..569527de8e300 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -48,6 +48,12 @@ ${project.version} test + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} + ${project.version} + test + org.apache.spark spark-docker-minimal-bundle_${scala.binary.version} @@ -123,6 +129,13 @@ jar ${project.build.directory}/integration-tests-spark-jobs + + org.apache.spark + spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} + ${project.version} + jar + ${project.build.directory}/integration-tests-spark-jobs-helpers + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 183f666994d38..6247a1674f8d6 100644 --- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -41,6 +41,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .listFiles()(0) .getAbsolutePath + private val HELPER_JAR = Paths.get("target", "integration-tests-spark-jobs-helpers") + .toFile + .listFiles()(0) + .getAbsolutePath + private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) private val MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + @@ -117,6 +122,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set("spark.kubernetes.namespace", NAMESPACE) .set("spark.kubernetes.driver.docker.image", "spark-driver:latest") .set("spark.kubernetes.executor.docker.image", "spark-executor:latest") + .set("spark.kubernetes.driver.uploads.jars", HELPER_JAR) .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") .set("spark.executors.instances", "1") @@ -142,6 +148,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--executor-memory", "512m", "--executor-cores", "1", "--num-executors", "1", + "--upload-jars", HELPER_JAR, "--class", MAIN_CLASS, "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", From 793143da121503d5a65bd1f6729dd3d7be2b3691 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 11 Jan 2017 16:05:16 -0800 Subject: [PATCH 011/225] Use alpine and java 8 for docker images. (#10) * Use alpine and java 8 for docker images. 
* Remove installation of vim and redundant comment --- .../src/main/docker/driver/Dockerfile | 11 +---------- .../src/main/docker/executor/Dockerfile | 11 +---------- 2 files changed, 2 insertions(+), 20 deletions(-) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 3bba38d8395ae..7bbabc40c34fc 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -1,12 +1,4 @@ -FROM ubuntu:trusty - -# Upgrade package index -# install a few other useful packages plus Open Jdk 7 -# Remove unneeded /var/lib/apt/lists/* after install to reduce the -# docker image size (by ~30MB) -RUN apt-get update && \ - apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server procps && \ - rm -rf /var/lib/apt/lists/* +FROM anapsix/alpine-java:8 RUN mkdir -p /opt/spark RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static @@ -18,7 +10,6 @@ ADD sbin /opt/spark/sbin ADD conf /opt/spark/conf ENV SPARK_HOME /opt/spark -ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64/jre WORKDIR /opt/spark diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index f68f1a3fb2694..f584525cdc5e9 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -1,12 +1,4 @@ -FROM ubuntu:trusty - -# Upgrade package index -# install a few other useful packages plus Open Jdk 7 -# Remove unneeded /var/lib/apt/lists/* after install to reduce the -# docker image size (by ~30MB) -RUN apt-get update && \ - apt-get install -y less openjdk-7-jre-headless net-tools vim-tiny sudo openssh-server procps && \ - rm -rf /var/lib/apt/lists/* +FROM anapsix/alpine-java:8 RUN mkdir -p /opt/spark RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static @@ -18,7 +10,6 @@ ADD sbin /opt/spark/sbin ADD conf /opt/spark/conf ENV SPARK_HOME /opt/spark -ENV JAVA_HOME /usr/lib/jvm/java-7-openjdk-amd64/jre WORKDIR /opt/spark From 2b1a99d3c5c3eda03f6011fc7071d8107f5a0927 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 11 Jan 2017 18:20:12 -0800 Subject: [PATCH 012/225] Copy the Dockerfiles from docker-minimal-bundle into the distribution. (#12) --- dev/make-distribution.sh | 7 +++++++ .../src/main/docker/driver/Dockerfile | 5 ++++- .../src/main/docker/executor/Dockerfile | 5 ++++- 3 files changed, 15 insertions(+), 2 deletions(-) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index 48a824499acb9..b06bece03d4df 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -168,6 +168,13 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE" # Copy jars cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/" +# Copy docker files +mkdir -p "$DISTDIR/dockerfiles/driver" +mkdir -p "$DISTDIR/dockerfiles/executor" +DOCKERFILES_SRC="$SPARK_HOME/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker" +cp "$DOCKERFILES_SRC/driver/Dockerfile" "$DISTDIR/dockerfiles/driver/Dockerfile" +cp "$DOCKERFILES_SRC/executor/Dockerfile" "$DISTDIR/dockerfiles/executor/Dockerfile" + # Only create the yarn directory if the yarn artifacts were build. 
if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then mkdir "$DISTDIR/yarn" diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 7bbabc40c34fc..308bf392fb202 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -1,7 +1,10 @@ FROM anapsix/alpine-java:8 +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile . + RUN mkdir -p /opt/spark -RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static RUN touch /opt/spark/RELEASE ADD jars /opt/spark/jars diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index f584525cdc5e9..164c0a4289cac 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -1,7 +1,10 @@ FROM anapsix/alpine-java:8 +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . + RUN mkdir -p /opt/spark -RUN mkdir -p /opt/spark/ui-resources/org/apache/spark/ui/static RUN touch /opt/spark/RELEASE ADD jars /opt/spark/jars From 457ebd8ce7017cea0d36c44f0e3a5f364a33705a Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Thu, 12 Jan 2017 14:11:02 -0800 Subject: [PATCH 013/225] inherit IO (#13) --- .../deploy/rest/kubernetes/KubernetesSparkRestServer.scala | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 2ca3d4a8c0656..837706ca9f5a8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -164,10 +164,7 @@ private[spark] class KubernetesSparkRestServer( command += s"-Xmx$driverMemory" command += mainClass command ++= appArgs - val pb = new ProcessBuilder(command: _*) - Paths.get(sparkHome, "logs").toFile.mkdirs - pb.redirectOutput(Paths.get(sparkHome, "logs", "stdout").toFile) - pb.redirectError(Paths.get(sparkHome, "logs", "stderr").toFile) + val pb = new ProcessBuilder(command: _*).inheritIO() val process = pb.start() ShutdownHookManager.addShutdownHook(() => { logInfo("Received stop command, shutting down the running Spark application...") From 94ab8dd7ea74cb0b216cb470386e071084afe948 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 12 Jan 2017 17:59:11 -0800 Subject: [PATCH 014/225] Error messages when the driver container fails to start. 
(#11) * Error messages when the driver container fails to start. * Fix messages a bit * Use timeout constant * Delete the pod if it fails for any reason (not just timeout) * Actually set submit succeeded * Fix typo --- .../spark/deploy/kubernetes/Client.scala | 83 +++++++++++++++++-- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 230598d63bed1..6d7de973a52c2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.kubernetes import java.io.File import java.security.SecureRandom -import java.util.concurrent.{Executors, TimeUnit} +import java.util.concurrent.{Executors, TimeoutException, TimeUnit} import javax.net.ssl.X509TrustManager import com.google.common.io.Files @@ -34,7 +34,7 @@ import scala.concurrent.ExecutionContext import scala.concurrent.duration.DurationInt import scala.util.Success -import org.apache.spark.{SPARK_VERSION, SparkConf} +import org.apache.spark.{SPARK_VERSION, SparkConf, SparkException} import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging @@ -130,8 +130,8 @@ private[spark] class Client( val podWatcher = new Watcher[Pod] { override def eventReceived(action: Action, t: Pod): Unit = { if ((action == Action.ADDED || action == Action.MODIFIED) - && t.getStatus.getPhase == "Running" - && !submitCompletedFuture.isDone) { + && t.getStatus.getPhase == "Running" + && !submitCompletedFuture.isDone) { t.getStatus .getContainerStatuses .asScala @@ -216,8 +216,78 @@ private[spark] class Client( .endContainer() .endSpec() .done() - submitCompletedFuture.get(30, TimeUnit.SECONDS) - } + var submitSucceeded = false + try { + submitCompletedFuture.get(LAUNCH_TIMEOUT_SECONDS, TimeUnit.SECONDS) + submitSucceeded = true + } catch { + case e: TimeoutException => + val driverPod = try { + kubernetesClient.pods().withName(kubernetesAppId).get() + } catch { + case throwable: Throwable => + logError(s"Timed out while waiting $LAUNCH_TIMEOUT_SECONDS seconds for the" + + " driver pod to start, but an error occurred while fetching the driver" + + " pod's details.", throwable) + throw new SparkException(s"Timed out while waiting $LAUNCH_TIMEOUT_SECONDS" + + " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + + " the latest state of the pod, another error was thrown. Check the logs for" + + " the error that was thrown in looking up the driver pod.", e) + } + val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + + s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + + s" $LAUNCH_TIMEOUT_SECONDS seconds." + val podStatusPhase = if (driverPod.getStatus.getPhase != null) { + s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" + } else { + "The pod had no final phase." + } + val podStatusMessage = if (driverPod.getStatus.getMessage != null) { + s"Latest message from the pod is: ${driverPod.getStatus.getMessage}" + } else { + "The pod had no final message." 
+ } + val failedDriverContainerStatusString = driverPod.getStatus + .getContainerStatuses + .asScala + .find(_.getName == DRIVER_LAUNCHER_CONTAINER_NAME) + .map(status => { + val lastState = status.getState + if (lastState.getRunning != null) { + "Driver container last state: Running\n" + + s"Driver container started at: ${lastState.getRunning.getStartedAt}" + } else if (lastState.getWaiting != null) { + "Driver container last state: Waiting\n" + + s"Driver container wait reason: ${lastState.getWaiting.getReason}\n" + + s"Driver container message: ${lastState.getWaiting.getMessage}\n" + } else if (lastState.getTerminated != null) { + "Driver container last state: Terminated\n" + + s"Driver container started at: ${lastState.getTerminated.getStartedAt}\n" + + s"Driver container finished at: ${lastState.getTerminated.getFinishedAt}\n" + + s"Driver container exit reason: ${lastState.getTerminated.getReason}\n" + + s"Driver container exit code: ${lastState.getTerminated.getExitCode}\n" + + s"Driver container message: ${lastState.getTerminated.getMessage}" + } else { + "Driver container last state: Unknown" + } + }).getOrElse("The driver container wasn't found in the pod; expected to find" + + s" container with name $DRIVER_LAUNCHER_CONTAINER_NAME") + val finalErrorMessage = s"$topLevelMessage\n" + + s"$podStatusPhase\n" + + s"$podStatusMessage\n\n$failedDriverContainerStatusString" + logError(finalErrorMessage, e) + throw new SparkException(finalErrorMessage, e) + } finally { + if (!submitSucceeded) { + try { + kubernetesClient.pods.withName(kubernetesAppId).delete + } catch { + case throwable: Throwable => + logError("Failed to delete driver pod after it failed to run.", throwable) + } + } + } + } Utils.tryWithResource(kubernetesClient .pods() @@ -338,6 +408,7 @@ private object Client { private val DRIVER_LAUNCHER_CONTAINER_NAME = "spark-kubernetes-driver-launcher" private val SECURE_RANDOM = new SecureRandom() private val SPARK_SUBMISSION_SECRET_BASE_DIR = "/var/run/secrets/spark-submission" + private val LAUNCH_TIMEOUT_SECONDS = 30 def main(args: Array[String]): Unit = { require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + From 7afadb32e505585490c0d43859afb21f805a3c1a Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Thu, 12 Jan 2017 19:27:44 -0800 Subject: [PATCH 015/225] Fix linter error to make CI happy (#18) --- .../org/apache/spark/launcher/SparkSubmitOptionParser.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index 94f9bc319b6a2..2b7290a12f8c1 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -80,7 +80,8 @@ class SparkSubmitOptionParser { protected final String KUBERNETES_MASTER = "--kubernetes-master"; protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace"; protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars"; - protected final String KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH = "--upload-driver-extra-classpath"; + protected final String KUBERNETES_UPLOAD_DRIVER_EXTRA_CLASSPATH = + "--upload-driver-extra-classpath"; /** * This is the canonical list of spark-submit options. 
Each entry in the array contains the From 909b28123e6f32ac1ba42f84e2741deb29ad5b6b Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 13 Jan 2017 14:11:08 -0800 Subject: [PATCH 016/225] Documentation for the current state of the world (#16) * Documentation for the current state of the world. * Adding navigation links from other pages * Address comments, add TODO for things that should be fixed * Address comments, mostly making images section clearer * Virtual runtime -> container runtime --- docs/_layouts/global.html | 1 + docs/running-on-kubernetes.md | 224 ++++++++++++++++++++++++++++++++++ 2 files changed, 225 insertions(+) create mode 100644 docs/running-on-kubernetes.md diff --git a/docs/_layouts/global.html b/docs/_layouts/global.html index c00d0db63cd10..3c786a6344066 100755 --- a/docs/_layouts/global.html +++ b/docs/_layouts/global.html @@ -99,6 +99,7 @@
   <li><a href="spark-standalone.html">Spark Standalone</a></li>
   <li><a href="running-on-mesos.html">Mesos</a></li>
   <li><a href="running-on-yarn.html">YARN</a></li>
+  <li><a href="running-on-kubernetes.html">Kubernetes</a></li>
  • diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md new file mode 100644 index 0000000000000..5192d9d086618 --- /dev/null +++ b/docs/running-on-kubernetes.md @@ -0,0 +1,224 @@ +--- +layout: global +title: Running Spark on Kubernetes +--- + +Support for running on [Kubernetes](https://kubernetes.io/) is available in experimental status. The feature set is +currently limited and not well-tested. This should not be used in production environments. + +## Setting Up Docker Images + +Kubernetes requires users to supply images that can be deployed into containers within pods. The images are built to +be run in a container runtime environment that Kubernetes supports. Docker is a container runtime environment that is +frequently used with Kubernetes, so Spark provides some support for working with Docker to get started quickly. + +To use Spark on Kubernetes with Docker, images for the driver and the executors need to built and published to an +accessible Docker registry. Spark distributions include the Docker files for the driver and the executor at +`dockerfiles/driver/Dockerfile` and `docker/executor/Dockerfile`, respectively. Use these Docker files to build the +Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images to the +registry. + +For example, if the registry host is `registry-host` and the registry is listening on port 5000: + + cd $SPARK_HOME + docker build -t registry-host:5000/spark-driver:latest -f dockerfiles/driver/Dockerfile . + docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile . + docker push registry-host:5000/spark-driver:latest + docker push registry-host:5000/spark-executor:latest + +## Submitting Applications to Kubernetes + +Kubernetes applications can be executed via `spark-submit`. For example, to compute the value of pi, assuming the images +are set up as described above: + + bin/spark-submit + --deploy-mode cluster + --class org.apache.spark.examples.SparkPi + --master k8s://https://: + --kubernetes-namespace default + --conf spark.executor.instances=5 + --conf spark.app.name=spark-pi + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + examples/jars/spark_2.11-2.2.0.jar + + +The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting +`spark.master` in the application's configuration, must be a URL with the format `k8s://`. Prefixing the +master string with `k8s://` will cause the Spark application to launch on the Kubernetes cluster, with the API server +being contacted at `api_server_url`. The HTTP protocol must also be specified. + +Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on +the cluster. + +### Adding Other JARs + +Spark allows users to provide dependencies that are bundled into the driver's Docker image, or that are on the local +disk of the submitter's machine. These two types of dependencies are specified via different configuration options to +`spark-submit`: + +* Local jars provided by specifying the `--jars` command line argument to `spark-submit`, or by setting `spark.jars` in + the application's configuration, will be treated as jars that are located on the *disk of the driver Docker + container*. 
This only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with + other schemes are fetched from their appropriate locations. +* Local jars provided by specifying the `--upload-jars` command line argument to `spark-submit`, or by setting + `spark.kubernetes.driver.uploads.jars` in the application's configuration, will be treated as jars that are located on + the *disk of the submitting machine*. These jars are uploaded to the driver docker container before executing the + application. + +* A main application resource path that does not have a scheme or that has the scheme `file://` is assumed to be on the + *disk of the submitting machine*. This resource is uploaded to the driver docker container before executing the + application. A remote path can still be specified and the resource will be fetched from the appropriate location. + +In all of these cases, the jars are placed on the driver's classpath, and are also sent to the executors. Below are some +examples of providing application dependencies. + +To submit an application with both the main resource and two other jars living on the submitting user's machine: + + bin/spark-submit + --deploy-mode cluster + --class com.example.applications.SampleApplication + --master k8s://https://192.168.99.100 + --kubernetes-namespace default + --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + /home/exampleuser/exampleapplication/main.jar + +Note that since passing the jars through the `--upload-jars` command line argument is equivalent to setting the +`spark.kubernetes.driver.uploads.jars` Spark property, the above will behave identically to this command: + + bin/spark-submit + --deploy-mode cluster + --class com.example.applications.SampleApplication + --master k8s://https://192.168.99.100 + --kubernetes-namespace default + --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + /home/exampleuser/exampleapplication/main.jar + +To specify a main application resource that can be downloaded from an HTTP service, and if a plugin for that application +is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's disk: + + bin/spark-submit + --deploy-mode cluster + --class com.example.applications.PluggableApplication + --master k8s://https://192.168.99.100 + --kubernetes-namespace default + --jars /opt/spark-plugins/app-plugin.jar + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + http://example.com:8080/applications/sparkpluggable/app.jar + +Note that since passing the jars through the `--jars` command line argument is equivalent to setting the `spark.jars` +Spark property, the above will behave identically to this command: + + bin/spark-submit + --deploy-mode cluster + --class com.example.applications.PluggableApplication + --master k8s://https://192.168.99.100 + --kubernetes-namespace default + --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar + --conf 
spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + http://example.com:8080/applications/sparkpluggable/app.jar + +### Spark Properties + +Below are some other common properties that are specific to Kubernetes. Most of the other configurations are the same +from the other deployment modes. See the [configuration page](configuration.html) for more information on those. + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
+<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.kubernetes.namespace</code></td><td>(none)</td>
+  <td>The namespace that will be used for running the driver and executor pods. Must be specified. When using
+    spark-submit in cluster mode, this can also be passed to spark-submit via the
+    <code>--kubernetes-namespace</code> command line argument.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.driver.docker.image</code></td><td><code>spark-driver:2.2.0</code></td>
+  <td>Docker image to use for the driver. Specify this using the standard Docker tag format.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.docker.image</code></td><td><code>spark-executor:2.2.0</code></td>
+  <td>Docker image to use for the executors. Specify this using the standard Docker tag format.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.submit.caCertFile</code></td><td>(none)</td>
+  <td>CA cert file for connecting to Kubernetes over SSL. This file should be located on the submitting machine's
+    disk.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.submit.clientKeyFile</code></td><td>(none)</td>
+  <td>Client key file for authenticating against the Kubernetes API server. This file should be located on the
+    submitting machine's disk.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.submit.clientCertFile</code></td><td>(none)</td>
+  <td>Client cert file for authenticating against the Kubernetes API server. This file should be located on the
+    submitting machine's disk.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.submit.serviceAccountName</code></td><td><code>default</code></td>
+  <td>Service account that is used when running the driver pod. The driver pod uses this service account when
+    requesting executor pods from the API server.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.driver.uploads.jars</code></td><td>(none)</td>
+  <td>Comma-separated list of jars to be sent to the driver and all executors when submitting the application in
+    cluster mode. Refer to adding other jars for more information.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.driver.uploads.driverExtraClasspath</code></td><td>(none)</td>
+  <td>Comma-separated list of jars to be sent to the driver only when submitting the application in cluster
+    mode.</td>
+</tr>
+<tr>
+  <td><code>spark.kubernetes.executor.memoryOverhead</code></td><td>executorMemory * 0.10, with minimum of 384</td>
+  <td>The amount of off-heap memory (in megabytes) to be allocated per executor. This is memory that accounts for
+    things like VM overheads, interned strings, other native overheads, etc. This tends to grow with the executor
+    size (typically 6-10%).</td>
+</tr>
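
As a compact illustration of how these property names and defaults fit together, the following sketch (illustrative only, not part of this patch; the object name is invented) reads a few of them from a `SparkConf` in the same way a submission client might:

    import org.apache.spark.SparkConf

    object KubernetesPropertiesSketch {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf()
        // No default at this point: the namespace must be specified explicitly.
        val namespace = conf.getOption("spark.kubernetes.namespace")
          .getOrElse(throw new IllegalArgumentException(
            "spark.kubernetes.namespace must be specified"))
        // The Docker images default to the images published for the matching Spark version.
        val driverImage = conf.get("spark.kubernetes.driver.docker.image", "spark-driver:2.2.0")
        val executorImage = conf.get("spark.kubernetes.executor.docker.image", "spark-executor:2.2.0")
        // Used by the driver pod when requesting executor pods from the API server.
        val serviceAccount = conf.get("spark.kubernetes.submit.serviceAccountName", "default")
        println(s"namespace=$namespace driver=$driverImage executor=$executorImage account=$serviceAccount")
      }
    }
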
    + +## Current Limitations + +Running Spark on Kubernetes is currently an experimental feature. Some restrictions on the current implementation that +should be lifted in the future include: +* Applications can only use a fixed number of executors. Dynamic allocation is not supported. +* Applications can only run in cluster mode. +* Only Scala and Java applications can be run. From 77b287e3ca65aa2813b2e365b6e45221c5f46d5b Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 13 Jan 2017 14:56:08 -0800 Subject: [PATCH 017/225] Development workflow documentation for the current state of the world. (#20) * Development workflow documentation for the current state of the world. * Address comments. * Clarified code change and added ticket link --- resource-managers/kubernetes/README.md | 56 ++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 resource-managers/kubernetes/README.md diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md new file mode 100644 index 0000000000000..3c11efa38d5af --- /dev/null +++ b/resource-managers/kubernetes/README.md @@ -0,0 +1,56 @@ +--- +layout: global +title: Spark on Kubernetes Development +--- + +[Kubernetes](https://kubernetes.io/) is a framework for easily deploying, scaling, and managing containerized +applications. It would be useful for a user to run their Spark jobs on a Kubernetes cluster alongside their +other Kubernetes-managed applications. For more about the motivations for adding this feature, see the umbrella JIRA +ticket that tracks this project: [SPARK-18278](https://issues.apache.org/jira/browse/SPARK-18278). + +This submodule is an initial implementation of allowing Kubernetes to be a +supported cluster manager for Spark, along with Mesos, Hadoop YARN, and Standalone. This document provides a summary of +important matters to keep in mind when developing this feature. + +# Building Spark with Kubernetes Support + +To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. For example, to simply compile +the Kubernetes core implementation module along with its dependencies: + + build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am + +To build a distribution of Spark with Kubernetes support, use the `dev/make-distribution.sh` script, and add the +`kubernetes` profile as part of the build arguments. Any other build arguments can be specified as one would expect when +building Spark normally. For example, to build Spark against Hadoop 2.7 and Kubernetes: + + dev/make-distribution.sh --tgz -Phadoop2.7 -Pkubernetes + +# Kubernetes Code Modules + +Below is a list of the submodules for this cluster manager and what they do. + +* `core`: Implementation of the Kubernetes cluster manager support. +* `integration-tests`: Integration tests for the project. +* `docker-minimal-bundle`: Base Dockerfiles for the driver and the executors. The Dockerfiles are used for integration + tests as well as being provided in packaged distributions of Spark. +* `integration-tests-spark-jobs`: Spark jobs that are only used in integration tests. +* `integration-tests-spark-jobs-helpers`: Dependencies for the spark jobs used in integration tests. These dependencies + are separated out to facilitate testing the shipping of jars to drivers running on Kubernetes clusters. + +# Running the Kubernetes Integration Tests + +Note that the integration test framework is currently being heavily revised and is subject to change. 
+ +Running any of the integration tests requires including `kubernetes-integration-tests` profile in the build command. In +order to prepare the environment for running the integration tests, the `pre-integration-test` step must be run in Maven +on the `resource-managers/kubernetes/integration-tests` module: + + build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am + +Afterwards, the integration tests can be executed with Maven or your IDE. Note that when running tests from an IDE, the +`pre-integration-test` phase must be run every time the Spark main code changes. When running tests from the +command line, the `pre-integration-test` phase should automatically be invoked if the `integration-test` phase is run. + +# Usage Guide + +See the [usage guide](../../docs/running-on-kubernetes.md) for more information. From 0bcc391912c6738f050f419d8dca96ab9aff08e6 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Fri, 13 Jan 2017 15:05:22 -0800 Subject: [PATCH 018/225] Added service name as prefix to executor pods (#14) * Added service name as prefix to executor pods to be able to tell them apart from kubectl output * Addressed comments --- .../cluster/kubernetes/KubernetesClusterSchedulerBackend.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 4e099cea3198b..2717d2f37d910 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -155,7 +155,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private def allocateNewExecutorPod(): (String, Pod) = { val executorKubernetesId = UUID.randomUUID().toString.replaceAll("-", "") val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val name = s"exec$executorKubernetesId" + val name = s"$kubernetesDriverServiceName-exec-$executorKubernetesId" val selectors = Map(SPARK_EXECUTOR_SELECTOR -> executorId, SPARK_APP_SELECTOR -> applicationId()).asJava val executorMemoryQuantity = new QuantityBuilder(false) From 979fa92029472355c67d66fa8b7542329b9cb16e Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Fri, 13 Jan 2017 20:44:56 -0800 Subject: [PATCH 019/225] Add kubernetes profile to travis CI yml file (#21) * Add kubernetes profile to travis yml file * Fix long lines in CompressionUtils.scala --- .travis.yml | 2 +- .../deploy/rest/kubernetes/CompressionUtils.scala | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/.travis.yml b/.travis.yml index d7e9f8c0290e8..4f7ec46947e27 100644 --- a/.travis.yml +++ b/.travis.yml @@ -43,7 +43,7 @@ notifications: # 5. Run maven install before running lint-java. install: - export MAVEN_SKIP_RC=1 - - build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Pkinesis-asl -Phive -Phive-thriftserver install + - build/mvn -T 4 -q -DskipTests -Pmesos -Pyarn -Phadoop-2.3 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver install # 6. Run lint-java. 
script: diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala index 805a52bada219..1c95dacc7eb01 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala @@ -38,11 +38,11 @@ private[spark] object CompressionUtils extends Logging { /** * Compresses all of the given paths into a gzipped-tar archive, returning the compressed data in - * memory as an instance of {@link TarGzippedData}. The files are taken without consideration to their - * original folder structure, and are added to the tar archive in a flat hierarchy. Directories are - * not allowed, and duplicate file names are de-duplicated by appending a numeric suffix to the file name, - * before the file extension. For example, if paths a/b.txt and b/b.txt were provided, then the files added - * to the tar archive would be b.txt and b-1.txt. + * memory as an instance of {@link TarGzippedData}. The files are taken without consideration to + * their original folder structure, and are added to the tar archive in a flat hierarchy. + * Directories are not allowed, and duplicate file names are de-duplicated by appending a numeric + * suffix to the file name, before the file extension. For example, if paths a/b.txt and b/b.txt + * were provided, then the files added to the tar archive would be b.txt and b-1.txt. * @param paths A list of file paths to be archived * @return An in-memory representation of the compressed data. */ From 087555a1fc62330b50eff2b1d37e4176190d307e Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Tue, 17 Jan 2017 17:24:58 +0000 Subject: [PATCH 020/225] Improved the example commands in running-on-k8s document. (#25) * Improved the example commands in running-on-k8s document. * Fixed more example commands. * Fixed typo. --- docs/running-on-kubernetes.md | 84 +++++++++++++++++------------------ 1 file changed, 42 insertions(+), 42 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5192d9d086618..234c9870548c7 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -31,16 +31,16 @@ For example, if the registry host is `registry-host` and the registry is listeni Kubernetes applications can be executed via `spark-submit`. 
For example, to compute the value of pi, assuming the images are set up as described above: - bin/spark-submit - --deploy-mode cluster - --class org.apache.spark.examples.SparkPi - --master k8s://https://: - --kubernetes-namespace default - --conf spark.executor.instances=5 - --conf spark.app.name=spark-pi - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest - examples/jars/spark_2.11-2.2.0.jar + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.SparkPi \ + --master k8s://https://: \ + --kubernetes-namespace default \ + --conf spark.executor.instances=5 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ + examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -75,53 +75,53 @@ examples of providing application dependencies. To submit an application with both the main resource and two other jars living on the submitting user's machine: - bin/spark-submit - --deploy-mode cluster - --class com.example.applications.SampleApplication - --master k8s://https://192.168.99.100 - --kubernetes-namespace default - --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.SampleApplication \ + --master k8s://https://192.168.99.100 \ + --kubernetes-namespace default \ + --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ /home/exampleuser/exampleapplication/main.jar Note that since passing the jars through the `--upload-jars` command line argument is equivalent to setting the `spark.kubernetes.driver.uploads.jars` Spark property, the above will behave identically to this command: - bin/spark-submit - --deploy-mode cluster - --class com.example.applications.SampleApplication - --master k8s://https://192.168.99.100 - --kubernetes-namespace default - --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.SampleApplication \ + --master k8s://https://192.168.99.100 \ + --kubernetes-namespace default \ + --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ /home/exampleuser/exampleapplication/main.jar To specify a main application resource that can be downloaded from an HTTP 
service, and if a plugin for that application is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's disk: - bin/spark-submit - --deploy-mode cluster - --class com.example.applications.PluggableApplication - --master k8s://https://192.168.99.100 - --kubernetes-namespace default - --jars /opt/spark-plugins/app-plugin.jar - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.PluggableApplication \ + --master k8s://https://192.168.99.100 \ + --kubernetes-namespace default \ + --jars /opt/spark-plugins/app-plugin.jar \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ http://example.com:8080/applications/sparkpluggable/app.jar Note that since passing the jars through the `--jars` command line argument is equivalent to setting the `spark.jars` Spark property, the above will behave identically to this command: - bin/spark-submit - --deploy-mode cluster - --class com.example.applications.PluggableApplication - --master k8s://https://192.168.99.100 - --kubernetes-namespace default - --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.PluggableApplication \ + --master k8s://https://192.168.99.100 \ + --kubernetes-namespace default \ + --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ http://example.com:8080/applications/sparkpluggable/app.jar ### Spark Properties From a89b4b0da2813a9fd8ad449b03d35af2ef496071 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 18 Jan 2017 11:24:43 -0800 Subject: [PATCH 021/225] Fix spacing for command highlighting (#31) --- resource-managers/kubernetes/README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 3c11efa38d5af..62764dcb2ca03 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -45,7 +45,7 @@ Running any of the integration tests requires including `kubernetes-integration- order to prepare the environment for running the integration tests, the `pre-integration-test` step must be run in Maven on the `resource-managers/kubernetes/integration-tests` module: - build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am + build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am Afterwards, the integration tests can be executed with Maven or your IDE. Note that when running tests from an IDE, the `pre-integration-test` phase must be run every time the Spark main code changes. 
When running tests from the From 85f02bfe1aa34eb0b56522de788b2eed1fd42746 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 18 Jan 2017 17:30:09 -0800 Subject: [PATCH 022/225] Support custom labels on the driver pod. (#27) * Support custom labels on the driver pod. * Add integration test and fix logic. * Fix tests * Fix minor formatting mistake * Reduce unnecessary diff --- docs/running-on-kubernetes.md | 8 +++++ .../spark/deploy/kubernetes/Client.scala | 35 +++++++++++++++---- .../integrationtest/KubernetesSuite.scala | 34 ++++++++++++++++++ 3 files changed, 70 insertions(+), 7 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 234c9870548c7..14e2df4ed0702 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -213,6 +213,14 @@ from the other deployment modes. See the [configuration page](configuration.html (typically 6-10%). + + spark.kubernetes.driver.labels + (none) + + Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, + where each label is in the format key=value. + + ## Current Limitations diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 6d7de973a52c2..073afcbba7b52 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -77,6 +77,8 @@ private[spark] class Client( private val serviceAccount = sparkConf.get("spark.kubernetes.submit.serviceAccountName", "default") + private val customLabels = sparkConf.get("spark.kubernetes.driver.labels", "") + private implicit val retryableExecutionContext = ExecutionContext .fromExecutorService( Executors.newSingleThreadExecutor(new ThreadFactoryBuilder() @@ -85,6 +87,7 @@ private[spark] class Client( .build())) def run(): Unit = { + val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(master) @@ -109,14 +112,15 @@ private[spark] class Client( .withType("Opaque") .done() try { - val selectors = Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue).asJava + val resolvedSelectors = (Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue) + ++ parsedCustomLabels).asJava val (servicePorts, containerPorts) = configurePorts() val service = kubernetesClient.services().createNew() .withNewMetadata() .withName(kubernetesAppId) .endMetadata() .withNewSpec() - .withSelector(selectors) + .withSelector(resolvedSelectors) .withPorts(servicePorts.asJava) .endSpec() .done() @@ -137,7 +141,7 @@ private[spark] class Client( .asScala .find(status => status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { - case Some(status) => + case Some(_) => try { val driverLauncher = getDriverLauncherService( k8ClientConfig, master) @@ -184,7 +188,7 @@ private[spark] class Client( kubernetesClient.pods().createNew() .withNewMetadata() .withName(kubernetesAppId) - .withLabels(selectors) + .withLabels(resolvedSelectors) .endMetadata() .withNewSpec() .withRestartPolicy("OnFailure") @@ -291,7 +295,7 @@ private[spark] class Client( Utils.tryWithResource(kubernetesClient .pods() - .withLabels(selectors) + .withLabels(resolvedSelectors) .watch(podWatcher)) { createDriverPod } } finally { 
kubernetesClient.secrets().delete(secret) @@ -336,7 +340,7 @@ private[spark] class Client( .getOption("spark.ui.port") .map(_.toInt) .getOrElse(DEFAULT_UI_PORT)) - (servicePorts.toSeq, containerPorts.toSeq) + (servicePorts, containerPorts) } private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { @@ -366,7 +370,7 @@ private[spark] class Client( uploadedJarsBase64Contents = uploadJarsBase64Contents) } - def compressJars(maybeFilePaths: Option[String]): Option[TarGzippedData] = { + private def compressJars(maybeFilePaths: Option[String]): Option[TarGzippedData] = { maybeFilePaths .map(_.split(",")) .map(CompressionUtils.createTarGzip(_)) @@ -391,6 +395,23 @@ private[spark] class Client( sslSocketFactory = sslContext.getSocketFactory, trustContext = trustManager) } + + private def parseCustomLabels(labels: String): Map[String, String] = { + labels.split(",").map(_.trim).filterNot(_.isEmpty).map(label => { + label.split("=", 2).toSeq match { + case Seq(k, v) => + require(k != DRIVER_LAUNCHER_SELECTOR_LABEL, "Label with key" + + s" $DRIVER_LAUNCHER_SELECTOR_LABEL cannot be used in" + + " spark.kubernetes.driver.labels, as it is reserved for Spark's" + + " internal configuration.") + (k, v) + case _ => + throw new SparkException("Custom labels set by spark.kubernetes.driver.labels" + + " must be a comma-separated list of key-value pairs, with format =." + + s" Got label: $label. All labels: $labels") + } + }).toMap + } } private object Client { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 6247a1674f8d6..7b3c2b93b865b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -161,4 +161,38 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "spark-pi", NAMESPACE, "spark-ui-port") expectationsForStaticAllocation(sparkMetricsService) } + + test("Run with custom labels") { + val args = Array( + "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", + "--deploy-mode", "cluster", + "--kubernetes-namespace", NAMESPACE, + "--name", "spark-pi", + "--executor-memory", "512m", + "--executor-cores", "1", + "--num-executors", "1", + "--upload-jars", HELPER_JAR, + "--class", MAIN_CLASS, + "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", + "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", + "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", + "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.kubernetes.driver.labels=label1=label1value,label2=label2value", + EXAMPLES_JAR) + SparkSubmit.main(args) + val driverPodLabels = minikubeKubernetesClient + .pods + .withName("spark-pi") + .get + .getMetadata + .getLabels + // We can't match all of the selectors directly since one of the selectors is based on the + // launch time. 
+ assert(driverPodLabels.size == 3, "Unexpected number of pod labels.") + assert(driverPodLabels.containsKey("driver-launcher-selector"), "Expected driver launcher" + + " selector label to be present.") + assert(driverPodLabels.get("label1") == "label1value", "Unexpected value for label1") + assert(driverPodLabels.get("label2") == "label2value", "Unexpected value for label2") + } } From f71abc1df5d84006456f14b4192047587ac050bf Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 18 Jan 2017 17:34:02 -0800 Subject: [PATCH 023/225] Make pod name unique using the submission timestamp (#32) --- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 073afcbba7b52..30eaa6269cf47 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -56,10 +56,10 @@ private[spark] class Client( private val master = rawMaster.replaceFirst("k8s://", "") private val launchTime = System.currentTimeMillis - private val kubernetesAppId = sparkConf.getOption("spark.app.name") + private val appName = sparkConf.getOption("spark.app.name") .orElse(sparkConf.getOption("spark.app.id")) - .getOrElse(s"spark-$launchTime") - + .getOrElse("spark") + private val kubernetesAppId = s"$appName-$launchTime" private val secretName = s"spark-submission-server-secret-$kubernetesAppId" private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" private val driverDockerImage = sparkConf.get( From 95747bc2d6335fd8233b16f5dfa4c7ea011b9d0b Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 23 Jan 2017 18:02:45 -0800 Subject: [PATCH 024/225] A number of small tweaks to the MVP. (#23) * A number of small tweaks to the MVP. - Master protocol defaults to https if not specified - Removed upload driver extra classpath functionality - Added ability to specify main app resource with container:// URI - Updated docs to reflect all of the above - Add examples to Docker images, mostly for integration testing but could be useful for easily getting started without shipping anything * Add example to documentation. 
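
Before the diff itself, the first bullet above ("Master protocol defaults to https if not specified") amounts to the behavior sketched below. The patch implements this as `Client.resolveK8sMaster` later in the diff; the standalone helper name here is only illustrative:

    // Condensed sketch of the k8s:// master URL resolution described above (illustrative only).
    def resolveMaster(rawMaster: String): String = {
      require(rawMaster.startsWith("k8s://"),
        "Master URL should start with k8s:// in Kubernetes mode.")
      val withoutPrefix = rawMaster.stripPrefix("k8s://")
      if (withoutPrefix.startsWith("http://") || withoutPrefix.startsWith("https://")) {
        withoutPrefix                   // an explicit protocol is preserved
      } else {
        s"https://$withoutPrefix"       // no protocol given, so default to https
      }
    }

    // resolveMaster("k8s://example.com:443")         == "https://example.com:443"
    // resolveMaster("k8s://http://example.com:8443") == "http://example.com:8443"
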
--- docs/running-on-kubernetes.md | 49 +++-- .../spark/deploy/kubernetes/Client.scala | 40 ++-- .../rest/KubernetesRestProtocolMessages.scala | 4 +- .../KubernetesSparkRestServer.scala | 183 ++++++++++-------- .../KubernetesClusterSchedulerBackend.scala | 9 +- .../kubernetes/docker-minimal-bundle/pom.xml | 7 + .../src/main/assembly/driver-assembly.xml | 20 +- .../src/main/assembly/executor-assembly.xml | 11 ++ .../src/main/docker/driver/Dockerfile | 1 + .../src/main/docker/executor/Dockerfile | 1 + .../integrationtest/KubernetesSuite.scala | 104 +++++++++- 11 files changed, 287 insertions(+), 142 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 14e2df4ed0702..5a73b1ad1ea29 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -42,11 +42,12 @@ are set up as described above: --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ examples/jars/spark_examples_2.11-2.2.0.jar - The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting `spark.master` in the application's configuration, must be a URL with the format `k8s://`. Prefixing the master string with `k8s://` will cause the Spark application to launch on the Kubernetes cluster, with the API server -being contacted at `api_server_url`. The HTTP protocol must also be specified. +being contacted at `api_server_url`. If no HTTP protocol is specified in the URL, it defaults to `https`. For example, +setting the master to `k8s://example.com:443` is equivalent to setting it to `k8s://https://example.com:443`, but to +connect without SSL on a different port, the master would be set to `k8s://http://example.com:8443`. Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. @@ -58,17 +59,18 @@ disk of the submitter's machine. These two types of dependencies are specified v `spark-submit`: * Local jars provided by specifying the `--jars` command line argument to `spark-submit`, or by setting `spark.jars` in - the application's configuration, will be treated as jars that are located on the *disk of the driver Docker - container*. This only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with - other schemes are fetched from their appropriate locations. + the application's configuration, will be treated as jars that are located on the *disk of the driver container*. This + only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with other schemes are + fetched from their appropriate locations. * Local jars provided by specifying the `--upload-jars` command line argument to `spark-submit`, or by setting `spark.kubernetes.driver.uploads.jars` in the application's configuration, will be treated as jars that are located on the *disk of the submitting machine*. These jars are uploaded to the driver docker container before executing the application. - * A main application resource path that does not have a scheme or that has the scheme `file://` is assumed to be on the *disk of the submitting machine*. This resource is uploaded to the driver docker container before executing the application. A remote path can still be specified and the resource will be fetched from the appropriate location. +* A main application resource path that has the scheme `container://` is assumed to be on the *disk of the driver + container*. 
In all of these cases, the jars are placed on the driver's classpath, and are also sent to the executors. Below are some examples of providing application dependencies. @@ -78,8 +80,7 @@ To submit an application with both the main resource and two other jars living o bin/spark-submit \ --deploy-mode cluster \ --class com.example.applications.SampleApplication \ - --master k8s://https://192.168.99.100 \ - --kubernetes-namespace default \ + --master k8s://192.168.99.100 \ --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ @@ -91,8 +92,7 @@ Note that since passing the jars through the `--upload-jars` command line argume bin/spark-submit \ --deploy-mode cluster \ --class com.example.applications.SampleApplication \ - --master k8s://https://192.168.99.100 \ - --kubernetes-namespace default \ + --master k8s://192.168.99.100 \ --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ @@ -104,8 +104,7 @@ is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's bin/spark-submit \ --deploy-mode cluster \ --class com.example.applications.PluggableApplication \ - --master k8s://https://192.168.99.100 \ - --kubernetes-namespace default \ + --master k8s://192.168.99.100 \ --jars /opt/spark-plugins/app-plugin.jar \ --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ @@ -117,13 +116,22 @@ Spark property, the above will behave identically to this command: bin/spark-submit \ --deploy-mode cluster \ --class com.example.applications.PluggableApplication \ - --master k8s://https://192.168.99.100 \ - --kubernetes-namespace default \ + --master k8s://192.168.99.100 \ --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar \ --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ http://example.com:8080/applications/sparkpluggable/app.jar +To specify a main application resource that is in the Docker image, and if it has no other dependencies: + + bin/spark-submit \ + --deploy-mode cluster \ + --class com.example.applications.PluggableApplication \ + --master k8s://192.168.99.100:8443 \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ + container:///home/applications/examples/example.jar + ### Spark Properties Below are some other common properties that are specific to Kubernetes. Most of the other configurations are the same @@ -133,10 +141,9 @@ from the other deployment modes. See the [configuration page](configuration.html Property NameDefaultMeaning spark.kubernetes.namespace - - (none) + default - The namespace that will be used for running the driver and executor pods. Must be specified. When using + The namespace that will be used for running the driver and executor pods. 
When using spark-submit in cluster mode, this can also be passed to spark-submit via the --kubernetes-namespace command line argument. @@ -196,14 +203,6 @@ from the other deployment modes. See the [configuration page](configuration.html mode. Refer to adding other jars for more information. - - - spark.kubernetes.driver.uploads.driverExtraClasspath - (none) - - Comma-separated list of jars to be sent to the driver only when submitting the application in cluster mode. - - spark.kubernetes.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 30eaa6269cf47..fe3256b9e12be 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -35,7 +35,7 @@ import scala.concurrent.duration.DurationInt import scala.util.Success import org.apache.spark.{SPARK_VERSION, SparkConf, SparkException} -import org.apache.spark.deploy.rest.{AppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} +import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -47,13 +47,8 @@ private[spark] class Client( appArgs: Array[String]) extends Logging { import Client._ - private val namespace = sparkConf.getOption("spark.kubernetes.namespace").getOrElse( - throw new IllegalArgumentException("Namespace must be provided in spark.kubernetes.namespace")) - private val rawMaster = sparkConf.get("spark.master") - if (!rawMaster.startsWith("k8s://")) { - throw new IllegalArgumentException("Master should be a URL with scheme k8s://") - } - private val master = rawMaster.replaceFirst("k8s://", "") + private val namespace = sparkConf.get("spark.kubernetes.namespace", "default") + private val master = resolveK8sMaster(sparkConf.get("spark.master")) private val launchTime = System.currentTimeMillis private val appName = sparkConf.getOption("spark.app.name") @@ -64,8 +59,6 @@ private[spark] class Client( private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" private val driverDockerImage = sparkConf.get( "spark.kubernetes.driver.docker.image", s"spark-driver:$SPARK_VERSION") - private val uploadedDriverExtraClasspath = sparkConf - .getOption("spark.kubernetes.driver.uploads.driverExtraClasspath") private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") private val secretBase64String = { @@ -112,12 +105,15 @@ private[spark] class Client( .withType("Opaque") .done() try { - val resolvedSelectors = (Map(DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue) + val resolvedSelectors = (Map( + DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue, + SPARK_APP_NAME_LABEL -> appName) ++ parsedCustomLabels).asJava val (servicePorts, containerPorts) = configurePorts() val service = kubernetesClient.services().createNew() .withNewMetadata() .withName(kubernetesAppId) + .withLabels(Map(SPARK_APP_NAME_LABEL -> appName).asJava) .endMetadata() .withNewSpec() .withSelector(resolvedSelectors) @@ -355,10 +351,10 @@ private[spark] class Client( 
val fileBytes = Files.toByteArray(appFile) val fileBase64 = Base64.encodeBase64String(fileBytes) UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) + case "container" => ContainerAppResource(appResourceUri.getPath) case other => RemoteAppResource(other) } - val uploadDriverExtraClasspathBase64Contents = compressJars(uploadedDriverExtraClasspath) val uploadJarsBase64Contents = compressJars(uploadedJars) KubernetesCreateSubmissionRequest( appResource = resolvedAppResource, @@ -366,7 +362,6 @@ private[spark] class Client( appArgs = appArgs, secret = secretBase64String, sparkProperties = sparkConf.getAll.toMap, - uploadedDriverExtraClasspathBase64Contents = uploadDriverExtraClasspathBase64Contents, uploadedJarsBase64Contents = uploadJarsBase64Contents) } @@ -414,7 +409,7 @@ private[spark] class Client( } } -private object Client { +private[spark] object Client extends Logging { private val SUBMISSION_SERVER_SECRET_NAME = "spark-submission-server-secret" private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" @@ -430,6 +425,7 @@ private object Client { private val SECURE_RANDOM = new SecureRandom() private val SPARK_SUBMISSION_SECRET_BASE_DIR = "/var/run/secrets/spark-submission" private val LAUNCH_TIMEOUT_SECONDS = 30 + private val SPARK_APP_NAME_LABEL = "spark-app-name" def main(args: Array[String]): Unit = { require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + @@ -444,4 +440,20 @@ private object Client { sparkConf = sparkConf, appArgs = appArgs).run() } + + def resolveK8sMaster(rawMasterString: String): String = { + if (!rawMasterString.startsWith("k8s://")) { + throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") + } + val masterWithoutK8sPrefix = rawMasterString.replaceFirst("k8s://", "") + if (masterWithoutK8sPrefix.startsWith("http://") + || masterWithoutK8sPrefix.startsWith("https://")) { + masterWithoutK8sPrefix + } else { + val resolvedURL = s"https://$masterWithoutK8sPrefix" + logDebug(s"No scheme specified for kubernetes master URL, so defaulting to https. 
Resolved" + + s" URL is $resolvedURL") + resolvedURL + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 6da1a848b25e7..813d070e0f876 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -27,7 +27,6 @@ case class KubernetesCreateSubmissionRequest( val appArgs: Array[String], val sparkProperties: Map[String, String], val secret: String, - val uploadedDriverExtraClasspathBase64Contents: Option[TarGzippedData], val uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION @@ -46,6 +45,7 @@ case class TarGzippedData( property = "type") @JsonSubTypes(value = Array( new JsonSubTypes.Type(value = classOf[UploadedAppResource], name = "UploadedAppResource"), + new JsonSubTypes.Type(value = classOf[ContainerAppResource], name = "ContainerLocalAppResource"), new JsonSubTypes.Type(value = classOf[RemoteAppResource], name = "RemoteAppResource"))) abstract class AppResource @@ -53,6 +53,8 @@ case class UploadedAppResource( resourceBase64Contents: String, name: String = "spark-app-resource") extends AppResource +case class ContainerAppResource(resourcePath: String) extends AppResource + case class RemoteAppResource(resource: String) extends AppResource class PingResponse extends SubmitRestProtocolResponse { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 837706ca9f5a8..08ddbaf5e50dc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -18,7 +18,6 @@ package org.apache.spark.deploy.rest.kubernetes import java.io.File import java.net.URI -import java.nio.file.Paths import java.util.concurrent.CountDownLatch import javax.servlet.http.{HttpServletRequest, HttpServletResponse} @@ -30,12 +29,12 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION, SparkConf} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.rest._ -import org.apache.spark.util.{ShutdownHookManager, Utils} +import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} private case class KubernetesSparkRestServerArguments( - val host: Option[String] = None, - val port: Option[Int] = None, - val secretFile: Option[String] = None) { + val host: Option[String] = None, + val port: Option[Int] = None, + val secretFile: Option[String] = None) { def validate(): KubernetesSparkRestServerArguments = { require(host.isDefined, "Hostname not set via --hostname.") require(port.isDefined, "Port not set via --port") @@ -68,13 +67,21 @@ private object KubernetesSparkRestServerArguments { } } +/** + * Runs in the driver pod and receives a request to run an application. 
Note that + * unlike the submission rest server in standalone mode, this server is expected + * to be used to run one application only, and then shut down once that application + * is complete. + */ private[spark] class KubernetesSparkRestServer( host: String, port: Int, conf: SparkConf, - expectedApplicationSecret: Array[Byte]) + expectedApplicationSecret: Array[Byte], + shutdownLock: CountDownLatch) extends RestSubmissionServer(host, port, conf) { + private val SERVLET_LOCK = new Object private val javaExecutable = s"${System.getenv("JAVA_HOME")}/bin/java" private val sparkHome = System.getenv("SPARK_HOME") private val securityManager = new SecurityManager(conf) @@ -99,87 +106,105 @@ private[spark] class KubernetesSparkRestServer( private class KubernetesSubmitRequestServlet extends SubmitRequestServlet { + private val waitForProcessCompleteExecutor = ThreadUtils + .newDaemonSingleThreadExecutor("wait-for-spark-app-complete") + private var startedApplication = false + // TODO validating the secret should be done as part of a header of the request. // Instead here we have to specify the secret in the body. override protected def handleSubmit( - requestMessageJson: String, - requestMessage: SubmitRestProtocolMessage, - responseServlet: HttpServletResponse): SubmitRestProtocolResponse = { - requestMessage match { - case KubernetesCreateSubmissionRequest( + requestMessageJson: String, + requestMessage: SubmitRestProtocolMessage, + responseServlet: HttpServletResponse): SubmitRestProtocolResponse = { + SERVLET_LOCK.synchronized { + if (startedApplication) { + throw new IllegalStateException("Application has already been submitted.") + } else { + requestMessage match { + case KubernetesCreateSubmissionRequest( appResource, mainClass, appArgs, sparkProperties, secret, - uploadedDriverExtraClasspath, uploadedJars) => - val decodedSecret = Base64.decodeBase64(secret) - if (!expectedApplicationSecret.sameElements(decodedSecret)) { - responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) - handleError("Unauthorized to submit application.") - } else { - val tempDir = Utils.createTempDir() - val appResourcePath = resolvedAppResource(appResource, tempDir) - val driverClasspathDirectory = new File(tempDir, "driver-extra-classpath") - if (!driverClasspathDirectory.mkdir) { - throw new IllegalStateException("Failed to create driver extra classpath" + - s" dir at ${driverClasspathDirectory.getAbsolutePath}") - } - val jarsDirectory = new File(tempDir, "jars") - if (!jarsDirectory.mkdir) { - throw new IllegalStateException("Failed to create jars dir at" + - s"${jarsDirectory.getAbsolutePath}") - } - val writtenDriverExtraClasspath = writeBase64ContentsToFiles( - uploadedDriverExtraClasspath, driverClasspathDirectory) - val writtenJars = writeBase64ContentsToFiles(uploadedJars, jarsDirectory) - val originalDriverExtraClasspath = sparkProperties.get("spark.driver.extraClassPath") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val resolvedDriverExtraClasspath = writtenDriverExtraClasspath ++ - originalDriverExtraClasspath - val originalJars = sparkProperties.get("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) - val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) - val driverClasspath = resolvedDriverExtraClasspath ++ - resolvedJars ++ - sparkJars ++ - Array(appResourcePath) - val resolvedSparkProperties = new mutable.HashMap[String, String] - resolvedSparkProperties ++= 
sparkProperties - resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") - - val command = new ArrayBuffer[String] - command += javaExecutable - command += "-cp" - command += s"${driverClasspath.mkString(":")}" - for (prop <- resolvedSparkProperties) { - command += s"-D${prop._1}=${prop._2}" - } - val driverMemory = resolvedSparkProperties.getOrElse("spark.driver.memory", "1g") - command += s"-Xms$driverMemory" - command += s"-Xmx$driverMemory" - command += mainClass - command ++= appArgs - val pb = new ProcessBuilder(command: _*).inheritIO() - val process = pb.start() - ShutdownHookManager.addShutdownHook(() => { - logInfo("Received stop command, shutting down the running Spark application...") - process.destroy() - }) - val response = new CreateSubmissionResponse - response.success = true - response.submissionId = null - response.message = "success" - response.serverSparkVersion = SPARK_VERSION - response + val decodedSecret = Base64.decodeBase64(secret) + if (!expectedApplicationSecret.sameElements(decodedSecret)) { + responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) + handleError("Unauthorized to submit application.") + } else { + val tempDir = Utils.createTempDir() + val appResourcePath = resolvedAppResource(appResource, tempDir) + val driverClasspathDirectory = new File(tempDir, "driver-extra-classpath") + if (!driverClasspathDirectory.mkdir) { + throw new IllegalStateException("Failed to create driver extra classpath" + + s" dir at ${driverClasspathDirectory.getAbsolutePath}") + } + val jarsDirectory = new File(tempDir, "jars") + if (!jarsDirectory.mkdir) { + throw new IllegalStateException("Failed to create jars dir at" + + s"${jarsDirectory.getAbsolutePath}") + } + val writtenJars = writeBase64ContentsToFiles(uploadedJars, jarsDirectory) + val driverExtraClasspath = sparkProperties + .get("spark.driver.extraClassPath") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val originalJars = sparkProperties.get("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) + val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) + val driverClasspath = driverExtraClasspath ++ + resolvedJars ++ + sparkJars ++ + Array(appResourcePath) + val resolvedSparkProperties = new mutable.HashMap[String, String] + resolvedSparkProperties ++= sparkProperties + resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + + val command = new ArrayBuffer[String] + command += javaExecutable + command += "-cp" + command += s"${driverClasspath.mkString(":")}" + for (prop <- resolvedSparkProperties) { + command += s"-D${prop._1}=${prop._2}" + } + val driverMemory = resolvedSparkProperties.getOrElse("spark.driver.memory", "1g") + command += s"-Xms$driverMemory" + command += s"-Xmx$driverMemory" + command += mainClass + command ++= appArgs + val pb = new ProcessBuilder(command: _*).inheritIO() + val process = pb.start() + ShutdownHookManager.addShutdownHook(() => { + logInfo("Received stop command, shutting down the running Spark application...") + process.destroy() + shutdownLock.countDown() + }) + waitForProcessCompleteExecutor.submit(new Runnable { + override def run(): Unit = { + process.waitFor + SERVLET_LOCK.synchronized { + logInfo("Spark application complete. 
Shutting down submission server...") + KubernetesSparkRestServer.this.stop + shutdownLock.countDown() + } + } + }) + startedApplication = true + val response = new CreateSubmissionResponse + response.success = true + response.submissionId = null + response.message = "success" + response.serverSparkVersion = SPARK_VERSION + response + } + case unexpected => + responseServlet.setStatus(HttpServletResponse.SC_BAD_REQUEST) + handleError(s"Received message of unexpected type ${unexpected.messageType}.") } - case unexpected => - responseServlet.setStatus(HttpServletResponse.SC_BAD_REQUEST) - handleError(s"Received message of unexpected type ${unexpected.messageType}.") + } } } @@ -196,6 +221,7 @@ private[spark] class KubernetesSparkRestServer( throw new IllegalStateException(s"Failed to write main app resource file" + s" to $resourceFilePath") } + case ContainerAppResource(resource) => resource case RemoteAppResource(resource) => Utils.fetchFile(resource, tempDir, conf, securityManager, SparkHadoopUtil.get.newConfiguration(conf), @@ -237,7 +263,8 @@ private[spark] object KubernetesSparkRestServer { parsedArguments.host.get, parsedArguments.port.get, sparkConf, - secretBytes) + secretBytes, + barrier) server.start() ShutdownHookManager.addShutdownHook(() => { try { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 2717d2f37d910..b7110ba901842 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -27,7 +27,7 @@ import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkException} -import org.apache.spark.deploy.kubernetes.KubernetesClientBuilder +import org.apache.spark.deploy.kubernetes.{Client, KubernetesClientBuilder} import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend @@ -43,15 +43,12 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private val kubernetesMaster = sc.master.replaceFirst("k8s://", "") + private val kubernetesMaster = Client.resolveK8sMaster(sc.master) private val executorDockerImage = conf .get("spark.kubernetes.executor.docker.image", s"spark-executor:${sc.version}") - private val kubernetesNamespace = conf - .getOption("spark.kubernetes.namespace") - .getOrElse( - throw new SparkException("Kubernetes namespace must be specified in kubernetes mode.")) + private val kubernetesNamespace = conf.get("spark.kubernetes.namespace", "default") private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index c20e51c93e7c7..0ec2f36075db3 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -43,6 +43,13 @@ ${project.version} pom
    + + + org.apache.spark + spark-examples_${scala.binary.version} + ${project.version} + provided + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 77b7c793dc37e..07a45c7577bcd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -16,25 +16,25 @@ */ package org.apache.spark.deploy.kubernetes -import java.io.File -import java.security.SecureRandom +import java.io.{File, FileInputStream} +import java.security.{KeyStore, SecureRandom} import java.util.concurrent.{Executors, TimeoutException, TimeUnit} -import javax.net.ssl.X509TrustManager +import java.util.concurrent.atomic.AtomicBoolean +import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} +import com.google.common.base.Charsets import com.google.common.io.Files import com.google.common.util.concurrent.{SettableFuture, ThreadFactoryBuilder} import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action -import io.fabric8.kubernetes.client.internal.SSLUtils import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer -import scala.concurrent.ExecutionContext +import scala.collection.mutable +import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.DurationInt -import scala.util.Success -import org.apache.spark.{SPARK_VERSION, SparkConf, SparkException} +import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging @@ -56,10 +56,14 @@ private[spark] class Client( .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") private val secretName = s"spark-submission-server-secret-$kubernetesAppId" + private val secretDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId" + private val sslSecretsDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId-ssl" + private val sslSecretsName = s"spark-submission-server-ssl-$kubernetesAppId" private val driverLauncherSelectorValue = s"driver-launcher-$launchTime" private val driverDockerImage = sparkConf.get( - "spark.kubernetes.driver.docker.image", s"spark-driver:$SPARK_VERSION") + "spark.kubernetes.driver.docker.image", s"spark-driver:$sparkVersion") private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") + private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverLaunchTimeoutSecs = sparkConf.getTimeAsSeconds( "spark.kubernetes.driverLaunchTimeout", s"${DEFAULT_LAUNCH_TIMEOUT_SECONDS}s") @@ -82,6 +86,7 @@ private[spark] class Client( .build())) def run(): Unit = { + val (driverLaunchSslOptions, isKeyStoreLocalFile) = parseDriverLaunchSslOptions() 
val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new ConfigBuilder() .withApiVersion("v1") @@ -98,123 +103,50 @@ private[spark] class Client( } val k8ClientConfig = k8ConfBuilder.build - Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig))(kubernetesClient => { - val secret = kubernetesClient.secrets().createNew() + Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig)) { kubernetesClient => + val submitServerSecret = kubernetesClient.secrets().createNew() .withNewMetadata() - .withName(secretName) - .endMetadata() + .withName(secretName) + .endMetadata() .withData(Map((SUBMISSION_SERVER_SECRET_NAME, secretBase64String)).asJava) .withType("Opaque") .done() + val (sslEnvs, sslVolumes, sslVolumeMounts, sslSecrets) = configureSsl(kubernetesClient, + driverLaunchSslOptions, + isKeyStoreLocalFile) try { - val resolvedSelectors = (Map( + val driverKubernetesSelectors = (Map( DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue, SPARK_APP_NAME_LABEL -> appName) ++ parsedCustomLabels).asJava - val (servicePorts, containerPorts) = configurePorts() - val service = kubernetesClient.services().createNew() - .withNewMetadata() - .withName(kubernetesAppId) - .withLabels(Map(SPARK_APP_NAME_LABEL -> appName).asJava) - .endMetadata() - .withNewSpec() - .withSelector(resolvedSelectors) - .withPorts(servicePorts.asJava) - .endSpec() - .done() - sparkConf.set("spark.kubernetes.driver.service.name", service.getMetadata.getName) - sparkConf.set("spark.kubernetes.driver.pod.name", kubernetesAppId) - - sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT.toString) - val submitRequest = buildSubmissionRequest() + val containerPorts = buildContainerPorts() val submitCompletedFuture = SettableFuture.create[Boolean] - val secretDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId" - - val podWatcher = new Watcher[Pod] { - override def eventReceived(action: Action, t: Pod): Unit = { - if (action == Action.ADDED) { - val ownerRefs = new ArrayBuffer[OwnerReference] - ownerRefs += new OwnerReferenceBuilder() - .withApiVersion(t.getApiVersion) - .withController(true) - .withKind(t.getKind) - .withName(t.getMetadata.getName) - .withUid(t.getMetadata.getUid) - .build() - - secret.getMetadata().setOwnerReferences(ownerRefs.asJava) - kubernetesClient.secrets().createOrReplace(secret) - - service.getMetadata().setOwnerReferences(ownerRefs.asJava) - kubernetesClient.services().createOrReplace(service) - } - - if ((action == Action.ADDED || action == Action.MODIFIED) - && t.getStatus.getPhase == "Running" - && !submitCompletedFuture.isDone) { - t.getStatus - .getContainerStatuses - .asScala - .find(status => - status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { - case Some(_) => - try { - val driverLauncher = getDriverLauncherService( - k8ClientConfig, master) - val ping = Retry.retry(5, 5.seconds) { - driverLauncher.ping() - } - ping onFailure { - case t: Throwable => - if (!submitCompletedFuture.isDone) { - submitCompletedFuture.setException(t) - } - } - val submitComplete = ping andThen { - case Success(_) => - driverLauncher.create(submitRequest) - submitCompletedFuture.set(true) - } - submitComplete onFailure { - case t: Throwable => - if (!submitCompletedFuture.isDone) { - submitCompletedFuture.setException(t) - } - } - } catch { - case e: Throwable => - if (!submitCompletedFuture.isDone) { - 
submitCompletedFuture.setException(e) - throw e - } - } - case None => - } - } - } - - override def onClose(e: KubernetesClientException): Unit = { - if (!submitCompletedFuture.isDone) { - submitCompletedFuture.setException(e) - } - } - } - - def createDriverPod(unused: Watch): Unit = { + val submitPending = new AtomicBoolean(false) + val podWatcher = new DriverPodWatcher( + submitCompletedFuture, + submitPending, + kubernetesClient, + driverLaunchSslOptions, + Array(submitServerSecret) ++ sslSecrets, + driverKubernetesSelectors) + Utils.tryWithResource(kubernetesClient + .pods() + .withLabels(driverKubernetesSelectors) + .watch(podWatcher)) { _ => kubernetesClient.pods().createNew() .withNewMetadata() .withName(kubernetesAppId) - .withLabels(resolvedSelectors) + .withLabels(driverKubernetesSelectors) .endMetadata() .withNewSpec() .withRestartPolicy("OnFailure") .addNewVolume() .withName(s"spark-submission-secret-volume") - .withNewSecret() - .withSecretName(secret.getMetadata.getName) + .withNewSecret() + .withSecretName(submitServerSecret.getMetadata.getName) .endSecret() .endVolume + .addToVolumes(sslVolumes: _*) .withServiceAccount(serviceAccount) .addNewContainer() .withName(DRIVER_LAUNCHER_CONTAINER_NAME) @@ -225,6 +157,7 @@ private[spark] class Client( .withMountPath(secretDirectory) .withReadOnly(true) .endVolumeMount() + .addToVolumeMounts(sslVolumeMounts: _*) .addNewEnv() .withName("SPARK_SUBMISSION_SECRET_LOCATION") .withValue(s"$secretDirectory/$SUBMISSION_SERVER_SECRET_NAME") @@ -233,6 +166,7 @@ private[spark] class Client( .withName("SPARK_DRIVER_LAUNCHER_SERVER_PORT") .withValue(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT.toString) .endEnv() + .addToEnv(sslEnvs: _*) .withPorts(containerPorts.asJava) .endContainer() .endSpec() @@ -243,121 +177,321 @@ private[spark] class Client( submitSucceeded = true } catch { case e: TimeoutException => - val driverPod = try { - kubernetesClient.pods().withName(kubernetesAppId).get() - } catch { - case throwable: Throwable => - logError(s"Timed out while waiting $driverLaunchTimeoutSecs seconds for the" + - " driver pod to start, but an error occurred while fetching the driver" + - " pod's details.", throwable) - throw new SparkException(s"Timed out while waiting $driverLaunchTimeoutSecs" + - " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + - " the latest state of the pod, another error was thrown. Check the logs for" + - " the error that was thrown in looking up the driver pod.", e) - } - val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + - s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + - s" $driverLaunchTimeoutSecs seconds." - val podStatusPhase = if (driverPod.getStatus.getPhase != null) { - s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" - } else { - "The pod had no final phase." - } - val podStatusMessage = if (driverPod.getStatus.getMessage != null) { - s"Latest message from the pod is: ${driverPod.getStatus.getMessage}" - } else { - "The pod had no final message." 
- } - val failedDriverContainerStatusString = driverPod.getStatus - .getContainerStatuses - .asScala - .find(_.getName == DRIVER_LAUNCHER_CONTAINER_NAME) - .map(status => { - val lastState = status.getState - if (lastState.getRunning != null) { - "Driver container last state: Running\n" + - s"Driver container started at: ${lastState.getRunning.getStartedAt}" - } else if (lastState.getWaiting != null) { - "Driver container last state: Waiting\n" + - s"Driver container wait reason: ${lastState.getWaiting.getReason}\n" + - s"Driver container message: ${lastState.getWaiting.getMessage}\n" - } else if (lastState.getTerminated != null) { - "Driver container last state: Terminated\n" + - s"Driver container started at: ${lastState.getTerminated.getStartedAt}\n" + - s"Driver container finished at: ${lastState.getTerminated.getFinishedAt}\n" + - s"Driver container exit reason: ${lastState.getTerminated.getReason}\n" + - s"Driver container exit code: ${lastState.getTerminated.getExitCode}\n" + - s"Driver container message: ${lastState.getTerminated.getMessage}" - } else { - "Driver container last state: Unknown" - } - }).getOrElse("The driver container wasn't found in the pod; expected to find" + - s" container with name $DRIVER_LAUNCHER_CONTAINER_NAME") - val finalErrorMessage = s"$topLevelMessage\n" + - s"$podStatusPhase\n" + - s"$podStatusMessage\n\n$failedDriverContainerStatusString" + val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) logError(finalErrorMessage, e) throw new SparkException(finalErrorMessage, e) - } finally { - if (!submitSucceeded) { - try { - kubernetesClient.pods.withName(kubernetesAppId).delete - } catch { - case throwable: Throwable => - logError("Failed to delete driver pod after it failed to run.", throwable) - } + } finally { + if (!submitSucceeded) { + Utils.tryLogNonFatalError { + kubernetesClient.pods.withName(kubernetesAppId).delete() } } } - - Utils.tryWithResource(kubernetesClient - .pods() - .withLabels(resolvedSelectors) - .watch(podWatcher)) { createDriverPod } + } } finally { - kubernetesClient.secrets().delete(secret) + Utils.tryLogNonFatalError { + kubernetesClient.secrets().delete(submitServerSecret) + } + Utils.tryLogNonFatalError { + kubernetesClient.secrets().delete(sslSecrets: _*) + } } - }) + } } - private def configurePorts(): (Seq[ServicePort], Seq[ContainerPort]) = { - val servicePorts = new ArrayBuffer[ServicePort] - val containerPorts = new ArrayBuffer[ContainerPort] + private def parseDriverLaunchSslOptions(): (SSLOptions, Boolean) = { + val maybeKeyStore = sparkConf.getOption("spark.ssl.kubernetes.driverlaunch.keyStore") + val resolvedSparkConf = sparkConf.clone() + val (isLocalKeyStore, resolvedKeyStore) = maybeKeyStore.map(keyStore => { + val keyStoreURI = Utils.resolveURI(keyStore) + val isProvidedKeyStoreLocal = keyStoreURI.getScheme match { + case "file" | null => true + case "container" => false + case _ => throw new SparkException(s"Invalid KeyStore URI $keyStore; keyStore URI" + + " for submit server must have scheme file:// or container:// (no scheme defaults" + + " to file://)") + } + (isProvidedKeyStoreLocal, Option.apply(keyStoreURI.getPath)) + }).getOrElse((true, Option.empty[String])) + resolvedKeyStore.foreach { + resolvedSparkConf.set("spark.ssl.kubernetes.driverlaunch.keyStore", _) + } + sparkConf.getOption("spark.ssl.kubernetes.driverlaunch.trustStore").foreach { trustStore => + val trustStoreURI = Utils.resolveURI(trustStore) + trustStoreURI.getScheme match { + case "file" | null => + 
resolvedSparkConf.set("spark.ssl.kubernetes.driverlaunch.trustStore", + trustStoreURI.getPath) + case _ => throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + + " for submit server must have no scheme, or scheme file://") + } + } + val securityManager = new SecurityManager(resolvedSparkConf) + (securityManager.getSSLOptions("kubernetes.driverlaunch"), isLocalKeyStore) + } - def addPortToServiceAndContainer(portName: String, portValue: Int): Unit = { - servicePorts += new ServicePortBuilder() - .withName(portName) - .withPort(portValue) - .withNewTargetPort(portValue) + private def configureSsl(kubernetesClient: KubernetesClient, driverLaunchSslOptions: SSLOptions, + isKeyStoreLocalFile: Boolean): + (Array[EnvVar], Array[Volume], Array[VolumeMount], Array[Secret]) = { + if (driverLaunchSslOptions.enabled) { + val sslSecretsMap = mutable.HashMap[String, String]() + val sslEnvs = mutable.Buffer[EnvVar]() + val secrets = mutable.Buffer[Secret]() + driverLaunchSslOptions.keyStore.foreach(store => { + val resolvedKeyStoreFile = if (isKeyStoreLocalFile) { + if (!store.isFile) { + throw new SparkException(s"KeyStore specified at $store is not a file or" + + s" does not exist.") + } + val keyStoreBytes = Files.toByteArray(store) + val keyStoreBase64 = Base64.encodeBase64String(keyStoreBytes) + sslSecretsMap += (SSL_KEYSTORE_SECRET_NAME -> keyStoreBase64) + s"$sslSecretsDirectory/$SSL_KEYSTORE_SECRET_NAME" + } else { + store.getAbsolutePath + } + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_KEYSTORE_FILE") + .withValue(resolvedKeyStoreFile) + .build() + }) + driverLaunchSslOptions.keyStorePassword.foreach(password => { + val passwordBase64 = Base64.encodeBase64String(password.getBytes(Charsets.UTF_8)) + sslSecretsMap += (SSL_KEYSTORE_PASSWORD_SECRET_NAME -> passwordBase64) + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE") + .withValue(s"$sslSecretsDirectory/$SSL_KEYSTORE_PASSWORD_SECRET_NAME") + .build() + }) + driverLaunchSslOptions.keyPassword.foreach(password => { + val passwordBase64 = Base64.encodeBase64String(password.getBytes(Charsets.UTF_8)) + sslSecretsMap += (SSL_KEY_PASSWORD_SECRET_NAME -> passwordBase64) + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE") + .withValue(s"$sslSecretsDirectory/$SSL_KEY_PASSWORD_SECRET_NAME") + .build() + }) + driverLaunchSslOptions.keyStoreType.foreach(storeType => { + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_KEYSTORE_TYPE") + .withValue(storeType) + .build() + }) + sslEnvs += new EnvVarBuilder() + .withName("SPARK_SUBMISSION_USE_SSL") + .withValue("true") + .build() + val sslSecrets = kubernetesClient.secrets().createNew() + .withNewMetadata() + .withName(sslSecretsName) + .endMetadata() + .withData(sslSecretsMap.asJava) + .withType("Opaque") + .done() + secrets += sslSecrets + val sslVolume = new VolumeBuilder() + .withName("spark-submission-server-ssl-secrets") + .withNewSecret() + .withSecretName(sslSecrets.getMetadata.getName) + .endSecret() .build() - containerPorts += new ContainerPortBuilder() - .withContainerPort(portValue) + val sslVolumeMount = new VolumeMountBuilder() + .withName("spark-submission-server-ssl-secrets") + .withReadOnly(true) + .withMountPath(sslSecretsDirectory) .build() + (sslEnvs.toArray, Array(sslVolume), Array(sslVolumeMount), secrets.toArray) + } else { + (Array[EnvVar](), Array[Volume](), Array[VolumeMount](), Array[Secret]()) } + } + + private class DriverPodWatcher( + 
submitCompletedFuture: SettableFuture[Boolean], + submitPending: AtomicBoolean, + kubernetesClient: KubernetesClient, + driverLaunchSslOptions: SSLOptions, + applicationSecrets: Array[Secret], + driverKubernetesSelectors: java.util.Map[String, String]) extends Watcher[Pod] { + override def eventReceived(action: Action, pod: Pod): Unit = { + if ((action == Action.ADDED || action == Action.MODIFIED) + && pod.getStatus.getPhase == "Running" + && !submitCompletedFuture.isDone) { + if (!submitPending.getAndSet(true)) { + pod.getStatus + .getContainerStatuses + .asScala + .find(status => + status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { + case Some(_) => + val ownerRefs = Seq(new OwnerReferenceBuilder() + .withName(pod.getMetadata.getName) + .withUid(pod.getMetadata.getUid) + .withApiVersion(pod.getApiVersion) + .withKind(pod.getKind) + .withController(true) + .build()) + + applicationSecrets.foreach(secret => { + secret.getMetadata.setOwnerReferences(ownerRefs.asJava) + kubernetesClient.secrets().createOrReplace(secret) + }) - addPortToServiceAndContainer( - DRIVER_LAUNCHER_SERVICE_PORT_NAME, - DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) - addPortToServiceAndContainer( - DRIVER_PORT_NAME, - sparkConf - .getOption("spark.driver.port") - .map(_.toInt) - .getOrElse(DEFAULT_DRIVER_PORT)) - addPortToServiceAndContainer( - BLOCKMANAGER_PORT_NAME, - sparkConf - .getOption("spark.blockmanager.port") - .map(_.toInt) - .getOrElse(DEFAULT_BLOCKMANAGER_PORT)) + val driverLauncherServicePort = new ServicePortBuilder() + .withName(DRIVER_LAUNCHER_SERVICE_PORT_NAME) + .withPort(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) + .withNewTargetPort(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) + .build() + val service = kubernetesClient.services().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(driverKubernetesSelectors) + .withOwnerReferences(ownerRefs.asJava) + .endMetadata() + .withNewSpec() + .withType("NodePort") + .withSelector(driverKubernetesSelectors) + .withPorts(driverLauncherServicePort) + .endSpec() + .done() + try { + sparkConf.set("spark.kubernetes.driver.service.name", + service.getMetadata.getName) + sparkConf.set("spark.kubernetes.driver.pod.name", kubernetesAppId) + sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) + sparkConf.setIfMissing("spark.blockmanager.port", + DEFAULT_BLOCKMANAGER_PORT.toString) + val driverLauncher = buildDriverLauncherClient(kubernetesClient, service, + driverLaunchSslOptions) + val ping = Retry.retry(5, 5.seconds) { + driverLauncher.ping() + } + ping onFailure { + case t: Throwable => + submitCompletedFuture.setException(t) + kubernetesClient.services().delete(service) + } + val submitComplete = ping.flatMap { _ => + Future { + sparkConf.set("spark.driver.host", pod.getStatus.getPodIP) + val submitRequest = buildSubmissionRequest() + driverLauncher.create(submitRequest) + } + } + submitComplete onFailure { + case t: Throwable => + submitCompletedFuture.setException(t) + kubernetesClient.services().delete(service) + } + val adjustServicePort = submitComplete.flatMap { _ => + Future { + // After submitting, adjust the service to only expose the Spark UI + val uiServicePort = new ServicePortBuilder() + .withName(UI_PORT_NAME) + .withPort(uiPort) + .withNewTargetPort(uiPort) + .build() + kubernetesClient.services().withName(kubernetesAppId).edit() + .editSpec() + .withType("ClusterIP") + .withPorts(uiServicePort) + .endSpec() + .done + } + } + adjustServicePort onSuccess { + case _ => + 
submitCompletedFuture.set(true) + } + adjustServicePort onFailure { + case throwable: Throwable => + submitCompletedFuture.setException(throwable) + kubernetesClient.services().delete(service) + } + } catch { + case e: Throwable => + submitCompletedFuture.setException(e) + Utils.tryLogNonFatalError({ + kubernetesClient.services().delete(service) + }) + throw e + } + case None => + } + } + } + } - addPortToServiceAndContainer( - UI_PORT_NAME, - sparkConf - .getOption("spark.ui.port") - .map(_.toInt) - .getOrElse(DEFAULT_UI_PORT)) - (servicePorts, containerPorts) + override def onClose(e: KubernetesClientException): Unit = { + if (!submitCompletedFuture.isDone) { + submitCompletedFuture.setException(e) + } + } + } + + private def buildSubmitFailedErrorMessage( + kubernetesClient: DefaultKubernetesClient, + e: TimeoutException): String = { + val driverPod = try { + kubernetesClient.pods().withName(kubernetesAppId).get() + } catch { + case throwable: Throwable => + logError(s"Timed out while waiting $driverLaunchTimeoutSecs seconds for the" + + " driver pod to start, but an error occurred while fetching the driver" + + " pod's details.", throwable) + throw new SparkException(s"Timed out while waiting $driverLaunchTimeoutSecs" + + " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + + " the latest state of the pod, another error was thrown. Check the logs for" + + " the error that was thrown in looking up the driver pod.", e) + } + val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + + s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + + s" $driverLaunchTimeoutSecs seconds." + val podStatusPhase = if (driverPod.getStatus.getPhase != null) { + s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" + } else { + "The pod had no final phase." + } + val podStatusMessage = if (driverPod.getStatus.getMessage != null) { + s"Latest message from the pod is: ${driverPod.getStatus.getMessage}" + } else { + "The pod had no final message." 
+ } + val failedDriverContainerStatusString = driverPod.getStatus + .getContainerStatuses + .asScala + .find(_.getName == DRIVER_LAUNCHER_CONTAINER_NAME) + .map(status => { + val lastState = status.getState + if (lastState.getRunning != null) { + "Driver container last state: Running\n" + + s"Driver container started at: ${lastState.getRunning.getStartedAt}" + } else if (lastState.getWaiting != null) { + "Driver container last state: Waiting\n" + + s"Driver container wait reason: ${lastState.getWaiting.getReason}\n" + + s"Driver container message: ${lastState.getWaiting.getMessage}\n" + } else if (lastState.getTerminated != null) { + "Driver container last state: Terminated\n" + + s"Driver container started at: ${lastState.getTerminated.getStartedAt}\n" + + s"Driver container finished at: ${lastState.getTerminated.getFinishedAt}\n" + + s"Driver container exit reason: ${lastState.getTerminated.getReason}\n" + + s"Driver container exit code: ${lastState.getTerminated.getExitCode}\n" + + s"Driver container message: ${lastState.getTerminated.getMessage}" + } else { + "Driver container last state: Unknown" + } + }).getOrElse("The driver container wasn't found in the pod; expected to find" + + s" container with name $DRIVER_LAUNCHER_CONTAINER_NAME") + s"$topLevelMessage\n" + + s"$podStatusPhase\n" + + s"$podStatusMessage\n\n$failedDriverContainerStatusString" + } + + private def buildContainerPorts(): Seq[ContainerPort] = { + Seq(sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), + sparkConf.getInt("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT), + DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT, + uiPort).map(new ContainerPortBuilder().withContainerPort(_).build()) } private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { @@ -392,26 +526,67 @@ private[spark] class Client( .map(CompressionUtils.createTarGzip(_)) } - private def getDriverLauncherService( - k8ClientConfig: Config, - kubernetesMaster: String): KubernetesSparkRestApi = { - val url = s"${ - Array[String]( - kubernetesMaster, - "api", "v1", "proxy", - "namespaces", namespace, - "services", kubernetesAppId).mkString("/")}" + - s":$DRIVER_LAUNCHER_SERVICE_PORT_NAME/" - - val sslContext = SSLUtils.sslContext(k8ClientConfig) - val trustManager = SSLUtils.trustManagers( - k8ClientConfig)(0).asInstanceOf[X509TrustManager] + private def buildDriverLauncherClient( + kubernetesClient: KubernetesClient, + service: Service, + driverLaunchSslOptions: SSLOptions): KubernetesSparkRestApi = { + val servicePort = service + .getSpec + .getPorts + .asScala + .filter(_.getName == DRIVER_LAUNCHER_SERVICE_PORT_NAME) + .head + .getNodePort + // NodePort is exposed on every node, so just pick one of them. + // TODO be resilient to node failures and try all of them + val node = kubernetesClient.nodes.list.getItems.asScala.head + val nodeAddress = node.getStatus.getAddresses.asScala.head.getAddress + val urlScheme = if (driverLaunchSslOptions.enabled) { + "https" + } else { + logWarning("Submitting application details, application secret, and local" + + " jars to the cluster over an insecure connection. 
You should configure SSL" + + " to secure this step.") + "http" + } + val (trustManager, sslContext): (X509TrustManager, SSLContext) = + if (driverLaunchSslOptions.enabled) { + buildSslConnectionConfiguration(driverLaunchSslOptions) + } else { + (null, SSLContext.getDefault) + } + val url = s"$urlScheme://$nodeAddress:$servicePort" HttpClientUtil.createClient[KubernetesSparkRestApi]( - uri = url, + url, sslSocketFactory = sslContext.getSocketFactory, trustContext = trustManager) } + private def buildSslConnectionConfiguration(driverLaunchSslOptions: SSLOptions) = { + driverLaunchSslOptions.trustStore.map(trustStoreFile => { + val trustManagerFactory = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm) + val trustStore = KeyStore.getInstance( + driverLaunchSslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType)) + if (!trustStoreFile.isFile) { + throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + + s" does not exist or is not a file.") + } + Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => + driverLaunchSslOptions.trustStorePassword match { + case Some(password) => + trustStore.load(trustStoreStream, password.toCharArray) + case None => trustStore.load(trustStoreStream, null) + } + } + trustManagerFactory.init(trustStore) + val trustManagers = trustManagerFactory.getTrustManagers + val sslContext = SSLContext.getInstance("TLSv1.2") + sslContext.init(null, trustManagers, SECURE_RANDOM) + (trustManagers(0).asInstanceOf[X509TrustManager], sslContext) + }).getOrElse((null, SSLContext.getDefault)) + } + private def parseCustomLabels(labels: String): Map[String, String] = { labels.split(",").map(_.trim).filterNot(_.isEmpty).map(label => { label.split("=", 2).toSeq match { @@ -433,6 +608,9 @@ private[spark] class Client( private[spark] object Client extends Logging { private val SUBMISSION_SERVER_SECRET_NAME = "spark-submission-server-secret" + private val SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" + private val SSL_KEYSTORE_PASSWORD_SECRET_NAME = "spark-submission-server-keystore-password" + private val SSL_KEY_PASSWORD_SECRET_NAME = "spark-submission-server-key-password" private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" private val DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT = 7077 private val DEFAULT_DRIVER_PORT = 7078 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 38fa4d1d3f0b2..451dc96dd65ed 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -21,20 +21,26 @@ import java.net.URI import java.util.concurrent.CountDownLatch import javax.servlet.http.{HttpServletRequest, HttpServletResponse} +import com.google.common.base.Charsets import com.google.common.io.Files import org.apache.commons.codec.binary.Base64 import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import org.apache.spark.{SecurityManager, SPARK_VERSION, SparkConf} +import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.rest._ 
import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} private case class KubernetesSparkRestServerArguments( - val host: Option[String] = None, - val port: Option[Int] = None, - val secretFile: Option[String] = None) { + host: Option[String] = None, + port: Option[Int] = None, + useSsl: Boolean = false, + secretFile: Option[String] = None, + keyStoreFile: Option[String] = None, + keyStorePasswordFile: Option[String] = None, + keyStoreType: Option[String] = None, + keyPasswordFile: Option[String] = None) { def validate(): KubernetesSparkRestServerArguments = { require(host.isDefined, "Hostname not set via --hostname.") require(port.isDefined, "Port not set via --port") @@ -58,6 +64,21 @@ private object KubernetesSparkRestServerArguments { case "--secret-file" :: value :: tail => args = tail resolvedArguments.copy(secretFile = Some(value)) + case "--use-ssl" :: value :: tail => + args = tail + resolvedArguments.copy(useSsl = value.toBoolean) + case "--keystore-file" :: value :: tail => + args = tail + resolvedArguments.copy(keyStoreFile = Some(value)) + case "--keystore-password-file" :: value :: tail => + args = tail + resolvedArguments.copy(keyStorePasswordFile = Some(value)) + case "--keystore-type" :: value :: tail => + args = tail + resolvedArguments.copy(keyStoreType = Some(value)) + case "--keystore-key-password-file" :: value :: tail => + args = tail + resolvedArguments.copy(keyPasswordFile = Some(value)) // TODO polish usage message case Nil => resolvedArguments case unknown => throw new IllegalStateException(s"Unknown argument(s) found: $unknown") @@ -78,8 +99,9 @@ private[spark] class KubernetesSparkRestServer( port: Int, conf: SparkConf, expectedApplicationSecret: Array[Byte], - shutdownLock: CountDownLatch) - extends RestSubmissionServer(host, port, conf) { + shutdownLock: CountDownLatch, + sslOptions: SSLOptions = new SSLOptions) + extends RestSubmissionServer(host, port, conf, sslOptions) { private val SERVLET_LOCK = new Object private val javaExecutable = s"${System.getenv("JAVA_HOME")}/bin/java" @@ -196,7 +218,7 @@ private[spark] class KubernetesSparkRestServer( response.success = true response.submissionId = null response.message = "success" - response.serverSparkVersion = SPARK_VERSION + response.serverSparkVersion = sparkVersion response } case unexpected => @@ -249,6 +271,7 @@ private[spark] class KubernetesSparkRestServer( private[spark] object KubernetesSparkRestServer { private val barrier = new CountDownLatch(1) + def main(args: Array[String]): Unit = { val parsedArguments = KubernetesSparkRestServerArguments.fromArgsArray(args) val secretFile = new File(parsedArguments.secretFile.get) @@ -256,6 +279,24 @@ private[spark] object KubernetesSparkRestServer { throw new IllegalArgumentException(s"Secret file specified by --secret-file" + " is not a file, or does not exist.") } + val sslOptions = if (parsedArguments.useSsl) { + val keyStorePassword = parsedArguments + .keyStorePasswordFile + .map(new File(_)) + .map(Files.toString(_, Charsets.UTF_8)) + val keyPassword = parsedArguments + .keyPasswordFile + .map(new File(_)) + .map(Files.toString(_, Charsets.UTF_8)) + new SSLOptions( + enabled = true, + keyStore = parsedArguments.keyStoreFile.map(new File(_)), + keyStoreType = parsedArguments.keyStoreType, + keyStorePassword = keyStorePassword, + keyPassword = keyPassword) + } else { + new SSLOptions + } val secretBytes = Files.toByteArray(secretFile) val sparkConf = new SparkConf(true) val server = new KubernetesSparkRestServer( @@ -263,7 +304,8 @@ 
private[spark] object KubernetesSparkRestServer { parsedArguments.port.get, sparkConf, secretBytes, - barrier) + barrier, + sslOptions) server.start() ShutdownHookManager.addShutdownHook(() => { try { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index f512c50a9a934..dae4b2714b4e4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -106,13 +106,10 @@ private[spark] class KubernetesClusterSchedulerBackend( protected var totalExpectedExecutors = new AtomicInteger(0) private val driverUrl = RpcEndpointAddress( - System.getenv(s"${convertToEnvMode(kubernetesDriverServiceName)}_SERVICE_HOST"), + sc.getConf.get("spark.driver.host"), sc.getConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString - private def convertToEnvMode(value: String): String = - value.toUpperCase.map { c => if (c == '-') '_' else c } - private val initialExecutors = getInitialTargetExecutorNumber(1) private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 4d345158f356a..070008fce7410 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -19,5 +19,14 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -# This class will also require setting a secret via the SPARK_APP_SECRET environment variable -CMD exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer --hostname $HOSTNAME --port $SPARK_DRIVER_LAUNCHER_SERVER_PORT --secret-file $SPARK_SUBMISSION_SECRET_LOCATION +CMD SSL_ARGS="" && \ + if ! [ -z ${SPARK_SUBMISSION_USE_SSL+x} ]; then SSL_ARGS="$SSL_ARGS --use-ssl $SPARK_SUBMISSION_USE_SSL"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-file $SPARK_SUBMISSION_KEYSTORE_FILE"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_TYPE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-type $SPARK_SUBMISSION_KEYSTORE_TYPE"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-password-file $SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE"; fi && \ + if ! 
[ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ + exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer \ + --hostname $HOSTNAME \ + --port $SPARK_DRIVER_LAUNCHER_SERVER_PORT \ + --secret-file $SPARK_SUBMISSION_SECRET_LOCATION \ + ${SSL_ARGS} diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 569527de8e300..f6a322f18cd75 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -106,6 +106,10 @@ + + org.bouncycastle + bcpkix-jdk15on +
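
Editor's note on the driver Dockerfile change above: the container entrypoint conditionally assembles SSL_ARGS from the SPARK_SUBMISSION_* environment variables that the submission client sets on the driver pod, and passes them to KubernetesSparkRestServer alongside --hostname, --port, and --secret-file. The following is a minimal, illustrative Scala sketch (not part of this patch; the object name SslArgsSketch is invented) of that same env-to-flag mapping, using only the environment variable and flag names that appear elsewhere in this patch:

object SslArgsSketch {
  // Environment variables written onto the driver container by the submission client,
  // paired with the CLI flags understood by KubernetesSparkRestServerArguments.
  private val envToFlag = Seq(
    "SPARK_SUBMISSION_USE_SSL" -> "--use-ssl",
    "SPARK_SUBMISSION_KEYSTORE_FILE" -> "--keystore-file",
    "SPARK_SUBMISSION_KEYSTORE_TYPE" -> "--keystore-type",
    "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" -> "--keystore-password-file",
    "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" -> "--keystore-key-password-file")

  // Builds the extra argument list that the shell script accumulates as $SSL_ARGS:
  // any unset variable contributes nothing to the final command line.
  def sslArgs(env: Map[String, String] = sys.env): Seq[String] =
    envToFlag.flatMap { case (name, flag) =>
      env.get(name).toSeq.flatMap(value => Seq(flag, value))
    }
}

The shell version in the Dockerfile behaves the same way; the bcpkix-jdk15on dependency added to the integration-tests pom just below supports generating the self-signed keystore and truststore used to exercise this path in tests.
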
    diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index c4bb389f5ada2..13edea02dce9a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest +import java.io.File import java.nio.file.Paths import java.util.UUID import java.util.concurrent.TimeUnit @@ -36,7 +37,7 @@ import org.apache.spark.deploy.kubernetes.Client import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.internal.Logging +import org.apache.spark.deploy.kubernetes.integrationtest.sslutil.SSLUtils import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} import org.apache.spark.util.Utils @@ -68,6 +69,8 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") private var minikubeKubernetesClient: KubernetesClient = _ private var clientConfig: Config = _ + private var keyStoreFile: File = _ + private var trustStoreFile: File = _ override def beforeAll(): Unit = { Minikube.startMinikube() @@ -79,6 +82,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .done() minikubeKubernetesClient = Minikube.getKubernetesClient.inNamespace(NAMESPACE) clientConfig = minikubeKubernetesClient.getConfiguration + val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + Minikube.getMinikubeIp, + "changeit", + "changeit", + "changeit") + keyStoreFile = keyStore + trustStoreFile = trustStore } before { @@ -296,4 +306,32 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { assert(driverPodLabels.get("label1") == "label1value", "Unexpected value for label1") assert(driverPodLabels.get("label2") == "label2value", "Unexpected value for label2") } + + test("Enable SSL on the driver submit server") { + val args = Array( + "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", + "--deploy-mode", "cluster", + "--kubernetes-namespace", NAMESPACE, + "--name", "spark-pi", + "--executor-memory", "512m", + "--executor-cores", "1", + "--num-executors", "1", + "--upload-jars", HELPER_JAR, + "--class", MAIN_CLASS, + "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", + "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", + "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", + "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.ssl.kubernetes.driverlaunch.enabled=true", + "--conf", "spark.ssl.kubernetes.driverlaunch.keyStore=" + + s"file://${keyStoreFile.getAbsolutePath}", + "--conf", "spark.ssl.kubernetes.driverlaunch.keyStorePassword=changeit", + "--conf", "spark.ssl.kubernetes.driverlaunch.keyPassword=changeit", + 
"--conf", "spark.ssl.kubernetes.driverlaunch.trustStore=" + + s"file://${trustStoreFile.getAbsolutePath}", + "--conf", s"spark.ssl.kubernetes.driverlaunch.trustStorePassword=changeit", + EXAMPLES_JAR) + SparkSubmit.main(args) + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala new file mode 100644 index 0000000000000..bde7b43226660 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.sslutil + +import java.io.{File, FileOutputStream} +import java.math.BigInteger +import java.nio.file.Files +import java.security.{KeyPairGenerator, KeyStore, SecureRandom} +import java.util.{Calendar, Random} +import javax.security.auth.x500.X500Principal + +import org.bouncycastle.asn1.x509.{Extension, GeneralName, GeneralNames} +import org.bouncycastle.cert.jcajce.{JcaX509CertificateConverter, JcaX509v3CertificateBuilder} +import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder + +import org.apache.spark.util.Utils + +private[spark] object SSLUtils { + + def generateKeyStoreTrustStorePair( + ipAddress: String, + keyStorePassword: String, + keyPassword: String, + trustStorePassword: String): (File, File) = { + val keyPairGenerator = KeyPairGenerator.getInstance("RSA") + keyPairGenerator.initialize(512) + val keyPair = keyPairGenerator.generateKeyPair() + val selfPrincipal = new X500Principal(s"cn=$ipAddress") + val currentDate = Calendar.getInstance + val validForOneHundredYears = Calendar.getInstance + validForOneHundredYears.add(Calendar.YEAR, 100) + val certificateBuilder = new JcaX509v3CertificateBuilder( + selfPrincipal, + new BigInteger(4096, new Random()), + currentDate.getTime, + validForOneHundredYears.getTime, + selfPrincipal, + keyPair.getPublic) + certificateBuilder.addExtension(Extension.subjectAlternativeName, false, + new GeneralNames(new GeneralName(GeneralName.iPAddress, ipAddress))) + val signer = new JcaContentSignerBuilder("SHA1WithRSA") + .setSecureRandom(new SecureRandom()) + .build(keyPair.getPrivate) + val bcCertificate = certificateBuilder.build(signer) + val jcaCertificate = new JcaX509CertificateConverter().getCertificate(bcCertificate) + val keyStore = KeyStore.getInstance("JKS") + keyStore.load(null, null) + keyStore.setKeyEntry("key", keyPair.getPrivate, + keyPassword.toCharArray, Array(jcaCertificate)) + val tempDir = 
Files.createTempDirectory("temp-ssl-stores").toFile() + tempDir.deleteOnExit() + val keyStoreFile = new File(tempDir, "keyStore.jks") + Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { + keyStore.store(_, keyStorePassword.toCharArray) + } + val trustStore = KeyStore.getInstance("JKS") + trustStore.load(null, null) + trustStore.setCertificateEntry("key", jcaCertificate) + val trustStoreFile = new File(tempDir, "trustStore.jks") + Utils.tryWithResource(new FileOutputStream(trustStoreFile)) { + trustStore.store(_, trustStorePassword.toCharArray) + } + (keyStoreFile, trustStoreFile) + } + +} From c57ccdc2898a4d9130a943f1fbd75bda238afa10 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 31 Jan 2017 12:07:01 -0800 Subject: [PATCH 036/225] Extract constants and config into separate file. Launch => Submit. (#65) * Extract constants and config into separate file. Launch => Submit. * Address comments * A small shorthand * Refactor more ThreadUtils * Fix scalastyle, use cached thread pool * Tiny Scala style change --- docs/running-on-kubernetes.md | 16 +- .../spark/deploy/kubernetes/Client.scala | 251 +++++++++--------- .../spark/deploy/kubernetes/config.scala | 177 ++++++++++++ .../spark/deploy/kubernetes/constants.scala | 70 +++++ .../rest/KubernetesRestProtocolMessages.scala | 21 +- .../kubernetes/KubernetesSparkRestApi.scala | 3 +- .../KubernetesClusterSchedulerBackend.scala | 162 +++++------ .../src/main/docker/driver/Dockerfile | 2 +- .../integrationtest/KubernetesSuite.scala | 18 +- 9 files changed, 470 insertions(+), 250 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e25e189aa6d74..e256535fbbc9d 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -140,12 +140,12 @@ Spark supports using SSL to encrypt the traffic in this bootstrapping process. I whenever possible. See the [security page](security.html) and [configuration](configuration.html) sections for more information on -configuring SSL; use the prefix `spark.ssl.kubernetes.driverlaunch` in configuring the SSL-related fields in the context +configuring SSL; use the prefix `spark.ssl.kubernetes.submit` in configuring the SSL-related fields in the context of submitting to Kubernetes. For example, to set the trustStore used when the local machine communicates with the driver -pod in starting the application, set `spark.ssl.kubernetes.driverlaunch.trustStore`. +pod in starting the application, set `spark.ssl.kubernetes.submit.trustStore`. One note about the keyStore is that it can be specified as either a file on the client machine or a file in the -container image's disk. Thus `spark.ssl.kubernetes.driverlaunch.keyStore` can be a URI with a scheme of either `file:` +container image's disk. Thus `spark.ssl.kubernetes.submit.keyStore` can be a URI with a scheme of either `file:` or `container:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme `container:`, the file is assumed to already be on the container's disk at the appropriate path. @@ -235,7 +235,15 @@ from the other deployment modes. 
See the [configuration page](configuration.html (none) Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, - where each label is in the format key=value. + where each label is in the format key=value. Note that Spark also adds its own labels to the driver pod + for bookkeeping purposes. + + + + spark.kubernetes.driverSubmitTimeout + 60s + + Time to wait for the driver pod to start running before aborting its execution. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 07a45c7577bcd..fed9334dbbab4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -18,13 +18,13 @@ package org.apache.spark.deploy.kubernetes import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} -import java.util.concurrent.{Executors, TimeoutException, TimeUnit} +import java.util.concurrent.{TimeoutException, TimeUnit} import java.util.concurrent.atomic.AtomicBoolean import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} import com.google.common.base.Charsets import com.google.common.io.Files -import com.google.common.util.concurrent.{SettableFuture, ThreadFactoryBuilder} +import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action @@ -34,11 +34,13 @@ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.DurationInt -import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} +import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.{ThreadUtils, Utils} private[spark] class Client( sparkConf: SparkConf, @@ -47,25 +49,21 @@ private[spark] class Client( appArgs: Array[String]) extends Logging { import Client._ - private val namespace = sparkConf.get("spark.kubernetes.namespace", "default") + private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) private val master = resolveK8sMaster(sparkConf.get("spark.master")) private val launchTime = System.currentTimeMillis private val appName = sparkConf.getOption("spark.app.name") - .orElse(sparkConf.getOption("spark.app.id")) .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") - private val secretName = s"spark-submission-server-secret-$kubernetesAppId" - private val secretDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId" - private val sslSecretsDirectory = s"$SPARK_SUBMISSION_SECRET_BASE_DIR/$kubernetesAppId-ssl" - private val sslSecretsName = s"spark-submission-server-ssl-$kubernetesAppId" - private val 
driverLauncherSelectorValue = s"driver-launcher-$launchTime" - private val driverDockerImage = sparkConf.get( - "spark.kubernetes.driver.docker.image", s"spark-driver:$sparkVersion") - private val uploadedJars = sparkConf.getOption("spark.kubernetes.driver.uploads.jars") + private val secretName = s"$SUBMISSION_APP_SECRET_PREFIX-$kubernetesAppId" + private val secretDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId" + private val sslSecretsDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId-ssl" + private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" + private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) + private val uploadedJars = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_JARS) private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) - private val driverLaunchTimeoutSecs = sparkConf.getTimeAsSeconds( - "spark.kubernetes.driverLaunchTimeout", s"${DEFAULT_LAUNCH_TIMEOUT_SECONDS}s") + private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) private val secretBase64String = { val secretBytes = new Array[Byte](128) @@ -73,32 +71,27 @@ private[spark] class Client( Base64.encodeBase64String(secretBytes) } - private val serviceAccount = sparkConf.get("spark.kubernetes.submit.serviceAccountName", - "default") - - private val customLabels = sparkConf.get("spark.kubernetes.driver.labels", "") + private val serviceAccount = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) + private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) private implicit val retryableExecutionContext = ExecutionContext .fromExecutorService( - Executors.newSingleThreadExecutor(new ThreadFactoryBuilder() - .setNameFormat("kubernetes-client-retryable-futures-%d") - .setDaemon(true) - .build())) + ThreadUtils.newDaemonSingleThreadExecutor("kubernetes-client-retryable-futures")) def run(): Unit = { - val (driverLaunchSslOptions, isKeyStoreLocalFile) = parseDriverLaunchSslOptions() + val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(master) .withNamespace(namespace) - sparkConf.getOption("spark.kubernetes.submit.caCertFile").foreach { + sparkConf.get(KUBERNETES_CA_CERT_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) } - sparkConf.getOption("spark.kubernetes.submit.clientKeyFile").foreach { + sparkConf.get(KUBERNETES_CLIENT_KEY_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) } - sparkConf.getOption("spark.kubernetes.submit.clientCertFile").foreach { + sparkConf.get(KUBERNETES_CLIENT_CERT_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) } @@ -108,15 +101,16 @@ private[spark] class Client( .withNewMetadata() .withName(secretName) .endMetadata() - .withData(Map((SUBMISSION_SERVER_SECRET_NAME, secretBase64String)).asJava) + .withData(Map((SUBMISSION_APP_SECRET_NAME, secretBase64String)).asJava) .withType("Opaque") .done() val (sslEnvs, sslVolumes, sslVolumeMounts, sslSecrets) = configureSsl(kubernetesClient, - driverLaunchSslOptions, + driverSubmitSslOptions, isKeyStoreLocalFile) try { val driverKubernetesSelectors = (Map( - DRIVER_LAUNCHER_SELECTOR_LABEL -> driverLauncherSelectorValue, + SPARK_DRIVER_LABEL -> kubernetesAppId, + SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_APP_NAME_LABEL -> appName) ++ parsedCustomLabels).asJava val containerPorts = buildContainerPorts() @@ -126,7 +120,7 
@@ private[spark] class Client( submitCompletedFuture, submitPending, kubernetesClient, - driverLaunchSslOptions, + driverSubmitSslOptions, Array(submitServerSecret) ++ sslSecrets, driverKubernetesSelectors) Utils.tryWithResource(kubernetesClient @@ -141,7 +135,7 @@ private[spark] class Client( .withNewSpec() .withRestartPolicy("OnFailure") .addNewVolume() - .withName(s"spark-submission-secret-volume") + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) .withNewSecret() .withSecretName(submitServerSecret.getMetadata.getName) .endSecret() @@ -149,22 +143,22 @@ private[spark] class Client( .addToVolumes(sslVolumes: _*) .withServiceAccount(serviceAccount) .addNewContainer() - .withName(DRIVER_LAUNCHER_CONTAINER_NAME) + .withName(DRIVER_CONTAINER_NAME) .withImage(driverDockerImage) .withImagePullPolicy("IfNotPresent") .addNewVolumeMount() - .withName("spark-submission-secret-volume") + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) .withMountPath(secretDirectory) .withReadOnly(true) .endVolumeMount() .addToVolumeMounts(sslVolumeMounts: _*) .addNewEnv() - .withName("SPARK_SUBMISSION_SECRET_LOCATION") - .withValue(s"$secretDirectory/$SUBMISSION_SERVER_SECRET_NAME") + .withName(ENV_SUBMISSION_SECRET_LOCATION) + .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") .endEnv() .addNewEnv() - .withName("SPARK_DRIVER_LAUNCHER_SERVER_PORT") - .withValue(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT.toString) + .withName(ENV_SUBMISSION_SERVER_PORT) + .withValue(SUBMISSION_SERVER_PORT.toString) .endEnv() .addToEnv(sslEnvs: _*) .withPorts(containerPorts.asJava) @@ -173,7 +167,7 @@ private[spark] class Client( .done() var submitSucceeded = false try { - submitCompletedFuture.get(driverLaunchTimeoutSecs, TimeUnit.SECONDS) + submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) submitSucceeded = true } catch { case e: TimeoutException => @@ -199,8 +193,8 @@ private[spark] class Client( } } - private def parseDriverLaunchSslOptions(): (SSLOptions, Boolean) = { - val maybeKeyStore = sparkConf.getOption("spark.ssl.kubernetes.driverlaunch.keyStore") + private def parseDriverSubmitSslOptions(): (SSLOptions, Boolean) = { + val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_KEYSTORE) val resolvedSparkConf = sparkConf.clone() val (isLocalKeyStore, resolvedKeyStore) = maybeKeyStore.map(keyStore => { val keyStoreURI = Utils.resolveURI(keyStore) @@ -214,30 +208,29 @@ private[spark] class Client( (isProvidedKeyStoreLocal, Option.apply(keyStoreURI.getPath)) }).getOrElse((true, Option.empty[String])) resolvedKeyStore.foreach { - resolvedSparkConf.set("spark.ssl.kubernetes.driverlaunch.keyStore", _) + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, _) } - sparkConf.getOption("spark.ssl.kubernetes.driverlaunch.trustStore").foreach { trustStore => + sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE).foreach { trustStore => val trustStoreURI = Utils.resolveURI(trustStore) trustStoreURI.getScheme match { case "file" | null => - resolvedSparkConf.set("spark.ssl.kubernetes.driverlaunch.trustStore", - trustStoreURI.getPath) + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, trustStoreURI.getPath) case _ => throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + " for submit server must have no scheme, or scheme file://") } } val securityManager = new SecurityManager(resolvedSparkConf) - (securityManager.getSSLOptions("kubernetes.driverlaunch"), isLocalKeyStore) + (securityManager.getSSLOptions(KUBERNETES_SUBMIT_SSL_NAMESPACE), isLocalKeyStore) } - private def 
configureSsl(kubernetesClient: KubernetesClient, driverLaunchSslOptions: SSLOptions, + private def configureSsl(kubernetesClient: KubernetesClient, driverSubmitSslOptions: SSLOptions, isKeyStoreLocalFile: Boolean): (Array[EnvVar], Array[Volume], Array[VolumeMount], Array[Secret]) = { - if (driverLaunchSslOptions.enabled) { + if (driverSubmitSslOptions.enabled) { val sslSecretsMap = mutable.HashMap[String, String]() val sslEnvs = mutable.Buffer[EnvVar]() val secrets = mutable.Buffer[Secret]() - driverLaunchSslOptions.keyStore.foreach(store => { + driverSubmitSslOptions.keyStore.foreach(store => { val resolvedKeyStoreFile = if (isKeyStoreLocalFile) { if (!store.isFile) { throw new SparkException(s"KeyStore specified at $store is not a file or" + @@ -245,40 +238,40 @@ private[spark] class Client( } val keyStoreBytes = Files.toByteArray(store) val keyStoreBase64 = Base64.encodeBase64String(keyStoreBytes) - sslSecretsMap += (SSL_KEYSTORE_SECRET_NAME -> keyStoreBase64) - s"$sslSecretsDirectory/$SSL_KEYSTORE_SECRET_NAME" + sslSecretsMap += (SUBMISSION_SSL_KEYSTORE_SECRET_NAME -> keyStoreBase64) + s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_SECRET_NAME" } else { store.getAbsolutePath } sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_KEYSTORE_FILE") + .withName(ENV_SUBMISSION_KEYSTORE_FILE) .withValue(resolvedKeyStoreFile) .build() }) - driverLaunchSslOptions.keyStorePassword.foreach(password => { + driverSubmitSslOptions.keyStorePassword.foreach(password => { val passwordBase64 = Base64.encodeBase64String(password.getBytes(Charsets.UTF_8)) - sslSecretsMap += (SSL_KEYSTORE_PASSWORD_SECRET_NAME -> passwordBase64) + sslSecretsMap += (SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME -> passwordBase64) sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE") - .withValue(s"$sslSecretsDirectory/$SSL_KEYSTORE_PASSWORD_SECRET_NAME") + .withName(ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE) + .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME") .build() }) - driverLaunchSslOptions.keyPassword.foreach(password => { + driverSubmitSslOptions.keyPassword.foreach(password => { val passwordBase64 = Base64.encodeBase64String(password.getBytes(Charsets.UTF_8)) - sslSecretsMap += (SSL_KEY_PASSWORD_SECRET_NAME -> passwordBase64) + sslSecretsMap += (SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME -> passwordBase64) sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE") - .withValue(s"$sslSecretsDirectory/$SSL_KEY_PASSWORD_SECRET_NAME") + .withName(ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE) + .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME") .build() }) - driverLaunchSslOptions.keyStoreType.foreach(storeType => { + driverSubmitSslOptions.keyStoreType.foreach(storeType => { sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_KEYSTORE_TYPE") + .withName(ENV_SUBMISSION_KEYSTORE_TYPE) .withValue(storeType) .build() }) sslEnvs += new EnvVarBuilder() - .withName("SPARK_SUBMISSION_USE_SSL") + .withName(ENV_SUBMISSION_USE_SSL) .withValue("true") .build() val sslSecrets = kubernetesClient.secrets().createNew() @@ -290,13 +283,13 @@ private[spark] class Client( .done() secrets += sslSecrets val sslVolume = new VolumeBuilder() - .withName("spark-submission-server-ssl-secrets") + .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) .withNewSecret() .withSecretName(sslSecrets.getMetadata.getName) .endSecret() .build() val sslVolumeMount = new VolumeMountBuilder() - .withName("spark-submission-server-ssl-secrets") 
+ .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) .withReadOnly(true) .withMountPath(sslSecretsDirectory) .build() @@ -310,7 +303,7 @@ private[spark] class Client( submitCompletedFuture: SettableFuture[Boolean], submitPending: AtomicBoolean, kubernetesClient: KubernetesClient, - driverLaunchSslOptions: SSLOptions, + driverSubmitSslOptions: SSLOptions, applicationSecrets: Array[Secret], driverKubernetesSelectors: java.util.Map[String, String]) extends Watcher[Pod] { override def eventReceived(action: Action, pod: Pod): Unit = { @@ -322,7 +315,7 @@ private[spark] class Client( .getContainerStatuses .asScala .find(status => - status.getName == DRIVER_LAUNCHER_CONTAINER_NAME && status.getReady) match { + status.getName == DRIVER_CONTAINER_NAME && status.getReady) match { case Some(_) => val ownerRefs = Seq(new OwnerReferenceBuilder() .withName(pod.getMetadata.getName) @@ -337,10 +330,10 @@ private[spark] class Client( kubernetesClient.secrets().createOrReplace(secret) }) - val driverLauncherServicePort = new ServicePortBuilder() - .withName(DRIVER_LAUNCHER_SERVICE_PORT_NAME) - .withPort(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) - .withNewTargetPort(DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT) + val driverSubmissionServicePort = new ServicePortBuilder() + .withName(SUBMISSION_SERVER_PORT_NAME) + .withPort(SUBMISSION_SERVER_PORT) + .withNewTargetPort(SUBMISSION_SERVER_PORT) .build() val service = kubernetesClient.services().createNew() .withNewMetadata() @@ -351,20 +344,25 @@ private[spark] class Client( .withNewSpec() .withType("NodePort") .withSelector(driverKubernetesSelectors) - .withPorts(driverLauncherServicePort) + .withPorts(driverSubmissionServicePort) .endSpec() .done() try { - sparkConf.set("spark.kubernetes.driver.service.name", - service.getMetadata.getName) - sparkConf.set("spark.kubernetes.driver.pod.name", kubernetesAppId) + sparkConf.getOption("spark.app.id").foreach { id => + logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + + s" overridden as $kubernetesAppId") + } + sparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, service.getMetadata.getName) + sparkConf.set("spark.app.id", kubernetesAppId) + sparkConf.setIfMissing("spark.app.name", appName) sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) sparkConf.setIfMissing("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT.toString) - val driverLauncher = buildDriverLauncherClient(kubernetesClient, service, - driverLaunchSslOptions) + val driverSubmitter = buildDriverSubmissionClient(kubernetesClient, service, + driverSubmitSslOptions) val ping = Retry.retry(5, 5.seconds) { - driverLauncher.ping() + driverSubmitter.ping() } ping onFailure { case t: Throwable => @@ -375,7 +373,7 @@ private[spark] class Client( Future { sparkConf.set("spark.driver.host", pod.getStatus.getPodIP) val submitRequest = buildSubmissionRequest() - driverLauncher.create(submitRequest) + driverSubmitter.submitApplication(submitRequest) } } submitComplete onFailure { @@ -436,17 +434,17 @@ private[spark] class Client( kubernetesClient.pods().withName(kubernetesAppId).get() } catch { case throwable: Throwable => - logError(s"Timed out while waiting $driverLaunchTimeoutSecs seconds for the" + + logError(s"Timed out while waiting $driverSubmitTimeoutSecs seconds for the" + " driver pod to start, but an error occurred while fetching the driver" + " pod's details.", throwable) - throw new SparkException(s"Timed out while waiting $driverLaunchTimeoutSecs" + + throw new 
SparkException(s"Timed out while waiting $driverSubmitTimeoutSecs" + " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + " the latest state of the pod, another error was thrown. Check the logs for" + " the error that was thrown in looking up the driver pod.", e) } val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + - s" $driverLaunchTimeoutSecs seconds." + s" $driverSubmitTimeoutSecs seconds." val podStatusPhase = if (driverPod.getStatus.getPhase != null) { s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" } else { @@ -460,7 +458,7 @@ private[spark] class Client( val failedDriverContainerStatusString = driverPod.getStatus .getContainerStatuses .asScala - .find(_.getName == DRIVER_LAUNCHER_CONTAINER_NAME) + .find(_.getName == DRIVER_CONTAINER_NAME) .map(status => { val lastState = status.getState if (lastState.getRunning != null) { @@ -481,17 +479,21 @@ private[spark] class Client( "Driver container last state: Unknown" } }).getOrElse("The driver container wasn't found in the pod; expected to find" + - s" container with name $DRIVER_LAUNCHER_CONTAINER_NAME") + s" container with name $DRIVER_CONTAINER_NAME") s"$topLevelMessage\n" + s"$podStatusPhase\n" + s"$podStatusMessage\n\n$failedDriverContainerStatusString" } private def buildContainerPorts(): Seq[ContainerPort] = { - Seq(sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), - sparkConf.getInt("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT), - DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT, - uiPort).map(new ContainerPortBuilder().withContainerPort(_).build()) + Seq((DRIVER_PORT_NAME, sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT)), + (BLOCK_MANAGER_PORT_NAME, + sparkConf.getInt("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT)), + (SUBMISSION_SERVER_PORT_NAME, SUBMISSION_SERVER_PORT), + (UI_PORT_NAME, uiPort)).map(port => new ContainerPortBuilder() + .withName(port._1) + .withContainerPort(port._2) + .build()) } private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { @@ -526,22 +528,22 @@ private[spark] class Client( .map(CompressionUtils.createTarGzip(_)) } - private def buildDriverLauncherClient( + private def buildDriverSubmissionClient( kubernetesClient: KubernetesClient, service: Service, - driverLaunchSslOptions: SSLOptions): KubernetesSparkRestApi = { + driverSubmitSslOptions: SSLOptions): KubernetesSparkRestApi = { val servicePort = service .getSpec .getPorts .asScala - .filter(_.getName == DRIVER_LAUNCHER_SERVICE_PORT_NAME) + .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) .head .getNodePort // NodePort is exposed on every node, so just pick one of them. 
// TODO be resilient to node failures and try all of them val node = kubernetesClient.nodes.list.getItems.asScala.head val nodeAddress = node.getStatus.getAddresses.asScala.head.getAddress - val urlScheme = if (driverLaunchSslOptions.enabled) { + val urlScheme = if (driverSubmitSslOptions.enabled) { "https" } else { logWarning("Submitting application details, application secret, and local" + @@ -550,8 +552,8 @@ private[spark] class Client( "http" } val (trustManager, sslContext): (X509TrustManager, SSLContext) = - if (driverLaunchSslOptions.enabled) { - buildSslConnectionConfiguration(driverLaunchSslOptions) + if (driverSubmitSslOptions.enabled) { + buildSslConnectionConfiguration(driverSubmitSslOptions) } else { (null, SSLContext.getDefault) } @@ -562,18 +564,18 @@ private[spark] class Client( trustContext = trustManager) } - private def buildSslConnectionConfiguration(driverLaunchSslOptions: SSLOptions) = { - driverLaunchSslOptions.trustStore.map(trustStoreFile => { + private def buildSslConnectionConfiguration(driverSubmitSslOptions: SSLOptions) = { + driverSubmitSslOptions.trustStore.map(trustStoreFile => { val trustManagerFactory = TrustManagerFactory.getInstance( TrustManagerFactory.getDefaultAlgorithm) val trustStore = KeyStore.getInstance( - driverLaunchSslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType)) + driverSubmitSslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType)) if (!trustStoreFile.isFile) { throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + s" does not exist or is not a file.") } Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => - driverLaunchSslOptions.trustStorePassword match { + driverSubmitSslOptions.trustStorePassword match { case Some(password) => trustStore.load(trustStoreStream, password.toCharArray) case None => trustStore.load(trustStoreStream, null) @@ -587,44 +589,29 @@ private[spark] class Client( }).getOrElse((null, SSLContext.getDefault)) } - private def parseCustomLabels(labels: String): Map[String, String] = { - labels.split(",").map(_.trim).filterNot(_.isEmpty).map(label => { - label.split("=", 2).toSeq match { - case Seq(k, v) => - require(k != DRIVER_LAUNCHER_SELECTOR_LABEL, "Label with key" + - s" $DRIVER_LAUNCHER_SELECTOR_LABEL cannot be used in" + - " spark.kubernetes.driver.labels, as it is reserved for Spark's" + - " internal configuration.") - (k, v) - case _ => - throw new SparkException("Custom labels set by spark.kubernetes.driver.labels" + - " must be a comma-separated list of key-value pairs, with format <key>=<value>." + - s" Got label: $label. All labels: $labels") - } - }).toMap + private def parseCustomLabels(maybeLabels: Option[String]): Map[String, String] = { + maybeLabels.map(labels => { + labels.split(",").map(_.trim).filterNot(_.isEmpty).map(label => { + label.split("=", 2).toSeq match { + case Seq(k, v) => + require(k != SPARK_APP_ID_LABEL, "Label with key" + + s" $SPARK_APP_ID_LABEL cannot be used in" + + " spark.kubernetes.driver.labels, as it is reserved for Spark's" + + " internal configuration.") + (k, v) + case _ => + throw new SparkException("Custom labels set by spark.kubernetes.driver.labels" + + " must be a comma-separated list of key-value pairs, with format <key>=<value>." + + s" Got label: $label.
All labels: $labels") + } + }).toMap + }).getOrElse(Map.empty[String, String]) } } private[spark] object Client extends Logging { - private val SUBMISSION_SERVER_SECRET_NAME = "spark-submission-server-secret" - private val SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" - private val SSL_KEYSTORE_PASSWORD_SECRET_NAME = "spark-submission-server-keystore-password" - private val SSL_KEY_PASSWORD_SECRET_NAME = "spark-submission-server-key-password" - private val DRIVER_LAUNCHER_SELECTOR_LABEL = "driver-launcher-selector" - private val DRIVER_LAUNCHER_SERVICE_INTERNAL_PORT = 7077 - private val DEFAULT_DRIVER_PORT = 7078 - private val DEFAULT_BLOCKMANAGER_PORT = 7079 - private val DEFAULT_UI_PORT = 4040 - private val UI_PORT_NAME = "spark-ui-port" - private val DRIVER_LAUNCHER_SERVICE_PORT_NAME = "driver-launcher-port" - private val DRIVER_PORT_NAME = "driver-port" - private val BLOCKMANAGER_PORT_NAME = "block-manager-port" - private val DRIVER_LAUNCHER_CONTAINER_NAME = "spark-kubernetes-driver-launcher" - private val SECURE_RANDOM = new SecureRandom() - private val SPARK_SUBMISSION_SECRET_BASE_DIR = "/var/run/secrets/spark-submission" - private val DEFAULT_LAUNCH_TIMEOUT_SECONDS = 60 - private val SPARK_APP_NAME_LABEL = "spark-app-name" + private[spark] val SECURE_RANDOM = new SecureRandom() def main(args: Array[String]): Unit = { require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala new file mode 100644 index 0000000000000..9b145370f87d6 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.util.concurrent.TimeUnit + +import org.apache.spark.{SPARK_VERSION => sparkVersion} +import org.apache.spark.internal.config.ConfigBuilder + +package object config { + + private[spark] val KUBERNETES_NAMESPACE = + ConfigBuilder("spark.kubernetes.namespace") + .doc(""" + | The namespace that will be used for running the driver and + | executor pods. When using spark-submit in cluster mode, + | this can also be passed to spark-submit via the + | --kubernetes-namespace command line argument. + """.stripMargin) + .stringConf + .createWithDefault("default") + + private[spark] val DRIVER_DOCKER_IMAGE = + ConfigBuilder("spark.kubernetes.driver.docker.image") + .doc(""" + | Docker image to use for the driver. Specify this using the + | standard Docker tag format. 
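As a point of reference for the parseCustomLabels rewrite above, the standalone sketch below (illustrative only; the object name and the sample value are not from the patch, though the sample mirrors the labels exercised by the integration test later in this series) shows the key=value shape that spark.kubernetes.driver.labels accepts and the resulting map:

    // Standalone illustration of the comma-separated key=value label format parsed above.
    object DriverLabelsExample {
      def main(args: Array[String]): Unit = {
        val raw = "label1=label1value,label2=label2value"
        val parsed: Map[String, String] = raw.split(",").map(_.trim).filterNot(_.isEmpty).map { label =>
          label.split("=", 2) match {
            case Array(k, v) => k -> v
            case _ => sys.error(s"Expected key=value, got: $label")
          }
        }.toMap
        println(parsed) // Map(label1 -> label1value, label2 -> label2value)
      }
    }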
+ """.stripMargin) + .stringConf + .createWithDefault(s"spark-driver:$sparkVersion") + + private[spark] val EXECUTOR_DOCKER_IMAGE = + ConfigBuilder("spark.kubernetes.executor.docker.image") + .doc(""" + | Docker image to use for the executors. Specify this using + | the standard Docker tag format. + """.stripMargin) + .stringConf + .createWithDefault(s"spark-executor:$sparkVersion") + + private[spark] val KUBERNETES_CA_CERT_FILE = + ConfigBuilder("spark.kubernetes.submit.caCertFile") + .doc(""" + | CA cert file for connecting to Kubernetes over SSL. This + | file should be located on the submitting machine's disk. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_CLIENT_KEY_FILE = + ConfigBuilder("spark.kubernetes.submit.clientKeyFile") + .doc(""" + | Client key file for authenticating against the Kubernetes + | API server. This file should be located on the submitting + | machine's disk. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_CLIENT_CERT_FILE = + ConfigBuilder("spark.kubernetes.submit.clientCertFile") + .doc(""" + | Client cert file for authenticating against the + | Kubernetes API server. This file should be located on + | the submitting machine's disk. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_SERVICE_ACCOUNT_NAME = + ConfigBuilder("spark.kubernetes.submit.serviceAccountName") + .doc(""" + | Service account that is used when running the driver pod. + | The driver pod uses this service account when requesting + | executor pods from the API server. + """.stripMargin) + .stringConf + .createWithDefault("default") + + private[spark] val KUBERNETES_DRIVER_UPLOAD_JARS = + ConfigBuilder("spark.kubernetes.driver.uploads.jars") + .doc(""" + | Comma-separated list of jars to send to the driver and + | all executors when submitting the application in cluster + | mode. + """.stripMargin) + .stringConf + .createOptional + + // Note that while we set a default for this when we start up the + // scheduler, the specific default value is dynamically determined + // based on the executor memory. + private[spark] val KUBERNETES_EXECUTOR_MEMORY_OVERHEAD = + ConfigBuilder("spark.kubernetes.executor.memoryOverhead") + .doc(""" + | The amount of off-heap memory (in megabytes) to be + | allocated per executor. This is memory that accounts for + | things like VM overheads, interned strings, other native + | overheads, etc. This tends to grow with the executor size + | (typically 6-10%). + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_LABELS = + ConfigBuilder("spark.kubernetes.driver.labels") + .doc(""" + | Custom labels that will be added to the driver pod. + | This should be a comma-separated list of label key-value + | pairs, where each label is in the format key=value. Note + | that Spark also adds its own labels to the driver pod + | for bookkeeping purposes. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_SUBMIT_TIMEOUT = + ConfigBuilder("spark.kubernetes.driverSubmitTimeout") + .doc(""" + | Time to wait for the driver process to start running + | before aborting its execution. + """.stripMargin) + .timeConf(TimeUnit.SECONDS) + .createWithDefault(60L) + + private[spark] val KUBERNETES_DRIVER_SUBMIT_KEYSTORE = + ConfigBuilder("spark.ssl.kubernetes.submit.keyStore") + .doc(""" + | KeyStore file for the driver submission server listening + | on SSL.
Can be pre-mounted on the driver container + | or uploaded from the submitting client. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE = + ConfigBuilder("spark.ssl.kubernetes.submit.trustStore") + .doc(""" + | TrustStore containing certificates for communicating + | to the driver submission server over SSL. + """.stripMargin) + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_SERVICE_NAME = + ConfigBuilder("spark.kubernetes.driver.service.name") + .doc(""" + | Kubernetes service that exposes the driver pod + | for external access. + """.stripMargin) + .internal() + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_POD_NAME = + ConfigBuilder("spark.kubernetes.driver.pod.name") + .doc(""" + | Name of the driver pod. + """.stripMargin) + .internal() + .stringConf + .createOptional +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala new file mode 100644 index 0000000000000..027cc3c022b4e --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
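As a rough sketch of how the typed entries defined above are consumed (an illustration, not code from the patch): entries created with createWithDefault come back as plain values, createOptional entries come back as an Option, and timeConf entries come back as a Long in the declared unit. Because the entries and SparkConf.get(ConfigEntry) are private[spark], the sketch assumes it compiles inside Spark's own source tree; the image name is invented.

    // Illustrative only: reading the entries defined in config.scala above.
    package org.apache.spark.deploy.kubernetes

    import org.apache.spark.SparkConf
    import org.apache.spark.deploy.kubernetes.config._

    object KubernetesConfigExample {
      def main(args: Array[String]): Unit = {
        val conf = new SparkConf(loadDefaults = false)
          .set("spark.kubernetes.driver.docker.image", "example-registry/spark-driver:latest")

        val driverImage: String = conf.get(DRIVER_DOCKER_IMAGE)                    // createWithDefault
        val uploadedJars: Option[String] = conf.get(KUBERNETES_DRIVER_UPLOAD_JARS) // createOptional
        val submitTimeoutSecs: Long = conf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT)   // timeConf(SECONDS)

        println(s"image=$driverImage, uploadedJars=$uploadedJars, timeout=${submitTimeoutSecs}s")
      }
    }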
+ */ +package org.apache.spark.deploy.kubernetes + +package object constants { + // Labels + private[spark] val SPARK_DRIVER_LABEL = "spark-driver" + private[spark] val SPARK_APP_ID_LABEL = "spark-app-id" + private[spark] val SPARK_APP_NAME_LABEL = "spark-app-name" + private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" + + // Secrets + private[spark] val DRIVER_CONTAINER_SECRETS_BASE_DIR = "/var/run/secrets/spark-submission" + private[spark] val SUBMISSION_APP_SECRET_NAME = "spark-submission-server-secret" + private[spark] val SUBMISSION_APP_SECRET_PREFIX = "spark-submission-server-secret" + private[spark] val SUBMISSION_APP_SECRET_VOLUME_NAME = "spark-submission-secret-volume" + private[spark] val SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME = + "spark-submission-server-key-password" + private[spark] val SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME = + "spark-submission-server-keystore-password" + private[spark] val SUBMISSION_SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" + private[spark] val SUBMISSION_SSL_SECRETS_PREFIX = "spark-submission-server-ssl" + private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" + + // Default and fixed ports + private[spark] val SUBMISSION_SERVER_PORT = 7077 + private[spark] val DEFAULT_DRIVER_PORT = 7078 + private[spark] val DEFAULT_BLOCKMANAGER_PORT = 7079 + private[spark] val DEFAULT_UI_PORT = 4040 + private[spark] val UI_PORT_NAME = "spark-ui-port" + private[spark] val SUBMISSION_SERVER_PORT_NAME = "submit-server" + private[spark] val BLOCK_MANAGER_PORT_NAME = "blockmanager" + private[spark] val DRIVER_PORT_NAME = "driver" + private[spark] val EXECUTOR_PORT_NAME = "executor" + + // Environment Variables + private[spark] val ENV_SUBMISSION_SECRET_LOCATION = "SPARK_SUBMISSION_SECRET_LOCATION" + private[spark] val ENV_SUBMISSION_SERVER_PORT = "SPARK_SUBMISSION_SERVER_PORT" + private[spark] val ENV_SUBMISSION_KEYSTORE_FILE = "SPARK_SUBMISSION_KEYSTORE_FILE" + private[spark] val ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE = + "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" + private[spark] val ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE = + "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" + private[spark] val ENV_SUBMISSION_KEYSTORE_TYPE = "SPARK_SUBMISSION_KEYSTORE_TYPE" + private[spark] val ENV_SUBMISSION_USE_SSL = "SPARK_SUBMISSION_USE_SSL" + private[spark] val ENV_EXECUTOR_PORT = "SPARK_EXECUTOR_PORT" + private[spark] val ENV_DRIVER_URL = "SPARK_DRIVER_URL" + private[spark] val ENV_EXECUTOR_CORES = "SPARK_EXECUTOR_CORES" + private[spark] val ENV_EXECUTOR_MEMORY = "SPARK_EXECUTOR_MEMORY" + private[spark] val ENV_APPLICATION_ID = "SPARK_APPLICATION_ID" + private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" + + // Miscellaneous + private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" + private[spark] val KUBERNETES_SUBMIT_SSL_NAMESPACE = "kubernetes.submit" +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 813d070e0f876..8beba23bc8e11 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -20,23 +20,22 @@ import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} import 
org.apache.spark.SPARK_VERSION -// TODO: jars should probably be compressed. Shipping tarballs would be optimal. case class KubernetesCreateSubmissionRequest( - val appResource: AppResource, - val mainClass: String, - val appArgs: Array[String], - val sparkProperties: Map[String, String], - val secret: String, - val uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { + appResource: AppResource, + mainClass: String, + appArgs: Array[String], + sparkProperties: Map[String, String], + secret: String, + uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } case class TarGzippedData( - val dataBase64: String, - val blockSize: Int = 10240, - val recordSize: Int = 512, - val encoding: String + dataBase64: String, + blockSize: Int = 10240, + recordSize: Int = 512, + encoding: String ) @JsonTypeInfo( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala index 3cbcb16293b1d..18eb9b7a12ca6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala @@ -28,12 +28,11 @@ trait KubernetesSparkRestApi { @Consumes(Array(MediaType.APPLICATION_JSON)) @Produces(Array(MediaType.APPLICATION_JSON)) @Path("/create") - def create(request: KubernetesCreateSubmissionRequest): CreateSubmissionResponse + def submitApplication(request: KubernetesCreateSubmissionRequest): CreateSubmissionResponse @GET @Consumes(Array(MediaType.APPLICATION_JSON)) @Produces(Array(MediaType.APPLICATION_JSON)) @Path("/ping") def ping(): PingResponse - } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index dae4b2714b4e4..550ddd113fa42 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -21,17 +21,18 @@ import java.util.concurrent.Executors import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} import com.google.common.util.concurrent.ThreadFactoryBuilder -import io.fabric8.kubernetes.api.model.{ContainerPort, ContainerPortBuilder, EnvVar, EnvVarBuilder, Pod, QuantityBuilder} +import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, Pod, QuantityBuilder} import scala.collection.JavaConverters._ -import scala.collection.mutable.ArrayBuffer import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.deploy.kubernetes.{Client, KubernetesClientBuilder} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.rpc.RpcEndpointAddress import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend -import org.apache.spark.util.Utils +import 
org.apache.spark.util.{ThreadUtils, Utils} private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, @@ -44,24 +45,19 @@ private[spark] class KubernetesClusterSchedulerBackend( private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] private val kubernetesMaster = Client.resolveK8sMaster(sc.master) - - private val executorDockerImage = conf - .get("spark.kubernetes.executor.docker.image", s"spark-executor:${sc.version}") - - private val kubernetesNamespace = conf.get("spark.kubernetes.namespace", "default") - + private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) + private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) - private val blockmanagerPort = conf .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) private val kubernetesDriverServiceName = conf - .getOption("spark.kubernetes.driver.service.name") + .get(KUBERNETES_DRIVER_SERVICE_NAME) .getOrElse( throw new SparkException("Must specify the service name the driver is running with")) private val kubernetesDriverPodName = conf - .getOption("spark.kubernetes.driver.pod.name") + .get(KUBERNETES_DRIVER_POD_NAME) .getOrElse( throw new SparkException("Must specify the driver pod name")) @@ -69,7 +65,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorMemoryBytes = Utils.byteStringAsBytes(executorMemory) private val memoryOverheadBytes = conf - .getOption("spark.kubernetes.executor.memoryOverhead") + .get(KUBERNETES_EXECUTOR_MEMORY_OVERHEAD) .map(overhead => Utils.byteStringAsBytes(overhead)) .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * executorMemoryBytes).toInt, MEMORY_OVERHEAD_MIN)) @@ -78,16 +74,12 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorCores = conf.getOption("spark.executor.cores").getOrElse("1") private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( - Executors.newCachedThreadPool( - new ThreadFactoryBuilder() - .setDaemon(true) - .setNameFormat("kubernetes-executor-requests-%d") - .build)) + ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) private val kubernetesClient = KubernetesClientBuilder .buildFromWithinPod(kubernetesMaster, kubernetesNamespace) - val driverPod = try { + private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). 
withName(kubernetesDriverPodName).get() } catch { @@ -127,6 +119,8 @@ private[spark] class KubernetesClusterSchedulerBackend( } } + override def applicationId(): String = conf.get("spark.app.id", super.applicationId()) + override def sufficientResourcesRegistered(): Boolean = { totalRegisteredExecutors.get() >= initialExecutors * minRegisteredRatio } @@ -163,9 +157,9 @@ private[spark] class KubernetesClusterSchedulerBackend( private def allocateNewExecutorPod(): (String, Pod) = { val executorKubernetesId = UUID.randomUUID().toString.replaceAll("-", "") val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val name = s"$kubernetesDriverServiceName-exec-$executorKubernetesId" - val selectors = Map(SPARK_EXECUTOR_SELECTOR -> executorId, - SPARK_APP_SELECTOR -> applicationId()).asJava + val name = s"${applicationId()}-exec-$executorKubernetesId" + val selectors = Map(SPARK_EXECUTOR_ID_LABEL -> executorId, + SPARK_APP_ID_LABEL -> applicationId()).asJava val executorMemoryQuantity = new QuantityBuilder(false) .withAmount(executorMemoryBytes.toString) .build() @@ -175,69 +169,61 @@ private[spark] class KubernetesClusterSchedulerBackend( val executorCpuQuantity = new QuantityBuilder(false) .withAmount(executorCores) .build() - val requiredEnv = new ArrayBuffer[EnvVar] - requiredEnv += new EnvVarBuilder() - .withName("SPARK_EXECUTOR_PORT") - .withValue(executorPort.toString) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_DRIVER_URL") - .withValue(driverUrl) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_EXECUTOR_CORES") - .withValue(executorCores) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_EXECUTOR_MEMORY") - .withValue(executorMemory) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_APPLICATION_ID") - .withValue(applicationId()) - .build() - requiredEnv += new EnvVarBuilder() - .withName("SPARK_EXECUTOR_ID") - .withValue(executorId) - .build() - val requiredPorts = new ArrayBuffer[ContainerPort] - requiredPorts += new ContainerPortBuilder() - .withName(EXECUTOR_PORT_NAME) - .withContainerPort(executorPort) - .build() - requiredPorts += new ContainerPortBuilder() - .withName(BLOCK_MANAGER_PORT_NAME) - .withContainerPort(blockmanagerPort) - .build() - (executorKubernetesId, kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(name) - .withLabels(selectors) - .withOwnerReferences() - .addNewOwnerReference() - .withController(true) - .withApiVersion(driverPod.getApiVersion) - .withKind(driverPod.getKind) - .withName(driverPod.getMetadata.getName) - .withUid(driverPod.getMetadata.getUid) - .endOwnerReference() - .endMetadata() - .withNewSpec() - .addNewContainer() - .withName(s"exec-${applicationId()}-container") - .withImage(executorDockerImage) - .withImagePullPolicy("IfNotPresent") - .withNewResources() - .addToRequests("memory", executorMemoryQuantity) - .addToLimits("memory", executorMemoryLimitQuantity) - .addToRequests("cpu", executorCpuQuantity) - .addToLimits("cpu", executorCpuQuantity) - .endResources() - .withEnv(requiredEnv.asJava) - .withPorts(requiredPorts.asJava) - .endContainer() - .endSpec() - .done()) + val requiredEnv = Seq( + (ENV_EXECUTOR_PORT, executorPort.toString), + (ENV_DRIVER_URL, driverUrl), + (ENV_EXECUTOR_CORES, executorCores), + (ENV_EXECUTOR_MEMORY, executorMemory), + (ENV_APPLICATION_ID, applicationId()), + (ENV_EXECUTOR_ID, executorId) + ).map(env => new EnvVarBuilder() + .withName(env._1) + .withValue(env._2) + .build()) + val requiredPorts = Seq( + 
(EXECUTOR_PORT_NAME, executorPort), + (BLOCK_MANAGER_PORT_NAME, blockmanagerPort)) + .map(port => { + new ContainerPortBuilder() + .withName(port._1) + .withContainerPort(port._2) + .build() + }) + try { + (executorKubernetesId, kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(name) + .withLabels(selectors) + .withOwnerReferences() + .addNewOwnerReference() + .withController(true) + .withApiVersion(driverPod.getApiVersion) + .withKind(driverPod.getKind) + .withName(driverPod.getMetadata.getName) + .withUid(driverPod.getMetadata.getUid) + .endOwnerReference() + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName(s"executor") + .withImage(executorDockerImage) + .withImagePullPolicy("IfNotPresent") + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryLimitQuantity) + .addToRequests("cpu", executorCpuQuantity) + .addToLimits("cpu", executorCpuQuantity) + .endResources() + .withEnv(requiredEnv.asJava) + .withPorts(requiredPorts.asJava) + .endContainer() + .endSpec() + .done()) + } catch { + case throwable: Throwable => + logError("Failed to allocate executor pod.", throwable) + throw throwable + } } override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { @@ -269,13 +255,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } private object KubernetesClusterSchedulerBackend { - private val SPARK_EXECUTOR_SELECTOR = "spark-exec" - private val SPARK_APP_SELECTOR = "spark-app" private val DEFAULT_STATIC_PORT = 10000 - private val DEFAULT_BLOCKMANAGER_PORT = 7079 - private val DEFAULT_DRIVER_PORT = 7078 - private val BLOCK_MANAGER_PORT_NAME = "blockmanager" - private val EXECUTOR_PORT_NAME = "executor" private val MEMORY_OVERHEAD_FACTOR = 0.10 private val MEMORY_OVERHEAD_MIN = 384L private val EXECUTOR_ID_COUNTER = new AtomicLong(0L) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 070008fce7410..92fdfb8ac5f41 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -27,6 +27,6 @@ CMD SSL_ARGS="" && \ if ! 
[ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer \ --hostname $HOSTNAME \ - --port $SPARK_DRIVER_LAUNCHER_SERVER_PORT \ + --port $SPARK_SUBMISSION_SERVER_PORT \ --secret-file $SPARK_SUBMISSION_SECRET_LOCATION \ ${SSL_ARGS} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 13edea02dce9a..16de71118dec4 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -172,7 +172,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") .set("spark.executors.instances", "1") - .set("spark.app.id", "spark-pi") + .set("spark.app.name", "spark-pi") .set("spark.ui.enabled", "true") .set("spark.testing", "false") val mainAppResource = s"file://$EXAMPLES_JAR" @@ -298,11 +298,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .getLabels // We can't match all of the selectors directly since one of the selectors is based on the // launch time. - assert(driverPodLabels.size == 4, "Unexpected number of pod labels.") - assert(driverPodLabels.containsKey("driver-launcher-selector"), "Expected driver launcher" + - " selector label to be present.") + assert(driverPodLabels.size == 5, "Unexpected number of pod labels.") assert(driverPodLabels.get("spark-app-name") == "spark-pi", "Unexpected value for" + " spark-app-name label.") + assert(driverPodLabels.get("spark-app-id").startsWith("spark-pi"), "Unexpected value for" + + " spark-app-id label (should be prefixed with the app name).") assert(driverPodLabels.get("label1") == "label1value", "Unexpected value for label1") assert(driverPodLabels.get("label2") == "label2value", "Unexpected value for label2") } @@ -323,12 +323,12 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", - "--conf", "spark.ssl.kubernetes.driverlaunch.enabled=true", - "--conf", "spark.ssl.kubernetes.driverlaunch.keyStore=" + + "--conf", "spark.ssl.kubernetes.submit.enabled=true", + "--conf", "spark.ssl.kubernetes.submit.keyStore=" + s"file://${keyStoreFile.getAbsolutePath}", - "--conf", "spark.ssl.kubernetes.driverlaunch.keyStorePassword=changeit", - "--conf", "spark.ssl.kubernetes.driverlaunch.keyPassword=changeit", - "--conf", "spark.ssl.kubernetes.driverlaunch.trustStore=" + + "--conf", "spark.ssl.kubernetes.submit.keyStorePassword=changeit", + "--conf", "spark.ssl.kubernetes.submit.keyPassword=changeit", + "--conf", "spark.ssl.kubernetes.submit.trustStore=" + s"file://${trustStoreFile.getAbsolutePath}", "--conf", s"spark.ssl.kubernetes.driverlaunch.trustStorePassword=changeit", EXAMPLES_JAR) From 
261a624782cd153c43888bc0c5712860604ede3f Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 2 Feb 2017 10:58:15 -0800 Subject: [PATCH 037/225] Retry the submit-application request to multiple nodes (#69) * Retry the submit-application request to multiple nodes. * Fix doc style comment * Check node unschedulable, log retry failures --- .../spark/deploy/kubernetes/Client.scala | 27 ++++---- .../spark/deploy/kubernetes/Retry.scala | 28 +++++--- .../rest/kubernetes/HttpClientUtil.scala | 21 ++++-- .../kubernetes/MultiServerFeignTarget.scala | 67 +++++++++++++++++++ .../integrationtest/minikube/Minikube.scala | 2 +- 5 files changed, 117 insertions(+), 28 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index fed9334dbbab4..715df54e573c3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -361,11 +361,13 @@ private[spark] class Client( DEFAULT_BLOCKMANAGER_PORT.toString) val driverSubmitter = buildDriverSubmissionClient(kubernetesClient, service, driverSubmitSslOptions) - val ping = Retry.retry(5, 5.seconds) { + val ping = Retry.retry(5, 5.seconds, + Some("Failed to contact the driver server")) { driverSubmitter.ping() } ping onFailure { case t: Throwable => + logError("Ping failed to the driver server", t) submitCompletedFuture.setException(t) kubernetesClient.services().delete(service) } @@ -532,17 +534,6 @@ private[spark] class Client( kubernetesClient: KubernetesClient, service: Service, driverSubmitSslOptions: SSLOptions): KubernetesSparkRestApi = { - val servicePort = service - .getSpec - .getPorts - .asScala - .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) - .head - .getNodePort - // NodePort is exposed on every node, so just pick one of them. 
- // TODO be resilient to node failures and try all of them - val node = kubernetesClient.nodes.list.getItems.asScala.head - val nodeAddress = node.getStatus.getAddresses.asScala.head.getAddress val urlScheme = if (driverSubmitSslOptions.enabled) { "https" } else { @@ -551,15 +542,23 @@ private[spark] class Client( " to secure this step.") "http" } + val servicePort = service.getSpec.getPorts.asScala + .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) + .head.getNodePort + val nodeUrls = kubernetesClient.nodes.list.getItems.asScala + .filterNot(_.getSpec.getUnschedulable) + .flatMap(_.getStatus.getAddresses.asScala.map(address => { + s"$urlScheme://${address.getAddress}:$servicePort" + })).toArray + require(nodeUrls.nonEmpty, "No nodes found to contact the driver!") val (trustManager, sslContext): (X509TrustManager, SSLContext) = if (driverSubmitSslOptions.enabled) { buildSslConnectionConfiguration(driverSubmitSslOptions) } else { (null, SSLContext.getDefault) } - val url = s"$urlScheme://$nodeAddress:$servicePort" HttpClientUtil.createClient[KubernetesSparkRestApi]( - url, + uris = nodeUrls, sslSocketFactory = sslContext.getSocketFactory, trustContext = trustManager) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala index e5ce0bcd606b2..378583b29c547 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala @@ -19,24 +19,36 @@ package org.apache.spark.deploy.kubernetes import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration -private[spark] object Retry { +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging + +private[spark] object Retry extends Logging { private def retryableFuture[T] - (times: Int, interval: Duration) + (attempt: Int, maxAttempts: Int, interval: Duration, retryMessage: Option[String]) (f: => Future[T]) (implicit executionContext: ExecutionContext): Future[T] = { f recoverWith { - case _ if times > 0 => { - Thread.sleep(interval.toMillis) - retryableFuture(times - 1, interval)(f) - } + case error: Throwable => + if (attempt <= maxAttempts) { + retryMessage.foreach { message => + logWarning(s"$message - attempt $attempt of $maxAttempts", error) + } + Thread.sleep(interval.toMillis) + retryableFuture(attempt + 1, maxAttempts, interval, retryMessage)(f) + } else { + Future.failed(retryMessage.map(message => + new SparkException(s"$message - reached $maxAttempts attempts," + + s" and aborting task.", error) + ).getOrElse(error)) + } } } def retry[T] - (times: Int, interval: Duration) + (times: Int, interval: Duration, retryMessage: Option[String] = None) (f: => T) (implicit executionContext: ExecutionContext): Future[T] = { - retryableFuture(times, interval)(Future[T] { f }) + retryableFuture(1, times, interval, retryMessage)(Future[T] { f }) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala index eb7d411700829..1cabfbad656eb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala +++ 
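To make the revised Retry.retry signature above concrete, here is a minimal usage sketch (an illustration, not code from the patch): the attempt count and interval come first, the optional message is logged on each failed attempt and wrapped in the SparkException surfaced once the attempts are exhausted, and the by-name body is re-evaluated on every retry. The thread-pool name and the flaky operation are invented, and the sketch assumes compilation inside Spark's source tree since Retry and ThreadUtils are private[spark].

    // Illustrative only: exercising the retry helper described above.
    package org.apache.spark.deploy.kubernetes

    import scala.concurrent.{Await, ExecutionContext}
    import scala.concurrent.duration._

    import org.apache.spark.util.ThreadUtils

    object RetryExample {
      def main(args: Array[String]): Unit = {
        implicit val ec: ExecutionContext = ExecutionContext.fromExecutorService(
          ThreadUtils.newDaemonSingleThreadExecutor("retry-example"))

        var calls = 0
        // Asks for five retries, five seconds apart; intermediate failures are logged with the
        // supplied message, and the last error is wrapped in a SparkException if all attempts fail.
        val eventualPong = Retry.retry(5, 5.seconds, Some("Failed to contact the example server")) {
          calls += 1
          if (calls < 3) throw new RuntimeException("server not up yet")
          "pong"
        }
        println(Await.result(eventualPong, 1.minute)) // succeeds on the third evaluation of the body
      }
    }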
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala @@ -20,7 +20,7 @@ import javax.net.ssl.{SSLContext, SSLSocketFactory, X509TrustManager} import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} import com.fasterxml.jackson.module.scala.DefaultScalaModule -import feign.Feign +import feign.{Client, Feign, Request, Response} import feign.Request.Options import feign.jackson.{JacksonDecoder, JacksonEncoder} import feign.jaxrs.JAXRSContract @@ -32,7 +32,7 @@ import org.apache.spark.status.api.v1.JacksonMessageWriter private[spark] object HttpClientUtil { def createClient[T: ClassTag]( - uri: String, + uris: Array[String], sslSocketFactory: SSLSocketFactory = SSLContext.getDefault.getSocketFactory, trustContext: X509TrustManager = null, readTimeoutMillis: Int = 20000, @@ -45,13 +45,24 @@ private[spark] object HttpClientUtil { .registerModule(new DefaultScalaModule) .setDateFormat(JacksonMessageWriter.makeISODateFormat) objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - val clazz = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + val target = new MultiServerFeignTarget[T](uris) + val baseHttpClient = new feign.okhttp.OkHttpClient(httpClientBuilder.build()) + val resetTargetHttpClient = new Client { + override def execute(request: Request, options: Options): Response = { + val response = baseHttpClient.execute(request, options) + if (response.status() >= 200 && response.status() < 300) { + target.reset() + } + response + } + } Feign.builder() - .client(new feign.okhttp.OkHttpClient(httpClientBuilder.build())) + .client(resetTargetHttpClient) .contract(new JAXRSContract) .encoder(new JacksonEncoder(objectMapper)) .decoder(new JacksonDecoder(objectMapper)) .options(new Options(connectTimeoutMillis, readTimeoutMillis)) - .target(clazz, uri) + .retryer(target) + .target(target) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala new file mode 100644 index 0000000000000..fea7f057cfa1b --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala @@ -0,0 +1,67 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import feign.{Request, RequestTemplate, RetryableException, Retryer, Target} +import scala.reflect.ClassTag +import scala.util.Random + +private[kubernetes] class MultiServerFeignTarget[T : ClassTag]( + private val servers: Seq[String]) extends Target[T] with Retryer { + require(servers.nonEmpty, "Must provide at least one server URI.") + + private val threadLocalShuffledServers = new ThreadLocal[Seq[String]] { + override def initialValue(): Seq[String] = Random.shuffle(servers) + } + + override def `type`(): Class[T] = { + implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + } + + override def url(): String = threadLocalShuffledServers.get.head + + /** + * Cloning the target is done on every request, for use on the current + * thread - thus it's important that clone returns a "fresh" target. + */ + override def clone(): Retryer = { + reset() + this + } + + override def name(): String = { + s"${getClass.getSimpleName} with servers [${servers.mkString(",")}]" + } + + override def apply(requestTemplate: RequestTemplate): Request = { + if (!requestTemplate.url().startsWith("http")) { + requestTemplate.insert(0, url()) + } + requestTemplate.request() + } + + override def continueOrPropagate(e: RetryableException): Unit = { + threadLocalShuffledServers.set(threadLocalShuffledServers.get.drop(1)) + if (threadLocalShuffledServers.get.isEmpty) { + throw e + } + } + + def reset(): Unit = { + threadLocalShuffledServers.set(Random.shuffle(servers)) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index 60c6564579a6e..b42f97952394e 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -123,7 +123,7 @@ private[spark] object Minikube extends Logging { .build() val sslContext = SSLUtils.sslContext(kubernetesConf) val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] - HttpClientUtil.createClient[T](url, sslContext.getSocketFactory, trustManager) + HttpClientUtil.createClient[T](Array(url), sslContext.getSocketFactory, trustManager) } def executeMinikubeSsh(command: String): Unit = { From ab731f154d0eced9cc4033784c5de92e0eafaa2e Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 2 Feb 2017 12:22:54 -0800 Subject: [PATCH 038/225] Allow adding arbitrary files (#71) * Allow adding arbitrary files * Address comments and add documentation --- .../org/apache/spark/deploy/SparkSubmit.scala | 2 + .../spark/deploy/SparkSubmitArguments.scala | 7 ++ docs/running-on-kubernetes.md | 12 ++- .../launcher/SparkSubmitOptionParser.java | 4 +- .../spark/deploy/kubernetes/Client.scala | 34 ++++++-- .../spark/deploy/kubernetes/config.scala | 16 +++- .../rest/KubernetesRestProtocolMessages.scala | 3 +- .../rest/kubernetes/CompressionUtils.scala | 4 +- .../KubernetesSparkRestServer.scala | 53 ++++++++---- .../jobs/FileExistenceTest.scala | 54 ++++++++++++ .../integrationtest/KubernetesSuite.scala | 85 +++++++++++++++++-- .../integration-tests/test-data/input.txt | 1 + 12 files changed, 243 insertions(+), 32 deletions(-) create mode 100644 
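A rough usage sketch for the multi-server client wiring above (illustrative only: the node addresses and port are invented, the call relies on the defaulted timeout and SSL parameters of HttpClientUtil.createClient, and the sketch assumes compilation inside Spark's source tree, where these classes are private[spark]):

    // Illustrative only: a submission client that fails over across several node URLs.
    package org.apache.spark.deploy.rest.kubernetes

    object MultiNodeClientExample {
      def main(args: Array[String]): Unit = {
        // Hypothetical NodePort endpoints; in the patch these are derived from the
        // schedulable nodes' addresses plus the submission service's node port.
        val nodeUrls = Array(
          "http://192.168.99.100:31000",
          "http://192.168.99.101:31000")
        val restApi = HttpClientUtil.createClient[KubernetesSparkRestApi](uris = nodeUrls)
        // Each calling thread starts from a freshly shuffled copy of the URL list; a retryable
        // failure drops the current head and moves on to the next URL, while a 2xx response
        // resets the rotation for that thread.
        restApi.ping()
      }
    }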
resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala create mode 100644 resource-managers/kubernetes/integration-tests/test-data/input.txt diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index e3feaf3974777..6ec90e7819aee 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -485,6 +485,8 @@ object SparkSubmit extends CommandLineUtils { sysProp = "spark.kubernetes.namespace"), OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER, sysProp = "spark.kubernetes.driver.uploads.jars"), + OptionAssigner(args.kubernetesUploadFiles, KUBERNETES, CLUSTER, + sysProp = "spark.kubernetes.driver.uploads.files"), // Other options OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES, diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 204a2cb37016c..2b5e8baa7f611 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -74,6 +74,7 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S // Kubernetes only var kubernetesNamespace: String = null var kubernetesUploadJars: String = null + var kubernetesUploadFiles: String = null // Standalone cluster mode only var supervise: Boolean = false @@ -203,6 +204,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S kubernetesUploadJars = Option(kubernetesUploadJars) .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.jars")) .orNull + kubernetesUploadFiles = Option(kubernetesUploadFiles) + .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.files")) + .orNull // Try to set main class from JAR if no --class argument is given if (mainClass == null && !isPython && !isR && primaryResource != null) { @@ -447,6 +451,9 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KUBERNETES_UPLOAD_JARS => kubernetesUploadJars = value + case KUBERNETES_UPLOAD_FILES => + kubernetesUploadFiles = value + case HELP => printUsageAndExit(0) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e256535fbbc9d..5a48bb254a6df 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -217,10 +217,20 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.driver.uploads.jars (none) - Comma-separated list of jars to sent to the driver and all executors when submitting the application in cluster + Comma-separated list of jars to send to the driver and all executors when submitting the application in cluster mode. Refer to adding other jars for more information. + + spark.kubernetes.driver.uploads.files + (none) + + Comma-separated list of files to send to the driver and all executors when submitting the application in cluster + mode. The files are added in a flat hierarchy to the current working directory of the driver, having the same + names as the names of the original files. Note that two files with the same name cannot be added, even if they + were in different source directories on the client disk. 
+ + spark.kubernetes.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index f1dac20f52f0d..3369b5d8301be 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -80,6 +80,7 @@ class SparkSubmitOptionParser { protected final String KUBERNETES_MASTER = "--kubernetes-master"; protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace"; protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars"; + protected final String KUBERNETES_UPLOAD_FILES = "--upload-files"; /** * This is the canonical list of spark-submit options. Each entry in the array contains the @@ -122,7 +123,8 @@ class SparkSubmitOptionParser { { TOTAL_EXECUTOR_CORES }, { KUBERNETES_MASTER }, { KUBERNETES_NAMESPACE }, - { KUBERNETES_UPLOAD_JARS } + { KUBERNETES_UPLOAD_JARS }, + { KUBERNETES_UPLOAD_FILES } }; /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 715df54e573c3..c350c4817664d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -61,7 +61,9 @@ private[spark] class Client( private val sslSecretsDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId-ssl" private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val uploadedJars = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_JARS) + private val uploadedJars = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_JARS).filter(_.nonEmpty) + private val uploadedFiles = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_FILES).filter(_.nonEmpty) + uploadedFiles.foreach(validateNoDuplicateUploadFileNames) private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) @@ -513,18 +515,40 @@ private[spark] class Client( case "container" => ContainerAppResource(appResourceUri.getPath) case other => RemoteAppResource(other) } - - val uploadJarsBase64Contents = compressJars(uploadedJars) + val uploadJarsBase64Contents = compressFiles(uploadedJars) + val uploadFilesBase64Contents = compressFiles(uploadedFiles) KubernetesCreateSubmissionRequest( appResource = resolvedAppResource, mainClass = mainClass, appArgs = appArgs, secret = secretBase64String, sparkProperties = sparkConf.getAll.toMap, - uploadedJarsBase64Contents = uploadJarsBase64Contents) + uploadedJarsBase64Contents = uploadJarsBase64Contents, + uploadedFilesBase64Contents = uploadFilesBase64Contents) + } + + // Because uploaded files should be added to the working directory of the driver, they + // need to not have duplicate file names. They are added to the working directory so the + // user can reliably locate them in their application. This is similar in principle to how + // YARN handles its `spark.files` setting. 
+ private def validateNoDuplicateUploadFileNames(uploadedFilesCommaSeparated: String): Unit = { + val pathsWithDuplicateNames = uploadedFilesCommaSeparated + .split(",") + .groupBy(new File(_).getName) + .filter(_._2.length > 1) + if (pathsWithDuplicateNames.nonEmpty) { + val pathsWithDuplicateNamesSorted = pathsWithDuplicateNames + .values + .flatten + .toList + .sortBy(new File(_).getName) + throw new SparkException("Cannot upload files with duplicate names via" + + s" ${KUBERNETES_DRIVER_UPLOAD_FILES.key}. The following paths have a duplicated" + + s" file name: ${pathsWithDuplicateNamesSorted.mkString(",")}") + } } - private def compressJars(maybeFilePaths: Option[String]): Option[TarGzippedData] = { + private def compressFiles(maybeFilePaths: Option[String]): Option[TarGzippedData] = { maybeFilePaths .map(_.split(",")) .map(CompressionUtils.createTarGzip(_)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 9b145370f87d6..3e0c400febca1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -94,13 +94,27 @@ package object config { private[spark] val KUBERNETES_DRIVER_UPLOAD_JARS = ConfigBuilder("spark.kubernetes.driver.uploads.jars") .doc(""" - | Comma-separated list of jars to sent to the driver and + | Comma-separated list of jars to send to the driver and | all executors when submitting the application in cluster | mode. """.stripMargin) .stringConf .createOptional + private[spark] val KUBERNETES_DRIVER_UPLOAD_FILES = + ConfigBuilder("spark.kubernetes.driver.uploads.files") + .doc(""" + | Comma-separated list of files to send to the driver and + | all executors when submitting the application in cluster + | mode. The files are added in a flat hierarchy to the + | current working directory of the driver, having the same + | names as the names of the original files. Note that two + | files with the same name cannot be added, even if they + | were in different source directories on the client disk. + """.stripMargin) + .stringConf + .createOptional + // Note that while we set a default for this when we start up the // scheduler, the specific default value is dynamically determined // based on the executor memory. 
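The duplicate-name rule enforced above can be illustrated with a small, self-contained sketch that roughly mirrors Client.validateNoDuplicateUploadFileNames (the object and method names below are illustrative only and are not part of this patch): paths supplied through spark.kubernetes.driver.uploads.files are grouped by their bare file name, and any group containing more than one path is reported, since the driver flattens all uploads into a single working directory.

    import java.io.File

    object UploadFileNameCheck {
      // Return every path whose bare file name collides with another uploaded path.
      def duplicateNamePaths(commaSeparatedPaths: String): Seq[String] = {
        commaSeparatedPaths
          .split(",")
          .toSeq
          .groupBy(path => new File(path).getName) // e.g. /a/input.txt and /b/input.txt collide
          .filter { case (_, paths) => paths.length > 1 }
          .values
          .flatten
          .toSeq
          .sorted
      }

      def main(args: Array[String]): Unit = {
        // Prints List(/a/input.txt, /b/input.txt); in the patch itself the client throws a
        // SparkException for this case before any driver pod is created.
        println(duplicateNamePaths("/a/input.txt,/b/input.txt,/c/other.txt"))
      }
    }
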
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 8beba23bc8e11..6aeb851a16bf4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -26,7 +26,8 @@ case class KubernetesCreateSubmissionRequest( appArgs: Array[String], sparkProperties: Map[String, String], secret: String, - uploadedJarsBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { + uploadedJarsBase64Contents: Option[TarGzippedData], + uploadedFilesBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala index 1c95dacc7eb01..7204cb874aaec 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala @@ -68,8 +68,8 @@ private[spark] object CompressionUtils extends Logging { while (usedFileNames.contains(resolvedFileName)) { val oldResolvedFileName = resolvedFileName resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" - logWarning(s"File with name $oldResolvedFileName already exists. Trying to add with" + - s" file name $resolvedFileName instead.") + logWarning(s"File with name $oldResolvedFileName already exists. 
Trying to add" + + s" with file name $resolvedFileName instead.") deduplicationCounter += 1 } usedFileNames += resolvedFileName diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 451dc96dd65ed..c5a7e27b15927 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -18,6 +18,7 @@ package org.apache.spark.deploy.rest.kubernetes import java.io.File import java.net.URI +import java.nio.file.Paths import java.util.concurrent.CountDownLatch import javax.servlet.http.{HttpServletRequest, HttpServletResponse} @@ -27,7 +28,7 @@ import org.apache.commons.codec.binary.Base64 import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} +import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.rest._ import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} @@ -149,7 +150,8 @@ private[spark] class KubernetesSparkRestServer( appArgs, sparkProperties, secret, - uploadedJars) => + uploadedJars, + uploadedFiles) => val decodedSecret = Base64.decodeBase64(secret) if (!expectedApplicationSecret.sameElements(decodedSecret)) { responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) @@ -157,29 +159,33 @@ private[spark] class KubernetesSparkRestServer( } else { val tempDir = Utils.createTempDir() val appResourcePath = resolvedAppResource(appResource, tempDir) - val jarsDirectory = new File(tempDir, "jars") - if (!jarsDirectory.mkdir) { - throw new IllegalStateException("Failed to create jars dir at" + - s"${jarsDirectory.getAbsolutePath}") - } - val writtenJars = writeBase64ContentsToFiles(uploadedJars, jarsDirectory) - val driverExtraClasspath = sparkProperties - .get("spark.driver.extraClassPath") - .map(_.split(",")) - .getOrElse(Array.empty[String]) + val writtenJars = writeUploadedJars(uploadedJars, tempDir) + val writtenFiles = writeUploadedFiles(uploadedFiles) + val resolvedSparkProperties = new mutable.HashMap[String, String] + resolvedSparkProperties ++= sparkProperties + + // Resolve driver classpath and jars val originalJars = sparkProperties.get("spark.jars") .map(_.split(",")) .getOrElse(Array.empty[String]) val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) + val driverExtraClasspath = sparkProperties + .get("spark.driver.extraClassPath") + .map(_.split(",")) + .getOrElse(Array.empty[String]) val driverClasspath = driverExtraClasspath ++ resolvedJars ++ - sparkJars ++ - Array(appResourcePath) - val resolvedSparkProperties = new mutable.HashMap[String, String] - resolvedSparkProperties ++= sparkProperties + sparkJars resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + // Resolve spark.files + val originalFiles = sparkProperties.get("spark.files") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val resolvedFiles = originalFiles ++ writtenFiles + resolvedSparkProperties("spark.files") = 
resolvedFiles.mkString(",") + val command = new ArrayBuffer[String] command += javaExecutable command += "-cp" @@ -229,6 +235,21 @@ private[spark] class KubernetesSparkRestServer( } } + private def writeUploadedJars(files: Option[TarGzippedData], rootTempDir: File): + Seq[String] = { + val resolvedDirectory = new File(rootTempDir, "jars") + if (!resolvedDirectory.mkdir()) { + throw new IllegalStateException(s"Failed to create jars dir at " + + resolvedDirectory.getAbsolutePath) + } + writeBase64ContentsToFiles(files, resolvedDirectory) + } + + private def writeUploadedFiles(files: Option[TarGzippedData]): Seq[String] = { + val workingDir = Paths.get("").toFile.getAbsoluteFile + writeBase64ContentsToFiles(files, workingDir) + } + def resolvedAppResource(appResource: AppResource, tempDir: File): String = { val appResourcePath = appResource match { case UploadedAppResource(resourceContentsBase64, resourceName) => diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala new file mode 100644 index 0000000000000..8b8d5e05f6479 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.jobs + +import java.nio.file.Paths + +import com.google.common.base.Charsets +import com.google.common.io.Files + +import org.apache.spark.SparkException +import org.apache.spark.sql.SparkSession + +private[spark] object FileExistenceTest { + + def main(args: Array[String]): Unit = { + if (args.length < 2) { + throw new IllegalArgumentException("Usage: WordCount ") + } + // Can't use SparkContext.textFile since the file is local to the driver + val file = Paths.get(args(0)).toFile + if (!file.exists()) { + throw new SparkException(s"Failed to find file at ${file.getAbsolutePath}") + } else { + // scalastyle:off println + val contents = Files.toString(file, Charsets.UTF_8) + if (args(1) != contents) { + throw new SparkException(s"Contents do not match. 
Expected: ${args(1)}," + + s" actual, $contents") + } else { + println(s"File found at ${file.getAbsolutePath} with correct contents.") + } + // scalastyle:on println + } + val spark = SparkSession.builder() + .appName("Test") + .getOrCreate() + spark.stop() + } + +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 16de71118dec4..40867c40d4474 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -21,7 +21,9 @@ import java.nio.file.Paths import java.util.UUID import java.util.concurrent.TimeUnit +import com.google.common.base.Charsets import com.google.common.collect.ImmutableList +import com.google.common.io.Files import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model.Pod import io.fabric8.kubernetes.client.{Config, KubernetesClient, KubernetesClientException, Watcher} @@ -62,10 +64,14 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .getOrElse(throw new IllegalStateException("Expected to find spark-examples jar; was the" + " pre-integration-test phase run?")) + private val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile + private val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) - private val MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + private val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.SparkPiWithInfiniteWait" + private val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.FileExistenceTest" private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") private var minikubeKubernetesClient: KubernetesClient = _ private var clientConfig: Config = _ @@ -179,7 +185,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { new Client( sparkConf = sparkConf, - mainClass = MAIN_CLASS, + mainClass = SPARK_PI_MAIN_CLASS, mainAppResource = mainAppResource, appArgs = Array.empty[String]).run() val sparkMetricsService = getSparkMetricsService("spark-pi") @@ -196,7 +202,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--executor-cores", "1", "--num-executors", "1", "--upload-jars", HELPER_JAR, - "--class", MAIN_CLASS, + "--class", SPARK_PI_MAIN_CLASS, "--conf", "spark.ui.enabled=true", "--conf", "spark.testing=false", "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", @@ -279,7 +285,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--executor-cores", "1", "--num-executors", "1", "--upload-jars", HELPER_JAR, - "--class", MAIN_CLASS, + "--class", SPARK_PI_MAIN_CLASS, "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", @@ -317,7 +323,7 @@ 
private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--executor-cores", "1", "--num-executors", "1", "--upload-jars", HELPER_JAR, - "--class", MAIN_CLASS, + "--class", SPARK_PI_MAIN_CLASS, "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", @@ -334,4 +340,73 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { EXAMPLES_JAR) SparkSubmit.main(args) } + + test("Added files should exist on the driver.") { + val args = Array( + "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", + "--deploy-mode", "cluster", + "--kubernetes-namespace", NAMESPACE, + "--name", "spark-file-existence-test", + "--executor-memory", "512m", + "--executor-cores", "1", + "--num-executors", "1", + "--upload-jars", HELPER_JAR, + "--upload-files", TEST_EXISTENCE_FILE.getAbsolutePath, + "--class", FILE_EXISTENCE_MAIN_CLASS, + "--conf", "spark.ui.enabled=false", + "--conf", "spark.testing=true", + "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", + "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", + "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", + "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", + "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + EXAMPLES_JAR, + TEST_EXISTENCE_FILE.getName, + TEST_EXISTENCE_FILE_CONTENTS) + val podCompletedFuture = SettableFuture.create[Boolean] + val watch = new Watcher[Pod] { + override def eventReceived(action: Action, pod: Pod): Unit = { + val containerStatuses = pod.getStatus.getContainerStatuses.asScala + val allSuccessful = containerStatuses.nonEmpty && containerStatuses + .forall(status => { + status.getState.getTerminated != null && status.getState.getTerminated.getExitCode == 0 + }) + if (allSuccessful) { + podCompletedFuture.set(true) + } else { + val failedContainers = containerStatuses.filter(container => { + container.getState.getTerminated != null && + container.getState.getTerminated.getExitCode != 0 + }) + if (failedContainers.nonEmpty) { + podCompletedFuture.setException(new SparkException( + "One or more containers in the driver failed with a nonzero exit code.")) + } + } + } + + override def onClose(e: KubernetesClientException): Unit = { + logWarning("Watch closed", e) + } + } + Utils.tryWithResource(minikubeKubernetesClient + .pods + .withLabel("spark-app-name", "spark-file-existence-test") + .watch(watch)) { _ => + SparkSubmit.main(args) + assert(podCompletedFuture.get, "Failed to run driver pod") + val driverPod = minikubeKubernetesClient + .pods + .withLabel("spark-app-name", "spark-file-existence-test") + .list() + .getItems + .get(0) + val podLog = minikubeKubernetesClient + .pods + .withName(driverPod.getMetadata.getName) + .getLog + assert(podLog.contains(s"File found at /opt/spark/${TEST_EXISTENCE_FILE.getName}" + + s" with correct contents."), "Job did not find the file as expected.") + } + } } diff --git a/resource-managers/kubernetes/integration-tests/test-data/input.txt b/resource-managers/kubernetes/integration-tests/test-data/input.txt new file mode 100644 index 0000000000000..dfe437bdebebc --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/test-data/input.txt @@ -0,0 +1 @@ +Contents From 0cf0d0225becca798af9dd70f085153a3c310d05 
Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Thu, 2 Feb 2017 15:13:39 -0800 Subject: [PATCH 039/225] Fix NPE around unschedulable pod specs (#79) --- .../main/scala/org/apache/spark/deploy/kubernetes/Client.scala | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index c350c4817664d..bef5a605f173b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -570,7 +570,8 @@ private[spark] class Client( .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) .head.getNodePort val nodeUrls = kubernetesClient.nodes.list.getItems.asScala - .filterNot(_.getSpec.getUnschedulable) + .filterNot(node => node.getSpec.getUnschedulable != null && + node.getSpec.getUnschedulable) .flatMap(_.getStatus.getAddresses.asScala.map(address => { s"$urlScheme://${address.getAddress}:$servicePort" })).toArray From efd803df585f71856c1420f8e84b746d199fb71d Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Thu, 2 Feb 2017 17:34:15 -0800 Subject: [PATCH 040/225] Introduce blocking submit to kubernetes by default (#53) * Introduce blocking submit to kubernetes by default Two new configuration settings: - spark.kubernetes.submit.waitAppCompletion - spark.kubernetes.report.interval * Minor touchups * More succinct logging for pod state * Fix import order * Switch to watch-based logging * Spaces in comma-joined volumes, labels, and containers * Use CountDownLatch instead of SettableFuture * Match parallel ConfigBuilder style * Disable logging in fire-and-forget mode Which is enabled with spark.kubernetes.submit.waitAppCompletion=false (default: true) * Additional log line for when application is launched * Minor wording changes * More logging * Drop log to DEBUG --- .../spark/deploy/kubernetes/Client.scala | 154 +++++++++++------- .../kubernetes/LoggingPodStatusWatcher.scala | 114 +++++++++++++ .../spark/deploy/kubernetes/config.scala | 19 +++ 3 files changed, 225 insertions(+), 62 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index bef5a605f173b..433c45d51fd6b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.kubernetes import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} -import java.util.concurrent.{TimeoutException, TimeUnit} +import java.util.concurrent.{CountDownLatch, TimeoutException, TimeUnit} import java.util.concurrent.atomic.AtomicBoolean import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} @@ -26,7 +26,7 @@ import com.google.common.base.Charsets import com.google.common.io.Files import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient, 
KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.{ConfigBuilder => K8SConfigBuilder, DefaultKubernetesClient, KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ @@ -67,6 +67,8 @@ private[spark] class Client( private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) + private val waitForAppCompletion: Boolean = sparkConf.get(WAIT_FOR_APP_COMPLETION) + private val secretBase64String = { val secretBytes = new Array[Byte](128) SECURE_RANDOM.nextBytes(secretBytes) @@ -81,9 +83,11 @@ private[spark] class Client( ThreadUtils.newDaemonSingleThreadExecutor("kubernetes-client-retryable-futures")) def run(): Unit = { + logInfo(s"Starting application $kubernetesAppId in Kubernetes...") val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() + val parsedCustomLabels = parseCustomLabels(customLabels) - var k8ConfBuilder = new ConfigBuilder() + var k8ConfBuilder = new K8SConfigBuilder() .withApiVersion("v1") .withMasterUrl(master) .withNamespace(namespace) @@ -116,73 +120,97 @@ private[spark] class Client( SPARK_APP_NAME_LABEL -> appName) ++ parsedCustomLabels).asJava val containerPorts = buildContainerPorts() - val submitCompletedFuture = SettableFuture.create[Boolean] - val submitPending = new AtomicBoolean(false) - val podWatcher = new DriverPodWatcher( - submitCompletedFuture, - submitPending, - kubernetesClient, - driverSubmitSslOptions, - Array(submitServerSecret) ++ sslSecrets, - driverKubernetesSelectors) + + // start outer watch for status logging of driver pod + val driverPodCompletedLatch = new CountDownLatch(1) + // only enable interval logging if in waitForAppCompletion mode + val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL) else 0 + val loggingWatch = new LoggingPodStatusWatcher(driverPodCompletedLatch, kubernetesAppId, + loggingInterval) Utils.tryWithResource(kubernetesClient .pods() .withLabels(driverKubernetesSelectors) - .watch(podWatcher)) { _ => - kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(kubernetesAppId) + .watch(loggingWatch)) { _ => + + // launch driver pod with inner watch to upload jars when it's ready + val submitCompletedFuture = SettableFuture.create[Boolean] + val submitPending = new AtomicBoolean(false) + val podWatcher = new DriverPodWatcher( + submitCompletedFuture, + submitPending, + kubernetesClient, + driverSubmitSslOptions, + Array(submitServerSecret) ++ sslSecrets, + driverKubernetesSelectors) + Utils.tryWithResource(kubernetesClient + .pods() .withLabels(driverKubernetesSelectors) - .endMetadata() - .withNewSpec() - .withRestartPolicy("OnFailure") - .addNewVolume() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(submitServerSecret.getMetadata.getName) - .endSecret() - .endVolume - .addToVolumes(sslVolumes: _*) - .withServiceAccount(serviceAccount) - .addNewContainer() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() + .watch(podWatcher)) { _ => + kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(driverKubernetesSelectors) + .endMetadata() + .withNewSpec() + .withRestartPolicy("OnFailure") + .addNewVolume() .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - 
.withMountPath(secretDirectory) - .withReadOnly(true) - .endVolumeMount() - .addToVolumeMounts(sslVolumeMounts: _*) - .addNewEnv() - .withName(ENV_SUBMISSION_SECRET_LOCATION) - .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") - .endEnv() - .addNewEnv() - .withName(ENV_SUBMISSION_SERVER_PORT) - .withValue(SUBMISSION_SERVER_PORT.toString) - .endEnv() - .addToEnv(sslEnvs: _*) - .withPorts(containerPorts.asJava) - .endContainer() - .endSpec() - .done() - var submitSucceeded = false - try { - submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - submitSucceeded = true - } catch { - case e: TimeoutException => - val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) - logError(finalErrorMessage, e) - throw new SparkException(finalErrorMessage, e) - } finally { - if (!submitSucceeded) { - Utils.tryLogNonFatalError { - kubernetesClient.pods.withName(kubernetesAppId).delete() + .withNewSecret() + .withSecretName(submitServerSecret.getMetadata.getName) + .endSecret() + .endVolume + .addToVolumes(sslVolumes: _*) + .withServiceAccount(serviceAccount) + .addNewContainer() + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) + .withMountPath(secretDirectory) + .withReadOnly(true) + .endVolumeMount() + .addToVolumeMounts(sslVolumeMounts: _*) + .addNewEnv() + .withName(ENV_SUBMISSION_SECRET_LOCATION) + .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") + .endEnv() + .addNewEnv() + .withName(ENV_SUBMISSION_SERVER_PORT) + .withValue(SUBMISSION_SERVER_PORT.toString) + .endEnv() + .addToEnv(sslEnvs: _*) + .withPorts(containerPorts.asJava) + .endContainer() + .endSpec() + .done() + var submitSucceeded = false + try { + submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) + submitSucceeded = true + logInfo(s"Finished launching local resources to application $kubernetesAppId") + } catch { + case e: TimeoutException => + val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) + logError(finalErrorMessage, e) + throw new SparkException(finalErrorMessage, e) + } finally { + if (!submitSucceeded) { + Utils.tryLogNonFatalError { + kubernetesClient.pods.withName(kubernetesAppId).delete() + } } } } + + // wait if configured to do so + if (waitForAppCompletion) { + logInfo(s"Waiting for application $kubernetesAppId to finish...") + driverPodCompletedLatch.await() + logInfo(s"Application $kubernetesAppId finished.") + } else { + logInfo(s"Application $kubernetesAppId successfully launched.") + } } } finally { Utils.tryLogNonFatalError { @@ -377,6 +405,8 @@ private[spark] class Client( Future { sparkConf.set("spark.driver.host", pod.getStatus.getPodIP) val submitRequest = buildSubmissionRequest() + logInfo(s"Submitting local resources to driver pod for application " + + s"$kubernetesAppId ...") driverSubmitter.submitApplication(submitRequest) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala new file mode 100644 index 0000000000000..cbacaf6bda854 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.util.concurrent.{CountDownLatch, Executors, TimeUnit} + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action + +import org.apache.spark.internal.Logging + +/** + * A monitor for the running Kubernetes pod of a Spark application. Status logging occurs on + * every state change and also at an interval for liveness. + * + * @param podCompletedFuture a CountDownLatch that is set to true when the watched pod finishes + * @param appId + * @param interval ms between each state request. If set to 0 or a negative number, the periodic + * logging will be disabled. + */ +private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownLatch, + appId: String, + interval: Long) + extends Watcher[Pod] with Logging { + + // start timer for periodic logging + private val scheduler = Executors.newScheduledThreadPool(1) + private val logRunnable: Runnable = new Runnable { + override def run() = logShortStatus() + } + if (interval > 0) { + scheduler.scheduleWithFixedDelay(logRunnable, 0, interval, TimeUnit.MILLISECONDS) + } + + private var pod: Option[Pod] = Option.empty + private var prevPhase: String = null + private def phase: String = pod.map(_.getStatus().getPhase()).getOrElse("unknown") + + override def eventReceived(action: Action, pod: Pod): Unit = { + this.pod = Option(pod) + + logShortStatus() + if (prevPhase != phase) { + logLongStatus() + } + prevPhase = phase + + if (phase == "Succeeded" || phase == "Failed") { + podCompletedFuture.countDown() + } + } + + override def onClose(e: KubernetesClientException): Unit = { + scheduler.shutdown() + logDebug(s"Stopped watching application $appId with last-observed phase $phase") + } + + private def logShortStatus() = { + logInfo(s"Application status for $appId (phase: $phase)") + } + + private def logLongStatus() = { + logInfo("Phase changed, new state: " + pod.map(formatPodState(_)).getOrElse("unknown")) + } + + private def formatPodState(pod: Pod): String = { + + val details = Seq[(String, String)]( + // pod metadata + ("pod name", pod.getMetadata.getName()), + ("namespace", pod.getMetadata.getNamespace()), + ("labels", pod.getMetadata.getLabels().asScala.mkString(", ")), + ("pod uid", pod.getMetadata.getUid), + ("creation time", pod.getMetadata.getCreationTimestamp()), + + // spec details + ("service account name", pod.getSpec.getServiceAccountName()), + ("volumes", pod.getSpec.getVolumes().asScala.map(_.getName).mkString(", ")), + ("node name", pod.getSpec.getNodeName()), + + // status + ("start time", pod.getStatus.getStartTime), + ("container images", + pod.getStatus.getContainerStatuses() + .asScala + .map(_.getImage) + 
.mkString(", ")), + ("phase", pod.getStatus.getPhase()) + ) + + // Use more loggable format if value is null or empty + details.map { case (k, v) => + val newValue = Option(v).filter(_.nonEmpty).getOrElse("N/A") + s"\n\t $k: $newValue" + }.mkString("") + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 3e0c400febca1..cb4cd42142ca4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -188,4 +188,23 @@ package object config { .internal() .stringConf .createOptional + + private[spark] val WAIT_FOR_APP_COMPLETION = + ConfigBuilder("spark.kubernetes.submit.waitAppCompletion") + .doc( + """ + | In cluster mode, whether to wait for the application to finish before exiting the + | launcher process. + """.stripMargin) + .booleanConf + .createWithDefault(true) + + private[spark] val REPORT_INTERVAL = + ConfigBuilder("spark.kubernetes.report.interval") + .doc( + """ + | Interval between reports of the current app status in cluster mode. + """.stripMargin) + .timeConf(TimeUnit.MILLISECONDS) + .createWithDefaultString("1s") } From 381b69a9f761501816e547e09e2a3052262ff425 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Fri, 3 Feb 2017 19:40:32 +0000 Subject: [PATCH 041/225] Do not wait for pod finishing in integration tests. (#84) Since the example job are patched to never finish. --- .../deploy/kubernetes/integrationtest/KubernetesSuite.scala | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 40867c40d4474..c5458eccf830d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -181,6 +181,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set("spark.app.name", "spark-pi") .set("spark.ui.enabled", "true") .set("spark.testing", "false") + .set("spark.kubernetes.submit.waitAppCompletion", "false") val mainAppResource = s"file://$EXAMPLES_JAR" new Client( @@ -210,6 +211,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", EXAMPLES_JAR) SparkSubmit.main(args) val sparkMetricsService = getSparkMetricsService("spark-pi") @@ -231,6 +233,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", 
s"container:///opt/spark/examples/jars/$EXAMPLES_JAR_FILE_NAME") val allContainersSucceeded = SettableFuture.create[Boolean] val watcher = new Watcher[Pod] { @@ -292,6 +295,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", "--conf", "spark.kubernetes.driver.labels=label1=label1value,label2=label2value", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", EXAMPLES_JAR) SparkSubmit.main(args) val driverPodLabels = minikubeKubernetesClient @@ -337,6 +341,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", "spark.ssl.kubernetes.submit.trustStore=" + s"file://${trustStoreFile.getAbsolutePath}", "--conf", s"spark.ssl.kubernetes.driverlaunch.trustStorePassword=changeit", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", EXAMPLES_JAR) SparkSubmit.main(args) } @@ -360,6 +365,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", + "--conf", "spark.kubernetes.submit.waitAppCompletion=false", EXAMPLES_JAR, TEST_EXISTENCE_FILE.getName, TEST_EXISTENCE_FILE_CONTENTS) From 15a8292325b011bab3b9d11ed469e8ce0755efd2 Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Wed, 8 Feb 2017 20:47:41 +0000 Subject: [PATCH 042/225] Check for user jars/files existence before creating the driver pod. (#86) * Check for user jars/files existence before creating the driver pod. Close apache-spark-on-k8s/spark#85 * CR --- .../spark/deploy/kubernetes/Client.scala | 20 ++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 433c45d51fd6b..b9b275c190fee 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -84,8 +84,10 @@ private[spark] class Client( def run(): Unit = { logInfo(s"Starting application $kubernetesAppId in Kubernetes...") - val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() + Seq(uploadedFiles, uploadedJars, Some(mainAppResource)).foreach(checkForFilesExistence) + + val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new K8SConfigBuilder() .withApiVersion("v1") @@ -661,6 +663,22 @@ private[spark] class Client( }).toMap }).getOrElse(Map.empty[String, String]) } + + private def checkForFilesExistence(maybePaths: Option[String]): Unit = { + maybePaths.foreach { paths => + paths.split(",").foreach { path => + val uri = Utils.resolveURI(path) + uri.getScheme match { + case "file" | null => + val file = new File(uri.getPath) + if (!file.isFile) { + throw new SparkException(s"""file "${uri}" does not exist!""") + } + case _ => + } + } + } + } } private[spark] object Client extends Logging { From a62c20fced7e436d9f7d58f298b99f278d0ee658 Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 8 Feb 2017 17:59:50 
-0800 Subject: [PATCH 043/225] Use readiness probe instead of client-side ping. (#75) * Use readiness probe instead of client-side ping. Keep one ping() just as a sanity check, but otherwise set up the readiness probe to report the container as ready only when the ping endpoint can be reached. Also add a liveliness probe for convenience and symmetry. * Extract common HTTP get action * Remove some code * Add delay to liveliness check * Fix merge conflicts. * Fix more merge conflicts * Fix more merge conflicts * Revamp readiness check logic * Add addresses ready condition to endpoints watch * Rearrange the logic some more. * Remove liveness probe, retry against servers * Fix compiler error * Fix another compiler error * Delay between retries. Remove unintended test modification * FIx another compiler error * Extract method * Address comments * Deduplicate node addresses, use lower initial connect timeout * Drop maxRetriesPerServer from 10 to 3 --- .../spark/deploy/kubernetes/Client.scala | 643 +++++++++++------- .../spark/deploy/kubernetes/Retry.scala | 54 -- .../rest/kubernetes/HttpClientUtil.scala | 7 +- .../kubernetes/MultiServerFeignTarget.scala | 34 +- .../integrationtest/minikube/Minikube.scala | 2 +- 5 files changed, 426 insertions(+), 314 deletions(-) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index b9b275c190fee..9eed9bfd2cd79 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -18,8 +18,8 @@ package org.apache.spark.deploy.kubernetes import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} -import java.util.concurrent.{CountDownLatch, TimeoutException, TimeUnit} -import java.util.concurrent.atomic.AtomicBoolean +import java.util +import java.util.concurrent.{CountDownLatch, TimeUnit} import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} import com.google.common.base.Charsets @@ -31,8 +31,6 @@ import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ import scala.collection.mutable -import scala.concurrent.{ExecutionContext, Future} -import scala.concurrent.duration.DurationInt import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ @@ -40,7 +38,7 @@ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging -import org.apache.spark.util.{ThreadUtils, Utils} +import org.apache.spark.util.Utils private[spark] class Client( sparkConf: SparkConf, @@ -78,10 +76,6 @@ private[spark] class Client( private val serviceAccount = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) - private implicit val retryableExecutionContext = ExecutionContext - .fromExecutorService( - ThreadUtils.newDaemonSingleThreadExecutor("kubernetes-client-retryable-futures")) 
- def run(): Unit = { logInfo(s"Starting application $kubernetesAppId in Kubernetes...") @@ -112,119 +106,398 @@ private[spark] class Client( .withData(Map((SUBMISSION_APP_SECRET_NAME, secretBase64String)).asJava) .withType("Opaque") .done() - val (sslEnvs, sslVolumes, sslVolumeMounts, sslSecrets) = configureSsl(kubernetesClient, - driverSubmitSslOptions, - isKeyStoreLocalFile) try { - val driverKubernetesSelectors = (Map( - SPARK_DRIVER_LABEL -> kubernetesAppId, - SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_APP_NAME_LABEL -> appName) - ++ parsedCustomLabels).asJava - val containerPorts = buildContainerPorts() - - // start outer watch for status logging of driver pod - val driverPodCompletedLatch = new CountDownLatch(1) - // only enable interval logging if in waitForAppCompletion mode - val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL) else 0 - val loggingWatch = new LoggingPodStatusWatcher(driverPodCompletedLatch, kubernetesAppId, - loggingInterval) - Utils.tryWithResource(kubernetesClient - .pods() - .withLabels(driverKubernetesSelectors) - .watch(loggingWatch)) { _ => - - // launch driver pod with inner watch to upload jars when it's ready - val submitCompletedFuture = SettableFuture.create[Boolean] - val submitPending = new AtomicBoolean(false) - val podWatcher = new DriverPodWatcher( - submitCompletedFuture, - submitPending, - kubernetesClient, - driverSubmitSslOptions, - Array(submitServerSecret) ++ sslSecrets, - driverKubernetesSelectors) + val (sslEnvs, sslVolumes, sslVolumeMounts, sslSecrets) = configureSsl(kubernetesClient, + driverSubmitSslOptions, + isKeyStoreLocalFile) + try { + // start outer watch for status logging of driver pod + val driverPodCompletedLatch = new CountDownLatch(1) + // only enable interval logging if in waitForAppCompletion mode + val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL) else 0 + val loggingWatch = new LoggingPodStatusWatcher(driverPodCompletedLatch, kubernetesAppId, + loggingInterval) Utils.tryWithResource(kubernetesClient .pods() - .withLabels(driverKubernetesSelectors) - .watch(podWatcher)) { _ => - kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(kubernetesAppId) - .withLabels(driverKubernetesSelectors) - .endMetadata() - .withNewSpec() - .withRestartPolicy("OnFailure") - .addNewVolume() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(submitServerSecret.getMetadata.getName) - .endSecret() - .endVolume - .addToVolumes(sslVolumes: _*) - .withServiceAccount(serviceAccount) - .addNewContainer() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withMountPath(secretDirectory) - .withReadOnly(true) - .endVolumeMount() - .addToVolumeMounts(sslVolumeMounts: _*) - .addNewEnv() - .withName(ENV_SUBMISSION_SECRET_LOCATION) - .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") - .endEnv() - .addNewEnv() - .withName(ENV_SUBMISSION_SERVER_PORT) - .withValue(SUBMISSION_SERVER_PORT.toString) - .endEnv() - .addToEnv(sslEnvs: _*) - .withPorts(containerPorts.asJava) - .endContainer() - .endSpec() - .done() - var submitSucceeded = false - try { - submitCompletedFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - submitSucceeded = true - logInfo(s"Finished launching local resources to application $kubernetesAppId") + .withName(kubernetesAppId) + .watch(loggingWatch)) { _ => + val (driverPod, driverService) = 
launchDriverKubernetesComponents( + kubernetesClient, + parsedCustomLabels, + submitServerSecret, + driverSubmitSslOptions, + sslSecrets, + sslVolumes, + sslVolumeMounts, + sslEnvs, + isKeyStoreLocalFile) + val ownerReferenceConfiguredDriverService = try { + configureOwnerReferences( + kubernetesClient, + submitServerSecret, + sslSecrets, + driverPod, + driverService) } catch { - case e: TimeoutException => - val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) - logError(finalErrorMessage, e) - throw new SparkException(finalErrorMessage, e) - } finally { - if (!submitSucceeded) { - Utils.tryLogNonFatalError { - kubernetesClient.pods.withName(kubernetesAppId).delete() - } + case e: Throwable => + cleanupPodAndService(kubernetesClient, driverPod, driverService) + throw new SparkException("Failed to set owner references to the driver pod.", e) + } + try { + submitApplicationToDriverServer(kubernetesClient, driverSubmitSslOptions, + ownerReferenceConfiguredDriverService) + // wait if configured to do so + if (waitForAppCompletion) { + logInfo(s"Waiting for application $kubernetesAppId to finish...") + driverPodCompletedLatch.await() + logInfo(s"Application $kubernetesAppId finished.") + } else { + logInfo(s"Application $kubernetesAppId successfully launched.") } + } catch { + case e: Throwable => + cleanupPodAndService(kubernetesClient, driverPod, + ownerReferenceConfiguredDriverService) + throw new SparkException("Failed to submit the application to the driver pod.", e) } } - - // wait if configured to do so - if (waitForAppCompletion) { - logInfo(s"Waiting for application $kubernetesAppId to finish...") - driverPodCompletedLatch.await() - logInfo(s"Application $kubernetesAppId finished.") - } else { - logInfo(s"Application $kubernetesAppId successfully launched.") + } finally { + Utils.tryLogNonFatalError { + // Secrets may have been mutated so delete by name to avoid problems with not having + // the latest version. 
+ sslSecrets.foreach { secret => + kubernetesClient.secrets().withName(secret.getMetadata.getName).delete() + } } } } finally { Utils.tryLogNonFatalError { - kubernetesClient.secrets().delete(submitServerSecret) + kubernetesClient.secrets().withName(submitServerSecret.getMetadata.getName).delete() } - Utils.tryLogNonFatalError { - kubernetesClient.secrets().delete(sslSecrets: _*) + } + } + } + + private def cleanupPodAndService( + kubernetesClient: KubernetesClient, + driverPod: Pod, + driverService: Service): Unit = { + Utils.tryLogNonFatalError { + kubernetesClient.services().delete(driverService) + } + Utils.tryLogNonFatalError { + kubernetesClient.pods().delete(driverPod) + } + } + + private def submitApplicationToDriverServer( + kubernetesClient: KubernetesClient, + driverSubmitSslOptions: SSLOptions, + driverService: Service) = { + sparkConf.getOption("spark.app.id").foreach { id => + logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + + s" overridden as $kubernetesAppId") + } + sparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, driverService.getMetadata.getName) + sparkConf.set("spark.app.id", kubernetesAppId) + sparkConf.setIfMissing("spark.app.name", appName) + sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) + sparkConf.setIfMissing("spark.blockmanager.port", + DEFAULT_BLOCKMANAGER_PORT.toString) + val driverSubmitter = buildDriverSubmissionClient(kubernetesClient, driverService, + driverSubmitSslOptions) + // Sanity check to see if the driver submitter is even reachable. + driverSubmitter.ping() + logInfo(s"Submitting local resources to driver pod for application " + + s"$kubernetesAppId ...") + val submitRequest = buildSubmissionRequest() + driverSubmitter.submitApplication(submitRequest) + logInfo("Successfully submitted local resources and driver configuration to" + + " driver pod.") + // After submitting, adjust the service to only expose the Spark UI + val uiServicePort = new ServicePortBuilder() + .withName(UI_PORT_NAME) + .withPort(uiPort) + .withNewTargetPort(uiPort) + .build() + kubernetesClient.services().withName(kubernetesAppId).edit().editSpec() + .withType("ClusterIP") + .withPorts(uiServicePort) + .endSpec() + .done() + logInfo("Finished submitting application to Kubernetes.") + } + + private def launchDriverKubernetesComponents( + kubernetesClient: KubernetesClient, + parsedCustomLabels: Map[String, String], + submitServerSecret: Secret, + driverSubmitSslOptions: SSLOptions, + sslSecrets: Array[Secret], + sslVolumes: Array[Volume], + sslVolumeMounts: Array[VolumeMount], + sslEnvs: Array[EnvVar], + isKeyStoreLocalFile: Boolean): (Pod, Service) = { + val endpointsReadyFuture = SettableFuture.create[Endpoints] + val endpointsReadyWatcher = new DriverEndpointsReadyWatcher(endpointsReadyFuture) + val serviceReadyFuture = SettableFuture.create[Service] + val driverKubernetesSelectors = (Map( + SPARK_DRIVER_LABEL -> kubernetesAppId, + SPARK_APP_ID_LABEL -> kubernetesAppId, + SPARK_APP_NAME_LABEL -> appName) + ++ parsedCustomLabels).asJava + val serviceReadyWatcher = new DriverServiceReadyWatcher(serviceReadyFuture) + val podReadyFuture = SettableFuture.create[Pod] + val podWatcher = new DriverPodReadyWatcher(podReadyFuture) + Utils.tryWithResource(kubernetesClient + .pods() + .withName(kubernetesAppId) + .watch(podWatcher)) { _ => + Utils.tryWithResource(kubernetesClient + .services() + .withName(kubernetesAppId) + .watch(serviceReadyWatcher)) { _ => + 
Utils.tryWithResource(kubernetesClient + .endpoints() + .withName(kubernetesAppId) + .watch(endpointsReadyWatcher)) { _ => + val driverService = createDriverService( + kubernetesClient, + driverKubernetesSelectors, + submitServerSecret) + val driverPod = try { + createDriverPod( + kubernetesClient, + driverKubernetesSelectors, + submitServerSecret, + driverSubmitSslOptions, + sslVolumes, + sslVolumeMounts, + sslEnvs) + } catch { + case e: Throwable => + Utils.tryLogNonFatalError { + kubernetesClient.services().delete(driverService) + } + throw new SparkException("Failed to create the driver pod.", e) + } + try { + waitForReadyKubernetesComponents(kubernetesClient, endpointsReadyFuture, + serviceReadyFuture, podReadyFuture) + (driverPod, driverService) + } catch { + case e: Throwable => + Utils.tryLogNonFatalError { + kubernetesClient.services().delete(driverService) + } + Utils.tryLogNonFatalError { + kubernetesClient.pods().delete(driverPod) + } + throw new SparkException("Timed out while waiting for a Kubernetes component to be" + + " ready.", e) + } } } } } + /** + * Sets the owner reference for all the kubernetes components to link to the driver pod. + * + * @return The driver service after it has been adjusted to reflect the new owner + * reference. + */ + private def configureOwnerReferences( + kubernetesClient: KubernetesClient, + submitServerSecret: Secret, + sslSecrets: Array[Secret], + driverPod: Pod, + driverService: Service): Service = { + val driverPodOwnerRef = new OwnerReferenceBuilder() + .withName(driverPod.getMetadata.getName) + .withUid(driverPod.getMetadata.getUid) + .withApiVersion(driverPod.getApiVersion) + .withKind(driverPod.getKind) + .withController(true) + .build() + sslSecrets.foreach(secret => { + kubernetesClient.secrets().withName(secret.getMetadata.getName).edit() + .editMetadata() + .addToOwnerReferences(driverPodOwnerRef) + .endMetadata() + .done() + }) + kubernetesClient.secrets().withName(submitServerSecret.getMetadata.getName).edit() + .editMetadata() + .addToOwnerReferences(driverPodOwnerRef) + .endMetadata() + .done() + kubernetesClient.services().withName(driverService.getMetadata.getName).edit() + .editMetadata() + .addToOwnerReferences(driverPodOwnerRef) + .endMetadata() + .done() + } + + private def waitForReadyKubernetesComponents( + kubernetesClient: KubernetesClient, + endpointsReadyFuture: SettableFuture[Endpoints], + serviceReadyFuture: SettableFuture[Service], + podReadyFuture: SettableFuture[Pod]) = { + try { + podReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) + logInfo("Driver pod successfully created in Kubernetes cluster.") + } catch { + case e: Throwable => + val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) + logError(finalErrorMessage, e) + throw new SparkException(finalErrorMessage, e) + } + try { + serviceReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) + logInfo("Driver service created successfully in Kubernetes.") + } catch { + case e: Throwable => + throw new SparkException(s"The driver service was not ready" + + s" in $driverSubmitTimeoutSecs seconds.", e) + } + try { + endpointsReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) + logInfo("Driver endpoints ready to receive application submission") + } catch { + case e: Throwable => + throw new SparkException(s"The driver service endpoint was not ready" + + s" in $driverSubmitTimeoutSecs seconds.", e) + } + } + + private def createDriverService( + kubernetesClient: KubernetesClient, + driverKubernetesSelectors: 
java.util.Map[String, String], + submitServerSecret: Secret): Service = { + val driverSubmissionServicePort = new ServicePortBuilder() + .withName(SUBMISSION_SERVER_PORT_NAME) + .withPort(SUBMISSION_SERVER_PORT) + .withNewTargetPort(SUBMISSION_SERVER_PORT) + .build() + kubernetesClient.services().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(driverKubernetesSelectors) + .endMetadata() + .withNewSpec() + .withType("NodePort") + .withSelector(driverKubernetesSelectors) + .withPorts(driverSubmissionServicePort) + .endSpec() + .done() + } + + private def createDriverPod( + kubernetesClient: KubernetesClient, + driverKubernetesSelectors: util.Map[String, String], + submitServerSecret: Secret, + driverSubmitSslOptions: SSLOptions, + sslVolumes: Array[Volume], + sslVolumeMounts: Array[VolumeMount], + sslEnvs: Array[EnvVar]) = { + val containerPorts = buildContainerPorts() + val probePingHttpGet = new HTTPGetActionBuilder() + .withScheme(if (driverSubmitSslOptions.enabled) "HTTPS" else "HTTP") + .withPath("/v1/submissions/ping") + .withNewPort(SUBMISSION_SERVER_PORT_NAME) + .build() + kubernetesClient.pods().createNew() + .withNewMetadata() + .withName(kubernetesAppId) + .withLabels(driverKubernetesSelectors) + .endMetadata() + .withNewSpec() + .withRestartPolicy("OnFailure") + .addNewVolume() + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) + .withNewSecret() + .withSecretName(submitServerSecret.getMetadata.getName) + .endSecret() + .endVolume() + .addToVolumes(sslVolumes: _*) + .withServiceAccount(serviceAccount) + .addNewContainer() + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) + .withMountPath(secretDirectory) + .withReadOnly(true) + .endVolumeMount() + .addToVolumeMounts(sslVolumeMounts: _*) + .addNewEnv() + .withName(ENV_SUBMISSION_SECRET_LOCATION) + .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") + .endEnv() + .addNewEnv() + .withName(ENV_SUBMISSION_SERVER_PORT) + .withValue(SUBMISSION_SERVER_PORT.toString) + .endEnv() + .addToEnv(sslEnvs: _*) + .withPorts(containerPorts.asJava) + .withNewReadinessProbe().withHttpGet(probePingHttpGet).endReadinessProbe() + .endContainer() + .endSpec() + .done() + } + + private class DriverPodReadyWatcher(resolvedDriverPod: SettableFuture[Pod]) extends Watcher[Pod] { + override def eventReceived(action: Action, pod: Pod): Unit = { + if ((action == Action.ADDED || action == Action.MODIFIED) + && pod.getStatus.getPhase == "Running" + && !resolvedDriverPod.isDone) { + pod.getStatus + .getContainerStatuses + .asScala + .find(status => + status.getName == DRIVER_CONTAINER_NAME && status.getReady) + .foreach { _ => resolvedDriverPod.set(pod) } + } + } + + override def onClose(cause: KubernetesClientException): Unit = { + logDebug("Driver pod readiness watch closed.", cause) + } + } + + private class DriverEndpointsReadyWatcher(resolvedDriverEndpoints: SettableFuture[Endpoints]) + extends Watcher[Endpoints] { + override def eventReceived(action: Action, endpoints: Endpoints): Unit = { + if ((action == Action.ADDED) || (action == Action.MODIFIED) + && endpoints.getSubsets.asScala.nonEmpty + && endpoints.getSubsets.asScala.exists(_.getAddresses.asScala.nonEmpty) + && !resolvedDriverEndpoints.isDone) { + resolvedDriverEndpoints.set(endpoints) + } + } + + override def onClose(cause: KubernetesClientException): Unit = { + logDebug("Driver endpoints readiness watch closed.", cause) + } + } + + private 
class DriverServiceReadyWatcher(resolvedDriverService: SettableFuture[Service]) + extends Watcher[Service] { + override def eventReceived(action: Action, service: Service): Unit = { + if ((action == Action.ADDED) || (action == Action.MODIFIED) + && !resolvedDriverService.isDone) { + resolvedDriverService.set(service) + } + } + + override def onClose(cause: KubernetesClientException): Unit = { + logDebug("Driver service readiness watch closed.", cause) + } + } + private def parseDriverSubmitSslOptions(): (SSLOptions, Boolean) = { val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_KEYSTORE) val resolvedSparkConf = sparkConf.clone() @@ -306,18 +579,10 @@ private[spark] class Client( .withName(ENV_SUBMISSION_USE_SSL) .withValue("true") .build() - val sslSecrets = kubernetesClient.secrets().createNew() - .withNewMetadata() - .withName(sslSecretsName) - .endMetadata() - .withData(sslSecretsMap.asJava) - .withType("Opaque") - .done() - secrets += sslSecrets val sslVolume = new VolumeBuilder() .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) .withNewSecret() - .withSecretName(sslSecrets.getMetadata.getName) + .withSecretName(sslSecretsName) .endSecret() .build() val sslVolumeMount = new VolumeMountBuilder() @@ -325,147 +590,23 @@ private[spark] class Client( .withReadOnly(true) .withMountPath(sslSecretsDirectory) .build() + val sslSecrets = kubernetesClient.secrets().createNew() + .withNewMetadata() + .withName(sslSecretsName) + .endMetadata() + .withData(sslSecretsMap.asJava) + .withType("Opaque") + .done() + secrets += sslSecrets (sslEnvs.toArray, Array(sslVolume), Array(sslVolumeMount), secrets.toArray) } else { (Array[EnvVar](), Array[Volume](), Array[VolumeMount](), Array[Secret]()) } } - private class DriverPodWatcher( - submitCompletedFuture: SettableFuture[Boolean], - submitPending: AtomicBoolean, - kubernetesClient: KubernetesClient, - driverSubmitSslOptions: SSLOptions, - applicationSecrets: Array[Secret], - driverKubernetesSelectors: java.util.Map[String, String]) extends Watcher[Pod] { - override def eventReceived(action: Action, pod: Pod): Unit = { - if ((action == Action.ADDED || action == Action.MODIFIED) - && pod.getStatus.getPhase == "Running" - && !submitCompletedFuture.isDone) { - if (!submitPending.getAndSet(true)) { - pod.getStatus - .getContainerStatuses - .asScala - .find(status => - status.getName == DRIVER_CONTAINER_NAME && status.getReady) match { - case Some(_) => - val ownerRefs = Seq(new OwnerReferenceBuilder() - .withName(pod.getMetadata.getName) - .withUid(pod.getMetadata.getUid) - .withApiVersion(pod.getApiVersion) - .withKind(pod.getKind) - .withController(true) - .build()) - - applicationSecrets.foreach(secret => { - secret.getMetadata.setOwnerReferences(ownerRefs.asJava) - kubernetesClient.secrets().createOrReplace(secret) - }) - - val driverSubmissionServicePort = new ServicePortBuilder() - .withName(SUBMISSION_SERVER_PORT_NAME) - .withPort(SUBMISSION_SERVER_PORT) - .withNewTargetPort(SUBMISSION_SERVER_PORT) - .build() - val service = kubernetesClient.services().createNew() - .withNewMetadata() - .withName(kubernetesAppId) - .withLabels(driverKubernetesSelectors) - .withOwnerReferences(ownerRefs.asJava) - .endMetadata() - .withNewSpec() - .withType("NodePort") - .withSelector(driverKubernetesSelectors) - .withPorts(driverSubmissionServicePort) - .endSpec() - .done() - try { - sparkConf.getOption("spark.app.id").foreach { id => - logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + - s" overridden as $kubernetesAppId") - } - 
sparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) - sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, service.getMetadata.getName) - sparkConf.set("spark.app.id", kubernetesAppId) - sparkConf.setIfMissing("spark.app.name", appName) - sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.blockmanager.port", - DEFAULT_BLOCKMANAGER_PORT.toString) - val driverSubmitter = buildDriverSubmissionClient(kubernetesClient, service, - driverSubmitSslOptions) - val ping = Retry.retry(5, 5.seconds, - Some("Failed to contact the driver server")) { - driverSubmitter.ping() - } - ping onFailure { - case t: Throwable => - logError("Ping failed to the driver server", t) - submitCompletedFuture.setException(t) - kubernetesClient.services().delete(service) - } - val submitComplete = ping.flatMap { _ => - Future { - sparkConf.set("spark.driver.host", pod.getStatus.getPodIP) - val submitRequest = buildSubmissionRequest() - logInfo(s"Submitting local resources to driver pod for application " + - s"$kubernetesAppId ...") - driverSubmitter.submitApplication(submitRequest) - } - } - submitComplete onFailure { - case t: Throwable => - submitCompletedFuture.setException(t) - kubernetesClient.services().delete(service) - } - val adjustServicePort = submitComplete.flatMap { _ => - Future { - // After submitting, adjust the service to only expose the Spark UI - val uiServicePort = new ServicePortBuilder() - .withName(UI_PORT_NAME) - .withPort(uiPort) - .withNewTargetPort(uiPort) - .build() - kubernetesClient.services().withName(kubernetesAppId).edit() - .editSpec() - .withType("ClusterIP") - .withPorts(uiServicePort) - .endSpec() - .done - } - } - adjustServicePort onSuccess { - case _ => - submitCompletedFuture.set(true) - } - adjustServicePort onFailure { - case throwable: Throwable => - submitCompletedFuture.setException(throwable) - kubernetesClient.services().delete(service) - } - } catch { - case e: Throwable => - submitCompletedFuture.setException(e) - Utils.tryLogNonFatalError({ - kubernetesClient.services().delete(service) - }) - throw e - } - case None => - } - } - } - } - - override def onClose(e: KubernetesClientException): Unit = { - if (!submitCompletedFuture.isDone) { - submitCompletedFuture.setException(e) - } - } - } - private def buildSubmitFailedErrorMessage( - kubernetesClient: DefaultKubernetesClient, - e: TimeoutException): String = { + kubernetesClient: KubernetesClient, + e: Throwable): String = { val driverPod = try { kubernetesClient.pods().withName(kubernetesAppId).get() } catch { @@ -606,7 +747,7 @@ private[spark] class Client( node.getSpec.getUnschedulable) .flatMap(_.getStatus.getAddresses.asScala.map(address => { s"$urlScheme://${address.getAddress}:$servicePort" - })).toArray + })).toSet require(nodeUrls.nonEmpty, "No nodes found to contact the driver!") val (trustManager, sslContext): (X509TrustManager, SSLContext) = if (driverSubmitSslOptions.enabled) { @@ -616,8 +757,10 @@ private[spark] class Client( } HttpClientUtil.createClient[KubernetesSparkRestApi]( uris = nodeUrls, + maxRetriesPerServer = 3, sslSocketFactory = sslContext.getSocketFactory, - trustContext = trustManager) + trustContext = trustManager, + connectTimeoutMillis = 5000) } private def buildSslConnectionConfiguration(driverSubmitSslOptions: SSLOptions) = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala 
deleted file mode 100644 index 378583b29c547..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Retry.scala +++ /dev/null @@ -1,54 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes - -import scala.concurrent.{ExecutionContext, Future} -import scala.concurrent.duration.Duration - -import org.apache.spark.SparkException -import org.apache.spark.internal.Logging - -private[spark] object Retry extends Logging { - - private def retryableFuture[T] - (attempt: Int, maxAttempts: Int, interval: Duration, retryMessage: Option[String]) - (f: => Future[T]) - (implicit executionContext: ExecutionContext): Future[T] = { - f recoverWith { - case error: Throwable => - if (attempt <= maxAttempts) { - retryMessage.foreach { message => - logWarning(s"$message - attempt $attempt of $maxAttempts", error) - } - Thread.sleep(interval.toMillis) - retryableFuture(attempt + 1, maxAttempts, interval, retryMessage)(f) - } else { - Future.failed(retryMessage.map(message => - new SparkException(s"$message - reached $maxAttempts attempts," + - s" and aborting task.", error) - ).getOrElse(error)) - } - } - } - - def retry[T] - (times: Int, interval: Duration, retryMessage: Option[String] = None) - (f: => T) - (implicit executionContext: ExecutionContext): Future[T] = { - retryableFuture(1, times, interval, retryMessage)(Future[T] { f }) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala index 1cabfbad656eb..576f7058f20ee 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala @@ -32,7 +32,8 @@ import org.apache.spark.status.api.v1.JacksonMessageWriter private[spark] object HttpClientUtil { def createClient[T: ClassTag]( - uris: Array[String], + uris: Set[String], + maxRetriesPerServer: Int = 1, sslSocketFactory: SSLSocketFactory = SSLContext.getDefault.getSocketFactory, trustContext: X509TrustManager = null, readTimeoutMillis: Int = 20000, @@ -45,12 +46,12 @@ private[spark] object HttpClientUtil { .registerModule(new DefaultScalaModule) .setDateFormat(JacksonMessageWriter.makeISODateFormat) objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - val target = new MultiServerFeignTarget[T](uris) + val target = new MultiServerFeignTarget[T](uris.toSeq, maxRetriesPerServer) val baseHttpClient = new feign.okhttp.OkHttpClient(httpClientBuilder.build()) val resetTargetHttpClient = 
new Client { override def execute(request: Request, options: Options): Response = { val response = baseHttpClient.execute(request, options) - if (response.status() >= 200 && response.status() < 300) { + if (response.status() / 100 == 2) { target.reset() } response diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala index fea7f057cfa1b..51313e00ce2da 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala @@ -20,20 +20,25 @@ import feign.{Request, RequestTemplate, RetryableException, Retryer, Target} import scala.reflect.ClassTag import scala.util.Random +import org.apache.spark.internal.Logging + private[kubernetes] class MultiServerFeignTarget[T : ClassTag]( - private val servers: Seq[String]) extends Target[T] with Retryer { + private val servers: Seq[String], + private val maxRetriesPerServer: Int = 1, + private val delayBetweenRetriesMillis: Int = 1000) extends Target[T] with Retryer with Logging { require(servers.nonEmpty, "Must provide at least one server URI.") private val threadLocalShuffledServers = new ThreadLocal[Seq[String]] { override def initialValue(): Seq[String] = Random.shuffle(servers) } + private val threadLocalCurrentAttempt = new ThreadLocal[Int] { + override def initialValue(): Int = 0 + } override def `type`(): Class[T] = { implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] } - override def url(): String = threadLocalShuffledServers.get.head - /** * Cloning the target is done on every request, for use on the current * thread - thus it's important that clone returns a "fresh" target. @@ -54,14 +59,31 @@ private[kubernetes] class MultiServerFeignTarget[T : ClassTag]( requestTemplate.request() } + override def url(): String = threadLocalShuffledServers.get.head + override def continueOrPropagate(e: RetryableException): Unit = { - threadLocalShuffledServers.set(threadLocalShuffledServers.get.drop(1)) - if (threadLocalShuffledServers.get.isEmpty) { - throw e + threadLocalCurrentAttempt.set(threadLocalCurrentAttempt.get + 1) + val currentAttempt = threadLocalCurrentAttempt.get + if (threadLocalCurrentAttempt.get < maxRetriesPerServer) { + logWarning(s"Attempt $currentAttempt of $maxRetriesPerServer failed for" + + s" server ${url()}. Retrying request...", e) + Thread.sleep(delayBetweenRetriesMillis) + } else { + val previousUrl = url() + threadLocalShuffledServers.set(threadLocalShuffledServers.get.drop(1)) + if (threadLocalShuffledServers.get.isEmpty) { + logError(s"Failed request to all servers $maxRetriesPerServer times.", e) + throw e + } else { + logWarning(s"Failed request to $previousUrl $maxRetriesPerServer times." 
+ + s" Trying to access ${url()} instead.", e) + threadLocalCurrentAttempt.set(0) + } } } def reset(): Unit = { threadLocalShuffledServers.set(Random.shuffle(servers)) + threadLocalCurrentAttempt.set(0) } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index b42f97952394e..736b92cc2d628 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -123,7 +123,7 @@ private[spark] object Minikube extends Logging { .build() val sslContext = SSLUtils.sslContext(kubernetesConf) val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] - HttpClientUtil.createClient[T](Array(url), sslContext.getSocketFactory, trustManager) + HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) } def executeMinikubeSsh(command: String): Unit = { From 1a4395792330842427cc087d937529aa1139825c Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Thu, 9 Feb 2017 17:54:39 -0800 Subject: [PATCH 044/225] Note integration tests require Java 8 (#99) --- resource-managers/kubernetes/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 5e4ffaa54cb55..92ec305513f42 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -41,6 +41,8 @@ Below is a list of the submodules for this cluster manager and what they do. Note that the integration test framework is currently being heavily revised and is subject to change. +Note that currently the integration tests only run with Java 8. + Running any of the integration tests requires including `kubernetes-integration-tests` profile in the build command. In order to prepare the environment for running the integration tests, the `pre-integration-test` step must be run in Maven on the `resource-managers/kubernetes/integration-tests` module: From 3aba68afb819639429038333b41a19085b68b500 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Fri, 10 Feb 2017 14:50:38 -0800 Subject: [PATCH 045/225] Bumping up kubernetes-client version to fix GKE and local proxy (#105) * Bumping up kubernetes-client version to add fixes * Modify wording * Addressed comments --- docs/running-on-kubernetes.md | 30 ++++++++++++++++++- resource-managers/kubernetes/core/pom.xml | 2 +- .../KubernetesClusterSchedulerBackend.scala | 2 +- 3 files changed, 31 insertions(+), 3 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5a48bb254a6df..19f406039e261 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -51,7 +51,7 @@ connect without SSL on a different port, the master would be set to `k8s://http: Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. - + ### Adding Other JARs Spark allows users to provide dependencies that are bundled into the driver's Docker image, or that are on the local @@ -150,6 +150,34 @@ or `container:`. 
A scheme of `file:` corresponds to the keyStore being located o the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme `container:`, the file is assumed to already be on the container's disk at the appropriate path. +### Kubernetes Clusters and the authenticated proxy endpoint + +Spark-submit also supports submission through the +[local kubectl proxy](https://kubernetes.io/docs/user-guide/connecting-to-applications-proxy/). One can use the +authenticating proxy to communicate with the api server directly without passing credentials to spark-submit. + +The local proxy can be started by running: + + kubectl proxy + +If our local proxy were listening on port 8001, we would have our submission looking like the following: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.SparkPi \ + --master k8s://http://127.0.0.1:8001 \ + --kubernetes-namespace default \ + --conf spark.executor.instances=5 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ + examples/jars/spark_examples_2.11-2.2.0.jar + +Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. +The above mechanism using `kubectl proxy` can be used when we have authentication providers that the fabric8 +kubernetes-client library does not support. Authentication using X509 Client Certs and oauth tokens +is currently supported. + ### Spark Properties Below are some other common properties that are specific to Kubernetes. Most of the other configurations are the same diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 86d7dec2c076f..a7eba625cd56c 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -29,7 +29,7 @@ Spark Project Kubernetes kubernetes - 1.4.34 + 2.0.3 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 550ddd113fa42..83225098bc651 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -44,7 +44,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private val kubernetesMaster = Client.resolveK8sMaster(sc.master) + private val kubernetesMaster = "https://kubernetes" private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) From 1f2fd806679a4af0ef024bc6e73ec6a74c0c755f Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Fri, 10 Feb 2017 18:38:29 -0800 Subject: [PATCH 046/225] Truncate k8s hostnames to be no longer than 63 characters (#102) * Truncate k8s hostnames to be no longer than 63 characters * Use only executorId not executorKubernetesId --- 
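A minimal, self-contained sketch of the 63-character truncation rule this patch applies (the helper object and the example pod name below are illustrative, not taken from the patch itself):

    object HostnameTruncation {
      // Kubernetes hostnames are limited to 63 characters (the DNS label limit),
      // so keep only the last 63 characters of the pod name; the tail still
      // contains the executor id, which keeps hostnames unique per executor.
      def toHostname(podName: String): String =
        podName.substring(math.max(0, podName.length - 63))

      def main(args: Array[String]): Unit = {
        val filler = "a" * 60
        val podName = s"spark-pi-$filler-exec-7" // hypothetical over-long pod name
        println(toHostname(podName))             // last 63 characters, still ending in "-exec-7"
        println(toHostname(podName).length)      // prints 63
      }
    }
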
.../KubernetesClusterSchedulerBackend.scala | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 83225098bc651..d4e7da464be4a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -155,9 +155,14 @@ private[spark] class KubernetesClusterSchedulerBackend( } private def allocateNewExecutorPod(): (String, Pod) = { - val executorKubernetesId = UUID.randomUUID().toString.replaceAll("-", "") val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val name = s"${applicationId()}-exec-$executorKubernetesId" + val name = s"${applicationId()}-exec-$executorId" + + // hostname must be no longer than 63 characters, so take the last 63 characters of the pod + // name as the hostname. This preserves uniqueness since the end of name contains + // executorId and applicationId + val hostname = name.substring(Math.max(0, name.length - 63)) + val selectors = Map(SPARK_EXECUTOR_ID_LABEL -> executorId, SPARK_APP_ID_LABEL -> applicationId()).asJava val executorMemoryQuantity = new QuantityBuilder(false) @@ -190,7 +195,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .build() }) try { - (executorKubernetesId, kubernetesClient.pods().createNew() + (executorId, kubernetesClient.pods().createNew() .withNewMetadata() .withName(name) .withLabels(selectors) @@ -204,6 +209,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .endOwnerReference() .endMetadata() .withNewSpec() + .withHostname(hostname) .addNewContainer() .withName(s"executor") .withImage(executorDockerImage) From e239ac7384b13fc6d3d352f872330cd91ccbc3eb Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Mon, 13 Feb 2017 15:43:30 +0000 Subject: [PATCH 047/225] Fixed loading the executors page through the kubectl proxy. (#95) Fix apache-spark-on-k8s/spark#87 --- .../apache/spark/ui/static/executorspage.js | 34 +++++++++++++++++++ 1 file changed, 34 insertions(+) diff --git a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js index d430d8c5fb35a..6137bf7b31984 100644 --- a/core/src/main/resources/org/apache/spark/ui/static/executorspage.js +++ b/core/src/main/resources/org/apache/spark/ui/static/executorspage.js @@ -53,7 +53,28 @@ $(document).ajaxStart(function () { $.blockUI({message: '
    Loading Executors Page...
    '}); }); +function findKubernetesServiceBaseURI() { + var k8sProxyPattern = '/api/v1/proxy/namespaces/'; + var k8sProxyPatternPos = document.baseURI.indexOf(k8sProxyPattern); + if (k8sProxyPatternPos > 0) { + // Spark is running in a kubernetes cluster, and the web ui is served + // through the kubectl proxy. + var remaining = document.baseURI.substr(k8sProxyPatternPos + k8sProxyPattern.length); + var urlSlashesCount = remaining.split('/').length - 3; + var words = document.baseURI.split('/'); + var baseURI = words.slice(0, words.length - urlSlashesCount).join('/'); + return baseURI; + } + + return null; +} + function createTemplateURI(appId) { + var kubernetesBaseURI = findKubernetesServiceBaseURI(); + if (kubernetesBaseURI) { + return kubernetesBaseURI + '/static/executorspage-template.html'; + } + var words = document.baseURI.split('/'); var ind = words.indexOf("proxy"); if (ind > 0) { @@ -69,6 +90,14 @@ function createTemplateURI(appId) { } function getStandAloneppId(cb) { + var kubernetesBaseURI = findKubernetesServiceBaseURI(); + if (kubernetesBaseURI) { + var appIdAndPort = kubernetesBaseURI.split('/').slice(-1)[0]; + var appId = appIdAndPort.split(':')[0]; + cb(appId); + return; + } + var words = document.baseURI.split('/'); var ind = words.indexOf("proxy"); if (ind > 0) { @@ -94,6 +123,11 @@ function getStandAloneppId(cb) { } function createRESTEndPoint(appId) { + var kubernetesBaseURI = findKubernetesServiceBaseURI(); + if (kubernetesBaseURI) { + return kubernetesBaseURI + "/api/v1/applications/" + appId + "/allexecutors"; + } + var words = document.baseURI.split('/'); var ind = words.indexOf("proxy"); if (ind > 0) { From 3a51dbee0b57b737f0a23fd6f40db09619037388 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 13 Feb 2017 12:49:16 -0800 Subject: [PATCH 048/225] Filter nodes to only try and send files to external IPs (#106) * Filter node addresses * Added comment --- .../org/apache/spark/deploy/kubernetes/Client.scala | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 9eed9bfd2cd79..d3aa515484f78 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -745,9 +745,14 @@ private[spark] class Client( val nodeUrls = kubernetesClient.nodes.list.getItems.asScala .filterNot(node => node.getSpec.getUnschedulable != null && node.getSpec.getUnschedulable) - .flatMap(_.getStatus.getAddresses.asScala.map(address => { + .flatMap(_.getStatus.getAddresses.asScala) + // The list contains hostnames, internal and external IP addresses. 
+ // we want only external IP addresses in our list + // (https://kubernetes.io/docs/admin/node/#addresses) + .filter(_.getType == "ExternalIP") + .map(address => { s"$urlScheme://${address.getAddress}:$servicePort" - })).toSet + }).toSet require(nodeUrls.nonEmpty, "No nodes found to contact the driver!") val (trustManager, sslContext): (X509TrustManager, SSLContext) = if (driverSubmitSslOptions.enabled) { From ba6a9e5d31997fd5ae9f507ae94eab3ce6b30d1c Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Mon, 13 Feb 2017 23:18:01 +0000 Subject: [PATCH 049/225] Parse results of minikube status more rigorously (#97) * Parse results of minikube status more rigorously Prior code assumes the minikubeVM status line is always the first row output from minikube status, and it is not when the version upgrade notifier prints an upgrade suggestion message. * Also filter ip response to expected rows --- .../kubernetes/integrationtest/minikube/Minikube.scala | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index 736b92cc2d628..e7eea679adf79 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.kubernetes.integrationtest.minikube import java.io.{BufferedReader, InputStreamReader} import java.nio.file.Paths import java.util.concurrent.TimeUnit +import java.util.regex.Pattern import javax.net.ssl.X509TrustManager import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} @@ -58,13 +59,17 @@ private[spark] object Minikube extends Logging { def getMinikubeIp: String = synchronized { assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) val outputs = executeMinikube("ip") + .filter(_.matches("^\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}\\.\\d{1,3}$")) assert(outputs.size == 1, "Unexpected amount of output from minikube ip") outputs.head } def getMinikubeStatus: MinikubeStatus.Value = synchronized { assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) - val statusString = executeMinikube("status").head.replaceFirst("minikubeVM: ", "") + val statusString = executeMinikube("status") + .filter(_.contains("minikubeVM: ")) + .head + .replaceFirst("minikubeVM: ", "") MinikubeStatus.unapply(statusString) .getOrElse(throw new IllegalStateException(s"Unknown status $statusString")) } From bab88e07329ce08528d377e4a3b73c5dd24980ba Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Tue, 14 Feb 2017 14:32:54 -0800 Subject: [PATCH 050/225] Adding legacyHostIP to the list of IPs we look at (#114) --- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index d3aa515484f78..279ee505de609 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -747,9 +747,11 @@ private[spark] class Client( node.getSpec.getUnschedulable) .flatMap(_.getStatus.getAddresses.asScala) // The list contains hostnames, internal and external IP addresses. - // we want only external IP addresses in our list // (https://kubernetes.io/docs/admin/node/#addresses) - .filter(_.getType == "ExternalIP") + // we want only external IP addresses and legacyHostIP addresses in our list + // legacyHostIPs are deprecated and will be removed in the future. + // (https://github.com/kubernetes/kubernetes/issues/9267) + .filter(address => address.getType == "ExternalIP" || address.getType == "LegacyHostIP") .map(address => { s"$urlScheme://${address.getAddress}:$servicePort" }).toSet From be4330f950436a56e1bb433f1766f9c3bba9ba2b Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Wed, 15 Feb 2017 01:29:31 +0000 Subject: [PATCH 051/225] Add -DskipTests to dev docs (#115) * Add -DskipTests to dev docs * Remove extraneous skipTests --- resource-managers/kubernetes/README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 92ec305513f42..25b62ba35a193 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -17,7 +17,7 @@ important matters to keep in mind when developing this feature. To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. For example, to simply compile the Kubernetes core implementation module along with its dependencies: - build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am + build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am -DskipTests To build a distribution of Spark with Kubernetes support, use the `dev/make-distribution.sh` script, and add the `kubernetes` profile as part of the build arguments. Any other build arguments can be specified as one would expect when @@ -47,7 +47,7 @@ Running any of the integration tests requires including `kubernetes-integration- order to prepare the environment for running the integration tests, the `pre-integration-test` step must be run in Maven on the `resource-managers/kubernetes/integration-tests` module: - build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am + build/mvn pre-integration-test -Pkubernetes -Pkubernetes-integration-tests -pl resource-managers/kubernetes/integration-tests -am -DskipTests Afterwards, the integration tests can be executed with Maven or your IDE. Note that when running tests from an IDE, the `pre-integration-test` phase must be run every time the Spark main code changes. 
When running tests from the From b1d7706e685875af7ecda599bc943bde53c97185 Mon Sep 17 00:00:00 2001 From: Varun Date: Wed, 15 Feb 2017 16:38:46 -0800 Subject: [PATCH 052/225] Shutdown the thread scheduler in LoggingPodStatusWatcher on receiving job finish event notifications (#121) --- .../apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala index cbacaf6bda854..b7a29fedcbd2d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala @@ -64,6 +64,7 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL if (phase == "Succeeded" || phase == "Failed") { podCompletedFuture.countDown() + scheduler.shutdown() } } From 6ea304794ed801daa2f0e59e28f243fc3397313c Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Wed, 15 Feb 2017 17:22:36 -0800 Subject: [PATCH 053/225] Trigger scalatest plugin in the integration-test phase (#93) * Trigger scalatest plugin in the integration-test phase * Clean up unnecessary config section --- .../kubernetes/integration-tests/pom.xml | 27 +++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index f6a322f18cd75..3de10f94c4aca 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -216,6 +216,33 @@ + + + org.scalatest + scalatest-maven-plugin + + + test + + test + + + + (?<!Suite) + + + + integration-test + integration-test + + test + + + +
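The visible text of the pom.xml hunk above shows a `scalatest-maven-plugin` execution bound to the `integration-test` phase together with a suffix filter of `(?<!Suite)`, presumably so that classes ending in `Suite` are skipped during the ordinary `test` phase and only run as integration tests. A small Scala sketch (with hypothetical class names) of how that negative-lookbehind filter behaves:

    object SuiteSuffixFilterDemo {
      // ".*(?<!Suite)$" matches a class name only when it does not end in "Suite".
      private val nonSuitePattern = java.util.regex.Pattern.compile(".*(?<!Suite)$")

      def main(args: Array[String]): Unit = {
        Seq("KubernetesSuite", "ClientSuite", "HttpClientUtilTest").foreach { name =>
          val matched = nonSuitePattern.matcher(name).matches()
          println(s"$name matched by (?<!Suite): $matched") // true only for HttpClientUtilTest
        }
      }
    }
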
    From de5a105d3e1c34aa14305b1c3f866beb6e2d2223 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 15 Feb 2017 17:56:35 -0800 Subject: [PATCH 054/225] Fix issue with DNS resolution (#118) * Fix issue with DNS resolution * Address comments --- .../spark/deploy/kubernetes/KubernetesClientBuilder.scala | 5 +++-- .../scala/org/apache/spark/deploy/kubernetes/constants.scala | 1 + .../kubernetes/KubernetesClusterSchedulerBackend.scala | 3 +-- .../kubernetes/integrationtest/minikube/Minikube.scala | 2 +- 4 files changed, 6 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala index 61d3ac17ac34a..89369b30694ee 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -22,6 +22,8 @@ import com.google.common.base.Charsets import com.google.common.io.Files import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} +import org.apache.spark.deploy.kubernetes.constants._ + private[spark] object KubernetesClientBuilder { private val API_SERVER_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) private val CA_CERT_FILE = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) @@ -33,11 +35,10 @@ private[spark] object KubernetesClientBuilder { * into the pod's disk space. */ def buildFromWithinPod( - kubernetesMaster: String, kubernetesNamespace: String): DefaultKubernetesClient = { var clientConfigBuilder = new ConfigBuilder() .withApiVersion("v1") - .withMasterUrl(kubernetesMaster) + .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) .withNamespace(kubernetesNamespace) if (CA_CERT_FILE.isFile) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 027cc3c022b4e..688cd858e79ff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -67,4 +67,5 @@ package object constants { // Miscellaneous private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" private[spark] val KUBERNETES_SUBMIT_SSL_NAMESPACE = "kubernetes.submit" + private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index d4e7da464be4a..898b215b92d04 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -44,7 +44,6 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_MODIFICATION_LOCK = new Object private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] - private 
val kubernetesMaster = "https://kubernetes" private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) @@ -77,7 +76,7 @@ private[spark] class KubernetesClusterSchedulerBackend( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) private val kubernetesClient = KubernetesClientBuilder - .buildFromWithinPod(kubernetesMaster, kubernetesNamespace) + .buildFromWithinPod(kubernetesNamespace) private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index e7eea679adf79..07274bf962dde 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -76,7 +76,7 @@ private[spark] object Minikube extends Logging { def getDockerEnv: Map[String, String] = synchronized { assert(MINIKUBE_EXECUTABLE_DEST.exists(), EXPECTED_DOWNLOADED_MINIKUBE_MESSAGE) - executeMinikube("docker-env") + executeMinikube("docker-env", "--shell", "bash") .filter(_.startsWith("export")) .map(_.replaceFirst("export ", "").split('=')) .map(arr => (arr(0), arr(1).replaceAllLiterally("\"", ""))) From 81c696882730e866b8b4c0c56f14e02e4604cd7f Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 16 Feb 2017 15:28:42 -0800 Subject: [PATCH 055/225] Change the API contract for uploading local files (#107) * Change the API contract for uploading local jars. This mirrors similarly to what YARN and Mesos expects. 
* Address comments * Fix test --- .../org/apache/spark/deploy/SparkSubmit.scala | 9 +- .../spark/deploy/SparkSubmitArguments.scala | 14 -- docs/running-on-kubernetes.md | 108 ++-------------- .../launcher/SparkSubmitOptionParser.java | 8 +- .../spark/deploy/kubernetes/Client.scala | 121 ++++++------------ .../spark/deploy/kubernetes/config.scala | 24 ---- .../rest/KubernetesRestProtocolMessages.scala | 4 +- .../rest/kubernetes/KubernetesFileUtils.scala | 44 +++++++ .../KubernetesSparkRestServer.scala | 115 +++++++++++------ .../kubernetes/docker-minimal-bundle/pom.xml | 6 - .../src/main/assembly/driver-assembly.xml | 11 -- .../src/main/assembly/executor-assembly.xml | 11 -- .../kubernetes/integration-tests/pom.xml | 50 ++++++++ .../integrationtest/KubernetesSuite.scala | 87 +++---------- 14 files changed, 244 insertions(+), 368 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 6ec90e7819aee..53098c5085ddc 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -483,10 +483,6 @@ object SparkSubmit extends CommandLineUtils { OptionAssigner(args.kubernetesNamespace, KUBERNETES, ALL_DEPLOY_MODES, sysProp = "spark.kubernetes.namespace"), - OptionAssigner(args.kubernetesUploadJars, KUBERNETES, CLUSTER, - sysProp = "spark.kubernetes.driver.uploads.jars"), - OptionAssigner(args.kubernetesUploadFiles, KUBERNETES, CLUSTER, - sysProp = "spark.kubernetes.driver.uploads.files"), // Other options OptionAssigner(args.executorCores, STANDALONE | YARN, ALL_DEPLOY_MODES, @@ -495,10 +491,11 @@ object SparkSubmit extends CommandLineUtils { sysProp = "spark.executor.memory"), OptionAssigner(args.totalExecutorCores, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.cores.max"), - OptionAssigner(args.files, LOCAL | STANDALONE | MESOS, ALL_DEPLOY_MODES, + OptionAssigner(args.files, LOCAL | STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, sysProp = "spark.files"), OptionAssigner(args.jars, LOCAL, CLIENT, sysProp = "spark.jars"), - OptionAssigner(args.jars, STANDALONE | MESOS, ALL_DEPLOY_MODES, sysProp = "spark.jars"), + OptionAssigner(args.jars, STANDALONE | MESOS | KUBERNETES, ALL_DEPLOY_MODES, + sysProp = "spark.jars"), OptionAssigner(args.driverMemory, STANDALONE | MESOS | YARN, CLUSTER, sysProp = "spark.driver.memory"), OptionAssigner(args.driverCores, STANDALONE | MESOS | YARN, CLUSTER, diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala index 2b5e8baa7f611..3350987d17a83 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmitArguments.scala @@ -73,8 +73,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S // Kubernetes only var kubernetesNamespace: String = null - var kubernetesUploadJars: String = null - var kubernetesUploadFiles: String = null // Standalone cluster mode only var supervise: Boolean = false @@ -201,12 +199,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S kubernetesNamespace = Option(kubernetesNamespace) .orElse(sparkProperties.get("spark.kubernetes.namespace")) .orNull - kubernetesUploadJars = 
Option(kubernetesUploadJars) - .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.jars")) - .orNull - kubernetesUploadFiles = Option(kubernetesUploadFiles) - .orElse(sparkProperties.get("spark.kubernetes.driver.uploads.files")) - .orNull // Try to set main class from JAR if no --class argument is given if (mainClass == null && !isPython && !isR && primaryResource != null) { @@ -448,12 +440,6 @@ private[deploy] class SparkSubmitArguments(args: Seq[String], env: Map[String, S case KUBERNETES_NAMESPACE => kubernetesNamespace = value - case KUBERNETES_UPLOAD_JARS => - kubernetesUploadJars = value - - case KUBERNETES_UPLOAD_FILES => - kubernetesUploadFiles = value - case HELP => printUsageAndExit(0) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 19f406039e261..e5c7e9bb69448 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -51,87 +51,15 @@ connect without SSL on a different port, the master would be set to `k8s://http: Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. - -### Adding Other JARs - -Spark allows users to provide dependencies that are bundled into the driver's Docker image, or that are on the local -disk of the submitter's machine. These two types of dependencies are specified via different configuration options to -`spark-submit`: -* Local jars provided by specifying the `--jars` command line argument to `spark-submit`, or by setting `spark.jars` in - the application's configuration, will be treated as jars that are located on the *disk of the driver container*. This - only applies to jar paths that do not specify a scheme or that have the scheme `file://`. Paths with other schemes are - fetched from their appropriate locations. -* Local jars provided by specifying the `--upload-jars` command line argument to `spark-submit`, or by setting - `spark.kubernetes.driver.uploads.jars` in the application's configuration, will be treated as jars that are located on - the *disk of the submitting machine*. These jars are uploaded to the driver docker container before executing the - application. -* A main application resource path that does not have a scheme or that has the scheme `file://` is assumed to be on the - *disk of the submitting machine*. This resource is uploaded to the driver docker container before executing the - application. A remote path can still be specified and the resource will be fetched from the appropriate location. -* A main application resource path that has the scheme `container://` is assumed to be on the *disk of the driver - container*. - -In all of these cases, the jars are placed on the driver's classpath, and are also sent to the executors. Below are some -examples of providing application dependencies. 
- -To submit an application with both the main resource and two other jars living on the submitting user's machine: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.SampleApplication \ - --master k8s://192.168.99.100 \ - --upload-jars /home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - /home/exampleuser/exampleapplication/main.jar - -Note that since passing the jars through the `--upload-jars` command line argument is equivalent to setting the -`spark.kubernetes.driver.uploads.jars` Spark property, the above will behave identically to this command: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.SampleApplication \ - --master k8s://192.168.99.100 \ - --conf spark.kubernetes.driver.uploads.jars=/home/exampleuser/exampleapplication/dep1.jar,/home/exampleuser/exampleapplication/dep2.jar \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - /home/exampleuser/exampleapplication/main.jar - -To specify a main application resource that can be downloaded from an HTTP service, and if a plugin for that application -is located in the jar `/opt/spark-plugins/app-plugin.jar` on the docker image's disk: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.PluggableApplication \ - --master k8s://192.168.99.100 \ - --jars /opt/spark-plugins/app-plugin.jar \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - http://example.com:8080/applications/sparkpluggable/app.jar - -Note that since passing the jars through the `--jars` command line argument is equivalent to setting the `spark.jars` -Spark property, the above will behave identically to this command: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.PluggableApplication \ - --master k8s://192.168.99.100 \ - --conf spark.jars=file:///opt/spark-plugins/app-plugin.jar \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - http://example.com:8080/applications/sparkpluggable/app.jar - -To specify a main application resource that is in the Docker image, and if it has no other dependencies: - - bin/spark-submit \ - --deploy-mode cluster \ - --class com.example.applications.PluggableApplication \ - --master k8s://192.168.99.100:8443 \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver-custom:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ - container:///home/applications/examples/example.jar +### Dependency Management and Docker Containers +Spark supports specifying JAR paths that are either on the submitting host's disk, or are located on the disk of the +driver and executors. Refer to the [application submission](submitting-applications.html#advanced-dependency-management) +section for details. Note that files specified with the `local` scheme should be added to the container image of both +the driver and the executors. 
Files without a scheme or with the scheme `file://` are treated as being on the disk of +the submitting machine, and are uploaded to the driver running in Kubernetes before launching the application. + ### Setting Up SSL For Submitting the Driver When submitting to Kubernetes, a pod is started for the driver, and the pod starts an HTTP server. This HTTP server @@ -146,9 +74,9 @@ pod in starting the application, set `spark.ssl.kubernetes.submit.trustStore`. One note about the keyStore is that it can be specified as either a file on the client machine or a file in the container image's disk. Thus `spark.ssl.kubernetes.submit.keyStore` can be a URI with a scheme of either `file:` -or `container:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto +or `local:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme -`container:`, the file is assumed to already be on the container's disk at the appropriate path. +`local:`, the file is assumed to already be on the container's disk at the appropriate path. ### Kubernetes Clusters and the authenticated proxy endpoint @@ -241,24 +169,6 @@ from the other deployment modes. See the [configuration page](configuration.html executor pods from the API server. - - spark.kubernetes.driver.uploads.jars - (none) - - Comma-separated list of jars to send to the driver and all executors when submitting the application in cluster - mode. Refer to adding other jars for more information. - - - - spark.kubernetes.driver.uploads.files - (none) - - Comma-separated list of files to send to the driver and all executors when submitting the application in cluster - mode. The files are added in a flat hierarchy to the current working directory of the driver, having the same - names as the names of the original files. Note that two files with the same name cannot be added, even if they - were in different source directories on the client disk. - - spark.kubernetes.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 diff --git a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java index 3369b5d8301be..a4d43c0795abc 100644 --- a/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java +++ b/launcher/src/main/java/org/apache/spark/launcher/SparkSubmitOptionParser.java @@ -77,10 +77,7 @@ class SparkSubmitOptionParser { protected final String QUEUE = "--queue"; // Kubernetes-only options. - protected final String KUBERNETES_MASTER = "--kubernetes-master"; protected final String KUBERNETES_NAMESPACE = "--kubernetes-namespace"; - protected final String KUBERNETES_UPLOAD_JARS = "--upload-jars"; - protected final String KUBERNETES_UPLOAD_FILES = "--upload-files"; /** * This is the canonical list of spark-submit options. 
Each entry in the array contains the @@ -121,10 +118,7 @@ class SparkSubmitOptionParser { { REPOSITORIES }, { STATUS }, { TOTAL_EXECUTOR_CORES }, - { KUBERNETES_MASTER }, - { KUBERNETES_NAMESPACE }, - { KUBERNETES_UPLOAD_JARS }, - { KUBERNETES_UPLOAD_FILES } + { KUBERNETES_NAMESPACE } }; /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 279ee505de609..aa273a024f6f9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -35,7 +35,7 @@ import scala.collection.mutable import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, TarGzippedData, UploadedAppResource} +import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -59,11 +59,10 @@ private[spark] class Client( private val sslSecretsDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId-ssl" private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val uploadedJars = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_JARS).filter(_.nonEmpty) - private val uploadedFiles = sparkConf.get(KUBERNETES_DRIVER_UPLOAD_FILES).filter(_.nonEmpty) - uploadedFiles.foreach(validateNoDuplicateUploadFileNames) private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) + private val sparkFiles = sparkConf.getOption("spark.files") + private val sparkJars = sparkConf.getOption("spark.jars") private val waitForAppCompletion: Boolean = sparkConf.get(WAIT_FOR_APP_COMPLETION) @@ -78,9 +77,18 @@ private[spark] class Client( def run(): Unit = { logInfo(s"Starting application $kubernetesAppId in Kubernetes...") - - Seq(uploadedFiles, uploadedJars, Some(mainAppResource)).foreach(checkForFilesExistence) - + val submitterLocalFiles = KubernetesFileUtils.getOnlySubmitterLocalFiles(sparkFiles) + val submitterLocalJars = KubernetesFileUtils.getOnlySubmitterLocalFiles(sparkJars) + (submitterLocalFiles ++ submitterLocalJars).foreach { file => + if (!new File(Utils.resolveURI(file).getPath).isFile) { + throw new SparkException(s"File $file does not exist or is a directory.") + } + } + if (KubernetesFileUtils.isUriLocalFile(mainAppResource) && + !new File(Utils.resolveURI(mainAppResource).getPath).isFile) { + throw new SparkException(s"Main app resource file $mainAppResource is not a file or" + + s" is a directory.") + } val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() val parsedCustomLabels = parseCustomLabels(customLabels) var k8ConfBuilder = new K8SConfigBuilder() @@ -145,7 +153,7 @@ private[spark] class Client( } try { submitApplicationToDriverServer(kubernetesClient, driverSubmitSslOptions, - ownerReferenceConfiguredDriverService) + 
ownerReferenceConfiguredDriverService, submitterLocalFiles, submitterLocalJars) // wait if configured to do so if (waitForAppCompletion) { logInfo(s"Waiting for application $kubernetesAppId to finish...") @@ -193,7 +201,9 @@ private[spark] class Client( private def submitApplicationToDriverServer( kubernetesClient: KubernetesClient, driverSubmitSslOptions: SSLOptions, - driverService: Service) = { + driverService: Service, + submitterLocalFiles: Iterable[String], + submitterLocalJars: Iterable[String]): Unit = { sparkConf.getOption("spark.app.id").foreach { id => logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + s" overridden as $kubernetesAppId") @@ -211,7 +221,7 @@ private[spark] class Client( driverSubmitter.ping() logInfo(s"Submitting local resources to driver pod for application " + s"$kubernetesAppId ...") - val submitRequest = buildSubmissionRequest() + val submitRequest = buildSubmissionRequest(submitterLocalFiles, submitterLocalJars) driverSubmitter.submitApplication(submitRequest) logInfo("Successfully submitted local resources and driver configuration to" + " driver pod.") @@ -502,25 +512,18 @@ private[spark] class Client( val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_KEYSTORE) val resolvedSparkConf = sparkConf.clone() val (isLocalKeyStore, resolvedKeyStore) = maybeKeyStore.map(keyStore => { - val keyStoreURI = Utils.resolveURI(keyStore) - val isProvidedKeyStoreLocal = keyStoreURI.getScheme match { - case "file" | null => true - case "container" => false - case _ => throw new SparkException(s"Invalid KeyStore URI $keyStore; keyStore URI" + - " for submit server must have scheme file:// or container:// (no scheme defaults" + - " to file://)") - } - (isProvidedKeyStoreLocal, Option.apply(keyStoreURI.getPath)) - }).getOrElse((true, Option.empty[String])) + (KubernetesFileUtils.isUriLocalFile(keyStore), + Option.apply(Utils.resolveURI(keyStore).getPath)) + }).getOrElse((false, Option.empty[String])) resolvedKeyStore.foreach { resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, _) } sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE).foreach { trustStore => - val trustStoreURI = Utils.resolveURI(trustStore) - trustStoreURI.getScheme match { - case "file" | null => - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, trustStoreURI.getPath) - case _ => throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + + if (KubernetesFileUtils.isUriLocalFile(trustStore)) { + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, + Utils.resolveURI(trustStore).getPath) + } else { + throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + " for submit server must have no scheme, or scheme file://") } } @@ -673,23 +676,24 @@ private[spark] class Client( .build()) } - private def buildSubmissionRequest(): KubernetesCreateSubmissionRequest = { - val appResourceUri = Utils.resolveURI(mainAppResource) - val resolvedAppResource: AppResource = appResourceUri.getScheme match { - case "file" | null => - val appFile = new File(appResourceUri.getPath) - if (!appFile.isFile) { - throw new IllegalStateException("Provided local file path does not exist" + - s" or is not a file: ${appFile.getAbsolutePath}") - } + private def buildSubmissionRequest( + submitterLocalFiles: Iterable[String], + submitterLocalJars: Iterable[String]): KubernetesCreateSubmissionRequest = { + val mainResourceUri = Utils.resolveURI(mainAppResource) + val resolvedAppResource: AppResource = Option(mainResourceUri.getScheme) + 
.getOrElse("file") match { + case "file" => + val appFile = new File(mainResourceUri.getPath) val fileBytes = Files.toByteArray(appFile) val fileBase64 = Base64.encodeBase64String(fileBytes) UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) - case "container" => ContainerAppResource(appResourceUri.getPath) + case "local" => ContainerAppResource(mainAppResource) case other => RemoteAppResource(other) } - val uploadJarsBase64Contents = compressFiles(uploadedJars) - val uploadFilesBase64Contents = compressFiles(uploadedFiles) + val uploadFilesBase64Contents = CompressionUtils.createTarGzip(submitterLocalFiles.map( + Utils.resolveURI(_).getPath)) + val uploadJarsBase64Contents = CompressionUtils.createTarGzip(submitterLocalJars.map( + Utils.resolveURI(_).getPath)) KubernetesCreateSubmissionRequest( appResource = resolvedAppResource, mainClass = mainClass, @@ -700,33 +704,6 @@ private[spark] class Client( uploadedFilesBase64Contents = uploadFilesBase64Contents) } - // Because uploaded files should be added to the working directory of the driver, they - // need to not have duplicate file names. They are added to the working directory so the - // user can reliably locate them in their application. This is similar in principle to how - // YARN handles its `spark.files` setting. - private def validateNoDuplicateUploadFileNames(uploadedFilesCommaSeparated: String): Unit = { - val pathsWithDuplicateNames = uploadedFilesCommaSeparated - .split(",") - .groupBy(new File(_).getName) - .filter(_._2.length > 1) - if (pathsWithDuplicateNames.nonEmpty) { - val pathsWithDuplicateNamesSorted = pathsWithDuplicateNames - .values - .flatten - .toList - .sortBy(new File(_).getName) - throw new SparkException("Cannot upload files with duplicate names via" + - s" ${KUBERNETES_DRIVER_UPLOAD_FILES.key}. The following paths have a duplicated" + - s" file name: ${pathsWithDuplicateNamesSorted.mkString(",")}") - } - } - - private def compressFiles(maybeFilePaths: Option[String]): Option[TarGzippedData] = { - maybeFilePaths - .map(_.split(",")) - .map(CompressionUtils.createTarGzip(_)) - } - private def buildDriverSubmissionClient( kubernetesClient: KubernetesClient, service: Service, @@ -813,22 +790,6 @@ private[spark] class Client( }).toMap }).getOrElse(Map.empty[String, String]) } - - private def checkForFilesExistence(maybePaths: Option[String]): Unit = { - maybePaths.foreach { paths => - paths.split(",").foreach { path => - val uri = Utils.resolveURI(path) - uri.getScheme match { - case "file" | null => - val file = new File(uri.getPath) - if (!file.isFile) { - throw new SparkException(s"""file "${uri}" does not exist!""") - } - case _ => - } - } - } - } } private[spark] object Client extends Logging { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index cb4cd42142ca4..ad83b0446538e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -91,30 +91,6 @@ package object config { .stringConf .createWithDefault("default") - private[spark] val KUBERNETES_DRIVER_UPLOAD_JARS = - ConfigBuilder("spark.kubernetes.driver.uploads.jars") - .doc(""" - | Comma-separated list of jars to send to the driver and - | all executors when submitting the application in cluster - | mode. 
- """.stripMargin) - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_UPLOAD_FILES = - ConfigBuilder("spark.kubernetes.driver.uploads.files") - .doc(""" - | Comma-separated list of files to send to the driver and - | all executors when submitting the application in cluster - | mode. The files are added in a flat hierarchy to the - | current working directory of the driver, having the same - | names as the names of the original files. Note that two - | files with the same name cannot be added, even if they - | were in different source directories on the client disk. - """.stripMargin) - .stringConf - .createOptional - // Note that while we set a default for this when we start up the // scheduler, the specific default value is dynamically determined // based on the executor memory. diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 6aeb851a16bf4..0d2d1a1c6f5e3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -26,8 +26,8 @@ case class KubernetesCreateSubmissionRequest( appArgs: Array[String], sparkProperties: Map[String, String], secret: String, - uploadedJarsBase64Contents: Option[TarGzippedData], - uploadedFilesBase64Contents: Option[TarGzippedData]) extends SubmitRestProtocolRequest { + uploadedJarsBase64Contents: TarGzippedData, + uploadedFilesBase64Contents: TarGzippedData) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala new file mode 100644 index 0000000000000..f30be1535f81c --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala @@ -0,0 +1,44 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import org.apache.spark.util.Utils + +private[spark] object KubernetesFileUtils { + + private def filterUriStringsByScheme( + uris: Iterable[String], schemeFilter: (String => Boolean)): Iterable[String] = { + uris.filter(uri => schemeFilter(Option(Utils.resolveURI(uri).getScheme).getOrElse("file"))) + } + + def getNonSubmitterLocalFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, _ != "file") + } + + def getOnlyContainerLocalFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, _ == "local") + } + + def getOnlySubmitterLocalFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, _ == "file") + } + + def isUriLocalFile(uri: String): Boolean = { + Option(Utils.resolveURI(uri).getScheme).getOrElse("file") == "file" + } + +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index c5a7e27b15927..f0b01b2320982 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -16,14 +16,14 @@ */ package org.apache.spark.deploy.rest.kubernetes -import java.io.File +import java.io.{File, FileOutputStream, StringReader} import java.net.URI import java.nio.file.Paths import java.util.concurrent.CountDownLatch import javax.servlet.http.{HttpServletRequest, HttpServletResponse} import com.google.common.base.Charsets -import com.google.common.io.Files +import com.google.common.io.{BaseEncoding, ByteStreams, Files} import org.apache.commons.codec.binary.Base64 import scala.collection.mutable import scala.collection.mutable.ArrayBuffer @@ -145,46 +145,73 @@ private[spark] class KubernetesSparkRestServer( } else { requestMessage match { case KubernetesCreateSubmissionRequest( - appResource, - mainClass, - appArgs, - sparkProperties, - secret, - uploadedJars, - uploadedFiles) => + appResource, + mainClass, + appArgs, + sparkProperties, + secret, + uploadedJars, + uploadedFiles) => val decodedSecret = Base64.decodeBase64(secret) if (!expectedApplicationSecret.sameElements(decodedSecret)) { responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) handleError("Unauthorized to submit application.") } else { val tempDir = Utils.createTempDir() - val appResourcePath = resolvedAppResource(appResource, tempDir) + val resolvedAppResource = resolveAppResource(appResource, tempDir) val writtenJars = writeUploadedJars(uploadedJars, tempDir) val writtenFiles = writeUploadedFiles(uploadedFiles) val resolvedSparkProperties = new mutable.HashMap[String, String] resolvedSparkProperties ++= sparkProperties - - // Resolve driver classpath and jars val originalJars = sparkProperties.get("spark.jars") .map(_.split(",")) - .getOrElse(Array.empty[String]) - val resolvedJars = writtenJars ++ originalJars ++ Array(appResourcePath) - val sparkJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) + .getOrElse(Array.empty) + + // The driver at this point has handed us the value of spark.jars verbatim as + // specified in spark-submit. 
At this point, remove all jars that were local + // to the submitting user's disk, and replace them with the paths that were + // written to disk above. + val onlyContainerLocalOrRemoteJars = KubernetesFileUtils + .getNonSubmitterLocalFiles(originalJars) + val resolvedJars = (writtenJars ++ + onlyContainerLocalOrRemoteJars ++ + Array(resolvedAppResource.sparkJarPath)).toSet + if (resolvedJars.nonEmpty) { + resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + } else { + resolvedSparkProperties.remove("spark.jars") + } + + // Determining the driver classpath is similar. It's the combination of: + // - Jars written from uploads + // - Jars in (spark.jars + mainAppResource) that has a "local" prefix + // - spark.driver.extraClasspath + // - Spark core jars from the installation + val sparkCoreJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) val driverExtraClasspath = sparkProperties .get("spark.driver.extraClassPath") .map(_.split(",")) .getOrElse(Array.empty[String]) + val onlyContainerLocalJars = KubernetesFileUtils + .getOnlyContainerLocalFiles(originalJars) val driverClasspath = driverExtraClasspath ++ - resolvedJars ++ - sparkJars - resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") + Seq(resolvedAppResource.localPath) ++ + writtenJars ++ + onlyContainerLocalJars ++ + sparkCoreJars - // Resolve spark.files + // Resolve spark.files similarly to spark.jars. val originalFiles = sparkProperties.get("spark.files") .map(_.split(",")) .getOrElse(Array.empty[String]) - val resolvedFiles = originalFiles ++ writtenFiles - resolvedSparkProperties("spark.files") = resolvedFiles.mkString(",") + val onlyContainerLocalOrRemoteFiles = KubernetesFileUtils + .getNonSubmitterLocalFiles(originalFiles) + val resolvedFiles = writtenFiles ++ onlyContainerLocalOrRemoteFiles + if (resolvedFiles.nonEmpty) { + resolvedSparkProperties("spark.files") = resolvedFiles.mkString(",") + } else { + resolvedSparkProperties.remove("spark.files") + } val command = new ArrayBuffer[String] command += javaExecutable @@ -235,35 +262,50 @@ private[spark] class KubernetesSparkRestServer( } } - private def writeUploadedJars(files: Option[TarGzippedData], rootTempDir: File): + private def writeUploadedJars(jars: TarGzippedData, rootTempDir: File): Seq[String] = { val resolvedDirectory = new File(rootTempDir, "jars") if (!resolvedDirectory.mkdir()) { throw new IllegalStateException(s"Failed to create jars dir at " + resolvedDirectory.getAbsolutePath) } - writeBase64ContentsToFiles(files, resolvedDirectory) + CompressionUtils.unpackAndWriteCompressedFiles(jars, resolvedDirectory) } - private def writeUploadedFiles(files: Option[TarGzippedData]): Seq[String] = { + private def writeUploadedFiles(files: TarGzippedData): Seq[String] = { val workingDir = Paths.get("").toFile.getAbsoluteFile - writeBase64ContentsToFiles(files, workingDir) + CompressionUtils.unpackAndWriteCompressedFiles(files, workingDir) } - def resolvedAppResource(appResource: AppResource, tempDir: File): String = { - val appResourcePath = appResource match { + + /** + * Retrieve the path on the driver container where the main app resource is, and what value it + * ought to have in the spark.jars property. The two may be different because for non-local + * dependencies, we have to fetch the resource (if it is not "local") but still want to use + * the full URI in spark.jars. 
+ */ + private def resolveAppResource(appResource: AppResource, tempDir: File): + ResolvedAppResource = { + appResource match { case UploadedAppResource(resourceContentsBase64, resourceName) => val resourceFile = new File(tempDir, resourceName) val resourceFilePath = resourceFile.getAbsolutePath if (resourceFile.createNewFile()) { - val resourceContentsBytes = Base64.decodeBase64(resourceContentsBase64) - Files.write(resourceContentsBytes, resourceFile) - resourceFile.getAbsolutePath + Utils.tryWithResource(new StringReader(resourceContentsBase64)) { reader => + Utils.tryWithResource(new FileOutputStream(resourceFile)) { os => + Utils.tryWithResource(BaseEncoding.base64().decodingStream(reader)) { + decodingStream => + ByteStreams.copy(decodingStream, os) + } + } + } + ResolvedAppResource(resourceFile.getAbsolutePath, resourceFile.getAbsolutePath) } else { throw new IllegalStateException(s"Failed to write main app resource file" + s" to $resourceFilePath") } - case ContainerAppResource(resource) => resource + case ContainerAppResource(resource) => + ResolvedAppResource(Utils.resolveURI(resource).getPath, resource) case RemoteAppResource(resource) => Utils.fetchFile(resource, tempDir, conf, securityManager, SparkHadoopUtil.get.newConfiguration(conf), @@ -275,19 +317,12 @@ private[spark] class KubernetesSparkRestServer( throw new IllegalStateException(s"Main app resource is not a file or" + s" does not exist at $downloadedFilePath") } - downloadedFilePath + ResolvedAppResource(downloadedFilePath, resource) } - appResourcePath } } - private def writeBase64ContentsToFiles( - maybeCompressedFiles: Option[TarGzippedData], - rootDir: File): Seq[String] = { - maybeCompressedFiles.map { compressedFiles => - CompressionUtils.unpackAndWriteCompressedFiles(compressedFiles, rootDir) - }.getOrElse(Seq.empty[String]) - } + private case class ResolvedAppResource(localPath: String, sparkJarPath: String) } private[spark] object KubernetesSparkRestServer { diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 0ec2f36075db3..7f4d935e0e243 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -44,12 +44,6 @@ pom - - org.apache.spark - spark-examples_${scala.binary.version} - ${project.version} - provided - + + 4.0.0 + + org.apache.spark + spark-parent_2.11 + 2.1.0-k8s-0.3.0-SNAPSHOT + ../../pom.xml + + + java8-tests_2.11 + pom + Spark Project Java 8 Tests + + + java8-tests + + + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} + test-jar + test + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-streaming_${scala.binary.version} + ${project.version} + test-jar + test + + + org.apache.spark + spark-sql_${scala.binary.version} + ${project.version} + + + org.apache.spark + spark-sql_${scala.binary.version} + ${project.version} + test-jar + test + + + org.apache.spark + spark-tags_${scala.binary.version} + + + + + org.apache.spark + spark-tags_${scala.binary.version} + test-jar + test + + + + + + + + org.apache.maven.plugins + maven-deploy-plugin + + true + + + + org.apache.maven.plugins + maven-install-plugin + + true + + + + org.apache.maven.plugins + maven-compiler-plugin + + true + 1.8 + 1.8 + 1.8 + + + + net.alchim31.maven + scala-maven-plugin + + ${useZincForJdk8} + + 
-source + 1.8 + -target + 1.8 + -Xlint:all,-serial,-path + + + + + + diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 27b1bfcfa7dcc..07dfe3727205c 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 6bcbb612fef77..f8ae23dc348ce 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 2df99403840ee..3bbb59c47fb5c 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 0e93b75f67ca1..e92964c8d101f 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index e17b960c9a5b8..f28c98dba6819 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 73852fc4c7656..aaa95e5d632bb 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 2f761fbcda2d4..528a90eab53de 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 22fe1dca3343e..5ea9c5e9cff75 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index df69c5e58727a..5f4f9c88a55a5 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index d3cb2dce3fab5..008d9b3bc9e5d 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 996763ad6c256..598fe1f5fe5c7 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index af032ed035f97..e0580c5ff2ecb 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 8d7398c167712..d6eecc5605338 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ 
org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 2a5d2f4354ecf..928d8053a14ec 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 1c26af6593d37..985ffd08f3fc7 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-SNAPSHOT + 2.1.0-k8s-0.1.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 7f4d935e0e243..e9f88e37a5f89 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-SNAPSHOT + 2.1.0-k8s-0.1.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index f99838636b349..b9c29b26eb648 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-SNAPSHOT + 2.1.0-k8s-0.1.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 59e59aca5109b..16dd0c9322c13 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-SNAPSHOT + 2.1.0-k8s-0.1.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 03c713b6bc068..5c54d0e5e3aab 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-SNAPSHOT + 2.1.0-k8s-0.1.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index f94ff4e925e08..81042ad683512 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 72f891f7c10bd..942d3be645dd3 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 722e362943e26..6b8f730b95c00 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 84c82f6b86ef8..bcc3153a264b9 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git 
a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index ab5593da0d655..a14d2c1196bc5 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index f0ef6779a4742..0c4b8def896cd 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index bed07015e4540..ee611f9b6d299 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 19b44577ca124..10ca3a5d7d0d6 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0 + 2.1.0-k8s-0.3.0-SNAPSHOT ../pom.xml From 0a5c4d5ef509b71ba9fa1e9d61d539da8b7e206d Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Wed, 15 Mar 2017 16:19:48 -0700 Subject: [PATCH 076/225] Exclude flaky ExternalShuffleServiceSuite from Travis (#185) --- .travis.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.travis.yml b/.travis.yml index 87bc84645ca7d..b9ae28a421309 100644 --- a/.travis.yml +++ b/.travis.yml @@ -35,7 +35,7 @@ env: # Used by the install section below. - PHASE=test \ PROFILES="-Pmesos -Pyarn -Phadoop-2.7 -Pkubernetes" \ MODULES="-pl core,resource-managers/kubernetes/core -am" \ - ARGS="-Dtest=none -Dsuffixes='^org\.apache\.spark\.(?!SortShuffleSuite$|rdd\.LocalCheckpointSuite$|deploy\.SparkSubmitSuite$|deploy\.StandaloneDynamicAllocationSuite$).*'" + ARGS="-Dtest=none -Dsuffixes='^org\.apache\.spark\.(?!ExternalShuffleServiceSuite|SortShuffleSuite$|rdd\.LocalCheckpointSuite$|deploy\.SparkSubmitSuite$|deploy\.StandaloneDynamicAllocationSuite$).*'" # Configure the full build. - PHASE=install \ PROFILES="-Pmesos -Pyarn -Phadoop-2.7 -Pkubernetes -Pkinesis-asl -Phive -Phive-thriftserver" \ From 73a0de34142df0265a815ac93c1a2ddc4ac8f8d2 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 8 Mar 2017 12:27:54 -0700 Subject: [PATCH 077/225] Docs improvements (#176) * Adding official alpha docker image to docs * Reorder sections and create a specific one for "advanced" * Provide limitations and instructions about running on GKE * Fix title of advanced section: submission * Improved section on running in the cloud * Update versioning * Address comments * Address comments (cherry picked from commit e5da90dc57a831d99adef9d31b20893de57cae4d) --- docs/running-on-kubernetes-cloud.md | 24 ++++++++ docs/running-on-kubernetes.md | 76 ++++++++++++++++---------- resource-managers/kubernetes/README.md | 8 +++ 3 files changed, 78 insertions(+), 30 deletions(-) create mode 100644 docs/running-on-kubernetes-cloud.md diff --git a/docs/running-on-kubernetes-cloud.md b/docs/running-on-kubernetes-cloud.md new file mode 100644 index 0000000000000..244c64d696ab3 --- /dev/null +++ b/docs/running-on-kubernetes-cloud.md @@ -0,0 +1,24 @@ +--- +layout: global +title: Running Spark in the cloud with Kubernetes +--- + +For general information about running Spark on Kubernetes, refer to [running Spark on Kubernetes](running-on-kubernetes.md). + +A Kubernetes cluster may be brought up on different cloud providers or on premise. 
It is commonly provisioned through [Google Container Engine](https://cloud.google.com/container-engine/), or using [kops](https://github.com/kubernetes/kops) on AWS, or on premise using [kubeadm](https://kubernetes.io/docs/getting-started-guides/kubeadm/). + +## Running on Google Container Engine (GKE) + +* Create a GKE [container cluster](https://cloud.google.com/container-engine/docs/clusters/operations). +* Obtain kubectl and [configure](https://cloud.google.com/container-engine/docs/clusters/operations#configuring_kubectl) it appropriately. +* Find the identity of the master associated with this project. + + > kubectl cluster-info + Kubernetes master is running at https://:443 + +* Run spark-submit with the master option set to `k8s://https://:443`. The instructions for running spark-submit are provided in the [running on kubernetes](running-on-kubernetes.md) tutorial. +* Check that your driver pod, and subsequently your executor pods are launched using `kubectl get pods`. +* Read the stdout and stderr of the driver pod using `kubectl logs `, or stream the logs using `kubectl logs -f `. + +Known issues: +* If you face OAuth token expiry errors when you run spark-submit, it is likely because the token needs to be refreshed. The easiest way to fix this is to run any `kubectl` command, say, `kubectl version` and then retry your submission. diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 27ddc4b04062f..73c28ec69919b 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -12,15 +12,28 @@ currently limited and not well-tested. This should not be used in production env * You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), [nodes](https://kubernetes.io/docs/admin/node/) and [services](https://kubernetes.io/docs/user-guide/services/) in your cluster. You can verify that you can list these resources by running `kubectl get nodes`, `kubectl get pods` and `kubectl get svc` which should give you a list of nodes, pods and services (if any) respectively. * You must have an extracted spark distribution with Kubernetes support, or build one from [source](https://github.com/apache-spark-on-k8s/spark). -## Setting Up Docker Images +## Driver & Executor Images Kubernetes requires users to supply images that can be deployed into containers within pods. The images are built to be run in a container runtime environment that Kubernetes supports. Docker is a container runtime environment that is frequently used with Kubernetes, so Spark provides some support for working with Docker to get started quickly. -To use Spark on Kubernetes with Docker, images for the driver and the executors need to built and published to an -accessible Docker registry. Spark distributions include the Docker files for the driver and the executor at -`dockerfiles/driver/Dockerfile` and `docker/executor/Dockerfile`, respectively. Use these Docker files to build the +If you wish to use pre-built docker images, you may use the images published in [kubespark](https://hub.docker.com/u/kubespark/). The images are as follows: + + + + + + + + + + + +
+<tr><th>Component</th><th>Image</th></tr>
+<tr><td>Spark Driver Image</td><td><code>kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1</code></td></tr>
+<tr><td>Spark Executor Image</td><td><code>kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1</code></td></tr>
    + +You may also build these docker images from sources, or customize them as required. Spark distributions include the Docker files for the driver and the executor at +`dockerfiles/driver/Dockerfile` and `dockerfiles/executor/Dockerfile`, respectively. Use these Docker files to build the Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images to the registry. @@ -44,8 +57,8 @@ are set up as described above: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1 \ examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -55,7 +68,6 @@ being contacted at `api_server_url`. If no HTTP protocol is specified in the URL setting the master to `k8s://example.com:443` is equivalent to setting it to `k8s://https://example.com:443`, but to connect without SSL on a different port, the master would be set to `k8s://http://example.com:8443`. - If you have a Kubernetes cluster setup, one way to discover the apiserver URL is by executing `kubectl cluster-info`. > kubectl cluster-info @@ -67,33 +79,17 @@ In the above example, the specific Kubernetes cluster can be used with spark sub Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. -### Dependency Management and Docker Containers +### Specifying input files Spark supports specifying JAR paths that are either on the submitting host's disk, or are located on the disk of the driver and executors. Refer to the [application submission](submitting-applications.html#advanced-dependency-management) section for details. Note that files specified with the `local://` scheme should be added to the container image of both the driver and the executors. Files without a scheme or with the scheme `file://` are treated as being on the disk of the submitting machine, and are uploaded to the driver running in Kubernetes before launching the application. - -### Setting Up SSL For Submitting the Driver -When submitting to Kubernetes, a pod is started for the driver, and the pod starts an HTTP server. This HTTP server -receives the driver's configuration, including uploaded driver jars, from the client before starting the application. -Spark supports using SSL to encrypt the traffic in this bootstrapping process. It is recommended to configure this -whenever possible. +### Accessing Kubernetes Clusters -See the [security page](security.html) and [configuration](configuration.html) sections for more information on -configuring SSL; use the prefix `spark.ssl.kubernetes.submit` in configuring the SSL-related fields in the context -of submitting to Kubernetes. For example, to set the trustStore used when the local machine communicates with the driver -pod in starting the application, set `spark.ssl.kubernetes.submit.trustStore`. - -One note about the keyStore is that it can be specified as either a file on the client machine or a file in the -container image's disk. 
Thus `spark.ssl.kubernetes.submit.keyStore` can be a URI with a scheme of either `file:` -or `local:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto -the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme -`local:`, the file is assumed to already be on the container's disk at the appropriate path. - -### Kubernetes Clusters and the authenticated proxy endpoint +For details about running on public cloud environments, such as Google Container Engine (GKE), refer to [running Spark in the cloud with Kubernetes](running-on-kubernetes-cloud.md). Spark-submit also supports submission through the [local kubectl proxy](https://kubernetes.io/docs/user-guide/accessing-the-cluster/#using-kubectl-proxy). One can use the @@ -112,16 +108,36 @@ If our local proxy were listening on port 8001, we would have our submission loo --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=registry-host:5000/spark-driver:latest \ - --conf spark.kubernetes.executor.docker.image=registry-host:5000/spark-executor:latest \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1 \ examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. The above mechanism using `kubectl proxy` can be used when we have authentication providers that the fabric8 -kubernetes-client library does not support. Authentication using X509 Client Certs and oauth tokens +kubernetes-client library does not support. Authentication using X509 Client Certs and OAuth tokens is currently supported. -### Determining the Driver Base URI +## Advanced + +### Setting Up SSL For Submitting the Driver + +When submitting to Kubernetes, a pod is started for the driver, and the pod starts an HTTP server. This HTTP server +receives the driver's configuration, including uploaded driver jars, from the client before starting the application. +Spark supports using SSL to encrypt the traffic in this bootstrapping process. It is recommended to configure this +whenever possible. + +See the [security page](security.html) and [configuration](configuration.html) sections for more information on +configuring SSL; use the prefix `spark.ssl.kubernetes.submit` in configuring the SSL-related fields in the context +of submitting to Kubernetes. For example, to set the trustStore used when the local machine communicates with the driver +pod in starting the application, set `spark.ssl.kubernetes.submit.trustStore`. + +One note about the keyStore is that it can be specified as either a file on the client machine or a file in the +container image's disk. Thus `spark.ssl.kubernetes.submit.keyStore` can be a URI with a scheme of either `file:` +or `local:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto +the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme +`local:`, the file is assumed to already be on the container's disk at the appropriate path. + +### Submission of Local Files through Ingress/External controller Kubernetes pods run with their own IP address space. 
If Spark is run in cluster mode, the driver pod may not be accessible to the submitter. However, the submitter needs to send local dependencies from its local disk to the driver diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 25b62ba35a193..d70c38fdc64d5 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -53,6 +53,14 @@ Afterwards, the integration tests can be executed with Maven or your IDE. Note t `pre-integration-test` phase must be run every time the Spark main code changes. When running tests from the command line, the `pre-integration-test` phase should automatically be invoked if the `integration-test` phase is run. +After the above step, the integration test can be run using the following command: + +```sh +build/mvn integration-test \ + -Pkubernetes -Pkubernetes-integration-tests \ + -pl resource-managers/kubernetes/integration-tests -am +``` + # Preserve the Minikube VM The integration tests make use of [Minikube](https://github.com/kubernetes/minikube), which fires up a virtual machine From face1f43d4fb764cc2b12e620e34a19ac568e2a8 Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Wed, 8 Mar 2017 12:31:41 -0800 Subject: [PATCH 078/225] Add Apache license to a few files (#175) * Add Apache license to a few files * Ignore license check on META-INF service (cherry picked from commit 2a6143804b0f3112b1aefb90f2aa3efb1e04b525) --- dev/.rat-excludes | 1 + ....deploy.rest.kubernetes.DriverServiceManager | 2 +- .../src/main/docker/driver/Dockerfile | 17 +++++++++++++++++ .../src/main/docker/executor/Dockerfile | 17 +++++++++++++++++ 4 files changed, 36 insertions(+), 1 deletion(-) diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 2355d40d1e6fe..8998e752a9bd8 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -106,3 +106,4 @@ org.apache.spark.deploy.yarn.security.ServiceCredentialProvider spark-warehouse structured-streaming/* kafka-source-initial-offset-version-2.1.0.bin +org.apache.spark.deploy.rest.kubernetes.DriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager index 5a306335b4166..56203ee38ac99 100644 --- a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager +++ b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager @@ -1,2 +1,2 @@ org.apache.spark.deploy.rest.kubernetes.ExternalSuppliedUrisDriverServiceManager -org.apache.spark.deploy.rest.kubernetes.NodePortUrisDriverServiceManager \ No newline at end of file +org.apache.spark.deploy.rest.kubernetes.NodePortUrisDriverServiceManager diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 92fdfb8ac5f41..3bf6b50ff69c1 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + FROM openjdk:8-alpine # If this docker file is being used in the context of building your images from a Spark distribution, the docker build diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index a225110d55c14..cd5ac466a1fa0 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -1,3 +1,20 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + FROM openjdk:8-alpine # If this docker file is being used in the context of building your images from a Spark distribution, the docker build From 804d0f802430cc8087a652b674ac4da6aca9794f Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 8 Mar 2017 14:26:06 -0700 Subject: [PATCH 079/225] Adding clarification pre-alpha (#181) (cherry picked from commit be109ab952dee94c91147641a2da933ee4d1bd42) --- docs/running-on-kubernetes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 73c28ec69919b..bc0d89bf13d5e 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -10,7 +10,7 @@ currently limited and not well-tested. This should not be used in production env * You must have a running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes cluster, you may setup a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). * You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), [nodes](https://kubernetes.io/docs/admin/node/) and [services](https://kubernetes.io/docs/user-guide/services/) in your cluster. 
You can verify that you can list these resources by running `kubectl get nodes`, `kubectl get pods` and `kubectl get svc` which should give you a list of nodes, pods and services (if any) respectively. -* You must have an extracted spark distribution with Kubernetes support, or build one from [source](https://github.com/apache-spark-on-k8s/spark). +* You must [build Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support) from source. ## Driver & Executor Images From c5ab210c1a0cb8d17ab479af82705131bd72d4e4 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 13 Mar 2017 14:52:28 -0700 Subject: [PATCH 080/225] Allow providing an OAuth token for authenticating against k8s (#180) * Allow providing an OAuth token for authenticating against k8s * Organize imports * Fix style * Remove extra newline * Use OAuth token data instead of a file. (cherry picked from commit 1aba3617c6e10b4e712d49bd8c39fa9e2cbfad46) --- docs/running-on-kubernetes.md | 8 ++++++++ .../org/apache/spark/deploy/kubernetes/Client.scala | 6 ++++++ .../org/apache/spark/deploy/kubernetes/config.scala | 12 ++++++++++++ 3 files changed, 26 insertions(+) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index bc0d89bf13d5e..c1f3a3ca653b9 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -222,6 +222,14 @@ from the other deployment modes. See the [configuration page](configuration.html machine's disk. + + spark.kubernetes.submit.oauthToken + (none) + + OAuth token to use when authenticating against the against the Kubernetes API server. Note that unlike the other + authentication options, this should be the exact string value of the token to use for the authentication. + + spark.kubernetes.submit.serviceAccountName default diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 770821e97d12c..6f715ebad2d75 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -21,6 +21,7 @@ import java.security.SecureRandom import java.util.ServiceLoader import java.util.concurrent.{CountDownLatch, TimeUnit} +import com.google.common.base.Charsets import com.google.common.io.Files import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model._ @@ -131,6 +132,11 @@ private[spark] class Client( sparkConf.get(KUBERNETES_CLIENT_CERT_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) } + sparkConf.get(KUBERNETES_OAUTH_TOKEN).foreach { token => + k8ConfBuilder = k8ConfBuilder.withOauthToken(token) + // Remove the oauth token from Spark conf so that its doesn't appear in the Spark UI. 
+ sparkConf.set(KUBERNETES_OAUTH_TOKEN, "") + } val k8ClientConfig = k8ConfBuilder.build Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig)) { kubernetesClient => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index dc61ad4025f0f..0c4269080335f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -83,6 +83,18 @@ package object config { .stringConf .createOptional + private[spark] val KUBERNETES_OAUTH_TOKEN = + ConfigBuilder("spark.kubernetes.submit.oauthToken") + .doc(""" + | OAuth token to use when authenticating against the + | against the Kubernetes API server. Note that unlike + | the other authentication options, this should be the + | exact string value of the token to use for the + | authentication. + """.stripMargin) + .stringConf + .createOptional + private[spark] val KUBERNETES_SERVICE_ACCOUNT_NAME = ConfigBuilder("spark.kubernetes.submit.serviceAccountName") .doc(""" From ffacd1f04da4922fc3c6d3155ee79195ecda0a5c Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Fri, 17 Mar 2017 16:09:31 -0700 Subject: [PATCH 081/225] Allow the driver pod's credentials to be shipped from the submission client (squashed) (#192) * Allow the driver pod's credentials to be shipped through secrets. * Fix scalastyle * Change apiserver -> authentication * Address comments. Also some quality of life fixes, most notably formatting all of the documentation strings in config.scala to no longer use triple quotes. Triple quoted strings are difficult to format consistently. * Fix scalastyle * Fix comment * Remove unnecessary constants * Remove unnecessary whitespace * Authentication -> Authenticate --- docs/running-on-kubernetes.md | 88 ++++-- .../spark/deploy/kubernetes/Client.scala | 46 +-- ...iverPodKubernetesCredentialsProvider.scala | 66 +++++ .../kubernetes/KubernetesClientBuilder.scala | 65 +++-- .../kubernetes/SslConfigurationProvider.scala | 3 +- .../spark/deploy/kubernetes/config.scala | 261 +++++++++--------- .../spark/deploy/kubernetes/constants.scala | 5 +- .../rest/KubernetesRestProtocolMessages.scala | 21 +- .../KubernetesSparkRestServer.scala | 47 ++++ .../NodePortUrisDriverServiceManager.scala | 4 +- .../KubernetesClusterSchedulerBackend.scala | 9 +- .../integrationtest/KubernetesSuite.scala | 225 ++++++--------- 12 files changed, 498 insertions(+), 342 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index c1f3a3ca653b9..dcfa70a85a970 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -66,7 +66,7 @@ The Spark master, specified either via passing the `--master` command line argum master string with `k8s://` will cause the Spark application to launch on the Kubernetes cluster, with the API server being contacted at `api_server_url`. If no HTTP protocol is specified in the URL, it defaults to `https`. For example, setting the master to `k8s://example.com:443` is equivalent to setting it to `k8s://https://example.com:443`, but to -connect without SSL on a different port, the master would be set to `k8s://http://example.com:8443`. 
+connect without TLS on a different port, the master would be set to `k8s://http://example.com:8443`. If you have a Kubernetes cluster setup, one way to discover the apiserver URL is by executing `kubectl cluster-info`. @@ -119,20 +119,20 @@ is currently supported. ## Advanced -### Setting Up SSL For Submitting the Driver +### Setting Up TLS For Submitting the Driver When submitting to Kubernetes, a pod is started for the driver, and the pod starts an HTTP server. This HTTP server receives the driver's configuration, including uploaded driver jars, from the client before starting the application. -Spark supports using SSL to encrypt the traffic in this bootstrapping process. It is recommended to configure this +Spark supports using TLS to encrypt the traffic in this bootstrapping process. It is recommended to configure this whenever possible. See the [security page](security.html) and [configuration](configuration.html) sections for more information on -configuring SSL; use the prefix `spark.ssl.kubernetes.submit` in configuring the SSL-related fields in the context +configuring TLS; use the prefix `spark.ssl.kubernetes.submission` in configuring the TLS-related fields in the context of submitting to Kubernetes. For example, to set the trustStore used when the local machine communicates with the driver -pod in starting the application, set `spark.ssl.kubernetes.submit.trustStore`. +pod in starting the application, set `spark.ssl.kubernetes.submission.trustStore`. One note about the keyStore is that it can be specified as either a file on the client machine or a file in the -container image's disk. Thus `spark.ssl.kubernetes.submit.keyStore` can be a URI with a scheme of either `file:` +container image's disk. Thus `spark.ssl.kubernetes.submission.keyStore` can be a URI with a scheme of either `file:` or `local:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme `local:`, the file is assumed to already be on the container's disk at the appropriate path. @@ -200,42 +200,88 @@ from the other deployment modes. See the [configuration page](configuration.html - spark.kubernetes.submit.caCertFile + spark.kubernetes.authenticate.submission.caCertFile (none) - CA cert file for connecting to Kubernetes over SSL. This file should be located on the submitting machine's disk. + Path to the CA cert file for connecting to the Kubernetes API server over TLS when starting the driver. This file + must be located on the submitting machine's disk. Specify this as a path as opposed to a URI (i.e. do not provide + a scheme). - spark.kubernetes.submit.clientKeyFile + spark.kubernetes.authenticate.submission.clientKeyFile (none) - Client key file for authenticating against the Kubernetes API server. This file should be located on the submitting - machine's disk. + Path to the client key file for authenticating against the Kubernetes API server when starting the driver. This file + must be located on the submitting machine's disk. Specify this as a path as opposed to a URI (i.e. do not provide + a scheme). - spark.kubernetes.submit.clientCertFile + spark.kubernetes.authenticate.submission.clientCertFile (none) - Client cert file for authenticating against the Kubernetes API server. This file should be located on the submitting - machine's disk. 
+ Path to the client cert file for authenticating against the Kubernetes API server when starting the driver. This + file must be located on the submitting machine's disk. Specify this as a path as opposed to a URI (i.e. do not + provide a scheme). - spark.kubernetes.submit.oauthToken + spark.kubernetes.authenticate.submission.oauthToken (none) - OAuth token to use when authenticating against the against the Kubernetes API server. Note that unlike the other - authentication options, this should be the exact string value of the token to use for the authentication. + OAuth token to use when authenticating against the Kubernetes API server when starting the driver. Note + that unlike the other authentication options, this is expected to be the exact string value of the token to use for + the authentication. - spark.kubernetes.submit.serviceAccountName + spark.kubernetes.authenticate.driver.caCertFile + (none) + + Path to the CA cert file for connecting to the Kubernetes API server over TLS from the driver pod when requesting + executors. This file must be located on the submitting machine's disk, and will be uploaded to the driver pod. + Specify this as a path as opposed to a URI (i.e. do not provide a scheme). + + + + spark.kubernetes.authenticate.driver.clientKeyFile + (none) + + Path to the client key file for authenticating against the Kubernetes API server from the driver pod when requesting + executors. This file must be located on the submitting machine's disk, and will be uploaded to the driver pod. + Specify this as a path as opposed to a URI (i.e. do not provide a scheme). If this is specified, it is highly + recommended to set up TLS for the driver submission server, as this value is sensitive information that would be + passed to the driver pod in plaintext otherwise. + + + + spark.kubernetes.authenticate.driver.clientCertFile + (none) + + Path to the client cert file for authenticating against the Kubernetes API server from the driver pod when + requesting executors. This file must be located on the submitting machine's disk, and will be uploaded to the + driver pod. Specify this as a path as opposed to a URI (i.e. do not provide a scheme). + + + + spark.kubernetes.authenticate.driver.oauthToken + (none) + + OAuth token to use when authenticating against the against the Kubernetes API server from the driver pod when + requesting executors. Note that unlike the other authentication options, this must be the exact string value of + the token to use for the authentication. This token value is uploaded to the driver pod. If this is specified, it is + highly recommended to set up TLS for the driver submission server, as this value is sensitive information that would + be passed to the driver pod in plaintext otherwise. + + + + spark.kubernetes.authenticate.driver.serviceAccountName default Service account that is used when running the driver pod. The driver pod uses this service account when requesting - executor pods from the API server. + executor pods from the API server. Note that this cannot be specified alongside a CA cert file, client key file, + client cert file, and/or OAuth token. @@ -281,7 +327,7 @@ from the other deployment modes. See the [configuration page](configuration.html - spark.kubernetes.driverSubmitTimeout + spark.kubernetes.driverSubmissionTimeout 60s Time to wait for the driver pod to start running before aborting its execution. @@ -296,7 +342,7 @@ from the other deployment modes. 
See the [configuration page](configuration.html - spark.kubernetes.submit.waitAppCompletion + spark.kubernetes.submission.waitAppCompletion true In cluster mode, whether to wait for the application to finish before exiting the launcher process. When changed to diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 6f715ebad2d75..e6b2e31568653 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -21,7 +21,6 @@ import java.security.SecureRandom import java.util.ServiceLoader import java.util.concurrent.{CountDownLatch, TimeUnit} -import com.google.common.base.Charsets import com.google.common.io.Files import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model._ @@ -33,7 +32,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, RemoteAppResource, UploadedAppResource} +import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, KubernetesCredentials, RemoteAppResource, UploadedAppResource} import org.apache.spark.deploy.rest.kubernetes._ import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} @@ -53,7 +52,7 @@ private[spark] class Client( .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") private val secretName = s"$SUBMISSION_APP_SECRET_PREFIX-$kubernetesAppId" - private val secretDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId" + private val secretDirectory = s"$DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR/$kubernetesAppId" private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) @@ -119,23 +118,22 @@ private[spark] class Client( customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") + val driverPodKubernetesCredentials = new DriverPodKubernetesCredentialsProvider(sparkConf).get() var k8ConfBuilder = new K8SConfigBuilder() .withApiVersion("v1") .withMasterUrl(master) .withNamespace(namespace) - sparkConf.get(KUBERNETES_CA_CERT_FILE).foreach { + sparkConf.get(KUBERNETES_SUBMIT_CA_CERT_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) } - sparkConf.get(KUBERNETES_CLIENT_KEY_FILE).foreach { + sparkConf.get(KUBERNETES_SUBMIT_CLIENT_KEY_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) } - sparkConf.get(KUBERNETES_CLIENT_CERT_FILE).foreach { + sparkConf.get(KUBERNETES_SUBMIT_CLIENT_CERT_FILE).foreach { f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) } - sparkConf.get(KUBERNETES_OAUTH_TOKEN).foreach { token => + sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { token => k8ConfBuilder = k8ConfBuilder.withOauthToken(token) - // Remove the oauth token from Spark conf so that its doesn't appear in the Spark UI. 
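To make the split in the table above concrete, here is a hypothetical submission-side configuration (every value is a placeholder, and every path refers to the submitting machine's disk): the `spark.kubernetes.authenticate.submission.*` entries are read only by the submission client when it creates the driver pod, while the `spark.kubernetes.authenticate.driver.*` entries are uploaded to the driver pod so it can authenticate when requesting executors.

    import org.apache.spark.SparkConf

    val conf = new SparkConf()
      .set("spark.master", "k8s://https://apiserver.example.com:6443")
      // Used once by the submission client to create the driver pod, service and secrets.
      .set("spark.kubernetes.authenticate.submission.caCertFile", "/etc/k8s/submission-ca.crt")
      .set("spark.kubernetes.authenticate.submission.clientCertFile", "/etc/k8s/submission.crt")
      .set("spark.kubernetes.authenticate.submission.clientKeyFile", "/etc/k8s/submission.key")
      // Shipped to the driver pod, which uses them when requesting executor pods. These cannot
      // be combined with spark.kubernetes.authenticate.driver.serviceAccountName.
      .set("spark.kubernetes.authenticate.driver.caCertFile", "/etc/k8s/driver-ca.crt")
      .set("spark.kubernetes.authenticate.driver.clientCertFile", "/etc/k8s/driver.crt")
      .set("spark.kubernetes.authenticate.driver.clientKeyFile", "/etc/k8s/driver.key")

If the driver-side client key or OAuth token is supplied this way, enabling TLS for the driver submission server is strongly recommended, since those values otherwise travel to the pod in plaintext.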
- sparkConf.set(KUBERNETES_OAUTH_TOKEN, "") } val k8ClientConfig = k8ConfBuilder.build @@ -174,11 +172,6 @@ private[spark] class Client( .done() kubernetesResourceCleaner.registerOrUpdateResource(submitServerSecret) val sslConfiguration = sslConfigurationProvider.getSslConfiguration() - val driverKubernetesSelectors = (Map( - SPARK_DRIVER_LABEL -> kubernetesAppId, - SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_APP_NAME_LABEL -> appName) - ++ parsedCustomLabels) val (driverPod, driverService) = launchDriverKubernetesComponents( kubernetesClient, driverServiceManager, @@ -198,7 +191,8 @@ private[spark] class Client( sslConfiguration, driverService, submitterLocalFiles, - submitterLocalJars) + submitterLocalJars, + driverPodKubernetesCredentials) // Now that the application has started, persist the components that were created beyond // the shutdown hook. We still want to purge the one-time secrets, so do not unregister // those. @@ -245,7 +239,8 @@ private[spark] class Client( sslConfiguration: SslConfiguration, driverService: Service, submitterLocalFiles: Iterable[String], - submitterLocalJars: Iterable[String]): Unit = { + submitterLocalJars: Iterable[String], + driverPodKubernetesCredentials: KubernetesCredentials): Unit = { sparkConf.getOption("spark.app.id").foreach { id => logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + s" overridden as $kubernetesAppId") @@ -257,6 +252,12 @@ private[spark] class Client( sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) sparkConf.setIfMissing("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT.toString) + sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => + sparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") + } + sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => + sparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN, "") + } val driverSubmitter = buildDriverSubmissionClient( kubernetesClient, driverServiceManager, @@ -266,7 +267,10 @@ private[spark] class Client( driverSubmitter.ping() logInfo(s"Submitting local resources to driver pod for application " + s"$kubernetesAppId ...") - val submitRequest = buildSubmissionRequest(submitterLocalFiles, submitterLocalJars) + val submitRequest = buildSubmissionRequest( + submitterLocalFiles, + submitterLocalJars, + driverPodKubernetesCredentials) driverSubmitter.submitApplication(submitRequest) logInfo("Successfully submitted local resources and driver configuration to" + " driver pod.") @@ -449,7 +453,7 @@ private[spark] class Client( .endSecret() .endVolume() .addToVolumes(sslConfiguration.sslPodVolumes: _*) - .withServiceAccount(serviceAccount) + .withServiceAccount(serviceAccount.getOrElse("default")) .addNewContainer() .withName(DRIVER_CONTAINER_NAME) .withImage(driverDockerImage) @@ -625,7 +629,8 @@ private[spark] class Client( private def buildSubmissionRequest( submitterLocalFiles: Iterable[String], - submitterLocalJars: Iterable[String]): KubernetesCreateSubmissionRequest = { + submitterLocalJars: Iterable[String], + driverPodKubernetesCredentials: KubernetesCredentials): KubernetesCreateSubmissionRequest = { val mainResourceUri = Utils.resolveURI(mainAppResource) val resolvedAppResource: AppResource = Option(mainResourceUri.getScheme) .getOrElse("file") match { @@ -648,7 +653,8 @@ private[spark] class Client( secret = secretBase64String, sparkProperties = sparkConf.getAll.toMap, uploadedJarsBase64Contents = uploadJarsBase64Contents, - uploadedFilesBase64Contents = uploadFilesBase64Contents) + uploadedFilesBase64Contents = 
uploadFilesBase64Contents, + driverPodKubernetesCredentials = driverPodKubernetesCredentials) } private def buildDriverSubmissionClient( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala new file mode 100644 index 0000000000000..cee47aad79393 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.io.File + +import com.google.common.io.{BaseEncoding, Files} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.internal.config.OptionalConfigEntry + +private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { + + def get(): KubernetesCredentials = { + sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME).foreach { _ => + require(sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).isEmpty, + "Cannot specify both a service account and a driver pod OAuth token.") + require(sparkConf.get(KUBERNETES_DRIVER_CA_CERT_FILE).isEmpty, + "Cannot specify both a service account and a driver pod CA cert file.") + require(sparkConf.get(KUBERNETES_DRIVER_CLIENT_KEY_FILE).isEmpty, + "Cannot specify both a service account and a driver pod client key file.") + require(sparkConf.get(KUBERNETES_DRIVER_CLIENT_CERT_FILE).isEmpty, + "Cannot specify both a service account and a driver pod client cert file.") + } + val oauthToken = sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN) + val caCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CA_CERT_FILE, + s"Driver CA cert file provided at %s does not exist or is not a file.") + val clientKeyDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + s"Driver client key file provided at %s does not exist or is not a file.") + val clientCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + s"Driver client cert file provided at %s does not exist or is not a file.") + val serviceAccountName = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) + KubernetesCredentials( + oauthToken = oauthToken, + caCertDataBase64 = caCertDataBase64, + clientKeyDataBase64 = clientKeyDataBase64, + clientCertDataBase64 = clientCertDataBase64) + } + + private def safeFileConfToBase64( + conf: OptionalConfigEntry[String], + fileNotFoundFormatString: String): Option[String] = { + sparkConf.get(conf) + .map(new File(_)) + .map { file => + require(file.isFile, 
String.format(fileNotFoundFormatString, file.getAbsolutePath)) + BaseEncoding.base64().encode(Files.toByteArray(file)) + } + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala index 89369b30694ee..554ed17ff25c4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala @@ -22,33 +22,62 @@ import com.google.common.base.Charsets import com.google.common.io.Files import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -private[spark] object KubernetesClientBuilder { - private val API_SERVER_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) - private val CA_CERT_FILE = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) +private[spark] class KubernetesClientBuilder(sparkConf: SparkConf, namespace: String) { + private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) + private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) + private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) + private val caCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) + private val clientKeyFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) + private val clientCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) /** - * Creates a {@link KubernetesClient}, expecting to be from - * within the context of a pod. When doing so, credentials files - * are picked up from canonical locations, as they are injected - * into the pod's disk space. + * Creates a {@link KubernetesClient}, expecting to be from within the context of a pod. When + * doing so, service account token files can be picked up from canonical locations. 
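A short usage sketch of the provider above, paired with the decode step performed on the other end of the wire. The conf key and the Guava helpers are the ones used in this commit; the file paths are purely illustrative.

    import java.io.File
    import com.google.common.io.{BaseEncoding, Files}
    import org.apache.spark.SparkConf
    import org.apache.spark.deploy.kubernetes.DriverPodKubernetesCredentialsProvider

    val sparkConf = new SparkConf()
      // Hypothetical path on the submitting machine.
      .set("spark.kubernetes.authenticate.driver.caCertFile", "/etc/k8s/driver-ca.crt")

    // Submission side: read the file and base64-encode it so it can travel in the JSON request.
    val credentials = new DriverPodKubernetesCredentialsProvider(sparkConf).get()

    // Driver side (conceptually what the REST server does later in this commit): decode the
    // payload back into a file that the in-pod Kubernetes client can point at.
    credentials.caCertDataBase64.foreach { encoded =>
      val caCertFile = new File("/tmp/kubernetes-credentials/ca.crt")  // illustrative location
      caCertFile.getParentFile.mkdirs()
      Files.write(BaseEncoding.base64().decode(encoded), caCertFile)
    }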
*/ - def buildFromWithinPod( - kubernetesNamespace: String): DefaultKubernetesClient = { - var clientConfigBuilder = new ConfigBuilder() + def buildFromWithinPod(): DefaultKubernetesClient = { + val baseClientConfigBuilder = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) - .withNamespace(kubernetesNamespace) + .withNamespace(namespace) - if (CA_CERT_FILE.isFile) { - clientConfigBuilder = clientConfigBuilder.withCaCertFile(CA_CERT_FILE.getAbsolutePath) - } + val configBuilder = oauthTokenFile + .orElse(caCertFile) + .orElse(clientKeyFile) + .orElse(clientCertFile) + .map { _ => + var mountedAuthConfigBuilder = baseClientConfigBuilder + oauthTokenFile.foreach { tokenFilePath => + val tokenFile = new File(tokenFilePath) + mountedAuthConfigBuilder = mountedAuthConfigBuilder + .withOauthToken(Files.toString(tokenFile, Charsets.UTF_8)) + } + caCertFile.foreach { caFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withCaCertFile(caFile) + } + clientKeyFile.foreach { keyFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientKeyFile(keyFile) + } + clientCertFile.foreach { certFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientCertFile(certFile) + } + mountedAuthConfigBuilder + }.getOrElse { + var serviceAccountConfigBuilder = baseClientConfigBuilder + if (SERVICE_ACCOUNT_CA_CERT.isFile) { + serviceAccountConfigBuilder = serviceAccountConfigBuilder.withCaCertFile( + SERVICE_ACCOUNT_CA_CERT.getAbsolutePath) + } - if (API_SERVER_TOKEN.isFile) { - clientConfigBuilder = clientConfigBuilder.withOauthToken( - Files.toString(API_SERVER_TOKEN, Charsets.UTF_8)) + if (SERVICE_ACCOUNT_TOKEN.isFile) { + serviceAccountConfigBuilder = serviceAccountConfigBuilder.withOauthToken( + Files.toString(SERVICE_ACCOUNT_TOKEN, Charsets.UTF_8)) + } + serviceAccountConfigBuilder } - new DefaultKubernetesClient(clientConfigBuilder.build) + new DefaultKubernetesClient(configBuilder.build) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala index 4c031fcba91ab..4bbe3ed385a4d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala @@ -49,7 +49,8 @@ private[spark] class SslConfigurationProvider( kubernetesResourceCleaner: KubernetesResourceCleaner) { private val SECURE_RANDOM = new SecureRandom() private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" - private val sslSecretsDirectory = s"$DRIVER_CONTAINER_SECRETS_BASE_DIR/$kubernetesAppId-ssl" + private val sslSecretsDirectory = DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR + + s"/$kubernetesAppId-ssl" def getSslConfiguration(): SslConfiguration = { val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 0c4269080335f..e33c761ecc8d1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ 
-27,233 +27,240 @@ package object config { private[spark] val KUBERNETES_NAMESPACE = ConfigBuilder("spark.kubernetes.namespace") - .doc(""" - | The namespace that will be used for running the driver and - | executor pods. When using spark-submit in cluster mode, - | this can also be passed to spark-submit via the - | --kubernetes-namespace command line argument. - """.stripMargin) + .doc("The namespace that will be used for running the driver and executor pods. When using" + + " spark-submit in cluster mode, this can also be passed to spark-submit via the" + + " --kubernetes-namespace command line argument.") .stringConf .createWithDefault("default") private[spark] val DRIVER_DOCKER_IMAGE = ConfigBuilder("spark.kubernetes.driver.docker.image") - .doc(""" - | Docker image to use for the driver. Specify this using the - | standard Docker tag format. - """.stripMargin) + .doc("Docker image to use for the driver. Specify this using the standard Docker tag format.") .stringConf .createWithDefault(s"spark-driver:$sparkVersion") private[spark] val EXECUTOR_DOCKER_IMAGE = ConfigBuilder("spark.kubernetes.executor.docker.image") - .doc(""" - | Docker image to use for the executors. Specify this using - | the standard Docker tag format. - """.stripMargin) + .doc("Docker image to use for the executors. Specify this using the standard Docker tag" + + " format.") .stringConf .createWithDefault(s"spark-executor:$sparkVersion") - private[spark] val KUBERNETES_CA_CERT_FILE = - ConfigBuilder("spark.kubernetes.submit.caCertFile") - .doc(""" - | CA cert file for connecting to Kubernetes over SSL. This - | file should be located on the submitting machine's disk. - """.stripMargin) + private val APISERVER_SUBMIT_CONF_PREFIX = "spark.kubernetes.authenticate.submission" + private val APISERVER_DRIVER_CONF_PREFIX = "spark.kubernetes.authenticate.driver" + + private[spark] val KUBERNETES_SUBMIT_CA_CERT_FILE = + ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.caCertFile") + .doc("Path to the CA cert file for connecting to Kubernetes over SSL when creating" + + " Kubernetes resources for the driver. This file should be located on the submitting" + + " machine's disk.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_SUBMIT_CLIENT_KEY_FILE = + ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.clientKeyFile") + .doc("Path to the client key file for authenticating against the Kubernetes API server" + + " when initially creating Kubernetes resources for the driver. This file should be" + + " located on the submitting machine's disk.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_SUBMIT_CLIENT_CERT_FILE = + ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.clientCertFile") + .doc("Path to the client cert file for authenticating against the Kubernetes API server" + + " when initially creating Kubernetes resources for the driver. This file should be" + + " located on the submitting machine's disk.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_SUBMIT_OAUTH_TOKEN = + ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.oauthToken") + .doc("OAuth token to use when authenticating against the against the Kubernetes API server" + + " when initially creating Kubernetes resources for the driver. 
Note that unlike the other" + + " authentication options, this should be the exact string value of the token to use for" + + " the authentication.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_CA_CERT_FILE = + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.caCertFile") + .doc("Path to the CA cert file for connecting to Kubernetes over TLS from the driver pod" + + " when requesting executors. This file should be located on the submitting machine's disk" + + " and will be uploaded to the driver pod.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_CLIENT_KEY_FILE = + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.clientKeyFile") + .doc("Path to the client key file for authenticating against the Kubernetes API server from" + + " the driver pod when requesting executors. This file should be located on the submitting" + + " machine's disk, and will be uploaded to the driver pod.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_CLIENT_CERT_FILE = + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.clientCertFile") + .doc("Path to the client cert file for authenticating against the Kubernetes API server" + + " from the driver pod when requesting executors. This file should be located on the" + + " submitting machine's disk, and will be uploaded to the driver pod.") .stringConf .createOptional - private[spark] val KUBERNETES_CLIENT_KEY_FILE = - ConfigBuilder("spark.kubernetes.submit.clientKeyFile") - .doc(""" - | Client key file for authenticating against the Kubernetes - | API server. This file should be located on the submitting - | machine's disk. - """.stripMargin) + private[spark] val KUBERNETES_DRIVER_OAUTH_TOKEN = + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.oauthToken") + .doc("OAuth token to use when authenticating against the Kubernetes API server from the" + + " driver pod when requesting executors. Note that unlike the other authentication options" + + " this should be the exact string value of the token to use for the authentication. This" + + " token value is mounted as a secret on the driver pod.") .stringConf .createOptional - private[spark] val KUBERNETES_CLIENT_CERT_FILE = - ConfigBuilder("spark.kubernetes.submit.clientCertFile") - .doc(""" - | Client cert file for authenticating against the - | Kubernetes API server. This file should be located on - | the submitting machine's disk. - """.stripMargin) + private[spark] val KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE = + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.caCertFile") + .doc("Path on the driver pod's disk containing the CA cert file to use when authenticating" + + " against Kubernetes.") .stringConf .createOptional - private[spark] val KUBERNETES_OAUTH_TOKEN = - ConfigBuilder("spark.kubernetes.submit.oauthToken") - .doc(""" - | OAuth token to use when authenticating against the - | against the Kubernetes API server. Note that unlike - | the other authentication options, this should be the - | exact string value of the token to use for the - | authentication. 
- """.stripMargin) + private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE = + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientKeyFile") + .doc("Path on the driver pod's disk containing the client key file to use when" + + " authenticating against Kubernetes.") + .internal() + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE = + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientCertFile") + .doc("Path on the driver pod's disk containing the client cert file to use when" + + " authenticating against Kubernetes.") + .internal() + .stringConf + .createOptional + + private[spark] val KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN = + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.oauthTokenFile") + .doc("Path on the driver pod's disk containing the OAuth token file to use when" + + " authenticating against Kubernetes.") + .internal() .stringConf .createOptional private[spark] val KUBERNETES_SERVICE_ACCOUNT_NAME = - ConfigBuilder("spark.kubernetes.submit.serviceAccountName") - .doc(""" - | Service account that is used when running the driver pod. - | The driver pod uses this service account when requesting - | executor pods from the API server. - """.stripMargin) + ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.serviceAccountName") + .doc("Service account that is used when running the driver pod. The driver pod uses" + + " this service account when requesting executor pods from the API server. If specific" + + " credentials are given for the driver pod to use, the driver will favor" + + " using those credentials instead.") .stringConf - .createWithDefault("default") + .createOptional // Note that while we set a default for this when we start up the // scheduler, the specific default value is dynamically determined // based on the executor memory. private[spark] val KUBERNETES_EXECUTOR_MEMORY_OVERHEAD = ConfigBuilder("spark.kubernetes.executor.memoryOverhead") - .doc(""" - | The amount of off-heap memory (in megabytes) to be - | allocated per executor. This is memory that accounts for - | things like VM overheads, interned strings, other native - | overheads, etc. This tends to grow with the executor size - | (typically 6-10%). - """.stripMargin) + .doc("The amount of off-heap memory (in megabytes) to be allocated per executor. This" + + " is memory that accounts for things like VM overheads, interned strings, other native" + + " overheads, etc. This tends to grow with the executor size. (typically 6-10%).") .bytesConf(ByteUnit.MiB) .createOptional private[spark] val KUBERNETES_DRIVER_MEMORY_OVERHEAD = ConfigBuilder("spark.kubernetes.driver.memoryOverhead") - .doc(""" - | The amount of off-heap memory (in megabytes) to be - | allocated for the driver and the driver submission server. - | This is memory that accounts for things like VM overheads, - | interned strings, other native overheads, etc. This tends - | to grow with the driver's memory size (typically 6-10%). - """.stripMargin) + .doc("The amount of off-heap memory (in megabytes) to be allocated for the driver and the" + + " driver submission server. This is memory that accounts for things like VM overheads," + + " interned strings, other native overheads, etc. This tends to grow with the driver's" + + " memory size (typically 6-10%).") .bytesConf(ByteUnit.MiB) .createOptional private[spark] val KUBERNETES_DRIVER_LABELS = ConfigBuilder("spark.kubernetes.driver.labels") - .doc(""" - | Custom labels that will be added to the driver pod. 
- | This should be a comma-separated list of label key-value - | pairs, where each label is in the format key=value. Note - | that Spark also adds its own labels to the driver pod - | for bookkeeping purposes. - """.stripMargin) + .doc("Custom labels that will be added to the driver pod. This should be a comma-separated" + + " list of label key-value pairs, where each label is in the format key=value. Note that" + + " Spark also adds its own labels to the driver pod for bookkeeping purposes.") .stringConf .createOptional private[spark] val KUBERNETES_DRIVER_ANNOTATIONS = ConfigBuilder("spark.kubernetes.driver.annotations") - .doc(""" - | Custom annotations that will be added to the driver pod. - | This should be a comma-separated list of annotation key-value - | pairs, where each annotation is in the format key=value. - """.stripMargin) + .doc("Custom annotations that will be added to the driver pod. This should be a" + + " comma-separated list of annotation key-value pairs, where each annotation is in the" + + " format key=value.") .stringConf .createOptional private[spark] val KUBERNETES_DRIVER_SUBMIT_TIMEOUT = - ConfigBuilder("spark.kubernetes.driverSubmitTimeout") - .doc(""" - | Time to wait for the driver process to start running - | before aborting its execution. - """.stripMargin) + ConfigBuilder("spark.kubernetes.driverSubmissionTimeout") + .doc("Time to wait for the driver process to start running before aborting its execution.") .timeConf(TimeUnit.SECONDS) .createWithDefault(60L) private[spark] val KUBERNETES_DRIVER_SUBMIT_KEYSTORE = - ConfigBuilder("spark.ssl.kubernetes.submit.keyStore") - .doc(""" - | KeyStore file for the driver submission server listening - | on SSL. Can be pre-mounted on the driver container - | or uploaded from the submitting client. - """.stripMargin) + ConfigBuilder("spark.ssl.kubernetes.submission.keyStore") + .doc("KeyStore file for the driver submission server listening on SSL. Can be pre-mounted" + + " on the driver container or uploaded from the submitting client.") .stringConf .createOptional private[spark] val KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE = - ConfigBuilder("spark.ssl.kubernetes.submit.trustStore") - .doc(""" - | TrustStore containing certificates for communicating - | to the driver submission server over SSL. - """.stripMargin) + ConfigBuilder("spark.ssl.kubernetes.submission.trustStore") + .doc("TrustStore containing certificates for communicating to the driver submission server" + + " over SSL.") .stringConf .createOptional private[spark] val DRIVER_SUBMIT_SSL_ENABLED = - ConfigBuilder("spark.ssl.kubernetes.submit.enabled") - .doc(""" - | Whether or not to use SSL when sending the - | application dependencies to the driver pod. - | - """.stripMargin) + ConfigBuilder("spark.ssl.kubernetes.submission.enabled") + .doc("Whether or not to use SSL when sending the application dependencies to the driver pod.") .booleanConf .createWithDefault(false) private[spark] val KUBERNETES_DRIVER_SERVICE_NAME = ConfigBuilder("spark.kubernetes.driver.service.name") - .doc(""" - | Kubernetes service that exposes the driver pod - | for external access. - """.stripMargin) + .doc("Kubernetes service that exposes the driver pod for external access.") .internal() .stringConf .createOptional private[spark] val KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY = ConfigBuilder("spark.kubernetes.driver.submissionServerMemory") - .doc(""" - | The amount of memory to allocate for the driver submission server. 
- """.stripMargin) + .doc("The amount of memory to allocate for the driver submission server.") .bytesConf(ByteUnit.MiB) .createWithDefaultString("256m") private[spark] val EXPOSE_KUBERNETES_DRIVER_SERVICE_UI_PORT = ConfigBuilder("spark.kubernetes.driver.service.exposeUiPort") - .doc(""" - | Whether to expose the driver Web UI port as a service NodePort. Turned off by default - | because NodePort is a limited resource. Use alternatives such as Ingress if possible. - """.stripMargin) + .doc("Whether to expose the driver Web UI port as a service NodePort. Turned off by default" + + " because NodePort is a limited resource. Use alternatives if possible.") .booleanConf .createWithDefault(false) private[spark] val KUBERNETES_DRIVER_POD_NAME = ConfigBuilder("spark.kubernetes.driver.pod.name") - .doc(""" - | Name of the driver pod. - """.stripMargin) + .doc("Name of the driver pod.") .internal() .stringConf .createOptional private[spark] val DRIVER_SERVICE_MANAGER_TYPE = ConfigBuilder("spark.kubernetes.driver.serviceManagerType") - .doc(s""" - | A tag indicating which class to use for creating the - | Kubernetes service and determining its URI for the submission - | client. - """.stripMargin) + .doc("A tag indicating which class to use for creating the Kubernetes service and" + + " determining its URI for the submission client.") .stringConf .createWithDefault(NodePortUrisDriverServiceManager.TYPE) private[spark] val WAIT_FOR_APP_COMPLETION = - ConfigBuilder("spark.kubernetes.submit.waitAppCompletion") - .doc( - """ - | In cluster mode, whether to wait for the application to finish before exiting the - | launcher process. - """.stripMargin) + ConfigBuilder("spark.kubernetes.submission.waitAppCompletion") + .doc("In cluster mode, whether to wait for the application to finish before exiting the" + + " launcher process.") .booleanConf .createWithDefault(true) private[spark] val REPORT_INTERVAL = ConfigBuilder("spark.kubernetes.report.interval") - .doc( - """ - | Interval between reports of the current app status in cluster mode. 
- """.stripMargin) + .doc("Interval between reports of the current app status in cluster mode.") .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("1s") } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 4af065758e674..23d216e799fff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -24,7 +24,8 @@ package object constants { private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" // Secrets - private[spark] val DRIVER_CONTAINER_SECRETS_BASE_DIR = "/var/run/secrets/spark-submission" + private[spark] val DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR = + "/var/run/secrets/spark-submission" private[spark] val SUBMISSION_APP_SECRET_NAME = "spark-submission-server-secret" private[spark] val SUBMISSION_APP_SECRET_PREFIX = "spark-submission-server-secret" private[spark] val SUBMISSION_APP_SECRET_VOLUME_NAME = "spark-submission-secret-volume" @@ -73,7 +74,7 @@ package object constants { // Miscellaneous private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" - private[spark] val KUBERNETES_SUBMIT_SSL_NAMESPACE = "kubernetes.submit" + private[spark] val KUBERNETES_SUBMIT_SSL_NAMESPACE = "kubernetes.submission" private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 private[spark] val MEMORY_OVERHEAD_MIN = 384L diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala index 0d2d1a1c6f5e3..1ea44109c5f5e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala @@ -20,14 +20,21 @@ import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} import org.apache.spark.SPARK_VERSION +case class KubernetesCredentials( + oauthToken: Option[String], + caCertDataBase64: Option[String], + clientKeyDataBase64: Option[String], + clientCertDataBase64: Option[String]) + case class KubernetesCreateSubmissionRequest( - appResource: AppResource, - mainClass: String, - appArgs: Array[String], - sparkProperties: Map[String, String], - secret: String, - uploadedJarsBase64Contents: TarGzippedData, - uploadedFilesBase64Contents: TarGzippedData) extends SubmitRestProtocolRequest { + appResource: AppResource, + mainClass: String, + appArgs: Array[String], + sparkProperties: Map[String, String], + secret: String, + driverPodKubernetesCredentials: KubernetesCredentials, + uploadedJarsBase64Contents: TarGzippedData, + uploadedFilesBase64Contents: TarGzippedData) extends SubmitRestProtocolRequest { message = "create" clientSparkVersion = SPARK_VERSION } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 5952acc0d5916..4688521a59d38 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -31,7 +31,9 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.rest._ +import org.apache.spark.internal.config.OptionalConfigEntry import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} private case class KubernetesSparkRestServerArguments( @@ -152,6 +154,7 @@ private[spark] class KubernetesSparkRestServer( appArgs, sparkProperties, secret, + driverPodKubernetesCredentials, uploadedJars, uploadedFiles) => val decodedSecret = Base64.decodeBase64(secret) @@ -214,6 +217,8 @@ private[spark] class KubernetesSparkRestServer( } else { resolvedSparkProperties.remove("spark.files") } + resolvedSparkProperties ++= writeKubernetesCredentials( + driverPodKubernetesCredentials, tempDir) val command = new ArrayBuffer[String] command += javaExecutable @@ -280,6 +285,48 @@ private[spark] class KubernetesSparkRestServer( CompressionUtils.unpackAndWriteCompressedFiles(files, workingDir) } + private def writeKubernetesCredentials( + kubernetesCredentials: KubernetesCredentials, + rootTempDir: File): Map[String, String] = { + val resolvedDirectory = new File(rootTempDir, "kubernetes-credentials") + if (!resolvedDirectory.mkdir()) { + throw new IllegalStateException(s"Failed to create credentials dir at " + + resolvedDirectory.getAbsolutePath) + } + val oauthTokenFile = writeRawStringCredentialAndGetConf("oauth-token.txt", resolvedDirectory, + KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, kubernetesCredentials.oauthToken) + val caCertFile = writeBase64CredentialAndGetConf("ca.crt", resolvedDirectory, + KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, kubernetesCredentials.caCertDataBase64) + val clientKeyFile = writeBase64CredentialAndGetConf("key.key", resolvedDirectory, + KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE, kubernetesCredentials.clientKeyDataBase64) + val clientCertFile = writeBase64CredentialAndGetConf("cert.crt", resolvedDirectory, + KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE, kubernetesCredentials.clientCertDataBase64) + (oauthTokenFile ++ caCertFile ++ clientKeyFile ++ clientCertFile).toMap + } + + private def writeRawStringCredentialAndGetConf( + fileName: String, + dir: File, + conf: OptionalConfigEntry[String], + credential: Option[String]): Option[(String, String)] = { + credential.map { cred => + val credentialFile = new File(dir, fileName) + Files.write(cred, credentialFile, Charsets.UTF_8) + (conf.key, credentialFile.getAbsolutePath) + } + } + + private def writeBase64CredentialAndGetConf( + fileName: String, + dir: File, + conf: OptionalConfigEntry[String], + credential: Option[String]): Option[(String, String)] = { + credential.map { cred => + val credentialFile = new File(dir, fileName) + Files.write(BaseEncoding.base64().decode(cred), credentialFile) + (conf.key, credentialFile.getAbsolutePath) + } + } /** * Retrieve the path on the driver container where the main app resource is, and what value it diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala index fa8362677f38f..1416476824793 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala @@ -39,8 +39,8 @@ private[spark] class NodePortUrisDriverServiceManager extends DriverServiceManag val urlScheme = if (sparkConf.get(DRIVER_SUBMIT_SSL_ENABLED)) { "https" } else { - logWarning("Submitting application details, application secret, and local" + - " jars to the cluster over an insecure connection. You should configure SSL" + + logWarning("Submitting application details, application secret, Kubernetes credentials," + + " and local jars to the cluster over an insecure connection. You should configure SSL" + " to secure this step.") "http" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 90907ff83ed84..234829a541c30 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -16,17 +16,14 @@ */ package org.apache.spark.scheduler.cluster.kubernetes -import java.util.UUID -import java.util.concurrent.Executors import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} -import com.google.common.util.concurrent.ThreadFactoryBuilder import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, Pod, QuantityBuilder} import scala.collection.JavaConverters._ import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkException} -import org.apache.spark.deploy.kubernetes.{Client, KubernetesClientBuilder} +import org.apache.spark.deploy.kubernetes.KubernetesClientBuilder import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.rpc.RpcEndpointAddress @@ -76,8 +73,8 @@ private[spark] class KubernetesClusterSchedulerBackend( private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) - private val kubernetesClient = KubernetesClientBuilder - .buildFromWithinPod(kubernetesNamespace) + private val kubernetesClient = new KubernetesClientBuilder(conf, kubernetesNamespace) + .buildFromWithinPod() private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). 
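Tying the pieces of this commit together before the test changes below: the submission server persists whatever credentials arrived in the request and advertises their on-disk locations through the internal `.mounted.*` keys, which is exactly what the scheduler backend's `new KubernetesClientBuilder(conf, kubernetesNamespace).buildFromWithinPod()` call consumes. A minimal sketch of that persist-and-advertise step, mirroring `writeKubernetesCredentials` above (only the OAuth token and CA cert are shown; the client key and cert follow the same pattern, and the directory is illustrative):

    import java.io.File
    import com.google.common.base.Charsets
    import com.google.common.io.{BaseEncoding, Files}
    import org.apache.spark.deploy.kubernetes.config._
    import org.apache.spark.deploy.rest.KubernetesCredentials

    def persistCredentials(
        credentials: KubernetesCredentials,
        credentialsDir: File): Map[String, String] = {
      // For each credential that was shipped, write it to a file and return the conf key that
      // tells the in-pod client builder where to find it.
      val token = credentials.oauthToken.map { t =>
        val f = new File(credentialsDir, "oauth-token.txt")
        Files.write(t, f, Charsets.UTF_8)
        KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN.key -> f.getAbsolutePath
      }
      val caCert = credentials.caCertDataBase64.map { data =>
        val f = new File(credentialsDir, "ca.crt")
        Files.write(BaseEncoding.base64().decode(data), f)
        KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE.key -> f.getAbsolutePath
      }
      (token ++ caCert).toMap
    }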
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 6aa1c1fee0d47..16564ca746b40 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.io.File import java.nio.file.Paths import java.util.UUID +import java.util.concurrent.TimeUnit import com.google.common.base.Charsets import com.google.common.collect.ImmutableList @@ -54,6 +55,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private val HELPER_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs-helpers") .toFile .listFiles()(0) + private val SUBMITTER_LOCAL_MAIN_APP_RESOURCE = s"file://${EXAMPLES_JAR_FILE.getAbsolutePath}" + private val CONTAINER_LOCAL_MAIN_APP_RESOURCE = s"local:///opt/spark/examples/" + + s"integration-tests-jars/${EXAMPLES_JAR_FILE.getName}" + private val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + + s"integration-tests-jars/${HELPER_JAR_FILE.getName}" private val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile private val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) @@ -68,6 +74,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private var clientConfig: Config = _ private var keyStoreFile: File = _ private var trustStoreFile: File = _ + private var sparkConf: SparkConf = _ override def beforeAll(): Unit = { Minikube.startMinikube() @@ -100,6 +107,22 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { || servicesList.getItems == null || servicesList.getItems.isEmpty) } + sparkConf = new SparkConf(true) + .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") + .set(KUBERNETES_SUBMIT_CA_CERT_FILE, clientConfig.getCaCertFile) + .set(KUBERNETES_SUBMIT_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) + .set(KUBERNETES_SUBMIT_CLIENT_CERT_FILE, clientConfig.getClientCertFile) + .set(KUBERNETES_NAMESPACE, NAMESPACE) + .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest") + .set(EXECUTOR_DOCKER_IMAGE, "spark-executor:latest") + .setJars(Seq(HELPER_JAR_FILE.getAbsolutePath)) + .set("spark.executor.memory", "500m") + .set("spark.executor.cores", "1") + .set("spark.executors.instances", "1") + .set("spark.app.name", "spark-pi") + .set("spark.ui.enabled", "true") + .set("spark.testing", "false") + .set(WAIT_FOR_APP_COMPLETION, false) } after { @@ -112,7 +135,10 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .delete }) // spark-submit sets system properties so we have to clear them - new SparkConf(true).getAll.map(_._1).foreach { System.clearProperty } + new SparkConf(true) + .getAll.map(_._1) + .filter(_ != "spark.docker.test.persistMinikube") + .foreach { System.clearProperty } } override def afterAll(): Unit = { @@ -159,28 +185,10 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { // We'll make assertions based on spark rest api, so we need to turn on // spark.ui.enabled explicitly since the scalatest-maven-plugin would 
set it // to false by default. - val sparkConf = new SparkConf(true) - .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") - .set("spark.kubernetes.submit.caCertFile", clientConfig.getCaCertFile) - .set("spark.kubernetes.submit.clientKeyFile", clientConfig.getClientKeyFile) - .set("spark.kubernetes.submit.clientCertFile", clientConfig.getClientCertFile) - .set("spark.kubernetes.namespace", NAMESPACE) - .set("spark.kubernetes.driver.docker.image", "spark-driver:latest") - .set("spark.kubernetes.executor.docker.image", "spark-executor:latest") - .set("spark.jars", HELPER_JAR_FILE.getAbsolutePath) - .set("spark.executor.memory", "500m") - .set("spark.executor.cores", "1") - .set("spark.executors.instances", "1") - .set("spark.app.name", "spark-pi") - .set("spark.ui.enabled", "true") - .set("spark.testing", "false") - .set("spark.kubernetes.submit.waitAppCompletion", "false") - val mainAppResource = s"file://${EXAMPLES_JAR_FILE.getAbsolutePath}" - new Client( sparkConf = sparkConf, mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = mainAppResource, + mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, appArgs = Array.empty[String]).run() val sparkMetricsService = getSparkMetricsService("spark-pi") expectationsForStaticAllocation(sparkMetricsService) @@ -199,64 +207,38 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "--class", SPARK_PI_MAIN_CLASS, "--conf", "spark.ui.enabled=true", "--conf", "spark.testing=false", - "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", - "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", - "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", - "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", - "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", - "--conf", "spark.kubernetes.submit.waitAppCompletion=false", + "--conf", s"${KUBERNETES_SUBMIT_CA_CERT_FILE.key}=${clientConfig.getCaCertFile}", + "--conf", s"${KUBERNETES_SUBMIT_CLIENT_KEY_FILE.key}=${clientConfig.getClientKeyFile}", + "--conf", s"${KUBERNETES_SUBMIT_CLIENT_CERT_FILE.key}=${clientConfig.getClientCertFile}", + "--conf", s"${EXECUTOR_DOCKER_IMAGE.key}=spark-executor:latest", + "--conf", s"${DRIVER_DOCKER_IMAGE.key}=spark-driver:latest", + "--conf", s"${WAIT_FOR_APP_COMPLETION.key}=false", EXAMPLES_JAR_FILE.getAbsolutePath) SparkSubmit.main(args) val sparkMetricsService = getSparkMetricsService("spark-pi") expectationsForStaticAllocation(sparkMetricsService) } - test("Run using spark-submit with the examples jar on the docker image") { - val args = Array( - "--master", s"k8s://${Minikube.getMinikubeIp}:8443", - "--deploy-mode", "cluster", - "--kubernetes-namespace", NAMESPACE, - "--name", "spark-pi", - "--executor-memory", "512m", - "--executor-cores", "1", - "--num-executors", "1", - "--jars", s"local:///opt/spark/examples/integration-tests-jars/${HELPER_JAR_FILE.getName}", - "--class", SPARK_PI_MAIN_CLASS, - "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", - "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", - "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", - "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", - "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", - "--conf", "spark.kubernetes.submit.waitAppCompletion=false", - 
s"local:///opt/spark/examples/integration-tests-jars/${EXAMPLES_JAR_FILE.getName}") - SparkSubmit.main(args) + test("Run with the examples jar on the docker image") { + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) + new Client( + sparkConf = sparkConf, + mainClass = SPARK_PI_MAIN_CLASS, + mainAppResource = CONTAINER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() val sparkMetricsService = getSparkMetricsService("spark-pi") expectationsForStaticAllocation(sparkMetricsService) } test("Run with custom labels and annotations") { - val args = Array( - "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", - "--deploy-mode", "cluster", - "--kubernetes-namespace", NAMESPACE, - "--name", "spark-pi", - "--executor-memory", "512m", - "--executor-cores", "1", - "--num-executors", "1", - "--jars", HELPER_JAR_FILE.getAbsolutePath, - "--class", SPARK_PI_MAIN_CLASS, - "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", - "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", - "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", - "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", - "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", - "--conf", "spark.kubernetes.driver.labels=label1=label1value,label2=label2value", - "--conf", "spark.kubernetes.driver.annotations=" + - "annotation1=annotation1value," + - "annotation2=annotation2value", - "--conf", "spark.kubernetes.submit.waitAppCompletion=false", - EXAMPLES_JAR_FILE.getAbsolutePath) - SparkSubmit.main(args) + sparkConf.set(KUBERNETES_DRIVER_LABELS, "label1=label1value,label2=label2value") + sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, "annotation1=annotation1value," + + "annotation2=annotation2value") + new Client( + sparkConf = sparkConf, + mainClass = SPARK_PI_MAIN_CLASS, + mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() val driverPodMetadata = minikubeKubernetesClient .pods .withLabel("spark-app-name", "spark-pi") @@ -283,57 +265,22 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } test("Enable SSL on the driver submit server") { - val args = Array( - "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", - "--deploy-mode", "cluster", - "--kubernetes-namespace", NAMESPACE, - "--name", "spark-pi", - "--executor-memory", "512m", - "--executor-cores", "1", - "--num-executors", "1", - "--jars", HELPER_JAR_FILE.getAbsolutePath, - "--class", SPARK_PI_MAIN_CLASS, - "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", - "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", - "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", - "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", - "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", - "--conf", "spark.ssl.kubernetes.submit.enabled=true", - "--conf", "spark.ssl.kubernetes.submit.keyStore=" + - s"file://${keyStoreFile.getAbsolutePath}", - "--conf", "spark.ssl.kubernetes.submit.keyStorePassword=changeit", - "--conf", "spark.ssl.kubernetes.submit.keyPassword=changeit", - "--conf", "spark.ssl.kubernetes.submit.trustStore=" + - s"file://${trustStoreFile.getAbsolutePath}", - "--conf", s"spark.ssl.kubernetes.driverlaunch.trustStorePassword=changeit", - "--conf", "spark.kubernetes.submit.waitAppCompletion=false", - 
EXAMPLES_JAR_FILE.getAbsolutePath) - SparkSubmit.main(args) + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.submission.keyStorePassword", "changeit") + sparkConf.set("spark.ssl.kubernetes.submission.keyPassword", "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, + s"file://${trustStoreFile.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) + new Client( + sparkConf = sparkConf, + mainClass = SPARK_PI_MAIN_CLASS, + mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() } test("Added files should exist on the driver.") { - val args = Array( - "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", - "--deploy-mode", "cluster", - "--kubernetes-namespace", NAMESPACE, - "--name", "spark-file-existence-test", - "--executor-memory", "512m", - "--executor-cores", "1", - "--num-executors", "1", - "--jars", HELPER_JAR_FILE.getAbsolutePath, - "--files", TEST_EXISTENCE_FILE.getAbsolutePath, - "--class", FILE_EXISTENCE_MAIN_CLASS, - "--conf", "spark.ui.enabled=false", - "--conf", "spark.testing=true", - "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", - "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", - "--conf", s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", - "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", - "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", - "--conf", "spark.kubernetes.submit.waitAppCompletion=false", - EXAMPLES_JAR_FILE.getAbsolutePath, - TEST_EXISTENCE_FILE.getName, - TEST_EXISTENCE_FILE_CONTENTS) + sparkConf.set("spark.files", TEST_EXISTENCE_FILE.getAbsolutePath) + sparkConf.setAppName("spark-file-existence-test") val podCompletedFuture = SettableFuture.create[Boolean] val watch = new Watcher[Pod] { override def eventReceived(action: Action, pod: Pod): Unit = { @@ -364,8 +311,12 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .pods .withLabel("spark-app-name", "spark-file-existence-test") .watch(watch)) { _ => - SparkSubmit.main(args) - assert(podCompletedFuture.get, "Failed to run driver pod") + new Client( + sparkConf = sparkConf, + mainClass = FILE_EXISTENCE_MAIN_CLASS, + mainAppResource = CONTAINER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array(TEST_EXISTENCE_FILE.getName, TEST_EXISTENCE_FILE_CONTENTS)).run() + assert(podCompletedFuture.get(60, TimeUnit.SECONDS), "Failed to run driver pod") val driverPod = minikubeKubernetesClient .pods .withLabel("spark-app-name", "spark-file-existence-test") @@ -386,27 +337,12 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { Utils.tryWithResource(minikubeKubernetesClient.services() .withLabel("spark-app-name", "spark-pi") .watch(externalUriProviderWatch)) { _ => - val args = Array( - "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", - "--deploy-mode", "cluster", - "--kubernetes-namespace", NAMESPACE, - "--name", "spark-pi", - "--executor-memory", "512m", - "--executor-cores", "1", - "--num-executors", "1", - "--jars", HELPER_JAR_FILE.getAbsolutePath, - "--class", SPARK_PI_MAIN_CLASS, - "--conf", "spark.ui.enabled=true", - "--conf", "spark.testing=false", - "--conf", s"spark.kubernetes.submit.caCertFile=${clientConfig.getCaCertFile}", - "--conf", s"spark.kubernetes.submit.clientKeyFile=${clientConfig.getClientKeyFile}", - "--conf", 
s"spark.kubernetes.submit.clientCertFile=${clientConfig.getClientCertFile}", - "--conf", "spark.kubernetes.executor.docker.image=spark-executor:latest", - "--conf", "spark.kubernetes.driver.docker.image=spark-driver:latest", - "--conf", "spark.kubernetes.submit.waitAppCompletion=false", - "--conf", s"${DRIVER_SERVICE_MANAGER_TYPE.key}=${ExternalSuppliedUrisDriverServiceManager.TYPE}", - EXAMPLES_JAR_FILE.getAbsolutePath) - SparkSubmit.main(args) + sparkConf.set(DRIVER_SERVICE_MANAGER_TYPE, ExternalSuppliedUrisDriverServiceManager.TYPE) + new Client( + sparkConf = sparkConf, + mainClass = SPARK_PI_MAIN_CLASS, + mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() val sparkMetricsService = getSparkMetricsService("spark-pi") expectationsForStaticAllocation(sparkMetricsService) assert(externalUriProviderWatch.annotationSet.get) @@ -425,4 +361,17 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { "Resolved URI annotation not set on driver service.") } } + + test("Mount the Kubernetes credentials onto the driver pod") { + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, clientConfig.getCaCertFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, clientConfig.getClientCertFile) + new Client( + sparkConf = sparkConf, + mainClass = SPARK_PI_MAIN_CLASS, + mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + } } From 64f3a69620047e4cebff050c55ef5602c53c04d2 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 20 Mar 2017 13:38:18 -0700 Subject: [PATCH 082/225] Support using PEM files to configure SSL for driver submission (#173) * Support configuring SSL using PEM files. * Address some missed comments * Fix import ordering * Slight rewording of comments * Fix scalastyle --- docs/running-on-kubernetes.md | 14 +- resource-managers/kubernetes/core/pom.xml | 4 + .../spark/deploy/kubernetes/Client.scala | 23 +- ...DriverSubmitSslConfigurationProvider.scala | 353 ++++++++++++++++++ .../kubernetes/SslConfigurationProvider.scala | 204 ---------- .../spark/deploy/kubernetes/config.scala | 35 +- .../spark/deploy/kubernetes/constants.scala | 6 +- .../KubernetesSparkRestServer.scala | 69 +++- .../kubernetes/PemsToKeyStoreConverter.scala | 125 +++++++ .../src/main/docker/driver/Dockerfile | 2 + .../kubernetes/integration-tests/pom.xml | 4 - .../integrationtest/KubernetesSuite.scala | 39 +- .../integrationtest/sslutil/SSLUtils.scala | 80 ++-- 13 files changed, 680 insertions(+), 278 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index dcfa70a85a970..3b6935560a575 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -127,16 +127,24 @@ Spark supports using TLS to encrypt the traffic in this bootstrapping process. I whenever possible. 
See the [security page](security.html) and [configuration](configuration.html) sections for more information on -configuring TLS; use the prefix `spark.ssl.kubernetes.submission` in configuring the TLS-related fields in the context +configuring TLS; use the prefix `spark.ssl.kubernetes.driversubmitserver` in configuring the TLS-related fields in the context of submitting to Kubernetes. For example, to set the trustStore used when the local machine communicates with the driver -pod in starting the application, set `spark.ssl.kubernetes.submission.trustStore`. +pod in starting the application, set `spark.ssl.kubernetes.driversubmitserver.trustStore`. One note about the keyStore is that it can be specified as either a file on the client machine or a file in the -container image's disk. Thus `spark.ssl.kubernetes.submission.keyStore` can be a URI with a scheme of either `file:` +container image's disk. Thus `spark.ssl.kubernetes.driversubmitserver.keyStore` can be a URI with a scheme of either `file:` or `local:`. A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme `local:`, the file is assumed to already be on the container's disk at the appropriate path. +Finally, the submission server and client can be configured to use PEM files instead of Java keyStores. When using +this mode, set `spark.ssl.kubernetes.driversubmitserver.keyPem` and +`spark.ssl.kubernetes.driversubmitserver.serverCertPem` to configure the key and certificate files on the driver +submission server. These files can be uploaded from the submitter's machine if they have no scheme or a scheme of +`file:`, or they can be located on the container's disk if they have the scheme `local:`. The client's certificate +file should be provided via setting `spark.ssl.kubernetes.driversubmitserver.clientCertPem`, and this file must be +located on the submitting machine's local disk. + ### Submission of Local Files through Ingress/External controller Kubernetes pods run with their own IP address space. 
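Before moving on, here is a compact illustration of the PEM-based TLS setup described in the previous section. It is only a sketch: the property keys are the ones introduced in this patch, while the master URL and file paths are placeholder values chosen for the example.

```scala
import org.apache.spark.SparkConf

// Illustrative sketch: PEM-based TLS for the driver submission server.
// The property keys come from this patch; the master URL and paths are placeholders.
val conf = new SparkConf()
  .setMaster("k8s://https://192.168.99.100:8443")
  .set("spark.ssl.kubernetes.driversubmitserver.enabled", "true")
  // Key and certificate used by the submission server; `file:` URIs are uploaded
  // from the submitting machine, while `local:` URIs are read from the driver image.
  .set("spark.ssl.kubernetes.driversubmitserver.keyPem", "file:///opt/certs/submit-key.pem")
  .set("spark.ssl.kubernetes.driversubmitserver.serverCertPem", "file:///opt/certs/submit-cert.pem")
  // Certificate the submitting client trusts when contacting the submission server;
  // this one must live on the submitting machine's local disk.
  .set("spark.ssl.kubernetes.driversubmitserver.clientCertPem", "file:///opt/certs/submit-cert.pem")
```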
If Spark is run in cluster mode, the driver pod may not be diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 985ffd08f3fc7..6d2f1d0fd2769 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -87,6 +87,10 @@ com.google.guava guava + + org.bouncycastle + bcpkix-jdk15on + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index e6b2e31568653..7e700b569a3fb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -161,7 +161,7 @@ private[spark] class Client( driverServiceManager.handleSubmissionError( new SparkException("Submission shutting down early..."))) try { - val sslConfigurationProvider = new SslConfigurationProvider( + val sslConfigurationProvider = new DriverSubmitSslConfigurationProvider( sparkConf, kubernetesAppId, kubernetesClient, kubernetesResourceCleaner) val submitServerSecret = kubernetesClient.secrets().createNew() .withNewMetadata() @@ -182,7 +182,7 @@ private[spark] class Client( configureOwnerReferences( kubernetesClient, submitServerSecret, - sslConfiguration.sslSecrets, + sslConfiguration.sslSecret, driverPod, driverService) submitApplicationToDriverServer( @@ -209,7 +209,6 @@ private[spark] class Client( Utils.tryLogNonFatalError { driverServiceManager.stop() } - // Remove the shutdown hooks that would be redundant Utils.tryLogNonFatalError { ShutdownHookManager.removeShutdownHook(resourceCleanShutdownHook) @@ -236,7 +235,7 @@ private[spark] class Client( private def submitApplicationToDriverServer( kubernetesClient: KubernetesClient, driverServiceManager: DriverServiceManager, - sslConfiguration: SslConfiguration, + sslConfiguration: DriverSubmitSslConfiguration, driverService: Service, submitterLocalFiles: Iterable[String], submitterLocalJars: Iterable[String], @@ -298,7 +297,7 @@ private[spark] class Client( customLabels: Map[String, String], customAnnotations: Map[String, String], submitServerSecret: Secret, - sslConfiguration: SslConfiguration): (Pod, Service) = { + sslConfiguration: DriverSubmitSslConfiguration): (Pod, Service) = { val driverKubernetesSelectors = (Map( SPARK_DRIVER_LABEL -> kubernetesAppId, SPARK_APP_ID_LABEL -> kubernetesAppId, @@ -349,7 +348,7 @@ private[spark] class Client( private def configureOwnerReferences( kubernetesClient: KubernetesClient, submitServerSecret: Secret, - sslSecrets: Array[Secret], + sslSecret: Option[Secret], driverPod: Pod, driverService: Service): Service = { val driverPodOwnerRef = new OwnerReferenceBuilder() @@ -359,7 +358,7 @@ private[spark] class Client( .withKind(driverPod.getKind) .withController(true) .build() - sslSecrets.foreach(secret => { + sslSecret.foreach(secret => { val updatedSecret = kubernetesClient.secrets().withName(secret.getMetadata.getName).edit() .editMetadata() .addToOwnerReferences(driverPodOwnerRef) @@ -425,10 +424,10 @@ private[spark] class Client( driverKubernetesSelectors: Map[String, String], customAnnotations: Map[String, String], submitServerSecret: Secret, - sslConfiguration: SslConfiguration): Pod = { + sslConfiguration: DriverSubmitSslConfiguration): Pod = { val containerPorts = buildContainerPorts() val probePingHttpGet = new HTTPGetActionBuilder() - .withScheme(if 
(sslConfiguration.sslOptions.enabled) "HTTPS" else "HTTP") + .withScheme(if (sslConfiguration.enabled) "HTTPS" else "HTTP") .withPath("/v1/submissions/ping") .withNewPort(SUBMISSION_SERVER_PORT_NAME) .build() @@ -452,7 +451,7 @@ private[spark] class Client( .withSecretName(submitServerSecret.getMetadata.getName) .endSecret() .endVolume() - .addToVolumes(sslConfiguration.sslPodVolumes: _*) + .addToVolumes(sslConfiguration.sslPodVolume.toSeq: _*) .withServiceAccount(serviceAccount.getOrElse("default")) .addNewContainer() .withName(DRIVER_CONTAINER_NAME) @@ -463,7 +462,7 @@ private[spark] class Client( .withMountPath(secretDirectory) .withReadOnly(true) .endVolumeMount() - .addToVolumeMounts(sslConfiguration.sslPodVolumeMounts: _*) + .addToVolumeMounts(sslConfiguration.sslPodVolumeMount.toSeq: _*) .addNewEnv() .withName(ENV_SUBMISSION_SECRET_LOCATION) .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") @@ -661,7 +660,7 @@ private[spark] class Client( kubernetesClient: KubernetesClient, driverServiceManager: DriverServiceManager, service: Service, - sslConfiguration: SslConfiguration): KubernetesSparkRestApi = { + sslConfiguration: DriverSubmitSslConfiguration): KubernetesSparkRestApi = { val serviceUris = driverServiceManager.getDriverServiceSubmissionServerUris(service) require(serviceUris.nonEmpty, "No uris found to contact the driver!") HttpClientUtil.createClient[KubernetesSparkRestApi]( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala new file mode 100644 index 0000000000000..a83c9a9896a08 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala @@ -0,0 +1,353 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +import java.io.{File, FileInputStream} +import java.security.{KeyStore, SecureRandom} +import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{EnvVar, EnvVarBuilder, Secret, Volume, VolumeBuilder, VolumeMount, VolumeMountBuilder} +import io.fabric8.kubernetes.client.KubernetesClient +import scala.collection.JavaConverters._ + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.{KubernetesFileUtils, PemsToKeyStoreConverter} +import org.apache.spark.util.Utils + +/** + * Raw SSL configuration as the user specified in SparkConf for setting up the driver + * submission server. + */ +private case class DriverSubmitSslConfigurationParameters( + storeBasedSslOptions: SSLOptions, + isKeyStoreLocalFile: Boolean, + driverSubmitServerKeyPem: Option[File], + isDriverSubmitKeyPemLocalFile: Boolean, + driverSubmitServerCertPem: Option[File], + isDriverSubmitServerCertPemLocalFile: Boolean, + submissionClientCertPem: Option[File]) + +/** + * Resolved from translating options provided in + * {@link DriverSubmitSslConfigurationParameters} into Kubernetes volumes, environment variables + * for the driver pod, Kubernetes secrets, client-side trust managers, and the client-side SSL + * context. This is used for setting up the SSL connection for the submission server where the + * application local dependencies and configuration is provided from. + */ +private[spark] case class DriverSubmitSslConfiguration( + enabled: Boolean, + sslPodEnvVars: Array[EnvVar], + sslPodVolume: Option[Volume], + sslPodVolumeMount: Option[VolumeMount], + sslSecret: Option[Secret], + driverSubmitClientTrustManager: Option[X509TrustManager], + driverSubmitClientSslContext: SSLContext) + +/** + * Provides the SSL configuration for bootstrapping the driver pod to listen for the driver + * submission over SSL, and then supply the client-side configuration for establishing the + * SSL connection. This is done in two phases: first, interpreting the raw configuration + * values from the SparkConf object; then second, converting the configuration parameters + * into the appropriate Kubernetes constructs, namely the volume and volume mount to add to the + * driver pod, and the secret to create at the API server; and finally, constructing the + * client-side trust manager and SSL context for sending the local dependencies. 
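+ *
+ * Rough usage sketch, mirroring how Client.scala in this patch drives the provider (the value
+ * names below are placeholders for objects the submission client already has in scope):
+ * {{{
+ *   val sslConfigurationProvider = new DriverSubmitSslConfigurationProvider(
+ *     sparkConf, kubernetesAppId, kubernetesClient, kubernetesResourceCleaner)
+ *   val sslConfiguration = sslConfigurationProvider.getSslConfiguration()
+ *   // sslPodVolume, sslPodVolumeMount, and sslPodEnvVars are folded into the driver pod spec,
+ *   // while driverSubmitClientSslContext configures the HTTP client used for submission.
+ * }}}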
+ */ +private[spark] class DriverSubmitSslConfigurationProvider( + sparkConf: SparkConf, + kubernetesAppId: String, + kubernetesClient: KubernetesClient, + kubernetesResourceCleaner: KubernetesResourceCleaner) { + private val SECURE_RANDOM = new SecureRandom() + private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" + private val sslSecretsDirectory = DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR + + s"/$kubernetesAppId-ssl" + + def getSslConfiguration(): DriverSubmitSslConfiguration = { + val sslConfigurationParameters = parseSslConfigurationParameters() + if (sslConfigurationParameters.storeBasedSslOptions.enabled) { + val storeBasedSslOptions = sslConfigurationParameters.storeBasedSslOptions + val keyStoreSecret = resolveFileToSecretMapping( + sslConfigurationParameters.isKeyStoreLocalFile, + SUBMISSION_SSL_KEYSTORE_SECRET_NAME, + storeBasedSslOptions.keyStore, + "KeyStore") + val keyStorePathEnv = resolveFilePathEnv( + sslConfigurationParameters.isKeyStoreLocalFile, + ENV_SUBMISSION_KEYSTORE_FILE, + SUBMISSION_SSL_KEYSTORE_SECRET_NAME, + storeBasedSslOptions.keyStore) + val storePasswordSecret = storeBasedSslOptions.keyStorePassword.map(password => { + val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) + (SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME, passwordBase64) + }).toMap + val storePasswordLocationEnv = storeBasedSslOptions.keyStorePassword.map(_ => { + new EnvVarBuilder() + .withName(ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE) + .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME") + .build() + }) + val storeKeyPasswordSecret = storeBasedSslOptions.keyPassword.map(password => { + val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) + (SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME, passwordBase64) + }).toMap + val storeKeyPasswordEnv = storeBasedSslOptions.keyPassword.map(_ => { + new EnvVarBuilder() + .withName(ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE) + .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME") + .build() + }) + val storeTypeEnv = storeBasedSslOptions.keyStoreType.map(storeType => { + new EnvVarBuilder() + .withName(ENV_SUBMISSION_KEYSTORE_TYPE) + .withValue(storeType) + .build() + }) + val keyPemSecret = resolveFileToSecretMapping( + sslConfigurationParameters.isDriverSubmitKeyPemLocalFile, + secretName = SUBMISSION_SSL_KEY_PEM_SECRET_NAME, + secretType = "Key pem", + secretFile = sslConfigurationParameters.driverSubmitServerKeyPem) + val keyPemLocationEnv = resolveFilePathEnv( + sslConfigurationParameters.isDriverSubmitKeyPemLocalFile, + envName = ENV_SUBMISSION_KEY_PEM_FILE, + secretName = SUBMISSION_SSL_KEY_PEM_SECRET_NAME, + maybeFile = sslConfigurationParameters.driverSubmitServerKeyPem) + val certPemSecret = resolveFileToSecretMapping( + sslConfigurationParameters.isDriverSubmitServerCertPemLocalFile, + secretName = SUBMISSION_SSL_CERT_PEM_SECRET_NAME, + secretType = "Cert pem", + secretFile = sslConfigurationParameters.driverSubmitServerCertPem) + val certPemLocationEnv = resolveFilePathEnv( + sslConfigurationParameters.isDriverSubmitServerCertPemLocalFile, + envName = ENV_SUBMISSION_CERT_PEM_FILE, + secretName = SUBMISSION_SSL_CERT_PEM_SECRET_NAME, + maybeFile = sslConfigurationParameters.driverSubmitServerCertPem) + val useSslEnv = new EnvVarBuilder() + .withName(ENV_SUBMISSION_USE_SSL) + .withValue("true") + .build() + val sslVolume = new VolumeBuilder() + .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) + .withNewSecret() + 
.withSecretName(sslSecretsName) + .endSecret() + .build() + val sslVolumeMount = new VolumeMountBuilder() + .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) + .withReadOnly(true) + .withMountPath(sslSecretsDirectory) + .build() + val allSecrets = keyStoreSecret ++ + storePasswordSecret ++ + storeKeyPasswordSecret ++ + keyPemSecret ++ + certPemSecret + val sslSecret = kubernetesClient.secrets().createNew() + .withNewMetadata() + .withName(sslSecretsName) + .endMetadata() + .withData(allSecrets.asJava) + .withType("Opaque") + .done() + kubernetesResourceCleaner.registerOrUpdateResource(sslSecret) + val allSslEnvs = keyStorePathEnv ++ + storePasswordLocationEnv ++ + storeKeyPasswordEnv ++ + storeTypeEnv ++ + keyPemLocationEnv ++ + Array(useSslEnv) ++ + certPemLocationEnv + val (driverSubmitClientTrustManager, driverSubmitClientSslContext) = + buildSslConnectionConfiguration(sslConfigurationParameters) + DriverSubmitSslConfiguration( + true, + allSslEnvs.toArray, + Some(sslVolume), + Some(sslVolumeMount), + Some(sslSecret), + driverSubmitClientTrustManager, + driverSubmitClientSslContext) + } else { + DriverSubmitSslConfiguration( + false, + Array[EnvVar](), + None, + None, + None, + None, + SSLContext.getDefault) + } + } + + private def resolveFilePathEnv( + isLocal: Boolean, + envName: String, + secretName: String, + maybeFile: Option[File]): Option[EnvVar] = { + maybeFile.map(file => { + val pemPath = if (isLocal) { + s"$sslSecretsDirectory/$secretName" + } else { + file.getAbsolutePath + } + new EnvVarBuilder() + .withName(envName) + .withValue(pemPath) + .build() + }) + } + + private def resolveFileToSecretMapping( + isLocal: Boolean, + secretName: String, + secretFile: Option[File], + secretType: String): Map[String, String] = { + secretFile.filter(_ => isLocal).map(file => { + if (!file.isFile) { + throw new SparkException(s"$secretType specified at ${file.getAbsolutePath} is not" + + s" a file or does not exist.") + } + val keyStoreBytes = Files.toByteArray(file) + (secretName, BaseEncoding.base64().encode(keyStoreBytes)) + }).toMap + } + + private def parseSslConfigurationParameters(): DriverSubmitSslConfigurationParameters = { + val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE) + val maybeTrustStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE) + val maybeKeyPem = sparkConf.get(DRIVER_SUBMIT_SSL_KEY_PEM) + val maybeDriverSubmitServerCertPem = sparkConf.get(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM) + val maybeDriverSubmitClientCertPem = sparkConf.get(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM) + validatePemsDoNotConflictWithStores( + maybeKeyStore, + maybeTrustStore, + maybeKeyPem, + maybeDriverSubmitServerCertPem, + maybeDriverSubmitClientCertPem) + val resolvedSparkConf = sparkConf.clone() + val (isLocalKeyStore, resolvedKeyStore) = resolveLocalFile(maybeKeyStore, "keyStore") + resolvedKeyStore.foreach { + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, _) + } + val (isLocalDriverSubmitServerCertPem, resolvedDriverSubmitServerCertPem) = + resolveLocalFile(maybeDriverSubmitServerCertPem, "server cert PEM") + val (isLocalKeyPem, resolvedKeyPem) = resolveLocalFile(maybeKeyPem, "key PEM") + maybeTrustStore.foreach { trustStore => + require(KubernetesFileUtils.isUriLocalFile(trustStore), s"Invalid trustStore URI" + + s" $trustStore; trustStore URI for submit server must have no scheme, or scheme file://") + resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, + Utils.resolveURI(trustStore).getPath) + } + val driverSubmitClientCertPem = 
maybeDriverSubmitClientCertPem.map { driverSubmitClientCert => + require(KubernetesFileUtils.isUriLocalFile(driverSubmitClientCert), + "Invalid client certificate PEM URI $driverSubmitClientCert: client certificate URI must" + + " have no scheme, or scheme file://") + Utils.resolveURI(driverSubmitClientCert).getPath + } + val securityManager = new SparkSecurityManager(resolvedSparkConf) + val storeBasedSslOptions = securityManager.getSSLOptions(DRIVER_SUBMIT_SSL_NAMESPACE) + DriverSubmitSslConfigurationParameters( + storeBasedSslOptions, + isLocalKeyStore, + resolvedKeyPem.map(new File(_)), + isLocalKeyPem, + resolvedDriverSubmitServerCertPem.map(new File(_)), + isLocalDriverSubmitServerCertPem, + driverSubmitClientCertPem.map(new File(_))) + } + + private def resolveLocalFile(file: Option[String], + fileType: String): (Boolean, Option[String]) = { + file.map { f => + require(isValidSslFileScheme(f), s"Invalid $fileType URI $f, $fileType URI" + + s" for submit server must have scheme file:// or local:// (no scheme defaults to file://") + val isLocal = KubernetesFileUtils.isUriLocalFile(f) + (isLocal, Option.apply(Utils.resolveURI(f).getPath)) + }.getOrElse(false, None) + } + + private def validatePemsDoNotConflictWithStores( + maybeKeyStore: Option[String], + maybeTrustStore: Option[String], + maybeKeyPem: Option[String], + maybeDriverSubmitServerCertPem: Option[String], + maybeSubmitClientCertPem: Option[String]) = { + maybeKeyPem.orElse(maybeDriverSubmitServerCertPem).foreach { _ => + require(maybeKeyStore.isEmpty, + "Cannot specify server PEM files and key store files; must specify only one or the other.") + } + maybeKeyPem.foreach { _ => + require(maybeDriverSubmitServerCertPem.isDefined, + "When specifying the key PEM file, the server certificate PEM file must also be provided.") + } + maybeDriverSubmitServerCertPem.foreach { _ => + require(maybeKeyPem.isDefined, + "When specifying the server certificate PEM file, the key PEM file must also be provided.") + } + maybeTrustStore.foreach { _ => + require(maybeSubmitClientCertPem.isEmpty, + "Cannot specify client cert file and truststore file; must specify only one or the other.") + } + } + + private def isValidSslFileScheme(rawUri: String): Boolean = { + val resolvedScheme = Option.apply(Utils.resolveURI(rawUri).getScheme).getOrElse("file") + resolvedScheme == "file" || resolvedScheme == "local" + } + + private def buildSslConnectionConfiguration( + sslConfigurationParameters: DriverSubmitSslConfigurationParameters) + : (Option[X509TrustManager], SSLContext) = { + val maybeTrustStore = sslConfigurationParameters.submissionClientCertPem.map { certPem => + PemsToKeyStoreConverter.convertCertPemToTrustStore( + certPem, + sslConfigurationParameters.storeBasedSslOptions.trustStoreType) + }.orElse(sslConfigurationParameters.storeBasedSslOptions.trustStore.map { trustStoreFile => + if (!trustStoreFile.isFile) { + throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + + s" does not exist or is not a file.") + } + val trustStore = KeyStore.getInstance( + sslConfigurationParameters + .storeBasedSslOptions + .trustStoreType + .getOrElse(KeyStore.getDefaultType)) + Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => + val trustStorePassword = sslConfigurationParameters + .storeBasedSslOptions + .trustStorePassword + .map(_.toCharArray) + .orNull + trustStore.load(trustStoreStream, trustStorePassword) + } + trustStore + }) + maybeTrustStore.map { trustStore => + val trustManagerFactory = 
TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm) + trustManagerFactory.init(trustStore) + val trustManagers = trustManagerFactory.getTrustManagers + val sslContext = SSLContext.getInstance("TLSv1.2") + sslContext.init(null, trustManagers, SECURE_RANDOM) + (Option.apply(trustManagers(0).asInstanceOf[X509TrustManager]), sslContext) + }.getOrElse((Option.empty[X509TrustManager], SSLContext.getDefault)) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala deleted file mode 100644 index 4bbe3ed385a4d..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SslConfigurationProvider.scala +++ /dev/null @@ -1,204 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes - -import java.io.FileInputStream -import java.security.{KeyStore, SecureRandom} -import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{EnvVar, EnvVarBuilder, Secret, Volume, VolumeBuilder, VolumeMount, VolumeMountBuilder} -import io.fabric8.kubernetes.client.KubernetesClient -import scala.collection.JavaConverters._ -import scala.collection.mutable - -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.util.Utils - -private[spark] case class SslConfiguration( - sslOptions: SSLOptions, - isKeyStoreLocalFile: Boolean, - sslPodEnvVars: Array[EnvVar], - sslPodVolumes: Array[Volume], - sslPodVolumeMounts: Array[VolumeMount], - sslSecrets: Array[Secret], - driverSubmitClientTrustManager: Option[X509TrustManager], - driverSubmitClientSslContext: SSLContext) - -private[spark] class SslConfigurationProvider( - sparkConf: SparkConf, - kubernetesAppId: String, - kubernetesClient: KubernetesClient, - kubernetesResourceCleaner: KubernetesResourceCleaner) { - private val SECURE_RANDOM = new SecureRandom() - private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" - private val sslSecretsDirectory = DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR + - s"/$kubernetesAppId-ssl" - - def getSslConfiguration(): SslConfiguration = { - val (driverSubmitSslOptions, isKeyStoreLocalFile) = parseDriverSubmitSslOptions() - if (driverSubmitSslOptions.enabled) { - val sslSecretsMap = mutable.HashMap[String, String]() - val sslEnvs = 
mutable.Buffer[EnvVar]() - val secrets = mutable.Buffer[Secret]() - driverSubmitSslOptions.keyStore.foreach(store => { - val resolvedKeyStoreFile = if (isKeyStoreLocalFile) { - if (!store.isFile) { - throw new SparkException(s"KeyStore specified at $store is not a file or" + - s" does not exist.") - } - val keyStoreBytes = Files.toByteArray(store) - val keyStoreBase64 = BaseEncoding.base64().encode(keyStoreBytes) - sslSecretsMap += (SUBMISSION_SSL_KEYSTORE_SECRET_NAME -> keyStoreBase64) - s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_SECRET_NAME" - } else { - store.getAbsolutePath - } - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_FILE) - .withValue(resolvedKeyStoreFile) - .build() - }) - driverSubmitSslOptions.keyStorePassword.foreach(password => { - val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) - sslSecretsMap += (SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME -> passwordBase64) - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE) - .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME") - .build() - }) - driverSubmitSslOptions.keyPassword.foreach(password => { - val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) - sslSecretsMap += (SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME -> passwordBase64) - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE) - .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME") - .build() - }) - driverSubmitSslOptions.keyStoreType.foreach(storeType => { - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_TYPE) - .withValue(storeType) - .build() - }) - sslEnvs += new EnvVarBuilder() - .withName(ENV_SUBMISSION_USE_SSL) - .withValue("true") - .build() - val sslVolume = new VolumeBuilder() - .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) - .withNewSecret() - .withSecretName(sslSecretsName) - .endSecret() - .build() - val sslVolumeMount = new VolumeMountBuilder() - .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) - .withReadOnly(true) - .withMountPath(sslSecretsDirectory) - .build() - val sslSecrets = kubernetesClient.secrets().createNew() - .withNewMetadata() - .withName(sslSecretsName) - .endMetadata() - .withData(sslSecretsMap.asJava) - .withType("Opaque") - .done() - kubernetesResourceCleaner.registerOrUpdateResource(sslSecrets) - secrets += sslSecrets - val (driverSubmitClientTrustManager, driverSubmitClientSslContext) = - buildSslConnectionConfiguration(driverSubmitSslOptions) - SslConfiguration( - driverSubmitSslOptions, - isKeyStoreLocalFile, - sslEnvs.toArray, - Array(sslVolume), - Array(sslVolumeMount), - secrets.toArray, - driverSubmitClientTrustManager, - driverSubmitClientSslContext) - } else { - SslConfiguration( - driverSubmitSslOptions, - isKeyStoreLocalFile, - Array[EnvVar](), - Array[Volume](), - Array[VolumeMount](), - Array[Secret](), - None, - SSLContext.getDefault) - } - } - - private def parseDriverSubmitSslOptions(): (SSLOptions, Boolean) = { - val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_KEYSTORE) - val resolvedSparkConf = sparkConf.clone() - val (isLocalKeyStore, resolvedKeyStore) = maybeKeyStore.map(keyStore => { - val keyStoreURI = Utils.resolveURI(keyStore) - val isProvidedKeyStoreLocal = keyStoreURI.getScheme match { - case "file" | null => true - case "local" => false - case _ => throw new SparkException(s"Invalid KeyStore URI $keyStore; keyStore URI" + - " for submit server must have scheme file:// or 
local:// (no scheme defaults" + - " to file://)") - } - (isProvidedKeyStoreLocal, Option.apply(keyStoreURI.getPath)) - }).getOrElse((false, Option.empty[String])) - resolvedKeyStore.foreach { - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, _) - } - sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE).foreach { trustStore => - val trustStoreURI = Utils.resolveURI(trustStore) - trustStoreURI.getScheme match { - case "file" | null => - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, trustStoreURI.getPath) - case _ => throw new SparkException(s"Invalid trustStore URI $trustStore; trustStore URI" + - " for submit server must have no scheme, or scheme file://") - } - } - val securityManager = new SparkSecurityManager(resolvedSparkConf) - (securityManager.getSSLOptions(KUBERNETES_SUBMIT_SSL_NAMESPACE), isLocalKeyStore) - } - - private def buildSslConnectionConfiguration(driverSubmitSslOptions: SSLOptions): - (Option[X509TrustManager], SSLContext) = { - driverSubmitSslOptions.trustStore.map(trustStoreFile => { - val trustManagerFactory = TrustManagerFactory.getInstance( - TrustManagerFactory.getDefaultAlgorithm) - val trustStore = KeyStore.getInstance( - driverSubmitSslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType)) - if (!trustStoreFile.isFile) { - throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + - s" does not exist or is not a file.") - } - Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => - driverSubmitSslOptions.trustStorePassword match { - case Some(password) => - trustStore.load(trustStoreStream, password.toCharArray) - case None => trustStore.load(trustStoreStream, null) - } - } - trustManagerFactory.init(trustStore) - val trustManagers = trustManagerFactory.getTrustManagers - val sslContext = SSLContext.getInstance("TLSv1.2") - sslContext.init(null, trustManagers, SECURE_RANDOM) - (Option.apply(trustManagers(0).asInstanceOf[X509TrustManager]), sslContext) - }).getOrElse((Option.empty[X509TrustManager], SSLContext.getDefault)) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index e33c761ecc8d1..3328809e186e4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -197,26 +197,51 @@ package object config { .timeConf(TimeUnit.SECONDS) .createWithDefault(60L) - private[spark] val KUBERNETES_DRIVER_SUBMIT_KEYSTORE = - ConfigBuilder("spark.ssl.kubernetes.submission.keyStore") + private[spark] val KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.keyStore") .doc("KeyStore file for the driver submission server listening on SSL. 
Can be pre-mounted" + " on the driver container or uploaded from the submitting client.") .stringConf .createOptional - private[spark] val KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE = - ConfigBuilder("spark.ssl.kubernetes.submission.trustStore") + private[spark] val KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.trustStore") .doc("TrustStore containing certificates for communicating to the driver submission server" + " over SSL.") .stringConf .createOptional private[spark] val DRIVER_SUBMIT_SSL_ENABLED = - ConfigBuilder("spark.ssl.kubernetes.submission.enabled") + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.enabled") .doc("Whether or not to use SSL when sending the application dependencies to the driver pod.") .booleanConf .createWithDefault(false) + private[spark] val DRIVER_SUBMIT_SSL_KEY_PEM = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.keyPem") + .doc("Key PEM file that the driver submission server will use when setting up TLS" + + " connections. Can be pre-mounted on the driver pod's disk or uploaded from the" + + " submitting client's machine.") + .stringConf + .createOptional + + private[spark] val DRIVER_SUBMIT_SSL_SERVER_CERT_PEM = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.serverCertPem") + .doc("Certificate PEM file that is associated with the key PEM file" + + " the submission server uses to set up TLS connections. Can be pre-mounted" + + " on the driver pod's disk or uploaded from the submitting client's machine.") + .stringConf + .createOptional + + private[spark] val DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM = + ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.clientCertPem") + .doc("Certificate pem file that the submission client uses to connect to the submission" + + " server over TLS. 
This should often be the same as the server certificate, but can be" + + " different if the submission client will contact the driver through a proxy instead of" + + " the driver service directly.") + .stringConf + .createOptional + private[spark] val KUBERNETES_DRIVER_SERVICE_NAME = ConfigBuilder("spark.kubernetes.driver.service.name") .doc("Kubernetes service that exposes the driver pod for external access.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 23d216e799fff..0e5fada302421 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -36,6 +36,8 @@ package object constants { private[spark] val SUBMISSION_SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" private[spark] val SUBMISSION_SSL_SECRETS_PREFIX = "spark-submission-server-ssl" private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" + private[spark] val SUBMISSION_SSL_KEY_PEM_SECRET_NAME = "spark-submission-server-key-pem" + private[spark] val SUBMISSION_SSL_CERT_PEM_SECRET_NAME = "spark-submission-server-cert-pem" // Default and fixed ports private[spark] val SUBMISSION_SERVER_PORT = 7077 @@ -57,6 +59,8 @@ package object constants { private[spark] val ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE = "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" private[spark] val ENV_SUBMISSION_KEYSTORE_TYPE = "SPARK_SUBMISSION_KEYSTORE_TYPE" + private[spark] val ENV_SUBMISSION_KEY_PEM_FILE = "SPARK_SUBMISSION_KEY_PEM_FILE" + private[spark] val ENV_SUBMISSION_CERT_PEM_FILE = "SPARK_SUBMISSION_CERT_PEM_FILE" private[spark] val ENV_SUBMISSION_USE_SSL = "SPARK_SUBMISSION_USE_SSL" private[spark] val ENV_EXECUTOR_PORT = "SPARK_EXECUTOR_PORT" private[spark] val ENV_DRIVER_URL = "SPARK_DRIVER_URL" @@ -74,7 +78,7 @@ package object constants { // Miscellaneous private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" - private[spark] val KUBERNETES_SUBMIT_SSL_NAMESPACE = "kubernetes.submission" + private[spark] val DRIVER_SUBMIT_SSL_NAMESPACE = "kubernetes.driversubmitserver" private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 private[spark] val MEMORY_OVERHEAD_MIN = 384L diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala index 4688521a59d38..4ca01b2f6bd38 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala @@ -19,6 +19,7 @@ package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileOutputStream, StringReader} import java.net.URI import java.nio.file.Paths +import java.security.SecureRandom import java.util.concurrent.CountDownLatch import java.util.concurrent.atomic.AtomicInteger import javax.servlet.http.{HttpServletRequest, HttpServletResponse} @@ -26,10 +27,11 @@ import javax.servlet.http.{HttpServletRequest, HttpServletResponse} import 
com.google.common.base.Charsets import com.google.common.io.{BaseEncoding, ByteStreams, Files} import org.apache.commons.codec.binary.Base64 +import org.apache.commons.lang3.RandomStringUtils import scala.collection.mutable import scala.collection.mutable.ArrayBuffer -import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SSLOptions} +import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.rest._ @@ -44,7 +46,9 @@ private case class KubernetesSparkRestServerArguments( keyStoreFile: Option[String] = None, keyStorePasswordFile: Option[String] = None, keyStoreType: Option[String] = None, - keyPasswordFile: Option[String] = None) { + keyPasswordFile: Option[String] = None, + keyPemFile: Option[String] = None, + certPemFile: Option[String] = None) { def validate(): KubernetesSparkRestServerArguments = { require(host.isDefined, "Hostname not set via --hostname.") require(port.isDefined, "Port not set via --port") @@ -83,6 +87,12 @@ private object KubernetesSparkRestServerArguments { case "--keystore-key-password-file" :: value :: tail => args = tail resolvedArguments.copy(keyPasswordFile = Some(value)) + case "--key-pem-file" :: value :: tail => + args = tail + resolvedArguments.copy(keyPemFile = Some(value)) + case "--cert-pem-file" :: value :: tail => + args = tail + resolvedArguments.copy(certPemFile = Some(value)) // TODO polish usage message case Nil => resolvedArguments case unknown => throw new IllegalStateException(s"Unknown argument(s) found: $unknown") @@ -377,26 +387,43 @@ private[spark] class KubernetesSparkRestServer( private[spark] object KubernetesSparkRestServer { private val barrier = new CountDownLatch(1) + private val SECURE_RANDOM = new SecureRandom() def main(args: Array[String]): Unit = { val parsedArguments = KubernetesSparkRestServerArguments.fromArgsArray(args) val secretFile = new File(parsedArguments.secretFile.get) - if (!secretFile.isFile) { - throw new IllegalArgumentException(s"Secret file specified by --secret-file" + - " is not a file, or does not exist.") - } + require(secretFile.isFile, "Secret file specified by --secret-file is not a file, or" + + " does not exist.") val sslOptions = if (parsedArguments.useSsl) { - val keyStorePassword = parsedArguments - .keyStorePasswordFile - .map(new File(_)) - .map(Files.toString(_, Charsets.UTF_8)) + validateSslOptions(parsedArguments) val keyPassword = parsedArguments .keyPasswordFile .map(new File(_)) .map(Files.toString(_, Charsets.UTF_8)) + // If key password isn't set but we're using PEM files, generate a password + .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) + val keyStorePassword = parsedArguments + .keyStorePasswordFile + .map(new File(_)) + .map(Files.toString(_, Charsets.UTF_8)) + // If keystore password isn't set but we're using PEM files, generate a password + .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) + val resolvedKeyStore = parsedArguments.keyStoreFile.map(new File(_)).orElse( + parsedArguments.keyPemFile.map(keyPemFile => { + parsedArguments.certPemFile.map(certPemFile => { + PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( + new File(keyPemFile), + new File(certPemFile), + "provided-key", + keyStorePassword, + keyPassword, + parsedArguments.keyStoreType) + }) + }).getOrElse(throw new SparkException("When providing PEM files to set up TLS for the" + + 
" submission server, both the key and the certificate must be specified."))) new SSLOptions( enabled = true, - keyStore = parsedArguments.keyStoreFile.map(new File(_)), + keyStore = resolvedKeyStore, keyStoreType = parsedArguments.keyStoreType, keyStorePassword = keyStorePassword, keyPassword = keyPassword) @@ -425,5 +452,25 @@ private[spark] object KubernetesSparkRestServer { barrier.await() System.exit(exitCode.get()) } + + private def validateSslOptions(parsedArguments: KubernetesSparkRestServerArguments): Unit = { + parsedArguments.keyStoreFile.foreach { _ => + require(parsedArguments.keyPemFile.orElse(parsedArguments.certPemFile).isEmpty, + "Cannot provide both key/cert PEM files and a keyStore file; select one or the other" + + " for configuring SSL.") + } + parsedArguments.keyPemFile.foreach { _ => + require(parsedArguments.certPemFile.isDefined, + "When providing the key PEM file, the certificate PEM file must also be provided.") + } + parsedArguments.certPemFile.foreach { _ => + require(parsedArguments.keyPemFile.isDefined, + "When providing the certificate PEM file, the key PEM file must also be provided.") + } + } + + private def randomPassword(): String = { + RandomStringUtils.random(1024, 0, Integer.MAX_VALUE, false, false, null, SECURE_RANDOM) + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala new file mode 100644 index 0000000000000..e5c43560eccb4 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala @@ -0,0 +1,125 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.{File, FileInputStream, FileOutputStream, InputStreamReader} +import java.nio.file.Paths +import java.security.{KeyStore, PrivateKey} +import java.security.cert.Certificate +import java.util.UUID + +import com.google.common.base.Charsets +import org.bouncycastle.asn1.pkcs.PrivateKeyInfo +import org.bouncycastle.cert.X509CertificateHolder +import org.bouncycastle.cert.jcajce.JcaX509CertificateConverter +import org.bouncycastle.openssl.{PEMKeyPair, PEMParser} +import org.bouncycastle.openssl.jcajce.JcaPEMKeyConverter +import scala.collection.mutable + +import org.apache.spark.SparkException +import org.apache.spark.util.Utils + +private[spark] object PemsToKeyStoreConverter { + + /** + * Loads the given key-cert pair into a temporary keystore file. Returns the File pointing + * to where the keyStore was written to disk. 
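+ *
+ * Illustrative call only: the file paths and passwords below are placeholders, whereas the REST
+ * server in this patch passes the values parsed from its command-line flags:
+ * {{{
+ *   val keyStoreFile = PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile(
+ *     new File("/opt/spark/submission-ssl/key.pem"),
+ *     new File("/opt/spark/submission-ssl/cert.pem"),
+ *     "provided-key",
+ *     keyStorePassword = Some("changeit"),
+ *     keyPassword = Some("changeit"),
+ *     keyStoreType = None)
+ * }}}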
+ */ + def convertPemsToTempKeyStoreFile( + keyPemFile: File, + certPemFile: File, + keyAlias: String, + keyStorePassword: Option[String], + keyPassword: Option[String], + keyStoreType: Option[String]): File = { + require(keyPemFile.isFile, s"Key PEM file provided at ${keyPemFile.getAbsolutePath}" + + " does not exist or is not a file.") + require(certPemFile.isFile, s"Cert PEM file provided at ${certPemFile.getAbsolutePath}" + + " does not exist or is not a file.") + val privateKey = parsePrivateKeyFromPemFile(keyPemFile) + val certificates = parseCertificatesFromPemFile(certPemFile) + val resolvedKeyStoreType = keyStoreType.getOrElse(KeyStore.getDefaultType) + val keyStore = KeyStore.getInstance(resolvedKeyStoreType) + keyStore.load(null, null) + keyStore.setKeyEntry( + keyAlias, + privateKey, + keyPassword.map(_.toCharArray).orNull, + certificates) + val keyStoreOutputPath = Paths.get(s"keystore-${UUID.randomUUID()}.$resolvedKeyStoreType") + Utils.tryWithResource(new FileOutputStream(keyStoreOutputPath.toFile)) { storeStream => + keyStore.store(storeStream, keyStorePassword.map(_.toCharArray).orNull) + } + keyStoreOutputPath.toFile + } + + def convertCertPemToTrustStore( + certPemFile: File, + trustStoreType: Option[String]): KeyStore = { + require(certPemFile.isFile, s"Cert PEM file provided at ${certPemFile.getAbsolutePath}" + + " does not exist or is not a file.") + val trustStore = KeyStore.getInstance(trustStoreType.getOrElse(KeyStore.getDefaultType)) + trustStore.load(null, null) + parseCertificatesFromPemFile(certPemFile).zipWithIndex.foreach { case (cert, index) => + trustStore.setCertificateEntry(s"certificate-$index", cert) + } + trustStore + } + + private def withPemParsedFromFile[T](pemFile: File)(f: (PEMParser => T)): T = { + Utils.tryWithResource(new FileInputStream(pemFile)) { pemStream => + Utils.tryWithResource(new InputStreamReader(pemStream, Charsets.UTF_8)) { pemReader => + Utils.tryWithResource(new PEMParser(pemReader))(f) + } + } + } + + private def parsePrivateKeyFromPemFile(keyPemFile: File): PrivateKey = { + withPemParsedFromFile(keyPemFile) { keyPemParser => + val converter = new JcaPEMKeyConverter + keyPemParser.readObject() match { + case privateKey: PrivateKeyInfo => + converter.getPrivateKey(privateKey) + case keyPair: PEMKeyPair => + converter.getPrivateKey(keyPair.getPrivateKeyInfo) + case _ => + throw new SparkException(s"Key file provided at ${keyPemFile.getAbsolutePath}" + + s" is not a key pair or private key PEM file.") + } + } + } + + private def parseCertificatesFromPemFile(certPemFile: File): Array[Certificate] = { + withPemParsedFromFile(certPemFile) { certPemParser => + val certificates = mutable.Buffer[Certificate]() + var pemObject = certPemParser.readObject() + while (pemObject != null) { + pemObject match { + case certificate: X509CertificateHolder => + val converter = new JcaX509CertificateConverter + certificates += converter.getCertificate(certificate) + case _ => + } + pemObject = certPemParser.readObject() + } + if (certificates.isEmpty) { + throw new SparkException(s"No certificates found in ${certPemFile.getAbsolutePath}") + } + certificates.toArray + } + } +} diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 3bf6b50ff69c1..1f35e7e5eb209 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ 
b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -42,6 +42,8 @@ CMD SSL_ARGS="" && \ if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_TYPE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-type $SPARK_SUBMISSION_KEYSTORE_TYPE"; fi && \ if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-password-file $SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE"; fi && \ if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_KEY_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --key-pem-file $SPARK_SUBMISSION_KEY_PEM_FILE"; fi && \ + if ! [ -z ${SPARK_SUBMISSION_CERT_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --cert-pem-file $SPARK_SUBMISSION_CERT_PEM_FILE"; fi && \ exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer \ --hostname $HOSTNAME \ --port $SPARK_SUBMISSION_SERVER_PORT \ diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 5c54d0e5e3aab..da78e783cac1b 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -106,10 +106,6 @@ - - org.bouncycastle - bcpkix-jdk15on - diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 16564ca746b40..0e55e64fd1d77 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -72,8 +72,6 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") private var minikubeKubernetesClient: KubernetesClient = _ private var clientConfig: Config = _ - private var keyStoreFile: File = _ - private var trustStoreFile: File = _ private var sparkConf: SparkConf = _ override def beforeAll(): Unit = { @@ -86,13 +84,6 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .done() minikubeKubernetesClient = Minikube.getKubernetesClient.inNamespace(NAMESPACE) clientConfig = minikubeKubernetesClient.getConfiguration - val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( - Minikube.getMinikubeIp, - "changeit", - "changeit", - "changeit") - keyStoreFile = keyStore - trustStoreFile = trustStore } before { @@ -182,9 +173,6 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } test("Run a simple example") { - // We'll make assertions based on spark rest api, so we need to turn on - // spark.ui.enabled explicitly since the scalatest-maven-plugin would set it - // to false by default. 
new Client( sparkConf = sparkConf, mainClass = SPARK_PI_MAIN_CLASS, @@ -265,11 +253,30 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } test("Enable SSL on the driver submit server") { - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.submission.keyStorePassword", "changeit") - sparkConf.set("spark.ssl.kubernetes.submission.keyPassword", "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_TRUSTSTORE, + val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( + Minikube.getMinikubeIp, + "changeit", + "changeit", + "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, s"file://${trustStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") + sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) + new Client( + sparkConf = sparkConf, + mainClass = SPARK_PI_MAIN_CLASS, + mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + } + + test("Enable SSL on the driver submit server using PEM files") { + val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${certPem.getAbsolutePath}") sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) new Client( sparkConf = sparkConf, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala index bde7b43226660..2078e0585e8f0 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala @@ -16,15 +16,18 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest.sslutil -import java.io.{File, FileOutputStream} +import java.io.{File, FileOutputStream, OutputStreamWriter} import java.math.BigInteger import java.nio.file.Files -import java.security.{KeyPairGenerator, KeyStore, SecureRandom} +import java.security.cert.X509Certificate +import java.security.{KeyPair, KeyPairGenerator, KeyStore, SecureRandom} import java.util.{Calendar, Random} import javax.security.auth.x500.X500Principal +import com.google.common.base.Charsets import org.bouncycastle.asn1.x509.{Extension, GeneralName, GeneralNames} import org.bouncycastle.cert.jcajce.{JcaX509CertificateConverter, JcaX509v3CertificateBuilder} +import org.bouncycastle.openssl.jcajce.JcaPEMWriter import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder import org.apache.spark.util.Utils @@ -39,6 +42,58 @@ private[spark] object SSLUtils { val keyPairGenerator = KeyPairGenerator.getInstance("RSA") keyPairGenerator.initialize(512) val keyPair = keyPairGenerator.generateKeyPair() + val certificate = 
generateCertificate(ipAddress, keyPair) + val keyStore = KeyStore.getInstance("JKS") + keyStore.load(null, null) + keyStore.setKeyEntry("key", keyPair.getPrivate, + keyPassword.toCharArray, Array(certificate)) + val tempDir = Files.createTempDirectory("temp-ssl-stores").toFile + tempDir.deleteOnExit() + val keyStoreFile = new File(tempDir, "keyStore.jks") + Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { + keyStore.store(_, keyStorePassword.toCharArray) + } + val trustStore = KeyStore.getInstance("JKS") + trustStore.load(null, null) + trustStore.setCertificateEntry("key", certificate) + val trustStoreFile = new File(tempDir, "trustStore.jks") + Utils.tryWithResource(new FileOutputStream(trustStoreFile)) { + trustStore.store(_, trustStorePassword.toCharArray) + } + (keyStoreFile, trustStoreFile) + } + + def generateKeyCertPemPair(ipAddress: String): (File, File) = { + val keyPairGenerator = KeyPairGenerator.getInstance("RSA") + keyPairGenerator.initialize(512) + val keyPair = keyPairGenerator.generateKeyPair() + val certificate = generateCertificate(ipAddress, keyPair) + val tempDir = Files.createTempDirectory("temp-ssl-pems").toFile + tempDir.deleteOnExit() + val keyPemFile = new File(tempDir, "key.pem") + val certPemFile = new File(tempDir, "cert.pem") + Utils.tryWithResource(new FileOutputStream(keyPemFile)) { keyPemStream => + Utils.tryWithResource( + new OutputStreamWriter(keyPemStream, Charsets.UTF_8)) { streamWriter => + Utils.tryWithResource( + new JcaPEMWriter(streamWriter)) { pemWriter => + pemWriter.writeObject(keyPair.getPrivate) + } + } + } + Utils.tryWithResource(new FileOutputStream(certPemFile)) { keyPemStream => + Utils.tryWithResource( + new OutputStreamWriter(keyPemStream, Charsets.UTF_8)) { streamWriter => + Utils.tryWithResource( + new JcaPEMWriter(streamWriter)) { pemWriter => + pemWriter.writeObject(certificate) + } + } + } + (keyPemFile, certPemFile) + } + + private def generateCertificate(ipAddress: String, keyPair: KeyPair): X509Certificate = { val selfPrincipal = new X500Principal(s"cn=$ipAddress") val currentDate = Calendar.getInstance val validForOneHundredYears = Calendar.getInstance @@ -56,25 +111,6 @@ private[spark] object SSLUtils { .setSecureRandom(new SecureRandom()) .build(keyPair.getPrivate) val bcCertificate = certificateBuilder.build(signer) - val jcaCertificate = new JcaX509CertificateConverter().getCertificate(bcCertificate) - val keyStore = KeyStore.getInstance("JKS") - keyStore.load(null, null) - keyStore.setKeyEntry("key", keyPair.getPrivate, - keyPassword.toCharArray, Array(jcaCertificate)) - val tempDir = Files.createTempDirectory("temp-ssl-stores").toFile() - tempDir.deleteOnExit() - val keyStoreFile = new File(tempDir, "keyStore.jks") - Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { - keyStore.store(_, keyStorePassword.toCharArray) - } - val trustStore = KeyStore.getInstance("JKS") - trustStore.load(null, null) - trustStore.setCertificateEntry("key", jcaCertificate) - val trustStoreFile = new File(tempDir, "trustStore.jks") - Utils.tryWithResource(new FileOutputStream(trustStoreFile)) { - trustStore.store(_, trustStorePassword.toCharArray) - } - (keyStoreFile, trustStoreFile) + new JcaX509CertificateConverter().getCertificate(bcCertificate) } - } From d6b3234a11cfb766b2b79a5ac2a08a7dfea075a5 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 20 Mar 2017 23:51:50 -0700 Subject: [PATCH 083/225] Update tags on docker images. 
(#196) --- docs/running-on-kubernetes.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 3b6935560a575..b03396f37f644 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -24,11 +24,11 @@ If you wish to use pre-built docker images, you may use the images published in ComponentImage Spark Driver Image - kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1 + kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 Spark Executor Image - kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1 + kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 @@ -57,8 +57,8 @@ are set up as described above: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 \ examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -108,8 +108,8 @@ If our local proxy were listening on port 8001, we would have our submission loo --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-k8s-support-0.1.0-alpha.1 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-k8s-support-0.1.0-alpha.1 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 \ examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. From 368664f47244c23ffff56a62467ad52f5e6d42bd Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 22 Mar 2017 12:10:46 -0700 Subject: [PATCH 084/225] Add additional instructions to use release tarball (#198) --- docs/running-on-kubernetes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index b03396f37f644..794099638f80c 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -10,7 +10,7 @@ currently limited and not well-tested. This should not be used in production env * You must have a running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes cluster, you may setup a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). * You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), [nodes](https://kubernetes.io/docs/admin/node/) and [services](https://kubernetes.io/docs/user-guide/services/) in your cluster. You can verify that you can list these resources by running `kubectl get nodes`, `kubectl get pods` and `kubectl get svc` which should give you a list of nodes, pods and services (if any) respectively. 
-* You must [build Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support) from source. +* You must have a spark distribution with Kubernetes support. This may be obtained from the [release tarball](https://github.com/apache-spark-on-k8s/spark/releases) or by [building Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support). ## Driver & Executor Images From 37880e210e0016d4888fb39ef4fa6835c40e1ce5 Mon Sep 17 00:00:00 2001 From: Ye Yin Date: Thu, 30 Mar 2017 13:01:29 +0800 Subject: [PATCH 085/225] Support specify CPU cores for driver pod (#207) --- .../scala/org/apache/spark/deploy/kubernetes/Client.scala | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index 7e700b569a3fb..e628464aa6201 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -64,6 +64,9 @@ private[spark] class Client( .map(_.split(",")) .getOrElse(Array.empty[String]) + // CPU settings + private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") + // Memory settings private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) private val driverSubmitServerMemoryMb = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY) @@ -431,6 +434,9 @@ private[spark] class Client( .withPath("/v1/submissions/ping") .withNewPort(SUBMISSION_SERVER_PORT_NAME) .build() + val driverCpuQuantity = new QuantityBuilder(false) + .withAmount(driverCpuCores) + .build() val driverMemoryQuantity = new QuantityBuilder(false) .withAmount(s"${driverContainerMemoryMb}M") .build() @@ -478,6 +484,8 @@ private[spark] class Client( .endEnv() .addToEnv(sslConfiguration.sslPodEnvVars: _*) .withNewResources() + .addToRequests("cpu", driverCpuQuantity) + .addToLimits("cpu", driverCpuQuantity) .addToRequests("memory", driverMemoryQuantity) .addToLimits("memory", driverMemoryLimitQuantity) .endResources() From 3a0b77004f10422227521d14a7f822999194ffcc Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Wed, 5 Apr 2017 13:08:37 -0700 Subject: [PATCH 086/225] Register executors using pod IPs instead of pod host names (#215) * Register executors using pod IPs * Fix block manager port typo * Fix import * Keep requiredEnv to be a val * Clean up indentation --- .../spark/deploy/kubernetes/Client.scala | 4 ++-- .../spark/deploy/kubernetes/constants.scala | 1 + .../KubernetesClusterSchedulerBackend.scala | 21 +++++++++++++------ .../src/main/docker/executor/Dockerfile | 2 +- 4 files changed, 19 insertions(+), 9 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala index e628464aa6201..5d115115b4595 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala @@ -252,8 +252,8 @@ private[spark] class Client( sparkConf.set("spark.app.id", kubernetesAppId) sparkConf.setIfMissing("spark.app.name", appName) 
sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.blockmanager.port", - DEFAULT_BLOCKMANAGER_PORT.toString) + sparkConf.setIfMissing("spark.driver.blockManager.port", DEFAULT_BLOCKMANAGER_PORT.toString) + sparkConf.setIfMissing("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT.toString) sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => sparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 0e5fada302421..03b3d21ac9c45 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -68,6 +68,7 @@ package object constants { private[spark] val ENV_EXECUTOR_MEMORY = "SPARK_EXECUTOR_MEMORY" private[spark] val ENV_APPLICATION_ID = "SPARK_APPLICATION_ID" private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" + private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" // Annotation keys diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 234829a541c30..7eb1a6214df07 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -18,7 +18,8 @@ package org.apache.spark.scheduler.cluster.kubernetes import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} -import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, Pod, QuantityBuilder} +import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, + EnvVarSourceBuilder, Pod, QuantityBuilder} import scala.collection.JavaConverters._ import scala.concurrent.{ExecutionContext, Future} @@ -177,11 +178,19 @@ private[spark] class KubernetesClusterSchedulerBackend( (ENV_EXECUTOR_CORES, executorCores), (ENV_EXECUTOR_MEMORY, executorMemoryString), (ENV_APPLICATION_ID, applicationId()), - (ENV_EXECUTOR_ID, executorId) - ).map(env => new EnvVarBuilder() - .withName(env._1) - .withValue(env._2) - .build()) + (ENV_EXECUTOR_ID, executorId)) + .map(env => new EnvVarBuilder() + .withName(env._1) + .withValue(env._2) + .build() + ) ++ Seq( + new EnvVarBuilder() + .withName(ENV_EXECUTOR_POD_IP) + .withValueFrom(new EnvVarSourceBuilder() + .withNewFieldRef("v1", "status.podIP") + .build()) + .build() + ) val requiredPorts = Seq( (EXECUTOR_PORT_NAME, executorPort), (BLOCK_MANAGER_PORT_NAME, blockmanagerPort)) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index cd5ac466a1fa0..23c6751f1b3ed 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -37,4 +37,4 @@ ENV SPARK_HOME /opt/spark WORKDIR 
/opt/spark # TODO support spark.executor.extraClassPath -CMD exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp ${SPARK_HOME}/jars/\* org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $HOSTNAME +CMD exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp ${SPARK_HOME}/jars/\* org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP From 02ab18ed0bcf594e041bf2987e8dc8d060c1e57d Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 10 Apr 2017 11:27:41 -0700 Subject: [PATCH 087/225] Upgrade bouncycastle, force bcprov version (#223) --- pom.xml | 7 ++++++- resource-managers/kubernetes/core/pom.xml | 4 ++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index d6eecc5605338..999684c68a543 100644 --- a/pom.xml +++ b/pom.xml @@ -134,7 +134,7 @@ 1.6.0 9.3.11.v20160721 8.18.0 - 1.52 + 1.54 3.1.0 0.8.0 2.4.0 @@ -331,6 +331,11 @@ bcpkix-jdk15on ${bouncycastle.version} + + org.bouncycastle + bcprov-jdk15on + ${bouncycastle.version} + diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 6d2f1d0fd2769..649d004f971d5 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -91,6 +91,10 @@ org.bouncycastle bcpkix-jdk15on + + org.bouncycastle + bcprov-jdk15on + From d0e27b172d7b8cd0fb08cb073a0efb412e5033ee Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Wed, 12 Apr 2017 17:30:11 -0700 Subject: [PATCH 088/225] Stop executors cleanly before deleting their pods (#231) --- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 7eb1a6214df07..ccb4194336a44 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -131,6 +131,10 @@ private[spark] class KubernetesClusterSchedulerBackend( } override def stop(): Unit = { + // send stop message to executors so they shut down cleanly + super.stop() + + // then delete the executor pods // TODO investigate why Utils.tryLogNonFatalError() doesn't work in this context. // When using Utils.tryLogNonFatalError some of the code fails but without any logs or // indication as to why. @@ -149,7 +153,6 @@ private[spark] class KubernetesClusterSchedulerBackend( } catch { case e: Throwable => logError("Uncaught exception closing Kubernetes client.", e) } - super.stop() } private def allocateNewExecutorPod(): (String, Pod) = { From 9a895a8367807ae1358cf92a5eb4462051cb9804 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 13 Apr 2017 17:08:30 -0700 Subject: [PATCH 089/225] Upgrade Kubernetes client to 2.2.13. 
(#230) --- resource-managers/kubernetes/core/pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 649d004f971d5..09f0debd50c9c 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -29,7 +29,7 @@ Spark Project Kubernetes kubernetes - 2.2.1 + 2.2.13 From 88ec1c5fc710eb3d20cb69b63eb7c209d2d0c755 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 17 Apr 2017 13:22:34 -0700 Subject: [PATCH 090/225] Respect JVM http proxy settings when using Feign. (#228) * Respect JVM http proxy settings when using Feign. * Address comments * Address more comments` --- .../rest/kubernetes/HttpClientUtil.scala | 64 ++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala index 576f7058f20ee..33988bdc36f04 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala @@ -16,6 +16,9 @@ */ package org.apache.spark.deploy.rest.kubernetes +import java.io.IOException +import java.net.{InetSocketAddress, ProxySelector, SocketAddress, URI} +import java.util.Collections import javax.net.ssl.{SSLContext, SSLSocketFactory, X509TrustManager} import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} @@ -24,12 +27,15 @@ import feign.{Client, Feign, Request, Response} import feign.Request.Options import feign.jackson.{JacksonDecoder, JacksonEncoder} import feign.jaxrs.JAXRSContract +import io.fabric8.kubernetes.client.Config import okhttp3.OkHttpClient import scala.reflect.ClassTag +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging import org.apache.spark.status.api.v1.JacksonMessageWriter -private[spark] object HttpClientUtil { +private[spark] object HttpClientUtil extends Logging { def createClient[T: ClassTag]( uris: Set[String], @@ -42,6 +48,49 @@ private[spark] object HttpClientUtil { Option.apply(trustContext).foreach(context => { httpClientBuilder = httpClientBuilder.sslSocketFactory(sslSocketFactory, context) }) + val uriObjects = uris.map(URI.create) + val httpUris = uriObjects.filter(uri => uri.getScheme == "http") + val httpsUris = uriObjects.filter(uri => uri.getScheme == "https") + val maybeAllProxy = Option.apply(System.getProperty(Config.KUBERNETES_ALL_PROXY)) + val maybeHttpProxy = Option.apply(System.getProperty(Config.KUBERNETES_HTTP_PROXY)) + .orElse(maybeAllProxy) + .map(uriStringToProxy) + val maybeHttpsProxy = Option.apply(System.getProperty(Config.KUBERNETES_HTTPS_PROXY)) + .orElse(maybeAllProxy) + .map(uriStringToProxy) + val maybeNoProxy = Option.apply(System.getProperty(Config.KUBERNETES_NO_PROXY)) + .map(_.split(",")) + .toSeq + .flatten + val proxySelector = new ProxySelector { + override def select(uri: URI): java.util.List[java.net.Proxy] = { + val directProxy = java.net.Proxy.NO_PROXY + val resolvedProxy = maybeNoProxy.find( _ == uri.getHost) + .map( _ => directProxy) + .orElse(uri.getScheme match { + case "http" => + logDebug(s"Looking up http proxies to route $uri") + maybeHttpProxy.filter { _ => + matchingUriExists(uri, httpUris) + } + case "https" => + 
logDebug(s"Looking up https proxies to route $uri") + maybeHttpsProxy.filter { _ => + matchingUriExists(uri, httpsUris) + } + case _ => None + }).getOrElse(directProxy) + logDebug(s"Routing $uri through ${resolvedProxy.address()} with proxy" + + s" type ${resolvedProxy.`type`()}") + Collections.singletonList(resolvedProxy) + } + + override def connectFailed(uri: URI, sa: SocketAddress, ioe: IOException) = { + throw new SparkException(s"Failed to connect to proxy through uri $uri," + + s" socket address: $sa", ioe) + } + } + httpClientBuilder = httpClientBuilder.proxySelector(proxySelector) val objectMapper = new ObjectMapper() .registerModule(new DefaultScalaModule) .setDateFormat(JacksonMessageWriter.makeISODateFormat) @@ -66,4 +115,17 @@ private[spark] object HttpClientUtil { .retryer(target) .target(target) } + + private def matchingUriExists(uri: URI, httpUris: Set[URI]): Boolean = { + httpUris.exists(httpUri => { + httpUri.getScheme == uri.getScheme && httpUri.getHost == uri.getHost && + httpUri.getPort == uri.getPort + }) + } + + private def uriStringToProxy(uriString: String): java.net.Proxy = { + val uriObject = URI.create(uriString) + new java.net.Proxy(java.net.Proxy.Type.HTTP, + new InetSocketAddress(uriObject.getHost, uriObject.getPort)) + } } From 275510a3e6e6d9055d56026da2fb29c7f177f5f8 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 20 Apr 2017 23:15:24 -0700 Subject: [PATCH 091/225] Staging server for receiving application dependencies. (#212) * Staging server for receiving application dependencies. * Add unit test for file writing * Minor fixes * Remove getting credentials from the API We still want to post them because in the future we can use these credentials to monitor the API server and handle cleaning up the data accordingly. 
* Generalize to resource staging server outside of Spark * Update code documentation * Val instead of var * Fix naming, remove unused import * Move suites from integration test package to core * Use TrieMap instead of locks * Address comments * Fix imports * Change paths, use POST instead of PUT * Use a resource identifier as well as a resource secret --- pom.xml | 21 ++++ resource-managers/kubernetes/core/pom.xml | 21 ++++ .../kubernetes/v2/ResourceStagingServer.scala | 61 ++++++++++++ .../v2/ResourceStagingService.scala | 85 ++++++++++++++++ .../v2/ResourceStagingServiceImpl.scala | 98 ++++++++++++++++++ .../v2/ResourceStagingServiceRetrofit.scala | 42 ++++++++ .../rest/kubernetes/v2/RetrofitUtils.scala | 38 +++++++ .../v2/StagedResourceIdentifier.scala | 19 ++++ .../v2/ResourceStagingServerSuite.scala | 99 +++++++++++++++++++ .../v2/ResourceStagingServiceImplSuite.scala | 60 +++++++++++ 10 files changed, 544 insertions(+) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala diff --git a/pom.xml b/pom.xml index 999684c68a543..5752d95466bbd 100644 --- a/pom.xml +++ b/pom.xml @@ -134,6 +134,7 @@ 1.6.0 9.3.11.v20160721 8.18.0 + 2.2.0 1.54 3.1.0 0.8.0 @@ -326,6 +327,21 @@ feign-jaxrs ${feign.version} + + com.squareup.retrofit2 + retrofit + ${retrofit.version} + + + com.squareup.retrofit2 + converter-jackson + ${retrofit.version} + + + com.squareup.retrofit2 + converter-scalars + ${retrofit.version} + org.bouncycastle bcpkix-jdk15on @@ -685,6 +701,11 @@ jersey-client ${jersey.version} + + org.glassfish.jersey.media + jersey-media-multipart + ${jersey.version} + javax.ws.rs javax.ws.rs-api diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 09f0debd50c9c..8856339d4f6d9 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -60,10 +60,31 @@ com.netflix.feign feign-okhttp + + org.glassfish.jersey.containers + jersey-container-servlet + + + org.glassfish.jersey.media + jersey-media-multipart + com.netflix.feign feign-jackson + + com.squareup.retrofit2 + retrofit + + + com.squareup.retrofit2 + converter-jackson + + + com.squareup.retrofit2 + converter-scalars + + com.netflix.feign feign-jaxrs diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala new file mode 100644 index 0000000000000..e09a788c45321 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala @@ -0,0 +1,61 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import org.eclipse.jetty.server.{Server, ServerConnector} +import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} +import org.eclipse.jetty.util.thread.QueuedThreadPool +import org.glassfish.jersey.media.multipart.MultiPartFeature +import org.glassfish.jersey.server.ResourceConfig +import org.glassfish.jersey.servlet.ServletContainer + +private[spark] class ResourceStagingServer( + port: Int, + serviceInstance: ResourceStagingService) { + + private var jettyServer: Option[Server] = None + + def start(): Unit = synchronized { + val threadPool = new QueuedThreadPool + val contextHandler = new ServletContextHandler() + val jsonProvider = new JacksonJaxbJsonProvider() + jsonProvider.setMapper(new ObjectMapper().registerModule(new DefaultScalaModule)) + val resourceConfig = new ResourceConfig().registerInstances( + serviceInstance, + jsonProvider, + new MultiPartFeature) + val servletHolder = new ServletHolder("main", new ServletContainer(resourceConfig)) + contextHandler.setContextPath("/api/") + contextHandler.addServlet(servletHolder, "/*") + threadPool.setDaemon(true) + val server = new Server(threadPool) + val connector = new ServerConnector(server) + connector.setPort(port) + server.addConnector(connector) + server.setHandler(contextHandler) + server.start() + jettyServer = Some(server) + } + + def stop(): Unit = synchronized { + jettyServer.foreach(_.stop()) + jettyServer = None + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala new file mode 100644 index 0000000000000..5f7ceb461615e --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -0,0 +1,85 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.InputStream +import javax.ws.rs.{Consumes, GET, HeaderParam, Path, PathParam, POST, Produces} +import javax.ws.rs.core.{MediaType, StreamingOutput} + +import org.glassfish.jersey.media.multipart.FormDataParam + +import org.apache.spark.deploy.rest.KubernetesCredentials + +/** + * Service that receives application data that can be retrieved later on. This is primarily used + * in the context of Spark, but the concept is generic enough to be used for arbitrary applications. + * The use case is to have a place for Kubernetes application submitters to bootstrap dynamic, + * heavyweight application data for pods. Application submitters may have data stored on their + * local disks that they want to provide to the pods they create through the API server. ConfigMaps + * are one way to provide this data, but the data in ConfigMaps are stored in etcd which cannot + * maintain data in the hundreds of megabytes in size. + *
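+ * (As a point of reference, etcd-backed objects such as ConfigMaps are limited
+ * to roughly a megabyte each, which is far below the size of a typical
+ * application bundle.)
+ *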

    + * The general use case is for an application submitter to ship the dependencies to the server via + * {@link uploadResources}; the application submitter will then receive a unique secure token. + * The application submitter then ought to convert the token into a secret, and use this secret in + * a pod that fetches the uploaded dependencies via {@link downloadResources}. An application can + * provide multiple resource bundles simply by hitting the upload endpoint multiple times and + * downloading each bundle with the appropriate secret. + */ +@Path("/v0") +private[spark] trait ResourceStagingService { + + /** + * Register a resource with the dependency service, so that pods with the given labels can + * retrieve them when they run. + * + * @param resources Application resources to upload, compacted together in tar + gzip format. + * The tarball should contain the files laid out in a flat hierarchy, without + * any directories. We take a stream here to avoid holding these entirely in + * memory. + * @param podLabels Labels of pods to monitor. When no more pods are running with the given label, + * after some period of time, these dependencies will be cleared. + * @param podNamespace Namespace of pods to monitor. + * @param kubernetesCredentials These credentials are primarily used to monitor the progress of + * the application. When the application shuts down normally, shuts + * down abnormally and does not restart, or fails to start entirely, + * the data uploaded through this endpoint is cleared. + * @return A unique token that should be provided when retrieving these dependencies later. + */ + @POST + @Consumes(Array(MediaType.MULTIPART_FORM_DATA, MediaType.APPLICATION_JSON, MediaType.TEXT_PLAIN)) + @Produces(Array(MediaType.APPLICATION_JSON)) + @Path("/resources") + def uploadResources( + @FormDataParam("podLabels") podLabels: Map[String, String], + @FormDataParam("podNamespace") podNamespace: String, + @FormDataParam("resources") resources: InputStream, + @FormDataParam("kubernetesCredentials") kubernetesCredentials: KubernetesCredentials) + : StagedResourceIdentifier + + /** + * Download an application's resources. The resources are provided as a stream, where the stream's + * underlying data matches the stream that was uploaded in uploadResources. + */ + @GET + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.APPLICATION_OCTET_STREAM)) + @Path("/resources/{resourceId}") + def downloadResources( + @PathParam("resourceId") resourceId: String, + @HeaderParam("Authorization") resourceSecret: String): StreamingOutput +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala new file mode 100644 index 0000000000000..bb338dacdf511 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{File, FileOutputStream, InputStream, OutputStream} +import java.security.SecureRandom +import java.util.UUID +import javax.ws.rs.{NotAuthorizedException, NotFoundException} +import javax.ws.rs.core.StreamingOutput + +import com.google.common.io.{BaseEncoding, ByteStreams, Files} +import scala.collection.concurrent.TrieMap + +import org.apache.spark.SparkException +import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) + extends ResourceStagingService with Logging { + + private val SECURE_RANDOM = new SecureRandom() + // TODO clean up these resources based on the driver's lifecycle + private val stagedResources = TrieMap.empty[String, StagedResources] + + override def uploadResources( + podLabels: Map[String, String], + podNamespace: String, + resources: InputStream, + kubernetesCredentials: KubernetesCredentials): StagedResourceIdentifier = { + val resourceId = UUID.randomUUID().toString + val secretBytes = new Array[Byte](1024) + SECURE_RANDOM.nextBytes(secretBytes) + val resourceSecret = resourceId + "-" + BaseEncoding.base64().encode(secretBytes) + + val namespaceDir = new File(dependenciesRootDir, podNamespace) + val resourcesDir = new File(namespaceDir, resourceId) + try { + if (!resourcesDir.exists()) { + if (!resourcesDir.mkdirs()) { + throw new SparkException("Failed to create dependencies directory for application" + + s" at ${resourcesDir.getAbsolutePath}") + } + } + // TODO encrypt the written data with the secret. 
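+      // Stream the uploaded bytes straight to disk rather than buffering them in
+      // memory, then index the staged bundle by its resource id so that later
+      // downloads can be validated against the generated secret.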
+ val resourcesTgz = new File(resourcesDir, "resources.data") + Utils.tryWithResource(new FileOutputStream(resourcesTgz)) { ByteStreams.copy(resources, _) } + stagedResources(resourceId) = StagedResources( + resourceSecret, + podLabels, + podNamespace, + resourcesTgz, + kubernetesCredentials) + StagedResourceIdentifier(resourceId, resourceSecret) + } catch { + case e: Throwable => + if (!resourcesDir.delete()) { + logWarning(s"Failed to delete application directory $resourcesDir.") + } + throw e + } + } + + override def downloadResources(resourceId: String, resourceSecret: String): StreamingOutput = { + val resource = stagedResources + .get(resourceId) + .getOrElse(throw new NotFoundException(s"No resource bundle found with id $resourceId")) + if (!resource.resourceSecret.equals(resourceSecret)) { + throw new NotAuthorizedException(s"Unauthorized to download resource with id $resourceId") + } + new StreamingOutput { + override def write(outputStream: OutputStream) = { + Files.copy(resource.resourcesFile, outputStream) + } + } + } +} + +private case class StagedResources( + resourceSecret: String, + podLabels: Map[String, String], + podNamespace: String, + resourcesFile: File, + kubernetesCredentials: KubernetesCredentials) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala new file mode 100644 index 0000000000000..daf03f764b35a --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import okhttp3.{RequestBody, ResponseBody} +import retrofit2.Call +import retrofit2.http.{Multipart, Path, Streaming} + +/** + * Retrofit-compatible variant of {@link ResourceStagingService}. For documentation on + * how to use this service, see the aforementioned JAX-RS based interface. 
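+ *
+ * A client can be built with RetrofitUtils.createRetrofitClient; the sketch
+ * below is illustrative only (the host, port and the multipart request bodies
+ * are placeholders):
+ *
+ * {{{
+ *   val stagingService = RetrofitUtils.createRetrofitClient(
+ *     "http://localhost:10000/", classOf[ResourceStagingServiceRetrofit])
+ *   val stagedResources = stagingService
+ *     .uploadResources(labelsBody, namespaceBody, resourcesBody, credentialsBody)
+ *     .execute()
+ *     .body()
+ *   val downloaded = stagingService
+ *     .downloadResources(stagedResources.resourceId, stagedResources.resourceSecret)
+ *     .execute()
+ *     .body()
+ * }}}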
+ */ +private[spark] trait ResourceStagingServiceRetrofit { + + @Multipart + @retrofit2.http.POST("/api/v0/resources/") + def uploadResources( + @retrofit2.http.Part("podLabels") podLabels: RequestBody, + @retrofit2.http.Part("podNamespace") podNamespace: RequestBody, + @retrofit2.http.Part("resources") resources: RequestBody, + @retrofit2.http.Part("kubernetesCredentials") + kubernetesCredentials: RequestBody): Call[StagedResourceIdentifier] + + @Streaming + @retrofit2.http.GET("/api/v0/resources/{resourceId}") + def downloadResources(@Path("resourceId") resourceId: String, + @retrofit2.http.Header("Authorization") resourceSecret: String): Call[ResponseBody] +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala new file mode 100644 index 0000000000000..c5c5c0d35b7cb --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala @@ -0,0 +1,38 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import retrofit2.Retrofit +import retrofit2.converter.jackson.JacksonConverterFactory +import retrofit2.converter.scalars.ScalarsConverterFactory + +private[spark] object RetrofitUtils { + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + def createRetrofitClient[T](baseUrl: String, serviceType: Class[T]): T = { + new Retrofit.Builder() + .baseUrl(baseUrl) + .addConverterFactory(ScalarsConverterFactory.create()) + .addConverterFactory(JacksonConverterFactory.create(OBJECT_MAPPER)) + .build() + .create(serviceType) + } + +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala new file mode 100644 index 0000000000000..65bc9bc17dae9 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala @@ -0,0 +1,19 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +case class StagedResourceIdentifier(resourceId: String, resourceSecret: String) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala new file mode 100644 index 0000000000000..70ba5be395042 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -0,0 +1,99 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.net.ServerSocket +import javax.ws.rs.core.MediaType + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.io.ByteStreams +import okhttp3.{RequestBody, ResponseBody} +import org.scalatest.BeforeAndAfterAll +import retrofit2.Call + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.util.Utils + +/** + * Tests for {@link ResourceStagingServer} and its APIs. Note that this is not an end-to-end + * integration test, and as such does not upload and download files in tar.gz as would be done + * in production. Thus we use the retrofit clients directly despite the fact that in practice + * we would likely want to create an opinionated abstraction on top of the retrofit client; we + * can test this abstraction layer separately, however. This test is mainly for checking that + * we've configured the Jetty server correctly and that the endpoints reached over HTTP can + * receive streamed uploads and can stream downloads. 
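+ * The suite starts the server once on an ephemeral port in beforeAll and stops
+ * it in afterAll, so the single test below runs against a live Jetty instance.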
+ */ +class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfterAll { + + private val serverPort = new ServerSocket(0).getLocalPort + private val serviceImpl = new ResourceStagingServiceImpl(Utils.createTempDir()) + private val server = new ResourceStagingServer(serverPort, serviceImpl) + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + override def beforeAll(): Unit = { + server.start() + } + + override def afterAll(): Unit = { + server.stop() + } + + test("Accept file and jar uploads and downloads") { + val retrofitService = RetrofitUtils.createRetrofitClient(s"http://localhost:$serverPort/", + classOf[ResourceStagingServiceRetrofit]) + val resourcesBytes = Array[Byte](1, 2, 3, 4) + val labels = Map("label1" -> "label1Value", "label2" -> "label2value") + val namespace = "namespace" + val labelsJson = OBJECT_MAPPER.writer().writeValueAsString(labels) + val resourcesRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) + val labelsRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsJson) + val namespaceRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), namespace) + val kubernetesCredentials = KubernetesCredentials(Some("token"), Some("ca-cert"), None, None) + val kubernetesCredentialsString = OBJECT_MAPPER.writer() + .writeValueAsString(kubernetesCredentials) + val kubernetesCredentialsBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) + val uploadResponse = retrofitService.uploadResources( + labelsRequestBody, namespaceRequestBody, resourcesRequestBody, kubernetesCredentialsBody) + val resourceIdentifier = getTypedResponseResult(uploadResponse) + checkResponseBodyBytesMatches( + retrofitService.downloadResources( + resourceIdentifier.resourceId, resourceIdentifier.resourceSecret), resourcesBytes) + } + + private def getTypedResponseResult[T](call: Call[T]): T = { + val response = call.execute() + assert(response.code() >= 200 && response.code() < 300, Option(response.errorBody()) + .map(_.string()) + .getOrElse("Error executing HTTP request, but error body was not provided.")) + val callResult = response.body() + assert(callResult != null) + callResult + } + + private def checkResponseBodyBytesMatches(call: Call[ResponseBody], bytes: Array[Byte]): Unit = { + val responseBody = getTypedResponseResult(call) + val downloadedBytes = ByteStreams.toByteArray(responseBody.byteStream()) + assert(downloadedBytes.toSeq === bytes) + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala new file mode 100644 index 0000000000000..b92257005d5df --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{ByteArrayInputStream, File} +import java.nio.file.Paths + +import com.google.common.io.Files + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.util.Utils + +/** + * Unit, scala-level tests for KubernetesSparkDependencyServiceImpl. The coverage here + * differs from that of KubernetesSparkDependencyServerSuite as here we invoke the + * implementation methods directly as opposed to over HTTP, as well as check the + * data written to the underlying disk. + */ +class ResourceStagingServiceImplSuite extends SparkFunSuite { + + private val dependencyRootDir = Utils.createTempDir() + private val serviceImpl = new ResourceStagingServiceImpl(dependencyRootDir) + private val resourceBytes = Array[Byte](1, 2, 3, 4) + private val kubernetesCredentials = KubernetesCredentials( + Some("token"), Some("caCert"), Some("key"), Some("cert")) + private val namespace = "namespace" + private val labels = Map("label1" -> "label1value", "label2" -> "label2value") + + test("Uploads should write data to the underlying disk") { + Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { resourceStream => + serviceImpl.uploadResources(labels, namespace, resourceStream, kubernetesCredentials) + } + val resourceNamespaceDir = Paths.get(dependencyRootDir.getAbsolutePath, "namespace").toFile + assert(resourceNamespaceDir.isDirectory, s"Resource namespace dir was not created at" + + s" ${resourceNamespaceDir.getAbsolutePath} or is not a directory.") + val resourceDirs = resourceNamespaceDir.listFiles() + assert(resourceDirs.length === 1, s"Resource root directory did not have exactly one" + + s" subdirectory. Got: ${resourceDirs.map(_.getAbsolutePath).mkString(",")}") + val resourceTgz = new File(resourceDirs(0), "resources.data") + assert(resourceTgz.isFile, + s"Resources written to ${resourceTgz.getAbsolutePath} does not exist or is not a file.") + val resourceTgzBytes = Files.toByteArray(resourceTgz) + assert(resourceTgzBytes.toSeq === resourceBytes.toSeq, "Incorrect resource bytes were written.") + } +} From b1964263925d8f1d432a4da806005608f188e8b5 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 21 Apr 2017 00:34:27 -0700 Subject: [PATCH 092/225] Reorganize packages between v1 work and v2 work (#220) * Staging server for receiving application dependencies. * Move packages around to split between v1 work and v2 work * Add unit test for file writing * Remove unnecessary main * Add back license header * Minor fixes * Fix integration test with renamed package for client. Fix scalastyle. * Force json serialization to consider the different package. * Revert extraneous log * Fix scalastyle * Remove getting credentials from the API We still want to post them because in the future we can use these credentials to monitor the API server and handle cleaning up the data accordingly. 
* Generalize to resource staging server outside of Spark * Update code documentation * Val instead of var * Fix build * Fix naming, remove unused import * Move suites from integration test package to core * Use TrieMap instead of locks * Address comments * Fix imports * Change paths, use POST instead of PUT * Use a resource identifier as well as a resource secret --- .../scala/org/apache/spark/deploy/SparkSubmit.scala | 2 +- dev/.rat-excludes | 2 +- ....deploy.kubernetes.submit.v1.DriverServiceManager | 2 ++ ...spark.deploy.rest.kubernetes.DriverServiceManager | 2 -- .../org/apache/spark/deploy/kubernetes/config.scala | 2 +- .../deploy/kubernetes/{ => submit/v1}/Client.scala | 5 ++--- .../submit/v1}/CompressionUtils.scala | 4 ++-- .../v1}/DriverPodKubernetesCredentialsProvider.scala | 4 ++-- .../submit/v1}/DriverServiceManager.scala | 3 +-- .../v1}/DriverSubmitSslConfigurationProvider.scala | 4 ++-- .../ExternalSuppliedUrisDriverServiceManager.scala | 2 +- .../{ => submit/v1}/KubernetesResourceCleaner.scala | 2 +- .../{ => submit/v1}/LoggingPodStatusWatcher.scala | 5 ++--- .../v1}/NodePortUrisDriverServiceManager.scala | 2 +- .../rest/kubernetes/{ => v1}/HttpClientUtil.scala | 2 +- .../kubernetes/{ => v1}/KubernetesFileUtils.scala | 2 +- .../v1}/KubernetesRestProtocolMessages.scala | 12 ++++++++++-- .../kubernetes/{ => v1}/KubernetesSparkRestApi.scala | 4 ++-- .../{ => v1}/KubernetesSparkRestServer.scala | 3 ++- .../kubernetes/{ => v1}/MultiServerFeignTarget.scala | 2 +- .../{ => v1}/PemsToKeyStoreConverter.scala | 2 +- .../rest/kubernetes/v2/ResourceStagingService.scala | 2 +- .../kubernetes/v2/ResourceStagingServiceImpl.scala | 2 +- .../kubernetes/KubernetesClientBuilder.scala | 2 +- .../KubernetesClusterSchedulerBackend.scala | 1 - .../kubernetes/v2/ResourceStagingServerSuite.scala | 2 +- .../v2/ResourceStagingServiceImplSuite.scala | 2 +- .../src/main/docker/driver/Dockerfile | 2 +- .../kubernetes/integrationtest/KubernetesSuite.scala | 3 +-- .../integrationtest/minikube/Minikube.scala | 2 +- 30 files changed, 45 insertions(+), 41 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager delete mode 100644 resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => submit/v1}/Client.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes => kubernetes/submit/v1}/CompressionUtils.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => submit/v1}/DriverPodKubernetesCredentialsProvider.scala (96%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes => kubernetes/submit/v1}/DriverServiceManager.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => submit/v1}/DriverSubmitSslConfigurationProvider.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes => kubernetes/submit/v1}/ExternalSuppliedUrisDriverServiceManager.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => submit/v1}/KubernetesResourceCleaner.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{ => 
submit/v1}/LoggingPodStatusWatcher.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes => kubernetes/submit/v1}/NodePortUrisDriverServiceManager.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/HttpClientUtil.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/KubernetesFileUtils.scala (96%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/{ => kubernetes/v1}/KubernetesRestProtocolMessages.scala (81%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/KubernetesSparkRestApi.scala (89%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/KubernetesSparkRestServer.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/MultiServerFeignTarget.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{ => v1}/PemsToKeyStoreConverter.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/{deploy => scheduler/cluster}/kubernetes/KubernetesClientBuilder.scala (98%) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 53098c5085ddc..83a58bc2864ba 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -635,7 +635,7 @@ object SparkSubmit extends CommandLineUtils { } if (isKubernetesCluster) { - childMainClass = "org.apache.spark.deploy.kubernetes.Client" + childMainClass = "org.apache.spark.deploy.kubernetes.submit.v1.Client" childArgs += args.primaryResource childArgs += args.mainClass childArgs ++= args.childArgs diff --git a/dev/.rat-excludes b/dev/.rat-excludes index 8998e752a9bd8..2fdb5b90ed7a4 100644 --- a/dev/.rat-excludes +++ b/dev/.rat-excludes @@ -106,4 +106,4 @@ org.apache.spark.deploy.yarn.security.ServiceCredentialProvider spark-warehouse structured-streaming/* kafka-source-initial-offset-version-2.1.0.bin -org.apache.spark.deploy.rest.kubernetes.DriverServiceManager +org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager new file mode 100644 index 0000000000000..2ed0387c51bc6 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager @@ -0,0 +1,2 @@ +org.apache.spark.deploy.kubernetes.submit.v1.ExternalSuppliedUrisDriverServiceManager +org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager deleted file mode 100644 index 56203ee38ac99..0000000000000 --- 
a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.rest.kubernetes.DriverServiceManager +++ /dev/null @@ -1,2 +0,0 @@ -org.apache.spark.deploy.rest.kubernetes.ExternalSuppliedUrisDriverServiceManager -org.apache.spark.deploy.rest.kubernetes.NodePortUrisDriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 3328809e186e4..e403a6e8b927f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.kubernetes import java.util.concurrent.TimeUnit import org.apache.spark.{SPARK_VERSION => sparkVersion} -import org.apache.spark.deploy.rest.kubernetes.NodePortUrisDriverServiceManager +import org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.network.util.ByteUnit diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 5d115115b4595..72d24f7bf8342 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.io.File import java.security.SecureRandom @@ -32,8 +32,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.{AppResource, ContainerAppResource, KubernetesCreateSubmissionRequest, KubernetesCredentials, RemoteAppResource, UploadedAppResource} -import org.apache.spark.deploy.rest.kubernetes._ +import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesFileUtils, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala index 7204cb874aaec..8296218ba1f70 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream} import java.util.zip.{GZIPInputStream, GZIPOutputStream} @@ -26,7 +26,7 @@ import org.apache.commons.compress.utils.CharsetNames import org.apache.commons.io.IOUtils import scala.collection.mutable -import org.apache.spark.deploy.rest.TarGzippedData +import org.apache.spark.deploy.rest.kubernetes.v1.TarGzippedData import org.apache.spark.internal.Logging import org.apache.spark.util.{ByteBufferOutputStream, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala similarity index 96% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala index cee47aad79393..bc7490ef9ec4a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverPodKubernetesCredentialsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.io.File @@ -22,7 +22,7 @@ import com.google.common.io.{BaseEncoding, Files} import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.config.OptionalConfigEntry private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/DriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/DriverServiceManager.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala index d92c0247e2a35..c7d394fcf00ad 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/DriverServiceManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala @@ -14,8 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} import io.fabric8.kubernetes.client.KubernetesClient diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala index a83c9a9896a08..10ffddcd7e7fc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/DriverSubmitSslConfigurationProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.io.{File, FileInputStream} import java.security.{KeyStore, SecureRandom} @@ -29,7 +29,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.{KubernetesFileUtils, PemsToKeyStoreConverter} +import org.apache.spark.deploy.rest.kubernetes.v1.{KubernetesFileUtils, PemsToKeyStoreConverter} import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ExternalSuppliedUrisDriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ExternalSuppliedUrisDriverServiceManager.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala index 257571b5a9d3e..4c784aeb5692f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ExternalSuppliedUrisDriverServiceManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.util.concurrent.TimeUnit diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesResourceCleaner.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesResourceCleaner.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala index 6329bb1359516..266ec652ed8ae 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesResourceCleaner.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import io.fabric8.kubernetes.api.model.HasMetadata import io.fabric8.kubernetes.client.KubernetesClient diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala index 17c3db8331ac4..7be334194d9d7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala @@ -14,15 +14,14 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import java.util.concurrent.{CountDownLatch, Executors, TimeUnit} -import scala.collection.JavaConverters._ - import io.fabric8.kubernetes.api.model.Pod import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action +import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala index 1416476824793..965d71917403e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/NodePortUrisDriverServiceManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.kubernetes.submit.v1 import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} import scala.collection.JavaConverters._ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala index 33988bdc36f04..ea1abed72c07f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/HttpClientUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import java.io.IOException import java.net.{InetSocketAddress, ProxySelector, SocketAddress, URI} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala similarity index 96% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala index f30be1535f81c..b8e644219097e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesFileUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala similarity index 81% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala index 1ea44109c5f5e..cd1f9dcdf5879 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala @@ -14,11 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest +package org.apache.spark.deploy.rest.kubernetes.v1 -import com.fasterxml.jackson.annotation.{JsonSubTypes, JsonTypeInfo} +import com.fasterxml.jackson.annotation.{JsonIgnore, JsonSubTypes, JsonTypeInfo} import org.apache.spark.SPARK_VERSION +import org.apache.spark.deploy.rest.{SubmitRestProtocolRequest, SubmitRestProtocolResponse} +import org.apache.spark.util.Utils case class KubernetesCredentials( oauthToken: Option[String], @@ -35,6 +37,9 @@ case class KubernetesCreateSubmissionRequest( driverPodKubernetesCredentials: KubernetesCredentials, uploadedJarsBase64Contents: TarGzippedData, uploadedFilesBase64Contents: TarGzippedData) extends SubmitRestProtocolRequest { + @JsonIgnore + override val messageType: String = s"kubernetes.v1.${Utils.getFormattedClassName(this)}" + override val action = messageType message = "create" clientSparkVersion = SPARK_VERSION } @@ -68,5 +73,8 @@ class PingResponse extends SubmitRestProtocolResponse { val text = "pong" message = "pong" serverSparkVersion = SPARK_VERSION + @JsonIgnore + override val messageType: String = s"kubernetes.v1.${Utils.getFormattedClassName(this)}" + override val action: String = messageType } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala similarity index 89% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala index 18eb9b7a12ca6..270e7ea0e77bf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestApi.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala @@ -14,12 +14,12 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import javax.ws.rs.{Consumes, GET, Path, POST, Produces} import javax.ws.rs.core.MediaType -import org.apache.spark.deploy.rest.{CreateSubmissionResponse, KubernetesCreateSubmissionRequest, PingResponse} +import org.apache.spark.deploy.rest.CreateSubmissionResponse @Path("/v1/submissions/") trait KubernetesSparkRestApi { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index 4ca01b2f6bd38..048427fa4ec23 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import java.io.{File, FileOutputStream, StringReader} import java.net.URI @@ -34,6 +34,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.v1.CompressionUtils import org.apache.spark.deploy.rest._ import org.apache.spark.internal.config.OptionalConfigEntry import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala index 51313e00ce2da..56ff82ea2fc33 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/MultiServerFeignTarget.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import feign.{Request, RequestTemplate, RetryableException, Retryer, Target} import scala.reflect.ClassTag diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala index e5c43560eccb4..da863a9fb48e2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes +package org.apache.spark.deploy.rest.kubernetes.v1 import java.io.{File, FileInputStream, FileOutputStream, InputStreamReader} import java.nio.file.Paths diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala index 5f7ceb461615e..95cc6ab949d5c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -22,7 +22,7 @@ import javax.ws.rs.core.{MediaType, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials /** * Service that receives application data that can be retrieved later on. 
This is primarily used diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala index bb338dacdf511..732969cd67d89 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -26,7 +26,7 @@ import com.google.common.io.{BaseEncoding, ByteStreams, Files} import scala.collection.concurrent.TrieMap import org.apache.spark.SparkException -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.Logging import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala index 554ed17ff25c4..6725992aae978 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesClientBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes +package org.apache.spark.scheduler.cluster.kubernetes import java.io.File diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index ccb4194336a44..130b143c7e92b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -24,7 +24,6 @@ import scala.collection.JavaConverters._ import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkException} -import org.apache.spark.deploy.kubernetes.KubernetesClientBuilder import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.rpc.RpcEndpointAddress diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index 70ba5be395042..babc0994d25dc 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -27,7 +27,7 @@ import org.scalatest.BeforeAndAfterAll import retrofit2.Call import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala index b92257005d5df..60850bb877540 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala @@ -22,7 +22,7 @@ import java.nio.file.Paths import com.google.common.io.Files import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.rest.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 1f35e7e5eb209..8ab7a58704505 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -44,7 +44,7 @@ CMD SSL_ARGS="" && \ if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ if ! 
[ -z ${SPARK_SUBMISSION_KEY_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --key-pem-file $SPARK_SUBMISSION_KEY_PEM_FILE"; fi && \ if ! [ -z ${SPARK_SUBMISSION_CERT_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --cert-pem-file $SPARK_SUBMISSION_CERT_PEM_FILE"; fi && \ - exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.KubernetesSparkRestServer \ + exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.v1.KubernetesSparkRestServer \ --hostname $HOSTNAME \ --port $SPARK_SUBMISSION_SERVER_PORT \ --secret-file $SPARK_SUBMISSION_SECRET_LOCATION \ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 0e55e64fd1d77..8deb790f4b7a0 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -35,14 +35,13 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.deploy.SparkSubmit -import org.apache.spark.deploy.kubernetes.Client import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 import org.apache.spark.deploy.kubernetes.integrationtest.sslutil.SSLUtils -import org.apache.spark.deploy.rest.kubernetes.ExternalSuppliedUrisDriverServiceManager +import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala index 07274bf962dde..81491be944d3e 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala @@ -27,7 +27,7 @@ import io.fabric8.kubernetes.client.internal.SSLUtils import scala.collection.mutable.ArrayBuffer import scala.reflect.ClassTag -import org.apache.spark.deploy.rest.kubernetes.HttpClientUtil +import org.apache.spark.deploy.rest.kubernetes.v1.HttpClientUtil import org.apache.spark.internal.Logging import org.apache.spark.util.Utils From d432dba424295e62f449dc9a7ed40e111b6d140a Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 21 Apr 2017 02:20:26 -0700 Subject: [PATCH 093/225] Support SSL on the file staging server (#221) * Staging server for receiving application dependencies. * Move packages around to split between v1 work and v2 work * Add unit test for file writing * Remove unnecessary main * Allow the file staging server to be secured with TLS. 
* Add back license header * Minor fixes * Fix integration test with renamed package for client. Fix scalastyle. * Remove unused import * Force json serialization to consider the different package. * Revert extraneous log * Fix scalastyle * Remove getting credentials from the API We still want to post them because in the future we can use these credentials to monitor the API server and handle cleaning up the data accordingly. * Fix build * Randomize name and namespace in test to prevent collisions * Generalize to resource staging server outside of Spark * Update code documentation * Val instead of var * Fix unit tests. * Fix build * Fix naming, remove unused import * Move suites from integration test package to core * Fix unit test * Use TrieMap instead of locks * Address comments * Fix imports * Address comments * Change main object name * Change config variable names * Change paths, use POST instead of PUT * Use a resource identifier as well as a resource secret --- .../spark/deploy/kubernetes/config.scala | 33 +++++ .../v1/PemsToKeyStoreConverter.scala | 7 +- .../kubernetes/v2/ResourceStagingServer.scala | 76 +++++++++- ...ourceStagingServerSslOptionsProvider.scala | 133 ++++++++++++++++++ .../rest/kubernetes/v2/RetrofitUtils.scala | 31 +++- .../spark/deploy/kubernetes}/SSLUtils.scala | 2 +- ...StagingServerSslOptionsProviderSuite.scala | 116 +++++++++++++++ .../v2/ResourceStagingServerSuite.scala | 57 ++++++-- .../kubernetes/integration-tests/pom.xml | 7 + .../integrationtest/KubernetesSuite.scala | 2 +- 10 files changed, 442 insertions(+), 22 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala rename resource-managers/kubernetes/{integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil => core/src/test/scala/org/apache/spark/deploy/kubernetes}/SSLUtils.scala (98%) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index e403a6e8b927f..15f7a17857f1f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -288,4 +288,37 @@ package object config { .doc("Interval between reports of the current app status in cluster mode.") .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("1s") + + // Spark dependency server for submission v2 + + private[spark] val RESOURCE_STAGING_SERVER_PORT = + ConfigBuilder("spark.kubernetes.resourceStagingServer.port") + .doc("Port for the Kubernetes resource staging server to listen on.") + .intConf + .createWithDefault(10000) + + private[spark] val RESOURCE_STAGING_SERVER_KEY_PEM = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyPem") + .doc("Key PEM file to use when having the Kubernetes dependency server listen on TLS.") + .stringConf + .createOptional + + private[spark] val RESOURCE_STAGING_SERVER_CERT_PEM = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.serverCertPem") + .doc("Certificate PEM file to use when having the Kubernetes dependency server" + + " listen on TLS.") + .stringConf + .createOptional + 
+ private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile") + .doc("File containing the keystore password for the Kubernetes dependency server.") + .stringConf + .createOptional + + private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile") + .doc("File containing the key password for the Kubernetes dependency server.") + .stringConf + .createOptional } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala index da863a9fb48e2..2c68b150baf91 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala @@ -60,11 +60,12 @@ private[spark] object PemsToKeyStoreConverter { privateKey, keyPassword.map(_.toCharArray).orNull, certificates) - val keyStoreOutputPath = Paths.get(s"keystore-${UUID.randomUUID()}.$resolvedKeyStoreType") - Utils.tryWithResource(new FileOutputStream(keyStoreOutputPath.toFile)) { storeStream => + val keyStoreDir = Utils.createTempDir("temp-keystores") + val keyStoreFile = new File(keyStoreDir, s"keystore-${UUID.randomUUID()}.$resolvedKeyStoreType") + Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { storeStream => keyStore.store(storeStream, keyStorePassword.map(_.toCharArray).orNull) } - keyStoreOutputPath.toFile + keyStoreFile } def convertCertPemToTrustStore( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala index e09a788c45321..8ca13da545d5d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala @@ -16,19 +16,32 @@ */ package org.apache.spark.deploy.rest.kubernetes.v2 +import java.io.{File, FileInputStream} +import java.util.Properties + import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider import com.fasterxml.jackson.module.scala.DefaultScalaModule -import org.eclipse.jetty.server.{Server, ServerConnector} +import com.google.common.collect.Maps +import org.eclipse.jetty.http.HttpVersion +import org.eclipse.jetty.server.{HttpConfiguration, HttpConnectionFactory, Server, ServerConnector, SslConnectionFactory} import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} -import org.eclipse.jetty.util.thread.QueuedThreadPool +import org.eclipse.jetty.util.thread.{QueuedThreadPool, ScheduledExecutorScheduler} import org.glassfish.jersey.media.multipart.MultiPartFeature import org.glassfish.jersey.server.ResourceConfig import org.glassfish.jersey.servlet.ServletContainer +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.{ConfigReader, 
SparkConfigProvider} +import org.apache.spark.util.Utils private[spark] class ResourceStagingServer( port: Int, - serviceInstance: ResourceStagingService) { + serviceInstance: ResourceStagingService, + sslOptionsProvider: ResourceStagingServerSslOptionsProvider) extends Logging { private var jettyServer: Option[Server] = None @@ -45,17 +58,72 @@ private[spark] class ResourceStagingServer( contextHandler.setContextPath("/api/") contextHandler.addServlet(servletHolder, "/*") threadPool.setDaemon(true) + val resolvedConnectionFactories = sslOptionsProvider.getSslOptions + .createJettySslContextFactory() + .map(sslFactory => { + val sslConnectionFactory = new SslConnectionFactory( + sslFactory, HttpVersion.HTTP_1_1.asString()) + val rawHttpConfiguration = new HttpConfiguration() + rawHttpConfiguration.setSecureScheme("https") + rawHttpConfiguration.setSecurePort(port) + val rawHttpConnectionFactory = new HttpConnectionFactory(rawHttpConfiguration) + Array(sslConnectionFactory, rawHttpConnectionFactory) + }).getOrElse(Array(new HttpConnectionFactory())) val server = new Server(threadPool) - val connector = new ServerConnector(server) + val connector = new ServerConnector( + server, + null, + // Call this full constructor to set this, which forces daemon threads: + new ScheduledExecutorScheduler("DependencyServer-Executor", true), + null, + -1, + -1, + resolvedConnectionFactories: _*) connector.setPort(port) server.addConnector(connector) server.setHandler(contextHandler) server.start() jettyServer = Some(server) + logInfo(s"Resource staging server started on port $port.") } + def join(): Unit = jettyServer.foreach(_.join()) + def stop(): Unit = synchronized { jettyServer.foreach(_.stop()) jettyServer = None } } + +object ResourceStagingServer { + def main(args: Array[String]): Unit = { + val sparkConf = new SparkConf(true) + if (args.nonEmpty) { + val propertiesFile = new File(args(0)) + if (!propertiesFile.isFile) { + throw new IllegalArgumentException(s"Server properties file given at" + + s" ${propertiesFile.getAbsoluteFile} does not exist or is not a file.") + } + val properties = new Properties + Utils.tryWithResource(new FileInputStream(propertiesFile))(properties.load) + val propertiesMap = Maps.fromProperties(properties) + val configReader = new ConfigReader(new SparkConfigProvider(propertiesMap)) + propertiesMap.asScala.keys.foreach { key => + configReader.get(key).foreach(sparkConf.set(key, _)) + } + } + val dependenciesRootDir = Utils.createTempDir(namePrefix = "local-application-dependencies") + val serviceInstance = new ResourceStagingServiceImpl(dependenciesRootDir) + val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) + val server = new ResourceStagingServer( + port = sparkConf.get(RESOURCE_STAGING_SERVER_PORT), + serviceInstance = serviceInstance, + sslOptionsProvider = sslOptionsProvider) + server.start() + try { + server.join() + } finally { + server.stop() + } + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala new file mode 100644 index 0000000000000..2744ed0a74616 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under 
one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter +import org.apache.spark.internal.Logging + +private[spark] trait ResourceStagingServerSslOptionsProvider { + def getSslOptions: SSLOptions +} + +private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: SparkConf) + extends ResourceStagingServerSslOptionsProvider with Logging { + def getSslOptions: SSLOptions = { + val baseSslOptions = new SparkSecurityManager(sparkConf) + .getSSLOptions("kubernetes.resourceStagingServer") + val maybeKeyPem = sparkConf.get(RESOURCE_STAGING_SERVER_KEY_PEM) + val maybeCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CERT_PEM) + val maybeKeyStorePasswordFile = sparkConf.get(RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE) + val maybeKeyPasswordFile = sparkConf.get(RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE) + + logSslConfigurations( + baseSslOptions, maybeKeyPem, maybeCertPem, maybeKeyStorePasswordFile, maybeKeyPasswordFile) + + requireNandDefined(baseSslOptions.keyStore, maybeKeyPem, + "Shouldn't provide both key PEM and keyStore files for TLS.") + requireNandDefined(baseSslOptions.keyStore, maybeCertPem, + "Shouldn't provide both certificate PEM and keyStore files for TLS.") + requireNandDefined(baseSslOptions.keyStorePassword, maybeKeyStorePasswordFile, + "Shouldn't provide both the keyStore password value and the keyStore password file.") + requireNandDefined(baseSslOptions.keyPassword, maybeKeyPasswordFile, + "Shouldn't provide both the keyStore key password value and the keyStore key password file.") + requireBothOrNeitherDefined( + maybeKeyPem, + maybeCertPem, + "When providing a certificate PEM file, the key PEM file must also be provided.", + "When providing a key PEM file, the certificate PEM file must also be provided.") + + val resolvedKeyStorePassword = baseSslOptions.keyStorePassword + .orElse(maybeKeyStorePasswordFile.map { keyStorePasswordFile => + safeFileToString(keyStorePasswordFile, "KeyStore password file") + }) + val resolvedKeyStoreKeyPassword = baseSslOptions.keyPassword + .orElse(maybeKeyPasswordFile.map { keyPasswordFile => + safeFileToString(keyPasswordFile, "KeyStore key password file") + }) + val resolvedKeyStore = baseSslOptions.keyStore + .orElse(maybeKeyPem.map { keyPem => + val keyPemFile = new File(keyPem) + val certPemFile = new File(maybeCertPem.get) + PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( + keyPemFile, + certPemFile, + "key", + resolvedKeyStorePassword, + 
resolvedKeyStoreKeyPassword, + baseSslOptions.keyStoreType) + }) + baseSslOptions.copy( + keyStore = resolvedKeyStore, + keyStorePassword = resolvedKeyStorePassword, + keyPassword = resolvedKeyStoreKeyPassword) + } + + private def logSslConfigurations( + baseSslOptions: SSLOptions, + maybeKeyPem: Option[String], + maybeCertPem: Option[String], + maybeKeyStorePasswordFile: Option[String], + maybeKeyPasswordFile: Option[String]) = { + logDebug("The following SSL configurations were provided for the resource staging server:") + logDebug(s"KeyStore File: ${baseSslOptions.keyStore.map(_.getAbsolutePath).getOrElse("N/A")}") + logDebug("KeyStore Password: " + + baseSslOptions.keyStorePassword.map(_ => "").getOrElse("N/A")) + logDebug(s"KeyStore Password File: ${maybeKeyStorePasswordFile.getOrElse("N/A")}") + logDebug("Key Password: " + + baseSslOptions.keyPassword.map(_ => "").getOrElse("N/A")) + logDebug(s"Key Password File: ${maybeKeyPasswordFile.getOrElse("N/A")}") + logDebug(s"KeyStore Type: ${baseSslOptions.keyStoreType.getOrElse("N/A")}") + logDebug(s"Key PEM: ${maybeKeyPem.getOrElse("N/A")}") + logDebug(s"Certificate PEM: ${maybeCertPem.getOrElse("N/A")}") + } + + private def requireBothOrNeitherDefined( + opt1: Option[_], + opt2: Option[_], + errMessageWhenFirstIsMissing: String, + errMessageWhenSecondIsMissing: String): Unit = { + requireSecondIfFirstIsDefined(opt1, opt2, errMessageWhenSecondIsMissing) + requireSecondIfFirstIsDefined(opt2, opt1, errMessageWhenFirstIsMissing) + } + + private def requireSecondIfFirstIsDefined( + opt1: Option[_], opt2: Option[_], errMessageWhenSecondIsMissing: String): Unit = { + opt1.foreach { _ => + require(opt2.isDefined, errMessageWhenSecondIsMissing) + } + } + + private def requireNandDefined(opt1: Option[_], opt2: Option[_], errMessage: String): Unit = { + opt1.foreach { _ => require(opt2.isEmpty, errMessage) } + } + + private def safeFileToString(filePath: String, fileType: String): String = { + val file = new File(filePath) + if (!file.isFile) { + throw new SparkException(s"$fileType provided at ${file.getAbsolutePath} does not exist or" + + s" is not a file.") + } + Files.toString(file, Charsets.UTF_8) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala index c5c5c0d35b7cb..7416c624e97f6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala @@ -16,21 +16,50 @@ */ package org.apache.spark.deploy.rest.kubernetes.v2 +import java.io.FileInputStream +import java.security.{KeyStore, SecureRandom} +import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} + import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule +import okhttp3.OkHttpClient import retrofit2.Retrofit import retrofit2.converter.jackson.JacksonConverterFactory import retrofit2.converter.scalars.ScalarsConverterFactory +import org.apache.spark.SSLOptions +import org.apache.spark.util.Utils + private[spark] object RetrofitUtils { private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val SECURE_RANDOM = new SecureRandom() - def createRetrofitClient[T](baseUrl: String, serviceType: Class[T]): T = { + def 
createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T = { + val okHttpClientBuilder = new OkHttpClient.Builder() + sslOptions.trustStore.foreach { trustStoreFile => + require(trustStoreFile.isFile, s"TrustStore provided at ${trustStoreFile.getAbsolutePath}" + + " does not exist, or is not a file.") + val trustStoreType = sslOptions.trustStoreType.getOrElse(KeyStore.getDefaultType) + val trustStore = KeyStore.getInstance(trustStoreType) + val trustStorePassword = sslOptions.trustStorePassword.map(_.toCharArray).orNull + Utils.tryWithResource(new FileInputStream(trustStoreFile)) { + trustStore.load(_, trustStorePassword) + } + val trustManagerFactory = TrustManagerFactory.getInstance( + TrustManagerFactory.getDefaultAlgorithm) + trustManagerFactory.init(trustStore) + val trustManagers = trustManagerFactory.getTrustManagers + val sslContext = SSLContext.getInstance("TLSv1.2") + sslContext.init(null, trustManagers, SECURE_RANDOM) + okHttpClientBuilder.sslSocketFactory(sslContext.getSocketFactory, + trustManagers(0).asInstanceOf[X509TrustManager]) + } new Retrofit.Builder() .baseUrl(baseUrl) .addConverterFactory(ScalarsConverterFactory.create()) .addConverterFactory(JacksonConverterFactory.create(OBJECT_MAPPER)) + .client(okHttpClientBuilder.build()) .build() .create(serviceType) } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala similarity index 98% rename from resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala index 2078e0585e8f0..dacb017d8a513 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/sslutil/SSLUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.integrationtest.sslutil +package org.apache.spark.deploy.kubernetes import java.io.{File, FileOutputStream, OutputStreamWriter} import java.math.BigInteger diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala new file mode 100644 index 0000000000000..290b46a537bf3 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{File, FileInputStream, StringWriter} +import java.security.KeyStore + +import com.google.common.base.Charsets +import com.google.common.io.Files +import org.bouncycastle.openssl.jcajce.JcaPEMWriter +import org.scalatest.BeforeAndAfter + +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.SSLUtils +import org.apache.spark.util.Utils + +class ResourceStagingServerSslOptionsProviderSuite extends SparkFunSuite with BeforeAndAfter { + + private var sslTempDir: File = _ + private var keyStoreFile: File = _ + + private var sparkConf: SparkConf = _ + private var sslOptionsProvider: ResourceStagingServerSslOptionsProvider = _ + + before { + sslTempDir = Utils.createTempDir(namePrefix = "resource-staging-server-ssl-test") + keyStoreFile = new File(sslTempDir, "keyStore.jks") + sparkConf = new SparkConf(true) + sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) + } + + test("Default SparkConf does not have TLS enabled.") { + assert(sslOptionsProvider.getSslOptions === SSLOptions()) + assert(!sslOptionsProvider.getSslOptions.enabled) + keyStoreFile.delete() + sslTempDir.delete() + } + + test("Setting keyStore, key password, and key field directly.") { + sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStoreFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStorePassword") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "keyPassword") + val sslOptions = sslOptionsProvider.getSslOptions + assert(sslOptions.enabled, "SSL should be enabled.") + assert(sslOptions.keyStore.map(_.getAbsolutePath) === Some(keyStoreFile.getAbsolutePath), + "Incorrect keyStore path or it was not set.") + assert(sslOptions.keyStorePassword === Some("keyStorePassword"), + "Incorrect keyStore password or it was not set.") + assert(sslOptions.keyPassword === Some("keyPassword"), + "Incorrect key password or it was not set.") + } + + test("Setting key and certificate pem files should write an appropriate keyStore.") { + val (keyPemFile, certPemFile) = SSLUtils.generateKeyCertPemPair("127.0.0.1") + sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPem", keyPemFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.serverCertPem", certPemFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStorePassword") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "keyPassword") + val sslOptions = sslOptionsProvider.getSslOptions + assert(sslOptions.enabled, "SSL should be enabled.") + assert(sslOptions.keyStore.isDefined, "KeyStore should be defined.") + sslOptions.keyStore.foreach { keyStoreFile => + val keyStore = KeyStore.getInstance(KeyStore.getDefaultType) + Utils.tryWithResource(new FileInputStream(keyStoreFile)) { + keyStore.load(_, "keyStorePassword".toCharArray) 
+ } + val key = keyStore.getKey("key", "keyPassword".toCharArray) + compareJcaPemObjectToFileString(key, keyPemFile) + val certificate = keyStore.getCertificateChain("key")(0) + compareJcaPemObjectToFileString(certificate, certPemFile) + } + } + + test("Using password files should read from the appropriate locations.") { + val keyStorePasswordFile = new File(sslTempDir, "keyStorePassword.txt") + Files.write("keyStorePassword", keyStorePasswordFile, Charsets.UTF_8) + val keyPasswordFile = new File(sslTempDir, "keyPassword.txt") + Files.write("keyPassword", keyPasswordFile, Charsets.UTF_8) + sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStoreFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile", + keyStorePasswordFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile", keyPasswordFile.getAbsolutePath) + val sslOptions = sslOptionsProvider.getSslOptions + assert(sslOptions.keyStorePassword === Some("keyStorePassword"), + "Incorrect keyStore password or it was not set.") + assert(sslOptions.keyPassword === Some("keyPassword"), + "Incorrect key password or it was not set.") + } + + private def compareJcaPemObjectToFileString(pemObject: Any, pemFile: File): Unit = { + Utils.tryWithResource(new StringWriter()) { stringWriter => + Utils.tryWithResource(new JcaPEMWriter(stringWriter)) { pemWriter => + pemWriter.writeObject(pemObject) + } + val pemFileAsString = Files.toString(pemFile, Charsets.UTF_8) + assert(stringWriter.toString === pemFileAsString) + } + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index babc0994d25dc..51c5e43af1124 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -23,10 +23,11 @@ import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.io.ByteStreams import okhttp3.{RequestBody, ResponseBody} -import org.scalatest.BeforeAndAfterAll +import org.scalatest.BeforeAndAfter import retrofit2.Call -import org.apache.spark.SparkFunSuite +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.util.Utils @@ -39,30 +40,53 @@ import org.apache.spark.util.Utils * we've configured the Jetty server correctly and that the endpoints reached over HTTP can * receive streamed uploads and can stream downloads. 
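 * A client for these endpoints is built through RetrofitUtils; a rough sketch of the HTTPS
 * case, with an illustrative port and trustStore path, is:
 * {{{
 *   val service = RetrofitUtils.createRetrofitClient(
 *     "https://127.0.0.1:10000/",
 *     classOf[ResourceStagingServiceRetrofit],
 *     SSLOptions(enabled = true, trustStore = Some(new File("/tmp/trustStore.jks"))))
 * }}}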
*/ -class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfterAll { +class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) private val serverPort = new ServerSocket(0).getLocalPort private val serviceImpl = new ResourceStagingServiceImpl(Utils.createTempDir()) - private val server = new ResourceStagingServer(serverPort, serviceImpl) - private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val sslOptionsProvider = new SettableReferenceSslOptionsProvider() + private val server = new ResourceStagingServer(serverPort, serviceImpl, sslOptionsProvider) - override def beforeAll(): Unit = { + after { + server.stop() + } + + test("Accept file and jar uploads and downloads") { server.start() + runUploadAndDownload(SSLOptions()) } - override def afterAll(): Unit = { - server.stop() + test("Enable SSL on the server") { + val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + ipAddress = "127.0.0.1", + keyStorePassword = "keyStore", + keyPassword = "key", + trustStorePassword = "trustStore") + val sslOptions = SSLOptions( + enabled = true, + keyStore = Some(keyStore), + keyStorePassword = Some("keyStore"), + keyPassword = Some("key"), + trustStore = Some(trustStore), + trustStorePassword = Some("trustStore")) + sslOptionsProvider.setOptions(sslOptions) + server.start() + runUploadAndDownload(sslOptions) } - test("Accept file and jar uploads and downloads") { - val retrofitService = RetrofitUtils.createRetrofitClient(s"http://localhost:$serverPort/", - classOf[ResourceStagingServiceRetrofit]) + private def runUploadAndDownload(sslOptions: SSLOptions): Unit = { + val scheme = if (sslOptions.enabled) "https" else "http" + val retrofitService = RetrofitUtils.createRetrofitClient( + s"$scheme://127.0.0.1:$serverPort/", + classOf[ResourceStagingServiceRetrofit], + sslOptions) val resourcesBytes = Array[Byte](1, 2, 3, 4) val labels = Map("label1" -> "label1Value", "label2" -> "label2value") val namespace = "namespace" val labelsJson = OBJECT_MAPPER.writer().writeValueAsString(labels) val resourcesRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) val labelsRequestBody = RequestBody.create( okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsJson) val namespaceRequestBody = RequestBody.create( @@ -95,5 +119,14 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfterAll { val downloadedBytes = ByteStreams.toByteArray(responseBody.byteStream()) assert(downloadedBytes.toSeq === bytes) } +} + +private class SettableReferenceSslOptionsProvider extends ResourceStagingServerSslOptionsProvider { + private var options = SSLOptions() + + def setOptions(newOptions: SSLOptions): Unit = { + this.options = newOptions + } + override def getSslOptions: SSLOptions = options } diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index da78e783cac1b..5418afa25ca85 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -35,6 +35,13 @@ ${project.version} test + + org.apache.spark + spark-kubernetes_${scala.binary.version} + ${project.version} + test + test-jar + org.apache.spark spark-core_${scala.binary.version} diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 8deb790f4b7a0..750e7668b9912 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -35,12 +35,12 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.deploy.SparkSubmit +import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.deploy.kubernetes.integrationtest.sslutil.SSLUtils import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} import org.apache.spark.util.Utils From 7c29732cb66174f9d559928e06c84879db89b7a8 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 24 Apr 2017 18:15:11 -0700 Subject: [PATCH 094/225] Driver submission with mounting dependencies from the staging server (#227) --- resource-managers/kubernetes/core/pom.xml | 8 +- .../{submit/v1 => }/CompressionUtils.scala | 123 +++--- .../spark/deploy/kubernetes/config.scala | 107 ++++- .../spark/deploy/kubernetes/constants.scala | 42 +- .../deploy/kubernetes/submit/v1/Client.scala | 17 +- ...iverPodKubernetesCredentialsProvider.scala | 1 - .../deploy/kubernetes/submit/v2/Client.scala | 249 ++++++++++++ .../v2/ContainerNameEqualityPredicate.scala | 29 ++ .../v2/MountedDependencyManagerImpl.scala | 324 +++++++++++++++ .../v2/MountedDependencyManagerProvider.scala | 58 +++ .../SubmissionKubernetesClientProvider.scala | 55 +++ .../v1/KubernetesSparkRestServer.scala | 2 +- ...SparkDependencyDownloadInitContainer.scala | 127 ++++++ .../kubernetes/v2/ResourceStagingServer.scala | 24 +- .../v2/ResourceStagingService.scala | 13 +- .../v2/ResourceStagingServiceImpl.scala | 2 + .../v2/ResourceStagingServiceRetrofit.scala | 8 +- ...tils.scala => RetrofitClientFactory.scala} | 13 +- .../v2/SparkConfPropertiesParser.scala | 46 +++ .../DriverPodKubernetesClientProvider.scala | 83 ++++ .../KubernetesClusterSchedulerBackend.scala | 14 +- .../kubernetes/submit/v2/ClientV2Suite.scala | 328 ++++++++++++++++ .../v2/MountedDependencyManagerSuite.scala | 323 +++++++++++++++ ...DependencyDownloadInitContainerSuite.scala | 165 ++++++++ .../v2/ResourceStagingServerSuite.scala | 2 +- .../kubernetes/docker-minimal-bundle/pom.xml | 17 +- ...river-assembly.xml => docker-assembly.xml} | 6 +- .../src/main/assembly/executor-assembly.xml | 84 ---- .../src/main/docker/driver-init/Dockerfile | 38 ++ .../src/main/docker/driver-v2/Dockerfile | 43 ++ .../docker/resource-staging-server/Dockerfile | 38 ++ .../kubernetes/integration-tests/pom.xml | 65 +--- .../integrationtest/KubernetesSuite.scala | 368 ++---------------- .../KubernetesTestComponents.scala | 72 ++++ 
.../integrationtest/KubernetesV1Suite.scala | 306 +++++++++++++++ .../integrationtest/KubernetesV2Suite.scala | 127 ++++++ .../ResourceStagingServerLauncher.scala | 196 ++++++++++ .../docker/SparkDockerImageBuilder.scala | 25 +- 38 files changed, 2932 insertions(+), 616 deletions(-) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/{submit/v1 => }/CompressionUtils.scala (58%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/{RetrofitUtils.scala => RetrofitClientFactory.scala} (85%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala rename resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/{driver-assembly.xml => docker-assembly.xml} (95%) delete mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 
8856339d4f6d9..70c252009c9b4 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -108,6 +108,8 @@ com.google.guava guava + + org.bouncycastle bcpkix-jdk15on @@ -116,7 +118,11 @@ org.bouncycastle bcprov-jdk15on - + + org.mockito + mockito-core + test + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala similarity index 58% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala index 8296218ba1f70..03991ba26a6f7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala @@ -14,9 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v1 +package org.apache.spark.deploy.kubernetes -import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream} +import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream, InputStream, OutputStream} import java.util.zip.{GZIPInputStream, GZIPOutputStream} import com.google.common.io.Files @@ -48,40 +48,7 @@ private[spark] object CompressionUtils extends Logging { */ def createTarGzip(paths: Iterable[String]): TarGzippedData = { val compressedBytesStream = Utils.tryWithResource(new ByteBufferOutputStream()) { raw => - Utils.tryWithResource(new GZIPOutputStream(raw)) { gzipping => - Utils.tryWithResource(new TarArchiveOutputStream( - gzipping, - BLOCK_SIZE, - RECORD_SIZE, - ENCODING)) { tarStream => - val usedFileNames = mutable.HashSet.empty[String] - for (path <- paths) { - val file = new File(path) - if (!file.isFile) { - throw new IllegalArgumentException(s"Cannot add $path to tarball; either does" + - s" not exist or is a directory.") - } - var resolvedFileName = file.getName - val extension = Files.getFileExtension(file.getName) - val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) - var deduplicationCounter = 1 - while (usedFileNames.contains(resolvedFileName)) { - val oldResolvedFileName = resolvedFileName - resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" - logWarning(s"File with name $oldResolvedFileName already exists. 
Trying to add" + - s" with file name $resolvedFileName instead.") - deduplicationCounter += 1 - } - usedFileNames += resolvedFileName - val tarEntry = new TarArchiveEntry(file, resolvedFileName) - tarStream.putArchiveEntry(tarEntry) - Utils.tryWithResource(new FileInputStream(file)) { fileInput => - IOUtils.copy(fileInput, tarStream) - } - tarStream.closeArchiveEntry() - } - } - } + writeTarGzipToStream(raw, paths) raw } val compressedAsBase64 = Base64.encodeBase64String(compressedBytesStream.toByteBuffer.array) @@ -93,6 +60,44 @@ private[spark] object CompressionUtils extends Logging { ) } + def writeTarGzipToStream(outputStream: OutputStream, paths: Iterable[String]): Unit = { + Utils.tryWithResource(new GZIPOutputStream(outputStream)) { gzipping => + Utils.tryWithResource(new TarArchiveOutputStream( + gzipping, + BLOCK_SIZE, + RECORD_SIZE, + ENCODING)) { tarStream => + val usedFileNames = mutable.HashSet.empty[String] + for (path <- paths) { + val file = new File(path) + if (!file.isFile) { + throw new IllegalArgumentException(s"Cannot add $path to tarball; either does" + + s" not exist or is a directory.") + } + var resolvedFileName = file.getName + val extension = Files.getFileExtension(file.getName) + val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) + var deduplicationCounter = 1 + while (usedFileNames.contains(resolvedFileName)) { + val oldResolvedFileName = resolvedFileName + resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" + logWarning(s"File with name $oldResolvedFileName already exists. Trying to add" + + s" with file name $resolvedFileName instead.") + deduplicationCounter += 1 + } + usedFileNames += resolvedFileName + val tarEntry = new TarArchiveEntry(resolvedFileName) + tarEntry.setSize(file.length()); + tarStream.putArchiveEntry(tarEntry) + Utils.tryWithResource(new FileInputStream(file)) { fileInput => + IOUtils.copy(fileInput, tarStream) + } + tarStream.closeArchiveEntry() + } + } + } + } + /** * Decompresses the provided tar archive to a directory. 
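 * The streaming variants, writeTarGzipToStream and unpackTarStreamToDirectory, round-trip a
 * set of files as in the following minimal sketch (paths are illustrative):
 * {{{
 *   Utils.tryWithResource(new FileOutputStream("/tmp/deps.tgz")) { out =>
 *     CompressionUtils.writeTarGzipToStream(out, Seq("/tmp/a.jar", "/tmp/b.jar"))
 *   }
 *   Utils.tryWithResource(new FileInputStream("/tmp/deps.tgz")) { in =>
 *     CompressionUtils.unpackTarStreamToDirectory(in, new File("/tmp/unpacked"))
 *   }
 * }}}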
* @param compressedData In-memory representation of the compressed data, ideally created via @@ -104,7 +109,6 @@ private[spark] object CompressionUtils extends Logging { def unpackAndWriteCompressedFiles( compressedData: TarGzippedData, rootOutputDir: File): Seq[String] = { - val paths = mutable.Buffer.empty[String] val compressedBytes = Base64.decodeBase64(compressedData.dataBase64) if (!rootOutputDir.exists) { if (!rootOutputDir.mkdirs) { @@ -116,24 +120,39 @@ private[spark] object CompressionUtils extends Logging { s"${rootOutputDir.getAbsolutePath} exists and is not a directory.") } Utils.tryWithResource(new ByteArrayInputStream(compressedBytes)) { compressedBytesStream => - Utils.tryWithResource(new GZIPInputStream(compressedBytesStream)) { gzipped => - Utils.tryWithResource(new TarArchiveInputStream( - gzipped, - compressedData.blockSize, - compressedData.recordSize, - compressedData.encoding)) { tarInputStream => - var nextTarEntry = tarInputStream.getNextTarEntry - while (nextTarEntry != null) { - val outputFile = new File(rootOutputDir, nextTarEntry.getName) - Utils.tryWithResource(new FileOutputStream(outputFile)) { fileOutputStream => - IOUtils.copy(tarInputStream, fileOutputStream) - } - paths += outputFile.getAbsolutePath - nextTarEntry = tarInputStream.getNextTarEntry + unpackTarStreamToDirectory( + compressedBytesStream, + rootOutputDir, + compressedData.blockSize, + compressedData.recordSize, + compressedData.encoding) + } + } + + def unpackTarStreamToDirectory( + inputStream: InputStream, + outputDir: File, + blockSize: Int = BLOCK_SIZE, + recordSize: Int = RECORD_SIZE, + encoding: String = ENCODING): Seq[String] = { + val paths = mutable.Buffer.empty[String] + Utils.tryWithResource(new GZIPInputStream(inputStream)) { gzipped => + Utils.tryWithResource(new TarArchiveInputStream( + gzipped, + blockSize, + recordSize, + encoding)) { tarInputStream => + var nextTarEntry = tarInputStream.getNextTarEntry + while (nextTarEntry != null) { + val outputFile = new File(outputDir, nextTarEntry.getName) + Utils.tryWithResource(new FileOutputStream(outputFile)) { fileOutputStream => + IOUtils.copy(tarInputStream, fileOutputStream) } + paths += outputFile.getAbsolutePath + nextTarEntry = tarInputStream.getNextTarEntry } } } - paths.toSeq + paths } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 15f7a17857f1f..1c8b6798bbdd5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -19,11 +19,13 @@ package org.apache.spark.deploy.kubernetes import java.util.concurrent.TimeUnit import org.apache.spark.{SPARK_VERSION => sparkVersion} +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager +import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.network.util.ByteUnit -package object config { +package object config extends Logging { private[spark] val KUBERNETES_NAMESPACE = ConfigBuilder("spark.kubernetes.namespace") @@ -321,4 +323,107 @@ package object config { .doc("File containing the key password for the Kubernetes dependency server.") .stringConf .createOptional + + private[spark] val RESOURCE_STAGING_SERVER_SSL_ENABLED = 
+ ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.enabled") + .doc("Whether or not to use SSL when communicating with the dependency server.") + .booleanConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStore") + .doc("File containing the trustStore to communicate with the Kubernetes dependency server.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword") + .doc("Password for the trustStore for talking to the dependency server.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE = + ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStoreType") + .doc("Type of trustStore for communicating with the dependency server.") + .stringConf + .createOptional + + // Driver and Init-Container parameters for submission v2 + private[spark] val RESOURCE_STAGING_SERVER_URI = + ConfigBuilder("spark.kubernetes.resourceStagingServer.uri") + .doc("Base URI for the Spark resource staging server") + .stringConf + .createOptional + + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER = + ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadJarsResourceIdentifier") + .doc("Identifier for the jars tarball that was uploaded to the staging service.") + .internal() + .stringConf + .createOptional + + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION = + ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadJarsSecretLocation") + .doc("Location of the application secret to use when the init-container contacts the" + + " resource staging server to download jars.") + .internal() + .stringConf + .createWithDefault(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) + + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER = + ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesResourceIdentifier") + .doc("Identifier for the files tarball that was uploaded to the staging service.") + .internal() + .stringConf + .createOptional + + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION = + ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesSecretLocation") + .doc("Location of the application secret to use when the init-container contacts the" + + " resource staging server to download files.") + .internal() + .stringConf + .createWithDefault(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) + + private[spark] val INIT_CONTAINER_DOCKER_IMAGE = + ConfigBuilder("spark.kubernetes.driver.initcontainer.docker.image") + .doc("Image for the driver's init-container that downloads mounted dependencies.") + .stringConf + .createWithDefault(s"spark-driver-init:$sparkVersion") + + private[spark] val DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION = + ConfigBuilder("spark.kubernetes.driver.mountdependencies.jarsDownloadDir") + .doc("Location to download local jars to in the driver. When using spark-submit, this" + + " directory must be empty and will be mounted as an empty directory volume on the" + + " driver pod.") + .stringConf + .createWithDefault("/var/spark-data/spark-local-jars") + + private[spark] val DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION = + ConfigBuilder("spark.kubernetes.driver.mountdependencies.filesDownloadDir") + .doc("Location to download local files to in the driver. 
When using spark-submit, this" + + " directory must be empty and will be mounted as an empty directory volume on the" + + " driver pod.") + .stringConf + .createWithDefault("/var/spark-data/spark-local-files") + + private[spark] val DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT = + ConfigBuilder("spark.kubernetes.mountdependencies.mountTimeout") + .doc("Timeout before aborting the attempt to download and unpack local dependencies from" + + " the dependency staging server when initializing the driver pod.") + .timeConf(TimeUnit.MINUTES) + .createWithDefault(5) + + private[spark] def resolveK8sMaster(rawMasterString: String): String = { + if (!rawMasterString.startsWith("k8s://")) { + throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") + } + val masterWithoutK8sPrefix = rawMasterString.replaceFirst("k8s://", "") + if (masterWithoutK8sPrefix.startsWith("http://") + || masterWithoutK8sPrefix.startsWith("https://")) { + masterWithoutK8sPrefix + } else { + val resolvedURL = s"https://$masterWithoutK8sPrefix" + logDebug(s"No scheme specified for kubernetes master URL, so defaulting to https. Resolved" + + s" URL is $resolvedURL") + resolvedURL + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 03b3d21ac9c45..f82cb88b4c622 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -30,9 +30,9 @@ package object constants { private[spark] val SUBMISSION_APP_SECRET_PREFIX = "spark-submission-server-secret" private[spark] val SUBMISSION_APP_SECRET_VOLUME_NAME = "spark-submission-secret-volume" private[spark] val SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME = - "spark-submission-server-key-password" + "spark-submission-server-key-password" private[spark] val SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME = - "spark-submission-server-keystore-password" + "spark-submission-server-keystore-password" private[spark] val SUBMISSION_SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" private[spark] val SUBMISSION_SSL_SECRETS_PREFIX = "spark-submission-server-ssl" private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" @@ -55,9 +55,9 @@ package object constants { private[spark] val ENV_SUBMISSION_SERVER_PORT = "SPARK_SUBMISSION_SERVER_PORT" private[spark] val ENV_SUBMISSION_KEYSTORE_FILE = "SPARK_SUBMISSION_KEYSTORE_FILE" private[spark] val ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE = - "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" + "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" private[spark] val ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE = - "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" + "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" private[spark] val ENV_SUBMISSION_KEYSTORE_TYPE = "SPARK_SUBMISSION_KEYSTORE_TYPE" private[spark] val ENV_SUBMISSION_KEY_PEM_FILE = "SPARK_SUBMISSION_KEY_PEM_FILE" private[spark] val ENV_SUBMISSION_CERT_PEM_FILE = "SPARK_SUBMISSION_CERT_PEM_FILE" @@ -70,12 +70,18 @@ package object constants { private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" + private[spark] val ENV_UPLOADED_JARS_DIR = "SPARK_UPLOADED_JARS_DIR" + private[spark] val 
ENV_SUBMIT_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" + private[spark] val ENV_MOUNTED_CLASSPATH = "SPARK_MOUNTED_CLASSPATH" + private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" + private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" + private[spark] val ENV_DRIVER_JAVA_OPTS = "SPARK_DRIVER_JAVA_OPTS" // Annotation keys private[spark] val ANNOTATION_PROVIDE_EXTERNAL_URI = - "spark-job.alpha.apache.org/provideExternalUri" + "spark-job.alpha.apache.org/provideExternalUri" private[spark] val ANNOTATION_RESOLVED_EXTERNAL_URI = - "spark-job.alpha.apache.org/resolvedExternalUri" + "spark-job.alpha.apache.org/resolvedExternalUri" // Miscellaneous private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" @@ -83,4 +89,28 @@ package object constants { private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 private[spark] val MEMORY_OVERHEAD_MIN = 384L + + // V2 submission init container + private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" + private[spark] val INIT_CONTAINER_SECRETS_VOLUME_NAME = "dependency-secret" + private[spark] val INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH = "/mnt/secrets/spark-init" + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY = "downloadJarsSecret" + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY = "downloadFilesSecret" + private[spark] val INIT_CONTAINER_TRUSTSTORE_SECRET_KEY = "trustStore" + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH = + s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY" + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH = + s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY" + private[spark] val INIT_CONTAINER_TRUSTSTORE_PATH = + s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_TRUSTSTORE_SECRET_KEY" + private[spark] val INIT_CONTAINER_DOWNLOAD_CREDENTIALS_PATH = + "/mnt/secrets/kubernetes-credentials" + private[spark] val INIT_CONTAINER_CONFIG_MAP_KEY = "init-driver" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_VOLUME = "init-container-properties" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH = "/etc/spark-init/" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_NAME = "init-driver.properties" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_PATH = + s"$INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH/$INIT_CONTAINER_PROPERTIES_FILE_NAME" + private[spark] val DOWNLOAD_JARS_VOLUME_NAME = "download-jars" + private[spark] val DOWNLOAD_FILES_VOLUME_NAME = "download-files" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 72d24f7bf8342..e1cfac8feba37 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -30,6 +30,7 @@ import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, 
ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesFileUtils, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} @@ -733,20 +734,4 @@ private[spark] object Client extends Logging { sparkConf = sparkConf, appArgs = appArgs).run() } - - def resolveK8sMaster(rawMasterString: String): String = { - if (!rawMasterString.startsWith("k8s://")) { - throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") - } - val masterWithoutK8sPrefix = rawMasterString.replaceFirst("k8s://", "") - if (masterWithoutK8sPrefix.startsWith("http://") - || masterWithoutK8sPrefix.startsWith("https://")) { - masterWithoutK8sPrefix - } else { - val resolvedURL = s"https://$masterWithoutK8sPrefix" - logDebug(s"No scheme specified for kubernetes master URL, so defaulting to https. Resolved" + - s" URL is $resolvedURL") - resolvedURL - } - } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala index bc7490ef9ec4a..112226dbe3fc1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala @@ -45,7 +45,6 @@ private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf s"Driver client key file provided at %s does not exist or is not a file.") val clientCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_CERT_FILE, s"Driver client cert file provided at %s does not exist or is not a file.") - val serviceAccountName = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) KubernetesCredentials( oauthToken = oauthToken, caCertDataBase64 = caCertDataBase64, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala new file mode 100644 index 0000000000000..69dbfd041bb86 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -0,0 +1,249 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File +import java.util.Collections + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, HasMetadata, OwnerReferenceBuilder, PodBuilder} +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.internal.Logging +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.util.Utils + +/** + * Submission client for launching Spark applications on Kubernetes clusters. + * + * This class is responsible for instantiating Kubernetes resources that allow a Spark driver to + * run in a pod on the Kubernetes cluster with the Spark configurations specified by spark-submit. + * Application submitters that desire to provide their application's dependencies from their local + * disk must provide a resource staging server URI to this client so that the client can push the + * local resources to the resource staging server and have the driver pod pull the resources in an + * init-container. Interactions with the resource staging server are offloaded to the + * {@link MountedDependencyManager} class. If instead the application submitter has their + * dependencies pre-staged in remote locations like HDFS or their own HTTP servers already, then + * the mounted dependency manager is bypassed entirely, but the init-container still needs to + * fetch these remote dependencies (TODO https://github.com/apache-spark-on-k8s/spark/issues/238). + */ +private[spark] class Client( + mainClass: String, + sparkConf: SparkConf, + appArgs: Array[String], + mainAppResource: String, + kubernetesClientProvider: SubmissionKubernetesClientProvider, + mountedDependencyManagerProvider: MountedDependencyManagerProvider) extends Logging { + + private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + private val master = resolveK8sMaster(sparkConf.get("spark.master")) + private val launchTime = System.currentTimeMillis + private val appName = sparkConf.getOption("spark.app.name") + .getOrElse("spark") + private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) + private val maybeStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) + private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) + private val memoryOverheadMb = sparkConf + .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) + .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMb).toInt, + MEMORY_OVERHEAD_MIN)) + private val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb + private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) + private val customAnnotations = sparkConf.get(KUBERNETES_DRIVER_ANNOTATIONS) + private val sparkJars = sparkConf.getOption("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) ++ + Option(mainAppResource) + .filterNot(_ == SparkLauncher.NO_RESOURCE) + .toSeq + + private val sparkFiles = sparkConf.getOption("spark.files") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + private val driverExtraClasspath = sparkConf.get( + org.apache.spark.internal.config.DRIVER_CLASS_PATH) + private val driverJavaOptions = sparkConf.get( + org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + + def run(): Unit = { + 
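    // The flow below, in order: validate the user-supplied labels and annotations, build the
    // base driver container and pod, then, when a resource staging server URI is configured,
    // hand submitter-local jars and files to the MountedDependencyManager so they are uploaded
    // and later fetched by an init-container; otherwise reject submitter-local URIs outright.
    // Finally the resolved SparkConf is flattened into driver JVM options, the driver pod is
    // created, and the init-container secret and config map are created with the driver pod as
    // their owner so they share its lifecycle.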
val parsedCustomLabels = parseKeyValuePairs(customLabels, KUBERNETES_DRIVER_LABELS.key, + "labels") + require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") + require(!parsedCustomLabels.contains(SPARK_APP_NAME_LABEL), s"Label with key" + + s" $SPARK_APP_NAME_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") + val allLabels = parsedCustomLabels ++ + Map(SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_APP_NAME_LABEL -> appName) + val parsedCustomAnnotations = parseKeyValuePairs( + customAnnotations, + KUBERNETES_DRIVER_ANNOTATIONS.key, + "annotations") + Utils.tryWithResource(kubernetesClientProvider.get) { kubernetesClient => + val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => + new EnvVarBuilder() + .withName(ENV_SUBMIT_EXTRA_CLASSPATH) + .withValue(classPath) + .build() + } + val driverContainer = new ContainerBuilder() + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addToEnv(driverExtraClasspathEnv.toSeq: _*) + .addNewEnv() + .withName(ENV_DRIVER_MEMORY) + .withValue(driverContainerMemoryWithOverhead + "m") + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_MAIN_CLASS) + .withValue(mainClass) + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_ARGS) + .withValue(appArgs.mkString(" ")) + .endEnv() + .build() + val basePod = new PodBuilder() + .withNewMetadata() + .withName(kubernetesAppId) + .addToLabels(allLabels.asJava) + .addToAnnotations(parsedCustomAnnotations.asJava) + .endMetadata() + .withNewSpec() + .addToContainers(driverContainer) + .endSpec() + + val nonDriverPodKubernetesResources = mutable.Buffer[HasMetadata]() + val resolvedJars = mutable.Buffer[String]() + val resolvedFiles = mutable.Buffer[String]() + val driverPodWithMountedDeps = maybeStagingServerUri.map { stagingServerUri => + val mountedDependencyManager = mountedDependencyManagerProvider.getMountedDependencyManager( + kubernetesAppId, + stagingServerUri, + allLabels, + namespace, + sparkJars, + sparkFiles) + val jarsResourceIdentifier = mountedDependencyManager.uploadJars() + val filesResourceIdentifier = mountedDependencyManager.uploadFiles() + val initContainerKubernetesSecret = mountedDependencyManager.buildInitContainerSecret( + jarsResourceIdentifier.resourceSecret, filesResourceIdentifier.resourceSecret) + val initContainerConfigMap = mountedDependencyManager.buildInitContainerConfigMap( + jarsResourceIdentifier.resourceId, filesResourceIdentifier.resourceId) + resolvedJars ++= mountedDependencyManager.resolveSparkJars() + resolvedFiles ++= mountedDependencyManager.resolveSparkFiles() + nonDriverPodKubernetesResources += initContainerKubernetesSecret + nonDriverPodKubernetesResources += initContainerConfigMap + mountedDependencyManager.configurePodToMountLocalDependencies( + driverContainer.getName, initContainerKubernetesSecret, initContainerConfigMap, basePod) + }.getOrElse { + sparkJars.map(Utils.resolveURI).foreach { jar => + require(Option.apply(jar.getScheme).getOrElse("file") != "file", + "When submitting with local jars, a resource staging server must be provided to" + + s" deploy your jars into the driver pod. 
Cannot send jar with URI $jar.") + } + sparkFiles.map(Utils.resolveURI).foreach { file => + require(Option.apply(file.getScheme).getOrElse("file") != "file", + "When submitting with local files, a resource staging server must be provided to" + + s" deploy your files into the driver pod. Cannot send file with URI $file") + } + resolvedJars ++= sparkJars + resolvedFiles ++= sparkFiles + basePod + } + val resolvedSparkConf = sparkConf.clone() + if (resolvedJars.nonEmpty) { + resolvedSparkConf.set("spark.jars", resolvedJars.mkString(",")) + } + if (resolvedFiles.nonEmpty) { + resolvedSparkConf.set("spark.files", resolvedFiles.mkString(",")) + } + resolvedSparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + resolvedSparkConf.set("spark.app.id", kubernetesAppId) + // We don't need this anymore since we just set the JVM options on the environment + resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + resolvedSparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => + resolvedSparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN.key, "<present_but_redacted>") + } + resolvedSparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => + resolvedSparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN.key, "<present_but_redacted>") + } + + val mountedClassPath = resolvedJars.map(Utils.resolveURI).filter { jarUri => + val scheme = Option.apply(jarUri.getScheme).getOrElse("file") + scheme == "local" || scheme == "file" + }.map(_.getPath).mkString(File.pathSeparator) + val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { case (confKey, confValue) => + s"-D$confKey=$confValue" + }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") + val resolvedDriverPod = driverPodWithMountedDeps.editSpec() + .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) + .addNewEnv() + .withName(ENV_MOUNTED_CLASSPATH) + .withValue(mountedClassPath) + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_JAVA_OPTS) + .withValue(resolvedDriverJavaOpts) + .endEnv() + .endContainer() + .endSpec() + .build() + val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) + try { + val driverPodOwnerReference = new OwnerReferenceBuilder() + .withName(createdDriverPod.getMetadata.getName) + .withApiVersion(createdDriverPod.getApiVersion) + .withUid(createdDriverPod.getMetadata.getUid) + .withKind(createdDriverPod.getKind) + .withController(true) + .build() + nonDriverPodKubernetesResources.foreach { resource => + val originalMetadata = resource.getMetadata + originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + } + kubernetesClient.resourceList(nonDriverPodKubernetesResources: _*).createOrReplace() + } catch { + case e: Throwable => + kubernetesClient.pods().delete(createdDriverPod) + throw e + } + } + } + + private def parseKeyValuePairs( + maybeKeyValues: Option[String], + configKey: String, + keyValueType: String): Map[String, String] = { + maybeKeyValues.map(keyValues => { + keyValues.split(",").map(_.trim).filterNot(_.isEmpty).map(keyValue => { + keyValue.split("=", 2).toSeq match { + case Seq(k, v) => + (k, v) + case _ => + throw new SparkException(s"Custom $keyValueType set by $configKey must be a" + + s" comma-separated list of key-value pairs, with format <key>=<value>." + + s" Got value: $keyValue.
All values: $keyValues") + } + }).toMap + }).getOrElse(Map.empty[String, String]) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala new file mode 100644 index 0000000000000..5101e1506e4d5 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala @@ -0,0 +1,29 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.lang.Boolean + +import io.fabric8.kubernetes.api.builder.Predicate +import io.fabric8.kubernetes.api.model.ContainerBuilder + +private[spark] class ContainerNameEqualityPredicate(containerName: String) + extends Predicate[ContainerBuilder] { + override def apply(item: ContainerBuilder): Boolean = { + item.getName == containerName + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala new file mode 100644 index 0000000000000..9dbbcd0d56a3b --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala @@ -0,0 +1,324 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.{File, FileOutputStream, StringWriter} +import java.util.Properties +import javax.ws.rs.core.MediaType + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, ContainerBuilder, EmptyDirVolumeSource, PodBuilder, Secret, SecretBuilder, VolumeMount, VolumeMountBuilder} +import okhttp3.RequestBody +import retrofit2.Call +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.{SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v1.{KubernetesCredentials, KubernetesFileUtils} +import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourceIdentifier} +import org.apache.spark.util.Utils + +private[spark] trait MountedDependencyManager { + + /** + * Upload submitter-local jars to the resource staging server. + * @return The resource ID and secret to use to retrieve these jars. + */ + def uploadJars(): StagedResourceIdentifier + + /** + * Upload submitter-local files to the resource staging server. + * @return The resource ID and secret to use to retrieve these files. + */ + def uploadFiles(): StagedResourceIdentifier + + def configurePodToMountLocalDependencies( + driverContainerName: String, + initContainerSecret: Secret, + initContainerConfigMap: ConfigMap, + originalPodSpec: PodBuilder): PodBuilder + + def buildInitContainerSecret(jarsSecret: String, filesSecret: String): Secret + + def buildInitContainerConfigMap( + jarsResourceId: String, filesResourceId: String): ConfigMap + + /** + * Convert the Spark jar paths from their locations on the submitter's disk to + * the locations they will be downloaded to on the driver's disk. + */ + def resolveSparkJars(): Seq[String] + + /** + * Convert the Spark file paths from their locations on the submitter's disk to + * the locations they will be downloaded to on the driver's disk. + */ + def resolveSparkFiles(): Seq[String] +} + +/** + * Default implementation of a MountedDependencyManager that is backed by a + * Resource Staging Service. 
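 * The submission client drives an implementation roughly in this order (a sketch; the value
 * names are illustrative):
 * {{{
 *   val jarsId = dependencyManager.uploadJars()
 *   val filesId = dependencyManager.uploadFiles()
 *   val initSecret = dependencyManager.buildInitContainerSecret(
 *     jarsId.resourceSecret, filesId.resourceSecret)
 *   val initConfigMap = dependencyManager.buildInitContainerConfigMap(
 *     jarsId.resourceId, filesId.resourceId)
 *   val podWithDeps = dependencyManager.configurePodToMountLocalDependencies(
 *     driverContainerName, initSecret, initConfigMap, basePod)
 * }}}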
+ */ +private[spark] class MountedDependencyManagerImpl( + kubernetesAppId: String, + podLabels: Map[String, String], + podNamespace: String, + stagingServerUri: String, + initContainerImage: String, + jarsDownloadPath: String, + filesDownloadPath: String, + downloadTimeoutMinutes: Long, + sparkJars: Seq[String], + sparkFiles: Seq[String], + stagingServiceSslOptions: SSLOptions, + retrofitClientFactory: RetrofitClientFactory) extends MountedDependencyManager { + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + private def localUriStringsToFiles(uris: Seq[String]): Iterable[File] = { + KubernetesFileUtils.getOnlySubmitterLocalFiles(uris) + .map(Utils.resolveURI) + .map(uri => new File(uri.getPath)) + } + private def localJars: Iterable[File] = localUriStringsToFiles(sparkJars) + private def localFiles: Iterable[File] = localUriStringsToFiles(sparkFiles) + + override def uploadJars(): StagedResourceIdentifier = doUpload(localJars, "uploaded-jars") + override def uploadFiles(): StagedResourceIdentifier = doUpload(localFiles, "uploaded-files") + + private def doUpload(files: Iterable[File], fileNamePrefix: String): StagedResourceIdentifier = { + val filesDir = Utils.createTempDir(namePrefix = fileNamePrefix) + val filesTgz = new File(filesDir, s"$fileNamePrefix.tgz") + Utils.tryWithResource(new FileOutputStream(filesTgz)) { filesOutputStream => + CompressionUtils.writeTarGzipToStream(filesOutputStream, files.map(_.getAbsolutePath)) + } + // TODO provide credentials properly when the staging server monitors the Kubernetes API. + val kubernetesCredentialsString = OBJECT_MAPPER.writer() + .writeValueAsString(KubernetesCredentials(None, None, None, None)) + val labelsAsString = OBJECT_MAPPER.writer().writeValueAsString(podLabels) + + val filesRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) + + val kubernetesCredentialsBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) + + val namespaceRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), podNamespace) + + val labelsRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsAsString) + + val service = retrofitClientFactory.createRetrofitClient( + stagingServerUri, + classOf[ResourceStagingServiceRetrofit], + stagingServiceSslOptions) + val uploadResponse = service.uploadResources( + labelsRequestBody, namespaceRequestBody, filesRequestBody, kubernetesCredentialsBody) + getTypedResponseResult(uploadResponse) + } + + override def configurePodToMountLocalDependencies( + driverContainerName: String, + initContainerSecret: Secret, + initContainerConfigMap: ConfigMap, + originalPodSpec: PodBuilder): PodBuilder = { + val sharedVolumeMounts = Seq[VolumeMount]( + new VolumeMountBuilder() + .withName(DOWNLOAD_JARS_VOLUME_NAME) + .withMountPath(jarsDownloadPath) + .build(), + new VolumeMountBuilder() + .withName(DOWNLOAD_FILES_VOLUME_NAME) + .withMountPath(filesDownloadPath) + .build()) + + val initContainers = Seq(new ContainerBuilder() + .withName("spark-driver-init") + .withImage(initContainerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + .withMountPath(INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH) + .endVolumeMount() + .addNewVolumeMount() + .withName(INIT_CONTAINER_SECRETS_VOLUME_NAME) + .withMountPath(INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH) + .endVolumeMount() + 
.addToVolumeMounts(sharedVolumeMounts: _*) + .addToArgs(INIT_CONTAINER_PROPERTIES_FILE_PATH) + .build()) + + // Make sure we don't override any user-provided init containers by just appending ours to + // the existing list. + val resolvedInitContainers = originalPodSpec + .editMetadata() + .getAnnotations + .asScala + .get(INIT_CONTAINER_ANNOTATION) + .map { existingInitContainerAnnotation => + val existingInitContainers = OBJECT_MAPPER.readValue( + existingInitContainerAnnotation, classOf[List[Container]]) + existingInitContainers ++ initContainers + }.getOrElse(initContainers) + val resolvedSerializedInitContainers = OBJECT_MAPPER.writeValueAsString(resolvedInitContainers) + originalPodSpec + .editMetadata() + .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) + .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) + .endMetadata() + .editSpec() + .addNewVolume() + .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + .withNewConfigMap() + .withName(initContainerConfigMap.getMetadata.getName) + .addNewItem() + .withKey(INIT_CONTAINER_CONFIG_MAP_KEY) + .withPath(INIT_CONTAINER_PROPERTIES_FILE_NAME) + .endItem() + .endConfigMap() + .endVolume() + .addNewVolume() + .withName(DOWNLOAD_JARS_VOLUME_NAME) + .withEmptyDir(new EmptyDirVolumeSource()) + .endVolume() + .addNewVolume() + .withName(DOWNLOAD_FILES_VOLUME_NAME) + .withEmptyDir(new EmptyDirVolumeSource()) + .endVolume() + .addNewVolume() + .withName(INIT_CONTAINER_SECRETS_VOLUME_NAME) + .withNewSecret() + .withSecretName(initContainerSecret.getMetadata.getName) + .endSecret() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainerName)) + .addToVolumeMounts(sharedVolumeMounts: _*) + .addNewEnv() + .withName(ENV_UPLOADED_JARS_DIR) + .withValue(jarsDownloadPath) + .endEnv() + .endContainer() + .endSpec() + } + + override def buildInitContainerSecret(jarsSecret: String, filesSecret: String): Secret = { + val trustStoreBase64 = stagingServiceSslOptions.trustStore.map { trustStoreFile => + require(trustStoreFile.isFile, "Dependency server trustStore provided at" + + trustStoreFile.getAbsolutePath + " does not exist or is not a file.") + (INIT_CONTAINER_TRUSTSTORE_SECRET_KEY, + BaseEncoding.base64().encode(Files.toByteArray(trustStoreFile))) + }.toMap + val jarsSecretBase64 = BaseEncoding.base64().encode(jarsSecret.getBytes(Charsets.UTF_8)) + val filesSecretBase64 = BaseEncoding.base64().encode(filesSecret.getBytes(Charsets.UTF_8)) + val secretData = Map( + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY -> jarsSecretBase64, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY -> filesSecretBase64) ++ + trustStoreBase64 + val kubernetesSecret = new SecretBuilder() + .withNewMetadata() + .withName(s"$kubernetesAppId-spark-init") + .endMetadata() + .addToData(secretData.asJava) + .build() + kubernetesSecret + } + + override def buildInitContainerConfigMap( + jarsResourceId: String, filesResourceId: String): ConfigMap = { + val initContainerProperties = new Properties() + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_URI.key, stagingServerUri) + initContainerProperties.setProperty(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.key, jarsDownloadPath) + initContainerProperties.setProperty(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.key, filesDownloadPath) + initContainerProperties.setProperty( + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key, jarsResourceId) + initContainerProperties.setProperty( + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key, INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) + 
initContainerProperties.setProperty( + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key, filesResourceId) + initContainerProperties.setProperty( + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key, INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) + initContainerProperties.setProperty(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT.key, + s"${downloadTimeoutMinutes}m") + stagingServiceSslOptions.trustStore.foreach { _ => + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key, + INIT_CONTAINER_TRUSTSTORE_PATH) + } + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_SSL_ENABLED.key, + stagingServiceSslOptions.enabled.toString) + stagingServiceSslOptions.trustStorePassword.foreach { password => + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) + } + stagingServiceSslOptions.trustStoreType.foreach { storeType => + initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) + } + val propertiesWriter = new StringWriter() + initContainerProperties.store(propertiesWriter, "Init-container properties.") + new ConfigMapBuilder() + .withNewMetadata() + .withName(s"$kubernetesAppId-init-properties") + .endMetadata() + .addToData(INIT_CONTAINER_CONFIG_MAP_KEY, propertiesWriter.toString) + .build() + } + + override def resolveSparkJars(): Seq[String] = resolveLocalFiles(sparkJars, jarsDownloadPath) + + override def resolveSparkFiles(): Seq[String] = resolveLocalFiles(sparkFiles, filesDownloadPath) + + private def resolveLocalFiles( + allFileUriStrings: Seq[String], localDownloadRoot: String): Seq[String] = { + val usedLocalFileNames = mutable.HashSet.empty[String] + val resolvedFiles = mutable.Buffer.empty[String] + for (fileUriString <- allFileUriStrings) { + val fileUri = Utils.resolveURI(fileUriString) + val resolvedFile = Option(fileUri.getScheme).getOrElse("file") match { + case "file" => + // Deduplication logic matches that of CompressionUtils#writeTarGzipToStream + val file = new File(fileUri.getPath) + val extension = Files.getFileExtension(file.getName) + val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) + var resolvedFileName = file.getName + var deduplicationCounter = 1 + while (usedLocalFileNames.contains(resolvedFileName)) { + resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" + deduplicationCounter += 1 + } + s"file://$localDownloadRoot/$resolvedFileName" + case _ => fileUriString + } + resolvedFiles += resolvedFile + } + resolvedFiles + } + + private def getTypedResponseResult[T](call: Call[T]): T = { + val response = call.execute() + if (response.code() < 200 || response.code() >= 300) { + throw new SparkException("Unexpected response from dependency server when uploading" + + s" dependencies: ${response.code()}. Error body: " + + Option(response.errorBody()).map(_.string()).getOrElse("N/A")) + } + response.body() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala new file mode 100644 index 0000000000000..8f09112132b2c --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala @@ -0,0 +1,58 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl + +private[spark] trait MountedDependencyManagerProvider { + def getMountedDependencyManager( + kubernetesAppId: String, + stagingServerUri: String, + podLabels: Map[String, String], + podNamespace: String, + sparkJars: Seq[String], + sparkFiles: Seq[String]): MountedDependencyManager +} + +private[spark] class MountedDependencyManagerProviderImpl(sparkConf: SparkConf) + extends MountedDependencyManagerProvider { + override def getMountedDependencyManager( + kubernetesAppId: String, + stagingServerUri: String, + podLabels: Map[String, String], + podNamespace: String, + sparkJars: Seq[String], + sparkFiles: Seq[String]): MountedDependencyManager = { + val resourceStagingServerSslOptions = new SparkSecurityManager(sparkConf) + .getSSLOptions("kubernetes.resourceStagingServer") + new MountedDependencyManagerImpl( + kubernetesAppId, + podLabels, + podNamespace, + stagingServerUri, + sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE), + sparkConf.get(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION), + sparkConf.get(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION), + sparkConf.get(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT), + sparkJars, + sparkFiles, + resourceStagingServerSslOptions, + RetrofitClientFactoryImpl) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala new file mode 100644 index 0000000000000..af3de6ce85026 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.internal.Logging + +trait SubmissionKubernetesClientProvider { + def get: KubernetesClient +} + +private[spark] class SubmissionKubernetesClientProviderImpl(sparkConf: SparkConf) + extends SubmissionKubernetesClientProvider with Logging { + + private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + private val master = resolveK8sMaster(sparkConf.get("spark.master")) + + override def get: KubernetesClient = { + var k8ConfBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(master) + .withNamespace(namespace) + sparkConf.get(KUBERNETES_SUBMIT_CA_CERT_FILE).foreach { + f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) + } + sparkConf.get(KUBERNETES_SUBMIT_CLIENT_KEY_FILE).foreach { + f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) + } + sparkConf.get(KUBERNETES_SUBMIT_CLIENT_CERT_FILE).foreach { + f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) + } + sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { token => + k8ConfBuilder = k8ConfBuilder.withOauthToken(token) + } + val k8ClientConfig = k8ConfBuilder.build + new DefaultKubernetesClient(k8ClientConfig) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index 048427fa4ec23..ca05fe767146b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -33,8 +33,8 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.v1.CompressionUtils import org.apache.spark.deploy.rest._ import org.apache.spark.internal.config.OptionalConfigEntry import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala new file mode 100644 index 0000000000000..680d305985cc0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.File +import java.util.concurrent.TimeUnit + +import com.google.common.base.Charsets +import com.google.common.io.Files +import com.google.common.util.concurrent.SettableFuture +import okhttp3.ResponseBody +import retrofit2.{Call, Callback, Response} + +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +private trait WaitableCallback[T] extends Callback[T] { + private val complete = SettableFuture.create[Boolean] + + override final def onFailure(call: Call[T], t: Throwable): Unit = complete.setException(t) + + override final def onResponse(call: Call[T], response: Response[T]): Unit = { + require(response.code() >= 200 && response.code() < 300, Option(response.errorBody()) + .map(_.string()) + .getOrElse(s"Error executing HTTP request, but error body was not provided.")) + handleResponse(response.body()) + complete.set(true) + } + + protected def handleResponse(body: T): Unit + + final def waitForCompletion(time: Long, timeUnit: TimeUnit): Unit = { + complete.get(time, timeUnit) + } +} + +private class DownloadTarGzCallback(downloadDir: File) extends WaitableCallback[ResponseBody] { + + override def handleResponse(responseBody: ResponseBody): Unit = { + Utils.tryWithResource(responseBody.byteStream()) { responseStream => + CompressionUtils.unpackTarStreamToDirectory(responseStream, downloadDir) + } + } +} + +private[spark] class KubernetesSparkDependencyDownloadInitContainer( + sparkConf: SparkConf, retrofitClientFactory: RetrofitClientFactory) extends Logging { + + private val resourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) + .getOrElse(throw new SparkException("No dependency server URI was provided.")) + + private val downloadJarsResourceIdentifier = sparkConf + .get(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER) + .getOrElse(throw new SparkException("No resource identifier provided for jars.")) + private val downloadJarsSecretLocation = new File( + sparkConf.get(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION)) + private val downloadFilesResourceIdentifier = sparkConf + .get(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER) + .getOrElse(throw new SparkException("No resource identifier provided for files.")) + private val downloadFilesSecretLocation = new File( + sparkConf.get(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION)) + require(downloadJarsSecretLocation.isFile, "Application jars download secret provided" + + s" at ${downloadJarsSecretLocation.getAbsolutePath} does not exist or is not a file.") + require(downloadFilesSecretLocation.isFile, "Application files download secret provided" + + s" at ${downloadFilesSecretLocation.getAbsolutePath} does not exist or is not a file.") + + private val jarsDownloadDir = new File(sparkConf.get(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION)) + require(jarsDownloadDir.isDirectory, "Application jars download 
directory provided at" + + s" ${jarsDownloadDir.getAbsolutePath} does not exist or is not a directory.") + + private val filesDownloadDir = new File(sparkConf.get(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION)) + require(filesDownloadDir.isDirectory, "Application files download directory provided at" + + s" ${filesDownloadDir.getAbsolutePath} does not exist or is not a directory.") + private val downloadTimeoutMinutes = sparkConf.get(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT) + + def run(): Unit = { + val securityManager = new SparkSecurityManager(sparkConf) + val sslOptions = securityManager.getSSLOptions("kubernetes.resourceStagingServer") + val service = retrofitClientFactory.createRetrofitClient( + resourceStagingServerUri, classOf[ResourceStagingServiceRetrofit], sslOptions) + val jarsSecret = Files.toString(downloadJarsSecretLocation, Charsets.UTF_8) + val filesSecret = Files.toString(downloadFilesSecretLocation, Charsets.UTF_8) + val downloadJarsCallback = new DownloadTarGzCallback(jarsDownloadDir) + val downloadFilesCallback = new DownloadTarGzCallback(filesDownloadDir) + service.downloadResources(downloadJarsResourceIdentifier, jarsSecret) + .enqueue(downloadJarsCallback) + service.downloadResources(downloadFilesResourceIdentifier, filesSecret) + .enqueue(downloadFilesCallback) + logInfo("Waiting to download jars...") + downloadJarsCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) + logInfo(s"Jars downloaded to ${jarsDownloadDir.getAbsolutePath}") + logInfo("Waiting to download files...") + downloadFilesCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) + logInfo(s"Files downloaded to ${filesDownloadDir.getAbsolutePath}") + } +} + +object KubernetesSparkDependencyDownloadInitContainer extends Logging { + def main(args: Array[String]): Unit = { + logInfo("Starting init-container to download Spark application dependencies.") + val sparkConf = if (args.nonEmpty) { + SparkConfPropertiesParser.getSparkConfFromPropertiesFile(new File(args(0))) + } else { + new SparkConf(true) + } + new KubernetesSparkDependencyDownloadInitContainer(sparkConf, RetrofitClientFactoryImpl).run() + logInfo("Finished downloading application dependencies.") + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala index 8ca13da545d5d..4ecb6369ff3b0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala @@ -16,13 +16,11 @@ */ package org.apache.spark.deploy.rest.kubernetes.v2 -import java.io.{File, FileInputStream} -import java.util.Properties +import java.io.File import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.google.common.collect.Maps import org.eclipse.jetty.http.HttpVersion import org.eclipse.jetty.server.{HttpConfiguration, HttpConnectionFactory, Server, ServerConnector, SslConnectionFactory} import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} @@ -30,12 +28,10 @@ import org.eclipse.jetty.util.thread.{QueuedThreadPool, ScheduledExecutorSchedul import org.glassfish.jersey.media.multipart.MultiPartFeature import 
org.glassfish.jersey.server.ResourceConfig import org.glassfish.jersey.servlet.ServletContainer -import scala.collection.JavaConverters._ import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.{ConfigReader, SparkConfigProvider} import org.apache.spark.util.Utils private[spark] class ResourceStagingServer( @@ -97,20 +93,10 @@ private[spark] class ResourceStagingServer( object ResourceStagingServer { def main(args: Array[String]): Unit = { - val sparkConf = new SparkConf(true) - if (args.nonEmpty) { - val propertiesFile = new File(args(0)) - if (!propertiesFile.isFile) { - throw new IllegalArgumentException(s"Server properties file given at" + - s" ${propertiesFile.getAbsoluteFile} does not exist or is not a file.") - } - val properties = new Properties - Utils.tryWithResource(new FileInputStream(propertiesFile))(properties.load) - val propertiesMap = Maps.fromProperties(properties) - val configReader = new ConfigReader(new SparkConfigProvider(propertiesMap)) - propertiesMap.asScala.keys.foreach { key => - configReader.get(key).foreach(sparkConf.set(key, _)) - } + val sparkConf = if (args.nonEmpty) { + SparkConfPropertiesParser.getSparkConfFromPropertiesFile(new File(args(0))) + } else { + new SparkConf(true) } val dependenciesRootDir = Utils.createTempDir(namePrefix = "local-application-dependencies") val serviceInstance = new ResourceStagingServiceImpl(dependenciesRootDir) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala index 95cc6ab949d5c..844809dec995c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -80,6 +80,15 @@ private[spark] trait ResourceStagingService { @Produces(Array(MediaType.APPLICATION_OCTET_STREAM)) @Path("/resources/{resourceId}") def downloadResources( - @PathParam("resourceId") resourceId: String, - @HeaderParam("Authorization") resourceSecret: String): StreamingOutput + @PathParam("resourceId") resourceId: String, + @HeaderParam("Authorization") resourceSecret: String): StreamingOutput + + /** + * Health check. 
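+   * Returns a plain-text response ("pong" in the default implementation) so callers can
+   * verify that the staging server is reachable.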
+ */ + @GET + @Consumes(Array(MediaType.APPLICATION_JSON)) + @Produces(Array(MediaType.TEXT_PLAIN)) + @Path("/ping") + def ping(): String } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala index 732969cd67d89..cf6180fbf53d4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -88,6 +88,8 @@ private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) } } } + + override def ping(): String = "pong" } private case class StagedResources( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala index daf03f764b35a..b1a3cc0676757 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala @@ -37,6 +37,10 @@ private[spark] trait ResourceStagingServiceRetrofit { @Streaming @retrofit2.http.GET("/api/v0/resources/{resourceId}") - def downloadResources(@Path("resourceId") resourceId: String, - @retrofit2.http.Header("Authorization") resourceSecret: String): Call[ResponseBody] + def downloadResources( + @Path("resourceId") resourceId: String, + @retrofit2.http.Header("Authorization") resourceSecret: String): Call[ResponseBody] + + @retrofit2.http.GET("/api/ping") + def ping(): String } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala similarity index 85% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala index 7416c624e97f6..f906423524944 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala @@ -22,21 +22,26 @@ import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import okhttp3.OkHttpClient +import okhttp3.{Dispatcher, OkHttpClient} import retrofit2.Retrofit import retrofit2.converter.jackson.JacksonConverterFactory import retrofit2.converter.scalars.ScalarsConverterFactory import org.apache.spark.SSLOptions -import org.apache.spark.util.Utils +import org.apache.spark.util.{ThreadUtils, Utils} -private[spark] object RetrofitUtils { +private[spark] trait RetrofitClientFactory { + def createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T +} + +private[spark] object 
RetrofitClientFactoryImpl extends RetrofitClientFactory { private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) private val SECURE_RANDOM = new SecureRandom() def createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T = { - val okHttpClientBuilder = new OkHttpClient.Builder() + val dispatcher = new Dispatcher(ThreadUtils.newDaemonCachedThreadPool(s"http-client-$baseUrl")) + val okHttpClientBuilder = new OkHttpClient.Builder().dispatcher(dispatcher) sslOptions.trustStore.foreach { trustStoreFile => require(trustStoreFile.isFile, s"TrustStore provided at ${trustStoreFile.getAbsolutePath}" + " does not exist, or is not a file.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala new file mode 100644 index 0000000000000..cf9decab127c5 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{File, FileInputStream} +import java.util.Properties + +import com.google.common.collect.Maps +import scala.collection.JavaConverters.mapAsScalaMapConverter + +import org.apache.spark.SparkConf +import org.apache.spark.internal.config.{ConfigReader, SparkConfigProvider} +import org.apache.spark.util.Utils + +private[spark] object SparkConfPropertiesParser { + + def getSparkConfFromPropertiesFile(propertiesFile: File): SparkConf = { + val sparkConf = new SparkConf(true) + if (!propertiesFile.isFile) { + throw new IllegalArgumentException(s"Server properties file given at" + + s" ${propertiesFile.getAbsoluteFile} does not exist or is not a file.") + } + val properties = new Properties + Utils.tryWithResource(new FileInputStream(propertiesFile))(properties.load) + val propertiesMap = Maps.fromProperties(properties) + val configReader = new ConfigReader(new SparkConfigProvider(propertiesMap)) + propertiesMap.asScala.keys.foreach { key => + configReader.get(key).foreach(sparkConf.set(key, _)) + } + sparkConf + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala new file mode 100644 index 0000000000000..b8c2b0c91bbeb --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster.kubernetes + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, namespace: String) { + private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) + private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) + private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) + private val caCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) + private val clientKeyFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) + private val clientCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) + + /** + * Creates a {@link KubernetesClient}, expecting to be from within the context of a pod. 
When + * doing so, service account token files can be picked up from canonical locations. + */ + def get: DefaultKubernetesClient = { + val baseClientConfigBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) + .withNamespace(namespace) + + val configBuilder = oauthTokenFile + .orElse(caCertFile) + .orElse(clientKeyFile) + .orElse(clientCertFile) + .map { _ => + var mountedAuthConfigBuilder = baseClientConfigBuilder + oauthTokenFile.foreach { tokenFilePath => + val tokenFile = new File(tokenFilePath) + mountedAuthConfigBuilder = mountedAuthConfigBuilder + .withOauthToken(Files.toString(tokenFile, Charsets.UTF_8)) + } + caCertFile.foreach { caFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withCaCertFile(caFile) + } + clientKeyFile.foreach { keyFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientKeyFile(keyFile) + } + clientCertFile.foreach { certFile => + mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientCertFile(certFile) + } + mountedAuthConfigBuilder + }.getOrElse { + var serviceAccountConfigBuilder = baseClientConfigBuilder + if (SERVICE_ACCOUNT_CA_CERT.isFile) { + serviceAccountConfigBuilder = serviceAccountConfigBuilder.withCaCertFile( + SERVICE_ACCOUNT_CA_CERT.getAbsolutePath) + } + + if (SERVICE_ACCOUNT_TOKEN.isFile) { + serviceAccountConfigBuilder = serviceAccountConfigBuilder.withOauthToken( + Files.toString(SERVICE_ACCOUNT_TOKEN, Charsets.UTF_8)) + } + serviceAccountConfigBuilder + } + new DefaultKubernetesClient(configBuilder.build) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 130b143c7e92b..15457db7e1459 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -47,11 +47,6 @@ private[spark] class KubernetesClusterSchedulerBackend( private val blockmanagerPort = conf .getInt("spark.blockmanager.port", DEFAULT_BLOCKMANAGER_PORT) - private val kubernetesDriverServiceName = conf - .get(KUBERNETES_DRIVER_SERVICE_NAME) - .getOrElse( - throw new SparkException("Must specify the service name the driver is running with")) - private val kubernetesDriverPodName = conf .get(KUBERNETES_DRIVER_POD_NAME) .getOrElse( @@ -73,8 +68,8 @@ private[spark] class KubernetesClusterSchedulerBackend( private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) - private val kubernetesClient = new KubernetesClientBuilder(conf, kubernetesNamespace) - .buildFromWithinPod() + private val kubernetesClient = new DriverPodKubernetesClientProvider(conf, kubernetesNamespace) + .get private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). 
@@ -142,11 +137,6 @@ private[spark] class KubernetesClusterSchedulerBackend( } catch { case e: Throwable => logError("Uncaught exception while shutting down controllers.", e) } - try { - kubernetesClient.services().withName(kubernetesDriverServiceName).delete() - } catch { - case e: Throwable => logError("Uncaught exception while shutting down driver service.", e) - } try { kubernetesClient.close() } catch { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala new file mode 100644 index 0000000000000..9e2ab26460412 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} +import io.fabric8.kubernetes.client.KubernetesClient +import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} +import org.hamcrest.{BaseMatcher, Description} +import org.mockito.Matchers.{any, anyVararg, argThat, startsWith, eq => mockitoEq} +import org.mockito.Mockito.when +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import scala.collection.JavaConverters._ +import scala.reflect.ClassTag + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v2.StagedResourceIdentifier +import org.apache.spark.util.Utils + +class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { + + private val MAIN_CLASS = "org.apache.spark.test.Main" + private val APP_ARGS = Array[String]("arg1", "arg2") + private val MAIN_APP_RESOURCE = "local:///app/jars/spark-main.jar" + private val APP_NAME = "spark-test-app" + private val STAGING_SERVER_URI = "http://localhost:9000" + private val SPARK_JARS = Seq( + "local:///app/jars/spark-helper.jar", "file:///var/data/spark-local-helper.jar") + private val RESOLVED_SPARK_JARS = Seq( + "local:///app/jars/spark-helper.jar", + "file:///var/data/spark-downloaded/spark-local-helper.jar") + private val SPARK_FILES = Seq( + "local:///app/files/spark-file.txt", "file:///var/data/spark-local-file.txt") + private val RESOLVED_SPARK_FILES = Seq( + 
"local:///app/files/spark-file.txt", "file:///var/data/spark-downloaded/spark-local-file.txt") + private val DRIVER_EXTRA_CLASSPATH = "/app/jars/extra-jar1.jar:/app/jars/extra-jars2.jar" + private val DRIVER_DOCKER_IMAGE_VALUE = "spark-driver:latest" + private val DRIVER_MEMORY_OVERHEARD_MB = 128L + private val DRIVER_MEMORY_MB = 512L + private val NAMESPACE = "namespace" + private val DOWNLOAD_JARS_RESOURCE_IDENTIFIER = StagedResourceIdentifier("jarsId", "jarsSecret") + private val DOWNLOAD_FILES_RESOURCE_IDENTIFIER = StagedResourceIdentifier( + "filesId", "filesSecret") + private val MOUNTED_FILES_ANNOTATION_KEY = "mountedFiles" + + private var sparkConf: SparkConf = _ + private var submissionKubernetesClientProvider: SubmissionKubernetesClientProvider = _ + private var submissionKubernetesClient: KubernetesClient = _ + private type PODS = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] + private type RESOURCES = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ + HasMetadata, Boolean] + private var podOperations: PODS = _ + private var resourceListOperations: RESOURCES = _ + private var mountedDependencyManagerProvider: MountedDependencyManagerProvider = _ + private var mountedDependencyManager: MountedDependencyManager = _ + private var captureCreatedPodAnswer: SelfArgumentCapturingAnswer[Pod] = _ + private var captureCreatedResourcesAnswer: AllArgumentsCapturingAnswer[HasMetadata, RESOURCES] = _ + + before { + sparkConf = new SparkConf(true) + .set("spark.app.name", APP_NAME) + .set("spark.master", "k8s://https://localhost:443") + .set(DRIVER_DOCKER_IMAGE, DRIVER_DOCKER_IMAGE_VALUE) + .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEARD_MB) + .set(KUBERNETES_NAMESPACE, NAMESPACE) + .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB) + submissionKubernetesClientProvider = mock[SubmissionKubernetesClientProvider] + submissionKubernetesClient = mock[KubernetesClient] + podOperations = mock[PODS] + resourceListOperations = mock[RESOURCES] + mountedDependencyManagerProvider = mock[MountedDependencyManagerProvider] + mountedDependencyManager = mock[MountedDependencyManager] + when(submissionKubernetesClientProvider.get).thenReturn(submissionKubernetesClient) + when(submissionKubernetesClient.pods()).thenReturn(podOperations) + captureCreatedPodAnswer = new SelfArgumentCapturingAnswer[Pod] + captureCreatedResourcesAnswer = new AllArgumentsCapturingAnswer[HasMetadata, RESOURCES]( + resourceListOperations) + when(podOperations.create(any())).thenAnswer(captureCreatedPodAnswer) + when(submissionKubernetesClient.resourceList(anyVararg[HasMetadata])) + .thenAnswer(captureCreatedResourcesAnswer) + } + + // Tests w/o local dependencies, or behave independently to that configuration. 
+ test("Simple properties and environment set on the driver pod.") { + sparkConf.set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) + val createdDriverPod = createAndGetDriverPod() + val maybeDriverContainer = getDriverContainer(createdDriverPod) + maybeDriverContainer.foreach { driverContainer => + assert(driverContainer.getName === DRIVER_CONTAINER_NAME) + assert(driverContainer.getImage === DRIVER_DOCKER_IMAGE_VALUE) + assert(driverContainer.getImagePullPolicy === "IfNotPresent") + val envs = driverContainer.getEnv.asScala.map { env => + (env.getName, env.getValue) + }.toMap + assert(envs(ENV_DRIVER_MEMORY) === s"${DRIVER_MEMORY_MB + DRIVER_MEMORY_OVERHEARD_MB}m") + assert(envs(ENV_DRIVER_MAIN_CLASS) === MAIN_CLASS) + assert(envs(ENV_DRIVER_ARGS) === APP_ARGS.mkString(" ")) + assert(envs(ENV_SUBMIT_EXTRA_CLASSPATH) === DRIVER_EXTRA_CLASSPATH) + } + } + + test("Created pod should apply custom annotations and labels") { + sparkConf.set(KUBERNETES_DRIVER_LABELS, + "label1=label1value,label2=label2value") + sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, + "annotation1=annotation1value,annotation2=annotation2value") + val createdDriverPod = createAndGetDriverPod() + val labels = createdDriverPod.getMetadata.getLabels.asScala + assert(labels.size === 4) + // App ID is non-deterministic, but just check if it's set and is prefixed with the app name + val appIdLabel = labels(SPARK_APP_ID_LABEL) + assert(appIdLabel != null && appIdLabel.startsWith(APP_NAME) && appIdLabel != APP_NAME) + val appNameLabel = labels(SPARK_APP_NAME_LABEL) + assert(appNameLabel != null && appNameLabel == APP_NAME) + assert(labels("label1") === "label1value") + assert(labels("label2") === "label2value") + val annotations = createdDriverPod.getMetadata.getAnnotations.asScala + val expectedAnnotations = Map( + "annotation1" -> "annotation1value", "annotation2" -> "annotation2value") + assert(annotations === expectedAnnotations) + } + + test("Driver JVM Options should be set in the environment.") { + sparkConf.set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, "-Dopt1=opt1value") + sparkConf.set("spark.logConf", "true") + val createdDriverPod = createAndGetDriverPod() + val maybeDriverContainer = getDriverContainer(createdDriverPod) + maybeDriverContainer.foreach { driverContainer => + val maybeJvmOptionsEnv = driverContainer.getEnv + .asScala + .find(_.getName == ENV_DRIVER_JAVA_OPTS) + assert(maybeJvmOptionsEnv.isDefined) + maybeJvmOptionsEnv.foreach { jvmOptionsEnv => + val jvmOptions = jvmOptionsEnv.getValue.split(" ") + jvmOptions.foreach { opt => assert(opt.startsWith("-D")) } + val optionKeyValues = jvmOptions.map { option => + val withoutDashDPrefix = option.stripPrefix("-D") + val split = withoutDashDPrefix.split('=') + assert(split.length == 2) + (split(0), split(1)) + }.toMap + assert(optionKeyValues("opt1") === "opt1value") + assert(optionKeyValues.contains("spark.app.id")) + assert(optionKeyValues("spark.jars") === MAIN_APP_RESOURCE) + assert(optionKeyValues(KUBERNETES_DRIVER_POD_NAME.key).startsWith(APP_NAME)) + assert(optionKeyValues("spark.app.name") === APP_NAME) + assert(optionKeyValues("spark.logConf") === "true") + } + } + } + + // Tests with local dependencies with the mounted dependency manager. 
+ test("Uploading local dependencies should create Kubernetes secrets and config map") { + val initContainerConfigMap = getInitContainerConfigMap() + val initContainerSecret = getInitContainerSecret() + runWithMountedDependencies(initContainerConfigMap, initContainerSecret) + val driverPod = captureCreatedPodAnswer.capturedArgument + assert(captureCreatedResourcesAnswer.capturedArguments != null) + assert(captureCreatedResourcesAnswer.capturedArguments.size === 2) + assert(captureCreatedResourcesAnswer.capturedArguments.toSet === + Set(initContainerSecret, initContainerConfigMap)) + captureCreatedResourcesAnswer.capturedArguments.foreach { resource => + val driverPodOwnerReferences = resource.getMetadata.getOwnerReferences + assert(driverPodOwnerReferences.size === 1) + val driverPodOwnerReference = driverPodOwnerReferences.asScala.head + assert(driverPodOwnerReference.getName === driverPod.getMetadata.getName) + assert(driverPodOwnerReference.getApiVersion === driverPod.getApiVersion) + assert(driverPodOwnerReference.getUid === driverPod.getMetadata.getUid) + assert(driverPodOwnerReference.getKind === driverPod.getKind) + assert(driverPodOwnerReference.getController) + } + } + + test("Uploading local resources should set classpath environment variables") { + val initContainerConfigMap = getInitContainerConfigMap() + val initContainerSecret = getInitContainerSecret() + runWithMountedDependencies(initContainerConfigMap, initContainerSecret) + val driverPod = captureCreatedPodAnswer.capturedArgument + val maybeDriverContainer = getDriverContainer(driverPod) + maybeDriverContainer.foreach { driverContainer => + val envs = driverContainer.getEnv + .asScala + .map { env => (env.getName, env.getValue) } + .toMap + val classPathEntries = envs(ENV_MOUNTED_CLASSPATH).split(File.pathSeparator).toSet + val expectedClassPathEntries = RESOLVED_SPARK_JARS + .map(Utils.resolveURI) + .map(_.getPath) + .toSet + assert(classPathEntries === expectedClassPathEntries) + } + } + + private def getInitContainerSecret(): Secret = { + new SecretBuilder() + .withNewMetadata().withName(s"$APP_NAME-init-container-secret").endMetadata() + .addToData( + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY, DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceSecret) + .addToData(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY, + DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceSecret) + .build() + } + + private def getInitContainerConfigMap(): ConfigMap = { + new ConfigMapBuilder() + .withNewMetadata().withName(s"$APP_NAME-init-container-conf").endMetadata() + .addToData("key", "configuration") + .build() + } + + private def runWithMountedDependencies( + initContainerConfigMap: ConfigMap, initContainerSecret: Secret): Unit = { + sparkConf.set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) + .setJars(SPARK_JARS) + .set("spark.files", SPARK_FILES.mkString(",")) + val labelsMatcher = new BaseMatcher[Map[String, String]] { + override def matches(maybeLabels: scala.Any) = { + maybeLabels match { + case labels: Map[String, String] => + labels(SPARK_APP_ID_LABEL).startsWith(APP_NAME) && + labels(SPARK_APP_NAME_LABEL) == APP_NAME + case _ => false + } + } + + override def describeTo(description: Description) = { + description.appendText("Checks if the labels contain the app ID and app name.") + } + } + when(mountedDependencyManagerProvider.getMountedDependencyManager( + startsWith(APP_NAME), + mockitoEq(STAGING_SERVER_URI), + argThat(labelsMatcher), + mockitoEq(NAMESPACE), + mockitoEq(SPARK_JARS ++ Seq(MAIN_APP_RESOURCE)), + 
mockitoEq(SPARK_FILES))).thenReturn(mountedDependencyManager) + when(mountedDependencyManager.uploadJars()).thenReturn(DOWNLOAD_JARS_RESOURCE_IDENTIFIER) + when(mountedDependencyManager.uploadFiles()).thenReturn(DOWNLOAD_FILES_RESOURCE_IDENTIFIER) + when(mountedDependencyManager.buildInitContainerSecret( + DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceSecret, + DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceSecret)) + .thenReturn(initContainerSecret) + when(mountedDependencyManager.buildInitContainerConfigMap( + DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceId, DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceId)) + .thenReturn(initContainerConfigMap) + when(mountedDependencyManager.resolveSparkJars()).thenReturn(RESOLVED_SPARK_JARS) + when(mountedDependencyManager.resolveSparkFiles()).thenReturn(RESOLVED_SPARK_FILES) + when(mountedDependencyManager.configurePodToMountLocalDependencies( + mockitoEq(DRIVER_CONTAINER_NAME), + mockitoEq(initContainerSecret), + mockitoEq(initContainerConfigMap), + any())).thenAnswer(new Answer[PodBuilder] { + override def answer(invocationOnMock: InvocationOnMock): PodBuilder = { + val basePod = invocationOnMock.getArgumentAt(3, classOf[PodBuilder]) + basePod.editMetadata().addToAnnotations(MOUNTED_FILES_ANNOTATION_KEY, "true").endMetadata() + } + }) + val clientUnderTest = createClient() + clientUnderTest.run() + } + + private def getDriverContainer(driverPod: Pod): Option[Container] = { + val maybeDriverContainer = driverPod.getSpec + .getContainers + .asScala + .find(_.getName == DRIVER_CONTAINER_NAME) + assert(maybeDriverContainer.isDefined) + maybeDriverContainer + } + + private def createAndGetDriverPod(): Pod = { + val clientUnderTest = createClient() + clientUnderTest.run() + val createdDriverPod = captureCreatedPodAnswer.capturedArgument + assert(createdDriverPod != null) + createdDriverPod + } + + private def createClient(): Client = { + new Client( + MAIN_CLASS, + sparkConf, + APP_ARGS, + MAIN_APP_RESOURCE, + submissionKubernetesClientProvider, + mountedDependencyManagerProvider) + } + + private class SelfArgumentCapturingAnswer[T: ClassTag] extends Answer[T] { + var capturedArgument: T = _ + + override def answer(invocationOnMock: InvocationOnMock): T = { + val argumentClass = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] + val argument = invocationOnMock.getArgumentAt(0, argumentClass) + this.capturedArgument = argument + argument + } + } + + private class AllArgumentsCapturingAnswer[I, T](returnValue: T) extends Answer[T] { + var capturedArguments: Seq[I] = _ + + override def answer(invocationOnMock: InvocationOnMock): T = { + capturedArguments = invocationOnMock.getArguments.map(_.asInstanceOf[I]).toSeq + returnValue + } + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala new file mode 100644 index 0000000000000..321fe1b3fd889 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala @@ -0,0 +1,323 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.{ByteArrayOutputStream, File, StringReader} +import java.util.{Properties, UUID} + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Container, Pod, PodBuilder, SecretBuilder} +import okhttp3.RequestBody +import okio.Okio +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.mockito.Matchers.any +import org.mockito.Mockito +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import retrofit2.{Call, Response} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourceIdentifier} +import org.apache.spark.util.Utils + +private[spark] class MountedDependencyManagerSuite extends SparkFunSuite with BeforeAndAfter { + import MountedDependencyManagerSuite.createTempFile + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val APP_ID = "app-id" + private val LABELS = Map("label1" -> "label1value", "label2" -> "label2value") + private val NAMESPACE = "namespace" + private val STAGING_SERVER_URI = "http://localhost:8000" + private val INIT_CONTAINER_IMAGE = "spark-driver-init:latest" + private val JARS_DOWNLOAD_PATH = DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.defaultValue.get + private val FILES_DOWNLOAD_PATH = DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.defaultValue.get + private val DOWNLOAD_TIMEOUT_MINUTES = 5 + private val LOCAL_JARS = Seq(createTempFile("jar"), createTempFile("jar")) + private val JARS = Seq("hdfs://localhost:9000/jars/jar1.jar", + s"file://${LOCAL_JARS.head}", + LOCAL_JARS(1)) + private val LOCAL_FILES = Seq(createTempFile("txt")) + private val FILES = Seq("hdfs://localhost:9000/files/file1.txt", + LOCAL_FILES.head) + private val TRUSTSTORE_FILE = new File(createTempFile(".jks")) + private val TRUSTSTORE_PASSWORD = "trustStorePassword" + private val TRUSTSTORE_TYPE = "jks" + private val STAGING_SERVER_SSL_OPTIONS = SSLOptions( + enabled = true, + trustStore = Some(TRUSTSTORE_FILE), + trustStorePassword = Some(TRUSTSTORE_PASSWORD), + trustStoreType = Some(TRUSTSTORE_TYPE)) + private val JARS_RESOURCE_ID = "jarsId" + private val JARS_SECRET = "jarsSecret" + private val FILES_RESOURCE_ID = "filesId" + private val FILES_SECRET = "filesSecret" + private var retrofitClientFactory: RetrofitClientFactory = _ + private var retrofitClient: ResourceStagingServiceRetrofit = _ + + private var 
dependencyManagerUnderTest: MountedDependencyManager = _ + + before { + retrofitClientFactory = mock[RetrofitClientFactory] + retrofitClient = mock[ResourceStagingServiceRetrofit] + Mockito.when( + retrofitClientFactory.createRetrofitClient( + STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) + .thenReturn(retrofitClient) + dependencyManagerUnderTest = new MountedDependencyManagerImpl( + APP_ID, + LABELS, + NAMESPACE, + STAGING_SERVER_URI, + INIT_CONTAINER_IMAGE, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOWNLOAD_TIMEOUT_MINUTES, + JARS, + FILES, + STAGING_SERVER_SSL_OPTIONS, + retrofitClientFactory) + } + + test("Uploading jars should contact the staging server with the appropriate parameters") { + val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( + StagedResourceIdentifier("resourceId", "resourceSecret")) + Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) + .thenAnswer(capturingArgumentsAnswer) + dependencyManagerUnderTest.uploadJars() + testUploadSendsCorrectFiles(LOCAL_JARS, capturingArgumentsAnswer) + } + + test("Uploading files should contact the staging server with the appropriate parameters") { + val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( + StagedResourceIdentifier("resourceId", "resourceSecret")) + Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) + .thenAnswer(capturingArgumentsAnswer) + dependencyManagerUnderTest.uploadFiles() + testUploadSendsCorrectFiles(LOCAL_FILES, capturingArgumentsAnswer) + } + + test("Init container secret should contain jars, files, and trustStore") { + val jarsSecretBase64 = BaseEncoding.base64().encode(JARS_SECRET.getBytes(Charsets.UTF_8)) + val filesSecretBase64 = BaseEncoding.base64().encode(FILES_SECRET.getBytes(Charsets.UTF_8)) + val trustStoreBase64 = BaseEncoding.base64().encode(Files.toByteArray(TRUSTSTORE_FILE)) + val secret = dependencyManagerUnderTest.buildInitContainerSecret("jarsSecret", "filesSecret") + assert(secret.getMetadata.getName === s"$APP_ID-spark-init") + val expectedSecrets = Map( + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY -> jarsSecretBase64, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY -> filesSecretBase64, + INIT_CONTAINER_TRUSTSTORE_SECRET_KEY -> trustStoreBase64) + assert(secret.getData.asScala === expectedSecrets) + } + + test("Init container config map should contain parameters for downloading from staging server") { + val configMap = dependencyManagerUnderTest.buildInitContainerConfigMap( + JARS_RESOURCE_ID, FILES_RESOURCE_ID) + assert(configMap.getMetadata.getName === s"$APP_ID-init-properties") + val propertiesRawString = configMap.getData.get(INIT_CONTAINER_CONFIG_MAP_KEY) + assert(propertiesRawString != null) + val propertiesReader = new StringReader(propertiesRawString) + val properties = new Properties() + properties.load(propertiesReader) + val propertiesMap = properties.stringPropertyNames().asScala.map { prop => + (prop, properties.getProperty(prop)) + }.toMap + val expectedProperties = Map[String, String]( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.key -> JARS_DOWNLOAD_PATH, + DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.key -> FILES_DOWNLOAD_PATH, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH, + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + 
INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH, + DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT.key -> s"${DOWNLOAD_TIMEOUT_MINUTES}m", + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> INIT_CONTAINER_TRUSTSTORE_PATH, + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", + RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, + RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE) + assert(propertiesMap === expectedProperties) + } + + test("Resolving jars should map local paths to their mounted counterparts") { + val resolvedJars = dependencyManagerUnderTest.resolveSparkJars() + val expectedResolvedJars = Seq( + "hdfs://localhost:9000/jars/jar1.jar", + s"file://$JARS_DOWNLOAD_PATH/${new File(JARS(1)).getName}", + s"file://$JARS_DOWNLOAD_PATH/${new File(JARS(2)).getName}") + assert(resolvedJars === expectedResolvedJars) + } + + test("Resolving files should map local paths to their mounted counterparts") { + val resolvedFiles = dependencyManagerUnderTest.resolveSparkFiles() + val expectedResolvedFiles = Seq( + "hdfs://localhost:9000/files/file1.txt", + s"file://$FILES_DOWNLOAD_PATH/${new File(FILES(1)).getName}") + assert(resolvedFiles === expectedResolvedFiles) + } + + test("Downloading init container should be added to pod") { + val driverPod = configureDriverPod() + val podAnnotations = driverPod.getMetadata.getAnnotations + assert(podAnnotations.size === 1) + val initContainerRawAnnotation = podAnnotations.get(INIT_CONTAINER_ANNOTATION) + val initContainers = OBJECT_MAPPER.readValue( + initContainerRawAnnotation, classOf[Array[Container]]) + assert(initContainers.size === 1) + val initContainer = initContainers.head + assert(initContainer.getName === "spark-driver-init") + assert(initContainer.getImage === INIT_CONTAINER_IMAGE) + assert(initContainer.getImagePullPolicy === "IfNotPresent") + val volumeMounts = initContainer.getVolumeMounts + .asScala + .map(mount => (mount.getName, mount.getMountPath)) + .toMap + val expectedVolumeMounts = Map[String, String]( + DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, + DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH, + INIT_CONTAINER_PROPERTIES_FILE_VOLUME -> INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH, + INIT_CONTAINER_SECRETS_VOLUME_NAME -> INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH) + assert(volumeMounts === expectedVolumeMounts) + } + + test("Driver pod should have added volumes and volume mounts for file downloads") { + val driverPod = configureDriverPod() + val volumes = driverPod.getSpec.getVolumes.asScala.map(volume => (volume.getName, volume)).toMap + val initContainerPropertiesVolume = volumes(INIT_CONTAINER_PROPERTIES_FILE_VOLUME).getConfigMap + assert(initContainerPropertiesVolume != null) + assert(initContainerPropertiesVolume.getName === "config") + assert(initContainerPropertiesVolume.getItems.asScala.exists { keyToPath => + keyToPath.getKey == INIT_CONTAINER_CONFIG_MAP_KEY && + keyToPath.getPath == INIT_CONTAINER_PROPERTIES_FILE_NAME + }) + val jarsVolume = volumes(DOWNLOAD_JARS_VOLUME_NAME) + assert(jarsVolume.getEmptyDir != null) + val filesVolume = volumes(DOWNLOAD_FILES_VOLUME_NAME) + assert(filesVolume.getEmptyDir != null) + val initContainerSecretVolume = volumes(INIT_CONTAINER_SECRETS_VOLUME_NAME) + assert(initContainerSecretVolume.getSecret != null) + assert(initContainerSecretVolume.getSecret.getSecretName === "secret") + val driverContainer = driverPod.getSpec + .getContainers + .asScala + .find(_.getName == "driver-container").get + val 
driverContainerVolumeMounts = driverContainer.getVolumeMounts + .asScala + .map(mount => (mount.getName, mount.getMountPath)) + .toMap + val expectedVolumeMountNamesAndPaths = Map[String, String]( + DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, + DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) + assert(driverContainerVolumeMounts === expectedVolumeMountNamesAndPaths) + val envs = driverContainer.getEnv + assert(envs.size() === 1) + assert(envs.asScala.head.getName === ENV_UPLOADED_JARS_DIR) + assert(envs.asScala.head.getValue === JARS_DOWNLOAD_PATH) + } + + private def configureDriverPod(): Pod = { + val initContainerSecret = new SecretBuilder() + .withNewMetadata().withName("secret").endMetadata() + .addToData("datakey", "datavalue") + .build() + val initContainerConfigMap = new ConfigMapBuilder() + .withNewMetadata().withName("config").endMetadata() + .addToData("datakey", "datavalue") + .build() + val basePod = new PodBuilder() + .withNewMetadata() + .withName("driver-pod") + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName("driver-container") + .withImage("spark-driver:latest") + .endContainer() + .endSpec() + val adjustedPod = dependencyManagerUnderTest.configurePodToMountLocalDependencies( + "driver-container", + initContainerSecret, + initContainerConfigMap, + basePod).build() + adjustedPod + } + + private def testUploadSendsCorrectFiles( + expectedFiles: Seq[String], + capturingArgumentsAnswer: UploadDependenciesArgumentsCapturingAnswer) = { + val requestLabelsBytes = requestBodyBytes(capturingArgumentsAnswer.podLabelsArg) + val requestLabelsString = new String(requestLabelsBytes, Charsets.UTF_8) + val requestLabelsMap = OBJECT_MAPPER.readValue( + requestLabelsString, classOf[Map[String, String]]) + assert(requestLabelsMap === LABELS) + val requestNamespaceBytes = requestBodyBytes(capturingArgumentsAnswer.podNamespaceArg) + val requestNamespaceString = new String(requestNamespaceBytes, Charsets.UTF_8) + assert(requestNamespaceString === NAMESPACE) + val localJarsTarStream = new ByteArrayOutputStream() + CompressionUtils.writeTarGzipToStream(localJarsTarStream, expectedFiles) + val requestResourceBytes = requestBodyBytes(capturingArgumentsAnswer.podResourcesArg) + assert(requestResourceBytes.sameElements(localJarsTarStream.toByteArray)) + } + + private def requestBodyBytes(requestBody: RequestBody): Array[Byte] = { + Utils.tryWithResource(new ByteArrayOutputStream()) { outputStream => + Utils.tryWithResource(Okio.sink(outputStream)) { sink => + Utils.tryWithResource(Okio.buffer(sink)) { bufferedSink => + requestBody.writeTo(bufferedSink) + } + } + outputStream.toByteArray + } + } +} + +private class UploadDependenciesArgumentsCapturingAnswer(returnValue: StagedResourceIdentifier) + extends Answer[Call[StagedResourceIdentifier]] { + + var podLabelsArg: RequestBody = _ + var podNamespaceArg: RequestBody = _ + var podResourcesArg: RequestBody = _ + var kubernetesCredentialsArg: RequestBody = _ + + override def answer(invocationOnMock: InvocationOnMock): Call[StagedResourceIdentifier] = { + podLabelsArg = invocationOnMock.getArgumentAt(0, classOf[RequestBody]) + podNamespaceArg = invocationOnMock.getArgumentAt(1, classOf[RequestBody]) + podResourcesArg = invocationOnMock.getArgumentAt(2, classOf[RequestBody]) + kubernetesCredentialsArg = invocationOnMock.getArgumentAt(3, classOf[RequestBody]) + val responseCall = mock[Call[StagedResourceIdentifier]] + Mockito.when(responseCall.execute()).thenReturn(Response.success(returnValue)) + responseCall + } +} + +private 
object MountedDependencyManagerSuite { + def createTempFile(extension: String): String = { + val dir = Utils.createTempDir() + val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") + Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) + file.getAbsolutePath + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala new file mode 100644 index 0000000000000..77eb7f2b9f49c --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala @@ -0,0 +1,165 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes.v2 + +import java.io.{ByteArrayOutputStream, File} +import java.util.UUID +import javax.ws.rs.core + +import com.google.common.base.Charsets +import com.google.common.io.Files +import okhttp3.{MediaType, ResponseBody} +import org.mockito.Matchers.any +import org.mockito.Mockito.{doAnswer, when} +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import retrofit2.{Call, Callback, Response} + +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.util.Utils + +class KubernetesSparkDependencyDownloadInitContainerSuite + extends SparkFunSuite with BeforeAndAfter { + import KubernetesSparkDependencyDownloadInitContainerSuite.createTempFile + private val STAGING_SERVER_URI = "http://localhost:8000" + private val TRUSTSTORE_FILE = new File(createTempFile(".jks")) + private val TRUSTSTORE_PASSWORD = "trustStorePassword" + private val TRUSTSTORE_TYPE = "jks" + private val STAGING_SERVER_SSL_OPTIONS = SSLOptions( + enabled = true, + trustStore = Some(TRUSTSTORE_FILE), + trustStorePassword = Some(TRUSTSTORE_PASSWORD), + trustStoreType = Some(TRUSTSTORE_TYPE)) + private val JARS = Seq(createTempFile("jar"), createTempFile("jar")) + private val FILES = Seq(createTempFile("txt"), createTempFile("csv")) + private val DOWNLOAD_JARS_SECRET_LOCATION = createTempFile("txt") + private val DOWNLOAD_FILES_SECRET_LOCATION = createTempFile("txt") + private val JARS_RESOURCE_ID = "jarsId" + private val FILES_RESOURCE_ID = "filesId" + + private var sparkConf: SparkConf = _ + private var downloadJarsDir: File = _ + private var downloadFilesDir: File = _ + private var 
downloadJarsSecretValue: String = _ + private var downloadFilesSecretValue: String = _ + private var jarsCompressedBytes: Array[Byte] = _ + private var filesCompressedBytes: Array[Byte] = _ + private var retrofitClientFactory: RetrofitClientFactory = _ + private var retrofitClient: ResourceStagingServiceRetrofit = _ + private var initContainerUnderTest: KubernetesSparkDependencyDownloadInitContainer = _ + + override def beforeAll(): Unit = { + jarsCompressedBytes = compressPathsToBytes(JARS) + filesCompressedBytes = compressPathsToBytes(FILES) + downloadJarsSecretValue = Files.toString( + new File(DOWNLOAD_JARS_SECRET_LOCATION), Charsets.UTF_8) + downloadFilesSecretValue = Files.toString( + new File(DOWNLOAD_FILES_SECRET_LOCATION), Charsets.UTF_8) + } + + before { + downloadJarsDir = Utils.createTempDir() + downloadFilesDir = Utils.createTempDir() + retrofitClientFactory = mock[RetrofitClientFactory] + retrofitClient = mock[ResourceStagingServiceRetrofit] + sparkConf = new SparkConf(true) + .set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) + .set(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER, JARS_RESOURCE_ID) + .set(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION, DOWNLOAD_JARS_SECRET_LOCATION) + .set(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER, FILES_RESOURCE_ID) + .set(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION, DOWNLOAD_FILES_SECRET_LOCATION) + .set(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION, downloadJarsDir.getAbsolutePath) + .set(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION, downloadFilesDir.getAbsolutePath) + .set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, TRUSTSTORE_FILE.getAbsolutePath) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD, TRUSTSTORE_PASSWORD) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE, TRUSTSTORE_TYPE) + + when(retrofitClientFactory.createRetrofitClient( + STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) + .thenReturn(retrofitClient) + initContainerUnderTest = new KubernetesSparkDependencyDownloadInitContainer( + sparkConf, retrofitClientFactory) + } + + after { + downloadJarsDir.delete() + downloadFilesDir.delete() + } + + test("Downloads should unpack response body streams to directories") { + val downloadJarsCall = mock[Call[ResponseBody]] + val downloadFilesCall = mock[Call[ResponseBody]] + when(retrofitClient.downloadResources(JARS_RESOURCE_ID, downloadJarsSecretValue)) + .thenReturn(downloadJarsCall) + when(retrofitClient.downloadResources(FILES_RESOURCE_ID, downloadFilesSecretValue)) + .thenReturn(downloadFilesCall) + val jarsResponseBody = ResponseBody.create( + MediaType.parse(core.MediaType.APPLICATION_OCTET_STREAM), jarsCompressedBytes) + val filesResponseBody = ResponseBody.create( + MediaType.parse(core.MediaType.APPLICATION_OCTET_STREAM), filesCompressedBytes) + doAnswer(new InvokeCallbackAnswer(downloadJarsCall, jarsResponseBody)) + .when(downloadJarsCall) + .enqueue(any()) + doAnswer(new InvokeCallbackAnswer(downloadFilesCall, filesResponseBody)) + .when(downloadFilesCall) + .enqueue(any()) + initContainerUnderTest.run() + checkWrittenFilesAreTheSameAsOriginal(JARS, downloadJarsDir) + checkWrittenFilesAreTheSameAsOriginal(FILES, downloadFilesDir) + } + + private def checkWrittenFilesAreTheSameAsOriginal( + originalFiles: Iterable[String], downloadDir: File): Unit = { + originalFiles.map(new File(_)).foreach { file => + val writtenFile = new File(downloadDir, file.getName) + assert(writtenFile.exists) + val originalJarContents = Seq(Files.toByteArray(file): _*) 
+ val writtenJarContents = Seq(Files.toByteArray(writtenFile): _*) + assert(writtenJarContents === originalJarContents) + } + } + + private def compressPathsToBytes(paths: Iterable[String]): Array[Byte] = { + Utils.tryWithResource(new ByteArrayOutputStream()) { compressedBytes => + CompressionUtils.writeTarGzipToStream (compressedBytes, paths) + compressedBytes.toByteArray + } + } +} + +private object KubernetesSparkDependencyDownloadInitContainerSuite { + def createTempFile(extension: String): String = { + val dir = Utils.createTempDir() + val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") + Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) + file.getAbsolutePath + } +} + +private class InvokeCallbackAnswer(call: Call[ResponseBody], responseBody: ResponseBody) + extends Answer[Unit] { + override def answer(invocationOnMock: InvocationOnMock): Unit = { + val callback = invocationOnMock.getArgumentAt(0, classOf[Callback[ResponseBody]]) + val response = Response.success(responseBody) + callback.onResponse(call, response) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index 51c5e43af1124..08be8af30b3bc 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -77,7 +77,7 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { private def runUploadAndDownload(sslOptions: SSLOptions): Unit = { val scheme = if (sslOptions.enabled) "https" else "http" - val retrofitService = RetrofitUtils.createRetrofitClient( + val retrofitService = RetrofitClientFactoryImpl.createRetrofitClient( s"$scheme://127.0.0.1:$serverPort/", classOf[ResourceStagingServiceRetrofit], sslOptions) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index e9f88e37a5f89..a10fe8fb58408 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -72,27 +72,14 @@ maven-assembly-plugin - driver-docker-dist + docker-dist pre-integration-test single - src/main/assembly/driver-assembly.xml - - posix - - - - executor-docker-dist - pre-integration-test - - single - - - - src/main/assembly/executor-assembly.xml + src/main/assembly/docker-assembly.xml posix diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml similarity index 95% rename from resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml index b5fcaa75f049c..2b48d366256fe 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/driver-assembly.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml @@ -15,7 +15,7 @@ ~ limitations under the License. 
--> - driver-docker-dist + docker-dist tar.gz dir @@ -51,9 +51,9 @@ - src/main/docker/driver + src/main/docker/ - + dockerfiles **/*
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml
deleted file mode 100644
index d97ba56562a12..0000000000000
--- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/executor-assembly.xml
+++ /dev/null
@@ -1,84 +0,0 @@ - - - executor-docker-dist - - tar.gz - dir - - false - - - - ${project.parent.basedir}/core/src/main/resources/org/apache/spark/ui/static/ - - ui-resources/org/apache/spark/ui/static - - **/* - - - - - ${project.parent.basedir}/sbin/ - - sbin - - **/* - - - - - ${project.parent.basedir}/bin/ - - bin - - **/* - - - - - ${project.parent.basedir}/conf/ - - conf - - **/* - - - - - src/main/docker/executor - - - - **/* - - - - - - jars - true - false - runtime - false - - org.apache.spark:spark-assembly_${scala.binary.version}:pom - org.spark-project.spark:unused - - - -
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile
new file mode 100644
index 0000000000000..59029a6c08b4a
--- /dev/null
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM openjdk:8-alpine
+
+# If this docker file is being used in the context of building your images from a Spark distribution, the docker build
+# command should be invoked from the top level directory of the Spark distribution. E.g.:
+# docker build -t spark-driver-init:latest -f dockerfiles/driver-init/Dockerfile .
+
+RUN apk upgrade --update
+RUN apk add --update bash
+RUN mkdir -p /opt/spark
+RUN touch /opt/spark/RELEASE
+
+ADD jars /opt/spark/jars
+ADD bin /opt/spark/bin
+ADD sbin /opt/spark/sbin
+ADD conf /opt/spark/conf
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark
+
+ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.v2.KubernetesSparkDependencyDownloadInitContainer" ]
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile
new file mode 100644
index 0000000000000..40f9459dc06dc
--- /dev/null
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile
@@ -0,0 +1,43 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. 
See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM openjdk:8-alpine
+
+# If this docker file is being used in the context of building your images from a Spark distribution, the docker build
+# command should be invoked from the top level directory of the Spark distribution. E.g.:
+# docker build -t spark-driver:latest -f dockerfiles/driver-v2/Dockerfile .
+
+RUN apk upgrade --update
+RUN apk add --update bash
+RUN mkdir -p /opt/spark
+RUN touch /opt/spark/RELEASE
+
+ADD jars /opt/spark/jars
+ADD examples /opt/spark/examples
+ADD bin /opt/spark/bin
+ADD sbin /opt/spark/sbin
+ADD conf /opt/spark/conf
+
+ENV SPARK_HOME /opt/spark
+
+WORKDIR /opt/spark
+
+CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \
+    if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \
+    if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \
+    if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \
+    exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile
new file mode 100644
index 0000000000000..15e1ce75815df
--- /dev/null
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile
@@ -0,0 +1,38 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+FROM openjdk:8-alpine
+
+# If this docker file is being used in the context of building your images from a Spark distribution, the docker build
+# command should be invoked from the top level directory of the Spark distribution. E.g.:
+# docker build -t spark-resource-staging-server:latest -f dockerfiles/resource-staging-server/Dockerfile . 
+ +RUN apk upgrade --update +RUN apk add --update bash +RUN mkdir -p /opt/spark +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark + +ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 5418afa25ca85..ac7a549c9b483 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -33,7 +33,11 @@ org.apache.spark spark-kubernetes_${scala.binary.version} ${project.version} - test + + + org.apache.spark + spark-core_${scala.binary.version} + ${project.version} org.apache.spark @@ -66,7 +70,7 @@ spark-docker-minimal-bundle_${scala.binary.version} ${project.version} tar.gz - driver-docker-dist + docker-dist test @@ -147,7 +151,7 @@ - copy-test-spark-jobs-to-docker-driver + copy-test-spark-jobs-to-docker-dist pre-integration-test copy @@ -159,65 +163,20 @@ spark-kubernetes-integration-tests-spark-jobs_${scala.binary.version} ${project.version} jar - ${project.build.directory}/docker/driver/examples/integration-tests-jars + ${project.build.directory}/docker/examples/integration-tests-jars org.apache.spark spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} ${project.version} jar - ${project.build.directory}/docker/driver/examples/integration-tests-jars - - - - - - copy-test-spark-jobs-to-docker-executor - pre-integration-test - - copy - - - - - org.apache.spark - spark-kubernetes-integration-tests-spark-jobs_${scala.binary.version} - ${project.version} - jar - ${project.build.directory}/docker/executor/examples/integration-tests-jars - - - org.apache.spark - spark-kubernetes-integration-tests-spark-jobs-helpers_${scala.binary.version} - ${project.version} - jar - ${project.build.directory}/docker/executor/examples/integration-tests-jars - - - - - - unpack-docker-driver-bundle - pre-integration-test - - unpack - - - - - org.apache.spark - spark-docker-minimal-bundle_${scala.binary.version} - ${project.version} - driver-docker-dist - tar.gz - true - ${project.build.directory}/docker/driver + ${project.build.directory}/docker/examples/integration-tests-jars - unpack-docker-executor-bundle + unpack-docker-bundle pre-integration-test unpack @@ -228,10 +187,10 @@ org.apache.spark spark-docker-minimal-bundle_${scala.binary.version} ${project.version} - executor-docker-dist + docker-dist tar.gz true - ${project.build.directory}/docker/executor + ${project.build.directory}/docker/ diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 750e7668b9912..abbf7e4d5ce1b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -16,119 +16,23 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest -import java.io.File import java.nio.file.Paths -import java.util.UUID -import java.util.concurrent.TimeUnit import com.google.common.base.Charsets 
-import com.google.common.collect.ImmutableList import com.google.common.io.Files -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.client.{Config, KubernetesClient, KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import org.scalatest.BeforeAndAfter -import org.scalatest.concurrent.{Eventually, PatienceConfiguration} +import org.scalatest.Suite +import org.scalatest.concurrent.PatienceConfiguration import org.scalatest.time.{Minutes, Seconds, Span} -import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.SparkSubmit -import org.apache.spark.deploy.kubernetes.SSLUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube -import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} -import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} -import org.apache.spark.util.Utils -private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { - - private val EXAMPLES_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs") - .toFile - .listFiles()(0) - - private val HELPER_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs-helpers") - .toFile - .listFiles()(0) - private val SUBMITTER_LOCAL_MAIN_APP_RESOURCE = s"file://${EXAMPLES_JAR_FILE.getAbsolutePath}" - private val CONTAINER_LOCAL_MAIN_APP_RESOURCE = s"local:///opt/spark/examples/" + - s"integration-tests-jars/${EXAMPLES_JAR_FILE.getName}" - private val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + - s"integration-tests-jars/${HELPER_JAR_FILE.getName}" - - private val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile - private val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) - private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) - private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) - private val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + - ".integrationtest.jobs.SparkPiWithInfiniteWait" - private val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + - ".integrationtest.jobs.FileExistenceTest" - private val NAMESPACE = UUID.randomUUID().toString.replaceAll("-", "") - private var minikubeKubernetesClient: KubernetesClient = _ - private var clientConfig: Config = _ - private var sparkConf: SparkConf = _ +private[spark] class KubernetesSuite extends SparkFunSuite { override def beforeAll(): Unit = { Minikube.startMinikube() new SparkDockerImageBuilder(Minikube.getDockerEnv).buildSparkDockerImages() - Minikube.getKubernetesClient.namespaces.createNew() - .withNewMetadata() - .withName(NAMESPACE) - .endMetadata() - .done() - minikubeKubernetesClient = Minikube.getKubernetesClient.inNamespace(NAMESPACE) - clientConfig = minikubeKubernetesClient.getConfiguration - } - - before { - Eventually.eventually(TIMEOUT, INTERVAL) { - val podsList = minikubeKubernetesClient.pods().list() - assert(podsList == null - || podsList.getItems == null - || podsList.getItems.isEmpty - ) - val 
servicesList = minikubeKubernetesClient.services().list() - assert(servicesList == null - || servicesList.getItems == null - || servicesList.getItems.isEmpty) - } - sparkConf = new SparkConf(true) - .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") - .set(KUBERNETES_SUBMIT_CA_CERT_FILE, clientConfig.getCaCertFile) - .set(KUBERNETES_SUBMIT_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) - .set(KUBERNETES_SUBMIT_CLIENT_CERT_FILE, clientConfig.getClientCertFile) - .set(KUBERNETES_NAMESPACE, NAMESPACE) - .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest") - .set(EXECUTOR_DOCKER_IMAGE, "spark-executor:latest") - .setJars(Seq(HELPER_JAR_FILE.getAbsolutePath)) - .set("spark.executor.memory", "500m") - .set("spark.executor.cores", "1") - .set("spark.executors.instances", "1") - .set("spark.app.name", "spark-pi") - .set("spark.ui.enabled", "true") - .set("spark.testing", "false") - .set(WAIT_FOR_APP_COMPLETION, false) - } - - after { - val pods = minikubeKubernetesClient.pods().list().getItems.asScala - pods.par.foreach(pod => { - minikubeKubernetesClient - .pods() - .withName(pod.getMetadata.getName) - .withGracePeriod(60) - .delete - }) - // spark-submit sets system properties so we have to clear them - new SparkConf(true) - .getAll.map(_._1) - .filter(_ != "spark.docker.test.persistMinikube") - .foreach { System.clearProperty } } override def afterAll(): Unit = { @@ -137,247 +41,33 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } } - private def getSparkMetricsService(sparkBaseAppName: String): SparkRestApiV1 = { - val serviceName = minikubeKubernetesClient.services() - .withLabel("spark-app-name", sparkBaseAppName) - .list() - .getItems - .get(0) - .getMetadata - .getName - Minikube.getService[SparkRestApiV1](serviceName, NAMESPACE, "spark-ui-port") - } - - private def expectationsForStaticAllocation(sparkMetricsService: SparkRestApiV1): Unit = { - val apps = Eventually.eventually(TIMEOUT, INTERVAL) { - val result = sparkMetricsService - .getApplications(ImmutableList.of(ApplicationStatus.RUNNING, ApplicationStatus.COMPLETED)) - assert(result.size == 1 - && !result.head.id.equalsIgnoreCase("appid") - && !result.head.id.equalsIgnoreCase("{appId}")) - result - } - Eventually.eventually(TIMEOUT, INTERVAL) { - val result = sparkMetricsService.getExecutors(apps.head.id) - assert(result.size == 2) - assert(result.count(exec => exec.id != "driver") == 1) - result - } - Eventually.eventually(TIMEOUT, INTERVAL) { - val result = sparkMetricsService.getStages( - apps.head.id, Seq(StageStatus.COMPLETE).asJava) - assert(result.size == 1) - result - } - } - - test("Run a simple example") { - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) + override def nestedSuites: scala.collection.immutable.IndexedSeq[Suite] = { + Vector( + new KubernetesV1Suite, + new KubernetesV2Suite) } +} - test("Run using spark-submit") { - val args = Array( - "--master", s"k8s://https://${Minikube.getMinikubeIp}:8443", - "--deploy-mode", "cluster", - "--kubernetes-namespace", NAMESPACE, - "--name", "spark-pi", - "--executor-memory", "512m", - "--executor-cores", "1", - "--num-executors", "1", - "--jars", HELPER_JAR_FILE.getAbsolutePath, - "--class", SPARK_PI_MAIN_CLASS, - "--conf", "spark.ui.enabled=true", - "--conf", "spark.testing=false", - "--conf", 
s"${KUBERNETES_SUBMIT_CA_CERT_FILE.key}=${clientConfig.getCaCertFile}", - "--conf", s"${KUBERNETES_SUBMIT_CLIENT_KEY_FILE.key}=${clientConfig.getClientKeyFile}", - "--conf", s"${KUBERNETES_SUBMIT_CLIENT_CERT_FILE.key}=${clientConfig.getClientCertFile}", - "--conf", s"${EXECUTOR_DOCKER_IMAGE.key}=spark-executor:latest", - "--conf", s"${DRIVER_DOCKER_IMAGE.key}=spark-driver:latest", - "--conf", s"${WAIT_FOR_APP_COMPLETION.key}=false", - EXAMPLES_JAR_FILE.getAbsolutePath) - SparkSubmit.main(args) - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - - test("Run with the examples jar on the docker image") { - sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = CONTAINER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - - test("Run with custom labels and annotations") { - sparkConf.set(KUBERNETES_DRIVER_LABELS, "label1=label1value,label2=label2value") - sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, "annotation1=annotation1value," + - "annotation2=annotation2value") - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val driverPodMetadata = minikubeKubernetesClient - .pods - .withLabel("spark-app-name", "spark-pi") - .list() - .getItems - .get(0) - .getMetadata - val driverPodLabels = driverPodMetadata.getLabels - // We can't match all of the selectors directly since one of the selectors is based on the - // launch time. - assert(driverPodLabels.size === 5, "Unexpected number of pod labels.") - assert(driverPodLabels.get("spark-app-name") === "spark-pi", "Unexpected value for" + - " spark-app-name label.") - assert(driverPodLabels.get("spark-app-id").startsWith("spark-pi"), "Unexpected value for" + - " spark-app-id label (should be prefixed with the app name).") - assert(driverPodLabels.get("label1") === "label1value", "Unexpected value for label1") - assert(driverPodLabels.get("label2") === "label2value", "Unexpected value for label2") - val driverPodAnnotations = driverPodMetadata.getAnnotations - assert(driverPodAnnotations.size === 2, "Unexpected number of pod annotations.") - assert(driverPodAnnotations.get("annotation1") === "annotation1value", - "Unexpected value for annotation1") - assert(driverPodAnnotations.get("annotation2") === "annotation2value", - "Unexpected value for annotation2") - } - - test("Enable SSL on the driver submit server") { - val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( - Minikube.getMinikubeIp, - "changeit", - "changeit", - "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, - s"file://${trustStoreFile.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") - sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - } - - 
test("Enable SSL on the driver submit server using PEM files") { - val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) - sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${certPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - } - - test("Added files should exist on the driver.") { - sparkConf.set("spark.files", TEST_EXISTENCE_FILE.getAbsolutePath) - sparkConf.setAppName("spark-file-existence-test") - val podCompletedFuture = SettableFuture.create[Boolean] - val watch = new Watcher[Pod] { - override def eventReceived(action: Action, pod: Pod): Unit = { - val containerStatuses = pod.getStatus.getContainerStatuses.asScala - val allSuccessful = containerStatuses.nonEmpty && containerStatuses - .forall(status => { - status.getState.getTerminated != null && status.getState.getTerminated.getExitCode == 0 - }) - if (allSuccessful) { - podCompletedFuture.set(true) - } else { - val failedContainers = containerStatuses.filter(container => { - container.getState.getTerminated != null && - container.getState.getTerminated.getExitCode != 0 - }) - if (failedContainers.nonEmpty) { - podCompletedFuture.setException(new SparkException( - "One or more containers in the driver failed with a nonzero exit code.")) - } - } - } - - override def onClose(e: KubernetesClientException): Unit = { - logWarning("Watch closed", e) - } - } - Utils.tryWithResource(minikubeKubernetesClient - .pods - .withLabel("spark-app-name", "spark-file-existence-test") - .watch(watch)) { _ => - new Client( - sparkConf = sparkConf, - mainClass = FILE_EXISTENCE_MAIN_CLASS, - mainAppResource = CONTAINER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array(TEST_EXISTENCE_FILE.getName, TEST_EXISTENCE_FILE_CONTENTS)).run() - assert(podCompletedFuture.get(60, TimeUnit.SECONDS), "Failed to run driver pod") - val driverPod = minikubeKubernetesClient - .pods - .withLabel("spark-app-name", "spark-file-existence-test") - .list() - .getItems - .get(0) - val podLog = minikubeKubernetesClient - .pods - .withName(driverPod.getMetadata.getName) - .getLog - assert(podLog.contains(s"File found at /opt/spark/${TEST_EXISTENCE_FILE.getName}" + - s" with correct contents."), "Job did not find the file as expected.") - } - } +private[spark] object KubernetesSuite { + val EXAMPLES_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs") + .toFile + .listFiles()(0) - test("Use external URI provider") { - val externalUriProviderWatch = new ExternalUriProviderWatch(minikubeKubernetesClient) - Utils.tryWithResource(minikubeKubernetesClient.services() - .withLabel("spark-app-name", "spark-pi") - .watch(externalUriProviderWatch)) { _ => - sparkConf.set(DRIVER_SERVICE_MANAGER_TYPE, ExternalSuppliedUrisDriverServiceManager.TYPE) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - assert(externalUriProviderWatch.annotationSet.get) - val driverService = minikubeKubernetesClient - .services() - .withLabel("spark-app-name", "spark-pi") - .list() - 
.getItems - .asScala(0) - assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_PROVIDE_EXTERNAL_URI), - "External URI request annotation was not set on the driver service.") - // Unfortunately we can't check the correctness of the actual value of the URI, as it depends - // on the driver submission port set on the driver service but we remove that port from the - // service once the submission is complete. - assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_RESOLVED_EXTERNAL_URI), - "Resolved URI annotation not set on driver service.") - } - } + val HELPER_JAR_FILE = Paths.get("target", "integration-tests-spark-jobs-helpers") + .toFile + .listFiles()(0) + val SUBMITTER_LOCAL_MAIN_APP_RESOURCE = s"file://${EXAMPLES_JAR_FILE.getAbsolutePath}" + val CONTAINER_LOCAL_MAIN_APP_RESOURCE = s"local:///opt/spark/examples/" + + s"integration-tests-jars/${EXAMPLES_JAR_FILE.getName}" + val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + + s"integration-tests-jars/${HELPER_JAR_FILE.getName}" - test("Mount the Kubernetes credentials onto the driver pod") { - sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, clientConfig.getCaCertFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, clientConfig.getClientCertFile) - new Client( - sparkConf = sparkConf, - mainClass = SPARK_PI_MAIN_CLASS, - mainAppResource = SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } + val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile + val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) + val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) + val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) + val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.SparkPiWithInfiniteWait" + val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.FileExistenceTest" } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala new file mode 100644 index 0000000000000..53e02f9e479c1 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.spark.deploy.kubernetes.integrationtest
+
+import java.util.UUID
+
+import org.scalatest.concurrent.Eventually
+import scala.collection.JavaConverters._
+
+import org.apache.spark.SparkConf
+import org.apache.spark.deploy.kubernetes.config._
+import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube
+
+private[spark] class KubernetesTestComponents {
+
+  val namespace = UUID.randomUUID().toString.replaceAll("-", "")
+  val kubernetesClient = Minikube.getKubernetesClient.inNamespace(namespace)
+  val clientConfig = kubernetesClient.getConfiguration
+
+  def createNamespace(): Unit = {
+    Minikube.getKubernetesClient.namespaces.createNew()
+      .withNewMetadata()
+        .withName(namespace)
+        .endMetadata()
+      .done()
+  }
+
+  def deleteNamespace(): Unit = {
+    Minikube.getKubernetesClient.namespaces.withName(namespace).delete()
+    Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) {
+      val namespaceList = Minikube.getKubernetesClient
+        .namespaces()
+        .list()
+        .getItems()
+        .asScala
+      require(!namespaceList.exists(_.getMetadata.getName == namespace))
+    }
+  }
+
+  def newSparkConf(): SparkConf = {
+    new SparkConf(true)
+      .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443")
+      .set(KUBERNETES_SUBMIT_CA_CERT_FILE, clientConfig.getCaCertFile)
+      .set(KUBERNETES_SUBMIT_CLIENT_KEY_FILE, clientConfig.getClientKeyFile)
+      .set(KUBERNETES_SUBMIT_CLIENT_CERT_FILE, clientConfig.getClientCertFile)
+      .set(KUBERNETES_NAMESPACE, namespace)
+      .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest")
+      .set(EXECUTOR_DOCKER_IMAGE, "spark-executor:latest")
+      .setJars(Seq(KubernetesSuite.HELPER_JAR_FILE.getAbsolutePath))
+      .set("spark.executor.memory", "500m")
+      .set("spark.executor.cores", "1")
+      .set("spark.executor.instances", "1")
+      .set("spark.app.name", "spark-pi")
+      .set("spark.ui.enabled", "true")
+      .set("spark.testing", "false")
+      .set(WAIT_FOR_APP_COMPLETION, false)
+  }
+}
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala
new file mode 100644
index 0000000000000..a4e3353032b71
--- /dev/null
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala
@@ -0,0 +1,306 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.util.concurrent.TimeUnit + +import com.google.common.collect.ImmutableList +import com.google.common.util.concurrent.SettableFuture +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import org.scalatest.{BeforeAndAfter, DoNotDiscover} +import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.SSLUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 +import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} +import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} +import org.apache.spark.util.Utils + +@DoNotDiscover +private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter { + + private var kubernetesTestComponents: KubernetesTestComponents = _ + private var sparkConf: SparkConf = _ + + override def beforeAll(): Unit = { + kubernetesTestComponents = new KubernetesTestComponents() + kubernetesTestComponents.createNamespace() + } + + override def afterAll(): Unit = { + kubernetesTestComponents.deleteNamespace() + } + + before { + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val podsList = kubernetesTestComponents.kubernetesClient.pods().list() + assert(podsList == null + || podsList.getItems == null + || podsList.getItems.isEmpty + ) + val servicesList = kubernetesTestComponents.kubernetesClient.services().list() + assert(servicesList == null + || servicesList.getItems == null + || servicesList.getItems.isEmpty) + } + sparkConf = kubernetesTestComponents.newSparkConf() + } + + after { + val pods = kubernetesTestComponents.kubernetesClient.pods().list().getItems.asScala + pods.par.foreach(pod => { + kubernetesTestComponents.kubernetesClient.pods() + .withName(pod.getMetadata.getName) + .withGracePeriod(60) + .delete + }) + } + + private def getSparkMetricsService(sparkBaseAppName: String): SparkRestApiV1 = { + val serviceName = kubernetesTestComponents.kubernetesClient.services() + .withLabel("spark-app-name", sparkBaseAppName) + .list() + .getItems + .get(0) + .getMetadata + .getName + Minikube.getService[SparkRestApiV1](serviceName, + kubernetesTestComponents.namespace, "spark-ui-port") + } + + private def expectationsForStaticAllocation(sparkMetricsService: SparkRestApiV1): Unit = { + val apps = Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val result = sparkMetricsService + .getApplications(ImmutableList.of(ApplicationStatus.RUNNING, ApplicationStatus.COMPLETED)) + assert(result.size == 1 + && !result.head.id.equalsIgnoreCase("appid") + && !result.head.id.equalsIgnoreCase("{appId}")) + result + } + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val result = sparkMetricsService.getExecutors(apps.head.id) + assert(result.size == 2) + assert(result.count(exec => exec.id != "driver") == 1) + result + } + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val result = sparkMetricsService.getStages( + apps.head.id, 
Seq(StageStatus.COMPLETE).asJava) + assert(result.size == 1) + result + } + } + + test("Run a simple example") { + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + } + + test("Run with the examples jar on the docker image") { + sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + } + + test("Run with custom labels and annotations") { + sparkConf.set(KUBERNETES_DRIVER_LABELS, "label1=label1value,label2=label2value") + sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, "annotation1=annotation1value," + + "annotation2=annotation2value") + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val driverPodMetadata = kubernetesTestComponents.kubernetesClient + .pods + .withLabel("spark-app-name", "spark-pi") + .list() + .getItems + .get(0) + .getMetadata + val driverPodLabels = driverPodMetadata.getLabels + // We can't match all of the selectors directly since one of the selectors is based on the + // launch time. + assert(driverPodLabels.size === 5, "Unexpected number of pod labels.") + assert(driverPodLabels.get("spark-app-name") === "spark-pi", "Unexpected value for" + + " spark-app-name label.") + assert(driverPodLabels.get("spark-app-id").startsWith("spark-pi"), "Unexpected value for" + + " spark-app-id label (should be prefixed with the app name).") + assert(driverPodLabels.get("label1") === "label1value", "Unexpected value for label1") + assert(driverPodLabels.get("label2") === "label2value", "Unexpected value for label2") + val driverPodAnnotations = driverPodMetadata.getAnnotations + assert(driverPodAnnotations.size === 2, "Unexpected number of pod annotations.") + assert(driverPodAnnotations.get("annotation1") === "annotation1value", + "Unexpected value for annotation1") + assert(driverPodAnnotations.get("annotation2") === "annotation2value", + "Unexpected value for annotation2") + } + + test("Enable SSL on the driver submit server") { + val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( + Minikube.getMinikubeIp, + "changeit", + "changeit", + "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, + s"file://${trustStoreFile.getAbsolutePath}") + sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") + sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + } + + test("Enable SSL on the driver submit server using PEM files") { 
+ val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${certPem.getAbsolutePath}") + sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + } + + test("Added files should exist on the driver.") { + sparkConf.set("spark.files", KubernetesSuite.TEST_EXISTENCE_FILE.getAbsolutePath) + sparkConf.setAppName("spark-file-existence-test") + val podCompletedFuture = SettableFuture.create[Boolean] + val watch = new Watcher[Pod] { + override def eventReceived(action: Action, pod: Pod): Unit = { + val containerStatuses = pod.getStatus.getContainerStatuses.asScala + val allSuccessful = containerStatuses.nonEmpty && containerStatuses + .forall(status => { + status.getState.getTerminated != null && status.getState.getTerminated.getExitCode == 0 + }) + if (allSuccessful) { + podCompletedFuture.set(true) + } else { + val failedContainers = containerStatuses.filter(container => { + container.getState.getTerminated != null && + container.getState.getTerminated.getExitCode != 0 + }) + if (failedContainers.nonEmpty) { + podCompletedFuture.setException(new SparkException( + "One or more containers in the driver failed with a nonzero exit code.")) + } + } + } + + override def onClose(e: KubernetesClientException): Unit = { + logWarning("Watch closed", e) + } + } + Utils.tryWithResource(kubernetesTestComponents.kubernetesClient + .pods + .withLabel("spark-app-name", "spark-file-existence-test") + .watch(watch)) { _ => + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.FILE_EXISTENCE_MAIN_CLASS, + mainAppResource = KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array(KubernetesSuite.TEST_EXISTENCE_FILE.getName, + KubernetesSuite.TEST_EXISTENCE_FILE_CONTENTS)).run() + assert(podCompletedFuture.get(60, TimeUnit.SECONDS), "Failed to run driver pod") + val driverPod = kubernetesTestComponents.kubernetesClient + .pods + .withLabel("spark-app-name", "spark-file-existence-test") + .list() + .getItems + .get(0) + val podLog = kubernetesTestComponents.kubernetesClient + .pods + .withName(driverPod.getMetadata.getName) + .getLog + assert(podLog.contains(s"File found at" + + s" /opt/spark/${KubernetesSuite.TEST_EXISTENCE_FILE.getName} with correct contents."), + "Job did not find the file as expected.") + } + } + + test("Use external URI provider") { + val externalUriProviderWatch = + new ExternalUriProviderWatch(kubernetesTestComponents.kubernetesClient) + Utils.tryWithResource(kubernetesTestComponents.kubernetesClient.services() + .withLabel("spark-app-name", "spark-pi") + .watch(externalUriProviderWatch)) { _ => + sparkConf.set(DRIVER_SERVICE_MANAGER_TYPE, ExternalSuppliedUrisDriverServiceManager.TYPE) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + assert(externalUriProviderWatch.annotationSet.get) + val driverService = kubernetesTestComponents.kubernetesClient + .services() + 
.withLabel("spark-app-name", "spark-pi") + .list() + .getItems + .asScala(0) + assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_PROVIDE_EXTERNAL_URI), + "External URI request annotation was not set on the driver service.") + // Unfortunately we can't check the correctness of the actual value of the URI, as it depends + // on the driver submission port set on the driver service but we remove that port from the + // service once the submission is complete. + assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_RESOLVED_EXTERNAL_URI), + "Resolved URI annotation not set on driver service.") + } + } + + test("Mount the Kubernetes credentials onto the driver pod") { + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, + kubernetesTestComponents.clientConfig.getCaCertFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + kubernetesTestComponents.clientConfig.getClientKeyFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + kubernetesTestComponents.clientConfig.getClientCertFile) + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val sparkMetricsService = getSparkMetricsService("spark-pi") + expectationsForStaticAllocation(sparkMetricsService) + } + +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala new file mode 100644 index 0000000000000..0d74067334028 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.util.UUID + +import org.scalatest.{BeforeAndAfter, DoNotDiscover} +import org.scalatest.concurrent.Eventually + +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.SSLUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.submit.v2.{MountedDependencyManagerProviderImpl, SubmissionKubernetesClientProviderImpl} + +@DoNotDiscover +private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter { + + private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") + private var kubernetesTestComponents: KubernetesTestComponents = _ + private var sparkConf: SparkConf = _ + private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ + + override def beforeAll(): Unit = { + kubernetesTestComponents = new KubernetesTestComponents + resourceStagingServerLauncher = new ResourceStagingServerLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) + } + + before { + sparkConf = kubernetesTestComponents.newSparkConf() + .set(INIT_CONTAINER_DOCKER_IMAGE, s"spark-driver-init:latest") + .set(DRIVER_DOCKER_IMAGE, s"spark-driver-v2:latest") + .set(KUBERNETES_DRIVER_LABELS, s"spark-app-locator=$APP_LOCATOR_LABEL") + kubernetesTestComponents.createNamespace() + } + + after { + kubernetesTestComponents.deleteNamespace() + } + + test("Use submission v2.") { + launchStagingServer(SSLOptions()) + runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Enable SSL on the submission server") { + val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + ipAddress = Minikube.getMinikubeIp, + keyStorePassword = "keyStore", + keyPassword = "key", + trustStorePassword = "trustStore") + sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", trustStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStore") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "key") + .set("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword", "trustStore") + launchStagingServer(SSLOptions( + enabled = true, + keyStore = Some(keyStore), + trustStore = Some(trustStore), + keyStorePassword = Some("keyStore"), + keyPassword = Some("key"), + trustStorePassword = Some("trustStore"))) + runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Use container-local resources without the resource staging server") { + sparkConf.setJars(Seq( + KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, + KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) + runSparkAppAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) + } + + private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { + val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( + resourceStagingServerSslOptions) + val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { + "https" + } else { + "http" + } + sparkConf.set(RESOURCE_STAGING_SERVER_URI, + s"$resourceStagingServerUriScheme://${Minikube.getMinikubeIp}:$resourceStagingServerPort") + } + + 
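For reference, the `launchStagingServer` helper above only does two things: it starts the staging server pod and then publishes the resulting endpoint into `RESOURCE_STAGING_SERVER_URI` as `scheme://<minikube-ip>:<nodePort>`. Below is a minimal, self-contained Scala sketch of that assembly; the object name and the sample address and port are hypothetical placeholders, not values taken from the patch.

```scala
// Standalone sketch mirroring how launchStagingServer builds the endpoint string.
object StagingServerUriSketch {
  // Returns "http(s)://<host>:<nodePort>", matching the scheme choice made in the test helper.
  def stagingServerUri(sslEnabled: Boolean, host: String, nodePort: Int): String = {
    val scheme = if (sslEnabled) "https" else "http"
    s"$scheme://$host:$nodePort"
  }

  def main(args: Array[String]): Unit = {
    // Hypothetical Minikube VM IP and Kubernetes-assigned NodePort.
    println(stagingServerUri(sslEnabled = false, host = "192.168.99.100", nodePort = 31000))
    // prints: http://192.168.99.100:31000
  }
}
```

The scheme follows whatever SSL options were used to launch the server, which is why the SSL-enabled test above configures both the server keystore and the matching client-side trust settings before calling the helper.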
private def runSparkAppAndVerifyCompletion(appResource: String): Unit = { + val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + appArgs = Array.empty[String], + mainAppResource = appResource, + kubernetesClientProvider = + new SubmissionKubernetesClientProviderImpl(sparkConf), + mountedDependencyManagerProvider = + new MountedDependencyManagerProviderImpl(sparkConf)) + client.run() + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", APP_LOCATOR_LABEL) + .list() + .getItems + .get(0) + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains("Pi is roughly 3"), "The application did not compute the value of pi.") + } + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala new file mode 100644 index 0000000000000..ca549fa27d630 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala @@ -0,0 +1,196 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.io.StringWriter +import java.util.Properties +import java.util.concurrent.TimeUnit + +import com.google.common.io.{BaseEncoding, Files} +import com.google.common.util.concurrent.SettableFuture +import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Endpoints, HasMetadata, HTTPGetActionBuilder, KeyToPathBuilder, Pod, PodBuilder, SecretBuilder, ServiceBuilder} +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import io.fabric8.kubernetes.client.internal.readiness.Readiness +import scala.collection.JavaConverters._ + +import org.apache.spark.SSLOptions +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.v2.ContainerNameEqualityPredicate +import org.apache.spark.util.Utils + +/** + * Launches a pod that runs the resource staging server, exposing it over a NodePort. 
+ */ +private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesClient) { + + private val KEYSTORE_DIR = "/mnt/secrets/spark-staging" + private val KEYSTORE_FILE = s"$KEYSTORE_DIR/keyStore" + private val PROPERTIES_FILE_NAME = "staging-server.properties" + private val PROPERTIES_DIR = "/var/data/spark-staging-server" + private val PROPERTIES_FILE_PATH = s"$PROPERTIES_DIR/$PROPERTIES_FILE_NAME" + private var activeResources = Seq.empty[HasMetadata] + + // Returns the NodePort the staging server is listening on + def launchStagingServer(sslOptions: SSLOptions): Int = { + val stagingServerProperties = new Properties() + val stagingServerSecret = sslOptions.keyStore.map { keyStore => + val keyStoreBytes = Files.toByteArray(keyStore) + val keyStoreBase64 = BaseEncoding.base64().encode(keyStoreBytes) + new SecretBuilder() + .withNewMetadata() + .withName("resource-staging-server-keystore") + .endMetadata() + .addToData("keyStore", keyStoreBase64) + .build() + } + stagingServerProperties.setProperty( + RESOURCE_STAGING_SERVER_SSL_ENABLED.key, sslOptions.enabled.toString) + sslOptions.keyStorePassword.foreach { password => + stagingServerProperties.setProperty( + "spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", password) + } + sslOptions.keyPassword.foreach { password => + stagingServerProperties.setProperty( + "spark.ssl.kubernetes.resourceStagingServer.keyPassword", password) + } + stagingServerSecret.foreach { _ => + stagingServerProperties.setProperty( + "spark.ssl.kubernetes.resourceStagingServer.keyStore", KEYSTORE_FILE) + } + val propertiesWriter = new StringWriter() + stagingServerProperties.store(propertiesWriter, "Resource staging server properties.") + val stagingServerConfigMap = new ConfigMapBuilder() + .withNewMetadata() + .withName(s"staging-server-properties") + .endMetadata() + .addToData("staging-server", propertiesWriter.toString) + .build() + val probePingHttpGet = new HTTPGetActionBuilder() + .withScheme(if (sslOptions.enabled) "HTTPS" else "HTTP") + .withPath("/api/v0/ping") + .withNewPort(RESOURCE_STAGING_SERVER_PORT.defaultValue.get) + .build() + val basePod = new PodBuilder() + .withNewMetadata() + .withName("resource-staging-server") + .addToLabels("resource-staging-server", "staging-server") + .endMetadata() + .withNewSpec() + .addNewVolume() + .withName("staging-server-properties") + .withNewConfigMap() + .withName(stagingServerConfigMap.getMetadata.getName) + .withItems( + new KeyToPathBuilder() + .withKey("staging-server") + .withPath(PROPERTIES_FILE_NAME) + .build()) + .endConfigMap() + .endVolume() + .addNewContainer() + .withName("staging-server-container") + .withImage("spark-resource-staging-server:latest") + .withImagePullPolicy("IfNotPresent") + .withNewReadinessProbe() + .withHttpGet(probePingHttpGet) + .endReadinessProbe() + .addNewVolumeMount() + .withName("staging-server-properties") + .withMountPath(PROPERTIES_DIR) + .endVolumeMount() + .addToArgs(PROPERTIES_FILE_PATH) + .endContainer() + .endSpec() + val withMountedKeyStorePod = stagingServerSecret.map { secret => + basePod.editSpec() + .addNewVolume() + .withName("keystore-volume") + .withNewSecret() + .withSecretName(secret.getMetadata.getName) + .endSecret() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate("staging-server-container")) + .addNewVolumeMount() + .withName("keystore-volume") + .withMountPath(KEYSTORE_DIR) + .endVolumeMount() + .endContainer() + .endSpec() + }.getOrElse(basePod).build() + val stagingServerService = new 
ServiceBuilder() + .withNewMetadata() + .withName("resource-staging-server") + .endMetadata() + .withNewSpec() + .withType("NodePort") + .addToSelector("resource-staging-server", "staging-server") + .addNewPort() + .withName("staging-server-port") + .withPort(RESOURCE_STAGING_SERVER_PORT.defaultValue.get) + .withNewTargetPort(RESOURCE_STAGING_SERVER_PORT.defaultValue.get) + .endPort() + .endSpec() + .build() + val stagingServerPodReadyWatcher = new ReadinessWatcher[Pod] + val serviceReadyWatcher = new ReadinessWatcher[Endpoints] + val allResources = Seq( + stagingServerService, + stagingServerConfigMap, + withMountedKeyStorePod) ++ + stagingServerSecret.toSeq + Utils.tryWithResource(kubernetesClient.pods() + .withName(withMountedKeyStorePod.getMetadata.getName) + .watch(stagingServerPodReadyWatcher)) { _ => + Utils.tryWithResource(kubernetesClient.endpoints() + .withName(stagingServerService.getMetadata.getName) + .watch(serviceReadyWatcher)) { _ => + activeResources = kubernetesClient.resourceList(allResources: _*) + .createOrReplace() + .asScala + stagingServerPodReadyWatcher.waitUntilReady() + serviceReadyWatcher.waitUntilReady() + } + } + kubernetesClient.services().withName(stagingServerService.getMetadata.getName).get() + .getSpec + .getPorts + .get(0) + .getNodePort + } + + def tearDownStagingServer(): Unit = { + kubernetesClient.resourceList(activeResources: _*).delete() + activeResources = Seq.empty[HasMetadata] + } + + private class ReadinessWatcher[T <: HasMetadata] extends Watcher[T] { + + private val signal = SettableFuture.create[Boolean] + + override def eventReceived(action: Action, resource: T): Unit = { + if ((action == Action.MODIFIED || action == Action.ADDED) && + Readiness.isReady(resource)) { + signal.set(true) + } + } + + override def onClose(cause: KubernetesClientException): Unit = {} + + def waitUntilReady(): Boolean = signal.get(30, TimeUnit.SECONDS) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 1aa6a7b7e70c2..d807c4d81009b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -19,14 +19,20 @@ package org.apache.spark.deploy.kubernetes.integrationtest.docker import java.net.URI import java.nio.file.Paths -import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider -import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates} +import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates, LoggingBuildHandler} import org.apache.http.client.utils.URIBuilder import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.{Minutes, Seconds, Span} private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, String]) { + private val DOCKER_BUILD_PATH = Paths.get("target", "docker") + // Dockerfile paths must be relative to the build path. 
+ private val DRIVER_V1_DOCKER_FILE = "dockerfiles/driver/Dockerfile" + private val DRIVER_V2_DOCKER_FILE = "dockerfiles/driver-v2/Dockerfile" + private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" + private val DRIVER_INIT_DOCKER_FILE = "dockerfiles/driver-init/Dockerfile" + private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) private val dockerHost = dockerEnv.getOrElse("DOCKER_HOST", @@ -52,7 +58,18 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, def buildSparkDockerImages(): Unit = { Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } - dockerClient.build(Paths.get("target", "docker", "driver"), "spark-driver") - dockerClient.build(Paths.get("target", "docker", "executor"), "spark-executor") + buildImage("spark-driver", DRIVER_V1_DOCKER_FILE) + buildImage("spark-executor", EXECUTOR_DOCKER_FILE) + buildImage("spark-driver-v2", DRIVER_V2_DOCKER_FILE) + buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) + buildImage("spark-driver-init", DRIVER_INIT_DOCKER_FILE) + } + + private def buildImage(name: String, dockerFile: String): Unit = { + dockerClient.build( + DOCKER_BUILD_PATH, + name, + dockerFile, + new LoggingBuildHandler()) } } From 2c753dedaf89f1b9396ec4725b73ff1172df4568 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 1 May 2017 18:54:46 -0700 Subject: [PATCH 095/225] Enable testing against GCE clusters (#243) * Part 1: making test code cluster-agnostic * Final checked * Move all test code into KubernetesTestComponents * Addressed comments * Fixed doc * Restructure the test backends (#248) * Restructured the test backends * Address comments * var -> val * Comments * removed deadcode --- resource-managers/kubernetes/README.md | 8 +++ .../ExternalUriProviderWatch.scala | 2 +- .../integrationtest/KubernetesSuite.scala | 15 ++--- .../KubernetesTestComponents.scala | 54 +++++++++++----- .../integrationtest/KubernetesV1Suite.scala | 24 +++++-- .../integrationtest/KubernetesV2Suite.scala | 22 +++++-- .../integrationtest/ProcessUtils.scala | 55 ++++++++++++++++ .../backend/GCE/GCETestBackend.scala | 40 ++++++++++++ .../backend/IntegrationTestBackend.scala | 39 ++++++++++++ .../{ => backend}/minikube/Minikube.scala | 63 ++----------------- .../minikube/MinikubeTestBackend.scala | 47 ++++++++++++++ .../integrationtest/constants.scala | 22 +++++++ 12 files changed, 299 insertions(+), 92 deletions(-) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/GCE/GCETestBackend.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/IntegrationTestBackend.scala rename resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/{ => backend}/minikube/Minikube.scala (64%) create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala create mode 100644 
resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index d70c38fdc64d5..fd1ad29eb795d 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -61,6 +61,14 @@ build/mvn integration-test \ -pl resource-managers/kubernetes/integration-tests -am ``` +# Running against an arbitrary cluster + +In order to run against any cluster, use the following: +build/mvn integration-test \ + -Pkubernetes -Pkubernetes-integration-tests \ + -pl resource-managers/kubernetes/integration-tests -am + -DextraScalaTestArgs="-Dspark.kubernetes.test.master=k8s://https:// -Dspark.docker.test.driverImage= -Dspark.docker.test.executorImage=" + # Preserve the Minikube VM The integration tests make use of [Minikube](https://github.com/kubernetes/minikube), which fires up a virtual machine diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala index 3199a8c385f95..f402d240bfc33 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala @@ -24,7 +24,7 @@ import io.fabric8.kubernetes.client.Watcher.Action import scala.collection.JavaConverters._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.internal.Logging /** diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index abbf7e4d5ce1b..bd5ff7a005d46 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -25,26 +25,23 @@ import org.scalatest.concurrent.PatienceConfiguration import org.scalatest.time.{Minutes, Seconds, Span} import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory} private[spark] class KubernetesSuite extends SparkFunSuite { + private val testBackend: IntegrationTestBackend = IntegrationTestBackendFactory.getTestBackend() override def beforeAll(): Unit = { - Minikube.startMinikube() - new SparkDockerImageBuilder(Minikube.getDockerEnv).buildSparkDockerImages() + testBackend.initialize() } override def afterAll(): Unit = { - if (!System.getProperty("spark.docker.test.persistMinikube", "false").toBoolean) { - Minikube.deleteMinikube() - } + 
testBackend.cleanUp() } override def nestedSuites: scala.collection.immutable.IndexedSeq[Suite] = { Vector( - new KubernetesV1Suite, - new KubernetesV2Suite) + new KubernetesV1Suite(testBackend), + new KubernetesV2Suite(testBackend)) } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala index 53e02f9e479c1..8cdacee655c05 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -17,22 +17,27 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.UUID +import javax.net.ssl.X509TrustManager -import org.scalatest.concurrent.Eventually import scala.collection.JavaConverters._ +import scala.reflect.ClassTag + +import io.fabric8.kubernetes.client.DefaultKubernetesClient +import io.fabric8.kubernetes.client.internal.SSLUtils +import org.scalatest.concurrent.Eventually import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.rest.kubernetes.v1.HttpClientUtil -private[spark] class KubernetesTestComponents { +private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesClient) { val namespace = UUID.randomUUID().toString.replaceAll("-", "") - val kubernetesClient = Minikube.getKubernetesClient.inNamespace(namespace) + val kubernetesClient = defaultClient.inNamespace(namespace) val clientConfig = kubernetesClient.getConfiguration def createNamespace(): Unit = { - Minikube.getKubernetesClient.namespaces.createNew() + defaultClient.namespaces.createNew() .withNewMetadata() .withName(namespace) .endMetadata() @@ -40,9 +45,9 @@ private[spark] class KubernetesTestComponents { } def deleteNamespace(): Unit = { - Minikube.getKubernetesClient.namespaces.withName(namespace).delete() + defaultClient.namespaces.withName(namespace).delete() Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val namespaceList = Minikube.getKubernetesClient + val namespaceList = defaultClient .namespaces() .list() .getItems() @@ -53,13 +58,12 @@ private[spark] class KubernetesTestComponents { def newSparkConf(): SparkConf = { new SparkConf(true) - .setMaster(s"k8s://https://${Minikube.getMinikubeIp}:8443") - .set(KUBERNETES_SUBMIT_CA_CERT_FILE, clientConfig.getCaCertFile) - .set(KUBERNETES_SUBMIT_CLIENT_KEY_FILE, clientConfig.getClientKeyFile) - .set(KUBERNETES_SUBMIT_CLIENT_CERT_FILE, clientConfig.getClientCertFile) + .setMaster(s"k8s://${kubernetesClient.getMasterUrl}") .set(KUBERNETES_NAMESPACE, namespace) - .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest") - .set(EXECUTOR_DOCKER_IMAGE, "spark-executor:latest") + .set(DRIVER_DOCKER_IMAGE, + System.getProperty("spark.docker.test.driverImage", "spark-driver:latest")) + .set(EXECUTOR_DOCKER_IMAGE, + System.getProperty("spark.docker.test.executorImage", "spark-executor:latest")) .setJars(Seq(KubernetesSuite.HELPER_JAR_FILE.getAbsolutePath)) .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") @@ -69,4 +73,26 @@ private[spark] class KubernetesTestComponents { 
.set("spark.testing", "false") .set(WAIT_FOR_APP_COMPLETION, false) } -} + + def getService[T: ClassTag]( + serviceName: String, + namespace: String, + servicePortName: String, + servicePath: String = ""): T = synchronized { + val kubernetesMaster = s"${defaultClient.getMasterUrl}" + + val url = s"${ + Array[String]( + s"${kubernetesClient.getMasterUrl}", + "api", "v1", "proxy", + "namespaces", namespace, + "services", serviceName).mkString("/") + }" + + s":$servicePortName$servicePath" + val userHome = System.getProperty("user.home") + val kubernetesConf = kubernetesClient.getConfiguration + val sslContext = SSLUtils.sslContext(kubernetesConf) + val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] + HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) + } +} \ No newline at end of file diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala index a4e3353032b71..4cbd074547915 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala @@ -18,6 +18,8 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.concurrent.TimeUnit +import scala.collection.JavaConverters._ + import com.google.common.collect.ImmutableList import com.google.common.util.concurrent.SettableFuture import io.fabric8.kubernetes.api.model.Pod @@ -25,26 +27,28 @@ import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.scalatest.{BeforeAndAfter, DoNotDiscover} import org.scalatest.concurrent.Eventually -import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.constants.{GCE_TEST_BACKEND, MINIKUBE_TEST_BACKEND} import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} import org.apache.spark.util.Utils @DoNotDiscover -private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter { +private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) + extends SparkFunSuite with BeforeAndAfter { private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkConf: SparkConf = _ override def beforeAll(): Unit = { - kubernetesTestComponents = new KubernetesTestComponents() + kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) kubernetesTestComponents.createNamespace() } @@ -85,7 +89,7 @@ private[spark] class 
KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter .get(0) .getMetadata .getName - Minikube.getService[SparkRestApiV1](serviceName, + kubernetesTestComponents.getService[SparkRestApiV1](serviceName, kubernetesTestComponents.namespace, "spark-ui-port") } @@ -168,6 +172,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Enable SSL on the driver submit server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( Minikube.getMinikubeIp, "changeit", @@ -188,6 +194,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Enable SSL on the driver submit server using PEM files") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") @@ -201,6 +209,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Added files should exist on the driver.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + sparkConf.set("spark.files", KubernetesSuite.TEST_EXISTENCE_FILE.getAbsolutePath) sparkConf.setAppName("spark-file-existence-test") val podCompletedFuture = SettableFuture.create[Boolean] @@ -257,6 +267,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Use external URI provider") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val externalUriProviderWatch = new ExternalUriProviderWatch(kubernetesTestComponents.kubernetesClient) Utils.tryWithResource(kubernetesTestComponents.kubernetesClient.services() @@ -288,6 +300,8 @@ private[spark] class KubernetesV1Suite extends SparkFunSuite with BeforeAndAfter } test("Mount the Kubernetes credentials onto the driver pod") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, kubernetesTestComponents.clientConfig.getCaCertFile) sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index 0d74067334028..8fa7cbd52ee83 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -21,14 +21,17 @@ import java.util.UUID import org.scalatest.{BeforeAndAfter, DoNotDiscover} import org.scalatest.concurrent.Eventually -import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark._ import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.integrationtest.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND import 
org.apache.spark.deploy.kubernetes.submit.v2.{MountedDependencyManagerProviderImpl, SubmissionKubernetesClientProviderImpl} @DoNotDiscover -private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter { +private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) + extends SparkFunSuite with BeforeAndAfter { private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") private var kubernetesTestComponents: KubernetesTestComponents = _ @@ -36,7 +39,7 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ override def beforeAll(): Unit = { - kubernetesTestComponents = new KubernetesTestComponents + kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) resourceStagingServerLauncher = new ResourceStagingServerLauncher( kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) } @@ -54,11 +57,15 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter } test("Use submission v2.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + launchStagingServer(SSLOptions()) runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } test("Enable SSL on the submission server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( ipAddress = Minikube.getMinikubeIp, keyStorePassword = "keyStore", @@ -81,6 +88,8 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter } test("Use container-local resources without the resource staging server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + sparkConf.setJars(Seq( KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) @@ -88,6 +97,8 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter } private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( resourceStagingServerSslOptions) val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { @@ -96,7 +107,8 @@ private[spark] class KubernetesV2Suite extends SparkFunSuite with BeforeAndAfter "http" } sparkConf.set(RESOURCE_STAGING_SERVER_URI, - s"$resourceStagingServerUriScheme://${Minikube.getMinikubeIp}:$resourceStagingServerPort") + s"$resourceStagingServerUriScheme://" + + s"${Minikube.getMinikubeIp}:$resourceStagingServerPort") } private def runSparkAppAndVerifyCompletion(appResource: String): Unit = { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala new file mode 100644 index 0000000000000..d0bfac3085487 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.io.{BufferedReader, InputStreamReader} +import java.util.concurrent.TimeUnit + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + +object ProcessUtils extends Logging { + /** + * executeProcess is used to run a command and return the output if it + * completes within timeout seconds. + */ + def executeProcess(fullCommand: Array[String], timeout: Long): Seq[String] = { + val pb = new ProcessBuilder().command(fullCommand: _*) + pb.redirectErrorStream(true) + val proc = pb.start() + val outputLines = new ArrayBuffer[String] + + Utils.tryWithResource(new InputStreamReader(proc.getInputStream)) { procOutput => + Utils.tryWithResource(new BufferedReader(procOutput)) { (bufferedOutput: BufferedReader) => + var line: String = null + do { + line = bufferedOutput.readLine() + if (line != null) { + logInfo(line) + outputLines += line + } + } while (line != null) + } + } + assert(proc.waitFor(timeout, TimeUnit.SECONDS), + s"Timed out while executing ${fullCommand.mkString(" ")}") + assert(proc.exitValue == 0, s"Failed to execute ${fullCommand.mkString(" ")}") + outputLines.toSeq + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/GCE/GCETestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/GCE/GCETestBackend.scala new file mode 100644 index 0000000000000..1ef096be4af02 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/GCE/GCETestBackend.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest.backend.GCE + +import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} + +import org.apache.spark.deploy.kubernetes.config.resolveK8sMaster +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.constants.GCE_TEST_BACKEND + +private[spark] class GCETestBackend(val master: String) extends IntegrationTestBackend { + private var defaultClient: DefaultKubernetesClient = _ + + override def initialize(): Unit = { + var k8ConfBuilder = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(resolveK8sMaster(master)) + defaultClient = new DefaultKubernetesClient(k8ConfBuilder.build) + } + + override def getKubernetesClient(): DefaultKubernetesClient = { + defaultClient + } + + override def name(): String = GCE_TEST_BACKEND +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/IntegrationTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/IntegrationTestBackend.scala new file mode 100644 index 0000000000000..c5bc923dd51a6 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/IntegrationTestBackend.scala @@ -0,0 +1,39 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.kubernetes.integrationtest.backend + +import io.fabric8.kubernetes.client.DefaultKubernetesClient + +import org.apache.spark.deploy.kubernetes.integrationtest.backend.GCE.GCETestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.{Minikube, MinikubeTestBackend} +import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder + +private[spark] trait IntegrationTestBackend { + def name(): String + def initialize(): Unit + def getKubernetesClient(): DefaultKubernetesClient + def cleanUp(): Unit = {} +} + +private[spark] object IntegrationTestBackendFactory { + def getTestBackend(): IntegrationTestBackend = { + Option(System.getProperty("spark.kubernetes.test.master")) + .map(new GCETestBackend(_)) + .getOrElse(new MinikubeTestBackend()) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/Minikube.scala similarity index 64% rename from resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala rename to resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/Minikube.scala index 81491be944d3e..7c4b344e8f72b 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/minikube/Minikube.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/Minikube.scala @@ -14,20 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.integrationtest.minikube +package org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube -import java.io.{BufferedReader, InputStreamReader} import java.nio.file.Paths -import java.util.concurrent.TimeUnit -import java.util.regex.Pattern -import javax.net.ssl.X509TrustManager import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient} -import io.fabric8.kubernetes.client.internal.SSLUtils -import scala.collection.mutable.ArrayBuffer -import scala.reflect.ClassTag -import org.apache.spark.deploy.rest.kubernetes.v1.HttpClientUtil +import org.apache.spark.deploy.kubernetes.integrationtest.ProcessUtils import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -93,7 +86,7 @@ private[spark] object Minikube extends Logging { } def getKubernetesClient: DefaultKubernetesClient = synchronized { - val kubernetesMaster = s"https://$getMinikubeIp:8443" + val kubernetesMaster = s"https://${getMinikubeIp}:8443" val userHome = System.getProperty("user.home") val kubernetesConf = new ConfigBuilder() .withApiVersion("v1") @@ -105,32 +98,6 @@ private[spark] object Minikube extends Logging { new DefaultKubernetesClient(kubernetesConf) } - def getService[T: ClassTag]( - serviceName: String, - namespace: String, - servicePortName: String, - servicePath: String = ""): T = synchronized { - val kubernetesMaster = s"https://$getMinikubeIp:8443" - val url = s"${ - Array[String]( - kubernetesMaster, - "api", "v1", "proxy", - "namespaces", namespace, - "services", serviceName).mkString("/")}" + - s":$servicePortName$servicePath" - val userHome = System.getProperty("user.home") - val kubernetesConf = new ConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(kubernetesMaster) - .withCaCertFile(Paths.get(userHome, ".minikube", "ca.crt").toFile.getAbsolutePath) - .withClientCertFile(Paths.get(userHome, ".minikube", "apiserver.crt").toFile.getAbsolutePath) - .withClientKeyFile(Paths.get(userHome, ".minikube", "apiserver.key").toFile.getAbsolutePath) - .build() - val sslContext = SSLUtils.sslContext(kubernetesConf) - val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] - HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) - } - def executeMinikubeSsh(command: String): Unit = { executeMinikube("ssh", command) } @@ -141,28 +108,8 @@ private[spark] object Minikube extends Logging { throw new IllegalStateException("Failed to make the Minikube binary executable.") } } - val fullCommand = Array(MINIKUBE_EXECUTABLE_DEST.getAbsolutePath, action) ++ args - val pb = new ProcessBuilder().command(fullCommand: _*) - pb.redirectErrorStream(true) - val proc = pb.start() - val outputLines = new ArrayBuffer[String] - - Utils.tryWithResource(new InputStreamReader(proc.getInputStream)) { procOutput => - Utils.tryWithResource(new BufferedReader(procOutput)) { (bufferedOutput: BufferedReader) => - var line: String = null - do { - line = bufferedOutput.readLine() - if (line != null) { - logInfo(line) - outputLines += line - } - } while (line != null) - } - } - assert(proc.waitFor(MINIKUBE_STARTUP_TIMEOUT_SECONDS, TimeUnit.SECONDS), - s"Timed out while executing $action on minikube.") - assert(proc.exitValue == 0, s"Failed to execute minikube $action ${args.mkString(" ")}") - outputLines.toSeq + ProcessUtils.executeProcess(Array(MINIKUBE_EXECUTABLE_DEST.getAbsolutePath, action) ++ args, + MINIKUBE_STARTUP_TIMEOUT_SECONDS) } } diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala new file mode 100644 index 0000000000000..6e0049b813719 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube + +import io.fabric8.kubernetes.client.DefaultKubernetesClient + +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend +import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND +import org.apache.spark.deploy.kubernetes.integrationtest.docker.SparkDockerImageBuilder + +private[spark] class MinikubeTestBackend extends IntegrationTestBackend { + private var defaultClient: DefaultKubernetesClient = _ + + override def initialize(): Unit = { + Minikube.startMinikube() + new SparkDockerImageBuilder(Minikube.getDockerEnv).buildSparkDockerImages() + defaultClient = Minikube.getKubernetesClient + } + + override def getKubernetesClient(): DefaultKubernetesClient = { + defaultClient + } + + override def cleanUp(): Unit = { + if (!System.getProperty("spark.docker.test.persistMinikube", "false").toBoolean) { + Minikube.deleteMinikube() + } + } + + override def name(): String = MINIKUBE_TEST_BACKEND + + +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala new file mode 100644 index 0000000000000..8207198b529d2 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +package object constants { + val MINIKUBE_TEST_BACKEND = "minikube" + val GCE_TEST_BACKEND = "gce" +} \ No newline at end of file From c902d692dfa482fd026251e982a1870570bdd3f9 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Tue, 2 May 2017 16:52:22 -0700 Subject: [PATCH 096/225] Update running-on-kubernetes.md (#259) --- docs/running-on-kubernetes.md | 1 + 1 file changed, 1 insertion(+) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 794099638f80c..66ea381e306a5 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -9,6 +9,7 @@ currently limited and not well-tested. This should not be used in production env ## Prerequisites * You must have a running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes cluster, you may setup a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). + * We recommend that minikube be updated to the most recent version (0.18.0 at the time of this documentation), as some earlier versions may not start up the kubernetes cluster with all the necessary components. * You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), [nodes](https://kubernetes.io/docs/admin/node/) and [services](https://kubernetes.io/docs/user-guide/services/) in your cluster. You can verify that you can list these resources by running `kubectl get nodes`, `kubectl get pods` and `kubectl get svc` which should give you a list of nodes, pods and services (if any) respectively. * You must have a spark distribution with Kubernetes support. This may be obtained from the [release tarball](https://github.com/apache-spark-on-k8s/spark/releases) or by [building Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support). From f09bf4a249c57b4505bba69f0916682493093aef Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Wed, 3 May 2017 22:11:49 +0000 Subject: [PATCH 097/225] Build with sbt and fix scalastyle checks. 
(#241) --- project/SparkBuild.scala | 12 +++++++----- .../apache/spark/deploy/kubernetes/SSLUtils.scala | 2 +- .../deploy/kubernetes/submit/v2/ClientV2Suite.scala | 2 +- ...esourceStagingServerSslOptionsProviderSuite.scala | 3 ++- .../kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../integration-tests-spark-jobs-helpers/pom.xml | 3 +++ .../kubernetes/integration-tests-spark-jobs/pom.xml | 3 +++ .../kubernetes/integration-tests/pom.xml | 3 +++ 8 files changed, 21 insertions(+), 9 deletions(-) diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index e52baf51aed1a..4572b8dc67ffe 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -55,11 +55,13 @@ object BuildCommons { "core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe", "tags", "sketch" ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects - - val optionallyEnabledProjects@Seq(mesos, yarn, sparkGangliaLgpl, - streamingKinesisAsl, dockerIntegrationTests) = - Seq("mesos", "yarn", "ganglia-lgpl", "streaming-kinesis-asl", - "docker-integration-tests").map(ProjectRef(buildLocation, _)) + + val optionallyEnabledProjects@Seq(mesos, yarn, java8Tests, sparkGangliaLgpl, + streamingKinesisAsl, dockerIntegrationTests, kubernetes, _*) = + Seq("mesos", "yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl", "docker-integration-tests", + "kubernetes", "kubernetes-integration-tests", "kubernetes-integration-tests-spark-jobs", + "kubernetes-integration-tests-spark-jobs-helpers", "kubernetes-docker-minimal-bundle" + ).map(ProjectRef(buildLocation, _)) val assemblyProjects@Seq(networkYarn, streamingFlumeAssembly, streamingKafkaAssembly, streamingKafka010Assembly, streamingKinesisAslAssembly) = Seq("network-yarn", "streaming-flume-assembly", "streaming-kafka-0-8-assembly", "streaming-kafka-0-10-assembly", "streaming-kinesis-asl-assembly") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala index dacb017d8a513..0cb056dcf5493 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala @@ -19,8 +19,8 @@ package org.apache.spark.deploy.kubernetes import java.io.{File, FileOutputStream, OutputStreamWriter} import java.math.BigInteger import java.nio.file.Files -import java.security.cert.X509Certificate import java.security.{KeyPair, KeyPairGenerator, KeyStore, SecureRandom} +import java.security.cert.X509Certificate import java.util.{Calendar, Random} import javax.security.auth.x500.X500Principal diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala index 9e2ab26460412..e6536fbaa6941 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.{MixedOperation, 
NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} -import org.mockito.Matchers.{any, anyVararg, argThat, startsWith, eq => mockitoEq} +import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq, startsWith} import org.mockito.Mockito.when import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala index 290b46a537bf3..10aced9000bf8 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala @@ -96,7 +96,8 @@ class ResourceStagingServerSslOptionsProviderSuite extends SparkFunSuite with Be .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStoreFile.getAbsolutePath) .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile", keyStorePasswordFile.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile", keyPasswordFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile", + keyPasswordFile.getAbsolutePath) val sslOptions = sslOptionsProvider.getSslOptions assert(sslOptions.keyStorePassword === Some("keyStorePassword"), "Incorrect keyStore password or it was not set.") diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index a10fe8fb58408..c66b87ac0952d 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -31,7 +31,7 @@ pom - docker-minimal-bundle + kubernetes-docker-minimal-bundle none pre-integration-test diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index b9c29b26eb648..581bf9453f2f2 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -27,6 +27,9 @@ spark-kubernetes-integration-tests-spark-jobs-helpers_2.11 jar Spark Project Kubernetes Integration Tests Spark Jobs Helpers + + kubernetes-integration-tests-spark-jobs-helpers + diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 16dd0c9322c13..9639811479ff5 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -27,6 +27,9 @@ spark-kubernetes-integration-tests-spark-jobs_2.11 jar Spark Project Kubernetes Integration Tests Spark Jobs + + kubernetes-integration-tests-spark-jobs + diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index ac7a549c9b483..c94893cbce410 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -25,6 +25,9 @@ spark-kubernetes-integration-tests_2.11 + + 
kubernetes-integration-tests + jar Spark Project Kubernetes Integration Tests From 68ddcd5727b1c038796c7980ed5c06e4bc50233d Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 3 May 2017 15:33:14 -0700 Subject: [PATCH 098/225] Updating images in doc (#219) --- docs/running-on-kubernetes.md | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 66ea381e306a5..5377d61d35b2f 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -25,11 +25,11 @@ If you wish to use pre-built docker images, you may use the images published in ComponentImage Spark Driver Image - kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 + kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 Spark Executor Image - kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 + kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 @@ -45,7 +45,7 @@ For example, if the registry host is `registry-host` and the registry is listeni docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile . docker push registry-host:5000/spark-driver:latest docker push registry-host:5000/spark-executor:latest - + ## Submitting Applications to Kubernetes Kubernetes applications can be executed via `spark-submit`. For example, to compute the value of pi, assuming the images @@ -58,8 +58,8 @@ are set up as described above: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -79,7 +79,7 @@ In the above example, the specific Kubernetes cluster can be used with spark sub Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. - + ### Specifying input files Spark supports specifying JAR paths that are either on the submitting host's disk, or are located on the disk of the @@ -109,8 +109,8 @@ If our local proxy were listening on port 8001, we would have our submission loo --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-rc1 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-rc1 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. 
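Note on the hunk above: it ends by pointing out that communication between the submission client and the Kubernetes cluster goes through the fabric8 kubernetes-client library. As a rough standalone sketch (not part of any patch in this series) of what that interaction looks like, the snippet below lists pods in a namespace using the same client classes the submission code relies on; the master URL and namespace are placeholder assumptions, and error handling is kept minimal.

    import scala.collection.JavaConverters._

    import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient}

    object Fabric8ConnectivitySketch {
      def main(args: Array[String]): Unit = {
        // Placeholder endpoint and namespace; substitute the values for your cluster.
        val config = new ConfigBuilder()
          .withMasterUrl("https://kubernetes.example.com:443")
          .withNamespace("default")
          .build()
        val client = new DefaultKubernetesClient(config)
        try {
          // Listing pods exercises the same API path the submission client uses
          // when it creates and watches the driver pod.
          client.pods().list().getItems.asScala.foreach { pod =>
            println(pod.getMetadata.getName)
          }
        } finally {
          client.close()
        }
      }
    }
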
From c4f17b79908a3a3dd49032d473d4152292a5907e Mon Sep 17 00:00:00 2001 From: Johannes Scheuermann Date: Fri, 5 May 2017 20:25:24 +0200 Subject: [PATCH 099/225] Correct readme links (#266) --- docs/running-on-kubernetes.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5377d61d35b2f..02933c28bbc66 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -189,7 +189,7 @@ from the other deployment modes. See the [configuration page](configuration.html The namespace that will be used for running the driver and executor pods. When using spark-submit in cluster mode, this can also be passed to spark-submit via the - --kubernetes-namespace command line argument. + --kubernetes-namespace command line argument. The namespace must already exist. From da94d918b9b75ef8f9751de87621893eecc24ffe Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Tue, 9 May 2017 14:12:23 -0700 Subject: [PATCH 100/225] edit readme with a working build example command (#254) --- resource-managers/kubernetes/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index fd1ad29eb795d..734c29947b6d9 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -14,10 +14,10 @@ important matters to keep in mind when developing this feature. # Building Spark with Kubernetes Support -To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. For example, to simply compile -the Kubernetes core implementation module along with its dependencies: +To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. - build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am -DskipTests + git checkout branch-2.1-kubernetes + build/mvn package -Pkubernetes -DskipTests To build a distribution of Spark with Kubernetes support, use the `dev/make-distribution.sh` script, and add the `kubernetes` profile as part of the build arguments. 
Any other build arguments can be specified as one would expect when From ecf248cd72b5d8ea8345f2f76b0ffbe99c90d1fd Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Tue, 9 May 2017 23:20:48 -0700 Subject: [PATCH 101/225] Fix watcher conditional logic (#269) --- .../apache/spark/deploy/kubernetes/submit/v1/Client.scala | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index e1cfac8feba37..65e47ddca4bfe 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -538,7 +538,9 @@ private[spark] class Client( private class DriverEndpointsReadyWatcher(resolvedDriverEndpoints: SettableFuture[Endpoints]) extends Watcher[Endpoints] { override def eventReceived(action: Action, endpoints: Endpoints): Unit = { - if ((action == Action.ADDED) || (action == Action.MODIFIED) + if ((action == Action.ADDED || action == Action.MODIFIED) + && (endpoints != null) + && (endpoints.getSubsets != null) && endpoints.getSubsets.asScala.nonEmpty && endpoints.getSubsets.asScala.exists(_.getAddresses.asScala.nonEmpty) && !resolvedDriverEndpoints.isDone) { @@ -554,7 +556,7 @@ private[spark] class Client( private class DriverServiceReadyWatcher(resolvedDriverService: SettableFuture[Service]) extends Watcher[Service] { override def eventReceived(action: Action, service: Service): Unit = { - if ((action == Action.ADDED) || (action == Action.MODIFIED) + if ((action == Action.ADDED || action == Action.MODIFIED) && !resolvedDriverService.isDone) { resolvedDriverService.set(service) } From 085fcd12cf0a5779c09f00133f7232e2562dc294 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Wed, 10 May 2017 00:47:39 -0700 Subject: [PATCH 102/225] Dispatch tasks to right executors that have tasks' input HDFS data (#216) * Dispatch tasks to right executors that have tasks' input HDFS data on local disks * Fix style issues * Clean up unnecessary fields * Clean up a misleading method name * Address review comments * Fix import ordering * Delete executor pods in watcher * Fix the driver hang by unblocking the main thread * Fix import order * Clear runningExecutorPods * Fix incorrect merge * Address review comments * Clean up imports --- .../spark/scheduler/TaskSetManager.scala | 2 +- .../kubernetes/KubernetesClientBuilder.scala | 16 +++- .../kubernetes/KubernetesClusterManager.scala | 3 +- .../KubernetesClusterSchedulerBackend.scala | 73 +++++++++++++++++-- .../KubernetesTaskSchedulerImpl.scala | 27 +++++++ .../kubernetes/KubernetesTaskSetManager.scala | 63 ++++++++++++++++ 6 files changed, 172 insertions(+), 12 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSchedulerImpl.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala diff --git a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala index a41b059fa7dec..83783a55a34ee 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala +++ 
b/core/src/main/scala/org/apache/spark/scheduler/TaskSetManager.scala @@ -242,7 +242,7 @@ private[spark] class TaskSetManager( * Return the pending tasks list for a given host, or an empty list if * there is no map entry for that host */ - private def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { + protected def getPendingTasksForHost(host: String): ArrayBuffer[Int] = { pendingTasksForHost.getOrElse(host, ArrayBuffer()) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala index 6725992aae978..31c6eda77d058 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala @@ -21,10 +21,13 @@ import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.utils.HttpClientUtils +import okhttp3.Dispatcher import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.util.ThreadUtils private[spark] class KubernetesClientBuilder(sparkConf: SparkConf, namespace: String) { private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) @@ -78,6 +81,17 @@ private[spark] class KubernetesClientBuilder(sparkConf: SparkConf, namespace: St } serviceAccountConfigBuilder } - new DefaultKubernetesClient(configBuilder.build) + // Disable the ping thread that is not daemon, in order to allow + // the driver main thread to shut down upon errors. Otherwise, the driver + // will hang indefinitely. + val config = configBuilder + .withWebsocketPingInterval(0) + .build() + val httpClient = HttpClientUtils.createHttpClient(config).newBuilder() + // Use a Dispatcher with a custom executor service that creates daemon threads. The default + // executor service used by Dispatcher creates non-daemon threads. 
+ .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("spark-on-k8s"))) + .build() + new DefaultKubernetesClient(httpClient, config) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 36f7149a832c3..70098f1f46ac0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -24,7 +24,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager { override def canCreate(masterURL: String): Boolean = masterURL.startsWith("k8s") override def createTaskScheduler(sc: SparkContext, masterURL: String): TaskScheduler = { - val scheduler = new TaskSchedulerImpl(sc) + val scheduler = new KubernetesTaskSchedulerImpl(sc) sc.taskScheduler = scheduler scheduler } @@ -37,6 +37,5 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager { override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { scheduler.asInstanceOf[TaskSchedulerImpl].initialize(backend) } - } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 15457db7e1459..a2294a6766980 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -16,13 +16,18 @@ */ package org.apache.spark.scheduler.cluster.kubernetes -import java.util.concurrent.atomic.{AtomicInteger, AtomicLong} +import java.io.Closeable +import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} -import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, - EnvVarSourceBuilder, Pod, QuantityBuilder} import scala.collection.JavaConverters._ +import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} +import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, + EnvVarSourceBuilder, Pod, QuantityBuilder} +import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action + import org.apache.spark.{SparkContext, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ @@ -38,8 +43,11 @@ private[spark] class KubernetesClusterSchedulerBackend( import KubernetesClusterSchedulerBackend._ - private val EXECUTOR_MODIFICATION_LOCK = new Object - private val runningExecutorPods = new scala.collection.mutable.HashMap[String, Pod] + private val RUNNING_EXECUTOR_PODS_LOCK = new Object + private val runningExecutorPods = new mutable.HashMap[String, Pod] // Indexed by executor IDs. + + private val EXECUTOR_PODS_BY_IPS_LOCK = new Object + private val executorPodsByIPs = new mutable.HashMap[String, Pod] // Indexed by executor IP addrs. 
private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) @@ -87,6 +95,7 @@ private[spark] class KubernetesClusterSchedulerBackend( super.minRegisteredRatio } + private val executorWatchResource = new AtomicReference[Closeable] protected var totalExpectedExecutors = new AtomicInteger(0) private val driverUrl = RpcEndpointAddress( @@ -119,6 +128,8 @@ private[spark] class KubernetesClusterSchedulerBackend( override def start(): Unit = { super.start() + executorWatchResource.set(kubernetesClient.pods().withLabel(SPARK_APP_ID_LABEL, applicationId()) + .watch(new ExecutorPodsWatcher())) if (!Utils.isDynamicAllocationEnabled(sc.conf)) { doRequestTotalExecutors(initialExecutors) } @@ -133,11 +144,22 @@ private[spark] class KubernetesClusterSchedulerBackend( // When using Utils.tryLogNonFatalError some of the code fails but without any logs or // indication as to why. try { - runningExecutorPods.values.foreach(kubernetesClient.pods().delete(_)) + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + runningExecutorPods.values.foreach(kubernetesClient.pods().delete(_)) + runningExecutorPods.clear() + } + EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + executorPodsByIPs.clear() + } + val resource = executorWatchResource.getAndSet(null) + if (resource != null) { + resource.close() + } } catch { case e: Throwable => logError("Uncaught exception while shutting down controllers.", e) } try { + logInfo("Closing kubernetes client") kubernetesClient.close() } catch { case e: Throwable => logError("Uncaught exception closing Kubernetes client.", e) @@ -231,7 +253,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { - EXECUTOR_MODIFICATION_LOCK.synchronized { + RUNNING_EXECUTOR_PODS_LOCK.synchronized { if (requestedTotal > totalExpectedExecutors.get) { logInfo(s"Requesting ${requestedTotal - totalExpectedExecutors.get}" + s" additional executors, expecting total $requestedTotal and currently" + @@ -246,7 +268,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = Future[Boolean] { - EXECUTOR_MODIFICATION_LOCK.synchronized { + RUNNING_EXECUTOR_PODS_LOCK.synchronized { for (executor <- executorIds) { runningExecutorPods.remove(executor) match { case Some(pod) => kubernetesClient.pods().delete(pod) @@ -256,6 +278,41 @@ private[spark] class KubernetesClusterSchedulerBackend( } true } + + def getExecutorPodByIP(podIP: String): Option[Pod] = { + EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + executorPodsByIPs.get(podIP) + } + } + + private class ExecutorPodsWatcher extends Watcher[Pod] { + + override def eventReceived(action: Action, pod: Pod): Unit = { + if (action == Action.MODIFIED && pod.getStatus.getPhase == "Running" + && pod.getMetadata.getDeletionTimestamp == null) { + val podIP = pod.getStatus.getPodIP + val clusterNodeName = pod.getSpec.getNodeName + logDebug(s"Executor pod $pod ready, launched at $clusterNodeName as IP $podIP.") + EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + executorPodsByIPs += ((podIP, pod)) + } + } else if ((action == Action.MODIFIED && pod.getMetadata.getDeletionTimestamp != null) || + action == Action.DELETED || action == Action.ERROR) { + val podName = pod.getMetadata.getName + val podIP = pod.getStatus.getPodIP + logDebug(s"Executor pod $podName at IP $podIP was at $action.") + if (podIP != null) { + EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + 
executorPodsByIPs -= podIP + } + } + } + } + + override def onClose(cause: KubernetesClientException): Unit = { + logDebug("Executor pod watch closed.", cause) + } + } } private object KubernetesClusterSchedulerBackend { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSchedulerImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSchedulerImpl.scala new file mode 100644 index 0000000000000..a5e126480b83d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSchedulerImpl.scala @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster.kubernetes + +import org.apache.spark.SparkContext +import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} + +private[spark] class KubernetesTaskSchedulerImpl(sc: SparkContext) extends TaskSchedulerImpl(sc) { + + override def createTaskSetManager(taskSet: TaskSet, maxTaskFailures: Int): TaskSetManager = { + new KubernetesTaskSetManager(this, taskSet, maxTaskFailures) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala new file mode 100644 index 0000000000000..5cea95be382f0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala @@ -0,0 +1,63 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.scheduler.cluster.kubernetes + +import scala.collection.mutable.ArrayBuffer + +import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} + +private[spark] class KubernetesTaskSetManager( + sched: TaskSchedulerImpl, + taskSet: TaskSet, + maxTaskFailures: Int) extends TaskSetManager(sched, taskSet, maxTaskFailures) { + + /** + * Overrides the lookup to use not only the executor pod IP, but also the cluster node + * name and host IP address that the pod is running on. The base class may have populated + * the lookup target map with HDFS datanode locations if this task set reads HDFS data. + * Those datanode locations are based on cluster node names or host IP addresses. Using + * only executor pod IPs may not match them. + */ + override def getPendingTasksForHost(executorIP: String): ArrayBuffer[Int] = { + val pendingTasksExecutorIP = super.getPendingTasksForHost(executorIP) + if (pendingTasksExecutorIP.nonEmpty) { + pendingTasksExecutorIP + } else { + val backend = sched.backend.asInstanceOf[KubernetesClusterSchedulerBackend] + val pod = backend.getExecutorPodByIP(executorIP) + if (pod.nonEmpty) { + val clusterNodeName = pod.get.getSpec.getNodeName + val pendingTasksClusterNodeName = super.getPendingTasksForHost(clusterNodeName) + if (pendingTasksClusterNodeName.nonEmpty) { + logDebug(s"Got preferred task list $pendingTasksClusterNodeName for executor host " + + s"$executorIP using cluster node name $clusterNodeName") + pendingTasksClusterNodeName + } else { + val clusterNodeIP = pod.get.getStatus.getHostIP + val pendingTasksClusterNodeIP = super.getPendingTasksForHost(clusterNodeIP) + if (pendingTasksClusterNodeIP.nonEmpty) { + logDebug(s"Got preferred task list $pendingTasksClusterNodeIP for executor host " + + s"$executorIP using cluster node IP $clusterNodeIP") + } + pendingTasksClusterNodeIP + } + } else { + pendingTasksExecutorIP // Empty + } + } + } +} From 2af7f0585e03685461df40e3267fbcb01589bad3 Mon Sep 17 00:00:00 2001 From: Ye Yin Date: Wed, 17 May 2017 00:10:45 +0800 Subject: [PATCH 103/225] Add parameter for driver pod name (#258) * Add parameter for driver pod name * Mark KUBERNETES_DRIVER_POD_NAME not being internal. Update docment. * Add test case for driver pod name * Diff driver pod name with appid * replace 'spark.kubernetes.driver.pod.name` with KUBERNETES_DRIVER_POD_NAME * Update readme to complete item --- docs/running-on-kubernetes.md | 7 +++++++ .../apache/spark/deploy/kubernetes/config.scala | 1 - .../deploy/kubernetes/submit/v1/Client.scala | 12 +++++++----- .../deploy/kubernetes/submit/v2/Client.scala | 6 ++++-- .../integrationtest/KubernetesV1Suite.scala | 16 ++++++++++++++++ 5 files changed, 34 insertions(+), 8 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 02933c28bbc66..be410f18b5cfc 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -350,6 +350,13 @@ from the other deployment modes. See the [configuration page](configuration.html resource. + + spark.kubernetes.driver.pod.name + (none) + + Name of the driver pod. If not set, the driver pod name is set to "spark.app.name" suffixed by the current timestamp to avoid name conflicts. 
+ + spark.kubernetes.submission.waitAppCompletion true diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 1c8b6798bbdd5..e379b40e376fc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -267,7 +267,6 @@ package object config extends Logging { private[spark] val KUBERNETES_DRIVER_POD_NAME = ConfigBuilder("spark.kubernetes.driver.pod.name") .doc("Name of the driver pod.") - .internal() .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 65e47ddca4bfe..a4dfe90f71a8a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -51,6 +51,8 @@ private[spark] class Client( private val appName = sparkConf.getOption("spark.app.name") .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) + .getOrElse(kubernetesAppId) private val secretName = s"$SUBMISSION_APP_SECRET_PREFIX-$kubernetesAppId" private val secretDirectory = s"$DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR/$kubernetesAppId" private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) @@ -150,7 +152,7 @@ private[spark] class Client( loggingInterval) Utils.tryWithResource(kubernetesClient .pods() - .withName(kubernetesAppId) + .withName(kubernetesDriverPodName) .watch(loggingWatch)) { _ => val resourceCleanShutdownHook = ShutdownHookManager.addShutdownHook(() => kubernetesResourceCleaner.deleteAllRegisteredResourcesFromKubernetes(kubernetesClient)) @@ -247,7 +249,7 @@ private[spark] class Client( logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + s" overridden as $kubernetesAppId") } - sparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + sparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, driverService.getMetadata.getName) sparkConf.set("spark.app.id", kubernetesAppId) sparkConf.setIfMissing("spark.app.name", appName) @@ -314,7 +316,7 @@ private[spark] class Client( val podWatcher = new DriverPodReadyWatcher(podReadyFuture) Utils.tryWithResource(kubernetesClient .pods() - .withName(kubernetesAppId) + .withName(kubernetesDriverPodName) .watch(podWatcher)) { _ => Utils.tryWithResource(kubernetesClient .services() @@ -445,7 +447,7 @@ private[spark] class Client( .build() val driverPod = kubernetesClient.pods().createNew() .withNewMetadata() - .withName(kubernetesAppId) + .withName(kubernetesDriverPodName) .withLabels(driverKubernetesSelectors.asJava) .withAnnotations(customAnnotations.asJava) .endMetadata() @@ -571,7 +573,7 @@ private[spark] class Client( kubernetesClient: KubernetesClient, e: Throwable): String = { val driverPod = try { - kubernetesClient.pods().withName(kubernetesAppId).get() + kubernetesClient.pods().withName(kubernetesDriverPodName).get() } catch { case throwable: Throwable => 
logError(s"Timed out while waiting $driverSubmitTimeoutSecs seconds for the" + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index 69dbfd041bb86..a70c93942ffb5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -58,6 +58,8 @@ private[spark] class Client( private val appName = sparkConf.getOption("spark.app.name") .getOrElse("spark") private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) + .getOrElse(kubernetesAppId) private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) private val maybeStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) @@ -123,7 +125,7 @@ private[spark] class Client( .build() val basePod = new PodBuilder() .withNewMetadata() - .withName(kubernetesAppId) + .withName(kubernetesDriverPodName) .addToLabels(allLabels.asJava) .addToAnnotations(parsedCustomAnnotations.asJava) .endMetadata() @@ -176,7 +178,7 @@ private[spark] class Client( if (resolvedFiles.nonEmpty) { resolvedSparkConf.set("spark.files", resolvedFiles.mkString(",")) } - resolvedSparkConf.set(KUBERNETES_DRIVER_POD_NAME, kubernetesAppId) + resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) resolvedSparkConf.set("spark.app.id", kubernetesAppId) // We don't need this anymore since we just set the JVM options on the environment resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala index 4cbd074547915..f09339a9c3e08 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala @@ -171,6 +171,22 @@ private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) "Unexpected value for annotation2") } + test("Run with driver pod name") { + sparkConf.set(KUBERNETES_DRIVER_POD_NAME, "spark-pi") + new Client( + sparkConf = sparkConf, + mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, + mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + appArgs = Array.empty[String]).run() + val driverPodMetadata = kubernetesTestComponents.kubernetesClient + .pods() + .withName("spark-pi") + .get() + .getMetadata() + val driverName = driverPodMetadata.getName + assert(driverName === "spark-pi", "Unexpected driver pod name.") + } + test("Enable SSL on the driver submit server") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) From 20956e71fdc36fc03fec26f4ea930a0163d271af Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Wed, 17 May 2017 09:44:22 -0700 Subject: [PATCH 104/225] Dynamic allocation (#272) * dynamic allocation: shuffle service docker, 
yaml and test fixture * dynamic allocation: changes to spark-core * dynamic allocation: tests * dynamic allocation: docs * dynamic allocation: kubernetes allocator and executor accounting * dynamic allocation: shuffle service, node caching --- conf/kubernetes-shuffle-service.yaml | 53 +++++ .../CoarseGrainedExecutorBackend.scala | 2 +- .../cluster/CoarseGrainedClusterMessage.scala | 2 +- .../CoarseGrainedSchedulerBackend.scala | 2 +- .../apache/spark/storage/BlockManager.scala | 10 +- docs/running-on-kubernetes.md | 66 +++++- project/SparkBuild.scala | 2 +- resource-managers/kubernetes/README.md | 6 +- .../kubernetes/ConfigurationUtils.scala | 41 ++++ .../spark/deploy/kubernetes/config.scala | 45 ++++ .../spark/deploy/kubernetes/constants.scala | 1 + .../KubernetesClusterSchedulerBackend.scala | 224 ++++++++++++++---- .../cluster/kubernetes/ShufflePodCache.scala | 91 +++++++ .../main/docker/shuffle-service/Dockerfile | 39 +++ .../integrationtest/jobs/GroupByTest.scala | 54 +++++ .../integrationtest/KubernetesSuite.scala | 4 + .../integrationtest/KubernetesV2Suite.scala | 99 +++++++- .../docker/SparkDockerImageBuilder.scala | 2 + 18 files changed, 683 insertions(+), 60 deletions(-) create mode 100644 conf/kubernetes-shuffle-service.yaml create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile create mode 100644 resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/GroupByTest.scala diff --git a/conf/kubernetes-shuffle-service.yaml b/conf/kubernetes-shuffle-service.yaml new file mode 100644 index 0000000000000..3aeb1f54f301c --- /dev/null +++ b/conf/kubernetes-shuffle-service.yaml @@ -0,0 +1,53 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +apiVersion: extensions/v1beta1 +kind: DaemonSet +metadata: + labels: + app: spark-shuffle-service + spark-version: 2.1.0 + name: shuffle +spec: + template: + metadata: + labels: + app: spark-shuffle-service + spark-version: 2.1.0 + spec: + volumes: + - name: temp-volume + hostPath: + path: '/var/tmp' # change this path according to your cluster configuration. + containers: + - name: shuffle + # This is an official image that is built + # from the dockerfiles/shuffle directory + # in the spark distribution. + image: kubespark/spark-shuffle:v2.1.0-kubernetes-0.1.0-alpha.3 + volumeMounts: + - mountPath: '/tmp' + name: temp-volume + # more volumes can be mounted here. 
+ # The spark job must be configured to use these + # mounts using the configuration: + # spark.kubernetes.shuffle.dir=,,... + resources: + requests: + cpu: "1" + limits: + cpu: "1" \ No newline at end of file diff --git a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala index b2b26ee107c00..9fffa536c1296 100644 --- a/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala +++ b/core/src/main/scala/org/apache/spark/executor/CoarseGrainedExecutorBackend.scala @@ -200,7 +200,7 @@ private[spark] object CoarseGrainedExecutorBackend extends Logging { new SecurityManager(executorConf), clientMode = true) val driver = fetcher.setupEndpointRefByURI(driverUrl) - val cfg = driver.askSync[SparkAppConfig](RetrieveSparkAppConfig) + val cfg = driver.askSync[SparkAppConfig](RetrieveSparkAppConfig(executorId)) val props = cfg.sparkProperties ++ Seq[(String, String)](("spark.app.id", appId)) fetcher.shutdown() diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala index 6b49bd699a13a..a53c3cdf48b33 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedClusterMessage.scala @@ -28,7 +28,7 @@ private[spark] sealed trait CoarseGrainedClusterMessage extends Serializable private[spark] object CoarseGrainedClusterMessages { - case object RetrieveSparkAppConfig extends CoarseGrainedClusterMessage + case class RetrieveSparkAppConfig(executorId: String) extends CoarseGrainedClusterMessage case class SparkAppConfig( sparkProperties: Seq[(String, String)], diff --git a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala index dc82bb7704727..d6c212dcda958 100644 --- a/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala +++ b/core/src/main/scala/org/apache/spark/scheduler/cluster/CoarseGrainedSchedulerBackend.scala @@ -219,7 +219,7 @@ class CoarseGrainedSchedulerBackend(scheduler: TaskSchedulerImpl, val rpcEnv: Rp removeExecutor(executorId, reason) context.reply(true) - case RetrieveSparkAppConfig => + case RetrieveSparkAppConfig(executorId) => val reply = SparkAppConfig(sparkProperties, SparkEnv.get.securityManager.getIOEncryptionKey()) context.reply(reply) diff --git a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala index 5f067191070ec..19c2d2c13133e 100644 --- a/core/src/main/scala/org/apache/spark/storage/BlockManager.scala +++ b/core/src/main/scala/org/apache/spark/storage/BlockManager.scala @@ -233,8 +233,14 @@ private[spark] class BlockManager( blockManagerId = if (idFromMaster != null) idFromMaster else id shuffleServerId = if (externalShuffleServiceEnabled) { - logInfo(s"external shuffle service port = $externalShuffleServicePort") - BlockManagerId(executorId, blockTransferService.hostName, externalShuffleServicePort) + val shuffleServerHostName = if (blockManagerId.isDriver) { + blockTransferService.hostName + } else { + conf.get("spark.shuffle.service.host", blockTransferService.hostName) + } + logInfo(s"external shuffle service host = $shuffleServerHostName, " + + 
s"port = $externalShuffleServicePort") + BlockManagerId(executorId, shuffleServerHostName, externalShuffleServicePort) } else { blockManagerId } diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index be410f18b5cfc..5b7bb6cc612c5 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -189,7 +189,7 @@ from the other deployment modes. See the [configuration page](configuration.html The namespace that will be used for running the driver and executor pods. When using spark-submit in cluster mode, this can also be passed to spark-submit via the - --kubernetes-namespace command line argument. The namespace must already exist. + --kubernetes-namespace command line argument. @@ -208,6 +208,37 @@ from the other deployment modes. See the [configuration page](configuration.html Docker tag format. + + spark.kubernetes.shuffle.namespace + default + + Namespace in which the shuffle service pods are present. The shuffle service must be + created in the cluster prior to attempts to use it. + + + + spark.kubernetes.shuffle.labels + (none) + + Labels that will be used to look up shuffle service pods. This should be a comma-separated list of label key-value pairs, + where each label is in the format key=value. The labels chosen must be such that + they match exactly one shuffle service pod on each node that executors are launched. + + + + spark.kubernetes.allocation.batch.size + 5 + + Number of pods to launch at once in each round of executor pod allocation. + + + + spark.kubernetes.allocation.batch.delay + 1 + + Number of seconds to wait between each round of executor pod allocation. + + spark.kubernetes.authenticate.submission.caCertFile (none) @@ -389,10 +420,41 @@ from the other deployment modes. See the [configuration page](configuration.html +## Dynamic Executor Scaling + +Spark on Kubernetes supports Dynamic Allocation with cluster mode. This mode requires running +an external shuffle service. This is typically a [daemonset](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) +with a provisioned [hostpath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) volume. +This shuffle service may be shared by executors belonging to different SparkJobs. Using Spark with dynamic allocation +on Kubernetes assumes that a cluster administrator has set up one or more shuffle-service daemonsets in the cluster. + +A sample configuration file is provided in `conf/kubernetes-shuffle-service.yaml` which can be customized as needed +for a particular cluster. It is important to note that `spec.template.metadata.labels` are setup appropriately for the shuffle +service because there may be multiple shuffle service instances running in a cluster. The labels give us a way to target a particular +shuffle service. + +For example, if the shuffle service we want to use is in the default namespace, and +has pods with labels `app=spark-shuffle-service` and `spark-version=2.1.0`, we can +use those tags to target that particular shuffle service at job launch time. 
In order to run a job with dynamic allocation enabled, +the command may then look like the following: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.GroupByTest \ + --master k8s://: \ + --kubernetes-namespace default \ + --conf spark.app.name=group-by-test \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:latest \ + --conf spark.dynamicAllocation.enabled=true \ + --conf spark.shuffle.service.enabled=true \ + --conf spark.kubernetes.shuffle.namespace=default \ + --conf spark.kubernetes.shuffle.labels="app=spark-shuffle-service,spark-version=2.1.0" \ + examples/jars/spark_examples_2.11-2.2.0.jar 10 400000 2 + ## Current Limitations Running Spark on Kubernetes is currently an experimental feature. Some restrictions on the current implementation that should be lifted in the future include: -* Applications can only use a fixed number of executors. Dynamic allocation is not supported. * Applications can only run in cluster mode. * Only Scala and Java applications can be run. diff --git a/project/SparkBuild.scala b/project/SparkBuild.scala index 4572b8dc67ffe..b541bfc4ae5d6 100644 --- a/project/SparkBuild.scala +++ b/project/SparkBuild.scala @@ -55,7 +55,7 @@ object BuildCommons { "core", "graphx", "mllib", "mllib-local", "repl", "network-common", "network-shuffle", "launcher", "unsafe", "tags", "sketch" ).map(ProjectRef(buildLocation, _)) ++ sqlProjects ++ streamingProjects - + val optionallyEnabledProjects@Seq(mesos, yarn, java8Tests, sparkGangliaLgpl, streamingKinesisAsl, dockerIntegrationTests, kubernetes, _*) = Seq("mesos", "yarn", "java8-tests", "ganglia-lgpl", "streaming-kinesis-asl", "docker-integration-tests", diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index 734c29947b6d9..fd1ad29eb795d 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -14,10 +14,10 @@ important matters to keep in mind when developing this feature. # Building Spark with Kubernetes Support -To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. +To build Spark with Kubernetes support, use the `kubernetes` profile when invoking Maven. For example, to simply compile +the Kubernetes core implementation module along with its dependencies: - git checkout branch-2.1-kubernetes - build/mvn package -Pkubernetes -DskipTests + build/mvn compile -Pkubernetes -pl resource-managers/kubernetes/core -am -DskipTests To build a distribution of Spark with Kubernetes support, use the `dev/make-distribution.sh` script, and add the `kubernetes` profile as part of the build arguments. Any other build arguments can be specified as one would expect when diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala new file mode 100644 index 0000000000000..f3bd598556019 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.deploy.kubernetes + +import org.apache.spark.SparkException + +object ConfigurationUtils { + def parseKeyValuePairs( + maybeKeyValues: Option[String], + configKey: String, + keyValueType: String): Map[String, String] = { + + maybeKeyValues.map(keyValues => { + keyValues.split(",").map(_.trim).filterNot(_.isEmpty).map(keyValue => { + keyValue.split("=", 2).toSeq match { + case Seq(k, v) => + (k, v) + case _ => + throw new SparkException(s"Custom $keyValueType set by $configKey must be a" + + s" comma-separated list of key-value pairs, with format =." + + s" Got value: $keyValue. All values: $keyValues") + } + }).toMap + }).getOrElse(Map.empty[String, String]) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index e379b40e376fc..09b2d38cb8e38 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -157,6 +157,13 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val SPARK_SHUFFLE_SERVICE_HOST = + ConfigBuilder("spark.shuffle.service.host") + .doc("Host for Spark Shuffle Service") + .internal() + .stringConf + .createOptional + // Note that while we set a default for this when we start up the // scheduler, the specific default value is dynamically determined // based on the executor memory. @@ -270,6 +277,44 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_SHUFFLE_NAMESPACE = + ConfigBuilder("spark.kubernetes.shuffle.namespace") + .doc("Namespace of the shuffle service") + .stringConf + .createWithDefault("default") + + private[spark] val KUBERNETES_SHUFFLE_SVC_IP = + ConfigBuilder("spark.kubernetes.shuffle.ip") + .doc("This setting is for debugging only. Setting this " + + "allows overriding the IP that the executor thinks its colocated " + + "shuffle service is on") + .stringConf + .createOptional + + private[spark] val KUBERNETES_SHUFFLE_LABELS = + ConfigBuilder("spark.kubernetes.shuffle.labels") + .doc("Labels to identify the shuffle service") + .stringConf + .createOptional + + private[spark] val KUBERNETES_SHUFFLE_DIR = + ConfigBuilder("spark.kubernetes.shuffle.dir") + .doc("Path to the shared shuffle directories.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_ALLOCATION_BATCH_SIZE = + ConfigBuilder("spark.kubernetes.allocation.batch.size") + .doc("Number of pods to launch at once in each round of dynamic allocation. ") + .intConf + .createWithDefault(5) + + private[spark] val KUBERNETES_ALLOCATION_BATCH_DELAY = + ConfigBuilder("spark.kubernetes.allocation.batch.delay") + .doc("Number of seconds to wait between each round of executor allocation. 
") + .longConf + .createWithDefault(1) + private[spark] val DRIVER_SERVICE_MANAGER_TYPE = ConfigBuilder("spark.kubernetes.driver.serviceManagerType") .doc("A tag indicating which class to use for creating the Kubernetes service and" + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index f82cb88b4c622..27e47eb61933f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -113,4 +113,5 @@ package object constants { s"$INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH/$INIT_CONTAINER_PROPERTIES_FILE_NAME" private[spark] val DOWNLOAD_JARS_VOLUME_NAME = "download-jars" private[spark] val DOWNLOAD_FILES_VOLUME_NAME = "download-files" + private[spark] val DEFAULT_SHUFFLE_MOUNT_NAME = "shuffle" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index a2294a6766980..669a073b1fab6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -17,22 +17,25 @@ package org.apache.spark.scheduler.cluster.kubernetes import java.io.Closeable +import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} import scala.collection.JavaConverters._ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} -import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, - EnvVarSourceBuilder, Pod, QuantityBuilder} +import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action +import org.apache.commons.io.FilenameUtils -import org.apache.spark.{SparkContext, SparkException} +import org.apache.spark.{SparkContext, SparkEnv, SparkException} +import org.apache.spark.deploy.kubernetes.ConfigurationUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.rpc.RpcEndpointAddress +import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.{ThreadUtils, Utils} @@ -49,6 +52,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_PODS_BY_IPS_LOCK = new Object private val executorPodsByIPs = new mutable.HashMap[String, Pod] // Indexed by executor IP addrs. 
+ private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) @@ -88,6 +92,28 @@ private[spark] class KubernetesClusterSchedulerBackend( throw new SparkException(s"Executor cannot find driver pod", throwable) } + private val shuffleServiceConfig: Option[ShuffleServiceConfig] = + if (Utils.isDynamicAllocationEnabled(sc.conf)) { + val shuffleNamespace = conf.get(KUBERNETES_SHUFFLE_NAMESPACE) + val parsedShuffleLabels = ConfigurationUtils.parseKeyValuePairs( + conf.get(KUBERNETES_SHUFFLE_LABELS), KUBERNETES_SHUFFLE_LABELS.key, + "shuffle-labels") + if (parsedShuffleLabels.size == 0) { + throw new SparkException(s"Dynamic allocation enabled " + + s"but no ${KUBERNETES_SHUFFLE_LABELS.key} specified") + } + + val shuffleDirs = conf.get(KUBERNETES_SHUFFLE_DIR).map { + _.split(",") + }.getOrElse(Utils.getConfiguredLocalDirs(conf)) + Some( + ShuffleServiceConfig(shuffleNamespace, + parsedShuffleLabels, + shuffleDirs)) + } else { + None + } + override val minRegisteredRatio = if (conf.getOption("spark.scheduler.minRegisteredResourcesRatio").isEmpty) { 0.8 @@ -105,6 +131,38 @@ private[spark] class KubernetesClusterSchedulerBackend( private val initialExecutors = getInitialTargetExecutorNumber(1) + private val podAllocationInterval = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) + require(podAllocationInterval > 0, s"Allocation batch delay " + + s"${KUBERNETES_ALLOCATION_BATCH_DELAY} " + + s"is ${podAllocationInterval}, should be a positive integer") + + private val podAllocationSize = conf.get(KUBERNETES_ALLOCATION_BATCH_SIZE) + require(podAllocationSize > 0, s"Allocation batch size " + + s"${KUBERNETES_ALLOCATION_BATCH_SIZE} " + + s"is ${podAllocationSize}, should be a positive integer") + + private val allocator = ThreadUtils + .newDaemonSingleThreadScheduledExecutor("kubernetes-pod-allocator") + + private val allocatorRunnable: Runnable = new Runnable { + override def run(): Unit = { + if (totalRegisteredExecutors.get() < runningExecutorPods.size) { + logDebug("Waiting for pending executors before scaling") + } else if (totalExpectedExecutors.get() <= runningExecutorPods.size) { + logDebug("Maximum allowed executor limit reached. 
Not scaling up further.") + } else { + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + for (i <- 0 until math.min( + totalExpectedExecutors.get - runningExecutorPods.size, podAllocationSize)) { + runningExecutorPods += allocateNewExecutorPod() + logInfo( + s"Requesting a new executor, total executors is now ${runningExecutorPods.size}") + } + } + } + } + } + private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { if (Utils.isDynamicAllocationEnabled(conf)) { val minNumExecutors = conf.getInt("spark.dynamicAllocation.minExecutors", 0) @@ -118,6 +176,7 @@ private[spark] class KubernetesClusterSchedulerBackend( } else { conf.getInt("spark.executor.instances", defaultNumExecutors) } + } override def applicationId(): String = conf.get("spark.app.id", super.applicationId()) @@ -130,12 +189,25 @@ private[spark] class KubernetesClusterSchedulerBackend( super.start() executorWatchResource.set(kubernetesClient.pods().withLabel(SPARK_APP_ID_LABEL, applicationId()) .watch(new ExecutorPodsWatcher())) + + allocator.scheduleWithFixedDelay( + allocatorRunnable, 0, podAllocationInterval, TimeUnit.SECONDS) + if (!Utils.isDynamicAllocationEnabled(sc.conf)) { doRequestTotalExecutors(initialExecutors) + } else { + shufflePodCache = shuffleServiceConfig + .map { config => new ShufflePodCache( + kubernetesClient, config.shuffleNamespace, config.shuffleLabels) } + shufflePodCache.foreach(_.start()) } } override def stop(): Unit = { + // stop allocation of new resources and caches. + allocator.shutdown() + shufflePodCache.foreach(_.stop()) + // send stop message to executors so they shut down cleanly super.stop() @@ -214,37 +286,60 @@ private[spark] class KubernetesClusterSchedulerBackend( .withContainerPort(port._2) .build() }) + + val basePodBuilder = new PodBuilder() + .withNewMetadata() + .withName(name) + .withLabels(selectors) + .withOwnerReferences() + .addNewOwnerReference() + .withController(true) + .withApiVersion(driverPod.getApiVersion) + .withKind(driverPod.getKind) + .withName(driverPod.getMetadata.getName) + .withUid(driverPod.getMetadata.getUid) + .endOwnerReference() + .endMetadata() + .withNewSpec() + .withHostname(hostname) + .addNewContainer() + .withName(s"executor") + .withImage(executorDockerImage) + .withImagePullPolicy("IfNotPresent") + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryLimitQuantity) + .addToRequests("cpu", executorCpuQuantity) + .addToLimits("cpu", executorCpuQuantity) + .endResources() + .withEnv(requiredEnv.asJava) + .withPorts(requiredPorts.asJava) + .endContainer() + .endSpec() + + val resolvedPodBuilder = shuffleServiceConfig + .map { config => + config.shuffleDirs.foldLeft(basePodBuilder) { (builder, dir) => + builder + .editSpec() + .addNewVolume() + .withName(FilenameUtils.getBaseName(dir)) + .withNewHostPath() + .withPath(dir) + .endHostPath() + .endVolume() + .editFirstContainer() + .addNewVolumeMount() + .withName(FilenameUtils.getBaseName(dir)) + .withMountPath(dir) + .endVolumeMount() + .endContainer() + .endSpec() + } + }.getOrElse(basePodBuilder) + try { - (executorId, kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(name) - .withLabels(selectors) - .withOwnerReferences() - .addNewOwnerReference() - .withController(true) - .withApiVersion(driverPod.getApiVersion) - .withKind(driverPod.getKind) - .withName(driverPod.getMetadata.getName) - .withUid(driverPod.getMetadata.getUid) - .endOwnerReference() - .endMetadata() - .withNewSpec() - .withHostname(hostname) 
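// [Editor's note, not part of the diff] In resolvedPodBuilder above, every configured shuffle
// directory becomes a hostPath volume on the executor pod, and the volume/mount name is the
// directory's base name. A small illustration (the directory value is an assumption):
//
//   import org.apache.commons.io.FilenameUtils
//   FilenameUtils.getBaseName("/var/spark/shuffle-dir1")  // "shuffle-dir1"
//
// Mounting the same host directory into both the executor and the co-located shuffle-service
// pod is what lets them share shuffle files on the node.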
- .addNewContainer() - .withName(s"executor") - .withImage(executorDockerImage) - .withImagePullPolicy("IfNotPresent") - .withNewResources() - .addToRequests("memory", executorMemoryQuantity) - .addToLimits("memory", executorMemoryLimitQuantity) - .addToRequests("cpu", executorCpuQuantity) - .addToLimits("cpu", executorCpuQuantity) - .endResources() - .withEnv(requiredEnv.asJava) - .withPorts(requiredPorts.asJava) - .endContainer() - .endSpec() - .done()) + (executorId, kubernetesClient.pods().create(resolvedPodBuilder.build())) } catch { case throwable: Throwable => logError("Failed to allocate executor pod.", throwable) @@ -252,18 +347,13 @@ private[spark] class KubernetesClusterSchedulerBackend( } } + override def createDriverEndpoint( + properties: Seq[(String, String)]): DriverEndpoint = { + new KubernetesDriverEndpoint(rpcEnv, properties) + } + override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { - RUNNING_EXECUTOR_PODS_LOCK.synchronized { - if (requestedTotal > totalExpectedExecutors.get) { - logInfo(s"Requesting ${requestedTotal - totalExpectedExecutors.get}" - + s" additional executors, expecting total $requestedTotal and currently" + - s" expected ${totalExpectedExecutors.get}") - for (i <- 0 until (requestedTotal - totalExpectedExecutors.get)) { - runningExecutorPods += allocateNewExecutorPod() - } - } - totalExpectedExecutors.set(requestedTotal) - } + totalExpectedExecutors.set(requestedTotal) true } @@ -313,6 +403,50 @@ private[spark] class KubernetesClusterSchedulerBackend( logDebug("Executor pod watch closed.", cause) } } + + private class KubernetesDriverEndpoint( + rpcEnv: RpcEnv, + sparkProperties: Seq[(String, String)]) + extends DriverEndpoint(rpcEnv, sparkProperties) { + override def receiveAndReply( + context: RpcCallContext): PartialFunction[Any, Unit] = { + new PartialFunction[Any, Unit]() { + override def isDefinedAt(msg: Any): Boolean = { + msg match { + case RetrieveSparkAppConfig(executorId) => + Utils.isDynamicAllocationEnabled(sc.conf) + case _ => false + } + } + + override def apply(msg: Any): Unit = { + msg match { + case RetrieveSparkAppConfig(executorId) => + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + var resolvedProperties = sparkProperties + val runningExecutorPod = kubernetesClient + .pods() + .withName(runningExecutorPods(executorId).getMetadata.getName) + .get() + val nodeName = runningExecutorPod.getSpec.getNodeName + val shufflePodIp = shufflePodCache.get.getShufflePodForExecutor(nodeName) + resolvedProperties = resolvedProperties ++ Seq( + (SPARK_SHUFFLE_SERVICE_HOST.key, shufflePodIp)) + + val reply = SparkAppConfig( + resolvedProperties, + SparkEnv.get.securityManager.getIOEncryptionKey()) + context.reply(reply) + } + } + } + }.orElse(super.receiveAndReply(context)) + } + } + + case class ShuffleServiceConfig(shuffleNamespace: String, + shuffleLabels: Map[String, String], + shuffleDirs: Seq[String]) } private object KubernetesClusterSchedulerBackend { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala new file mode 100644 index 0000000000000..53b4e745ce7c7 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala @@ -0,0 +1,91 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license 
agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.scheduler.cluster.kubernetes + +import scala.collection.JavaConverters._ + +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import io.fabric8.kubernetes.client.internal.readiness.Readiness + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging + +private[spark] class ShufflePodCache ( + client: KubernetesClient, + dsNamespace: String, + dsLabels: Map[String, String]) extends Logging { + + private var shufflePodCache = scala.collection.mutable.Map[String, String]() + private var watcher: Watch = _ + + def start(): Unit = { + // seed the initial cache. + val pods = client.pods().withLabels(dsLabels.asJava).list() + pods.getItems.asScala.foreach { + pod => + if (Readiness.isReady(pod)) { + addShufflePodToCache(pod) + } else { + logWarning(s"Found unready shuffle pod ${pod.getMetadata.getName} " + + s"on node ${pod.getSpec.getNodeName}") + } + } + + watcher = client + .pods() + .withLabels(dsLabels.asJava) + .watch(new Watcher[Pod] { + override def eventReceived(action: Watcher.Action, p: Pod): Unit = { + action match { + case Action.DELETED | Action.ERROR => + shufflePodCache.remove(p.getSpec.getNodeName) + case Action.ADDED | Action.MODIFIED if Readiness.isReady(p) => + addShufflePodToCache(p) + } + } + override def onClose(e: KubernetesClientException): Unit = {} + }) + } + + private def addShufflePodToCache(pod: Pod): Unit = { + if (shufflePodCache.contains(pod.getSpec.getNodeName)) { + val registeredPodName = shufflePodCache.get(pod.getSpec.getNodeName).get + logError(s"Ambiguous specification of shuffle service pod. " + + s"Found multiple matching pods: ${pod.getMetadata.getName}, " + + s"${registeredPodName} on ${pod.getSpec.getNodeName}") + + throw new SparkException(s"Ambiguous specification of shuffle service pod. 
" + + s"Found multiple matching pods: ${pod.getMetadata.getName}, " + + s"${registeredPodName} on ${pod.getSpec.getNodeName}") + } else { + shufflePodCache(pod.getSpec.getNodeName) = pod.getStatus.getPodIP + } + } + + def stop(): Unit = { + watcher.close() + } + + def getShufflePodForExecutor(executorNode: String): String = { + shufflePodCache.get(executorNode) + .getOrElse(throw new SparkException(s"Unable to find shuffle pod on node $executorNode")) + } +} + diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile new file mode 100644 index 0000000000000..630d3408519ac --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -0,0 +1,39 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM openjdk:8-alpine + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-shuffle:latest -f dockerfiles/shuffle/Dockerfile . + +RUN apk upgrade --update +RUN apk add --update bash +RUN mkdir -p /opt/spark +RUN touch /opt/spark/RELEASE + +ADD jars /opt/spark/jars +ADD examples /opt/spark/examples +ADD bin /opt/spark/bin +ADD sbin /opt/spark/sbin +ADD conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark + +CMD ["/bin/sh","-c","/opt/spark/bin/spark-class org.apache.spark.deploy.ExternalShuffleService 1"] \ No newline at end of file diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/GroupByTest.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/GroupByTest.scala new file mode 100644 index 0000000000000..fe47d42485b24 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/GroupByTest.scala @@ -0,0 +1,54 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest.jobs + +import java.util.Random + +import org.apache.spark.sql.SparkSession + +object GroupByTest { + def main(args: Array[String]) { + val spark = SparkSession + .builder + .appName("GroupBy Test") + .getOrCreate() + + val numMappers = if (args.length > 0) args(0).toInt else 5 + val numKVPairs = if (args.length > 1) args(1).toInt else 200000 + val valSize = if (args.length > 2) args(2).toInt else 2 + val numReducers = if (args.length > 3) args(3).toInt else numMappers + + val pairs1 = spark.sparkContext.parallelize(0 until numMappers, numMappers).flatMap { p => + val ranGen = new Random + val arr1 = new Array[(Int, Array[Byte])](numKVPairs) + for (i <- 0 until numKVPairs) { + val byteArr = new Array[Byte](valSize) + ranGen.nextBytes(byteArr) + arr1(i) = (ranGen.nextInt(Int.MaxValue), byteArr) + } + arr1 + }.cache() + // Enforce that everything has been calculated and in cache + pairs1.count() + + // scalastyle:off println + println("The Result is", pairs1.groupByKey(numReducers).count()) + // scalastyle:on println + spark.stop() + } +} + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index bd5ff7a005d46..56fcf692b8ff7 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -67,4 +67,8 @@ private[spark] object KubernetesSuite { ".integrationtest.jobs.SparkPiWithInfiniteWait" val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.FileExistenceTest" + val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + + ".integrationtest.jobs.GroupByTest" + + case class ShuffleNotReadyException() extends Exception } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index 8fa7cbd52ee83..ae02de7937c6a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -18,6 +18,10 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.UUID +import scala.collection.JavaConverters._ + +import com.google.common.collect.ImmutableList +import io.fabric8.kubernetes.client.internal.readiness.Readiness import org.scalatest.{BeforeAndAfter, DoNotDiscover} import org.scalatest.concurrent.Eventually @@ -27,7 +31,10 @@ import 
org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND +import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 +import org.apache.spark.deploy.kubernetes.submit.v1.Client import org.apache.spark.deploy.kubernetes.submit.v2.{MountedDependencyManagerProviderImpl, SubmissionKubernetesClientProviderImpl} +import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} @DoNotDiscover private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) @@ -60,7 +67,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) assume(testBackend.name == MINIKUBE_TEST_BACKEND) launchStagingServer(SSLOptions()) - runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } test("Enable SSL on the submission server") { @@ -84,7 +91,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) keyStorePassword = Some("keyStore"), keyPassword = Some("key"), trustStorePassword = Some("trustStore"))) - runSparkAppAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } test("Use container-local resources without the resource staging server") { @@ -93,7 +100,22 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) sparkConf.setJars(Seq( KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - runSparkAppAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) + runSparkPiAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Dynamic executor scaling basic test") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + launchStagingServer(SSLOptions()) + createShuffleServiceDaemonSet() + + sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf.set("spark.dynamicAllocation.enabled", "true") + sparkConf.set("spark.shuffle.service.enabled", "true") + sparkConf.set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service") + sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) + sparkConf.set("spark.app.name", "group-by-test") + runSparkGroupByTestAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { @@ -111,7 +133,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) s"${Minikube.getMinikubeIp}:$resourceStagingServerPort") } - private def runSparkAppAndVerifyCompletion(appResource: String): Unit = { + private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( sparkConf = sparkConf, mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, @@ -136,4 +158,73 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) .contains("Pi is roughly 3"), "The application did not compute the value of pi.") } } + + private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { + val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( + sparkConf = sparkConf, 
+ mainClass = KubernetesSuite.GROUP_BY_MAIN_CLASS, + appArgs = Array.empty[String], + mainAppResource = appResource, + kubernetesClientProvider = + new SubmissionKubernetesClientProviderImpl(sparkConf), + mountedDependencyManagerProvider = + new MountedDependencyManagerProviderImpl(sparkConf)) + client.run() + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", APP_LOCATOR_LABEL) + .list() + .getItems + .get(0) + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains("The Result is"), "The application did not complete.") + } + } + + private def createShuffleServiceDaemonSet(): Unit = { + val ds = kubernetesTestComponents.kubernetesClient.extensions().daemonSets() + .createNew() + .withNewMetadata() + .withName("shuffle") + .endMetadata() + .withNewSpec() + .withNewTemplate() + .withNewMetadata() + .withLabels(Map("app" -> "spark-shuffle-service").asJava) + .endMetadata() + .withNewSpec() + .addNewVolume() + .withName("shuffle-dir") + .withNewHostPath() + .withPath("/tmp") + .endHostPath() + .endVolume() + .addNewContainer() + .withName("shuffle") + .withImage("spark-shuffle:latest") + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName("shuffle-dir") + .withMountPath("/tmp") + .endVolumeMount() + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .done() + + // wait for daemonset to become available. + Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { + val pods = kubernetesTestComponents.kubernetesClient.pods() + .withLabel("app", "spark-shuffle-service").list().getItems() + + if (pods.size() == 0 || Readiness.isReady(pods.get(0))) { + throw KubernetesSuite.ShuffleNotReadyException() + } + } + } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index d807c4d81009b..52b8c7d7359a6 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -31,6 +31,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, private val DRIVER_V1_DOCKER_FILE = "dockerfiles/driver/Dockerfile" private val DRIVER_V2_DOCKER_FILE = "dockerfiles/driver-v2/Dockerfile" private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" + private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" private val DRIVER_INIT_DOCKER_FILE = "dockerfiles/driver-init/Dockerfile" private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) @@ -60,6 +61,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } buildImage("spark-driver", DRIVER_V1_DOCKER_FILE) buildImage("spark-executor", EXECUTOR_DOCKER_FILE) + buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE) buildImage("spark-driver-v2", DRIVER_V2_DOCKER_FILE) 
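[Editor's note, not part of the diff] The pieces above (the spark-shuffle image, the DaemonSet created by createShuffleServiceDaemonSet, and the new allocation settings) come together when a job enables dynamic allocation. The sketch below simply collects the configuration that the integration test sets, plus the optional batching knobs defined earlier in this patch; the namespace value is an assumption, since the test derives it from its Kubernetes test components.

import org.apache.spark.SparkConf

object DynamicAllocationOnK8sConfSketch {
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .set("spark.dynamicAllocation.enabled", "true")
      .set("spark.shuffle.service.enabled", "true")
      // Must match the labels on the shuffle-service DaemonSet pods.
      .set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service")
      .set("spark.kubernetes.shuffle.namespace", "default")
      // Optional tuning knobs for executor pod allocation.
      .set("spark.kubernetes.allocation.batch.size", "5")
      .set("spark.kubernetes.allocation.batch.delay", "1")
    // scalastyle:off println
    println(conf.toDebugString)
    // scalastyle:on println
  }
}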
buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) buildImage("spark-driver-init", DRIVER_INIT_DOCKER_FILE) From 30597f6e3b2c27766ec465cbb5a06eef25dcac2b Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 17 May 2017 11:55:23 -0700 Subject: [PATCH 105/225] Download remotely-located resources on driver and executor startup via init-container (#251) * Download remotely-located resources on driver startup. Use init-container in executors. * FIx owner reference slightly * Clean up config * Don't rely too heavily on conventions that can change * Fix flaky test * Tidy up file resolver * Whitespace arrangement * Indentation change * Fix more indentation * Consolidate init container component providers * Minor method signature and comment changes * Rename class for consistency * Resolve conflicts * Fix flaky test * Add some tests and some refactoring. * Make naming consistent for Staged -> Submitted * Add unit test for the submission client. * Refine expectations * Rename variables and fix typos * Address more comments. Remove redundant SingleKeyConfigMap. * Minor test adjustments. * add another test * Fix conflicts. --- ...nerResourceStagingServerSecretPlugin.scala | 62 ++ .../SparkPodInitContainerBootstrap.scala | 103 ++++ .../spark/deploy/kubernetes/config.scala | 117 +++- .../spark/deploy/kubernetes/constants.scala | 37 +- .../submit}/KubernetesFileUtils.scala | 5 +- .../deploy/kubernetes/submit/v1/Client.scala | 3 +- ...DriverSubmitSslConfigurationProvider.scala | 3 +- .../deploy/kubernetes/submit/v2/Client.scala | 188 +++--- .../v2/ContainerLocalizedFilesResolver.scala | 68 +++ ...riverInitContainerComponentsProvider.scala | 155 +++++ .../ExecutorInitContainerConfiguration.scala | 47 ++ .../submit/v2/InitContainerUtil.scala | 49 ++ .../v2/MountedDependencyManagerImpl.scala | 324 ----------- .../v2/MountedDependencyManagerProvider.scala | 58 -- ...opertiesConfigMapFromScalaMapBuilder.scala | 48 ++ .../SparkInitContainerConfigMapBuilder.scala | 69 +++ ...dDependencyInitContainerConfigPlugin.scala | 69 +++ .../v2/SubmittedDependencySecretBuilder.scala | 66 +++ .../v2/SubmittedDependencyUploaderImpl.scala | 116 ++++ .../submit/v2/SubmittedResources.scala} | 17 +- .../v1/KubernetesSparkRestServer.scala | 1 + ...SparkDependencyDownloadInitContainer.scala | 181 ++++-- .../v2/ResourceStagingService.scala | 3 +- .../v2/ResourceStagingServiceImpl.scala | 5 +- .../v2/ResourceStagingServiceRetrofit.scala | 4 +- .../kubernetes/KubernetesClusterManager.scala | 49 +- .../KubernetesClusterSchedulerBackend.scala | 35 +- .../SparkPodInitContainerBootstrapSuite.scala | 164 ++++++ ...dencyInitContainerVolumesPluginSuite.scala | 60 ++ .../kubernetes/submit/v2/ClientV2Suite.scala | 542 +++++++++--------- ...ContainerLocalizedFilesResolverSuite.scala | 69 +++ ...cutorInitContainerConfigurationSuite.scala | 56 ++ .../v2/MountedDependencyManagerSuite.scala | 323 ----------- ...rkInitContainerConfigMapBuilderSuite.scala | 101 ++++ ...ndencyInitContainerConfigPluginSuite.scala | 83 +++ ...ubmittedDependencySecretBuilderSuite.scala | 83 +++ .../v2/SubmittedDependencyUploaderSuite.scala | 177 ++++++ ...DependencyDownloadInitContainerSuite.scala | 70 ++- .../src/main/docker/executor/Dockerfile | 5 +- .../kubernetes/integration-tests/pom.xml | 22 + .../integration-test-asset-server/Dockerfile | 21 + .../integrationtest/KubernetesV2Suite.scala | 57 +- .../ResourceStagingServerLauncher.scala | 30 +- .../SparkReadinessWatcher.scala | 41 ++ .../StaticAssetServerLauncher.scala | 64 +++ 
.../docker/SparkDockerImageBuilder.scala | 3 + 46 files changed, 2620 insertions(+), 1233 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes/v1 => kubernetes/submit}/KubernetesFileUtils.scala (88%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/{rest/kubernetes/v2/StagedResourceIdentifier.scala => kubernetes/submit/v2/SubmittedResources.scala} (51%) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala create mode 100644 
resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/main/docker/integration-test-asset-server/Dockerfile create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala create mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/StaticAssetServerLauncher.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala new file mode 100644 index 0000000000000..45b881a8a3737 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala @@ -0,0 +1,62 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret} + +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] trait InitContainerResourceStagingServerSecretPlugin { + + /** + * Configure the init-container to mount the secret files that allow it to retrieve dependencies + * from a resource staging server. + */ + def mountResourceStagingServerSecretIntoInitContainer( + initContainer: ContainerBuilder): ContainerBuilder + + /** + * Configure the pod to attach a Secret volume which hosts secret files allowing the + * init-container to retrieve dependencies from the resource staging server. 
+ */ + def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder): PodBuilder +} + +private[spark] class InitContainerResourceStagingServerSecretPluginImpl( + initContainerSecretName: String, + initContainerSecretMountPath: String) + extends InitContainerResourceStagingServerSecretPlugin { + + override def mountResourceStagingServerSecretIntoInitContainer( + initContainer: ContainerBuilder): ContainerBuilder = { + initContainer.addNewVolumeMount() + .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) + .withMountPath(initContainerSecretMountPath) + .endVolumeMount() + } + + override def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder): PodBuilder = { + basePod.editSpec() + .addNewVolume() + .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) + .withNewSecret() + .withSecretName(initContainerSecretName) + .endSecret() + .endVolume() + .endSpec() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala new file mode 100644 index 0000000000000..227420db4636d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EmptyDirVolumeSource, PodBuilder, VolumeMount, VolumeMountBuilder} + +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.v2.{ContainerNameEqualityPredicate, InitContainerUtil} + +private[spark] trait SparkPodInitContainerBootstrap { + /** + * Bootstraps an init-container that downloads dependencies to be used by a main container. + * Note that this primarily assumes that the init-container's configuration is being provided + * by a ConfigMap that was installed by some other component; that is, the implementation + * here makes no assumptions about how the init-container is specifically configured. For + * example, this class is unaware if the init-container is fetching remote dependencies or if + * it is fetching dependencies from a resource staging server. 
+ */ + def bootstrapInitContainerAndVolumes( + mainContainerName: String, originalPodSpec: PodBuilder): PodBuilder +} + +private[spark] class SparkPodInitContainerBootstrapImpl( + initContainerImage: String, + jarsDownloadPath: String, + filesDownloadPath: String, + downloadTimeoutMinutes: Long, + initContainerConfigMapName: String, + initContainerConfigMapKey: String, + resourceStagingServerSecretPlugin: Option[InitContainerResourceStagingServerSecretPlugin]) + extends SparkPodInitContainerBootstrap { + + override def bootstrapInitContainerAndVolumes( + mainContainerName: String, + originalPodSpec: PodBuilder): PodBuilder = { + val sharedVolumeMounts = Seq[VolumeMount]( + new VolumeMountBuilder() + .withName(INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME) + .withMountPath(jarsDownloadPath) + .build(), + new VolumeMountBuilder() + .withName(INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) + .withMountPath(filesDownloadPath) + .build()) + + val initContainer = new ContainerBuilder() + .withName(s"spark-init") + .withImage(initContainerImage) + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + .withMountPath(INIT_CONTAINER_PROPERTIES_FILE_DIR) + .endVolumeMount() + .addToVolumeMounts(sharedVolumeMounts: _*) + .addToArgs(INIT_CONTAINER_PROPERTIES_FILE_PATH) + val resolvedInitContainer = resourceStagingServerSecretPlugin.map { plugin => + plugin.mountResourceStagingServerSecretIntoInitContainer(initContainer) + }.getOrElse(initContainer).build() + val podWithBasicVolumes = InitContainerUtil.appendInitContainer( + originalPodSpec, resolvedInitContainer) + .editSpec() + .addNewVolume() + .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + .withNewConfigMap() + .withName(initContainerConfigMapName) + .addNewItem() + .withKey(initContainerConfigMapKey) + .withPath(INIT_CONTAINER_PROPERTIES_FILE_NAME) + .endItem() + .endConfigMap() + .endVolume() + .addNewVolume() + .withName(INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME) + .withEmptyDir(new EmptyDirVolumeSource()) + .endVolume() + .addNewVolume() + .withName(INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) + .withEmptyDir(new EmptyDirVolumeSource()) + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) + .addToVolumeMounts(sharedVolumeMounts: _*) + .endContainer() + .endSpec() + resourceStagingServerSecretPlugin.map { plugin => + plugin.addResourceStagingServerSecretVolumeToPod(podWithBasicVolumes) + }.getOrElse(podWithBasicVolumes) + } + +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 09b2d38cb8e38..f0a39fe359227 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -349,42 +349,43 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val RESOURCE_STAGING_SERVER_SSL_NAMESPACE = "kubernetes.resourceStagingServer" private[spark] val RESOURCE_STAGING_SERVER_CERT_PEM = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.serverCertPem") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.serverCertPem") .doc("Certificate PEM file to use when having the Kubernetes dependency server" + " listen on TLS.") .stringConf .createOptional private[spark] val 
RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyStorePasswordFile") .doc("File containing the keystore password for the Kubernetes dependency server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyPasswordFile") .doc("File containing the key password for the Kubernetes dependency server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_SSL_ENABLED = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.enabled") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.enabled") .doc("Whether or not to use SSL when communicating with the dependency server.") .booleanConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStore") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStore") .doc("File containing the trustStore to communicate with the Kubernetes dependency server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStorePassword") .doc("Password for the trustStore for talking to the dependency server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE = - ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.trustStoreType") + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStoreType") .doc("Type of trustStore for communicating with the dependency server.") .stringConf .createOptional @@ -397,64 +398,120 @@ package object config extends Logging { .createOptional private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER = - ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadJarsResourceIdentifier") + ConfigBuilder("spark.kubernetes.initcontainer.downloadJarsResourceIdentifier") .doc("Identifier for the jars tarball that was uploaded to the staging service.") .internal() .stringConf .createOptional private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION = - ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadJarsSecretLocation") + ConfigBuilder("spark.kubernetes.initcontainer.downloadJarsSecretLocation") .doc("Location of the application secret to use when the init-container contacts the" + " resource staging server to download jars.") .internal() .stringConf - .createWithDefault(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) + .createWithDefault(s"$INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH/" + + s"$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY") private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER = - ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesResourceIdentifier") + ConfigBuilder("spark.kubernetes.initcontainer.downloadFilesResourceIdentifier") .doc("Identifier for the files tarball that was uploaded to the staging service.") .internal() .stringConf .createOptional private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION = - ConfigBuilder("spark.kubernetes.driver.initcontainer.downloadFilesSecretLocation") + 
ConfigBuilder("spark.kubernetes.initcontainer.downloadFilesSecretLocation") .doc("Location of the application secret to use when the init-container contacts the" + " resource staging server to download files.") .internal() .stringConf - .createWithDefault(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) + .createWithDefault( + s"$INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY") + + private[spark] val INIT_CONTAINER_REMOTE_JARS = + ConfigBuilder("spark.kubernetes.initcontainer.remoteJars") + .doc("Comma-separated list of jar URIs to download in the init-container. This is" + + " calculated from spark.jars.") + .internal() + .stringConf + .createOptional + + private[spark] val INIT_CONTAINER_REMOTE_FILES = + ConfigBuilder("spark.kubernetes.initcontainer.remoteFiles") + .doc("Comma-separated list of file URIs to download in the init-container. This is" + + " calculated from spark.files.") + .internal() + .stringConf + .createOptional private[spark] val INIT_CONTAINER_DOCKER_IMAGE = - ConfigBuilder("spark.kubernetes.driver.initcontainer.docker.image") - .doc("Image for the driver's init-container that downloads mounted dependencies.") + ConfigBuilder("spark.kubernetes.initcontainer.docker.image") + .doc("Image for the driver and executor's init-container that downloads dependencies.") .stringConf - .createWithDefault(s"spark-driver-init:$sparkVersion") + .createWithDefault(s"spark-init:$sparkVersion") - private[spark] val DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION = - ConfigBuilder("spark.kubernetes.driver.mountdependencies.jarsDownloadDir") - .doc("Location to download local jars to in the driver. When using spark-submit, this" + - " directory must be empty and will be mounted as an empty directory volume on the" + - " driver pod.") + private[spark] val INIT_CONTAINER_JARS_DOWNLOAD_LOCATION = + ConfigBuilder("spark.kubernetes.mountdependencies.jarsDownloadDir") + .doc("Location to download jars to in the driver and executors. When using" + + " spark-submit, this directory must be empty and will be mounted as an empty directory" + + " volume on the driver and executor pod.") .stringConf - .createWithDefault("/var/spark-data/spark-local-jars") + .createWithDefault("/var/spark-data/spark-submitted-jars") - private[spark] val DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION = - ConfigBuilder("spark.kubernetes.driver.mountdependencies.filesDownloadDir") - .doc("Location to download local files to in the driver. When using spark-submit, this" + - " directory must be empty and will be mounted as an empty directory volume on the" + - " driver pod.") + private[spark] val INIT_CONTAINER_FILES_DOWNLOAD_LOCATION = + ConfigBuilder("spark.kubernetes.mountdependencies.filesDownloadDir") + .doc("Location to download files to in the driver and executors. 
When using" + + " spark-submit, this directory must be empty and will be mounted as an empty directory" + + " volume on the driver and executor pods.") .stringConf - .createWithDefault("/var/spark-data/spark-local-files") + .createWithDefault("/var/spark-data/spark-submitted-files") - private[spark] val DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT = + private[spark] val INIT_CONTAINER_MOUNT_TIMEOUT = ConfigBuilder("spark.kubernetes.mountdependencies.mountTimeout") .doc("Timeout before aborting the attempt to download and unpack local dependencies from" + - " the dependency staging server when initializing the driver pod.") + " remote locations and the resource staging server when initializing the driver and" + + " executor pods.") .timeConf(TimeUnit.MINUTES) .createWithDefault(5) + private[spark] val EXECUTOR_INIT_CONTAINER_CONFIG_MAP = + ConfigBuilder("spark.kubernetes.initcontainer.executor.configmapname") + .doc("Name of the config map to use in the init-container that retrieves submitted files" + + " for the executor.") + .internal() + .stringConf + .createOptional + + private[spark] val EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY = + ConfigBuilder("spark.kubernetes.initcontainer.executor.configmapkey") + .doc("Key for the entry in the init container config map for submitted files that" + + " corresponds to the properties for this init-container.") + .internal() + .stringConf + .createOptional + + private[spark] val EXECUTOR_INIT_CONTAINER_SECRET = + ConfigBuilder("spark.kubernetes.initcontainer.executor.stagingServerSecret.name") + .doc("Name of the secret to mount into the init-container that retrieves submitted files.") + .internal() + .stringConf + .createOptional + + private[spark] val EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR = + ConfigBuilder("spark.kubernetes.initcontainer.executor.stagingServerSecret.mountDir") + .doc("Directory to mount the resource staging server secrets into for the executor" + + " init-containers. This must be exactly the same as the directory that the submission" + + " client mounted the secret into because the config map's properties specify the" + + " secret location as to be the same between the driver init-container and the executor" + + " init-container. 
Thus the submission client will always set this and the driver will" + + " never rely on a constant or convention, in order to protect against cases where the" + + " submission client has a different version from the driver itself, and hence might" + + " have different constants loaded in constants.scala.") + .internal() + .stringConf + .createOptional + private[spark] def resolveK8sMaster(rawMasterString: String): String = { if (!rawMasterString.startsWith("k8s://")) { throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 27e47eb61933f..4c4f7b9fc3b23 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -70,8 +70,8 @@ package object constants { private[spark] val ENV_EXECUTOR_ID = "SPARK_EXECUTOR_ID" private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" - private[spark] val ENV_UPLOADED_JARS_DIR = "SPARK_UPLOADED_JARS_DIR" private[spark] val ENV_SUBMIT_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" + private[spark] val ENV_EXECUTOR_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" private[spark] val ENV_MOUNTED_CLASSPATH = "SPARK_MOUNTED_CLASSPATH" private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" @@ -92,26 +92,21 @@ package object constants { // V2 submission init container private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" - private[spark] val INIT_CONTAINER_SECRETS_VOLUME_NAME = "dependency-secret" - private[spark] val INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH = "/mnt/secrets/spark-init" - private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY = "downloadJarsSecret" - private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY = "downloadFilesSecret" - private[spark] val INIT_CONTAINER_TRUSTSTORE_SECRET_KEY = "trustStore" - private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH = - s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY" - private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH = - s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY" - private[spark] val INIT_CONTAINER_TRUSTSTORE_PATH = - s"$INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH/$INIT_CONTAINER_TRUSTSTORE_SECRET_KEY" - private[spark] val INIT_CONTAINER_DOWNLOAD_CREDENTIALS_PATH = - "/mnt/secrets/kubernetes-credentials" - private[spark] val INIT_CONTAINER_CONFIG_MAP_KEY = "init-driver" - private[spark] val INIT_CONTAINER_PROPERTIES_FILE_VOLUME = "init-container-properties" - private[spark] val INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH = "/etc/spark-init/" - private[spark] val INIT_CONTAINER_PROPERTIES_FILE_NAME = "init-driver.properties" + private[spark] val INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH = + "/mnt/secrets/spark-init" + private[spark] val INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY = + "downloadSubmittedJarsSecret" + private[spark] val INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY = + "downloadSubmittedFilesSecret" + private[spark] val INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY = "trustStore" + private[spark] val 
INIT_CONTAINER_CONFIG_MAP_KEY = "download-submitted-files" + private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME = "download-jars-volume" + private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME = "download-files" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_VOLUME = "spark-init-properties" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_DIR = "/etc/spark-init" + private[spark] val INIT_CONTAINER_PROPERTIES_FILE_NAME = "spark-init.properties" private[spark] val INIT_CONTAINER_PROPERTIES_FILE_PATH = - s"$INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH/$INIT_CONTAINER_PROPERTIES_FILE_NAME" - private[spark] val DOWNLOAD_JARS_VOLUME_NAME = "download-jars" - private[spark] val DOWNLOAD_FILES_VOLUME_NAME = "download-files" + s"$INIT_CONTAINER_PROPERTIES_FILE_DIR/$INIT_CONTAINER_PROPERTIES_FILE_NAME" private[spark] val DEFAULT_SHUFFLE_MOUNT_NAME = "shuffle" + private[spark] val INIT_CONTAINER_SECRET_VOLUME_NAME = "spark-init-secret" } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala similarity index 88% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala index b8e644219097e..1b0af3fa9fb01 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesFileUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v1 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.util.Utils @@ -41,4 +41,7 @@ private[spark] object KubernetesFileUtils { Option(Utils.resolveURI(uri).getScheme).getOrElse("file") == "file" } + def getOnlyRemoteFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, scheme => scheme != "file" && scheme != "local") + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index a4dfe90f71a8a..0f1e7886a1ba2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -33,7 +33,8 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesFileUtils, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils +import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala index 10ffddcd7e7fc..174e9c57a65ca 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala @@ -29,7 +29,8 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v1.{KubernetesFileUtils, PemsToKeyStoreConverter} +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils +import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index a70c93942ffb5..a403a91840bd6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -19,11 +19,10 @@ package org.apache.spark.deploy.kubernetes.submit.v2 import 
java.io.File import java.util.Collections -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, HasMetadata, OwnerReferenceBuilder, PodBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder} import scala.collection.JavaConverters._ -import scala.collection.mutable -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException} +import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.internal.Logging @@ -35,33 +34,25 @@ import org.apache.spark.util.Utils * * This class is responsible for instantiating Kubernetes resources that allow a Spark driver to * run in a pod on the Kubernetes cluster with the Spark configurations specified by spark-submit. - * Application submitters that desire to provide their application's dependencies from their local - * disk must provide a resource staging server URI to this client so that the client can push the - * local resources to the resource staging server and have the driver pod pull the resources in an - * init-container. Interactions with the resource staging server are offloaded to the - * {@link MountedDependencyManager} class. If instead the application submitter has their - * dependencies pre-staged in remote locations like HDFS or their own HTTP servers already, then - * the mounted dependency manager is bypassed entirely, but the init-container still needs to - * fetch these remote dependencies (TODO https://github.com/apache-spark-on-k8s/spark/issues/238). + * The API of this class makes it such that much of the specific behavior can be stubbed for + * testing; most of the detailed logic must be dependency-injected when constructing an instance + * of this client. Therefore the submission process is designed to be as modular as possible, + * where different steps of submission should be factored out into separate classes. 
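As the rewritten class comment above describes, the v2 Client receives all of its collaborators through its constructor. A minimal sketch of what that enables in a unit test, assuming Mockito is available on the test classpath (the class and parameter names are taken from this patch; the literal application values are invented for illustration):

import org.apache.spark.SparkConf
import org.mockito.Mockito.mock

// Construct the client against stubbed collaborators; a real test would configure the
// mocks' return values before exercising run().
val clientProvider = mock(classOf[SubmissionKubernetesClientProvider])
val initContainerComponents = mock(classOf[DriverInitContainerComponentsProvider])
val client = new Client(
  appName = "spark-pi",
  kubernetesAppId = "spark-pi-1",
  mainClass = "org.apache.spark.examples.SparkPi",
  sparkConf = new SparkConf(false),
  appArgs = Array.empty[String],
  sparkJars = Seq("local:///opt/spark/examples/jars/spark-examples.jar"),
  sparkFiles = Seq.empty,
  kubernetesClientProvider = clientProvider,
  initContainerComponentsProvider = initContainerComponents)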
*/ private[spark] class Client( + appName: String, + kubernetesAppId: String, mainClass: String, sparkConf: SparkConf, appArgs: Array[String], - mainAppResource: String, + sparkJars: Seq[String], + sparkFiles: Seq[String], kubernetesClientProvider: SubmissionKubernetesClientProvider, - mountedDependencyManagerProvider: MountedDependencyManagerProvider) extends Logging { + initContainerComponentsProvider: DriverInitContainerComponentsProvider) extends Logging { - private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) - private val master = resolveK8sMaster(sparkConf.get("spark.master")) - private val launchTime = System.currentTimeMillis - private val appName = sparkConf.getOption("spark.app.name") - .getOrElse("spark") - private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) .getOrElse(kubernetesAppId) private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val maybeStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) private val memoryOverheadMb = sparkConf .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) @@ -70,22 +61,15 @@ private[spark] class Client( private val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) private val customAnnotations = sparkConf.get(KUBERNETES_DRIVER_ANNOTATIONS) - private val sparkJars = sparkConf.getOption("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) ++ - Option(mainAppResource) - .filterNot(_ == SparkLauncher.NO_RESOURCE) - .toSeq - private val sparkFiles = sparkConf.getOption("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) private val driverExtraClasspath = sparkConf.get( org.apache.spark.internal.config.DRIVER_CLASS_PATH) private val driverJavaOptions = sparkConf.get( org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) def run(): Unit = { + validateNoDuplicateFileNames(sparkJars) + validateNoDuplicateFileNames(sparkFiles) val parsedCustomLabels = parseKeyValuePairs(customLabels, KUBERNETES_DRIVER_LABELS.key, "labels") require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + @@ -133,50 +117,39 @@ private[spark] class Client( .addToContainers(driverContainer) .endSpec() - val nonDriverPodKubernetesResources = mutable.Buffer[HasMetadata]() - val resolvedJars = mutable.Buffer[String]() - val resolvedFiles = mutable.Buffer[String]() - val driverPodWithMountedDeps = maybeStagingServerUri.map { stagingServerUri => - val mountedDependencyManager = mountedDependencyManagerProvider.getMountedDependencyManager( - kubernetesAppId, - stagingServerUri, - allLabels, - namespace, - sparkJars, - sparkFiles) - val jarsResourceIdentifier = mountedDependencyManager.uploadJars() - val filesResourceIdentifier = mountedDependencyManager.uploadFiles() - val initContainerKubernetesSecret = mountedDependencyManager.buildInitContainerSecret( - jarsResourceIdentifier.resourceSecret, filesResourceIdentifier.resourceSecret) - val initContainerConfigMap = mountedDependencyManager.buildInitContainerConfigMap( - jarsResourceIdentifier.resourceId, filesResourceIdentifier.resourceId) - resolvedJars ++= mountedDependencyManager.resolveSparkJars() - resolvedFiles ++= mountedDependencyManager.resolveSparkFiles() - nonDriverPodKubernetesResources += initContainerKubernetesSecret - nonDriverPodKubernetesResources += 
initContainerConfigMap - mountedDependencyManager.configurePodToMountLocalDependencies( - driverContainer.getName, initContainerKubernetesSecret, initContainerConfigMap, basePod) - }.getOrElse { - sparkJars.map(Utils.resolveURI).foreach { jar => - require(Option.apply(jar.getScheme).getOrElse("file") != "file", - "When submitting with local jars, a resource staging server must be provided to" + - s" deploy your jars into the driver pod. Cannot send jar with URI $jar.") - } - sparkFiles.map(Utils.resolveURI).foreach { file => - require(Option.apply(file.getScheme).getOrElse("file") != "file", - "When submitting with local files, a resource staging server must be provided to" + - s" deploy your files into the driver pod. Cannot send file with URI $file") - } - resolvedJars ++= sparkJars - resolvedFiles ++= sparkFiles - basePod + val maybeSubmittedDependencyUploader = initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(allLabels) + val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => + SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) } - val resolvedSparkConf = sparkConf.clone() - if (resolvedJars.nonEmpty) { - resolvedSparkConf.set("spark.jars", resolvedJars.mkString(",")) + val maybeSecretBuilder = initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceIdentifiers.map(_.secrets())) + val maybeSubmittedDependenciesSecret = maybeSecretBuilder.map(_.build()) + val initContainerConfigMap = initContainerComponentsProvider + .provideInitContainerConfigMapBuilder(maybeSubmittedResourceIdentifiers.map(_.ids())) + .build() + val podWithInitContainer = initContainerComponentsProvider + .provideInitContainerBootstrap() + .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod) + + val driverOwnedResources = Seq(initContainerConfigMap) ++ + maybeSubmittedDependenciesSecret.toSeq + + val containerLocalizedFilesResolver = initContainerComponentsProvider + .provideContainerLocalizedFilesResolver() + val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() + val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() + + val executorInitContainerConfiguration = initContainerComponentsProvider + .provideExecutorInitContainerConfiguration() + val resolvedSparkConf = executorInitContainerConfiguration + .configureSparkConfForExecutorInitContainer(sparkConf) + if (resolvedSparkJars.nonEmpty) { + resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) } - if (resolvedFiles.nonEmpty) { - resolvedSparkConf.set("spark.files", resolvedFiles.mkString(",")) + if (resolvedSparkFiles.nonEmpty) { + resolvedSparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) } resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) resolvedSparkConf.set("spark.app.id", kubernetesAppId) @@ -188,19 +161,16 @@ private[spark] class Client( resolvedSparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => resolvedSparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN.key, "") } - - val mountedClassPath = resolvedJars.map(Utils.resolveURI).filter { jarUri => - val scheme = Option.apply(jarUri.getScheme).getOrElse("file") - scheme == "local" || scheme == "file" - }.map(_.getPath).mkString(File.pathSeparator) - val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { case (confKey, confValue) => - s"-D$confKey=$confValue" + val resolvedLocalClasspath = containerLocalizedFilesResolver + 
.resolveSubmittedAndRemoteSparkJars() + val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { + case (confKey, confValue) => s"-D$confKey=$confValue" }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val resolvedDriverPod = driverPodWithMountedDeps.editSpec() + val resolvedDriverPod = podWithInitContainer.editSpec() .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) .addNewEnv() .withName(ENV_MOUNTED_CLASSPATH) - .withValue(mountedClassPath) + .withValue(resolvedLocalClasspath.mkString(File.pathSeparator)) .endEnv() .addNewEnv() .withName(ENV_DRIVER_JAVA_OPTS) @@ -218,11 +188,11 @@ private[spark] class Client( .withKind(createdDriverPod.getKind) .withController(true) .build() - nonDriverPodKubernetesResources.foreach { resource => + driverOwnedResources.foreach { resource => val originalMetadata = resource.getMetadata originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) } - kubernetesClient.resourceList(nonDriverPodKubernetesResources: _*).createOrReplace() + kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() } catch { case e: Throwable => kubernetesClient.pods().delete(createdDriverPod) @@ -231,6 +201,17 @@ private[spark] class Client( } } + private def validateNoDuplicateFileNames(allFiles: Seq[String]): Unit = { + val fileNamesToUris = allFiles.map { file => + (new File(Utils.resolveURI(file).getPath).getName, file) + } + fileNamesToUris.groupBy(_._1).foreach { + case (fileName, urisWithFileName) => + require(urisWithFileName.size == 1, "Cannot add multiple files with the same name, but" + + s" file name $fileName is shared by all of these URIs: $urisWithFileName") + } + } + private def parseKeyValuePairs( maybeKeyValues: Option[String], configKey: String, @@ -249,3 +230,46 @@ private[spark] class Client( }).getOrElse(Map.empty[String, String]) } } + +private[spark] object Client { + def main(args: Array[String]): Unit = { + val sparkConf = new SparkConf(true) + val mainAppResource = args(0) + val mainClass = args(1) + val appArgs = args.drop(2) + run(sparkConf, mainAppResource, mainClass, appArgs) + } + + def run( + sparkConf: SparkConf, + mainAppResource: String, + mainClass: String, + appArgs: Array[String]): Unit = { + val sparkJars = sparkConf.getOption("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) ++ + Option(mainAppResource) + .filterNot(_ == SparkLauncher.NO_RESOURCE) + .toSeq + val launchTime = System.currentTimeMillis + val sparkFiles = sparkConf.getOption("spark.files") + .map(_.split(",")) + .getOrElse(Array.empty[String]) + val appName = sparkConf.getOption("spark.app.name") + .getOrElse("spark") + val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( + sparkConf, kubernetesAppId, sparkJars, sparkFiles) + val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) + new Client( + appName, + kubernetesAppId, + mainClass, + sparkConf, + appArgs, + sparkJars, + sparkFiles, + kubernetesClientProvider, + initContainerComponentsProvider).run() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala new file mode 100644 index 0000000000000..5505d87fa8072 --- /dev/null +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala @@ -0,0 +1,68 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +import org.apache.spark.util.Utils + +private[spark] trait ContainerLocalizedFilesResolver { + def resolveSubmittedAndRemoteSparkJars(): Seq[String] + def resolveSubmittedSparkJars(): Seq[String] + def resolveSubmittedSparkFiles(): Seq[String] +} + +private[spark] class ContainerLocalizedFilesResolverImpl( + sparkJars: Seq[String], + sparkFiles: Seq[String], + jarsDownloadPath: String, + filesDownloadPath: String) extends ContainerLocalizedFilesResolver { + + override def resolveSubmittedAndRemoteSparkJars(): Seq[String] = { + sparkJars.map { jar => + val jarUri = Utils.resolveURI(jar) + Option(jarUri.getScheme).getOrElse("file") match { + case "local" => + jarUri.getPath + case _ => + val jarFileName = new File(jarUri.getPath).getName + s"$jarsDownloadPath/$jarFileName" + } + } + } + + override def resolveSubmittedSparkJars(): Seq[String] = { + resolveSubmittedFiles(sparkJars, jarsDownloadPath) + } + + override def resolveSubmittedSparkFiles(): Seq[String] = { + resolveSubmittedFiles(sparkFiles, filesDownloadPath) + } + + private def resolveSubmittedFiles(files: Seq[String], downloadPath: String): Seq[String] = { + files.map { file => + val fileUri = Utils.resolveURI(file) + Option(fileUri.getScheme).getOrElse("file") match { + case "file" => + val fileName = new File(fileUri.getPath).getName + s"$downloadPath/$fileName" + case _ => + file + } + } + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala new file mode 100644 index 0000000000000..5b649735f2b3d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala @@ -0,0 +1,155 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
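To make the ContainerLocalizedFilesResolverImpl added above concrete, here is a hedged usage sketch; the URIs and download directories are invented for illustration, while the class and parameter names come from this patch:

val resolver = new ContainerLocalizedFilesResolverImpl(
  sparkJars = Seq(
    "file:///home/user/app.jar",           // submitter-local jar
    "hdfs://namenode:8020/jars/dep.jar",   // remote jar fetched by the init-container
    "local:///opt/spark/jars/builtin.jar"  // already present inside the driver image
  ),
  sparkFiles = Seq.empty,
  jarsDownloadPath = "/var/spark-data/spark-jars",
  filesDownloadPath = "/var/spark-data/spark-files")

resolver.resolveSubmittedSparkJars()
// -> Seq("/var/spark-data/spark-jars/app.jar",
//        "hdfs://namenode:8020/jars/dep.jar",
//        "local:///opt/spark/jars/builtin.jar")
resolver.resolveSubmittedAndRemoteSparkJars()
// -> Seq("/var/spark-data/spark-jars/app.jar",
//        "/var/spark-data/spark-jars/dep.jar",
//        "/opt/spark/jars/builtin.jar")

Only file: URIs are rewritten to the download directory when resolving spark.jars, whereas the classpath resolution maps everything except local: URIs to that directory.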
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl + +/** + * Interface that wraps the provision of everything the submission client needs to set up the + * driver's init-container. This is all wrapped in the same place to ensure that related + * components are being constructed with consistent configurations with respect to one another. + */ +private[spark] trait DriverInitContainerComponentsProvider { + + def provideInitContainerConfigMapBuilder( + maybeSubmittedResourceIds: Option[SubmittedResourceIds]) + : SparkInitContainerConfigMapBuilder + def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver + def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration + def provideInitContainerSubmittedDependencyUploader( + driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] + def provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) + : Option[SubmittedDependencySecretBuilder] + def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap +} + +private[spark] class DriverInitContainerComponentsProviderImpl( + sparkConf: SparkConf, + kubernetesAppId: String, + sparkJars: Seq[String], + sparkFiles: Seq[String]) + extends DriverInitContainerComponentsProvider { + + private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) + private val resourceStagingServerSslOptions = new SecurityManager(sparkConf) + .getSSLOptions(RESOURCE_STAGING_SERVER_SSL_NAMESPACE) + private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) + private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) + private val maybeSecretName = maybeResourceStagingServerUri.map { _ => + s"$kubernetesAppId-init-secret" + } + private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + private val configMapName = s"$kubernetesAppId-init-config" + private val configMapKey = s"$kubernetesAppId-init-config-key" + private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) + private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) + + override def provideInitContainerConfigMapBuilder( + maybeSubmittedResourceIds: Option[SubmittedResourceIds]) + : SparkInitContainerConfigMapBuilder = { + val submittedDependencyConfigPlugin = for { + stagingServerUri <- maybeResourceStagingServerUri + jarsResourceId <- maybeSubmittedResourceIds.map(_.jarsResourceId) + filesResourceId <- maybeSubmittedResourceIds.map(_.filesResourceId) + } yield { + new SubmittedDependencyInitContainerConfigPluginImpl( + stagingServerUri, + jarsResourceId, + filesResourceId, + 
INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, + resourceStagingServerSslOptions) + } + new SparkInitContainerConfigMapBuilderImpl( + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath, + configMapName, + configMapKey, + submittedDependencyConfigPlugin) + } + + override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { + new ContainerLocalizedFilesResolverImpl( + sparkJars, sparkFiles, jarsDownloadPath, filesDownloadPath) + } + + override def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { + new ExecutorInitContainerConfigurationImpl( + maybeSecretName, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, + configMapName, + configMapKey) + } + + override def provideInitContainerSubmittedDependencyUploader( + driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] = { + maybeResourceStagingServerUri.map { stagingServerUri => + new SubmittedDependencyUploaderImpl( + kubernetesAppId, + driverPodLabels, + namespace, + stagingServerUri, + sparkJars, + sparkFiles, + resourceStagingServerSslOptions, + RetrofitClientFactoryImpl) + } + } + + override def provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) + : Option[SubmittedDependencySecretBuilder] = { + for { + secretName <- maybeSecretName + jarsResourceSecret <- maybeSubmittedResourceSecrets.map(_.jarsResourceSecret) + filesResourceSecret <- maybeSubmittedResourceSecrets.map(_.filesResourceSecret) + } yield { + new SubmittedDependencySecretBuilderImpl( + secretName, + jarsResourceSecret, + filesResourceSecret, + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + resourceStagingServerSslOptions) + } + } + + override def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap = { + val resourceStagingServerSecretPlugin = maybeSecretName.map { secret => + new InitContainerResourceStagingServerSecretPluginImpl( + secret, INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) + } + new SparkPodInitContainerBootstrapImpl( + initContainerImage, + jarsDownloadPath, + filesDownloadPath, + downloadTimeoutMinutes, + configMapName, + configMapKey, + resourceStagingServerSecretPlugin) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala new file mode 100644 index 0000000000000..adfdc060f0d0f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ + +private[spark] trait ExecutorInitContainerConfiguration { + /** + * Provide the driver with configuration that allows it to configure executors to + * fetch resources in the same way the driver does. + */ + def configureSparkConfForExecutorInitContainer(originalSparkConf: SparkConf): SparkConf +} + +private[spark] class ExecutorInitContainerConfigurationImpl( + initContainerSecretName: Option[String], + initContainerSecretMountDir: String, + initContainerConfigMapName: String, + initContainerConfigMapKey: String) + extends ExecutorInitContainerConfiguration { + def configureSparkConfForExecutorInitContainer(originalSparkConf: SparkConf): SparkConf = { + val configuredSparkConf = originalSparkConf.clone() + .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP, + initContainerConfigMapName) + .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY, + initContainerConfigMapKey) + .set(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR, initContainerSecretMountDir) + initContainerSecretName.map { secret => + configuredSparkConf.set(EXECUTOR_INIT_CONTAINER_SECRET, secret) + }.getOrElse(configuredSparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala new file mode 100644 index 0000000000000..0526ca53baaab --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
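The ExecutorInitContainerConfigurationImpl added above can be sketched in use as follows (a hedged example; the secret, config map, and mount directory values are invented, while the class and parameter names are from this patch):

import org.apache.spark.SparkConf

val executorInitContainerConf = new ExecutorInitContainerConfigurationImpl(
  initContainerSecretName = Some("spark-pi-1-init-secret"),
  initContainerSecretMountDir = "/mnt/secrets/spark-init",
  initContainerConfigMapName = "spark-pi-1-init-config",
  initContainerConfigMapKey = "spark-pi-1-init-config-key")
// Returns a copy of the given SparkConf with the config map name/key, secret name, and
// secret mount directory set, so executors can run the same init-container as the driver.
val confForExecutors = executorInitContainerConf
  .configureSparkConfForExecutorInitContainer(new SparkConf(false))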
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import io.fabric8.kubernetes.api.model.{Container, PodBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] object InitContainerUtil { + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + def appendInitContainer( + originalPodSpec: PodBuilder, initContainer: Container): PodBuilder = { + val resolvedInitContainers = originalPodSpec + .editMetadata() + .getAnnotations + .asScala + .get(INIT_CONTAINER_ANNOTATION) + .map { existingInitContainerAnnotation => + val existingInitContainers = OBJECT_MAPPER.readValue( + existingInitContainerAnnotation, classOf[List[Container]]) + existingInitContainers ++ Seq(initContainer) + }.getOrElse(Seq(initContainer)) + val resolvedSerializedInitContainers = OBJECT_MAPPER.writeValueAsString(resolvedInitContainers) + originalPodSpec + .editMetadata() + .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) + .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) + .endMetadata() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala deleted file mode 100644 index 9dbbcd0d56a3b..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerImpl.scala +++ /dev/null @@ -1,324 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit.v2 - -import java.io.{File, FileOutputStream, StringWriter} -import java.util.Properties -import javax.ws.rs.core.MediaType - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, ContainerBuilder, EmptyDirVolumeSource, PodBuilder, Secret, SecretBuilder, VolumeMount, VolumeMountBuilder} -import okhttp3.RequestBody -import retrofit2.Call -import scala.collection.JavaConverters._ -import scala.collection.mutable - -import org.apache.spark.{SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.CompressionUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v1.{KubernetesCredentials, KubernetesFileUtils} -import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourceIdentifier} -import org.apache.spark.util.Utils - -private[spark] trait MountedDependencyManager { - - /** - * Upload submitter-local jars to the resource staging server. - * @return The resource ID and secret to use to retrieve these jars. - */ - def uploadJars(): StagedResourceIdentifier - - /** - * Upload submitter-local files to the resource staging server. - * @return The resource ID and secret to use to retrieve these files. - */ - def uploadFiles(): StagedResourceIdentifier - - def configurePodToMountLocalDependencies( - driverContainerName: String, - initContainerSecret: Secret, - initContainerConfigMap: ConfigMap, - originalPodSpec: PodBuilder): PodBuilder - - def buildInitContainerSecret(jarsSecret: String, filesSecret: String): Secret - - def buildInitContainerConfigMap( - jarsResourceId: String, filesResourceId: String): ConfigMap - - /** - * Convert the Spark jar paths from their locations on the submitter's disk to - * the locations they will be downloaded to on the driver's disk. - */ - def resolveSparkJars(): Seq[String] - - /** - * Convert the Spark file paths from their locations on the submitter's disk to - * the locations they will be downloaded to on the driver's disk. - */ - def resolveSparkFiles(): Seq[String] -} - -/** - * Default implementation of a MountedDependencyManager that is backed by a - * Resource Staging Service. 
- */ -private[spark] class MountedDependencyManagerImpl( - kubernetesAppId: String, - podLabels: Map[String, String], - podNamespace: String, - stagingServerUri: String, - initContainerImage: String, - jarsDownloadPath: String, - filesDownloadPath: String, - downloadTimeoutMinutes: Long, - sparkJars: Seq[String], - sparkFiles: Seq[String], - stagingServiceSslOptions: SSLOptions, - retrofitClientFactory: RetrofitClientFactory) extends MountedDependencyManager { - private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) - - private def localUriStringsToFiles(uris: Seq[String]): Iterable[File] = { - KubernetesFileUtils.getOnlySubmitterLocalFiles(uris) - .map(Utils.resolveURI) - .map(uri => new File(uri.getPath)) - } - private def localJars: Iterable[File] = localUriStringsToFiles(sparkJars) - private def localFiles: Iterable[File] = localUriStringsToFiles(sparkFiles) - - override def uploadJars(): StagedResourceIdentifier = doUpload(localJars, "uploaded-jars") - override def uploadFiles(): StagedResourceIdentifier = doUpload(localFiles, "uploaded-files") - - private def doUpload(files: Iterable[File], fileNamePrefix: String): StagedResourceIdentifier = { - val filesDir = Utils.createTempDir(namePrefix = fileNamePrefix) - val filesTgz = new File(filesDir, s"$fileNamePrefix.tgz") - Utils.tryWithResource(new FileOutputStream(filesTgz)) { filesOutputStream => - CompressionUtils.writeTarGzipToStream(filesOutputStream, files.map(_.getAbsolutePath)) - } - // TODO provide credentials properly when the staging server monitors the Kubernetes API. - val kubernetesCredentialsString = OBJECT_MAPPER.writer() - .writeValueAsString(KubernetesCredentials(None, None, None, None)) - val labelsAsString = OBJECT_MAPPER.writer().writeValueAsString(podLabels) - - val filesRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) - - val kubernetesCredentialsBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) - - val namespaceRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), podNamespace) - - val labelsRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsAsString) - - val service = retrofitClientFactory.createRetrofitClient( - stagingServerUri, - classOf[ResourceStagingServiceRetrofit], - stagingServiceSslOptions) - val uploadResponse = service.uploadResources( - labelsRequestBody, namespaceRequestBody, filesRequestBody, kubernetesCredentialsBody) - getTypedResponseResult(uploadResponse) - } - - override def configurePodToMountLocalDependencies( - driverContainerName: String, - initContainerSecret: Secret, - initContainerConfigMap: ConfigMap, - originalPodSpec: PodBuilder): PodBuilder = { - val sharedVolumeMounts = Seq[VolumeMount]( - new VolumeMountBuilder() - .withName(DOWNLOAD_JARS_VOLUME_NAME) - .withMountPath(jarsDownloadPath) - .build(), - new VolumeMountBuilder() - .withName(DOWNLOAD_FILES_VOLUME_NAME) - .withMountPath(filesDownloadPath) - .build()) - - val initContainers = Seq(new ContainerBuilder() - .withName("spark-driver-init") - .withImage(initContainerImage) - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() - .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) - .withMountPath(INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH) - .endVolumeMount() - .addNewVolumeMount() - .withName(INIT_CONTAINER_SECRETS_VOLUME_NAME) - .withMountPath(INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH) - .endVolumeMount() - 
.addToVolumeMounts(sharedVolumeMounts: _*) - .addToArgs(INIT_CONTAINER_PROPERTIES_FILE_PATH) - .build()) - - // Make sure we don't override any user-provided init containers by just appending ours to - // the existing list. - val resolvedInitContainers = originalPodSpec - .editMetadata() - .getAnnotations - .asScala - .get(INIT_CONTAINER_ANNOTATION) - .map { existingInitContainerAnnotation => - val existingInitContainers = OBJECT_MAPPER.readValue( - existingInitContainerAnnotation, classOf[List[Container]]) - existingInitContainers ++ initContainers - }.getOrElse(initContainers) - val resolvedSerializedInitContainers = OBJECT_MAPPER.writeValueAsString(resolvedInitContainers) - originalPodSpec - .editMetadata() - .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) - .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) - .endMetadata() - .editSpec() - .addNewVolume() - .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) - .withNewConfigMap() - .withName(initContainerConfigMap.getMetadata.getName) - .addNewItem() - .withKey(INIT_CONTAINER_CONFIG_MAP_KEY) - .withPath(INIT_CONTAINER_PROPERTIES_FILE_NAME) - .endItem() - .endConfigMap() - .endVolume() - .addNewVolume() - .withName(DOWNLOAD_JARS_VOLUME_NAME) - .withEmptyDir(new EmptyDirVolumeSource()) - .endVolume() - .addNewVolume() - .withName(DOWNLOAD_FILES_VOLUME_NAME) - .withEmptyDir(new EmptyDirVolumeSource()) - .endVolume() - .addNewVolume() - .withName(INIT_CONTAINER_SECRETS_VOLUME_NAME) - .withNewSecret() - .withSecretName(initContainerSecret.getMetadata.getName) - .endSecret() - .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainerName)) - .addToVolumeMounts(sharedVolumeMounts: _*) - .addNewEnv() - .withName(ENV_UPLOADED_JARS_DIR) - .withValue(jarsDownloadPath) - .endEnv() - .endContainer() - .endSpec() - } - - override def buildInitContainerSecret(jarsSecret: String, filesSecret: String): Secret = { - val trustStoreBase64 = stagingServiceSslOptions.trustStore.map { trustStoreFile => - require(trustStoreFile.isFile, "Dependency server trustStore provided at" + - trustStoreFile.getAbsolutePath + " does not exist or is not a file.") - (INIT_CONTAINER_TRUSTSTORE_SECRET_KEY, - BaseEncoding.base64().encode(Files.toByteArray(trustStoreFile))) - }.toMap - val jarsSecretBase64 = BaseEncoding.base64().encode(jarsSecret.getBytes(Charsets.UTF_8)) - val filesSecretBase64 = BaseEncoding.base64().encode(filesSecret.getBytes(Charsets.UTF_8)) - val secretData = Map( - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY -> jarsSecretBase64, - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY -> filesSecretBase64) ++ - trustStoreBase64 - val kubernetesSecret = new SecretBuilder() - .withNewMetadata() - .withName(s"$kubernetesAppId-spark-init") - .endMetadata() - .addToData(secretData.asJava) - .build() - kubernetesSecret - } - - override def buildInitContainerConfigMap( - jarsResourceId: String, filesResourceId: String): ConfigMap = { - val initContainerProperties = new Properties() - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_URI.key, stagingServerUri) - initContainerProperties.setProperty(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.key, jarsDownloadPath) - initContainerProperties.setProperty(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.key, filesDownloadPath) - initContainerProperties.setProperty( - INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key, jarsResourceId) - initContainerProperties.setProperty( - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key, INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH) - 
initContainerProperties.setProperty( - INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key, filesResourceId) - initContainerProperties.setProperty( - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key, INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH) - initContainerProperties.setProperty(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT.key, - s"${downloadTimeoutMinutes}m") - stagingServiceSslOptions.trustStore.foreach { _ => - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key, - INIT_CONTAINER_TRUSTSTORE_PATH) - } - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_SSL_ENABLED.key, - stagingServiceSslOptions.enabled.toString) - stagingServiceSslOptions.trustStorePassword.foreach { password => - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) - } - stagingServiceSslOptions.trustStoreType.foreach { storeType => - initContainerProperties.setProperty(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) - } - val propertiesWriter = new StringWriter() - initContainerProperties.store(propertiesWriter, "Init-container properties.") - new ConfigMapBuilder() - .withNewMetadata() - .withName(s"$kubernetesAppId-init-properties") - .endMetadata() - .addToData(INIT_CONTAINER_CONFIG_MAP_KEY, propertiesWriter.toString) - .build() - } - - override def resolveSparkJars(): Seq[String] = resolveLocalFiles(sparkJars, jarsDownloadPath) - - override def resolveSparkFiles(): Seq[String] = resolveLocalFiles(sparkFiles, filesDownloadPath) - - private def resolveLocalFiles( - allFileUriStrings: Seq[String], localDownloadRoot: String): Seq[String] = { - val usedLocalFileNames = mutable.HashSet.empty[String] - val resolvedFiles = mutable.Buffer.empty[String] - for (fileUriString <- allFileUriStrings) { - val fileUri = Utils.resolveURI(fileUriString) - val resolvedFile = Option(fileUri.getScheme).getOrElse("file") match { - case "file" => - // Deduplication logic matches that of CompressionUtils#writeTarGzipToStream - val file = new File(fileUri.getPath) - val extension = Files.getFileExtension(file.getName) - val nameWithoutExtension = Files.getNameWithoutExtension(file.getName) - var resolvedFileName = file.getName - var deduplicationCounter = 1 - while (usedLocalFileNames.contains(resolvedFileName)) { - resolvedFileName = s"$nameWithoutExtension-$deduplicationCounter.$extension" - deduplicationCounter += 1 - } - s"file://$localDownloadRoot/$resolvedFileName" - case _ => fileUriString - } - resolvedFiles += resolvedFile - } - resolvedFiles - } - - private def getTypedResponseResult[T](call: Call[T]): T = { - val response = call.execute() - if (response.code() < 200 || response.code() >= 300) { - throw new SparkException("Unexpected response from dependency server when uploading" + - s" dependencies: ${response.code()}. Error body: " + - Option(response.errorBody()).map(_.string()).getOrElse("N/A")) - } - response.body() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala deleted file mode 100644 index 8f09112132b2c..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerProvider.scala +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit.v2 - -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl - -private[spark] trait MountedDependencyManagerProvider { - def getMountedDependencyManager( - kubernetesAppId: String, - stagingServerUri: String, - podLabels: Map[String, String], - podNamespace: String, - sparkJars: Seq[String], - sparkFiles: Seq[String]): MountedDependencyManager -} - -private[spark] class MountedDependencyManagerProviderImpl(sparkConf: SparkConf) - extends MountedDependencyManagerProvider { - override def getMountedDependencyManager( - kubernetesAppId: String, - stagingServerUri: String, - podLabels: Map[String, String], - podNamespace: String, - sparkJars: Seq[String], - sparkFiles: Seq[String]): MountedDependencyManager = { - val resourceStagingServerSslOptions = new SparkSecurityManager(sparkConf) - .getSSLOptions("kubernetes.resourceStagingServer") - new MountedDependencyManagerImpl( - kubernetesAppId, - podLabels, - podNamespace, - stagingServerUri, - sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE), - sparkConf.get(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION), - sparkConf.get(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION), - sparkConf.get(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT), - sparkJars, - sparkFiles, - resourceStagingServerSslOptions, - RetrofitClientFactoryImpl) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala new file mode 100644 index 0000000000000..cb9194552d2b6 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.StringWriter +import java.util.Properties + +import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder} + +/** + * Creates a config map from a map object, with a single given key + * and writing the map in a {@link java.util.Properties} format. + */ +private[spark] object PropertiesConfigMapFromScalaMapBuilder { + + def buildConfigMap( + configMapName: String, + configMapKey: String, + config: Map[String, String]): ConfigMap = { + val properties = new Properties() + config.foreach { case (key, value) => properties.setProperty(key, value) } + val propertiesWriter = new StringWriter() + properties.store(propertiesWriter, + s"Java properties built from Kubernetes config map with name: $configMapName" + + s" and config map key: $configMapKey") + new ConfigMapBuilder() + .withNewMetadata() + .withName(configMapName) + .endMetadata() + .addToData(configMapKey, propertiesWriter.toString) + .build() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala new file mode 100644 index 0000000000000..362fbbdf517dc --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import io.fabric8.kubernetes.api.model.ConfigMap + +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils + +private[spark] trait SparkInitContainerConfigMapBuilder { + /** + * Construct a config map that an init-container should reference for fetching + * remote dependencies. The config map includes the remote jars and files to download, + * as well as details to fetch files from a resource staging server, if applicable.
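As a rough usage sketch of the builder implemented just below (class and parameter names are from this patch; the URIs, paths, and config map names are invented): when no resource staging server is configured, only the remote URIs and the two download directories end up in the generated config map.

val initConfigMap = new SparkInitContainerConfigMapBuilderImpl(
  sparkJars = Seq("hdfs://namenode:8020/jars/app.jar"),
  sparkFiles = Seq.empty,
  jarsDownloadPath = "/var/spark-data/spark-jars",
  filesDownloadPath = "/var/spark-data/spark-files",
  configMapName = "spark-pi-1-init-config",
  configMapKey = "spark-pi-1-init-config-key",
  submittedDependenciesPlugin = None).build()
// initConfigMap.getData contains a single entry under configMapKey whose value is a
// java.util.Properties rendering of the download directories and the remote jar URI.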
+ */ + def build(): ConfigMap +} + +private[spark] class SparkInitContainerConfigMapBuilderImpl( + sparkJars: Seq[String], + sparkFiles: Seq[String], + jarsDownloadPath: String, + filesDownloadPath: String, + configMapName: String, + configMapKey: String, + submittedDependenciesPlugin: Option[SubmittedDependencyInitContainerConfigPlugin]) + extends SparkInitContainerConfigMapBuilder { + + override def build(): ConfigMap = { + val remoteJarsToDownload = KubernetesFileUtils.getOnlyRemoteFiles(sparkJars) + val remoteFilesToDownload = KubernetesFileUtils.getOnlyRemoteFiles(sparkFiles) + val remoteJarsConf = if (remoteJarsToDownload.nonEmpty) { + Map(INIT_CONTAINER_REMOTE_JARS.key -> remoteJarsToDownload.mkString(",")) + } else { + Map.empty[String, String] + } + val remoteFilesConf = if (remoteFilesToDownload.nonEmpty) { + Map(INIT_CONTAINER_REMOTE_FILES.key -> remoteFilesToDownload.mkString(",")) + } else { + Map.empty[String, String] + } + val baseInitContainerConfig = Map[String, String]( + INIT_CONTAINER_JARS_DOWNLOAD_LOCATION.key -> jarsDownloadPath, + INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key -> filesDownloadPath) ++ + remoteJarsConf ++ + remoteFilesConf + val submittedDependenciesConfig = submittedDependenciesPlugin.map { plugin => + plugin.configurationsToFetchSubmittedDependencies() + }.toSeq.flatten.toMap + PropertiesConfigMapFromScalaMapBuilder.buildConfigMap( + configMapName, + configMapKey, + baseInitContainerConfig ++ submittedDependenciesConfig) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala new file mode 100644 index 0000000000000..bc9abc4eaba81 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.SSLOptions +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] trait SubmittedDependencyInitContainerConfigPlugin { + /** + * Obtain configuration to fetch submitted dependencies from a resource staging server. + * This includes the resource identifiers for the jar and file bundles, as well as the + * remote location of the resource staging server, and the location of secret files for + * authenticating to the resource staging server. 
Note that the secret file paths here need to + * line up with the locations the secrets are mounted by + * SubmittedDependencyInitContainerVolumesPlugin; constants provide the consistency and + * convention for these to line up. + */ + def configurationsToFetchSubmittedDependencies(): Map[String, String] +} + +private[spark] class SubmittedDependencyInitContainerConfigPluginImpl( + resourceStagingServerUri: String, + jarsResourceId: String, + filesResourceId: String, + jarsSecretKey: String, + filesSecretKey: String, + trustStoreSecretKey: String, + secretsVolumeMountPath: String, + resourceStagingServiceSslOptions: SSLOptions) + extends SubmittedDependencyInitContainerConfigPlugin { + + override def configurationsToFetchSubmittedDependencies(): Map[String, String] = { + Map[String, String]( + RESOURCE_STAGING_SERVER_URI.key -> resourceStagingServerUri, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> jarsResourceId, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$secretsVolumeMountPath/$jarsSecretKey", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> filesResourceId, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$secretsVolumeMountPath/$filesSecretKey", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> + resourceStagingServiceSslOptions.enabled.toString) ++ + resourceStagingServiceSslOptions.trustStore.map { _ => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key, + s"$secretsVolumeMountPath/$trustStoreSecretKey") + }.toMap ++ + resourceStagingServiceSslOptions.trustStorePassword.map { password => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) + }.toMap ++ + resourceStagingServiceSslOptions.trustStoreType.map { storeType => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) + }.toMap + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala new file mode 100644 index 0000000000000..1853b2ecce6d2 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
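For illustration, the config plugin above might be instantiated as follows (a sketch only: the staging server URI, resource identifiers, secret keys, and mount path are hypothetical, while the class and parameter names are those introduced in this patch):

import org.apache.spark.SSLOptions

val submittedDepsConfigPlugin = new SubmittedDependencyInitContainerConfigPluginImpl(
  resourceStagingServerUri = "http://resource-staging-server:10000",
  jarsResourceId = "jars-4dc2b57a",
  filesResourceId = "files-4dc2b57a",
  jarsSecretKey = "downloadSubmittedJarsSecret",
  filesSecretKey = "downloadSubmittedFilesSecret",
  trustStoreSecretKey = "trustStore",
  secretsVolumeMountPath = "/mnt/secrets/spark-init",
  resourceStagingServiceSslOptions = SSLOptions())
// With SSL disabled and no trust store, this yields the staging server URI, the two
// resource identifiers, the secret file locations under /mnt/secrets/spark-init, and
// the SSL-enabled flag.
val fetchSubmittedDepsConf: Map[String, String] =
  submittedDepsConfigPlugin.configurationsToFetchSubmittedDependencies()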
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SSLOptions +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] trait SubmittedDependencySecretBuilder { + /** + * Construct a Kubernetes secret bundle that init-containers can use to retrieve an + * application's dependencies. + */ + def build(): Secret +} + +private[spark] class SubmittedDependencySecretBuilderImpl( + secretName: String, + jarsResourceSecret: String, + filesResourceSecret: String, + jarsSecretKey: String, + filesSecretKey: String, + trustStoreSecretKey: String, + resourceStagingServerSslOptions: SSLOptions) + extends SubmittedDependencySecretBuilder { + + override def build(): Secret = { + val trustStoreBase64 = resourceStagingServerSslOptions.trustStore.map { trustStoreFile => + require(trustStoreFile.isFile, "Dependency server trustStore provided at" + + trustStoreFile.getAbsolutePath + " does not exist or is not a file.") + (trustStoreSecretKey, BaseEncoding.base64().encode(Files.toByteArray(trustStoreFile))) + }.toMap + val jarsSecretBase64 = BaseEncoding.base64().encode(jarsResourceSecret.getBytes(Charsets.UTF_8)) + val filesSecretBase64 = BaseEncoding.base64().encode( + filesResourceSecret.getBytes(Charsets.UTF_8)) + val secretData = Map( + jarsSecretKey -> jarsSecretBase64, + filesSecretKey -> filesSecretBase64) ++ + trustStoreBase64 + val kubernetesSecret = new SecretBuilder() + .withNewMetadata() + .withName(secretName) + .endMetadata() + .addToData(secretData.asJava) + .build() + kubernetesSecret + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala new file mode 100644 index 0000000000000..f22759d463cb7 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala @@ -0,0 +1,116 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.{File, FileOutputStream} +import javax.ws.rs.core.MediaType + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import okhttp3.RequestBody +import retrofit2.Call + +import org.apache.spark.{SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils +import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials +import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.util.Utils + +private[spark] trait SubmittedDependencyUploader { + /** + * Upload submitter-local jars to the resource staging server. + * @return The resource ID and secret to use to retrieve these jars. + */ + def uploadJars(): SubmittedResourceIdAndSecret + + /** + * Upload submitter-local files to the resource staging server. + * @return The resource ID and secret to use to retrieve these files. + */ + def uploadFiles(): SubmittedResourceIdAndSecret +} + +/** + * Default implementation of a SubmittedDependencyManager that is backed by a + * Resource Staging Service. + */ +private[spark] class SubmittedDependencyUploaderImpl( + kubernetesAppId: String, + podLabels: Map[String, String], + podNamespace: String, + stagingServerUri: String, + sparkJars: Seq[String], + sparkFiles: Seq[String], + stagingServiceSslOptions: SSLOptions, + retrofitClientFactory: RetrofitClientFactory) extends SubmittedDependencyUploader { + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + + private def localUriStringsToFiles(uris: Seq[String]): Iterable[File] = { + KubernetesFileUtils.getOnlySubmitterLocalFiles(uris) + .map(Utils.resolveURI) + .map(uri => new File(uri.getPath)) + } + private def localJars: Iterable[File] = localUriStringsToFiles(sparkJars) + private def localFiles: Iterable[File] = localUriStringsToFiles(sparkFiles) + + override def uploadJars(): SubmittedResourceIdAndSecret = doUpload(localJars, "uploaded-jars") + override def uploadFiles(): SubmittedResourceIdAndSecret = doUpload(localFiles, "uploaded-files") + + private def doUpload(files: Iterable[File], fileNamePrefix: String) + : SubmittedResourceIdAndSecret = { + val filesDir = Utils.createTempDir(namePrefix = fileNamePrefix) + val filesTgz = new File(filesDir, s"$fileNamePrefix.tgz") + Utils.tryWithResource(new FileOutputStream(filesTgz)) { filesOutputStream => + CompressionUtils.writeTarGzipToStream(filesOutputStream, files.map(_.getAbsolutePath)) + } + // TODO provide credentials properly when the staging server monitors the Kubernetes API. 
+ val kubernetesCredentialsString = OBJECT_MAPPER.writer() + .writeValueAsString(KubernetesCredentials(None, None, None, None)) + val labelsAsString = OBJECT_MAPPER.writer().writeValueAsString(podLabels) + + val filesRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) + + val kubernetesCredentialsBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) + + val namespaceRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), podNamespace) + + val labelsRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsAsString) + + val service = retrofitClientFactory.createRetrofitClient( + stagingServerUri, + classOf[ResourceStagingServiceRetrofit], + stagingServiceSslOptions) + val uploadResponse = service.uploadResources( + labelsRequestBody, namespaceRequestBody, filesRequestBody, kubernetesCredentialsBody) + getTypedResponseResult(uploadResponse) + } + + private def getTypedResponseResult[T](call: Call[T]): T = { + val response = call.execute() + if (response.code() < 200 || response.code() >= 300) { + throw new SparkException("Unexpected response from dependency server when uploading" + + s" dependencies: ${response.code()}. Error body: " + + Option(response.errorBody()).map(_.string()).getOrElse("N/A")) + } + response.body() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala similarity index 51% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala index 65bc9bc17dae9..f4e5e991180ce 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/StagedResourceIdentifier.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala @@ -14,6 +14,19 @@ * See the License for the specific language governing permissions and * limitations under the License. 
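The getTypedResponseResult helper above encodes a simple rule: any response outside the 2xx range is surfaced as a SparkException with the server's error body attached. A small, self-contained sketch of that status check against a synthetic Retrofit response (retrofit2 and okhttp3 are already dependencies of this module; the body text is made up):

    import okhttp3.{MediaType, ResponseBody}
    import retrofit2.Response

    // Build a synthetic 404 so the status check can be exercised without a server.
    val errorBody = ResponseBody.create(MediaType.parse("text/plain"), "no such resource")
    val response: Response[String] = Response.error(404, errorBody)

    // Mirrors the check in getTypedResponseResult: only 2xx codes are accepted.
    val accepted = response.code() >= 200 && response.code() < 300
    assert(!accepted)
    // On rejection the uploader throws a SparkException that includes
    // Option(response.errorBody()).map(_.string()).getOrElse("N/A").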
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.kubernetes.submit.v2 -case class StagedResourceIdentifier(resourceId: String, resourceSecret: String) +case class SubmittedResourceIdAndSecret(resourceId: String, resourceSecret: String) + +case class SubmittedResources( + jarsResourceIdAndSecret: SubmittedResourceIdAndSecret, + filesResourceIdAndSecret: SubmittedResourceIdAndSecret) { + def ids(): SubmittedResourceIds = SubmittedResourceIds( + jarsResourceIdAndSecret.resourceId, filesResourceIdAndSecret.resourceId) + def secrets(): SubmittedResourceSecrets = SubmittedResourceSecrets( + jarsResourceIdAndSecret.resourceSecret, filesResourceIdAndSecret.resourceSecret) +} + +case class SubmittedResourceIds(jarsResourceId: String, filesResourceId: String) + +case class SubmittedResourceSecrets(jarsResourceSecret: String, filesResourceSecret: String) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index ca05fe767146b..7847ba2546594 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -35,6 +35,7 @@ import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkCo import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils import org.apache.spark.deploy.rest._ import org.apache.spark.internal.config.OptionalConfigEntry import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala index 680d305985cc0..67caa176930ea 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala @@ -25,12 +25,15 @@ import com.google.common.io.Files import com.google.common.util.concurrent.SettableFuture import okhttp3.ResponseBody import retrofit2.{Call, Callback, Response} +import scala.concurrent.{ExecutionContext, Future} +import scala.concurrent.duration.Duration -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException} +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf} +import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.{ThreadUtils, Utils} private trait WaitableCallback[T] extends Callback[T] { private val complete = SettableFuture.create[Boolean] @@ -61,55 +64,149 @@ private class DownloadTarGzCallback(downloadDir: File) extends WaitableCallback[ } } 
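The SubmittedResources wrapper introduced in this hunk is a small convenience: it regroups the jar and file id/secret pairs into the two views that later stages consume. A self-contained illustration with made-up values (the real pairs come from the uploader and, ultimately, from the resource staging server):

    import org.apache.spark.deploy.kubernetes.submit.v2.{SubmittedResourceIdAndSecret,
      SubmittedResourceIds, SubmittedResources, SubmittedResourceSecrets}

    // Hypothetical results of uploadJars() and uploadFiles().
    val jars = SubmittedResourceIdAndSecret("jarsId", "jarsSecret")
    val files = SubmittedResourceIdAndSecret("filesId", "filesSecret")
    val resources = SubmittedResources(jars, files)

    // ids() feeds the init-container config map; secrets() feeds the Kubernetes Secret.
    assert(resources.ids() == SubmittedResourceIds("jarsId", "filesId"))
    assert(resources.secrets() == SubmittedResourceSecrets("jarsSecret", "filesSecret"))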
+// Extracted for testing so that unit tests don't have to depend on Utils.fetchFile +private[v2] trait FileFetcher { + def fetchFile(uri: String, targetDir: File): Unit +} + +private class FileFetcherImpl(sparkConf: SparkConf, securityManager: SparkSecurityManager) + extends FileFetcher { + def fetchFile(uri: String, targetDir: File): Unit = { + Utils.fetchFile( + url = uri, + targetDir = targetDir, + conf = sparkConf, + securityMgr = securityManager, + hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf), + timestamp = System.currentTimeMillis(), + useCache = false) + } +} + +/** + * Process that fetches files from a resource staging server and/or arbitrary remote locations. + * + * The init-container can handle fetching files from any of those sources, but not all of the + * sources need to be specified. This allows for composing multiple instances of this container + * with different configurations for different download sources, or using the same container to + * download everything at once. + */ private[spark] class KubernetesSparkDependencyDownloadInitContainer( - sparkConf: SparkConf, retrofitClientFactory: RetrofitClientFactory) extends Logging { + sparkConf: SparkConf, + retrofitClientFactory: RetrofitClientFactory, + fileFetcher: FileFetcher, + securityManager: SparkSecurityManager) extends Logging { - private val resourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) - .getOrElse(throw new SparkException("No dependency server URI was provided.")) + private implicit val downloadExecutor = ExecutionContext.fromExecutorService( + ThreadUtils.newDaemonCachedThreadPool("download-executor")) + private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) - private val downloadJarsResourceIdentifier = sparkConf + private val maybeDownloadJarsResourceIdentifier = sparkConf .get(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER) - .getOrElse(throw new SparkException("No resource identifier provided for jars.")) private val downloadJarsSecretLocation = new File( sparkConf.get(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION)) - private val downloadFilesResourceIdentifier = sparkConf + private val maybeDownloadFilesResourceIdentifier = sparkConf .get(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER) - .getOrElse(throw new SparkException("No resource identifier provided for files.")) private val downloadFilesSecretLocation = new File( sparkConf.get(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION)) - require(downloadJarsSecretLocation.isFile, "Application jars download secret provided" + - s" at ${downloadJarsSecretLocation.getAbsolutePath} does not exist or is not a file.") - require(downloadFilesSecretLocation.isFile, "Application files download secret provided" + - s" at ${downloadFilesSecretLocation.getAbsolutePath} does not exist or is not a file.") - private val jarsDownloadDir = new File(sparkConf.get(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION)) - require(jarsDownloadDir.isDirectory, "Application jars download directory provided at" + - s" ${jarsDownloadDir.getAbsolutePath} does not exist or is not a directory.") + private val jarsDownloadDir = new File( + sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION)) + private val filesDownloadDir = new File( + sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION)) + + private val remoteJars = sparkConf.get(INIT_CONTAINER_REMOTE_JARS) + private val remoteFiles = sparkConf.get(INIT_CONTAINER_REMOTE_FILES) - private val filesDownloadDir = new File(sparkConf.get(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION)) - 
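The FileFetcher trait above exists purely as a testing seam: production code goes through Utils.fetchFile, while unit tests can substitute something inert. A sketch of the kind of stub this enables (a hypothetical test helper, placed in the same v2 package so the private[v2] trait is visible; the recording buffer is just for illustration):

    package org.apache.spark.deploy.rest.kubernetes.v2

    import java.io.File

    import scala.collection.mutable

    // Hypothetical test double: records requested fetches instead of calling
    // Utils.fetchFile, so unit tests avoid touching Hadoop configuration entirely.
    private[v2] class RecordingFileFetcher extends FileFetcher {
      val fetched = mutable.Buffer.empty[(String, File)]

      override def fetchFile(uri: String, targetDir: File): Unit = {
        fetched += ((uri, targetDir))
      }
    }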
require(filesDownloadDir.isDirectory, "Application files download directory provided at" + - s" ${filesDownloadDir.getAbsolutePath} does not exist or is not a directory.") - private val downloadTimeoutMinutes = sparkConf.get(DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT) + private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) def run(): Unit = { - val securityManager = new SparkSecurityManager(sparkConf) - val sslOptions = securityManager.getSSLOptions("kubernetes.resourceStagingServer") - val service = retrofitClientFactory.createRetrofitClient( - resourceStagingServerUri, classOf[ResourceStagingServiceRetrofit], sslOptions) - val jarsSecret = Files.toString(downloadJarsSecretLocation, Charsets.UTF_8) - val filesSecret = Files.toString(downloadFilesSecretLocation, Charsets.UTF_8) - val downloadJarsCallback = new DownloadTarGzCallback(jarsDownloadDir) - val downloadFilesCallback = new DownloadTarGzCallback(filesDownloadDir) - service.downloadResources(downloadJarsResourceIdentifier, jarsSecret) - .enqueue(downloadJarsCallback) - service.downloadResources(downloadFilesResourceIdentifier, filesSecret) - .enqueue(downloadFilesCallback) - logInfo("Waiting to download jars...") - downloadJarsCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) - logInfo(s"Jars downloaded to ${jarsDownloadDir.getAbsolutePath}") - logInfo("Waiting to download files...") - downloadFilesCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) - logInfo(s"Files downloaded to ${filesDownloadDir.getAbsolutePath}") + val resourceStagingServerJarsDownload = Future[Unit] { + downloadResourcesFromStagingServer( + maybeDownloadJarsResourceIdentifier, + downloadJarsSecretLocation, + jarsDownloadDir, + "Starting to download jars from resource staging server...", + "Finished downloading jars from resource staging server.", + s"Application jars download secret provided at" + + s" ${downloadJarsSecretLocation.getAbsolutePath} does not exist or is not a file.", + s"Application jars download directory provided at" + + s" ${jarsDownloadDir.getAbsolutePath} does not exist or is not a directory.") + } + val resourceStagingServerFilesDownload = Future[Unit] { + downloadResourcesFromStagingServer( + maybeDownloadFilesResourceIdentifier, + downloadFilesSecretLocation, + filesDownloadDir, + "Starting to download files from resource staging server...", + "Finished downloading files from resource staging server.", + s"Application files download secret provided at" + + s" ${downloadFilesSecretLocation.getAbsolutePath} does not exist or is not a file.", + s"Application files download directory provided at" + + s" ${filesDownloadDir.getAbsolutePath} does not exist or is not" + + s" a directory.") + } + val remoteJarsDownload = Future[Unit] { + downloadFiles(remoteJars, + jarsDownloadDir, + s"Remote jars download directory specified at $jarsDownloadDir does not exist" + + s" or is not a directory.") + } + val remoteFilesDownload = Future[Unit] { + downloadFiles(remoteFiles, + filesDownloadDir, + s"Remote files download directory specified at $filesDownloadDir does not exist" + + s" or is not a directory.") + } + waitForFutures( + resourceStagingServerJarsDownload, + resourceStagingServerFilesDownload, + remoteJarsDownload, + remoteFilesDownload) + } + + private def downloadResourcesFromStagingServer( + maybeResourceId: Option[String], + resourceSecretLocation: File, + resourceDownloadDir: File, + downloadStartMessage: String, + downloadFinishedMessage: String, + errMessageOnSecretNotAFile: 
String, + errMessageOnDownloadDirNotADirectory: String): Unit = { + maybeResourceStagingServerUri.foreach { resourceStagingServerUri => + maybeResourceId.foreach { resourceId => + require(resourceSecretLocation.isFile, errMessageOnSecretNotAFile) + require(resourceDownloadDir.isDirectory, errMessageOnDownloadDirNotADirectory) + val sslOptions = securityManager.getSSLOptions("kubernetes.resourceStagingServer") + val service = retrofitClientFactory.createRetrofitClient( + resourceStagingServerUri, classOf[ResourceStagingServiceRetrofit], sslOptions) + val resourceSecret = Files.toString(resourceSecretLocation, Charsets.UTF_8) + val downloadResourceCallback = new DownloadTarGzCallback(resourceDownloadDir) + logInfo(downloadStartMessage) + service.downloadResources(resourceId, resourceSecret) + .enqueue(downloadResourceCallback) + downloadResourceCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) + logInfo(downloadFinishedMessage) + } + } + } + + private def downloadFiles( + filesCommaSeparated: Option[String], + downloadDir: File, + errMessageOnDestinationNotADirectory: String): Unit = { + if (filesCommaSeparated.isDefined) { + require(downloadDir.isDirectory, errMessageOnDestinationNotADirectory) + } + filesCommaSeparated.map(_.split(",")).toSeq.flatten.foreach { file => + fileFetcher.fetchFile(file, downloadDir) + } + } + + private def waitForFutures(futures: Future[_]*) { + futures.foreach { + ThreadUtils.awaitResult(_, Duration.create(downloadTimeoutMinutes, TimeUnit.MINUTES)) + } } } @@ -121,7 +218,13 @@ object KubernetesSparkDependencyDownloadInitContainer extends Logging { } else { new SparkConf(true) } - new KubernetesSparkDependencyDownloadInitContainer(sparkConf, RetrofitClientFactoryImpl).run() + val securityManager = new SparkSecurityManager(sparkConf) + val fileFetcher = new FileFetcherImpl(sparkConf, securityManager) + new KubernetesSparkDependencyDownloadInitContainer( + sparkConf, + RetrofitClientFactoryImpl, + fileFetcher, + securityManager).run() logInfo("Finished downloading application dependencies.") } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala index 844809dec995c..b7c6c4fb913da 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -22,6 +22,7 @@ import javax.ws.rs.core.{MediaType, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam +import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials /** @@ -69,7 +70,7 @@ private[spark] trait ResourceStagingService { @FormDataParam("podNamespace") podNamespace: String, @FormDataParam("resources") resources: InputStream, @FormDataParam("kubernetesCredentials") kubernetesCredentials: KubernetesCredentials) - : StagedResourceIdentifier + : SubmittedResourceIdAndSecret /** * Download an application's resources. 
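With the refactor above, run() starts all four downloads concurrently before waiting on any of them, so a slow source overlaps with the others instead of queueing behind them, and each wait is bounded by the configured mount timeout. A stripped-down sketch of that fan-out/await pattern using the same Spark ThreadUtils helpers (the future bodies are placeholders standing in for the actual downloads):

    import java.util.concurrent.TimeUnit

    import scala.concurrent.{ExecutionContext, ExecutionContextExecutorService, Future}
    import scala.concurrent.duration.Duration

    import org.apache.spark.util.ThreadUtils

    implicit val downloadExecutor: ExecutionContextExecutorService =
      ExecutionContext.fromExecutorService(
        ThreadUtils.newDaemonCachedThreadPool("download-executor"))

    // Placeholder work standing in for the staging-server and remote downloads.
    val downloads = Seq(
      Future { /* download jars from the resource staging server */ },
      Future { /* download files from the resource staging server */ },
      Future { /* fetch remote jars */ },
      Future { /* fetch remote files */ })

    // Block until every download finishes or times out, as waitForFutures does.
    downloads.foreach(
      ThreadUtils.awaitResult(_, Duration.create(5, TimeUnit.MINUTES)))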
The resources are provided as a stream, where the stream's diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala index cf6180fbf53d4..3dfa83c85e6dd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -26,6 +26,7 @@ import com.google.common.io.{BaseEncoding, ByteStreams, Files} import scala.collection.concurrent.TrieMap import org.apache.spark.SparkException +import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -41,7 +42,7 @@ private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) podLabels: Map[String, String], podNamespace: String, resources: InputStream, - kubernetesCredentials: KubernetesCredentials): StagedResourceIdentifier = { + kubernetesCredentials: KubernetesCredentials): SubmittedResourceIdAndSecret = { val resourceId = UUID.randomUUID().toString val secretBytes = new Array[Byte](1024) SECURE_RANDOM.nextBytes(secretBytes) @@ -65,7 +66,7 @@ private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) podNamespace, resourcesTgz, kubernetesCredentials) - StagedResourceIdentifier(resourceId, resourceSecret) + SubmittedResourceIdAndSecret(resourceId, resourceSecret) } catch { case e: Throwable => if (!resourcesDir.delete()) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala index b1a3cc0676757..e0079a372f0d9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala @@ -20,6 +20,8 @@ import okhttp3.{RequestBody, ResponseBody} import retrofit2.Call import retrofit2.http.{Multipart, Path, Streaming} +import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret + /** * Retrofit-compatible variant of {@link ResourceStagingService}. For documentation on * how to use this service, see the aforementioned JAX-RS based interface. 
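As the context lines above show, the staging service itself mints each resource's identifier (a random UUID) and its secret (SecureRandom bytes); the exact string derivation of resourceSecret sits outside this hunk. A rough, self-contained sketch of that generation step, with base64 assumed here as the printable encoding:

    import java.security.SecureRandom
    import java.util.UUID

    import com.google.common.io.BaseEncoding

    val secureRandom = new SecureRandom()

    // Identifier handed back to the submitter and used later in download requests.
    val resourceId = UUID.randomUUID().toString

    // Random secret material; encoding it with base64 is an assumption for this
    // sketch, since the real derivation lives in the unchanged part of the class.
    val secretBytes = new Array[Byte](1024)
    secureRandom.nextBytes(secretBytes)
    val resourceSecret = BaseEncoding.base64().encode(secretBytes)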
@@ -33,7 +35,7 @@ private[spark] trait ResourceStagingServiceRetrofit { @retrofit2.http.Part("podNamespace") podNamespace: RequestBody, @retrofit2.http.Part("resources") resources: RequestBody, @retrofit2.http.Part("kubernetesCredentials") - kubernetesCredentials: RequestBody): Call[StagedResourceIdentifier] + kubernetesCredentials: RequestBody): Call[SubmittedResourceIdAndSecret] @Streaming @retrofit2.http.GET("/api/v0/resources/{resourceId}") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 70098f1f46ac0..e2630b9918b61 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -17,9 +17,12 @@ package org.apache.spark.scheduler.cluster.kubernetes import org.apache.spark.SparkContext +import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrapImpl} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} -private[spark] class KubernetesClusterManager extends ExternalClusterManager { +private[spark] class KubernetesClusterManager extends ExternalClusterManager with Logging { override def canCreate(masterURL: String): Boolean = masterURL.startsWith("k8s") @@ -31,7 +34,49 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager { override def createSchedulerBackend(sc: SparkContext, masterURL: String, scheduler: TaskScheduler) : SchedulerBackend = { - new KubernetesClusterSchedulerBackend(sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc) + val sparkConf = sc.getConf + val maybeConfigMap = sparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP) + val maybeConfigMapKey = sparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY) + + val maybeExecutorInitContainerSecretName = + sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET) + val maybeExecutorInitContainerSecretMount = + sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) + val executorInitContainerSecretVolumePlugin = for { + initContainerSecretName <- maybeExecutorInitContainerSecretName + initContainerSecretMountPath <- maybeExecutorInitContainerSecretMount + } yield { + new InitContainerResourceStagingServerSecretPluginImpl( + initContainerSecretName, + initContainerSecretMountPath) + } + // Only set up the bootstrap if they've provided both the config map key and the config map + // name. Note that we generally expect both to have been set from spark-submit V2, but for + // testing developers may simply run the driver JVM locally, but the config map won't be set + // then. 
+ val bootStrap = for { + configMap <- maybeConfigMap + configMapKey <- maybeConfigMapKey + } yield { + new SparkPodInitContainerBootstrapImpl( + sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE), + sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION), + sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION), + sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT), + configMap, + configMapKey, + executorInitContainerSecretVolumePlugin) + } + if (maybeConfigMap.isEmpty) { + logWarning("The executor's init-container config map was not specified. Executors will" + + " therefore not attempt to fetch remote or submitted dependencies.") + } + if (maybeConfigMapKey.isEmpty) { + logWarning("The executor's init-container config map key was not specified. Executors will" + + " therefore not attempt to fetch remote or submitted dependencies.") + } + new KubernetesClusterSchedulerBackend( + sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc, bootStrap) } override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 669a073b1fab6..0dd875b307a6d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -20,17 +20,16 @@ import java.io.Closeable import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} -import scala.collection.JavaConverters._ -import scala.collection.mutable -import scala.concurrent.{ExecutionContext, Future} - -import io.fabric8.kubernetes.api.model._ +import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.FilenameUtils +import scala.collection.JavaConverters._ +import scala.collection.mutable +import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkEnv, SparkException} -import org.apache.spark.deploy.kubernetes.ConfigurationUtils +import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} @@ -41,7 +40,8 @@ import org.apache.spark.util.{ThreadUtils, Utils} private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, - val sc: SparkContext) + val sc: SparkContext, + executorInitContainerBootstrap: Option[SparkPodInitContainerBootstrap]) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { import KubernetesClusterSchedulerBackend._ @@ -52,6 +52,9 @@ private[spark] class KubernetesClusterSchedulerBackend( private val EXECUTOR_PODS_BY_IPS_LOCK = new Object private val executorPodsByIPs = new mutable.HashMap[String, Pod] // Indexed by executor IP addrs. 
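The bootstrap wiring above only produces a SparkPodInitContainerBootstrapImpl when both the config map name and its key are present: the for-comprehension over the two Options collapses to None as soon as either is missing, and the two warnings cover each missing half. A reduced illustration of that Option handling with hypothetical values:

    // Hypothetical settings; in createSchedulerBackend these come from the SparkConf.
    val maybeConfigMap: Option[String] = Some("spark-init-config-map")
    val maybeConfigMapKey: Option[String] = None

    val bootstrap = for {
      configMap <- maybeConfigMap
      configMapKey <- maybeConfigMapKey
    } yield s"bootstrap($configMap, $configMapKey)"

    // With the key unset, no bootstrap is built, so executors will not attempt to
    // fetch remote or submitted dependencies.
    assert(bootstrap.isEmpty)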
+ private val executorExtraClasspath = conf.get( + org.apache.spark.internal.config.EXECUTOR_CLASS_PATH) + private val executorJarsDownloadDir = conf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) @@ -258,13 +261,20 @@ private[spark] class KubernetesClusterSchedulerBackend( val executorCpuQuantity = new QuantityBuilder(false) .withAmount(executorCores) .build() + val executorExtraClasspathEnv = executorExtraClasspath.map { cp => + new EnvVarBuilder() + .withName(ENV_EXECUTOR_EXTRA_CLASSPATH) + .withValue(cp) + .build() + } val requiredEnv = Seq( (ENV_EXECUTOR_PORT, executorPort.toString), (ENV_DRIVER_URL, driverUrl), (ENV_EXECUTOR_CORES, executorCores), (ENV_EXECUTOR_MEMORY, executorMemoryString), (ENV_APPLICATION_ID, applicationId()), - (ENV_EXECUTOR_ID, executorId)) + (ENV_EXECUTOR_ID, executorId), + (ENV_MOUNTED_CLASSPATH, s"$executorJarsDownloadDir/*")) .map(env => new EnvVarBuilder() .withName(env._1) .withValue(env._2) @@ -317,7 +327,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .endContainer() .endSpec() - val resolvedPodBuilder = shuffleServiceConfig + val withMaybeShuffleConfigPodBuilder = shuffleServiceConfig .map { config => config.shuffleDirs.foldLeft(basePodBuilder) { (builder, dir) => builder @@ -337,9 +347,14 @@ private[spark] class KubernetesClusterSchedulerBackend( .endSpec() } }.getOrElse(basePodBuilder) + val resolvedExecutorPod = executorInitContainerBootstrap.map { bootstrap => + bootstrap.bootstrapInitContainerAndVolumes( + "executor", + withMaybeShuffleConfigPodBuilder) + }.getOrElse(withMaybeShuffleConfigPodBuilder) try { - (executorId, kubernetesClient.pods().create(resolvedPodBuilder.build())) + (executorId, kubernetesClient.pods.create(resolvedExecutorPod.build())) } catch { case throwable: Throwable => logError("Failed to allocate executor pod.", throwable) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala new file mode 100644 index 0000000000000..6db7d3ff2da53 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ -0,0 +1,164 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +import com.fasterxml.jackson.databind.ObjectMapper +import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.constants._ + +class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAfter { + private val OBJECT_MAPPER = new ObjectMapper() + private val INIT_CONTAINER_IMAGE = "spark-init:latest" + private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" + private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" + private val DOWNLOAD_TIMEOUT_MINUTES = 5 + private val INIT_CONTAINER_CONFIG_MAP_NAME = "spark-init-config-map" + private val INIT_CONTAINER_CONFIG_MAP_KEY = "spark-init-config-map-key" + private val ADDED_SUBMITTED_DEPENDENCY_ENV = "ADDED_SUBMITTED_DEPENDENCY" + private val ADDED_SUBMITTED_DEPENDENCY_ANNOTATION = "added-submitted-dependencies" + private val MAIN_CONTAINER_NAME = "spark-main" + private val TRUE = "true" + + private val submittedDependencyPlugin = new InitContainerResourceStagingServerSecretPlugin { + override def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder) + : PodBuilder = { + basePod.editMetadata() + .addToAnnotations(ADDED_SUBMITTED_DEPENDENCY_ANNOTATION, TRUE) + .endMetadata() + } + + override def mountResourceStagingServerSecretIntoInitContainer(container: ContainerBuilder) + : ContainerBuilder = { + container + .addNewEnv() + .withName(ADDED_SUBMITTED_DEPENDENCY_ENV) + .withValue(TRUE) + .endEnv() + } + } + + test("Running without submitted dependencies adds init-container with volume mounts.") { + val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() + val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala + assert(podAnnotations.contains(INIT_CONTAINER_ANNOTATION)) + val initContainers = OBJECT_MAPPER.readValue( + podAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) + assert(initContainers.length === 1) + val initContainer = initContainers.head + val initContainerVolumeMounts = initContainer.getVolumeMounts.asScala.map { + mount => (mount.getName, mount.getMountPath) + }.toMap + val expectedInitContainerVolumeMounts = Map( + INIT_CONTAINER_PROPERTIES_FILE_VOLUME -> INIT_CONTAINER_PROPERTIES_FILE_DIR, + INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, + INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) + assert(initContainerVolumeMounts === expectedInitContainerVolumeMounts) + assert(initContainer.getName === "spark-init") + assert(initContainer.getImage === INIT_CONTAINER_IMAGE) + assert(initContainer.getImagePullPolicy === "IfNotPresent") + assert(initContainer.getArgs.asScala === List(INIT_CONTAINER_PROPERTIES_FILE_PATH)) + } + + test("Running without submitted dependencies adds volume mounts to main container.") { + val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() + val containers = bootstrappedPod.getSpec.getContainers.asScala + val mainContainer = containers.find(_.getName === MAIN_CONTAINER_NAME) + assert(mainContainer.isDefined) + val volumeMounts = mainContainer.map(_.getVolumeMounts.asScala).toSeq.flatten.map { + mount => (mount.getName, mount.getMountPath) + }.toMap + val expectedVolumeMounts = Map( + INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, + INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) + assert(volumeMounts === expectedVolumeMounts) + 
} + + test("Running without submitted dependencies adds volumes to the pod") { + val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() + val podVolumes = bootstrappedPod.getSpec.getVolumes.asScala + assert(podVolumes.size === 3) + assert(podVolumes.exists { volume => + volume.getName == INIT_CONTAINER_PROPERTIES_FILE_VOLUME && + Option(volume.getConfigMap).map { configMap => + configMap.getItems.asScala.map { + keyToPath => (keyToPath.getKey, keyToPath.getPath) + }.toMap + }.contains(Map(INIT_CONTAINER_CONFIG_MAP_KEY -> INIT_CONTAINER_PROPERTIES_FILE_NAME)) + }) + assert(podVolumes.exists { volume => + volume.getName == INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME && volume.getEmptyDir != null + }) + assert(podVolumes.exists { volume => + volume.getName == INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME && volume.getEmptyDir != null + }) + } + + test("Running with submitted dependencies modifies the init container with the plugin.") { + val bootstrappedPod = bootstrapPodWithSubmittedDependencies() + val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala + assert(podAnnotations(ADDED_SUBMITTED_DEPENDENCY_ANNOTATION) === TRUE) + val initContainers = OBJECT_MAPPER.readValue( + podAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) + assert(initContainers.length === 1) + val initContainer = initContainers.head + assert(initContainer.getEnv.asScala.exists { + env => env.getName === ADDED_SUBMITTED_DEPENDENCY_ENV && env.getValue === TRUE + }) + } + + private def bootstrapPodWithoutSubmittedDependencies(): Pod = { + val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( + INIT_CONTAINER_IMAGE, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOWNLOAD_TIMEOUT_MINUTES, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY, + None) + bootstrapUnderTest.bootstrapInitContainerAndVolumes( + MAIN_CONTAINER_NAME, basePod()).build() + } + + private def bootstrapPodWithSubmittedDependencies(): Pod = { + val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( + INIT_CONTAINER_IMAGE, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOWNLOAD_TIMEOUT_MINUTES, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY, + Some(submittedDependencyPlugin)) + bootstrapUnderTest.bootstrapInitContainerAndVolumes( + MAIN_CONTAINER_NAME, basePod()).build() + } + + private def basePod(): PodBuilder = { + new PodBuilder() + .withNewMetadata() + .withName("spark-pod") + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName(MAIN_CONTAINER_NAME) + .endContainer() + .endSpec() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala new file mode 100644 index 0000000000000..473d369c8eca3 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.constants._ + +class SubmittedDependencyInitContainerVolumesPluginSuite extends SparkFunSuite { + + private val SECRET_NAME = "secret" + private val SECRET_MOUNT_PATH = "/mnt/secrets" + private val plugin = new InitContainerResourceStagingServerSecretPluginImpl( + SECRET_NAME, SECRET_MOUNT_PATH) + + test("The init container should have the secret volume mount.") { + val baseInitContainer = new ContainerBuilder().withName("container") + val configuredInitContainer = plugin.mountResourceStagingServerSecretIntoInitContainer( + baseInitContainer).build() + val volumeMounts = configuredInitContainer.getVolumeMounts.asScala + assert(volumeMounts.size === 1) + assert(volumeMounts.exists { volumeMount => + volumeMount.getName === INIT_CONTAINER_SECRET_VOLUME_NAME && + volumeMount.getMountPath === SECRET_MOUNT_PATH + }) + } + + test("The pod should have the secret volume.") { + val basePod = new PodBuilder() + .withNewMetadata().withName("pod").endMetadata() + .withNewSpec() + .addNewContainer() + .withName("container") + .endContainer() + .endSpec() + val configuredPod = plugin.addResourceStagingServerSecretVolumeToPod(basePod).build() + val volumes = configuredPod.getSpec.getVolumes.asScala + assert(volumes.size === 1) + assert(volumes.exists { volume => + volume.getName === INIT_CONTAINER_SECRET_VOLUME_NAME && + Option(volume.getSecret).map(_.getSecretName).contains(SECRET_NAME) + }) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala index e6536fbaa6941..4dc1e2e44980a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -18,311 +18,331 @@ package org.apache.spark.deploy.kubernetes.submit.v2 import java.io.File -import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, Container, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} +import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} -import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq, startsWith} -import org.mockito.Mockito.when +import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} +import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq} +import org.mockito.Mockito.{times, verify, when} import 
org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer import org.scalatest.BeforeAndAfter -import org.scalatest.mock.MockitoSugar._ import scala.collection.JavaConverters._ -import scala.reflect.ClassTag +import scala.collection.mutable import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.SparkPodInitContainerBootstrap import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v2.StagedResourceIdentifier -import org.apache.spark.util.Utils class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { - private val MAIN_CLASS = "org.apache.spark.test.Main" - private val APP_ARGS = Array[String]("arg1", "arg2") - private val MAIN_APP_RESOURCE = "local:///app/jars/spark-main.jar" - private val APP_NAME = "spark-test-app" - private val STAGING_SERVER_URI = "http://localhost:9000" + private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") + private val FILES_RESOURCE = SubmittedResourceIdAndSecret("filesId", "filesSecret") + private val SUBMITTED_RESOURCES = SubmittedResources(JARS_RESOURCE, FILES_RESOURCE) + private val BOOTSTRAPPED_POD_ANNOTATION = "bootstrapped" + private val TRUE = "true" + private val APP_NAME = "spark-test" + private val APP_ID = "spark-app-id" + private val CUSTOM_LABEL_KEY = "customLabel" + private val CUSTOM_LABEL_VALUE = "customLabelValue" + private val ALL_EXPECTED_LABELS = Map( + CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, + SPARK_APP_ID_LABEL -> APP_ID, + SPARK_APP_NAME_LABEL -> APP_NAME) + private val CUSTOM_ANNOTATION_KEY = "customAnnotation" + private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" + private val SECRET_NAME = "secret" + private val SECRET_KEY = "secret-key" + private val SECRET_DATA = "secret-data" + private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" + private val APP_ARGS = Array("3", "20") private val SPARK_JARS = Seq( - "local:///app/jars/spark-helper.jar", "file:///var/data/spark-local-helper.jar") + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") private val RESOLVED_SPARK_JARS = Seq( - "local:///app/jars/spark-helper.jar", - "file:///var/data/spark-downloaded/spark-local-helper.jar") + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///var/data/spark-jars/jar2.jar") + private val RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS = Seq( + "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") private val SPARK_FILES = Seq( - "local:///app/files/spark-file.txt", "file:///var/data/spark-local-file.txt") + "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") private val RESOLVED_SPARK_FILES = Seq( - "local:///app/files/spark-file.txt", "file:///var/data/spark-downloaded/spark-local-file.txt") - private val DRIVER_EXTRA_CLASSPATH = "/app/jars/extra-jar1.jar:/app/jars/extra-jars2.jar" - private val DRIVER_DOCKER_IMAGE_VALUE = "spark-driver:latest" - private val DRIVER_MEMORY_OVERHEARD_MB = 128L - private val DRIVER_MEMORY_MB = 512L - private val NAMESPACE = "namespace" - private val DOWNLOAD_JARS_RESOURCE_IDENTIFIER = StagedResourceIdentifier("jarsId", "jarsSecret") - private val DOWNLOAD_FILES_RESOURCE_IDENTIFIER = StagedResourceIdentifier( - "filesId", "filesSecret") - private val MOUNTED_FILES_ANNOTATION_KEY = "mountedFiles" - - private var sparkConf: SparkConf = _ - private var submissionKubernetesClientProvider: SubmissionKubernetesClientProvider = _ - private var submissionKubernetesClient: 
KubernetesClient = _ - private type PODS = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] - private type RESOURCES = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ - HasMetadata, Boolean] - private var podOperations: PODS = _ - private var resourceListOperations: RESOURCES = _ - private var mountedDependencyManagerProvider: MountedDependencyManagerProvider = _ - private var mountedDependencyManager: MountedDependencyManager = _ - private var captureCreatedPodAnswer: SelfArgumentCapturingAnswer[Pod] = _ - private var captureCreatedResourcesAnswer: AllArgumentsCapturingAnswer[HasMetadata, RESOURCES] = _ + "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") + private val INIT_CONTAINER_SECRET = new SecretBuilder() + .withNewMetadata() + .withName(SECRET_NAME) + .endMetadata() + .addToData(SECRET_KEY, SECRET_DATA) + .build() + private val CONFIG_MAP_NAME = "config-map" + private val CONFIG_MAP_KEY = "config-map-key" + private val CONFIG_MAP_DATA = "config-map-data" + private val CUSTOM_JAVA_OPTION_KEY = "myappoption" + private val CUSTOM_JAVA_OPTION_VALUE = "myappoptionvalue" + private val DRIVER_JAVA_OPTIONS = s"-D$CUSTOM_JAVA_OPTION_KEY=$CUSTOM_JAVA_OPTION_VALUE" + private val DRIVER_EXTRA_CLASSPATH = "/var/data/spark-app-custom/custom-jar.jar" + private val INIT_CONTAINER_CONFIG_MAP = new ConfigMapBuilder() + .withNewMetadata() + .withName(CONFIG_MAP_NAME) + .endMetadata() + .addToData(CONFIG_MAP_KEY, CONFIG_MAP_DATA) + .build() + private val CUSTOM_DRIVER_IMAGE = "spark-custom-driver:latest" + private val DRIVER_MEMORY_MB = 512 + private val DRIVER_MEMORY_OVERHEAD_MB = 128 + private val SPARK_CONF = new SparkConf(true) + .set(DRIVER_DOCKER_IMAGE, CUSTOM_DRIVER_IMAGE) + .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB.toLong) + .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEAD_MB.toLong) + .set(KUBERNETES_DRIVER_LABELS, s"$CUSTOM_LABEL_KEY=$CUSTOM_LABEL_VALUE") + .set(KUBERNETES_DRIVER_ANNOTATIONS, s"$CUSTOM_ANNOTATION_KEY=$CUSTOM_ANNOTATION_VALUE") + .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) + .set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, DRIVER_JAVA_OPTIONS) + private val EXECUTOR_INIT_CONF_KEY = "executor-init-conf" + private val SPARK_CONF_WITH_EXECUTOR_INIT_CONF = SPARK_CONF.clone() + .set(EXECUTOR_INIT_CONF_KEY, TRUE) + private val DRIVER_POD_UID = "driver-pod-uid" + private val DRIVER_POD_KIND = "pod" + private val DRIVER_POD_API_VERSION = "v1" + @Mock + private var initContainerConfigMapBuilder: SparkInitContainerConfigMapBuilder = _ + @Mock + private var containerLocalizedFilesResolver: ContainerLocalizedFilesResolver = _ + @Mock + private var executorInitContainerConfiguration: ExecutorInitContainerConfiguration = _ + @Mock + private var submittedDependencyUploader: SubmittedDependencyUploader = _ + @Mock + private var submittedDependenciesSecretBuilder: SubmittedDependencySecretBuilder = _ + @Mock + private var initContainerBootstrap: SparkPodInitContainerBootstrap = _ + @Mock + private var initContainerComponentsProvider: DriverInitContainerComponentsProvider = _ + @Mock + private var kubernetesClientProvider: SubmissionKubernetesClientProvider = _ + @Mock + private var kubernetesClient: KubernetesClient = _ + @Mock + private var podOps: MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ + private type ResourceListOps = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ + HasMetadata, 
java.lang.Boolean] + @Mock + private var resourceListOps: ResourceListOps = _ before { - sparkConf = new SparkConf(true) - .set("spark.app.name", APP_NAME) - .set("spark.master", "k8s://https://localhost:443") - .set(DRIVER_DOCKER_IMAGE, DRIVER_DOCKER_IMAGE_VALUE) - .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEARD_MB) - .set(KUBERNETES_NAMESPACE, NAMESPACE) - .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB) - submissionKubernetesClientProvider = mock[SubmissionKubernetesClientProvider] - submissionKubernetesClient = mock[KubernetesClient] - podOperations = mock[PODS] - resourceListOperations = mock[RESOURCES] - mountedDependencyManagerProvider = mock[MountedDependencyManagerProvider] - mountedDependencyManager = mock[MountedDependencyManager] - when(submissionKubernetesClientProvider.get).thenReturn(submissionKubernetesClient) - when(submissionKubernetesClient.pods()).thenReturn(podOperations) - captureCreatedPodAnswer = new SelfArgumentCapturingAnswer[Pod] - captureCreatedResourcesAnswer = new AllArgumentsCapturingAnswer[HasMetadata, RESOURCES]( - resourceListOperations) - when(podOperations.create(any())).thenAnswer(captureCreatedPodAnswer) - when(submissionKubernetesClient.resourceList(anyVararg[HasMetadata])) - .thenAnswer(captureCreatedResourcesAnswer) - } - - // Tests w/o local dependencies, or behave independently to that configuration. - test("Simple properties and environment set on the driver pod.") { - sparkConf.set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) - val createdDriverPod = createAndGetDriverPod() - val maybeDriverContainer = getDriverContainer(createdDriverPod) - maybeDriverContainer.foreach { driverContainer => - assert(driverContainer.getName === DRIVER_CONTAINER_NAME) - assert(driverContainer.getImage === DRIVER_DOCKER_IMAGE_VALUE) - assert(driverContainer.getImagePullPolicy === "IfNotPresent") - val envs = driverContainer.getEnv.asScala.map { env => - (env.getName, env.getValue) - }.toMap - assert(envs(ENV_DRIVER_MEMORY) === s"${DRIVER_MEMORY_MB + DRIVER_MEMORY_OVERHEARD_MB}m") - assert(envs(ENV_DRIVER_MAIN_CLASS) === MAIN_CLASS) - assert(envs(ENV_DRIVER_ARGS) === APP_ARGS.mkString(" ")) - assert(envs(ENV_SUBMIT_EXTRA_CLASSPATH) === DRIVER_EXTRA_CLASSPATH) - } - } - - test("Created pod should apply custom annotations and labels") { - sparkConf.set(KUBERNETES_DRIVER_LABELS, - "label1=label1value,label2=label2value") - sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, - "annotation1=annotation1value,annotation2=annotation2value") - val createdDriverPod = createAndGetDriverPod() - val labels = createdDriverPod.getMetadata.getLabels.asScala - assert(labels.size === 4) - // App ID is non-deterministic, but just check if it's set and is prefixed with the app name - val appIdLabel = labels(SPARK_APP_ID_LABEL) - assert(appIdLabel != null && appIdLabel.startsWith(APP_NAME) && appIdLabel != APP_NAME) - val appNameLabel = labels(SPARK_APP_NAME_LABEL) - assert(appNameLabel != null && appNameLabel == APP_NAME) - assert(labels("label1") === "label1value") - assert(labels("label2") === "label2value") - val annotations = createdDriverPod.getMetadata.getAnnotations.asScala - val expectedAnnotations = Map( - "annotation1" -> "annotation1value", "annotation2" -> "annotation2value") - assert(annotations === expectedAnnotations) - } - - test("Driver JVM Options should be set in the environment.") { - sparkConf.set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, "-Dopt1=opt1value") - sparkConf.set("spark.logConf", 
"true") - val createdDriverPod = createAndGetDriverPod() - val maybeDriverContainer = getDriverContainer(createdDriverPod) - maybeDriverContainer.foreach { driverContainer => - val maybeJvmOptionsEnv = driverContainer.getEnv - .asScala - .find(_.getName == ENV_DRIVER_JAVA_OPTS) - assert(maybeJvmOptionsEnv.isDefined) - maybeJvmOptionsEnv.foreach { jvmOptionsEnv => - val jvmOptions = jvmOptionsEnv.getValue.split(" ") - jvmOptions.foreach { opt => assert(opt.startsWith("-D")) } - val optionKeyValues = jvmOptions.map { option => - val withoutDashDPrefix = option.stripPrefix("-D") - val split = withoutDashDPrefix.split('=') - assert(split.length == 2) - (split(0), split(1)) - }.toMap - assert(optionKeyValues("opt1") === "opt1value") - assert(optionKeyValues.contains("spark.app.id")) - assert(optionKeyValues("spark.jars") === MAIN_APP_RESOURCE) - assert(optionKeyValues(KUBERNETES_DRIVER_POD_NAME.key).startsWith(APP_NAME)) - assert(optionKeyValues("spark.app.name") === APP_NAME) - assert(optionKeyValues("spark.logConf") === "true") + MockitoAnnotations.initMocks(this) + when(initContainerComponentsProvider.provideInitContainerBootstrap()) + .thenReturn(initContainerBootstrap) + when(submittedDependencyUploader.uploadJars()).thenReturn(JARS_RESOURCE) + when(submittedDependencyUploader.uploadFiles()).thenReturn(FILES_RESOURCE) + when(initContainerBootstrap + .bootstrapInitContainerAndVolumes(mockitoEq(DRIVER_CONTAINER_NAME), any())) + .thenAnswer(new Answer[PodBuilder] { + override def answer(invocationOnMock: InvocationOnMock): PodBuilder = { + invocationOnMock.getArgumentAt(1, classOf[PodBuilder]).editMetadata() + .addToAnnotations(BOOTSTRAPPED_POD_ANNOTATION, TRUE) + .endMetadata() + } + }) + when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver()) + .thenReturn(containerLocalizedFilesResolver) + when(initContainerComponentsProvider.provideExecutorInitContainerConfiguration()) + .thenReturn(executorInitContainerConfiguration) + when(submittedDependenciesSecretBuilder.build()) + .thenReturn(INIT_CONTAINER_SECRET) + when(initContainerConfigMapBuilder.build()) + .thenReturn(INIT_CONTAINER_CONFIG_MAP) + when(kubernetesClientProvider.get).thenReturn(kubernetesClient) + when(kubernetesClient.pods()).thenReturn(podOps) + when(podOps.create(any())).thenAnswer(new Answer[Pod] { + override def answer(invocation: InvocationOnMock): Pod = { + new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) + .editMetadata() + .withUid(DRIVER_POD_UID) + .endMetadata() + .withKind(DRIVER_POD_KIND) + .withApiVersion(DRIVER_POD_API_VERSION) + .build() } - } + }) + when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) + .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) + when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) + .thenReturn(RESOLVED_SPARK_JARS) + when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) + .thenReturn(RESOLVED_SPARK_FILES) + when(executorInitContainerConfiguration.configureSparkConfForExecutorInitContainer(SPARK_CONF)) + .thenReturn(SPARK_CONF_WITH_EXECUTOR_INIT_CONF) + when(kubernetesClient.resourceList(anyVararg[HasMetadata]())).thenReturn(resourceListOps) } - // Tests with local dependencies with the mounted dependency manager. 
- test("Uploading local dependencies should create Kubernetes secrets and config map") { - val initContainerConfigMap = getInitContainerConfigMap() - val initContainerSecret = getInitContainerSecret() - runWithMountedDependencies(initContainerConfigMap, initContainerSecret) - val driverPod = captureCreatedPodAnswer.capturedArgument - assert(captureCreatedResourcesAnswer.capturedArguments != null) - assert(captureCreatedResourcesAnswer.capturedArguments.size === 2) - assert(captureCreatedResourcesAnswer.capturedArguments.toSet === - Set(initContainerSecret, initContainerConfigMap)) - captureCreatedResourcesAnswer.capturedArguments.foreach { resource => - val driverPodOwnerReferences = resource.getMetadata.getOwnerReferences - assert(driverPodOwnerReferences.size === 1) - val driverPodOwnerReference = driverPodOwnerReferences.asScala.head - assert(driverPodOwnerReference.getName === driverPod.getMetadata.getName) - assert(driverPodOwnerReference.getApiVersion === driverPod.getApiVersion) - assert(driverPodOwnerReference.getUid === driverPod.getMetadata.getUid) - assert(driverPodOwnerReference.getKind === driverPod.getKind) - assert(driverPodOwnerReference.getController) - } + test("Run with dependency uploader") { + when(initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) + .thenReturn(Some(submittedDependencyUploader)) + when(initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets()))) + .thenReturn(Some(submittedDependenciesSecretBuilder)) + when(initContainerComponentsProvider + .provideInitContainerConfigMapBuilder(Some(SUBMITTED_RESOURCES.ids()))) + .thenReturn(initContainerConfigMapBuilder) + runAndVerifyDriverPodHasCorrectProperties() + val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) + verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) + val createdResources = resourceListArgumentCaptor.getAllValues.asScala + assert(createdResources.size === 2) + verifyCreatedResourcesHaveOwnerReferences(createdResources) + assert(createdResources.exists { + case secret: Secret => + val expectedSecretData = Map(SECRET_KEY -> SECRET_DATA) + secret.getMetadata.getName == SECRET_NAME && secret.getData.asScala == expectedSecretData + case _ => false + }) + verifyConfigMapWasCreated(createdResources) + verify(submittedDependencyUploader).uploadJars() + verify(submittedDependencyUploader).uploadFiles() + verify(initContainerComponentsProvider) + .provideInitContainerConfigMapBuilder(Some(SUBMITTED_RESOURCES.ids())) + verify(initContainerComponentsProvider) + .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets())) } - test("Uploading local resources should set classpath environment variables") { - val initContainerConfigMap = getInitContainerConfigMap() - val initContainerSecret = getInitContainerSecret() - runWithMountedDependencies(initContainerConfigMap, initContainerSecret) - val driverPod = captureCreatedPodAnswer.capturedArgument - val maybeDriverContainer = getDriverContainer(driverPod) - maybeDriverContainer.foreach { driverContainer => - val envs = driverContainer.getEnv - .asScala - .map { env => (env.getName, env.getValue) } - .toMap - val classPathEntries = envs(ENV_MOUNTED_CLASSPATH).split(File.pathSeparator).toSet - val expectedClassPathEntries = RESOLVED_SPARK_JARS - .map(Utils.resolveURI) - .map(_.getPath) - .toSet - assert(classPathEntries === expectedClassPathEntries) - } + test("Run without dependency 
uploader") { + when(initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) + .thenReturn(None) + when(initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder(None)) + .thenReturn(None) + when(initContainerComponentsProvider + .provideInitContainerConfigMapBuilder(None)) + .thenReturn(initContainerConfigMapBuilder) + runAndVerifyDriverPodHasCorrectProperties() + val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) + verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) + val createdResources = resourceListArgumentCaptor.getAllValues.asScala + assert(createdResources.size === 1) + verifyCreatedResourcesHaveOwnerReferences(createdResources) + verifyConfigMapWasCreated(createdResources) + verify(submittedDependencyUploader, times(0)).uploadJars() + verify(submittedDependencyUploader, times(0)).uploadFiles() + verify(initContainerComponentsProvider) + .provideInitContainerConfigMapBuilder(None) + verify(initContainerComponentsProvider) + .provideSubmittedDependenciesSecretBuilder(None) } - private def getInitContainerSecret(): Secret = { - new SecretBuilder() - .withNewMetadata().withName(s"$APP_NAME-init-container-secret").endMetadata() - .addToData( - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY, DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceSecret) - .addToData(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY, - DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceSecret) - .build() + private def verifyCreatedResourcesHaveOwnerReferences( + createdResources: mutable.Buffer[HasMetadata]): Unit = { + assert(createdResources.forall { resource => + val owners = resource.getMetadata.getOwnerReferences.asScala + owners.size === 1 && + owners.head.getController && + owners.head.getKind == DRIVER_POD_KIND && + owners.head.getUid == DRIVER_POD_UID && + owners.head.getName == APP_ID && + owners.head.getApiVersion == DRIVER_POD_API_VERSION + }) } - private def getInitContainerConfigMap(): ConfigMap = { - new ConfigMapBuilder() - .withNewMetadata().withName(s"$APP_NAME-init-container-conf").endMetadata() - .addToData("key", "configuration") - .build() + private def verifyConfigMapWasCreated(createdResources: mutable.Buffer[HasMetadata]): Unit = { + assert(createdResources.exists { + case configMap: ConfigMap => + val expectedConfigMapData = Map(CONFIG_MAP_KEY -> CONFIG_MAP_DATA) + configMap.getMetadata.getName == CONFIG_MAP_NAME && + configMap.getData.asScala == expectedConfigMapData + case _ => false + }) } - private def runWithMountedDependencies( - initContainerConfigMap: ConfigMap, initContainerSecret: Secret): Unit = { - sparkConf.set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) - .setJars(SPARK_JARS) - .set("spark.files", SPARK_FILES.mkString(",")) - val labelsMatcher = new BaseMatcher[Map[String, String]] { - override def matches(maybeLabels: scala.Any) = { - maybeLabels match { - case labels: Map[String, String] => - labels(SPARK_APP_ID_LABEL).startsWith(APP_NAME) && - labels(SPARK_APP_NAME_LABEL) == APP_NAME - case _ => false + private def runAndVerifyDriverPodHasCorrectProperties(): Unit = { + new Client( + APP_NAME, + APP_ID, + MAIN_CLASS, + SPARK_CONF, + APP_ARGS, + SPARK_JARS, + SPARK_FILES, + kubernetesClientProvider, + initContainerComponentsProvider).run() + val podMatcher = new BaseMatcher[Pod] { + override def matches(o: scala.Any): Boolean = { + o match { + case p: Pod => + Option(p) + .filter(_.getMetadata.getName == APP_ID) + .filter(podHasCorrectAnnotations) + 
.filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) + .filter(containerHasCorrectBasicContainerConfiguration) + .filter(containerHasCorrectBasicEnvs) + .filter(containerHasCorrectMountedClasspath) + .exists(containerHasCorrectJvmOptions) + case _ => + false } } - override def describeTo(description: Description) = { - description.appendText("Checks if the labels contain the app ID and app name.") - } + override def describeTo(description: Description): Unit = {} } - when(mountedDependencyManagerProvider.getMountedDependencyManager( - startsWith(APP_NAME), - mockitoEq(STAGING_SERVER_URI), - argThat(labelsMatcher), - mockitoEq(NAMESPACE), - mockitoEq(SPARK_JARS ++ Seq(MAIN_APP_RESOURCE)), - mockitoEq(SPARK_FILES))).thenReturn(mountedDependencyManager) - when(mountedDependencyManager.uploadJars()).thenReturn(DOWNLOAD_JARS_RESOURCE_IDENTIFIER) - when(mountedDependencyManager.uploadFiles()).thenReturn(DOWNLOAD_FILES_RESOURCE_IDENTIFIER) - when(mountedDependencyManager.buildInitContainerSecret( - DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceSecret, - DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceSecret)) - .thenReturn(initContainerSecret) - when(mountedDependencyManager.buildInitContainerConfigMap( - DOWNLOAD_JARS_RESOURCE_IDENTIFIER.resourceId, DOWNLOAD_FILES_RESOURCE_IDENTIFIER.resourceId)) - .thenReturn(initContainerConfigMap) - when(mountedDependencyManager.resolveSparkJars()).thenReturn(RESOLVED_SPARK_JARS) - when(mountedDependencyManager.resolveSparkFiles()).thenReturn(RESOLVED_SPARK_FILES) - when(mountedDependencyManager.configurePodToMountLocalDependencies( - mockitoEq(DRIVER_CONTAINER_NAME), - mockitoEq(initContainerSecret), - mockitoEq(initContainerConfigMap), - any())).thenAnswer(new Answer[PodBuilder] { - override def answer(invocationOnMock: InvocationOnMock): PodBuilder = { - val basePod = invocationOnMock.getArgumentAt(3, classOf[PodBuilder]) - basePod.editMetadata().addToAnnotations(MOUNTED_FILES_ANNOTATION_KEY, "true").endMetadata() - } - }) - val clientUnderTest = createClient() - clientUnderTest.run() + verify(podOps).create(argThat(podMatcher)) } - private def getDriverContainer(driverPod: Pod): Option[Container] = { - val maybeDriverContainer = driverPod.getSpec - .getContainers - .asScala - .find(_.getName == DRIVER_CONTAINER_NAME) - assert(maybeDriverContainer.isDefined) - maybeDriverContainer + private def containerHasCorrectJvmOptions(pod: Pod): Boolean = { + val driverContainer = pod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) + envs.toMap.get(ENV_DRIVER_JAVA_OPTS).exists { javaOptions => + val splitOptions = javaOptions.split(" ") + val expectedOptions = SPARK_CONF.getAll + .filterNot(_._1 == org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS.key) + .toMap ++ + Map( + "spark.app.id" -> APP_ID, + KUBERNETES_DRIVER_POD_NAME.key -> APP_ID, + EXECUTOR_INIT_CONF_KEY -> TRUE, + CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, + "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), + "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) + splitOptions.forall(_.startsWith("-D")) && + splitOptions.map { option => + val withoutPrefix = option.substring(2) + (withoutPrefix.split("=", 2)(0), withoutPrefix.split("=", 2)(1)) + }.toMap == expectedOptions + } } - private def createAndGetDriverPod(): Pod = { - val clientUnderTest = createClient() - clientUnderTest.run() - val createdDriverPod = captureCreatedPodAnswer.capturedArgument - assert(createdDriverPod != null) - createdDriverPod + private def 
containerHasCorrectMountedClasspath(pod: Pod): Boolean = { + val driverContainer = pod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) + envs.toMap.get(ENV_MOUNTED_CLASSPATH).exists { classpath => + val mountedClasspathEntities = classpath.split(File.pathSeparator) + mountedClasspathEntities.toSet == RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS.toSet + } } - private def createClient(): Client = { - new Client( - MAIN_CLASS, - sparkConf, - APP_ARGS, - MAIN_APP_RESOURCE, - submissionKubernetesClientProvider, - mountedDependencyManagerProvider) + private def containerHasCorrectBasicEnvs(pod: Pod): Boolean = { + val driverContainer = pod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) + val expectedBasicEnvs = Map( + ENV_SUBMIT_EXTRA_CLASSPATH -> DRIVER_EXTRA_CLASSPATH, + ENV_DRIVER_MEMORY -> s"${DRIVER_MEMORY_MB + DRIVER_MEMORY_OVERHEAD_MB}m", + ENV_DRIVER_MAIN_CLASS -> MAIN_CLASS, + ENV_DRIVER_ARGS -> APP_ARGS.mkString(" ")) + expectedBasicEnvs.toSet.subsetOf(envs.toSet) } - private class SelfArgumentCapturingAnswer[T: ClassTag] extends Answer[T] { - var capturedArgument: T = _ - - override def answer(invocationOnMock: InvocationOnMock): T = { - val argumentClass = implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] - val argument = invocationOnMock.getArgumentAt(0, argumentClass) - this.capturedArgument = argument - argument - } + private def containerHasCorrectBasicContainerConfiguration(pod: Pod): Boolean = { + val containers = pod.getSpec.getContainers.asScala + containers.size == 1 && + containers.head.getName == DRIVER_CONTAINER_NAME && + containers.head.getImage == CUSTOM_DRIVER_IMAGE && + containers.head.getImagePullPolicy == "IfNotPresent" } - private class AllArgumentsCapturingAnswer[I, T](returnValue: T) extends Answer[T] { - var capturedArguments: Seq[I] = _ - - override def answer(invocationOnMock: InvocationOnMock): T = { - capturedArguments = invocationOnMock.getArguments.map(_.asInstanceOf[I]).toSeq - returnValue - } + private def podHasCorrectAnnotations(pod: Pod): Boolean = { + val expectedAnnotations = Map( + CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, + BOOTSTRAPPED_POD_ANNOTATION -> TRUE) + pod.getMetadata.getAnnotations.asScala == expectedAnnotations } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala new file mode 100644 index 0000000000000..6804f0010b6a5 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala @@ -0,0 +1,69 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.SparkFunSuite + +class ContainerLocalizedFilesResolverSuite extends SparkFunSuite { + private val SPARK_JARS = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", + "file:///app/jars/jar2.jar", + "local:///app/jars/jar3.jar", + "http://app/jars/jar4.jar") + private val SPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.txt", + "file:///app/files/file2.txt", + "local:///app/files/file3.txt", + "http://app/files/file4.txt") + private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" + private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" + private val localizedFilesResolver = new ContainerLocalizedFilesResolverImpl( + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH) + + test("Submitted and remote Spark jars should resolve non-local uris to download path.") { + val resolvedJars = localizedFilesResolver.resolveSubmittedAndRemoteSparkJars() + val expectedResolvedJars = Seq( + s"$JARS_DOWNLOAD_PATH/jar1.jar", + s"$JARS_DOWNLOAD_PATH/jar2.jar", + "/app/jars/jar3.jar", + s"$JARS_DOWNLOAD_PATH/jar4.jar") + assert(resolvedJars === expectedResolvedJars) + } + + test("Submitted Spark jars should resolve to the download path.") { + val resolvedJars = localizedFilesResolver.resolveSubmittedSparkJars() + val expectedResolvedJars = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", + s"$JARS_DOWNLOAD_PATH/jar2.jar", + "local:///app/jars/jar3.jar", + "http://app/jars/jar4.jar") + assert(resolvedJars === expectedResolvedJars) + } + + test("Submitted Spark files should resolve to the download path.") { + val resolvedFiles = localizedFilesResolver.resolveSubmittedSparkFiles() + val expectedResolvedFiles = Seq( + "hdfs://localhost:9000/app/files/file1.txt", + s"$FILES_DOWNLOAD_PATH/file2.txt", + "local:///app/files/file3.txt", + "http://app/files/file4.txt") + assert(resolvedFiles === expectedResolvedFiles) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala new file mode 100644 index 0000000000000..62bfd127d17e2 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ + +class ExecutorInitContainerConfigurationSuite extends SparkFunSuite { + + private val SECRET_NAME = "init-container-secret" + private val SECRET_MOUNT_DIR = "/mnt/secrets/spark" + private val CONFIG_MAP_NAME = "spark-config-map" + private val CONFIG_MAP_KEY = "spark-config-map-key" + + test("Not passing a secret name should not set the secret value.") { + val baseSparkConf = new SparkConf(false) + val configurationUnderTest = new ExecutorInitContainerConfigurationImpl( + None, + SECRET_MOUNT_DIR, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY) + val resolvedSparkConf = configurationUnderTest + .configureSparkConfForExecutorInitContainer(baseSparkConf) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP).contains(CONFIG_MAP_NAME)) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY).contains(CONFIG_MAP_KEY)) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) + .contains(SECRET_MOUNT_DIR)) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET).isEmpty) + } + + test("Passing a secret name should set the secret value.") { + val baseSparkConf = new SparkConf(false) + val configurationUnderTest = new ExecutorInitContainerConfigurationImpl( + Some(SECRET_NAME), + SECRET_MOUNT_DIR, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY) + val resolvedSparkConf = configurationUnderTest + .configureSparkConfForExecutorInitContainer(baseSparkConf) + assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET).contains(SECRET_NAME)) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala deleted file mode 100644 index 321fe1b3fd889..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/MountedDependencyManagerSuite.scala +++ /dev/null @@ -1,323 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ - -package org.apache.spark.deploy.kubernetes.submit.v2 - -import java.io.{ByteArrayOutputStream, File, StringReader} -import java.util.{Properties, UUID} - -import com.fasterxml.jackson.databind.ObjectMapper -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Container, Pod, PodBuilder, SecretBuilder} -import okhttp3.RequestBody -import okio.Okio -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.mockito.Matchers.any -import org.mockito.Mockito -import org.scalatest.BeforeAndAfter -import org.scalatest.mock.MockitoSugar._ -import retrofit2.{Call, Response} -import scala.collection.JavaConverters._ - -import org.apache.spark.{SparkFunSuite, SSLOptions} -import org.apache.spark.deploy.kubernetes.CompressionUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourceIdentifier} -import org.apache.spark.util.Utils - -private[spark] class MountedDependencyManagerSuite extends SparkFunSuite with BeforeAndAfter { - import MountedDependencyManagerSuite.createTempFile - - private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) - private val APP_ID = "app-id" - private val LABELS = Map("label1" -> "label1value", "label2" -> "label2value") - private val NAMESPACE = "namespace" - private val STAGING_SERVER_URI = "http://localhost:8000" - private val INIT_CONTAINER_IMAGE = "spark-driver-init:latest" - private val JARS_DOWNLOAD_PATH = DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.defaultValue.get - private val FILES_DOWNLOAD_PATH = DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.defaultValue.get - private val DOWNLOAD_TIMEOUT_MINUTES = 5 - private val LOCAL_JARS = Seq(createTempFile("jar"), createTempFile("jar")) - private val JARS = Seq("hdfs://localhost:9000/jars/jar1.jar", - s"file://${LOCAL_JARS.head}", - LOCAL_JARS(1)) - private val LOCAL_FILES = Seq(createTempFile("txt")) - private val FILES = Seq("hdfs://localhost:9000/files/file1.txt", - LOCAL_FILES.head) - private val TRUSTSTORE_FILE = new File(createTempFile(".jks")) - private val TRUSTSTORE_PASSWORD = "trustStorePassword" - private val TRUSTSTORE_TYPE = "jks" - private val STAGING_SERVER_SSL_OPTIONS = SSLOptions( - enabled = true, - trustStore = Some(TRUSTSTORE_FILE), - trustStorePassword = Some(TRUSTSTORE_PASSWORD), - trustStoreType = Some(TRUSTSTORE_TYPE)) - private val JARS_RESOURCE_ID = "jarsId" - private val JARS_SECRET = "jarsSecret" - private val FILES_RESOURCE_ID = "filesId" - private val FILES_SECRET = "filesSecret" - private var retrofitClientFactory: RetrofitClientFactory = _ - private var retrofitClient: ResourceStagingServiceRetrofit = _ - - private var dependencyManagerUnderTest: MountedDependencyManager = _ - - before { - retrofitClientFactory = mock[RetrofitClientFactory] - retrofitClient = mock[ResourceStagingServiceRetrofit] - Mockito.when( - retrofitClientFactory.createRetrofitClient( - STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) - .thenReturn(retrofitClient) - dependencyManagerUnderTest = new MountedDependencyManagerImpl( - APP_ID, - LABELS, - NAMESPACE, - STAGING_SERVER_URI, - INIT_CONTAINER_IMAGE, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - DOWNLOAD_TIMEOUT_MINUTES, - JARS, - FILES, - 
STAGING_SERVER_SSL_OPTIONS, - retrofitClientFactory) - } - - test("Uploading jars should contact the staging server with the appropriate parameters") { - val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( - StagedResourceIdentifier("resourceId", "resourceSecret")) - Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) - .thenAnswer(capturingArgumentsAnswer) - dependencyManagerUnderTest.uploadJars() - testUploadSendsCorrectFiles(LOCAL_JARS, capturingArgumentsAnswer) - } - - test("Uploading files should contact the staging server with the appropriate parameters") { - val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( - StagedResourceIdentifier("resourceId", "resourceSecret")) - Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) - .thenAnswer(capturingArgumentsAnswer) - dependencyManagerUnderTest.uploadFiles() - testUploadSendsCorrectFiles(LOCAL_FILES, capturingArgumentsAnswer) - } - - test("Init container secret should contain jars, files, and trustStore") { - val jarsSecretBase64 = BaseEncoding.base64().encode(JARS_SECRET.getBytes(Charsets.UTF_8)) - val filesSecretBase64 = BaseEncoding.base64().encode(FILES_SECRET.getBytes(Charsets.UTF_8)) - val trustStoreBase64 = BaseEncoding.base64().encode(Files.toByteArray(TRUSTSTORE_FILE)) - val secret = dependencyManagerUnderTest.buildInitContainerSecret("jarsSecret", "filesSecret") - assert(secret.getMetadata.getName === s"$APP_ID-spark-init") - val expectedSecrets = Map( - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_KEY -> jarsSecretBase64, - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_KEY -> filesSecretBase64, - INIT_CONTAINER_TRUSTSTORE_SECRET_KEY -> trustStoreBase64) - assert(secret.getData.asScala === expectedSecrets) - } - - test("Init container config map should contain parameters for downloading from staging server") { - val configMap = dependencyManagerUnderTest.buildInitContainerConfigMap( - JARS_RESOURCE_ID, FILES_RESOURCE_ID) - assert(configMap.getMetadata.getName === s"$APP_ID-init-properties") - val propertiesRawString = configMap.getData.get(INIT_CONTAINER_CONFIG_MAP_KEY) - assert(propertiesRawString != null) - val propertiesReader = new StringReader(propertiesRawString) - val properties = new Properties() - properties.load(propertiesReader) - val propertiesMap = properties.stringPropertyNames().asScala.map { prop => - (prop, properties.getProperty(prop)) - }.toMap - val expectedProperties = Map[String, String]( - RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, - DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION.key -> JARS_DOWNLOAD_PATH, - DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION.key -> FILES_DOWNLOAD_PATH, - INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_PATH, - INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_PATH, - DRIVER_MOUNT_DEPENDENCIES_INIT_TIMEOUT.key -> s"${DOWNLOAD_TIMEOUT_MINUTES}m", - RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> INIT_CONTAINER_TRUSTSTORE_PATH, - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", - RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, - RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE) - assert(propertiesMap === expectedProperties) - } - - test("Resolving jars should map local paths to their mounted counterparts") { - val resolvedJars = 
dependencyManagerUnderTest.resolveSparkJars() - val expectedResolvedJars = Seq( - "hdfs://localhost:9000/jars/jar1.jar", - s"file://$JARS_DOWNLOAD_PATH/${new File(JARS(1)).getName}", - s"file://$JARS_DOWNLOAD_PATH/${new File(JARS(2)).getName}") - assert(resolvedJars === expectedResolvedJars) - } - - test("Resolving files should map local paths to their mounted counterparts") { - val resolvedFiles = dependencyManagerUnderTest.resolveSparkFiles() - val expectedResolvedFiles = Seq( - "hdfs://localhost:9000/files/file1.txt", - s"file://$FILES_DOWNLOAD_PATH/${new File(FILES(1)).getName}") - assert(resolvedFiles === expectedResolvedFiles) - } - - test("Downloading init container should be added to pod") { - val driverPod = configureDriverPod() - val podAnnotations = driverPod.getMetadata.getAnnotations - assert(podAnnotations.size === 1) - val initContainerRawAnnotation = podAnnotations.get(INIT_CONTAINER_ANNOTATION) - val initContainers = OBJECT_MAPPER.readValue( - initContainerRawAnnotation, classOf[Array[Container]]) - assert(initContainers.size === 1) - val initContainer = initContainers.head - assert(initContainer.getName === "spark-driver-init") - assert(initContainer.getImage === INIT_CONTAINER_IMAGE) - assert(initContainer.getImagePullPolicy === "IfNotPresent") - val volumeMounts = initContainer.getVolumeMounts - .asScala - .map(mount => (mount.getName, mount.getMountPath)) - .toMap - val expectedVolumeMounts = Map[String, String]( - DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, - DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH, - INIT_CONTAINER_PROPERTIES_FILE_VOLUME -> INIT_CONTAINER_PROPERTIES_FILE_MOUNT_PATH, - INIT_CONTAINER_SECRETS_VOLUME_NAME -> INIT_CONTAINER_SECRETS_VOLUME_MOUNT_PATH) - assert(volumeMounts === expectedVolumeMounts) - } - - test("Driver pod should have added volumes and volume mounts for file downloads") { - val driverPod = configureDriverPod() - val volumes = driverPod.getSpec.getVolumes.asScala.map(volume => (volume.getName, volume)).toMap - val initContainerPropertiesVolume = volumes(INIT_CONTAINER_PROPERTIES_FILE_VOLUME).getConfigMap - assert(initContainerPropertiesVolume != null) - assert(initContainerPropertiesVolume.getName === "config") - assert(initContainerPropertiesVolume.getItems.asScala.exists { keyToPath => - keyToPath.getKey == INIT_CONTAINER_CONFIG_MAP_KEY && - keyToPath.getPath == INIT_CONTAINER_PROPERTIES_FILE_NAME - }) - val jarsVolume = volumes(DOWNLOAD_JARS_VOLUME_NAME) - assert(jarsVolume.getEmptyDir != null) - val filesVolume = volumes(DOWNLOAD_FILES_VOLUME_NAME) - assert(filesVolume.getEmptyDir != null) - val initContainerSecretVolume = volumes(INIT_CONTAINER_SECRETS_VOLUME_NAME) - assert(initContainerSecretVolume.getSecret != null) - assert(initContainerSecretVolume.getSecret.getSecretName === "secret") - val driverContainer = driverPod.getSpec - .getContainers - .asScala - .find(_.getName == "driver-container").get - val driverContainerVolumeMounts = driverContainer.getVolumeMounts - .asScala - .map(mount => (mount.getName, mount.getMountPath)) - .toMap - val expectedVolumeMountNamesAndPaths = Map[String, String]( - DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, - DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) - assert(driverContainerVolumeMounts === expectedVolumeMountNamesAndPaths) - val envs = driverContainer.getEnv - assert(envs.size() === 1) - assert(envs.asScala.head.getName === ENV_UPLOADED_JARS_DIR) - assert(envs.asScala.head.getValue === JARS_DOWNLOAD_PATH) - } - - private def configureDriverPod(): Pod = { - val 
initContainerSecret = new SecretBuilder() - .withNewMetadata().withName("secret").endMetadata() - .addToData("datakey", "datavalue") - .build() - val initContainerConfigMap = new ConfigMapBuilder() - .withNewMetadata().withName("config").endMetadata() - .addToData("datakey", "datavalue") - .build() - val basePod = new PodBuilder() - .withNewMetadata() - .withName("driver-pod") - .endMetadata() - .withNewSpec() - .addNewContainer() - .withName("driver-container") - .withImage("spark-driver:latest") - .endContainer() - .endSpec() - val adjustedPod = dependencyManagerUnderTest.configurePodToMountLocalDependencies( - "driver-container", - initContainerSecret, - initContainerConfigMap, - basePod).build() - adjustedPod - } - - private def testUploadSendsCorrectFiles( - expectedFiles: Seq[String], - capturingArgumentsAnswer: UploadDependenciesArgumentsCapturingAnswer) = { - val requestLabelsBytes = requestBodyBytes(capturingArgumentsAnswer.podLabelsArg) - val requestLabelsString = new String(requestLabelsBytes, Charsets.UTF_8) - val requestLabelsMap = OBJECT_MAPPER.readValue( - requestLabelsString, classOf[Map[String, String]]) - assert(requestLabelsMap === LABELS) - val requestNamespaceBytes = requestBodyBytes(capturingArgumentsAnswer.podNamespaceArg) - val requestNamespaceString = new String(requestNamespaceBytes, Charsets.UTF_8) - assert(requestNamespaceString === NAMESPACE) - val localJarsTarStream = new ByteArrayOutputStream() - CompressionUtils.writeTarGzipToStream(localJarsTarStream, expectedFiles) - val requestResourceBytes = requestBodyBytes(capturingArgumentsAnswer.podResourcesArg) - assert(requestResourceBytes.sameElements(localJarsTarStream.toByteArray)) - } - - private def requestBodyBytes(requestBody: RequestBody): Array[Byte] = { - Utils.tryWithResource(new ByteArrayOutputStream()) { outputStream => - Utils.tryWithResource(Okio.sink(outputStream)) { sink => - Utils.tryWithResource(Okio.buffer(sink)) { bufferedSink => - requestBody.writeTo(bufferedSink) - } - } - outputStream.toByteArray - } - } -} - -private class UploadDependenciesArgumentsCapturingAnswer(returnValue: StagedResourceIdentifier) - extends Answer[Call[StagedResourceIdentifier]] { - - var podLabelsArg: RequestBody = _ - var podNamespaceArg: RequestBody = _ - var podResourcesArg: RequestBody = _ - var kubernetesCredentialsArg: RequestBody = _ - - override def answer(invocationOnMock: InvocationOnMock): Call[StagedResourceIdentifier] = { - podLabelsArg = invocationOnMock.getArgumentAt(0, classOf[RequestBody]) - podNamespaceArg = invocationOnMock.getArgumentAt(1, classOf[RequestBody]) - podResourcesArg = invocationOnMock.getArgumentAt(2, classOf[RequestBody]) - kubernetesCredentialsArg = invocationOnMock.getArgumentAt(3, classOf[RequestBody]) - val responseCall = mock[Call[StagedResourceIdentifier]] - Mockito.when(responseCall.execute()).thenReturn(Response.success(returnValue)) - responseCall - } -} - -private object MountedDependencyManagerSuite { - def createTempFile(extension: String): String = { - val dir = Utils.createTempDir() - val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") - Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) - file.getAbsolutePath - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala new file mode 100644 index 
0000000000000..7c6fbf5ce6da2 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala @@ -0,0 +1,101 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.StringReader +import java.util.Properties + +import com.google.common.collect.Maps +import org.mockito.Mockito.{verify, when} +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.config._ + +class SparkInitContainerConfigMapBuilderSuite extends SparkFunSuite with BeforeAndAfter { + + private val JARS = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", + "file:///app/jars/jar2.jar", + "http://localhost:9000/app/jars/jar3.jar", + "local:///app/jars/jar4.jar") + private val FILES = Seq( + "hdfs://localhost:9000/app/files/file1.txt", + "file:///app/files/file2.txt", + "http://localhost:9000/app/files/file3.txt", + "local:///app/files/file4.txt") + private val JARS_DOWNLOAD_PATH = "/var/data/jars" + private val FILES_DOWNLOAD_PATH = "/var/data/files" + private val CONFIG_MAP_NAME = "config-map" + private val CONFIG_MAP_KEY = "config-map-key" + + test("Config map without submitted dependencies sets remote download configurations") { + val configMap = new SparkInitContainerConfigMapBuilderImpl( + JARS, + FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY, + None).build() + assert(configMap.getMetadata.getName === CONFIG_MAP_NAME) + val maybeConfigValue = configMap.getData.asScala.get(CONFIG_MAP_KEY) + assert(maybeConfigValue.isDefined) + maybeConfigValue.foreach { configValue => + val propertiesStringReader = new StringReader(configValue) + val properties = new Properties() + properties.load(propertiesStringReader) + val propertiesMap = Maps.fromProperties(properties).asScala + val remoteJarsString = propertiesMap.get(INIT_CONTAINER_REMOTE_JARS.key) + assert(remoteJarsString.isDefined) + val remoteJars = remoteJarsString.map(_.split(",")).toSet.flatten + assert(remoteJars === + Set("hdfs://localhost:9000/app/jars/jar1.jar", "http://localhost:9000/app/jars/jar3.jar")) + val remoteFilesString = propertiesMap.get(INIT_CONTAINER_REMOTE_FILES.key) + assert(remoteFilesString.isDefined) + val remoteFiles = remoteFilesString.map(_.split(",")).toSet.flatten + assert(remoteFiles === + Set("hdfs://localhost:9000/app/files/file1.txt", + "http://localhost:9000/app/files/file3.txt")) + assert(propertiesMap(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION.key) === JARS_DOWNLOAD_PATH) + assert(propertiesMap(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key) === 
FILES_DOWNLOAD_PATH) + } + } + + test("Config map with submitted dependencies adds configurations from plugin") { + val submittedDependenciesPlugin = mock[SubmittedDependencyInitContainerConfigPlugin] + when(submittedDependenciesPlugin.configurationsToFetchSubmittedDependencies()) + .thenReturn(Map("customConf" -> "customConfValue")) + val configMap = new SparkInitContainerConfigMapBuilderImpl( + JARS, + FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY, + Some(submittedDependenciesPlugin)).build() + val configValue = configMap.getData.asScala(CONFIG_MAP_KEY) + val propertiesStringReader = new StringReader(configValue) + val properties = new Properties() + properties.load(propertiesStringReader) + val propertiesMap = Maps.fromProperties(properties).asScala + assert(propertiesMap("customConf") === "customConfValue") + verify(submittedDependenciesPlugin).configurationsToFetchSubmittedDependencies() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala new file mode 100644 index 0000000000000..11a671085c201 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.config._ + +class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { + private val STAGING_SERVER_URI = "http://localhost:9000" + private val JARS_RESOURCE_ID = "jars-id" + private val FILES_RESOURCE_ID = "files-id" + private val JARS_SECRET_KEY = "jars" + private val FILES_SECRET_KEY = "files" + private val TRUSTSTORE_SECRET_KEY = "trustStore" + private val SECRETS_VOLUME_MOUNT_PATH = "/var/data/" + private val TRUSTSTORE_PASSWORD = "trustStore" + private val TRUSTSTORE_FILE = "/mnt/secrets/trustStore.jks" + private val TRUSTSTORE_TYPE = "jks" + private val RESOURCE_STAGING_SERVICE_SSL_OPTIONS = SSLOptions( + enabled = true, + trustStore = Some(new File(TRUSTSTORE_FILE)), + trustStorePassword = Some(TRUSTSTORE_PASSWORD), + trustStoreType = Some(TRUSTSTORE_TYPE)) + + test("Plugin should provide configuration for fetching uploaded dependencies") { + val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( + STAGING_SERVER_URI, + JARS_RESOURCE_ID, + FILES_RESOURCE_ID, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + SECRETS_VOLUME_MOUNT_PATH, + SSLOptions()) + val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() + val expectedConfigurations = Map( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$SECRETS_VOLUME_MOUNT_PATH/$JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$SECRETS_VOLUME_MOUNT_PATH/$FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "false") + assert(addedConfigurations === expectedConfigurations) + } + + test("Plugin should set up SSL with the appropriate trustStore if it's provided.") { + val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( + STAGING_SERVER_URI, + JARS_RESOURCE_ID, + FILES_RESOURCE_ID, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + SECRETS_VOLUME_MOUNT_PATH, + RESOURCE_STAGING_SERVICE_SSL_OPTIONS) + val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() + val expectedSslConfigurations = Map( + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> + s"$SECRETS_VOLUME_MOUNT_PATH/$TRUSTSTORE_SECRET_KEY", + RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, + RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE) + assert(expectedSslConfigurations.toSet.subsetOf(addedConfigurations.toSet)) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala new file mode 100644 index 0000000000000..189d87e27a28a --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.Secret +import scala.collection.JavaConverters._ +import scala.collection.Map + +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.util.Utils + +class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { + + private val SECRET_NAME = "submitted-dependency-secret" + private val JARS_SECRET = "jars-secret" + private val FILES_SECRET = "files-secret" + private val JARS_SECRET_KEY = "jars-secret-key" + private val FILES_SECRET_KEY = "files-secret-key" + private val TRUSTSTORE_SECRET_KEY = "truststore-secret-key" + private val TRUSTSTORE_STRING_CONTENTS = "trustStore-contents" + + test("Building the secret without a trustStore") { + val builder = new SubmittedDependencySecretBuilderImpl( + SECRET_NAME, + JARS_SECRET, + FILES_SECRET, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + SSLOptions()) + val secret = builder.build() + assert(secret.getMetadata.getName === SECRET_NAME) + val secretDecodedData = decodeSecretData(secret) + val expectedSecretData = Map(JARS_SECRET_KEY -> JARS_SECRET, FILES_SECRET_KEY -> FILES_SECRET) + assert(secretDecodedData === expectedSecretData) + } + + private def decodeSecretData(secret: Secret): Map[String, String] = { + val secretData = secret.getData.asScala + secretData.mapValues(encoded => + new String(BaseEncoding.base64().decode(encoded), Charsets.UTF_8)) + } + + test("Building the secret with a trustStore") { + val tempTrustStoreDir = Utils.createTempDir(namePrefix = "temp-truststores") + try { + val trustStoreFile = new File(tempTrustStoreDir, "trustStore.jks") + Files.write(TRUSTSTORE_STRING_CONTENTS, trustStoreFile, Charsets.UTF_8) + val builder = new SubmittedDependencySecretBuilderImpl( + SECRET_NAME, + JARS_SECRET, + FILES_SECRET, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + SSLOptions(trustStore = Some(trustStoreFile))) + val secret = builder.build() + val secretDecodedData = decodeSecretData(secret) + assert(secretDecodedData(TRUSTSTORE_SECRET_KEY) === TRUSTSTORE_STRING_CONTENTS) + } finally { + tempTrustStoreDir.delete() + } + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala new file mode 100644 index 0000000000000..7b259aa2c3a0c --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation 
(ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} +import java.util.UUID + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.base.Charsets +import com.google.common.io.Files +import okhttp3.RequestBody +import okio.Okio +import org.mockito.Matchers.any +import org.mockito.Mockito +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import retrofit2.{Call, Response} + +import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.util.Utils + +private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with BeforeAndAfter { + import SubmittedDependencyUploaderSuite.createTempFile + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val APP_ID = "app-id" + private val LABELS = Map("label1" -> "label1value", "label2" -> "label2value") + private val NAMESPACE = "namespace" + private val STAGING_SERVER_URI = "http://localhost:8000" + private val LOCAL_JARS = Seq(createTempFile("jar"), createTempFile("jar")) + private val JARS = Seq("hdfs://localhost:9000/jars/jar1.jar", + s"file://${LOCAL_JARS.head}", + LOCAL_JARS(1)) + private val LOCAL_FILES = Seq(createTempFile("txt")) + private val FILES = Seq("hdfs://localhost:9000/files/file1.txt", + LOCAL_FILES.head) + private val TRUSTSTORE_FILE = new File(createTempFile(".jks")) + private val TRUSTSTORE_PASSWORD = "trustStorePassword" + private val TRUSTSTORE_TYPE = "jks" + private val STAGING_SERVER_SSL_OPTIONS = SSLOptions( + enabled = true, + trustStore = Some(TRUSTSTORE_FILE), + trustStorePassword = Some(TRUSTSTORE_PASSWORD), + trustStoreType = Some(TRUSTSTORE_TYPE)) + private var retrofitClientFactory: RetrofitClientFactory = _ + private var retrofitClient: ResourceStagingServiceRetrofit = _ + + private var dependencyUploaderUnderTest: SubmittedDependencyUploader = _ + + before { + retrofitClientFactory = mock[RetrofitClientFactory] + retrofitClient = mock[ResourceStagingServiceRetrofit] + Mockito.when( + retrofitClientFactory.createRetrofitClient( + STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) + .thenReturn(retrofitClient) + dependencyUploaderUnderTest = new SubmittedDependencyUploaderImpl( + APP_ID, + LABELS, + NAMESPACE, + STAGING_SERVER_URI, + JARS, + FILES, + STAGING_SERVER_SSL_OPTIONS, + retrofitClientFactory) + } + + test("Uploading jars 
should contact the staging server with the appropriate parameters") { + val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( + SubmittedResourceIdAndSecret("resourceId", "resourceSecret")) + Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) + .thenAnswer(capturingArgumentsAnswer) + dependencyUploaderUnderTest.uploadJars() + testUploadSendsCorrectFiles(LOCAL_JARS, capturingArgumentsAnswer) + } + + test("Uploading files should contact the staging server with the appropriate parameters") { + val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( + SubmittedResourceIdAndSecret("resourceId", "resourceSecret")) + Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) + .thenAnswer(capturingArgumentsAnswer) + dependencyUploaderUnderTest.uploadFiles() + testUploadSendsCorrectFiles(LOCAL_FILES, capturingArgumentsAnswer) + } + + private def testUploadSendsCorrectFiles( + expectedFiles: Seq[String], + capturingArgumentsAnswer: UploadDependenciesArgumentsCapturingAnswer) = { + val requestLabelsBytes = requestBodyBytes(capturingArgumentsAnswer.podLabelsArg) + val requestLabelsString = new String(requestLabelsBytes, Charsets.UTF_8) + val requestLabelsMap = OBJECT_MAPPER.readValue( + requestLabelsString, classOf[Map[String, String]]) + assert(requestLabelsMap === LABELS) + val requestNamespaceBytes = requestBodyBytes(capturingArgumentsAnswer.podNamespaceArg) + val requestNamespaceString = new String(requestNamespaceBytes, Charsets.UTF_8) + assert(requestNamespaceString === NAMESPACE) + + val unpackedFilesDir = Utils.createTempDir(namePrefix = "test-unpacked-files") + val compressedBytesInput = new ByteArrayInputStream( + requestBodyBytes(capturingArgumentsAnswer.podResourcesArg)) + CompressionUtils.unpackTarStreamToDirectory(compressedBytesInput, unpackedFilesDir) + val writtenFiles = unpackedFilesDir.listFiles + assert(writtenFiles.size === expectedFiles.size) + + expectedFiles.map(new File(_)).foreach { expectedFile => + val maybeWrittenFile = writtenFiles.find(_.getName == expectedFile.getName) + assert(maybeWrittenFile.isDefined) + maybeWrittenFile.foreach { writtenFile => + val writtenFileBytes = Files.toByteArray(writtenFile) + val expectedFileBytes = Files.toByteArray(expectedFile) + assert(expectedFileBytes.toSeq === writtenFileBytes.toSeq) + } + } + } + + private def requestBodyBytes(requestBody: RequestBody): Array[Byte] = { + Utils.tryWithResource(new ByteArrayOutputStream()) { outputStream => + Utils.tryWithResource(Okio.sink(outputStream)) { sink => + Utils.tryWithResource(Okio.buffer(sink)) { bufferedSink => + try { + requestBody.writeTo(bufferedSink) + } finally { + bufferedSink.flush() + } + } + } + outputStream.toByteArray + } + } +} + +private class UploadDependenciesArgumentsCapturingAnswer(returnValue: SubmittedResourceIdAndSecret) + extends Answer[Call[SubmittedResourceIdAndSecret]] { + + var podLabelsArg: RequestBody = _ + var podNamespaceArg: RequestBody = _ + var podResourcesArg: RequestBody = _ + var kubernetesCredentialsArg: RequestBody = _ + + override def answer(invocationOnMock: InvocationOnMock): Call[SubmittedResourceIdAndSecret] = { + podLabelsArg = invocationOnMock.getArgumentAt(0, classOf[RequestBody]) + podNamespaceArg = invocationOnMock.getArgumentAt(1, classOf[RequestBody]) + podResourcesArg = invocationOnMock.getArgumentAt(2, classOf[RequestBody]) + kubernetesCredentialsArg = invocationOnMock.getArgumentAt(3, classOf[RequestBody]) + val responseCall = 
mock[Call[SubmittedResourceIdAndSecret]] + Mockito.when(responseCall.execute()).thenReturn(Response.success(returnValue)) + responseCall + } +} + +private object SubmittedDependencyUploaderSuite { + def createTempFile(extension: String): String = { + val dir = Utils.createTempDir() + val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") + Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) + file.getAbsolutePath + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala index 77eb7f2b9f49c..6ab37185b8d07 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala @@ -24,6 +24,7 @@ import com.google.common.base.Charsets import com.google.common.io.Files import okhttp3.{MediaType, ResponseBody} import org.mockito.Matchers.any +import org.mockito.Mockito import org.mockito.Mockito.{doAnswer, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer @@ -31,7 +32,7 @@ import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar._ import retrofit2.{Call, Callback, Response} -import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.util.Utils @@ -55,7 +56,6 @@ class KubernetesSparkDependencyDownloadInitContainerSuite private val JARS_RESOURCE_ID = "jarsId" private val FILES_RESOURCE_ID = "filesId" - private var sparkConf: SparkConf = _ private var downloadJarsDir: File = _ private var downloadFilesDir: File = _ private var downloadJarsSecretValue: String = _ @@ -64,7 +64,7 @@ class KubernetesSparkDependencyDownloadInitContainerSuite private var filesCompressedBytes: Array[Byte] = _ private var retrofitClientFactory: RetrofitClientFactory = _ private var retrofitClient: ResourceStagingServiceRetrofit = _ - private var initContainerUnderTest: KubernetesSparkDependencyDownloadInitContainer = _ + private var fileFetcher: FileFetcher = _ override def beforeAll(): Unit = { jarsCompressedBytes = compressPathsToBytes(JARS) @@ -80,24 +80,10 @@ class KubernetesSparkDependencyDownloadInitContainerSuite downloadFilesDir = Utils.createTempDir() retrofitClientFactory = mock[RetrofitClientFactory] retrofitClient = mock[ResourceStagingServiceRetrofit] - sparkConf = new SparkConf(true) - .set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) - .set(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER, JARS_RESOURCE_ID) - .set(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION, DOWNLOAD_JARS_SECRET_LOCATION) - .set(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER, FILES_RESOURCE_ID) - .set(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION, DOWNLOAD_FILES_SECRET_LOCATION) - .set(DRIVER_LOCAL_JARS_DOWNLOAD_LOCATION, downloadJarsDir.getAbsolutePath) - .set(DRIVER_LOCAL_FILES_DOWNLOAD_LOCATION, downloadFilesDir.getAbsolutePath) - .set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) - 
.set(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, TRUSTSTORE_FILE.getAbsolutePath) - .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD, TRUSTSTORE_PASSWORD) - .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE, TRUSTSTORE_TYPE) - + fileFetcher = mock[FileFetcher] when(retrofitClientFactory.createRetrofitClient( STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) .thenReturn(retrofitClient) - initContainerUnderTest = new KubernetesSparkDependencyDownloadInitContainer( - sparkConf, retrofitClientFactory) } after { @@ -105,9 +91,15 @@ class KubernetesSparkDependencyDownloadInitContainerSuite downloadFilesDir.delete() } - test("Downloads should unpack response body streams to directories") { + test("Downloads from resource staging server should unpack response body to directories") { val downloadJarsCall = mock[Call[ResponseBody]] val downloadFilesCall = mock[Call[ResponseBody]] + val sparkConf = getSparkConfForResourceStagingServerDownloads + val initContainerUnderTest = new KubernetesSparkDependencyDownloadInitContainer( + sparkConf, + retrofitClientFactory, + fileFetcher, + securityManager = new SparkSecurityManager(sparkConf)) when(retrofitClient.downloadResources(JARS_RESOURCE_ID, downloadJarsSecretValue)) .thenReturn(downloadJarsCall) when(retrofitClient.downloadResources(FILES_RESOURCE_ID, downloadFilesSecretValue)) @@ -125,6 +117,46 @@ class KubernetesSparkDependencyDownloadInitContainerSuite initContainerUnderTest.run() checkWrittenFilesAreTheSameAsOriginal(JARS, downloadJarsDir) checkWrittenFilesAreTheSameAsOriginal(FILES, downloadFilesDir) + Mockito.verifyZeroInteractions(fileFetcher) + } + + test("Downloads from remote server should invoke the file fetcher") { + val sparkConf = getSparkConfForRemoteFileDownloads + val initContainerUnderTest = new KubernetesSparkDependencyDownloadInitContainer( + sparkConf, + retrofitClientFactory, + fileFetcher, + securityManager = new SparkSecurityManager(sparkConf)) + initContainerUnderTest.run() + Mockito.verify(fileFetcher).fetchFile("http://localhost:9000/jar1.jar", downloadJarsDir) + Mockito.verify(fileFetcher).fetchFile("hdfs://localhost:9000/jar2.jar", downloadJarsDir) + Mockito.verify(fileFetcher).fetchFile("http://localhost:9000/file.txt", downloadFilesDir) + + } + + private def getSparkConfForResourceStagingServerDownloads: SparkConf = { + new SparkConf(true) + .set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) + .set(INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER, JARS_RESOURCE_ID) + .set(INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION, DOWNLOAD_JARS_SECRET_LOCATION) + .set(INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER, FILES_RESOURCE_ID) + .set(INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION, DOWNLOAD_FILES_SECRET_LOCATION) + .set(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION, downloadJarsDir.getAbsolutePath) + .set(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION, downloadFilesDir.getAbsolutePath) + .set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, TRUSTSTORE_FILE.getAbsolutePath) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD, TRUSTSTORE_PASSWORD) + .set(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE, TRUSTSTORE_TYPE) + } + + private def getSparkConfForRemoteFileDownloads: SparkConf = { + new SparkConf(true) + .set(INIT_CONTAINER_REMOTE_JARS, + "http://localhost:9000/jar1.jar,hdfs://localhost:9000/jar2.jar") + .set(INIT_CONTAINER_REMOTE_FILES, + "http://localhost:9000/file.txt") + .set(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION, downloadJarsDir.getAbsolutePath) + 
.set(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION, downloadFilesDir.getAbsolutePath)
   }
 
   private def checkWrittenFilesAreTheSameAsOriginal(
diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
index 23c6751f1b3ed..c5f1c43ff7cf4 100644
--- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile
@@ -37,4 +37,7 @@ ENV SPARK_HOME /opt/spark
 WORKDIR /opt/spark
 
 # TODO support spark.executor.extraClassPath
-CMD exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp ${SPARK_HOME}/jars/\* org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP
+CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \
+    if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \
+    if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \
+    exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP
diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml
index c94893cbce410..02904c0e5fe21 100644
--- a/resource-managers/kubernetes/integration-tests/pom.xml
+++ b/resource-managers/kubernetes/integration-tests/pom.xml
@@ -200,6 +200,28 @@
+      <plugin>
+        <artifactId>maven-resources-plugin</artifactId>
+        <version>3.0.2</version>
+        <executions>
+          <execution>
+            <id>copy-integration-test-http-server-dockerfile</id>
+            <phase>pre-integration-test</phase>
+            <goals>
+              <goal>copy-resources</goal>
+            </goals>
+            <configuration>
+              <outputDirectory>${project.build.directory}/docker/dockerfiles</outputDirectory>
+              <resources>
+                <resource>
+                  <directory>src/main/docker</directory>
+                  <filtering>true</filtering>
+                </resource>
+              </resources>
+            </configuration>
+          </execution>
+        </executions>
+      </plugin>
       <plugin>
         <groupId>com.googlecode.maven-download-plugin</groupId>
         <artifactId>download-maven-plugin</artifactId>
diff --git a/resource-managers/kubernetes/integration-tests/src/main/docker/integration-test-asset-server/Dockerfile b/resource-managers/kubernetes/integration-tests/src/main/docker/integration-test-asset-server/Dockerfile
new file mode 100644
index 0000000000000..e26d207cf4397
--- /dev/null
+++ b/resource-managers/kubernetes/integration-tests/src/main/docker/integration-test-asset-server/Dockerfile
@@ -0,0 +1,21 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# + +# Simple asset server that can provide the integration test jars over HTTP. +FROM trinitronx/python-simplehttpserver:travis-12 + +ADD examples/integration-tests-jars /var/www diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index ae02de7937c6a..3be4507ac105a 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -18,23 +18,19 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.UUID -import scala.collection.JavaConverters._ - -import com.google.common.collect.ImmutableList import io.fabric8.kubernetes.client.internal.readiness.Readiness import org.scalatest.{BeforeAndAfter, DoNotDiscover} import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ -import org.apache.spark._ +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.deploy.kubernetes.submit.v1.Client -import org.apache.spark.deploy.kubernetes.submit.v2.{MountedDependencyManagerProviderImpl, SubmissionKubernetesClientProviderImpl} -import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} +import org.apache.spark.deploy.kubernetes.submit.v2.Client +import org.apache.spark.launcher.SparkLauncher @DoNotDiscover private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) @@ -44,11 +40,14 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkConf: SparkConf = _ private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ + private var staticAssetServerLauncher: StaticAssetServerLauncher = _ override def beforeAll(): Unit = { kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) resourceStagingServerLauncher = new ResourceStagingServerLauncher( kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) + staticAssetServerLauncher = new StaticAssetServerLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) } before { @@ -98,7 +97,6 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) assume(testBackend.name == MINIKUBE_TEST_BACKEND) sparkConf.setJars(Seq( - KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) runSparkPiAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) } @@ -118,6 +116,25 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) runSparkGroupByTestAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } + test("Use remote 
resources without the resource staging server.") { + val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() + sparkConf.setJars(Seq( + s"$assetServerUri/${KubernetesSuite.EXAMPLES_JAR_FILE.getName}", + s"$assetServerUri/${KubernetesSuite.HELPER_JAR_FILE.getName}" + )) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + + test("Mix remote resources with submitted ones.") { + launchStagingServer(SSLOptions()) + val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() + sparkConf.setJars(Seq( + KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + s"$assetServerUri/${KubernetesSuite.HELPER_JAR_FILE.getName}" + )) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { assume(testBackend.name == MINIKUBE_TEST_BACKEND) @@ -134,16 +151,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) } private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { - val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - appArgs = Array.empty[String], - mainAppResource = appResource, - kubernetesClientProvider = - new SubmissionKubernetesClientProviderImpl(sparkConf), - mountedDependencyManagerProvider = - new MountedDependencyManagerProviderImpl(sparkConf)) - client.run() + Client.run(sparkConf, appResource, KubernetesSuite.SPARK_PI_MAIN_CLASS, Array.empty[String]) val driverPod = kubernetesTestComponents.kubernetesClient .pods() .withLabel("spark-app-locator", APP_LOCATOR_LABEL) @@ -160,16 +168,11 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) } private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { - val client = new org.apache.spark.deploy.kubernetes.submit.v2.Client( + Client.run( sparkConf = sparkConf, - mainClass = KubernetesSuite.GROUP_BY_MAIN_CLASS, appArgs = Array.empty[String], - mainAppResource = appResource, - kubernetesClientProvider = - new SubmissionKubernetesClientProviderImpl(sparkConf), - mountedDependencyManagerProvider = - new MountedDependencyManagerProviderImpl(sparkConf)) - client.run() + mainClass = KubernetesSuite.GROUP_BY_MAIN_CLASS, + mainAppResource = appResource) val driverPod = kubernetesTestComponents.kubernetesClient .pods() .withLabel("spark-app-locator", APP_LOCATOR_LABEL) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala index ca549fa27d630..3a99f907d15fd 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala @@ -43,7 +43,6 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC private val PROPERTIES_FILE_NAME = "staging-server.properties" private val PROPERTIES_DIR = "/var/data/spark-staging-server" private val PROPERTIES_FILE_PATH = s"$PROPERTIES_DIR/$PROPERTIES_FILE_NAME" - private var activeResources = Seq.empty[HasMetadata] // Returns the NodePort the staging server is listening on def 
launchStagingServer(sslOptions: SSLOptions): Int = { @@ -146,8 +145,8 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC .endPort() .endSpec() .build() - val stagingServerPodReadyWatcher = new ReadinessWatcher[Pod] - val serviceReadyWatcher = new ReadinessWatcher[Endpoints] + val stagingServerPodReadyWatcher = new SparkReadinessWatcher[Pod] + val serviceReadyWatcher = new SparkReadinessWatcher[Endpoints] val allResources = Seq( stagingServerService, stagingServerConfigMap, @@ -159,9 +158,7 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC Utils.tryWithResource(kubernetesClient.endpoints() .withName(stagingServerService.getMetadata.getName) .watch(serviceReadyWatcher)) { _ => - activeResources = kubernetesClient.resourceList(allResources: _*) - .createOrReplace() - .asScala + kubernetesClient.resourceList(allResources: _*).createOrReplace() stagingServerPodReadyWatcher.waitUntilReady() serviceReadyWatcher.waitUntilReady() } @@ -172,25 +169,4 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC .get(0) .getNodePort } - - def tearDownStagingServer(): Unit = { - kubernetesClient.resourceList(activeResources: _*).delete() - activeResources = Seq.empty[HasMetadata] - } - - private class ReadinessWatcher[T <: HasMetadata] extends Watcher[T] { - - private val signal = SettableFuture.create[Boolean] - - override def eventReceived(action: Action, resource: T): Unit = { - if ((action == Action.MODIFIED || action == Action.ADDED) && - Readiness.isReady(resource)) { - signal.set(true) - } - } - - override def onClose(cause: KubernetesClientException): Unit = {} - - def waitUntilReady(): Boolean = signal.get(30, TimeUnit.SECONDS) - } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala new file mode 100644 index 0000000000000..20517eb2fc2a6 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import java.util.concurrent.TimeUnit + +import com.google.common.util.concurrent.SettableFuture +import io.fabric8.kubernetes.api.model.HasMetadata +import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import io.fabric8.kubernetes.client.internal.readiness.Readiness + +private[spark] class SparkReadinessWatcher[T <: HasMetadata] extends Watcher[T] { + + private val signal = SettableFuture.create[Boolean] + + override def eventReceived(action: Action, resource: T): Unit = { + if ((action == Action.MODIFIED || action == Action.ADDED) && + Readiness.isReady(resource)) { + signal.set(true) + } + } + + override def onClose(cause: KubernetesClientException): Unit = {} + + def waitUntilReady(): Boolean = signal.get(30, TimeUnit.SECONDS) +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/StaticAssetServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/StaticAssetServerLauncher.scala new file mode 100644 index 0000000000000..6b483769f5254 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/StaticAssetServerLauncher.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.integrationtest + +import io.fabric8.kubernetes.api.model.{HTTPGetActionBuilder, Pod} +import io.fabric8.kubernetes.client.KubernetesClient + +import org.apache.spark.util.Utils + +/** + * Launches a simple HTTP server which provides jars that can be downloaded by Spark applications + * in integration tests. + */ +private[spark] class StaticAssetServerLauncher(kubernetesClient: KubernetesClient) { + + // Returns the HTTP Base URI of the server. 
+ def launchStaticAssetServer(): String = { + val readinessWatcher = new SparkReadinessWatcher[Pod] + val probePingHttpGet = new HTTPGetActionBuilder() + .withNewPort(8080) + .withScheme("HTTP") + .withPath("/") + .build() + Utils.tryWithResource(kubernetesClient + .pods() + .withName("integration-test-static-assets") + .watch(readinessWatcher)) { _ => + val pod = kubernetesClient.pods().createNew() + .withNewMetadata() + .withName("integration-test-static-assets") + .endMetadata() + .withNewSpec() + .addNewContainer() + .withName("static-asset-server-container") + .withImage("spark-integration-test-asset-server:latest") + .withImagePullPolicy("IfNotPresent") + .withNewReadinessProbe() + .withHttpGet(probePingHttpGet) + .endReadinessProbe() + .endContainer() + .endSpec() + .done() + readinessWatcher.waitUntilReady() + val podIP = kubernetesClient.pods().withName(pod.getMetadata.getName).get() + .getStatus + .getPodIP + s"http://$podIP:8080" + } + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 52b8c7d7359a6..0692cf55db848 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -34,6 +34,8 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" private val DRIVER_INIT_DOCKER_FILE = "dockerfiles/driver-init/Dockerfile" private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" + private val STATIC_ASSET_SERVER_DOCKER_FILE = + "dockerfiles/integration-test-asset-server/Dockerfile" private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) private val dockerHost = dockerEnv.getOrElse("DOCKER_HOST", @@ -65,6 +67,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, buildImage("spark-driver-v2", DRIVER_V2_DOCKER_FILE) buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) buildImage("spark-driver-init", DRIVER_INIT_DOCKER_FILE) + buildImage("spark-integration-test-asset-server", STATIC_ASSET_SERVER_DOCKER_FILE) } private def buildImage(name: String, dockerFile: String): Unit = { From 636dbdabac7921094c4f9b6d52ff44ca4d14d627 Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Wed, 17 May 2017 16:33:55 -0700 Subject: [PATCH 106/225] Scalastyle fixes (#278) --- .../integrationtest/KubernetesTestComponents.scala | 2 +- .../deploy/kubernetes/integrationtest/ProcessUtils.scala | 6 +++--- .../spark/deploy/kubernetes/integrationtest/constants.scala | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala index 8cdacee655c05..677c0db606a47 100644 --- 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala
@@ -95,4 +95,4 @@ private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesCl
     val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager]
     HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager)
   }
-}
\ No newline at end of file
+}
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala
index d0bfac3085487..4008007b72fc4 100644
--- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ProcessUtils.scala
@@ -26,9 +26,9 @@ import org.apache.spark.util.Utils
 object ProcessUtils extends Logging {
   /**
-    * executeProcess is used to run a command and return the output if it
-    * completes within timeout seconds.
-    */
+   * executeProcess is used to run a command and return the output if it
+   * completes within timeout seconds.
+   */
   def executeProcess(fullCommand: Array[String], timeout: Long): Seq[String] = {
     val pb = new ProcessBuilder().command(fullCommand: _*)
     pb.redirectErrorStream(true)
diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala
index 8207198b529d2..bfded1003fc25 100644
--- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala
+++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/constants.scala
@@ -19,4 +19,4 @@ package org.apache.spark.deploy.kubernetes.integrationtest
 package object constants {
   val MINIKUBE_TEST_BACKEND = "minikube"
   val GCE_TEST_BACKEND = "gce"
-}
\ No newline at end of file
+}
From 76c865d14ea1d6f2daf56149643fbc99e1db37f1 Mon Sep 17 00:00:00 2001
From: Shuai Lin
Date: Thu, 18 May 2017 00:25:17 -0500
Subject: [PATCH 107/225] Exit properly when the k8s cluster is not available. (#256)

* Exit properly when the k8s cluster is not available.
* Add jetty to the k8s module dependencies so we can rebuild only the k8s module.
* CR
* Fixed single thread scheduler.
* Fixed scalastyle check.
* CR --- resource-managers/kubernetes/core/pom.xml | 1 - .../spark/deploy/kubernetes/submit/v1/Client.scala | 1 + .../submit/v1/LoggingPodStatusWatcher.scala | 13 +++++++++---- 3 files changed, 10 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 70c252009c9b4..aa429f73a5627 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -133,4 +133,3 @@ - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 0f1e7886a1ba2..8f1e356bec8ca 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -155,6 +155,7 @@ private[spark] class Client( .pods() .withName(kubernetesDriverPodName) .watch(loggingWatch)) { _ => + loggingWatch.start() val resourceCleanShutdownHook = ShutdownHookManager.addShutdownHook(() => kubernetesResourceCleaner.deleteAllRegisteredResourcesFromKubernetes(kubernetesClient)) val cleanupServiceManagerHook = ShutdownHookManager.addShutdownHook( diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala index 7be334194d9d7..537bcccaa1458 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala @@ -24,6 +24,7 @@ import io.fabric8.kubernetes.client.Watcher.Action import scala.collection.JavaConverters._ import org.apache.spark.internal.Logging +import org.apache.spark.util.ThreadUtils /** * A monitor for the running Kubernetes pod of a Spark application. 
Status logging occurs on @@ -40,19 +41,23 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL extends Watcher[Pod] with Logging { // start timer for periodic logging - private val scheduler = Executors.newScheduledThreadPool(1) + private val scheduler = + ThreadUtils.newDaemonSingleThreadScheduledExecutor("logging-pod-status-watcher") private val logRunnable: Runnable = new Runnable { override def run() = logShortStatus() } - if (interval > 0) { - scheduler.scheduleWithFixedDelay(logRunnable, 0, interval, TimeUnit.MILLISECONDS) - } private var pod: Option[Pod] = Option.empty private def phase: String = pod.map(_.getStatus().getPhase()).getOrElse("unknown") private def status: String = pod.map(_.getStatus().getContainerStatuses().toString()) .getOrElse("unknown") + def start(): Unit = { + if (interval > 0) { + scheduler.scheduleAtFixedRate(logRunnable, 0, interval, TimeUnit.MILLISECONDS) + } + } + override def eventReceived(action: Action, pod: Pod): Unit = { this.pod = Option(pod) action match { From a6cebcbe5eee524140af3d902261148c74746972 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 18 May 2017 14:27:31 -0700 Subject: [PATCH 108/225] Support driver pod kubernetes credentials mounting in V2 submission (#246) --- .../kubernetes/KubernetesCredentials.scala | 23 +++ .../spark/deploy/kubernetes/config.scala | 20 +- .../spark/deploy/kubernetes/constants.scala | 16 ++ ...iverPodKubernetesCredentialsProvider.scala | 11 +- .../deploy/kubernetes/submit/v1/Client.scala | 6 +- .../deploy/kubernetes/submit/v2/Client.scala | 26 ++- ...riverPodKubernetesCredentialsMounter.scala | 175 ++++++++++++++++++ ...KubernetesCredentialsMounterProvider.scala | 46 +++++ .../v2/SubmittedDependencyUploaderImpl.scala | 3 +- .../v1/KubernetesRestProtocolMessages.scala | 7 +- .../v1/KubernetesSparkRestServer.scala | 7 +- .../v2/ResourceStagingService.scala | 2 +- .../v2/ResourceStagingServiceImpl.scala | 2 +- .../DriverPodKubernetesClientProvider.scala | 16 +- .../kubernetes/KubernetesClientBuilder.scala | 97 ---------- .../kubernetes/submit/v2/ClientV2Suite.scala | 171 ++++++++++++----- ...PodKubernetesCredentialsMounterSuite.scala | 167 +++++++++++++++++ .../v2/ResourceStagingServerSuite.scala | 3 +- .../v2/ResourceStagingServiceImplSuite.scala | 2 +- .../integrationtest/KubernetesV2Suite.scala | 13 ++ 20 files changed, 632 insertions(+), 181 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesCredentials.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v1 => }/DriverPodKubernetesCredentialsProvider.scala (88%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesCredentials.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesCredentials.scala new file mode 100644 index 0000000000000..aba94e6969529 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesCredentials.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +case class KubernetesCredentials( + oauthTokenBase64: Option[String], + caCertDataBase64: Option[String], + clientKeyDataBase64: Option[String], + clientCertDataBase64: Option[String]) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index f0a39fe359227..45e5a46a26258 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -120,14 +120,20 @@ package object config extends Logging { private[spark] val KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE = ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.caCertFile") .doc("Path on the driver pod's disk containing the CA cert file to use when authenticating" + - " against Kubernetes.") + " against Kubernetes. Typically this is configured by spark-submit from mounting a" + + " secret from the submitting machine into the pod, and hence this configuration is marked" + + " as internal, but this can also be set manually to use a certificate that is mounted" + + " into the driver pod via other means.") .stringConf .createOptional private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE = ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientKeyFile") .doc("Path on the driver pod's disk containing the client key file to use when" + - " authenticating against Kubernetes.") + " authenticating against Kubernetes. Typically this is configured by spark-submit from" + + " mounting a secret from the submitting machine into the pod, and hence this" + + " configuration is marked as internal, but this can also be set manually to" + + " use a key file that is mounted into the driver pod via other means.") .internal() .stringConf .createOptional @@ -135,7 +141,10 @@ package object config extends Logging { private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE = ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientCertFile") .doc("Path on the driver pod's disk containing the client cert file to use when" + - " authenticating against Kubernetes.") + " authenticating against Kubernetes. 
Typically this is configured by spark-submit from" + + " mounting a secret from the submitting machine into the pod, and hence this" + + " configuration is marked as internal, but this can also be set manually to" + + " use a certificate that is mounted into the driver pod via other means.") .internal() .stringConf .createOptional @@ -143,7 +152,10 @@ package object config extends Logging { private[spark] val KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN = ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.oauthTokenFile") .doc("Path on the driver pod's disk containing the OAuth token file to use when" + - " authenticating against Kubernetes.") + " authenticating against Kubernetes. Typically this is configured by spark-submit from" + + " mounting a secret from the submitting machine into the pod, and hence this" + + " configuration is marked as internal, but this can also be set manually to" + + " use a token that is mounted into the driver pod via other means.") .internal() .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 4c4f7b9fc3b23..8d0965078aaa8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -38,6 +38,22 @@ package object constants { private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" private[spark] val SUBMISSION_SSL_KEY_PEM_SECRET_NAME = "spark-submission-server-key-pem" private[spark] val SUBMISSION_SSL_CERT_PEM_SECRET_NAME = "spark-submission-server-cert-pem" + private[spark] val DRIVER_CREDENTIALS_SECRETS_BASE_DIR = + "/mnt/secrets/spark-kubernetes-credentials" + private[spark] val DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME = "ca-cert" + private[spark] val DRIVER_CREDENTIALS_CA_CERT_PATH = + s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME" + private[spark] val DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME = "client-key" + private[spark] val DRIVER_CREDENTIALS_CLIENT_KEY_PATH = + s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME" + private[spark] val DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME = "client-cert" + private[spark] val DRIVER_CREDENTIALS_CLIENT_CERT_PATH = + s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME" + private[spark] val DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME = "oauth-token" + private[spark] val DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH = + s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME" + private[spark] val DRIVER_CREDENTIALS_SECRET_VOLUME_NAME = "kubernetes-credentials" + // Default and fixed ports private[spark] val SUBMISSION_SERVER_PORT = 7077 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala similarity index 88% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala rename to 
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala index 112226dbe3fc1..404741520c059 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverPodKubernetesCredentialsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala @@ -14,15 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v1 +package org.apache.spark.deploy.kubernetes.submit import java.io.File +import com.google.common.base.Charsets import com.google.common.io.{BaseEncoding, Files} import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.config.OptionalConfigEntry private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { @@ -38,7 +39,9 @@ private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf require(sparkConf.get(KUBERNETES_DRIVER_CLIENT_CERT_FILE).isEmpty, "Cannot specify both a service account and a driver pod client cert file.") } - val oauthToken = sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN) + val oauthTokenBase64 = sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).map { token => + BaseEncoding.base64().encode(token.getBytes(Charsets.UTF_8)) + } val caCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CA_CERT_FILE, s"Driver CA cert file provided at %s does not exist or is not a file.") val clientKeyDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_KEY_FILE, @@ -46,7 +49,7 @@ private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf val clientCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_CERT_FILE, s"Driver client cert file provided at %s does not exist or is not a file.") KubernetesCredentials( - oauthToken = oauthToken, + oauthTokenBase64 = oauthTokenBase64, caCertDataBase64 = caCertDataBase64, clientKeyDataBase64 = clientKeyDataBase64, clientCertDataBase64 = clientCertDataBase64) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index 8f1e356bec8ca..fa3c97c6957b5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -30,11 +30,11 @@ import org.apache.commons.codec.binary.Base64 import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesCredentials, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} +import 
org.apache.spark.deploy.kubernetes.submit.{DriverPodKubernetesCredentialsProvider, KubernetesFileUtils} +import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index a403a91840bd6..da08e17dee85b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -48,7 +48,9 @@ private[spark] class Client( sparkJars: Seq[String], sparkFiles: Seq[String], kubernetesClientProvider: SubmissionKubernetesClientProvider, - initContainerComponentsProvider: DriverInitContainerComponentsProvider) extends Logging { + initContainerComponentsProvider: DriverInitContainerComponentsProvider, + kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider) + extends Logging { private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) .getOrElse(kubernetesAppId) @@ -133,9 +135,6 @@ private[spark] class Client( .provideInitContainerBootstrap() .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod) - val driverOwnedResources = Seq(initContainerConfigMap) ++ - maybeSubmittedDependenciesSecret.toSeq - val containerLocalizedFilesResolver = initContainerComponentsProvider .provideContainerLocalizedFilesResolver() val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() @@ -143,8 +142,15 @@ private[spark] class Client( val executorInitContainerConfiguration = initContainerComponentsProvider .provideExecutorInitContainerConfiguration() - val resolvedSparkConf = executorInitContainerConfiguration + val sparkConfWithExecutorInit = executorInitContainerConfiguration .configureSparkConfForExecutorInitContainer(sparkConf) + val credentialsMounter = kubernetesCredentialsMounterProvider + .getDriverPodKubernetesCredentialsMounter() + val credentialsSecret = credentialsMounter.createCredentialsSecret() + val podWithInitContainerAndMountedCreds = credentialsMounter.mountDriverKubernetesCredentials( + podWithInitContainer, driverContainer.getName, credentialsSecret) + val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations( + sparkConfWithExecutorInit) if (resolvedSparkJars.nonEmpty) { resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) } @@ -166,7 +172,7 @@ private[spark] class Client( val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { case (confKey, confValue) => s"-D$confKey=$confValue" }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val resolvedDriverPod = podWithInitContainer.editSpec() + val resolvedDriverPod = podWithInitContainerAndMountedCreds.editSpec() .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) .addNewEnv() .withName(ENV_MOUNTED_CLASSPATH) @@ -181,6 +187,9 @@ private[spark] class Client( .build() val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) try { + val driverOwnedResources = Seq(initContainerConfigMap) ++ + maybeSubmittedDependenciesSecret.toSeq ++ + 
credentialsSecret.toSeq val driverPodOwnerReference = new OwnerReferenceBuilder() .withName(createdDriverPod.getMetadata.getName) .withApiVersion(createdDriverPod.getApiVersion) @@ -261,6 +270,8 @@ private[spark] object Client { val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( sparkConf, kubernetesAppId, sparkJars, sparkFiles) val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) + val kubernetesCredentialsMounterProvider = + new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) new Client( appName, kubernetesAppId, @@ -270,6 +281,7 @@ private[spark] object Client { sparkJars, sparkFiles, kubernetesClientProvider, - initContainerComponentsProvider).run() + initContainerComponentsProvider, + kubernetesCredentialsMounterProvider).run() } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala new file mode 100644 index 0000000000000..9759669335774 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala @@ -0,0 +1,175 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import io.fabric8.kubernetes.api.model.{PodBuilder, Secret, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.KubernetesCredentials +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.internal.config.OptionalConfigEntry + +private[spark] trait DriverPodKubernetesCredentialsMounter { + + /** + * Set fields on the Spark configuration that indicate where the driver pod is + * to find its Kubernetes credentials for requesting executors. + */ + def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf + + /** + * Create the Kubernetes secret object that correspond to the driver's credentials + * that have to be created and mounted into the driver pod. The single Secret + * object contains all of the data entries for the driver pod's Kubernetes + * credentials. Returns empty if no secrets are to be mounted. + */ + def createCredentialsSecret(): Option[Secret] + + /** + * Mount any Kubernetes credentials from the submitting machine's disk into the driver pod. The + * secret that is passed in here should have been created from createCredentialsSecret so that + * the implementation does not need to hold its state. 
+ */ + def mountDriverKubernetesCredentials( + originalPodSpec: PodBuilder, + driverContainerName: String, + credentialsSecret: Option[Secret]): PodBuilder +} + +private[spark] class DriverPodKubernetesCredentialsMounterImpl( + kubernetesAppId: String, + submitterLocalDriverPodKubernetesCredentials: KubernetesCredentials, + maybeUserSpecifiedMountedClientKeyFile: Option[String], + maybeUserSpecifiedMountedClientCertFile: Option[String], + maybeUserSpecifiedMountedOAuthTokenFile: Option[String], + maybeUserSpecifiedMountedCaCertFile: Option[String]) + extends DriverPodKubernetesCredentialsMounter { + + override def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf = { + val resolvedMountedClientKeyFile = resolveSecretLocation( + maybeUserSpecifiedMountedClientKeyFile, + submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_PATH) + val resolvedMountedClientCertFile = resolveSecretLocation( + maybeUserSpecifiedMountedClientCertFile, + submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_PATH) + val resolvedMountedCaCertFile = resolveSecretLocation( + maybeUserSpecifiedMountedCaCertFile, + submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_PATH) + val resolvedMountedOAuthTokenFile = resolveSecretLocation( + maybeUserSpecifiedMountedOAuthTokenFile, + submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) + val sparkConfWithCredentialLocations = sparkConf.clone() + .setOption(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, resolvedMountedCaCertFile) + .setOption(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE, resolvedMountedClientKeyFile) + .setOption(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE, resolvedMountedClientCertFile) + .setOption(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, resolvedMountedOAuthTokenFile) + sparkConfWithCredentialLocations.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => + sparkConfWithCredentialLocations.set(KUBERNETES_DRIVER_OAUTH_TOKEN, "") + } + sparkConfWithCredentialLocations.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => + sparkConfWithCredentialLocations.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") + } + sparkConfWithCredentialLocations + } + + override def createCredentialsSecret(): Option[Secret] = { + val allSecretData = + resolveSecretData( + maybeUserSpecifiedMountedClientKeyFile, + submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME) ++ + resolveSecretData( + maybeUserSpecifiedMountedClientCertFile, + submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME) ++ + resolveSecretData( + maybeUserSpecifiedMountedCaCertFile, + submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME) ++ + resolveSecretData( + maybeUserSpecifiedMountedOAuthTokenFile, + submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME) + if (allSecretData.isEmpty) { + None + } else { + Some(new SecretBuilder() + .withNewMetadata().withName(s"$kubernetesAppId-kubernetes-credentials").endMetadata() + .withData(allSecretData.asJava) + .build()) + } + } + + override def mountDriverKubernetesCredentials( + originalPodSpec: PodBuilder, + driverContainerName: String, + credentialsSecret: Option[Secret]): PodBuilder = { + credentialsSecret.map { secret => + originalPodSpec.editSpec() + .addNewVolume() + 
.withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withNewSecret().withSecretName(secret.getMetadata.getName).endSecret() + .endVolume() + .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainerName)) + .addNewVolumeMount() + .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withMountPath(DRIVER_CREDENTIALS_SECRETS_BASE_DIR) + .endVolumeMount() + .endContainer() + .endSpec() + }.getOrElse(originalPodSpec) + } + + private def resolveSecretLocation( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + mountedCanonicalLocation: String): Option[String] = { + mountedUserSpecified.orElse(valueMountedFromSubmitter.map( _ => { + mountedCanonicalLocation + })) + } + + private def resolveSecretData( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + secretName: String): Map[String, String] = { + mountedUserSpecified.map { _ => Map.empty[String, String]} + .getOrElse { + valueMountedFromSubmitter.map { valueBase64 => + Map(secretName -> valueBase64) + }.getOrElse(Map.empty[String, String]) + } + } + + private implicit def augmentSparkConf(sparkConf: SparkConf): OptionSettableSparkConf = { + new OptionSettableSparkConf(sparkConf) + } +} + +private class OptionSettableSparkConf(sparkConf: SparkConf) { + def setOption[T](configEntry: OptionalConfigEntry[T], option: Option[T]): SparkConf = { + option.map( opt => { + sparkConf.set(configEntry, opt) + }).getOrElse(sparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala new file mode 100644 index 0000000000000..e981c54d23a9d --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.DriverPodKubernetesCredentialsProvider + +private[spark] trait DriverPodKubernetesCredentialsMounterProvider { + + def getDriverPodKubernetesCredentialsMounter() + : DriverPodKubernetesCredentialsMounter +} + +private[spark] class DriverPodKubernetesCredentialsMounterProviderImpl( + sparkConf: SparkConf, + kubernetesAppId: String) + extends DriverPodKubernetesCredentialsMounterProvider { + + override def getDriverPodKubernetesCredentialsMounter() + : DriverPodKubernetesCredentialsMounter = { + val submitterLocalDriverPodKubernetesCredentials = + new DriverPodKubernetesCredentialsProvider(sparkConf).get() + new DriverPodKubernetesCredentialsMounterImpl( + kubernetesAppId, + submitterLocalDriverPodKubernetesCredentials, + sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE), + sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE), + sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN), + sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE)) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala index f22759d463cb7..5f98facfb691f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala @@ -25,9 +25,8 @@ import okhttp3.RequestBody import retrofit2.Call import org.apache.spark.{SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala index cd1f9dcdf5879..bdd4a85da8f85 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala @@ -19,15 +19,10 @@ package org.apache.spark.deploy.rest.kubernetes.v1 import com.fasterxml.jackson.annotation.{JsonIgnore, JsonSubTypes, JsonTypeInfo} import org.apache.spark.SPARK_VERSION +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.rest.{SubmitRestProtocolRequest, SubmitRestProtocolResponse} import org.apache.spark.util.Utils -case class KubernetesCredentials( - oauthToken: Option[String], - caCertDataBase64: Option[String], - clientKeyDataBase64: Option[String], - clientCertDataBase64: Option[String]) - case class KubernetesCreateSubmissionRequest( appResource: AppResource, mainClass: String, 
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index 7847ba2546594..52ca3ef956a79 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -33,7 +33,7 @@ import scala.collection.mutable.ArrayBuffer import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils import org.apache.spark.deploy.rest._ @@ -306,7 +306,10 @@ private[spark] class KubernetesSparkRestServer( + resolvedDirectory.getAbsolutePath) } val oauthTokenFile = writeRawStringCredentialAndGetConf("oauth-token.txt", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, kubernetesCredentials.oauthToken) + KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, + kubernetesCredentials.oauthTokenBase64.map { base64 => + new String(BaseEncoding.base64().decode(base64), Charsets.UTF_8) + }) val caCertFile = writeBase64CredentialAndGetConf("ca.crt", resolvedDirectory, KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, kubernetesCredentials.caCertDataBase64) val clientKeyFile = writeBase64CredentialAndGetConf("key.key", resolvedDirectory, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala index b7c6c4fb913da..5dbe55b72bd8b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala @@ -22,8 +22,8 @@ import javax.ws.rs.core.{MediaType, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials /** * Service that receives application data that can be retrieved later on. 
This is primarily used diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala index 3dfa83c85e6dd..34c3192ae6780 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala @@ -26,8 +26,8 @@ import com.google.common.io.{BaseEncoding, ByteStreams, Files} import scala.collection.concurrent.TrieMap import org.apache.spark.SparkException +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials import org.apache.spark.internal.Logging import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala index b8c2b0c91bbeb..50f2c218c22c4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala @@ -21,10 +21,13 @@ import java.io.File import com.google.common.base.Charsets import com.google.common.io.Files import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} +import io.fabric8.kubernetes.client.utils.HttpClientUtils +import okhttp3.Dispatcher import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.util.ThreadUtils private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, namespace: String) { private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) @@ -78,6 +81,17 @@ private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, nam } serviceAccountConfigBuilder } - new DefaultKubernetesClient(configBuilder.build) + // Disable the ping thread that is not daemon, in order to allow + // the driver main thread to shut down upon errors. Otherwise, the driver + // will hang indefinitely. + val config = configBuilder + .withWebsocketPingInterval(0) + .build() + val httpClient = HttpClientUtils.createHttpClient(config).newBuilder() + // Use a Dispatcher with a custom executor service that creates daemon threads. The default + // executor service used by Dispatcher creates non-daemon threads. 
+ .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("spark-on-k8s"))) + .build() + new DefaultKubernetesClient(httpClient, config) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala deleted file mode 100644 index 31c6eda77d058..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClientBuilder.scala +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.scheduler.cluster.kubernetes - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.Files -import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} -import io.fabric8.kubernetes.client.utils.HttpClientUtils -import okhttp3.Dispatcher - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.util.ThreadUtils - -private[spark] class KubernetesClientBuilder(sparkConf: SparkConf, namespace: String) { - private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) - private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) - private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) - private val caCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) - private val clientKeyFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) - private val clientCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) - - /** - * Creates a {@link KubernetesClient}, expecting to be from within the context of a pod. When - * doing so, service account token files can be picked up from canonical locations. 
- */ - def buildFromWithinPod(): DefaultKubernetesClient = { - val baseClientConfigBuilder = new ConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) - .withNamespace(namespace) - - val configBuilder = oauthTokenFile - .orElse(caCertFile) - .orElse(clientKeyFile) - .orElse(clientCertFile) - .map { _ => - var mountedAuthConfigBuilder = baseClientConfigBuilder - oauthTokenFile.foreach { tokenFilePath => - val tokenFile = new File(tokenFilePath) - mountedAuthConfigBuilder = mountedAuthConfigBuilder - .withOauthToken(Files.toString(tokenFile, Charsets.UTF_8)) - } - caCertFile.foreach { caFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withCaCertFile(caFile) - } - clientKeyFile.foreach { keyFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientKeyFile(keyFile) - } - clientCertFile.foreach { certFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientCertFile(certFile) - } - mountedAuthConfigBuilder - }.getOrElse { - var serviceAccountConfigBuilder = baseClientConfigBuilder - if (SERVICE_ACCOUNT_CA_CERT.isFile) { - serviceAccountConfigBuilder = serviceAccountConfigBuilder.withCaCertFile( - SERVICE_ACCOUNT_CA_CERT.getAbsolutePath) - } - - if (SERVICE_ACCOUNT_TOKEN.isFile) { - serviceAccountConfigBuilder = serviceAccountConfigBuilder.withOauthToken( - Files.toString(SERVICE_ACCOUNT_TOKEN, Charsets.UTF_8)) - } - serviceAccountConfigBuilder - } - // Disable the ping thread that is not daemon, in order to allow - // the driver main thread to shut down upon errors. Otherwise, the driver - // will hang indefinitely. - val config = configBuilder - .withWebsocketPingInterval(0) - .build() - val httpClient = HttpClientUtils.createHttpClient(config).newBuilder() - // Use a Dispatcher with a custom executor service that creates daemon threads. The default - // executor service used by Dispatcher creates non-daemon threads. 
- .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("spark-on-k8s"))) - .build() - new DefaultKubernetesClient(httpClient, config) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala index 4dc1e2e44980a..f0282dbb6d31a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -22,7 +22,7 @@ import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, DoneablePod import io.fabric8.kubernetes.client.KubernetesClient import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} -import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} +import org.mockito.{AdditionalAnswers, ArgumentCaptor, Mock, MockitoAnnotations} import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq} import org.mockito.Mockito.{times, verify, when} import org.mockito.invocation.InvocationOnMock @@ -37,7 +37,6 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { - private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") private val FILES_RESOURCE = SubmittedResourceIdAndSecret("filesId", "filesSecret") private val SUBMITTED_RESOURCES = SubmittedResources(JARS_RESOURCE, FILES_RESOURCE) @@ -53,9 +52,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { SPARK_APP_NAME_LABEL -> APP_NAME) private val CUSTOM_ANNOTATION_KEY = "customAnnotation" private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" - private val SECRET_NAME = "secret" - private val SECRET_KEY = "secret-key" - private val SECRET_DATA = "secret-data" + private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" + private val INIT_CONTAINER_SECRET_DATA = Map("secret-key" -> "secret-data") private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" private val APP_ARGS = Array("3", "20") private val SPARK_JARS = Seq( @@ -70,22 +68,21 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") private val INIT_CONTAINER_SECRET = new SecretBuilder() .withNewMetadata() - .withName(SECRET_NAME) + .withName(INIT_CONTAINER_SECRET_NAME) .endMetadata() - .addToData(SECRET_KEY, SECRET_DATA) + .withData(INIT_CONTAINER_SECRET_DATA.asJava) .build() - private val CONFIG_MAP_NAME = "config-map" - private val CONFIG_MAP_KEY = "config-map-key" - private val CONFIG_MAP_DATA = "config-map-data" private val CUSTOM_JAVA_OPTION_KEY = "myappoption" private val CUSTOM_JAVA_OPTION_VALUE = "myappoptionvalue" private val DRIVER_JAVA_OPTIONS = s"-D$CUSTOM_JAVA_OPTION_KEY=$CUSTOM_JAVA_OPTION_VALUE" private val DRIVER_EXTRA_CLASSPATH = "/var/data/spark-app-custom/custom-jar.jar" + private val CONFIG_MAP_NAME = "config-map" + private val CONFIG_MAP_DATA = Map("config-map-key" -> "config-map-data") private val INIT_CONTAINER_CONFIG_MAP = new ConfigMapBuilder() .withNewMetadata() .withName(CONFIG_MAP_NAME) .endMetadata() - .addToData(CONFIG_MAP_KEY, CONFIG_MAP_DATA) + .withData(CONFIG_MAP_DATA.asJava) 
.build() private val CUSTOM_DRIVER_IMAGE = "spark-custom-driver:latest" private val DRIVER_MEMORY_MB = 512 @@ -104,6 +101,17 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val DRIVER_POD_UID = "driver-pod-uid" private val DRIVER_POD_KIND = "pod" private val DRIVER_POD_API_VERSION = "v1" + private val CREDENTIALS_SECRET_NAME = "credentials-secret" + private val CREDENTIALS_SECRET_DATA = Map("credentials-secret-key" -> "credentials-secret-value") + private val CREDENTIALS_SECRET = new SecretBuilder() + .withNewMetadata() + .withName(CREDENTIALS_SECRET_NAME) + .endMetadata() + .withData(CREDENTIALS_SECRET_DATA.asJava) + .build() + private val CREDENTIALS_SET_CONF = "spark.kubernetes.driverCredentials.provided" + private val CREDENTIALS_SET_ANNOTATION = "credentials-set" + @Mock private var initContainerConfigMapBuilder: SparkInitContainerConfigMapBuilder = _ @Mock @@ -128,6 +136,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { HasMetadata, java.lang.Boolean] @Mock private var resourceListOps: ResourceListOps = _ + @Mock + private var credentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider = _ + @Mock + private var credentialsMounter: DriverPodKubernetesCredentialsMounter = _ before { MockitoAnnotations.initMocks(this) @@ -174,9 +186,12 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(executorInitContainerConfiguration.configureSparkConfForExecutorInitContainer(SPARK_CONF)) .thenReturn(SPARK_CONF_WITH_EXECUTOR_INIT_CONF) when(kubernetesClient.resourceList(anyVararg[HasMetadata]())).thenReturn(resourceListOps) + when(credentialsMounterProvider.getDriverPodKubernetesCredentialsMounter()) + .thenReturn(credentialsMounter) } test("Run with dependency uploader") { + expectationsForNoMountedCredentials() when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) .thenReturn(Some(submittedDependencyUploader)) @@ -194,8 +209,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verifyCreatedResourcesHaveOwnerReferences(createdResources) assert(createdResources.exists { case secret: Secret => - val expectedSecretData = Map(SECRET_KEY -> SECRET_DATA) - secret.getMetadata.getName == SECRET_NAME && secret.getData.asScala == expectedSecretData + secret.getMetadata.getName == INIT_CONTAINER_SECRET_NAME && + secret.getData.asScala == INIT_CONTAINER_SECRET_DATA case _ => false }) verifyConfigMapWasCreated(createdResources) @@ -208,15 +223,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { } test("Run without dependency uploader") { - when(initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) - .thenReturn(None) - when(initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder(None)) - .thenReturn(None) - when(initContainerComponentsProvider - .provideInitContainerConfigMapBuilder(None)) - .thenReturn(initContainerConfigMapBuilder) + expectationsForNoMountedCredentials() + expectationsForNoDependencyUploader() runAndVerifyDriverPodHasCorrectProperties() val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) @@ -232,6 +240,65 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .provideSubmittedDependenciesSecretBuilder(None) } + test("Run with mounted credentials") { + expectationsForNoDependencyUploader() + 
when(credentialsMounter.createCredentialsSecret()).thenReturn(Some(CREDENTIALS_SECRET)) + when(credentialsMounter.mountDriverKubernetesCredentials( + any(), mockitoEq(DRIVER_CONTAINER_NAME), mockitoEq(Some(CREDENTIALS_SECRET)))) + .thenAnswer(new Answer[PodBuilder] { + override def answer(invocation: InvocationOnMock): PodBuilder = { + invocation.getArgumentAt(0, classOf[PodBuilder]).editMetadata() + .addToAnnotations(CREDENTIALS_SET_ANNOTATION, TRUE) + .endMetadata() + } + }) + when(credentialsMounter.setDriverPodKubernetesCredentialLocations(any())) + .thenAnswer(new Answer[SparkConf] { + override def answer(invocation: InvocationOnMock): SparkConf = { + invocation.getArgumentAt(0, classOf[SparkConf]).clone().set(CREDENTIALS_SET_CONF, TRUE) + } + }) + runAndVerifyPodMatchesPredicate { p => + Option(p) + .filter(pod => containerHasCorrectJvmOptions(pod, _(CREDENTIALS_SET_CONF) == TRUE)) + .exists { pod => + pod.getMetadata.getAnnotations.asScala(CREDENTIALS_SET_ANNOTATION) == TRUE + } + } + val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) + verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) + val createdResources = resourceListArgumentCaptor.getAllValues.asScala + assert(createdResources.size === 2) + verifyCreatedResourcesHaveOwnerReferences(createdResources) + assert(createdResources.exists { + case secret: Secret => + secret.getMetadata.getName == CREDENTIALS_SECRET_NAME && + secret.getData.asScala == CREDENTIALS_SECRET_DATA + case _ => false + }) + } + + private def expectationsForNoDependencyUploader(): Unit = { + when(initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) + .thenReturn(None) + when(initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder(None)) + .thenReturn(None) + when(initContainerComponentsProvider + .provideInitContainerConfigMapBuilder(None)) + .thenReturn(initContainerConfigMapBuilder) + } + + private def expectationsForNoMountedCredentials(): Unit = { + when(credentialsMounter.setDriverPodKubernetesCredentialLocations(any())) + .thenAnswer(AdditionalAnswers.returnsFirstArg()) + when(credentialsMounter.createCredentialsSecret()).thenReturn(None) + when(credentialsMounter.mountDriverKubernetesCredentials( + any(), mockitoEq(DRIVER_CONTAINER_NAME), mockitoEq(None))) + .thenAnswer(AdditionalAnswers.returnsFirstArg()) + } + private def verifyCreatedResourcesHaveOwnerReferences( createdResources: mutable.Buffer[HasMetadata]): Unit = { assert(createdResources.forall { resource => @@ -248,14 +315,36 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private def verifyConfigMapWasCreated(createdResources: mutable.Buffer[HasMetadata]): Unit = { assert(createdResources.exists { case configMap: ConfigMap => - val expectedConfigMapData = Map(CONFIG_MAP_KEY -> CONFIG_MAP_DATA) configMap.getMetadata.getName == CONFIG_MAP_NAME && - configMap.getData.asScala == expectedConfigMapData + configMap.getData.asScala == CONFIG_MAP_DATA case _ => false }) } private def runAndVerifyDriverPodHasCorrectProperties(): Unit = { + val expectedOptions = SPARK_CONF.getAll + .filterNot(_._1 == org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS.key) + .toMap ++ + Map( + "spark.app.id" -> APP_ID, + KUBERNETES_DRIVER_POD_NAME.key -> APP_ID, + EXECUTOR_INIT_CONF_KEY -> TRUE, + CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, + "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), + "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) + 
runAndVerifyPodMatchesPredicate { p => + Option(p) + .filter(_.getMetadata.getName == APP_ID) + .filter(podHasCorrectAnnotations) + .filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) + .filter(containerHasCorrectBasicContainerConfiguration) + .filter(containerHasCorrectBasicEnvs) + .filter(containerHasCorrectMountedClasspath) + .exists(pod => containerHasCorrectJvmOptions(pod, _ == expectedOptions)) + } + } + + private def runAndVerifyPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { new Client( APP_NAME, APP_ID, @@ -265,49 +354,31 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { SPARK_JARS, SPARK_FILES, kubernetesClientProvider, - initContainerComponentsProvider).run() + initContainerComponentsProvider, + credentialsMounterProvider).run() val podMatcher = new BaseMatcher[Pod] { override def matches(o: scala.Any): Boolean = { o match { - case p: Pod => - Option(p) - .filter(_.getMetadata.getName == APP_ID) - .filter(podHasCorrectAnnotations) - .filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) - .filter(containerHasCorrectBasicContainerConfiguration) - .filter(containerHasCorrectBasicEnvs) - .filter(containerHasCorrectMountedClasspath) - .exists(containerHasCorrectJvmOptions) - case _ => - false + case p: Pod => pred(p) + case _ => false } } - override def describeTo(description: Description): Unit = {} } verify(podOps).create(argThat(podMatcher)) } - private def containerHasCorrectJvmOptions(pod: Pod): Boolean = { + private def containerHasCorrectJvmOptions( + pod: Pod, optionsCorrectnessPredicate: (Map[String, String] => Boolean)): Boolean = { val driverContainer = pod.getSpec.getContainers.asScala.head val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) envs.toMap.get(ENV_DRIVER_JAVA_OPTS).exists { javaOptions => val splitOptions = javaOptions.split(" ") - val expectedOptions = SPARK_CONF.getAll - .filterNot(_._1 == org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS.key) - .toMap ++ - Map( - "spark.app.id" -> APP_ID, - KUBERNETES_DRIVER_POD_NAME.key -> APP_ID, - EXECUTOR_INIT_CONF_KEY -> TRUE, - CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, - "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), - "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) splitOptions.forall(_.startsWith("-D")) && - splitOptions.map { option => + optionsCorrectnessPredicate(splitOptions.map { option => val withoutPrefix = option.substring(2) (withoutPrefix.split("=", 2)(0), withoutPrefix.split("=", 2)(1)) - }.toMap == expectedOptions + }.toMap) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala new file mode 100644 index 0000000000000..d4413076fb092 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala @@ -0,0 +1,167 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import io.fabric8.kubernetes.api.model.{PodBuilder, SecretBuilder} +import org.scalatest.prop.TableDrivenPropertyChecks +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.KubernetesCredentials + +class DriverPodKubernetesCredentialsMounterSuite + extends SparkFunSuite with TableDrivenPropertyChecks { + + private val CLIENT_KEY_DATA = "client-key-data" + private val CLIENT_CERT_DATA = "client-cert-data" + private val OAUTH_TOKEN_DATA = "oauth-token" + private val CA_CERT_DATA = "ca-cert-data" + private val SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS = KubernetesCredentials( + caCertDataBase64 = Some(CA_CERT_DATA), + clientKeyDataBase64 = Some(CLIENT_KEY_DATA), + clientCertDataBase64 = Some(CLIENT_CERT_DATA), + oauthTokenBase64 = Some(OAUTH_TOKEN_DATA)) + private val APP_ID = "app-id" + private val USER_SPECIFIED_CLIENT_KEY_FILE = Some("/var/data/client-key.pem") + private val USER_SPECIFIED_CLIENT_CERT_FILE = Some("/var/data/client-cert.pem") + private val USER_SPECIFIED_OAUTH_TOKEN_FILE = Some("/var/data/token.txt") + private val USER_SPECIFIED_CA_CERT_FILE = Some("/var/data/ca.pem") + + // Different configurations of credentials mounters + private val credentialsMounterWithPreMountedFiles = + new DriverPodKubernetesCredentialsMounterImpl( + kubernetesAppId = APP_ID, + submitterLocalDriverPodKubernetesCredentials = SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS, + maybeUserSpecifiedMountedClientKeyFile = USER_SPECIFIED_CLIENT_KEY_FILE, + maybeUserSpecifiedMountedClientCertFile = USER_SPECIFIED_CLIENT_CERT_FILE, + maybeUserSpecifiedMountedOAuthTokenFile = USER_SPECIFIED_OAUTH_TOKEN_FILE, + maybeUserSpecifiedMountedCaCertFile = USER_SPECIFIED_CA_CERT_FILE) + private val credentialsMounterWithoutPreMountedFiles = + new DriverPodKubernetesCredentialsMounterImpl( + kubernetesAppId = APP_ID, + submitterLocalDriverPodKubernetesCredentials = SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS, + maybeUserSpecifiedMountedClientKeyFile = None, + maybeUserSpecifiedMountedClientCertFile = None, + maybeUserSpecifiedMountedOAuthTokenFile = None, + maybeUserSpecifiedMountedCaCertFile = None) + private val credentialsMounterWithoutAnyDriverCredentials = + new DriverPodKubernetesCredentialsMounterImpl( + APP_ID, KubernetesCredentials(None, None, None, None), None, None, None, None) + + // Test matrices + private val TEST_MATRIX_EXPECTED_SPARK_CONFS = Table( + ("Credentials Mounter Implementation", + "Expected client key file", + "Expected client cert file", + "Expected CA Cert file", + "Expected OAuth Token File"), + (credentialsMounterWithoutAnyDriverCredentials, + None, + None, + None, + None), + (credentialsMounterWithoutPreMountedFiles, + Some(DRIVER_CREDENTIALS_CLIENT_KEY_PATH), + Some(DRIVER_CREDENTIALS_CLIENT_CERT_PATH), + Some(DRIVER_CREDENTIALS_CA_CERT_PATH), + Some(DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH)), + 
(credentialsMounterWithPreMountedFiles, + USER_SPECIFIED_CLIENT_KEY_FILE, + USER_SPECIFIED_CLIENT_CERT_FILE, + USER_SPECIFIED_CA_CERT_FILE, + USER_SPECIFIED_OAUTH_TOKEN_FILE)) + + private val TEST_MATRIX_EXPECTED_CREDENTIALS_SECRET = Table( + ("Credentials Mounter Implementation", "Expected Credentials Secret Data"), + (credentialsMounterWithoutAnyDriverCredentials, None), + (credentialsMounterWithoutPreMountedFiles, + Some(KubernetesSecretNameAndData( + data = Map[String, String]( + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME -> CLIENT_KEY_DATA, + DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME -> CLIENT_CERT_DATA, + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME -> CA_CERT_DATA, + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME -> OAUTH_TOKEN_DATA + ), + name = s"$APP_ID-kubernetes-credentials"))), + (credentialsMounterWithPreMountedFiles, None)) + + test("Credentials mounter should set the driver's Kubernetes credentials locations") { + forAll(TEST_MATRIX_EXPECTED_SPARK_CONFS) { + case (credentialsMounter, + expectedClientKeyFile, + expectedClientCertFile, + expectedCaCertFile, + expectedOAuthTokenFile) => + val baseSparkConf = new SparkConf() + val resolvedSparkConf = + credentialsMounter.setDriverPodKubernetesCredentialLocations(baseSparkConf) + assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) === + expectedClientKeyFile) + assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) === + expectedClientCertFile) + assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) === + expectedCaCertFile) + assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) === + expectedOAuthTokenFile) + } + } + + test("Credentials mounter should create the correct credentials secret.") { + forAll(TEST_MATRIX_EXPECTED_CREDENTIALS_SECRET) { + case (credentialsMounter, expectedSecretNameAndData) => + val builtSecret = credentialsMounter.createCredentialsSecret() + val secretNameAndData = builtSecret.map { secret => + KubernetesSecretNameAndData(secret.getMetadata.getName, secret.getData.asScala.toMap) + } + assert(secretNameAndData === expectedSecretNameAndData) + } + } + + test("When credentials secret is provided, driver pod should mount the secret volume.") { + val credentialsSecret = new SecretBuilder() + .withNewMetadata().withName("secret").endMetadata() + .addToData("secretKey", "secretValue") + .build() + val originalPodSpec = new PodBuilder() + .withNewMetadata().withName("pod").endMetadata() + .withNewSpec() + .addNewContainer() + .withName("container") + .endContainer() + .endSpec() + val podSpecWithMountedDriverKubernetesCredentials = + credentialsMounterWithoutPreMountedFiles.mountDriverKubernetesCredentials( + originalPodSpec, "container", Some(credentialsSecret)).build() + val volumes = podSpecWithMountedDriverKubernetesCredentials.getSpec.getVolumes.asScala + assert(volumes.exists(_.getName == DRIVER_CREDENTIALS_SECRET_VOLUME_NAME)) + volumes.find(_.getName == DRIVER_CREDENTIALS_SECRET_VOLUME_NAME).foreach { secretVolume => + assert(secretVolume.getSecret != null && secretVolume.getSecret.getSecretName == "secret") + } + } + + test("When credentials secret is absent, driver pod should not be changed.") { + val originalPodSpec = new PodBuilder() + val nonAdjustedPodSpec = + credentialsMounterWithoutAnyDriverCredentials.mountDriverKubernetesCredentials( + originalPodSpec, "driver", None) + assert(nonAdjustedPodSpec === originalPodSpec) + } +} + +private case class KubernetesSecretNameAndData(name: String, data: Map[String, String]) diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index 08be8af30b3bc..4ef12e8686bb0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -27,8 +27,7 @@ import org.scalatest.BeforeAndAfter import retrofit2.Call import org.apache.spark.{SparkFunSuite, SSLOptions} -import org.apache.spark.deploy.kubernetes.SSLUtils -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials +import org.apache.spark.deploy.kubernetes.{KubernetesCredentials, SSLUtils} import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala index 60850bb877540..9677d12681a16 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala @@ -22,7 +22,7 @@ import java.nio.file.Paths import com.google.common.io.Files import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.rest.kubernetes.v1.KubernetesCredentials +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index 3be4507ac105a..ba9d088bfcfcc 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -135,6 +135,19 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } + test("Use client key and client cert file when requesting executors") { + sparkConf.setJars(Seq( + KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, + KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + kubernetesTestComponents.clientConfig.getClientKeyFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + kubernetesTestComponents.clientConfig.getClientCertFile) + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, + kubernetesTestComponents.clientConfig.getCaCertFile) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { assume(testBackend.name == MINIKUBE_TEST_BACKEND) From 2458b81a978ca70b952a32f133bab069521c1f6e Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 19 May 2017 12:18:33 -0700 Subject: [PATCH 109/225] Allow client certificate PEM for resource staging server. 
(#257) --- .../spark/deploy/kubernetes/config.scala | 7 +- .../deploy/kubernetes/submit/v2/Client.scala | 4 +- ...riverInitContainerComponentsProvider.scala | 7 +- .../v2/SubmittedDependencySecretBuilder.scala | 1 - .../v1/KubernetesSparkRestServer.scala | 26 +++---- .../v1/PemsToKeyStoreConverter.scala | 22 ++++-- ...SparkDependencyDownloadInitContainer.scala | 13 ++-- ...ourceStagingServerSslOptionsProvider.scala | 72 +++++++++++++++---- .../spark/deploy/kubernetes/SSLUtils.scala | 9 +-- .../kubernetes/submit/v2/SSLFilePairs.scala | 23 ++++++ ...DependencyDownloadInitContainerSuite.scala | 4 +- ...StagingServerSslOptionsProviderSuite.scala | 40 +++++++++-- .../v2/ResourceStagingServerSuite.scala | 6 +- .../integrationtest/KubernetesV1Suite.scala | 19 ++--- .../integrationtest/KubernetesV2Suite.scala | 43 +++++++---- .../ResourceStagingServerLauncher.scala | 54 +++++++++----- .../minikube/MinikubeTestBackend.scala | 2 - 17 files changed, 256 insertions(+), 96 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 45e5a46a26258..ab442131ad271 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -364,10 +364,15 @@ package object config extends Logging { private[spark] val RESOURCE_STAGING_SERVER_SSL_NAMESPACE = "kubernetes.resourceStagingServer" private[spark] val RESOURCE_STAGING_SERVER_CERT_PEM = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.serverCertPem") - .doc("Certificate PEM file to use when having the Kubernetes dependency server" + + .doc("Certificate PEM file to use when having the resource staging server" + " listen on TLS.") .stringConf .createOptional + private[spark] val RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.clientCertPem") + .doc("Certificate PEM file to use when the client contacts the resource staging server.") + .stringConf + .createOptional private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyStorePasswordFile") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index da08e17dee85b..23e3e09834372 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServerSslOptionsProviderImpl import org.apache.spark.internal.Logging import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils @@ -267,8 +268,9 @@ private[spark] object Client { val appName = sparkConf.getOption("spark.app.name") 
.getOrElse("spark") val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( - sparkConf, kubernetesAppId, sparkJars, sparkFiles) + sparkConf, kubernetesAppId, sparkJars, sparkFiles, sslOptionsProvider.getSslOptions) val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) val kubernetesCredentialsMounterProvider = new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala index 5b649735f2b3d..7f6ae2ec47675 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.kubernetes.submit.v2 -import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ @@ -46,12 +46,11 @@ private[spark] class DriverInitContainerComponentsProviderImpl( sparkConf: SparkConf, kubernetesAppId: String, sparkJars: Seq[String], - sparkFiles: Seq[String]) + sparkFiles: Seq[String], + resourceStagingServerSslOptions: SSLOptions) extends DriverInitContainerComponentsProvider { private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) - private val resourceStagingServerSslOptions = new SecurityManager(sparkConf) - .getSSLOptions(RESOURCE_STAGING_SERVER_SSL_NAMESPACE) private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) private val maybeSecretName = maybeResourceStagingServerUri.map { _ => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala index 1853b2ecce6d2..b8fa43d0573f7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala @@ -22,7 +22,6 @@ import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} import scala.collection.JavaConverters._ import org.apache.spark.SSLOptions -import org.apache.spark.deploy.kubernetes.constants._ private[spark] trait SubmittedDependencySecretBuilder { /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala index 52ca3ef956a79..5cd24a8f9b75e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala @@ -414,18 +414,20 @@ private[spark] object KubernetesSparkRestServer { // If keystore password isn't set but we're using PEM files, generate a password .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) val resolvedKeyStore = parsedArguments.keyStoreFile.map(new File(_)).orElse( - parsedArguments.keyPemFile.map(keyPemFile => { - parsedArguments.certPemFile.map(certPemFile => { - PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( - new File(keyPemFile), - new File(certPemFile), - "provided-key", - keyStorePassword, - keyPassword, - parsedArguments.keyStoreType) - }) - }).getOrElse(throw new SparkException("When providing PEM files to set up TLS for the" + - " submission server, both the key and the certificate must be specified."))) + for { + keyPemFile <- parsedArguments.keyPemFile + certPemFile <- parsedArguments.certPemFile + resolvedKeyStorePassword <- keyStorePassword + resolvedKeyPassword <- keyPassword + } yield { + PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( + new File(keyPemFile), + new File(certPemFile), + "provided-key", + resolvedKeyStorePassword, + resolvedKeyPassword, + parsedArguments.keyStoreType) + }) new SSLOptions( enabled = true, keyStore = resolvedKeyStore, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala index 2c68b150baf91..178956a136d1c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala @@ -43,8 +43,8 @@ private[spark] object PemsToKeyStoreConverter { keyPemFile: File, certPemFile: File, keyAlias: String, - keyStorePassword: Option[String], - keyPassword: Option[String], + keyStorePassword: String, + keyPassword: String, keyStoreType: Option[String]): File = { require(keyPemFile.isFile, s"Key PEM file provided at ${keyPemFile.getAbsolutePath}" + " does not exist or is not a file.") @@ -58,12 +58,12 @@ private[spark] object PemsToKeyStoreConverter { keyStore.setKeyEntry( keyAlias, privateKey, - keyPassword.map(_.toCharArray).orNull, + keyPassword.toCharArray, certificates) val keyStoreDir = Utils.createTempDir("temp-keystores") val keyStoreFile = new File(keyStoreDir, s"keystore-${UUID.randomUUID()}.$resolvedKeyStoreType") Utils.tryWithResource(new FileOutputStream(keyStoreFile)) { storeStream => - keyStore.store(storeStream, keyStorePassword.map(_.toCharArray).orNull) + keyStore.store(storeStream, keyStorePassword.toCharArray) } keyStoreFile } @@ -81,6 +81,20 @@ private[spark] object PemsToKeyStoreConverter { trustStore } + def convertCertPemToTempTrustStoreFile( + certPemFile: File, + trustStorePassword: String, + trustStoreType: Option[String]): File = { + val trustStore = convertCertPemToTrustStore(certPemFile, trustStoreType) + val tempTrustStoreDir = Utils.createTempDir(namePrefix = "temp-trustStore") 
+ val tempTrustStoreFile = new File(tempTrustStoreDir, + s"trustStore.${trustStoreType.getOrElse(KeyStore.getDefaultType)}") + Utils.tryWithResource(new FileOutputStream(tempTrustStoreFile)) { + trustStore.store(_, trustStorePassword.toCharArray) + } + tempTrustStoreFile + } + private def withPemParsedFromFile[T](pemFile: File)(f: (PEMParser => T)): T = { Utils.tryWithResource(new FileInputStream(pemFile)) { pemStream => Utils.tryWithResource(new InputStreamReader(pemStream, Charsets.UTF_8)) { pemReader => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala index 67caa176930ea..7f21087159145 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala @@ -28,7 +28,7 @@ import retrofit2.{Call, Callback, Response} import scala.concurrent.{ExecutionContext, Future} import scala.concurrent.duration.Duration -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf} +import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.CompressionUtils @@ -95,7 +95,7 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( sparkConf: SparkConf, retrofitClientFactory: RetrofitClientFactory, fileFetcher: FileFetcher, - securityManager: SparkSecurityManager) extends Logging { + resourceStagingServerSslOptions: SSLOptions) extends Logging { private implicit val downloadExecutor = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("download-executor")) @@ -177,9 +177,10 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( maybeResourceId.foreach { resourceId => require(resourceSecretLocation.isFile, errMessageOnSecretNotAFile) require(resourceDownloadDir.isDirectory, errMessageOnDownloadDirNotADirectory) - val sslOptions = securityManager.getSSLOptions("kubernetes.resourceStagingServer") val service = retrofitClientFactory.createRetrofitClient( - resourceStagingServerUri, classOf[ResourceStagingServiceRetrofit], sslOptions) + resourceStagingServerUri, + classOf[ResourceStagingServiceRetrofit], + resourceStagingServerSslOptions) val resourceSecret = Files.toString(resourceSecretLocation, Charsets.UTF_8) val downloadResourceCallback = new DownloadTarGzCallback(resourceDownloadDir) logInfo(downloadStartMessage) @@ -219,12 +220,14 @@ object KubernetesSparkDependencyDownloadInitContainer extends Logging { new SparkConf(true) } val securityManager = new SparkSecurityManager(sparkConf) + val resourceStagingServerSslOptions = + new ResourceStagingServerSslOptionsProviderImpl(sparkConf).getSslOptions val fileFetcher = new FileFetcherImpl(sparkConf, securityManager) new KubernetesSparkDependencyDownloadInitContainer( sparkConf, RetrofitClientFactoryImpl, fileFetcher, - securityManager).run() + resourceStagingServerSslOptions).run() logInfo("Finished downloading application dependencies.") } } diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala index 2744ed0a74616..6b88426d00e72 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala @@ -17,9 +17,11 @@ package org.apache.spark.deploy.rest.kubernetes.v2 import java.io.File +import java.security.SecureRandom import com.google.common.base.Charsets import com.google.common.io.Files +import org.apache.commons.lang3.RandomStringUtils import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.config._ @@ -32,20 +34,29 @@ private[spark] trait ResourceStagingServerSslOptionsProvider { private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: SparkConf) extends ResourceStagingServerSslOptionsProvider with Logging { + + private val SECURE_RANDOM = new SecureRandom() + def getSslOptions: SSLOptions = { val baseSslOptions = new SparkSecurityManager(sparkConf) - .getSSLOptions("kubernetes.resourceStagingServer") + .getSSLOptions(RESOURCE_STAGING_SERVER_SSL_NAMESPACE) val maybeKeyPem = sparkConf.get(RESOURCE_STAGING_SERVER_KEY_PEM) - val maybeCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CERT_PEM) + val maybeServerCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CERT_PEM) val maybeKeyStorePasswordFile = sparkConf.get(RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE) val maybeKeyPasswordFile = sparkConf.get(RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE) + val maybeClientCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM) logSslConfigurations( - baseSslOptions, maybeKeyPem, maybeCertPem, maybeKeyStorePasswordFile, maybeKeyPasswordFile) + baseSslOptions, + maybeKeyPem, + maybeServerCertPem, + maybeKeyStorePasswordFile, + maybeKeyPasswordFile, + maybeClientCertPem) requireNandDefined(baseSslOptions.keyStore, maybeKeyPem, "Shouldn't provide both key PEM and keyStore files for TLS.") - requireNandDefined(baseSslOptions.keyStore, maybeCertPem, + requireNandDefined(baseSslOptions.keyStore, maybeServerCertPem, "Shouldn't provide both certificate PEM and keyStore files for TLS.") requireNandDefined(baseSslOptions.keyStorePassword, maybeKeyStorePasswordFile, "Shouldn't provide both the keyStore password value and the keyStore password file.") @@ -53,42 +64,68 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar "Shouldn't provide both the keyStore key password value and the keyStore key password file.") requireBothOrNeitherDefined( maybeKeyPem, - maybeCertPem, + maybeServerCertPem, "When providing a certificate PEM file, the key PEM file must also be provided.", "When providing a key PEM file, the certificate PEM file must also be provided.") + requireNandDefined(baseSslOptions.trustStore, maybeClientCertPem, + "Shouldn't provide both the trustStore and a client certificate PEM file.") val resolvedKeyStorePassword = baseSslOptions.keyStorePassword .orElse(maybeKeyStorePasswordFile.map { keyStorePasswordFile => safeFileToString(keyStorePasswordFile, "KeyStore password file") }) + .orElse(maybeKeyPem.map { _ => randomPassword()}) val 
resolvedKeyStoreKeyPassword = baseSslOptions.keyPassword .orElse(maybeKeyPasswordFile.map { keyPasswordFile => safeFileToString(keyPasswordFile, "KeyStore key password file") }) - val resolvedKeyStore = baseSslOptions.keyStore - .orElse(maybeKeyPem.map { keyPem => + .orElse(maybeKeyPem.map { _ => randomPassword()}) + val resolvedKeyStore = baseSslOptions.keyStore.orElse { + for { + keyPem <- maybeKeyPem + certPem <- maybeServerCertPem + keyStorePassword <- resolvedKeyStorePassword + keyPassword <- resolvedKeyStoreKeyPassword + } yield { val keyPemFile = new File(keyPem) - val certPemFile = new File(maybeCertPem.get) + val certPemFile = new File(certPem) PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( keyPemFile, certPemFile, "key", - resolvedKeyStorePassword, - resolvedKeyStoreKeyPassword, + keyStorePassword, + keyPassword, baseSslOptions.keyStoreType) - }) + } + } + val resolvedTrustStorePassword = baseSslOptions.trustStorePassword + .orElse(maybeClientCertPem.map( _ => "defaultTrustStorePassword")) + val resolvedTrustStore = baseSslOptions.trustStore.orElse { + for { + clientCertPem <- maybeClientCertPem + trustStorePassword <- resolvedTrustStorePassword + } yield { + val certPemFile = new File(clientCertPem) + PemsToKeyStoreConverter.convertCertPemToTempTrustStoreFile( + certPemFile, + trustStorePassword, + baseSslOptions.trustStoreType) + } + } baseSslOptions.copy( keyStore = resolvedKeyStore, keyStorePassword = resolvedKeyStorePassword, - keyPassword = resolvedKeyStoreKeyPassword) + keyPassword = resolvedKeyStoreKeyPassword, + trustStore = resolvedTrustStore) } private def logSslConfigurations( baseSslOptions: SSLOptions, maybeKeyPem: Option[String], - maybeCertPem: Option[String], + maybeServerCertPem: Option[String], maybeKeyStorePasswordFile: Option[String], - maybeKeyPasswordFile: Option[String]) = { + maybeKeyPasswordFile: Option[String], + maybeClientCertPem: Option[String]) = { logDebug("The following SSL configurations were provided for the resource staging server:") logDebug(s"KeyStore File: ${baseSslOptions.keyStore.map(_.getAbsolutePath).getOrElse("N/A")}") logDebug("KeyStore Password: " + @@ -99,7 +136,8 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar logDebug(s"Key Password File: ${maybeKeyPasswordFile.getOrElse("N/A")}") logDebug(s"KeyStore Type: ${baseSslOptions.keyStoreType.getOrElse("N/A")}") logDebug(s"Key PEM: ${maybeKeyPem.getOrElse("N/A")}") - logDebug(s"Certificate PEM: ${maybeCertPem.getOrElse("N/A")}") + logDebug(s"Server-side certificate PEM: ${maybeServerCertPem.getOrElse("N/A")}") + logDebug(s"Client-side certificate PEM: ${maybeClientCertPem.getOrElse("N/A")}") } private def requireBothOrNeitherDefined( @@ -130,4 +168,8 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar } Files.toString(file, Charsets.UTF_8) } + + private def randomPassword(): String = { + RandomStringUtils.random(1024, 0, Integer.MAX_VALUE, false, false, null, SECURE_RANDOM) + } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala index 0cb056dcf5493..886484ffb4692 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala @@ -30,6 +30,7 @@ import 
org.bouncycastle.cert.jcajce.{JcaX509CertificateConverter, JcaX509v3Certi import org.bouncycastle.openssl.jcajce.JcaPEMWriter import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder +import org.apache.spark.deploy.kubernetes.submit.v2.{KeyAndCertPem, KeyStoreAndTrustStore} import org.apache.spark.util.Utils private[spark] object SSLUtils { @@ -38,7 +39,7 @@ private[spark] object SSLUtils { ipAddress: String, keyStorePassword: String, keyPassword: String, - trustStorePassword: String): (File, File) = { + trustStorePassword: String): KeyStoreAndTrustStore = { val keyPairGenerator = KeyPairGenerator.getInstance("RSA") keyPairGenerator.initialize(512) val keyPair = keyPairGenerator.generateKeyPair() @@ -60,10 +61,10 @@ private[spark] object SSLUtils { Utils.tryWithResource(new FileOutputStream(trustStoreFile)) { trustStore.store(_, trustStorePassword.toCharArray) } - (keyStoreFile, trustStoreFile) + KeyStoreAndTrustStore(keyStoreFile, trustStoreFile) } - def generateKeyCertPemPair(ipAddress: String): (File, File) = { + def generateKeyCertPemPair(ipAddress: String): KeyAndCertPem = { val keyPairGenerator = KeyPairGenerator.getInstance("RSA") keyPairGenerator.initialize(512) val keyPair = keyPairGenerator.generateKeyPair() @@ -90,7 +91,7 @@ private[spark] object SSLUtils { } } } - (keyPemFile, certPemFile) + KeyAndCertPem(keyPemFile, certPemFile) } private def generateCertificate(ipAddress: String, keyPair: KeyPair): X509Certificate = { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala new file mode 100644 index 0000000000000..3d3ff7ad7011a --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala @@ -0,0 +1,23 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.v2 + +import java.io.File + +case class KeyAndCertPem(keyPem: File, certPem: File) + +case class KeyStoreAndTrustStore(keyStore: File, trustStore: File) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala index 6ab37185b8d07..c551fbc01d060 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala @@ -99,7 +99,7 @@ class KubernetesSparkDependencyDownloadInitContainerSuite sparkConf, retrofitClientFactory, fileFetcher, - securityManager = new SparkSecurityManager(sparkConf)) + resourceStagingServerSslOptions = STAGING_SERVER_SSL_OPTIONS) when(retrofitClient.downloadResources(JARS_RESOURCE_ID, downloadJarsSecretValue)) .thenReturn(downloadJarsCall) when(retrofitClient.downloadResources(FILES_RESOURCE_ID, downloadFilesSecretValue)) @@ -126,7 +126,7 @@ class KubernetesSparkDependencyDownloadInitContainerSuite sparkConf, retrofitClientFactory, fileFetcher, - securityManager = new SparkSecurityManager(sparkConf)) + resourceStagingServerSslOptions = STAGING_SERVER_SSL_OPTIONS) initContainerUnderTest.run() Mockito.verify(fileFetcher).fetchFile("http://localhost:9000/jar1.jar", downloadJarsDir) Mockito.verify(fileFetcher).fetchFile("hdfs://localhost:9000/jar2.jar", downloadJarsDir) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala index 10aced9000bf8..c33d8beb2c397 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala @@ -66,10 +66,12 @@ class ResourceStagingServerSslOptionsProviderSuite extends SparkFunSuite with Be } test("Setting key and certificate pem files should write an appropriate keyStore.") { - val (keyPemFile, certPemFile) = SSLUtils.generateKeyCertPemPair("127.0.0.1") + val keyAndCertPem = SSLUtils.generateKeyCertPemPair("127.0.0.1") sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") - .set("spark.ssl.kubernetes.resourceStagingServer.keyPem", keyPemFile.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.serverCertPem", certPemFile.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyPem", + keyAndCertPem.keyPem.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.serverCertPem", + keyAndCertPem.certPem.getAbsolutePath) .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStorePassword") .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "keyPassword") val sslOptions = sslOptionsProvider.getSslOptions @@ -81,9 +83,37 @@ class ResourceStagingServerSslOptionsProviderSuite extends SparkFunSuite with Be keyStore.load(_, 
"keyStorePassword".toCharArray) } val key = keyStore.getKey("key", "keyPassword".toCharArray) - compareJcaPemObjectToFileString(key, keyPemFile) + compareJcaPemObjectToFileString(key, keyAndCertPem.keyPem) val certificate = keyStore.getCertificateChain("key")(0) - compareJcaPemObjectToFileString(certificate, certPemFile) + compareJcaPemObjectToFileString(certificate, keyAndCertPem.certPem) + } + } + + test("Setting pem files without setting passwords should use random passwords.") { + val keyAndCertPem = SSLUtils.generateKeyCertPemPair("127.0.0.1") + sparkConf.set("spark.ssl.kubernetes.resourceStagingServer.enabled", "true") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPem", + keyAndCertPem.keyPem.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.serverCertPem", + keyAndCertPem.certPem.getAbsolutePath) + val sslOptions = sslOptionsProvider.getSslOptions + assert(sslOptions.enabled, "SSL should be enabled.") + assert(sslOptions.keyStore.isDefined, "KeyStore should be defined.") + assert(sslOptions.keyStorePassword.isDefined) + assert(sslOptions.keyPassword.isDefined) + for { + keyStoreFile <- sslOptions.keyStore + keyStorePassword <- sslOptions.keyStorePassword + keyPassword <- sslOptions.keyPassword + } { + val keyStore = KeyStore.getInstance(KeyStore.getDefaultType) + Utils.tryWithResource(new FileInputStream(keyStoreFile)) { + keyStore.load(_, keyStorePassword.toCharArray) + } + val key = keyStore.getKey("key", keyPassword.toCharArray) + compareJcaPemObjectToFileString(key, keyAndCertPem.keyPem) + val certificate = keyStore.getCertificateChain("key")(0) + compareJcaPemObjectToFileString(certificate, keyAndCertPem.certPem) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala index 4ef12e8686bb0..4ffb0d4dfa887 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala @@ -57,17 +57,17 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { } test("Enable SSL on the server") { - val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( ipAddress = "127.0.0.1", keyStorePassword = "keyStore", keyPassword = "key", trustStorePassword = "trustStore") val sslOptions = SSLOptions( enabled = true, - keyStore = Some(keyStore), + keyStore = Some(keyStoreAndTrustStore.keyStore), keyStorePassword = Some("keyStore"), keyPassword = Some("key"), - trustStore = Some(trustStore), + trustStore = Some(keyStoreAndTrustStore.trustStore), trustStorePassword = Some("trustStore")) sslOptionsProvider.setOptions(sslOptions) server.start() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala index f09339a9c3e08..559cb281c7c62 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala @@ -34,7 +34,7 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube -import org.apache.spark.deploy.kubernetes.integrationtest.constants.{GCE_TEST_BACKEND, MINIKUBE_TEST_BACKEND} +import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} @@ -190,16 +190,17 @@ private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) test("Enable SSL on the driver submit server") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val (keyStoreFile, trustStoreFile) = SSLUtils.generateKeyStoreTrustStorePair( + val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( Minikube.getMinikubeIp, "changeit", "changeit", "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, s"file://${keyStoreFile.getAbsolutePath}") + sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, + s"file://${keyStoreAndTrustStore.keyStore.getAbsolutePath}") sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, - s"file://${trustStoreFile.getAbsolutePath}") + s"file://${keyStoreAndTrustStore.trustStore.getAbsolutePath}") sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) new Client( @@ -212,10 +213,12 @@ private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) test("Enable SSL on the driver submit server using PEM files") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val (keyPem, certPem) = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) - sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${certPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${certPem.getAbsolutePath}") + val keyAndCertPem = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyAndCertPem.keyPem.getAbsolutePath}") + sparkConf.set( + DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${keyAndCertPem.certPem.getAbsolutePath}") + sparkConf.set( + DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${keyAndCertPem.certPem.getAbsolutePath}") sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) new Client( sparkConf = sparkConf, diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala index ba9d088bfcfcc..e9900b90cb588 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ 
b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala @@ -29,7 +29,7 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.submit.v2.Client +import org.apache.spark.deploy.kubernetes.submit.v2.{Client, KeyAndCertPem} import org.apache.spark.launcher.SparkLauncher @DoNotDiscover @@ -65,31 +65,34 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) test("Use submission v2.") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - launchStagingServer(SSLOptions()) + launchStagingServer(SSLOptions(), None) runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } test("Enable SSL on the submission server") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val (keyStore, trustStore) = SSLUtils.generateKeyStoreTrustStorePair( + val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( ipAddress = Minikube.getMinikubeIp, keyStorePassword = "keyStore", keyPassword = "key", trustStorePassword = "trustStore") sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) - .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", keyStore.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", trustStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", + keyStoreAndTrustStore.keyStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", + keyStoreAndTrustStore.trustStore.getAbsolutePath) .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStore") .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "key") .set("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword", "trustStore") launchStagingServer(SSLOptions( enabled = true, - keyStore = Some(keyStore), - trustStore = Some(trustStore), + keyStore = Some(keyStoreAndTrustStore.keyStore), + trustStore = Some(keyStoreAndTrustStore.trustStore), keyStorePassword = Some("keyStore"), keyPassword = Some("key"), - trustStorePassword = Some("trustStore"))) + trustStorePassword = Some("trustStore")), + None) runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) } @@ -104,7 +107,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) test("Dynamic executor scaling basic test") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) - launchStagingServer(SSLOptions()) + launchStagingServer(SSLOptions(), None) createShuffleServiceDaemonSet() sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) @@ -117,6 +120,7 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) } test("Use remote resources without the resource staging server.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() sparkConf.setJars(Seq( s"$assetServerUri/${KubernetesSuite.EXAMPLES_JAR_FILE.getName}", @@ -126,7 +130,8 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) } test("Mix remote resources with submitted ones.") { - launchStagingServer(SSLOptions()) + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + launchStagingServer(SSLOptions(), 
None) val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() sparkConf.setJars(Seq( KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, @@ -135,7 +140,20 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } + test("Use key and certificate PEM files for TLS.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val keyAndCertificate = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + launchStagingServer( + SSLOptions(enabled = true), + Some(keyAndCertificate)) + sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set( + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key, keyAndCertificate.certPem.getAbsolutePath) + runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + test("Use client key and client cert file when requesting executors") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) sparkConf.setJars(Seq( KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) @@ -148,11 +166,12 @@ private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } - private def launchStagingServer(resourceStagingServerSslOptions: SSLOptions): Unit = { + private def launchStagingServer( + resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { assume(testBackend.name == MINIKUBE_TEST_BACKEND) val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( - resourceStagingServerSslOptions) + resourceStagingServerSslOptions, keyAndCertPem) val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { "https" } else { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala index 3a99f907d15fd..1ba54c131c196 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala @@ -16,21 +16,17 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest -import java.io.StringWriter +import java.io.{File, StringWriter} import java.util.Properties -import java.util.concurrent.TimeUnit import com.google.common.io.{BaseEncoding, Files} -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Endpoints, HasMetadata, HTTPGetActionBuilder, KeyToPathBuilder, Pod, PodBuilder, SecretBuilder, ServiceBuilder} -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import io.fabric8.kubernetes.client.internal.readiness.Readiness +import io.fabric8.kubernetes.api.model.{ConfigMapBuilder, Endpoints, HTTPGetActionBuilder, KeyToPathBuilder, Pod, PodBuilder, SecretBuilder, ServiceBuilder} +import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.JavaConverters._ import org.apache.spark.SSLOptions import org.apache.spark.deploy.kubernetes.config._ -import 
org.apache.spark.deploy.kubernetes.submit.v2.ContainerNameEqualityPredicate +import org.apache.spark.deploy.kubernetes.submit.v2.{ContainerNameEqualityPredicate, KeyAndCertPem} import org.apache.spark.util.Utils /** @@ -38,23 +34,39 @@ import org.apache.spark.util.Utils */ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesClient) { - private val KEYSTORE_DIR = "/mnt/secrets/spark-staging" - private val KEYSTORE_FILE = s"$KEYSTORE_DIR/keyStore" + private val SECRETS_ROOT_DIR = "/mnt/secrets/spark-staging" + private val KEYSTORE_SECRET_KEY = "keyStore" + private val KEYSTORE_FILE = s"$SECRETS_ROOT_DIR/$KEYSTORE_SECRET_KEY" + private val KEY_PEM_SECRET_KEY = "keyPem" + private val CERT_PEM_SECRET_KEY = "certPem" + private val KEY_PEM_FILE = s"$SECRETS_ROOT_DIR/$KEY_PEM_SECRET_KEY" + private val CERT_PEM_FILE = s"$SECRETS_ROOT_DIR/$CERT_PEM_SECRET_KEY" + private val SSL_SECRET_NAME = "resource-staging-server-ssl-secrets" private val PROPERTIES_FILE_NAME = "staging-server.properties" private val PROPERTIES_DIR = "/var/data/spark-staging-server" private val PROPERTIES_FILE_PATH = s"$PROPERTIES_DIR/$PROPERTIES_FILE_NAME" // Returns the NodePort the staging server is listening on - def launchStagingServer(sslOptions: SSLOptions): Int = { + def launchStagingServer( + sslOptions: SSLOptions, + keyAndCertPem: Option[KeyAndCertPem] = None): Int = { val stagingServerProperties = new Properties() val stagingServerSecret = sslOptions.keyStore.map { keyStore => val keyStoreBytes = Files.toByteArray(keyStore) val keyStoreBase64 = BaseEncoding.base64().encode(keyStoreBytes) + Map(KEYSTORE_SECRET_KEY -> keyStoreBase64) + }.orElse { + keyAndCertPem.map { keyAndCert => + val keyPemBytes = Files.toByteArray(keyAndCert.keyPem) + val keyPemBase64 = BaseEncoding.base64().encode(keyPemBytes) + val certPemBytes = Files.toByteArray(keyAndCert.certPem) + val certPemBase64 = BaseEncoding.base64().encode(certPemBytes) + Map(KEY_PEM_SECRET_KEY -> keyPemBase64, CERT_PEM_SECRET_KEY -> certPemBase64) + } + }.map { secretData => new SecretBuilder() - .withNewMetadata() - .withName("resource-staging-server-keystore") - .endMetadata() - .addToData("keyStore", keyStoreBase64) + .withNewMetadata().withName(SSL_SECRET_NAME).endMetadata() + .withData(secretData.asJava) .build() } stagingServerProperties.setProperty( @@ -67,10 +79,18 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC stagingServerProperties.setProperty( "spark.ssl.kubernetes.resourceStagingServer.keyPassword", password) } - stagingServerSecret.foreach { _ => + sslOptions.keyStore.foreach { _ => stagingServerProperties.setProperty( "spark.ssl.kubernetes.resourceStagingServer.keyStore", KEYSTORE_FILE) } + keyAndCertPem.foreach { _ => + stagingServerProperties.setProperty( + RESOURCE_STAGING_SERVER_KEY_PEM.key, KEY_PEM_FILE) + } + keyAndCertPem.foreach { _ => + stagingServerProperties.setProperty( + RESOURCE_STAGING_SERVER_CERT_PEM.key, CERT_PEM_FILE) + } val propertiesWriter = new StringWriter() stagingServerProperties.store(propertiesWriter, "Resource staging server properties.") val stagingServerConfigMap = new ConfigMapBuilder() @@ -126,7 +146,7 @@ private[spark] class ResourceStagingServerLauncher(kubernetesClient: KubernetesC .editMatchingContainer(new ContainerNameEqualityPredicate("staging-server-container")) .addNewVolumeMount() .withName("keystore-volume") - .withMountPath(KEYSTORE_DIR) + .withMountPath(SECRETS_ROOT_DIR) .endVolumeMount() .endContainer() .endSpec() diff --git 
a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala index 6e0049b813719..461264877edc2 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/backend/minikube/MinikubeTestBackend.scala @@ -42,6 +42,4 @@ private[spark] class MinikubeTestBackend extends IntegrationTestBackend { } override def name(): String = MINIKUBE_TEST_BACKEND - - } From 910865fce996f0bfdda765c7a679494b6b63f341 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 19 May 2017 15:47:11 -0700 Subject: [PATCH 110/225] Differentiate between URI and SSL settings for in-cluster vs. submission (#281) --- .../kubernetes/OptionRequirements.scala | 40 ++++++ .../spark/deploy/kubernetes/config.scala | 69 ++++++++-- .../spark/deploy/kubernetes/constants.scala | 1 + ...riverInitContainerComponentsProvider.scala | 64 ++++++++- ...dDependencyInitContainerConfigPlugin.scala | 55 ++++++-- .../v2/SubmittedDependencySecretBuilder.scala | 44 +++++-- ...ourceStagingServerSslOptionsProvider.scala | 121 ++++++++---------- .../KubernetesClusterSchedulerBackend.scala | 3 +- ...ndencyInitContainerConfigPluginSuite.scala | 34 +++-- ...ubmittedDependencySecretBuilderSuite.scala | 40 +++++- 10 files changed, 341 insertions(+), 130 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/OptionRequirements.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/OptionRequirements.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/OptionRequirements.scala new file mode 100644 index 0000000000000..eda43de0a9a5b --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/OptionRequirements.scala @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +private[spark] object OptionRequirements { + + def requireBothOrNeitherDefined( + opt1: Option[_], + opt2: Option[_], + errMessageWhenFirstIsMissing: String, + errMessageWhenSecondIsMissing: String): Unit = { + requireSecondIfFirstIsDefined(opt1, opt2, errMessageWhenSecondIsMissing) + requireSecondIfFirstIsDefined(opt2, opt1, errMessageWhenFirstIsMissing) + } + + def requireSecondIfFirstIsDefined( + opt1: Option[_], opt2: Option[_], errMessageWhenSecondIsMissing: String): Unit = { + opt1.foreach { _ => + require(opt2.isDefined, errMessageWhenSecondIsMissing) + } + } + + def requireNandDefined(opt1: Option[_], opt2: Option[_], errMessage: String): Unit = { + opt1.foreach { _ => require(opt2.isEmpty, errMessage) } + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index ab442131ad271..759a7df505829 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -362,6 +362,8 @@ package object config extends Logging { .createOptional private[spark] val RESOURCE_STAGING_SERVER_SSL_NAMESPACE = "kubernetes.resourceStagingServer" + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE = + "kubernetes.resourceStagingServer.internal" private[spark] val RESOURCE_STAGING_SERVER_CERT_PEM = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.serverCertPem") .doc("Certificate PEM file to use when having the resource staging server" + @@ -370,35 +372,70 @@ package object config extends Logging { .createOptional private[spark] val RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.clientCertPem") - .doc("Certificate PEM file to use when the client contacts the resource staging server.") + .doc("Certificate PEM file to use when the client contacts the resource staging server." + + " This must strictly be a path to a file on the submitting machine's disk.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_CLIENT_CERT_PEM = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.clientCertPem") + .doc("Certificate PEM file to use when the init-container contacts the resource staging" + + " server. If this is not provided, it defaults to the value of" + + " spark.ssl.kubernetes.resourceStagingServer.clientCertPem. This can be a URI with" + + " a scheme of local:// which denotes that the file is pre-mounted on the init-container's" + + " disk. 
A uri without a scheme or a scheme of file:// will result in this file being" + + " mounted from the submitting machine's disk as a secret into the pods.") .stringConf .createOptional - private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_PASSWORD_FILE = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyStorePasswordFile") - .doc("File containing the keystore password for the Kubernetes dependency server.") + .doc("File containing the keystore password for the Kubernetes resource staging server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_KEYSTORE_KEY_PASSWORD_FILE = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.keyPasswordFile") - .doc("File containing the key password for the Kubernetes dependency server.") + .doc("File containing the key password for the Kubernetes resource staging server.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_SSL_ENABLED = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.enabled") - .doc("Whether or not to use SSL when communicating with the dependency server.") + .doc("Whether or not to use SSL when communicating with the resource staging server.") + .booleanConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_SSL_ENABLED = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.enabled") + .doc("Whether or not to use SSL when communicating with the resource staging server from" + + " the init-container. If this is not provided, defaults to" + + " the value of spark.ssl.kubernetes.resourceStagingServer.enabled") .booleanConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStore") - .doc("File containing the trustStore to communicate with the Kubernetes dependency server.") + .doc("File containing the trustStore to communicate with the Kubernetes dependency server." + + " This must strictly be a path on the submitting machine's disk.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_FILE = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.trustStore") + .doc("File containing the trustStore to communicate with the Kubernetes dependency server" + + " from the init-container. If this is not provided, defaults to the value of" + + " spark.ssl.kubernetes.resourceStagingServer.trustStore. This can be a URI with a scheme" + + " of local:// indicating that the trustStore is pre-mounted on the init-container's" + + " disk. If no scheme, or a scheme of file:// is provided, this file is mounted from the" + + " submitting machine's disk as a Kubernetes secret into the pods.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD = ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_SSL_NAMESPACE.trustStorePassword") - .doc("Password for the trustStore for talking to the dependency server.") + .doc("Password for the trustStore for communicating to the dependency server.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_PASSWORD = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.trustStorePassword") + .doc("Password for the trustStore for communicating to the dependency server from the" + + " init-container. 
If this is not provided, defaults to" + + " spark.ssl.kubernetes.resourceStagingServer.trustStorePassword.") .stringConf .createOptional private[spark] val RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE = @@ -406,11 +443,27 @@ package object config extends Logging { .doc("Type of trustStore for communicating with the dependency server.") .stringConf .createOptional + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_TYPE = + ConfigBuilder(s"spark.ssl.$RESOURCE_STAGING_SERVER_INTERNAL_SSL_NAMESPACE.trustStoreType") + .doc("Type of trustStore for communicating with the dependency server from the" + + " init-container. If this is not provided, defaults to" + + " spark.ssl.kubernetes.resourceStagingServer.trustStoreType") + .stringConf + .createOptional // Driver and Init-Container parameters for submission v2 private[spark] val RESOURCE_STAGING_SERVER_URI = ConfigBuilder("spark.kubernetes.resourceStagingServer.uri") - .doc("Base URI for the Spark resource staging server") + .doc("Base URI for the Spark resource staging server.") + .stringConf + .createOptional + + private[spark] val RESOURCE_STAGING_SERVER_INTERNAL_URI = + ConfigBuilder("spark.kubernetes.resourceStagingServer.internal.uri") + .doc("Base URI for the Spark resource staging server when the init-containers access it for" + + " downloading resources. If this is not provided, it defaults to the value provided in" + + " spark.kubernetes.resourceStagingServer.uri, the URI that the submission client uses to" + + " upload the resources from outside the cluster.") .stringConf .createOptional diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 8d0965078aaa8..ea11ca2ec8f21 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -115,6 +115,7 @@ package object constants { private[spark] val INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY = "downloadSubmittedFilesSecret" private[spark] val INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY = "trustStore" + private[spark] val INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY = "ssl-certificate" private[spark] val INIT_CONTAINER_CONFIG_MAP_KEY = "download-submitted-files" private[spark] val INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME = "download-jars-volume" private[spark] val INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME = "download-files" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala index 7f6ae2ec47675..0a5e6cd216011 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala @@ -17,10 +17,11 @@ package org.apache.spark.deploy.kubernetes.submit.v2 import org.apache.spark.{SparkConf, SSLOptions} -import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} +import 
org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl +import org.apache.spark.util.Utils /** * Interface that wraps the provision of everything the submission client needs to set up the @@ -47,10 +48,51 @@ private[spark] class DriverInitContainerComponentsProviderImpl( kubernetesAppId: String, sparkJars: Seq[String], sparkFiles: Seq[String], - resourceStagingServerSslOptions: SSLOptions) + resourceStagingServerExternalSslOptions: SSLOptions) extends DriverInitContainerComponentsProvider { private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) + private val maybeResourceStagingServerInternalUri = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_URI) + private val maybeResourceStagingServerInternalTrustStore = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_FILE) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE)) + private val maybeResourceStagingServerInternalTrustStorePassword = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_PASSWORD) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD)) + private val maybeResourceStagingServerInternalTrustStoreType = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_TYPE) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE)) + private val maybeResourceStagingServerInternalClientCert = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_CLIENT_CERT_PEM) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM)) + private val resourceStagingServerInternalSslEnabled = + sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_SSL_ENABLED) + .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_SSL_ENABLED)) + .getOrElse(false) + + OptionRequirements.requireNandDefined( + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStore, + "Cannot provide both a certificate file and a trustStore file for init-containers to" + + " use for contacting the resource staging server over TLS.") + + require(maybeResourceStagingServerInternalTrustStore.forall { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { + case "file" | "local" => true + case _ => false + } + }, "TrustStore URI used for contacting the resource staging server from init containers must" + + " have no scheme, or scheme file://, or scheme local://.") + + require(maybeResourceStagingServerInternalClientCert.forall { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { + case "file" | "local" => true + case _ => false + } + }, "Client cert file URI used for contacting the resource staging server from init containers" + + " must have no scheme, or scheme file://, or scheme local://.") + private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) private val maybeSecretName = maybeResourceStagingServerUri.map { _ => @@ -71,14 +113,20 @@ private[spark] class DriverInitContainerComponentsProviderImpl( filesResourceId <- maybeSubmittedResourceIds.map(_.filesResourceId) } yield { new SubmittedDependencyInitContainerConfigPluginImpl( - stagingServerUri, + // Configure the init-container with the internal URI 
over the external URI. + maybeResourceStagingServerInternalUri.getOrElse(stagingServerUri), jarsResourceId, filesResourceId, INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, - resourceStagingServerSslOptions) + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + resourceStagingServerInternalSslEnabled, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStorePassword, + maybeResourceStagingServerInternalTrustStoreType, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) } new SparkInitContainerConfigMapBuilderImpl( sparkJars, @@ -113,7 +161,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( stagingServerUri, sparkJars, sparkFiles, - resourceStagingServerSslOptions, + resourceStagingServerExternalSslOptions, RetrofitClientFactoryImpl) } } @@ -133,7 +181,9 @@ private[spark] class DriverInitContainerComponentsProviderImpl( INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - resourceStagingServerSslOptions) + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala index bc9abc4eaba81..1b086e60d3d0d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala @@ -16,9 +16,10 @@ */ package org.apache.spark.deploy.kubernetes.submit.v2 -import org.apache.spark.SSLOptions +import org.apache.spark.SparkException import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.internal.config.OptionalConfigEntry +import org.apache.spark.util.Utils private[spark] trait SubmittedDependencyInitContainerConfigPlugin { /** @@ -34,36 +35,62 @@ private[spark] trait SubmittedDependencyInitContainerConfigPlugin { } private[spark] class SubmittedDependencyInitContainerConfigPluginImpl( - resourceStagingServerUri: String, + internalResourceStagingServerUri: String, jarsResourceId: String, filesResourceId: String, jarsSecretKey: String, filesSecretKey: String, trustStoreSecretKey: String, - secretsVolumeMountPath: String, - resourceStagingServiceSslOptions: SSLOptions) + clientCertSecretKey: String, + resourceStagingServerSslEnabled: Boolean, + maybeInternalTrustStoreUri: Option[String], + maybeInternalClientCertUri: Option[String], + maybeInternalTrustStorePassword: Option[String], + maybeInternalTrustStoreType: Option[String], + secretsVolumeMountPath: String) extends SubmittedDependencyInitContainerConfigPlugin { override def configurationsToFetchSubmittedDependencies(): Map[String, String] = { Map[String, String]( - RESOURCE_STAGING_SERVER_URI.key -> resourceStagingServerUri, + RESOURCE_STAGING_SERVER_URI.key -> internalResourceStagingServerUri, 
INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> jarsResourceId, INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> s"$secretsVolumeMountPath/$jarsSecretKey", INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> filesResourceId, INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> s"$secretsVolumeMountPath/$filesSecretKey", - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> - resourceStagingServiceSslOptions.enabled.toString) ++ - resourceStagingServiceSslOptions.trustStore.map { _ => - (RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key, - s"$secretsVolumeMountPath/$trustStoreSecretKey") - }.toMap ++ - resourceStagingServiceSslOptions.trustStorePassword.map { password => + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> resourceStagingServerSslEnabled.toString) ++ + resolveSecretPath( + maybeInternalTrustStoreUri, + trustStoreSecretKey, + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, + "TrustStore URI") ++ + resolveSecretPath( + maybeInternalClientCertUri, + clientCertSecretKey, + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM, + "Client certificate URI") ++ + maybeInternalTrustStorePassword.map { password => (RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) }.toMap ++ - resourceStagingServiceSslOptions.trustStoreType.map { storeType => + maybeInternalTrustStoreType.map { storeType => (RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) }.toMap } + + private def resolveSecretPath( + maybeUri: Option[String], + secretKey: String, + configEntry: OptionalConfigEntry[String], + uriType: String): Map[String, String] = { + maybeUri.map(Utils.resolveURI).map { uri => + val resolvedPath = Option(uri.getScheme).getOrElse("file") match { + case "file" => s"$secretsVolumeMountPath/$secretKey" + case "local" => uri.getPath + case invalid => throw new SparkException(s"$uriType has invalid scheme $invalid must be" + + s" local://, file://, or empty.") + } + (configEntry.key, resolvedPath) + }.toMap + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala index b8fa43d0573f7..1a33757e45aa0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala @@ -16,12 +16,14 @@ */ package org.apache.spark.deploy.kubernetes.submit.v2 +import java.io.File + import com.google.common.base.Charsets import com.google.common.io.{BaseEncoding, Files} import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} import scala.collection.JavaConverters._ -import org.apache.spark.SSLOptions +import org.apache.spark.util.Utils private[spark] trait SubmittedDependencySecretBuilder { /** @@ -32,28 +34,30 @@ private[spark] trait SubmittedDependencySecretBuilder { } private[spark] class SubmittedDependencySecretBuilderImpl( - secretName: String, - jarsResourceSecret: String, - filesResourceSecret: String, - jarsSecretKey: String, - filesSecretKey: String, - trustStoreSecretKey: String, - resourceStagingServerSslOptions: SSLOptions) + secretName: String, + jarsResourceSecret: String, + filesResourceSecret: String, + jarsSecretKey: String, + filesSecretKey: String, + trustStoreSecretKey: String, + clientCertSecretKey: String, + internalTrustStoreUri: Option[String], + 
internalClientCertUri: Option[String]) extends SubmittedDependencySecretBuilder { override def build(): Secret = { - val trustStoreBase64 = resourceStagingServerSslOptions.trustStore.map { trustStoreFile => - require(trustStoreFile.isFile, "Dependency server trustStore provided at" + - trustStoreFile.getAbsolutePath + " does not exist or is not a file.") - (trustStoreSecretKey, BaseEncoding.base64().encode(Files.toByteArray(trustStoreFile))) - }.toMap + val trustStoreBase64 = convertFileToBase64IfSubmitterLocal( + trustStoreSecretKey, internalTrustStoreUri) + val clientCertBase64 = convertFileToBase64IfSubmitterLocal( + clientCertSecretKey, internalClientCertUri) val jarsSecretBase64 = BaseEncoding.base64().encode(jarsResourceSecret.getBytes(Charsets.UTF_8)) val filesSecretBase64 = BaseEncoding.base64().encode( filesResourceSecret.getBytes(Charsets.UTF_8)) val secretData = Map( jarsSecretKey -> jarsSecretBase64, filesSecretKey -> filesSecretBase64) ++ - trustStoreBase64 + trustStoreBase64 ++ + clientCertBase64 val kubernetesSecret = new SecretBuilder() .withNewMetadata() .withName(secretName) @@ -62,4 +66,16 @@ private[spark] class SubmittedDependencySecretBuilderImpl( .build() kubernetesSecret } + + private def convertFileToBase64IfSubmitterLocal(secretKey: String, secretUri: Option[String]) + : Map[String, String] = { + secretUri.filter { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") == "file" + }.map { uri => + val file = new File(Utils.resolveURI(uri).getPath) + require(file.isFile, "Dependency server trustStore provided at" + + file.getAbsolutePath + " does not exist or is not a file.") + (secretKey, BaseEncoding.base64().encode(Files.toByteArray(file))) + }.toMap + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala index 6b88426d00e72..0dd0b08433def 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala @@ -23,7 +23,8 @@ import com.google.common.base.Charsets import com.google.common.io.Files import org.apache.commons.lang3.RandomStringUtils -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} +import org.apache.spark.deploy.kubernetes.OptionRequirements import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter import org.apache.spark.internal.Logging @@ -38,7 +39,7 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar private val SECURE_RANDOM = new SecureRandom() def getSslOptions: SSLOptions = { - val baseSslOptions = new SparkSecurityManager(sparkConf) + val baseSslOptions = new SecurityManager(sparkConf) .getSSLOptions(RESOURCE_STAGING_SERVER_SSL_NAMESPACE) val maybeKeyPem = sparkConf.get(RESOURCE_STAGING_SERVER_KEY_PEM) val maybeServerCertPem = sparkConf.get(RESOURCE_STAGING_SERVER_CERT_PEM) @@ -47,39 +48,47 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar val maybeClientCertPem = 
sparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM) logSslConfigurations( - baseSslOptions, - maybeKeyPem, - maybeServerCertPem, - maybeKeyStorePasswordFile, - maybeKeyPasswordFile, - maybeClientCertPem) - - requireNandDefined(baseSslOptions.keyStore, maybeKeyPem, - "Shouldn't provide both key PEM and keyStore files for TLS.") - requireNandDefined(baseSslOptions.keyStore, maybeServerCertPem, - "Shouldn't provide both certificate PEM and keyStore files for TLS.") - requireNandDefined(baseSslOptions.keyStorePassword, maybeKeyStorePasswordFile, - "Shouldn't provide both the keyStore password value and the keyStore password file.") - requireNandDefined(baseSslOptions.keyPassword, maybeKeyPasswordFile, - "Shouldn't provide both the keyStore key password value and the keyStore key password file.") - requireBothOrNeitherDefined( - maybeKeyPem, - maybeServerCertPem, - "When providing a certificate PEM file, the key PEM file must also be provided.", - "When providing a key PEM file, the certificate PEM file must also be provided.") - requireNandDefined(baseSslOptions.trustStore, maybeClientCertPem, - "Shouldn't provide both the trustStore and a client certificate PEM file.") + baseSslOptions, + maybeKeyPem, + maybeServerCertPem, + maybeKeyStorePasswordFile, + maybeKeyPasswordFile, + maybeClientCertPem) + + OptionRequirements.requireNandDefined( + baseSslOptions.keyStore, + maybeKeyPem, + "Shouldn't provide both key PEM and keyStore files for TLS.") + OptionRequirements.requireNandDefined( + baseSslOptions.keyStore, + maybeServerCertPem, + "Shouldn't provide both certificate PEM and keyStore files for TLS.") + OptionRequirements.requireNandDefined( + baseSslOptions.keyStorePassword, + maybeKeyStorePasswordFile, + "Shouldn't provide both the keyStore password value and the keyStore password file.") + OptionRequirements.requireNandDefined( + baseSslOptions.keyPassword, + maybeKeyPasswordFile, + "Shouldn't provide both a keyStore key password value and a keyStore key password file.") + OptionRequirements.requireBothOrNeitherDefined( + maybeKeyPem, + maybeServerCertPem, + "When providing a certificate PEM file, the key PEM file must also be provided.", + "When providing a key PEM file, the certificate PEM file must also be provided.") + OptionRequirements.requireNandDefined(baseSslOptions.trustStore, maybeClientCertPem, + "Shouldn't provide both the trustStore and a client certificate PEM file.") val resolvedKeyStorePassword = baseSslOptions.keyStorePassword - .orElse(maybeKeyStorePasswordFile.map { keyStorePasswordFile => - safeFileToString(keyStorePasswordFile, "KeyStore password file") - }) - .orElse(maybeKeyPem.map { _ => randomPassword()}) + .orElse(maybeKeyStorePasswordFile.map { keyStorePasswordFile => + safeFileToString(keyStorePasswordFile, "KeyStore password file") + }) + .orElse(maybeKeyPem.map { _ => randomPassword()}) val resolvedKeyStoreKeyPassword = baseSslOptions.keyPassword - .orElse(maybeKeyPasswordFile.map { keyPasswordFile => - safeFileToString(keyPasswordFile, "KeyStore key password file") - }) - .orElse(maybeKeyPem.map { _ => randomPassword()}) + .orElse(maybeKeyPasswordFile.map { keyPasswordFile => + safeFileToString(keyPasswordFile, "KeyStore key password file") + }) + .orElse(maybeKeyPem.map { _ => randomPassword()}) val resolvedKeyStore = baseSslOptions.keyStore.orElse { for { keyPem <- maybeKeyPem @@ -90,16 +99,16 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar val keyPemFile = new File(keyPem) val certPemFile = new File(certPem) 
PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( - keyPemFile, - certPemFile, - "key", - keyStorePassword, - keyPassword, - baseSslOptions.keyStoreType) + keyPemFile, + certPemFile, + "key", + keyStorePassword, + keyPassword, + baseSslOptions.keyStoreType) } } val resolvedTrustStorePassword = baseSslOptions.trustStorePassword - .orElse(maybeClientCertPem.map( _ => "defaultTrustStorePassword")) + .orElse(maybeClientCertPem.map( _ => "defaultTrustStorePassword")) val resolvedTrustStore = baseSslOptions.trustStore.orElse { for { clientCertPem <- maybeClientCertPem @@ -107,16 +116,16 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar } yield { val certPemFile = new File(clientCertPem) PemsToKeyStoreConverter.convertCertPemToTempTrustStoreFile( - certPemFile, - trustStorePassword, - baseSslOptions.trustStoreType) + certPemFile, + trustStorePassword, + baseSslOptions.trustStoreType) } } baseSslOptions.copy( - keyStore = resolvedKeyStore, - keyStorePassword = resolvedKeyStorePassword, - keyPassword = resolvedKeyStoreKeyPassword, - trustStore = resolvedTrustStore) + keyStore = resolvedKeyStore, + keyStorePassword = resolvedKeyStorePassword, + keyPassword = resolvedKeyStoreKeyPassword, + trustStore = resolvedTrustStore) } private def logSslConfigurations( @@ -140,26 +149,6 @@ private[spark] class ResourceStagingServerSslOptionsProviderImpl(sparkConf: Spar logDebug(s"Client-side certificate PEM: ${maybeClientCertPem.getOrElse("N/A")}") } - private def requireBothOrNeitherDefined( - opt1: Option[_], - opt2: Option[_], - errMessageWhenFirstIsMissing: String, - errMessageWhenSecondIsMissing: String): Unit = { - requireSecondIfFirstIsDefined(opt1, opt2, errMessageWhenSecondIsMissing) - requireSecondIfFirstIsDefined(opt2, opt1, errMessageWhenFirstIsMissing) - } - - private def requireSecondIfFirstIsDefined( - opt1: Option[_], opt2: Option[_], errMessageWhenSecondIsMissing: String): Unit = { - opt1.foreach { _ => - require(opt2.isDefined, errMessageWhenSecondIsMissing) - } - } - - private def requireNandDefined(opt1: Option[_], opt2: Option[_], errMessage: String): Unit = { - opt1.foreach { _ => require(opt2.isEmpty, errMessage) } - } - private def safeFileToString(filePath: String, fileType: String): String = { val file = new File(filePath) if (!file.isFile) { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 0dd875b307a6d..5627f7c20de3d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -322,7 +322,8 @@ private[spark] class KubernetesClusterSchedulerBackend( .addToRequests("cpu", executorCpuQuantity) .addToLimits("cpu", executorCpuQuantity) .endResources() - .withEnv(requiredEnv.asJava) + .addAllToEnv(requiredEnv.asJava) + .addToEnv(executorExtraClasspathEnv.toSeq: _*) .withPorts(requiredPorts.asJava) .endContainer() .endSpec() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala index 11a671085c201..09b41dc1bcaaf 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala @@ -23,20 +23,18 @@ import org.apache.spark.deploy.kubernetes.config._ class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { private val STAGING_SERVER_URI = "http://localhost:9000" + private val STAGING_SERVER_INTERNAL_URI = "http://internalHost:9000" private val JARS_RESOURCE_ID = "jars-id" private val FILES_RESOURCE_ID = "files-id" private val JARS_SECRET_KEY = "jars" private val FILES_SECRET_KEY = "files" private val TRUSTSTORE_SECRET_KEY = "trustStore" - private val SECRETS_VOLUME_MOUNT_PATH = "/var/data/" + private val CLIENT_CERT_SECRET_KEY = "client-cert" + private val SECRETS_VOLUME_MOUNT_PATH = "/var/data" private val TRUSTSTORE_PASSWORD = "trustStore" private val TRUSTSTORE_FILE = "/mnt/secrets/trustStore.jks" + private val CLIENT_CERT_URI = "local:///mnt/secrets/client-cert.pem" private val TRUSTSTORE_TYPE = "jks" - private val RESOURCE_STAGING_SERVICE_SSL_OPTIONS = SSLOptions( - enabled = true, - trustStore = Some(new File(TRUSTSTORE_FILE)), - trustStorePassword = Some(TRUSTSTORE_PASSWORD), - trustStoreType = Some(TRUSTSTORE_TYPE)) test("Plugin should provide configuration for fetching uploaded dependencies") { val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( @@ -46,8 +44,13 @@ class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { JARS_SECRET_KEY, FILES_SECRET_KEY, TRUSTSTORE_SECRET_KEY, - SECRETS_VOLUME_MOUNT_PATH, - SSLOptions()) + CLIENT_CERT_SECRET_KEY, + false, + None, + None, + None, + None, + SECRETS_VOLUME_MOUNT_PATH) val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() val expectedConfigurations = Map( RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, @@ -65,19 +68,24 @@ class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( STAGING_SERVER_URI, JARS_RESOURCE_ID, - FILES_RESOURCE_ID, - JARS_SECRET_KEY, + FILES_RESOURCE_ID, JARS_SECRET_KEY, FILES_SECRET_KEY, TRUSTSTORE_SECRET_KEY, - SECRETS_VOLUME_MOUNT_PATH, - RESOURCE_STAGING_SERVICE_SSL_OPTIONS) + CLIENT_CERT_SECRET_KEY, + true, + Some(TRUSTSTORE_FILE), + Some(CLIENT_CERT_URI), + Some(TRUSTSTORE_PASSWORD), + Some(TRUSTSTORE_TYPE), + SECRETS_VOLUME_MOUNT_PATH) val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() val expectedSslConfigurations = Map( RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> s"$SECRETS_VOLUME_MOUNT_PATH/$TRUSTSTORE_SECRET_KEY", RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, - RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE) + RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE, + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key -> "/mnt/secrets/client-cert.pem") assert(expectedSslConfigurations.toSet.subsetOf(addedConfigurations.toSet)) } } diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala index 189d87e27a28a..358edbecf8708 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala @@ -35,7 +35,9 @@ class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { private val JARS_SECRET_KEY = "jars-secret-key" private val FILES_SECRET_KEY = "files-secret-key" private val TRUSTSTORE_SECRET_KEY = "truststore-secret-key" + private val CLIENT_CERT_SECRET_KEY = "client-cert" private val TRUSTSTORE_STRING_CONTENTS = "trustStore-contents" + private val CLIENT_CERT_STRING_CONTENTS = "client-certificate-contents" test("Building the secret without a trustStore") { val builder = new SubmittedDependencySecretBuilderImpl( @@ -45,7 +47,9 @@ class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { JARS_SECRET_KEY, FILES_SECRET_KEY, TRUSTSTORE_SECRET_KEY, - SSLOptions()) + CLIENT_CERT_SECRET_KEY, + None, + None) val secret = builder.build() assert(secret.getMetadata.getName === SECRET_NAME) val secretDecodedData = decodeSecretData(secret) @@ -60,10 +64,12 @@ class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { } test("Building the secret with a trustStore") { - val tempTrustStoreDir = Utils.createTempDir(namePrefix = "temp-truststores") + val tempSslDir = Utils.createTempDir(namePrefix = "temp-ssl-tests") try { - val trustStoreFile = new File(tempTrustStoreDir, "trustStore.jks") + val trustStoreFile = new File(tempSslDir, "trustStore.jks") Files.write(TRUSTSTORE_STRING_CONTENTS, trustStoreFile, Charsets.UTF_8) + val clientCertFile = new File(tempSslDir, "cert.pem") + Files.write(CLIENT_CERT_STRING_CONTENTS, clientCertFile, Charsets.UTF_8) val builder = new SubmittedDependencySecretBuilderImpl( SECRET_NAME, JARS_SECRET, @@ -71,13 +77,33 @@ class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { JARS_SECRET_KEY, FILES_SECRET_KEY, TRUSTSTORE_SECRET_KEY, - SSLOptions(trustStore = Some(trustStoreFile))) + CLIENT_CERT_SECRET_KEY, + Some(trustStoreFile.getAbsolutePath), + Some(clientCertFile.getAbsolutePath)) val secret = builder.build() - val secretDecodedData = decodeSecretData(secret) - assert(secretDecodedData(TRUSTSTORE_SECRET_KEY) === TRUSTSTORE_STRING_CONTENTS) + val decodedSecretData = decodeSecretData(secret) + assert(decodedSecretData(TRUSTSTORE_SECRET_KEY) === TRUSTSTORE_STRING_CONTENTS) + assert(decodedSecretData(CLIENT_CERT_SECRET_KEY) === CLIENT_CERT_STRING_CONTENTS) } finally { - tempTrustStoreDir.delete() + tempSslDir.delete() } } + test("If trustStore and certificate are container-local, don't add secret entries") { + val builder = new SubmittedDependencySecretBuilderImpl( + SECRET_NAME, + JARS_SECRET, + FILES_SECRET, + JARS_SECRET_KEY, + FILES_SECRET_KEY, + TRUSTSTORE_SECRET_KEY, + CLIENT_CERT_SECRET_KEY, + Some("local:///mnt/secrets/trustStore.jks"), + Some("local:///mnt/secrets/cert.pem")) + val secret = builder.build() + val decodedSecretData = decodeSecretData(secret) + assert(!decodedSecretData.contains(TRUSTSTORE_SECRET_KEY)) + assert(!decodedSecretData.contains(CLIENT_CERT_SECRET_KEY)) + } + } From 
2e5f2cdfa356347c734a19de6a207b9ad207ecb3 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 22 May 2017 14:31:42 -0700 Subject: [PATCH 111/225] Monitor pod status in submission v2. (#283) * Monitor pod status in submission v2. * Address comments --- .../{v1 => }/LoggingPodStatusWatcher.scala | 90 ++++++++++++++----- .../deploy/kubernetes/submit/v1/Client.scala | 14 +-- .../deploy/kubernetes/submit/v2/Client.scala | 65 +++++++++----- .../kubernetes/submit/v2/ClientV2Suite.scala | 42 +++++++-- 4 files changed, 154 insertions(+), 57 deletions(-) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v1 => }/LoggingPodStatusWatcher.scala (54%) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala similarity index 54% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala index 537bcccaa1458..1633a084e463c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala @@ -14,32 +14,36 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v1 +package org.apache.spark.deploy.kubernetes.submit -import java.util.concurrent.{CountDownLatch, Executors, TimeUnit} +import java.util.concurrent.{CountDownLatch, TimeUnit} -import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.api.model.{ContainerStateRunning, ContainerStateTerminated, ContainerStateWaiting, ContainerStatus, Pod} import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import scala.collection.JavaConverters._ +import org.apache.spark.SparkException import org.apache.spark.internal.Logging import org.apache.spark.util.ThreadUtils +private[kubernetes] trait LoggingPodStatusWatcher extends Watcher[Pod] { + def awaitCompletion(): Unit +} + /** * A monitor for the running Kubernetes pod of a Spark application. Status logging occurs on * every state change and also at an interval for liveness. * - * @param podCompletedFuture a CountDownLatch that is set to true when the watched pod finishes * @param appId - * @param interval ms between each state request. If set to 0 or a negative number, the periodic - * logging will be disabled. + * @param maybeLoggingInterval ms between each state request. If provided, must be a positive + * number. 
*/ -private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownLatch, - appId: String, - interval: Long) - extends Watcher[Pod] with Logging { +private[kubernetes] class LoggingPodStatusWatcherImpl( + appId: String, maybeLoggingInterval: Option[Long]) + extends LoggingPodStatusWatcher with Logging { + private val podCompletedFuture = new CountDownLatch(1) // start timer for periodic logging private val scheduler = ThreadUtils.newDaemonSingleThreadScheduledExecutor("logging-pod-status-watcher") @@ -47,13 +51,13 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL override def run() = logShortStatus() } - private var pod: Option[Pod] = Option.empty - private def phase: String = pod.map(_.getStatus().getPhase()).getOrElse("unknown") - private def status: String = pod.map(_.getStatus().getContainerStatuses().toString()) - .getOrElse("unknown") + private var pod = Option.empty[Pod] + + private def phase: String = pod.map(_.getStatus.getPhase).getOrElse("unknown") def start(): Unit = { - if (interval > 0) { + maybeLoggingInterval.foreach { interval => + require(interval > 0, s"Logging interval must be a positive time value, got: $interval ms.") scheduler.scheduleAtFixedRate(logRunnable, 0, interval, TimeUnit.MILLISECONDS) } } @@ -98,7 +102,7 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL } private def formatPodState(pod: Pod): String = { - + // TODO include specific container state val details = Seq[(String, String)]( // pod metadata ("pod name", pod.getMetadata.getName()), @@ -116,17 +120,59 @@ private[kubernetes] class LoggingPodStatusWatcher(podCompletedFuture: CountDownL ("start time", pod.getStatus.getStartTime), ("container images", pod.getStatus.getContainerStatuses() - .asScala - .map(_.getImage) - .mkString(", ")), + .asScala + .map(_.getImage) + .mkString(", ")), ("phase", pod.getStatus.getPhase()), ("status", pod.getStatus.getContainerStatuses().toString) ) + formatPairsBundle(details) + } + private def formatPairsBundle(pairs: Seq[(String, String)]) = { // Use more loggable format if value is null or empty - details.map { case (k, v) => - val newValue = Option(v).filter(_.nonEmpty).getOrElse("N/A") - s"\n\t $k: $newValue" + pairs.map { + case (k, v) => s"\n\t $k: ${Option(v).filter(_.nonEmpty).getOrElse("N/A")}" }.mkString("") } + + override def awaitCompletion(): Unit = { + podCompletedFuture.countDown() + logInfo(pod.map { p => + s"Container final statuses:\n\n${containersDescription(p)}" + }.getOrElse("No containers were found in the driver pod.")) + } + + private def containersDescription(p: Pod): String = { + p.getStatus.getContainerStatuses.asScala.map { status => + Seq( + ("Container name", status.getName), + ("Container image", status.getImage)) ++ + containerStatusDescription(status) + }.map(formatPairsBundle).mkString("\n\n") + } + + private def containerStatusDescription( + containerStatus: ContainerStatus): Seq[(String, String)] = { + val state = containerStatus.getState + Option(state.getRunning) + .orElse(Option(state.getTerminated)) + .orElse(Option(state.getWaiting)) + .map { + case running: ContainerStateRunning => + Seq( + ("Container state", "Running"), + ("Container started at", running.getStartedAt)) + case waiting: ContainerStateWaiting => + Seq( + ("Container state", "Waiting"), + ("Pending reason", waiting.getReason)) + case terminated: ContainerStateTerminated => + Seq( + ("Container state", "Terminated"), + ("Exit code", terminated.getExitCode.toString)) + case unknown => 
+ throw new SparkException(s"Unexpected container status type ${unknown.getClass}.") + }.getOrElse(Seq(("Container state", "N/A"))) + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala index fa3c97c6957b5..32fc434cb693a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala @@ -33,7 +33,7 @@ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.{DriverPodKubernetesCredentialsProvider, KubernetesFileUtils} +import org.apache.spark.deploy.kubernetes.submit.{DriverPodKubernetesCredentialsProvider, KubernetesFileUtils, LoggingPodStatusWatcherImpl} import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} import org.apache.spark.internal.Logging import org.apache.spark.util.{ShutdownHookManager, Utils} @@ -83,7 +83,9 @@ private[spark] class Client( MEMORY_OVERHEAD_MIN)) private val driverContainerMemoryWithOverhead = driverContainerMemoryMb + memoryOverheadMb - private val waitForAppCompletion: Boolean = sparkConf.get(WAIT_FOR_APP_COMPLETION) + private val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) + private val loggingInterval = Some(sparkConf.get(REPORT_INTERVAL)) + .filter( _ => waitForAppCompletion) private val secretBase64String = { val secretBytes = new Array[Byte](128) @@ -147,10 +149,8 @@ private[spark] class Client( driverServiceManager.start(kubernetesClient, kubernetesAppId, sparkConf) // start outer watch for status logging of driver pod // only enable interval logging if in waitForAppCompletion mode - val loggingInterval = if (waitForAppCompletion) sparkConf.get(REPORT_INTERVAL) else 0 - val driverPodCompletedLatch = new CountDownLatch(1) - val loggingWatch = new LoggingPodStatusWatcher(driverPodCompletedLatch, kubernetesAppId, - loggingInterval) + val loggingWatch = new LoggingPodStatusWatcherImpl( + kubernetesAppId, loggingInterval) Utils.tryWithResource(kubernetesClient .pods() .withName(kubernetesDriverPodName) @@ -230,7 +230,7 @@ private[spark] class Client( // wait if configured to do so if (waitForAppCompletion) { logInfo(s"Waiting for application $kubernetesAppId to finish...") - driverPodCompletedLatch.await() + loggingWatch.awaitCompletion() logInfo(s"Application $kubernetesAppId finished.") } else { logInfo(s"Application $kubernetesAppId successfully launched.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala index 23e3e09834372..e4ca5c1458abe 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala @@ -25,6 +25,7 @@ import scala.collection.JavaConverters._ import 
org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.{LoggingPodStatusWatcher, LoggingPodStatusWatcherImpl} import org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServerSslOptionsProviderImpl import org.apache.spark.internal.Logging import org.apache.spark.launcher.SparkLauncher @@ -48,9 +49,11 @@ private[spark] class Client( appArgs: Array[String], sparkJars: Seq[String], sparkFiles: Seq[String], + waitForAppCompletion: Boolean, kubernetesClientProvider: SubmissionKubernetesClientProvider, initContainerComponentsProvider: DriverInitContainerComponentsProvider, - kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider) + kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, + loggingPodStatusWatcher: LoggingPodStatusWatcher) extends Logging { private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) @@ -186,27 +189,40 @@ private[spark] class Client( .endContainer() .endSpec() .build() - val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) - try { - val driverOwnedResources = Seq(initContainerConfigMap) ++ - maybeSubmittedDependenciesSecret.toSeq ++ - credentialsSecret.toSeq - val driverPodOwnerReference = new OwnerReferenceBuilder() - .withName(createdDriverPod.getMetadata.getName) - .withApiVersion(createdDriverPod.getApiVersion) - .withUid(createdDriverPod.getMetadata.getUid) - .withKind(createdDriverPod.getKind) - .withController(true) - .build() - driverOwnedResources.foreach { resource => - val originalMetadata = resource.getMetadata - originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + Utils.tryWithResource( + kubernetesClient + .pods() + .withName(resolvedDriverPod.getMetadata.getName) + .watch(loggingPodStatusWatcher)) { _ => + val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) + try { + val driverOwnedResources = Seq(initContainerConfigMap) ++ + maybeSubmittedDependenciesSecret.toSeq ++ + credentialsSecret.toSeq + val driverPodOwnerReference = new OwnerReferenceBuilder() + .withName(createdDriverPod.getMetadata.getName) + .withApiVersion(createdDriverPod.getApiVersion) + .withUid(createdDriverPod.getMetadata.getUid) + .withKind(createdDriverPod.getKind) + .withController(true) + .build() + driverOwnedResources.foreach { resource => + val originalMetadata = resource.getMetadata + originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + } + kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() + } catch { + case e: Throwable => + kubernetesClient.pods().delete(createdDriverPod) + throw e + } + if (waitForAppCompletion) { + logInfo(s"Waiting for application $kubernetesAppId to finish...") + loggingPodStatusWatcher.awaitCompletion() + logInfo(s"Application $kubernetesAppId finished.") + } else { + logInfo(s"Deployed Spark application $kubernetesAppId into Kubernetes.") } - kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() - } catch { - case e: Throwable => - kubernetesClient.pods().delete(createdDriverPod) - throw e } } } @@ -274,6 +290,9 @@ private[spark] object Client { val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) val kubernetesCredentialsMounterProvider = new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) + val 
waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) + val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)).filter( _ => waitForAppCompletion) + val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl(kubernetesAppId, loggingInterval) new Client( appName, kubernetesAppId, @@ -282,8 +301,10 @@ private[spark] object Client { appArgs, sparkJars, sparkFiles, + waitForAppCompletion, kubernetesClientProvider, initContainerComponentsProvider, - kubernetesCredentialsMounterProvider).run() + kubernetesCredentialsMounterProvider, + loggingPodStatusWatcher).run() } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala index f0282dbb6d31a..9ad46e52747fd 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.kubernetes.submit.v2 import java.io.File import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} -import io.fabric8.kubernetes.client.KubernetesClient +import io.fabric8.kubernetes.client.{KubernetesClient, Watch} import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} import org.mockito.{AdditionalAnswers, ArgumentCaptor, Mock, MockitoAnnotations} @@ -35,6 +35,7 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.kubernetes.SparkPodInitContainerBootstrap import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.LoggingPodStatusWatcher class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") @@ -59,13 +60,13 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val SPARK_JARS = Seq( "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") private val RESOLVED_SPARK_JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", "file:///var/data/spark-jars/jar2.jar") + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///var/data/spark-jars/jar2.jar") private val RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS = Seq( - "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") + "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") private val SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") + "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") private val RESOLVED_SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") + "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") private val INIT_CONTAINER_SECRET = new SecretBuilder() .withNewMetadata() .withName(INIT_CONTAINER_SECRET_NAME) @@ -140,6 +141,12 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private var credentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider = _ @Mock private var credentialsMounter: DriverPodKubernetesCredentialsMounter = _ + 
@Mock + private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ + @Mock + private var namedPodResource: PodResource[Pod, DoneablePod] = _ + @Mock + private var watch: Watch = _ before { MockitoAnnotations.initMocks(this) @@ -177,6 +184,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .build() } }) + when(podOps.withName(APP_ID)).thenReturn(namedPodResource) + when(namedPodResource.watch(loggingPodStatusWatcher)).thenReturn(watch) when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) @@ -278,6 +287,25 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { }) } + test("Waiting for completion should await completion on the status watcher.") { + expectationsForNoMountedCredentials() + expectationsForNoDependencyUploader() + new Client( + APP_NAME, + APP_ID, + MAIN_CLASS, + SPARK_CONF, + APP_ARGS, + SPARK_JARS, + SPARK_FILES, + true, + kubernetesClientProvider, + initContainerComponentsProvider, + credentialsMounterProvider, + loggingPodStatusWatcher).run() + verify(loggingPodStatusWatcher).awaitCompletion() + } + private def expectationsForNoDependencyUploader(): Unit = { when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) @@ -353,9 +381,11 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { APP_ARGS, SPARK_JARS, SPARK_FILES, + false, kubernetesClientProvider, initContainerComponentsProvider, - credentialsMounterProvider).run() + credentialsMounterProvider, + loggingPodStatusWatcher).run() val podMatcher = new BaseMatcher[Pod] { override def matches(o: scala.Any): Boolean = { o match { From cc5eb85bdc418bd431562c5f1bbe994a071d2e52 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 23 May 2017 12:57:45 -0700 Subject: [PATCH 112/225] Replace submission v1 with submission v2. (#286) * Replace submission v1 with submission v2. * Address documentation changes. 
* Fix documentation --- conf/kubernetes-resource-staging-server.yaml | 63 ++ .../org/apache/spark/deploy/SparkSubmit.scala | 2 +- docs/running-on-kubernetes.md | 416 ++++++---- ....kubernetes.submit.v1.DriverServiceManager | 2 - .../deploy/kubernetes/CompressionUtils.scala | 74 +- .../SparkPodInitContainerBootstrap.scala | 2 +- .../spark/deploy/kubernetes/config.scala | 84 +- .../kubernetes/submit/{v2 => }/Client.scala | 5 +- .../ContainerLocalizedFilesResolver.scala | 2 +- .../ContainerNameEqualityPredicate.scala | 2 +- ...riverInitContainerComponentsProvider.scala | 4 +- ...riverPodKubernetesCredentialsMounter.scala | 2 +- ...KubernetesCredentialsMounterProvider.scala | 3 +- .../ExecutorInitContainerConfiguration.scala | 2 +- .../submit/{v2 => }/InitContainerUtil.scala | 2 +- ...opertiesConfigMapFromScalaMapBuilder.scala | 2 +- .../SparkInitContainerConfigMapBuilder.scala | 3 +- .../SubmissionKubernetesClientProvider.scala | 2 +- ...dDependencyInitContainerConfigPlugin.scala | 2 +- .../SubmittedDependencySecretBuilder.scala | 2 +- .../SubmittedDependencyUploaderImpl.scala | 5 +- .../submit/{v2 => }/SubmittedResources.scala | 2 +- .../deploy/kubernetes/submit/v1/Client.scala | 743 ------------------ .../submit/v1/DriverServiceManager.scala | 99 --- ...DriverSubmitSslConfigurationProvider.scala | 354 --------- ...rnalSuppliedUrisDriverServiceManager.scala | 105 --- .../submit/v1/KubernetesResourceCleaner.scala | 53 -- .../v1/NodePortUrisDriverServiceManager.scala | 70 -- ...esSparkRestApi.scala => FileFetcher.scala} | 24 +- ...SparkDependencyDownloadInitContainer.scala | 50 +- .../{v1 => }/PemsToKeyStoreConverter.scala | 3 +- .../{v2 => }/ResourceStagingServer.scala | 2 +- ...ourceStagingServerSslOptionsProvider.scala | 3 +- .../{v2 => }/ResourceStagingService.scala | 4 +- .../{v2 => }/ResourceStagingServiceImpl.scala | 11 +- .../ResourceStagingServiceRetrofit.scala | 4 +- .../{v2 => }/RetrofitClientFactory.scala | 2 +- .../{v2 => }/SparkConfPropertiesParser.scala | 4 +- .../rest/kubernetes/v1/HttpClientUtil.scala | 131 --- .../v1/KubernetesRestProtocolMessages.scala | 75 -- .../v1/KubernetesSparkRestServer.scala | 483 ------------ .../v1/MultiServerFeignTarget.scala | 89 --- .../spark/deploy/kubernetes/SSLUtils.scala | 2 +- .../submit/{v2 => }/ClientV2Suite.scala | 3 +- ...ContainerLocalizedFilesResolverSuite.scala | 2 +- ...PodKubernetesCredentialsMounterSuite.scala | 4 +- ...cutorInitContainerConfigurationSuite.scala | 2 +- .../submit/{v2 => }/SSLFilePairs.scala | 2 +- ...rkInitContainerConfigMapBuilderSuite.scala | 2 +- ...ndencyInitContainerConfigPluginSuite.scala | 6 +- ...ubmittedDependencySecretBuilderSuite.scala | 4 +- .../SubmittedDependencyUploaderSuite.scala | 4 +- ...DependencyDownloadInitContainerSuite.scala | 4 +- ...StagingServerSslOptionsProviderSuite.scala | 2 +- .../{v2 => }/ResourceStagingServerSuite.scala | 2 +- .../ResourceStagingServiceImplSuite.scala | 2 +- .../src/main/docker/driver-v2/Dockerfile | 43 - .../src/main/docker/driver/Dockerfile | 18 +- .../Dockerfile | 2 +- .../docker/resource-staging-server/Dockerfile | 2 +- .../integrationtest/KubernetesSuite.scala | 248 +++++- .../KubernetesTestComponents.scala | 29 +- .../integrationtest/KubernetesV1Suite.scala | 339 -------- .../integrationtest/KubernetesV2Suite.scala | 265 ------- .../ResourceStagingServerLauncher.scala | 2 +- .../docker/SparkDockerImageBuilder.scala | 10 +- 66 files changed, 668 insertions(+), 3323 deletions(-) create mode 100644 conf/kubernetes-resource-staging-server.yaml delete mode 100644 
resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/Client.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ContainerLocalizedFilesResolver.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ContainerNameEqualityPredicate.scala (95%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/DriverInitContainerComponentsProvider.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/DriverPodKubernetesCredentialsMounter.scala (99%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/DriverPodKubernetesCredentialsMounterProvider.scala (92%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ExecutorInitContainerConfiguration.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/InitContainerUtil.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/PropertiesConfigMapFromScalaMapBuilder.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SparkInitContainerConfigMapBuilder.scala (95%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmissionKubernetesClientProvider.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencyInitContainerConfigPlugin.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencySecretBuilder.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencyUploaderImpl.scala (95%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedResources.scala (96%) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v1/KubernetesSparkRestApi.scala => FileFetcher.scala} (56%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => 
}/KubernetesSparkDependencyDownloadInitContainer.scala (95%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v1 => }/PemsToKeyStoreConverter.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServer.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServerSslOptionsProvider.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingService.scala (97%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServiceImpl.scala (91%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServiceRetrofit.scala (93%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/RetrofitClientFactory.scala (98%) rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/SparkConfPropertiesParser.scala (94%) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ClientV2Suite.scala (99%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ContainerLocalizedFilesResolverSuite.scala (98%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/DriverPodKubernetesCredentialsMounterSuite.scala (99%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/ExecutorInitContainerConfigurationSuite.scala (97%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SSLFilePairs.scala (94%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SparkInitContainerConfigMapBuilderSuite.scala (98%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencyInitContainerConfigPluginSuite.scala (96%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencySecretBuilderSuite.scala (97%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/{v2 => }/SubmittedDependencyUploaderSuite.scala (97%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/KubernetesSparkDependencyDownloadInitContainerSuite.scala (98%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServerSslOptionsProviderSuite.scala (99%) rename 
resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServerSuite.scala (99%) rename resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/{v2 => }/ResourceStagingServiceImplSuite.scala (98%) delete mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile rename resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/{driver-init => init-container}/Dockerfile (95%) delete mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala delete mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala diff --git a/conf/kubernetes-resource-staging-server.yaml b/conf/kubernetes-resource-staging-server.yaml new file mode 100644 index 0000000000000..de0da3edcb901 --- /dev/null +++ b/conf/kubernetes-resource-staging-server.yaml @@ -0,0 +1,63 @@ +--- +apiVersion: extensions/v1beta1 +kind: Deployment +metadata: + name: spark-resource-staging-server +spec: + replicas: 1 + template: + metadata: + labels: + resource-staging-server-instance: default + spec: + volumes: + - name: resource-staging-server-properties + configMap: + name: spark-resource-staging-server-config + containers: + - name: spark-resource-staging-server + image: kubespark/spark-resource-staging-server:v2.1.0-kubernetes-0.1.0-alpha.3 + resources: + requests: + cpu: 100m + memory: 256Mi + limits: + cpu: 100m + memory: 256Mi + volumeMounts: + - name: resource-staging-server-properties + mountPath: '/etc/spark-resource-staging-server' + args: + - '/etc/spark-resource-staging-server/resource-staging-server.properties' +--- +apiVersion: v1 +kind: ConfigMap +metadata: + name: spark-resource-staging-server-config +data: + resource-staging-server.properties: | + spark.kubernetes.resourceStagingServer.port=10000 + spark.ssl.kubernetes.resourceStagingServer.enabled=false +# Other possible properties are listed below, primarily for setting up TLS. The paths given by KeyStore, password, and PEM files here should correspond to +# files that are securely mounted into the resource staging server container, via e.g. secret volumes. 
+# spark.ssl.kubernetes.resourceStagingServer.keyStore=/mnt/secrets/resource-staging-server/keyStore.jks +# spark.ssl.kubernetes.resourceStagingServer.keyStorePassword=changeit +# spark.ssl.kubernetes.resourceStagingServer.keyPassword=changeit +# spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile=/mnt/secrets/resource-staging-server/keystore-password.txt +# spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile=/mnt/secrets/resource-staging-server/keystore-key-password.txt +# spark.ssl.kubernetes.resourceStagingServer.keyPem=/mnt/secrets/resource-staging-server/key.pem +# spark.ssl.kubernetes.resourceStagingServer.serverCertPem=/mnt/secrets/resource-staging-server/cert.pem +--- +apiVersion: v1 +kind: Service +metadata: + name: spark-resource-staging-service +spec: + type: NodePort + selector: + resource-staging-server-instance: default + ports: + - protocol: TCP + port: 10000 + targetPort: 10000 + nodePort: 31000 diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 83a58bc2864ba..9c6174901ac40 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -635,7 +635,7 @@ object SparkSubmit extends CommandLineUtils { } if (isKubernetesCluster) { - childMainClass = "org.apache.spark.deploy.kubernetes.submit.v1.Client" + childMainClass = "org.apache.spark.deploy.kubernetes.submit.Client" childArgs += args.primaryResource childArgs += args.mainClass childArgs ++= args.childArgs diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5b7bb6cc612c5..98393cbbbba2d 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -3,15 +3,25 @@ layout: global title: Running Spark on Kubernetes --- -Support for running on [Kubernetes](https://kubernetes.io/docs/whatisk8s/) is available in experimental status. The feature set is -currently limited and not well-tested. This should not be used in production environments. +Support for running on [Kubernetes](https://kubernetes.io/docs/whatisk8s/) is available in experimental status. The +feature set is currently limited and not well-tested. This should not be used in production environments. ## Prerequisites -* You must have a running Kubernetes cluster with access configured to it using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes cluster, you may setup a test cluster on your local machine using [minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). - * We recommend that minikube be updated to the most recent version (0.18.0 at the time of this documentation), as some earlier versions may not start up the kubernetes cluster with all the necessary components. -* You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), [nodes](https://kubernetes.io/docs/admin/node/) and [services](https://kubernetes.io/docs/user-guide/services/) in your cluster. You can verify that you can list these resources by running `kubectl get nodes`, `kubectl get pods` and `kubectl get svc` which should give you a list of nodes, pods and services (if any) respectively. -* You must have a spark distribution with Kubernetes support. 
This may be obtained from the [release tarball](https://github.com/apache-spark-on-k8s/spark/releases) or by [building Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support). +* You must have a running Kubernetes cluster with access configured to it +using [kubectl](https://kubernetes.io/docs/user-guide/prereqs/). If you do not already have a working Kubernetes +cluster, you may set up a test cluster on your local machine using +[minikube](https://kubernetes.io/docs/getting-started-guides/minikube/). +  * We recommend that minikube be updated to the most recent version (0.19.0 at the time of this documentation), as some +  earlier versions may not start up the Kubernetes cluster with all the necessary components. +* You must have appropriate permissions to create and list [pods](https://kubernetes.io/docs/user-guide/pods/), +[ConfigMaps](https://kubernetes.io/docs/tasks/configure-pod-container/configmap/) and +[secrets](https://kubernetes.io/docs/concepts/configuration/secret/) in your cluster. You can verify that +you can list these resources by running `kubectl get pods`, `kubectl get configmap`, and `kubectl get secrets`, which +should give you a list of pods, configmaps, and secrets (if any) respectively. +* You must have a Spark distribution with Kubernetes support. This may be obtained from the +[release tarball](https://github.com/apache-spark-on-k8s/spark/releases) or by +[building Spark with Kubernetes support](../resource-managers/kubernetes/README.md#building-spark-with-kubernetes-support). ## Driver & Executor Images @@ -19,7 +29,8 @@ Kubernetes requires users to supply images that can be deployed into containers be run in a container runtime environment that Kubernetes supports. Docker is a container runtime environment that is frequently used with Kubernetes, so Spark provides some support for working with Docker to get started quickly. -If you wish to use pre-built docker images, you may use the images published in [kubespark](https://hub.docker.com/u/kubespark/). The images are as follows: +If you wish to use pre-built docker images, you may use the images published in +[kubespark](https://hub.docker.com/u/kubespark/). The images are as follows:
 <table class="table">
   <tr><th>Component</th><th>Image</th></tr>
   <tr>
     <td>Spark Executor Image</td>
     <td><code>kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2</code></td>
   </tr>
+  <tr>
+    <td>Spark Initialization Image</td>
+    <td><code>kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2</code></td>
+  </tr>
 </table>
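For illustration, the same image settings that the `spark-submit` examples in this document pass with `--conf` can also be placed on a `SparkConf` (or in `spark-defaults.conf`). The sketch below is only an example: `ImageConfExample` is a hypothetical object, and the image tags simply mirror the published images referenced in this document.

```scala
import org.apache.spark.SparkConf

// Hypothetical example object: mirrors the --conf flags used in the spark-submit
// commands shown in this document, nothing more.
object ImageConfExample {
  def kubernetesImageConf(): SparkConf = new SparkConf()
    .set("spark.kubernetes.driver.docker.image", "kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2")
    .set("spark.kubernetes.executor.docker.image", "kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2")
    .set("spark.kubernetes.initcontainer.docker.image", "kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2")
}
```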
    -You may also build these docker images from sources, or customize them as required. Spark distributions include the Docker files for the driver and the executor at -`dockerfiles/driver/Dockerfile` and `dockerfiles/executor/Dockerfile`, respectively. Use these Docker files to build the -Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images to the -registry. +You may also build these docker images from sources, or customize them as required. Spark distributions include the +Docker files for the driver, executor, and init-container at `dockerfiles/driver/Dockerfile`, +`dockerfiles/executor/Dockerfile`, and `dockerfiles/init-container/Dockerfile` respectively. Use these Docker files to +build the Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images +to the registry. For example, if the registry host is `registry-host` and the registry is listening on port 5000: cd $SPARK_HOME docker build -t registry-host:5000/spark-driver:latest -f dockerfiles/driver/Dockerfile . docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile . + docker build -t registry-host:5000/spark-init:latest -f dockerfiles/init-container/Dockerfile . docker push registry-host:5000/spark-driver:latest docker push registry-host:5000/spark-executor:latest + docker push registry-host:5000/spark-init:latest ## Submitting Applications to Kubernetes @@ -60,7 +78,8 @@ are set up as described above: --conf spark.app.name=spark-pi \ --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - examples/jars/spark_examples_2.11-2.2.0.jar + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting `spark.master` in the application's configuration, must be a URL with the format `k8s://`. Prefixing the @@ -80,13 +99,53 @@ In the above example, the specific Kubernetes cluster can be used with spark sub Note that applications can currently only be executed in cluster mode, where the driver and its executors are running on the cluster. -### Specifying input files +Finally, notice that in the above example we specify a jar with a specific URI with a scheme of `local://`. This URI is +the location of the example jar that is already in the Docker image. Using dependencies that are on your machine's local +disk is discussed below. + +## Dependency Management + +Application dependencies that are being submitted from your machine need to be sent to a **resource staging server** +that the driver and executor can then communicate with to retrieve those dependencies. A YAML file denoting a minimal +set of Kubernetes resources that runs this service is located in the file `conf/kubernetes-resource-staging-server.yaml`. +This YAML file configures a Deployment with one pod running the resource staging server configured with a ConfigMap, +and exposes the server through a Service with a fixed NodePort. Deploying a resource staging server with the included +YAML file requires you to have permissions to create Deployments, Services, and ConfigMaps. 
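To summarize the dependency-handling rules in this section: dependencies with a `local://` scheme are assumed to already be on the container's disk, paths with no scheme or a `file://` scheme are shipped from the submitting machine (via the resource staging server), and other remote URIs such as HDFS or HTTP are fetched directly by the init-container. The following is a minimal, illustrative Scala sketch of that classification; `DependencyUriExample` is a hypothetical helper and not part of Spark's submission code.

```scala
import java.net.URI

// Hypothetical helper illustrating the URI-scheme rules described in this section.
object DependencyUriExample {
  sealed trait Disposition
  case object AlreadyOnContainerDisk extends Disposition   // local:// URIs
  case object UploadFromSubmitterDisk extends Disposition  // no scheme, or file:// URIs
  case object FetchedRemotely extends Disposition          // e.g. hdfs://, http://

  def classify(uri: String): Disposition = Option(URI.create(uri).getScheme) match {
    case None | Some("file") => UploadFromSubmitterDisk
    case Some("local")       => AlreadyOnContainerDisk
    case Some(_)             => FetchedRemotely
  }

  def main(args: Array[String]): Unit = {
    Seq(
      "local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar",
      "file:///home/user/app.jar",
      "/home/user/app.jar",
      "hdfs://namenode:9000/jars/app.jar"
    ).foreach(u => println(s"$u -> ${classify(u)}"))
  }
}
```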
+ +To run the resource staging server with default configurations, the Kubernetes resources can be created: + + kubectl create -f conf/kubernetes-resource-staging-server.yaml + +and then you can compute the value of Pi as follows: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.SparkPi \ + --master k8s://: \ + --kubernetes-namespace default \ + --conf spark.executor.instances=5 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.resourceStagingServer.uri=http://:31000 \ + examples/jars/spark_examples_2.11-2.2.0.jar + +The Docker image for the resource staging server may also be built from source, in a similar manner to the driver +and executor images. The Dockerfile is provided in `dockerfiles/resource-staging-server/Dockerfile`. + +The provided YAML file specifically sets the NodePort to 31000 on the service's specification. If port 31000 is not +available on any of the nodes of your cluster, you should remove the NodePort field from the service's specification +and allow the Kubernetes cluster to determine the NodePort itself. Be sure to provide the correct port in the resource +staging server URI when submitting your application, in accordance to the NodePort chosen by the Kubernetes cluster. + +### Dependency Management Without The Resource Staging Server -Spark supports specifying JAR paths that are either on the submitting host's disk, or are located on the disk of the -driver and executors. Refer to the [application submission](submitting-applications.html#advanced-dependency-management) -section for details. Note that files specified with the `local://` scheme should be added to the container image of both -the driver and the executors. Files without a scheme or with the scheme `file://` are treated as being on the disk of -the submitting machine, and are uploaded to the driver running in Kubernetes before launching the application. +Note that this resource staging server is only required for submitting local dependencies. If your application's +dependencies are all hosted in remote locations like HDFS or http servers, they may be referred to by their appropriate +remote URIs. Also, application dependencies can be pre-mounted into custom-built Docker images. Those dependencies +can be added to the classpath by referencing them with `local://` URIs and/or setting the `SPARK_EXTRA_CLASSPATH` +environment variable in your Dockerfiles. ### Accessing Kubernetes Clusters @@ -111,70 +170,127 @@ If our local proxy were listening on port 8001, we would have our submission loo --conf spark.app.name=spark-pi \ --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - examples/jars/spark_examples_2.11-2.2.0.jar + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. 
The above mechanism using `kubectl proxy` can be used when we have authentication providers that the fabric8 kubernetes-client library does not support. Authentication using X509 Client Certs and OAuth tokens is currently supported. +## Dynamic Executor Scaling + +Spark on Kubernetes supports Dynamic Allocation with cluster mode. This mode requires running +an external shuffle service. This is typically a [daemonset](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) +with a provisioned [hostpath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) volume. +This shuffle service may be shared by executors belonging to different SparkJobs. Using Spark with dynamic allocation +on Kubernetes assumes that a cluster administrator has set up one or more shuffle-service daemonsets in the cluster. + +A sample configuration file is provided in `conf/kubernetes-shuffle-service.yaml` which can be customized as needed +for a particular cluster. It is important to note that `spec.template.metadata.labels` are setup appropriately for the shuffle +service because there may be multiple shuffle service instances running in a cluster. The labels give Spark applications +a way to target a particular shuffle service. + +For example, if the shuffle service we want to use is in the default namespace, and +has pods with labels `app=spark-shuffle-service` and `spark-version=2.1.0`, we can +use those tags to target that particular shuffle service at job launch time. In order to run a job with dynamic allocation enabled, +the command may then look like the following: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.GroupByTest \ + --master k8s://: \ + --kubernetes-namespace default \ + --conf spark.app.name=group-by-test \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:latest \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:latest \ + --conf spark.dynamicAllocation.enabled=true \ + --conf spark.shuffle.service.enabled=true \ + --conf spark.kubernetes.shuffle.namespace=default \ + --conf spark.kubernetes.shuffle.labels="app=spark-shuffle-service,spark-version=2.1.0" \ + local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar 10 400000 2 + ## Advanced - -### Setting Up TLS For Submitting the Driver - -When submitting to Kubernetes, a pod is started for the driver, and the pod starts an HTTP server. This HTTP server -receives the driver's configuration, including uploaded driver jars, from the client before starting the application. -Spark supports using TLS to encrypt the traffic in this bootstrapping process. It is recommended to configure this -whenever possible. - -See the [security page](security.html) and [configuration](configuration.html) sections for more information on -configuring TLS; use the prefix `spark.ssl.kubernetes.driversubmitserver` in configuring the TLS-related fields in the context -of submitting to Kubernetes. For example, to set the trustStore used when the local machine communicates with the driver -pod in starting the application, set `spark.ssl.kubernetes.driversubmitserver.trustStore`. - -One note about the keyStore is that it can be specified as either a file on the client machine or a file in the -container image's disk. Thus `spark.ssl.kubernetes.driversubmitserver.keyStore` can be a URI with a scheme of either `file:` -or `local:`. 
A scheme of `file:` corresponds to the keyStore being located on the client machine; it is mounted onto -the driver container as a [secret volume](https://kubernetes.io/docs/user-guide/secrets/). When the URI has the scheme -`local:`, the file is assumed to already be on the container's disk at the appropriate path. - -Finally, the submission server and client can be configured to use PEM files instead of Java keyStores. When using -this mode, set `spark.ssl.kubernetes.driversubmitserver.keyPem` and -`spark.ssl.kubernetes.driversubmitserver.serverCertPem` to configure the key and certificate files on the driver -submission server. These files can be uploaded from the submitter's machine if they have no scheme or a scheme of -`file:`, or they can be located on the container's disk if they have the scheme `local:`. The client's certificate -file should be provided via setting `spark.ssl.kubernetes.driversubmitserver.clientCertPem`, and this file must be -located on the submitting machine's local disk. - -### Submission of Local Files through Ingress/External controller - -Kubernetes pods run with their own IP address space. If Spark is run in cluster mode, the driver pod may not be -accessible to the submitter. However, the submitter needs to send local dependencies from its local disk to the driver -pod. - -By default, Spark will place a [Service](https://kubernetes.io/docs/user-guide/services/#type-nodeport) with a NodePort -that is opened on every node. The submission client will then contact the driver at one of the node's -addresses with the appropriate service port. - -There may be cases where the nodes cannot be reached by the submission client. For example, the cluster may -only be reachable through an external load balancer. The user may provide their own external URI for Spark driver -services. To use a your own external URI instead of a node's IP and node port, first set -`spark.kubernetes.driver.serviceManagerType` to `ExternalAnnotation`. A service will be created with the annotation -`spark-job.alpha.apache.org/provideExternalUri`, and this service routes to the driver pod. You will need to run a -separate process that watches the API server for services that are created with this annotation in the application's -namespace (set by `spark.kubernetes.namespace`). The process should determine a URI that routes to this service -(potentially configuring infrastructure to handle the URI behind the scenes), and patch the service to include an -annotation `spark-job.alpha.apache.org/resolvedExternalUri`, which has its value as the external URI that your process -has provided (e.g. `https://example.com:8080/my-job`). - -Note that the URI provided in the annotation needs to route traffic to the appropriate destination on the pod, which has -a empty path portion of the URI. This means the external URI provider will likely need to rewrite the path from the -external URI to the destination on the pod, e.g. https://example.com:8080/spark-app-1/submit will need to route traffic -to https://:/. Note that the paths of these two URLs are different. - -If the above is confusing, keep in mind that this functionality is only necessary if the submitter cannot reach any of -the nodes at the driver's node port. It is recommended to use the default configuration with the node port service -whenever possible. + +### Securing the Resource Staging Server with TLS + +The default configuration of the resource staging server is not secured with TLS. 
It is highly recommended to configure +this to protect the secrets and jars/files being submitted through the staging server. + +The YAML file in `conf/kubernetes-resource-staging-server.yaml` includes a ConfigMap resource that holds the resource +staging server's configuration. The properties can be adjusted here to make the resource staging server listen over TLS. +Refer to the [security](security.html) page for the available settings related to TLS. The namespace for the +resource staging server is `kubernetes.resourceStagingServer`, so for example the path to the server's keyStore would +be set by `spark.ssl.kubernetes.resourceStagingServer.keyStore`. + +In addition to the settings specified by the previously linked security page, the resource staging server supports the +following additional configurations: + + + + + + + + + + + + + + + + + + + + + + + +
+<table class="table">
+<tr><th>Property Name</th><th>Default</th><th>Meaning</th></tr>
+<tr>
+  <td><code>spark.ssl.kubernetes.resourceStagingServer.keyPem</code></td>
+  <td>(none)</td>
+  <td>
+    Private key file encoded in PEM format that the resource staging server uses to secure connections over TLS. If this
+    is specified, the associated public key file must be specified in
+    <code>spark.ssl.kubernetes.resourceStagingServer.serverCertPem</code>. PEM files and a keyStore file (set by
+    <code>spark.ssl.kubernetes.resourceStagingServer.keyStore</code>) cannot both be specified at the same time.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.ssl.kubernetes.resourceStagingServer.serverCertPem</code></td>
+  <td>(none)</td>
+  <td>
+    Certificate file encoded in PEM format that the resource staging server uses to secure connections over TLS. If this
+    is specified, the associated private key file must be specified in
+    <code>spark.ssl.kubernetes.resourceStagingServer.keyPem</code>. PEM files and a keyStore file (set by
+    <code>spark.ssl.kubernetes.resourceStagingServer.keyStore</code>) cannot both be specified at the same time.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.ssl.kubernetes.resourceStagingServer.keyStorePasswordFile</code></td>
+  <td>(none)</td>
+  <td>
+    Provides the KeyStore password through a file in the container instead of a static value. This is useful if the
+    keyStore's password is to be mounted into the container with a secret.
+  </td>
+</tr>
+<tr>
+  <td><code>spark.ssl.kubernetes.resourceStagingServer.keyPasswordFile</code></td>
+  <td>(none)</td>
+  <td>
+    Provides the keyStore's key password using a file in the container instead of a static value. This is useful if the
+    keyStore's key password is to be mounted into the container with a secret.
+  </td>
+</tr>
+</table>
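The properties above configure the server side of TLS; on the submission side, as described below, the client must trust the staging server's certificate through either a trustStore or a PEM-encoded certificate. As a rough illustration of what that trust amounts to, here is a minimal sketch using only JDK APIs; `PemTrustExample` is hypothetical and is not how Spark's submission client is actually wired.

```scala
import java.io.FileInputStream
import java.security.KeyStore
import java.security.cert.CertificateFactory
import javax.net.ssl.{SSLContext, TrustManagerFactory}

// Hypothetical utility for illustration only.
object PemTrustExample {
  def sslContextTrusting(pemCertPath: String): SSLContext = {
    // Parse the PEM-encoded X.509 certificate presented by the server.
    val certStream = new FileInputStream(pemCertPath)
    val cert = try {
      CertificateFactory.getInstance("X.509").generateCertificate(certStream)
    } finally {
      certStream.close()
    }
    // Place the certificate in an in-memory trustStore and build an SSLContext around it.
    val trustStore = KeyStore.getInstance(KeyStore.getDefaultType)
    trustStore.load(null, null)
    trustStore.setCertificateEntry("resource-staging-server", cert)
    val trustManagerFactory = TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
    trustManagerFactory.init(trustStore)
    val sslContext = SSLContext.getInstance("TLSv1.2")
    sslContext.init(null, trustManagerFactory.getTrustManagers, null)
    sslContext
  }
}
```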
    + +Note that while the properties can be set in the ConfigMap, you will still need to consider the means of mounting the +appropriate secret files into the resource staging server's container. A common mechanism that is used for this is +to use [Kubernetes secrets](https://kubernetes.io/docs/concepts/configuration/secret/) that are mounted as secret +volumes. Refer to the appropriate Kubernetes documentation for guidance and adjust the resource staging server's +specification in the provided YAML file accordingly. + +Finally, when you submit your application, you must specify either a trustStore or a PEM-encoded certificate file to +communicate with the resource staging server over TLS. The trustStore can be set with +`spark.ssl.kubernetes.resourceStagingServer.trustStore`, or a certificate file can be set with +`spark.ssl.kubernetes.resourceStagingServer.clientCertPem`. For example, our SparkPi example now looks like this: + + bin/spark-submit \ + --deploy-mode cluster \ + --class org.apache.spark.examples.SparkPi \ + --master k8s://https://: \ + --kubernetes-namespace default \ + --conf spark.executor.instances=5 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.resourceStagingServer.uri=https://:31000 \ + --conf spark.ssl.kubernetes.resourceStagingServer.enabled=true \ + --conf spark.ssl.kubernetes.resourceStagingServer.clientCertPem=/home/myuser/cert.pem \ + examples/jars/spark_examples_2.11-2.2.0.jar ### Spark Properties @@ -208,6 +324,16 @@ from the other deployment modes. See the [configuration page](configuration.html Docker tag format. + + spark.kubernetes.initcontainer.docker.image + spark-init:2.2.0 + + Docker image to use for the init-container that is run before the driver and executor containers. Specify this using + the standard Docker tag format. The + init-container is responsible for fetching application dependencies from both remote locations like HDFS or S3, + and from the resource staging server, if applicable. + + spark.kubernetes.shuffle.namespace default @@ -218,7 +344,7 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.shuffle.labels - (none) + (none) Labels that will be used to look up shuffle service pods. This should be a comma-separated list of label key-value pairs, where each label is in the format key=value. The labels chosen must be such that @@ -334,123 +460,113 @@ from the other deployment modes. See the [configuration page](configuration.html - spark.kubernetes.driver.submissionServerMemory - 256m + spark.kubernetes.driver.labels + (none) - The amount of memory to allocate for the driver submission server. + Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, + where each label is in the format key=value. Note that Spark also adds its own labels to the driver pod + for bookkeeping purposes. - spark.kubernetes.driver.memoryOverhead - (driverMemory + driverSubmissionServerMemory) * 0.10, with minimum of 384 + spark.kubernetes.driver.annotations + (none) - The amount of off-heap memory (in megabytes) to be allocated for the driver and the driver submission server. 
This - is memory that accounts for things like VM overheads, interned strings, other native overheads, etc. This tends to - grow with the driver size (typically 6-10%). + Custom annotations that will be added to the driver pod. This should be a comma-separated list of label key-value + pairs, where each annotation is in the format key=value. - spark.kubernetes.driver.labels + spark.kubernetes.driver.pod.name (none) - Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, - where each label is in the format key=value. Note that Spark also adds its own labels to the driver pod - for bookkeeping purposes. + Name of the driver pod. If not set, the driver pod name is set to "spark.app.name" suffixed by the current timestamp + to avoid name conflicts. - spark.kubernetes.driver.annotations + spark.kubernetes.submission.waitAppCompletion + true + + In cluster mode, whether to wait for the application to finish before exiting the launcher process. When changed to + false, the launcher has a "fire-and-forget" behavior when launching the Spark job. + + + + spark.kubernetes.resourceStagingServer.port + 10000 + + Port for the resource staging server to listen on when it is deployed. + + + + spark.kubernetes.resourceStagingServer.uri (none) - Custom annotations that will be added to the driver pod. This should be a comma-separated list of label key-value - pairs, where each annotation is in the format key=value. + URI of the resource staging server that Spark should use to distribute the application's local dependencies. Note + that by default, this URI must be reachable by both the submitting machine and the pods running in the cluster. If + one URI is not simultaneously reachable both by the submitter and the driver/executor pods, configure the pods to + access the staging server at a different URI by setting + spark.kubernetes.resourceStagingServer.internal.uri as discussed below. - spark.kubernetes.driverSubmissionTimeout - 60s + spark.kubernetes.resourceStagingServer.internal.uri + Value of spark.kubernetes.resourceStagingServer.uri - Time to wait for the driver pod to start running before aborting its execution. + URI of the resource staging server to communicate with when init-containers bootstrap the driver and executor pods + with submitted local dependencies. Note that this URI must be reachable by the pods running in the cluster. This is useful to + set if the resource staging server has a separate "internal" URI that must be accessed by components running in the + cluster. - spark.kubernetes.driver.service.exposeUiPort - false + spark.ssl.kubernetes.resourceStagingServer.internal.trustStore + Value of spark.ssl.kubernetes.resourceStagingServer.trustStore - Whether to expose the driver Web UI port as a service NodePort. Turned off by default because NodePort is a limited - resource. + Location of the trustStore file to use when communicating with the resource staging server over TLS, as + init-containers bootstrap the driver and executor pods with submitted local dependencies. This can be a URI with a + scheme of local://, which denotes that the file is pre-mounted on the pod's disk. A uri without a + scheme or a scheme of file:// will result in this file being mounted from the submitting machine's + disk as a secret into the init-containers.
- spark.kubernetes.driver.pod.name - (none) + spark.ssl.kubernetes.resourceStagingServer.internal.trustStorePassword + Value of spark.ssl.kubernetes.resourceStagingServer.trustStorePassword - Name of the driver pod. If not set, the driver pod name is set to "spark.app.name" suffixed by the current timestamp to avoid name conflicts. + Password of the trustStore file that is used when communicating with the resource staging server over TLS, as + init-containers bootstrap the driver and executor pods with submitted local dependencies. - spark.kubernetes.submission.waitAppCompletion - true + spark.ssl.kubernetes.resourceStagingServer.internal.trustStoreType + Value of spark.ssl.kubernetes.resourceStagingServer.trustStoreType - In cluster mode, whether to wait for the application to finish before exiting the launcher process. When changed to - false, the launcher has a "fire-and-forget" behavior when launching the Spark job. + Type of the trustStore file that is used when communicating with the resource staging server over TLS, when + init-containers bootstrap the driver and executor pods with submitted local dependencies. - spark.kubernetes.report.interval - 1s + spark.ssl.kubernetes.resourceStagingServer.internal.clientCertPem + Value of spark.ssl.kubernetes.resourceStagingServer.clientCertPem - Interval between reports of the current Spark job status in cluster mode. + Location of the certificate file to use when communicating with the resource staging server over TLS, as + init-containers bootstrap the driver and executor pods with submitted local dependencies. This can be a URI with a + scheme of local://, which denotes that the file is pre-mounted on the pod's disk. A uri without a + scheme or a scheme of file:// will result in this file being mounted from the submitting machine's + disk as a secret into the init-containers. - spark.kubernetes.driver.serviceManagerType - NodePort + spark.kubernetes.report.interval + 1s - A tag indicating which class to use for creating the Kubernetes service and determining its URI for the submission - client. Valid values are currently NodePort and ExternalAnnotation. By default, a service - is created with the NodePort type, and the driver will be contacted at one of the nodes at the port - that the nodes expose for the service. If the nodes cannot be contacted from the submitter's machine, consider - setting this to ExternalAnnotation as described in "Determining the Driver Base URI" above. One may - also include a custom implementation of org.apache.spark.deploy.rest.kubernetes.DriverServiceManager on - the submitter's classpath - spark-submit service loads an instance of that class. To use the custom - implementation, set this value to the custom implementation's return value of - DriverServiceManager#getServiceManagerType(). This method should only be done as a last resort. + Interval between reports of the current Spark job status in cluster mode. -## Dynamic Executor Scaling - -Spark on Kubernetes supports Dynamic Allocation with cluster mode. This mode requires running -an external shuffle service. This is typically a [daemonset](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) -with a provisioned [hostpath](https://kubernetes.io/docs/concepts/storage/volumes/#hostpath) volume. -This shuffle service may be shared by executors belonging to different SparkJobs. Using Spark with dynamic allocation -on Kubernetes assumes that a cluster administrator has set up one or more shuffle-service daemonsets in the cluster. 
- -A sample configuration file is provided in `conf/kubernetes-shuffle-service.yaml` which can be customized as needed -for a particular cluster. It is important to note that `spec.template.metadata.labels` are setup appropriately for the shuffle -service because there may be multiple shuffle service instances running in a cluster. The labels give us a way to target a particular -shuffle service. - -For example, if the shuffle service we want to use is in the default namespace, and -has pods with labels `app=spark-shuffle-service` and `spark-version=2.1.0`, we can -use those tags to target that particular shuffle service at job launch time. In order to run a job with dynamic allocation enabled, -the command may then look like the following: - - bin/spark-submit \ - --deploy-mode cluster \ - --class org.apache.spark.examples.GroupByTest \ - --master k8s://: \ - --kubernetes-namespace default \ - --conf spark.app.name=group-by-test \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:latest \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:latest \ - --conf spark.dynamicAllocation.enabled=true \ - --conf spark.shuffle.service.enabled=true \ - --conf spark.kubernetes.shuffle.namespace=default \ - --conf spark.kubernetes.shuffle.labels="app=spark-shuffle-service,spark-version=2.1.0" \ - examples/jars/spark_examples_2.11-2.2.0.jar 10 400000 2 ## Current Limitations diff --git a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager b/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager deleted file mode 100644 index 2ed0387c51bc6..0000000000000 --- a/resource-managers/kubernetes/core/src/main/resources/META-INF/services/org.apache.spark.deploy.kubernetes.submit.v1.DriverServiceManager +++ /dev/null @@ -1,2 +0,0 @@ -org.apache.spark.deploy.kubernetes.submit.v1.ExternalSuppliedUrisDriverServiceManager -org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala index 03991ba26a6f7..a6f0ca502f6f0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/CompressionUtils.scala @@ -16,19 +16,17 @@ */ package org.apache.spark.deploy.kubernetes -import java.io.{ByteArrayInputStream, File, FileInputStream, FileOutputStream, InputStream, OutputStream} +import java.io.{File, FileInputStream, FileOutputStream, InputStream, OutputStream} import java.util.zip.{GZIPInputStream, GZIPOutputStream} import com.google.common.io.Files -import org.apache.commons.codec.binary.Base64 import org.apache.commons.compress.archivers.tar.{TarArchiveEntry, TarArchiveInputStream, TarArchiveOutputStream} import org.apache.commons.compress.utils.CharsetNames import org.apache.commons.io.IOUtils import scala.collection.mutable -import org.apache.spark.deploy.rest.kubernetes.v1.TarGzippedData import org.apache.spark.internal.Logging -import org.apache.spark.util.{ByteBufferOutputStream, Utils} +import org.apache.spark.util.Utils private[spark] object CompressionUtils extends Logging { // Defaults from TarArchiveOutputStream @@ 
-36,30 +34,6 @@ private[spark] object CompressionUtils extends Logging { private val RECORD_SIZE = 512 private val ENCODING = CharsetNames.UTF_8 - /** - * Compresses all of the given paths into a gzipped-tar archive, returning the compressed data in - * memory as an instance of {@link TarGzippedData}. The files are taken without consideration to - * their original folder structure, and are added to the tar archive in a flat hierarchy. - * Directories are not allowed, and duplicate file names are de-duplicated by appending a numeric - * suffix to the file name, before the file extension. For example, if paths a/b.txt and b/b.txt - * were provided, then the files added to the tar archive would be b.txt and b-1.txt. - * @param paths A list of file paths to be archived - * @return An in-memory representation of the compressed data. - */ - def createTarGzip(paths: Iterable[String]): TarGzippedData = { - val compressedBytesStream = Utils.tryWithResource(new ByteBufferOutputStream()) { raw => - writeTarGzipToStream(raw, paths) - raw - } - val compressedAsBase64 = Base64.encodeBase64String(compressedBytesStream.toByteBuffer.array) - TarGzippedData( - dataBase64 = compressedAsBase64, - blockSize = BLOCK_SIZE, - recordSize = RECORD_SIZE, - encoding = ENCODING - ) - } - def writeTarGzipToStream(outputStream: OutputStream, paths: Iterable[String]): Unit = { Utils.tryWithResource(new GZIPOutputStream(outputStream)) { gzipping => Utils.tryWithResource(new TarArchiveOutputStream( @@ -98,50 +72,14 @@ private[spark] object CompressionUtils extends Logging { } } - /** - * Decompresses the provided tar archive to a directory. - * @param compressedData In-memory representation of the compressed data, ideally created via - * {@link createTarGzip}. - * @param rootOutputDir Directory to write the output files to. All files from the tarball - * are written here in a flat hierarchy. - * @return List of file paths for each file that was unpacked from the archive. 
- */ - def unpackAndWriteCompressedFiles( - compressedData: TarGzippedData, - rootOutputDir: File): Seq[String] = { - val compressedBytes = Base64.decodeBase64(compressedData.dataBase64) - if (!rootOutputDir.exists) { - if (!rootOutputDir.mkdirs) { - throw new IllegalStateException(s"Failed to create output directory for unpacking" + - s" files at ${rootOutputDir.getAbsolutePath}") - } - } else if (rootOutputDir.isFile) { - throw new IllegalArgumentException(s"Root dir for writing decompressed files: " + - s"${rootOutputDir.getAbsolutePath} exists and is not a directory.") - } - Utils.tryWithResource(new ByteArrayInputStream(compressedBytes)) { compressedBytesStream => - unpackTarStreamToDirectory( - compressedBytesStream, - rootOutputDir, - compressedData.blockSize, - compressedData.recordSize, - compressedData.encoding) - } - } - - def unpackTarStreamToDirectory( - inputStream: InputStream, - outputDir: File, - blockSize: Int = BLOCK_SIZE, - recordSize: Int = RECORD_SIZE, - encoding: String = ENCODING): Seq[String] = { + def unpackTarStreamToDirectory(inputStream: InputStream, outputDir: File): Seq[String] = { val paths = mutable.Buffer.empty[String] Utils.tryWithResource(new GZIPInputStream(inputStream)) { gzipped => Utils.tryWithResource(new TarArchiveInputStream( gzipped, - blockSize, - recordSize, - encoding)) { tarInputStream => + BLOCK_SIZE, + RECORD_SIZE, + ENCODING)) { tarInputStream => var nextTarEntry = tarInputStream.getNextTarEntry while (nextTarEntry != null) { val outputFile = new File(outputDir, nextTarEntry.getName) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala index 227420db4636d..0d4e82566643d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.kubernetes import io.fabric8.kubernetes.api.model.{ContainerBuilder, EmptyDirVolumeSource, PodBuilder, VolumeMount, VolumeMountBuilder} import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.v2.{ContainerNameEqualityPredicate, InitContainerUtil} +import org.apache.spark.deploy.kubernetes.submit.{ContainerNameEqualityPredicate, InitContainerUtil} private[spark] trait SparkPodInitContainerBootstrap { /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 759a7df505829..bcb9a96cae960 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -20,7 +20,6 @@ import java.util.concurrent.TimeUnit import org.apache.spark.{SPARK_VERSION => sparkVersion} import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.v1.NodePortUrisDriverServiceManager import org.apache.spark.internal.Logging import org.apache.spark.internal.config.ConfigBuilder import org.apache.spark.network.util.ByteUnit @@ -212,77 +211,6 @@ package object config extends Logging { .stringConf 
.createOptional - private[spark] val KUBERNETES_DRIVER_SUBMIT_TIMEOUT = - ConfigBuilder("spark.kubernetes.driverSubmissionTimeout") - .doc("Time to wait for the driver process to start running before aborting its execution.") - .timeConf(TimeUnit.SECONDS) - .createWithDefault(60L) - - private[spark] val KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.keyStore") - .doc("KeyStore file for the driver submission server listening on SSL. Can be pre-mounted" + - " on the driver container or uploaded from the submitting client.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.trustStore") - .doc("TrustStore containing certificates for communicating to the driver submission server" + - " over SSL.") - .stringConf - .createOptional - - private[spark] val DRIVER_SUBMIT_SSL_ENABLED = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.enabled") - .doc("Whether or not to use SSL when sending the application dependencies to the driver pod.") - .booleanConf - .createWithDefault(false) - - private[spark] val DRIVER_SUBMIT_SSL_KEY_PEM = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.keyPem") - .doc("Key PEM file that the driver submission server will use when setting up TLS" + - " connections. Can be pre-mounted on the driver pod's disk or uploaded from the" + - " submitting client's machine.") - .stringConf - .createOptional - - private[spark] val DRIVER_SUBMIT_SSL_SERVER_CERT_PEM = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.serverCertPem") - .doc("Certificate PEM file that is associated with the key PEM file" + - " the submission server uses to set up TLS connections. Can be pre-mounted" + - " on the driver pod's disk or uploaded from the submitting client's machine.") - .stringConf - .createOptional - - private[spark] val DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM = - ConfigBuilder("spark.ssl.kubernetes.driversubmitserver.clientCertPem") - .doc("Certificate pem file that the submission client uses to connect to the submission" + - " server over TLS. This should often be the same as the server certificate, but can be" + - " different if the submission client will contact the driver through a proxy instead of" + - " the driver service directly.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_SERVICE_NAME = - ConfigBuilder("spark.kubernetes.driver.service.name") - .doc("Kubernetes service that exposes the driver pod for external access.") - .internal() - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY = - ConfigBuilder("spark.kubernetes.driver.submissionServerMemory") - .doc("The amount of memory to allocate for the driver submission server.") - .bytesConf(ByteUnit.MiB) - .createWithDefaultString("256m") - - private[spark] val EXPOSE_KUBERNETES_DRIVER_SERVICE_UI_PORT = - ConfigBuilder("spark.kubernetes.driver.service.exposeUiPort") - .doc("Whether to expose the driver Web UI port as a service NodePort. Turned off by default" + - " because NodePort is a limited resource. 
Use alternatives if possible.") - .booleanConf - .createWithDefault(false) - private[spark] val KUBERNETES_DRIVER_POD_NAME = ConfigBuilder("spark.kubernetes.driver.pod.name") .doc("Name of the driver pod.") @@ -327,13 +255,6 @@ package object config extends Logging { .longConf .createWithDefault(1) - private[spark] val DRIVER_SERVICE_MANAGER_TYPE = - ConfigBuilder("spark.kubernetes.driver.serviceManagerType") - .doc("A tag indicating which class to use for creating the Kubernetes service and" + - " determining its URI for the submission client.") - .stringConf - .createWithDefault(NodePortUrisDriverServiceManager.TYPE) - private[spark] val WAIT_FOR_APP_COMPLETION = ConfigBuilder("spark.kubernetes.submission.waitAppCompletion") .doc("In cluster mode, whether to wait for the application to finish before exiting the" + @@ -347,8 +268,7 @@ package object config extends Logging { .timeConf(TimeUnit.MILLISECONDS) .createWithDefaultString("1s") - // Spark dependency server for submission v2 - + // Spark resource staging server. private[spark] val RESOURCE_STAGING_SERVER_PORT = ConfigBuilder("spark.kubernetes.resourceStagingServer.port") .doc("Port for the Kubernetes resource staging server to listen on.") @@ -451,7 +371,7 @@ package object config extends Logging { .stringConf .createOptional - // Driver and Init-Container parameters for submission v2 + // Driver and Init-Container parameters private[spark] val RESOURCE_STAGING_SERVER_URI = ConfigBuilder("spark.kubernetes.resourceStagingServer.uri") .doc("Base URI for the Spark resource staging server.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index e4ca5c1458abe..bfb0bc3ffb0f3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File import java.util.Collections @@ -25,8 +25,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkException} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.{LoggingPodStatusWatcher, LoggingPodStatusWatcherImpl} -import org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServerSslOptionsProviderImpl +import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl import org.apache.spark.internal.Logging import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala index 5505d87fa8072..c635484c4c124 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolver.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerNameEqualityPredicate.scala similarity index 95% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerNameEqualityPredicate.scala index 5101e1506e4d5..434919208ba2e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerNameEqualityPredicate.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerNameEqualityPredicate.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.lang.Boolean diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index 0a5e6cd216011..7fbb0c9274bf5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.v2.RetrofitClientFactoryImpl +import org.apache.spark.deploy.rest.kubernetes.RetrofitClientFactoryImpl import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala similarity index 99% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala index 9759669335774..ded0237732ce0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.api.model.{PodBuilder, Secret, SecretBuilder} import scala.collection.JavaConverters._ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala similarity index 92% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala index e981c54d23a9d..3f0e7d97275a5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala @@ -14,11 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.DriverPodKubernetesCredentialsProvider private[spark] trait DriverPodKubernetesCredentialsMounterProvider { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala index adfdc060f0d0f..2292365995d1f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfiguration.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala index 0526ca53baaab..9b7faaa78a9aa 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/InitContainerUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PropertiesConfigMapFromScalaMapBuilder.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PropertiesConfigMapFromScalaMapBuilder.scala index cb9194552d2b6..8103272c27518 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/PropertiesConfigMapFromScalaMapBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PropertiesConfigMapFromScalaMapBuilder.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.StringWriter import java.util.Properties diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala similarity index 95% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala index 362fbbdf517dc..4062a3113eddf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala @@ -14,12 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.api.model.ConfigMap import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils private[spark] trait SparkInitContainerConfigMapBuilder { /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala index af3de6ce85026..17b61d4a6ace0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmissionKubernetesClientProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala index 1b086e60d3d0d..06d3648efb89f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.SparkException import org.apache.spark.deploy.kubernetes.config._ diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala index 1a33757e45aa0..7850853df97e6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala similarity index 95% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala index 5f98facfb691f..9d0d863d174bc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.{File, FileOutputStream} import javax.ws.rs.core.MediaType @@ -26,8 +26,7 @@ import retrofit2.Call import org.apache.spark.{SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory} import org.apache.spark.util.Utils private[spark] trait SubmittedDependencyUploader { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedResources.scala similarity index 96% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedResources.scala index f4e5e991180ce..225972c1057f2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedResources.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedResources.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit case class SubmittedResourceIdAndSecret(resourceId: String, resourceSecret: String) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala deleted file mode 100644 index 32fc434cb693a..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/Client.scala +++ /dev/null @@ -1,743 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import java.io.File -import java.security.SecureRandom -import java.util.ServiceLoader -import java.util.concurrent.{CountDownLatch, TimeUnit} - -import com.google.common.io.Files -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.{ConfigBuilder => K8SConfigBuilder, DefaultKubernetesClient, KubernetesClient, KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import org.apache.commons.codec.binary.Base64 -import scala.collection.JavaConverters._ - -import org.apache.spark.{SparkConf, SparkException} -import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.{DriverPodKubernetesCredentialsProvider, KubernetesFileUtils, LoggingPodStatusWatcherImpl} -import org.apache.spark.deploy.rest.kubernetes.v1.{AppResource, ContainerAppResource, HttpClientUtil, KubernetesCreateSubmissionRequest, KubernetesSparkRestApi, RemoteAppResource, UploadedAppResource} -import org.apache.spark.internal.Logging -import org.apache.spark.util.{ShutdownHookManager, Utils} - -private[spark] class Client( - sparkConf: SparkConf, - mainClass: String, - mainAppResource: String, - appArgs: Array[String]) extends Logging { - import Client._ - - private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) - private val master = resolveK8sMaster(sparkConf.get("spark.master")) - - private val launchTime = System.currentTimeMillis - private val appName = sparkConf.getOption("spark.app.name") - .getOrElse("spark") - private val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") - private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) - .getOrElse(kubernetesAppId) - private val secretName = s"$SUBMISSION_APP_SECRET_PREFIX-$kubernetesAppId" - private val secretDirectory = s"$DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR/$kubernetesAppId" - private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val uiPort = sparkConf.getInt("spark.ui.port", DEFAULT_UI_PORT) - private val driverSubmitTimeoutSecs = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) - private val driverServiceManagerType = sparkConf.get(DRIVER_SERVICE_MANAGER_TYPE) - private val sparkFiles = sparkConf.getOption("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - private val sparkJars = sparkConf.getOption("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - - // CPU settings - private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") - - // Memory settings - private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) - private val driverSubmitServerMemoryMb = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY) - private val driverSubmitServerMemoryString = sparkConf.get( - KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY.key, - KUBERNETES_DRIVER_SUBMIT_SERVER_MEMORY.defaultValueString) - private val driverContainerMemoryMb = driverMemoryMb + driverSubmitServerMemoryMb - private val memoryOverheadMb = sparkConf - .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) - .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverContainerMemoryMb).toInt, - MEMORY_OVERHEAD_MIN)) - private val driverContainerMemoryWithOverhead = driverContainerMemoryMb + memoryOverheadMb - - 
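For reference while reading the deletion above, here is a minimal Scala sketch (not part of this patch) of the driver-container memory sizing the removed v1 submission client performed; the 10% factor and 384 MiB floor come from the removed `spark.kubernetes.driver.memoryOverhead` documentation earlier in this diff, and all names in the sketch are illustrative:

    // Illustrative only: mirrors the removed v1 client's memory request/limit arithmetic.
    object V1DriverMemorySketch {
      private val MemoryOverheadFactor = 0.10 // 10% of the driver container memory
      private val MemoryOverheadMinMb = 384   // floor, in MiB

      /** Returns (memory request, memory limit) for the driver container, both in MiB. */
      def containerMemoryMb(driverMemoryMb: Int, submissionServerMemoryMb: Int): (Int, Int) = {
        val requestMb = driverMemoryMb + submissionServerMemoryMb
        val overheadMb = math.max((MemoryOverheadFactor * requestMb).toInt, MemoryOverheadMinMb)
        (requestMb, requestMb + overheadMb)
      }
    }

    // Example: a 1g driver heap plus the 256m submission server requests 1280 MiB and is
    // limited to 1664 MiB (the 384 MiB floor applies, since 10% would be only 128 MiB).
    V1DriverMemorySketch.containerMemoryMb(1024, 256) // => (1280, 1664)
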
private val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) - private val loggingInterval = Some(sparkConf.get(REPORT_INTERVAL)) - .filter( _ => waitForAppCompletion) - - private val secretBase64String = { - val secretBytes = new Array[Byte](128) - SECURE_RANDOM.nextBytes(secretBytes) - Base64.encodeBase64String(secretBytes) - } - - private val serviceAccount = sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME) - private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) - private val customAnnotations = sparkConf.get(KUBERNETES_DRIVER_ANNOTATIONS) - - private val kubernetesResourceCleaner = new KubernetesResourceCleaner - - def run(): Unit = { - logInfo(s"Starting application $kubernetesAppId in Kubernetes...") - val submitterLocalFiles = KubernetesFileUtils.getOnlySubmitterLocalFiles(sparkFiles) - val submitterLocalJars = KubernetesFileUtils.getOnlySubmitterLocalFiles(sparkJars) - (submitterLocalFiles ++ submitterLocalJars).foreach { file => - if (!new File(Utils.resolveURI(file).getPath).isFile) { - throw new SparkException(s"File $file does not exist or is a directory.") - } - } - if (KubernetesFileUtils.isUriLocalFile(mainAppResource) && - !new File(Utils.resolveURI(mainAppResource).getPath).isFile) { - throw new SparkException(s"Main app resource file $mainAppResource is not a file or" + - s" is a directory.") - } - val driverServiceManager = getDriverServiceManager - val parsedCustomLabels = parseKeyValuePairs(customLabels, KUBERNETES_DRIVER_LABELS.key, - "labels") - parsedCustomLabels.keys.foreach { key => - require(key != SPARK_APP_ID_LABEL, "Label with key" + - s" $SPARK_APP_ID_LABEL cannot be used in" + - " spark.kubernetes.driver.labels, as it is reserved for Spark's" + - " internal configuration.") - } - val parsedCustomAnnotations = parseKeyValuePairs( - customAnnotations, - KUBERNETES_DRIVER_ANNOTATIONS.key, - "annotations") - val driverPodKubernetesCredentials = new DriverPodKubernetesCredentialsProvider(sparkConf).get() - var k8ConfBuilder = new K8SConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(master) - .withNamespace(namespace) - sparkConf.get(KUBERNETES_SUBMIT_CA_CERT_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_CLIENT_KEY_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_CLIENT_CERT_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { token => - k8ConfBuilder = k8ConfBuilder.withOauthToken(token) - } - - val k8ClientConfig = k8ConfBuilder.build - Utils.tryWithResource(new DefaultKubernetesClient(k8ClientConfig)) { kubernetesClient => - driverServiceManager.start(kubernetesClient, kubernetesAppId, sparkConf) - // start outer watch for status logging of driver pod - // only enable interval logging if in waitForAppCompletion mode - val loggingWatch = new LoggingPodStatusWatcherImpl( - kubernetesAppId, loggingInterval) - Utils.tryWithResource(kubernetesClient - .pods() - .withName(kubernetesDriverPodName) - .watch(loggingWatch)) { _ => - loggingWatch.start() - val resourceCleanShutdownHook = ShutdownHookManager.addShutdownHook(() => - kubernetesResourceCleaner.deleteAllRegisteredResourcesFromKubernetes(kubernetesClient)) - val cleanupServiceManagerHook = ShutdownHookManager.addShutdownHook( - ShutdownHookManager.DEFAULT_SHUTDOWN_PRIORITY)( - () => driverServiceManager.stop()) - // Place the error hook at a higher priority in order for the 
error hook to run before - // the stop hook. - val serviceManagerErrorHook = ShutdownHookManager.addShutdownHook( - ShutdownHookManager.DEFAULT_SHUTDOWN_PRIORITY + 1)(() => - driverServiceManager.handleSubmissionError( - new SparkException("Submission shutting down early..."))) - try { - val sslConfigurationProvider = new DriverSubmitSslConfigurationProvider( - sparkConf, kubernetesAppId, kubernetesClient, kubernetesResourceCleaner) - val submitServerSecret = kubernetesClient.secrets().createNew() - .withNewMetadata() - .withName(secretName) - .endMetadata() - .withData(Map((SUBMISSION_APP_SECRET_NAME, secretBase64String)).asJava) - .withType("Opaque") - .done() - kubernetesResourceCleaner.registerOrUpdateResource(submitServerSecret) - val sslConfiguration = sslConfigurationProvider.getSslConfiguration() - val (driverPod, driverService) = launchDriverKubernetesComponents( - kubernetesClient, - driverServiceManager, - parsedCustomLabels, - parsedCustomAnnotations, - submitServerSecret, - sslConfiguration) - configureOwnerReferences( - kubernetesClient, - submitServerSecret, - sslConfiguration.sslSecret, - driverPod, - driverService) - submitApplicationToDriverServer( - kubernetesClient, - driverServiceManager, - sslConfiguration, - driverService, - submitterLocalFiles, - submitterLocalJars, - driverPodKubernetesCredentials) - // Now that the application has started, persist the components that were created beyond - // the shutdown hook. We still want to purge the one-time secrets, so do not unregister - // those. - kubernetesResourceCleaner.unregisterResource(driverPod) - kubernetesResourceCleaner.unregisterResource(driverService) - } catch { - case e: Throwable => - driverServiceManager.handleSubmissionError(e) - throw e - } finally { - Utils.tryLogNonFatalError { - kubernetesResourceCleaner.deleteAllRegisteredResourcesFromKubernetes(kubernetesClient) - } - Utils.tryLogNonFatalError { - driverServiceManager.stop() - } - // Remove the shutdown hooks that would be redundant - Utils.tryLogNonFatalError { - ShutdownHookManager.removeShutdownHook(resourceCleanShutdownHook) - } - Utils.tryLogNonFatalError { - ShutdownHookManager.removeShutdownHook(cleanupServiceManagerHook) - } - Utils.tryLogNonFatalError { - ShutdownHookManager.removeShutdownHook(serviceManagerErrorHook) - } - } - // wait if configured to do so - if (waitForAppCompletion) { - logInfo(s"Waiting for application $kubernetesAppId to finish...") - loggingWatch.awaitCompletion() - logInfo(s"Application $kubernetesAppId finished.") - } else { - logInfo(s"Application $kubernetesAppId successfully launched.") - } - } - } - } - - private def submitApplicationToDriverServer( - kubernetesClient: KubernetesClient, - driverServiceManager: DriverServiceManager, - sslConfiguration: DriverSubmitSslConfiguration, - driverService: Service, - submitterLocalFiles: Iterable[String], - submitterLocalJars: Iterable[String], - driverPodKubernetesCredentials: KubernetesCredentials): Unit = { - sparkConf.getOption("spark.app.id").foreach { id => - logWarning(s"Warning: Provided app id in spark.app.id as $id will be" + - s" overridden as $kubernetesAppId") - } - sparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) - sparkConf.set(KUBERNETES_DRIVER_SERVICE_NAME, driverService.getMetadata.getName) - sparkConf.set("spark.app.id", kubernetesAppId) - sparkConf.setIfMissing("spark.app.name", appName) - sparkConf.setIfMissing("spark.driver.port", DEFAULT_DRIVER_PORT.toString) - sparkConf.setIfMissing("spark.driver.blockManager.port", 
DEFAULT_BLOCKMANAGER_PORT.toString) - sparkConf.setIfMissing("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT.toString) - sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => - sparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") - } - sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => - sparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN, "") - } - val driverSubmitter = buildDriverSubmissionClient( - kubernetesClient, - driverServiceManager, - driverService, - sslConfiguration) - // Sanity check to see if the driver submitter is even reachable. - driverSubmitter.ping() - logInfo(s"Submitting local resources to driver pod for application " + - s"$kubernetesAppId ...") - val submitRequest = buildSubmissionRequest( - submitterLocalFiles, - submitterLocalJars, - driverPodKubernetesCredentials) - driverSubmitter.submitApplication(submitRequest) - logInfo("Successfully submitted local resources and driver configuration to" + - " driver pod.") - // After submitting, adjust the service to only expose the Spark UI - val uiServiceType = if (sparkConf.get(EXPOSE_KUBERNETES_DRIVER_SERVICE_UI_PORT)) "NodePort" - else "ClusterIP" - val uiServicePort = new ServicePortBuilder() - .withName(UI_PORT_NAME) - .withPort(uiPort) - .withNewTargetPort(uiPort) - .build() - val resolvedService = kubernetesClient.services().withName(kubernetesAppId).edit() - .editSpec() - .withType(uiServiceType) - .withPorts(uiServicePort) - .endSpec() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(resolvedService) - logInfo("Finished submitting application to Kubernetes.") - } - - private def launchDriverKubernetesComponents( - kubernetesClient: KubernetesClient, - driverServiceManager: DriverServiceManager, - customLabels: Map[String, String], - customAnnotations: Map[String, String], - submitServerSecret: Secret, - sslConfiguration: DriverSubmitSslConfiguration): (Pod, Service) = { - val driverKubernetesSelectors = (Map( - SPARK_DRIVER_LABEL -> kubernetesAppId, - SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_APP_NAME_LABEL -> appName) - ++ customLabels) - val endpointsReadyFuture = SettableFuture.create[Endpoints] - val endpointsReadyWatcher = new DriverEndpointsReadyWatcher(endpointsReadyFuture) - val serviceReadyFuture = SettableFuture.create[Service] - val serviceReadyWatcher = new DriverServiceReadyWatcher(serviceReadyFuture) - val podReadyFuture = SettableFuture.create[Pod] - val podWatcher = new DriverPodReadyWatcher(podReadyFuture) - Utils.tryWithResource(kubernetesClient - .pods() - .withName(kubernetesDriverPodName) - .watch(podWatcher)) { _ => - Utils.tryWithResource(kubernetesClient - .services() - .withName(kubernetesAppId) - .watch(serviceReadyWatcher)) { _ => - Utils.tryWithResource(kubernetesClient - .endpoints() - .withName(kubernetesAppId) - .watch(endpointsReadyWatcher)) { _ => - val serviceTemplate = createDriverServiceTemplate(driverKubernetesSelectors) - val driverService = kubernetesClient.services().create( - driverServiceManager.customizeDriverService(serviceTemplate).build()) - kubernetesResourceCleaner.registerOrUpdateResource(driverService) - val driverPod = createDriverPod( - kubernetesClient, - driverKubernetesSelectors, - customAnnotations, - submitServerSecret, - sslConfiguration) - waitForReadyKubernetesComponents(kubernetesClient, endpointsReadyFuture, - serviceReadyFuture, podReadyFuture) - (driverPod, driverService) - } - } - } - } - - /** - * Sets the owner reference for all the kubernetes components to link to the driver pod. 
- * - * @return The driver service after it has been adjusted to reflect the new owner - * reference. - */ - private def configureOwnerReferences( - kubernetesClient: KubernetesClient, - submitServerSecret: Secret, - sslSecret: Option[Secret], - driverPod: Pod, - driverService: Service): Service = { - val driverPodOwnerRef = new OwnerReferenceBuilder() - .withName(driverPod.getMetadata.getName) - .withUid(driverPod.getMetadata.getUid) - .withApiVersion(driverPod.getApiVersion) - .withKind(driverPod.getKind) - .withController(true) - .build() - sslSecret.foreach(secret => { - val updatedSecret = kubernetesClient.secrets().withName(secret.getMetadata.getName).edit() - .editMetadata() - .addToOwnerReferences(driverPodOwnerRef) - .endMetadata() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(updatedSecret) - }) - val updatedSubmitServerSecret = kubernetesClient - .secrets() - .withName(submitServerSecret.getMetadata.getName) - .edit() - .editMetadata() - .addToOwnerReferences(driverPodOwnerRef) - .endMetadata() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(updatedSubmitServerSecret) - val updatedService = kubernetesClient - .services() - .withName(driverService.getMetadata.getName) - .edit() - .editMetadata() - .addToOwnerReferences(driverPodOwnerRef) - .endMetadata() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(updatedService) - updatedService - } - - private def waitForReadyKubernetesComponents( - kubernetesClient: KubernetesClient, - endpointsReadyFuture: SettableFuture[Endpoints], - serviceReadyFuture: SettableFuture[Service], - podReadyFuture: SettableFuture[Pod]) = { - try { - podReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - logInfo("Driver pod successfully created in Kubernetes cluster.") - } catch { - case e: Throwable => - val finalErrorMessage: String = buildSubmitFailedErrorMessage(kubernetesClient, e) - logError(finalErrorMessage, e) - throw new SparkException(finalErrorMessage, e) - } - try { - serviceReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - logInfo("Driver service created successfully in Kubernetes.") - } catch { - case e: Throwable => - throw new SparkException(s"The driver service was not ready" + - s" in $driverSubmitTimeoutSecs seconds.", e) - } - try { - endpointsReadyFuture.get(driverSubmitTimeoutSecs, TimeUnit.SECONDS) - logInfo("Driver endpoints ready to receive application submission") - } catch { - case e: Throwable => - throw new SparkException(s"The driver service endpoint was not ready" + - s" in $driverSubmitTimeoutSecs seconds.", e) - } - } - - private def createDriverPod( - kubernetesClient: KubernetesClient, - driverKubernetesSelectors: Map[String, String], - customAnnotations: Map[String, String], - submitServerSecret: Secret, - sslConfiguration: DriverSubmitSslConfiguration): Pod = { - val containerPorts = buildContainerPorts() - val probePingHttpGet = new HTTPGetActionBuilder() - .withScheme(if (sslConfiguration.enabled) "HTTPS" else "HTTP") - .withPath("/v1/submissions/ping") - .withNewPort(SUBMISSION_SERVER_PORT_NAME) - .build() - val driverCpuQuantity = new QuantityBuilder(false) - .withAmount(driverCpuCores) - .build() - val driverMemoryQuantity = new QuantityBuilder(false) - .withAmount(s"${driverContainerMemoryMb}M") - .build() - val driverMemoryLimitQuantity = new QuantityBuilder(false) - .withAmount(s"${driverContainerMemoryWithOverhead}M") - .build() - val driverPod = kubernetesClient.pods().createNew() - .withNewMetadata() - .withName(kubernetesDriverPodName) - 
.withLabels(driverKubernetesSelectors.asJava) - .withAnnotations(customAnnotations.asJava) - .endMetadata() - .withNewSpec() - .withRestartPolicy("Never") - .addNewVolume() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(submitServerSecret.getMetadata.getName) - .endSecret() - .endVolume() - .addToVolumes(sslConfiguration.sslPodVolume.toSeq: _*) - .withServiceAccount(serviceAccount.getOrElse("default")) - .addNewContainer() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() - .withName(SUBMISSION_APP_SECRET_VOLUME_NAME) - .withMountPath(secretDirectory) - .withReadOnly(true) - .endVolumeMount() - .addToVolumeMounts(sslConfiguration.sslPodVolumeMount.toSeq: _*) - .addNewEnv() - .withName(ENV_SUBMISSION_SECRET_LOCATION) - .withValue(s"$secretDirectory/$SUBMISSION_APP_SECRET_NAME") - .endEnv() - .addNewEnv() - .withName(ENV_SUBMISSION_SERVER_PORT) - .withValue(SUBMISSION_SERVER_PORT.toString) - .endEnv() - // Note that SPARK_DRIVER_MEMORY only affects the REST server via spark-class. - .addNewEnv() - .withName(ENV_DRIVER_MEMORY) - .withValue(driverSubmitServerMemoryString) - .endEnv() - .addToEnv(sslConfiguration.sslPodEnvVars: _*) - .withNewResources() - .addToRequests("cpu", driverCpuQuantity) - .addToLimits("cpu", driverCpuQuantity) - .addToRequests("memory", driverMemoryQuantity) - .addToLimits("memory", driverMemoryLimitQuantity) - .endResources() - .withPorts(containerPorts.asJava) - .withNewReadinessProbe().withHttpGet(probePingHttpGet).endReadinessProbe() - .endContainer() - .endSpec() - .done() - kubernetesResourceCleaner.registerOrUpdateResource(driverPod) - driverPod - } - - private def createDriverServiceTemplate(driverKubernetesSelectors: Map[String, String]) - : ServiceBuilder = { - val driverSubmissionServicePort = new ServicePortBuilder() - .withName(SUBMISSION_SERVER_PORT_NAME) - .withPort(SUBMISSION_SERVER_PORT) - .withNewTargetPort(SUBMISSION_SERVER_PORT) - .build() - new ServiceBuilder() - .withNewMetadata() - .withName(kubernetesAppId) - .withLabels(driverKubernetesSelectors.asJava) - .endMetadata() - .withNewSpec() - .withSelector(driverKubernetesSelectors.asJava) - .withPorts(driverSubmissionServicePort) - .endSpec() - } - - private class DriverPodReadyWatcher(resolvedDriverPod: SettableFuture[Pod]) extends Watcher[Pod] { - override def eventReceived(action: Action, pod: Pod): Unit = { - if ((action == Action.ADDED || action == Action.MODIFIED) - && pod.getStatus.getPhase == "Running" - && !resolvedDriverPod.isDone) { - pod.getStatus - .getContainerStatuses - .asScala - .find(status => - status.getName == DRIVER_CONTAINER_NAME && status.getReady) - .foreach { _ => resolvedDriverPod.set(pod) } - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logDebug("Driver pod readiness watch closed.", cause) - } - } - - private class DriverEndpointsReadyWatcher(resolvedDriverEndpoints: SettableFuture[Endpoints]) - extends Watcher[Endpoints] { - override def eventReceived(action: Action, endpoints: Endpoints): Unit = { - if ((action == Action.ADDED || action == Action.MODIFIED) - && (endpoints != null) - && (endpoints.getSubsets != null) - && endpoints.getSubsets.asScala.nonEmpty - && endpoints.getSubsets.asScala.exists(_.getAddresses.asScala.nonEmpty) - && !resolvedDriverEndpoints.isDone) { - resolvedDriverEndpoints.set(endpoints) - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logDebug("Driver endpoints 
readiness watch closed.", cause) - } - } - - private class DriverServiceReadyWatcher(resolvedDriverService: SettableFuture[Service]) - extends Watcher[Service] { - override def eventReceived(action: Action, service: Service): Unit = { - if ((action == Action.ADDED || action == Action.MODIFIED) - && !resolvedDriverService.isDone) { - resolvedDriverService.set(service) - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logDebug("Driver service readiness watch closed.", cause) - } - } - - private def buildSubmitFailedErrorMessage( - kubernetesClient: KubernetesClient, - e: Throwable): String = { - val driverPod = try { - kubernetesClient.pods().withName(kubernetesDriverPodName).get() - } catch { - case throwable: Throwable => - logError(s"Timed out while waiting $driverSubmitTimeoutSecs seconds for the" + - " driver pod to start, but an error occurred while fetching the driver" + - " pod's details.", throwable) - throw new SparkException(s"Timed out while waiting $driverSubmitTimeoutSecs" + - " seconds for the driver pod to start. Unfortunately, in attempting to fetch" + - " the latest state of the pod, another error was thrown. Check the logs for" + - " the error that was thrown in looking up the driver pod.", e) - } - val topLevelMessage = s"The driver pod with name ${driverPod.getMetadata.getName}" + - s" in namespace ${driverPod.getMetadata.getNamespace} was not ready in" + - s" $driverSubmitTimeoutSecs seconds." - val podStatusPhase = if (driverPod.getStatus.getPhase != null) { - s"Latest phase from the pod is: ${driverPod.getStatus.getPhase}" - } else { - "The pod had no final phase." - } - val podStatusMessage = if (driverPod.getStatus.getMessage != null) { - s"Latest message from the pod is: ${driverPod.getStatus.getMessage}" - } else { - "The pod had no final message." 
- } - val failedDriverContainerStatusString = driverPod.getStatus - .getContainerStatuses - .asScala - .find(_.getName == DRIVER_CONTAINER_NAME) - .map(status => { - val lastState = status.getState - if (lastState.getRunning != null) { - "Driver container last state: Running\n" + - s"Driver container started at: ${lastState.getRunning.getStartedAt}" - } else if (lastState.getWaiting != null) { - "Driver container last state: Waiting\n" + - s"Driver container wait reason: ${lastState.getWaiting.getReason}\n" + - s"Driver container message: ${lastState.getWaiting.getMessage}\n" - } else if (lastState.getTerminated != null) { - "Driver container last state: Terminated\n" + - s"Driver container started at: ${lastState.getTerminated.getStartedAt}\n" + - s"Driver container finished at: ${lastState.getTerminated.getFinishedAt}\n" + - s"Driver container exit reason: ${lastState.getTerminated.getReason}\n" + - s"Driver container exit code: ${lastState.getTerminated.getExitCode}\n" + - s"Driver container message: ${lastState.getTerminated.getMessage}" - } else { - "Driver container last state: Unknown" - } - }).getOrElse("The driver container wasn't found in the pod; expected to find" + - s" container with name $DRIVER_CONTAINER_NAME") - s"$topLevelMessage\n" + - s"$podStatusPhase\n" + - s"$podStatusMessage\n\n$failedDriverContainerStatusString" - } - - private def buildContainerPorts(): Seq[ContainerPort] = { - Seq((DRIVER_PORT_NAME, sparkConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT)), - (BLOCK_MANAGER_PORT_NAME, - sparkConf.getInt("spark.blockManager.port", DEFAULT_BLOCKMANAGER_PORT)), - (SUBMISSION_SERVER_PORT_NAME, SUBMISSION_SERVER_PORT), - (UI_PORT_NAME, uiPort)).map(port => new ContainerPortBuilder() - .withName(port._1) - .withContainerPort(port._2) - .build()) - } - - private def buildSubmissionRequest( - submitterLocalFiles: Iterable[String], - submitterLocalJars: Iterable[String], - driverPodKubernetesCredentials: KubernetesCredentials): KubernetesCreateSubmissionRequest = { - val mainResourceUri = Utils.resolveURI(mainAppResource) - val resolvedAppResource: AppResource = Option(mainResourceUri.getScheme) - .getOrElse("file") match { - case "file" => - val appFile = new File(mainResourceUri.getPath) - val fileBytes = Files.toByteArray(appFile) - val fileBase64 = Base64.encodeBase64String(fileBytes) - UploadedAppResource(resourceBase64Contents = fileBase64, name = appFile.getName) - case "local" => ContainerAppResource(mainAppResource) - case other => RemoteAppResource(other) - } - val uploadFilesBase64Contents = CompressionUtils.createTarGzip(submitterLocalFiles.map( - Utils.resolveURI(_).getPath)) - val uploadJarsBase64Contents = CompressionUtils.createTarGzip(submitterLocalJars.map( - Utils.resolveURI(_).getPath)) - KubernetesCreateSubmissionRequest( - appResource = resolvedAppResource, - mainClass = mainClass, - appArgs = appArgs, - secret = secretBase64String, - sparkProperties = sparkConf.getAll.toMap, - uploadedJarsBase64Contents = uploadJarsBase64Contents, - uploadedFilesBase64Contents = uploadFilesBase64Contents, - driverPodKubernetesCredentials = driverPodKubernetesCredentials) - } - - private def buildDriverSubmissionClient( - kubernetesClient: KubernetesClient, - driverServiceManager: DriverServiceManager, - service: Service, - sslConfiguration: DriverSubmitSslConfiguration): KubernetesSparkRestApi = { - val serviceUris = driverServiceManager.getDriverServiceSubmissionServerUris(service) - require(serviceUris.nonEmpty, "No uris found to contact the driver!") - 
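buildSubmissionRequest above dispatches on the URI scheme of the main application resource: no scheme or file:// means the file is read and base64-encoded for upload, local:// means it already exists inside the driver image, and any other scheme is treated as a remote resource for the driver to fetch. A rough standalone sketch of that dispatch, using java.net.URI and java.util.Base64 in place of Spark's Utils and Guava helpers; the case class names only loosely mirror the request model:

import java.io.File
import java.net.URI
import java.nio.file.Files
import java.util.Base64

sealed trait AppResource
case class UploadedAppResource(name: String, base64Contents: String) extends AppResource
case class ContainerAppResource(path: String) extends AppResource
case class RemoteAppResource(uri: String) extends AppResource

object AppResourceResolver {
  // No scheme or file:// => upload from the submitter's machine; local:// => already in the
  // driver image; anything else => let the driver fetch it remotely.
  def resolve(mainAppResource: String): AppResource = {
    val uri = URI.create(mainAppResource)
    Option(uri.getScheme).getOrElse("file") match {
      case "file" =>
        val appFile = new File(uri.getPath)
        val encoded = Base64.getEncoder.encodeToString(Files.readAllBytes(appFile.toPath))
        UploadedAppResource(appFile.getName, encoded)
      case "local" =>
        ContainerAppResource(mainAppResource)
      case _ =>
        RemoteAppResource(mainAppResource)
    }
  }
}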
HttpClientUtil.createClient[KubernetesSparkRestApi]( - uris = serviceUris, - maxRetriesPerServer = 10, - sslSocketFactory = sslConfiguration - .driverSubmitClientSslContext - .getSocketFactory, - trustContext = sslConfiguration - .driverSubmitClientTrustManager - .orNull, - connectTimeoutMillis = 5000) - } - - private def parseKeyValuePairs( - maybeKeyValues: Option[String], - configKey: String, - keyValueType: String): Map[String, String] = { - maybeKeyValues.map(keyValues => { - keyValues.split(",").map(_.trim).filterNot(_.isEmpty).map(keyValue => { - keyValue.split("=", 2).toSeq match { - case Seq(k, v) => - (k, v) - case _ => - throw new SparkException(s"Custom $keyValueType set by $configKey must be a" + - s" comma-separated list of key-value pairs, with format =." + - s" Got value: $keyValue. All values: $keyValues") - } - }).toMap - }).getOrElse(Map.empty[String, String]) - } - - private def getDriverServiceManager: DriverServiceManager = { - val driverServiceManagerLoader = ServiceLoader.load(classOf[DriverServiceManager]) - val matchingServiceManagers = driverServiceManagerLoader - .iterator() - .asScala - .filter(_.getServiceManagerType == driverServiceManagerType) - .toList - require(matchingServiceManagers.nonEmpty, - s"No driver service manager found matching type $driverServiceManagerType") - require(matchingServiceManagers.size == 1, "Multiple service managers found" + - s" matching type $driverServiceManagerType, got: " + - matchingServiceManagers.map(_.getClass).toList.mkString(",")) - matchingServiceManagers.head - } -} - -private[spark] object Client extends Logging { - - private[spark] val SECURE_RANDOM = new SecureRandom() - - def main(args: Array[String]): Unit = { - require(args.length >= 2, s"Too few arguments. Usage: ${getClass.getName} " + - s" []") - val mainAppResource = args(0) - val mainClass = args(1) - val appArgs = args.drop(2) - val sparkConf = new SparkConf(true) - new Client( - mainAppResource = mainAppResource, - mainClass = mainClass, - sparkConf = sparkConf, - appArgs = appArgs).run() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala deleted file mode 100644 index c7d394fcf00ad..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverServiceManager.scala +++ /dev/null @@ -1,99 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
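parseKeyValuePairs above turns the custom label and annotation configuration values into a map, expecting a comma-separated list of key=value pairs. A small self-contained version of that parsing, with an object name invented for the example:

object KeyValuePairParser {
  // Accepts strings such as "team=data,env=staging" and rejects entries without an '='.
  def parse(raw: String): Map[String, String] =
    raw.split(",").map(_.trim).filter(_.nonEmpty).map { entry =>
      entry.split("=", 2) match {
        case Array(k, v) => k -> v
        case _ => throw new IllegalArgumentException(
          s"Expected a comma-separated list of key=value pairs, got: $entry")
      }
    }.toMap
}

For example, KeyValuePairParser.parse("team=data,env=staging") yields Map("team" -> "data", "env" -> "staging").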
- */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} -import io.fabric8.kubernetes.client.KubernetesClient - -import org.apache.spark.SparkConf - -/** - * Implementations of this interface are responsible for exposing the driver pod by: - * - Creating a Kubernetes Service that is backed by the driver pod, and - * - Providing one or more URIs that the service can be reached at from the submission client. - * - * In general, one should not need to implement custom variants of this interface. Consider - * if the built-in service managers, NodePort and ExternalAnnotation, suit your needs first. - * - * This API is in an alpha state and may break without notice. - */ -trait DriverServiceManager { - - protected var kubernetesClient: KubernetesClient = _ - protected var serviceName: String = _ - protected var sparkConf: SparkConf = _ - - /** - * The tag that identifies this service manager type. This service manager will be loaded - * only if the Spark configuration spark.kubernetes.driver.serviceManagerType matches this - * value. - */ - def getServiceManagerType: String - - final def start( - kubernetesClient: KubernetesClient, - serviceName: String, - sparkConf: SparkConf): Unit = { - this.kubernetesClient = kubernetesClient - this.serviceName = serviceName - this.sparkConf = sparkConf - onStart(kubernetesClient, serviceName, sparkConf) - } - - /** - * Guaranteed to be called before {@link createDriverService} or - * {@link getDriverServiceSubmissionServerUris} is called. - */ - protected def onStart( - kubernetesClient: KubernetesClient, - serviceName: String, - sparkConf: SparkConf): Unit = {} - - /** - * Customize the driver service that overlays on the driver pod. - * - * Implementations are expected to take the service template and adjust it - * according to the particular needs of how the Service will be accessed by - * URIs provided in {@link getDriverServiceSubmissionServerUris}. - * - * @param driverServiceTemplate Base settings for the driver service. - * @return The same ServiceBuilder object with any required customizations. - */ - def customizeDriverService(driverServiceTemplate: ServiceBuilder): ServiceBuilder - - /** - * Return the set of URIs that can be used to reach the submission server that - * is running on the driver pod. - */ - def getDriverServiceSubmissionServerUris(driverService: Service): Set[String] - - /** - * Called when the Spark application failed to start. Allows the service - * manager to clean up any state it may have created that should not be persisted - * in the case of an unsuccessful launch. Note that stop() is still called - * regardless if this method is called. - */ - def handleSubmissionError(cause: Throwable): Unit = {} - - final def stop(): Unit = onStop() - - /** - * Perform any cleanup of this service manager. - * the super implementation. 
- */ - protected def onStop(): Unit = {} -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala deleted file mode 100644 index 174e9c57a65ca..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/DriverSubmitSslConfigurationProvider.scala +++ /dev/null @@ -1,354 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import java.io.{File, FileInputStream} -import java.security.{KeyStore, SecureRandom} -import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{EnvVar, EnvVarBuilder, Secret, Volume, VolumeBuilder, VolumeMount, VolumeMountBuilder} -import io.fabric8.kubernetes.client.KubernetesClient -import scala.collection.JavaConverters._ - -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter -import org.apache.spark.util.Utils - -/** - * Raw SSL configuration as the user specified in SparkConf for setting up the driver - * submission server. - */ -private case class DriverSubmitSslConfigurationParameters( - storeBasedSslOptions: SSLOptions, - isKeyStoreLocalFile: Boolean, - driverSubmitServerKeyPem: Option[File], - isDriverSubmitKeyPemLocalFile: Boolean, - driverSubmitServerCertPem: Option[File], - isDriverSubmitServerCertPemLocalFile: Boolean, - submissionClientCertPem: Option[File]) - -/** - * Resolved from translating options provided in - * {@link DriverSubmitSslConfigurationParameters} into Kubernetes volumes, environment variables - * for the driver pod, Kubernetes secrets, client-side trust managers, and the client-side SSL - * context. This is used for setting up the SSL connection for the submission server where the - * application local dependencies and configuration is provided from. 
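The DriverServiceManager trait removed above was a ServiceLoader-discovered plugin point: an implementation names its type, customizes the driver Service, and reports the URIs at which the submission server can be reached. Purely as an illustration of that removed alpha API, with the class name, type tag, and port invented for the example, an implementation would have looked roughly like:

import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder}

class ClusterIpUrisDriverServiceManager extends DriverServiceManager {
  // Selected when spark.kubernetes.driver.serviceManagerType matches this value.
  override def getServiceManagerType: String = "ClusterIP"

  // Keep the service cluster-internal; suitable when the submitter runs inside the cluster.
  override def customizeDriverService(driverServiceTemplate: ServiceBuilder): ServiceBuilder =
    driverServiceTemplate.editSpec().withType("ClusterIP").endSpec()

  // Reach the submission server through the service's cluster-local DNS name.
  override def getDriverServiceSubmissionServerUris(driverService: Service): Set[String] = {
    val name = driverService.getMetadata.getName
    val namespace = driverService.getMetadata.getNamespace
    Set(s"http://$name.$namespace.svc.cluster.local:7077")  // port is illustrative
  }
}

Such a class would also have needed a META-INF/services entry for DriverServiceManager so the ServiceLoader lookup in getDriverServiceManager could discover it.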
- */ -private[spark] case class DriverSubmitSslConfiguration( - enabled: Boolean, - sslPodEnvVars: Array[EnvVar], - sslPodVolume: Option[Volume], - sslPodVolumeMount: Option[VolumeMount], - sslSecret: Option[Secret], - driverSubmitClientTrustManager: Option[X509TrustManager], - driverSubmitClientSslContext: SSLContext) - -/** - * Provides the SSL configuration for bootstrapping the driver pod to listen for the driver - * submission over SSL, and then supply the client-side configuration for establishing the - * SSL connection. This is done in two phases: first, interpreting the raw configuration - * values from the SparkConf object; then second, converting the configuration parameters - * into the appropriate Kubernetes constructs, namely the volume and volume mount to add to the - * driver pod, and the secret to create at the API server; and finally, constructing the - * client-side trust manager and SSL context for sending the local dependencies. - */ -private[spark] class DriverSubmitSslConfigurationProvider( - sparkConf: SparkConf, - kubernetesAppId: String, - kubernetesClient: KubernetesClient, - kubernetesResourceCleaner: KubernetesResourceCleaner) { - private val SECURE_RANDOM = new SecureRandom() - private val sslSecretsName = s"$SUBMISSION_SSL_SECRETS_PREFIX-$kubernetesAppId" - private val sslSecretsDirectory = DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR + - s"/$kubernetesAppId-ssl" - - def getSslConfiguration(): DriverSubmitSslConfiguration = { - val sslConfigurationParameters = parseSslConfigurationParameters() - if (sslConfigurationParameters.storeBasedSslOptions.enabled) { - val storeBasedSslOptions = sslConfigurationParameters.storeBasedSslOptions - val keyStoreSecret = resolveFileToSecretMapping( - sslConfigurationParameters.isKeyStoreLocalFile, - SUBMISSION_SSL_KEYSTORE_SECRET_NAME, - storeBasedSslOptions.keyStore, - "KeyStore") - val keyStorePathEnv = resolveFilePathEnv( - sslConfigurationParameters.isKeyStoreLocalFile, - ENV_SUBMISSION_KEYSTORE_FILE, - SUBMISSION_SSL_KEYSTORE_SECRET_NAME, - storeBasedSslOptions.keyStore) - val storePasswordSecret = storeBasedSslOptions.keyStorePassword.map(password => { - val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) - (SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME, passwordBase64) - }).toMap - val storePasswordLocationEnv = storeBasedSslOptions.keyStorePassword.map(_ => { - new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE) - .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME") - .build() - }) - val storeKeyPasswordSecret = storeBasedSslOptions.keyPassword.map(password => { - val passwordBase64 = BaseEncoding.base64().encode(password.getBytes(Charsets.UTF_8)) - (SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME, passwordBase64) - }).toMap - val storeKeyPasswordEnv = storeBasedSslOptions.keyPassword.map(_ => { - new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE) - .withValue(s"$sslSecretsDirectory/$SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME") - .build() - }) - val storeTypeEnv = storeBasedSslOptions.keyStoreType.map(storeType => { - new EnvVarBuilder() - .withName(ENV_SUBMISSION_KEYSTORE_TYPE) - .withValue(storeType) - .build() - }) - val keyPemSecret = resolveFileToSecretMapping( - sslConfigurationParameters.isDriverSubmitKeyPemLocalFile, - secretName = SUBMISSION_SSL_KEY_PEM_SECRET_NAME, - secretType = "Key pem", - secretFile = sslConfigurationParameters.driverSubmitServerKeyPem) - val keyPemLocationEnv = resolveFilePathEnv( - 
sslConfigurationParameters.isDriverSubmitKeyPemLocalFile, - envName = ENV_SUBMISSION_KEY_PEM_FILE, - secretName = SUBMISSION_SSL_KEY_PEM_SECRET_NAME, - maybeFile = sslConfigurationParameters.driverSubmitServerKeyPem) - val certPemSecret = resolveFileToSecretMapping( - sslConfigurationParameters.isDriverSubmitServerCertPemLocalFile, - secretName = SUBMISSION_SSL_CERT_PEM_SECRET_NAME, - secretType = "Cert pem", - secretFile = sslConfigurationParameters.driverSubmitServerCertPem) - val certPemLocationEnv = resolveFilePathEnv( - sslConfigurationParameters.isDriverSubmitServerCertPemLocalFile, - envName = ENV_SUBMISSION_CERT_PEM_FILE, - secretName = SUBMISSION_SSL_CERT_PEM_SECRET_NAME, - maybeFile = sslConfigurationParameters.driverSubmitServerCertPem) - val useSslEnv = new EnvVarBuilder() - .withName(ENV_SUBMISSION_USE_SSL) - .withValue("true") - .build() - val sslVolume = new VolumeBuilder() - .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) - .withNewSecret() - .withSecretName(sslSecretsName) - .endSecret() - .build() - val sslVolumeMount = new VolumeMountBuilder() - .withName(SUBMISSION_SSL_SECRETS_VOLUME_NAME) - .withReadOnly(true) - .withMountPath(sslSecretsDirectory) - .build() - val allSecrets = keyStoreSecret ++ - storePasswordSecret ++ - storeKeyPasswordSecret ++ - keyPemSecret ++ - certPemSecret - val sslSecret = kubernetesClient.secrets().createNew() - .withNewMetadata() - .withName(sslSecretsName) - .endMetadata() - .withData(allSecrets.asJava) - .withType("Opaque") - .done() - kubernetesResourceCleaner.registerOrUpdateResource(sslSecret) - val allSslEnvs = keyStorePathEnv ++ - storePasswordLocationEnv ++ - storeKeyPasswordEnv ++ - storeTypeEnv ++ - keyPemLocationEnv ++ - Array(useSslEnv) ++ - certPemLocationEnv - val (driverSubmitClientTrustManager, driverSubmitClientSslContext) = - buildSslConnectionConfiguration(sslConfigurationParameters) - DriverSubmitSslConfiguration( - true, - allSslEnvs.toArray, - Some(sslVolume), - Some(sslVolumeMount), - Some(sslSecret), - driverSubmitClientTrustManager, - driverSubmitClientSslContext) - } else { - DriverSubmitSslConfiguration( - false, - Array[EnvVar](), - None, - None, - None, - None, - SSLContext.getDefault) - } - } - - private def resolveFilePathEnv( - isLocal: Boolean, - envName: String, - secretName: String, - maybeFile: Option[File]): Option[EnvVar] = { - maybeFile.map(file => { - val pemPath = if (isLocal) { - s"$sslSecretsDirectory/$secretName" - } else { - file.getAbsolutePath - } - new EnvVarBuilder() - .withName(envName) - .withValue(pemPath) - .build() - }) - } - - private def resolveFileToSecretMapping( - isLocal: Boolean, - secretName: String, - secretFile: Option[File], - secretType: String): Map[String, String] = { - secretFile.filter(_ => isLocal).map(file => { - if (!file.isFile) { - throw new SparkException(s"$secretType specified at ${file.getAbsolutePath} is not" + - s" a file or does not exist.") - } - val keyStoreBytes = Files.toByteArray(file) - (secretName, BaseEncoding.base64().encode(keyStoreBytes)) - }).toMap - } - - private def parseSslConfigurationParameters(): DriverSubmitSslConfigurationParameters = { - val maybeKeyStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE) - val maybeTrustStore = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE) - val maybeKeyPem = sparkConf.get(DRIVER_SUBMIT_SSL_KEY_PEM) - val maybeDriverSubmitServerCertPem = sparkConf.get(DRIVER_SUBMIT_SSL_SERVER_CERT_PEM) - val maybeDriverSubmitClientCertPem = sparkConf.get(DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM) - 
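getSslConfiguration above packages every sensitive SSL input (keystore bytes, passwords, PEMs) into one Opaque Kubernetes secret, then surfaces it to the driver container as a secret-backed volume plus environment variables pointing at the mounted paths. A minimal sketch of that secret-plus-volume pattern with the fabric8 client; the names and contents below are placeholders:

import java.util.Collections

import com.google.common.io.BaseEncoding
import io.fabric8.kubernetes.api.model.VolumeBuilder
import io.fabric8.kubernetes.client.DefaultKubernetesClient

object SslSecretSketch {
  def main(args: Array[String]): Unit = {
    val client = new DefaultKubernetesClient()
    try {
      // Secret values must be base64-encoded; this stands in for real keystore bytes.
      val keyStoreBase64 = BaseEncoding.base64().encode("not-a-real-keystore".getBytes("UTF-8"))
      val secret = client.secrets().createNew()
        .withNewMetadata().withName("driver-submit-ssl").endMetadata()
        .withData(Collections.singletonMap("keyStore", keyStoreBase64))
        .withType("Opaque")
        .done()
      // The matching pod volume: mounting it exposes each secret key as a file.
      val sslVolume = new VolumeBuilder()
        .withName("ssl-secrets")
        .withNewSecret().withSecretName(secret.getMetadata.getName).endSecret()
        .build()
      println(s"Created secret ${secret.getMetadata.getName} backing volume ${sslVolume.getName}")
    } finally {
      client.close()
    }
  }
}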
validatePemsDoNotConflictWithStores( - maybeKeyStore, - maybeTrustStore, - maybeKeyPem, - maybeDriverSubmitServerCertPem, - maybeDriverSubmitClientCertPem) - val resolvedSparkConf = sparkConf.clone() - val (isLocalKeyStore, resolvedKeyStore) = resolveLocalFile(maybeKeyStore, "keyStore") - resolvedKeyStore.foreach { - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, _) - } - val (isLocalDriverSubmitServerCertPem, resolvedDriverSubmitServerCertPem) = - resolveLocalFile(maybeDriverSubmitServerCertPem, "server cert PEM") - val (isLocalKeyPem, resolvedKeyPem) = resolveLocalFile(maybeKeyPem, "key PEM") - maybeTrustStore.foreach { trustStore => - require(KubernetesFileUtils.isUriLocalFile(trustStore), s"Invalid trustStore URI" + - s" $trustStore; trustStore URI for submit server must have no scheme, or scheme file://") - resolvedSparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, - Utils.resolveURI(trustStore).getPath) - } - val driverSubmitClientCertPem = maybeDriverSubmitClientCertPem.map { driverSubmitClientCert => - require(KubernetesFileUtils.isUriLocalFile(driverSubmitClientCert), - "Invalid client certificate PEM URI $driverSubmitClientCert: client certificate URI must" + - " have no scheme, or scheme file://") - Utils.resolveURI(driverSubmitClientCert).getPath - } - val securityManager = new SparkSecurityManager(resolvedSparkConf) - val storeBasedSslOptions = securityManager.getSSLOptions(DRIVER_SUBMIT_SSL_NAMESPACE) - DriverSubmitSslConfigurationParameters( - storeBasedSslOptions, - isLocalKeyStore, - resolvedKeyPem.map(new File(_)), - isLocalKeyPem, - resolvedDriverSubmitServerCertPem.map(new File(_)), - isLocalDriverSubmitServerCertPem, - driverSubmitClientCertPem.map(new File(_))) - } - - private def resolveLocalFile(file: Option[String], - fileType: String): (Boolean, Option[String]) = { - file.map { f => - require(isValidSslFileScheme(f), s"Invalid $fileType URI $f, $fileType URI" + - s" for submit server must have scheme file:// or local:// (no scheme defaults to file://") - val isLocal = KubernetesFileUtils.isUriLocalFile(f) - (isLocal, Option.apply(Utils.resolveURI(f).getPath)) - }.getOrElse(false, None) - } - - private def validatePemsDoNotConflictWithStores( - maybeKeyStore: Option[String], - maybeTrustStore: Option[String], - maybeKeyPem: Option[String], - maybeDriverSubmitServerCertPem: Option[String], - maybeSubmitClientCertPem: Option[String]) = { - maybeKeyPem.orElse(maybeDriverSubmitServerCertPem).foreach { _ => - require(maybeKeyStore.isEmpty, - "Cannot specify server PEM files and key store files; must specify only one or the other.") - } - maybeKeyPem.foreach { _ => - require(maybeDriverSubmitServerCertPem.isDefined, - "When specifying the key PEM file, the server certificate PEM file must also be provided.") - } - maybeDriverSubmitServerCertPem.foreach { _ => - require(maybeKeyPem.isDefined, - "When specifying the server certificate PEM file, the key PEM file must also be provided.") - } - maybeTrustStore.foreach { _ => - require(maybeSubmitClientCertPem.isEmpty, - "Cannot specify client cert file and truststore file; must specify only one or the other.") - } - } - - private def isValidSslFileScheme(rawUri: String): Boolean = { - val resolvedScheme = Option.apply(Utils.resolveURI(rawUri).getScheme).getOrElse("file") - resolvedScheme == "file" || resolvedScheme == "local" - } - - private def buildSslConnectionConfiguration( - sslConfigurationParameters: DriverSubmitSslConfigurationParameters) - : (Option[X509TrustManager], SSLContext) = { - val 
maybeTrustStore = sslConfigurationParameters.submissionClientCertPem.map { certPem => - PemsToKeyStoreConverter.convertCertPemToTrustStore( - certPem, - sslConfigurationParameters.storeBasedSslOptions.trustStoreType) - }.orElse(sslConfigurationParameters.storeBasedSslOptions.trustStore.map { trustStoreFile => - if (!trustStoreFile.isFile) { - throw new SparkException(s"TrustStore file at ${trustStoreFile.getAbsolutePath}" + - s" does not exist or is not a file.") - } - val trustStore = KeyStore.getInstance( - sslConfigurationParameters - .storeBasedSslOptions - .trustStoreType - .getOrElse(KeyStore.getDefaultType)) - Utils.tryWithResource(new FileInputStream(trustStoreFile)) { trustStoreStream => - val trustStorePassword = sslConfigurationParameters - .storeBasedSslOptions - .trustStorePassword - .map(_.toCharArray) - .orNull - trustStore.load(trustStoreStream, trustStorePassword) - } - trustStore - }) - maybeTrustStore.map { trustStore => - val trustManagerFactory = TrustManagerFactory.getInstance( - TrustManagerFactory.getDefaultAlgorithm) - trustManagerFactory.init(trustStore) - val trustManagers = trustManagerFactory.getTrustManagers - val sslContext = SSLContext.getInstance("TLSv1.2") - sslContext.init(null, trustManagers, SECURE_RANDOM) - (Option.apply(trustManagers(0).asInstanceOf[X509TrustManager]), sslContext) - }.getOrElse((Option.empty[X509TrustManager], SSLContext.getDefault)) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala deleted file mode 100644 index 4c784aeb5692f..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/ExternalSuppliedUrisDriverServiceManager.scala +++ /dev/null @@ -1,105 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
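buildSslConnectionConfiguration above derives client-side trust from either a converted certificate PEM or an explicit truststore, then initialises a TLSv1.2 SSLContext with the resulting trust managers. The same JSSE steps in isolation, with a placeholder truststore path and an optional password:

import java.io.FileInputStream
import java.security.{KeyStore, SecureRandom}
import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager}

object ClientTlsContextSketch {
  def fromTrustStore(trustStorePath: String,
      password: Option[String]): (X509TrustManager, SSLContext) = {
    // Load the truststore from disk; the password may legitimately be absent.
    val trustStore = KeyStore.getInstance(KeyStore.getDefaultType)
    val in = new FileInputStream(trustStorePath)
    try trustStore.load(in, password.map(_.toCharArray).orNull) finally in.close()
    // Derive trust managers from the store and seed a TLS 1.2 context with them.
    val trustManagerFactory =
      TrustManagerFactory.getInstance(TrustManagerFactory.getDefaultAlgorithm)
    trustManagerFactory.init(trustStore)
    val sslContext = SSLContext.getInstance("TLSv1.2")
    sslContext.init(null, trustManagerFactory.getTrustManagers, new SecureRandom())
    (trustManagerFactory.getTrustManagers.head.asInstanceOf[X509TrustManager], sslContext)
  }
}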
- */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import java.util.concurrent.TimeUnit - -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import scala.collection.JavaConverters._ - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils - -/** - * Creates the service with an annotation that is expected to be detected by another process - * which the user provides and is not built in this project. When the external process detects - * the creation of the service with the appropriate annotation, it is expected to populate the - * value of a second annotation that is the URI of the driver submission server. - */ -private[spark] class ExternalSuppliedUrisDriverServiceManager - extends DriverServiceManager with Logging { - - private val externalUriFuture = SettableFuture.create[String] - private var externalUriSetWatch: Option[Watch] = None - - override def onStart( - kubernetesClient: KubernetesClient, - serviceName: String, - sparkConf: SparkConf): Unit = { - externalUriSetWatch = Some(kubernetesClient - .services() - .withName(serviceName) - .watch(new ExternalUriSetWatcher(externalUriFuture))) - } - - override def getServiceManagerType: String = ExternalSuppliedUrisDriverServiceManager.TYPE - - override def customizeDriverService(driverServiceTemplate: ServiceBuilder): ServiceBuilder = { - require(serviceName != null, "Service name was null; was start() called?") - driverServiceTemplate - .editMetadata() - .addToAnnotations(ANNOTATION_PROVIDE_EXTERNAL_URI, "true") - .endMetadata() - .editSpec() - .withType("ClusterIP") - .endSpec() - } - - override def getDriverServiceSubmissionServerUris(driverService: Service): Set[String] = { - val timeoutSeconds = sparkConf.get(KUBERNETES_DRIVER_SUBMIT_TIMEOUT) - require(externalUriSetWatch.isDefined, "The watch that listens for the provision of" + - " the external URI was not started; was start() called?") - Set(externalUriFuture.get(timeoutSeconds, TimeUnit.SECONDS)) - } - - override def onStop(): Unit = { - Utils.tryLogNonFatalError { - externalUriSetWatch.foreach(_.close()) - externalUriSetWatch = None - } - } -} - -private[spark] object ExternalSuppliedUrisDriverServiceManager { - val TYPE = "ExternalAnnotation" -} - -private[spark] class ExternalUriSetWatcher(externalUriFuture: SettableFuture[String]) - extends Watcher[Service] with Logging { - - override def eventReceived(action: Action, service: Service): Unit = { - if (action == Action.MODIFIED && !externalUriFuture.isDone) { - service - .getMetadata - .getAnnotations - .asScala - .get(ANNOTATION_RESOLVED_EXTERNAL_URI) - .foreach(externalUriFuture.set) - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logDebug("External URI set watcher closed.", cause) - } -} - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala deleted file mode 100644 index 266ec652ed8ae..0000000000000 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/KubernetesResourceCleaner.scala +++ /dev/null @@ -1,53 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import io.fabric8.kubernetes.api.model.HasMetadata -import io.fabric8.kubernetes.client.KubernetesClient -import scala.collection.mutable - -import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils - -private[spark] class KubernetesResourceCleaner extends Logging { - - private val resources = mutable.HashMap.empty[(String, String), HasMetadata] - - // Synchronized because deleteAllRegisteredResourcesFromKubernetes may be called from a - // shutdown hook - def registerOrUpdateResource(resource: HasMetadata): Unit = synchronized { - resources.put((resource.getMetadata.getName, resource.getKind), resource) - } - - def unregisterResource(resource: HasMetadata): Unit = synchronized { - resources.remove((resource.getMetadata.getName, resource.getKind)) - } - - def deleteAllRegisteredResourcesFromKubernetes(kubernetesClient: KubernetesClient): Unit = { - synchronized { - val resourceCount = resources.size - logInfo(s"Deleting ${resourceCount} registered Kubernetes resources...") - resources.values.foreach { resource => - Utils.tryLogNonFatalError { - kubernetesClient.resource(resource).delete() - } - } - resources.clear() - logInfo(s"Deleted ${resourceCount} registered Kubernetes resources.") - } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala deleted file mode 100644 index 965d71917403e..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/v1/NodePortUrisDriverServiceManager.scala +++ /dev/null @@ -1,70 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
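The KubernetesResourceCleaner deleted above keeps a registry of every resource the submission client created so that a shutdown hook can delete them all if the launch fails midway. The idea in a compact, standalone form; the class name is invented and error handling is reduced to best effort:

import scala.collection.mutable

import io.fabric8.kubernetes.api.model.HasMetadata
import io.fabric8.kubernetes.client.KubernetesClient

class CreatedResourceRegistry {
  // Keyed by (name, kind) so re-registering an updated resource replaces the stale copy.
  private val resources = mutable.HashMap.empty[(String, String), HasMetadata]

  def register(resource: HasMetadata): Unit = synchronized {
    resources.put((resource.getMetadata.getName, resource.getKind), resource)
  }

  // Safe to call from a JVM shutdown hook; deletion failures are swallowed so one
  // undeletable resource does not block cleanup of the rest.
  def deleteAll(client: KubernetesClient): Unit = synchronized {
    resources.values.foreach { resource =>
      try client.resource(resource).delete() catch { case _: Exception => () }
    }
    resources.clear()
  }
}

Wiring deleteAll into a JVM shutdown hook is what makes the registry useful on abnormal exits, which is also why the deleted class synchronizes all access.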
- * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit.v1 - -import io.fabric8.kubernetes.api.model.{Service, ServiceBuilder} -import scala.collection.JavaConverters._ - -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.internal.Logging - -/** - * Creates the service with an open NodePort. The URI to reach the submission server is thus - * at the address of any of the nodes through the service's node port. - */ -private[spark] class NodePortUrisDriverServiceManager extends DriverServiceManager with Logging { - - override def getServiceManagerType: String = NodePortUrisDriverServiceManager.TYPE - - override def customizeDriverService(driverServiceTemplate: ServiceBuilder): ServiceBuilder = { - driverServiceTemplate.editSpec().withType("NodePort").endSpec() - } - - override def getDriverServiceSubmissionServerUris(driverService: Service): Set[String] = { - val urlScheme = if (sparkConf.get(DRIVER_SUBMIT_SSL_ENABLED)) { - "https" - } else { - logWarning("Submitting application details, application secret, Kubernetes credentials," + - " and local jars to the cluster over an insecure connection. You should configure SSL" + - " to secure this step.") - "http" - } - val servicePort = driverService.getSpec.getPorts.asScala - .filter(_.getName == SUBMISSION_SERVER_PORT_NAME) - .head.getNodePort - val nodeUrls = kubernetesClient.nodes.list.getItems.asScala - .filterNot(node => node.getSpec.getUnschedulable != null && - node.getSpec.getUnschedulable) - .flatMap(_.getStatus.getAddresses.asScala) - // The list contains hostnames, internal and external IP addresses. - // (https://kubernetes.io/docs/admin/node/#addresses) - // we want only external IP addresses and legacyHostIP addresses in our list - // legacyHostIPs are deprecated and will be removed in the future. - // (https://github.com/kubernetes/kubernetes/issues/9267) - .filter(address => address.getType == "ExternalIP" || address.getType == "LegacyHostIP") - .map(address => { - s"$urlScheme://${address.getAddress}:$servicePort" - }).toSet - require(nodeUrls.nonEmpty, "No nodes found to contact the driver!") - nodeUrls - } -} - -private[spark] object NodePortUrisDriverServiceManager { - val TYPE = "NodePort" -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/FileFetcher.scala similarity index 56% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/FileFetcher.scala index 270e7ea0e77bf..d050e0a41a15a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestApi.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/FileFetcher.scala @@ -14,25 +14,11 @@ * See the License for the specific language governing permissions and * limitations under the License. 
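NodePortUrisDriverServiceManager above exposes the submission server on a NodePort and then advertises one URL per externally reachable, schedulable node. The address and port discovery on its own, with an illustrative port name; the real code also accepts the deprecated LegacyHostIP address type and switches to https when SSL is enabled:

import scala.collection.JavaConverters._

import io.fabric8.kubernetes.api.model.Service
import io.fabric8.kubernetes.client.KubernetesClient

object NodePortUrisSketch {
  def forService(client: KubernetesClient, service: Service, portName: String): Set[String] = {
    // The node port assigned to the named service port.
    val nodePort = service.getSpec.getPorts.asScala
      .find(_.getName == portName)
      .map(_.getNodePort)
      .getOrElse(sys.error(s"Service has no port named $portName"))
    // One URL per schedulable node that advertises an external IP address.
    client.nodes.list.getItems.asScala
      .filterNot(node => java.lang.Boolean.TRUE.equals(node.getSpec.getUnschedulable))
      .flatMap(_.getStatus.getAddresses.asScala)
      .filter(_.getType == "ExternalIP")
      .map(address => s"http://${address.getAddress}:$nodePort")
      .toSet
  }
}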
*/ -package org.apache.spark.deploy.rest.kubernetes.v1 +package org.apache.spark.deploy.rest.kubernetes -import javax.ws.rs.{Consumes, GET, Path, POST, Produces} -import javax.ws.rs.core.MediaType +import java.io.File -import org.apache.spark.deploy.rest.CreateSubmissionResponse - -@Path("/v1/submissions/") -trait KubernetesSparkRestApi { - - @POST - @Consumes(Array(MediaType.APPLICATION_JSON)) - @Produces(Array(MediaType.APPLICATION_JSON)) - @Path("/create") - def submitApplication(request: KubernetesCreateSubmissionRequest): CreateSubmissionResponse - - @GET - @Consumes(Array(MediaType.APPLICATION_JSON)) - @Produces(Array(MediaType.APPLICATION_JSON)) - @Path("/ping") - def ping(): PingResponse +// Extracted for testing so that unit tests don't have to depend on Utils.fetchFile +private[spark] trait FileFetcher { + def fetchFile(uri: String, targetDir: File): Unit } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala similarity index 95% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala index 7f21087159145..9bdc224f10c90 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala @@ -14,8 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ - -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.File import java.util.concurrent.TimeUnit @@ -30,8 +29,8 @@ import scala.concurrent.duration.Duration import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SSLOptions} import org.apache.spark.deploy.SparkHadoopUtil +import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.internal.Logging import org.apache.spark.util.{ThreadUtils, Utils} @@ -63,26 +62,6 @@ private class DownloadTarGzCallback(downloadDir: File) extends WaitableCallback[ } } } - -// Extracted for testing so that unit tests don't have to depend on Utils.fetchFile -private[v2] trait FileFetcher { - def fetchFile(uri: String, targetDir: File): Unit -} - -private class FileFetcherImpl(sparkConf: SparkConf, securityManager: SparkSecurityManager) - extends FileFetcher { - def fetchFile(uri: String, targetDir: File): Unit = { - Utils.fetchFile( - url = uri, - targetDir = targetDir, - conf = sparkConf, - securityMgr = securityManager, - hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf), - timestamp = System.currentTimeMillis(), - useCache = false) - } -} - /** * Process that fetches files from a resource staging server and/or arbitrary remote locations. 
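The FileFetcher trait introduced above exists, per its comment, so unit tests do not have to depend on Utils.fetchFile. A hypothetical test double showing how that seam gets used; the class name and behaviour are invented for the example:

import java.io.File
import java.nio.charset.StandardCharsets
import java.nio.file.Files

class InMemoryFileFetcher(contentsByUri: Map[String, String]) extends FileFetcher {
  override def fetchFile(uri: String, targetDir: File): Unit = {
    // Resolve the stubbed contents for this URI and write them where the real
    // fetcher would have downloaded the file.
    val contents = contentsByUri.getOrElse(uri, sys.error(s"No stubbed contents for $uri"))
    val target = new File(targetDir, new File(uri).getName)
    Files.write(target.toPath, contents.getBytes(StandardCharsets.UTF_8))
  }
}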
* @@ -97,6 +76,7 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( fileFetcher: FileFetcher, resourceStagingServerSslOptions: SSLOptions) extends Logging { + private implicit val downloadExecutor = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("download-executor")) private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) @@ -184,8 +164,7 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( val resourceSecret = Files.toString(resourceSecretLocation, Charsets.UTF_8) val downloadResourceCallback = new DownloadTarGzCallback(resourceDownloadDir) logInfo(downloadStartMessage) - service.downloadResources(resourceId, resourceSecret) - .enqueue(downloadResourceCallback) + service.downloadResources(resourceId, resourceSecret).enqueue(downloadResourceCallback) downloadResourceCallback.waitForCompletion(downloadTimeoutMinutes, TimeUnit.MINUTES) logInfo(downloadFinishedMessage) } @@ -211,6 +190,27 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( } } +private class FileFetcherImpl(sparkConf: SparkConf, securityManager: SparkSecurityManager) + extends FileFetcher { + def fetchFile(uri: String, targetDir: File): Unit = { + Utils.fetchFile( + url = uri, + targetDir = targetDir, + conf = sparkConf, + securityMgr = securityManager, + hadoopConf = SparkHadoopUtil.get.newConfiguration(sparkConf), + timestamp = System.currentTimeMillis(), + useCache = false) + } +} + +private case class StagedResources( + resourceSecret: String, + podLabels: Map[String, String], + podNamespace: String, + resourcesFile: File, + kubernetesCredentials: KubernetesCredentials) + object KubernetesSparkDependencyDownloadInitContainer extends Logging { def main(args: Array[String]): Unit = { logInfo("Starting init-container to download Spark application dependencies.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala index 178956a136d1c..17f90118e150d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/PemsToKeyStoreConverter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/PemsToKeyStoreConverter.scala @@ -14,10 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v1 +package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileInputStream, FileOutputStream, InputStreamReader} -import java.nio.file.Paths import java.security.{KeyStore, PrivateKey} import java.security.cert.Certificate import java.util.UUID diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala index 4ecb6369ff3b0..34594ba518b62 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.File diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProvider.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProvider.scala index 0dd0b08433def..cb1e65421c013 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProvider.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.File import java.security.SecureRandom @@ -26,7 +26,6 @@ import org.apache.commons.lang3.RandomStringUtils import org.apache.spark.{SecurityManager, SparkConf, SparkException, SSLOptions} import org.apache.spark.deploy.kubernetes.OptionRequirements import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.kubernetes.v1.PemsToKeyStoreConverter import org.apache.spark.internal.Logging private[spark] trait ResourceStagingServerSslOptionsProvider { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala similarity index 97% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala index 5dbe55b72bd8b..525711e78c01c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.InputStream import javax.ws.rs.{Consumes, GET, HeaderParam, Path, PathParam, POST, Produces} @@ -23,7 +23,7 @@ import javax.ws.rs.core.{MediaType, StreamingOutput} import org.glassfish.jersey.media.multipart.FormDataParam import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret +import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret /** * Service that receives application data that can be retrieved later on. This is primarily used diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala similarity index 91% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala index 34c3192ae6780..abe956da9914d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileOutputStream, InputStream, OutputStream} import java.security.SecureRandom @@ -27,7 +27,7 @@ import scala.collection.concurrent.TrieMap import org.apache.spark.SparkException import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret +import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret import org.apache.spark.internal.Logging import org.apache.spark.util.Utils @@ -92,10 +92,3 @@ private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) override def ping(): String = "pong" } - -private case class StagedResources( - resourceSecret: String, - podLabels: Map[String, String], - podNamespace: String, - resourcesFile: File, - kubernetesCredentials: KubernetesCredentials) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala similarity index 93% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala index e0079a372f0d9..3c2fe8ebbc3c8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import okhttp3.{RequestBody, ResponseBody} import retrofit2.Call import retrofit2.http.{Multipart, Path, Streaming} -import org.apache.spark.deploy.kubernetes.submit.v2.SubmittedResourceIdAndSecret +import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret /** * Retrofit-compatible variant of {@link ResourceStagingService}. For documentation on diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala similarity index 98% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala index f906423524944..a374982444f79 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/RetrofitClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.FileInputStream import java.security.{KeyStore, SecureRandom} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/SparkConfPropertiesParser.scala similarity index 94% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/SparkConfPropertiesParser.scala index cf9decab127c5..9e2b8a780df29 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v2/SparkConfPropertiesParser.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/SparkConfPropertiesParser.scala @@ -14,13 +14,13 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileInputStream} import java.util.Properties import com.google.common.collect.Maps -import scala.collection.JavaConverters.mapAsScalaMapConverter +import scala.collection.JavaConverters._ import org.apache.spark.SparkConf import org.apache.spark.internal.config.{ConfigReader, SparkConfigProvider} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala deleted file mode 100644 index ea1abed72c07f..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/HttpClientUtil.scala +++ /dev/null @@ -1,131 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.rest.kubernetes.v1 - -import java.io.IOException -import java.net.{InetSocketAddress, ProxySelector, SocketAddress, URI} -import java.util.Collections -import javax.net.ssl.{SSLContext, SSLSocketFactory, X509TrustManager} - -import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} -import com.fasterxml.jackson.module.scala.DefaultScalaModule -import feign.{Client, Feign, Request, Response} -import feign.Request.Options -import feign.jackson.{JacksonDecoder, JacksonEncoder} -import feign.jaxrs.JAXRSContract -import io.fabric8.kubernetes.client.Config -import okhttp3.OkHttpClient -import scala.reflect.ClassTag - -import org.apache.spark.SparkException -import org.apache.spark.internal.Logging -import org.apache.spark.status.api.v1.JacksonMessageWriter - -private[spark] object HttpClientUtil extends Logging { - - def createClient[T: ClassTag]( - uris: Set[String], - maxRetriesPerServer: Int = 1, - sslSocketFactory: SSLSocketFactory = SSLContext.getDefault.getSocketFactory, - trustContext: X509TrustManager = null, - readTimeoutMillis: Int = 20000, - connectTimeoutMillis: Int = 20000): T = { - var httpClientBuilder = new OkHttpClient.Builder() - Option.apply(trustContext).foreach(context => { - httpClientBuilder = httpClientBuilder.sslSocketFactory(sslSocketFactory, context) - }) - val uriObjects = uris.map(URI.create) - val httpUris = uriObjects.filter(uri => uri.getScheme == "http") - val httpsUris = uriObjects.filter(uri => uri.getScheme == "https") - val maybeAllProxy = Option.apply(System.getProperty(Config.KUBERNETES_ALL_PROXY)) - val maybeHttpProxy = Option.apply(System.getProperty(Config.KUBERNETES_HTTP_PROXY)) - .orElse(maybeAllProxy) - .map(uriStringToProxy) - val maybeHttpsProxy = Option.apply(System.getProperty(Config.KUBERNETES_HTTPS_PROXY)) - .orElse(maybeAllProxy) - .map(uriStringToProxy) - val maybeNoProxy = Option.apply(System.getProperty(Config.KUBERNETES_NO_PROXY)) - .map(_.split(",")) - .toSeq - .flatten - val proxySelector = new ProxySelector { - override def select(uri: URI): java.util.List[java.net.Proxy] = { - val directProxy = java.net.Proxy.NO_PROXY - val resolvedProxy = maybeNoProxy.find( _ == uri.getHost) - .map( _ => directProxy) - .orElse(uri.getScheme match { - case "http" => - logDebug(s"Looking up http proxies to route $uri") - maybeHttpProxy.filter { _ => - matchingUriExists(uri, httpUris) - } - case "https" => - logDebug(s"Looking up https proxies to route $uri") - maybeHttpsProxy.filter { _ => - matchingUriExists(uri, httpsUris) - } - case _ => None - }).getOrElse(directProxy) - logDebug(s"Routing $uri through ${resolvedProxy.address()} with proxy" + - s" type ${resolvedProxy.`type`()}") - Collections.singletonList(resolvedProxy) - } - - override def connectFailed(uri: URI, sa: SocketAddress, ioe: IOException) = { - throw new SparkException(s"Failed to connect to proxy through uri $uri," + - s" socket address: $sa", ioe) - } - } - httpClientBuilder = httpClientBuilder.proxySelector(proxySelector) - val objectMapper = new ObjectMapper() - .registerModule(new DefaultScalaModule) - .setDateFormat(JacksonMessageWriter.makeISODateFormat) - objectMapper.configure(DeserializationFeature.FAIL_ON_UNKNOWN_PROPERTIES, false) - val target = new MultiServerFeignTarget[T](uris.toSeq, maxRetriesPerServer) - val baseHttpClient = new feign.okhttp.OkHttpClient(httpClientBuilder.build()) - val resetTargetHttpClient = new Client { - override def execute(request: Request, options: Options): Response = { - val response = 
baseHttpClient.execute(request, options) - if (response.status() / 100 == 2) { - target.reset() - } - response - } - } - Feign.builder() - .client(resetTargetHttpClient) - .contract(new JAXRSContract) - .encoder(new JacksonEncoder(objectMapper)) - .decoder(new JacksonDecoder(objectMapper)) - .options(new Options(connectTimeoutMillis, readTimeoutMillis)) - .retryer(target) - .target(target) - } - - private def matchingUriExists(uri: URI, httpUris: Set[URI]): Boolean = { - httpUris.exists(httpUri => { - httpUri.getScheme == uri.getScheme && httpUri.getHost == uri.getHost && - httpUri.getPort == uri.getPort - }) - } - - private def uriStringToProxy(uriString: String): java.net.Proxy = { - val uriObject = URI.create(uriString) - new java.net.Proxy(java.net.Proxy.Type.HTTP, - new InetSocketAddress(uriObject.getHost, uriObject.getPort)) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala deleted file mode 100644 index bdd4a85da8f85..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesRestProtocolMessages.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.rest.kubernetes.v1 - -import com.fasterxml.jackson.annotation.{JsonIgnore, JsonSubTypes, JsonTypeInfo} - -import org.apache.spark.SPARK_VERSION -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.rest.{SubmitRestProtocolRequest, SubmitRestProtocolResponse} -import org.apache.spark.util.Utils - -case class KubernetesCreateSubmissionRequest( - appResource: AppResource, - mainClass: String, - appArgs: Array[String], - sparkProperties: Map[String, String], - secret: String, - driverPodKubernetesCredentials: KubernetesCredentials, - uploadedJarsBase64Contents: TarGzippedData, - uploadedFilesBase64Contents: TarGzippedData) extends SubmitRestProtocolRequest { - @JsonIgnore - override val messageType: String = s"kubernetes.v1.${Utils.getFormattedClassName(this)}" - override val action = messageType - message = "create" - clientSparkVersion = SPARK_VERSION -} - -case class TarGzippedData( - dataBase64: String, - blockSize: Int = 10240, - recordSize: Int = 512, - encoding: String -) - -@JsonTypeInfo( - use = JsonTypeInfo.Id.NAME, - include = JsonTypeInfo.As.PROPERTY, - property = "type") -@JsonSubTypes(value = Array( - new JsonSubTypes.Type(value = classOf[UploadedAppResource], name = "UploadedAppResource"), - new JsonSubTypes.Type(value = classOf[ContainerAppResource], name = "ContainerLocalAppResource"), - new JsonSubTypes.Type(value = classOf[RemoteAppResource], name = "RemoteAppResource"))) -abstract class AppResource - -case class UploadedAppResource( - resourceBase64Contents: String, - name: String = "spark-app-resource") extends AppResource - -case class ContainerAppResource(resourcePath: String) extends AppResource - -case class RemoteAppResource(resource: String) extends AppResource - -class PingResponse extends SubmitRestProtocolResponse { - val text = "pong" - message = "pong" - serverSparkVersion = SPARK_VERSION - @JsonIgnore - override val messageType: String = s"kubernetes.v1.${Utils.getFormattedClassName(this)}" - override val action: String = messageType -} - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala deleted file mode 100644 index 5cd24a8f9b75e..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/KubernetesSparkRestServer.scala +++ /dev/null @@ -1,483 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.rest.kubernetes.v1 - -import java.io.{File, FileOutputStream, StringReader} -import java.net.URI -import java.nio.file.Paths -import java.security.SecureRandom -import java.util.concurrent.CountDownLatch -import java.util.concurrent.atomic.AtomicInteger -import javax.servlet.http.{HttpServletRequest, HttpServletResponse} - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, ByteStreams, Files} -import org.apache.commons.codec.binary.Base64 -import org.apache.commons.lang3.RandomStringUtils -import scala.collection.mutable -import scala.collection.mutable.ArrayBuffer - -import org.apache.spark.{SecurityManager, SPARK_VERSION => sparkVersion, SparkConf, SparkException, SSLOptions} -import org.apache.spark.deploy.SparkHadoopUtil -import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -import org.apache.spark.deploy.rest._ -import org.apache.spark.internal.config.OptionalConfigEntry -import org.apache.spark.util.{ShutdownHookManager, ThreadUtils, Utils} - -private case class KubernetesSparkRestServerArguments( - host: Option[String] = None, - port: Option[Int] = None, - useSsl: Boolean = false, - secretFile: Option[String] = None, - keyStoreFile: Option[String] = None, - keyStorePasswordFile: Option[String] = None, - keyStoreType: Option[String] = None, - keyPasswordFile: Option[String] = None, - keyPemFile: Option[String] = None, - certPemFile: Option[String] = None) { - def validate(): KubernetesSparkRestServerArguments = { - require(host.isDefined, "Hostname not set via --hostname.") - require(port.isDefined, "Port not set via --port") - require(secretFile.isDefined, "Secret file not set via --secret-file") - this - } -} - -private object KubernetesSparkRestServerArguments { - def fromArgsArray(inputArgs: Array[String]): KubernetesSparkRestServerArguments = { - var args = inputArgs.toList - var resolvedArguments = KubernetesSparkRestServerArguments() - while (args.nonEmpty) { - resolvedArguments = args match { - case "--hostname" :: value :: tail => - args = tail - resolvedArguments.copy(host = Some(value)) - case "--port" :: value :: tail => - args = tail - resolvedArguments.copy(port = Some(value.toInt)) - case "--secret-file" :: value :: tail => - args = tail - resolvedArguments.copy(secretFile = Some(value)) - case "--use-ssl" :: value :: tail => - args = tail - resolvedArguments.copy(useSsl = value.toBoolean) - case "--keystore-file" :: value :: tail => - args = tail - resolvedArguments.copy(keyStoreFile = Some(value)) - case "--keystore-password-file" :: value :: tail => - args = tail - resolvedArguments.copy(keyStorePasswordFile = Some(value)) - case "--keystore-type" :: value :: tail => - args = tail - resolvedArguments.copy(keyStoreType = Some(value)) - case "--keystore-key-password-file" :: value :: tail => - args = tail - resolvedArguments.copy(keyPasswordFile = Some(value)) - case "--key-pem-file" :: value :: tail => - args = tail - resolvedArguments.copy(keyPemFile = Some(value)) - case "--cert-pem-file" :: value :: tail => - args = tail - resolvedArguments.copy(certPemFile = Some(value)) - // TODO polish usage message - case Nil => resolvedArguments - case unknown => throw new IllegalStateException(s"Unknown argument(s) found: $unknown") - } - } - resolvedArguments.validate() - } -} - -/** - * Runs in the driver pod and receives a request to run an 
application. Note that - * unlike the submission rest server in standalone mode, this server is expected - * to be used to run one application only, and then shut down once that application - * is complete. - */ -private[spark] class KubernetesSparkRestServer( - host: String, - port: Int, - conf: SparkConf, - expectedApplicationSecret: Array[Byte], - shutdownLock: CountDownLatch, - exitCode: AtomicInteger, - sslOptions: SSLOptions = new SSLOptions) - extends RestSubmissionServer(host, port, conf, sslOptions) { - - private val SERVLET_LOCK = new Object - private val javaExecutable = s"${System.getenv("JAVA_HOME")}/bin/java" - private val sparkHome = System.getenv("SPARK_HOME") - private val securityManager = new SecurityManager(conf) - override protected lazy val contextToServlet = Map[String, RestServlet]( - s"$baseContext/create/*" -> submitRequestServlet, - s"$baseContext/ping/*" -> pingServlet) - - private val pingServlet = new PingServlet - override protected val submitRequestServlet: SubmitRequestServlet - = new KubernetesSubmitRequestServlet - // TODO - override protected val statusRequestServlet: StatusRequestServlet = null - override protected val killRequestServlet: KillRequestServlet = null - - private class PingServlet extends RestServlet { - protected override def doGet( - request: HttpServletRequest, - response: HttpServletResponse): Unit = { - sendResponse(new PingResponse, response) - } - } - - private class KubernetesSubmitRequestServlet extends SubmitRequestServlet { - - private val waitForProcessCompleteExecutor = ThreadUtils - .newDaemonSingleThreadExecutor("wait-for-spark-app-complete") - private var startedApplication = false - - // TODO validating the secret should be done as part of a header of the request. - // Instead here we have to specify the secret in the body. - override protected def handleSubmit( - requestMessageJson: String, - requestMessage: SubmitRestProtocolMessage, - responseServlet: HttpServletResponse): SubmitRestProtocolResponse = { - SERVLET_LOCK.synchronized { - if (startedApplication) { - throw new IllegalStateException("Application has already been submitted.") - } else { - requestMessage match { - case KubernetesCreateSubmissionRequest( - appResource, - mainClass, - appArgs, - sparkProperties, - secret, - driverPodKubernetesCredentials, - uploadedJars, - uploadedFiles) => - val decodedSecret = Base64.decodeBase64(secret) - if (!expectedApplicationSecret.sameElements(decodedSecret)) { - responseServlet.setStatus(HttpServletResponse.SC_UNAUTHORIZED) - handleError("Unauthorized to submit application.") - } else { - val tempDir = Utils.createTempDir() - val resolvedAppResource = resolveAppResource(appResource, tempDir) - val writtenJars = writeUploadedJars(uploadedJars, tempDir) - val writtenFiles = writeUploadedFiles(uploadedFiles) - val resolvedSparkProperties = new mutable.HashMap[String, String] - resolvedSparkProperties ++= sparkProperties - val originalJars = sparkProperties.get("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty) - - // The driver at this point has handed us the value of spark.jars verbatim as - // specified in spark-submit. At this point, remove all jars that were local - // to the submitting user's disk, and replace them with the paths that were - // written to disk above. 
- val onlyContainerLocalOrRemoteJars = KubernetesFileUtils - .getNonSubmitterLocalFiles(originalJars) - val resolvedJars = (writtenJars ++ - onlyContainerLocalOrRemoteJars ++ - Array(resolvedAppResource.sparkJarPath)).toSet - if (resolvedJars.nonEmpty) { - resolvedSparkProperties("spark.jars") = resolvedJars.mkString(",") - } else { - resolvedSparkProperties.remove("spark.jars") - } - - // Determining the driver classpath is similar. It's the combination of: - // - Jars written from uploads - // - Jars in (spark.jars + mainAppResource) that has a "local" prefix - // - spark.driver.extraClasspath - // - Spark core jars from the installation - val sparkCoreJars = new File(sparkHome, "jars").listFiles().map(_.getAbsolutePath) - val driverExtraClasspath = sparkProperties - .get("spark.driver.extraClassPath") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val onlyContainerLocalJars = KubernetesFileUtils - .getOnlyContainerLocalFiles(originalJars) - val driverClasspath = driverExtraClasspath ++ - Seq(resolvedAppResource.localPath) ++ - writtenJars ++ - onlyContainerLocalJars ++ - sparkCoreJars - - // Resolve spark.files similarly to spark.jars. - val originalFiles = sparkProperties.get("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val onlyContainerLocalOrRemoteFiles = KubernetesFileUtils - .getNonSubmitterLocalFiles(originalFiles) - val resolvedFiles = writtenFiles ++ onlyContainerLocalOrRemoteFiles - if (resolvedFiles.nonEmpty) { - resolvedSparkProperties("spark.files") = resolvedFiles.mkString(",") - } else { - resolvedSparkProperties.remove("spark.files") - } - resolvedSparkProperties ++= writeKubernetesCredentials( - driverPodKubernetesCredentials, tempDir) - - val command = new ArrayBuffer[String] - command += javaExecutable - command += "-cp" - command += s"${driverClasspath.mkString(":")}" - for (prop <- resolvedSparkProperties) { - command += s"-D${prop._1}=${prop._2}" - } - val driverMemory = resolvedSparkProperties.getOrElse("spark.driver.memory", "1g") - command += s"-Xms$driverMemory" - command += s"-Xmx$driverMemory" - val extraJavaOpts = resolvedSparkProperties.get("spark.driver.extraJavaOptions") - .map(Utils.splitCommandString) - .getOrElse(Seq.empty) - command ++= extraJavaOpts - command += mainClass - command ++= appArgs - val pb = new ProcessBuilder(command: _*).inheritIO() - val process = pb.start() - ShutdownHookManager.addShutdownHook(() => { - logInfo("Received stop command, shutting down the running Spark application...") - process.destroy() - shutdownLock.countDown() - }) - waitForProcessCompleteExecutor.submit(new Runnable { - override def run(): Unit = { - // set the REST service's exit code to the exit code of the driver subprocess - exitCode.set(process.waitFor) - SERVLET_LOCK.synchronized { - logInfo("Spark application complete. 
Shutting down submission server...") - KubernetesSparkRestServer.this.stop - shutdownLock.countDown() - } - } - }) - startedApplication = true - val response = new CreateSubmissionResponse - response.success = true - response.submissionId = null - response.message = "success" - response.serverSparkVersion = sparkVersion - response - } - case unexpected => - responseServlet.setStatus(HttpServletResponse.SC_BAD_REQUEST) - handleError(s"Received message of unexpected type ${unexpected.messageType}.") - } - } - } - } - - private def writeUploadedJars(jars: TarGzippedData, rootTempDir: File): - Seq[String] = { - val resolvedDirectory = new File(rootTempDir, "jars") - if (!resolvedDirectory.mkdir()) { - throw new IllegalStateException(s"Failed to create jars dir at " + - resolvedDirectory.getAbsolutePath) - } - CompressionUtils.unpackAndWriteCompressedFiles(jars, resolvedDirectory) - } - - private def writeUploadedFiles(files: TarGzippedData): Seq[String] = { - val workingDir = Paths.get("").toFile.getAbsoluteFile - CompressionUtils.unpackAndWriteCompressedFiles(files, workingDir) - } - - private def writeKubernetesCredentials( - kubernetesCredentials: KubernetesCredentials, - rootTempDir: File): Map[String, String] = { - val resolvedDirectory = new File(rootTempDir, "kubernetes-credentials") - if (!resolvedDirectory.mkdir()) { - throw new IllegalStateException(s"Failed to create credentials dir at " - + resolvedDirectory.getAbsolutePath) - } - val oauthTokenFile = writeRawStringCredentialAndGetConf("oauth-token.txt", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, - kubernetesCredentials.oauthTokenBase64.map { base64 => - new String(BaseEncoding.base64().decode(base64), Charsets.UTF_8) - }) - val caCertFile = writeBase64CredentialAndGetConf("ca.crt", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, kubernetesCredentials.caCertDataBase64) - val clientKeyFile = writeBase64CredentialAndGetConf("key.key", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE, kubernetesCredentials.clientKeyDataBase64) - val clientCertFile = writeBase64CredentialAndGetConf("cert.crt", resolvedDirectory, - KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE, kubernetesCredentials.clientCertDataBase64) - (oauthTokenFile ++ caCertFile ++ clientKeyFile ++ clientCertFile).toMap - } - - private def writeRawStringCredentialAndGetConf( - fileName: String, - dir: File, - conf: OptionalConfigEntry[String], - credential: Option[String]): Option[(String, String)] = { - credential.map { cred => - val credentialFile = new File(dir, fileName) - Files.write(cred, credentialFile, Charsets.UTF_8) - (conf.key, credentialFile.getAbsolutePath) - } - } - - private def writeBase64CredentialAndGetConf( - fileName: String, - dir: File, - conf: OptionalConfigEntry[String], - credential: Option[String]): Option[(String, String)] = { - credential.map { cred => - val credentialFile = new File(dir, fileName) - Files.write(BaseEncoding.base64().decode(cred), credentialFile) - (conf.key, credentialFile.getAbsolutePath) - } - } - - /** - * Retrieve the path on the driver container where the main app resource is, and what value it - * ought to have in the spark.jars property. The two may be different because for non-local - * dependencies, we have to fetch the resource (if it is not "local") but still want to use - * the full URI in spark.jars. 
- */ - private def resolveAppResource(appResource: AppResource, tempDir: File): - ResolvedAppResource = { - appResource match { - case UploadedAppResource(resourceContentsBase64, resourceName) => - val resourceFile = new File(tempDir, resourceName) - val resourceFilePath = resourceFile.getAbsolutePath - if (resourceFile.createNewFile()) { - Utils.tryWithResource(new StringReader(resourceContentsBase64)) { reader => - Utils.tryWithResource(new FileOutputStream(resourceFile)) { os => - Utils.tryWithResource(BaseEncoding.base64().decodingStream(reader)) { - decodingStream => - ByteStreams.copy(decodingStream, os) - } - } - } - ResolvedAppResource(resourceFile.getAbsolutePath, resourceFile.getAbsolutePath) - } else { - throw new IllegalStateException(s"Failed to write main app resource file" + - s" to $resourceFilePath") - } - case ContainerAppResource(resource) => - ResolvedAppResource(Utils.resolveURI(resource).getPath, resource) - case RemoteAppResource(resource) => - Utils.fetchFile(resource, tempDir, conf, - securityManager, SparkHadoopUtil.get.newConfiguration(conf), - System.currentTimeMillis(), useCache = false) - val fileName = Utils.decodeFileNameInURI(URI.create(resource)) - val downloadedFile = new File(tempDir, fileName) - val downloadedFilePath = downloadedFile.getAbsolutePath - if (!downloadedFile.isFile) { - throw new IllegalStateException(s"Main app resource is not a file or" + - s" does not exist at $downloadedFilePath") - } - ResolvedAppResource(downloadedFilePath, resource) - } - } - } - - private case class ResolvedAppResource(localPath: String, sparkJarPath: String) -} - -private[spark] object KubernetesSparkRestServer { - private val barrier = new CountDownLatch(1) - private val SECURE_RANDOM = new SecureRandom() - - def main(args: Array[String]): Unit = { - val parsedArguments = KubernetesSparkRestServerArguments.fromArgsArray(args) - val secretFile = new File(parsedArguments.secretFile.get) - require(secretFile.isFile, "Secret file specified by --secret-file is not a file, or" + - " does not exist.") - val sslOptions = if (parsedArguments.useSsl) { - validateSslOptions(parsedArguments) - val keyPassword = parsedArguments - .keyPasswordFile - .map(new File(_)) - .map(Files.toString(_, Charsets.UTF_8)) - // If key password isn't set but we're using PEM files, generate a password - .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) - val keyStorePassword = parsedArguments - .keyStorePasswordFile - .map(new File(_)) - .map(Files.toString(_, Charsets.UTF_8)) - // If keystore password isn't set but we're using PEM files, generate a password - .orElse(parsedArguments.keyPemFile.map(_ => randomPassword())) - val resolvedKeyStore = parsedArguments.keyStoreFile.map(new File(_)).orElse( - for { - keyPemFile <- parsedArguments.keyPemFile - certPemFile <- parsedArguments.certPemFile - resolvedKeyStorePassword <- keyStorePassword - resolvedKeyPassword <- keyPassword - } yield { - PemsToKeyStoreConverter.convertPemsToTempKeyStoreFile( - new File(keyPemFile), - new File(certPemFile), - "provided-key", - resolvedKeyStorePassword, - resolvedKeyPassword, - parsedArguments.keyStoreType) - }) - new SSLOptions( - enabled = true, - keyStore = resolvedKeyStore, - keyStoreType = parsedArguments.keyStoreType, - keyStorePassword = keyStorePassword, - keyPassword = keyPassword) - } else { - new SSLOptions - } - val secretBytes = Files.toByteArray(secretFile) - val sparkConf = new SparkConf(true) - val exitCode = new AtomicInteger(0) - val server = new KubernetesSparkRestServer( - 
parsedArguments.host.get, - parsedArguments.port.get, - sparkConf, - secretBytes, - barrier, - exitCode, - sslOptions) - server.start() - ShutdownHookManager.addShutdownHook(() => { - try { - server.stop() - } finally { - barrier.countDown() - } - }) - barrier.await() - System.exit(exitCode.get()) - } - - private def validateSslOptions(parsedArguments: KubernetesSparkRestServerArguments): Unit = { - parsedArguments.keyStoreFile.foreach { _ => - require(parsedArguments.keyPemFile.orElse(parsedArguments.certPemFile).isEmpty, - "Cannot provide both key/cert PEM files and a keyStore file; select one or the other" + - " for configuring SSL.") - } - parsedArguments.keyPemFile.foreach { _ => - require(parsedArguments.certPemFile.isDefined, - "When providing the key PEM file, the certificate PEM file must also be provided.") - } - parsedArguments.certPemFile.foreach { _ => - require(parsedArguments.keyPemFile.isDefined, - "When providing the certificate PEM file, the key PEM file must also be provided.") - } - } - - private def randomPassword(): String = { - RandomStringUtils.random(1024, 0, Integer.MAX_VALUE, false, false, null, SECURE_RANDOM) - } -} - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala deleted file mode 100644 index 56ff82ea2fc33..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/v1/MultiServerFeignTarget.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.rest.kubernetes.v1 - -import feign.{Request, RequestTemplate, RetryableException, Retryer, Target} -import scala.reflect.ClassTag -import scala.util.Random - -import org.apache.spark.internal.Logging - -private[kubernetes] class MultiServerFeignTarget[T : ClassTag]( - private val servers: Seq[String], - private val maxRetriesPerServer: Int = 1, - private val delayBetweenRetriesMillis: Int = 1000) extends Target[T] with Retryer with Logging { - require(servers.nonEmpty, "Must provide at least one server URI.") - - private val threadLocalShuffledServers = new ThreadLocal[Seq[String]] { - override def initialValue(): Seq[String] = Random.shuffle(servers) - } - private val threadLocalCurrentAttempt = new ThreadLocal[Int] { - override def initialValue(): Int = 0 - } - - override def `type`(): Class[T] = { - implicitly[ClassTag[T]].runtimeClass.asInstanceOf[Class[T]] - } - - /** - * Cloning the target is done on every request, for use on the current - * thread - thus it's important that clone returns a "fresh" target. 
- */ - override def clone(): Retryer = { - reset() - this - } - - override def name(): String = { - s"${getClass.getSimpleName} with servers [${servers.mkString(",")}]" - } - - override def apply(requestTemplate: RequestTemplate): Request = { - if (!requestTemplate.url().startsWith("http")) { - requestTemplate.insert(0, url()) - } - requestTemplate.request() - } - - override def url(): String = threadLocalShuffledServers.get.head - - override def continueOrPropagate(e: RetryableException): Unit = { - threadLocalCurrentAttempt.set(threadLocalCurrentAttempt.get + 1) - val currentAttempt = threadLocalCurrentAttempt.get - if (threadLocalCurrentAttempt.get < maxRetriesPerServer) { - logWarning(s"Attempt $currentAttempt of $maxRetriesPerServer failed for" + - s" server ${url()}. Retrying request...", e) - Thread.sleep(delayBetweenRetriesMillis) - } else { - val previousUrl = url() - threadLocalShuffledServers.set(threadLocalShuffledServers.get.drop(1)) - if (threadLocalShuffledServers.get.isEmpty) { - logError(s"Failed request to all servers $maxRetriesPerServer times.", e) - throw e - } else { - logWarning(s"Failed request to $previousUrl $maxRetriesPerServer times." + - s" Trying to access ${url()} instead.", e) - threadLocalCurrentAttempt.set(0) - } - } - } - - def reset(): Unit = { - threadLocalShuffledServers.set(Random.shuffle(servers)) - threadLocalCurrentAttempt.set(0) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala index 886484ffb4692..8de0f56f007dc 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SSLUtils.scala @@ -30,7 +30,7 @@ import org.bouncycastle.cert.jcajce.{JcaX509CertificateConverter, JcaX509v3Certi import org.bouncycastle.openssl.jcajce.JcaPEMWriter import org.bouncycastle.operator.jcajce.JcaContentSignerBuilder -import org.apache.spark.deploy.kubernetes.submit.v2.{KeyAndCertPem, KeyStoreAndTrustStore} +import org.apache.spark.deploy.kubernetes.submit.{KeyAndCertPem, KeyStoreAndTrustStore} import org.apache.spark.util.Utils private[spark] object SSLUtils { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala similarity index 99% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 9ad46e52747fd..d4d3882bb8bab 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File @@ -35,7 +35,6 @@ import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.kubernetes.SparkPodInitContainerBootstrap import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.LoggingPodStatusWatcher class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala similarity index 98% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala index 6804f0010b6a5..ca5cd1fff9b74 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ContainerLocalizedFilesResolverSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.SparkFunSuite diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala similarity index 99% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala index d4413076fb092..c1005a176408c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/DriverPodKubernetesCredentialsMounterSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala @@ -14,16 +14,16 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.api.model.{PodBuilder, SecretBuilder} import org.scalatest.prop.TableDrivenPropertyChecks import scala.collection.JavaConverters._ import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.KubernetesCredentials class DriverPodKubernetesCredentialsMounterSuite extends SparkFunSuite with TableDrivenPropertyChecks { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala similarity index 97% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala index 62bfd127d17e2..ead1d49b8a37c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/ExecutorInitContainerConfigurationSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import org.apache.spark.{SparkConf, SparkFunSuite} import org.apache.spark.deploy.kubernetes.config._ diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SSLFilePairs.scala similarity index 94% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SSLFilePairs.scala index 3d3ff7ad7011a..5240128743b76 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SSLFilePairs.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SSLFilePairs.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala similarity index 98% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala index 7c6fbf5ce6da2..f1e1ff7013496 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SparkInitContainerConfigMapBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.StringReader import java.util.Properties diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala similarity index 96% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala index 09b41dc1bcaaf..8431b77c9e85f 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyInitContainerConfigPluginSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala @@ -14,11 +14,9 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit -import java.io.File - -import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.SparkFunSuite import org.apache.spark.deploy.kubernetes.config._ class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala similarity index 97% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala index 358edbecf8708..83fd568e7a3aa 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencySecretBuilderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.File @@ -24,7 +24,7 @@ import io.fabric8.kubernetes.api.model.Secret import scala.collection.JavaConverters._ import scala.collection.Map -import org.apache.spark.{SparkFunSuite, SSLOptions} +import org.apache.spark.SparkFunSuite import org.apache.spark.util.Utils class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala similarity index 97% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala index 7b259aa2c3a0c..8693ff4e15372 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/v2/SubmittedDependencyUploaderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.kubernetes.submit.v2 +package org.apache.spark.deploy.kubernetes.submit import java.io.{ByteArrayInputStream, ByteArrayOutputStream, File} import java.util.UUID @@ -35,7 +35,7 @@ import retrofit2.{Call, Response} import org.apache.spark.{SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.CompressionUtils -import org.apache.spark.deploy.rest.kubernetes.v2.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory} import org.apache.spark.util.Utils private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with BeforeAndAfter { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainerSuite.scala similarity index 98% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainerSuite.scala index c551fbc01d060..f2fdf026390cd 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/KubernetesSparkDependencyDownloadInitContainerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainerSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{ByteArrayOutputStream, File} import java.util.UUID @@ -32,7 +32,7 @@ import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar._ import retrofit2.{Call, Callback, Response} -import org.apache.spark.{SecurityManager => SparkSecurityManager, SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.CompressionUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.util.Utils diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProviderSuite.scala similarity index 99% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProviderSuite.scala index c33d8beb2c397..3bb318d713a54 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSslOptionsProviderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSslOptionsProviderSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{File, FileInputStream, StringWriter} import java.security.KeyStore diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala similarity index 99% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala index 4ffb0d4dfa887..0604e0d6494ae 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.net.ServerSocket import javax.ws.rs.core.MediaType diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala similarity index 98% rename from resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala rename to resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala index 9677d12681a16..53396a3f27a1a 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/v2/ResourceStagingServiceImplSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.rest.kubernetes.v2 +package org.apache.spark.deploy.rest.kubernetes import java.io.{ByteArrayInputStream, File} import java.nio.file.Paths diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile deleted file mode 100644 index 40f9459dc06dc..0000000000000 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-v2/Dockerfile +++ /dev/null @@ -1,43 +0,0 @@ -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. 
You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# - -FROM openjdk:8-alpine - -# If this docker file is being used in the context of building your images from a Spark distribution, the docker build -# command should be invoked from the top level directory of the Spark distribution. E.g.: -# docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile . - -RUN apk upgrade --update -RUN apk add --update bash -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD examples /opt/spark/examples -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark - -CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ - if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ - if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ - if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ - exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 8ab7a58704505..40f9459dc06dc 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -36,16 +36,8 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -CMD SSL_ARGS="" && \ - if ! [ -z ${SPARK_SUBMISSION_USE_SSL+x} ]; then SSL_ARGS="$SSL_ARGS --use-ssl $SPARK_SUBMISSION_USE_SSL"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-file $SPARK_SUBMISSION_KEYSTORE_FILE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_TYPE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-type $SPARK_SUBMISSION_KEYSTORE_TYPE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-password-file $SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --keystore-key-password-file $SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_KEY_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --key-pem-file $SPARK_SUBMISSION_KEY_PEM_FILE"; fi && \ - if ! [ -z ${SPARK_SUBMISSION_CERT_PEM_FILE+x} ]; then SSL_ARGS="$SSL_ARGS --cert-pem-file $SPARK_SUBMISSION_CERT_PEM_FILE"; fi && \ - exec bin/spark-class org.apache.spark.deploy.rest.kubernetes.v1.KubernetesSparkRestServer \ - --hostname $HOSTNAME \ - --port $SPARK_SUBMISSION_SERVER_PORT \ - --secret-file $SPARK_SUBMISSION_SECRET_LOCATION \ - ${SSL_ARGS} +CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ + if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! 
[ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile similarity index 95% rename from resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile rename to resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index 59029a6c08b4a..bb249a4ea86b6 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-init/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -35,4 +35,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.v2.KubernetesSparkDependencyDownloadInitContainer" ] +ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index 15e1ce75815df..125749c71c79a 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -35,4 +35,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.v2.ResourceStagingServer" ] +ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 56fcf692b8ff7..d23bfcdbc5251 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -17,31 +17,257 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.nio.file.Paths +import java.util.UUID import com.google.common.base.Charsets import com.google.common.io.Files -import org.scalatest.Suite -import org.scalatest.concurrent.PatienceConfiguration +import io.fabric8.kubernetes.client.internal.readiness.Readiness +import org.scalatest.BeforeAndAfter +import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.{Minutes, Seconds, Span} +import scala.collection.JavaConverters._ -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.integrationtest.backend.{IntegrationTestBackend, IntegrationTestBackendFactory} +import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} +import org.apache.spark.deploy.kubernetes.SSLUtils 
+import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackendFactory +import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube +import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND +import org.apache.spark.deploy.kubernetes.submit.{Client, KeyAndCertPem} +import org.apache.spark.launcher.SparkLauncher -private[spark] class KubernetesSuite extends SparkFunSuite { - private val testBackend: IntegrationTestBackend = IntegrationTestBackendFactory.getTestBackend() +private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { + import KubernetesSuite._ + private val testBackend = IntegrationTestBackendFactory.getTestBackend() + + private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") + private var kubernetesTestComponents: KubernetesTestComponents = _ + private var sparkConf: SparkConf = _ + private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ + private var staticAssetServerLauncher: StaticAssetServerLauncher = _ override def beforeAll(): Unit = { testBackend.initialize() + kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) + resourceStagingServerLauncher = new ResourceStagingServerLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) + staticAssetServerLauncher = new StaticAssetServerLauncher( + kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) } override def afterAll(): Unit = { testBackend.cleanUp() } - override def nestedSuites: scala.collection.immutable.IndexedSeq[Suite] = { - Vector( - new KubernetesV1Suite(testBackend), - new KubernetesV2Suite(testBackend)) + before { + sparkConf = kubernetesTestComponents.newSparkConf() + .set(INIT_CONTAINER_DOCKER_IMAGE, s"spark-init:latest") + .set(DRIVER_DOCKER_IMAGE, s"spark-driver:latest") + .set(KUBERNETES_DRIVER_LABELS, s"spark-app-locator=$APP_LOCATOR_LABEL") + kubernetesTestComponents.createNamespace() + } + + after { + kubernetesTestComponents.deleteNamespace() + } + + test("Simple submission test with the resource staging server.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + launchStagingServer(SSLOptions(), None) + runSparkPiAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Enable SSL on the resource staging server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( + ipAddress = Minikube.getMinikubeIp, + keyStorePassword = "keyStore", + keyPassword = "key", + trustStorePassword = "trustStore") + sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", + keyStoreAndTrustStore.keyStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", + keyStoreAndTrustStore.trustStore.getAbsolutePath) + .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStore") + .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "key") + .set("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword", "trustStore") + launchStagingServer(SSLOptions( + enabled = true, + keyStore = Some(keyStoreAndTrustStore.keyStore), + trustStore = Some(keyStoreAndTrustStore.trustStore), + keyStorePassword = Some("keyStore"), + keyPassword = Some("key"), + trustStorePassword = Some("trustStore")), + None) + 
runSparkPiAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Use container-local resources without the resource staging server") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) + runSparkPiAndVerifyCompletion(CONTAINER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Dynamic executor scaling basic test") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + launchStagingServer(SSLOptions(), None) + createShuffleServiceDaemonSet() + + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf.set("spark.dynamicAllocation.enabled", "true") + sparkConf.set("spark.shuffle.service.enabled", "true") + sparkConf.set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service") + sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) + sparkConf.set("spark.app.name", "group-by-test") + runSparkGroupByTestAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Use remote resources without the resource staging server.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() + sparkConf.setJars(Seq( + s"$assetServerUri/${EXAMPLES_JAR_FILE.getName}", + s"$assetServerUri/${HELPER_JAR_FILE.getName}" + )) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + + test("Mix remote resources with submitted ones.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + launchStagingServer(SSLOptions(), None) + val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() + sparkConf.setJars(Seq( + SUBMITTER_LOCAL_MAIN_APP_RESOURCE, s"$assetServerUri/${HELPER_JAR_FILE.getName}" + )) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + + test("Use key and certificate PEM files for TLS.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val keyAndCertificate = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) + launchStagingServer( + SSLOptions(enabled = true), + Some(keyAndCertificate)) + sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) + .set( + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key, keyAndCertificate.certPem.getAbsolutePath) + runSparkPiAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + } + + test("Use client key and client cert file when requesting executors") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + sparkConf.setJars(Seq( + CONTAINER_LOCAL_MAIN_APP_RESOURCE, + CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + kubernetesTestComponents.clientConfig.getClientKeyFile) + sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + kubernetesTestComponents.clientConfig.getClientCertFile) + sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, + kubernetesTestComponents.clientConfig.getCaCertFile) + runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) + } + + private def launchStagingServer( + resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( + resourceStagingServerSslOptions, keyAndCertPem) + val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { + "https" + } else { + "http" + } + sparkConf.set(RESOURCE_STAGING_SERVER_URI, + s"$resourceStagingServerUriScheme://" + + s"${Minikube.getMinikubeIp}:$resourceStagingServerPort") + } + + private def runSparkPiAndVerifyCompletion(appResource: String): 
Unit = { + Client.run(sparkConf, appResource, SPARK_PI_MAIN_CLASS, Array.empty[String]) + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", APP_LOCATOR_LABEL) + .list() + .getItems + .get(0) + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains("Pi is roughly 3"), "The application did not compute the value of pi.") + } + } + + private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { + Client.run( + sparkConf = sparkConf, + appArgs = Array.empty[String], + mainClass = GROUP_BY_MAIN_CLASS, + mainAppResource = appResource) + val driverPod = kubernetesTestComponents.kubernetesClient + .pods() + .withLabel("spark-app-locator", APP_LOCATOR_LABEL) + .list() + .getItems + .get(0) + Eventually.eventually(TIMEOUT, INTERVAL) { + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains("The Result is"), "The application did not complete.") + } + } + + private def createShuffleServiceDaemonSet(): Unit = { + val ds = kubernetesTestComponents.kubernetesClient.extensions().daemonSets() + .createNew() + .withNewMetadata() + .withName("shuffle") + .endMetadata() + .withNewSpec() + .withNewTemplate() + .withNewMetadata() + .withLabels(Map("app" -> "spark-shuffle-service").asJava) + .endMetadata() + .withNewSpec() + .addNewVolume() + .withName("shuffle-dir") + .withNewHostPath() + .withPath("/tmp") + .endHostPath() + .endVolume() + .addNewContainer() + .withName("shuffle") + .withImage("spark-shuffle:latest") + .withImagePullPolicy("IfNotPresent") + .addNewVolumeMount() + .withName("shuffle-dir") + .withMountPath("/tmp") + .endVolumeMount() + .endContainer() + .endSpec() + .endTemplate() + .endSpec() + .done() + + // wait for daemonset to become available. 
+ Eventually.eventually(TIMEOUT, INTERVAL) { + val pods = kubernetesTestComponents.kubernetesClient.pods() + .withLabel("app", "spark-shuffle-service").list().getItems + + if (pods.size() == 0 || !Readiness.isReady(pods.get(0))) { + throw ShuffleNotReadyException + } + } } } @@ -70,5 +296,5 @@ private[spark] object KubernetesSuite { val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.GroupByTest" - case class ShuffleNotReadyException() extends Exception + case object ShuffleNotReadyException extends Exception } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala index 677c0db606a47..9ae0d9ade7dc2 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -17,18 +17,13 @@ package org.apache.spark.deploy.kubernetes.integrationtest import java.util.UUID -import javax.net.ssl.X509TrustManager - -import scala.collection.JavaConverters._ -import scala.reflect.ClassTag import io.fabric8.kubernetes.client.DefaultKubernetesClient -import io.fabric8.kubernetes.client.internal.SSLUtils import org.scalatest.concurrent.Eventually +import scala.collection.JavaConverters._ import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.rest.kubernetes.v1.HttpClientUtil private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesClient) { @@ -73,26 +68,4 @@ private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesCl .set("spark.testing", "false") .set(WAIT_FOR_APP_COMPLETION, false) } - - def getService[T: ClassTag]( - serviceName: String, - namespace: String, - servicePortName: String, - servicePath: String = ""): T = synchronized { - val kubernetesMaster = s"${defaultClient.getMasterUrl}" - - val url = s"${ - Array[String]( - s"${kubernetesClient.getMasterUrl}", - "api", "v1", "proxy", - "namespaces", namespace, - "services", serviceName).mkString("/") - }" + - s":$servicePortName$servicePath" - val userHome = System.getProperty("user.home") - val kubernetesConf = kubernetesClient.getConfiguration - val sslContext = SSLUtils.sslContext(kubernetesConf) - val trustManager = SSLUtils.trustManagers(kubernetesConf)(0).asInstanceOf[X509TrustManager] - HttpClientUtil.createClient[T](Set(url), 5, sslContext.getSocketFactory, trustManager) - } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala deleted file mode 100644 index 559cb281c7c62..0000000000000 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV1Suite.scala +++ /dev/null @@ -1,339 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
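The consolidated suite above verifies each job the same way: it locates the driver pod through the spark-app-locator label and polls that pod's log until an expected message appears. Below is a minimal, self-contained sketch of just that polling step, assuming a fabric8 DefaultKubernetesClient and ScalaTest are on the classpath; the label key, timeout values, and message argument are placeholders lifted from the suite, not a prescribed API.

    import io.fabric8.kubernetes.client.DefaultKubernetesClient
    import org.scalatest.concurrent.{Eventually, PatienceConfiguration}
    import org.scalatest.time.{Minutes, Seconds, Span}

    // Sketch: wait until the driver pod's log contains an expected completion message.
    object DriverLogPollerSketch {
      private val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes))
      private val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds))

      def awaitDriverLogMessage(
          client: DefaultKubernetesClient,
          appLocatorLabel: String,
          expectedMessage: String): Unit = {
        // The submission client labels the driver pod, so the first match is the driver.
        val driverPodName = client.pods()
          .withLabel("spark-app-locator", appLocatorLabel)
          .list()
          .getItems
          .get(0)
          .getMetadata
          .getName
        // Re-read the log on each retry until the message shows up or the timeout elapses.
        Eventually.eventually(TIMEOUT, INTERVAL) {
          assert(client.pods().withName(driverPodName).getLog.contains(expectedMessage))
        }
      }
    }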
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.integrationtest - -import java.util.concurrent.TimeUnit - -import scala.collection.JavaConverters._ - -import com.google.common.collect.ImmutableList -import com.google.common.util.concurrent.SettableFuture -import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import org.scalatest.{BeforeAndAfter, DoNotDiscover} -import org.scalatest.concurrent.Eventually - -import org.apache.spark.{SparkConf, SparkException, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.SSLUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend -import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube -import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.integrationtest.restapis.SparkRestApiV1 -import org.apache.spark.deploy.kubernetes.submit.v1.{Client, ExternalSuppliedUrisDriverServiceManager} -import org.apache.spark.status.api.v1.{ApplicationStatus, StageStatus} -import org.apache.spark.util.Utils - -@DoNotDiscover -private[spark] class KubernetesV1Suite(testBackend: IntegrationTestBackend) - extends SparkFunSuite with BeforeAndAfter { - - private var kubernetesTestComponents: KubernetesTestComponents = _ - private var sparkConf: SparkConf = _ - - override def beforeAll(): Unit = { - kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) - kubernetesTestComponents.createNamespace() - } - - override def afterAll(): Unit = { - kubernetesTestComponents.deleteNamespace() - } - - before { - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val podsList = kubernetesTestComponents.kubernetesClient.pods().list() - assert(podsList == null - || podsList.getItems == null - || podsList.getItems.isEmpty - ) - val servicesList = kubernetesTestComponents.kubernetesClient.services().list() - assert(servicesList == null - || servicesList.getItems == null - || servicesList.getItems.isEmpty) - } - sparkConf = kubernetesTestComponents.newSparkConf() - } - - after { - val pods = kubernetesTestComponents.kubernetesClient.pods().list().getItems.asScala - pods.par.foreach(pod => { - kubernetesTestComponents.kubernetesClient.pods() - .withName(pod.getMetadata.getName) - .withGracePeriod(60) - .delete - }) - } - - private def getSparkMetricsService(sparkBaseAppName: String): SparkRestApiV1 = { - val serviceName = kubernetesTestComponents.kubernetesClient.services() - .withLabel("spark-app-name", sparkBaseAppName) - .list() - .getItems - .get(0) - .getMetadata - .getName - kubernetesTestComponents.getService[SparkRestApiV1](serviceName, - kubernetesTestComponents.namespace, 
"spark-ui-port") - } - - private def expectationsForStaticAllocation(sparkMetricsService: SparkRestApiV1): Unit = { - val apps = Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val result = sparkMetricsService - .getApplications(ImmutableList.of(ApplicationStatus.RUNNING, ApplicationStatus.COMPLETED)) - assert(result.size == 1 - && !result.head.id.equalsIgnoreCase("appid") - && !result.head.id.equalsIgnoreCase("{appId}")) - result - } - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val result = sparkMetricsService.getExecutors(apps.head.id) - assert(result.size == 2) - assert(result.count(exec => exec.id != "driver") == 1) - result - } - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val result = sparkMetricsService.getStages( - apps.head.id, Seq(StageStatus.COMPLETE).asJava) - assert(result.size == 1) - result - } - } - - test("Run a simple example") { - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - - test("Run with the examples jar on the docker image") { - sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - - test("Run with custom labels and annotations") { - sparkConf.set(KUBERNETES_DRIVER_LABELS, "label1=label1value,label2=label2value") - sparkConf.set(KUBERNETES_DRIVER_ANNOTATIONS, "annotation1=annotation1value," + - "annotation2=annotation2value") - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val driverPodMetadata = kubernetesTestComponents.kubernetesClient - .pods - .withLabel("spark-app-name", "spark-pi") - .list() - .getItems - .get(0) - .getMetadata - val driverPodLabels = driverPodMetadata.getLabels - // We can't match all of the selectors directly since one of the selectors is based on the - // launch time. 
- assert(driverPodLabels.size === 5, "Unexpected number of pod labels.") - assert(driverPodLabels.get("spark-app-name") === "spark-pi", "Unexpected value for" + - " spark-app-name label.") - assert(driverPodLabels.get("spark-app-id").startsWith("spark-pi"), "Unexpected value for" + - " spark-app-id label (should be prefixed with the app name).") - assert(driverPodLabels.get("label1") === "label1value", "Unexpected value for label1") - assert(driverPodLabels.get("label2") === "label2value", "Unexpected value for label2") - val driverPodAnnotations = driverPodMetadata.getAnnotations - assert(driverPodAnnotations.size === 2, "Unexpected number of pod annotations.") - assert(driverPodAnnotations.get("annotation1") === "annotation1value", - "Unexpected value for annotation1") - assert(driverPodAnnotations.get("annotation2") === "annotation2value", - "Unexpected value for annotation2") - } - - test("Run with driver pod name") { - sparkConf.set(KUBERNETES_DRIVER_POD_NAME, "spark-pi") - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val driverPodMetadata = kubernetesTestComponents.kubernetesClient - .pods() - .withName("spark-pi") - .get() - .getMetadata() - val driverName = driverPodMetadata.getName - assert(driverName === "spark-pi", "Unexpected driver pod name.") - } - - test("Enable SSL on the driver submit server") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( - Minikube.getMinikubeIp, - "changeit", - "changeit", - "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_KEYSTORE, - s"file://${keyStoreAndTrustStore.keyStore.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyStorePassword", "changeit") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.keyPassword", "changeit") - sparkConf.set(KUBERNETES_DRIVER_SUBMIT_SSL_TRUSTSTORE, - s"file://${keyStoreAndTrustStore.trustStore.getAbsolutePath}") - sparkConf.set("spark.ssl.kubernetes.driversubmitserver.trustStorePassword", "changeit") - sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - } - - test("Enable SSL on the driver submit server using PEM files") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val keyAndCertPem = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) - sparkConf.set(DRIVER_SUBMIT_SSL_KEY_PEM, s"file://${keyAndCertPem.keyPem.getAbsolutePath}") - sparkConf.set( - DRIVER_SUBMIT_SSL_CLIENT_CERT_PEM, s"file://${keyAndCertPem.certPem.getAbsolutePath}") - sparkConf.set( - DRIVER_SUBMIT_SSL_SERVER_CERT_PEM, s"file://${keyAndCertPem.certPem.getAbsolutePath}") - sparkConf.set(DRIVER_SUBMIT_SSL_ENABLED, true) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - } - - test("Added files should exist on the driver.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - sparkConf.set("spark.files", KubernetesSuite.TEST_EXISTENCE_FILE.getAbsolutePath) - sparkConf.setAppName("spark-file-existence-test") - val podCompletedFuture = SettableFuture.create[Boolean] - val watch = new Watcher[Pod] { - override def 
eventReceived(action: Action, pod: Pod): Unit = { - val containerStatuses = pod.getStatus.getContainerStatuses.asScala - val allSuccessful = containerStatuses.nonEmpty && containerStatuses - .forall(status => { - status.getState.getTerminated != null && status.getState.getTerminated.getExitCode == 0 - }) - if (allSuccessful) { - podCompletedFuture.set(true) - } else { - val failedContainers = containerStatuses.filter(container => { - container.getState.getTerminated != null && - container.getState.getTerminated.getExitCode != 0 - }) - if (failedContainers.nonEmpty) { - podCompletedFuture.setException(new SparkException( - "One or more containers in the driver failed with a nonzero exit code.")) - } - } - } - - override def onClose(e: KubernetesClientException): Unit = { - logWarning("Watch closed", e) - } - } - Utils.tryWithResource(kubernetesTestComponents.kubernetesClient - .pods - .withLabel("spark-app-name", "spark-file-existence-test") - .watch(watch)) { _ => - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.FILE_EXISTENCE_MAIN_CLASS, - mainAppResource = KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array(KubernetesSuite.TEST_EXISTENCE_FILE.getName, - KubernetesSuite.TEST_EXISTENCE_FILE_CONTENTS)).run() - assert(podCompletedFuture.get(60, TimeUnit.SECONDS), "Failed to run driver pod") - val driverPod = kubernetesTestComponents.kubernetesClient - .pods - .withLabel("spark-app-name", "spark-file-existence-test") - .list() - .getItems - .get(0) - val podLog = kubernetesTestComponents.kubernetesClient - .pods - .withName(driverPod.getMetadata.getName) - .getLog - assert(podLog.contains(s"File found at" + - s" /opt/spark/${KubernetesSuite.TEST_EXISTENCE_FILE.getName} with correct contents."), - "Job did not find the file as expected.") - } - } - - test("Use external URI provider") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val externalUriProviderWatch = - new ExternalUriProviderWatch(kubernetesTestComponents.kubernetesClient) - Utils.tryWithResource(kubernetesTestComponents.kubernetesClient.services() - .withLabel("spark-app-name", "spark-pi") - .watch(externalUriProviderWatch)) { _ => - sparkConf.set(DRIVER_SERVICE_MANAGER_TYPE, ExternalSuppliedUrisDriverServiceManager.TYPE) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - assert(externalUriProviderWatch.annotationSet.get) - val driverService = kubernetesTestComponents.kubernetesClient - .services() - .withLabel("spark-app-name", "spark-pi") - .list() - .getItems - .asScala(0) - assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_PROVIDE_EXTERNAL_URI), - "External URI request annotation was not set on the driver service.") - // Unfortunately we can't check the correctness of the actual value of the URI, as it depends - // on the driver submission port set on the driver service but we remove that port from the - // service once the submission is complete. 
- assert(driverService.getMetadata.getAnnotations.containsKey(ANNOTATION_RESOLVED_EXTERNAL_URI), - "Resolved URI annotation not set on driver service.") - } - } - - test("Mount the Kubernetes credentials onto the driver pod") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, - kubernetesTestComponents.clientConfig.getCaCertFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, - kubernetesTestComponents.clientConfig.getClientKeyFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, - kubernetesTestComponents.clientConfig.getClientCertFile) - new Client( - sparkConf = sparkConf, - mainClass = KubernetesSuite.SPARK_PI_MAIN_CLASS, - mainAppResource = KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - appArgs = Array.empty[String]).run() - val sparkMetricsService = getSparkMetricsService("spark-pi") - expectationsForStaticAllocation(sparkMetricsService) - } - -} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala deleted file mode 100644 index e9900b90cb588..0000000000000 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesV2Suite.scala +++ /dev/null @@ -1,265 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.integrationtest - -import java.util.UUID - -import io.fabric8.kubernetes.client.internal.readiness.Readiness -import org.scalatest.{BeforeAndAfter, DoNotDiscover} -import org.scalatest.concurrent.Eventually -import scala.collection.JavaConverters._ - -import org.apache.spark.{SparkConf, SparkFunSuite, SSLOptions} -import org.apache.spark.deploy.kubernetes.SSLUtils -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackend -import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube -import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.submit.v2.{Client, KeyAndCertPem} -import org.apache.spark.launcher.SparkLauncher - -@DoNotDiscover -private[spark] class KubernetesV2Suite(testBackend: IntegrationTestBackend) - extends SparkFunSuite with BeforeAndAfter { - - private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") - private var kubernetesTestComponents: KubernetesTestComponents = _ - private var sparkConf: SparkConf = _ - private var resourceStagingServerLauncher: ResourceStagingServerLauncher = _ - private var staticAssetServerLauncher: StaticAssetServerLauncher = _ - - override def beforeAll(): Unit = { - kubernetesTestComponents = new KubernetesTestComponents(testBackend.getKubernetesClient) - resourceStagingServerLauncher = new ResourceStagingServerLauncher( - kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) - staticAssetServerLauncher = new StaticAssetServerLauncher( - kubernetesTestComponents.kubernetesClient.inNamespace(kubernetesTestComponents.namespace)) - } - - before { - sparkConf = kubernetesTestComponents.newSparkConf() - .set(INIT_CONTAINER_DOCKER_IMAGE, s"spark-driver-init:latest") - .set(DRIVER_DOCKER_IMAGE, s"spark-driver-v2:latest") - .set(KUBERNETES_DRIVER_LABELS, s"spark-app-locator=$APP_LOCATOR_LABEL") - kubernetesTestComponents.createNamespace() - } - - after { - kubernetesTestComponents.deleteNamespace() - } - - test("Use submission v2.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - launchStagingServer(SSLOptions(), None) - runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Enable SSL on the submission server") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val keyStoreAndTrustStore = SSLUtils.generateKeyStoreTrustStorePair( - ipAddress = Minikube.getMinikubeIp, - keyStorePassword = "keyStore", - keyPassword = "key", - trustStorePassword = "trustStore") - sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) - .set("spark.ssl.kubernetes.resourceStagingServer.keyStore", - keyStoreAndTrustStore.keyStore.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.trustStore", - keyStoreAndTrustStore.trustStore.getAbsolutePath) - .set("spark.ssl.kubernetes.resourceStagingServer.keyStorePassword", "keyStore") - .set("spark.ssl.kubernetes.resourceStagingServer.keyPassword", "key") - .set("spark.ssl.kubernetes.resourceStagingServer.trustStorePassword", "trustStore") - launchStagingServer(SSLOptions( - enabled = true, - keyStore = Some(keyStoreAndTrustStore.keyStore), - trustStore = Some(keyStoreAndTrustStore.trustStore), - keyStorePassword = Some("keyStore"), - keyPassword = Some("key"), - trustStorePassword = Some("trustStore")), - None) - 
runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Use container-local resources without the resource staging server") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - sparkConf.setJars(Seq( - KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - runSparkPiAndVerifyCompletion(KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Dynamic executor scaling basic test") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - launchStagingServer(SSLOptions(), None) - createShuffleServiceDaemonSet() - - sparkConf.setJars(Seq(KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - sparkConf.set("spark.dynamicAllocation.enabled", "true") - sparkConf.set("spark.shuffle.service.enabled", "true") - sparkConf.set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service") - sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) - sparkConf.set("spark.app.name", "group-by-test") - runSparkGroupByTestAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Use remote resources without the resource staging server.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() - sparkConf.setJars(Seq( - s"$assetServerUri/${KubernetesSuite.EXAMPLES_JAR_FILE.getName}", - s"$assetServerUri/${KubernetesSuite.HELPER_JAR_FILE.getName}" - )) - runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) - } - - test("Mix remote resources with submitted ones.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - launchStagingServer(SSLOptions(), None) - val assetServerUri = staticAssetServerLauncher.launchStaticAssetServer() - sparkConf.setJars(Seq( - KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE, - s"$assetServerUri/${KubernetesSuite.HELPER_JAR_FILE.getName}" - )) - runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) - } - - test("Use key and certificate PEM files for TLS.") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - val keyAndCertificate = SSLUtils.generateKeyCertPemPair(Minikube.getMinikubeIp) - launchStagingServer( - SSLOptions(enabled = true), - Some(keyAndCertificate)) - sparkConf.set(RESOURCE_STAGING_SERVER_SSL_ENABLED, true) - .set( - RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key, keyAndCertificate.certPem.getAbsolutePath) - runSparkPiAndVerifyCompletion(KubernetesSuite.SUBMITTER_LOCAL_MAIN_APP_RESOURCE) - } - - test("Use client key and client cert file when requesting executors") { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - sparkConf.setJars(Seq( - KubernetesSuite.CONTAINER_LOCAL_MAIN_APP_RESOURCE, - KubernetesSuite.CONTAINER_LOCAL_HELPER_JAR_PATH)) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, - kubernetesTestComponents.clientConfig.getClientKeyFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, - kubernetesTestComponents.clientConfig.getClientCertFile) - sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, - kubernetesTestComponents.clientConfig.getCaCertFile) - runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) - } - - private def launchStagingServer( - resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { - assume(testBackend.name == MINIKUBE_TEST_BACKEND) - - val resourceStagingServerPort = resourceStagingServerLauncher.launchStagingServer( - resourceStagingServerSslOptions, keyAndCertPem) - val resourceStagingServerUriScheme = if (resourceStagingServerSslOptions.enabled) { - "https" - } else { - "http" - } - 
sparkConf.set(RESOURCE_STAGING_SERVER_URI, - s"$resourceStagingServerUriScheme://" + - s"${Minikube.getMinikubeIp}:$resourceStagingServerPort") - } - - private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { - Client.run(sparkConf, appResource, KubernetesSuite.SPARK_PI_MAIN_CLASS, Array.empty[String]) - val driverPod = kubernetesTestComponents.kubernetesClient - .pods() - .withLabel("spark-app-locator", APP_LOCATOR_LABEL) - .list() - .getItems - .get(0) - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - assert(kubernetesTestComponents.kubernetesClient - .pods() - .withName(driverPod.getMetadata.getName) - .getLog - .contains("Pi is roughly 3"), "The application did not compute the value of pi.") - } - } - - private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { - Client.run( - sparkConf = sparkConf, - appArgs = Array.empty[String], - mainClass = KubernetesSuite.GROUP_BY_MAIN_CLASS, - mainAppResource = appResource) - val driverPod = kubernetesTestComponents.kubernetesClient - .pods() - .withLabel("spark-app-locator", APP_LOCATOR_LABEL) - .list() - .getItems - .get(0) - Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - assert(kubernetesTestComponents.kubernetesClient - .pods() - .withName(driverPod.getMetadata.getName) - .getLog - .contains("The Result is"), "The application did not complete.") - } - } - - private def createShuffleServiceDaemonSet(): Unit = { - val ds = kubernetesTestComponents.kubernetesClient.extensions().daemonSets() - .createNew() - .withNewMetadata() - .withName("shuffle") - .endMetadata() - .withNewSpec() - .withNewTemplate() - .withNewMetadata() - .withLabels(Map("app" -> "spark-shuffle-service").asJava) - .endMetadata() - .withNewSpec() - .addNewVolume() - .withName("shuffle-dir") - .withNewHostPath() - .withPath("/tmp") - .endHostPath() - .endVolume() - .addNewContainer() - .withName("shuffle") - .withImage("spark-shuffle:latest") - .withImagePullPolicy("IfNotPresent") - .addNewVolumeMount() - .withName("shuffle-dir") - .withMountPath("/tmp") - .endVolumeMount() - .endContainer() - .endSpec() - .endTemplate() - .endSpec() - .done() - - // wait for daemonset to become available. 
- Eventually.eventually(KubernetesSuite.TIMEOUT, KubernetesSuite.INTERVAL) { - val pods = kubernetesTestComponents.kubernetesClient.pods() - .withLabel("app", "spark-shuffle-service").list().getItems() - - if (pods.size() == 0 || Readiness.isReady(pods.get(0))) { - throw KubernetesSuite.ShuffleNotReadyException() - } - } - } -} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala index 1ba54c131c196..e5e1b1f085f9f 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ResourceStagingServerLauncher.scala @@ -26,7 +26,7 @@ import scala.collection.JavaConverters._ import org.apache.spark.SSLOptions import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.submit.v2.{ContainerNameEqualityPredicate, KeyAndCertPem} +import org.apache.spark.deploy.kubernetes.submit.{ContainerNameEqualityPredicate, KeyAndCertPem} import org.apache.spark.util.Utils /** diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 0692cf55db848..3ff72829f88a7 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -28,11 +28,10 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, private val DOCKER_BUILD_PATH = Paths.get("target", "docker") // Dockerfile paths must be relative to the build path. 
- private val DRIVER_V1_DOCKER_FILE = "dockerfiles/driver/Dockerfile" - private val DRIVER_V2_DOCKER_FILE = "dockerfiles/driver-v2/Dockerfile" + private val DRIVER_DOCKER_FILE = "dockerfiles/driver/Dockerfile" private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" - private val DRIVER_INIT_DOCKER_FILE = "dockerfiles/driver-init/Dockerfile" + private val INIT_CONTAINER_DOCKER_FILE = "dockerfiles/init-container/Dockerfile" private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" private val STATIC_ASSET_SERVER_DOCKER_FILE = "dockerfiles/integration-test-asset-server/Dockerfile" @@ -61,12 +60,11 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, def buildSparkDockerImages(): Unit = { Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } - buildImage("spark-driver", DRIVER_V1_DOCKER_FILE) + buildImage("spark-driver", DRIVER_DOCKER_FILE) buildImage("spark-executor", EXECUTOR_DOCKER_FILE) buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE) - buildImage("spark-driver-v2", DRIVER_V2_DOCKER_FILE) buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) - buildImage("spark-driver-init", DRIVER_INIT_DOCKER_FILE) + buildImage("spark-init", INIT_CONTAINER_DOCKER_FILE) buildImage("spark-integration-test-asset-server", STATIC_ASSET_SERVER_DOCKER_FILE) } From 27b79a24c6e1722a9f288b887b7f31cfcc8af04b Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 23 May 2017 16:38:07 -0700 Subject: [PATCH 113/225] Added files should be in the working directories. (#294) * Added files should be in the working directories. * Revert unintentional changes * Fix test --- docs/running-on-kubernetes.md | 16 ++++++ .../SparkPodInitContainerBootstrap.scala | 4 ++ .../spark/deploy/kubernetes/config.scala | 4 +- .../spark/deploy/kubernetes/constants.scala | 1 + .../SparkPodInitContainerBootstrapSuite.scala | 10 ++++ .../src/main/docker/driver/Dockerfile | 1 + .../src/main/docker/executor/Dockerfile | 1 + .../jobs/FileExistenceTest.scala | 13 ++--- .../integrationtest/KubernetesSuite.scala | 54 +++++++++++-------- .../KubernetesTestComponents.scala | 2 +- 10 files changed, 75 insertions(+), 31 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 98393cbbbba2d..b18987f6af4a4 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -558,6 +558,22 @@ from the other deployment modes. See the [configuration page](configuration.html disk as a secret into the init-containers. + + spark.kubernetes.mountdependencies.jarsDownloadDir + /var/spark-data/spark-jars + + Location to download jars to in the driver and executors. This will be mounted as an empty directory volume + into the driver and executor containers. + + + + spark.kubernetes.mountdependencies.filesDownloadDir + /var/spark-data/spark-files + + Location to download files to in the driver and executors. This will be mounted as an empty directory volume + into the driver and executor containers. 
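As a usage sketch for the two properties documented above, a job could override the download locations programmatically rather than relying on the defaults; the paths shown are illustrative, and the equivalent --conf flags on spark-submit work as well. Both directories are expected to be empty in the images because they are mounted as empty directory volumes.

    import org.apache.spark.SparkConf

    // Illustrative overrides of the dependency download locations described above.
    object DownloadDirsExample {
      val conf: SparkConf = new SparkConf()
        .set("spark.kubernetes.mountdependencies.jarsDownloadDir", "/var/spark-data/spark-jars")
        .set("spark.kubernetes.mountdependencies.filesDownloadDir", "/var/spark-data/spark-files")
    }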
+ + spark.kubernetes.report.interval 1s diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala index 0d4e82566643d..a4d0aeb23d01f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -93,6 +93,10 @@ private[spark] class SparkPodInitContainerBootstrapImpl( .endVolume() .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) .addToVolumeMounts(sharedVolumeMounts: _*) + .addNewEnv() + .withName(ENV_MOUNTED_FILES_DIR) + .withValue(filesDownloadPath) + .endEnv() .endContainer() .endSpec() resourceStagingServerSecretPlugin.map { plugin => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index bcb9a96cae960..c892b01314975 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -447,7 +447,7 @@ package object config extends Logging { " spark-submit, this directory must be empty and will be mounted as an empty directory" + " volume on the driver and executor pod.") .stringConf - .createWithDefault("/var/spark-data/spark-submitted-jars") + .createWithDefault("/var/spark-data/spark-jars") private[spark] val INIT_CONTAINER_FILES_DOWNLOAD_LOCATION = ConfigBuilder("spark.kubernetes.mountdependencies.filesDownloadDir") @@ -455,7 +455,7 @@ package object config extends Logging { " spark-submit, this directory must be empty and will be mounted as an empty directory" + " volume on the driver and executor pods.") .stringConf - .createWithDefault("/var/spark-data/spark-submitted-files") + .createWithDefault("/var/spark-data/spark-files") private[spark] val INIT_CONTAINER_MOUNT_TIMEOUT = ConfigBuilder("spark.kubernetes.mountdependencies.mountTimeout") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index ea11ca2ec8f21..5515e88a50fb0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -92,6 +92,7 @@ package object constants { private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" private[spark] val ENV_DRIVER_JAVA_OPTS = "SPARK_DRIVER_JAVA_OPTS" + private[spark] val ENV_MOUNTED_FILES_DIR = "SPARK_MOUNTED_FILES_DIR" // Annotation keys private[spark] val ANNOTATION_PROVIDE_EXTERNAL_URI = diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala index 6db7d3ff2da53..3feba80f800c7 100644 --- 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ -111,6 +111,16 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf }) } + test("Files download path is set as environment variable") { + val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() + val containers = bootstrappedPod.getSpec.getContainers.asScala + val maybeMainContainer = containers.find(_.getName === MAIN_CONTAINER_NAME) + assert(maybeMainContainer.exists { mainContainer => + mainContainer.getEnv.asScala.exists(envVar => + envVar.getName == ENV_MOUNTED_FILES_DIR && envVar.getValue == FILES_DOWNLOAD_PATH) + }) + } + test("Running with submitted dependencies modifies the init container with the plugin.") { val bootstrappedPod = bootstrapPodWithSubmittedDependencies() val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 40f9459dc06dc..c4c75642c9d22 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -40,4 +40,5 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index c5f1c43ff7cf4..e345f10056522 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -40,4 +40,5 @@ WORKDIR /opt/spark CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." 
.; fi && \ exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala index 8b8d5e05f6479..8994c998bffee 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/src/main/scala/org/apache/spark/deploy/kubernetes/integrationtest/jobs/FileExistenceTest.scala @@ -28,7 +28,9 @@ private[spark] object FileExistenceTest { def main(args: Array[String]): Unit = { if (args.length < 2) { - throw new IllegalArgumentException("Usage: WordCount ") + throw new IllegalArgumentException( + s"Invalid args: ${args.mkString}, " + + "Usage: FileExistenceTest ") } // Can't use SparkContext.textFile since the file is local to the driver val file = Paths.get(args(0)).toFile @@ -39,16 +41,15 @@ private[spark] object FileExistenceTest { val contents = Files.toString(file, Charsets.UTF_8) if (args(1) != contents) { throw new SparkException(s"Contents do not match. Expected: ${args(1)}," + - s" actual, $contents") + s" actual: $contents") } else { println(s"File found at ${file.getAbsolutePath} with correct contents.") } // scalastyle:on println } - val spark = SparkSession.builder() - .appName("Test") - .getOrCreate() - spark.stop() + while (true) { + Thread.sleep(600000) + } } } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index d23bfcdbc5251..95775d262a69d 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -16,6 +16,7 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest +import java.io.File import java.nio.file.Paths import java.util.UUID @@ -35,11 +36,11 @@ import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minik import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND import org.apache.spark.deploy.kubernetes.submit.{Client, KeyAndCertPem} import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.util.Utils private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { import KubernetesSuite._ private val testBackend = IntegrationTestBackendFactory.getTestBackend() - private val APP_LOCATOR_LABEL = UUID.randomUUID().toString.replaceAll("-", "") private var kubernetesTestComponents: KubernetesTestComponents = _ private var sparkConf: SparkConf = _ @@ -124,7 +125,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { 
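Because the driver and executor entrypoints above copy $SPARK_MOUNTED_FILES_DIR into the container's working directory, files distributed through spark.files become reachable by bare name from application code, which is what FileExistenceTest relies on. A minimal sketch under that assumption; the file name "input.txt" is a placeholder for whatever was actually submitted.

    import java.nio.file.Paths

    import com.google.common.base.Charsets
    import com.google.common.io.Files

    // Sketch: read a file shipped via spark.files by its bare name, relying on the
    // entrypoint having copied SPARK_MOUNTED_FILES_DIR into the working directory.
    object ReadSubmittedFileSketch {
      def main(args: Array[String]): Unit = {
        val file = Paths.get("input.txt").toFile
        require(file.exists(), s"${file.getAbsolutePath} was not found in the working directory")
        println(Files.toString(file, Charsets.UTF_8))
      }
    }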
sparkConf.set("spark.kubernetes.shuffle.labels", "app=spark-shuffle-service") sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) sparkConf.set("spark.app.name", "group-by-test") - runSparkGroupByTestAndVerifyCompletion(SUBMITTER_LOCAL_MAIN_APP_RESOURCE) + runSparkApplicationAndVerifyCompletion( + SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + GROUP_BY_MAIN_CLASS, + "The Result is", + Array.empty[String]) } test("Use remote resources without the resource staging server.") { @@ -173,6 +178,20 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } + test("Added files should be placed in the driver's working directory.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + val testExistenceFileTempDir = Utils.createTempDir(namePrefix = "test-existence-file-temp-dir") + val testExistenceFile = new File(testExistenceFileTempDir, "input.txt") + Files.write(TEST_EXISTENCE_FILE_CONTENTS, testExistenceFile, Charsets.UTF_8) + launchStagingServer(SSLOptions(), None) + sparkConf.set("spark.files", testExistenceFile.getAbsolutePath) + runSparkApplicationAndVerifyCompletion( + SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + FILE_EXISTENCE_MAIN_CLASS, + s"File found at /opt/spark/${testExistenceFile.getName} with correct contents.", + Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS)) + } + private def launchStagingServer( resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { assume(testBackend.name == MINIKUBE_TEST_BACKEND) @@ -190,27 +209,19 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { } private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { - Client.run(sparkConf, appResource, SPARK_PI_MAIN_CLASS, Array.empty[String]) - val driverPod = kubernetesTestComponents.kubernetesClient - .pods() - .withLabel("spark-app-locator", APP_LOCATOR_LABEL) - .list() - .getItems - .get(0) - Eventually.eventually(TIMEOUT, INTERVAL) { - assert(kubernetesTestComponents.kubernetesClient - .pods() - .withName(driverPod.getMetadata.getName) - .getLog - .contains("Pi is roughly 3"), "The application did not compute the value of pi.") - } + runSparkApplicationAndVerifyCompletion( + appResource, SPARK_PI_MAIN_CLASS, "Pi is roughly 3", Array.empty[String]) } - private def runSparkGroupByTestAndVerifyCompletion(appResource: String): Unit = { + private def runSparkApplicationAndVerifyCompletion( + appResource: String, + mainClass: String, + expectedLogOnCompletion: String, + appArgs: Array[String]): Unit = { Client.run( sparkConf = sparkConf, - appArgs = Array.empty[String], - mainClass = GROUP_BY_MAIN_CLASS, + appArgs = appArgs, + mainClass = mainClass, mainAppResource = appResource) val driverPod = kubernetesTestComponents.kubernetesClient .pods() @@ -223,7 +234,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .pods() .withName(driverPod.getMetadata.getName) .getLog - .contains("The Result is"), "The application did not complete.") + .contains(expectedLogOnCompletion), "The application did not complete.") } } @@ -285,8 +296,6 @@ private[spark] object KubernetesSuite { val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + s"integration-tests-jars/${HELPER_JAR_FILE.getName}" - val TEST_EXISTENCE_FILE = Paths.get("test-data", "input.txt").toFile - val TEST_EXISTENCE_FILE_CONTENTS = Files.toString(TEST_EXISTENCE_FILE, Charsets.UTF_8) val TIMEOUT = 
PatienceConfiguration.Timeout(Span(2, Minutes)) val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + @@ -295,6 +304,7 @@ private[spark] object KubernetesSuite { ".integrationtest.jobs.FileExistenceTest" val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.GroupByTest" + val TEST_EXISTENCE_FILE_CONTENTS = "contents" case object ShuffleNotReadyException extends Exception } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala index 9ae0d9ade7dc2..0ca1f482269db 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesTestComponents.scala @@ -63,7 +63,7 @@ private[spark] class KubernetesTestComponents(defaultClient: DefaultKubernetesCl .set("spark.executor.memory", "500m") .set("spark.executor.cores", "1") .set("spark.executors.instances", "1") - .set("spark.app.name", "spark-pi") + .set("spark.app.name", "spark-test-app") .set("spark.ui.enabled", "true") .set("spark.testing", "false") .set(WAIT_FOR_APP_COMPLETION, false) From 4d4819c4f3d44666759f82ea19d61fb101a2ed69 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 23 May 2017 20:53:09 -0700 Subject: [PATCH 114/225] Add missing license (#296) --- conf/kubernetes-resource-staging-server.yaml | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/conf/kubernetes-resource-staging-server.yaml b/conf/kubernetes-resource-staging-server.yaml index de0da3edcb901..11f5d3a13b9e3 100644 --- a/conf/kubernetes-resource-staging-server.yaml +++ b/conf/kubernetes-resource-staging-server.yaml @@ -1,3 +1,19 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# --- apiVersion: extensions/v1beta1 kind: Deployment From 1311de13116b1ccd256e1da5f8651e913c8ce1af Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 23 May 2017 22:57:27 -0700 Subject: [PATCH 115/225] Remove some leftover code and fix a constant. (#297) * Remove some leftover code and fix a constant. 
* Fix build --- .../spark/deploy/kubernetes/constants.scala | 54 +++---------- .../ExternalUriProviderWatch.scala | 75 ------------------- 2 files changed, 9 insertions(+), 120 deletions(-) delete mode 100644 resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 5515e88a50fb0..950c1f6efe4e8 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -23,21 +23,7 @@ package object constants { private[spark] val SPARK_APP_NAME_LABEL = "spark-app-name" private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" - // Secrets - private[spark] val DRIVER_CONTAINER_SUBMISSION_SECRETS_BASE_DIR = - "/var/run/secrets/spark-submission" - private[spark] val SUBMISSION_APP_SECRET_NAME = "spark-submission-server-secret" - private[spark] val SUBMISSION_APP_SECRET_PREFIX = "spark-submission-server-secret" - private[spark] val SUBMISSION_APP_SECRET_VOLUME_NAME = "spark-submission-secret-volume" - private[spark] val SUBMISSION_SSL_KEY_PASSWORD_SECRET_NAME = - "spark-submission-server-key-password" - private[spark] val SUBMISSION_SSL_KEYSTORE_PASSWORD_SECRET_NAME = - "spark-submission-server-keystore-password" - private[spark] val SUBMISSION_SSL_KEYSTORE_SECRET_NAME = "spark-submission-server-keystore" - private[spark] val SUBMISSION_SSL_SECRETS_PREFIX = "spark-submission-server-ssl" - private[spark] val SUBMISSION_SSL_SECRETS_VOLUME_NAME = "spark-submission-server-ssl-secrets" - private[spark] val SUBMISSION_SSL_KEY_PEM_SECRET_NAME = "spark-submission-server-key-pem" - private[spark] val SUBMISSION_SSL_CERT_PEM_SECRET_NAME = "spark-submission-server-cert-pem" + // Credentials secrets private[spark] val DRIVER_CREDENTIALS_SECRETS_BASE_DIR = "/mnt/secrets/spark-kubernetes-credentials" private[spark] val DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME = "ca-cert" @@ -54,30 +40,15 @@ package object constants { s"$DRIVER_CREDENTIALS_SECRETS_BASE_DIR/$DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME" private[spark] val DRIVER_CREDENTIALS_SECRET_VOLUME_NAME = "kubernetes-credentials" - // Default and fixed ports private[spark] val SUBMISSION_SERVER_PORT = 7077 private[spark] val DEFAULT_DRIVER_PORT = 7078 private[spark] val DEFAULT_BLOCKMANAGER_PORT = 7079 private[spark] val DEFAULT_UI_PORT = 4040 - private[spark] val UI_PORT_NAME = "spark-ui-port" - private[spark] val SUBMISSION_SERVER_PORT_NAME = "submit-server" private[spark] val BLOCK_MANAGER_PORT_NAME = "blockmanager" - private[spark] val DRIVER_PORT_NAME = "driver" private[spark] val EXECUTOR_PORT_NAME = "executor" // Environment Variables - private[spark] val ENV_SUBMISSION_SECRET_LOCATION = "SPARK_SUBMISSION_SECRET_LOCATION" - private[spark] val ENV_SUBMISSION_SERVER_PORT = "SPARK_SUBMISSION_SERVER_PORT" - private[spark] val ENV_SUBMISSION_KEYSTORE_FILE = "SPARK_SUBMISSION_KEYSTORE_FILE" - private[spark] val ENV_SUBMISSION_KEYSTORE_PASSWORD_FILE = - "SPARK_SUBMISSION_KEYSTORE_PASSWORD_FILE" - private[spark] val ENV_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE = - "SPARK_SUBMISSION_KEYSTORE_KEY_PASSWORD_FILE" - private[spark] val ENV_SUBMISSION_KEYSTORE_TYPE = "SPARK_SUBMISSION_KEYSTORE_TYPE" - private[spark] val 
ENV_SUBMISSION_KEY_PEM_FILE = "SPARK_SUBMISSION_KEY_PEM_FILE" - private[spark] val ENV_SUBMISSION_CERT_PEM_FILE = "SPARK_SUBMISSION_CERT_PEM_FILE" - private[spark] val ENV_SUBMISSION_USE_SSL = "SPARK_SUBMISSION_USE_SSL" private[spark] val ENV_EXECUTOR_PORT = "SPARK_EXECUTOR_PORT" private[spark] val ENV_DRIVER_URL = "SPARK_DRIVER_URL" private[spark] val ENV_EXECUTOR_CORES = "SPARK_EXECUTOR_CORES" @@ -87,27 +58,14 @@ package object constants { private[spark] val ENV_EXECUTOR_POD_IP = "SPARK_EXECUTOR_POD_IP" private[spark] val ENV_DRIVER_MEMORY = "SPARK_DRIVER_MEMORY" private[spark] val ENV_SUBMIT_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" - private[spark] val ENV_EXECUTOR_EXTRA_CLASSPATH = "SPARK_SUBMIT_EXTRA_CLASSPATH" + private[spark] val ENV_EXECUTOR_EXTRA_CLASSPATH = "SPARK_EXECUTOR_EXTRA_CLASSPATH" private[spark] val ENV_MOUNTED_CLASSPATH = "SPARK_MOUNTED_CLASSPATH" private[spark] val ENV_DRIVER_MAIN_CLASS = "SPARK_DRIVER_CLASS" private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" private[spark] val ENV_DRIVER_JAVA_OPTS = "SPARK_DRIVER_JAVA_OPTS" private[spark] val ENV_MOUNTED_FILES_DIR = "SPARK_MOUNTED_FILES_DIR" - // Annotation keys - private[spark] val ANNOTATION_PROVIDE_EXTERNAL_URI = - "spark-job.alpha.apache.org/provideExternalUri" - private[spark] val ANNOTATION_RESOLVED_EXTERNAL_URI = - "spark-job.alpha.apache.org/resolvedExternalUri" - - // Miscellaneous - private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" - private[spark] val DRIVER_SUBMIT_SSL_NAMESPACE = "kubernetes.driversubmitserver" - private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" - private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 - private[spark] val MEMORY_OVERHEAD_MIN = 384L - - // V2 submission init container + // Bootstrapping dependencies with the init-container private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" private[spark] val INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH = "/mnt/secrets/spark-init" @@ -127,4 +85,10 @@ package object constants { s"$INIT_CONTAINER_PROPERTIES_FILE_DIR/$INIT_CONTAINER_PROPERTIES_FILE_NAME" private[spark] val DEFAULT_SHUFFLE_MOUNT_NAME = "shuffle" private[spark] val INIT_CONTAINER_SECRET_VOLUME_NAME = "spark-init-secret" + + // Miscellaneous + private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" + private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" + private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 + private[spark] val MEMORY_OVERHEAD_MIN = 384L } diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala deleted file mode 100644 index f402d240bfc33..0000000000000 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/ExternalUriProviderWatch.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.integrationtest - -import java.util.concurrent.atomic.AtomicBoolean - -import io.fabric8.kubernetes.api.model.Service -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} -import io.fabric8.kubernetes.client.Watcher.Action -import scala.collection.JavaConverters._ - -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube -import org.apache.spark.internal.Logging - -/** - * A slightly unrealistic implementation of external URI provision, but works - * for tests - essentially forces the service to revert back to being exposed - * on NodePort. - */ -private[spark] class ExternalUriProviderWatch(kubernetesClient: KubernetesClient) - extends Watcher[Service] with Logging { - - // Visible for testing - val annotationSet = new AtomicBoolean(false) - - override def eventReceived(action: Action, service: Service): Unit = { - if (action == Action.ADDED) { - service.getMetadata - .getAnnotations - .asScala - .get(ANNOTATION_PROVIDE_EXTERNAL_URI).foreach { _ => - if (!annotationSet.getAndSet(true)) { - val nodePortService = kubernetesClient.services().withName(service.getMetadata.getName) - .edit() - .editSpec() - .withType("NodePort") - .endSpec() - .done() - val submissionServerPort = nodePortService - .getSpec() - .getPorts - .asScala - .find(_.getName == SUBMISSION_SERVER_PORT_NAME) - .map(_.getNodePort) - .getOrElse(throw new IllegalStateException("Submission server port not found.")) - val resolvedNodePortUri = s"http://${Minikube.getMinikubeIp}:$submissionServerPort" - kubernetesClient.services().withName(service.getMetadata.getName).edit() - .editMetadata() - .addToAnnotations(ANNOTATION_RESOLVED_EXTERNAL_URI, resolvedNodePortUri) - .endMetadata() - .done() - } - } - } - } - - override def onClose(cause: KubernetesClientException): Unit = { - logWarning("External URI provider watch closed.", cause) - } -} From e9f0a37806034ae90feefb384564d67ede679eea Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Thu, 25 May 2017 10:43:35 -0700 Subject: [PATCH 116/225] Adding restart policy fix for v2 (#303) --- .../scala/org/apache/spark/deploy/kubernetes/submit/Client.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index bfb0bc3ffb0f3..a8029a28009c2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -119,6 +119,7 @@ private[spark] class Client( .addToAnnotations(parsedCustomAnnotations.asJava) .endMetadata() .withNewSpec() + .withRestartPolicy("Never") .addToContainers(driverContainer) .endSpec() From bd8f6dacb8200ca8c2d640aaeb6dc814e83c837e Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 25 May 2017 22:13:05 -0700 Subject: 
[PATCH 117/225] Add all dockerfiles to distributions. (#307) --- dev/make-distribution.sh | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index b06bece03d4df..a7a171dee09a3 100755 --- a/dev/make-distribution.sh +++ b/dev/make-distribution.sh @@ -169,11 +169,9 @@ echo "Build flags: $@" >> "$DISTDIR/RELEASE" cp "$SPARK_HOME"/assembly/target/scala*/jars/* "$DISTDIR/jars/" # Copy docker files -mkdir -p "$DISTDIR/dockerfiles/driver" -mkdir -p "$DISTDIR/dockerfiles/executor" +mkdir -p "$DISTDIR/dockerfiles" DOCKERFILES_SRC="$SPARK_HOME/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker" -cp "$DOCKERFILES_SRC/driver/Dockerfile" "$DISTDIR/dockerfiles/driver/Dockerfile" -cp "$DOCKERFILES_SRC/executor/Dockerfile" "$DISTDIR/dockerfiles/executor/Dockerfile" +cp -R "$DOCKERFILES_SRC/." "$DISTDIR/dockerfiles/." # Only create the yarn directory if the yarn artifacts were build. if [ -f "$SPARK_HOME"/common/network-yarn/target/scala*/spark-*-yarn-shuffle.jar ]; then From fc5d9c5a0aba3bd012cd65215090847a1c308bad Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 25 May 2017 22:46:53 -0700 Subject: [PATCH 118/225] Add proxy configuration to retrofit clients. (#301) * Add proxy configuration to retrofit clients. * Add logging --- .../kubernetes/RetrofitClientFactory.scala | 34 +++++++++++++++++-- 1 file changed, 32 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala index a374982444f79..e38a3d9ad928e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala @@ -17,31 +17,56 @@ package org.apache.spark.deploy.rest.kubernetes import java.io.FileInputStream +import java.net.{InetSocketAddress, URI} import java.security.{KeyStore, SecureRandom} import javax.net.ssl.{SSLContext, TrustManagerFactory, X509TrustManager} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule +import io.fabric8.kubernetes.client.Config import okhttp3.{Dispatcher, OkHttpClient} import retrofit2.Retrofit import retrofit2.converter.jackson.JacksonConverterFactory import retrofit2.converter.scalars.ScalarsConverterFactory import org.apache.spark.SSLOptions +import org.apache.spark.internal.Logging import org.apache.spark.util.{ThreadUtils, Utils} private[spark] trait RetrofitClientFactory { def createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T } -private[spark] object RetrofitClientFactoryImpl extends RetrofitClientFactory { +private[spark] object RetrofitClientFactoryImpl extends RetrofitClientFactory with Logging { private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) private val SECURE_RANDOM = new SecureRandom() def createRetrofitClient[T](baseUrl: String, serviceType: Class[T], sslOptions: SSLOptions): T = { val dispatcher = new Dispatcher(ThreadUtils.newDaemonCachedThreadPool(s"http-client-$baseUrl")) - val okHttpClientBuilder = new OkHttpClient.Builder().dispatcher(dispatcher) + val serviceUri = URI.create(baseUrl) + val maybeAllProxy = Option.apply(System.getProperty(Config.KUBERNETES_ALL_PROXY)) + 
val serviceUriScheme = serviceUri.getScheme + val maybeHttpProxy = (if (serviceUriScheme.equalsIgnoreCase("https")) { + Option.apply(System.getProperty(Config.KUBERNETES_HTTPS_PROXY)) + } else if (serviceUriScheme.equalsIgnoreCase("http")) { + Option.apply(System.getProperty(Config.KUBERNETES_HTTP_PROXY)) + } else { + maybeAllProxy + }).map(uriStringToProxy) + val maybeNoProxy = Option.apply(System.getProperty(Config.KUBERNETES_NO_PROXY)) + .map(_.split(",")) + .toSeq + .flatten + val resolvedProxy = maybeNoProxy.find(_ == serviceUri.getHost) + .map( _ => java.net.Proxy.NO_PROXY) + .orElse(maybeHttpProxy) + .getOrElse(java.net.Proxy.NO_PROXY) + val okHttpClientBuilder = new OkHttpClient.Builder() + .dispatcher(dispatcher) + .proxy(resolvedProxy) + logDebug(s"Proxying to $baseUrl through address ${resolvedProxy.address()} with proxy of" + + s" type ${resolvedProxy.`type`()}") sslOptions.trustStore.foreach { trustStoreFile => require(trustStoreFile.isFile, s"TrustStore provided at ${trustStoreFile.getAbsolutePath}" + " does not exist, or is not a file.") @@ -69,4 +94,9 @@ private[spark] object RetrofitClientFactoryImpl extends RetrofitClientFactory { .create(serviceType) } + private def uriStringToProxy(uriString: String): java.net.Proxy = { + val uriObject = URI.create(uriString) + new java.net.Proxy(java.net.Proxy.Type.HTTP, + new InetSocketAddress(uriObject.getHost, uriObject.getPort)) + } } From 51a325c4a2efcdfe3d191eca7e1732dc556bf24e Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Thu, 25 May 2017 23:29:16 -0700 Subject: [PATCH 119/225] Fix an HDFS data locality bug in case cluster node names are short host names (#291) * Fix an HDFS data locality bug in case cluster node names are not full host names * Add a NOTE about InetAddress caching --- .../kubernetes/KubernetesTaskSetManager.scala | 26 +++- .../KubernetesTaskSetManagerSuite.scala | 117 ++++++++++++++++++ 2 files changed, 141 insertions(+), 2 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala index 5cea95be382f0..51566d03a7a6c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.scheduler.cluster.kubernetes +import java.net.InetAddress + import scala.collection.mutable.ArrayBuffer import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} @@ -23,7 +25,9 @@ import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} private[spark] class KubernetesTaskSetManager( sched: TaskSchedulerImpl, taskSet: TaskSet, - maxTaskFailures: Int) extends TaskSetManager(sched, taskSet, maxTaskFailures) { + maxTaskFailures: Int, + inetAddressUtil: InetAddressUtil = new InetAddressUtil) + extends TaskSetManager(sched, taskSet, maxTaskFailures) { /** * Overrides the lookup to use not only the executor pod IP, but also the cluster node @@ -52,8 +56,16 @@ private[spark] class KubernetesTaskSetManager( if (pendingTasksClusterNodeIP.nonEmpty) { logDebug(s"Got preferred task list 
$pendingTasksClusterNodeIP for executor host " + s"$executorIP using cluster node IP $clusterNodeIP") + pendingTasksClusterNodeIP + } else { + val clusterNodeFullName = inetAddressUtil.getFullHostName(clusterNodeIP) + val pendingTasksClusterNodeFullName = super.getPendingTasksForHost(clusterNodeFullName) + if (pendingTasksClusterNodeFullName.nonEmpty) { + logDebug(s"Got preferred task list $pendingTasksClusterNodeFullName " + + s"for executor host $executorIP using cluster node full name $clusterNodeFullName") + } + pendingTasksClusterNodeFullName } - pendingTasksClusterNodeIP } } else { pendingTasksExecutorIP // Empty @@ -61,3 +73,13 @@ private[spark] class KubernetesTaskSetManager( } } } + +// To support mocks in unit tests. +private[kubernetes] class InetAddressUtil { + + // NOTE: This does issue a network call to DNS. Caching is done internally by the InetAddress + // class for both hits and misses. + def getFullHostName(ipAddress: String): String = { + InetAddress.getByName(ipAddress).getCanonicalHostName + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala new file mode 100644 index 0000000000000..7618c137ab22b --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.scheduler.cluster.kubernetes + +import scala.collection.mutable.ArrayBuffer + +import io.fabric8.kubernetes.api.model.{Pod, PodSpec, PodStatus} +import org.mockito.Mockito._ + +import org.apache.spark.{SparkContext, SparkFunSuite} +import org.apache.spark.scheduler.{FakeTask, FakeTaskScheduler, HostTaskLocation, TaskLocation} + +class KubernetesTaskSetManagerSuite extends SparkFunSuite { + + val sc = new SparkContext("local", "test") + val sched = new FakeTaskScheduler(sc, + ("execA", "10.0.0.1"), ("execB", "10.0.0.2"), ("execC", "10.0.0.3")) + val backend = mock(classOf[KubernetesClusterSchedulerBackend]) + sched.backend = backend + + test("Find pending tasks for executors using executor pod IP addresses") { + val taskSet = FakeTask.createTaskSet(3, + Seq(TaskLocation("10.0.0.1", "execA")), // Task 0 runs on executor pod 10.0.0.1. + Seq(TaskLocation("10.0.0.1", "execA")), // Task 1 runs on executor pod 10.0.0.1. + Seq(TaskLocation("10.0.0.2", "execB")) // Task 2 runs on executor pod 10.0.0.2. 
+ ) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) + assert(manager.getPendingTasksForHost("10.0.0.2") == ArrayBuffer(2)) + } + + test("Find pending tasks for executors using cluster node names that executor pods run on") { + val taskSet = FakeTask.createTaskSet(2, + Seq(HostTaskLocation("kube-node1")), // Task 0's partition belongs to datanode on kube-node1 + Seq(HostTaskLocation("kube-node1")) // Task 1's partition belongs to datanode on kube-node2 + ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node1") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) + } + + test("Find pending tasks for executors using cluster node IPs that executor pods run on") { + val taskSet = FakeTask.createTaskSet(2, + Seq(HostTaskLocation("196.0.0.5")), // Task 0's partition belongs to datanode on 196.0.0.5. + Seq(HostTaskLocation("196.0.0.5")) // Task 1's partition belongs to datanode on 196.0.0.5. + ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node1") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + val status1 = mock(classOf[PodStatus]) + when(status1.getHostIP).thenReturn("196.0.0.5") + when(pod1.getStatus).thenReturn(status1) + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) + } + + test("Find pending tasks for executors using cluster node FQDNs that executor pods run on") { + val taskSet = FakeTask.createTaskSet(2, + Seq(HostTaskLocation("kube-node1.domain1")), // Task 0's partition belongs to datanode here. + Seq(HostTaskLocation("kube-node1.domain1")) // task 1's partition belongs to datanode here. + ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node1") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + val status1 = mock(classOf[PodStatus]) + when(status1.getHostIP).thenReturn("196.0.0.5") + when(pod1.getStatus).thenReturn(status1) + val inetAddressUtil = mock(classOf[InetAddressUtil]) + when(inetAddressUtil.getFullHostName("196.0.0.5")).thenReturn("kube-node1.domain1") + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2, inetAddressUtil) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) + } + + test("Return empty pending tasks for executors when all look up fail") { + val taskSet = FakeTask.createTaskSet(1, + Seq(HostTaskLocation("kube-node1.domain1")) // task 0's partition belongs to datanode here. 
+ ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node2") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + val status1 = mock(classOf[PodStatus]) + when(status1.getHostIP).thenReturn("196.0.0.6") + when(pod1.getStatus).thenReturn(status1) + val inetAddressUtil = mock(classOf[InetAddressUtil]) + when(inetAddressUtil.getFullHostName("196.0.0.6")).thenReturn("kube-node2.domain1") + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2, inetAddressUtil) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer()) + } +} From b8dc23d79c1cc6fc424ff61f13ed360837bd6b2f Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 30 May 2017 16:01:58 -0700 Subject: [PATCH 120/225] Remove leading slash from Retrofit interface. (#308) --- .../rest/kubernetes/ResourceStagingServiceRetrofit.scala | 4 ++-- .../deploy/rest/kubernetes/RetrofitClientFactory.scala | 7 ++++++- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala index 3c2fe8ebbc3c8..c0da44838aba3 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala @@ -29,7 +29,7 @@ import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret private[spark] trait ResourceStagingServiceRetrofit { @Multipart - @retrofit2.http.POST("/api/v0/resources/") + @retrofit2.http.POST("api/v0/resources/") def uploadResources( @retrofit2.http.Part("podLabels") podLabels: RequestBody, @retrofit2.http.Part("podNamespace") podNamespace: RequestBody, @@ -38,7 +38,7 @@ private[spark] trait ResourceStagingServiceRetrofit { kubernetesCredentials: RequestBody): Call[SubmittedResourceIdAndSecret] @Streaming - @retrofit2.http.GET("/api/v0/resources/{resourceId}") + @retrofit2.http.GET("api/v0/resources/{resourceId}") def downloadResources( @Path("resourceId") resourceId: String, @retrofit2.http.Header("Authorization") resourceSecret: String): Call[ResponseBody] diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala index e38a3d9ad928e..5046cb479054c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/RetrofitClientFactory.scala @@ -85,8 +85,13 @@ private[spark] object RetrofitClientFactoryImpl extends RetrofitClientFactory wi okHttpClientBuilder.sslSocketFactory(sslContext.getSocketFactory, trustManagers(0).asInstanceOf[X509TrustManager]) } + val resolvedBaseUrl = if (!baseUrl.endsWith("/")) { + s"$baseUrl/" + } else { + baseUrl + } new Retrofit.Builder() - .baseUrl(baseUrl) + .baseUrl(resolvedBaseUrl) .addConverterFactory(ScalarsConverterFactory.create()) .addConverterFactory(JacksonConverterFactory.create(OBJECT_MAPPER)) .client(okHttpClientBuilder.build()) From 
1c8bf38294d25bbc86fd0302c3108b6bcdba643f Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 31 May 2017 15:01:13 -0700 Subject: [PATCH 121/225] Use tini in Docker images (#320) --- .../docker-minimal-bundle/src/main/docker/driver/Dockerfile | 4 ++-- .../docker-minimal-bundle/src/main/docker/executor/Dockerfile | 4 ++-- .../src/main/docker/init-container/Dockerfile | 4 ++-- .../src/main/docker/resource-staging-server/Dockerfile | 4 ++-- .../src/main/docker/shuffle-service/Dockerfile | 4 ++-- 5 files changed, 10 insertions(+), 10 deletions(-) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index c4c75642c9d22..fa651ff43aaa0 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile . RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -41,4 +41,4 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ - exec ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS + exec /sbin/tini -- ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index e345f10056522..fbad43b6255b9 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -41,4 +41,4 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." 
.; fi && \ - exec ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP + exec /sbin/tini -- ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index bb249a4ea86b6..40557a7465a8a 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -35,4 +35,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] +ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index 125749c71c79a..c8b13c44207bc 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -35,4 +35,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] +ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index 630d3408519ac..06aac56ba2f52 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -22,7 +22,7 @@ FROM openjdk:8-alpine # docker build -t spark-shuffle:latest -f dockerfiles/shuffle/Dockerfile . 
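# Note on the tini change: tini runs as PID 1 inside each of these images, forwarding
# signals to the child JVM and reaping zombie processes; that is why the CMD and
# ENTRYPOINT lines in this patch now launch java and spark-class through
# "/sbin/tini --" instead of invoking them directly.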
RUN apk upgrade --update -RUN apk add --update bash +RUN apk add --update bash tini RUN mkdir -p /opt/spark RUN touch /opt/spark/RELEASE @@ -36,4 +36,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -CMD ["/bin/sh","-c","/opt/spark/bin/spark-class org.apache.spark.deploy.ExternalShuffleService 1"] \ No newline at end of file +ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.ExternalShuffleService", "1" ] From 2cbd6fc023342ead6aa6184b3991d88b2dc7505d Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 1 Jun 2017 12:05:16 -0700 Subject: [PATCH 122/225] Allow custom executor labels and annotations (#321) * Allow custom executor labels and annotations * Address comments. * Fix scalastyle. --- docs/running-on-kubernetes.md | 17 ++++++++++ .../spark/deploy/kubernetes/config.scala | 16 ++++++++++ .../deploy/kubernetes/submit/Client.scala | 31 ++++--------------- .../KubernetesClusterSchedulerBackend.scala | 26 ++++++++++++++-- 4 files changed, 62 insertions(+), 28 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index b18987f6af4a4..488efbe5eef36 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -476,6 +476,23 @@ from the other deployment modes. See the [configuration page](configuration.html pairs, where each annotation is in the format key=value. + + spark.kubernetes.executor.labels + (none) + + Custom labels that will be added to the executor pods. This should be a comma-separated list of label key-value + pairs, where each label is in the format key=value. Note that Spark also adds its own labels to the + executor pods for bookkeeping purposes. + + + + spark.kubernetes.executor.annotations + (none) + + Custom annotations that will be added to the executor pods. This should be a comma-separated list of annotation + key-value pairs, where each annotation is in the format key=value. + + spark.kubernetes.driver.pod.name (none) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index c892b01314975..d1341b15afaca 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -211,6 +211,22 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_EXECUTOR_LABELS = + ConfigBuilder("spark.kubernetes.executor.labels") + .doc("Custom labels that will be added to the executor pods. This should be a" + + " comma-separated list of label key-value pairs, where each label is in the format" + + " key=value.") + .stringConf + .createOptional + + private[spark] val KUBERNETES_EXECUTOR_ANNOTATIONS = + ConfigBuilder("spark.kubernetes.executor.annotations") + .doc("Custom annotations that will be added to the executor pods. 
This should be a" + + " comma-separated list of annotation key-value pairs, where each annotation is in the" + + " format key=value.") + .stringConf + .createOptional + private[spark] val KUBERNETES_DRIVER_POD_NAME = ConfigBuilder("spark.kubernetes.driver.pod.name") .doc("Name of the driver pod.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index a8029a28009c2..743ec9d7707e0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -22,7 +22,8 @@ import java.util.Collections import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder} import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.ConfigurationUtils import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl @@ -75,18 +76,16 @@ private[spark] class Client( def run(): Unit = { validateNoDuplicateFileNames(sparkJars) validateNoDuplicateFileNames(sparkFiles) - val parsedCustomLabels = parseKeyValuePairs(customLabels, KUBERNETES_DRIVER_LABELS.key, - "labels") + val parsedCustomLabels = ConfigurationUtils.parseKeyValuePairs( + customLabels, KUBERNETES_DRIVER_LABELS.key, "labels") require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") require(!parsedCustomLabels.contains(SPARK_APP_NAME_LABEL), s"Label with key" + s" $SPARK_APP_NAME_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") val allLabels = parsedCustomLabels ++ Map(SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_APP_NAME_LABEL -> appName) - val parsedCustomAnnotations = parseKeyValuePairs( - customAnnotations, - KUBERNETES_DRIVER_ANNOTATIONS.key, - "annotations") + val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( + customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") Utils.tryWithResource(kubernetesClientProvider.get) { kubernetesClient => val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => new EnvVarBuilder() @@ -237,24 +236,6 @@ private[spark] class Client( s" file name $fileName is shared by all of these URIs: $urisWithFileName") } } - - private def parseKeyValuePairs( - maybeKeyValues: Option[String], - configKey: String, - keyValueType: String): Map[String, String] = { - maybeKeyValues.map(keyValues => { - keyValues.split(",").map(_.trim).filterNot(_.isEmpty).map(keyValue => { - keyValue.split("=", 2).toSeq match { - case Seq(k, v) => - (k, v) - case _ => - throw new SparkException(s"Custom $keyValueType set by $configKey must be a" + - s" comma-separated list of key-value pairs, with format =." + - s" Got value: $keyValue. 
All values: $keyValues") - } - }).toMap - }).getOrElse(Map.empty[String, String]) - } } private[spark] object Client { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 5627f7c20de3d..7fcfa36a771fb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -55,6 +55,23 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorExtraClasspath = conf.get( org.apache.spark.internal.config.EXECUTOR_CLASS_PATH) private val executorJarsDownloadDir = conf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) + + private val executorLabels = ConfigurationUtils.parseKeyValuePairs( + conf.get(KUBERNETES_EXECUTOR_LABELS), + KUBERNETES_EXECUTOR_LABELS.key, + "executor labels") + require( + !executorLabels.contains(SPARK_APP_ID_LABEL), + s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is reserved for Spark.") + require( + !executorLabels.contains(SPARK_EXECUTOR_ID_LABEL), + s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + + s" Spark.") + private val executorAnnotations = ConfigurationUtils.parseKeyValuePairs( + conf.get(KUBERNETES_EXECUTOR_ANNOTATIONS), + KUBERNETES_EXECUTOR_ANNOTATIONS.key, + "executor annotations") + private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) @@ -250,8 +267,10 @@ private[spark] class KubernetesClusterSchedulerBackend( // executorId and applicationId val hostname = name.substring(Math.max(0, name.length - 63)) - val selectors = Map(SPARK_EXECUTOR_ID_LABEL -> executorId, - SPARK_APP_ID_LABEL -> applicationId()).asJava + val resolvedExecutorLabels = Map( + SPARK_EXECUTOR_ID_LABEL -> executorId, + SPARK_APP_ID_LABEL -> applicationId()) ++ + executorLabels val executorMemoryQuantity = new QuantityBuilder(false) .withAmount(s"${executorMemoryMb}M") .build() @@ -300,7 +319,8 @@ private[spark] class KubernetesClusterSchedulerBackend( val basePodBuilder = new PodBuilder() .withNewMetadata() .withName(name) - .withLabels(selectors) + .withLabels(resolvedExecutorLabels.asJava) + .withAnnotations(executorAnnotations.asJava) .withOwnerReferences() .addNewOwnerReference() .withController(true) From 5be593812518e36e98329e2a9d36c45fbd2f94dc Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Fri, 2 Jun 2017 11:59:43 -0700 Subject: [PATCH 123/225] Dynamic allocation, cleanup in case of driver death (#319) * Adding cleanup for shuffle service for driver death * Address comments + fix tests * Cleanly open and close resources. 
* Added unit test, reusing RegisterDriver * lint + fix mesos --- .../KubernetesExternalShuffleClient.java | 79 ++++++++ .../mesos/MesosExternalShuffleClient.java | 2 +- .../protocol/BlockTransferMessage.java | 1 - .../protocol/{mesos => }/RegisterDriver.java | 5 +- conf/kubernetes-shuffle-service.yaml | 3 +- .../KubernetesExternalShuffleService.scala | 179 ++++++++++++++++++ .../spark/deploy/kubernetes/constants.scala | 1 + .../deploy/kubernetes/submit/Client.scala | 7 +- .../DriverPodKubernetesClientProvider.scala | 10 +- .../KubernetesClusterSchedulerBackend.scala | 36 +++- .../kubernetes/submit/ClientV2Suite.scala | 34 +++- .../main/docker/shuffle-service/Dockerfile | 2 +- .../mesos/MesosExternalShuffleService.scala | 3 +- 13 files changed, 343 insertions(+), 19 deletions(-) create mode 100644 common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java rename common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/{mesos => }/RegisterDriver.java (91%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java new file mode 100644 index 0000000000000..49cb5243e32dc --- /dev/null +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.spark.network.shuffle.kubernetes; + +import org.apache.spark.network.client.RpcResponseCallback; +import org.apache.spark.network.client.TransportClient; +import org.apache.spark.network.sasl.SecretKeyHolder; +import org.apache.spark.network.shuffle.ExternalShuffleClient; +import org.apache.spark.network.shuffle.protocol.RegisterDriver; +import org.apache.spark.network.util.TransportConf; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.ByteBuffer; + +/** + * A client for talking to the external shuffle service in Kubernetes cluster mode. + * + * This is used by the each Spark executor to register with a corresponding external + * shuffle service on the cluster. The purpose is for cleaning up shuffle files + * reliably if the application exits unexpectedly. 
+ */ +public class KubernetesExternalShuffleClient extends ExternalShuffleClient { + private static final Logger logger = LoggerFactory + .getLogger(KubernetesExternalShuffleClient.class); + + /** + * Creates an Kubernetes external shuffle client that wraps the {@link ExternalShuffleClient}. + * Please refer to docs on {@link ExternalShuffleClient} for more information. + */ + public KubernetesExternalShuffleClient( + TransportConf conf, + SecretKeyHolder secretKeyHolder, + boolean saslEnabled, + boolean saslEncryptionEnabled) { + super(conf, secretKeyHolder, saslEnabled, saslEncryptionEnabled); + } + + public void registerDriverWithShuffleService(String host, int port) throws IOException { + checkInit(); + ByteBuffer registerDriver = new RegisterDriver(appId, 0).toByteBuffer(); + TransportClient client = clientFactory.createClient(host, port); + client.sendRpc(registerDriver, new RegisterDriverCallback()); + } + + private class RegisterDriverCallback implements RpcResponseCallback { + @Override + public void onSuccess(ByteBuffer response) { + logger.info("Successfully registered app " + appId + " with external shuffle service."); + } + + @Override + public void onFailure(Throwable e) { + logger.warn("Unable to register app " + appId + " with external shuffle service. " + + "Please manually remove shuffle data after driver exit. Error: " + e); + } + } + + @Override + public void close() { + super.close(); + } +} diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/mesos/MesosExternalShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/mesos/MesosExternalShuffleClient.java index dbc1010847fb1..2b317fec11146 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/mesos/MesosExternalShuffleClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/mesos/MesosExternalShuffleClient.java @@ -32,7 +32,7 @@ import org.apache.spark.network.client.TransportClient; import org.apache.spark.network.sasl.SecretKeyHolder; import org.apache.spark.network.shuffle.ExternalShuffleClient; -import org.apache.spark.network.shuffle.protocol.mesos.RegisterDriver; +import org.apache.spark.network.shuffle.protocol.RegisterDriver; import org.apache.spark.network.util.TransportConf; /** diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java index 9af6759f5d5f3..6012a84599368 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/BlockTransferMessage.java @@ -23,7 +23,6 @@ import io.netty.buffer.Unpooled; import org.apache.spark.network.protocol.Encodable; -import org.apache.spark.network.shuffle.protocol.mesos.RegisterDriver; import org.apache.spark.network.shuffle.protocol.mesos.ShuffleServiceHeartbeat; /** diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/mesos/RegisterDriver.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterDriver.java similarity index 91% rename from common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/mesos/RegisterDriver.java rename to common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterDriver.java index 
d5f53ccb7f741..ac606e6539f3e 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/mesos/RegisterDriver.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/protocol/RegisterDriver.java @@ -15,19 +15,18 @@ * limitations under the License. */ -package org.apache.spark.network.shuffle.protocol.mesos; +package org.apache.spark.network.shuffle.protocol; import com.google.common.base.Objects; import io.netty.buffer.ByteBuf; import org.apache.spark.network.protocol.Encoders; -import org.apache.spark.network.shuffle.protocol.BlockTransferMessage; // Needed by ScalaDoc. See SPARK-7726 import static org.apache.spark.network.shuffle.protocol.BlockTransferMessage.Type; /** - * A message sent from the driver to register with the MesosExternalShuffleService. + * A message sent from the driver to register with an ExternalShuffleService. */ public class RegisterDriver extends BlockTransferMessage { private final String appId; diff --git a/conf/kubernetes-shuffle-service.yaml b/conf/kubernetes-shuffle-service.yaml index 3aeb1f54f301c..c0cc310cf4755 100644 --- a/conf/kubernetes-shuffle-service.yaml +++ b/conf/kubernetes-shuffle-service.yaml @@ -38,7 +38,8 @@ spec: # This is an official image that is built # from the dockerfiles/shuffle directory # in the spark distribution. - image: kubespark/spark-shuffle:v2.1.0-kubernetes-0.1.0-alpha.3 + image: spark-shuffle:latest + imagePullPolicy: IfNotPresent volumeMounts: - mountPath: '/tmp' name: temp-volume diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala new file mode 100644 index 0000000000000..94292dae10f29 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.spark.deploy.kubernetes + +import java.nio.ByteBuffer + +import io.fabric8.kubernetes.api.model.Pod +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.Watcher.Action +import org.apache.commons.io.IOUtils +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.{SecurityManager, SparkConf} +import org.apache.spark.deploy.ExternalShuffleService +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.internal.Logging +import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} +import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler +import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, RegisterDriver} +import org.apache.spark.network.util.TransportConf +import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider + +/** + * An RPC endpoint that receives registration requests from Spark drivers running on Kubernetes. + * It detects driver termination and calls the cleanup callback to [[ExternalShuffleService]]. + */ +private[spark] class KubernetesShuffleBlockHandler ( + transportConf: TransportConf, + kubernetesClientProvider: DriverPodKubernetesClientProvider) + extends ExternalShuffleBlockHandler(transportConf, null) with Logging { + + private val INIT_AND_STOP_LOCK = new Object + private val CONNECTED_APPS_LOCK = new Object + private val connectedApps = mutable.Set.empty[String] + private var shuffleWatch: Option[Watch] = None + private var kubernetesClient: Option[KubernetesClient] = None + + def start(): Unit = INIT_AND_STOP_LOCK.synchronized { + val client = kubernetesClientProvider.get + shuffleWatch = startShuffleWatcher(client) + kubernetesClient = Some(client) + } + + override def close(): Unit = { + try { + super.close() + } finally { + INIT_AND_STOP_LOCK.synchronized { + shuffleWatch.foreach(IOUtils.closeQuietly) + shuffleWatch = None + kubernetesClient.foreach(IOUtils.closeQuietly) + kubernetesClient = None + } + } + } + + protected override def handleMessage( + message: BlockTransferMessage, + client: TransportClient, + callback: RpcResponseCallback): Unit = { + message match { + case RegisterDriverParam(appId) => + val address = client.getSocketAddress + logInfo(s"Received registration request from app $appId (remote address $address).") + CONNECTED_APPS_LOCK.synchronized { + if (connectedApps.contains(appId)) { + logWarning(s"Received a registration request from app $appId, but it was already " + + s"registered") + } + connectedApps += appId + } + callback.onSuccess(ByteBuffer.allocate(0)) + case _ => super.handleMessage(message, client, callback) + } + } + + private def startShuffleWatcher(client: KubernetesClient): Option[Watch] = { + try { + Some(client + .pods() + .withLabels(Map(SPARK_ROLE_LABEL -> "driver").asJava) + .watch(new Watcher[Pod] { + override def eventReceived(action: Watcher.Action, p: Pod): Unit = { + action match { + case Action.DELETED | Action.ERROR => + val labels = p.getMetadata.getLabels + if (labels.containsKey(SPARK_APP_ID_LABEL)) { + val appId = labels.get(SPARK_APP_ID_LABEL) + CONNECTED_APPS_LOCK.synchronized { + if (connectedApps.contains(appId)) { + connectedApps -= appId + applicationRemoved(appId, true) + } + } + } + case Action.ADDED | Action.MODIFIED => + } + } + + override def onClose(e: KubernetesClientException): Unit = {} + })) + } catch { + case throwable: Throwable => + 
logWarning(s"Shuffle service cannot access Kubernetes. " + + s"Orphaned file cleanup is disabled.", throwable) + None + } + } + + /** An extractor object for matching [[RegisterDriver]] message. */ + private object RegisterDriverParam { + def unapply(r: RegisterDriver): Option[(String)] = + Some(r.getAppId) + } +} + +/** + * A wrapper of [[ExternalShuffleService]] that provides an additional endpoint for drivers + * to associate with. This allows the shuffle service to detect when a driver is terminated + * and can clean up the associated shuffle files. + */ +private[spark] class KubernetesExternalShuffleService( + conf: SparkConf, + securityManager: SecurityManager, + kubernetesClientProvider: DriverPodKubernetesClientProvider) + extends ExternalShuffleService(conf, securityManager) { + + private var shuffleBlockHandlers: mutable.Buffer[KubernetesShuffleBlockHandler] = _ + protected override def newShuffleBlockHandler( + tConf: TransportConf): ExternalShuffleBlockHandler = { + val newBlockHandler = new KubernetesShuffleBlockHandler(tConf, kubernetesClientProvider) + newBlockHandler.start() + + // TODO: figure out a better way of doing this. + // This is necessary because the constructor is not called + // when this class is initialized through ExternalShuffleService. + if (shuffleBlockHandlers == null) { + shuffleBlockHandlers = mutable.Buffer.empty[KubernetesShuffleBlockHandler] + } + shuffleBlockHandlers += newBlockHandler + newBlockHandler + } + + override def stop(): Unit = { + try { + super.stop() + } finally { + shuffleBlockHandlers.foreach(_.close()) + } + } +} + +private[spark] object KubernetesExternalShuffleService extends Logging { + def main(args: Array[String]): Unit = { + ExternalShuffleService.main(args, + (conf: SparkConf, sm: SecurityManager) => { + val kubernetesClientProvider = new DriverPodKubernetesClientProvider(conf) + new KubernetesExternalShuffleService(conf, sm, kubernetesClientProvider) + }) + } +} + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 950c1f6efe4e8..e267c9ff7e1d1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -22,6 +22,7 @@ package object constants { private[spark] val SPARK_APP_ID_LABEL = "spark-app-id" private[spark] val SPARK_APP_NAME_LABEL = "spark-app-name" private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" + private[spark] val SPARK_ROLE_LABEL = "spark-role" // Credentials secrets private[spark] val DRIVER_CREDENTIALS_SECRETS_BASE_DIR = diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 743ec9d7707e0..dc8a6da45495e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -82,10 +82,13 @@ private[spark] class Client( s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") require(!parsedCustomLabels.contains(SPARK_APP_NAME_LABEL), s"Label with key" + s" $SPARK_APP_NAME_LABEL is not allowed as it is reserved for 
Spark bookkeeping operations.") - val allLabels = parsedCustomLabels ++ - Map(SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_APP_NAME_LABEL -> appName) + val allLabels = parsedCustomLabels ++ Map( + SPARK_APP_ID_LABEL -> kubernetesAppId, + SPARK_APP_NAME_LABEL -> appName, + SPARK_ROLE_LABEL -> "driver") val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") + Utils.tryWithResource(kubernetesClientProvider.get) { kubernetesClient => val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => new EnvVarBuilder() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala index 50f2c218c22c4..cc2032219f885 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala @@ -29,7 +29,10 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.util.ThreadUtils -private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, namespace: String) { +private[spark] class DriverPodKubernetesClientProvider( + sparkConf: SparkConf, + namespace: Option[String] = None) { + private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) @@ -45,7 +48,10 @@ private[spark] class DriverPodKubernetesClientProvider(sparkConf: SparkConf, nam val baseClientConfigBuilder = new ConfigBuilder() .withApiVersion("v1") .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) - .withNamespace(namespace) + + // Build a namespaced client if specified. 
+ val namespacedClientConfigBuilder = namespace + .map(baseClientConfigBuilder.withNamespace(_)).getOrElse(baseClientConfigBuilder) val configBuilder = oauthTokenFile .orElse(caCertFile) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 7fcfa36a771fb..257cee80fdea9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -32,6 +32,8 @@ import org.apache.spark.{SparkContext, SparkEnv, SparkException} import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} import org.apache.spark.scheduler.TaskSchedulerImpl import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig} @@ -100,8 +102,8 @@ private[spark] class KubernetesClusterSchedulerBackend( private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) - private val kubernetesClient = new DriverPodKubernetesClientProvider(conf, kubernetesNamespace) - .get + private val kubernetesClient = new DriverPodKubernetesClientProvider(conf, + Some(kubernetesNamespace)).get private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). @@ -134,6 +136,15 @@ private[spark] class KubernetesClusterSchedulerBackend( None } + // A client for talking to the external shuffle service + private val kubernetesExternalShuffleClient: Option[KubernetesExternalShuffleClient] = { + if (Utils.isDynamicAllocationEnabled(sc.conf)) { + Some(getShuffleClient()) + } else { + None + } + } + override val minRegisteredRatio = if (conf.getOption("spark.scheduler.minRegisteredResourcesRatio").isEmpty) { 0.8 @@ -183,6 +194,14 @@ private[spark] class KubernetesClusterSchedulerBackend( } } + private def getShuffleClient(): KubernetesExternalShuffleClient = { + new KubernetesExternalShuffleClient( + SparkTransportConf.fromSparkConf(conf, "shuffle"), + sc.env.securityManager, + sc.env.securityManager.isAuthenticationEnabled(), + sc.env.securityManager.isSaslEncryptionEnabled()) + } + private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { if (Utils.isDynamicAllocationEnabled(conf)) { val minNumExecutors = conf.getInt("spark.dynamicAllocation.minExecutors", 0) @@ -220,6 +239,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .map { config => new ShufflePodCache( kubernetesClient, config.shuffleNamespace, config.shuffleLabels) } shufflePodCache.foreach(_.start()) + kubernetesExternalShuffleClient.foreach(_.init(applicationId())) } } @@ -227,6 +247,7 @@ private[spark] class KubernetesClusterSchedulerBackend( // stop allocation of new resources and caches. 
allocator.shutdown() shufflePodCache.foreach(_.stop()) + kubernetesExternalShuffleClient.foreach(_.close()) // send stop message to executors so they shut down cleanly super.stop() @@ -266,10 +287,10 @@ private[spark] class KubernetesClusterSchedulerBackend( // name as the hostname. This preserves uniqueness since the end of name contains // executorId and applicationId val hostname = name.substring(Math.max(0, name.length - 63)) - val resolvedExecutorLabels = Map( SPARK_EXECUTOR_ID_LABEL -> executorId, - SPARK_APP_ID_LABEL -> applicationId()) ++ + SPARK_APP_ID_LABEL -> applicationId(), + SPARK_ROLE_LABEL -> "executor") ++ executorLabels val executorMemoryQuantity = new QuantityBuilder(false) .withAmount(s"${executorMemoryMb}M") @@ -444,6 +465,8 @@ private[spark] class KubernetesClusterSchedulerBackend( rpcEnv: RpcEnv, sparkProperties: Seq[(String, String)]) extends DriverEndpoint(rpcEnv, sparkProperties) { + private val externalShufflePort = conf.getInt("spark.shuffle.service.port", 7337) + override def receiveAndReply( context: RpcCallContext): PartialFunction[Any, Unit] = { new PartialFunction[Any, Unit]() { @@ -466,6 +489,11 @@ private[spark] class KubernetesClusterSchedulerBackend( .get() val nodeName = runningExecutorPod.getSpec.getNodeName val shufflePodIp = shufflePodCache.get.getShufflePodForExecutor(nodeName) + + // Inform the shuffle pod about this application so it can watch. + kubernetesExternalShuffleClient.foreach( + _.registerDriverWithShuffleService(shufflePodIp, externalShufflePort)) + resolvedProperties = resolvedProperties ++ Seq( (SPARK_SHUFFLE_SERVICE_HOST.key, shufflePodIp)) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index d4d3882bb8bab..ff6c710117318 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -31,10 +31,13 @@ import org.scalatest.BeforeAndAfter import scala.collection.JavaConverters._ import scala.collection.mutable -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.SparkPodInitContainerBootstrap +import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.{KubernetesExternalShuffleService, KubernetesShuffleBlockHandler, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient +import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") @@ -49,7 +52,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val ALL_EXPECTED_LABELS = Map( CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, SPARK_APP_ID_LABEL -> APP_ID, - SPARK_APP_NAME_LABEL -> APP_NAME) + SPARK_APP_NAME_LABEL -> APP_NAME, + SPARK_ROLE_LABEL -> "driver") private val CUSTOM_ANNOTATION_KEY = "customAnnotation" private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" private val INIT_CONTAINER_SECRET_NAME = 
"init-container-secret" @@ -305,6 +309,30 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verify(loggingPodStatusWatcher).awaitCompletion() } + test("Run kubernetes shuffle service.") { + expectationsForNoMountedCredentials() + expectationsForNoDependencyUploader() + + val shuffleService = new KubernetesExternalShuffleService( + SPARK_CONF, + new SecurityManager(SPARK_CONF), + new DriverPodKubernetesClientProvider(SPARK_CONF)) + + val shuffleClient = new KubernetesExternalShuffleClient( + SparkTransportConf.fromSparkConf(SPARK_CONF, "shuffle"), + new SecurityManager(SPARK_CONF), + false, + false) + + shuffleService.start() + shuffleClient.init("newapp") + + // verifies that we can connect to the shuffle service and send + // it a message. + shuffleClient.registerDriverWithShuffleService("localhost", 7337) + shuffleService.stop() + } + private def expectationsForNoDependencyUploader(): Unit = { when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index 06aac56ba2f52..1f64376b89aae 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -36,4 +36,4 @@ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark -ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.ExternalShuffleService", "1" ] +ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService", "1" ] diff --git a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosExternalShuffleService.scala b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosExternalShuffleService.scala index 859aa836a3157..cbb03c7d3b1d6 100644 --- a/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosExternalShuffleService.scala +++ b/resource-managers/mesos/src/main/scala/org/apache/spark/deploy/mesos/MesosExternalShuffleService.scala @@ -29,7 +29,8 @@ import org.apache.spark.internal.Logging import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler import org.apache.spark.network.shuffle.protocol.BlockTransferMessage -import org.apache.spark.network.shuffle.protocol.mesos.{RegisterDriver, ShuffleServiceHeartbeat} +import org.apache.spark.network.shuffle.protocol.RegisterDriver +import org.apache.spark.network.shuffle.protocol.mesos.ShuffleServiceHeartbeat import org.apache.spark.network.util.TransportConf import org.apache.spark.util.ThreadUtils From 6610cd3004c785e11f01c3d623dcc593f4ba0d86 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Fri, 2 Jun 2017 16:08:10 -0700 Subject: [PATCH 124/225] Fix client to await the driver pod (#325) --- .../deploy/kubernetes/submit/LoggingPodStatusWatcher.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala index 1633a084e463c..4a8a7308b9fe4 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/LoggingPodStatusWatcher.scala @@ -137,7 +137,7 @@ private[kubernetes] class LoggingPodStatusWatcherImpl( } override def awaitCompletion(): Unit = { - podCompletedFuture.countDown() + podCompletedFuture.await() logInfo(pod.map { p => s"Container final statuses:\n\n${containersDescription(p)}" }.getOrElse("No containers were found in the driver pod.")) From 04ff1d8e1229ac73f2e4af0b578672061fbd6871 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 2 Jun 2017 20:25:03 -0700 Subject: [PATCH 125/225] Clean up resources that are not used by pods. (#305) * Clean up resources that are not used by pods. * Make client side send correct credentials. * Simplify cleanup logic. Cancellation is no longer instantaneous and we might clean up a little later than the given TTL. However, the tradeoff is a simpler implementation with clearer contracts about when things will and will not be cleaned up. * Remove class * Fix imports and line length. * Remove import. * Add a unit test for StagingResourcesStore. * Revamp cleanup process. - Delete resources immediately when owners do not exist - Delete resources if after they are first uploaded, they are not accessed for a certain period of time. - Resource owners are more specifically defined and can have a type (currently only uses pods) * Clarify log messages * Use a single set of credentials in resource staging server. Also refactors construction of Kubernetes Clients to unify the code paths. * Fix unit test. * Safe close if creating shuffle block handler fails * Use implicit class. * Address comments. * Fix broken test. 
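The cleanup policy described in the commit message above amounts to two rules: a staged resource bundle is deleted as soon as its owner no longer exists, and it is also deleted if it is never accessed within an initial grace period after upload. The sketch below is illustrative only; `StagedResourcesCleanerSketch`, `registerBundle`, `markAccessed`, and `sweep` are hypothetical, deliberately simplified names, not the API of the `StagedResourcesCleaner` introduced by this patch.

```scala
import java.util.concurrent.{Executors, TimeUnit}
import scala.collection.mutable

class StagedResourcesCleanerSketch(initialAccessTimeoutMs: Long) {

  private case class TrackedBundle(ownerExists: () => Boolean, var accessed: Boolean = false)

  private val bundles = mutable.Map.empty[String, TrackedBundle]
  private val expirationExecutor = Executors.newSingleThreadScheduledExecutor()

  // Called when a bundle is first uploaded; its owner (e.g. a driver pod) gets a
  // grace period to fetch it before we assume the owner never launched.
  def registerBundle(resourceId: String, ownerExists: () => Boolean): Unit = synchronized {
    bundles(resourceId) = TrackedBundle(ownerExists)
    expirationExecutor.schedule(new Runnable {
      override def run(): Unit = expireIfNeverAccessed(resourceId)
    }, initialAccessTimeoutMs, TimeUnit.MILLISECONDS)
  }

  // Called whenever an init-container downloads the bundle.
  def markAccessed(resourceId: String): Unit = synchronized {
    bundles.get(resourceId).foreach(_.accessed = true)
  }

  // Periodic sweep: drop bundles whose owner no longer exists in the API server.
  def sweep(): Unit = synchronized {
    bundles.retain { case (_, bundle) => bundle.ownerExists() }
  }

  // If the bundle was never fetched before the timeout, assume its dependents
  // failed to launch and discard it.
  private def expireIfNeverAccessed(resourceId: String): Unit = synchronized {
    if (bundles.get(resourceId).exists(bundle => !bundle.accessed)) {
      bundles.remove(resourceId)
    }
  }
}
```

Deferring actual deletion to a periodic sweep matches the note above that cancellation is no longer instantaneous: a bundle may outlive its owner briefly, in exchange for simpler bookkeeping and clearer contracts about when cleanup happens.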
--- docs/running-on-kubernetes.md | 63 ++++ .../KubernetesExternalShuffleService.scala | 64 ++-- .../SparkKubernetesClientFactory.scala | 103 ++++++ .../spark/deploy/kubernetes/config.scala | 173 ++++----- .../deploy/kubernetes/submit/Client.scala | 329 +++++++++--------- ...riverInitContainerComponentsProvider.scala | 4 +- ...riverPodKubernetesCredentialsMounter.scala | 71 ++-- ...KubernetesCredentialsMounterProvider.scala | 12 +- ...iverPodKubernetesCredentialsProvider.scala | 33 +- .../SubmissionKubernetesClientProvider.scala | 55 --- .../SubmittedDependencyUploaderImpl.scala | 30 +- ...SparkDependencyDownloadInitContainer.scala | 7 - .../kubernetes/ResourceStagingServer.scala | 31 +- .../kubernetes/ResourceStagingService.scala | 19 +- .../ResourceStagingServiceImpl.scala | 52 +-- .../ResourceStagingServiceRetrofit.scala | 6 +- .../rest/kubernetes/StagedResources.scala | 24 ++ .../kubernetes/StagedResourcesCleaner.scala | 150 ++++++++ .../kubernetes/StagedResourcesOwner.scala | 34 ++ .../kubernetes/StagedResourcesStore.scala | 108 ++++++ .../DriverPodKubernetesClientProvider.scala | 103 ------ .../kubernetes/KubernetesClusterManager.scala | 16 +- .../KubernetesClusterSchedulerBackend.scala | 8 +- .../kubernetes/submit/ClientV2Suite.scala | 32 +- ...PodKubernetesCredentialsMounterSuite.scala | 12 +- ...ubernetesExternalShuffleServiceSuite.scala | 49 +++ .../SubmittedDependencyUploaderSuite.scala | 74 ++-- .../ResourceStagingServerSuite.scala | 37 +- .../ResourceStagingServiceImplSuite.scala | 60 ---- .../StagedResourcesCleanerSuite.scala | 149 ++++++++ .../StagedResourcesStoreSuite.scala | 86 +++++ .../integrationtest/KubernetesSuite.scala | 9 +- 32 files changed, 1242 insertions(+), 761 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkKubernetesClientFactory.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResources.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleaner.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesOwner.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStore.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleanerSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStoreSuite.scala diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 488efbe5eef36..e9002bdfe0502 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -450,6 +450,69 @@ from the other deployment modes. 
See the [configuration page](configuration.html client cert file, and/or OAuth token. + + spark.kubernetes.authenticate.resourceStagingServer.caCertFile + (none) + + Path to the CA cert file for connecting to the Kubernetes API server over TLS from the resource staging server when + it monitors objects in determining when to clean up resource bundles. + + + + spark.kubernetes.authenticate.resourceStagingServer.clientKeyFile + (none) + + Path to the client key file for authenticating against the Kubernetes API server from the resource staging server + when it monitors objects in determining when to clean up resource bundles. The resource staging server must have + credentials that allow it to view API objects in any namespace. + + + + spark.kubernetes.authenticate.resourceStagingServer.clientCertFile + (none) + + Path to the client cert file for authenticating against the Kubernetes API server from the resource staging server + when it monitors objects in determining when to clean up resource bundles. The resource staging server must have + credentials that allow it to view API objects in any namespace. + + + + spark.kubernetes.authenticate.resourceStagingServer.oauthToken + (none) + + OAuth token value for authenticating against the Kubernetes API server from the resource staging server + when it monitors objects in determining when to clean up resource bundles. The resource staging server must have + credentials that allow it to view API objects in any namespace. Note that this cannot be set at the same time as + spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile. + + + + spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile + (none) + + File containing the OAuth token to use when authenticating against the against the Kubernetes API server from the + resource staging server, when it monitors objects in determining when to clean up resource bundles. The resource + staging server must have credentials that allow it to view API objects in any namespace. Note that this cannot be + set at the same time as spark.kubernetes.authenticate.resourceStagingServer.oauthToken. + + + + spark.kubernetes.authenticate.resourceStagingServer.useServiceAccountCredentials + true + + Whether or not to use a service account token and a service account CA certificate when the resource staging server + authenticates to Kubernetes. If this is set, interactions with Kubernetes will authenticate using a token located at + /var/run/secrets/kubernetes.io/serviceaccount/token and the CA certificate located at + /var/run/secrets/kubernetes.io/serviceaccount/ca.crt. Note that if + spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile is set, it takes precedence + over the usage of the service account token file. Also, if + spark.kubernetes.authenticate.resourceStagingServer.caCertFile is set, it takes precedence over using + the service account's CA certificate file. This generally should be set to true (the default value) when the + resource staging server is deployed as a Kubernetes pod, but should be set to false if the resource staging server + is deployed by other means (i.e. when running the staging server process outside of Kubernetes). The resource + staging server must have credentials that allow it to view API objects in any namespace. 
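Taken together, the `spark.kubernetes.authenticate.resourceStagingServer.*` properties above control how the resource staging server itself talks to the API server while watching for the pods that own staged bundles. As a rough, purely illustrative sketch (the file paths, and the idea of assembling the keys programmatically on a `SparkConf`, are assumptions for illustration rather than anything this patch prescribes), the keys compose like this:

```scala
import org.apache.spark.SparkConf

// Placeholder paths, for illustration only.
val stagingServerConf = new SparkConf(loadDefaults = false)
  // An explicit token file takes precedence over the pod's service account token.
  .set("spark.kubernetes.authenticate.resourceStagingServer.oauthTokenFile",
    "/etc/spark-staging/api-token")
  // An explicit CA cert takes precedence over the service account's CA certificate.
  .set("spark.kubernetes.authenticate.resourceStagingServer.caCertFile",
    "/etc/spark-staging/api-ca.crt")
  // Set to false only when the staging server runs outside of Kubernetes.
  .set("spark.kubernetes.authenticate.resourceStagingServer.useServiceAccountCredentials",
    "false")
```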
+ + spark.kubernetes.executor.memoryOverhead executorMemory * 0.10, with minimum of 384 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala index 94292dae10f29..01a8a9a6899fd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala @@ -17,10 +17,11 @@ package org.apache.spark.deploy.kubernetes +import java.io.File import java.nio.ByteBuffer import io.fabric8.kubernetes.api.model.Pod -import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watch, Watcher} +import io.fabric8.kubernetes.client.{Config, KubernetesClient, KubernetesClientException, Watch, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.IOUtils import scala.collection.JavaConverters._ @@ -28,13 +29,13 @@ import scala.collection.mutable import org.apache.spark.{SecurityManager, SparkConf} import org.apache.spark.deploy.ExternalShuffleService +import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.internal.Logging import org.apache.spark.network.client.{RpcResponseCallback, TransportClient} import org.apache.spark.network.shuffle.ExternalShuffleBlockHandler import org.apache.spark.network.shuffle.protocol.{BlockTransferMessage, RegisterDriver} import org.apache.spark.network.util.TransportConf -import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider /** * An RPC endpoint that receives registration requests from Spark drivers running on Kubernetes. 
@@ -42,19 +43,16 @@ import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientPr */ private[spark] class KubernetesShuffleBlockHandler ( transportConf: TransportConf, - kubernetesClientProvider: DriverPodKubernetesClientProvider) + kubernetesClient: KubernetesClient) extends ExternalShuffleBlockHandler(transportConf, null) with Logging { private val INIT_AND_STOP_LOCK = new Object private val CONNECTED_APPS_LOCK = new Object private val connectedApps = mutable.Set.empty[String] private var shuffleWatch: Option[Watch] = None - private var kubernetesClient: Option[KubernetesClient] = None def start(): Unit = INIT_AND_STOP_LOCK.synchronized { - val client = kubernetesClientProvider.get - shuffleWatch = startShuffleWatcher(client) - kubernetesClient = Some(client) + shuffleWatch = startShuffleWatcher() } override def close(): Unit = { @@ -64,8 +62,7 @@ private[spark] class KubernetesShuffleBlockHandler ( INIT_AND_STOP_LOCK.synchronized { shuffleWatch.foreach(IOUtils.closeQuietly) shuffleWatch = None - kubernetesClient.foreach(IOUtils.closeQuietly) - kubernetesClient = None + IOUtils.closeQuietly(kubernetesClient) } } } @@ -90,9 +87,9 @@ private[spark] class KubernetesShuffleBlockHandler ( } } - private def startShuffleWatcher(client: KubernetesClient): Option[Watch] = { + private def startShuffleWatcher(): Option[Watch] = { try { - Some(client + Some(kubernetesClient .pods() .withLabels(Map(SPARK_ROLE_LABEL -> "driver").asJava) .watch(new Watcher[Pod] { @@ -137,31 +134,47 @@ private[spark] class KubernetesShuffleBlockHandler ( */ private[spark] class KubernetesExternalShuffleService( conf: SparkConf, - securityManager: SecurityManager, - kubernetesClientProvider: DriverPodKubernetesClientProvider) + securityManager: SecurityManager) extends ExternalShuffleService(conf, securityManager) { private var shuffleBlockHandlers: mutable.Buffer[KubernetesShuffleBlockHandler] = _ protected override def newShuffleBlockHandler( tConf: TransportConf): ExternalShuffleBlockHandler = { - val newBlockHandler = new KubernetesShuffleBlockHandler(tConf, kubernetesClientProvider) - newBlockHandler.start() - - // TODO: figure out a better way of doing this. - // This is necessary because the constructor is not called - // when this class is initialized through ExternalShuffleService. - if (shuffleBlockHandlers == null) { + val kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient( + conf.get(KUBERNETES_SHUFFLE_APISERVER_URI), + None, + APISERVER_AUTH_SHUFFLE_SERVICE_CONF_PREFIX, + conf, + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)) + .filter( _ => conf.get(KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS)), + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH)) + .filter( _ => conf.get(KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS))) + val newBlockHandler = new KubernetesShuffleBlockHandler(tConf, kubernetesClient) + try { + newBlockHandler.start() + // TODO: figure out a better way of doing this. + // This is necessary because the constructor is not called + // when this class is initialized through ExternalShuffleService. 
+ if (shuffleBlockHandlers == null) { shuffleBlockHandlers = mutable.Buffer.empty[KubernetesShuffleBlockHandler] + } + shuffleBlockHandlers += newBlockHandler + newBlockHandler + } catch { + case e: Throwable => + logError("Failed to create Kubernetes shuffle block handler.", e) + newBlockHandler.close() + throw e } - shuffleBlockHandlers += newBlockHandler - newBlockHandler } override def stop(): Unit = { try { super.stop() } finally { - shuffleBlockHandlers.foreach(_.close()) + if (shuffleBlockHandlers != null) { + shuffleBlockHandlers.foreach(_.close()) + } } } } @@ -169,10 +182,7 @@ private[spark] class KubernetesExternalShuffleService( private[spark] object KubernetesExternalShuffleService extends Logging { def main(args: Array[String]): Unit = { ExternalShuffleService.main(args, - (conf: SparkConf, sm: SecurityManager) => { - val kubernetesClientProvider = new DriverPodKubernetesClientProvider(conf) - new KubernetesExternalShuffleService(conf, sm, kubernetesClientProvider) - }) + (conf: SparkConf, sm: SecurityManager) => new KubernetesExternalShuffleService(conf, sm)) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkKubernetesClientFactory.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkKubernetesClientFactory.scala new file mode 100644 index 0000000000000..d2729a2db2fa0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkKubernetesClientFactory.scala @@ -0,0 +1,103 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.Files +import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient, KubernetesClient} +import io.fabric8.kubernetes.client.utils.HttpClientUtils +import okhttp3.Dispatcher + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.util.ThreadUtils + +/** + * Spark-opinionated builder for Kubernetes clients. It uses a prefix plus common suffixes to + * parse configuration keys, similar to the manner in which Spark's SecurityManager parses SSL + * options for different components. 
+ */ +private[spark] object SparkKubernetesClientFactory { + + def createKubernetesClient( + master: String, + namespace: Option[String], + kubernetesAuthConfPrefix: String, + sparkConf: SparkConf, + maybeServiceAccountToken: Option[File], + maybeServiceAccountCaCert: Option[File]): KubernetesClient = { + val oauthTokenFileConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_FILE_CONF_SUFFIX" + val oauthTokenConf = s"$kubernetesAuthConfPrefix.$OAUTH_TOKEN_CONF_SUFFIX" + val oauthTokenFile = sparkConf.getOption(oauthTokenFileConf) + .map(new File(_)) + .orElse(maybeServiceAccountToken) + val oauthTokenValue = sparkConf.getOption(oauthTokenConf) + OptionRequirements.requireNandDefined( + oauthTokenFile, + oauthTokenValue, + s"Cannot specify OAuth token through both a file $oauthTokenFileConf and a" + + s" value $oauthTokenConf.") + + val caCertFile = sparkConf + .getOption(s"$kubernetesAuthConfPrefix.$CA_CERT_FILE_CONF_SUFFIX") + .orElse(maybeServiceAccountCaCert.map(_.getAbsolutePath)) + val clientKeyFile = sparkConf + .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_KEY_FILE_CONF_SUFFIX") + val clientCertFile = sparkConf + .getOption(s"$kubernetesAuthConfPrefix.$CLIENT_CERT_FILE_CONF_SUFFIX") + val dispatcher = new Dispatcher( + ThreadUtils.newDaemonCachedThreadPool("kubernetes-dispatcher")) + val config = new ConfigBuilder() + .withApiVersion("v1") + .withMasterUrl(master) + .withWebsocketPingInterval(0) + .withOption(oauthTokenValue) { + (token, configBuilder) => configBuilder.withOauthToken(token) + }.withOption(oauthTokenFile) { + (file, configBuilder) => + configBuilder.withOauthToken(Files.toString(file, Charsets.UTF_8)) + }.withOption(caCertFile) { + (file, configBuilder) => configBuilder.withCaCertFile(file) + }.withOption(clientKeyFile) { + (file, configBuilder) => configBuilder.withClientKeyFile(file) + }.withOption(clientCertFile) { + (file, configBuilder) => configBuilder.withClientCertFile(file) + }.withOption(namespace) { + (ns, configBuilder) => configBuilder.withNamespace(ns) + }.build() + val baseHttpClient = HttpClientUtils.createHttpClient(config) + val httpClientWithCustomDispatcher = baseHttpClient.newBuilder() + .dispatcher(dispatcher) + .build() + new DefaultKubernetesClient(httpClientWithCustomDispatcher, config) + } + + private implicit class OptionConfigurableConfigBuilder(configBuilder: ConfigBuilder) { + + def withOption[T] + (option: Option[T]) + (configurator: ((T, ConfigBuilder) => ConfigBuilder)): OptionConfigurableConfigBuilder = { + new OptionConfigurableConfigBuilder(option.map { opt => + configurator(opt, configBuilder) + }.getOrElse(configBuilder)) + } + + def build(): Config = configBuilder.build() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index d1341b15afaca..dd99e0f7a5ae0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -47,120 +47,32 @@ package object config extends Logging { .stringConf .createWithDefault(s"spark-executor:$sparkVersion") - private val APISERVER_SUBMIT_CONF_PREFIX = "spark.kubernetes.authenticate.submission" - private val APISERVER_DRIVER_CONF_PREFIX = "spark.kubernetes.authenticate.driver" - - private[spark] val KUBERNETES_SUBMIT_CA_CERT_FILE = - ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.caCertFile") 
- .doc("Path to the CA cert file for connecting to Kubernetes over SSL when creating" + - " Kubernetes resources for the driver. This file should be located on the submitting" + - " machine's disk.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_SUBMIT_CLIENT_KEY_FILE = - ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.clientKeyFile") - .doc("Path to the client key file for authenticating against the Kubernetes API server" + - " when initially creating Kubernetes resources for the driver. This file should be" + - " located on the submitting machine's disk.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_SUBMIT_CLIENT_CERT_FILE = - ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.clientCertFile") - .doc("Path to the client cert file for authenticating against the Kubernetes API server" + - " when initially creating Kubernetes resources for the driver. This file should be" + - " located on the submitting machine's disk.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_SUBMIT_OAUTH_TOKEN = - ConfigBuilder(s"$APISERVER_SUBMIT_CONF_PREFIX.oauthToken") - .doc("OAuth token to use when authenticating against the against the Kubernetes API server" + - " when initially creating Kubernetes resources for the driver. Note that unlike the other" + - " authentication options, this should be the exact string value of the token to use for" + - " the authentication.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_CA_CERT_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.caCertFile") - .doc("Path to the CA cert file for connecting to Kubernetes over TLS from the driver pod" + - " when requesting executors. This file should be located on the submitting machine's disk" + - " and will be uploaded to the driver pod.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_CLIENT_KEY_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.clientKeyFile") - .doc("Path to the client key file for authenticating against the Kubernetes API server from" + - " the driver pod when requesting executors. This file should be located on the submitting" + - " machine's disk, and will be uploaded to the driver pod.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_CLIENT_CERT_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.clientCertFile") - .doc("Path to the client cert file for authenticating against the Kubernetes API server" + - " from the driver pod when requesting executors. This file should be located on the" + - " submitting machine's disk, and will be uploaded to the driver pod.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_OAUTH_TOKEN = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.oauthToken") - .doc("OAuth token to use when authenticating against the Kubernetes API server from the" + - " driver pod when requesting executors. Note that unlike the other authentication options" + - " this should be the exact string value of the token to use for the authentication. This" + - " token value is mounted as a secret on the driver pod.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.caCertFile") - .doc("Path on the driver pod's disk containing the CA cert file to use when authenticating" + - " against Kubernetes. 
Typically this is configured by spark-submit from mounting a" + - " secret from the submitting machine into the pod, and hence this configuration is marked" + - " as internal, but this can also be set manually to use a certificate that is mounted" + - " into the driver pod via other means.") - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientKeyFile") - .doc("Path on the driver pod's disk containing the client key file to use when" + - " authenticating against Kubernetes. Typically this is configured by spark-submit from" + - " mounting a secret from the submitting machine into the pod, and hence this" + - " configuration is marked as internal, but this can also be set manually to" + - " use a key file that is mounted into the driver pod via other means.") - .internal() - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.clientCertFile") - .doc("Path on the driver pod's disk containing the client cert file to use when" + - " authenticating against Kubernetes. Typically this is configured by spark-submit from" + - " mounting a secret from the submitting machine into the pod, and hence this" + - " configuration is marked as internal, but this can also be set manually to" + - " use a certificate that is mounted into the driver pod via other means.") - .internal() - .stringConf - .createOptional - - private[spark] val KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.mounted.oauthTokenFile") - .doc("Path on the driver pod's disk containing the OAuth token file to use when" + - " authenticating against Kubernetes. 
Typically this is configured by spark-submit from" + - " mounting a secret from the submitting machine into the pod, and hence this" + - " configuration is marked as internal, but this can also be set manually to" + - " use a token that is mounted into the driver pod via other means.") - .internal() - .stringConf - .createOptional + private[spark] val APISERVER_AUTH_SUBMISSION_CONF_PREFIX = + "spark.kubernetes.authenticate.submission" + private[spark] val APISERVER_AUTH_DRIVER_CONF_PREFIX = + "spark.kubernetes.authenticate.driver" + private[spark] val APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX = + "spark.kubernetes.authenticate.driver.mounted" + private[spark] val APISERVER_AUTH_RESOURCE_STAGING_SERVER_CONF_PREFIX = + "spark.kubernetes.authenticate.resourceStagingServer" + private[spark] val APISERVER_AUTH_SHUFFLE_SERVICE_CONF_PREFIX = + "spark.kubernetes.authenticate.shuffleService" + private[spark] val OAUTH_TOKEN_CONF_SUFFIX = "oauthToken" + private[spark] val OAUTH_TOKEN_FILE_CONF_SUFFIX = "oauthTokenFile" + private[spark] val CLIENT_KEY_FILE_CONF_SUFFIX = "clientKeyFile" + private[spark] val CLIENT_CERT_FILE_CONF_SUFFIX = "clientCertFile" + private[spark] val CA_CERT_FILE_CONF_SUFFIX = "caCertFile" + + private[spark] val RESOURCE_STAGING_SERVER_USE_SERVICE_ACCOUNT_CREDENTIALS = + ConfigBuilder( + s"$APISERVER_AUTH_RESOURCE_STAGING_SERVER_CONF_PREFIX.useServiceAccountCredentials") + .doc("Use a service account token and CA certificate in the resource staging server to" + + " watch the API server's objects.") + .booleanConf + .createWithDefault(true) private[spark] val KUBERNETES_SERVICE_ACCOUNT_NAME = - ConfigBuilder(s"$APISERVER_DRIVER_CONF_PREFIX.serviceAccountName") + ConfigBuilder(s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.serviceAccountName") .doc("Service account that is used when running the driver pod. The driver pod uses" + " this service account when requesting executor pods from the API server. If specific" + " credentials are given for the driver pod to use, the driver will favor" + @@ -259,6 +171,19 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_SHUFFLE_APISERVER_URI = + ConfigBuilder("spark.kubernetes.shuffle.apiServer.url") + .doc("URL to the Kubernetes API server that the shuffle service will monitor for Spark pods.") + .stringConf + .createWithDefault(KUBERNETES_MASTER_INTERNAL_URL) + + private[spark] val KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS = + ConfigBuilder(s"$APISERVER_AUTH_SHUFFLE_SERVICE_CONF_PREFIX.useServiceAccountCredentials") + .doc("Whether or not to use service account credentials when contacting the API server from" + + " the shuffle service.") + .booleanConf + .createWithDefault(true) + private[spark] val KUBERNETES_ALLOCATION_BATCH_SIZE = ConfigBuilder("spark.kubernetes.allocation.batch.size") .doc("Number of pods to launch at once in each round of dynamic allocation. ") @@ -285,12 +210,36 @@ package object config extends Logging { .createWithDefaultString("1s") // Spark resource staging server. + private[spark] val RESOURCE_STAGING_SERVER_API_SERVER_URL = + ConfigBuilder("spark.kubernetes.resourceStagingServer.apiServer.url") + .doc("URL for the Kubernetes API server. The resource staging server monitors the API" + + " server to check when pods no longer are using mounted resources. 
Note that this isn't" + + " to be used in Spark applications, as the API server URL should be set via spark.master.") + .stringConf + .createWithDefault(KUBERNETES_MASTER_INTERNAL_URL) + + private[spark] val RESOURCE_STAGING_SERVER_API_SERVER_CA_CERT_FILE = + ConfigBuilder("spark.kubernetes.resourceStagingServer.apiServer.caCertFile") + .doc("CA certificate for the resource staging server to use when contacting the Kubernetes" + + " API server over TLS.") + .stringConf + .createOptional + private[spark] val RESOURCE_STAGING_SERVER_PORT = ConfigBuilder("spark.kubernetes.resourceStagingServer.port") .doc("Port for the Kubernetes resource staging server to listen on.") .intConf .createWithDefault(10000) + private[spark] val RESOURCE_STAGING_SERVER_INITIAL_ACCESS_EXPIRATION_TIMEOUT = + ConfigBuilder("spark.kubernetes.resourceStagingServer.initialAccessExpirationTimeout") + .doc("The resource staging server will wait for any resource bundle to be accessed for a" + + " first time for this period. If this timeout expires before the resources are accessed" + + " the first time, the resources are cleaned up under the assumption that the dependents" + + " of the given resource bundle failed to launch at all.") + .timeConf(TimeUnit.MILLISECONDS) + .createWithDefaultString("30m") + private[spark] val RESOURCE_STAGING_SERVER_KEY_PEM = ConfigBuilder("spark.ssl.kubernetes.resourceStagingServer.keyPem") .doc("Key PEM file to use when having the Kubernetes dependency server listen on TLS.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index dc8a6da45495e..85dac3df57b4c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -20,10 +20,11 @@ import java.io.File import java.util.Collections import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder} +import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.JavaConverters._ -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.ConfigurationUtils +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl @@ -42,18 +43,18 @@ import org.apache.spark.util.Utils * where different steps of submission should be factored out into separate classes. 
*/ private[spark] class Client( - appName: String, - kubernetesAppId: String, - mainClass: String, - sparkConf: SparkConf, - appArgs: Array[String], - sparkJars: Seq[String], - sparkFiles: Seq[String], - waitForAppCompletion: Boolean, - kubernetesClientProvider: SubmissionKubernetesClientProvider, - initContainerComponentsProvider: DriverInitContainerComponentsProvider, - kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, - loggingPodStatusWatcher: LoggingPodStatusWatcher) + appName: String, + kubernetesAppId: String, + mainClass: String, + sparkConf: SparkConf, + appArgs: Array[String], + sparkJars: Seq[String], + sparkFiles: Seq[String], + waitForAppCompletion: Boolean, + kubernetesClient: KubernetesClient, + initContainerComponentsProvider: DriverInitContainerComponentsProvider, + kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, + loggingPodStatusWatcher: LoggingPodStatusWatcher) extends Logging { private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) @@ -89,142 +90,134 @@ private[spark] class Client( val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") - Utils.tryWithResource(kubernetesClientProvider.get) { kubernetesClient => - val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => - new EnvVarBuilder() - .withName(ENV_SUBMIT_EXTRA_CLASSPATH) - .withValue(classPath) - .build() - } - val driverContainer = new ContainerBuilder() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") - .addToEnv(driverExtraClasspathEnv.toSeq: _*) - .addNewEnv() - .withName(ENV_DRIVER_MEMORY) - .withValue(driverContainerMemoryWithOverhead + "m") - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_MAIN_CLASS) - .withValue(mainClass) - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_ARGS) - .withValue(appArgs.mkString(" ")) - .endEnv() + val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => + new EnvVarBuilder() + .withName(ENV_SUBMIT_EXTRA_CLASSPATH) + .withValue(classPath) .build() - val basePod = new PodBuilder() - .withNewMetadata() - .withName(kubernetesDriverPodName) - .addToLabels(allLabels.asJava) - .addToAnnotations(parsedCustomAnnotations.asJava) - .endMetadata() - .withNewSpec() - .withRestartPolicy("Never") - .addToContainers(driverContainer) - .endSpec() + } + val driverContainer = new ContainerBuilder() + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy("IfNotPresent") + .addToEnv(driverExtraClasspathEnv.toSeq: _*) + .addNewEnv() + .withName(ENV_DRIVER_MEMORY) + .withValue(driverContainerMemoryWithOverhead + "m") + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_MAIN_CLASS) + .withValue(mainClass) + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_ARGS) + .withValue(appArgs.mkString(" ")) + .endEnv() + .build() + val basePod = new PodBuilder() + .withNewMetadata() + .withName(kubernetesDriverPodName) + .addToLabels(allLabels.asJava) + .addToAnnotations(parsedCustomAnnotations.asJava) + .endMetadata() + .withNewSpec() + .withRestartPolicy("Never") + .addToContainers(driverContainer) + .endSpec() - val maybeSubmittedDependencyUploader = initContainerComponentsProvider + val maybeSubmittedDependencyUploader = initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(allLabels) - val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => - 
SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) - } - val maybeSecretBuilder = initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceIdentifiers.map(_.secrets())) - val maybeSubmittedDependenciesSecret = maybeSecretBuilder.map(_.build()) - val initContainerConfigMap = initContainerComponentsProvider + val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => + SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) + } + val maybeSecretBuilder = initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceIdentifiers.map(_.secrets())) + val maybeSubmittedDependenciesSecret = maybeSecretBuilder.map(_.build()) + val initContainerConfigMap = initContainerComponentsProvider .provideInitContainerConfigMapBuilder(maybeSubmittedResourceIdentifiers.map(_.ids())) .build() - val podWithInitContainer = initContainerComponentsProvider + val podWithInitContainer = initContainerComponentsProvider .provideInitContainerBootstrap() .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod) - val containerLocalizedFilesResolver = initContainerComponentsProvider - .provideContainerLocalizedFilesResolver() - val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() - val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() + val containerLocalizedFilesResolver = initContainerComponentsProvider + .provideContainerLocalizedFilesResolver() + val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() + val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() - val executorInitContainerConfiguration = initContainerComponentsProvider - .provideExecutorInitContainerConfiguration() - val sparkConfWithExecutorInit = executorInitContainerConfiguration - .configureSparkConfForExecutorInitContainer(sparkConf) - val credentialsMounter = kubernetesCredentialsMounterProvider - .getDriverPodKubernetesCredentialsMounter() - val credentialsSecret = credentialsMounter.createCredentialsSecret() - val podWithInitContainerAndMountedCreds = credentialsMounter.mountDriverKubernetesCredentials( - podWithInitContainer, driverContainer.getName, credentialsSecret) - val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations( - sparkConfWithExecutorInit) - if (resolvedSparkJars.nonEmpty) { - resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) - } - if (resolvedSparkFiles.nonEmpty) { - resolvedSparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) - } - resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) - resolvedSparkConf.set("spark.app.id", kubernetesAppId) - // We don't need this anymore since we just set the JVM options on the environment - resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) - resolvedSparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => - resolvedSparkConf.set(KUBERNETES_SUBMIT_OAUTH_TOKEN.key, "") - } - resolvedSparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => - resolvedSparkConf.set(KUBERNETES_DRIVER_OAUTH_TOKEN.key, "") - } - val resolvedLocalClasspath = containerLocalizedFilesResolver - .resolveSubmittedAndRemoteSparkJars() - val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { - case (confKey, confValue) => s"-D$confKey=$confValue" - }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val 
resolvedDriverPod = podWithInitContainerAndMountedCreds.editSpec() - .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) - .addNewEnv() - .withName(ENV_MOUNTED_CLASSPATH) - .withValue(resolvedLocalClasspath.mkString(File.pathSeparator)) - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_JAVA_OPTS) - .withValue(resolvedDriverJavaOpts) - .endEnv() - .endContainer() - .endSpec() - .build() - Utils.tryWithResource( - kubernetesClient - .pods() - .withName(resolvedDriverPod.getMetadata.getName) - .watch(loggingPodStatusWatcher)) { _ => - val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) - try { - val driverOwnedResources = Seq(initContainerConfigMap) ++ - maybeSubmittedDependenciesSecret.toSeq ++ - credentialsSecret.toSeq - val driverPodOwnerReference = new OwnerReferenceBuilder() - .withName(createdDriverPod.getMetadata.getName) - .withApiVersion(createdDriverPod.getApiVersion) - .withUid(createdDriverPod.getMetadata.getUid) - .withKind(createdDriverPod.getKind) - .withController(true) - .build() - driverOwnedResources.foreach { resource => - val originalMetadata = resource.getMetadata - originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) - } - kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() - } catch { - case e: Throwable => - kubernetesClient.pods().delete(createdDriverPod) - throw e - } - if (waitForAppCompletion) { - logInfo(s"Waiting for application $kubernetesAppId to finish...") - loggingPodStatusWatcher.awaitCompletion() - logInfo(s"Application $kubernetesAppId finished.") - } else { - logInfo(s"Deployed Spark application $kubernetesAppId into Kubernetes.") + val executorInitContainerConfiguration = initContainerComponentsProvider + .provideExecutorInitContainerConfiguration() + val sparkConfWithExecutorInit = executorInitContainerConfiguration + .configureSparkConfForExecutorInitContainer(sparkConf) + val credentialsMounter = kubernetesCredentialsMounterProvider + .getDriverPodKubernetesCredentialsMounter() + val credentialsSecret = credentialsMounter.createCredentialsSecret() + val podWithInitContainerAndMountedCreds = credentialsMounter.mountDriverKubernetesCredentials( + podWithInitContainer, driverContainer.getName, credentialsSecret) + val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations( + sparkConfWithExecutorInit) + if (resolvedSparkJars.nonEmpty) { + resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) + } + if (resolvedSparkFiles.nonEmpty) { + resolvedSparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) + } + resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) + resolvedSparkConf.set("spark.app.id", kubernetesAppId) + // We don't need this anymore since we just set the JVM options on the environment + resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + val resolvedLocalClasspath = containerLocalizedFilesResolver + .resolveSubmittedAndRemoteSparkJars() + val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { + case (confKey, confValue) => s"-D$confKey=$confValue" + }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") + val resolvedDriverPod = podWithInitContainerAndMountedCreds.editSpec() + .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) + .addNewEnv() + .withName(ENV_MOUNTED_CLASSPATH) + .withValue(resolvedLocalClasspath.mkString(File.pathSeparator)) + .endEnv() + .addNewEnv() + 
.withName(ENV_DRIVER_JAVA_OPTS) + .withValue(resolvedDriverJavaOpts) + .endEnv() + .endContainer() + .endSpec() + .build() + Utils.tryWithResource( + kubernetesClient + .pods() + .withName(resolvedDriverPod.getMetadata.getName) + .watch(loggingPodStatusWatcher)) { _ => + val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) + try { + val driverOwnedResources = Seq(initContainerConfigMap) ++ + maybeSubmittedDependenciesSecret.toSeq ++ + credentialsSecret.toSeq + val driverPodOwnerReference = new OwnerReferenceBuilder() + .withName(createdDriverPod.getMetadata.getName) + .withApiVersion(createdDriverPod.getApiVersion) + .withUid(createdDriverPod.getMetadata.getUid) + .withKind(createdDriverPod.getKind) + .withController(true) + .build() + driverOwnedResources.foreach { resource => + val originalMetadata = resource.getMetadata + originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) } + kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() + } catch { + case e: Throwable => + kubernetesClient.pods().delete(createdDriverPod) + throw e + } + if (waitForAppCompletion) { + logInfo(s"Waiting for application $kubernetesAppId to finish...") + loggingPodStatusWatcher.awaitCompletion() + logInfo(s"Application $kubernetesAppId finished.") + } else { + logInfo(s"Deployed Spark application $kubernetesAppId into Kubernetes.") } } } @@ -268,27 +261,43 @@ private[spark] object Client { val appName = sparkConf.getOption("spark.app.name") .getOrElse("spark") val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + val master = resolveK8sMaster(sparkConf.get("spark.master")) val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( - sparkConf, kubernetesAppId, sparkJars, sparkFiles, sslOptionsProvider.getSslOptions) - val kubernetesClientProvider = new SubmissionKubernetesClientProviderImpl(sparkConf) - val kubernetesCredentialsMounterProvider = - new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) - val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) - val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)).filter( _ => waitForAppCompletion) - val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl(kubernetesAppId, loggingInterval) - new Client( - appName, - kubernetesAppId, - mainClass, - sparkConf, - appArgs, - sparkJars, - sparkFiles, - waitForAppCompletion, - kubernetesClientProvider, - initContainerComponentsProvider, - kubernetesCredentialsMounterProvider, - loggingPodStatusWatcher).run() + sparkConf, + kubernetesAppId, + namespace, + sparkJars, + sparkFiles, + sslOptionsProvider.getSslOptions) + Utils.tryWithResource(SparkKubernetesClientFactory.createKubernetesClient( + master, + Some(namespace), + APISERVER_AUTH_SUBMISSION_CONF_PREFIX, + sparkConf, + None, + None)) { kubernetesClient => + val kubernetesCredentialsMounterProvider = + new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) + val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) + val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)) + .filter( _ => waitForAppCompletion) + val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl( + kubernetesAppId, loggingInterval) + new Client( + appName, + kubernetesAppId, + mainClass, + sparkConf, + appArgs, + sparkJars, + sparkFiles, + 
waitForAppCompletion, + kubernetesClient, + initContainerComponentsProvider, + kubernetesCredentialsMounterProvider, + loggingPodStatusWatcher).run() + } } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index 7fbb0c9274bf5..ccb349c5b2988 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.deploy.kubernetes.submit +import java.io.File + import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ @@ -46,6 +48,7 @@ private[spark] trait DriverInitContainerComponentsProvider { private[spark] class DriverInitContainerComponentsProviderImpl( sparkConf: SparkConf, kubernetesAppId: String, + namespace: String, sparkJars: Seq[String], sparkFiles: Seq[String], resourceStagingServerExternalSslOptions: SSLOptions) @@ -98,7 +101,6 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val maybeSecretName = maybeResourceStagingServerUri.map { _ => s"$kubernetesAppId-init-secret" } - private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) private val configMapName = s"$kubernetesAppId-init-config" private val configMapKey = s"$kubernetesAppId-init-config-key" private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala index ded0237732ce0..b13800f389605 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala @@ -53,41 +53,50 @@ private[spark] trait DriverPodKubernetesCredentialsMounter { } private[spark] class DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId: String, - submitterLocalDriverPodKubernetesCredentials: KubernetesCredentials, - maybeUserSpecifiedMountedClientKeyFile: Option[String], - maybeUserSpecifiedMountedClientCertFile: Option[String], - maybeUserSpecifiedMountedOAuthTokenFile: Option[String], - maybeUserSpecifiedMountedCaCertFile: Option[String]) + kubernetesAppId: String, + submitterLocalDriverPodKubernetesCredentials: KubernetesCredentials, + maybeUserSpecifiedMountedClientKeyFile: Option[String], + maybeUserSpecifiedMountedClientCertFile: Option[String], + maybeUserSpecifiedMountedOAuthTokenFile: Option[String], + maybeUserSpecifiedMountedCaCertFile: Option[String]) extends DriverPodKubernetesCredentialsMounter { override def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf = { val resolvedMountedClientKeyFile = resolveSecretLocation( - 
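Client.main now obtains the fabric8 client from SparkKubernetesClientFactory and scopes it with Utils.tryWithResource so the underlying HTTP connection pool is closed even when submission fails. That helper is Spark-internal; a self-contained sketch of the same loan pattern, assuming nothing beyond java.lang.AutoCloseable:

    object Loan {
      // Acquire a closeable resource, hand it to `f`, and close it on every exit path.
      def tryWithResource[R <: AutoCloseable, T](createResource: => R)(f: R => T): T = {
        val resource = createResource
        try {
          f(resource)
        } finally {
          resource.close()
        }
      }
    }

    // Usage sketch (a default-configured client, unlike the factory call above):
    // Loan.tryWithResource(new io.fabric8.kubernetes.client.DefaultKubernetesClient()) { client =>
    //   client.pods().list()
    // }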
maybeUserSpecifiedMountedClientKeyFile, - submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, - DRIVER_CREDENTIALS_CLIENT_KEY_PATH) + maybeUserSpecifiedMountedClientKeyFile, + submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_PATH) val resolvedMountedClientCertFile = resolveSecretLocation( - maybeUserSpecifiedMountedClientCertFile, - submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, - DRIVER_CREDENTIALS_CLIENT_CERT_PATH) + maybeUserSpecifiedMountedClientCertFile, + submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_PATH) val resolvedMountedCaCertFile = resolveSecretLocation( - maybeUserSpecifiedMountedCaCertFile, - submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, - DRIVER_CREDENTIALS_CA_CERT_PATH) + maybeUserSpecifiedMountedCaCertFile, + submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_PATH) val resolvedMountedOAuthTokenFile = resolveSecretLocation( - maybeUserSpecifiedMountedOAuthTokenFile, - submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, - DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) + maybeUserSpecifiedMountedOAuthTokenFile, + submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) val sparkConfWithCredentialLocations = sparkConf.clone() - .setOption(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE, resolvedMountedCaCertFile) - .setOption(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE, resolvedMountedClientKeyFile) - .setOption(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE, resolvedMountedClientCertFile) - .setOption(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN, resolvedMountedOAuthTokenFile) - sparkConfWithCredentialLocations.get(KUBERNETES_DRIVER_OAUTH_TOKEN).foreach { _ => - sparkConfWithCredentialLocations.set(KUBERNETES_DRIVER_OAUTH_TOKEN, "") - } - sparkConfWithCredentialLocations.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { _ => - sparkConfWithCredentialLocations.set(KUBERNETES_SUBMIT_OAUTH_TOKEN, "") + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + resolvedMountedCaCertFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + resolvedMountedClientKeyFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + resolvedMountedClientCertFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", + resolvedMountedOAuthTokenFile) + // Redact all OAuth token values + sparkConfWithCredentialLocations + .getAll + .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)).map(_._1) + .foreach { + sparkConfWithCredentialLocations.set(_, "") } sparkConfWithCredentialLocations } @@ -141,9 +150,9 @@ private[spark] class DriverPodKubernetesCredentialsMounterImpl( } private def resolveSecretLocation( - mountedUserSpecified: Option[String], - valueMountedFromSubmitter: Option[String], - mountedCanonicalLocation: String): Option[String] = { + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + mountedCanonicalLocation: String): Option[String] = { mountedUserSpecified.orElse(valueMountedFromSubmitter.map( _ => { mountedCanonicalLocation })) @@ -167,7 +176,7 @@ private[spark] class DriverPodKubernetesCredentialsMounterImpl( } private class OptionSettableSparkConf(sparkConf: SparkConf) { - def setOption[T](configEntry: OptionalConfigEntry[T], option: Option[T]): SparkConf = { + def setOption(configEntry: 
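After recording the mounted credential locations, the mounter blanks every configuration value whose key ends with the OAuth token suffix so raw tokens are never carried into the driver pod's system properties. A sketch of that redaction step over a SparkConf; the default suffix string below is an assumption standing in for OAUTH_TOKEN_CONF_SUFFIX:

    import org.apache.spark.SparkConf

    object TokenRedaction {
      // Overwrite the value of any key that looks like an OAuth token with an
      // empty string, leaving all other settings untouched.
      def redactOAuthTokens(conf: SparkConf, tokenSuffix: String = "oauthToken"): SparkConf = {
        val redacted = conf.clone()
        redacted.getAll
          .map(_._1)
          .filter(_.endsWith(tokenSuffix))
          .foreach(redacted.set(_, ""))
        redacted
      }
    }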
String, option: Option[String]): SparkConf = { option.map( opt => { sparkConf.set(configEntry, opt) }).getOrElse(sparkConf) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala index 3f0e7d97275a5..913279198146a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala @@ -37,9 +37,13 @@ private[spark] class DriverPodKubernetesCredentialsMounterProviderImpl( new DriverPodKubernetesCredentialsMounterImpl( kubernetesAppId, submitterLocalDriverPodKubernetesCredentials, - sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE), - sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE), - sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN), - sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE)) + sparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX"), + sparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX"), + sparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX"), + sparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX")) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala index 404741520c059..41b0cf8ceaeab 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala @@ -29,25 +29,20 @@ import org.apache.spark.internal.config.OptionalConfigEntry private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { def get(): KubernetesCredentials = { - sparkConf.get(KUBERNETES_SERVICE_ACCOUNT_NAME).foreach { _ => - require(sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).isEmpty, - "Cannot specify both a service account and a driver pod OAuth token.") - require(sparkConf.get(KUBERNETES_DRIVER_CA_CERT_FILE).isEmpty, - "Cannot specify both a service account and a driver pod CA cert file.") - require(sparkConf.get(KUBERNETES_DRIVER_CLIENT_KEY_FILE).isEmpty, - "Cannot specify both a service account and a driver pod client key file.") - require(sparkConf.get(KUBERNETES_DRIVER_CLIENT_CERT_FILE).isEmpty, - "Cannot specify both a service account and a driver pod client cert file.") - } - val oauthTokenBase64 = sparkConf.get(KUBERNETES_DRIVER_OAUTH_TOKEN).map { token => + val oauthTokenBase64 = sparkConf + .getOption(s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") + .map { token => BaseEncoding.base64().encode(token.getBytes(Charsets.UTF_8)) } - val caCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CA_CERT_FILE, - s"Driver CA cert file provided at %s does not exist or is not a file.") - val clientKeyDataBase64 = 
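The setOption helper now takes a plain string key because the mounted-credential settings are assembled from a prefix and a suffix rather than typed OptionalConfigEntry constants. A small sketch of the same conditional-set idiom as an implicit wrapper (the wrapper name and the key strings in the usage comment are illustrative):

    import org.apache.spark.SparkConf

    object ConfImplicits {
      implicit class OptionSettableConf(sparkConf: SparkConf) {
        // Set the key only when a value is present; return the conf either way
        // so calls can be chained.
        def setOption(key: String, value: Option[String]): SparkConf = {
          value.foreach(sparkConf.set(key, _))
          sparkConf
        }
      }
    }

    // import ConfImplicits._
    // new SparkConf(false)
    //   .setOption("spark.kubernetes.authenticate.driver.mounted.caCertFile", Some("/mnt/ca.crt"))
    //   .setOption("spark.kubernetes.authenticate.driver.mounted.oauthTokenFile", None)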
safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_KEY_FILE, - s"Driver client key file provided at %s does not exist or is not a file.") - val clientCertDataBase64 = safeFileConfToBase64(KUBERNETES_DRIVER_CLIENT_CERT_FILE, - s"Driver client cert file provided at %s does not exist or is not a file.") + val caCertDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + s"Driver CA cert file provided at %s does not exist or is not a file.") + val clientKeyDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + s"Driver client key file provided at %s does not exist or is not a file.") + val clientCertDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + s"Driver client cert file provided at %s does not exist or is not a file.") KubernetesCredentials( oauthTokenBase64 = oauthTokenBase64, caCertDataBase64 = caCertDataBase64, @@ -56,9 +51,9 @@ private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf } private def safeFileConfToBase64( - conf: OptionalConfigEntry[String], + conf: String, fileNotFoundFormatString: String): Option[String] = { - sparkConf.get(conf) + sparkConf.getOption(conf) .map(new File(_)) .map { file => require(file.isFile, String.format(fileNotFoundFormatString, file.getAbsolutePath)) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala deleted file mode 100644 index 17b61d4a6ace0..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmissionKubernetesClientProvider.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
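DriverPodKubernetesCredentialsProvider reads each credential path from the prefix/suffix keys and ships the file contents base64-encoded inside KubernetesCredentials, failing fast when a configured path is not a regular file. A sketch of that file-to-base64 step with the same Guava utilities (the function name is illustrative):

    import java.io.File

    import com.google.common.io.{BaseEncoding, Files}

    object CredentialFiles {
      // Read an optional credential file and return its contents base64-encoded.
      def fileToBase64(maybePath: Option[String], description: String): Option[String] = {
        maybePath.map(new File(_)).map { file =>
          require(file.isFile,
            s"$description at ${file.getAbsolutePath} does not exist or is not a file.")
          BaseEncoding.base64().encode(Files.toByteArray(file))
        }
      }
    }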
- */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.client.{ConfigBuilder, DefaultKubernetesClient, KubernetesClient} - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.internal.Logging - -trait SubmissionKubernetesClientProvider { - def get: KubernetesClient -} - -private[spark] class SubmissionKubernetesClientProviderImpl(sparkConf: SparkConf) - extends SubmissionKubernetesClientProvider with Logging { - - private val namespace = sparkConf.get(KUBERNETES_NAMESPACE) - private val master = resolveK8sMaster(sparkConf.get("spark.master")) - - override def get: KubernetesClient = { - var k8ConfBuilder = new ConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(master) - .withNamespace(namespace) - sparkConf.get(KUBERNETES_SUBMIT_CA_CERT_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withCaCertFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_CLIENT_KEY_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withClientKeyFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_CLIENT_CERT_FILE).foreach { - f => k8ConfBuilder = k8ConfBuilder.withClientCertFile(f) - } - sparkConf.get(KUBERNETES_SUBMIT_OAUTH_TOKEN).foreach { token => - k8ConfBuilder = k8ConfBuilder.withOauthToken(token) - } - val k8ClientConfig = k8ConfBuilder.build - new DefaultKubernetesClient(k8ClientConfig) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala index 9d0d863d174bc..a891cf3904d2d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala @@ -21,12 +21,14 @@ import javax.ws.rs.core.MediaType import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} import okhttp3.RequestBody import retrofit2.Call import org.apache.spark.{SparkException, SSLOptions} -import org.apache.spark.deploy.kubernetes.{CompressionUtils, KubernetesCredentials} -import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.deploy.kubernetes.CompressionUtils +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourcesOwner, StagedResourcesOwnerType} import org.apache.spark.util.Utils private[spark] trait SubmittedDependencyUploader { @@ -76,29 +78,23 @@ private[spark] class SubmittedDependencyUploaderImpl( Utils.tryWithResource(new FileOutputStream(filesTgz)) { filesOutputStream => CompressionUtils.writeTarGzipToStream(filesOutputStream, files.map(_.getAbsolutePath)) } - // TODO provide credentials properly when the staging server monitors the Kubernetes API. 
- val kubernetesCredentialsString = OBJECT_MAPPER.writer() - .writeValueAsString(KubernetesCredentials(None, None, None, None)) - val labelsAsString = OBJECT_MAPPER.writer().writeValueAsString(podLabels) + val stagedResourcesOwner = StagedResourcesOwner( + ownerNamespace = podNamespace, + ownerLabels = podLabels, + ownerType = StagedResourcesOwnerType.Pod) + val stagedResourcesOwnerString = OBJECT_MAPPER.writeValueAsString(stagedResourcesOwner) + val stagedResourcesOwnerBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), stagedResourcesOwnerString) val filesRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) - - val kubernetesCredentialsBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) - - val namespaceRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), podNamespace) - - val labelsRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsAsString) + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), filesTgz) val service = retrofitClientFactory.createRetrofitClient( stagingServerUri, classOf[ResourceStagingServiceRetrofit], stagingServiceSslOptions) val uploadResponse = service.uploadResources( - labelsRequestBody, namespaceRequestBody, filesRequestBody, kubernetesCredentialsBody) + resources = filesRequestBody, resourcesOwner = stagedResourcesOwnerBody) getTypedResponseResult(uploadResponse) } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala index 9bdc224f10c90..ac19c2463218b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala @@ -204,13 +204,6 @@ private class FileFetcherImpl(sparkConf: SparkConf, securityManager: SparkSecuri } } -private case class StagedResources( - resourceSecret: String, - podLabels: Map[String, String], - podNamespace: String, - resourcesFile: File, - kubernetesCredentials: KubernetesCredentials) - object KubernetesSparkDependencyDownloadInitContainer extends Logging { def main(args: Array[String]): Unit = { logInfo("Starting init-container to download Spark application dependencies.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala index 34594ba518b62..0b97317eba8b1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServer.scala @@ -21,6 +21,7 @@ import java.io.File import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.jaxrs.json.JacksonJaxbJsonProvider import com.fasterxml.jackson.module.scala.DefaultScalaModule +import io.fabric8.kubernetes.client.Config import org.eclipse.jetty.http.HttpVersion import org.eclipse.jetty.server.{HttpConfiguration, 
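The uploader now sends exactly two multipart parts: the gzipped tarball of submitted dependencies and a JSON description of the owner the staging server should watch. A sketch of how those okhttp request bodies are assembled; OwnerDescriptor is a stand-in for StagedResourcesOwner, not the class added by this patch:

    import java.io.File

    import com.fasterxml.jackson.databind.ObjectMapper
    import com.fasterxml.jackson.module.scala.DefaultScalaModule
    import okhttp3.{MediaType, RequestBody}

    object UploadParts {
      // Stand-in for StagedResourcesOwner: the namespace and labels identifying the owner.
      case class OwnerDescriptor(ownerNamespace: String, ownerLabels: Map[String, String])

      private val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule)

      // Serialize the owner to JSON and wrap both parts as okhttp request bodies,
      // ready to hand to the Retrofit uploadResources call.
      def build(resourcesTgz: File, owner: OwnerDescriptor): (RequestBody, RequestBody) = {
        val resourcesBody =
          RequestBody.create(MediaType.parse("multipart/form-data"), resourcesTgz)
        val ownerBody = RequestBody.create(
          MediaType.parse("application/json"), objectMapper.writeValueAsString(owner))
        (resourcesBody, ownerBody)
      }
    }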
HttpConnectionFactory, Server, ServerConnector, SslConnectionFactory} import org.eclipse.jetty.servlet.{ServletContextHandler, ServletHolder} @@ -30,9 +31,10 @@ import org.glassfish.jersey.server.ResourceConfig import org.glassfish.jersey.servlet.ServletContainer import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.SparkKubernetesClientFactory import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.internal.Logging -import org.apache.spark.util.Utils +import org.apache.spark.util.{SystemClock, ThreadUtils, Utils} private[spark] class ResourceStagingServer( port: Int, @@ -98,8 +100,33 @@ object ResourceStagingServer { } else { new SparkConf(true) } + val apiServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_API_SERVER_URL) + val initialAccessExpirationMs = sparkConf.get( + RESOURCE_STAGING_SERVER_INITIAL_ACCESS_EXPIRATION_TIMEOUT) val dependenciesRootDir = Utils.createTempDir(namePrefix = "local-application-dependencies") - val serviceInstance = new ResourceStagingServiceImpl(dependenciesRootDir) + val useServiceAccountCredentials = sparkConf.get( + RESOURCE_STAGING_SERVER_USE_SERVICE_ACCOUNT_CREDENTIALS) + // Namespace doesn't matter because we list resources from various namespaces + val kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient( + apiServerUri, + None, + APISERVER_AUTH_RESOURCE_STAGING_SERVER_CONF_PREFIX, + sparkConf, + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)) + .filter( _ => useServiceAccountCredentials), + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH)) + .filter( _ => useServiceAccountCredentials)) + + val stagedResourcesStore = new StagedResourcesStoreImpl(dependenciesRootDir) + val stagedResourcesCleaner = new StagedResourcesCleanerImpl( + stagedResourcesStore, + kubernetesClient, + ThreadUtils.newDaemonSingleThreadScheduledExecutor("resource-expiration"), + new SystemClock(), + initialAccessExpirationMs) + stagedResourcesCleaner.start() + val serviceInstance = new ResourceStagingServiceImpl( + stagedResourcesStore, stagedResourcesCleaner) val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) val server = new ResourceStagingServer( port = sparkConf.get(RESOURCE_STAGING_SERVER_PORT), diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala index 525711e78c01c..b9d283a99ade9 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingService.scala @@ -52,13 +52,12 @@ private[spark] trait ResourceStagingService { * The tarball should contain the files laid out in a flat hierarchy, without * any directories. We take a stream here to avoid holding these entirely in * memory. - * @param podLabels Labels of pods to monitor. When no more pods are running with the given label, - * after some period of time, these dependencies will be cleared. - * @param podNamespace Namespace of pods to monitor. - * @param kubernetesCredentials These credentials are primarily used to monitor the progress of - * the application. When the application shuts down normally, shuts - * down abnormally and does not restart, or fails to start entirely, - * the data uploaded through this endpoint is cleared. 
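When RESOURCE_STAGING_SERVER_USE_SERVICE_ACCOUNT_CREDENTIALS is enabled, the staging server authenticates to the API server with the token and CA certificate that Kubernetes mounts into every pod; otherwise those files simply are not handed to the client factory. A sketch of that conditional wiring, with the conventional in-pod paths hard-coded in place of fabric8's Config constants:

    import java.io.File

    object ServiceAccountFiles {
      // Standard locations Kubernetes uses for the mounted service account; the
      // real code reads them from io.fabric8.kubernetes.client.Config instead.
      private val TokenPath = "/var/run/secrets/kubernetes.io/serviceaccount/token"
      private val CaCertPath = "/var/run/secrets/kubernetes.io/serviceaccount/ca.crt"

      // Only pass the mounted credentials along when the operator opted in.
      def resolve(useServiceAccountCredentials: Boolean): (Option[File], Option[File]) = {
        val token = Some(new File(TokenPath)).filter(_ => useServiceAccountCredentials)
        val caCert = Some(new File(CaCertPath)).filter(_ => useServiceAccountCredentials)
        (token, caCert)
      }
    }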
+ * @param resourcesOwner A description of the "owner" of a resource. A resource owner is a + * Kubernetes API object in a given namespace, with a specific set of + * labels. When there are no resources of the owner's type in the given + * namespace with the given labels, the resources are cleaned up. The owner + * bundle also includes any Kubernetes credentials that are required for + * resource staging server to watch the object's state over time. * @return A unique token that should be provided when retrieving these dependencies later. */ @POST @@ -66,10 +65,8 @@ private[spark] trait ResourceStagingService { @Produces(Array(MediaType.APPLICATION_JSON)) @Path("/resources") def uploadResources( - @FormDataParam("podLabels") podLabels: Map[String, String], - @FormDataParam("podNamespace") podNamespace: String, - @FormDataParam("resources") resources: InputStream, - @FormDataParam("kubernetesCredentials") kubernetesCredentials: KubernetesCredentials) + @FormDataParam("resources") resources: InputStream, + @FormDataParam("resourcesOwner") resourcesOwner: StagedResourcesOwner) : SubmittedResourceIdAndSecret /** diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala index abe956da9914d..7bc21c21619e1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImpl.scala @@ -31,58 +31,28 @@ import org.apache.spark.deploy.kubernetes.submit.SubmittedResourceIdAndSecret import org.apache.spark.internal.Logging import org.apache.spark.util.Utils -private[spark] class ResourceStagingServiceImpl(dependenciesRootDir: File) +private[spark] class ResourceStagingServiceImpl( + stagedResourcesStore: StagedResourcesStore, + stagedResourcesCleaner: StagedResourcesCleaner) extends ResourceStagingService with Logging { - private val SECURE_RANDOM = new SecureRandom() - // TODO clean up these resources based on the driver's lifecycle - private val stagedResources = TrieMap.empty[String, StagedResources] - override def uploadResources( - podLabels: Map[String, String], - podNamespace: String, resources: InputStream, - kubernetesCredentials: KubernetesCredentials): SubmittedResourceIdAndSecret = { - val resourceId = UUID.randomUUID().toString - val secretBytes = new Array[Byte](1024) - SECURE_RANDOM.nextBytes(secretBytes) - val resourceSecret = resourceId + "-" + BaseEncoding.base64().encode(secretBytes) - - val namespaceDir = new File(dependenciesRootDir, podNamespace) - val resourcesDir = new File(namespaceDir, resourceId) - try { - if (!resourcesDir.exists()) { - if (!resourcesDir.mkdirs()) { - throw new SparkException("Failed to create dependencies directory for application" + - s" at ${resourcesDir.getAbsolutePath}") - } - } - // TODO encrypt the written data with the secret. 
- val resourcesTgz = new File(resourcesDir, "resources.data") - Utils.tryWithResource(new FileOutputStream(resourcesTgz)) { ByteStreams.copy(resources, _) } - stagedResources(resourceId) = StagedResources( - resourceSecret, - podLabels, - podNamespace, - resourcesTgz, - kubernetesCredentials) - SubmittedResourceIdAndSecret(resourceId, resourceSecret) - } catch { - case e: Throwable => - if (!resourcesDir.delete()) { - logWarning(s"Failed to delete application directory $resourcesDir.") - } - throw e - } + resourcesOwner: StagedResourcesOwner): SubmittedResourceIdAndSecret = { + val stagedResources = stagedResourcesStore.addResources( + resourcesOwner.ownerNamespace, resources) + stagedResourcesCleaner.registerResourceForCleaning( + stagedResources.resourceId, resourcesOwner) + SubmittedResourceIdAndSecret(stagedResources.resourceId, stagedResources.resourceSecret) } override def downloadResources(resourceId: String, resourceSecret: String): StreamingOutput = { - val resource = stagedResources - .get(resourceId) + val resource = stagedResourcesStore.getResources(resourceId) .getOrElse(throw new NotFoundException(s"No resource bundle found with id $resourceId")) if (!resource.resourceSecret.equals(resourceSecret)) { throw new NotAuthorizedException(s"Unauthorized to download resource with id $resourceId") } + stagedResourcesCleaner.markResourceAsUsed(resourceId) new StreamingOutput { override def write(outputStream: OutputStream) = { Files.copy(resource.resourcesFile, outputStream) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala index c0da44838aba3..5fbf0f9c43970 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceRetrofit.scala @@ -31,11 +31,9 @@ private[spark] trait ResourceStagingServiceRetrofit { @Multipart @retrofit2.http.POST("api/v0/resources/") def uploadResources( - @retrofit2.http.Part("podLabels") podLabels: RequestBody, - @retrofit2.http.Part("podNamespace") podNamespace: RequestBody, @retrofit2.http.Part("resources") resources: RequestBody, - @retrofit2.http.Part("kubernetesCredentials") - kubernetesCredentials: RequestBody): Call[SubmittedResourceIdAndSecret] + @retrofit2.http.Part("resourcesOwner") resourcesOwner: RequestBody) + : Call[SubmittedResourceIdAndSecret] @Streaming @retrofit2.http.GET("api/v0/resources/{resourceId}") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResources.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResources.scala new file mode 100644 index 0000000000000..81f394800f803 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResources.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
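On download, the service looks the bundle up in the store, rejects a mismatched secret, marks the resource as used so the cleaner retains it, and then streams the tarball back without buffering it in memory. A sketch of the streaming response using JAX-RS StreamingOutput and Guava Files.copy, mirroring the implementation above (the helper shape is illustrative):

    import java.io.{File, OutputStream}

    import javax.ws.rs.{NotAuthorizedException, NotFoundException}
    import javax.ws.rs.core.StreamingOutput

    import com.google.common.io.Files

    object ResourceDownload {
      // Validate the shared secret, then stream the stored file to the caller.
      def download(
          maybeStoredFile: Option[File],
          storedSecret: String,
          providedSecret: String,
          resourceId: String): StreamingOutput = {
        val resourceFile = maybeStoredFile.getOrElse(
          throw new NotFoundException(s"No resource bundle found with id $resourceId"))
        if (storedSecret != providedSecret) {
          throw new NotAuthorizedException(
            s"Unauthorized to download resource with id $resourceId")
        }
        new StreamingOutput {
          override def write(outputStream: OutputStream): Unit = {
            Files.copy(resourceFile, outputStream)
          }
        }
      }
    }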
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.File + +case class StagedResources( + resourceId: String, + resourceSecret: String, + resourcesFile: File) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleaner.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleaner.scala new file mode 100644 index 0000000000000..5d9db728483fa --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleaner.scala @@ -0,0 +1,150 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.rest.kubernetes + +import java.util.concurrent.{ScheduledExecutorService, TimeUnit} + +import io.fabric8.kubernetes.client.KubernetesClient +import scala.collection.JavaConverters._ +import scala.collection.mutable + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging +import org.apache.spark.util.Clock + +private[spark] trait StagedResourcesCleaner { + + def start(): Unit + + def registerResourceForCleaning( + resourceId: String, stagedResourceOwner: StagedResourcesOwner): Unit + + def markResourceAsUsed(resourceId: String): Unit +} + +private class StagedResourcesCleanerImpl( + stagedResourcesStore: StagedResourcesStore, + kubernetesClient: KubernetesClient, + cleanupExecutorService: ScheduledExecutorService, + clock: Clock, + initialAccessExpirationMs: Long) + extends StagedResourcesCleaner { + + private val CLEANUP_INTERVAL_MS = 30000 + private val RESOURCE_LOCK = new Object() + private val activeResources = mutable.Map.empty[String, MonitoredResource] + private val unusedResources = mutable.Map.empty[String, UnusedMonitoredResource] + + override def start(): Unit = { + cleanupExecutorService.scheduleAtFixedRate( + new CleanupRunnable(), + CLEANUP_INTERVAL_MS, + CLEANUP_INTERVAL_MS, + TimeUnit.MILLISECONDS) + } + + override def registerResourceForCleaning( + resourceId: String, stagedResourceOwner: StagedResourcesOwner): Unit = { + RESOURCE_LOCK.synchronized { + unusedResources(resourceId) = UnusedMonitoredResource( + clock.getTimeMillis() + initialAccessExpirationMs, + MonitoredResource(resourceId, stagedResourceOwner)) + + } + } + + override def markResourceAsUsed(resourceId: String): Unit = RESOURCE_LOCK.synchronized { + val resource = unusedResources.remove(resourceId) + resource.foreach { res => + activeResources(resourceId) = res.resource + } + } + + private class CleanupRunnable extends Runnable with Logging { + + override def run(): Unit = { + // Make a copy so we can iterate through this while modifying + val activeResourcesCopy = RESOURCE_LOCK.synchronized { + Map.apply(activeResources.toSeq: _*) + } + for ((resourceId, resource) <- activeResourcesCopy) { + val namespace = kubernetesClient.namespaces() + .withName(resource.stagedResourceOwner.ownerNamespace) + .get() + if (namespace == null) { + logInfo(s"Resource files with id $resourceId is being removed. The owner's namespace" + + s" ${resource.stagedResourceOwner.ownerNamespace} was not found.") + stagedResourcesStore.removeResources(resourceId) + RESOURCE_LOCK.synchronized { + activeResources.remove(resourceId) + } + } else { + val metadataOperation = resource.stagedResourceOwner.ownerType match { + case StagedResourcesOwnerType.Pod => + kubernetesClient.pods().inNamespace(resource.stagedResourceOwner.ownerNamespace) + case _ => + throw new SparkException(s"Unsupported resource owner type for cleanup:" + + s" ${resource.stagedResourceOwner.ownerType}") + } + if (metadataOperation + .withLabels(resource.stagedResourceOwner.ownerLabels.asJava) + .list() + .getItems + .isEmpty) { + logInfo(s"Resource files with id $resourceId is being removed. 
Owners of the" + + s" resource with namespace: ${resource.stagedResourceOwner.ownerNamespace}," + + s" type: ${resource.stagedResourceOwner.ownerType}, and labels:" + + s" ${resource.stagedResourceOwner.ownerLabels} was not found on the API server.") + stagedResourcesStore.removeResources(resourceId) + RESOURCE_LOCK.synchronized { + activeResources.remove(resourceId) + } + } + } + } + + // Make a copy so we can iterate through this while modifying + val unusedResourcesCopy = RESOURCE_LOCK.synchronized { + Map.apply(unusedResources.toSeq: _*) + } + + for ((resourceId, resource) <- unusedResourcesCopy) { + if (resource.expiresAt < clock.getTimeMillis()) { + RESOURCE_LOCK.synchronized { + // Check for existence again here (via foreach) because in between the time we starting + // iterating over the unused resources copy, we might have already marked the resource + // as active in-between, and likely shouldn't remove the resources in such a case. + unusedResources.remove(resourceId).foreach { _ => + logInfo(s"Resources with id $resourceId was not accessed after being added to" + + s" the staging server at least $initialAccessExpirationMs ms ago. The resource" + + s" will be deleted.") + stagedResourcesStore.removeResources(resourceId) + } + } + } + } + } + } + + private case class MonitoredResource( + resourceId: String, + stagedResourceOwner: StagedResourcesOwner) + + private case class UnusedMonitoredResource(expiresAt: Long, resource: MonitoredResource) +} + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesOwner.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesOwner.scala new file mode 100644 index 0000000000000..4061bc36764d7 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesOwner.scala @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import com.fasterxml.jackson.core.`type`.TypeReference +import com.fasterxml.jackson.module.scala.JsonScalaEnumeration + +object StagedResourcesOwnerType extends Enumeration { + type OwnerType = Value + // In more generic scenarios, we might want to be watching Deployments, etc. 
+ val Pod = Value +} + +class StagedResourcesOwnerTypeReference extends TypeReference[StagedResourcesOwnerType.type] + +case class StagedResourcesOwner( + ownerNamespace: String, + ownerLabels: Map[String, String], + @JsonScalaEnumeration(classOf[StagedResourcesOwnerTypeReference]) + ownerType: StagedResourcesOwnerType.OwnerType) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStore.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStore.scala new file mode 100644 index 0000000000000..0c0d428e035dc --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStore.scala @@ -0,0 +1,108 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.{File, FileOutputStream, InputStream, IOException} +import java.security.SecureRandom +import java.util.UUID + +import com.google.common.io.{BaseEncoding, ByteStreams} +import org.apache.commons.io.FileUtils +import scala.collection.concurrent.TrieMap + +import org.apache.spark.SparkException +import org.apache.spark.internal.Logging +import org.apache.spark.util.Utils + + +private[spark] trait StagedResourcesStore { + + /** + * Store the given stream on disk and return its resource ID and secret. + */ + def addResources( + podNamespace: String, + resources: InputStream): StagedResources + + /** + * Retrieve a resource bundle with the given id. Returns empty if no resources match this id. + */ + def getResources(resourceId: String): Option[StagedResources] + + def removeResources(resourceId: String): Unit +} + +private[spark] class StagedResourcesStoreImpl(dependenciesRootDir: File) + extends StagedResourcesStore with Logging { + + private val SECURE_RANDOM = new SecureRandom() + private val stagedResources = TrieMap.empty[String, StagedResources] + + override def addResources( + podNamespace: String, + resources: InputStream): StagedResources = { + val resourceId = UUID.randomUUID().toString + val secretBytes = new Array[Byte](1024) + SECURE_RANDOM.nextBytes(secretBytes) + val resourceSecret = resourceId + "-" + BaseEncoding.base64().encode(secretBytes) + + val namespaceDir = new File(dependenciesRootDir, podNamespace) + val resourcesDir = new File(namespaceDir, resourceId) + try { + if (!resourcesDir.exists()) { + if (!resourcesDir.mkdirs()) { + throw new SparkException("Failed to create dependencies directory for application" + + s" at ${resourcesDir.getAbsolutePath}") + } + } + // TODO encrypt the written data with the secret. 
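StagedResourcesOwner serializes its ownerType through jackson-module-scala: the @JsonScalaEnumeration annotation needs a TypeReference subclass naming the enumeration's singleton type so Jackson can round-trip the value by name. A sketch of the same pattern on a hypothetical enumeration:

    import com.fasterxml.jackson.core.`type`.TypeReference
    import com.fasterxml.jackson.databind.ObjectMapper
    import com.fasterxml.jackson.module.scala.{DefaultScalaModule, JsonScalaEnumeration}

    object OwnerKind extends Enumeration {
      type OwnerKind = Value
      val Pod, Deployment = Value
    }

    // Tells Jackson which Enumeration object the annotated field belongs to.
    class OwnerKindTypeReference extends TypeReference[OwnerKind.type]

    case class Owner(
        name: String,
        @JsonScalaEnumeration(classOf[OwnerKindTypeReference]) kind: OwnerKind.OwnerKind)

    object OwnerJsonExample {
      def main(args: Array[String]): Unit = {
        val mapper = new ObjectMapper().registerModule(DefaultScalaModule)
        val json = mapper.writeValueAsString(Owner("my-app-driver", OwnerKind.Pod))
        println(json) // e.g. {"name":"my-app-driver","kind":"Pod"}
        println(mapper.readValue(json, classOf[Owner]).kind) // Pod
      }
    }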
+ val resourcesFile = new File(resourcesDir, "resources.data") + Utils.tryWithResource(new FileOutputStream(resourcesFile)) { + ByteStreams.copy(resources, _) + } + val resourceBundle = StagedResources(resourceId, resourceSecret, resourcesFile) + stagedResources(resourceId) = resourceBundle + resourceBundle + } catch { + case e: Throwable => + if (!resourcesDir.delete()) { + logWarning(s"Failed to delete application directory $resourcesDir.") + } + stagedResources.remove(resourceId) + throw e + } + } + + override def getResources(resourceId: String): Option[StagedResources] = { + stagedResources.get(resourceId) + } + + override def removeResources(resourceId: String): Unit = { + stagedResources.remove(resourceId) + .map(_.resourcesFile.getParentFile) + .foreach { resourcesDirectory => + try { + FileUtils.deleteDirectory(resourcesDirectory) + } catch { + case e: IOException => + logWarning(s"Failed to delete resources directory" + + s" at ${resourcesDirectory.getAbsolutePath}", e) + } + } + } +} + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala deleted file mode 100644 index cc2032219f885..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/DriverPodKubernetesClientProvider.scala +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
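Each staged bundle is addressed by a random resource id and protected by an unguessable secret: 1024 bytes from SecureRandom, base64-encoded and prefixed with the id, which the client must echo back on download. A sketch of that generation step, matching StagedResourcesStoreImpl above in spirit:

    import java.security.SecureRandom
    import java.util.UUID

    import com.google.common.io.BaseEncoding

    object ResourceSecrets {
      private val secureRandom = new SecureRandom()

      // Mint a (resourceId, resourceSecret) pair for a newly staged bundle.
      def generate(): (String, String) = {
        val resourceId = UUID.randomUUID().toString
        val secretBytes = new Array[Byte](1024)
        secureRandom.nextBytes(secretBytes)
        (resourceId, resourceId + "-" + BaseEncoding.base64().encode(secretBytes))
      }
    }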
- */ -package org.apache.spark.scheduler.cluster.kubernetes - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.Files -import io.fabric8.kubernetes.client.{Config, ConfigBuilder, DefaultKubernetesClient} -import io.fabric8.kubernetes.client.utils.HttpClientUtils -import okhttp3.Dispatcher - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.util.ThreadUtils - -private[spark] class DriverPodKubernetesClientProvider( - sparkConf: SparkConf, - namespace: Option[String] = None) { - - private val SERVICE_ACCOUNT_TOKEN = new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH) - private val SERVICE_ACCOUNT_CA_CERT = new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH) - private val oauthTokenFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) - private val caCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) - private val clientKeyFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) - private val clientCertFile = sparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) - - /** - * Creates a {@link KubernetesClient}, expecting to be from within the context of a pod. When - * doing so, service account token files can be picked up from canonical locations. - */ - def get: DefaultKubernetesClient = { - val baseClientConfigBuilder = new ConfigBuilder() - .withApiVersion("v1") - .withMasterUrl(KUBERNETES_MASTER_INTERNAL_URL) - - // Build a namespaced client if specified. - val namespacedClientConfigBuilder = namespace - .map(baseClientConfigBuilder.withNamespace(_)).getOrElse(baseClientConfigBuilder) - - val configBuilder = oauthTokenFile - .orElse(caCertFile) - .orElse(clientKeyFile) - .orElse(clientCertFile) - .map { _ => - var mountedAuthConfigBuilder = baseClientConfigBuilder - oauthTokenFile.foreach { tokenFilePath => - val tokenFile = new File(tokenFilePath) - mountedAuthConfigBuilder = mountedAuthConfigBuilder - .withOauthToken(Files.toString(tokenFile, Charsets.UTF_8)) - } - caCertFile.foreach { caFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withCaCertFile(caFile) - } - clientKeyFile.foreach { keyFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientKeyFile(keyFile) - } - clientCertFile.foreach { certFile => - mountedAuthConfigBuilder = mountedAuthConfigBuilder.withClientCertFile(certFile) - } - mountedAuthConfigBuilder - }.getOrElse { - var serviceAccountConfigBuilder = baseClientConfigBuilder - if (SERVICE_ACCOUNT_CA_CERT.isFile) { - serviceAccountConfigBuilder = serviceAccountConfigBuilder.withCaCertFile( - SERVICE_ACCOUNT_CA_CERT.getAbsolutePath) - } - - if (SERVICE_ACCOUNT_TOKEN.isFile) { - serviceAccountConfigBuilder = serviceAccountConfigBuilder.withOauthToken( - Files.toString(SERVICE_ACCOUNT_TOKEN, Charsets.UTF_8)) - } - serviceAccountConfigBuilder - } - // Disable the ping thread that is not daemon, in order to allow - // the driver main thread to shut down upon errors. Otherwise, the driver - // will hang indefinitely. - val config = configBuilder - .withWebsocketPingInterval(0) - .build() - val httpClient = HttpClientUtils.createHttpClient(config).newBuilder() - // Use a Dispatcher with a custom executor service that creates daemon threads. The default - // executor service used by Dispatcher creates non-daemon threads. 
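The deleted provider's closing comments explain why both it and its replacement disable the websocket ping thread and run okhttp's Dispatcher on daemon threads: otherwise a lingering client thread can keep the driver JVM alive after the main thread exits. A sketch of a daemon-thread Dispatcher, using a plain ThreadFactory in place of Spark's ThreadUtils helper:

    import java.util.concurrent.atomic.AtomicInteger
    import java.util.concurrent.{Executors, ThreadFactory}

    import okhttp3.Dispatcher

    object DaemonDispatcher {
      // okhttp's default Dispatcher executor uses non-daemon threads; swap in
      // a cached pool whose threads are marked as daemons.
      def apply(namePrefix: String): Dispatcher = {
        val threadFactory = new ThreadFactory {
          private val counter = new AtomicInteger(0)
          override def newThread(r: Runnable): Thread = {
            val thread = new Thread(r, s"$namePrefix-${counter.incrementAndGet()}")
            thread.setDaemon(true)
            thread
          }
        }
        new Dispatcher(Executors.newCachedThreadPool(threadFactory))
      }
    }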
- .dispatcher(new Dispatcher(ThreadUtils.newDaemonCachedThreadPool("spark-on-k8s"))) - .build() - new DefaultKubernetesClient(httpClient, config) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index e2630b9918b61..6abce55cff209 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -16,9 +16,14 @@ */ package org.apache.spark.scheduler.cluster.kubernetes +import java.io.File + +import io.fabric8.kubernetes.client.Config + import org.apache.spark.SparkContext -import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkPodInitContainerBootstrapImpl} +import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, SparkKubernetesClientFactory, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.internal.Logging import org.apache.spark.scheduler.{ExternalClusterManager, SchedulerBackend, TaskScheduler, TaskSchedulerImpl} @@ -75,8 +80,15 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit logWarning("The executor's init-container config map key was not specified. Executors will" + " therefore not attempt to fetch remote or submitted dependencies.") } + val kubernetesClient = SparkKubernetesClientFactory.createKubernetesClient( + KUBERNETES_MASTER_INTERNAL_URL, + Some(sparkConf.get(KUBERNETES_NAMESPACE)), + APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX, + sparkConf, + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)), + Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH))) new KubernetesClusterSchedulerBackend( - sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc, bootStrap) + sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc, bootStrap, kubernetesClient) } override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 257cee80fdea9..1852ed021d91a 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -21,7 +21,7 @@ import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} -import io.fabric8.kubernetes.client.{KubernetesClientException, Watcher} +import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.FilenameUtils import scala.collection.JavaConverters._ @@ -43,7 +43,8 @@ import 
org.apache.spark.util.{ThreadUtils, Utils} private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, val sc: SparkContext, - executorInitContainerBootstrap: Option[SparkPodInitContainerBootstrap]) + executorInitContainerBootstrap: Option[SparkPodInitContainerBootstrap], + kubernetesClient: KubernetesClient) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { import KubernetesClusterSchedulerBackend._ @@ -102,9 +103,6 @@ private[spark] class KubernetesClusterSchedulerBackend( private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) - private val kubernetesClient = new DriverPodKubernetesClientProvider(conf, - Some(kubernetesNamespace)).get - private val driverPod = try { kubernetesClient.pods().inNamespace(kubernetesNamespace). withName(kubernetesDriverPodName).get() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index ff6c710117318..00f09c64b53b7 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -37,7 +37,6 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient -import org.apache.spark.scheduler.cluster.kubernetes.DriverPodKubernetesClientProvider class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") @@ -131,8 +130,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { @Mock private var initContainerComponentsProvider: DriverInitContainerComponentsProvider = _ @Mock - private var kubernetesClientProvider: SubmissionKubernetesClientProvider = _ - @Mock private var kubernetesClient: KubernetesClient = _ @Mock private var podOps: MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ @@ -174,7 +171,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .thenReturn(INIT_CONTAINER_SECRET) when(initContainerConfigMapBuilder.build()) .thenReturn(INIT_CONTAINER_CONFIG_MAP) - when(kubernetesClientProvider.get).thenReturn(kubernetesClient) when(kubernetesClient.pods()).thenReturn(podOps) when(podOps.create(any())).thenAnswer(new Answer[Pod] { override def answer(invocation: InvocationOnMock): Pod = { @@ -302,37 +298,13 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { SPARK_JARS, SPARK_FILES, true, - kubernetesClientProvider, + kubernetesClient, initContainerComponentsProvider, credentialsMounterProvider, loggingPodStatusWatcher).run() verify(loggingPodStatusWatcher).awaitCompletion() } - test("Run kubernetes shuffle service.") { - expectationsForNoMountedCredentials() - expectationsForNoDependencyUploader() - - val shuffleService = new KubernetesExternalShuffleService( - SPARK_CONF, - new SecurityManager(SPARK_CONF), - new DriverPodKubernetesClientProvider(SPARK_CONF)) - - val shuffleClient = new KubernetesExternalShuffleClient( - SparkTransportConf.fromSparkConf(SPARK_CONF, "shuffle"), - new SecurityManager(SPARK_CONF), - false, - false) - 
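Because the scheduler backend and the submission client now receive a KubernetesClient instead of building one through a provider, the test suites mock the client directly. A hedged sketch of that Mockito wiring, reusing the fabric8 DSL types that appear elsewhere in this patch:

    import io.fabric8.kubernetes.api.model.{DoneablePod, Pod, PodList}
    import io.fabric8.kubernetes.client.KubernetesClient
    import io.fabric8.kubernetes.client.dsl.{MixedOperation, PodResource}
    import org.mockito.Mockito.{mock, when}

    object MockClientExample {
      type Pods = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]]

      // Stub the pods() operation so code under test never talks to a real API server.
      def newMockClient(): KubernetesClient = {
        val kubernetesClient = mock(classOf[KubernetesClient])
        val podOperations = mock(classOf[Pods])
        when(kubernetesClient.pods()).thenReturn(podOperations)
        kubernetesClient
      }
    }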
- shuffleService.start() - shuffleClient.init("newapp") - - // verifies that we can connect to the shuffle service and send - // it a message. - shuffleClient.registerDriverWithShuffleService("localhost", 7337) - shuffleService.stop() - } - private def expectationsForNoDependencyUploader(): Unit = { when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) @@ -409,7 +381,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { SPARK_JARS, SPARK_FILES, false, - kubernetesClientProvider, + kubernetesClient, initContainerComponentsProvider, credentialsMounterProvider, loggingPodStatusWatcher).run() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala index c1005a176408c..2e0a7ba5098b2 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala @@ -111,13 +111,17 @@ class DriverPodKubernetesCredentialsMounterSuite val baseSparkConf = new SparkConf() val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations(baseSparkConf) - assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_KEY_FILE) === + assert(resolvedSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX") === expectedClientKeyFile) - assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CLIENT_CERT_FILE) === + assert(resolvedSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX") === expectedClientCertFile) - assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_CA_CERT_FILE) === + assert(resolvedSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX") === expectedCaCertFile) - assert(resolvedSparkConf.get(KUBERNETES_DRIVER_MOUNTED_OAUTH_TOKEN) === + assert(resolvedSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX") === expectedOAuthTokenFile) } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala new file mode 100644 index 0000000000000..0de1955884c8e --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit + +import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService +import org.apache.spark.network.netty.SparkTransportConf +import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient + +private[spark] class KubernetesExternalShuffleServiceSuite extends SparkFunSuite { + + private val SPARK_CONF = new SparkConf() + .set(KUBERNETES_SHUFFLE_USE_SERVICE_ACCOUNT_CREDENTIALS, false) + + test("Run kubernetes shuffle service.") { + val shuffleService = new KubernetesExternalShuffleService( + SPARK_CONF, + new SecurityManager(SPARK_CONF)) + + val shuffleClient = new KubernetesExternalShuffleClient( + SparkTransportConf.fromSparkConf(SPARK_CONF, "shuffle"), + new SecurityManager(SPARK_CONF), + false, + false) + + shuffleService.start() + shuffleClient.init("newapp") + + // verifies that we can connect to the shuffle service and send + // it a message. + shuffleClient.registerDriverWithShuffleService("localhost", 7337) + shuffleService.stop() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala index 8693ff4e15372..c207e3c69cd3c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala @@ -22,26 +22,24 @@ import java.util.UUID import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.base.Charsets -import com.google.common.io.Files +import com.google.common.io.{BaseEncoding, Files} import okhttp3.RequestBody import okio.Okio -import org.mockito.Matchers.any -import org.mockito.Mockito -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer +import org.mockito.{ArgumentCaptor, Mockito} import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar._ import retrofit2.{Call, Response} import org.apache.spark.{SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.CompressionUtils -import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory} +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServiceRetrofit, RetrofitClientFactory, StagedResourcesOwner} import org.apache.spark.util.Utils private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with BeforeAndAfter { import SubmittedDependencyUploaderSuite.createTempFile private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val BASE_64 = BaseEncoding.base64() private val APP_ID = "app-id" private val LABELS = Map("label1" -> "label1value", "label2" 
-> "label2value") private val NAMESPACE = "namespace" @@ -61,18 +59,31 @@ private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with trustStore = Some(TRUSTSTORE_FILE), trustStorePassword = Some(TRUSTSTORE_PASSWORD), trustStoreType = Some(TRUSTSTORE_TYPE)) + private val CLIENT_KEY_FILE = createTempFile("pem") + private val CLIENT_CERT_FILE = createTempFile("pem") + private val OAUTH_TOKEN = "token" private var retrofitClientFactory: RetrofitClientFactory = _ private var retrofitClient: ResourceStagingServiceRetrofit = _ + private var resourcesOwnerCaptor: ArgumentCaptor[RequestBody] = _ + private var resourcesDataCaptor: ArgumentCaptor[RequestBody] = _ private var dependencyUploaderUnderTest: SubmittedDependencyUploader = _ before { + resourcesOwnerCaptor = ArgumentCaptor.forClass(classOf[RequestBody]) + resourcesDataCaptor = ArgumentCaptor.forClass(classOf[RequestBody]) retrofitClientFactory = mock[RetrofitClientFactory] retrofitClient = mock[ResourceStagingServiceRetrofit] Mockito.when( retrofitClientFactory.createRetrofitClient( STAGING_SERVER_URI, classOf[ResourceStagingServiceRetrofit], STAGING_SERVER_SSL_OPTIONS)) .thenReturn(retrofitClient) + val responseCall = mock[Call[SubmittedResourceIdAndSecret]] + Mockito.when(responseCall.execute()).thenReturn( + Response.success(SubmittedResourceIdAndSecret("resourceId", "resourceSecret"))) + Mockito.when(retrofitClient.uploadResources( + resourcesDataCaptor.capture(), resourcesOwnerCaptor.capture())) + .thenReturn(responseCall) dependencyUploaderUnderTest = new SubmittedDependencyUploaderImpl( APP_ID, LABELS, @@ -85,38 +96,24 @@ private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with } test("Uploading jars should contact the staging server with the appropriate parameters") { - val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( - SubmittedResourceIdAndSecret("resourceId", "resourceSecret")) - Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) - .thenAnswer(capturingArgumentsAnswer) dependencyUploaderUnderTest.uploadJars() - testUploadSendsCorrectFiles(LOCAL_JARS, capturingArgumentsAnswer) + testUploadSendsCorrectFiles(LOCAL_JARS) } test("Uploading files should contact the staging server with the appropriate parameters") { - val capturingArgumentsAnswer = new UploadDependenciesArgumentsCapturingAnswer( - SubmittedResourceIdAndSecret("resourceId", "resourceSecret")) - Mockito.when(retrofitClient.uploadResources(any(), any(), any(), any())) - .thenAnswer(capturingArgumentsAnswer) dependencyUploaderUnderTest.uploadFiles() - testUploadSendsCorrectFiles(LOCAL_FILES, capturingArgumentsAnswer) + testUploadSendsCorrectFiles(LOCAL_FILES) } - private def testUploadSendsCorrectFiles( - expectedFiles: Seq[String], - capturingArgumentsAnswer: UploadDependenciesArgumentsCapturingAnswer) = { - val requestLabelsBytes = requestBodyBytes(capturingArgumentsAnswer.podLabelsArg) - val requestLabelsString = new String(requestLabelsBytes, Charsets.UTF_8) - val requestLabelsMap = OBJECT_MAPPER.readValue( - requestLabelsString, classOf[Map[String, String]]) - assert(requestLabelsMap === LABELS) - val requestNamespaceBytes = requestBodyBytes(capturingArgumentsAnswer.podNamespaceArg) - val requestNamespaceString = new String(requestNamespaceBytes, Charsets.UTF_8) - assert(requestNamespaceString === NAMESPACE) - + private def testUploadSendsCorrectFiles(expectedFiles: Seq[String]) = { + val resourceOwnerString = new String( + requestBodyBytes(resourcesOwnerCaptor.getValue), 
Charsets.UTF_8) + val resourceOwner = OBJECT_MAPPER.readValue(resourceOwnerString, classOf[StagedResourcesOwner]) + assert(resourceOwner.ownerLabels === LABELS) + assert(resourceOwner.ownerNamespace === NAMESPACE) val unpackedFilesDir = Utils.createTempDir(namePrefix = "test-unpacked-files") val compressedBytesInput = new ByteArrayInputStream( - requestBodyBytes(capturingArgumentsAnswer.podResourcesArg)) + requestBodyBytes(resourcesDataCaptor.getValue())) CompressionUtils.unpackTarStreamToDirectory(compressedBytesInput, unpackedFilesDir) val writtenFiles = unpackedFilesDir.listFiles assert(writtenFiles.size === expectedFiles.size) @@ -148,25 +145,6 @@ private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with } } -private class UploadDependenciesArgumentsCapturingAnswer(returnValue: SubmittedResourceIdAndSecret) - extends Answer[Call[SubmittedResourceIdAndSecret]] { - - var podLabelsArg: RequestBody = _ - var podNamespaceArg: RequestBody = _ - var podResourcesArg: RequestBody = _ - var kubernetesCredentialsArg: RequestBody = _ - - override def answer(invocationOnMock: InvocationOnMock): Call[SubmittedResourceIdAndSecret] = { - podLabelsArg = invocationOnMock.getArgumentAt(0, classOf[RequestBody]) - podNamespaceArg = invocationOnMock.getArgumentAt(1, classOf[RequestBody]) - podResourcesArg = invocationOnMock.getArgumentAt(2, classOf[RequestBody]) - kubernetesCredentialsArg = invocationOnMock.getArgumentAt(3, classOf[RequestBody]) - val responseCall = mock[Call[SubmittedResourceIdAndSecret]] - Mockito.when(responseCall.execute()).thenReturn(Response.success(returnValue)) - responseCall - } -} - private object SubmittedDependencyUploaderSuite { def createTempFile(extension: String): String = { val dir = Utils.createTempDir() diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala index 0604e0d6494ae..0c0908da20d89 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala @@ -24,10 +24,11 @@ import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.io.ByteStreams import okhttp3.{RequestBody, ResponseBody} import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar.mock import retrofit2.Call import org.apache.spark.{SparkFunSuite, SSLOptions} -import org.apache.spark.deploy.kubernetes.{KubernetesCredentials, SSLUtils} +import org.apache.spark.deploy.kubernetes.SSLUtils import org.apache.spark.util.Utils /** @@ -40,12 +41,21 @@ import org.apache.spark.util.Utils * receive streamed uploads and can stream downloads. 
*/ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { + private var serviceImpl: ResourceStagingService = _ + private var stagedResourcesCleaner: StagedResourcesCleaner = _ + private var server: ResourceStagingServer = _ private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) private val serverPort = new ServerSocket(0).getLocalPort - private val serviceImpl = new ResourceStagingServiceImpl(Utils.createTempDir()) + private val sslOptionsProvider = new SettableReferenceSslOptionsProvider() - private val server = new ResourceStagingServer(serverPort, serviceImpl, sslOptionsProvider) + + before { + stagedResourcesCleaner = mock[StagedResourcesCleaner] + serviceImpl = new ResourceStagingServiceImpl( + new StagedResourcesStoreImpl(Utils.createTempDir()), stagedResourcesCleaner) + server = new ResourceStagingServer(serverPort, serviceImpl, sslOptionsProvider) + } after { server.stop() @@ -83,20 +93,17 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { val resourcesBytes = Array[Byte](1, 2, 3, 4) val labels = Map("label1" -> "label1Value", "label2" -> "label2value") val namespace = "namespace" - val labelsJson = OBJECT_MAPPER.writer().writeValueAsString(labels) + val resourcesOwner = StagedResourcesOwner( + ownerLabels = labels, + ownerNamespace = namespace, + ownerType = StagedResourcesOwnerType.Pod) + val resourcesOwnerJson = OBJECT_MAPPER.writeValueAsString(resourcesOwner) + val resourcesOwnerRequestBody = RequestBody.create( + okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), resourcesOwnerJson) val resourcesRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) - val labelsRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), labelsJson) - val namespaceRequestBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.TEXT_PLAIN), namespace) - val kubernetesCredentials = KubernetesCredentials(Some("token"), Some("ca-cert"), None, None) - val kubernetesCredentialsString = OBJECT_MAPPER.writer() - .writeValueAsString(kubernetesCredentials) - val kubernetesCredentialsBody = RequestBody.create( - okhttp3.MediaType.parse(MediaType.APPLICATION_JSON), kubernetesCredentialsString) + okhttp3.MediaType.parse(MediaType.MULTIPART_FORM_DATA), resourcesBytes) val uploadResponse = retrofitService.uploadResources( - labelsRequestBody, namespaceRequestBody, resourcesRequestBody, kubernetesCredentialsBody) + resourcesRequestBody, resourcesOwnerRequestBody) val resourceIdentifier = getTypedResponseResult(uploadResponse) checkResponseBodyBytesMatches( retrofitService.downloadResources( diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala deleted file mode 100644 index 53396a3f27a1a..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServiceImplSuite.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.rest.kubernetes - -import java.io.{ByteArrayInputStream, File} -import java.nio.file.Paths - -import com.google.common.io.Files - -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.util.Utils - -/** - * Unit, scala-level tests for KubernetesSparkDependencyServiceImpl. The coverage here - * differs from that of KubernetesSparkDependencyServerSuite as here we invoke the - * implementation methods directly as opposed to over HTTP, as well as check the - * data written to the underlying disk. - */ -class ResourceStagingServiceImplSuite extends SparkFunSuite { - - private val dependencyRootDir = Utils.createTempDir() - private val serviceImpl = new ResourceStagingServiceImpl(dependencyRootDir) - private val resourceBytes = Array[Byte](1, 2, 3, 4) - private val kubernetesCredentials = KubernetesCredentials( - Some("token"), Some("caCert"), Some("key"), Some("cert")) - private val namespace = "namespace" - private val labels = Map("label1" -> "label1value", "label2" -> "label2value") - - test("Uploads should write data to the underlying disk") { - Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { resourceStream => - serviceImpl.uploadResources(labels, namespace, resourceStream, kubernetesCredentials) - } - val resourceNamespaceDir = Paths.get(dependencyRootDir.getAbsolutePath, "namespace").toFile - assert(resourceNamespaceDir.isDirectory, s"Resource namespace dir was not created at" + - s" ${resourceNamespaceDir.getAbsolutePath} or is not a directory.") - val resourceDirs = resourceNamespaceDir.listFiles() - assert(resourceDirs.length === 1, s"Resource root directory did not have exactly one" + - s" subdirectory. Got: ${resourceDirs.map(_.getAbsolutePath).mkString(",")}") - val resourceTgz = new File(resourceDirs(0), "resources.data") - assert(resourceTgz.isFile, - s"Resources written to ${resourceTgz.getAbsolutePath} does not exist or is not a file.") - val resourceTgzBytes = Files.toByteArray(resourceTgz) - assert(resourceTgzBytes.toSeq === resourceBytes.toSeq, "Incorrect resource bytes were written.") - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleanerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleanerSuite.scala new file mode 100644 index 0000000000000..8b398a9891f34 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesCleanerSuite.scala @@ -0,0 +1,149 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.util.concurrent.{ScheduledExecutorService, TimeUnit} + +import io.fabric8.kubernetes.api.model.{DoneableNamespace, DoneablePod, Namespace, NamespaceList, Pod, PodList, PodListBuilder} +import io.fabric8.kubernetes.client.{KubernetesClient, Watch, Watcher} +import io.fabric8.kubernetes.client.dsl.{FilterWatchListDeletable, MixedOperation, NonNamespaceOperation, PodResource, Resource} +import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} +import org.mockito.Matchers.{eq => mockitoEq} +import org.mockito.Mockito.{never, verify, when} +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.util.Clock + +private[spark] class StagedResourcesCleanerSuite extends SparkFunSuite with BeforeAndAfter { + + private type PODS = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] + private type PODSWITHLABELS = FilterWatchListDeletable[ + Pod, PodList, java.lang.Boolean, Watch, Watcher[Pod]] + private type PODSINNAMESPACE = NonNamespaceOperation[ + Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] + private type NAMESPACES = NonNamespaceOperation[ + Namespace, NamespaceList, DoneableNamespace, Resource[Namespace, DoneableNamespace]] + private type NAMESPACEWITHNAME = Resource[Namespace, DoneableNamespace] + + private val INITIAL_ACCESS_EXPIRATION_MS = 5000L + private val CURRENT_TIME = 10000L + private val RESOURCE_ID = "resource-id" + private val POD_NAMESPACE = "namespace" + private val POD_LABELS = Map("label1" -> "label1value", "label2" -> "label2value") + private val RESOURCES_OWNER = StagedResourcesOwner( + ownerNamespace = POD_NAMESPACE, + ownerLabels = POD_LABELS, + ownerType = StagedResourcesOwnerType.Pod) + + @Mock + private var stagedResourcesStore: StagedResourcesStore = _ + @Mock + private var kubernetesClient: KubernetesClient = _ + @Mock + private var clock: Clock = _ + @Mock + private var cleanerExecutorService: ScheduledExecutorService = _ + @Mock + private var podOperations: PODS = _ + @Mock + private var podsInNamespaceOperations: PODSINNAMESPACE = _ + @Mock + private var podsWithLabelsOperations: PODSWITHLABELS = _ + @Mock + private var namespaceOperations: NAMESPACES = _ + @Mock + private var namedNamespaceOperations: NAMESPACEWITHNAME = _ + private var cleanerUnderTest: StagedResourcesCleaner = _ + + before { + MockitoAnnotations.initMocks(this) + cleanerUnderTest = new StagedResourcesCleanerImpl( + stagedResourcesStore, + kubernetesClient, + cleanerExecutorService, + clock, + INITIAL_ACCESS_EXPIRATION_MS) + when(kubernetesClient.pods()).thenReturn(podOperations) + when(podOperations.withLabels(POD_LABELS.asJava)).thenReturn(podsWithLabelsOperations) + when(kubernetesClient.namespaces()).thenReturn(namespaceOperations) + } + + test("Clean the resource if it is never accessed for the expiration interval.") { + val 
cleanupRunnable = startCleanupAndGetCleanupRunnable() + cleanerUnderTest.registerResourceForCleaning(RESOURCE_ID, RESOURCES_OWNER) + when(clock.getTimeMillis()).thenReturn(CURRENT_TIME + INITIAL_ACCESS_EXPIRATION_MS) + cleanupRunnable.run() + verify(stagedResourcesStore).removeResources(RESOURCE_ID) + verify(kubernetesClient, never()).pods() + } + + test("Don't clean the resource if it is accessed in the expiration interval" + + " and there are owners available.") { + val cleanupRunnable = startCleanupAndGetCleanupRunnable() + cleanerUnderTest.registerResourceForCleaning(RESOURCE_ID, RESOURCES_OWNER) + cleanerUnderTest.markResourceAsUsed(RESOURCE_ID) + when(clock.getTimeMillis()).thenReturn(CURRENT_TIME + INITIAL_ACCESS_EXPIRATION_MS) + when(namespaceOperations.withName(POD_NAMESPACE)).thenReturn(namedNamespaceOperations) + when(namedNamespaceOperations.get()).thenReturn(new Namespace()) + when(podOperations.inNamespace(POD_NAMESPACE)).thenReturn(podsInNamespaceOperations) + when(podsInNamespaceOperations.withLabels(POD_LABELS.asJava)) + .thenReturn(podsWithLabelsOperations) + when(podsWithLabelsOperations.list()).thenReturn( + new PodListBuilder().addNewItemLike(new Pod()).endItem().build()) + cleanupRunnable.run() + verify(stagedResourcesStore, never()).removeResources(RESOURCE_ID) + } + + test("Clean the resource if no owners are available.") { + val cleanupRunnable = startCleanupAndGetCleanupRunnable() + cleanerUnderTest.registerResourceForCleaning(RESOURCE_ID, RESOURCES_OWNER) + cleanerUnderTest.markResourceAsUsed(RESOURCE_ID) + when(clock.getTimeMillis()).thenReturn(CURRENT_TIME + INITIAL_ACCESS_EXPIRATION_MS) + when(namespaceOperations.withName(POD_NAMESPACE)).thenReturn(namedNamespaceOperations) + when(namedNamespaceOperations.get()).thenReturn(new Namespace()) + when(podOperations.inNamespace(POD_NAMESPACE)).thenReturn(podsInNamespaceOperations) + when(podsInNamespaceOperations.withLabels(POD_LABELS.asJava)) + .thenReturn(podsWithLabelsOperations) + when(podsWithLabelsOperations.list()).thenReturn(new PodListBuilder().build()) + cleanupRunnable.run() + verify(stagedResourcesStore).removeResources(RESOURCE_ID) + } + + test("Clean up the resource if the namespace does not exist.") { + val cleanupRunnable = startCleanupAndGetCleanupRunnable() + cleanerUnderTest.registerResourceForCleaning(RESOURCE_ID, RESOURCES_OWNER) + cleanerUnderTest.markResourceAsUsed(RESOURCE_ID) + when(clock.getTimeMillis()).thenReturn(CURRENT_TIME + INITIAL_ACCESS_EXPIRATION_MS) + when(namespaceOperations.withName(POD_NAMESPACE)).thenReturn(namedNamespaceOperations) + when(namedNamespaceOperations.get()).thenReturn(null) + cleanupRunnable.run() + verify(stagedResourcesStore).removeResources(RESOURCE_ID) + } + + private def startCleanupAndGetCleanupRunnable(): Runnable = { + val captor = ArgumentCaptor.forClass(classOf[Runnable]) + cleanerUnderTest.start() + verify(cleanerExecutorService).scheduleAtFixedRate( + captor.capture(), + mockitoEq(30000L), + mockitoEq(30000L), + mockitoEq(TimeUnit.MILLISECONDS)) + captor.getValue + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStoreSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStoreSuite.scala new file mode 100644 index 0000000000000..6b5737ebf2e23 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/StagedResourcesStoreSuite.scala @@ -0,0 +1,86 @@ +/* + * Licensed to 
the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.rest.kubernetes + +import java.io.{ByteArrayInputStream, File} +import java.nio.file.Paths + +import com.google.common.io.Files +import org.scalatest.BeforeAndAfter + +import org.apache.spark.SparkFunSuite +import org.apache.spark.util.Utils + +private[spark] class StagedResourcesStoreSuite extends SparkFunSuite with BeforeAndAfter { + + private val resourceBytes = Array[Byte](1, 2, 3, 4) + private val namespace = "namespace" + private var dependencyRootDir: File = _ + private var stagedResourcesStore: StagedResourcesStore = _ + + before { + dependencyRootDir = Utils.createTempDir() + stagedResourcesStore = new StagedResourcesStoreImpl(dependencyRootDir) + } + + after { + dependencyRootDir.delete() + } + + test("Uploads should write data to the underlying disk") { + val resourceIdAndSecret = Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { + resourceStream => + stagedResourcesStore.addResources(namespace, resourceStream) + } + val resourceNamespaceDir = Paths.get(dependencyRootDir.getAbsolutePath, "namespace").toFile + assert(resourceNamespaceDir.isDirectory, s"Resource namespace dir was not created at" + + s" ${resourceNamespaceDir.getAbsolutePath} or is not a directory.") + val resourceDirs = resourceNamespaceDir.listFiles() + assert(resourceDirs.length === 1, s"Resource root directory did not have exactly one" + + s" subdirectory. 
Got: ${resourceDirs.map(_.getAbsolutePath).mkString(",")}") + assert(resourceDirs(0).getName === resourceIdAndSecret.resourceId) + val resourceTgz = new File(resourceDirs(0), "resources.data") + assert(resourceTgz.isFile, + s"Resources written to ${resourceTgz.getAbsolutePath} does not exist or is not a file.") + val resourceTgzBytes = Files.toByteArray(resourceTgz) + assert(resourceTgzBytes.toSeq === resourceBytes.toSeq, "Incorrect resource bytes were written.") + } + + test("Uploading and then getting should return a stream with the written bytes.") { + val resourceIdAndSecret = Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { + resourceStream => + stagedResourcesStore.addResources(namespace, resourceStream) + } + val resources = stagedResourcesStore.getResources(resourceIdAndSecret.resourceId) + assert(resources.map(_.resourcesFile) + .map(Files.toByteArray) + .exists(resourceBytes.sameElements(_))) + assert(resources.exists(_.resourceId == resourceIdAndSecret.resourceId)) + assert(resources.exists(_.resourceSecret == resourceIdAndSecret.resourceSecret)) + } + + test("Uploading and then deleting should result in the resource directory being deleted.") { + val resourceIdAndSecret = Utils.tryWithResource(new ByteArrayInputStream(resourceBytes)) { + resourceStream => + stagedResourcesStore.addResources(namespace, resourceStream) + } + stagedResourcesStore.removeResources(resourceIdAndSecret.resourceId) + val resourceNamespaceDir = Paths.get(dependencyRootDir.getAbsolutePath, "namespace").toFile + assert(resourceNamespaceDir.listFiles().isEmpty) + assert(stagedResourcesStore.getResources(resourceIdAndSecret.resourceId).isEmpty) + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 95775d262a69d..6a296d6112c97 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -169,11 +169,14 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { sparkConf.setJars(Seq( CONTAINER_LOCAL_MAIN_APP_RESOURCE, CONTAINER_LOCAL_HELPER_JAR_PATH)) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_KEY_FILE, + sparkConf.set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", kubernetesTestComponents.clientConfig.getClientKeyFile) - sparkConf.set(KUBERNETES_DRIVER_CLIENT_CERT_FILE, + sparkConf.set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", kubernetesTestComponents.clientConfig.getClientCertFile) - sparkConf.set(KUBERNETES_DRIVER_CA_CERT_FILE, + sparkConf.set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", kubernetesTestComponents.clientConfig.getCaCertFile) runSparkPiAndVerifyCompletion(SparkLauncher.NO_RESOURCE) } From c312567dde6cd77cb1fe86c208d4e1a442acccce Mon Sep 17 00:00:00 2001 From: Timothy Chen Date: Sun, 4 Jun 2017 07:25:28 -0700 Subject: [PATCH 126/225] Copy yaml files when making distribution (#327) --- dev/make-distribution.sh | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/dev/make-distribution.sh b/dev/make-distribution.sh index a7a171dee09a3..d90aa4aadec10 100755 --- a/dev/make-distribution.sh +++ 
b/dev/make-distribution.sh @@ -242,8 +242,9 @@ else fi # Copy other things -mkdir "$DISTDIR/conf" -cp "$SPARK_HOME"/conf/*.template "$DISTDIR/conf" +mkdir "$DISTDIR"/conf +cp "$SPARK_HOME"/conf/*.template "$DISTDIR"/conf +cp "$SPARK_HOME"/conf/*.yaml "$DISTDIR"/conf cp "$SPARK_HOME/README.md" "$DISTDIR" cp -r "$SPARK_HOME/bin" "$DISTDIR" cp -r "$SPARK_HOME/python" "$DISTDIR" From 4f6a4d7e0e5749c3d846dd12b61d65e08c9a6394 Mon Sep 17 00:00:00 2001 From: Timothy Chen Date: Sun, 4 Jun 2017 19:54:56 -0700 Subject: [PATCH 127/225] Allow docker image pull policy to be configurable (#328) * Allow docker image pull policy to be configurable * Add flag documentation * Update running-on-kubernetes.md --- docs/running-on-kubernetes.md | 7 +++++++ .../deploy/kubernetes/SparkPodInitContainerBootstrap.scala | 3 ++- .../scala/org/apache/spark/deploy/kubernetes/config.scala | 6 ++++++ .../org/apache/spark/deploy/kubernetes/submit/Client.scala | 3 ++- .../submit/DriverInitContainerComponentsProvider.scala | 2 ++ .../cluster/kubernetes/KubernetesClusterManager.scala | 4 ++-- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 3 ++- .../kubernetes/SparkPodInitContainerBootstrapSuite.scala | 3 +++ 8 files changed, 26 insertions(+), 5 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index e9002bdfe0502..a88b0d380fac0 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -661,6 +661,13 @@ from the other deployment modes. See the [configuration page](configuration.html Interval between reports of the current Spark job status in cluster mode. + + spark.kubernetes.docker.image.pullPolicy + IfNotPresent + + Docker image pull policy used when pulling Docker images with Kubernetes. + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala index a4d0aeb23d01f..87462dbde17a5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -36,6 +36,7 @@ private[spark] trait SparkPodInitContainerBootstrap { private[spark] class SparkPodInitContainerBootstrapImpl( initContainerImage: String, + dockerImagePullPolicy: String, jarsDownloadPath: String, filesDownloadPath: String, downloadTimeoutMinutes: Long, @@ -60,7 +61,7 @@ private[spark] class SparkPodInitContainerBootstrapImpl( val initContainer = new ContainerBuilder() .withName(s"spark-init") .withImage(initContainerImage) - .withImagePullPolicy("IfNotPresent") + .withImagePullPolicy(dockerImagePullPolicy) .addNewVolumeMount() .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) .withMountPath(INIT_CONTAINER_PROPERTIES_FILE_DIR) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index dd99e0f7a5ae0..47c3c24fa88f7 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -47,6 +47,12 @@ package object config extends Logging { .stringConf .createWithDefault(s"spark-executor:$sparkVersion") + 
private[spark] val DOCKER_IMAGE_PULL_POLICY = + ConfigBuilder("spark.kubernetes.docker.image.pullPolicy") + .doc("Docker image pull policy when pulling any docker image in Kubernetes integration") + .stringConf + .createWithDefault("IfNotPresent") + private[spark] val APISERVER_AUTH_SUBMISSION_CONF_PREFIX = "spark.kubernetes.authenticate.submission" private[spark] val APISERVER_AUTH_DRIVER_CONF_PREFIX = diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 85dac3df57b4c..1bebaf92501f4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -60,6 +60,7 @@ private[spark] class Client( private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) .getOrElse(kubernetesAppId) private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) + private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) private val memoryOverheadMb = sparkConf .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) @@ -99,7 +100,7 @@ private[spark] class Client( val driverContainer = new ContainerBuilder() .withName(DRIVER_CONTAINER_NAME) .withImage(driverDockerImage) - .withImagePullPolicy("IfNotPresent") + .withImagePullPolicy(dockerImagePullPolicy) .addToEnv(driverExtraClasspathEnv.toSeq: _*) .addNewEnv() .withName(ENV_DRIVER_MEMORY) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index ccb349c5b2988..be9da2582cb47 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -104,6 +104,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val configMapName = s"$kubernetesAppId-init-config" private val configMapKey = s"$kubernetesAppId-init-config-key" private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) + private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) override def provideInitContainerConfigMapBuilder( @@ -196,6 +197,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( } new SparkPodInitContainerBootstrapImpl( initContainerImage, + dockerImagePullPolicy, jarsDownloadPath, filesDownloadPath, downloadTimeoutMinutes, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 6abce55cff209..2a0f6e78c2aea 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -46,7 +46,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit val maybeExecutorInitContainerSecretName = sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET) val maybeExecutorInitContainerSecretMount = - sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) + sparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) val executorInitContainerSecretVolumePlugin = for { initContainerSecretName <- maybeExecutorInitContainerSecretName initContainerSecretMountPath <- maybeExecutorInitContainerSecretMount @@ -65,6 +65,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit } yield { new SparkPodInitContainerBootstrapImpl( sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE), + sparkConf.get(DOCKER_IMAGE_PULL_POLICY), sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION), sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION), sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT), @@ -95,4 +96,3 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit scheduler.asInstanceOf[TaskSchedulerImpl].initialize(backend) } } - diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 1852ed021d91a..c3a6fe28a6255 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -77,6 +77,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) + private val dockerImagePullPolicy = conf.get(DOCKER_IMAGE_PULL_POLICY) private val kubernetesNamespace = conf.get(KUBERNETES_NAMESPACE) private val executorPort = conf.getInt("spark.executor.port", DEFAULT_STATIC_PORT) private val blockmanagerPort = conf @@ -354,7 +355,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .addNewContainer() .withName(s"executor") .withImage(executorDockerImage) - .withImagePullPolicy("IfNotPresent") + .withImagePullPolicy(dockerImagePullPolicy) .withNewResources() .addToRequests("memory", executorMemoryQuantity) .addToLimits("memory", executorMemoryLimitQuantity) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala index 3feba80f800c7..90d7b10df211c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ -27,6 +27,7 @@ import org.apache.spark.deploy.kubernetes.constants._ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAfter { private val OBJECT_MAPPER = new ObjectMapper() private val INIT_CONTAINER_IMAGE = "spark-init:latest" + private val DOCKER_IMAGE_PULL_POLICY = "IfNotPresent" private val 
JARS_DOWNLOAD_PATH = "/var/data/spark-jars" private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" private val DOWNLOAD_TIMEOUT_MINUTES = 5 @@ -137,6 +138,7 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf private def bootstrapPodWithoutSubmittedDependencies(): Pod = { val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( INIT_CONTAINER_IMAGE, + DOCKER_IMAGE_PULL_POLICY, JARS_DOWNLOAD_PATH, FILES_DOWNLOAD_PATH, DOWNLOAD_TIMEOUT_MINUTES, @@ -150,6 +152,7 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf private def bootstrapPodWithSubmittedDependencies(): Pod = { val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( INIT_CONTAINER_IMAGE, + DOCKER_IMAGE_PULL_POLICY, JARS_DOWNLOAD_PATH, FILES_DOWNLOAD_PATH, DOWNLOAD_TIMEOUT_MINUTES, From f208d6812e1a00ebe3e7f95866bdb4e814e0a369 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 5 Jun 2017 10:08:57 -0700 Subject: [PATCH 128/225] POM update 0.2.0 (#329) --- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs-helpers/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index aa429f73a5627..a227342f46771 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index c66b87ac0952d..51ca26c0134fa 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index 581bf9453f2f2..206059bd8e5b1 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 9639811479ff5..555398aa3e6d9 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 02904c0e5fe21..bbf4b02cdaaf9 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.1.0-SNAPSHOT + 2.1.0-k8s-0.2.0-SNAPSHOT ../../../pom.xml From 
9cdccbe7dfa9b37a486b98994a0d2cd43030e335 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 5 Jun 2017 18:28:16 -0700 Subject: [PATCH 129/225] Update tags (#332) * Update tags * update tags in conf directory --- conf/kubernetes-resource-staging-server.yaml | 2 +- conf/kubernetes-shuffle-service.yaml | 2 +- docs/running-on-kubernetes.md | 30 ++++++++++---------- 3 files changed, 17 insertions(+), 17 deletions(-) diff --git a/conf/kubernetes-resource-staging-server.yaml b/conf/kubernetes-resource-staging-server.yaml index 11f5d3a13b9e3..025b9b125d9e0 100644 --- a/conf/kubernetes-resource-staging-server.yaml +++ b/conf/kubernetes-resource-staging-server.yaml @@ -32,7 +32,7 @@ spec: name: spark-resource-staging-server-config containers: - name: spark-resource-staging-server - image: kubespark/spark-resource-staging-server:v2.1.0-kubernetes-0.1.0-alpha.3 + image: kubespark/spark-resource-staging-server:v2.1.0-kubernetes-0.2.0 resources: requests: cpu: 100m diff --git a/conf/kubernetes-shuffle-service.yaml b/conf/kubernetes-shuffle-service.yaml index c0cc310cf4755..55c170b01a4f5 100644 --- a/conf/kubernetes-shuffle-service.yaml +++ b/conf/kubernetes-shuffle-service.yaml @@ -38,7 +38,7 @@ spec: # This is an official image that is built # from the dockerfiles/shuffle directory # in the spark distribution. - image: spark-shuffle:latest + image: kubespark/spark-shuffle:v2.1.0-kubernetes-0.2.0 imagePullPolicy: IfNotPresent volumeMounts: - mountPath: '/tmp' diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index a88b0d380fac0..36b45526dfb44 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -36,15 +36,15 @@ If you wish to use pre-built docker images, you may use the images published in ComponentImage Spark Driver Image - kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 + kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 Spark Executor Image - kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 + kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 Spark Initialization Image - kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 + kubespark/spark-init:v2.1.0-kubernetes-0.2.0 @@ -76,9 +76,9 @@ are set up as described above: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.2.0 \ local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar The Spark master, specified either via passing the `--master` command line argument to `spark-submit` or by setting @@ -125,9 +125,9 @@ and then you can compute the value of Pi as follows: --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf 
spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.2.0 \ --conf spark.kubernetes.resourceStagingServer.uri=http://:31000 \ examples/jars/spark_examples_2.11-2.2.0.jar @@ -168,9 +168,9 @@ If our local proxy were listening on port 8001, we would have our submission loo --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.2.0 \ local:///opt/spark/examples/jars/spark_examples_2.11-2.2.0.jar Communication between Spark and Kubernetes clusters is performed using the fabric8 kubernetes-client library. @@ -284,9 +284,9 @@ communicate with the resource staging server over TLS. The trustStore can be set --kubernetes-namespace default \ --conf spark.executor.instances=5 \ --conf spark.app.name=spark-pi \ - --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.1.0-alpha.2 \ - --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.1.0-alpha.2 \ + --conf spark.kubernetes.driver.docker.image=kubespark/spark-driver:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.executor.docker.image=kubespark/spark-executor:v2.1.0-kubernetes-0.2.0 \ + --conf spark.kubernetes.initcontainer.docker.image=kubespark/spark-init:v2.1.0-kubernetes-0.2.0 \ --conf spark.kubernetes.resourceStagingServer.uri=https://:31000 \ --conf spark.ssl.kubernetes.resourceStagingServer.enabled=true \ --conf spark.ssl.kubernetes.resourceStagingServer.clientCertPem=/home/myuser/cert.pem \ From 5a41e1ef31ca15d25e114969201ab444b40ddbb4 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 5 Jun 2017 22:26:24 -0700 Subject: [PATCH 130/225] nicer readme (#333) --- README.md | 2 +- docs/running-on-kubernetes.md | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/README.md b/README.md index e87da21fe7a8c..352820a084c6f 100644 --- a/README.md +++ b/README.md @@ -8,7 +8,7 @@ This is a collaboratively maintained project working on [SPARK-18278](https://is ## Getting Started -- [Usage guide](docs/running-on-kubernetes.md) shows how to run the code +- [Usage guide](https://apache-spark-on-k8s.github.io/userdocs/) shows how to run the code - [Development docs](resource-managers/kubernetes/README.md) shows how to get set up for development - Code is primarily located in the [resource-managers/kubernetes](resource-managers/kubernetes) folder diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 36b45526dfb44..dc3cf738832ad 100644 --- 
a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -149,8 +149,6 @@ environment variable in your Dockerfiles. ### Accessing Kubernetes Clusters -For details about running on public cloud environments, such as Google Container Engine (GKE), refer to [running Spark in the cloud with Kubernetes](running-on-kubernetes-cloud.md). - Spark-submit also supports submission through the [local kubectl proxy](https://kubernetes.io/docs/user-guide/accessing-the-cluster/#using-kubectl-proxy). One can use the authenticating proxy to communicate with the api server directly without passing credentials to spark-submit. From 069bd049561f4c7e353e85ba97c2e1e84326f234 Mon Sep 17 00:00:00 2001 From: dyhfighter <1294057873@qq.com> Date: Fri, 9 Jun 2017 00:41:15 +0800 Subject: [PATCH 131/225] Support specify CPU cores and Memory restricts for driver (#340) Signed-off-by: duyanghao <1294057873@qq.com> --- .../deploy/kubernetes/submit/Client.scala | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 1bebaf92501f4..0544bf064844f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -19,7 +19,7 @@ package org.apache.spark.deploy.kubernetes.submit import java.io.File import java.util.Collections -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.JavaConverters._ @@ -61,6 +61,11 @@ private[spark] class Client( .getOrElse(kubernetesAppId) private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) + + // CPU settings + private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") + + // Memory settings private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) private val memoryOverheadMb = sparkConf .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) @@ -97,6 +102,15 @@ private[spark] class Client( .withValue(classPath) .build() } + val driverCpuQuantity = new QuantityBuilder(false) + .withAmount(driverCpuCores) + .build() + val driverMemoryQuantity = new QuantityBuilder(false) + .withAmount(s"${driverMemoryMb}M") + .build() + val driverMemoryLimitQuantity = new QuantityBuilder(false) + .withAmount(s"${driverContainerMemoryWithOverhead}M") + .build() val driverContainer = new ContainerBuilder() .withName(DRIVER_CONTAINER_NAME) .withImage(driverDockerImage) @@ -114,6 +128,12 @@ private[spark] class Client( .withName(ENV_DRIVER_ARGS) .withValue(appArgs.mkString(" ")) .endEnv() + .withNewResources() + .addToRequests("cpu", driverCpuQuantity) + .addToLimits("cpu", driverCpuQuantity) + .addToRequests("memory", driverMemoryQuantity) + .addToLimits("memory", driverMemoryLimitQuantity) + .endResources() .build() val basePod = new PodBuilder() .withNewMetadata() From 4a01baf75cf77825ec40527cb8f1d21f8980d4a5 Mon Sep 17 00:00:00 2001 From: mccheah Date: Thu, 8 Jun 2017 12:40:46 -0700 Subject: [PATCH 132/225] 
Generate the application ID label irrespective of app name. (#331) * Generate the application ID label irrespective of app name. * Add an integration test. * Fix scalastyle --- .../KubernetesExternalShuffleService.scala | 2 +- .../spark/deploy/kubernetes/config.scala | 7 ++ .../spark/deploy/kubernetes/constants.scala | 6 +- .../deploy/kubernetes/submit/Client.scala | 76 ++++++------ ...riverInitContainerComponentsProvider.scala | 111 +++++++++--------- .../SubmittedDependencyUploaderImpl.scala | 1 - .../KubernetesClusterSchedulerBackend.scala | 15 ++- .../kubernetes/submit/ClientV2Suite.scala | 18 +-- .../SubmittedDependencyUploaderSuite.scala | 15 ++- .../integrationtest/KubernetesSuite.scala | 7 ++ 10 files changed, 144 insertions(+), 114 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala index 01a8a9a6899fd..c61f4f1d44acf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/KubernetesExternalShuffleService.scala @@ -91,7 +91,7 @@ private[spark] class KubernetesShuffleBlockHandler ( try { Some(kubernetesClient .pods() - .withLabels(Map(SPARK_ROLE_LABEL -> "driver").asJava) + .withLabels(Map(SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE).asJava) .watch(new Watcher[Pod] { override def eventReceived(action: Watcher.Action, p: Pod): Unit = { action match { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 47c3c24fa88f7..d1fd88fc880d1 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -151,6 +151,13 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_EXECUTOR_POD_NAME_PREFIX = + ConfigBuilder("spark.kubernetes.executor.podNamePrefix") + .doc("Prefix to use in front of the executor pod names.") + .internal() + .stringConf + .createWithDefault("spark") + private[spark] val KUBERNETES_SHUFFLE_NAMESPACE = ConfigBuilder("spark.kubernetes.shuffle.namespace") .doc("Namespace of the shuffle service") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index e267c9ff7e1d1..9c46d7494b187 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -19,10 +19,12 @@ package org.apache.spark.deploy.kubernetes package object constants { // Labels private[spark] val SPARK_DRIVER_LABEL = "spark-driver" - private[spark] val SPARK_APP_ID_LABEL = "spark-app-id" - private[spark] val SPARK_APP_NAME_LABEL = "spark-app-name" + private[spark] val SPARK_APP_ID_LABEL = "spark-app-selector" private[spark] val SPARK_EXECUTOR_ID_LABEL = "spark-exec-id" private[spark] val SPARK_ROLE_LABEL = "spark-role" + private[spark] val SPARK_POD_DRIVER_ROLE 
= "driver" + private[spark] val SPARK_POD_EXECUTOR_ROLE = "executor" + private[spark] val SPARK_APP_NAME_ANNOTATION = "spark-app-name" // Credentials secrets private[spark] val DRIVER_CREDENTIALS_SECRETS_BASE_DIR = diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 0544bf064844f..c2e616eadc1e0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -17,13 +17,13 @@ package org.apache.spark.deploy.kubernetes.submit import java.io.File -import java.util.Collections +import java.util.{Collections, UUID} import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.KubernetesClient import scala.collection.JavaConverters._ -import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ @@ -43,22 +43,21 @@ import org.apache.spark.util.Utils * where different steps of submission should be factored out into separate classes. */ private[spark] class Client( - appName: String, - kubernetesAppId: String, - mainClass: String, - sparkConf: SparkConf, - appArgs: Array[String], - sparkJars: Seq[String], - sparkFiles: Seq[String], - waitForAppCompletion: Boolean, - kubernetesClient: KubernetesClient, - initContainerComponentsProvider: DriverInitContainerComponentsProvider, - kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, - loggingPodStatusWatcher: LoggingPodStatusWatcher) - extends Logging { - + appName: String, + kubernetesResourceNamePrefix: String, + kubernetesAppId: String, + mainClass: String, + sparkConf: SparkConf, + appArgs: Array[String], + sparkJars: Seq[String], + sparkFiles: Seq[String], + waitForAppCompletion: Boolean, + kubernetesClient: KubernetesClient, + initContainerComponentsProvider: DriverInitContainerComponentsProvider, + kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, + loggingPodStatusWatcher: LoggingPodStatusWatcher) extends Logging { private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) - .getOrElse(kubernetesAppId) + .getOrElse(s"$kubernetesResourceNamePrefix-driver") private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) @@ -86,15 +85,16 @@ private[spark] class Client( val parsedCustomLabels = ConfigurationUtils.parseKeyValuePairs( customLabels, KUBERNETES_DRIVER_LABELS.key, "labels") require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + - s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") - require(!parsedCustomLabels.contains(SPARK_APP_NAME_LABEL), s"Label with key" + - s" $SPARK_APP_NAME_LABEL is not allowed as it is reserved for Spark bookkeeping operations.") + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + + s" operations.") + val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( + customAnnotations, 
KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") + require(!parsedCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), s"Annotation with key" + + s" $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for Spark bookkeeping" + + s" operations.") val allLabels = parsedCustomLabels ++ Map( SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_APP_NAME_LABEL -> appName, - SPARK_ROLE_LABEL -> "driver") - val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( - customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => new EnvVarBuilder() @@ -140,6 +140,7 @@ private[spark] class Client( .withName(kubernetesDriverPodName) .addToLabels(allLabels.asJava) .addToAnnotations(parsedCustomAnnotations.asJava) + .addToAnnotations(SPARK_APP_NAME_ANNOTATION, appName) .endMetadata() .withNewSpec() .withRestartPolicy("Never") @@ -186,6 +187,7 @@ private[spark] class Client( } resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) resolvedSparkConf.set("spark.app.id", kubernetesAppId) + resolvedSparkConf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, kubernetesResourceNamePrefix) // We don't need this anymore since we just set the JVM options on the environment resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) val resolvedLocalClasspath = containerLocalizedFilesResolver @@ -234,11 +236,11 @@ private[spark] class Client( throw e } if (waitForAppCompletion) { - logInfo(s"Waiting for application $kubernetesAppId to finish...") + logInfo(s"Waiting for application $appName to finish...") loggingPodStatusWatcher.awaitCompletion() - logInfo(s"Application $kubernetesAppId finished.") + logInfo(s"Application $appName finished.") } else { - logInfo(s"Deployed Spark application $kubernetesAppId into Kubernetes.") + logInfo(s"Deployed Spark application $appName into Kubernetes.") } } } @@ -279,15 +281,21 @@ private[spark] object Client { val sparkFiles = sparkConf.getOption("spark.files") .map(_.split(",")) .getOrElse(Array.empty[String]) - val appName = sparkConf.getOption("spark.app.name") - .getOrElse("spark") - val kubernetesAppId = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") + // The resource name prefix is derived from the application name, making it easy to connect the + // names of the Kubernetes resources from e.g. Kubectl or the Kubernetes dashboard to the + // application the user submitted. However, we can't use the application name in the label, as + // label values are considerably restrictive, e.g. must be no longer than 63 characters in + // length. So we generate a separate identifier for the app ID itself, and bookkeeping that + // requires finding "all pods for this application" should use the kubernetesAppId. 
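// A minimal illustrative sketch using hypothetical values for spark.app.name and the launch time,
// mirroring the derivation just below (the identifiers shown in the comments are examples only):
import java.util.UUID
val exampleAppName = "spark-pi"          // hypothetical spark.app.name
val exampleLaunchTime = 1497031000000L   // hypothetical submission timestamp in milliseconds
val examplePrefix = s"$exampleAppName-$exampleLaunchTime".toLowerCase.replaceAll("\\.", "-")
// examplePrefix == "spark-pi-1497031000000": readable, used for the driver pod and related resource names
val exampleAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}"
// exampleAppId is e.g. "spark-0a1b2c3d4e5f60718293a4b5c6d7e8f9": 38 characters, always a valid label value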
+ val kubernetesResourceNamePrefix = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}" val namespace = sparkConf.get(KUBERNETES_NAMESPACE) val master = resolveK8sMaster(sparkConf.get("spark.master")) val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( sparkConf, - kubernetesAppId, + kubernetesResourceNamePrefix, namespace, sparkJars, sparkFiles, @@ -300,14 +308,16 @@ private[spark] object Client { None, None)) { kubernetesClient => val kubernetesCredentialsMounterProvider = - new DriverPodKubernetesCredentialsMounterProviderImpl(sparkConf, kubernetesAppId) + new DriverPodKubernetesCredentialsMounterProviderImpl( + sparkConf, kubernetesResourceNamePrefix) val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)) .filter( _ => waitForAppCompletion) val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl( - kubernetesAppId, loggingInterval) + kubernetesResourceNamePrefix, loggingInterval) new Client( appName, + kubernetesResourceNamePrefix, kubernetesAppId, mainClass, sparkConf, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index be9da2582cb47..cfc61e193dcff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -16,8 +16,6 @@ */ package org.apache.spark.deploy.kubernetes.submit -import java.io.File - import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ @@ -46,12 +44,12 @@ private[spark] trait DriverInitContainerComponentsProvider { } private[spark] class DriverInitContainerComponentsProviderImpl( - sparkConf: SparkConf, - kubernetesAppId: String, - namespace: String, - sparkJars: Seq[String], - sparkFiles: Seq[String], - resourceStagingServerExternalSslOptions: SSLOptions) + sparkConf: SparkConf, + kubernetesResourceNamePrefix: String, + namespace: String, + sparkJars: Seq[String], + sparkFiles: Seq[String], + resourceStagingServerExternalSslOptions: SSLOptions) extends DriverInitContainerComponentsProvider { private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) @@ -99,10 +97,10 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) private val maybeSecretName = maybeResourceStagingServerUri.map { _ => - s"$kubernetesAppId-init-secret" + s"$kubernetesResourceNamePrefix-init-secret" } - private val configMapName = s"$kubernetesAppId-init-config" - private val configMapKey = s"$kubernetesAppId-init-config-key" + private val configMapName = s"$kubernetesResourceNamePrefix-init-config" + private val configMapKey = 
s"$kubernetesResourceNamePrefix-init-config-key" private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) @@ -116,29 +114,29 @@ private[spark] class DriverInitContainerComponentsProviderImpl( filesResourceId <- maybeSubmittedResourceIds.map(_.filesResourceId) } yield { new SubmittedDependencyInitContainerConfigPluginImpl( - // Configure the init-container with the internal URI over the external URI. - maybeResourceStagingServerInternalUri.getOrElse(stagingServerUri), - jarsResourceId, - filesResourceId, - INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, - INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, - resourceStagingServerInternalSslEnabled, - maybeResourceStagingServerInternalTrustStore, - maybeResourceStagingServerInternalClientCert, - maybeResourceStagingServerInternalTrustStorePassword, - maybeResourceStagingServerInternalTrustStoreType, - INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) + // Configure the init-container with the internal URI over the external URI. + maybeResourceStagingServerInternalUri.getOrElse(stagingServerUri), + jarsResourceId, + filesResourceId, + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + resourceStagingServerInternalSslEnabled, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStorePassword, + maybeResourceStagingServerInternalTrustStoreType, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) } new SparkInitContainerConfigMapBuilderImpl( - sparkJars, - sparkFiles, - jarsDownloadPath, - filesDownloadPath, - configMapName, - configMapKey, - submittedDependencyConfigPlugin) + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath, + configMapName, + configMapKey, + submittedDependencyConfigPlugin) } override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { @@ -158,14 +156,13 @@ private[spark] class DriverInitContainerComponentsProviderImpl( driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] = { maybeResourceStagingServerUri.map { stagingServerUri => new SubmittedDependencyUploaderImpl( - kubernetesAppId, - driverPodLabels, - namespace, - stagingServerUri, - sparkJars, - sparkFiles, - resourceStagingServerExternalSslOptions, - RetrofitClientFactoryImpl) + driverPodLabels, + namespace, + stagingServerUri, + sparkJars, + sparkFiles, + resourceStagingServerExternalSslOptions, + RetrofitClientFactoryImpl) } } @@ -178,15 +175,15 @@ private[spark] class DriverInitContainerComponentsProviderImpl( filesResourceSecret <- maybeSubmittedResourceSecrets.map(_.filesResourceSecret) } yield { new SubmittedDependencySecretBuilderImpl( - secretName, - jarsResourceSecret, - filesResourceSecret, - INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, - INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, - maybeResourceStagingServerInternalTrustStore, - maybeResourceStagingServerInternalClientCert) + secretName, + jarsResourceSecret, + filesResourceSecret, + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, + 
INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert) } } @@ -196,13 +193,13 @@ private[spark] class DriverInitContainerComponentsProviderImpl( secret, INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) } new SparkPodInitContainerBootstrapImpl( - initContainerImage, - dockerImagePullPolicy, - jarsDownloadPath, - filesDownloadPath, - downloadTimeoutMinutes, - configMapName, - configMapKey, - resourceStagingServerSecretPlugin) + initContainerImage, + dockerImagePullPolicy, + jarsDownloadPath, + filesDownloadPath, + downloadTimeoutMinutes, + configMapName, + configMapKey, + resourceStagingServerSecretPlugin) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala index a891cf3904d2d..83d7a28f5ca10 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderImpl.scala @@ -50,7 +50,6 @@ private[spark] trait SubmittedDependencyUploader { * Resource Staging Service. */ private[spark] class SubmittedDependencyUploaderImpl( - kubernetesAppId: String, podLabels: Map[String, String], podNamespace: String, stagingServerUri: String, diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index c3a6fe28a6255..6ab6480d848a2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -65,7 +65,8 @@ private[spark] class KubernetesClusterSchedulerBackend( "executor labels") require( !executorLabels.contains(SPARK_APP_ID_LABEL), - s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is reserved for Spark.") + s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is" + + s" reserved for Spark.") require( !executorLabels.contains(SPARK_EXECUTOR_ID_LABEL), s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + @@ -87,6 +88,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .get(KUBERNETES_DRIVER_POD_NAME) .getOrElse( throw new SparkException("Must specify the driver pod name")) + private val executorPodNamePrefix = conf.get(KUBERNETES_EXECUTOR_POD_NAME_PREFIX) private val executorMemoryMb = conf.get(org.apache.spark.internal.config.EXECUTOR_MEMORY) private val executorMemoryString = conf.get( @@ -225,8 +227,11 @@ private[spark] class KubernetesClusterSchedulerBackend( override def start(): Unit = { super.start() - executorWatchResource.set(kubernetesClient.pods().withLabel(SPARK_APP_ID_LABEL, applicationId()) - .watch(new ExecutorPodsWatcher())) + executorWatchResource.set( + kubernetesClient + .pods() + .withLabel(SPARK_APP_ID_LABEL, applicationId()) + .watch(new ExecutorPodsWatcher())) 
allocator.scheduleWithFixedDelay( allocatorRunnable, 0, podAllocationInterval, TimeUnit.SECONDS) @@ -280,7 +285,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private def allocateNewExecutorPod(): (String, Pod) = { val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString - val name = s"${applicationId()}-exec-$executorId" + val name = s"$executorPodNamePrefix-exec-$executorId" // hostname must be no longer than 63 characters, so take the last 63 characters of the pod // name as the hostname. This preserves uniqueness since the end of name contains @@ -289,7 +294,7 @@ private[spark] class KubernetesClusterSchedulerBackend( val resolvedExecutorLabels = Map( SPARK_EXECUTOR_ID_LABEL -> executorId, SPARK_APP_ID_LABEL -> applicationId(), - SPARK_ROLE_LABEL -> "executor") ++ + SPARK_ROLE_LABEL -> SPARK_POD_EXECUTOR_ROLE) ++ executorLabels val executorMemoryQuantity = new QuantityBuilder(false) .withAmount(s"${executorMemoryMb}M") diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 00f09c64b53b7..193f36a7423b2 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -45,14 +45,14 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val BOOTSTRAPPED_POD_ANNOTATION = "bootstrapped" private val TRUE = "true" private val APP_NAME = "spark-test" - private val APP_ID = "spark-app-id" + private val APP_RESOURCE_PREFIX = "spark-prefix" + private val APP_ID = "spark-id" private val CUSTOM_LABEL_KEY = "customLabel" private val CUSTOM_LABEL_VALUE = "customLabelValue" private val ALL_EXPECTED_LABELS = Map( CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, SPARK_APP_ID_LABEL -> APP_ID, - SPARK_APP_NAME_LABEL -> APP_NAME, - SPARK_ROLE_LABEL -> "driver") + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) private val CUSTOM_ANNOTATION_KEY = "customAnnotation" private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" @@ -183,7 +183,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .build() } }) - when(podOps.withName(APP_ID)).thenReturn(namedPodResource) + when(podOps.withName(s"$APP_RESOURCE_PREFIX-driver")).thenReturn(namedPodResource) when(namedPodResource.watch(loggingPodStatusWatcher)).thenReturn(watch) when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) @@ -291,6 +291,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { expectationsForNoDependencyUploader() new Client( APP_NAME, + APP_RESOURCE_PREFIX, APP_ID, MAIN_CLASS, SPARK_CONF, @@ -334,7 +335,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { owners.head.getController && owners.head.getKind == DRIVER_POD_KIND && owners.head.getUid == DRIVER_POD_UID && - owners.head.getName == APP_ID && + owners.head.getName == s"$APP_RESOURCE_PREFIX-driver" && owners.head.getApiVersion == DRIVER_POD_API_VERSION }) } @@ -354,14 +355,15 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .toMap ++ Map( "spark.app.id" -> APP_ID, - KUBERNETES_DRIVER_POD_NAME.key -> APP_ID, + KUBERNETES_DRIVER_POD_NAME.key -> s"$APP_RESOURCE_PREFIX-driver", + 
KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> APP_RESOURCE_PREFIX, EXECUTOR_INIT_CONF_KEY -> TRUE, CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) runAndVerifyPodMatchesPredicate { p => Option(p) - .filter(_.getMetadata.getName == APP_ID) + .filter(_.getMetadata.getName == s"$APP_RESOURCE_PREFIX-driver") .filter(podHasCorrectAnnotations) .filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) .filter(containerHasCorrectBasicContainerConfiguration) @@ -374,6 +376,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private def runAndVerifyPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { new Client( APP_NAME, + APP_RESOURCE_PREFIX, APP_ID, MAIN_CLASS, SPARK_CONF, @@ -442,6 +445,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private def podHasCorrectAnnotations(pod: Pod): Boolean = { val expectedAnnotations = Map( CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, + SPARK_APP_NAME_ANNOTATION -> APP_NAME, BOOTSTRAPPED_POD_ANNOTATION -> TRUE) pod.getMetadata.getAnnotations.asScala == expectedAnnotations } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala index c207e3c69cd3c..96fa92c254297 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyUploaderSuite.scala @@ -85,14 +85,13 @@ private[spark] class SubmittedDependencyUploaderSuite extends SparkFunSuite with resourcesDataCaptor.capture(), resourcesOwnerCaptor.capture())) .thenReturn(responseCall) dependencyUploaderUnderTest = new SubmittedDependencyUploaderImpl( - APP_ID, - LABELS, - NAMESPACE, - STAGING_SERVER_URI, - JARS, - FILES, - STAGING_SERVER_SSL_OPTIONS, - retrofitClientFactory) + LABELS, + NAMESPACE, + STAGING_SERVER_URI, + JARS, + FILES, + STAGING_SERVER_SSL_OPTIONS, + retrofitClientFactory) } test("Uploading jars should contact the staging server with the appropriate parameters") { diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index 6a296d6112c97..e377f285eb9a6 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -195,6 +195,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS)) } + test("Use a very long application name.") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)).setAppName("long" * 40) + runSparkPiAndVerifyCompletion(CONTAINER_LOCAL_MAIN_APP_RESOURCE) + } + private def launchStagingServer( resourceStagingServerSslOptions: SSLOptions, keyAndCertPem: Option[KeyAndCertPem]): Unit = { assume(testBackend.name == 
MINIKUBE_TEST_BACKEND) From e763252854c8c2321d85c11fdd86057e46a6dbd8 Mon Sep 17 00:00:00 2001 From: Johannes Scheuermann Date: Fri, 9 Jun 2017 00:33:40 +0200 Subject: [PATCH 133/225] Create base-image and minimize layer count (#324) * Create base-image and minimize layer count * Create running-on-kubernetes.md --- docs/running-on-kubernetes.md | 6 +++- .../src/main/docker/driver/Dockerfile | 17 ++------- .../src/main/docker/executor/Dockerfile | 17 ++------- .../src/main/docker/init-container/Dockerfile | 16 +-------- .../docker/resource-staging-server/Dockerfile | 16 +-------- .../main/docker/shuffle-service/Dockerfile | 17 ++------- .../src/main/docker/spark-base/Dockerfile | 35 +++++++++++++++++++ .../docker/SparkDockerImageBuilder.scala | 2 ++ 8 files changed, 50 insertions(+), 76 deletions(-) create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index dc3cf738832ad..c10630fc5c5c6 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -49,7 +49,7 @@ If you wish to use pre-built docker images, you may use the images published in You may also build these docker images from sources, or customize them as required. Spark distributions include the -Docker files for the driver, executor, and init-container at `dockerfiles/driver/Dockerfile`, +Docker files for the base-image, driver, executor, and init-container at `dockerfiles/spark-base/Dockerfile`, `dockerfiles/driver/Dockerfile`, `dockerfiles/executor/Dockerfile`, and `dockerfiles/init-container/Dockerfile` respectively. Use these Docker files to build the Docker images, and then tag them with the registry that the images should be sent to. Finally, push the images to the registry. @@ -57,12 +57,16 @@ to the registry. For example, if the registry host is `registry-host` and the registry is listening on port 5000: cd $SPARK_HOME + docker build -t registry-host:5000/spark-base:latest -f dockerfiles/spark-base/Dockerfile . docker build -t registry-host:5000/spark-driver:latest -f dockerfiles/driver/Dockerfile . docker build -t registry-host:5000/spark-executor:latest -f dockerfiles/executor/Dockerfile . docker build -t registry-host:5000/spark-init:latest -f dockerfiles/init-container/Dockerfile . + docker push registry-host:5000/spark-base:latest docker push registry-host:5000/spark-driver:latest docker push registry-host:5000/spark-executor:latest docker push registry-host:5000/spark-init:latest + +Note that `spark-base` is the base image for the other images. It must be built first; afterwards, the other images can be built in any order. ## Submitting Applications to Kubernetes diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index fa651ff43aaa0..6bbff8ef64a0f 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -15,26 +15,13 @@ # limitations under the License. # -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-driver:latest -f dockerfiles/driver/Dockerfile .
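# Since this image now builds FROM spark-base, the spark-base image must already be available under that
# tag before the command above will succeed. A typical local build (tag name assumed, adjust as needed):
# docker build -t spark-base:latest -f dockerfiles/spark-base/Dockerfile .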
-RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD examples /opt/spark/examples -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark +COPY examples /opt/spark/examples CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index fbad43b6255b9..9c9efb23d7e95 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -15,26 +15,13 @@ # limitations under the License. # -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . -RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD examples /opt/spark/examples -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark +COPY examples /opt/spark/examples # TODO support spark.executor.extraClassPath CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index 40557a7465a8a..6bff06da12840 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -15,24 +15,10 @@ # limitations under the License. # -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . -RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark - ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index c8b13c44207bc..c9a92fa1c5b62 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -15,24 +15,10 @@ # limitations under the License. 
# -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . -RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark - ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index 1f64376b89aae..7f4e2aa51b67d 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -15,25 +15,12 @@ # limitations under the License. # -FROM openjdk:8-alpine +FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-shuffle:latest -f dockerfiles/shuffle/Dockerfile . -RUN apk upgrade --update -RUN apk add --update bash tini -RUN mkdir -p /opt/spark -RUN touch /opt/spark/RELEASE - -ADD jars /opt/spark/jars -ADD examples /opt/spark/examples -ADD bin /opt/spark/bin -ADD sbin /opt/spark/sbin -ADD conf /opt/spark/conf - -ENV SPARK_HOME /opt/spark - -WORKDIR /opt/spark +COPY examples /opt/spark/examples ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService", "1" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile new file mode 100644 index 0000000000000..b0925e3bb0416 --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile @@ -0,0 +1,35 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM openjdk:8-alpine + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-base:latest -f dockerfiles/spark-base/Dockerfile . 
+ +RUN apk upgrade --no-cache && \ + apk add --no-cache bash tini && \ + mkdir -p /opt/spark && \ + touch /opt/spark/RELEASE + +COPY jars /opt/spark/jars +COPY bin /opt/spark/bin +COPY sbin /opt/spark/sbin +COPY conf /opt/spark/conf + +ENV SPARK_HOME /opt/spark + +WORKDIR /opt/spark diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 3ff72829f88a7..4db19478f44bc 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -28,6 +28,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, private val DOCKER_BUILD_PATH = Paths.get("target", "docker") // Dockerfile paths must be relative to the build path. + private val BASE_DOCKER_FILE = "dockerfiles/spark-base/Dockerfile" private val DRIVER_DOCKER_FILE = "dockerfiles/driver/Dockerfile" private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" @@ -60,6 +61,7 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, def buildSparkDockerImages(): Unit = { Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } + buildImage("spark-base", BASE_DOCKER_FILE) buildImage("spark-driver", DRIVER_DOCKER_FILE) buildImage("spark-executor", EXECUTOR_DOCKER_FILE) buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE) From 9f2ce8e9539a9a718c66d24874be8a90b2ba538c Mon Sep 17 00:00:00 2001 From: Shuai Lin Date: Thu, 8 Jun 2017 19:07:14 -0500 Subject: [PATCH 134/225] Added log4j config for k8s unit tests. (#314) --- .../core/src/test/resources/log4j.properties | 31 +++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 resource-managers/kubernetes/core/src/test/resources/log4j.properties diff --git a/resource-managers/kubernetes/core/src/test/resources/log4j.properties b/resource-managers/kubernetes/core/src/test/resources/log4j.properties new file mode 100644 index 0000000000000..ad95fadb7c0c0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/resources/log4j.properties @@ -0,0 +1,31 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# + +# Set everything to be logged to the file target/unit-tests.log +log4j.rootCategory=INFO, file +log4j.appender.file=org.apache.log4j.FileAppender +log4j.appender.file.append=true +log4j.appender.file.file=target/unit-tests.log +log4j.appender.file.layout=org.apache.log4j.PatternLayout +log4j.appender.file.layout.ConversionPattern=%d{yy/MM/dd HH:mm:ss.SSS} %t %p %c{1}: %m%n + +# Ignore messages below warning level from a few verbose libraries. +log4j.logger.com.sun.jersey=WARN +log4j.logger.org.apache.hadoop=WARN +log4j.logger.org.eclipse.jetty=WARN +log4j.logger.org.mortbay=WARN +log4j.logger.org.spark_project.jetty=WARN From 0010a571319333166087ea797f121907f46e30f7 Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Tue, 13 Jun 2017 18:08:45 -0700 Subject: [PATCH 135/225] Use node affinity to launch executors on preferred nodes benefitting from data locality (#316) * Use node affinity to launch executors on data local nodes * Fix comment style * Use JSON object mapper * Address review comments * Fix a style issue * Clean up and add a TODO * Fix style issue * Address review comments --- .../spark/deploy/kubernetes/constants.scala | 1 + .../KubernetesClusterSchedulerBackend.scala | 103 ++++++++++++++++-- 2 files changed, 96 insertions(+), 8 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index 9c46d7494b187..f2f1136e54fe4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -90,6 +90,7 @@ package object constants { private[spark] val INIT_CONTAINER_SECRET_VOLUME_NAME = "spark-init-secret" // Miscellaneous + private[spark] val ANNOTATION_EXECUTOR_NODE_AFFINITY = "scheduler.alpha.kubernetes.io/affinity" private[spark] val DRIVER_CONTAINER_NAME = "spark-kubernetes-driver" private[spark] val KUBERNETES_MASTER_INTERNAL_URL = "https://kubernetes.default.svc" private[spark] val MEMORY_OVERHEAD_FACTOR = 0.10 diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 6ab6480d848a2..85ce5f01200b2 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -17,9 +17,12 @@ package org.apache.spark.scheduler.cluster.kubernetes import java.io.Closeable +import java.net.InetAddress import java.util.concurrent.TimeUnit import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action @@ -177,16 +180,18 @@ private[spark] class KubernetesClusterSchedulerBackend( 
.newDaemonSingleThreadScheduledExecutor("kubernetes-pod-allocator") private val allocatorRunnable: Runnable = new Runnable { + override def run(): Unit = { if (totalRegisteredExecutors.get() < runningExecutorPods.size) { logDebug("Waiting for pending executors before scaling") } else if (totalExpectedExecutors.get() <= runningExecutorPods.size) { logDebug("Maximum allowed executor limit reached. Not scaling up further.") } else { + val nodeToLocalTaskCount = getNodesWithLocalTaskCounts RUNNING_EXECUTOR_PODS_LOCK.synchronized { for (i <- 0 until math.min( totalExpectedExecutors.get - runningExecutorPods.size, podAllocationSize)) { - runningExecutorPods += allocateNewExecutorPod() + runningExecutorPods += allocateNewExecutorPod(nodeToLocalTaskCount) logInfo( s"Requesting a new executor, total executors is now ${runningExecutorPods.size}") } @@ -195,6 +200,8 @@ private[spark] class KubernetesClusterSchedulerBackend( } } + private val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule) + private def getShuffleClient(): KubernetesExternalShuffleClient = { new KubernetesExternalShuffleClient( SparkTransportConf.fromSparkConf(conf, "shuffle"), @@ -283,7 +290,70 @@ private[spark] class KubernetesClusterSchedulerBackend( } } - private def allocateNewExecutorPod(): (String, Pod) = { + /** + * @return A map of K8s cluster nodes to the number of tasks that could benefit from data + * locality if an executor launches on the cluster node. + */ + private def getNodesWithLocalTaskCounts() : Map[String, Int] = { + val executorPodsWithIPs = EXECUTOR_PODS_BY_IPS_LOCK.synchronized { + executorPodsByIPs.values.toList // toList makes a defensive copy. + } + val nodeToLocalTaskCount = mutable.Map[String, Int]() ++ + KubernetesClusterSchedulerBackend.this.synchronized { + hostToLocalTaskCount + } + for (pod <- executorPodsWithIPs) { + // Remove cluster nodes that are running our executors already. + // TODO: This prefers spreading out executors across nodes. In case users want + // consolidating executors on fewer nodes, introduce a flag. See the spark.deploy.spreadOut + // flag that Spark standalone has: https://spark.apache.org/docs/latest/spark-standalone.html + nodeToLocalTaskCount.remove(pod.getSpec.getNodeName).nonEmpty || + nodeToLocalTaskCount.remove(pod.getStatus.getHostIP).nonEmpty || + nodeToLocalTaskCount.remove( + InetAddress.getByName(pod.getStatus.getHostIP).getCanonicalHostName).nonEmpty + } + nodeToLocalTaskCount.toMap[String, Int] + } + + private def addNodeAffinityAnnotationIfUseful(basePodBuilder: PodBuilder, + nodeToTaskCount: Map[String, Int]): PodBuilder = { + def scaleToRange(value: Int, baseMin: Double, baseMax: Double, + rangeMin: Double, rangeMax: Double): Int = + (((rangeMax - rangeMin) * (value - baseMin) / (baseMax - baseMin)) + rangeMin).toInt + + if (nodeToTaskCount.nonEmpty) { + val taskTotal = nodeToTaskCount.foldLeft(0)(_ + _._2) + // Normalize to node affinity weights in 1 to 100 range. 
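// Illustrative example with hypothetical task counts: for nodeToTaskCount = Map("node-a" -> 5, "node-b" -> 1)
// the task total is 6, so scaleToRange assigns node-a a weight of 80 and node-b a weight of 1, and the
// annotation written under scheduler.alpha.kubernetes.io/affinity would look roughly like:
// {"nodeAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[
//   {"weight":80,"preference":{"matchExpressions":[
//     {"key":"kubernetes.io/hostname","operator":"In","values":["node-a"]}]}},
//   {"weight":1,"preference":{"matchExpressions":[
//     {"key":"kubernetes.io/hostname","operator":"In","values":["node-b"]}]}}]}}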
+ val nodeToWeight = nodeToTaskCount.map{ + case (node, taskCount) => + (node, scaleToRange(taskCount, 1, taskTotal, rangeMin = 1, rangeMax = 100))} + val weightToNodes = nodeToWeight.groupBy(_._2).mapValues(_.keys) + // @see https://kubernetes.io/docs/concepts/configuration/assign-pod-node + val nodeAffinityJson = objectMapper.writeValueAsString(SchedulerAffinity(NodeAffinity( + preferredDuringSchedulingIgnoredDuringExecution = + for ((weight, nodes) <- weightToNodes) yield + WeightedPreference(weight, + Preference(Array(MatchExpression("kubernetes.io/hostname", "In", nodes)))) + ))) + // TODO: Use non-annotation syntax when we switch to K8s version 1.6. + logDebug(s"Adding nodeAffinity as annotation $nodeAffinityJson") + basePodBuilder.editMetadata() + .addToAnnotations(ANNOTATION_EXECUTOR_NODE_AFFINITY, nodeAffinityJson) + .endMetadata() + } else { + basePodBuilder + } + } + + /** + * Allocates a new executor pod + * + * @param nodeToLocalTaskCount A map of K8s cluster nodes to the number of tasks that could + * benefit from data locality if an executor launches on the cluster + * node. + * @return A tuple of the new executor name and the Pod data structure. + */ + private def allocateNewExecutorPod(nodeToLocalTaskCount: Map[String, Int]): (String, Pod) = { val executorId = EXECUTOR_ID_COUNTER.incrementAndGet().toString val name = s"$executorPodNamePrefix-exec-$executorId" @@ -393,14 +463,19 @@ private[spark] class KubernetesClusterSchedulerBackend( .endSpec() } }.getOrElse(basePodBuilder) - val resolvedExecutorPod = executorInitContainerBootstrap.map { bootstrap => - bootstrap.bootstrapInitContainerAndVolumes( - "executor", - withMaybeShuffleConfigPodBuilder) - }.getOrElse(withMaybeShuffleConfigPodBuilder) + + val executorInitContainerPodBuilder = executorInitContainerBootstrap.map { + bootstrap => + bootstrap.bootstrapInitContainerAndVolumes( + "executor", + withMaybeShuffleConfigPodBuilder) + }.getOrElse(withMaybeShuffleConfigPodBuilder) + + val resolvedExecutorPodBuilder = addNodeAffinityAnnotationIfUseful( + executorInitContainerPodBuilder, nodeToLocalTaskCount) try { - (executorId, kubernetesClient.pods.create(resolvedExecutorPod.build())) + (executorId, kubernetesClient.pods.create(resolvedExecutorPodBuilder.build())) } catch { case throwable: Throwable => logError("Failed to allocate executor pod.", throwable) @@ -521,3 +596,15 @@ private object KubernetesClusterSchedulerBackend { private val DEFAULT_STATIC_PORT = 10000 private val EXECUTOR_ID_COUNTER = new AtomicLong(0L) } + +/** + * These case classes model K8s node affinity syntax for + * preferredDuringSchedulingIgnoredDuringExecution. + * @see https://kubernetes.io/docs/concepts/configuration/assign-pod-node + */ +case class SchedulerAffinity(nodeAffinity: NodeAffinity) +case class NodeAffinity(preferredDuringSchedulingIgnoredDuringExecution: + Iterable[WeightedPreference]) +case class WeightedPreference(weight: Int, preference: Preference) +case class Preference(matchExpressions: Array[MatchExpression]) +case class MatchExpression(key: String, operator: String, values: Iterable[String]) From efb50814104bf4e45c3daa3518274875f0f3e6af Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 13 Jun 2017 21:52:19 -0700 Subject: [PATCH 136/225] Fix sbt build. (#344) * Fix sbt build. - Remove extraneous Feign dependency that we no longer use in submission v2. - Exclude Jackson from various modules to ensure every Jackson module is forced to 2.6.5. - Fix a linter error only caught by sbt. 
- Add Kubernetes modules to various parts of the SBT infrastructure * Actually remove feign * Actually exclude Jackson from kubernetes client. --- dev/deps/spark-deps-hadoop-2.2 | 186 +++++++++++++++++ dev/deps/spark-deps-hadoop-2.3 | 193 ++++++++++++++++++ dev/deps/spark-deps-hadoop-2.4 | 193 ++++++++++++++++++ dev/deps/spark-deps-hadoop-2.6 | 21 +- dev/deps/spark-deps-hadoop-2.7 | 21 +- dev/sparktestsupport/modules.py | 8 + dev/test-dependencies.sh | 2 +- pom.xml | 53 +++-- resource-managers/kubernetes/core/pom.xml | 42 ++-- ...riverPodKubernetesCredentialsMounter.scala | 2 +- 10 files changed, 677 insertions(+), 44 deletions(-) create mode 100644 dev/deps/spark-deps-hadoop-2.2 create mode 100644 dev/deps/spark-deps-hadoop-2.3 create mode 100644 dev/deps/spark-deps-hadoop-2.4 diff --git a/dev/deps/spark-deps-hadoop-2.2 b/dev/deps/spark-deps-hadoop-2.2 new file mode 100644 index 0000000000000..beebcccd0b1f6 --- /dev/null +++ b/dev/deps/spark-deps-hadoop-2.2 @@ -0,0 +1,186 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +automaton-1.11-8.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.12.jar +breeze_2.11-0.12.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-3.0.0.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-crypto-1.0.0.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.5.jar +commons-logging-1.1.3.jar +commons-math-2.1.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.12.1.1.jar +eigenbase-properties-1.1.5.jar +generex-1.0.1.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.2.0.jar +hadoop-auth-2.2.0.jar +hadoop-client-2.2.0.jar +hadoop-common-2.2.0.jar +hadoop-hdfs-2.2.0.jar +hadoop-mapreduce-client-app-2.2.0.jar +hadoop-mapreduce-client-common-2.2.0.jar +hadoop-mapreduce-client-core-2.2.0.jar +hadoop-mapreduce-client-jobclient-2.2.0.jar +hadoop-mapreduce-client-shuffle-2.2.0.jar +hadoop-yarn-api-2.2.0.jar +hadoop-yarn-client-2.2.0.jar +hadoop-yarn-common-2.2.0.jar +hadoop-yarn-server-common-2.2.0.jar +hadoop-yarn-server-web-proxy-2.2.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-3.0.0.jar +javassist-3.18.1-GA.jar 
+javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.7.1.jar +jetty-util-6.1.26.jar +jline-2.12.1.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar +leveldbjni-all-1.8.jar +libfb303-0.9.3.jar +libthrift-0.9.3.jar +log4j-1.2.17.jar +logging-interceptor-3.6.0.jar +lz4-1.3.0.jar +mesos-1.0.0-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar +minlog-1.3.0.jar +netty-3.8.0.Final.jar +netty-all-4.0.42.Final.jar +objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.4.jar +pyrolite-4.13.jar +retrofit-2.2.0.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +shapeless_2.11-2.0.0.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar +snappy-0.2.jar +snappy-java-1.1.2.6.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.2.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zjsonpatch-0.3.0.jar +zookeeper-3.4.5.jar diff --git a/dev/deps/spark-deps-hadoop-2.3 b/dev/deps/spark-deps-hadoop-2.3 new file mode 100644 index 0000000000000..88419f18eef46 --- /dev/null +++ b/dev/deps/spark-deps-hadoop-2.3 @@ -0,0 +1,193 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +automaton-1.11-8.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.12.jar +breeze_2.11-0.12.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-3.0.0.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-crypto-1.0.0.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.5.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar 
+commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.12.1.1.jar +eigenbase-properties-1.1.5.jar +generex-1.0.1.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.3.0.jar +hadoop-auth-2.3.0.jar +hadoop-client-2.3.0.jar +hadoop-common-2.3.0.jar +hadoop-hdfs-2.3.0.jar +hadoop-mapreduce-client-app-2.3.0.jar +hadoop-mapreduce-client-common-2.3.0.jar +hadoop-mapreduce-client-core-2.3.0.jar +hadoop-mapreduce-client-jobclient-2.3.0.jar +hadoop-mapreduce-client-shuffle-2.3.0.jar +hadoop-yarn-api-2.3.0.jar +hadoop-yarn-client-2.3.0.jar +hadoop-yarn-common-2.3.0.jar +hadoop-yarn-server-common-2.3.0.jar +hadoop-yarn-server-web-proxy-2.3.0.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-3.0.0.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar +jersey-server-2.22.2.jar +jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.1.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar +leveldbjni-all-1.8.jar +libfb303-0.9.3.jar +libthrift-0.9.3.jar +log4j-1.2.17.jar +logging-interceptor-3.6.0.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-1.0.0-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.42.Final.jar +objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.4.jar +pyrolite-4.13.jar +retrofit-2.2.0.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +shapeless_2.11-2.0.0.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar +snappy-0.2.jar +snappy-java-1.1.2.6.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar 
+stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.2.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zjsonpatch-0.3.0.jar +zookeeper-3.4.5.jar diff --git a/dev/deps/spark-deps-hadoop-2.4 b/dev/deps/spark-deps-hadoop-2.4 new file mode 100644 index 0000000000000..644426626aceb --- /dev/null +++ b/dev/deps/spark-deps-hadoop-2.4 @@ -0,0 +1,193 @@ +JavaEWAH-0.3.2.jar +RoaringBitmap-0.5.11.jar +ST4-4.0.4.jar +activation-1.1.1.jar +antlr-2.7.7.jar +antlr-runtime-3.4.jar +antlr4-runtime-4.5.3.jar +aopalliance-1.0.jar +aopalliance-repackaged-2.4.0-b34.jar +apache-log4j-extras-1.2.17.jar +arpack_combined_all-0.1.jar +automaton-1.11-8.jar +avro-1.7.7.jar +avro-ipc-1.7.7.jar +avro-mapred-1.7.7-hadoop2.jar +base64-2.3.8.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar +bonecp-0.8.0.RELEASE.jar +breeze-macros_2.11-0.12.jar +breeze_2.11-0.12.jar +calcite-avatica-1.2.0-incubating.jar +calcite-core-1.2.0-incubating.jar +calcite-linq4j-1.2.0-incubating.jar +chill-java-0.8.0.jar +chill_2.11-0.8.0.jar +commons-beanutils-1.7.0.jar +commons-beanutils-core-1.8.0.jar +commons-cli-1.2.jar +commons-codec-1.10.jar +commons-collections-3.2.2.jar +commons-compiler-3.0.0.jar +commons-compress-1.4.1.jar +commons-configuration-1.6.jar +commons-crypto-1.0.0.jar +commons-dbcp-1.4.jar +commons-digester-1.8.jar +commons-httpclient-3.1.jar +commons-io-2.4.jar +commons-lang-2.6.jar +commons-lang3-3.5.jar +commons-logging-1.1.3.jar +commons-math3-3.4.1.jar +commons-net-2.2.jar +commons-pool-1.5.4.jar +compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar +core-1.1.2.jar +curator-client-2.4.0.jar +curator-framework-2.4.0.jar +curator-recipes-2.4.0.jar +datanucleus-api-jdo-3.2.6.jar +datanucleus-core-3.2.10.jar +datanucleus-rdbms-3.2.9.jar +derby-10.12.1.1.jar +eigenbase-properties-1.1.5.jar +generex-1.0.1.jar +guava-14.0.1.jar +guice-3.0.jar +guice-servlet-3.0.jar +hadoop-annotations-2.4.1.jar +hadoop-auth-2.4.1.jar +hadoop-client-2.4.1.jar +hadoop-common-2.4.1.jar +hadoop-hdfs-2.4.1.jar +hadoop-mapreduce-client-app-2.4.1.jar +hadoop-mapreduce-client-common-2.4.1.jar +hadoop-mapreduce-client-core-2.4.1.jar +hadoop-mapreduce-client-jobclient-2.4.1.jar +hadoop-mapreduce-client-shuffle-2.4.1.jar +hadoop-yarn-api-2.4.1.jar +hadoop-yarn-client-2.4.1.jar +hadoop-yarn-common-2.4.1.jar +hadoop-yarn-server-common-2.4.1.jar +hadoop-yarn-server-web-proxy-2.4.1.jar +hk2-api-2.4.0-b34.jar +hk2-locator-2.4.0-b34.jar +hk2-utils-2.4.0-b34.jar +httpclient-4.5.2.jar +httpcore-4.4.4.jar +ivy-2.4.0.jar +jackson-annotations-2.6.5.jar +jackson-core-2.6.5.jar +jackson-core-asl-1.9.13.jar +jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar +jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar +jackson-module-paranamer-2.6.5.jar +jackson-module-scala_2.11-2.6.5.jar +janino-3.0.0.jar +java-xmlbuilder-1.0.jar +javassist-3.18.1-GA.jar +javax.annotation-api-1.2.jar +javax.inject-1.jar +javax.inject-2.4.0-b34.jar +javax.servlet-api-3.1.0.jar +javax.ws.rs-api-2.0.1.jar +javolution-5.5.1.jar +jaxb-api-2.2.2.jar +jcl-over-slf4j-1.7.16.jar +jdo-api-3.0.1.jar +jersey-client-2.22.2.jar +jersey-common-2.22.2.jar +jersey-container-servlet-2.22.2.jar +jersey-container-servlet-core-2.22.2.jar +jersey-guava-2.22.2.jar +jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar +jersey-server-2.22.2.jar 
+jets3t-0.9.3.jar +jetty-6.1.26.jar +jetty-util-6.1.26.jar +jline-2.12.1.jar +joda-time-2.9.3.jar +jodd-core-3.5.2.jar +jpam-1.1.jar +json4s-ast_2.11-3.2.11.jar +json4s-core_2.11-3.2.11.jar +json4s-jackson_2.11-3.2.11.jar +jsr305-1.3.9.jar +jta-1.1.jar +jtransforms-2.4.0.jar +jul-to-slf4j-1.7.16.jar +kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar +leveldbjni-all-1.8.jar +libfb303-0.9.3.jar +libthrift-0.9.3.jar +log4j-1.2.17.jar +logging-interceptor-3.6.0.jar +lz4-1.3.0.jar +mail-1.4.7.jar +mesos-1.0.0-shaded-protobuf.jar +metrics-core-3.1.2.jar +metrics-graphite-3.1.2.jar +metrics-json-3.1.2.jar +metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar +minlog-1.3.0.jar +mx4j-3.0.2.jar +netty-3.8.0.Final.jar +netty-all-4.0.42.Final.jar +objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar +opencsv-2.3.jar +oro-2.0.8.jar +osgi-resource-locator-1.0.1.jar +paranamer-2.3.jar +parquet-column-1.8.1.jar +parquet-common-1.8.1.jar +parquet-encoding-1.8.1.jar +parquet-format-2.3.0-incubating.jar +parquet-hadoop-1.8.1.jar +parquet-hadoop-bundle-1.6.0.jar +parquet-jackson-1.8.1.jar +pmml-model-1.2.15.jar +pmml-schema-1.2.15.jar +protobuf-java-2.5.0.jar +py4j-0.10.4.jar +pyrolite-4.13.jar +retrofit-2.2.0.jar +scala-compiler-2.11.8.jar +scala-library-2.11.8.jar +scala-parser-combinators_2.11-1.0.4.jar +scala-reflect-2.11.8.jar +scala-xml_2.11-1.0.2.jar +scalap-2.11.8.jar +shapeless_2.11-2.0.0.jar +slf4j-api-1.7.16.jar +slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar +snappy-0.2.jar +snappy-java-1.1.2.6.jar +spire-macros_2.11-0.7.4.jar +spire_2.11-0.7.4.jar +stax-api-1.0-2.jar +stax-api-1.0.1.jar +stream-2.7.0.jar +stringtemplate-3.2.1.jar +super-csv-2.2.0.jar +univocity-parsers-2.2.1.jar +validation-api-1.1.0.Final.jar +xbean-asm5-shaded-4.4.jar +xmlenc-0.52.jar +xz-1.0.jar +zjsonpatch-0.3.0.jar +zookeeper-3.4.5.jar diff --git a/dev/deps/spark-deps-hadoop-2.6 b/dev/deps/spark-deps-hadoop-2.6 index 9287bd47cf113..e03de518dad2f 100644 --- a/dev/deps/spark-deps-hadoop-2.6 +++ b/dev/deps/spark-deps-hadoop-2.6 @@ -13,11 +13,13 @@ apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api-1.0.0-M20.jar api-util-1.0.0-M20.jar arpack_combined_all-0.1.jar +automaton-1.11-8.jar avro-1.7.7.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar -bcprov-jdk15on-1.51.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.11-0.13.1.jar breeze_2.11-0.13.1.jar @@ -46,6 +48,8 @@ commons-math3-3.4.1.jar commons-net-2.2.jar commons-pool-1.5.4.jar compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar core-1.1.2.jar curator-client-2.6.0.jar curator-framework-2.6.0.jar @@ -55,6 +59,7 @@ datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar eigenbase-properties-1.1.5.jar +generex-1.0.1.jar gson-2.2.4.jar guava-14.0.1.jar guice-3.0.jar @@ -85,8 +90,12 @@ jackson-annotations-2.6.5.jar jackson-core-2.6.5.jar jackson-core-asl-1.9.13.jar jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar jackson-jaxrs-1.9.13.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar jackson-module-paranamer-2.6.5.jar jackson-module-scala_2.11-2.6.5.jar jackson-xc-1.9.13.jar @@ -108,6 +117,7 @@ jersey-container-servlet-2.22.2.jar jersey-container-servlet-core-2.22.2.jar jersey-guava-2.22.2.jar jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar jersey-server-2.22.2.jar jets3t-0.9.3.jar jetty-6.1.26.jar @@ -124,10 +134,13 @@ 
jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar leveldbjni-all-1.8.jar libfb303-0.9.3.jar libthrift-0.9.3.jar log4j-1.2.17.jar +logging-interceptor-3.6.0.jar lz4-1.3.0.jar machinist_2.11-0.6.1.jar macro-compat_2.11-1.1.1.jar @@ -137,11 +150,14 @@ metrics-core-3.1.2.jar metrics-graphite-3.1.2.jar metrics-json-3.1.2.jar metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.9.9.Final.jar netty-all-4.0.43.Final.jar objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -158,6 +174,7 @@ pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar py4j-0.10.4.jar pyrolite-4.13.jar +retrofit-2.2.0.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar scala-parser-combinators_2.11-1.0.4.jar @@ -167,6 +184,7 @@ scalap-2.11.8.jar shapeless_2.11-2.3.2.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar snappy-0.2.jar snappy-java-1.1.2.6.jar spire-macros_2.11-0.13.0.jar @@ -182,4 +200,5 @@ xbean-asm5-shaded-4.4.jar xercesImpl-2.9.1.jar xmlenc-0.52.jar xz-1.0.jar +zjsonpatch-0.3.0.jar zookeeper-3.4.6.jar diff --git a/dev/deps/spark-deps-hadoop-2.7 b/dev/deps/spark-deps-hadoop-2.7 index ab1de3d3dd8ad..76f2d711489f5 100644 --- a/dev/deps/spark-deps-hadoop-2.7 +++ b/dev/deps/spark-deps-hadoop-2.7 @@ -13,11 +13,13 @@ apacheds-kerberos-codec-2.0.0-M15.jar api-asn1-api-1.0.0-M20.jar api-util-1.0.0-M20.jar arpack_combined_all-0.1.jar +automaton-1.11-8.jar avro-1.7.7.jar avro-ipc-1.7.7.jar avro-mapred-1.7.7-hadoop2.jar base64-2.3.8.jar -bcprov-jdk15on-1.51.jar +bcpkix-jdk15on-1.54.jar +bcprov-jdk15on-1.54.jar bonecp-0.8.0.RELEASE.jar breeze-macros_2.11-0.13.1.jar breeze_2.11-0.13.1.jar @@ -46,6 +48,8 @@ commons-math3-3.4.1.jar commons-net-2.2.jar commons-pool-1.5.4.jar compress-lzf-1.0.3.jar +converter-jackson-2.2.0.jar +converter-scalars-2.2.0.jar core-1.1.2.jar curator-client-2.6.0.jar curator-framework-2.6.0.jar @@ -55,6 +59,7 @@ datanucleus-core-3.2.10.jar datanucleus-rdbms-3.2.9.jar derby-10.12.1.1.jar eigenbase-properties-1.1.5.jar +generex-1.0.1.jar gson-2.2.4.jar guava-14.0.1.jar guice-3.0.jar @@ -85,8 +90,12 @@ jackson-annotations-2.6.5.jar jackson-core-2.6.5.jar jackson-core-asl-1.9.13.jar jackson-databind-2.6.5.jar +jackson-dataformat-yaml-2.6.5.jar jackson-jaxrs-1.9.13.jar +jackson-jaxrs-base-2.6.5.jar +jackson-jaxrs-json-provider-2.6.5.jar jackson-mapper-asl-1.9.13.jar +jackson-module-jaxb-annotations-2.6.5.jar jackson-module-paranamer-2.6.5.jar jackson-module-scala_2.11-2.6.5.jar jackson-xc-1.9.13.jar @@ -108,6 +117,7 @@ jersey-container-servlet-2.22.2.jar jersey-container-servlet-core-2.22.2.jar jersey-guava-2.22.2.jar jersey-media-jaxb-2.22.2.jar +jersey-media-multipart-2.22.2.jar jersey-server-2.22.2.jar jets3t-0.9.3.jar jetty-6.1.26.jar @@ -125,10 +135,13 @@ jta-1.1.jar jtransforms-2.4.0.jar jul-to-slf4j-1.7.16.jar kryo-shaded-3.0.3.jar +kubernetes-client-2.2.13.jar +kubernetes-model-1.0.67.jar leveldbjni-all-1.8.jar libfb303-0.9.3.jar libthrift-0.9.3.jar log4j-1.2.17.jar +logging-interceptor-3.6.0.jar lz4-1.3.0.jar machinist_2.11-0.6.1.jar macro-compat_2.11-1.1.1.jar @@ -138,11 +151,14 @@ metrics-core-3.1.2.jar metrics-graphite-3.1.2.jar metrics-json-3.1.2.jar metrics-jvm-3.1.2.jar +mimepull-1.9.6.jar minlog-1.3.0.jar mx4j-3.0.2.jar netty-3.9.9.Final.jar netty-all-4.0.43.Final.jar objenesis-2.1.jar +okhttp-3.6.0.jar +okio-1.11.0.jar opencsv-2.3.jar oro-2.0.8.jar osgi-resource-locator-1.0.1.jar @@ -159,6 
+175,7 @@ pmml-schema-1.2.15.jar protobuf-java-2.5.0.jar py4j-0.10.4.jar pyrolite-4.13.jar +retrofit-2.2.0.jar scala-compiler-2.11.8.jar scala-library-2.11.8.jar scala-parser-combinators_2.11-1.0.4.jar @@ -168,6 +185,7 @@ scalap-2.11.8.jar shapeless_2.11-2.3.2.jar slf4j-api-1.7.16.jar slf4j-log4j12-1.7.16.jar +snakeyaml-1.15.jar snappy-0.2.jar snappy-java-1.1.2.6.jar spire-macros_2.11-0.13.0.jar @@ -183,4 +201,5 @@ xbean-asm5-shaded-4.4.jar xercesImpl-2.9.1.jar xmlenc-0.52.jar xz-1.0.jar +zjsonpatch-0.3.0.jar zookeeper-3.4.6.jar diff --git a/dev/sparktestsupport/modules.py b/dev/sparktestsupport/modules.py index 2971e0db40496..81be243f4c6cc 100644 --- a/dev/sparktestsupport/modules.py +++ b/dev/sparktestsupport/modules.py @@ -507,6 +507,14 @@ def __hash__(self): sbt_test_goals=["mesos/test"] ) +kubernetes = Module( + name="kubernetes", + dependencies=[], + source_file_regexes=["resource-managers/kubernetes/core"], + build_profile_flags=["-Pkubernetes"], + sbt_test_goals=["kubernetes/test"] +) + # The root module is a dummy module which is used to run all of the tests. # No other modules should directly depend on this module. root = Module( diff --git a/dev/test-dependencies.sh b/dev/test-dependencies.sh index 2906a81f61cd1..b3cbe61301901 100755 --- a/dev/test-dependencies.sh +++ b/dev/test-dependencies.sh @@ -29,7 +29,7 @@ export LC_ALL=C # TODO: This would be much nicer to do in SBT, once SBT supports Maven-style resolution. # NOTE: These should match those in the release publishing script -HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pyarn -Phive" +HADOOP2_MODULE_PROFILES="-Phive-thriftserver -Pmesos -Pyarn -Phive -Pkubernetes" MVN="build/mvn" HADOOP_PROFILES=( hadoop-2.6 diff --git a/pom.xml b/pom.xml index 5752d95466bbd..06c1ff44b90c1 100644 --- a/pom.xml +++ b/pom.xml @@ -307,35 +307,43 @@ ${chill.version} - - com.netflix.feign - feign-core - ${feign.version} - - - com.netflix.feign - feign-okhttp - ${feign.version} - - - com.netflix.feign - feign-jackson - ${feign.version} - - - com.netflix.feign - feign-jaxrs - ${feign.version} - com.squareup.retrofit2 retrofit ${retrofit.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + com.squareup.retrofit2 converter-jackson ${retrofit.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + com.squareup.retrofit2 @@ -648,6 +656,11 @@ jackson-annotations ${fasterxml.jackson.version} + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + ${fasterxml.jackson.version} + diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index a227342f46771..c90a824b1b8b1 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -51,14 +51,30 @@ io.fabric8 kubernetes-client ${kubernetes.client.version} + + + com.fasterxml.jackson.core + jackson-core + + + com.fasterxml.jackson.core + jackson-databind + + + com.fasterxml.jackson.core + jackson-annotations + + + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + + + - com.netflix.feign - feign-core - - - com.netflix.feign - feign-okhttp + com.fasterxml.jackson.dataformat + jackson-dataformat-yaml + ${fasterxml.jackson.version} org.glassfish.jersey.containers @@ -68,10 +84,6 @@ org.glassfish.jersey.media jersey-media-multipart - - com.netflix.feign - feign-jackson - 
com.squareup.retrofit2 retrofit @@ -85,16 +97,6 @@ converter-scalars - - com.netflix.feign feign-jaxrs - - javax.ws.rs jsr311-api - - - com.fasterxml.jackson.jaxrs jackson-jaxrs-json-provider diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala index b13800f389605..25e7c3b3ebd89 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala @@ -18,12 +18,12 @@ package org.apache.spark.deploy.kubernetes.submit import io.fabric8.kubernetes.api.model.{PodBuilder, Secret, SecretBuilder} import scala.collection.JavaConverters._ +import scala.language.implicitConversions import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.KubernetesCredentials import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.internal.config.OptionalConfigEntry private[spark] trait DriverPodKubernetesCredentialsMounter { From af7297e9e11fe2316dffb6c016f7dcd11d73cc0c Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 14 Jun 2017 14:03:09 -0700 Subject: [PATCH 137/225] New API for custom labels and annotations. (#346) * New API for custom labels and annotations. This API allows for these labels and annotations to have = and , characters, which is hard to accomplish in the old scheme. * Compare correct values in requirements * Use helper method * Address comments. * Fix scalastyle * Use variable * Remove unused import --- docs/running-on-kubernetes.md | 48 +++++++++++++++++++ .../kubernetes/ConfigurationUtils.scala | 31 +++++++++++- .../spark/deploy/kubernetes/config.scala | 5 ++ .../deploy/kubernetes/submit/Client.scala | 37 ++++++++------ .../KubernetesClusterSchedulerBackend.scala | 19 ++++---- .../kubernetes/submit/ClientV2Suite.scala | 13 ++++- 6 files changed, 127 insertions(+), 26 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index c10630fc5c5c6..52d847b4420cf 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -524,10 +524,52 @@ from the other deployment modes. See the [configuration page](configuration.html (typically 6-10%). + + spark.kubernetes.driver.label.[labelKey] + (none) + + Adds a label to the driver pod, with key labelKey and the value as the configuration's value. For + example, setting spark.kubernetes.driver.label.identifier to myIdentifier will result in + the driver pod having a label with key identifier and value myIdentifier. Multiple labels + can be added by setting multiple configurations with this prefix. + + + + spark.kubernetes.driver.annotation.[annotationKey] + (none) + + Adds an annotation to the driver pod, with key annotationKey and the value as the configuration's + value. For example, setting spark.kubernetes.driver.annotation.identifier to myIdentifier + will result in the driver pod having an annotation with key identifier and value + myIdentifier. Multiple annotations can be added by setting multiple configurations with this prefix.
+ + + + spark.kubernetes.executor.label.[labelKey] + (none) + + Adds a label to all executor pods, with key labelKey and the value as the configuration's value. For + example, setting spark.kubernetes.executor.label.identifier to myIdentifier will result in + the executor pods having a label with key identifier and value myIdentifier. Multiple + labels can be added by setting multiple configurations with this prefix. + + + + spark.kubernetes.executor.annotation.[annotationKey] + (none) + + Adds an annotation to the executor pods, with key annotationKey and the value as the configuration's + value. For example, setting spark.kubernetes.executor.annotation.identifier to myIdentifier + will result in the executor pods having an annotation with key identifier and value + myIdentifier. Multiple annotations can be added by setting multiple configurations with this prefix. + + spark.kubernetes.driver.labels (none) + Deprecated. Use spark.kubernetes.driver.label. instead which supports = + and , characters in label values. Custom labels that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, where each label is in the format key=value. Note that Spark also adds its own labels to the driver pod for bookkeeping purposes. @@ -537,6 +579,8 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.driver.annotations (none) + Deprecated. Use spark.kubernetes.driver.annotation. instead which supports + = and , characters in annotation values. Custom annotations that will be added to the driver pod. This should be a comma-separated list of label key-value pairs, where each annotation is in the format key=value. @@ -545,6 +589,8 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.executor.labels (none) + Deprecated. Use spark.kubernetes.executor.label. instead which supports + = and , characters in label values. Custom labels that will be added to the executor pods. This should be a comma-separated list of label key-value pairs, where each label is in the format key=value. Note that Spark also adds its own labels to the executor pods for bookkeeping purposes. @@ -554,6 +600,8 @@ from the other deployment modes. See the [configuration page](configuration.html spark.kubernetes.executor.annotations (none) + Deprecated. Use spark.kubernetes.executor.annotation. instead which supports + = and , characters in annotation values. Custom annotations that will be added to the executor pods. This should be a comma-separated list of annotation key-value pairs, where each annotation is in the format key=value. 
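[Editor's illustration, not part of the patch: the per-key settings documented above can be set like any other Spark configuration. A minimal Scala sketch follows; the identifier/myIdentifier names come from the table above, while the "notes" annotation key and its value are hypothetical, chosen to show that values may now contain '=' and ',' characters.]

```scala
import org.apache.spark.SparkConf

// Sketch only: exercising the new prefixed label/annotation keys.
object CustomLabelConfExample {
  val conf: SparkConf = new SparkConf()
    .set("spark.kubernetes.driver.label.identifier", "myIdentifier")
    .set("spark.kubernetes.driver.annotation.notes", "team=data,env=test") // '=' and ',' allowed
    .set("spark.kubernetes.executor.label.identifier", "myIdentifier")
    .set("spark.kubernetes.executor.annotation.notes", "team=data,env=test")
}
```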
diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala index f3bd598556019..f461da4809b4d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala @@ -17,9 +17,11 @@ package org.apache.spark.deploy.kubernetes -import org.apache.spark.SparkException +import org.apache.spark.{SparkConf, SparkException} +import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.OptionalConfigEntry -object ConfigurationUtils { +object ConfigurationUtils extends Logging { def parseKeyValuePairs( maybeKeyValues: Option[String], configKey: String, @@ -38,4 +40,29 @@ object ConfigurationUtils { }).toMap }).getOrElse(Map.empty[String, String]) } + + def combinePrefixedKeyValuePairsWithDeprecatedConf( + sparkConf: SparkConf, + prefix: String, + deprecatedConf: OptionalConfigEntry[String], + configType: String): Map[String, String] = { + val deprecatedKeyValuePairsString = sparkConf.get(deprecatedConf) + deprecatedKeyValuePairsString.foreach { _ => + logWarning(s"Configuration with key ${deprecatedConf.key} is deprecated. Use" + + s" configurations with prefix $prefix instead.") + } + val fromDeprecated = parseKeyValuePairs( + deprecatedKeyValuePairsString, + deprecatedConf.key, + configType) + val fromPrefix = sparkConf.getAllWithPrefix(prefix) + val combined = fromDeprecated.toSeq ++ fromPrefix + combined.groupBy(_._1).foreach { + case (key, values) => + require(values.size == 1, + s"Cannot have multiple values for a given $configType key, got key $key with" + + s" values $values") + } + combined.toMap + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index d1fd88fc880d1..70ea19e44ef8c 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -113,6 +113,11 @@ package object config extends Logging { .bytesConf(ByteUnit.MiB) .createOptional + private[spark] val KUBERNETES_DRIVER_LABEL_PREFIX = "spark.kubernetes.driver.label." + private[spark] val KUBERNETES_DRIVER_ANNOTATION_PREFIX = "spark.kubernetes.driver.annotation." + private[spark] val KUBERNETES_EXECUTOR_LABEL_PREFIX = "spark.kubernetes.executor.label." + private[spark] val KUBERNETES_EXECUTOR_ANNOTATION_PREFIX = "spark.kubernetes.executor.annotation." + private[spark] val KUBERNETES_DRIVER_LABELS = ConfigBuilder("spark.kubernetes.driver.labels") .doc("Custom labels that will be added to the driver pod. 
This should be a comma-separated" + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index c2e616eadc1e0..a9699d8c34b4e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -29,6 +29,7 @@ import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl import org.apache.spark.internal.Logging +import org.apache.spark.internal.config.ConfigEntry import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils @@ -82,17 +83,25 @@ private[spark] class Client( def run(): Unit = { validateNoDuplicateFileNames(sparkJars) validateNoDuplicateFileNames(sparkFiles) - val parsedCustomLabels = ConfigurationUtils.parseKeyValuePairs( - customLabels, KUBERNETES_DRIVER_LABELS.key, "labels") - require(!parsedCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + - s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + - s" operations.") - val parsedCustomAnnotations = ConfigurationUtils.parseKeyValuePairs( - customAnnotations, KUBERNETES_DRIVER_ANNOTATIONS.key, "annotations") - require(!parsedCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), s"Annotation with key" + - s" $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for Spark bookkeeping" + - s" operations.") - val allLabels = parsedCustomLabels ++ Map( + + val driverCustomLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + sparkConf, + KUBERNETES_DRIVER_LABEL_PREFIX, + KUBERNETES_DRIVER_LABELS, + "label") + require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + + s" operations.") + + val driverCustomAnnotations = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + sparkConf, + KUBERNETES_DRIVER_ANNOTATION_PREFIX, + KUBERNETES_DRIVER_ANNOTATIONS, + "annotation") + require(!driverCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), + s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + + s" Spark bookkeeping operations.") + val allDriverLabels = driverCustomLabels ++ Map( SPARK_APP_ID_LABEL -> kubernetesAppId, SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) @@ -138,8 +147,8 @@ private[spark] class Client( val basePod = new PodBuilder() .withNewMetadata() .withName(kubernetesDriverPodName) - .addToLabels(allLabels.asJava) - .addToAnnotations(parsedCustomAnnotations.asJava) + .addToLabels(allDriverLabels.asJava) + .addToAnnotations(driverCustomAnnotations.toMap.asJava) .addToAnnotations(SPARK_APP_NAME_ANNOTATION, appName) .endMetadata() .withNewSpec() @@ -148,7 +157,7 @@ private[spark] class Client( .endSpec() val maybeSubmittedDependencyUploader = initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(allLabels) + .provideInitContainerSubmittedDependencyUploader(allDriverLabels) val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) } diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 85ce5f01200b2..4165eb8cbd067 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -62,10 +62,11 @@ private[spark] class KubernetesClusterSchedulerBackend( org.apache.spark.internal.config.EXECUTOR_CLASS_PATH) private val executorJarsDownloadDir = conf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) - private val executorLabels = ConfigurationUtils.parseKeyValuePairs( - conf.get(KUBERNETES_EXECUTOR_LABELS), - KUBERNETES_EXECUTOR_LABELS.key, - "executor labels") + private val executorLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + conf, + KUBERNETES_EXECUTOR_LABEL_PREFIX, + KUBERNETES_EXECUTOR_LABELS, + "executor label") require( !executorLabels.contains(SPARK_APP_ID_LABEL), s"Custom executor labels cannot contain $SPARK_APP_ID_LABEL as it is" + @@ -74,11 +75,13 @@ private[spark] class KubernetesClusterSchedulerBackend( !executorLabels.contains(SPARK_EXECUTOR_ID_LABEL), s"Custom executor labels cannot contain $SPARK_EXECUTOR_ID_LABEL as it is reserved for" + s" Spark.") - private val executorAnnotations = ConfigurationUtils.parseKeyValuePairs( - conf.get(KUBERNETES_EXECUTOR_ANNOTATIONS), - KUBERNETES_EXECUTOR_ANNOTATIONS.key, - "executor annotations") + private val executorAnnotations = + ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + conf, + KUBERNETES_EXECUTOR_ANNOTATION_PREFIX, + KUBERNETES_EXECUTOR_ANNOTATIONS, + "executor annotation") private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val dockerImagePullPolicy = conf.get(DOCKER_IMAGE_PULL_POLICY) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 193f36a7423b2..3945bef5bcfb8 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -49,12 +49,17 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val APP_ID = "spark-id" private val CUSTOM_LABEL_KEY = "customLabel" private val CUSTOM_LABEL_VALUE = "customLabelValue" + private val DEPRECATED_CUSTOM_LABEL_KEY = "deprecatedCustomLabel" + private val DEPRECATED_CUSTOM_LABEL_VALUE = "deprecatedCustomLabelValue" private val ALL_EXPECTED_LABELS = Map( CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, + DEPRECATED_CUSTOM_LABEL_KEY -> DEPRECATED_CUSTOM_LABEL_VALUE, SPARK_APP_ID_LABEL -> APP_ID, SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) private val CUSTOM_ANNOTATION_KEY = "customAnnotation" private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" + private val DEPRECATED_CUSTOM_ANNOTATION_KEY = "deprecatedCustomAnnotation" + private val DEPRECATED_CUSTOM_ANNOTATION_VALUE = "deprecatedCustomAnnotationValue" private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" 
private val INIT_CONTAINER_SECRET_DATA = Map("secret-key" -> "secret-data") private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" @@ -94,8 +99,11 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .set(DRIVER_DOCKER_IMAGE, CUSTOM_DRIVER_IMAGE) .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB.toLong) .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEAD_MB.toLong) - .set(KUBERNETES_DRIVER_LABELS, s"$CUSTOM_LABEL_KEY=$CUSTOM_LABEL_VALUE") - .set(KUBERNETES_DRIVER_ANNOTATIONS, s"$CUSTOM_ANNOTATION_KEY=$CUSTOM_ANNOTATION_VALUE") + .set(KUBERNETES_DRIVER_LABELS, s"$DEPRECATED_CUSTOM_LABEL_KEY=$DEPRECATED_CUSTOM_LABEL_VALUE") + .set(KUBERNETES_DRIVER_ANNOTATIONS, + s"$DEPRECATED_CUSTOM_ANNOTATION_KEY=$DEPRECATED_CUSTOM_ANNOTATION_VALUE") + .set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$CUSTOM_LABEL_KEY", CUSTOM_LABEL_VALUE) + .set(s"$KUBERNETES_DRIVER_ANNOTATION_PREFIX$CUSTOM_ANNOTATION_KEY", CUSTOM_ANNOTATION_VALUE) .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) .set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, DRIVER_JAVA_OPTIONS) private val EXECUTOR_INIT_CONF_KEY = "executor-init-conf" @@ -444,6 +452,7 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private def podHasCorrectAnnotations(pod: Pod): Boolean = { val expectedAnnotations = Map( + DEPRECATED_CUSTOM_ANNOTATION_KEY -> DEPRECATED_CUSTOM_ANNOTATION_VALUE, CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, SPARK_APP_NAME_ANNOTATION -> APP_NAME, BOOTSTRAPPED_POD_ANNOTATION -> TRUE) From 38287f65d017ebe55a642866db18fb4b4db2e671 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Thu, 22 Jun 2017 16:57:12 +0800 Subject: [PATCH 138/225] Allow spark driver to find shuffle pods in specified namespace (#357) The conf property spark.kubernetes.shuffle.namespace is used to specify the namespace of shuffle pods. In normal cases, only one "shuffle daemonset" is deployed and shared by all spark pods. The spark driver should be able to list and watch shuffle pods in the namespace specified by the user. Note: by default, the spark driver pod doesn't have authority to list and watch shuffle pods in another namespace. Some action is needed to grant it the authority. For example, the ABAC policy below works. ``` {"apiVersion": "abac.authorization.kubernetes.io/v1beta1", "kind": "Policy", "spec": {"group": "system:serviceaccounts", "namespace": "SHUFFLE_NAMESPACE", "resource": "pods", "readonly": true}} ``` --- .../spark/scheduler/cluster/kubernetes/ShufflePodCache.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala index 53b4e745ce7c7..15e02664589eb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/ShufflePodCache.scala @@ -37,7 +37,8 @@ private[spark] class ShufflePodCache ( def start(): Unit = { // seed the initial cache.
- val pods = client.pods().withLabels(dsLabels.asJava).list() + val pods = client.pods() + .inNamespace(dsNamespace).withLabels(dsLabels.asJava).list() pods.getItems.asScala.foreach { pod => if (Readiness.isReady(pod)) { @@ -50,6 +51,7 @@ private[spark] class ShufflePodCache ( watcher = client .pods() + .inNamespace(dsNamespace) .withLabels(dsLabels.asJava) .watch(new Watcher[Pod] { override def eventReceived(action: Watcher.Action, p: Pod): Unit = { From 168ef0a0520f8f7c836d68bd01f34582af4884bd Mon Sep 17 00:00:00 2001 From: Chun Chen Date: Fri, 23 Jun 2017 14:03:24 +0800 Subject: [PATCH 139/225] Bypass init-containers when possible (#348) --- .../deploy/kubernetes/submit/Client.scala | 41 ++++++++++--------- ...riverInitContainerComponentsProvider.scala | 29 +++++++++---- .../submit/InitContainerBundle.scala | 26 ++++++++++++ .../submit/KubernetesFileUtils.scala | 4 ++ .../kubernetes/submit/ClientV2Suite.scala | 24 ++++------- 5 files changed, 80 insertions(+), 44 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index a9699d8c34b4e..ac3a51e74f838 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -156,31 +156,33 @@ private[spark] class Client( .addToContainers(driverContainer) .endSpec() - val maybeSubmittedDependencyUploader = initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(allDriverLabels) - val maybeSubmittedResourceIdentifiers = maybeSubmittedDependencyUploader.map { uploader => + val maybeSubmittedResourceIdentifiers = initContainerComponentsProvider + .provideInitContainerSubmittedDependencyUploader(allDriverLabels) + .map { uploader => SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) } - val maybeSecretBuilder = initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceIdentifiers.map(_.secrets())) - val maybeSubmittedDependenciesSecret = maybeSecretBuilder.map(_.build()) - val initContainerConfigMap = initContainerComponentsProvider - .provideInitContainerConfigMapBuilder(maybeSubmittedResourceIdentifiers.map(_.ids())) - .build() - val podWithInitContainer = initContainerComponentsProvider - .provideInitContainerBootstrap() - .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod) + val maybeSubmittedDependenciesSecret = initContainerComponentsProvider + .provideSubmittedDependenciesSecretBuilder( + maybeSubmittedResourceIdentifiers.map(_.secrets())) + .map(_.build()) val containerLocalizedFilesResolver = initContainerComponentsProvider - .provideContainerLocalizedFilesResolver() + .provideContainerLocalizedFilesResolver() val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() - val executorInitContainerConfiguration = initContainerComponentsProvider - .provideExecutorInitContainerConfiguration() - val sparkConfWithExecutorInit = executorInitContainerConfiguration - .configureSparkConfForExecutorInitContainer(sparkConf) + val initContainerBundler = 
initContainerComponentsProvider + .provideInitContainerBundle(maybeSubmittedResourceIdentifiers.map(_.ids()), + resolvedSparkJars ++ resolvedSparkFiles) + + val podWithInitContainer = initContainerBundler.map( + _.sparkPodInitContainerBootstrap + .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod)) + .getOrElse(basePod) + val sparkConfWithExecutorInit = initContainerBundler.map( + _.executorInitContainerConfiguration + .configureSparkConfForExecutorInitContainer(sparkConf)) + .getOrElse(sparkConf) val credentialsMounter = kubernetesCredentialsMounterProvider .getDriverPodKubernetesCredentialsMounter() val credentialsSecret = credentialsMounter.createCredentialsSecret() @@ -224,7 +226,8 @@ private[spark] class Client( .watch(loggingPodStatusWatcher)) { _ => val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) try { - val driverOwnedResources = Seq(initContainerConfigMap) ++ + val driverOwnedResources = initContainerBundler.map( + _.sparkInitContainerConfigMap).toSeq ++ maybeSubmittedDependenciesSecret.toSeq ++ credentialsSecret.toSeq val driverPodOwnerReference = new OwnerReferenceBuilder() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index cfc61e193dcff..cc1837cce6736 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -16,6 +16,8 @@ */ package org.apache.spark.deploy.kubernetes.submit +import io.fabric8.kubernetes.api.model.ConfigMap + import org.apache.spark.{SparkConf, SSLOptions} import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} import org.apache.spark.deploy.kubernetes.config._ @@ -30,17 +32,15 @@ import org.apache.spark.util.Utils */ private[spark] trait DriverInitContainerComponentsProvider { - def provideInitContainerConfigMapBuilder( - maybeSubmittedResourceIds: Option[SubmittedResourceIds]) - : SparkInitContainerConfigMapBuilder def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver - def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration def provideInitContainerSubmittedDependencyUploader( driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] def provideSubmittedDependenciesSecretBuilder( maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) : Option[SubmittedDependencySecretBuilder] def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap + def provideInitContainerBundle(maybeSubmittedResourceIds: Option[SubmittedResourceIds], + uris: Iterable[String]): Option[InitContainerBundle] } private[spark] class DriverInitContainerComponentsProviderImpl( @@ -105,9 +105,8 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) - override def provideInitContainerConfigMapBuilder( - maybeSubmittedResourceIds: Option[SubmittedResourceIds]) - : SparkInitContainerConfigMapBuilder = { + private def 
provideInitContainerConfigMap( + maybeSubmittedResourceIds: Option[SubmittedResourceIds]): ConfigMap = { val submittedDependencyConfigPlugin = for { stagingServerUri <- maybeResourceStagingServerUri jarsResourceId <- maybeSubmittedResourceIds.map(_.jarsResourceId) @@ -136,7 +135,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( filesDownloadPath, configMapName, configMapKey, - submittedDependencyConfigPlugin) + submittedDependencyConfigPlugin).build() } override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { @@ -144,7 +143,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( sparkJars, sparkFiles, jarsDownloadPath, filesDownloadPath) } - override def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { + private def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { new ExecutorInitContainerConfigurationImpl( maybeSecretName, INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, @@ -202,4 +201,16 @@ private[spark] class DriverInitContainerComponentsProviderImpl( configMapKey, resourceStagingServerSecretPlugin) } + + override def provideInitContainerBundle( + maybeSubmittedResourceIds: Option[SubmittedResourceIds], + uris: Iterable[String]): Option[InitContainerBundle] = { + val containerLocalizedFilesResolver = provideContainerLocalizedFilesResolver() + // Bypass init-containers if `spark.jars` and `spark.files` is empty or only has `local://` URIs + if (KubernetesFileUtils.getNonContainerLocalFiles(uris).nonEmpty) { + Some(InitContainerBundle(provideInitContainerConfigMap(maybeSubmittedResourceIds), + provideInitContainerBootstrap(), + provideExecutorInitContainerConfiguration())) + } else None + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala new file mode 100644 index 0000000000000..ba44f794d5811 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit + +import io.fabric8.kubernetes.api.model.ConfigMap + +import org.apache.spark.deploy.kubernetes.{SparkPodInitContainerBootstrap} + +case class InitContainerBundle( + sparkInitContainerConfigMap: ConfigMap, + sparkPodInitContainerBootstrap: SparkPodInitContainerBootstrap, + executorInitContainerConfiguration: ExecutorInitContainerConfiguration) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala index 1b0af3fa9fb01..d688bf29808fb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala @@ -33,6 +33,10 @@ private[spark] object KubernetesFileUtils { filterUriStringsByScheme(uris, _ == "local") } + def getNonContainerLocalFiles(uris: Iterable[String]): Iterable[String] = { + filterUriStringsByScheme(uris, _ != "local") + } + def getOnlySubmitterLocalFiles(uris: Iterable[String]): Iterable[String] = { filterUriStringsByScheme(uris, _ == "file") } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 3945bef5bcfb8..8992a56e20c80 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -123,8 +123,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { private val CREDENTIALS_SET_CONF = "spark.kubernetes.driverCredentials.provided" private val CREDENTIALS_SET_ANNOTATION = "credentials-set" - @Mock - private var initContainerConfigMapBuilder: SparkInitContainerConfigMapBuilder = _ @Mock private var containerLocalizedFilesResolver: ContainerLocalizedFilesResolver = _ @Mock @@ -173,12 +171,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { }) when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver()) .thenReturn(containerLocalizedFilesResolver) - when(initContainerComponentsProvider.provideExecutorInitContainerConfiguration()) - .thenReturn(executorInitContainerConfiguration) when(submittedDependenciesSecretBuilder.build()) .thenReturn(INIT_CONTAINER_SECRET) - when(initContainerConfigMapBuilder.build()) - .thenReturn(INIT_CONTAINER_CONFIG_MAP) when(kubernetesClient.pods()).thenReturn(podOps) when(podOps.create(any())).thenAnswer(new Answer[Pod] { override def answer(invocation: InvocationOnMock): Pod = { @@ -214,9 +208,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(initContainerComponentsProvider .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets()))) .thenReturn(Some(submittedDependenciesSecretBuilder)) - when(initContainerComponentsProvider - .provideInitContainerConfigMapBuilder(Some(SUBMITTED_RESOURCES.ids()))) - .thenReturn(initContainerConfigMapBuilder) + when(initContainerComponentsProvider.provideInitContainerBundle(Some(SUBMITTED_RESOURCES.ids()), + RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES)) + .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, + 
initContainerBootstrap, executorInitContainerConfiguration))) runAndVerifyDriverPodHasCorrectProperties() val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) @@ -232,8 +227,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verifyConfigMapWasCreated(createdResources) verify(submittedDependencyUploader).uploadJars() verify(submittedDependencyUploader).uploadFiles() - verify(initContainerComponentsProvider) - .provideInitContainerConfigMapBuilder(Some(SUBMITTED_RESOURCES.ids())) verify(initContainerComponentsProvider) .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets())) } @@ -250,8 +243,6 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verifyConfigMapWasCreated(createdResources) verify(submittedDependencyUploader, times(0)).uploadJars() verify(submittedDependencyUploader, times(0)).uploadFiles() - verify(initContainerComponentsProvider) - .provideInitContainerConfigMapBuilder(None) verify(initContainerComponentsProvider) .provideSubmittedDependenciesSecretBuilder(None) } @@ -321,9 +312,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(initContainerComponentsProvider .provideSubmittedDependenciesSecretBuilder(None)) .thenReturn(None) - when(initContainerComponentsProvider - .provideInitContainerConfigMapBuilder(None)) - .thenReturn(initContainerConfigMapBuilder) + when(initContainerComponentsProvider.provideInitContainerBundle(None, RESOLVED_SPARK_JARS ++ + RESOLVED_SPARK_FILES)) + .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, + initContainerBootstrap, executorInitContainerConfiguration))) } private def expectationsForNoMountedCredentials(): Unit = { From cdf6c36df37b5f14d827752341ab897916974e6b Mon Sep 17 00:00:00 2001 From: sandflee Date: Fri, 23 Jun 2017 15:48:51 +0800 Subject: [PATCH 140/225] Config for hard cpu limit on pods; default unlimited (#356) --- docs/running-on-kubernetes.md | 14 ++++++++++++++ .../apache/spark/deploy/kubernetes/config.scala | 12 ++++++++++++ .../spark/deploy/kubernetes/submit/Client.scala | 17 ++++++++++++++++- .../KubernetesClusterSchedulerBackend.scala | 17 ++++++++++++++++- 4 files changed, 58 insertions(+), 2 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 52d847b4420cf..3a50860f826c5 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -718,6 +718,20 @@ from the other deployment modes. See the [configuration page](configuration.html Docker image pull policy used when pulling Docker images with Kubernetes. 
+ + spark.kubernetes.driver.limit.cores + (none) + + Specify the hard cpu limit for the driver pod + + + + spark.kubernetes.executor.limit.cores + (none) + + Specify the hard cpu limit for a single executor pod + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index 70ea19e44ef8c..e1c1ab9d459fc 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -485,6 +485,18 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_DRIVER_LIMIT_CORES = + ConfigBuilder("spark.kubernetes.driver.limit.cores") + .doc("Specify the hard cpu limit for the driver pod") + .stringConf + .createOptional + + private[spark] val KUBERNETES_EXECUTOR_LIMIT_CORES = + ConfigBuilder("spark.kubernetes.executor.limit.cores") + .doc("Specify the hard cpu limit for a single executor pod") + .stringConf + .createOptional + private[spark] def resolveK8sMaster(rawMasterString: String): String = { if (!rawMasterString.startsWith("k8s://")) { throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index ac3a51e74f838..8220127eac449 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -64,6 +64,7 @@ private[spark] class Client( // CPU settings private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") + private val driverLimitCores = sparkConf.getOption(KUBERNETES_DRIVER_LIMIT_CORES.key) // Memory settings private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) @@ -139,7 +140,6 @@ private[spark] class Client( .endEnv() .withNewResources() .addToRequests("cpu", driverCpuQuantity) - .addToLimits("cpu", driverCpuQuantity) .addToRequests("memory", driverMemoryQuantity) .addToLimits("memory", driverMemoryLimitQuantity) .endResources() @@ -156,6 +156,21 @@ private[spark] class Client( .addToContainers(driverContainer) .endSpec() + driverLimitCores.map { + limitCores => + val driverCpuLimitQuantity = new QuantityBuilder(false) + .withAmount(limitCores) + .build() + basePod + .editSpec() + .editFirstContainer() + .editResources + .addToLimits("cpu", driverCpuLimitQuantity) + .endResources() + .endContainer() + .endSpec() + } + val maybeSubmittedResourceIdentifiers = initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(allDriverLabels) .map { uploader => diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 4165eb8cbd067..31cf929b94e8b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -108,6 +108,7 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorMemoryWithOverhead = executorMemoryMb + memoryOverheadMb private val executorCores = conf.getOption("spark.executor.cores").getOrElse("1") + private val executorLimitCores = conf.getOption(KUBERNETES_EXECUTOR_LIMIT_CORES.key) private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("kubernetes-executor-requests")) @@ -438,7 +439,6 @@ private[spark] class KubernetesClusterSchedulerBackend( .addToRequests("memory", executorMemoryQuantity) .addToLimits("memory", executorMemoryLimitQuantity) .addToRequests("cpu", executorCpuQuantity) - .addToLimits("cpu", executorCpuQuantity) .endResources() .addAllToEnv(requiredEnv.asJava) .addToEnv(executorExtraClasspathEnv.toSeq: _*) @@ -446,6 +446,21 @@ private[spark] class KubernetesClusterSchedulerBackend( .endContainer() .endSpec() + executorLimitCores.map { + limitCores => + val executorCpuLimitQuantity = new QuantityBuilder(false) + .withAmount(limitCores) + .build() + basePodBuilder + .editSpec() + .editFirstContainer() + .editResources + .addToLimits("cpu", executorCpuLimitQuantity) + .endResources() + .endContainer() + .endSpec() + } + val withMaybeShuffleConfigPodBuilder = shuffleServiceConfig .map { config => config.shuffleDirs.foldLeft(basePodBuilder) { (builder, dir) => From 9dc5eed9846e6c0e3145c6222d6a8912da1094c6 Mon Sep 17 00:00:00 2001 From: Yinan Li Date: Thu, 29 Jun 2017 12:14:42 -0700 Subject: [PATCH 141/225] Allow number of executor cores to have fractional values (#361) This commit tries to solve issue #359 by allowing the `spark.executor.cores` configuration key to take fractional values, e.g., 0.5 or 1.5. The value is used to specify the cpu request when creating the executor pods, which is allowed to be fractional by Kubernetes. When the value is passed to the executor process through the environment variable `SPARK_EXECUTOR_CORES`, the value is rounded up to the closest integer as required by the `CoarseGrainedExecutorBackend`. 
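[Editor's sketch, not part of the patch: it restates the rounding behavior described above, with the literal 1.5 standing in for a user-supplied fractional spark.executor.cores value.]

```scala
object FractionalCoresSketch {
  val executorCores: Double = 1.5
  // Passed through as-is for the pod's CPU request, which Kubernetes accepts as fractional.
  val podCpuRequest: String = executorCores.toString             // "1.5"
  // Rounded up before being exported as SPARK_EXECUTOR_CORES for the executor backend.
  val executorBackendCores: Int = math.ceil(executorCores).toInt // 2
}
```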
Signed-off-by: Yinan Li --- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 31cf929b94e8b..d880cee315c0d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -107,7 +107,7 @@ private[spark] class KubernetesClusterSchedulerBackend( MEMORY_OVERHEAD_MIN)) private val executorMemoryWithOverhead = executorMemoryMb + memoryOverheadMb - private val executorCores = conf.getOption("spark.executor.cores").getOrElse("1") + private val executorCores = conf.getDouble("spark.executor.cores", 1d) private val executorLimitCores = conf.getOption(KUBERNETES_EXECUTOR_LIMIT_CORES.key) private implicit val requestExecutorContext = ExecutionContext.fromExecutorService( @@ -377,7 +377,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .withAmount(s"${executorMemoryWithOverhead}M") .build() val executorCpuQuantity = new QuantityBuilder(false) - .withAmount(executorCores) + .withAmount(executorCores.toString) .build() val executorExtraClasspathEnv = executorExtraClasspath.map { cp => new EnvVarBuilder() @@ -388,7 +388,8 @@ private[spark] class KubernetesClusterSchedulerBackend( val requiredEnv = Seq( (ENV_EXECUTOR_PORT, executorPort.toString), (ENV_DRIVER_URL, driverUrl), - (ENV_EXECUTOR_CORES, executorCores), + // Executor backend expects integral value for executor cores, so round it up to an int. + (ENV_EXECUTOR_CORES, math.ceil(executorCores).toInt.toString), (ENV_EXECUTOR_MEMORY, executorMemoryString), (ENV_APPLICATION_ID, applicationId()), (ENV_EXECUTOR_ID, executorId), From 442490add532d5d8b87e7ed14a8d0ed0a68f01bd Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Mon, 3 Jul 2017 12:25:59 -0700 Subject: [PATCH 142/225] Python Bindings for launching PySpark Jobs from the JVM (#364) * Adding PySpark Submit functionality. Launching Python from JVM * Addressing scala idioms related to PR351 * Removing extends Logging which was necessary for LogInfo * Refactored code to leverage the ContainerLocalizedFileResolver * Modified Unit tests so that they would pass * Modified Unit Test input to pass Unit Tests * Setup working environment for integration tests for PySpark * Comment out Python thread logic until Jenkins has python in Python * Modifying PythonExec to pass on Jenkins * Modifying python exec * Added unit tests to ClientV2 and refactored to include pyspark submission resources * Modified unit test check * Scalastyle * PR 348 file conflicts * Refactored unit tests and styles * further scala stylizing and logic * Modified unit tests to be more specific towards Class in question * Removed space delimiting for methods * Submission client redesign to use a step-based builder pattern. This change overhauls the underlying architecture of the submission client, but it is intended to entirely preserve existing behavior of Spark applications. Therefore users will find this to be an invisible change. The philosophy behind this design is to reconsider the breakdown of the submission process.
It operates off the abstraction of "submission steps", which are transformation functions that take the previous state of the driver and return the new state of the driver. The driver's state includes its Spark configurations and the Kubernetes resources that will be used to deploy it. Such a refactor moves away from a features-first API design, which considers different containers to serve a set of features. The previous design, for example, had a container files resolver API object that returned different resolutions of the dependencies added by the user. However, it was up to the main Client to know how to intelligently invoke all of those APIs. Therefore the API surface area of the file resolver became untenably large and it was not intuitive of how it was to be used or extended. This design changes the encapsulation layout; every module is now responsible for changing the driver specification directly. An orchestrator builds the correct chain of steps and hands it to the client, which then calls it verbatim. The main client then makes any final modifications that put the different pieces of the driver together, particularly to attach the driver container itself to the pod and to apply the Spark configuration as command-line arguments. * Don't add the init-container step if all URIs are local. * Python arguments patch + tests + docs * Revert "Python arguments patch + tests + docs" This reverts commit 4533df2a03e2a8922988b0bd01691ad1f26e5d03. * Revert "Don't add the init-container step if all URIs are local." This reverts commit e103225d9ff54ca17692279cc6a7999f9b8c3265. * Revert "Submission client redesign to use a step-based builder pattern." This reverts commit 5499f6ddf9b42c0526f1dc053317afb38dc71294. * style changes * space for styling --- README.md | 1 + .../org/apache/spark/deploy/SparkSubmit.scala | 14 +- docs/running-on-kubernetes.md | 26 ++++ .../spark/deploy/kubernetes/constants.scala | 2 + .../deploy/kubernetes/submit/Client.scala | 77 ++++++---- .../ContainerLocalizedFilesResolver.scala | 39 +++-- ...riverInitContainerComponentsProvider.scala | 25 ++-- .../DriverPodKubernetesFileMounter.scala | 55 +++++++ .../submit/PythonSubmissionResources.scala | 75 ++++++++++ .../kubernetes/submit/ClientV2Suite.scala | 139 +++++++++++++++--- ...ContainerLocalizedFilesResolverSuite.scala | 24 +++ .../PythonSubmissionResourcesSuite.scala | 109 ++++++++++++++ .../src/main/docker/driver-py/Dockerfile | 48 ++++++ .../src/main/docker/executor-py/Dockerfile | 46 ++++++ .../src/main/docker/init-container/Dockerfile | 2 +- .../docker/resource-staging-server/Dockerfile | 3 +- .../main/docker/shuffle-service/Dockerfile | 2 +- .../kubernetes/integration-tests/pom.xml | 102 +++++++++++++ .../integration-tests/src/test/python/pi.py | 46 ++++++ .../integrationtest/KubernetesSuite.scala | 40 ++++- .../docker/SparkDockerImageBuilder.scala | 29 +++- 21 files changed, 831 insertions(+), 73 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala create mode 100644 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile create mode 100644 
resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile create mode 100755 resource-managers/kubernetes/integration-tests/src/test/python/pi.py diff --git a/README.md b/README.md index 352820a084c6f..a5109f4e12897 100644 --- a/README.md +++ b/README.md @@ -24,6 +24,7 @@ We've been asked by an Apache Spark Committer to work outside of the Apache infr This is a collaborative effort by several folks from different companies who are interested in seeing this feature be successful. Companies active in this project include (alphabetically): +- Bloomberg - Google - Haiwen - Hyperpilot diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 9c6174901ac40..abd9d58795149 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -346,8 +346,8 @@ object SparkSubmit extends CommandLineUtils { (clusterManager, deployMode) match { case (KUBERNETES, CLIENT) => printErrorAndExit("Client mode is currently not supported for Kubernetes.") - case (KUBERNETES, CLUSTER) if args.isPython || args.isR => - printErrorAndExit("Kubernetes does not currently support python or R applications.") + case (KUBERNETES, CLUSTER) if args.isR => + printErrorAndExit("Kubernetes does not currently support R applications.") case (STANDALONE, CLUSTER) if args.isPython => printErrorAndExit("Cluster deploy mode is currently not supported for python " + "applications on standalone clusters.") @@ -636,8 +636,14 @@ object SparkSubmit extends CommandLineUtils { if (isKubernetesCluster) { childMainClass = "org.apache.spark.deploy.kubernetes.submit.Client" - childArgs += args.primaryResource - childArgs += args.mainClass + if (args.isPython) { + childArgs += args.primaryResource + childArgs += "org.apache.spark.deploy.PythonRunner" + childArgs += args.pyFiles + } else { + childArgs += args.primaryResource + childArgs += args.mainClass + } childArgs ++= args.childArgs } diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 3a50860f826c5..2b4e9a6f96af1 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -180,6 +180,32 @@ The above mechanism using `kubectl proxy` can be used when we have authenticatio kubernetes-client library does not support. Authentication using X509 Client Certs and OAuth tokens is currently supported. +### Running PySpark + +Running PySpark on Kubernetes leverages the same spark-submit logic when launching on Yarn and Mesos. +Python files can be distributed by including, in the conf, `--py-files` + +Below is an example submission: + + +``` + bin/spark-submit \ + --deploy-mode cluster \ + --master k8s://http://127.0.0.1:8001 \ + --kubernetes-namespace default \ + --conf spark.executor.memory=500m \ + --conf spark.driver.memory=1G \ + --conf spark.driver.cores=1 \ + --conf spark.executor.cores=1 \ + --conf spark.executor.instances=1 \ + --conf spark.app.name=spark-pi \ + --conf spark.kubernetes.driver.docker.image=spark-driver-py:latest \ + --conf spark.kubernetes.executor.docker.image=spark-executor-py:latest \ + --conf spark.kubernetes.initcontainer.docker.image=spark-init:latest \ + --py-files local:///opt/spark/examples/src/main/python/sort.py \ + local:///opt/spark/examples/src/main/python/pi.py 100 +``` + ## Dynamic Executor Scaling Spark on Kubernetes supports Dynamic Allocation with cluster mode. 
This mode requires running diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala index f2f1136e54fe4..92f051b2ac298 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/constants.scala @@ -67,6 +67,8 @@ package object constants { private[spark] val ENV_DRIVER_ARGS = "SPARK_DRIVER_ARGS" private[spark] val ENV_DRIVER_JAVA_OPTS = "SPARK_DRIVER_JAVA_OPTS" private[spark] val ENV_MOUNTED_FILES_DIR = "SPARK_MOUNTED_FILES_DIR" + private[spark] val ENV_PYSPARK_FILES = "PYSPARK_FILES" + private[spark] val ENV_PYSPARK_PRIMARY = "PYSPARK_PRIMARY" // Bootstrapping dependencies with the init-container private[spark] val INIT_CONTAINER_ANNOTATION = "pod.beta.kubernetes.io/init-containers" diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 8220127eac449..781ecbd6c5416 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -47,11 +47,11 @@ private[spark] class Client( appName: String, kubernetesResourceNamePrefix: String, kubernetesAppId: String, + mainAppResource: String, + pythonResource: Option[PythonSubmissionResourcesImpl], mainClass: String, sparkConf: SparkConf, appArgs: Array[String], - sparkJars: Seq[String], - sparkFiles: Seq[String], waitForAppCompletion: Boolean, kubernetesClient: KubernetesClient, initContainerComponentsProvider: DriverInitContainerComponentsProvider, @@ -82,9 +82,7 @@ private[spark] class Client( org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) def run(): Unit = { - validateNoDuplicateFileNames(sparkJars) - validateNoDuplicateFileNames(sparkFiles) - + val arguments = (pythonResource map {p => p.arguments}).getOrElse(appArgs) val driverCustomLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( sparkConf, KUBERNETES_DRIVER_LABEL_PREFIX, @@ -136,7 +134,7 @@ private[spark] class Client( .endEnv() .addNewEnv() .withName(ENV_DRIVER_ARGS) - .withValue(appArgs.mkString(" ")) + .withValue(arguments.mkString(" ")) .endEnv() .withNewResources() .addToRequests("cpu", driverCpuQuantity) @@ -182,10 +180,13 @@ private[spark] class Client( .map(_.build()) val containerLocalizedFilesResolver = initContainerComponentsProvider - .provideContainerLocalizedFilesResolver() + .provideContainerLocalizedFilesResolver(mainAppResource) val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() - + val resolvedPySparkFiles = containerLocalizedFilesResolver.resolveSubmittedPySparkFiles() + val resolvedPrimaryPySparkResource = pythonResource.map { + p => p.primaryPySparkResource(containerLocalizedFilesResolver) + }.getOrElse("") val initContainerBundler = initContainerComponentsProvider .provideInitContainerBundle(maybeSubmittedResourceIdentifiers.map(_.ids()), resolvedSparkJars ++ resolvedSparkFiles) @@ -221,7 +222,7 @@ private[spark] class Client( val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { case 
(confKey, confValue) => s"-D$confKey=$confValue" }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val resolvedDriverPod = podWithInitContainerAndMountedCreds.editSpec() + val resolvedDriverPodBuilder = podWithInitContainerAndMountedCreds.editSpec() .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) .addNewEnv() .withName(ENV_MOUNTED_CLASSPATH) @@ -233,7 +234,15 @@ private[spark] class Client( .endEnv() .endContainer() .endSpec() - .build() + val driverPodFileMounter = initContainerComponentsProvider.provideDriverPodFileMounter() + val resolvedDriverPod = pythonResource.map { + p => p.driverPodWithPySparkEnvs( + driverPodFileMounter, + resolvedPrimaryPySparkResource, + resolvedPySparkFiles.mkString(","), + driverContainer.getName, + resolvedDriverPodBuilder + )}.getOrElse(resolvedDriverPodBuilder.build()) Utils.tryWithResource( kubernetesClient .pods() @@ -271,17 +280,6 @@ private[spark] class Client( } } } - - private def validateNoDuplicateFileNames(allFiles: Seq[String]): Unit = { - val fileNamesToUris = allFiles.map { file => - (new File(Utils.resolveURI(file).getPath).getName, file) - } - fileNamesToUris.groupBy(_._1).foreach { - case (fileName, urisWithFileName) => - require(urisWithFileName.size == 1, "Cannot add multiple files with the same name, but" + - s" file name $fileName is shared by all of these URIs: $urisWithFileName") - } - } } private[spark] object Client { @@ -292,22 +290,34 @@ private[spark] object Client { val appArgs = args.drop(2) run(sparkConf, mainAppResource, mainClass, appArgs) } - def run( sparkConf: SparkConf, mainAppResource: String, mainClass: String, appArgs: Array[String]): Unit = { + val isPython = mainAppResource.endsWith(".py") + val pythonResource: Option[PythonSubmissionResourcesImpl] = + if (isPython) { + Option(new PythonSubmissionResourcesImpl(mainAppResource, appArgs)) + } else None + // Since you might need jars for SQL UDFs in PySpark + def sparkJarFilter(): Seq[String] = + pythonResource.map {p => p.sparkJars}.getOrElse( + Option(mainAppResource) + .filterNot(_ == SparkLauncher.NO_RESOURCE) + .toSeq) val sparkJars = sparkConf.getOption("spark.jars") .map(_.split(",")) - .getOrElse(Array.empty[String]) ++ - Option(mainAppResource) - .filterNot(_ == SparkLauncher.NO_RESOURCE) - .toSeq + .getOrElse(Array.empty[String]) ++ sparkJarFilter() val launchTime = System.currentTimeMillis val sparkFiles = sparkConf.getOption("spark.files") .map(_.split(",")) .getOrElse(Array.empty[String]) + val pySparkFilesOption = pythonResource.map {p => p.pySparkFiles} + validateNoDuplicateFileNames(sparkJars) + validateNoDuplicateFileNames(sparkFiles) + pySparkFilesOption.foreach {b => validateNoDuplicateFileNames(b)} + val pySparkFiles = pySparkFilesOption.getOrElse(Array.empty[String]) val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") // The resource name prefix is derived from the application name, making it easy to connect the // names of the Kubernetes resources from e.g. 
Kubectl or the Kubernetes dashboard to the @@ -326,6 +336,7 @@ private[spark] object Client { namespace, sparkJars, sparkFiles, + pySparkFiles, sslOptionsProvider.getSslOptions) Utils.tryWithResource(SparkKubernetesClientFactory.createKubernetesClient( master, @@ -346,11 +357,11 @@ private[spark] object Client { appName, kubernetesResourceNamePrefix, kubernetesAppId, + mainAppResource, + pythonResource, mainClass, sparkConf, appArgs, - sparkJars, - sparkFiles, waitForAppCompletion, kubernetesClient, initContainerComponentsProvider, @@ -358,4 +369,14 @@ private[spark] object Client { loggingPodStatusWatcher).run() } } + private def validateNoDuplicateFileNames(allFiles: Seq[String]): Unit = { + val fileNamesToUris = allFiles.map { file => + (new File(Utils.resolveURI(file).getPath).getName, file) + } + fileNamesToUris.groupBy(_._1).foreach { + case (fileName, urisWithFileName) => + require(urisWithFileName.size == 1, "Cannot add multiple files with the same name, but" + + s" file name $fileName is shared by all of these URIs: $urisWithFileName") + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala index c635484c4c124..c31aa5f306bea 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala @@ -24,14 +24,19 @@ private[spark] trait ContainerLocalizedFilesResolver { def resolveSubmittedAndRemoteSparkJars(): Seq[String] def resolveSubmittedSparkJars(): Seq[String] def resolveSubmittedSparkFiles(): Seq[String] + def resolveSubmittedPySparkFiles(): Seq[String] + def resolvePrimaryResourceFile(): String } private[spark] class ContainerLocalizedFilesResolverImpl( sparkJars: Seq[String], sparkFiles: Seq[String], + pySparkFiles: Seq[String], + primaryPyFile: String, jarsDownloadPath: String, filesDownloadPath: String) extends ContainerLocalizedFilesResolver { + override def resolveSubmittedAndRemoteSparkJars(): Seq[String] = { sparkJars.map { jar => val jarUri = Utils.resolveURI(jar) @@ -53,16 +58,30 @@ private[spark] class ContainerLocalizedFilesResolverImpl( resolveSubmittedFiles(sparkFiles, filesDownloadPath) } - private def resolveSubmittedFiles(files: Seq[String], downloadPath: String): Seq[String] = { - files.map { file => - val fileUri = Utils.resolveURI(file) - Option(fileUri.getScheme).getOrElse("file") match { - case "file" => - val fileName = new File(fileUri.getPath).getName - s"$downloadPath/$fileName" - case _ => - file - } + override def resolveSubmittedPySparkFiles(): Seq[String] = { + def filterMainResource(x: String) = x match { + case `primaryPyFile` => None + case _ => Some(resolveFile(x, filesDownloadPath)) + } + pySparkFiles.flatMap(x => filterMainResource(x)) + } + + override def resolvePrimaryResourceFile(): String = { + Option(primaryPyFile).map(p => resolveFile(p, filesDownloadPath)).getOrElse("") + } + + private def resolveFile(file: String, downloadPath: String) = { + val fileUri = Utils.resolveURI(file) + Option(fileUri.getScheme).getOrElse("file") match { + case "file" => + val fileName = new File(fileUri.getPath).getName + s"$downloadPath/$fileName" + case _ => + file } } + + private def resolveSubmittedFiles(files: Seq[String], 
downloadPath: String): Seq[String] = { + files.map { file => resolveFile(file, downloadPath) } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala index cc1837cce6736..6e185d2c069f6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala @@ -32,13 +32,15 @@ import org.apache.spark.util.Utils */ private[spark] trait DriverInitContainerComponentsProvider { - def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver + def provideContainerLocalizedFilesResolver( + mainAppResource: String): ContainerLocalizedFilesResolver def provideInitContainerSubmittedDependencyUploader( driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] def provideSubmittedDependenciesSecretBuilder( maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) : Option[SubmittedDependencySecretBuilder] def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap + def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter def provideInitContainerBundle(maybeSubmittedResourceIds: Option[SubmittedResourceIds], uris: Iterable[String]): Option[InitContainerBundle] } @@ -49,6 +51,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( namespace: String, sparkJars: Seq[String], sparkFiles: Seq[String], + pySparkFiles: Seq[String], resourceStagingServerExternalSslOptions: SSLOptions) extends DriverInitContainerComponentsProvider { @@ -104,6 +107,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) + private val pySparkSubmitted = KubernetesFileUtils.getOnlySubmitterLocalFiles(pySparkFiles) private def provideInitContainerConfigMap( maybeSubmittedResourceIds: Option[SubmittedResourceIds]): ConfigMap = { @@ -130,7 +134,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( } new SparkInitContainerConfigMapBuilderImpl( sparkJars, - sparkFiles, + sparkFiles ++ pySparkSubmitted, jarsDownloadPath, filesDownloadPath, configMapName, @@ -138,9 +142,10 @@ private[spark] class DriverInitContainerComponentsProviderImpl( submittedDependencyConfigPlugin).build() } - override def provideContainerLocalizedFilesResolver(): ContainerLocalizedFilesResolver = { + override def provideContainerLocalizedFilesResolver(mainAppResource: String) + : ContainerLocalizedFilesResolver = { new ContainerLocalizedFilesResolverImpl( - sparkJars, sparkFiles, jarsDownloadPath, filesDownloadPath) + sparkJars, sparkFiles, pySparkFiles, mainAppResource, jarsDownloadPath, filesDownloadPath) } private def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { @@ -159,7 +164,7 @@ private[spark] class DriverInitContainerComponentsProviderImpl( namespace, stagingServerUri, sparkJars, - sparkFiles, + sparkFiles ++ pySparkSubmitted, resourceStagingServerExternalSslOptions, RetrofitClientFactoryImpl) } @@ -201,13 +206,15 @@ private[spark] class 
DriverInitContainerComponentsProviderImpl( configMapKey, resourceStagingServerSecretPlugin) } - + override def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter = { + new DriverPodKubernetesFileMounterImpl() + } override def provideInitContainerBundle( maybeSubmittedResourceIds: Option[SubmittedResourceIds], uris: Iterable[String]): Option[InitContainerBundle] = { - val containerLocalizedFilesResolver = provideContainerLocalizedFilesResolver() - // Bypass init-containers if `spark.jars` and `spark.files` is empty or only has `local://` URIs - if (KubernetesFileUtils.getNonContainerLocalFiles(uris).nonEmpty) { + // Bypass init-containers if `spark.jars` and `spark.files` and '--py-rilfes' + // is empty or only has `local://` URIs + if ((KubernetesFileUtils.getNonContainerLocalFiles(uris) ++ pySparkSubmitted).nonEmpty) { Some(InitContainerBundle(provideInitContainerConfigMap(maybeSubmittedResourceIds), provideInitContainerBootstrap(), provideExecutorInitContainerConfiguration())) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala new file mode 100644 index 0000000000000..cc0ef0eedb457 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala @@ -0,0 +1,55 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit + +import io.fabric8.kubernetes.api.model.{Container, PodBuilder} + +import org.apache.spark.deploy.kubernetes.constants._ + + /** + * Trait that is responsible for providing full file-paths dynamically after + * the filesDownloadPath has been defined. The file-names are then stored in the + * environmental variables in the driver-pod. 
+ */ +private[spark] trait DriverPodKubernetesFileMounter { + def addPySparkFiles(primaryFile: String, pySparkFiles: String, + mainContainerName: String, originalPodSpec: PodBuilder) : PodBuilder +} + +private[spark] class DriverPodKubernetesFileMounterImpl() + extends DriverPodKubernetesFileMounter { + override def addPySparkFiles( + primaryFile: String, + pySparkFiles: String, + mainContainerName: String, + originalPodSpec: PodBuilder): PodBuilder = { + + originalPodSpec + .editSpec() + .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) + .addNewEnv() + .withName(ENV_PYSPARK_PRIMARY) + .withValue(primaryFile) + .endEnv() + .addNewEnv() + .withName(ENV_PYSPARK_FILES) + .withValue(pySparkFiles) + .endEnv() + .endContainer() + .endSpec() + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala new file mode 100644 index 0000000000000..c61e930a2b97f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala @@ -0,0 +1,75 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit + +import io.fabric8.kubernetes.api.model.{Pod, PodBuilder} + +private[spark] trait PythonSubmissionResources { + def sparkJars: Seq[String] + def pySparkFiles: Array[String] + def arguments: Array[String] + def primaryPySparkResource(containerLocalizedFilesResolver: ContainerLocalizedFilesResolver) + : String + def driverPodWithPySparkEnvs( + driverPodFileMounter: DriverPodKubernetesFileMounter, + resolvedPrimaryPySparkResource: String, + resolvedPySparkFiles: String, + driverContainerName: String, + driverPodBuilder: PodBuilder): Pod +} + +private[spark] class PythonSubmissionResourcesImpl( + private val mainAppResource: String, + private val appArgs: Array[String] ) extends PythonSubmissionResources { + + private val pyFiles: Array[String] = { + Option(appArgs(0)).map(a => mainAppResource +: a.split(",")) + .getOrElse(Array(mainAppResource)) + } + + override def sparkJars: Seq[String] = Seq.empty[String] + + override def pySparkFiles: Array[String] = pyFiles + + override def arguments: Array[String] = { + pyFiles.toList match { + case Nil => appArgs + case a :: b => a match { + case _ if a == mainAppResource && b == Nil => appArgs + case _ => appArgs.drop(1) + } + } + } + override def primaryPySparkResource( + containerLocalizedFilesResolver: ContainerLocalizedFilesResolver) : String = + containerLocalizedFilesResolver.resolvePrimaryResourceFile() + + override def driverPodWithPySparkEnvs( + driverPodFileMounter: DriverPodKubernetesFileMounter, + resolvedPrimaryPySparkResource: String, + resolvedPySparkFiles: String, + driverContainerName: String, + driverPodBuilder: PodBuilder) : Pod = { + driverPodFileMounter + .addPySparkFiles( + resolvedPrimaryPySparkResource, + resolvedPySparkFiles, + driverContainerName, + driverPodBuilder) + .build() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala index 8992a56e20c80..a58a37691f4eb 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala @@ -18,7 +18,7 @@ package org.apache.spark.deploy.kubernetes.submit import java.io.File -import io.fabric8.kubernetes.api.model.{ConfigMap, ConfigMapBuilder, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} +import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{KubernetesClient, Watch} import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} import org.hamcrest.{BaseMatcher, Description} @@ -27,10 +27,10 @@ import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq} import org.mockito.Mockito.{times, verify, when} import org.mockito.invocation.InvocationOnMock import org.mockito.stubbing.Answer -import org.scalatest.BeforeAndAfter +import org.scalatest.{BeforeAndAfter, Matchers} + import scala.collection.JavaConverters._ import scala.collection.mutable - import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} import org.apache.spark.deploy.kubernetes.{KubernetesExternalShuffleService, KubernetesShuffleBlockHandler, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ @@ -63,6 +63,7 @@ class ClientV2Suite extends 
SparkFunSuite with BeforeAndAfter { private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" private val INIT_CONTAINER_SECRET_DATA = Map("secret-key" -> "secret-data") private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" + private val PYSPARK_APP_ARGS = Array(null, "500") private val APP_ARGS = Array("3", "20") private val SPARK_JARS = Seq( "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") @@ -72,6 +73,20 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") private val SPARK_FILES = Seq( "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") + private val PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "file:///app/files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py", + "file:///app/files/file5.py") + private val RESOLVED_PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "/var/spark-data/spark-files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py") + private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" + private val RESOLVED_PYSPARK_PRIMARY_FILE = "/var/spark-data/spark-file/file5.py" + private val RESOLVED_SPARK_FILES = Seq( "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") private val INIT_CONTAINER_SECRET = new SecretBuilder() @@ -138,7 +153,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { @Mock private var kubernetesClient: KubernetesClient = _ @Mock - private var podOps: MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ + private var podOps: MixedOperation[ + Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ private type ResourceListOps = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ HasMetadata, java.lang.Boolean] @Mock @@ -146,6 +162,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { @Mock private var credentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider = _ @Mock + private var fileMounter: DriverPodKubernetesFileMounter = _ + @Mock private var credentialsMounter: DriverPodKubernetesCredentialsMounter = _ @Mock private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ @@ -169,8 +187,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .endMetadata() } }) - when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver()) - .thenReturn(containerLocalizedFilesResolver) + when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver( + any[String])).thenReturn(containerLocalizedFilesResolver) + when(initContainerComponentsProvider.provideDriverPodFileMounter()) + .thenReturn(fileMounter) when(submittedDependenciesSecretBuilder.build()) .thenReturn(INIT_CONTAINER_SECRET) when(kubernetesClient.pods()).thenReturn(podOps) @@ -178,14 +198,30 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { override def answer(invocation: InvocationOnMock): Pod = { new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) .editMetadata() - .withUid(DRIVER_POD_UID) + .withUid(DRIVER_POD_UID) .endMetadata() - .withKind(DRIVER_POD_KIND) + .withKind(DRIVER_POD_KIND) .withApiVersion(DRIVER_POD_API_VERSION) .build() } }) when(podOps.withName(s"$APP_RESOURCE_PREFIX-driver")).thenReturn(namedPodResource) + when(fileMounter.addPySparkFiles( + mockitoEq(RESOLVED_PYSPARK_PRIMARY_FILE), + mockitoEq(RESOLVED_PYSPARK_FILES.mkString(",")), + any[String], + 
any())).thenAnswer( new Answer[PodBuilder] { + override def answer(invocation: InvocationOnMock) : PodBuilder = { + invocation.getArgumentAt(3, classOf[PodBuilder]) + .editMetadata() + .withUid(DRIVER_POD_UID) + .withName(s"$APP_RESOURCE_PREFIX-driver") + .addToLabels("pyspark-test", "true") + .endMetadata() + .withKind(DRIVER_POD_KIND) + .withApiVersion(DRIVER_POD_API_VERSION) + } + }) when(namedPodResource.watch(loggingPodStatusWatcher)).thenReturn(watch) when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) @@ -193,6 +229,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { .thenReturn(RESOLVED_SPARK_JARS) when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) .thenReturn(RESOLVED_SPARK_FILES) + when(containerLocalizedFilesResolver.resolvePrimaryResourceFile()) + .thenReturn(RESOLVED_PYSPARK_PRIMARY_FILE) + when(containerLocalizedFilesResolver.resolveSubmittedPySparkFiles()) + .thenReturn(RESOLVED_PYSPARK_FILES) when(executorInitContainerConfiguration.configureSparkConfForExecutorInitContainer(SPARK_CONF)) .thenReturn(SPARK_CONF_WITH_EXECUTOR_INIT_CONF) when(kubernetesClient.resourceList(anyVararg[HasMetadata]())).thenReturn(resourceListOps) @@ -208,9 +248,10 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(initContainerComponentsProvider .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets()))) .thenReturn(Some(submittedDependenciesSecretBuilder)) - when(initContainerComponentsProvider.provideInitContainerBundle(Some(SUBMITTED_RESOURCES.ids()), - RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES)) - .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, + when(initContainerComponentsProvider.provideInitContainerBundle(mockitoEq( + Option(SUBMITTED_RESOURCES.ids())), + mockitoEq(RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES))) + .thenReturn(Option(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, initContainerBootstrap, executorInitContainerConfiguration))) runAndVerifyDriverPodHasCorrectProperties() val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) @@ -292,11 +333,11 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { APP_NAME, APP_RESOURCE_PREFIX, APP_ID, + "", + None, MAIN_CLASS, SPARK_CONF, APP_ARGS, - SPARK_JARS, - SPARK_FILES, true, kubernetesClient, initContainerComponentsProvider, @@ -305,6 +346,20 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { verify(loggingPodStatusWatcher).awaitCompletion() } + test("Mounting environmental variables correctly onto Driver Pod for PySpark Jobs") { + expectationsForNoMountedCredentials() + expectationsForNoDependencyUploader() + expectationsForNoSparkJarsOrFiles() + runAndVerifyDriverPodHasCorrectPySparkProperties() + } + + private def expectationsForNoSparkJarsOrFiles(): Unit = { + when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) + .thenReturn(Nil) + when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) + .thenReturn(Nil) + } + private def expectationsForNoDependencyUploader(): Unit = { when(initContainerComponentsProvider .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) @@ -312,8 +367,8 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { when(initContainerComponentsProvider .provideSubmittedDependenciesSecretBuilder(None)) .thenReturn(None) - when(initContainerComponentsProvider.provideInitContainerBundle(None, RESOLVED_SPARK_JARS ++ - RESOLVED_SPARK_FILES)) + 
when(initContainerComponentsProvider.provideInitContainerBundle(mockitoEq(None), + mockitoEq(RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES))) .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, initContainerBootstrap, executorInitContainerConfiguration))) } @@ -373,16 +428,28 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { } } + private def runAndVerifyDriverPodHasCorrectPySparkProperties(): Unit = { + when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver( + mockitoEq(PYSPARK_PRIMARY_FILE))).thenReturn(containerLocalizedFilesResolver) + when(initContainerComponentsProvider.provideInitContainerBundle( + any[Option[SubmittedResourceIds]], any[Iterable[String]])) + .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, + initContainerBootstrap, executorInitContainerConfiguration))) + runAndVerifyPySparkPodMatchesPredicate { p => + Option(p).exists(pod => containerHasCorrectPySparkEnvs(pod)) + } + } + private def runAndVerifyPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { new Client( APP_NAME, APP_RESOURCE_PREFIX, APP_ID, + "", + None, MAIN_CLASS, SPARK_CONF, APP_ARGS, - SPARK_JARS, - SPARK_FILES, false, kubernetesClient, initContainerComponentsProvider, @@ -434,6 +501,15 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { expectedBasicEnvs.toSet.subsetOf(envs.toSet) } + private def containerHasCorrectPySparkEnvs(pod: Pod): Boolean = { + val driverPodLabels = + pod.getMetadata.getLabels.asScala.map(env => (env._1.toString, env._2.toString)) + val expectedBasicLabels = Map( + "pyspark-test" -> "true", + "spark-role" -> "driver") + expectedBasicLabels.toSet.subsetOf(driverPodLabels.toSet) + } + private def containerHasCorrectBasicContainerConfiguration(pod: Pod): Boolean = { val containers = pod.getSpec.getContainers.asScala containers.size == 1 && @@ -450,4 +526,33 @@ class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { BOOTSTRAPPED_POD_ANNOTATION -> TRUE) pod.getMetadata.getAnnotations.asScala == expectedAnnotations } + + private def runAndVerifyPySparkPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { + new Client( + APP_NAME, + APP_RESOURCE_PREFIX, + APP_ID, + PYSPARK_PRIMARY_FILE, + Option(new PythonSubmissionResourcesImpl(PYSPARK_PRIMARY_FILE, PYSPARK_APP_ARGS)), + MAIN_CLASS, + SPARK_CONF, + PYSPARK_APP_ARGS, + false, + kubernetesClient, + initContainerComponentsProvider, + credentialsMounterProvider, + loggingPodStatusWatcher).run() + val podMatcher = new BaseMatcher[Pod] { + override def matches(o: scala.Any): Boolean = { + o match { + case p: Pod => pred(p) + case _ => false + } + } + override def describeTo(description: Description): Unit = {} + } + verify(podOps).create(argThat(podMatcher)) + } } + + diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala index ca5cd1fff9b74..7e51abcd7b8e0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala @@ -29,11 +29,20 @@ class ContainerLocalizedFilesResolverSuite extends SparkFunSuite { "file:///app/files/file2.txt", "local:///app/files/file3.txt", "http://app/files/file4.txt") + private val 
PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "file:///app/files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py", + "file:///app/files/file5.py") private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" + private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" private val localizedFilesResolver = new ContainerLocalizedFilesResolverImpl( SPARK_JARS, SPARK_FILES, + PYSPARK_FILES, + PYSPARK_PRIMARY_FILE, JARS_DOWNLOAD_PATH, FILES_DOWNLOAD_PATH) @@ -66,4 +75,19 @@ class ContainerLocalizedFilesResolverSuite extends SparkFunSuite { "http://app/files/file4.txt") assert(resolvedFiles === expectedResolvedFiles) } + test("Submitted PySpark files should resolve to the download path.") { + val resolvedPySparkFiles = localizedFilesResolver.resolveSubmittedPySparkFiles() + val expectedPySparkFiles = Seq( + "hdfs://localhost:9000/app/files/file1.py", + s"$FILES_DOWNLOAD_PATH/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py") + assert(resolvedPySparkFiles === expectedPySparkFiles) + } + test("Submitted PySpark Primary resource should resolve to the download path.") { + val resolvedPySparkPrimary = + localizedFilesResolver.resolvePrimaryResourceFile() + val expectedPySparkPrimary = s"$FILES_DOWNLOAD_PATH/file5.py" + assert(resolvedPySparkPrimary === expectedPySparkPrimary) + } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala new file mode 100644 index 0000000000000..9b60b7ef2b786 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala @@ -0,0 +1,109 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit + +import org.apache.spark.{SSLOptions, SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ + +import scala.collection.JavaConverters._ +import io.fabric8.kubernetes.api.model.{ContainerBuilder, Pod, PodBuilder} +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Mockito.when +import org.scalatest.BeforeAndAfter + +private[spark] class PythonSubmissionResourcesSuite extends SparkFunSuite with BeforeAndAfter { + private val PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "file:///app/files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py", + "file:///app/files/file5.py") + private val RESOLVED_PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "/var/spark-data/spark-files/file2.py", + "local:///app/file`s/file3.py", + "http://app/files/file4.py") + private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" + private val RESOLVED_PYSPARK_PRIMARY_FILE = "/var/data/spark-files/file5.py" + + private val pyFilesResource = new PythonSubmissionResourcesImpl( + PYSPARK_PRIMARY_FILE, Array(PYSPARK_FILES.mkString(","), "500") + ) + private val pyResource = new PythonSubmissionResourcesImpl( + PYSPARK_PRIMARY_FILE, Array(null, "500") + ) + private val DRIVER_CONTAINER_NAME = "pyspark_container" + private val driverContainer = new ContainerBuilder() + .withName(DRIVER_CONTAINER_NAME) + .build() + private val basePodBuilder = new PodBuilder() + .withNewMetadata() + .withName("base_pod") + .endMetadata() + .withNewSpec() + .addToContainers(driverContainer) + .endSpec() + + @Mock + private var driverInitContainer: DriverInitContainerComponentsProviderImpl = _ + @Mock + private var localizedFileResolver: ContainerLocalizedFilesResolverImpl = _ + before { + MockitoAnnotations.initMocks(this) + when(driverInitContainer.provideDriverPodFileMounter()).thenReturn( + new DriverPodKubernetesFileMounterImpl() + ) + when(localizedFileResolver.resolvePrimaryResourceFile()).thenReturn( + RESOLVED_PYSPARK_PRIMARY_FILE) + } + test("Test with --py-files included") { + assert(pyFilesResource.sparkJars === Seq.empty[String]) + assert(pyFilesResource.pySparkFiles === + PYSPARK_PRIMARY_FILE +: PYSPARK_FILES) + assert(pyFilesResource.primaryPySparkResource(localizedFileResolver) === + RESOLVED_PYSPARK_PRIMARY_FILE) + val driverPod: Pod = pyFilesResource.driverPodWithPySparkEnvs( + driverInitContainer.provideDriverPodFileMounter(), + RESOLVED_PYSPARK_PRIMARY_FILE, + RESOLVED_PYSPARK_FILES.mkString(","), + DRIVER_CONTAINER_NAME, + basePodBuilder + ) + val driverContainer = driverPod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)).toMap + envs.get("PYSPARK_PRIMARY") foreach{ a => assert (a === RESOLVED_PYSPARK_PRIMARY_FILE) } + envs.get("PYSPARK_FILES") foreach{ a => assert (a === RESOLVED_PYSPARK_FILES.mkString(",")) } + } + + test("Test without --py-files") { + assert(pyResource.sparkJars === Seq.empty[String]) + assert(pyResource.pySparkFiles === Array(PYSPARK_PRIMARY_FILE)) + assert(pyResource.primaryPySparkResource(localizedFileResolver) === + RESOLVED_PYSPARK_PRIMARY_FILE) + val driverPod: Pod = pyResource.driverPodWithPySparkEnvs( + driverInitContainer.provideDriverPodFileMounter(), + RESOLVED_PYSPARK_PRIMARY_FILE, + "", + DRIVER_CONTAINER_NAME, + basePodBuilder + ) + val driverContainer = driverPod.getSpec.getContainers.asScala.head + val envs = driverContainer.getEnv.asScala.map(env => 
(env.getName, env.getValue)).toMap + envs.get("PYSPARK_PRIMARY") foreach{ a => assert (a === RESOLVED_PYSPARK_PRIMARY_FILE) } + envs.get("PYSPARK_FILES") foreach{ a => assert (a === "") } + } +} \ No newline at end of file diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile new file mode 100644 index 0000000000000..6dcc7511c0dd9 --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile @@ -0,0 +1,48 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM spark-base + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-driver-py:latest -f dockerfiles/driver-py/Dockerfile . + +ADD examples /opt/spark/examples +ADD python /opt/spark/python + +RUN apk add --no-cache python && \ + python -m ensurepip && \ + rm -r /usr/lib/python*/ensurepip && \ + pip install --upgrade pip setuptools && \ + rm -r /root/.cache +# UNCOMMENT THE FOLLOWING TO START PIP INSTALLING PYTHON PACKAGES +# RUN apk add --update alpine-sdk python-dev +# RUN pip install numpy + +ENV PYTHON_VERSION 2.7.13 +ENV PYSPARK_PYTHON python +ENV PYSPARK_DRIVER_PYTHON python +ENV PYTHONPATH ${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH} + +CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ + if ! [ -z ${SPARK_MOUNTED_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ + exec /sbin/tini -- ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH \ + -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY \ + $SPARK_DRIVER_CLASS $PYSPARK_PRIMARY $PYSPARK_FILES $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile new file mode 100644 index 0000000000000..7a65a4f879376 --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile @@ -0,0 +1,46 @@ +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. 
See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +FROM spark-base + +# If this docker file is being used in the context of building your images from a Spark distribution, the docker build +# command should be invoked from the top level directory of the Spark distribution. E.g.: +# docker build -t spark-executor-py:latest -f dockerfiles/executor-py/Dockerfile . + +ADD examples /opt/spark/examples +ADD python /opt/spark/python + +RUN apk add --no-cache python && \ + python -m ensurepip && \ + rm -r /usr/lib/python*/ensurepip && \ + pip install --upgrade pip setuptools && \ + rm -r /root/.cache +# UNCOMMENT THE FOLLOWING TO START PIP INSTALLING PYTHON PACKAGES +# RUN apk add --update alpine-sdk python-dev +# RUN pip install numpy + +ENV PYTHON_VERSION 2.7.13 +ENV PYSPARK_PYTHON python +ENV PYSPARK_DRIVER_PYTHON python +ENV PYTHONPATH ${SPARK_HOME}/python/:${SPARK_HOME}/python/lib/py4j-0.10.4-src.zip:${PYTHONPATH} + +# TODO support spark.executor.extraClassPath +CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ + if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ + exec /sbin/tini -- ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP \ No newline at end of file diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index 6bff06da12840..4bafe25e2608f 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -19,6 +19,6 @@ FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: -# docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . +# docker build -t spark-init:latest -f dockerfiles/init-container/Dockerfile . 
ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index c9a92fa1c5b62..9ca96be0f1a88 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -17,8 +17,9 @@ FROM spark-base + # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: -# docker build -t spark-executor:latest -f dockerfiles/executor/Dockerfile . +# docker build -t spark-resource-staging-server:latest -f dockerfiles/resource-staging-server/Dockerfile . ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index 7f4e2aa51b67d..ccb2f1a03d88c 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -19,7 +19,7 @@ FROM spark-base # If this docker file is being used in the context of building your images from a Spark distribution, the docker build # command should be invoked from the top level directory of the Spark distribution. E.g.: -# docker build -t spark-shuffle:latest -f dockerfiles/shuffle/Dockerfile . +# docker build -t spark-shuffle:latest -f dockerfiles/shuffle-service/Dockerfile . COPY examples /opt/spark/examples diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index bbf4b02cdaaf9..cd3ccad0a2b22 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -220,6 +220,108 @@ + + copy-integration-python + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/python + + + ${project.parent.basedir}/python + + ${project.parent.basedir}/python/.egg + ${project.parent.basedir}/python/dist + + + + + + + copy-integration-data + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/data + + + ${project.parent.basedir}/data + true + + + + + + copy-integration-licenses + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/licenses + + + ${project.parent.basedir}/licenses + true + + + + + + copy-integration-examples-jar + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/examples/jars + + + ${project.parent.basedir}/examples/target/scala-2.11/jars + true + + + + + + copy-integration-examples-src + pre-integration-test + + copy-resources + + + ${project.build.directory}/docker/examples/src/main + + + ${project.parent.basedir}/examples/src/main + true + + + + + +
    + + org.apache.maven.plugins + maven-antrun-plugin + 1.6 + + + create-release-file + pre-integration-test + + run + + + + + + + diff --git a/resource-managers/kubernetes/integration-tests/src/test/python/pi.py b/resource-managers/kubernetes/integration-tests/src/test/python/pi.py new file mode 100755 index 0000000000000..e3f0c4aeef1b7 --- /dev/null +++ b/resource-managers/kubernetes/integration-tests/src/test/python/pi.py @@ -0,0 +1,46 @@ +from __future__ import print_function +# +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# + +import sys +from random import random +from operator import add + +from pyspark.sql import SparkSession + + +if __name__ == "__main__": + """ + Usage: pi [partitions] + """ + spark = SparkSession\ + .builder\ + .appName("PythonPi")\ + .getOrCreate() + + partitions = int(sys.argv[1]) if len(sys.argv) > 1 else 2 + n = 100000 * partitions + + def f(_): + x = random() * 2 - 1 + y = random() * 2 - 1 + return 1 if x ** 2 + y ** 2 < 1 else 0 + + count = spark.sparkContext.parallelize(range(1, n + 1), partitions).map(f).reduce(add) + print("Pi is roughly %f" % (4.0 * count / n)) + + spark.stop() diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index e377f285eb9a6..d2082291eba22 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -72,6 +72,34 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { kubernetesTestComponents.deleteNamespace() } + test("Run PySpark Job on file from SUBMITTER") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + launchStagingServer(SSLOptions(), None) + sparkConf + .set(DRIVER_DOCKER_IMAGE, + System.getProperty("spark.docker.test.driverImage", "spark-driver-py:latest")) + .set(EXECUTOR_DOCKER_IMAGE, + System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest")) + + runPySparkPiAndVerifyCompletion( + PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION) + } + + test("Run PySpark Job on file from CONTAINER with spark.jar defined") { + assume(testBackend.name == MINIKUBE_TEST_BACKEND) + + sparkConf.setJars(Seq(CONTAINER_LOCAL_HELPER_JAR_PATH)) + sparkConf + .set(DRIVER_DOCKER_IMAGE, + System.getProperty("spark.docker.test.driverImage", "spark-driver-py:latest")) + .set(EXECUTOR_DOCKER_IMAGE, + System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest")) + + 
runPySparkPiAndVerifyCompletion( + PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION) + } + test("Simple submission test with the resource staging server.") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) @@ -223,6 +251,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { appResource, SPARK_PI_MAIN_CLASS, "Pi is roughly 3", Array.empty[String]) } + private def runPySparkPiAndVerifyCompletion( + appResource: String): Unit = { + runSparkApplicationAndVerifyCompletion( + appResource, PYSPARK_PI_MAIN_CLASS, "Pi is roughly 3", + Array(null, "5")) + } + private def runSparkApplicationAndVerifyCompletion( appResource: String, mainClass: String, @@ -305,11 +340,14 @@ private[spark] object KubernetesSuite { s"integration-tests-jars/${EXAMPLES_JAR_FILE.getName}" val CONTAINER_LOCAL_HELPER_JAR_PATH = s"local:///opt/spark/examples/" + s"integration-tests-jars/${HELPER_JAR_FILE.getName}" - val TIMEOUT = PatienceConfiguration.Timeout(Span(2, Minutes)) val INTERVAL = PatienceConfiguration.Interval(Span(2, Seconds)) val SPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.SparkPiWithInfiniteWait" + val PYSPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.PythonRunner" + val PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION = + "local:///opt/spark/examples/src/main/python/pi.py" + val PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION = "src/test/python/pi.py" val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.FileExistenceTest" val GROUP_BY_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala index 4db19478f44bc..e240fcf953f8c 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/docker/SparkDockerImageBuilder.scala @@ -16,21 +16,32 @@ */ package org.apache.spark.deploy.kubernetes.integrationtest.docker +import java.io.File import java.net.URI import java.nio.file.Paths +import scala.collection.JavaConverters._ + import com.spotify.docker.client.{DefaultDockerClient, DockerCertificates, LoggingBuildHandler} import org.apache.http.client.utils.URIBuilder import org.scalatest.concurrent.{Eventually, PatienceConfiguration} import org.scalatest.time.{Minutes, Seconds, Span} -private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, String]) { +import org.apache.spark.internal.Logging +import org.apache.spark.util.RedirectThread + + + +private[spark] class SparkDockerImageBuilder + (private val dockerEnv: Map[String, String]) extends Logging{ private val DOCKER_BUILD_PATH = Paths.get("target", "docker") // Dockerfile paths must be relative to the build path. 
private val BASE_DOCKER_FILE = "dockerfiles/spark-base/Dockerfile" private val DRIVER_DOCKER_FILE = "dockerfiles/driver/Dockerfile" + private val DRIVERPY_DOCKER_FILE = "dockerfiles/driver-py/Dockerfile" private val EXECUTOR_DOCKER_FILE = "dockerfiles/executor/Dockerfile" + private val EXECUTORPY_DOCKER_FILE = "dockerfiles/executor-py/Dockerfile" private val SHUFFLE_SERVICE_DOCKER_FILE = "dockerfiles/shuffle-service/Dockerfile" private val INIT_CONTAINER_DOCKER_FILE = "dockerfiles/init-container/Dockerfile" private val STAGING_SERVER_DOCKER_FILE = "dockerfiles/resource-staging-server/Dockerfile" @@ -61,9 +72,25 @@ private[spark] class SparkDockerImageBuilder(private val dockerEnv: Map[String, def buildSparkDockerImages(): Unit = { Eventually.eventually(TIMEOUT, INTERVAL) { dockerClient.ping() } + // Building Python distribution environment + val pythonExec = sys.env.get("PYSPARK_DRIVER_PYTHON") + .orElse(sys.env.get("PYSPARK_PYTHON")) + .getOrElse("/usr/bin/python") + val builder = new ProcessBuilder( + Seq(pythonExec, "setup.py", "sdist").asJava) + builder.directory(new File(DOCKER_BUILD_PATH.toFile, "python")) + builder.redirectErrorStream(true) // Ugly but needed for stdout and stderr to synchronize + val process = builder.start() + new RedirectThread(process.getInputStream, System.out, "redirect output").start() + val exitCode = process.waitFor() + if (exitCode != 0) { + logInfo(s"exitCode: $exitCode") + } buildImage("spark-base", BASE_DOCKER_FILE) buildImage("spark-driver", DRIVER_DOCKER_FILE) + buildImage("spark-driver-py", DRIVERPY_DOCKER_FILE) buildImage("spark-executor", EXECUTOR_DOCKER_FILE) + buildImage("spark-executor-py", EXECUTORPY_DOCKER_FILE) buildImage("spark-shuffle", SHUFFLE_SERVICE_DOCKER_FILE) buildImage("spark-resource-staging-server", STAGING_SERVER_DOCKER_FILE) buildImage("spark-init", INIT_CONTAINER_DOCKER_FILE) From fd30c5d64dab9d41e3ffa8ae8621af5e7ee55e68 Mon Sep 17 00:00:00 2001 From: mccheah Date: Fri, 14 Jul 2017 15:43:44 -0700 Subject: [PATCH 143/225] Submission client redesign to use a step-based builder pattern (#365) * Submission client redesign to use a step-based builder pattern. This change overhauls the underlying architecture of the submission client, but it is intended to entirely preserve existing behavior of Spark applications. Therefore users will find this to be an invisible change. The philosophy behind this design is to reconsider the breakdown of the submission process. It operates off the abstraction of "submission steps", which are transformation functions that take the previous state of the driver and return the new state of the driver. The driver's state includes its Spark configurations and the Kubernetes resources that will be used to deploy it. Such a refactor moves away from a features-first API design, which considers different containers to serve a set of features. The previous design, for example, had a container files resolver API object that returned different resolutions of the dependencies added by the user. However, it was up to the main Client to know how to intelligently invoke all of those APIs. Therefore the API surface area of the file resolver became untenably large and it was not intuitive of how it was to be used or extended. This design changes the encapsulation layout; every module is now responsible for changing the driver specification directly. An orchestrator builds the correct chain of steps and hands it to the client, which then calls it verbatim. 
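As a rough illustration of the abstraction described above (and not the full implementation contained in this patch), each submission step can be modeled as a pure function over an immutable driver specification. The DriverConfigurationStep trait, its configureDriver method, and the fields of KubernetesDriverSpec in the sketch below mirror names that appear later in this diff; the ExampleLabelStep and the StepRunner helper are hypothetical, added only to show how an orchestrated chain of steps is applied in order.

    import io.fabric8.kubernetes.api.model.{Container, HasMetadata, Pod, PodBuilder}
    import org.apache.spark.SparkConf

    // Simplified driver "state": the pod, its main container, any extra Kubernetes
    // resources to create alongside it, and the resolved Spark configuration.
    case class KubernetesDriverSpec(
        driverPod: Pod,
        driverContainer: Container,
        otherKubernetesResources: Seq[HasMetadata],
        driverSparkConf: SparkConf)

    // A submission step: a pure transformation from the previous driver state to the next.
    trait DriverConfigurationStep {
      def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec
    }

    // Hypothetical step, used only for illustration: tag the driver pod with an app id label.
    class ExampleLabelStep(appId: String) extends DriverConfigurationStep {
      override def configureDriver(spec: KubernetesDriverSpec): KubernetesDriverSpec = {
        val labeledPod = new PodBuilder(spec.driverPod)
          .editOrNewMetadata()
            .addToLabels("spark-app-id", appId)
            .endMetadata()
          .build()
        spec.copy(driverPod = labeledPod)
      }
    }

    // The client simply folds the orchestrated chain over an initial spec, in order.
    object StepRunner {
      def run(
          initial: KubernetesDriverSpec,
          steps: Seq[DriverConfigurationStep]): KubernetesDriverSpec =
        steps.foldLeft(initial) { (spec, step) => step.configureDriver(spec) }
    }

Because every step returns a new spec rather than mutating shared state, each step can be unit-tested in isolation, and the orchestrator's only job is to decide which steps belong in the chain for a given set of client arguments.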
The main client then makes any final modifications that put the different pieces of the driver together, particularly to attach the driver container itself to the pod and to apply the Spark configuration as command-line arguments. * Add a unit test for BaseSubmissionStep. * Add unit test for kubernetes credentials mounting. * Add unit test for InitContainerBootstrapStep. * unit tests for initContainer * Add a unit test for DependencyResolutionStep. * further modifications to InitContainer unit tests * Use of resolver in PythonStep and unit tests for PythonStep * refactoring of init unit tests and pythonstep resolver logic * Add unit test for KubernetesSubmissionStepsOrchestrator. * refactoring and addition of secret trustStore+Cert checks in a SubmissionStepSuite * added SparkPodInitContainerBootstrapSuite * Added InitContainerResourceStagingServerSecretPluginSuite * style in Unit tests * extremely minor style fix in variable naming * Address comments. * Rename class for consistency. * Attempt to make spacing consistent. Multi-line methods should have four-space indentation for arguments that aren't on the same line as the method call itself... but this is difficult to do consistently given how IDEs handle Scala multi-line indentation in most cases. --- .../org/apache/spark/deploy/SparkSubmit.scala | 15 +- ...nerResourceStagingServerSecretPlugin.scala | 38 +- .../PodWithDetachedInitContainer.scala | 24 + .../SparkPodInitContainerBootstrap.scala | 50 +- .../deploy/kubernetes/submit/Client.scala | 415 ++++--------- .../ContainerLocalizedFilesResolver.scala | 87 --- ...DriverConfigurationStepsOrchestrator.scala | 138 +++++ ...riverInitContainerComponentsProvider.scala | 223 ------- ...riverPodKubernetesCredentialsMounter.scala | 184 ------ ...KubernetesCredentialsMounterProvider.scala | 49 -- ...iverPodKubernetesCredentialsProvider.scala | 63 -- .../DriverPodKubernetesFileMounter.scala | 55 -- .../ExecutorInitContainerConfiguration.scala | 47 -- .../kubernetes/submit/InitContainerUtil.scala | 18 +- .../submit/KubernetesFileUtils.scala | 45 ++ ...inerBundle.scala => MainAppResource.scala} | 9 +- .../submit/PythonSubmissionResources.scala | 75 --- ...dDependencyInitContainerConfigPlugin.scala | 96 --- .../SubmittedDependencySecretBuilder.scala | 81 --- .../BaseDriverConfigurationStep.scala | 131 ++++ .../DependencyResolutionStep.scala | 66 +++ .../submitsteps/DriverConfigurationStep.scala | 28 + .../DriverKubernetesCredentialsStep.scala | 222 +++++++ .../InitContainerBootstrapStep.scala | 64 ++ .../submitsteps/KubernetesDriverSpec.scala | 47 ++ .../submit/submitsteps/PythonStep.scala | 46 ++ .../BaseInitContainerConfigurationStep.scala} | 41 +- .../InitContainerConfigurationStep.scala | 25 + ...tainerConfigurationStepsOrchestrator.scala | 131 ++++ .../initcontainer/InitContainerSpec.scala | 41 ++ ...ourcesInitContainerConfigurationStep.scala | 146 +++++ ...SparkDependencyDownloadInitContainer.scala | 1 - .../kubernetes/KubernetesClusterManager.scala | 9 +- .../KubernetesClusterSchedulerBackend.scala | 148 +++-- ...sourceStagingServerSecretPluginSuite.scala | 59 ++ .../SparkPodInitContainerBootstrapSuite.scala | 187 ++---- ...dencyInitContainerVolumesPluginSuite.scala | 60 -- .../kubernetes/submit/ClientSuite.scala | 226 +++++++ .../kubernetes/submit/ClientV2Suite.scala | 558 ------------------ ...ContainerLocalizedFilesResolverSuite.scala | 93 --- ...rConfigurationStepsOrchestratorSuite.scala | 95 +++ ...PodKubernetesCredentialsMounterSuite.scala | 171 ------ 
...cutorInitContainerConfigurationSuite.scala | 56 -- .../PythonSubmissionResourcesSuite.scala | 109 ---- ...rkInitContainerConfigMapBuilderSuite.scala | 101 ---- ...ndencyInitContainerConfigPluginSuite.scala | 89 --- ...ubmittedDependencySecretBuilderSuite.scala | 109 ---- .../BaseDriverConfigurationStepSuite.scala | 106 ++++ .../DependencyResolutionStepSuite.scala | 79 +++ ...DriverKubernetesCredentialsStepSuite.scala | 152 +++++ .../submit/submitsteps/PythonStepSuite.scala | 76 +++ .../initContainerBootstrapStepSuite.scala | 159 +++++ ...eInitContainerConfigurationStepSuite.scala | 98 +++ ...rConfigurationStepsOrchestratorSuite.scala | 94 +++ ...ittedResourcesInitContainerStepSuite.scala | 252 ++++++++ .../integrationtest/KubernetesSuite.scala | 70 ++- 56 files changed, 2946 insertions(+), 2911 deletions(-) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/PodWithDetachedInitContainer.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestrator.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{InitContainerBundle.scala => MainAppResource.scala} (71%) delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala delete mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverConfigurationStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala create mode 100644 
resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/InitContainerBootstrapStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/KubernetesDriverSpec.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala rename resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/{SparkInitContainerConfigMapBuilder.scala => submitsteps/initcontainer/BaseInitContainerConfigurationStep.scala} (62%) create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStep.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestrator.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerSpec.scala create mode 100644 resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerConfigurationStep.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestratorSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala delete mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStepSuite.scala create mode 100644 
resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initContainerBootstrapStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStepSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestratorSuite.scala create mode 100644 resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerStepSuite.scala diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index abd9d58795149..1305aeb8c1faf 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -637,14 +637,17 @@ object SparkSubmit extends CommandLineUtils { if (isKubernetesCluster) { childMainClass = "org.apache.spark.deploy.kubernetes.submit.Client" if (args.isPython) { - childArgs += args.primaryResource - childArgs += "org.apache.spark.deploy.PythonRunner" - childArgs += args.pyFiles + childArgs ++= Array("--primary-py-file", args.primaryResource) + childArgs ++= Array("--main-class", "org.apache.spark.deploy.PythonRunner") + childArgs ++= Array("--other-py-files", args.pyFiles) } else { - childArgs += args.primaryResource - childArgs += args.mainClass + childArgs ++= Array("--primary-java-resource", args.primaryResource) + childArgs ++= Array("--main-class", args.mainClass) + } + args.childArgs.foreach { arg => + childArgs += "--arg" + childArgs += arg } - childArgs ++= args.childArgs } // Load any properties specified through --conf and the default properties file diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala index 45b881a8a3737..265b8f197a102 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPlugin.scala @@ -16,7 +16,7 @@ */ package org.apache.spark.deploy.kubernetes -import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret} +import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder, Secret} import org.apache.spark.deploy.kubernetes.constants._ @@ -27,13 +27,13 @@ private[spark] trait InitContainerResourceStagingServerSecretPlugin { * from a resource staging server. 
*/ def mountResourceStagingServerSecretIntoInitContainer( - initContainer: ContainerBuilder): ContainerBuilder + initContainer: Container): Container /** * Configure the pod to attach a Secret volume which hosts secret files allowing the * init-container to retrieve dependencies from the resource staging server. */ - def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder): PodBuilder + def addResourceStagingServerSecretVolumeToPod(basePod: Pod): Pod } private[spark] class InitContainerResourceStagingServerSecretPluginImpl( @@ -42,21 +42,25 @@ private[spark] class InitContainerResourceStagingServerSecretPluginImpl( extends InitContainerResourceStagingServerSecretPlugin { override def mountResourceStagingServerSecretIntoInitContainer( - initContainer: ContainerBuilder): ContainerBuilder = { - initContainer.addNewVolumeMount() - .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) - .withMountPath(initContainerSecretMountPath) - .endVolumeMount() + initContainer: Container): Container = { + new ContainerBuilder(initContainer) + .addNewVolumeMount() + .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) + .withMountPath(initContainerSecretMountPath) + .endVolumeMount() + .build() } - override def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder): PodBuilder = { - basePod.editSpec() - .addNewVolume() - .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) - .withNewSecret() - .withSecretName(initContainerSecretName) - .endSecret() - .endVolume() - .endSpec() + override def addResourceStagingServerSecretVolumeToPod(basePod: Pod): Pod = { + new PodBuilder(basePod) + .editSpec() + .addNewVolume() + .withName(INIT_CONTAINER_SECRET_VOLUME_NAME) + .withNewSecret() + .withSecretName(initContainerSecretName) + .endSecret() + .endVolume() + .endSpec() + .build() } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/PodWithDetachedInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/PodWithDetachedInitContainer.scala new file mode 100644 index 0000000000000..36b1b07dc6bc4 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/PodWithDetachedInitContainer.scala @@ -0,0 +1,24 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes + +import io.fabric8.kubernetes.api.model.{Container, Pod} + +private[spark] case class PodWithDetachedInitContainer( + pod: Pod, + initContainer: Container, + mainContainer: Container) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala index 87462dbde17a5..2df7ac7a204c4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrap.scala @@ -19,8 +19,11 @@ package org.apache.spark.deploy.kubernetes import io.fabric8.kubernetes.api.model.{ContainerBuilder, EmptyDirVolumeSource, PodBuilder, VolumeMount, VolumeMountBuilder} import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.kubernetes.submit.{ContainerNameEqualityPredicate, InitContainerUtil} +/** + * This is separated out from the init-container steps API because this component can be reused to + * set up the init-container for executors as well. + */ private[spark] trait SparkPodInitContainerBootstrap { /** * Bootstraps an init-container that downloads dependencies to be used by a main container. @@ -28,10 +31,13 @@ private[spark] trait SparkPodInitContainerBootstrap { * by a ConfigMap that was installed by some other component; that is, the implementation * here makes no assumptions about how the init-container is specifically configured. For * example, this class is unaware if the init-container is fetching remote dependencies or if - * it is fetching dependencies from a resource staging server. + * it is fetching dependencies from a resource staging server. Additionally, the container itself + * is not actually attached to the pod, but the init container is returned so it can be attached + * by InitContainerUtil after the caller has decided to make any changes to it. 
*/ def bootstrapInitContainerAndVolumes( - mainContainerName: String, originalPodSpec: PodBuilder): PodBuilder + originalPodWithUnattachedInitContainer: PodWithDetachedInitContainer) + : PodWithDetachedInitContainer } private[spark] class SparkPodInitContainerBootstrapImpl( @@ -41,13 +47,11 @@ private[spark] class SparkPodInitContainerBootstrapImpl( filesDownloadPath: String, downloadTimeoutMinutes: Long, initContainerConfigMapName: String, - initContainerConfigMapKey: String, - resourceStagingServerSecretPlugin: Option[InitContainerResourceStagingServerSecretPlugin]) + initContainerConfigMapKey: String) extends SparkPodInitContainerBootstrap { override def bootstrapInitContainerAndVolumes( - mainContainerName: String, - originalPodSpec: PodBuilder): PodBuilder = { + podWithDetachedInitContainer: PodWithDetachedInitContainer): PodWithDetachedInitContainer = { val sharedVolumeMounts = Seq[VolumeMount]( new VolumeMountBuilder() .withName(INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME) @@ -58,7 +62,7 @@ private[spark] class SparkPodInitContainerBootstrapImpl( .withMountPath(filesDownloadPath) .build()) - val initContainer = new ContainerBuilder() + val initContainer = new ContainerBuilder(podWithDetachedInitContainer.initContainer) .withName(s"spark-init") .withImage(initContainerImage) .withImagePullPolicy(dockerImagePullPolicy) @@ -68,11 +72,8 @@ private[spark] class SparkPodInitContainerBootstrapImpl( .endVolumeMount() .addToVolumeMounts(sharedVolumeMounts: _*) .addToArgs(INIT_CONTAINER_PROPERTIES_FILE_PATH) - val resolvedInitContainer = resourceStagingServerSecretPlugin.map { plugin => - plugin.mountResourceStagingServerSecretIntoInitContainer(initContainer) - }.getOrElse(initContainer).build() - val podWithBasicVolumes = InitContainerUtil.appendInitContainer( - originalPodSpec, resolvedInitContainer) + .build() + val podWithBasicVolumes = new PodBuilder(podWithDetachedInitContainer.pod) .editSpec() .addNewVolume() .withName(INIT_CONTAINER_PROPERTIES_FILE_VOLUME) @@ -92,17 +93,20 @@ private[spark] class SparkPodInitContainerBootstrapImpl( .withName(INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) .withEmptyDir(new EmptyDirVolumeSource()) .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) - .addToVolumeMounts(sharedVolumeMounts: _*) - .addNewEnv() - .withName(ENV_MOUNTED_FILES_DIR) - .withValue(filesDownloadPath) - .endEnv() - .endContainer() .endSpec() - resourceStagingServerSecretPlugin.map { plugin => - plugin.addResourceStagingServerSecretVolumeToPod(podWithBasicVolumes) - }.getOrElse(podWithBasicVolumes) + .build() + val mainContainerWithMountedFiles = new ContainerBuilder( + podWithDetachedInitContainer.mainContainer) + .addToVolumeMounts(sharedVolumeMounts: _*) + .addNewEnv() + .withName(ENV_MOUNTED_FILES_DIR) + .withValue(filesDownloadPath) + .endEnv() + .build() + PodWithDetachedInitContainer( + podWithBasicVolumes, + initContainer, + mainContainerWithMountedFiles) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 781ecbd6c5416..98cd7afcd204d 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -16,233 +16,99 @@ */ package org.apache.spark.deploy.kubernetes.submit -import java.io.File import 
java.util.{Collections, UUID} -import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, OwnerReferenceBuilder, PodBuilder, QuantityBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, OwnerReferenceBuilder, PodBuilder} import io.fabric8.kubernetes.client.KubernetesClient -import scala.collection.JavaConverters._ +import scala.collection.mutable import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkKubernetesClientFactory} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.ResourceStagingServerSslOptionsProviderImpl +import org.apache.spark.deploy.kubernetes.submit.submitsteps.{DriverConfigurationStep, KubernetesDriverSpec} +import org.apache.spark.deploy.kubernetes.SparkKubernetesClientFactory import org.apache.spark.internal.Logging -import org.apache.spark.internal.config.ConfigEntry -import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils -/** - * Submission client for launching Spark applications on Kubernetes clusters. - * - * This class is responsible for instantiating Kubernetes resources that allow a Spark driver to - * run in a pod on the Kubernetes cluster with the Spark configurations specified by spark-submit. - * The API of this class makes it such that much of the specific behavior can be stubbed for - * testing; most of the detailed logic must be dependency-injected when constructing an instance - * of this client. Therefore the submission process is designed to be as modular as possible, - * where different steps of submission should be factored out into separate classes. - */ +private[spark] case class ClientArguments( + mainAppResource: MainAppResource, + otherPyFiles: Seq[String], + mainClass: String, + driverArgs: Array[String]) + +private[spark] object ClientArguments { + def fromCommandLineArgs(args: Array[String]): ClientArguments = { + var mainAppResource: Option[MainAppResource] = None + var otherPyFiles = Seq.empty[String] + var mainClass: Option[String] = None + val driverArgs = mutable.Buffer.empty[String] + args.sliding(2).toList.collect { + case Array("--primary-py-file", mainPyFile: String) => + mainAppResource = Some(PythonMainAppResource(mainPyFile)) + case Array("--primary-java-resource", primaryJavaResource: String) => + mainAppResource = Some(JavaMainAppResource(primaryJavaResource)) + case Array("--main-class", clazz: String) => + mainClass = Some(clazz) + case Array("--other-py-files", pyFiles: String) => + otherPyFiles = pyFiles.split(",") + case Array("--arg", arg: String) => + driverArgs += arg + case other => + throw new RuntimeException(s"Unknown arguments: $other") + } + require(mainAppResource.isDefined, + "Main app resource must be defined by either --primary-py-file or --primary-java-resource.") + require(mainClass.isDefined, "Main class must be specified via --main-class") + ClientArguments( + mainAppResource.get, + otherPyFiles, + mainClass.get, + driverArgs.toArray) + } +} + private[spark] class Client( - appName: String, - kubernetesResourceNamePrefix: String, - kubernetesAppId: String, - mainAppResource: String, - pythonResource: Option[PythonSubmissionResourcesImpl], - mainClass: String, - sparkConf: SparkConf, - appArgs: Array[String], - waitForAppCompletion: Boolean, + submissionSteps: Seq[DriverConfigurationStep], + submissionSparkConf: SparkConf, kubernetesClient: KubernetesClient, - initContainerComponentsProvider: 
DriverInitContainerComponentsProvider, - kubernetesCredentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider, + waitForAppCompletion: Boolean, + appName: String, loggingPodStatusWatcher: LoggingPodStatusWatcher) extends Logging { - private val kubernetesDriverPodName = sparkConf.get(KUBERNETES_DRIVER_POD_NAME) - .getOrElse(s"$kubernetesResourceNamePrefix-driver") - private val driverDockerImage = sparkConf.get(DRIVER_DOCKER_IMAGE) - private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) - - // CPU settings - private val driverCpuCores = sparkConf.getOption("spark.driver.cores").getOrElse("1") - private val driverLimitCores = sparkConf.getOption(KUBERNETES_DRIVER_LIMIT_CORES.key) - // Memory settings - private val driverMemoryMb = sparkConf.get(org.apache.spark.internal.config.DRIVER_MEMORY) - private val memoryOverheadMb = sparkConf - .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) - .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMb).toInt, - MEMORY_OVERHEAD_MIN)) - private val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb - private val customLabels = sparkConf.get(KUBERNETES_DRIVER_LABELS) - private val customAnnotations = sparkConf.get(KUBERNETES_DRIVER_ANNOTATIONS) - - private val driverExtraClasspath = sparkConf.get( - org.apache.spark.internal.config.DRIVER_CLASS_PATH) - private val driverJavaOptions = sparkConf.get( + private val driverJavaOptions = submissionSparkConf.get( org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + /** + * Run command that initalizes a DriverSpec that will be updated after each + * DriverConfigurationStep in the sequence that is passed in. The final KubernetesDriverSpec + * will be used to build the Driver Container, Driver Pod, and Kubernetes Resources + */ def run(): Unit = { - val arguments = (pythonResource map {p => p.arguments}).getOrElse(appArgs) - val driverCustomLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( - sparkConf, - KUBERNETES_DRIVER_LABEL_PREFIX, - KUBERNETES_DRIVER_LABELS, - "label") - require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + - s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + - s" operations.") - - val driverCustomAnnotations = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( - sparkConf, - KUBERNETES_DRIVER_ANNOTATION_PREFIX, - KUBERNETES_DRIVER_ANNOTATIONS, - "annotation") - require(!driverCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), - s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + - s" Spark bookkeeping operations.") - val allDriverLabels = driverCustomLabels ++ Map( - SPARK_APP_ID_LABEL -> kubernetesAppId, - SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) - - val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => - new EnvVarBuilder() - .withName(ENV_SUBMIT_EXTRA_CLASSPATH) - .withValue(classPath) - .build() + var currentDriverSpec = KubernetesDriverSpec.initialSpec(submissionSparkConf) + // submissionSteps contain steps necessary to take, to resolve varying + // client arguments that are passed in, created by orchestrator + for (nextStep <- submissionSteps) { + currentDriverSpec = nextStep.configureDriver(currentDriverSpec) } - val driverCpuQuantity = new QuantityBuilder(false) - .withAmount(driverCpuCores) - .build() - val driverMemoryQuantity = new QuantityBuilder(false) - .withAmount(s"${driverMemoryMb}M") - .build() - val driverMemoryLimitQuantity = new QuantityBuilder(false) - 
.withAmount(s"${driverContainerMemoryWithOverhead}M") - .build() - val driverContainer = new ContainerBuilder() - .withName(DRIVER_CONTAINER_NAME) - .withImage(driverDockerImage) - .withImagePullPolicy(dockerImagePullPolicy) - .addToEnv(driverExtraClasspathEnv.toSeq: _*) + val resolvedDriverJavaOpts = currentDriverSpec + .driverSparkConf + // We don't need this anymore since we just set the JVM options on the environment + .remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) + .getAll + .map { + case (confKey, confValue) => s"-D$confKey=$confValue" + }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") + val resolvedDriverContainer = new ContainerBuilder(currentDriverSpec.driverContainer) .addNewEnv() - .withName(ENV_DRIVER_MEMORY) - .withValue(driverContainerMemoryWithOverhead + "m") + .withName(ENV_DRIVER_JAVA_OPTS) + .withValue(resolvedDriverJavaOpts) .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_MAIN_CLASS) - .withValue(mainClass) - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_ARGS) - .withValue(arguments.mkString(" ")) - .endEnv() - .withNewResources() - .addToRequests("cpu", driverCpuQuantity) - .addToRequests("memory", driverMemoryQuantity) - .addToLimits("memory", driverMemoryLimitQuantity) - .endResources() .build() - val basePod = new PodBuilder() - .withNewMetadata() - .withName(kubernetesDriverPodName) - .addToLabels(allDriverLabels.asJava) - .addToAnnotations(driverCustomAnnotations.toMap.asJava) - .addToAnnotations(SPARK_APP_NAME_ANNOTATION, appName) - .endMetadata() - .withNewSpec() - .withRestartPolicy("Never") - .addToContainers(driverContainer) + val resolvedDriverPod = new PodBuilder(currentDriverSpec.driverPod) + .editSpec() + .addToContainers(resolvedDriverContainer) .endSpec() - - driverLimitCores.map { - limitCores => - val driverCpuLimitQuantity = new QuantityBuilder(false) - .withAmount(limitCores) - .build() - basePod - .editSpec() - .editFirstContainer() - .editResources - .addToLimits("cpu", driverCpuLimitQuantity) - .endResources() - .endContainer() - .endSpec() - } - - val maybeSubmittedResourceIdentifiers = initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(allDriverLabels) - .map { uploader => - SubmittedResources(uploader.uploadJars(), uploader.uploadFiles()) - } - val maybeSubmittedDependenciesSecret = initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceIdentifiers.map(_.secrets())) - .map(_.build()) - - val containerLocalizedFilesResolver = initContainerComponentsProvider - .provideContainerLocalizedFilesResolver(mainAppResource) - val resolvedSparkJars = containerLocalizedFilesResolver.resolveSubmittedSparkJars() - val resolvedSparkFiles = containerLocalizedFilesResolver.resolveSubmittedSparkFiles() - val resolvedPySparkFiles = containerLocalizedFilesResolver.resolveSubmittedPySparkFiles() - val resolvedPrimaryPySparkResource = pythonResource.map { - p => p.primaryPySparkResource(containerLocalizedFilesResolver) - }.getOrElse("") - val initContainerBundler = initContainerComponentsProvider - .provideInitContainerBundle(maybeSubmittedResourceIdentifiers.map(_.ids()), - resolvedSparkJars ++ resolvedSparkFiles) - - val podWithInitContainer = initContainerBundler.map( - _.sparkPodInitContainerBootstrap - .bootstrapInitContainerAndVolumes(driverContainer.getName, basePod)) - .getOrElse(basePod) - val sparkConfWithExecutorInit = initContainerBundler.map( - _.executorInitContainerConfiguration - .configureSparkConfForExecutorInitContainer(sparkConf)) - 
.getOrElse(sparkConf) - val credentialsMounter = kubernetesCredentialsMounterProvider - .getDriverPodKubernetesCredentialsMounter() - val credentialsSecret = credentialsMounter.createCredentialsSecret() - val podWithInitContainerAndMountedCreds = credentialsMounter.mountDriverKubernetesCredentials( - podWithInitContainer, driverContainer.getName, credentialsSecret) - val resolvedSparkConf = credentialsMounter.setDriverPodKubernetesCredentialLocations( - sparkConfWithExecutorInit) - if (resolvedSparkJars.nonEmpty) { - resolvedSparkConf.set("spark.jars", resolvedSparkJars.mkString(",")) - } - if (resolvedSparkFiles.nonEmpty) { - resolvedSparkConf.set("spark.files", resolvedSparkFiles.mkString(",")) - } - resolvedSparkConf.setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) - resolvedSparkConf.set("spark.app.id", kubernetesAppId) - resolvedSparkConf.set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, kubernetesResourceNamePrefix) - // We don't need this anymore since we just set the JVM options on the environment - resolvedSparkConf.remove(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS) - val resolvedLocalClasspath = containerLocalizedFilesResolver - .resolveSubmittedAndRemoteSparkJars() - val resolvedDriverJavaOpts = resolvedSparkConf.getAll.map { - case (confKey, confValue) => s"-D$confKey=$confValue" - }.mkString(" ") + driverJavaOptions.map(" " + _).getOrElse("") - val resolvedDriverPodBuilder = podWithInitContainerAndMountedCreds.editSpec() - .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainer.getName)) - .addNewEnv() - .withName(ENV_MOUNTED_CLASSPATH) - .withValue(resolvedLocalClasspath.mkString(File.pathSeparator)) - .endEnv() - .addNewEnv() - .withName(ENV_DRIVER_JAVA_OPTS) - .withValue(resolvedDriverJavaOpts) - .endEnv() - .endContainer() - .endSpec() - val driverPodFileMounter = initContainerComponentsProvider.provideDriverPodFileMounter() - val resolvedDriverPod = pythonResource.map { - p => p.driverPodWithPySparkEnvs( - driverPodFileMounter, - resolvedPrimaryPySparkResource, - resolvedPySparkFiles.mkString(","), - driverContainer.getName, - resolvedDriverPodBuilder - )}.getOrElse(resolvedDriverPodBuilder.build()) + .build() Utils.tryWithResource( kubernetesClient .pods() @@ -250,22 +116,21 @@ private[spark] class Client( .watch(loggingPodStatusWatcher)) { _ => val createdDriverPod = kubernetesClient.pods().create(resolvedDriverPod) try { - val driverOwnedResources = initContainerBundler.map( - _.sparkInitContainerConfigMap).toSeq ++ - maybeSubmittedDependenciesSecret.toSeq ++ - credentialsSecret.toSeq - val driverPodOwnerReference = new OwnerReferenceBuilder() - .withName(createdDriverPod.getMetadata.getName) - .withApiVersion(createdDriverPod.getApiVersion) - .withUid(createdDriverPod.getMetadata.getUid) - .withKind(createdDriverPod.getKind) - .withController(true) - .build() - driverOwnedResources.foreach { resource => - val originalMetadata = resource.getMetadata - originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + if (currentDriverSpec.otherKubernetesResources.nonEmpty) { + val driverPodOwnerReference = new OwnerReferenceBuilder() + .withName(createdDriverPod.getMetadata.getName) + .withApiVersion(createdDriverPod.getApiVersion) + .withUid(createdDriverPod.getMetadata.getUid) + .withKind(createdDriverPod.getKind) + .withController(true) + .build() + currentDriverSpec.otherKubernetesResources.foreach { resource => + val originalMetadata = resource.getMetadata + 
originalMetadata.setOwnerReferences(Collections.singletonList(driverPodOwnerReference)) + } + val otherKubernetesResources = currentDriverSpec.otherKubernetesResources + kubernetesClient.resourceList(otherKubernetesResources: _*).createOrReplace() } - kubernetesClient.resourceList(driverOwnedResources: _*).createOrReplace() } catch { case e: Throwable => kubernetesClient.pods().delete(createdDriverPod) @@ -283,61 +148,26 @@ private[spark] class Client( } private[spark] object Client { - def main(args: Array[String]): Unit = { - val sparkConf = new SparkConf(true) - val mainAppResource = args(0) - val mainClass = args(1) - val appArgs = args.drop(2) - run(sparkConf, mainAppResource, mainClass, appArgs) - } - def run( - sparkConf: SparkConf, - mainAppResource: String, - mainClass: String, - appArgs: Array[String]): Unit = { - val isPython = mainAppResource.endsWith(".py") - val pythonResource: Option[PythonSubmissionResourcesImpl] = - if (isPython) { - Option(new PythonSubmissionResourcesImpl(mainAppResource, appArgs)) - } else None - // Since you might need jars for SQL UDFs in PySpark - def sparkJarFilter(): Seq[String] = - pythonResource.map {p => p.sparkJars}.getOrElse( - Option(mainAppResource) - .filterNot(_ == SparkLauncher.NO_RESOURCE) - .toSeq) - val sparkJars = sparkConf.getOption("spark.jars") - .map(_.split(",")) - .getOrElse(Array.empty[String]) ++ sparkJarFilter() - val launchTime = System.currentTimeMillis - val sparkFiles = sparkConf.getOption("spark.files") - .map(_.split(",")) - .getOrElse(Array.empty[String]) - val pySparkFilesOption = pythonResource.map {p => p.pySparkFiles} - validateNoDuplicateFileNames(sparkJars) - validateNoDuplicateFileNames(sparkFiles) - pySparkFilesOption.foreach {b => validateNoDuplicateFileNames(b)} - val pySparkFiles = pySparkFilesOption.getOrElse(Array.empty[String]) - val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") - // The resource name prefix is derived from the application name, making it easy to connect the - // names of the Kubernetes resources from e.g. Kubectl or the Kubernetes dashboard to the - // application the user submitted. However, we can't use the application name in the label, as - // label values are considerably restrictive, e.g. must be no longer than 63 characters in - // length. So we generate a separate identifier for the app ID itself, and bookkeeping that - // requires finding "all pods for this application" should use the kubernetesAppId. 
- val kubernetesResourceNamePrefix = s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") - val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}" + def run(sparkConf: SparkConf, clientArguments: ClientArguments): Unit = { val namespace = sparkConf.get(KUBERNETES_NAMESPACE) + val kubernetesAppId = s"spark-${UUID.randomUUID().toString.replaceAll("-", "")}" + val launchTime = System.currentTimeMillis() + val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) + val appName = sparkConf.getOption("spark.app.name").getOrElse("spark") val master = resolveK8sMaster(sparkConf.get("spark.master")) - val sslOptionsProvider = new ResourceStagingServerSslOptionsProviderImpl(sparkConf) - val initContainerComponentsProvider = new DriverInitContainerComponentsProviderImpl( - sparkConf, - kubernetesResourceNamePrefix, + val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)).filter( _ => waitForAppCompletion) + val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl( + kubernetesAppId, loggingInterval) + val configurationStepsOrchestrator = new DriverConfigurationStepsOrchestrator( namespace, - sparkJars, - sparkFiles, - pySparkFiles, - sslOptionsProvider.getSslOptions) + kubernetesAppId, + launchTime, + clientArguments.mainAppResource, + appName, + clientArguments.mainClass, + clientArguments.driverArgs, + clientArguments.otherPyFiles, + sparkConf) Utils.tryWithResource(SparkKubernetesClientFactory.createKubernetesClient( master, Some(namespace), @@ -345,38 +175,25 @@ private[spark] object Client { sparkConf, None, None)) { kubernetesClient => - val kubernetesCredentialsMounterProvider = - new DriverPodKubernetesCredentialsMounterProviderImpl( - sparkConf, kubernetesResourceNamePrefix) - val waitForAppCompletion = sparkConf.get(WAIT_FOR_APP_COMPLETION) - val loggingInterval = Option(sparkConf.get(REPORT_INTERVAL)) - .filter( _ => waitForAppCompletion) - val loggingPodStatusWatcher = new LoggingPodStatusWatcherImpl( - kubernetesResourceNamePrefix, loggingInterval) new Client( - appName, - kubernetesResourceNamePrefix, - kubernetesAppId, - mainAppResource, - pythonResource, - mainClass, + configurationStepsOrchestrator.getAllConfigurationSteps(), sparkConf, - appArgs, - waitForAppCompletion, kubernetesClient, - initContainerComponentsProvider, - kubernetesCredentialsMounterProvider, + waitForAppCompletion, + appName, loggingPodStatusWatcher).run() } } - private def validateNoDuplicateFileNames(allFiles: Seq[String]): Unit = { - val fileNamesToUris = allFiles.map { file => - (new File(Utils.resolveURI(file).getPath).getName, file) - } - fileNamesToUris.groupBy(_._1).foreach { - case (fileName, urisWithFileName) => - require(urisWithFileName.size == 1, "Cannot add multiple files with the same name, but" + - s" file name $fileName is shared by all of these URIs: $urisWithFileName") - } + + /** + * Entry point from SparkSubmit in spark-core + * + * @param args Array of strings that have interchanging values that will be + * parsed by ClientArguments with the identifiers that precede the values + */ + def main(args: Array[String]): Unit = { + val parsedArguments = ClientArguments.fromCommandLineArgs(args) + val sparkConf = new SparkConf() + run(sparkConf, parsedArguments) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala deleted file 
mode 100644 index c31aa5f306bea..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolver.scala +++ /dev/null @@ -1,87 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import org.apache.spark.util.Utils - -private[spark] trait ContainerLocalizedFilesResolver { - def resolveSubmittedAndRemoteSparkJars(): Seq[String] - def resolveSubmittedSparkJars(): Seq[String] - def resolveSubmittedSparkFiles(): Seq[String] - def resolveSubmittedPySparkFiles(): Seq[String] - def resolvePrimaryResourceFile(): String -} - -private[spark] class ContainerLocalizedFilesResolverImpl( - sparkJars: Seq[String], - sparkFiles: Seq[String], - pySparkFiles: Seq[String], - primaryPyFile: String, - jarsDownloadPath: String, - filesDownloadPath: String) extends ContainerLocalizedFilesResolver { - - - override def resolveSubmittedAndRemoteSparkJars(): Seq[String] = { - sparkJars.map { jar => - val jarUri = Utils.resolveURI(jar) - Option(jarUri.getScheme).getOrElse("file") match { - case "local" => - jarUri.getPath - case _ => - val jarFileName = new File(jarUri.getPath).getName - s"$jarsDownloadPath/$jarFileName" - } - } - } - - override def resolveSubmittedSparkJars(): Seq[String] = { - resolveSubmittedFiles(sparkJars, jarsDownloadPath) - } - - override def resolveSubmittedSparkFiles(): Seq[String] = { - resolveSubmittedFiles(sparkFiles, filesDownloadPath) - } - - override def resolveSubmittedPySparkFiles(): Seq[String] = { - def filterMainResource(x: String) = x match { - case `primaryPyFile` => None - case _ => Some(resolveFile(x, filesDownloadPath)) - } - pySparkFiles.flatMap(x => filterMainResource(x)) - } - - override def resolvePrimaryResourceFile(): String = { - Option(primaryPyFile).map(p => resolveFile(p, filesDownloadPath)).getOrElse("") - } - - private def resolveFile(file: String, downloadPath: String) = { - val fileUri = Utils.resolveURI(file) - Option(fileUri.getScheme).getOrElse("file") match { - case "file" => - val fileName = new File(fileUri.getPath).getName - s"$downloadPath/$fileName" - case _ => - file - } - } - - private def resolveSubmittedFiles(files: Seq[String], downloadPath: String): Seq[String] = { - files.map { file => resolveFile(file, downloadPath) } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestrator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestrator.scala new file mode 100644 index 0000000000000..82abe55ac6989 --- /dev/null +++ 
b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestrator.scala @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.ConfigurationUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.submitsteps.{BaseDriverConfigurationStep, DependencyResolutionStep, DriverConfigurationStep, DriverKubernetesCredentialsStep, InitContainerBootstrapStep, PythonStep} +import org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer.InitContainerConfigurationStepsOrchestrator +import org.apache.spark.launcher.SparkLauncher +import org.apache.spark.util.Utils + +/** + * Constructs the complete list of driver configuration steps to run to deploy the Spark driver. + */ +private[spark] class DriverConfigurationStepsOrchestrator( + namespace: String, + kubernetesAppId: String, + launchTime: Long, + mainAppResource: MainAppResource, + appName: String, + mainClass: String, + appArgs: Array[String], + additionalPythonFiles: Seq[String], + submissionSparkConf: SparkConf) { + + // The resource name prefix is derived from the application name, making it easy to connect the + // names of the Kubernetes resources from e.g. kubectl or the Kubernetes dashboard to the + // application the user submitted. However, we can't use the application name in the label, as + // label values are considerably restrictive, e.g. must be no longer than 63 characters in + // length. So we generate a separate identifier for the app ID itself, and bookkeeping that + // requires finding "all pods for this application" should use the kubernetesAppId. 
+ private val kubernetesResourceNamePrefix = + s"$appName-$launchTime".toLowerCase.replaceAll("\\.", "-") + private val jarsDownloadPath = submissionSparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) + private val filesDownloadPath = submissionSparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) + private val dockerImagePullPolicy = submissionSparkConf.get(DOCKER_IMAGE_PULL_POLICY) + private val initContainerConfigMapName = s"$kubernetesResourceNamePrefix-init-config" + + def getAllConfigurationSteps(): Seq[DriverConfigurationStep] = { + val additionalMainAppJar = mainAppResource match { + case JavaMainAppResource(resource) if resource != SparkLauncher.NO_RESOURCE => + Option(resource) + case _ => Option.empty + } + val additionalMainAppPythonFile = mainAppResource match { + case PythonMainAppResource(resource) if resource != SparkLauncher.NO_RESOURCE => + Option(resource) + case _ => Option.empty + } + val sparkJars = submissionSparkConf.getOption("spark.jars") + .map(_.split(",")) + .getOrElse(Array.empty[String]) ++ + additionalMainAppJar.toSeq + val sparkFiles = submissionSparkConf.getOption("spark.files") + .map(_.split(",")) + .getOrElse(Array.empty[String]) ++ + additionalMainAppPythonFile.toSeq ++ + additionalPythonFiles + val driverCustomLabels = ConfigurationUtils.combinePrefixedKeyValuePairsWithDeprecatedConf( + submissionSparkConf, + KUBERNETES_DRIVER_LABEL_PREFIX, + KUBERNETES_DRIVER_LABELS, + "label") + require(!driverCustomLabels.contains(SPARK_APP_ID_LABEL), s"Label with key " + + s" $SPARK_APP_ID_LABEL is not allowed as it is reserved for Spark bookkeeping" + + s" operations.") + val allDriverLabels = driverCustomLabels ++ Map( + SPARK_APP_ID_LABEL -> kubernetesAppId, + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) + val initialSubmissionStep = new BaseDriverConfigurationStep( + kubernetesAppId, + kubernetesResourceNamePrefix, + allDriverLabels, + dockerImagePullPolicy, + appName, + mainClass, + appArgs, + submissionSparkConf) + val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( + submissionSparkConf, kubernetesResourceNamePrefix) + val pythonStep = mainAppResource match { + case PythonMainAppResource(mainPyResource) => + Option(new PythonStep(mainPyResource, additionalPythonFiles, filesDownloadPath)) + case _ => Option.empty[DriverConfigurationStep] + } + val initContainerBootstrapStep = if ((sparkJars ++ sparkFiles).exists { uri => + Option(Utils.resolveURI(uri).getScheme).getOrElse("file") != "local" + }) { + val initContainerConfigurationStepsOrchestrator = + new InitContainerConfigurationStepsOrchestrator( + namespace, + kubernetesResourceNamePrefix, + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath, + dockerImagePullPolicy, + allDriverLabels, + initContainerConfigMapName, + INIT_CONTAINER_CONFIG_MAP_KEY, + submissionSparkConf) + val initContainerConfigurationSteps = + initContainerConfigurationStepsOrchestrator.getAllConfigurationSteps() + Some(new InitContainerBootstrapStep(initContainerConfigurationSteps, + initContainerConfigMapName, + INIT_CONTAINER_CONFIG_MAP_KEY)) + } else { + Option.empty[DriverConfigurationStep] + } + val dependencyResolutionStep = new DependencyResolutionStep( + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath) + Seq( + initialSubmissionStep, + kubernetesCredentialsStep, + dependencyResolutionStep) ++ + initContainerBootstrapStep.toSeq ++ + pythonStep.toSeq + } +} diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala deleted file mode 100644 index 6e185d2c069f6..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverInitContainerComponentsProvider.scala +++ /dev/null @@ -1,223 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.ConfigMap - -import org.apache.spark.{SparkConf, SSLOptions} -import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrap, SparkPodInitContainerBootstrapImpl} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.deploy.rest.kubernetes.RetrofitClientFactoryImpl -import org.apache.spark.util.Utils - -/** - * Interface that wraps the provision of everything the submission client needs to set up the - * driver's init-container. This is all wrapped in the same place to ensure that related - * components are being constructed with consistent configurations with respect to one another. 
- */ -private[spark] trait DriverInitContainerComponentsProvider { - - def provideContainerLocalizedFilesResolver( - mainAppResource: String): ContainerLocalizedFilesResolver - def provideInitContainerSubmittedDependencyUploader( - driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] - def provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) - : Option[SubmittedDependencySecretBuilder] - def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap - def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter - def provideInitContainerBundle(maybeSubmittedResourceIds: Option[SubmittedResourceIds], - uris: Iterable[String]): Option[InitContainerBundle] -} - -private[spark] class DriverInitContainerComponentsProviderImpl( - sparkConf: SparkConf, - kubernetesResourceNamePrefix: String, - namespace: String, - sparkJars: Seq[String], - sparkFiles: Seq[String], - pySparkFiles: Seq[String], - resourceStagingServerExternalSslOptions: SSLOptions) - extends DriverInitContainerComponentsProvider { - - private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) - private val maybeResourceStagingServerInternalUri = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_URI) - private val maybeResourceStagingServerInternalTrustStore = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_FILE) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE)) - private val maybeResourceStagingServerInternalTrustStorePassword = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_PASSWORD) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD)) - private val maybeResourceStagingServerInternalTrustStoreType = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_TYPE) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE)) - private val maybeResourceStagingServerInternalClientCert = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_CLIENT_CERT_PEM) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM)) - private val resourceStagingServerInternalSslEnabled = - sparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_SSL_ENABLED) - .orElse(sparkConf.get(RESOURCE_STAGING_SERVER_SSL_ENABLED)) - .getOrElse(false) - - OptionRequirements.requireNandDefined( - maybeResourceStagingServerInternalClientCert, - maybeResourceStagingServerInternalTrustStore, - "Cannot provide both a certificate file and a trustStore file for init-containers to" + - " use for contacting the resource staging server over TLS.") - - require(maybeResourceStagingServerInternalTrustStore.forall { trustStore => - Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { - case "file" | "local" => true - case _ => false - } - }, "TrustStore URI used for contacting the resource staging server from init containers must" + - " have no scheme, or scheme file://, or scheme local://.") - - require(maybeResourceStagingServerInternalClientCert.forall { trustStore => - Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { - case "file" | "local" => true - case _ => false - } - }, "Client cert file URI used for contacting the resource staging server from init containers" + - " must have no scheme, or scheme file://, or scheme local://.") - - private val jarsDownloadPath = sparkConf.get(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION) - private val filesDownloadPath = sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION) - private val maybeSecretName = 
maybeResourceStagingServerUri.map { _ => - s"$kubernetesResourceNamePrefix-init-secret" - } - private val configMapName = s"$kubernetesResourceNamePrefix-init-config" - private val configMapKey = s"$kubernetesResourceNamePrefix-init-config-key" - private val initContainerImage = sparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) - private val dockerImagePullPolicy = sparkConf.get(DOCKER_IMAGE_PULL_POLICY) - private val downloadTimeoutMinutes = sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) - private val pySparkSubmitted = KubernetesFileUtils.getOnlySubmitterLocalFiles(pySparkFiles) - - private def provideInitContainerConfigMap( - maybeSubmittedResourceIds: Option[SubmittedResourceIds]): ConfigMap = { - val submittedDependencyConfigPlugin = for { - stagingServerUri <- maybeResourceStagingServerUri - jarsResourceId <- maybeSubmittedResourceIds.map(_.jarsResourceId) - filesResourceId <- maybeSubmittedResourceIds.map(_.filesResourceId) - } yield { - new SubmittedDependencyInitContainerConfigPluginImpl( - // Configure the init-container with the internal URI over the external URI. - maybeResourceStagingServerInternalUri.getOrElse(stagingServerUri), - jarsResourceId, - filesResourceId, - INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, - INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, - resourceStagingServerInternalSslEnabled, - maybeResourceStagingServerInternalTrustStore, - maybeResourceStagingServerInternalClientCert, - maybeResourceStagingServerInternalTrustStorePassword, - maybeResourceStagingServerInternalTrustStoreType, - INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) - } - new SparkInitContainerConfigMapBuilderImpl( - sparkJars, - sparkFiles ++ pySparkSubmitted, - jarsDownloadPath, - filesDownloadPath, - configMapName, - configMapKey, - submittedDependencyConfigPlugin).build() - } - - override def provideContainerLocalizedFilesResolver(mainAppResource: String) - : ContainerLocalizedFilesResolver = { - new ContainerLocalizedFilesResolverImpl( - sparkJars, sparkFiles, pySparkFiles, mainAppResource, jarsDownloadPath, filesDownloadPath) - } - - private def provideExecutorInitContainerConfiguration(): ExecutorInitContainerConfiguration = { - new ExecutorInitContainerConfigurationImpl( - maybeSecretName, - INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, - configMapName, - configMapKey) - } - - override def provideInitContainerSubmittedDependencyUploader( - driverPodLabels: Map[String, String]): Option[SubmittedDependencyUploader] = { - maybeResourceStagingServerUri.map { stagingServerUri => - new SubmittedDependencyUploaderImpl( - driverPodLabels, - namespace, - stagingServerUri, - sparkJars, - sparkFiles ++ pySparkSubmitted, - resourceStagingServerExternalSslOptions, - RetrofitClientFactoryImpl) - } - } - - override def provideSubmittedDependenciesSecretBuilder( - maybeSubmittedResourceSecrets: Option[SubmittedResourceSecrets]) - : Option[SubmittedDependencySecretBuilder] = { - for { - secretName <- maybeSecretName - jarsResourceSecret <- maybeSubmittedResourceSecrets.map(_.jarsResourceSecret) - filesResourceSecret <- maybeSubmittedResourceSecrets.map(_.filesResourceSecret) - } yield { - new SubmittedDependencySecretBuilderImpl( - secretName, - jarsResourceSecret, - filesResourceSecret, - INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY, - INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, - INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, - 
maybeResourceStagingServerInternalTrustStore, - maybeResourceStagingServerInternalClientCert) - } - } - - override def provideInitContainerBootstrap(): SparkPodInitContainerBootstrap = { - val resourceStagingServerSecretPlugin = maybeSecretName.map { secret => - new InitContainerResourceStagingServerSecretPluginImpl( - secret, INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) - } - new SparkPodInitContainerBootstrapImpl( - initContainerImage, - dockerImagePullPolicy, - jarsDownloadPath, - filesDownloadPath, - downloadTimeoutMinutes, - configMapName, - configMapKey, - resourceStagingServerSecretPlugin) - } - override def provideDriverPodFileMounter(): DriverPodKubernetesFileMounter = { - new DriverPodKubernetesFileMounterImpl() - } - override def provideInitContainerBundle( - maybeSubmittedResourceIds: Option[SubmittedResourceIds], - uris: Iterable[String]): Option[InitContainerBundle] = { - // Bypass init-containers if `spark.jars` and `spark.files` and '--py-rilfes' - // is empty or only has `local://` URIs - if ((KubernetesFileUtils.getNonContainerLocalFiles(uris) ++ pySparkSubmitted).nonEmpty) { - Some(InitContainerBundle(provideInitContainerConfigMap(maybeSubmittedResourceIds), - provideInitContainerBootstrap(), - provideExecutorInitContainerConfiguration())) - } else None - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala deleted file mode 100644 index 25e7c3b3ebd89..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounter.scala +++ /dev/null @@ -1,184 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.{PodBuilder, Secret, SecretBuilder} -import scala.collection.JavaConverters._ -import scala.language.implicitConversions - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ - -private[spark] trait DriverPodKubernetesCredentialsMounter { - - /** - * Set fields on the Spark configuration that indicate where the driver pod is - * to find its Kubernetes credentials for requesting executors. - */ - def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf - - /** - * Create the Kubernetes secret object that correspond to the driver's credentials - * that have to be created and mounted into the driver pod. 
The single Secret - * object contains all of the data entries for the driver pod's Kubernetes - * credentials. Returns empty if no secrets are to be mounted. - */ - def createCredentialsSecret(): Option[Secret] - - /** - * Mount any Kubernetes credentials from the submitting machine's disk into the driver pod. The - * secret that is passed in here should have been created from createCredentialsSecret so that - * the implementation does not need to hold its state. - */ - def mountDriverKubernetesCredentials( - originalPodSpec: PodBuilder, - driverContainerName: String, - credentialsSecret: Option[Secret]): PodBuilder -} - -private[spark] class DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId: String, - submitterLocalDriverPodKubernetesCredentials: KubernetesCredentials, - maybeUserSpecifiedMountedClientKeyFile: Option[String], - maybeUserSpecifiedMountedClientCertFile: Option[String], - maybeUserSpecifiedMountedOAuthTokenFile: Option[String], - maybeUserSpecifiedMountedCaCertFile: Option[String]) - extends DriverPodKubernetesCredentialsMounter { - - override def setDriverPodKubernetesCredentialLocations(sparkConf: SparkConf): SparkConf = { - val resolvedMountedClientKeyFile = resolveSecretLocation( - maybeUserSpecifiedMountedClientKeyFile, - submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, - DRIVER_CREDENTIALS_CLIENT_KEY_PATH) - val resolvedMountedClientCertFile = resolveSecretLocation( - maybeUserSpecifiedMountedClientCertFile, - submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, - DRIVER_CREDENTIALS_CLIENT_CERT_PATH) - val resolvedMountedCaCertFile = resolveSecretLocation( - maybeUserSpecifiedMountedCaCertFile, - submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, - DRIVER_CREDENTIALS_CA_CERT_PATH) - val resolvedMountedOAuthTokenFile = resolveSecretLocation( - maybeUserSpecifiedMountedOAuthTokenFile, - submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, - DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) - val sparkConfWithCredentialLocations = sparkConf.clone() - .setOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", - resolvedMountedCaCertFile) - .setOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", - resolvedMountedClientKeyFile) - .setOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", - resolvedMountedClientCertFile) - .setOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", - resolvedMountedOAuthTokenFile) - // Redact all OAuth token values - sparkConfWithCredentialLocations - .getAll - .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)).map(_._1) - .foreach { - sparkConfWithCredentialLocations.set(_, "") - } - sparkConfWithCredentialLocations - } - - override def createCredentialsSecret(): Option[Secret] = { - val allSecretData = - resolveSecretData( - maybeUserSpecifiedMountedClientKeyFile, - submitterLocalDriverPodKubernetesCredentials.clientKeyDataBase64, - DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME) ++ - resolveSecretData( - maybeUserSpecifiedMountedClientCertFile, - submitterLocalDriverPodKubernetesCredentials.clientCertDataBase64, - DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME) ++ - resolveSecretData( - maybeUserSpecifiedMountedCaCertFile, - submitterLocalDriverPodKubernetesCredentials.caCertDataBase64, - DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME) ++ - resolveSecretData( - maybeUserSpecifiedMountedOAuthTokenFile, - submitterLocalDriverPodKubernetesCredentials.oauthTokenBase64, - 
DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME) - if (allSecretData.isEmpty) { - None - } else { - Some(new SecretBuilder() - .withNewMetadata().withName(s"$kubernetesAppId-kubernetes-credentials").endMetadata() - .withData(allSecretData.asJava) - .build()) - } - } - - override def mountDriverKubernetesCredentials( - originalPodSpec: PodBuilder, - driverContainerName: String, - credentialsSecret: Option[Secret]): PodBuilder = { - credentialsSecret.map { secret => - originalPodSpec.editSpec() - .addNewVolume() - .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) - .withNewSecret().withSecretName(secret.getMetadata.getName).endSecret() - .endVolume() - .editMatchingContainer(new ContainerNameEqualityPredicate(driverContainerName)) - .addNewVolumeMount() - .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) - .withMountPath(DRIVER_CREDENTIALS_SECRETS_BASE_DIR) - .endVolumeMount() - .endContainer() - .endSpec() - }.getOrElse(originalPodSpec) - } - - private def resolveSecretLocation( - mountedUserSpecified: Option[String], - valueMountedFromSubmitter: Option[String], - mountedCanonicalLocation: String): Option[String] = { - mountedUserSpecified.orElse(valueMountedFromSubmitter.map( _ => { - mountedCanonicalLocation - })) - } - - private def resolveSecretData( - mountedUserSpecified: Option[String], - valueMountedFromSubmitter: Option[String], - secretName: String): Map[String, String] = { - mountedUserSpecified.map { _ => Map.empty[String, String]} - .getOrElse { - valueMountedFromSubmitter.map { valueBase64 => - Map(secretName -> valueBase64) - }.getOrElse(Map.empty[String, String]) - } - } - - private implicit def augmentSparkConf(sparkConf: SparkConf): OptionSettableSparkConf = { - new OptionSettableSparkConf(sparkConf) - } -} - -private class OptionSettableSparkConf(sparkConf: SparkConf) { - def setOption(configEntry: String, option: Option[String]): SparkConf = { - option.map( opt => { - sparkConf.set(configEntry, opt) - }).getOrElse(sparkConf) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala deleted file mode 100644 index 913279198146a..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterProvider.scala +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ - -private[spark] trait DriverPodKubernetesCredentialsMounterProvider { - - def getDriverPodKubernetesCredentialsMounter() - : DriverPodKubernetesCredentialsMounter -} - -private[spark] class DriverPodKubernetesCredentialsMounterProviderImpl( - sparkConf: SparkConf, - kubernetesAppId: String) - extends DriverPodKubernetesCredentialsMounterProvider { - - override def getDriverPodKubernetesCredentialsMounter() - : DriverPodKubernetesCredentialsMounter = { - val submitterLocalDriverPodKubernetesCredentials = - new DriverPodKubernetesCredentialsProvider(sparkConf).get() - new DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId, - submitterLocalDriverPodKubernetesCredentials, - sparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX"), - sparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX"), - sparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX"), - sparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX")) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala deleted file mode 100644 index 41b0cf8ceaeab..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsProvider.scala +++ /dev/null @@ -1,63 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.internal.config.OptionalConfigEntry - -private[spark] class DriverPodKubernetesCredentialsProvider(sparkConf: SparkConf) { - - def get(): KubernetesCredentials = { - val oauthTokenBase64 = sparkConf - .getOption(s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") - .map { token => - BaseEncoding.base64().encode(token.getBytes(Charsets.UTF_8)) - } - val caCertDataBase64 = safeFileConfToBase64( - s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", - s"Driver CA cert file provided at %s does not exist or is not a file.") - val clientKeyDataBase64 = safeFileConfToBase64( - s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", - s"Driver client key file provided at %s does not exist or is not a file.") - val clientCertDataBase64 = safeFileConfToBase64( - s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", - s"Driver client cert file provided at %s does not exist or is not a file.") - KubernetesCredentials( - oauthTokenBase64 = oauthTokenBase64, - caCertDataBase64 = caCertDataBase64, - clientKeyDataBase64 = clientKeyDataBase64, - clientCertDataBase64 = clientCertDataBase64) - } - - private def safeFileConfToBase64( - conf: String, - fileNotFoundFormatString: String): Option[String] = { - sparkConf.getOption(conf) - .map(new File(_)) - .map { file => - require(file.isFile, String.format(fileNotFoundFormatString, file.getAbsolutePath)) - BaseEncoding.base64().encode(Files.toByteArray(file)) - } - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala deleted file mode 100644 index cc0ef0eedb457..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesFileMounter.scala +++ /dev/null @@ -1,55 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.{Container, PodBuilder} - -import org.apache.spark.deploy.kubernetes.constants._ - - /** - * Trait that is responsible for providing full file-paths dynamically after - * the filesDownloadPath has been defined. The file-names are then stored in the - * environmental variables in the driver-pod. 
- */ -private[spark] trait DriverPodKubernetesFileMounter { - def addPySparkFiles(primaryFile: String, pySparkFiles: String, - mainContainerName: String, originalPodSpec: PodBuilder) : PodBuilder -} - -private[spark] class DriverPodKubernetesFileMounterImpl() - extends DriverPodKubernetesFileMounter { - override def addPySparkFiles( - primaryFile: String, - pySparkFiles: String, - mainContainerName: String, - originalPodSpec: PodBuilder): PodBuilder = { - - originalPodSpec - .editSpec() - .editMatchingContainer(new ContainerNameEqualityPredicate(mainContainerName)) - .addNewEnv() - .withName(ENV_PYSPARK_PRIMARY) - .withValue(primaryFile) - .endEnv() - .addNewEnv() - .withName(ENV_PYSPARK_FILES) - .withValue(pySparkFiles) - .endEnv() - .endContainer() - .endSpec() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala deleted file mode 100644 index 2292365995d1f..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfiguration.scala +++ /dev/null @@ -1,47 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkConf -import org.apache.spark.deploy.kubernetes.config._ - -private[spark] trait ExecutorInitContainerConfiguration { - /** - * Provide the driver with configuration that allows it to configure executors to - * fetch resources in the same way the driver does. 
- */ - def configureSparkConfForExecutorInitContainer(originalSparkConf: SparkConf): SparkConf -} - -private[spark] class ExecutorInitContainerConfigurationImpl( - initContainerSecretName: Option[String], - initContainerSecretMountDir: String, - initContainerConfigMapName: String, - initContainerConfigMapKey: String) - extends ExecutorInitContainerConfiguration { - def configureSparkConfForExecutorInitContainer(originalSparkConf: SparkConf): SparkConf = { - val configuredSparkConf = originalSparkConf.clone() - .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP, - initContainerConfigMapName) - .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY, - initContainerConfigMapKey) - .set(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR, initContainerSecretMountDir) - initContainerSecretName.map { secret => - configuredSparkConf.set(EXECUTOR_INIT_CONTAINER_SECRET, secret) - }.getOrElse(configuredSparkConf) - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala index 9b7faaa78a9aa..837ec0e8c867e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerUtil.scala @@ -18,19 +18,18 @@ package org.apache.spark.deploy.kubernetes.submit import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import io.fabric8.kubernetes.api.model.{Container, PodBuilder} +import io.fabric8.kubernetes.api.model.{Container, Pod, PodBuilder} import scala.collection.JavaConverters._ import org.apache.spark.deploy.kubernetes.constants._ private[spark] object InitContainerUtil { - private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) + private val OBJECT_MAPPER = new ObjectMapper().registerModule(DefaultScalaModule) - def appendInitContainer( - originalPodSpec: PodBuilder, initContainer: Container): PodBuilder = { + def appendInitContainer(originalPodSpec: Pod, initContainer: Container): Pod = { val resolvedInitContainers = originalPodSpec - .editMetadata() + .getMetadata .getAnnotations .asScala .get(INIT_CONTAINER_ANNOTATION) @@ -40,10 +39,11 @@ private[spark] object InitContainerUtil { existingInitContainers ++ Seq(initContainer) }.getOrElse(Seq(initContainer)) val resolvedSerializedInitContainers = OBJECT_MAPPER.writeValueAsString(resolvedInitContainers) - originalPodSpec + new PodBuilder(originalPodSpec) .editMetadata() - .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) - .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) - .endMetadata() + .removeFromAnnotations(INIT_CONTAINER_ANNOTATION) + .addToAnnotations(INIT_CONTAINER_ANNOTATION, resolvedSerializedInitContainers) + .endMetadata() + .build() } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala index d688bf29808fb..ec591923f1472 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesFileUtils.scala @@ -16,6 +16,8 @@ */ package 
org.apache.spark.deploy.kubernetes.submit +import java.io.File + import org.apache.spark.util.Utils private[spark] object KubernetesFileUtils { @@ -48,4 +50,47 @@ private[spark] object KubernetesFileUtils { def getOnlyRemoteFiles(uris: Iterable[String]): Iterable[String] = { filterUriStringsByScheme(uris, scheme => scheme != "file" && scheme != "local") } + + /** + * For the collection of uris, resolves any files as follows: + * - Files with scheme file:// are resolved to the given download path + * - Files with scheme local:// resolve to just the path of the URI + * - Otherwise, the URI is returned as-is. + */ + def resolveSubmittedUris(fileUris: Iterable[String], fileDownloadPath: String) + : Iterable[String] = { + fileUris.map { uri => + val fileUri = Utils.resolveURI(uri) + val fileScheme = Option(fileUri.getScheme).getOrElse("file") + fileScheme match { + case "file" => + val fileName = new File(fileUri.getPath).getName + s"$fileDownloadPath/$fileName" + case "local" => + fileUri.getPath + case _ => uri + } + } + } + + /** + * If any file uri has any scheme other than local:// it is mapped as if the file + * was downloaded to the file download path. Otherwise, it is mapped to the path + * part of the URI. + */ + def resolveFilePaths(fileUris: Iterable[String], fileDownloadPath: String): Iterable[String] = { + fileUris.map { uri => + resolveFilePath(uri, fileDownloadPath) + } + } + + def resolveFilePath(uri: String, fileDownloadPath: String): String = { + val fileUri = Utils.resolveURI(uri) + if (Option(fileUri.getScheme).getOrElse("file") == "local") { + fileUri.getPath + } else { + val fileName = new File(fileUri.getPath).getName + s"$fileDownloadPath/$fileName" + } + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/MainAppResource.scala similarity index 71% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/MainAppResource.scala index ba44f794d5811..436d531a850ff 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/InitContainerBundle.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/MainAppResource.scala @@ -16,11 +16,8 @@ */ package org.apache.spark.deploy.kubernetes.submit -import io.fabric8.kubernetes.api.model.ConfigMap +private[spark] sealed trait MainAppResource -import org.apache.spark.deploy.kubernetes.{SparkPodInitContainerBootstrap} +private[spark] case class PythonMainAppResource(primaryPyFile: String) extends MainAppResource -case class InitContainerBundle( - sparkInitContainerConfigMap: ConfigMap, - sparkPodInitContainerBootstrap: SparkPodInitContainerBootstrap, - executorInitContainerConfiguration: ExecutorInitContainerConfiguration) +private[spark] case class JavaMainAppResource(primaryResource: String) extends MainAppResource diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala deleted file mode 100644 index c61e930a2b97f..0000000000000 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResources.scala +++ /dev/null @@ -1,75 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.{Pod, PodBuilder} - -private[spark] trait PythonSubmissionResources { - def sparkJars: Seq[String] - def pySparkFiles: Array[String] - def arguments: Array[String] - def primaryPySparkResource(containerLocalizedFilesResolver: ContainerLocalizedFilesResolver) - : String - def driverPodWithPySparkEnvs( - driverPodFileMounter: DriverPodKubernetesFileMounter, - resolvedPrimaryPySparkResource: String, - resolvedPySparkFiles: String, - driverContainerName: String, - driverPodBuilder: PodBuilder): Pod -} - -private[spark] class PythonSubmissionResourcesImpl( - private val mainAppResource: String, - private val appArgs: Array[String] ) extends PythonSubmissionResources { - - private val pyFiles: Array[String] = { - Option(appArgs(0)).map(a => mainAppResource +: a.split(",")) - .getOrElse(Array(mainAppResource)) - } - - override def sparkJars: Seq[String] = Seq.empty[String] - - override def pySparkFiles: Array[String] = pyFiles - - override def arguments: Array[String] = { - pyFiles.toList match { - case Nil => appArgs - case a :: b => a match { - case _ if a == mainAppResource && b == Nil => appArgs - case _ => appArgs.drop(1) - } - } - } - override def primaryPySparkResource( - containerLocalizedFilesResolver: ContainerLocalizedFilesResolver) : String = - containerLocalizedFilesResolver.resolvePrimaryResourceFile() - - override def driverPodWithPySparkEnvs( - driverPodFileMounter: DriverPodKubernetesFileMounter, - resolvedPrimaryPySparkResource: String, - resolvedPySparkFiles: String, - driverContainerName: String, - driverPodBuilder: PodBuilder) : Pod = { - driverPodFileMounter - .addPySparkFiles( - resolvedPrimaryPySparkResource, - resolvedPySparkFiles, - driverContainerName, - driverPodBuilder) - .build() - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala deleted file mode 100644 index 06d3648efb89f..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPlugin.scala +++ /dev/null @@ -1,96 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. 
See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkException -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.internal.config.OptionalConfigEntry -import org.apache.spark.util.Utils - -private[spark] trait SubmittedDependencyInitContainerConfigPlugin { - /** - * Obtain configuration to fetch submitted dependencies from a resource staging server. - * This includes the resource identifiers for the jar and file bundles, as well as the - * remote location of the resource staging server, and the location of secret files for - * authenticating to the resource staging server. Note that the secret file paths here need to - * line up with the locations the secrets are mounted by - * SubmittedDependencyInitContainerVolumesPlugin; constants provide the consistency and - * convention for these to line up. - */ - def configurationsToFetchSubmittedDependencies(): Map[String, String] -} - -private[spark] class SubmittedDependencyInitContainerConfigPluginImpl( - internalResourceStagingServerUri: String, - jarsResourceId: String, - filesResourceId: String, - jarsSecretKey: String, - filesSecretKey: String, - trustStoreSecretKey: String, - clientCertSecretKey: String, - resourceStagingServerSslEnabled: Boolean, - maybeInternalTrustStoreUri: Option[String], - maybeInternalClientCertUri: Option[String], - maybeInternalTrustStorePassword: Option[String], - maybeInternalTrustStoreType: Option[String], - secretsVolumeMountPath: String) - extends SubmittedDependencyInitContainerConfigPlugin { - - override def configurationsToFetchSubmittedDependencies(): Map[String, String] = { - Map[String, String]( - RESOURCE_STAGING_SERVER_URI.key -> internalResourceStagingServerUri, - INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> jarsResourceId, - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> - s"$secretsVolumeMountPath/$jarsSecretKey", - INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> filesResourceId, - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> - s"$secretsVolumeMountPath/$filesSecretKey", - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> resourceStagingServerSslEnabled.toString) ++ - resolveSecretPath( - maybeInternalTrustStoreUri, - trustStoreSecretKey, - RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, - "TrustStore URI") ++ - resolveSecretPath( - maybeInternalClientCertUri, - clientCertSecretKey, - RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM, - "Client certificate URI") ++ - maybeInternalTrustStorePassword.map { password => - (RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) - }.toMap ++ - maybeInternalTrustStoreType.map { storeType => - (RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) - }.toMap - } - - private def resolveSecretPath( - maybeUri: Option[String], - secretKey: String, - configEntry: OptionalConfigEntry[String], - uriType: 
String): Map[String, String] = { - maybeUri.map(Utils.resolveURI).map { uri => - val resolvedPath = Option(uri.getScheme).getOrElse("file") match { - case "file" => s"$secretsVolumeMountPath/$secretKey" - case "local" => uri.getPath - case invalid => throw new SparkException(s"$uriType has invalid scheme $invalid must be" + - s" local://, file://, or empty.") - } - (configEntry.key, resolvedPath) - }.toMap - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala deleted file mode 100644 index 7850853df97e6..0000000000000 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilder.scala +++ /dev/null @@ -1,81 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} -import scala.collection.JavaConverters._ - -import org.apache.spark.util.Utils - -private[spark] trait SubmittedDependencySecretBuilder { - /** - * Construct a Kubernetes secret bundle that init-containers can use to retrieve an - * application's dependencies. 
- */ - def build(): Secret -} - -private[spark] class SubmittedDependencySecretBuilderImpl( - secretName: String, - jarsResourceSecret: String, - filesResourceSecret: String, - jarsSecretKey: String, - filesSecretKey: String, - trustStoreSecretKey: String, - clientCertSecretKey: String, - internalTrustStoreUri: Option[String], - internalClientCertUri: Option[String]) - extends SubmittedDependencySecretBuilder { - - override def build(): Secret = { - val trustStoreBase64 = convertFileToBase64IfSubmitterLocal( - trustStoreSecretKey, internalTrustStoreUri) - val clientCertBase64 = convertFileToBase64IfSubmitterLocal( - clientCertSecretKey, internalClientCertUri) - val jarsSecretBase64 = BaseEncoding.base64().encode(jarsResourceSecret.getBytes(Charsets.UTF_8)) - val filesSecretBase64 = BaseEncoding.base64().encode( - filesResourceSecret.getBytes(Charsets.UTF_8)) - val secretData = Map( - jarsSecretKey -> jarsSecretBase64, - filesSecretKey -> filesSecretBase64) ++ - trustStoreBase64 ++ - clientCertBase64 - val kubernetesSecret = new SecretBuilder() - .withNewMetadata() - .withName(secretName) - .endMetadata() - .addToData(secretData.asJava) - .build() - kubernetesSecret - } - - private def convertFileToBase64IfSubmitterLocal(secretKey: String, secretUri: Option[String]) - : Map[String, String] = { - secretUri.filter { trustStore => - Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") == "file" - }.map { uri => - val file = new File(Utils.resolveURI(uri).getPath) - require(file.isFile, "Dependency server trustStore provided at" + - file.getAbsolutePath + " does not exist or is not a file.") - (secretKey, BaseEncoding.base64().encode(Files.toByteArray(file))) - }.toMap - } -} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala new file mode 100644 index 0000000000000..022b5fccdc5e1 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, EnvVarBuilder, PodBuilder, QuantityBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.ConfigurationUtils +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +/** + * Represents the initial setup required for the driver. 
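Before the class body, a worked sizing example may help: the step below derives the driver container's memory limit from spark.driver.memory plus an overhead. The overhead constants are defined elsewhere in this patch; the figures here assume the conventional Spark values of a 0.10 factor and a 384 MiB minimum, so treat this as a sketch rather than the authoritative numbers.

// Illustrative only; MEMORY_OVERHEAD_FACTOR (0.10) and MEMORY_OVERHEAD_MIN (384)
// are assumed values for this example.
val driverMemoryMb = 1024
val memoryOverheadMb = math.max((0.10 * driverMemoryMb).toInt, 384) // max(102, 384) = 384
val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb // 1408
// The container then requests "1024M" of memory and is limited to "1408M".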
+ */ +private[spark] class BaseDriverConfigurationStep( + kubernetesAppId: String, + kubernetesResourceNamePrefix: String, + driverLabels: Map[String, String], + dockerImagePullPolicy: String, + appName: String, + mainClass: String, + appArgs: Array[String], + submissionSparkConf: SparkConf) extends DriverConfigurationStep { + + private val kubernetesDriverPodName = submissionSparkConf.get(KUBERNETES_DRIVER_POD_NAME) + .getOrElse(s"$kubernetesResourceNamePrefix-driver") + private val driverExtraClasspath = submissionSparkConf.get( + org.apache.spark.internal.config.DRIVER_CLASS_PATH) + // CPU settings + private val driverCpuCores = submissionSparkConf.getOption("spark.driver.cores").getOrElse("1") + private val driverLimitCores = submissionSparkConf.get(KUBERNETES_DRIVER_LIMIT_CORES) + + // Memory settings + private val driverMemoryMb = submissionSparkConf.get( + org.apache.spark.internal.config.DRIVER_MEMORY) + private val memoryOverheadMb = submissionSparkConf + .get(KUBERNETES_DRIVER_MEMORY_OVERHEAD) + .getOrElse(math.max((MEMORY_OVERHEAD_FACTOR * driverMemoryMb).toInt, + MEMORY_OVERHEAD_MIN)) + private val driverContainerMemoryWithOverhead = driverMemoryMb + memoryOverheadMb + private val driverDockerImage = submissionSparkConf.get(DRIVER_DOCKER_IMAGE) + + override def configureDriver( + driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val driverExtraClasspathEnv = driverExtraClasspath.map { classPath => + new EnvVarBuilder() + .withName(ENV_SUBMIT_EXTRA_CLASSPATH) + .withValue(classPath) + .build() + } + val driverCustomAnnotations = ConfigurationUtils + .combinePrefixedKeyValuePairsWithDeprecatedConf( + submissionSparkConf, + KUBERNETES_DRIVER_ANNOTATION_PREFIX, + KUBERNETES_DRIVER_ANNOTATIONS, + "annotation") + require(!driverCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), + s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + + s" Spark bookkeeping operations.") + val allDriverAnnotations = driverCustomAnnotations ++ Map(SPARK_APP_NAME_ANNOTATION -> appName) + val driverCpuQuantity = new QuantityBuilder(false) + .withAmount(driverCpuCores) + .build() + val driverMemoryQuantity = new QuantityBuilder(false) + .withAmount(s"${driverMemoryMb}M") + .build() + val driverMemoryLimitQuantity = new QuantityBuilder(false) + .withAmount(s"${driverContainerMemoryWithOverhead}M") + .build() + val maybeCpuLimitQuantity = driverLimitCores.map { limitCores => + ("cpu", new QuantityBuilder(false).withAmount(limitCores).build()) + } + val driverContainer = new ContainerBuilder(driverSpec.driverContainer) + .withName(DRIVER_CONTAINER_NAME) + .withImage(driverDockerImage) + .withImagePullPolicy(dockerImagePullPolicy) + .addToEnv(driverExtraClasspathEnv.toSeq: _*) + .addNewEnv() + .withName(ENV_DRIVER_MEMORY) + .withValue(driverContainerMemoryWithOverhead + "m") + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_MAIN_CLASS) + .withValue(mainClass) + .endEnv() + .addNewEnv() + .withName(ENV_DRIVER_ARGS) + .withValue(appArgs.mkString(" ")) + .endEnv() + .withNewResources() + .addToRequests("cpu", driverCpuQuantity) + .addToRequests("memory", driverMemoryQuantity) + .addToLimits("memory", driverMemoryLimitQuantity) + .addToLimits(maybeCpuLimitQuantity.toMap.asJava) + .endResources() + .build() + val baseDriverPod = new PodBuilder(driverSpec.driverPod) + .editOrNewMetadata() + .withName(kubernetesDriverPodName) + .addToLabels(driverLabels.asJava) + .addToAnnotations(allDriverAnnotations.asJava) + .endMetadata() + .withNewSpec() + 
.withRestartPolicy("Never") + .endSpec() + .build() + val resolvedSparkConf = driverSpec.driverSparkConf.clone() + .setIfMissing(KUBERNETES_DRIVER_POD_NAME, kubernetesDriverPodName) + .set("spark.app.id", kubernetesAppId) + .set(KUBERNETES_EXECUTOR_POD_NAME_PREFIX, kubernetesResourceNamePrefix) + driverSpec.copy( + driverPod = baseDriverPod, + driverSparkConf = resolvedSparkConf, + driverContainer = driverContainer) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStep.scala new file mode 100644 index 0000000000000..dddc62410d6c9 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStep.scala @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + + +import java.io.File + +import io.fabric8.kubernetes.api.model.ContainerBuilder + +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils +import org.apache.spark.util.Utils + +/** + * Step that configures the classpath, spark.jars, and spark.files for the driver given that + * the init-container will download files to the download paths and that the user may provide + * files with local:// schemes. Note that this is separate from the init-container bootstrap + * step because jars with local schemes can still be provided even if the init-container is + * not being used, and those URIs still need to be resolved. 
+ */ +private[spark] class DependencyResolutionStep( + sparkJars: Seq[String], + sparkFiles: Seq[String], + jarsDownloadPath: String, + filesDownloadPath: String) extends DriverConfigurationStep { + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val resolvedSparkJars = KubernetesFileUtils.resolveSubmittedUris(sparkJars, jarsDownloadPath) + val resolvedSparkFiles = KubernetesFileUtils.resolveSubmittedUris(sparkFiles, filesDownloadPath) + val sparkConfResolvedSparkDependencies = driverSpec.driverSparkConf.clone() + if (resolvedSparkJars.nonEmpty) { + sparkConfResolvedSparkDependencies.set("spark.jars", resolvedSparkJars.mkString(",")) + } + if (resolvedSparkFiles.nonEmpty) { + sparkConfResolvedSparkDependencies.set("spark.files", resolvedSparkFiles.mkString(",")) + } + val resolvedClasspath = KubernetesFileUtils.resolveFilePaths(sparkJars, jarsDownloadPath) + val driverContainerWithResolvedClasspath = if (resolvedClasspath.nonEmpty) { + new ContainerBuilder(driverSpec.driverContainer) + .addNewEnv() + .withName(ENV_MOUNTED_CLASSPATH) + .withValue(resolvedClasspath.mkString(File.pathSeparator)) + .endEnv() + .build() + } else { + driverSpec.driverContainer + } + driverSpec.copy( + driverContainer = driverContainerWithResolvedClasspath, + driverSparkConf = sparkConfResolvedSparkDependencies) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverConfigurationStep.scala new file mode 100644 index 0000000000000..8070e32371f94 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverConfigurationStep.scala @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +/** + * Represents a step in preparing the Kubernetes driver. + */ +private[spark] trait DriverConfigurationStep { + + /** + * Apply some transformation to the previous state of the driver to add a new feature to it. 
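A minimal sketch of how a submission client might apply these steps in order; it assumes an orchestrator instance of the DriverConfigurationStepsOrchestrator introduced earlier in this patch and an initialSpec holding an empty KubernetesDriverSpec (whose definition is outside this hunk).

// Sketch only: `orchestrator` and `initialSpec` are assumed to exist in the caller.
val steps: Seq[DriverConfigurationStep] = orchestrator.getAllConfigurationSteps()
val resolvedSpec: KubernetesDriverSpec =
  steps.foldLeft(initialSpec) { (currentSpec, step) => step.configureDriver(currentSpec) }
// Each step returns a copy of the spec with one feature layered on: base pod settings,
// Kubernetes credentials, dependency resolution, then the optional init-container and
// PySpark steps.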
+ */ + def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala new file mode 100644 index 0000000000000..0c58006130659 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala @@ -0,0 +1,222 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import java.io.File +import java.nio.charset.StandardCharsets + +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +/** + * Mounts Kubernetes credentials into the driver pod. The driver will use such mounted credentials + * to request executors. 
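+ *
+ * Two flavors of credential configuration are handled here: values under the driver auth
+ * prefix refer to files on the submitting machine and are base64-encoded into a Kubernetes
+ * secret that this step mounts into the driver pod, while values under the corresponding
+ * "mounted" prefix are treated as paths already present inside the driver container and are
+ * passed through to the driver's SparkConf unchanged.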
+ */ +private[spark] class DriverKubernetesCredentialsStep( + submissionSparkConf: SparkConf, + kubernetesResourceNamePrefix: String) extends DriverConfigurationStep { + + private val maybeMountedOAuthTokenFile = submissionSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX") + private val maybeMountedClientKeyFile = submissionSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX") + private val maybeMountedClientCertFile = submissionSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX") + private val maybeMountedCaCertFile = submissionSparkConf.getOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX") + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val driverSparkConf = driverSpec.driverSparkConf.clone() + val oauthTokenBase64 = submissionSparkConf + .getOption(s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX") + .map { token => + BaseEncoding.base64().encode(token.getBytes(StandardCharsets.UTF_8)) + } + val caCertDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + s"Driver CA cert file provided at %s does not exist or is not a file.") + val clientKeyDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + s"Driver client key file provided at %s does not exist or is not a file.") + val clientCertDataBase64 = safeFileConfToBase64( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + s"Driver client cert file provided at %s does not exist or is not a file.") + val driverSparkConfWithCredentialsLocations = setDriverPodKubernetesCredentialLocations( + driverSparkConf, + oauthTokenBase64, + caCertDataBase64, + clientKeyDataBase64, + clientCertDataBase64) + val kubernetesCredentialsSecret = createCredentialsSecret( + oauthTokenBase64, + caCertDataBase64, + clientKeyDataBase64, + clientCertDataBase64) + val driverPodWithMountedKubernetesCredentials = kubernetesCredentialsSecret.map { secret => + new PodBuilder(driverSpec.driverPod) + .editOrNewSpec() + .addNewVolume() + .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withNewSecret().withSecretName(secret.getMetadata.getName).endSecret() + .endVolume() + .endSpec() + .build() + }.getOrElse(driverSpec.driverPod) + val driverContainerWithMountedSecretVolume = kubernetesCredentialsSecret.map { secret => + new ContainerBuilder(driverSpec.driverContainer) + .addNewVolumeMount() + .withName(DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + .withMountPath(DRIVER_CREDENTIALS_SECRETS_BASE_DIR) + .endVolumeMount() + .build() + }.getOrElse(driverSpec.driverContainer) + driverSpec.copy( + driverPod = driverPodWithMountedKubernetesCredentials, + otherKubernetesResources = + driverSpec.otherKubernetesResources ++ kubernetesCredentialsSecret.toSeq, + driverSparkConf = driverSparkConfWithCredentialsLocations, + driverContainer = driverContainerWithMountedSecretVolume) + } + + private def createCredentialsSecret( + driverOAuthTokenBase64: Option[String], + driverCaCertDataBase64: Option[String], + driverClientKeyDataBase64: Option[String], + driverClientCertDataBase64: Option[String]): Option[Secret] = { + val allSecretData = + resolveSecretData( + maybeMountedClientKeyFile, + driverClientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME) ++ + resolveSecretData( + maybeMountedClientCertFile, + driverClientCertDataBase64, 
+ DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME) ++ + resolveSecretData( + maybeMountedCaCertFile, + driverCaCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME) ++ + resolveSecretData( + maybeMountedOAuthTokenFile, + driverOAuthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME) + if (allSecretData.isEmpty) { + None + } else { + Some(new SecretBuilder() + .withNewMetadata() + .withName(s"$kubernetesResourceNamePrefix-kubernetes-credentials") + .endMetadata() + .withData(allSecretData.asJava) + .build()) + } + } + + private def setDriverPodKubernetesCredentialLocations( + driverSparkConf: SparkConf, + driverOauthTokenBase64: Option[String], + driverCaCertDataBase64: Option[String], + driverClientKeyDataBase64: Option[String], + driverClientCertDataBase64: Option[String]): SparkConf = { + val resolvedMountedOAuthTokenFile = resolveSecretLocation( + maybeMountedOAuthTokenFile, + driverOauthTokenBase64, + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH) + val resolvedMountedClientKeyFile = resolveSecretLocation( + maybeMountedClientKeyFile, + driverClientKeyDataBase64, + DRIVER_CREDENTIALS_CLIENT_KEY_PATH) + val resolvedMountedClientCertFile = resolveSecretLocation( + maybeMountedClientCertFile, + driverClientCertDataBase64, + DRIVER_CREDENTIALS_CLIENT_CERT_PATH) + val resolvedMountedCaCertFile = resolveSecretLocation( + maybeMountedCaCertFile, + driverCaCertDataBase64, + DRIVER_CREDENTIALS_CA_CERT_PATH) + val sparkConfWithCredentialLocations = driverSparkConf + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + resolvedMountedCaCertFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + resolvedMountedClientKeyFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + resolvedMountedClientCertFile) + .setOption( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", + resolvedMountedOAuthTokenFile) + // Redact all OAuth token values + sparkConfWithCredentialLocations + .getAll + .filter(_._1.endsWith(OAUTH_TOKEN_CONF_SUFFIX)).map(_._1) + .foreach { + sparkConfWithCredentialLocations.set(_, "") + } + sparkConfWithCredentialLocations + } + + private def safeFileConfToBase64( + conf: String, + fileNotFoundFormatString: String): Option[String] = { + submissionSparkConf.getOption(conf) + .map(new File(_)) + .map { file => + require(file.isFile, String.format(fileNotFoundFormatString, file.getAbsolutePath)) + BaseEncoding.base64().encode(Files.toByteArray(file)) + } + } + + private def resolveSecretLocation( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + mountedCanonicalLocation: String): Option[String] = { + mountedUserSpecified.orElse(valueMountedFromSubmitter.map( _ => { + mountedCanonicalLocation + })) + } + + private def resolveSecretData( + mountedUserSpecified: Option[String], + valueMountedFromSubmitter: Option[String], + secretName: String): Map[String, String] = { + mountedUserSpecified.map { _ => Map.empty[String, String]} + .getOrElse { + valueMountedFromSubmitter.map { valueBase64 => + Map(secretName -> valueBase64) + }.getOrElse(Map.empty[String, String]) + } + } + + private implicit def augmentSparkConf(sparkConf: SparkConf): OptionSettableSparkConf = { + new OptionSettableSparkConf(sparkConf) + } +} + +private class OptionSettableSparkConf(sparkConf: SparkConf) { + def setOption(configEntry: String, option: Option[String]): SparkConf = { + option.map( opt => { + sparkConf.set(configEntry, opt) + 
}).getOrElse(sparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/InitContainerBootstrapStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/InitContainerBootstrapStep.scala new file mode 100644 index 0000000000000..29cad18c484c0 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/InitContainerBootstrapStep.scala @@ -0,0 +1,64 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata} + +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.{InitContainerUtil, PropertiesConfigMapFromScalaMapBuilder} +import org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer.{InitContainerConfigurationStep, InitContainerSpec} + +/** + * Configures the init-container that bootstraps dependencies into the driver pod. 
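+ *
+ * This step runs the ordered InitContainerConfigurationSteps, aggregates the init-container
+ * properties they produce into a single config map, attaches the resulting init-container to
+ * the driver pod, and records the config map's name and key in the driver's SparkConf so
+ * that executors can be bootstrapped with the same init-container configuration.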
+ */ +private[spark] class InitContainerBootstrapStep( + initContainerConfigurationSteps: Seq[InitContainerConfigurationStep], + initContainerConfigMapName: String, + initContainerConfigMapKey: String) + extends DriverConfigurationStep { + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + var currentInitContainerSpec = InitContainerSpec( + initContainerProperties = Map.empty[String, String], + additionalDriverSparkConf = Map.empty[String, String], + initContainer = new ContainerBuilder().build(), + driverContainer = driverSpec.driverContainer, + podToInitialize = driverSpec.driverPod, + initContainerDependentResources = Seq.empty[HasMetadata]) + for (nextStep <- initContainerConfigurationSteps) { + currentInitContainerSpec = nextStep.configureInitContainer(currentInitContainerSpec) + } + val configMap = PropertiesConfigMapFromScalaMapBuilder.buildConfigMap( + initContainerConfigMapName, + initContainerConfigMapKey, + currentInitContainerSpec.initContainerProperties) + val resolvedDriverSparkConf = driverSpec.driverSparkConf.clone() + .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP, initContainerConfigMapName) + .set(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY, initContainerConfigMapKey) + .setAll(currentInitContainerSpec.additionalDriverSparkConf) + val resolvedDriverPod = InitContainerUtil.appendInitContainer( + currentInitContainerSpec.podToInitialize, currentInitContainerSpec.initContainer) + driverSpec.copy( + driverPod = resolvedDriverPod, + driverContainer = currentInitContainerSpec.driverContainer, + driverSparkConf = resolvedDriverSparkConf, + otherKubernetesResources = + driverSpec.otherKubernetesResources ++ + currentInitContainerSpec.initContainerDependentResources ++ + Seq(configMap)) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/KubernetesDriverSpec.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/KubernetesDriverSpec.scala new file mode 100644 index 0000000000000..3ec4b6c4df10f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/KubernetesDriverSpec.scala @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, HasMetadata, Pod, PodBuilder} + +import org.apache.spark.SparkConf + +/** + * Represents the components and characteristics of a Spark driver. 
The driver can be considered + * as being comprised of the driver pod itself, any other Kubernetes resources that the driver + * pod depends on, and the SparkConf that should be supplied to the Spark application. The driver + * container should be operated on via the specific field of this case class as opposed to trying + * to edit the container directly on the pod. The driver container should be attached at the + * end of executing all submission steps. + */ +private[spark] case class KubernetesDriverSpec( + driverPod: Pod, + driverContainer: Container, + otherKubernetesResources: Seq[HasMetadata], + driverSparkConf: SparkConf) + +private[spark] object KubernetesDriverSpec { + def initialSpec(initialSparkConf: SparkConf): KubernetesDriverSpec = { + KubernetesDriverSpec( + // Set new metadata and a new spec so that submission steps can use + // PodBuilder#editMetadata() and/or PodBuilder#editSpec() safely. + new PodBuilder().withNewMetadata().endMetadata().withNewSpec().endSpec().build(), + new ContainerBuilder().build(), + Seq.empty[HasMetadata], + initialSparkConf.clone()) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala new file mode 100644 index 0000000000000..024d643ddf9fd --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStep.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.ContainerBuilder + +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils + +private[spark] class PythonStep( + primaryPyFile: String, + otherPyFiles: Seq[String], + filesDownloadPath: String) extends DriverConfigurationStep { + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val resolvedOtherPyFilesString = if (otherPyFiles.isEmpty) { + "null" + } else { + KubernetesFileUtils.resolveFilePaths(otherPyFiles, filesDownloadPath).mkString(",") + } + val withPythonPrimaryFileContainer = new ContainerBuilder(driverSpec.driverContainer) + .addNewEnv() + .withName(ENV_PYSPARK_PRIMARY) + .withValue(KubernetesFileUtils.resolveFilePath(primaryPyFile, filesDownloadPath)) + .endEnv() + .addNewEnv() + .withName(ENV_PYSPARK_FILES) + .withValue(resolvedOtherPyFilesString) + .endEnv() + driverSpec.copy(driverContainer = withPythonPrimaryFileContainer.build()) + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStep.scala similarity index 62% rename from resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala rename to resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStep.scala index 4062a3113eddf..60bf27beacaaf 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilder.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStep.scala @@ -14,32 +14,23 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.ConfigMap +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer +import org.apache.spark.deploy.kubernetes.{PodWithDetachedInitContainer, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.submit.KubernetesFileUtils -private[spark] trait SparkInitContainerConfigMapBuilder { - /** - * Construct a config map that an init-container should reference for fetching - * remote dependencies. The config map includes the remote jars and files to download, - * as well as details to fetch files from a resource staging server, if applicable. 
- */ - def build(): ConfigMap -} - -private[spark] class SparkInitContainerConfigMapBuilderImpl( +private[spark] class BaseInitContainerConfigurationStep( sparkJars: Seq[String], sparkFiles: Seq[String], jarsDownloadPath: String, filesDownloadPath: String, configMapName: String, configMapKey: String, - submittedDependenciesPlugin: Option[SubmittedDependencyInitContainerConfigPlugin]) - extends SparkInitContainerConfigMapBuilder { + podAndInitContainerBootstrap: SparkPodInitContainerBootstrap) + extends InitContainerConfigurationStep { - override def build(): ConfigMap = { + override def configureInitContainer(initContainerSpec: InitContainerSpec): InitContainerSpec = { val remoteJarsToDownload = KubernetesFileUtils.getOnlyRemoteFiles(sparkJars) val remoteFilesToDownload = KubernetesFileUtils.getOnlyRemoteFiles(sparkFiles) val remoteJarsConf = if (remoteJarsToDownload.nonEmpty) { @@ -57,12 +48,16 @@ private[spark] class SparkInitContainerConfigMapBuilderImpl( INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key -> filesDownloadPath) ++ remoteJarsConf ++ remoteFilesConf - val submittedDependenciesConfig = submittedDependenciesPlugin.map { plugin => - plugin.configurationsToFetchSubmittedDependencies() - }.toSeq.flatten.toMap - PropertiesConfigMapFromScalaMapBuilder.buildConfigMap( - configMapName, - configMapKey, - baseInitContainerConfig ++ submittedDependenciesConfig) + val bootstrappedPodAndInitContainer = + podAndInitContainerBootstrap.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + initContainerSpec.podToInitialize, + initContainerSpec.initContainer, + initContainerSpec.driverContainer)) + initContainerSpec.copy( + initContainer = bootstrappedPodAndInitContainer.initContainer, + driverContainer = bootstrappedPodAndInitContainer.mainContainer, + podToInitialize = bootstrappedPodAndInitContainer.pod, + initContainerProperties = baseInitContainerConfig) } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStep.scala new file mode 100644 index 0000000000000..7b7622c3d4f8b --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStep.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +/** + * Represents a step in preparing the init-container for the driver and executors. 
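+ *
+ * Implementations mirror DriverConfigurationStep: they are applied in order by
+ * InitContainerBootstrapStep, each returning a copy of the InitContainerSpec with its own
+ * additions (see the sketch in DriverConfigurationStep for the general pattern).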
+ */ +private[spark] trait InitContainerConfigurationStep { + + def configureInitContainer(initContainerSpec: InitContainerSpec): InitContainerSpec +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestrator.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestrator.scala new file mode 100644 index 0000000000000..e4ea5235af18f --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestrator.scala @@ -0,0 +1,131 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import org.apache.spark.SparkConf +import org.apache.spark.deploy.kubernetes.{InitContainerResourceStagingServerSecretPluginImpl, OptionRequirements, SparkPodInitContainerBootstrapImpl} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.SubmittedDependencyUploaderImpl +import org.apache.spark.deploy.rest.kubernetes.{ResourceStagingServerSslOptionsProviderImpl, RetrofitClientFactoryImpl} +import org.apache.spark.util.Utils + +/** + * Returns the complete ordered list of steps required to configure the init-container. 
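+ *
+ * The base step always wires the init-container and its shared download volumes into the
+ * pod; a second step is appended only when a resource staging server URI is configured, in
+ * which case submitter-local jars and files are uploaded and a secret carrying the download
+ * credentials (and, optionally, TLS trustStore or client cert material) is mounted into the
+ * init-container.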
+ */ +private[spark] class InitContainerConfigurationStepsOrchestrator( + namespace: String, + kubernetesResourceNamePrefix: String, + sparkJars: Seq[String], + sparkFiles: Seq[String], + jarsDownloadPath: String, + filesDownloadPath: String, + dockerImagePullPolicy: String, + driverLabels: Map[String, String], + initContainerConfigMapName: String, + initContainerConfigMapKey: String, + submissionSparkConf: SparkConf) { + + private val submittedResourcesSecretName = s"$kubernetesResourceNamePrefix-init-secret" + private val resourceStagingServerUri = submissionSparkConf.get(RESOURCE_STAGING_SERVER_URI) + private val resourceStagingServerInternalUri = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_URI) + private val initContainerImage = submissionSparkConf.get(INIT_CONTAINER_DOCKER_IMAGE) + private val downloadTimeoutMinutes = submissionSparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT) + private val maybeResourceStagingServerInternalTrustStore = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_FILE) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE)) + private val maybeResourceStagingServerInternalTrustStorePassword = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_PASSWORD) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD)) + private val maybeResourceStagingServerInternalTrustStoreType = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_TRUSTSTORE_TYPE) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE)) + private val maybeResourceStagingServerInternalClientCert = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_CLIENT_CERT_PEM) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM)) + private val resourceStagingServerInternalSslEnabled = + submissionSparkConf.get(RESOURCE_STAGING_SERVER_INTERNAL_SSL_ENABLED) + .orElse(submissionSparkConf.get(RESOURCE_STAGING_SERVER_SSL_ENABLED)) + .getOrElse(false) + OptionRequirements.requireNandDefined( + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStore, + "Cannot provide both a certificate file and a trustStore file for init-containers to" + + " use for contacting the resource staging server over TLS.") + + require(maybeResourceStagingServerInternalTrustStore.forall { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { + case "file" | "local" => true + case _ => false + } + }, "TrustStore URI used for contacting the resource staging server from init containers must" + + " have no scheme, or scheme file://, or scheme local://.") + + require(maybeResourceStagingServerInternalClientCert.forall { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") match { + case "file" | "local" => true + case _ => false + } + }, "Client cert file URI used for contacting the resource staging server from init containers" + + " must have no scheme, or scheme file://, or scheme local://.") + + def getAllConfigurationSteps(): Seq[InitContainerConfigurationStep] = { + val initContainerBootstrap = new SparkPodInitContainerBootstrapImpl( + initContainerImage, + dockerImagePullPolicy, + jarsDownloadPath, + filesDownloadPath, + downloadTimeoutMinutes, + initContainerConfigMapName, + initContainerConfigMapKey) + val baseInitContainerStep = new BaseInitContainerConfigurationStep( + sparkJars, + sparkFiles, + jarsDownloadPath, + filesDownloadPath, + initContainerConfigMapName, + initContainerConfigMapKey, + 
initContainerBootstrap) + val submittedResourcesInitContainerStep = resourceStagingServerUri.map { + stagingServerUri => + val mountSecretPlugin = new InitContainerResourceStagingServerSecretPluginImpl( + submittedResourcesSecretName, + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH) + val submittedDependencyUploader = new SubmittedDependencyUploaderImpl( + driverLabels, + namespace, + stagingServerUri, + sparkJars, + sparkFiles, + new ResourceStagingServerSslOptionsProviderImpl(submissionSparkConf).getSslOptions, + RetrofitClientFactoryImpl) + new SubmittedResourcesInitContainerConfigurationStep( + submittedResourcesSecretName, + resourceStagingServerInternalUri.getOrElse(stagingServerUri), + INIT_CONTAINER_SECRET_VOLUME_MOUNT_PATH, + resourceStagingServerInternalSslEnabled, + maybeResourceStagingServerInternalTrustStore, + maybeResourceStagingServerInternalClientCert, + maybeResourceStagingServerInternalTrustStorePassword, + maybeResourceStagingServerInternalTrustStoreType, + submittedDependencyUploader, + mountSecretPlugin) + } + Seq(baseInitContainerStep) ++ submittedResourcesInitContainerStep.toSeq + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerSpec.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerSpec.scala new file mode 100644 index 0000000000000..5b5ac3c1f17c2 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerSpec.scala @@ -0,0 +1,41 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import io.fabric8.kubernetes.api.model.{Container, HasMetadata, Pod} + +/** + * Represents a given configuration of the init-container, informing the main + * InitContainerBootstrapStep of how the driver should be configured. This includes: + *

    + * - What properties should be set on the init-container, + * - What Spark properties should be set on the driver's SparkConf given this init-container, + * - The spec of the init container itself, + * - The spec of the main container so that it can be modified to share volumes with the + * init-container + * - The spec of the pod EXCEPT for the addition of the given init-container (e.g. volumes + * the init-container needs or modifications to a main container that shares data with the + * init-container), + * - Any Kubernetes resources that need to be created for the init-container's function. + */ +private[spark] case class InitContainerSpec( + initContainerProperties: Map[String, String], + additionalDriverSparkConf: Map[String, String], + initContainer: Container, + driverContainer: Container, + podToInitialize: Pod, + initContainerDependentResources: Seq[HasMetadata]) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerConfigurationStep.scala new file mode 100644 index 0000000000000..7aa27a1de6811 --- /dev/null +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerConfigurationStep.scala @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{Secret, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkException +import org.apache.spark.deploy.kubernetes.InitContainerResourceStagingServerSecretPlugin +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.SubmittedDependencyUploader +import org.apache.spark.internal.config.OptionalConfigEntry +import org.apache.spark.util.Utils + +private[spark] class SubmittedResourcesInitContainerConfigurationStep( + submittedResourcesSecretName: String, + internalResourceStagingServerUri: String, + initContainerSecretMountPath: String, + resourceStagingServerSslEnabled: Boolean, + maybeInternalTrustStoreUri: Option[String], + maybeInternalClientCertUri: Option[String], + maybeInternalTrustStorePassword: Option[String], + maybeInternalTrustStoreType: Option[String], + submittedDependencyUploader: SubmittedDependencyUploader, + submittedResourcesSecretPlugin: InitContainerResourceStagingServerSecretPlugin) + extends InitContainerConfigurationStep { + + override def configureInitContainer(initContainerSpec: InitContainerSpec): InitContainerSpec = { + val jarsIdAndSecret = submittedDependencyUploader.uploadJars() + val filesIdAndSecret = submittedDependencyUploader.uploadFiles() + + val submittedResourcesInitContainerProperties = Map[String, String]( + RESOURCE_STAGING_SERVER_URI.key -> internalResourceStagingServerUri, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> jarsIdAndSecret.resourceId, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$initContainerSecretMountPath/$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> filesIdAndSecret.resourceId, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$initContainerSecretMountPath/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> resourceStagingServerSslEnabled.toString) ++ + resolveSecretPath( + maybeInternalTrustStoreUri, + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE, + "TrustStore URI") ++ + resolveSecretPath( + maybeInternalClientCertUri, + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM, + "Client certificate URI") ++ + maybeInternalTrustStorePassword.map { password => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key, password) + }.toMap ++ + maybeInternalTrustStoreType.map { storeType => + (RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key, storeType) + }.toMap + val initContainerSecret = createResourceStagingServerSecret( + jarsIdAndSecret.resourceSecret, filesIdAndSecret.resourceSecret) + val additionalDriverSparkConf = + Map( + EXECUTOR_INIT_CONTAINER_SECRET.key -> initContainerSecret.getMetadata.getName, + EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR.key -> initContainerSecretMountPath) + val initContainerWithSecretVolumeMount = submittedResourcesSecretPlugin + .mountResourceStagingServerSecretIntoInitContainer(initContainerSpec.initContainer) + val podWithSecretVolume = submittedResourcesSecretPlugin + .addResourceStagingServerSecretVolumeToPod(initContainerSpec.podToInitialize) + initContainerSpec.copy( + initContainer = initContainerWithSecretVolumeMount, + 
podToInitialize = podWithSecretVolume, + initContainerDependentResources = + initContainerSpec.initContainerDependentResources ++ Seq(initContainerSecret), + initContainerProperties = + initContainerSpec.initContainerProperties ++ submittedResourcesInitContainerProperties, + additionalDriverSparkConf = additionalDriverSparkConf) + } + + private def createResourceStagingServerSecret( + jarsResourceSecret: String, filesResourceSecret: String): Secret = { + val trustStoreBase64 = convertFileToBase64IfSubmitterLocal( + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY, maybeInternalTrustStoreUri) + val clientCertBase64 = convertFileToBase64IfSubmitterLocal( + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY, maybeInternalClientCertUri) + val jarsSecretBase64 = BaseEncoding.base64().encode(jarsResourceSecret.getBytes(Charsets.UTF_8)) + val filesSecretBase64 = BaseEncoding.base64().encode( + filesResourceSecret.getBytes(Charsets.UTF_8)) + val secretData = Map( + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY -> jarsSecretBase64, + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY -> filesSecretBase64) ++ + trustStoreBase64 ++ + clientCertBase64 + val kubernetesSecret = new SecretBuilder() + .withNewMetadata() + .withName(submittedResourcesSecretName) + .endMetadata() + .addToData(secretData.asJava) + .build() + kubernetesSecret + } + + private def convertFileToBase64IfSubmitterLocal(secretKey: String, secretUri: Option[String]) + : Map[String, String] = { + secretUri.filter { trustStore => + Option(Utils.resolveURI(trustStore).getScheme).getOrElse("file") == "file" + }.map { uri => + val file = new File(Utils.resolveURI(uri).getPath) + require(file.isFile, "Dependency server trustStore provided at" + + file.getAbsolutePath + " does not exist or is not a file.") + (secretKey, BaseEncoding.base64().encode(Files.toByteArray(file))) + }.toMap + } + + private def resolveSecretPath( + maybeUri: Option[String], + secretKey: String, + configEntry: OptionalConfigEntry[String], + uriType: String): Map[String, String] = { + maybeUri.map(Utils.resolveURI).map { uri => + val resolvedPath = Option(uri.getScheme).getOrElse("file") match { + case "file" => s"$initContainerSecretMountPath/$secretKey" + case "local" => uri.getPath + case invalid => throw new SparkException(s"$uriType has invalid scheme $invalid must be" + + s" local://, file://, or empty.") + } + (configEntry.key, resolvedPath) + }.toMap + } +} diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala index ac19c2463218b..0e274678ad6f0 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/rest/kubernetes/KubernetesSparkDependencyDownloadInitContainer.scala @@ -76,7 +76,6 @@ private[spark] class KubernetesSparkDependencyDownloadInitContainer( fileFetcher: FileFetcher, resourceStagingServerSslOptions: SSLOptions) extends Logging { - private implicit val downloadExecutor = ExecutionContext.fromExecutorService( ThreadUtils.newDaemonCachedThreadPool("download-executor")) private val maybeResourceStagingServerUri = sparkConf.get(RESOURCE_STAGING_SERVER_URI) diff --git 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala index 2a0f6e78c2aea..fa0ecca3b4ee6 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterManager.scala @@ -70,8 +70,7 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit sparkConf.get(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION), sparkConf.get(INIT_CONTAINER_MOUNT_TIMEOUT), configMap, - configMapKey, - executorInitContainerSecretVolumePlugin) + configMapKey) } if (maybeConfigMap.isEmpty) { logWarning("The executor's init-container config map was not specified. Executors will" + @@ -89,7 +88,11 @@ private[spark] class KubernetesClusterManager extends ExternalClusterManager wit Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_TOKEN_PATH)), Some(new File(Config.KUBERNETES_SERVICE_ACCOUNT_CA_CRT_PATH))) new KubernetesClusterSchedulerBackend( - sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], sc, bootStrap, kubernetesClient) + sc.taskScheduler.asInstanceOf[TaskSchedulerImpl], + sc, + bootStrap, + executorInitContainerSecretVolumePlugin, + kubernetesClient) } override def initialize(scheduler: TaskScheduler, backend: SchedulerBackend): Unit = { diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index d880cee315c0d..e5f980ad1f366 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -23,7 +23,7 @@ import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import io.fabric8.kubernetes.api.model.{ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.FilenameUtils @@ -32,9 +32,10 @@ import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkEnv, SparkException} -import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, SparkPodInitContainerBootstrap} +import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, InitContainerResourceStagingServerSecretPlugin, PodWithDetachedInitContainer, SparkPodInitContainerBootstrap} import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.InitContainerUtil import org.apache.spark.network.netty.SparkTransportConf import 
org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} @@ -47,6 +48,7 @@ private[spark] class KubernetesClusterSchedulerBackend( scheduler: TaskSchedulerImpl, val sc: SparkContext, executorInitContainerBootstrap: Option[SparkPodInitContainerBootstrap], + executorMountInitContainerSecretPlugin: Option[InitContainerResourceStagingServerSecretPlugin], kubernetesClient: KubernetesClient) extends CoarseGrainedSchedulerBackend(scheduler, sc.env.rpcEnv) { @@ -319,8 +321,8 @@ private[spark] class KubernetesClusterSchedulerBackend( nodeToLocalTaskCount.toMap[String, Int] } - private def addNodeAffinityAnnotationIfUseful(basePodBuilder: PodBuilder, - nodeToTaskCount: Map[String, Int]): PodBuilder = { + private def addNodeAffinityAnnotationIfUseful( + baseExecutorPod: Pod, nodeToTaskCount: Map[String, Int]): Pod = { def scaleToRange(value: Int, baseMin: Double, baseMax: Double, rangeMin: Double, rangeMax: Double): Int = (((rangeMax - rangeMin) * (value - baseMin) / (baseMax - baseMin)) + rangeMin).toInt @@ -341,11 +343,12 @@ private[spark] class KubernetesClusterSchedulerBackend( ))) // TODO: Use non-annotation syntax when we switch to K8s version 1.6. logDebug(s"Adding nodeAffinity as annotation $nodeAffinityJson") - basePodBuilder.editMetadata() + new PodBuilder(baseExecutorPod).editMetadata() .addToAnnotations(ANNOTATION_EXECUTOR_NODE_AFFINITY, nodeAffinityJson) .endMetadata() + .build() } else { - basePodBuilder + baseExecutorPod } } @@ -416,7 +419,21 @@ private[spark] class KubernetesClusterSchedulerBackend( .build() }) - val basePodBuilder = new PodBuilder() + val executorContainer = new ContainerBuilder() + .withName(s"executor") + .withImage(executorDockerImage) + .withImagePullPolicy(dockerImagePullPolicy) + .withNewResources() + .addToRequests("memory", executorMemoryQuantity) + .addToLimits("memory", executorMemoryLimitQuantity) + .addToRequests("cpu", executorCpuQuantity) + .endResources() + .addAllToEnv(requiredEnv.asJava) + .addToEnv(executorExtraClasspathEnv.toSeq: _*) + .withPorts(requiredPorts.asJava) + .build() + + val executorPod = new PodBuilder() .withNewMetadata() .withName(name) .withLabels(resolvedExecutorLabels.asJava) @@ -432,69 +449,77 @@ private[spark] class KubernetesClusterSchedulerBackend( .endMetadata() .withNewSpec() .withHostname(hostname) - .addNewContainer() - .withName(s"executor") - .withImage(executorDockerImage) - .withImagePullPolicy(dockerImagePullPolicy) - .withNewResources() - .addToRequests("memory", executorMemoryQuantity) - .addToLimits("memory", executorMemoryLimitQuantity) - .addToRequests("cpu", executorCpuQuantity) - .endResources() - .addAllToEnv(requiredEnv.asJava) - .addToEnv(executorExtraClasspathEnv.toSeq: _*) - .withPorts(requiredPorts.asJava) - .endContainer() .endSpec() + .build() - executorLimitCores.map { + val containerWithExecutorLimitCores = executorLimitCores.map { limitCores => val executorCpuLimitQuantity = new QuantityBuilder(false) .withAmount(limitCores) .build() - basePodBuilder + new ContainerBuilder(executorContainer) + .editResources() + .addToLimits("cpu", executorCpuLimitQuantity) + .endResources() + .build() + }.getOrElse(executorContainer) + + val withMaybeShuffleConfigExecutorContainer = shuffleServiceConfig.map { config => + config.shuffleDirs.foldLeft(containerWithExecutorLimitCores) { (container, dir) => + new ContainerBuilder(container) + .addNewVolumeMount() + .withName(FilenameUtils.getBaseName(dir)) + .withMountPath(dir) + 
.endVolumeMount() + .build() + } + }.getOrElse(containerWithExecutorLimitCores) + val withMaybeShuffleConfigPod = shuffleServiceConfig.map { config => + config.shuffleDirs.foldLeft(executorPod) { (builder, dir) => + new PodBuilder(builder) .editSpec() - .editFirstContainer() - .editResources - .addToLimits("cpu", executorCpuLimitQuantity) - .endResources() - .endContainer() - .endSpec() - } - - val withMaybeShuffleConfigPodBuilder = shuffleServiceConfig - .map { config => - config.shuffleDirs.foldLeft(basePodBuilder) { (builder, dir) => - builder - .editSpec() - .addNewVolume() - .withName(FilenameUtils.getBaseName(dir)) - .withNewHostPath() - .withPath(dir) + .addNewVolume() + .withName(FilenameUtils.getBaseName(dir)) + .withNewHostPath() + .withPath(dir) .endHostPath() .endVolume() - .editFirstContainer() - .addNewVolumeMount() - .withName(FilenameUtils.getBaseName(dir)) - .withMountPath(dir) - .endVolumeMount() - .endContainer() .endSpec() - } - }.getOrElse(basePodBuilder) - - val executorInitContainerPodBuilder = executorInitContainerBootstrap.map { - bootstrap => - bootstrap.bootstrapInitContainerAndVolumes( - "executor", - withMaybeShuffleConfigPodBuilder) - }.getOrElse(withMaybeShuffleConfigPodBuilder) - - val resolvedExecutorPodBuilder = addNodeAffinityAnnotationIfUseful( - executorInitContainerPodBuilder, nodeToLocalTaskCount) - + .build() + } + }.getOrElse(executorPod) + val (executorPodWithInitContainer, initBootstrappedExecutorContainer) = + executorInitContainerBootstrap.map { bootstrap => + val podWithDetachedInitContainer = bootstrap.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + withMaybeShuffleConfigPod, + new ContainerBuilder().build(), + withMaybeShuffleConfigExecutorContainer)) + + val resolvedInitContainer = executorMountInitContainerSecretPlugin.map { plugin => + plugin.mountResourceStagingServerSecretIntoInitContainer( + podWithDetachedInitContainer.initContainer) + }.getOrElse(podWithDetachedInitContainer.initContainer) + + val podWithAttachedInitContainer = InitContainerUtil.appendInitContainer( + podWithDetachedInitContainer.pod, resolvedInitContainer) + + val resolvedPodWithMountedSecret = executorMountInitContainerSecretPlugin.map { plugin => + plugin.addResourceStagingServerSecretVolumeToPod(podWithAttachedInitContainer) + }.getOrElse(podWithAttachedInitContainer) + + (resolvedPodWithMountedSecret, podWithDetachedInitContainer.mainContainer) + }.getOrElse((withMaybeShuffleConfigPod, withMaybeShuffleConfigExecutorContainer)) + + val executorPodWithNodeAffinity = addNodeAffinityAnnotationIfUseful( + executorPodWithInitContainer, nodeToLocalTaskCount) + val resolvedExecutorPod = new PodBuilder(executorPodWithNodeAffinity) + .editSpec() + .addToContainers(initBootstrappedExecutorContainer) + .endSpec() + .build() try { - (executorId, kubernetesClient.pods.create(resolvedExecutorPodBuilder.build())) + (executorId, kubernetesClient.pods.create(resolvedExecutorPod)) } catch { case throwable: Throwable => logError("Failed to allocate executor pod.", throwable) @@ -606,10 +631,11 @@ private[spark] class KubernetesClusterSchedulerBackend( } } - case class ShuffleServiceConfig(shuffleNamespace: String, +} +case class ShuffleServiceConfig( + shuffleNamespace: String, shuffleLabels: Map[String, String], shuffleDirs: Seq[String]) -} private object KubernetesClusterSchedulerBackend { private val DEFAULT_STATIC_PORT = 10000 diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala new file mode 100644 index 0000000000000..f5b2db36aff8f --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala @@ -0,0 +1,59 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes + +import org.scalatest.BeforeAndAfter +import io.fabric8.kubernetes.api.model._ +import org.apache.spark.deploy.kubernetes.constants._ + +import scala.collection.JavaConverters._ +import org.apache.spark.SparkFunSuite + +class InitContainerResourceStagingServerSecretPluginSuite extends SparkFunSuite with BeforeAndAfter{ + private val INIT_CONTAINER_SECRET_NAME = "init-secret" + private val INIT_CONTAINER_SECRET_MOUNT = "/tmp/secret" + + private val initContainerSecretPlugin = new InitContainerResourceStagingServerSecretPluginImpl( + INIT_CONTAINER_SECRET_NAME, + INIT_CONTAINER_SECRET_MOUNT) + + test("Volume Mount into InitContainer") { + val returnedCont = initContainerSecretPlugin.mountResourceStagingServerSecretIntoInitContainer( + new ContainerBuilder().withName("init-container").build()) + assert(returnedCont.getName === "init-container") + assert(returnedCont.getVolumeMounts.asScala.map( + vm => (vm.getName, vm.getMountPath)) === + List((INIT_CONTAINER_SECRET_VOLUME_NAME, INIT_CONTAINER_SECRET_MOUNT))) + } + + test("Add Volume with Secret to Pod") { + val returnedPod = initContainerSecretPlugin.addResourceStagingServerSecretVolumeToPod( + basePod().build) + assert(returnedPod.getMetadata.getName === "spark-pod") + val volume = returnedPod.getSpec.getVolumes.asScala.head + assert(volume.getName === INIT_CONTAINER_SECRET_VOLUME_NAME) + assert(volume.getSecret.getSecretName === INIT_CONTAINER_SECRET_NAME) + } + private def basePod(): PodBuilder = { + new PodBuilder() + .withNewMetadata() + .withName("spark-pod") + .endMetadata() + .withNewSpec() + .endSpec() + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala index 90d7b10df211c..0557b5677b919 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ 
-16,16 +16,14 @@ */ package org.apache.spark.deploy.kubernetes -import com.fasterxml.jackson.databind.ObjectMapper -import io.fabric8.kubernetes.api.model.{Container, ContainerBuilder, Pod, PodBuilder} import org.scalatest.BeforeAndAfter -import scala.collection.JavaConverters._ +import io.fabric8.kubernetes.api.model._ +import org.apache.spark.deploy.kubernetes.constants._ +import scala.collection.JavaConverters._ import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.constants._ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAfter { - private val OBJECT_MAPPER = new ObjectMapper() private val INIT_CONTAINER_IMAGE = "spark-init:latest" private val DOCKER_IMAGE_PULL_POLICY = "IfNotPresent" private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" @@ -33,134 +31,66 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf private val DOWNLOAD_TIMEOUT_MINUTES = 5 private val INIT_CONTAINER_CONFIG_MAP_NAME = "spark-init-config-map" private val INIT_CONTAINER_CONFIG_MAP_KEY = "spark-init-config-map-key" - private val ADDED_SUBMITTED_DEPENDENCY_ENV = "ADDED_SUBMITTED_DEPENDENCY" - private val ADDED_SUBMITTED_DEPENDENCY_ANNOTATION = "added-submitted-dependencies" private val MAIN_CONTAINER_NAME = "spark-main" - private val TRUE = "true" - private val submittedDependencyPlugin = new InitContainerResourceStagingServerSecretPlugin { - override def addResourceStagingServerSecretVolumeToPod(basePod: PodBuilder) - : PodBuilder = { - basePod.editMetadata() - .addToAnnotations(ADDED_SUBMITTED_DEPENDENCY_ANNOTATION, TRUE) - .endMetadata() - } + private val sparkPodInit = new SparkPodInitContainerBootstrapImpl( + INIT_CONTAINER_IMAGE, + DOCKER_IMAGE_PULL_POLICY, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOWNLOAD_TIMEOUT_MINUTES, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY) + private val expectedSharedVolumeMap = Map( + JARS_DOWNLOAD_PATH -> INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME, + FILES_DOWNLOAD_PATH -> INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) - override def mountResourceStagingServerSecretIntoInitContainer(container: ContainerBuilder) - : ContainerBuilder = { - container - .addNewEnv() - .withName(ADDED_SUBMITTED_DEPENDENCY_ENV) - .withValue(TRUE) - .endEnv() - } - } - - test("Running without submitted dependencies adds init-container with volume mounts.") { - val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() - val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala - assert(podAnnotations.contains(INIT_CONTAINER_ANNOTATION)) - val initContainers = OBJECT_MAPPER.readValue( - podAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) - assert(initContainers.length === 1) - val initContainer = initContainers.head - val initContainerVolumeMounts = initContainer.getVolumeMounts.asScala.map { - mount => (mount.getName, mount.getMountPath) - }.toMap - val expectedInitContainerVolumeMounts = Map( - INIT_CONTAINER_PROPERTIES_FILE_VOLUME -> INIT_CONTAINER_PROPERTIES_FILE_DIR, - INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, - INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) - assert(initContainerVolumeMounts === expectedInitContainerVolumeMounts) + test("InitContainer: Volume mounts, args, and builder specs") { + val returnedPodWithCont = sparkPodInit.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + pod = basePod().build(), + initContainer = new Container(), + mainContainer = new 
ContainerBuilder().withName(MAIN_CONTAINER_NAME).build())) + val initContainer: Container = returnedPodWithCont.initContainer + val volumes = initContainer.getVolumeMounts.asScala + assert(volumes.map(vm => (vm.getMountPath, vm.getName)).toMap === expectedSharedVolumeMap + ++ Map("/etc/spark-init" -> "spark-init-properties")) assert(initContainer.getName === "spark-init") assert(initContainer.getImage === INIT_CONTAINER_IMAGE) - assert(initContainer.getImagePullPolicy === "IfNotPresent") - assert(initContainer.getArgs.asScala === List(INIT_CONTAINER_PROPERTIES_FILE_PATH)) + assert(initContainer.getImagePullPolicy === DOCKER_IMAGE_PULL_POLICY) + assert(initContainer.getArgs.asScala.head === INIT_CONTAINER_PROPERTIES_FILE_PATH) } - - test("Running without submitted dependencies adds volume mounts to main container.") { - val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() - val containers = bootstrappedPod.getSpec.getContainers.asScala - val mainContainer = containers.find(_.getName === MAIN_CONTAINER_NAME) - assert(mainContainer.isDefined) - val volumeMounts = mainContainer.map(_.getVolumeMounts.asScala).toSeq.flatten.map { - mount => (mount.getName, mount.getMountPath) - }.toMap - val expectedVolumeMounts = Map( - INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME -> JARS_DOWNLOAD_PATH, - INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME -> FILES_DOWNLOAD_PATH) - assert(volumeMounts === expectedVolumeMounts) + test("Main: Volume mounts and env") { + val returnedPodWithCont = sparkPodInit.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + pod = basePod().build(), + initContainer = new Container(), + mainContainer = new ContainerBuilder().withName(MAIN_CONTAINER_NAME).build())) + val mainContainer: Container = returnedPodWithCont.mainContainer + assert(mainContainer.getName === MAIN_CONTAINER_NAME) + val volumeMounts = mainContainer.getVolumeMounts.asScala + assert(volumeMounts.map(vm => (vm.getMountPath, vm.getName)).toMap === expectedSharedVolumeMap) + assert(mainContainer.getEnv.asScala.map(e => (e.getName, e.getValue)).toMap === + Map(ENV_MOUNTED_FILES_DIR -> FILES_DOWNLOAD_PATH)) } - - test("Running without submitted dependencies adds volumes to the pod") { - val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() - val podVolumes = bootstrappedPod.getSpec.getVolumes.asScala - assert(podVolumes.size === 3) - assert(podVolumes.exists { volume => - volume.getName == INIT_CONTAINER_PROPERTIES_FILE_VOLUME && - Option(volume.getConfigMap).map { configMap => - configMap.getItems.asScala.map { - keyToPath => (keyToPath.getKey, keyToPath.getPath) - }.toMap - }.contains(Map(INIT_CONTAINER_CONFIG_MAP_KEY -> INIT_CONTAINER_PROPERTIES_FILE_NAME)) - }) - assert(podVolumes.exists { volume => - volume.getName == INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME && volume.getEmptyDir != null - }) - assert(podVolumes.exists { volume => - volume.getName == INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME && volume.getEmptyDir != null - }) - } - - test("Files download path is set as environment variable") { - val bootstrappedPod = bootstrapPodWithoutSubmittedDependencies() - val containers = bootstrappedPod.getSpec.getContainers.asScala - val maybeMainContainer = containers.find(_.getName === MAIN_CONTAINER_NAME) - assert(maybeMainContainer.exists { mainContainer => - mainContainer.getEnv.asScala.exists(envVar => - envVar.getName == ENV_MOUNTED_FILES_DIR && envVar.getValue == FILES_DOWNLOAD_PATH) - }) - } - - test("Running with submitted dependencies modifies the init container with the plugin.") { - 
val bootstrappedPod = bootstrapPodWithSubmittedDependencies() - val podAnnotations = bootstrappedPod.getMetadata.getAnnotations.asScala - assert(podAnnotations(ADDED_SUBMITTED_DEPENDENCY_ANNOTATION) === TRUE) - val initContainers = OBJECT_MAPPER.readValue( - podAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) - assert(initContainers.length === 1) - val initContainer = initContainers.head - assert(initContainer.getEnv.asScala.exists { - env => env.getName === ADDED_SUBMITTED_DEPENDENCY_ENV && env.getValue === TRUE - }) - } - - private def bootstrapPodWithoutSubmittedDependencies(): Pod = { - val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( - INIT_CONTAINER_IMAGE, - DOCKER_IMAGE_PULL_POLICY, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - DOWNLOAD_TIMEOUT_MINUTES, - INIT_CONTAINER_CONFIG_MAP_NAME, - INIT_CONTAINER_CONFIG_MAP_KEY, - None) - bootstrapUnderTest.bootstrapInitContainerAndVolumes( - MAIN_CONTAINER_NAME, basePod()).build() - } - - private def bootstrapPodWithSubmittedDependencies(): Pod = { - val bootstrapUnderTest = new SparkPodInitContainerBootstrapImpl( - INIT_CONTAINER_IMAGE, - DOCKER_IMAGE_PULL_POLICY, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - DOWNLOAD_TIMEOUT_MINUTES, - INIT_CONTAINER_CONFIG_MAP_NAME, - INIT_CONTAINER_CONFIG_MAP_KEY, - Some(submittedDependencyPlugin)) - bootstrapUnderTest.bootstrapInitContainerAndVolumes( - MAIN_CONTAINER_NAME, basePod()).build() + test("Pod: Volume Mounts") { + val returnedPodWithCont = sparkPodInit.bootstrapInitContainerAndVolumes( + PodWithDetachedInitContainer( + pod = basePod().build(), + initContainer = new Container(), + mainContainer = new ContainerBuilder().withName(MAIN_CONTAINER_NAME).build())) + val returnedPod = returnedPodWithCont.pod + assert(returnedPod.getMetadata.getName === "spark-pod") + val volumes = returnedPod.getSpec.getVolumes.asScala.toList + assert(volumes.head.getName === INIT_CONTAINER_PROPERTIES_FILE_VOLUME) + assert(volumes.head.getConfigMap.getName === INIT_CONTAINER_CONFIG_MAP_NAME) + assert(volumes.head.getConfigMap.getItems.asScala.map( + i => (i.getKey, i.getPath)) === + List((INIT_CONTAINER_CONFIG_MAP_KEY, INIT_CONTAINER_PROPERTIES_FILE_NAME))) + assert(volumes(1).getName === INIT_CONTAINER_DOWNLOAD_JARS_VOLUME_NAME) + assert(volumes(1).getEmptyDir === new EmptyDirVolumeSource()) + assert(volumes(2).getName === INIT_CONTAINER_DOWNLOAD_FILES_VOLUME_NAME) + assert(volumes(2).getEmptyDir === new EmptyDirVolumeSource()) } private def basePod(): PodBuilder = { @@ -169,9 +99,6 @@ class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAf .withName("spark-pod") .endMetadata() .withNewSpec() - .addNewContainer() - .withName(MAIN_CONTAINER_NAME) - .endContainer() - .endSpec() + .endSpec() } } diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala deleted file mode 100644 index 473d369c8eca3..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SubmittedDependencyInitContainerVolumesPluginSuite.scala +++ /dev/null @@ -1,60 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. 
- * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes - -import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder} -import scala.collection.JavaConverters._ - -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.constants._ - -class SubmittedDependencyInitContainerVolumesPluginSuite extends SparkFunSuite { - - private val SECRET_NAME = "secret" - private val SECRET_MOUNT_PATH = "/mnt/secrets" - private val plugin = new InitContainerResourceStagingServerSecretPluginImpl( - SECRET_NAME, SECRET_MOUNT_PATH) - - test("The init container should have the secret volume mount.") { - val baseInitContainer = new ContainerBuilder().withName("container") - val configuredInitContainer = plugin.mountResourceStagingServerSecretIntoInitContainer( - baseInitContainer).build() - val volumeMounts = configuredInitContainer.getVolumeMounts.asScala - assert(volumeMounts.size === 1) - assert(volumeMounts.exists { volumeMount => - volumeMount.getName === INIT_CONTAINER_SECRET_VOLUME_NAME && - volumeMount.getMountPath === SECRET_MOUNT_PATH - }) - } - - test("The pod should have the secret volume.") { - val basePod = new PodBuilder() - .withNewMetadata().withName("pod").endMetadata() - .withNewSpec() - .addNewContainer() - .withName("container") - .endContainer() - .endSpec() - val configuredPod = plugin.addResourceStagingServerSecretVolumeToPod(basePod).build() - val volumes = configuredPod.getSpec.getVolumes.asScala - assert(volumes.size === 1) - assert(volumes.exists { volume => - volume.getName === INIT_CONTAINER_SECRET_VOLUME_NAME && - Option(volume.getSecret).map(_.getSecretName).contains(SECRET_NAME) - }) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientSuite.scala new file mode 100644 index 0000000000000..965ee75c248b8 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientSuite.scala @@ -0,0 +1,226 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit + +import com.google.common.collect.Iterables +import io.fabric8.kubernetes.api.model.{ContainerBuilder, DoneablePod, HasMetadata, Pod, PodBuilder, PodList, Secret, SecretBuilder} +import io.fabric8.kubernetes.client.{KubernetesClient, Watch} +import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, NamespaceVisitFromServerGetWatchDeleteRecreateWaitApplicable, PodResource, Resource} +import org.mockito.{ArgumentCaptor, Mock, MockitoAnnotations} +import org.mockito.Mockito.{doReturn, verify, when} +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import org.scalatest.mock.MockitoSugar._ +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.submitsteps.{DriverConfigurationStep, KubernetesDriverSpec} + +class ClientSuite extends SparkFunSuite with BeforeAndAfter { + + private val DRIVER_POD_UID = "pod-id" + private val DRIVER_POD_API_VERSION = "v1" + private val DRIVER_POD_KIND = "pod" + + private type ResourceList = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ + HasMetadata, Boolean] + private type Pods = MixedOperation[Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] + + @Mock + private var kubernetesClient: KubernetesClient = _ + + @Mock + private var podOperations: Pods = _ + + @Mock + private var namedPods: PodResource[Pod, DoneablePod] = _ + + @Mock + private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ + + @Mock + private var resourceList: ResourceList = _ + + private val submissionSteps = Seq(FirstTestConfigurationStep, SecondTestConfigurationStep) + private var createdPodArgumentCaptor: ArgumentCaptor[Pod] = _ + private var createdResourcesArgumentCaptor: ArgumentCaptor[HasMetadata] = _ + + before { + MockitoAnnotations.initMocks(this) + when(kubernetesClient.pods()).thenReturn(podOperations) + when(podOperations.withName(FirstTestConfigurationStep.podName)).thenReturn(namedPods) + + createdPodArgumentCaptor = ArgumentCaptor.forClass(classOf[Pod]) + createdResourcesArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) + when(podOperations.create(createdPodArgumentCaptor.capture())).thenAnswer(new Answer[Pod] { + override def answer(invocation: InvocationOnMock): Pod = { + new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) + .editMetadata() + .withUid(DRIVER_POD_UID) + .endMetadata() + .withApiVersion(DRIVER_POD_API_VERSION) + .withKind(DRIVER_POD_KIND) + .build() + } + }) + when(podOperations.withName(FirstTestConfigurationStep.podName)).thenReturn(namedPods) + when(namedPods.watch(loggingPodStatusWatcher)).thenReturn(mock[Watch]) + doReturn(resourceList) + .when(kubernetesClient) + .resourceList(createdResourcesArgumentCaptor.capture()) + } + + test("The client should configure the pod with the submission steps.") { + val submissionClient = new Client( + submissionSteps, + new SparkConf(false), + kubernetesClient, + false, + "spark", + loggingPodStatusWatcher) + submissionClient.run() + val createdPod = createdPodArgumentCaptor.getValue + assert(createdPod.getMetadata.getName === FirstTestConfigurationStep.podName) + assert(createdPod.getMetadata.getLabels.asScala === + Map(FirstTestConfigurationStep.labelKey -> FirstTestConfigurationStep.labelValue)) + 
assert(createdPod.getMetadata.getAnnotations.asScala === + Map(SecondTestConfigurationStep.annotationKey -> + SecondTestConfigurationStep.annotationValue)) + assert(createdPod.getSpec.getContainers.size() === 1) + assert(createdPod.getSpec.getContainers.get(0).getName === + SecondTestConfigurationStep.containerName) + } + + test("The client should create the secondary Kubernetes resources.") { + val submissionClient = new Client( + submissionSteps, + new SparkConf(false), + kubernetesClient, + false, + "spark", + loggingPodStatusWatcher) + submissionClient.run() + val createdPod = createdPodArgumentCaptor.getValue + val otherCreatedResources = createdResourcesArgumentCaptor.getAllValues + assert(otherCreatedResources.size === 1) + val createdResource = Iterables.getOnlyElement(otherCreatedResources).asInstanceOf[Secret] + assert(createdResource.getMetadata.getName === FirstTestConfigurationStep.secretName) + assert(createdResource.getData.asScala === + Map(FirstTestConfigurationStep.secretKey -> FirstTestConfigurationStep.secretData)) + val ownerReference = Iterables.getOnlyElement(createdResource.getMetadata.getOwnerReferences) + assert(ownerReference.getName === createdPod.getMetadata.getName) + assert(ownerReference.getKind === DRIVER_POD_KIND) + assert(ownerReference.getUid === DRIVER_POD_UID) + assert(ownerReference.getApiVersion === DRIVER_POD_API_VERSION) + } + + test("The client should attach the driver container with the appropriate JVM options.") { + val sparkConf = new SparkConf(false) + .set("spark.logConf", "true") + .set( + org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, + "-XX:+|-HeapDumpOnOutOfMemoryError") + val submissionClient = new Client( + submissionSteps, + sparkConf, + kubernetesClient, + false, + "spark", + loggingPodStatusWatcher) + submissionClient.run() + val createdPod = createdPodArgumentCaptor.getValue + val driverContainer = Iterables.getOnlyElement(createdPod.getSpec.getContainers) + assert(driverContainer.getName === SecondTestConfigurationStep.containerName) + val driverJvmOptsEnv = Iterables.getOnlyElement(driverContainer.getEnv) + assert(driverJvmOptsEnv.getName === ENV_DRIVER_JAVA_OPTS) + val driverJvmOpts = driverJvmOptsEnv.getValue.split(" ").toSet + assert(driverJvmOpts.contains("-Dspark.logConf=true")) + assert(driverJvmOpts.contains( + s"-D${SecondTestConfigurationStep.sparkConfKey}=" + + SecondTestConfigurationStep.sparkConfValue)) + assert(driverJvmOpts.contains( + "-XX:+|-HeapDumpOnOutOfMemoryError")) + } + + test("Waiting for app completion should stall on the watcher") { + val submissionClient = new Client( + submissionSteps, + new SparkConf(false), + kubernetesClient, + true, + "spark", + loggingPodStatusWatcher) + submissionClient.run() + verify(loggingPodStatusWatcher).awaitCompletion() + } + +} + +private object FirstTestConfigurationStep extends DriverConfigurationStep { + + val podName = "test-pod" + val secretName = "test-secret" + val labelKey = "first-submit" + val labelValue = "true" + val secretKey = "secretKey" + val secretData = "secretData" + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val modifiedPod = new PodBuilder(driverSpec.driverPod) + .editMetadata() + .withName(podName) + .addToLabels(labelKey, labelValue) + .endMetadata() + .build() + val additionalResource = new SecretBuilder() + .withNewMetadata() + .withName(secretName) + .endMetadata() + .addToData(secretKey, secretData) + .build() + driverSpec.copy( + driverPod = modifiedPod, + otherKubernetesResources = 
driverSpec.otherKubernetesResources ++ Seq(additionalResource)) + } +} + +private object SecondTestConfigurationStep extends DriverConfigurationStep { + + val annotationKey = "second-submit" + val annotationValue = "submitted" + val sparkConfKey = "spark.custom-conf" + val sparkConfValue = "custom-conf-value" + val containerName = "driverContainer" + + override def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec = { + val modifiedPod = new PodBuilder(driverSpec.driverPod) + .editMetadata() + .addToAnnotations(annotationKey, annotationValue) + .endMetadata() + .build() + val resolvedSparkConf = driverSpec.driverSparkConf.clone().set(sparkConfKey, sparkConfValue) + val modifiedContainer = new ContainerBuilder(driverSpec.driverContainer) + .withName(containerName) + .build() + driverSpec.copy( + driverPod = modifiedPod, + driverSparkConf = resolvedSparkConf, + driverContainer = modifiedContainer) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala deleted file mode 100644 index a58a37691f4eb..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ClientV2Suite.scala +++ /dev/null @@ -1,558 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import io.fabric8.kubernetes.api.model._ -import io.fabric8.kubernetes.client.{KubernetesClient, Watch} -import io.fabric8.kubernetes.client.dsl.{MixedOperation, NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable, PodResource} -import org.hamcrest.{BaseMatcher, Description} -import org.mockito.{AdditionalAnswers, ArgumentCaptor, Mock, MockitoAnnotations} -import org.mockito.Matchers.{any, anyVararg, argThat, eq => mockitoEq} -import org.mockito.Mockito.{times, verify, when} -import org.mockito.invocation.InvocationOnMock -import org.mockito.stubbing.Answer -import org.scalatest.{BeforeAndAfter, Matchers} - -import scala.collection.JavaConverters._ -import scala.collection.mutable -import org.apache.spark.{SecurityManager, SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.{KubernetesExternalShuffleService, KubernetesShuffleBlockHandler, SparkPodInitContainerBootstrap} -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ -import org.apache.spark.network.netty.SparkTransportConf -import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient - -class ClientV2Suite extends SparkFunSuite with BeforeAndAfter { - private val JARS_RESOURCE = SubmittedResourceIdAndSecret("jarsId", "jarsSecret") - private val FILES_RESOURCE = SubmittedResourceIdAndSecret("filesId", "filesSecret") - private val SUBMITTED_RESOURCES = SubmittedResources(JARS_RESOURCE, FILES_RESOURCE) - private val BOOTSTRAPPED_POD_ANNOTATION = "bootstrapped" - private val TRUE = "true" - private val APP_NAME = "spark-test" - private val APP_RESOURCE_PREFIX = "spark-prefix" - private val APP_ID = "spark-id" - private val CUSTOM_LABEL_KEY = "customLabel" - private val CUSTOM_LABEL_VALUE = "customLabelValue" - private val DEPRECATED_CUSTOM_LABEL_KEY = "deprecatedCustomLabel" - private val DEPRECATED_CUSTOM_LABEL_VALUE = "deprecatedCustomLabelValue" - private val ALL_EXPECTED_LABELS = Map( - CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, - DEPRECATED_CUSTOM_LABEL_KEY -> DEPRECATED_CUSTOM_LABEL_VALUE, - SPARK_APP_ID_LABEL -> APP_ID, - SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) - private val CUSTOM_ANNOTATION_KEY = "customAnnotation" - private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" - private val DEPRECATED_CUSTOM_ANNOTATION_KEY = "deprecatedCustomAnnotation" - private val DEPRECATED_CUSTOM_ANNOTATION_VALUE = "deprecatedCustomAnnotationValue" - private val INIT_CONTAINER_SECRET_NAME = "init-container-secret" - private val INIT_CONTAINER_SECRET_DATA = Map("secret-key" -> "secret-data") - private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" - private val PYSPARK_APP_ARGS = Array(null, "500") - private val APP_ARGS = Array("3", "20") - private val SPARK_JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") - private val RESOLVED_SPARK_JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", "file:///var/data/spark-jars/jar2.jar") - private val RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS = Seq( - "/var/data/spark-jars/jar1.jar", "/var/data/spark-jars/jar2.jar") - private val SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") - private val PYSPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.py", - "file:///app/files/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py", - "file:///app/files/file5.py") - private val RESOLVED_PYSPARK_FILES = Seq( - 
"hdfs://localhost:9000/app/files/file1.py", - "/var/spark-data/spark-files/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py") - private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" - private val RESOLVED_PYSPARK_PRIMARY_FILE = "/var/spark-data/spark-file/file5.py" - - private val RESOLVED_SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", "file:///var/data/spark-files/file2.txt") - private val INIT_CONTAINER_SECRET = new SecretBuilder() - .withNewMetadata() - .withName(INIT_CONTAINER_SECRET_NAME) - .endMetadata() - .withData(INIT_CONTAINER_SECRET_DATA.asJava) - .build() - private val CUSTOM_JAVA_OPTION_KEY = "myappoption" - private val CUSTOM_JAVA_OPTION_VALUE = "myappoptionvalue" - private val DRIVER_JAVA_OPTIONS = s"-D$CUSTOM_JAVA_OPTION_KEY=$CUSTOM_JAVA_OPTION_VALUE" - private val DRIVER_EXTRA_CLASSPATH = "/var/data/spark-app-custom/custom-jar.jar" - private val CONFIG_MAP_NAME = "config-map" - private val CONFIG_MAP_DATA = Map("config-map-key" -> "config-map-data") - private val INIT_CONTAINER_CONFIG_MAP = new ConfigMapBuilder() - .withNewMetadata() - .withName(CONFIG_MAP_NAME) - .endMetadata() - .withData(CONFIG_MAP_DATA.asJava) - .build() - private val CUSTOM_DRIVER_IMAGE = "spark-custom-driver:latest" - private val DRIVER_MEMORY_MB = 512 - private val DRIVER_MEMORY_OVERHEAD_MB = 128 - private val SPARK_CONF = new SparkConf(true) - .set(DRIVER_DOCKER_IMAGE, CUSTOM_DRIVER_IMAGE) - .set(org.apache.spark.internal.config.DRIVER_MEMORY, DRIVER_MEMORY_MB.toLong) - .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, DRIVER_MEMORY_OVERHEAD_MB.toLong) - .set(KUBERNETES_DRIVER_LABELS, s"$DEPRECATED_CUSTOM_LABEL_KEY=$DEPRECATED_CUSTOM_LABEL_VALUE") - .set(KUBERNETES_DRIVER_ANNOTATIONS, - s"$DEPRECATED_CUSTOM_ANNOTATION_KEY=$DEPRECATED_CUSTOM_ANNOTATION_VALUE") - .set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$CUSTOM_LABEL_KEY", CUSTOM_LABEL_VALUE) - .set(s"$KUBERNETES_DRIVER_ANNOTATION_PREFIX$CUSTOM_ANNOTATION_KEY", CUSTOM_ANNOTATION_VALUE) - .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, DRIVER_EXTRA_CLASSPATH) - .set(org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS, DRIVER_JAVA_OPTIONS) - private val EXECUTOR_INIT_CONF_KEY = "executor-init-conf" - private val SPARK_CONF_WITH_EXECUTOR_INIT_CONF = SPARK_CONF.clone() - .set(EXECUTOR_INIT_CONF_KEY, TRUE) - private val DRIVER_POD_UID = "driver-pod-uid" - private val DRIVER_POD_KIND = "pod" - private val DRIVER_POD_API_VERSION = "v1" - private val CREDENTIALS_SECRET_NAME = "credentials-secret" - private val CREDENTIALS_SECRET_DATA = Map("credentials-secret-key" -> "credentials-secret-value") - private val CREDENTIALS_SECRET = new SecretBuilder() - .withNewMetadata() - .withName(CREDENTIALS_SECRET_NAME) - .endMetadata() - .withData(CREDENTIALS_SECRET_DATA.asJava) - .build() - private val CREDENTIALS_SET_CONF = "spark.kubernetes.driverCredentials.provided" - private val CREDENTIALS_SET_ANNOTATION = "credentials-set" - - @Mock - private var containerLocalizedFilesResolver: ContainerLocalizedFilesResolver = _ - @Mock - private var executorInitContainerConfiguration: ExecutorInitContainerConfiguration = _ - @Mock - private var submittedDependencyUploader: SubmittedDependencyUploader = _ - @Mock - private var submittedDependenciesSecretBuilder: SubmittedDependencySecretBuilder = _ - @Mock - private var initContainerBootstrap: SparkPodInitContainerBootstrap = _ - @Mock - private var initContainerComponentsProvider: DriverInitContainerComponentsProvider = _ - @Mock - private var kubernetesClient: KubernetesClient 
= _ - @Mock - private var podOps: MixedOperation[ - Pod, PodList, DoneablePod, PodResource[Pod, DoneablePod]] = _ - private type ResourceListOps = NamespaceListVisitFromServerGetDeleteRecreateWaitApplicable[ - HasMetadata, java.lang.Boolean] - @Mock - private var resourceListOps: ResourceListOps = _ - @Mock - private var credentialsMounterProvider: DriverPodKubernetesCredentialsMounterProvider = _ - @Mock - private var fileMounter: DriverPodKubernetesFileMounter = _ - @Mock - private var credentialsMounter: DriverPodKubernetesCredentialsMounter = _ - @Mock - private var loggingPodStatusWatcher: LoggingPodStatusWatcher = _ - @Mock - private var namedPodResource: PodResource[Pod, DoneablePod] = _ - @Mock - private var watch: Watch = _ - - before { - MockitoAnnotations.initMocks(this) - when(initContainerComponentsProvider.provideInitContainerBootstrap()) - .thenReturn(initContainerBootstrap) - when(submittedDependencyUploader.uploadJars()).thenReturn(JARS_RESOURCE) - when(submittedDependencyUploader.uploadFiles()).thenReturn(FILES_RESOURCE) - when(initContainerBootstrap - .bootstrapInitContainerAndVolumes(mockitoEq(DRIVER_CONTAINER_NAME), any())) - .thenAnswer(new Answer[PodBuilder] { - override def answer(invocationOnMock: InvocationOnMock): PodBuilder = { - invocationOnMock.getArgumentAt(1, classOf[PodBuilder]).editMetadata() - .addToAnnotations(BOOTSTRAPPED_POD_ANNOTATION, TRUE) - .endMetadata() - } - }) - when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver( - any[String])).thenReturn(containerLocalizedFilesResolver) - when(initContainerComponentsProvider.provideDriverPodFileMounter()) - .thenReturn(fileMounter) - when(submittedDependenciesSecretBuilder.build()) - .thenReturn(INIT_CONTAINER_SECRET) - when(kubernetesClient.pods()).thenReturn(podOps) - when(podOps.create(any())).thenAnswer(new Answer[Pod] { - override def answer(invocation: InvocationOnMock): Pod = { - new PodBuilder(invocation.getArgumentAt(0, classOf[Pod])) - .editMetadata() - .withUid(DRIVER_POD_UID) - .endMetadata() - .withKind(DRIVER_POD_KIND) - .withApiVersion(DRIVER_POD_API_VERSION) - .build() - } - }) - when(podOps.withName(s"$APP_RESOURCE_PREFIX-driver")).thenReturn(namedPodResource) - when(fileMounter.addPySparkFiles( - mockitoEq(RESOLVED_PYSPARK_PRIMARY_FILE), - mockitoEq(RESOLVED_PYSPARK_FILES.mkString(",")), - any[String], - any())).thenAnswer( new Answer[PodBuilder] { - override def answer(invocation: InvocationOnMock) : PodBuilder = { - invocation.getArgumentAt(3, classOf[PodBuilder]) - .editMetadata() - .withUid(DRIVER_POD_UID) - .withName(s"$APP_RESOURCE_PREFIX-driver") - .addToLabels("pyspark-test", "true") - .endMetadata() - .withKind(DRIVER_POD_KIND) - .withApiVersion(DRIVER_POD_API_VERSION) - } - }) - when(namedPodResource.watch(loggingPodStatusWatcher)).thenReturn(watch) - when(containerLocalizedFilesResolver.resolveSubmittedAndRemoteSparkJars()) - .thenReturn(RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS) - when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) - .thenReturn(RESOLVED_SPARK_JARS) - when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) - .thenReturn(RESOLVED_SPARK_FILES) - when(containerLocalizedFilesResolver.resolvePrimaryResourceFile()) - .thenReturn(RESOLVED_PYSPARK_PRIMARY_FILE) - when(containerLocalizedFilesResolver.resolveSubmittedPySparkFiles()) - .thenReturn(RESOLVED_PYSPARK_FILES) - when(executorInitContainerConfiguration.configureSparkConfForExecutorInitContainer(SPARK_CONF)) - .thenReturn(SPARK_CONF_WITH_EXECUTOR_INIT_CONF) - 
when(kubernetesClient.resourceList(anyVararg[HasMetadata]())).thenReturn(resourceListOps) - when(credentialsMounterProvider.getDriverPodKubernetesCredentialsMounter()) - .thenReturn(credentialsMounter) - } - - test("Run with dependency uploader") { - expectationsForNoMountedCredentials() - when(initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) - .thenReturn(Some(submittedDependencyUploader)) - when(initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets()))) - .thenReturn(Some(submittedDependenciesSecretBuilder)) - when(initContainerComponentsProvider.provideInitContainerBundle(mockitoEq( - Option(SUBMITTED_RESOURCES.ids())), - mockitoEq(RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES))) - .thenReturn(Option(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, - initContainerBootstrap, executorInitContainerConfiguration))) - runAndVerifyDriverPodHasCorrectProperties() - val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) - verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) - val createdResources = resourceListArgumentCaptor.getAllValues.asScala - assert(createdResources.size === 2) - verifyCreatedResourcesHaveOwnerReferences(createdResources) - assert(createdResources.exists { - case secret: Secret => - secret.getMetadata.getName == INIT_CONTAINER_SECRET_NAME && - secret.getData.asScala == INIT_CONTAINER_SECRET_DATA - case _ => false - }) - verifyConfigMapWasCreated(createdResources) - verify(submittedDependencyUploader).uploadJars() - verify(submittedDependencyUploader).uploadFiles() - verify(initContainerComponentsProvider) - .provideSubmittedDependenciesSecretBuilder(Some(SUBMITTED_RESOURCES.secrets())) - } - - test("Run without dependency uploader") { - expectationsForNoMountedCredentials() - expectationsForNoDependencyUploader() - runAndVerifyDriverPodHasCorrectProperties() - val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) - verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) - val createdResources = resourceListArgumentCaptor.getAllValues.asScala - assert(createdResources.size === 1) - verifyCreatedResourcesHaveOwnerReferences(createdResources) - verifyConfigMapWasCreated(createdResources) - verify(submittedDependencyUploader, times(0)).uploadJars() - verify(submittedDependencyUploader, times(0)).uploadFiles() - verify(initContainerComponentsProvider) - .provideSubmittedDependenciesSecretBuilder(None) - } - - test("Run with mounted credentials") { - expectationsForNoDependencyUploader() - when(credentialsMounter.createCredentialsSecret()).thenReturn(Some(CREDENTIALS_SECRET)) - when(credentialsMounter.mountDriverKubernetesCredentials( - any(), mockitoEq(DRIVER_CONTAINER_NAME), mockitoEq(Some(CREDENTIALS_SECRET)))) - .thenAnswer(new Answer[PodBuilder] { - override def answer(invocation: InvocationOnMock): PodBuilder = { - invocation.getArgumentAt(0, classOf[PodBuilder]).editMetadata() - .addToAnnotations(CREDENTIALS_SET_ANNOTATION, TRUE) - .endMetadata() - } - }) - when(credentialsMounter.setDriverPodKubernetesCredentialLocations(any())) - .thenAnswer(new Answer[SparkConf] { - override def answer(invocation: InvocationOnMock): SparkConf = { - invocation.getArgumentAt(0, classOf[SparkConf]).clone().set(CREDENTIALS_SET_CONF, TRUE) - } - }) - runAndVerifyPodMatchesPredicate { p => - Option(p) - .filter(pod => containerHasCorrectJvmOptions(pod, _(CREDENTIALS_SET_CONF) == TRUE)) - .exists 
{ pod => - pod.getMetadata.getAnnotations.asScala(CREDENTIALS_SET_ANNOTATION) == TRUE - } - } - val resourceListArgumentCaptor = ArgumentCaptor.forClass(classOf[HasMetadata]) - verify(kubernetesClient).resourceList(resourceListArgumentCaptor.capture()) - val createdResources = resourceListArgumentCaptor.getAllValues.asScala - assert(createdResources.size === 2) - verifyCreatedResourcesHaveOwnerReferences(createdResources) - assert(createdResources.exists { - case secret: Secret => - secret.getMetadata.getName == CREDENTIALS_SECRET_NAME && - secret.getData.asScala == CREDENTIALS_SECRET_DATA - case _ => false - }) - } - - test("Waiting for completion should await completion on the status watcher.") { - expectationsForNoMountedCredentials() - expectationsForNoDependencyUploader() - new Client( - APP_NAME, - APP_RESOURCE_PREFIX, - APP_ID, - "", - None, - MAIN_CLASS, - SPARK_CONF, - APP_ARGS, - true, - kubernetesClient, - initContainerComponentsProvider, - credentialsMounterProvider, - loggingPodStatusWatcher).run() - verify(loggingPodStatusWatcher).awaitCompletion() - } - - test("Mounting environmental variables correctly onto Driver Pod for PySpark Jobs") { - expectationsForNoMountedCredentials() - expectationsForNoDependencyUploader() - expectationsForNoSparkJarsOrFiles() - runAndVerifyDriverPodHasCorrectPySparkProperties() - } - - private def expectationsForNoSparkJarsOrFiles(): Unit = { - when(containerLocalizedFilesResolver.resolveSubmittedSparkFiles()) - .thenReturn(Nil) - when(containerLocalizedFilesResolver.resolveSubmittedSparkJars()) - .thenReturn(Nil) - } - - private def expectationsForNoDependencyUploader(): Unit = { - when(initContainerComponentsProvider - .provideInitContainerSubmittedDependencyUploader(ALL_EXPECTED_LABELS)) - .thenReturn(None) - when(initContainerComponentsProvider - .provideSubmittedDependenciesSecretBuilder(None)) - .thenReturn(None) - when(initContainerComponentsProvider.provideInitContainerBundle(mockitoEq(None), - mockitoEq(RESOLVED_SPARK_JARS ++ RESOLVED_SPARK_FILES))) - .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, - initContainerBootstrap, executorInitContainerConfiguration))) - } - - private def expectationsForNoMountedCredentials(): Unit = { - when(credentialsMounter.setDriverPodKubernetesCredentialLocations(any())) - .thenAnswer(AdditionalAnswers.returnsFirstArg()) - when(credentialsMounter.createCredentialsSecret()).thenReturn(None) - when(credentialsMounter.mountDriverKubernetesCredentials( - any(), mockitoEq(DRIVER_CONTAINER_NAME), mockitoEq(None))) - .thenAnswer(AdditionalAnswers.returnsFirstArg()) - } - - private def verifyCreatedResourcesHaveOwnerReferences( - createdResources: mutable.Buffer[HasMetadata]): Unit = { - assert(createdResources.forall { resource => - val owners = resource.getMetadata.getOwnerReferences.asScala - owners.size === 1 && - owners.head.getController && - owners.head.getKind == DRIVER_POD_KIND && - owners.head.getUid == DRIVER_POD_UID && - owners.head.getName == s"$APP_RESOURCE_PREFIX-driver" && - owners.head.getApiVersion == DRIVER_POD_API_VERSION - }) - } - - private def verifyConfigMapWasCreated(createdResources: mutable.Buffer[HasMetadata]): Unit = { - assert(createdResources.exists { - case configMap: ConfigMap => - configMap.getMetadata.getName == CONFIG_MAP_NAME && - configMap.getData.asScala == CONFIG_MAP_DATA - case _ => false - }) - } - - private def runAndVerifyDriverPodHasCorrectProperties(): Unit = { - val expectedOptions = SPARK_CONF.getAll - .filterNot(_._1 == 
org.apache.spark.internal.config.DRIVER_JAVA_OPTIONS.key) - .toMap ++ - Map( - "spark.app.id" -> APP_ID, - KUBERNETES_DRIVER_POD_NAME.key -> s"$APP_RESOURCE_PREFIX-driver", - KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> APP_RESOURCE_PREFIX, - EXECUTOR_INIT_CONF_KEY -> TRUE, - CUSTOM_JAVA_OPTION_KEY -> CUSTOM_JAVA_OPTION_VALUE, - "spark.jars" -> RESOLVED_SPARK_JARS.mkString(","), - "spark.files" -> RESOLVED_SPARK_FILES.mkString(",")) - runAndVerifyPodMatchesPredicate { p => - Option(p) - .filter(_.getMetadata.getName == s"$APP_RESOURCE_PREFIX-driver") - .filter(podHasCorrectAnnotations) - .filter(_.getMetadata.getLabels.asScala == ALL_EXPECTED_LABELS) - .filter(containerHasCorrectBasicContainerConfiguration) - .filter(containerHasCorrectBasicEnvs) - .filter(containerHasCorrectMountedClasspath) - .exists(pod => containerHasCorrectJvmOptions(pod, _ == expectedOptions)) - } - } - - private def runAndVerifyDriverPodHasCorrectPySparkProperties(): Unit = { - when(initContainerComponentsProvider.provideContainerLocalizedFilesResolver( - mockitoEq(PYSPARK_PRIMARY_FILE))).thenReturn(containerLocalizedFilesResolver) - when(initContainerComponentsProvider.provideInitContainerBundle( - any[Option[SubmittedResourceIds]], any[Iterable[String]])) - .thenReturn(Some(InitContainerBundle(INIT_CONTAINER_CONFIG_MAP, - initContainerBootstrap, executorInitContainerConfiguration))) - runAndVerifyPySparkPodMatchesPredicate { p => - Option(p).exists(pod => containerHasCorrectPySparkEnvs(pod)) - } - } - - private def runAndVerifyPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { - new Client( - APP_NAME, - APP_RESOURCE_PREFIX, - APP_ID, - "", - None, - MAIN_CLASS, - SPARK_CONF, - APP_ARGS, - false, - kubernetesClient, - initContainerComponentsProvider, - credentialsMounterProvider, - loggingPodStatusWatcher).run() - val podMatcher = new BaseMatcher[Pod] { - override def matches(o: scala.Any): Boolean = { - o match { - case p: Pod => pred(p) - case _ => false - } - } - override def describeTo(description: Description): Unit = {} - } - verify(podOps).create(argThat(podMatcher)) - } - - private def containerHasCorrectJvmOptions( - pod: Pod, optionsCorrectnessPredicate: (Map[String, String] => Boolean)): Boolean = { - val driverContainer = pod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) - envs.toMap.get(ENV_DRIVER_JAVA_OPTS).exists { javaOptions => - val splitOptions = javaOptions.split(" ") - splitOptions.forall(_.startsWith("-D")) && - optionsCorrectnessPredicate(splitOptions.map { option => - val withoutPrefix = option.substring(2) - (withoutPrefix.split("=", 2)(0), withoutPrefix.split("=", 2)(1)) - }.toMap) - } - } - - private def containerHasCorrectMountedClasspath(pod: Pod): Boolean = { - val driverContainer = pod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) - envs.toMap.get(ENV_MOUNTED_CLASSPATH).exists { classpath => - val mountedClasspathEntities = classpath.split(File.pathSeparator) - mountedClasspathEntities.toSet == RESOLVED_SPARK_REMOTE_AND_LOCAL_JARS.toSet - } - } - - private def containerHasCorrectBasicEnvs(pod: Pod): Boolean = { - val driverContainer = pod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)) - val expectedBasicEnvs = Map( - ENV_SUBMIT_EXTRA_CLASSPATH -> DRIVER_EXTRA_CLASSPATH, - ENV_DRIVER_MEMORY -> s"${DRIVER_MEMORY_MB + DRIVER_MEMORY_OVERHEAD_MB}m", - ENV_DRIVER_MAIN_CLASS 
-> MAIN_CLASS, - ENV_DRIVER_ARGS -> APP_ARGS.mkString(" ")) - expectedBasicEnvs.toSet.subsetOf(envs.toSet) - } - - private def containerHasCorrectPySparkEnvs(pod: Pod): Boolean = { - val driverPodLabels = - pod.getMetadata.getLabels.asScala.map(env => (env._1.toString, env._2.toString)) - val expectedBasicLabels = Map( - "pyspark-test" -> "true", - "spark-role" -> "driver") - expectedBasicLabels.toSet.subsetOf(driverPodLabels.toSet) - } - - private def containerHasCorrectBasicContainerConfiguration(pod: Pod): Boolean = { - val containers = pod.getSpec.getContainers.asScala - containers.size == 1 && - containers.head.getName == DRIVER_CONTAINER_NAME && - containers.head.getImage == CUSTOM_DRIVER_IMAGE && - containers.head.getImagePullPolicy == "IfNotPresent" - } - - private def podHasCorrectAnnotations(pod: Pod): Boolean = { - val expectedAnnotations = Map( - DEPRECATED_CUSTOM_ANNOTATION_KEY -> DEPRECATED_CUSTOM_ANNOTATION_VALUE, - CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, - SPARK_APP_NAME_ANNOTATION -> APP_NAME, - BOOTSTRAPPED_POD_ANNOTATION -> TRUE) - pod.getMetadata.getAnnotations.asScala == expectedAnnotations - } - - private def runAndVerifyPySparkPodMatchesPredicate(pred: (Pod => Boolean)): Unit = { - new Client( - APP_NAME, - APP_RESOURCE_PREFIX, - APP_ID, - PYSPARK_PRIMARY_FILE, - Option(new PythonSubmissionResourcesImpl(PYSPARK_PRIMARY_FILE, PYSPARK_APP_ARGS)), - MAIN_CLASS, - SPARK_CONF, - PYSPARK_APP_ARGS, - false, - kubernetesClient, - initContainerComponentsProvider, - credentialsMounterProvider, - loggingPodStatusWatcher).run() - val podMatcher = new BaseMatcher[Pod] { - override def matches(o: scala.Any): Boolean = { - o match { - case p: Pod => pred(p) - case _ => false - } - } - override def describeTo(description: Description): Unit = {} - } - verify(podOps).create(argThat(podMatcher)) - } -} - - diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala deleted file mode 100644 index 7e51abcd7b8e0..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ContainerLocalizedFilesResolverSuite.scala +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkFunSuite - -class ContainerLocalizedFilesResolverSuite extends SparkFunSuite { - private val SPARK_JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", - "file:///app/jars/jar2.jar", - "local:///app/jars/jar3.jar", - "http://app/jars/jar4.jar") - private val SPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", - "file:///app/files/file2.txt", - "local:///app/files/file3.txt", - "http://app/files/file4.txt") - private val PYSPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.py", - "file:///app/files/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py", - "file:///app/files/file5.py") - private val JARS_DOWNLOAD_PATH = "/var/data/spark-jars" - private val FILES_DOWNLOAD_PATH = "/var/data/spark-files" - private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" - private val localizedFilesResolver = new ContainerLocalizedFilesResolverImpl( - SPARK_JARS, - SPARK_FILES, - PYSPARK_FILES, - PYSPARK_PRIMARY_FILE, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH) - - test("Submitted and remote Spark jars should resolve non-local uris to download path.") { - val resolvedJars = localizedFilesResolver.resolveSubmittedAndRemoteSparkJars() - val expectedResolvedJars = Seq( - s"$JARS_DOWNLOAD_PATH/jar1.jar", - s"$JARS_DOWNLOAD_PATH/jar2.jar", - "/app/jars/jar3.jar", - s"$JARS_DOWNLOAD_PATH/jar4.jar") - assert(resolvedJars === expectedResolvedJars) - } - - test("Submitted Spark jars should resolve to the download path.") { - val resolvedJars = localizedFilesResolver.resolveSubmittedSparkJars() - val expectedResolvedJars = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", - s"$JARS_DOWNLOAD_PATH/jar2.jar", - "local:///app/jars/jar3.jar", - "http://app/jars/jar4.jar") - assert(resolvedJars === expectedResolvedJars) - } - - test("Submitted Spark files should resolve to the download path.") { - val resolvedFiles = localizedFilesResolver.resolveSubmittedSparkFiles() - val expectedResolvedFiles = Seq( - "hdfs://localhost:9000/app/files/file1.txt", - s"$FILES_DOWNLOAD_PATH/file2.txt", - "local:///app/files/file3.txt", - "http://app/files/file4.txt") - assert(resolvedFiles === expectedResolvedFiles) - } - test("Submitted PySpark files should resolve to the download path.") { - val resolvedPySparkFiles = localizedFilesResolver.resolveSubmittedPySparkFiles() - val expectedPySparkFiles = Seq( - "hdfs://localhost:9000/app/files/file1.py", - s"$FILES_DOWNLOAD_PATH/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py") - assert(resolvedPySparkFiles === expectedPySparkFiles) - } - test("Submitted PySpark Primary resource should resolve to the download path.") { - val resolvedPySparkPrimary = - localizedFilesResolver.resolvePrimaryResourceFile() - val expectedPySparkPrimary = s"$FILES_DOWNLOAD_PATH/file5.py" - assert(resolvedPySparkPrimary === expectedPySparkPrimary) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestratorSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestratorSuite.scala new file mode 100644 index 0000000000000..e4f221ad99cc5 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverConfigurationStepsOrchestratorSuite.scala @@ -0,0 +1,95 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * 
contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.submit.submitsteps.{BaseDriverConfigurationStep, DependencyResolutionStep, DriverKubernetesCredentialsStep, InitContainerBootstrapStep, PythonStep} + +private[spark] class DriverConfigurationStepsOrchestratorSuite extends SparkFunSuite { + + private val NAMESPACE = "default" + private val APP_ID = "spark-app-id" + private val LAUNCH_TIME = 975256L + private val APP_NAME = "spark" + private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" + private val APP_ARGS = Array("arg1", "arg2") + private val ADDITIONAL_PYTHON_FILES = Seq("local:///var/apps/python/py1.py") + + test("Base submission steps without an init-container or python files.") { + val sparkConf = new SparkConf(false) + .set("spark.jars", "local:///var/apps/jars/jar1.jar") + val mainAppResource = JavaMainAppResource("local:///var/apps/jars/main.jar") + val orchestrator = new DriverConfigurationStepsOrchestrator( + NAMESPACE, + APP_ID, + LAUNCH_TIME, + mainAppResource, + APP_NAME, + MAIN_CLASS, + APP_ARGS, + ADDITIONAL_PYTHON_FILES, + sparkConf) + val steps = orchestrator.getAllConfigurationSteps() + assert(steps.size === 3) + assert(steps(0).isInstanceOf[BaseDriverConfigurationStep]) + assert(steps(1).isInstanceOf[DriverKubernetesCredentialsStep]) + assert(steps(2).isInstanceOf[DependencyResolutionStep]) + } + + test("Submission steps with an init-container.") { + val sparkConf = new SparkConf(false) + .set("spark.jars", "hdfs://localhost:9000/var/apps/jars/jar1.jar") + val mainAppResource = JavaMainAppResource("local:///var/apps/jars/main.jar") + val orchestrator = new DriverConfigurationStepsOrchestrator( + NAMESPACE, + APP_ID, + LAUNCH_TIME, + mainAppResource, + APP_NAME, + MAIN_CLASS, + APP_ARGS, + ADDITIONAL_PYTHON_FILES, + sparkConf) + val steps = orchestrator.getAllConfigurationSteps() + assert(steps.size === 4) + assert(steps(0).isInstanceOf[BaseDriverConfigurationStep]) + assert(steps(1).isInstanceOf[DriverKubernetesCredentialsStep]) + assert(steps(2).isInstanceOf[DependencyResolutionStep]) + assert(steps(3).isInstanceOf[InitContainerBootstrapStep]) + } + + test("Submission steps with python files.") { + val sparkConf = new SparkConf(false) + val mainAppResource = PythonMainAppResource("local:///var/apps/python/main.py") + val orchestrator = new DriverConfigurationStepsOrchestrator( + NAMESPACE, + APP_ID, + LAUNCH_TIME, + mainAppResource, + APP_NAME, + MAIN_CLASS, + APP_ARGS, + ADDITIONAL_PYTHON_FILES, + sparkConf) + val steps = orchestrator.getAllConfigurationSteps() + assert(steps.size === 4) + assert(steps(0).isInstanceOf[BaseDriverConfigurationStep]) + assert(steps(1).isInstanceOf[DriverKubernetesCredentialsStep]) + 
assert(steps(2).isInstanceOf[DependencyResolutionStep]) + assert(steps(3).isInstanceOf[PythonStep]) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala deleted file mode 100644 index 2e0a7ba5098b2..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/DriverPodKubernetesCredentialsMounterSuite.scala +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import io.fabric8.kubernetes.api.model.{PodBuilder, SecretBuilder} -import org.scalatest.prop.TableDrivenPropertyChecks -import scala.collection.JavaConverters._ - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.KubernetesCredentials -import org.apache.spark.deploy.kubernetes.config._ -import org.apache.spark.deploy.kubernetes.constants._ - -class DriverPodKubernetesCredentialsMounterSuite - extends SparkFunSuite with TableDrivenPropertyChecks { - - private val CLIENT_KEY_DATA = "client-key-data" - private val CLIENT_CERT_DATA = "client-cert-data" - private val OAUTH_TOKEN_DATA = "oauth-token" - private val CA_CERT_DATA = "ca-cert-data" - private val SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS = KubernetesCredentials( - caCertDataBase64 = Some(CA_CERT_DATA), - clientKeyDataBase64 = Some(CLIENT_KEY_DATA), - clientCertDataBase64 = Some(CLIENT_CERT_DATA), - oauthTokenBase64 = Some(OAUTH_TOKEN_DATA)) - private val APP_ID = "app-id" - private val USER_SPECIFIED_CLIENT_KEY_FILE = Some("/var/data/client-key.pem") - private val USER_SPECIFIED_CLIENT_CERT_FILE = Some("/var/data/client-cert.pem") - private val USER_SPECIFIED_OAUTH_TOKEN_FILE = Some("/var/data/token.txt") - private val USER_SPECIFIED_CA_CERT_FILE = Some("/var/data/ca.pem") - - // Different configurations of credentials mounters - private val credentialsMounterWithPreMountedFiles = - new DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId = APP_ID, - submitterLocalDriverPodKubernetesCredentials = SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS, - maybeUserSpecifiedMountedClientKeyFile = USER_SPECIFIED_CLIENT_KEY_FILE, - maybeUserSpecifiedMountedClientCertFile = USER_SPECIFIED_CLIENT_CERT_FILE, - maybeUserSpecifiedMountedOAuthTokenFile = USER_SPECIFIED_OAUTH_TOKEN_FILE, - maybeUserSpecifiedMountedCaCertFile = USER_SPECIFIED_CA_CERT_FILE) - private val credentialsMounterWithoutPreMountedFiles = - new DriverPodKubernetesCredentialsMounterImpl( - kubernetesAppId = APP_ID, - 
submitterLocalDriverPodKubernetesCredentials = SUBMITTER_LOCAL_DRIVER_KUBERNETES_CREDENTIALS, - maybeUserSpecifiedMountedClientKeyFile = None, - maybeUserSpecifiedMountedClientCertFile = None, - maybeUserSpecifiedMountedOAuthTokenFile = None, - maybeUserSpecifiedMountedCaCertFile = None) - private val credentialsMounterWithoutAnyDriverCredentials = - new DriverPodKubernetesCredentialsMounterImpl( - APP_ID, KubernetesCredentials(None, None, None, None), None, None, None, None) - - // Test matrices - private val TEST_MATRIX_EXPECTED_SPARK_CONFS = Table( - ("Credentials Mounter Implementation", - "Expected client key file", - "Expected client cert file", - "Expected CA Cert file", - "Expected OAuth Token File"), - (credentialsMounterWithoutAnyDriverCredentials, - None, - None, - None, - None), - (credentialsMounterWithoutPreMountedFiles, - Some(DRIVER_CREDENTIALS_CLIENT_KEY_PATH), - Some(DRIVER_CREDENTIALS_CLIENT_CERT_PATH), - Some(DRIVER_CREDENTIALS_CA_CERT_PATH), - Some(DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH)), - (credentialsMounterWithPreMountedFiles, - USER_SPECIFIED_CLIENT_KEY_FILE, - USER_SPECIFIED_CLIENT_CERT_FILE, - USER_SPECIFIED_CA_CERT_FILE, - USER_SPECIFIED_OAUTH_TOKEN_FILE)) - - private val TEST_MATRIX_EXPECTED_CREDENTIALS_SECRET = Table( - ("Credentials Mounter Implementation", "Expected Credentials Secret Data"), - (credentialsMounterWithoutAnyDriverCredentials, None), - (credentialsMounterWithoutPreMountedFiles, - Some(KubernetesSecretNameAndData( - data = Map[String, String]( - DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME -> CLIENT_KEY_DATA, - DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME -> CLIENT_CERT_DATA, - DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME -> CA_CERT_DATA, - DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME -> OAUTH_TOKEN_DATA - ), - name = s"$APP_ID-kubernetes-credentials"))), - (credentialsMounterWithPreMountedFiles, None)) - - test("Credentials mounter should set the driver's Kubernetes credentials locations") { - forAll(TEST_MATRIX_EXPECTED_SPARK_CONFS) { - case (credentialsMounter, - expectedClientKeyFile, - expectedClientCertFile, - expectedCaCertFile, - expectedOAuthTokenFile) => - val baseSparkConf = new SparkConf() - val resolvedSparkConf = - credentialsMounter.setDriverPodKubernetesCredentialLocations(baseSparkConf) - assert(resolvedSparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX") === - expectedClientKeyFile) - assert(resolvedSparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX") === - expectedClientCertFile) - assert(resolvedSparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX") === - expectedCaCertFile) - assert(resolvedSparkConf.getOption( - s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX") === - expectedOAuthTokenFile) - } - } - - test("Credentials mounter should create the correct credentials secret.") { - forAll(TEST_MATRIX_EXPECTED_CREDENTIALS_SECRET) { - case (credentialsMounter, expectedSecretNameAndData) => - val builtSecret = credentialsMounter.createCredentialsSecret() - val secretNameAndData = builtSecret.map { secret => - KubernetesSecretNameAndData(secret.getMetadata.getName, secret.getData.asScala.toMap) - } - assert(secretNameAndData === expectedSecretNameAndData) - } - } - - test("When credentials secret is provided, driver pod should mount the secret volume.") { - val credentialsSecret = new SecretBuilder() - .withNewMetadata().withName("secret").endMetadata() - .addToData("secretKey", 
"secretValue") - .build() - val originalPodSpec = new PodBuilder() - .withNewMetadata().withName("pod").endMetadata() - .withNewSpec() - .addNewContainer() - .withName("container") - .endContainer() - .endSpec() - val podSpecWithMountedDriverKubernetesCredentials = - credentialsMounterWithoutPreMountedFiles.mountDriverKubernetesCredentials( - originalPodSpec, "container", Some(credentialsSecret)).build() - val volumes = podSpecWithMountedDriverKubernetesCredentials.getSpec.getVolumes.asScala - assert(volumes.exists(_.getName == DRIVER_CREDENTIALS_SECRET_VOLUME_NAME)) - volumes.find(_.getName == DRIVER_CREDENTIALS_SECRET_VOLUME_NAME).foreach { secretVolume => - assert(secretVolume.getSecret != null && secretVolume.getSecret.getSecretName == "secret") - } - } - - test("When credentials secret is absent, driver pod should not be changed.") { - val originalPodSpec = new PodBuilder() - val nonAdjustedPodSpec = - credentialsMounterWithoutAnyDriverCredentials.mountDriverKubernetesCredentials( - originalPodSpec, "driver", None) - assert(nonAdjustedPodSpec === originalPodSpec) - } -} - -private case class KubernetesSecretNameAndData(name: String, data: Map[String, String]) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala deleted file mode 100644 index ead1d49b8a37c..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/ExecutorInitContainerConfigurationSuite.scala +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.{SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.config._ - -class ExecutorInitContainerConfigurationSuite extends SparkFunSuite { - - private val SECRET_NAME = "init-container-secret" - private val SECRET_MOUNT_DIR = "/mnt/secrets/spark" - private val CONFIG_MAP_NAME = "spark-config-map" - private val CONFIG_MAP_KEY = "spark-config-map-key" - - test("Not passing a secret name should not set the secret value.") { - val baseSparkConf = new SparkConf(false) - val configurationUnderTest = new ExecutorInitContainerConfigurationImpl( - None, - SECRET_MOUNT_DIR, - CONFIG_MAP_NAME, - CONFIG_MAP_KEY) - val resolvedSparkConf = configurationUnderTest - .configureSparkConfForExecutorInitContainer(baseSparkConf) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP).contains(CONFIG_MAP_NAME)) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY).contains(CONFIG_MAP_KEY)) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR) - .contains(SECRET_MOUNT_DIR)) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET).isEmpty) - } - - test("Passing a secret name should set the secret value.") { - val baseSparkConf = new SparkConf(false) - val configurationUnderTest = new ExecutorInitContainerConfigurationImpl( - Some(SECRET_NAME), - SECRET_MOUNT_DIR, - CONFIG_MAP_NAME, - CONFIG_MAP_KEY) - val resolvedSparkConf = configurationUnderTest - .configureSparkConfForExecutorInitContainer(baseSparkConf) - assert(resolvedSparkConf.get(EXECUTOR_INIT_CONTAINER_SECRET).contains(SECRET_NAME)) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala deleted file mode 100644 index 9b60b7ef2b786..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/PythonSubmissionResourcesSuite.scala +++ /dev/null @@ -1,109 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.{SSLOptions, SparkConf, SparkFunSuite} -import org.apache.spark.deploy.kubernetes.config._ - -import scala.collection.JavaConverters._ -import io.fabric8.kubernetes.api.model.{ContainerBuilder, Pod, PodBuilder} -import org.mockito.{Mock, MockitoAnnotations} -import org.mockito.Mockito.when -import org.scalatest.BeforeAndAfter - -private[spark] class PythonSubmissionResourcesSuite extends SparkFunSuite with BeforeAndAfter { - private val PYSPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.py", - "file:///app/files/file2.py", - "local:///app/files/file3.py", - "http://app/files/file4.py", - "file:///app/files/file5.py") - private val RESOLVED_PYSPARK_FILES = Seq( - "hdfs://localhost:9000/app/files/file1.py", - "/var/spark-data/spark-files/file2.py", - "local:///app/file`s/file3.py", - "http://app/files/file4.py") - private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" - private val RESOLVED_PYSPARK_PRIMARY_FILE = "/var/data/spark-files/file5.py" - - private val pyFilesResource = new PythonSubmissionResourcesImpl( - PYSPARK_PRIMARY_FILE, Array(PYSPARK_FILES.mkString(","), "500") - ) - private val pyResource = new PythonSubmissionResourcesImpl( - PYSPARK_PRIMARY_FILE, Array(null, "500") - ) - private val DRIVER_CONTAINER_NAME = "pyspark_container" - private val driverContainer = new ContainerBuilder() - .withName(DRIVER_CONTAINER_NAME) - .build() - private val basePodBuilder = new PodBuilder() - .withNewMetadata() - .withName("base_pod") - .endMetadata() - .withNewSpec() - .addToContainers(driverContainer) - .endSpec() - - @Mock - private var driverInitContainer: DriverInitContainerComponentsProviderImpl = _ - @Mock - private var localizedFileResolver: ContainerLocalizedFilesResolverImpl = _ - before { - MockitoAnnotations.initMocks(this) - when(driverInitContainer.provideDriverPodFileMounter()).thenReturn( - new DriverPodKubernetesFileMounterImpl() - ) - when(localizedFileResolver.resolvePrimaryResourceFile()).thenReturn( - RESOLVED_PYSPARK_PRIMARY_FILE) - } - test("Test with --py-files included") { - assert(pyFilesResource.sparkJars === Seq.empty[String]) - assert(pyFilesResource.pySparkFiles === - PYSPARK_PRIMARY_FILE +: PYSPARK_FILES) - assert(pyFilesResource.primaryPySparkResource(localizedFileResolver) === - RESOLVED_PYSPARK_PRIMARY_FILE) - val driverPod: Pod = pyFilesResource.driverPodWithPySparkEnvs( - driverInitContainer.provideDriverPodFileMounter(), - RESOLVED_PYSPARK_PRIMARY_FILE, - RESOLVED_PYSPARK_FILES.mkString(","), - DRIVER_CONTAINER_NAME, - basePodBuilder - ) - val driverContainer = driverPod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => (env.getName, env.getValue)).toMap - envs.get("PYSPARK_PRIMARY") foreach{ a => assert (a === RESOLVED_PYSPARK_PRIMARY_FILE) } - envs.get("PYSPARK_FILES") foreach{ a => assert (a === RESOLVED_PYSPARK_FILES.mkString(",")) } - } - - test("Test without --py-files") { - assert(pyResource.sparkJars === Seq.empty[String]) - assert(pyResource.pySparkFiles === Array(PYSPARK_PRIMARY_FILE)) - assert(pyResource.primaryPySparkResource(localizedFileResolver) === - RESOLVED_PYSPARK_PRIMARY_FILE) - val driverPod: Pod = pyResource.driverPodWithPySparkEnvs( - driverInitContainer.provideDriverPodFileMounter(), - RESOLVED_PYSPARK_PRIMARY_FILE, - "", - DRIVER_CONTAINER_NAME, - basePodBuilder - ) - val driverContainer = driverPod.getSpec.getContainers.asScala.head - val envs = driverContainer.getEnv.asScala.map(env => 
(env.getName, env.getValue)).toMap - envs.get("PYSPARK_PRIMARY") foreach{ a => assert (a === RESOLVED_PYSPARK_PRIMARY_FILE) } - envs.get("PYSPARK_FILES") foreach{ a => assert (a === "") } - } -} \ No newline at end of file diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala deleted file mode 100644 index f1e1ff7013496..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SparkInitContainerConfigMapBuilderSuite.scala +++ /dev/null @@ -1,101 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.StringReader -import java.util.Properties - -import com.google.common.collect.Maps -import org.mockito.Mockito.{verify, when} -import org.scalatest.BeforeAndAfter -import org.scalatest.mock.MockitoSugar._ -import scala.collection.JavaConverters._ - -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.config._ - -class SparkInitContainerConfigMapBuilderSuite extends SparkFunSuite with BeforeAndAfter { - - private val JARS = Seq( - "hdfs://localhost:9000/app/jars/jar1.jar", - "file:///app/jars/jar2.jar", - "http://localhost:9000/app/jars/jar3.jar", - "local:///app/jars/jar4.jar") - private val FILES = Seq( - "hdfs://localhost:9000/app/files/file1.txt", - "file:///app/files/file2.txt", - "http://localhost:9000/app/files/file3.txt", - "local:///app/files/file4.txt") - private val JARS_DOWNLOAD_PATH = "/var/data/jars" - private val FILES_DOWNLOAD_PATH = "/var/data/files" - private val CONFIG_MAP_NAME = "config-map" - private val CONFIG_MAP_KEY = "config-map-key" - - test("Config map without submitted dependencies sets remote download configurations") { - val configMap = new SparkInitContainerConfigMapBuilderImpl( - JARS, - FILES, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - CONFIG_MAP_NAME, - CONFIG_MAP_KEY, - None).build() - assert(configMap.getMetadata.getName === CONFIG_MAP_NAME) - val maybeConfigValue = configMap.getData.asScala.get(CONFIG_MAP_KEY) - assert(maybeConfigValue.isDefined) - maybeConfigValue.foreach { configValue => - val propertiesStringReader = new StringReader(configValue) - val properties = new Properties() - properties.load(propertiesStringReader) - val propertiesMap = Maps.fromProperties(properties).asScala - val remoteJarsString = propertiesMap.get(INIT_CONTAINER_REMOTE_JARS.key) - assert(remoteJarsString.isDefined) - val remoteJars = remoteJarsString.map(_.split(",")).toSet.flatten - assert(remoteJars === - 
Set("hdfs://localhost:9000/app/jars/jar1.jar", "http://localhost:9000/app/jars/jar3.jar")) - val remoteFilesString = propertiesMap.get(INIT_CONTAINER_REMOTE_FILES.key) - assert(remoteFilesString.isDefined) - val remoteFiles = remoteFilesString.map(_.split(",")).toSet.flatten - assert(remoteFiles === - Set("hdfs://localhost:9000/app/files/file1.txt", - "http://localhost:9000/app/files/file3.txt")) - assert(propertiesMap(INIT_CONTAINER_JARS_DOWNLOAD_LOCATION.key) === JARS_DOWNLOAD_PATH) - assert(propertiesMap(INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key) === FILES_DOWNLOAD_PATH) - } - } - - test("Config map with submitted dependencies adds configurations from plugin") { - val submittedDependenciesPlugin = mock[SubmittedDependencyInitContainerConfigPlugin] - when(submittedDependenciesPlugin.configurationsToFetchSubmittedDependencies()) - .thenReturn(Map("customConf" -> "customConfValue")) - val configMap = new SparkInitContainerConfigMapBuilderImpl( - JARS, - FILES, - JARS_DOWNLOAD_PATH, - FILES_DOWNLOAD_PATH, - CONFIG_MAP_NAME, - CONFIG_MAP_KEY, - Some(submittedDependenciesPlugin)).build() - val configValue = configMap.getData.asScala(CONFIG_MAP_KEY) - val propertiesStringReader = new StringReader(configValue) - val properties = new Properties() - properties.load(propertiesStringReader) - val propertiesMap = Maps.fromProperties(properties).asScala - assert(propertiesMap("customConf") === "customConfValue") - verify(submittedDependenciesPlugin).configurationsToFetchSubmittedDependencies() - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala deleted file mode 100644 index 8431b77c9e85f..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencyInitContainerConfigPluginSuite.scala +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.spark.deploy.kubernetes.submit - -import org.apache.spark.SparkFunSuite -import org.apache.spark.deploy.kubernetes.config._ - -class SubmittedDependencyInitContainerConfigPluginSuite extends SparkFunSuite { - private val STAGING_SERVER_URI = "http://localhost:9000" - private val STAGING_SERVER_INTERNAL_URI = "http://internalHost:9000" - private val JARS_RESOURCE_ID = "jars-id" - private val FILES_RESOURCE_ID = "files-id" - private val JARS_SECRET_KEY = "jars" - private val FILES_SECRET_KEY = "files" - private val TRUSTSTORE_SECRET_KEY = "trustStore" - private val CLIENT_CERT_SECRET_KEY = "client-cert" - private val SECRETS_VOLUME_MOUNT_PATH = "/var/data" - private val TRUSTSTORE_PASSWORD = "trustStore" - private val TRUSTSTORE_FILE = "/mnt/secrets/trustStore.jks" - private val CLIENT_CERT_URI = "local:///mnt/secrets/client-cert.pem" - private val TRUSTSTORE_TYPE = "jks" - - test("Plugin should provide configuration for fetching uploaded dependencies") { - val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( - STAGING_SERVER_URI, - JARS_RESOURCE_ID, - FILES_RESOURCE_ID, - JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - false, - None, - None, - None, - None, - SECRETS_VOLUME_MOUNT_PATH) - val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() - val expectedConfigurations = Map( - RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, - INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, - INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, - INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> - s"$SECRETS_VOLUME_MOUNT_PATH/$JARS_SECRET_KEY", - INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> - s"$SECRETS_VOLUME_MOUNT_PATH/$FILES_SECRET_KEY", - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "false") - assert(addedConfigurations === expectedConfigurations) - } - - test("Plugin should set up SSL with the appropriate trustStore if it's provided.") { - val configPluginUnderTest = new SubmittedDependencyInitContainerConfigPluginImpl( - STAGING_SERVER_URI, - JARS_RESOURCE_ID, - FILES_RESOURCE_ID, JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - true, - Some(TRUSTSTORE_FILE), - Some(CLIENT_CERT_URI), - Some(TRUSTSTORE_PASSWORD), - Some(TRUSTSTORE_TYPE), - SECRETS_VOLUME_MOUNT_PATH) - val addedConfigurations = configPluginUnderTest.configurationsToFetchSubmittedDependencies() - val expectedSslConfigurations = Map( - RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> "true", - RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> - s"$SECRETS_VOLUME_MOUNT_PATH/$TRUSTSTORE_SECRET_KEY", - RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASSWORD, - RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE, - RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key -> "/mnt/secrets/client-cert.pem") - assert(expectedSslConfigurations.toSet.subsetOf(addedConfigurations.toSet)) - } -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala deleted file mode 100644 index 83fd568e7a3aa..0000000000000 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/SubmittedDependencySecretBuilderSuite.scala +++ /dev/null @@ -1,109 +0,0 @@ -/* - 
* Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.spark.deploy.kubernetes.submit - -import java.io.File - -import com.google.common.base.Charsets -import com.google.common.io.{BaseEncoding, Files} -import io.fabric8.kubernetes.api.model.Secret -import scala.collection.JavaConverters._ -import scala.collection.Map - -import org.apache.spark.SparkFunSuite -import org.apache.spark.util.Utils - -class SubmittedDependencySecretBuilderSuite extends SparkFunSuite { - - private val SECRET_NAME = "submitted-dependency-secret" - private val JARS_SECRET = "jars-secret" - private val FILES_SECRET = "files-secret" - private val JARS_SECRET_KEY = "jars-secret-key" - private val FILES_SECRET_KEY = "files-secret-key" - private val TRUSTSTORE_SECRET_KEY = "truststore-secret-key" - private val CLIENT_CERT_SECRET_KEY = "client-cert" - private val TRUSTSTORE_STRING_CONTENTS = "trustStore-contents" - private val CLIENT_CERT_STRING_CONTENTS = "client-certificate-contents" - - test("Building the secret without a trustStore") { - val builder = new SubmittedDependencySecretBuilderImpl( - SECRET_NAME, - JARS_SECRET, - FILES_SECRET, - JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - None, - None) - val secret = builder.build() - assert(secret.getMetadata.getName === SECRET_NAME) - val secretDecodedData = decodeSecretData(secret) - val expectedSecretData = Map(JARS_SECRET_KEY -> JARS_SECRET, FILES_SECRET_KEY -> FILES_SECRET) - assert(secretDecodedData === expectedSecretData) - } - - private def decodeSecretData(secret: Secret): Map[String, String] = { - val secretData = secret.getData.asScala - secretData.mapValues(encoded => - new String(BaseEncoding.base64().decode(encoded), Charsets.UTF_8)) - } - - test("Building the secret with a trustStore") { - val tempSslDir = Utils.createTempDir(namePrefix = "temp-ssl-tests") - try { - val trustStoreFile = new File(tempSslDir, "trustStore.jks") - Files.write(TRUSTSTORE_STRING_CONTENTS, trustStoreFile, Charsets.UTF_8) - val clientCertFile = new File(tempSslDir, "cert.pem") - Files.write(CLIENT_CERT_STRING_CONTENTS, clientCertFile, Charsets.UTF_8) - val builder = new SubmittedDependencySecretBuilderImpl( - SECRET_NAME, - JARS_SECRET, - FILES_SECRET, - JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - Some(trustStoreFile.getAbsolutePath), - Some(clientCertFile.getAbsolutePath)) - val secret = builder.build() - val decodedSecretData = decodeSecretData(secret) - assert(decodedSecretData(TRUSTSTORE_SECRET_KEY) === TRUSTSTORE_STRING_CONTENTS) - assert(decodedSecretData(CLIENT_CERT_SECRET_KEY) === CLIENT_CERT_STRING_CONTENTS) - } finally { - tempSslDir.delete() - } - } - - test("If trustStore and certificate are container-local, don't add 
secret entries") { - val builder = new SubmittedDependencySecretBuilderImpl( - SECRET_NAME, - JARS_SECRET, - FILES_SECRET, - JARS_SECRET_KEY, - FILES_SECRET_KEY, - TRUSTSTORE_SECRET_KEY, - CLIENT_CERT_SECRET_KEY, - Some("local:///mnt/secrets/trustStore.jks"), - Some("local:///mnt/secrets/cert.pem")) - val secret = builder.build() - val decodedSecretData = decodeSecretData(secret) - assert(!decodedSecretData.contains(TRUSTSTORE_SECRET_KEY)) - assert(!decodedSecretData.contains(CLIENT_CERT_SECRET_KEY)) - } - -} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala new file mode 100644 index 0000000000000..c7d80a16a1532 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala @@ -0,0 +1,106 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] class BaseDriverConfigurationStepSuite extends SparkFunSuite { + + private val APP_ID = "spark-app-id" + private val RESOURCE_NAME_PREFIX = "spark" + private val DRIVER_LABELS = Map("labelkey" -> "labelvalue") + private val DOCKER_IMAGE_PULL_POLICY = "IfNotPresent" + private val APP_NAME = "spark-test" + private val MAIN_CLASS = "org.apache.spark.examples.SparkPi" + private val APP_ARGS = Array("arg1", "arg2") + private val CUSTOM_ANNOTATION_KEY = "customAnnotation" + private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" + private val DEPRECATED_CUSTOM_ANNOTATION_KEY = "customAnnotationDeprecated" + private val DEPRECATED_CUSTOM_ANNOTATION_VALUE = "customAnnotationDeprecatedValue" + + test("Set all possible configurations from the user.") { + val sparkConf = new SparkConf() + .set(KUBERNETES_DRIVER_POD_NAME, "spark-driver-pod") + .set(org.apache.spark.internal.config.DRIVER_CLASS_PATH, "/opt/spark/spark-examples.jar") + .set("spark.driver.cores", "2") + .set(KUBERNETES_DRIVER_LIMIT_CORES, "4") + .set(org.apache.spark.internal.config.DRIVER_MEMORY, 256L) + .set(KUBERNETES_DRIVER_MEMORY_OVERHEAD, 200L) + .set(DRIVER_DOCKER_IMAGE, "spark-driver:latest") + .set(s"spark.kubernetes.driver.annotation.$CUSTOM_ANNOTATION_KEY", CUSTOM_ANNOTATION_VALUE) + .set("spark.kubernetes.driver.annotations", + s"$DEPRECATED_CUSTOM_ANNOTATION_KEY=$DEPRECATED_CUSTOM_ANNOTATION_VALUE") + val submissionStep = new BaseDriverConfigurationStep( + APP_ID, + RESOURCE_NAME_PREFIX, + DRIVER_LABELS, + DOCKER_IMAGE_PULL_POLICY, + APP_NAME, + MAIN_CLASS, + APP_ARGS, + sparkConf) + val basePod = new PodBuilder().withNewMetadata().endMetadata().withNewSpec().endSpec().build() + val baseDriverSpec = KubernetesDriverSpec( + driverPod = basePod, + driverContainer = new ContainerBuilder().build(), + driverSparkConf = new SparkConf(false), + otherKubernetesResources = Seq.empty[HasMetadata]) + + val preparedDriverSpec = submissionStep.configureDriver(baseDriverSpec) + assert(preparedDriverSpec.driverContainer.getName === DRIVER_CONTAINER_NAME) + assert(preparedDriverSpec.driverContainer.getImage === "spark-driver:latest") + assert(preparedDriverSpec.driverContainer.getImagePullPolicy === DOCKER_IMAGE_PULL_POLICY) + val envs = preparedDriverSpec.driverContainer + .getEnv + .asScala + .map(env => (env.getName, env.getValue)) + .toMap + assert(envs.size === 4) + assert(envs(ENV_SUBMIT_EXTRA_CLASSPATH) === "/opt/spark/spark-examples.jar") + assert(envs(ENV_DRIVER_MEMORY) === "456m") + assert(envs(ENV_DRIVER_MAIN_CLASS) === MAIN_CLASS) + assert(envs(ENV_DRIVER_ARGS) === "arg1 arg2") + val resourceRequirements = preparedDriverSpec.driverContainer.getResources + val requests = resourceRequirements.getRequests.asScala + assert(requests("cpu").getAmount === "2") + assert(requests("memory").getAmount === "256M") + val limits = resourceRequirements.getLimits.asScala + assert(limits("memory").getAmount === "456M") + assert(limits("cpu").getAmount === "4") + val driverPodMetadata = preparedDriverSpec.driverPod.getMetadata + assert(driverPodMetadata.getName === "spark-driver-pod") + assert(driverPodMetadata.getLabels.asScala === DRIVER_LABELS) + val expectedAnnotations
= Map( + CUSTOM_ANNOTATION_KEY -> CUSTOM_ANNOTATION_VALUE, + DEPRECATED_CUSTOM_ANNOTATION_KEY -> DEPRECATED_CUSTOM_ANNOTATION_VALUE, + SPARK_APP_NAME_ANNOTATION -> APP_NAME) + assert(driverPodMetadata.getAnnotations.asScala === expectedAnnotations) + assert(preparedDriverSpec.driverPod.getSpec.getRestartPolicy === "Never") + val resolvedSparkConf = preparedDriverSpec.driverSparkConf.getAll.toMap + val expectedSparkConf = Map( + KUBERNETES_DRIVER_POD_NAME.key -> "spark-driver-pod", + "spark.app.id" -> APP_ID, + KUBERNETES_EXECUTOR_POD_NAME_PREFIX.key -> RESOURCE_NAME_PREFIX) + assert(resolvedSparkConf === expectedSparkConf) + + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStepSuite.scala new file mode 100644 index 0000000000000..3f7ec61074b0c --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DependencyResolutionStepSuite.scala @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import java.io.File + +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.constants._ + +private[spark] class DependencyResolutionStepSuite extends SparkFunSuite { + + private val SPARK_JARS = Seq( + "hdfs://localhost:9000/apps/jars/jar1.jar", + "file:///home/user/apps/jars/jar2.jar", + "local:///var/apps/jars/jar3.jar") + + private val SPARK_FILES = Seq( + "file:///home/user/apps/files/file1.txt", + "hdfs://localhost:9000/apps/files/file2.txt", + "local:///var/apps/files/file3.txt") + + private val JARS_DOWNLOAD_PATH = "/mnt/spark-data/jars" + private val FILES_DOWNLOAD_PATH = "/mnt/spark-data/files" + + test("Added dependencies should be resolved in Spark configuration and environment") { + val dependencyResolutionStep = new DependencyResolutionStep( + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH) + val driverPod = new PodBuilder().build() + val baseDriverSpec = KubernetesDriverSpec( + driverPod = driverPod, + driverContainer = new ContainerBuilder().build(), + driverSparkConf = new SparkConf(false), + otherKubernetesResources = Seq.empty[HasMetadata]) + val preparedDriverSpec = dependencyResolutionStep.configureDriver(baseDriverSpec) + assert(preparedDriverSpec.driverPod === driverPod) + assert(preparedDriverSpec.otherKubernetesResources.isEmpty) + val resolvedSparkJars = preparedDriverSpec.driverSparkConf.get("spark.jars").split(",").toSet + val expectedResolvedSparkJars = Set( + "hdfs://localhost:9000/apps/jars/jar1.jar", + s"$JARS_DOWNLOAD_PATH/jar2.jar", + "/var/apps/jars/jar3.jar") + assert(resolvedSparkJars === expectedResolvedSparkJars) + val resolvedSparkFiles = preparedDriverSpec.driverSparkConf.get("spark.files").split(",").toSet + val expectedResolvedSparkFiles = Set( + s"$FILES_DOWNLOAD_PATH/file1.txt", + s"hdfs://localhost:9000/apps/files/file2.txt", + s"/var/apps/files/file3.txt") + assert(resolvedSparkFiles === expectedResolvedSparkFiles) + val driverEnv = preparedDriverSpec.driverContainer.getEnv.asScala + assert(driverEnv.size === 1) + assert(driverEnv.head.getName === ENV_MOUNTED_CLASSPATH) + val resolvedDriverClasspath = driverEnv.head.getValue.split(File.pathSeparator).toSet + val expectedResolvedDriverClasspath = Set( + s"$JARS_DOWNLOAD_PATH/jar1.jar", + s"$JARS_DOWNLOAD_PATH/jar2.jar", + "/var/apps/jars/jar3.jar") + assert(resolvedDriverClasspath === expectedResolvedDriverClasspath) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStepSuite.scala new file mode 100644 index 0000000000000..3d5664713a2b8 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStepSuite.scala @@ -0,0 +1,152 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import java.io.File + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model.{ContainerBuilder, HasMetadata, PodBuilder, Secret} +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.util.Utils + +private[spark] class DriverKubernetesCredentialsStepSuite + extends SparkFunSuite with BeforeAndAfter { + + private val KUBERNETES_RESOURCE_NAME_PREFIX = "spark" + private var credentialsTempDirectory: File = _ + private val BASE_DRIVER_SPEC = new KubernetesDriverSpec( + driverPod = new PodBuilder().build(), + driverContainer = new ContainerBuilder().build(), + driverSparkConf = new SparkConf(false), + otherKubernetesResources = Seq.empty[HasMetadata]) + + before { + credentialsTempDirectory = Utils.createTempDir() + } + + after { + credentialsTempDirectory.delete() + } + + test("Don't set any credentials") { + val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( + new SparkConf(false), KUBERNETES_RESOURCE_NAME_PREFIX) + val preparedDriverSpec = kubernetesCredentialsStep.configureDriver(BASE_DRIVER_SPEC) + assert(preparedDriverSpec.driverPod === BASE_DRIVER_SPEC.driverPod) + assert(preparedDriverSpec.driverContainer === BASE_DRIVER_SPEC.driverContainer) + assert(preparedDriverSpec.otherKubernetesResources.isEmpty) + assert(preparedDriverSpec.driverSparkConf.getAll.isEmpty) + } + + test("Only set credentials that are manually mounted.") { + val submissionSparkConf = new SparkConf(false) + .set( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX", + "/mnt/secrets/my-token.txt") + .set( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + "/mnt/secrets/my-key.pem") + .set( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + "/mnt/secrets/my-cert.pem") + .set( + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + "/mnt/secrets/my-ca.pem") + + val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( + submissionSparkConf, KUBERNETES_RESOURCE_NAME_PREFIX) + val preparedDriverSpec = kubernetesCredentialsStep.configureDriver(BASE_DRIVER_SPEC) + assert(preparedDriverSpec.driverPod === BASE_DRIVER_SPEC.driverPod) + assert(preparedDriverSpec.driverContainer === BASE_DRIVER_SPEC.driverContainer) + assert(preparedDriverSpec.otherKubernetesResources.isEmpty) + assert(preparedDriverSpec.driverSparkConf.getAll.toMap === submissionSparkConf.getAll.toMap) + } + + test("Mount credentials from the submission client as a secret.") { + val caCertFile = writeCredentials("ca.pem", "ca-cert") + val clientKeyFile = writeCredentials("key.pem", "key") + val 
clientCertFile = writeCredentials("cert.pem", "cert") + val submissionSparkConf = new SparkConf(false) + .set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX", + "token") + .set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX", + clientKeyFile.getAbsolutePath) + .set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX", + clientCertFile.getAbsolutePath) + .set( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX", + caCertFile.getAbsolutePath) + val kubernetesCredentialsStep = new DriverKubernetesCredentialsStep( + submissionSparkConf, KUBERNETES_RESOURCE_NAME_PREFIX) + val preparedDriverSpec = kubernetesCredentialsStep.configureDriver( + BASE_DRIVER_SPEC.copy(driverSparkConf = submissionSparkConf)) + val expectedSparkConf = Map( + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$OAUTH_TOKEN_CONF_SUFFIX" -> "", + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$OAUTH_TOKEN_FILE_CONF_SUFFIX" -> + DRIVER_CREDENTIALS_OAUTH_TOKEN_PATH, + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX" -> + DRIVER_CREDENTIALS_CLIENT_KEY_PATH, + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX" -> + DRIVER_CREDENTIALS_CLIENT_CERT_PATH, + s"$APISERVER_AUTH_DRIVER_MOUNTED_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX" -> + DRIVER_CREDENTIALS_CA_CERT_PATH, + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_KEY_FILE_CONF_SUFFIX" -> + clientKeyFile.getAbsolutePath, + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CLIENT_CERT_FILE_CONF_SUFFIX" -> + clientCertFile.getAbsolutePath, + s"$APISERVER_AUTH_DRIVER_CONF_PREFIX.$CA_CERT_FILE_CONF_SUFFIX" -> + caCertFile.getAbsolutePath) + assert(preparedDriverSpec.driverSparkConf.getAll.toMap === expectedSparkConf) + assert(preparedDriverSpec.otherKubernetesResources.size === 1) + val credentialsSecret = preparedDriverSpec.otherKubernetesResources.head.asInstanceOf[Secret] + assert(credentialsSecret.getMetadata.getName === + s"$KUBERNETES_RESOURCE_NAME_PREFIX-kubernetes-credentials") + val decodedSecretData = credentialsSecret.getData.asScala.map { data => + (data._1, new String(BaseEncoding.base64().decode(data._2), Charsets.UTF_8)) + } + val expectedSecretData = Map( + DRIVER_CREDENTIALS_CA_CERT_SECRET_NAME -> "ca-cert", + DRIVER_CREDENTIALS_OAUTH_TOKEN_SECRET_NAME -> "token", + DRIVER_CREDENTIALS_CLIENT_KEY_SECRET_NAME -> "key", + DRIVER_CREDENTIALS_CLIENT_CERT_SECRET_NAME -> "cert") + assert(decodedSecretData === expectedSecretData) + val driverPodVolumes = preparedDriverSpec.driverPod.getSpec.getVolumes.asScala + assert(driverPodVolumes.size === 1) + assert(driverPodVolumes.head.getName === DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + assert(driverPodVolumes.head.getSecret != null) + assert(driverPodVolumes.head.getSecret.getSecretName === credentialsSecret.getMetadata.getName) + val driverContainerVolumeMount = preparedDriverSpec.driverContainer.getVolumeMounts.asScala + assert(driverContainerVolumeMount.size === 1) + assert(driverContainerVolumeMount.head.getName === DRIVER_CREDENTIALS_SECRET_VOLUME_NAME) + assert(driverContainerVolumeMount.head.getMountPath === DRIVER_CREDENTIALS_SECRETS_BASE_DIR) + } + + private def writeCredentials(credentialsFileName: String, credentialsContents: String): File = { + val credentialsFile = new File(credentialsTempDirectory, credentialsFileName) + Files.write(credentialsContents, credentialsFile, Charsets.UTF_8) + credentialsFile + } +} diff --git 
a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStepSuite.scala new file mode 100644 index 0000000000000..ce0dcee6acc46 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/PythonStepSuite.scala @@ -0,0 +1,76 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import io.fabric8.kubernetes.api.model._ +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} + +class PythonStepSuite extends SparkFunSuite with BeforeAndAfter { + private val FILE_DOWNLOAD_PATH = "/var/data/spark-files" + private val PYSPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.py", + "file:///app/files/file2.py", + "local:///app/files/file3.py", + "http://app/files/file4.py") + private val RESOLVED_PYSPARK_FILES = Seq( + FILE_DOWNLOAD_PATH + "/file1.py", + FILE_DOWNLOAD_PATH + "/file2.py", + "/app/files/file3.py", + FILE_DOWNLOAD_PATH + "/file4.py").mkString(",") + private val PYSPARK_PRIMARY_FILE = "file:///app/files/file5.py" + private val RESOLVED_PYSPARK_PRIMARY_FILE = FILE_DOWNLOAD_PATH + "/file5.py" + + test("testing PySpark with --py-files both local and remote files") { + val pyStep = new PythonStep( + PYSPARK_PRIMARY_FILE, + PYSPARK_FILES, + FILE_DOWNLOAD_PATH) + val returnedDriverContainer = pyStep.configureDriver( + KubernetesDriverSpec( + new Pod(), + new Container(), + Seq.empty[HasMetadata], + new SparkConf)) + assert(returnedDriverContainer.driverContainer.getEnv + .asScala.map(env => (env.getName, env.getValue)).toMap === + Map( + "PYSPARK_PRIMARY" -> RESOLVED_PYSPARK_PRIMARY_FILE, + "PYSPARK_FILES" -> RESOLVED_PYSPARK_FILES)) + } + + test("testing PySpark with empty --py-files ") { + val pyStep = new PythonStep( + PYSPARK_PRIMARY_FILE, + Seq.empty[String], + FILE_DOWNLOAD_PATH) + val returnedDriverContainer = pyStep.configureDriver( + KubernetesDriverSpec( + new Pod(), + new Container(), + Seq.empty[HasMetadata], + new SparkConf)) + assert(returnedDriverContainer.driverContainer.getEnv + .asScala.map(env => (env.getName, env.getValue)).toMap === + Map( + "PYSPARK_PRIMARY" -> RESOLVED_PYSPARK_PRIMARY_FILE, + "PYSPARK_FILES" -> "null")) + } + +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initContainerBootstrapStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initContainerBootstrapStepSuite.scala new file mode 
100644 index 0000000000000..b11b487111496 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initContainerBootstrapStepSuite.scala @@ -0,0 +1,159 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +import java.io.StringReader +import java.util.Properties + +import com.fasterxml.jackson.databind.ObjectMapper +import com.fasterxml.jackson.module.scala.DefaultScalaModule +import com.google.common.collect.Maps +import io.fabric8.kubernetes.api.model.{ConfigMap, Container, ContainerBuilder, HasMetadata, PodBuilder, SecretBuilder} +import scala.collection.JavaConverters._ + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer.{InitContainerConfigurationStep, InitContainerSpec} +import org.apache.spark.util.Utils + +private[spark] class initContainerBootstrapStepSuite extends SparkFunSuite { + + private val OBJECT_MAPPER = new ObjectMapper().registerModule(DefaultScalaModule) + private val CONFIG_MAP_NAME = "spark-init-config-map" + private val CONFIG_MAP_KEY = "spark-init-config-map-key" + + test("The init container bootstrap step should use all of the init container steps") { + val baseDriverSpec = KubernetesDriverSpec( + driverPod = new PodBuilder().build(), + driverContainer = new ContainerBuilder().build(), + driverSparkConf = new SparkConf(false), + otherKubernetesResources = Seq.empty[HasMetadata]) + val initContainerSteps = Seq( + FirstTestInitContainerConfigurationStep$, + SecondTestInitContainerConfigurationStep$) + val bootstrapStep = new InitContainerBootstrapStep( + initContainerSteps, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY) + val preparedDriverSpec = bootstrapStep.configureDriver(baseDriverSpec) + assert(preparedDriverSpec.driverPod.getMetadata.getLabels.asScala === + FirstTestInitContainerConfigurationStep$.additionalLabels) + val additionalDriverEnv = preparedDriverSpec.driverContainer.getEnv.asScala + assert(additionalDriverEnv.size === 1) + assert(additionalDriverEnv.head.getName === + FirstTestInitContainerConfigurationStep$.additionalMainContainerEnvKey) + assert(additionalDriverEnv.head.getValue === + FirstTestInitContainerConfigurationStep$.additionalMainContainerEnvValue) + val driverAnnotations = preparedDriverSpec.driverPod.getMetadata.getAnnotations.asScala + assert(driverAnnotations.size === 1) + val initContainers = OBJECT_MAPPER.readValue( + driverAnnotations(INIT_CONTAINER_ANNOTATION), classOf[Array[Container]]) + assert(initContainers.length === 1) + val initContainerEnv = initContainers.head.getEnv.asScala 
+ assert(initContainerEnv.size === 1) + assert(initContainerEnv.head.getName === + SecondTestInitContainerConfigurationStep$.additionalInitContainerEnvKey) + assert(initContainerEnv.head.getValue === + SecondTestInitContainerConfigurationStep$.additionalInitContainerEnvValue) + val expectedSparkConf = Map( + EXECUTOR_INIT_CONTAINER_CONFIG_MAP.key -> CONFIG_MAP_NAME, + EXECUTOR_INIT_CONTAINER_CONFIG_MAP_KEY.key -> CONFIG_MAP_KEY, + SecondTestInitContainerConfigurationStep$.additionalDriverSparkConfKey -> + SecondTestInitContainerConfigurationStep$.additionalDriverSparkConfValue) + assert(preparedDriverSpec.driverSparkConf.getAll.toMap === expectedSparkConf) + assert(preparedDriverSpec.otherKubernetesResources.size === 2) + assert(preparedDriverSpec.otherKubernetesResources.contains( + FirstTestInitContainerConfigurationStep$.additionalKubernetesResource)) + assert(preparedDriverSpec.otherKubernetesResources.exists { + case configMap: ConfigMap => + val hasMatchingName = configMap.getMetadata.getName == CONFIG_MAP_NAME + val configMapData = configMap.getData.asScala + val hasCorrectNumberOfEntries = configMapData.size == 1 + val initContainerPropertiesRaw = configMapData(CONFIG_MAP_KEY) + val initContainerProperties = new Properties() + Utils.tryWithResource(new StringReader(initContainerPropertiesRaw)) { + initContainerProperties.load(_) + } + val initContainerPropertiesMap = Maps.fromProperties(initContainerProperties).asScala + val expectedInitContainerProperties = Map( + SecondTestInitContainerConfigurationStep$.additionalInitContainerPropertyKey -> + SecondTestInitContainerConfigurationStep$.additionalInitContainerPropertyValue) + val hasMatchingProperties = initContainerPropertiesMap == expectedInitContainerProperties + hasMatchingName && hasCorrectNumberOfEntries && hasMatchingProperties + case _ => false + }) + } +} + +private object FirstTestInitContainerConfigurationStep$ extends InitContainerConfigurationStep { + + val additionalLabels = Map("additionalLabelkey" -> "additionalLabelValue") + val additionalMainContainerEnvKey = "TEST_ENV_MAIN_KEY" + val additionalMainContainerEnvValue = "TEST_ENV_MAIN_VALUE" + val additionalKubernetesResource = new SecretBuilder() + .withNewMetadata() + .withName("test-secret") + .endMetadata() + .addToData("secret-key", "secret-value") + .build() + + override def configureInitContainer(initContainerSpec: InitContainerSpec): InitContainerSpec = { + val driverPod = new PodBuilder(initContainerSpec.podToInitialize) + .editOrNewMetadata() + .addToLabels(additionalLabels.asJava) + .endMetadata() + .build() + val mainContainer = new ContainerBuilder(initContainerSpec.driverContainer) + .addNewEnv() + .withName(additionalMainContainerEnvKey) + .withValue(additionalMainContainerEnvValue) + .endEnv() + .build() + initContainerSpec.copy( + podToInitialize = driverPod, + driverContainer = mainContainer, + initContainerDependentResources = initContainerSpec.initContainerDependentResources ++ + Seq(additionalKubernetesResource)) + } +} + +private object SecondTestInitContainerConfigurationStep$ extends InitContainerConfigurationStep { + val additionalInitContainerEnvKey = "TEST_ENV_INIT_KEY" + val additionalInitContainerEnvValue = "TEST_ENV_INIT_VALUE" + val additionalInitContainerPropertyKey = "spark.initcontainer.testkey" + val additionalInitContainerPropertyValue = "testvalue" + val additionalDriverSparkConfKey = "spark.driver.testkey" + val additionalDriverSparkConfValue = "spark.driver.testvalue" + + override def configureInitContainer(initContainerSpec: 
InitContainerSpec): InitContainerSpec = { + val initContainer = new ContainerBuilder(initContainerSpec.initContainer) + .addNewEnv() + .withName(additionalInitContainerEnvKey) + .withValue(additionalInitContainerEnvValue) + .endEnv() + .build() + val initContainerProperties = initContainerSpec.initContainerProperties ++ + Map(additionalInitContainerPropertyKey -> additionalInitContainerPropertyValue) + val driverSparkConf = initContainerSpec.additionalDriverSparkConf ++ + Map(additionalDriverSparkConfKey -> additionalDriverSparkConfValue) + initContainerSpec.copy( + initContainer = initContainer, + initContainerProperties = initContainerProperties, + additionalDriverSparkConf = driverSparkConf) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStepSuite.scala new file mode 100644 index 0000000000000..fe1af4bc5be2a --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/BaseInitContainerConfigurationStepSuite.scala @@ -0,0 +1,98 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import io.fabric8.kubernetes.api.model._ +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Matchers.any +import org.mockito.Mockito.when +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.{PodWithDetachedInitContainer, SparkPodInitContainerBootstrap} +import org.apache.spark.deploy.kubernetes.config._ + +class BaseInitContainerConfigurationStepSuite extends SparkFunSuite with BeforeAndAfter{ + private val SPARK_JARS = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") + private val SPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") + private val JARS_DOWNLOAD_PATH = "/var/data/jars" + private val FILES_DOWNLOAD_PATH = "/var/data/files" + private val CONFIG_MAP_NAME = "config-map" + private val CONFIG_MAP_KEY = "config-map-key" + private val POD_LABEL = Map("bootstrap" -> "true") + private val INIT_CONTAINER_NAME = "init-container" + private val DRIVER_CONTAINER_NAME = "driver-container" + + @Mock + private var podAndInitContainerBootstrap : SparkPodInitContainerBootstrap = _ + + before { + MockitoAnnotations.initMocks(this) + when(podAndInitContainerBootstrap.bootstrapInitContainerAndVolumes( + any[PodWithDetachedInitContainer])).thenAnswer(new Answer[PodWithDetachedInitContainer] { + override def answer(invocation: InvocationOnMock) : PodWithDetachedInitContainer = { + val pod = invocation.getArgumentAt(0, classOf[PodWithDetachedInitContainer]) + pod.copy( + pod = + new PodBuilder(pod.pod) + .withNewMetadata() + .addToLabels("bootstrap", "true") + .endMetadata() + .withNewSpec().endSpec() + .build(), + initContainer = + new ContainerBuilder() + .withName(INIT_CONTAINER_NAME).build(), + mainContainer = + new ContainerBuilder() + .withName(DRIVER_CONTAINER_NAME).build() + )}}) + } + + test("Test of additionalDriverSparkConf with mix of remote files and jars") { + val baseInitStep = new BaseInitContainerConfigurationStep( + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + CONFIG_MAP_NAME, + CONFIG_MAP_KEY, + podAndInitContainerBootstrap) + val expectedDriverSparkConf = Map( + INIT_CONTAINER_JARS_DOWNLOAD_LOCATION.key -> JARS_DOWNLOAD_PATH, + INIT_CONTAINER_FILES_DOWNLOAD_LOCATION.key -> FILES_DOWNLOAD_PATH, + INIT_CONTAINER_REMOTE_JARS.key -> "hdfs://localhost:9000/app/jars/jar1.jar", + INIT_CONTAINER_REMOTE_FILES.key -> "hdfs://localhost:9000/app/files/file1.txt") + val initContainerSpec = InitContainerSpec( + Map.empty[String, String], + Map.empty[String, String], + new Container(), + new Container(), + new Pod, + Seq.empty[HasMetadata]) + val returnContainerSpec = baseInitStep.configureInitContainer(initContainerSpec) + assert(expectedDriverSparkConf === returnContainerSpec.initContainerProperties) + assert(returnContainerSpec.initContainer.getName == INIT_CONTAINER_NAME) + assert(returnContainerSpec.driverContainer.getName == DRIVER_CONTAINER_NAME) + assert(returnContainerSpec.podToInitialize.getMetadata.getLabels.asScala === POD_LABEL) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestratorSuite.scala 
b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestratorSuite.scala new file mode 100644 index 0000000000000..1cc8007803457 --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/InitContainerConfigurationStepsOrchestratorSuite.scala @@ -0,0 +1,94 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import org.apache.spark.{SparkConf, SparkFunSuite} +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ + +class InitContainerConfigurationStepsOrchestratorSuite extends SparkFunSuite { + private val NAMESPACE = "namespace" + private val APP_RESOURCE_PREFIX = "spark-prefix" + private val SPARK_JARS = Seq( + "hdfs://localhost:9000/app/jars/jar1.jar", "file:///app/jars/jar2.jar") + private val SPARK_FILES = Seq( + "hdfs://localhost:9000/app/files/file1.txt", "file:///app/files/file2.txt") + private val JARS_DOWNLOAD_PATH = "/var/data/jars" + private val FILES_DOWNLOAD_PATH = "/var/data/files" + private val DOCKER_IMAGE_PULL_POLICY: String = "IfNotPresent" + private val APP_ID = "spark-id" + private val CUSTOM_LABEL_KEY = "customLabel" + private val CUSTOM_LABEL_VALUE = "customLabelValue" + private val DEPRECATED_CUSTOM_LABEL_KEY = "deprecatedCustomLabel" + private val DEPRECATED_CUSTOM_LABEL_VALUE = "deprecatedCustomLabelValue" + private val DRIVER_LABELS = Map( + CUSTOM_LABEL_KEY -> CUSTOM_LABEL_VALUE, + DEPRECATED_CUSTOM_LABEL_KEY -> DEPRECATED_CUSTOM_LABEL_VALUE, + SPARK_APP_ID_LABEL -> APP_ID, + SPARK_ROLE_LABEL -> SPARK_POD_DRIVER_ROLE) + private val INIT_CONTAINER_CONFIG_MAP_NAME = "spark-init-config-map" + private val INIT_CONTAINER_CONFIG_MAP_KEY = "spark-init-config-map-key" + private val STAGING_SERVER_URI = "http://localhost:8000" + + test ("including step to contact resource staging server") { + val sparkConf = new SparkConf(true) + .set(KUBERNETES_DRIVER_LABELS, s"$DEPRECATED_CUSTOM_LABEL_KEY=$DEPRECATED_CUSTOM_LABEL_VALUE") + .set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$CUSTOM_LABEL_KEY", CUSTOM_LABEL_VALUE) + .set(RESOURCE_STAGING_SERVER_URI, STAGING_SERVER_URI) + + val orchestrator = new InitContainerConfigurationStepsOrchestrator( + NAMESPACE, + APP_RESOURCE_PREFIX, + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOCKER_IMAGE_PULL_POLICY, + DRIVER_LABELS, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY, + sparkConf) + val initSteps : Seq[InitContainerConfigurationStep] = + orchestrator.getAllConfigurationSteps() + assert(initSteps.length == 2) + 
assert(initSteps.head.isInstanceOf[BaseInitContainerConfigurationStep]) + assert(initSteps(1).isInstanceOf[SubmittedResourcesInitContainerConfigurationStep]) + } + + test ("not including steps because no contact to resource staging server") { + val sparkConf = new SparkConf(true) + .set(KUBERNETES_DRIVER_LABELS, s"$DEPRECATED_CUSTOM_LABEL_KEY=$DEPRECATED_CUSTOM_LABEL_VALUE") + .set(s"$KUBERNETES_DRIVER_LABEL_PREFIX$CUSTOM_LABEL_KEY", CUSTOM_LABEL_VALUE) + + val orchestrator = new InitContainerConfigurationStepsOrchestrator( + NAMESPACE, + APP_RESOURCE_PREFIX, + SPARK_JARS, + SPARK_FILES, + JARS_DOWNLOAD_PATH, + FILES_DOWNLOAD_PATH, + DOCKER_IMAGE_PULL_POLICY, + DRIVER_LABELS, + INIT_CONTAINER_CONFIG_MAP_NAME, + INIT_CONTAINER_CONFIG_MAP_KEY, + sparkConf) + val initSteps : Seq[InitContainerConfigurationStep] = + orchestrator.getAllConfigurationSteps() + assert(initSteps.length === 1) + assert(initSteps.head.isInstanceOf[BaseInitContainerConfigurationStep]) + } +} diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerStepSuite.scala new file mode 100644 index 0000000000000..2edaba93fe07f --- /dev/null +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/initcontainer/SubmittedResourcesInitContainerStepSuite.scala @@ -0,0 +1,252 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.spark.deploy.kubernetes.submit.submitsteps.initcontainer + +import java.io.File +import java.util.UUID + +import com.google.common.base.Charsets +import com.google.common.io.{BaseEncoding, Files} +import io.fabric8.kubernetes.api.model._ +import org.mockito.{Mock, MockitoAnnotations} +import org.mockito.Matchers.any +import org.mockito.Mockito.when +import org.mockito.invocation.InvocationOnMock +import org.mockito.stubbing.Answer +import org.scalatest.BeforeAndAfter +import scala.collection.JavaConverters._ + +import org.apache.spark.SparkFunSuite +import org.apache.spark.deploy.kubernetes.InitContainerResourceStagingServerSecretPlugin +import org.apache.spark.deploy.kubernetes.config._ +import org.apache.spark.deploy.kubernetes.constants._ +import org.apache.spark.deploy.kubernetes.submit.{SubmittedDependencyUploader, SubmittedResourceIdAndSecret} +import org.apache.spark.util.Utils + +class SubmittedResourcesInitContainerStepSuite extends SparkFunSuite with BeforeAndAfter { + private val RESOURCE_SECRET_NAME = "secret" + private val JARS_RESOURCE_ID = "jarsID" + private val JARS_SECRET = "jarsSecret" + private val FILES_RESOURCE_ID = "filesID" + private val FILES_SECRET = "filesSecret" + private val STAGING_SERVER_URI = "http://localhost:8000" + private val SECRET_MOUNT_PATH = "/tmp" + private val RSS_SECRET = Map( + INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY -> + BaseEncoding.base64().encode(JARS_SECRET.getBytes(Charsets.UTF_8)), + INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY -> + BaseEncoding.base64().encode(FILES_SECRET.getBytes(Charsets.UTF_8)) + ).asJava + private var RSS_WITH_SSL_SECRET: java.util.Map[String, String] = _ + private var TRUSTSTORE_FILENAME: String = "" + private var TRUSTSTORE_FILE: File = _ + private var TRUSTSTORE_URI: Option[String] = None + private val TRUSTSTORE_PASS = "trustStorePassword" + private val TRUSTSTORE_TYPE = "jks" + private var CERT_FILENAME: String = "" + private var CERT_FILE: File = _ + private var CERT_URI: Option[String] = None + + @Mock + private var submittedDependencyUploader: SubmittedDependencyUploader = _ + @Mock + private var submittedResourcesSecretPlugin: InitContainerResourceStagingServerSecretPlugin = _ + + before { + MockitoAnnotations.initMocks(this) + TRUSTSTORE_FILENAME = createTempFile(".jks") + TRUSTSTORE_FILE = new File(TRUSTSTORE_FILENAME) + TRUSTSTORE_URI = Some(TRUSTSTORE_FILENAME) + CERT_FILENAME = createTempFile("pem") + CERT_FILE = new File(CERT_FILENAME) + CERT_URI = Some(CERT_FILENAME) + RSS_WITH_SSL_SECRET = + (RSS_SECRET.asScala ++ Map( + INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY -> + BaseEncoding.base64().encode(Files.toByteArray(TRUSTSTORE_FILE)), + INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY -> + BaseEncoding.base64().encode(Files.toByteArray(CERT_FILE)) + )).asJava + when(submittedDependencyUploader.uploadJars()).thenReturn( + SubmittedResourceIdAndSecret(JARS_RESOURCE_ID, JARS_SECRET) + ) + when(submittedDependencyUploader.uploadFiles()).thenReturn( + SubmittedResourceIdAndSecret(FILES_RESOURCE_ID, FILES_SECRET) + ) + when(submittedResourcesSecretPlugin.addResourceStagingServerSecretVolumeToPod( + any[Pod])).thenAnswer(new Answer[Pod] { + override def answer(invocation: InvocationOnMock) : Pod = { + val pod = invocation.getArgumentAt(0, classOf[Pod]) + new PodBuilder(pod) + .withNewMetadata() + .addToLabels("mountedSecret", "true") + .endMetadata() + .withNewSpec().endSpec() + .build() + }}) + 
when(submittedResourcesSecretPlugin.mountResourceStagingServerSecretIntoInitContainer( + any[Container])).thenAnswer(new Answer[Container] { + override def answer(invocation: InvocationOnMock) : Container = { + val con = invocation.getArgumentAt(0, classOf[Container]) + new ContainerBuilder(con).withName("mountedSecret").build() + }}) + } + after { + TRUSTSTORE_FILE.delete() + CERT_FILE.delete() + } + test ("testing vanilla prepareInitContainer on resources and properties") { + val submittedResourceStep = new SubmittedResourcesInitContainerConfigurationStep( + RESOURCE_SECRET_NAME, + STAGING_SERVER_URI, + SECRET_MOUNT_PATH, + false, + None, + None, + None, + None, + submittedDependencyUploader, + submittedResourcesSecretPlugin + ) + val returnedInitContainer = + submittedResourceStep.configureInitContainer(InitContainerSpec( + Map.empty[String, String], + Map.empty[String, String], + new Container(), + new Container(), + new Pod(), + Seq.empty[HasMetadata])) + assert(returnedInitContainer.initContainer.getName === "mountedSecret") + assert(returnedInitContainer.podToInitialize.getMetadata.getLabels.asScala + === Map("mountedSecret" -> "true")) + assert(returnedInitContainer.initContainerDependentResources.length == 1) + val secret = returnedInitContainer.initContainerDependentResources.head.asInstanceOf[Secret] + assert(secret.getData === RSS_SECRET) + assert(secret.getMetadata.getName == RESOURCE_SECRET_NAME) + val expectedinitContainerProperties = Map( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> false.toString) + assert(returnedInitContainer.initContainerProperties === expectedinitContainerProperties) + assert(returnedInitContainer.additionalDriverSparkConf === + Map( + EXECUTOR_INIT_CONTAINER_SECRET.key -> RESOURCE_SECRET_NAME, + EXECUTOR_INIT_CONTAINER_SECRET_MOUNT_DIR.key -> SECRET_MOUNT_PATH)) + } + + test ("testing prepareInitContainer w/ CERT and TrustStore Files w/o SSL") { + val submittedResourceStep = new SubmittedResourcesInitContainerConfigurationStep( + RESOURCE_SECRET_NAME, + STAGING_SERVER_URI, + SECRET_MOUNT_PATH, + false, + TRUSTSTORE_URI, + CERT_URI, + Some(TRUSTSTORE_PASS), + Some(TRUSTSTORE_TYPE), + submittedDependencyUploader, + submittedResourcesSecretPlugin + ) + val returnedInitContainer = + submittedResourceStep.configureInitContainer(InitContainerSpec( + Map.empty[String, String], + Map.empty[String, String], + new Container(), + new Container(), + new Pod(), + Seq.empty[HasMetadata])) + val expectedinitContainerProperties = Map( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> false.toString, + RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASS, + 
RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE, + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_STAGING_SERVER_TRUSTSTORE_SECRET_KEY", + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_STAGING_SERVER_CLIENT_CERT_SECRET_KEY" + ) + assert(returnedInitContainer.initContainerProperties === expectedinitContainerProperties) + assert(returnedInitContainer.initContainerDependentResources.length == 1) + val secret = returnedInitContainer.initContainerDependentResources.head.asInstanceOf[Secret] + assert(secret.getData === RSS_WITH_SSL_SECRET) + assert(secret.getMetadata.getName == RESOURCE_SECRET_NAME) + + } + + test ("testing prepareInitContainer w/ local CERT and TrustStore Files w/o SSL") { + val LOCAL_TRUST_FILE = "local:///tmp/trust.jsk" + val LOCAL_CERT_FILE = "local:///tmp/cert.pem" + val submittedResourceStep = new SubmittedResourcesInitContainerConfigurationStep( + RESOURCE_SECRET_NAME, + STAGING_SERVER_URI, + SECRET_MOUNT_PATH, + false, + Some(LOCAL_TRUST_FILE), + Some(LOCAL_CERT_FILE), + Some(TRUSTSTORE_PASS), + Some(TRUSTSTORE_TYPE), + submittedDependencyUploader, + submittedResourcesSecretPlugin + ) + val returnedInitContainer = + submittedResourceStep.configureInitContainer(InitContainerSpec( + Map.empty[String, String], + Map.empty[String, String], + new Container(), + new Container(), + new Pod(), + Seq.empty[HasMetadata])) + val expectedinitContainerProperties = Map( + RESOURCE_STAGING_SERVER_URI.key -> STAGING_SERVER_URI, + INIT_CONTAINER_DOWNLOAD_JARS_RESOURCE_IDENTIFIER.key -> JARS_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_JARS_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_JARS_SECRET_KEY", + INIT_CONTAINER_DOWNLOAD_FILES_RESOURCE_IDENTIFIER.key -> FILES_RESOURCE_ID, + INIT_CONTAINER_DOWNLOAD_FILES_SECRET_LOCATION.key -> + s"$SECRET_MOUNT_PATH/$INIT_CONTAINER_SUBMITTED_FILES_SECRET_KEY", + RESOURCE_STAGING_SERVER_SSL_ENABLED.key -> false.toString, + RESOURCE_STAGING_SERVER_TRUSTSTORE_PASSWORD.key -> TRUSTSTORE_PASS, + RESOURCE_STAGING_SERVER_TRUSTSTORE_TYPE.key -> TRUSTSTORE_TYPE, + RESOURCE_STAGING_SERVER_TRUSTSTORE_FILE.key -> + "/tmp/trust.jsk", + RESOURCE_STAGING_SERVER_CLIENT_CERT_PEM.key -> + "/tmp/cert.pem" + ) + assert(returnedInitContainer.initContainerProperties === expectedinitContainerProperties) + assert(returnedInitContainer.initContainerDependentResources.length == 1) + val secret = returnedInitContainer.initContainerDependentResources.head.asInstanceOf[Secret] + assert(secret.getData === RSS_SECRET) + assert(secret.getMetadata.getName == RESOURCE_SECRET_NAME) + } + private def createTempFile(extension: String): String = { + val dir = Utils.createTempDir() + val file = new File(dir, s"${UUID.randomUUID().toString}.$extension") + Files.write(UUID.randomUUID().toString, file, Charsets.UTF_8) + file.getAbsolutePath + } +} diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala index d2082291eba22..c6cd6a74c88d1 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/KubernetesSuite.scala @@ -34,7 +34,7 @@ import 
org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.deploy.kubernetes.integrationtest.backend.IntegrationTestBackendFactory import org.apache.spark.deploy.kubernetes.integrationtest.backend.minikube.Minikube import org.apache.spark.deploy.kubernetes.integrationtest.constants.MINIKUBE_TEST_BACKEND -import org.apache.spark.deploy.kubernetes.submit.{Client, KeyAndCertPem} +import org.apache.spark.deploy.kubernetes.submit.{Client, ClientArguments, JavaMainAppResource, KeyAndCertPem, MainAppResource, PythonMainAppResource} import org.apache.spark.launcher.SparkLauncher import org.apache.spark.util.Utils @@ -72,7 +72,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { kubernetesTestComponents.deleteNamespace() } - test("Run PySpark Job on file from SUBMITTER") { + test("Run PySpark Job on file from SUBMITTER with --py-files") { assume(testBackend.name == MINIKUBE_TEST_BACKEND) launchStagingServer(SSLOptions(), None) @@ -83,7 +83,9 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest")) runPySparkPiAndVerifyCompletion( - PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION) + PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION, + Seq(PYSPARK_SORT_CONTAINER_LOCAL_FILE_LOCATION) + ) } test("Run PySpark Job on file from CONTAINER with spark.jar defined") { @@ -96,8 +98,7 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .set(EXECUTOR_DOCKER_IMAGE, System.getProperty("spark.docker.test.executorImage", "spark-executor-py:latest")) - runPySparkPiAndVerifyCompletion( - PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION) + runPySparkPiAndVerifyCompletion(PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION, Seq.empty[String]) } test("Simple submission test with the resource staging server.") { @@ -154,10 +155,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { sparkConf.set("spark.kubernetes.shuffle.namespace", kubernetesTestComponents.namespace) sparkConf.set("spark.app.name", "group-by-test") runSparkApplicationAndVerifyCompletion( - SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + JavaMainAppResource(SUBMITTER_LOCAL_MAIN_APP_RESOURCE), GROUP_BY_MAIN_CLASS, - "The Result is", - Array.empty[String]) + Seq("The Result is"), + Array.empty[String], + Seq.empty[String]) } test("Use remote resources without the resource staging server.") { @@ -217,10 +219,11 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { launchStagingServer(SSLOptions(), None) sparkConf.set("spark.files", testExistenceFile.getAbsolutePath) runSparkApplicationAndVerifyCompletion( - SUBMITTER_LOCAL_MAIN_APP_RESOURCE, + JavaMainAppResource(SUBMITTER_LOCAL_MAIN_APP_RESOURCE), FILE_EXISTENCE_MAIN_CLASS, - s"File found at /opt/spark/${testExistenceFile.getName} with correct contents.", - Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS)) + Seq(s"File found at /opt/spark/${testExistenceFile.getName} with correct contents."), + Array(testExistenceFile.getName, TEST_EXISTENCE_FILE_CONTENTS), + Seq.empty[String]) } test("Use a very long application name.") { @@ -248,26 +251,35 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { private def runSparkPiAndVerifyCompletion(appResource: String): Unit = { runSparkApplicationAndVerifyCompletion( - appResource, SPARK_PI_MAIN_CLASS, "Pi is roughly 3", Array.empty[String]) + JavaMainAppResource(appResource), + SPARK_PI_MAIN_CLASS, + Seq("Pi is roughly 3"), + 
Array.empty[String], + Seq.empty[String]) } private def runPySparkPiAndVerifyCompletion( - appResource: String): Unit = { + appResource: String, otherPyFiles: Seq[String]): Unit = { runSparkApplicationAndVerifyCompletion( - appResource, PYSPARK_PI_MAIN_CLASS, "Pi is roughly 3", - Array(null, "5")) + PythonMainAppResource(appResource), + PYSPARK_PI_MAIN_CLASS, + Seq("Submitting 5 missing tasks from ResultStage", "Pi is roughly 3"), + Array("5"), + otherPyFiles) } private def runSparkApplicationAndVerifyCompletion( - appResource: String, + appResource: MainAppResource, mainClass: String, - expectedLogOnCompletion: String, - appArgs: Array[String]): Unit = { - Client.run( - sparkConf = sparkConf, - appArgs = appArgs, + expectedLogOnCompletion: Seq[String], + appArgs: Array[String], + otherPyFiles: Seq[String]): Unit = { + val clientArguments = ClientArguments( + mainAppResource = appResource, mainClass = mainClass, - mainAppResource = appResource) + driverArgs = appArgs, + otherPyFiles = otherPyFiles) + Client.run(sparkConf, clientArguments) val driverPod = kubernetesTestComponents.kubernetesClient .pods() .withLabel("spark-app-locator", APP_LOCATOR_LABEL) @@ -275,11 +287,13 @@ private[spark] class KubernetesSuite extends SparkFunSuite with BeforeAndAfter { .getItems .get(0) Eventually.eventually(TIMEOUT, INTERVAL) { - assert(kubernetesTestComponents.kubernetesClient - .pods() - .withName(driverPod.getMetadata.getName) - .getLog - .contains(expectedLogOnCompletion), "The application did not complete.") + expectedLogOnCompletion.foreach { e => + assert(kubernetesTestComponents.kubernetesClient + .pods() + .withName(driverPod.getMetadata.getName) + .getLog + .contains(e), "The application did not complete.") + } } } @@ -347,6 +361,8 @@ private[spark] object KubernetesSuite { val PYSPARK_PI_MAIN_CLASS = "org.apache.spark.deploy.PythonRunner" val PYSPARK_PI_CONTAINER_LOCAL_FILE_LOCATION = "local:///opt/spark/examples/src/main/python/pi.py" + val PYSPARK_SORT_CONTAINER_LOCAL_FILE_LOCATION = + "local:///opt/spark/examples/src/main/python/sort.py" val PYSPARK_PI_SUBMITTER_LOCAL_FILE_LOCATION = "src/test/python/pi.py" val FILE_EXISTENCE_MAIN_CLASS = "org.apache.spark.deploy.kubernetes" + ".integrationtest.jobs.FileExistenceTest" From f46443e13d201c97f7b7b9991dc7bba454bbf294 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 17 Jul 2017 13:09:41 -0700 Subject: [PATCH 144/225] Add implicit conversions to imports. (#374) Otherwise we can get a Scalastyle error when building from SBT. 
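For illustration, a minimal sketch of why the import matters (the object and conversion below are hypothetical and not part of this patch): Scala gates implicit conversions behind the scala.language.implicitConversions feature flag, so defining an implicit def without importing it produces a compiler feature warning that a strict build, such as the SBT/Scalastyle setup referenced above, can surface as an error.

import scala.language.implicitConversions

object ImplicitConversionSketch {
  // With the feature import in scope, this implicit def compiles cleanly;
  // without it, the compiler emits a feature warning that strict builds reject.
  implicit def stringToOption(value: String): Option[String] = Option(value)

  // The conversion is applied implicitly here.
  val example: Option[String] = "spark"
}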
--- .../submit/submitsteps/DriverKubernetesCredentialsStep.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala index 0c58006130659..70a108edc8678 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/DriverKubernetesCredentialsStep.scala @@ -22,6 +22,7 @@ import java.nio.charset.StandardCharsets import com.google.common.io.{BaseEncoding, Files} import io.fabric8.kubernetes.api.model.{ContainerBuilder, PodBuilder, Secret, SecretBuilder} import scala.collection.JavaConverters._ +import scala.language.implicitConversions import org.apache.spark.SparkConf import org.apache.spark.deploy.kubernetes.config._ From 42f578ff211401332818d10d641def26128560af Mon Sep 17 00:00:00 2001 From: Andrew Ash Date: Mon, 17 Jul 2017 16:56:13 -0700 Subject: [PATCH 145/225] Fix import order and scalastyle (#375) Test with ./dev/scalastyle --- ...nitContainerResourceStagingServerSecretPluginSuite.scala | 6 +++--- .../kubernetes/SparkPodInitContainerBootstrapSuite.scala | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala index f5b2db36aff8f..597bcdb416fc0 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/InitContainerResourceStagingServerSecretPluginSuite.scala @@ -16,11 +16,11 @@ */ package org.apache.spark.deploy.kubernetes -import org.scalatest.BeforeAndAfter import io.fabric8.kubernetes.api.model._ -import org.apache.spark.deploy.kubernetes.constants._ - +import org.scalatest.BeforeAndAfter import scala.collection.JavaConverters._ + +import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.SparkFunSuite class InitContainerResourceStagingServerSecretPluginSuite extends SparkFunSuite with BeforeAndAfter{ diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala index 0557b5677b919..d5f25983f5080 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/SparkPodInitContainerBootstrapSuite.scala @@ -16,11 +16,11 @@ */ package org.apache.spark.deploy.kubernetes -import org.scalatest.BeforeAndAfter import io.fabric8.kubernetes.api.model._ -import org.apache.spark.deploy.kubernetes.constants._ - +import org.scalatest.BeforeAndAfter import scala.collection.JavaConverters._ + +import org.apache.spark.deploy.kubernetes.constants._ import 
org.apache.spark.SparkFunSuite class SparkPodInitContainerBootstrapSuite extends SparkFunSuite with BeforeAndAfter { From 2c001037e8b8303f9e0639bc462825f1ba929228 Mon Sep 17 00:00:00 2001 From: sandflee Date: Wed, 19 Jul 2017 03:00:36 +0800 Subject: [PATCH 146/225] fix submit job errors (#376) --- .../org/apache/spark/deploy/kubernetes/submit/Client.scala | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala index 98cd7afcd204d..2fa9b416330e5 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/Client.scala @@ -42,7 +42,7 @@ private[spark] object ClientArguments { var otherPyFiles = Seq.empty[String] var mainClass: Option[String] = None val driverArgs = mutable.Buffer.empty[String] - args.sliding(2).toList.collect { + args.sliding(2, 2).toList.collect { case Array("--primary-py-file", mainPyFile: String) => mainAppResource = Some(PythonMainAppResource(mainPyFile)) case Array("--primary-java-resource", primaryJavaResource: String) => @@ -54,7 +54,8 @@ private[spark] object ClientArguments { case Array("--arg", arg: String) => driverArgs += arg case other => - throw new RuntimeException(s"Unknown arguments: $other") + val invalid = other.mkString(" ") + throw new RuntimeException(s"Unknown arguments: $invalid") } require(mainAppResource.isDefined, "Main app resource must be defined by either --primary-py-file or --primary-java-resource.") From e086f4d9d861cfa46a5eda47412d53adc45c8fb9 Mon Sep 17 00:00:00 2001 From: sandflee Date: Wed, 19 Jul 2017 06:43:10 +0800 Subject: [PATCH 147/225] Add node selectors for driver and executor pods (#355) --- docs/running-on-kubernetes.md | 10 ++++++++++ .../deploy/kubernetes/ConfigurationUtils.scala | 14 ++++++++++++++ .../apache/spark/deploy/kubernetes/config.scala | 2 ++ .../submitsteps/BaseDriverConfigurationStep.scala | 3 +++ .../KubernetesClusterSchedulerBackend.scala | 6 ++++++ 5 files changed, 35 insertions(+) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 2b4e9a6f96af1..5e23801e15b10 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -758,6 +758,16 @@ from the other deployment modes. See the [configuration page](configuration.html Specify the hard cpu limit for a single executor pod + + spark.kubernetes.node.selector.[labelKey] + (none) + + Adds to the node selector of the driver pod and executor pods, with key labelKey and the value as the + configuration's value. For example, setting spark.kubernetes.node.selector.identifier to myIdentifier + will result in the driver pod and executors having a node selector with key identifier and value + myIdentifier. Multiple node selector keys can be added by setting multiple configurations with this prefix. 
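As a concrete, hedged sketch of how these prefixed keys are consumed (the second key and its value below are illustrative only), the helper added in this patch strips the prefix and hands the resulting map to the driver and executor pod specs:

import org.apache.spark.SparkConf

object NodeSelectorSketch {
  val conf = new SparkConf(loadDefaults = false)
    .set("spark.kubernetes.node.selector.identifier", "myIdentifier")
    .set("spark.kubernetes.node.selector.disktype", "ssd") // hypothetical second selector

  // Mirrors what ConfigurationUtils.parsePrefixedKeyValuePairs (added below) does:
  // every key under the prefix is collected into a map, which
  // BaseDriverConfigurationStep and the scheduler backend pass to
  // withNodeSelector(...) on the driver and executor pod specs.
  val nodeSelector: Map[String, String] =
    conf.getAllWithPrefix("spark.kubernetes.node.selector.").toMap
  // nodeSelector == Map("identifier" -> "myIdentifier", "disktype" -> "ssd")
}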
+ + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala index f461da4809b4d..1a008c236d00f 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/ConfigurationUtils.scala @@ -65,4 +65,18 @@ object ConfigurationUtils extends Logging { } combined.toMap } + + def parsePrefixedKeyValuePairs( + sparkConf: SparkConf, + prefix: String, + configType: String): Map[String, String] = { + val fromPrefix = sparkConf.getAllWithPrefix(prefix) + fromPrefix.groupBy(_._1).foreach { + case (key, values) => + require(values.size == 1, + s"Cannot have multiple values for a given $configType key, got key $key with" + + s" values $values") + } + fromPrefix.toMap + } } diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index e1c1ab9d459fc..c6772c1cb5ae4 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -497,6 +497,8 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_NODE_SELECTOR_PREFIX = "spark.kubernetes.node.selector." + private[spark] def resolveK8sMaster(rawMasterString: String): String = { if (!rawMasterString.startsWith("k8s://")) { throw new IllegalArgumentException("Master URL should start with k8s:// in Kubernetes mode.") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala index 022b5fccdc5e1..b3f509b44054e 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala @@ -73,6 +73,8 @@ private[spark] class BaseDriverConfigurationStep( s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + s" Spark bookkeeping operations.") val allDriverAnnotations = driverCustomAnnotations ++ Map(SPARK_APP_NAME_ANNOTATION -> appName) + val nodeSelector = ConfigurationUtils.parsePrefixedKeyValuePairs( + submissionSparkConf, KUBERNETES_NODE_SELECTOR_PREFIX, "node selector") val driverCpuQuantity = new QuantityBuilder(false) .withAmount(driverCpuCores) .build() @@ -117,6 +119,7 @@ private[spark] class BaseDriverConfigurationStep( .endMetadata() .withNewSpec() .withRestartPolicy("Never") + .withNodeSelector(nodeSelector.asJava) .endSpec() .build() val resolvedSparkConf = driverSpec.driverSparkConf.clone() diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index e5f980ad1f366..6dbe918f966e4 100644 --- 
a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -84,6 +84,11 @@ private[spark] class KubernetesClusterSchedulerBackend( KUBERNETES_EXECUTOR_ANNOTATION_PREFIX, KUBERNETES_EXECUTOR_ANNOTATIONS, "executor annotation") + private val nodeSelector = + ConfigurationUtils.parsePrefixedKeyValuePairs( + conf, + KUBERNETES_NODE_SELECTOR_PREFIX, + "node-selector") private var shufflePodCache: Option[ShufflePodCache] = None private val executorDockerImage = conf.get(EXECUTOR_DOCKER_IMAGE) private val dockerImagePullPolicy = conf.get(DOCKER_IMAGE_PULL_POLICY) @@ -449,6 +454,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .endMetadata() .withNewSpec() .withHostname(hostname) + .withNodeSelector(nodeSelector.asJava) .endSpec() .build() From 7d0fa562d45417bb064bc403a5f0f307613715c3 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 18 Jul 2017 23:16:36 -0700 Subject: [PATCH 148/225] Retry binding server to random port in the resource staging server test. (#378) * Retry binding server to random port in the resource staging server test. * Break if successful start * Start server in try block. * FIx scalastyle * More rigorous cleanup logic. Increment port numbers. * Move around more exception logic. * More exception refactoring. * Remove whitespace * Fix test * Rename variable --- .../ResourceStagingServerSuite.scala | 69 ++++++++++++++++--- 1 file changed, 58 insertions(+), 11 deletions(-) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala index 0c0908da20d89..1bcd85a611e00 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/rest/kubernetes/ResourceStagingServerSuite.scala @@ -23,12 +23,14 @@ import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule import com.google.common.io.ByteStreams import okhttp3.{RequestBody, ResponseBody} +import org.eclipse.jetty.server.Server import org.scalatest.BeforeAndAfter import org.scalatest.mock.MockitoSugar.mock import retrofit2.Call import org.apache.spark.{SparkFunSuite, SSLOptions} import org.apache.spark.deploy.kubernetes.SSLUtils +import org.apache.spark.internal.Logging import org.apache.spark.util.Utils /** @@ -40,30 +42,37 @@ import org.apache.spark.util.Utils * we've configured the Jetty server correctly and that the endpoints reached over HTTP can * receive streamed uploads and can stream downloads. 
*/ -class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { +class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter with Logging { + + private val MAX_SERVER_START_ATTEMPTS = 5 private var serviceImpl: ResourceStagingService = _ private var stagedResourcesCleaner: StagedResourcesCleaner = _ - private var server: ResourceStagingServer = _ + private var server: Option[ResourceStagingServer] = None private val OBJECT_MAPPER = new ObjectMapper().registerModule(new DefaultScalaModule) - private val serverPort = new ServerSocket(0).getLocalPort - private val sslOptionsProvider = new SettableReferenceSslOptionsProvider() before { stagedResourcesCleaner = mock[StagedResourcesCleaner] serviceImpl = new ResourceStagingServiceImpl( new StagedResourcesStoreImpl(Utils.createTempDir()), stagedResourcesCleaner) - server = new ResourceStagingServer(serverPort, serviceImpl, sslOptionsProvider) } after { - server.stop() + server.foreach { s => + try { + s.stop() + } catch { + case e: Throwable => + log.warn("Failed to stop the resource staging server.", e) + } + } + server = None } test("Accept file and jar uploads and downloads") { - server.start() - runUploadAndDownload(SSLOptions()) + val serverPort = startServer() + runUploadAndDownload(SSLOptions(), serverPort) } test("Enable SSL on the server") { @@ -80,11 +89,11 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { trustStore = Some(keyStoreAndTrustStore.trustStore), trustStorePassword = Some("trustStore")) sslOptionsProvider.setOptions(sslOptions) - server.start() - runUploadAndDownload(sslOptions) + val serverPort = startServer() + runUploadAndDownload(sslOptions, serverPort) } - private def runUploadAndDownload(sslOptions: SSLOptions): Unit = { + private def runUploadAndDownload(sslOptions: SSLOptions, serverPort: Int): Unit = { val scheme = if (sslOptions.enabled) "https" else "http" val retrofitService = RetrofitClientFactoryImpl.createRetrofitClient( s"$scheme://127.0.0.1:$serverPort/", @@ -125,6 +134,44 @@ class ResourceStagingServerSuite extends SparkFunSuite with BeforeAndAfter { val downloadedBytes = ByteStreams.toByteArray(responseBody.byteStream()) assert(downloadedBytes.toSeq === bytes) } + + private def startServer(): Int = { + var currentAttempt = 0 + var successfulStart = false + var latestServerPort = new ServerSocket(0).getLocalPort + while (currentAttempt < MAX_SERVER_START_ATTEMPTS && !successfulStart) { + val newServer = new ResourceStagingServer(latestServerPort, serviceImpl, sslOptionsProvider) + try { + newServer.start() + successfulStart = true + server = Some(newServer) + } catch { + case e: Throwable => + try { + newServer.stop() + } catch { + case e1: Throwable => + log.warn("Failed to stop a resource staging server that failed to start.", e1) + } + + if (Utils.isBindCollision(e)) { + currentAttempt += 1 + latestServerPort = latestServerPort + 1 + if (currentAttempt == MAX_SERVER_START_ATTEMPTS) { + throw new RuntimeException(s"Failed to bind to a random port" + + s" $MAX_SERVER_START_ATTEMPTS times. 
Last attempted port: $latestServerPort", e) + } else { + logWarning(s"Attempt $currentAttempt/$MAX_SERVER_START_ATTEMPTS failed to start" + + s" server on port $latestServerPort.", e) + } + } else { + throw e + } + } + } + logInfo(s"Started resource staging server on port $latestServerPort.") + latestServerPort + } } private class SettableReferenceSslOptionsProvider extends ResourceStagingServerSslOptionsProvider { From 4ffb4d659acdf497bcf8694ae2cd5685e018e817 Mon Sep 17 00:00:00 2001 From: Hong Zhiguo Date: Thu, 20 Jul 2017 04:44:47 +0800 Subject: [PATCH 149/225] set RestartPolicy=Never for executor (#367) * set RestartPolicy=Never for executor As for current implementation the RestartPolicy of executor pod is not set, so the default value "OnFailure" is in effect. But this causes problem. If an executor is terminated unexpectedly, for example, exit by java.lang.OutOfMemoryError, it'll be restarted by k8s with the same executor ID. When the new executor tries to fetch a block hold by the last executor, ShuffleBlockFetcherIterator.splitLocalRemoteBlocks() think it's a **local** block and tries to read it from it's local dir. But the executor's local dir is changed because random generated ID is part of local dir. FetchFailedException will raise and the stage will fail. The rolling Error message: 17/06/29 01:54:56 WARN KubernetesTaskSetManager: Lost task 0.1 in stage 2.0 (TID 7, 172.16.75.92, executor 1): FetchFailed(BlockManagerId(1, 172.16.75.92, 40539, None), shuffleId=2, mapId=0, reduceId=0, message= org.apache.spark.shuffle.FetchFailedException: /data2/spark/blockmgr-0e228d3c-8727-422e-aa97-2841a877c42a/32/shuffle_2_0_0.index (No such file or directory) at org.apache.spark.storage.ShuffleBlockFetcherIterator.throwFetchFailedException(ShuffleBlockFetcherIterator.scala:357) at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:332) at org.apache.spark.storage.ShuffleBlockFetcherIterator.next(ShuffleBlockFetcherIterator.scala:54) at scala.collection.Iterator$$anon$11.next(Iterator.scala:409) * Update KubernetesClusterSchedulerBackend.scala --- .../cluster/kubernetes/KubernetesClusterSchedulerBackend.scala | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index 6dbe918f966e4..a0753728f8cfd 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -454,6 +454,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .endMetadata() .withNewSpec() .withHostname(hostname) + .withRestartPolicy("Never") .withNodeSelector(nodeSelector.asJava) .endSpec() .build() From e3b2360c56050f2b1c6dea8d622af020db99378c Mon Sep 17 00:00:00 2001 From: mccheah Date: Wed, 19 Jul 2017 22:27:16 -0700 Subject: [PATCH 150/225] Read classpath entries from SPARK_EXTRA_CLASSPATH on executors. (#383) This makes executors consistent with the driver. Note that SPARK_EXTRA_CLASSPATH isn't set anywhere by Spark itself, but it's primarily meant to be set by images that inherit from the base driver/executor images. 
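A hedged sketch of the intended usage (the image tag, paths, and jar layout below are hypothetical, not part of the patch): a derived image exports SPARK_EXTRA_CLASSPATH, and the executor CMD changed below prepends it to the classpath when the container starts.

# Hypothetical child image; base image tag and paths are illustrative only.
FROM spark-executor:latest
COPY extra-jars /opt/extra-jars
# Picked up by the base image's CMD (see the Dockerfile change below) and
# prepended to SPARK_CLASSPATH before the executor JVM starts.
ENV SPARK_EXTRA_CLASSPATH=/opt/extra-jars/*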
--- .../docker-minimal-bundle/src/main/docker/executor/Dockerfile | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index 9c9efb23d7e95..b3b0acc3b64b8 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -27,5 +27,6 @@ COPY examples /opt/spark/examples CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ + if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ exec /sbin/tini -- ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP From 15e13f4887a338e1e0a0982287011944adec5eb3 Mon Sep 17 00:00:00 2001 From: Varun Date: Fri, 21 Jul 2017 16:36:08 -0700 Subject: [PATCH 151/225] Changes to support executor recovery behavior during static allocation. (#244) * Changes to support executor recovery behavior during static allocation. * addressed review comments * Style changes and removed incorrectly merged code * addressed latest review comments * changed import order * Minor changes to avoid exceptions when exit code is missing * fixed style check * Addressed review comments from Yinan Li. * Addressed comments and got rid of an explicit lock object. * Fixed imports order.
* Addressed review comments from Matt * Couple of style fixes --- .../KubernetesClusterSchedulerBackend.scala | 206 +++++++++++++++--- 1 file changed, 173 insertions(+), 33 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index a0753728f8cfd..c993bff8df962 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -18,18 +18,20 @@ package org.apache.spark.scheduler.cluster.kubernetes import java.io.Closeable import java.net.InetAddress -import java.util.concurrent.TimeUnit +import java.util.Collections +import java.util.concurrent.{ConcurrentHashMap, TimeUnit} import java.util.concurrent.atomic.{AtomicInteger, AtomicLong, AtomicReference} +import scala.collection.{concurrent, mutable} +import scala.collection.JavaConverters._ +import scala.concurrent.{ExecutionContext, Future} + import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.scala.DefaultScalaModule -import io.fabric8.kubernetes.api.model.{ContainerBuilder, ContainerPortBuilder, EnvVarBuilder, EnvVarSourceBuilder, Pod, PodBuilder, QuantityBuilder} +import io.fabric8.kubernetes.api.model._ import io.fabric8.kubernetes.client.{KubernetesClient, KubernetesClientException, Watcher} import io.fabric8.kubernetes.client.Watcher.Action import org.apache.commons.io.FilenameUtils -import scala.collection.JavaConverters._ -import scala.collection.mutable -import scala.concurrent.{ExecutionContext, Future} import org.apache.spark.{SparkContext, SparkEnv, SparkException} import org.apache.spark.deploy.kubernetes.{ConfigurationUtils, InitContainerResourceStagingServerSecretPlugin, PodWithDetachedInitContainer, SparkPodInitContainerBootstrap} @@ -38,8 +40,8 @@ import org.apache.spark.deploy.kubernetes.constants._ import org.apache.spark.deploy.kubernetes.submit.InitContainerUtil import org.apache.spark.network.netty.SparkTransportConf import org.apache.spark.network.shuffle.kubernetes.KubernetesExternalShuffleClient -import org.apache.spark.rpc.{RpcCallContext, RpcEndpointAddress, RpcEnv} -import org.apache.spark.scheduler.TaskSchedulerImpl +import org.apache.spark.rpc.{RpcAddress, RpcCallContext, RpcEndpointAddress, RpcEnv} +import org.apache.spark.scheduler.{ExecutorExited, SlaveLost, TaskSchedulerImpl} import org.apache.spark.scheduler.cluster.CoarseGrainedClusterMessages.{RetrieveSparkAppConfig, SparkAppConfig} import org.apache.spark.scheduler.cluster.CoarseGrainedSchedulerBackend import org.apache.spark.util.{ThreadUtils, Utils} @@ -55,10 +57,18 @@ private[spark] class KubernetesClusterSchedulerBackend( import KubernetesClusterSchedulerBackend._ private val RUNNING_EXECUTOR_PODS_LOCK = new Object - private val runningExecutorPods = new mutable.HashMap[String, Pod] // Indexed by executor IDs. - + // Indexed by executor IDs and guarded by RUNNING_EXECUTOR_PODS_LOCK. + private val runningExecutorsToPods = new mutable.HashMap[String, Pod] + // Indexed by executor pod names and guarded by RUNNING_EXECUTOR_PODS_LOCK. 
+ private val runningPodsToExecutors = new mutable.HashMap[String, String] + // TODO(varun): Get rid of this lock object by my making the underlying map a concurrent hash map. private val EXECUTOR_PODS_BY_IPS_LOCK = new Object - private val executorPodsByIPs = new mutable.HashMap[String, Pod] // Indexed by executor IP addrs. + // Indexed by executor IP addrs and guarded by EXECUTOR_PODS_BY_IPS_LOCK + private val executorPodsByIPs = new mutable.HashMap[String, Pod] + private val failedPods: concurrent.Map[String, ExecutorExited] = new + ConcurrentHashMap[String, ExecutorExited]().asScala + private val executorsToRemove = Collections.newSetFromMap[String]( + new ConcurrentHashMap[String, java.lang.Boolean]()).asScala private val executorExtraClasspath = conf.get( org.apache.spark.internal.config.EXECUTOR_CLASS_PATH) @@ -135,7 +145,7 @@ private[spark] class KubernetesClusterSchedulerBackend( val parsedShuffleLabels = ConfigurationUtils.parseKeyValuePairs( conf.get(KUBERNETES_SHUFFLE_LABELS), KUBERNETES_SHUFFLE_LABELS.key, "shuffle-labels") - if (parsedShuffleLabels.size == 0) { + if (parsedShuffleLabels.isEmpty) { throw new SparkException(s"Dynamic allocation enabled " + s"but no ${KUBERNETES_SHUFFLE_LABELS.key} specified") } @@ -170,12 +180,13 @@ private[spark] class KubernetesClusterSchedulerBackend( private val executorWatchResource = new AtomicReference[Closeable] protected var totalExpectedExecutors = new AtomicInteger(0) + private val driverUrl = RpcEndpointAddress( sc.getConf.get("spark.driver.host"), sc.getConf.getInt("spark.driver.port", DEFAULT_DRIVER_PORT), CoarseGrainedSchedulerBackend.ENDPOINT_NAME).toString - private val initialExecutors = getInitialTargetExecutorNumber(1) + private val initialExecutors = getInitialTargetExecutorNumber() private val podAllocationInterval = conf.get(KUBERNETES_ALLOCATION_BATCH_DELAY) require(podAllocationInterval > 0, s"Allocation batch delay " + @@ -192,23 +203,74 @@ private[spark] class KubernetesClusterSchedulerBackend( private val allocatorRunnable: Runnable = new Runnable { + // Number of times we are allowed check for the loss reason for an executor before we give up + // and assume the executor failed for good, and attribute it to a framework fault. + private val MAX_EXECUTOR_LOST_REASON_CHECKS = 10 + private val executorsToRecover = new mutable.HashSet[String] + // Maintains a map of executor id to count of checks performed to learn the loss reason + // for an executor. + private val executorReasonChecks = new mutable.HashMap[String, Int] + override def run(): Unit = { - if (totalRegisteredExecutors.get() < runningExecutorPods.size) { - logDebug("Waiting for pending executors before scaling") - } else if (totalExpectedExecutors.get() <= runningExecutorPods.size) { - logDebug("Maximum allowed executor limit reached. Not scaling up further.") - } else { - val nodeToLocalTaskCount = getNodesWithLocalTaskCounts - RUNNING_EXECUTOR_PODS_LOCK.synchronized { + removeFailedExecutors() + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + if (totalRegisteredExecutors.get() < runningExecutorsToPods.size) { + logDebug("Waiting for pending executors before scaling") + } else if (totalExpectedExecutors.get() <= runningExecutorsToPods.size) { + logDebug("Maximum allowed executor limit reached. 
Not scaling up further.") + } else { + val nodeToLocalTaskCount = getNodesWithLocalTaskCounts for (i <- 0 until math.min( - totalExpectedExecutors.get - runningExecutorPods.size, podAllocationSize)) { - runningExecutorPods += allocateNewExecutorPod(nodeToLocalTaskCount) + totalExpectedExecutors.get - runningExecutorsToPods.size, podAllocationSize)) { + val (executorId, pod) = allocateNewExecutorPod(nodeToLocalTaskCount) + runningExecutorsToPods.put(executorId, pod) + runningPodsToExecutors.put(pod.getMetadata.getName, executorId) logInfo( - s"Requesting a new executor, total executors is now ${runningExecutorPods.size}") + s"Requesting a new executor, total executors is now ${runningExecutorsToPods.size}") } } } } + + def removeFailedExecutors(): Unit = { + val localRunningExecutorsToPods = RUNNING_EXECUTOR_PODS_LOCK.synchronized { + runningExecutorsToPods.toMap + } + executorsToRemove.foreach { case (executorId) => + localRunningExecutorsToPods.get(executorId).map { pod: Pod => + failedPods.get(pod.getMetadata.getName).map { executorExited: ExecutorExited => + logDebug(s"Removing executor $executorId with loss reason " + executorExited.message) + removeExecutor(executorId, executorExited) + if (!executorExited.exitCausedByApp) { + executorsToRecover.add(executorId) + } + }.getOrElse(removeExecutorOrIncrementLossReasonCheckCount(executorId)) + }.getOrElse(removeExecutorOrIncrementLossReasonCheckCount(executorId)) + + executorsToRecover.foreach(executorId => { + executorsToRemove -= executorId + executorReasonChecks -= executorId + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + runningExecutorsToPods.remove(executorId).map { pod: Pod => + kubernetesClient.pods().delete(pod) + runningPodsToExecutors.remove(pod.getMetadata.getName) + }.getOrElse(logWarning(s"Unable to remove pod for unknown executor $executorId")) + } + }) + executorsToRecover.clear() + } + } + + def removeExecutorOrIncrementLossReasonCheckCount(executorId: String): Unit = { + val reasonCheckCount = executorReasonChecks.getOrElse(executorId, 0) + if (reasonCheckCount > MAX_EXECUTOR_LOST_REASON_CHECKS) { + removeExecutor(executorId, SlaveLost("Executor lost for unknown reasons")) + executorsToRecover.add(executorId) + executorReasonChecks -= executorId + } else { + executorReasonChecks.put(executorId, reasonCheckCount + 1) + } + } } private val objectMapper = new ObjectMapper().registerModule(DefaultScalaModule) @@ -280,8 +342,9 @@ private[spark] class KubernetesClusterSchedulerBackend( // indication as to why. 
try { RUNNING_EXECUTOR_PODS_LOCK.synchronized { - runningExecutorPods.values.foreach(kubernetesClient.pods().delete(_)) - runningExecutorPods.clear() + runningExecutorsToPods.values.foreach(kubernetesClient.pods().delete(_)) + runningExecutorsToPods.clear() + runningPodsToExecutors.clear() } EXECUTOR_PODS_BY_IPS_LOCK.synchronized { executorPodsByIPs.clear() @@ -534,11 +597,6 @@ private[spark] class KubernetesClusterSchedulerBackend( } } - override def createDriverEndpoint( - properties: Seq[(String, String)]): DriverEndpoint = { - new KubernetesDriverEndpoint(rpcEnv, properties) - } - override def doRequestTotalExecutors(requestedTotal: Int): Future[Boolean] = Future[Boolean] { totalExpectedExecutors.set(requestedTotal) true @@ -547,8 +605,10 @@ private[spark] class KubernetesClusterSchedulerBackend( override def doKillExecutors(executorIds: Seq[String]): Future[Boolean] = Future[Boolean] { RUNNING_EXECUTOR_PODS_LOCK.synchronized { for (executor <- executorIds) { - runningExecutorPods.remove(executor) match { - case Some(pod) => kubernetesClient.pods().delete(pod) + runningExecutorsToPods.remove(executor) match { + case Some(pod) => + kubernetesClient.pods().delete(pod) + runningPodsToExecutors.remove(pod.getMetadata.getName) case None => logWarning(s"Unable to remove pod for unknown executor $executor") } } @@ -564,6 +624,8 @@ private[spark] class KubernetesClusterSchedulerBackend( private class ExecutorPodsWatcher extends Watcher[Pod] { + private val DEFAULT_CONTAINER_FAILURE_EXIT_STATUS = -1 + override def eventReceived(action: Action, pod: Pod): Unit = { if (action == Action.MODIFIED && pod.getStatus.getPhase == "Running" && pod.getMetadata.getDeletionTimestamp == null) { @@ -583,12 +645,75 @@ private[spark] class KubernetesClusterSchedulerBackend( executorPodsByIPs -= podIP } } + if (action == Action.ERROR) { + logInfo(s"Received pod $podName exited event. Reason: " + pod.getStatus.getReason) + handleErroredPod(pod) + } else if (action == Action.DELETED) { + logInfo(s"Received delete pod $podName event. Reason: " + pod.getStatus.getReason) + handleDeletedPod(pod) + } } } override def onClose(cause: KubernetesClientException): Unit = { logDebug("Executor pod watch closed.", cause) } + + def getExecutorExitStatus(pod: Pod): Int = { + val containerStatuses = pod.getStatus.getContainerStatuses + if (!containerStatuses.isEmpty) { + // we assume the first container represents the pod status. This assumption may not hold + // true in the future. Revisit this if side-car containers start running inside executor + // pods. + getExecutorExitStatus(containerStatuses.get(0)) + } else DEFAULT_CONTAINER_FAILURE_EXIT_STATUS + } + + def getExecutorExitStatus(containerStatus: ContainerStatus): Int = { + Option(containerStatus.getState).map(containerState => + Option(containerState.getTerminated).map(containerStateTerminated => + containerStateTerminated.getExitCode.intValue()).getOrElse(UNKNOWN_EXIT_CODE) + ).getOrElse(UNKNOWN_EXIT_CODE) + } + + def isPodAlreadyReleased(pod: Pod): Boolean = { + RUNNING_EXECUTOR_PODS_LOCK.synchronized { + !runningPodsToExecutors.contains(pod.getMetadata.getName) + } + } + + def handleErroredPod(pod: Pod): Unit = { + val alreadyReleased = isPodAlreadyReleased(pod) + val containerExitStatus = getExecutorExitStatus(pod) + // container was probably actively killed by the driver. 
+ val exitReason = if (alreadyReleased) { + ExecutorExited(containerExitStatus, exitCausedByApp = false, + s"Container in pod " + pod.getMetadata.getName + + " exited from explicit termination request.") + } else { + val containerExitReason = containerExitStatus match { + case VMEM_EXCEEDED_EXIT_CODE | PMEM_EXCEEDED_EXIT_CODE => + memLimitExceededLogMessage(pod.getStatus.getReason) + case _ => + // Here we can't be sure that that exit was caused by the application but this seems + // to be the right default since we know the pod was not explicitly deleted by + // the user. + "Pod exited with following container exit status code " + containerExitStatus + } + ExecutorExited(containerExitStatus, exitCausedByApp = true, containerExitReason) + } + failedPods.put(pod.getMetadata.getName, exitReason) + } + + def handleDeletedPod(pod: Pod): Unit = { + val exitReason = ExecutorExited(getExecutorExitStatus(pod), exitCausedByApp = false, + "Pod " + pod.getMetadata.getName + " deleted or lost.") + failedPods.put(pod.getMetadata.getName, exitReason) + } + } + + override def createDriverEndpoint(properties: Seq[(String, String)]): DriverEndpoint = { + new KubernetesDriverEndpoint(rpcEnv, properties) } private class KubernetesDriverEndpoint( @@ -597,6 +722,14 @@ private[spark] class KubernetesClusterSchedulerBackend( extends DriverEndpoint(rpcEnv, sparkProperties) { private val externalShufflePort = conf.getInt("spark.shuffle.service.port", 7337) + override def onDisconnected(rpcAddress: RpcAddress): Unit = { + addressToExecutorId.get(rpcAddress).foreach { executorId => + if (disableExecutor(executorId)) { + executorsToRemove.add(executorId) + } + } + } + override def receiveAndReply( context: RpcCallContext): PartialFunction[Any, Unit] = { new PartialFunction[Any, Unit]() { @@ -615,7 +748,7 @@ private[spark] class KubernetesClusterSchedulerBackend( var resolvedProperties = sparkProperties val runningExecutorPod = kubernetesClient .pods() - .withName(runningExecutorPods(executorId).getMetadata.getName) + .withName(runningExecutorsToPods(executorId).getMetadata.getName) .get() val nodeName = runningExecutorPod.getSpec.getNodeName val shufflePodIp = shufflePodCache.get.getShufflePodForExecutor(nodeName) @@ -637,7 +770,6 @@ private[spark] class KubernetesClusterSchedulerBackend( }.orElse(super.receiveAndReply(context)) } } - } case class ShuffleServiceConfig( shuffleNamespace: String, @@ -647,6 +779,14 @@ case class ShuffleServiceConfig( private object KubernetesClusterSchedulerBackend { private val DEFAULT_STATIC_PORT = 10000 private val EXECUTOR_ID_COUNTER = new AtomicLong(0L) + private val VMEM_EXCEEDED_EXIT_CODE = -103 + private val PMEM_EXCEEDED_EXIT_CODE = -104 + private val UNKNOWN_EXIT_CODE = -111 + + def memLimitExceededLogMessage(diagnostics: String): String = { + s"Pod/Container killed for exceeding memory limits. $diagnostics" + + " Consider boosting spark executor memory overhead." 
+ } } /** From 823bf0e36bde294819e7923bfa9d1b363513f2d4 Mon Sep 17 00:00:00 2001 From: Anirudh Date: Fri, 21 Jul 2017 16:50:11 -0700 Subject: [PATCH 152/225] Update pom to v0.3.0 of spark-kubernetes --- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs-helpers/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index c90a824b1b8b1..d6d6cb0699e34 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 51ca26c0134fa..4b15d0ed54b4a 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index 206059bd8e5b1..7283b2bb373c4 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 555398aa3e6d9..0f838d991358f 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index cd3ccad0a2b22..4776d2e5d4f0c 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.2.0-SNAPSHOT + 2.1.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml From 436482e9d4bbcfded49f8235950abdaa517df566 Mon Sep 17 00:00:00 2001 From: Anirudh Date: Mon, 24 Jul 2017 11:05:00 -0700 Subject: [PATCH 153/225] Fix: changed signature of ExternalShuffleClient
Lines starting --- .../kubernetes/KubernetesExternalShuffleClient.java | 8 ++++---- .../kubernetes/KubernetesClusterSchedulerBackend.scala | 3 +-- .../submit/KubernetesExternalShuffleServiceSuite.scala | 1 - 3 files changed, 5 insertions(+), 7 deletions(-) diff --git a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java index 49cb5243e32dc..f50b0d3ecb00a 100644 --- a/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java +++ b/common/network-shuffle/src/main/java/org/apache/spark/network/shuffle/kubernetes/KubernetesExternalShuffleClient.java @@ -47,12 +47,12 @@ public class KubernetesExternalShuffleClient extends ExternalShuffleClient { public KubernetesExternalShuffleClient( TransportConf conf, SecretKeyHolder secretKeyHolder, - boolean saslEnabled, - boolean saslEncryptionEnabled) { - super(conf, secretKeyHolder, saslEnabled, saslEncryptionEnabled); + boolean saslEnabled) { + super(conf, secretKeyHolder, saslEnabled); } - public void registerDriverWithShuffleService(String host, int port) throws IOException { + public void registerDriverWithShuffleService(String host, int port) + throws IOException, InterruptedException { checkInit(); ByteBuffer registerDriver = new RegisterDriver(appId, 0).toByteBuffer(); TransportClient client = clientFactory.createClient(host, port); diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index c993bff8df962..a50a9c8bb9c3b 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -279,8 +279,7 @@ private[spark] class KubernetesClusterSchedulerBackend( new KubernetesExternalShuffleClient( SparkTransportConf.fromSparkConf(conf, "shuffle"), sc.env.securityManager, - sc.env.securityManager.isAuthenticationEnabled(), - sc.env.securityManager.isSaslEncryptionEnabled()) + sc.env.securityManager.isAuthenticationEnabled()) } private def getInitialTargetExecutorNumber(defaultNumExecutors: Int = 1): Int = { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala index 0de1955884c8e..425ba58a65d19 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/KubernetesExternalShuffleServiceSuite.scala @@ -35,7 +35,6 @@ private[spark] class KubernetesExternalShuffleServiceSuite extends SparkFunSuite val shuffleClient = new KubernetesExternalShuffleClient( SparkTransportConf.fromSparkConf(SPARK_CONF, "shuffle"), new SecurityManager(SPARK_CONF), - false, false) shuffleService.start() From beb13610721a444e0dbadf21e1a9ef6607fe1c76 Mon Sep 17 00:00:00 2001 From: 
Anirudh Date: Mon, 24 Jul 2017 13:48:16 -0700 Subject: [PATCH 154/225] Updated poms --- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/java8-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs-helpers/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 41 insertions(+), 41 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index db5ceab112383..13930630f2338 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 8bf82928ad5c7..2d29bfc8ea89a 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index c622c1a7375e0..b837c8a2be8a4 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 2c2ec89b7fdec..45e128b6e1cfd 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index b9c3ee680b602..121b2489fbb72 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 41d8904e557d0..31d0d2efc654a 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 4fa1aa073d14b..78eaf8624df93 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 
2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index dc18146f17cdb..d80b2591d80c3 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/examples/pom.xml b/examples/pom.xml index 1fea01264272b..d2135bf5ff192 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index f474c9ec8aa78..fdf7611936346 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 926104458e098..3ae7d254c95a1 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 8055e62c54ff9..89e61be25d8c9 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 49d0a6eaefc14..897af93b6b8a2 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/java8-tests/pom.xml b/external/java8-tests/pom.xml index c5c063b3a243a..5513dfea0f281 100644 --- a/external/java8-tests/pom.xml +++ b/external/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 07dfe3727205c..d836fffdb56b6 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index f8ae23dc348ce..10419a1275f73 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 3bbb59c47fb5c..a0a7b26f2b71e 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index e92964c8d101f..4cd40dbe89689 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index f28c98dba6819..57ad12dc70709 100644 --- a/external/kafka-0-8/pom.xml +++ 
b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index aaa95e5d632bb..240db2098d1f8 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 528a90eab53de..c6a7ceb80c465 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index 5ea9c5e9cff75..cf23a63e23cbe 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 5f4f9c88a55a5..8fddc5bfee0f1 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 008d9b3bc9e5d..705675af7a01f 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 598fe1f5fe5c7..155adae6afa61 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index e0580c5ff2ecb..25c38e2281eff 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 06c1ff44b90c1..7835481531216 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 928d8053a14ec..c798170e81ba7 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index d6d6cb0699e34..3feecb09230bc 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 4b15d0ed54b4a..5f7683280cccf 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml 
b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index 7283b2bb373c4..9672f49448c0c 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 0f838d991358f..80b0f57a2fcb9 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 4776d2e5d4f0c..0da1e38d8c211 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 81042ad683512..72ba1417bd1e7 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index 942d3be645dd3..c65806e293f25 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 6b8f730b95c00..0aec944d693dd 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index bcc3153a264b9..5943112510b0b 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index a14d2c1196bc5..65c8f520b0f8d 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 0c4b8def896cd..73a68b32d8168 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index ee611f9b6d299..51789b826b673 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 10ca3a5d7d0d6..066d74c1e286d 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.1.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0-SNAPSHOT ../pom.xml From 64f3dddc3a9091ec1beb17536f8dc29ce7b115b2 
Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Fri, 28 Jul 2017 14:10:00 -0700 Subject: [PATCH 155/225] Add missing code blocks (#403) --- resource-managers/kubernetes/README.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/resource-managers/kubernetes/README.md b/resource-managers/kubernetes/README.md index fd1ad29eb795d..685ff343fa3be 100644 --- a/resource-managers/kubernetes/README.md +++ b/resource-managers/kubernetes/README.md @@ -64,10 +64,12 @@ build/mvn integration-test \ # Running against an arbitrary cluster In order to run against any cluster, use the following: +```sh build/mvn integration-test \ -Pkubernetes -Pkubernetes-integration-tests \ -pl resource-managers/kubernetes/integration-tests -am -DextraScalaTestArgs="-Dspark.kubernetes.test.master=k8s://https:// -Dspark.docker.test.driverImage= -Dspark.docker.test.executorImage=" +``` # Preserve the Minikube VM From bce9b773068aa74c4b3b278e106d31ec97d01c63 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Fri, 28 Jul 2017 16:16:22 -0700 Subject: [PATCH 156/225] Add an entrypoint.sh script to add a passwd entry if one does not exist for the container UID --- .../src/main/docker/driver-py/Dockerfile | 2 +- .../src/main/docker/driver/Dockerfile | 2 +- .../src/main/docker/executor-py/Dockerfile | 2 +- .../src/main/docker/executor/Dockerfile | 2 +- .../src/main/docker/init-container/Dockerfile | 2 +- .../docker/resource-staging-server/Dockerfile | 2 +- .../src/main/docker/shuffle-service/Dockerfile | 2 +- .../src/main/docker/spark-base/Dockerfile | 14 +++++++++----- .../src/main/docker/spark-base/entrypoint.sh | 18 ++++++++++++++++++ 9 files changed, 34 insertions(+), 12 deletions(-) create mode 100755 resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/entrypoint.sh diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile index 6dcc7511c0dd9..731ea897458ce 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver-py/Dockerfile @@ -43,6 +43,6 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ - exec /sbin/tini -- ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH \ + ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH \ -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY \ $SPARK_DRIVER_CLASS $PYSPARK_PRIMARY $PYSPARK_FILES $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile index 6bbff8ef64a0f..bd28af950f4dd 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/driver/Dockerfile @@ -28,4 +28,4 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_SUBMIT_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_SUBMIT_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! 
[ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ - exec /sbin/tini -- ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS + ${JAVA_HOME}/bin/java $SPARK_DRIVER_JAVA_OPTS -cp $SPARK_CLASSPATH -Xms$SPARK_DRIVER_MEMORY -Xmx$SPARK_DRIVER_MEMORY $SPARK_DRIVER_CLASS $SPARK_DRIVER_ARGS diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile index 7a65a4f879376..f52578ad6edda 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor-py/Dockerfile @@ -43,4 +43,4 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_MOUNTED_CLASSPATH}+x} ]; then SPARK_CLASSPATH="$SPARK_MOUNTED_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." .; fi && \ - exec /sbin/tini -- ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP \ No newline at end of file + ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile index b3b0acc3b64b8..8ad935ca396b1 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/executor/Dockerfile @@ -29,4 +29,4 @@ CMD SPARK_CLASSPATH="${SPARK_HOME}/jars/*" && \ if ! [ -z ${SPARK_EXECUTOR_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXECUTOR_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_EXTRA_CLASSPATH+x} ]; then SPARK_CLASSPATH="$SPARK_EXTRA_CLASSPATH:$SPARK_CLASSPATH"; fi && \ if ! [ -z ${SPARK_MOUNTED_FILES_DIR} ]; then cp -R "$SPARK_MOUNTED_FILES_DIR/." 
.; fi && \ - exec /sbin/tini -- ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP + ${JAVA_HOME}/bin/java -Dspark.executor.port=$SPARK_EXECUTOR_PORT -Xms$SPARK_EXECUTOR_MEMORY -Xmx$SPARK_EXECUTOR_MEMORY -cp $SPARK_CLASSPATH org.apache.spark.executor.CoarseGrainedExecutorBackend --driver-url $SPARK_DRIVER_URL --executor-id $SPARK_EXECUTOR_ID --cores $SPARK_EXECUTOR_CORES --app-id $SPARK_APPLICATION_ID --hostname $SPARK_EXECUTOR_POD_IP diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile index 4bafe25e2608f..2ab3e6295b6d8 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/init-container/Dockerfile @@ -21,4 +21,4 @@ FROM spark-base # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-init:latest -f dockerfiles/init-container/Dockerfile . -ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] +ENTRYPOINT [ "/opt/entrypoint.sh", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.KubernetesSparkDependencyDownloadInitContainer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile index 9ca96be0f1a88..0e0c9dd31aad6 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/resource-staging-server/Dockerfile @@ -22,4 +22,4 @@ FROM spark-base # command should be invoked from the top level directory of the Spark distribution. E.g.: # docker build -t spark-resource-staging-server:latest -f dockerfiles/resource-staging-server/Dockerfile . 
-ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] +ENTRYPOINT [ "/opt/entrypoint.sh", "bin/spark-class", "org.apache.spark.deploy.rest.kubernetes.ResourceStagingServer" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile index ccb2f1a03d88c..2ae0be4ee6c32 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/shuffle-service/Dockerfile @@ -23,4 +23,4 @@ FROM spark-base COPY examples /opt/spark/examples -ENTRYPOINT [ "/sbin/tini", "--", "bin/spark-class", "org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService", "1" ] +ENTRYPOINT [ "/opt/entrypoint.sh", "bin/spark-class", "org.apache.spark.deploy.kubernetes.KubernetesExternalShuffleService", "1" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile index b0925e3bb0416..0222af3989a73 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile @@ -23,13 +23,17 @@ FROM openjdk:8-alpine RUN apk upgrade --no-cache && \ apk add --no-cache bash tini && \ mkdir -p /opt/spark && \ - touch /opt/spark/RELEASE + touch /opt/spark/RELEASE && \ + chgrp root /etc/passwd && chmod ug+rw /etc/passwd -COPY jars /opt/spark/jars -COPY bin /opt/spark/bin -COPY sbin /opt/spark/sbin -COPY conf /opt/spark/conf +COPY jars /opt/spark/ +COPY bin /opt/spark/ +COPY sbin /opt/spark/ +COPY conf /opt/spark/ +COPY dockerfiles/spark-base/entrypoint.sh /opt/ ENV SPARK_HOME /opt/spark WORKDIR /opt/spark + +ENTRYPOINT [ "/opt/entrypoint.sh" ] diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/entrypoint.sh b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/entrypoint.sh new file mode 100755 index 0000000000000..abd93649b498d --- /dev/null +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/entrypoint.sh @@ -0,0 +1,18 @@ +#!/bin/bash + +# Check whether there is a passwd entry for the container UID +myuid=$(id -u) +mygid=$(id -g) +uidentry=$(getent passwd $myuid) + +# If there is no passwd entry for the container UID, attempt to create one +if [ -z "$uidentry" ] ; then + if [ -w /etc/passwd ] ; then + echo "$myuid:x:$myuid:$mygid:anonymous uid:$SPARK_HOME:/bin/false" >> /etc/passwd + else + echo "Container ENTRYPOINT failed to add passwd entry for anonymous UID" + fi +fi + +# Execute the container CMD under tini for better hygiene +/sbin/tini -s -- "$@" From 8ecff61e0dd662719f10d00aa82815aad35cefd5 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Sat, 29 Jul 2017 07:46:46 -0700 Subject: [PATCH 157/225] revert my COPY mods --- .../src/main/docker/spark-base/Dockerfile | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile index 0222af3989a73..61d295a5b37c2 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile 
+++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/docker/spark-base/Dockerfile @@ -26,10 +26,10 @@ RUN apk upgrade --no-cache && \ touch /opt/spark/RELEASE && \ chgrp root /etc/passwd && chmod ug+rw /etc/passwd -COPY jars /opt/spark/ -COPY bin /opt/spark/ -COPY sbin /opt/spark/ -COPY conf /opt/spark/ +COPY jars /opt/spark/jars +COPY bin /opt/spark/bin +COPY sbin /opt/spark/sbin +COPY conf /opt/spark/conf COPY dockerfiles/spark-base/entrypoint.sh /opt/ ENV SPARK_HOME /opt/spark From 702a8f6c6c41f51b35642b1f901c662ee2c62d5d Mon Sep 17 00:00:00 2001 From: Ilan Filonenko Date: Tue, 1 Aug 2017 12:03:43 -0700 Subject: [PATCH 158/225] Fix bug with null arguments --- core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala index 1305aeb8c1faf..ed46adcbe9dfb 100644 --- a/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala +++ b/core/src/main/scala/org/apache/spark/deploy/SparkSubmit.scala @@ -639,7 +639,9 @@ object SparkSubmit extends CommandLineUtils { if (args.isPython) { childArgs ++= Array("--primary-py-file", args.primaryResource) childArgs ++= Array("--main-class", "org.apache.spark.deploy.PythonRunner") - childArgs ++= Array("--other-py-files", args.pyFiles) + if (args.pyFiles != null) { + childArgs ++= Array("--other-py-files", args.pyFiles) + } } else { childArgs ++= Array("--primary-java-resource", args.primaryResource) childArgs ++= Array("--main-class", args.mainClass) From 5fdaa7f950cbd02486758ceb0bb4596d65c1b621 Mon Sep 17 00:00:00 2001 From: mccheah Date: Mon, 7 Aug 2017 17:48:09 -0700 Subject: [PATCH 159/225] Exclude com.sun.jersey from docker-minimal-bundle. (#420) This probably is not the correct fix long-term as we should find the specific module that is pulling in the bad jersey-1 version. But until we can track down what the specific offending module is, this will have to do. 
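Until the offending module is identified, one way to confirm which modules pull in the jersey-1 artifacts is to filter the Maven dependency tree for that group ID. The invocation below is only a sketch and is not part of this patch; `build/mvn`, `-pl`, and `-am` are used the same way as in the integration-test instructions earlier in this series, and the module path is the one named in the change that follows.

```sh
# Hypothetical debugging command (not part of the patch): show which dependencies
# bring in com.sun.jersey artifacts for the docker-minimal-bundle module.
build/mvn dependency:tree -Dincludes=com.sun.jersey \
  -pl resource-managers/kubernetes/docker-minimal-bundle -am
```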
--- .../docker-minimal-bundle/src/main/assembly/docker-assembly.xml | 1 + 1 file changed, 1 insertion(+) diff --git a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml index 2b48d366256fe..e6de2c11a0ecf 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/src/main/assembly/docker-assembly.xml @@ -69,6 +69,7 @@ org.apache.spark:spark-assembly_${scala.binary.version}:pom org.spark-project.spark:unused + com.sun.jersey:* From e3cfaa498958bf886964c96f7798deb84721750c Mon Sep 17 00:00:00 2001 From: Kimoon Kim Date: Mon, 7 Aug 2017 18:13:56 -0700 Subject: [PATCH 160/225] Flag-guard expensive DNS lookup of cluster node full names, part of HDFS locality support (#412) * Flag-guard expensive DNS lookup of cluster node full names, part of HDFS locality support * Clean up a bit * Improve unit tests --- .../spark/deploy/kubernetes/config.scala | 13 +++++++ .../kubernetes/KubernetesTaskSetManager.scala | 21 ++++++++---- .../KubernetesTaskSetManagerSuite.scala | 34 ++++++++++++++++++- 3 files changed, 61 insertions(+), 7 deletions(-) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index c6772c1cb5ae4..f9c4c9c6a1e18 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -491,6 +491,19 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED = + ConfigBuilder("spark.kubernetes.driver.hdfslocality.clusterNodeNameDNSLookup.enabled") + .doc("Whether or not HDFS locality support code should look up DNS for full hostnames of" + + " cluster nodes. In some K8s clusters, notably GKE, cluster node names are short" + + " hostnames, and so comparing them against HDFS datanode hostnames always fail. To fix," + + " enable this flag. This is disabled by default because DNS lookup can be expensive." + + " The driver can slow down and fail to respond to executor heartbeats in time." 
+ + " If enabling this flag, make sure your DNS server has enough capacity" + + " for the workload.") + .internal() + .booleanConf + .createWithDefault(false) + private[spark] val KUBERNETES_EXECUTOR_LIMIT_CORES = ConfigBuilder("spark.kubernetes.executor.limit.cores") .doc("Specify the hard cpu limit for a single executor pod") diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala index 51566d03a7a6c..17710fada2876 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManager.scala @@ -20,6 +20,7 @@ import java.net.InetAddress import scala.collection.mutable.ArrayBuffer +import org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.scheduler.{TaskSchedulerImpl, TaskSet, TaskSetManager} private[spark] class KubernetesTaskSetManager( @@ -29,6 +30,8 @@ private[spark] class KubernetesTaskSetManager( inetAddressUtil: InetAddressUtil = new InetAddressUtil) extends TaskSetManager(sched, taskSet, maxTaskFailures) { + private val conf = sched.sc.conf + /** * Overrides the lookup to use not only the executor pod IP, but also the cluster node * name and host IP address that the pod is running on. The base class may have populated @@ -58,13 +61,19 @@ private[spark] class KubernetesTaskSetManager( s"$executorIP using cluster node IP $clusterNodeIP") pendingTasksClusterNodeIP } else { - val clusterNodeFullName = inetAddressUtil.getFullHostName(clusterNodeIP) - val pendingTasksClusterNodeFullName = super.getPendingTasksForHost(clusterNodeFullName) - if (pendingTasksClusterNodeFullName.nonEmpty) { - logDebug(s"Got preferred task list $pendingTasksClusterNodeFullName " + - s"for executor host $executorIP using cluster node full name $clusterNodeFullName") + if (conf.get(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED)) { + val clusterNodeFullName = inetAddressUtil.getFullHostName(clusterNodeIP) + val pendingTasksClusterNodeFullName = super.getPendingTasksForHost( + clusterNodeFullName) + if (pendingTasksClusterNodeFullName.nonEmpty) { + logDebug(s"Got preferred task list $pendingTasksClusterNodeFullName " + + s"for executor host $executorIP using cluster node full name " + + s"$clusterNodeFullName") + } + pendingTasksClusterNodeFullName + } else { + pendingTasksExecutorIP // Empty } - pendingTasksClusterNodeFullName } } } else { diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala index 7618c137ab22b..864ff40d88c5c 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesTaskSetManagerSuite.scala @@ -20,11 +20,13 @@ import scala.collection.mutable.ArrayBuffer import io.fabric8.kubernetes.api.model.{Pod, PodSpec, PodStatus} import org.mockito.Mockito._ +import org.scalatest.BeforeAndAfter import org.apache.spark.{SparkContext, SparkFunSuite} +import 
org.apache.spark.deploy.kubernetes.config._ import org.apache.spark.scheduler.{FakeTask, FakeTaskScheduler, HostTaskLocation, TaskLocation} -class KubernetesTaskSetManagerSuite extends SparkFunSuite { +class KubernetesTaskSetManagerSuite extends SparkFunSuite with BeforeAndAfter { val sc = new SparkContext("local", "test") val sched = new FakeTaskScheduler(sc, @@ -32,6 +34,10 @@ class KubernetesTaskSetManagerSuite extends SparkFunSuite { val backend = mock(classOf[KubernetesClusterSchedulerBackend]) sched.backend = backend + before { + sc.conf.remove(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED) + } + test("Find pending tasks for executors using executor pod IP addresses") { val taskSet = FakeTask.createTaskSet(3, Seq(TaskLocation("10.0.0.1", "execA")), // Task 0 runs on executor pod 10.0.0.1. @@ -76,7 +82,33 @@ class KubernetesTaskSetManagerSuite extends SparkFunSuite { assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer(1, 0)) } + test("Test DNS lookup is disabled by default for cluster node full hostnames") { + assert(!sc.conf.get(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED)) + } + + test("Find pending tasks for executors, but avoid looking up cluster node FQDNs from DNS") { + sc.conf.set(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED, false) + val taskSet = FakeTask.createTaskSet(2, + Seq(HostTaskLocation("kube-node1.domain1")), // Task 0's partition belongs to datanode here. + Seq(HostTaskLocation("kube-node1.domain1")) // task 1's partition belongs to datanode here. + ) + val spec1 = mock(classOf[PodSpec]) + when(spec1.getNodeName).thenReturn("kube-node1") + val pod1 = mock(classOf[Pod]) + when(pod1.getSpec).thenReturn(spec1) + val status1 = mock(classOf[PodStatus]) + when(status1.getHostIP).thenReturn("196.0.0.5") + when(pod1.getStatus).thenReturn(status1) + val inetAddressUtil = mock(classOf[InetAddressUtil]) + when(inetAddressUtil.getFullHostName("196.0.0.5")).thenReturn("kube-node1.domain1") + when(backend.getExecutorPodByIP("10.0.0.1")).thenReturn(Some(pod1)) + + val manager = new KubernetesTaskSetManager(sched, taskSet, maxTaskFailures = 2, inetAddressUtil) + assert(manager.getPendingTasksForHost("10.0.0.1") == ArrayBuffer()) + } + test("Find pending tasks for executors using cluster node FQDNs that executor pods run on") { + sc.conf.set(KUBERNETES_DRIVER_CLUSTER_NODENAME_DNS_LOOKUP_ENABLED, true) val taskSet = FakeTask.createTaskSet(2, Seq(HostTaskLocation("kube-node1.domain1")), // Task 0's partition belongs to datanode here. Seq(HostTaskLocation("kube-node1.domain1")) // task 1's partition belongs to datanode here. 
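Because the new `spark.kubernetes.driver.hdfslocality.clusterNodeNameDNSLookup.enabled` setting introduced above is read from the driver's SparkConf, opting in amounts to a plain `--conf` entry at submission time. The command below is a sketch only and not part of the patch; the API server address, main class, and application jar are placeholders rather than values taken from this series.

```sh
# Sketch only: opt in to full-hostname DNS lookups for HDFS locality
# (the flag is disabled by default because the lookups can be expensive).
spark-submit \
  --deploy-mode cluster \
  --master k8s://https://<api-server-host>:<port> \
  --conf spark.kubernetes.driver.hdfslocality.clusterNodeNameDNSLookup.enabled=true \
  --class <main-class> <application-jar>
```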
From bd50627d3474df9b6b2ddbd54611a4687794a801 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Tue, 8 Aug 2017 09:52:39 -0700 Subject: [PATCH 161/225] fixes #389 - increase SparkReadinessWatcher wait time (#419) --- .../kubernetes/integrationtest/SparkReadinessWatcher.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala index 20517eb2fc2a6..bd604ab94b936 100644 --- a/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala +++ b/resource-managers/kubernetes/integration-tests/src/test/scala/org/apache/spark/deploy/kubernetes/integrationtest/SparkReadinessWatcher.scala @@ -37,5 +37,5 @@ private[spark] class SparkReadinessWatcher[T <: HasMetadata] extends Watcher[T] override def onClose(cause: KubernetesClientException): Unit = {} - def waitUntilReady(): Boolean = signal.get(30, TimeUnit.SECONDS) + def waitUntilReady(): Boolean = signal.get(60, TimeUnit.SECONDS) } From 24cd9ee3e3dcd67bc8fc4f787babba67c34979c5 Mon Sep 17 00:00:00 2001 From: mccheah Date: Tue, 8 Aug 2017 14:27:58 -0700 Subject: [PATCH 162/225] Initial architecture documentation. (#401) * Initial architecture documentation. Initial full documentation for the submission client. Templates for the external shuffle service and the scheduler backend. * Add title to scheduler backend doc. * edits for PR review feedback --- .../external-shuffle-service.md | 6 + .../architecture-docs/scheduler-backend.md | 5 + .../architecture-docs/submission-client.md | 231 ++++++++++++++++++ 3 files changed, 242 insertions(+) create mode 100644 resource-managers/kubernetes/architecture-docs/external-shuffle-service.md create mode 100644 resource-managers/kubernetes/architecture-docs/scheduler-backend.md create mode 100644 resource-managers/kubernetes/architecture-docs/submission-client.md diff --git a/resource-managers/kubernetes/architecture-docs/external-shuffle-service.md b/resource-managers/kubernetes/architecture-docs/external-shuffle-service.md new file mode 100644 index 0000000000000..2f8f32ad4b3d8 --- /dev/null +++ b/resource-managers/kubernetes/architecture-docs/external-shuffle-service.md @@ -0,0 +1,6 @@ +--- +layout: global +title: Kubernetes Implementation of the External Shuffle Service +--- + + diff --git a/resource-managers/kubernetes/architecture-docs/scheduler-backend.md b/resource-managers/kubernetes/architecture-docs/scheduler-backend.md new file mode 100644 index 0000000000000..c057e8a4b849d --- /dev/null +++ b/resource-managers/kubernetes/architecture-docs/scheduler-backend.md @@ -0,0 +1,5 @@ +--- +layout: global +title: Kubernetes Implementation of the Spark Scheduler Backend +--- + diff --git a/resource-managers/kubernetes/architecture-docs/submission-client.md b/resource-managers/kubernetes/architecture-docs/submission-client.md new file mode 100644 index 0000000000000..022cf15c60be7 --- /dev/null +++ b/resource-managers/kubernetes/architecture-docs/submission-client.md @@ -0,0 +1,231 @@ +--- +layout: global +title: Implementation of Submitting Applications to Kubernetes +--- + + +Similarly to YARN and Standalone mode, it is common for Spark applications to be deployed on Kubernetes through the +`spark-submit` process. 
Applications are deployed on Kubernetes via sending YAML files to the Kubernetes API server. +These YAML files declare the structure and behavior of the processes that will be run. However, such a declarative +approach to application deployment differs considerably from how Spark applications are deployed via the `spark-submit` +API. There are contracts provided by `spark-submit` that should work in Kubernetes in a consistent manner to the other +cluster managers that `spark-submit` can deploy on. + +This document outlines the design of the **Kubernetes submission client**, which effectively serves as a *translation +of options provided in spark-submit to a specification of one or more Kubernetes API resources that represent the +Spark driver*. + +# Entry Point + +As with the other cluster managers, the user's invocation of `spark-submit` will eventually delegate to running +`org.apache.spark.deploy.SparkSubmit#submit`. This method calls a main method that handles the submission logic +for a specific type of cluster manager. The top level entry point for the Kubernetes submission logic is in +`org.apache.spark.deploy.kubernetes.submit.Client#main()`. + +# Driver Configuration Steps + +In order to render submission parameters into the final Kubernetes driver pod specification, and do it in a scalable +manner, the submission client breaks pod construction down into a +series of configuration steps, each of which is responsible for handling some specific aspect of configuring the driver. +A top level component then iterates through all of the steps to produce a final set of Kubernetes resources that are +then deployed on the cluster. + +## Interface Definitions + +More formally, a configuration step must implement the following trait: + +```scala +package org.apache.spark.deploy.kubernetes.submit.submitsteps + +/** + * Represents a step in preparing the Kubernetes driver. + */ +private[spark] trait DriverConfigurationStep { + + /** + * Apply some transformation to the previous state of the driver to add a new feature to it. + */ + def configureDriver(driverSpec: KubernetesDriverSpec): KubernetesDriverSpec +} +``` + +A `DriverConfigurationStep` is thus a function that transforms a `KubernetesDriverSpec` into another +`KubernetesDriverSpec`, by taking the original specification and making additions to the specification in accordance to +the specific feature that step is responsible for. A `KubernetesDriverSpec` is a data structure with the following +properties: + +```scala +private[spark] case class KubernetesDriverSpec( + driverPod: Pod, + driverContainer: Container, + otherKubernetesResources: Seq[HasMetadata], + driverSparkConf: SparkConf) +``` + +The `Pod` and `Container` classes are Java representations of Kubernetes pods and containers respectively, and the +`HasMetadata` type corresponds to an arbitrary Kubernetes API resource such as a `Secret` or a `ConfigMap`. Kubernetes +primitives are represented using an [open-source Java Kubernetes client](https://github.com/fabric8io/kubernetes-client). +The `otherKubernetesResources` field represents Kubernetes resources that are required by the Spark application. For +example, the driver may require a `ConfigMap` or `Secret` resource to be created that will be mounted into the driver +container. + +## Requirements for Configuration Steps + +Configuration steps must be *independent*. A given configuration step should not be opinionated about the other +configuration steps that are executed before or after it. 
By extension, configuration steps should be *strictly +additive*. A given configuration step should not attempt to mutate an existing field nor remove fields set in the +input driver specification. + +## Composition of Configuration Steps + +Finally, configuration steps are wired together by an **orchestrator**. The orchestrator effectively translates the +parameters sent to `spark-submit` into the set of steps required to configure the final `KubernetesDriverSpec`. The +top level submission client takes the final `KubernetesDriverSpec` object and builds the final requests to the +Kubernetes API server to deploy the Kubernetes resources that comprise the Spark driver. The top level submission +process can thus be expressed as follows in pseudo-code with roughly Scala syntax: + +```scala +def runApplication(sparkSubmitArguments: SparkSubmitArguments) { + val initialSpec = createEmptyDriverSpec() + val orchestrator = new DriverConfigurationStepsOrchestrator(sparkSubmitArguments) + val steps = orchestrator.getSubmissionSteps() + var currentSpec = initialSpec + // iteratively apply the configuration steps to build up the pod spec: + for (step <- steps) { + currentSpec = step.configureDriver(currentSpec) + } + // Put the container in the pod spec + val resolvedPod = attachContainer(currentSpec.driverPod, currentSpec.driverContainer) + kubernetes.create(resolvedPod + currentSpec.otherKubernetesResources) +} +``` + +## Writing a New Configuration Step + +All configuration steps should be placed in the `org.apache.spark.deploy.kubernetes.submit.submitsteps` package. +Examples of other configuration steps can be found in this package as well. Ensure that the new configuration step is +returned in `org.apache.spark.deploy.kubernetes.submit.DriverConfigurationStepsOrchestrator#getAllConfigurationSteps()`. + +# Dependency Management + +Spark applications typically depend on binaries and various configuration files which are hosted in various locations. +Kubernetes applications typically bundle binary dependencies such as jars inside Docker images. However, Spark's API +fundamentally allows dependencies to be provided from many other locations, including the submitter's local disk. +These dependencies have to be deployed into the driver and executor containers before they run. This is challenging +because unlike Hadoop YARN which requires co-deployment with an HDFS cluster, Kubernetes clusters do not have a +large-scale persistent storage layer that would be available across every Kubernetes cluster. + +## Resource Staging Server + +The *resource staging server* is a lightweight daemon that serves as a file store for application dependencies. It has +two endpoints which effectively correspond to putting files into the server and getting files out of the server. When +files are put into the server, the server returns a unique identifier and a secret token in the response to the client. +This identifier and secret token must be provided when a client makes a request to retrieve the files that were uploaded +to the server. + +### Resource Staging Server API Definition + +The resource staging server has the following Scala API which would then be translated into HTTP endpoints via Jetty and +JAX-RS. 
Associated structures passed as input and output are also defined below: + +```scala +private[spark] trait ResourceStagingService { + + def uploadResources(resources: InputStream, resourcesOwner: StagedResourcesOwner): SubmittedResourceIdAndSecret + def downloadResources(resourceId: String, resourceSecret: String): StreamingOutput +} + +case class StagedResourcesOwner( + ownerNamespace: String, + ownerLabels: Map[String, String], + ownerType: StagedResourcesOwnerType.OwnerType) + +// Pseudo-code to represent an enum +enum StagedResourcesOwnerType.OwnerType = { Pod } + +case class SubmittedResourceIdAndSecret(resourceId: String, resourceSecret: String) +``` + +Clients that send resources to the server do so in a streaming manner so that both the server and the client do not +need to hold the entire resource bundle in memory. Aside from the notion of the `StagedResourcesOwner` that is provided +on uploads and not for downloads, uploading is symmetrical to downloading. The significance of the +`StagedResourcesOwner` is discussed below. + +### Cleaning Up Stale Resources + +The resource staging server is built to provide resources for the pods and containers in a Kubernetes cluster. These +pods are ephemeral, so at some point there will be no need for the resources that were sent for a specific application. +Clients indicate the set of resources that would be using a given resource bundle by providing a description of the +resource's "owner". The `StagedResourceOwner` is this description, defining the owner as a Kubernetes API object in +a given namespace and having a specific set of labels. + +The resource staging server keeps track of the resources that were sent to it. When the resource is first uploaded, it +is marked as "unused". If the resource remains unused for a period of time, it is cleaned up. A resource is marked as +"used" when a request is made to download it. After that, the server periodically checks the API server to see if any +Kubernetes API objects exist that match the description of the owner. If no such objects exist, then resource staging +server cleans up the uploaded resource. See `org.apache.spark.deploy.rest.kubernetes.StagedResourcesCleaner` for the +code that manages the resource's lifecycle. + +A resource owner can currently only be a pod, but hypothetically one could want to tie the lifetime of a resource to the +lifetime of many pods under a higher level Kubernetes object like a Deployment or a StatefulSet, all of which depend on +the uploaded resource. The resource staging server's API can be extended to tie ownership of a resource to any +Kubernetes API object type, as long as we update the `StagedResourcesOwnerType` enumeration accordingly. + +### Usage in Spark + +Spark-submit supports adding jars and files by passing `--jars` and `--files` to `spark-submit` respectively. The spark +configurations `spark.jars` and `spark.files` can also be set to provide this information. The submission client +determines the list of jars and files that the application needs, and it determines if any of them are files being sent +from the submitter's local machine. If any files are being sent from the local machine, the user must have specified a +URL for the resource staging server to send the files to. + +Local jars and files are compacted into a tarball which are then uploaded to the resource staging server. The submission +client then knows the secret token that the driver and executors must use to download the files again. 
These secrets +are mounted into an [init-container](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) +that runs before the driver and executor processes run, and the init-container +downloads the uploaded resources from the resource staging server. + +### Other Considered Alternatives + +The resource staging server was considered the best option among other alternative solutions to this problem. + +A first implementation effectively included the resource staging server in the driver container itself. The driver +container ran a custom command that opened an HTTP endpoint and waited for the submission client to send resources to +it. The server would then run the driver application after it had received the resources from the user's local +machine. The problem with this approach is that the submission client needs to deploy the driver in such a way that the +driver itself would be reachable from outside of the cluster, but it is difficult for an automated framework which is +not aware of the cluster's configuration to expose an arbitrary pod in a generic way. The resource staging server allows +a cluster administrator to expose the resource staging server in a manner that makes sense for their cluster, such as +with an Ingress or with a NodePort service. + +It is also impossible to use Kubernetes API objects like Secrets or ConfigMaps to store application binaries. These +objects require their contents to be small so that they can fit in etcd. + +Finally, as mentioned before, the submission client should not be opinionated about storing dependencies in a +distributed storage system like HDFS, because not all Kubernetes clusters will have the same types of persistent storage +layers. Spark supports fetching jars directly from distributed storage layers though, so users can feel free to manually +push their dependencies to their appropriate systems and refer to them by their remote URIs in the submission request. + +## Init-Containers + +The driver pod and executor pods both use [init-containers](https://kubernetes.io/docs/concepts/workloads/pods/init-containers/) +to localize resources before the driver and +executor processes launch. As mentioned before, the init-container fetches dependencies from the resource staging +server. However, even if the resource staging server is not being used, files still need to be localized from remote +locations such as HDFS clusters or HTTP file servers. The init-container will fetch these dependencies accordingly as +well. + +Init-containers were preferred over fetching the dependencies in the main container primarily because this allows the +main container's runtime commands to be simplified. Using init-containers to fetch these remote dependencies allows the +main image command to simply be an invocation of Java that runs the user's main class directly. The execution of the +file localizer process can also be shared by both the driver and the executor images without needing to be copied +into both image commands. Finally, it becomes easier to debug localization failures as they will be easily spotted as +being a failure in the pod's initialization lifecycle phase. + +# Future Work + +* The driver's pod specification should be highly customizable, to the point where users may want to specify a template +pod spec in a YAML file: https://github.com/apache-spark-on-k8s/spark/issues/38. +* The resource staging server can be backed by a distributed file store like HDFS to improve robustness and scalability. 
+* Additional driver bootstrap steps need to be added to support communication with Kerberized HDFS clusters: + * https://github.com/apache-spark-on-k8s/spark/pull/391 From 372ae4115f00c2bfb4233c7e1e7eaac9e76954d1 Mon Sep 17 00:00:00 2001 From: tangzhankun Date: Thu, 10 Aug 2017 00:31:01 +0800 Subject: [PATCH 163/225] Allow configuration to set environment variables on driver and executor (#424) * allow configuration to set environment variables on driver and executor as below: --conf spark.executorEnv.[EnvironmentVariableName] --conf spark.driverEnv.[EnvironmentVariableName] * change the driver environment key prefix to spark.kubernetes.driverEnv. --- docs/running-on-kubernetes.md | 16 ++++++++++++++++ .../apache/spark/deploy/kubernetes/config.scala | 2 ++ .../BaseDriverConfigurationStep.scala | 8 ++++++++ .../KubernetesClusterSchedulerBackend.scala | 4 ++-- .../BaseDriverConfigurationStepSuite.scala | 9 ++++++++- 5 files changed, 36 insertions(+), 3 deletions(-) diff --git a/docs/running-on-kubernetes.md b/docs/running-on-kubernetes.md index 5e23801e15b10..4286ab19eb3ad 100644 --- a/docs/running-on-kubernetes.md +++ b/docs/running-on-kubernetes.md @@ -768,6 +768,22 @@ from the other deployment modes. See the [configuration page](configuration.html myIdentifier. Multiple node selector keys can be added by setting multiple configurations with this prefix. + + spark.executorEnv.[EnvironmentVariableName] + (none) + + Add the environment variable specified by EnvironmentVariableName to + the Executor process. The user can specify multiple of these to set multiple environment variables. + + + + spark.kubernetes.driverEnv.[EnvironmentVariableName] + (none) + + Add the environment variable specified by EnvironmentVariableName to + the Driver process. The user can specify multiple of these to set multiple environment variables. + + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala index f9c4c9c6a1e18..6e1633f6a63cb 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/config.scala @@ -126,6 +126,8 @@ package object config extends Logging { .stringConf .createOptional + private[spark] val KUBERNETES_DRIVER_ENV_KEY = "spark.kubernetes.driverEnv." + private[spark] val KUBERNETES_DRIVER_ANNOTATIONS = ConfigBuilder("spark.kubernetes.driver.annotations") .doc("Custom annotations that will be added to the driver pod. 
This should be a" + diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala index b3f509b44054e..a8539e0772163 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStep.scala @@ -72,6 +72,13 @@ private[spark] class BaseDriverConfigurationStep( require(!driverCustomAnnotations.contains(SPARK_APP_NAME_ANNOTATION), s"Annotation with key $SPARK_APP_NAME_ANNOTATION is not allowed as it is reserved for" + s" Spark bookkeeping operations.") + + val driverCustomEnvs = submissionSparkConf.getAllWithPrefix(KUBERNETES_DRIVER_ENV_KEY).toSeq + .map(env => new EnvVarBuilder() + .withName(env._1) + .withValue(env._2) + .build()) + val allDriverAnnotations = driverCustomAnnotations ++ Map(SPARK_APP_NAME_ANNOTATION -> appName) val nodeSelector = ConfigurationUtils.parsePrefixedKeyValuePairs( submissionSparkConf, KUBERNETES_NODE_SELECTOR_PREFIX, "node selector") @@ -91,6 +98,7 @@ private[spark] class BaseDriverConfigurationStep( .withName(DRIVER_CONTAINER_NAME) .withImage(driverDockerImage) .withImagePullPolicy(dockerImagePullPolicy) + .addAllToEnv(driverCustomEnvs.asJava) .addToEnv(driverExtraClasspathEnv.toSeq: _*) .addNewEnv() .withName(ENV_DRIVER_MEMORY) diff --git a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala index a50a9c8bb9c3b..4eae6ee3184ba 100644 --- a/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala +++ b/resource-managers/kubernetes/core/src/main/scala/org/apache/spark/scheduler/cluster/kubernetes/KubernetesClusterSchedulerBackend.scala @@ -455,7 +455,7 @@ private[spark] class KubernetesClusterSchedulerBackend( .withValue(cp) .build() } - val requiredEnv = Seq( + val requiredEnv = (Seq( (ENV_EXECUTOR_PORT, executorPort.toString), (ENV_DRIVER_URL, driverUrl), // Executor backend expects integral value for executor cores, so round it up to an int. 
@@ -463,7 +463,7 @@ private[spark] class KubernetesClusterSchedulerBackend( (ENV_EXECUTOR_MEMORY, executorMemoryString), (ENV_APPLICATION_ID, applicationId()), (ENV_EXECUTOR_ID, executorId), - (ENV_MOUNTED_CLASSPATH, s"$executorJarsDownloadDir/*")) + (ENV_MOUNTED_CLASSPATH, s"$executorJarsDownloadDir/*")) ++ sc.executorEnvs.toSeq) .map(env => new EnvVarBuilder() .withName(env._1) .withValue(env._2) diff --git a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala index c7d80a16a1532..4520c40ec81c1 100644 --- a/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala +++ b/resource-managers/kubernetes/core/src/test/scala/org/apache/spark/deploy/kubernetes/submit/submitsteps/BaseDriverConfigurationStepSuite.scala @@ -36,6 +36,8 @@ private[spark] class BaseDriverConfigurationStepSuite extends SparkFunSuite { private val CUSTOM_ANNOTATION_VALUE = "customAnnotationValue" private val DEPRECATED_CUSTOM_ANNOTATION_KEY = "customAnnotationDeprecated" private val DEPRECATED_CUSTOM_ANNOTATION_VALUE = "customAnnotationDeprecatedValue" + private val DRIVER_CUSTOM_ENV_KEY1 = "customDriverEnv1" + private val DRIVER_CUSTOM_ENV_KEY2 = "customDriverEnv2" test("Set all possible configurations from the user.") { val sparkConf = new SparkConf() @@ -49,6 +51,9 @@ private[spark] class BaseDriverConfigurationStepSuite extends SparkFunSuite { .set(s"spark.kubernetes.driver.annotation.$CUSTOM_ANNOTATION_KEY", CUSTOM_ANNOTATION_VALUE) .set("spark.kubernetes.driver.annotations", s"$DEPRECATED_CUSTOM_ANNOTATION_KEY=$DEPRECATED_CUSTOM_ANNOTATION_VALUE") + .set(s"$KUBERNETES_DRIVER_ENV_KEY$DRIVER_CUSTOM_ENV_KEY1", "customDriverEnv1") + .set(s"$KUBERNETES_DRIVER_ENV_KEY$DRIVER_CUSTOM_ENV_KEY2", "customDriverEnv2") + val submissionStep = new BaseDriverConfigurationStep( APP_ID, RESOURCE_NAME_PREFIX, @@ -74,11 +79,13 @@ private[spark] class BaseDriverConfigurationStepSuite extends SparkFunSuite { .asScala .map(env => (env.getName, env.getValue)) .toMap - assert(envs.size === 4) + assert(envs.size === 6) assert(envs(ENV_SUBMIT_EXTRA_CLASSPATH) === "/opt/spark/spark-exmaples.jar") assert(envs(ENV_DRIVER_MEMORY) === "456m") assert(envs(ENV_DRIVER_MAIN_CLASS) === MAIN_CLASS) assert(envs(ENV_DRIVER_ARGS) === "arg1 arg2") + assert(envs(DRIVER_CUSTOM_ENV_KEY1) === "customDriverEnv1") + assert(envs(DRIVER_CUSTOM_ENV_KEY2) === "customDriverEnv2") val resourceRequirements = preparedDriverSpec.driverContainer.getResources val requests = resourceRequirements.getRequests.asScala assert(requests("cpu").getAmount === "2") From 410dc9cf84cf53a5b0ee588fe8035f6e7318139f Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Wed, 9 Aug 2017 15:53:18 -0700 Subject: [PATCH 164/225] version 2.2.0-k8s-0.3.0 --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 7835481531216..903ac352bdade 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 pom Spark Project Parent POM http://spark.apache.org/ From 737abdcc874ee0d907cafaba83a7f2c9a94944d8 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Wed, 9 Aug 2017 15:54:10 -0700 Subject: [PATCH 165/225] bump to 2.2.0-k8s-0.4.0-SNAPSHOT --- pom.xml | 2 +- 1 file changed, 
1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 903ac352bdade..9eebe92dbb0df 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ From a46b4a35c69253a27a3ec701b45f9a666468a9e6 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Thu, 10 Aug 2017 10:57:11 -0700 Subject: [PATCH 166/225] Revert "bump to 2.2.0-k8s-0.4.0-SNAPSHOT" This reverts commit 737abdcc874ee0d907cafaba83a7f2c9a94944d8. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9eebe92dbb0df..903ac352bdade 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.4.0-SNAPSHOT + 2.2.0-k8s-0.3.0 pom Spark Project Parent POM http://spark.apache.org/ From ff601a3bbc8dfd19be35e5ab78cd8516ebc378ef Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Thu, 10 Aug 2017 10:57:44 -0700 Subject: [PATCH 167/225] Revert "version 2.2.0-k8s-0.3.0" This reverts commit 410dc9cf84cf53a5b0ee588fe8035f6e7318139f. --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 903ac352bdade..7835481531216 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.3.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ From 19f49d03d6ea098eb59a60b5a16b6a57c54d67f9 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Thu, 10 Aug 2017 11:03:49 -0700 Subject: [PATCH 168/225] version 2.2.0-k8s-0.3.0 --- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/java8-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs-helpers/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 41 insertions(+), 41 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 13930630f2338..7230f706fe249 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index 2d29bfc8ea89a..b0619ebc7612a 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ 
org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index b837c8a2be8a4..6c8f7375e2c8b 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 45e128b6e1cfd..3386ef2f80633 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 121b2489fbb72..48f8ff14b0186 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 31d0d2efc654a..8d7ab46077045 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index 78eaf8624df93..d8b50606363b3 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index d80b2591d80c3..8c2e6881f8b13 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/examples/pom.xml b/examples/pom.xml index d2135bf5ff192..81d90c8205566 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index fdf7611936346..20736b1c09e2e 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 3ae7d254c95a1..9302bdb2c9458 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 89e61be25d8c9..1f01ee418095d 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 897af93b6b8a2..1b0af1f7724e1 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/java8-tests/pom.xml b/external/java8-tests/pom.xml index 5513dfea0f281..672146f9127aa 100644 --- a/external/java8-tests/pom.xml +++ b/external/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml 
b/external/kafka-0-10-assembly/pom.xml index d836fffdb56b6..28d8b13114c1c 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml b/external/kafka-0-10-sql/pom.xml index 10419a1275f73..a49b086585648 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index a0a7b26f2b71e..2c86740fdc5b3 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 4cd40dbe89689..7d9bae1c9aec8 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index 57ad12dc70709..8fe37013eefca 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index 240db2098d1f8..ae2126c333c17 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index c6a7ceb80c465..597ebbd9d5bc8 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index cf23a63e23cbe..af817b948b427 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index 8fddc5bfee0f1..ba24cc4570e37 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 705675af7a01f..75d4086a56dfb 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 155adae6afa61..016dbf40cd667 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 25c38e2281eff..4090776631f15 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/pom.xml b/pom.xml index 7835481531216..903ac352bdade 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ 
org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index c798170e81ba7..38beba43e68c5 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 3feecb09230bc..5e079667cf384 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 5f7683280cccf..20c0a7edbd9aa 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index 9672f49448c0c..26787a4d39fe3 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 80b0f57a2fcb9..78ba9577c3486 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 0da1e38d8c211..36cb17b8f7b07 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 72ba1417bd1e7..031b040372bb7 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index c65806e293f25..b29161672f865 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 0aec944d693dd..21f9439dafcef 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index 5943112510b0b..a20362c38f9ee 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 
2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index 65c8f520b0f8d..be03f2472e31a 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index 73a68b32d8168..a75c10fdf9a32 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 51789b826b673..29f2b6e30e20b 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 066d74c1e286d..1185411c8ec76 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0-SNAPSHOT + 2.2.0-k8s-0.3.0 ../pom.xml From 982760c30c4c3e41c2f136dbe3acbe9462a3cd31 Mon Sep 17 00:00:00 2001 From: Erik Erlandson Date: Thu, 10 Aug 2017 11:05:42 -0700 Subject: [PATCH 169/225] bump to 2.2.0-k8s-0.4.0-SNAPSHOT --- assembly/pom.xml | 2 +- common/network-common/pom.xml | 2 +- common/network-shuffle/pom.xml | 2 +- common/network-yarn/pom.xml | 2 +- common/sketch/pom.xml | 2 +- common/tags/pom.xml | 2 +- common/unsafe/pom.xml | 2 +- core/pom.xml | 2 +- examples/pom.xml | 2 +- external/docker-integration-tests/pom.xml | 2 +- external/flume-assembly/pom.xml | 2 +- external/flume-sink/pom.xml | 2 +- external/flume/pom.xml | 2 +- external/java8-tests/pom.xml | 2 +- external/kafka-0-10-assembly/pom.xml | 2 +- external/kafka-0-10-sql/pom.xml | 2 +- external/kafka-0-10/pom.xml | 2 +- external/kafka-0-8-assembly/pom.xml | 2 +- external/kafka-0-8/pom.xml | 2 +- external/kinesis-asl-assembly/pom.xml | 2 +- external/kinesis-asl/pom.xml | 2 +- external/spark-ganglia-lgpl/pom.xml | 2 +- graphx/pom.xml | 2 +- launcher/pom.xml | 2 +- mllib-local/pom.xml | 2 +- mllib/pom.xml | 2 +- pom.xml | 2 +- repl/pom.xml | 2 +- resource-managers/kubernetes/core/pom.xml | 2 +- resource-managers/kubernetes/docker-minimal-bundle/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs-helpers/pom.xml | 2 +- .../kubernetes/integration-tests-spark-jobs/pom.xml | 2 +- resource-managers/kubernetes/integration-tests/pom.xml | 2 +- resource-managers/mesos/pom.xml | 2 +- resource-managers/yarn/pom.xml | 2 +- sql/catalyst/pom.xml | 2 +- sql/core/pom.xml | 2 +- sql/hive-thriftserver/pom.xml | 2 +- sql/hive/pom.xml | 2 +- streaming/pom.xml | 2 +- tools/pom.xml | 2 +- 41 files changed, 41 insertions(+), 41 deletions(-) diff --git a/assembly/pom.xml b/assembly/pom.xml index 7230f706fe249..29ab454fee253 100644 --- a/assembly/pom.xml +++ b/assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/common/network-common/pom.xml b/common/network-common/pom.xml index b0619ebc7612a..f437084ec6dd5 100644 --- a/common/network-common/pom.xml +++ b/common/network-common/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/common/network-shuffle/pom.xml b/common/network-shuffle/pom.xml index 6c8f7375e2c8b..b80c1df889731 100644 --- a/common/network-shuffle/pom.xml +++ b/common/network-shuffle/pom.xml @@ -22,7 +22,7 @@ org.apache.spark 
spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/common/network-yarn/pom.xml b/common/network-yarn/pom.xml index 3386ef2f80633..ae9ec59180486 100644 --- a/common/network-yarn/pom.xml +++ b/common/network-yarn/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/common/sketch/pom.xml b/common/sketch/pom.xml index 48f8ff14b0186..60549568be0ef 100644 --- a/common/sketch/pom.xml +++ b/common/sketch/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/common/tags/pom.xml b/common/tags/pom.xml index 8d7ab46077045..764be312470cf 100644 --- a/common/tags/pom.xml +++ b/common/tags/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/common/unsafe/pom.xml b/common/unsafe/pom.xml index d8b50606363b3..d0b543137781f 100644 --- a/common/unsafe/pom.xml +++ b/common/unsafe/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/core/pom.xml b/core/pom.xml index 8c2e6881f8b13..a152b27284abf 100644 --- a/core/pom.xml +++ b/core/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/examples/pom.xml b/examples/pom.xml index 81d90c8205566..3f7814d060526 100644 --- a/examples/pom.xml +++ b/examples/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/external/docker-integration-tests/pom.xml b/external/docker-integration-tests/pom.xml index 20736b1c09e2e..f90eb57c64dfe 100644 --- a/external/docker-integration-tests/pom.xml +++ b/external/docker-integration-tests/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/flume-assembly/pom.xml b/external/flume-assembly/pom.xml index 9302bdb2c9458..8592d85ccbd3f 100644 --- a/external/flume-assembly/pom.xml +++ b/external/flume-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/flume-sink/pom.xml b/external/flume-sink/pom.xml index 1f01ee418095d..ef0231990c3c9 100644 --- a/external/flume-sink/pom.xml +++ b/external/flume-sink/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/flume/pom.xml b/external/flume/pom.xml index 1b0af1f7724e1..606116ad0860c 100644 --- a/external/flume/pom.xml +++ b/external/flume/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/java8-tests/pom.xml b/external/java8-tests/pom.xml index 672146f9127aa..586f87d312ec0 100644 --- a/external/java8-tests/pom.xml +++ b/external/java8-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-assembly/pom.xml b/external/kafka-0-10-assembly/pom.xml index 28d8b13114c1c..39e1339bbf2a1 100644 --- a/external/kafka-0-10-assembly/pom.xml +++ b/external/kafka-0-10-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10-sql/pom.xml 
b/external/kafka-0-10-sql/pom.xml index a49b086585648..68ee5be2889b2 100644 --- a/external/kafka-0-10-sql/pom.xml +++ b/external/kafka-0-10-sql/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-10/pom.xml b/external/kafka-0-10/pom.xml index 2c86740fdc5b3..23e4eef8253b2 100644 --- a/external/kafka-0-10/pom.xml +++ b/external/kafka-0-10/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8-assembly/pom.xml b/external/kafka-0-8-assembly/pom.xml index 7d9bae1c9aec8..aa36d4a713e1e 100644 --- a/external/kafka-0-8-assembly/pom.xml +++ b/external/kafka-0-8-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kafka-0-8/pom.xml b/external/kafka-0-8/pom.xml index 8fe37013eefca..af7de9380e271 100644 --- a/external/kafka-0-8/pom.xml +++ b/external/kafka-0-8/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl-assembly/pom.xml b/external/kinesis-asl-assembly/pom.xml index ae2126c333c17..ced0e89bdff16 100644 --- a/external/kinesis-asl-assembly/pom.xml +++ b/external/kinesis-asl-assembly/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/kinesis-asl/pom.xml b/external/kinesis-asl/pom.xml index 597ebbd9d5bc8..0ae897f62dda3 100644 --- a/external/kinesis-asl/pom.xml +++ b/external/kinesis-asl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/external/spark-ganglia-lgpl/pom.xml b/external/spark-ganglia-lgpl/pom.xml index af817b948b427..2744a9fdea489 100644 --- a/external/spark-ganglia-lgpl/pom.xml +++ b/external/spark-ganglia-lgpl/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/graphx/pom.xml b/graphx/pom.xml index ba24cc4570e37..e59db233b54d8 100644 --- a/graphx/pom.xml +++ b/graphx/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/launcher/pom.xml b/launcher/pom.xml index 75d4086a56dfb..9df0c2d79a5fb 100644 --- a/launcher/pom.xml +++ b/launcher/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/mllib-local/pom.xml b/mllib-local/pom.xml index 016dbf40cd667..d120f8f3fc4f0 100644 --- a/mllib-local/pom.xml +++ b/mllib-local/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/mllib/pom.xml b/mllib/pom.xml index 4090776631f15..755f1cfe954f0 100644 --- a/mllib/pom.xml +++ b/mllib/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/pom.xml b/pom.xml index 903ac352bdade..9eebe92dbb0df 100644 --- a/pom.xml +++ b/pom.xml @@ -26,7 +26,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT pom Spark Project Parent POM http://spark.apache.org/ diff --git a/repl/pom.xml b/repl/pom.xml index 38beba43e68c5..c042a69c14644 100644 --- a/repl/pom.xml +++ b/repl/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 
2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/resource-managers/kubernetes/core/pom.xml b/resource-managers/kubernetes/core/pom.xml index 5e079667cf384..dfbf8adc5bc7b 100644 --- a/resource-managers/kubernetes/core/pom.xml +++ b/resource-managers/kubernetes/core/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml index 20c0a7edbd9aa..585d8d474bba5 100644 --- a/resource-managers/kubernetes/docker-minimal-bundle/pom.xml +++ b/resource-managers/kubernetes/docker-minimal-bundle/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml index 26787a4d39fe3..2e877daf95c63 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs-helpers/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml index 78ba9577c3486..276bbabdf32e1 100644 --- a/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml +++ b/resource-managers/kubernetes/integration-tests-spark-jobs/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/kubernetes/integration-tests/pom.xml b/resource-managers/kubernetes/integration-tests/pom.xml index 36cb17b8f7b07..fc28026de1e43 100644 --- a/resource-managers/kubernetes/integration-tests/pom.xml +++ b/resource-managers/kubernetes/integration-tests/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../../pom.xml diff --git a/resource-managers/mesos/pom.xml b/resource-managers/mesos/pom.xml index 031b040372bb7..db492f57120e5 100644 --- a/resource-managers/mesos/pom.xml +++ b/resource-managers/mesos/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/resource-managers/yarn/pom.xml b/resource-managers/yarn/pom.xml index b29161672f865..a59cf8c5eb5ac 100644 --- a/resource-managers/yarn/pom.xml +++ b/resource-managers/yarn/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/catalyst/pom.xml b/sql/catalyst/pom.xml index 21f9439dafcef..60fed70731bfd 100644 --- a/sql/catalyst/pom.xml +++ b/sql/catalyst/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/core/pom.xml b/sql/core/pom.xml index a20362c38f9ee..4b55cc859005c 100644 --- a/sql/core/pom.xml +++ b/sql/core/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/sql/hive-thriftserver/pom.xml b/sql/hive-thriftserver/pom.xml index be03f2472e31a..caa5da8b4d8b2 100644 --- a/sql/hive-thriftserver/pom.xml +++ b/sql/hive-thriftserver/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT 
../../pom.xml diff --git a/sql/hive/pom.xml b/sql/hive/pom.xml index a75c10fdf9a32..df9535de6677b 100644 --- a/sql/hive/pom.xml +++ b/sql/hive/pom.xml @@ -22,7 +22,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../../pom.xml diff --git a/streaming/pom.xml b/streaming/pom.xml index 29f2b6e30e20b..e5ffe7af8c044 100644 --- a/streaming/pom.xml +++ b/streaming/pom.xml @@ -21,7 +21,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml diff --git a/tools/pom.xml b/tools/pom.xml index 1185411c8ec76..7ca0a9bb2d3a3 100644 --- a/tools/pom.xml +++ b/tools/pom.xml @@ -20,7 +20,7 @@ org.apache.spark spark-parent_2.11 - 2.2.0-k8s-0.3.0 + 2.2.0-k8s-0.4.0-SNAPSHOT ../pom.xml From cb645cae5c5841703d0f937bcaabe51975d28d92 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 14 Aug 2017 11:41:28 -0700 Subject: [PATCH 170/225] Update external shuffle service docs --- .../external-shuffle-service.md | 23 +++++++++++++++++++ 1 file changed, 23 insertions(+) diff --git a/resource-managers/kubernetes/architecture-docs/external-shuffle-service.md b/resource-managers/kubernetes/architecture-docs/external-shuffle-service.md index 2f8f32ad4b3d8..e303800943d42 100644 --- a/resource-managers/kubernetes/architecture-docs/external-shuffle-service.md +++ b/resource-managers/kubernetes/architecture-docs/external-shuffle-service.md @@ -3,4 +3,27 @@ layout: global title: Kubernetes Implementation of the External Shuffle Service --- +# External Shuffle Service +The `KubernetesExternalShuffleService` was added to allow Spark to use Dynamic Allocation Mode when +running in Kubernetes. The shuffle service is responsible for persisting shuffle files beyond the +lifetime of the executors, allowing the number of executors to scale up and down without losing +computation. + +The implementation of choice is as a DaemonSet that runs a shuffle-service pod on each node. +Shuffle-service pods and executors pods that land on the same node share disk using hostpath +volumes. Spark requires that each executor must know the IP address of the shuffle-service pod that +shares disk with it. + +The user specifies the shuffle service pods they want executors of a particular SparkJob to use +through two new properties: + +* spark.kubernetes.shuffle.service.labels +* spark.kubernetes.shuffle.namespace + +KubernetesClusterSchedulerBackend is aware of shuffle service pods and the node corresponding to +them in a particular namespace. It uses this data to configure the executor pods to connect with the +shuffle services that are co-located with them on the same node. + +There is additional logic in the `KubernetesExternalShuffleService` to watch the Kubernetes API, +detect failures, and proactively cleanup files in those error cases. 
From 437eb89cc8bd0b2fda73ee7db2747a2db452b2b5 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 14 Aug 2017 14:10:05 -0600 Subject: [PATCH 171/225] Updated with documentation (#430) Direct copy of revised design doc --- .../architecture-docs/scheduler-backend.md | 44 +++++++++++++++++++ 1 file changed, 44 insertions(+) diff --git a/resource-managers/kubernetes/architecture-docs/scheduler-backend.md b/resource-managers/kubernetes/architecture-docs/scheduler-backend.md index c057e8a4b849d..cc43bd7816a8b 100644 --- a/resource-managers/kubernetes/architecture-docs/scheduler-backend.md +++ b/resource-managers/kubernetes/architecture-docs/scheduler-backend.md @@ -3,3 +3,47 @@ layout: global title: Kubernetes Implementation of the Spark Scheduler Backend --- +# Scheduler Backend + +The general idea is to run Spark drivers and executors inside Kubernetes [Pods](https://kubernetes.io/docs/concepts/workloads/pods/pod/). +Pods are a co-located and co-scheduled group of one or more containers run in a shared context. The main component is KubernetesClusterSchedulerBackend, +an implementation of CoarseGrainedSchedulerBackend, which manages allocating and destroying executors via the Kubernetes API. +There are auxiliary and optional components: `ResourceStagingServer` and `KubernetesExternalShuffleService`, which serve specific purposes described further below. + +The scheduler backend is invoked in the driver associated with a particular job. The driver may run outside the cluster (client mode) or within (cluster mode). +The scheduler backend manages [pods](http://kubernetes.io/docs/user-guide/pods/) for each executor. +The executor code is running within a Kubernetes pod, but remains unmodified and unaware of the orchestration layer. +When a job is running, the scheduler backend configures and creates executor pods with the following properties: + +- The pod's container runs a pre-built Docker image containing a Spark distribution (with Kubernetes integration) and +invokes the Java runtime with the CoarseGrainedExecutorBackend main class. +- The scheduler backend specifies environment variables on the executor pod to configure its runtime, p +articularly for its JVM options, number of cores, heap size, and the driver's hostname. +- The executor container has [resource limits and requests](https://kubernetes.io/docs/concepts/configuration/manage-compute-resources-container/#resource-requests-and-limits-of-pod-and-container) +that are set in accordance to the resource limits specified in the Spark configuration (executor.cores and executor.memory in the application's SparkConf) +- The executor pods may also be launched into a particular [Kubernetes namespace](https://kubernetes.io/docs/concepts/overview/working-with-objects/namespaces/%5C), +or target a particular subset of nodes in the Kubernetes cluster, based on the Spark configuration supplied. + +## Requesting Executors + +Spark requests for new executors through the `doRequestTotalExecutors(numExecutors: Int)` method. +The scheduler backend keeps track of the request made by Spark core for the number of executors. + +A separate kubernetes-pod-allocator thread handles the creation of new executor pods with appropriate throttling and monitoring. +This indirection is required because the Kubernetes API Server accepts requests for new executor pods optimistically, with the +anticipation of being able to eventually run them. 
However, it is undesirable to have a very large number of pods that cannot be +scheduled and stay pending within the cluster. Hence, the kubernetes-pod-allocator uses the Kubernetes API to make a decision to +submit new requests for executors based on whether previous pod creation requests have completed. This gives us control over how +fast a job scales up (which can be configured), and helps prevent Spark jobs from DOS-ing the Kubernetes API server with pod creation requests. + +## Destroying Executors + +Spark requests deletion of executors through the `doKillExecutors(executorIds: List[String])` +method. + +The inverse behavior is required in the implementation of doKillExecutors(). When the executor +allocation manager desires to remove executors from the application, the scheduler should find the +pods that are running the appropriate executors, and tell the API server to stop these pods. +It's worth noting that this code does not have to decide on the executors that should be +removed. When `doKillExecutors()` is called, the executors that are to be removed have already been +selected by the CoarseGrainedSchedulerBackend and ExecutorAllocationManager. From 3b3aeb7370ff974035259e37d99ea767741b8997 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 14 Aug 2017 15:20:29 -0600 Subject: [PATCH 172/225] Link to architecture docs (#432) --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index a5109f4e12897..9905e6c9f5751 100644 --- a/README.md +++ b/README.md @@ -10,6 +10,7 @@ This is a collaboratively maintained project working on [SPARK-18278](https://is - [Usage guide](https://apache-spark-on-k8s.github.io/userdocs/) shows how to run the code - [Development docs](resource-managers/kubernetes/README.md) shows how to get set up for development +- [Architecture docs](resource-managers/kubernetes/architecture-docs/) shows the high level architecture of Spark on Kubernetes - Code is primarily located in the [resource-managers/kubernetes](resource-managers/kubernetes) folder ## Why does this fork exist? From 6e1d69e6ebed01970906b191d67acb99d8faf421 Mon Sep 17 00:00:00 2001 From: Anirudh Ramanathan Date: Mon, 14 Aug 2017 16:00:25 -0600 Subject: [PATCH 173/225] Removed deprecated option from pom (#433) This fixes local integration testing --- pom.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pom.xml b/pom.xml index 9eebe92dbb0df..d90ad23752f54 100644 --- a/pom.xml +++ b/pom.xml @@ -2127,7 +2127,7 @@ ${project.build.directory}/surefire-reports . SparkTestSuite.txt - -ea -Xmx3g -XX:MaxPermSize=${MaxPermGen} -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraScalaTestArgs} + -ea -Xmx3g -XX:ReservedCodeCacheSize=${CodeCacheSize} ${extraScalaTestArgs}