diff --git a/centaur/src/main/resources/standardTestCases/gcpbatch_retry_with_more_memory.test b/centaur/src/main/resources/standardTestCases/gcpbatch_retry_with_more_memory.test index fb8847b52c..cdb626e658 100644 --- a/centaur/src/main/resources/standardTestCases/gcpbatch_retry_with_more_memory.test +++ b/centaur/src/main/resources/standardTestCases/gcpbatch_retry_with_more_memory.test @@ -1,5 +1,5 @@ name: gcpbatch_retry_with_more_memory -testFormat: workflowfailure +testFormat: workflowsuccess backends: [GCPBATCH] files { @@ -9,13 +9,10 @@ files { metadata { workflowName: retry_with_more_memory - status: Failed - "failures.0.message": "Workflow failed" - "failures.0.causedBy.0.message": "stderr for job `retry_with_more_memory.imitate_oom_error:NA:3` contained one of the `memory-retry-error-keys: [OutOfMemory,Killed]` specified in the Cromwell config. Job might have run out of memory." + status: Succeeded "retry_with_more_memory.imitate_oom_error.-1.1.executionStatus": "RetryableFailure" "retry_with_more_memory.imitate_oom_error.-1.1.runtimeAttributes.memory": "1 GB" "retry_with_more_memory.imitate_oom_error.-1.2.executionStatus": "RetryableFailure" "retry_with_more_memory.imitate_oom_error.-1.2.runtimeAttributes.memory": "1.1 GB" - "retry_with_more_memory.imitate_oom_error.-1.3.executionStatus": "Failed" - "retry_with_more_memory.imitate_oom_error.-1.3.runtimeAttributes.memory": "1.2100000000000002 GB" + "outputs.retry_with_more_memory.memory_output": "1.2100000000000002 GB" } diff --git a/centaur/src/main/resources/standardTestCases/retry_with_more_memory/gcpbatch/retry_with_more_memory.wdl b/centaur/src/main/resources/standardTestCases/retry_with_more_memory/gcpbatch/retry_with_more_memory.wdl index c9efea52dd..2c50ed34c8 100644 --- a/centaur/src/main/resources/standardTestCases/retry_with_more_memory/gcpbatch/retry_with_more_memory.wdl +++ b/centaur/src/main/resources/standardTestCases/retry_with_more_memory/gcpbatch/retry_with_more_memory.wdl @@ -2,12 +2,21 @@ version 1.0 task imitate_oom_error { command { - printf "Exception in thread "main" java.lang.OutOfMemoryError: testing\n\tat Test.main(Test.java:1)\n" >&2 && (exit 1) - # As a simulation of an OOM condition, do not create the 'foo' file. Cromwell should still be able to delocalize important detritus. - # touch foo + echo "$MEM_SIZE $MEM_UNIT" + + # Current bashes do not do floating point arithmetic, Python to the rescue. + LESS=$(python -c "print($MEM_SIZE < 1.21)") + + if [[ "$LESS" = "True" ]] + then + printf "Exception in thread "main" java.lang.OutOfMemoryError: testing\n\tat Test.main(Test.java:1)\n" >&2 + exit 1 + fi + + echo "$MEM_SIZE $MEM_UNIT" > memory_output.txt } output { - File foo = "foo" + String memory_output = read_string("memory_output.txt") } runtime { docker: "python:latest" @@ -19,4 +28,8 @@ task imitate_oom_error { workflow retry_with_more_memory { call imitate_oom_error + + output { + String memory_output = imitate_oom_error.memory_output + } } diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/runnable/RunnableBuilder.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/runnable/RunnableBuilder.scala index d69502295c..09208b9686 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/runnable/RunnableBuilder.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/runnable/RunnableBuilder.scala @@ -6,6 +6,7 @@ import cromwell.backend.google.batch.models.GcpBatchConfigurationAttributes.GcsT import cromwell.backend.google.batch.models.{BatchParameter, GcpBatchInput, GcpBatchOutput} import cromwell.core.path.Path import mouse.all.anySyntaxMouse +import wom.format.MemorySize import scala.concurrent.duration.{Duration, DurationInt, FiniteDuration} import scala.jdk.CollectionConverters._ @@ -147,7 +148,8 @@ object RunnableBuilder { scriptContainerPath: String, jobShell: String, volumes: List[Volume], - dockerhubCredentials: (String, String) + dockerhubCredentials: (String, String), + memory: MemorySize ): Runnable.Builder = { val container = (dockerhubCredentials._1, dockerhubCredentials._2) match { @@ -164,9 +166,20 @@ object RunnableBuilder { .setEntrypoint(jobShell) .addCommands(scriptContainerPath) } + + // adding memory as environment variables makes it easy for a user to retrieve the new value of memory + // on the machine to utilize in their command blocks if needed + val environment = + Environment + .newBuilder() + .putAllVariables( + Map("MEM_UNIT" -> memory.unit.toString, "MEM_SIZE" -> memory.amount.toString).asJava + ) + Runnable .newBuilder() .setContainer(container) + .setEnvironment(environment) .withVolumes(volumes) .putLabels(Key.Tag, Value.UserRunnable) } diff --git a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/runnable/UserRunnable.scala b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/runnable/UserRunnable.scala index cba665dbf9..05b6334ccb 100644 --- a/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/runnable/UserRunnable.scala +++ b/supportedBackends/google/batch/src/main/scala/cromwell/backend/google/batch/runnable/UserRunnable.scala @@ -12,7 +12,8 @@ trait UserRunnable { scriptContainerPath = createParameters.commandScriptContainerPath.pathAsString, jobShell = "/bin/bash", volumes = volumes, - dockerhubCredentials = createParameters.dockerhubCredentials + dockerhubCredentials = createParameters.dockerhubCredentials, + memory = createParameters.runtimeAttributes.memory ) val describeRunnable = RunnableBuilder.describeDocker("user runnable", userRunnable)