Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

GCP Batch: Support passing standard machine types to the Google backend #7545

Open
wants to merge 6 commits into
base: develop
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,7 @@ class GcpBatchRequestFactoryImpl()(implicit gcsTransferConfiguration: GcsTransfe
val machineType = GcpBatchMachineConstraints.machineType(runtimeAttributes.memory,
runtimeAttributes.cpu,
cpuPlatformOption = runtimeAttributes.cpuPlatform,
standardMachineTypeOption = runtimeAttributes.standardMachineType,
javiergaitan marked this conversation as resolved.
Show resolved Hide resolved
jobLogger = jobLogger
)
val instancePolicy =
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@ import wom.format.MemorySize

import scala.math.{log, pow}

/**
 * Wraps the name of a predefined (non-custom) machine type, e.g. `n1-standard-2`.
 *
 * The name is stored verbatim; this class performs no validation or normalization of its own.
 *
 * @param machineType the machine type name exactly as it should be handed to the backend
 */
final case class StandardMachineType(machineType: String)

/**
* Adjusts memory and cpu for custom machine types.
*
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,8 @@ final case class GcpBatchRuntimeAttributes(cpu: Int Refined Positive,
failOnStderr: Boolean,
continueOnReturnCode: ContinueOnReturnCode,
noAddress: Boolean,
checkpointFilename: Option[String]
checkpointFilename: Option[String],
standardMachineType: Option[String]
)

object GcpBatchRuntimeAttributes {
Expand Down Expand Up @@ -79,6 +80,8 @@ object GcpBatchRuntimeAttributes {
val CpuPlatformIntelIceLakeValue = "Intel Ice Lake"
val CpuPlatformAMDRomeValue = "AMD Rome"

val StandardMachineTypeKey = "standardMachineType"

val CheckpointFileKey = "checkpointFile"
private val checkpointFileValidationInstance = new StringRuntimeAttributesValidation(CheckpointFileKey).optional

Expand All @@ -92,6 +95,8 @@ object GcpBatchRuntimeAttributes {
)
/**
 * Supplies the optional validation used for the CPU platform runtime attribute.
 *
 * @param runtimeConfig backend runtime configuration; not consulted by this validation
 * @return the shared `cpuPlatformValidationInstance`
 */
private def cpuPlatformValidation(
  runtimeConfig: Option[Config]
): OptionalRuntimeAttributesValidation[String] = {
  cpuPlatformValidationInstance
}
/**
 * Builds the optional string validation for the `standardMachineType` runtime attribute.
 *
 * A fresh validation instance is created on every call, keyed by `StandardMachineTypeKey`.
 *
 * @param runtimeConfig backend runtime configuration; not consulted by this validation
 * @return an optional string validation for the standard machine type attribute
 */
private def standardMachineTypeValidation(
  runtimeConfig: Option[Config]
): OptionalRuntimeAttributesValidation[String] = {
  val requiredValidation = new StringRuntimeAttributesValidation(StandardMachineTypeKey)
  requiredValidation.optional
}
/**
 * Supplies the optional validation used for the GPU type runtime attribute.
 *
 * @param runtimeConfig backend runtime configuration; not consulted by this validation
 * @return `GpuTypeValidation.optional`
 */
private def gpuTypeValidation(
  runtimeConfig: Option[Config]
): OptionalRuntimeAttributesValidation[GpuType] = {
  GpuTypeValidation.optional
}

Expand Down Expand Up @@ -159,7 +164,8 @@ object GcpBatchRuntimeAttributes {
memoryValidation(runtimeConfig),
bootDiskSizeValidation(runtimeConfig),
checkpointFileValidationInstance,
dockerValidation
dockerValidation,
standardMachineTypeValidation(runtimeConfig)
)
}

Expand Down Expand Up @@ -212,6 +218,10 @@ object GcpBatchRuntimeAttributes {
RuntimeAttributesValidation.extract(memoryValidation(runtimeAttrsConfig), validatedRuntimeAttributes)
val disks: Seq[GcpBatchAttachedDisk] =
RuntimeAttributesValidation.extract(disksValidation(runtimeAttrsConfig), validatedRuntimeAttributes)
val standardMachineType: Option[String] = RuntimeAttributesValidation.extractOption(
standardMachineTypeValidation(runtimeAttrsConfig).key,
validatedRuntimeAttributes
)

new GcpBatchRuntimeAttributes(
cpu = cpu,
Expand All @@ -226,7 +236,8 @@ object GcpBatchRuntimeAttributes {
failOnStderr = failOnStderr,
continueOnReturnCode = continueOnReturnCode,
noAddress = noAddress,
checkpointFilename = checkpointFileName
checkpointFilename = checkpointFileName,
standardMachineType = standardMachineType
)
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@ import cromwell.backend.google.batch.models.{
GcpBatchRuntimeAttributes,
N1CustomMachineType,
N2CustomMachineType,
N2DCustomMachineType
N2DCustomMachineType,
StandardMachineType
}
import cromwell.core.logging.JobLogger
import eu.timepit.refined.api.Refined
Expand All @@ -15,17 +16,21 @@ object GcpBatchMachineConstraints {
/**
 * Resolves the machine type string to request for a job.
 *
 * When a non-blank standard machine type is supplied it takes precedence and is returned
 * (with surrounding whitespace removed); memory, cpu and cpu platform are then ignored.
 * Otherwise a custom machine type is derived from the requested memory and cpu, with the
 * machine family chosen from the CPU platform.
 *
 * @param memory                    requested memory; only used when building a custom machine type
 * @param cpu                       requested cpu count; only used when building a custom machine type
 * @param cpuPlatformOption         optional CPU platform, used to pick the custom machine family
 * @param standardMachineTypeOption optional predefined machine type name; blank values are treated as absent
 * @param jobLogger                 logger passed through to the custom machine type computation
 * @return the machine type name to request
 */
def machineType(memory: MemorySize,
                cpu: Int Refined Positive,
                cpuPlatformOption: Option[String],
                standardMachineTypeOption: Option[String],
                jobLogger: JobLogger
): String =
  // Trim once and reuse the trimmed value, so the blank check and the returned name agree
  // and no unsafe Option.get is needed.
  standardMachineTypeOption.map(_.trim).filter(_.nonEmpty) match {
    case Some(standardMachineType) =>
      StandardMachineType(standardMachineType).machineType
    case None =>
      // If someone requests Intel Cascade Lake or Intel Ice Lake as their CPU platform then switch the machine type to n2.
      // Similarly, CPU platform of AMD Rome corresponds to the machine type n2d.
      val customMachineType =
        cpuPlatformOption match {
          case Some(GcpBatchRuntimeAttributes.CpuPlatformIntelCascadeLakeValue) => N2CustomMachineType
          case Some(GcpBatchRuntimeAttributes.CpuPlatformIntelIceLakeValue) => N2CustomMachineType
          case Some(GcpBatchRuntimeAttributes.CpuPlatformAMDRomeValue) => N2DCustomMachineType
          case _ => N1CustomMachineType
        }
      customMachineType.machineType(memory, cpu, jobLogger)
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,8 @@ trait GcpBatchRuntimeAttributesSpecsMixin {
failOnStderr = false,
continueOnReturnCode = ContinueOnReturnCodeSet(Set(0)),
noAddress = false,
checkpointFilename = None
checkpointFilename = None,
standardMachineType = None
)

def assertBatchRuntimeAttributesSuccessfulCreation(runtimeAttributes: Map[String, WomValue],
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,65 +24,89 @@ class GcpBatchMachineConstraintsSpec extends AnyFlatSpec with CromwellTimeoutSpe

it should "generate valid machine types" in {
val validTypes = Table(
("memory", "cpu", "cpuPlatformOption", "machineTypeString"),
("memory", "cpu", "cpuPlatformOption", "standardMachineTypeOption", "machineTypeString"),
// Already ok tuple
(MemorySize(1024, MemoryUnit.MB), refineMV[Positive](1), None, "custom-1-1024"),
(MemorySize(1024, MemoryUnit.MB), refineMV[Positive](1), None, None, "custom-1-1024"),
// CPU must be even (except if it's 1)
(MemorySize(4, MemoryUnit.GB), refineMV[Positive](3), None, "custom-4-4096"),
(MemorySize(4, MemoryUnit.GB), refineMV[Positive](3), None, None, "custom-4-4096"),
// Memory must be a multiple of 256
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](1), None, "custom-1-1024"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](1), None, None, "custom-1-1024"),
// Memory / cpu ratio must be > 0.9GB, increase memory
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](4), None, "custom-4-3840"),
(MemorySize(14, MemoryUnit.GB), refineMV[Positive](16), None, "custom-16-14848"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](4), None, None, "custom-4-3840"),
(MemorySize(14, MemoryUnit.GB), refineMV[Positive](16), None, None, "custom-16-14848"),
// Memory / cpu ratio must be < 6.5GB, increase CPU
(MemorySize(13.65, MemoryUnit.GB), refineMV[Positive](1), None, "custom-4-14080"),
(MemorySize(13.65, MemoryUnit.GB), refineMV[Positive](1), None, None, "custom-4-14080"),
// Memory should be an int
(MemorySize(1520.96, MemoryUnit.MB), refineMV[Positive](1), None, "custom-1-1536"),
(MemorySize(1024.0, MemoryUnit.MB), refineMV[Positive](1), None, "custom-1-1024"),
(MemorySize(1520.96, MemoryUnit.MB), refineMV[Positive](1), None, None, "custom-1-1536"),
(MemorySize(1024.0, MemoryUnit.MB), refineMV[Positive](1), None, None, "custom-1-1024"),
// Increase to a cpu selection not valid for n2 below
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, "custom-34-31488"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, None, "custom-34-31488"),

// Same tests but with cascade lake (n2)
(MemorySize(1024, MemoryUnit.MB), refineMV[Positive](1), n2OptionCascadeLake, "n2-custom-2-2048"),
(MemorySize(4, MemoryUnit.GB), refineMV[Positive](3), n2OptionCascadeLake, "n2-custom-4-4096"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](1), n2OptionCascadeLake, "n2-custom-2-2048"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](4), n2OptionCascadeLake, "n2-custom-4-4096"),
(MemorySize(14, MemoryUnit.GB), refineMV[Positive](16), n2OptionCascadeLake, "n2-custom-16-16384"),
(MemorySize(13.65, MemoryUnit.GB), refineMV[Positive](1), n2OptionCascadeLake, "n2-custom-2-14080"),
(MemorySize(1520.96, MemoryUnit.MB), refineMV[Positive](1), n2OptionCascadeLake, "n2-custom-2-2048"),
(MemorySize(1024.0, MemoryUnit.MB), refineMV[Positive](1), n2OptionCascadeLake, "n2-custom-2-2048"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), n2OptionCascadeLake, "n2-custom-36-36864"),
(MemorySize(1024, MemoryUnit.MB), refineMV[Positive](1), n2OptionCascadeLake, None, "n2-custom-2-2048"),
(MemorySize(4, MemoryUnit.GB), refineMV[Positive](3), n2OptionCascadeLake, None, "n2-custom-4-4096"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](1), n2OptionCascadeLake, None, "n2-custom-2-2048"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](4), n2OptionCascadeLake, None, "n2-custom-4-4096"),
(MemorySize(14, MemoryUnit.GB), refineMV[Positive](16), n2OptionCascadeLake, None, "n2-custom-16-16384"),
(MemorySize(13.65, MemoryUnit.GB), refineMV[Positive](1), n2OptionCascadeLake, None, "n2-custom-2-14080"),
(MemorySize(1520.96, MemoryUnit.MB), refineMV[Positive](1), n2OptionCascadeLake, None, "n2-custom-2-2048"),
(MemorySize(1024.0, MemoryUnit.MB), refineMV[Positive](1), n2OptionCascadeLake, None, "n2-custom-2-2048"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), n2OptionCascadeLake, None, "n2-custom-36-36864"),

// Same tests, but with ice lake. Should produce same results as cascade lake since they're both n2.
(MemorySize(1024, MemoryUnit.MB), refineMV[Positive](1), n2OptionIceLake, "n2-custom-2-2048"),
(MemorySize(4, MemoryUnit.GB), refineMV[Positive](3), n2OptionIceLake, "n2-custom-4-4096"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](1), n2OptionIceLake, "n2-custom-2-2048"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](4), n2OptionIceLake, "n2-custom-4-4096"),
(MemorySize(14, MemoryUnit.GB), refineMV[Positive](16), n2OptionIceLake, "n2-custom-16-16384"),
(MemorySize(13.65, MemoryUnit.GB), refineMV[Positive](1), n2OptionIceLake, "n2-custom-2-14080"),
(MemorySize(1520.96, MemoryUnit.MB), refineMV[Positive](1), n2OptionIceLake, "n2-custom-2-2048"),
(MemorySize(1024.0, MemoryUnit.MB), refineMV[Positive](1), n2OptionIceLake, "n2-custom-2-2048"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), n2OptionIceLake, "n2-custom-36-36864"),
(MemorySize(1024, MemoryUnit.MB), refineMV[Positive](1), n2OptionIceLake, None, "n2-custom-2-2048"),
(MemorySize(4, MemoryUnit.GB), refineMV[Positive](3), n2OptionIceLake, None, "n2-custom-4-4096"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](1), n2OptionIceLake, None, "n2-custom-2-2048"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](4), n2OptionIceLake, None, "n2-custom-4-4096"),
(MemorySize(14, MemoryUnit.GB), refineMV[Positive](16), n2OptionIceLake, None, "n2-custom-16-16384"),
(MemorySize(13.65, MemoryUnit.GB), refineMV[Positive](1), n2OptionIceLake, None, "n2-custom-2-14080"),
(MemorySize(1520.96, MemoryUnit.MB), refineMV[Positive](1), n2OptionIceLake, None, "n2-custom-2-2048"),
(MemorySize(1024.0, MemoryUnit.MB), refineMV[Positive](1), n2OptionIceLake, None, "n2-custom-2-2048"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), n2OptionIceLake, None, "n2-custom-36-36864"),

// Same tests but with AMD Rome (n2d). For n2d, CPU counts above 16 are in increments of 16.
(MemorySize(1024, MemoryUnit.MB), refineMV[Positive](1), n2dOption, "n2d-custom-2-1024"),
(MemorySize(4, MemoryUnit.GB), refineMV[Positive](3), n2dOption, "n2d-custom-4-4096"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](1), n2dOption, "n2d-custom-2-1024"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](4), n2dOption, "n2d-custom-4-2048"),
(MemorySize(14, MemoryUnit.GB), refineMV[Positive](16), n2dOption, "n2d-custom-16-14336"),
(MemorySize(13.65, MemoryUnit.GB), refineMV[Positive](1), n2dOption, "n2d-custom-2-14080"),
(MemorySize(1520.96, MemoryUnit.MB), refineMV[Positive](1), n2dOption, "n2d-custom-2-1536"),
(MemorySize(1024.0, MemoryUnit.MB), refineMV[Positive](1), n2dOption, "n2d-custom-2-1024"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), n2dOption, "n2d-custom-48-24576"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](81), n2dOption, "n2d-custom-96-49152"),
(MemorySize(256, MemoryUnit.GB), refineMV[Positive](128), n2dOption, "n2d-custom-96-262144")
(MemorySize(1024, MemoryUnit.MB), refineMV[Positive](1), n2dOption, None, "n2d-custom-2-1024"),
(MemorySize(4, MemoryUnit.GB), refineMV[Positive](3), n2dOption, None, "n2d-custom-4-4096"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](1), n2dOption, None, "n2d-custom-2-1024"),
(MemorySize(1, MemoryUnit.GB), refineMV[Positive](4), n2dOption, None, "n2d-custom-4-2048"),
(MemorySize(14, MemoryUnit.GB), refineMV[Positive](16), n2dOption, None, "n2d-custom-16-14336"),
(MemorySize(13.65, MemoryUnit.GB), refineMV[Positive](1), n2dOption, None, "n2d-custom-2-14080"),
(MemorySize(1520.96, MemoryUnit.MB), refineMV[Positive](1), n2dOption, None, "n2d-custom-2-1536"),
(MemorySize(1024.0, MemoryUnit.MB), refineMV[Positive](1), n2dOption, None, "n2d-custom-2-1024"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), n2dOption, None, "n2d-custom-48-24576"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](81), n2dOption, None, "n2d-custom-96-49152"),
(MemorySize(256, MemoryUnit.GB), refineMV[Positive](128), n2dOption, None, "n2d-custom-96-262144"),

// Test Standard Machine types
// General-purpose machine family
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("n1-standard-2"), "n1-standard-2"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("n1-highmem-2"), "n1-highmem-2"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("n1-highcpu-4"), "n1-highcpu-4"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("f1-micro"), "f1-micro"),

// Accelerator-optimized machine family
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("a2-highgpu-1g"), "a2-highgpu-1g"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("a3-megagpu-8g"), "a3-megagpu-8g"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("g2-standard-4"), "g2-standard-4"),

// Other machine families
// Storage-optimized
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("z3-highmem-88"), "z3-highmem-88"),
// Compute-optimized
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("h3-standard-88"), "h3-standard-88"),
// Memory-optimized
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("m3-ultramem-128"), "m3-ultramem-128"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("a2-highgpu-1g"), "a2-highgpu-1g"),
(MemorySize(2, MemoryUnit.GB), refineMV[Positive](33), None, Option("a2-highgpu-1g"), "a2-highgpu-1g")

)

forAll(validTypes) { (memory, cpu, cpuPlatformOption, expected) =>
forAll(validTypes) { (memory, cpu, cpuPlatformOption, standardMachineTypeOption, expected) =>
GcpBatchMachineConstraints.machineType(
memory = memory,
cpu = cpu,
cpuPlatformOption = cpuPlatformOption,
standardMachineTypeOption = standardMachineTypeOption,
jobLogger = mock[JobLogger]
) shouldBe expected
}
Expand Down
Loading