diff --git a/README.md b/README.md index cc21607..ab42aca 100644 --- a/README.md +++ b/README.md @@ -71,11 +71,16 @@ datacustomcode run ./payload/entrypoint.py After modifying the `entrypoint.py` as needed, using any dependencies you add in the `.venv` virtual environment, you can run this script in Data Cloud: ```zsh datacustomcode scan ./payload/entrypoint.py -datacustomcode deploy --path ./payload --name my_custom_script +datacustomcode deploy --path ./payload --name my_custom_script --cpu-size CPU_L ``` > [!TIP] > The `deploy` process can take several minutes. If you'd like more feedback on the underlying process, you can add `--debug` to the command like `datacustomcode --debug deploy --path ./payload --name my_custom_script` +> +> [!NOTE] +> **Compute Types**: Choose the appropriate compute type based on your workload requirements: +> - **CPU_L/CPU_XL/CPU_2XL/CPU_4XL**: Large, X-Large, 2X-Large and 4X-Large CPU instances for data processing +> - Default is `CPU_2XL` which provides a good balance of performance and cost for most use cases You can now use the Salesforce Data Cloud UI to find the created Data Transform and use the `Run Now` button to run it. Once the Data Transform run is successful, check the DLO your script is writing to and verify the correct records were added. @@ -139,6 +144,7 @@ Options: - `--name TEXT`: Name of the transformation job [required] - `--version TEXT`: Version of the transformation job (default: "0.0.1") - `--description TEXT`: Description of the transformation job (default: "") +- `--cpu-size TEXT`: CPU size for the deployment (default: "CPU_2XL"). Available options: CPU_L (Large), CPU_XL (X-Large), CPU_2XL (2X-Large), CPU_4XL (4X-Large) #### `datacustomcode init` Initialize a new development environment with a template. 
diff --git a/src/datacustomcode/cli.py b/src/datacustomcode/cli.py index 01c0380..8f9d950 100644 --- a/src/datacustomcode/cli.py +++ b/src/datacustomcode/cli.py @@ -83,16 +83,43 @@ def zip(path: str): @click.option("--name", required=True) @click.option("--version", default="0.0.1") @click.option("--description", default="Custom Data Transform Code") -def deploy(path: str, name: str, version: str, description: str): +@click.option( + "--cpu-size", + default="CPU_2XL", + help="""CPU size for deployment. Available options: + + \b + CPU_L - Large CPU instance + CPU_XL - X-Large CPU instance + CPU_2XL - 2X-Large CPU instance [DEFAULT] + CPU_4XL - 4X-Large CPU instance + + Choose based on your workload requirements.""", +) +def deploy(path: str, name: str, version: str, description: str, cpu_size: str): from datacustomcode.credentials import Credentials from datacustomcode.deploy import TransformationJobMetadata, deploy_full logger.debug("Deploying project") + # Validate compute type + from datacustomcode.deploy import COMPUTE_TYPES + + if cpu_size not in COMPUTE_TYPES.keys(): + click.secho( + f"Error: Invalid CPU size '{cpu_size}'. " + f"Available options: {', '.join(COMPUTE_TYPES.keys())}", + fg="red", + ) + raise click.Abort() + + logger.debug(f"Deploying with CPU size: {cpu_size}") + metadata = TransformationJobMetadata( name=name, version=version, description=description, + computeType=COMPUTE_TYPES[cpu_size], ) try: credentials = Credentials.from_available() diff --git a/src/datacustomcode/deploy.py b/src/datacustomcode/deploy.py index dd9f76f..46109db 100644 --- a/src/datacustomcode/deploy.py +++ b/src/datacustomcode/deploy.py @@ -44,11 +44,25 @@ AUTH_PATH = "services/oauth2/token" WAIT_FOR_DEPLOYMENT_TIMEOUT = 3000 +# Available compute types for Data Cloud deployments. +# Nomenclature used by COMPUTE_TYPES keys align with +# compute instances provisioned by Data Cloud. 
+COMPUTE_TYPES = { + "CPU_L": "CPU_XS", # Large CPU instance + "CPU_XL": "CPU_S", # X-Large CPU instance + "CPU_2XL": "CPU_M", # 2X-Large CPU instance (default) + "CPU_4XL": "CPU_L", # 4X-Large CPU instance +} + class TransformationJobMetadata(BaseModel): name: str version: str description: str + computeType: str + + def __init__(self, **data): + super().__init__(**data) def _join_strip_url(*args: str) -> str: @@ -123,7 +137,7 @@ def create_deployment( "name": metadata.name, "description": metadata.description, "version": metadata.version, - "computeType": "CPU_M", + "computeType": metadata.computeType, } logger.debug(f"Creating deployment {metadata.name}...") try: diff --git a/tests/test_deploy.py b/tests/test_deploy.py index 5a2d238..e4d524d 100644 --- a/tests/test_deploy.py +++ b/tests/test_deploy.py @@ -434,7 +434,10 @@ def test_create_deployment_success(self, mock_make_api_call): access_token="test_token", instance_url="https://instance.example.com" ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test job", + computeType="CPU_M", ) mock_make_api_call.return_value = { @@ -454,7 +457,10 @@ def test_create_deployment_conflict(self, mock_make_api_call): access_token="test_token", instance_url="https://instance.example.com" ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test job", + computeType="CPU_M", ) # Mock HTTP error with 409 Conflict @@ -571,7 +577,10 @@ def test_get_deployments(self, mock_make_api_call): access_token="test_token", instance_url="https://instance.example.com" ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test job", + computeType="CPU_M", ) mock_make_api_call.return_value = {"deploymentStatus": "Deployed"} @@ -595,7 +604,10 @@ def 
test_wait_for_deployment_success( access_token="test_token", instance_url="https://instance.example.com" ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test job", + computeType="CPU_M", ) callback = MagicMock() @@ -622,7 +634,10 @@ def test_wait_for_deployment_timeout( access_token="test_token", instance_url="https://instance.example.com" ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test job", + computeType="CPU_M", ) # Mock time to simulate timeout @@ -699,7 +714,10 @@ def test_create_data_transform(self, mock_make_api_call, mock_get_config): access_token="test_token", instance_url="https://instance.example.com" ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test job", + computeType="CPU_M", ) mock_get_config.return_value = DataTransformConfig( @@ -762,7 +780,10 @@ def test_deploy_full( login_url="https://example.com", ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test job", + computeType="CPU_M", ) callback = MagicMock() @@ -799,7 +820,10 @@ def test_run_data_transform(self, mock_make_api_call): access_token="test_token", instance_url="https://instance.example.com" ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test job", + computeType="CPU_M", ) mock_make_api_call.return_value = {"status": "Running"} @@ -839,7 +863,10 @@ def test_deploy_full_happy_path( login_url="https://example.com", ) metadata = TransformationJobMetadata( - name="test_job", version="1.0.0", description="Test job" + name="test_job", + version="1.0.0", + description="Test 
job", + computeType="CPU_M", ) callback = MagicMock()