#!/bin/bash
#
# Launches two EMR clusters, one per PySpark benchmark (shuffle and CPU).
# Each cluster is created with a single Spark step and --auto-terminate.
#
# Optional environment variables:
#   BENCHMARK_S3_BUCKET  Bucket that holds logs/, jobs/, data/, results/ and
#                        emr_bootstrap.sh (default: your-s3-bucket).
#   BENCHMARK_KEY_PAIR   EC2 key pair name for the cluster nodes
#                        (default: your-key-pair).

# -e: stop if the first create-cluster call fails instead of launching the
# second one anyway; -u: catch typos in variable names; pipefail: kept for
# consistency should a pipeline be added later.
set -euo pipefail

readonly S3_BUCKET="${BENCHMARK_S3_BUCKET:-your-s3-bucket}"
readonly KEY_PAIR="${BENCHMARK_KEY_PAIR:-your-key-pair}"
readonly RELEASE_LABEL='emr-5.29.0'

#######################################
# Creates one EMR cluster that runs a single PySpark job as a Spark step.
# Globals:
#   S3_BUCKET, KEY_PAIR, RELEASE_LABEL (read)
# Arguments:
#   $1   - name used for both the cluster and its step
#   $2   - S3 URI of the PySpark job script
#   $3.. - arguments handed to the job script; they are joined with commas,
#          so an individual argument must not itself contain a comma
# Outputs:
#   Whatever `aws emr create-cluster` prints.
# Returns:
#   Exit status of the aws CLI.
#######################################
create_benchmark_cluster() {
  local -r name=$1
  local -r job_uri=$2
  shift 2

  local -a spark_args=(--deploy-mode cluster --master yarn "$job_uri" "$@")

  # The --steps shorthand wants Args as a comma-separated list. "${arr[*]}"
  # joins on the first character of IFS; the subshell keeps the IFS change
  # from leaking into the rest of the function.
  local step_args
  step_args=$(IFS=,; printf '%s' "${spark_args[*]}")

  # BidPrice=OnDemandPrice on both instance groups is carried over unchanged
  # from the previous version of this script.
  # The --steps value is quoted as one word so the shell never treats the
  # [...] list as a glob pattern.
  aws emr create-cluster --name "$name" \
    --release-label "$RELEASE_LABEL" \
    --applications Name=Spark \
    --log-uri "s3://${S3_BUCKET}/logs/" \
    --ec2-attributes "KeyName=${KEY_PAIR}" \
    --instance-groups \
      'InstanceGroupType=MASTER,InstanceCount=1,InstanceType=r5d.xlarge,BidPrice=OnDemandPrice' \
      'InstanceGroupType=CORE,InstanceCount=6,InstanceType=r5d.2xlarge,BidPrice=OnDemandPrice' \
    --bootstrap-actions "Path=s3://${S3_BUCKET}/emr_bootstrap.sh" \
    --steps "Type=Spark,Name=${name},ActionOnFailure=CONTINUE,Args=[${step_args}]" \
    --use-default-roles \
    --auto-terminate
}

create_benchmark_cluster "Pyspark Benchmark - Shuffle" \
  "s3://${S3_BUCKET}/jobs/benchmark-shuffle.py" \
  "s3://${S3_BUCKET}/data/" \
  -r 250 \
  -n pyspark-benchmark-shuffle \
  -o "s3://${S3_BUCKET}/results/pyspark-shuffle"

create_benchmark_cluster "Pyspark Benchmark - CPU" \
  "s3://${S3_BUCKET}/jobs/benchmark-cpu.py" \
  "s3://${S3_BUCKET}/data/" \
  -s 25000000000 \
  -p 1000 \
  -n pyspark-benchmark-cpu \
  -o "s3://${S3_BUCKET}/results/pyspark-cpu"
0 commit comments