Search

EMR on EKS

# Submit the PySpark taxi-trip job to an EMR on EKS virtual cluster using the
# AWS CLI (`aws emr-containers start-job-run`).
#
# Each setting is assigned on its own line so that it is defined in the current
# shell *before* the `aws` command line is expanded. Writing them as prefix
# assignments on the same line as `aws` would leave the `$VAR` references in
# the arguments empty.
#
# NOTE: no `set -e` here on purpose; this snippet is meant to be pasted into an
# interactive shell, where `set -e` would close the session on failure.

# --- Environment-specific settings (edit these for your account/cluster) ---
AWS_REGION=ap-northeast-2
S3_BUCKET=s3://finance-storage-01
EMR_VIRTUAL_CLUSTER_ID=pj35xafcvyhopbvdi8w5k57k1
EMR_EXECUTION_ROLE_ARN=arn:aws:iam::362708816803:role/EMRContainers-JobExecutionRole
CLOUDWATCH_LOG_GROUP=/emr-on-eks/finance-eks-cluster

# --- Job settings ---
JOB_NAME='taxidata'
EMR_EKS_RELEASE_LABEL="emr-6.10.0-latest"

# --- S3 locations derived from the bucket ---
SCRIPTS_S3_PATH="${S3_BUCKET}/scripts"
INPUT_DATA_S3_PATH="${S3_BUCKET}/input"
OUTPUT_DATA_S3_PATH="${S3_BUCKET}/output"

# The JSON payloads are single-quoted; each '"$VAR"' sequence briefly closes
# the single quotes to splice in a double-quoted shell variable.
aws emr-containers start-job-run \
  --virtual-cluster-id "$EMR_VIRTUAL_CLUSTER_ID" \
  --name "$JOB_NAME" \
  --region "$AWS_REGION" \
  --execution-role-arn "$EMR_EXECUTION_ROLE_ARN" \
  --release-label "$EMR_EKS_RELEASE_LABEL" \
  --job-driver '{
    "sparkSubmitJobDriver": {
      "entryPoint": "'"$SCRIPTS_S3_PATH"'/pyspark-taxi-trip.py",
      "entryPointArguments": ["'"$INPUT_DATA_S3_PATH"'",
        "'"$OUTPUT_DATA_S3_PATH"'"
      ],
      "sparkSubmitParameters": "--conf spark.executor.instances=2"
    }
  }' \
  --configuration-overrides '{
    "applicationConfiguration": [
      {
        "classification": "spark-defaults",
        "properties": {
          "spark.driver.cores":"1",
          "spark.executor.cores":"1",
          "spark.driver.memory": "4g",
          "spark.executor.memory": "4g",
          "spark.kubernetes.driver.podTemplateFile":"'"$SCRIPTS_S3_PATH"'/driver-pod-template.yaml",
          "spark.kubernetes.executor.podTemplateFile":"'"$SCRIPTS_S3_PATH"'/executor-pod-template.yaml",
          "spark.local.dir":"/data1",
          "spark.kubernetes.submission.connectionTimeout": "60000000",
          "spark.kubernetes.submission.requestTimeout": "60000000",
          "spark.kubernetes.driver.connectionTimeout": "60000000",
          "spark.kubernetes.driver.requestTimeout": "60000000",
          "spark.kubernetes.executor.podNamePrefix":"'"$JOB_NAME"'"
        }
      }
    ],
    "monitoringConfiguration": {
      "persistentAppUI":"ENABLED",
      "cloudWatchMonitoringConfiguration": {
        "logGroupName":"'"$CLOUDWATCH_LOG_GROUP"'",
        "logStreamNamePrefix":"'"$JOB_NAME"'"
      },
      "s3MonitoringConfiguration": {
        "logUri":"'"${S3_BUCKET}/logs/"'"
      }
    }
  }'
Shell
복사