# Create the Kubernetes namespace "ws-emr"; the Role, RoleBinding, service
# account and EMR virtual cluster set up further down all refer to it.
kubectl create namespace ws-emr
# Shell | Copy  (code-block widget labels from the source page; not commands)
# AWS account ID of the current caller; used below to build IAM ARNs.
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)

# OIDC issuer of the EKS cluster named by $CLUSTER_NAME.
OIDC_ISSUER_URL=$(aws eks describe-cluster --name "$CLUSTER_NAME" --query "cluster.identity.oidc.issuer" --output text)

# The IAM trust policy needs the issuer without its URL scheme. Strip the
# "https://" prefix by name rather than by character position, so the value is
# never truncated and an unexpected prefix is left visible instead of mangled.
OIDC_PROVIDER=${OIDC_ISSUER_URL#https://}
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Write kms-policy.json: an IAM policy document with one statement that allows
# every KMS action ("kms:*") on every resource ("*").
# The delimiter is quoted because the document contains nothing to expand.
cat > kms-policy.json << 'KMS_POLICY'
{
"Version": "2012-10-17",
"Statement": [
{
"Sid": "VisualEditor0",
"Effect": "Allow",
"Action": "kms:*",
"Resource": "*"
}
]
}
KMS_POLICY
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Write emr-trust-policy.json, the assume-role (trust) policy document with two
# statements:
#   1. the emr-containers.amazonaws.com service may call sts:AssumeRole;
#   2. identities federated through the cluster's OIDC provider may call
#      sts:AssumeRoleWithWebIdentity when their "sub" claim matches
#      system:serviceaccount:ws-emr:emr-containers-sa-*.
# The delimiter is unquoted on purpose: ${ACCOUNT_ID} and ${OIDC_PROVIDER} are
# expanded by the shell at the moment the file is written.
cat > emr-trust-policy.json << TRUST_POLICY
{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Principal": {
"Service": "emr-containers.amazonaws.com"
},
"Action": "sts:AssumeRole"
},
{
"Effect": "Allow",
"Principal": {
"Federated": "arn:aws:iam::${ACCOUNT_ID}:oidc-provider/${OIDC_PROVIDER}"
},
"Action": "sts:AssumeRoleWithWebIdentity",
"Condition": {
"StringLike": {
"${OIDC_PROVIDER}:sub": "system:serviceaccount:ws-emr:emr-containers-sa-*"
}
}
}
]
}
TRUST_POLICY
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Register the document in kms-policy.json as the IAM policy "ws-kms-policy".
aws iam create-policy --policy-name ws-kms-policy --policy-document file://kms-policy.json
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Create the IAM role "ws-job-execution-role", using emr-trust-policy.json as
# its assume-role (trust) policy document.
aws iam create-role --role-name ws-job-execution-role --assume-role-policy-document file://emr-trust-policy.json
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Attach four policies to ws-job-execution-role, in this order: the AWS-managed
# CloudWatch Logs, S3 and RDS full-access policies, then the ws-kms-policy that
# lives in this account (its ARN is built from ${ACCOUNT_ID}).
for policy_arn in \
  arn:aws:iam::aws:policy/CloudWatchLogsFullAccess \
  arn:aws:iam::aws:policy/AmazonS3FullAccess \
  arn:aws:iam::aws:policy/AmazonRDSFullAccess \
  "arn:aws:iam::${ACCOUNT_ID}:policy/ws-kms-policy"
do
  aws iam attach-role-policy --policy-arn "$policy_arn" --role-name ws-job-execution-role
done
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Write ws-job-execution-role.yaml: a namespaced Role "emr-containers" in
# ws-emr plus a RoleBinding that grants it to the ServiceAccount "emr-sa".
# YAML nesting is expressed by indentation, so the manifest is written with its
# indentation intact; with every line at column 0 the keys under metadata,
# rules, subjects and roleRef would no longer belong to their parents.
# The delimiter is quoted because the manifest contains nothing to expand.
cat > ws-job-execution-role.yaml << 'EOF'
apiVersion: rbac.authorization.k8s.io/v1
kind: Role
metadata:
  name: emr-containers
  namespace: ws-emr
rules:
  - apiGroups: [""]
    resources: ["namespaces"]
    verbs: ["get"]
  - apiGroups: [""]
    resources: ["serviceaccounts", "services", "configmaps", "events", "pods", "pods/log"]
    verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "deletecollection", "annotate", "patch", "label"]
  - apiGroups: [""]
    resources: ["secrets"]
    verbs: ["create", "patch", "delete", "watch"]
  - apiGroups: ["apps"]
    resources: ["statefulsets", "deployments"]
    verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
  - apiGroups: ["batch"]
    resources: ["jobs"]
    verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
  - apiGroups: ["extensions"]
    resources: ["ingresses"]
    verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "annotate", "patch", "label"]
  - apiGroups: ["rbac.authorization.k8s.io"]
    resources: ["roles", "rolebindings"]
    verbs: ["get", "list", "watch", "describe", "create", "edit", "delete", "deletecollection", "annotate", "patch", "label"]
---
apiVersion: rbac.authorization.k8s.io/v1
kind: RoleBinding
metadata:
  name: emr-containers
  namespace: ws-emr
subjects:
  - kind: ServiceAccount
    name: emr-sa
    namespace: ws-emr
roleRef:
  kind: Role
  name: emr-containers
  apiGroup: rbac.authorization.k8s.io
EOF
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Apply the Role and RoleBinding manifest stored in ws-job-execution-role.yaml.
kubectl apply --filename ws-job-execution-role.yaml
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Policy document for "ws-emr-policy": a single Allow statement covering all
# rds, secretsmanager, s3, kms, logs and ecr actions on every resource.
emr_policy_document='{
"Version": "2012-10-17",
"Statement": [
{
"Effect": "Allow",
"Action": [
"rds:*",
"secretsmanager:*",
"s3:*",
"kms:*",
"logs:*",
"ecr:*"
],
"Resource": "*"
}
]
}'

# Register the document above as the IAM policy "ws-emr-policy".
aws iam create-policy --policy-name ws-emr-policy --policy-document "$emr_policy_document"
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Create the service account "emr-sa" in namespace ws-emr on cluster
# ${CLUSTER_NAME}, backed by the IAM role ws-emr-role with ws-emr-policy
# attached.
#
# - Each option is followed by a space before the line-continuation backslash;
#   "emr-sa\" directly followed by a newline would be glued to the next line
#   and turn the name into "emr-sa--namespace".
# - The account ID is taken from AWS_ACCOUNT_ID when that is set, otherwise from
#   ACCOUNT_ID, the variable the earlier steps of this file define.
# - The region falls back to ap-northeast-2 (the region used by the other
#   steps) when AWS_DEFAULT_REGION has not been exported yet.
eksctl create iamserviceaccount \
  --name emr-sa \
  --namespace ws-emr \
  --cluster "${CLUSTER_NAME}" \
  --role-name ws-emr-role \
  --attach-policy-arn "arn:aws:iam::${AWS_ACCOUNT_ID:-${ACCOUNT_ID}}:policy/ws-emr-policy" \
  --region "${AWS_DEFAULT_REGION:-ap-northeast-2}" \
  --approve
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Container-provider description for the virtual cluster: the EKS cluster to
# register and the namespace (ws-emr) inside it.
# The cluster id follows ${CLUSTER_NAME}, the variable the OIDC lookup and the
# eksctl step use, and falls back to the previously hard-coded "ws-eks-cluster"
# when that variable is not set.
container_provider=$(printf '{"id": "%s", "type": "EKS", "info": {"eksInfo": {"namespace": "ws-emr"}}}' "${CLUSTER_NAME:-ws-eks-cluster}")

# Register namespace ws-emr of that EKS cluster as the EMR virtual cluster
# "ws-virtual-cluster" in ap-northeast-2.
aws emr-containers create-virtual-cluster \
  --name ws-virtual-cluster \
  --container-provider "$container_provider" \
  --region ap-northeast-2
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Session variables describing the job environment. Everything except
# TOLERATIONS is exported; TOLERATIONS stays a plain shell variable.

# Region used by the lookups below.
AWS_DEFAULT_REGION="ap-northeast-2"
export AWS_DEFAULT_REGION

# Values looked up through the AWS CLI.
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
export ACCOUNT_ID
VIRTUAL_CLUSTER_ID=$(aws emr-containers list-virtual-clusters --query "virtualClusters[?name=='ws-virtual-cluster' && state=='RUNNING'].id" --output text --region "$AWS_DEFAULT_REGION")
export VIRTUAL_CLUSTER_ID
JOB_EXECUTION_ROLE_ARN=$(aws iam get-role --role-name ws-job-execution-role --query Role.Arn --output text)
export JOB_EXECUTION_ROLE_ARN

# Names derived from the account ID and region.
SPARK_IMAGE_URI="${ACCOUNT_ID}.dkr.ecr.${AWS_DEFAULT_REGION}.amazonaws.com/ws-emr/spark:latest"
S3_BUCKET="ws-cc-raw-data-${ACCOUNT_ID}"
export SPARK_IMAGE_URI S3_BUCKET

# Database connection settings; the host is the endpoint address of the RDS
# instance "ws-db-instance".
# NOTE(review): the password is a hard-coded literal - confirm this is
# acceptable outside a throwaway environment.
RDS_HOST=$(aws rds describe-db-instances --db-instance-identifier ws-db-instance --query "DBInstances[0].Endpoint.Address" --output text)
RDS_PORT="5433"
RDS_DATABASE="fraud_detection"
RDS_USER="wsadmin"
RDS_PASSWORD="wspassword123"
export RDS_HOST RDS_PORT RDS_DATABASE RDS_USER RDS_PASSWORD

# Toleration list as JSON whose double quotes are already backslash-escaped
# (the backslashes are literal characters of the value).
TOLERATIONS='[{\"key\":\"eks.amazonaws.com/compute-type\",\"operator\":\"Equal\",\"value\":\"emr\",\"effect\":\"NoSchedule\"}]'
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Generate submit-spark-job.sh. The delimiter is quoted, so nothing below is
# expanded now; every $VAR and $(...) is evaluated when the generated script
# runs.
cat > submit-spark-job.sh << 'EOF'
#!/bin/bash
#
# Submit the "credit-card-fraud-detection" Spark job to the EMR on EKS virtual
# cluster named ws-virtual-cluster in ap-northeast-2.
#
# Optional environment:
#   RDS_PASSWORD  database password handed to the job (default: wspassword123)
#
# Exits non-zero without submitting anything when a lookup fails or comes back
# empty.
set -euo pipefail

export AWS_DEFAULT_REGION="ap-northeast-2"

# Each lookup is assigned first and exported afterwards: `export VAR=$(cmd)`
# returns the status of `export`, which would hide a failed lookup from set -e.
ACCOUNT_ID=$(aws sts get-caller-identity --query Account --output text)
VIRTUAL_CLUSTER_ID=$(aws emr-containers list-virtual-clusters --query "virtualClusters[?name=='ws-virtual-cluster' && state=='RUNNING'].id" --output text --region "$AWS_DEFAULT_REGION")
JOB_EXECUTION_ROLE_ARN=$(aws iam get-role --role-name ws-job-execution-role --query Role.Arn --output text)
RDS_HOST=$(aws rds describe-db-instances --db-instance-identifier ws-db-instance --query "DBInstances[0].Endpoint.Address" --output text)
export ACCOUNT_ID VIRTUAL_CLUSTER_ID JOB_EXECUTION_ROLE_ARN RDS_HOST

# A lookup can succeed and still yield nothing usable, e.g. when no virtual
# cluster with that name is RUNNING; the CLI's text output may also print the
# literal "None" for a null value. Stop here instead of submitting a job with
# an empty argument.
for required in ACCOUNT_ID VIRTUAL_CLUSTER_ID JOB_EXECUTION_ROLE_ARN RDS_HOST; do
  if [[ -z "${!required}" || "${!required}" == "None" ]]; then
    printf 'error: %s could not be resolved; not submitting the job\n' "$required" >&2
    exit 1
  fi
done

export SPARK_IMAGE_URI=$ACCOUNT_ID.dkr.ecr.$AWS_DEFAULT_REGION.amazonaws.com/ws-emr/spark:latest
export S3_BUCKET=ws-cc-raw-data-$ACCOUNT_ID
export RDS_PORT="5433"
export RDS_DATABASE="fraud_detection"
export RDS_USER="wsadmin"
# NOTE(review): the default password is a hard-coded literal and is sent to the
# job in plain text through the spark-env configuration below. Export
# RDS_PASSWORD before running this script to override it.
export RDS_PASSWORD="${RDS_PASSWORD:-wspassword123}"

# JSON toleration list with backslash-escaped quotes, so it can be embedded
# inside the JSON string value of sparkSubmitParameters.
TOLERATIONS='[{\"key\":\"eks.amazonaws.com/compute-type\",\"operator\":\"Equal\",\"value\":\"emr\",\"effect\":\"NoSchedule\"}]'

# Submit the job
aws emr-containers start-job-run \
  --virtual-cluster-id "$VIRTUAL_CLUSTER_ID" \
  --name credit-card-fraud-detection \
  --execution-role-arn "$JOB_EXECUTION_ROLE_ARN" \
  --release-label emr-6.9.0-latest \
  --job-driver "{\"sparkSubmitJobDriver\": {\"entryPoint\": \"s3://${S3_BUCKET}/credit_card_analysis.py\", \"sparkSubmitParameters\": \"--conf spark.executor.instances=2 --conf spark.driver.memory=4G --conf spark.driver.cores=2 --conf spark.kubernetes.tolerations=${TOLERATIONS} --conf spark.kubernetes.driver.podTemplateFile=s3://${S3_BUCKET}/templates/driver-template.yaml --conf spark.kubernetes.executor.podTemplateFile=s3://${S3_BUCKET}/templates/executor-template.yaml --conf spark.dynamicAllocation.enabled=true --conf spark.dynamicAllocation.shuffleTracking.enabled=true --conf spark.hadoop.fs.s3.customAWSCredentialsProvider=com.amazonaws.auth.DefaultAWSCredentialsProviderChain\"}}" \
  --configuration-overrides "{\"applicationConfiguration\": [{\"classification\": \"spark-defaults\", \"properties\": {\"spark.kubernetes.container.image\": \"${SPARK_IMAGE_URI}\", \"spark.hadoop.fs.s3a.impl\": \"org.apache.hadoop.fs.s3a.S3AFileSystem\", \"spark.hadoop.fs.s3a.aws.credentials.provider\": \"com.amazonaws.auth.DefaultAWSCredentialsProviderChain\"}}, {\"classification\": \"spark-env\", \"properties\": {}, \"configurations\": [{\"classification\":\"export\", \"properties\": {\"RDS_PASSWORD\": \"${RDS_PASSWORD}\",\"RDS_HOST\": \"${RDS_HOST}\"}}]}], \"monitoringConfiguration\": {\"cloudWatchMonitoringConfiguration\": {\"logGroupName\": \"/aws/emr-containers/ws-virtual-cluster\", \"logStreamNamePrefix\": \"credit-card-fraud\"}, \"s3MonitoringConfiguration\": {\"logUri\": \"s3://${S3_BUCKET}/logs/\"}}}" \
  --region "$AWS_DEFAULT_REGION"
EOF
# Shell | Copy  (code-block widget labels from the source page; not commands)
# Mark the generated script as executable ...
chmod +x ./submit-spark-job.sh
# ... and run it from the current directory.
./submit-spark-job.sh
# Shell | Copy  (code-block widget labels from the source page; not commands)
