├── tests
│   ├── requirements.txt
│   └── test_bucket.py
├── requirements.txt
├── aula2
│   ├── intro_ao_git.md
│   ├── jinja
│   │   ├── config.yaml
│   │   ├── processa_template.py
│   │   ├── exemplo_jinja.py
│   │   ├── deploy.py
│   │   └── redshift.yaml.j2
│   └── github_actions
│       ├── bucket.yaml
│       └── deploy.py
├── aula1
│   ├── 0.yaml_vs_json
│   │   ├── exemplo.yaml
│   │   └── exemplo.json
│   ├── 1.bucket
│   │   └── bucket.yaml
│   ├── 2.Redshift
│   │   └── redshift.yaml
│   ├── 3.Iam
│   │   └── iam.yaml
│   └── 4.kinesis
│       ├── insert_records.py
│       └── kinesis.yaml
├── .github
│   └── workflows
│       ├── 1.github_actions.yaml
│       ├── 1.github_actions_feature_branch.yaml
│       └── 2.jinja_deploy.yml
└── .gitignore

/tests/requirements.txt:
--------------------------------------------------------------------------------
pytest
--------------------------------------------------------------------------------
/tests/test_bucket.py:
--------------------------------------------------------------------------------
def test_dummy():
    assert True
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
boto3~=1.18.42
botocore~=1.21.42
jinja2
pyyaml
--------------------------------------------------------------------------------
/aula2/intro_ao_git.md:
--------------------------------------------------------------------------------
# Introduction to Git

* `git init`: initializes a repository
* `git checkout`: switches branches
* `git push`: sends code to the remote repository
--------------------------------------------------------------------------------
/aula1/0.yaml_vs_json/exemplo.yaml:
--------------------------------------------------------------------------------
Name: Andre Sionek
Country: UK
Sports:
  - Cycling
  - Walking  # a comment
  - Running
Address:
  Street: 10 Sutton Plaza
  House: Flat 60
--------------------------------------------------------------------------------
/aula1/0.yaml_vs_json/exemplo.json:
--------------------------------------------------------------------------------
{
  "Name": "Andre Sionek",
  "Country": "UK",
  "Sports": [
    "Cycling",
    "Walking",
    "Running"
  ],
  "Address": {
    "Street": "10 Sutton Plaza",
    "House": "Flat 60"
  }
}
--------------------------------------------------------------------------------
/aula2/jinja/config.yaml:
--------------------------------------------------------------------------------
environments:
  - name: production
    vpcCidrBlock: 10.0.0.0/16
    subnetCidrBlock: 10.0.0.0/24
  - name: staging
    vpcCidrBlock: 10.1.0.0/16
    subnetCidrBlock: 10.1.0.0/24

redshiftCluster:
  dbName: app
  nodeType: dc2.large
  numberOfNodes: 2
  securityGroup:
    whitelistedIps:
      - 5.6.7.8/32
      - 1.2.3.4/32
      - 9.10.11.12/32
--------------------------------------------------------------------------------
/aula2/jinja/processa_template.py:
--------------------------------------------------------------------------------
import jinja2
import yaml
import os


def renderiza_template():
    # Read the Jinja template and the config that feeds it
    with open('redshift.yaml.j2', 'r') as f:
        redshift_yaml = f.read()

    with open('config.yaml', 'r') as f:
        config = yaml.safe_load(f)

    # Render with config keys plus all environment variables as context
    redshift_template = jinja2.Template(redshift_yaml)
    redshift_rendered = redshift_template.render({**config, **os.environ})

    with open('redshift.yaml', 'w') as f:
        f.write(redshift_rendered)


renderiza_template()
--------------------------------------------------------------------------------
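A note on `processa_template.py` above: because `os.environ` is unpacked last in `{**config, **os.environ}`, an environment variable silently wins whenever its name collides with a key from `config.yaml` — this is exactly how `ENVIRONMENT` and the Redshift credentials get injected later. A minimal sketch of that precedence (the keys here are illustrative, not from the course files):

```python
import os
import jinja2

os.environ["ENVIRONMENT"] = "staging"  # pretend this was exported by CI
config = {"ENVIRONMENT": "production", "dbName": "app"}  # pretend this came from config.yaml

template = jinja2.Template("deploy {{ dbName }} to {{ ENVIRONMENT }}")
# os.environ is unpacked last, so it overrides config on key collisions
print(template.render({**config, **os.environ}))  # -> deploy app to staging
```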
/.github/workflows/1.github_actions_feature_branch.yaml:
--------------------------------------------------------------------------------
name: Test Bucket

on:
  push:
    branches-ignore:
      - master

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'
      - name: Install test dependencies
        run: |
          pip install -r tests/requirements.txt
      - name: Run tests
        run: python -m pytest
--------------------------------------------------------------------------------
/aula2/jinja/exemplo_jinja.py:
--------------------------------------------------------------------------------
import jinja2


string_template = """
Hello {{ nome }},

Your password has expired and your computer will explode if you
do nothing. Click the link below to get hacked:

{{ link }}

{% for idx in lista_de_numeros %}
{{ idx }}
{% endfor %}

"""

template = jinja2.Template(string_template)
rendered_template = template.render({
    "nome": "Lojas Renner",
    "link": "http://caixaeconomica02.com.br",
    "lista_de_numeros": [1, 2, 3, 4, 5]
})

print(rendered_template)
--------------------------------------------------------------------------------
/aula1/4.kinesis/insert_records.py:
--------------------------------------------------------------------------------
import boto3
import json
from fake_web_events import Simulation  # pip install fake-web-events (not in the root requirements.txt)


client = boto3.client('firehose')


def put_record(event):
    # Firehose concatenates records, so the trailing newline keeps events one-per-line in S3
    data = json.dumps(event) + "\n"
    response = client.put_record(
        DeliveryStreamName='kinesis-firehose-belisco',
        Record={"Data": data}
    )
    print(event)
    return response


simulation = Simulation(user_pool_size=100, sessions_per_day=10000)
events = simulation.run(duration_seconds=300)

for event in events:
    put_record(event)
--------------------------------------------------------------------------------
/.github/workflows/2.jinja_deploy.yml:
--------------------------------------------------------------------------------
name: Deploy Redshift with Jinja

on:
  push:
    branches:
      - master

jobs:
  deploy:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.x'
      - name: Install dependencies
        run: |
          pip install -r requirements.txt
      - name: Deploy
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
          redshiftClusterMasterUsername: ${{ secrets.redshiftClusterMasterUsername }}
          redshiftClusterMasterUserPassword: ${{ secrets.redshiftClusterMasterUserPassword }}
          ENVIRONMENT: production
        run: |
          python aula2/jinja/deploy.py
--------------------------------------------------------------------------------
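The `insert_records.py` script above ships one event per `put_record` call. Firehose also exposes `put_record_batch`, which accepts up to 500 records per request and saves API round-trips; a sketch of a batched variant (same stream name as the course files):

```python
import json
import boto3

client = boto3.client('firehose')


def put_record_batch(events, batch_size=500):
    """Send events to Firehose in batches of at most 500 records."""
    for i in range(0, len(events), batch_size):
        records = [{"Data": json.dumps(e) + "\n"} for e in events[i:i + batch_size]]
        response = client.put_record_batch(
            DeliveryStreamName='kinesis-firehose-belisco',
            Records=records,
        )
        # Individual records can fail even when the call itself succeeds
        if response['FailedPutCount']:
            print(f"{response['FailedPutCount']} records failed in this batch")
```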
/aula1/1.bucket/bucket.yaml:
--------------------------------------------------------------------------------
Resources:

  BucketBeliscoS3:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: bucket-belisco-0921
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
      PublicAccessBlockConfiguration:
        BlockPublicAcls: True
        BlockPublicPolicy: True
        IgnorePublicAcls: True
        RestrictPublicBuckets: True
      Tags:
        - Key: exemplo
          Value: because Edson asked for it
        - Key: owner
          Value: data-engineering
        - Key: service
          Value: airflow

  Bucket2BeliscoS3:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: segundo-bucket-belisco-turma-6
      PublicAccessBlockConfiguration:
        BlockPublicAcls: False
        BlockPublicPolicy: False
        IgnorePublicAcls: False
        RestrictPublicBuckets: False
--------------------------------------------------------------------------------
/aula2/github_actions/bucket.yaml:
--------------------------------------------------------------------------------
Resources:

  BucketBeliscoS3:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: bucket-belisco-0921
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
      PublicAccessBlockConfiguration:
        BlockPublicAcls: True
        BlockPublicPolicy: True
        IgnorePublicAcls: True
        RestrictPublicBuckets: True
      Tags:
        - Key: exemplo
          Value: because Edson asked for it
        - Key: owner
          Value: data-engineering
        - Key: service
          Value: airflow

  Bucket2BeliscoS3:
    Type: AWS::S3::Bucket
    Properties:
      BucketName: segundo-bucket-belisco-turma-6
      PublicAccessBlockConfiguration:
        BlockPublicAcls: True
        BlockPublicPolicy: True
        IgnorePublicAcls: True
        RestrictPublicBuckets: True
--------------------------------------------------------------------------------
/.github/workflows/1.github_actions.yaml:
--------------------------------------------------------------------------------
name: Deploy Bucket

on:
  push:
    branches:
      - master

jobs:
  test:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v2
      - name: Set up Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'
      - name: Install test dependencies
        run: |
          pip install -r tests/requirements.txt
      - name: Run tests
        run: python -m pytest

  deploy:
    runs-on: ubuntu-latest
    needs: test
    steps:
      - uses: actions/checkout@v2
      - name: Install Python
        uses: actions/setup-python@v2
        with:
          python-version: '3.8'
      - name: Install requirements
        run: |
          pip install -r requirements.txt
      - name: Deploy
        env:
          AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }}
          AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }}
          AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
        run: |
          echo $AWS_DEFAULT_REGION
          python aula2/github_actions/deploy.py
--------------------------------------------------------------------------------
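The test job in the workflows above currently runs only the dummy assertion from `tests/test_bucket.py`. A natural next step is asserting invariants of the templates themselves; a sketch (assumes `pyyaml` is added to `tests/requirements.txt`, and works because `bucket.yaml` uses no short-form tags like `!GetAtt`, which `yaml.safe_load` cannot parse):

```python
import os
import yaml


def test_buckets_block_public_access():
    path = os.path.join(
        os.path.dirname(__file__), '..', 'aula2', 'github_actions', 'bucket.yaml'
    )
    with open(path) as f:
        template = yaml.safe_load(f)

    for name, resource in template['Resources'].items():
        assert resource['Type'] == 'AWS::S3::Bucket'
        block = resource['Properties']['PublicAccessBlockConfiguration']
        assert all(block.values()), f'{name} must block all public access'
```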
/aula1/4.kinesis/kinesis.yaml:
--------------------------------------------------------------------------------
Resources:

  KinesisFirehoseBelisco:
    Type: AWS::KinesisFirehose::DeliveryStream
    Properties:
      DeliveryStreamName: kinesis-firehose-belisco
      DeliveryStreamType: DirectPut
      S3DestinationConfiguration:
        BucketARN: !GetAtt KinesisFirehoseBucket.Arn
        BufferingHints:
          IntervalInSeconds: 60
          SizeInMBs: 1
        CompressionFormat: GZIP
        ErrorOutputPrefix: bad_record
        Prefix: "atomic-events/landing-date=!{timestamp:yyyy}-!{timestamp:MM}-!{timestamp:dd}/"
        RoleARN: !GetAtt KinesisRole.Arn

  KinesisFirehoseBucket:
    Type: AWS::S3::Bucket
    Properties:
      BucketEncryption:
        ServerSideEncryptionConfiguration:
          - ServerSideEncryptionByDefault:
              SSEAlgorithm: AES256
      BucketName: kinesis-firehose-bucket-belisco-turma-6

  KinesisRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service: firehose.amazonaws.com
            Action:
              - sts:AssumeRole
      Description: Role to allow Kinesis to save data to S3
      ManagedPolicyArns:
        - !Ref KinesisPolicy
      Path: /
      RoleName: role-kinesis

  KinesisPolicy:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: Policy to allow kinesis to access S3
      Path: /
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - s3:AbortMultipartUpload
              - s3:GetBucketLocation
              - s3:GetObject
              - s3:ListBucket
              - s3:ListBucketMultipartUploads
              - s3:PutObject
            Resource:
              - !GetAtt KinesisFirehoseBucket.Arn
              - !Join ["/", [!GetAtt KinesisFirehoseBucket.Arn, "*"]]
--------------------------------------------------------------------------------
/aula2/github_actions/deploy.py:
--------------------------------------------------------------------------------
import boto3
import logging
from botocore.exceptions import ClientError
import os

logging.getLogger().setLevel(logging.INFO)
cloudformation_client = boto3.client('cloudformation')


def create_stack(stack_name, template_body, **kwargs):
    cloudformation_client.create_stack(
        StackName=stack_name,
        TemplateBody=template_body,
        Capabilities=['CAPABILITY_IAM', 'CAPABILITY_NAMED_IAM'],
        TimeoutInMinutes=5,
        OnFailure='ROLLBACK'
    )

    cloudformation_client.get_waiter('stack_create_complete').wait(
        StackName=stack_name,
        WaiterConfig={'Delay': 5, 'MaxAttempts': 600}
    )

    cloudformation_client.get_waiter('stack_exists').wait(StackName=stack_name)
    logging.info('CREATE COMPLETE')


def update_stack(stack_name, template_body, **kwargs):
    try:
        cloudformation_client.update_stack(
            StackName=stack_name,
            Capabilities=['CAPABILITY_IAM', 'CAPABILITY_NAMED_IAM'],
            TemplateBody=template_body
        )

    except ClientError as e:
        # CloudFormation reports a no-op update as an error; re-raise anything else
        if 'No updates are to be performed' in str(e):
            logging.info(f'SKIPPING UPDATE: No updates to be performed at stack {stack_name}')
            return e
        raise

    cloudformation_client.get_waiter('stack_update_complete').wait(
        StackName=stack_name,
        WaiterConfig={'Delay': 5, 'MaxAttempts': 600}
    )

    cloudformation_client.get_waiter('stack_exists').wait(StackName=stack_name)
    logging.info('UPDATE COMPLETE')


def get_existing_stacks():
    response = cloudformation_client.list_stacks(
        StackStatusFilter=['CREATE_COMPLETE', 'UPDATE_COMPLETE', 'UPDATE_ROLLBACK_COMPLETE']
    )

    return [stack['StackName'] for stack in response['StackSummaries']]


def _get_abs_path(path):
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), path)


def create_or_update_stack():
    stack_name = 's3-bucket-ci'
    with open(_get_abs_path('bucket.yaml')) as f:
        template_body = f.read()

    existing_stacks = get_existing_stacks()

    if stack_name in existing_stacks:
        logging.info(f'UPDATING STACK {stack_name}')
        update_stack(stack_name, template_body)
    else:
        logging.info(f'CREATING STACK {stack_name}')
        create_stack(stack_name, template_body)


if __name__ == '__main__':
    create_or_update_stack()
--------------------------------------------------------------------------------
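One caveat with `get_existing_stacks` above: `list_stacks` returns a single page of results, so in an account with many stacks the script could miss an existing one and wrongly call `create_stack`. A sketch of a paginator-based variant, reusing the module's `cloudformation_client`:

```python
def get_existing_stacks():
    paginator = cloudformation_client.get_paginator('list_stacks')
    pages = paginator.paginate(
        StackStatusFilter=['CREATE_COMPLETE', 'UPDATE_COMPLETE', 'UPDATE_ROLLBACK_COMPLETE']
    )
    # Walk every page so stacks beyond the first page are not missed
    return [stack['StackName'] for page in pages for stack in page['StackSummaries']]
```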
/aula1/2.Redshift/redshift.yaml:
--------------------------------------------------------------------------------
Resources:

  BeliscoRedshift:
    Type: AWS::Redshift::Cluster
    Properties:
      AllowVersionUpgrade: true
      AutomatedSnapshotRetentionPeriod: 30
      AvailabilityZone: us-east-1a
      ClusterIdentifier: cluster-belisco
      ClusterParameterGroupName: !Ref RedshiftParameterGroup
      ClusterSubnetGroupName: !Ref RedshiftSubnetGroup
      ClusterType: multi-node
      DBName: belisco
      Encrypted: true
      MasterUsername: admin
      MasterUserPassword: Admin1234  # hardcoded only for this teaching example; aula2 injects it from GitHub secrets
      NodeType: dc2.large
      NumberOfNodes: 2
      PubliclyAccessible: true
      VpcSecurityGroupIds:
        - !Ref RedshiftEC2SecurityGroup
      Tags:
        - Key: owner
          Value: data-engineering

  RedshiftParameterGroup:
    Type: AWS::Redshift::ClusterParameterGroup
    Properties:
      Description: Parameter group for redshift cluster
      ParameterGroupFamily: redshift-1.0
      Parameters:
        - ParameterName: max_concurrency_scaling_clusters
          ParameterValue: 1

  RedshiftSubnetGroup:
    Type: AWS::Redshift::ClusterSubnetGroup
    Properties:
      Description: Redshift Subnet group
      SubnetIds:
        - !Ref RedshiftSubnet

  RedshiftSubnet:
    Type: AWS::EC2::Subnet
    Properties:
      AvailabilityZone: us-east-1a
      CidrBlock: 10.0.0.0/24
      VpcId: !Ref RedshiftVPC

  RedshiftVPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: 10.0.0.0/16

  RedshiftEC2SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Security group for Redshift with public access
      GroupName: redshift-security-group
      SecurityGroupEgress:
        - CidrIp: 0.0.0.0/0
          FromPort: 5439
          IpProtocol: tcp
          ToPort: 5439
      SecurityGroupIngress:
        - CidrIp: 37.156.75.55/32
          FromPort: 5439
          IpProtocol: tcp
          ToPort: 5439
      VpcId: !Ref RedshiftVPC

  RedshiftVPCInternetGateway:
    Type: AWS::EC2::InternetGateway

  RedshiftVPCAttachGateway:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref RedshiftVPC
      InternetGatewayId: !Ref RedshiftVPCInternetGateway

  RedshiftRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref RedshiftVPC

  RedshiftVPCRoute:
    Type: AWS::EC2::Route
    Properties:
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref RedshiftVPCInternetGateway
      RouteTableId: !Ref RedshiftRouteTable

  RedshiftSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref RedshiftRouteTable
      SubnetId: !Ref RedshiftSubnet
--------------------------------------------------------------------------------
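Because the cluster above is `PubliclyAccessible` and the security group whitelists a single IP, it can be reached directly on port 5439 from that address. A sketch of connecting with `psycopg2` (the endpoint below is a placeholder — copy the real one from the Redshift console or `describe-clusters`; the credentials are the throwaway ones hardcoded in the template):

```python
import psycopg2

conn = psycopg2.connect(
    host='cluster-belisco.xxxxxxxxxxxx.us-east-1.redshift.amazonaws.com',  # placeholder endpoint
    port=5439,
    dbname='belisco',
    user='admin',
    password='Admin1234',
)
with conn.cursor() as cur:
    cur.execute('SELECT 1')
    print(cur.fetchone())
conn.close()
```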
/aula2/jinja/deploy.py:
--------------------------------------------------------------------------------
import boto3
import logging
from botocore.exceptions import ClientError
import jinja2
import yaml
import os


logging.getLogger().setLevel(logging.INFO)
cloudformation_client = boto3.client('cloudformation')


def create_stack(stack_name, template_body, **kwargs):
    cloudformation_client.create_stack(
        StackName=stack_name,
        TemplateBody=template_body,
        Capabilities=['CAPABILITY_IAM', 'CAPABILITY_NAMED_IAM'],
        TimeoutInMinutes=30,
        OnFailure='ROLLBACK'
    )

    cloudformation_client.get_waiter('stack_create_complete').wait(
        StackName=stack_name,
        WaiterConfig={'Delay': 5, 'MaxAttempts': 600}
    )

    cloudformation_client.get_waiter('stack_exists').wait(StackName=stack_name)
    logging.info('CREATE COMPLETE')


def update_stack(stack_name, template_body, **kwargs):
    try:
        cloudformation_client.update_stack(
            StackName=stack_name,
            Capabilities=['CAPABILITY_IAM', 'CAPABILITY_NAMED_IAM'],
            TemplateBody=template_body
        )

    except ClientError as e:
        # CloudFormation reports a no-op update as an error; re-raise anything else
        if 'No updates are to be performed' in str(e):
            logging.info(f'SKIPPING UPDATE: No updates to be performed at stack {stack_name}')
            return e
        raise

    cloudformation_client.get_waiter('stack_update_complete').wait(
        StackName=stack_name,
        WaiterConfig={'Delay': 5, 'MaxAttempts': 600}
    )

    cloudformation_client.get_waiter('stack_exists').wait(StackName=stack_name)
    logging.info('UPDATE COMPLETE')


def get_existing_stacks():
    response = cloudformation_client.list_stacks(
        StackStatusFilter=['CREATE_COMPLETE', 'UPDATE_COMPLETE', 'UPDATE_ROLLBACK_COMPLETE']
    )

    return [stack['StackName'] for stack in response['StackSummaries']]


def _get_abs_path(path):
    return os.path.join(os.path.dirname(os.path.realpath(__file__)), path)


def create_or_update_stack():
    stack_name = f'redshift-{os.environ["ENVIRONMENT"]}'
    with open(_get_abs_path('redshift.yaml')) as f:
        template_body = f.read()

    existing_stacks = get_existing_stacks()

    if stack_name in existing_stacks:
        logging.info(f'UPDATING STACK {stack_name}')
        update_stack(stack_name, template_body)
    else:
        logging.info(f'CREATING STACK {stack_name}')
        create_stack(stack_name, template_body)


def renderiza_template():
    logging.info('RENDERING JINJA')
    with open(_get_abs_path('redshift.yaml.j2'), 'r') as f:
        redshift_yaml = f.read()

    with open(_get_abs_path('config.yaml'), 'r') as f:
        config = yaml.safe_load(f)

    # Environment variables override config keys on collision
    redshift_template = jinja2.Template(redshift_yaml)
    redshift_rendered = redshift_template.render({**config, **os.environ})

    with open(_get_abs_path('redshift.yaml'), 'w') as f:
        f.write(redshift_rendered)
    logging.info('JINJA RENDERED')


if __name__ == '__main__':
    renderiza_template()
    create_or_update_stack()
--------------------------------------------------------------------------------
/aula1/3.Iam/iam.yaml:
--------------------------------------------------------------------------------
Description: Creates groups, roles, and policies for data engineers and data scientists

Resources:
  IamRoleDataEngineer:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
            Action:
              - sts:AssumeRole
      Description: Role to be assumed by data engineers
      ManagedPolicyArns:
        - !Ref IamPolicyDataEngineer
      RoleName: role-data-engineer

  IamPolicyDataEngineer:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: Access policies for data engineers
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - s3:ListAllMyBuckets
            Resource:
              - arn:aws:s3:::*
          - Effect: Allow
            Action:
              - s3:List*
              - s3:Get*
              - s3:Delete*
              - s3:Put*
            Resource:
              - arn:aws:s3:::bucket-belisco-0921
              - arn:aws:s3:::bucket-belisco-0921/*
          - Effect: Allow
            Action:
              - s3:List*
              - s3:Get*
              - s3:Put*
            Resource:
              - arn:aws:s3:::segundo-bucket-belisco-turma-6
              - arn:aws:s3:::segundo-bucket-belisco-turma-6/*

  IamRoleDataScientist:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              AWS: !Sub 'arn:aws:iam::${AWS::AccountId}:root'
            Action:
              - sts:AssumeRole
      Description: Role to be assumed by data scientists
      ManagedPolicyArns:
        - !Ref IamPolicyDataScientist
      RoleName: role-data-scientist

  IamPolicyDataScientist:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: Access policies for data scientists
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - s3:List*
              - s3:Get*
            Resource:
              - arn:aws:s3:::*

  IamGroupDataEngineer:
    Type: AWS::IAM::Group
    Properties:
      GroupName: iam-group-data-engineer
      ManagedPolicyArns:
        - arn:aws:iam::aws:policy/ReadOnlyAccess
        - !Ref IamPolicyGroupDataEngineer

  IamPolicyGroupDataEngineer:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: Access policies for the data engineer group
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - sts:AssumeRole
            Resource:
              - !GetAtt IamRoleDataEngineer.Arn

  IamGroupDataScientist:
    Type: AWS::IAM::Group
    Properties:
      GroupName: iam-group-data-scientist
      ManagedPolicyArns:
        - !Ref IamPolicyGroupDataScientist

  IamPolicyGroupDataScientist:
    Type: AWS::IAM::ManagedPolicy
    Properties:
      Description: Access policies for the data scientist group
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - sts:AssumeRole
            Resource:
              - !GetAtt IamRoleDataScientist.Arn
--------------------------------------------------------------------------------
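The roles above trust the account root, so any IAM user granted `sts:AssumeRole` by the group policies can assume them. A sketch of assuming the data-engineer role and using its temporary credentials (the account id is a placeholder):

```python
import boto3

sts = boto3.client('sts')
assumed = sts.assume_role(
    RoleArn='arn:aws:iam::123456789012:role/role-data-engineer',  # placeholder account id
    RoleSessionName='data-engineer-session',
)
creds = assumed['Credentials']

# Temporary credentials scoped to IamPolicyDataEngineer
s3 = boto3.client(
    's3',
    aws_access_key_id=creds['AccessKeyId'],
    aws_secret_access_key=creds['SecretAccessKey'],
    aws_session_token=creds['SessionToken'],
)
print([b['Name'] for b in s3.list_buckets()['Buckets']])
```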
/.gitignore:
--------------------------------------------------------------------------------
### Python template
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class

# C extensions
*.so

# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST

# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec

# Installer logs
pip-log.txt
pip-delete-this-directory.txt

# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
cover/

# Translations
*.mo
*.pot

# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal

# Flask stuff:
instance/
.webassets-cache

# Scrapy stuff:
.scrapy

# Sphinx documentation
docs/_build/

# PyBuilder
.pybuilder/
target/

# Jupyter Notebook
.ipynb_checkpoints

# IPython
profile_default/
ipython_config.py

# pyenv
# For a library or package, you might want to ignore these files since the code is
# intended to run in multiple environments; otherwise, check them in:
# .python-version

# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock

# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/

# Celery stuff
celerybeat-schedule
celerybeat.pid

# SageMath parsed files
*.sage.py

# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/

# Spyder project settings
.spyderproject
.spyproject

# Rope project settings
.ropeproject

# mkdocs documentation
/site

# mypy
.mypy_cache/
.dmypy.json
dmypy.json

# Pyre type checker
.pyre/

# pytype static type analyzer
.pytype/

# Cython debug symbols
cython_debug/

### JetBrains template
# Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider
# Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839

# User-specific stuff
.idea/**/workspace.xml
.idea/**/tasks.xml
.idea/**/usage.statistics.xml
.idea/**/dictionaries
.idea/**/shelf

# Generated files
.idea/**/contentModel.xml

# Sensitive or high-churn files
.idea/**/dataSources/
.idea/**/dataSources.ids
.idea/**/dataSources.local.xml
.idea/**/sqlDataSources.xml
.idea/**/dynamic.xml
.idea/**/uiDesigner.xml
.idea/**/dbnavigator.xml

# Gradle
.idea/**/gradle.xml
.idea/**/libraries

# Gradle and Maven with auto-import
# When using Gradle or Maven with auto-import, you should exclude module files,
# since they will be recreated, and may cause churn. Uncomment if using
# auto-import.
# .idea/artifacts
# .idea/compiler.xml
# .idea/jarRepositories.xml
# .idea/modules.xml
# .idea/*.iml
# .idea/modules
# *.iml
# *.ipr

# CMake
cmake-build-*/

# Mongo Explorer plugin
.idea/**/mongoSettings.xml

# File-based project format
*.iws

# IntelliJ
out/

# mpeltonen/sbt-idea plugin
.idea_modules/

# JIRA plugin
atlassian-ide-plugin.xml

# Cursive Clojure plugin
.idea/replstate.xml

# Crashlytics plugin (for Android Studio and IntelliJ)
com_crashlytics_export_strings.xml
crashlytics.properties
crashlytics-build.properties
fabric.properties

# Editor-based Rest Client
.idea/httpRequests

# Android studio 3.1+ serialized cache file
.idea/caches/build_file_checksums.ser
--------------------------------------------------------------------------------
/aula2/jinja/redshift.yaml.j2:
--------------------------------------------------------------------------------
Description: Creates a Redshift cluster that is publicly accessible from the internet.

Resources:

  RedshiftCluster:
    Type: AWS::Redshift::Cluster
    Properties:
      AllowVersionUpgrade: true
      AutomatedSnapshotRetentionPeriod: 5
      AvailabilityZone: us-east-1a
      ClusterIdentifier: redshift-{{ ENVIRONMENT }}-cluster
      ClusterParameterGroupName: !Ref RedshiftParameterGroup
      ClusterSubnetGroupName: !Ref RedshiftSubnetGroup
      VpcSecurityGroupIds:
        - !Ref RedshiftEC2SecurityGroup
      ClusterType: multi-node
      DBName: {{ redshiftCluster.dbName }}
      Encrypted: true
      MasterUsername: {{ redshiftClusterMasterUsername }}
      MasterUserPassword: {{ redshiftClusterMasterUserPassword }}
      NodeType: {{ redshiftCluster.nodeType }}
      NumberOfNodes: {{ redshiftCluster.numberOfNodes }}
      PubliclyAccessible: true
      IamRoles:
        - !GetAtt RedshiftRole.Arn

  RedshiftSubnetGroup:
    Type: AWS::Redshift::ClusterSubnetGroup
    Properties:
      Description: Redshift Subnet group
      SubnetIds:
        - !Ref RedshiftSubnet

  RedshiftParameterGroup:
    Type: AWS::Redshift::ClusterParameterGroup
    Properties:
      Description: Parameter group for redshift cluster
      ParameterGroupFamily: redshift-1.0
      Parameters:
        - ParameterName: max_concurrency_scaling_clusters
          ParameterValue: 0

{% for env in environments %}
{% if env.name == ENVIRONMENT %}
  RedshiftVPC:
    Type: AWS::EC2::VPC
    Properties:
      CidrBlock: {{ env.vpcCidrBlock }}

  RedshiftSubnet:
    Type: AWS::EC2::Subnet
    Properties:
      AvailabilityZone: us-east-1a
      CidrBlock: {{ env.subnetCidrBlock }}
      VpcId: !Ref RedshiftVPC
{% endif %}
{% endfor %}

  RedshiftEC2SecurityGroup:
    Type: AWS::EC2::SecurityGroup
    Properties:
      GroupDescription: Security group for Redshift with public access
      GroupName: redshift-{{ ENVIRONMENT }}-security-group
      SecurityGroupEgress:
        - CidrIp: 0.0.0.0/0
          FromPort: 5439
          IpProtocol: tcp
          ToPort: 5439
      SecurityGroupIngress:
{% for ip in redshiftCluster.securityGroup.whitelistedIps %}
        - CidrIp: {{ ip }}
          FromPort: 5439
          IpProtocol: tcp
          ToPort: 5439
{% endfor %}
      VpcId: !Ref RedshiftVPC

  RedshiftEC2SecurityGroupIngress:
    Type: AWS::EC2::SecurityGroupIngress
    Properties:
      GroupId: !GetAtt RedshiftEC2SecurityGroup.GroupId
      IpProtocol: -1
      FromPort: -1
      ToPort: -1
      SourceSecurityGroupId: !GetAtt RedshiftEC2SecurityGroup.GroupId

  RedshiftVPCInternetGateway:
    Type: AWS::EC2::InternetGateway

  RedshiftVPCAttachGateway:
    Type: AWS::EC2::VPCGatewayAttachment
    Properties:
      VpcId: !Ref RedshiftVPC
      InternetGatewayId: !Ref RedshiftVPCInternetGateway

  RedshiftRouteTable:
    Type: AWS::EC2::RouteTable
    Properties:
      VpcId: !Ref RedshiftVPC

  RedshiftVPCRoute:
    Type: AWS::EC2::Route
    Properties:
      DestinationCidrBlock: 0.0.0.0/0
      GatewayId: !Ref RedshiftVPCInternetGateway
      RouteTableId: !Ref RedshiftRouteTable

  RedshiftSubnetRouteTableAssociation:
    Type: AWS::EC2::SubnetRouteTableAssociation
    Properties:
      RouteTableId: !Ref RedshiftRouteTable
      SubnetId: !Ref RedshiftSubnet

  RedshiftRole:
    Type: AWS::IAM::Role
    Properties:
      AssumeRolePolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Principal:
              Service:
                - redshift.amazonaws.com
            Action:
              - sts:AssumeRole
      Path: "/"
      RoleName: iam-{{ ENVIRONMENT }}-redshift-role

  RedshiftSpectrumPolicy:
    Type: AWS::IAM::Policy
    Properties:
      PolicyDocument:
        Version: "2012-10-17"
        Statement:
          - Effect: Allow
            Action:
              - s3:Get*
              - s3:List*
            Resource:
              - arn:aws:s3:::*
          - Effect: Allow
            Action:
              - glue:CreateDatabase
              - glue:DeleteDatabase
              - glue:GetDatabase
              - glue:GetDatabases
              - glue:UpdateDatabase
              - glue:CreateTable
              - glue:DeleteTable
              - glue:BatchDeleteTable
              - glue:UpdateTable
              - glue:GetTable
              - glue:GetTables
              - glue:BatchCreatePartition
              - glue:CreatePartition
              - glue:DeletePartition
              - glue:BatchDeletePartition
              - glue:UpdatePartition
              - glue:GetPartition
              - glue:GetPartitions
              - glue:BatchGetPartition
            Resource:
              - "*"
          - Effect: Allow
            Action:
              - athena:*
            Resource:
              - "*"
      PolicyName: iam-{{ ENVIRONMENT }}-redshift-spectrum-policy
      Roles:
        - !Ref RedshiftRole
--------------------------------------------------------------------------------
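A rendered template like the one produced from `redshift.yaml.j2` can be sanity-checked before `create_stack`/`update_stack` with CloudFormation's `validate_template` API. A sketch that could slot into `deploy.py` between rendering and deploying (assumes the template was already rendered to `redshift.yaml` next to the script):

```python
import boto3
from botocore.exceptions import ClientError

cloudformation_client = boto3.client('cloudformation')


def validate(template_path='redshift.yaml'):
    with open(template_path) as f:
        body = f.read()
    try:
        # Raises ClientError (ValidationError) on malformed templates
        cloudformation_client.validate_template(TemplateBody=body)
    except ClientError as e:
        raise SystemExit(f'Template validation failed: {e}')
```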